[game-data-packager] 01/02: spider: check only one game when argv[1] is provided
Alexandre Detiste
detiste-guest at moszumanska.debian.org
Tue Sep 29 13:29:55 UTC 2015
This is an automated email from the git hooks/post-receive script.
detiste-guest pushed a commit to branch master in repository game-data-packager.
commit 0b664f85fd5fd9af670689d71e7bd923a40ea427
Author: Alexandre Detiste <alexandre.detiste at gmail.com>
Date: Tue Sep 29 15:28:16 2015 +0200
spider: check only one game when argv[1] is provided
---
tools/spider.py | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/tools/spider.py b/tools/spider.py
index e06093c..a966135 100755
--- a/tools/spider.py
+++ b/tools/spider.py
@@ -19,6 +19,7 @@
# in per-engine-wiki pages
# we don't rescan games we already have
+import sys
import time
import urllib.request
from bs4 import BeautifulSoup
@@ -26,6 +27,11 @@ from game_data_packager import load_games
CSV = 'data/wikipedia.csv'
+try:
+ todo = sys.argv[1]
+except IndexError:
+ todo = '*'
+
urls = dict()
with open(CSV, 'r', encoding='utf8') as f:
for line in f.readlines():
@@ -38,16 +44,18 @@ with open(CSV, 'r', encoding='utf8') as f:
def is_wikipedia(href):
return href and "wikipedia" in href
-for shortname, game in load_games().items():
+for shortname, game in load_games(None, game=todo).items():
if not game.wiki:
continue
if shortname in urls:
continue
+ print('processing %s ...' % shortname)
url = game.wikibase + game.wiki
html = urllib.request.urlopen(url)
soup = BeautifulSoup(html, 'lxml')
for tag in soup.find_all(href=is_wikipedia):
+ print(' ' + tag['href'])
urls[shortname] = tag['href']
#break
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/game-data-packager.git
More information about the Pkg-games-commits mailing list