[geneagrapher] 155/226: Factored out ID extraction code.
Doug Torrance
dtorrance-guest at moszumanska.debian.org
Sat Jul 11 17:10:58 UTC 2015
This is an automated email from the git hooks/post-receive script.
dtorrance-guest pushed a commit to branch master
in repository geneagrapher.
commit cb4d4af91133a0722942730a390bcca39181306b
Author: David Alber <alber.david at gmail.com>
Date: Sun Oct 30 21:52:46 2011 -0700
Factored out ID extraction code.
---
geneagrapher/grabber.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/geneagrapher/grabber.py b/geneagrapher/grabber.py
index f8b11eb..8eb715f 100644
--- a/geneagrapher/grabber.py
+++ b/geneagrapher/grabber.py
@@ -21,7 +21,12 @@ class Grabber:
"""
url = 'http://genealogy.math.ndsu.nodak.edu/id.php?id=' + str(self.id)
return urllib.urlopen(url)
-
+
+ @staticmethod
+ def extract_id(tag):
+ """Extract the ID from a tag with form <a href="id.php?id=7401">."""
+ return tag.attrs[0][-1].split('=')[-1]
+
def extract_node_information(self):
"""
For the mathematician in this object, extract the list of
@@ -56,12 +61,12 @@ class Grabber:
# Get advisor IDs.
for advisor_info in soup.findAll(text=re.compile('Advisor')):
if 'Advisor: Unknown' not in advisor_info:
- advisor_id = advisor_info.findNext().attrs[0][-1].split('=')[1]
+ advisor_id = self.extract_id(advisor_info.findNext())
self.advisors.append(int(advisor_id))
# Get descendant IDs.
if soup.find('table') is not None:
for descendant_info in soup.find('table').findAll('a'):
- descendant_id = descendant_info.attrs[0][-1].split('=')[-1]
+ descendant_id = self.extract_id(descendant_info)
self.descendants.append(int(descendant_id))
return [self.name, self.institution, self.year, self.advisors, self.descendants]
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/geneagrapher.git
More information about the debian-science-commits mailing list