[geneagrapher] 200/226: Refactor the Grabber class for improved testability.
Doug Torrance
dtorrance-guest at moszumanska.debian.org
Sat Jul 11 17:11:10 UTC 2015
This is an automated email from the git hooks/post-receive script.
dtorrance-guest pushed a commit to branch master
in repository geneagrapher.
commit 8933f7ece1c091bae36c3ace2b702d804677cd40
Author: David Alber <alber.david at gmail.com>
Date: Sun Dec 25 21:19:36 2011 -0800
Refactor the Grabber class for improved testability.
This changeset modifies grabber.py to allow for better testability
of individual functionalities. Doing this enables the use of local
test data, which cuts down on test time and the number of requests
to the Math Genealogy Project's website during testing.
---
src/geneagrapher/grabber.py | 95 ++++++++-----
tests/geneagrapher/test_grabber_methods.py | 221 ++++++++++++++++++-----------
2 files changed, 200 insertions(+), 116 deletions(-)
diff --git a/src/geneagrapher/grabber.py b/src/geneagrapher/grabber.py
index fbaff33..f38eee3 100644
--- a/src/geneagrapher/grabber.py
+++ b/src/geneagrapher/grabber.py
@@ -28,43 +28,70 @@ class Grabber:
soup = BeautifulSoup(page, convertEntities='html')
page.close()
- if soup.firstText().text == u"You have specified an ID that does not \
-exist in the database. Please back up and try again.":
- # Then a bad URL (e.g., a bad record id) was given. Throw an
- # exception.
- msg = "Invalid id {}".format(id)
- raise ValueError(msg)
-
- # Get mathematician name.
- name = soup.find('h2').getText()
-
- # Get institution name (or None, if it there is no institution name).
- institution = soup.find('div', style="line-height: 30px; \
+ return get_record_from_tree(soup, id)
+
+
+def get_record_from_tree(soup, id):
+ """Extract and return the fields in the mathematician record using the
+ input tree."""
+ if not has_record(soup):
+ # Then a bad record id was given. Raise an exception.
+ msg = "Invalid id {}".format(id)
+ raise ValueError(msg)
+
+ name = get_name(soup)
+ institution = get_institution(soup)
+ year = get_year(soup)
+ advisors = get_advisors(soup)
+ descendants = get_descendants(soup)
+ return [name, institution, year, advisors, descendants]
+
+
+def has_record(soup):
+ """Return True if the input tree contains a mathematician record and False
+ otherwise."""
+ return not soup.firstText().text == u"You have specified an ID that does \
+not exist in the database. Please back up and try again."
+
+
+def get_name(soup):
+ """Extract the name from the given tree."""
+ return soup.find('h2').getText()
+
+
+def get_institution(soup):
+ """Return institution name (or None, if there is no institution name)."""
+ institution = soup.find('div', style="line-height: 30px; \
text-align: center; margin-bottom: 1ex").find('span').find('span').text
- if institution == u'':
- institution = None
+ if institution == u'':
+ institution = None
+ return institution
- # Get graduation year, if present.
- inst_year = soup.find('div', style="line-height: 30px; text-align: \
+
+def get_year(soup):
+ """Return graduation year (or None, if there is no graduation year)."""
+ inst_year = soup.find('div', style="line-height: 30px; text-align: \
center; margin-bottom: 1ex").find('span').contents[-1].strip()
- if inst_year.isdigit():
- year = int(inst_year)
- else:
- year = None
-
- # Get advisor IDs.
- advisors = set([extract_id(info.findNext()) for info in
- soup.findAll(text=re.compile('Advisor'))
- if 'Advisor: Unknown' not in info])
-
- # Get descendant IDs.
- if soup.find('table') is not None:
- descendants = set([extract_id(info) for info in
- soup.find('table').findAll('a')])
- else:
- descendants = set([])
-
- return [name, institution, year, advisors, descendants]
+ if inst_year.isdigit():
+ return int(inst_year)
+ else:
+ return None
+
+
+def get_advisors(soup):
+ """Return the set of advisors."""
+ return set([extract_id(info.findNext()) for info in
+ soup.findAll(text=re.compile('Advisor'))
+ if 'Advisor: Unknown' not in info])
+
+
+def get_descendants(soup):
+ """Return the set of descendants."""
+ if soup.find('table') is not None:
+ return set([extract_id(info) for info in
+ soup.find('table').findAll('a')])
+ else:
+ return set([])
def extract_id(tag):
diff --git a/tests/geneagrapher/test_grabber_methods.py b/tests/geneagrapher/test_grabber_methods.py
index 2c6a967..621c5c9 100644
--- a/tests/geneagrapher/test_grabber_methods.py
+++ b/tests/geneagrapher/test_grabber_methods.py
@@ -1,18 +1,22 @@
+import os
+import sys
+from BeautifulSoup import BeautifulSoup
import unittest
-from geneagrapher.grabber import Grabber
+from geneagrapher.grabber import *
class TestGrabberMethods(unittest.TestCase):
- """Unit tests for the Grabber class."""
- def setUp(self):
- self.grabber = Grabber()
-
def test_init(self):
"""Test constructor."""
- self.assertIsInstance(self.grabber, Grabber)
+ grabber = Grabber()
+ self.assertIsInstance(grabber, Grabber)
+
+ def data_file(self, filename):
+ """Return the absolute path to the data file with given name."""
+ return os.path.join(self.data_path, filename)
def test_get_record_bad(self):
- """Verify exception thrown for bad id."""
+ """Verify exception thrown from get_record() method for bad id."""
grabber = Grabber()
self.assertRaises(ValueError, grabber.get_record, 999999999)
@@ -25,8 +29,9 @@ class TestGrabberMethods(unittest.TestCase):
def test_get_record_all_fields(self):
"""Test the get_record() method for a record containing all fields."""
+ grabber = Grabber()
[name, institution, year, advisors,
- descendents] = self.grabber.get_record(18231)
+ descendents] = grabber.get_record(18231)
self.assertEqual(name, u"Carl Friedrich Gau\xdf")
self.assertEqual(institution, u"Universit\xe4t Helmstedt")
self.assertEqual(year, 1799)
@@ -34,9 +39,27 @@ class TestGrabberMethods(unittest.TestCase):
self.assertEqual(descendents, set([18603, 18233, 62547, 29642, 55175,
29458, 19953, 18232, 151876]))
- # Verify calling get_record() twice does not have side effect.
+ def test_get_record_from_tree_bad(self):
+ """Verify exception thrown from get_record_from_tree() method for bad
+ id."""
+ with open(self.data_file('999999999.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertRaises(ValueError, get_record_from_tree, soup, 999999999)
+
+ try:
+ get_record_from_tree(soup, 999999999)
+ except ValueError as e:
+ self.assertEqual(str(e), "Invalid id 999999999")
+ else:
+ self.fail()
+
+ def test_get_record_from_tree_all_fields(self):
+ """Test the get_record_from_tree() method for a record containing all
+ fields."""
+ with open(self.data_file('18231.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
[name, institution, year, advisors,
- descendents] = self.grabber.get_record(18231)
+ descendents] = get_record_from_tree(soup, 18231)
self.assertEqual(name, u"Carl Friedrich Gau\xdf")
self.assertEqual(institution, u"Universit\xe4t Helmstedt")
self.assertEqual(year, 1799)
@@ -44,79 +67,113 @@ class TestGrabberMethods(unittest.TestCase):
self.assertEqual(descendents, set([18603, 18233, 62547, 29642, 55175,
29458, 19953, 18232, 151876]))
- def test_get_record_no_advisor(self):
- """Test the get_record() method for a record with no advisor."""
- grabber = Grabber()
- [name, institution, year, advisors,
- descendents] = grabber.get_record(137717)
- self.assertEqual(name, u"Valentin Alberti")
- self.assertEqual(institution, u"Universit\xe4t Leipzig")
- self.assertEqual(year, 1678)
- self.assertEqual(advisors, set([]))
- self.assertEqual(descendents, set([127946]))
-
- def test_get_record_no_descendants(self):
- """Test the get_record() method for a record with no descendants."""
- # This is currently identical to the get_record_no_year test.
- grabber = Grabber()
- [name, institution, year, advisors,
- descendents] = grabber.get_record(53658)
- self.assertEqual(name, u"S. Cingolani")
- self.assertEqual(institution, u"Scuola Normale Superiore di Pisa")
- self.assertEqual(year, None)
- self.assertEqual(advisors, set([51261]))
- self.assertEqual(descendents, set([]))
-
- def test_get_record_no_year(self):
- """Test the get_record() method for a record with no year."""
- # This example also has no descendents.
- grabber = Grabber()
- [name, institution, year, advisors,
- descendents] = grabber.get_record(53658)
- self.assertEqual(name, u"S. Cingolani")
- self.assertEqual(institution, u"Scuola Normale Superiore di Pisa")
- self.assertEqual(year, None)
- self.assertEqual(advisors, set([51261]))
- self.assertEqual(descendents, set([]))
-
- def test_get_record_no_inst(self):
- """Test the get_record() method for a record with no institution."""
- # This test is also missing additional information already tested.
- grabber = Grabber()
- [name, institution, year, advisors,
- descendents] = grabber.get_record(52965)
- self.assertEqual(name, u"Walter Mayer")
- self.assertEqual(institution, None)
- self.assertEqual(year, None)
- self.assertEqual(advisors, set([]))
- self.assertEqual(descendents, set([52996]))
-
- # Tests for special (from my point of view) characters:
- def test_slash_l(self):
- """Test the get_record() method for a record containing a slash l
- character. Example:
- http://www.genealogy.math.ndsu.nodak.edu/id.php?id=7383."""
- grabber = Grabber()
- [name, institution, year, advisors,
- descendents] = grabber.get_record(7383)
- self.assertEqual(name, u"W\u0142adys\u0142aw Hugo Dyonizy Steinhaus")
- self.assertEqual(institution,
- u"Georg-August-Universit\xe4t G\xf6ttingen")
- self.assertEqual(year, 1911)
- self.assertEqual(advisors, set([7298]))
- self.assertEqual(descendents, set([12681, 28292, 10275, 79297,
- 36991, 17851, 127470, 51907,
- 15165, 89841, 84016]))
-
- def test_multiple_advisors(self):
- """Test for multiple advisors."""
- grabber = Grabber()
+ # Verify calling get_record_from_tree() twice does not have side
+ # effect.
[name, institution, year, advisors,
- descendents] = grabber.get_record(19964)
- self.assertEqual(name, u"Rudolf Otto Sigismund Lipschitz")
- self.assertEqual(institution, u"Universit\xe4t Berlin")
- self.assertEqual(year, 1853)
- self.assertEqual(advisors, set([17946, 47064]))
+ descendents] = get_record_from_tree(soup, 18231)
+ self.assertEqual(name, u"Carl Friedrich Gau\xdf")
+ self.assertEqual(institution, u"Universit\xe4t Helmstedt")
+ self.assertEqual(year, 1799)
+ self.assertEqual(advisors, set([18230]))
+ self.assertEqual(descendents, set([18603, 18233, 62547, 29642, 55175,
+ 29458, 19953, 18232, 151876]))
+
+ def test_has_record_true(self):
+ """Test the has_record() method with a tree containing a
+ mathematician record."""
+ with open(self.data_file('137717.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertTrue(has_record(soup))
+
+ def test_has_record_false(self):
+ """Test the record_exists() method with a tree not containing a
+ mathematician record."""
+ with open(self.data_file('137717.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertTrue(has_record(soup))
+
+ def test_get_name(self):
+ """Test the get_name() method."""
+ with open(self.data_file('137717.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertEqual(u"Valentin Alberti", get_name(soup))
+
+ def test_get_name_slash_l(self):
+ """Test the get_name() method for a record containing a non-ASCII
+ character."""
+ with open(self.data_file('7383.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertEqual(u"W\u0142adys\u0142aw Hugo Dyonizy Steinhaus",
+ get_name(soup))
+
+ def test_get_institution(self):
+ """Test the get_institution() method for a record with an
+ institution."""
+ with open(self.data_file('137717.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertEqual(u"Universit\xe4t Leipzig", get_institution(soup))
+
+ def test_get_institution_no_institution(self):
+ """Test the get_institution() method for a record with no
+ institution."""
+ with open(self.data_file('52965.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertIsNone(get_institution(soup))
+
+ def test_get_year(self):
+ """Test the get_year() method for a record with a graduation year."""
+ with open(self.data_file('137717.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertEqual(get_year(soup), 1678)
+
+ def test_get_year_no_year(self):
+ """Test the get_year() method for a record with no graduation year."""
+ with open(self.data_file('53658.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertIsNone(get_year(soup))
+
+ def test_get_advisors(self):
+ """Test the get_advisors() method for a record with advisors."""
+ with open(self.data_file('18231.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertEqual(get_advisors(soup), set([18230]))
+
+ def test_get_advisors_multiple(self):
+ """Test the get_advisors() method for a record with multiple
+ advisors."""
+ with open(self.data_file('19964.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertEqual(get_advisors(soup), set([17946, 47064]))
+
+ def test_get_advisors_no_advisors(self):
+ """Test the get_advisors() method for a record with no advisors."""
+ with open(self.data_file('137717.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertEqual(get_advisors(soup), set([]))
+
+ def test_get_descendants(self):
+ """Test the get_descendants() method for a record with descendants."""
+ expected_descendants = set([18603, 18233, 62547, 29642, 55175, 29458,
+ 19953, 18232, 151876])
+ with open(self.data_file('18231.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertEqual(get_descendants(soup), expected_descendants)
+
+ def test_get_descendants_no_descendants(self):
+ """Test the get_descendants() method for a record with no
+ descendants."""
+ with open(self.data_file('53658.html'), 'r') as fin:
+ soup = BeautifulSoup(fin, convertEntities='html')
+ self.assertEqual(get_descendants(soup), set([]))
+
if __name__ == '__main__':
+ file_path = os.path.abspath(sys.argv[0])
+ TestGrabberMethods.data_path = os.path.join(os.path.dirname(file_path),
+ 'testdata')
unittest.main()
+else:
+ file_path = os.path.abspath(sys.argv[0])
+ TestGrabberMethods.data_path = os.path.join(os.path.dirname(file_path),
+ 'tests', 'geneagrapher',
+ 'testdata')
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/geneagrapher.git
More information about the debian-science-commits
mailing list