[geneagrapher] 08/226: * Minor changes to the Grabber class. Extract method now returns extracted information. * Working on Geneagrapher class. Not quite finished yet. This applies to ticket #6.

Doug Torrance dtorrance-guest at moszumanska.debian.org
Sat Jul 11 17:10:31 UTC 2015


This is an automated email from the git hooks/post-receive script.

dtorrance-guest pushed a commit to branch master
in repository geneagrapher.

commit bfed51f30a24b68f98272fcf88bc0507269d4bba
Author: David Alber <alber.david at gmail.com>
Date:   Sun Apr 27 06:29:46 2008 +0000

     * Minor changes to the Grabber class. Extract method now returns extracted information.
     * Working on Geneagrapher class. Not quite finished yet. This applies to ticket #6.
---
 src/geneagraph.py   | 119 ----------------------------------------------------
 src/geneagrapher.py |  81 +++++++++++++++++++++++++++++++++++
 src/grab.py         |   6 ++-
 src/tests.py        | 101 ++++++++++++++++++++++++++++++++++----------
 4 files changed, 165 insertions(+), 142 deletions(-)

diff --git a/src/geneagraph.py b/src/geneagraph.py
deleted file mode 100644
index 482eb9b..0000000
--- a/src/geneagraph.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/python
-
-import cgi
-import random
-import os
-import time
-from grab import *
-from GGraph import *
-#import cgitb; cgitb.enable() # for debugging, comment out for production
-
-form = cgi.FieldStorage()
-name = form.getfirst("name", "")
-extra = form.getfirst("extra", "")
-nodes = form.getlist("node")
-output = form.getfirst("output", "png")
-
-# Save the input to log file.
-f = open("/var/log/geneagraph", "a")
-f.write(time.strftime('%m/%d/%Y %H:%M:%S'))
-f.write(" ")
-f.write(os.environ['REMOTE_ADDR'])
-f.write("\n")
-if name != "":
-	f.write("\tName: ")
-	f.write(name)
-	f.write("\n")
-if extra != "":
-	f.write("\tExtra: ")
-	f.write(extra)
-	f.write("\n")
-if len(nodes) > 0:
-	f.write("\t")
-	f.write(str(nodes))
-	f.write("\n")
-f.close()
-
-try:
-	if len(name) > 100:
-		raise ValueError("Name field longer than maximum allowed length (100 characters).")
-	if len(extra) > 100:
-		raise ValueError("Extra field longer than maximum allowed length (100 characters).")
-	if len(nodes) > 5:
-	#if len(nodes) > 50:
-		raise ValueError("Only five node URLs may be supplied.")
-
-# Replace special characters in name and extra with backslashed form
-	name = name.replace('\\', '\\\\')
-	name = name.replace('\"', '\\"')
-	extra = extra.replace('\\', '\\\\')
-	extra = extra.replace('"', '\\"')
-
-	record = Record(name, extra, -1, 0)
-
-	printHead = True
-	if name == "" and extra == "":
-		printHead = False
-
-	advisors = []
-	for index in range(len(nodes)):
-		if not nodes[index].isspace():
-			if nodes[index].find('id.php?id=') > -1:
-				id = nodes[index].split('id.php?id=')[1].strip()
-				if id.isdigit():
-					advisors.append(int(id))
-				else:
-					raise ValueError("Node " + str(index+1) + " was improperly formatted.")
-			else:
-				raise ValueError("Node " + str(index+1) + " was improperly formatted.")
-
-		
-	node = Node(record, advisors)
-	graph = Graph(node, printHead)
-
-	for advisor in advisors:
-		extractNodeInformation(advisor, graph)
-
-	fnum = str(int(random.random()*1000000000000000))
-	filename = '/tmp/' + fnum + '.dot'
-	graph.writeDotFile(filename)
-
-	if output == "dot":
-		print "Content-Type: text/html"
-		print
-		print "<html><body><pre>"
-		f = open(filename, "r")
-		file = f.read()
-		f.close()
-		print file
-		print "</pre></body></html>"
-	elif output == "png" or output == "ps":
-		psfilename = '/tmp/' + fnum + '.ps'
-		command = '/usr/local/bin/dot -Tps ' + filename + ' -o ' + psfilename
-		os.system(command)
-		if output == "png":
-			pngfilename = '/tmp/' + fnum + '.png'
-			command = '/usr/bin/convert -density 144 -geometry 50% ' + psfilename + ' ' + pngfilename
-			os.system(command)
-			print "Content-type: image/png"
-			print "Content-Disposition: attachment; filename=genealogy.png"
-			print
-			f = open(pngfilename, "r")
-		elif output == "ps":
-			print "Content-Type: application/postscript"
-			print
-			f = open(psfilename, "r")
-		file = f.read()
-		f.close()
-		print file
-	else: # improper output chosen
-		raise ValueError("Return type was improperly formatted. Go back and check it out.")
-
-	command = '/bin/rm /tmp/' + fnum + '.*'
-	os.system(command)
-
-except ValueError, e:
-	print "Content-type: text/html"
-	print
-	print e, "<br>Go back and check it out."
-	raise SystemExit
diff --git a/src/geneagrapher.py b/src/geneagrapher.py
new file mode 100644
index 0000000..f54d478
--- /dev/null
+++ b/src/geneagrapher.py
@@ -0,0 +1,81 @@
+from optparse import OptionParser
+import GGraph
+import grab
+
+class Geneagrapher:
+	"""
+	A class for building Graphviz "dot" files for math genealogies
+	extracted from the Mathematics Genealogy Project website.
+	"""
+	def __init__(self):
+		self.graph = GGraph.Graph()
+		self.leaf_ids = []
+		self.get_ancestors = True
+		self.get_descendents = False
+		self.write_filename = None
+
+	def parseInput(self):
+		"""
+		Parse command-line information.
+		"""
+		self.parser = OptionParser()
+
+		self.parser.set_usage("%prog [options] ID ...")
+		self.parser.set_description('Create a Graphviz "dot" file for a mathematics genealogy, where ID is a record identifier from the Mathematics Genealogy Project. Multiple IDs may be passed.')
+
+		self.parser.add_option("-f", "--file", dest="filename", help="write report to FILE [default: stdout]",
+							   metavar="FILE", default=None)
+		self.parser.add_option("--without-ancestors", action="store_false", dest="get_ancestors", default=True,
+						  help="do not get ancestors of any input IDs")
+		self.parser.add_option("--with-descendents", action="store_true", dest="get_descendents", default=False,
+						  help="do not get ancestors of any input IDs")
+
+		(options, args) = self.parser.parse_args()
+		
+		if len(args) == 0:
+			raise SyntaxError("%s: error: no record IDs passed" % (self.parser.get_prog_name()))
+		
+		self.get_ancestors = options.get_ancestors
+		self.get_descendents = options.get_descendents
+		self.write_filename = options.filename
+		for arg in args:
+			self.leaf_ids.append(int(arg))
+		
+	def buildGraph(self):
+		"""
+		Populate the graph member by grabbing the mathematician
+		pages and extracting relevant data.
+		"""
+		grab_queue = self.leaf_ids
+		while len(grab_queue) != 0:
+			id = grab_queue.pop()
+			if not self.graph.hasNode(id):
+				# Then this information has not yet been grabbed.
+				grabber = grab.Grabber(id)
+				try:
+					[name, institution, year, advisors] = grabber.extractNodeInformation()
+				except ValueError:
+					# The given id does not exist in the Math Genealogy Project's database.
+					raise
+				self.graph.addNode(name, institution, year, id, advisors)
+				if self.get_ancestors:
+					grab_queue += advisors
+					
+	def generateDotFile(self):
+		dotfile = self.graph.generateDotFile()
+		if self.write_filename is not None:
+			outfile = open(self.write_filename, "w")
+			outfile.write(dotfile)
+			outfile.close()
+		else:
+			print dotfile
+		
+if __name__ == "__main__":
+	geneagrapher = Geneagrapher()
+	try:
+		geneagrapher.parseInput()
+	except SyntaxError, e:
+		print geneagrapher.parser.get_usage()
+		print e
+	geneagrapher.buildGraph()
+	geneagrapher.generateDotFile()
\ No newline at end of file
diff --git a/src/grab.py b/src/grab.py
index d9fd0c3..29fa980 100644
--- a/src/grab.py
+++ b/src/grab.py
@@ -1,7 +1,6 @@
 import urllib
 import re
 from htmlentitydefs import name2codepoint
-from GGraph import *
 
 class Grabber:
     """
@@ -38,12 +37,14 @@ class Grabber:
         """
         if self.pagestr is None:
             self.getPage()
+            
+        self.advisors = []
 
         # Split the page string at newline characters.
         psarray = self.pagestr.split('\n')
         
         if psarray[0].find("An error occurred in the forwarding block") > -1:
-            # Then a bad URL was given. Throw an exception.
+            # Then a bad URL (e.g., a bad record id) was given. Throw an exception.
             msg = "Invalid page address for id %d" % (self.id)
             raise ValueError(msg)
 
@@ -69,3 +70,4 @@ class Grabber:
 
             elif 'Student(s)' in line or 'No students known' in line:
                 break
+        return [self.name, self.institution, self.year, self.advisors]
diff --git a/src/tests.py b/src/tests.py
index 5fa5824..14b9a75 100644
--- a/src/tests.py
+++ b/src/tests.py
@@ -1,6 +1,8 @@
+import sys
 import unittest
 import GGraph
 import grab
+import geneagrapher
 
 # Unit tests for GGraph.
 class TestRecordMethods(unittest.TestCase):
@@ -294,40 +296,97 @@ class TestGrabberMethods(unittest.TestCase):
         
     def test004_extract_info_all_fields(self):
         # Test the extractNodeInformation() method for a record containing all fields.
-        self.grabber.extractNodeInformation()
-        self.assertEquals(self.grabber.name, u"Carl Friedrich Gau\xdf")
-        self.assertEquals(self.grabber.institution, u"Universit\xe4t Helmstedt")
-        self.assertEquals(self.grabber.year, 1799)
-        self.assertEquals(self.grabber.advisors, [18230])
+        [name, institution, year, advisors] = self.grabber.extractNodeInformation()
+        self.assertEquals(name, self.grabber.name)
+        self.assertEquals(institution, self.grabber.institution)
+        self.assertEquals(year, self.grabber.year)
+        self.assertEquals(advisors, self.grabber.advisors)
+        self.assertEquals(name, u"Carl Friedrich Gau\xdf")
+        self.assertEquals(institution, u"Universit\xe4t Helmstedt")
+        self.assertEquals(year, 1799)
+        self.assertEquals(advisors, [18230])
+        
+        # Verify calling extractNodeInformation() twice does not have side effect.
+        [name, institution, year, advisors] = self.grabber.extractNodeInformation()
+        self.assertEquals(name, u"Carl Friedrich Gau\xdf")
+        self.assertEquals(institution, u"Universit\xe4t Helmstedt")
+        self.assertEquals(year, 1799)
+        self.assertEquals(advisors, [18230])
         
     def test005_extract_info_no_advisor(self):
         # Test the extractNodeInformation() method for a record with no advisor.
         grabber = grab.Grabber(21235)
-        grabber.extractNodeInformation()
-        self.assertEquals(grabber.name, u"Otto  Mencke")
-        self.assertEquals(grabber.institution, u"Universit\xe4t Leipzig")
-        self.assertEquals(grabber.year, 1665)
-        self.assertEquals(grabber.advisors, [])
+        [name, institution, year, advisors] = grabber.extractNodeInformation()
+        self.assertEquals(name, u"Otto  Mencke")
+        self.assertEquals(institution, u"Universit\xe4t Leipzig")
+        self.assertEquals(year, 1665)
+        self.assertEquals(advisors, [])
         
     def test006_extract_info_no_year(self):
         # Test the extractNodeInformation() method for a record with no year.
         grabber = grab.Grabber(53658)
-        grabber.extractNodeInformation()
-        self.assertEquals(grabber.name, u"S.  Cingolani")
-        self.assertEquals(grabber.institution, u"Universit\xe0 di Pisa")
-        self.assertEquals(grabber.year, None)
-        self.assertEquals(grabber.advisors, [51261])
+        [name, institution, year, advisors] = grabber.extractNodeInformation()
+        self.assertEquals(name, u"S.  Cingolani")
+        self.assertEquals(institution, u"Universit\xe0 di Pisa")
+        self.assertEquals(year, None)
+        self.assertEquals(advisors, [51261])
         
     def test007_extract_info_no_inst(self):
         # Test the extractNodeInformation() method for a record with no institution.
         # This test is also missing additional information already tested.
         grabber = grab.Grabber(52965)
-        grabber.extractNodeInformation()
-        self.assertEquals(grabber.name, u"Walter  Mayer")
-        self.assertEquals(grabber.institution, None)
-        self.assertEquals(grabber.year, None)
-        self.assertEquals(grabber.advisors, [])
+        [name, institution, year, advisors] = grabber.extractNodeInformation()
+        self.assertEquals(name, u"Walter  Mayer")
+        self.assertEquals(institution, None)
+        self.assertEquals(year, None)
+        self.assertEquals(advisors, [])
+        
+class TestGeneagrapherMethods(unittest.TestCase):
+    """
+    Unit tests for the geneagrapher.Geneagrapher class.
+    """
+    def setUp(self):
+        self.ggrapher = geneagrapher.Geneagrapher()
         
+    def test001_init(self):
+        # Test constructor.
+        self.assertEquals(isinstance(self.ggrapher.graph, GGraph.Graph), True)
+        self.assertEquals(self.ggrapher.leaf_ids, [])
+        self.assertEquals(self.ggrapher.get_ancestors, True)
+        self.assertEquals(self.ggrapher.get_descendents, False)
+        self.assertEquals(self.ggrapher.write_filename, None)
+        
+    def test002_parse_empty(self):
+        # Test parseInput() with no arguments.
+        sys.argv = ['geneagrapher']
+        self.assertRaises(SyntaxError, self.ggrapher.parseInput)
+        
+    def test003_parse_default(self):
+        # Test parseInput() with no options.
+        sys.argv = ['geneagrapher', '3']
+        self.ggrapher.get_ancestors = False
+        self.ggrapher.get_descendents = True
+        self.ggrapher.write_filename = "filler"
+        self.ggrapher.parseInput()
+        self.assertEquals(self.ggrapher.get_ancestors, True)
+        self.assertEquals(self.ggrapher.get_descendents, False)
+        self.assertEquals(self.ggrapher.write_filename, None)
+        self.assertEquals(self.ggrapher.leaf_ids, [3])
+
+    def test004_parse_options(self):
+        # Test parseInput() with options.
+        sys.argv = ['geneagrapher', '--without-ancestors', '--with-descendents', '--file=filler', '3', '43']
+        self.ggrapher.parseInput()
+        self.assertEquals(self.ggrapher.get_ancestors, False)
+        self.assertEquals(self.ggrapher.get_descendents, True)
+        self.assertEquals(self.ggrapher.write_filename, "filler")
+        self.assertEquals(self.ggrapher.leaf_ids, [3, 43])
 
 if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+    suite = unittest.TestSuite()
+    #suite.addTest(unittest.makeSuite(TestRecordMethods))
+    #suite.addTest(unittest.makeSuite(TestNodeMethods))
+    #suite.addTest(unittest.makeSuite(TestGraphMethods))
+    suite.addTest(unittest.makeSuite(TestGrabberMethods))
+    suite.addTest(unittest.makeSuite(TestGeneagrapherMethods))
+    unittest.TextTestRunner(verbosity=1).run(suite)
\ No newline at end of file

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/geneagrapher.git



More information about the debian-science-commits mailing list