[geneagrapher] 08/226: * Minor changes to the Grabber class. Extract method now returns extracted information. * Working on Geneagrapher class. Not quite finished yet. This applies to ticket #6.
Doug Torrance
dtorrance-guest at moszumanska.debian.org
Sat Jul 11 17:10:31 UTC 2015
This is an automated email from the git hooks/post-receive script.
dtorrance-guest pushed a commit to branch master
in repository geneagrapher.
commit bfed51f30a24b68f98272fcf88bc0507269d4bba
Author: David Alber <alber.david at gmail.com>
Date: Sun Apr 27 06:29:46 2008 +0000
* Minor changes to the Grabber class. The extractNodeInformation() method now returns the extracted information (name, institution, year, advisors).
* Working on Geneagrapher class. Not quite finished yet. This applies to ticket #6.
---
src/geneagraph.py | 119 ----------------------------------------------------
src/geneagrapher.py | 81 +++++++++++++++++++++++++++++++++++
src/grab.py | 6 ++-
src/tests.py | 101 ++++++++++++++++++++++++++++++++++----------
4 files changed, 165 insertions(+), 142 deletions(-)
diff --git a/src/geneagraph.py b/src/geneagraph.py
deleted file mode 100644
index 482eb9b..0000000
--- a/src/geneagraph.py
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/usr/bin/python
-
-import cgi
-import random
-import os
-import time
-from grab import *
-from GGraph import *
-#import cgitb; cgitb.enable() # for debugging, comment out for production
-
-form = cgi.FieldStorage()
-name = form.getfirst("name", "")
-extra = form.getfirst("extra", "")
-nodes = form.getlist("node")
-output = form.getfirst("output", "png")
-
-# Save the input to log file.
-f = open("/var/log/geneagraph", "a")
-f.write(time.strftime('%m/%d/%Y %H:%M:%S'))
-f.write(" ")
-f.write(os.environ['REMOTE_ADDR'])
-f.write("\n")
-if name != "":
- f.write("\tName: ")
- f.write(name)
- f.write("\n")
-if extra != "":
- f.write("\tExtra: ")
- f.write(extra)
- f.write("\n")
-if len(nodes) > 0:
- f.write("\t")
- f.write(str(nodes))
- f.write("\n")
-f.close()
-
-try:
- if len(name) > 100:
- raise ValueError("Name field longer than maximum allowed length (100 characters).")
- if len(extra) > 100:
- raise ValueError("Extra field longer than maximum allowed length (100 characters).")
- if len(nodes) > 5:
- #if len(nodes) > 50:
- raise ValueError("Only five node URLs may be supplied.")
-
-# Replace special characters in name and extra with backslashed form
- name = name.replace('\\', '\\\\')
- name = name.replace('\"', '\\"')
- extra = extra.replace('\\', '\\\\')
- extra = extra.replace('"', '\\"')
-
- record = Record(name, extra, -1, 0)
-
- printHead = True
- if name == "" and extra == "":
- printHead = False
-
- advisors = []
- for index in range(len(nodes)):
- if not nodes[index].isspace():
- if nodes[index].find('id.php?id=') > -1:
- id = nodes[index].split('id.php?id=')[1].strip()
- if id.isdigit():
- advisors.append(int(id))
- else:
- raise ValueError("Node " + str(index+1) + " was improperly formatted.")
- else:
- raise ValueError("Node " + str(index+1) + " was improperly formatted.")
-
-
- node = Node(record, advisors)
- graph = Graph(node, printHead)
-
- for advisor in advisors:
- extractNodeInformation(advisor, graph)
-
- fnum = str(int(random.random()*1000000000000000))
- filename = '/tmp/' + fnum + '.dot'
- graph.writeDotFile(filename)
-
- if output == "dot":
- print "Content-Type: text/html"
- print
- print "<html><body><pre>"
- f = open(filename, "r")
- file = f.read()
- f.close()
- print file
- print "</pre></body></html>"
- elif output == "png" or output == "ps":
- psfilename = '/tmp/' + fnum + '.ps'
- command = '/usr/local/bin/dot -Tps ' + filename + ' -o ' + psfilename
- os.system(command)
- if output == "png":
- pngfilename = '/tmp/' + fnum + '.png'
- command = '/usr/bin/convert -density 144 -geometry 50% ' + psfilename + ' ' + pngfilename
- os.system(command)
- print "Content-type: image/png"
- print "Content-Disposition: attachment; filename=genealogy.png"
- print
- f = open(pngfilename, "r")
- elif output == "ps":
- print "Content-Type: application/postscript"
- print
- f = open(psfilename, "r")
- file = f.read()
- f.close()
- print file
- else: # improper output chosen
- raise ValueError("Return type was improperly formatted. Go back and check it out.")
-
- command = '/bin/rm /tmp/' + fnum + '.*'
- os.system(command)
-
-except ValueError, e:
- print "Content-type: text/html"
- print
- print e, "<br>Go back and check it out."
- raise SystemExit
diff --git a/src/geneagrapher.py b/src/geneagrapher.py
new file mode 100644
index 0000000..f54d478
--- /dev/null
+++ b/src/geneagrapher.py
@@ -0,0 +1,81 @@
+from optparse import OptionParser
+import GGraph
+import grab
+
+class Geneagrapher:
+ """
+ A class for building Graphviz "dot" files for math genealogies
+ extracted from the Mathematics Genealogy Project website.
+ """
+ def __init__(self):
+ self.graph = GGraph.Graph()
+ self.leaf_ids = []
+ self.get_ancestors = True
+ self.get_descendents = False
+ self.write_filename = None
+
+ def parseInput(self):
+ """
+ Parse command-line information.
+ """
+ self.parser = OptionParser()
+
+ self.parser.set_usage("%prog [options] ID ...")
+ self.parser.set_description('Create a Graphviz "dot" file for a mathematics genealogy, where ID is a record identifier from the Mathematics Genealogy Project. Multiple IDs may be passed.')
+
+ self.parser.add_option("-f", "--file", dest="filename", help="write report to FILE [default: stdout]",
+ metavar="FILE", default=None)
+ self.parser.add_option("--without-ancestors", action="store_false", dest="get_ancestors", default=True,
+ help="do not get ancestors of any input IDs")
+ self.parser.add_option("--with-descendents", action="store_true", dest="get_descendents", default=False,
+ help="do not get ancestors of any input IDs")
+
+ (options, args) = self.parser.parse_args()
+
+ if len(args) == 0:
+ raise SyntaxError("%s: error: no record IDs passed" % (self.parser.get_prog_name()))
+
+ self.get_ancestors = options.get_ancestors
+ self.get_descendents = options.get_descendents
+ self.write_filename = options.filename
+ for arg in args:
+ self.leaf_ids.append(int(arg))
+
+ def buildGraph(self):
+ """
+ Populate the graph member by grabbing the mathematician
+ pages and extracting relevant data.
+ """
+ grab_queue = self.leaf_ids
+ while len(grab_queue) != 0:
+ id = grab_queue.pop()
+ if not self.graph.hasNode(id):
+ # Then this information has not yet been grabbed.
+ grabber = grab.Grabber(id)
+ try:
+ [name, institution, year, advisors] = grabber.extractNodeInformation()
+ except ValueError:
+ # The given id does not exist in the Math Genealogy Project's database.
+ raise
+ self.graph.addNode(name, institution, year, id, advisors)
+ if self.get_ancestors:
+ grab_queue += advisors
+
+ def generateDotFile(self):
+ dotfile = self.graph.generateDotFile()
+ if self.write_filename is not None:
+ outfile = open(self.write_filename, "w")
+ outfile.write(dotfile)
+ outfile.close()
+ else:
+ print dotfile
+
+if __name__ == "__main__":
+ geneagrapher = Geneagrapher()
+ try:
+ geneagrapher.parseInput()
+ except SyntaxError, e:
+ print geneagrapher.parser.get_usage()
+ print e
+ geneagrapher.buildGraph()
+ geneagrapher.generateDotFile()
\ No newline at end of file
diff --git a/src/grab.py b/src/grab.py
index d9fd0c3..29fa980 100644
--- a/src/grab.py
+++ b/src/grab.py
@@ -1,7 +1,6 @@
import urllib
import re
from htmlentitydefs import name2codepoint
-from GGraph import *
class Grabber:
"""
@@ -38,12 +37,14 @@ class Grabber:
"""
if self.pagestr is None:
self.getPage()
+
+ self.advisors = []
# Split the page string at newline characters.
psarray = self.pagestr.split('\n')
if psarray[0].find("An error occurred in the forwarding block") > -1:
- # Then a bad URL was given. Throw an exception.
+ # Then a bad URL (e.g., a bad record id) was given. Throw an exception.
msg = "Invalid page address for id %d" % (self.id)
raise ValueError(msg)
@@ -69,3 +70,4 @@ class Grabber:
elif 'Student(s)' in line or 'No students known' in line:
break
+ return [self.name, self.institution, self.year, self.advisors]
diff --git a/src/tests.py b/src/tests.py
index 5fa5824..14b9a75 100644
--- a/src/tests.py
+++ b/src/tests.py
@@ -1,6 +1,8 @@
+import sys
import unittest
import GGraph
import grab
+import geneagrapher
# Unit tests for GGraph.
class TestRecordMethods(unittest.TestCase):
@@ -294,40 +296,97 @@ class TestGrabberMethods(unittest.TestCase):
def test004_extract_info_all_fields(self):
# Test the extractNodeInformation() method for a record containing all fields.
- self.grabber.extractNodeInformation()
- self.assertEquals(self.grabber.name, u"Carl Friedrich Gau\xdf")
- self.assertEquals(self.grabber.institution, u"Universit\xe4t Helmstedt")
- self.assertEquals(self.grabber.year, 1799)
- self.assertEquals(self.grabber.advisors, [18230])
+ [name, institution, year, advisors] = self.grabber.extractNodeInformation()
+ self.assertEquals(name, self.grabber.name)
+ self.assertEquals(institution, self.grabber.institution)
+ self.assertEquals(year, self.grabber.year)
+ self.assertEquals(advisors, self.grabber.advisors)
+ self.assertEquals(name, u"Carl Friedrich Gau\xdf")
+ self.assertEquals(institution, u"Universit\xe4t Helmstedt")
+ self.assertEquals(year, 1799)
+ self.assertEquals(advisors, [18230])
+
+ # Verify calling extractNodeInformation() twice does not have side effect.
+ [name, institution, year, advisors] = self.grabber.extractNodeInformation()
+ self.assertEquals(name, u"Carl Friedrich Gau\xdf")
+ self.assertEquals(institution, u"Universit\xe4t Helmstedt")
+ self.assertEquals(year, 1799)
+ self.assertEquals(advisors, [18230])
def test005_extract_info_no_advisor(self):
# Test the extractNodeInformation() method for a record with no advisor.
grabber = grab.Grabber(21235)
- grabber.extractNodeInformation()
- self.assertEquals(grabber.name, u"Otto Mencke")
- self.assertEquals(grabber.institution, u"Universit\xe4t Leipzig")
- self.assertEquals(grabber.year, 1665)
- self.assertEquals(grabber.advisors, [])
+ [name, institution, year, advisors] = grabber.extractNodeInformation()
+ self.assertEquals(name, u"Otto Mencke")
+ self.assertEquals(institution, u"Universit\xe4t Leipzig")
+ self.assertEquals(year, 1665)
+ self.assertEquals(advisors, [])
def test006_extract_info_no_year(self):
# Test the extractNodeInformation() method for a record with no year.
grabber = grab.Grabber(53658)
- grabber.extractNodeInformation()
- self.assertEquals(grabber.name, u"S. Cingolani")
- self.assertEquals(grabber.institution, u"Universit\xe0 di Pisa")
- self.assertEquals(grabber.year, None)
- self.assertEquals(grabber.advisors, [51261])
+ [name, institution, year, advisors] = grabber.extractNodeInformation()
+ self.assertEquals(name, u"S. Cingolani")
+ self.assertEquals(institution, u"Universit\xe0 di Pisa")
+ self.assertEquals(year, None)
+ self.assertEquals(advisors, [51261])
def test007_extract_info_no_inst(self):
# Test the extractNodeInformation() method for a record with no institution.
# This test is also missing additional information already tested.
grabber = grab.Grabber(52965)
- grabber.extractNodeInformation()
- self.assertEquals(grabber.name, u"Walter Mayer")
- self.assertEquals(grabber.institution, None)
- self.assertEquals(grabber.year, None)
- self.assertEquals(grabber.advisors, [])
+ [name, institution, year, advisors] = grabber.extractNodeInformation()
+ self.assertEquals(name, u"Walter Mayer")
+ self.assertEquals(institution, None)
+ self.assertEquals(year, None)
+ self.assertEquals(advisors, [])
+
+class TestGeneagrapherMethods(unittest.TestCase):
+ """
+ Unit tests for the geneagrapher.Geneagrapher class.
+ """
+ def setUp(self):
+ self.ggrapher = geneagrapher.Geneagrapher()
+ def test001_init(self):
+ # Test constructor.
+ self.assertEquals(isinstance(self.ggrapher.graph, GGraph.Graph), True)
+ self.assertEquals(self.ggrapher.leaf_ids, [])
+ self.assertEquals(self.ggrapher.get_ancestors, True)
+ self.assertEquals(self.ggrapher.get_descendents, False)
+ self.assertEquals(self.ggrapher.write_filename, None)
+
+ def test002_parse_empty(self):
+ # Test parseInput() with no arguments.
+ sys.argv = ['geneagrapher']
+ self.assertRaises(SyntaxError, self.ggrapher.parseInput)
+
+ def test003_parse_default(self):
+ # Test parseInput() with no options.
+ sys.argv = ['geneagrapher', '3']
+ self.ggrapher.get_ancestors = False
+ self.ggrapher.get_descendents = True
+ self.ggrapher.write_filename = "filler"
+ self.ggrapher.parseInput()
+ self.assertEquals(self.ggrapher.get_ancestors, True)
+ self.assertEquals(self.ggrapher.get_descendents, False)
+ self.assertEquals(self.ggrapher.write_filename, None)
+ self.assertEquals(self.ggrapher.leaf_ids, [3])
+
+ def test004_parse_options(self):
+ # Test parseInput() with options.
+ sys.argv = ['geneagrapher', '--without-ancestors', '--with-descendents', '--file=filler', '3', '43']
+ self.ggrapher.parseInput()
+ self.assertEquals(self.ggrapher.get_ancestors, False)
+ self.assertEquals(self.ggrapher.get_descendents, True)
+ self.assertEquals(self.ggrapher.write_filename, "filler")
+ self.assertEquals(self.ggrapher.leaf_ids, [3, 43])
if __name__ == '__main__':
- unittest.main()
\ No newline at end of file
+ suite = unittest.TestSuite()
+ #suite.addTest(unittest.makeSuite(TestRecordMethods))
+ #suite.addTest(unittest.makeSuite(TestNodeMethods))
+ #suite.addTest(unittest.makeSuite(TestGraphMethods))
+ suite.addTest(unittest.makeSuite(TestGrabberMethods))
+ suite.addTest(unittest.makeSuite(TestGeneagrapherMethods))
+ unittest.TextTestRunner(verbosity=1).run(suite)
\ No newline at end of file
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/geneagrapher.git
More information about the debian-science-commits
mailing list