Vocabulary browser

Jesse Weinstein jesse at wefu.org
Sun Feb 14 08:36:29 UTC 2010


On Sun, 2010-02-07 at 09:49 -0200, Tássia Camões wrote:
> I'm developing the script to generate the webpages that will present
> the vocabulary information.
> The code is on alioth: svn+ssh://svn.debian.org/svn/debtags/vocabulary-browser
I felt like doing some refactoring, see below; hope you don't mind.
I'll explain what I did, if needed, later.  I need to go to sleep about
now.

I also wrote a few tests to make me feel more comfortable doing the
refactoring... they are included at the bottom of this message, along
with the test file "vocabulary_test", which is just the first facet from
the "vocabulary" file.

> I also think it's worthy to review the whole vocabulary file, because
> some informations seem inconsistent.
I certainly agree that needs to be done!

BTW, you didn't CC the list on the most recent message you sent to me;
you might want to forward a copy to the list...
 
Jesse

-----------------------------

Index: vocabulary.py
===================================================================
--- vocabulary.py	(revision 2598)
+++ vocabulary.py	(working copy)
@@ -3,121 +3,114 @@
 import re
 import os
 
-class Facet:
-
-    def __init__(self,name):
+class PileOfAttributes:
+    def __eq__(self, other):
+        return self.__dict__==other.__dict__
+    def __ne__(self, other):
+        return self.__dict__!=other.__dict__
+    
+    def __repr__(self):
+        return '%s(%s)' % ((self.__module__ != '__main__' and
+                            self.__module__+'.' or '')+
+                           self.__class__.__name__,
+                           ', '.join(
+            [('%s=%s' % (x[0], `x[1]`)) for x in self.__dict__.items()]))
+    
+class Facet(PileOfAttributes):
+    def __init__(self,name, **kargs):
         self.name = name
-        self.status = ''
-        self.nature = ''
-        self.responsible = ''
-        self.comment = ''
-        self.description = ''
-        self.help = ''
-        self.tags = []
+        for x in ('status', 'nature', 'responsible', 'comment',
+                  'description', 'help'):
+            setattr(self, x, kargs.get(x, ''))
+        self.tags = kargs.get('tags', [])
 
     def show(self):
-        print "Facet: %s" % (self.name)
+        print str(self)
+    def __str__(self):
+        txt=["Facet: %s" % (self.name)]
         if self.status:
-            print "Status: %s" % (self.status)
+            txt.append("Status: %s" % (self.status))
         if self.nature:
-            print "Nature: %s" % (self.nature)
+            txt.append("Nature: %s" % (self.nature))
         if self.responsible:
-            print "Responsible: %s" % (self.responsible)
+            txt.append("Responsible: %s" % (self.responsible))
         if self.comment:
-            print "Comment: %s" % (self.comment)
+            txt.append("Comment: %s" % (self.comment))
         if self.description:
-            print "Description: %s" % (self.description)
+            txt.append("Description: %s" % (self.description))
         if self.help:
-            print self.help
+            txt.append(self.help)
         else:
-            print ""
+            txt.append("")
+        return '\n'.join(txt)
+    
+class Tag(PileOfAttributes):
 
-class Tag:
-
-    def __init__(self,name):
+    def __init__(self,name, **kargs):
         self.name = name
-        self.responsible = ''
-        self.implies = ''
-        self.flags = ''
-        self.description = ''
-        self.help = '' 
+        for x in ('responsible', 'implies', 'flags',
+                  'description', 'help'):
+            setattr(self, x, kargs.get(x, ''))
 
-    def show(self):
-        print "Tag: %s" % (self.name)
+    def __str__(self):
+        txt=["Tag: %s" % (self.name)]
         if self.responsible:
-            print "Responsible: %s" % (self.responsible)
+            txt.append("Responsible: %s" % (self.responsible))
         if self.implies:
-            print "Implies: %s" % (self.implies)
+            txt.append("Implies: %s" % (self.implies))
         if self.description:
-            print "Description: %s" % (self.description)
+            txt.append("Description: %s" % (self.description))
         if self.flags:
-            print "Flags: %s" % (self.flags)
+            txt.append("Flags: %s" % (self.flags))
         if self.help:
-            print self.help
+            txt.append(self.help)
         else:
-            print ""
-
+            txt.append("")
+        return '\n'.join(txt)
+    
+    def show(self):
+        print str(self)
+        
 class Vocabulary:
   
     def __init__(self, f): 
         self.source = open(f, 'r')
         self.facets = []
+        self.facet_pat = re.compile("""Facet: (.*)\s(^Status: (?P<status>(.*))\s)?(^Nature: (?P<nature>(.*))\s)?(^Responsible: (?P<responsible>(.*))\s)?(^Comment: (?P<comment>(.*))\s)?(^Description: (?P<description>(.*))\s)?(^ (?P<help>(.*))\s)?""", re.MULTILINE)
+        self.tag_pat = re.compile("""Tag: (.*)\s(^Responsible: (?P<responsible>(.*))\s)?(^Implies: (?P<implies>(.*))\s)?(^Description: (?P<description>(.*))\s)?(^Flags: (?P<flags>(.*))\s)?(^ (?P<help>(.*))\s)?""", re.MULTILINE)
 
     def load(self):
-        source = open('vocabulary', 'r')
-        for line in source:
+        for line in self.source:
            if line.strip():
                block = ""
                while line.strip():
                    block +=line
-                   line = source.next()
+                   line = self.source.next()
                if block.startswith('Facet:'):
-                   regex = """Facet: (.*)\s(^Status: (?P<status>(.*))\s)?(^Nature: (?P<nature>(.*))\s)?(^Responsible: (?P<responsible>(.*))\s)?(^Comment: (?P<comment>(.*))\s)?(^Description: (?P<description>(.*))\s)?(^ (?P<help>(.*))\s)?"""
-                   pattern = re.compile(regex,re.MULTILINE)
-                   m = pattern.match(block)
-                   try:
-                       facet = Facet(m.group(1).strip())
-                       if m.group('status'):
-                           facet.status = m.group('status')
-                       if m.group('nature'):
-                           facet.nature = m.group('nature')
-                       if m.group('responsible'):
-                           facet.responsible = m.group('responsible')
-                       if m.group('comment'):
-                           facet.comment = m.group('comment')
-                       if m.group('description'):
-                           facet.description = m.group('description')
-                       if m.group('help'):
-                           help = re.findall("\s (.*)",block)
-                           for line in help:
-                               facet.help += " "+line+"\n"
-                       #facet.show()
-                       self.facets.append(facet)
-                   except:
-                       print "Facet: Bad format block"
+                   self.facets.append(self.parse_block(
+                       self.facet_pat, Facet,
+                       ('status', 'nature', 'responsible', 'comment', 'description'),
+                       block))
                if block.startswith('Tag:'):
-                   regex = """Tag: (.*)\s(^Responsible: (?P<responsible>(.*))\s)?(^Implies: (?P<implies>(.*))\s)?(^Description: (?P<description>(.*))\s)?(^Flags: (?P<flags>(.*))\s)?(^ (?P<help>(.*))\s)?"""
-                   pattern = re.compile(regex,re.MULTILINE)
-                   m = pattern.match(block)
-                   try:
-                       tag = Tag(m.group(1).strip())
-                       if m.group('responsible'):
-                           tag.responsible = m.group('responsible')
-                       if m.group('implies'):
-                           tag.implies = m.group('implies')
-                       if m.group('description'):
-                           tag.description = m.group('description')
-                       if m.group('flags'):
-                           tag.flags = m.group('flags')
-                       if m.group('help'):
-                           help = re.findall("\s (.*)",block)
-                           for line in help:
-                               tag.help += " "+line+"\n"
-                       #tag.show()
-                       self.facets[-1].tags.append(tag)
-                   except:
-                       print "Tag: Bad format block"
-        source.close()  
+                   self.facets[-1].tags.append(self.parse_block(
+                       self.tag_pat, Tag, ('responsible', 'implies',
+                                           'flags', 'description'), block))
+        self.source.close()  
+    def parse_block(self, pattern, _class, attrs, block):
+       m = pattern.match(block)
+       try:
+           item = _class(m.group(1).strip())
+           for x in attrs:
+               if m.group(x):
+                   setattr(item, x, m.group(x))
+           if m.group('help'):
+               help = re.findall("\s (.*)",block)
+               for line in help:
+                   item.help += " "+line+"\n"
+           #item.show()
+           return item
+       except:
+           print "Bad format block"
 
     def generateAll(self):
         self.generateHtml(None)
@@ -131,57 +124,18 @@
             html = open('html/index.html', 'w')
         else:
 	    html = open('html/'+f.name+'.html', 'w')
-	html.write('<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01
Transitional//EN\">\n ')
-	html.write('<html>\n ')
-	html.write('<head>\n ')
-	html.write('  <meta http-equiv="content-type" content="text/html;
charset=UTF-8">\n ')
-	html.write('  <link href="main.css" rel="stylesheet"
type="text/css">\n ')
-	html.write('  <title>Debian Package Tags</title>\n ')
-	html.write('</head>\n ')
-	html.write('<body>\n ')
-	html.write('\n ')
-	html.write('<h1>Debtags - Vocabulary Browser</h1>\n ')
-	html.write('\n ')
-	html.write('<div class="linkbar">\n ')
-	html.write('\n ')
-	html.write('	<div class="left">\n ')
-	html.write('		<a href="http://debtags.alioth.debian.org/">Debtags
home</a> -\n ')
-	html.write('		<a href="http://debtags.alioth.debian.org/todo.html">Go
tagging!</a> -\n ')
-	html.write('		<a href="http://debtags.alioth.debian.org/cloud/">Tag
cloud</a> -\n ')
-	html.write('		<a href="http://debtags.alioth.debian.org/edit.html">Tag
editor</a>\n ')
-	html.write('	</div>\n ')
-	html.write('	<div class="right">\n ')
-	html.write('\n ')
-	html.write('		<a
href="http://lists.alioth.debian.org/mailman/listinfo/debtags-devel">Mailing list</a> -\n ')
-	html.write('		<a
href="http://alioth.debian.org/projects/debtags">Alioth project
page</a>\n ')
-	html.write('	</div>\n ')
-	html.write('</div>\n ')
-	html.write('<div id="titlesep"> </div>\n ')
-	html.write('\n ')
-        html.write('<div id="facetsTitle">\n ')
-        html.write('  <h2>Facets</h2>\n ')
-        html.write('</div>\n ')
-        html.write('<div id="tagsTitle">\n ')
-        html.write('  <h2>Tags</h2>\n ')
-        html.write('</div>\n ')
-        html.write('\n ')
-        html.write('<br />\n ')
-        html.write('<br />\n ')
-	html.write('\n ')
-        html.write('<div id="itens">\n ')
-	html.write('\n ')
-	html.write('<ul>\n ')
-        html.write('\n ')
+        html.write(header)
         for facet in self.facets:
             if (facet is f):
-                html.write('<li class="clicked"><a name="'+facet.name+'" href="'+facet.name+'.html#'+facet.name+'">'+facet.description+'</a>\n ')
-                html.write('  <p>\n ')
+                html.write('<li class="clicked"><a name="%s" href="%
s.html#%s">%s</a>\n<p>\n' %
+                           (facet.name, facet.name, facet.name,
facet.description))
                 if facet.help:
-                    html.write('      '+facet.help+'<br />\n ')
+                    html.write('      %s<br />\n ' % (facet.help,))
                 html.write('      </p></li> <ul>\n ')
                 for tag in facet.tags:
                     link = tag.name
-                    html.write('         <li><a
href="javascript:void(0)">'+tag.name+'<span>\n ')
+                    html.write('         <li><a
href="javascript:void(0)">%s<span>\n ' %
+                               (tag.name,))
                     if tag.description:
                         html.write('      '+tag.description+'<br />\n
')
                     if tag.responsible:
@@ -196,36 +150,83 @@
                 html.write('    </ul> \n ')
             else: 
                 html.write('<li><a name="'+facet.name+'"
href="'+facet.name+'.html#'+facet.name+'">'+facet.description
+'</a></li>\n ')
-	html.write('\n ')
-	html.write('</ul>\n ')
-	html.write('\n ')
-        html.write('</div>\n ')
-	html.write('\n ')
-	html.write('<div id="footersep"> </div>\n ')
-	html.write('\n ')
-	html.write('<div class="linkbar">\n ')
-	html.write('	<div class="left">\n ')
-	html.write('		<a
href="http://debtags.alioth.debian.org/ssearch.html">Smart package
search</a> -\n ')
-	html.write('		<a href="http://debtags.alioth.debian.org/todo.html">Go
tagging!</a> -\n ')
-	html.write('		<a href="http://debtags.alioth.debian.org/cloud/">Tag
cloud</a> -\n ')
-	html.write('		<a href="http://debtags.alioth.debian.org/edit.html">Tag
editor</a>\n ')
-	html.write('\n ')
-	html.write('	</div>\n ')
-	html.write('	<div class="right">\n ')
-	html.write('		<a
href="http://lists.alioth.debian.org/mailman/listinfo/debtags-devel">Mailing list</a> -\n ')
-	html.write('		<a
href="http://alioth.debian.org/projects/debtags">Alioth project
page</a>\n ')
-	html.write('	</div>\n ')
-	html.write('</div>\n ')
-	html.write('\n ')
-	html.write('</body>\n ')
-	html.write('\n ')
-	html.write('</html>')
-	html.close()
+        html.write(footer)
+        html.close()
 
 def main():
-        vocabulary = Vocabulary('vocabulary')
-        vocabulary.load()
-	vocabulary.generateAll()
-
+    vocabulary = Vocabulary('vocabulary')
+    vocabulary.load()
+    vocabulary.generateAll()
+    
+header= \
+        '<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN
\">\n ' + \
+        '<html>\n ' + \
+        '<head>\n ' + \
+        '  <meta http-equiv="content-type" content="text/html;
charset=UTF-8">\n ' + \
+        '  <link href="main.css" rel="stylesheet" type="text/css">\n ' + \
+        '  <title>Debian Package Tags</title>\n ' + \
+        '</head>\n ' + \
+        '<body>\n ' + \
+        '\n ' + \
+        '<h1>Debtags - Vocabulary Browser</h1>\n ' + \
+        '\n ' + \
+        '<div class="linkbar">\n ' + \
+        '\n ' + \
+        '	<div class="left">\n ' + \
+        '		<a href="http://debtags.alioth.debian.org/">Debtags home</a>
-\n ' + \
+        '		<a href="http://debtags.alioth.debian.org/todo.html">Go
tagging!</a> -\n ' + \
+        '		<a href="http://debtags.alioth.debian.org/cloud/">Tag
cloud</a> -\n ' + \
+        '		<a href="http://debtags.alioth.debian.org/edit.html">Tag
editor</a>\n ' + \
+        '	</div>\n ' + \
+        '	<div class="right">\n ' + \
+        '\n ' + \
+        '		<a
href="http://lists.alioth.debian.org/mailman/listinfo/debtags-devel">Mailing list</a> -\n ' + \
+        '		<a href="http://alioth.debian.org/projects/debtags">Alioth
project page</a>\n ' + \
+        '	</div>\n ' + \
+        '</div>\n ' + \
+        '<div id="titlesep"> </div>\n ' + \
+        '\n ' + \
+        '<div id="facetsTitle">\n ' + \
+        '  <h2>Facets</h2>\n ' + \
+        '</div>\n ' + \
+        '<div id="tagsTitle">\n ' + \
+        '  <h2>Tags</h2>\n ' + \
+        '</div>\n ' + \
+        '\n ' + \
+        '<br />\n ' + \
+        '<br />\n ' + \
+        '\n ' + \
+        '<div id="itens">\n ' + \
+        '\n ' + \
+        '<ul>\n ' + \
+        '\n '
+footer = \
+       '\n ' + \
+       '</ul>\n ' + \
+       '\n ' + \
+       '</div>\n ' + \
+       '\n ' + \
+       '<div id="footersep"> </div>\n ' + \
+       '\n ' + \
+       '<div class="linkbar">\n ' + \
+       '	<div class="left">\n ' + \
+       '		<a href="http://debtags.alioth.debian.org/ssearch.html">Smart
package search</a> -\n ' + \
+       '		<a href="http://debtags.alioth.debian.org/todo.html">Go
tagging!</a> -\n ' + \
+       '		<a href="http://debtags.alioth.debian.org/cloud/">Tag
cloud</a> -\n ' + \
+       '		<a href="http://debtags.alioth.debian.org/edit.html">Tag
editor</a>\n ' + \
+       '\n ' + \
+       '	</div>\n ' + \
+       '	<div class="right">\n ' + \
+       '		<a
href="http://lists.alioth.debian.org/mailman/listinfo/debtags-devel">Mailing list</a> -\n ' + \
+       '		<a href="http://alioth.debian.org/projects/debtags">Alioth
project page</a>\n ' + \
+       '	</div>\n ' + \
+       '</div>\n ' + \
+       '\n ' + \
+       '</body>\n ' + \
+       '\n ' + \
+       '</html>'
+    
+    
 if __name__ == "__main__":
-	main()
+    main()
+        

-----------------------------------------
#Tests for vocabulary.py
#Written by Jesse Weinstein <jesse at wefu.org>
#Released into the public domain, e.g. http://creativecommons.org/publicdomain/zero/1.0/

import unittest
import os
# NOTE(review): hard-coded absolute path to the author's own checkout.
# The tests below open 'vocabulary_test' by relative name, so the working
# directory must contain that file (and presumably vocabulary.py as well --
# confirm how the module is found on sys.path). Adjust before running.
os.chdir('/home/jessew/FreeProjects/vocabulary-browser')
import vocabulary
class TestFacet(unittest.TestCase):
    """Tests for vocabulary.Facet, vocabulary.Tag and Vocabulary.load().

    setUp() builds the Tag and Facet objects that loading the
    'vocabulary_test' sample file is expected to produce; test_load()
    compares the parsed result against them using the attribute-wise
    equality the classes define.
    """

    # Names of the tags in the 'vocabulary_test' sample file, in file order.
    TAG_NAMES = [
        'accessibility::input',
        'accessibility::ocr',
        'accessibility::screen-magnify',
        'accessibility::screen-reader',
        'accessibility::speech',
        'accessibility::speech-recognition',
        'accessibility::TODO',
    ]

    def setUp(self):
        """Create the expected Tag list and Facet list for the sample file."""
        self.good_tags = [
            vocabulary.Tag(
                name='accessibility::input',
                description='Input Systems',
                help=(' Applies to input methods for non-latin languages '
                      'as well as special input\n'
                      ' systems.\n'),
                responsible='', implies='', flags=''),
            vocabulary.Tag(
                name='accessibility::ocr',
                description='Text Recognition (OCR)',
                help=' Optical Character Recognition\n',
                responsible='', implies='', flags=''),
            vocabulary.Tag(
                name='accessibility::screen-magnify',
                description='Screen Magnification',
                help='', responsible='', implies='', flags=''),
            vocabulary.Tag(
                name='accessibility::screen-reader',
                description='Screen Reading',
                help='', responsible='', implies='', flags=''),
            vocabulary.Tag(
                name='accessibility::speech',
                description='Speech Synthesis',
                help='', responsible='', implies='', flags=''),
            vocabulary.Tag(
                name='accessibility::speech-recognition',
                description='Speech Recognition',
                help='', responsible='', implies='', flags=''),
            vocabulary.Tag(
                name='accessibility::TODO',
                description='Need an extra tag',
                help=(' The package can be categorised along this facet, '
                      'but the right tag for it is\n'
                      ' missing.\n'
                      ' .\n'
                      ' Mark a package with this tag to signal the '
                      'vocabulary maintainers of cases\n'
                      ' where the current tag set is lacking.\n'),
                responsible='', implies='', flags=''),
        ]
        self.good_facets = [
            vocabulary.Facet(
                name='accessibility',
                status='needing-review',
                nature='energy',
                description='Accessibility Support',
                responsible='', comment='', help='',
                tags=self.good_tags),
        ]

    def _full_facet(self):
        """Return a Facet named 'it' with every text attribute populated."""
        return vocabulary.Facet('it', status='x', nature='y',
                                responsible='foo', comment='v',
                                description='wtf', help='yep')

    def test_create_empty(self):
        """A Facet built from a name alone has empty attributes and no tags."""
        f = vocabulary.Facet('it')
        self.assertEqual(f.name, 'it')
        self.assertEqual(f.status, '')
        self.assertEqual(f.nature, '')
        self.assertEqual(f.responsible, '')
        self.assertEqual(f.comment, '')
        self.assertEqual(f.description, '')
        self.assertEqual(f.help, '')
        self.assertEqual(f.tags, [])

    def test_create_full(self):
        """Keyword arguments passed to Facet() end up as attributes."""
        f = self._full_facet()
        self.assertEqual(f.name, 'it')
        self.assertEqual(f.status, 'x')
        self.assertEqual(f.nature, 'y')
        self.assertEqual(f.responsible, 'foo')
        self.assertEqual(f.comment, 'v')
        self.assertEqual(f.description, 'wtf')
        self.assertEqual(f.help, 'yep')
        self.assertEqual(f.tags, [])

    def test_str(self):
        """str(facet) lists the populated fields one per line, help last."""
        self.assertEqual(
            str(self._full_facet()),
            'Facet: it\nStatus: x\nNature: y\nResponsible: foo\n'
            'Comment: v\nDescription: wtf\nyep')

    def test_repr(self):
        """repr(facet) is 'vocabulary.Facet(attr=value, ...)'.

        The attribute order in the repr follows __dict__ iteration order,
        which differs between interpreters, so the attr=value pairs are
        compared as a sorted list rather than as one literal string.
        """
        text = repr(self._full_facet())
        prefix = 'vocabulary.Facet('
        self.assertTrue(text.startswith(prefix))
        self.assertTrue(text.endswith(')'))
        # None of the values used here contains ', ', so splitting on it
        # recovers exactly one item per attribute.
        self.assertEqual(
            sorted(text[len(prefix):-1].split(', ')),
            sorted(["status='x'", "comment='v'", "description='wtf'",
                    "nature='y'", "responsible='foo'", "help='yep'",
                    "tags=[]", "name='it'"]))

    def test_load(self):
        """Loading 'vocabulary_test' yields the single expected facet."""
        v = vocabulary.Vocabulary('vocabulary_test')
        v.load()
        self.assertEqual(len(v.facets), 1)
        loaded_tags = v.facets[0].tags
        self.assertEqual([x.name for x in loaded_tags], self.TAG_NAMES)
        # Compare tag by tag first so a mismatch points at the offending
        # tag instead of dumping the whole list.
        for loaded, expected in zip(loaded_tags, self.good_tags):
            self.assertEqual(loaded, expected)
        self.assertEqual(loaded_tags, self.good_tags)
        self.assertEqual(v.facets, self.good_facets)
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

---------------------------------------------
Facet: accessibility
Status: needing-review
Nature: energy
Description: Accessibility Support

Tag: accessibility::input
Description: Input Systems
 Applies to input methods for non-latin languages as well as special input
 systems.

Tag: accessibility::ocr
Description: Text Recognition (OCR)
 Optical Character Recognition

Tag: accessibility::screen-magnify
Description: Screen Magnification

Tag: accessibility::screen-reader
Description: Screen Reading

Tag: accessibility::speech
Description: Speech Synthesis

Tag: accessibility::speech-recognition
Description: Speech Recognition

Tag: accessibility::TODO
Description: Need an extra tag
 The package can be categorised along this facet, but the right tag for it is
 missing.
 .
 Mark a package with this tag to signal the vocabulary maintainers of cases
 where the current tag set is lacking.





More information about the Debtags-devel mailing list