r6907 - people/andres
Andres Mejia
ceros-guest at alioth.debian.org
Thu May 8 08:23:55 UTC 2008
Author: ceros-guest
Date: 2008-05-08 08:23:55 +0000 (Thu, 08 May 2008)
New Revision: 6907
Modified:
people/andres/TODO
people/andres/wnpp_game_search
Log:
Perfecting wnpp game search script
Modified: people/andres/TODO
===================================================================
--- people/andres/TODO 2008-05-08 06:33:54 UTC (rev 6906)
+++ people/andres/TODO 2008-05-08 08:23:55 UTC (rev 6907)
@@ -1,11 +1,6 @@
TODOs
-* Make script include/exclude some packages
-* Include all of wnpp bugs related to games (orphaned, need help, etc).
-* Seperate sections in output moinmoin format file
* Include upstream license, upstream url, download url, descriptions if they're
available
-* Find a way to include/leave comments and other general information in output
- moinmoin file.
- + Will probably need to make use of comments within moinmoin file and have
- script look at previous generated moinmoin file.
- + We will need to use http://wiki.debian.org/Games/WNPP?action=raw for this.
+* Perhaps we should get the raw bug logs using rsync and keep it on the hard
+ drive instead of traversing each bug's webpage every time the script is run.
+ + 'rsync -a bugs-mirror.debian.org::bts-spool-db/<last_2_of_bug#>/<bug#.{log,report,status,summary}> <save_path>'
Modified: people/andres/wnpp_game_search
===================================================================
--- people/andres/wnpp_game_search 2008-05-08 06:33:54 UTC (rev 6906)
+++ people/andres/wnpp_game_search 2008-05-08 08:23:55 UTC (rev 6907)
@@ -19,48 +19,142 @@
import urllib2 #For reading webpages
import time #For generating the time
+# List of packages to always include, if found
+INCLUSIONS = set([])
+
+# List of packages to always exclude, if found
+EXCLUSIONS = set(['openjdk-6', 'pdf2svg', 'zope-educommons'])
+
# Open the propective webpage for reading and a file for writing
-#fin = open("wnpp_prospective.html", "r") # Use this for testing
-fin = urllib2.urlopen("http://www.debian.org/devel/wnpp/prospective")
+# Use these for testing
+#prospective_in = open("wnpp_prospective.html", "r")
+#work_needing_in = open("wnpp_work_needing.html", "r")
+#help_requested_in = open("wnpp_help_requested.html","r")
+
+# The files we'll actually use
+prospective_in = urllib2.urlopen("http://www.debian.org/devel/wnpp/prospective")
+work_needing_in = urllib2.urlopen("http://www.debian.org/devel/wnpp/work_needing", "r")
+help_requested_in = urllib2.urlopen("http://www.debian.org/devel/wnpp/help_requested","r")
+moinsavein = urllib2.urlopen("http://wiki.debian.org/Games/WNPP?action=raw", "r")
fout = open("wnpp_propestive_moinmoin.txt", "w")
# Print some heading lines for the moinmoin file
-print "Searching wnpp propespective page."
+print "Searching wnpp pages."
startTime = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
print >>fout, "Wiki generated on " + startTime
-print >>fout, "Please do not directly edit this page.\n"
-print >>fout, "== Prospective Packages =="
+print >>fout, "To leave a comment, place them between #####START_COMMENT#####"
+print >>fout, "and #####END_COMMENT##### of each package.\n"
+print >>fout, "[[TableOfContents]]\n"
-p_bug = re.compile('http://bugs.debian.org/[0-9]+') #Pattern for bug url
-p_number = re.compile('[0-9]+') #Pattern for the bug number
-p_terms = re.compile('game|gaming|3d', re.IGNORECASE) #Our terms to search
+# Our regex tests
+#Our main terms to search
+p_terms = re.compile('game|gaming|3d', re.IGNORECASE)
+#Pattern for bug url
+p_bug = re.compile('http://bugs.debian.org/[0-9]+')
+#Pattern for the bug number
+p_number = re.compile('[0-9]+')
+#Pattern for prospective web page
+p_prospective = re.compile('<h1>Prospective packages</h1>')
+#Pattern for ITP section
+p_itp_line = re.compile('<h3>Packages being worked on</h3>')
+#Pattern for RFP section
+p_rfp_line = re.compile('<h3>Requested packages</h3>')
+#Pattern for work needed web page
+p_work_needing = re.compile('<h1>Packages in need of a new maintainer</h1>')
+#Pattern for O section
+p_rfa_line = re.compile('<h3>Packages up for adoption</h3>')
+#Pattern for RFA by maintainer section
+p_rfam_line = re.compile('<h3>Packages up for adoption, by maintainer</h3>')
+#Pattern for O section
+p_o_line = re.compile('<h3>Orphaned packages</h3>')
+#Pattern for help requested web page
+p_help_requested = re.compile('<h1>Packages for which help was requested</h1>')
+#Pattern to check for the start of a comment
+p_start_comment = re.compile('#####START_COMMENT#####')
+#Patter to check for the end of a comment
+p_end_comment = re.compile('#####END_COMMENT#####')
-# Start the main loop
-for line in fin:
- bugMatch = p_bug.search(line)
- if bugMatch:
- # Do some general processing of each line found
- string = line
- string = string.replace('<ul>','')
- string = string.rstrip()
- string = string.split('>')
- string = string[2].replace('</a','')
- string = string.split(':')
- # Assign a meaningful variable with the correct string
- bugPackage = string[0]
- bugHeadline = string[1].lstrip()
- bugNumber = p_number.search(bugMatch.group())
- # Create a tuple that contains the four main strings we need
- bugTuple = bugPackage, bugHeadline, bugMatch.group(), bugNumber.group()
- print 'Scanning bug page for ' + bugTuple[0]
- # Not sure if I want to use the mboxes or the web pages themselves
- #bugmbox = urllib2.urlopen('http://bugs.debian.org/cgi-bin/bugreport.cgi?msg=5;mbox=yes;bug=' + bugTuple[4])
- # Now scan each page for the terms we want
- bugPage = urllib2.urlopen(bugTuple[2])
- for bugLines in bugPage:
- termMatch = p_terms.search(bugLines)
- if termMatch:
- print '**Including ' + bugTuple[0] + ' in list.**'
- print >>fout, ' . \'\'\'' + bugTuple[0] + '\'\'\': ' + bugTuple[1]
- print >>fout, ' . ' + bugTuple[2]
- break
# Checks whether the given html line starts a new wnpp section and, if so,
# prints the corresponding heading line(s) to the output moinmoin file.
# Patterns are checked in page order so the output sections keep that order.
def newSection(line):
    section_headings = (
        (p_prospective,    "= Prospective Packages ="),
        (p_itp_line,       "== Packages being worked on / ITP =="),
        (p_rfp_line,       "\n== Requested packages / RFP =="),
        (p_work_needing,   "\n= Packages in need of a new maintainer ="),
        (p_rfa_line,       "\n== Packages up for adoption / RFA =="),
        (p_rfam_line,      "\n== Packages up for adoption, by maintainer / RFA =="),
        (p_o_line,         "\n== Orphaned packages =="),
        (p_help_requested, "\n= Packages for which help was requested ="),
    )
    for pattern, heading in section_headings:
        if pattern.search(line):
            print >>fout, heading
+
# Cache of the raw wiki page lines.  moinsavein is a one-shot url handle:
# iterating it a second time yields nothing, so without this cache only the
# first package scanned could ever have its comment carried over.
_moin_lines = None

# Include comments from the wiki page for a particular package.
# Lines between #####START_COMMENT##### and #####END_COMMENT##### in the
# package's wiki entry are copied verbatim into the output file.
def include_comment(package):
    global _moin_lines
    if _moin_lines is None:
        _moin_lines = moinsavein.readlines()  # read the wiki page only once
    found_package = False
    start_comment = False
    string = '\'\'\'' + package + '\'\'\':' # The package string to find
    for moinline in _moin_lines:
        lineset = set(moinline.split())
        if (string in lineset):
            found_package = True # We found the package
            continue
        if p_start_comment.search(moinline):
            start_comment = True # We found the start of the comment
            continue
        if p_end_comment.search(moinline):
            start_comment = False # We reached the end of the comment
            if found_package:
                break # We reached the end of the package section, so break
        if found_package and start_comment:
            # BUG FIX: write() instead of print — moinline already ends with
            # '\n', and print appended a second one, so each regeneration of
            # the wiki page doubled the blank lines inside every comment.
            fout.write(moinline)
+
+# Method that scans a bug page
+def scan_bug(bugTuple):
+ print 'Scanning bug page for ' + bugTuple[0]
+ bugPage = urllib2.urlopen(bugTuple[2])
+ for bugLines in bugPage:
+ termMatch = p_terms.search(bugLines)
+ # Always include packages in inclusion set
+ if (bugTuple[0] in INCLUSIONS) or termMatch:
+ print '**Including ' + bugTuple[0] + ' in list.**'
+ print >>fout, ' . \'\'\'' + bugTuple[0] + '\'\'\': ' + bugTuple[1]
+ print >>fout, ' . ' + bugTuple[2]
+ print >>fout, '#####START_COMMENT#####'
+ include_comment(bugTuple[0]) # Include any comments from wiki page
+ print >>fout, '#####END_COMMENT#####'
+ break
+
# Method that scans a wnpp overview page: emits section headings as they are
# encountered and hands each non-excluded bug entry off to scan_bug().
def scan_wnpp(page):
    for line in page:
        # Ensure we're not in a new section
        newSection(line)
        bugMatch = p_bug.search(line)
        if bugMatch:
            # Entry lines look like:
            #   <ul><li><a href="http://bugs.debian.org/NNN">pkg: headline</a>...
            # (assumed from the parsing below -- TODO confirm against the page)
            text = line.replace('<ul>', '')
            text = text.rstrip()
            text = text.split('>')[2].replace('</a', '')
            # BUG FIX: split on the first ':' only.  A headline that itself
            # contained a colon used to be silently truncated at that colon
            # because string[1] took only the piece between the first two.
            parts = text.split(':', 1)
            # Assign meaningful variables with the correct strings
            bugPackage = parts[0]
            bugHeadline = parts[1].lstrip()
            bugNumber = p_number.search(bugMatch.group())
            # Create a tuple that contains the four main strings we need
            bugTuple = bugPackage, bugHeadline, bugMatch.group(), bugNumber.group()
            # Scan bugs of packages not in exclusion set
            if bugPackage not in EXCLUSIONS:
                scan_bug(bugTuple)
+
# Do our main stuff: scan each wnpp overview page in turn
for wnpp_page in (prospective_in, work_needing_in, help_requested_in):
    scan_wnpp(wnpp_page)
More information about the Pkg-games-commits
mailing list