r6907 - people/andres
Andres Mejia
ceros-guest at alioth.debian.org
Thu May 8 08:23:55 UTC 2008
Author: ceros-guest
Date: 2008-05-08 08:23:55 +0000 (Thu, 08 May 2008)
New Revision: 6907
Modified:
people/andres/TODO
people/andres/wnpp_game_search
Log:
Perfecting wnpp game search script
Modified: people/andres/TODO
===================================================================
--- people/andres/TODO 2008-05-08 06:33:54 UTC (rev 6906)
+++ people/andres/TODO 2008-05-08 08:23:55 UTC (rev 6907)
@@ -1,11 +1,6 @@
TODOs
-* Make script include/exclude some packages
-* Include all of wnpp bugs related to games (orphaned, need help, etc).
-* Seperate sections in output moinmoin format file
* Include upstream license, upstream url, download url, descriptions if they're
available
-* Find a way to include/leave comments and other general information in output
- moinmoin file.
- + Will probably need to make use of comments within moinmoin file and have
- script look at previous generated moinmoin file.
- + We will need to use http://wiki.debian.org/Games/WNPP?action=raw for this.
+* Perhaps we should get the raw bug logs using rsync and keep it on the hard
+ drive instead of traversing each bug's webpage every time the script is run.
+ + 'rsync -a bugs-mirror.debian.org::bts-spool-db/<last_2_of_bug#>/<bug#.{log,report,status,summary}> <save_path>'
Modified: people/andres/wnpp_game_search
===================================================================
--- people/andres/wnpp_game_search 2008-05-08 06:33:54 UTC (rev 6906)
+++ people/andres/wnpp_game_search 2008-05-08 08:23:55 UTC (rev 6907)
@@ -19,48 +19,142 @@
import urllib2 #For reading webpages
import time #For generating the time
+# List of packages to always include, if found
+INCLUSIONS = set([])
+
+# List of packages to always exclude, if found
+EXCLUSIONS = set(['openjdk-6', 'pdf2svg', 'zope-educommons'])
+
# Open the propective webpage for reading and a file for writing
-#fin = open("wnpp_prospective.html", "r") # Use this for testing
-fin = urllib2.urlopen("http://www.debian.org/devel/wnpp/prospective")
+# Use these for testing
+#prospective_in = open("wnpp_prospective.html", "r")
+#work_needing_in = open("wnpp_work_needing.html", "r")
+#help_requested_in = open("wnpp_help_requested.html","r")
+
+# The files we'll actually use
+prospective_in = urllib2.urlopen("http://www.debian.org/devel/wnpp/prospective")
+work_needing_in = urllib2.urlopen("http://www.debian.org/devel/wnpp/work_needing", "r")
+help_requested_in = urllib2.urlopen("http://www.debian.org/devel/wnpp/help_requested","r")
+moinsavein = urllib2.urlopen("http://wiki.debian.org/Games/WNPP?action=raw", "r")
fout = open("wnpp_propestive_moinmoin.txt", "w")
# Print some heading lines for the moinmoin file
-print "Searching wnpp propespective page."
+print "Searching wnpp pages."
startTime = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
print >>fout, "Wiki generated on " + startTime
-print >>fout, "Please do not directly edit this page.\n"
-print >>fout, "== Prospective Packages =="
+print >>fout, "To leave a comment, place them between #####START_COMMENT#####"
+print >>fout, "and #####END_COMMENT##### of each package.\n"
+print >>fout, "[[TableOfContents]]\n"
-p_bug = re.compile('http://bugs.debian.org/[0-9]+') #Pattern for bug url
-p_number = re.compile('[0-9]+') #Pattern for the bug number
-p_terms = re.compile('game|gaming|3d', re.IGNORECASE) #Our terms to search
+# Our regex tests
+#Our main terms to search
+p_terms = re.compile('game|gaming|3d', re.IGNORECASE)
+#Pattern for bug url
+p_bug = re.compile('http://bugs.debian.org/[0-9]+')
+#Pattern for the bug number
+p_number = re.compile('[0-9]+')
+#Pattern for prospective web page
+p_prospective = re.compile('<h1>Prospective packages</h1>')
+#Pattern for ITP section
+p_itp_line = re.compile('<h3>Packages being worked on</h3>')
+#Pattern for RFP section
+p_rfp_line = re.compile('<h3>Requested packages</h3>')
+#Pattern for work needed web page
+p_work_needing = re.compile('<h1>Packages in need of a new maintainer</h1>')
+#Pattern for O section
+p_rfa_line = re.compile('<h3>Packages up for adoption</h3>')
+#Pattern for RFA by maintainer section
+p_rfam_line = re.compile('<h3>Packages up for adoption, by maintainer</h3>')
+#Pattern for O section
+p_o_line = re.compile('<h3>Orphaned packages</h3>')
+#Pattern for help requested web page
+p_help_requested = re.compile('<h1>Packages for which help was requested</h1>')
+#Pattern to check for the start of a comment
+p_start_comment = re.compile('#####START_COMMENT#####')
+#Patter to check for the end of a comment
+p_end_comment = re.compile('#####END_COMMENT#####')
-# Start the main loop
-for line in fin:
- bugMatch = p_bug.search(line)
- if bugMatch:
- # Do some general processing of each line found
- string = line
- string = string.replace('<ul>','')
- string = string.rstrip()
- string = string.split('>')
- string = string[2].replace('</a','')
- string = string.split(':')
- # Assign a meaningful variable with the correct string
- bugPackage = string[0]
- bugHeadline = string[1].lstrip()
- bugNumber = p_number.search(bugMatch.group())
- # Create a tuple that contains the four main strings we need
- bugTuple = bugPackage, bugHeadline, bugMatch.group(), bugNumber.group()
- print 'Scanning bug page for ' + bugTuple[0]
- # Not sure if I want to use the mboxes or the web pages themselves
- #bugmbox = urllib2.urlopen('http://bugs.debian.org/cgi-bin/bugreport.cgi?msg=5;mbox=yes;bug=' + bugTuple[4])
- # Now scan each page for the terms we want
- bugPage = urllib2.urlopen(bugTuple[2])
- for bugLines in bugPage:
- termMatch = p_terms.search(bugLines)
- if termMatch:
- print '**Including ' + bugTuple[0] + ' in list.**'
- print >>fout, ' . \'\'\'' + bugTuple[0] + '\'\'\': ' + bugTuple[1]
- print >>fout, ' . ' + bugTuple[2]
- break
# Checks whether the given html line starts a new wnpp section and, if so,
# prints the corresponding heading line(s) to the output moinmoin file.
# Patterns are checked in page order so the output sections keep that order.
def newSection(line):
    section_headings = (
        (p_prospective,    "= Prospective Packages ="),
        (p_itp_line,       "== Packages being worked on / ITP =="),
        (p_rfp_line,       "\n== Requested packages / RFP =="),
        (p_work_needing,   "\n= Packages in need of a new maintainer ="),
        (p_rfa_line,       "\n== Packages up for adoption / RFA =="),
        (p_rfam_line,      "\n== Packages up for adoption, by maintainer / RFA =="),
        (p_o_line,         "\n== Orphaned packages =="),
        (p_help_requested, "\n= Packages for which help was requested ="),
    )
    for pattern, heading in section_headings:
        if pattern.search(line):
            print >>fout, heading
+
# Cache of the raw wiki page lines.  moinsavein is a one-shot url handle:
# iterating it a second time yields nothing, so without this cache only the
# first package scanned could ever have its comment carried over.
_moin_lines = None

# Include comments from the wiki page for a particular package.
# Lines between #####START_COMMENT##### and #####END_COMMENT##### in the
# package's wiki entry are copied verbatim into the output file.
def include_comment(package):
    global _moin_lines
    if _moin_lines is None:
        _moin_lines = moinsavein.readlines()  # read the wiki page only once
    found_package = False
    start_comment = False
    string = '\'\'\'' + package + '\'\'\':' # The package string to find
    for moinline in _moin_lines:
        lineset = set(moinline.split())
        if (string in lineset):
            found_package = True # We found the package
            continue
        if p_start_comment.search(moinline):
            start_comment = True # We found the start of the comment
            continue
        if p_end_comment.search(moinline):
            start_comment = False # We reached the end of the comment
            if found_package:
                break # We reached the end of the package section, so break
        if found_package and start_comment:
            # BUG FIX: write() instead of print — moinline already ends with
            # '\n', and print appended a second one, so each regeneration of
            # the wiki page doubled the blank lines inside every comment.
            fout.write(moinline)
+
+# Method that scans a bug page
+def scan_bug(bugTuple):
+ print 'Scanning bug page for ' + bugTuple[0]
+ bugPage = urllib2.urlopen(bugTuple[2])
+ for bugLines in bugPage:
+ termMatch = p_terms.search(bugLines)
+ # Always include packages in inclusion set
+ if (bugTuple[0] in INCLUSIONS) or termMatch:
+ print '**Including ' + bugTuple[0] + ' in list.**'
+ print >>fout, ' . \'\'\'' + bugTuple[0] + '\'\'\': ' + bugTuple[1]
+ print >>fout, ' . ' + bugTuple[2]
+ print >>fout, '#####START_COMMENT#####'
+ include_comment(bugTuple[0]) # Include any comments from wiki page
+ print >>fout, '#####END_COMMENT#####'
+ break
+
# Method that scans a wnpp overview page: emits section headings as they are
# encountered and hands each non-excluded bug entry off to scan_bug().
def scan_wnpp(page):
    for line in page:
        # Ensure we're not in a new section
        newSection(line)
        bugMatch = p_bug.search(line)
        if bugMatch:
            # Entry lines look like:
            #   <ul><li><a href="http://bugs.debian.org/NNN">pkg: headline</a>...
            # (assumed from the parsing below -- TODO confirm against the page)
            text = line.replace('<ul>', '')
            text = text.rstrip()
            text = text.split('>')[2].replace('</a', '')
            # BUG FIX: split on the first ':' only.  A headline that itself
            # contained a colon used to be silently truncated at that colon
            # because string[1] took only the piece between the first two.
            parts = text.split(':', 1)
            # Assign meaningful variables with the correct strings
            bugPackage = parts[0]
            bugHeadline = parts[1].lstrip()
            bugNumber = p_number.search(bugMatch.group())
            # Create a tuple that contains the four main strings we need
            bugTuple = bugPackage, bugHeadline, bugMatch.group(), bugNumber.group()
            # Scan bugs of packages not in exclusion set
            if bugPackage not in EXCLUSIONS:
                scan_bug(bugTuple)
+
# Do our main stuff: scan each wnpp overview page in turn
for wnpp_page in (prospective_in, work_needing_in, help_requested_in):
    scan_wnpp(wnpp_page)
More information about the Pkg-games-commits
mailing list