[Collab-qa-commits] r2184 - in udd: sql udd

Andreas Tille tille at alioth.debian.org
Wed Mar 28 10:46:11 UTC 2012


Author: tille
Date: 2012-03-28 10:46:10 +0000 (Wed, 28 Mar 2012)
New Revision: 2184

Modified:
   udd/sql/bibref.sql
   udd/sql/blends-query-packages.sql
   udd/udd/bibref_gatherer.py
Log:
The bibref gatherer now works on source packages rather than binary packages


Modified: udd/sql/bibref.sql
===================================================================
--- udd/sql/bibref.sql	2012-03-28 01:37:52 UTC (rev 2183)
+++ udd/sql/bibref.sql	2012-03-28 10:46:10 UTC (rev 2184)
@@ -3,11 +3,11 @@
 DROP TABLE IF EXISTS bibref CASCADE;
 
 CREATE TABLE bibref (
-	package	text NOT NULL,
+	source	text NOT NULL,
 	key	text NOT NULL,
 	value	text NOT NULL,
 	rank    int  NOT NULL,
-	PRIMARY KEY (package,key,rank) -- this helps preventing more than one times the same key for a single package
+	PRIMARY KEY (source,key,rank) -- this helps prevent the same key from occurring more than once for a single reference (rank) of a source package
 );
 
 GRANT SELECT ON bibref TO PUBLIC;

Modified: udd/sql/blends-query-packages.sql
===================================================================
--- udd/sql/blends-query-packages.sql	2012-03-28 01:37:52 UTC (rev 2183)
+++ udd/sql/blends-query-packages.sql	2012-03-28 10:46:10 UTC (rev 2184)
@@ -23,11 +23,16 @@
 	 pop.vote, pop.recent,
          tags.debtags,
          screenshot_versions, large_image_urls, small_image_urls,
-         bibyear.value   AS "Published-Year",
-         bibtitle.value  AS "Published-Title",
-         bibauthor.value AS "Published-Authors",
-         bibdoi.value    AS "Published-DOI",
-         bibpmid.value   AS "Published-PubMed",
+         bibyear.value    AS "year",
+         bibtitle.value   AS "title",
+         bibauthor.value  AS "authors",
+         bibdoi.value     AS "doi",
+         bibpmid.value    AS "pubmed",
+         biburl.value     AS "url",
+         bibjournal.value AS "journal",
+         bibvolume.value  AS "volume",
+         bibnumber.value  AS "number",
+         bibpages.value   AS "pages",
          en.description AS description_en, en.long_description AS long_description_en,
          cs.description AS description_cs, cs.long_description AS long_description_cs,
          da.description AS description_da, da.long_description AS long_description_da,
@@ -185,11 +190,16 @@
          WHERE enhances LIKE ANY( $2 )
       ) AS tmpenh GROUP BY package
     ) enh ON enh.package = p.package
-    LEFT OUTER JOIN bibref bibyear   ON p.package = bibyear.package   AND bibyear.key   = 'Reference-Year'
-    LEFT OUTER JOIN bibref bibtitle  ON p.package = bibtitle.package  AND bibtitle.key  = 'Reference-Title'
-    LEFT OUTER JOIN bibref bibauthor ON p.package = bibauthor.package AND bibauthor.key = 'Reference-Author'
-    LEFT OUTER JOIN bibref bibdoi    ON p.package = bibdoi.package    AND bibdoi.key    = 'DOI'
-    LEFT OUTER JOIN bibref bibpmid   ON p.package = bibpmid.package   AND bibpmid.key   = 'PMID'
+    LEFT OUTER JOIN bibref bibyear    ON p.source = bibyear.source    AND bibyear.rank = 0   AND bibyear.key    = 'year'
+    LEFT OUTER JOIN bibref bibtitle   ON p.source = bibtitle.source   AND bibtitle.rank = 0   AND bibtitle.key   = 'title'
+    LEFT OUTER JOIN bibref bibauthor  ON p.source = bibauthor.source  AND bibauthor.rank = 0  AND bibauthor.key  = 'author'
+    LEFT OUTER JOIN bibref bibdoi     ON p.source = bibdoi.source     AND bibdoi.rank = 0     AND bibdoi.key     = 'doi'
+    LEFT OUTER JOIN bibref bibpmid    ON p.source = bibpmid.source    AND bibpmid.rank = 0    AND bibpmid.key    = 'pmid'
+    LEFT OUTER JOIN bibref biburl     ON p.source = biburl.source     AND biburl.rank = 0     AND biburl.key     = 'url'
+    LEFT OUTER JOIN bibref bibjournal ON p.source = bibjournal.source AND bibjournal.rank = 0 AND bibjournal.key = 'journal'
+    LEFT OUTER JOIN bibref bibvolume  ON p.source = bibvolume.source  AND bibvolume.rank = 0  AND bibvolume.key  = 'volume'
+    LEFT OUTER JOIN bibref bibnumber  ON p.source = bibnumber.source  AND bibnumber.rank = 0  AND bibnumber.key  = 'number'
+    LEFT OUTER JOIN bibref bibpages   ON p.source = bibpages.source   AND bibpages.rank = 0   AND bibpages.key   = 'pages'
     ORDER BY p.package
  $$ LANGUAGE 'SQL';
 

Modified: udd/udd/bibref_gatherer.py
===================================================================
--- udd/udd/bibref_gatherer.py	2012-03-28 01:37:52 UTC (rev 2183)
+++ udd/udd/bibref_gatherer.py	2012-03-28 10:46:10 UTC (rev 2184)
@@ -41,7 +41,7 @@
     self.bibrefs = []
     self.bibrefsinglelist = []
 
-  def setref(self, references, package, rank):
+  def setref(self, references, source, rank):
     year=''
     defined_fields = { 'article'   : 0,
                        'author'    : 0,
@@ -66,16 +66,16 @@
       key = r.lower()
       if defined_fields.has_key(key):
         if defined_fields[key] > 0:
-          self.log.error("Duplicated key in package '%s': %s", package, key)
+          self.log.error("Duplicated key in source package '%s': %s", source, key)
           continue
         else:
           defined_fields[key] = 1
       else:
-          self.log.warning("Unexpected key in package '%s': %s", package, key)
+          self.log.warning("Unexpected key in source package '%s': %s", source, key)
           defined_fields[key] = 1
       ref={}
       ref['rank']    = rank
-      ref['package'] = package
+      ref['source']  = source
       ref['key']     = key
       if isinstance(references[r], int):
         ref['value']   = str(references[r])
@@ -85,16 +85,16 @@
       if r.lower() == 'year':
         year = ref['value']
     # Create unique BibTeX key
-    bibtexkey = package
+    bibtexkey = source
     if bibtexkey in self.bibrefsinglelist and year != '':
-      bibtexkey = package+year
+      bibtexkey = source+year
     if bibtexkey in self.bibrefsinglelist:
-      # if there are more than one reference per package and even in
+      # if there are more than one reference per source package and even in
       # the same year append the rank as letter
       bibtexkey += 'abcdefghijklmnopqrstuvwxyz'[rank]
     ref={}
     ref['rank']    = rank
-    ref['package'] = package
+    ref['source']  = source
     ref['key']     = 'bibtex'
     ref['value']   = bibtexkey
     self.bibrefsinglelist.append(bibtexkey)
@@ -111,14 +111,13 @@
 
     for u in u_dirs:
       upath=my_config['path']+'/'+u
-      packages = []
+      sources = []
       for file in listdir(upath):
         if fnmatch(file, '*.upstream'):
-          packages.append(re.sub("\.upstream", "", file))
-      # packages = listdir(upath)
-      for package in packages:
-        print package
-        ufile = upath+'/'+package+'.upstream'
+          sources.append(re.sub("\.upstream", "", file))
+      for source in sources:
+        print source
+        ufile = upath+'/'+source+'.upstream'
         uf = open(ufile)
         try:
           fields = yaml.load(uf.read())
@@ -128,24 +127,24 @@
         try:
           references=fields['Reference']
         except KeyError:
-          self.log.warning("No references found for package %s (Keys: %s)" % (package,str(fields.keys())))
+          self.log.warning("No references found for source package %s (Keys: %s)" % (source, str(fields.keys())))
           continue
         except TypeError:
-          self.log.warning("debian/upstream file of package %s does not seem to be a YAML file" % (package))
+          self.log.warning("debian/upstream file of source package %s does not seem to be a YAML file" % (source))
           continue
 
         if isinstance(references, list):
           # upstream file contains more than one reference
           rank=0
           for singleref in references:
-            self.setref(singleref, package, rank)
+            self.setref(singleref, source, rank)
             rank += 1
         elif isinstance(references, str):
           # upstream file has wrongly formatted reference
           self.log.error("File %s has following references: %s" % (ufile, references))
         else:
           # upstream file has exactly one reference
-          self.setref(references, package, 0)
+          self.setref(references, source, 0)
 
         for key in fields.keys():
           keyl=key.lower()
@@ -153,26 +152,26 @@
     	    # sometimes DOI and PMID are stored separately:
     	    if keyl.endswith('doi'):
     	      if references.has_key('doi') or references.has_key('DOI'):
-                self.log.warning("Extra key in package '%s': %s - please remove from upstream file!", package, key)
+                self.log.warning("Extra key in source package '%s': %s - please remove from upstream file!", source, key)
     	        continue
               rdoi={}
               rdoi['rank']    = 0
-              rdoi['package'] = package
+              rdoi['source']  = source
               rdoi['key']     = 'doi'
               rdoi['value']   = fields[key]
               self.bibrefs.append(rdoi)
     	    elif keyl.endswith('pmid'):
     	      if references.has_key('pmid') or references.has_key('PMID'):
-                self.log.warning("Extra key in package '%s': %s - please remove from upstream file!", package, key)
+                self.log.warning("Extra key in source package '%s': %s - please remove from upstream file!", source, key)
     	        continue
               rpmid={}
               rpmid['rank']    = 0
-              rpmid['package'] = package
+              rpmid['source']  = source
               rpmid['key']     = 'pmid'
               rpmid['value']   = fields[key]
               self.bibrefs.append(rpmid)
     	    else:
-    	      print "Package %s has %s : %s" % (package, key, fields[key])
+    	      print "Source package %s has %s : %s" % (source, key, fields[key])
     # only truncate table if there are really some references found
     if len(self.bibrefs) == 0:
       self.log.error("No references found in any upstream file.")
@@ -182,11 +181,11 @@
     cur.execute("TRUNCATE %s" % (my_config['table']))
     query = """PREPARE bibref_insert (text, text, text, int) AS
                    INSERT INTO %s
-                   (package, key, value, rank)
+                   (source, key, value, rank)
                     VALUES ($1, $2, $3, $4)""" % (my_config['table'])
     cur.execute(query)
 
-    query = "EXECUTE bibref_insert (%(package)s, %(key)s, %(value)s, %(rank)s)"
+    query = "EXECUTE bibref_insert (%(source)s, %(key)s, %(value)s, %(rank)s)"
     for ref in self.bibrefs:
       try:
         cur.execute(query, ref)




More information about the Collab-qa-commits mailing list