[Collab-qa-commits] r1723 - in udd: sql udd

Andreas Tille tille at alioth.debian.org
Sun Mar 14 19:46:59 UTC 2010


Author: tille
Date: 2010-03-14 19:46:59 +0000 (Sun, 14 Mar 2010)
New Revision: 1723

Modified:
   udd/sql/i18n-apps.sql
   udd/udd/ftpnew_gatherer.py
   udd/udd/i18n_apps_gatherer.py
Log:
Fix parsing problem in ftpnew, make i18n-apps usable


Modified: udd/sql/i18n-apps.sql
===================================================================
--- udd/sql/i18n-apps.sql	2010-03-13 22:00:28 UTC (rev 1722)
+++ udd/sql/i18n-apps.sql	2010-03-14 19:46:59 UTC (rev 1723)
@@ -8,10 +8,12 @@
     po_file         text,
       -- *.pot ignorieren!
     language        text,
-    ID              text,  -- no idea what this field means
     pkg_version_lang text, -- no idea what sense this field makes
     last_translator text,
     language_team   text,
+    translated      int,
+    fuzzy           int,
+    untranslated    int,
     PRIMARY KEY (package, version, release, language)
 );
 
@@ -29,6 +31,9 @@
     pkg_version_lang text, -- no idea what sense this field makes
     last_translator text,
     language_team   text,
+    translated      int,
+    fuzzy           int,
+    untranslated    int,
     PRIMARY KEY (package, version, release, language)
 );
 

Modified: udd/udd/ftpnew_gatherer.py
===================================================================
--- udd/udd/ftpnew_gatherer.py	2010-03-13 22:00:28 UTC (rev 1722)
+++ udd/udd/ftpnew_gatherer.py	2010-03-14 19:46:59 UTC (rev 1723)
@@ -15,7 +15,7 @@
 import email.Utils
 import re
 from time import ctime
-from psycopg2 import IntegrityError
+from psycopg2 import IntegrityError, ProgrammingError
 
 def get_gatherer(connection, config, source):
   return ftpnew_gatherer(connection, config, source)
@@ -75,7 +75,9 @@
   def __str__(self):
     str  = "Source %(Source)s: %(Version)s, (%(Architecture)s), %(Last_modified)s, %(Queue)s, %(Distribution)s" % \
         (self.s)
-    str += "   %(maintainer_name)s <%(maintainer_email)s>, %(Closes)i" % (self.s)
+    if self.s.has_key('maintainer_name') and self.s.has_key('maintainer_email') and \
+          self.s.has_key('Closes'):
+       str += "   %(maintainer_name)s <%(maintainer_email)s>, %(Closes)i" % (self.s)
     return str
 
 class bin_pkg():
@@ -211,6 +213,8 @@
         srcpkg.s['Last_modified'] = ctime(int(stanza['last-modified'])) # We want a real time object instead of an epoch
         srcpkg.s['Distribution']  = stanza['distribution']
         srcpkg.s['Changed-By']    = stanza['changed-by']
+        # remove comma between binaries which are inserted in *.dsc information
+        srcpkg.s['Bin']           = re.sub(", +", " ", stanza['binary'])
         try:
           srcpkg.s['Section']       = stanza['section']
           if stanza['section'].startswith('non-free'):
@@ -267,6 +271,12 @@
                 break
               if in_source:
                 in_source = 0
+                # we need to initialise some fields in the binary package
+                binpkg_changes = bin_pkg(value.split(' ')[0], srcpkg.s['Source'])
+                for key in ['Architecture', 'Component', 'Distribution', 'Version', 'Maintainer']:
+                  binpkg_changes.b[key]     = srcpkg.s[key]
+                binpkg_changes.b['Description']      = 'binary package information is missing in new queue'
+                binpkg_changes.b['Long_Description'] = '' # no long description available in *.changes file
               if binpkg:
                 binpkgs.append(binpkg)
               binpkg = bin_pkg(value, srcpkg.s['Source'])
@@ -276,17 +286,6 @@
               if in_source:
                 srcpkg.s[field]   = value
                 srcpkg.s['maintainer_name'], srcpkg.s['maintainer_email'] = email.Utils.parseaddr(srcpkg.s['Maintainer'])
-                # if bin_pkg_changes == None:
-                binpkg_changes.b[field] = value
-
-                binpkg_changes.b['Distribution']     = srcpkg.s['Distribution']
-                binpkg_changes.b['Description']      = 'binary package information is missing in new queue'
-                binpkg_changes.b['Long_Description'] = '' # no long description available in *.changes file
-                binpkg_changes.b['Component']        = srcpkg.s['Component']
-                binpkg_changes.b['Architecture']     = srcpkg.s['Architecture']
-                binpkg_changes.b['Version']          = srcpkg.s['Version']
-                binpkg_changes.b['Maintainer']       = srcpkg.s['Maintainer']
-
               else:
                 binpkg.b[field]   = value
               print >>srco, "%s: %s" % (field, value)
@@ -300,8 +299,6 @@
             elif field == 'Architecture':
               if in_source:
                 srcpkg.s[field] = value
-                #*** if binpkg_changes != None:
-                binpkg_changes.b[field] = value
               else:
                 binpkg.b[field] = value
               print >>srco, "%s: %s" % (field, value)
@@ -318,8 +315,6 @@
                   print >>stderr, "Incompatible version numbers between new.822(%s) and %s.html (%s)" % \
                       (srcpkg.s[field], src_info_base, value)
                 srcpkg.s[field]         = value
-                if binpkg_changes != None:
-                  binpkg_changes.b[field] = value
               else:
                 binpkg.b[field]   = value
               print >>srco, "%s: %s" % (field, value)
@@ -372,18 +367,19 @@
                     (src_info_base, value)
             elif field == 'Binary':
               if in_source:
-                # Binaries are mentioned in different syntax in *.changes and *.dsc
-                value = re.sub(", +", " ", value)
+                # Remove ',' in *.dsc information (not needed in *.changes)
+                value = re.sub(", +", " ", value)   # !!!!
               if self.check_existing_binaries(value.split(' '), srcpkg.s['Queue']):
                 srcpkg.s['Queue'] = 'ignore'
                 break
               if in_source:
-                if srcpkg.s['Bin'] != () and value != srcpkg.s['Bin']:
+                # if srcpkg.s['Bin'] != () and value != srcpkg.s['Bin']:
+                # Sometimes the order of multi binary packages is different - it is sufficient
+                # to assume that the package names are the same if the strings are equally long
+                if srcpkg.s['Bin'] != () and len(value) != len(srcpkg.s['Bin']):
                   print >>stderr, "Incompatible binaries between new.822(%s) and %s.html (%s)" % \
                       (srcpkg.s['Bin'], src_info_base, value)
                 srcpkg.s['Bin'] = value
-                # we need to initialise some fields in the binary package and the 'Maintainer' field is the last of them mentioned in the ftpnew formatted files
-                binpkg_changes = bin_pkg(value.split(' ')[0], srcpkg.s['Source'])
                 print >>srco, "%s: %s" % (field, value)
               else:
                 print >>stderr, "Binary should not mention Binary field in %s.html (%s)" % \
@@ -459,7 +455,10 @@
                     %(Vcs-Type)s, %(Vcs-Url)s, %(Vcs-Browser)s,
                     %(Section)s, %(Distribution)s, %(Component)s, %(Closes)s, %(License)s,
                     %(Last_modified)s, %(Queue)s)"""
-          cur.execute(query, srcpkg.s)
+          try:
+            cur.execute(query, srcpkg.s)
+          except ProgrammingError, err:
+            print "ProgrammingError", err, "\n", query, "\n", srcpkg.s
           for binpkg in binpkgs:
             # print binpkg
             if not binpkg:

Modified: udd/udd/i18n_apps_gatherer.py
===================================================================
--- udd/udd/i18n_apps_gatherer.py	2010-03-13 22:00:28 UTC (rev 1722)
+++ udd/udd/i18n_apps_gatherer.py	2010-03-14 19:46:59 UTC (rev 1723)
@@ -19,7 +19,8 @@
 
 debug=0
 
-check_char_re = re.compile('&#[0-9][0-9][0-9];')
+check_char_re               = re.compile('&#[0-9][0-9][0-9];')
+parse_translation_status_re = re.compile('^(\d+)t(\d+)f(\d+)u$')
 
 def replace_special_char(string):
   if not check_char_re.search(string):
@@ -43,18 +44,16 @@
     self.release          = release
     self.version          = ''
     self.maintainer       = ''
-    self.po_info          = {}
-    self.debconfpo_info   = {}
 
   def __str__(self):
     return "Package %s: %s, %s\n%s" % \
-        (self.package, self.maintainer, self.version, self.po_info)
+        (self.package, self.maintainer, self.version)
 
 class po_info():
   def __init__(self, poline):
     po = poline.strip().split('!')
     # ignore .pot and .templates files
-    if po[0].endswith('.pot') or po[0].endswith('.templates'):
+    if not po[0].endswith('.po'):
       # or po[1].startswith('_') :
       self.infofields = 0
       return
@@ -68,7 +67,15 @@
       print >>stderr, "Invalid language '%s'. Po filename is %s." % (self.language, self.po_file)
       self.infofields = 0
       return
-    self.ID               = po[2]       # Need to ask Nicolas for the meaning of this
+    match = parse_translation_status_re.match(po[2])
+    if not match:
+      self.translated   = 'NULL'
+      self.fuzzy        = 'NULL'
+      self.untranslated = 'NULL'
+    else:
+      self.translated   = match.groups()[0]
+      self.fuzzy        = match.groups()[1]
+      self.untranslated = match.groups()[2]
     self.pkg_version_lang = po[3]       # Meaning is unclear
 
     # sometimes language translation team is missing
@@ -97,16 +104,71 @@
 
     cur = self.cursor()
     # create prepared statements here!
-    query = """PREPARE i18n_apps_insert
-                   (text, text, text, text, text, text, text, text, text, text)
+    query = """PREPARE %s_insert
+                   (text, text, text, text, text, text, text, text, text, int, int, int)
                 AS INSERT INTO %s
                    (package, version, release, maintainer, po_file, language,
-                    id, pkg_version_lang, last_translator, language_team)
-                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)""" % (my_config['table_apps'])
-    cur.execute(query)
+                    pkg_version_lang, last_translator, language_team,
+                    translated, fuzzy, untranslated)
+                    VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)"""
+    cur.execute(query % (my_config['table_apps'], my_config['table_apps']))
+    cur.execute(query % (my_config['table_debconf'], my_config['table_debconf']))
 
     pkg = None
 
+  def parse_po_infoline(self, po_type, data):
+    cur = self.cursor()
+
+    if po_type == 'PO':
+      target_table = self.my_config['table_apps']
+    elif po_type == 'PODEBCONF':
+      target_table = self.my_config['table_debconf']
+    else:
+      print >>stderr, "Wrong PO type %s ignored." % po_type
+      return
+
+    po_info_dict = {}
+    for poline in data[po_type].split("\n"):
+      # ignore first empty line
+      if len(poline) <= 1:
+        continue
+      poinfo = po_info(poline)
+      if poinfo.infofields == 0:
+        continue
+      # Sometimes there is more than one po file in a package.  We inject the file
+      # which contains better info about the translator.
+      # Attention: For the current application it is completely sufficient that we
+      #            keep the information *that* a package contains a translation for
+      #            a certain language in UDD.  Other applications might need more
+      #            complete information.
+      if po_info_dict.has_key(poinfo.language):
+        po_info_dict[poinfo.language] = max(po_info_dict[poinfo.language], poinfo)
+      else:
+        po_info_dict[poinfo.language] = poinfo
+
+    for lang in po_info_dict.keys():
+      poinfo = po_info_dict[lang]
+      query = "EXECUTE %s_insert (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % \
+                (target_table, \
+                 quote(self.pkg.package), quote(self.pkg.version), quote(self.pkg.release), \
+                 quote(self.pkg.maintainer), quote(poinfo.po_file), quote(poinfo.language), \
+                 quote(poinfo.pkg_version_lang), \
+                 quote(poinfo.last_translator), quote(poinfo.language_team), \
+                 poinfo.translated, poinfo.fuzzy, poinfo.untranslated)
+      try:
+        cur.execute(query)
+      except IntegrityError, err:
+        print str(err).strip()
+        print len(po), po, poline, self.pkg
+      except InternalError, err:
+        print "InternalError:", err
+        print len(po), po, poline, self.pkg, po_type
+        print query
+        exit(-1)
+      except UnicodeEncodeError, err:
+        print err
+        print query
+
   def run(self):
     my_config = self.my_config
     #start harassing the DB, preparing the final inserts and making place
@@ -141,46 +203,12 @@
             continue
           self.pkg.version     = stanza['Version']
           self.pkg.maintainer  = stanza['Maintainer']
+
           if stanza.has_key('PO'):
-            for poline in stanza['PO'].split("\n"):
-              # ignore first empty line
-              if len(poline) <= 1:
-                continue
-              poinfo = po_info(poline)
-              if poinfo.infofields == 0:
-                continue
-              # Sometimes there is more than one po file in a package.  We inject the file
-              # which contains better info about translator
-              # Attention: For the current application it is completely sufficient that we
-              #            keep the information *that* a package contains translation for
-              #            a certain package in UDD.  Other applications might need more
-              #            complete information.
-              if self.pkg.po_info.has_key(poinfo.language):
-                self.pkg.po_info[poinfo.language] = max(self.pkg.po_info[poinfo.language], poinfo)
-              else:
-                self.pkg.po_info[poinfo.language] = poinfo
+            self.parse_po_infoline('PO', stanza)
+          if stanza.has_key('PODEBCONF'):
+            self.parse_po_infoline('PODEBCONF', stanza)
 
-            for lang in self.pkg.po_info.keys():
-              poinfo = self.pkg.po_info[lang]
-              query = "EXECUTE i18n_apps_insert (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % \
-                        (quote(self.pkg.package), quote(self.pkg.version), quote(self.pkg.release), \
-                         quote(self.pkg.maintainer), quote(poinfo.po_file), quote(poinfo.language), \
-                         quote(poinfo.ID), quote(poinfo.pkg_version_lang), \
-                         quote(poinfo.last_translator), quote(poinfo.language_team))
-
-              try:
-                cur.execute(query)
-              except IntegrityError, err:
-                print str(err).strip()
-                print len(po), po, poline, self.pkg
-              except InternalError, err:
-                print "InternalError:", err
-                print len(po), po, poline, self.pkg
-                print query
-                exit(-1)
-              except UnicodeEncodeError, err:
-                print err
-                print query
       except IOError, err:
         print >>stderr, "Error reading %s (%s)" % (file, err)
 




More information about the Collab-qa-commits mailing list