[Collab-qa-commits] r2163 - in udd: . sql udd

Andreas Tille tille at alioth.debian.org
Sun Feb 26 13:05:23 UTC 2012


Author: tille
Date: 2012-02-26 13:05:23 +0000 (Sun, 26 Feb 2012)
New Revision: 2163

Modified:
   udd/config-org.yaml
   udd/sql/setup.sql
   udd/sql/upgrade.sql
   udd/udd/ddtp_gatherer.py
   udd/udd/packages_gatherer.py
Log:
Finally also distribution needs to be gathered inside descriptions table because the injection of squeeze and squeeze-backports would just let the last one win otherwise


Modified: udd/config-org.yaml
===================================================================
--- udd/config-org.yaml	2012-02-24 14:48:55 UTC (rev 2162)
+++ udd/config-org.yaml	2012-02-26 13:05:23 UTC (rev 2163)
@@ -871,7 +871,8 @@
    path: /org/udd.debian.org/mirrors/ddtp
    files: Translation-.*\.gz
    mirrorpath: /org/mirrors/ftp.debian.org/ftp/dists
-   table: descriptions
+   descriptions-table: descriptions
+   imports-table: description_imports
    schema: ddtp
 
 ftpnew:

Modified: udd/sql/setup.sql
===================================================================
--- udd/sql/setup.sql	2012-02-24 14:48:55 UTC (rev 2162)
+++ udd/sql/setup.sql	2012-02-26 13:05:23 UTC (rev 2163)
@@ -602,38 +602,41 @@
 GRANT SELECT ON all_bugs TO PUBLIC;
 
 CREATE TABLE descriptions (
-       package      text not null,
-       release      text not null,
-       component    text not null,
-       language     text not null,
-       description  text not null,
-       long_description text not null,
-       description_md5  text not null, -- md5 sum of the original English description
-    PRIMARY KEY (package, release, component, language, description, description_md5)
+    package          text not null,
+    distribution     text not null,
+    release          text not null,
+    component        text not null,
+    language         text not null,
+    description      text not null,
+    long_description text not null,
+    description_md5  text not null, -- md5 sum of the original English description
+    PRIMARY KEY (package, distribution, release, component, language, description, description_md5)
 );
 GRANT SELECT ON descriptions TO PUBLIC;
 
 CREATE TABLE ubuntu_descriptions (
-       package      text not null,
-       release      text not null,
-       component    text not null,
-       language     text not null,
-       description  text not null,
-       long_description text not null,
-       description_md5  text not null, -- md5 sum of the original English description
-    PRIMARY KEY (package, release, component, language, description, description_md5)
+    package          text not null,
+    distribution     text not null,
+    release          text not null,
+    component        text not null,
+    language         text not null,
+    description      text not null,
+    long_description text not null,
+    description_md5  text not null, -- md5 sum of the original English description
+    PRIMARY KEY (package, distribution, release, component, language, description, description_md5)
 );
 GRANT SELECT ON ubuntu_descriptions TO PUBLIC;
 
 CREATE TABLE derivatives_descriptions (
-       package      text not null,
-       release      text not null,
-       component    text not null,
-       language     text not null,
-       description  text not null,
-       long_description text not null,
-       description_md5  text not null, -- md5 sum of the original English description
-    PRIMARY KEY (package, release, component, language, description, description_md5)
+    package          text not null,
+    distribution     text not null,
+    release          text not null,
+    component        text not null,
+    language         text not null,
+    description      text not null,
+    long_description text not null,
+    description_md5  text not null, -- md5 sum of the original English description
+    PRIMARY KEY (package, distribution, release, component, language, description, description_md5)
 );
 GRANT SELECT ON derivatives_descriptions TO PUBLIC;
 
@@ -641,13 +644,14 @@
 -- was imported previousely and thus reducing workload on UDD host in
 -- preventing doing duplicate work
 CREATE TABLE description_imports (
-    release                     text,
-    component                   text,
-    language                    text,
-    translationfile             text,
-    translationfile_sha1        text,
-    import_date                 timestamp default now(),
-    PRIMARY KEY (release, component, language)
+    distribution         text not null,
+    release              text not null,
+    component            text not null,
+    language             text not null,
+    translationfile      text not null,
+    translationfile_sha1 text not null,
+    import_date          timestamp default now(),
+    PRIMARY KEY (distribution, release, component, language)
 );
 
 -- active_dds view

Modified: udd/sql/upgrade.sql
===================================================================
--- udd/sql/upgrade.sql	2012-02-24 14:48:55 UTC (rev 2162)
+++ udd/sql/upgrade.sql	2012-02-26 13:05:23 UTC (rev 2163)
@@ -367,59 +367,70 @@
 -- Finally assemble necessary changes for description-less packages file
 --   - ddtp table renamed to descriptions
 --   - add ubuntu_descriptions & derivative_descriptions
---   - fix primary key constraint (It is necessary to add component because
---     there is at least one case (clustalw) which has the same
---     (package,release, language, description, description_md5)
---     key because it was freed in squeeze-backports and thus
---     component needs to be regarded
-DROP TABLE ddtp;
+--   - fix primary key constraint (It is necessary to add
+--      - component because  there is at least one case (clustalw) which
+--        has the same
+--         (package, release, language, description, description_md5)
+--        key because it was freed in squeeze-backports and thus
+--        component needs to be regarded
+--      - distribution because the subsequent import of debian-squeeze
+--        and debian-backports-squeeze deletes descriptions of the previous
+--        one so the last imported distribution would win
+DROP TABLE IF EXISTS  ddtp;
+DROP TABLE IF EXISTS  descriptions;
 CREATE TABLE descriptions (
-       package      text not null,
-       release      text not null,
-       component    text not null,
-       language     text not null,
-       description  text not null,
-       long_description text not null,
-       description_md5  text not null, -- md5 sum of the original English description
-    PRIMARY KEY (package, release, component, language, description, description_md5)
+    package          text not null,
+    distribution     text not null,
+    release          text not null,
+    component        text not null,
+    language         text not null,
+    description      text not null,
+    long_description text not null,
+    description_md5  text not null, -- md5 sum of the original English description
+    PRIMARY KEY (package, distribution, release, component, language, description, description_md5)
 );
 GRANT SELECT ON descriptions TO PUBLIC;
-DROP TABLE ubuntu_descriptions;
+
+DROP TABLE IF EXISTS ubuntu_descriptions;
 CREATE TABLE ubuntu_descriptions (
-       package      text not null,
-       release      text not null,
-       component    text not null,
-       language     text not null,
-       description  text not null,
-       long_description text not null,
-       description_md5  text not null, -- md5 sum of the original English description
-    PRIMARY KEY (package, release, component, language, description, description_md5)
+    package          text not null,
+    distribution     text not null,
+    release          text not null,
+    component        text not null,
+    language         text not null,
+    description      text not null,
+    long_description text not null,
+    description_md5  text not null, -- md5 sum of the original English description
+    PRIMARY KEY (package, distribution, release, component, language, description, description_md5)
 );
 GRANT SELECT ON ubuntu_descriptions TO PUBLIC;
 
-DROP TABLE derivatives_descriptions;
+DROP TABLE IF EXISTS derivatives_descriptions;
 CREATE TABLE derivatives_descriptions (
-       package      text not null,
-       release      text not null,
-       component    text not null,
-       language     text not null,
-       description  text not null,
-       long_description text not null,
-       description_md5  text not null, -- md5 sum of the original English description
-    PRIMARY KEY (package, release, component, language, description, description_md5)
+    package          text not null,
+    distribution     text not null,
+    release          text not null,
+    component        text not null,
+    language         text not null,
+    description      text not null,
+    long_description text not null,
+    description_md5  text not null, -- md5 sum of the original English description
+    PRIMARY KEY (package, distribution, release, component, language, description, description_md5)
 );
 GRANT SELECT ON derivatives_descriptions TO PUBLIC;
 
 -- Add house keeping table to enable deciding whether some translation file
 -- was imported previousely and thus reducing workload on UDD host in
 -- preventing doing duplicate work
+DROP TABLE IF EXISTS description_imports;
 CREATE TABLE description_imports (
-    release			text,
-    component			text,
-    language			text,
-    translationfile		text,
-    translationfile_sha1	text,
-    import_date			timestamp default now(),
-    PRIMARY KEY (release, component, language)
+    distribution         text not null,
+    release              text not null,
+    component            text not null,
+    language             text not null,
+    translationfile      text not null,
+    translationfile_sha1 text not null,
+    import_date          timestamp default now(),
+    PRIMARY KEY (distribution, release, component, language)
 );
 

Modified: udd/udd/ddtp_gatherer.py
===================================================================
--- udd/udd/ddtp_gatherer.py	2012-02-24 14:48:55 UTC (rev 2162)
+++ udd/udd/ddtp_gatherer.py	2012-02-26 13:05:23 UTC (rev 2163)
@@ -56,8 +56,9 @@
 
   def __init__(self, connection, config, source):
     gatherer.__init__(self, connection, config, source)
-    self.assert_my_config('path', 'files', 'mirrorpath', 'table')
-    my_config = self.my_config
+    # The ID for the distribution we want to include
+    self._distr = None
+    self.assert_my_config('path', 'files', 'mirrorpath', 'descriptions-table', 'imports-table')
     self.log = logging.getLogger(self.__class__.__name__)
     if debug==1:
 	self.log.setLevel(logging.DEBUG)
@@ -69,55 +70,53 @@
     self.log.addHandler(handler)
 
     cur = self.cursor()
-    query = "PREPARE ddtp_delete (text, text) AS DELETE FROM %s WHERE release = $1 AND component = $2 AND language = $3" % my_config['table']
-    cur.execute(query)
 
-    query = """PREPARE ddtp_insert AS INSERT INTO %s
-                   (package, release, component, language, description, long_description, description_md5)
-                    VALUES ($1, $2, $3, $4, $5, $6, $7)""" % (my_config['table'])
-    cur.execute(query)
+    pkg = None
 
-    query = """PREPARE ddtp_check_before_insert (text, text, text, text, text, text) AS
-                  SELECT COUNT(*) FROM %s
-                    WHERE package = $1 AND release = $2 AND component = $3 AND language = $4 AND 
-                          description = $5 AND description_md5 = $6""" % (my_config['table'])
-    cur.execute(query)
+  def run(self):
+    src_cfg = self.my_config
+    
+    table = src_cfg['descriptions-table']
+    imports = src_cfg['imports-table']
+    
+    # Set distribution ID
+    # FIXME: This simple and straightforeward setting does only work
+    #        as long as backports and security does not contain i18n files
+    #        The right way to go would be to read the config file again fo
+    #        each release below
+    self._distr = 'debian'
 
+    cur = self.cursor()
     query = """PREPARE ddtp_check_previous_import (text, text, text) AS
-                  SELECT translationfile_sha1, import_date FROM description_imports
-                    WHERE release = $1 AND component = $2 AND language = $3"""
+                  SELECT translationfile_sha1, import_date FROM %s
+                    WHERE distribution = '%s' AND release = $1 AND component = $2 AND language = $3""" \
+            % (imports, self._distr)
     cur.execute(query)
-
     query = """PREPARE ddtp_update_current_import (text, text, text) AS
-                  UPDATE description_imports SET import_date = now() WHERE release = $1 AND component = $2 AND language = $3"""
+                  UPDATE %s SET import_date = now() WHERE distribution = '%s' AND release = $1 AND component = $2 AND language = $3""" \
+            % (imports, self._distr)
     cur.execute(query)
-
     query = """PREPARE ddtp_insert_current_import (text, text, text, text, text) AS
-                  INSERT INTO description_imports (release, component, language, translationfile, translationfile_sha1)
-                         VALUES ($1, $2, $3, $4, $5)"""
+                  INSERT INTO %s (distribution, release, component, language, translationfile, translationfile_sha1)
+                         VALUES ('%s', $1, $2, $3, $4, $5)""" \
+            % (imports, self._distr)
     cur.execute(query)
 
-    pkg = None
-
-  def run(self):
-    my_config = self.my_config
-    cur = self.cursor()
-
     cur.execute('SELECT component FROM packages GROUP by component')
     rows = cur.fetchall()
     valid_components = []
     for r in rows:
         valid_components.append(r[0])
-    releases = listdir(my_config['path'])
+    releases = listdir(src_cfg['path'])
     for rel in releases:
-      cpath = my_config['path']+'/'+rel+'/'
+      cpath = src_cfg['path']+'/'+rel+'/'
       components = listdir(cpath)
       for comp in components:
         if comp not in valid_components:
           self.log.error("Invallid component '%s' file found in %s", comp, cpath)
           continue
         cfp = open(cpath+'/'+comp,'r')
-        trfilepath = my_config['mirrorpath']+'/'+rel+'/'+comp+'/i18n/'
+        trfilepath = src_cfg['mirrorpath']+'/'+rel+'/'+comp+'/i18n/'
         for line in cfp.readlines():
           (sha1, size, file) = line.strip().split(' ')
           trfile = trfilepath + file
@@ -149,6 +148,24 @@
             if prev_import[0] == sha1:
               self.log.info("File %s was imported at %s and has not changed since then" % (trfile, prev_import[1])) 
               continue
+
+          # Once it is sure that the file needs to be imported prepare the relevant statements
+          query = """PREPARE ddtp_delete (text) AS DELETE FROM %s
+                        WHERE distribution = '%s' AND release = '%s' AND component = '%s' AND language = $1""" \
+                     % ( table, self._distr, rel, comp )
+          cur.execute(query)
+          query = """PREPARE ddtp_check_before_insert (text, text, text, text) AS
+                        SELECT COUNT(*) FROM %s
+                        WHERE package = $1 AND distribution = '%s' AND release = '%s' AND component = '%s' AND
+                              language = $2 AND description = $3 AND description_md5 = $4""" \
+                     % (table, self._distr, rel, comp)
+          cur.execute(query)
+          query = """PREPARE ddtp_insert (text, text, text, text, text) AS INSERT INTO %s
+                        (package, distribution, release, component, language, description, long_description, description_md5)
+                        VALUES ($1, '%s', '%s', '%s', $2, $3, $4, $5)""" \
+                     % (table, self._distr, rel, comp)
+          cur.execute(query)
+
           self.import_translations(trfile, rel, comp, lang)
           if has_previous_import == True:
             cur.execute("EXECUTE ddtp_update_current_import (%s, %s, %s)", (rel, comp, lang))
@@ -156,10 +173,12 @@
             cur.execute("EXECUTE ddtp_insert_current_import (%s, %s, %s, %s, %s)", (rel, comp, lang, trfile, sha1))
           # commit every successfully language to make sure we get any languages in and will not be blocked by a single failing import
           self.connection.commit()
+          cur.execute("DEALLOCATE ddtp_delete")
+          cur.execute("DEALLOCATE ddtp_check_before_insert")
+          cur.execute("DEALLOCATE ddtp_insert")
         cfp.close()
        
-    cur.execute("DEALLOCATE ddtp_insert")
-    cur.execute("ANALYZE %s" % my_config['table'])
+    cur.execute("ANALYZE %s" % table)
 
 
   def import_translations(self, trfile, rel, comp, lang):
@@ -168,8 +187,7 @@
         cur = self.cursor()
         # Delete only records where we actually have Translation files.  This
         # prevents dump deletion of all data in case of broken downloads
-        cur.execute('EXECUTE ddtp_delete (%s, %s, %s)', (rel, comp, lang))
-        self.log.debug('EXECUTE ddtp_delete (%s, %s, %s)', (rel, comp, lang))
+        self.log.debug('EXECUTE ddtp_delete (%s)', (lang))
 
         i18n_error_flag=0
         descstring = 'Description-'+lang
@@ -196,16 +214,16 @@
             for line in lines[1:]:
               self.pkg.long_description += line + "\n"
 
-            paramtuple = (self.pkg.package, self.pkg.release, self.pkg.component, self.pkg.language, self.pkg.description, self.pkg.description_md5)
-            cur.execute('EXECUTE ddtp_check_before_insert (%s, %s, %s, %s, %s, %s)', paramtuple)
+            paramtuple = (self.pkg.package, self.pkg.language, self.pkg.description, self.pkg.description_md5)
+            cur.execute('EXECUTE ddtp_check_before_insert (%s, %s, %s, %s)', paramtuple)
             if cur.fetchone()[0] > 0:
-              self.log.error("Duplicated key for package %s in release %s component %s in language %s: %s / %s" % paramtuple)
+              self.log.error("Duplicated key for package %s in release %s component %s in language %s: %s / %s" % \
+                (self.pkg.package, rel, comp, self.pkg.language, self.pkg.description, self.pkg.description_md5))
             else:
-              query = 'EXECUTE ddtp_insert (%s, %s, %s, %s, %s, %s, %s)'
+              query = 'EXECUTE ddtp_insert (%s, %s, %s, %s, %s)'
               try:
                 self.log.debug(query, tuple([quote(item) for item in paramtuple]))
-                cur.execute(query, (self.pkg.package, self.pkg.release, self.pkg.component, self.pkg.language, self.pkg.description, self.pkg.long_description, self.pkg.description_md5))
-                # self.connection.commit() # commit every single insert as long as translation files are featuring duplicated keys
+                cur.execute(query, (self.pkg.package, self.pkg.language, self.pkg.description, self.pkg.long_description, self.pkg.description_md5))
               except IntegrityError, err:
                 self.log.exception("Duplicated key in language %s: (%s)", self.pkg.language,
                                  ", ".join([to_unicode(item) for item in (self.pkg.package, self.pkg.release, self.pkg.component, self.pkg.description, self.pkg.description_md5)]))

Modified: udd/udd/packages_gatherer.py
===================================================================
--- udd/udd/packages_gatherer.py	2012-02-24 14:48:55 UTC (rev 2162)
+++ udd/udd/packages_gatherer.py	2012-02-26 13:05:23 UTC (rev 2163)
@@ -195,8 +195,8 @@
       # For releases that have long descriptions in Packages and not in Translation,
       # add the description to the configured descriptions table.
       if self.add_descriptions:
-        cur.execute("DELETE FROM %s WHERE release = '%s' AND component = '%s' AND language = '%s'" %\
-          (src_cfg['descriptions-table'], src_cfg['release'], comp, 'en'))
+        cur.execute("DELETE FROM %s WHERE distribution = '%s' AND release = '%s' AND component = '%s' AND language = '%s'" %\
+          (src_cfg['descriptions-table'], self._distr, src_cfg['release'], comp, 'en'))
       for arch in src_cfg['archs']:
 	path = os.path.join(src_cfg['directory'], comp, 'binary-' + arch, 'Packages.gz')
 	try:
@@ -216,8 +216,8 @@
           if self.add_descriptions:
             cur.execute("""PREPARE description_insert AS
               INSERT INTO %s
-                (package, release, component, language, description, long_description, description_md5)
-                (SELECT $1 AS package, '%s' AS release, '%s' AS component, $2 AS language,
+                (package, distribution, release, component, language, description, long_description, description_md5)
+                (SELECT $1 AS package, '%s' AS distribution, '%s' AS release, '%s' AS component, $2 AS language,
                         $3 AS description, $4 AS long_description, $5 AS description_md5
                   WHERE NOT EXISTS
                   (SELECT 1
@@ -225,7 +225,7 @@
                     WHERE package=$1 AND release='%s' AND component='%s' AND language=$2 AND
                           description=$3 AND long_description=$4 AND description_md5=$5))
 
-            """ % (src_cfg['descriptions-table'], src_cfg['release'], comp,
+            """ % (src_cfg['descriptions-table'], self._distr, src_cfg['release'], comp,
                     src_cfg['descriptions-table'], src_cfg['release'], comp))
 #	  aux.print_debug("Reading file " + path)
 	  # Copy content from gzipped file to temporary file, so that apt_pkg is




More information about the Collab-qa-commits mailing list