[Collab-qa-commits] r1642 - in udd: scripts udd

Andreas Tille tille at alioth.debian.org
Wed Dec 16 20:21:58 UTC 2009


Author: tille
Date: 2009-12-16 20:21:58 +0000 (Wed, 16 Dec 2009)
New Revision: 1642

Modified:
   udd/scripts/fetch_ddtp_translations.sh
   udd/udd/ddtp_gatherer.py
Log:
Update DDTP data only when the language translation files have changed.  This saves the importer a lot of time.


Modified: udd/scripts/fetch_ddtp_translations.sh
===================================================================
--- udd/scripts/fetch_ddtp_translations.sh	2009-12-16 07:39:50 UTC (rev 1641)
+++ udd/scripts/fetch_ddtp_translations.sh	2009-12-16 20:21:58 UTC (rev 1642)
@@ -12,9 +12,13 @@
 # rm -rf "$TARGETPATH"
 for rel in $RELEASES; do
     TARGETDIR="$TARGETPATH"/${rel}
-    rm -rf "$TARGETDIR"
+    find "$TARGETPATH"/${rel} -name '*.md5' -exec mv '{}' '{}'.prev \;
+    rm -rf "$TARGETDIR"/*.gz
     [ -d $TARGETDIR ] || mkdir -p $TARGETDIR
+    # store a copy of md5 sums of previous files
     `dirname $0`/getlinks.pl "$HTTPMIRROR"/dists/${rel}/main/i18n/ "$TARGETPATH"/${rel} 'Translation-.*\.gz$'
+    # create md5 sums of translation files to enable deciding whether processing is needed or not
+    for zipfile in `find "$TARGETPATH"/${rel} -name '*.gz'` ; do md5sum $zipfile > "$TARGETPATH"/${rel}/`basename $zipfile .gz`.md5 ; done
     # getlinks.pl always returns 0 independently from success so we have to verify that the target dir is
     # not empty.
     NUMFILES=`ls "$TARGETPATH"/${rel} | wc -l`

Modified: udd/udd/ddtp_gatherer.py
===================================================================
--- udd/udd/ddtp_gatherer.py	2009-12-16 07:39:50 UTC (rev 1641)
+++ udd/udd/ddtp_gatherer.py	2009-12-16 20:21:58 UTC (rev 1642)
@@ -17,6 +17,7 @@
 from debian_bundle import deb822
 from os import listdir, access, F_OK
 from sys import stderr, exit
+from filecmp import cmp
 import gzip
 # import bz2
 from psycopg2 import IntegrityError, InternalError
@@ -53,7 +54,7 @@
     my_config = self.my_config
 
     cur = self.cursor()
-    query = "DELETE FROM %s" % my_config['table']
+    query = "PREPARE ddtp_delete (text, text) AS DELETE FROM %s WHERE release = $1::release AND language = $2" % my_config['table']
     cur.execute(query)
     query = """PREPARE ddtp_insert AS INSERT INTO %s
                    (package, distribution, component, release, language, version, description, long_description, md5sum)
@@ -63,7 +64,7 @@
     query = """PREPARE ddtp_check_before_insert (text, text, text, text, text, text) AS
                   SELECT COUNT(*) FROM %s
                     WHERE package = $1 AND distribution = $2 AND component = $3 AND
-                          release = $4 AND language = $5 AND version = $6""" % (my_config['table'])
+                          release = $4::release AND language = $5 AND version = $6""" % (my_config['table'])
     cur.execute(query)
 
 
@@ -79,7 +80,7 @@
 #                   version
 #                  FROM packages
 #                  WHERE package = $1 AND distribution = $2 AND component = $3 AND
-#                  release = $4
+#                  release = $4::release
 #               ) AS tmp GROUP BY full_description"""
 #    cur.execute(query)
 
@@ -102,6 +103,21 @@
         if not match:
           continue
         lang = match.groups()[0]
+        md5file=dir + 'Translation-' + lang + '.md5'
+        try:
+          if ( cmp(md5file, md5file + '.prev' ) ):
+            print md5file + 'has not changed.  No update needed.'
+            continue
+          else:
+            print md5file + 'changed.  Go on updating language ' + lang
+        except OSError:
+          print 'md5file for ' + lang + ' missing,  Go updating'
+
+        # Delete only records where we actually have Translation files.  This
+        # prevents dumb deletion of all data in case of broken downloads.
+        query = "EXECUTE ddtp_delete ('%s', '%s')" % (rel, lang)
+        cur.execute(query)
+
         descstring = 'Description-'+lang
         g = gzip.GzipFile(dir + filename)
         try:




More information about the Collab-qa-commits mailing list