[Collab-qa-commits] r1642 - in udd: scripts udd
Andreas Tille
tille at alioth.debian.org
Wed Dec 16 20:21:58 UTC 2009
Author: tille
Date: 2009-12-16 20:21:58 +0000 (Wed, 16 Dec 2009)
New Revision: 1642
Modified:
udd/scripts/fetch_ddtp_translations.sh
udd/udd/ddtp_gatherer.py
Log:
Update ddtp data only when the language translation files have changed. This saves the importer a lot of time.
Modified: udd/scripts/fetch_ddtp_translations.sh
===================================================================
--- udd/scripts/fetch_ddtp_translations.sh 2009-12-16 07:39:50 UTC (rev 1641)
+++ udd/scripts/fetch_ddtp_translations.sh 2009-12-16 20:21:58 UTC (rev 1642)
@@ -12,9 +12,13 @@
# rm -rf "$TARGETPATH"
for rel in $RELEASES; do
TARGETDIR="$TARGETPATH"/${rel}
- rm -rf "$TARGETDIR"
+ find "$TARGETPATH"/${rel} -name '*.md5' -exec mv '{}' '{}'.prev \;
+ rm -rf "$TARGETDIR"/*.gz
[ -d $TARGETDIR ] || mkdir -p $TARGETDIR
+ # store a copy of md5 sums of previous files
`dirname $0`/getlinks.pl "$HTTPMIRROR"/dists/${rel}/main/i18n/ "$TARGETPATH"/${rel} 'Translation-.*\.gz$'
+ # create md5 sums of translation files to enable deciding whether processing is needed or not
+ for zipfile in `find "$TARGETPATH"/${rel} -name '*.gz'` ; do md5sum $zipfile > "$TARGETPATH"/${rel}/`basename $zipfile .gz`.md5 ; done
# getlinks.pl always returns 0 regardless of whether it succeeded, so we have to verify that the target dir is
# not empty.
NUMFILES=`ls "$TARGETPATH"/${rel} | wc -l`
Modified: udd/udd/ddtp_gatherer.py
===================================================================
--- udd/udd/ddtp_gatherer.py 2009-12-16 07:39:50 UTC (rev 1641)
+++ udd/udd/ddtp_gatherer.py 2009-12-16 20:21:58 UTC (rev 1642)
@@ -17,6 +17,7 @@
from debian_bundle import deb822
from os import listdir, access, F_OK
from sys import stderr, exit
+from filecmp import cmp
import gzip
# import bz2
from psycopg2 import IntegrityError, InternalError
@@ -53,7 +54,7 @@
my_config = self.my_config
cur = self.cursor()
- query = "DELETE FROM %s" % my_config['table']
+ query = "PREPARE ddtp_delete (text, text) AS DELETE FROM %s WHERE release = $1::release AND language = $2" % my_config['table']
cur.execute(query)
query = """PREPARE ddtp_insert AS INSERT INTO %s
(package, distribution, component, release, language, version, description, long_description, md5sum)
@@ -63,7 +64,7 @@
query = """PREPARE ddtp_check_before_insert (text, text, text, text, text, text) AS
SELECT COUNT(*) FROM %s
WHERE package = $1 AND distribution = $2 AND component = $3 AND
- release = $4 AND language = $5 AND version = $6""" % (my_config['table'])
+ release = $4::release AND language = $5 AND version = $6""" % (my_config['table'])
cur.execute(query)
@@ -79,7 +80,7 @@
# version
# FROM packages
# WHERE package = $1 AND distribution = $2 AND component = $3 AND
-# release = $4
+# release = $4::release
# ) AS tmp GROUP BY full_description"""
# cur.execute(query)
@@ -102,6 +103,21 @@
if not match:
continue
lang = match.groups()[0]
+ md5file=dir + 'Translation-' + lang + '.md5'
+ try:
+ if ( cmp(md5file, md5file + '.prev' ) ):
+ print md5file + 'has not changed. No update needed.'
+ continue
+ else:
+ print md5file + 'changed. Go on updating language ' + lang
+ except OSError:
+ print 'md5file for ' + lang + ' missing, Go updating'
+
+ # Delete only records where we actually have Translation files. This
+ # prevents dumb deletion of all data in case of broken downloads
+ query = "EXECUTE ddtp_delete ('%s', '%s')" % (rel, lang)
+ cur.execute(query)
+
descstring = 'Description-'+lang
g = gzip.GzipFile(dir + filename)
try:
More information about the Collab-qa-commits
mailing list