[Collab-qa-commits] r1697 - udd/udd
Andreas Tille
tille at alioth.debian.org
Wed Feb 24 22:18:15 UTC 2010
Author: tille
Date: 2010-02-24 22:18:15 +0000 (Wed, 24 Feb 2010)
New Revision: 1697
Modified:
udd/udd/ddtp_gatherer.py
Log:
Better handling of double entries
Modified: udd/udd/ddtp_gatherer.py
===================================================================
--- udd/udd/ddtp_gatherer.py 2010-02-24 09:01:23 UTC (rev 1696)
+++ udd/udd/ddtp_gatherer.py 2010-02-24 22:18:15 UTC (rev 1697)
@@ -22,7 +22,7 @@
# import bz2
from psycopg2 import IntegrityError, InternalError
-online=0
+debug=0
def get_gatherer(connection, config, source):
return ddtp_gatherer(connection, config, source)
@@ -31,7 +31,7 @@
def __init__(self, package, release, language):
self.package = package
self.distribution = 'debian' # No DDTP translations for debian-backports / debian-volatile
- self.release = release # sid for the moment
+ self.release = release
self.component = 'main' # Only main translated for the moment
self.language = language
self.description = ''
@@ -67,23 +67,34 @@
release = $4 AND language = $5 AND version = $6""" % (my_config['table'])
cur.execute(query)
+ query = """PREPARE ddtp_get_duplicate (text, text, text, text, text, text) AS
+ SELECT description, long_description, md5sum FROM %s
+ WHERE package = $1 AND distribution = $2 AND component = $3 AND
+ release = $4 AND language = $5 AND version = $6""" % (my_config['table'])
+ cur.execute(query)
- # Query for english package description, its md5 sum and package version
-# Not used any more because the Translation files now contain version numbers
-# but keep the query as comment to store the knowledge how to calculate MD5 sums
-# for the descriptions for possible later use
-# query = """PREPARE ddtp_packages_recieve_description_md5 AS
-# SELECT md5(full_description || E'\n' ) AS md5,
-# full_description, MAX(version) AS version FROM (
-# SELECT DISTINCT
-# description || E'\n' || long_description AS full_description,
-# version
-# FROM packages
-# WHERE package = $1 AND distribution = $2 AND component = $3 AND
-# release = $4
-# ) AS tmp GROUP BY full_description"""
-# cur.execute(query)
+ # Query for the English package description of the i386 architecture because this is
+ # the most popular arch. In the rare cases where a package description differs
+ # between architectures, we put the translation of the i386 description into UDD
+ query = """PREPARE ddtp_packages_recieve_description_md5 (text, text, text, text, text) AS
+ SELECT md5(full_description || E'\n' ) AS md5,
+ full_description FROM (
+ SELECT DISTINCT
+ description || E'\n' || long_description AS full_description
+ FROM packages
+ WHERE package = $1 AND distribution = $2 AND component = $3 AND
+ release = $4 AND version = $5 AND architecture = 'i386'
+ ) AS tmp GROUP BY full_description"""
+ cur.execute(query)
+ # In some cases a just-imported translation has to be removed again because
+ # a further translation matches the MD5 sum of the i386 architecture's description
+ query = """PREPARE ddtp_delete_duplicate(text, text, text, text, text, text) AS
+ DELETE FROM %s
+ WHERE package = $1 AND distribution = $2 AND component = $3 AND
+ release = $4 AND language = $5 AND version = $6""" % (my_config['table'])
+ cur.execute(query)
+
pkg = None
def run(self):
@@ -106,10 +117,12 @@
md5file=dir + 'Translation-' + lang + '.md5'
try:
if ( cmp(md5file, md5file + '.prev' ) ):
- print md5file + 'has not changed. No update needed.'
+ if debug:
+ print md5file + ' has not changed. No update needed.'
continue
else:
- print md5file + 'changed. Go on updating language ' + lang
+ if debug:
+ print md5file + ' changed. Go on updating language ' + lang
except OSError:
print 'md5file for ' + lang + ' missing, Go updating'
@@ -135,10 +148,41 @@
self.pkg.release, self.pkg.language, self.pkg.version)
cur.execute(query)
if cur.fetchone()[0] > 0:
- print >>stderr, "Duplicated key in language %s: " % self.pkg.language, \
- self.pkg.package, self.pkg.distribution, self.pkg.component, self.pkg.release, \
- self.pkg.version, self.pkg.description, self.pkg.md5sum
- continue
+ if debug > 0:
+ print >>stderr, "Just imported key in language %s: " % self.pkg.language, \
+ self.pkg.package, self.pkg.distribution, self.pkg.component, self.pkg.release, \
+ self.pkg.version, self.pkg.description, self.pkg.md5sum
+
+ query = " EXECUTE ddtp_packages_recieve_description_md5 ('%s', '%s', '%s', '%s', '%s')" % \
+ (self.pkg.package, self.pkg.distribution, self.pkg.component, \
+ self.pkg.release, self.pkg.version)
+ cur.execute(query)
+ if cur.rowcount <= 0:
+ print >>stderr, "Did not found descriptopn for i386 in", self.pkg.package, self.pkg.distribution, self.pkg.component, \
+ self.pkg.release, self.pkg.version
+ # print >>stderr, query
+ continue
+
+ md5sum = cur.fetchone()[0]
+ if md5sum.startswith(self.pkg.md5sum):
+ if debug > 0:
+ print >>stderr, "Correkt translation is just in the Database."
+ continue
+
+ query = "EXECUTE ddtp_get_duplicate ('%s', '%s', '%s', '%s', '%s', '%s')" % \
+ (self.pkg.package, self.pkg.distribution, self.pkg.component, \
+ self.pkg.release, self.pkg.language, self.pkg.version)
+ cur.execute(query)
+ # print >>stderr, "Other translations:"
+ for r in cur.fetchall():
+ # print >>stderr, r[0], r[2]
+ if md5sum.startswith(r[2]):
+ # print >>stderr, "This translation matches, delete existing translation", md5sum
+ query = "EXECUTE ddtp_delete_duplicate ('%s', '%s', '%s', '%s', '%s', '%s')" % \
+ (self.pkg.package, self.pkg.distribution, self.pkg.component, \
+ self.pkg.release, self.pkg.language, self.pkg.version)
+ cur.execute(query)
+
query = "EXECUTE ddtp_insert (%s, '%s', '%s', '%s', '%s', '%s', %s, %s, %s)" % \
(quote(self.pkg.package), self.pkg.distribution, self.pkg.component, self.pkg.release, \
self.pkg.language, self.pkg.version, quote(self.pkg.description), \
More information about the Collab-qa-commits
mailing list