[Collab-qa-commits] r1723 - in udd: sql udd
Andreas Tille
tille at alioth.debian.org
Sun Mar 14 19:46:59 UTC 2010
Author: tille
Date: 2010-03-14 19:46:59 +0000 (Sun, 14 Mar 2010)
New Revision: 1723
Modified:
udd/sql/i18n-apps.sql
udd/udd/ftpnew_gatherer.py
udd/udd/i18n_apps_gatherer.py
Log:
Fix parsing problem in ftpnew, make i18n-apps usable
Modified: udd/sql/i18n-apps.sql
===================================================================
--- udd/sql/i18n-apps.sql 2010-03-13 22:00:28 UTC (rev 1722)
+++ udd/sql/i18n-apps.sql 2010-03-14 19:46:59 UTC (rev 1723)
@@ -8,10 +8,12 @@
po_file text,
-- *.pot ignorieren!
language text,
- ID text, -- no idea what this field means
pkg_version_lang text, -- no idea what sense this field makes
last_translator text,
language_team text,
+ translated int,
+ fuzzy int,
+ untranslated int,
PRIMARY KEY (package, version, release, language)
);
@@ -29,6 +31,9 @@
pkg_version_lang text, -- no idea what sense this field makes
last_translator text,
language_team text,
+ translated int,
+ fuzzy int,
+ untranslated int,
PRIMARY KEY (package, version, release, language)
);
Modified: udd/udd/ftpnew_gatherer.py
===================================================================
--- udd/udd/ftpnew_gatherer.py 2010-03-13 22:00:28 UTC (rev 1722)
+++ udd/udd/ftpnew_gatherer.py 2010-03-14 19:46:59 UTC (rev 1723)
@@ -15,7 +15,7 @@
import email.Utils
import re
from time import ctime
-from psycopg2 import IntegrityError
+from psycopg2 import IntegrityError, ProgrammingError
def get_gatherer(connection, config, source):
return ftpnew_gatherer(connection, config, source)
@@ -75,7 +75,9 @@
def __str__(self):
str = "Source %(Source)s: %(Version)s, (%(Architecture)s), %(Last_modified)s, %(Queue)s, %(Distribution)s" % \
(self.s)
- str += " %(maintainer_name)s <%(maintainer_email)s>, %(Closes)i" % (self.s)
+ if self.s.has_key('maintainer_name') and self.s.has_key('maintainer_email') and \
+ self.s.has_key('Closes'):
+ str += " %(maintainer_name)s <%(maintainer_email)s>, %(Closes)i" % (self.s)
return str
class bin_pkg():
@@ -211,6 +213,8 @@
srcpkg.s['Last_modified'] = ctime(int(stanza['last-modified'])) # We want a real time object instead of an epoch
srcpkg.s['Distribution'] = stanza['distribution']
srcpkg.s['Changed-By'] = stanza['changed-by']
+ # remove comma between binaries which are inserted in *.dsc information
+ srcpkg.s['Bin'] = re.sub(", +", " ", stanza['binary'])
try:
srcpkg.s['Section'] = stanza['section']
if stanza['section'].startswith('non-free'):
@@ -267,6 +271,12 @@
break
if in_source:
in_source = 0
+ # we need to initialise some fields in the binary package
+ binpkg_changes = bin_pkg(value.split(' ')[0], srcpkg.s['Source'])
+ for key in ['Architecture', 'Component', 'Distribution', 'Version', 'Maintainer']:
+ binpkg_changes.b[key] = srcpkg.s[key]
+ binpkg_changes.b['Description'] = 'binary package information is missing in new queue'
+ binpkg_changes.b['Long_Description'] = '' # no long description available in *.changes file
if binpkg:
binpkgs.append(binpkg)
binpkg = bin_pkg(value, srcpkg.s['Source'])
@@ -276,17 +286,6 @@
if in_source:
srcpkg.s[field] = value
srcpkg.s['maintainer_name'], srcpkg.s['maintainer_email'] = email.Utils.parseaddr(srcpkg.s['Maintainer'])
- # if bin_pkg_changes == None:
- binpkg_changes.b[field] = value
-
- binpkg_changes.b['Distribution'] = srcpkg.s['Distribution']
- binpkg_changes.b['Description'] = 'binary package information is missing in new queue'
- binpkg_changes.b['Long_Description'] = '' # no long description available in *.changes file
- binpkg_changes.b['Component'] = srcpkg.s['Component']
- binpkg_changes.b['Architecture'] = srcpkg.s['Architecture']
- binpkg_changes.b['Version'] = srcpkg.s['Version']
- binpkg_changes.b['Maintainer'] = srcpkg.s['Maintainer']
-
else:
binpkg.b[field] = value
print >>srco, "%s: %s" % (field, value)
@@ -300,8 +299,6 @@
elif field == 'Architecture':
if in_source:
srcpkg.s[field] = value
- #*** if binpkg_changes != None:
- binpkg_changes.b[field] = value
else:
binpkg.b[field] = value
print >>srco, "%s: %s" % (field, value)
@@ -318,8 +315,6 @@
print >>stderr, "Incompatible version numbers between new.822(%s) and %s.html (%s)" % \
(srcpkg.s[field], src_info_base, value)
srcpkg.s[field] = value
- if binpkg_changes != None:
- binpkg_changes.b[field] = value
else:
binpkg.b[field] = value
print >>srco, "%s: %s" % (field, value)
@@ -372,18 +367,19 @@
(src_info_base, value)
elif field == 'Binary':
if in_source:
- # Binaries are mentioned in different syntax in *.changes and *.dsc
- value = re.sub(", +", " ", value)
+ # Remove ',' in *.dsc information (not needed in *.changes)
+ value = re.sub(", +", " ", value) # !!!!
if self.check_existing_binaries(value.split(' '), srcpkg.s['Queue']):
srcpkg.s['Queue'] = 'ignore'
break
if in_source:
- if srcpkg.s['Bin'] != () and value != srcpkg.s['Bin']:
+ # if srcpkg.s['Bin'] != () and value != srcpkg.s['Bin']:
+ # Sometimes the order of multi binary packages is different - it is sufficient
+ # to assume that the package names are the same if the strings are equally long
+ if srcpkg.s['Bin'] != () and len(value) != len(srcpkg.s['Bin']):
print >>stderr, "Incompatible binaries between new.822(%s) and %s.html (%s)" % \
(srcpkg.s['Bin'], src_info_base, value)
srcpkg.s['Bin'] = value
- # we need to initialise some fields in the binary package and the 'Maintainer' field is the last of them mentioned in the ftpnew formatted files
- binpkg_changes = bin_pkg(value.split(' ')[0], srcpkg.s['Source'])
print >>srco, "%s: %s" % (field, value)
else:
print >>stderr, "Binary should not mention Binary field in %s.html (%s)" % \
@@ -459,7 +455,10 @@
%(Vcs-Type)s, %(Vcs-Url)s, %(Vcs-Browser)s,
%(Section)s, %(Distribution)s, %(Component)s, %(Closes)s, %(License)s,
%(Last_modified)s, %(Queue)s)"""
- cur.execute(query, srcpkg.s)
+ try:
+ cur.execute(query, srcpkg.s)
+ except ProgrammingError, err:
+ print "ProgrammingError", err, "\n", query, "\n", srcpkg.s
for binpkg in binpkgs:
# print binpkg
if not binpkg:
Modified: udd/udd/i18n_apps_gatherer.py
===================================================================
--- udd/udd/i18n_apps_gatherer.py 2010-03-13 22:00:28 UTC (rev 1722)
+++ udd/udd/i18n_apps_gatherer.py 2010-03-14 19:46:59 UTC (rev 1723)
@@ -19,7 +19,8 @@
debug=0
-check_char_re = re.compile('&#[0-9][0-9][0-9];')
+check_char_re = re.compile('&#[0-9][0-9][0-9];')
+parse_translation_status_re = re.compile('^(\d+)t(\d+)f(\d+)u$')
def replace_special_char(string):
if not check_char_re.search(string):
@@ -43,18 +44,16 @@
self.release = release
self.version = ''
self.maintainer = ''
- self.po_info = {}
- self.debconfpo_info = {}
def __str__(self):
return "Package %s: %s, %s\n%s" % \
- (self.package, self.maintainer, self.version, self.po_info)
+ (self.package, self.maintainer, self.version)
class po_info():
def __init__(self, poline):
po = poline.strip().split('!')
# ignore .pot and .templates files
- if po[0].endswith('.pot') or po[0].endswith('.templates'):
+ if not po[0].endswith('.po'):
# or po[1].startswith('_') :
self.infofields = 0
return
@@ -68,7 +67,15 @@
print >>stderr, "Invalid language '%s'. Po filename is %s." % (self.language, self.po_file)
self.infofields = 0
return
- self.ID = po[2] # Need to ask Nicolas for the meaning of this
+ match = parse_translation_status_re.match(po[2])
+ if not match:
+ self.translated = 'NULL'
+ self.fuzzy = 'NULL'
+ self.untranslated = 'NULL'
+ else:
+ self.translated = match.groups()[0]
+ self.fuzzy = match.groups()[1]
+ self.untranslated = match.groups()[2]
self.pkg_version_lang = po[3] # Meaning is unclear
# sometimes language translation team is missing
@@ -97,16 +104,71 @@
cur = self.cursor()
# create prepared statements here!
- query = """PREPARE i18n_apps_insert
- (text, text, text, text, text, text, text, text, text, text)
+ query = """PREPARE %s_insert
+ (text, text, text, text, text, text, text, text, text, int, int, int)
AS INSERT INTO %s
(package, version, release, maintainer, po_file, language,
- id, pkg_version_lang, last_translator, language_team)
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)""" % (my_config['table_apps'])
- cur.execute(query)
+ pkg_version_lang, last_translator, language_team,
+ translated, fuzzy, untranslated)
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)"""
+ cur.execute(query % (my_config['table_apps'], my_config['table_apps']))
+ cur.execute(query % (my_config['table_debconf'], my_config['table_debconf']))
pkg = None
+ def parse_po_infoline(self, po_type, data):
+ cur = self.cursor()
+
+ if po_type == 'PO':
+ target_table = self.my_config['table_apps']
+ elif po_type == 'PODEBCONF':
+ target_table = self.my_config['table_debconf']
+ else:
+ print >>stderr, "Wrong PO type %s ignored." % po_type
+ return
+
+ po_info_dict = {}
+ for poline in data[po_type].split("\n"):
+ # ignore first empty line
+ if len(poline) <= 1:
+ continue
+ poinfo = po_info(poline)
+ if poinfo.infofields == 0:
+ continue
+ # Sometimes there is more than one po file in a package. We inject the file
+ # which contains better info about translator
+ # Attention: For the current application it is completely sufficient that we
+ # keep the information *that* a package contains translation for
+ # a certain package in UDD. Other applications might need more
+ # complete information.
+ if po_info_dict.has_key(poinfo.language):
+ po_info_dict[poinfo.language] = max(po_info_dict[poinfo.language], poinfo)
+ else:
+ po_info_dict[poinfo.language] = poinfo
+
+ for lang in po_info_dict.keys():
+ poinfo = po_info_dict[lang]
+ query = "EXECUTE %s_insert (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % \
+ (target_table, \
+ quote(self.pkg.package), quote(self.pkg.version), quote(self.pkg.release), \
+ quote(self.pkg.maintainer), quote(poinfo.po_file), quote(poinfo.language), \
+ quote(poinfo.pkg_version_lang), \
+ quote(poinfo.last_translator), quote(poinfo.language_team), \
+ poinfo.translated, poinfo.fuzzy, poinfo.untranslated)
+ try:
+ cur.execute(query)
+ except IntegrityError, err:
+ print str(err).strip()
+ print len(po), po, poline, self.pkg
+ except InternalError, err:
+ print "InternalError:", err
+ print len(po), po, poline, self.pkg, po_type
+ print query
+ exit(-1)
+ except UnicodeEncodeError, err:
+ print err
+ print query
+
def run(self):
my_config = self.my_config
#start harassing the DB, preparing the final inserts and making place
@@ -141,46 +203,12 @@
continue
self.pkg.version = stanza['Version']
self.pkg.maintainer = stanza['Maintainer']
+
if stanza.has_key('PO'):
- for poline in stanza['PO'].split("\n"):
- # ignore first empty line
- if len(poline) <= 1:
- continue
- poinfo = po_info(poline)
- if poinfo.infofields == 0:
- continue
- # Sometimes there is more than one po file in a package. We inject the file
- # which contains better info about translator
- # Attention: For the current application it is completely sufficient that we
- # keep the information *that* a package contains translation for
- # a certain package in UDD. Other applications might need more
- # complete information.
- if self.pkg.po_info.has_key(poinfo.language):
- self.pkg.po_info[poinfo.language] = max(self.pkg.po_info[poinfo.language], poinfo)
- else:
- self.pkg.po_info[poinfo.language] = poinfo
+ self.parse_po_infoline('PO', stanza)
+ if stanza.has_key('PODEBCONF'):
+ self.parse_po_infoline('PODEBCONF', stanza)
- for lang in self.pkg.po_info.keys():
- poinfo = self.pkg.po_info[lang]
- query = "EXECUTE i18n_apps_insert (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % \
- (quote(self.pkg.package), quote(self.pkg.version), quote(self.pkg.release), \
- quote(self.pkg.maintainer), quote(poinfo.po_file), quote(poinfo.language), \
- quote(poinfo.ID), quote(poinfo.pkg_version_lang), \
- quote(poinfo.last_translator), quote(poinfo.language_team))
-
- try:
- cur.execute(query)
- except IntegrityError, err:
- print str(err).strip()
- print len(po), po, poline, self.pkg
- except InternalError, err:
- print "InternalError:", err
- print len(po), po, poline, self.pkg
- print query
- exit(-1)
- except UnicodeEncodeError, err:
- print err
- print query
except IOError, err:
print >>stderr, "Error reading %s (%s)" % (file, err)
More information about the Collab-qa-commits mailing list