[Collab-qa-commits] r2232 - in udd: scripts udd
Andreas Tille
tille at alioth.debian.org
Fri Apr 20 22:08:12 UTC 2012
Author: tille
Date: 2012-04-20 22:08:11 +0000 (Fri, 20 Apr 2012)
New Revision: 2232
Modified:
udd/scripts/fetch_bibref.sh
udd/udd/bibref_gatherer.py
Log:
Do not try to parse HTML/XML files; instead, remove them from the unpacked archive before parsing.
Modified: udd/scripts/fetch_bibref.sh
===================================================================
--- udd/scripts/fetch_bibref.sh 2012-04-20 21:46:21 UTC (rev 2231)
+++ udd/scripts/fetch_bibref.sh 2012-04-20 22:08:11 UTC (rev 2232)
@@ -5,10 +5,17 @@
TARGETDIR=/org/udd.debian.org/mirrors/bibref
FETCHURL=http://blends.debian.net/packages-metadata/packages-metadata.tar.bz2
ARCHIVE=`basename $FETCHURL`
-#set -x
+CURDIR=`pwd`
+
rm -rf $TARGETDIR
mkdir -p $TARGETDIR
wget -q ${FETCHURL} -O ${TARGETDIR}/${ARCHIVE}
cd $TARGETDIR
tar -xjf ${ARCHIVE}
+# There is no point in keeping non-yaml files which later just cause errors
+rm -f $CURDIR/bibref_gatherer_fetch.log
+for nonyamlfile in `find . -name "*.upstream" -exec file \{\} \; | grep -e HTML -e XML | sed 's/:.*$//'` ; do
+ file $nonyamlfile >> $CURDIR/bibref_gatherer_fetch.log
+ rm -f $nonyamlfile
+done
rm -rf ${ARCHIVE}
Modified: udd/udd/bibref_gatherer.py
===================================================================
--- udd/udd/bibref_gatherer.py 2012-04-20 21:46:21 UTC (rev 2231)
+++ udd/udd/bibref_gatherer.py 2012-04-20 22:08:11 UTC (rev 2232)
@@ -133,10 +133,10 @@
try:
fields = yaml.load(uf.read())
except yaml.scanner.ScannerError, err:
- self.log.error("Syntax error in file %s: %s" % (ufile, str(err)))
+ self.log.error("Scanner error in file %s: %s" % (ufile, str(err)))
continue
except yaml.parser.ParserError, err:
- self.log.error("Syntax error in file %s: %s" % (ufile, str(err)))
+ self.log.error("Parser error in file %s: %s" % (ufile, str(err)))
continue
except yaml.reader.ReaderError, err:
self.log.error("Encoding problem in file %s: %s" % (ufile, str(err)))
More information about the Collab-qa-commits
mailing list