[Debian-l10n-commits] r2735 - in /ddtp-web: Changelog ddts-stats update.sh
nekral-guest at users.alioth.debian.org
nekral-guest at users.alioth.debian.org
Thu Jul 28 14:07:25 UTC 2011
Author: nekral-guest
Date: Thu Jul 28 14:07:25 2011
New Revision: 2735
URL: http://svn.debian.org/wsvn/?sc=1&rev=2735
Log:
* ddts-stats: Added script to update the translation statistics.
* update.sh: Use the ddts-stats scripts instead of one from a home
directory. And redirect logs / error messages.
Added:
ddtp-web/ddts-stats (with props)
Modified:
ddtp-web/Changelog
ddtp-web/update.sh
Modified: ddtp-web/Changelog
URL: http://svn.debian.org/wsvn/ddtp-web/Changelog?rev=2735&op=diff
==============================================================================
--- ddtp-web/Changelog (original)
+++ ddtp-web/Changelog Thu Jul 28 14:07:25 2011
@@ -1,3 +1,9 @@
+2011-07-28 Nicolas Francois <nicolas.francois at centraliens.net>
+
+ * ddts-stats: Added script to update the translation statistics.
+ * update.sh: Use the ddts-stats scripts instead of one from a home
+ directory. And redirect logs / error messages.
+
2011-07-28 Nicolas Francois <nicolas.francois at centraliens.net>
* Packages2db.pl: is_description_id_active() added.
Added: ddtp-web/ddts-stats
URL: http://svn.debian.org/wsvn/ddtp-web/ddts-stats?rev=2735&op=file
==============================================================================
--- ddtp-web/ddts-stats (added)
+++ ddtp-web/ddts-stats Thu Jul 28 14:07:25 2011
@@ -1,0 +1,473 @@
+#!/usr/bin/perl -w
+use strict;
+use LWP::Simple;
+use POSIX qw(strftime);
+use Digest::MD5 qw(md5_hex);
+use DBI;
+# Data source name, user and password for the DDTP PostgreSQL database.
+my @DSN = ("DBI:Pg:dbname=ddtp", "", "");
+
+# Errors are raised as exceptions instead of being printed; autocommit is off.
+my $dbh = DBI->connect(@DSN, { PrintError => 0, RaiseError => 1, AutoCommit => 0 });
+
+# Normally unreachable: with RaiseError set, a failed connect has already died.
+$dbh or die $DBI::errstr;
+
+# Report on every language that has at least one row in translation_tb
+# (this replaces a former hard-coded list of language codes).
+my @langs = @{ $dbh->selectcol_arrayref("SELECT DISTINCT language FROM translation_tb") };
+my %longnames = ( # language code => display name; codes not listed here are shown without a name
+ da => 'Danish',
+ de => 'German',
+ ca => 'Catalan',
+ cs => 'Czech',
+ fr => 'French',
+ hu => 'Hungarian',
+ it => 'Italian',
+ ja => 'Japanese',
+ ko => 'Korean',
+ nl => 'Dutch',
+ pl => 'Polish',
+ pt_BR => 'Brazilian Portuguese',
+ pt => 'Portuguese',
+ ru => 'Russian',
+ sk => 'Slovak',
+ sv => 'Swedish',
+ uk => 'Ukrainian',
+ es => 'Spanish',
+ eo => 'Esperanto',
+ fi => 'Finnish',
+ zh_CN => 'Simplified Chinese',
+ zh_TW => 'Traditional Chinese',
+ km_KH => 'Cambodian (Khmer)',
+);
+# --- Input/output locations and run parameters ---
+my $data = "/org/ddtp.debian.net/Packages/"; # directory holding the Packages files and the popcon copy
+my $DIST = shift(@ARGV) || "etch"; # distribution: first command-line argument
+my ($ARCH, $SECTION) = ("i386", "main");
+my $POPCON = "http://popcon.debian.org/by_vote";
+my $POPCON_COUNT = 500; # number of top-ranked popcon packages in the PopconN column
+my $OUTPUT = "/org/ddtp.debian.net/www/stats/stats-${DIST}.html";
+
+# --- Data shared between the processing steps ---
+my %descrmd5; # $descrmd5{$md5} = $desc_id, represents all known descriptions
+my %descrlist; # $descrlist{$package}{$md5} exists for each package in package file
+ # $descrlist{$package}{priority} = package priority
+ # $descrlist{$package}{md5} = md5 of the package's description
+my %total_counts; # $total_counts{$priority} = number of packages with that priority
+my %important_packages; # $important_packages{$package}{$md5} exists for packages+description of priority standard or higher
+my %desc_id_index; # $desc_id_index{$desc_id}{md5} = md5 of that description
+ # $desc_id_index{$desc_id}{translated}{$lang} exists if translated in that lang
+my %missing; # $missing{$lang}{$priority} = [ array of packages not translated ]
+my %popconN; # $popconN{$package} = rank, for the first $POPCON_COUNT popcon packages found in the Packages file
+my %popconrank; # $popconrank{$package} = vote count in popcon
+my $popconrank_total; # total of all votes in popcon (packages found in the Packages file only)
+my %output; # HTML fragments that write_output() puts into the page
+
+# --- Scoring and table layout ---
+my %scores = ( required => 1.0, important => 0.75, standard => 0.5, optional => 0.25, extra => 0.10, popconN => 0.0, popconrank => 0.0 ); # weight of each column in a language's sort score
+my @Priorities = qw(required important standard optional extra popconN popconrank); # table columns, in display order
+my %output_header = ( popconN => "Popcon$POPCON_COUNT", popconrank => "PopconRank" ); # headings that are not just ucfirst() of the key
+
+fetch_data(); # Refresh the local copy of the popcon list
+load_packages(); # Read packages file
+load_popcon(); # Read popcon data
+parse_ddtp_index(); # Get description and translation info from the database
+count_packages(); # Cross-reference package and count them
+report_translations(); # Generate report
+write_output();
+$dbh->disconnect; # explicit disconnect: AutoCommit is off, so DBI would otherwise warn about the implicit rollback at exit
+exit;
+
+sub fetch_data
+{
+ # mirror() sends If-Modified-Since for the local copy and returns the HTTP status code, which is logged.
+ print STDERR "$POPCON: ", mirror( $POPCON, "$data/popcon.txt" ), "\n";
+}
+
+sub load_packages
+{ # Feed every stanza of the distribution's Packages file to process_package().
+ print STDERR "Loading package file\n";
+ my $fh = open_bz2_file( "${data}Packages_${DIST}_${SECTION}_${ARCH}.bz2" ); # $data already ends in "/"
+ parse_header_format( $fh, \&process_package );
+ close $fh or warn "Reading the Packages file failed (bzcat status $?, $!)\n"; # close() on a pipe is where a failed child is reported
+}
+
+# Callback for load_packages: record one Packages stanza in %descrlist, and in
+# %important_packages when its priority matches required/important/standard.
+sub process_package
+{
+ my ($stanza) = @_;
+ my $digest = md5_hex( $stanza->{Description}."\n" );
+ my $name = $stanza->{Package};
+ my $prio = ( $stanza->{Priority} ||= 'unknown' );
+
+ $descrlist{$name}{$digest} = 0;
+ @{ $descrlist{$name} }{qw(priority md5)} = ( $prio, $digest );
+
+ # Trace output for one hard-coded package name.
+ if( $name eq "kuvert" )
+ { print "[".$stanza->{Description}."]\n", "$name: $prio ($digest)\n" }
+
+ $important_packages{$name}{$digest} = 0 if $prio =~ /required|important|standard/;
+}
+
+# This goes through the package list and compares it against the DDTP.
+# Firstly to count the number of each priority, secondly to detect missing
+# package entries.
+# NOTE: the missing-entry ("zombie") detection is commented out below, so %zombie stays empty and $output{zombie} stays "".
+sub count_packages
+{
+ $output{zombie} = "";
+ my %zombie;
+ PACKAGE: for my $package (keys %descrlist)
+ {
+ my $md5 = $descrlist{$package}{md5};
+ my $prio = $descrlist{$package}{priority};
+
+ my @alt_md5s = (); # only referenced by the disabled code below
+ # Descriptions unknown to the database are reported and left out of the totals.
+ if( not defined $descrmd5{$md5} )
+ {
+ warn "Can't find md5 ($md5), package $package\n";
+ next;
+ }
+ my $desc_id = $descrmd5{$md5};
+
+# next unless $prio =~ /required|important|standard/;
+# for my $desc_id ($descrlist{$package}{$md5}, keys %desc_id_index)
+# {
+# next if $desc_id == 0;
+# next unless $desc_id_index{$desc_id}{package} eq $package;
+# push @alt_md5s, $desc_id_index{$desc_id}{md5};
+# next unless $desc_id_index{$desc_id}{md5} eq $md5;
+
+ $total_counts{$prio}++;
+# next PACKAGE;
+# }
+# if( not defined $zombie{$prio} ) { $zombie{$prio} = [] }
+# push @{$zombie{$prio}}, $package;
+#
+# print STDERR "Zombie: $package ($md5) [".join(",",@alt_md5s)."]\n";
+ }
+ # The two popcon columns get fixed totals instead of per-package counts.
+ $total_counts{popconN} = $POPCON_COUNT;
+ $total_counts{popconrank} = $popconrank_total;
+ # With %zombie empty, the loop below currently adds nothing to $output{zombie}.
+ for my $prio (@Priorities)
+ {
+ next if not defined $zombie{$prio};
+ my $count = scalar(@{$zombie{$prio}});
+ $output{zombie} .= "<li>$count $prio packages<br>\n".join(", ",@{$zombie{$prio}})."\n";
+ }
+}
+
+# Build the HTML rows of the statistics table: $output{mainheader}, $output{mainfooter}
+# and, per language, $output{main}{$lang} = [ $score, $row_html ].  Untranslated
+# packages are collected in $missing{$lang}{$category} for the cell tooltips.
+sub report_translations
+{
+ for my $prio (@Priorities)
+ {
+ $output_header{$prio} ||= ucfirst($prio);
+ }
+
+ $output{mainheader} = "<tr><th>Lang</th>".join("", map { "<th>".$output_header{$_}."</th>" } @Priorities)."</tr>\n";
+ # A category in which no package was counted shows 0 instead of an undefined value.
+ $output{mainfooter} = "<tr><th>Package count</th>".join("", map { "<td align=right>".($total_counts{$_} || 0)."</td>" } @Priorities)."</tr>\n";
+ for my $lang (@langs)
+ {
+ # $count{$category}: translated packages (summed votes for popconrank) in this language.
+ my %count;
+
+ # Priority columns: every package of the Packages file whose description is in the database.
+ for my $package (keys %descrlist)
+ {
+ my $md5 = $descrlist{$package}{md5};
+
+ my $desc_id = $descrmd5{$md5};
+ if (not defined $desc_id)
+ {
+ next;
+ }
+
+ my $priority = $descrlist{$package}{priority} || 'unknown';
+
+ if( not exists $desc_id_index{$desc_id}{translated}{$lang} )
+ {
+ # This package missing
+ if( not defined $missing{$lang}{$priority} )
+ {
+ $missing{$lang}{$priority} = [];
+ }
+ push @{$missing{$lang}{$priority}}, $package;
+ next;
+ }
+
+ $count{$priority}++
+ }
+
+ # Popcon500 scores
+ for my $package (keys %popconN)
+ {
+ if( not defined $descrlist{$package} )
+ { print STDERR "Popcon: unknown package $package\n"; next }
+
+ my $md5 = $descrlist{$package}{md5};
+
+ if( not exists $descrmd5{$md5} )
+ { print STDERR "Popcon: package $package with md5 not have known desc_id\n"; next }
+
+ my $desc_id = $descrmd5{$md5};
+
+ if( not exists $desc_id_index{$desc_id}{translated}{$lang} )
+ {
+ # This package missing
+ if( not defined $missing{$lang}{popconN} )
+ {
+ $missing{$lang}{popconN} = [];
+ }
+ push @{$missing{$lang}{popconN}}, $package;
+ next;
+ }
+
+ $count{popconN}++;
+ }
+ if( exists $missing{$lang}{popconN} )
+ {
+ $missing{$lang}{popconN} = [ sort { $popconN{$a} <=> $popconN{$b} } @{ $missing{$lang}{popconN} } ];
+ }
+ $total_counts{popconN} = $POPCON_COUNT;
+
+ # PopconRank scores, we go through the entire list of packages and if it's translated, add the vote count
+ for my $package (keys %popconrank)
+ {
+ # We don't worry about warnings here, too many packages to worry about
+ if( not defined $descrlist{$package} )
+ { next }
+
+ my $md5 = $descrlist{$package}{md5};
+
+ if( not exists $descrmd5{$md5} )
+ { next }
+
+ my $desc_id = $descrmd5{$md5};
+
+ if( not exists $desc_id_index{$desc_id}{translated}{$lang} )
+ {
+ # This package missing
+ if( not defined $missing{$lang}{popconrank} )
+ {
+ $missing{$lang}{popconrank} = [];
+ }
+ push @{$missing{$lang}{popconrank}}, $package;
+ next;
+ }
+
+ $count{popconrank} += $popconrank{$package};
+ }
+ if( exists $missing{$lang}{popconrank} )
+ {
+ $missing{$lang}{popconrank} = [ sort { $popconrank{$b} <=> $popconrank{$a} } @{ $missing{$lang}{popconrank} } ];
+ }
+ $total_counts{popconrank} = $popconrank_total;
+
+ # Sort score: weighted sum of the translated fraction of every category.
+ my $score = 0;
+ $output{main}{$lang} .= "<tr><td>$lang ".(defined $longnames{$lang}?"($longnames{$lang})":"")."</td>";
+ for my $prio (@Priorities)
+ {
+ if( not defined $count{$prio} ) { $count{$prio} = 0 }
+
+ # A category without packages (total 0 or unset) must not be used as a divisor.
+ my $total = $total_counts{$prio} || 0;
+ $score += ($scores{$prio}/$total) * $count{$prio} if $total;
+
+ print STDERR "$lang: $prio ($count{$prio}/$total) ";
+ if( $total - $count{$prio} < 10 and defined $missing{$lang}{$prio})
+ { print STDERR "[", join(",", @{$missing{$lang}{$prio}}), "]" }
+ print STDERR"\n";
+
+ # colour_field() divides by the total, so an empty category gets a plain cell.
+ $output{main}{$lang} .= $total
+ ? colour_field( $count{$prio},$total, $missing{$lang}{$prio}, $prio eq "popconrank" )
+ : "<td align=center>n/a</td>";
+ }
+ $output{main}{$lang} .= "</tr>\n";
+
+ # Keep the score next to the row so that write_output can sort the rows by it.
+ $output{main}{$lang} = [ $score, $output{main}{$lang} ];
+ }
+}
+
+# Load from the database every description tagged with $DIST, together with the
+# languages it is translated into; fills %descrmd5 and %desc_id_index.
+sub parse_ddtp_index
+{
+ $important_packages{'kuvert'} = 1; # NOTE(review): looks like a debugging leftover - confirm
+
+ # The left outer join leaves language undefined for rows without a translation.
+ my $query = join "",
+ "select package, description_id, description_md5, language ",
+ "from description_tag_tb dt ",
+ "inner join description_tb d using(description_id) ",
+ "left outer join translation_tb using (description_id) ",
+ "where tag = ? ORDER BY date_end";
+ my $sth = $dbh->prepare( $query );
+ $sth->execute($DIST);
+
+ while( my @row = $sth->fetchrow_array )
+ {
+ my (undef, $id, $digest, $language) = @row;
+ $descrmd5{$digest} = $id;
+ $desc_id_index{$id}{md5} = $digest;
+ $desc_id_index{$id}{translated}{$language} = 0 if defined $language;
+ }
+ print STDERR "Have ", scalar(keys %descrmd5), " md5s on record\n";
+}
+
+# Return a handle from which the decompressed content of the bzip2 file $file can be read.
+sub open_bz2_file
+{
+ my $file = shift;
+
+ # List-form pipe open: $file reaches bzcat as one argument, never through a shell.
+ # A bzcat failure (e.g. a missing file) only shows up when the handle is closed.
+ open my $fh, '-|', 'bzcat', '--', $file or die "Couldn't open $file ($!)\n";
+ return $fh;
+}
+
+# Parse "Field: value" stanzas from $fh, calling $sub->(\%fields) once per non-empty stanza (the hash is reused).
+sub parse_header_format
+{
+ my ($fh, $sub) = @_;
+ my ($lastfield, %hash);
+ while(<$fh>)
+ {
+ chomp;
+ if( /^([\w.-]+): (.*)/ )
+ {
+ $lastfield = $1;
+ $hash{$1} = $2;
+ }
+ elsif( /^ / )
+ {
+ $hash{$lastfield} .= "\n$_" if defined $lastfield; # a continuation line before any field is dropped
+ }
+ elsif( /^$/ )
+ {
+ $sub->( \%hash ) if %hash; # repeated blank lines do not produce empty stanzas
+ %hash = ();
+ $lastfield = undef;
+ }
+ }
+ # Flush the final stanza when the input does not end with a blank line.
+ $sub->( \%hash ) if %hash;
+}
+
+sub write_output
+{
+ # Write the statistics page to $OUTPUT; rows come from the [ score, row ] pairs, best score first.
+ $output{main} = join( "", map { $_->[1] } sort { $b->[0] <=> $a->[0] } values %{ $output{main} } );
+ # Three-argument open; the close is checked below so a failed write is not silently lost.
+ open my $fh, '>', $OUTPUT or die "Couldn't write to '$OUTPUT': $!\n";
+ print $fh <<EOF;
+<html>
+<head><title>DDTP stats</title></head>
+<body>
+<h1>DDTP stats</h1>
+Here are some stats generated from the DDTP server with regards to translations.
+<p>
+These stats were current as of @{[ strftime "%F %T %Z (%z)", localtime ]}.
+<p>
+The distribution measured is <b>$DIST/$SECTION</b><br>
+The architecture used is <b>$ARCH</b>
+<p>
+Note this uses the actual priorities out of the Packages files. The DDTS
+uses a slightly different measurement, which is why they don\'t use exactly
+the same list.
+<p>
+The numbers are counts. If it says 35, that means that 35 of the 43 (where
+43 is the total number listed at the bottom) packages of that priority have
+been translated. If you hold your mouse over the number, it gives examples
+of what is missing.
+<p>
+Popcon500 simply counts the number of translated packages in the top 500 ranked in the
+<a href="http://popcon.debian.org/by_vote">PopCon-rank-by-vote</a>. This is to get some idea
+of the translations with respect to the packages people actually use.
+<p>
+Popconrank counts the number of votes for translated packages. That is,
+roughly, the percentage indicates the probability that a vote for a package
+in popcon was a vote for a translated package.
+<table>
+<caption>Number of translated package descriptions per category from the Package file:</caption>
+<tr><td></td><th colspan=@{[ scalar @Priorities ]}>Category</th></tr>
+$output{mainheader}
+$output{main}
+$output{mainfooter}
+</table>
+<p>
+There are also a number of packages which cannot be translated due to their descriptions not being in the DDTS
+<ul>
+$output{zombie}
+</ul>
+</body>
+</html>
+EOF
+ close $fh or die "Couldn't write to '$OUTPUT': $!\n";
+}
+
+# Return one HTML table cell showing "$num ($percent%)" on a red-to-green background.
+# $missing is an optional arrayref of names for the tooltip; a true $scale shows $num in millions.
+sub colour_field
+{
+ my ($num,$total,$missing,$scale) = @_;
+
+ # An empty category (no total) is shown as 0% instead of dying on a division by zero.
+ my $frac = $total ? $num/$total : 0;
+ my $percent = $total ? int(100*$num/$total) : 0;
+
+ # Red (0%) to green (100%), lightened by mixing 2:1 with white.
+ my ($red,$green,$blue) = map { (2*$_+255)/3 } ( 255*(1-$frac), 255*$frac, 0 );
+ my $colour = sprintf "#%02X%02X%02X", $red, $green, $blue;
+
+ if( not defined $missing )
+ { $missing = [] }
+
+ # Tooltip text: the full list, or only the first 16 names once there are more than 20.
+ my $missingstr;
+ if( scalar(@$missing) > 20 )
+ { $missingstr = "Missing: ".join(", ",@$missing[0..15])."..." }
+ elsif( scalar(@$missing) > 0 )
+ { $missingstr = "Missing: ".join(", ",@$missing) }
+
+ # The percentage above uses the raw number; only the displayed value is scaled.
+ if($scale) { $num = sprintf "%.2fM", $num/1000000 }
+
+ # The names go into the span's title attribute.
+ return "<td bgcolor=$colour align=center><span".(defined $missingstr?" title='$missingstr'":"").">$num ($percent%)</span></td>";
+}
+
+# Read the mirrored popcon list: rank the first $POPCON_COUNT packages that are
+# also in the Packages file in %popconN, and record every such package's votes.
+sub load_popcon
+{
+ # Three-argument open: the path can never be taken for a mode or a command.
+ open my $fh, '<', "$data/popcon.txt" or die "Couldn't read popcon data ($!)\n";
+ my $count = 0;
+ $popconrank_total = 0;
+ while( my $line = <$fh> )
+ {
+ # Data lines start with a number; this also skips the "#" comment lines.
+ next unless $line =~ /^\d+\s+/;
+ my (undef, $package, undef, $votes) = split /\s+/, $line;
+ next unless defined $descrlist{$package};
+ $count++;
+ $popconN{$package} = $count if $count <= $POPCON_COUNT;
+ # List of all packages, with vote count
+ $popconrank{$package} = $votes;
+ $popconrank_total += $votes;
+ }
+
+ close $fh;
+}
Propchange: ddtp-web/ddts-stats
------------------------------------------------------------------------------
svn:executable = *
Modified: ddtp-web/update.sh
URL: http://svn.debian.org/wsvn/ddtp-web/update.sh?rev=2735&op=diff
==============================================================================
--- ddtp-web/update.sh (original)
+++ ddtp-web/update.sh Thu Jul 28 14:07:25 2011
@@ -18,9 +18,9 @@
./file2Translation.sh >> $LOGPREFIX.log 2>> $LOGPREFIX.err
# Regenerate the stats files
-/home/kleptog/stats/ddts-stats sid >/dev/null
-/home/kleptog/stats/ddts-stats wheezy >/dev/null
-/home/kleptog/stats/ddts-stats squeeze >/dev/null
+./ddts-stats sid >> $LOGPREFIX.log 2>> $LOGPREFIX.err
+./ddts-stats wheezy >> $LOGPREFIX.log 2>> $LOGPREFIX.err
+./ddts-stats squeeze >> $LOGPREFIX.log 2>> $LOGPREFIX.err
#cp -a /home/grisu/public_html/ddtp/* /var/www/ddtp/
More information about the Debian-l10n-commits
mailing list