[Debian-l10n-commits] r1480 - in /dl10n/trunk: Changelog dl10n-mail lib/Debian/L10n/BTS.pm lib/Debian/L10n/Db.pm lib/Debian/L10n/Mail.pm lib/Debian/L10n/Spider.pm lib/Debian/L10n/Utils.pm

nekral-guest at users.alioth.debian.org nekral-guest at users.alioth.debian.org
Sat Nov 29 15:57:40 UTC 2008


Author: nekral-guest
Date: Sat Nov 29 15:57:39 2008
New Revision: 1480

URL: http://svn.debian.org/wsvn/?sc=1&rev=1480
Log:
	* lib/Debian/L10n/Spider.pm, lib/Debian/L10n/Utils.pm: %Status,
	%Status_syn, %Type_syn, %LanguageList, %Language, parse_subject,
	parse_from, and parse_date moved from Spider to Utils.
	* lib/Debian/L10n/Spider.pm, lib/Debian/L10n/BTS.pm:
	parse_submitter, check_bts, check_bts_soap, and check_bts_bug_soap
	moved from Spider to BTS.
	* lib/Debian/L10n/Spider.pm, lib/Debian/L10n/Db.pm: clean_db moved
	from Spider to Db.
	* lib/Debian/L10n/Db.pm: Added support for the Message-ID header.
	This indicate the last imported mail and replace the
	year/month/message headers when Mail is used instead of Spider.
	* lib/Debian/L10n/Mail.pm, dl10n-mail: New tool intended to
	replace dl10n-spider. This tool can be used to receive mail from
	an mbox or from stdin (procmail filter).
	* lib/Debian/L10n/Utils.pm: parse_from: better handling of MIME
	encoded from lines.
	* lib/Debian/L10n/BTS.pm: check_bts_soap as I'm receiving lots of
	timeout from soap, add a possibility to write the database every
	10 analyzed bugs.

Added:
    dl10n/trunk/dl10n-mail   (with props)
    dl10n/trunk/lib/Debian/L10n/BTS.pm
    dl10n/trunk/lib/Debian/L10n/Mail.pm
    dl10n/trunk/lib/Debian/L10n/Utils.pm
Modified:
    dl10n/trunk/Changelog
    dl10n/trunk/lib/Debian/L10n/Db.pm
    dl10n/trunk/lib/Debian/L10n/Spider.pm

Modified: dl10n/trunk/Changelog
URL: http://svn.debian.org/wsvn/dl10n/trunk/Changelog?rev=1480&op=diff
==============================================================================
--- dl10n/trunk/Changelog (original)
+++ dl10n/trunk/Changelog Sat Nov 29 15:57:39 2008
@@ -1,3 +1,25 @@
+2008-11-29  Nicolas François  <nicolas.francois at centraliens.net>
+
+	* lib/Debian/L10n/Spider.pm, lib/Debian/L10n/Utils.pm: %Status,
+	%Status_syn, %Type_syn, %LanguageList, %Language, parse_subject,
+	parse_from, and parse_date moved from Spider to Utils.
+	* lib/Debian/L10n/Spider.pm, lib/Debian/L10n/BTS.pm:
+	parse_submitter, check_bts, check_bts_soap, and check_bts_bug_soap
+	moved from Spider to BTS.
+	* lib/Debian/L10n/Spider.pm, lib/Debian/L10n/Db.pm: clean_db moved
+	from Spider to Db.
+	* lib/Debian/L10n/Db.pm: Added support for the Message-ID header.
+	This indicate the last imported mail and replace the
+	year/month/message headers when Mail is used instead of Spider.
+	* lib/Debian/L10n/Mail.pm, dl10n-mail: New tool intended to
+	replace dl10n-spider. This tool can be used to receive mail from
+	an mbox or from stdin (procmail filter).
+	* lib/Debian/L10n/Utils.pm: parse_from: better handling of MIME
+	encoded from lines.
+	* lib/Debian/L10n/BTS.pm: check_bts_soap as I'm receiving lots of
+	timeout from soap, add a possibility to write the database every
+	10 analyzed bugs.
+
 2008-11-28  Nicolas François  <nicolas.francois at centraliens.net>
 
 	* dl10n-rrd/manpages-rrd.pl: No more m68k in unstable?

Added: dl10n/trunk/dl10n-mail
URL: http://svn.debian.org/wsvn/dl10n/trunk/dl10n-mail?rev=1480&op=file
==============================================================================
--- dl10n/trunk/dl10n-mail (added)
+++ dl10n/trunk/dl10n-mail Sat Nov 29 15:57:39 2008
@@ -1,0 +1,200 @@
+#!/usr/bin/perl -w
+
+use strict;
+use utf8;
+
+
+=head1 NAME
+
+dl10n-mail -- translator mailing lists (and BTS) robot for status updates
+
+=head1 SYNOPSIS
+
+dl10n-mail [options] lang+
+
+=head1 DESCRIPTION
+
+This script can receive mails from the debian-l10n-E<lt>languageE<gt>
+mailing list or read an archivein thembox format.
+It looks for emails which title follow a specific format
+indicating what the author intend to translate, or the current status of
+his work on this translation.
+
+Those informations are saved to a dl10n database which can then be used to
+build a l10n coordination page or any other useless statistics.
+
+=cut
+
+use Getopt::Long; #to parse the args
+use Debian::L10n::Mail;
+
+
+my $progname = $0;
+   $progname = $1 if $progname =~ m,([^/])+$,;
+
+my $VERSION = "1.0";			 # External Version Number
+my $BANNER  = "Debian l10n infrastructure -- mailing list parser v$VERSION"; # Version Banner - text form
+
+my $cmdline_file  = undef;
+my $cmdline_msgid  = undef;
+my $cmdline_mboxfolder  = undef;
+my $check_bts=0;
+
+
+=head1 Command line option parsing
+
+=over4
+
+=item General options:
+
+=over4
+
+=item -h, --help
+
+display short help text
+
+=item -V, --version
+
+display version and exit
+
+=item --check-bts
+
+check the BTS
+
+=back
+
+=item Begin point of the crawling:
+
+=over4
+
+=item --msgid=Message-ID
+
+=back
+
+if not specified, will crawl for new messages.
+
+=item ...
+
+=over4
+
+=item --mboxfolder=MBOX_FOLDER
+
+...
+
+=back
+
+=item Database to fill:
+
+=over4
+
+=item --sdb=STATUS_FILE
+
+use STATUS_FILE as status file (instead of $STATUS_FILE)
+
+=back
+
+=back
+
+=cut
+
+# This is put into a block to avoid main namespace pollution
+{
+	sub syntax_msg {
+		my $message = shift;
+		if (defined $message) {
+		        print "$progname: $message\n";
+		} else {
+		        print "$BANNER\n";
+		}
+		print <<EOF
+Syntax: $0 [options] [lang]+
+General options:
+    -h, --help                display short help text
+    -V, --version             display version and exit
+    --check-bts               check the BTS
+
+Begin point of the crawling:
+    --msgid=Mesage-ID
+
+    --mboxfolder=mbox_folder
+
+    If not specified, will crawl for new messages.
+
+Database to fill:
+    --sdb=STATUS_FILE         use STATUS_FILE as status file
+EOF
+		;
+
+		if (defined $message) {
+			exit 1;
+		} else {
+			exit 0;
+		}
+	}
+
+
+	# Display Version Banner
+	# Options: -V|--version, --print-version
+	sub banner {
+		if ($_[0] eq 'print-version') {
+			print "$VERSION\n";
+		} else {
+			print "$BANNER\n";
+		}
+		exit 0;
+	}
+
+	# Hash used to process commandline options
+	my %opthash = (
+		# ------------------ general options
+		"help|h"    => \&syntax_msg,
+		"version|V" => \&banner,
+		"check-bts" => \$check_bts,
+
+		# ------------------ configuration options
+		"msgid=s"    => \$cmdline_msgid,
+
+		"mboxfolder=s"=> \$cmdline_mboxfolder,
+
+		"sdb=s"     => \$cmdline_file,
+	);
+
+
+	# init commandline parser
+	Getopt::Long::config('bundling', 'no_getopt_compat', 'no_auto_abbrev');
+
+	# process commandline options
+	GetOptions(%opthash)
+		or syntax_msg("error parsing options");
+}
+
+
+my @langs = @ARGV;
+
+foreach my $l (@langs) {
+	Mail::process($cmdline_mboxfolder, $l, $check_bts, undef);
+}
+
+=head1 LICENSE
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+=head1 COPYRIGHT (C)
+
+ 2003,2004 Tim Dijkstra
+ 2004 Nicolas Bertolissio
+ 2004 Martin Quinson
+
+=cut
+
+1;

Propchange: dl10n/trunk/dl10n-mail
------------------------------------------------------------------------------
    svn:executable = *

Added: dl10n/trunk/lib/Debian/L10n/BTS.pm
URL: http://svn.debian.org/wsvn/dl10n/trunk/lib/Debian/L10n/BTS.pm?rev=1480&op=file
==============================================================================
--- dl10n/trunk/lib/Debian/L10n/BTS.pm (added)
+++ dl10n/trunk/lib/Debian/L10n/BTS.pm Sat Nov 29 15:57:39 2008
@@ -1,0 +1,205 @@
+package Debian::L10n::BTS;
+
+use strict;
+use utf8;
+
+use LWP::UserAgent;
+use Date::Parse;
+use Date::Format;
+use Encode qw(decode_utf8);
+use HTML::Entities; # encode_entities
+
+use Data::Dumper;
+
+
+my $VERSION = "1.0";				# External Version Number
+
+my $Web_agent =  LWP::UserAgent -> new;
+$Web_agent->env_proxy;
+
+
+=head2 check_bts
+
+check_bts searches in the BTS for open bugs, it fixes the bug submission date
+if necessary, checks whether the bug is fixed or closed or not and updates the
+database accordingly.
+
+=cut
+
+sub check_bts($@) {
+	my $db = shift;
+	my $dbName = shift;
+	check_bts_soap($db, $dbName);
+}
+
+my $BTS = "http://bugs.debian.org";
+#my $BTS = "http://bugs.donarmstrong.com";
+use Data::Dumper;
+use SOAP::Lite;
+
+my $soap = SOAP::Lite->uri('Debbugs/SOAP')->proxy("$BTS/cgi-bin/soap.cgi");
+
+sub parse_submitter($) {
+	my $submitter = shift;
+
+	$submitter = decode_base64($submitter) if $submitter =~ /^: /;
+	$submitter = encode_entities(decode_utf8($submitter));
+	$submitter =~ s/</</;
+	$submitter =~ s/>/>/;
+	$submitter = Debian::L10n::Utils::parse_from($submitter);
+
+	return $submitter;
+}
+
+my %seen;
+my %opendate;
+my %closedate;
+my %bugsubmitter;
+sub check_bts_soap($$) {
+	my $db = shift;
+	my $dbName = shift;
+	my $count = 0;
+
+	foreach my $pkg (sort (grep { $db->has_status($_) } $db->list_packages())) {
+	    PKG_BTS:
+		foreach my $statusline (@{$db->status($pkg)}) {
+			my ($type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb) = @{$statusline};
+			warn "$pkg\:$type\:$file does not specify the status\n"
+				unless defined $status_from_db;
+			next	unless defined $status_from_db;
+			next	unless $bug_nb;
+			next	unless    ($status_from_db eq 'bts')
+			               || ($status_from_db eq 'wontfix')
+			               || ($status_from_db eq 'fix');
+			check_bts_bug_soap ($db, $pkg, $statusline);
+			# Note: this will only add the DONE tags if bugs are now closed.
+			#       There is no need to parse these additional statusline.
+			$count++;
+			if ($count > 10) {
+				$count = 0;
+				$db->write($dbName) if defined $dbName;
+			}
+		}
+	}
+}
+
+sub check_bts_bug_soap ($$$) {
+	my $db = shift;
+	my $pkg = shift;
+	my $statusline = shift;
+	my $changed = 0; # 0: No changes
+	                 # 1: Updated
+	                 # 2: New status added
+			my ($type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb) = @{$statusline};
+			my $bugwontfix = 0;
+
+			unless ($seen{$bug_nb}) {
+				my $bugdone = 0;
+				my $content;
+				my $soap_bugs = $soap->get_status($bug_nb)->result();
+				if (not defined $soap_bugs or not length $soap_bugs) {
+					warn "Cannot find bug '$bug_nb' ($pkg: $type#$file)\n";
+					return 0;
+				}
+
+				my $pkg_bug = $soap_bugs->{$bug_nb}->{package};
+				if (    ($pkg_bug ne $pkg)
+				    and ($pkg_bug ne "wnpp")) {
+					print STDERR "Warning: #$bug_nb filled against $pkg_bug but $pkg is in the database\n";
+				}
+				$opendate{$bug_nb} = Date::Format::time2str("%Y-%m-%d %T %z", $soap_bugs->{$bug_nb}->{date}, "GMT");
+				$bugsubmitter{$bug_nb} = parse_submitter($soap_bugs->{$bug_nb}->{originator});
+
+				if (    defined $soap_bugs->{$bug_nb}->{done}
+				    and length $soap_bugs->{$bug_nb}->{done}) {
+					$bugdone = 1;
+# TODO: differentiate fixed and done ?
+				} else {
+					$bugwontfix = 1 if ($soap_bugs->{$bug_nb}->{tags} =~ m/\bwontfix\b/);
+				}
+
+				if ($bugdone) {
+					my $bts_url = "$BTS/cgi-bin/bugreport.cgi?bug=$bug_nb'}";
+					my $answer  = $Web_agent -> request(HTTP::Request -> new (GET => $bts_url));
+					return 0 unless $answer -> is_success;
+					$content =  $answer -> content_ref;
+					return 0 unless $$content;
+					$seen{$bug_nb} = 1;
+
+					$$content =~ /(.*?)Message #[0-9]+<\/a> received at (?:submit|maintonly)\@bugs\.debian\.org(.*)/ms;
+
+					my $v = $$content;
+					$v = $1 while ($v =~ /Message #[0-9]+<\/a> received at $bug_nb-(?:close|done)\@bugs\.debian\.org(.*)/ms);
+					$v =~ /^<b>Date:<\/b> (.*)/m;
+					$closedate{$bug_nb} = $1;
+					$closedate{$bug_nb} =
+					Debian::L10n::Utils::parse_date("Date: ".$closedate{$bug_nb} || $date);
+				}
+			}
+
+			if ($closedate{$bug_nb}) {
+				if ($closedate{$bug_nb} ne $date) {
+					$date = $closedate{$bug_nb};
+					$changed = 1 unless $changed == 2;
+				}
+				if ($status_from_db ne 'done') {
+					print "close #$bug_nb of $pkg (at $closedate{$bug_nb})\n";
+					$status_from_db = 'done';
+					$changed = 2;
+				}
+			} else {
+				if ($opendate{$bug_nb} ne $date) {
+					print "fix date of #$bug_nb of $pkg from $date to $opendate{$bug_nb}.\n";
+					$date = $opendate{$bug_nb};
+					$changed = 1 unless $changed == 2;
+				}
+				if ($bugwontfix and $status_from_db ne 'wontfix') {
+					print "wontfix #$bug_nb of $pkg\n";
+					$status_from_db = 'wontfix';
+					$changed = 2;
+				}
+			}
+			if ($bugsubmitter{$bug_nb} ne $translator) {
+				print "fix submitter of #$bug_nb of $pkg from $translator to $bugsubmitter{$bug_nb}.\n";
+				$translator = $bugsubmitter{$bug_nb};
+				$changed = 1 unless $changed == 2;
+			}
+			if ($status_from_db eq 'wontfix' and not $bugwontfix) {
+				print "removing wontfix tag for #$bug_nb of $pkg\n";
+				$status_from_db = 'bts';
+				$changed = 2;
+			}
+
+			if ($changed == 2) {
+				$db->add_status($pkg, $type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb);
+			} elsif ($changed == 1) {
+				$db->set_status($pkg, $type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb, $statusline);
+			}
+
+	return $changed;
+}
+
+=head1 LICENSE
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+=head1 COPYRIGHT (C)
+
+ 2003,2004 Tim Dijkstra
+ 2004 Nicolas Bertolissio
+ 2004 Martin Quinson
+ 2008 Nicolas François
+
+=cut
+
+1;

Modified: dl10n/trunk/lib/Debian/L10n/Db.pm
URL: http://svn.debian.org/wsvn/dl10n/trunk/lib/Debian/L10n/Db.pm?rev=1480&op=diff
==============================================================================
--- dl10n/trunk/lib/Debian/L10n/Db.pm (original)
+++ dl10n/trunk/lib/Debian/L10n/Db.pm Sat Nov 29 15:57:39 2008
@@ -36,6 +36,7 @@
 package Debian::L10n::Db;
 use strict;
 use Time::localtime;
+use Time::Local 'timelocal';
 use File::Path;
 use Data::Dumper;
 
@@ -59,7 +60,7 @@
 	        #   as fields of a package called '' (that's the same trick than in po files)
 	        
 	        # Language Year Month Message are for the spider
-	        headers => [qw{Date Language Year Month Message Page}],
+	        headers => [qw{Date Language Year Month Message Page Message-ID}],
                 #   Fields below are written into file in the same order
                 #   Package must always be the first field
 	    
@@ -459,6 +460,42 @@
     set_header($_[0],'Date',$_[1]);
 }
 
+=item clean-db
+
+clean_db cleans the database by removing data for a document whose status is
+'done' for more than three days.
+
+=cut
+
+sub clean_db($) {
+	my $db = shift;
+
+	my $now = time;
+	my $offset = 60 * 60 * 24 * 3; # 3 days in seconds
+
+	foreach my $pkg (sort( grep { $db->has_status($_) } $db->list_packages())) {
+READ_LINES:
+		if ($db->has_package($pkg)) { # The package may have disapeared after del_status
+			foreach my $statusline (@{$db->status($pkg)}) {
+				my ($type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb) = @{$statusline};
+				warn "$pkg\:$type\:$file does not specify the status\n"
+					unless defined $status_from_db;
+				next	unless defined $status_from_db;
+				next	unless ($status_from_db eq 'done' or $status_from_db eq 'fix');
+
+				my $date_done =  $date;
+				   $date_done =~ s/ .*//;
+				   $date_done =~ m/(\d\d\d\d)-(\d\d)-(\d+)/;
+				my $time_done = timelocal(0,0,0, $3,$2-1,$1);
+				if ($now - $time_done > $offset) {
+					print "Remove ".($bug_nb?"#$bug_nb":"DONE")." about the $type of $pkg because it's done since more than 3 days\n";
+					$db->del_status($pkg, $type, $file, $statusline);
+					goto READ_LINES;
+				}
+			}
+		}
+	}
+}
 
 =back
 

Added: dl10n/trunk/lib/Debian/L10n/Mail.pm
URL: http://svn.debian.org/wsvn/dl10n/trunk/lib/Debian/L10n/Mail.pm?rev=1480&op=file
==============================================================================
--- dl10n/trunk/lib/Debian/L10n/Mail.pm (added)
+++ dl10n/trunk/lib/Debian/L10n/Mail.pm Sat Nov 29 15:57:39 2008
@@ -1,0 +1,157 @@
+package Mail;
+
+use strict;
+use utf8;
+
+=head1 NAME
+
+dl10n-mail -- crawl translator mails (and BTS) for status updates
+
+=head1 SYNOPSIS
+
+dl10n-mail [options] mailbox lang+
+
+=head1 DESCRIPTION
+
+=cut
+
+use Debian::L10n::Db;
+use Debian::L10n::BTS;
+use Debian::L10n::Utils;
+use Mail::Box::Mbox;
+
+use Data::Dumper;
+
+my $VERSION = "1.0";				# External Version Number
+
+my $Status_file='./data/status.$lang';
+
+my $DEFAULT_MSGID;
+
+
+sub process($$$$) {
+	my $mboxfolder = shift;
+	my $lang = shift;
+	my $check_bts = shift;
+	my $init_msgId = shift;
+	$Status_file = shift || $Status_file;
+
+print STDERR "mboxfolder: $mboxfolder\n";
+
+	my $db = Debian::L10n::Db->new();
+	my $dbName = $Status_file;
+	$dbName =~ s/\$lang/$lang/g;
+	my $msgId;
+	if (-e $dbName) {
+		$db->read($dbName, 0);
+		$msgId = defined($init_msgId) ? $init_msgId : ($db->get_header('Message-ID') || $DEFAULT_MSGID );
+		print "Spider.pm Continue $lang from message $msgId\n";
+	} else {
+		print "Spider.pm Creating a new DB for $lang\n";
+#                        $year    = $init_year;
+#                        $month   = $init_month;
+#                        $message = $init_message;
+#                        $page    = 1;
+#                        die "Cannot guess the begin year. Please use the --year options\n"       unless defined($year);
+#                        die "Cannot guess the begin month. Please use the --month options\n"     unless defined($month);
+#                        die "Cannot guess the begin message. Please use the --message options\n" unless defined($message);
+	}
+
+	if (not defined $mboxfolder) {
+# TODO: use tmpfile
+		open TMP, ">", "/tmp/tata"
+		    or die "Cannot open ...: $!";
+		while (<STDIN>) {
+			print TMP $_;
+		}
+		close TMP;
+		$mboxfolder = "/tmp/tata";
+	}
+
+	my $f = Mail::Box::Mbox->new(folder => $mboxfolder, lock_type => undef)
+	    or die "Cannot open mailbox $mboxfolder.\n";
+
+	my $url = ""; # not used.
+
+	# Try to see if this Message-ID is in the mailbox
+	my $found = 0;
+	if (defined $msgId) {
+		foreach my $m ($f->messages) {
+			if ($m->messageId eq $msgId) {
+				$found = 1;
+				last;
+			}
+		}
+	}
+
+	my ($status, $type, $bug_nb, @names);
+	foreach my $m ($f->messages) {
+		if ($found) {
+			if ($m->messageId eq $msgId) {
+				$found = 0;
+			}
+			last;
+		}
+
+		($status, $type, $bug_nb, @names) = Debian::L10n::Utils::parse_subject($m->subject);
+		print "Mail.pm: ".$m->subject."\n";
+		next unless $status; # unparsable mail
+		my $translator;
+		my $sender = $m->sender;
+		if (defined $sender) {
+			$translator = Debian::L10n::Utils::parse_from($sender->format());
+		} else {
+			$translator = "UNDEF";
+		}
+		my  $date = Debian::L10n::Utils::parse_date("Date: ".$m->head->get('Date'));
+		# We keep this Message-ID to reference this message
+		my  $list = $m->messageId;
+		$msgId = $m->messageId;
+		foreach my $pkg (@names) {
+			my $file = $pkg;
+
+			if (($type eq 'webwml') or ($type eq 'wml')) {
+				$type = "wml";
+				$pkg  =~ s|/.*||;
+				if (($pkg=~/\./) && not($pkg =~ /\.wml$/)) {
+					$file =~ s|.*?/||;
+				} else {                        # www.debian.org
+					$pkg = 'www.debian.org';
+				}
+			} else {
+				$pkg  =~ s|/.*||;
+				$file =~ s|.*?/||;
+			}
+			if ($db->has_package($pkg)) {
+				# If a cycle was already finished.
+				# Clear the status of this file
+				# before we add status for the
+				# new cycle.
+				foreach my $statusline (@{$db->status($pkg)}) {
+					my ($type_from_db, $file_from_db, $date_from_db, $status_from_db, $translator_from_db, $list_from_db, $url_from_db, $bug_nb_from_db) = @{$statusline};
+					if (    $type eq $type_from_db
+					    and $file eq $file_from_db
+					    and $status_from_db eq 'done'
+					    and $status ne 'done') {
+						$db->del_status($pkg, $type, $file, $statusline);
+					}
+				}
+			}
+			unless ($db->has_package($pkg)) {
+				$db->package($pkg);
+				$db->add_package($pkg,$pkg);
+			}
+			$db->add_status($pkg, $type, $file, $date, $status, $translator, $list, $url, ($bug_nb || ""));
+		}
+	} continue {
+		$db->set_header('Message-ID',    $msgId   );
+		$db->write($dbName);
+	}
+
+	Debian::L10n::BTS::check_bts($db, $dbName) if $check_bts;
+	$db->write($dbName);
+	Debian::L10n::Db::clean_db($db);
+	$db->write($dbName);
+}
+
+1;

Modified: dl10n/trunk/lib/Debian/L10n/Spider.pm
URL: http://svn.debian.org/wsvn/dl10n/trunk/lib/Debian/L10n/Spider.pm?rev=1480&op=diff
==============================================================================
--- dl10n/trunk/lib/Debian/L10n/Spider.pm (original)
+++ dl10n/trunk/lib/Debian/L10n/Spider.pm Sat Nov 29 15:57:39 2008
@@ -25,16 +25,10 @@
 =cut
 
 use LWP::UserAgent;
-use Mail::Address;
-use Date::Parse;
-use Date::Format;
-use Digest::MD5 qw(md5_base64);
 use Debian::L10n::Db;
-use Time::Local 'timelocal';
-use Net::LDAP;
+use Debian::L10n::BTS;
+use Debian::L10n::Utils;
 use MIME::Base64;
-use HTML::Entities;
-use Encode qw(decode_utf8);
 use List::Util qw(max);
 
 use Data::Dumper;
@@ -50,190 +44,6 @@
 
 my $Web_agent =  LWP::UserAgent -> new;
 $Web_agent->env_proxy;
-
-
-my %Status = (
-	todo => 0,
-	itt  => 1,
-	rfr  => 2,
-	itr  => 3,
-	lcfc => 4,
-	bts  => 5,
-	fix  => 6,
-	done => 7,
-	hold => 8,
-	maj  => 9,
-	);
-
-my %Status_syn = (
-	ddr  => 'rfr',
-	relu => 'lcfc',
-	lfcf => 'lcfc', #this seems to be a current typo
-	taf  => 'todo',
-	);
-
-my %Type_syn = (
-	'debian-installer' => 'podebconf',	# debian-installer is a sub-category
-	'debconf-po'       => 'podebconf',	# typo
-	'po-debconf'       => 'podebconf',	# That's the way it should be witten in DB
-	'po-man'           => 'man',      	# nobody uses po4a so far, but it may come
-	);
-
-my %LanguageList = (
-	ar    => 'arabic',
-	ca    => 'catalan',
-	cs    => 'czech',
-	de    => 'german',
-	en    => 'english',
-	es    => 'spanish',
-	fr    => 'french',
-	nl    => 'dutch',
-#	pt    => 'portuguese',
-	pt_BR => 'portuguese',
-	ro    => 'romanian',
-	sv    => 'swedish',
-	tr    => 'turkish',
-	);
-my %Language = (
-	ar    => 'arabic',
-	ca    => 'catalan',
-	cs    => 'czech',
-	de    => 'german',
-	en    => 'english',
-	es    => 'spanish',
-	fr    => 'french',
-	nl    => 'dutch',
-#	pt    => 'portuguese',
-	pt_BR => 'brazilian',
-	ro    => 'romanian',
-	sv    => 'swedish',
-	tr    => 'turkish',
-	);
-
-=head2 parse_subject(SUBJECT)
-
-parse_subject extract valuable informations from a subject line.
-
-It gets a string containing the subject line (SUBJECT).
-
-It returns an array containing the status, type, filename strings and bug
-number if provided or 'undef' if no status is found.
-
-=cut
-
-sub parse_subject($) {
-	my $subject = shift;
-	$subject =~ s/^Subject: //;
-
-	$subject =~ m/^\p{IsSpace}*\[([^\]]*)\].*?([^:\p{IsSpace}]*):\/\/(\P{IsSpace}*)(.*)$/;
-
-	return undef unless $1;
-
-	my $status = lc $1;
-	my $type   = lc $2;
-	my $names  =    $3;
-	my $subject_end = $4;
-
-	# Mutt split long subject and can introduce tabulations even if there were no spaces.
-	# We remove the tabulations if inside {}, which deals with most of the long subjects.
-	while (    defined $subject_end
-	       and $names =~ m/\{[^\}]*$/
-	       and $subject_end =~ m/^\t+(\S*)(.*)\}(.*)$/) {
-		$names .= $1;
-		$subject_end = $2."\}".$3;
-	}
-	if (defined $subject_end) {
-		if ($subject_end =~ m/^(\S+)/) {
-			$names .= $1;
-		}
-	}
-
-	$status =~ s/\p{IsSpace}//g;
-	$status =~ s/#?\p{IsDigit}*$//;
-	$status = $Status_syn{$status} if (defined $Status_syn{$status} && defined $Status{$Status_syn{$status}});
-	return undef unless defined $Status{$status};
-
-	$type = $Type_syn{$type} if defined $Type_syn{$type};
-
-	$subject =~ m/#\p{IsSpace}*(\p{IsDigit}+)/;
-	my $bug_nb = $1 || undef;
-
-	my @names;
-	if ($names =~ m/{/) {
-		$names =~ m/^([^{]*){([^}]*)}(.*)$/;
-		my $begin = $1 || "";
-		my $end   = $3 || "";
-		if (defined $2) {
-			@names = map { "$begin$_$end" } split(/,/, $2);
-		} else {
-			warn "Could not parse Subject: '$subject'\n";
-		}
-	} else {
-		@names = ($names);
-	}
-
-#	print "Status='$status'; Type='$type'; ". (defined $bug_nb ? "bug_nb='$bug_nb'":"[no bug]")."\n";
-	return ($status, $type, $bug_nb, @names);
-}
-
-
-=head2 parse_from(FROM)
-
-parse_from extract the sender name from the 'From:' field.
-
-The name is build from the phrase part of the field, or if none is found, from
-the comment part where parentheses are removed, or if none is found, from the
-address where all non-alphanumeric characters are turned into spaces.
-
-It gets a string containing the 'From:' field (FROM).
-
-It returns a string containing the name.
-
-=cut
-
-sub parse_from($) {
-	$_ = shift;
-	s/^From: //;
-	s/"//g;
-	s/;/SEMICOLON/g;
-
-	my @from = Mail::Address -> parse($_);
-
-	$_ = $from[0]->phrase;
-	s/SEMICOLON/;/g;
-	s/ ; /;/g;
-	return $_ if $_;
-
-	$_ = $from[0]->comment;
-	s/^\p{IsSpace}*\(?//;
-	s/\)?\p{IsSpace}*$//;
-	s/SEMICOLON/;/g;
-	s/ ; /;/g;
-	return $_ if $_;
-
-	$_ = $from[0]->address;
-	s/\P{IsAlnum}/ /g;
-	return $_;
-}
-
-
-=head2 parse_date(DATE)
-
-parse_date extract the date from a 'Date:' field.
-
-It gets a string containing the 'Date:' field (DATE).
-
-It returns a string containing the date in ISO format yyyy-mm-dd hh:mm:ss
-±hh:mm based on GMT
-
-=cut
-
-sub parse_date($) {
-	$_ = shift;
-	s/^Date: //;
-
-	Date::Format::time2str("%Y-%m-%d %T %z", Date::Parse::str2time($_), "GMT");
-}
 
 
 =head2 get_header(HTML)
@@ -279,16 +89,6 @@
 error occured.
 
 =cut
-
-sub parse_submitter($) {
-  my $submitter = shift;
-  $submitter = decode_base64($submitter) if $submitter =~ /^: /;
-  $submitter = encode_entities(decode_utf8($submitter));
-  $submitter =~ s/</</;
-  $submitter =~ s/>/>/;
-  $submitter = parse_from($submitter);
-  return $submitter;
-}
 
 sub get_message($$$$) {
 	my $language = shift;
@@ -368,182 +168,6 @@
 	return \%messages;
 }
 
-=head2 check_bts
-
-check_bts searches in the BTS for open bugs, it fixes the bug submission date
-if necessary, checks whether the bug is fixed or closed or not and updates the
-database accordingly.
-
-=cut
-
-sub check_bts($) {
-	my $db = shift;
-	check_bts_soap($db);
-}
-
-my $BTS = "http://bugs.debian.org";
-#my $BTS = "http://bugs.donarmstrong.com";
-use Data::Dumper;
-use SOAP::Lite;
-
-my $soap = SOAP::Lite->uri('Debbugs/SOAP')->proxy("$BTS/cgi-bin/soap.cgi");
-
-my %seen;
-my %opendate;
-my %closedate;
-my %bugsubmitter;
-sub check_bts_soap($) {
-	my $db = shift;
-
-	foreach my $pkg (sort (grep { $db->has_status($_) } $db->list_packages())) {
-	    PKG_BTS:
-		foreach my $statusline (@{$db->status($pkg)}) {
-			my ($type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb) = @{$statusline};
-			warn "$pkg\:$type\:$file does not specify the status\n"
-				unless defined $status_from_db;
-			next	unless defined $status_from_db;
-			next	unless $bug_nb;
-			next	unless    ($status_from_db eq 'bts')
-			               || ($status_from_db eq 'wontfix')
-			               || ($status_from_db eq 'fix');
-			check_bts_bug_soap ($db, $pkg, $statusline);
-			# Note: this will only add the DONE tags if bugs are now closed.
-			#       There is no need to parse these additional statusline.
-		}
-	}
-}
-
-sub check_bts_bug_soap ($$$) {
-	my $db = shift;
-	my $pkg = shift;
-	my $statusline = shift;
-	my $changed = 0; # 0: No changes
-	                 # 1: Updated
-	                 # 2: New status added
-			my ($type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb) = @{$statusline};
-			my $bugwontfix = 0;
-
-			unless ($seen{$bug_nb}) {
-				my $bugdone = 0;
-				my $content;
-				my $soap_bugs = $soap->get_status($bug_nb)->result();
-				if (not defined $soap_bugs or not length $soap_bugs) {
-					warn "Cannot find bug '$bug_nb' ($pkg: $type#$file)\n";
-					return 0;
-				}
-
-				my $pkg_bug = $soap_bugs->{$bug_nb}->{package};
-				if (    ($pkg_bug ne $pkg)
-				    and ($pkg_bug ne "wnpp")) {
-					print STDERR "Warning: #$bug_nb filled against $pkg_bug but $pkg is in the database\n";
-				}
-				$opendate{$bug_nb} = Date::Format::time2str("%Y-%m-%d %T %z", $soap_bugs->{$bug_nb}->{date}, "GMT");
-				$bugsubmitter{$bug_nb} = parse_submitter($soap_bugs->{$bug_nb}->{originator});
-
-				if (    defined $soap_bugs->{$bug_nb}->{done}
-				    and length $soap_bugs->{$bug_nb}->{done}) {
-					$bugdone = 1;
-# TODO: differentiate fixed and done ?
-				} else {
-					$bugwontfix = 1 if ($soap_bugs->{$bug_nb}->{tags} =~ m/\bwontfix\b/);
-				}
-
-				if ($bugdone) {
-					my $bts_url = "$BTS/cgi-bin/bugreport.cgi?bug=$bug_nb'}";
-					my $answer  = $Web_agent -> request(HTTP::Request -> new (GET => $bts_url));
-					return 0 unless $answer -> is_success;
-					$content =  $answer -> content_ref;
-					return 0 unless $$content;
-					$seen{$bug_nb} = 1;
-
-					$$content =~ /(.*?)Message #[0-9]+<\/a> received at (?:submit|maintonly)\@bugs\.debian\.org(.*)/ms;
-
-					my $v = $$content;
-					$v = $1 while ($v =~ /Message #[0-9]+<\/a> received at $bug_nb-(?:close|done)\@bugs\.debian\.org(.*)/ms);
-					$v =~ /^<b>Date:<\/b> (.*)/m;
-					$closedate{$bug_nb} = $1;
-					$closedate{$bug_nb} = parse_date($closedate{$bug_nb} || $date);
-				}
-			}
-
-			if ($closedate{$bug_nb}) {
-				if ($closedate{$bug_nb} ne $date) {
-					$date = $closedate{$bug_nb};
-					$changed = 1 unless $changed == 2;
-				}
-				if ($status_from_db ne 'done') {
-					print "close #$bug_nb of $pkg (at $closedate{$bug_nb})\n";
-					$status_from_db = 'done';
-					$changed = 2;
-				}
-			} else {
-				if ($opendate{$bug_nb} ne $date) {
-					print "fix date of #$bug_nb of $pkg from $date to $opendate{$bug_nb}.\n";
-					$date = $opendate{$bug_nb};
-					$changed = 1 unless $changed == 2;
-				}
-				if ($bugwontfix and $status_from_db ne 'wontfix') {
-					print "wontfix #$bug_nb of $pkg\n";
-					$status_from_db = 'wontfix';
-					$changed = 2;
-				}
-			}
-			if ($bugsubmitter{$bug_nb} ne $translator) {
-				print "fix submitter of #$bug_nb of $pkg from $translator to $bugsubmitter{$bug_nb}.\n";
-				$translator = $bugsubmitter{$bug_nb};
-				$changed = 1 unless $changed == 2;
-			}
-			if ($status_from_db eq 'wontfix' and not $bugwontfix) {
-				print "removing wontfix tag for #$bug_nb of $pkg\n";
-				$status_from_db = 'bts';
-				$changed = 2;
-			}
-
-			if ($changed == 2) {
-				$db->add_status($pkg, $type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb);
-			} elsif ($changed == 1) {
-				$db->set_status($pkg, $type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb, $statusline);
-			}
-
-	return $changed;
-}
-
-=head2 clean-db
-
-clean_db cleans the database by removing data for a document whose status is
-'done' for more than three days.
-
-=cut
-
-sub clean_db($) {
-	my $db = shift;
-
-	my $now = time;
-	my $offset = 60 * 60 * 24 * 3; # 3 days in seconds
-
-	foreach my $pkg (sort( grep { $db->has_status($_) } $db->list_packages())) {
-READ_LINES:
-		if ($db->has_package($pkg)) { # The package may have disapeared after del_status
-		foreach my $statusline (@{$db->status($pkg)}) {
-			my ($type, $file, $date, $status_from_db, $translator, $list, $url, $bug_nb) = @{$statusline};
-			warn "$pkg\:$type\:$file does not specify the status\n"
-				unless defined $status_from_db;
-			next	unless defined $status_from_db;
-			next	unless ($status_from_db eq 'done' or $status_from_db eq 'fix');
-
-			my $date_done =  $date;
-			   $date_done =~ s/ .*//;
-			   $date_done =~ m/(\d\d\d\d)-(\d\d)-(\d+)/;
-			my $time_done = timelocal(0,0,0, $3,$2-1,$1);
-			if ($now - $time_done > $offset) {
-				print "Remove ".($bug_nb?"#$bug_nb":"DONE")." about the $type of $pkg because it's done since more than 3 days old\n";
-				$db->del_status($pkg, $type, $file, $statusline);
-				goto READ_LINES;
-			}
-		}
-		}
-	}
-}
 
 sub spider($$$$$@) {
 	my $init_year    = shift;
@@ -555,14 +179,15 @@
 
 	my @langs;
 	if (m/^all$/i) {
-		@langs = keys %Language;
+		@langs = keys %Debian::L10n::Utils::Language;
 	} else {
 		@langs = ($_, @_);
 	}
 
 	while (my $lang = shift @langs) {
-		die "Spider.pm: Lang '$lang' unknown. Please update \%Language.\n" unless $Language{$lang};
-		print "Spider.pm $Language{$lang}\n";
+		die "Spider.pm: Lang '$lang' unknown. Please update \%Language.\n"
+		    unless $Debian::L10n::Utils::Language{$lang};
+		print "Spider.pm $Debian::L10n::Utils::Language{$lang}\n";
 		my $year;
 		my $month;
 		my $message;
@@ -576,9 +201,9 @@
 			$month   = (defined($init_month)   ? $init_month   : ($db->get_header('Month')   || $DEFAULT_MONTH  ));
 			$message = (defined($init_message) ? $init_message : ($db->get_header('Message') || $DEFAULT_MESSAGE));
 			$page    = (defined($init_message) ? 1             : ($db->get_header('Page')    || 1               ));
-			print "Spider.pm Continue $Language{$lang} from message $year/$month/$message\n";
+			print "Spider.pm Continue $Debian::L10n::Utils::Language{$lang} from message $year/$month/$message\n";
 		} else {
-			print "Spider.pm Creating a new DB for $Language{$lang}\n";
+			print "Spider.pm Creating a new DB for $Debian::L10n::Utils::Language{$lang}\n";
 			$year    = $init_year;
 			$month   = $init_month;
 			$message = $init_message;
@@ -593,7 +218,7 @@
 		$message++;
 
 		while (1) {
-			my $messages = get_indexpage($LanguageList{$lang}, $year, $month, $page);
+			my $messages = get_indexpage($Debian::L10n::Utils::LanguageList{$lang}, $year, $month, $page);
 
 			# if no more page, check if we need to look at next month
 			unless ($messages) {
@@ -614,19 +239,19 @@
 				my $key = sprintf("%05d", $message);
 				my ($status, $type, $bug_nb, @names);
 				if (defined ${$messages}{$key}) {
-				  ($status, $type, $bug_nb, @names) = parse_subject(${$messages}{$key});
+				  ($status, $type, $bug_nb, @names) = Debian::L10n::Utils::parse_subject(${$messages}{$key});
 				  print "Spider.pm: [$lang:$year/$month/$message] ${$messages}{$key}\n";
-				  next unless $status; 	# unparsable mail
+				  next unless $status; # unparsable mail
 				}
 
-				my $html = get_message($LanguageList{$lang}, $year, $month, $message);
+				my $html = get_message($Debian::L10n::Utils::LanguageList{$lang}, $year, $month, $message);
 				next unless defined $html;
 
 				my $header = get_header($html);
 
 				if (not defined ${$messages}{$key}) {
 				  my ($s) = grep(/Subject: / , @$header);
-				  ($status, $type, $bug_nb, @names) = parse_subject($s);
+				  ($status, $type, $bug_nb, @names) = Debian::L10n::Utils::parse_subject($s);
 				  print "Spider.pm: [$lang:$year/$month/$message] $s\n";
 				  next unless $status; 	# unparsable mail
 				}
@@ -634,10 +259,10 @@
 				my ($t) = grep(/From: / , @$header);
 				# In case there's no From header:
 				$t = "UNKNOWN" unless defined $t;
-				my  $translator = parse_from($t);
+				my  $translator = Debian::L10n::Utils::parse_from($t);
 
 				my ($d) = grep(/Date: / , @$header);
-				my  $date = parse_date($d);
+				my  $date = Debian::L10n::Utils::parse_date($d);
 
 				my  $list = sprintf("%04d-%02d-%05d", $year, $month, $message);
 
@@ -700,9 +325,9 @@
 			$page++;
 		}
 
-		check_bts($db) if $check_bts;
+		Debian::L10n::BTS::check_bts($db) if $check_bts;
 		$db->write($dbName);
-		clean_db($db);
+		Debian::L10n::Db::clean_db($db);
 		$db->write($dbName);
 	}
 }

Added: dl10n/trunk/lib/Debian/L10n/Utils.pm
URL: http://svn.debian.org/wsvn/dl10n/trunk/lib/Debian/L10n/Utils.pm?rev=1480&op=file
==============================================================================
--- dl10n/trunk/lib/Debian/L10n/Utils.pm (added)
+++ dl10n/trunk/lib/Debian/L10n/Utils.pm Sat Nov 29 15:57:39 2008
@@ -1,0 +1,238 @@
+package Debian::L10n::Utils;
+
+use strict;
+use utf8;
+
+use Mail::Address;
+use Date::Parse;
+use Date::Format;
+use Encode;
+
+my $VERSION = "1.0";				# External Version Number
+
+my %Status = (
+	todo => 0,
+	itt  => 1,
+	rfr  => 2,
+	itr  => 3,
+	lcfc => 4,
+	bts  => 5,
+	fix  => 6,
+	done => 7,
+	hold => 8,
+	maj  => 9,
+	);
+
+my %Status_syn = (
+	ddr  => 'rfr',
+	relu => 'lcfc',
+	lfcf => 'lcfc', #this seems to be a current typo
+	taf  => 'todo',
+	);
+
+my %Type_syn = (
+	'debian-installer' => 'podebconf',	# debian-installer is a sub-category
+	'debconf-po'       => 'podebconf',	# typo
+	'po-debconf'       => 'podebconf',	# That's the way it should be witten in DB
+	'po-man'           => 'man',      	# nobody uses po4a so far, but it may come
+	);
+
+my %LanguageList = (
+	ar    => 'arabic',
+	ca    => 'catalan',
+	cs    => 'czech',
+	de    => 'german',
+	en    => 'english',
+	es    => 'spanish',
+	fr    => 'french',
+	nl    => 'dutch',
+#	pt    => 'portuguese',
+	pt_BR => 'portuguese',
+	ro    => 'romanian',
+	sv    => 'swedish',
+	tr    => 'turkish',
+	);
+
+my %Language = (
+	ar    => 'arabic',
+	ca    => 'catalan',
+	cs    => 'czech',
+	de    => 'german',
+	en    => 'english',
+	es    => 'spanish',
+	fr    => 'french',
+	nl    => 'dutch',
+#	pt    => 'portuguese',
+	pt_BR => 'brazilian',
+	ro    => 'romanian',
+	sv    => 'swedish',
+	tr    => 'turkish',
+	);
+
+
+=head2 parse_subject(SUBJECT)
+
+parse_subject extract valuable informations from a subject line.
+
+It gets a string containing the subject line (SUBJECT).
+
+It returns an array containing the status, type, filename strings and bug
+number if provided or 'undef' if no status is found.
+
+=cut
+
+sub parse_subject($) {
+	my $subject = shift;
+	$subject =~ s/^Subject: //;
+
+	$subject =~ m/^\p{IsSpace}*\[([^\]]*)\].*?([^:\p{IsSpace}]*):\/\/(\P{IsSpace}*)(.*)$/;
+
+	return undef unless $1;
+
+	my $status = lc $1;
+	my $type   = lc $2;
+	my $names  =    $3;
+	my $subject_end = $4;
+
+	# Mutt split long subject and can introduce tabulations even if there were no spaces.
+	# We remove the tabulations if inside {}, which deals with most of the long subjects.
+	while (    defined $subject_end
+	       and $names =~ m/\{[^\}]*$/
+	       and $subject_end =~ m/^\t+(\S*)(.*)\}(.*)$/) {
+		$names .= $1;
+		$subject_end = $2."\}".$3;
+	}
+	if (defined $subject_end) {
+		if ($subject_end =~ m/^(\S+)/) {
+			$names .= $1;
+		}
+	}
+
+	$status =~ s/\p{IsSpace}//g;
+	$status =~ s/#?\p{IsDigit}*$//;
+	$status = $Status_syn{$status} if (defined $Status_syn{$status} && defined $Status{$Status_syn{$status}});
+	return undef unless defined $Status{$status};
+
+	$type = $Type_syn{$type} if defined $Type_syn{$type};
+
+	$subject =~ m/#\p{IsSpace}*(\p{IsDigit}+)/;
+	my $bug_nb = $1 || undef;
+
+	my @names;
+	if ($names =~ m/{/) {
+		$names =~ m/^([^{]*){([^}]*)}(.*)$/;
+		my $begin = $1 || "";
+		my $end   = $3 || "";
+		if (defined $2) {
+			@names = map { "$begin$_$end" } split(/,/, $2);
+		} else {
+			warn "Could not parse Subject: '$subject'\n";
+		}
+	} else {
+		@names = ($names);
+	}
+
+#	print "Status='$status'; Type='$type'; ". (defined $bug_nb ? "bug_nb='$bug_nb'":"[no bug]")."\n";
+	return ($status, $type, $bug_nb, @names);
+}
+
+
+=head2 parse_from(FROM)
+
+parse_from extract the sender name from the 'From:' field.
+
+The name is build from the phrase part of the field, or if none is found, from
+the comment part where parentheses are removed, or if none is found, from the
+address where all non-alphanumeric characters are turned into spaces.
+
+It gets a string containing the 'From:' field (FROM).
+
+It returns a string containing the name.
+
+=cut
+
+sub parse_from($) {
+	$_ = shift;
+
+	return "UNDEF" if not defined $_;
+
+	Encode::from_to($_, 'MIME-Header', 'utf8');
+
+	s/^From: //;
+	s/"//g;
+	s/;/SEMICOLON/g;
+
+	my @from = Mail::Address -> parse($_);
+
+	$_ = $from[0]->phrase;
+	s/SEMICOLON/;/g;
+	s/ ; /;/g;
+
+	unless ($_) {
+		$_ = $from[0]->comment;
+		s/^\p{IsSpace}*\(?//;
+		s/\)?\p{IsSpace}*$//;
+		s/SEMICOLON/;/g;
+		s/ ; /;/g;
+	}
+
+	unless ($_) {
+		$_ = $from[0]->address;
+		s/\P{IsAlnum}/ /g;
+	}
+
+	$_ =~ s/^\s*"(.*)"\s*$/$1/g;
+
+	return $_;
+}
+
+
+=head2 parse_date(DATE)
+
+parse_date extract the date from a 'Date:' field.
+
+It gets a string containing the 'Date:' field (DATE).
+
+It returns a string containing the date in ISO format yyyy-mm-dd hh:mm:ss
+±hh:mm based on GMT
+
+=cut
+
+sub parse_date($) {
+	my $d = shift;
+
+	my $date;
+
+	if ($d =~ m/^Date: (.*)$/) {
+		$date = Date::Format::time2str("%Y-%m-%d %T %z", Date::Parse::str2time($1), "GMT");
+	} else {
+		$date = Date::Format::time2str("%Y-%m-%d %T %z", $d, "GMT");
+	}
+
+	return $date;
+}
+
+=head1 LICENSE
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+# You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+=head1 COPYRIGHT (C)
+
+ 2003,2004 Tim Dijkstra
+ 2004 Nicolas Bertolissio
+ 2004 Martin Quinson
+ 2008 Nicolas François
+
+=cut
+
+1;




More information about the Debian-l10n-commits mailing list