r21116 - in /trunk/libparse-mediawikidump-perl: ./ debian/ examples/ lib/Parse/ t/

gregoa at users.alioth.debian.org gregoa at users.alioth.debian.org
Sat Jun 14 21:13:17 UTC 2008


Author: gregoa
Date: Sat Jun 14 21:13:16 2008
New Revision: 21116

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=21116
Log:
New upstream release.

Added:
    trunk/libparse-mediawikidump-perl/TODO
      - copied unchanged from r21115, branches/upstream/libparse-mediawikidump-perl/current/TODO
    trunk/libparse-mediawikidump-perl/examples/
      - copied from r21115, branches/upstream/libparse-mediawikidump-perl/current/examples/
    trunk/libparse-mediawikidump-perl/t/links_test.sql
      - copied unchanged from r21115, branches/upstream/libparse-mediawikidump-perl/current/t/links_test.sql
    trunk/libparse-mediawikidump-perl/t/pages_test.xml
      - copied unchanged from r21115, branches/upstream/libparse-mediawikidump-perl/current/t/pages_test.xml
Removed:
    trunk/libparse-mediawikidump-perl/links_test.sql
    trunk/libparse-mediawikidump-perl/pages_test.xml
    trunk/libparse-mediawikidump-perl/t/pod-coverage.t
    trunk/libparse-mediawikidump-perl/t/pod.t
Modified:
    trunk/libparse-mediawikidump-perl/Changes
    trunk/libparse-mediawikidump-perl/MANIFEST
    trunk/libparse-mediawikidump-perl/META.yml
    trunk/libparse-mediawikidump-perl/Makefile.PL
    trunk/libparse-mediawikidump-perl/debian/changelog
    trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm
    trunk/libparse-mediawikidump-perl/t/links-compat.t
    trunk/libparse-mediawikidump-perl/t/links.t
    trunk/libparse-mediawikidump-perl/t/pages-compat.t
    trunk/libparse-mediawikidump-perl/t/pages.t

Modified: trunk/libparse-mediawikidump-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/Changes?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/Changes (original)
+++ trunk/libparse-mediawikidump-perl/Changes Sat Jun 14 21:13:16 2008
@@ -1,6 +1,18 @@
 Revision history for Parse-MediaWikiDump
-0.40
-	Jun 21, 2006
+
+0.51	May 31, 2008
+	* Fix for bug 36255 "Parse::MediaWikiDump::page::namespace may return
+  	  a string which is not really a namespace" provided by Amir E. Aharoni.
+	* Moved test data into t/ and moved speed_test.pl into examples/
+	* Exceedingly complicated functions (parse_head() and parse_page()) are
+	  not funny. Added some comments on how to rectify that situation.
+	* Tightened up the tests a little bit.
+
+0.50	Jun 27, 2006
+	* Added category links parser.
+	* Removed all instances of shift() from the code.
+
+0.40	Jun 21, 2006
 	* Increased processing speed by around 40%!
 
 0.33	Jun 18, 2006

Modified: trunk/libparse-mediawikidump-perl/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/MANIFEST?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/MANIFEST (original)
+++ trunk/libparse-mediawikidump-perl/MANIFEST Sat Jun 14 21:13:16 2008
@@ -1,15 +1,15 @@
-pages_test.xml
-links_test.sql
 Changes
 MANIFEST
 META.yml # Will be created by "make dist"
 Makefile.PL
 README
+examples/speed_test
 lib/Parse/MediaWikiDump.pm
 t/00-load.t
-t/pod-coverage.t
-t/pod.t
 t/pages.t
 t/links.t
 t/pages-compat.t
 t/links-compat.t
+t/pages_test.xml
+t/links_test.sql
+TODO

Modified: trunk/libparse-mediawikidump-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/META.yml?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/META.yml (original)
+++ trunk/libparse-mediawikidump-perl/META.yml Sat Jun 14 21:13:16 2008
@@ -1,10 +1,11 @@
 # http://module-build.sourceforge.net/META-spec.html
 #XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
 name:         Parse-MediaWikiDump
-version:      0.40
+version:      0.51
 version_from: lib/Parse/MediaWikiDump.pm
 installdirs:  site
 requires:
+    List::Util:                    0
     Test::More:                    0
     XML::Parser:                   0
 

Modified: trunk/libparse-mediawikidump-perl/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/Makefile.PL?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/Makefile.PL (original)
+++ trunk/libparse-mediawikidump-perl/Makefile.PL Sat Jun 14 21:13:16 2008
@@ -9,6 +9,7 @@
     ABSTRACT_FROM       => 'lib/Parse/MediaWikiDump.pm',
     PL_FILES            => {},
     PREREQ_PM => {
+ 	'List::Util' => 0,
         'Test::More' => 0,
 	'XML::Parser' => 0,
     },

Modified: trunk/libparse-mediawikidump-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/debian/changelog?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/debian/changelog (original)
+++ trunk/libparse-mediawikidump-perl/debian/changelog Sat Jun 14 21:13:16 2008
@@ -1,3 +1,9 @@
+libparse-mediawikidump-perl (0.51-1) UNRELEASED; urgency=low
+
+  * New upstream release.
+
+ -- gregor herrmann <gregoa at debian.org>  Sat, 14 Jun 2008 23:11:18 +0200
+
 libparse-mediawikidump-perl (0.40-5) unstable; urgency=low
 
   * New maintainer is Debian perl group

Modified: trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm (original)
+++ trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm Sat Jun 14 21:13:16 2008
@@ -1,9 +1,7 @@
 package Parse::MediaWikiDump;
-our $VERSION = '0.40';
+our $VERSION = '0.51';
+
 #the POD is at the end of this file
-#avoid shift() - it is computationally more expensive than pop
-#and shifting values for subroutine input should be avoided in
-#any subroutines that get called often, like the handlers
 
 package Parse::MediaWikiDump::Pages;
 
@@ -16,22 +14,22 @@
 
 use strict;
 use warnings;
+use List::Util;
 use XML::Parser;
 
 #tokens in the buffer are an array ref with the 0th element specifying
 #its type; these are the constants for those types. 
 
 sub new {
-	my $class = shift;
-	my $source = shift;
+	my ($class, $source) = @_;
 	my $self = {};
 
 	bless($self, $class);
 
 	$$self{PARSER} = XML::Parser->new(ProtocolEncoding => 'UTF-8');
 	$$self{PARSER}->setHandlers('Start', \&start_handler,
-				    'End', \&end_handler);
-        $$self{EXPAT} = $$self{PARSER}->parse_start(state => $self);
+					'End', \&end_handler);
+		$$self{EXPAT} = $$self{PARSER}->parse_start(state => $self);
 	$$self{BUFFER} = []; 
 	$$self{CHUNK_SIZE} = 32768;
 	$$self{BUF_LIMIT} = 10000;
@@ -45,7 +43,7 @@
 }
 
 sub next {
-	my $self = shift;
+	my ($self) = @_;
 	my $buffer = $$self{BUFFER};
 	my $offset;
 	my @page;
@@ -75,9 +73,12 @@
 
 #outputs a nicely formated representation of the tokens on the buffer specified
 sub dump {
-	my $self = shift;
-	my $buffer = shift || $$self{BUFFER};
+	my ($self, $buffer) = @_;
 	my $offset = 0;
+
+	if (! defined($buffer)) {
+		$buffer = $$self{BUFFER};
+	}
 
 	foreach my $i (0 .. $#$buffer) {
 		my $token = $$buffer[$i];
@@ -118,37 +119,42 @@
 }
 
 sub sitename {
+	my ($self) = @_;
+	return $$self{HEAD}{sitename};
+}
+
+sub base {
+	my ($self) = @_;
+	return $$self{HEAD}{base};
+}
+
+sub generator {
+	my ($self) = @_;
+	return $$self{HEAD}{generator};
+}
+
+sub case {
+	my ($self) = @_;
+	return $$self{HEAD}{case};
+}
+
+sub namespaces {
+	my ($self) = @_;
+	return $$self{HEAD}{namespaces};
+}
+
+sub namespaces_names {
 	my $self = shift;
-	return $$self{HEAD}{sitename};
-}
-
-sub base {
-	my $self = shift;
-	return $$self{HEAD}{base};
-}
-
-sub generator {
-	my $self = shift;
-	return $$self{HEAD}{generator};
-}
-
-sub case {
-	my $self = shift;
-	return $$self{HEAD}{case};
-}
-
-sub namespaces {
-	my $self = shift;
-	return $$self{HEAD}{namespaces};
+	return $$self{HEAD}{namespaces_names};
 }
 
 sub current_byte {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self{BYTE};
 }
 
 sub size {
-	my $self = shift;
+	my ($self) = @_;
 	
 	return undef unless defined $$self{SOURCE_FILE};
 
@@ -161,14 +167,13 @@
 
 #replaced by next()
 sub page {
-	my $self = shift;
+	my ($self) = @_;
 	return $self->next(@_);
 }
 
 #private functions with OO interface
 sub open {
-	my $self = shift;
-	my $source = shift;
+	my ($self, $source) = @_;
 
 	if (ref($source) eq 'GLOB') {
 		$$self{SOURCE} = $source;
@@ -186,7 +191,7 @@
 }
 
 sub init {
-	my $self = shift;
+	my ($self) = @_;
 	my $offset;
 	my @head;
 
@@ -248,12 +253,16 @@
 	return -1;
 }
 
-#this function is very frightning =)
+#this function is very frightening :-( 
+#a better alternative would be to have each part of the stack handled by a 
+#function that handles all the logic for that specific node in the tree
 sub parse_head {
-	my $self = shift;
-	my $buffer = shift;
+	my ($self, $buffer) = @_;
 	my $state = 'start';
-	my %data = (namespaces => []);
+	my %data = (
+		namespaces			=> [],
+		namespaces_names	=> [],
+	);
 
 	for (my $i = 0; $i <= $#$buffer; $i++) {
 		my $token = $$buffer[$i];
@@ -375,6 +384,7 @@
 			}
 
 			push(@{$data{namespaces}}, [$key, $name]);
+			push(@{$data{namespaces_names}}, $name);
 
 			$token = $$buffer[++$i];
 
@@ -408,10 +418,11 @@
 	return 1;
 }
 
-#this function is very frightning =)
+#this function is very frightening :-(
+#see the parse_head function comments for thoughts on improving these
+#awful functions
 sub parse_page {
-	my $self = shift;
-	my $buffer = shift;
+	my ($self, $buffer) = @_;
 	my %data;
 	my $state = 'start';
 
@@ -621,6 +632,18 @@
 			}
 		} else {
 			die "unknown state: $state";
+		}
+	}
+
+	$data{namespace} = '';
+	# Many pages just have a : in the title, but it's not necessarily
+	# a namespace designation.
+	if ($data{title} =~ m/^([^:]+)\:/) {
+		my $possible_namespace = $1;
+		if (List::Util::first { $_ eq $possible_namespace }
+			@{ $self->namespaces_names() })
+		{
+			$data{namespace} = $possible_namespace;
 		}
 	}
 
@@ -647,7 +670,7 @@
 }
 
 sub token2text {
-	my $token = shift;
+	my ($token) = @_;
 
 	if (ref $token eq 'ARRAY') {
 		return "<$$token[0]>";
@@ -674,9 +697,9 @@
 sub start_handler {
 	my ($p, $tag, %atts) = @_;	
 	my $self = $p->{state};
-	my $good_tags = $self->{GOOD_TAGS};
-
-	push @{ $self->{BUFFER} }, [$tag, \%atts];
+	my $good_tags = $$self{GOOD_TAGS};
+
+	push @{ $$self{BUFFER} }, [$tag, \%atts];
 
 	if (defined($good_tags->{$tag})) {
 		$p->setHandlers(Char => \&char_handler);
@@ -689,7 +712,7 @@
 	my ($p, $tag) = @_;
 	my $self = $p->{state};
 
-	push @{ $self->{BUFFER} }, ["/$tag"];
+	push @{ $$self{BUFFER} }, ["/$tag"];
 
 	$p->setHandlers(Char => undef);
 	
@@ -730,23 +753,13 @@
 }
 
 sub namespace {
-	my $self = shift;
-
-	return $$self{CACHE}{namespace} if defined($$self{CACHE}{namespace});
-
-	my $title = $$self{DATA}{title};
-
-	if ($title =~ m/^([^:]+)\:/) {
-		$$self{CACHE}{namespace} = $1;
-		return $1;
-	} else {
-		$$self{CACHE}{namespace} = '';
-		return '';
-	}
+	my ($self) = @_;
+
+	return $$self{DATA}{namespace};
 }
 
 sub categories {
-	my $self = shift;
+	my ($self) = @_;
 	my $anchor = $$self{CATEGORY_ANCHOR};
 
 	return $$self{CACHE}{categories} if defined($$self{CACHE}{categories});
@@ -770,7 +783,7 @@
 }
 
 sub redirect {
-	my $self = shift;
+	my ($self) = @_;
 	my $text = $$self{DATA}{text};
 
 	return $$self{CACHE}{redirect} if exists($$self{CACHE}{redirect});
@@ -785,42 +798,42 @@
 }
 
 sub title {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self{DATA}{title};
 }
 
 sub id {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self{DATA}{id};
 }
 
 sub revision_id {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self{DATA}{revision_id};
 }
 
 sub timestamp {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self{DATA}{timestamp};
 }
 
 sub username {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self{DATA}{username};
 }
 
 sub userid {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self{DATA}{userid};
 }
 
 sub minor {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self{DATA}{minor};
 }
 
 sub text {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self{DATA}{text};
 }
 
@@ -830,8 +843,7 @@
 use warnings;
 
 sub new {
-	my $class = shift;
-	my $source = shift;
+	my ($class, $source) = @_;
 	my $self = {};
 	$$self{BUFFER} = [];
 
@@ -844,7 +856,7 @@
 }
 
 sub next {
-	my $self = shift;
+	my ($self) = @_;
 	my $buffer = $$self{BUFFER};
 	my $link;
 
@@ -862,7 +874,7 @@
 
 #private functions with OO interface
 sub parse_more {
-	my $self = shift;
+	my ($self) = @_;
 	my $source = $$self{SOURCE};
 	my $need_data = 1;
 	
@@ -886,8 +898,7 @@
 }
 
 sub open {
-	my $self = shift;
-	my $source = shift;
+	my ($self, $source) = @_;
 
 	if (ref($source) ne 'GLOB') {
 		die "could not open $source: $!" unless
@@ -902,7 +913,7 @@
 }
 
 sub init {
-	my $self = shift;
+	my ($self) = @_;
 	my $source = $$self{SOURCE};
 	my $found = 0;
 	
@@ -920,7 +931,7 @@
 
 #replaced by next()
 sub link {
-	my $self = shift;
+	my ($self) = @_;
 	$self->next(@_);
 }
 
@@ -928,8 +939,7 @@
 
 #you must pass in a fully populated link array reference
 sub new {
-	my $class = shift;
-	my $self = shift;
+	my ($class, $self) = @_;
 
 	bless($self, $class);
 
@@ -937,21 +947,305 @@
 }
 
 sub from {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self[0];
 }
 
 sub namespace {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self[1];
 }
 
 sub to {
-	my $self = shift;
+	my ($self) = @_;
 	return $$self[2];
 }
 
-
+package Parse::MediaWikiDump::CategoryLinks;
+
+use strict;
+use warnings;
+
+sub new {
+	my ($class, $source) = @_;
+	my $self = {};
+
+	$$self{BUFFER} = [];
+	$$self{BYTE} = 0;
+
+	bless($self, $class);
+
+	$self->open($source);
+	$self->init;
+
+	return $self;
+}
+
+sub next {
+	my ($self) = @_;
+	my $buffer = $$self{BUFFER};
+	my $link;
+
+	while(1) {
+		if (defined($link = pop(@$buffer))) {
+			last;
+		}
+
+		#signals end of input
+		return undef unless $self->parse_more;
+	}
+
+	return Parse::MediaWikiDump::category_link->new($link);
+}
+
+#private functions with OO interface
+sub parse_more {
+	my ($self) = @_;
+	my $source = $$self{SOURCE};
+	my $need_data = 1;
+	
+	while($need_data) {
+		my $line = <$source>;
+
+		last unless defined($line);
+
+		$$self{BYTE} += length($line);
+
+		while($line =~ m/\((\d+),'(.*?)','(.*?)',(\d+)\)[;,]/g) {
+			push(@{$$self{BUFFER}}, [$1, $2, $3, $4]);
+			$need_data = 0;
+		}
+	}
+
+	#if we still need data and we are here it means we ran out of input
+	if ($need_data) {
+		return 0;
+	}
+	
+	return 1;
+}
+
+sub open {
+	my ($self, $source) = @_;
+
+	if (ref($source) ne 'GLOB') {
+		die "could not open $source: $!" unless
+			open($$self{SOURCE}, $source);
+
+		$$self{SOURCE_FILE} = $source;
+	} else {
+		$$self{SOURCE} = $source;
+	}
+
+	binmode($$self{SOURCE}, ':utf8');
+
+	return 1;
+}
+
+sub init {
+	my ($self) = @_;
+	my $source = $$self{SOURCE};
+	my $found = 0;
+	
+	while(<$source>) {
+		if (m/^LOCK TABLES `categorylinks` WRITE;/) {
+			$found = 1;
+			last;
+		}
+	}
+
+	die "not a MediaWiki link dump file" unless $found;
+}
+
+sub current_byte {
+	my ($self) = @_;
+
+	return $$self{BYTE};
+}
+
+sub size {
+	my ($self) = @_;
+	
+	return undef unless defined $$self{SOURCE_FILE};
+
+	my @stat = stat($$self{SOURCE_FILE});
+
+	return $stat[7];
+}
+
+package Parse::MediaWikiDump::category_link;
+
+#you must pass in a fully populated link array reference
+sub new {
+	my ($class, $self) = @_;
+
+	bless($self, $class);
+
+	return $self;
+}
+
+sub from {
+	my ($self) = @_;
+	return $$self[0];
+}
+
+sub to {
+	my ($self) = @_;
+	return $$self[1];
+}
+
+sub sortkey {
+	my ($self) = @_;
+	return $$self[2];
+}
+
+sub timestamp {
+	my ($self) = @_;
+	return $$self[3];
+}
+
+#package Parse::MediaWikiDump::ExternalLinks;
+#
+#use strict;
+#use warnings;
+#
+#sub new {
+#	my ($class, $source) = @_;
+#	my $self = {};
+#
+#	$$self{BUFFER} = [];
+#	$$self{BYTE} = 0;
+#
+#	bless($self, $class);
+#
+#	$self->open($source);
+#	$self->init;
+#
+#	return $self;
+#}
+#
+#sub next {
+#	my ($self) = @_;
+#	my $buffer = $$self{BUFFER};
+#	my $link;
+#
+#	while(1) {
+#		if (defined($link = pop(@$buffer))) {
+#			last;
+#		}
+#
+#		#signals end of input
+#		return undef unless $self->parse_more;
+#	}
+#
+#	return Parse::MediaWikiDump::external_link->new($link);
+#}
+#
+##private functions with OO interface
+#sub parse_more {
+#	my ($self) = @_;
+#	my $source = $$self{SOURCE};
+#	my $need_data = 1;
+#	
+#	while($need_data) {
+#		my $line = <$source>;
+#
+#		last unless defined($line);
+#
+#		$$self{BYTE} += length($line);
+#
+#		while($line =~ m/\((\d+),'(.*?)','(.*?)'\)[;,]/g) {
+#			push(@{$$self{BUFFER}}, [$1, $2, $3]);
+#			$need_data = 0;
+#		}
+#	}
+#
+#	#if we still need data and we are here it means we ran out of input
+#	if ($need_data) {
+#		return 0;
+#	}
+#	
+#	return 1;
+#}
+#
+#sub open {
+#	my ($self, $source) = @_;
+#
+#	if (ref($source) ne 'GLOB') {
+#		die "could not open $source: $!" unless
+#			open($$self{SOURCE}, $source);
+#
+#		$$self{SOURCE_FILE} = $source;
+#	} else {
+#		$$self{SOURCE} = $source;
+#	}
+#
+#	binmode($$self{SOURCE}, ':utf8');
+#
+#	return 1;
+#}
+#
+#sub init {
+#	my ($self) = @_;
+#	my $source = $$self{SOURCE};
+#	my $found = 0;
+#	
+#	while(<$source>) {
+#		if (m/^LOCK TABLES `externallinks` WRITE;/) {
+#			$found = 1;
+#			last;
+#		}
+#	}
+#
+#	die "not a MediaWiki link dump file" unless $found;
+#}
+#
+#sub current_byte {
+#	my ($self) = @_;
+#
+#	return $$self{BYTE};
+#}
+#
+#sub size {
+#	my ($self) = @_;
+#	
+#	return undef unless defined $$self{SOURCE_FILE};
+#
+#	my @stat = stat($$self{SOURCE_FILE});
+#
+#	return $stat[7];
+#}
+#
+#package Parse::MediaWikiDump::external_link;
+#
+##you must pass in a fully populated link array reference
+#sub new {
+#	my ($class, $self) = @_;
+#
+#	bless($self, $class);
+#
+#	return $self;
+#}
+#
+#sub from {
+#	my ($self) = @_;
+#	return $$self[0];
+#}
+#
+#sub to {
+#	my ($self) = @_;
+#	return $$self[1];
+#}
+#
+#sub index {
+#	my ($self) = @_;
+#	return $$self[2];
+#}
+#
+#sub timestamp {
+#	my ($self) = @_;
+#	return $$self[3];
+#
 1;
 
 __END__
@@ -985,6 +1279,7 @@
   $pages->generator;
   $pages->case;
   $pages->namespaces;
+  $pages->namespaces_names;
   $pages->current_byte;
   $pages->size;
 
@@ -1081,6 +1376,16 @@
 namespace number and the second is the namespace name. In the case of namespace
 0 the text stored for the name is ''.
 
+=item $pages->namespaces_names
+
+Returns an array reference to a list of namespace names only; this is a single
+dimensional array with plain text string values.
+
+=item $pages->namespaces
+
+Returns an array reference to the list of namespace names in the instance,
+without namespace numbers. The main namespace name is ''.
+
 =item $pages->current_byte
 
 Returns the number of bytes parsed so far.
@@ -1395,7 +1700,11 @@
 
 =head1 AUTHOR
 
-This module was created and documented by Tyler Riddle E<lt>triddle at gmail.comE<gt>. 
+This module was created, documented, and is maintained by 
+Tyler Riddle E<lt>triddle at gmail.comE<gt>. 
+
+Fix for bug 36255 "Parse::MediaWikiDump::page::namespace may return a string
+which is not really a namespace" provided by Amir E. Aharoni.
 
 =head1 BUGS
 

Modified: trunk/libparse-mediawikidump-perl/t/links-compat.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/t/links-compat.t?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/t/links-compat.t (original)
+++ trunk/libparse-mediawikidump-perl/t/links-compat.t Sat Jun 14 21:13:16 2008
@@ -5,7 +5,7 @@
 use warnings;
 use Parse::MediaWikiDump;
 
-my $file = 'links_test.sql';
+my $file = 't/links_test.sql';
 
 my $links = Parse::MediaWikiDump::Links->new($file);
 

Modified: trunk/libparse-mediawikidump-perl/t/links.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/t/links.t?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/t/links.t (original)
+++ trunk/libparse-mediawikidump-perl/t/links.t Sat Jun 14 21:13:16 2008
@@ -5,7 +5,7 @@
 use warnings;
 use Parse::MediaWikiDump;
 
-my $file = 'links_test.sql';
+my $file = 't/links_test.sql';
 
 my $links = Parse::MediaWikiDump::Links->new($file);
 

Modified: trunk/libparse-mediawikidump-perl/t/pages-compat.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/t/pages-compat.t?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/t/pages-compat.t (original)
+++ trunk/libparse-mediawikidump-perl/t/pages-compat.t Sat Jun 14 21:13:16 2008
@@ -4,7 +4,7 @@
 use strict;
 use Parse::MediaWikiDump;
 
-my $file = 'pages_test.xml';
+my $file = 't/pages_test.xml';
 my $fh;
 
 test_all($file);

Modified: trunk/libparse-mediawikidump-perl/t/pages.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/t/pages.t?rev=21116&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/t/pages.t (original)
+++ trunk/libparse-mediawikidump-perl/t/pages.t Sat Jun 14 21:13:16 2008
@@ -1,10 +1,10 @@
 #!perl -w
 
-use Test::Simple tests => 46;
+use Test::Simple tests => 74;
 use strict;
 use Parse::MediaWikiDump;
 
-my $file = 'pages_test.xml';
+my $file = 't/pages_test.xml';
 my $fh;
 my $pages;
 
@@ -20,12 +20,18 @@
 	test_one();
 	test_two();
 	test_three();
+	test_four();
+
+	ok(! defined($pages->next));
 }
 
 sub test_one {
 	my $page = $pages->next;
 	my $text = $page->text;
 
+	ok(defined($page));
+
+	ok($page->namespace eq '');
 	ok($pages->sitename eq 'Sitename Test Value');
 	ok($pages->base eq 'Base Test Value');
 	ok($pages->generator eq 'Generator Test Value');
@@ -42,6 +48,8 @@
 sub test_two {
 	my $page = $pages->next;
 
+	ok(defined($page));
+	ok($page->namespace eq '');
 	ok($page->redirect eq 'fooooo');
 	ok($page->title eq 'Title Test Value #2');
 	ok($page->id == 2);
@@ -53,6 +61,8 @@
 sub test_three {
 	my $page = $pages->next;
 
+	ok(defined($page));
+	ok($page->namespace eq '');
 	ok($page->redirect eq 'fooooo');
 	ok($page->title eq 'Title Test Value #3');
 	ok($page->id == 3);
@@ -60,3 +70,18 @@
 	ok($page->username eq 'Username Test Value');
 	ok($page->userid == 1292);
 }
+
+sub test_four {
+	my $page = $pages->next;
+
+	ok(defined($page));
+
+	ok($page->id == 4);
+	ok($page->timestamp eq '2005-07-09T18:41:10Z');
+	ok($page->username eq 'Username Test Value');
+	ok($page->userid == 1292);
+
+	#test for bug 36255
+	ok($page->namespace eq '');
+	ok($page->title eq 'NotANameSpace:Bar');
+}




More information about the Pkg-perl-cvs-commits mailing list