r46501 - in /trunk/libparse-mediawikidump-perl: ./ debian/ lib/Parse/ lib/Parse/MediaWikiDump/ t/
jawnsy-guest at users.alioth.debian.org
jawnsy-guest at users.alioth.debian.org
Thu Oct 29 13:24:42 UTC 2009
Author: jawnsy-guest
Date: Thu Oct 29 13:24:37 2009
New Revision: 46501
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=46501
Log:
* New upstream release
* Update dependencies per upstream
Added:
trunk/libparse-mediawikidump-perl/t/30-links-compat.t
- copied unchanged from r46500, branches/upstream/libparse-mediawikidump-perl/current/t/30-links-compat.t
trunk/libparse-mediawikidump-perl/t/30-links.t
- copied unchanged from r46500, branches/upstream/libparse-mediawikidump-perl/current/t/30-links.t
trunk/libparse-mediawikidump-perl/t/30-pages.t
- copied unchanged from r46500, branches/upstream/libparse-mediawikidump-perl/current/t/30-pages.t
trunk/libparse-mediawikidump-perl/t/30-revisions.t
- copied unchanged from r46500, branches/upstream/libparse-mediawikidump-perl/current/t/30-revisions.t
trunk/libparse-mediawikidump-perl/t/40-pages-single-revision-only.t
- copied unchanged from r46500, branches/upstream/libparse-mediawikidump-perl/current/t/40-pages-single-revision-only.t
trunk/libparse-mediawikidump-perl/t/40-pre-factory.t
- copied unchanged from r46500, branches/upstream/libparse-mediawikidump-perl/current/t/40-pre-factory.t
trunk/libparse-mediawikidump-perl/t/70-memory-cycle.t
- copied unchanged from r46500, branches/upstream/libparse-mediawikidump-perl/current/t/70-memory-cycle.t
trunk/libparse-mediawikidump-perl/t/memory-leak.off
- copied unchanged from r46500, branches/upstream/libparse-mediawikidump-perl/current/t/memory-leak.off
Removed:
trunk/libparse-mediawikidump-perl/t/links-compat.t
trunk/libparse-mediawikidump-perl/t/links.t
trunk/libparse-mediawikidump-perl/t/pages-single-revision-only.t
trunk/libparse-mediawikidump-perl/t/pages.t
trunk/libparse-mediawikidump-perl/t/pre-factory.t
trunk/libparse-mediawikidump-perl/t/revisions.t
Modified:
trunk/libparse-mediawikidump-perl/Changes
trunk/libparse-mediawikidump-perl/MANIFEST
trunk/libparse-mediawikidump-perl/META.yml
trunk/libparse-mediawikidump-perl/Makefile.PL
trunk/libparse-mediawikidump-perl/debian/changelog
trunk/libparse-mediawikidump-perl/debian/control
trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm
trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Pages.pm
trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Revisions.pm
trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/XML.pm
trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/page.pm
trunk/libparse-mediawikidump-perl/t/revisions_test.xml
Modified: trunk/libparse-mediawikidump-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/Changes?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/Changes (original)
+++ trunk/libparse-mediawikidump-perl/Changes Thu Oct 29 13:24:37 2009
@@ -1,4 +1,10 @@
Revision history for Parse-MediaWikiDump
+
+0.98 Oct 28, 2009
+ * Bumped processing speed back up
+ * Fixed possible infinite loop scenario
+ * Ordered tests
+ * Added test to find circular references
0.97 Oct 23, 2009
* Fixed all known memory leaks
Modified: trunk/libparse-mediawikidump-perl/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/MANIFEST?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/MANIFEST (original)
+++ trunk/libparse-mediawikidump-perl/MANIFEST Thu Oct 29 13:24:37 2009
@@ -6,15 +6,18 @@
README
examples/speed_test
t/00-load.t
-t/pages.t
-t/links.t
-t/links-compat.t
+t/00-load.t
+t/30-links-compat.t
+t/30-links.t
+t/30-pages.t
+t/30-revisions.t
+t/40-pages-single-revision-only.t
+t/40-pre-factory.t
+t/70-memory-cycle.t
+t/links_test.sql
+t/memory-leak.off
t/pages_test.xml
-t/links_test.sql
t/revisions_test.xml
-t/revisions.t
-t/pre-factory.t
-t/pages-single-revision-only.t
lib/Parse/MediaWikiDump/category_link.pm
lib/Parse/MediaWikiDump/CategoryLinks.pm
lib/Parse/MediaWikiDump/link.pm
Modified: trunk/libparse-mediawikidump-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/META.yml?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/META.yml (original)
+++ trunk/libparse-mediawikidump-perl/META.yml Thu Oct 29 13:24:37 2009
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: Parse-MediaWikiDump
-version: 0.97
+version: 0.98
abstract: Tools to process MediaWiki dump files
author:
- Tyler Riddle <triddle at gmail.com>
@@ -9,11 +9,13 @@
configure_requires:
ExtUtils::MakeMaker: 0
requires:
- List::Util: 0
- Scalar::Util: 0
- Test::Exception: 0
- Test::More: 0
- XML::Parser: 0
+ List::Util: 0
+ PadWalker: 0
+ Scalar::Util: 0
+ Test::Exception: 0
+ Test::Memory::Cycle: 0
+ Test::More: 0
+ XML::Parser: 0
no_index:
directory:
- t
Modified: trunk/libparse-mediawikidump-perl/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/Makefile.PL?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/Makefile.PL (original)
+++ trunk/libparse-mediawikidump-perl/Makefile.PL Thu Oct 29 13:24:37 2009
@@ -10,6 +10,8 @@
ABSTRACT_FROM => 'lib/Parse/MediaWikiDump.pm',
PL_FILES => {},
PREREQ_PM => {
+ 'PadWalker' => 0,
+ 'Test::Memory::Cycle' => 0,
'Test::More' => 0,
'Test::Exception' => 0,
'XML::Parser' => 0,
Modified: trunk/libparse-mediawikidump-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/debian/changelog?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/debian/changelog (original)
+++ trunk/libparse-mediawikidump-perl/debian/changelog Thu Oct 29 13:24:37 2009
@@ -1,3 +1,10 @@
+libparse-mediawikidump-perl (0.98-1) UNRELEASED; urgency=low
+
+ * New upstream release
+ * Update dependencies per upstream
+
+ -- Jonathan Yu <jawnsy at cpan.org> Thu, 29 Oct 2009 05:48:04 -0400
+
libparse-mediawikidump-perl (0.97-1) unstable; urgency=low
[ Angel Abad ]
Modified: trunk/libparse-mediawikidump-perl/debian/control
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/debian/control?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/debian/control (original)
+++ trunk/libparse-mediawikidump-perl/debian/control Thu Oct 29 13:24:37 2009
@@ -2,7 +2,8 @@
Section: perl
Priority: optional
Build-Depends: debhelper (>= 7)
-Build-Depends-Indep: perl, libxml-parser-perl, libtest-exception-perl
+Build-Depends-Indep: perl, libxml-parser-perl, libtest-exception-perl,
+ libpadwalker-perl, libtest-memory-cycle-perl
Maintainer: Debian Perl Group <pkg-perl-maintainers at lists.alioth.debian.org>
Uploaders: Xavier Oswald <xoswald at debian.org>,
gregor herrmann <gregoa at debian.org>, Ansgar Burchardt <ansgar at 43-1.org>,
@@ -14,7 +15,8 @@
Package: libparse-mediawikidump-perl
Architecture: all
-Depends: ${misc:Depends}, ${perl:Depends}, libxml-parser-perl
+Depends: ${misc:Depends}, ${perl:Depends}, libxml-parser-perl,
+ libpadwalker-perl
Description: Perl module to parse MediaWiki dump files
Parse::MediaWikiDump is a Perl module which provides tools needed to process
the contents of XML dump files generated by MediaWiki (page information, with
Modified: trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm (original)
+++ trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump.pm Thu Oct 29 13:24:37 2009
@@ -1,5 +1,5 @@
package Parse::MediaWikiDump;
-our $VERSION = '0.97';
+our $VERSION = '0.98';
use Parse::MediaWikiDump::XML;
use Parse::MediaWikiDump::Revisions;
Modified: trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Pages.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Pages.pm?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Pages.pm (original)
+++ trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Pages.pm Thu Oct 29 13:24:37 2009
@@ -1,6 +1,6 @@
package Parse::MediaWikiDump::Pages;
-our $VERSION = '0.97';
+our $VERSION = '0.98';
use base qw(Parse::MediaWikiDump::Revisions);
@@ -8,6 +8,9 @@
use warnings;
use Scalar::Util qw(weaken);
+#the only difference between this class and ::Revisions
+#is that this class enforces a single revision per each
+#page node
sub new_accumulator_engine {
my ($self) = @_;
@@ -76,11 +79,7 @@
}
sub save_namespace_node {
- my ($parser, $accum, $text, $element, $attrs) = @_;
- my $key = $attrs->{key};
- my $namespaces = $accum->{namespaces};
-
- push(@{ $accum->{namespaces} }, [$key, $text] );
+ return Parse::MediaWikiDump::Revisions::save_namespace_node(@_);
}
Modified: trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Revisions.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Revisions.pm?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Revisions.pm (original)
+++ trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/Revisions.pm Thu Oct 29 13:24:37 2009
@@ -1,6 +1,6 @@
package Parse::MediaWikiDump::Revisions;
-our $VERSION = '0.97';
+our $VERSION = '0.98';
use 5.8.0;
@@ -48,6 +48,8 @@
my $page;
+ #look for an available page and if one isn't
+ #there then parse more XML
while(1) {
$page = shift(@{ $self->{PAGE_LIST} } );
@@ -148,7 +150,7 @@
#load the information from the siteinfo section so it is available before
#someone calls ->next
while(scalar(@{$self->{PAGE_LIST}}) < 1) {
- $self->parse_more;
+ die "hit end of document" unless $self->parse_more;
}
}
@@ -218,6 +220,10 @@
return 0;
}
+ #expat has a bug where the current_byte
+ #value overflows around 2 gigabytes
+ #so we track how much data has been
+ #processed ourselves
$$self{BYTE} += $read;
$$self{EXPAT}->parse_more($buf);
@@ -243,6 +249,7 @@
}
#helper functions that the xml accumulator uses
+
sub save_namespace_node {
my ($parser, $accum, $text, $element, $attrs) = @_;
my $key = $attrs->{key};
Modified: trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/XML.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/XML.pm?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/XML.pm (original)
+++ trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/XML.pm Thu Oct 29 13:24:37 2009
@@ -71,7 +71,7 @@
$self->{root} = $root;
$self->{element_stack} = [];
$self->{accum} = $accum;
- $self->{char_buf} = '';
+ $self->{char_buf} = [];
$self->{node_stack} = [ $root ];
return $self;
@@ -156,7 +156,7 @@
sub handle_char_event {
my ($self, $expat, $chars) = @_;
- $self->{char_buf} .= $chars;
+ push(@{$self->{char_buf}}, $chars);
}
sub flush_chars {
@@ -170,9 +170,9 @@
$cur_element = [];
}
- defined $handler && &$handler($self, $self->{accum}, $self->{char_buf}, @$cur_element);
-
- $self->{char_buf} = '';
+ defined $handler && &$handler($self, $self->{accum}, join('', @{$self->{char_buf}}), @$cur_element);
+
+ $self->{char_buf} = [];
return undef;
}
@@ -395,4 +395,4 @@
$a->{$store_as} = $chars;
}
-1;
+1;
Modified: trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/page.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/page.pm?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/page.pm (original)
+++ trunk/libparse-mediawikidump-perl/lib/Parse/MediaWikiDump/page.pm Thu Oct 29 13:24:37 2009
@@ -25,20 +25,11 @@
my $title = $self->title;
my $namespace = '';
- #warn "size " . scalar(@{ $self->{NAMESPACES} });
-
return $$self{CACHE}{namespace} if defined $$self{CACHE}{namespace};
if ($title =~ m/^([^:]+):(.*)/) {
-# warn "got a namespace candidate: $1 - $2";
-
foreach (@{ $self->{NAMESPACES} } ) {
my ($num, $name) = @$_;
-
-# warn $name;
-
-# warn "$1 $name";
-
if ($1 eq $name) {
$namespace = $1;
last;
@@ -46,10 +37,6 @@
}
}
-# warn "this function is still broken";
-
-# warn "namespace: $namespace";
-
$$self{CACHE}{namespace} = $namespace;
return $namespace;
Modified: trunk/libparse-mediawikidump-perl/t/revisions_test.xml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libparse-mediawikidump-perl/t/revisions_test.xml?rev=46501&op=diff
==============================================================================
--- trunk/libparse-mediawikidump-perl/t/revisions_test.xml (original)
+++ trunk/libparse-mediawikidump-perl/t/revisions_test.xml Thu Oct 29 13:24:37 2009
@@ -48,12 +48,12 @@
<contributor><username>Username Test Value 2</username><id>12</id></contributor>
<comment>Comment Test Value 2</comment>
<text xml:space="preserve">#redirect : [[fooooo]]</text>
+ <minor/>
</revision>
<revision>
<id>47086</id>
<timestamp>2005-07-09T18:41:10Z</timestamp>
<contributor><username>Username Test Value</username><id>1292</id></contributor>
- <minor/>
<comment>Comment Test Value</comment>
<text xml:space="preserve">#redirect [[fooooo]]
</text>
More information about the Pkg-perl-cvs-commits mailing list