r46265 - in /branches/upstream/libparse-mediawikidump-perl/current: Changes META.yml Makefile.PL TODO lib/Parse/MediaWikiDump.pm lib/Parse/MediaWikiDump/Pages.pm lib/Parse/MediaWikiDump/Revisions.pm lib/Parse/MediaWikiDump/XML.pm
gregoa at users.alioth.debian.org
gregoa at users.alioth.debian.org
Fri Oct 23 20:16:56 UTC 2009
Author: gregoa
Date: Fri Oct 23 20:16:51 2009
New Revision: 46265
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=46265
Log:
[svn-upgrade] Integrating new upstream version, libparse-mediawikidump-perl (0.97)
Modified:
branches/upstream/libparse-mediawikidump-perl/current/Changes
branches/upstream/libparse-mediawikidump-perl/current/META.yml
branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL
branches/upstream/libparse-mediawikidump-perl/current/TODO
branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm
branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm
branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm
branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm
Modified: branches/upstream/libparse-mediawikidump-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/Changes?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/Changes (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/Changes Fri Oct 23 20:16:51 2009
@@ -1,4 +1,9 @@
Revision history for Parse-MediaWikiDump
+
+0.97 Oct 23, 2009
+ * Fixed all known memory leaks
+ * No more Object::Destroyer
+ * Cleaned out some old cruft
0.96 Oct 22, 2009
* Allowed parsing of 0.4 version XML dump files but not
Modified: branches/upstream/libparse-mediawikidump-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/META.yml?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/META.yml (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/META.yml Fri Oct 23 20:16:51 2009
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: Parse-MediaWikiDump
-version: 0.96
+version: 0.97
abstract: Tools to process MediaWiki dump files
author:
- Tyler Riddle <triddle at gmail.com>
@@ -9,12 +9,11 @@
configure_requires:
ExtUtils::MakeMaker: 0
requires:
- List::Util: 0
- Object::Destroyer: 0
- Scalar::Util: 0
- Test::Exception: 0
- Test::More: 0
- XML::Parser: 0
+ List::Util: 0
+ Scalar::Util: 0
+ Test::Exception: 0
+ Test::More: 0
+ XML::Parser: 0
no_index:
directory:
- t
Modified: branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL Fri Oct 23 20:16:51 2009
@@ -11,10 +11,9 @@
PL_FILES => {},
PREREQ_PM => {
'Test::More' => 0,
+ 'Test::Exception' => 0,
'XML::Parser' => 0,
'List::Util' => 0,
- 'Object::Destroyer' => 0,
- 'Test::Exception' => 0,
'Scalar::Util' => 0,
},
dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
Modified: branches/upstream/libparse-mediawikidump-perl/current/TODO
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/TODO?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/TODO (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/TODO Fri Oct 23 20:16:51 2009
@@ -1,3 +1,1 @@
- * Fix memory leak bug
- * Investigate if using pop for removing parsed items from the buffer will make Pages and Revisions faster;
- if so, add an option for such
+ * Comment the code
Modified: branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm Fri Oct 23 20:16:51 2009
@@ -1,5 +1,5 @@
package Parse::MediaWikiDump;
-our $VERSION = '0.96';
+our $VERSION = '0.97';
use Parse::MediaWikiDump::XML;
use Parse::MediaWikiDump::Revisions;
Modified: branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm Fri Oct 23 20:16:51 2009
@@ -1,15 +1,18 @@
package Parse::MediaWikiDump::Pages;
-our $VERSION = '0.96';
+our $VERSION = '0.97';
use base qw(Parse::MediaWikiDump::Revisions);
use strict;
use warnings;
-use Carp;
+use Scalar::Util qw(weaken);
sub new_accumulator_engine {
my ($self) = @_;
+
+ weaken($self);
+
my $f = Parse::MediaWikiDump::XML::Accumulator->new;
my $store_siteinfo = $self->{SITEINFO};
my $store_page = $self->{PAGE_LIST};
@@ -34,7 +37,7 @@
Start => sub { $_[1]->{minor} = 0 },
End => sub {
if (defined($_[1]->{seen_revision})) {
- die "only one revision per page is allowed\n";
+ $self->{DIE_REQUESTED} = "only one revision per page is allowed";
}
$_[1]->{seen_revision} = 1;
@@ -292,7 +295,8 @@
=head1 LIMITATIONS
-=head2 Memory Leak
-
-This class is not performing proper garbage collection at destruction and will leak memory like crazy if
-multiple instances of it are created inside one perl script.
+=head2 Version 0.4
+
+This class was updated to support version 0.4 dump files from
+a MediaWiki instance but it does not currently support any of
+the new information available in those files.
Modified: branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm Fri Oct 23 20:16:51 2009
@@ -1,15 +1,24 @@
package Parse::MediaWikiDump::Revisions;
-our $VERSION = '0.96';
+our $VERSION = '0.97';
use 5.8.0;
use strict;
use warnings;
+use Carp;
+
use List::Util;
use Scalar::Util qw(weaken reftype);
-use Object::Destroyer;
use Data::Dumper;
+
+sub DESTROY {
+ my ($self) = @_;
+
+ if (! $self->{FINISHED}) {
+ $self->{EXPAT}->parse_done;
+ }
+}
#public methods
sub new {
@@ -29,7 +38,6 @@
$self->open($source);
$self->init;
- #return Object::Destroyer($self, 'cleanup');
return $self;
}
@@ -112,17 +120,6 @@
#private functions with OO interface
-sub cleanup {
- my ($self) = @_;
-
- #warn "executing cleanup";
-
- $self->{EXPAT}->setHandlers(Init => undef, Final => undef, Start => undef,
- End => undef, Char => undef);
- $self->{EXPAT}->parse_done;
- #$self->{XML} = undef;
-}
-
sub open {
my ($self, $source) = @_;
@@ -146,21 +143,13 @@
$self->{XML} = $self->new_accumulator_engine;
my $expat_bb = $$self{XML}->parser->parse_start();
- #$$self{EXPAT} = Object::Destroyer->new($expat_bb, 'parse_done'); #causes exceptions not to be thrown
$$self{EXPAT} = $expat_bb;
#load the information from the siteinfo section so it is available before
#someone calls ->next
- while(1) {
- if (scalar(@{$self->{PAGE_LIST}}) > 0) {
- last;
- }
-
+ while(scalar(@{$self->{PAGE_LIST}}) < 1) {
$self->parse_more;
}
-
- #XML::Accumulator holds a copy of itself
- weaken($self->{XML});
}
sub new_accumulator_engine {
@@ -224,7 +213,6 @@
die "error during read: $!";
} elsif ($read == 0) {
$$self{FINISHED} = 1;
- #$$self{EXPAT} = undef; #Object::Destroyer cleans this up
$$self{EXPAT}->parse_done;
return 0;
@@ -232,7 +220,11 @@
$$self{BYTE} += $read;
$$self{EXPAT}->parse_more($buf);
-
+
+ if ($self->{DIE_REQUESTED}) {
+ die "$self->{DIE_REQUESTED}\n";
+ }
+
return 1;
}
@@ -249,14 +241,6 @@
return undef;
}
-
-#sub save_page {
-# my ($page, $save_to) = @_;
-# my %page = %$page; #make a local copy
-#
-# push(@{ $self->{PAGE_LIST} }, \%page);
-#}
-
#helper functions that the xml accumulator uses
sub save_namespace_node {
@@ -422,7 +406,8 @@
=head1 LIMITATIONS
-=head2 Memory Leak
-
-This class is not performing proper garbage collection at destruction and will leak memory like crazy if
-multiple instances of it are created inside one perl script.
+=head2 Version 0.4
+
+This class was updated to support version 0.4 dump files from
+a MediaWiki instance but it does not currently support any of
+the new information available in those files.
Modified: branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm Fri Oct 23 20:16:51 2009
@@ -1,8 +1,20 @@
#this is set to become a new module on CPAN after
#testing is done and documentation is written
+
+#this module is a thin wrapper around XML::Parser that
+#provides a tree interface for the event handlers. The engine
+#follows the tree as it receives events from XML::Parser
+#so that context can be pulled out from the location in the
+#tree.
+
+#Handlers for this module are also registered as callbacks but
+#exist at a specific node on the tree. Each handler is invoked
+#with the same information that came from the XML::Parser event
+#but is also given an additional argument that is an accumulator
+#variable to store data in.
package Parse::MediaWikiDump::XML::Accumulator;
-our $VERSION = '0.95';
+our $VERSION = '0.97';
use warnings;
use strict;
@@ -43,7 +55,6 @@
use Scalar::Util qw(weaken);
use XML::Parser;
-use Object::Destroyer;
sub new {
my ($class, $root, $accum) = @_;
@@ -61,24 +72,16 @@
$self->{element_stack} = [];
$self->{accum} = $accum;
$self->{char_buf} = '';
- $self->{char_dirty} = 0;
$self->{node_stack} = [ $root ];
- #return Object::Destroyer->new($self, 'cleanup');
return $self;
}
-sub cleanup {
- my ($self) = @_;
-
- $self->parser->setHandlers(Init => undef, Final => undef, Start => undef,
- End => undef, Char => undef);
-}
-
sub init_parser {
my ($self) = @_;
- #warn "init_parser called";
+ #stop a giant memory leak
+ weaken($self);
my $parser = XML::Parser->new(
Handlers => {
More information about the Pkg-perl-cvs-commits
mailing list