r46265 - in /branches/upstream/libparse-mediawikidump-perl/current: Changes META.yml Makefile.PL TODO lib/Parse/MediaWikiDump.pm lib/Parse/MediaWikiDump/Pages.pm lib/Parse/MediaWikiDump/Revisions.pm lib/Parse/MediaWikiDump/XML.pm

gregoa at users.alioth.debian.org gregoa at users.alioth.debian.org
Fri Oct 23 20:16:56 UTC 2009


Author: gregoa
Date: Fri Oct 23 20:16:51 2009
New Revision: 46265

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=46265
Log:
[svn-upgrade] Integrating new upstream version, libparse-mediawikidump-perl (0.97)

Modified:
    branches/upstream/libparse-mediawikidump-perl/current/Changes
    branches/upstream/libparse-mediawikidump-perl/current/META.yml
    branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL
    branches/upstream/libparse-mediawikidump-perl/current/TODO
    branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm
    branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm
    branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm
    branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm

Modified: branches/upstream/libparse-mediawikidump-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/Changes?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/Changes (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/Changes Fri Oct 23 20:16:51 2009
@@ -1,4 +1,9 @@
 Revision history for Parse-MediaWikiDump
+
+0.97	Oct 23, 2009
+	* Fixed all known memory leaks
+	* No more Object::Destroyer
+	* Cleaned out some old cruft
 
 0.96	Oct 22, 2009
 	* Allowed parsing of 0.4 version XML dump files but not

Modified: branches/upstream/libparse-mediawikidump-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/META.yml?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/META.yml (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/META.yml Fri Oct 23 20:16:51 2009
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:               Parse-MediaWikiDump
-version:            0.96
+version:            0.97
 abstract:           Tools to process MediaWiki dump files
 author:
     - Tyler Riddle <triddle at gmail.com>
@@ -9,12 +9,11 @@
 configure_requires:
     ExtUtils::MakeMaker:  0
 requires:
-    List::Util:         0
-    Object::Destroyer:  0
-    Scalar::Util:       0
-    Test::Exception:    0
-    Test::More:         0
-    XML::Parser:        0
+    List::Util:       0
+    Scalar::Util:     0
+    Test::Exception:  0
+    Test::More:       0
+    XML::Parser:      0
 no_index:
     directory:
         - t

Modified: branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/Makefile.PL Fri Oct 23 20:16:51 2009
@@ -11,10 +11,9 @@
     PL_FILES            => {},
     PREREQ_PM => {
     'Test::More' => 0,
+    'Test::Exception' => 0,
     'XML::Parser' => 0,
     'List::Util' => 0,
-    'Object::Destroyer' => 0,
-    'Test::Exception' => 0,
     'Scalar::Util' => 0,
     },
     dist                => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },

Modified: branches/upstream/libparse-mediawikidump-perl/current/TODO
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/TODO?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/TODO (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/TODO Fri Oct 23 20:16:51 2009
@@ -1,3 +1,1 @@
-  * Fix memory leak bug
-  * Investigate if using pop for removing parsed items from the buffer will make Pages and Revisions faster; 
-    if so, add an option for such
+  * Comment the code

Modified: branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump.pm Fri Oct 23 20:16:51 2009
@@ -1,5 +1,5 @@
 package Parse::MediaWikiDump;
-our $VERSION = '0.96';
+our $VERSION = '0.97';
 
 use Parse::MediaWikiDump::XML;
 use Parse::MediaWikiDump::Revisions;

Modified: branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Pages.pm Fri Oct 23 20:16:51 2009
@@ -1,15 +1,18 @@
 package Parse::MediaWikiDump::Pages;
 
-our $VERSION = '0.96';
+our $VERSION = '0.97';
 
 use base qw(Parse::MediaWikiDump::Revisions);
 
 use strict;
 use warnings;
-use Carp;
+use Scalar::Util qw(weaken);
 
 sub new_accumulator_engine {
 	my ($self) = @_;
+	
+	weaken($self);
+	
 	my $f = Parse::MediaWikiDump::XML::Accumulator->new;
 	my $store_siteinfo = $self->{SITEINFO};
 	my $store_page = $self->{PAGE_LIST};
@@ -34,7 +37,7 @@
 		Start => sub { $_[1]->{minor} = 0 }, 
 		End => sub { 
 			if (defined($_[1]->{seen_revision})) {
-				die "only one revision per page is allowed\n";
+				$self->{DIE_REQUESTED} = "only one revision per page is allowed";
 			}
 			
 			$_[1]->{seen_revision} = 1;
@@ -292,7 +295,8 @@
 
 =head1 LIMITATIONS
 
-=head2 Memory Leak
-
-This class is not performing proper garbage collection at destruction and will leak memory like crazy if 
-multiple instances of it are created inside one perl script. 
+=head2 Version 0.4
+
+This class was updated to support version 0.4 dump files from
+a MediaWiki instance but it does not currently support any of
+the new information available in those files. 

Modified: branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/Revisions.pm Fri Oct 23 20:16:51 2009
@@ -1,15 +1,24 @@
 package Parse::MediaWikiDump::Revisions;
 
-our $VERSION = '0.96';
+our $VERSION = '0.97';
 
 use 5.8.0;
 
 use strict;
 use warnings;
+use Carp;
+
 use List::Util;
 use Scalar::Util qw(weaken reftype);
-use Object::Destroyer;
 use Data::Dumper;
+
+sub DESTROY {
+	my ($self) = @_;
+	
+	if (! $self->{FINISHED}) {
+		$self->{EXPAT}->parse_done;
+	}
+}
 
 #public methods
 sub new {
@@ -29,7 +38,6 @@
 	$self->open($source);
 	$self->init;
 	
-	#return Object::Destroyer($self, 'cleanup');
 	return $self;
 }
 
@@ -112,17 +120,6 @@
 
 #private functions with OO interface
 
-sub cleanup {
-	my ($self) = @_;
-	
-	#warn "executing cleanup";
-	
-	$self->{EXPAT}->setHandlers(Init => undef, Final => undef, Start => undef, 
-		End => undef, Char => undef);
-	$self->{EXPAT}->parse_done;	
-	#$self->{XML} = undef;
-}
-
 sub open {
 	my ($self, $source) = @_;
 
@@ -146,21 +143,13 @@
 	
 	$self->{XML} = $self->new_accumulator_engine;
 	my $expat_bb = $$self{XML}->parser->parse_start();
-	#$$self{EXPAT} = Object::Destroyer->new($expat_bb, 'parse_done'); #causes exceptions not to be thrown
 	$$self{EXPAT} = $expat_bb;
 	
 	#load the information from the siteinfo section so it is available before
 	#someone calls ->next
-	while(1) {
-		if (scalar(@{$self->{PAGE_LIST}}) > 0) {
-			last;
-		}	
-		
+	while(scalar(@{$self->{PAGE_LIST}}) < 1) {
 		$self->parse_more;	
 	}
-	
-	#XML::Accumulator holds a copy of itself
-	weaken($self->{XML});
 }
 
 sub new_accumulator_engine {
@@ -224,7 +213,6 @@
                 die "error during read: $!";
         } elsif ($read == 0) {
                 $$self{FINISHED} = 1;
-                #$$self{EXPAT} = undef; #Object::Destroyer cleans this up
                 $$self{EXPAT}->parse_done;
                 
                 return 0;
@@ -232,7 +220,11 @@
 
         $$self{BYTE} += $read;
         $$self{EXPAT}->parse_more($buf);
-
+        
+    	if ($self->{DIE_REQUESTED}) {
+			die "$self->{DIE_REQUESTED}\n";
+		}
+        
         return 1;
 }
 
@@ -249,14 +241,6 @@
 	
 	return undef;
 }
-
-#sub save_page {
-#	my ($page, $save_to) = @_;
-#	my %page = %$page; #make a local copy
-#	
-#	push(@{ $self->{PAGE_LIST} }, \%page);
-#}
-
 
 #helper functions that the xml accumulator uses
 sub save_namespace_node {
@@ -422,7 +406,8 @@
   
 =head1 LIMITATIONS
 
-=head2 Memory Leak
-
-This class is not performing proper garbage collection at destruction and will leak memory like crazy if 
-multiple instances of it are created inside one perl script. 
+=head2 Version 0.4
+
+This class was updated to support version 0.4 dump files from
+a MediaWiki instance but it does not currently support any of
+the new information available in those files. 

Modified: branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm?rev=46265&op=diff
==============================================================================
--- branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm (original)
+++ branches/upstream/libparse-mediawikidump-perl/current/lib/Parse/MediaWikiDump/XML.pm Fri Oct 23 20:16:51 2009
@@ -1,8 +1,20 @@
 #this is set to become a new module on CPAN after
 #testing is done and documentation is written
+
+#this module is a thin wrapper around XML::Accumulator that
+#provides a tree interface for the event handlers. The engine
+#follows the tree as it receives events from XML::Accumulator
+#so that context can be pulled out from the location in the 
+#tree.
+
+#Handlers for this module are also registered as callbacks but
+#exist at a specific node on the tree. Each handler is invoked
+#with the same information that came from the XML::Parser event
+#but is also given an additional argument that is an accumulator
+#variable to store data in. 
 package Parse::MediaWikiDump::XML::Accumulator;
 
-our $VERSION = '0.95';
+our $VERSION = '0.97';
 
 use warnings;
 use strict;
@@ -43,7 +55,6 @@
 
 use Scalar::Util qw(weaken);
 use XML::Parser;
-use Object::Destroyer;
 
 sub new {
 	my ($class, $root, $accum) = @_;
@@ -61,24 +72,16 @@
 	$self->{element_stack} = [];
 	$self->{accum} = $accum;
 	$self->{char_buf} = '';
-	$self->{char_dirty} = 0;
 	$self->{node_stack} = [ $root ];
 	
-	#return Object::Destroyer->new($self, 'cleanup');
 	return $self;
 }
 
-sub cleanup {
-	my ($self) = @_;
-	
-	$self->parser->setHandlers(Init => undef, Final => undef, Start => undef, 
-		End => undef, Char => undef);
-}
-
 sub init_parser {
 	my ($self) = @_;
 	
-	#warn "init_parser called";
+	#stop a giant memory leak
+	weaken($self);
 	
 	my $parser = XML::Parser->new(
 		Handlers => {




More information about the Pkg-perl-cvs-commits mailing list