r35821 - in /trunk/libhtml-treebuilder-xpath-perl: Changes META.yml Makefile.PL debian/changelog debian/compat debian/control debian/copyright debian/rules lib/HTML/TreeBuilder/XPath.pm t/HTML-TreeBuilder-XPath.t

jawnsy-guest at users.alioth.debian.org jawnsy-guest at users.alioth.debian.org
Tue May 19 05:48:19 UTC 2009


Author: jawnsy-guest
Date: Tue May 19 05:48:14 2009
New Revision: 35821

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=35821
Log:
* New upstream release
* Added new dependencies for XML::XPathEngine
* Upgraded to Standards-Version 3.8.1
* Added machine-readable copyright file

Modified:
    trunk/libhtml-treebuilder-xpath-perl/Changes
    trunk/libhtml-treebuilder-xpath-perl/META.yml
    trunk/libhtml-treebuilder-xpath-perl/Makefile.PL
    trunk/libhtml-treebuilder-xpath-perl/debian/changelog
    trunk/libhtml-treebuilder-xpath-perl/debian/compat
    trunk/libhtml-treebuilder-xpath-perl/debian/control
    trunk/libhtml-treebuilder-xpath-perl/debian/copyright
    trunk/libhtml-treebuilder-xpath-perl/debian/rules
    trunk/libhtml-treebuilder-xpath-perl/lib/HTML/TreeBuilder/XPath.pm
    trunk/libhtml-treebuilder-xpath-perl/t/HTML-TreeBuilder-XPath.t

Modified: trunk/libhtml-treebuilder-xpath-perl/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/Changes?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/Changes (original)
+++ trunk/libhtml-treebuilder-xpath-perl/Changes Tue May 19 05:48:14 2009
@@ -1,51 +1,70 @@
 $Id: /html-treebuilder-xpath/Changes 40 2006-05-15T07:42:34.182385Z mrodrigu  $
 Revision history for Perl extension HTML::TreeBuilder::XPath.
 
--
-0.09
+version: 0.10
+date: 2008-02-11
+# minor feature addition
+new: find_nodes_as_strings method which returns a list of strings
+new: findvalues method which returns a list of values  
+new: as_XML_compact method, a replacement for HTML::TreeBuilder as_XML
+new: as_XML_indented method, same as as_XML_compact, except indents the output
+   
 
-  - added support for the id function, see RT #30792 
-    at https://rt.cpan.org/Ticket/Display.html?id=30792 bug reported, and
-    a fix proposed by tokuhirom 
+version: 0.09
+date: 2007-11-20
+# 2 bug fixes
+fix:
+         added support for the id function, see RT #30792 
+         at https://rt.cpan.org/Ticket/Display.html?id=30792 bug reported, and
+         a fix proposed by tokuhirom 
 
-  - fixed a bug where the as_XML method on text nodes returned non escaped
-    text, spotted by Tatsuhiko Miyagawa
-    at the moment the output is quite ugly, as ugly as HTML::Element as_XML.
+fix:     a bug where the as_XML method on text nodes returned non escaped
+         text, spotted by Tatsuhiko Miyagawa
+         at the moment the output is quite ugly, as ugly as HTML::Element as_XM.
 
-0.08
 
-  - fixed a bug that prevented the 'following' and 'preceding'
-    axis to work
+version: 0.08
+date: 2007-01-20
+# bug fixes
+fix:     a bug that prevented the 'following' and 'preceding' axis to work
+fix:     set version dependency with XML::XPathEngine 
 
-  - set version dependency with XML::XPathEngine 
+version: 0.07
+date:    2007-01-05
+# bug fix
+fix:     a bug that prevented the 'following' axis to be used
 
-0.07 2007-01-05
+version: 0.06
+date:    2006-08-07
+# bug fix
+fix: 
+         a bug that caused a crash when an element had a value of 0 
+         (patch by Martin Sarfy)
 
-  - fixed a bug that prevented the 'following' axis to be used
+version: 0.05 
+date:    2006-05-17
+# more tests
+tests:   added pod and pod coverage tests
 
-0.06 2006-08-07
+version: 0.04 
+date:    2006-05-15
+# extended perl version support
+fix:     changed the required version of perl from 5.8.4 to 5.6.0
 
-  - fixed a bug that caused a crash when an element had a value of 0
-    patch by Martin Sarfy
+version: 0.03 
+date:    2006-04-20
+# bug fix
+fix:     bug that caused results not to be ordered properly when
+         there were more than 10 results (cf RT #18705) spotted by rnapier
 
-0.05 2006-05-17
+version: 0.02 
+date:    2006-02-27
+# bug fix
+fix:     dependency to XML::XPathEngine in the Makefile
 
-  - added pod and pod coverage tests
+version: 0.01 
+date:    2006-02-15
+new:     original version; created by h2xs 1.23 with options
+           -A -X -nHTML::TreeBuilder::XPath --use-new-tests --skip-exporter
+           --skip-autoloader
 
-0.04 2006-05-15
-
-  - changed the required version of perl from 5.8.4 to 5.6.0
-
-0.03 2006-04-20
-
-  - fixed bug that caused results not to be ordered properly when
-    there were more than 10 results (cf RT #18705) spotted by rnapier
-
-0.02 2006-02-27
-
-  - fixed dependency to XML::XPathEngine in the Makefile
-
-0.01 2006-02-15
-	- original version; created by h2xs 1.23 with options
-		-A -X -nHTML::TreeBuilder::XPath --use-new-tests --skip-exporter --skip-autoloader
-

Modified: trunk/libhtml-treebuilder-xpath-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/META.yml?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/META.yml (original)
+++ trunk/libhtml-treebuilder-xpath-perl/META.yml Tue May 19 05:48:14 2009
@@ -1,12 +1,16 @@
-# http://module-build.sourceforge.net/META-spec.html
-#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
-name:         HTML-TreeBuilder-XPath
-version:      0.09
-version_from: lib/HTML/TreeBuilder/XPath.pm
-installdirs:  site
-requires:
+--- #YAML:1.0
+name:                HTML-TreeBuilder-XPath
+version:             0.10
+abstract:            add XPath support to HTML::TreeBuilder
+license:             perl
+author:              
+    - Michel Rodriguez <mrodrigu at localdomain>
+generated_by:        ExtUtils::MakeMaker version 6.42
+distribution_type:   module
+requires:     
     HTML::TreeBuilder:             0
-    XML::XPathEngine:              0.08
-
-distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.30_01
+    List::Util:                    0
+    XML::XPathEngine:              0.12
+meta-spec:
+    url:     http://module-build.sourceforge.net/META-spec-v1.3.html
+    version: 1.3

Modified: trunk/libhtml-treebuilder-xpath-perl/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/Makefile.PL?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/Makefile.PL (original)
+++ trunk/libhtml-treebuilder-xpath-perl/Makefile.PL Tue May 19 05:48:14 2009
@@ -2,10 +2,15 @@
 use ExtUtils::MakeMaker;
 # See lib/ExtUtils/MakeMaker.pm for details of how to influence
 # the contents of the Makefile that is written.
+
+(my $EUMM= $ExtUtils::MakeMaker::VERSION)=~ tr/_//d;
+my @license = $EUMM > 6.30 ? qw(LICENSE perl) : ();
+
 WriteMakefile(
     NAME              => 'HTML::TreeBuilder::XPath',
     VERSION_FROM      => 'lib/HTML/TreeBuilder/XPath.pm', # finds $VERSION
-    PREREQ_PM         => { XML::XPathEngine => 0.08, HTML::TreeBuilder => 0, }, # e.g., Module::Name => 1.1
+    PREREQ_PM         => { XML::XPathEngine => 0.12, HTML::TreeBuilder => 0, List::Util => 0 },
+    @license,
     ($] >= 5.005 ?     ## Add these new keywords supported since 5.005
       (ABSTRACT_FROM  => 'lib/HTML/TreeBuilder/XPath.pm', # retrieve abstract from module
        AUTHOR         => 'Michel Rodriguez <mrodrigu at localdomain>') : ()),

Modified: trunk/libhtml-treebuilder-xpath-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/debian/changelog?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/debian/changelog (original)
+++ trunk/libhtml-treebuilder-xpath-perl/debian/changelog Tue May 19 05:48:14 2009
@@ -1,11 +1,18 @@
-libhtml-treebuilder-xpath-perl (0.09-4) UNRELEASED; urgency=low
+libhtml-treebuilder-xpath-perl (0.10-1) unstable; urgency=low
 
+  [ Jonathan Yu ]
+  * New upstream release
+  * Added new dependencies for XML::XPathEngine
+  * Upgraded to Standards-Version 3.8.1
+  * Added machine-readable copyright file
+
+  [ gregor herrmann ]
   * Add debian/README.source to document quilt usage, as required by
     Debian Policy since 3.8.0.
   * debian/control: Changed: Switched Vcs-Browser field to ViewSVN
     (source stanza).
 
- -- gregor herrmann <gregoa at debian.org>  Wed, 06 Aug 2008 21:33:42 -0300
+ -- Jonathan Yu <frequency at cpan.org>  Tue, 19 May 2009 01:46:18 -0400
 
 libhtml-treebuilder-xpath-perl (0.09-3) unstable; urgency=low
 

Modified: trunk/libhtml-treebuilder-xpath-perl/debian/compat
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/debian/compat?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/debian/compat (original)
+++ trunk/libhtml-treebuilder-xpath-perl/debian/compat Tue May 19 05:48:14 2009
@@ -1,1 +1,1 @@
-5
+7

Modified: trunk/libhtml-treebuilder-xpath-perl/debian/control
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/debian/control?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/debian/control (original)
+++ trunk/libhtml-treebuilder-xpath-perl/debian/control Tue May 19 05:48:14 2009
@@ -1,13 +1,13 @@
 Source: libhtml-treebuilder-xpath-perl
 Section: perl
 Priority: optional
-Build-Depends: debhelper (>= 5.0.0), quilt
-Build-Depends-Indep: perl (>= 5.8.8-7), libtest-pod-perl,
- libtest-pod-coverage-perl, libhtml-tree-perl, libxml-xpathengine-perl
+Build-Depends: debhelper (>= 7), quilt
+Build-Depends-Indep: perl (>= 5.8.8-7), libtest-pod-perl, libhtml-tree-perl,
+ libtest-pod-coverage-perl, libxml-xpathengine-perl (>= 0.12)
 Maintainer: Debian Perl Group <pkg-perl-maintainers at lists.alioth.debian.org>
-Uploaders: Damyan Ivanov <dmn at debian.org>,
- gregor herrmann <gregor+debian at comodo.priv.at>
-Standards-Version: 3.7.3
+Uploaders: Damyan Ivanov <dmn at debian.org>, gregor herrmann <gregoa at debian.org>,
+ Jonathan Yu <frequency at cpan.org>
+Standards-Version: 3.8.1
 Vcs-Svn: svn://svn.debian.org/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/
 Vcs-Browser: http://svn.debian.org/viewsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/
 Homepage: http://search.cpan.org/dist/HTML-TreeBuilder-XPath/
@@ -15,7 +15,7 @@
 Package: libhtml-treebuilder-xpath-perl
 Architecture: all
 Depends: ${perl:Depends}, ${misc:Depends}, libhtml-tree-perl,
- libxml-xpathengine-perl
-Description: add XPath support to HTML::TreeBuilder
- HTML::TreeBuilder::XPath adds typical XPath methods to HTML::TreeBuilder,
- making it easy to query a document.
+ libxml-xpathengine-perl (>= 0.12)
+Description: Perl module to add XPath support to HTML::TreeBuilder
+ HTML::TreeBuilder::XPath is a drop-in module that adds typical XPath methods
+ to HTML::TreeBuilder, making it easy to query a document.

Modified: trunk/libhtml-treebuilder-xpath-perl/debian/copyright
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/debian/copyright?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/debian/copyright (original)
+++ trunk/libhtml-treebuilder-xpath-perl/debian/copyright Tue May 19 05:48:14 2009
@@ -1,24 +1,33 @@
-This is the debian package for the HTML-TreeBuilder-XPath module.
-It was created by Jeremiah C. Foster <jeremiah at jeremiahfoster.com> 
-using dh-make-perl and is maintained by the Debian Perl Group.
+Format-Specification:
+    http://wiki.debian.org/Proposals/CopyrightFormat?action=recall&rev=196
+Upstream-Maintainer: Michel Rodriguez <mirod at cpan.org>
+Upstream-Source: http://search.cpan.org/dist/HTML-TreeBuilder-XPath/
+Upstream-Name: HTML-TreeBuilder-XPath
 
-It was downloaded from http://search.cpan.org/dist/HTML-TreeBuilder-XPath/
+Files: *
+Copyright: 2006, Michel Rodriguez <mirod at cpan.org>
+License-Alias: Perl
+License: Artistic | GPL-1+
+ This library is free software; you can redistribute it and/or modify it under
+ the same terms as Perl itself, either Perl version 5.8.4 or, at your option,
+ any later version of Perl 5 you may have available.
 
-The upstream author is: Michel Rodriguez <mirod at cpan.org>.
+Files: debian/*
+Copyright: 2009, Jonathan Yu <frequency at cpan.org>
+ 2007, Jeremiah C. Foster <jeremiah at jeremiahfoster.com>
+License: Artistic | GPL-1+
 
-Copyright (C) 2006 by Michel Rodriguez
+License: Artistic
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the Artistic License, which comes with Perl.
+    On Debian GNU/Linux systems, the complete text of the Artistic License
+    can be found in `/usr/share/common-licenses/Artistic'
 
-  This library is free software; you can redistribute it and/or modify it
-  under the same terms as Perl itself, either Perl version 5.8.4 or, at your
-  option, any later version of Perl 5 you may have available.
-                     
+License: GPL-1+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 1, or (at your option)
+    any later version.
+    On Debian GNU/Linux systems, the complete text of the GNU General
+    Public License can be found in `/usr/share/common-licenses/GPL'
 
-Perl is distributed under your choice of the GNU General Public 
-License or the Artistic License.  On Debian GNU/Linux systems, 
-the complete text of the GNU General Public License can be found 
-in `/usr/share/common-licenses/GPL' and the Artistic Licence in 
-`/usr/share/common-licenses/Artistic'.
-
-The Debian packaging is (C) 2007, Jeremiah C. Foster 
-<jeremiah at jeremiahfoster.com> and is licensed under the same terms 
-as the software itself (see above).

Modified: trunk/libhtml-treebuilder-xpath-perl/debian/rules
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/debian/rules?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/debian/rules (original)
+++ trunk/libhtml-treebuilder-xpath-perl/debian/rules Tue May 19 05:48:14 2009
@@ -1,81 +1,24 @@
 #!/usr/bin/make -f
-# This debian/rules file is provided as a template for normal perl
-# packages. It was created by Marc Brockschmidt <marc at dch-faq.de> for
-# the Debian Perl Group (http://pkg-perl.alioth.debian.org/) but may
-# be used freely wherever it is useful.
-
-# Uncomment this to turn on verbose mode.
-#export DH_VERBOSE=1
-
-include /usr/share/quilt/quilt.make
-
-# If set to a true value then MakeMaker's prompt function will
-# always return the default without waiting for user input.
-export PERL_MM_USE_DEFAULT=1
-
-PACKAGE=$(shell dh_listpackages)
-
-ifndef PERL
-PERL = /usr/bin/perl
-endif
-
-TMP     =$(CURDIR)/debian/$(PACKAGE)
 
 build: build-stamp
-build-stamp: $(QUILT_STAMPFN)
-	dh_testdir
+build-stamp:
+	dh build
+	touch $@
 
-	# Add commands to compile the package here
-	$(PERL) Makefile.PL INSTALLDIRS=vendor
-	$(MAKE)
-	$(MAKE) test
-
-	touch build-stamp
-
-clean: unpatch
-	dh_testdir
-	dh_testroot
-
-	dh_clean build-stamp install-stamp
-
-	# Add commands to clean up after the build process here
-	[ ! -f Makefile ] || $(MAKE) realclean
+clean:
+	dh $@
 
 install: install-stamp
 install-stamp: build-stamp
-	dh_testdir
-	dh_testroot
-	dh_clean -k
-
-	# Add commands to install the package into debian/$PACKAGE_NAME here
-	$(MAKE) install DESTDIR=$(TMP) PREFIX=/usr
-
-	# As this is a architecture independent package, we are not
-	# supposed to install stuff to /usr/lib. MakeMaker creates
-	# the dirs, we delete them from the deb:
-	[ ! -d $(TMP)/usr/lib/perl5 ] || rmdir --ignore-fail-on-non-empty --parents --verbose $(TMP)/usr/lib/perl5
-
-	touch install-stamp
+	dh install
+	touch $@
 
 binary-arch:
-# We have nothing to do here for an architecture-independent package
 
-binary-indep: build install
-	dh_testdir
-	dh_testroot
-	dh_installdocs
-	dh_installchangelogs Changes
-	dh_perl
-	dh_compress
-	dh_fixperms
-	dh_installdeb
-	dh_gencontrol
-	dh_md5sums
-	dh_builddeb
+binary-indep: install
+	dh $@
 
+binary: binary-arch binary-indep
 
-source diff:
-	@echo >&2 'source and diff are obsolete - use dpkg-source -b'; false
+.PHONY: binary binary-arch binary-indep install clean build
 
-binary: binary-indep binary-arch
-.PHONY: build clean binary-indep binary-arch binary

Modified: trunk/libhtml-treebuilder-xpath-perl/lib/HTML/TreeBuilder/XPath.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/lib/HTML/TreeBuilder/XPath.pm?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/lib/HTML/TreeBuilder/XPath.pm (original)
+++ trunk/libhtml-treebuilder-xpath-perl/lib/HTML/TreeBuilder/XPath.pm Tue May 19 05:48:14 2009
@@ -1,19 +1,20 @@
 package HTML::TreeBuilder::XPath;
 
+use List::Util qw( first);
 
 use strict;
 use warnings;
 
 use vars qw($VERSION);
 
-$VERSION = '0.09';
-
-my %ENT= ( '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', );
+$VERSION = '0.10';
+
+my %CHAR2DEFAULT_ENT= ( '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;');
+my %NUM2DEFAULT_ENT= ( '38' => 'amp', '60' => 'lt', '62' => 'gt', '"' => '&quot;');
 
 package HTML::TreeBuilder::XPath;
 
 use base( 'HTML::TreeBuilder');
-
 
 package HTML::TreeBuilder::XPath::Node;
 
@@ -29,6 +30,9 @@
 sub getFirstChild { return undef; }
 sub getLastChild { return undef; }
 
+# need to do a complete look_down each time, as the id could have been changed 
+# without any object being involved, hence without a potential cache being
+# up to date
 sub getElementById 
   { my ($self, $id) = @_;
     return scalar $self->look_down( id => $id);
@@ -104,13 +108,16 @@
     }
 }
 
-sub findnodes           { my( $elt, $path)= @_; return xp->findnodes(           $path, $elt); }
-sub findnodes_as_string { my( $elt, $path)= @_; return xp->findnodes_as_string( $path, $elt); }
-sub findvalue           { my( $elt, $path)= @_; return xp->findvalue(           $path, $elt); }
-sub exists              { my( $elt, $path)= @_; return xp->exists(              $path, $elt); }
-sub find_xpath          { my( $elt, $path)= @_; return xp->find(                $path, $elt); }
-sub matches             { my( $elt, $path)= @_; return xp->matches( $elt, $path, $elt); }
-sub set_namespace       { my $elt= shift; xp->new->set_namespace( @_); }
+
+sub findnodes            { my( $elt, $path)= @_; return xp->findnodes(            $path, $elt); }
+sub findnodes_as_string  { my( $elt, $path)= @_; return xp->findnodes_as_string(  $path, $elt); }
+sub findnodes_as_strings { my( $elt, $path)= @_; return xp->findnodes_as_strings( $path, $elt); }
+sub findvalue            { my( $elt, $path)= @_; return xp->findvalue(            $path, $elt); }
+sub findvalues           { my( $elt, $path)= @_; return xp->findvalues(           $path, $elt); }
+sub exists               { my( $elt, $path)= @_; return xp->exists(               $path, $elt); }
+sub find_xpath           { my( $elt, $path)= @_; return xp->find(                 $path, $elt); }
+sub matches              { my( $elt, $path)= @_; return xp->matches( $elt, $path, $elt);        }
+sub set_namespace        { my $elt= shift; xp->new->set_namespace( @_); }
 
 sub getRootNode
   { my $elt= shift;
@@ -205,6 +212,187 @@
     return $elt_or_text;
   }
 
+sub toString { return shift->as_XML( @_); }
+
+# produces better looking XML
+{
+  no warnings 'redefine'; 
+  sub as_XML_compact
+    { my( $node, $opt)= @_;
+      my $name = $node->{'_tag'};
+      if( $name eq '~literal')     { return _xml_escape_text( $node->{text});                     }
+
+      if( $name eq '~declaration') { return '<!'   . _xml_escape_text( $node->{text})    . ">";   }
+      if( $name eq '~pi')          { return '<?'   . _xml_escape_text( $node->{text})    . '?>';  }
+      if( $name eq '~comment')     { return '<!--' . _xml_escape_comment( $node->{text}) . '-->'; }
+
+      my $lc_name= lc $name;
+
+      my $xml= $node->_start_tag;
+
+      if( $HTML::Tagset::isCDATA_Parent{$lc_name})
+        { my $content= $node->{_content} || '';
+          if( ref $content eq 'ARRAY' || $content->isa( 'ARRAY'))
+            { $xml .= _xml_escape_cdata( join( '', @$content), $opt); }
+          else
+            { $xml .= $content; }
+        }
+      else
+        { # start tag
+          foreach my $child ($node->content_list) 
+            { if( ref $child) { $xml .= $child->as_XML_compact(); }
+              else            { $xml .=  _xml_escape_text( $child); }
+            }
+        }
+          $xml.= "</$name>" unless $HTML::Tagset::emptyElement{$lc_name};
+      return $xml;
+    }
+}
+
+    
+
+{ my %phrase_name;    # all phrase tags, + literals (those are not indented)
+  my %extra_newline;  # tags that get an extra newline before the end tag
+  my $default_indent; # 2 spaces, change with the 'indent' option
+  BEGIN 
+    { %phrase_name= %HTML::Tagset::isPhraseMarkup;
+      $phrase_name{'~literal'}= 1;
+      $default_indent= '  ';
+      %extra_newline= map { $_ => 1 } qw(html head body script div table tbody thead tfoot tr form dl ol ul);
+    }
+
+  sub as_XML_indented
+    { my( $node, $opt)= @_;
+
+
+      my $name = $node->{'_tag'};
+      my $lc_name= lc $name;
+
+      if( $name eq '~literal')     { return _xml_escape_text( $node->{text});                     }
+      if( $name eq '~declaration') { return '<!'   . _xml_escape_text( $node->{text})    . ">\n"; }
+
+
+      if( $name eq '~pi')          { return '<?'   . _xml_escape_text( $node->{text})    . '?>';  }
+      if( $name eq '~comment')     { return '<!--' . _xml_escape_comment( $node->{text}) . '-->'; }
+      
+      my $xml;
+      my $pre_tag_indent='';
+      if(!$phrase_name{$lc_name}) { $pre_tag_indent=  "\n" . ($opt->{indent} || $default_indent) x ($opt->{indent_level}||0); }
+      if( $opt->{indent_level}) { $xml .= $pre_tag_indent; }
+
+      $xml.= $node->_start_tag();
+
+      my $content='';
+
+      if( $HTML::Tagset::isCDATA_Parent{$lc_name})
+        { my $content= $node->{_content} || '';
+          if( ref $content && (ref $content eq 'ARRAY' || $content->isa( 'ARRAY') ))
+            { $content= _xml_escape_cdata( join( '', @$content), $opt); }
+        }
+      else
+        { 
+          my %child_opt= %$opt;
+          $child_opt{indent_level}++;
+          foreach my $child ($node->content_list) 
+            { if( ref $child) { $content .= $child->as_XML_indented( \%child_opt ); }
+              else            { $content .=  _xml_escape_text( $child);             }
+            }
+        }
+      $xml .= $content;
+
+      if( $extra_newline{$lc_name} && $content ne '' ) { $xml.= $pre_tag_indent; }
+      $xml.= "</$name>" unless $HTML::Tagset::emptyElement{$lc_name};
+      $xml .="\n" if( !$opt->{indent_level});
+     
+      return $xml;
+    }
+}
+
+sub _start_tag
+  { my( $node)= @_;
+    my $name = $node->{'_tag'};
+    my $start_tag.= "<$name";
+    foreach my $att_name (sort keys %$node) 
+      { next if( (!length $att_name) ||  ($att_name=~ m{^_}) || ($att_name eq '/') );
+        my $well_formed_att_name= well_formed_name( $att_name);
+        $start_tag .= qq{ $well_formed_att_name="} . _xml_escape_attribute_value( $node->{$att_name}) . qq{"};
+      }
+    $start_tag.= $HTML::Tagset::emptyElement{lc $name} ? " />" : ">";
+    return $start_tag;
+  }
+
+sub well_formed_name
+  { my( $name)= @_;
+    $name=~ s{[^\w:_-]+}{_}g;
+    if( $name=~ m{^\d}) { $name= "a$name"; }
+    return $name;
+  }
+
+sub _indent_level
+  { my( $node)= @_;
+    my $level= scalar grep { !$HTML::Tagset::isPhraseMarkup{lc $_->{_tag}} } $node->lineage;
+    return $level;
+  }
+
+{ my( $indent, %extra_newline, $nl);
+  BEGIN 
+    { $indent= '  '; 
+      $nl= "\n";
+      %extra_newline= map { $_ => 1 } qw(html head body script div table tr form ol ul);
+    }
+   
+  sub indents
+    { my( $opt, $name)= @_;
+      my $indents= { pre_start_tag => '', post_start_tag => '', pre_end_tag => '', post_end_tag => ''};
+      if( $opt->{indented})
+        { my $indent_level= $opt->{indent_level};
+          my $wrapping_nl= $nl; 
+          if( !defined( $indent_level)) { $indent_level = 0; $wrapping_nl= ''; }
+          if( $HTML::Tagset::isKnown{lc $name} && !$HTML::Tagset::isPhraseMarkup{lc $name} && $indent_level > 0) 
+            { $indents->{pre_start_tag}= $wrapping_nl . ($indent x $indent_level); }
+          if( $extra_newline{lc $name})
+            { $indents->{post_start_tag}= $nl; 
+              $indents->{pre_end_tag}= $nl . ($indent x $indent_level);
+            }
+          if( $indent_level == 0) 
+            { $indents->{post_end_tag} = $wrapping_nl; }
+        }
+      return $indents;
+    }
+}
+
+    
+sub _xml_escape_attribute_value
+  { my( $text)= @_;
+    $text=~ s{([&<>"])}{$CHAR2DEFAULT_ENT{$1}}g; # escape also quote, as it is the attribute separator
+    return $text;
+  }
+
+sub _xml_escape_text
+  { my( $text)= @_;
+    $text=~ s{([&<>])}{$CHAR2DEFAULT_ENT{$1}}g;
+    return $text;
+  }
+
+sub _xml_escape_comment
+  { my( $text)= @_;
+    $text=~ s{([&<>])}{$CHAR2DEFAULT_ENT{$1}}g;
+    $text=~ s{--}{-&#45;}g; # can't have double --'s in XML comments
+    return $text;
+  }
+
+sub _xml_escape_cdata
+  { my( $text, $opt)= @_;
+    if( $opt->{force_escape_cdata} || $text=~ m{[<&]})
+      { $text=~ s{^\s*\Q<![CDATA[}{}s;
+        $text=~ s{\Q]]>\E\s*$}{}s;
+        $text=~ s{]]>}{]]&#62;}g; # can't have]]> in CDATA
+        $text=  "<![CDATA[$text]]>";
+      }
+    return $text;
+  }
+
+
 package HTML::TreeBuilder::XPath::TextNode;
 
 use base 'HTML::TreeBuilder::XPath::Node';
@@ -214,18 +402,18 @@
 sub isTextNode    { return 1;                   }
 sub getAttributes { return wantarray ? () : []; }
 
-# extracted from _HTML::Element as_XML
+# similar to HTML::Element as_XML
 sub as_XML
   { my( $node, $entities)= @_;
     my $content= $node->{_content};
     if( $node->{_parent} && $node->{_parent}->{_tag} eq 'script')
       { $content=~ s{(&\w+;)}{HTML::Entities::decode($1)}eg; }
     else
-      { HTML::Element::_xml_escape($content); }
+      { $content= HTML::Element::_xml_escape_text($content); }
     return $content;
   }
-
-
+*as_XML_compact  = *as_XML;
+*as_XML_indented = *as_XML;
 
 
 sub getPreviousSibling
@@ -264,12 +452,6 @@
     return $text->{_parent}->is_inside( $node);
   }
 
-sub xml_escape
-  { my( $text)= @_;
-    $text=~ s{([&<>])}{$ENT{$1}}g;
-    return $text;
-  }
-
 1;
 
 
@@ -283,7 +465,7 @@
 sub string_value    { return $_[0]->{_value}; }
 sub to_number       { return XML::XPathEngine::Number->new( $_[0]->{_value}); }
 sub isAttributeNode { 1 }
-sub toString        { return qq{$_[0]->{_name}="$_[0]->{_value}"}; }
+sub toString        { return qq{ $_[0]->{_name}="$_[0]->{_value}"}; }
 
 # awfully inefficient, but hopefully this is called only for weird (read test-case) queries
 sub getPreviousSibling
@@ -336,6 +518,10 @@
 sub getChildNodes   { my @content= ( $_[0]->{_root}); return wantarray ? @content : \@content; }
 sub getAttributes   { return []        }
 sub isDocumentNode  { return 1         }
+sub getRootNode     { return $_[0]     }
+sub getName         { return           }
+sub getNextSibling  { return           }
+sub getPreviousSibling { return        }
 
 # added to provide element-like methods to root, for use by cmp
 sub lineage {  return ($_[0]); }
@@ -377,7 +563,11 @@
 
 =head2 findnodes_as_string ($path)
 
-Returns the text values of the nodes
+Returns the text values of the nodes, as one string.
+
+=head2 findnodes_as_strings ($path)
+
+Returns a list of the values of the result nodes. 
 
 =head2 findvalue ($path)
 
@@ -389,6 +579,12 @@
 print the value found, or manipulate it in the ways you would a normal
 perl value (e.g. using regular expressions).
 
+=head2 findvalues ($path)
+
+Returns the values of the matching nodes as a list. This is mostly the same
+as findnodes_as_strings, except that the elements of the list are objects
+(with overloaded stringification) instead of plain strings.
+
 =head2 exists ($path)
 
 Returns true if the given path exists.
@@ -407,7 +603,20 @@
 you need to check how many nodes it found you should check $nodeset->size.
 See L<XML::XPathEngine::NodeSet>.
 
-
+=head2 as_XML_compact
+
+HTML::TreeBuilder's C<as_XML> output is not really nice to look at, so
+I added a new method, that can be used as a simple replacement for it. 
+It escapes only the '<', '>' and '&' (plus '"' in attribute values), and
+wraps CDATA elements in CDATA sections.
+
+Note that the XML is actually not garanteed to be valid at this point. Nothing
+is done about the encoding of the string. Patches or just ideas of how it could
+work are welcome.
+
+=head2 as_XML_indented 
+
+Same as as_XML, except that the output is indented.
 
 =head1 SEE ALSO
 

Modified: trunk/libhtml-treebuilder-xpath-perl/t/HTML-TreeBuilder-XPath.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libhtml-treebuilder-xpath-perl/t/HTML-TreeBuilder-XPath.t?rev=35821&op=diff
==============================================================================
--- trunk/libhtml-treebuilder-xpath-perl/t/HTML-TreeBuilder-XPath.t (original)
+++ trunk/libhtml-treebuilder-xpath-perl/t/HTML-TreeBuilder-XPath.t Tue May 19 05:48:14 2009
@@ -3,7 +3,7 @@
 
 #########################
 
-use Test::More tests => 19;
+use Test::More tests => 29;
 BEGIN { use_ok('HTML::TreeBuilder::XPath') };
 
 #########################
@@ -36,6 +36,10 @@
 is( $html->findvalue( '//*[@id="foo"]/@class|//*[@id="foo"]/@id'), 'myspanfoo', '2 atts on same element (unsorted)');
 
 is( $html->findvalue( '//b'), 'boldall', '2 texts');
+is( join( '|', $html->findvalues( '//b')), 'bold|all', '2 texts with findvalues');
+is( join( '|', $html->findnodes_as_strings( '//b')), 'bold|all', '2 texts with findnodes_as_strings');
+is( join( '|', $html->findvalues( '//a/@href')), 'http://foo.com/|/bar/', '2 texts with findvalues');
+is( join( '|', $html->findnodes_as_strings( '//a/@href')), 'http://foo.com/|/bar/', '2 texts with findnodes_as_strings');
 is( $html->findvalue( '//p[@id="toto"]/a'), 'linksmore links', '2 siblings');
 is( $html->findvalue( '//p[@id="toto"]/a[1]|//p[@id="toto"]/a[2]'), 'linksmore links', '2 siblings');
 
@@ -55,6 +59,19 @@
 is( $html->findvalue('id("foo")/@id'), 'foo', 'id function (attribute)');
 }
 
+
+{
+# test for root
+my ($fake_root)=$html->findnodes('/');
+ok( !$fake_root->getParentNode => "fake root does not have a parent");
+is( $fake_root->getRootNode, $fake_root, "fake root is its own root");
+ok( !@{$fake_root->getAttributes} => "fake root has no attributes");
+ok( !defined($fake_root->getName) => "fake root does not have a name");
+ok( !defined($fake_root->getNextSibling) => "fake root does not have a next sibling");
+ok( !defined($fake_root->getPreviousSibling) => "fake root does not have a prev sibling");
+
+}
+
 __END__
 /html/body/h1            1 Example header
 //@id[.="toto"]          2 toto




More information about the Pkg-perl-cvs-commits mailing list