[libhtml-scrubber-perl] 08/13: fix handling of self closing tags, like <hr/>

Florian Schlichting fsfs at moszumanska.debian.org
Sat Nov 11 13:46:05 UTC 2017


This is an automated email from the git hooks/post-receive script.

fsfs pushed a commit to annotated tag release/0.10-TRIAL
in repository libhtml-scrubber-perl.

commit 4532022c588ada5b43752564a1719d0d2664ac41
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Fri Sep 20 20:10:09 2013 +0400

    fix handling of self closing tags, like <hr/>
    
    Use empty_element_tags in HTML::Parser, distinguish
    close events by $text argument (empry - self closing,
    not empty - real </tag>) as it's documented.
    
    No tags autoclosing, so '<br>' results in '<br>'.
    
    Space is added before '/>', so '<hr/>' result in '<hr />'.
    
    Fixes tickets #19063 and #25477.
---
 Changes                  |  2 ++
 lib/HTML/Scrubber.pm     | 21 ++++++++++++++++-----
 t/rt19063_xhtml.t        | 18 ++++++++++++++++++
 t/rt25477_self_closing.t | 28 ++++++++++++++++++++++++++++
 4 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/Changes b/Changes
index 7d16e79..fc8cc2e 100644
--- a/Changes
+++ b/Changes
@@ -2,6 +2,8 @@ Revision history for Perl extension HTML::Scrubber.
 
 {{$NEXT}}
     - RT3008 Changed examples to be XSS free
+    - RT19063, RT25477 fixed handling of self closing tags,
+      for example '<hr />'
 
 0.09      2011-04-01 16:35:50 Europe/London
     - Basic conversion to Dist::Zilla/git
diff --git a/lib/HTML/Scrubber.pm b/lib/HTML/Scrubber.pm
index b018ea1..5b8b93e 100644
--- a/lib/HTML/Scrubber.pm
+++ b/lib/HTML/Scrubber.pm
@@ -57,7 +57,7 @@ If you're new to perl, good luck to you.
 
 use strict;
 use warnings;
-use HTML::Parser();
+use HTML::Parser 3.47 ();
 use HTML::Entities;
 our( @_scrub, @_scrub_fh );
 
@@ -79,6 +79,7 @@ sub new {
         unbroken_text   => 1,
         case_sensitive  => 0,
         boolean_attribute_value => undef,
+        empty_element_tags => 1,
     );
 
     my $self = {
@@ -295,6 +296,7 @@ sub scrub_file {
     $_[0]->{_p}->parse_file($_[1]);
 
     return delete $_[0]->{_r} unless exists $_[0]->{_out};
+    print { $_[0]->{_out} } $_[0]->{_r} if length $_[0]->{_r};
     delete $_[0]->{_out};
     return 1;
 }
@@ -424,11 +426,19 @@ sub _scrub_str {
         }
     }
     elsif ( $e eq 'end' ) {
+        my $place = 0;
         if ( exists $s->{_rules}->{$t} ) {
-            $outstr .= "</$t>" if $s->{_rules}->{$t};
+            $place = 1 if $s->{_rules}->{$t};
         }
         elsif ( $s->{_rules}->{'*'} ) {
-            $outstr .= "</$t>";
+            $place = 1;
+        }
+        if ( $place ) {
+            if ( length $text ) {
+                $outstr .= "</$t>";
+            } else {
+                substr $s->{_r}, -1, 0, ' /';
+            }
         }
     }
     elsif ( $e eq 'comment' ) {
@@ -458,8 +468,9 @@ Now calls _scrub_str and pushes that out to a file.
 =cut
 
 sub _scrub_fh {
-
-    print { $_[0]->{"\0_s"}->{_out} } _scrub_str(@_);
+    my $self =  $_[0]->{"\0_s"};
+    print { $self->{_out} } $self->{'_r'} if length $self->{_r};
+    $self->{'_r'} = _scrub_str(@_);
 }
 
 =for comment _scrub
diff --git a/t/rt19063_xhtml.t b/t/rt19063_xhtml.t
new file mode 100644
index 0000000..39f6874
--- /dev/null
+++ b/t/rt19063_xhtml.t
@@ -0,0 +1,18 @@
+# Tests related to RT25477 - https://rt.cpan.org/Public/Bug/Display.html?id=25477
+use strict;
+use warnings;
+use File::Spec;
+use Test::More;
+
+use_ok('HTML::Scrubber');
+use HTML::Scrubber;
+
+my $scrubber = HTML::Scrubber->new;
+$scrubber->default(1);
+is(
+    $scrubber->scrub('<hr/><hr><hr /><hr></hr>'),
+    '<hr /><hr><hr /><hr></hr>',
+    "correct result"
+);
+
+done_testing;
diff --git a/t/rt25477_self_closing.t b/t/rt25477_self_closing.t
new file mode 100644
index 0000000..a939caa
--- /dev/null
+++ b/t/rt25477_self_closing.t
@@ -0,0 +1,28 @@
+# Tests related to RT25477 - https://rt.cpan.org/Public/Bug/Display.html?id=25477
+use strict;
+use warnings;
+use File::Spec;
+use Test::More;
+
+use_ok('HTML::Scrubber');
+use HTML::Scrubber;
+
+my $scrubber = HTML::Scrubber->new;
+$scrubber->default(1);
+my $scrubbed = $scrubber->scrub( <<'END' );
+<script src="www.google.com/script.js" />
+<b>one</b>
+<script type="text/javascript">
+        alert("hello")
+</script>
+<b>two</b>
+END
+
+is($scrubbed, <<'END', "correct result");
+
+<b>one</b>
+
+<b>two</b>
+END
+
+done_testing;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libhtml-scrubber-perl.git



More information about the Pkg-perl-cvs-commits mailing list