r28944 - in /branches/upstream/libhtml-strip-perl: ./ current/ current/Changes current/MANIFEST current/Makefile.PL current/README current/Strip.pm current/Strip.xs current/strip_html.c current/strip_html.h current/test.pl current/typemap
dmn at users.alioth.debian.org
dmn at users.alioth.debian.org
Mon Dec 29 15:57:13 UTC 2008
Author: dmn
Date: Mon Dec 29 15:57:10 2008
New Revision: 28944
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=28944
Log:
[svn-inject] Installing original source of libhtml-strip-perl
Added:
branches/upstream/libhtml-strip-perl/
branches/upstream/libhtml-strip-perl/current/
branches/upstream/libhtml-strip-perl/current/Changes
branches/upstream/libhtml-strip-perl/current/MANIFEST
branches/upstream/libhtml-strip-perl/current/Makefile.PL
branches/upstream/libhtml-strip-perl/current/README
branches/upstream/libhtml-strip-perl/current/Strip.pm
branches/upstream/libhtml-strip-perl/current/Strip.xs
branches/upstream/libhtml-strip-perl/current/strip_html.c
branches/upstream/libhtml-strip-perl/current/strip_html.h
branches/upstream/libhtml-strip-perl/current/test.pl
branches/upstream/libhtml-strip-perl/current/typemap
Added: branches/upstream/libhtml-strip-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/Changes?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/Changes (added)
+++ branches/upstream/libhtml-strip-perl/current/Changes Mon Dec 29 15:57:10 2008
@@ -1,0 +1,50 @@
+Revision history for Perl extension HTML::Strip.
+
+1.06 Fri Feb 10 11:18:35 2006
+ - documented 'set_decode_entities' method
+
+1.05 Thu Feb 9 12:11:50 2006
+ - added 'set_decode_entities' method
+
+1.04 Mon Jan 24 16:41:51 2005
+ - Replaced all instances of strcmp with strcasecmp to make the
+ module case-insensitive towards HTML tag names
+
+1.03 Wed Jul 7 13:42:26 2004
+ - Added 'emit_spaces' configuration option which can turn off
+ attempted conversion of HTML tags into spaces
+ - Constructor options now passed in a hash
+
+1.02 Tue Feb 24 16:24:18 2004
+ - Yet more checks to prevent extraneous whitespace
+ - Added many more tests
+
+1.01 Mon Jul 7 18:15:59 2003
+ - Removed provision for escaped quotes in attributes values
+ - More checks to prevent the outputting of extraneous whitespace
+
+1.00 Wed Jun 11 12:05:47 2003
+ - rewritten in C, using a struct for each object to keep track
+ of state and striptags
+
+0.05 Thu May 22 19:49:25 2003
+ - removed "XSOPT => '-C++'" from Makefile.PL as it was
+ unnecessary and causing problems for some people
+ - added "#include <string.h>" to strip_html.cpp as its
+ absence was causing problems for some people
+
+0.04 Sun Mar 23 12:45:13 2003
+ - Tweaked docs, added FAQ explaining why 0.03 failed cpan testing
+
+0.03 Sat Mar 22 11:20:34 2003
+ - rewritten in C++ to make striptags an attribute of each
+ object
+
+0.02 Mon Mar 17 18:20:01 2003
+ - added set_striptags() method
+ - documented module
+
+0.01 Tue Mar 4 18:17:38 2003
+ - original version; created by h2xs 1.21 with options
+ -A -n HTML::Strip html_strip.h
+
Added: branches/upstream/libhtml-strip-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/MANIFEST?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/MANIFEST (added)
+++ branches/upstream/libhtml-strip-perl/current/MANIFEST Mon Dec 29 15:57:10 2008
@@ -1,0 +1,10 @@
+Changes
+Makefile.PL
+MANIFEST
+README
+Strip.pm
+Strip.xs
+strip_html.h
+strip_html.c
+typemap
+test.pl
Added: branches/upstream/libhtml-strip-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/Makefile.PL?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/Makefile.PL (added)
+++ branches/upstream/libhtml-strip-perl/current/Makefile.PL Mon Dec 29 15:57:10 2008
@@ -1,0 +1,18 @@
+use ExtUtils::MakeMaker;
+
+# Build configuration for the HTML::Strip XS extension; see the
+# ExtUtils::MakeMaker documentation for the meaning of each key.
+WriteMakefile(
+ 'NAME' => 'HTML::Strip',
+ 'VERSION_FROM' => 'Strip.pm', # $VERSION is read from the module itself
+ 'PREREQ_PM' => {}, # no mandatory prerequisites are declared
+ ($] >= 5.005 ? ## these two keys are only passed on perl 5.005 and later
+ (ABSTRACT_FROM => 'Strip.pm', # retrieve abstract from module
+ AUTHOR => 'Alex Bowley <kilinrax at cpan.org>') : ()),
+ 'LIBS' => [''], # no extra libraries; e.g., '-lm'
+ 'DEFINE' => '', # no extra defines; e.g., '-DHAVE_SOMETHING'
+ # strip_html.h sits next to Strip.xs; no extra include path is configured:
+ 'INC' => '', # e.g., '-I/usr/include/other'
+ # Needed because the stripper itself lives in strip_html.c, not in Strip.xs:
+ 'OBJECT' => '$(O_FILES)', # link all the C files too
+);
Added: branches/upstream/libhtml-strip-perl/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/README?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/README (added)
+++ branches/upstream/libhtml-strip-perl/current/README Mon Dec 29 15:57:10 2008
@@ -1,0 +1,23 @@
+HTML::Strip
+===========
+
+This module strips HTML-like markup from text.
+It is written in XS, and thus about five times quicker than using
+regular expressions for the same task.
+
+INSTALLATION
+
+To install this module type the following:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+COPYRIGHT AND LICENCE
+
+Please report any bugs/suggestions to Alex Bowley <kilinrax at cpan.org>
+
+Copyright (c) 2003 Alex Bowley. All rights reserved.
+This program is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
Added: branches/upstream/libhtml-strip-perl/current/Strip.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/Strip.pm?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/Strip.pm (added)
+++ branches/upstream/libhtml-strip-perl/current/Strip.pm Mon Dec 29 15:57:10 2008
@@ -1,0 +1,242 @@
+package HTML::Strip;
+
+use 5.006;
+use warnings;
+use strict;
+
+use Carp qw( carp croak );
+
+require Exporter;
+require DynaLoader;
+
+our @ISA = qw(Exporter DynaLoader);
+
+# Items to export into callers namespace by default. Note: do not export
+# names by default without a very good reason. Use EXPORT_OK instead.
+# Do not simply export all your public functions/methods/constants.
+
+# This allows declaration use HTML::Strip ':all';
+# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
+# will save memory.
+our %EXPORT_TAGS = ( 'all' => [ qw(
+ ) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw();
+
+our $VERSION = '1.06';
+
+bootstrap HTML::Strip $VERSION;
+
+# True only if HTML::Entities could be loaded; parse() consults this
+# flag before it attempts to decode entities.
+my $_html_entities_p = eval 'require HTML::Entities';
+# Default settings; new() merges caller-supplied settings over these.
+my %defaults = (
+ striptags => [qw( title
+ style
+ script
+ applet )],
+ emit_spaces => 1,
+ decode_entities => 1,
+ );
+
+sub new {
+    my ($class, @settings) = @_;
+    # create() is the XS allocator; re-bless its result into the invoking class
+    my $self = bless create(), $class;
+    # caller settings override %defaults; each one is routed to its set_<name> method
+    my %option = (%defaults, @settings);
+    for my $name (keys %option) {
+        my $setter = "set_${name}";
+        unless( $self->can($setter) ) {
+            carp "Invalid setting '$name'";
+            next;
+        }
+        $self->$setter( $option{$name} );
+    }
+    return $self;
+}
+
+sub set_striptags {
+    my ($self, @tags) = @_;
+    # Replace the set of strip tags. Accepts either one array reference
+    # or a flat list of tag names; both forms end up as an array
+    # reference handed to the XS routine set_striptags_ref().
+    my $tagref = ref($tags[0]) eq 'ARRAY' ? $tags[0] : \@tags;
+    $self->set_striptags_ref( $tagref );
+}
+
+sub parse {
+    my ($self, $text) = @_;
+    my $stripped = $self->strip_html( $text );
+    # decode entities only if enabled on this object AND HTML::Entities was loaded
+    my $decoding = $self->decode_entities && $_html_entities_p;
+    $stripped = HTML::Entities::decode($stripped) if $decoding;
+    return $stripped;
+}
+
+sub eof {
+    my ($self) = @_;    # discard parser state so the next parse() starts afresh
+    $self->reset();
+}
+
+1;
+__END__
+# Below is stub documentation for your module. You better edit it!
+
+=head1 NAME
+
+HTML::Strip - Perl extension for stripping HTML markup from text.
+
+=head1 SYNOPSIS
+
+ use HTML::Strip;
+
+ my $hs = HTML::Strip->new();
+
+ my $clean_text = $hs->parse( $raw_html );
+ $hs->eof;
+
+=head1 DESCRIPTION
+
+This module simply strips HTML-like markup from text in a very quick
+and brutal manner. It could quite easily be used to strip XML or SGML
+from text as well; but removing HTML markup is a much more common
+problem, hence this module lives in the HTML:: namespace.
+
+It is written in XS, and thus about five times quicker than using
+regular expressions for the same task.
+
+It does I<not> do any syntax checking (if you want that, use
+L<HTML::Parser>), instead it merely applies the following rules:
+
+=over 4
+
+=item 1
+
+Anything that looks like a tag, or group of tags will be replaced with
+a single space character. Tags are considered to be anything that
+starts with a C<E<lt>> and ends with a C<E<gt>>; with the caveat that a
+C<E<gt>> character may appear in either of the following without
+ending the tag:
+
+=over 4
+
+=item Quote
+
+Quotes are considered to start with either a C<'> or a C<"> character,
+and end with the next matching quote character. Backslash escapes are
+I<not> recognised (so C<\"> does end the quote); see Changes for 1.01.
+
+=item Comment
+
+If the tag starts with an exclamation mark, it is assumed to be a
+declaration or a comment. Within such tags, C<E<gt>> characters do not
+end the tag if they appear within pairs of double dashes (e.g. C<E<lt>!--
+E<lt>a href="old.htm"E<gt>old pageE<lt>/aE<gt> --E<gt>> would be
+stripped completely).
+
+=back
+
+=item 2
+
+Anything that appears within so-called I<strip tags> is stripped as
+well. By default, these tags are C<title>, C<script>, C<style> and
+C<applet>.
+
+=back
+
+HTML::Strip maintains state between calls, so you can parse a document
+in chunks should you wish. If one chunk ends half-way through a tag,
+quote, comment, or whatever; it will remember this, and expect the
+next call to parse to start with the remains of said tag.
+
+If this is not going to be the case, be sure to call $hs->eof()
+between calls to $hs->parse().
+
+=head2 METHODS
+
+=item new()
+
+Constructor. Can optionally take a hash of settings (with keys
+corresponding to the C<set_> methods below).
+
+For example, the following is a valid constructor:
+
+ my $hs = HTML::Strip->new(
+ striptags => [ 'script', 'iframe' ],
+ emit_spaces => 0
+ );
+
+=item parse()
+
+Takes a string as an argument, returns it stripped of HTML.
+
+=item eof()
+
+Resets the current state information, ready to parse a new block of HTML.
+
+=item clear_striptags()
+
+Clears the current set of strip tags.
+
+=item add_striptag()
+
+Adds the string passed as an argument to the current set of strip tags.
+
+=item set_striptags()
+
+Takes a reference to an array of strings, which replace the current
+set of strip tags.
+
+=item set_emit_spaces()
+
+Takes a boolean value. If set to false, HTML::Strip will not attempt
+any conversion of tags into spaces. Set to true by default.
+
+=item set_decode_entities()
+
+Takes a boolean value. If set to false, HTML::Strip will not decode HTML
+entities. Set to true by default.
+
+=head2 LIMITATIONS
+
+=over 4
+
+=item Whitespace
+
+Despite only outputting one space character per group of tags, and
+avoiding doing so when tags are bordered by spaces or the start or
+end of strings, HTML::Strip can often output more than desired; such
+as with the following HTML:
+
+ <h1> HTML::Strip </h1> <p> <em> <strong> fast, and brutal </strong> </em> </p>
+
+Which gives the following output:
+
+C<E<nbsp>HTML::StripE<nbsp>E<nbsp>E<nbsp>E<nbsp>fast, and brutalE<nbsp>E<nbsp>E<nbsp>>
+
+Thus, you may want to post-filter the output of HTML::Strip to remove
+excess whitespace (for example, using C<tr/ / /s;>).
+(This has been improved since previous releases, but is still an issue)
+
+=item HTML Entities
+
+HTML::Strip will only attempt decoding of HTML entities if
+L<HTML::Entities> is installed.
+
+=head2 EXPORT
+
+None by default.
+
+=head1 AUTHOR
+
+Alex Bowley E<lt>kilinrax at cpan.orgE<gt>
+
+=head1 SEE ALSO
+
+L<perl>, L<HTML::Parser>, L<HTML::Entities>
+
+=cut
Added: branches/upstream/libhtml-strip-perl/current/Strip.xs
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/Strip.xs?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/Strip.xs (added)
+++ branches/upstream/libhtml-strip-perl/current/Strip.xs Mon Dec 29 15:57:10 2008
@@ -1,0 +1,105 @@
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include "strip_html.h"
+
+MODULE = HTML::Strip PACKAGE = HTML::Strip
+
+PROTOTYPES: ENABLE
+
+Stripper *
+create()
+    PREINIT:
+        Stripper * stripper;
+    CODE:
+        Newz( 0, stripper, 1, Stripper ); /* zero-filled: numstriptags and the o_* options are defined before any setter runs */
+        reset( stripper ); /* then put the parser flags into their starting state */
+        RETVAL = stripper;
+    OUTPUT:
+        RETVAL
+
+void
+DESTROY( stripper )
+ Stripper * stripper
+ CODE:
+ Safefree( stripper ); /* releases the struct allocated by create() */
+
+char *
+strip_html( stripper, raw )
+    Stripper * stripper
+    char * raw
+    PREINIT:
+        char * clean;
+        int size = strlen(raw) + 2; /* NUL, plus one space that may be owed for a tag that ended in an earlier call */
+    INIT:
+        New( 0, clean, size, char );
+    CODE:
+        strip_html( stripper, raw, clean );
+        RETVAL = clean;
+    OUTPUT:
+        RETVAL
+    CLEANUP:
+        Safefree( clean ); /* free the scratch buffer allocated in INIT */
+
+void
+reset( stripper )
+ Stripper * stripper
+
+void
+clear_striptags( stripper )
+ Stripper * stripper
+
+void
+add_striptag( stripper, tag )
+ Stripper * stripper
+ char * tag
+
+void
+set_emit_spaces( stripper, emit )
+ Stripper * stripper
+ int emit
+ CODE:
+ stripper->o_emit_spaces = emit; /* consulted by the C strip_html() before it substitutes a space for a tag */
+
+void
+set_decode_entities( stripper, decode )
+ Stripper * stripper
+ int decode
+ CODE:
+ stripper->o_decode_entities = decode; /* only stored here; read back through decode_entities() */
+
+int
+decode_entities( stripper )
+ Stripper * stripper
+ CODE:
+ RETVAL = stripper->o_decode_entities; /* the value last stored by set_decode_entities() */
+ OUTPUT:
+ RETVAL
+
+void
+set_striptags_ref( stripper, tagref )
+    Stripper * stripper
+    SV * tagref
+    PREINIT:
+        AV * tags;
+        I32 numtags = 0;
+        int n;
+        if( (SvROK(tagref)) &&
+            (SvTYPE(SvRV(tagref)) == SVt_PVAV) ) {
+            tags = (AV *) SvRV(tagref);
+        } else {
+            XSRETURN_UNDEF; /* not an array reference: leave the strip tags as they are */
+        }
+        numtags = av_len(tags); /* highest index, so negative for an empty array */
+        if( numtags < 0 ) {
+            XSRETURN_UNDEF; /* NOTE(review): an empty array keeps the old tags rather than clearing them */
+        }
+    CODE:
+        clear_striptags( stripper );
+        for (n = 0; n <= numtags; n++) {
+            SV ** elem = av_fetch(tags, n, 0);
+            /* av_fetch returns NULL for a slot that was never assigned; skip it instead of dereferencing */
+            if( elem != NULL ) add_striptag( stripper, SvPV_nolen(*elem) );
+        }
Added: branches/upstream/libhtml-strip-perl/current/strip_html.c
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/strip_html.c?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/strip_html.c (added)
+++ branches/upstream/libhtml-strip-perl/current/strip_html.c Mon Dec 29 15:57:10 2008
@@ -1,0 +1,196 @@
+
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include "strip_html.h"
+
+/* Copy raw to output minus markup; parser state lives in *stripper so input may arrive in chunks. output needs room for strlen(raw) chars, one carried-over space and the NUL. */
+void
+strip_html( Stripper * stripper, const char * raw, char * output ) {
+    const char * p_raw = raw;
+    const char * raw_end = raw + strlen(raw);
+    char * p_output = output;
+
+    while( p_raw < raw_end ) {
+        if( stripper->f_in_tag ) {
+            /* inside a tag */
+            /* check if we know either the tagname, or that we're in a declaration */
+            if( !stripper->f_full_tagname && !stripper->f_in_decl ) {
+                /* if this is the first character, check if it's a '!'; if so, we're in a declaration */
+                if( stripper->p_tagname == stripper->tagname && *p_raw == '!' ) {
+                    stripper->f_in_decl = 1;
+                }
+                /* then check if the first character is a '/', in which case, this is a closing tag */
+                else if( stripper->p_tagname == stripper->tagname && *p_raw == '/' ) {
+                    stripper->f_closing = 1;
+                } else {
+                    /* if we don't have the full tag name yet, add current character unless it's whitespace, a '/', or a '>';
+                       otherwise null pad the string and set the full tagname flag, and check the tagname against stripped ones.
+                       the name is truncated at MAX_TAGNAMELENGTH-1 characters so the terminating NUL still fits in tagname[] */
+                    if( (!isspace( (unsigned char) *p_raw ) && *p_raw != '/' && *p_raw != '>') &&
+                        !( (stripper->p_tagname - stripper->tagname) == MAX_TAGNAMELENGTH - 1 ) ) {
+                        *stripper->p_tagname++ = *p_raw;
+                    } else {
+                        *stripper->p_tagname = 0;
+                        stripper->f_full_tagname = 1;
+                        /* if we're in a stripped tag block, and this is a closing tag, check to see if it ends the stripped block */
+                        if( stripper->f_in_striptag && stripper->f_closing ) {
+                            if( strcasecmp( stripper->tagname, stripper->striptag ) == 0 ) {
+                                stripper->f_in_striptag = 0;
+                            }
+                        /* if we're outside a stripped tag block, check tagname against the numstriptags configured names */
+                        } else if( !stripper->f_in_striptag && !stripper->f_closing ) {
+                            int i;
+                            for( i = 0; i < stripper->numstriptags; i++ ) {
+                                if( strcasecmp( stripper->tagname, stripper->o_striptags[i] ) == 0 ) {
+                                    stripper->f_in_striptag = 1;
+                                    strcpy( stripper->striptag, stripper->tagname );
+                                }
+                            }
+                        }
+                        check_end( stripper, *p_raw );
+                    }
+                }
+            } else {
+                if( stripper->f_in_quote ) {
+                    /* inside a quote */
+                    /* end of quote if current character matches the opening quote character */
+                    if( *p_raw == stripper->quote ) {
+                        stripper->quote = 0;
+                        stripper->f_in_quote = 0;
+                    }
+                } else {
+                    /* not in a quote */
+                    /* check for quote characters */
+                    if( *p_raw == '\'' || *p_raw == '\"' ) {
+                        stripper->f_in_quote = 1;
+                        stripper->quote = *p_raw;
+                        /* reset lastchar_* flags in case we have something perverse like '-"' or '/"' */
+                        stripper->f_lastchar_minus = 0;
+                        stripper->f_lastchar_slash = 0;
+                    } else {
+                        if( stripper->f_in_decl ) {
+                            /* inside a declaration */
+                            if( stripper->f_lastchar_minus ) {
+                                /* last character was a minus, so if current one is, then we're either entering or leaving a comment */
+                                if( *p_raw == '-' ) {
+                                    stripper->f_in_comment = !stripper->f_in_comment;
+                                }
+                                stripper->f_lastchar_minus = 0;
+                            } else {
+                                /* if current character is a minus, we might be starting a comment marker */
+                                if( *p_raw == '-' ) {
+                                    stripper->f_lastchar_minus = 1;
+                                }
+                            }
+                            if( !stripper->f_in_comment ) {
+                                check_end( stripper, *p_raw );
+                            }
+                        } else {
+                            check_end( stripper, *p_raw );
+                        }
+                    } /* quote character check */
+                } /* in quote check */
+            } /* full tagname check */
+        }
+        else {
+            /* not in a tag */
+            /* check for tag opening, and reset parameters if one has */
+            if( *p_raw == '<' ) {
+                stripper->f_in_tag = 1;
+                stripper->tagname[0] = 0;
+                stripper->p_tagname = stripper->tagname;
+                stripper->f_full_tagname = 0;
+                stripper->f_closing = 0;
+                stripper->f_just_seen_tag = 1;
+            }
+            else {
+                /* copy to stripped provided we're not in a stripped block */
+                if( !stripper->f_in_striptag ) {
+                    /* only emit spaces if we're configured to do so (on by default) */
+                    if( stripper->o_emit_spaces ){
+                        /* output a space in place of tags we have previously parsed,
+                           and set a flag so we only do this once for every group of tags.
+                           done here to prevent unnecessary trailing spaces */
+                        if( isspace( (unsigned char) *p_raw ) ) {
+                            /* don't output a space if this character is one anyway */
+                            stripper->f_outputted_space = 1;
+                        } else {
+                            if( !stripper->f_outputted_space &&
+                                stripper->f_just_seen_tag ) {
+                                *p_output++ = ' ';
+                                stripper->f_outputted_space = 1;
+                            } else {
+                                /* this character must not be a space */
+                                stripper->f_outputted_space = 0;
+                            }
+                        }
+                    }
+                    *p_output++ = *p_raw;
+                    /* reset 'just seen tag' flag */
+                    stripper->f_just_seen_tag = 0;
+                }
+            }
+        } /* in tag check */
+        p_raw++;
+    } /* while loop */
+
+    *p_output = 0;
+}
+
+void
+reset( Stripper * stripper ) {
+    /* Return the parser to its "outside any tag" starting state. */
+    stripper->f_in_striptag = 0;
+    stripper->f_in_tag = 0;
+    stripper->f_in_quote = 0;
+    stripper->f_in_decl = 0;
+    stripper->f_in_comment = 0;
+
+    stripper->f_full_tagname = 0;
+    stripper->f_closing = 0;
+    stripper->f_lastchar_slash = 0;
+    stripper->f_lastchar_minus = 0;
+
+    /* pretend a space was just emitted so input that begins with a tag gets no leading space */
+    stripper->f_just_seen_tag = 0;
+    stripper->f_outputted_space = 1;
+}
+
+void
+clear_striptags( Stripper * stripper ) {
+    stripper->numstriptags = 0;
+    stripper->o_striptags[0][0] = '\0'; /* slot 0 becomes the empty string and the list is marked empty */
+}
+/* Append striptag to the strip list, truncated to MAX_TAGNAMELENGTH-1 characters; a full list is reported on stderr. */
+void
+add_striptag( Stripper * stripper, char * striptag ) {
+    if( stripper->numstriptags < MAX_STRIPTAGS-1 ) {
+        sprintf( stripper->o_striptags[stripper->numstriptags++], "%.*s", MAX_TAGNAMELENGTH - 1, striptag );
+    } else {
+        fprintf( stderr, "Cannot have more than %i strip tags\n", MAX_STRIPTAGS-1 );
+    }
+}
+
+
+void
+check_end( Stripper * stripper, char end ) {
+    /* Update the tag state for one character seen inside a tag. */
+    if( end == '/' ) {
+        /* remember the slash: if '>' comes next this was a self-closed tag */
+        stripper->f_lastchar_slash = 1;
+        return;
+    }
+    if( end == '>' ) {
+        /* '>' terminates the tag together with any quote, comment or declaration state */
+        stripper->f_in_tag = 0;
+        stripper->f_in_decl = 0;
+        stripper->f_in_comment = 0;
+        stripper->f_in_quote = 0;
+        /* a self-closed strip tag such as '<script src="foo" />' must not open a stripped block */
+        if( stripper->f_lastchar_slash && strcasecmp( stripper->striptag, stripper->tagname ) == 0 ) {
+            stripper->f_in_striptag = 0;
+        }
+    }
+    stripper->f_lastchar_slash = 0;
+}
Added: branches/upstream/libhtml-strip-perl/current/strip_html.h
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/strip_html.h?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/strip_html.h (added)
+++ branches/upstream/libhtml-strip-perl/current/strip_html.h Mon Dec 29 15:57:10 2008
@@ -1,0 +1,37 @@
+
+#define MAX_TAGNAMELENGTH 20
+#define MAX_STRIPTAGS 20
+
+typedef struct Stripper { /* per-object parser state (f_*), scratch buffers and options (o_*) */
+ int f_in_tag; /* set on '<', cleared by check_end() on '>' */
+ int f_closing; /* first character after '<' was '/' */
+ int f_lastchar_slash; /* last character given to check_end() was '/' */
+
+ char tagname[MAX_TAGNAMELENGTH]; /* name of the tag currently being read */
+ char * p_tagname; /* write position inside tagname */
+ char f_full_tagname; /* tagname has been terminated (or truncated) */
+
+ int f_outputted_space; /* last character handled counted as whitespace; suppresses a second space */
+ int f_just_seen_tag; /* a tag was opened since the last character copied to the output */
+
+ int f_in_quote; /* inside a quoted section of a tag */
+ char quote; /* the quote character that opened it */
+
+ int f_in_decl; /* tag started with '!' */
+ int f_in_comment; /* toggled by every "--" inside a declaration */
+ int f_lastchar_minus; /* previous declaration character was a lone '-' */
+
+ int f_in_striptag; /* inside a strip-tag block: text is not copied to the output */
+ char striptag[MAX_TAGNAMELENGTH]; /* name of the tag that opened that block */
+ char o_striptags[MAX_STRIPTAGS][MAX_TAGNAMELENGTH]; /* configured strip tag names */
+ int numstriptags; /* number of names stored in o_striptags */
+ int o_emit_spaces; /* non-zero: tags may be replaced by a single space */
+ int o_decode_entities; /* stored for the Perl layer, which checks it in parse() */
+} Stripper;
+
+void strip_html( Stripper * stripper, const char * raw, char * clean );
+void reset( Stripper * stripper );
+void clear_striptags( Stripper * stripper );
+void add_striptag( Stripper * stripper, char * tag );
+
+void check_end( Stripper * stripper, char );
Added: branches/upstream/libhtml-strip-perl/current/test.pl
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/test.pl?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/test.pl (added)
+++ branches/upstream/libhtml-strip-perl/current/test.pl Mon Dec 29 15:57:10 2008
@@ -1,0 +1,91 @@
+# Before `make install' is performed this script should be runnable with
+# `make test'. After `make install' it should work as `perl test.pl'
+
+#########################
+
+# The plan below must equal the number of ok()/skip() calls in this file (23).
+
+use Test;
+BEGIN { plan tests => 23 };
+use HTML::Strip;
+ok(1); # If we made it this far, we're ok.
+
+#########################
+
+# Each check feeds one snippet to parse() and compares the stripped text;
+# eof() discards parser state so that unrelated snippets do not interact.
+
+my $hs = HTML::Strip->new;
+
+ok( $hs->parse( 'test' ), 'test' );
+$hs->eof;
+
+ok( $hs->parse( '<em>test</em>' ), 'test' );
+$hs->eof;
+
+ok( $hs->parse( 'foo<br>bar' ), 'foo bar' );
+$hs->eof;
+
+ok( $hs->parse( '<p align="center">test</p>' ), 'test' );
+$hs->eof;
+# an unterminated attribute quote swallows the rest of the input
+ok( $hs->parse( '<p align="center>test</p>' ), '' );
+$hs->eof;
+# no eof() between the next two calls: state is carried across chunks
+ok( $hs->parse( '<foo>bar' ), 'bar' );
+ok( $hs->parse( '</foo>baz' ), ' baz' );
+$hs->eof;
+
+ok( $hs->parse( '<!-- <p>foo</p> bar -->baz' ), 'baz' );
+$hs->eof;
+
+ok( $hs->parse( '<img src="foo.gif" alt="a > b">bar' ), 'bar' );
+$hs->eof;
+
+ok( $hs->parse( '<script>if (a<b && a>c)</script>bar' ), 'bar' );
+$hs->eof;
+
+ok( $hs->parse( '<# just data #>bar' ), 'bar' );
+$hs->eof;
+
+#ok( $hs->parse( '<![INCLUDE CDATA [ >>>>>>>>>>>> ]]>bar' ), 'bar' );
+#$hs->eof;
+
+ok( $hs->parse( '<script>foo</script>bar' ), 'bar' );
+$hs->eof;
+# entity checks are skipped (true first argument) when HTML::Entities is missing
+my $html_entities_p = eval 'require HTML::Entities' ? '' : 'HTML::Entities not available';
+skip( $html_entities_p, $hs->parse( '&#060;foo&#062;' ), '<foo>' );
+$hs->eof;
+skip( $html_entities_p, $hs->parse( '&lt;foo&gt;' ), '<foo>' );
+$hs->eof;
+$hs->set_decode_entities(0);
+skip( $html_entities_p, $hs->parse( '&#060;foo&#062;' ), '&#060;foo&#062;' );
+$hs->eof;
+skip( $html_entities_p, $hs->parse( '&lt;foo&gt;' ), '&lt;foo&gt;' );
+$hs->eof;
+
+# a second object with its own strip tags must not affect the first one
+my $hs2 = HTML::Strip->new;
+$hs2->set_striptags( [ 'foo' ] );
+
+ok( $hs2->parse( '<script>foo</script>bar' ), 'foo bar' );
+$hs2->eof;
+
+ok( $hs2->parse( '<foo>foo</foo>bar' ), 'bar' );
+$hs2->eof;
+
+ok( $hs->parse( '<script>foo</script>bar' ), 'bar' );
+$hs->eof;
+# set_striptags() also accepts a plain list instead of an array reference
+my @striptags = qw(baz quux);
+$hs->set_striptags( @striptags );
+
+ok( $hs->parse( '<baz>fumble</baz>bar<quux>foo</quux>' ), 'bar' );
+$hs->eof;
+
+ok( $hs->parse( '<baz>fumble<quux/>foo</baz>bar' ), 'bar' );
+$hs->eof;
+
+ok( $hs->parse( '<foo> </foo> <bar> baz </bar>' ), ' baz ' );
+$hs->eof;
Added: branches/upstream/libhtml-strip-perl/current/typemap
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libhtml-strip-perl/current/typemap?rev=28944&op=file
==============================================================================
--- branches/upstream/libhtml-strip-perl/current/typemap (added)
+++ branches/upstream/libhtml-strip-perl/current/typemap Mon Dec 29 15:57:10 2008
@@ -1,0 +1,15 @@
+TYPEMAP
+Stripper * O_STRIP
+
+INPUT
+O_STRIP
+	if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) )
+		$var = INT2PTR($type, SvIV((SV*)SvRV( $arg ))); /* the pointer was stored as an IV by the OUTPUT map */
+	else{
+		warn( \"${Package}::$func_name() -- $var is not a blessed SV reference\" );
+		XSRETURN_UNDEF;
+	}
+
+OUTPUT
+O_STRIP
+	sv_setref_pv( $arg, "HTML::Strip", (void*)$var );
More information about the Pkg-perl-cvs-commits
mailing list