[libhtml-restrict-perl] 01/02: Import original source of HTML-Restrict 2.2.4

Andrew Shadura andrewsh at debian.org
Wed Aug 30 13:37:20 UTC 2017


This is an automated email from the git hooks/post-receive script.

andrewsh pushed a commit to branch master
in repository libhtml-restrict-perl.

commit ec06787d2bdda608b8b24ca8b29d9edbea81ac6a
Author: Andrew Shadura <andrewsh at debian.org>
Date:   Wed Aug 30 15:33:58 2017 +0300

    Import original source of HTML-Restrict 2.2.4
---
 CONTRIBUTORS              |  21 ++
 Changes                   | 117 ++++++++
 INSTALL                   |  43 +++
 LICENSE                   | 379 +++++++++++++++++++++++++
 MANIFEST                  |  36 +++
 META.json                 |  95 +++++++
 META.yml                  |  56 ++++
 Makefile.PL               |  73 +++++
 README.md                 | 349 +++++++++++++++++++++++
 cpanfile                  |  35 +++
 dist.ini                  |   9 +
 examples/sanitize_file.pl |  23 ++
 lib/HTML/Restrict.pm      | 686 ++++++++++++++++++++++++++++++++++++++++++++++
 perlcriticrc              |  86 ++++++
 perltidyrc                |  12 +
 t/00-load.t               | 125 +++++++++
 t/attribute_constraints.t |  66 +++++
 t/author-pod-coverage.t   |  15 +
 t/author-pod-spell.t      |  74 +++++
 t/author-synopsis.t       |  13 +
 t/author-tidyall.t        |  19 ++
 t/comments.t              |  33 +++
 t/declaration.t           |  22 ++
 t/js.t                    |  24 ++
 t/lowercase.t             |  29 ++
 t/memory-leak.t           |  24 ++
 t/perlcriticrc            |   4 +
 t/pod.t                   |  12 +
 t/release-cpan-changes.t  |  18 ++
 t/replace_img.t           |  62 +++++
 t/scheme.t                |  70 +++++
 t/stack.t                 |  19 ++
 t/style.t                 |  24 ++
 t/xss.t                   |  35 +++
 tidyall.ini               |  26 ++
 35 files changed, 2734 insertions(+)

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644
index 0000000..285239f
--- /dev/null
+++ b/CONTRIBUTORS
@@ -0,0 +1,21 @@
+
+# HTML-RESTRICT CONTRIBUTORS #
+
+This is the (likely incomplete) list of people who have helped
+make this distribution what it is, either via code contributions, 
+patches, bug reports, help with troubleshooting, etc. A huge
+'thank you' to all of them.
+
+    * Arthur Axel 'fREW' Schmidt
+    * Ben Bullock
+    * David Golden
+    * Fitz Elliott
+    * Graham Knop
+    * Graham TerMarsch
+    * Karen Etheridge
+    * Mark Jubenville
+    * Olaf Alders
+    * perlpong
+    * skaufman
+
+
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..8d6b733
--- /dev/null
+++ b/Changes
@@ -0,0 +1,117 @@
+Revision history for HTML-Restrict
+
+2.2.4     2017-03-01 14:22:13-05:00 America/Toronto
+        - Updates SEE ALSO pod section (Ben Bullock)
+
+2.2.3     2016-07-26 18:00:55-04:00 America/Toronto
+        - Now with fewer non-core prerequisites (Karen Etheridge)
+
+2.2.2     2014-10-16 22:16:29-04:00 America/Toronto
+        - Fixes attribute value encoding. (Graham Knop)
+
+2.2.1     2014-08-18 17:22:05EDT-0400 America/Toronto
+        - Replace MooX::Types::MooseLike with Type::Tiny. (Samuel Kaufman)
+
+2.2.0   2014-08-15 23:01:22EDT-0400 America/Toronto
+        - Fix Changes file
+        - Bump to next minor version rather than changing the format of the
+          version numbers
+
+2.1.9   2013-12-10
+        - Swaps namespace::clean for namespace::autoclean. (Fitz Elliott)
+
+2.1.8   2013-06-06
+        - Unlike 2.1.7, This release doesn't contain a local::lib.
+
+2.1.7   2013-06-05
+        - Removes Perl6::Junction in order to squash smartmatch warnings.
+
+2.1.6   2013-05-06
+        - Fixes incorrect use of Moo's weak_ref. (Thanks to Carwyn Ellis,
+          Dagfinn Ilmari Mannsåker and Graham Knop for help with this.)
+
+2.1.5   2013-05-01
+        - Eliminates a circular reference. (Graham TerMarsch)
+
+2.1.4   2013-04-19
+        - Minor version should have been bumped in previous release. To bump it
+          now would be confusing.
+        - Require Moo 1.001000 as 1.000008 breaks this module.
+
+2.1.3   2013-04-18
+        - Add replace_img feature (David Golden).
+        - Add 'cite' to list of attributes for URI scheme checks (David
+          Golden).
+
+2.1.2   2013-03-20
+        - Fixes broken Pod in example script.
+        - Adds x_contributors to META.json.
+
+2.1.1   2013-03-08
+        - Updates Pod.
+
+2.1.0   2013-03-02
+        - Allows attributes to be validated against regexes (perlpong).
+
+2.0.0   2013-02-27
+        - Bumps version to 2.0.0 due to new features breaking backwards
+          compatibility.
+        - Forces all tag rules to be provided in lower case.  Breaks
+          backcompat, but mixed or upper case rules would have been silently
+          ignored.  So, this is really a bug fix.
+        - Strips content between script tags by default. Breaks backcompat,
+          but the default behaviour was likely not what anyone wanted.
+        - Strips content between style tags by default.  Breaks backcompat,
+          but the default behaviour was likely not what anyone wanted.
+        - Adds Pod for previously undocumented allow_comments,
+          allow_declaration and strip_enclosed_content.
+        - Ensures stripper stack is always reset (perlpong).
+        - Protects against undef string comparison (perlpong).
+        - Checks URI scheme for src attributes (perlpong).
+        - Removes accessor/mutator documentation.  Would prefer if all params
+          were set at object instatiation.
+        - Adds ridiculously basic example script.
+
+
+1.0.4   2012-05-22
+        - Explicity require MooX::Types::MooseLike rather than
+          MooX::Types::MooseLike::Base (Matt Phillips/Github #2)
+
+1.0.3   2012-03-27
+        - Replaces Moose with Moo for speed in non-persistent apps (Arthur Axel
+          'fREW' Schmidt)
+        - Adds uri scheme checking for href and src attributes
+        - Adds (undocumented) allow_comments() and allow_declartion() methods
+
+1.0.2   2011-10-11
+        - No longer emit warnings when pure HTML is supplied for processing
+            Rick Moore (RT #71553)
+
+1.0.1   2011-06-30
+        - Pod updates
+
+1.0.0   2011-06-30
+        - Bumped version to 1.0.0 This is no longer beta software.
+
+0.06    2010-03-22
+        - Fixed RT #55775 "Strings equating to boolean false returned as undef"
+            (Duncan Forsyth)
+
+0.05    2010-02-03
+        - process( undef ) returns without attempting to do anything.  Avoids
+            warnings about uninitialized vars.
+
+0.04    2009-11-19
+        - MooseX::Params::Validate 0.12 now required.  Some earlier versions do
+            not include pos_validated_list()
+
+0.03    2009-11-17
+        - process() no longer dies when passed an undef -- patch provided by
+            Mark Jubenville (ioncache)
+
+0.02    2009-09-17
+        - Improved documentation layout and clarity
+            Removed documentation for parser() method -- seems like a bad idea
+
+0.01    2009-09-17
+        - Beta release
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..5993a04
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,43 @@
+This is the Perl distribution HTML-Restrict.
+
+Installing HTML-Restrict is straightforward.
+
+## Installation with cpanm
+
+If you have cpanm, you only need one line:
+
+    % cpanm HTML::Restrict
+
+If it does not have permission to install modules to the current perl, cpanm
+will automatically set up and install to a local::lib in your home directory.
+See the local::lib documentation (https://metacpan.org/pod/local::lib) for
+details on enabling it in your environment.
+
+## Installing with the CPAN shell
+
+Alternatively, if your CPAN shell is set up, you should just be able to do:
+
+    % cpan HTML::Restrict
+
+## Manual installation
+
+As a last resort, you can manually install it. Download the tarball, untar it,
+then build it:
+
+    % perl Makefile.PL
+    % make && make test
+
+Then install it:
+
+    % make install
+
+If your perl is system-managed, you can create a local::lib in your home
+directory to install modules to. For details, see the local::lib documentation:
+https://metacpan.org/pod/local::lib
+
+## Documentation
+
+HTML-Restrict documentation is available as POD.
+You can run perldoc from a shell to read the documentation:
+
+    % perldoc HTML::Restrict
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5695594
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,379 @@
+This software is copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+Terms of the Perl programming language system itself
+
+a) the GNU General Public License as published by the Free
+   Software Foundation; either version 1, or (at your option) any
+   later version, or
+b) the "Artistic License"
+
+--- The GNU General Public License, Version 1, February 1989 ---
+
+This software is Copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software, licensed under:
+
+  The GNU General Public License, Version 1, February 1989
+
+                    GNU GENERAL PUBLIC LICENSE
+                     Version 1, February 1989
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The license agreements of most software companies try to keep users
+at the mercy of those companies.  By contrast, our General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  The
+General Public License applies to the Free Software Foundation's
+software and to any other program whose authors commit to using it.
+You can use it for your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Specifically, the General Public License is designed to make
+sure that you have the freedom to give away or sell copies of free
+software, that you receive source code or can get it if you want it,
+that you can change the software or use pieces of it in new free
+programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of a such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must tell them their rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any program or other work which
+contains a notice placed by the copyright holder saying it may be
+distributed under the terms of this General Public License.  The
+"Program", below, refers to any such program or work, and a "work based
+on the Program" means either the Program or any work containing the
+Program or a portion of it, either verbatim or with modifications.  Each
+licensee is addressed as "you".
+
+  1. You may copy and distribute verbatim copies of the Program's source
+code as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice and
+disclaimer of warranty; keep intact all the notices that refer to this
+General Public License and to the absence of any warranty; and give any
+other recipients of the Program a copy of this General Public License
+along with the Program.  You may charge a fee for the physical act of
+transferring a copy.
+
+  2. You may modify your copy or copies of the Program or any portion of
+it, and copy and distribute such modifications under the terms of Paragraph
+1 above, provided that you also do the following:
+
+    a) cause the modified files to carry prominent notices stating that
+    you changed the files and the date of any change; and
+
+    b) cause the whole of any work that you distribute or publish, that
+    in whole or in part contains the Program or any part thereof, either
+    with or without modifications, to be licensed at no charge to all
+    third parties under the terms of this General Public License (except
+    that you may choose to grant warranty protection to some or all
+    third parties, at your option).
+
+    c) If the modified program normally reads commands interactively when
+    run, you must cause it, when started running for such interactive use
+    in the simplest and most usual way, to print or display an
+    announcement including an appropriate copyright notice and a notice
+    that there is no warranty (or else, saying that you provide a
+    warranty) and that users may redistribute the program under these
+    conditions, and telling the user how to view a copy of this General
+    Public License.
+
+    d) You may charge a fee for the physical act of transferring a
+    copy, and you may at your option offer warranty protection in
+    exchange for a fee.
+
+Mere aggregation of another independent work with the Program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other work under the scope of these terms.
+
+  3. You may copy and distribute the Program (or a portion or derivative of
+it, under Paragraph 2) in object code or executable form under the terms of
+Paragraphs 1 and 2 above provided that you also do one of the following:
+
+    a) accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of
+    Paragraphs 1 and 2 above; or,
+
+    b) accompany it with a written offer, valid for at least three
+    years, to give any third party free (except for a nominal charge
+    for the cost of distribution) a complete machine-readable copy of the
+    corresponding source code, to be distributed under the terms of
+    Paragraphs 1 and 2 above; or,
+
+    c) accompany it with the information you received as to where the
+    corresponding source code may be obtained.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form alone.)
+
+Source code for a work means the preferred form of the work for making
+modifications to it.  For an executable file, complete source code means
+all the source code for all modules it contains; but, as a special
+exception, it need not include source code for modules which are standard
+libraries that accompany the operating system on which the executable
+file runs, or for standard header files or definitions files that
+accompany that operating system.
+
+  4. You may not copy, modify, sublicense, distribute or transfer the
+Program except as expressly provided under this General Public License.
+Any attempt otherwise to copy, modify, sublicense, distribute or transfer
+the Program is void, and will automatically terminate your rights to use
+the Program under this License.  However, parties who have received
+copies, or rights to use copies, from you under this General Public
+License will not have their licenses terminated so long as such parties
+remain in full compliance.
+
+  5. By copying, distributing or modifying the Program (or any work based
+on the Program) you indicate your acceptance of this license to do so,
+and all its terms and conditions.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the original
+licensor to copy, distribute or modify the Program subject to these
+terms and conditions.  You may not impose any further restrictions on the
+recipients' exercise of the rights granted herein.
+
+  7. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of the license which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+the license, you may choose any version ever published by the Free Software
+Foundation.
+
+  8. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+        Appendix: How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to humanity, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+  To do so, attach the following notices to the program.  It is safest to
+attach them to the start of each source file to most effectively convey
+the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) 19yy  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 1, or (at your option)
+    any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) 19xx name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the
+appropriate parts of the General Public License.  Of course, the
+commands you use may be called something other than `show w' and `show
+c'; they could even be mouse-clicks or menu items--whatever suits your
+program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  program `Gnomovision' (a program to direct compilers to make passes
+  at assemblers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+That's all there is to it!
+
+
+--- The Artistic License 1.0 ---
+
+This software is Copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software, licensed under:
+
+  The Artistic License 1.0
+
+The Artistic License
+
+Preamble
+
+The intent of this document is to state the conditions under which a Package
+may be copied, such that the Copyright Holder maintains some semblance of
+artistic control over the development of the package, while giving the users of
+the package the right to use and distribute the Package in a more-or-less
+customary fashion, plus the right to make reasonable modifications.
+
+Definitions:
+
+  - "Package" refers to the collection of files distributed by the Copyright
+    Holder, and derivatives of that collection of files created through
+    textual modification. 
+  - "Standard Version" refers to such a Package if it has not been modified,
+    or has been modified in accordance with the wishes of the Copyright
+    Holder. 
+  - "Copyright Holder" is whoever is named in the copyright or copyrights for
+    the package. 
+  - "You" is you, if you're thinking about copying or distributing this Package.
+  - "Reasonable copying fee" is whatever you can justify on the basis of media
+    cost, duplication charges, time of people involved, and so on. (You will
+    not be required to justify it to the Copyright Holder, but only to the
+    computing community at large as a market that must bear the fee.) 
+  - "Freely Available" means that no fee is charged for the item itself, though
+    there may be fees involved in handling the item. It also means that
+    recipients of the item may redistribute it under the same conditions they
+    received it. 
+
+1. You may make and give away verbatim copies of the source form of the
+Standard Version of this Package without restriction, provided that you
+duplicate all of the original copyright notices and associated disclaimers.
+
+2. You may apply bug fixes, portability fixes and other modifications derived
+from the Public Domain or from the Copyright Holder. A Package modified in such
+a way shall still be considered the Standard Version.
+
+3. You may otherwise modify your copy of this Package in any way, provided that
+you insert a prominent notice in each changed file stating how and when you
+changed that file, and provided that you do at least ONE of the following:
+
+  a) place your modifications in the Public Domain or otherwise make them
+     Freely Available, such as by posting said modifications to Usenet or an
+     equivalent medium, or placing the modifications on a major archive site
+     such as ftp.uu.net, or by allowing the Copyright Holder to include your
+     modifications in the Standard Version of the Package.
+
+  b) use the modified Package only within your corporation or organization.
+
+  c) rename any non-standard executables so the names do not conflict with
+     standard executables, which must also be provided, and provide a separate
+     manual page for each non-standard executable that clearly documents how it
+     differs from the Standard Version.
+
+  d) make other distribution arrangements with the Copyright Holder.
+
+4. You may distribute the programs of this Package in object code or executable
+form, provided that you do at least ONE of the following:
+
+  a) distribute a Standard Version of the executables and library files,
+     together with instructions (in the manual page or equivalent) on where to
+     get the Standard Version.
+
+  b) accompany the distribution with the machine-readable source of the Package
+     with your modifications.
+
+  c) accompany any non-standard executables with their corresponding Standard
+     Version executables, giving the non-standard executables non-standard
+     names, and clearly documenting the differences in manual pages (or
+     equivalent), together with instructions on where to get the Standard
+     Version.
+
+  d) make other distribution arrangements with the Copyright Holder.
+
+5. You may charge a reasonable copying fee for any distribution of this
+Package.  You may charge any fee you choose for support of this Package. You
+may not charge a fee for this Package itself. However, you may distribute this
+Package in aggregate with other (possibly commercial) programs as part of a
+larger (possibly commercial) software distribution provided that you do not
+advertise this Package as a product of your own.
+
+6. The scripts and library files supplied as input to or produced as output
+from the programs of this Package do not automatically fall under the copyright
+of this Package, but belong to whomever generated them, and may be sold
+commercially, and may be aggregated with this Package.
+
+7. C or perl subroutines supplied by you and linked into this Package shall not
+be considered part of this Package.
+
+8. The name of the Copyright Holder may not be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+The End
+
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..6cb9a59
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,36 @@
+# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.008.
+CONTRIBUTORS
+Changes
+INSTALL
+LICENSE
+MANIFEST
+META.json
+META.yml
+Makefile.PL
+README.md
+cpanfile
+dist.ini
+examples/sanitize_file.pl
+lib/HTML/Restrict.pm
+perlcriticrc
+perltidyrc
+t/00-load.t
+t/attribute_constraints.t
+t/author-pod-coverage.t
+t/author-pod-spell.t
+t/author-synopsis.t
+t/author-tidyall.t
+t/comments.t
+t/declaration.t
+t/js.t
+t/lowercase.t
+t/memory-leak.t
+t/perlcriticrc
+t/pod.t
+t/release-cpan-changes.t
+t/replace_img.t
+t/scheme.t
+t/stack.t
+t/style.t
+t/xss.t
+tidyall.ini
diff --git a/META.json b/META.json
new file mode 100644
index 0000000..f1fd977
--- /dev/null
+++ b/META.json
@@ -0,0 +1,95 @@
+{
+   "abstract" : "Strip unwanted HTML tags and attributes",
+   "author" : [
+      "Olaf Alders <olaf at wundercounter.com>"
+   ],
+   "dynamic_config" : 0,
+   "generated_by" : "Dist::Zilla version 6.008, CPAN::Meta::Converter version 2.150005",
+   "license" : [
+      "perl_5"
+   ],
+   "meta-spec" : {
+      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+      "version" : 2
+   },
+   "name" : "HTML-Restrict",
+   "no_index" : {
+      "directory" : [
+         "examples",
+         "t",
+         "xt"
+      ]
+   },
+   "prereqs" : {
+      "configure" : {
+         "requires" : {
+            "ExtUtils::MakeMaker" : "0",
+            "perl" : "5.006"
+         }
+      },
+      "develop" : {
+         "requires" : {
+            "Pod::Coverage::TrustPod" : "0",
+            "Test::CPAN::Changes" : "0.19",
+            "Test::Code::TidyAll" : "0.50",
+            "Test::More" : "0.88",
+            "Test::Pod::Coverage" : "1.08",
+            "Test::Spelling" : "0.12",
+            "Test::Synopsis" : "0"
+         }
+      },
+      "runtime" : {
+         "requires" : {
+            "Carp" : "0",
+            "Data::Dump" : "0",
+            "HTML::Entities" : "0",
+            "HTML::Parser" : "0",
+            "List::Util" : "1.33",
+            "Moo" : "1.002000",
+            "Scalar::Util" : "0",
+            "Sub::Quote" : "0",
+            "Types::Standard" : "1.000001",
+            "URI" : "0",
+            "namespace::clean" : "0",
+            "perl" : "5.006",
+            "strict" : "0"
+         }
+      },
+      "test" : {
+         "requires" : {
+            "Test::Fatal" : "0",
+            "Test::More" : "0",
+            "perl" : "5.006",
+            "warnings" : "0"
+         }
+      }
+   },
+   "release_status" : "stable",
+   "resources" : {
+      "bugtracker" : {
+         "web" : "https://github.com/oalders/html-restrict/issues"
+      },
+      "homepage" : "https://github.com/oalders/html-restrict",
+      "repository" : {
+         "type" : "git",
+         "url" : "https://github.com/oalders/html-restrict.git",
+         "web" : "https://github.com/oalders/html-restrict"
+      }
+   },
+   "version" : "2.2.4",
+   "x_contributors" : [
+      "Arthur Axel 'fREW' Schmidt <frioux at gmail.com>",
+      "Ben Bullock <benkasminbullock at gmail.com>",
+      "David Golden <dagolden at cpan.org>",
+      "Fitz Elliott <fitz.elliott at gmail.com>",
+      "Graham Knop <haarg at haarg.org>",
+      "Graham TerMarsch <graham at howlingfrog.com>",
+      "Karen Etheridge <ether at cpan.org>",
+      "Mark Jubenville <mark at raybec.com>",
+      "Olaf Alders <olaf at wundersolutions.com>",
+      "perlpong <calyx238 at gmail.com>",
+      "skaufman <sam at socialflow.com>"
+   ],
+   "x_serialization_backend" : "Cpanel::JSON::XS version 3.0225"
+}
+
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..e4f5b68
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,56 @@
+---
+abstract: 'Strip unwanted HTML tags and attributes'
+author:
+  - 'Olaf Alders <olaf at wundercounter.com>'
+build_requires:
+  Test::Fatal: '0'
+  Test::More: '0'
+  perl: '5.006'
+  warnings: '0'
+configure_requires:
+  ExtUtils::MakeMaker: '0'
+  perl: '5.006'
+dynamic_config: 0
+generated_by: 'Dist::Zilla version 6.008, CPAN::Meta::Converter version 2.150005'
+license: perl
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.4.html
+  version: '1.4'
+name: HTML-Restrict
+no_index:
+  directory:
+    - examples
+    - t
+    - xt
+requires:
+  Carp: '0'
+  Data::Dump: '0'
+  HTML::Entities: '0'
+  HTML::Parser: '0'
+  List::Util: '1.33'
+  Moo: '1.002000'
+  Scalar::Util: '0'
+  Sub::Quote: '0'
+  Types::Standard: '1.000001'
+  URI: '0'
+  namespace::clean: '0'
+  perl: '5.006'
+  strict: '0'
+resources:
+  bugtracker: https://github.com/oalders/html-restrict/issues
+  homepage: https://github.com/oalders/html-restrict
+  repository: https://github.com/oalders/html-restrict.git
+version: 2.2.4
+x_contributors:
+  - "Arthur Axel 'fREW' Schmidt <frioux at gmail.com>"
+  - 'Ben Bullock <benkasminbullock at gmail.com>'
+  - 'David Golden <dagolden at cpan.org>'
+  - 'Fitz Elliott <fitz.elliott at gmail.com>'
+  - 'Graham Knop <haarg at haarg.org>'
+  - 'Graham TerMarsch <graham at howlingfrog.com>'
+  - 'Karen Etheridge <ether at cpan.org>'
+  - 'Mark Jubenville <mark at raybec.com>'
+  - 'Olaf Alders <olaf at wundersolutions.com>'
+  - 'perlpong <calyx238 at gmail.com>'
+  - 'skaufman <sam at socialflow.com>'
+x_serialization_backend: 'YAML::Tiny version 1.69'
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..570d8bf
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,73 @@
+# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.008.
+use strict;
+use warnings;
+
+use 5.006;
+
+use ExtUtils::MakeMaker;
+
+my %WriteMakefileArgs = (
+  "ABSTRACT" => "Strip unwanted HTML tags and attributes",
+  "AUTHOR" => "Olaf Alders <olaf\@wundercounter.com>",
+  "CONFIGURE_REQUIRES" => {
+    "ExtUtils::MakeMaker" => 0
+  },
+  "DISTNAME" => "HTML-Restrict",
+  "LICENSE" => "perl",
+  "MIN_PERL_VERSION" => "5.006",
+  "NAME" => "HTML::Restrict",
+  "PREREQ_PM" => {
+    "Carp" => 0,
+    "Data::Dump" => 0,
+    "HTML::Entities" => 0,
+    "HTML::Parser" => 0,
+    "List::Util" => "1.33",
+    "Moo" => "1.002000",
+    "Scalar::Util" => 0,
+    "Sub::Quote" => 0,
+    "Types::Standard" => "1.000001",
+    "URI" => 0,
+    "namespace::clean" => 0,
+    "strict" => 0
+  },
+  "TEST_REQUIRES" => {
+    "Test::Fatal" => 0,
+    "Test::More" => 0,
+    "warnings" => 0
+  },
+  "VERSION" => "2.2.4",
+  "test" => {
+    "TESTS" => "t/*.t"
+  }
+);
+
+
+my %FallbackPrereqs = (
+  "Carp" => 0,
+  "Data::Dump" => 0,
+  "HTML::Entities" => 0,
+  "HTML::Parser" => 0,
+  "List::Util" => "1.33",
+  "Moo" => "1.002000",
+  "Scalar::Util" => 0,
+  "Sub::Quote" => 0,
+  "Test::Fatal" => 0,
+  "Test::More" => 0,
+  "Types::Standard" => "1.000001",
+  "URI" => 0,
+  "namespace::clean" => 0,
+  "strict" => 0,
+  "warnings" => 0
+);
+
+
+unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) {
+  delete $WriteMakefileArgs{TEST_REQUIRES};
+  delete $WriteMakefileArgs{BUILD_REQUIRES};
+  $WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs;
+}
+
+delete $WriteMakefileArgs{CONFIGURE_REQUIRES}
+  unless eval { ExtUtils::MakeMaker->VERSION(6.52) };
+
+WriteMakefile(%WriteMakefileArgs);
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0796222
--- /dev/null
+++ b/README.md
@@ -0,0 +1,349 @@
+# NAME
+
+HTML::Restrict - Strip unwanted HTML tags and attributes
+
+# VERSION
+
+version 2.2.4
+
+# SYNOPSIS
+
+    use HTML::Restrict;
+
+    my $hr = HTML::Restrict->new();
+
+    # use default rules to start with (strip away all HTML)
+    my $processed = $hr->process('  <b>i am bold</b>  ');
+
+    # $processed now equals: 'i am bold'
+
+    # Now, a less restrictive example:
+    use HTML::Restrict;
+
+    my $hr = HTML::Restrict->new(
+        rules => {
+            b   => [],
+            img => [qw( src alt / )]
+        }
+    );
+
+    my $html = q[<body><b>hello</b> <img src="pic.jpg" alt="me" id="test" /></body>];
+    my $processed = $hr->process( $html );
+
+    # $processed now equals: <b>hello</b> <img src="pic.jpg" alt="me" />
+
+# DESCRIPTION
+
+This module uses [HTML::Parser](https://metacpan.org/pod/HTML::Parser) to strip HTML from text in a restrictive
+manner.  By default all HTML is restricted.  You may alter the default
+behaviour by supplying your own tag rules.
+
+# CONSTRUCTOR AND STARTUP
+
+## new()
+
+Creates and returns a new HTML::Restrict object.
+
+    my $hr = HTML::Restrict->new()
+
+HTML::Restrict doesn't require any params to be passed to new.  If your goal is
+to remove all HTML from text, then no further setup is required.  Just pass
+your text to the process() method and you're done:
+
+    my $plain_text = $hr->process( $html );
+
+If you need to set up specific rules, have a look at the params which
+HTML::Restrict recognizes:
+
+- `rules => \%rules`
+
+    Sets the rules which will be used to process your data.  By default all HTML
+    tags are off limits.  Use this argument to define the HTML elements and
+    corresponding attributes you'd like to use.  Essentially, consider the default
+    behaviour to be:
+
+        rules => {}
+
+    Rules should be passed as a HASHREF of allowed tags.  Each hash value should
+    represent the allowed attributes for the listed tag.  For example, if you want
+    to allow a fair amount of HTML, you can try something like this:
+
+        my %rules = (
+            a       => [qw( href target )],
+            b       => [],
+            caption => [],
+            center  => [],
+            em      => [],
+            i       => [],
+            img     => [qw( alt border height width src style )],
+            li      => [],
+            ol      => [],
+            p       => [qw(style)],
+            span    => [qw(style)],
+            strong  => [],
+            sub     => [],
+            sup     => [],
+            table   => [qw( style border cellspacing cellpadding align )],
+            tbody   => [],
+            td      => [],
+            tr      => [],
+            u       => [],
+            ul      => [],
+        );
+
+        my $hr = HTML::Restrict->new( rules => \%rules )
+
+    Or, to allow only bolded text:
+
+        my $hr = HTML::Restrict->new( rules => { b => [] } );
+
+    Allow bolded text, images and some (but not all) image attributes:
+
+        my %rules = (
+            b   => [ ],
+            img => [qw( src alt width height border / )
+        );
+        my $hr = HTML::Restrict->new( rules => \%rules );
+
+    Since [HTML::Parser](https://metacpan.org/pod/HTML::Parser) treats a closing slash as an attribute, you'll need to
+    add "/" to your list of allowed attributes if you'd like your tags to retain
+    closing slashes.  For example:
+
+        my $hr = HTML::Restrict->new( rules =>{ hr => [] } );
+        $hr->process( "<hr />"); # returns: <hr>
+
+        my $hr = HTML::Restrict->new( rules =>{ hr => [qw( / )] } );
+        $hr->process( "<hr />"); # returns: <hr />
+
+    HTML::Restrict strips away any tags and attributes which are not explicitly
+    allowed. It also rebuilds your explicitly allowed tags and places their
+    attributes in the order in which they appear in your rules.
+
+    So, if you define the following rules:
+
+        my %rules = (
+            ...
+            img => [qw( src alt title width height id / )]
+            ...
+        );
+
+    then your image tags will all be built like this:
+
+        <img src=".." alt="..." title="..." width="..." height="..." id=".." />
+
+    This gives you greater consistency in your tag layout.  If you don't care about
+    element order you don't need to pay any attention to this, but you should be
+    aware that your elements are being reconstructed rather than just stripped
+    down.
+
+    As of 2.1.0, you can also specify a regex to be tested against the attribute
+    value. This feature should be considered experimental for the time being:
+
+        my $hr = HTML::Restrict->new(
+            rules => {
+                iframe => [
+                    qw( width height allowfullscreen ),
+                    {   src         => qr{^http://www\.youtube\.com},
+                        frameborder => qr{^(0|1)$},
+                    }
+                ],
+                img => [ qw( alt ), { src => qr{^/my/images/} }, ],
+            },
+        );
+
+        my $html = '<img src="http://www.example.com/image.jpg" alt="Alt Text">';
+        my $processed = $hr->process( $html );
+
+        # $processed now equals: <img alt="Alt Text">
+
+- `trim => [0|1]`
+
+    By default all leading and trailing spaces will be removed when text is
+    processed.  Set this value to 0 in order to disable this behaviour.
+
+- `uri_schemes => [undef, 'http', 'https', 'irc', ... ]`
+
+    As of version 1.0.3, URI scheme checking is performed on all href and src tag
+    attributes. The following schemes are allowed out of the box.  No action is
+    required on your part:
+
+        [ undef, 'http', 'https' ]
+
+    (undef represents relative URIs). These restrictions have been put in place to
+    prevent XSS in the form of:
+
+        <a href="javascript:alert(document.cookie)">click for cookie!</a>
+
+    See [URI](https://metacpan.org/pod/URI) for more detailed info on scheme parsing.  If, for example, you
+    wanted to filter out every scheme barring SSL, you would do it like this:
+
+        uri_schemes => ['https']
+
+    This feature is new in 1.0.3.  Previous to this, there was no schema checking
+    at all.  Moving forward, you'll need to whitelist explicitly all URI schemas
+    which are not supported by default.  This is in keeping with the whitelisting
+    behaviour of this module and is also the safest possible approach.  Keep in
+    mind that changes to uri\_schemes are not additive, so you'll need to include
+    the defaults in any changes you make, should you wish to keep them:
+
+        # defaults + irc + mailto
+        uri_schemes => [ 'undef', 'http', 'https', 'irc', 'mailto' ]
+
+- allow\_declaration => \[0|1\]
+
+    Set this value to true if you'd like to allow/preserve DOCTYPE declarations in
+    your content.  Useful when cleaning up your own static files or templates. This
+    feature is off by default.
+
+        my $html = q[<!doctype html><body>foo</body>];
+
+        my $hr = HTML::Restrict->new( allow_declaration => 1 );
+        $html = $hr->process( $html );
+        # $html is now: "<!doctype html>foo"
+
+- allow\_comments => \[0|1\]
+
+    Set this value to true if you'd like to allow/preserve HTML comments in your
+    content.  Useful when cleaning up your own static files or templates. This
+    feature is off by default.
+
+        my $html = q[<body><!-- comments! -->foo</body>];
+
+        my $hr = HTML::Restrict->new( allow_comments => 1 );
+        $html = $hr->process( $html );
+        # $html is now: "<!-- comments! -->foo"
+
+- replace\_img => \[0|1|CodeRef\]
+
+    Set the value to true if you'd like to have img tags replaced with
+    `[IMAGE: ...]` containing the alt attribute text.  If you set it to a
+    code reference, you can provide your own replacement (which may
+    even contain HTML).
+
+        sub replacer {
+            my ($tagname, $attr, $text) = @_; # from HTML::Parser
+            return qq{<a href="$attr->{src}">IMAGE: $attr->{alt}</a>};
+        }
+
+        my $hr = HTML::Restrict->new( replace_img => \&replacer );
+
+    This attribute will only take effect if the img tag is not included
+    in the allowed HTML.
+
+- strip\_enclosed\_content => \[0|1\]
+
+    The default behaviour up to 1.0.4 was to preserve the content between script
+    and style tags, even when the tags themselves were being deleted.  So, you'd be
+    left with a bunch of JavaScript or CSS, just with the enclosing tags missing.
+    This is almost never what you want, so starting at 1.0.5 the default will be to
+    remove any script or style info which is enclosed in these tags, unless they
+    have specifically been whitelisted in the rules.  This will be a sane default
+    when cleaning up content submitted via a web form.  However, if you're using
+    HTML::Restrict to purge your own HTML you can be more restrictive.
+
+        # strip the head section, in addition to JS and CSS
+        my $html = '<html><head>...</head><body>...<script>JS here</script>foo';
+
+        my $hr = HTML::Restrict->new(
+            strip_enclosed_content => [ 'script', 'style', 'head' ]
+        );
+
+        $html = $hr->process( $html );
+        # $html is now '<html><body>...foo';
+
+    The caveat here is that HTML::Restrict will not try to fix broken HTML. In the
+    above example, if you have any opening script, style or head tags which don't
+    also include matching closing tags, all following content will be stripped
+    away, regardless of any parent tags.
+
+    Keep in mind that changes to strip\_enclosed\_content are not additive, so if you
+    are adding additional tags you'll need to include the entire list of tags whose
+    enclosed content you'd like to remove.  This feature strips script and style
+    tags by default.
+
+# SUBROUTINES/METHODS
+
+## process( $html )
+
+This is the method which does the real work.  It parses your data, removes any
+tags and attributes which are not specifically allowed and returns the
+resulting text.  Requires and returns a SCALAR.
+
+## get\_rules
+
+Accessor which returns a hash ref of the current rule set.
+
+## get\_uri\_schemes
+
+Accessor which returns an array ref of the current valid uri schemes.
+
+# CAVEATS
+
+Please note that all tag and attribute names passed via the rules param must be
+supplied in lower case.
+
+    # correct
+    my $hr = HTML::Restrict->new( rules => { body => ['onload'] } );
+
+    # throws a fatal error
+    my $hr = HTML::Restrict->new( rules => { Body => ['onLoad'] } );
+
+# MOTIVATION
+
+There are already several modules on the CPAN which accomplish much of the same
+thing, but after doing a lot of poking around, I was unable to find a solution
+with a simple setup which I was happy with.
+
+The most common use case might be stripping HTML from user submitted data
+completely or allowing just a few tags and attributes to be displayed.  With
+the exception of URI scheme checking, this module doesn't do any validation on
+the actual content of the tags or attributes.  If this is a requirement, you
+can either mess with the parser object, post-process the text yourself or have
+a look at one of the more feature-rich modules in the SEE ALSO section below.
+
+My aim here is to keep things easy and, hopefully, cover a lot of the less
+complex use cases with just a few lines of code and some brief documentation.
+The idea is to be up and running quickly.
+
+# SEE ALSO
+
+[HTML::TagFilter](https://metacpan.org/pod/HTML::TagFilter), [HTML::Defang](https://metacpan.org/pod/HTML::Defang), [MojoMojo::Declaw](https://metacpan.org/pod/MojoMojo::Declaw), [HTML::StripScripts](https://metacpan.org/pod/HTML::StripScripts),
+[HTML::Detoxifier](https://metacpan.org/pod/HTML::Detoxifier), HTML::Sanitizer, [HTML::Scrubber](https://metacpan.org/pod/HTML::Scrubber)
+
+# ACKNOWLEDGEMENTS
+
+Thanks to Raybec Communications [http://www.raybec.com](http://www.raybec.com) for funding my
+work on this module and for releasing it to the world.
+
+Thanks also to the following for patches, bug reports and assistance:
+
+Mark Jubenville (ioncache)
+
+Duncan Forsyth
+
+Rick Moore
+
+Arthur Axel 'fREW' Schmidt
+
+perlpong
+
+David Golden
+
+Graham TerMarsch
+
+Dagfinn Ilmari Mannsåker
+
+Graham Knop
+
+Carwyn Ellis
+
+# AUTHOR
+
+Olaf Alders <olaf at wundercounter.com>
+
+# COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
diff --git a/cpanfile b/cpanfile
new file mode 100644
index 0000000..5b011ff
--- /dev/null
+++ b/cpanfile
@@ -0,0 +1,35 @@
+requires "Carp" => "0";
+requires "Data::Dump" => "0";
+requires "HTML::Entities" => "0";
+requires "HTML::Parser" => "0";
+requires "List::Util" => "1.33";
+requires "Moo" => "1.002000";
+requires "Scalar::Util" => "0";
+requires "Sub::Quote" => "0";
+requires "Types::Standard" => "1.000001";
+requires "URI" => "0";
+requires "namespace::clean" => "0";
+requires "perl" => "5.006";
+requires "strict" => "0";
+
+on 'test' => sub {
+  requires "Test::Fatal" => "0";
+  requires "Test::More" => "0";
+  requires "perl" => "5.006";
+  requires "warnings" => "0";
+};
+
+on 'configure' => sub {
+  requires "ExtUtils::MakeMaker" => "0";
+  requires "perl" => "5.006";
+};
+
+on 'develop' => sub {
+  requires "Pod::Coverage::TrustPod" => "0";
+  requires "Test::CPAN::Changes" => "0.19";
+  requires "Test::Code::TidyAll" => "0.50";
+  requires "Test::More" => "0.88";
+  requires "Test::Pod::Coverage" => "1.08";
+  requires "Test::Spelling" => "0.12";
+  requires "Test::Synopsis" => "0";
+};
diff --git a/dist.ini b/dist.ini
new file mode 100644
index 0000000..bc62ed5
--- /dev/null
+++ b/dist.ini
@@ -0,0 +1,9 @@
+name    = HTML-Restrict
+author  = Olaf Alders <olaf at wundercounter.com>
+license = Perl_5
+copyright_holder = Olaf Alders
+copyright_year   = 2013-2017
+version = 2.2.4
+main_module = lib/HTML/Restrict.pm
+
+[@Author::OALDERS]
diff --git a/examples/sanitize_file.pl b/examples/sanitize_file.pl
new file mode 100755
index 0000000..c2c3e31
--- /dev/null
+++ b/examples/sanitize_file.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use File::Slurp;
+use HTML::Restrict;
+
+my $filename = shift @ARGV;
+die "usage: perl $0 path/to/file > path/to/new/file" if !$filename;
+
+my $text = read_file($filename);
+
+my $hr = HTML::Restrict->new;
+print $hr->process($text);
+
+=pod
+
+=head1 SYNOPSIS
+
+    perl sanitize_file path/to/filename > path/to/sanitized/file
+
+=cut
diff --git a/lib/HTML/Restrict.pm b/lib/HTML/Restrict.pm
new file mode 100644
index 0000000..bb299e4
--- /dev/null
+++ b/lib/HTML/Restrict.pm
@@ -0,0 +1,686 @@
+use strict;
+use 5.006;
+
+package HTML::Restrict;
+$HTML::Restrict::VERSION = '2.2.4';
+use Carp qw( croak );
+use Data::Dump qw( dump );
+use HTML::Parser;
+use HTML::Entities qw( encode_entities );
+use Types::Standard 1.000001 qw[ Bool HashRef ArrayRef CodeRef ];
+use List::Util 1.33 qw( any none );
+use Scalar::Util qw( reftype weaken );
+use Sub::Quote 'quote_sub';
+use URI;
+
+use Moo 1.002000;
+use namespace::clean;
+
+has 'allow_comments' => (
+    is      => 'rw',
+    isa     => Bool,
+    default => 0,
+);
+
+has 'allow_declaration' => (
+    is      => 'rw',
+    isa     => Bool,
+    default => 0,
+);
+
+has 'debug' => (
+    is      => 'rw',
+    isa     => Bool,
+    default => 0,
+);
+
+has 'parser' => (
+    is      => 'ro',
+    lazy    => 1,
+    builder => '_build_parser',
+);
+
+has 'rules' => (
+    is       => 'rw',
+    isa      => HashRef,
+    required => 0,
+    default  => quote_sub(q{ {} }),
+    trigger  => \&_build_parser,
+    reader   => 'get_rules',
+    writer   => 'set_rules',
+);
+
+has 'strip_enclosed_content' => (
+    is      => 'rw',
+    isa     => ArrayRef,
+    default => sub { [ 'script', 'style' ] },
+);
+
+has 'replace_img' => (
+    is      => 'rw',
+    isa     => Bool | CodeRef,
+    default => 0,
+);
+
+has 'trim' => (
+    is      => 'rw',
+    isa     => Bool,
+    default => 1,
+);
+
+has 'uri_schemes' => (
+    is       => 'rw',
+    isa      => ArrayRef,
+    required => 0,
+    default  => sub { [ undef, 'http', 'https' ] },
+    reader   => 'get_uri_schemes',
+    writer   => 'set_uri_schemes',
+);
+
+has '_processed' => (
+    is  => 'rw',
+    isa => quote_sub(
+        q{
+        die "$_[0] is not false or a string!"
+            unless !defined($_[0]) || $_[0] eq "" || "$_[0]" eq '0' || ref(\$_[0]) eq 'SCALAR'
+    }
+    ),
+    clearer => '_clear_processed',
+);
+
+has '_stripper_stack' => (
+    is      => 'rw',
+    isa     => ArrayRef,
+    default => sub { [] },
+);
+
+sub _build_parser {
+
+    my $self  = shift;
+    my $rules = shift;
+
+    # don't allow any upper case tag or attribute names
+    # these rules would otherwise silently be ignored
+    if ($rules) {
+        foreach my $tag_name ( keys %{$rules} ) {
+            if ( lc $tag_name ne $tag_name ) {
+                croak "All tag names must be lower cased";
+            }
+            if ( reftype $rules->{$tag_name} eq 'ARRAY' ) {
+                my @attr_names;
+                foreach my $attr_item ( @{ $rules->{$tag_name} } ) {
+                    ref $attr_item eq 'HASH'
+                        ? push( @attr_names, keys(%$attr_item) )
+                        : push( @attr_names, $attr_item );
+                }
+                for (@attr_names) {
+                    croak "All attribute names must be lower cased"
+                        if lc $_ ne $_;
+                }
+            }
+        }
+    }
+
+    weaken($self);
+    return HTML::Parser->new(
+
+        start_h => [
+            sub {
+                my ( $p, $tagname, $attr, $text ) = @_;
+                print "starting tag:  $tagname", "\n" if $self->debug;
+                my $more = q{};
+
+                if ( any { $_ eq $tagname } keys %{ $self->get_rules } ) {
+                    print dump $attr if $self->debug;
+
+                    foreach my $source_type ( 'href', 'src', 'cite' ) {
+
+                        if ( $attr->{$source_type} ) {
+                            my $uri = URI->new( $attr->{$source_type} );
+                            if ( defined $uri->scheme ) {
+                                delete $attr->{$source_type}
+                                    if none { $_ eq $uri->scheme }
+                                grep defined, @{ $self->get_uri_schemes };
+                            }
+                            else {    # relative uri
+                                delete $attr->{$source_type}
+                                    unless grep !defined,
+                                    @{ $self->get_uri_schemes };
+                            }
+                        }
+                    }
+
+                    foreach
+                        my $attr_item ( @{ $self->get_rules->{$tagname} } ) {
+                        if ( ref $attr_item eq 'HASH' ) {
+
+                            # validate against regex contraints
+                            for my $attr_name ( sort keys %$attr_item ) {
+                                if ( exists $attr->{$attr_name} ) {
+                                    my $value = encode_entities(
+                                        $attr->{$attr_name} );
+                                    $more .= qq[ $attr_name="$value" ]
+                                        if $attr->{$attr_name}
+                                        =~ $attr_item->{$attr_name};
+                                }
+                            }
+                        }
+                        else {
+                            my $attr_name = $attr_item;
+                            if ( exists $attr->{$attr_name} ) {
+                                my $value
+                                    = encode_entities( $attr->{$attr_name} );
+                                $more .= qq[ $attr_name="$value" ]
+                                    unless $attr_name eq q{/};
+                            }
+                        }
+                    }
+
+                    # closing slash should (naturally) close the tag
+                    if ( exists $attr->{q{/}} && $attr->{q{/}} eq q{/} ) {
+                        $more .= ' /';
+                    }
+
+                    my $elem = "<$tagname $more>";
+                    $elem =~ s{\s*>}{>}gxms;
+                    $elem =~ s{\s+}{ }gxms;
+
+                    $self->_processed( ( $self->_processed || q{} ) . $elem );
+                }
+                elsif ( $tagname eq 'img' && $self->replace_img ) {
+                    my $alt;
+                    if ( ref $self->replace_img ) {
+                        $alt = $self->replace_img->( $tagname, $attr, $text );
+                    }
+                    else {
+                        $alt
+                            = defined( $attr->{alt} ) ? ": $attr->{alt}" : "";
+                        $alt = "[IMAGE$alt]";
+                    }
+                    $self->_processed( ( $self->_processed || q{} ) . $alt );
+                }
+                elsif (
+                    any { $_ eq $tagname }
+                    @{ $self->strip_enclosed_content }
+                    ) {
+                    print "adding $tagname to strippers" if $self->debug;
+                    push @{ $self->_stripper_stack }, $tagname;
+                }
+
+            },
+            "self,tagname,attr,text"
+        ],
+
+        end_h => [
+            sub {
+                my ( $p, $tagname, $attr, $text ) = @_;
+                print "end: $text\n" if $self->debug;
+                if ( any { $_ eq $tagname } keys %{ $self->get_rules } ) {
+                    $self->_processed( ( $self->_processed || q{} ) . $text );
+                }
+                elsif ( any { $_ eq $tagname } @{ $self->_stripper_stack } ) {
+                    $self->_delete_tag_from_stack($tagname);
+                }
+
+            },
+            "self,tagname,attr,text"
+        ],
+
+        text_h => [
+            sub {
+                my ( $p, $text ) = @_;
+                print "text: $text\n" if $self->debug;
+                if ( !@{ $self->_stripper_stack } ) {
+                    $self->_processed( ( $self->_processed || q{} ) . $text );
+                }
+            },
+            "self,text"
+        ],
+
+        comment_h => [
+            sub {
+                my ( $p, $text ) = @_;
+                print "comment: $text\n" if $self->debug;
+                if ( $self->allow_comments ) {
+                    $self->_processed( ( $self->_processed || q{} ) . $text );
+                }
+            },
+            "self,text"
+        ],
+
+        declaration_h => [
+            sub {
+                my ( $p, $text ) = @_;
+                print "declaration: $text\n" if $self->debug;
+                if ( $self->allow_declaration ) {
+                    $self->_processed( ( $self->_processed || q{} ) . $text );
+                }
+            },
+            "self,text"
+        ],
+
+    );
+
+}
+
+sub process {
+
+    my $self = shift;
+
+    # returns undef if no value was passed
+    return if !@_;
+    return $_[0] if !$_[0];
+
+    my ($content) = @_;
+    die 'content must be a string!'
+        unless ref( \$content ) eq 'SCALAR';
+    $self->_clear_processed;
+
+    my $parser = $self->parser;
+    $parser->parse($content);
+    $parser->eof;
+
+    my $text = $self->_processed;
+
+    if ( $self->trim && $text ) {
+        $text =~ s{\A\s*}{}gxms;
+        $text =~ s{\s*\z}{}gxms;
+    }
+    $self->_processed($text);
+
+    # ensure stripper stack is reset in case of broken html
+    $self->_stripper_stack( [] );
+
+    return $self->_processed;
+
+}
+
+# strip_enclosed_content tags could be nested in the source HTML, so we
+# maintain a stack of these tags.
+
+sub _delete_tag_from_stack {
+
+    my $self        = shift;
+    my $closing_tag = shift;
+
+    my $found    = 0;
+    my @tag_list = ();
+
+    foreach my $tag ( reverse @{ $self->_stripper_stack } ) {
+        if ( $tag eq $closing_tag && $found == 0 ) {
+            $found = 1;
+            next;
+        }
+        push @tag_list, $tag;
+    }
+
+    $self->_stripper_stack( [ reverse @tag_list ] );
+
+    return;
+}
+
+1;    # End of HTML::Restrict
+
+# ABSTRACT: Strip unwanted HTML tags and attributes
+
+__END__
+
+=pod
+
+=encoding UTF-8
+
+=head1 NAME
+
+HTML::Restrict - Strip unwanted HTML tags and attributes
+
+=head1 VERSION
+
+version 2.2.4
+
+=head1 SYNOPSIS
+
+    use HTML::Restrict;
+
+    my $hr = HTML::Restrict->new();
+
+    # use default rules to start with (strip away all HTML)
+    my $processed = $hr->process('  <b>i am bold</b>  ');
+
+    # $processed now equals: 'i am bold'
+
+    # Now, a less restrictive example:
+    use HTML::Restrict;
+
+    my $hr = HTML::Restrict->new(
+        rules => {
+            b   => [],
+            img => [qw( src alt / )]
+        }
+    );
+
+    my $html = q[<body><b>hello</b> <img src="pic.jpg" alt="me" id="test" /></body>];
+    my $processed = $hr->process( $html );
+
+    # $processed now equals: <b>hello</b> <img src="pic.jpg" alt="me" />
+
+=head1 DESCRIPTION
+
+This module uses L<HTML::Parser> to strip HTML from text in a restrictive
+manner.  By default all HTML is restricted.  You may alter the default
+behaviour by supplying your own tag rules.
+
+=head1 CONSTRUCTOR AND STARTUP
+
+=head2 new()
+
+Creates and returns a new HTML::Restrict object.
+
+    my $hr = HTML::Restrict->new()
+
+HTML::Restrict doesn't require any params to be passed to new.  If your goal is
+to remove all HTML from text, then no further setup is required.  Just pass
+your text to the process() method and you're done:
+
+    my $plain_text = $hr->process( $html );
+
+If you need to set up specific rules, have a look at the params which
+HTML::Restrict recognizes:
+
+=over 4
+
+=item * C<< rules => \%rules >>
+
+Sets the rules which will be used to process your data.  By default all HTML
+tags are off limits.  Use this argument to define the HTML elements and
+corresponding attributes you'd like to use.  Essentially, consider the default
+behaviour to be:
+
+    rules => {}
+
+Rules should be passed as a HASHREF of allowed tags.  Each hash value should
+represent the allowed attributes for the listed tag.  For example, if you want
+to allow a fair amount of HTML, you can try something like this:
+
+    my %rules = (
+        a       => [qw( href target )],
+        b       => [],
+        caption => [],
+        center  => [],
+        em      => [],
+        i       => [],
+        img     => [qw( alt border height width src style )],
+        li      => [],
+        ol      => [],
+        p       => [qw(style)],
+        span    => [qw(style)],
+        strong  => [],
+        sub     => [],
+        sup     => [],
+        table   => [qw( style border cellspacing cellpadding align )],
+        tbody   => [],
+        td      => [],
+        tr      => [],
+        u       => [],
+        ul      => [],
+    );
+
+    my $hr = HTML::Restrict->new( rules => \%rules )
+
+Or, to allow only bolded text:
+
+    my $hr = HTML::Restrict->new( rules => { b => [] } );
+
+Allow bolded text, images and some (but not all) image attributes:
+
+    my %rules = (
+        b   => [ ],
+        img => [qw( src alt width height border / )
+    );
+    my $hr = HTML::Restrict->new( rules => \%rules );
+
+Since L<HTML::Parser> treats a closing slash as an attribute, you'll need to
+add "/" to your list of allowed attributes if you'd like your tags to retain
+closing slashes.  For example:
+
+    my $hr = HTML::Restrict->new( rules =>{ hr => [] } );
+    $hr->process( "<hr />"); # returns: <hr>
+
+    my $hr = HTML::Restrict->new( rules =>{ hr => [qw( / )] } );
+    $hr->process( "<hr />"); # returns: <hr />
+
+HTML::Restrict strips away any tags and attributes which are not explicitly
+allowed. It also rebuilds your explicitly allowed tags and places their
+attributes in the order in which they appear in your rules.
+
+So, if you define the following rules:
+
+    my %rules = (
+        ...
+        img => [qw( src alt title width height id / )]
+        ...
+    );
+
+then your image tags will all be built like this:
+
+    <img src=".." alt="..." title="..." width="..." height="..." id=".." />
+
+This gives you greater consistency in your tag layout.  If you don't care about
+element order you don't need to pay any attention to this, but you should be
+aware that your elements are being reconstructed rather than just stripped
+down.
+
+As of 2.1.0, you can also specify a regex to be tested against the attribute
+value. This feature should be considered experimental for the time being:
+
+    my $hr = HTML::Restrict->new(
+        rules => {
+            iframe => [
+                qw( width height allowfullscreen ),
+                {   src         => qr{^http://www\.youtube\.com},
+                    frameborder => qr{^(0|1)$},
+                }
+            ],
+            img => [ qw( alt ), { src => qr{^/my/images/} }, ],
+        },
+    );
+
+    my $html = '<img src="http://www.example.com/image.jpg" alt="Alt Text">';
+    my $processed = $hr->process( $html );
+
+    # $processed now equals: <img alt="Alt Text">
+
+=item * C<< trim => [0|1] >>
+
+By default all leading and trailing spaces will be removed when text is
+processed.  Set this value to 0 in order to disable this behaviour.
+
+=item * C<< uri_schemes => [undef, 'http', 'https', 'irc', ... ] >>
+
+As of version 1.0.3, URI scheme checking is performed on all href and src tag
+attributes. The following schemes are allowed out of the box.  No action is
+required on your part:
+
+    [ undef, 'http', 'https' ]
+
+(undef represents relative URIs). These restrictions have been put in place to
+prevent XSS in the form of:
+
+    <a href="javascript:alert(document.cookie)">click for cookie!</a>
+
+See L<URI> for more detailed info on scheme parsing.  If, for example, you
+wanted to filter out every scheme barring SSL, you would do it like this:
+
+    uri_schemes => ['https']
+
+This feature is new in 1.0.3.  Previous to this, there was no schema checking
+at all.  Moving forward, you'll need to whitelist explicitly all URI schemas
+which are not supported by default.  This is in keeping with the whitelisting
+behaviour of this module and is also the safest possible approach.  Keep in
+mind that changes to uri_schemes are not additive, so you'll need to include
+the defaults in any changes you make, should you wish to keep them:
+
+    # defaults + irc + mailto
+    uri_schemes => [ 'undef', 'http', 'https', 'irc', 'mailto' ]
+
+=item * allow_declaration => [0|1]
+
+Set this value to true if you'd like to allow/preserve DOCTYPE declarations in
+your content.  Useful when cleaning up your own static files or templates. This
+feature is off by default.
+
+    my $html = q[<!doctype html><body>foo</body>];
+
+    my $hr = HTML::Restrict->new( allow_declaration => 1 );
+    $html = $hr->process( $html );
+    # $html is now: "<!doctype html>foo"
+
+=item * allow_comments => [0|1]
+
+Set this value to true if you'd like to allow/preserve HTML comments in your
+content.  Useful when cleaning up your own static files or templates. This
+feature is off by default.
+
+    my $html = q[<body><!-- comments! -->foo</body>];
+
+    my $hr = HTML::Restrict->new( allow_comments => 1 );
+    $html = $hr->process( $html );
+    # $html is now: "<!-- comments! -->foo"
+
+=item * replace_img => [0|1|CodeRef]
+
+Set the value to true if you'd like to have img tags replaced with
+C<[IMAGE: ...]> containing the alt attribute text.  If you set it to a
+code reference, you can provide your own replacement (which may
+even contain HTML).
+
+    sub replacer {
+        my ($tagname, $attr, $text) = @_; # from HTML::Parser
+        return qq{<a href="$attr->{src}">IMAGE: $attr->{alt}</a>};
+    }
+
+    my $hr = HTML::Restrict->new( replace_img => \&replacer );
+
+This attribute will only take effect if the img tag is not included
+in the allowed HTML.
+
+=item * strip_enclosed_content => [0|1]
+
+The default behaviour up to 1.0.4 was to preserve the content between script
+and style tags, even when the tags themselves were being deleted.  So, you'd be
+left with a bunch of JavaScript or CSS, just with the enclosing tags missing.
+This is almost never what you want, so starting at 1.0.5 the default will be to
+remove any script or style info which is enclosed in these tags, unless they
+have specifically been whitelisted in the rules.  This will be a sane default
+when cleaning up content submitted via a web form.  However, if you're using
+HTML::Restrict to purge your own HTML you can be more restrictive.
+
+    # strip the head section, in addition to JS and CSS
+    my $html = '<html><head>...</head><body>...<script>JS here</script>foo';
+
+    my $hr = HTML::Restrict->new(
+        strip_enclosed_content => [ 'script', 'style', 'head' ]
+    );
+
+    $html = $hr->process( $html );
+    # $html is now '<html><body>...foo';
+
+The caveat here is that HTML::Restrict will not try to fix broken HTML. In the
+above example, if you have any opening script, style or head tags which don't
+also include matching closing tags, all following content will be stripped
+away, regardless of any parent tags.
+
+Keep in mind that changes to strip_enclosed_content are not additive, so if you
+are adding additional tags you'll need to include the entire list of tags whose
+enclosed content you'd like to remove.  This feature strips script and style
+tags by default.
+
+=back
+
+=head1 SUBROUTINES/METHODS
+
+=head2 process( $html )
+
+This is the method which does the real work.  It parses your data, removes any
+tags and attributes which are not specifically allowed and returns the
+resulting text.  Requires and returns a SCALAR.
+
+=head2 get_rules
+
+Accessor which returns a hash ref of the current rule set.
+
+=head2 get_uri_schemes
+
+Accessor which returns an array ref of the current valid uri schemes.
+
+=head1 CAVEATS
+
+Please note that all tag and attribute names passed via the rules param must be
+supplied in lower case.
+
+    # correct
+    my $hr = HTML::Restrict->new( rules => { body => ['onload'] } );
+
+    # throws a fatal error
+    my $hr = HTML::Restrict->new( rules => { Body => ['onLoad'] } );
+
+=head1 MOTIVATION
+
+There are already several modules on the CPAN which accomplish much of the same
+thing, but after doing a lot of poking around, I was unable to find a solution
+with a simple setup which I was happy with.
+
+The most common use case might be stripping HTML from user submitted data
+completely or allowing just a few tags and attributes to be displayed.  With
+the exception of URI scheme checking, this module doesn't do any validation on
+the actual content of the tags or attributes.  If this is a requirement, you
+can either mess with the parser object, post-process the text yourself or have
+a look at one of the more feature-rich modules in the SEE ALSO section below.
+
+My aim here is to keep things easy and, hopefully, cover a lot of the less
+complex use cases with just a few lines of code and some brief documentation.
+The idea is to be up and running quickly.
+
+=head1 SEE ALSO
+
+L<HTML::TagFilter>, L<HTML::Defang>, L<MojoMojo::Declaw>, L<HTML::StripScripts>,
+L<HTML::Detoxifier>, HTML::Sanitizer, L<HTML::Scrubber>
+
+=head1 ACKNOWLEDGEMENTS
+
+Thanks to Raybec Communications L<http://www.raybec.com> for funding my
+work on this module and for releasing it to the world.
+
+Thanks also to the following for patches, bug reports and assistance:
+
+Mark Jubenville (ioncache)
+
+Duncan Forsyth
+
+Rick Moore
+
+Arthur Axel 'fREW' Schmidt
+
+perlpong
+
+David Golden
+
+Graham TerMarsch
+
+Dagfinn Ilmari Mannsåker
+
+Graham Knop
+
+Carwyn Ellis
+
+=head1 AUTHOR
+
+Olaf Alders <olaf at wundercounter.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+This software is copyright (c) 2013-2017 by Olaf Alders.
+
+This is free software; you can redistribute it and/or modify it under
+the same terms as the Perl 5 programming language system itself.
+
+=cut
diff --git a/perlcriticrc b/perlcriticrc
new file mode 100644
index 0000000..7819a28
--- /dev/null
+++ b/perlcriticrc
@@ -0,0 +1,86 @@
+severity = 3
+verbose = 11
+
+theme = core + pbp + bugs + maintenance + cosmetic + complexity + security + tests + moose
+
+exclude = Subroutines::ProhibitCallsToUndeclaredSubs
+
+[BuiltinFunctions::ProhibitStringySplit]
+severity = 3
+
+[CodeLayout::RequireTrailingCommas]
+severity = 3
+
+[ControlStructures::ProhibitCStyleForLoops]
+severity = 3
+
+[InputOutput::RequireCheckedSyscalls]
+functions = :builtins
+exclude_functions = sleep
+severity = 3
+
+[Moose::RequireCleanNamespace]
+modules = Moose Moose::Role MooseX::Role::Parameterized Moose::Util::TypeConstraints
+cleaners = namespace::autoclean
+
+[NamingConventions::Capitalization]
+package_exemptions = [A-Z]\w+|minFraud
+file_lexical_variables = [A-Z]\w+|[^A-Z]+
+global_variables = :starts_with_upper
+scoped_lexical_variables = [A-Z]\w+|[^A-Z]+
+severity = 3
+
+# Given our code base, leaving this at 5 would be a huge pain
+[Subroutines::ProhibitManyArgs]
+max_arguments = 10
+
+[RegularExpressions::ProhibitComplexRegexes]
+max_characters = 200
+
+[RegularExpressions::ProhibitUnusualDelimiters]
+severity = 3
+
+[Subroutines::ProhibitUnusedPrivateSubroutines]
+private_name_regex = _(?!build)\w+
+skip_when_using = Moo::Role Moose::Role MooseX::Role::Parameterized Role::Tiny Test::Class::Moose::Role
+
+[TestingAndDebugging::ProhibitNoWarnings]
+allow = redefine
+
+[ValuesAndExpressions::ProhibitEmptyQuotes]
+severity = 3
+
+[ValuesAndExpressions::ProhibitInterpolationOfLiterals]
+severity = 3
+
+[ValuesAndExpressions::RequireUpperCaseHeredocTerminator]
+severity = 3
+
+[Variables::ProhibitPackageVars]
+add_packages = Test::Builder
+
+[TestingAndDebugging::RequireUseStrict]
+
+[TestingAndDebugging::RequireUseWarnings]
+
+[-ControlStructures::ProhibitCascadingIfElse]
+
+[-ErrorHandling::RequireCarping]
+[-InputOutput::RequireBriefOpen]
+
+[-ValuesAndExpressions::ProhibitConstantPragma]
+
+# No need for /xsm everywhere
+[-RegularExpressions::RequireDotMatchAnything]
+[-RegularExpressions::RequireExtendedFormatting]
+[-RegularExpressions::RequireLineBoundaryMatching]
+
+[-Subroutines::ProhibitExplicitReturnUndef]
+
+# http://stackoverflow.com/questions/2275317/why-does-perlcritic-dislike-using-shift-to-populate-subroutine-variables
+[-Subroutines::RequireArgUnpacking]
+
+[-Subroutines::RequireFinalReturn]
+
+# "use v5.14" is more readable than "use 5.014"
+[-ValuesAndExpressions::ProhibitVersionStrings]
diff --git a/perltidyrc b/perltidyrc
new file mode 100644
index 0000000..b7ed624
--- /dev/null
+++ b/perltidyrc
@@ -0,0 +1,12 @@
+--blank-lines-before-packages=0
+--iterations=2
+--no-outdent-long-comments
+-b
+-bar
+-boc
+-ci=4
+-i=4
+-l=78
+-nolq
+-se
+-wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x="
diff --git a/t/00-load.t b/t/00-load.t
new file mode 100644
index 0000000..1c1421c
--- /dev/null
+++ b/t/00-load.t
@@ -0,0 +1,125 @@
+#!perl
+
+use Test::More;
+
+use strict;
+use warnings;
+
+use Data::Dump;
+use HTML::Restrict;
+use Scalar::Util;
+
+my $version = $HTML::Restrict::VERSION || 'development';
+diag( "Testing HTML::Restrict $version, Perl $], $^X" );
+
+my $hr = HTML::Restrict->new( debug => 0 );
+isa_ok( $hr, 'HTML::Restrict' );
+
+isa_ok( $hr->parser, 'HTML::Parser' );
+
+my $default_rules = $hr->get_rules;
+
+cmp_ok( Scalar::Util::reftype( $default_rules ),
+    'eq', 'HASH', "default rules are empty" );
+
+# basic stripping -- all tags strippped
+my $bold      = '<b>i am bold</b>';
+my $processed = $hr->process( $bold );
+cmp_ok( $processed, 'eq', 'i am bold', "b tag stripped" );
+
+# updating rules
+my $b_rules = { b => [] };
+$hr->set_rules( $b_rules );
+my $updated_rules = $hr->get_rules;
+is_deeply( $b_rules, $updated_rules, "rules update correctly" );
+
+# ensure allowed tags aren't stripped
+$processed = $hr->process( $bold );
+cmp_ok( $processed, 'eq', $bold, "b tag not stripped" );
+
+# more complex set with multiple tags
+# ensure allowed tags aren't stripped and others are removed
+$hr->set_rules( { a => [qw( href target )] } );
+my $link
+    = q[<center><a href="http://google.com" target="_blank" id="test">google</a></center>];
+my $processed_link = $hr->process( $link );
+cmp_ok(
+    $processed_link, 'eq',
+    q[<a href="http://google.com" target="_blank">google</a>],
+    "allowed link but not center tag",
+);
+
+# ensure closing slash is maintained for tags
+# with no end tag
+$hr->set_rules( { img => [qw( src width height /)] } );
+my $img = q[<body><img src="/face.jpg" width="10" height="10" /></body>];
+my $processed_img = $hr->process( $img );
+cmp_ok(
+    $processed_img, 'eq',
+    '<img src="/face.jpg" width="10" height="10" />',
+    "closing slash preserved in image"
+);
+
+# rest rules to default set
+$hr->set_rules( {} );
+cmp_ok( $hr->process( $bold ), 'eq', 'i am bold', "back to default rules" );
+
+# stripping of comments
+cmp_ok( $hr->process( "<!-- comment this -->ok" ),
+    'eq', 'ok', "comments are stripped" );
+
+# stripping of javascript includes
+cmp_ok(
+    $hr->process(
+        q{<script type="text/javascript" src="/js/jquery-1.3.2.js"></script>ok}
+    ),
+    'eq', 'ok',
+    "javascript includes are stripped"
+);
+
+# stripping of css includes
+cmp_ok(
+    $hr->process(
+        q{<link href="/style.css" media="screen" rel="stylesheet" type="text/css" />ok}
+    ),
+    'eq', 'ok',
+    "css includes are stripped"
+);
+
+ok( $hr->trim, "trim enabled by default" );
+
+# stripping of leading and trailing spaces
+cmp_ok( $hr->process( "   ok ok ok  " ),
+    'eq', 'ok ok ok', "leading and trailing spaces trimmed" );
+
+# stripping of div tags
+cmp_ok( $hr->process( "<div>ok</div>" ),
+    'eq', 'ok', "divs are stripped away" );
+
+# undef should be returned when no value is passed to the process method
+is( $hr->process(), undef, "undef is returned when no value passed" );
+
+
+# start fresh
+# RT #55775
+$hr = HTML::Restrict->new;
+
+cmp_ok( $hr->process( 0 ), 'eq', '0', "untrue values not processed");
+cmp_ok( $hr->process( '0' ), 'eq', '0', "untrue values not processed");
+cmp_ok( $hr->process( '000' ), 'eq', '000', "untrue values not processed");
+
+ok( !$hr->process("<html>"), "process only HTML" );
+
+
+# bugfix: check URI scheme for "src" attributes
+$hr = HTML::Restrict->new( rules => { img => [qw( src )] } );
+$hr->set_uri_schemes( [ undef, 'http', 'https' ] );
+cmp_ok( $hr->process('<img src="file:/some/file">'), 'eq', '<img>' );
+
+# bugfix: ensure stripper stack is reset in case of broken html
+$hr = HTML::Restrict->new;
+$hr->strip_enclosed_content( ['script'] );
+$hr->process('<script < b >');
+cmp_ok($hr->process('some text'), 'eq', 'some text', "stripper stack reset");
+
+done_testing();
diff --git a/t/attribute_constraints.t b/t/attribute_constraints.t
new file mode 100644
index 0000000..1f58bc8
--- /dev/null
+++ b/t/attribute_constraints.t
@@ -0,0 +1,66 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new(
+    rules => {
+        iframe => [
+            qw( width height ),
+            {
+                src         => qr{^http://www\.youtube\.com},
+                frameborder => qr{^(0|1)$},
+            }
+        ],
+    },
+);
+
+cmp_ok(
+    $hr->process(
+        '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'
+    ),
+    'eq',
+    '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>',
+    'all constraints pass',
+);
+
+cmp_ok(
+    $hr->process(
+        '<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="0"></iframe>'
+    ),
+    'eq',
+    '<iframe width="560" height="315" frameborder="0"></iframe>',
+    'one constraint fails',
+);
+
+cmp_ok(
+    $hr->process(
+        '<iframe width="560" height="315" src="http://www.hostile.com/" frameborder="A"></iframe>'
+    ),
+    'eq',
+    '<iframe width="560" height="315"></iframe>',
+    'two constraints fail',
+);
+
+$hr = HTML::Restrict->new(
+    rules => {
+        iframe => [
+            { src         => qr{^http://www\.youtube\.com} },
+            { frameborder => qr{^(0|1)$} },
+            { height      => qr{^315$} },
+            { width       => qr{^560$} },
+        ],
+    },
+);
+
+cmp_ok(
+    $hr->process(
+        '<iframe width="560" height="315" frameborder="0" src="http://www.youtube.com/embed/9gKeRZM2Iyc"></iframe>'
+    ),
+    'eq',
+    '<iframe src="http://www.youtube.com/embed/9gKeRZM2Iyc" frameborder="0" height="315" width="560"></iframe>',
+    'possible to maintain order',
+);
+
+done_testing;
diff --git a/t/author-pod-coverage.t b/t/author-pod-coverage.t
new file mode 100644
index 0000000..243340f
--- /dev/null
+++ b/t/author-pod-coverage.t
@@ -0,0 +1,15 @@
+#!perl
+
+BEGIN {
+  unless ($ENV{AUTHOR_TESTING}) {
+    print qq{1..0 # SKIP these tests are for testing by the author\n};
+    exit
+  }
+}
+
+# This file was automatically generated by Dist::Zilla::Plugin::PodCoverageTests.
+
+use Test::Pod::Coverage 1.08;
+use Pod::Coverage::TrustPod;
+
+all_pod_coverage_ok({ coverage_class => 'Pod::Coverage::TrustPod' });
diff --git a/t/author-pod-spell.t b/t/author-pod-spell.t
new file mode 100644
index 0000000..8b32a99
--- /dev/null
+++ b/t/author-pod-spell.t
@@ -0,0 +1,74 @@
+
+BEGIN {
+  unless ($ENV{AUTHOR_TESTING}) {
+    print qq{1..0 # SKIP these tests are for testing by the author\n};
+    exit
+  }
+}
+
+use strict;
+use warnings;
+use Test::More;
+
+# generated by Dist::Zilla::Plugin::Test::PodSpelling 2.007004
+use Test::Spelling 0.12;
+use Pod::Wordlist;
+
+
+add_stopwords(<DATA>);
+all_pod_files_spelling_ok( qw( bin lib ) );
+__DATA__
+Alders
+Alders'
+Arthur
+Axel
+Ben
+Bullock
+Carwyn
+DOCTYPE
+Dagfinn
+David
+Elliott
+Etheridge
+Fitz
+Forsyth
+Golden
+Graham
+HTML
+Ilmari
+Jubenville
+Karen
+Knop
+Mannsåker
+Mark
+Olaf
+Raybec
+Restrict
+Schmidt
+TerMarsch
+XSS
+benkasminbullock
+bolded
+calyx238
+dagolden
+ether
+fREW
+fitz
+frioux
+graham
+haarg
+href
+img
+ioncache
+lib
+mark
+olaf
+param
+params
+perlpong
+sam
+schemas
+skaufman
+src
+whitelisted
+whitelisting
diff --git a/t/author-synopsis.t b/t/author-synopsis.t
new file mode 100644
index 0000000..5d1d4a7
--- /dev/null
+++ b/t/author-synopsis.t
@@ -0,0 +1,13 @@
+#!perl
+
+BEGIN {
+  unless ($ENV{AUTHOR_TESTING}) {
+    print qq{1..0 # SKIP these tests are for testing by the author\n};
+    exit
+  }
+}
+
+
+use Test::Synopsis;
+
+all_synopsis_ok();
diff --git a/t/author-tidyall.t b/t/author-tidyall.t
new file mode 100644
index 0000000..f161a3e
--- /dev/null
+++ b/t/author-tidyall.t
@@ -0,0 +1,19 @@
+
+BEGIN {
+  unless ($ENV{AUTHOR_TESTING}) {
+    print qq{1..0 # SKIP these tests are for testing by the author\n};
+    exit
+  }
+}
+
+# This file was automatically generated by Dist::Zilla::Plugin::Test::TidyAll v$VERSION
+
+use Test::More 0.88;
+use Test::Code::TidyAll 0.24;
+
+tidyall_ok(
+     verbose => ( exists $ENV{TEST_TIDYALL_VERBOSE} ? $ENV{TEST_TIDYALL_VERBOSE} : 0 ),
+     jobs => ( exists $ENV{TEST_TIDYALL_JOBS} ? $ENV{TEST_TIDYALL_JOBS} : 1 ),
+);
+
+done_testing;
diff --git a/t/comments.t b/t/comments.t
new file mode 100644
index 0000000..2ceccbb
--- /dev/null
+++ b/t/comments.t
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use HTML::Restrict;
+use Test::More;
+
+my $hr = HTML::Restrict->new;
+
+my $text = '<!-- comment here -->stuff';
+$hr->debug(0);
+
+is $hr->process($text), 'stuff', 'comments allowed';
+$hr->allow_comments(1);
+is $hr->process($text), $text, 'comments allowd';
+
+$text = 'before<!-- This is a comment -- -- So is this -->after';
+$hr->allow_comments(0);
+
+is $hr->process($text), 'beforeafter', 'comment allowed';
+
+$hr->allow_comments(1);
+is $hr->process($text), $text, 'comments allowd';
+
+$hr->allow_comments(0);
+$text = '<!-- <script> <h1> -->';
+is $hr->process($text), undef, 'tags nested in comments removed';
+
+#$hr->set_rules({ script => [], 'h1' => [] });
+#is $hr->process( $text ), $text, 'tags nested in comments not removed when explicitly allowed';
+
+done_testing();
diff --git a/t/declaration.t b/t/declaration.t
new file mode 100644
index 0000000..2d5e9bf
--- /dev/null
+++ b/t/declaration.t
@@ -0,0 +1,22 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use HTML::Restrict;
+use Test::More;
+
+my $hr = HTML::Restrict->new;
+
+my $text = '<!DOCTYPE HTML> ';
+$hr->debug(1);
+
+is $hr->process($text), '', 'declaration not preserved';
+$hr->allow_declaration(1);
+is $hr->process($text), '<!DOCTYPE HTML>', 'declaration is preserved';
+
+$text
+    = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';
+is $hr->process($text), $text, 'declaration preserved';
+
+done_testing();
diff --git a/t/js.t b/t/js.t
new file mode 100644
index 0000000..6d25754
--- /dev/null
+++ b/t/js.t
@@ -0,0 +1,24 @@
+#!perl
+
+use strict;
+use warnings;
+
+use HTML::Restrict;
+use Scalar::Util;
+use Test::More;
+
+my $hr = HTML::Restrict->new( debug => 0 );
+
+my $html = q[<script type="text/javascript">
+$(document).ready(function() {
+        $('a.gallery').fancybox();
+});
+</script>];
+
+is( $hr->process($html), undef, "content of script tags removed by default" );
+
+$hr->set_rules( { script => ['type'] } );
+
+is( $hr->process($html), $html, "content of script preserved" );
+
+done_testing();
diff --git a/t/lowercase.t b/t/lowercase.t
new file mode 100644
index 0000000..7284884
--- /dev/null
+++ b/t/lowercase.t
@@ -0,0 +1,29 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use Test::More;
+use Test::Fatal;
+use HTML::Restrict;
+
+my $html
+    = q[<!doctype html><!-- comments go here --><body onLoad="stuff">foo</body>];
+
+like(
+    exception {
+        my $hr = HTML::Restrict->new( rules => { Body => ['onload'] } );
+    },
+    qr{tag names must be lower cased},
+    "dies on mixed case tag names",
+);
+
+like(
+    exception {
+        my $hr = HTML::Restrict->new( rules => { body => ['onLoad'] } );
+    },
+    qr{attribute names must be lower cased},
+    "dies on mixed case attributes",
+);
+
+done_testing();
diff --git a/t/memory-leak.t b/t/memory-leak.t
new file mode 100644
index 0000000..7111746
--- /dev/null
+++ b/t/memory-leak.t
@@ -0,0 +1,24 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+use Scalar::Util qw(weaken);
+
+# Ensure that we don't have any circular references between the HTML::Restrict
+# object and its parser.
+my $hr = HTML::Restrict->new;
+my $p  = $hr->parser;
+
+my $weak_hr = $hr;
+my $weak_p  = $p;
+weaken($weak_hr);
+weaken($weak_p);
+
+undef $hr;
+undef $p;
+
+ok !defined $weak_hr, 'HTML::Restrict freed; no circular reference.';
+ok !defined $weak_p,  'HTML::Parser freed; no circular reference.';
+
+done_testing();
diff --git a/t/perlcriticrc b/t/perlcriticrc
new file mode 100644
index 0000000..d6574dd
--- /dev/null
+++ b/t/perlcriticrc
@@ -0,0 +1,4 @@
+severity = 3
+[-Subroutines::RequireArgUnpacking]
+[-ControlStructures::ProhibitPostfixControls]
+[-Miscellanea::RequireRcsKeywords]
diff --git a/t/pod.t b/t/pod.t
new file mode 100644
index 0000000..ee8b18a
--- /dev/null
+++ b/t/pod.t
@@ -0,0 +1,12 @@
+#!perl -T
+
+use strict;
+use warnings;
+use Test::More;
+
+# Ensure a recent version of Test::Pod
+my $min_tp = 1.22;
+eval "use Test::Pod $min_tp";
+plan skip_all => "Test::Pod $min_tp required for testing POD" if $@;
+
+all_pod_files_ok();
diff --git a/t/release-cpan-changes.t b/t/release-cpan-changes.t
new file mode 100644
index 0000000..08331d3
--- /dev/null
+++ b/t/release-cpan-changes.t
@@ -0,0 +1,18 @@
+
+BEGIN {
+  unless ($ENV{RELEASE_TESTING}) {
+    print qq{1..0 # SKIP these tests are for release candidate testing\n};
+    exit
+  }
+}
+
+use strict;
+use warnings;
+
+# this test was generated with Dist::Zilla::Plugin::Test::CPAN::Changes 0.012
+
+use Test::More 0.96 tests => 1;
+use Test::CPAN::Changes;
+subtest 'changes_ok' => sub {
+    changes_file_ok('Changes');
+};
diff --git a/t/replace_img.t b/t/replace_img.t
new file mode 100644
index 0000000..f40aa90
--- /dev/null
+++ b/t/replace_img.t
@@ -0,0 +1,62 @@
+#!perl
+
+use strict;
+use warnings;
+
+use HTML::Restrict;
+use Scalar::Util;
+use Test::More;
+
+my @texts = (
+    {
+        label => "<img ... />",
+        html  => q{<img alt="foo bar" src="http://example.com/foo.jpg" />},
+    },
+    {
+        label => "<img ... ></img>",
+        html => q{<img alt="foo bar" src="http://example.com/foo.jpg"></img>},
+    },
+);
+
+my @cases = (
+    {
+        label  => "default args",
+        args   => {},
+        expect => undef,
+    },
+    {
+        label  => "replace_img => 0",
+        args   => { replace_img => 0 },
+        expect => undef,
+    },
+    {
+        label  => "replace_img => 1",
+        args   => { replace_img => 1 },
+        expect => q{[IMAGE: foo bar]},
+    },
+    {
+        label  => "replace_img => CODE",
+        args   => { replace_img => \&replacer },
+        expect => q{[IMAGE REMOVED: foo bar]},
+    },
+);
+
+sub replacer {
+    my ( $tag, $attr, $text ) = @_;
+    return "[IMAGE REMOVED: $attr->{alt}]";
+}
+
+for my $c (@cases) {
+    ok(
+        my $hr = HTML::Restrict->new( debug => 0, %{ $c->{args} } ),
+        "$c->{label}: HTML::Restrict->new(...)"
+    );
+    for my $t (@texts) {
+        is(
+            $hr->process( $t->{html} ), $c->{expect},
+            "$c->{label}: $t->{label}"
+        );
+    }
+}
+
+done_testing();
diff --git a/t/scheme.t b/t/scheme.t
new file mode 100644
index 0000000..8bebd90
--- /dev/null
+++ b/t/scheme.t
@@ -0,0 +1,70 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new(
+    rules => {
+        a          => [qw( href )],
+        img        => [qw( src /)],
+        blockquote => [qw( cite )],
+    },
+);
+
+$hr->set_uri_schemes( [ 'http', 'https', undef, 'ftp' ] );
+
+cmp_ok(
+    $hr->process('<a href="http://example.com">link</a>'),
+    'eq', '<a href="http://example.com">link</a>',
+    'http scheme preserved',
+);
+
+cmp_ok(
+    $hr->process('<a href="https://example.com">link</a>'),
+    'eq', '<a href="https://example.com">link</a>',
+    'https scheme preserved',
+);
+
+cmp_ok(
+    $hr->process('<a href="/some/file">link</a>'),
+    'eq', '<a href="/some/file">link</a>',
+    'relative scheme preserved',
+);
+
+cmp_ok(
+    $hr->process('<a href="ftp://example.com">link</a>'),
+    'eq', '<a href="ftp://example.com">link</a>',
+    'ftp scheme preserved',
+);
+
+cmp_ok(
+    $hr->process('<a href="file://example.com">link</a>'),
+    'eq', '<a>link</a>',
+    'file scheme removed',
+);
+
+cmp_ok(
+    $hr->process('<img src="javascript:evil_fc()" />'),
+    'eq', '<img />',
+    'img src with javascript removed',
+);
+
+cmp_ok(
+    $hr->process(
+        '<blockquote cite="javascript:evil_fc()">blockquote</blockquote>'),
+    'eq',
+    '<blockquote>blockquote</blockquote>',
+    'blockquote cite with javascript removed',
+);
+
+# disable relative schemes
+$hr->set_uri_schemes( [ 'http', 'https', 'ftp' ] );
+
+cmp_ok(
+    $hr->process('<a href="/some/file">link</a>'),
+    'eq', '<a>link</a>',
+    'relative scheme removed',
+);
+
+done_testing();
diff --git a/t/stack.t b/t/stack.t
new file mode 100644
index 0000000..b95f0ff
--- /dev/null
+++ b/t/stack.t
@@ -0,0 +1,19 @@
+use strict;
+use warnings;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new;
+
+ok( !@{ $hr->_stripper_stack }, "stack empty" );
+
+push @{ $hr->_stripper_stack }, 'script', 'style', 'pre', 'script';
+$hr->_delete_tag_from_stack('script');
+is_deeply(
+    $hr->_stripper_stack,
+    [ 'script', 'style', 'pre' ],
+    'deletes from stack in correct order and amount'
+);
+
+done_testing();
diff --git a/t/style.t b/t/style.t
new file mode 100644
index 0000000..2491fa5
--- /dev/null
+++ b/t/style.t
@@ -0,0 +1,24 @@
+#!perl
+
+use strict;
+use warnings;
+
+use HTML::Restrict;
+use Scalar::Util;
+use Test::More;
+
+my $hr = HTML::Restrict->new( debug => 0 );
+
+my $html = q[<style type="text/css">
+hr {color:sienna;}
+p {margin-left:20px;}
+body {background-image:url("images/back40.gif");}
+</style>];
+
+is( $hr->process($html), undef, "content of style tag removed by default" );
+
+$hr->set_rules( { style => ['type'] } );
+
+is( $hr->process($html), $html, "content of style tag preserved" );
+
+done_testing();
diff --git a/t/xss.t b/t/xss.t
new file mode 100755
index 0000000..0f98789
--- /dev/null
+++ b/t/xss.t
@@ -0,0 +1,35 @@
+#!/usr/bin/env perl
+
+use warnings;
+use strict;
+
+use Test::More;
+use HTML::Restrict;
+
+my $hr = HTML::Restrict->new;
+$hr->debug(0);
+$hr->set_rules( { a => [ 'href', 'class' ] } );
+
+my $text = '<a href="javascript:alert(1)">oops!</a>';
+
+my $clean = $hr->process($text);
+is $clean, '<a>oops!</a>', "bad scheme removed";
+
+is $hr->process('<a href="javascript:evil_script()">evil</a>'),
+    '<a>evil</a>', 'bad scheme removed';
+
+foreach my $uri (
+    'http://vilerichard.com', 'https://vilerichard.com',
+    '//vilerichard.com',      '/music'
+    ) {
+    my $img = qq[<a href="$uri">click</a>];
+    is $hr->process($img), $img, "good uri scheme preserved";
+}
+
+is $hr->process(
+    '<a class=""><script>alert("oops");</script><a href=""></a>'
+    ),
+    '<a class=""><script>alert("oops");</script><a href=""></a>',
+    'attribute value filtered';
+
+done_testing();
diff --git a/tidyall.ini b/tidyall.ini
new file mode 100644
index 0000000..6b3438d
--- /dev/null
+++ b/tidyall.ini
@@ -0,0 +1,26 @@
+[PerlTidy]
+select = **/*.{pl,pm,t,psgi}
+ignore = t/00-*
+ignore = t/author-*
+ignore = t/release-*
+ignore = blib/**/*
+ignore = .build/**/*
+ignore = HTML-Restrict-*/**/*
+argv = --profile=$ROOT/perltidyrc
+
+;[PerlCritic]
+;select = **/*.{pl,pm,t,psgi}
+;ignore = t/00-*
+;ignore = t/author-*
+;ignore = t/release-*
+;ignore = blib/**/*
+;ignore = .build/**/*
+;ignore = HTML-Restrict-*/**/*
+;argv = --profile $ROOT/perlcriticrc --program-extensions .pl  --program-extensions .t --program-extensions .psgi
+
+[SortLines::Naturally]
+select = .gitignore
+select = .stopwords
+
+[JSON]
+select = t/**/*json

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libhtml-restrict-perl.git



More information about the Pkg-perl-cvs-commits mailing list