[libtext-markup-perl] 01/04: Import original source of Text-Markup 0.22
Lucas Kanashiro
kanashiro-guest at moszumanska.debian.org
Sun Mar 1 23:39:05 UTC 2015
This is an automated email from the git hooks/post-receive script.
kanashiro-guest pushed a commit to branch master
in repository libtext-markup-perl.
commit 065ce931009237992caf3cedb47bc678c2a71a46
Author: Lucas Kanashiro <kanashiro.duarte at gmail.com>
Date: Sat Feb 28 13:28:42 2015 -0300
Import original source of Text-Markup 0.22
---
Build.PL | 44 ++++
Changes | 71 ++++++
MANIFEST | 46 ++++
META.json | 120 +++++++++
META.yml | 81 +++++++
Makefile.PL | 30 +++
README.md | 59 +++++
lib/Text/Markup.pm | 467 ++++++++++++++++++++++++++++++++++++
lib/Text/Markup/Asciidoc.pm | 152 ++++++++++++
lib/Text/Markup/Bbcode.pm | 70 ++++++
lib/Text/Markup/Creole.pm | 67 ++++++
lib/Text/Markup/HTML.pm | 60 +++++
lib/Text/Markup/Markdown.pm | 82 +++++++
lib/Text/Markup/Mediawiki.pm | 72 ++++++
lib/Text/Markup/Multimarkdown.pm | 76 ++++++
lib/Text/Markup/None.pm | 58 +++++
lib/Text/Markup/Pod.pm | 96 ++++++++
lib/Text/Markup/Rest.pm | 186 ++++++++++++++
lib/Text/Markup/Textile.pm | 73 ++++++
lib/Text/Markup/Trac.pm | 71 ++++++
lib/Text/Markup/rst2html_lenient.py | 282 ++++++++++++++++++++++
t/base.t | 156 ++++++++++++
t/empty.txt | 2 +
t/formats.t | 79 ++++++
t/html/asciidoc.html | 22 ++
t/html/bbcode.html | 12 +
t/html/creole.html | 18 ++
t/html/html.html | 14 ++
t/html/markdown.html | 12 +
t/html/mediawiki.html | 20 ++
t/html/multimarkdown.html | 21 ++
t/html/pod.html | 33 +++
t/html/rest.html | 37 +++
t/html/textile.html | 16 ++
t/html/trac.html | 17 ++
t/markups/asciidoc.txt | 8 +
t/markups/bbcode.txt | 10 +
t/markups/creole.txt | 8 +
t/markups/html.txt | 14 ++
t/markups/markdown.txt | 6 +
t/markups/mediawiki.txt | 9 +
t/markups/multimarkdown.txt | 9 +
t/markups/pod.txt | 22 ++
t/markups/rest.txt | 37 +++
t/markups/textile.txt | 8 +
t/markups/trac.txt | 11 +
46 files changed, 2864 insertions(+)
diff --git a/Build.PL b/Build.PL
new file mode 100644
index 0000000..ebc6969
--- /dev/null
+++ b/Build.PL
@@ -0,0 +1,44 @@
+use strict;
+use warnings;
+use Module::Build;
+
+my $build = Module::Build->new(
+ module_name => 'Text::Markup',
+ license => 'perl',
+ create_makefile_pl => 'traditional',
+ configure_requires => { 'Module::Build' => '0.30' },
+ build_requires => {
+ 'File::Spec::Functions' => 0,
+ 'Module::Build' => '0.30',
+ 'Test::More' => '0.96',
+ },
+ requires => {
+ 'File::BOM' => '0.14',
+ 'HTML::Entities' => 0,
+ 'HTML::Tagset' => 0, # Needed by Text::MediawikiFormat
+ 'IPC::Open3' => 0,
+ 'perl' => 5.008001,
+ 'Pod::Simple::XHTML' => '3.15',
+ 'Symbol' => 0,
+ 'Text::Markdown' => '1.000004',
+ 'Text::MultiMarkdown' => '1.000028',
+ 'Text::MediawikiFormat' => '1.0',
+ 'Text::Textile' => '2.10',
+ 'Text::Trac' => '0.10',
+ 'Parse::BBCode' => '0.15',
+ 'Text::WikiCreole' => '0.07',
+ },
+ recommends => {
+ 'Test::Pod' => '1.41',
+ 'Test::Pod::Coverage' => '1.06',
+ },
+ meta_merge => {
+ resources => {
+ homepage => 'http://search.cpan.org/dist/Text-Markup/',
+ bugtracker => 'http://github.com/theory/text-markup/issues/',
+ repository => 'http://github.com/theory/text-markup',
+ }
+ },
+);
+$build->add_build_element('py');
+$build->create_build_script;
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..43e96f2
--- /dev/null
+++ b/Changes
@@ -0,0 +1,71 @@
+Revision history for Perl extension Text-Markup.
+
+0.22 2015-02-20T03:52:36Z
+ - Added support for Creole. Thanks to Lucas Kanashiro for the patch!
+
+0.21 2015-02-17T00:18:20Z
+ - Fixed description of Asciidoc and added it to the README.
+ - Added support for BBcode. Thanks to Lucas Kanashiro for the patch!
+
+0.20 2015-01-22T00:54:50Z
+ - Fixed a failing test for the Mediawiki format due to a change in CGI
+ v4.11. Thanks to Andreas Koenig for the report and diagnosis.
+
+0.19 2014-02-07T04:00:56Z
+ - Fixed Pod markup so that it does not strip text from the first line of
+ verbatim blocks.
+ - Removed the `--safe` option from Asciidoc. It is just borked with the
+ XHTML back end.
+
+0.18 2013-06-08T23:24:09Z
+ - Now require HTML::Tagset, since Text::MediawikiFormat seems to need it
+ but only recommends it.
+ - Removed all Pod tests from the distribution.
+ - Updated reST to support docutils 0.7 - 0.10 and when Pygments is not
+ available. Daniele Varrazzo.
+
+0.17 2012-02-28T04:31:07Z
+ - Added Asciidoc support.
+
+0.16 2012-02-13T17:23:00Z
+ - Rest support now uses its own implementation of `rst2html` so that it
+ can render specialized reST documents, such as Sphinx files, in a more
+ forgiving way: it preserves the content of unknown directives while not
+ emulating specialized rendering. Written by Daniele Varrazzo.
+ - Improved handling of Sphinx directives in Rest output. Most directives
+ are stripped out, although `function` directives now turn out much
+ nicer. Props to Daniele Varrazzo for the work on the Rest parser.
+ - Fixed character encoding issues with the Rest parser.
+ - Fixed a character encoding issue in the test suite.
+
+0.15 2012-01-13T23:04:16Z
+ - Changed the parsers to return `undef` if no content was parsed from a
+ file.
+ - Added reST support, with thanks to Daniele Varrazzo.
+
+0.14 2011-10-09T17:45:28Z
+ - Added option processing to the Pod parser. Thanks to Mark Allen for
+ the pull request.
+
+0.13 2011-05-21T15:33:28
+ - Fixed broken regular expression in the Pod parser that could eat the
+ first line of a verbatim block.
+
+0.12 2011-04-04T23:16:12
+ - Documented that the `file` parameter to `parse()` is required.
+ - Added MultiMarkdown support.
+ - Fixed test failure in `t/formats.t` when no supported markup parser is
+ installed.
+
+0.11 2011-02-22T22:41:15
+ - Added list of supported markups to the README.
+ - Fixed test failures on Perls earlier than 5.12.
+
+0.10 2011-02-22T19:45:27
+ - Initial version. Includes parsers for:
+ + HTML
+ + Markdown
+ + MediaWiki
+ + Pod
+ + Textile
+ + Trac
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..3ae92de
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,46 @@
+Build.PL
+Changes
+lib/Text/Markup.pm
+lib/Text/Markup/Asciidoc.pm
+lib/Text/Markup/Bbcode.pm
+lib/Text/Markup/Creole.pm
+lib/Text/Markup/HTML.pm
+lib/Text/Markup/Markdown.pm
+lib/Text/Markup/Mediawiki.pm
+lib/Text/Markup/Multimarkdown.pm
+lib/Text/Markup/None.pm
+lib/Text/Markup/Pod.pm
+lib/Text/Markup/Rest.pm
+lib/Text/Markup/rst2html_lenient.py
+lib/Text/Markup/Textile.pm
+lib/Text/Markup/Trac.pm
+Makefile.PL
+MANIFEST This list of files
+META.json
+META.yml
+README.md
+t/base.t
+t/empty.txt
+t/formats.t
+t/html/asciidoc.html
+t/html/bbcode.html
+t/html/creole.html
+t/html/html.html
+t/html/markdown.html
+t/html/mediawiki.html
+t/html/multimarkdown.html
+t/html/pod.html
+t/html/rest.html
+t/html/textile.html
+t/html/trac.html
+t/markups/asciidoc.txt
+t/markups/bbcode.txt
+t/markups/creole.txt
+t/markups/html.txt
+t/markups/markdown.txt
+t/markups/mediawiki.txt
+t/markups/multimarkdown.txt
+t/markups/pod.txt
+t/markups/rest.txt
+t/markups/textile.txt
+t/markups/trac.txt
diff --git a/META.json b/META.json
new file mode 100644
index 0000000..7225e9a
--- /dev/null
+++ b/META.json
@@ -0,0 +1,120 @@
+{
+ "abstract" : "Parse text markup into HTML",
+ "author" : [
+ "David E. Wheeler <david at justatheory.com>"
+ ],
+ "dynamic_config" : 1,
+ "generated_by" : "Module::Build version 0.4206",
+ "license" : [
+ "perl_5"
+ ],
+ "meta-spec" : {
+ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+ "version" : "2"
+ },
+ "name" : "Text-Markup",
+ "prereqs" : {
+ "build" : {
+ "requires" : {
+ "File::Spec::Functions" : "0",
+ "Module::Build" : "0.30",
+ "Test::More" : "0.96"
+ }
+ },
+ "configure" : {
+ "requires" : {
+ "Module::Build" : "0.30"
+ }
+ },
+ "runtime" : {
+ "recommends" : {
+ "Test::Pod" : "1.41",
+ "Test::Pod::Coverage" : "1.06"
+ },
+ "requires" : {
+ "File::BOM" : "0.14",
+ "HTML::Entities" : "0",
+ "HTML::Tagset" : "0",
+ "IPC::Open3" : "0",
+ "Parse::BBCode" : "0.15",
+ "Pod::Simple::XHTML" : "3.15",
+ "Symbol" : "0",
+ "Text::Markdown" : "1.000004",
+ "Text::MediawikiFormat" : "1.0",
+ "Text::MultiMarkdown" : "1.000028",
+ "Text::Textile" : "2.10",
+ "Text::Trac" : "0.10",
+ "Text::WikiCreole" : "0.07",
+ "perl" : "5.008001"
+ }
+ }
+ },
+ "provides" : {
+ "Text::Markup" : {
+ "file" : "lib/Text/Markup.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Asciidoc" : {
+ "file" : "lib/Text/Markup/Asciidoc.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Bbcode" : {
+ "file" : "lib/Text/Markup/Bbcode.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Creole" : {
+ "file" : "lib/Text/Markup/Creole.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::HTML" : {
+ "file" : "lib/Text/Markup/HTML.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Markdown" : {
+ "file" : "lib/Text/Markup/Markdown.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Mediawiki" : {
+ "file" : "lib/Text/Markup/Mediawiki.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Multimarkdown" : {
+ "file" : "lib/Text/Markup/Multimarkdown.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::None" : {
+ "file" : "lib/Text/Markup/None.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Pod" : {
+ "file" : "lib/Text/Markup/Pod.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Rest" : {
+ "file" : "lib/Text/Markup/Rest.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Textile" : {
+ "file" : "lib/Text/Markup/Textile.pm",
+ "version" : "0.22"
+ },
+ "Text::Markup::Trac" : {
+ "file" : "lib/Text/Markup/Trac.pm",
+ "version" : "0.22"
+ }
+ },
+ "release_status" : "stable",
+ "resources" : {
+ "bugtracker" : {
+ "web" : "http://github.com/theory/text-markup/issues/"
+ },
+ "homepage" : "http://search.cpan.org/dist/Text-Markup/",
+ "license" : [
+ "http://dev.perl.org/licenses/"
+ ],
+ "repository" : {
+ "url" : "http://github.com/theory/text-markup"
+ }
+ },
+ "version" : "0.22"
+}
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..ff930c8
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,81 @@
+---
+abstract: 'Parse text markup into HTML'
+author:
+ - 'David E. Wheeler <david at justatheory.com>'
+build_requires:
+ File::Spec::Functions: 0
+ Module::Build: 0.30
+ Test::More: 0.96
+configure_requires:
+ Module::Build: 0.30
+dynamic_config: 1
+generated_by: 'Module::Build version 0.4206, CPAN::Meta::Converter version 2.120921'
+license: perl
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
+name: Text-Markup
+provides:
+ Text::Markup:
+ file: lib/Text/Markup.pm
+ version: 0.22
+ Text::Markup::Asciidoc:
+ file: lib/Text/Markup/Asciidoc.pm
+ version: 0.22
+ Text::Markup::Bbcode:
+ file: lib/Text/Markup/Bbcode.pm
+ version: 0.22
+ Text::Markup::Creole:
+ file: lib/Text/Markup/Creole.pm
+ version: 0.22
+ Text::Markup::HTML:
+ file: lib/Text/Markup/HTML.pm
+ version: 0.22
+ Text::Markup::Markdown:
+ file: lib/Text/Markup/Markdown.pm
+ version: 0.22
+ Text::Markup::Mediawiki:
+ file: lib/Text/Markup/Mediawiki.pm
+ version: 0.22
+ Text::Markup::Multimarkdown:
+ file: lib/Text/Markup/Multimarkdown.pm
+ version: 0.22
+ Text::Markup::None:
+ file: lib/Text/Markup/None.pm
+ version: 0.22
+ Text::Markup::Pod:
+ file: lib/Text/Markup/Pod.pm
+ version: 0.22
+ Text::Markup::Rest:
+ file: lib/Text/Markup/Rest.pm
+ version: 0.22
+ Text::Markup::Textile:
+ file: lib/Text/Markup/Textile.pm
+ version: 0.22
+ Text::Markup::Trac:
+ file: lib/Text/Markup/Trac.pm
+ version: 0.22
+recommends:
+ Test::Pod: 1.41
+ Test::Pod::Coverage: 1.06
+requires:
+ File::BOM: 0.14
+ HTML::Entities: 0
+ HTML::Tagset: 0
+ IPC::Open3: 0
+ Parse::BBCode: 0.15
+ Pod::Simple::XHTML: 3.15
+ Symbol: 0
+ Text::Markdown: 1.000004
+ Text::MediawikiFormat: 1.0
+ Text::MultiMarkdown: 1.000028
+ Text::Textile: 2.10
+ Text::Trac: 0.10
+ Text::WikiCreole: 0.07
+ perl: 5.008001
+resources:
+ bugtracker: http://github.com/theory/text-markup/issues/
+ homepage: http://search.cpan.org/dist/Text-Markup/
+ license: http://dev.perl.org/licenses/
+ repository: http://github.com/theory/text-markup
+version: 0.22
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..ed2466e
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,30 @@
+# Note: this file was auto-generated by Module::Build::Compat version 0.4206
+require 5.008001;
+use ExtUtils::MakeMaker;
+WriteMakefile
+(
+ 'NAME' => 'Text::Markup',
+ 'VERSION_FROM' => 'lib/Text/Markup.pm',
+ 'PREREQ_PM' => {
+ 'File::BOM' => '0.14',
+ 'File::Spec::Functions' => 0,
+ 'HTML::Entities' => 0,
+ 'HTML::Tagset' => 0,
+ 'IPC::Open3' => 0,
+ 'Module::Build' => '0.30',
+ 'Parse::BBCode' => '0.15',
+ 'Pod::Simple::XHTML' => '3.15',
+ 'Symbol' => 0,
+ 'Test::More' => '0.96',
+ 'Text::Markdown' => '1.000004',
+ 'Text::MediawikiFormat' => '1.0',
+ 'Text::MultiMarkdown' => '1.000028',
+ 'Text::Textile' => '2.10',
+ 'Text::Trac' => '0.10',
+ 'Text::WikiCreole' => '0.07'
+ },
+ 'INSTALLDIRS' => 'site',
+ 'EXE_FILES' => [],
+ 'PL_FILES' => {}
+)
+;
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..b65f98f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,59 @@
+Text/Markup version 0.22
+========================
+
+This library's module, Text::Markup, provides a single interface for parsing
+a large number of text markup formats and converting them to HTML. It
+currently supports the following markups:
+
+* [Asciidoc](http://www.methods.co.nz/asciidoc/)
+* [HTML](http://whatwg.org/html)
+* [Markdown](http://daringfireball.net/projects/markdown/)
+* [MultiMarkdown](http://fletcherpenney.net/multimarkdown/)
+* [MediaWiki](http://en.wikipedia.org/wiki/Help:Contents/Editing_Wikipedia)
+* [Pod](http://search.cpan.org/perldoc?perlpod)
+* [reStructuredText](http://docutils.sourceforge.net/docs/user/rst/quickref.html)
+* [Textile](http://textism.com/tools/textile/)
+* [Trac](http://trac.edgewall.org/wiki/WikiFormatting)
+* [BBcode](http://www.bbcode.org/)
+* [Creole](http://www.wikicreole.org/)
+
+Installation
+------------
+
+To install this module, type the following:
+
+ perl Build.PL
+ ./Build
+ ./Build test
+ ./Build install
+
+Or, if you don't have Module::Build installed, type the following:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+Dependencies
+------------
+
+Text-Markup requires the following modules:
+
+* File::BOM 0.14
+* HTML::Entities
+* perl 5.8.1
+* Pod::Simple::XHTML 3.15
+* Text::Markdown 1.000004
+* Text::MediawikiFormat 1.0
+* Text::Textile 2.10
+* Text::Trac 0.10
+* Parse::BBCode 0.15
+* Text::WikiCreole 0.07
+
+Copyright and Licence
+---------------------
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
diff --git a/lib/Text/Markup.pm b/lib/Text/Markup.pm
new file mode 100644
index 0000000..f4e1623
--- /dev/null
+++ b/lib/Text/Markup.pm
@@ -0,0 +1,467 @@
+package Text::Markup;
+
+use 5.8.1;
+use strict;
+use Text::Markup::None;
+use Carp;
+
+our $VERSION = '0.22';
+
+my %_PARSER_FOR;
+my %REGEX_FOR = (
+ html => qr{x?html?},
+ markdown => qr{m(?:d(?:own)?|kdn?|arkdown)},
+ multimarkdown => qr{mm(?:d(?:own)?|kdn?|arkdown)},
+ pod => qr{p(?:od|m|l)},
+ textile => qr{textile},
+ trac => qr{tra?c},
+ mediawiki => qr{(?:m(?:edia)?)?wiki},
+ rest => qr{re?st},
+ asciidoc => qr{a(?:sc(?:iidoc)?|doc)?},
+ bbcode => qr{bb(?:code)?},
+ creole => qr{creole},
+);
+
+sub register {
+ my ($class, $name, $regex) = @_;
+ my $pkg = caller; # the package calling register() must provide parser()
+ $REGEX_FOR{$name} = $regex; # file-extension pattern used by guess_format()
+ $_PARSER_FOR{$name} = $pkg->can('parser')
+ or croak "No parser() function defined in $pkg";
+}
+
+sub _parser_for {
+ my ($self, $format) = @_;
+ return Text::Markup::None->can('parser') unless $format; # no format: fall back to None
+ return $_PARSER_FOR{$format} if $_PARSER_FOR{$format}; # already loaded or registered
+ my $pkg = __PACKAGE__ . '::' . ($format eq 'html' ? 'HTML' : ucfirst $format);
+ eval "require $pkg; 1" or die $@; # load the parser module on first use
+ return $_PARSER_FOR{$format} = $pkg->can('parser')
+ || croak "No parser() function defined in $pkg";
+}
+
+sub formats {
+ sort keys %REGEX_FOR;
+}
+
+sub new {
+ my $class = shift;
+ bless { default_encoding => 'UTF-8', @_ } => $class;
+}
+
+sub parse {
+ my $self = shift;
+ my %p = @_;
+ my $file = $p{file} or croak "No file parameter passed to parse()";
+ croak "$file does not exist" unless -e $file && !-d _;
+
+ my $parser = $self->_get_parser(\%p);
+ return $parser->(
+ $file,
+ $p{encoding} || $self->default_encoding,
+ $p{options}
+ );
+}
+
+sub default_format {
+ my $self = shift;
+ return $self->{default_format} unless @_;
+ $self->{default_format} = shift;
+}
+
+sub default_encoding {
+ my $self = shift;
+ return $self->{default_encoding} unless @_;
+ $self->{default_encoding} = shift;
+}
+
+sub _get_parser {
+ my ($self, $p) = @_;
+ my $format = $p->{format}
+ || $self->guess_format($p->{file})
+ || $self->default_format;
+
+ return $self->_parser_for($format);
+}
+
+sub guess_format {
+ my ($self, $file) = @_;
+ for my $format (keys %REGEX_FOR) {
+ return $format if $file =~ qr{[.]$REGEX_FOR{$format}$};
+ }
+ return;
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup - Parse text markup into HTML
+
+=head1 Synopsis
+
+ my $parser = Text::Markup->new(
+ default_format => 'markdown',
+ default_encoding => 'UTF-8',
+ );
+
+ my $html = $parser->parse(file => $markup_file);
+
+=head1 Description
+
+This class is really simple. All it does is take the name of a file and return
+an HTML-formatted version of that file. The idea is that one might have files
+in lots of different markups, and not know or care what markups each uses.
+It's the job of this module to figure that out, parse it, and give you the
+resulting HTML.
+
+This distribution includes support for a number of markup formats:
+
+=over
+
+=item * L<Asciidoc|http://www.methods.co.nz/asciidoc/>
+
+=item * L<HTML|http://whatwg.org/html>
+
+=item * L<Markdown|http://daringfireball.net/projects/markdown/>
+
+=item * L<MultiMarkdown|http://fletcherpenney.net/multimarkdown/>
+
+=item * L<MediaWiki|http://en.wikipedia.org/wiki/Help:Contents/Editing_Wikipedia>
+
+=item * L<Pod|perlpod>
+
+=item * L<reStructuredText|http://docutils.sourceforge.net/docs/user/rst/quickref.html>
+
+=item * L<Textile|http://textism.com/tools/textile/>
+
+=item * L<Trac|http://trac.edgewall.org/wiki/WikiFormatting>
+
+=back
+
+Adding support for more markup languages is straight-forward, and patches
+adding them to this distribution are also welcome. See L</Add a Parser> for
+step-by-step instructions.
+
+Or if you just want to use this module, then read on!
+
+=head1 Interface
+
+=head2 Constructor
+
+=head3 C<new>
+
+ my $parser = Text::Markup->new(default_format => 'markdown');
+
+Supported parameters:
+
+=over
+
+=item C<default_format>
+
+The default format to use if one isn't passed to C<parse()> and one can't be
+guessed.
+
+=item C<default_encoding>
+
+The character encoding in which to assume a file is encoded if it's not
+otherwise explicitly determined by examination of the source file. Defaults to
+"UTF-8".
+
+=back
+
+=head2 Class Methods
+
+=head3 C<register>
+
+ Text::Markup->register(foobar => qr{fb|foob(?:ar)?});
+
+Registers a markup parser. You likely won't need to use this method unless
+you're creating a new markup parser and not contributing it back to the
+Text::Markup project. See L</Add a Parser> for details.
+
+=head3 formats
+
+ my @formats = Text::Markup->formats;
+
+Returns a list of all of the formats currently recognized by Text::Markup.
+This will include all core parsers (except for "None") and any that have been
+loaded elsewhere and that call C<register> to register themselves.
+
+=head2 Instance Methods
+
+=head3 C<parse>
+
+ my $html = $parser->parse(file => $file_to_parse);
+
+Parses a file and returns the generated HTML, or C<undef> if no markup was
+found in the file. Supported parameters:
+
+=over
+
+=item C<file>
+
+The file from which to read the markup to be parsed. Required.
+
+=item C<format>
+
+The markup format in the file, which determines the parser used to parse it.
+If not specified, Text::Markup will try to guess the format from the file's
+suffix. If it can't guess, it falls back on C<default_format>. And if that
+attribute is not set, it uses the C<none> parser, which simply encodes the
+entire file and wraps it in a C<< <pre> >> element.
+
+=item C<encoding>
+
+The character encoding to assume the source file is encoded in (if such cannot
+be determined by other means, such as a
+L<BOM|http://en.wikipedia.org/wiki/Byte_order_mark>). If not specified, the
+value of the C<default_encoding> attribute will be used, and if that attribute
+is not set, UTF-8 will be assumed.
+
+=item C<options>
+
+An array reference of options for the parser. See the documentation of the
+various parser modules for details.
+
+=back
+
+=head3 C<default_format>
+
+ my $format = $parser->default_format;
+ $parser->default_format('markdown');
+
+An accessor for the default format attribute.
+
+=head3 C<default_encoding>
+
+ my $encoding = $parser->default_encoding;
+ $parser->default_encoding('Big5');
+
+An accessor for the default encoding attribute.
+
+=head3 C<guess_format>
+
+ my $format = $parser->guess_format($filename);
+
+Compares the passed file name's suffix to the regular expressions of all
+registered formatting parsers and returns the first one that matches. Returns
+C<undef> if none matches.
+
+=head1 Add a Parser
+
+Adding support for markup formats not supported by the core Text::Markup
+distribution is a straight-forward exercise. Say you wanted to add a "FooBar"
+markup parser. Here are the steps to take:
+
+=over
+
+=item 1
+
+Fork L<this project on GitHub|https://github.com/theory/text-markup/>
+
+=item 2
+
+Clone your fork and create a new branch in which to work:
+
+ git clone git@github.com:$USER/text-markup.git
+ cd text-markup
+ git checkout -b foobar
+
+=item 3
+
+Create a new module named C<Text::Markup::FooBar>. The simplest thing to do is
+copy an existing module and modify it. The HTML parser is probably the simplest:
+
+ cp lib/Text/Markup/HTML.pm lib/Text/Markup/FooBar.pm
+ perl -i -pe 's{HTML}{FooBar}g' lib/Text/Markup/FooBar.pm
+ perl -i -pe 's{html}{foobar}g' lib/Text/Markup/FooBar.pm
+
+=item 4
+
+Implement the C<parser> function in your new module. If you were to use a
+C<Text::FooBar> module, it might look something like this:
+
+ package Text::Markup::FooBar;
+
+ use 5.8.1;
+ use strict;
+ use Text::FooBar ();
+ use File::BOM qw(open_bom);
+
+ sub parser {
+ my ($file, $encoding, $opts) = @_;
+ my $md = Text::FooBar->new(@{ $opts || [] });
+ open_bom my $fh, $file, ":encoding($encoding)";
+ local $/;
+ my $html = $md->parse(<$fh>);
+ return unless $html =~ /\S/;
+ utf8::encode($html);
+ return join( "\n",
+ '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />',
+ '</head>',
+ '<body>',
+ $html,
+ '</body>',
+ '</html>',
+ );
+ }
+
+Use the C<$encoding> argument as appropriate to read in the source file. If
+your parser requires that text be decoded to Perl's internal form, use of
+L<File::BOM> is recommended, so that an explicit BOM will determine the
+encoding. Otherwise, fall back on the specified encoding. Note that some
+parsers, such as an HTML parser, would want text encoded before it parsed it.
+In such a case, read in the file as raw bytes:
+
+ open my $fh, '<:raw', $file or die "Cannot open $file: $!\n";
+
+The returned HTML, however, B<must be encoded in UTF-8>. Please include an
+L<encoding
+declaration|http://en.wikipedia.org/wiki/Character_encodings_in_HTML>, such as
+a content-type C<< <meta> >> element:
+
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+
+This will allow any consumers of the returned HTML to parse it correctly.
+If the parser parsed no content, C<parser()> should return C<undef>.
+
+=item 5
+
+Edit F<lib/Text/Markup.pm> and add an entry to its C<%REGEX_FOR> hash for your
+new format. The key should be the name of the format (lowercase, the same as
+the last part of your module's name). The value should be a regular expression
+that matches the file extensions that suggest that a file is formatted in your
+parser's markup language. For our FooBar parser, the line might look like
+this:
+
+ foobar => qr{fb|foob(?:ar)?},
+
+=item 6
+
+Add a file in your parser's markup language to F<t/markups>. It should be
+named for your parser and end in F<.txt>, that is, F<t/markups/foobar.txt>.
+
+=item 7
+
+Add an HTML file, F<t/html/foobar.html>, which should be the expected output
+once F<t/markups/foobar.txt> is parsed into HTML. This will be used to test
+that your parser works correctly.
+
+=item 8
+
+Edit F<t/formats.t> by adding a line to its C<__DATA__> section. The line
+should be a comma-separated list describing your parser. The columns are:
+
+=over
+
+=item * Format
+
+The lowercased name of the format.
+
+=item * Format Module
+
+The name of the parser module.
+
+=item * Required Module
+
+The name of a module that's required to be installed in order for your parser
+to load.
+
+=item * Extensions
+
+Additional comma-separated values should be a list of file extensions that
+your parser should recognize.
+
+=back
+
+So for our FooBar parser, it might look like this:
+
+ foobar,Text::Markup::FooBar,Text::FooBar 0.22,fb,foob,foobar
+
+=item 9
+
+Test your new parser by running
+
+ prove -lv t/formats.t
+
+This will test I<all> included parsers, but of course you should only pay
+attention to how your parser works. Tweak until your tests pass. Note that one
+test has the parser parse a file with just a couple of empty lines, to ensure
+that the parser finds no content and returns C<undef>.
+
+=item 10
+
+Don't forget to write the documentation in your new parser module! If you
+copied F<Text::Markup::HTML>, you can just modify as appropriate.
+
+=item 11
+
+Add any new module requirements to the C<requires> section of F<Build.PL>.
+
+=item 12
+
+Commit and push the branch to your fork on GitHub:
+
+ git add .
+ git commit -am 'Add great new FooBar parser!'
+ git push origin -u foobar
+
+=item 13
+
+And finally, submit a pull request to the upstream repository via the GitHub
+UI.
+
+=back
+
+If you don't want to submit your parser, you can still create and use one
+independently. Rather than add its information to the C<%REGEX_FOR> hash in
+this module, you can just load your parser manually, and have it call the
+C<register> method, like so:
+
+ package My::Markup::FooBar;
+ use Text::Markup;
+ Text::Markup->register(foobar => qr{fb|foob(?:ar)?});
+
+This will be useful for creating private parsers you might not want to
+contribute, or that you'd want to distribute independently.
+
+=head1 See Also
+
+=over
+
+=item *
+
+The L<markup|https://github.com/github/markup> Ruby library -- the inspiration
+for this module -- provides similar functionality, and is used to parse
+F<README.your_favorite_markup> on GitHub.
+
+=item *
+
+L<Markup::Unified> offers similar functionality.
+
+=back
+
+=head1 Support
+
+This module is stored in an open L<GitHub
+repository|http://github.com/theory/text-markup/>. Feel free to fork and
+contribute!
+
+Please file bug reports via L<GitHub
+Issues|http://github.com/theory/text-markup/issues/> or by sending mail to
+L<bug-Text-Markup at rt.cpan.org|mailto:bug-Text-Markup at rt.cpan.org>.
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Asciidoc.pm b/lib/Text/Markup/Asciidoc.pm
new file mode 100644
index 0000000..e19929f
--- /dev/null
+++ b/lib/Text/Markup/Asciidoc.pm
@@ -0,0 +1,152 @@
+package Text::Markup::Asciidoc;
+
+use 5.8.1;
+use strict;
+use File::Spec;
+use constant WIN32 => $^O eq 'MSWin32';
+use Symbol 'gensym';
+use IPC::Open3;
+use utf8;
+
+our $VERSION = '0.22';
+
+# Find Asciidoc.
+my $ASCIIDOC;
+FIND: {
+ my @path = (
+ File::Spec->path,
+ WIN32 ? (map { "C:\\asciidoc$_" } '', '-8.6.6') : ()
+ );
+ EXE: {
+ for my $exe (qw(asciidoc asciidoc.py)) {
+ for my $p (@path) {
+ my $path = File::Spec->catfile($p, $exe);
+ next unless -f $path && -x $path;
+ $ASCIIDOC = $path;
+ last EXE;
+ }
+ }
+ }
+
+ unless ($ASCIIDOC) {
+ use Carp;
+ my $sep = WIN32 ? ';' : ':';
+ Carp::croak(
+ "Cannot find asciidoc or asciidoc.py in path " . join $sep => @path
+ );
+ }
+
+ # Make sure it looks like it will work.
+ my $output = gensym;
+ my $pid = open3 undef, $output, $output, $ASCIIDOC, '--version';
+ waitpid $pid, 0;
+ if ($?) {
+ use Carp;
+ local $/;
+ Carp::croak(
+ qq{$ASCIIDOC will not execute\n},
+ <$output>
+ );
+ }
+}
+
+# Arguments to pass to asciidoc.
+# Restore --safe if Asciidoc ever fixes it with the XHTML back end.
+# https://groups.google.com/forum/#!topic/asciidoc/yEr5PqHm4-o
+my @OPTIONS = qw(
+ --no-header-footer
+ --out-file -
+ --attribute newline=\\n
+);
+
+sub parser {
+ my ($file, $encoding, $opts) = @_;
+ my $html = do {
+ my $fh = _fh(
+ $ASCIIDOC, @OPTIONS,
+ '--attribute' => "encoding=$encoding",
+ $file
+ );
+
+ binmode $fh, ":encoding($encoding)";
+ local $/;
+ <$fh>;
+ };
+
+ # Make sure we have something.
+ return unless $html =~ /\S/;
+ utf8::encode $html;
+ return qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+$html
+</body>
+</html>
+};
+}
+
+# Stolen from SVN::Notify.
+sub _fh {
+ # Runs the command in @_ and returns a file handle for reading its output.
+ if (WIN32) {
+ my $cmd = q{"} . join(q{" "}, @_) . q{"|}; # double-quote each argument; trailing | reads from the command
+ open my $fh, $cmd or die "Cannot fork: $!\n";
+ return $fh;
+ }
+
+ my $pid = open my $fh, '-|'; # implicit fork; the child's STDOUT feeds $fh
+ die "Cannot fork: $!\n" unless defined $pid;
+
+ if ($pid) {
+ # Parent process, return the file handle.
+ return $fh;
+ } else {
+ # Child process. Execute the commands.
+ exec @_ or die "Cannot exec $_[0]: $!\n";
+ # Not reached.
+ }
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Asciidoc - Asciidoc parser for Text::Markup
+
+=head1 Synopsis
+
+ use Text::Markup;
+ my $html = Text::Markup->new->parse(file => 'hello.adoc');
+
+=head1 Description
+
+This is the L<Asciidoc|http://www.methods.co.nz/asciidoc/> parser for
+L<Text::Markup>. It depends on the C<asciidoc> command-line application, for
+which there are many
+L<binary distributions|http://www.methods.co.nz/asciidoc/INSTALL.html>. It
+recognizes files with the following extensions as Asciidoc:
+
+=over
+
+=item F<.asciidoc>
+
+=item F<.asc>
+
+=item F<.adoc>
+
+=back
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2012-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Bbcode.pm b/lib/Text/Markup/Bbcode.pm
new file mode 100644
index 0000000..3f57997
--- /dev/null
+++ b/lib/Text/Markup/Bbcode.pm
@@ -0,0 +1,70 @@
+package Text::Markup::Bbcode;
+
+use 5.8.1;
+use strict;
+use File::BOM qw(open_bom);
+use Parse::BBCode;
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_;
+ my $parse = Parse::BBCode->new;
+ open_bom my $fh, $file, ":encoding($encoding)";
+ local $/;
+ my $html = $parse->render(<$fh>);
+ return unless $html =~ /\S/;
+ utf8::encode($html);
+ return qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+$html
+</body>
+</html>
+};
+
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Bbcode - BBcode parser for Text::Markup
+
+=head1 Synopsis
+
+ my $html = Text::Markup->new->parse(file => 'file.bbcode');
+
+=head1 Description
+
+This is the L<BBcode|http://www.bbcode.org/> parser for L<Text::Markup>. It
+reads in the file (relying on a
+L<BOM|http://www.unicode.org/unicode/faq/utf_bom.html#BOM>), hands it off to
+L<Parse::BBCode> for parsing, and then returns the generated HTML as an
+encoded UTF-8 string with an C<http-equiv="Content-Type"> element identifying
+the encoding as UTF-8.
+
+It recognizes files with the following extensions as BBcode:
+
+=over
+
+=item F<.bb>
+
+=item F<.bbcode>
+
+=back
+
+=head1 Author
+
+Lucas Kanashiro <kanashiro.duarte at gmail.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 Lucas Kanashiro. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Creole.pm b/lib/Text/Markup/Creole.pm
new file mode 100644
index 0000000..8670151
--- /dev/null
+++ b/lib/Text/Markup/Creole.pm
@@ -0,0 +1,67 @@
+package Text::Markup::Creole;
+
+use 5.8.1;
+use strict;
+use File::BOM qw(open_bom);
+use Text::WikiCreole;
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $opts is accepted but not used by this parser
+ open_bom my $fh, $file, ":encoding($encoding)"; # layer used when no BOM is found (per File::BOM)
+ local $/; # slurp mode: read the whole file at once
+ my $html = creole_parse(scalar <$fh>); # scalar: always pass one string ('' for an empty file), never an empty list
+ return unless $html =~ /\S/; # whitespace-only output counts as no content
+ utf8::encode($html); # characters to UTF-8 bytes, matching the charset declared below
+ return qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+$html
+</body>
+</html>
+};
+
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Creole - Creole parser for Text::Markup
+
+=head1 Synopsis
+
+ my $html = Text::Markup->new->parse(file => 'file.creole');
+
+=head1 Description
+
+This is the L<Creole|http://www.wikicreole.org/> parser for L<Text::Markup>. It
+reads in the file (relying on a
+L<BOM|http://www.unicode.org/unicode/faq/utf_bom.html#BOM>), hands it off to
+L<Text::WikiCreole> for parsing, and then returns the generated HTML as an
+encoded UTF-8 string with an C<http-equiv="Content-Type"> element identifying
+the encoding as UTF-8.
+
+It recognizes files with the following extension as Creole:
+
+=over
+
+=item F<.creole>
+
+=back
+
+=head1 Author
+
+Lucas Kanashiro <kanashiro.duarte at gmail.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 Lucas Kanashiro. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/HTML.pm b/lib/Text/Markup/HTML.pm
new file mode 100644
index 0000000..6ce40ba
--- /dev/null
+++ b/lib/Text/Markup/HTML.pm
@@ -0,0 +1,60 @@
+package Text::Markup::HTML;
+
+use 5.8.1;
+use strict;
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $encoding and $opts are accepted but not used
+ my $html = do {
+ open my $fh, '<:raw', $file or die "Cannot open $file: $!\n"; # raw bytes, no decoding layer
+ local $/; # slurp mode: read the whole file at once
+ <$fh>;
+ };
+ return $html =~ /\S/ ? $html : undef # whitespace-only files yield undef
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::HTML - HTML parser for Text::Markup
+
+=head1 Synopsis
+
+ use Text::Markup;
+ my $html = Text::Markup->new->parse(file => 'hello.html');
+
+=head1 Description
+
+This is the L<HTML|http://whatwg.org/html/> parser for L<Text::Markup>. All it
+does is read in the HTML file and return it as a string. It makes no
+assumptions about encoding, and returns the string raw as read from the file,
+with no decoding. It recognizes files with the following extensions as HTML:
+
+=over
+
+=item F<.html>
+
+=item F<.htm>
+
+=item F<.xhtml>
+
+=item F<.xhtm>
+
+=back
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Markdown.pm b/lib/Text/Markup/Markdown.pm
new file mode 100644
index 0000000..7048345
--- /dev/null
+++ b/lib/Text/Markup/Markdown.pm
@@ -0,0 +1,82 @@
+package Text::Markup::Markdown;
+
+use 5.8.1;
+use strict;
+use File::BOM qw(open_bom);
+use Text::Markdown ();
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $opts: optional arrayref handed to Text::Markdown->new
+ my $md = Text::Markdown->new(@{ $opts || [] });
+ open_bom my $fh, $file, ":encoding($encoding)"; # layer used when no BOM is found (per File::BOM)
+ local $/; # slurp mode: read the whole file at once
+ my $html = $md->markdown(scalar <$fh>); # scalar: always pass one string ('' for an empty file), never an empty list
+ return unless $html =~ /\S/; # whitespace-only output counts as no content
+ utf8::encode($html); # characters to UTF-8 bytes, matching the charset declared below
+ return qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+$html
+</body>
+</html>
+};
+
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Markdown - Markdown parser for Text::Markup
+
+=head1 Synopsis
+
+ my $html = Text::Markup->new->parse(file => 'README.md');
+
+=head1 Description
+
+This is the L<Markdown|http://daringfireball.net/projects/markdown/> parser
+for L<Text::Markup>. It reads in the file (relying on a
+L<BOM|http://www.unicode.org/unicode/faq/utf_bom.html#BOM>), hands it off to
+L<Text::Markdown> for parsing, and then returns the generated HTML as an
+encoded UTF-8 string with an C<http-equiv="Content-Type"> element identifying
+the encoding as UTF-8.
+
+It recognizes files with the following extensions as Markdown:
+
+=over
+
+=item F<.md>
+
+=item F<.mkd>
+
+=item F<.mkdn>
+
+=item F<.mdown>
+
+=item F<.markdown>
+
+=back
+
+=head1 See Also
+
+L<National Funk Congress Deadlocked On Get Up/Get Down
+Issue|http://www.theonion.com/articles/national-funk-congress-deadlocked-on-get-upget-dow,625/>.
+MarkI<up> or MarkI<down>?
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Mediawiki.pm b/lib/Text/Markup/Mediawiki.pm
new file mode 100644
index 0000000..79a0e88
--- /dev/null
+++ b/lib/Text/Markup/Mediawiki.pm
@@ -0,0 +1,72 @@
+package Text::Markup::Mediawiki;
+
+use 5.8.1;
+use strict;
+use File::BOM qw(open_bom);
+use Text::MediawikiFormat '1.0';
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $opts: optional arrayref of extra arguments for format()
+ open_bom my $fh, $file, ":encoding($encoding)"; # layer used when no BOM is found (per File::BOM)
+ local $/; # slurp mode: read the whole file at once
+ my $html = Text::MediawikiFormat::format(scalar <$fh>, @{ $opts || [] }); # scalar: an empty file must not let $opts slide into the text slot
+ return unless $html =~ /\S/; # whitespace-only output counts as no content
+ utf8::encode($html); # characters to UTF-8 bytes, matching the charset declared below
+ return qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+$html
+</body>
+</html>
+};
+
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Mediawiki - MediaWiki syntax parser for Text::Markup
+
+=head1 Synopsis
+
+ my $html = Text::Markup->new->parse(file => 'README.mediawiki');
+
+=head1 Description
+
+This is the L<MediaWiki
+syntax|http://en.wikipedia.org/wiki/Help:Contents/Editing_Wikipedia> parser
+for L<Text::Markup>. It reads in the file (relying on a
+L<BOM|http://www.unicode.org/unicode/faq/utf_bom.html#BOM>), hands it off to
+L<Text::MediawikiFormat> for parsing, and then returns the generated HTML as
+an encoded UTF-8 string with an C<http-equiv="Content-Type"> element
+identifying the encoding as UTF-8.
+
+It recognizes files with the following extensions as MediaWiki:
+
+=over
+
+=item F<.mediawiki>
+
+=item F<.mwiki>
+
+=item F<.wiki>
+
+=back
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Multimarkdown.pm b/lib/Text/Markup/Multimarkdown.pm
new file mode 100644
index 0000000..cdf6cdd
--- /dev/null
+++ b/lib/Text/Markup/Multimarkdown.pm
@@ -0,0 +1,76 @@
+package Text::Markup::Multimarkdown;
+
+use 5.8.1;
+use strict;
+use File::BOM qw(open_bom);
+use Text::MultiMarkdown ();
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $opts: optional arrayref handed to Text::MultiMarkdown->new
+ my $md = Text::MultiMarkdown->new(@{ $opts || [] });
+ open_bom my $fh, $file, ":encoding($encoding)"; # layer used when no BOM is found (per File::BOM)
+ local $/; # slurp mode: read the whole file at once
+ my $html = $md->markdown(scalar <$fh>); # scalar: always pass one string ('' for an empty file), never an empty list
+ return unless $html =~ /\S/; # whitespace-only output counts as no content
+ utf8::encode($html); # characters to UTF-8 bytes, matching the charset declared below
+ return qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+$html
+</body>
+</html>
+};
+
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Multimarkdown - MultiMarkdown parser for Text::Markup
+
+=head1 Synopsis
+
+ my $html = Text::Markup->new->parse(file => 'README.mmd');
+
+=head1 Description
+
+This is the L<MultiMarkdown|http://fletcherpenney.net/multimarkdown/> parser
+for L<Text::Markup>. It reads in the file (relying on a
+L<BOM|http://www.unicode.org/unicode/faq/utf_bom.html#BOM>), hands it off to
+L<Text::MultiMarkdown> for parsing, and then returns the generated HTML as an
+encoded UTF-8 string with an C<http-equiv="Content-Type"> element identifying
+the encoding as UTF-8.
+
+It recognizes files with the following extensions as MultiMarkdown:
+
+=over
+
+=item F<.mmd>
+
+=item F<.mmkd>
+
+=item F<.mmkdn>
+
+=item F<.mmdown>
+
+=item F<.multimarkdown>
+
+=back
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/None.pm b/lib/Text/Markup/None.pm
new file mode 100644
index 0000000..8ce746e
--- /dev/null
+++ b/lib/Text/Markup/None.pm
@@ -0,0 +1,58 @@
+package Text::Markup::None;
+
+use 5.8.1;
+use strict;
+use HTML::Entities;
+use File::BOM qw(open_bom);
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $opts is accepted but not used by this parser
+ open_bom my $fh, $file, ":encoding($encoding)"; # layer used when no BOM is found (per File::BOM)
+ local $/; # slurp mode: read the whole file at once
+ my $html = encode_entities(scalar <$fh>, '<>&"'); # scalar: an empty file must not make '<>&"' the text to encode
+ utf8::encode($html); # characters to UTF-8 bytes, matching the charset declared below
+ return qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<pre>$html</pre>
+</body>
+</html>
+};
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::None - Turn a file with no known markup into HTML
+
+=head1 Synopsis
+
+ use Text::Markup;
+ my $html = Text::Markup->new->parse(file => 'README');
+
+=head1 Description
+
+This is the default parser used by Text::Markup in the event that it cannot
+determine the format of a text file. All it does is read the file in (relying
+on a L<BOM|http://www.unicode.org/unicode/faq/utf_bom.html#BOM>), encode the
+HTML special characters as entities, and then return an HTML string with the
+file in a C<< <pre> >> element. This will be handy for files that really are
+nothing but plain text, like F<README> files.
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Pod.pm b/lib/Text/Markup/Pod.pm
new file mode 100644
index 0000000..c64b518
--- /dev/null
+++ b/lib/Text/Markup/Pod.pm
@@ -0,0 +1,96 @@
+package Text::Markup::Pod;
+
+use 5.8.1;
+use strict;
+use Pod::Simple::XHTML '3.15';
+
+# Disable the use of HTML::Entities.
+$Pod::Simple::XHTML::HAS_HTML_ENTITIES = 0;
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $encoding is not used below; $opts is an optional key/value arrayref
+ my $p = Pod::Simple::XHTML->new;
+ # Output everything as UTF-8.
+ $p->html_header_tags('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />');
+ $p->strip_verbatim_indent(sub {
+ my $lines = shift;
+ (my $i = $lines->[0]) =~ s/\S.*//; # keep only the leading whitespace of the first verbatim line
+ return $i;
+ });
+ $p->output_string(\my $html); # rendered output accumulates in $html
+ # Want user supplied options to override even these default behaviors,
+ # if necessary
+ my $opt = $opts ? { @$opts } : {}; # flatten the pairs into method-name => argument
+ foreach my $method ( keys %$opt ) { # hash order, so the call order is unspecified
+ my $v = $opt->{$method};
+ $p->$method($v); # each option is a one-argument method call on the parser
+ }
+ $p->parse_file($file);
+ return unless $p->content_seen; # no Pod content found in the file
+ utf8::encode($html); # characters to UTF-8 bytes, matching the header tag set above
+ return $html;
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Pod - Pod parser for Text::Markup
+
+=head1 Synopsis
+
+ use Text::Markup;
+ my $pod = Text::Markup->new->parse(file => 'README.pod');
+
+=head1 Description
+
+This is the L<Pod|perlpod> parser for L<Text::Markup>. It runs the file
+through L<Pod::Simple::XHTML> and returns the result. If the Pod contains any
+non-ASCII characters, the encoding must be declared either via a BOM or via
+the C<=encoding> tag. Text::Markup::Pod recognizes files with the following
+extensions as Pod:
+
+=over
+
+=item F<.pod>
+
+=item F<.pm>
+
+=item F<.pl>
+
+=back
+
+=head1 Options
+
+You may pass an arrayref of settings to this parser which changes the output returned. For example,
+to suppress an HTML header and footer, pass:
+
+ my $pod_fragment = Text::Markup->new->parse(
+ file => 'README.pod',
+ options => [
+ html_header => '',
+ html_footer => '',
+ ]
+ );
+
+This implementation makes method calls to the L<Pod::Simple::XHTML> parser using the key as the method
+name and the value as the parameter list to pass.
+
+See L<Pod::Simple::XHTML> and L<Pod::Simple> for the full list of options and inherited options
+which can be manipulated.
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Rest.pm b/lib/Text/Markup/Rest.pm
new file mode 100644
index 0000000..a513da9
--- /dev/null
+++ b/lib/Text/Markup/Rest.pm
@@ -0,0 +1,186 @@
+package Text::Markup::Rest;
+
+use 5.8.1;
+use strict;
+use File::Spec;
+use File::Basename ();
+use constant WIN32 => $^O eq 'MSWin32';
+use Symbol 'gensym';
+use IPC::Open3;
+
+our $VERSION = '0.22';
+
+# Find Python (process stolen from App::Info).
+my ($PYTHON, $RST2HTML);
+for my $exe (WIN32 ? 'python.exe' : 'python') { # single-element list: the loop body runs once, when the module is loaded
+ my @path = (
+ File::Spec->path,
+ WIN32 ? (map { "C:\\Python$_" } '', 27, 26, 25) : () # extra Windows install directories to try
+ );
+
+ for my $p (@path) {
+ my $path = File::Spec->catfile($p, $exe);
+ next unless -f $path && -x $path; # must be a plain, executable file
+ $PYTHON = $path; # first match wins
+ last;
+ }
+
+ unless ($PYTHON) {
+ use Carp;
+ my $sep = WIN32 ? ';' : ':';
+ Carp::croak(
+ "Cannot find $exe in path " . join $sep => @path
+ );
+ }
+
+ # We have python, let's find out if we have docutils.
+ my $output = gensym; # one glob serves as both the child's stdout and stderr handle
+ my $pid = open3 undef, $output, $output, $PYTHON, '-c', 'import docutils';
+ waitpid $pid, 0; # sets $? to the child's exit status
+ if ($?) {
+ use Carp;
+ local $/; # slurp the child's whole output into the message
+ Carp::croak(
+ qq{Missing required Python "docutils" module\n},
+ <$output>
+ );
+ }
+
+ # We ship with our own rst2html that's lenient with unknown directives.
+ $RST2HTML = File::Spec->catfile(
+ File::Basename::dirname(__FILE__), # the script lives next to this module
+ 'rst2html_lenient.py'
+ );
+
+ # Make sure it looks like it will work.
+ $pid = open3 undef, $output, $output, $PYTHON, $RST2HTML, '--test-patch';
+ waitpid $pid, 0; # sets $? to the child's exit status
+ if ($?) {
+ use Carp;
+ local $/; # slurp the child's whole output into the message
+ Carp::croak(
+ qq{$RST2HTML will not execute\n},
+ <$output>
+ );
+ }
+}
+
+# Optional arguments to pass to rst2html
+my @OPTIONS = qw(
+ --no-raw
+ --no-file-insertion
+ --stylesheet=
+ --cloak-email-address
+ --no-generator
+ --quiet
+);
+
+# Options to improve rendering of Sphinx documents
+my @SPHINX_OPTIONS = qw(
+ --dir-ignore toctree
+ --dir-ignore highlight
+ --dir-ignore index
+ --dir-ignore default-domain
+
+ --dir-nested note
+ --dir-nested warning
+ --dir-nested versionadded
+ --dir-nested versionchanged
+ --dir-nested deprecated
+ --dir-nested seealso
+ --dir-nested hlist
+ --dir-nested glossary
+
+ --dir-notitle code-block
+
+ --dir-nested module
+ --dir-nested function
+ --output-encoding utf-8
+);
+# note: domains directive (last 2 options) incomplete
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $opts is accepted but not used by this parser
+ my $html = do {
+ my $fh = _fh( # read handle on the command's standard output
+ $PYTHON, $RST2HTML,
+ @OPTIONS, @SPHINX_OPTIONS,
+ '--input-encoding', $encoding,
+ $file
+ );
+ local $/; # slurp mode: read all of the output at once
+ <$fh>;
+ };
+
+ # Make sure we have something.
+ return undef if $html =~ m{<div\s+class\s*=\s*(['"])document\1>\s+</div>}ms; # a "document" div holding only whitespace
+
+ # Alas, --no-generator does not remove the generator meta tag. :-(
+ $html =~ s{^\s*<meta\s+name\s*=\s*(['"])generator\1[^>]+>\n}{}ms; # drop the first generator meta line
+
+ return $html;
+}
+
+# Stolen from SVN::Notify.
+sub _fh { # (@command): run the command and return a read handle on its standard output
+ # No decoding layer is set on the handle; looks like docutils always emits UTF-8.
+ if (WIN32) {
+ my $cmd = q{"} . join(q{" "}, @_) . q{"|}; # "arg1" "arg2" ...| -- was a bare join() that always produced ''
+ open my $fh, $cmd or die "Cannot fork: $!\n"; # two-argument pipe open; the trailing | means read from the command
+ return $fh;
+ }
+
+ my $pid = open my $fh, '-|'; # implicit fork: the child's STDOUT feeds $fh in the parent
+ die "Cannot fork: $!\n" unless defined $pid;
+
+ if ($pid) {
+ # Parent process, return the file handle.
+ return $fh;
+ } else {
+ # Child process. Execute the commands.
+ exec @_ or die "Cannot exec $_[0]: $!\n"; # list form of exec: no shell is involved
+ # Not reached.
+ }
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Rest - reStructuredText parser for Text::Markup
+
+=head1 Synopsis
+
+ use Text::Markup;
+ my $html = Text::Markup->new->parse(file => 'hello.rst');
+
+=head1 Description
+
+This is the
+L<reStructuredText|http://docutils.sourceforge.net/docs/user/rst/quickref.html>
+parser for L<Text::Markup>. It depends on the C<docutils> Python package
+(which can be found as C<python-docutils> in many Linux distributions, or
+installed using the command C<easy_install docutils>). It recognizes files
+with the following extensions as reST:
+
+=over
+
+=item F<.rest>
+
+=item F<.rst>
+
+=back
+
+=head1 Author
+
+Daniele Varrazzo <daniele.varrazzo at gmail.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 Daniele Varrazzo. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Textile.pm b/lib/Text/Markup/Textile.pm
new file mode 100644
index 0000000..a01db85
--- /dev/null
+++ b/lib/Text/Markup/Textile.pm
@@ -0,0 +1,73 @@
+package Text::Markup::Textile;
+
+use 5.8.1;
+use strict;
+use File::BOM qw(open_bom);
+use Text::Textile '2.10';
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $opts: optional arrayref of extra Text::Textile->new arguments
+ my $textile = Text::Textile->new(
+ charset => 'utf-8',
+ char_encoding => 0,
+ trim_spaces => 1,
+ @{ $opts || [] } # listed last, so caller options override the defaults above
+ );
+ open_bom my $fh, $file, ":encoding($encoding)"; # layer used when no BOM is found (per File::BOM)
+ local $/; # slurp mode: read the whole file at once
+ my $html = $textile->process(scalar <$fh>); # scalar: always pass one string ('' for an empty file), never an empty list
+ return unless $html =~ /\S/; # whitespace-only output counts as no content
+ utf8::encode($html); # characters to UTF-8 bytes, matching the charset declared below
+ return qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+$html
+</body>
+</html>
+};
+
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Textile - Textile parser for Text::Markup
+
+=head1 Synopsis
+
+ my $html = Text::Markup->new->parse(file => 'README.textile');
+
+=head1 Description
+
+This is the L<Textile|http://www.textism.com/tools/textile/> parser for
+L<Text::Markup>. It reads in the file (relying on a
+L<BOM|http://www.unicode.org/unicode/faq/utf_bom.html#BOM>), hands it off to
+L<Text::Textile> for parsing, and then returns the generated HTML as an
+encoded UTF-8 string with an C<http-equiv="Content-Type"> element identifying
+the encoding as UTF-8.
+
+It recognizes files with the following extension as Textile:
+
+=over
+
+=item F<.textile>
+
+=back
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/Trac.pm b/lib/Text/Markup/Trac.pm
new file mode 100644
index 0000000..7b729f7
--- /dev/null
+++ b/lib/Text/Markup/Trac.pm
@@ -0,0 +1,71 @@
+package Text::Markup::Trac;
+
+use 5.8.1;
+use strict;
+use File::BOM qw(open_bom);
+use Text::Trac '0.10';
+
+our $VERSION = '0.22';
+
+sub parser {
+ my ($file, $encoding, $opts) = @_; # $opts: optional arrayref handed to Text::Trac->new
+ my $trac = Text::Trac->new(@{ $opts || [] });
+ open_bom my $fh, $file, ":encoding($encoding)"; # layer used when no BOM is found (per File::BOM)
+ local $/; # slurp mode: read the whole file at once
+ my $html = $trac->parse(scalar <$fh>); # scalar: always pass one string ('' for an empty file), never an empty list
+ return unless $html =~ /\S/; # whitespace-only output counts as no content
+ utf8::encode($html); # characters to UTF-8 bytes, matching the charset declared below
+ return qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+$html
+</body>
+</html>
+};
+
+}
+
+1;
+__END__
+
+=head1 Name
+
+Text::Markup::Trac - Trac wiki syntax parser for Text::Markup
+
+=head1 Synopsis
+
+ my $html = Text::Markup->new->parse(file => 'README.trac');
+
+=head1 Description
+
+This is the L<Trac wiki
+syntax|http://projects.edgewall.com/trac/wiki/WikiFormatting> parser for
+L<Text::Markup>. It reads in the file (relying on a
+L<BOM|http://www.unicode.org/unicode/faq/utf_bom.html#BOM>), hands it off to
+L<Text::Trac> for parsing, and then returns the generated HTML as an encoded
+UTF-8 string with an C<http-equiv="Content-Type"> element identifying the
+encoding as UTF-8.
+
+It recognizes files with the following extensions as Trac:
+
+=over
+
+=item F<.trac>
+
+=item F<.trc>
+
+=back
+
+=head1 Author
+
+David E. Wheeler <david at justatheory.com>
+
+=head1 Copyright and License
+
+Copyright (c) 2011-2014 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
+=cut
diff --git a/lib/Text/Markup/rst2html_lenient.py b/lib/Text/Markup/rst2html_lenient.py
new file mode 100755
index 0000000..7778d49
--- /dev/null
+++ b/lib/Text/Markup/rst2html_lenient.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python
+"""
+Parse a reST file into HTML in a very forgiving way.
+
+The script is meant to render specialized reST documents, such as Sphinx
+files, preserving the content, while not emulating the original rendering.
+
+The script is currently tested against docutils 0.7-0.10. Other versions may
+break it as it deals with the parser at a relatively low level. Use
+--test-patch to verify if the script works as expected with your library
+version.
+"""
+
+import sys
+
+import docutils
+from docutils import nodes, utils, SettingsSpec
+from docutils.core import publish_cmdline, publish_string, default_description
+from docutils.parsers.rst import Directive, directives, roles
+from docutils.writers.html4css1 import HTMLTranslator, Writer
+from docutils.parsers.rst.states import Body, Inliner
+from docutils.frontend import validate_boolean
+
+class any_directive(nodes.General, nodes.FixedTextElement):
+ """A generic directive to deal with any unknown directive we may find."""
+ pass # no behaviour of its own: built by AnyDirective.run, rendered by MyTranslator.visit_any_directive
+
+class AnyDirective(Directive):
+ """A directive returning its unaltered body."""
+ optional_arguments = 100 # should suffice
+ has_content = True
+
+ def run(self):
+ if self.name in self.state.document.settings.dir_ignore: # listed via --dir-ignore
+ return [] # produce no output at all
+
+ children = []
+
+ if self.name not in self.state.document.settings.dir_notitle: # not listed via --dir-notitle
+ children.append(nodes.strong(self.name, u"%s: " % self.name)) # directive name as a bold title
+ # keep the arguments, drop the options
+ for a in self.arguments:
+ if a.startswith(':') and a.endswith(':'): # first ':name:' token is taken as the start of the options
+ break
+ children.append(nodes.emphasis(a, u"%s " % a))
+
+ if self.name in self.state.document.settings.dir_nested: # listed via --dir-nested
+ if self.content:
+ container = nodes.Element() # scratch parent for the nested parse
+ self.state.nested_parse(self.content, self.content_offset,
+ container)
+ children.extend(container.children)
+ else: # presumably pairs with the dir_nested test (see the --dir-nested help text); indentation is lost in this listing
+ content = u'\n'.join(self.content)
+ children.append(nodes.literal_block(content, content))
+
+ node = any_directive(self.block_text, '', *children, dir_name=self.name) # dir_name is read back by MyTranslator
+
+ return [node]
+
+
+class any_role(nodes.Inline, nodes.TextElement):
+ """A generic role to deal with any unknown role we may find."""
+ pass # no behaviour of its own: built by AnyRole.__call__, rendered by MyTranslator.visit_any_role
+
+class AnyRole:
+ """A role to be rendered as a generic element with a specific class."""
+ def __init__(self, role_name):
+ self.role_name = role_name # stored on every node this role creates
+
+ def __call__(self, role, rawtext, text, lineno, inliner,
+ options={}, content=[]): # NOTE(review): the default dict is shared between calls and is written to below
+ roles.set_classes(options)
+ options['role_name'] = self.role_name # read back by MyTranslator.visit_any_role
+ node = any_role(rawtext, utils.unescape(text), **options)
+ return [node], [] # (nodes, system messages)
+
+
+def catchall_directive(self, match, **option_presets):
+ """Directive dispatch method.
+
+ Replacement for Body.directive(): if a directive is not known, build one
+ on the fly instead of reporting an error.
+ """
+ type_name = match.group(1) # directive name captured by the dispatching pattern
+ directive_class, messages = directives.directive(
+ type_name, self.memo.language, self.document)
+
+ # in case it's missing, register a generic directive
+ if not directive_class:
+ directives.register_directive(type_name, AnyDirective)
+ directive_class, messages = directives.directive( # look it up again now that it is registered
+ type_name, self.memo.language, self.document)
+ assert directive_class, "can't find just defined directive"
+
+ self.parent += messages # messages from the final lookup only
+ return self.run_directive(
+ directive_class, match, type_name, option_presets)
+
+
+def catchall_interpreted(self, rawsource, text, role, lineno):
+ """Interpreted text role dispatch method.
+
+ Replacement for Inliner.interpreted(): if a role is not known, build one
+ on the fly instead of reporting an error.
+ """
+ role_fn, messages = roles.role(role, self.language, lineno,
+ self.reporter)
+ # in case it's missing, register a generic role
+ if not role_fn:
+ role_obj = AnyRole(role)
+ roles.register_canonical_role(role, role_obj)
+ role_fn, messages = roles.role( # look it up again now that it is registered
+ role, self.language, lineno, self.reporter)
+ assert role_fn, "can't find just defined role"
+
+ nodes, messages2 = role_fn(role, rawsource, text, lineno, self) # local 'nodes' shadows the docutils module of the same name
+ return nodes, messages + messages2
+
+
+def patch_docutils():
+ """Change the docutils parser behaviour."""
+ # Patch the constructs dispatch table
+ for i, (f, p) in enumerate(Body.explicit.constructs):
+ if f is Body.directive.im_func is f: # im_func is a Python 2 method attribute; the trailing 'is f' is redundant
+ Body.explicit.constructs[i] = (catchall_directive, p) # keep the pattern, swap the handler
+ break
+ else: # for/else: reached only when the loop ended without a break
+ assert False, "can't find directive dispatch entry"
+
+ # Patch the parser so that when an unknown directive is found, a generic one
+ # is generated on the fly.
+ Body.directive = catchall_directive
+
+ # Patch the parser so that when an unknown interpreted text role is found,
+ # a generic one is generated on the fly.
+ Inliner.interpreted = catchall_interpreted
+
+
+class MyTranslator(HTMLTranslator):
+ """An HTML translator that can render with any_role/any_directive.
+ """
+ def visit_any_directive(self, node):
+ cls = node.get('dir_name') # set by AnyDirective.run
+ cls = cls and 'directive-%s' % cls or 'directive' # plain 'directive' when no name is recorded
+ self.body.append(self.starttag(node, 'div', CLASS=cls))
+
+ def depart_any_directive(self, node):
+ self.body.append('\n</div>\n') # closes the div opened in visit_any_directive
+
+ def visit_any_role(self, node):
+ cls = node.get('role_name') # set by AnyRole.__call__
+ cls = cls and 'role-%s' % cls or 'role' # plain 'role' when no name is recorded
+ self.body.append(self.starttag(node, 'span', '', CLASS=cls))
+
+ def depart_any_role(self, node):
+ self.body.append('</span>') # closes the span opened in visit_any_role
+
+
+class LenientSettingsSpecs(SettingsSpec):
+ settings_spec = ("Lenient parsing options", None, ( # (title, description, option tuples)
+ ("Directive whose content should be interpreted as reST. "
+ "By default emit the content as unparsed text block. "
+ "Can be specified more than once",
+ ["--dir-nested"], # read by AnyDirective.run as settings.dir_nested
+ {'metavar': 'NAME', 'default': [], 'action': 'append'}),
+ ("Directive that should produce no output. "
+ "Can be specified more than once",
+ ["--dir-ignore"], # read by AnyDirective.run as settings.dir_ignore
+ {'metavar': 'NAME', 'default': [], 'action': 'append'}),
+ ("Only emit the content of the directive, no title and options. "
+ "Can be specified more than once",
+ ["--dir-notitle"], # read by AnyDirective.run as settings.dir_notitle
+ {'metavar': 'NAME', 'default': [], 'action': 'append'}),
+ ("Verify that lenient customization works fine. "
+ "Immediately return with 0 (success) or 1 (error). "
+ "In case of error, print a report on stdout.",
+ ['--test-patch'], # main() looks for this flag in sys.argv itself
+ {'action': 'store_true', 'validator': validate_boolean}),
+ ))
+
+
+def main(): # entry point: returns the process exit status (passed to sys.exit below)
+
+ # Create a writer to deal with the generic element we may have created.
+ writer = Writer()
+ writer.translator_class = MyTranslator
+
+ description = (
+ 'Generates (X)HTML documents from standalone reStructuredText '
+ 'sources. Be forgiving against unknown elements. '
+ + default_description)
+
+ # the parser processes the settings too late: we want to decide earlier if
+ # we are running or testing.
+ if ('--test-patch' in sys.argv
+ and not ('-h' in sys.argv or '--help' in sys.argv)): # a help request takes precedence over the self-test
+ return test_patch(writer)
+
+ else:
+ # Make docutils lenient.
+ patch_docutils()
+
+ overrides = {
+ # If Pygments is missing, code-block directives are swallowed
+ # with Docutils >= 0.9.
+ 'syntax_highlight': 'none',
+
+ # not available on Docutils < 0.8 so can't pass as an option
+ 'math_output': 'HTML',
+ }
+
+ publish_cmdline(writer=writer, description=description,
+ settings_spec=LenientSettingsSpecs, settings_overrides=overrides)
+ return 0
+
+def test_patch(writer):
+ """Verify that patching docutils works as expected."""
+ TEST_SOURCE = """`
+Hello `role`:norole:
+
+.. nodirective::
+"""
+ rv = 0 # exit status: 0 on success, 1 when any problem is recorded
+ problems = []
+ exc = None
+
+ # patch and use lenient docutils
+ try:
+ try:
+ patch_docutils()
+ except Exception, exc: # Python 2 except syntax; exc is kept for the report below
+ problems.append("error during library patching")
+ raise
+
+ try:
+ out = publish_string(TEST_SOURCE,
+ writer=writer, settings_spec=LenientSettingsSpecs)
+ except Exception, exc: # Python 2 except syntax; exc is kept for the report below
+ problems.append("error while running patched docutils")
+ raise
+
+ except: # deliberate swallow: the failure is already recorded in problems
+ pass
+
+ # verify conform output
+ else:
+ out = out.replace("'", '"') # accept either quote style around attribute values
+ if '<span class="role-norole">' not in out:
+ problems.append(
+ "unknown role didn't produce the expected output")
+
+ if '<div class="directive-nodirective">' not in out:
+ problems.append(
+ "unknown directive didn't produce the expected output")
+
+ # report problems if any
+ if problems:
+ rv = 1
+ print >> sys.stderr, "Patching docutils failed!"
+ for problem in problems:
+ print >> sys.stderr, "-", problem
+
+ if rv:
+ print >> sys.stderr, "\nVersions:", \
+ 'docutils:', docutils.__version__, docutils.__version_details__, \
+ '\nPython:', sys.version
+
+ if exc:
+ if '--traceback' in sys.argv:
+ print >> sys.stderr
+ import traceback
+ traceback.print_exc()
+ else:
+ print >> sys.stderr, \
+ "\nUse --traceback to display the error stack trace."
+
+ return rv
+
+if __name__ == '__main__':
+ sys.exit(main())
+
diff --git a/t/base.t b/t/base.t
new file mode 100644
index 0000000..a6660ff
--- /dev/null
+++ b/t/base.t
@@ -0,0 +1,156 @@
+#!/usr/bin/env perl -w
+
+use strict;
+use warnings;
+use Test::More tests => 25;
+#use Test::More 'no_plan';
+use File::Spec::Functions qw(catdir);
+use HTML::Entities;
+
+BEGIN { use_ok 'Text::Markup' or die; }
+
+can_ok 'Text::Markup' => qw(
+ register
+ formats
+ new
+ parse
+ default_format
+ _get_parser
+);
+
+# Find core parsers.
+my $dir = catdir qw(lib Text Markup);
+opendir my $dh, $dir or die "Cannot open diretory $dir: $!\n";
+my @core_parsers;
+while (my $f = readdir $dh) {
+ next if $f eq '.' || $f eq '..' || $f eq 'None.pm';
+ $f =~ s{[.]pm$}{} or next;
+ push @core_parsers => lc $f;
+}
+
+is_deeply [Text::Markup->formats], [sort @core_parsers],
+ 'Should have core parsers';
+
+# Register one.
+PARSER: {
+ package My::Cool::Parser;
+ use Text::Markup;
+ Text::Markup->register(cool => qr{cool});
+ sub parser {
+ return $_[2] ? $_[2]->[0] : 'hello';
+ }
+}
+
+is_deeply [Text::Markup->formats], [sort @core_parsers, 'cool'],
+ 'Should be now have the "cool" parser';
+
+my $parser = new_ok 'Text::Markup';
+is $parser->default_format, undef, 'Should have no default format';
+
+$parser = new_ok 'Text::Markup', [default_format => 'cool'];
+is $parser->default_format, 'cool', 'Should have default format';
+
+is $parser->_get_parser({ format => 'cool' }), My::Cool::Parser->can('parser'),
+ 'Should be able to find specific parser';
+
+is $parser->_get_parser({ file => 'foo' }), My::Cool::Parser->can('parser'),
+ 'Should be able to find default format parser';
+
+$parser->default_format(undef);
+is $parser->_get_parser({ file => 'foo'}), Text::Markup::None->can('parser'),
+ 'Should be find the specified default parser';
+
+# Now make it guess the format.
+$parser->default_format(undef);
+is $parser->_get_parser({ file => 'foo.cool'}),
+ My::Cool::Parser->can('parser'),
+ 'Should be able to guess the parser file the file name';
+
+# Now test guess_format.
+is $parser->guess_format('foo.cool'), 'cool',
+ 'Should guess "cool" format file "foo.cool"';
+is $parser->guess_format('foocool'), undef,
+ 'Should not guess "cool" format file "foocool"';
+is $parser->guess_format('foo.cool.txt'), undef,
+ 'Should not guess "cool" format file "foo.cool.txt"';
+
+# Add another parser.
+PARSER: {
+ package My::Funky::Parser;
+ Text::Markup->register(funky => qr{funky(?:[.]txt)?});
+ sub parser {
+ # Must return a UTF-8 encoded string.
+ use utf8;
+ my $ret = 'fünky';
+ utf8::encode($ret);
+ return $ret;
+ }
+}
+
+is_deeply [Text::Markup->formats], [sort @core_parsers, qw(cool funky)],
+ 'Should be now have the "cool" and "funky" parsers';
+is $parser->guess_format('foo.cool'), 'cool',
+ 'Should still guess "cool" format file "foo.cool"';
+is $parser->guess_format('foo.funky'), 'funky',
+ 'Should guess "funky" format file "foo.funky"';
+is $parser->guess_format('foo.funky.txt'), 'funky',
+ 'Should guess "funky" format file "foo.funky.txt"';
+
+# Now try parsing.
+is $parser->parse(
+ file => 'README.md',
+ format => 'cool',
+), 'hello', 'Test the "cool" parser';
+
+# Test a parser that returns non-ASCII, UTF-8 encoded output.
+is $parser->parse(
+ file => 'README.md',
+ format => 'funky',
+), 'fünky', 'Test the "funky" parser';
+
+# Test opts to the parser.
+is $parser->parse(
+ file => 'README.md',
+ format => 'cool',
+ options => ['goodbye'],
+), 'goodbye', 'Test the "cool" parser with options';
+
+my $pod_dir = catdir (qw(t markups));
+
+like $parser->parse(
+ file => "$pod_dir/pod.txt",
+ format => "pod",
+ options => [
+ html_header => '',
+ ],
+ ), qr|</html>|, 'Test pod option to suppress HTML header';
+
+unlike $parser->parse(
+ file => "$pod_dir/pod.txt",
+ format => "pod",
+ options => [
+ html_header => '',
+ html_footer => '',
+ ],
+ ), qr|</html>|, 'Test pod options to suppress HTML header and footer';
+
+# Test the "none" parser.
+my $output = do {
+ my $f = __FILE__;
+ open my $fh, '<:utf8', $f or die "Cannot open $f: $!\n";
+ local $/;
+ my $html = encode_entities(<$fh>, '<>&"');
+ utf8::encode($html);
+ qq{<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<pre>$html</pre>
+</body>
+</html>
+};
+};
+$parser->default_format(undef);
+is $parser->parse(
+ file => __FILE__,
+), $output, 'Test the "none" parser';
diff --git a/t/empty.txt b/t/empty.txt
new file mode 100644
index 0000000..139597f
--- /dev/null
+++ b/t/empty.txt
@@ -0,0 +1,2 @@
+
+
diff --git a/t/formats.t b/t/formats.t
new file mode 100644
index 0000000..58c503d
--- /dev/null
+++ b/t/formats.t
@@ -0,0 +1,79 @@
+#!/usr/bin/env perl -w
+
+use strict;
+use warnings;
+use Test::More 0.96;
+use File::Spec::Functions qw(catfile);
+use Carp;
+
+# Need to have at least one test outside subtests, in case no subtests are run
+# at all. So it might as well be this.
+BEGIN { use_ok 'Text::Markup' or die; }
+
+sub slurp($$) { # Read $file in one go and return its contents passed through $filter.
+ my ($filter, $file) = @_;
+ $filter ||= sub { shift }; # No filter given: hand the contents back unchanged.
+ open my $fh, '<:raw', $file or die "Cannot open $file: $!\n";
+ local $/; # Undefine the record separator so the read below returns the whole file.
+ return $filter->(<$fh>);
+}
+
+my %filter_for = ( # Per-format filters applied by slurp() to the expected HTML; formats without an entry get none.
+ mediawiki => sub { # Replace the first "ö" with its CGI::escapeHTML form, but only when the condition below holds.
+ $_[0] =~ s/ö/CGI::escapeHTML(do { use utf8; 'ö' })/e
+ if eval { CGI->VERSION >= 4.11 }; # eval traps the failure if CGI is unavailable. NOTE(review): confirm why 4.11 is the cutoff.
+ return shift;
+ },
+);
+
+my @loaded = Text::Markup->formats;
+while (my $data = <DATA>) { # One subtest per "format,module,requirement,extensions..." line in __DATA__.
+ next if $data =~ /^#/; # Skip comment lines such as the column header.
+ chomp $data;
+ my ($format, $module, $req, @exts) = split /,/ => $data;
+ subtest "Testing $format format" => sub {
+ local $@;
+ eval "use $req; 1;" if $req; # $req may carry a minimum version, e.g. "Text::Trac 0.10".
+ plan skip_all => "$module not loading" if $@;
+ plan tests => @exts + 5; # One guess_format test per extension plus the five fixed tests below.
+ use_ok $module or return; # Leave the subtest sub normally; "next" would exit a subroutine via loop control.
+
+ push @loaded => $format unless grep { $_ eq $format } @loaded;
+ is_deeply [Text::Markup->formats], \@loaded,
+ "$format should be loaded";
+
+ my $parser = new_ok 'Text::Markup';
+ for my $ext (@exts) {
+ is $parser->guess_format("foo.$ext"), $format,
+ "Should guess that .$ext extension is $format";
+ }
+
+ my $expect = slurp $filter_for{$format}, catfile('t', 'html', "$format.html");
+ is $parser->parse(
+ file => catfile('t', 'markups', "$format.txt"),
+ format => $format,
+ ), $expect, "Parse $format file";
+
+ is $parser->parse(
+ file => catfile('t', 'empty.txt'),
+ format => $format,
+ ), undef, "Parse empty $format file";
+
+ }
+}
+
+done_testing;
+
+__DATA__
+# Format,Format Module,Required Module,extensions
+markdown,Text::Markup::Markdown,Text::Markdown 1.000004,md,mkdn,mkd,mdown,markdown
+html,Text::Markup::HTML,,html,htm,xhtml,xhtm
+pod,Text::Markup::Pod,Pod::Simple::XHTML 3.15,pod,pm,pl
+trac,Text::Markup::Trac,Text::Trac 0.10,trac,trc
+textile,Text::Markup::Textile,Text::Textile 2.10,textile
+mediawiki,Text::Markup::Mediawiki,Text::MediawikiFormat 1.0,wiki,mwiki,mediawiki
+multimarkdown,Text::Markup::Multimarkdown,Text::MultiMarkdown 1.000033,mmd,mmkdn,mmkd,mmdown,mmarkdown
+rest,Text::Markup::Rest,Text::Markup::Rest,rest,rst
+asciidoc,Text::Markup::Asciidoc,Text::Markup::Asciidoc,asciidoc,asc,adoc
+bbcode,Text::Markup::Bbcode,Parse::BBCode,bbcode,bb
+creole,Text::Markup::Creole,Text::WikiCreole,creole
diff --git a/t/html/asciidoc.html b/t/html/asciidoc.html
new file mode 100644
index 0000000..c8fd814
--- /dev/null
+++ b/t/html/asciidoc.html
@@ -0,0 +1,22 @@
+<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<div class="paragraph"><p>start paragraph</p></div>
+<div class="paragraph"><p>another paragraph</p></div>
+<div class="ulist"><ul>
+<li>
+<p>
+list of things with <a href="http://www.jerakeen.org">ürls</a> in
+</p>
+</li>
+<li>
+<p>
+more things in the list
+</p>
+</li>
+</ul></div>
+<div class="paragraph"><p>a <a href="http://bare.url.here">http://bare.url.here</a>. and an <a href="mailto:email at address.com">email at address.com</a></p></div>
+
+</body>
+</html>
diff --git a/t/html/bbcode.html b/t/html/bbcode.html
new file mode 100644
index 0000000..68e5dbd
--- /dev/null
+++ b/t/html/bbcode.html
@@ -0,0 +1,12 @@
+<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<b>BBcode Test File</b><br>
+<br>
+This file tests <i>BBcode</i>.<br>
+<br>
+<ul><li> BBcode</li><li> Test</li><li> File</li></ul><br>
+
+</body>
+</html>
diff --git a/t/html/creole.html b/t/html/creole.html
new file mode 100644
index 0000000..b0de8b4
--- /dev/null
+++ b/t/html/creole.html
@@ -0,0 +1,18 @@
+<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<p><strong>Creole Test File</strong></p>
+
+<p>This file tests <em>Creole markup language</em>.</p>
+
+<ul>
+ <li>Creole
+</li>
+ <li>Test
+</li>
+ <li>File</li>
+</ul>
+
+</body>
+</html>
diff --git a/t/html/html.html b/t/html/html.html
new file mode 100644
index 0000000..4b46b2e
--- /dev/null
+++ b/t/html/html.html
@@ -0,0 +1,14 @@
+<!DOCTYPE
+ html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >
+ <head>
+ <meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
+ <title>Hi There</title>
+ </head>
+ <body>
+ <h1>The Header</h1>
+ <p>This is the body. I mean a paragraph in the body.</p>
+ </body>
+</html>
+
diff --git a/t/html/markdown.html b/t/html/markdown.html
new file mode 100644
index 0000000..a463729
--- /dev/null
+++ b/t/html/markdown.html
@@ -0,0 +1,12 @@
+<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<h1>Markdown Test File</h1>
+
+<p>This file tests the <a href="http://daringfireball.net/projects/markdown/">Markdown</a>
+parser — which is powered by <a href="http://p3rl/Text::Markdown">Text::Markdown</a>.
+Öy.</p>
+
+</body>
+</html>
diff --git a/t/html/mediawiki.html b/t/html/mediawiki.html
new file mode 100644
index 0000000..38664c0
--- /dev/null
+++ b/t/html/mediawiki.html
@@ -0,0 +1,20 @@
+<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<a name='heading'></a><h1>heading</h1>
+
+
+<p> * unordered item
+
+ 1. ordered item
+</p>
+
+<p> some code
+</p>
+
+<p>a normal paragraph, yö.
+</p>
+
+</body>
+</html>
diff --git a/t/html/multimarkdown.html b/t/html/multimarkdown.html
new file mode 100644
index 0000000..0f2537c
--- /dev/null
+++ b/t/html/multimarkdown.html
@@ -0,0 +1,21 @@
+<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<h1 id="markdowntestfile">Markdown Test File</h1>
+
+<p>This file tests the <a href="http://fletcherpenney.net/multimarkdown/">MultiMarkdown</a>
+parser — which is powered by
+<a href="http://p3rl/Text::MultiMarkdown">Text::MultiMarkdown</a>. Öy.<a href="#fn:1" id="fnref:1" class="footnote">1</a></p>
+
+<div class="footnotes">
+<hr />
+<ol>
+
+<li id="fn:1"><p>Yes, you heard right.<a href="#fnref:1" class="reversefootnote"> ↩</a></p></li>
+
+</ol>
+</div>
+
+</body>
+</html>
diff --git a/t/html/pod.html b/t/html/pod.html
new file mode 100644
index 0000000..c34a4f4
--- /dev/null
+++ b/t/html/pod.html
@@ -0,0 +1,33 @@
+
+<html>
+<head>
+<title></title>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+
+
+<h1 id="Title">Title</h1>
+
+<p>This is a Pod document</p>
+
+<h1 id="Description">Description</h1>
+
+<p>This is where we have a précis describing stuff, you know?</p>
+
+<p>And now, for some Japanese.</p>
+
+<p>萎衣謂違遺医井亥域育郁磯一壱溢逸稲茨芋鰯允印咽員因姻引飲淫胤蔭</p>
+
+<p>Now some verbatim text:</p>
+
+<pre><code>-- Provide a comment
+SELECT *
+ FROM users
+ WHERE nickname = 'theory';</code></pre>
+
+<p>The end.</p>
+
+</body>
+</html>
+
diff --git a/t/html/rest.html b/t/html/rest.html
new file mode 100644
index 0000000..5516aa6
--- /dev/null
+++ b/t/html/rest.html
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<title>I am a reST document</title>
+
+</head>
+<body>
+<div class="document" id="i-am-a-rest-document">
+<h1 class="title">I am a reST document</h1>
+<h2 class="subtitle" id="and-i-am-its-subtitle">And I am its subtitle</h2>
+
+<p><a class="reference external" href="http://docutils.sourceforge.net/">Rest</a> allows <em>inline</em> <strong>style</strong> and other stuff,
+but I guess this <tt class="docutils literal">code</tt> is enough.</p>
+<div class="directive-unknown">
+<strong>unknown: </strong><em>arg1 </em><em>arg2 </em><pre class="literal-block">
+We also support unknown directive, so that no content is lost if reST
+specialization is parsed.
+</pre>
+
+</div>
+<p>Of course we may also find <span class="role-unknown">new roles</span>.</p>
+<div class="directive-module">
+<strong>module: </strong><em>mymodule </em><p>This is a module that doesn't <em>exist</em></p>
+<div class="directive-function">
+<strong>function: </strong><em>foo() </em><p>In Sphinx documentation you could find this stuff.</p>
+
+</div>
+
+</div>
+<pre class="code python literal-block">
+print "I have no title"
+</pre>
+</div>
+</body>
+</html>
diff --git a/t/html/textile.html b/t/html/textile.html
new file mode 100644
index 0000000..70fb025
--- /dev/null
+++ b/t/html/textile.html
@@ -0,0 +1,16 @@
+<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<p>start paragraph</p>
+
+<p>another paragraph</p>
+
+<ul>
+<li>list of things with <a href="http://www.jerakeen.org">ürls</a> in</li>
+<li>more things in the list</li>
+</ul>
+
+<p>a http://bare.url.here. and an email at address.com</p>
+</body>
+</html>
diff --git a/t/html/trac.html b/t/html/trac.html
new file mode 100644
index 0000000..d603332
--- /dev/null
+++ b/t/html/trac.html
@@ -0,0 +1,17 @@
+<html>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<h1 id="TracMarkup">Trac Markup</h1>
+<p>
+This should contain <i>Trac</i> markup. Öy.
+</p>
+<ul><li>Item 1
+<ul><li>Item 1.1
+<ul><li>Item 1.1.1
+</li><li>Item 1.1.2
+</li><li>Item 1.1.3
+</li></ul></li><li>Item 1.2
+</li></ul></li><li>Item 2</li></ul>
+</body>
+</html>
diff --git a/t/markups/asciidoc.txt b/t/markups/asciidoc.txt
new file mode 100644
index 0000000..eee5cdd
--- /dev/null
+++ b/t/markups/asciidoc.txt
@@ -0,0 +1,8 @@
+start paragraph
+
+another paragraph
+
+- list of things with http://www.jerakeen.org[ürls] in
+- more things in the list
+
+a http://bare.url.here. and an email at address.com
diff --git a/t/markups/bbcode.txt b/t/markups/bbcode.txt
new file mode 100644
index 0000000..77fe2b2
--- /dev/null
+++ b/t/markups/bbcode.txt
@@ -0,0 +1,10 @@
+[b]BBcode Test File[/b]
+
+This file tests [i]BBcode[/i].
+
+[list]
+[*] BBcode
+[*] Test
+[*] File
+[/list]
+
diff --git a/t/markups/creole.txt b/t/markups/creole.txt
new file mode 100644
index 0000000..2f8d6dc
--- /dev/null
+++ b/t/markups/creole.txt
@@ -0,0 +1,8 @@
+**Creole Test File**
+
+This file tests //Creole markup language//.
+
+* Creole
+* Test
+* File
+
diff --git a/t/markups/html.txt b/t/markups/html.txt
new file mode 100644
index 0000000..4b46b2e
--- /dev/null
+++ b/t/markups/html.txt
@@ -0,0 +1,14 @@
+<!DOCTYPE
+ html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >
+ <head>
+ <meta http-equiv="Content-type" content="text/html;charset=UTF-8" />
+ <title>Hi There</title>
+ </head>
+ <body>
+ <h1>The Header</h1>
+ <p>This is the body. I mean a paragraph in the body.</p>
+ </body>
+</html>
+
diff --git a/t/markups/markdown.txt b/t/markups/markdown.txt
new file mode 100644
index 0000000..061be6c
--- /dev/null
+++ b/t/markups/markdown.txt
@@ -0,0 +1,6 @@
+Markdown Test File
+==================
+
+This file tests the [Markdown](http://daringfireball.net/projects/markdown/)
+parser — which is powered by [Text::Markdown](http://p3rl/Text::Markdown).
+Öy.
\ No newline at end of file
diff --git a/t/markups/mediawiki.txt b/t/markups/mediawiki.txt
new file mode 100644
index 0000000..1d51160
--- /dev/null
+++ b/t/markups/mediawiki.txt
@@ -0,0 +1,9 @@
+= heading =
+
+ * unordered item
+ 1. ordered item
+
+ some code
+
+a normal paragraph, yö.
+
diff --git a/t/markups/multimarkdown.txt b/t/markups/multimarkdown.txt
new file mode 100644
index 0000000..06b9b56
--- /dev/null
+++ b/t/markups/multimarkdown.txt
@@ -0,0 +1,9 @@
+Markdown Test File
+==================
+
+This file tests the [MultiMarkdown](http://fletcherpenney.net/multimarkdown/)
+parser — which is powered by
+[Text::MultiMarkdown](http://p3rl/Text::MultiMarkdown). Öy.[^1]
+
+[^1]: Yes, you heard right.
+
diff --git a/t/markups/pod.txt b/t/markups/pod.txt
new file mode 100644
index 0000000..90dd363
--- /dev/null
+++ b/t/markups/pod.txt
@@ -0,0 +1,22 @@
+=encoding UTF-8
+
+=head1 Title
+
+This is a Pod document
+
+=head1 Description
+
+This is where we have a précis describing stuff, you know?
+
+And now, for some Japanese.
+
+萎衣謂違遺医井亥域育郁磯一壱溢逸稲茨芋鰯允印咽員因姻引飲淫胤蔭
+
+Now some verbatim text:
+
+ -- Provide a comment
+ SELECT *
+ FROM users
+ WHERE nickname = 'theory';
+
+The end.
diff --git a/t/markups/rest.txt b/t/markups/rest.txt
new file mode 100644
index 0000000..e75a649
--- /dev/null
+++ b/t/markups/rest.txt
@@ -0,0 +1,37 @@
+=====================
+I am a reST document
+=====================
+And I am its subtitle
+=====================
+
+Rest__ allows *inline* **style** and other stuff,
+but I guess this ``code`` is enough.
+
+.. __: http://docutils.sourceforge.net/
+
+.. unknown:: arg1 arg2
+ :option: foo
+
+ We also support unknown directive, so that no content is lost if reST
+ specialization is parsed.
+
+Of course we may also find :unknown:`new roles`.
+
+.. module:: mymodule
+
+ This is a module that doesn't *exist*
+
+ .. function:: foo()
+
+ In Sphinx documentation you could find this stuff.
+
+.. toctree::
+
+ we
+ dont
+ care
+
+.. code-block:: python
+
+ print "I have no title"
+
diff --git a/t/markups/textile.txt b/t/markups/textile.txt
new file mode 100644
index 0000000..d5fb9da
--- /dev/null
+++ b/t/markups/textile.txt
@@ -0,0 +1,8 @@
+start paragraph
+
+another paragraph
+
+* list of things with "ürls":http://www.jerakeen.org in
+* more things in the list
+
+a http://bare.url.here. and an email at address.com
diff --git a/t/markups/trac.txt b/t/markups/trac.txt
new file mode 100644
index 0000000..5e4cb9e
--- /dev/null
+++ b/t/markups/trac.txt
@@ -0,0 +1,11 @@
+= Trac Markup =
+
+This should contain ''Trac'' markup. Öy.
+
+ * Item 1
+ * Item 1.1
+ * Item 1.1.1
+ * Item 1.1.2
+ * Item 1.1.3
+ * Item 1.2
+ * Item 2
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libtext-markup-perl.git
More information about the Pkg-perl-cvs-commits
mailing list