r20111 - in /branches/upstream/libhtml-wikiconverter-perl/current: Changes META.yml README lib/HTML/WikiConverter.pm lib/HTML/WikiConverter/Dialects.pod t/pod-coverage.t

rmayorga-guest at users.alioth.debian.org rmayorga-guest at users.alioth.debian.org
Sun May 18 23:35:45 UTC 2008


Author: rmayorga-guest
Date: Sun May 18 23:35:45 2008
New Revision: 20111

URL: http://svn.debian.org/wsvn/?sc=1&rev=20111
Log:
[svn-upgrade] Integrating new upstream version, libhtml-wikiconverter-perl (0.62)

Modified:
    branches/upstream/libhtml-wikiconverter-perl/current/Changes
    branches/upstream/libhtml-wikiconverter-perl/current/META.yml
    branches/upstream/libhtml-wikiconverter-perl/current/README
    branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter.pm
    branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter/Dialects.pod
    branches/upstream/libhtml-wikiconverter-perl/current/t/pod-coverage.t

Modified: branches/upstream/libhtml-wikiconverter-perl/current/Changes
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-wikiconverter-perl/current/Changes?rev=20111&op=diff
==============================================================================
--- branches/upstream/libhtml-wikiconverter-perl/current/Changes (original)
+++ branches/upstream/libhtml-wikiconverter-perl/current/Changes Sun May 18 23:35:45 2008
@@ -1,4 +1,11 @@
 # Change log for HTML::WikiConverter
+
+version: 0.62
+date:    ???
+changes:
+  - blocked elements are not blocked if they are contained within a blocked element
+    (with much thanks to Dominick Bellizzi for the patch)
+  - rules_for_tag() now dereferences the 'alias' subrule
 
 version: 0.61
 date:   Fri Jul 21 10:00:00 EST 2006

Modified: branches/upstream/libhtml-wikiconverter-perl/current/META.yml
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-wikiconverter-perl/current/META.yml?rev=20111&op=diff
==============================================================================
--- branches/upstream/libhtml-wikiconverter-perl/current/META.yml (original)
+++ branches/upstream/libhtml-wikiconverter-perl/current/META.yml Sun May 18 23:35:45 2008
@@ -1,7 +1,7 @@
 # http://module-build.sourceforge.net/META-spec.html
 #XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
 name:         HTML-WikiConverter
-version:      0.61
+version:      0.62
 version_from: lib/HTML/WikiConverter.pm
 installdirs:  site
 requires:

Modified: branches/upstream/libhtml-wikiconverter-perl/current/README
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-wikiconverter-perl/current/README?rev=20111&op=diff
==============================================================================
--- branches/upstream/libhtml-wikiconverter-perl/current/README (original)
+++ branches/upstream/libhtml-wikiconverter-perl/current/README Sun May 18 23:35:45 2008
@@ -1,4 +1,4 @@
-HTML::WikiConverter version 0.61
+HTML::WikiConverter version 0.62
 ================================
 
 HTML::WikiConverter is an HTML to wiki converter capable of converting
@@ -68,6 +68,12 @@
   * HTML::Element
   * CSS
 
+CHANGES IN VERSION 0.62
+
+  * Blocked elements are not blocked if they are contained within a
+    blocked element (with much thanks to Dominick Bellizzi for the
+    patch)
+
 CHANGES IN VERSION 0.61
 
   * (bug #20607) Add missing CSS and HTML::Element prerequisites

Modified: branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter.pm
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter.pm?rev=20111&op=diff
==============================================================================
--- branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter.pm (original)
+++ branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter.pm Sun May 18 23:35:45 2008
@@ -15,7 +15,7 @@
 use URI::Escape;
 use URI;
 
-our $VERSION = '0.61';
+our $VERSION = '0.62';
 our $AUTOLOAD;
 
 =head1 NAME
@@ -26,30 +26,26 @@
 
   use HTML::WikiConverter;
   my $wc = new HTML::WikiConverter( dialect => 'MediaWiki' );
-
-  # Provide HTML directly
-  print $wc->html2wiki( $html );
-
-  # ...which is the same as
-  print $wc->html2wiki( html => $html );
-
-  # Or fetch it from a file
-  print $wc->html2wiki( file => $path );
-
-  # ...slurp it all at once rather than parsing incrementally
-  print $wc->html2wiki( file => $path, slurp => 1 );
-
-  # Or from a URI
-  print $wc->html2wiki( uri => $uri );
-
-  # Get a list of installed dialects
+  print $wc->html2wiki( html => '<b>text</b>' ), "\n\n";
+
+  # A more complete example
+
+  my $html = qq(
+    <p><i>Italic</i>, <b>bold</b>, <span style="font-weight:bold">also bold</span>, etc.</p>
+  );
+
   my @dialects = HTML::WikiConverter->available_dialects;
+  foreach my $dialect ( @dialects ) {
+    my $wc = new HTML::WikiConverter( dialect => $dialect );
+    my $wiki = $wc->html2wiki( html => $html );
+    printf "The %s dialect gives:\n\n%s\n\n", $dialect, $wiki;
+  }
 
 =head1 DESCRIPTION
 
-C<HTML::WikiConverter> is an HTML to wiki converter. It can convert HTML
-source into a variety of wiki markups, called wiki "dialects". The following
-dialects are supported:
+C<HTML::WikiConverter> is an HTML to wiki converter. It can convert
+HTML source into a variety of wiki markups, called wiki
+"dialects". The following dialects are supported:
 
   DokuWiki
   Kwiki
@@ -244,7 +240,10 @@
   # Preprocess, save tree and parsed HTML
   $self->__root( $tree );
   $self->__preprocess_tree();
-  $self->parsed_html( $tree->as_HTML(undef, '  ') );
+
+  $self->__root->deobjectify_text();
+  $self->parsed_html( $tree->as_HTML(undef, '  ', {}) );
+  $self->__root->objectify_text();
 
   # Convert and preprocess
   my $output = $self->__wikify($tree);
@@ -277,7 +276,6 @@
     return '<!--' . $node->attr('text') . '-->';
   } else {
     my $rules = $self->rules_for_tag( $node->tag );
-    $rules = $self->__rules->{$rules->{alias}} if $rules->{alias};
 
     return $self->__subst($rules->{replace}, $node, $rules) if exists $rules->{replace};
 
@@ -320,13 +318,21 @@
     $output = $output.$self->__subst($rules->{end}, $node, $rules) if $rules->{end};
     
     # Nested block elements themselves are not blocked...
-    $output = "\n\n$output\n\n" if $rules->{block} && ! $node->parent->look_up( _tag => $node->tag );
+    $output = "\n\n$output\n\n" if $rules->{block} && ! $self->elem_within_block($node);
 
     # ...but they are put on their own line
     $output = "\n$output" if $rules->{block} and $node->parent->look_up( _tag => $node->tag ) and $trim ne 'none';
 
     return $output;
   }
+}
+
+sub elem_within_block {
+  my( $self, $node ) = @_;
+  foreach my $p ( $node->lineage ) {
+    return 1 if $self->rules_for_tag($p->tag || '')->{block};
+  }
+  return 0;
 }
 
 sub __subst {
@@ -409,7 +415,7 @@
   my( $self, $uri ) = @_;
   my $ua = $self->__user_agent;
   my $res = $ua->get($uri);
-  croak sprintf "request for <$uri> failed with status %s", $res->status unless $res->is_success;
+  croak "request for <$uri> failed" unless $res->is_success;
   my $encoding = $self->encoding || $self->__guess_encoding($res) || 'utf-8';
   my $html = encode( $self->encoding, decode( $encoding, $res->content ) );
   return $html;
@@ -652,9 +658,11 @@
 
   my $parsed_html = $wc->parsed_html;
 
-Returns L<HTML::TreeBuilder>'s string representation of the
-last-parsed syntax tree, showing how the input HTML was parsed
-internally. Also useful for debugging.
+Returns a string containing the post-processed HTML from the last
+C<html2wiki> call. Post-processing includes parsing by
+L<HTML::TreeBuilder>, CSS normalization by
+L<HTML::WikiConverter::Normalizer>, and calls to the C<preprocess> and
+C<preprocess_tree> dialect methods.
 
 =cut
 
@@ -689,12 +697,18 @@
   my $rules = $wc->rules_for_tag( $tag );
 
 Returns the rules that will be used for converting elements of the
-given tag. Note that the rules used for a particular tag may depend on
-the current set of attributes being used.
+given tag. Follows C<alias> references. Note that the rules used for a
+particular tag may depend on the current set of attributes being used.
 
 =cut
 
 sub rules_for_tag {
+  my( $self, $tag ) = @_;
+  my $rules = $self->__rules_for_tag($tag);
+  return $rules->{alias} ? $self->__rules_for_tag( $rules->{alias} ) : $rules;
+}
+
+sub __rules_for_tag {
   my( $self, $tag ) = @_;
   return $self->__rules->{$tag} if $self->__rules->{$tag};
   return $self->__rules->{UNKNOWN} if $self->__rules->{UNKNOWN} and !$isKnownTag{$tag};

Modified: branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter/Dialects.pod
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter/Dialects.pod?rev=20111&op=diff
==============================================================================
--- branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter/Dialects.pod (original)
+++ branches/upstream/libhtml-wikiconverter-perl/current/lib/HTML/WikiConverter/Dialects.pod Sun May 18 23:35:45 2008
@@ -7,13 +7,15 @@
   # In your dialect module:
 
   package HTML::WikiConverter::MySlimWiki;
-  use HTML::WikiConverter -dialect;
-
-  rule b => { start => '**', end => '**' };
-  rule i => { start => '//', end => '//' };
-  rule strong => { alias => 'b' };
-  rule em => { alias => 'i' };
-  rule hr => { replace => "\n----\n" };
+  use base 'HTML::WikiConverter';
+
+  sub rules { {
+    b => { start => '**', end => '**' },
+    i => { start => '//', end => '//' },
+    strong => { alias => 'b' },
+    em => { alias => 'i' },
+    hr => { replace => "\n----\n" }
+  } }
 
   # In a nearby piece of code:
 
@@ -69,16 +71,16 @@
 Dialects guide H::WC's conversion process with a set of rules that
 define how HTML elements are turned into their wiki counterparts.
 Each rule corresponds to an HTML tag (including nonstandard tags), and
-there may be any number of rules. Rules are added with the C<rule()>
-function that was imported when you subclassed H::WC (see above).
-
-The syntax for C<rule()> is as follows:
-
-    rule $tag => \%subrules;
-
-where C<$tag> is the name of the HTML tag (e.g., C<"b">, C<"em">, etc.)
-and C<%subrules> contains subrules that specify how that tag will be
-converted.
+there may be any number of rules. Rules are specified in your
+dialect's C<rules()> method, which returns a reference to a hash of
+rules. Each entry in the hash maps a tag name to a set of subrules,
+as in:
+
+    $tag => \%subrules
+
+where C<$tag> is the name of the HTML tag (e.g., C<"b">, C<"em">,
+etc.)  and C<%subrules> contains subrules that specify how that tag
+will be converted when it is encountered in the HTML input.
 
 =head3 Subrules
 
@@ -105,19 +107,21 @@
 The following rules could be used for a dialect that uses
 C<*asterisks*> for bold and C<_underscores_> for italic text:
 
-  rule b => { start => '*', end => '*' };
-  rule i => { start => '_', end => '_' };
+  sub rules { {
+    b => { start => '*', end => '*' },
+    i => { start => '_', end => '_' },
+  } }
+
+(For brevity, this documentation will use a shorthand notation to
+describe rules.)
 
 =head3 Aliases
 
 To add C<E<lt>strongE<gt>> and C<E<lt>emE<gt>> as aliases of C<E<lt>bE<gt>> and
 C<E<lt>iE<gt>>, use the C<alias> subrule:
 
-  rule b => { start => '*', end => '*' };
-  rule i => { start => '_', end => '_' };
-
-  rule strong => { alias => 'b' };
-  rule em => { alias => 'i' };
+  strong => { alias => 'b' },
+  em => { alias => 'i' },
 
 (The C<alias> subrule cannot be used with any other subrule.)
 
@@ -126,7 +130,7 @@
 Many dialects separate paragraphs and other block-level elements
 with a blank line. To indicate this, use the C<block> subrule:
 
-  rule p => { block => 1 };
+  p => { block => 1 },
 
 (To better support nested block elements, if block elements are
 nested inside each other, blank lines are only added to the outermost
@@ -147,7 +151,7 @@
 then use C<"blocks">. For example, paragraphs are specified like so in
 the MediaWiki dialect:
 
-  rule p => { block => 1, line_format => 'multi', trim => 'both' };
+  p => { block => 1, line_format => 'multi', trim => 'both' },
 
 =head3 Trimming whitespace
 
@@ -163,7 +167,7 @@
 string. This is specified with the C<line_prefix> subrule. For
 example, preformatted text in MediaWiki is prefixed with a space:
 
-  rule pre => { block => 1, line_prefix => ' ' };
+  pre => { block => 1, line_prefix => ' ' },
 
 There is a known bug in H::WC (see
 L<https://rt.cpan.org/Ticket/Display.html?id=14527>) with the
@@ -178,7 +182,7 @@
 PhpWiki, three percent signs, C<"%%%">, represent a line break,
 C<E<lt>brE<gt>>, hence:
 
-  rule br => { replace => '%%%' };
+  br => { replace => '%%%' },
 
 (The C<replace> subrule cannot be used with any other rule.)
 
@@ -191,13 +195,13 @@
 may specify that some be preserved using the C<preserve> subrule. For
 example, to allow C<E<lt>fontE<gt>> tag in wiki markup:
 
-  rule font => { preserve => 1 };
+  font => { preserve => 1 },
 
 Preserved tags may also specify a list of attributes that may also
 pass through from HTML to wiki markup. This is done with the
 C<attributes> subrule:
 
-  rule font => { preserve => 1, attributes => [ qw/ style class / ] };
+  font => { preserve => 1, attributes => [ qw/ style class / ] },
 
 (The C<attributes> subrule can only be used if the C<preserve> subrule
 is also present.)
@@ -209,11 +213,11 @@
 replaced with C<"E<lt>tag /E<gt>"> and no end tag. For example,
 MediaWiki handles line breaks like so:
 
-  rule br => {
+  br => {
     preserve => 1,
     attributes => [ qw/ id class title style clear / ],
     empty => 1
-  };
+  },
 
 This will convert, for example, C<"E<lt>br clear='both'E<gt>"> into
 C<"E<lt>br clear='both' /E<gt>">. Without specifying the C<empty>
@@ -226,22 +230,22 @@
 =head3 Rules that depend on attribute values
 
 In some circumstances, you might want your dialect's conversion rules
-to depend on the value of one or more attributes. The problem is that
-a dialect's rules are loaded at compile-time (when the dialect module
-is imported via C<use>), but attribute values aren't known until
-run-time (when the client code creates a new H::WC object).
-
-The solution is to define an C<_init()> method, which H::WC will call
-on your dialect after attribute values have been assigned. The
-C<_init()> method takes a single argument, the H::WC object. In it,
-you may redefine any rules based on attributes' values; its return
-value is discarded. For example, MediaWiki's C<_init()> looks like
-this:
-
-  sub _init {
+to depend on the value of one or more attributes. This can be achieved
+by producing rules in a conditional manner within C<rules()>. For
+example:
+
+  sub rules {
     my $self = shift;
-    rule( i => { preserve => 1, attributes => \@common_attrs } ) if $self->preserve_italic;
-    rule( b => { preserve => 1, attributes => \@common_attrs } ) if $self->preserve_bold;
+    
+    my %rules = (
+      em => { start => "''", end => "''" },
+      strong => { start => "'''", end => "'''" },
+    );
+
+    $rules{i} = { preserve => 1 } if $self->preserve_italic;
+    $rules{b} = { preserve => 1 } if $self->preserve_bold;
+
+    return \%rules;
   }
 
 =head2 Dynamic subrules
@@ -256,9 +260,9 @@
 
 For example, MoinMoin handles lists like so:
 
-  rule ul => { line_format => 'multi', block => 1, line_prefix => '  ' };
-  rule li => { start => \&_li_start, trim => 'leading' };
-  rule ol => { alias => 'ul' };
+  ul => { line_format => 'multi', block => 1, line_prefix => '  ' },
+  li => { start => \&_li_start, trim => 'leading' },
+  ol => { alias => 'ul' },
 
 It then defines C<_li_start()>:
 
@@ -289,10 +293,11 @@
 
 H::WC's constructor accepts a number of attributes that help determine
 how conversion takes place. Dialects can alter these attributes or add
-their own by using the C<attribute()> function, which (like C<rule()>)
-was imported when H::WC was subclassed (see above). Its syntax is:
-
-  attribute $attr => \%spec;
+their own by defining an C<attributes()> method, which returns a
+reference to a hash of attributes. Each entry in the hash maps the
+attribute's name to an attribute specification, as in:
+
+  $attr => \%spec
 
 where C<$attr> is the name of the attribute and C<%spec> is a
 L<Params::Validate> specification for the attribute.
@@ -300,7 +305,9 @@
 For example, to add a boolean attribute called C<camel_case> which is
 disabled by default:
 
-  attribute camel_case => { default => 0 };
+  sub attributes { {
+    camel_case => { default => 0 },
+  } }
 
 Attributes defined like this are given accessor and mutator methods
 via Perl's C<AUTOLOAD> mechanism, so you can later say:
@@ -314,7 +321,7 @@
 default (since all attributes are optional by default). Thus the
 PbWiki dialect could override this using:
 
-  attribute base_uri => { optional => 0 };
+  base_uri => { optional => 0 },
 
 =head2 Preprocessing
 
@@ -440,7 +447,9 @@
 attributes is provided for when you need to override an attribute's
 accessor/mutator method, as in:
 
-  attribute my_attr => { default => 1 };
+  sub attributes { {
+    my_attr => { default => 1 },
+  } }
 
   sub my_attr {
     my( $wc, $name, $new_value ) = @_;

Modified: branches/upstream/libhtml-wikiconverter-perl/current/t/pod-coverage.t
URL: http://svn.debian.org/wsvn/branches/upstream/libhtml-wikiconverter-perl/current/t/pod-coverage.t?rev=20111&op=diff
==============================================================================
--- branches/upstream/libhtml-wikiconverter-perl/current/t/pod-coverage.t (original)
+++ branches/upstream/libhtml-wikiconverter-perl/current/t/pod-coverage.t Sun May 18 23:35:45 2008
@@ -9,6 +9,7 @@
      get_elem_contents
     |get_wiki_page
     |get_attr_str
+    |elem_within_block
     |is_camel_case
     |rule
     |rules




More information about the Pkg-perl-cvs-commits mailing list