[libtext-worddiff-perl] 01/12: Imported Upstream version 0.08
gregor herrmann
gregoa at debian.org
Sun Jan 12 17:25:30 UTC 2014
This is an automated email from the git hooks/post-receive script.
gregoa pushed a commit to branch master
in repository libtext-worddiff-perl.
commit 161d20a4818fdcd7e81f1ee96508291d1a36e068
Author: gregor herrmann <gregoa at debian.org>
Date: Sun Jan 12 18:13:02 2014 +0100
Imported Upstream version 0.08
---
Build.PL | 12 +-
Changes | 15 ++
MANIFEST | 5 +-
META.json | 76 ++++++++
META.yml | 52 +++---
Makefile.PL | 51 ++----
README | 353 --------------------------------------
README.md | 44 +++++
lib/Text/WordDiff.pm | 111 ++++++------
lib/Text/WordDiff/ANSIColor.pm | 4 +-
lib/Text/WordDiff/HTML.pm | 4 +-
lib/Text/WordDiff/HTMLTwoLines.pm | 179 +++++++++++++++++++
t/ansicolor.t | 18 +-
t/base.t | 2 -
t/data/left.txt | 3 +
t/data/right.txt | 3 +
t/html.t | 24 +--
t/{html.t => htmltwolines.t} | 54 ++++--
t/pod.t | 8 +-
19 files changed, 498 insertions(+), 520 deletions(-)
diff --git a/Build.PL b/Build.PL
index a7a1d42..13b6aaf 100644
--- a/Build.PL
+++ b/Build.PL
@@ -3,10 +3,9 @@ use Module::Build;
my $build = Module::Build->new(
module_name => 'Text::WordDiff',
license => 'perl',
- create_makefile_pl => 'passthrough',
- create_readme => 1,
+ create_makefile_pl => 'traditional',
configure_requires => { 'Module::Build' => '0.2701' },
- recommends => { 'Test::Pod' => '1.20' },
+ recommends => { 'Test::Pod' => '1.41' },
requires => {
Algorithm::Diff => '1.19',
Term::ANSIColor => '0',
@@ -17,5 +16,12 @@ my $build = Module::Build->new(
'Test::More' => '0.17',
'File::Spec' => '0',
},
+ meta_merge => {
+ resources => {
+ homepage => 'http://search.cpan.org/dist/Text-WordDiff/',
+ bugtracker => 'http://github.com/theory/test-xpath/issues/',
+ repository => 'http://github.com/theory/test-xpath/tree/',
+ }
+ },
);
$build->create_build_script;
diff --git a/Changes b/Changes
index a6dfc29..a752c0b 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,20 @@
Revision history for Perl extension Text::WordDiff.
+0.08 2011-10-19T22:08:34Z
+ - Control and punctuation characters are now treated as standalone
+ chunks rather than as part of the words that precede them. This makes
+ for much more intuitive-looking diffs. Thanks to Artem Krivopolenov
+ for the pull request.
+
+0.07 2011-06-19T05:04:42
+ - Removed `.gitignore` file.
+ - Added proper links to the GitHub repository.
+ - Added bug reporting and repository metadata.
+
+0.06 2011-06-18T22:20:53
+ - Moved repository to GitHub.
+ - Added Text::WordDiff::HTMLTwoLines by Amelia Ireland.
+
0.05 2009-04-06T18:34:42
- Fixed documentation typo in Search::WordDiff::ANSIColor. Patch from
Jeff Lavallee (Ticket #40509).
diff --git a/MANIFEST b/MANIFEST
index bf16090..12e93c6 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -4,13 +4,16 @@ eg/word_diff.css
lib/Text/WordDiff.pm
lib/Text/WordDiff/ANSIColor.pm
lib/Text/WordDiff/HTML.pm
+lib/Text/WordDiff/HTMLTwoLines.pm
Makefile.PL
MANIFEST This list of files
+META.json
META.yml
-README
+README.md
t/ansicolor.t
t/base.t
t/data/left.txt
t/data/right.txt
t/html.t
+t/htmltwolines.t
t/pod.t
diff --git a/META.json b/META.json
new file mode 100644
index 0000000..df8a8b5
--- /dev/null
+++ b/META.json
@@ -0,0 +1,76 @@
+{
+ "abstract" : "Track changes between documents",
+ "author" : [
+ "David E. Wheeler <david at justatheory.com>"
+ ],
+ "dynamic_config" : 1,
+ "generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.112150",
+ "license" : [
+ "perl_5"
+ ],
+ "meta-spec" : {
+ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+ "version" : "2"
+ },
+ "name" : "Text-WordDiff",
+ "prereqs" : {
+ "build" : {
+ "requires" : {
+ "File::Spec" : 0,
+ "Module::Build" : "0.2701",
+ "Test::More" : "0.17"
+ }
+ },
+ "configure" : {
+ "requires" : {
+ "Module::Build" : "0.2701"
+ }
+ },
+ "runtime" : {
+ "recommends" : {
+ "Test::Pod" : "1.41"
+ },
+ "requires" : {
+ "Algorithm::Diff" : "1.19",
+ "HTML::Entities" : 0,
+ "Term::ANSIColor" : 0
+ }
+ }
+ },
+ "provides" : {
+ "Text::WordDiff" : {
+ "file" : "lib/Text/WordDiff.pm",
+ "version" : "0.08"
+ },
+ "Text::WordDiff::ANSIColor" : {
+ "file" : "lib/Text/WordDiff/ANSIColor.pm",
+ "version" : "0.08"
+ },
+ "Text::WordDiff::Base" : {
+ "file" : "lib/Text/WordDiff.pm",
+ "version" : 0
+ },
+ "Text::WordDiff::HTML" : {
+ "file" : "lib/Text/WordDiff/HTML.pm",
+ "version" : "0.08"
+ },
+ "Text::WordDiff::HTMLTwoLines" : {
+ "file" : "lib/Text/WordDiff/HTMLTwoLines.pm",
+ "version" : "0.08"
+ }
+ },
+ "release_status" : "stable",
+ "resources" : {
+ "bugtracker" : {
+ "web" : "http://github.com/theory/test-xpath/issues/"
+ },
+ "homepage" : "http://search.cpan.org/dist/Text-WordDiff/",
+ "license" : [
+ "http://dev.perl.org/licenses/"
+ ],
+ "repository" : {
+ "url" : "http://github.com/theory/test-xpath/tree/"
+ }
+ },
+ "version" : "0.08"
+}
diff --git a/META.yml b/META.yml
index 9acd0f7..6358856 100644
--- a/META.yml
+++ b/META.yml
@@ -1,37 +1,45 @@
---
-name: Text-WordDiff
-version: 0.05
+abstract: 'Track changes between documents'
author:
- - 'David Wheeler <david at kineticode.com>'
-abstract: Track changes between documents
-license: perl
-resources:
- license: http://dev.perl.org/licenses/
-configure_requires:
- Module::Build: 0.2701
-requires:
- Algorithm::Diff: 1.19
- HTML::Entities: 0
- Term::ANSIColor: 0
+ - 'David E. Wheeler <david at justatheory.com>'
build_requires:
File::Spec: 0
Module::Build: 0.2701
Test::More: 0.17
-recommends:
- Test::Pod: 1.20
+configure_requires:
+ Module::Build: 0.2701
+dynamic_config: 1
+generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.112150'
+license: perl
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
+name: Text-WordDiff
provides:
Text::WordDiff:
file: lib/Text/WordDiff.pm
- version: 0.05
+ version: 0.08
Text::WordDiff::ANSIColor:
file: lib/Text/WordDiff/ANSIColor.pm
- version: 0.05
+ version: 0.08
Text::WordDiff::Base:
file: lib/Text/WordDiff.pm
+ version: 0
Text::WordDiff::HTML:
file: lib/Text/WordDiff/HTML.pm
- version: 0.05
-generated_by: Module::Build version 0.32
-meta-spec:
- url: http://module-build.sourceforge.net/META-spec-v1.2.html
- version: 1.2
+ version: 0.08
+ Text::WordDiff::HTMLTwoLines:
+ file: lib/Text/WordDiff/HTMLTwoLines.pm
+ version: 0.08
+recommends:
+ Test::Pod: 1.41
+requires:
+ Algorithm::Diff: 1.19
+ HTML::Entities: 0
+ Term::ANSIColor: 0
+resources:
+ bugtracker: http://github.com/theory/test-xpath/issues/
+ homepage: http://search.cpan.org/dist/Text-WordDiff/
+ license: http://dev.perl.org/licenses/
+ repository: http://github.com/theory/test-xpath/tree/
+version: 0.08
diff --git a/Makefile.PL b/Makefile.PL
index f062431..3678904 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,32 +1,19 @@
-# Note: this file was auto-generated by Module::Build::Compat version 0.32
-
- unless (eval "use Module::Build::Compat 0.02; 1" ) {
- print "This module requires Module::Build to install itself.\n";
-
- require ExtUtils::MakeMaker;
- my $yn = ExtUtils::MakeMaker::prompt
- (' Install Module::Build now from CPAN?', 'y');
-
- unless ($yn =~ /^y/i) {
- die " *** Cannot install without Module::Build. Exiting ...\n";
- }
-
- require Cwd;
- require File::Spec;
- require CPAN;
-
- # Save this 'cause CPAN will chdir all over the place.
- my $cwd = Cwd::cwd();
-
- CPAN::Shell->install('Module::Build::Compat');
- CPAN::Shell->expand("Module", "Module::Build::Compat")->uptodate
- or die "Couldn't install Module::Build, giving up.\n";
-
- chdir $cwd or die "Cannot chdir() back to $cwd: $!";
- }
- eval "use Module::Build::Compat 0.02; 1" or die $@;
-
- Module::Build::Compat->run_build_pl(args => \@ARGV);
- exit(0) unless(-e 'Build'); # cpantesters convention
- require Module::Build;
- Module::Build::Compat->write_makefile(build_class => 'Module::Build');
+# Note: this file was auto-generated by Module::Build::Compat version 0.3800
+use ExtUtils::MakeMaker;
+WriteMakefile
+(
+ 'NAME' => 'Text::WordDiff',
+ 'VERSION_FROM' => 'lib/Text/WordDiff.pm',
+ 'PREREQ_PM' => {
+ 'Algorithm::Diff' => '1.19',
+ 'File::Spec' => 0,
+ 'HTML::Entities' => '0',
+ 'Module::Build' => '0.2701',
+ 'Term::ANSIColor' => '0',
+ 'Test::More' => '0.17'
+ },
+ 'INSTALLDIRS' => 'site',
+ 'EXE_FILES' => [],
+ 'PL_FILES' => {}
+)
+;
diff --git a/README b/README
deleted file mode 100644
index 35a767e..0000000
--- a/README
+++ /dev/null
@@ -1,353 +0,0 @@
-Name
- Text::WordDiff - Track changes between documents
-
-Synopsis
- use Text::WordDiff;
-
- my $diff = word_diff 'file1.txt', 'file2.txt', { STYLE => 'HTML' };
- my $diff = word_diff \$string1, \$string2, { STYLE => 'ANSIColor' };
- my $diff = word_diff \*FH1, \*FH2; \%options;
- my $diff = word_diff \&reader1, \&reader2;
- my $diff = word_diff \@records1, \@records2;
-
- # May also mix input types:
- my $diff = word_diff \@records1, 'file_B.txt';
-
-Description
- This module is a variation on the lovely Text::Diff module. Rather than
- generating traditional line-oriented diffs, however, it generates
- word-oriented diffs. This can be useful for tracking changes in
- narrative documents or documents with very long lines. To diff source
- code, one is still best off using Text::Diff. But if you want to see how
- a short story changed from one version to the next, this module will do
- the job very nicely.
-
- What is a Word?
- I'm glad you asked! Well, sort of. It's a really hard question to
- answer. I consulted a number of sources, but really just did my best to
- punt on the question by reformulating it as, "How do I split text up
- into individual words?" The short answer is to split on word boundaries.
- However, every word has two boundaries, one at the beginning and one at
- the end. So splitting on "/\b/" didn't work so well. What I really
- wanted to do was to split on the *beginning* of every word. Fortunately,
- _Mastering Regular Expressions_ has a recipe for that:
- "/(?<!\w)(?=\w)/". I've borrowed this regular expression for use in
- Perls before 5.6.x, but go for the Unicode variant in 5.6.0 and newer:
- "/(?<!\p{IsWord})(?=\p{IsWord})/". With either of these regular
- expressions, this sentence, for example, would be split up into the
- following tokens:
-
- my @words = (
- 'With ',
- 'either ',
- 'of ',
- 'these ',
- 'regular ',
- "expressions,\n",
- 'this ',
- 'sentence, ',
- 'for ',
- 'example, ',
- 'would ',
- 'be ',
- 'split ',
- 'up ',
- 'into ',
- 'the ',
- 'following ',
- 'tokens:'
- );
-
- Note that this allows the tokens to include any spacing or punctuation
- after each word. So it's not just comparing words, but word-like tokens.
- This makes sense to me, at least, as the diff is between these tokens,
- and thus leads to a nice word-and-space-and-punctation type diff. It's
- not unlike what a word processor might do (although a lot of them are
- character-based, but that seemed a bit extreme--feel free to dupe this
- module into Text::CharDiff!).
-
- Now, I acknowledge that there are localization issues with this
- approach. In particular, it will fail with Chinese, Japanese, and Korean
- text, as these languages don't put non-word characters between words.
- Ideally, Test::WordDiff would then split on every charaters (since a
- single character often equals a word), but such is not the case when the
- "utf8" flag is set on a string. For example, This simple script:
-
- use strict;
- use utf8;
- use Data::Dumper;
- my $string = '뼈뼉뼘뼙뼛뼜뼝뽀뽁뽄뽈뽐뽑뽕뾔뾰뿅뿌뿍뿐뿔뿜뿟뿡쀼쁑쁘쁜쁠쁨쁩삐';
- my @tokens = split /(?<!\p{IsWord})(?=\p{IsWord})/msx, $string;
- print Dumper \@tokens;
-
- Outputs:
-
- $VAR1 = [
- "\x{bf08}\x{bf09}\x{bf18}\x{bf19}\x{bf1b}\x{bf1c}\x{bf1d}\x{bf40}\x{bf41}\x{bf44}\x{bf48}\x{bf50}\x{bf51}\x{bf55}\x{bf94}\x{bfb0}\x{bfc5}\x{bfcc}\x{bfcd}\x{bfd0}\x{bfd4}\x{bfdc}\x{bfdf}\x{bfe1}\x{c03c}\x{c051}\x{c058}\x{c05c}\x{c060}\x{c068}\x{c069}\x{c090}"
- ];
-
- Not so useful. It seems to be less of a problem if the "use utf8;" line
- is commented out, in which case we get:
-
- $VAR1 = [
- '뼈',
- '뼉',
- '뼘',
- '뼙',
- '뼛',
- '뼜',
- '뼝',
- '뽀',
- '뽁',
- '뽄',
- '뽈',
- '뽐',
- '뽑',
- '뽕',
- '뾔',
- '뾰',
- '뿅',
- '뿌',
- '뿍',
- '뿐',
- '뿔',
- '뿜',
- '뿟',
- '뿡',
- '?',
- '?쁑',
- '쁘',
- '쁜',
- '쁠',
- '쁨',
- '쁩',
- '삐'
- ];
-
- Someone whose more familiar with non-space-using languages will have to
- explain to me how I might be able to duplicate this pattern within the
- scope of "use utf8;", seing as it may very well be important to have it
- on in order to ensure proper character semantics.
-
- However, if my word tokenization approach is just too naive, and you
- decide that you need to take a different approach (maybe use
- Lingua::ZH::Toke or similar module), you can still use this module;
- you'll just have to tokenize your strings into words yourself, and pass
- them to word_diff() as array references:
-
- word_diff \@my_words1, \@my_words2;
-
-Options
- word_diff() takes two arguments from which to draw input and an optional
- hash reference of options to control its output. The first two arguments
- contain the data to be diffed, and each may be in the form of any of the
- following (that is, they can be in two different formats):
-
- * String
-
- A bare scalar will be assumed to be a file name. The file will be
- opened and split up into words. word_diff() will also "stat" the
- file to get the last modified time for use in the header, unless the
- relevant option ("MTIME_A" or "MTIME_B") has been specified
- explicitly.
-
- * Scalar Reference
-
- A scalar reference will be assumed to refer to a string. That string
- will be split up into words.
-
- * Array Reference
-
- An array reference will be assumed to be a list of words.
-
- * File Handle
-
- A glob or IO::Handle-derived object will be read from and split up
- into its constituent words.
-
- The optional hash reference may contain the following options.
- Additional options may be specified by the formattting class; see the
- specific class for details.
-
- * STYLE
-
- "ANSIColor", "HTML" or an object or class name for a class providing
- "file_header()", "hunk_header()", "same_items()", "delete_items()",
- "insert_items()", "hunk_footer()" and "file_footer()" methods.
- Defaults to "ANSIColor" for nice display of diffs in an ANSI
- Color-supporting terminal.
-
- If the package indicated by the "STYLE" has no "new()" method,
- "word_diff()" will load it automatically (lazy loading). It will
- then instantiate an object of that class, passing in the options
- hash reference with which the formatting class can initialize the
- object.
-
- Styles may be specified as class names ("STYLE => "My::Foo""), in
- which case they will be instantiated by calling the "new()"
- construcctor and passing in the options hash reference, or as
- objects ("STYLE => My::Foo->new").
-
- The simplest way to implement your own formatting style is to create
- a new class that inherits from Text::WordDiff::Base, wherein the
- "new()" method is already provided, and the "file_header()" returns
- a Unified diff-style header. All of the other formatting methods
- simply return empty strings, and are therefore ripe for overriding.
-
- * FILENAME_A, MTIME_A, FILENAME_B, MTIME_B
-
- The name of the file and the modification time "files" in epoch
- seconds. Unless a defined value is specified for these options, they
- will be filled in for each file when word_diff() is passed a
- filename. If a filename is not passed in and "FILENAME_A" and
- "FILENAME_B" are not defined, the header will not be printed by the
- base formatting base class.
-
- * OUTPUT
-
- The method by which diff output should be, well, *output*. Examples
- and their equivalent subroutines:
-
- OUTPUT => \*FOOHANDLE, # like: sub { print FOOHANDLE shift() }
- OUTPUT => \$output, # like: sub { $output .= shift }
- OUTPUT => \@output, # like: sub { push @output, shift }
- OUTPUT => sub { $output .= shift },
-
- If "OUTPUT" is not defined, word_diff() will simply return the diff
- as a string. If "OUTPUT" is a code reference, it will be called once
- with the file header, once for each hunk body, and once for each
- piece of content. If "OUTPUT" is an IO::Handle-derived object,
- output will be sent to that handle.
-
- * FILENAME_PREFIX_A, FILENAME_PREFIX_B
-
- The string to print before the filename in the header. Defaults are
- "---", "+++".
-
- * DIFF_OPTS
-
- A hash reference to be passed as the options to
- "Algorithm::Diff->new". See Algorithm::Diff for details on available
- options.
-
-Formatting Classes
- Text::WordDiff comes with two formatting classes:
-
- Text::WordDiff::ANSIColor
- This is the default formatting class. It emits a header and then the
- diff content, with deleted text in bodfaced red and inserted text in
- boldfaced green.
-
- Text::WordDiff::HTML
- Specify "STYLE => 'HTML'" to take advantage of this formatting
- class. It outputs the diff content as XHTML, with deleted text in
- "<del>" elements and inserted text in "<ins>" elements.
-
- To implement your own formatting class, simply inherit from
- Text::WordDiff::Base and override its methods as necssary. By default,
- only the "file_header()" formatting method returns a value. All others
- simply return empty strings, and are therefore ripe for overriding:
-
- package My::WordDiff::Format;
- use base 'Text::WordDiff::Base';
-
- sub file_footer { return "End of diff\n"; }
-
- The methods supplied by the base class are:
-
- "new()"
- Constructs and returns a new formatting object. It takes a single
- hash reference as its argument, and uses it to construct the object.
- The nice thing about this is that if you want to support other
- options in your formatting class, you can just use them in the
- formatting object constructed by the Text::WordDiff::Base class and
- document that they can be passed as part of the options hash
- refernce to word_diff().
-
- "file_header()"
- Called once for a single call to "word_diff()", this method outputs
- the header for the whole diff. This is the only formatting method in
- the base class that returns anything other than an empty string. It
- collects the filenames from "filname_a()" and "filename_b()" and, if
- they're defined, uses the relevant prefixes and modification times
- to return a unified diff-style header.
-
- "hunk_header()"
- This method is called for each diff hunk. It should output any
- necessary header for the hunk.
-
- "same_items()"
- This method is called for items that have not changed between the
- two sequnces being compared. The unchanged items will be passed as a
- list to the method.
-
- "delete_items"
- This method is called for items in the first sequence that are not
- present in the second sequcne. The deleted items will be passed as a
- list to the method.
-
- "insert_items"
- This method is called for items in the second sequence that are not
- present in the first sequcne. The inserted items will be passed as a
- list to the method.
-
- "hunk_footer"
- This method is called at the end of a hunk. It should output any
- necessary content to close out the hunk.
-
- "file_footer()"
- This method is called once when the whole diff has been procssed. It
- should output any necessary content to close out the diff file.
-
- "filename_a"
- This accessor returns the value specified for the "FILENAME_A"
- option to word_diff().
-
- "filename_b"
- This accessor returns the value specified for the "FILENAME_B"
- option to word_diff().
-
- "mtime_a"
- This accessor returns the value specified for the "MTIME_A" option
- to word_diff().
-
- "mtime_b"
- This accessor returns the value specified for the "MTIME_B" option
- to word_diff().
-
- "filename_prefix_a"
- This accessor returns the value specified for the
- "FILENAME_PREFIX_A" option to word_diff().
-
- "filename_prefix_b"
- This accessor returns the value specified for the
- "FILENAME_PREFIX_B" option to word_diff().
-
-See Also
- Text::Diff
- Inspired the interface and implementation of this module. Thanks
- Barry!
-
- Text::ParagraphDiff
- A module that attempts to diff paragraphs and the words in them.
-
- Algorithm::Diff
- The module that makes this all possible.
-
-Support
- This module is stored in an open repository at the following address:
-
- <https://svn.kineticode.com/Text-WordDiff/trunk/>
-
- Patches against Text::WordDiff are welcome. Please send bug reports to
- <bug-text-worddiff at rt.cpan.org>.
-
-Author
- David Wheeler <david at kineticode.com>
-
-Copyright and License
- Copyright (c) 2005-2008 David Wheeler. Some Rights Reserved.
-
- This module is free software; you can redistribute it and/or modify it
- under the same terms as Perl itself.
-
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3356ebb
--- /dev/null
+++ b/README.md
@@ -0,0 +1,44 @@
+Text/WordDiff version 0.08
+==========================
+
+This library's module, Text::WordDiff, is a variation on the lovely
+[Text::Diff](http://search.cpan.org/perldoc?Text::Diff) module. Rather than
+generating traditional line-oriented diffs, however, it generates
+word-oriented diffs. This can be useful for tracking changes in narrative
+documents or documents with very long lines. To diff source code, one is still
+best off using Text::Diff. But if you want to see how a short story changed
+from one version to the next, this module will do the job very nicely.
+
+INSTALLATION
+
+To install this module, type the following:
+
+ perl Build.PL
+ ./Build
+ ./Build test
+ ./Build install
+
+Or, if you don't have Module::Build installed, type the following:
+
+ perl Makefile.PL
+ make
+ make test
+ make install
+
+Dependencies
+------------
+
+Text::WordDiff requires the following modules:
+
+* Algorithm::Diff '1.19',
+* Term::ANSIColor '0',
+* HTML::Entities '0',
+
+Copyright and Licence
+---------------------
+
+Copyright (c) 2005-2011 David E. Wheeler. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under
+the same terms as Perl itself.
+
diff --git a/lib/Text/WordDiff.pm b/lib/Text/WordDiff.pm
index 37b42ef..375c3fe 100644
--- a/lib/Text/WordDiff.pm
+++ b/lib/Text/WordDiff.pm
@@ -1,23 +1,22 @@
package Text::WordDiff;
-# $Id: WordDiff.pm 3830 2008-05-06 17:23:15Z david $
-
use strict;
use vars qw(@ISA $VERSION);
use Algorithm::Diff ();
use IO::File;
use Carp;
-$VERSION = '0.05';
+$VERSION = '0.08';
# _Mastering Regular Expressions_, p. 132.
my $BEGIN_WORD = $] >= 5.006
- ? qr/(?<!\p{IsWord})(?=\p{IsWord})/msx
- : qr/(?<!\w)(?=\w)/msx;
+ ? qr/(?:(?<!\p{IsWord})(?=\p{IsWord})|(?<!\p{IsPunct})(?=\p{IsPunct})|(?<!\p{IsCntrl})(?=\p{IsCntrl}))/msx
+ : qr/(?:(?<!\w)(?=\w)|(?<![\]\[!"%&'()*,\.\/:;?\{}\-@])(?=[\]\[!"%&'()*,\.\/:;?\{}\-@])|(?<![\n\r\t])(?=[\n\r\t]))/msx;
my %styles = (
- ANSIColor => undef,
- HTML => undef,
+ ANSIColor => undef,
+ HTML => undef,
+ HTMLTwoLines => undef,
);
sub import {
@@ -189,17 +188,6 @@ __END__
##############################################################################
-=begin comment
-
-Fake-out Module::Build. Delete if it ever changes to support =head1 headers
-other than all uppercase.
-
-=head1 NAME
-
-Text::WordDiff - Track changes between documents
-
-=end comment
-
=head1 Name
Text::WordDiff - Track changes between documents
@@ -238,37 +226,46 @@ C</\b/> didn't work so well. What I really wanted to do was to split on the
I<beginning> of every word. Fortunately, _Mastering Regular Expressions_ has a
recipe for that: C<< /(?<!\w)(?=\w)/ >>. I've borrowed this regular expression
for use in Perls before 5.6.x, but go for the Unicode variant in 5.6.0 and
-newer: C<< /(?<!\p{IsWord})(?=\p{IsWord})/ >>. With either of these regular
-expressions, this sentence, for example, would be split up into the following
-tokens:
+newer: C<< /(?<!\p{IsWord})(?=\p{IsWord})/ >>. Adding some additional controls
+for punctuation and control characters, this sentence, for example, would be
+split up into the following tokens:
my @words = (
- 'With ',
- 'either ',
- 'of ',
- 'these ',
- 'regular ',
- "expressions,\n",
- 'this ',
- 'sentence, ',
- 'for ',
- 'example, ',
- 'would ',
- 'be ',
- 'split ',
- 'up ',
- 'into ',
- 'the ',
- 'following ',
- 'tokens:'
+ "Adding ",
+ "some ",
+ "additional ",
+ "controls",
+ "\n",
+ "for ",
+ "punctuation ",
+ "and ",
+ "control ",
+ "characters",
+ ", ",
+ "this ",
+ "sentence",
+ ", ",
+ "for ",
+ "example",
+ ", ",
+ "would ",
+ "be",
+ "\n",
+ "split ",
+ "up ",
+ "into ",
+ "the ",
+ "following ",
+ "tokens",
+ ":",
);
-Note that this allows the tokens to include any spacing or punctuation after
-each word. So it's not just comparing words, but word-like tokens. This makes
-sense to me, at least, as the diff is between these tokens, and thus leads to
-a nice word-and-space-and-punctation type diff. It's not unlike what a word
-processor might do (although a lot of them are character-based, but that
-seemed a bit extreme--feel free to dupe this module into Text::CharDiff!).
+So it's not just comparing words, but word-like tokens and control/punctuation
+tokens. This makes sense to me, at least, as the diff is between these tokens,
+and thus leads to a nice word-and-space-and-punctuation type diff. It's not
+unlike what a word processor might do (although a lot of them are
+character-based, but that seemed a bit extreme--feel free to dupe this module
+into Text::CharDiff!).
Now, I acknowledge that there are localization issues with this approach. In
particular, it will fail with Chinese, Japanese, and Korean text, as these
@@ -575,29 +572,21 @@ The module that makes this all possible.
=head1 Support
-This module is stored in an open repository at the following address:
-
-L<https://svn.kineticode.com/Text-WordDiff/trunk/>
+This module is stored in an open L<GitHub
+repository|http://github.com/theory/text-worddiff/>. Feel free to fork and
+contribute!
-Patches against Text::WordDiff are welcome. Please send bug reports to
-<bug-text-worddiff at rt.cpan.org>.
+Please file bug reports via L<GitHub
+Issues|http://github.com/theory/text-worddiff/issues/> or by sending mail to
+L<bug-Text-WordDiff at rt.cpan.org|mailto:bug-Text-WordDiff at rt.cpan.org>.
=head1 Author
-=begin comment
-
-Fake-out Module::Build. Delete if it ever changes to support =head1 headers
-other than all uppercase.
-
-=head1 AUTHOR
-
-=end comment
-
-David Wheeler <david at kineticode.com>
+David E. Wheeler <david at justatheory.com>
=head1 Copyright and License
-Copyright (c) 2005-2008 David Wheeler. Some Rights Reserved.
+Copyright (c) 2005-2011 David E. Wheeler. Some Rights Reserved.
This module is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
diff --git a/lib/Text/WordDiff/ANSIColor.pm b/lib/Text/WordDiff/ANSIColor.pm
index 2250190..a7c985e 100644
--- a/lib/Text/WordDiff/ANSIColor.pm
+++ b/lib/Text/WordDiff/ANSIColor.pm
@@ -7,7 +7,7 @@ use vars qw($VERSION @ISA);
# Term::ANSIColor doesn't support STRIKETHROUGH, so we'll do it ourselves.
use constant STRIKETHROUGH => "\e[9m";
-$VERSION = '0.05';
+$VERSION = '0.08';
@ISA = qw(Text::WordDiff::Base);
sub same_items {
@@ -117,7 +117,7 @@ David Wheeler <david at kineticode.com>
=head1 Copyright and License
-Copyright (c) 2005-2008 David Wheeler. Some Rights Reserved.
+Copyright (c) 2005-2011 David E. Wheeler. Some Rights Reserved.
This module is free software; you can redistribute it and/or modify it under the
same terms as Perl itself.
diff --git a/lib/Text/WordDiff/HTML.pm b/lib/Text/WordDiff/HTML.pm
index faf96a9..348c2d5 100644
--- a/lib/Text/WordDiff/HTML.pm
+++ b/lib/Text/WordDiff/HTML.pm
@@ -4,7 +4,7 @@ use strict;
use HTML::Entities qw(encode_entities);
use vars qw($VERSION @ISA);
-$VERSION = '0.05';
+$VERSION = '0.08';
@ISA = qw(Text::WordDiff::Base);
sub file_header {
@@ -152,7 +152,7 @@ David Wheeler <david at kineticode.com>
=head1 Copyright and License
-Copyright (c) 2005-2008 David Wheeler. Some Rights Reserved.
+Copyright (c) 2005-2011 David E. Wheeler. Some Rights Reserved.
This module is free software; you can redistribute it and/or modify it under the
same terms as Perl itself.
diff --git a/lib/Text/WordDiff/HTMLTwoLines.pm b/lib/Text/WordDiff/HTMLTwoLines.pm
new file mode 100644
index 0000000..14ab7f4
--- /dev/null
+++ b/lib/Text/WordDiff/HTMLTwoLines.pm
@@ -0,0 +1,179 @@
+package Text::WordDiff::HTMLTwoLines;
+
+use strict;
+use HTML::Entities qw(encode_entities);
+use vars qw($VERSION @ISA);
+
+$VERSION = '0.08';
+@ISA = qw(Text::WordDiff::Base);
+
+sub file_header {
+ my $self = shift;
+ my $fn1 = $self->filename_a;
+ my $fn2 = $self->filename_b;
+
+ if (defined $fn1 && defined $fn2)
+ { my $p1 = $self->filename_prefix_a;
+ my $t1 = $self->mtime_a;
+ my $p2 = $self->filename_prefix_b;
+ my $t2 = $self->mtime_b;
+
+ $self->{__str1} = '<div class="file"><span class="fileheader">'
+ . "$p1 $fn1" . (defined $t1 ? " " . localtime $t1 : '') . '</span>';
+
+ $self->{__str2} = '<div class="file"><span class="fileheader">'
+ . "$p2 $fn2" . (defined $t2 ? " " . localtime $t2 : '') . '</span>';
+ }
+ else
+ { $self->{__str1} = $self->{__str2} = '<div class="file">';
+ }
+ return '';
+}
+
+sub hunk_header {
+ my $self = shift;
+ $self->{__str1} .= '<span class="hunk">';
+ $self->{__str2} .= '<span class="hunk">';
+ return '';
+}
+sub hunk_footer {
+ my $self = shift;
+ $self->{__str1} .= '</span>';
+ $self->{__str2} .= '</span>';
+ return '';
+}
+
+sub file_footer {
+ my $self = shift;
+ return $self->{__str1} . "</div>\n" . $self->{__str2} . "</div>\n";
+}
+
+sub same_items {
+ my $self = shift;
+ $self->{__str1} .= encode_entities( join '', @_ );
+ $self->{__str2} .= encode_entities( join '', @_ );
+ return '';
+}
+
+sub delete_items {
+ my $self = shift;
+ $self->{__str1} .= '<del>' . encode_entities( join '', @_ ) . '</del>';
+ return '';
+}
+
+sub insert_items {
+ my $self = shift;
+ $self->{__str2} .= '<ins>' . encode_entities( join '', @_ ) . '</ins>';
+ return '';
+}
+
+1;
+
+__END__
+
+=head1 Name
+
+Text::WordDiff::HTMLTwoLines - XHTML formatting for Text::WordDiff with content on two lines
+
+=head1 Synopsis
+
+ use Text::WordDiff;
+
+ my $diff = word_diff 'file1.txt', 'file2.txt', { STYLE => 'HTMLTwoLines' };
+ my $diff = word_diff \$string1, \$string2, { STYLE => 'HTMLTwoLines' };
+ my $diff = word_diff \*FH1, \*FH2, { STYLE => 'HTMLTwoLines' };
+ my $diff = word_diff \&reader1, \&reader2, { STYLE => 'HTMLTwoLines' };
+ my $diff = word_diff \@records1, \@records2, { STYLE => 'HTMLTwoLines' };
+
+ # May also mix input types:
+ my $diff = word_diff \@records1, 'file_B.txt', { STYLE => 'HTMLTwoLines' };
+
+=head1 Description
+
+This class subclasses Text::WordDiff::Base to provide XHTML formatting for
+Text::WordDiff. See L<Text::WordDiff|Text::WordDiff> for usage details. This
+class should never be used directly.
+
+Text::WordDiff::HTMLTwoLines formats word diffs for viewing in a Web browser.
+The output is similar to that produced by
+L<Text::WordDiff::HTML|Text::WordDiff::HTML> but the two lines (or files,
+records, etc.) are shown separately, with deleted items highlighted in the
+first line and inserted items highlighted in the second. HTMLTwoLines puts a
+span tag around each word or set of words in the diff.
+
+The diff content is highlighted as follows:
+
+=over
+
+=item * C<< <div class="file"> >>
+
+The inputs to C<word_diff()> are each contained in a div element of class
+"file". All the following results are subsumed by these elements.
+
+=over
+
+=item * C<< <span class="fileheader"> >>
+
+The header section for the files being C<diff>ed, usually something like:
+
+ --- in.txt Thu Sep 1 12:51:03 2005
+
+for the first file, and
+
+ +++ out.txt Thu Sep 1 12:52:12 2005
+
+for the second.
+
+This element immediately follows the opening "file" C<< <div> >> element, but
+will not be present if Text::WordDiff cannot determine the file names for both
+files being compared.
+
+=item * C<< <span class="hunk"> >>
+
+This element contains a single diff "hunk". Each hunk may contain the
+following elements:
+
+=over
+
+=item * C<< <ins> >>
+
+Inserted content.
+
+=item * C<< <del> >>
+
+Deleted content.
+
+=back
+
+=back
+
+=back
+
+You may do whatever you like with these elements and classes; I highly
+recommend that you style them using CSS. You'll find an example CSS file in
+the F<eg> directory in the Text-WordDiff distribution.
+
+=head1 See Also
+
+=over
+
+=item L<Text::WordDiff|Text::WordDiff>
+
+=item L<Text::WordDiff::HTML|Text::WordDiff::HTML>
+
+=item L<Text::WordDiff::ANSIColor|Text::WordDiff::ANSIColor>
+
+=back
+
+=head1 Author
+
+Amelia Ireland <join(".", $firstname, $lastname) . "@gmail.com">
+
+=head1 Copyright and License
+
+Copyright (c) 2011 Amelia Ireland. Some Rights Reserved.
+
+This module is free software; you can redistribute it and/or modify it under the
+same terms as Perl itself.
+
+=cut
diff --git a/t/ansicolor.t b/t/ansicolor.t
index d4845d3..dc3998e 100644
--- a/t/ansicolor.t
+++ b/t/ansicolor.t
@@ -1,7 +1,5 @@
#!/usr/bin/perl -w
-# $Id: ansicolor.t 3373 2008-02-05 00:17:33Z david $
-
use strict;
use Test::More tests => 17;
use Term::ANSIColor qw(:constants);
@@ -47,15 +45,19 @@ my $time1 = localtime( (stat $filename1)[9] );
my $time2 = localtime( (stat $filename2)[9] );
my $header = "--- $filename1\t$time1\n+++ $filename2\t$time2\n";
-my $file_diff = 'This is a ' . BOLD . RED . STRIKETHROUGH . "tst;\n"
- . 'it ' . RESET . BOLD . GREEN . UNDERLINE . "test.\n"
- . 'It ' . RESET . "is only a\n"
+my $file_diff = 'This is a ' . BOLD . RED . STRIKETHROUGH . "tst;"
+ . RESET . BOLD . GREEN . UNDERLINE . "test." . RESET . "\n"
+ . BOLD . RED . STRIKETHROUGH . "it " . RESET
+ . BOLD . GREEN . UNDERLINE . "It " . RESET . "is only a\n"
. 'test. Had ' . BOLD . RED . STRIKETHROUGH . 'it ' . RESET
. BOLD . GREEN . UNDERLINE . 'this ' . RESET . "been an\n"
. "actual diff, the results would\n"
- . 'have been output to ' . BOLD . RED . STRIKETHROUGH
- . "HTML.\n" . RESET . BOLD . GREEN . UNDERLINE
- . "the terminal.\n" . RESET;
+ . 'have been output to ' . BOLD . RED . STRIKETHROUGH . "HTML"
+ . RESET . BOLD . GREEN . UNDERLINE . "the terminal" . RESET . ".\n\n"
+ . 'Some string with ' . BOLD . RED . STRIKETHROUGH . 'funny $'
+ . RESET . BOLD . GREEN . UNDERLINE . 'funny @' . RESET . "\n"
+ . 'chars in the end' . BOLD . RED . STRIKETHROUGH . '*'
+ . RESET . BOLD . GREEN . UNDERLINE . '?' . RESET . "\n";
is word_diff($filename1, $filename2), $header . $file_diff,
'Diff by file name should include a header';
diff --git a/t/base.t b/t/base.t
index 1a5750f..55ebabe 100755
--- a/t/base.t
+++ b/t/base.t
@@ -1,7 +1,5 @@
#!/usr/bin/perl -w
-# $Id: base.t 3373 2008-02-05 00:17:33Z david $
-
use strict;
use Test::More tests => 4;
diff --git a/t/data/left.txt b/t/data/left.txt
index 2a66475..9375e3a 100644
--- a/t/data/left.txt
+++ b/t/data/left.txt
@@ -3,3 +3,6 @@ it is only a
test. Had it been an
actual diff, the results would
have been output to HTML.
+
+Some string with funny $
+chars in the end*
diff --git a/t/data/right.txt b/t/data/right.txt
index 1b2ab3f..e17c1e7 100644
--- a/t/data/right.txt
+++ b/t/data/right.txt
@@ -3,3 +3,6 @@ It is only a
test. Had this been an
actual diff, the results would
have been output to the terminal.
+
+Some string with funny @
+chars in the end?
diff --git a/t/html.t b/t/html.t
index ba9933b..4dd7b17 100644
--- a/t/html.t
+++ b/t/html.t
@@ -1,7 +1,5 @@
#!/usr/bin/perl -w
-# $Id: html.t 3373 2008-02-05 00:17:33Z david $
-
use strict;
use Test::More tests => 17;
use Term::ANSIColor qw(:constants);
@@ -47,15 +45,21 @@ my $time2 = localtime( (stat $filename2)[9] );
my $header = qq{<span class="fileheader">--- $filename1\t$time1\n}
. qq{+++ $filename2\t$time2\n</span>};
-my $file_diff = qq{<div class="file">$header<span class="hunk">This is a }
- . qq{</span><span class="hunk"><del>tst;\nit </del><ins>test.\n}
- . qq{It </ins></span><span class="hunk">is only a\n}
- . qq{test. Had </span><span class="hunk"><del>it </del>}
- . qq{<ins>this </ins></span><span class="hunk">been an\n}
+my $file_diff = qq{<div class="file">$header<span class="hunk">This is a </span>}
+ . qq{<span class="hunk"><del>tst;</del><ins>test.</ins></span>}
+ . qq{<span class="hunk">\n</span>}
+ . qq{<span class="hunk"><del>it </del><ins>It </ins></span>}
+ . qq{<span class="hunk">is only a\ntest. Had </span>}
+ . qq{<span class="hunk"><del>it </del><ins>this </ins></span>}
+ . qq{<span class="hunk">been an\n}
. qq{actual diff, the results would\n}
- . qq{have been output to </span><span class="hunk"><del>HTML.\n}
- . qq{</del><ins>the terminal.\n</ins></span></div>}
- ;
+ . qq{have been output to </span><span class="hunk"><del>HTML</del>}
+ . qq{<ins>the terminal</ins></span>}
+ . qq{<span class="hunk">.\n\nSome string with </span>}
+ . qq{<span class="hunk"><del>funny \$</del><ins>funny \@</ins></span>}
+ . qq{<span class="hunk">\nchars in the end</span>}
+ . qq{<span class="hunk"><del>*</del><ins>?</ins></span>}
+ . qq{<span class="hunk">\n</span></div>};
is word_diff($filename1, $filename2, \%opts), $file_diff,
'Diff by file name should include a header';
diff --git a/t/html.t b/t/htmltwolines.t
similarity index 61%
copy from t/html.t
copy to t/htmltwolines.t
index ba9933b..da3e61d 100644
--- a/t/html.t
+++ b/t/htmltwolines.t
@@ -1,6 +1,6 @@
#!/usr/bin/perl -w
-# $Id: html.t 3373 2008-02-05 00:17:33Z david $
+# $Id$
use strict;
use Test::More tests => 17;
@@ -8,16 +8,18 @@ use Term::ANSIColor qw(:constants);
use File::Spec::Functions qw(catfile);
use IO::File;
-BEGIN {
+use lib qw( /Users/gwg/Text-WordDiff-0.05/lib );
+
+# BEGIN {
use_ok 'Text::WordDiff' or die;
- use_ok 'Text::WordDiff::HTML' or die;
-}
+ use_ok 'Text::WordDiff::HTMLTwoLines' or die;
+# }
my $string1 = 'This is a test';
my $string2 = 'That was a test';
-my $term_diff = '<div class="file"><span class="hunk"><del>This is </del><ins>That was </ins></span><span class="hunk">a test</span></div>';
+my $term_diff = '<div class="file"><span class="hunk"><del>This is </del></span><span class="hunk">a test</span></div>' . "\n" . '<div class="file"><span class="hunk"><ins>That was </ins></span><span class="hunk">a test</span></div>' . "\n";
my %opts = (
- STYLE => 'HTML',
+ STYLE => 'HTMLTwoLines',
);
# Test scalar refs.
@@ -44,24 +46,38 @@ my $filename1 = catfile qw(t data left.txt);
my $filename2 = catfile qw(t data right.txt);
my $time1 = localtime( (stat $filename1)[9] );
my $time2 = localtime( (stat $filename2)[9] );
-my $header = qq{<span class="fileheader">--- $filename1\t$time1\n}
- . qq{+++ $filename2\t$time2\n</span>};
-
-my $file_diff = qq{<div class="file">$header<span class="hunk">This is a }
- . qq{</span><span class="hunk"><del>tst;\nit </del><ins>test.\n}
- . qq{It </ins></span><span class="hunk">is only a\n}
- . qq{test. Had </span><span class="hunk"><del>it </del>}
- . qq{<ins>this </ins></span><span class="hunk">been an\n}
- . qq{actual diff, the results would\n}
- . qq{have been output to </span><span class="hunk"><del>HTML.\n}
- . qq{</del><ins>the terminal.\n</ins></span></div>}
- ;
+my $header1 = qq{<span class="fileheader">--- $filename1 $time1</span>};
+my $header2 = qq{<span class="fileheader">+++ $filename2 $time2</span>};
+
+my $file_diff = qq{<div class="file">$header1}
+ . qq{<span class="hunk">This is a </span><span class="hunk"><del>tst;</del></span>}
+ . qq{<span class="hunk">\n</span>}
+ . qq{<span class="hunk"><del>it </del></span><span class="hunk">is only a\n}
+ . qq{test. Had </span><span class="hunk"><del>it </del></span><span class="hunk">been an\n}
+ . qq{actual diff, the results would\n}
+ . qq{have been output to </span><span class="hunk"><del>HTML</del></span>}
+ . qq{<span class="hunk">.\n\nSome string with </span>}
+ . qq{<span class="hunk"><del>funny \$</del></span>}
+ . qq{<span class="hunk">\nchars in the end</span>}
+ . qq{<span class="hunk"><del>*</del></span><span class="hunk">\n</span></div>\n}
+ . qq{<div class="file">$header2}
+ . qq{<span class="hunk">This is a </span><span class="hunk"><ins>test.</ins></span>}
+ . qq{<span class="hunk">\n</span>}
+ . qq{<span class="hunk"><ins>It </ins></span><span class="hunk">is only a\n}
+ . qq{test. Had </span><span class="hunk"><ins>this </ins></span><span class="hunk">been an\n}
+ . qq{actual diff, the results would\n}
+ . qq{have been output to </span><span class="hunk"><ins>the terminal</ins></span>}
+ . qq{<span class="hunk">.\n\nSome string with </span>}
+ . qq{<span class="hunk"><ins>funny \@</ins></span>}
+ . qq{<span class="hunk">\nchars in the end</span>}
+ . qq{<span class="hunk"><ins>?</ins></span><span class="hunk">\n</span></div>\n};
is word_diff($filename1, $filename2, \%opts), $file_diff,
'Diff by file name should include a header';
# No more header after this.
-$file_diff =~ s/\Q$header\E//;
+$file_diff =~ s/\Q$header1\E//;
+$file_diff =~ s/\Q$header2\E//;
# Try globs.
local (*FILE1, *FILE2);
diff --git a/t/pod.t b/t/pod.t
index 43334be..2cb5045 100755
--- a/t/pod.t
+++ b/t/pod.t
@@ -1,11 +1,9 @@
-#!perl -w
-
-# $Id: pod.t 3827 2008-05-06 17:20:35Z david $
+#!/usr/bin/perl -w
use strict;
use Test::More;
-eval "use Test::Pod 1.20";
-plan skip_all => "Test::Pod 1.20 required for testing POD" if $@;
+eval "use Test::Pod 1.41";
+plan skip_all => "Test::Pod 1.41 required for testing POD" if $@;
eval 'use Encode';
plan skip_all => 'Encode 1.20 required for testing POD because it has UTF-8 charactters' if $@;
all_pod_files_ok();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libtext-worddiff-perl.git
More information about the Pkg-perl-cvs-commits
mailing list