r52057 - in /branches/upstream/libfile-countlines-perl: ./ current/ current/lib/ current/lib/File/ current/t/
mxey-guest at users.alioth.debian.org
mxey-guest at users.alioth.debian.org
Tue Feb 2 19:44:39 UTC 2010
Author: mxey-guest
Date: Tue Feb 2 19:44:28 2010
New Revision: 52057
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=52057
Log:
[svn-inject] Installing original source of libfile-countlines-perl
Added:
branches/upstream/libfile-countlines-perl/
branches/upstream/libfile-countlines-perl/current/
branches/upstream/libfile-countlines-perl/current/Build.PL
branches/upstream/libfile-countlines-perl/current/Changes
branches/upstream/libfile-countlines-perl/current/MANIFEST
branches/upstream/libfile-countlines-perl/current/META.yml
branches/upstream/libfile-countlines-perl/current/Makefile.PL
branches/upstream/libfile-countlines-perl/current/README
branches/upstream/libfile-countlines-perl/current/lib/
branches/upstream/libfile-countlines-perl/current/lib/File/
branches/upstream/libfile-countlines-perl/current/lib/File/CountLines.pm
branches/upstream/libfile-countlines-perl/current/t/
branches/upstream/libfile-countlines-perl/current/t/01-pod.t
branches/upstream/libfile-countlines-perl/current/t/basic.t
Added: branches/upstream/libfile-countlines-perl/current/Build.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libfile-countlines-perl/current/Build.PL?rev=52057&op=file
==============================================================================
--- branches/upstream/libfile-countlines-perl/current/Build.PL (added)
+++ branches/upstream/libfile-countlines-perl/current/Build.PL Tue Feb 2 19:44:28 2010
@@ -1,0 +1,25 @@
+use strict;
+use warnings;
+use Module::Build;
+
+# Build script for File::CountLines: describes the distribution to
+# Module::Build and writes the "Build" script.
+
+# Modules the distribution needs, with their minimum versions
+# (0 means any version).
+my %prereq_for = (
+    'Carp'      => 0,
+    'Exporter'  => '5.57',
+    'charnames' => 1.01,
+    'strict'    => 0,
+    'warnings'  => 0,
+);
+
+Module::Build->new(
+    module_name        => 'File::CountLines',
+    dist_author        => 'Moritz Lenz',
+    dist_abstract      => 'Efficiently count the number of line breaks in a file',
+    dist_version_from  => 'lib/File/CountLines.pm',
+    license            => 'perl',
+    requires           => \%prereq_for,
+    recommends         => {},
+    create_readme      => 1,
+    create_makefile_pl => 'traditional',
+    sign               => 0,
+)->create_build_script;
+
+# vim: sw=4 ts=4 expandtab
Added: branches/upstream/libfile-countlines-perl/current/Changes
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libfile-countlines-perl/current/Changes?rev=52057&op=file
==============================================================================
--- branches/upstream/libfile-countlines-perl/current/Changes (added)
+++ branches/upstream/libfile-countlines-perl/current/Changes Tue Feb 2 19:44:28 2010
@@ -1,0 +1,8 @@
+Revision History for Perl module File::CountLines
+
+0.0.2 Wed Nov 12 20:01:34 CET 2008
+- require a sufficiently new version of `charnames'
+- Small documentation fix
+
+0.0.1 Sun Nov 9 23:29:53 CET 2008
+- Initial release
Added: branches/upstream/libfile-countlines-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libfile-countlines-perl/current/MANIFEST?rev=52057&op=file
==============================================================================
--- branches/upstream/libfile-countlines-perl/current/MANIFEST (added)
+++ branches/upstream/libfile-countlines-perl/current/MANIFEST Tue Feb 2 19:44:28 2010
@@ -1,0 +1,9 @@
+Build.PL
+Changes
+lib/File/CountLines.pm
+Makefile.PL
+MANIFEST This list of files
+META.yml
+README
+t/01-pod.t
+t/basic.t
Added: branches/upstream/libfile-countlines-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libfile-countlines-perl/current/META.yml?rev=52057&op=file
==============================================================================
--- branches/upstream/libfile-countlines-perl/current/META.yml (added)
+++ branches/upstream/libfile-countlines-perl/current/META.yml Tue Feb 2 19:44:28 2010
@@ -1,0 +1,23 @@
+---
+name: File-CountLines
+version: 0.0.2
+author:
+ - Moritz Lenz
+abstract: Efficiently count the number of line breaks in a file
+license: perl
+resources:
+ license: http://dev.perl.org/licenses/
+requires:
+ Carp: 0
+ Exporter: 5.57
+ charnames: 1.01
+ strict: 0
+ warnings: 0
+provides:
+ File::CountLines:
+ file: lib/File/CountLines.pm
+ version: 0.0.2
+generated_by: Module::Build version 0.280801
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.2.html
+ version: 1.2
Added: branches/upstream/libfile-countlines-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libfile-countlines-perl/current/Makefile.PL?rev=52057&op=file
==============================================================================
--- branches/upstream/libfile-countlines-perl/current/Makefile.PL (added)
+++ branches/upstream/libfile-countlines-perl/current/Makefile.PL Tue Feb 2 19:44:28 2010
@@ -1,0 +1,18 @@
+# Note: this file was auto-generated by Module::Build::Compat version 0.2808_01
+# It describes the File::CountLines distribution to ExtUtils::MakeMaker.
+# Because it is generated, hand edits here are likely to be overwritten.
+use ExtUtils::MakeMaker;
+WriteMakefile
+(
+ 'PL_FILES' => {},
+ 'INSTALLDIRS' => 'site',
+ 'NAME' => 'File::CountLines',
+ 'EXE_FILES' => [],
+ # The distribution version is read from the module's $VERSION.
+ 'VERSION_FROM' => 'lib/File/CountLines.pm',
+ # Required modules and their minimum versions (0 means any version).
+ 'PREREQ_PM' => {
+ 'warnings' => 0,
+ 'strict' => 0,
+ 'charnames' => '1.01',
+ 'Exporter' => '5.57',
+ 'Carp' => 0
+ }
+ )
+;
Added: branches/upstream/libfile-countlines-perl/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libfile-countlines-perl/current/README?rev=52057&op=file
==============================================================================
--- branches/upstream/libfile-countlines-perl/current/README (added)
+++ branches/upstream/libfile-countlines-perl/current/README Tue Feb 2 19:44:28 2010
@@ -1,0 +1,99 @@
+NAME
+ File::CountLines - efficiently count the number of line breaks in a
+ file.
+
+SYNOPSIS
+ use File::CountLines qw(count_lines);
+ my $no_of_lines = count_lines('/etc/passwd');
+
+ # other uses
+ my $carriage_returns = count_lines(
+ 'path/to/file.txt',
+ style => 'cr',
+ );
+ # possible styles are 'native' (the default), 'cr', 'lf', 'crlf'
+
+DESCRIPTION
+ perlfaq5 answers the question on how to count the number of lines in a
+ file. This module is a convenient wrapper around that method, with
+ additional options.
+
+ More specifically, it counts the number of *line breaks* rather than
+ lines. On Unix systems nearly all text files end with a newline (by
+ convention), so usually the number of lines and number of line breaks
+ are equal.
+
+ Since different operating systems have different ideas of what a newline
+ is, you can specify a "style" option, which can be one of the following
+ values:
+
+ "native"
+ This takes Perl's "\n" as the line separator, which should be the
+ right thing in most cases. See perlport for details. This is the
+ default.
+
+ "cr"
+ Take a carriage return as line separator (MacOS style)
+
+ "lf"
+ Take a line feed as line separator (Unix style)
+
+ "crlf"
+ Take a carriage return followed by a line feed as separator
+ (Microsoft Windows style)
+
+ Alternatively you can specify an arbitrary separator like this:
+
+ my $lists = count_lines($file, separator => '\end{itemize}');
+
+ It is taken verbatim and searched for in the file.
+
+ The file is read in equally sized blocks. The size of the blocks can be
+ supplied with the "blocksize" option. The default is 4096, and can be
+ changed by setting $File::CountLines::BlockSize.
+
+ Do not use a block size smaller than the length of the separator; that
+ might produce wrong results. (In general there's no reason to choose a
+ smaller block size at all. Depending on your size a larger block size
+ might speed up things a bit.)
+
+Character Encodings
+ If you supply a separator yourself, it should not be a decoded string.
+
+ The file is read in binary mode, which implies that this module works
+ fine for text files in ASCII-compatible encodings, including ASCII
+ itself, UTF-8 and all the ISO-8859-* encodings (aka Latin-1, Latin-2,
+ ...).
+
+ Note that the multi byte encodings like UTF-32, UTF-16le, UTF-16be and
+ UCS-2 encode a line feed character in a way that the 0x0A byte is a
+ substring of the encoded character, but if you search blindly for that
+ byte you will get false positives. For example the *LATIN CAPITAL LETTER
+ C WITH DOT ABOVE*, U+010A has the byte sequence "0x0A 0x01" when encoded
+ as UTF-16le, so it would be counted as a newline. Even searching for
+ "0x0A 0x00" might give false positives.
+
+ So the summary is that for now you can't use this module in a meaningful
+ way to count lines of text files in encodings that are not
+ ASCII-compatible. If there's demand for it, I can implement that though.
+
+Extending
+ You can add your own EOL styles by adding them to the
+ %File::CountLines::StyleMap hash, with the name of the style as hash key
+ and the separator as the value.
+
+AUTHOR
+ Moritz Lenz <http://perlgeek.de>, <mailto:moritz at faui2k3.org>
+
+COPYRIGHT AND LICENSE
+ Copyright (C) 2008 by Moritz A. Lenz. This module is free software. You
+ may use, redistribute and modify it under the same terms as perl itself.
+
+ Example code included in this package may be used as if it were in the
+ Public Domain.
+
+DEVELOPMENT
+ You can obtain the latest development version via subversion:
+
+ svn co https://faui2k3.org/svn/moritz/cpan/File-CountLines/
+
Added: branches/upstream/libfile-countlines-perl/current/lib/File/CountLines.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libfile-countlines-perl/current/lib/File/CountLines.pm?rev=52057&op=file
==============================================================================
--- branches/upstream/libfile-countlines-perl/current/lib/File/CountLines.pm (added)
+++ branches/upstream/libfile-countlines-perl/current/lib/File/CountLines.pm Tue Feb 2 19:44:28 2010
@@ -1,0 +1,209 @@
+package File::CountLines;
+use strict;
+use warnings;
+
+our $VERSION = '0.0.2';
+our @EXPORT_OK = qw(count_lines);
+
+# Borrow Exporter's import() rather than inheriting from Exporter.
+# Asking Exporter to export 'import' needs Exporter 5.57 or later. The
+# minimum must be written as 5.57: the former 5.057 is numerically smaller,
+# so it let older Exporter releases pass the version check.
+use Exporter 5.57 'import';
+
+use Carp qw(croak);
+use charnames qw(:full);
+
+# Maps each "style" option value to the separator string that is counted.
+# Callers may add their own styles to this hash.
+our %StyleMap = (
+    'cr'     => "\N{CARRIAGE RETURN}",
+    'lf'     => "\N{LINE FEED}",
+    'crlf'   => "\N{CARRIAGE RETURN}\N{LINE FEED}",
+    'native' => "\n",
+);
+
+# Default number of bytes requested per sysread() call; used when the
+# caller passes no (or a false) "blocksize" option.
+our $BlockSize = 4096;
+
+# count_lines($filename, %options)
+#
+# Counts how often a separator occurs in the file $filename and returns
+# that number.
+#
+# Options:
+#   separator => literal string to count; takes precedence over "style"
+#   style     => key of %StyleMap ('native' when omitted)
+#   blocksize => bytes to read per sysread(); a false value selects
+#                $BlockSize
+#
+# Croaks when the filename is undefined, when the style is unknown, or when
+# the block size is not a positive integer. Errors from the reading helpers
+# propagate unchanged.
+sub count_lines {
+    my $filename = shift;
+    croak 'expected filename in call to count_lines()'
+        unless defined $filename;
+    my %options = @_;
+
+    my $sep = $options{separator};
+    unless (defined $sep) {
+        my $style = exists $options{style} ? $options{style} : 'native';
+        $sep = $StyleMap{$style};
+        # croak (not die) so the error points at the caller, like the
+        # other argument errors in this module.
+        croak "Don't know how to map style '$style'" unless defined $sep;
+    }
+
+    # Reject anything that is not a plain positive integer before it
+    # reaches the readers.
+    my $blocksize = $options{blocksize} || $BlockSize;
+    croak "Invalid blocksize '$blocksize': expected a positive integer"
+        unless $blocksize =~ /\A[0-9]+\z/ && $blocksize > 0;
+
+    # Single-character (or empty) separators use the tr///-based reader;
+    # longer separators need the index()-based reader.
+    my $reader = length($sep) > 1
+        ? \&_cl_sysread_multiple_chars
+        : \&_cl_sysread_one_char;
+    return $reader->($filename, $sep, $blocksize);
+}
+
+# _cl_sysread_one_char($filename, $sep, $blocksize)
+#
+# Counts the occurrences of the single character $sep in $filename, reading
+# the file raw in chunks of $blocksize bytes, and returns the count.
+#
+# Croaks if the file cannot be opened or closed, or if the counter for $sep
+# cannot be compiled; dies if sysread() fails.
+sub _cl_sysread_one_char {
+    my ($filename, $sep, $blocksize) = @_;
+    local $Carp::CarpLevel = 1;
+    open my $handle, '<:raw', $filename
+        or croak "Can't open file `$filename' for reading: $!";
+    binmode $handle;
+
+    # tr/// does not interpolate, so the counter is built with a string
+    # eval. Only the escaped separator goes into the generated code; the
+    # block size is used as plain data below and is never evaluated.
+    (my $escaped = $sep) =~ s/([\\{}])/\\$1/g;
+    my $count_in = eval "sub { \$_[0] =~ tr{$escaped}{} }"
+        or croak "Can't compile counter for separator: $@";
+
+    my $lines = 0;
+    my $sysread_status;
+    while ($sysread_status = sysread $handle, my $buffer, $blocksize) {
+        $lines += $count_in->($buffer);
+    }
+    # sysread() returns 0 at end of file and undef on error.
+    die "Can't sysread() from file `$filename': $!"
+        unless defined ($sysread_status);
+    close $handle or croak "Can't close file `$filename': $!";
+    return $lines;
+}
+
+# _cl_sysread_multiple_chars($filename, $sep, $blocksize)
+#
+# Counts the non-overlapping occurrences of the multi-character string $sep
+# in $filename, scanning left to right, and returns the count. The file is
+# read raw in chunks of $blocksize bytes. The result does not depend on the
+# block size, and block sizes smaller than the separator are handled too.
+#
+# Croaks if the file cannot be opened or closed; dies if sysread() fails.
+sub _cl_sysread_multiple_chars {
+    my ($filename, $sep, $blocksize) = @_;
+    local $Carp::CarpLevel = 1;
+    open my $handle, '<:raw', $filename
+        or croak "Can't open file `$filename' for reading: $!";
+    binmode $handle;
+    my $len    = length($sep);
+    my $lines  = 0;
+    my $buffer = '';
+    my $sysread_status;
+    # Each read appends to whatever tail was kept from the previous round.
+    while ($sysread_status = sysread $handle, $buffer, $blocksize, length($buffer)) {
+        # Position just past the last counted match (0 if there is none).
+        my $scan_from = 0;
+        while (-1 != (my $hit = index $buffer, $sep, $scan_from)) {
+            $lines++;
+            $scan_from = $hit + $len;
+        }
+        # Keep at most the last $len - 1 bytes, since a separator may
+        # straddle the block boundary. Never keep bytes that belong to an
+        # already counted match: they would be matched again together with
+        # the next block and the separator would be counted twice.
+        my $keep_from = length($buffer) - ($len - 1);
+        $keep_from = $scan_from if $scan_from > $keep_from;
+        $buffer = substr $buffer, $keep_from;
+    }
+    # sysread() returns 0 at end of file and undef on error.
+    die "Can't sysread() from file `$filename': $!"
+        unless defined ($sysread_status);
+    close $handle or croak "Can't close file `$filename': $!";
+    return $lines;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+File::CountLines - efficiently count the number of line breaks in a file.
+
+=head1 SYNOPSIS
+
+ use File::CountLines qw(count_lines);
+ my $no_of_lines = count_lines('/etc/passwd');
+
+ # other uses
+ my $carriage_returns = count_lines(
+ 'path/to/file.txt',
+ style => 'cr',
+ );
+ # possible styles are 'native' (the default), 'cr', 'lf', 'crlf'
+
+=head1 DESCRIPTION
+
+L<perlfaq5> answers the question on how to count the number of lines
+in a file. This module is a convenient wrapper around that method, with
+additional options.
+
+More specifically, it counts the number of I<line breaks> rather than lines.
+On Unix systems nearly all text files end with a newline (by convention), so
+usually the number of lines and number of line breaks are equal.
+
+Since different operating systems have different ideas of what a newline is,
+you can specify a C<style> option, which can be one of the following values:
+
+=over
+
+=item C<native>
+
+This takes Perl's C<\n> as the line separator, which should be the right thing in most cases. See L<perlport> for details. This is the default.
+
+=item C<cr>
+
+Take a carriage return as line separator (MacOS style)
+
+=item C<lf>
+
+Take a line feed as line separator (Unix style)
+
+=item C<crlf>
+
+Take a carriage return followed by a line feed as separator (Microsoft
+Windows style)
+
+=back
+
+Alternatively you can specify an arbitrary separator like this:
+
+ my $lists = count_lines($file, separator => '\end{itemize}');
+
+It is taken verbatim and searched for in the file.
+
+The file is read in equally sized blocks. The size of the blocks
+can be supplied with the C<blocksize> option. The default is 4096,
+and can be changed by setting C<$File::CountLines::BlockSize>.
+
+Do not use a block size smaller than the length of the separator; that
+might produce wrong results. (In general there's no reason to choose a
+smaller block size at all. Depending on your size a larger block size
+might speed up things a bit.)
+
+=head1 Character Encodings
+
+If you supply a separator yourself, it should not be a decoded string.
+
+The file is read in binary mode, which implies that this module
+works fine for text files in ASCII-compatible encodings, including
+ASCII itself, UTF-8 and all the ISO-8859-* encodings (aka Latin-1,
+Latin-2, ...).
+
+Note that the multi byte encodings like UTF-32, UTF-16le, UTF-16be
+and UCS-2 encode a line feed character in a way that the C<0x0A> byte
+is a substring of the encoded character, but if you search blindly for
+that byte you will get false positives. For example the I<LATIN CAPITAL
+LETTER C WITH DOT ABOVE>, U+010A has the byte sequence C<0x0A 0x01> when
+encoded as UTF-16le, so it would be counted as a newline. Even searching for
+C<0x0A 0x00> might give false positives.
+
+So the summary is that for now you can't use this module in a meaningful
+way to count lines of text files in encodings that are not ASCII-compatible.
+If there's demand for it, I can implement that though.
+
+=head1 Extending
+
+You can add your own EOL styles by adding them to the
+C<%File::CountLines::StyleMap> hash, with the name of the style as hash key
+and the separator as the value.
+
+=head1 AUTHOR
+
+Moritz Lenz L<http://perlgeek.de>, L<mailto:moritz at faui2k3.org>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2008 by Moritz A. Lenz. This module is free software.
+You may use, redistribute and modify it under the same terms as perl itself.
+
+Example code included in this package may be used as if it were in the Public
+Domain.
+
+=head1 DEVELOPMENT
+
+You can obtain the latest development version via subversion:
+
+ svn co https://faui2k3.org/svn/moritz/cpan/File-CountLines/
+
+=cut
+
Added: branches/upstream/libfile-countlines-perl/current/t/01-pod.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libfile-countlines-perl/current/t/01-pod.t?rev=52057&op=file
==============================================================================
--- branches/upstream/libfile-countlines-perl/current/t/01-pod.t (added)
+++ branches/upstream/libfile-countlines-perl/current/t/01-pod.t Tue Feb 2 19:44:28 2010
@@ -1,0 +1,8 @@
+# Checks the POD of every file found under blib/ when Test::Pod is
+# installed; otherwise the whole test file is skipped.
+use strict;
+use Test::More;
+
+# Test::Pod is optional, so load it at run time and inspect $@ afterwards.
+eval "use Test::Pod";
+plan skip_all => "Test::Pod required for testing POD" if $@;
+
+# Directories searched for files containing POD.
+my @poddirs = qw( blib );
+all_pod_files_ok( all_pod_files( @poddirs ) );
Added: branches/upstream/libfile-countlines-perl/current/t/basic.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libfile-countlines-perl/current/t/basic.t?rev=52057&op=file
==============================================================================
--- branches/upstream/libfile-countlines-perl/current/t/basic.t (added)
+++ branches/upstream/libfile-countlines-perl/current/t/basic.t Tue Feb 2 19:44:28 2010
@@ -1,0 +1,66 @@
+# Functional tests for File::CountLines::count_lines().
+# Plan: 17 table-driven cases, 2 missing-file checks, 2 argument checks.
+use Test::More tests => 21;
+use strict;
+use warnings;
+
+use File::Temp qw(tempfile);
+use lib 'lib';
+use lib '../lib';
+use File::CountLines qw(count_lines);
+use charnames ':full';
+my $cr = "\N{CARRIAGE RETURN}";
+my $lf = "\N{LINE FEED}";
+
+# Each entry is:
+#   [ file content, expected count, test description, [ count_lines options ] ]
+my @tests = (
+ ["a\nb\nc", 2, 'basic sanity', []],
+ ["a", 0, 'no newline', []],
+ ["", 0, 'empty file', []],
+ ["\na\n\n", 3, 'multiple successive newlines', []],
+ ["$cr $lf $cr", 1, 'count a linefeed', [style => 'lf']],
+ ["$cr $lf $cr", 2, 'count two carriage returns', [style => 'cr']],
+ ["a", 0, 'no newline (lf)', [style => 'lf']],
+ ["a", 0, 'no newline (cr)', [style => 'cr']],
+ ["a$cr$lf b $cr $lf $cr$lf c", 2 , 'crlf', [style => 'crlf']],
+ ["abababa", 2, 'multi-char separator (1)', [separator => 'aba']],
+ ["aaaabaa", 3, 'multi-char separator', [separator => 'aa']],
+ ["aaaabaa", 3, 'multi-char overlapping with block size',
+ [separator => 'aa', blocksize => 3]],
+ ["aaaabaa", 3, 'multi-char, cut at block size',
+ [separator => 'aa', blocksize => 2]],
+# TODO: blocksize < length(sep)
+# ["aaaabaa", 3, 'multi-char, block size 1',
+# [separator => 'aa', blocksize => 1]],
+ ["\a\0\0b\0", 3, 'Zero byte as separator', [separator => "\0"]],
+ ["\\\\\\b\\", 4, 'Backslash as separator', [separator => "\\"]],
+ ["{}}a{}", 3, 'Curly braces as separator', [separator => "}" ]],
+ ["{}}a{}", 2, 'Curly braces as separator', [separator => "{" ]],
+
+);
+
+# Write each content string to a temporary file, count, compare, clean up.
+for (@tests) {
+ my ($handle, $file) = tempfile();
+ print $handle $_->[0];
+ close $handle or warn "Can't close file: $!";
+ is count_lines($file, @{$_->[3]}), $_->[1], $_->[2];
+ unlink $file or warn "Can't remove temporary test file `$file': $!";
+}
+
+# test that it dies for non-existent files:
+# (run once with a one-character and once with a two-character separator)
+for (1, 2) {
+ my ($handle, $file) = tempfile();
+ close $handle;
+ unlink $file;
+
+ SKIP: {
+ if (-e $file) {
+ skip "Can't find a non-existing file for croak testing", 1;
+ } else {
+ # XXX there could be a race condition between the -e and this
+ # test, but I don't know how to avoid this
+ ok !eval { count_lines($file, separator => 'x' x $_); 1 },
+ 'Dies on non-existing file';
+ }
+ }
+}
+
+# A missing or undefined filename must raise an exception.
+ok !eval { count_lines; 1 }, 'Dies without filename';
+ok !eval { count_lines(undef); 1 }, 'Dies with undef filename';
More information about the Pkg-perl-cvs-commits
mailing list