[SCM] Debian packaging of libtext-csv-xs-perl branch, master, updated. debian/0.87-1

Sat Mar 10 15:58:13 UTC 2012

The following commit has been merged in the master branch:
commit 347541786d3127f77a24ffdc8798b066017433d7
Author: Ansgar Burchardt <ansgar at debian.org>
Date:   Sat Mar 10 16:43:29 2012 +0100

    Imported Upstream version 0.87

diff --git a/CSV_XS.pm b/CSV_XS.pm
index 8d04a79..a8e08dc 100644
--- a/CSV_XS.pm
+++ b/CSV_XS.pm
@@ -27,7 +27,7 @@ use DynaLoader ();
 use Carp;
 
 use vars   qw( $VERSION @ISA );
-$VERSION = "0.86";
+$VERSION = "0.87";
 @ISA     = qw( DynaLoader );
 bootstrap Text::CSV_XS $VERSION;
 
@@ -1224,6 +1224,32 @@ declare your column names.
 
 L</getline_hr> will croak if called before L</column_names>.
 
+Note that L</getline_hr> creates a hashref for every row and might be much
+slower than the combined use of L</bind_columns> and L</getline>, which
+still offers the same ease of use of a hashref inside the loop:
+
+ my @cols = @{$csv->getline ($io)};
+ $csv->column_names (@cols);
+ while (my $row = $csv->getline_hr ($io)) {
+     print $row->{price};
+     }
+
+This could easily be rewritten to the much faster:
+
+ my @cols = @{$csv->getline ($io)};
+ my $row = {};
+ $csv->bind_columns (\@{$row}{@cols});
+ while ($csv->getline ($io)) {
+     print $row->{price};
+     }
+
+Your mileage may vary with the size of the data and the number of rows, but
+with perl-5.14.2 the difference for a 100_000 line file with 14 columns is:
+
+            Rate hashrefs getlines
+ hashrefs 1.00/s       --     -76%
+ getlines 4.15/s     313%       --
+
 =head2 getline_hr_all
 X<getline_hr_all>
 
diff --git a/CSV_XS.xs b/CSV_XS.xs
index 0847113..ff20ae5 100644
--- a/CSV_XS.xs
+++ b/CSV_XS.xs
@@ -19,6 +19,7 @@
 #include "ppport.h"
 #if (PERL_BCDVERSION <= 0x5005005)
 #  define sv_utf8_upgrade(sv)	/* no-op */
+#  define is_utf8_string(s,l)	0
 #  define SvUTF8_on(sv)		/* no-op */
 #  define SvUTF8(sv)		0
 #  endif
@@ -601,7 +602,7 @@ static int cx_Print (pTHX_ csv_t *csv, SV *dst)
 	PUSHs ((dst));
 	PUSHs (tmp);
 	PUTBACK;
-	if (csv->utf8)
+	if (csv->utf8 && is_utf8_string (SvPV_nolen (tmp), 0))
 	    SvUTF8_on (tmp);
 	result = call_sv (m_print, G_SCALAR | G_METHOD);
 	SPAGAIN;
@@ -617,7 +618,7 @@ static int cx_Print (pTHX_ csv_t *csv, SV *dst)
 	sv_catpvn (SvRV (dst), csv->buffer, csv->used);
 	result = TRUE;
 	}
-    if (csv->utf8 && SvROK (dst))
+    if (csv->utf8 && SvROK (dst) && is_utf8_string (SvPV_nolen (SvRV (dst)), 0))
 	SvUTF8_on (SvRV (dst));
     csv->used = 0;
     return result;
diff --git a/ChangeLog b/ChangeLog
index 541f934..b85a985 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+0.87	- 2012-03-08, H.Merijn Brand
+    * Extra check on utf8 output (RT#74330)
+    * examples/csvdiff now recognizes numerically sorted CSV files
+    * Document example comparing getline_hr vs bind_columns + getline
+
 0.86	- 2012-01-22, H.Merijn Brand
     * Introduce quote_binary attribute
     * Update copyright to 2012
@@ -13,7 +18,7 @@
 
 0.83	- 2011-08-07, H.Merijn Brand
     * Fix spurious auto_diag warning (RT#69673)
-    * Tested with 50 versions of perl, including 1.15.1
+    * Tested with 50 versions of perl, including 5.15.1
 
 0.82	- 2011-05-01, H.Merijn Brand
     * Doc fix (RT#66905, Peter Newman)
diff --git a/META.json b/META.json
index 4e162ef..6b8e110 100644
--- a/META.json
+++ b/META.json
@@ -14,7 +14,7 @@
       },
    "distribution_type" : "module",
    "generated_by" : "Author",
-   "version" : "0.86",
+   "version" : "0.87",
    "name" : "Text-CSV_XS",
    "author" : [
       "H.Merijn Brand <h.m.brand at xs4all.nl>"
@@ -25,7 +25,7 @@
       },
    "provides" : {
       "Text::CSV_XS" : {
-         "version" : "0.86",
+         "version" : "0.87",
          "file" : "CSV_XS.pm"
          }
       },
diff --git a/META.yml b/META.yml
index 6c9f0db..f6cf3ff 100644
--- a/META.yml
+++ b/META.yml
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:                    Text-CSV_XS
-version:                 0.86
+version:                 0.87
 abstract:                Comma-Separated Values manipulation routines
 license:                 perl
 author:              
@@ -10,7 +10,7 @@ distribution_type:       module
 provides:
     Text::CSV_XS:
         file:            CSV_XS.pm
-        version:         0.86
+        version:         0.87
 requires:     
     perl:                5.005
     DynaLoader:          0
diff --git a/examples/csvdiff b/examples/csvdiff
index e18ba37..051b5e8 100755
--- a/examples/csvdiff
+++ b/examples/csvdiff
@@ -37,7 +37,7 @@ use Text::CSV_XS;
 my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 0 });
 
 if ($opt_h) {
-    binmode STDOUT, ":utf8";
+    binmode STDOUT, ":encoding(utf-8)";
     print <<EOH;
 <?xml version="1.0" encoding="utf-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
@@ -50,6 +50,7 @@ if ($opt_h) {
   <style type="text/css">
     .rd { background:	#ffe0e0;	}
     .gr { background:	#e0ffe0;	}
+    .hd { background:	#e0e0ff;	}
     .b0 { background:	#e0e0e0;	}
     .b1 { background:	#f0f0f0;	}
     .r  { color:	red;		}
@@ -71,21 +72,28 @@ elsif (!$opt_c) {
     }
 
 my @f;
+my $opt_n = 1;
 foreach my $x (0, 1) {
     open my $fh, "<", $ARGV[$x] or die "$ARGV[$x]: $!\n";
-    while (1) { $_ = $csv->getline ($fh) or last; @$_ and push @{$f[$x]}, $_ }
+    my $n = 0;
+    while (1) {
+	my $row = $csv->getline ($fh) or last;
+	@$row and push @{$f[$x]}, $row;
+	$n++ && $row->[0] =~ m/\D/ and $opt_n = 0;
+	}
     }
 my @n   = map { $#{$f[$_]} } 0, 1;
 my @i   = (1, 1);
 my $hdr = "# csvdiff   < $ARGV[0]    > $ARGV[1]\n";
 
-$f[$_][1+$n[$_]][0] = "\xff\xff\xff\xff" for 0, 1;
+$f[$_][1+$n[$_]][0] = $opt_n ? 2147483647 : "\xff\xff\xff\xff" for 0, 1;
 
 my %cls;
    %cls = (
     "b" => 0,
     "-"	=> sub { "rd" },
     "+"	=> sub { "gr" },
+    "H"	=> sub { "hd" },
     "<"	=> sub { $cls{b} ^= 1; "b$cls{b}" },
     ">"	=> sub { "b$cls{b}" },
     );
@@ -109,17 +117,39 @@ sub show
     $hdr = "";
     } # show
 
+# Skip first line if both are the same: it probably is a header
+my @h0 = @{$f[0][0]};
+my @h1 = @{$f[1][0]};
+if ("@h0" eq "@h1") {
+    if ($opt_h) {
+    	$i[0]--;
+    	show ("H", 0);
+    	}
+    shift @{$f[0]};
+    shift @{$f[1]};
+    }
+
+my $x = 0;
 while ($i[0] <= $n[0] || $i[1] <= $n[1]) {
-    $f[0][$i[0]][0] lt $f[1][$i[1]][0] and show ("-", 0), next;
-    $f[0][$i[0]][0] gt $f[1][$i[1]][0] and show ("+", 1), next;
+    my @r0 = @{$f[0][$i[0]]};
+    my @r1 = @{$f[1][$i[1]]};
+
+    if ($opt_n) {
+	$r0[0] <  $r1[0] and show ("-", 0), next;
+	$r0[0] >  $r1[0] and show ("+", 1), next;
+	}
+    else {
+	$r0[0] lt $r1[0] and show ("-", 0), next;
+	$r0[0] gt $r1[0] and show ("+", 1), next;
+	}
 
-    "@{[@{$f[0][$i[0]]}]}" eq "@{[@{$f[1][$i[1]]}]}" and
-	$i[0]++, $i[1]++, next;
+    "@r0" eq "@r1" and $i[0]++, $i[1]++, next;
 
-    foreach my $c (1 .. $#{$f[0][0]}) {
-	$f[0][$i[0]][$c] eq $f[1][$i[1]][$c] and next;
-	$f[0][$i[0]][$c] = RED   . $f[0][$i[0]][$c] . RESET;
-	$f[1][$i[1]][$c] = GREEN . $f[1][$i[1]][$c] . RESET;
+    foreach my $c (1 .. $#h0) {
+	my ($L, $R) = map { defined $_ ? $_ : "" } $r0[$c], $r1[$c];
+	$L eq $R and next;
+	$f[0][$i[0]][$c] = RED   . $L . RESET;
+	$f[1][$i[1]][$c] = GREEN . $R . RESET;
 	}
 
     show ("<", 0);
diff --git a/t/70_rt.t b/t/70_rt.t
index c20cf6c..f30568d 100644
--- a/t/70_rt.t
+++ b/t/70_rt.t
@@ -4,7 +4,7 @@ use strict;
 $^W = 1;
 
 #use Test::More "no_plan";
- use Test::More tests => 442;
+ use Test::More tests => 449;
 
 BEGIN {
     use_ok "Text::CSV_XS", ();
@@ -383,6 +383,29 @@ while (<DATA>) {
 	}
     }
 
+SKIP: {   # http://rt.cpan.org/Ticket/Display.html?id=74330
+    $] < 5.008002 and skip "UTF8 unreliable in perl $]", 7;
+
+    $rt = "74220"; # Text::CSV_XS can be made to produce bad strings
+    my $csv = Text::CSV_XS->new ({ binary => 1 });
+
+    my $ax = chr (0xfa);
+    my $bx = "foo";
+
+    # We set the UTF-8 flag on a string with no funny characters
+    utf8::upgrade ($bx);
+    is ($bx, "foo", "no funny characters in the string");
+
+    ok (utf8::valid ($ax), "first string correct in Perl");
+    ok (utf8::valid ($bx), "second string correct in Perl");
+
+    ok ($csv->combine ($ax, $bx),	"combine ()");
+    ok (my $foo = $csv->string (),	"string ()");
+
+    ok (utf8::valid ($foo), "is combined string correct inside Perl?");
+    is ($foo, qq{\xfa,foo}, "expected result");
+    }
+
 __END__
 «24386» - \t doesn't work in _XS, works in _PP
 VIN	StockNumber	Year	Make	Model	MD	Engine	EngineSize	Transmission	DriveTrain	Trim	BodyStyle	CityFuel	HWYFuel	Mileage	Color	InteriorColor	InternetPrice	RetailPrice	Notes	ShortReview	Certified	NewUsed	Image_URLs	Equipment
@@ -432,6 +455,7 @@ B:035_03_	fission, one	horns	@p 03-035.bmp	@p 03-035.bmp			obsolete Heising ex
 3,4
 5,6
 7,8
+«74330» - Text::CSV_XS can be made to produce bad strings
 «x1001» - Lines starting with "0" (Ruslan Dautkhanov)
 "0","A"
 "0","A"

-- 
Debian packaging of libtext-csv-xs-perl