r21856 - in /trunk/libtext-csv-xs-perl: CSV_XS.pm CSV_XS.xs ChangeLog META.yml debian/changelog t/50_utf8.t t/75_hashref.t

Wed Jun 18 17:34:14 UTC 2008

Author: gregoa
Date: Wed Jun 18 17:34:14 2008
New Revision: 21856

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=21856
Log:
New upstream release.

Modified:
    trunk/libtext-csv-xs-perl/CSV_XS.pm
    trunk/libtext-csv-xs-perl/CSV_XS.xs
    trunk/libtext-csv-xs-perl/ChangeLog
    trunk/libtext-csv-xs-perl/META.yml
    trunk/libtext-csv-xs-perl/debian/changelog
    trunk/libtext-csv-xs-perl/t/50_utf8.t
    trunk/libtext-csv-xs-perl/t/75_hashref.t

Modified: trunk/libtext-csv-xs-perl/CSV_XS.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-xs-perl/CSV_XS.pm?rev=21856&op=diff
==============================================================================

--- trunk/libtext-csv-xs-perl/CSV_XS.pm (original)
+++ trunk/libtext-csv-xs-perl/CSV_XS.pm Wed Jun 18 17:34:14 2008
@@ -30,7 +30,7 @@
 use Carp;
 
 use vars   qw( $VERSION @ISA );
-$VERSION = "0.50";
+$VERSION = "0.51";
 @ISA     = qw( DynaLoader );
 
 sub PV { 0 }
@@ -118,9 +118,10 @@
     has_types		=> 21,
     verbatim		=> 22,
 
-    _is_bound		=> 23,
+    _is_bound		=> 23,	# 23 .. 26
     );
-sub _set_attr
+
+sub _set_attr_C
 {
     my ($self, $name, $val) = @_;
     $self->{$name} = $val;
@@ -128,28 +129,39 @@
     my @cache = unpack "C*", $self->{_CACHE};
     $cache[$_cache_id{$name}] = defined $val ? unpack "C", $val : 0;
     $self->{_CACHE} = pack "C*", @cache;
-    } # _set_attr
+    } # _set_attr_C
+
+sub _set_attr_N
+{
+    my ($self, $name, $val) = @_;
+    $self->{$name} = $val;
+    $self->{_CACHE} or return;
+    my @cache = unpack "C*", $self->{_CACHE};
+    my $i = $_cache_id{$name};
+    $cache[$i++] = $_ for unpack "C*", pack "N", defined $val ? $val : 0;
+    $self->{_CACHE} = pack "C*", @cache;
+    } # _set_attr_N
 
 # Accessor methods.
 #   It is unwise to change them halfway through a single file!
 sub quote_char
 {
     my $self = shift;
-    @_ and $self->_set_attr ("quote_char", shift);
+    @_ and $self->_set_attr_C ("quote_char", shift);
     $self->{quote_char};
     } # quote_char
 
 sub escape_char
 {
     my $self = shift;
-    @_ and $self->_set_attr ("escape_char", shift);
+    @_ and $self->_set_attr_C ("escape_char", shift);
     $self->{escape_char};
     } # escape_char
 
 sub sep_char
 {
     my $self = shift;
-    @_ and $self->_set_attr ("sep_char", shift);
+    @_ and $self->_set_attr_C ("sep_char", shift);
     $self->{sep_char};
     } # sep_char
 
@@ -178,56 +190,56 @@
 sub always_quote
 {
     my $self = shift;
-    @_ and $self->_set_attr ("always_quote", shift);
+    @_ and $self->_set_attr_C ("always_quote", shift);
     $self->{always_quote};
     } # always_quote
 
 sub binary
 {
     my $self = shift;
-    @_ and $self->_set_attr ("binary", shift);
+    @_ and $self->_set_attr_C ("binary", shift);
     $self->{binary};
     } # binary
 
 sub keep_meta_info
 {
     my $self = shift;
-    @_ and $self->_set_attr ("keep_meta_info", shift);
+    @_ and $self->_set_attr_C ("keep_meta_info", shift);
     $self->{keep_meta_info};
     } # keep_meta_info
 
 sub allow_loose_quotes
 {
     my $self = shift;
-    @_ and $self->_set_attr ("allow_loose_quotes", shift);
+    @_ and $self->_set_attr_C ("allow_loose_quotes", shift);
     $self->{allow_loose_quotes};
     } # allow_loose_quotes
 
 sub allow_loose_escapes
 {
     my $self = shift;
-    @_ and $self->_set_attr ("allow_loose_escapes", shift);
+    @_ and $self->_set_attr_C ("allow_loose_escapes", shift);
     $self->{allow_loose_escapes};
     } # allow_loose_escapes
 
 sub allow_whitespace
 {
     my $self = shift;
-    @_ and $self->_set_attr ("allow_whitespace", shift);
+    @_ and $self->_set_attr_C ("allow_whitespace", shift);
     $self->{allow_whitespace};
     } # allow_whitespace
 
 sub blank_is_undef
 {
     my $self = shift;
-    @_ and $self->_set_attr ("blank_is_undef", shift);
+    @_ and $self->_set_attr_C ("blank_is_undef", shift);
     $self->{blank_is_undef};
     } # blank_is_undef
 
 sub verbatim
 {
     my $self = shift;
-    @_ and $self->_set_attr ("verbatim", shift);
+    @_ and $self->_set_attr_C ("verbatim", shift);
     $self->{verbatim};
     } # verbatim
 
@@ -408,7 +420,7 @@
 	croak ($self->SetDiag (3001));
 	}
 
-    $self->{_is_bound} && @keys != unpack "C", $self->{_is_bound} and
+    $self->{_is_bound} && @keys != $self->{_is_bound} and
 	croak ($self->SetDiag (3003));
 
     $self->{_COLUMN_NAMES} = [ @keys ];
@@ -427,12 +439,10 @@
     $self->{_COLUMN_NAMES} && @refs != @{$self->{_COLUMN_NAMES}} and
 	croak ($self->SetDiag (3003));
 
-    @refs > 255 and croak ($self->SetDiag (3005));
-
     join "", map { ref $_ eq "SCALAR" ? "" : "*" } @refs and
 	croak ($self->SetDiag (3004));
 
-    $self->_set_attr ("_is_bound", pack "C" => scalar @refs);
+    $self->_set_attr_N ("_is_bound", scalar @refs);
     $self->{_BOUND_COLUMNS} = [ @refs ];
     @refs;
     } # column_names
@@ -783,6 +793,11 @@
 including line feeds, carriage returns and NULL bytes. (The latter must
 be escaped as C<"0>.) By default this feature is off.
 
+If a string is marked UTF8, binary will be turned on automatically when
+binary characters other than CR or NL are encountered. Note that a simple
+string like C<"\x{00a0}"> might still be binary, but not marked UTF8, so
+setting C<{ binary => 1 }> is still a wise option.
+
 =item types
 
 A set of column types; this attribute is immediately passed to the
@@ -979,7 +994,7 @@
 
 =head2 bind_columns
 
-Takes a list of references to scalars (max 255) to store the fields fetched
+Takes a list of references to scalars to store the fields fetched
 C<getline ()> in. When you don't pass enough references to store the
 fetched fields in, C<getline ()> will fail. If you pass more than there are
 fields to return, the remaining references are left untouched.
@@ -1138,7 +1153,7 @@
 
  $csv->SetDiag (0);
 
-Use to reset the diagnosticts if you are dealing with errors.
+Use to reset the diagnostics if you are dealing with errors.
 
 =head1 INTERNALS
 
@@ -1347,7 +1362,7 @@
 normal cases - when no error occured - may cause unexpected results.
 
 Currently errors as described below are available. I've tried to make the error
-itself explainatory enough, but more descriptions will be added. For most of
+itself explanatory enough, but more descriptions will be added. For most of
 these errors, the first three capitals describe the error category:
 
 =over 2
@@ -1362,7 +1377,7 @@
 
 =item EOF
 
-Enf-Of-File related parse error.
+End-Of-File related parse error.
 
 =item EIQ
 
@@ -1454,8 +1469,6 @@
 =item 3003 "EHR - bind_columns () and column_names () fields count mismatch"
 
 =item 3004 "EHR - bind_columns () only accepts refs to scalars"
-
-=item 3005 "EHR - bind_columns () takes 254 refs max"
 
 =item 3006 "EHR - bind_columns () did not pass enough refs for parsed fields"
 

Modified: trunk/libtext-csv-xs-perl/CSV_XS.xs
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-xs-perl/CSV_XS.xs?rev=21856&op=diff
==============================================================================
--- trunk/libtext-csv-xs-perl/CSV_XS.xs (original)
+++ trunk/libtext-csv-xs-perl/CSV_XS.xs Wed Jun 18 17:34:14 2008
@@ -90,7 +90,7 @@
 
     byte	blank_is_undef;
     byte	verbatim;
-    byte	is_bound;
+    long	is_bound;
     byte	reserved1;
 #endif
 
@@ -156,7 +156,6 @@
     { 3002, "EHR - getline_hr () called before column_names ()"			},
     { 3003, "EHR - bind_columns () and column_names () fields count mismatch"	},
     { 3004, "EHR - bind_columns () only accepts refs to scalars"		},
-    { 3005, "EHR - bind_columns () takes 254 refs max"				},
     { 3006, "EHR - bind_columns () did not pass enough refs for parsed fields"	},
     { 3007, "EHR - bind_columns needs refs to writeable scalars"		},
     { 3008, "EHR - unexpected error in bound fields"				},
@@ -221,7 +220,6 @@
 	csv->blank_is_undef		= csv->cache[CACHE_ID_blank_is_undef	];
 	csv->verbatim			= csv->cache[CACHE_ID_verbatim		];
 #endif
-	csv->is_bound			= csv->cache[CACHE_ID__is_bound		];
 	csv->eol_is_cr			= csv->cache[CACHE_ID_eol_is_cr		];
 	csv->eol_len			= csv->cache[CACHE_ID_eol_len		];
 	if (csv->eol_len < 8)
@@ -236,6 +234,11 @@
 		csv->eol_is_cr = 0;
 		}
 	    }
+	csv->is_bound			=
+	    (csv->cache[CACHE_ID__is_bound    ] << 24) |
+	    (csv->cache[CACHE_ID__is_bound + 1] << 16) |
+	    (csv->cache[CACHE_ID__is_bound + 2] <<  8) |
+	    (csv->cache[CACHE_ID__is_bound + 3]);
 
 	csv->types = NULL;
 	if (csv->cache[CACHE_ID_has_types]) {
@@ -315,12 +318,15 @@
 	csv->cache[CACHE_ID_blank_is_undef]		= csv->blank_is_undef;
 	csv->cache[CACHE_ID_verbatim]			= csv->verbatim;
 #endif
-	csv->cache[CACHE_ID__is_bound]			= csv->is_bound;
 	csv->cache[CACHE_ID_eol_is_cr]			= csv->eol_is_cr;
 	csv->cache[CACHE_ID_eol_len]			= csv->eol_len;
 	if (csv->eol_len > 0 && csv->eol_len < 8 && csv->eol)
 	    strcpy ((char *)&csv->cache[CACHE_ID_eol], csv->eol);
 	csv->cache[CACHE_ID_has_types]			= csv->types ? 1 : 0;
+	csv->cache[CACHE_ID__is_bound    ] = (csv->is_bound & 0xFF000000) >> 24;
+	csv->cache[CACHE_ID__is_bound + 1] = (csv->is_bound & 0x00FF0000) >> 16;
+	csv->cache[CACHE_ID__is_bound + 2] = (csv->is_bound & 0x0000FF00) >>  8;
+	csv->cache[CACHE_ID__is_bound + 3] = (csv->is_bound & 0x000000FF);
 
 	if ((csv->tmp = newSVpvn ((char *)csv->cache, CACHE_SIZE)))
 	    hv_store (self, "_CACHE", 6, csv->tmp, 0);
@@ -428,11 +434,15 @@
 		int	e = 0;
 
 		if (!csv->binary && is_csv_binary (c)) {
-		    SvREFCNT_inc (*svp);
-		    unless (hv_store (csv->self, "_ERROR_INPUT", 12, *svp, 0))
-			SvREFCNT_dec (*svp);
-		    (void)SetDiag (csv, 2110);
-		    return FALSE;
+		    if (SvUTF8 (*svp))
+			csv->binary = 1;
+		    else {
+			SvREFCNT_inc (*svp);
+			unless (hv_store (csv->self, "_ERROR_INPUT", 12, *svp, 0))
+			    SvREFCNT_dec (*svp);
+			(void)SetDiag (csv, 2110);
+			return FALSE;
+			}
 		    }
 		if (csv->quote_char  && c == csv->quote_char)
 		    e = 1;
@@ -1004,7 +1014,7 @@
 	    if (f & CSV_FLAGS_QUO) {
 		if (is_csv_binary (c)) {
 		    f |= CSV_FLAGS_BIN;
-		    unless (csv->binary)
+		    unless (csv->binary || csv->utf8)
 			ERROR_INSIDE_QUOTES (2026);
 		    }
 		CSV_PUT_SV (c);
@@ -1012,7 +1022,7 @@
 	    else {
 		if (is_csv_binary (c)) {
 		    f |= CSV_FLAGS_BIN;
-		    unless (csv->binary)
+		    unless (csv->binary || csv->utf8)
 			ERROR_INSIDE_FIELD (2037);
 		    }
 		CSV_PUT_SV (c);

Modified: trunk/libtext-csv-xs-perl/ChangeLog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-xs-perl/ChangeLog?rev=21856&op=diff
==============================================================================
--- trunk/libtext-csv-xs-perl/ChangeLog (original)
+++ trunk/libtext-csv-xs-perl/ChangeLog Wed Jun 18 17:34:14 2008
@@ -1,3 +1,9 @@
+2008-06-17  0.51 - H.Merijn Brand   <h.m.brand at xs4all.nl>
+
+	* Allow UTF8 even without binary => 1
+	* Fixed a few pod typo's
+	* Lifted the max of 255 for bind_columns
+
 2008-06-04  0.50 - H.Merijn Brand   <h.m.brand at xs4all.nl>
 
 	* Skip a few tests in automated testing, as they confuse
@@ -13,8 +19,8 @@
 	* Use Test::MinimumVersion (not distributed)
 	* Added option -F to examples/csv2xls
 	* More source code cleanup
-	* Nailed the UTF-8 issues for parsing
-	* Nailed the UTF-8 issues for combining
+	* Nailed the UTF8 issues for parsing
+	* Nailed the UTF8 issues for combining
 
 2008-04-23  0.45 - H.Merijn Brand   <h.m.brand at xs4all.nl>
 

Modified: trunk/libtext-csv-xs-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-xs-perl/META.yml?rev=21856&op=diff
==============================================================================
--- trunk/libtext-csv-xs-perl/META.yml (original)
+++ trunk/libtext-csv-xs-perl/META.yml Wed Jun 18 17:34:14 2008
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:              Text-CSV_XS
-version:           0.50
+version:           0.51
 abstract:          Comma-Separated Values manipulation routines
 license:           perl
 author:              
@@ -10,7 +10,7 @@
 provides:
     Text::CSV_XS:
         file:      CSV_XS.pm
-        version:   0.50
+        version:   0.51
 requires:     
     perl:          5.005
     DynaLoader:    0

Modified: trunk/libtext-csv-xs-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-xs-perl/debian/changelog?rev=21856&op=diff
==============================================================================
--- trunk/libtext-csv-xs-perl/debian/changelog (original)
+++ trunk/libtext-csv-xs-perl/debian/changelog Wed Jun 18 17:34:14 2008
@@ -1,3 +1,9 @@
+libtext-csv-xs-perl (0.51-1) UNRELEASED; urgency=low
+
+  * New upstream release.
+
+ -- gregor herrmann <gregoa at debian.org>  Wed, 18 Jun 2008 19:32:59 +0200
+
 libtext-csv-xs-perl (0.50-1) unstable; urgency=low
 
   * New upstream release

Modified: trunk/libtext-csv-xs-perl/t/50_utf8.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-xs-perl/t/50_utf8.t?rev=21856&op=diff
==============================================================================
--- trunk/libtext-csv-xs-perl/t/50_utf8.t (original)
+++ trunk/libtext-csv-xs-perl/t/50_utf8.t Wed Jun 18 17:34:14 2008
@@ -10,7 +10,7 @@
 	plan skip_all => "UTF8 tests useless in this ancient perl version";
 	}
     else {
-	plan tests => 64;
+	plan tests => 67;
 	}
     }
 
@@ -20,8 +20,8 @@
     require "t/util.pl";
     }
 
+# No binary => 1, as UTF8 is supposed to be allowed without it
 my $csv = Text::CSV_XS->new ({
-    binary         => 1,
     always_quote   => 1,
     keep_meta_info => 1,
     });
@@ -44,7 +44,7 @@
   #  characters in 128..255
   ) {
     my ($u, $msg) = @$test;
-    utf8::encode ($u);
+    ($u = "$u\x{0123}") =~ s/.$//;	# Make sure it's marked UTF8
     my @in  = ("", " ", $u, "");
     my $exp = join ",", map { qq{"$_"} } @in;
 
@@ -61,6 +61,11 @@
 	is_binary ($in[$_], $out[$_],	"field $_ $msg");
 	}
     }
+
+# Test if the UTF8 part is accepted, but the \n is not
+is ($csv->parse (qq{"\x{0123}\n\x{20ac}"}), 0, "\\n still needs binary");
+is ($csv->binary, 0, "bin flag still unset");
+is ($csv->error_diag + 0, 2021, "Error 2021");
 
 # As all utf tests are skipped for older pers, It's safe to use 3-arg open this way
 my $file = "files/utf8.csv";

Modified: trunk/libtext-csv-xs-perl/t/75_hashref.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-xs-perl/t/75_hashref.t?rev=21856&op=diff
==============================================================================
--- trunk/libtext-csv-xs-perl/t/75_hashref.t (original)
+++ trunk/libtext-csv-xs-perl/t/75_hashref.t Wed Jun 18 17:34:14 2008
@@ -67,8 +67,6 @@
 eval { $csv->bind_columns ({}, {}, {}, {}) };
 is ($csv->error_diag () + 0, 3004,		"bad arg types");
 is ($csv->column_names (undef), undef,		"reset column_names");
-eval { $csv->bind_columns ((\$code) x 300) };
-is ($csv->error_diag () + 0, 3005,		"too many args");
 ok ($csv->bind_columns (\($code, $name, $price)), "Bind columns");
 
 eval { $csv->column_names ("foo") };
@@ -96,9 +94,12 @@
 ($code, $name, $price, $desc, $foo) = (101 .. 105);
 ok ($csv->getline (*FH),			"fetch less than expected");
 is_deeply ( [ $code, $name, $price, $desc, $foo ],
-	    [ 2, "Drinks", "82.78", "Drinks", 105 ],		"unfetched not reset");
+	    [ 2, "Drinks", "82.78", "Drinks", 105 ],	"unfetched not reset");
 
-ok ($csv->bind_columns (\1, \2, \3, \""),	"bind too many columns");
+my @foo = (0) x 0x012345;
+ok ($csv->bind_columns (\(@foo)),		"bind a lot of columns");
+
+ok ($csv->bind_columns (\1, \2, \3, \""),	"bind too constant columns");
 is ($csv->getline (*FH), undef,			"fetch to read-only ref");
 is ($csv->error_diag () + 0, 3008,		"Read-only");