r45842 - in /branches/upstream/libtext-csv-xs-perl/current: CSV_XS.pm CSV_XS.xs ChangeLog META.yml README examples/csv-check t/45_eol.t t/65_allow.t

jawnsy-guest at users.alioth.debian.org jawnsy-guest at users.alioth.debian.org
Thu Oct 15 15:58:39 UTC 2009


Author: jawnsy-guest
Date: Thu Oct 15 15:58:35 2009
New Revision: 45842

URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=45842
Log:
[svn-upgrade] Integrating new upstream version, libtext-csv-xs-perl (0.69)

Modified:
    branches/upstream/libtext-csv-xs-perl/current/CSV_XS.pm
    branches/upstream/libtext-csv-xs-perl/current/CSV_XS.xs
    branches/upstream/libtext-csv-xs-perl/current/ChangeLog
    branches/upstream/libtext-csv-xs-perl/current/META.yml
    branches/upstream/libtext-csv-xs-perl/current/README
    branches/upstream/libtext-csv-xs-perl/current/examples/csv-check
    branches/upstream/libtext-csv-xs-perl/current/t/45_eol.t
    branches/upstream/libtext-csv-xs-perl/current/t/65_allow.t

Modified: branches/upstream/libtext-csv-xs-perl/current/CSV_XS.pm
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-csv-xs-perl/current/CSV_XS.pm?rev=45842&op=diff
==============================================================================
--- branches/upstream/libtext-csv-xs-perl/current/CSV_XS.pm (original)
+++ branches/upstream/libtext-csv-xs-perl/current/CSV_XS.pm Thu Oct 15 15:58:35 2009
@@ -30,7 +30,7 @@
 use Carp;
 
 use vars   qw( $VERSION @ISA );
-$VERSION = "0.68";
+$VERSION = "0.69";
 @ISA     = qw( DynaLoader );
 bootstrap Text::CSV_XS $VERSION;
 
@@ -78,6 +78,7 @@
     _ERROR_INPUT	=> undef,
     _COLUMN_NAMES	=> undef,
     _BOUND_COLUMNS	=> undef,
+    _AHEAD		=> undef,
     );
 my $last_new_err = Text::CSV_XS->SetDiag (0);
 
@@ -116,7 +117,8 @@
     $self;
     } # new
 
-my %_cache_id = (	# Keep in sync with XS!
+# Keep in sync with XS!
+my %_cache_id = ( # Only expose what is accessed from within PM
     quote_char		=>  0,
     escape_char		=>  1,
     sep_char		=>  2,
@@ -128,15 +130,10 @@
     allow_double_quoted	=>  8,
     allow_whitespace	=>  9,
     blank_is_undef	=> 10,
-
     eol			=> 11,	# 11 .. 18
-    eol_len		=> 19,
-    eol_is_cr		=> 20,
-    has_types		=> 21,
     verbatim		=> 22,
     empty_is_undef	=> 23,
     auto_diag		=> 24,
-
     _is_bound		=> 25,	# 25 .. 28
     );
 
@@ -147,10 +144,7 @@
     defined $val or $val = 0;
     $] >= 5.008002 and utf8::decode ($val);
     $self->{$name} = $val;
-    $self->{_CACHE} or return;
-    my @cache = unpack "C*", $self->{_CACHE};
-    $cache[$_cache_id{$name}] = unpack "C", $val;
-    $self->{_CACHE} = pack "C*", @cache;
+    $self->_cache_set ($_cache_id{$name}, $val);
     } # _set_attr_C
 
 # A flag
@@ -159,22 +153,15 @@
     my ($self, $name, $val) = @_;
     defined $val or $val = 0;
     $self->{$name} = $val;
-    $self->{_CACHE} or return;
-    my @cache = unpack "C*", $self->{_CACHE};
-    $cache[$_cache_id{$name}] = 0 + $val;
-    $self->{_CACHE} = pack "C*", @cache;
-    } # _set_attr_C
+    $self->_cache_set ($_cache_id{$name}, 0 + $val);
+    } # _set_attr_X
 
 # A number
 sub _set_attr_N
 {
     my ($self, $name, $val) = @_;
     $self->{$name} = $val;
-    $self->{_CACHE} or return;
-    my @cache = unpack "C*", $self->{_CACHE};
-    my $i = $_cache_id{$name};
-    $cache[$i++] = $_ for unpack "C*", pack "N", $val;
-    $self->{_CACHE} = pack "C*", @cache;
+    $self->_cache_set ($_cache_id{$name}, 0 + $val);
     } # _set_attr_N
 
 # Accessor methods.
@@ -216,19 +203,8 @@
     if (@_) {
 	my $eol = shift;
 	defined $eol or $eol = "";
-	my $eol_len = length $eol;
 	$self->{eol} = $eol;
-	$self->{_CACHE} or return;
-	my @cache = unpack "C*", $self->{_CACHE};
-	if (($cache[$_cache_id{eol_len}] = $eol_len) < 8) {
-	    $cache[$_cache_id{eol_is_cr}] = $eol eq "\r" ? 1 : 0;
-	    }
-	else {
-	    $cache[$_cache_id{eol_is_cr}] = 0;
-	    }
-	$eol .= "\0\0\0\0\0\0\0\0";
-	$cache[$_cache_id{eol} + $_] = unpack "C", substr $eol, $_, 1 for 0 .. 7;
-	$self->{_CACHE} = pack "C*", @cache;
+	$self->_cache_set ($_cache_id{eol}, $eol);
 	}
     $self->{eol};
     } # eol
@@ -299,7 +275,7 @@
 sub verbatim
 {
     my $self = shift;
-    @_ and $self->_set_attr_C ("verbatim", shift);
+    @_ and $self->_set_attr_X ("verbatim", shift);
     $self->{verbatim};
     } # verbatim
 
@@ -1603,7 +1579,7 @@
 Sequences like C<1,"foo\rbar",2> are only allowed when the binary option
 has been selected with the constructor.
 
-=item 2023 "EIQ - QUO character not allowed
+=item 2023 "EIQ - QUO character not allowed"
 
 Sequences like C<"foo "bar" baz",quux> and C<2023,",2008-04-05,"Foo, Bar",\n>
 will cause this error.
@@ -1661,7 +1637,7 @@
 
 =item 3007 "EHR - bind_columns needs refs to writeable scalars"
 
-=item 3008 "EHR - unexpected error in bound fields
+=item 3008 "EHR - unexpected error in bound fields"
 
 =back
 

Modified: branches/upstream/libtext-csv-xs-perl/current/CSV_XS.xs
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-csv-xs-perl/current/CSV_XS.xs?rev=45842&op=diff
==============================================================================
--- branches/upstream/libtext-csv-xs-perl/current/CSV_XS.xs (original)
+++ branches/upstream/libtext-csv-xs-perl/current/CSV_XS.xs Thu Oct 15 15:58:35 2009
@@ -9,9 +9,12 @@
 #include <XSUB.h>
 #define NEED_PL_parser
 #define DPPP_PL_parser_NO_DUMMY
+#define NEED_load_module
+#define NEED_my_snprintf
+#define NEED_newRV_noinc
+#define NEED_pv_escape
+#define	NEED_pv_pretty
 #define NEED_sv_2pv_flags
-#define NEED_load_module
-#define NEED_newRV_noinc
 #define NEED_vload_module
 #include "ppport.h"
 #if (PERL_BCDVERSION <= 0x5005005)
@@ -49,6 +52,7 @@
 #define CACHE_ID_empty_is_undef		23
 #define CACHE_ID_auto_diag		24
 #define CACHE_ID__is_bound		25
+#define CACHE_ID__has_ahead		29
 
 #define CSV_FLAGS_QUO	0x0001
 #define CSV_FLAGS_BIN	0x0002
@@ -95,15 +99,16 @@
     byte	empty_is_undef;
     byte	verbatim;
     byte	auto_diag;
+
     long	is_bound;
 
-    byte	cache[CACHE_SIZE];
+    byte *	cache;
 
     SV *	pself;
     HV *	self;
     SV *	bound;
 
-    char *	eol;
+    byte *	eol;
     STRLEN	eol_len;
     char *	types;
     STRLEN	types_len;
@@ -111,6 +116,7 @@
     char *	bptr;
     SV *	tmp;
     int		utf8;
+    byte	has_ahead;
     STRLEN	size;
     STRLEN	used;
     char	buffer[BUFFER_SIZE];
@@ -232,6 +238,142 @@
     return (err);
     } /* SetDiag */
 
+#define xs_cache_set(hv,idx,val)	cx_xs_cache_set (aTHX_ hv, idx, val)
+static void cx_xs_cache_set (pTHX_ HV *hv, int idx, SV *val)
+{
+    SV **svp;
+    byte *cp;
+
+    unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp)
+	return;
+
+    cp = (byte *)SvPV_nolen (*svp);
+
+    /* single char/byte */
+    if ( idx == CACHE_ID_quote_char	||
+	 idx == CACHE_ID_escape_char	||
+	 idx == CACHE_ID_sep_char) {
+	cp[idx] = SvPOK (val) ? *(SvPVX (val)) : 0;
+	return;
+	}
+
+    /* boolean/numeric */
+    if ( idx == CACHE_ID_binary			||
+         idx == CACHE_ID_keep_meta_info		||
+         idx == CACHE_ID_always_quote		||
+         idx == CACHE_ID_allow_loose_quotes	||
+         idx == CACHE_ID_allow_loose_escapes	||
+         idx == CACHE_ID_allow_double_quoted	||
+         idx == CACHE_ID_allow_whitespace	||
+         idx == CACHE_ID_blank_is_undef		||
+	 idx == CACHE_ID_empty_is_undef		||
+	 idx == CACHE_ID_verbatim		||
+	 idx == CACHE_ID_auto_diag) {
+	cp[idx] = SvIV (val);
+	return;
+	}
+
+    /* a 4-byte IV */
+    if (idx == CACHE_ID__is_bound) {
+	long v = SvIV (val);
+
+	cp[idx    ] = (v & 0xFF000000) >> 24;
+	cp[idx + 1] = (v & 0x00FF0000) >> 16;
+	cp[idx + 2] = (v & 0x0000FF00) >>  8;
+	cp[idx + 3] = (v & 0x000000FF);
+	return;
+	}
+
+    if (idx == CACHE_ID_eol) {
+	STRLEN len = 0;
+	char  *eol = SvPOK (val) ? SvPV (val, len) : "";
+
+	memset (cp + CACHE_ID_eol, 0, 8);
+	cp[CACHE_ID_eol_len]   = len;
+	cp[CACHE_ID_eol_is_cr] = len == 1 && *eol == CH_CR ? 1 : 0;
+	if (len > 0 && len < 8)
+	    memcpy (cp + CACHE_ID_eol, eol, len);
+	}
+    } /* cache_set */
+
+#define _pretty_str(csv,xse)	cx_pretty_str (aTHX_ csv, xse)
+static char *cx_pretty_str (pTHX_ byte *s, STRLEN l)
+{
+    SV *dsv = newSVpvs ("");
+    return (pv_pretty (dsv, (char *)s, l, 0, NULL, NULL,
+	    (PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_UNI_DETECT)));
+    } /* _pretty_str */
+
+#define _cache_show_byte(trim,idx) \
+    c = cp[idx]; (void)fprintf (stderr, "  %-20s %02x:%3d\n", trim, c, c)
+#define _cache_show_char(trim,idx) \
+    c = cp[idx]; (void)fprintf (stderr, "  %-20s %02x:%s\n",  trim, c, _pretty_str (&c, 1))
+#define _cache_show_str(trim,l,s) \
+    (void)fprintf (stderr, "  %-20s %02d:%s\n",  trim, l, _pretty_str (s, l))
+#define _cache_show_cstr(trim,l,idx) _cache_show_str (trim, l, cp + idx)
+
+#define xs_cache_diag(hv)	cx_xs_cache_diag (aTHX_ hv)
+static void cx_xs_cache_diag (pTHX_ HV *hv)
+{
+    SV **svp;
+    byte *cp, c;
+
+    unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp) {
+	(void)fprintf (stderr, "CACHE: invalid\n");
+	return;
+	}
+
+    cp = (byte *)SvPV_nolen (*svp);
+    (void)fprintf (stderr, "CACHE:\n");
+    _cache_show_char ("quote",			CACHE_ID_quote_char);
+    _cache_show_char ("escape",			CACHE_ID_escape_char);
+    _cache_show_char ("sep",			CACHE_ID_sep_char);
+    _cache_show_byte ("binary",			CACHE_ID_binary);
+
+    _cache_show_byte ("allow_double_quoted",	CACHE_ID_allow_double_quoted);
+    _cache_show_byte ("allow_loose_escapes",	CACHE_ID_allow_loose_escapes);
+    _cache_show_byte ("allow_loose_quotes",	CACHE_ID_allow_loose_quotes);
+    _cache_show_byte ("allow_whitespace",	CACHE_ID_allow_whitespace);
+    _cache_show_byte ("always_quote",		CACHE_ID_always_quote);
+    _cache_show_byte ("auto_diag",		CACHE_ID_auto_diag);
+    _cache_show_byte ("blank_is_undef",		CACHE_ID_blank_is_undef);
+    _cache_show_byte ("empty_is_undef",		CACHE_ID_empty_is_undef);
+    _cache_show_byte ("has_ahead",		CACHE_ID__has_ahead);
+    _cache_show_byte ("has_types",		CACHE_ID_has_types);
+    _cache_show_byte ("keep_meta_info",		CACHE_ID_keep_meta_info);
+    _cache_show_byte ("verbatim",		CACHE_ID_verbatim);
+
+    _cache_show_byte ("eol_is_cr",		CACHE_ID_eol_is_cr);
+    _cache_show_byte ("eol_len",		CACHE_ID_eol_len);
+    if (c < 8)
+	_cache_show_cstr ("eol", c,		CACHE_ID_eol);
+    else if ((svp = hv_fetchs (hv, "eol", FALSE)) && *svp && SvOK (*svp)) {
+	STRLEN len;
+	byte *eol = (byte *)SvPV (*svp, len);
+	_cache_show_str  ("eol", len,		eol);
+	}
+    else
+	_cache_show_str  ("eol", 8,		(byte *)"<broken>");
+
+    /* csv->is_bound			=
+	    (csv->cache[CACHE_ID__is_bound    ] << 24) |
+	    (csv->cache[CACHE_ID__is_bound + 1] << 16) |
+	    (csv->cache[CACHE_ID__is_bound + 2] <<  8) |
+	    (csv->cache[CACHE_ID__is_bound + 3]);
+    */
+    } /* xs_cache_diag */
+
+#define set_eol_is_cr(csv)	cx_set_eol_is_cr (aTHX_ csv)
+static void cx_set_eol_is_cr (pTHX_ csv_t *csv)
+{
+		      csv->cache[CACHE_ID_eol    ]   = CH_CR;
+		      csv->cache[CACHE_ID_eol + 1]   = 0;
+    csv->eol_is_cr =  csv->cache[CACHE_ID_eol_is_cr] = 1;
+    csv->eol_len   =  csv->cache[CACHE_ID_eol_len]   = 1;
+    csv->eol       = &csv->cache[CACHE_ID_eol];
+    (void)hv_store (csv->self, "eol",  3, newSVpvn ((char *)csv->eol, 1), 0);
+    } /* set_eol_is_cr */
+
 #define SetupCsv(csv,self,pself)	cx_SetupCsv (aTHX_ csv, self, pself)
 static void cx_SetupCsv (pTHX_ csv_t *csv, HV *self, SV *pself)
 {
@@ -243,7 +385,7 @@
     csv->pself = pself;
 
     if ((svp = hv_fetchs (self, "_CACHE", FALSE)) && *svp) {
-	memcpy (csv->cache, SvPV (*svp, len), CACHE_SIZE);
+	csv->cache = (byte *)SvPVX (*svp);
 
 	csv->quote_char			= csv->cache[CACHE_ID_quote_char	];
 	csv->escape_char		= csv->cache[CACHE_ID_escape_char	];
@@ -261,17 +403,18 @@
 	csv->blank_is_undef		= csv->cache[CACHE_ID_blank_is_undef	];
 	csv->empty_is_undef		= csv->cache[CACHE_ID_empty_is_undef	];
 	csv->verbatim			= csv->cache[CACHE_ID_verbatim		];
+	csv->has_ahead			= csv->cache[CACHE_ID__has_ahead	];
 	csv->eol_is_cr			= csv->cache[CACHE_ID_eol_is_cr		];
 	csv->eol_len			= csv->cache[CACHE_ID_eol_len		];
 	if (csv->eol_len < 8)
-	    csv->eol = (char *)&csv->cache[CACHE_ID_eol];
+	    csv->eol = &csv->cache[CACHE_ID_eol];
 	else {
 	    /* Was too long to cache. must re-fetch */
 	    csv->eol       = NULL;
 	    csv->eol_is_cr = 0;
 	    csv->eol_len   = 0;
 	    if ((svp = hv_fetchs (self, "eol",     FALSE)) && *svp && SvOK (*svp)) {
-		csv->eol = SvPV (*svp, len);
+		csv->eol = (byte *)SvPV (*svp, len);
 		csv->eol_len = len;
 		}
 	    }
@@ -290,6 +433,8 @@
 	    }
 	}
     else {
+	SV *sv_cache;
+
 	csv->quote_char = '"';
 	if ((svp = hv_fetchs (self, "quote_char",  FALSE)) && *svp) {
 	    if (SvOK (*svp)) {
@@ -316,11 +461,11 @@
 		csv->sep_char = *ptr;
 	    }
 
-	csv->eol       = "";
+	csv->eol       = (byte *)"";
 	csv->eol_is_cr = 0;
 	csv->eol_len   = 0;
 	if ((svp = hv_fetchs (self, "eol",         FALSE)) && *svp && SvOK (*svp)) {
-	    csv->eol = SvPV (*svp, len);
+	    csv->eol = (byte *)SvPV (*svp, len);
 	    csv->eol_len = len;
 	    if (len == 1 && *csv->eol == CH_CR)
 		csv->eol_is_cr = 1;
@@ -332,10 +477,9 @@
 	    csv->types_len = len;
 	    }
 
-	csv->is_bound = 0;
-	if ((svp = hv_fetchs (self, "_is_bound", FALSE)) && *svp && SvOK(*svp)) {
+	csv->is_bound  = 0;
+	if ((svp = hv_fetchs (self, "_is_bound", FALSE)) && *svp && SvOK(*svp))
 	    csv->is_bound = SvIV(*svp);
-	    }
 
 	csv->binary			= bool_opt ("binary");
 	csv->keep_meta_info		= bool_opt ("keep_meta_info");
@@ -349,6 +493,11 @@
 	csv->verbatim			= bool_opt ("verbatim");
 	csv->auto_diag			= bool_opt ("auto_diag");
 
+	sv_cache = newSVpvn ("", CACHE_SIZE);
+	csv->cache = (byte *)SvPVX (sv_cache);
+	memset (csv->cache, 0, CACHE_SIZE);
+	SvREADONLY_on (sv_cache);
+
 	csv->cache[CACHE_ID_quote_char]			= csv->quote_char;
 	csv->cache[CACHE_ID_escape_char]		= csv->escape_char;
 	csv->cache[CACHE_ID_sep_char]			= csv->sep_char;
@@ -368,16 +517,20 @@
 	csv->cache[CACHE_ID_eol_is_cr]			= csv->eol_is_cr;
 	csv->cache[CACHE_ID_eol_len]			= csv->eol_len;
 	if (csv->eol_len > 0 && csv->eol_len < 8 && csv->eol)
-	    strcpy ((char *)&csv->cache[CACHE_ID_eol], csv->eol);
+	    memcpy ((char *)&csv->cache[CACHE_ID_eol], csv->eol, csv->eol_len);
 	csv->cache[CACHE_ID_has_types]			= csv->types ? 1 : 0;
+	csv->cache[CACHE_ID__has_ahead]			= csv->has_ahead = 0;
 	csv->cache[CACHE_ID__is_bound    ] = (csv->is_bound & 0xFF000000) >> 24;
 	csv->cache[CACHE_ID__is_bound + 1] = (csv->is_bound & 0x00FF0000) >> 16;
 	csv->cache[CACHE_ID__is_bound + 2] = (csv->is_bound & 0x0000FF00) >>  8;
 	csv->cache[CACHE_ID__is_bound + 3] = (csv->is_bound & 0x000000FF);
 
-	if ((csv->tmp = newSVpvn ((char *)csv->cache, CACHE_SIZE)))
-	    (void)hv_store (self, "_CACHE", 6, csv->tmp, 0);
-	}
+	(void)hv_store (self, "_CACHE", 6, sv_cache, 0);
+	}
+
+    csv->utf8 = 0;
+    csv->size = 0;
+    csv->used = 0;
 
     if (csv->is_bound) {
 	if ((svp = hv_fetchs (self, "_BOUND_COLUMNS", FALSE)) && _is_arrayref (*svp))
@@ -385,8 +538,6 @@
 	else
 	    csv->is_bound = 0;
 	}
-    csv->utf8 = 0;
-    csv->used = 0;
     } /* SetupCsv */
 
 #define Print(csv,dst)		cx_Print (aTHX_ csv, dst)
@@ -518,7 +669,7 @@
 	}
     if (csv->eol_len) {
 	STRLEN	len = csv->eol_len;
-	char   *ptr = csv->eol;
+	byte   *ptr = csv->eol;
 
 	while (len--)
 	    CSV_PUT (csv, dst, *ptr++);
@@ -604,11 +755,9 @@
     }
 
 #define CSV_GET					\
-    ((c_ungetc != EOF)				\
-	? c_ungetc				\
-	: ((csv->used < csv->size)		\
-	    ? ((byte)csv->bptr[(csv)->used++])	\
-	    : CsvGet (csv, src)))
+    ((csv->used < csv->size)			\
+	? ((byte)csv->bptr[csv->used++])	\
+	: CsvGet (csv, src))
 
 #define AV_PUSH {						\
     *SvEND (sv) = (char)0;					\
@@ -687,7 +836,6 @@
 static int cx_Parse (pTHX_ csv_t *csv, SV *src, AV *fields, AV *fflags)
 {
     int		 c, f = 0;
-    int		 c_ungetc		= EOF;
     int		 waitingForField	= 1;
     SV		*sv			= NULL;
     STRLEN	 len;
@@ -802,6 +950,14 @@
 		    goto restart;
 		    }
 
+		if (csv->useIO && csv->eol_len == 0 && !is_csv_binary (c2)) {
+		    set_eol_is_cr (csv);
+		    c = CH_NL;
+		    csv->used--;
+		    csv->has_ahead++;
+		    goto restart;
+		    }
+
 		csv->used--;
 		ERROR_INSIDE_FIELD (2031);
 		}
@@ -824,6 +980,14 @@
 		c2 = CSV_GET;
 
 		if (c2 == CH_NL) {
+		    AV_PUSH;
+		    return TRUE;
+		    }
+
+		if (csv->useIO && csv->eol_len == 0 && !is_csv_binary (c2)) {
+		    set_eol_is_cr (csv);
+		    csv->used--;
+		    csv->has_ahead++;
 		    AV_PUSH;
 		    return TRUE;
 		    }
@@ -937,6 +1101,14 @@
 
 			if (c3 == CH_NL) {
 			    AV_PUSH;
+			    return TRUE;
+			    }
+
+			if (csv->useIO && csv->eol_len == 0 && !is_csv_binary (c3)) {
+			    set_eol_is_cr (csv);
+			    AV_PUSH;
+			    csv->used--;
+			    csv->has_ahead++;
 			    return TRUE;
 			    }
 			}
@@ -1005,8 +1177,8 @@
 	    } /* ESC char */
 	else {
 #if MAINT_DEBUG > 1
-	    fprintf (stderr, "# %d/%d/%02x pos %d = === '%c' '%c'\n",
-		waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c, c_ungetc);
+	    fprintf (stderr, "# %d/%d/%02x pos %d = === '%c'\n",
+		waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c);
 #endif
 	    if (waitingForField) {
 		if (csv->allow_whitespace && is_whitespace (c)) {
@@ -1076,12 +1248,19 @@
 static int cx_xsParse (pTHX_ SV *self, HV *hv, AV *av, AV *avf, SV *src, bool useIO)
 {
     csv_t	csv;
-    int		result;
+    int		result, ahead = 0;
 
     SetupCsv (&csv, hv, self);
+
     if ((csv.useIO = useIO)) {
 	csv.tmp  = NULL;
-	csv.size = 0;
+	if ((ahead = csv.has_ahead)) {
+	    SV **svp;
+	    if ((svp = hv_fetchs (hv, "_AHEAD", FALSE)) && *svp) {
+		csv.bptr = SvPV (csv.tmp = *svp, csv.size);
+		csv.used = 0;
+		}
+	    }
 	}
     else {
 	csv.tmp  = src;
@@ -1089,12 +1268,23 @@
 	csv.bptr = SvPV (src, csv.size);
 	}
     (void)hv_delete (hv, "_ERROR_INPUT", 12, G_DISCARD);
+
     result = Parse (&csv, src, av, avf);
-    if (csv.useIO & useIO_EOF)
-	(void)hv_store (hv, "_EOF", 4, &PL_sv_yes, 0);
-    else
-	(void)hv_store (hv, "_EOF", 4, &PL_sv_no,  0);
+
+    (void)hv_store (hv, "_EOF", 4, &PL_sv_no,  0);
     if (csv.useIO) {
+	if (csv.tmp && csv.used < csv.size && csv.has_ahead) {
+	    SV *sv = newSVpvn (csv.bptr + csv.used, csv.size - csv.used);
+	    (void)hv_delete (hv, "_AHEAD", 6, G_DISCARD);
+	    (void)hv_store  (hv, "_AHEAD", 6, sv, 0);
+	    }
+	else {
+	    csv.has_ahead = 0;
+	    if (csv.useIO & useIO_EOF)
+		(void)hv_store (hv, "_EOF", 4, &PL_sv_yes, 0);
+	    }
+	csv.cache[CACHE_ID__has_ahead] = csv.has_ahead;
+
 	if (csv.keep_meta_info) {
 	    (void)hv_delete (hv, "_FFLAGS", 7, G_DISCARD);
 	    (void)hv_store  (hv, "_FFLAGS", 7, newRV_noinc ((SV *)avf), 0);
@@ -1259,3 +1449,29 @@
 	: &PL_sv_undef;
     XSRETURN (1);
     /* XS getline */
+
+void
+_cache_set (self, idx, val)
+    SV		*self
+    int		 idx
+    SV		*val
+
+  PPCODE:
+    HV	*hv;
+
+    CSV_XS_SELF;
+    xs_cache_set (hv, idx, val);
+    XSRETURN (1);
+    /* XS _cache_diag */
+
+void
+_cache_diag (self)
+    SV		*self
+
+  PPCODE:
+    HV	*hv;
+
+    CSV_XS_SELF;
+    xs_cache_diag (hv);
+    XSRETURN (1);
+    /* XS _cache_diag */

Modified: branches/upstream/libtext-csv-xs-perl/current/ChangeLog
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-csv-xs-perl/current/ChangeLog?rev=45842&op=diff
==============================================================================
--- branches/upstream/libtext-csv-xs-perl/current/ChangeLog (original)
+++ branches/upstream/libtext-csv-xs-perl/current/ChangeLog Thu Oct 15 15:58:35 2009
@@ -1,4 +1,11 @@
-2009-09-25 0.68 - H.Merijn Brand   <h.m.brand at xs4all.nl>
+2009-10-10 0.69 - H.Merijn Brand   <h.m.brand at xs4all.nl>
+
+    * Missing end quotes in error code docs
+    * examples/csv-check now shows detected eol
+    * Auto detection of eol => "\r" in streams
+    * Optimized caching. All cache changes now in XS
+
+2009-10-04 0.68 - H.Merijn Brand   <h.m.brand at xs4all.nl>
 
     * Attribute auto_diag now localizes to +1 if autodie is active
     * Output name generation in csv2xls (RT#48954)

Modified: branches/upstream/libtext-csv-xs-perl/current/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-csv-xs-perl/current/META.yml?rev=45842&op=diff
==============================================================================
--- branches/upstream/libtext-csv-xs-perl/current/META.yml (original)
+++ branches/upstream/libtext-csv-xs-perl/current/META.yml Thu Oct 15 15:58:35 2009
@@ -1,6 +1,6 @@
 --- #YAML:1.1
 name:                    Text-CSV_XS
-version:                 0.68
+version:                 0.69
 abstract:                Comma-Separated Values manipulation routines
 license:                 perl
 author:              
@@ -10,7 +10,7 @@
 provides:
     Text::CSV_XS:
         file:            CSV_XS.pm
-        version:         0.68
+        version:         0.69
 requires:     
     perl:                5.005
     DynaLoader:          0

Modified: branches/upstream/libtext-csv-xs-perl/current/README
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-csv-xs-perl/current/README?rev=45842&op=diff
==============================================================================
--- branches/upstream/libtext-csv-xs-perl/current/README (original)
+++ branches/upstream/libtext-csv-xs-perl/current/README Thu Oct 15 15:58:35 2009
@@ -28,6 +28,9 @@
 Prerequisites:
     perl 5.005_03. 5.005 will not be able to build the manual pages.
     examples require 5.006, but are probably easy to mold into 5.005
+    examples/csv-check requires perl with defined-or and PerlIO, and
+    making that work on other versions is left as an exercise to the
+    reader.
 
 Build/Installation:
     Standard build/installation:

Modified: branches/upstream/libtext-csv-xs-perl/current/examples/csv-check
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-csv-xs-perl/current/examples/csv-check?rev=45842&op=diff
==============================================================================
--- branches/upstream/libtext-csv-xs-perl/current/examples/csv-check (original)
+++ branches/upstream/libtext-csv-xs-perl/current/examples/csv-check Thu Oct 15 15:58:35 2009
@@ -1,7 +1,9 @@
 #!/pro/bin/perl
 
 # csv-check: Check validity of CSV file and report
-#	   (m)'08 [10 Aug 2008] Copyright H.M.Brand 2007-2009
+#	   (m)'09 [12 Oct 2009] Copyright H.M.Brand 2007-2009
+
+# This code requires the defined-or feature and PerlIO
 
 use strict;
 use warnings;
@@ -9,7 +11,7 @@
 use Data::Peek;
 use Encode qw( decode );
 
-our $VERSION = "1.3";	# 2009-04-09
+our $VERSION = "1.4";	# 2009-10-12
 
 sub usage
 {
@@ -37,27 +39,19 @@
 
 use Text::CSV_XS;
 
-if (@ARGV && -f $ARGV[0] && !-s $ARGV[0]) {
-    print STDERR "$ARGV[0] is empty\n";
-    exit 0;
-    }
+my $fn = $ARGV[0] // "-";
+my $data = do { local $/; <> } or die "No data to analyze\n";
 
-my ($bin, $rows, %cols, $firstline) = (0, 0);
+my ($bin, $rows, $eol, %cols) = (0, 0, undef);
 unless ($sep) { # No sep char passed, try to auto-detect;
-    while (<>) {
-	m/\S/ or next;	# Skip empty leading blank lines
-	$sep = m/["\d],["\d,]/ ? ","  :
-	       m/["\d];["\d;]/ ? ";"  :
-	       m/["\d]\t["\d]/ ? "\t" :
-	       # If neither, then for unquoted strings
-	       m/\w,[\w,]/     ? ","  :
-	       m/\w;[\w;]/     ? ";"  :
-	       m/\w\t[\w]/     ? "\t" :
-				 ","  ;
-	$firstline = $_;
-	$rows++;
-	last;
-	}
+    $sep = $data =~ m/["\d],["\d,]/ ? ","  :
+	   $data =~ m/["\d];["\d;]/ ? ";"  :
+	   $data =~ m/["\d]\t["\d]/ ? "\t" :
+	   # If neither, then for unquoted strings
+	   $data =~ m/\w,[\w,]/     ? ","  :
+	   $data =~ m/\w;[\w;]/     ? ";"  :
+	   $data =~ m/\w\t[\w]/     ? "\t" : ",";
+    $data =~ m/([\r\n]+)\Z/ and $eol = DDisplay "$1";
     }
 
 my $csv = Text::CSV_XS-> new ({
@@ -78,8 +72,9 @@
 	my @coll = sort { $a <=> $b } keys %cols;
 	local $" = ", ";
 	my $cols = @coll == 1 ? $coll[0] : "(@coll)";
+	defined $eol or $eol = $csv->eol || "--unknown--";
 	print "OK: rows: $rows, columns: $cols\n";
-	print "    sep = <$sep>, quo = <$quo>, bin = <$bin>\n";
+	print "    sep = <$sep>, quo = <$quo>, bin = <$bin>, eol = <$eol>\n";
 	exit 0;
 	}
 
@@ -116,12 +111,8 @@
 	}
     } # stats
 
-if ($firstline) {
-    $csv->parse ($_) or done;
-    stats [ $csv->fields ];
-    }
-
-while (my $row = $csv->getline (*ARGV)) {
+open my $fh, "<", \$data or die "$fn: $!\n";
+while (my $row = $csv->getline ($fh)) {
     $rows++;
     stats $row;
     }

Modified: branches/upstream/libtext-csv-xs-perl/current/t/45_eol.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-csv-xs-perl/current/t/45_eol.t?rev=45842&op=diff
==============================================================================
--- branches/upstream/libtext-csv-xs-perl/current/t/45_eol.t (original)
+++ branches/upstream/libtext-csv-xs-perl/current/t/45_eol.t Thu Oct 15 15:58:35 2009
@@ -3,7 +3,7 @@
 use strict;
 $^W = 1;
 
-use Test::More tests => 262;
+use Test::More tests => 278;
 
 BEGIN {
     require_ok "Text::CSV_XS";
@@ -14,6 +14,8 @@
 $| = 1;
 
 # Embedded newline tests
+
+my $def_rs = $/;
 
 foreach my $rs ("\n", "\r\n", "\r") {
     for $\ (undef, $rs) {
@@ -55,7 +57,7 @@
 		    }
 		else {
 		    ok (my $row = $csv->getline (*FH),	"getline |$s_eol|");
-		    is (ref $row, "ARRAY",			"row     |$s_eol|");
+		    is (ref $row, "ARRAY",		"row     |$s_eol|");
 		    @p = @$row;
 		    }
 
@@ -69,6 +71,7 @@
 	unlink "_eol.csv";
 	}
     }
+$/ = $def_rs;
 
 {   my $csv = Text::CSV_XS->new ({ escape_char => undef });
 
@@ -106,6 +109,26 @@
 	unlink "_eol.csv";
 	}
     }
+$/ = $def_rs;
+
+ok (1, "Auto-detecting \\r");
+{   my @row = qw( a b c ); local $" = ",";
+    for (["\n", "\\n"], ["\r\n", "\\r\\n"], ["\r", "\\r"]) {
+	my ($eol, $s_eol) = @$_;
+	open  FH, ">_eol.csv";
+	print FH qq{@row$eol at row$eol at row$eol\x91};
+	close FH;
+	open  FH, "<_eol.csv";
+	my $c = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
+	is ($c->eol (),			"",		"default EOL");
+	is_deeply ($c->getline (*FH),	[ @row ],	"EOL 1 $s_eol");
+	is ($c->eol (),	$eol eq "\r" ? "\r" : "",	"EOL");
+	is_deeply ($c->getline (*FH),	[ @row ],	"EOL 2 $s_eol");
+	is_deeply ($c->getline (*FH),	[ @row ],	"EOL 3 $s_eol");
+	close FH;
+	unlink "_eol.csv";
+	}
+    }
 
 ok (1, "Specific \\r test from tfrayner");
 {   $/ = "\r";
@@ -126,6 +149,7 @@
     close FH;
     unlink "_eol.csv";
     }
+$/ = $def_rs;
 
 ok (1, "EOL undef");
 {   $/ = "\r";
@@ -142,5 +166,6 @@
     close FH;
     unlink "_eol.csv";
     }
+$/ = $def_rs;
 
 1;

Modified: branches/upstream/libtext-csv-xs-perl/current/t/65_allow.t
URL: http://svn.debian.org/wsvn/pkg-perl/branches/upstream/libtext-csv-xs-perl/current/t/65_allow.t?rev=45842&op=diff
==============================================================================
--- branches/upstream/libtext-csv-xs-perl/current/t/65_allow.t (original)
+++ branches/upstream/libtext-csv-xs-perl/current/t/65_allow.t Thu Oct 15 15:58:35 2009
@@ -313,12 +313,14 @@
 	my @fld = $csv->fields;
 	is (@fld, 4,				"#\\r\\n $gc fields");
 	is ($fld[2], "Abe",			"#\\r\\n $gc fld 2");
-	is ($fld[3], "Timmerman#\r\n",		"#\\r\\n $gc fld 3");
+	is ($fld[3], $gc ? "Timmerman#\r\n"
+			 : "Timmerman#",	"#\\r\\n $gc fld 3");
 
 	ok ($csv->parse ($str[1]),		"#\\r\\n $gc parse");
 	@fld = $csv->fields;
 	is (@fld, 3,				"#\\r\\n $gc fields");
-	is ($fld[2], "Abe\nTimmerman#\r\n",	"#\\r\\n $gc fld 2");
+	is ($fld[2], $gc ? "Abe\nTimmerman#\r\n"
+			 : "Abe",		"#\\r\\n $gc fld 2");
 	}
 
     ok (1, "verbatim on getline (*FH)");
@@ -335,11 +337,13 @@
 	ok ($row = $csv->getline (*FH),		"#\\r\\n $gc getline");
 	is (@$row, 4,				"#\\r\\n $gc fields");
 	is ($row->[2], "Abe",			"#\\r\\n $gc fld 2");
-	is ($row->[3], "Timmerman",		"#\\r\\n $gc fld 3");
+	is ($row->[3], $gc ? "Timmerman"
+			   : "Timmerman#",	"#\\r\\n $gc fld 3");
 
 	ok ($row = $csv->getline (*FH),		"#\\r\\n $gc parse");
 	is (@$row, 3,				"#\\r\\n $gc fields");
-	is ($row->[2], "Abe\nTimmerman",	"#\\r\\n $gc fld 2");
+	is ($row->[2], $gc ? "Abe\nTimmerman"
+			   : "Abe",		"#\\r\\n $gc fld 2");
 	}
 
     $gc = $csv->verbatim ();




More information about the Pkg-perl-cvs-commits mailing list