r19171 - in /branches/upstream/libtext-csv-perl/current: CSV_XS.pm CSV_XS.xs ChangeLog MANIFEST META.yml Makefile.PL examples/csv-check examples/parser-xs.pl t/80_diag.t

gregoa at users.alioth.debian.org gregoa at users.alioth.debian.org
Fri Apr 25 21:59:51 UTC 2008


Author: gregoa
Date: Fri Apr 25 21:59:50 2008
New Revision: 19171

URL: http://svn.debian.org/wsvn/?sc=1&rev=19171
Log:
[svn-upgrade] Integrating new upstream version, libtext-csv-perl (0.45)

Added:
    branches/upstream/libtext-csv-perl/current/examples/parser-xs.pl   (with props)
Modified:
    branches/upstream/libtext-csv-perl/current/CSV_XS.pm
    branches/upstream/libtext-csv-perl/current/CSV_XS.xs
    branches/upstream/libtext-csv-perl/current/ChangeLog
    branches/upstream/libtext-csv-perl/current/MANIFEST
    branches/upstream/libtext-csv-perl/current/META.yml
    branches/upstream/libtext-csv-perl/current/Makefile.PL
    branches/upstream/libtext-csv-perl/current/examples/csv-check
    branches/upstream/libtext-csv-perl/current/t/80_diag.t

Modified: branches/upstream/libtext-csv-perl/current/CSV_XS.pm
URL: http://svn.debian.org/wsvn/branches/upstream/libtext-csv-perl/current/CSV_XS.pm?rev=19171&op=diff
==============================================================================
--- branches/upstream/libtext-csv-perl/current/CSV_XS.pm (original)
+++ branches/upstream/libtext-csv-perl/current/CSV_XS.pm Fri Apr 25 21:59:50 2008
@@ -30,7 +30,7 @@
 use Carp;
 
 use vars   qw( $VERSION @ISA );
-$VERSION = "0.43";
+$VERSION = "0.45";
 @ISA     = qw( DynaLoader );
 
 sub PV { 0 }
@@ -441,7 +441,7 @@
 {
     my ($self, @args, %hr) = @_;
     $self->{_COLUMN_NAMES} or croak ($self->SetDiag (3002));
-    my $fr = $self->getline (@args) or return undef;
+    my $fr = $self->getline (@args) or return;
     @hr{@{$self->{_COLUMN_NAMES}}} = @$fr;
     \%hr;
     } # getline_hr
@@ -1114,8 +1114,9 @@
 
 If called in list context, it will return the error code and the error
 message in that order. If the last error was from parsing, the third
-value returned is the best guess at the location within the line that was
-being parsed. It's value is 1-based.
+value returned is a best guess at the location within the line that was
+being parsed. It's value is 1-based. See C<examples/csv-check> for how
+this can be used.
 
 If called in scalar context, it will return the diagnostics in a single
 scalar, a-la $!. It will contain the error code in numeric context, and
@@ -1207,7 +1208,10 @@
   close $fh;
 
 For more extended examples, see the C<examples/> subdirectory in the
-original distribution.
+original distribution. Included is C<examples/parser-xs.pl>, that could
+be used to `fix' bad CSV
+
+  perl examples/parser-xs.pl bad.csv >good.csv
 
 =head1 TODO
 
@@ -1272,15 +1276,6 @@
         encoding_in  => "iso-8859-1", # Only the input
         encoding_out => "cp1252",     # Only the output
         });
-
-=item Double double quotes
-
-There seem to be applications around that write their dates like
-
-   1,4,""12/11/2004"",4,1
-
-If we would support that, probably through allow_double_quoted
-Definitely belongs in t/65_allow.t
 
 =item Parse the whole file at once
 

Modified: branches/upstream/libtext-csv-perl/current/CSV_XS.xs
URL: http://svn.debian.org/wsvn/branches/upstream/libtext-csv-perl/current/CSV_XS.xs?rev=19171&op=diff
==============================================================================
--- branches/upstream/libtext-csv-perl/current/CSV_XS.xs (original)
+++ branches/upstream/libtext-csv-perl/current/CSV_XS.xs Fri Apr 25 21:59:50 2008
@@ -180,10 +180,12 @@
 	sv_upgrade (err, SVt_PVIV);
 	SvIV_set (err, xse);
 	SvIOK_on (err);
-	hv_store (csv->self, "_ERROR_DIAG", 11, err, 0);
-	}
-    if (xse == 0)
-	hv_store (csv->self, "_ERROR_POS",  10, newSViv (0), 0);
+	hv_store (csv->self, "_ERROR_DIAG",  11, err,           0);
+	}
+    if (xse == 0) {
+	hv_store (csv->self, "_ERROR_POS",   10, newSViv  (0),  0);
+	hv_store (csv->self, "_ERROR_INPUT", 12, newSVpvs (""), 0);
+	}
     return (err);
     } /* SetDiag */
 
@@ -514,12 +516,12 @@
 
 #define ERROR_INSIDE_QUOTES(diag_code) {	\
     SvREFCNT_dec (sv);				\
-    ParseError (csv, diag_code, spl);		\
+    ParseError (csv, diag_code, csv->used - 1);	\
     return FALSE;				\
     }
 #define ERROR_INSIDE_FIELD(diag_code) {		\
     SvREFCNT_dec (sv);				\
-    ParseError (csv, diag_code, spl);		\
+    ParseError (csv, diag_code, csv->used - 1);	\
     return FALSE;				\
     }
 
@@ -745,6 +747,7 @@
 		    goto restart;
 		    }
 
+		csv->used--;
 		ERROR_INSIDE_FIELD (2031);
 		}
 
@@ -823,7 +826,7 @@
 /* uncovered */		    return TRUE;
 			    }
 
-			ParseError (csv, 2010, spl);
+			ParseError (csv, 2010, csv->used - 2);
 			return FALSE;
 			}
 
@@ -838,7 +841,7 @@
 			goto restart;
 			}
 
-		    ParseError (csv, 2011, spl);
+		    ParseError (csv, 2011, csv->used - 1);
 		    return FALSE;
 		    }
 
@@ -906,6 +909,7 @@
 			}
 #endif
 
+		    csv->used--;
 		    ERROR_INSIDE_QUOTES (2023);
 		    }
 		}
@@ -933,8 +937,10 @@
 	    if (f & CSV_FLAGS_QUO) {
 		int	c2 = CSV_GET;
 
-		if (c2 == EOF)
+		if (c2 == EOF) {
+		    csv->used--;
 		    ERROR_INSIDE_QUOTES (2024);
+		    }
 
 		if (c2 == '0')
 		    CSV_PUT_SV (sv, 0)
@@ -946,15 +952,19 @@
 #endif
 		     )
 		    CSV_PUT_SV (sv, c2)
-		else
+		else {
+		    csv->used--;
 		    ERROR_INSIDE_QUOTES (2025);
+		    }
 		}
 	    else
 	    if (sv) {
 		int	c2 = CSV_GET;
 
-		if (c2 == EOF)
+		if (c2 == EOF) {
+		    csv->used--;
 		    ERROR_INSIDE_FIELD (2035);
+		    }
 
 		CSV_PUT_SV (sv, c2);
 		}

Modified: branches/upstream/libtext-csv-perl/current/ChangeLog
URL: http://svn.debian.org/wsvn/branches/upstream/libtext-csv-perl/current/ChangeLog?rev=19171&op=diff
==============================================================================
--- branches/upstream/libtext-csv-perl/current/ChangeLog (original)
+++ branches/upstream/libtext-csv-perl/current/ChangeLog Fri Apr 25 21:59:50 2008
@@ -1,3 +1,14 @@
+2008-04-23  0.45 - H.Merijn Brand   <h.m.brand at xs4all.nl>
+
+	* Forgot to pack examples/parser-xs.pl
+
+2008-04-23  0.44 - H.Merijn Brand   <h.m.brand at xs4all.nl>
+
+	* Fixed the error position returned as third arg in error_diag ()
+	* Made examples/csv-check use this even more vebose
+	* Removed double-double quote from TODO
+	* Added examples/parse-xs.pl (attempt to fix bad CSV)
+
 2008-04-21  0.43 - H.Merijn Brand   <h.m.brand at xs4all.nl>
 
 	* parse errors try to remember failing position

Modified: branches/upstream/libtext-csv-perl/current/MANIFEST
URL: http://svn.debian.org/wsvn/branches/upstream/libtext-csv-perl/current/MANIFEST?rev=19171&op=diff
==============================================================================
--- branches/upstream/libtext-csv-perl/current/MANIFEST (original)
+++ branches/upstream/libtext-csv-perl/current/MANIFEST Fri Apr 25 21:59:50 2008
@@ -28,5 +28,6 @@
 t/util.pl		Extra test utilities
 examples/csv2xls	Script to onvert CSV files to M$Excel
 examples/csv-check	Script to check a CSV file/stream
+examples/parser-xs.pl	Parse CSV stream, be forgiving on bad lines
 examples/speed.pl	Small benchmark script
 META.yml                                 Module meta-data (added by MakeMaker)

Modified: branches/upstream/libtext-csv-perl/current/META.yml
URL: http://svn.debian.org/wsvn/branches/upstream/libtext-csv-perl/current/META.yml?rev=19171&op=diff
==============================================================================
--- branches/upstream/libtext-csv-perl/current/META.yml (original)
+++ branches/upstream/libtext-csv-perl/current/META.yml Fri Apr 25 21:59:50 2008
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:              Text-CSV_XS
-version:           0.43
+version:           0.45
 abstract:          Comma-Separated Values manipulation routines
 license:           perl
 author:              
@@ -10,7 +10,7 @@
 provides:
     Text::CSV_XS:
         file:      CSV_XS.pm
-        version:   0.43
+        version:   0.45
 requires:     
     perl:          5.005
     DynaLoader:    0

Modified: branches/upstream/libtext-csv-perl/current/Makefile.PL
URL: http://svn.debian.org/wsvn/branches/upstream/libtext-csv-perl/current/Makefile.PL?rev=19171&op=diff
==============================================================================
--- branches/upstream/libtext-csv-perl/current/Makefile.PL (original)
+++ branches/upstream/libtext-csv-perl/current/Makefile.PL Fri Apr 25 21:59:50 2008
@@ -83,8 +83,9 @@
 	'fixmeta:	distmeta',
 	'	perl genMETA.pl',
 	'',
-	'tgzdist:	fixmeta $(DISTVNAME).tar.gz',
+	'tgzdist:	fixmeta $(DISTVNAME).tar.gz distcheck',
 	'	- at mv -f $(DISTVNAME).tar.gz $(DISTVNAME).tgz',
+	'	- at cpants_lint.pl $(DISTVNAME).tgz',
 	'',
 	'test_speed: pure_all',
 	'	PERL_DL_NONLAZY=1 $(FULLPERLRUN) -I"$(INST_LIB)" -I"$(INST_ARCHLIB)" examples/speed.pl',

Modified: branches/upstream/libtext-csv-perl/current/examples/csv-check
URL: http://svn.debian.org/wsvn/branches/upstream/libtext-csv-perl/current/examples/csv-check?rev=19171&op=diff
==============================================================================
--- branches/upstream/libtext-csv-perl/current/examples/csv-check (original)
+++ branches/upstream/libtext-csv-perl/current/examples/csv-check Fri Apr 25 21:59:50 2008
@@ -3,7 +3,7 @@
 use strict;
 use warnings;
 
-our $VERSION = "1.0";	# 2008-04-17
+our $VERSION = "1.1";	# 2008-04-17
 
 sub usage
 {
@@ -28,6 +28,11 @@
     ) or usage (1);
 
 use Text::CSV_XS;
+
+if (@ARGV && -f $ARGV[0] && !-s $ARGV[0]) {
+    print STDERR "$ARGV[0] is empty\n";
+    exit 0;
+    }
 
 my ($bin, $rows, %cols, $firstline) = (0, 0);
 unless ($sep) { # No sep char passed, try to auto-detect;
@@ -59,6 +64,7 @@
 
 sub done
 {
+    print "Checked with $0 $VERSION using Text::CSV_XS $Text::CSV_XS::VERSION\n";
     my @diag = $csv->error_diag;
     if ($diag[0] == 2012 && $csv->eof) {
 	my @coll = sort { $a <=> $b } keys %cols;
@@ -68,9 +74,19 @@
 	print "    sep = <$sep>, quo = <$quo>, bin = <$bin>\n";
 	exit 0;
 	}
-    my $loc = $.;
-    $diag[2] and $loc .= "/$diag[2]";
-    print "$ARGV line $loc - $diag[1]\n";
+
+    if ($diag[2]) {
+	print "$ARGV line $./$diag[2] - $diag[0] - $diag[1]\n";
+	my $ep  = $diag[2] - 1; # diag[2] is 1-based
+	my $err = $csv->error_input . "         ";
+	substr $err, $ep + 1, 0, "*";
+	substr $err, $ep,     0, "*";
+	($err = substr $err, $ep - 5, 12) =~ s/ +$//;
+	print "    |$err|\n";
+	}
+    else {
+	print "$ARGV line $. - $diag[1]\n";
+	}
     exit $diag[0];
     } # done
 

Added: branches/upstream/libtext-csv-perl/current/examples/parser-xs.pl
URL: http://svn.debian.org/wsvn/branches/upstream/libtext-csv-perl/current/examples/parser-xs.pl?rev=19171&op=file
==============================================================================
--- branches/upstream/libtext-csv-perl/current/examples/parser-xs.pl (added)
+++ branches/upstream/libtext-csv-perl/current/examples/parser-xs.pl Fri Apr 25 21:59:50 2008
@@ -1,0 +1,69 @@
+#!/pro/bin/perl
+
+# This script can be used as a base to parse unreliable CSV streams
+# Modify to your own needs
+
+use strict;
+use warnings;
+
+use Text::CSV_XS;
+
+my $csv = Text::CSV_XS->new ({ binary             => 1,
+			       blank_is_undef     => 1,
+			       eol                => $/,
+			       });
+my $csa = Text::CSV_XS->new ({ binary             => 1,
+			       allow_loose_quotes => 1,
+			       blank_is_undef     => 1,
+			       escape_char        => undef,
+			       });
+
+my $file = @ARGV ? shift : "test.csv";
+open my $fh, "<", $file or die "$file: $!\n";
+
+my %err_eol = map { $_ => 1 } 2010, 2027, 2031, 2032;
+
+print STDERR "Reading $file with Text::CSV_XS $Text::CSV_XS::VERSION\n";
+while (1) {
+    my $row = $csv->getline ($fh);
+
+    unless ($row) {	# Parsing failed
+
+	# Could be end of file
+	$csv->eof and last;
+
+	# Diagnose and show what was wrong
+	my @diag = $csv->error_diag;
+	print STDERR "$file line $./$diag[2] - $diag[0] - $diag[1]\n";
+	my $ep  = $diag[2] - 1; # diag[2] is 1-based
+	my $ein = $csv->error_input;	# The line scanned so far
+	my $err = $ein . "         ";
+	substr $err, $ep + 1, 0, "*";	# Bad character marked between **
+	substr $err, $ep,     0, "*";
+	($err = substr $err, $ep - 5, 12) =~ s/ +$//;
+	print STDERR "    |$err|\n";
+
+	REPARSE: {	# Now retry with allowed options
+	    if ($csa->parse ($ein)) {
+		print STDERR "Accepted in allow mode ...\n";
+		$row = [ $csa->fields ];
+		}
+	    else {	# Still fails
+		my @diag = $csa->error_diag;
+		if (exists $err_eol{$diag[0]}) { # \r or \n inside field
+		    print STDERR "  Extending line with next chunk\n";
+		    $ein .= scalar <$fh>;
+		    goto REPARSE;
+		    }
+
+		print STDERR "  Also could not parse it in allow mode\n";
+		print STDERR "  $./$diag[2] - $diag[0] - $diag[1]\n";
+		print STDERR "  Line skipped\n";
+		next;
+		}
+	    }
+	}
+
+    # Data was fine, print data properly quoted
+    $csv->print (*STDOUT, $row);
+    }

Propchange: branches/upstream/libtext-csv-perl/current/examples/parser-xs.pl
------------------------------------------------------------------------------
    svn:executable = *

Modified: branches/upstream/libtext-csv-perl/current/t/80_diag.t
URL: http://svn.debian.org/wsvn/branches/upstream/libtext-csv-perl/current/t/80_diag.t?rev=19171&op=diff
==============================================================================
--- branches/upstream/libtext-csv-perl/current/t/80_diag.t (original)
+++ branches/upstream/libtext-csv-perl/current/t/80_diag.t Fri Apr 25 21:59:50 2008
@@ -45,7 +45,7 @@
 is ($csv->error_diag (), "",		"No errors yet");
 
 parse_err 2010,  3, qq{"x"\r};
-parse_err 2011,  3, qq{"x"x};
+parse_err 2011,  4, qq{"x"x};
 
 parse_err 2021,  2, qq{"\n"};
 parse_err 2022,  2, qq{"\r"};




More information about the Pkg-perl-cvs-commits mailing list