r18404 - in /trunk/libtext-csv-perl: CSV_XS.pm CSV_XS.xs ChangeLog MANIFEST META.yml debian/changelog examples/speed.pl t/65_allow.t t/70_rt.t t/75_hashref.t t/80_diag.t
gregoa-guest at users.alioth.debian.org
gregoa-guest at users.alioth.debian.org
Tue Apr 8 16:55:25 UTC 2008
Author: gregoa-guest
Date: Tue Apr 8 16:55:24 2008
New Revision: 18404
URL: http://svn.debian.org/wsvn/pkg-perl/?sc=1&rev=18404
Log:
New upstream release.
Added:
trunk/libtext-csv-perl/t/75_hashref.t
- copied unchanged from r18403, branches/upstream/libtext-csv-perl/current/t/75_hashref.t
Modified:
trunk/libtext-csv-perl/CSV_XS.pm
trunk/libtext-csv-perl/CSV_XS.xs
trunk/libtext-csv-perl/ChangeLog
trunk/libtext-csv-perl/MANIFEST
trunk/libtext-csv-perl/META.yml
trunk/libtext-csv-perl/debian/changelog
trunk/libtext-csv-perl/examples/speed.pl
trunk/libtext-csv-perl/t/65_allow.t
trunk/libtext-csv-perl/t/70_rt.t
trunk/libtext-csv-perl/t/80_diag.t
Modified: trunk/libtext-csv-perl/CSV_XS.pm
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/CSV_XS.pm?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/CSV_XS.pm (original)
+++ trunk/libtext-csv-perl/CSV_XS.pm Tue Apr 8 16:55:24 2008
@@ -27,9 +27,10 @@
use warnings;
use DynaLoader ();
+use Carp;
use vars qw( $VERSION @ISA );
-$VERSION = "0.37";
+$VERSION = "0.40";
@ISA = qw( DynaLoader );
sub PV { 0 }
@@ -66,12 +67,15 @@
verbatim => 0,
types => undef,
+
_EOF => 0,
_STATUS => undef,
_FIELDS => undef,
_FFLAGS => undef,
_STRING => undef,
_ERROR_INPUT => undef,
+ _COLUMN_NAMES => undef,
+ _BOUND_COLUMNS => undef,
);
my $last_new_err = "";
@@ -113,6 +117,8 @@
eol_is_cr => 20,
has_types => 21,
verbatim => 22,
+
+ _is_bound => 23,
);
sub _set_attr
{
@@ -120,7 +126,7 @@
$self->{$name} = $val;
$self->{_CACHE} or return;
my @cache = unpack "C*", $self->{_CACHE};
- $cache[$_cache_id{$name}] = defined $val ? ord $val : 0;
+ $cache[$_cache_id{$name}] = defined $val ? unpack "C", $val : 0;
$self->{_CACHE} = pack "C*", @cache;
} # _set_attr
@@ -163,7 +169,7 @@
$cache[$_cache_id{eol_is_cr}] = 0;
}
$eol .= "\0\0\0\0\0\0\0\0";
- $cache[$_cache_id{eol} + $_] = ord substr $eol, $_, 1 for 0 .. 7;
+ $cache[$_cache_id{eol} + $_] = unpack "C", substr $eol, $_, 1 for 0 .. 7;
$self->{_CACHE} = pack "C*", @cache;
}
$self->{eol};
@@ -380,6 +386,60 @@
$self->{_STATUS};
} # parse
+sub column_names
+{
+ my ($self, @keys) = @_;
+ @keys or
+ return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : undef;
+
+ @keys == 1 && ! defined $keys[0] and
+ return $self->{_COLUMN_NAMES} = undef;
+
+ if (@keys == 1 && ref $keys[0] eq "ARRAY") {
+ @keys = @{$keys[0]};
+ }
+ elsif (join "", map { defined $_ ? ref $_ : "UNDEF" } @keys) {
+ croak ($self->SetDiag (3001));
+ }
+
+ $self->{_is_bound} && @keys != unpack "C", $self->{_is_bound} and
+ croak ($self->SetDiag (3003));
+
+ $self->{_COLUMN_NAMES} = [ @keys ];
+ @keys;
+ } # column_names
+
+sub bind_columns
+{
+ my ($self, @refs) = @_;
+ @refs or
+ return defined $self->{_BOUND_COLUMNS} ? @{$self->{_BOUND_COLUMNS}} : undef;
+
+ @refs == 1 && ! defined $refs[0] and
+ return $self->{_BOUND_COLUMNS} = undef;
+
+ $self->{_COLUMN_NAMES} && @refs != @{$self->{_COLUMN_NAMES}} and
+ croak ($self->SetDiag (3003));
+
+ @refs > 255 and croak ($self->SetDiag (3005));
+
+ join "", map { ref $_ eq "SCALAR" ? "" : "*" } @refs and
+ croak ($self->SetDiag (3004));
+
+ $self->_set_attr ("_is_bound", pack "C" => scalar @refs);
+ $self->{_BOUND_COLUMNS} = [ @refs ];
+ @refs;
+ } # column_names
+
+sub getline_hr
+{
+ my ($self, @args, %hr) = @_;
+ $self->{_COLUMN_NAMES} or croak ($self->SetDiag (3002));
+ my $fr = $self->getline (@args) or return undef;
+ @hr{@{$self->{_COLUMN_NAMES}}} = @$fr;
+ \%hr;
+ } # getline_hr
+
bootstrap Text::CSV_XS $VERSION;
sub types
@@ -431,6 +491,8 @@
$colref = $csv->getline ($io); # Read a line from file $io,
# parse it and return an array
# ref of fields
+ $csv->column_names (@names); # Set column names for getline_hr ()
+ $ref = $csv->getline_hr ($io); # getline (), but returns a hashref
$eof = $csv->eof (); # Indicate if last parse or
# getline () hit End Of File
@@ -562,13 +624,11 @@
=head1 FUNCTIONS
-=over 4
-
-=item version ()
+=head2 version ()
(Class method) Returns the current module version.
-=item new (\%attr)
+=head2 new (\%attr)
(Class method) Returns a new instance of Text::CSV_XS. The objects
attributes are described by the (optional) hash ref C<\%attr>.
@@ -796,7 +856,7 @@
"Unknown attribute 'ecs_char'"
-=item combine
+=head2 combine
$status = $csv->combine (@columns);
@@ -807,7 +867,7 @@
C<string ()> is undefined and C<error_input ()> can be called to retrieve an
invalid argument.
-=item print
+=head2 print
$status = $csv->print ($io, $colref);
@@ -829,14 +889,14 @@
I<$csv->fields ()> and I<$csv-E<gt>error_input ()> methods are meaningless
after executing this method.
-=item string
+=head2 string
$line = $csv->string ();
This object function returns the input to C<parse ()> or the resultant CSV
string of C<combine ()>, whichever was called more recently.
-=item parse
+=head2 parse
$status = $csv->parse ($line);
@@ -850,7 +910,7 @@
You may use the I<types ()> method for setting column types. See the
description below.
-=item getline
+=head2 getline
$colref = $csv->getline ($io);
@@ -862,7 +922,43 @@
The I<$csv-E<gt>string ()>, I<$csv-E<gt>fields ()> and I<$csv-E<gt>status ()>
methods are meaningless, again.
-=item eof
+=head2 getline_hr
+
+The C<getline_hr ()> and C<column_names ()> methods work together to allow
+you to have rows returned as hashrefs. You must call C<column_names ()>
+first to declare your column names.
+
+ $csv->column_names (qw( code name price description ));
+ $hr = $csv->getline_hr ($io);
+ print "Price for $hr->{name} is $hr->{price} EUR\n";
+
+C<getline_hr ()> will croak if called before C<column_names ()>.
+
+=head2 column_names
+
+Set the keys that will be used in the C<getline_hr ()> calls. If no keys
+(column names) are passed, it'll return the current setting.
+
+C<column_names ()> accepts a list of scalars (the column names) or a
+single array_ref, so you can pass C<getline ()>
+
+ $csv->column_names ($csv->getline ($io));
+
+C<column_names ()> croaks on invalid arguments.
+
+=head2 bind_columns
+
+Takes a list of references to scalars (max 255) to store the fields fetched
+C<by getline_hr ()> in. When you don't pass enough references to store the
+fetched fields in, C<getline ()> will fail. If you pass more than there are
+fields to return, the remaining references are left untouched.
+
+ $csv->bind_columns (\$code, \$name, \$price, \$description);
+ while ($csv->getline ()) {
+ print "The price of a $name is \x{20ac} $price\n";
+ }
+
+=head2 eof
$eof = $csv->eof ();
@@ -871,7 +967,7 @@
it will return false (''). This is useful to see the difference between
a failure and end of file.
-=item types
+=head2 types
$csv->types (\@tref);
@@ -911,14 +1007,14 @@
=back
-=item fields
+=head2 fields
@columns = $csv->fields ();
This object function returns the input to C<combine ()> or the resultant
decomposed fields of C<parse ()>, whichever was called more recently.
-=item meta_info
+=head2 meta_info
@flags = $csv->meta_info ();
@@ -944,7 +1040,7 @@
See the C<is_*** ()> methods below.
-=item is_quoted
+=head2 is_quoted
my $quoted = $csv->is_quoted ($column_idx);
@@ -956,7 +1052,7 @@
where C<,20070108,> is to be treated as a numeric value, and where
C<,"20070108",> is explicitly marked as character string data.
-=item is_binary
+=head2 is_binary
my $binary = $csv->is_binary ($column_idx);
@@ -966,21 +1062,21 @@
This returns a true value if the data in the indicated column
contained any byte in the range [\x00-\x08,\x10-\x1F,\x7F-\xFF]
-=item status
+=head2 status
$status = $csv->status ();
This object function returns success (or failure) of C<combine ()> or
C<parse ()>, whichever was called more recently.
-=item error_input
+=head2 error_input
$bad_argument = $csv->error_input ();
This object function returns the erroneous argument (if it exists) of
C<combine ()> or C<parse ()>, whichever was called more recently.
-=item error_diag
+=head2 error_diag
$csv->error_diag ();
$error_code = 0 + $csv->error_diag ();
@@ -1000,8 +1096,6 @@
scalar, a-la $!. It will contain the error code in numeric context, and
the diagnostics message in string context.
-=back
-
=head1 INTERNALS
=over 4
@@ -1009,6 +1103,8 @@
=item Combine (...)
=item Parse (...)
+
+=item SetDiag (...)
=back
@@ -1116,7 +1212,14 @@
Probably the best way to do this is to make a subclass
Text::CSV_XS::Encoded that can be passed the required encoding and
-then behaves transparently (but slower).
+then behaves transparently (but slower), something like this:
+
+ use Text::CSV::Encoded;
+ my $csv = Text::CSV::Encoded->new ({
+ encoding => "utf-8", # Both in and out
+ encoding_in => "iso-8859-1", # Only the input
+ encoding_out => "cp1252", # Only the output
+ });
=item Double double quotes
@@ -1163,6 +1266,7 @@
=item next
+ - This might very well be 1.00
- DIAGNOSTICS setction in pod to *describe* the errors (see below)
- croak / carp
@@ -1215,7 +1319,11 @@
=item ECB
-Combine error
+Combine error.
+
+=item EHR
+
+HashRef parse related error.
=back
@@ -1283,6 +1391,22 @@
=item 2037 "EIF - Binary character in unquoted field, binary off"
=item 2110 "ECB - Binary character in Combine, binary off"
+
+=item 3001 "EHR - Unsupported syntax for column_names ()"
+
+=item 3002 "EHR - getline_hr () called before column_names ()"
+
+=item 3003 "EHR - bind_columns () and column_names () fields count mismatch"
+
+=item 3004 "EHR - bind_columns () only accepts refs to scalars"
+
+=item 3005 "EHR - bind_columns () takes 254 refs max"
+
+=item 3006 "EHR - bind_columns () did not pass enough refs for parsed fields"
+
+=item 3007 "EHR - bind_columns needs refs to writeable scalars"
+
+=item 3008 "EHR - unexpected error in bound fields
=back
Modified: trunk/libtext-csv-perl/CSV_XS.xs
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/CSV_XS.xs?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/CSV_XS.xs (original)
+++ trunk/libtext-csv-perl/CSV_XS.xs Tue Apr 8 16:55:24 2008
@@ -29,7 +29,7 @@
#define CACHE_ID_sep_char 2
#define CACHE_ID_binary 3
#define CACHE_ID_keep_meta_info 4
-#define CACHE_ID_alwasy_quote 5
+#define CACHE_ID_always_quote 5
#define CACHE_ID_allow_loose_quotes 6
#define CACHE_ID_allow_loose_escapes 7
#define CACHE_ID_allow_double_quoted 8
@@ -40,6 +40,7 @@
#define CACHE_ID_eol_is_cr 20
#define CACHE_ID_has_types 21
#define CACHE_ID_verbatim 22
+#define CACHE_ID__is_bound 23
#define CSV_FLAGS_QUO 0x0001
#define CSV_FLAGS_BIN 0x0002
@@ -54,6 +55,9 @@
#define useIO_EOF 0x10
#define unless(expr) if (!(expr))
+
+#define _is_arrayref(f) \
+ ( f && SvOK (f) && SvROK (f) && SvTYPE (SvRV (f)) == SVt_PVAV )
#define CSV_XS_SELF \
if (!self || !SvOK (self) || !SvROK (self) || \
@@ -69,7 +73,7 @@
byte binary;
byte keep_meta_info;
- byte alwasy_quote;
+ byte always_quote;
byte useIO; /* Also used to indicate EOF */
byte eol_is_cr;
@@ -81,13 +85,14 @@
byte blank_is_undef;
byte verbatim;
+ byte is_bound;
byte reserved1;
- byte reserved2;
#endif
byte cache[CACHE_SIZE];
HV* self;
+ SV* bound;
char *eol;
STRLEN eol_len;
@@ -102,7 +107,7 @@
} csv_t;
#define bool_opt(o) \
- (((svp = hv_fetch (self, o, strlen (o), 0)) && *svp) ? SvTRUE (*svp) : 0)
+ (((svp = hv_fetchs (self, o, FALSE)) && *svp) ? SvTRUE (*svp) : 0)
typedef struct {
int xs_errno;
@@ -139,6 +144,16 @@
/* Combine errors */
{ 2110, "ECB - Binary character in Combine, binary off" },
+ /* Hash-Ref errors */
+ { 3001, "EHR - Unsupported syntax for column_names ()" },
+ { 3002, "EHR - getline_hr () called before column_names ()" },
+ { 3003, "EHR - bind_columns () and column_names () fields count mismatch" },
+ { 3004, "EHR - bind_columns () only accepts refs to scalars" },
+ { 3005, "EHR - bind_columns () takes 254 refs max" },
+ { 3006, "EHR - bind_columns () did not pass enough refs for parsed fields" },
+ { 3007, "EHR - bind_columns needs refs to writeable scalars" },
+ { 3008, "EHR - unexpected error in bound fields" },
+
{ 0, "" },
};
@@ -153,10 +168,10 @@
io_handle_loaded = 1; \
}
-static void SetDiag (csv_t *csv, int xse)
+static SV *SetDiag (csv_t *csv, int xse)
{
int i = 0;
- SV *err;
+ SV *err = NULL;
while (xs_errors[i].xs_errno && xs_errors[i].xs_errno != xse) i++;
if ((err = newSVpv (xs_errors[i].xs_errstr, 0))) {
@@ -165,6 +180,7 @@
SvIOK_on (err);
hv_store (csv->self, "_ERROR_DIAG", 11, err, 0);
}
+ return (err);
} /* SetDiag */
static void SetupCsv (csv_t *csv, HV *self)
@@ -175,7 +191,7 @@
csv->self = self;
- if ((svp = hv_fetch (self, "_CACHE", 6, 0)) && *svp) {
+ if ((svp = hv_fetchs (self, "_CACHE", FALSE)) && *svp) {
memcpy (csv->cache, SvPV (*svp, len), CACHE_SIZE);
csv->quote_char = csv->cache[CACHE_ID_quote_char ];
@@ -184,7 +200,7 @@
csv->binary = csv->cache[CACHE_ID_binary ];
csv->keep_meta_info = csv->cache[CACHE_ID_keep_meta_info ];
- csv->alwasy_quote = csv->cache[CACHE_ID_alwasy_quote ];
+ csv->always_quote = csv->cache[CACHE_ID_always_quote ];
#if ALLOW_ALLOW
csv->allow_loose_quotes = csv->cache[CACHE_ID_allow_loose_quotes];
@@ -194,6 +210,7 @@
csv->blank_is_undef = csv->cache[CACHE_ID_blank_is_undef ];
csv->verbatim = csv->cache[CACHE_ID_verbatim ];
#endif
+ csv->is_bound = csv->cache[CACHE_ID__is_bound ];
csv->eol_is_cr = csv->cache[CACHE_ID_eol_is_cr ];
csv->eol_len = csv->cache[CACHE_ID_eol_len ];
if (csv->eol_len < 8)
@@ -202,7 +219,7 @@
/* Was too long to cache. must re-fetch */
csv->eol = NULL;
csv->eol_is_cr = 0;
- if ((svp = hv_fetch (self, "eol", 3, 0)) && *svp && SvOK (*svp)) {
+ if ((svp = hv_fetchs (self, "eol", FALSE)) && *svp && SvOK (*svp)) {
csv->eol = SvPV (*svp, len);
csv->eol_len = len;
csv->eol_is_cr = 0;
@@ -211,7 +228,7 @@
csv->types = NULL;
if (csv->cache[CACHE_ID_has_types]) {
- if ((svp = hv_fetch (self, "_types", 6, 0)) && *svp && SvOK (*svp)) {
+ if ((svp = hv_fetchs (self, "_types", FALSE)) && *svp && SvOK (*svp)) {
csv->types = SvPV (*svp, len);
csv->types_len = len;
}
@@ -219,7 +236,7 @@
}
else {
csv->quote_char = '"';
- if ((svp = hv_fetch (self, "quote_char", 10, 0)) && *svp) {
+ if ((svp = hv_fetchs (self, "quote_char", FALSE)) && *svp) {
if (SvOK (*svp)) {
ptr = SvPV (*svp, len);
csv->quote_char = len ? *ptr : (char)0;
@@ -229,7 +246,7 @@
}
csv->escape_char = '"';
- if ((svp = hv_fetch (self, "escape_char", 11, 0)) && *svp) {
+ if ((svp = hv_fetchs (self, "escape_char", FALSE)) && *svp) {
if (SvOK (*svp)) {
ptr = SvPV (*svp, len);
csv->escape_char = len ? *ptr : (char)0;
@@ -238,7 +255,7 @@
csv->escape_char = (char)0;
}
csv->sep_char = ',';
- if ((svp = hv_fetch (self, "sep_char", 8, 0)) && *svp && SvOK (*svp)) {
+ if ((svp = hv_fetchs (self, "sep_char", FALSE)) && *svp && SvOK (*svp)) {
ptr = SvPV (*svp, len);
if (len)
csv->sep_char = *ptr;
@@ -246,7 +263,7 @@
csv->eol = NULL;
csv->eol_is_cr = 0;
- if ((svp = hv_fetch (self, "eol", 3, 0)) && *svp && SvOK (*svp)) {
+ if ((svp = hv_fetchs (self, "eol", FALSE)) && *svp && SvOK (*svp)) {
csv->eol = SvPV (*svp, len);
csv->eol_len = len;
if (len == 1 && *csv->eol == CH_CR)
@@ -254,14 +271,14 @@
}
csv->types = NULL;
- if ((svp = hv_fetch (self, "_types", 6, 0)) && *svp && SvOK (*svp)) {
+ if ((svp = hv_fetchs (self, "_types", FALSE)) && *svp && SvOK (*svp)) {
csv->types = SvPV (*svp, len);
csv->types_len = len;
}
csv->binary = bool_opt ("binary");
csv->keep_meta_info = bool_opt ("keep_meta_info");
- csv->alwasy_quote = bool_opt ("always_quote");
+ csv->always_quote = bool_opt ("always_quote");
#if ALLOW_ALLOW
csv->allow_loose_quotes = bool_opt ("allow_loose_quotes");
csv->allow_loose_escapes = bool_opt ("allow_loose_escapes");
@@ -277,7 +294,7 @@
csv->cache[CACHE_ID_binary] = csv->binary;
csv->cache[CACHE_ID_keep_meta_info] = csv->keep_meta_info;
- csv->cache[CACHE_ID_alwasy_quote] = csv->alwasy_quote;
+ csv->cache[CACHE_ID_always_quote] = csv->always_quote;
#if ALLOW_ALLOW
csv->cache[CACHE_ID_allow_loose_quotes] = csv->allow_loose_quotes;
@@ -287,6 +304,7 @@
csv->cache[CACHE_ID_blank_is_undef] = csv->blank_is_undef;
csv->cache[CACHE_ID_verbatim] = csv->verbatim;
#endif
+ csv->cache[CACHE_ID__is_bound] = csv->is_bound;
csv->cache[CACHE_ID_eol_is_cr] = csv->eol_is_cr;
csv->cache[CACHE_ID_eol_len] = csv->eol_len;
if (csv->eol_len > 0 && csv->eol_len < 8 && csv->eol)
@@ -297,12 +315,18 @@
hv_store (self, "_CACHE", 6, csv->tmp, 0);
}
+ if (csv->is_bound) {
+ if ((svp = hv_fetchs (self, "_BOUND_COLUMNS", FALSE)) && _is_arrayref (*svp))
+ csv->bound = *svp;
+ else
+ csv->is_bound = 0;
+ }
csv->used = 0;
} /* SetupCsv */
static int Print (csv_t *csv, SV *dst)
{
- int result;
+ int result;
if (csv->useIO) {
SV *tmp = newSVpv (csv->buffer, csv->used);
@@ -342,7 +366,7 @@
int i;
if (csv->sep_char == csv->quote_char || csv->sep_char == csv->escape_char) {
- SetDiag (csv, 1001);
+ (void)SetDiag (csv, 1001);
return FALSE;
}
@@ -354,10 +378,10 @@
if ((svp = av_fetch (fields, i, 0)) && *svp && SvOK (*svp)) {
STRLEN len;
char *ptr = SvPV (*svp, len);
- int quoteMe = csv->alwasy_quote;
+ int quoteMe = csv->always_quote;
/* Do we need quoting? We do quote, if the user requested
- * (alwasy_quote), if binary or blank characters are found
+ * (always_quote), if binary or blank characters are found
* and if the string contains quote or escape characters.
*/
if (!quoteMe &&
@@ -388,7 +412,7 @@
SvREFCNT_inc (*svp);
unless (hv_store (csv->self, "_ERROR_INPUT", 12, *svp, 0))
SvREFCNT_dec (*svp);
- SetDiag (csv, 2110);
+ (void)SetDiag (csv, 2110);
return FALSE;
}
if (csv->quote_char && c == csv->quote_char)
@@ -430,7 +454,7 @@
if (hv_store (csv->self, "_ERROR_INPUT", 12, csv->tmp, 0))
SvREFCNT_inc (csv->tmp);
}
- SetDiag (csv, xse);
+ (void)SetDiag (csv, xse);
} /* ParseError */
static int CsvGet (csv_t *csv, SV *src)
@@ -480,12 +504,12 @@
} /* CsvGet */
#define ERROR_INSIDE_QUOTES(diag_code) { \
- SvREFCNT_dec (insideQuotes); \
+ SvREFCNT_dec (sv); \
ParseError (csv, diag_code); \
return FALSE; \
}
#define ERROR_INSIDE_FIELD(diag_code) { \
- SvREFCNT_dec (insideField); \
+ SvREFCNT_dec (sv); \
ParseError (csv, diag_code); \
return FALSE; \
}
@@ -505,26 +529,30 @@
: CsvGet (csv, src)))
#if ALLOW_ALLOW
-#define AV_PUSH(sv) { \
- *SvEND (sv) = (char)0; \
- if (!(f & CSV_FLAGS_QUO) && SvCUR (sv) == 0 && csv->blank_is_undef) \
- av_push (fields, &PL_sv_undef); \
- else { \
+#define AV_PUSH { \
+ *SvEND (sv) = (char)0; \
+ if (!(f & CSV_FLAGS_QUO) && SvCUR (sv) == 0 && csv->blank_is_undef) {\
+ sv_setpvn (sv, NULL, 0); \
+ unless (csv->is_bound) av_push (fields, sv); \
+ } \
+ else { \
if (csv->allow_whitespace && ! (f & CSV_FLAGS_QUO)) \
- strip_trail_whitespace (sv); \
- av_push (fields, sv); \
- } \
- if (csv->keep_meta_info) \
- av_push (fflags, newSViv (f)); \
- f = 0; \
+ strip_trail_whitespace (sv); \
+ unless (csv->is_bound) av_push (fields, sv); \
+ } \
+ sv = NULL; \
+ if (csv->keep_meta_info) \
+ av_push (fflags, newSViv (f)); \
+ waitingForField = 1; \
}
#else
-#define AV_PUSH(sv) { \
- *SvEND (sv) = (char)0; \
- av_push (fields, sv); \
- if (csv->keep_meta_info) \
- av_push (fflags, newSViv (f)); \
- f = 0; \
+#define AV_PUSH { \
+ *SvEND (sv) = (char)0; \
+ unless (csv->is_bound) av_push (fields, sv); \
+ sv = NULL; \
+ if (csv->keep_meta_info) \
+ av_push (fflags, newSViv (f)); \
+ waitingForField = 1; \
}
#endif
@@ -539,26 +567,66 @@
SvCUR_set (sv, len);
} /* strip_trail_whitespace */
+static SV *bound_field (csv_t *csv, int i)
+{
+ SV *sv = csv->bound;
+ AV *av;
+
+ /* fprintf (stderr, "# New bind %d/%d\n", i, csv->is_bound);\ */
+ if (i >= csv->is_bound) {
+ (void)SetDiag (csv, 3006);
+ return (NULL);
+ }
+
+ if (sv && SvROK (sv)) {
+ av = (AV *)(SvRV (sv));
+ /* fprintf (stderr, "# Bind %d/%d/%d\n", i, csv->is_bound, av_len (av)); */
+ sv = *av_fetch (av, i, FALSE);
+ if (sv && SvROK (sv)) {
+ sv = SvRV (sv);
+ unless (SvREADONLY (sv)) {
+ sv_setpvn (sv, "", 0);
+ return (sv);
+ }
+ }
+ }
+ SetDiag (csv, 3008);
+ return (NULL);
+ } /* bound_field */
+
+#define NewField \
+ unless (sv) { \
+ if (csv->is_bound) \
+ sv = bound_field (csv, fnum++); \
+ else \
+ sv = newSVpvs (""); \
+ unless (sv) return FALSE; \
+ f = 0; \
+ }
+
static int Parse (csv_t *csv, SV *src, AV *fields, AV *fflags)
{
int c, f = 0;
int c_ungetc = EOF;
int waitingForField = 1;
- SV *insideQuotes = NULL;
- SV *insideField = NULL;
+ SV *sv = NULL;
STRLEN len;
int seenSomething = FALSE;
+ int fnum = 0;
#if MAINT_DEBUG
int spl = -1;
memset (str_parsed, 0, 40);
#endif
if (csv->sep_char == csv->quote_char || csv->sep_char == csv->escape_char) {
- SetDiag (csv, 1001);
+ (void)SetDiag (csv, 1001);
return FALSE;
}
while ((c = CSV_GET) != EOF) {
+
+ NewField;
+
seenSomething = TRUE;
#if MAINT_DEBUG
if (++spl < 39) str_parsed[spl] = c;
@@ -566,45 +634,46 @@
restart:
if (c == csv->sep_char) {
#if MAINT_DEBUG > 1
- fprintf (stderr, "# %d/%d/%d pos %d = SEP '%c'\n",
- waitingForField ? 1 : 0, insideQuotes ? 1 : 0,
- insideField ? 1 : 0, spl, c);
+ fprintf (stderr, "# %d/%d/%02x pos %d = SEP '%c'\n",
+ waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c);
#endif
if (waitingForField) {
#if ALLOW_ALLOW
if (csv->blank_is_undef)
- av_push (fields, &PL_sv_undef);
+ sv_setpvn (sv, NULL, 0);
else
#endif
- av_push (fields, newSVpv ("", 0));
+ sv_setpvn (sv, "", 0);
+ unless (csv->is_bound)
+ av_push (fields, sv);
+ sv = NULL;
#if ALLOW_ALLOW
if (csv->keep_meta_info)
av_push (fflags, newSViv (f));
#endif
}
else
- if (insideQuotes)
- CSV_PUT_SV (insideQuotes, c)
+ if (f & CSV_FLAGS_QUO)
+ CSV_PUT_SV (sv, c)
else {
- AV_PUSH (insideField);
- insideField = NULL;
- waitingForField = 1;
+ AV_PUSH;
}
} /* SEP char */
else
if (c == CH_NL) {
#if MAINT_DEBUG > 1
- fprintf (stderr, "# %d/%d/%d pos %d = NL\n",
- waitingForField ? 1 : 0, insideQuotes ? 1 : 0,
- insideField ? 1 : 0, spl);
+ fprintf (stderr, "# %d/%d/%02x pos %d = NL\n",
+ waitingForField ? 1 : 0, sv ? 1 : 0, f, spl);
#endif
if (waitingForField) {
#if ALLOW_ALLOW
if (csv->blank_is_undef)
- av_push (fields, &PL_sv_undef);
+ sv_setpvn (sv, NULL, 0);
else
#endif
- av_push (fields, newSVpv ("", 0));
+ sv_setpvn (sv, "", 0);
+ unless (csv->is_bound)
+ av_push (fields, sv);
#if ALLOW_ALLOW
if (csv->keep_meta_info)
av_push (fflags, newSViv (f));
@@ -612,12 +681,12 @@
return TRUE;
}
- if (insideQuotes) {
+ if (f & CSV_FLAGS_QUO) {
f |= CSV_FLAGS_BIN;
unless (csv->binary)
ERROR_INSIDE_QUOTES (2021);
- CSV_PUT_SV (insideQuotes, c);
+ CSV_PUT_SV (sv, c);
}
#if ALLOW_ALLOW
else
@@ -626,11 +695,11 @@
unless (csv->binary)
ERROR_INSIDE_FIELD (2030);
- CSV_PUT_SV (insideField, c);
+ CSV_PUT_SV (sv, c);
}
#endif
else {
- AV_PUSH (insideField);
+ AV_PUSH;
return TRUE;
}
} /* CH_NL */
@@ -641,12 +710,13 @@
#endif
) {
#if MAINT_DEBUG > 1
- fprintf (stderr, "# %d/%d/%d pos %d = CR\n",
- waitingForField ? 1 : 0, insideQuotes ? 1 : 0,
- insideField ? 1 : 0, spl);
+ fprintf (stderr, "# %d/%d/%02x pos %d = CR\n",
+ waitingForField ? 1 : 0, sv ? 1 : 0, f, spl);
#endif
if (waitingForField) {
int c2;
+
+ waitingForField = 0;
if (csv->eol_is_cr) {
c = CH_NL;
@@ -656,8 +726,6 @@
c2 = CSV_GET;
if (c2 == EOF) {
- insideField = newSVpv ("", 0);
- waitingForField = 0;
c = EOF;
goto restart;
}
@@ -670,25 +738,25 @@
ERROR_INSIDE_FIELD (2031);
}
- if (insideQuotes) {
+ if (f & CSV_FLAGS_QUO) {
f |= CSV_FLAGS_BIN;
unless (csv->binary)
ERROR_INSIDE_QUOTES (2022);
- CSV_PUT_SV (insideQuotes, c);
+ CSV_PUT_SV (sv, c);
}
else {
int c2;
if (csv->eol_is_cr) {
- AV_PUSH (insideField);
+ AV_PUSH;
return TRUE;
}
c2 = CSV_GET;
if (c2 == CH_NL) {
- AV_PUSH (insideField);
+ AV_PUSH;
return TRUE;
}
@@ -698,24 +766,20 @@
else
if (csv->quote_char && c == csv->quote_char) {
#if MAINT_DEBUG > 1
- fprintf (stderr, "# %d/%d/%d pos %d = QUO '%c'\n",
- waitingForField ? 1 : 0, insideQuotes ? 1 : 0,
- insideField ? 1 : 0, spl, c);
+ fprintf (stderr, "# %d/%d/%02x pos %d = QUO '%c'\n",
+ waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c);
#endif
if (waitingForField) {
- insideQuotes = newSVpv ("", 0);
f |= CSV_FLAGS_QUO;
waitingForField = 0;
}
else
- if (insideQuotes) {
+ if (f & CSV_FLAGS_QUO) {
int c2;
if (!csv->escape_char || c != csv->escape_char) {
/* Field is terminated */
- AV_PUSH (insideQuotes);
- insideQuotes = NULL;
- waitingForField = 1;
+ AV_PUSH;
c2 = CSV_GET;
#if ALLOW_ALLOW
@@ -764,24 +828,22 @@
#endif
if (c2 == EOF) {
- AV_PUSH (insideQuotes);
+ AV_PUSH;
return TRUE;
}
if (c2 == csv->sep_char) {
- AV_PUSH (insideQuotes);
- insideQuotes = NULL;
- waitingForField = 1;
+ AV_PUSH;
}
else
if (c2 == '0')
- CSV_PUT_SV (insideQuotes, 0)
+ CSV_PUT_SV (sv, 0)
else
if (c2 == csv->quote_char || c2 == csv->sep_char)
- CSV_PUT_SV (insideQuotes, c2)
+ CSV_PUT_SV (sv, c2)
else
if (c2 == CH_NL) {
- AV_PUSH (insideQuotes);
+ AV_PUSH;
return TRUE;
}
@@ -790,14 +852,14 @@
int c3;
if (csv->eol_is_cr) {
- AV_PUSH (insideQuotes);
+ AV_PUSH;
return TRUE;
}
c3 = CSV_GET;
if (c3 == CH_NL) {
- AV_PUSH (insideQuotes);
+ AV_PUSH;
return TRUE;
}
}
@@ -820,7 +882,7 @@
#if ALLOW_ALLOW
if (csv->allow_loose_quotes) { /* 1,foo "boo" d'uh,1 */
f |= CSV_FLAGS_EIF;
- CSV_PUT_SV (insideField, c);
+ CSV_PUT_SV (sv, c);
}
else
#endif
@@ -829,24 +891,21 @@
else
if (csv->escape_char && c == csv->escape_char) {
#if MAINT_DEBUG > 1
- fprintf (stderr, "# %d/%d/%d pos %d = ESC '%c'\n",
- waitingForField ? 1 : 0, insideQuotes ? 1 : 0,
- insideField ? 1 : 0, spl, c);
+ fprintf (stderr, "# %d/%d/%02x pos %d = ESC '%c'\n",
+ waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c);
#endif
/* This means quote_char != escape_char */
- if (waitingForField) {
- insideField = newSVpv ("", 0);
+ if (waitingForField)
waitingForField = 0;
- }
else
- if (insideQuotes) {
+ if (f & CSV_FLAGS_QUO) {
int c2 = CSV_GET;
if (c2 == EOF)
ERROR_INSIDE_QUOTES (2024);
if (c2 == '0')
- CSV_PUT_SV (insideQuotes, 0)
+ CSV_PUT_SV (sv, 0)
else
if ( c2 == csv->quote_char || c2 == csv->sep_char ||
c2 == csv->escape_char
@@ -854,27 +913,26 @@
|| csv->allow_loose_escapes
#endif
)
- CSV_PUT_SV (insideQuotes, c2)
+ CSV_PUT_SV (sv, c2)
else
ERROR_INSIDE_QUOTES (2025);
}
else
- if (insideField) {
+ if (sv) {
int c2 = CSV_GET;
if (c2 == EOF)
ERROR_INSIDE_FIELD (2035);
- CSV_PUT_SV (insideField, c2);
+ CSV_PUT_SV (sv, c2);
}
else
ERROR_INSIDE_FIELD (2036); /* I think there's no way to get here */
} /* ESC char */
else {
#if MAINT_DEBUG > 1
- fprintf (stderr, "# %d/%d/%d pos %d = === '%c' '%c'\n",
- waitingForField ? 1 : 0, insideQuotes ? 1 : 0,
- insideField ? 1 : 0, spl, c, c_ungetc);
+ fprintf (stderr, "# %d/%d/%02x pos %d = === '%c' '%c'\n",
+ waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c, c_ungetc);
#endif
if (waitingForField) {
#if ALLOW_ALLOW
@@ -885,20 +943,18 @@
goto restart;
}
#endif
-
- insideField = newSVpv ("", 0);
waitingForField = 0;
goto restart;
}
- if (insideQuotes) {
+ if (f & CSV_FLAGS_QUO) {
if (is_csv_binary (c)) {
f |= CSV_FLAGS_BIN;
unless (csv->binary)
ERROR_INSIDE_QUOTES (2026);
}
- CSV_PUT_SV (insideQuotes, c);
+ CSV_PUT_SV (sv, c);
}
else {
if (is_csv_binary (c)) {
@@ -907,7 +963,7 @@
ERROR_INSIDE_FIELD (2037);
}
- CSV_PUT_SV (insideField, c);
+ CSV_PUT_SV (sv, c);
}
}
@@ -920,31 +976,33 @@
if (waitingForField) {
if (seenSomething) {
+ unless (sv) NewField;
#if ALLOW_ALLOW
if (csv->blank_is_undef)
- av_push (fields, &PL_sv_undef);
+ sv_setpvn (sv, NULL, 0);
else
#endif
- av_push (fields, newSVpv ("", 0));
+ sv_setpvn (sv, "", 0);
+ unless (csv->is_bound)
+ av_push (fields, sv);
#if ALLOW_ALLOW
if (csv->keep_meta_info)
av_push (fflags, newSViv (f));
#endif
- }
- else {
- if (csv->useIO) {
- SetDiag (csv, 2012);
- return FALSE;
- }
+ return TRUE;
+ }
+ if (csv->useIO) {
+ (void)SetDiag (csv, 2012);
+ return FALSE;
}
}
else
- if (insideQuotes) {
+ if (f & CSV_FLAGS_QUO) {
ERROR_INSIDE_QUOTES (2027);
}
else
- if (insideField)
- AV_PUSH (insideField);
+ if (sv)
+ AV_PUSH;
return TRUE;
} /* Parse */
@@ -1014,12 +1072,24 @@
return Combine (&csv, io, av);
} /* xsCombine */
-#define _is_arrayref(f) \
- ( f && SvOK (f) && SvROK (f) && SvTYPE (SvRV (f)) == SVt_PVAV )
-
MODULE = Text::CSV_XS PACKAGE = Text::CSV_XS
PROTOTYPES: DISABLE
+
+SV*
+SetDiag (self, xse)
+ SV *self
+ int xse
+
+ PPCODE:
+ HV *hv;
+ csv_t csv;
+
+ CSV_XS_SELF;
+ SetupCsv (&csv, hv);
+ ST (0) = SetDiag (&csv, xse);
+ XSRETURN (1);
+ /* XS SetDiag */
SV*
Combine (self, dst, fields, useIO)
Modified: trunk/libtext-csv-perl/ChangeLog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/ChangeLog?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/ChangeLog (original)
+++ trunk/libtext-csv-perl/ChangeLog Tue Apr 8 16:55:24 2008
@@ -1,3 +1,19 @@
+2008-04-07 0.40 - H.Merijn Brand <h.m.brand at xs4all.nl>
+
+ * Implemented getline_hr () and column_names () RT 34474
+ (suggestions accepted from Mark Stosberg)
+ * Corrected misspelled variable names in XS
+ * Functions are now =head2 type doc entries (Mark Stosberg)
+ * Make SetDiag() available to the perl level, so errors can
+ be centralized and consistent
+ * Integrate the non-XS errors into XS
+ * Add t/75_hashref.t
+ * Testcase for error 2023 (Michael P Randall)
+ * Completely refactored the XS part of parse/getline, which
+ is now up to 6% faster. YMMV
+ * Completed bind_columns. On straight fetches now up to three
+ times as fast as normal fetches (both using getline ())
+
2008-03-11 0.37 - H.Merijn Brand <h.m.brand at xs4all.nl>
* Copied GIT repo to public mirror
Modified: trunk/libtext-csv-perl/MANIFEST
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/MANIFEST?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/MANIFEST (original)
+++ trunk/libtext-csv-perl/MANIFEST Tue Apr 8 16:55:24 2008
@@ -22,6 +22,7 @@
t/60_samples.t Miscellaneous problems from the modules history.
t/65_allow.t Allow bad formats
t/70_rt.t Tests based on RT reports
+t/75_hashref.t getline_hr related tests
t/80_diag.t Error diagnostics
t/util.pl Extra test utilities
examples/csv2xls Script to onvert CSV files to M$Excel
Modified: trunk/libtext-csv-perl/META.yml
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/META.yml?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/META.yml (original)
+++ trunk/libtext-csv-perl/META.yml Tue Apr 8 16:55:24 2008
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: Text-CSV_XS
-version: 0.37
+version: 0.40
abstract: Comma-Separated Values manipulation routines
license: perl
author:
Modified: trunk/libtext-csv-perl/debian/changelog
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/debian/changelog?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/debian/changelog (original)
+++ trunk/libtext-csv-perl/debian/changelog Tue Apr 8 16:55:24 2008
@@ -1,3 +1,9 @@
+libtext-csv-perl (0.40-1) UNRELEASED; urgency=low
+
+ * New upstream release.
+
+ -- gregor herrmann <gregor+debian at comodo.priv.at> Tue, 08 Apr 2008 18:53:09 +0200
+
libtext-csv-perl (0.37-1) unstable; urgency=low
[ gregor herrmann ]
Modified: trunk/libtext-csv-perl/examples/speed.pl
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/examples/speed.pl?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/examples/speed.pl (original)
+++ trunk/libtext-csv-perl/examples/speed.pl Tue Apr 8 16:55:24 2008
@@ -38,13 +38,15 @@
});
sub max { $_[0] >= $_[1] ? $_[0] : $_[1] }
-my $line_count = max (1_000_000, 20_000 * $duration);
+my $line_count = max (100_000, 20_000 * $duration);
open our $io, ">", $bigfile;
$csv->print ($io, \@fields10) or die "Cannot print ()\n";
timethese ($line_count, { "print io" => q{ $csv->print ($io, \@fields10) }});
close $io;
-s $bigfile or die "File is empty!\n";
+my @f = @fields10;
+$csv->can ("bind_columns") and $csv->bind_columns (\(@f));
open $io, "<", $bigfile;
timethese ($line_count, { "getline io" => q{ my $ref = $csv->getline ($io) }});
close $io;
Modified: trunk/libtext-csv-perl/t/65_allow.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/t/65_allow.t?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/t/65_allow.t (original)
+++ trunk/libtext-csv-perl/t/65_allow.t Tue Apr 8 16:55:24 2008
@@ -64,7 +64,7 @@
$csv->allow_loose_escapes (1);
if ($tst >= 8) {
# Should always fail
- ok (!$csv->parse ($bad), "$tst - parse () pass");
+ ok (!$csv->parse ($bad), "$tst - parse () fail");
}
else {
ok ($csv->parse ($bad), "$tst - parse () pass");
Modified: trunk/libtext-csv-perl/t/70_rt.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/t/70_rt.t?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/t/70_rt.t (original)
+++ trunk/libtext-csv-perl/t/70_rt.t Tue Apr 8 16:55:24 2008
@@ -4,7 +4,7 @@
$^W = 1;
#use Test::More "no_plan";
- use Test::More tests => 52;
+ use Test::More tests => 69;
BEGIN {
use_ok "Text::CSV_XS", ();
@@ -90,6 +90,29 @@
is ($fld[1], "It's an apostrophee", "Content field 2");
}
+{ # http://rt.cpan.org/Ticket/Display.html?id=34474
+ # 34474: wish: integrate row-as-hashref feature from Parse::CSV
+ open FH, ">_test.csv";
+ print FH @{$input{34474}};
+ close FH;
+ ok (my $csv = Text::CSV_XS->new (), "RT-34474: getline_hr ()");
+ is ($csv->column_names, undef, "No headers yet");
+ open FH, "<_test.csv";
+ my $row;
+ ok ($row = $csv->getline (*FH), "getline headers");
+ is ($row->[0], "code", "Header line");
+ $csv->column_names (@$row);
+ is_deeply ([ $csv->column_names ], [ @$row ], "Keys set");
+ while (my $hr = $csv->getline_hr (*FH)) {
+ ok (exists $hr->{code}, "Line has a code field");
+ like ($hr->{code}, qr/^[0-9]+$/, "Code is numeric");
+ ok (exists $hr->{name}, "Line has a name field");
+ like ($hr->{name}, qr/^[A-Z][a-z]+$/, "Name");
+ }
+ close FH;
+ unlink "_test.csv";
+ }
+
__END__
«24386» - \t doesn't work in _XS, works in _PP
VIN StockNumber Year Make Model MD Engine EngineSize Transmission DriveTrain Trim BodyStyle CityFuel HWYFuel Mileage Color InteriorColor InternetPrice RetailPrice Notes ShortReview Certified NewUsed Image_URLs Equipment
@@ -110,3 +133,8 @@
",~"~,~""~,~"""~,,~~,
«15076» - escape_char before characters that do not need to be escaped.
"Example";"It\'s an apostrophee"
+«34474» - wish: integrate row-as-hashref feature from Parse::CSV
+code,name,price,description
+1,Dress,240.00,"Evening gown"
+2,Drinks,82.78,"Drinks"
+3,Sex,-9999.99,"Priceless"
Modified: trunk/libtext-csv-perl/t/80_diag.t
URL: http://svn.debian.org/wsvn/pkg-perl/trunk/libtext-csv-perl/t/80_diag.t?rev=18404&op=diff
==============================================================================
--- trunk/libtext-csv-perl/t/80_diag.t (original)
+++ trunk/libtext-csv-perl/t/80_diag.t Tue Apr 8 16:55:24 2008
@@ -3,7 +3,7 @@
use strict;
$^W = 1;
- use Test::More tests => 61;
+ use Test::More tests => 66;
#use Test::More "no_plan";
my %err;
@@ -21,7 +21,7 @@
$| = 1;
-my $csv = (Text::CSV_XS->new ({ escape_char => "+", eol => "\n" }));
+my $csv = Text::CSV_XS->new ();
is (Text::CSV_XS::error_diag (), "", "Last failure for new () - OK");
sub parse_err ($$)
@@ -37,6 +37,9 @@
is ($s_diag, $s_err, "Str diag in list context");
} # parse_err
+parse_err 2023, qq{2023,",2008-04-05,"Foo, Bar",\n};
+
+$csv = Text::CSV_XS->new ({ escape_char => "+", eol => "\n" });
is ($csv->error_diag (), undef, "No errors yet");
parse_err 2010, qq{"x"\r};
More information about the Pkg-perl-cvs-commits
mailing list