@@ -1,6 +1,6 @@
package Text::CSV_XS;
-# Copyright (c) 2007-2013 H.Merijn Brand. All rights reserved.
+# Copyright (c) 2007-2014 H.Merijn Brand. All rights reserved.
# Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
# Copyright (c) 1997 Alan Citterman. All rights reserved.
#
@@ -23,12 +23,14 @@ require 5.006001;
use strict;
use warnings;
+require Exporter;
use DynaLoader ();
use Carp;
-use vars qw( $VERSION @ISA );
-$VERSION = "1.02";
-@ISA = qw( DynaLoader );
+use vars qw( $VERSION @ISA @EXPORT_OK );
+$VERSION = "1.11";
+@ISA = qw( DynaLoader Exporter );
+@EXPORT_OK = qw( csv );
bootstrap Text::CSV_XS $VERSION;
sub PV { 0 }
@@ -51,27 +53,28 @@ sub version
# a newly created Text::CSV object.
my %def_attr = (
+ eol => '',
+ sep_char => ',',
quote_char => '"',
escape_char => '"',
- sep_char => ',',
- eol => '',
- always_quote => 0,
- quote_space => 1,
- quote_null => 1,
- quote_binary => 1,
binary => 0,
decode_utf8 => 1,
- keep_meta_info => 0,
+ auto_diag => 0,
+ diag_verbose => 0,
+ blank_is_undef => 0,
+ empty_is_undef => 0,
+ allow_whitespace => 0,
allow_loose_quotes => 0,
allow_loose_escapes => 0,
allow_unquoted_escape => 0,
- allow_whitespace => 0,
- blank_is_undef => 0,
- empty_is_undef => 0,
+ always_quote => 0,
+ quote_space => 1,
+ quote_null => 1,
+ quote_binary => 1,
+ keep_meta_info => 0,
verbatim => 0,
- auto_diag => 0,
- diag_verbose => 0,
types => undef,
+ callbacks => undef,
_EOF => 0,
_RECNO => 0,
@@ -84,53 +87,121 @@ my %def_attr = (
_BOUND_COLUMNS => undef,
_AHEAD => undef,
);
+my %attr_alias = (
+ quote_always => "always_quote",
+ verbose_diag => "diag_verbose",
+ );
my $last_new_err = Text::CSV_XS->SetDiag (0);
-sub _check_sanity
+# NOT a method: is also used before bless
+sub _unhealthy_whitespace
{
- my $attr = shift;
- for (qw( sep_char quote_char escape_char )) {
- defined $attr->{$_} && $attr->{$_} =~ m/[\r\n]/ and
- return 1003;
- }
- $attr->{allow_whitespace} and
- (defined $attr->{quote_char} && $attr->{quote_char} =~ m/^[ \t]$/) ||
- (defined $attr->{escape_char} && $attr->{escape_char} =~ m/^[ \t]$/) and
+ my $self = shift;
+ $_[0] or return 0; # no checks needed without allow_whitespace
+
+ my $quo = $self->{quote};
+ defined $quo && length ($quo) or $quo = $self->{quote_char};
+ my $esc = $self->{escape_char};
+
+ (defined $quo && $quo =~ m/^[ \t]/) || (defined $esc && $esc =~ m/^[ \t]/) and
return 1002;
+
return 0;
+ } # _sane_whitespace
+
+sub _check_sanity
+{
+ my $self = shift;
+
+ my $sep = $self->{sep};
+ defined $sep && length ($sep) or $sep = $self->{sep_char};
+ my $quo = $self->{quote};
+ defined $quo && length ($quo) or $quo = $self->{quote_char};
+ my $esc = $self->{escape_char};
+
+# use DP;::diag ("SEP: '", DPeek ($sep),
+# "', QUO: '", DPeek ($quo),
+# "', ESC: '", DPeek ($esc),"'");
+ # sep_char cannot be undefined
+ defined $quo && $quo eq $sep and return 1001;
+ defined $esc && $esc eq $sep and return 1001;
+
+ defined $_ && $_ =~ m/[\r\n]/ and return 1003 for $sep, $quo, $esc;
+
+ return _unhealthy_whitespace ($self, $self->{allow_whitespace});
} # _check_sanity
sub new
{
- $last_new_err = SetDiag (undef, 1000,
+ $last_new_err = Text::CSV_XS->SetDiag (1000,
"usage: my \$csv = Text::CSV_XS->new ([{ option => value, ... }]);");
my $proto = shift;
my $class = ref ($proto) || $proto or return;
@_ > 0 && ref $_[0] ne "HASH" and return;
my $attr = shift || {};
-
- for (keys %{$attr}) {
+ my %attr = map {
+ my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
+ exists $attr_alias{$k} and $k = $attr_alias{$k};
+ $k => $attr->{$_};
+ } keys %$attr;
+
+ my $sep_aliased = 0;
+ if (defined $attr{sep}) {
+ $attr{sep_char} = delete $attr{sep};
+ $sep_aliased = 1;
+ }
+ my $quote_aliased = 0;
+ if (defined $attr{quote}) {
+ $attr{quote_char} = delete $attr{quote};
+ $quote_aliased = 1;
+ }
+ for (keys %attr) {
if (m/^[a-z]/ && exists $def_attr{$_}) {
- defined $attr->{$_} && $] >= 5.008002 && m/_char$/ and
- utf8::decode ($attr->{$_});
+ # uncoverable condition false
+ defined $attr{$_} && $] >= 5.008002 && m/_char$/ and
+ utf8::decode ($attr{$_});
next;
}
# croak?
- $last_new_err = SetDiag (undef, 1000, "INI - Unknown attribute '$_'");
- $attr->{auto_diag} and error_diag ();
+ $last_new_err = Text::CSV_XS->SetDiag (1000, "INI - Unknown attribute '$_'");
+ $attr{auto_diag} and error_diag ();
return;
}
+ if ($sep_aliased) {
+ my @b = unpack "U0C*", $attr{sep_char};
+ if (@b > 1) {
+ $attr{sep} = $attr{sep_char};
+ $attr{sep_char} = "\0";
+ }
+ else {
+ $attr{sep} = undef;
+ }
+ }
+ if ($quote_aliased) {
+ my @b = unpack "U0C*", $attr{quote_char};
+ if (@b > 1) {
+ $attr{quote} = $attr{quote_char};
+ $attr{quote_char} = "\0";
+ }
+ else {
+ $attr{quote} = undef;
+ }
+ }
- my $self = { %def_attr, %{$attr} };
+ my $self = { %def_attr, %attr };
if (my $ec = _check_sanity ($self)) {
- $last_new_err = SetDiag (undef, $ec);
- $attr->{auto_diag} and error_diag ();
+ $last_new_err = Text::CSV_XS->SetDiag ($ec);
+ $attr{auto_diag} and error_diag ();
return;
}
+ if (defined $self->{callbacks} && ref $self->{callbacks} ne "HASH") {
+ warn "The 'callbacks' attribute is set but is not a hash: ignored\n";
+ $self->{callbacks} = undef;
+ }
- $last_new_err = SetDiag (undef, 0);
- defined $\ && !exists $attr->{eol} and $self->{eol} = $\;
+ $last_new_err = Text::CSV_XS->SetDiag (0);
+ defined $\ && !exists $attr{eol} and $self->{eol} = $\;
bless $self, $class;
defined $self->{types} and $self->types ($self->{types});
$self;
@@ -141,6 +212,7 @@ my %_cache_id = ( # Only expose what is accessed from within PM
quote_char => 0,
escape_char => 1,
sep_char => 2,
+ sep => 38, # 38 .. 54
binary => 3,
keep_meta_info => 4,
always_quote => 5,
@@ -149,7 +221,8 @@ my %_cache_id = ( # Only expose what is accessed from within PM
allow_unquoted_escape => 8,
allow_whitespace => 9,
blank_is_undef => 10,
- eol => 11, # 11 .. 18
+ eol => 11,
+ quote => 15,
verbatim => 22,
empty_is_undef => 23,
auto_diag => 24,
@@ -158,6 +231,7 @@ my %_cache_id = ( # Only expose what is accessed from within PM
quote_null => 31,
quote_binary => 32,
decode_utf8 => 35,
+ _has_hooks => 36,
_is_bound => 26, # 26 .. 29
);
@@ -196,29 +270,81 @@ sub quote_char
{
my $self = shift;
if (@_) {
- my $qc = shift;
- $self->_set_attr_C ("quote_char", $qc);
+ $self->_set_attr_C ("quote_char", shift);
+ $self->_cache_set ($_cache_id{quote}, "");
}
$self->{quote_char};
} # quote_char
-sub escape_char
+sub quote
{
my $self = shift;
if (@_) {
- my $ec = shift;
- $self->_set_attr_C ("escape_char", $ec);
+ my $quote = shift;
+ defined $quote or $quote = "";
+ $] >= 5.008002 and utf8::decode ($quote);
+ my @b = unpack "U0C*", $quote;
+ if (@b > 1) {
+ $self->quote_char ("\0");
+ }
+ else {
+ $self->quote_char ($quote);
+ $quote = "";
+ }
+ $self->{quote} = $quote;
+
+ my $ec = _check_sanity ($self);
+ $ec and croak ($self->SetDiag ($ec));
+
+ $self->_cache_set ($_cache_id{quote}, $quote);
}
+ my $quote = $self->{quote};
+ defined $quote && length ($quote) ? $quote : $self->{quote_char};
+ } # quote
+
+sub escape_char
+{
+ my $self = shift;
+ @_ and $self->_set_attr_C ("escape_char", shift);
$self->{escape_char};
} # escape_char
sub sep_char
{
my $self = shift;
- @_ and $self->_set_attr_C ("sep_char", shift);
+ if (@_) {
+ $self->_set_attr_C ("sep_char", shift);
+ $self->_cache_set ($_cache_id{sep}, "");
+ }
$self->{sep_char};
} # sep_char
+sub sep
+{
+ my $self = shift;
+ if (@_) {
+ my $sep = shift;
+ defined $sep or $sep = "";
+ $] >= 5.008002 and utf8::decode ($sep);
+ my @b = unpack "U0C*", $sep;
+ if (@b > 1) {
+ $self->sep_char ("\0");
+ }
+ else {
+ $self->sep_char ($sep);
+ $sep = "";
+ }
+ $self->{sep} = $sep;
+
+ my $ec = _check_sanity ($self);
+ $ec and croak ($self->SetDiag ($ec));
+
+ $self->_cache_set ($_cache_id{sep}, $sep);
+ }
+ my $sep = $self->{sep};
+ defined $sep && length ($sep) ? $sep : $self->{sep_char};
+ } # sep
+
sub eol
{
my $self = shift;
@@ -299,9 +425,7 @@ sub allow_whitespace
my $self = shift;
if (@_) {
my $aw = shift;
- $aw and
- (defined $self->{quote_char} && $self->{quote_char} =~ m/^[ \t]$/) ||
- (defined $self->{escape_char} && $self->{escape_char} =~ m/^[ \t]$/) and
+ _unhealthy_whitespace ($self, $aw) and
croak ($self->SetDiag (1002));
$self->_set_attr_X ("allow_whitespace", $aw);
}
@@ -342,7 +466,7 @@ sub auto_diag
if (@_) {
my $v = shift;
!defined $v || $v eq "" and $v = 0;
- $v =~ m/^[0-9]/ or $v = $v ? 1 : 0; # default for true/false
+ $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
$self->_set_attr_X ("auto_diag", $v);
}
$self->{auto_diag};
@@ -354,7 +478,7 @@ sub diag_verbose
if (@_) {
my $v = shift;
!defined $v || $v eq "" and $v = 0;
- $v =~ m/^[0-9]/ or $v = $v ? 1 : 0; # default for true/false
+ $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
$self->_set_attr_X ("diag_verbose", $v);
}
$self->{diag_verbose};
@@ -377,6 +501,54 @@ sub eof
return $self->{_EOF};
} # status
+sub types
+{
+ my $self = shift;
+ if (@_) {
+ if (my $types = shift) {
+ $self->{_types} = join "", map { chr $_ } @{$types};
+ $self->{types} = $types;
+ }
+ else {
+ delete $self->{types};
+ delete $self->{_types};
+ undef;
+ }
+ }
+ else {
+ $self->{types};
+ }
+ } # types
+
+sub callbacks
+{
+ my $self = shift;
+ if (@_) {
+ my $cb;
+ my $hf = 0x00;
+ if (defined $_[0]) {
+ grep { !defined $_ } @_ and croak ($self->SetDiag (1004));
+ $cb = @_ == 1 && ref $_[0] eq "HASH" ? shift
+ : @_ % 2 == 0 ? { @_ }
+ : croak ($self->SetDiag (1004));
+ foreach my $cbk (keys %$cb) {
+ (!ref $cbk && $cbk =~ m/^[\w.]+$/) && ref $cb->{$cbk} eq "CODE" or
+ croak ($self->SetDiag (1004));
+ }
+ exists $cb->{error} and $hf |= 0x01;
+ exists $cb->{after_parse} and $hf |= 0x02;
+ exists $cb->{before_print} and $hf |= 0x04;
+ }
+ elsif (@_ > 1) {
+ # (undef, whatever)
+ croak ($self->SetDiag (1004));
+ }
+ $self->_set_attr_X ("_has_hooks", $hf);
+ $self->{callbacks} = $cb;
+ }
+ $self->{callbacks};
+ } # callbacks
+
# erro_diag
#
# If (and only if) an error occurred, this function returns a code that
@@ -393,6 +565,9 @@ sub error_diag
$diag[1] = $self->{_ERROR_DIAG};
$diag[2] = 1 + $self->{_ERROR_POS} if exists $self->{_ERROR_POS};
$diag[3] = $self->{_RECNO};
+
+ $diag[0] && $self && $self->{callbacks} && $self->{callbacks}{error} and
+ return $self->{callbacks}{error}->(@diag);
}
my $context = wantarray;
@@ -493,17 +668,17 @@ sub is_binary
sub is_missing
{
my ($self, $idx, $val) = @_;
- ref $self->{_FFLAGS} &&
- $idx >= 0 && $idx < @{$self->{_FFLAGS}} or return;
+ $idx < 0 || !ref $self->{_FFLAGS} and return;
+ $idx >= @{$self->{_FFLAGS}} and return 1;
$self->{_FFLAGS}[$idx] & 0x0010 ? 1 : 0;
} # is_missing
# combine
#
-# object method returning success or failure. the given arguments are
-# combined into a single comma-separated value. failure can be the
-# result of no arguments or an argument containing an invalid character.
-# side-effects include:
+# Object method returning success or failure. The given arguments are
+# combined into a single comma-separated value. Failure can be the
+# result of no arguments or an argument containing an invalid character.
+# side-effects include:
# setting status ()
# setting fields ()
# setting string ()
@@ -522,10 +697,10 @@ sub combine
# parse
#
-# object method returning success or failure. the given argument is
-# expected to be a valid comma-separated value. failure can be the
-# result of no arguments or an argument containing an invalid sequence
-# of characters. side-effects include:
+# Object method returning success or failure. The given argument is
+# expected to be a valid comma-separated value. Failure can be the
+# result of no arguments or an argument containing an invalid sequence
+# of characters. Side-effects include:
# setting status ()
# setting fields ()
# setting meta_info ()
@@ -556,7 +731,7 @@ sub column_names
{
my ($self, @keys) = @_;
@keys or
- return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : undef;
+ return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : ();
@keys == 1 && ! defined $keys[0] and
return $self->{_COLUMN_NAMES} = undef;
@@ -625,24 +800,271 @@ sub print_hr
$self->print ($io, [ map { $hr->{$_} } $self->column_names ]);
} # print_hr
-sub types
+sub fragment
{
- my $self = shift;
- if (@_) {
- if (my $types = shift) {
- $self->{_types} = join "", map { chr $_ } @{$types};
- $self->{types} = $types;
+ my ($self, $io, $spec) = @_;
+
+ my $qd = qr{\s* [0-9]+ \s* }x; # digit
+ my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x; # digit or star
+ my $qr = qr{$qd (?: - $qs )?}x; # range
+ my $qc = qr{$qr (?: ; $qr )*}x; # list
+ defined $spec && $spec =~ m{^ \s*
+ \x23 ? \s* # optional leading #
+ ( row | col | cell ) \s* =
+ ( $qc # for row and col
+ | $qd , $qd (?: - $qs , $qs)? # for cell (ranges)
+ (?: ; $qd , $qd (?: - $qs , $qs)? )* # and cell (range) lists
+ ) \s* $}xi or croak ($self->SetDiag (2013));
+ my ($type, $range) = (lc $1, $2);
+
+ my @h = $self->column_names ();
+
+ my @c;
+ if ($type eq "cell") {
+ my @spec;
+ my $min_row;
+ my $max_row = 0;
+ for (split m/\s*;\s*/ => $range) {
+ my ($tlr, $tlc, $brr, $brc) = (m{
+ ^ \s* ([0-9]+ ) \s* , \s* ([0-9]+ ) \s*
+ (?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
+ $}x) or croak ($self->SetDiag (2013));
+ defined $brr or ($brr, $brc) = ($tlr, $tlc);
+ $tlr == 0 || $tlc == 0 ||
+ ($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
+ ($brc ne "*" && ($brc == 0 || $brc < $tlc))
+ and croak ($self->SetDiag (2013));
+ $tlc--;
+ $brc-- unless $brc eq "*";
+ defined $min_row or $min_row = $tlr;
+ $tlr < $min_row and $min_row = $tlr;
+ $brr eq "*" || $brr > $max_row and
+ $max_row = $brr;
+ push @spec, [ $tlr, $tlc, $brr, $brc ];
+ }
+ my $r = 0;
+ while (my $row = $self->getline ($io)) {
+ ++$r < $min_row and next;
+ my %row;
+ my $lc;
+ foreach my $s (@spec) {
+ my ($tlr, $tlc, $brr, $brc) = @$s;
+ $r < $tlr || ($brr ne "*" && $r > $brr) and next;
+ !defined $lc || $tlc < $lc and $lc = $tlc;
+ my $rr = $brc eq "*" ? $#$row : $brc;
+ $row{$_} = $row->[$_] for $tlc .. $rr;
+ }
+ push @c, [ @row{sort { $a <=> $b } keys %row } ];
+ if (@h) {
+ my %h; @h{@h} = @{$c[-1]};
+ $c[-1] = \%h;
+ }
+ $max_row ne "*" && $r == $max_row and last;
+ }
+ return \@c;
+ }
+
+ # row or col
+ my @r;
+ my $eod = 0;
+ for (split m/\s*;\s*/ => $range) {
+ my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
+ or croak ($self->SetDiag (2013));
+ $to ||= $from;
+ $to eq "*" and ($to, $eod) = ($from, 1);
+ $from <= 0 || $to <= 0 || $to < $from and croak ($self->SetDiag (2013));
+ $r[$_] = 1 for $from .. $to;
+ }
+
+ my $r = 0;
+ $type eq "col" and shift @r;
+ $_ ||= 0 for @r;
+ while (my $row = $self->getline ($io)) {
+ $r++;
+ if ($type eq "row") {
+ if (($r > $#r && $eod) || $r[$r]) {
+ push @c, $row;
+ if (@h) {
+ my %h; @h{@h} = @{$c[-1]};
+ $c[-1] = \%h;
+ }
+ }
+ next;
+ }
+ push @c, [ map { ($_ > $#r && $eod) || $r[$_] ? $row->[$_] : () } 0..$#$row ];
+ if (@h) {
+ my %h; @h{@h} = @{$c[-1]};
+ $c[-1] = \%h;
+ }
+ }
+
+ return \@c;
+ } # fragment
+
+my $csv_usage = q{usage: my $aoa = csv (in => $file);};
+
+sub _csv_attr
+{
+ my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak;
+
+ $attr{binary} = 1;
+
+ my $enc = delete $attr{encoding} || "";
+ $enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
+
+ my $fh;
+ my $cls = 0; # If I open a file, I have to close it
+ my $in = delete $attr{in} || delete $attr{file} or croak $csv_usage;
+ my $out = delete $attr{out} || delete $attr{file};
+
+ ref $in eq "CODE" || ref $in eq "ARRAY" and $out ||= \*STDOUT;
+
+ if ($out) {
+ $in or croak $csv_usage; # No out without in
+ defined $attr{eol} or $attr{eol} = "\r\n";
+ if ((ref $out and ref $out ne "SCALAR") or "GLOB" eq ref \$out) {
+ $fh = $out;
}
else {
- delete $self->{types};
- delete $self->{_types};
- undef;
+ open $fh, ">$enc", $out or croak "$out: $!";
+ $cls = 1;
+ }
+ }
+
+ if ( ref $in eq "CODE" or ref $in eq "ARRAY") {
+ # All done
+ }
+ elsif (ref $in eq "SCALAR") {
+ # Strings with code points over 0xFF may not be mapped into in-memory file handles
+ # "<$enc" does not change that :(
+ open $fh, "<", $in or croak "Cannot open from SCALAR using PerlIO";
+ $cls = 1;
+ }
+ elsif (ref $in or "GLOB" eq ref \$in) {
+ if (!ref $in && $] < 5.008005) {
+ $fh = \*$in;
+ }
+ else {
+ $fh = $in;
}
}
else {
- $self->{types};
+ open $fh, "<$enc", $in or croak "$in: $!";
+ $cls = 1;
}
- } # types
+ $fh or croak qq{No valid source passed. "in" is required};
+
+ my $hdrs = delete $attr{headers};
+ my $frag = delete $attr{fragment};
+ my $key = delete $attr{key};
+
+ my $cbai = delete $attr{callbacks}{after_in} ||
+ delete $attr{after_in} ||
+ delete $attr{callbacks}{after_parse} ||
+ delete $attr{after_parse};
+ my $cbbo = delete $attr{callbacks}{before_out} ||
+ delete $attr{before_out};
+ my $cboi = delete $attr{callbacks}{on_in} ||
+ delete $attr{on_in};
+
+ defined $attr{auto_diag} or $attr{auto_diag} = 1;
+ my $csv = Text::CSV_XS->new (\%attr) or croak $last_new_err;
+
+ return {
+ csv => $csv,
+ fh => $fh,
+ cls => $cls,
+ in => $in,
+ out => $out,
+ hdrs => $hdrs,
+ key => $key,
+ frag => $frag,
+ cbai => $cbai,
+ cbbo => $cbbo,
+ cboi => $cboi,
+ };
+ } # _csv_attr
+
+sub csv
+{
+ # This is a function, not a method
+ @_ && ref $_[0] ne __PACKAGE__ or croak $csv_usage;
+
+ my $c = _csv_attr (@_);
+
+ my ($csv, $in, $fh, $hdrs) = @{$c}{"csv", "in", "fh", "hdrs"};
+
+ if ($c->{out}) {
+ if (ref $in eq "CODE") {
+ my $hdr = 1;
+ while (my $row = $in->($csv)) {
+ if (ref $row eq "ARRAY") {
+ $csv->print ($fh, $row);
+ next;
+ }
+ if (ref $row eq "HASH") {
+ if ($hdr) {
+ $hdrs ||= [ keys %$row ];
+ $csv->print ($fh, $hdrs);
+ $hdr = 0;
+ }
+ $csv->print ($fh, [ @{$row}{@$hdrs} ]);
+ }
+ }
+ }
+ elsif (ref $in->[0] eq "ARRAY") { # aoa
+ ref $hdrs and $csv->print ($fh, $hdrs);
+ for (@{$in}) {
+ $c->{cboi} and $c->{cboi}->($csv, $_);
+ $c->{cbbo} and $c->{cbbo}->($csv, $_);
+ $csv->print ($fh, $_);
+ }
+ }
+ else { # aoh
+ my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
+ defined $hdrs or $hdrs = "auto";
+ ref $hdrs || $hdrs eq "auto" and $csv->print ($fh, \@hdrs);
+ for (@{$in}) {
+ $c->{cboi} and $c->{cboi}->($csv, $_);
+ $c->{cbbo} and $c->{cbbo}->($csv, $_);
+ $csv->print ($fh, [ @{$_}{@hdrs} ]);
+ }
+ }
+
+ $c->{cls} and close $fh;
+ return 1;
+ }
+
+ ref $in eq "CODE" and croak "CODE only valid fro in when using out";
+
+ my $key = $c->{key} and $hdrs ||= "auto";
+ if (defined $hdrs && !ref $hdrs) {
+ $hdrs eq "skip" and $csv->getline ($fh);
+ $hdrs eq "auto" and $hdrs = $csv->getline ($fh);
+ }
+
+ my $frag = $c->{frag};
+ my $ref = ref $hdrs
+ ? # aoh
+ do {
+ $csv->column_names ($hdrs);
+ $frag ? $csv->fragment ($fh, $frag) :
+ $key ? { map { $_->{$key} => $_ } @{$csv->getline_hr_all ($fh)} }
+ : $csv->getline_hr_all ($fh);
+ }
+ : # aoa
+ $frag ? $csv->fragment ($fh, $frag)
+ : $csv->getline_all ($fh);
+ $ref or Text::CSV_XS->auto_diag;
+ $c->{cls} and close $fh;
+ if ($ref and $c->{cbai} || $c->{cboi}) {
+ for (@{$ref}) {
+ $c->{cbai} and $c->{cbai}->($csv, $_);
+ $c->{cboi} and $c->{cboi}->($csv, $_);
+ }
+ }
+ return $ref;
+ } # csv
1;
@@ -654,6 +1076,14 @@ Text::CSV_XS - comma-separated values manipulation routines
=head1 SYNOPSIS
+ # Functional interface
+ use Text::CSV_XS qw( csv );
+ # Read whole file in memory as array of arrays
+ my $aoa = csv (in => "data.csv");
+ # Write array of arrays as csv file
+ csv (in => $aoa, out => "file.csv", sep_char=> ";");
+
+ # Object interface
use Text::CSV_XS;
my @rows;
@@ -672,53 +1102,54 @@ Text::CSV_XS - comma-separated values manipulation routines
=head1 DESCRIPTION
-Text::CSV_XS provides facilities for the composition and decomposition of
-comma-separated values. An instance of the Text::CSV_XS class will combine
-fields into a CSV string and parse a CSV string into fields.
+Text::CSV_XS provides facilities for the composition and decomposition of
+comma-separated values. An instance of the Text::CSV_XS class will combine
+fields into a C<CSV> string and parse a C<CSV> string into fields.
-The module accepts either strings or files as input and support the use of
+The module accepts either strings or files as input and support the use of
user-specified characters for delimiters, separators, and escapes.
=head2 Embedded newlines
-B<Important Note>: The default behavior is to accept only ASCII characters
-in the range from C<0x20> (space) to C<0x7E> (tilde). This means that
-fields can not contain newlines. If your data contains newlines embedded
-in fields, or characters above 0x7e (tilde), or binary data, you
-B<I<must>> set C<< binary => 1 >> in the call to L</new>. To cover the
-widest range of parsing options, you will always want to set binary.
+B<Important Note>: The default behavior is to accept only ASCII characters
+in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
+fields can not contain newlines. If your data contains newlines embedded in
+fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
+set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
+parsing options, you will always want to set binary.
-But you still have the problem that you have to pass a correct line to the
+But you still have the problem that you have to pass a correct line to the
L</parse> method, which is more complicated from the usual point of usage:
my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ });
while (<>) { # WRONG!
$csv->parse ($_);
my @fields = $csv->fields ();
+ }
-will break, as the while might read broken lines, as that does not care
-about the quoting. If you need to support embedded newlines, the way to go
-is to B<not> pass C<eol> in the parser (it accepts C<\n>, C<\r>, B<and>
-C<\r\n> by default) and then
+this will break, as the C<while> might read broken lines: it does not care
+about the quoting. If you need to support embedded newlines, the way to go
+is to B<not> pass L<C<eol>|/eol> in the parser (it accepts C<\n>, C<\r>,
+B<and> C<\r\n> by default) and then
my $csv = Text::CSV_XS->new ({ binary => 1 });
open my $io, "<", $file or die "$file: $!";
while (my $row = $csv->getline ($io)) {
my @fields = @$row;
+ }
The old(er) way of using global file handles is still supported
- while (my $row = $csv->getline (*ARGV)) {
+ while (my $row = $csv->getline (*ARGV)) { ... }
=head2 Unicode
Unicode is only tested to work with perl-5.8.2 and up.
-On parsing (both for L</getline> and L</parse>), if the source is marked
-being UTF8, then all fields that are marked binary will also be marked
-UTF8.
+On parsing (both for L</getline> and L</parse>), if the source is marked
+being UTF8, then all fields that are marked binary will also be marked UTF8.
-For complete control over encoding, please use Text::CSV::Encoded:
+For complete control over encoding, please use L<Text::CSV::Encoded>:
use Text::CSV::Encoded;
my $csv = Text::CSV::Encoded->new ({
@@ -734,47 +1165,48 @@ For complete control over encoding, please use Text::CSV::Encoded:
# combine () and print () accept UTF8 marked data
# parse () and getline () return UTF8 marked data
-On combining (L</print> and L</combine>), if any of the combining fields
-was marked UTF8, the resulting string will be marked UTF8. Note however
-that all fields I<before> the first field that was marked UTF8 and
-contained 8-bit characters that were not upgraded to UTF8, these will be
-bytes in the resulting string too, causing errors. If you pass data of
-different encoding, or you don't know if there is different encoding, force
-it to be upgraded before you pass them on:
+On combining (L</print> and L</combine>): if any of the combining fields
+was marked UTF8, the resulting string will be marked as UTF8. Note however
+that all fields I<before> the first field marked UTF8 and contained 8-bit
+characters that were not upgraded to UTF8, these will be C<bytes> in the
+resulting string too, possibly causing unexpected errors. If you pass data
+of different encoding, or you don't know if there is different encoding,
+force it to be upgraded before you pass them on:
$csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
=head1 SPECIFICATION
-While no formal specification for CSV exists, RFC 4180 1) describes a
-common format and establishes "text/csv" as the MIME type registered with
-the IANA.
+While no formal specification for CSV exists, RFC 4180 I<1>) describes the
+common format and establishes C<text/csv> as the MIME type registered with
+the IANA. RFC 7111 I<2> adds fragments to CSV.
-Many informal documents exist that describe the CSV format. How To: The
-Comma Separated Value (CSV) File Format 2) provides an overview of the CSV
-format in the most widely used applications and explains how it can best be
-used and supported.
+Many informal documents exist that describe the C<CSV> format. "How To: The
+Comma Separated Value (CSV) File Format" I<3>) provides an overview of the
+C<CSV> format in the most widely used applications and explains how it can
+best be used and supported.
1) http://tools.ietf.org/html/rfc4180
- 2) http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
+ 2) http://tools.ietf.org/html/rfc7111
+ 3) http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
The basic rules are as follows:
-B<CSV> is a delimited data format that has fields/columns separated by the
+B<CSV> is a delimited data format that has fields/columns separated by the
comma character and records/rows separated by newlines. Fields that contain
-a special character (comma, newline, or double quote), must be enclosed in
-double quotes. However, if a line contains a single entry that is the
-empty string, it may be enclosed in double quotes. If a field's value
-contains a double quote character it is escaped by placing another double
-quote character next to it. The CSV file format does not require a specific
+a special character (comma, newline, or double quote), must be enclosed in
+double quotes. However, if a line contains a single entry that is the empty
+string, it may be enclosed in double quotes. If a field's value contains a
+double quote character it is escaped by placing another double quote
+character next to it. The C<CSV> file format does not require a specific
character encoding, byte order, or line terminator format.
=over 2
=item *
-Each record is a single line ended by a line feed (ASCII/LF=0x0A) or a
-carriage return and line feed pair (ASCII/CRLF=0x0D 0x0A), however,
+Each record is a single line ended by a line feed (ASCII/C<LF>=C<0x0A>) or
+a carriage return and line feed pair (ASCII/C<CRLF>=C<0x0D 0x0A>), however,
line-breaks may be embedded.
=item *
@@ -783,18 +1215,18 @@ Fields are separated by commas.
=item *
-Allowable characters within a CSV field include 0x09 (tab) and the
-inclusive range of 0x20 (space) through 0x7E (tilde). In binary mode all
-characters are accepted, at least in quoted fields.
+Allowable characters within a C<CSV> field include C<0x09> (C<TAB>) and the
+inclusive range of C<0x20> (space) through C<0x7E> (tilde). In binary mode
+all characters are accepted, at least in quoted fields.
=item *
-A field within CSV must be surrounded by double-quotes to contain a the
+A field within C<CSV> must be surrounded by double-quotes to contain a
separator character (comma).
=back
-Though this is the most clear and restrictive definition, Text::CSV_XS is
+Though this is the most clear and restrictive definition, Text::CSV_XS is
way more liberal than this, and allows extension:
=over 2
@@ -806,32 +1238,32 @@ Line termination by a single carriage return is accepted by default
=item *
The separation-, escape-, and escape- characters can be any ASCII character
-in the range from 0x20 (space) to 0x7E (tilde). Characters outside this
-range may or may not work as expected. Multibyte characters, like U+060c
-(ARABIC COMMA), U+FF0C (FULLWIDTH COMMA), U+241B (SYMBOL FOR ESCAPE),
-U+2424 (SYMBOL FOR NEWLINE), U+FF02 (FULLWIDTH QUOTATION MARK), and U+201C
-(LEFT DOUBLE QUOTATION MARK) (to give some examples of what might look
-promising) are therefor not allowed.
-
-If you use perl-5.8.2 or higher, these three attributes are utf8-decoded,
-to increase the likelihood of success. This way U+00FE will be allowed as a
+in the range from C<0x20> (space) to C<0x7E> (tilde). Characters outside
+this range may or may not work as expected. Multibyte characters, like UTF
+C<U+060C> (ARABIC COMMA), C<U+FF0C> (FULLWIDTH COMMA), C<U+241B> (SYMBOL
+FOR ESCAPE), C<U+2424> (SYMBOL FOR NEWLINE), C<U+FF02> (FULLWIDTH QUOTATION
+MARK), and C<U+201C> (LEFT DOUBLE QUOTATION MARK) (to give some examples of
+what might look promising) are therefore not allowed.
+
+If you use perl-5.8.2 or higher these three attributes are utf8-decoded, to
+increase the likelihood of success. This way C<U+00FE> will be allowed as a
quote character.
=item *
-A field within CSV must be surrounded by double-quotes to contain an
-embedded double-quote, represented by a pair of consecutive double-quotes.
-In binary mode you may additionally use the sequence C<"0> for
-representation of a NULL byte.
+A field in C<CSV> must be surrounded by double-quotes to make an embedded
+double-quote, represented by a pair of consecutive double-quotes, valid. In
+binary mode you may additionally use the sequence C<"0> for representation
+of a NULL byte. Using C<0x00> in binary mode is just as valid.
=item *
-Several violations of the above specification may be allowed by passing
-options to the object creator.
+Several violations of the above specification may be lifted by passing some
+options as attributes to the object constructor.
=back
-=head1 FUNCTIONS
+=head1 METHODS
=head2 version
X<version>
@@ -841,8 +1273,8 @@ X<version>
=head2 new
X<new>
-(Class method) Returns a new instance of Text::CSV_XS. The objects
-attributes are described by the (optional) hash ref C<\%attr>.
+(Class method) Returns a new instance of class Text::CSV_XS. The attributes
+are described by the (optional) hash ref C<\%attr>.
my $csv = Text::CSV_XS->new ({ attributes ... });
@@ -853,307 +1285,430 @@ The following attributes are available:
=item eol
X<eol>
-An end-of-line string to add to rows.
+ my $csv = Text::CSV_XS->new ({ eol => $/ });
+ $csv->eol (undef);
+ my $eol = $csv->eol;
-When not passed in a B<parser> instance, the default behavior is to accept
+The end-of-line string to add to rows for L</print> or the record separator
+for L</getline>.
+
+When not passed in a B<parser> instance, the default behavior is to accept
C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
all. Passing C<undef> or the empty string behave the same.
-Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
-(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
-Return). The C<eol> attribute cannot exceed 7 (ASCII) characters.
+When not passed in a B<generating> instance, records are not terminated at
+all, so it is probably wise to pass something you expect. A safe choice for
+C<eol> on output is either C<$/> or C<\r\n>.
+
+Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
+(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
+Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters.
-If both C<$/> and C<eol> equal C<"\015">, parsing lines that end on only a
-Carriage Return without Line Feed, will be L</parse>d correct.
+If both C<$/> and L<C<eol>|/eol> equal C<"\015">, parsing lines that end on
+only a Carriage Return without Line Feed, will be L</parse>d correct.
=item sep_char
X<sep_char>
+ my $csv = Text::CSV_XS->new ({ sep_char => ";" });
+ $csv->sep_char (";");
+ my $c = $csv->sep_char;
+
The char used to separate fields, by default a comma. (C<,>). Limited to a
-single-byte character, usually in the range from 0x20 (space) to 0x7e
-(tilde).
+single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
+(tilde). When longer sequences are required, use L<C<sep>|/sep>.
-The separation character can not be equal to the quote character. The
-separation character can not be equal to the escape character.
+The separation character can not be equal to the quote character or to the
+escape character.
See also L</CAVEATS>
-=item allow_whitespace
-X<allow_whitespace>
+=item sep
+X<sep>
-When this option is set to true, whitespace (TAB's and SPACE's) surrounding
-the separation character is removed when parsing. If either TAB or SPACE is
-one of the three major characters C<sep_char>, C<quote_char>, or
-C<escape_char> it will not be considered whitespace.
+ my $csv = Text::CSV_XS->new ({ sep => "\N{FULLWIDTH COMMA}" });
+ $csv->sep (";");
+ my $sep = $csv->sep;
-Now lines like:
+The chars used to separate fields, by default undefined. Limited to 8 bytes.
- 1 , "foo" , bar , 3 , zapp
+When set, overrules L<C<sep_char>|/sep_char>. If its length is one byte it
+acts as an alias to L<C<sep_char>|/sep_char>.
-are correctly parsed, even though it violates the CSV specs.
+See also L</CAVEATS>
-Note that B<all> whitespace is stripped from start and end of each field.
-That would make it more a I<feature> than a way to enable parsing bad CSV
-lines, as
+=item quote_char
+X<quote_char>
- 1, 2.0, 3, ape , monkey
+ my $csv = Text::CSV_XS->new ({ quote_char => "'" });
+ $csv->quote_char (undef);
+ my $c = $csv->quote_char;
-will now be parsed as
+The character to quote fields containing blanks or binary data, by default
+the double quote character (C<">). A value of undef suppresses quote chars
+(for simple cases only). Limited to a single-byte character, usually in the
+range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
+required, use L<C<quote>|/quote>.
- ("1", "2.0", "3", "ape", "monkey")
+C<quote_char> can not be equal to L<C<sep_char>|/sep_char>.
-even if the original line was perfectly sane CSV.
+=item quote
+X<quote>
-=item blank_is_undef
-X<blank_is_undef>
+ my $csv = Text::CSV_XS->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
+ $csv->quote ("'");
+ my $quote = $csv->quote;
-Under normal circumstances, CSV data makes no distinction between quoted-
-and unquoted empty fields. These both end up in an empty string field once
-read, thus
+The chars used to quote fields, by default undefined. Limited to 8 bytes.
- 1,"",," ",2
+When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte
+it acts as an alias to L<C<quote_char>|/quote_char>.
-is read as
+See also L</CAVEATS>
- ("1", "", "", " ", "2")
+=item escape_char
+X<escape_char>
-When I<writing> CSV files with C<always_quote> set, the unquoted empty
-field is the result of an undefined value. To make it possible to also make
-this distinction when reading CSV data, the C<blank_is_undef> option will
-cause unquoted empty fields to be set to undef, causing the above to be
-parsed as
+ my $csv = Text::CSV_XS->new ({ escape_char => "\\" });
+ $csv->escape_char (undef);
+ my $c = $csv->escape_char;
- ("1", "", undef, " ", "2")
+The character to escape certain characters inside quoted fields. This is
+limited to a single-byte character, usually in the range from C<0x20>
+(space) to C<0x7E> (tilde).
-=item empty_is_undef
-X<empty_is_undef>
+The C<escape_char> defaults to being the double-quote mark (C<">). In other
+words the same as the default L<C<quote_char>|/quote_char>. This means that
+doubling the quote mark in a field escapes it:
-Going one step further than C<blank_is_undef>, this attribute converts all
-empty fields to undef, so
+ "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
- 1,"",," ",2
+If you change the L<C<quote_char>|/quote_char> without changing the
+C<escape_char>, the C<escape_char> will still be the double-quote (C<">).
+If instead you want to escape the L<C<quote_char>|/quote_char> by doubling
+it you will need to also change the C<escape_char> to be the same as what
+you have changed the L<C<quote_char>|/quote_char> to.
-is read as
+The escape character can not be equal to the separation character.
- (1, undef, undef, " ", 2)
+=item binary
+X<binary>
-Note that this effects only fields that are I<really> empty, not fields
-that are empty after stripping allowed whitespace. YMMV.
+ my $csv = Text::CSV_XS->new ({ binary => 1 });
+ $csv->binary (0);
+ my $f = $csv->binary;
-=item quote_char
-X<quote_char>
+If this attribute is C<1>, you may use binary characters in quoted fields,
+including line feeds, carriage returns and C<NULL> bytes. (The latter could
+be escaped as C<"0>.) By default this feature is off.
-The character to quote fields containing blanks, by default the double
-quote character (C<">). A value of undef suppresses quote chars (for simple
-cases only). Limited to a single-byte character, usually in the range from
-0x20 (space) to 0x7e (tilde).
+If a string is marked UTF8, C<binary> will be turned on automatically when
+binary characters other than C<CR> and C<NL> are encountered. Note that a
+simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
+so setting C<< { binary => 1 } >> is still a wise option.
-The quote character can not be equal to the separation character.
+=item decode_utf8
+X<decode_utf8>
-=item allow_loose_quotes
-X<allow_loose_quotes>
+ my $csv = Text::CSV_XS->new ({ decode_utf8 => 1 });
+ $csv->decode_utf8 (0);
+ my $f = $csv->decode_utf8;
-By default, parsing fields that have C<quote_char> characters inside an
-unquoted field, like
+This attributes defaults to TRUE.
- 1,foo "bar" baz,42
+While I<parsing>, fields that are valid UTF-8, are automatically set to be
+UTF-8, so that
-would result in a parse error. Though it is still bad practice to allow
-this format, we cannot help the fact some vendors make their applications
-spit out lines styled that way.
+ $csv->parse ("\xC4\xA8\n");
-If there is B<really> bad CSV data, like
+results in
- 1,"foo "bar" baz",42
+ PV("\304\250"\0) [UTF8 "\x{128}"]
-or
+Sometimes it might not be a desired action. To prevent those upgrades, set
+this attribute to false, and the result will be
- 1,""foo bar baz"",42
+ PV("\304\250"\0)
-there is a way to get that parsed, and leave the quotes inside the quoted
-field as-is. This can be achieved by setting C<allow_loose_quotes> B<AND>
-making sure that the C<escape_char> is I<not> equal to C<quote_char>.
+=item auto_diag
+X<auto_diag>
-=item escape_char
-X<escape_char>
+ my $csv = Text::CSV_XS->new ({ auto_diag => 1 });
+ $csv->auto_diag (2);
+ my $l = $csv->auto_diag;
-The character to escape certain characters inside quoted fields. Limited
-to a single-byte character, usually in the range from 0x20 (space) to 0x7e
-(tilde).
+Set this attribute to a number between C<1> and C<9> causes L</error_diag>
+to be automatically called in void context upon errors.
-The C<escape_char> defaults to being the literal double-quote mark (C<">)
-in other words, the same as the default C<quote_char>. This means that
-doubling the quote mark in a field escapes it:
+In case of error C<2012 - EOF>, this call will be void.
- "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
+If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
+on errors instead of C<warn>. If set to anything unrecognized, it will be
+silently ignored.
-If you change the default quote_char without changing the default
-escape_char, the escape_char will still be the quote mark. If instead you
-want to escape the quote_char by doubling it, you will need to change the
-escape_char to be the same as what you changed the quote_char to.
+Future extensions to this feature will include more reliable auto-detection
+of C<autodie> being active in the scope of which the error occurred which
+will increment the value of C<auto_diag> with C<1> the moment the error is
+detected.
-The escape character can not be equal to the separation character.
+=item diag_verbose
+X<diag_verbose>
-=item allow_loose_escapes
-X<allow_loose_escapes>
+ my $csv = Text::CSV_XS->new ({ diag_verbose => 1 });
+ $csv->diag_verbose (2);
+ my $l = $csv->diag_verbose;
-By default, parsing fields that have C<escape_char> characters that escape
-characters that do not need to be escaped, like:
+Set the verbosity of the output triggered by C<auto_diag>. Currently only
+adds the current input-record-number (if known) to the diagnostic output
+with an indication of the position of the error.
- my $csv = Text::CSV_XS->new ({ escape_char => "\\" });
- $csv->parse (qq{1,"my bar\'s",baz,42});
+=item blank_is_undef
+X<blank_is_undef>
-would result in a parse error. Though it is still bad practice to allow
-this format, this option enables you to treat all escape character
-sequences equal.
+ my $csv = Text::CSV_XS->new ({ blank_is_undef => 1 });
+ $csv->blank_is_undef (0);
+ my $f = $csv->blank_is_undef;
-=item allow_unquoted_escape
-X<allow_unquoted_escape>
+Under normal circumstances, C<CSV> data makes no distinction between quoted-
+and unquoted empty fields. These both end up in an empty string field once
+read, thus
-There is a backward compatibility issue in that the escape character, when
-differing from the quotation character, cannot be on the first position of
-a field. e.g. with C<quote_char> equal to the default C<"> and
-C<escape_char> set to C<\>, this would be illegal:
+ 1,"",," ",2
- 1,\0,2
+is read as
-To overcome issues with backward compatibility, you can allow this by
-setting this attribute to 1.
+ ("1", "", "", " ", "2")
-=item binary
-X<binary>
+When I<writing> C<CSV> files with L<C<always_quote>|/always_quote> set, the
+unquoted I<empty> field is the result of an undefined value. To enable this
+distinction when I<reading> C<CSV> data, the C<blank_is_undef> attribute
+will cause unquoted empty fields to be set to C<undef>, causing the above
+to be parsed as
-If this attribute is TRUE, you may use binary characters in quoted fields,
-including line feeds, carriage returns and NULL bytes. (The latter must be
-escaped as C<"0>.) By default this feature is off.
+ ("1", "", undef, " ", "2")
-If a string is marked UTF8, binary will be turned on automatically when
-binary characters other than CR or NL are encountered. Note that a simple
-string like C<"\x{00a0}"> might still be binary, but not marked UTF8, so
-setting C<{ binary => 1 }> is still a wise option.
+=item empty_is_undef
+X<empty_is_undef>
-=item decode_utf8
-X<decode_utf8>
+ my $csv = Text::CSV_XS->new ({ empty_is_undef => 1 });
+ $csv->empty_is_undef (0);
+ my $f = $csv->empty_is_undef;
-This attributes defaults to TRUE.
+Going one step further than L<C<blank_is_undef>|/blank_is_undef>, this
+attribute converts all empty fields to C<undef>, so
-While parsing, fields that are valid UTF-8, are automatically set to be
-UTF-8, so that
+ 1,"",," ",2
- $csv->parse ("\xC4\xA8\n");
+is read as
-results in
+ (1, undef, undef, " ", 2)
- PV("\304\250"\0) [UTF8 "\x{128}"]
+Note that this effects only fields that are originally empty, not fields
+that are empty after stripping allowed whitespace. YMMV.
-Sometimes it might not be a desired action. To prevent those upgrades,
-set this attribute to false, and the result will be
+=item allow_whitespace
+X<allow_whitespace>
- PV("\304\250"\0)
+ my $csv = Text::CSV_XS->new ({ allow_whitespace => 1 });
+ $csv->allow_whitespace (0);
+ my $f = $csv->allow_whitespace;
-=item types
-X<types>
+When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s)
+surrounding the separation character is removed when parsing. If either
+C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>,
+L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not
+be considered whitespace.
+
+Now lines like:
+
+ 1 , "foo" , bar , 3 , zapp
+
+are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
+
+Note that B<all> whitespace is stripped from both start and end of each
+field. That would make it I<more> than a I<feature> to enable parsing bad
+C<CSV> lines, as
+
+ 1, 2.0, 3, ape , monkey
+
+will now be parsed as
+
+ ("1", "2.0", "3", "ape", "monkey")
+
+even if the original line was perfectly acceptable C<CSV>.
+
+=item allow_loose_quotes
+X<allow_loose_quotes>
-A set of column types; this attribute is immediately passed to the
-L</types> method. You must not set this attribute otherwise, except for
-using the L</types> method.
+ my $csv = Text::CSV_XS->new ({ allow_loose_quotes => 1 });
+ $csv->allow_loose_quotes (0);
+ my $f = $csv->allow_loose_quotes;
+
+By default, parsing unquoted fields containing L<C<quote_char>|/quote_char>
+characters like
+
+ 1,foo "bar" baz,42
+
+would result in parse error 2034. Though it is still bad practice to allow
+this format, we cannot help the fact that some vendors make their
+applications spit out lines styled this way.
+
+If there is B<really> bad C<CSV> data, like
+
+ 1,"foo "bar" baz",42
+
+or
+
+ 1,""foo bar baz"",42
+
+there is a way to get this data-line parsed and leave the quotes inside the
+quoted field as-is. This can be achieved by setting C<allow_loose_quotes>
+B<AND> making sure that the L<C<escape_char>|/escape_char> is I<not> equal
+to L<C<quote_char>|/quote_char>.
+
+=item allow_loose_escapes
+X<allow_loose_escapes>
+
+ my $csv = Text::CSV_XS->new ({ allow_loose_escapes => 1 });
+ $csv->allow_loose_escapes (0);
+ my $f = $csv->allow_loose_escapes;
+
+Parsing fields that have L<C<escape_char>|/escape_char> characters that
+escape characters that do not need to be escaped, like:
+
+ my $csv = Text::CSV_XS->new ({ escape_char => "\\" });
+ $csv->parse (qq{1,"my bar\'s",baz,42});
+
+would result in parse error 2025. Though it is bad practice to allow this
+format, this attribute enables you to treat all escape character sequences
+equal.
+
+=item allow_unquoted_escape
+X<allow_unquoted_escape>
+
+ my $csv = Text::CSV_XS->new ({ allow_unquoted_escape => 1 });
+ $csv->allow_unquoted_escape (0);
+ my $f = $csv->allow_unquoted_escape;
+
+A backward compatibility issue where L<C<escape_char>|/escape_char> differs
+from L<C<quote_char>|/quote_char> prevents L<C<escape_char>|/escape_char>
+to be in the first position of a field. If L<C<quote_char>|/quote_char> is
+equal to the default C<"> and L<C<escape_char>|/escape_char> is set to C<\>,
+this would be illegal:
+
+ 1,\0,2
+
+Setting this attribute to C<1> might help to overcome issues with backward
+compatibility and allow this style.
=item always_quote
X<always_quote>
-By default the generated fields are quoted only if they need to be. For
+ my $csv = Text::CSV_XS->new ({ always_quote => 1 });
+ $csv->always_quote (0);
+ my $f = $csv->always_quote;
+
+By default the generated fields are quoted only if they I<need> to be. For
example, if they contain the separator character. If you set this attribute
-to a TRUE value, then all defined fields will be quoted. (C<undef> fields
-are not quoted, see L</blank_is_undef>)). This is typically easier to
-handle in external applications. (Poor creatures who are not using
-Text::CSV_XS. :-)
+to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
+quoted, see L</blank_is_undef>). This makes it quite often easier to handle
+exported data in external applications. (Poor creatures who are better to
+use Text::CSV_XS. :)
=item quote_space
X<quote_space>
-By default, a space in a field would trigger quotation. As no rule exists
-this to be forced in CSV, nor any for the opposite, the default is true for
-safety. You can exclude the space from this trigger by setting this
+ my $csv = Text::CSV_XS->new ({ quote_space => 1 });
+ $csv->quote_space (0);
+ my $f = $csv->quote_space;
+
+By default, a space in a field would trigger quotation. As no rule exists
+this to be forced in C<CSV>, nor any for the opposite, the default is true
+for safety. You can exclude the space from this trigger by setting this
attribute to 0.
=item quote_null
X<quote_null>
-By default, a NULL byte in a field would be escaped. This attribute enables
-you to treat the NULL byte as a simple binary character in binary mode (the
-C<< { binary => 1 } >> is set). The default is true. You can prevent NULL
-escapes by setting this attribute to 0.
+ my $csv = Text::CSV_XS->new ({ quote_null => 1 });
+ $csv->quote_null (0);
+ my $f = $csv->quote_null;
+
+By default, a C<NULL> byte in a field would be escaped. This option enables
+you to treat the C<NULL> byte as a simple binary character in binary mode
+(the C<< { binary => 1 } >> is set). The default is true. You can prevent
+C<NULL> escapes by setting this attribute to C<0>.
=item quote_binary
X<quote_binary>
+ my $csv = Text::CSV_XS->new ({ quote_binary => 1 });
+ $csv->quote_binary (0);
+ my $f = $csv->quote_binary;
+
By default, all "unsafe" bytes inside a string cause the combined field to
-be quoted. By setting this attribute to 0, you can disable that trigger for
-bytes >= 0x7f.
+be quoted. By setting this attribute to C<0>, you can disable that trigger
+for bytes >= C<0x7F>.
=item keep_meta_info
X<keep_meta_info>
-By default, the parsing of input lines is as simple and fast as possible.
-However, some parsing information - like quotation of the original field -
-is lost in that process. Set this flag to true to enable retrieving that
-information after parsing with the methods L</meta_info>, L</is_quoted>,
-and L</is_binary> described below. Default is false.
+ my $csv = Text::CSV_XS->new ({ keep_meta_info => 1 });
+ $csv->keep_meta_info (0);
+ my $f = $csv->keep_meta_info;
+
+By default, the parsing of input records is as simple and fast as possible.
+However, some parsing information - like quotation of the original field -
+is lost in that process. Setting this flag to true enables retrieving that
+information after parsing with the methods L</meta_info>, L</is_quoted>,
+and L</is_binary> described below. Default is false for performance.
=item verbatim
X<verbatim>
-This is a quite controversial attribute to set, but it makes hard things
+ my $csv = Text::CSV_XS->new ({ verbatim => 1 });
+ $csv->verbatim (0);
+ my $f = $csv->verbatim;
+
+This is a quite controversial attribute to set, but makes some hard things
possible.
-The basic thought behind this is to tell the parser that the normally
-special characters newline (NL) and Carriage Return (CR) will not be
-special when this flag is set, and be dealt with as being ordinary binary
+The rationale behind this attribute is to tell the parser that the normally
+special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
+special when this flag is set, and be dealt with as being ordinary binary
characters. This will ease working with data with embedded newlines.
-When C<verbatim> is used with L</getline>, L</getline> auto-chomp's every
-line.
+When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
+every line.
Imagine a file format like
M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
-where, the line ending is a very specific "#\r\n", and the sep_char is a ^
-(caret). None of the fields is quoted, but embedded binary data is likely
-to be present. With the specific line ending, that should not be too hard
-to detect.
+where, the line ending is a very specific C<"#\r\n">, and the sep_char is a
+C<^> (caret). None of the fields is quoted, but embedded binary data is
+likely to be present. With the specific line ending, this should not be too
+hard to detect.
-By default, Text::CSV_XS' parse function is instructed to only know about
-"\n" and "\r" to be legal line endings, and so has to deal with the
-embedded newline as a real end-of-line, so it can scan the next line if
-binary is true, and the newline is inside a quoted field. With this
-attribute, we tell parse () to parse the line as if "\n" is just nothing
-more than a binary character.
+By default, Text::CSV_XS' parse function is instructed to only know about
+C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
+embedded newline as a real C<end-of-line>, so it can scan the next line if
+binary is true, and the newline is inside a quoted field. With this option,
+we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
+a binary character.
-For parse () this means that the parser has no idea about line ending
-anymore, and getline () chomps line endings on reading.
+For L</parse> this means that the parser has no more idea about line ending
+and L</getline> C<chomp>s line endings on reading.
-=item auto_diag
-X<auto_diag>
-
-Set to a true number between 1 and 9 will cause L</error_diag> to be
-automatically be called in void context upon errors.
-
-In case of error C<2012 - EOF>, this call will be void.
+=item types
-If set to a value greater than 1, it will die on errors instead of warn.
-If set to anything unsupported, it will be silently ignored.
+A set of column types; the attribute is immediately passed to the L</types>
+method.
-Future extensions to this feature will include more reliable auto-detection
-of the C<autodie> module being enabled, which will raise the value of
-C<auto_diag> with C<1> on the moment the error is detected.
+=item callbacks
+X<callbacks>
-=item diag_verbose
-X<diag_verbose>
-
-Set the verbosity of the C<auto_diag> output. Currently only adds the
-current input line (if known) to the diagnostic output with an indication
-of the position of the error.
+See the L</Callbacks> section below.
=back
@@ -1164,26 +1719,30 @@ To sum it up,
is equivalent to
$csv = Text::CSV_XS->new ({
+ eol => undef, # \r, \n, or \r\n
+ sep_char => ',',
+ sep => undef,
quote_char => '"',
+ quote => undef,
escape_char => '"',
- sep_char => ',',
- eol => $\,
- always_quote => 0,
- quote_space => 1,
- quote_null => 1,
- quote_binary => 1,
binary => 0,
decode_utf8 => 1,
- keep_meta_info => 0,
+ auto_diag => 0,
+ diag_verbose => 0,
+ blank_is_undef => 0,
+ empty_is_undef => 0,
+ allow_whitespace => 0,
allow_loose_quotes => 0,
allow_loose_escapes => 0,
allow_unquoted_escape => 0,
- allow_whitespace => 0,
- blank_is_undef => 0,
- empty_is_undef => 0,
+ always_quote => 0,
+ quote_space => 1,
+ quote_null => 1,
+ quote_binary => 1,
+ keep_meta_info => 0,
verbatim => 0,
- auto_diag => 0,
- diag_verbose => 0,
+ types => undef,
+ callbacks => undef,
});
For all of the above mentioned flags, an accessor method is available where
@@ -1192,11 +1751,11 @@ you can inquire the current value, or change the value
my $quote = $csv->quote_char;
$csv->binary (1);
-It is unwise to change these settings halfway through writing CSV data to a
-stream. If however, you want to create a new stream using the available CSV
-object, there is no harm in changing them.
+It is not wise to change these settings halfway through writing C<CSV> data
+to a stream. If however you want to create a new stream using the available
+C<CSV> object, there is no harm in changing them.
-If the L</new> constructor call fails, it returns C<undef>, and makes the
+If the L</new> constructor call fails, it returns C<undef>, and makes the
fail reason available through the L</error_diag> method.
$csv = Text::CSV_XS->new ({ ecs_char => 1 }) or
@@ -1211,17 +1770,17 @@ X<print>
$status = $csv->print ($io, $colref);
-Similar to L</combine> + L</string> + L</print>, but way more efficient. It
-expects an array ref as input (not an array!) and the resulting string is
-not really created, but immediately written to the I<$io> object, typically
-an IO handle or any other object that offers a L</print> method.
+Similar to L</combine> + L</string> + L</print>, but much more efficient.
+It expects an array ref as input (not an array!) and the resulting string
+is not really created, but immediately written to the C<$io> object,
+typically an IO handle or any other object that offers a L</print> method.
-For performance reasons the print method does not create a result string.
-In particular the L</string>, L</status>, L</fields>, and L</error_input>
-methods are meaningless after executing this method.
+For performance reasons C<print> does not create a result string, so all
+L</string>, L</status>, L</fields>, and L</error_input> methods will return
+undefined information after executing this method.
-If C<$colref> is C<undef> (explicit, not through a variable argument) and
-L</bind_columns> was used to specify fields to be printed, it is possible
+If C<$colref> is C<undef> (explicit, not through a variable argument) and
+L</bind_columns> was used to specify fields to be printed, it is possible
to make performance improvements, as otherwise data would have to be copied
as arguments to the method call:
@@ -1233,44 +1792,57 @@ A short benchmark
my @data = ("aa" .. "zz");
$csv->bind_columns (\(@data));
- $csv->print ($io, [ @data ]); # 10800 recs/sec
- $csv->print ($io, \@data ); # 57100 recs/sec
- $csv->print ($io, undef ); # 50500 recs/sec
+ $csv->print ($io, [ @data ]); # 11800 recs/sec
+ $csv->print ($io, \@data ); # 57600 recs/sec
+ $csv->print ($io, undef ); # 48500 recs/sec
+
+=head2 print_hr
+X<print_hr>
+
+ $csv->print_hr ($io, $ref);
+
+Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>)
+provided the column names are set with L</column_names>.
+
+It is just a wrapper method with basic parameter checks over
+
+ $csv->print ($io, [ map { $ref->{$_} } $csv->column_names ]);
=head2 combine
X<combine>
$status = $csv->combine (@columns);
-This object function constructs a CSV string from the arguments, returning
-success or failure. Failure can result from lack of arguments or an
-argument containing an invalid character. Upon success, L</string> can be
-called to retrieve the resultant CSV string. Upon failure, the value
-returned by L</string> is undefined and L</error_input> can be called to
-retrieve an invalid argument.
+This method constructs a C<CSV> string from C<@columns>, returning success
+or failure. Failure can result from lack of arguments or an argument that
+contains an invalid character. Upon success, L</string> can be called to
+retrieve the resultant C<CSV> string. Upon failure, the value returned by
+L</string> is undefined and L</error_input> could be called to retrieve the
+invalid argument.
=head2 string
X<string>
$line = $csv->string ();
-This object function returns the input to L</parse> or the resultant CSV
-string of L</combine>, whichever was called more recently.
+This method returns the input to L</parse> or the resultant C<CSV> string
+of L</combine>, whichever was called more recently.
=head2 getline
X<getline>
$colref = $csv->getline ($io);
-This is the counterpart to L</print>, as L</parse> is the counterpart to
-L</combine>: It reads a row from the IO object using C<< $io->getline >>
-and parses this row into an array ref. This array ref is returned by the
-function or undef for failure.
+This is the counterpart to L</print>, as L</parse> is the counterpart to
+L</combine>: it parses a row from the C<$io> handle using the L</getline>
+method associated with C<$io> and parses this row into an array ref. This
+array ref is returned by the function or C<undef> for failure. When C<$io>
+does not support C<getline>, you are likely to hit errors.
-When fields are bound with L</bind_columns>, the return value is a
-reference to an empty list.
+When fields are bound with L</bind_columns> the return value is a reference
+to an empty list.
-The L</string>, L</fields>, and L</status> methods are meaningless, again.
+The L</string>, L</fields>, and L</status> methods are meaningless again.
=head2 getline_all
X<getline_all>
@@ -1280,8 +1852,8 @@ X<getline_all>
$arrayref = $csv->getline_all ($io, $offset, $length);
This will return a reference to a list of L<getline ($io)|/getline> results.
-In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
-with C<splice>, only the last C<abs ($offset)> records of C<$io> are taken
+In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
+with C<splice>, only the last C<abs ($offset)> records of C<$io> are taken
into consideration.
Given a CSV file with 10 lines:
@@ -1297,26 +1869,11 @@ Given a CSV file with 10 lines:
8..9 $csv->getline_all ($io, -2) # last 2 rows
6..7 $csv->getline_all ($io, -4, 2) # first 2 of last 4 rows
-=head2 parse
-X<parse>
-
- $status = $csv->parse ($line);
-
-This object function decomposes a CSV string into fields, returning success
-or failure. Failure can result from a lack of argument or the given CSV
-string is improperly formatted. Upon success, L</fields> can be called to
-retrieve the decomposed fields . Upon failure, the value returned by
-L</fields> is undefined and L</error_input> can be called to retrieve the
-invalid argument.
-
-You may use the L</types> method for setting column types. See L</types>'
-description below.
-
=head2 getline_hr
X<getline_hr>
-The L</getline_hr> and L</column_names> methods work together to allow you
-to have rows returned as hashrefs. You must call L</column_names> first to
+The L</getline_hr> and L</column_names> methods work together to allow you
+to have rows returned as hashrefs. You must call L</column_names> first to
declare your column names.
$csv->column_names (qw( code name price description ));
@@ -1325,8 +1882,8 @@ declare your column names.
L</getline_hr> will croak if called before L</column_names>.
-Note that L</getline_hr> creates a hashref for every row and will be much
-slower than the combined use of L</bind_columns> and L</getline> but still
+Note that L</getline_hr> creates a hashref for every row and will be much
+slower than the combined use of L</bind_columns> and L</getline> but still
offering the same ease of use hashref inside the loop:
my @cols = @{$csv->getline ($io)};
@@ -1358,41 +1915,135 @@ X<getline_hr_all>
$arrayref = $csv->getline_hr_all ($io, $offset);
$arrayref = $csv->getline_hr_all ($io, $offset, $length);
-This will return a reference to a list of L<getline_hr ($io)|/getline_hr>
-results. In this call, C<keep_meta_info> is disabled.
+This will return a reference to a list of L<getline_hr ($io)|/getline_hr>
+results. In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled.
-=head2 print_hr
-X<print_hr>
+=head2 parse
+X<parse>
- $csv->print_hr ($io, $ref);
+ $status = $csv->parse ($line);
-Provides an easy way to print a C<$ref> as fetched with L<getline_hr>
-provided the column names are set with L<column_names>.
+This method decomposes a C<CSV> string into fields, returning success or
+failure. Failure can result from a lack of argument or the given C<CSV>
+string is improperly formatted. Upon success, L</fields> can be called to
+retrieve the decomposed fields. Upon failure calling L</fields> will return
+undefined data and L</error_input> can be called to retrieve the invalid
+argument.
-It is just a wrapper method with basic parameter checks over
+You may use the L</types> method for setting column types. See L</types>'
+description below.
- $csv->print ($io, [ map { $ref->{$_} } $csv->column_names ]);
+=head2 fragment
+X<fragment>
+
+This function tries to implement RFC7111 (URI Fragment Identifiers for the
+text/csv Media Type) - http://tools.ietf.org/html/rfc7111
+
+ my $AoA = $csv->fragment ($io, $spec);
+
+In specifications, C<*> is used to specify the I<last> item, a dash (C<->)
+to indicate a range. All indices are C<1>-based: the first row or column
+has index C<1>. Selections can be combined with the semi-colon (C<;>).
+
+When using this method in combination with L</column_names>, the returned
+reference will point to a list of hashes instead of a list of lists. A
+disjointed cell-based combined selection might return rows with different
+number of columns making the use of hashes unpredictable.
+
+ $csv->column_names ("Name", "Age");
+ my $AoH = $csv->fragment ($io, "col=3;8");
+
+If the L</after_parse> callback is active, it is also called on every line
+parsed and skipped before the fragment.
+
+=over 2
+
+=item row
+
+ row=4
+ row=5-7
+ row=6-*
+ row=1-2;4;6-*
+
+=item col
+
+ col=2
+ col=1-3
+ col=4-*
+ col=1-2;4;7-*
+
+=item cell
+
+In cell-based selection, the comma (C<,>) is used to pair row and column
+
+ cell=4,1
+
+The range operator (C<->) using C<cell>s can be used to define top-left and
+bottom-right C<cell> location
+
+ cell=3,1-4,6
+
+The C<*> is only allowed in the second part of a pair
+
+ cell=3,2-*,2 # row 3 till end, only column 2
+ cell=3,2-3,* # column 2 till end, only row 3
+ cell=3,2-*,* # strip row 1 and 2, and column 1
+
+Cells and cell ranges may be combined with C<;>, possibly resulting in rows
+with different number of columns
+
+ cell=1,1-2,2;3,3-4,4;1,4;4,1
+
+Disjointed selections will only return selected cells. The cells that are
+not specified will not be included in the returned set, not even as
+C<undef>. As an example given a C<CSV> like
+
+ 11,12,13,...19
+ 21,22,...28,29
+ : :
+ 91,...97,98,99
+
+with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
+
+ 11,12,14
+ 21,22
+ 33,34
+ 41,43,44
+
+Overlapping cell-specs will return those cells only once, So
+C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
+
+ 11,12,13
+ 21,22,23,24
+ 31,32,33,34
+ 42,43,44
+
+=back
+
+L<RFC7111|http://tools.ietf.org/html/rfc7111> does B<not> allow different
+types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>).
+Passing an invalid fragment specification will croak and set error 2013.
=head2 column_names
X<column_names>
-Set the keys that will be used in the L</getline_hr> calls. If no keys
-(column names) are passed, it'll return the current setting.
+Set the "keys" that will be used in the L</getline_hr> calls. If no keys
+(column names) are passed, it will return the current setting as a list.
-L</column_names> accepts a list of scalars (the column names) or a single
-array_ref, so you can pass L</getline>
+L</column_names> accepts a list of scalars (the column names) or a single
+array_ref, so you can pass the return value from L</getline> too:
$csv->column_names ($csv->getline ($io));
L</column_names> does B<no> checking on duplicates at all, which might lead
-to unwanted results. Undefined entries will be replaced with the string
+to unexpected results. Undefined entries will be replaced with the string
C<"\cAUNDEF\cA">, so
$csv->column_names (undef, "", "name", "name");
$hr = $csv->getline_hr ($io);
-Will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
-the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
+Will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
+the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
field.
L</column_names> croaks on invalid arguments.
@@ -1400,23 +2051,23 @@ L</column_names> croaks on invalid arguments.
=head2 bind_columns
X<bind_columns>
-Takes a list of references to scalars to be printed with L</print> or to
-store the fields fetched by L</getline> in. When you don't pass enough
-references to store the fetched fields in, L</getline> will fail. If you
-pass more than there are fields to return, the remaining references are
-left untouched.
+Takes a list of scalar references to be used for output with L</print> or
+to store in the fields fetched by L</getline>. When you do not pass enough
+references to store the fetched fields in, L</getline> will fail with error
+C<3006>. If you pass more than there are fields to return, the content of
+the remaining references is left untouched.
$csv->bind_columns (\$code, \$name, \$price, \$description);
while ($csv->getline ($io)) {
print "The price of a $name is \x{20ac} $price\n";
}
-To reset or clear all column binding, call L</bind_columns> with a single
+To reset or clear all column binding, call L</bind_columns> with the single
argument C<undef>. This will also clear column names.
$csv->bind_columns (undef);
-If no arguments are passed at all, L</bind_columns> will return the list
+If no arguments are passed at all, L</bind_columns> will return the list of
current bindings or C<undef> if no binds are active.
=head2 eof
@@ -1424,9 +2075,9 @@ X<eof>
$eof = $csv->eof ();
-If L</parse> or L</getline> was used with an IO stream, this method will
-return true (1) if the last call hit end of file, otherwise it will return
-false (''). This is useful to see the difference between a failure and end
+If L</parse> or L</getline> was used with an IO stream, this method will
+return true (1) if the last call hit end of file, otherwise it will return
+false (''). This is useful to see the difference between a failure and end
of file.
=head2 types
@@ -1434,17 +2085,17 @@ X<types>
$csv->types (\@tref);
-This method is used to force that columns are of a given type. For example,
-if you have an integer column, two double columns and a string column, then
-you might do a
+This method is used to force that (all) columns are of a given type. For
+example, if you have an integer column, two columns with doubles and a
+string column, then you might do a
$csv->types ([Text::CSV_XS::IV (),
Text::CSV_XS::NV (),
Text::CSV_XS::NV (),
Text::CSV_XS::PV ()]);
-Column types are used only for decoding columns, in other words by the
-L</parse> and L</getline> methods.
+Column types are used only for I<decoding> columns while parsing, in other
+words by the L</parse> and L</getline> methods.
You can unset column types by doing a
@@ -1478,9 +2129,8 @@ X<fields>
@columns = $csv->fields ();
-This object function returns the input to L</combine> or the resultant
-decomposed fields of a successful L</parse>, whichever was called more
-recently.
+This method returns the input to L</combine> or the resultant decomposed
+fields of a successful L</parse>, whichever was called more recently.
Note that the return value is undefined after using L</getline>, which does
not fill the data structures returned by L</parse>.
@@ -1490,13 +2140,13 @@ X<meta_info>
@flags = $csv->meta_info ();
-This object function returns the flags of the input to L</combine> or the
-flags of the resultant decomposed fields of L</parse>, whichever was called
-more recently.
+This method returns the "flags" of the input to L</combine> or the flags of
+the resultant decomposed fields of L</parse>, whichever was called more
+recently.
-For each field, a meta_info field will hold flags that tell something about
-the field returned by the L</fields> method or passed to the L</combine>
-method. The flags are bit-wise-or'd like:
+For each field, a meta_info field will hold flags that inform something
+about the field returned by the L</fields> method or passed to the
+L</combine> method. The flags are bit-wise-C<or>'d like:
=over 2
@@ -1517,20 +2167,20 @@ X<is_quoted>
my $quoted = $csv->is_quoted ($column_idx);
-Where C<$column_idx> is the (zero-based) index of the column in the last
+Where C<$column_idx> is the (zero-based) index of the column in the last
result of L</parse>.
-This returns a true value if the data in the indicated column was enclosed
-in C<quote_char> quotes. This might be important for data where
-C<,20070108,> is to be treated as a numeric value, and where C<,"20070108",>
-is explicitly marked as character string data.
+This returns a true value if the data in the indicated column was enclosed
+in L<C<quote_char>|/quote_char> quotes. This might be important for fields
+where content C<,20070108,> is to be treated as a numeric value, and where
+C<,"20070108",> is explicitly marked as character string data.
=head2 is_binary
X<is_binary>
my $binary = $csv->is_binary ($column_idx);
-Where C<$column_idx> is the (zero-based) index of the column in the last
+Where C<$column_idx> is the (zero-based) index of the column in the last
result of L</parse>.
This returns a true value if the data in the indicated column contained any
@@ -1541,36 +2191,37 @@ X<is_missing>
my $missing = $csv->is_missing ($column_idx);
-Where C<$column_idx> is the (zero-based) index of the column in the last
+Where C<$column_idx> is the (zero-based) index of the column in the last
result of L</getline_hr>.
+ $csv->keep_meta_info (1);
while (my $hr = $csv->getline_hr ($fh)) {
$csv->is_missing (0) and next; # This was an empty line
}
-When using L</getline_hr> for parsing, it is impossible to tell if the
-fields are C<undef> because they where not filled in the CSV stream or
-because they were not read at all, as B<all> the fields defined by
-L</column_names> are set in the hash-ref. If you still need to know if all
-fields in each row are provided, you should enable C<keep_meta_info> so you
-can check the flags.
+When using L</getline_hr>, it is impossible to tell if the parsed fields
+are C<undef> because they where not filled in the C<CSV> stream or because
+they were not read at all, as B<all> the fields defined by L</column_names>
+are set in the hash-ref. If you still need to know if all fields in each
+row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so
+you can check the flags.
=head2 status
X<status>
$status = $csv->status ();
-This object function returns success (or failure) of L</combine> or
-L</parse>, whichever was called more recently.
+This method returns success (or failure) of the last invoked L</combine> or
+L</parse> call.
=head2 error_input
X<error_input>
$bad_argument = $csv->error_input ();
-This object function returns the erroneous argument (if it exists) of
-L</combine> or L</parse>, whichever was called more recently. If the last
-call was successful, C<error_input> will return C<undef>.
+This method returns the erroneous argument (if it exists) of L</combine> or
+L</parse>, whichever was called more recently. If the last invocation was
+successful, C<error_input> will return C<undef>.
=head2 error_diag
X<error_diag>
@@ -1581,32 +2232,32 @@ X<error_diag>
$error_str = "" . $csv->error_diag ();
($cde, $str, $pos, $recno) = $csv->error_diag ();
-If (and only if) an error occurred, this function returns the diagnostics
+If (and only if) an error occurred, this function returns the diagnostics
of that error.
-If called in void context, it will print the internal error code and the
+If called in void context, this will print the internal error code and the
associated error message to STDERR.
-If called in list context, it will return the error code and the error
-message in that order. If the last error was from parsing, the third value
-returned is a best guess at the location within the line that was being
-parsed. Its value is 1-based. The forth value represents the record count
-parsed by this csv object See F<examples/csv-check> for how this can be
+If called in list context, this will return the error code and the error
+message in that order. If the last error was from parsing, the third value
+returned is a best guess at the location within the line that was being
+parsed. Its value is 1-based. The fourth value represents the record count
+parsed by this csv instance. See F<examples/csv-check> for how this can be
used.
-If called in scalar context, it will return the diagnostics in a single
-scalar, a-la $!. It will contain the error code in numeric context, and the
-diagnostics message in string context.
+If called in scalar context, it will return the diagnostics in a single
+scalar, a-la C<$!>. It will contain the error code in numeric context, and
+the diagnostics message in string context.
-When called as a class method or a direct function call, the error
-diagnostics is that of the last L</new> call.
+When called as a class method or a direct function call, the diagnostics
+are that of the last L</new> call.
=head2 record_number
X<record_number>
$recno = $csv->record_number ();
-Returns the records parsed by this csv instance. This value should be more
+Returns the records parsed by this csv instance. This value should be more
accurate than C<$.> when embedded newlines come in play. Records written by
this instance are not counted.
@@ -1617,6 +2268,319 @@ X<SetDiag>
Use to reset the diagnostics if you are dealing with errors.
+=head1 FUNCTIONS
+
+=head2 csv
+X<csv>
+
+This function is not exported by default and should be explicitly requested:
+
+ use Text::CSV_XS qw( csv );
+
+This is the second draft. This function will stay, but the arguments might
+change based on user feedback.
+
+This is an high-level function that aims at simple (user) interfaces. This
+can be used to read/parse a C<CSV> file or stream (the default behavior) or
+to produce a file or write to a stream (define the C<out> attribute). It
+returns an array- or hash-reference on parsing (or C<undef> on fail) or the
+numeric value of L</error_diag> on writing. When this function fails you
+can get to the error using the class call to L</error_diag>
+
+ my $aoa = csv (in => "test.csv") or
+ die Text::CSV_XS->error_diag;
+
+This function takes the arguments as key-value pairs. This can be passed as
+a list or as an anonymous hash:
+
+ my $aoa = csv ( in => "test.csv", sep_char => ";");
+ my $aoh = csv ({ in => $fh, headers => "auto" });
+
+The arguments passed consist of two parts: the arguments to L</csv> itself
+and the optional attributes to the C<CSV> object used inside the function
+as enumerated and explained in L</new>.
+
+If not overridden, the default option used for CSV is
+
+ auto_diag => 1
+
+The option that is always set and cannot be altered is
+
+ binary => 1
+
+=head3 in
+X<in>
+
+Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">),
+which will be opened for reading and closed when finished, a file handle
+(e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob
+itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
+
+When used with L</out>, C<in> should be a reference to a CSV structure (AoA
+or AoH) or a CODE-ref that returns an array-reference or a hash-reference.
+The code-ref will be invoked with no arguments and .
+
+ my $aoa = csv (in => "file.csv");
+
+ open my $fh, "<", "file.csv";
+ my $aoa = csv (in => $fh);
+
+ my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
+ my $err = csv (in => $csv, out => "file.csv");
+
+=head3 out
+X<out>
+
+In output mode, the default CSV options when producing CSV are
+
+ eol => "\r\n"
+
+The L</fragment> attribute is ignored in output mode.
+
+C<out> can be a file name (e.g. C<"file.csv">), which will be opened for
+writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a
+reference to a glob (e.g. C<\*STDOUT>), or the glob itself (e.g. C<*STDOUT>).
+
+ csv (in => sub { $sth->fetch }, out => "dump.csv");
+ csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
+ headers => $sth->{NAME_lc});
+
+When a code-ref is used, the output is generated per invocation, so no
+buffering is involved. This implies that there is no size restriction on
+the number of records. The function ends when the coderef returns a false
+value.
+
+=head3 encoding
+X<encoding>
+
+If passed, it should be an encoding accepted by the C<:encoding()> option
+to C<open>. There is no default value. This attribute does not work in perl
+5.6.x.
+
+=head3 headers
+X<headers>
+
+If this attribute is not given, the default behavior is to produce an array
+of arrays.
+
+If C<headers> is supplied, it should be either an anonymous list of column
+names or a flag: C<auto> or C<skip>. When C<skip> is used, the header will
+not be included in the output.
+
+ my $aoa = csv (in => $fh, headers => "skip");
+
+If C<auto> is used, the first line of the C<CSV> source will be read as the
+list of field headers and used to produce an array of hashes.
+
+ my $aoh = csv (in => $fh, headers => "auto");
+
+If C<headers> is an anonymous list, the entries in the list will be used
+instead
+
+ my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
+ csv (in => $aoa, out => $fh, headers => [qw( code description price }]);
+
+=head3 key
+X<key>
+
+If passed, will default L<C<headers>|/headers> to C<"auto"> and return a
+hashref instead of an array of hashes.
+
+ my $ref = csv (in => "test.csv", key => "code");
+
+with test.csv like
+
+ code,product,price,color
+ 1,pc,850,gray
+ 2,keyboard,12,white
+ 3,mouse,5,black
+
+will return
+
+ { 1 => {
+ code => 1,
+ color => 'gray',
+ price => 850,
+ product => 'pc'
+ },
+ 2 => {
+ code => 2,
+ color => 'white',
+ price => 12,
+ product => 'keyboard'
+ },
+ 3 => {
+ code => 3,
+ color => 'black',
+ price => 5,
+ product => 'mouse'
+ }
+ }
+
+=head3 fragment
+X<fragment>
+
+Only output the fragment as defined in the L</fragment> method. This option
+is ignored when I<generating> C<CSV>. See L</out>.
+
+Combining all of them could give something like
+
+ use Text::CSV_XS qw( csv );
+ my $aoh = csv (
+ in => "test.txt",
+ encoding => "utf-8",
+ headers => "auto",
+ sep_char => "|",
+ fragment => "row=3;6-9;15-*",
+ );
+ say $aoh->[15]{Foo};
+
+=head2 Callbacks
+
+Callbacks enable actions triggered from the I<inside> of Text::CSV_XS.
+
+While most of what this enables can easily be done in an unrolled loop as
+described in the L</SYNOPSIS> callbacks can be used to meet special demands
+or enhance the L</csv> function.
+
+=over 2
+
+=item error
+X<error>
+
+ $csv->callbacks (error => sub { $csv->SetDiag (0) });
+
+the C<error> callback is invoked when an error occurs, but I<only> when
+L</auto_diag> is set to a true value. A callback is invoked with the values
+returned by L</error_diag>:
+
+ my ($c, $s);
+
+ sub ignore3006
+ {
+ my ($err, $msg, $pos, $recno) = @_;
+ if ($err == 3006) {
+ # ignore this error
+ ($c, $s) = (undef, undef);
+ SetDiag (0);
+ }
+ # Any other error
+ return;
+ } # ignore3006
+
+ $csv->callbacks (error => \&ignore3006);
+ $csv->bind_columns (\$c, \$s);
+ while ($csv->getline ($fh)) {
+ # Error 3006 will not stop the loop
+ }
+
+=item after_parse
+X<after_parse>
+
+ $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
+ while (my $row = $csv->getline ($fh)) {
+ $row->[-1] eq "NEW";
+ }
+
+This callback is invoked after parsing with L</getline> only if no error
+occurred. The callback is invoked with two arguments: the current C<CSV>
+parser object and an array reference to the fields parsed.
+
+The return code of the callback is ignored.
+
+ sub add_from_db
+ {
+ my ($csv, $row) = @_;
+ $sth->execute ($row->[4]);
+ push @$row, $sth->fetchrow_array;
+ } # add_from_db
+
+ my $aoa = csv (in => "file.csv", callbacks => {
+ after_parse => \&add_from_db });
+
+=item before_print
+X<before_print>
+
+ my $idx = 1;
+ $csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
+ $csv->print (*STDOUT, [ 0, $_ ]) for @members;
+
+This callback is invoked before printing with L</print> only if no error
+occurred. The callback is invoked with two arguments: the current C<CSV>
+parser object and an array reference to the fields passed.
+
+The return code of the callback is ignored.
+
+ sub max_4_fields
+ {
+ my ($csv, $row) = @_;
+ @$row > 4 and splice @$row, 4;
+ } # max_4_fields
+
+ csv (in => csv (in => "file.csv"), out => *STDOUT,
+ callbacks => { before print => \&max_4_fields });
+
+This callback is not active for L</combine>.
+
+=back
+
+=head3 Callbacks for csv ()
+
+The L</csv> allows for some callbacks that do not integrate in XS internals
+but only feature the L</csv> function.
+
+ csv (in => "file.csv",
+ callbacks => {
+ after_parse => sub { say "AFTER PARSE"; }, # first
+ after_in => sub { say "AFTER IN"; }, # second
+ on_in => sub { say "ON IN"; }, # third
+ },
+ );
+
+ csv (in => $aoh,
+ out => "file.csv",
+ callbacks => {
+ on_in => sub { say "ON IN"; }, # first
+ before_out => sub { say "BEFORE OUT"; }, # second
+ before_print => sub { say "BEFORE PRINT"; }, # third
+ },
+ );
+
+=over 2
+
+=item after_in
+X<after_in>
+
+This callback is invoked for each record after all records have been parsed
+but before returning the reference to the caller. The hook is invoked with
+two arguments: the current C<CSV> parser object and a reference to the
+record. The reference can be a reference to a HASH or a reference to an
+ARRAY as determined by the arguments.
+
+This callback can also be passed as an attribute without the C<callbacks>
+wrapper.
+
+=item before_out
+X<before_out>
+
+This callback is invoked for each record before the record is printed. The
+hook is invoked with two arguments: the current C<CSV> parser object and a
+reference to the record. The reference can be a reference to a HASH or a
+reference to an ARRAY as determined by the arguments.
+
+This callback can also be passed as an attribute without the C<callbacks>
+wrapper.
+
+=item on_in
+X<on_in>
+
+This callback acts exactly as the L</after_in> or the L</before_out> hooks.
+
+This callback can also be passed as an attribute without the C<callbacks>
+wrapper.
+
+=back
+
=head1 INTERNALS
=over 4
@@ -1627,10 +2591,10 @@ Use to reset the diagnostics if you are dealing with errors.
=back
-The arguments to these two internal functions are deliberately not
-described or documented in order to enable the module author(s) to change
-it when they feel the need for it. Using them is highly discouraged as the
-API may change in future releases.
+The arguments to these internal functions are deliberately not described or
+documented in order to enable the module authors make changes it when they
+feel the need for it. Using them is highly discouraged as the API may
+change in future releases.
=head1 EXAMPLES
@@ -1651,6 +2615,11 @@ API may change in future releases.
my @column = map { $_->[3] } @{$csv->getline_all ($fh)};
close $fh or die "file.csv: $!";
+with L</csv>, you could do
+
+ my @column = map { $_->[0] }
+ @{csv (in => "file.csv", fragment => "col=4")};
+
=head2 Parsing CSV strings:
my $csv = Text::CSV_XS->new ({ keep_meta_info => 1, binary => 1 });
@@ -1674,16 +2643,12 @@ API may change in future releases.
=head3 The fast way: using L</print>
-An example for creating CSV files using the L</print> method, like in
-dumping the content of a database ($dbh) table ($tbl) to CSV:
+An example for creating C<CSV> files using the L</print> method:
my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ });
- open my $fh, ">", "$tbl.csv" or die "$tbl.csv: $!";
- my $sth = $dbh->prepare ("select * from $tbl");
- $sth->execute;
- $csv->print ($fh, $sth->{NAME_lc});
- while (my $row = $sth->fetch) {
- $csv->print ($fh, $row) or $csv->error_diag;
+ open my $fh, ">", "foo.csv" or die "foo.csv: $!";
+ for (1 .. 10) {
+ $csv->print ($fh, [ $_, "$_" ]) or $csv->error_diag;
}
close $fh or die "$tbl.csv: $!";
@@ -1707,10 +2672,41 @@ or using the slower L</combine> and L</string> methods:
}
close $csv_fh or die "hello.csv: $!";
+=head2 Rewriting CSV
+
+Rewrite C<CSV> files with C<;> as separator character to well-formed C<CSV>:
+
+ use Text::CSV_XS qw( csv );
+ csv (in => csv (in => "bad.csv", sep_char => ";"), out => *STDOUT);
+
+=head2 Dumping database tables to CSV
+
+Dumping a database table can be simple as this (TIMTOWTDI):
+
+ my $dbh = DBI->connect (...);
+ my $sql = "select * from foo";
+
+ # using your own loop
+ open my $fh, ">", "foo.csv" or die "foo.csv: $!\n";
+ my $csv = Text::CSV_XS->new ({ binary => 1, eol => "\r\n" });
+ my $sth = $dbh->prepare ($sql); $sth->execute;
+ $csv->print ($fh, $sth->{NAME_lc});
+ while (my $row = $sth->fetch) {
+ $csv->print ($fh, $row);
+ }
+
+ # using the csv function, all in memory
+ csv (out => "foo.csv", in => $dbh->selectall_arrayref ($sql));
+
+ # using the csv function, streaming with callbacks
+ my $sth = $dbh->prepare ($sql); $sth->execute;
+ csv (out => "foo.csv", in => sub { $sth->fetch });
+ csv (out => "foo.csv", in => sub { $sth->fetchrow_hashref });
+
=head2 The examples folder
-For more extended examples, see the F<examples/> (1) sub-directory in the
-original distribution or the git repository (2).
+For more extended examples, see the F<examples/> C<1>) sub-directory in the
+original distribution or the git repository C<2>).
1. http://repo.or.cz/w/Text-CSV_XS.git?a=tree;f=examples
2. http://repo.or.cz/w/Text-CSV_XS.git
@@ -1722,15 +2718,16 @@ The following files can be found there:
=item parser-xs.pl
X<parser-xs.pl>
-This can be used as a boilerplate to `fix' bad CSV and parse beyond errors.
+This can be used as a boilerplate to parse invalid C<CSV> and parse beyond
+(expected) errors alternative to using the L</error> callback.
$ perl examples/parser-xs.pl bad.csv >good.csv
=item csv-check
X<csv-check>
-This is a command-line tool that uses parser-xs.pl techniques to check the
-CSV file and report on its content.
+This is a command-line tool that uses parser-xs.pl techniques to check the
+C<CSV> file and report on its content.
$ csv-check files/utf8.csv
Checked with examples/csv-check 1.5 using Text::CSV_XS 0.81
@@ -1740,14 +2737,14 @@ CSV file and report on its content.
=item csv2xls
X<csv2xls>
-A script to convert CSV to Microsoft Excel. This requires L<Date::Calc> and
-L<Spreadsheet::WriteExcel>. The converter accepts various options and can
-produce UTF-8 Excel files.
+A script to convert C<CSV> to Microsoft Excel. This requires L<Date::Calc>
+and L<Spreadsheet::WriteExcel>. The converter accepts various options and
+can produce UTF-8 Excel files.
=item csvdiff
X<csvdiff>
-A script that provides colorized diff on sorted CSV files, assuming first
+A script that provides colorized diff on sorted CSV files, assuming first
line is header and first field is the key. Output options include colorized
ANSI escape codes or HTML.
@@ -1757,20 +2754,20 @@ ANSI escape codes or HTML.
=head1 CAVEATS
-C<Text::CSV_XS> is not designed to detect the characters used to quote and
-separate fields. The parsing is done using predefined settings. In the
-examples sub-directory, you can find scripts that demonstrate how you can
-try to detect these characters yourself.
+Text::CSV_XS is I<not> designed to detect the characters used to quote and
+separate fields. The parsing is done using predefined (default) settings.
+In the examples sub-directory, you can find scripts that demonstrate how
+you could try to detect these characters yourself.
=head2 Microsoft Excel
The import/export from Microsoft Excel is a I<risky task>, according to the
-documentation in C<Text::CSV::Separator>. Microsoft uses the system's
-default list separator defined in the regional settings, which happens to
-be a semicolon for Dutch, German and Spanish (and probably some others as
-well). For the English locale, the default is a comma. In Windows however,
-the user is free to choose a predefined locale, and then change every
-individual setting in it, so checking the locale is no solution.
+documentation in C<Text::CSV::Separator>. Microsoft uses the system's list
+separator defined in the regional settings, which happens to be a semicolon
+for Dutch, German and Spanish (and probably some others as well). For the
+English locale, the default is a comma. In Windows however, the user is
+free to choose a predefined locale, and then change I<every> individual
+setting in it, so checking the locale is no solution.
=head1 TODO
@@ -1778,27 +2775,32 @@ individual setting in it, so checking the locale is no solution.
=item More Errors & Warnings
-New extensions ought to be clear and concise in reporting what error
-occurred where and why, and possibly also tell a remedy to the problem.
-error_diag is a (very) good start, but there is more work to be done here.
+New extensions ought to be clear and concise in reporting what error has
+occurred where and why, and maybe also offer a remedy to the problem.
+
+L</error_diag> is a (very) good start, but there is more work to be done in
+this area.
-Basic calls should croak or warn on illegal parameters. Errors should be
+Basic calls should croak or warn on illegal parameters. Errors should be
documented.
=item setting meta info
Future extensions might include extending the L</meta_info>, L</is_quoted>,
-and L</is_binary> to accept setting these flags for fields, so you can
+and L</is_binary> to accept setting these flags for fields, so you can
specify which fields are quoted in the L</combine>/L</string> combination.
$csv->meta_info (0, 1, 1, 3, 0, 0);
$csv->is_quoted (3, 1);
+L<Metadata Vocabulary for Tabular Data|http://w3c.github.io/csvw/metadata/>
+(a W3C editor's draft) could be an example for supporting more metadata.
+
=item Parse the whole file at once
-Implement new methods that enable parsing of a complete file at once,
-returning a list of hashes. Possible extension to this could be to enable a
-column selection on the call:
+Implement new methods or functions that enable parsing of a complete file
+at once, returning a list of hashes. Possible extension to this could be to
+enable a column selection on the call:
my @AoH = $csv->parse_file ($filename, { cols => [ 1, 4..8, 12 ]});
@@ -1812,8 +2814,19 @@ Returning something like
},
]
-Note that L</getline_all> already returns all rows for an open stream, but
-this will not return flags.
+Note that the L</csv> function already supports most of this, but does not
+return flags. L</getline_all> returns all rows for an open stream, but this
+will not return flags either. L</fragment> can reduce the required rows
+I<or> columns, but cannot combine them.
+
+=item Cookbook
+
+Write a document that has recipes for most known non-standard (and maybe
+some standard) C<CSV> formats, including formats that use C<TAB>, C<;>,
+C<|>, or other non-comma separators.
+
+Examples could be taken from W3C's L<CSV on the Web: Use Cases and
+Requirements|http://w3c.github.io/csvw/use-cases-and-requirements/index.html>
=back
@@ -1824,9 +2837,9 @@ this will not return flags.
=item combined methods
Requests for adding means (methods) that combine L</combine> and L</string>
-in a single call will B<not> be honored. Likewise for L</parse> and
-L</fields>. Given the trouble with embedded newlines, using L</getline> and
-L</print> instead is the preferred way to go.
+in a single call will B<not> be honored (use L</print> instead). Likewise
+for L</parse> and L</fields> (use L</getline> instead), given the problems
+with embedded newlines.
=back
@@ -1836,25 +2849,23 @@ No guarantees, but this is what I had in mind some time ago:
=over 2
-=item next
+=item *
- - This might very well be 1.00
- - DIAGNOSTICS setction in pod to *describe* the errors (see below)
- - croak / carp
+DIAGNOSTICS section in pod to *describe* the errors (see below)
-=item next + 1
+=item *
- - csv2csv - a script to regenerate a CSV file to follow standards
+Multi-byte quotation support
=back
=head1 EBCDIC
-The hard-coding of characters and character ranges makes this module
-unusable on EBCDIC systems.
+The current hard-coding of characters and character ranges makes this code
+unusable on C<EBCDIC> systems. Recent work in perl-5.20 might change that.
-Opening EBCDIC encoded files on ASCII+ systems is likely to succeed
-using Encode's cp37, cp1047, or posix-bc:
+Opening C<EBCDIC> encoded files on C<ASCII>+ systems is likely to succeed
+using Encode's C<cp37>, C<cp1047>, or C<posix-bc>:
open my $fh, "<:encoding(cp1047)", "ebcdic_file.csv" or die "...";
@@ -1862,19 +2873,21 @@ using Encode's cp37, cp1047, or posix-bc:
Still under construction ...
-If an error occurred, C<$csv->error_diag> can be used to get more
-information on the cause of the failure. Note that for speed reasons, the
-internal value is never cleared on success, so using the value returned by
-L</error_diag> in normal cases - when no error occurred - may cause
-unexpected results.
+If an error occurs, C<< $csv->error_diag >> can be used to get information
+on the cause of the failure. Note that for speed reasons the internal value
+is never cleared on success, so using the value returned by L</error_diag>
+in normal cases - when no error occurred - may cause unexpected results.
If the constructor failed, the cause can be found using L</error_diag> as a
-class method, like C<Text::CSV_XS->error_diag>.
+class method, like C<< Text::CSV_XS->error_diag >>.
-C<$csv->error_diag> is automatically called upon error when the contractor
-was called with C<auto_diag> set to 1 or 2, or when C<autodie> is in effect.
-When set to 1, this will cause a C<warn> with the error message, when set
-to 2, it will C<die>. C<2012 - EOF> is excluded from C<auto_diag> reports.
+The C<< $csv->error_diag >> method is automatically invoked upon error when
+the contractor was called with L<C<auto_diag>|/auto_diag> set to C<1> or
+C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a
+C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 -
+EOF> is excluded from L<C<auto_diag>|/auto_diag> reports.
+
+Errors can be (individually) caught using the L</error> callback.
The errors as described below are available. I have tried to make the error
itself explanatory enough, but more descriptions will be added. For most of
@@ -1927,38 +2940,48 @@ And below should be the complete list of error codes that can be returned:
1001 "INI - sep_char is equal to quote_char or escape_char"
X<1001>
-The separation character cannot be equal to either the quotation character
-or the escape character, as that will invalidate all parsing rules.
+The L<separation character|/sep_char> cannot be equal to L<the quotation
+character|/quote_char> or to L<the escape character|/escape_char>, as this
+would invalidate all parsing rules.
=item *
1002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
X<1002>
-Using C<allow_whitespace> when either C<escape_char> or C<quote_char> is
-equal to SPACE or TAB is too ambiguous to allow.
+Using the L<C<allow_whitespace>|/allow_whitespace> attribute when either
+L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char> is equal to
+C<SPACE> or C<TAB> is too ambiguous to allow.
=item *
1003 "INI - \r or \n in main attr not allowed"
X<1003>
-Using default C<eol> characters in either C<sep_char>, C<quote_char>, or
-C<escape_char> is not allowed.
+Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>,
+L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> is not
+allowed.
+
+=item *
+1004 "INI - callbacks should be undef or a hashref"
+X<1004>
+
+The L<C<callbacks>|/Callbacks> attribute only allows one to be C<undef> or
+a hash reference.
=item *
2010 "ECR - QUO char inside quotes followed by CR not part of EOL"
X<2010>
-When C<eol> has been set to something specific, other than the default,
-like C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
-C<quote_char>, where the characters following the C<"\r"> do not make up
-the C<eol> sequence, this is an error.
+When L<C<eol>|/eol> has been set to anything but the default, like
+C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
+L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do
+not make up the L<C<eol>|/eol> sequence, this is an error.
=item *
2011 "ECR - Characters after end of quoted field"
X<2011>
-Sequences like C<1,foo,"bar"baz,2> are not allowed. C<"bar"> is a quoted
-field, and after the closing quote, there should be either a new-line
+Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
+field and after the closing double-quote, there should be either a new-line
sequence or a separation character.
=item *
@@ -1966,28 +2989,34 @@ sequence or a separation character.
X<2012>
Self-explaining. End-of-file while inside parsing a stream. Can happen only
-when reading from streams with L</getline>, as using L</parse> is done on
-strings that are not required to have a trailing C<eol>.
+when reading from streams with L</getline>, as using L</parse> is done on
+strings that are not required to have a trailing L<C<eol>|/eol>.
+
+=item *
+2013 "INI - Specification error for fragments RFC7111"
+X<2013>
+
+Invalid specification for URI L</fragment> specification.
=item *
2021 "EIQ - NL char inside quotes, binary off"
X<2021>
-Sequences like C<1,"foo\nbar",2> are allowed only when the binary option
+Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
has been selected with the constructor.
=item *
2022 "EIQ - CR char inside quotes, binary off"
X<2022>
-Sequences like C<1,"foo\rbar",2> are allowed only when the binary option
+Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
has been selected with the constructor.
=item *
2023 "EIQ - QUO character not allowed"
X<2023>
-Sequences like C<"foo "bar" baz",quux> and C<2023,",2008-04-05,"Foo, Bar",\n>
+Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
will cause this error.
=item *
@@ -2001,24 +3030,24 @@ The escape character is not allowed as last character in an input stream.
X<2025>
An escape character should escape only characters that need escaping.
-Allowing the escape for other characters is possible with the
-C<allow_loose_escape> attribute.
+
+Allowing the escape for other characters is possible with the attribute
+L</allow_loose_escape>.
=item *
2026 "EIQ - Binary character inside quoted field, binary off"
X<2026>
-Binary characters are not allowed by default. Exceptions are fields that
-contain valid UTF-8, that will automatically be upgraded is the content is
-valid UTF-8. Pass the C<binary> attribute with a true value to accept
-binary characters.
+Binary characters are not allowed by default. Exceptions are fields that
+contain valid UTF-8, that will automatically be upgraded if the content is
+valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data.
=item *
2027 "EIQ - Quoted field not terminated"
X<2027>
-When parsing a field that started with a quotation character, the field is
-expected to be closed with a quotation character. When the parsed line is
+When parsing a field that started with a quotation character, the field is
+expected to be closed with a quotation character. When the parsed line is
exhausted before the quote is found, that field is not terminated.
=item *
@@ -2097,34 +3126,34 @@ X<3010>
=head1 SEE ALSO
-L<perl>, L<IO::File>, L<IO::Handle>, L<IO::Wrap>, L<Text::CSV>,
-L<Text::CSV_PP>, L<Text::CSV::Encoded>, L<Text::CSV::Separator>, and
-L<Spreadsheet::Read>.
+L<IO::File>, L<IO::Handle>, L<IO::Wrap>, L<Text::CSV>, L<Text::CSV_PP>,
+L<Text::CSV::Encoded>, L<Text::CSV::Separator>, L<Spreadsheet::CSV> and
+L<Spreadsheet::Read>, and of course L<perl>.
-=head1 AUTHORS and MAINTAINERS
+=head1 AUTHOR
Alan Citterman F<E<lt>alan@mfgrtl.comE<gt>> wrote the original Perl module.
-Please don't send mail concerning Text::CSV_XS to Alan, as he's not
-involved in the C part that is now the main part of the module.
+Please don't send mail concerning Text::CSV_XS to Alan, who is not involved
+in the C/XS part that is now the main part of the module.
-Jochen Wiedmann F<E<lt>joe@ispsoft.deE<gt>> rewrote the encoding and
-decoding in C by implementing a simple finite-state machine and added the
-variable quote, escape and separator characters, the binary mode and the
-print and getline methods. See F<ChangeLog> releases 0.10 through 0.23.
+Jochen Wiedmann F<E<lt>joe@ispsoft.deE<gt>> rewrote the en- and decoding in
+C by implementing a simple finite-state machine. He added variable quote,
+escape and separator characters, the binary mode and the print and getline
+methods. See F<ChangeLog> releases 0.10 through 0.23.
-H.Merijn Brand F<E<lt>h.m.brand@xs4all.nlE<gt>> cleaned up the code, added
-the field flags methods, wrote the major part of the test suite, completed
-the documentation, fixed some RT bugs and added all the allow flags. See
-ChangeLog releases 0.25 and on.
+H.Merijn Brand F<E<lt>h.m.brand@xs4all.nlE<gt>> cleaned up the code, added
+the field flags methods, wrote the major part of the test suite, completed
+the documentation, fixed most RT bugs, added all the allow flags and the
+L</csv> function. See ChangeLog releases 0.25 and on.
=head1 COPYRIGHT AND LICENSE
- Copyright (C) 2007-2013 H.Merijn Brand. All rights reserved.
+ Copyright (C) 2007-2014 H.Merijn Brand. All rights reserved.
Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
Copyright (C) 1997 Alan Citterman. All rights reserved.
-This library is free software; you can redistribute it and/or modify it
-under the same terms as Perl itself.
+This library is free software; you can redistribute and/or modify it under
+the same terms as Perl itself.
=cut
@@ -1,4 +1,4 @@
-/* Copyright (c) 2007-2013 H.Merijn Brand. All rights reserved.
+/* Copyright (c) 2007-2014 H.Merijn Brand. All rights reserved.
* Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
* This program is free software; you can redistribute it and/or
* modify it under the same terms as Perl itself.
@@ -28,9 +28,51 @@
#define CSV_XS_TYPE_IV 1
#define CSV_XS_TYPE_NV 2
-/* Keep in sync with .pm! */
-#define CACHE_SIZE 40
+#define MAX_SEP_LEN 16
+#define MAX_EOL_LEN 16
+#define MAX_QUO_LEN 16
+
+#define CSV_FLAGS_QUO 0x0001
+#define CSV_FLAGS_BIN 0x0002
+#define CSV_FLAGS_EIF 0x0004
+#define CSV_FLAGS_MIS 0x0010
+
+#define HOOK_ERROR 0x0001
+#define HOOK_AFTER_PARSE 0x0002
+#define HOOK_BEFORE_PRINT 0x0004
+
+#define CH_TAB '\011'
+#define CH_NL '\012'
+#define CH_CR '\015'
+#define CH_SPACE '\040'
+#define CH_DEL '\177'
+#define CH_EOLX 1215
+#define CH_SEPX 8888
+#define CH_SEP *csv->sep
+#define CH_QUOTEX 8889
+#define CH_QUOTE *csv->quo
+
+#define useIO_EOF 0x10
+
+#define unless(expr) if (!(expr))
+
+#define _is_arrayref(f) ( f && \
+ (SvROK (f) || (SvRMAGICAL (f) && (mg_get (f), 1) && SvROK (f))) && \
+ SvOK (f) && SvTYPE (SvRV (f)) == SVt_PVAV )
+#define _is_hashref(f) ( f && \
+ (SvROK (f) || (SvRMAGICAL (f) && (mg_get (f), 1) && SvROK (f))) && \
+ SvOK (f) && SvTYPE (SvRV (f)) == SVt_PVHV )
+#define _is_coderef(f) ( f && \
+ (SvROK (f) || (SvRMAGICAL (f) && (mg_get (f), 1) && SvROK (f))) && \
+ SvOK (f) && SvTYPE (SvRV (f)) == SVt_PVCV )
+#define CSV_XS_SELF \
+ if (!self || !SvOK (self) || !SvROK (self) || \
+ SvTYPE (SvRV (self)) != SVt_PVHV) \
+ croak ("self is not a hash ref"); \
+ hv = (HV *)SvRV (self)
+
+/* Keep in sync with .pm! */
#define CACHE_ID_quote_char 0
#define CACHE_ID_escape_char 1
#define CACHE_ID_sep_char 2
@@ -42,10 +84,13 @@
#define CACHE_ID_allow_unquoted_escape 8
#define CACHE_ID_allow_whitespace 9
#define CACHE_ID_blank_is_undef 10
+#define CACHE_ID_sep 38
+#define CACHE_ID_sep_len 37
#define CACHE_ID_eol 11
-#define CACHE_ID_eol_len 19
-#define CACHE_ID_eol_is_cr 20
-#define CACHE_ID_has_types 21
+#define CACHE_ID_eol_len 12
+#define CACHE_ID_eol_is_cr 13
+#define CACHE_ID_quo 15
+#define CACHE_ID_quo_len 16
#define CACHE_ID_verbatim 22
#define CACHE_ID_empty_is_undef 23
#define CACHE_ID_auto_diag 24
@@ -57,38 +102,13 @@
#define CACHE_ID_diag_verbose 33
#define CACHE_ID_has_error_input 34
#define CACHE_ID_decode_utf8 35
-
-#define CSV_FLAGS_QUO 0x0001
-#define CSV_FLAGS_BIN 0x0002
-#define CSV_FLAGS_EIF 0x0004
-#define CSV_FLAGS_MIS 0x0010
-
-#define CH_TAB '\011'
-#define CH_NL '\012'
-#define CH_CR '\015'
-#define CH_SPACE '\040'
-#define CH_DEL '\177'
-#define CH_EOLX 1215
-
-#define useIO_EOF 0x10
-
-#define unless(expr) if (!(expr))
-
-#define _is_arrayref(f) ( f && \
- (SvROK (f) || (SvRMAGICAL (f) && (mg_get (f), 1) && SvROK (f))) && \
- SvOK (f) && SvTYPE (SvRV (f)) == SVt_PVAV )
-
-#define CSV_XS_SELF \
- if (!self || !SvOK (self) || !SvROK (self) || \
- SvTYPE (SvRV (self)) != SVt_PVHV) \
- croak ("self is not a hash ref"); \
- hv = (HV *)SvRV (self)
+#define CACHE_ID__has_hooks 36
#define byte unsigned char
typedef struct {
byte quote_char;
byte escape_char;
- byte sep_char;
+ byte sep_char; /* not used anymore */
byte binary;
byte keep_meta_info;
@@ -114,6 +134,7 @@ typedef struct {
byte diag_verbose;
byte has_error_input;
byte decode_utf8;
+ byte has_hooks;
long is_bound;
@@ -123,10 +144,12 @@ typedef struct {
HV * self;
SV * bound;
- byte * eol;
- STRLEN eol_len;
char * types;
- STRLEN types_len;
+
+ byte eol_len;
+ byte sep_len;
+ byte quo_len;
+ byte types_len;
char * bptr;
SV * tmp;
@@ -136,6 +159,9 @@ typedef struct {
int eol_pos;
STRLEN size;
STRLEN used;
+ byte eol[MAX_EOL_LEN];
+ byte sep[MAX_SEP_LEN];
+ byte quo[MAX_QUO_LEN];
char buffer[BUFFER_SIZE];
} csv_t;
@@ -157,11 +183,13 @@ xs_error_t xs_errors[] = {
{ 1001, "INI - sep_char is equal to quote_char or escape_char" },
{ 1002, "INI - allow_whitespace with escape_char or quote_char SP or TAB" },
{ 1003, "INI - \r or \n in main attr not allowed" },
+ { 1004, "INI - callbacks should be undef or a hashref" },
/* Parse errors */
{ 2010, "ECR - QUO char inside quotes followed by CR not part of EOL" },
{ 2011, "ECR - Characters after end of quoted field" },
{ 2012, "EOF - End of data in parsing input stream" },
+ { 2013, "ESP - Specification error for fragments RFC7111" },
/* EIQ - Error Inside Quotes */
{ 2021, "EIQ - NL char inside quotes, binary off" },
@@ -201,9 +229,52 @@ xs_error_t xs_errors[] = {
{ 0, "" },
};
-static char init_cache[CACHE_SIZE];
static int io_handle_loaded = 0;
static SV *m_getline, *m_print, *m_read;
+static int last_error = 0;
+
+#define __is_SEPX(c) (c == CH_SEP && (csv->sep_len == 0 || (\
+ csv->size - csv->used >= csv->sep_len - 1 &&\
+ !memcmp (csv->bptr + csv->used, csv->sep + 1, csv->sep_len - 1) &&\
+ (csv->used += csv->sep_len - 1) &&\
+ (c = CH_SEPX))))
+#if MAINT_DEBUG > 1
+static byte _is_SEPX (unsigned int c, csv_t *csv, int line)
+{
+ unsigned int b = __is_SEPX (c);
+ (void)fprintf (stderr, "# %4d - is_SEPX:\t%d (%d)\n", line, b, csv->sep_len);
+ if (csv->sep_len)
+ (void)fprintf (stderr,
+ "# len: %d, siz: %d, usd: %d, c: %02x, *sep: %02x\n",
+ csv->sep_len, csv->size, csv->used, c, CH_SEP);
+ return b;
+ } /* _is_SEPX */
+#define is_SEP(c) _is_SEPX (c, csv, __LINE__)
+#else
+#define is_SEP(c) __is_SEPX (c)
+#endif
+
+#define __is_QUOTEX(c) (CH_QUOTE && c == CH_QUOTE && (csv->quo_len == 0 || (\
+ csv->size - csv->used >= csv->quo_len - 1 &&\
+ !memcmp (csv->bptr + csv->used, csv->quo + 1, csv->quo_len - 1) &&\
+ (csv->used += csv->quo_len - 1) &&\
+ (c = CH_QUOTEX))))
+#if MAINT_DEBUG > 1
+static byte _is_QUOTEX (unsigned int c, csv_t *csv, int line)
+{
+ unsigned int b = __is_QUOTEX (c);
+ (void)fprintf (stderr, "# %4d - is_QUOTEX:\t%d (%d)\n", line, b, csv->quo_len);
+
+ if (csv->quo_len)
+ (void)fprintf (stderr,
+ "# len: %d, siz: %d, usd: %d, c: %02x, *quo: %02x\n",
+ csv->quo_len, csv->size, csv->used, c, CH_QUOTE);
+ return b;
+ } /* _is_QUOTEX */
+#define is_QUOTE(c) _is_QUOTEX (c, csv, __LINE__)
+#else
+#define is_QUOTE(c) __is_QUOTEX (c)
+#endif
#define require_IO_Handle \
unless (io_handle_loaded) {\
@@ -215,8 +286,8 @@ static SV *m_getline, *m_print, *m_read;
}
#define is_whitespace(ch) \
- ( (ch) != csv->sep_char && \
- (ch) != csv->quote_char && \
+ ( (ch) != CH_SEP && \
+ (ch) != CH_QUOTE && \
(ch) != csv->escape_char && \
( (ch) == CH_SPACE || \
(ch) == CH_TAB \
@@ -244,14 +315,16 @@ static SV *cx_SetDiag (pTHX_ csv_t *csv, int xse)
dSP;
SV *err = SvDiag (xse);
- if (err)
+ last_error = xse;
(void)hv_store (csv->self, "_ERROR_DIAG", 11, err, 0);
if (xse == 0) {
(void)hv_store (csv->self, "_ERROR_POS", 10, newSViv (0), 0);
(void)hv_store (csv->self, "_ERROR_INPUT", 12, &PL_sv_undef, 0);
csv->has_error_input = 0;
}
- if (err && csv->pself && csv->auto_diag) {
+ if (xse == 2012) /* EOF */
+ (void)hv_store (csv->self, "_EOF", 4, &PL_sv_yes, 0);
+ if (csv->pself && csv->auto_diag) {
ENTER;
SAVETMPS;
PUSHMARK (SP);
@@ -267,65 +340,98 @@ static SV *cx_SetDiag (pTHX_ csv_t *csv, int xse)
#define xs_cache_set(hv,idx,val) cx_xs_cache_set (aTHX_ hv, idx, val)
static void cx_xs_cache_set (pTHX_ HV *hv, int idx, SV *val)
{
- SV **svp;
- byte *cp;
+ SV **svp;
+ byte *cache;
+
+ csv_t csvs;
+ csv_t *csv = &csvs;
+
+ IV iv;
+ char *cp = "\0";
+ STRLEN len = 0;
unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp)
return;
- cp = (byte *)SvPV_nolen (*svp);
+ cache = (byte *)SvPV_nolen (*svp);
+ memcpy (csv, cache, sizeof (csv_t));
- /* single char/byte */
- if ( idx == CACHE_ID_quote_char ||
- idx == CACHE_ID_escape_char ||
- idx == CACHE_ID_sep_char) {
- cp[idx] = SvPOK (val) ? *(SvPVX (val)) : 0;
- return;
- }
+ if (SvPOK (val))
+ cp = SvPV (val, len);
+ if (SvIOK (val))
+ iv = SvIV (val);
+ else if (SvNOK (val)) /* Needed for 5.6.x but safe for 5.8.x+ */
+ iv = (IV)SvNV (val);
+ else
+ iv = *cp;
- /* boolean/numeric */
- if ( idx == CACHE_ID_binary ||
- idx == CACHE_ID_keep_meta_info ||
- idx == CACHE_ID_always_quote ||
- idx == CACHE_ID_quote_space ||
- idx == CACHE_ID_quote_null ||
- idx == CACHE_ID_quote_binary ||
- idx == CACHE_ID_decode_utf8 ||
- idx == CACHE_ID_allow_loose_escapes ||
- idx == CACHE_ID_allow_loose_quotes ||
- idx == CACHE_ID_allow_unquoted_escape ||
- idx == CACHE_ID_allow_whitespace ||
- idx == CACHE_ID_blank_is_undef ||
- idx == CACHE_ID_empty_is_undef ||
- idx == CACHE_ID_verbatim ||
- idx == CACHE_ID_auto_diag ||
- idx == CACHE_ID_diag_verbose ||
- idx == CACHE_ID_has_error_input) {
- cp[idx] = (byte)SvIV (val);
- return;
- }
+ switch (idx) {
- /* a 4-byte IV */
- if (idx == CACHE_ID__is_bound) {
- long v = SvIV (val);
+ /* single char/byte */
+ case CACHE_ID_sep_char:
+ CH_SEP = *cp;
+ csv->sep_len = 0;
+ break;
- cp[idx ] = (v & 0xFF000000) >> 24;
- cp[idx + 1] = (v & 0x00FF0000) >> 16;
- cp[idx + 2] = (v & 0x0000FF00) >> 8;
- cp[idx + 3] = (v & 0x000000FF);
- return;
- }
+ case CACHE_ID_quote_char:
+ CH_QUOTE = *cp;
+ csv->quo_len = 0;
+ break;
- if (idx == CACHE_ID_eol) {
- STRLEN len = 0;
- char *eol = SvPOK (val) ? SvPV (val, len) : "";
+ case CACHE_ID_escape_char: csv->escape_char = *cp; break;
+
+ /* boolean/numeric */
+ case CACHE_ID_binary: csv->binary = iv; break;
+ case CACHE_ID_keep_meta_info: csv->keep_meta_info = iv; break;
+ case CACHE_ID_always_quote: csv->always_quote = iv; break;
+ case CACHE_ID_quote_space: csv->quote_space = iv; break;
+ case CACHE_ID_quote_null: csv->quote_null = iv; break;
+ case CACHE_ID_quote_binary: csv->quote_binary = iv; break;
+ case CACHE_ID_decode_utf8: csv->decode_utf8 = iv; break;
+ case CACHE_ID_allow_loose_escapes: csv->allow_loose_escapes = iv; break;
+ case CACHE_ID_allow_loose_quotes: csv->allow_loose_quotes = iv; break;
+ case CACHE_ID_allow_unquoted_escape: csv->allow_unquoted_escape = iv; break;
+ case CACHE_ID_allow_whitespace: csv->allow_whitespace = iv; break;
+ case CACHE_ID_blank_is_undef: csv->blank_is_undef = iv; break;
+ case CACHE_ID_empty_is_undef: csv->empty_is_undef = iv; break;
+ case CACHE_ID_verbatim: csv->verbatim = iv; break;
+ case CACHE_ID_auto_diag: csv->auto_diag = iv; break;
+ case CACHE_ID_diag_verbose: csv->diag_verbose = iv; break;
+ case CACHE_ID__has_hooks: csv->has_hooks = iv; break;
+ case CACHE_ID_has_error_input: csv->has_error_input = iv; break;
+
+ /* a 4-byte IV */
+ case CACHE_ID__is_bound: csv->is_bound = iv; break;
+
+ /* string */
+ case CACHE_ID_sep:
+ if (len < MAX_SEP_LEN) {
+ memcpy (csv->sep, cp, len);
+ csv->sep_len = len == 1 ? 0 : len;
+ }
+ break;
+
+ case CACHE_ID_quo:
+ if (len < MAX_QUO_LEN) {
+ memcpy (csv->quo, cp, len);
+ csv->quo_len = len == 1 ? 0 : len;
+ }
+ break;
+
+ case CACHE_ID_eol:
+ if (len < MAX_EOL_LEN) {
+ memcpy (csv->eol, cp, len);
+ csv->eol_len = len;
+ csv->eol_is_cr = len == 1 && *cp == CH_CR ? 1 : 0;
+ }
+ break;
- memset (cp + CACHE_ID_eol, 0, 8);
- cp[CACHE_ID_eol_len] = len;
- cp[CACHE_ID_eol_is_cr] = len == 1 && *eol == CH_CR ? 1 : 0;
- if (len > 0 && len < 8)
- memcpy (cp + CACHE_ID_eol, eol, len);
+ default:
+ warn ("Unknown cache index %d ignored\n", idx);
}
+
+ csv->cache = cache;
+ memcpy (cache, csv, sizeof (csv_t));
} /* cache_set */
#define _pretty_str(csv,xse) cx_pretty_str (aTHX_ csv, xse)
@@ -336,79 +442,72 @@ static char *cx_pretty_str (pTHX_ byte *s, STRLEN l)
(PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_UNI_DETECT)));
} /* _pretty_str */
-#define _cache_show_byte(trim,idx) \
- c = cp[idx]; warn (" %-21s %02x:%3d\n", trim, c, c)
-#define _cache_show_char(trim,idx) \
- c = cp[idx]; warn (" %-21s %02x:%s\n", trim, c, _pretty_str (&c, 1))
+#define _cache_show_byte(trim,c) \
+ warn (" %-21s %02x:%3d\n", trim, c, c)
+#define _cache_show_char(trim,c) \
+ warn (" %-21s %02x:%s\n", trim, c, _pretty_str (&c, 1))
#define _cache_show_str(trim,l,str) \
warn (" %-21s %02d:%s\n", trim, l, _pretty_str (str, l))
-#define _cache_show_cstr(trim,l,idx) _cache_show_str (trim, l, cp + idx)
#define xs_cache_diag(hv) cx_xs_cache_diag (aTHX_ hv)
static void cx_xs_cache_diag (pTHX_ HV *hv)
{
- SV **svp;
- byte *cp, c;
+ SV **svp;
+ byte *cache;
+ csv_t csvs;
+ csv_t *csv = &csvs;
unless ((svp = hv_fetchs (hv, "_CACHE", FALSE)) && *svp) {
warn ("CACHE: invalid\n");
return;
}
- cp = (byte *)SvPV_nolen (*svp);
+ cache = (byte *)SvPV_nolen (*svp);
+ memcpy (csv, cache, sizeof (csv_t));
warn ("CACHE:\n");
- _cache_show_char ("quote", CACHE_ID_quote_char);
- _cache_show_char ("escape", CACHE_ID_escape_char);
- _cache_show_char ("sep", CACHE_ID_sep_char);
- _cache_show_byte ("binary", CACHE_ID_binary);
- _cache_show_byte ("decode_utf8", CACHE_ID_decode_utf8);
-
- _cache_show_byte ("allow_loose_escapes", CACHE_ID_allow_loose_escapes);
- _cache_show_byte ("allow_loose_quotes", CACHE_ID_allow_loose_quotes);
- _cache_show_byte ("allow_unquoted_escape", CACHE_ID_allow_unquoted_escape);
- _cache_show_byte ("allow_whitespace", CACHE_ID_allow_whitespace);
- _cache_show_byte ("always_quote", CACHE_ID_always_quote);
- _cache_show_byte ("quote_space", CACHE_ID_quote_space);
- _cache_show_byte ("quote_null", CACHE_ID_quote_null);
- _cache_show_byte ("quote_binary", CACHE_ID_quote_binary);
- _cache_show_byte ("auto_diag", CACHE_ID_auto_diag);
- _cache_show_byte ("diag_verbose", CACHE_ID_diag_verbose);
- _cache_show_byte ("has_error_input", CACHE_ID_has_error_input);
- _cache_show_byte ("blank_is_undef", CACHE_ID_blank_is_undef);
- _cache_show_byte ("empty_is_undef", CACHE_ID_empty_is_undef);
- _cache_show_byte ("has_ahead", CACHE_ID__has_ahead);
- _cache_show_byte ("has_types", CACHE_ID_has_types);
- _cache_show_byte ("keep_meta_info", CACHE_ID_keep_meta_info);
- _cache_show_byte ("verbatim", CACHE_ID_verbatim);
-
- _cache_show_byte ("eol_is_cr", CACHE_ID_eol_is_cr);
- _cache_show_byte ("eol_len", CACHE_ID_eol_len);
- if (c < 8)
- _cache_show_cstr ("eol", c, CACHE_ID_eol);
- else if ((svp = hv_fetchs (hv, "eol", FALSE)) && *svp && SvOK (*svp)) {
- STRLEN len;
- byte *eol = (byte *)SvPV (*svp, len);
- _cache_show_str ("eol", (int)len, eol);
- }
- else
- _cache_show_str ("eol", 8, (byte *)"<broken>");
-
- /* csv->is_bound =
- (csv->cache[CACHE_ID__is_bound ] << 24) |
- (csv->cache[CACHE_ID__is_bound + 1] << 16) |
- (csv->cache[CACHE_ID__is_bound + 2] << 8) |
- (csv->cache[CACHE_ID__is_bound + 3]);
- */
+ _cache_show_char ("quote_char", CH_QUOTE);
+ _cache_show_char ("escape_char", csv->escape_char);
+ _cache_show_char ("sep_char", CH_SEP);
+ _cache_show_byte ("binary", csv->binary);
+ _cache_show_byte ("decode_utf8", csv->decode_utf8);
+
+ _cache_show_byte ("allow_loose_escapes", csv->allow_loose_escapes);
+ _cache_show_byte ("allow_loose_quotes", csv->allow_loose_quotes);
+ _cache_show_byte ("allow_unquoted_escape", csv->allow_unquoted_escape);
+ _cache_show_byte ("allow_whitespace", csv->allow_whitespace);
+ _cache_show_byte ("always_quote", csv->always_quote);
+ _cache_show_byte ("quote_space", csv->quote_space);
+ _cache_show_byte ("quote_null", csv->quote_null);
+ _cache_show_byte ("quote_binary", csv->quote_binary);
+ _cache_show_byte ("auto_diag", csv->auto_diag);
+ _cache_show_byte ("diag_verbose", csv->diag_verbose);
+ _cache_show_byte ("has_error_input", csv->has_error_input);
+ _cache_show_byte ("blank_is_undef", csv->blank_is_undef);
+ _cache_show_byte ("empty_is_undef", csv->empty_is_undef);
+ _cache_show_byte ("has_ahead", csv->has_ahead);
+ _cache_show_byte ("keep_meta_info", csv->keep_meta_info);
+ _cache_show_byte ("verbatim", csv->verbatim);
+
+ _cache_show_byte ("has_hooks", csv->has_hooks);
+ _cache_show_byte ("eol_is_cr", csv->eol_is_cr);
+ _cache_show_byte ("eol_len", csv->eol_len);
+ _cache_show_str ("eol", csv->eol_len, csv->eol);
+ _cache_show_byte ("sep_len", csv->sep_len);
+ if (csv->sep_len > 1)
+ _cache_show_str ("sep", csv->sep_len, csv->sep);
+ _cache_show_byte ("quo_len", csv->quo_len);
+ if (csv->quo_len > 1)
+ _cache_show_str ("quote", csv->quo_len, csv->quo);
} /* xs_cache_diag */
#define set_eol_is_cr(csv) cx_set_eol_is_cr (aTHX_ csv)
static void cx_set_eol_is_cr (pTHX_ csv_t *csv)
{
- csv->cache[CACHE_ID_eol ] = CH_CR;
- csv->cache[CACHE_ID_eol + 1] = 0;
- csv->eol_is_cr = csv->cache[CACHE_ID_eol_is_cr] = 1;
- csv->eol_len = csv->cache[CACHE_ID_eol_len] = 1;
- csv->eol = &csv->cache[CACHE_ID_eol];
+ csv->eol[0] = CH_CR;
+ csv->eol_is_cr = 1;
+ csv->eol_len = 1;
+ memcpy (csv->cache, csv, sizeof (csv_t));
+
(void)hv_store (csv->self, "eol", 3, newSVpvn ((char *)csv->eol, 1), 0);
} /* set_eol_is_cr */
@@ -419,78 +518,52 @@ static void cx_SetupCsv (pTHX_ csv_t *csv, HV *self, SV *pself)
STRLEN len;
char *ptr;
- csv->self = self;
- csv->pself = pself;
+ last_error = 0;
if ((svp = hv_fetchs (self, "_CACHE", FALSE)) && *svp) {
- csv->cache = (byte *)SvPVX (*svp);
-
- csv->quote_char = csv->cache[CACHE_ID_quote_char ];
- csv->escape_char = csv->cache[CACHE_ID_escape_char ];
- csv->sep_char = csv->cache[CACHE_ID_sep_char ];
- csv->binary = csv->cache[CACHE_ID_binary ];
- csv->decode_utf8 = csv->cache[CACHE_ID_decode_utf8 ];
-
- csv->keep_meta_info = csv->cache[CACHE_ID_keep_meta_info ];
- csv->always_quote = csv->cache[CACHE_ID_always_quote ];
- csv->auto_diag = csv->cache[CACHE_ID_auto_diag ];
- csv->diag_verbose = csv->cache[CACHE_ID_diag_verbose ];
- csv->has_error_input = csv->cache[CACHE_ID_has_error_input ];
- csv->quote_space = csv->cache[CACHE_ID_quote_space ];
- csv->quote_null = csv->cache[CACHE_ID_quote_null ];
- csv->quote_binary = csv->cache[CACHE_ID_quote_binary ];
-
- csv->allow_loose_quotes = csv->cache[CACHE_ID_allow_loose_quotes];
- csv->allow_loose_escapes = csv->cache[CACHE_ID_allow_loose_escapes];
- csv->allow_unquoted_escape = csv->cache[CACHE_ID_allow_unquoted_escape];
- csv->allow_whitespace = csv->cache[CACHE_ID_allow_whitespace ];
- csv->blank_is_undef = csv->cache[CACHE_ID_blank_is_undef ];
- csv->empty_is_undef = csv->cache[CACHE_ID_empty_is_undef ];
- csv->verbatim = csv->cache[CACHE_ID_verbatim ];
- csv->has_ahead = csv->cache[CACHE_ID__has_ahead ];
- csv->eol_is_cr = csv->cache[CACHE_ID_eol_is_cr ];
- csv->eol_len = csv->cache[CACHE_ID_eol_len ];
- if (csv->eol_len < 8)
- csv->eol = &csv->cache[CACHE_ID_eol];
- else {
- /* Was too long to cache. must re-fetch */
- csv->eol = NULL;
- csv->eol_is_cr = 0;
- csv->eol_len = 0;
- if ((svp = hv_fetchs (self, "eol", FALSE)) && *svp && SvOK (*svp)) {
- csv->eol = (byte *)SvPV (*svp, len);
- csv->eol_len = len;
- }
- }
- csv->is_bound =
- (csv->cache[CACHE_ID__is_bound ] << 24) |
- (csv->cache[CACHE_ID__is_bound + 1] << 16) |
- (csv->cache[CACHE_ID__is_bound + 2] << 8) |
- (csv->cache[CACHE_ID__is_bound + 3]);
-
- csv->types = NULL;
- if (csv->cache[CACHE_ID_has_types]) {
- if ((svp = hv_fetchs (self, "_types", FALSE)) && *svp && SvOK (*svp)) {
- csv->types = SvPV (*svp, len);
- csv->types_len = len;
- }
- }
+ byte *cache = (byte *)SvPVX (*svp);
+ memcpy (csv, cache, sizeof (csv_t));
}
else {
SV *sv_cache;
- csv->quote_char = '"';
- if ((svp = hv_fetchs (self, "quote_char", FALSE)) && *svp) {
+ memset (csv, 0, sizeof (csv_t)); /* Reset everything */
+
+ csv->self = self;
+ csv->pself = pself;
+
+ CH_SEP = ',';
+ if ((svp = hv_fetchs (self, "sep_char", FALSE)) && *svp && SvOK (*svp))
+ CH_SEP = *SvPV (*svp, len);
+ if ((svp = hv_fetchs (self, "sep", FALSE)) && *svp && SvOK (*svp)) {
+ ptr = SvPV (*svp, len);
+ if (len < MAX_SEP_LEN) {
+ memcpy (csv->sep, ptr, len);
+ if (len > 1)
+ csv->sep_len = len;
+ }
+ }
+
+ CH_QUOTE = '"';
+ if ((svp = hv_fetchs (self, "quote_char", FALSE)) && *svp) {
if (SvOK (*svp)) {
ptr = SvPV (*svp, len);
- csv->quote_char = len ? *ptr : (char)0;
+ CH_QUOTE = len ? *ptr : (char)0;
}
else
- csv->quote_char = (char)0;
+ CH_QUOTE = (char)0;
+ }
+ if ((svp = hv_fetchs (self, "quote", FALSE)) && *svp && SvOK (*svp)) {
+ ptr = SvPV (*svp, len);
+ if (len < MAX_QUO_LEN) {
+ memcpy (csv->quo, ptr, len);
+ if (len > 1)
+ csv->quo_len = len;
+ }
}
csv->escape_char = '"';
- if ((svp = hv_fetchs (self, "escape_char", FALSE)) && *svp) {
+ if ((svp = hv_fetchs (self, "escape_char", FALSE)) && *svp) {
if (SvOK (*svp)) {
ptr = SvPV (*svp, len);
csv->escape_char = len ? *ptr : (char)0;
@@ -498,32 +571,31 @@ static void cx_SetupCsv (pTHX_ csv_t *csv, HV *self, SV *pself)
else
csv->escape_char = (char)0;
}
- csv->sep_char = ',';
- if ((svp = hv_fetchs (self, "sep_char", FALSE)) && *svp && SvOK (*svp)) {
- ptr = SvPV (*svp, len);
- if (len)
- csv->sep_char = *ptr;
- }
- csv->eol = (byte *)"";
- csv->eol_is_cr = 0;
- csv->eol_len = 0;
- if ((svp = hv_fetchs (self, "eol", FALSE)) && *svp && SvOK (*svp)) {
- csv->eol = (byte *)SvPV (*svp, len);
- csv->eol_len = len;
- if (len == 1 && *csv->eol == CH_CR)
- csv->eol_is_cr = 1;
+ if ((svp = hv_fetchs (self, "eol", FALSE)) && *svp && SvOK (*svp)) {
+ char *eol = SvPV (*svp, len);
+ if (len < MAX_EOL_LEN) {
+ memcpy (csv->eol, eol, len);
+ csv->eol_len = len;
+ if (len == 1 && *csv->eol == CH_CR)
+ csv->eol_is_cr = 1;
+ }
}
- csv->types = NULL;
- if ((svp = hv_fetchs (self, "_types", FALSE)) && *svp && SvOK (*svp)) {
+ if ((svp = hv_fetchs (self, "_types", FALSE)) && *svp && SvOK (*svp)) {
csv->types = SvPV (*svp, len);
csv->types_len = len;
}
- csv->is_bound = 0;
- if ((svp = hv_fetchs (self, "_is_bound", FALSE)) && *svp && SvOK(*svp))
+ if ((svp = hv_fetchs (self, "_is_bound", FALSE)) && *svp && SvOK (*svp))
csv->is_bound = SvIV(*svp);
+ if ((svp = hv_fetchs (self, "callbacks", FALSE)) && _is_hashref (*svp)) {
+ HV *cb = (HV *)SvRV (*svp);
+ if ((svp = hv_fetchs (cb, "after_parse", FALSE)) && _is_coderef (*svp))
+ csv->has_hooks |= HOOK_AFTER_PARSE;
+ if ((svp = hv_fetchs (cb, "before_print", FALSE)) && _is_coderef (*svp))
+ csv->has_hooks |= HOOK_BEFORE_PRINT;
+ }
csv->binary = bool_opt ("binary");
csv->decode_utf8 = bool_opt ("decode_utf8");
@@ -542,43 +614,12 @@ static void cx_SetupCsv (pTHX_ csv_t *csv, HV *self, SV *pself)
csv->auto_diag = num_opt ("auto_diag");
csv->diag_verbose = num_opt ("diag_verbose");
- csv->has_error_input = 0;
- sv_cache = newSVpvn (init_cache, CACHE_SIZE);
+ sv_cache = newSVpvn ((char *)csv, sizeof (csv_t));
csv->cache = (byte *)SvPVX (sv_cache);
SvREADONLY_on (sv_cache);
- csv->cache[CACHE_ID_quote_char] = csv->quote_char;
- csv->cache[CACHE_ID_escape_char] = csv->escape_char;
- csv->cache[CACHE_ID_sep_char] = csv->sep_char;
- csv->cache[CACHE_ID_binary] = csv->binary;
- csv->cache[CACHE_ID_decode_utf8] = csv->decode_utf8;
-
- csv->cache[CACHE_ID_keep_meta_info] = csv->keep_meta_info;
- csv->cache[CACHE_ID_always_quote] = csv->always_quote;
- csv->cache[CACHE_ID_quote_space] = csv->quote_space;
- csv->cache[CACHE_ID_quote_null] = csv->quote_null;
- csv->cache[CACHE_ID_quote_binary] = csv->quote_binary;
-
- csv->cache[CACHE_ID_allow_loose_quotes] = csv->allow_loose_quotes;
- csv->cache[CACHE_ID_allow_loose_escapes] = csv->allow_loose_escapes;
- csv->cache[CACHE_ID_allow_unquoted_escape] = csv->allow_unquoted_escape;
- csv->cache[CACHE_ID_allow_whitespace] = csv->allow_whitespace;
- csv->cache[CACHE_ID_blank_is_undef] = csv->blank_is_undef;
- csv->cache[CACHE_ID_empty_is_undef] = csv->empty_is_undef;
- csv->cache[CACHE_ID_verbatim] = csv->verbatim;
- csv->cache[CACHE_ID_auto_diag] = csv->auto_diag;
- csv->cache[CACHE_ID_diag_verbose] = csv->diag_verbose;
- csv->cache[CACHE_ID_eol_is_cr] = csv->eol_is_cr;
- csv->cache[CACHE_ID_eol_len] = csv->eol_len;
- if (csv->eol_len > 0 && csv->eol_len < 8 && csv->eol)
- memcpy ((char *)&csv->cache[CACHE_ID_eol], csv->eol, csv->eol_len);
- csv->cache[CACHE_ID_has_types] = csv->types ? 1 : 0;
- csv->cache[CACHE_ID__has_ahead] = csv->has_ahead = 0;
- csv->cache[CACHE_ID__is_bound ] = (csv->is_bound & 0xFF000000) >> 24;
- csv->cache[CACHE_ID__is_bound + 1] = (csv->is_bound & 0x00FF0000) >> 16;
- csv->cache[CACHE_ID__is_bound + 2] = (csv->is_bound & 0x0000FF00) >> 8;
- csv->cache[CACHE_ID__is_bound + 3] = (csv->is_bound & 0x000000FF);
+ memcpy (csv->cache, csv, sizeof (csv_t));
(void)hv_store (self, "_CACHE", 6, sv_cache, 0);
}
@@ -604,6 +645,10 @@ static void cx_SetupCsv (pTHX_ csv_t *csv, HV *self, SV *pself)
? 0
: 1
: 0;
+ if (csv->sep_len && is_utf8_string ((U8 *)(csv->sep), csv->sep_len))
+ csv->utf8 = 1;
+ if (csv->quo_len && is_utf8_string ((U8 *)(csv->quo), csv->quo_len))
+ csv->utf8 = 1;
} /* SetupCsv */
#define Print(csv,dst) cx_Print (aTHX_ csv, dst)
@@ -619,12 +664,10 @@ static int cx_Print (pTHX_ csv_t *csv, SV *dst)
PUSHMARK (sp);
EXTEND (sp, 2);
PUSHs ((dst));
- PUSHs (tmp);
- PUTBACK;
if (csv->utf8) {
STRLEN len;
char *ptr;
- int j, l;
+ int j;
ptr = SvPV (tmp, len);
while (len > 0 && !is_utf8_sv (tmp) && keep < 16) {
@@ -636,6 +679,8 @@ static int cx_Print (pTHX_ csv_t *csv, SV *dst)
csv->buffer[j] = csv->buffer[csv->used - keep + j];
SvUTF8_on (tmp);
}
+ PUSHs (tmp);
+ PUTBACK;
result = call_sv (m_print, G_SCALAR | G_METHOD);
SPAGAIN;
if (result) {
@@ -659,9 +704,9 @@ static int cx_Print (pTHX_ csv_t *csv, SV *dst)
#define CSV_PUT(csv,dst,c) { \
if ((csv)->used == sizeof ((csv)->buffer) - 1) { \
- unless (Print ((csv), (dst))) \
+ unless (Print ((csv), (dst))) \
return FALSE; \
- } \
+ } \
(csv)->buffer[(csv)->used++] = (c); \
}
@@ -704,11 +749,6 @@ static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields)
{
int i, n, bound = 0;
- if (csv->sep_char == csv->quote_char || csv->sep_char == csv->escape_char) {
- (void)SetDiag (csv, 1001);
- return FALSE;
- }
-
n = av_len (fields);
if (n < 0 && csv->is_bound) {
n = csv->is_bound - 1;
@@ -718,8 +758,14 @@ static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields)
for (i = 0; i <= n; i++) {
SV *sv;
- if (i > 0)
- CSV_PUT (csv, dst, csv->sep_char);
+ if (i > 0) {
+ CSV_PUT (csv, dst, CH_SEP);
+ if (csv->sep_len) {
+ int x;
+ for (x = 1; x < (int)csv->sep_len; x++)
+ CSV_PUT (csv, dst, csv->sep[x]);
+ }
+ }
if (bound)
sv = bound_field (csv, i, 1);
@@ -746,7 +792,7 @@ static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields)
* and if the string contains quote or escape characters.
*/
if (!quoteMe &&
- ( quoteMe = (!SvIOK (sv) && !SvNOK (sv) && csv->quote_char))) {
+ ( quoteMe = (!SvIOK (sv) && !SvNOK (sv) && CH_QUOTE))) {
char *ptr2;
STRLEN l;
@@ -755,8 +801,8 @@ static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields)
if (c < csv->first_safe_char ||
(csv->quote_binary && c >= 0x7f && c <= 0xa0) ||
- (csv->quote_char && c == csv->quote_char) ||
- (csv->sep_char && c == csv->sep_char) ||
+ (CH_QUOTE && c == CH_QUOTE) ||
+ (CH_SEP && c == CH_SEP) ||
(csv->escape_char && c == csv->escape_char)) {
/* Binary character */
break;
@@ -764,8 +810,14 @@ static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields)
}
quoteMe = (l > 0);
}
- if (quoteMe)
- CSV_PUT (csv, dst, csv->quote_char);
+ if (quoteMe) {
+ CSV_PUT (csv, dst, CH_QUOTE);
+ if (csv->quo_len) {
+ int x;
+ for (x = 1; x < (int)csv->quo_len; x++)
+ CSV_PUT (csv, dst, csv->quo[x]);
+ }
+ }
while (len-- > 0) {
char c = *ptr++;
int e = 0;
@@ -778,7 +830,8 @@ static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields)
(void)SetDiag (csv, 2110);
return FALSE;
}
- if (c == csv->quote_char && csv->quote_char)
+ if (CH_QUOTE && (byte)c == CH_QUOTE && (csv->quo_len == 0 ||
+ memcmp (ptr, csv->quo +1, csv->quo_len - 1) == 0))
e = 1;
else
if (c == csv->escape_char && csv->escape_char)
@@ -792,8 +845,14 @@ static int cx_Combine (pTHX_ csv_t *csv, SV *dst, AV *fields)
CSV_PUT (csv, dst, csv->escape_char);
CSV_PUT (csv, dst, c);
}
- if (quoteMe)
- CSV_PUT (csv, dst, csv->quote_char);
+ if (quoteMe) {
+ CSV_PUT (csv, dst, CH_QUOTE);
+ if (csv->quo_len) {
+ int x;
+ for (x = 1; x < (int)csv->quo_len; x++)
+ CSV_PUT (csv, dst, csv->quo[x]);
+ }
+ }
}
}
if (csv->eol_len) {
@@ -901,11 +960,19 @@ static int cx_CsvGet (pTHX_ csv_t *csv, SV *src)
#if MAINT_DEBUG > 4
#define PUT_RPT fprintf (stderr, "# CSV_PUT @ %4d: 0x%02x '%c'\n", __LINE__, c, isprint (c) ? c : '?')
+#define PUT_SEPX_RPT1 fprintf (stderr, "# PUT SEPX @ %4d\n", __LINE__)
+#define PUT_SEPX_RPT2 fprintf (stderr, "# Done putting SEPX\n")
+#define PUT_QUOX_RPT1 fprintf (stderr, "# PUT QUOX @ %4d\n", __LINE__)
+#define PUT_QUOX_RPT2 fprintf (stderr, "# Done putting QUOX\n")
#define PUT_EOLX_RPT1 fprintf (stderr, "# PUT EOLX @ %4d\n", __LINE__)
#define PUT_EOLX_RPT2 fprintf (stderr, "# Done putting EOLX\n")
#define PUSH_RPT fprintf (stderr, "# AV_PUSHd @ %4d\n", __LINE__); sv_dump (sv)
#else
#define PUT_RPT
+#define PUT_SEPX_RPT1
+#define PUT_SEPX_RPT2
+#define PUT_QUOX_RPT1
+#define PUT_QUOX_RPT2
#define PUT_EOLX_RPT1
#define PUT_EOLX_RPT2
#define PUSH_RPT
@@ -927,6 +994,18 @@ static int cx_CsvGet (pTHX_ csv_t *csv, SV *src)
csv->eol_pos = -1; \
PUT_EOLX_RPT2; \
} \
+ else if (c == CH_SEPX) { \
+ int x; PUT_SEPX_RPT1; \
+ for (x = 0; x < (int)csv->sep_len; x++) \
+ CSV_PUT_SV1 (csv->sep[x]); \
+ PUT_SEPX_RPT2; \
+ } \
+ else if (c == CH_QUOTEX) { \
+ int x; PUT_QUOX_RPT1; \
+ for (x = 0; x < (int)csv->quo_len; x++) \
+ CSV_PUT_SV1 (csv->quo[x]); \
+ PUT_QUOX_RPT2; \
+ } \
else \
CSV_PUT_SV1 (c); \
}
@@ -978,9 +1057,8 @@ static void cx_strip_trail_whitespace (pTHX_ SV *sv)
STRLEN len;
char *s = SvPV (sv, len);
unless (s && len) return;
- while (s[len - 1] == CH_SPACE || s[len - 1] == CH_TAB) {
+ while (s[len - 1] == CH_SPACE || s[len - 1] == CH_TAB)
s[--len] = (char)0;
- }
SvCUR_set (sv, len);
} /* strip_trail_whitespace */
@@ -998,6 +1076,21 @@ static void cx_strip_trail_whitespace (pTHX_ SV *sv)
static char str_parsed[40];
#endif
+#if MAINT_DEBUG > 1
+static char *_sep_string (csv_t *csv)
+{
+ char sep[64];
+ if (csv->sep_len) {
+ int x;
+ for (x = 0; x < csv->sep_len; x++)
+ sprintf (sep + x * x, "%02x ", csv->sep[x]);
+ }
+ else
+ sprintf (sep, "'%c' (0x%02x)", CH_SEP, CH_SEP);
+ return sep;
+ } /* _sep_string */
+#endif
+
#define Parse(csv,src,fields,fflags) cx_Parse (aTHX_ csv, src, fields, fflags)
static int cx_Parse (pTHX_ csv_t *csv, SV *src, AV *fields, AV *fflags)
{
@@ -1012,11 +1105,6 @@ static int cx_Parse (pTHX_ csv_t *csv, SV *src, AV *fields, AV *fflags)
memset (str_parsed, 0, 40);
#endif
- if (csv->sep_char == csv->quote_char || csv->sep_char == csv->escape_char) {
- (void)SetDiag (csv, 1001);
- return FALSE;
- }
-
while ((c = CSV_GET) != EOF) {
NewField;
@@ -1027,10 +1115,10 @@ static int cx_Parse (pTHX_ csv_t *csv, SV *src, AV *fields, AV *fflags)
if (spl < 39) str_parsed[spl] = c;
#endif
restart:
- if (c == csv->sep_char) {
+ if (is_SEP (c)) {
#if MAINT_DEBUG > 1
- fprintf (stderr, "# %d/%d/%02x pos %d = SEP '%c'\n",
- waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c);
+ fprintf (stderr, "# %d/%d/%02x pos %d = SEP %s\n",
+ waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, _sep_string (csv));
#endif
if (waitingForField) {
if (csv->blank_is_undef || csv->empty_is_undef)
@@ -1161,7 +1249,7 @@ restart:
}
} /* CH_CR */
else
- if (c == csv->quote_char && csv->quote_char) {
+ if (is_QUOTE (c)) {
#if MAINT_DEBUG > 1
fprintf (stderr, "# %d/%d/%02x pos %d = QUO '%c'\n",
waitingForField ? 1 : 0, sv ? 1 : 0, f, spl, c);
@@ -1183,7 +1271,7 @@ restart:
c2 = CSV_GET;
}
- if (c2 == csv->sep_char) {
+ if (is_SEP (c2)) {
AV_PUSH;
continue;
}
@@ -1238,17 +1326,20 @@ restart:
return TRUE;
}
- if (c2 == csv->sep_char) {
+ if (is_SEP (c2)) {
AV_PUSH;
}
else
if (c2 == '0')
CSV_PUT_SV (0)
else
- if (c2 == csv->quote_char || c2 == csv->sep_char)
+ if (is_QUOTE (c2)) {
+ if (csv->utf8)
+ f |= CSV_FLAGS_BIN;
CSV_PUT_SV (c2)
+ }
else
- if (c2 == CH_NL || c2 == CH_EOLX) {
+ if (c2 == CH_NL || c2 == CH_EOLX) {
AV_PUSH;
return TRUE;
}
@@ -1278,7 +1369,7 @@ restart:
}
}
- if (csv->allow_loose_escapes && csv->escape_char == csv->quote_char) {
+ if (csv->allow_loose_escapes && csv->escape_char == CH_QUOTE) {
CSV_PUT_SV (c);
c = c2;
goto restart;
@@ -1322,9 +1413,12 @@ restart:
if (c2 == '0')
CSV_PUT_SV (0)
else
- if ( c2 == csv->quote_char || c2 == csv->sep_char ||
- c2 == csv->escape_char || csv->allow_loose_escapes)
+ if ( is_QUOTE (c2) || is_SEP (c2) ||
+ c2 == csv->escape_char || csv->allow_loose_escapes) {
+ if (csv->utf8)
+ f |= CSV_FLAGS_BIN;
CSV_PUT_SV (c2)
+ }
else {
csv->used--;
ERROR_INSIDE_QUOTES (2025);
@@ -1343,9 +1437,12 @@ restart:
if (c2 == '0')
CSV_PUT_SV (0)
else
- if ( c2 == csv->quote_char || c2 == csv->sep_char ||
- c2 == csv->escape_char || csv->allow_loose_escapes)
+ if ( is_QUOTE (c2) || is_SEP (c2) ||
+ c2 == csv->escape_char || csv->allow_loose_escapes) {
+ if (csv->utf8)
+ f |= CSV_FLAGS_BIN;
CSV_PUT_SV (c2)
+ }
else {
csv->used--;
ERROR_INSIDE_QUOTES (2025);
@@ -1447,7 +1544,6 @@ static int cx_c_xsParse (pTHX_ csv_t csv, HV *hv, AV *av, AV *avf, SV *src, bool
}
if ((csv.useIO = useIO)) {
- dSP;
require_IO_Handle;
csv.tmp = NULL;
@@ -1486,7 +1582,8 @@ static int cx_c_xsParse (pTHX_ csv_t csv, HV *hv, AV *av, AV *avf, SV *src, bool
if (csv.useIO & useIO_EOF)
(void)hv_store (hv, "_EOF", 4, &PL_sv_yes, 0);
}
- csv.cache[CACHE_ID__has_ahead] = csv.has_ahead;
+ /* csv.cache[CACHE_ID__has_ahead] = csv.has_ahead; */
+ memcpy (csv.cache, &csv, sizeof (csv_t));
if (avf) {
if (csv.keep_meta_info) {
@@ -1526,12 +1623,45 @@ static int cx_c_xsParse (pTHX_ csv_t csv, HV *hv, AV *av, AV *avf, SV *src, bool
return result;
} /* c_xsParse */
+static void hook (pTHX_ HV *hv, char *cb_name, AV *av)
+{
+ SV **svp;
+ HV *cb;
+
+#if MAINT_DEBUG > 1
+ fprintf (stderr, "# HOOK %s %x\n", cb_name, av);
+#endif
+ unless ((svp = hv_fetchs (hv, "callbacks", FALSE)) && _is_hashref (*svp))
+ return;
+
+ cb = (HV *)SvRV (*svp);
+ svp = hv_fetch (cb, cb_name, strlen (cb_name), FALSE);
+ unless (svp && _is_coderef (*svp))
+ return;
+
+ { dSP;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK (SP);
+ XPUSHs (newRV_noinc ((SV *)hv));
+ XPUSHs (newRV_noinc ((SV *)av));
+ PUTBACK;
+ call_sv (*svp, G_VOID | G_DISCARD);
+ FREETMPS;
+ LEAVE;
+ }
+ } /* hook */
+
#define xsParse(self,hv,av,avf,src,useIO) cx_xsParse (aTHX_ self, hv, av, avf, src, useIO)
static int cx_xsParse (pTHX_ SV *self, HV *hv, AV *av, AV *avf, SV *src, bool useIO)
{
csv_t csv;
+ int state;
SetupCsv (&csv, hv, self);
- return (c_xsParse (csv, hv, av, avf, src, useIO));
+ state = c_xsParse (csv, hv, av, avf, src, useIO);
+ if (state && csv.has_hooks & HOOK_AFTER_PARSE)
+ hook (aTHX_ hv, "after_parse", av);
+ return (state || !last_error);
} /* xsParse */
#define av_empty(av) cx_av_empty (aTHX_ av)
@@ -1541,13 +1671,6 @@ static void cx_av_empty (pTHX_ AV *av)
sv_free (av_pop (av));
} /* av_empty */
-#define av_free(av) cx_av_free (aTHX_ av)
-static void cx_av_free (pTHX_ AV *av)
-{
- av_empty (av);
- sv_free ((SV *)av);
- } /* av_free */
-
#define xsParse_all(self,hv,io,off,len) cx_xsParse_all (aTHX_ self, hv, io, off, len)
static SV *cx_xsParse_all (pTHX_ SV *self, HV *hv, SV *io, SV *off, SV *len)
{
@@ -1584,6 +1707,8 @@ static SV *cx_xsParse_all (pTHX_ SV *self, HV *hv, SV *io, SV *off, SV *len)
n--;
}
+ if (csv.has_hooks & HOOK_AFTER_PARSE)
+ hook (aTHX_ hv, "after_parse", row);
av_push (avr, newRV_noinc ((SV *)row));
if (n >= length && skip >= 0)
@@ -1611,9 +1736,11 @@ static int cx_xsCombine (pTHX_ SV *self, HV *hv, AV *av, SV *io, bool useIO)
SetupCsv (&csv, hv, self);
csv.useIO = useIO;
#if (PERL_BCDVERSION >= 0x5008000)
- if (csv.eol && *csv.eol)
+ if (*csv.eol)
PL_ors_sv = NULL;
#endif
+ if (useIO && csv.has_hooks & HOOK_BEFORE_PRINT)
+ hook (aTHX_ hv, "before_print", av);
result = Combine (&csv, io, av);
#if (PERL_BCDVERSION >= 0x5008000)
PL_ors_sv = ors;
@@ -1662,8 +1789,6 @@ error_input (self)
SV *self
PPCODE:
- HV *hv;
-
if (self && SvOK (self) && SvROK (self) && SvTYPE (SvRV (self)) == SVt_PVHV) {
HV *hv = (HV *)SvRV (self);
SV **sv = hv_fetchs (hv, "_ERROR_INPUT", FALSE);
@@ -1753,7 +1878,7 @@ getline (self, io)
av = newAV ();
avf = newAV ();
ST (0) = xsParse (self, hv, av, avf, io, 1)
- ? sv_2mortal (newRV_noinc ((SV *)av))
+ ? sv_2mortal (newRV_noinc ((SV *)av))
: &PL_sv_undef;
XSRETURN (1);
/* XS getline */
@@ -1,3 +1,51 @@
+1.11 - 2014-08-08, H.Merijn Brand
+ - Fixed eof (RT#97742)
+ - Check for attribute conflicts early
+ - csv (in => [..]) now defaults to *STDOUT for out
+ - Support for multi-byte quote_char
+ - New attribute "key" for csv ()
+
+1.10 - 2014-08-04, H.Merijn Brand
+ * Support for scalar ref in out: csv (out => \(my $x = ""), ...)
+ * Support for multi-byte sep_char
+ * Simplified the cache coding
+
+1.09 - 2014-06-09, H.Merijn Brand
+ * Missed defined-or need in csv2xls (RT#95787)
+
+1.08 - 2014-05-17, H.Merijn Brand
+ * Documentation updates
+ * Allow disjointed CSV cell fragment lists
+
+1.07 - 2014-04-28, H.Merijn Brand
+ * Allow ref to scalar for csv (in => ...)
+ * Allow CODE ref to in attribute
+ * Allow * in fragment cellspec's bottom-right cell
+
+1.06 - 2014-04-20, H.Merijn Brand
+ * Fix possible fail in tests on Windows (Thanks Mithaldu for explaing)
+ * Only close file handles in csv () for files
+ * new callbacks for csv ()
+
+1.05 - 2014-03-02, H.Merijn Brand
+ * Allow case insensitive attributes and attribute aliases
+ (quote_always = always_quote)
+ * Enhanced the csv () function (diagnostics)
+ * Start callbacks support
+ * Minor doc fixes
+ * Make subclassing safer
+
+1.04 - 2014-02-06, H.Merijn Brand
+ * column_names () with no argument now returns the empty list
+ instead of undef when no columns defined
+ * fragments (rfc7111) now also support AoH (was AoA only)
+ * Error code conflict for fragments resolved to 2013
+ * New function "csv" (not a method)
+
+1.03 - 2014-01-21, H.Merijn Brand
+ * Update copyright to 2014
+ * Implement RFC7111
+
1.02 - 2013-09-25, H.Merijn Brand
* Add example for reading only a single column
* Don't store NULL in _ERROR_INPUT (RT#86217/Clone)
@@ -26,14 +26,19 @@ t/65_allow.t Allow bad formats
t/70_rt.t Tests based on RT reports
t/75_hashref.t getline_hr related tests
t/76_magic.t array_ref from magic
-t/77_getall.t gat all rows at once
+t/77_getall.t Get all rows at once
+t/78_fragment.t Get fragments according to RFC7111 specs
+t/79_callbacks.t Test callback features
t/80_diag.t Error diagnostics
t/81_subclass.t Subclassed
+t/90_csv.t Function csv () checks
+t/91_csv_cb.t Callbacks with csv function
t/util.pl Extra test utilities
examples/csv2xls Script to onvert CSV files to M$Excel
examples/csv-check Script to check a CSV file/stream
examples/csvdiff Script to shoff diff between sorted CSV files
examples/parser-xs.pl Parse CSV stream, be forgiving on bad lines
+examples/rewrite.pl Simple CSV rewriter
examples/speed.pl Small benchmark script
files/macosx.csv A CSV files exported on MacOSX
files/utf8.csv A UTF-8 encode test file
@@ -1,18 +1,26 @@
{
- "release_status" : "stable",
+ "license" : [
+ "perl_5"
+ ],
+ "dynamic_config" : 1,
"meta-spec" : {
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
"version" : "2"
},
- "license" : [
- "perl_5"
- ],
"name" : "Text-CSV_XS",
+ "abstract" : "Comma-Separated Values manipulation routines",
+ "author" : [
+ "H.Merijn Brand <h.m.brand@xs4all.nl>"
+ ],
"prereqs" : {
- "test" : {
+ "configure" : {
"requires" : {
- "Tie::Scalar" : "0",
- "Test::More" : "0"
+ "ExtUtils::MakeMaker" : "0"
+ }
+ },
+ "build" : {
+ "requires" : {
+ "Config" : "0"
}
},
"runtime" : {
@@ -22,42 +30,34 @@
"IO::Handle" : "0"
},
"recommends" : {
- "Encode" : "2.55",
- "perl" : "5.018001"
+ "Encode" : "2.62",
+ "perl" : "5.020000"
}
},
- "configure" : {
- "requires" : {
- "ExtUtils::MakeMaker" : "0"
- }
- },
- "build" : {
+ "test" : {
"requires" : {
- "Config" : "0"
+ "Test::More" : "0",
+ "Tie::Scalar" : "0"
}
}
},
- "generated_by" : "Author",
- "version" : "1.02",
- "dynamic_config" : 1,
- "author" : [
- "H.Merijn Brand <h.m.brand@xs4all.nl>"
- ],
- "abstract" : "Comma-Separated Values manipulation routines",
"provides" : {
"Text::CSV_XS" : {
- "version" : "1.02",
- "file" : "CSV_XS.pm"
+ "file" : "CSV_XS.pm",
+ "version" : "1.11"
}
},
+ "generated_by" : "Author",
+ "release_status" : "stable",
"resources" : {
"license" : [
"http://dev.perl.org/licenses/"
],
"repository" : {
- "url" : "http://repo.or.cz/r/Text-CSV_XS.git",
+ "type" : "git",
"web" : "http://repo.or.cz/w/Text-CSV_XS.git",
- "type" : "git"
+ "url" : "http://repo.or.cz/r/Text-CSV_XS.git"
}
- }
+ },
+ "version" : "1.11"
}
@@ -7,7 +7,7 @@ build_requires:
configure_requires:
ExtUtils::MakeMaker: 0
dynamic_config: 1
-generated_by: Author, CPAN::Meta::Converter version 2.132830
+generated_by: Author, CPAN::Meta::Converter version 2.142060
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
@@ -16,10 +16,10 @@ name: Text-CSV_XS
provides:
Text::CSV_XS:
file: CSV_XS.pm
- version: '1.02'
+ version: '1.11'
recommends:
- Encode: '2.55'
- perl: '5.018001'
+ Encode: '2.62'
+ perl: '5.020000'
requires:
DynaLoader: 0
IO::Handle: 0
@@ -29,4 +29,4 @@ requires:
resources:
license: http://dev.perl.org/licenses/
repository: http://repo.or.cz/r/Text-CSV_XS.git
-version: '1.02'
+version: '1.11'
@@ -1,6 +1,6 @@
#!/usr/bin/perl
-# Copyright PROCURA B.V. (c) 2006-2013 H.Merijn Brand
+# Copyright PROCURA B.V. (c) 2006-2014 H.Merijn Brand
require 5.006001; # <- also see postamble at the bottom for META.yml
use strict;
@@ -25,6 +25,7 @@ my %wm = (
CSV_XS.xs.gcov
cover_db
valgrind.log
+ pod2htmd.tmp
)
},
@@ -74,11 +75,14 @@ sub postamble
' -@env TEST_FILES="xt/*.t" make -e test_dynamic',
''
: "";
+ my $no_inc = join " " => map { "-ignore $_" } grep { -s $_ }
+ map { "$_/CORE/inline.h" } @INC;
join "\n" =>
'cover $make_sep test_cover:',
' ccache -C',
' -@rm -f *.gc??',
' cover -test',
+ " cover -report html_basic $no_inc",
'',
'leakrun:',
' env HARNESS_PERL=sandbox/leakperl make test',
@@ -108,9 +112,13 @@ sub postamble
' -@mv -f $(DISTVNAME).tar.gz $(DISTVNAME).tgz',
' -@cpants_lint.pl $(DISTVNAME).tgz',
' -@rm -f Debian_CPANTS.txt',
+ ' -@echo "Consider running sandbox/used-by.pl now"',
'',
'test_speed: pure_all',
' PERL_DL_NONLAZY=1 $(FULLPERLRUN) -I"$(INST_LIB)" -I"$(INST_ARCHLIB)" examples/speed.pl',
'',
+ 'test_used: test',
+ ' prove -vwb sandbox/used-by.pl',
+ '',
$min_vsn;
} # postamble
@@ -7,7 +7,7 @@ Description:
combine fields into a CSV string and parse a CSV string into fields.
Copying:
- Copyright (c) 2007-2013 H.Merijn Brand. All rights reserved.
+ Copyright (c) 2007-2014 H.Merijn Brand. All rights reserved.
Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
Portions Copyright (c) 1997 Alan Citterman. All rights reserved.
@@ -1,7 +1,7 @@
#!/pro/bin/perl
# csv-check: Check validity of CSV file and report
-# (m)'13 [10 Jul 2013] Copyright H.M.Brand 2007-2013
+# (m)'13 [10 Jul 2013] Copyright H.M.Brand 2007-2014
# This code requires the defined-or feature and PerlIO
@@ -1,12 +1,12 @@
#!/pro/bin/perl
# csv2xls: Convert csv to xls
-# (m)'11 [06 Oct 2011] Copyright H.M.Brand 2007-2013
+# (m)'14 [20 May 2014] Copyright H.M.Brand 2007-2014
use strict;
use warnings;
-our $VERSION = "1.71";
+our $VERSION = "1.73";
sub usage
{
@@ -14,13 +14,13 @@ sub usage
print <<EOU;
usage: csv2xls [-s <sep>] [-q <quot>] [-w <width>] [-d <dtfmt>]
[-o <xls>] [file.csv]
- -s <sep> use <sep> as seperator char. Auto-detect, default = ','
+ -s <sep> use <sep> as seperator char, auto-detect, default = ','
The string "tab" is allowed.
- -e <esc> use <sep> as seperator char. Auto-detect, default = ','
+ -e <esc> use <esc> as escape char, auto-detect, default = '"'
The string "undef" is allowed.
- -q <quot> use <quot> as quotation char. Default = '"'
+ -q <quot> use <quot> as quotation char, default = '"'
The string "undef" will disable quotation.
- -w <width> use <width> as default minimum column width (4)
+ -w <width> use <width> as default minimum column width default = 4
-o <xls> write output to file named <xls>, defaults
to input file name with .csv replaced with .xls
if from standard input, defaults to csv2xls.xls
@@ -28,6 +28,7 @@ usage: csv2xls [-s <sep>] [-q <quot>] [-w <width>] [-d <dtfmt>]
an equal sign are forced to string
-f force usage of <xls> if already exists (unlink before use)
-d <dtfmt> use <dtfmt> as date formats. Default = 'dd-mm-yyyy'
+ -C <C:fmt> use <fmt> as currency formats for currency <C>, no default
-D cols only convert dates in columns <cols>. Default is everywhere.
-u CSV is UTF8
-v [<lvl>] verbosity (default = 1)
@@ -45,6 +46,7 @@ my $frc = 0; # Force use of file
my $utf = 0; # Data is encoded in Unicode
my $frm = 0; # Allow formula's
my $dtf = "dd-mm-yyyy"; # Date format
+my $crf = ""; # Currency format, e.g.: $:### ### ##0.00
my $opt_v = 1;
my $dtc;
@@ -58,6 +60,7 @@ GetOptions (
"o|x=s" => \$xls,
"d=s" => \$dtf,
"D=s" => \$dtc,
+ "C=s" => \$crf,
"f" => \$frc,
"F" => \$frm,
"u" => \$utf,
@@ -79,8 +82,6 @@ use Date::Calc qw( Delta_Days Days_in_Month );
use Spreadsheet::WriteExcel;
use Encode qw( from_to );
-my $csv;
-
my $wbk = Spreadsheet::WriteExcel->new ($xls);
my $wks = $wbk->add_worksheet ();
$dtf =~ s/j/y/g;
@@ -94,6 +95,10 @@ my %fmt = (
align => "left",
),
);
+$crf =~ s/^([^:]+):(.*)/$1/ and $fmt{currency} = $wbk->add_format (
+ num_format => "$1 $2",
+ align => "right",
+ );
my ($h, $w, @w) = (0, 1); # data height, -width, and default column widths
my $row;
@@ -117,7 +122,7 @@ unless ($sep) { # No sep char passed, try to auto-detect;
last;
}
}
-$csv = Text::CSV_XS-> new ({
+my $csv = Text::CSV_XS-> new ({
sep_char => $sep eq "tab" ? "\t" : $sep,
quote_char => $quo eq "undef" ? undef : $quo,
escape_char => $esc eq "undef" ? undef : $esc,
@@ -151,7 +156,7 @@ while ($row && @$row or $row = $csv->getline (*ARGV)) {
my @row = @$row;
@row > $w and push @w, ($wdt) x (($w = @row) - @w);
foreach my $c (0 .. $#row) {
- my $val = $row[$c] || "";
+ my $val = defined $row[$c] ? $row[$c] : "";
my $l = length $val;
$l > $w[$c] and $w[$c] = $l;
@@ -187,6 +192,10 @@ while ($row && @$row or $row = $csv->getline (*ARGV)) {
next;
}
}
+ if ($crf and $val =~ m/^\s*\Q$crf\E\s*([0-9.]+)$/) {
+ $wks->write ($h, $c, $1 + 0, $fmt{currency});
+ next;
+ }
if (!$frm && $val =~ m/^=/) {
$wks->write_string ($h, $c, $val);
@@ -3,7 +3,7 @@
# This script can be used as a base to parse unreliable CSV streams
# Modify to your own needs
#
-# (m)'08 [23 Apr 2008] Copyright H.M.Brand 2008-2013
+# (m)'08 [23 Apr 2008] Copyright H.M.Brand 2008-2014
use strict;
use warnings;
@@ -0,0 +1,17 @@
+#!/pro/bin/perl
+
+use strict;
+use warnings;
+
+use Text::CSV_XS qw( csv );
+
+my $io = shift || \*DATA;
+
+csv (in => csv (in => $io, sep_char => ";"), out => \*STDOUT);
+
+__END__
+a;b;c;d;e;f
+1;2;3;4;5;6
+2;3;4;5;6;7
+3;4;5;6;7;8
+4;5;6;7;8;9
@@ -1,7 +1,7 @@
#!/usr/bin/perl -w
# speed.pl: compare different versions of Text-CSV* modules
-# (m)'08 [07 Apr 2008] Copyright H.M.Brand 2007-2013
+# (m)'08 [07 Apr 2008] Copyright H.M.Brand 2007-2014
require 5.006001;
use strict;
@@ -27,7 +27,7 @@ $csv->combine (@fields1 ); our $str1 = $csv->string;
$csv->combine (@fields10 ); our $str10 = $csv->string;
$csv->combine (@fields100); our $str100 = $csv->string;
-timethese (-1.2,{
+timethese (-1.5,{
"combine 1" => q{ $csv->combine (@fields1 ) },
"combine 10" => q{ $csv->combine (@fields10 ) },
@@ -4,9 +4,9 @@
/*
----------------------------------------------------------------------
- ppport.h -- Perl/Pollution/Portability Version 3.21
+ ppport.h -- Perl/Pollution/Portability Version 3.24
- Automatically created by Devel::PPPort running under perl 5.018000.
+ Automatically created by Devel::PPPort running under perl 5.020000.
Do NOT edit this file directly! -- Edit PPPort_pm.PL and the
includes in parts/inc/ instead.
@@ -21,7 +21,7 @@ SKIP
=head1 NAME
-ppport.h - Perl/Pollution/Portability version 3.21
+ppport.h - Perl/Pollution/Portability version 3.24
=head1 SYNOPSIS
@@ -56,7 +56,7 @@ ppport.h - Perl/Pollution/Portability version 3.21
=head1 COMPATIBILITY
This version of F<ppport.h> is designed to support operation with Perl
-installations back to 5.003, and has been tested up to 5.11.5.
+installations back to 5.003, and has been tested up to 5.20.
=head1 OPTIONS
@@ -226,6 +226,7 @@ same function or variable in your project.
grok_numeric_radix() NEED_grok_numeric_radix NEED_grok_numeric_radix_GLOBAL
grok_oct() NEED_grok_oct NEED_grok_oct_GLOBAL
load_module() NEED_load_module NEED_load_module_GLOBAL
+ mg_findext() NEED_mg_findext NEED_mg_findext_GLOBAL
my_snprintf() NEED_my_snprintf NEED_my_snprintf_GLOBAL
my_sprintf() NEED_my_sprintf NEED_my_sprintf_GLOBAL
my_strlcat() NEED_my_strlcat NEED_my_strlcat_GLOBAL
@@ -245,6 +246,7 @@ same function or variable in your project.
sv_pvn_force_flags() NEED_sv_pvn_force_flags NEED_sv_pvn_force_flags_GLOBAL
sv_setpvf_mg() NEED_sv_setpvf_mg NEED_sv_setpvf_mg_GLOBAL
sv_setpvf_mg_nocontext() NEED_sv_setpvf_mg_nocontext NEED_sv_setpvf_mg_nocontext_GLOBAL
+ sv_unmagicext() NEED_sv_unmagicext NEED_sv_unmagicext_GLOBAL
vload_module() NEED_vload_module NEED_vload_module_GLOBAL
vnewSVpvf() NEED_vnewSVpvf NEED_vnewSVpvf_GLOBAL
warner() NEED_warner NEED_warner_GLOBAL
@@ -325,7 +327,7 @@ before sending a bug report.
If F<ppport.h> was generated using the latest version of
C<Devel::PPPort> and is causing failure of this module, please
-file a bug report using the CPAN Request Tracker at L<http://rt.cpan.org/>.
+file a bug report here: L<https://github.com/mhx/Devel-PPPort/issues/>
Please include the following information:
@@ -378,7 +380,7 @@ use strict;
# Disable broken TRIE-optimization
BEGIN { eval '${^RE_TRIE_MAXBUF} = -1' if $] >= 5.009004 && $] <= 5.009005 }
-my $VERSION = 3.21;
+my $VERSION = 3.24;
my %opt = (
quiet => 0,
@@ -499,7 +501,7 @@ GvHV|||
GvSVn|5.009003||p
GvSV|||
Gv_AMupdate||5.011000|
-HEf_SVKEY||5.004000|
+HEf_SVKEY|5.004000|5.004000|p
HeHASH||5.004000|
HeKEY||5.004000|
HeKLEN||5.004000|
@@ -507,7 +509,7 @@ HePV||5.004000|
HeSVKEY_force||5.004000|
HeSVKEY_set||5.004000|
HeSVKEY||5.004000|
-HeUTF8||5.010001|
+HeUTF8|5.010001|5.010001|p
HeVAL||5.004000|
HvENAMELEN||5.015004|
HvENAMEUTF8||5.015004|
@@ -536,6 +538,8 @@ LINKLIST||5.013006|
LVRET|||
MARK|||
MULTICALL||5.019003|
+MUTABLE_PTR|||p
+MUTABLE_SV|||p
MY_CXT_CLONE|5.009002||p
MY_CXT_INIT|5.007003||p
MY_CXT|5.007003||p
@@ -1884,7 +1888,7 @@ mg_clear|||
mg_copy|||
mg_dup|||
mg_find_mglob|||
-mg_findext||5.013008|
+mg_findext|5.013008|5.013008|p
mg_find|||
mg_free_type||5.013006|
mg_free|||
@@ -2598,7 +2602,7 @@ sv_taint||5.004000|
sv_true||5.005000|
sv_unglob|||
sv_uni_display||5.007003|
-sv_unmagicext||5.013008|
+sv_unmagicext|5.013008|5.013008|p
sv_unmagic|||
sv_unref_flags||5.007001|
sv_unref|||
@@ -4416,6 +4420,16 @@ typedef OP* (CPERLscope(*Perl_check_t)) (pTHX_ OP*);
#endif
+/* Until we figure out how to support this in older perls... */
+#if (PERL_BCDVERSION >= 0x5008000)
+#ifndef HeUTF8
+# define HeUTF8(he) ((HeKLEN(he) == HEf_SVKEY) ? \
+ SvUTF8(HeKEY_sv(he)) : \
+ (U32)HeKUTF8(he))
+#endif
+
+#endif
+
#ifndef PERL_SIGNALS_UNSAFE_FLAG
#define PERL_SIGNALS_UNSAFE_FLAG 0x0001
@@ -4762,6 +4776,9 @@ DPPP_(my_vload_module)(U32 flags, SV *name, SV *ver, va_list *args)
#if (PERL_BCDVERSION >= 0x5004000)
utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(FALSE, 0),
veop, modname, imop);
+#elif (PERL_BCDVERSION > 0x5003000)
+ utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(),
+ veop, modname, imop);
#else
utilize(!(flags & PERL_LOADMOD_DENY), start_subparse(),
modname, imop);
@@ -6032,6 +6049,22 @@ DPPP_(my_warner)(U32 err, const char *pat, ...)
#ifndef SvGETMAGIC
# define SvGETMAGIC(x) STMT_START { if (SvGMAGICAL(x)) mg_get(x); } STMT_END
#endif
+
+/* Some random bits for sv_unmagicext. These should probably be pulled in for
+ real and organized at some point */
+#ifndef HEf_SVKEY
+# define HEf_SVKEY -2
+#endif
+
+#if defined(__GNUC__) && !defined(PERL_GCC_BRACE_GROUPS_FORBIDDEN)
+# define MUTABLE_PTR(p) ({ void *_p = (p); _p; })
+#else
+# define MUTABLE_PTR(p) ((void *) (p))
+#endif
+
+#define MUTABLE_SV(p) ((SV *)MUTABLE_PTR(p))
+
+/* end of random bits */
#ifndef PERL_MAGIC_sv
# define PERL_MAGIC_sv '\0'
#endif
@@ -6347,6 +6380,103 @@ DPPP_(my_warner)(U32 err, const char *pat, ...)
#endif
+#if !defined(mg_findext)
+#if defined(NEED_mg_findext)
+static MAGIC * DPPP_(my_mg_findext)(pTHX_ SV * sv, int type, const MGVTBL *vtbl);
+static
+#else
+extern MAGIC * DPPP_(my_mg_findext)(pTHX_ SV * sv, int type, const MGVTBL *vtbl);
+#endif
+
+#ifdef mg_findext
+# undef mg_findext
+#endif
+#define mg_findext(a,b,c) DPPP_(my_mg_findext)(aTHX_ a,b,c)
+#define Perl_mg_findext DPPP_(my_mg_findext)
+
+#if defined(NEED_mg_findext) || defined(NEED_mg_findext_GLOBAL)
+
+MAGIC *
+DPPP_(my_mg_findext)(pTHX_ SV * sv, int type, const MGVTBL *vtbl) {
+ if (sv) {
+ MAGIC *mg;
+
+#ifdef AvPAD_NAMELIST
+ assert(!(SvTYPE(sv) == SVt_PVAV && AvPAD_NAMELIST(sv)));
+#endif
+
+ for (mg = SvMAGIC (sv); mg; mg = mg->mg_moremagic) {
+ if (mg->mg_type == type && mg->mg_virtual == vtbl)
+ return mg;
+ }
+ }
+
+ return NULL;
+}
+
+#endif
+#endif
+
+#if !defined(sv_unmagicext)
+#if defined(NEED_sv_unmagicext)
+static int DPPP_(my_sv_unmagicext)(pTHX_ SV * const sv, const int type, MGVTBL * vtbl);
+static
+#else
+extern int DPPP_(my_sv_unmagicext)(pTHX_ SV * const sv, const int type, MGVTBL * vtbl);
+#endif
+
+#ifdef sv_unmagicext
+# undef sv_unmagicext
+#endif
+#define sv_unmagicext(a,b,c) DPPP_(my_sv_unmagicext)(aTHX_ a,b,c)
+#define Perl_sv_unmagicext DPPP_(my_sv_unmagicext)
+
+#if defined(NEED_sv_unmagicext) || defined(NEED_sv_unmagicext_GLOBAL)
+
+int
+DPPP_(my_sv_unmagicext)(pTHX_ SV *const sv, const int type, MGVTBL *vtbl)
+{
+ MAGIC* mg;
+ MAGIC** mgp;
+
+ if (SvTYPE(sv) < SVt_PVMG || !SvMAGIC(sv))
+ return 0;
+ mgp = &(SvMAGIC(sv));
+ for (mg = *mgp; mg; mg = *mgp) {
+ const MGVTBL* const virt = mg->mg_virtual;
+ if (mg->mg_type == type && virt == vtbl) {
+ *mgp = mg->mg_moremagic;
+ if (virt && virt->svt_free)
+ virt->svt_free(aTHX_ sv, mg);
+ if (mg->mg_ptr && mg->mg_type != PERL_MAGIC_regex_global) {
+ if (mg->mg_len > 0)
+ Safefree(mg->mg_ptr);
+ else if (mg->mg_len == HEf_SVKEY) /* Questionable on older perls... */
+ SvREFCNT_dec(MUTABLE_SV(mg->mg_ptr));
+ else if (mg->mg_type == PERL_MAGIC_utf8)
+ Safefree(mg->mg_ptr);
+ }
+ if (mg->mg_flags & MGf_REFCOUNTED)
+ SvREFCNT_dec(mg->mg_obj);
+ Safefree(mg);
+ }
+ else
+ mgp = &mg->mg_moremagic;
+ }
+ if (SvMAGIC(sv)) {
+ if (SvMAGICAL(sv)) /* if we're under save_magic, wait for restore_magic; */
+ mg_magical(sv); /* else fix the flags now */
+ }
+ else {
+ SvMAGICAL_off(sv);
+ SvFLAGS(sv) |= (SvFLAGS(sv) & (SVp_IOK|SVp_NOK|SVp_POK)) >> PRIVSHIFT;
+ }
+ return 0;
+}
+
+#endif
+#endif
+
#ifdef USE_ITHREADS
#ifndef CopFILE
# define CopFILE(c) ((c)->cop_file)
@@ -7309,10 +7439,10 @@ DPPP_(my_pv_escape)(pTHX_ SV *dsv, char const * const str,
if (u > 255 || (flags & PERL_PV_ESCAPE_ALL)) {
if (flags & PERL_PV_ESCAPE_FIRSTCHAR)
chsize = my_snprintf(octbuf, sizeof octbuf,
- "%"UVxf, u);
+ "%" UVxf, u);
else
chsize = my_snprintf(octbuf, sizeof octbuf,
- "%cx{%"UVxf"}", esc, u);
+ "%cx{%" UVxf "}", esc, u);
} else if (flags & PERL_PV_ESCAPE_NOBACKSLASH) {
chsize = 1;
} else {
@@ -3,7 +3,7 @@
use strict;
use warnings;
-use Test::More tests => 133;
+use Test::More tests => 152;
BEGIN {
use_ok "Text::CSV_XS";
@@ -14,8 +14,10 @@ my $csv;
ok ($csv = Text::CSV_XS->new, "new ()");
is ($csv->quote_char, '"', "quote_char");
+is ($csv->quote, '"', "quote");
is ($csv->escape_char, '"', "escape_char");
is ($csv->sep_char, ',', "sep_char");
+is ($csv->sep, ',', "sep");
is ($csv->eol, '', "eol");
is ($csv->always_quote, 0, "always_quote");
is ($csv->binary, 0, "binary");
@@ -33,6 +35,7 @@ is ($csv->quote_space, 1, "quote_space");
is ($csv->quote_null, 1, "quote_null");
is ($csv->quote_binary, 1, "quote_binary");
is ($csv->record_number, 0, "record_number");
+is ($csv->decode_utf8, 1, "decode_utf8");
is ($csv->binary (1), 1, "binary (1)");
my @fld = ( 'txt =, "Hi!"', "Yes", "", 2, undef, "1.09", "\r", undef );
@@ -41,7 +44,10 @@ is ($csv->string,
qq{"txt =, ""Hi!""",Yes,,2,,1.09,"\r",}, "string");
is ($csv->sep_char (";"), ';', "sep_char (;)");
+is ($csv->sep (";"), ';', "sep (;)");
+is ($csv->sep_char (), ';', "sep_char ()");
is ($csv->quote_char ("="), '=', "quote_char (=)");
+is ($csv->quote ("="), '=', "quote (=)");
is ($csv->eol (undef), "", "eol (undef)");
is ($csv->eol (""), "", "eol ('')");
is ($csv->eol ("\r"), "\r", "eol (\\r)");
@@ -58,12 +64,14 @@ is ($csv->auto_diag (1), 1, "auto_diag (1)");
is ($csv->auto_diag (2), 2, "auto_diag (2)");
is ($csv->auto_diag (9), 9, "auto_diag (9)");
is ($csv->auto_diag ("true"), 1, "auto_diag (\"true\")");
+is ($csv->auto_diag ("false"), 0, "auto_diag (\"false\")");
is ($csv->auto_diag (undef), 0, "auto_diag (undef)");
is ($csv->auto_diag (""), 0, "auto_diag (\"\")");
is ($csv->diag_verbose (1), 1, "diag_verbose (1)");
is ($csv->diag_verbose (2), 2, "diag_verbose (2)");
is ($csv->diag_verbose (9), 9, "diag_verbose (9)");
is ($csv->diag_verbose ("true"), 1, "diag_verbose (\"true\")");
+is ($csv->diag_verbose ("false"), 0, "diag_verbose (\"false\")");
is ($csv->diag_verbose (undef), 0, "diag_verbose (undef)");
is ($csv->diag_verbose (""), 0, "diag_verbose (\"\")");
is ($csv->verbatim (1), 1, "verbatim (1)");
@@ -75,9 +83,15 @@ ok ($csv->combine (@fld), "combine");
is ($csv->string,
qq{=txt \\=, "Hi!"=;=Yes=;==;=2=;;=1.09=;=\r=;\r}, "string");
+is ($csv->allow_whitespace (0), 0, "allow_whitespace (0)");
is ($csv->quote_space (0), 0, "quote_space (0)");
is ($csv->quote_null (0), 0, "quote_null (0)");
is ($csv->quote_binary (0), 0, "quote_binary (0)");
+is ($csv->decode_utf8 (0), 0, "decode_utf8 (0)");
+is ($csv->sep ("--"), "--", "sep (\"--\")");
+is ($csv->sep_char (), "\0", "sep_char");
+is ($csv->quote ("++"), "++", "quote (\"++\")");
+is ($csv->quote_char (), "\0", "quote_char");
# Funny settings, all three translate to \0 internally
ok ($csv = Text::CSV_XS->new ({
@@ -86,7 +100,9 @@ ok ($csv = Text::CSV_XS->new ({
escape_char => undef,
}), "new (undef ...)");
is ($csv->sep_char, undef, "sep_char undef");
+is ($csv->sep, undef, "sep undef");
is ($csv->quote_char, undef, "quote_char undef");
+is ($csv->quote, undef, "quote undef");
is ($csv->escape_char, undef, "escape_char undef");
ok ($csv->parse ("foo"), "parse (foo)");
$csv->sep_char (",");
@@ -99,6 +115,11 @@ ok (!$csv->parse ("foo,foo\0bar"), "parse (foo)");
$csv->binary (1);
ok ( $csv->parse ("foo,foo\0bar"), "parse (foo)");
+# Attribute aliasses
+ok ($csv = Text::CSV_XS-> new ({ quote_always => 1, verbose_diag => 1}));
+is ($csv->always_quote, 1, "always_quote = quote_always");
+is ($csv->diag_verbose, 1, "diag_verbose = verbose_diag");
+
# Some forbidden combinations
foreach my $ws (" ", "\t") {
ok ($csv = Text::CSV_XS->new ({ escape_char => $ws }), "New blank escape");
@@ -3,7 +3,7 @@
use strict;
use warnings;
-use Test::More tests => 197;
+use Test::More tests => 203;
BEGIN {
use_ok "Text::CSV_XS";
@@ -148,6 +148,7 @@ sub crnlsp
is ($csv->is_quoted (0), undef, "is_quoted () before parse");
is ($csv->is_binary (0), undef, "is_binary () before parse");
+ is ($csv->is_missing (0), undef, "is_missing () before parse");
my $bintxt = chr (0x20ac);
ok ( $csv->parse (qq{,"1","a\rb",0,"a\nb",1,\x8e,"a\r\n","$bintxt","",}),
@@ -160,17 +161,22 @@ sub crnlsp
is ($csv->is_quoted (0), 0, "fflag 0 - not quoted");
is ($csv->is_binary (0), 0, "fflag 0 - not binary");
+ is ($csv->is_missing (0), 0, "fflag 0 - not missig");
is ($csv->is_quoted (2), 1, "fflag 2 - quoted");
is ($csv->is_binary (2), 1, "fflag 2 - binary");
+ is ($csv->is_missing (2), 0, "fflag 2 - not missing");
is ($csv->is_quoted (6), 0, "fflag 5 - not quoted");
is ($csv->is_binary (6), 1, "fflag 5 - binary");
+ is ($csv->is_missing (6), 0, "fflag 5 - not missing");
is ($csv->is_quoted (-1), undef, "fflag -1 - undefined");
is ($csv->is_binary (-8), undef, "fflag -8 - undefined");
+ is ($csv->is_missing (-8), undef, "fflag -8 - undefined");
is ($csv->is_quoted (21), undef, "fflag 21 - undefined");
is ($csv->is_binary (98), undef, "fflag 98 - undefined");
+ is ($csv->is_missing (98), 1, "fflag 98 - missing");
}
{ my $csv = Text::CSV_XS->new ({ escape_char => "+" });
@@ -2,11 +2,14 @@
use strict;
use warnings;
+use charnames ":full";
+$| = 1;
use Test::More;
+$| = 1;
BEGIN {
- $] < 5.008001 and
+ $] < 5.008002 and
plan skip_all => "UTF8 tests useless in this ancient perl version";
}
@@ -43,11 +46,16 @@ BEGIN {
[ "bytes up :encoding(UTF-8)", ":encoding(UTF-8)", $bytes_up, "utf8", "no warn", ],
);
- plan tests => 11 + 6 * @tests;
+ my $builder = Test::More->builder;
+ binmode $builder->output, ":encoding(utf8)";
+ binmode $builder->failure_output, ":encoding(utf8)";
+ binmode $builder->todo_output, ":encoding(utf8)";
+
+ plan tests => 11 + 6 * @tests + 4 * 22 + 6;
}
BEGIN {
- require_ok "Text::CSV_XS";
+ use_ok "Text::CSV_XS", ("csv");
plan skip_all => "Cannot load Text::CSV_XS" if $@;
require "t/util.pl";
}
@@ -55,7 +63,7 @@ BEGIN {
sub hexify { join " ", map { sprintf "%02x", $_ } unpack "C*", @_ }
sub warned { length ($_[0]) ? "warn" : "no warn" }
-my $csv = Text::CSV_XS->new ({ auto_diag => 1, binary => 1 });
+my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
for (@tests) {
my ($test, $perlio, $data, $enc, $expect_w) = @$_;
@@ -134,3 +142,74 @@ for (@tests) {
[ "", "", 1, "", "", "", 1, "" ], "UTF8 flags");
}
}
+
+my $sep = "\x{2665}";#"\N{INVISIBLE SEPARATOR}";
+my $quo = "\x{2661}";#"\N{FULLWIDTH QUOTATION MARK}";
+foreach my $new (0, 1, 2, 3) {
+ my %attr = (
+ binary => 1,
+ always_quote => 1,
+ );;
+ $new & 1 and $attr{sep} = $sep;
+ $new & 2 and $attr{quote} = $quo;
+ my $csv = Text::CSV_XS->new (\%attr);
+
+ my $s = $attr{sep} || ',';
+ my $q = $attr{quote} || '"';
+
+ note ("Test SEP: '$s', QUO: '$q'");
+ is ($csv->sep, $s, "sep");
+ is ($csv->quote, $q, "quote");
+
+ foreach my $data (
+ [ 1, 2 ],
+ [ "\N{EURO SIGN}", "\N{SNOWMAN}" ],
+# [ $sep, $quo ],
+ ) {
+
+ my $exp8 = join $s => map { qq{$q$_$q} } @$data;
+ utf8::encode (my $expb = $exp8);
+ my @exp = ($expb, $exp8);
+
+ ok ($csv->combine (@$data), "combine");
+ my $x = $csv->string;
+ is ($csv->string, $exp8, "string");
+
+ open my $fh, ">:encoding(utf8)", \(my $out = "");
+ ok ($csv->print ($fh, $data), "print with UTF8 sep");
+ close $fh;
+
+ is ($out, $expb, "output");
+
+ ok ($csv->parse ($expb), "parse");
+ is_deeply ([ $csv->fields ], $data, "fields");
+
+ open $fh, "<", \$expb;
+ is_deeply ($csv->getline ($fh), $data, "data from getline ()");
+ close $fh;
+
+ $expb =~ tr/"//d;
+
+ ok ($csv->parse ($expb), "parse");
+ is_deeply ([ $csv->fields ], $data, "fields");
+
+ open $fh, "<", \$expb;
+ is_deeply ($csv->getline ($fh), $data, "data from getline ()");
+ close $fh;
+ }
+ }
+
+{ my $h = "\N{WHITE HEART SUIT}";
+ my $H = "\N{BLACK HEART SUIT}";
+ my $str = "${h}I$h$H${h}L\"${h}ve$h$H${h}Perl$h";
+ utf8::encode ($str);
+ ok (my $aoa = csv (in => \$str, sep => $H, quote => $h), "Hearts");
+ is_deeply ($aoa, [[ "I", "L${h}ve", "Perl"]], "I $H Perl");
+
+ ok (my $csv = Text::CSV_XS->new ({
+ binary => 1, sep => $H, quote => $h }), "new hearts");
+ ok ($csv->combine (@{$aoa->[0]}), "combine");
+ ok ($str = $csv->string, "string");
+ utf8::decode ($str);
+ is ($str, "I${H}${h}L\"${h}ve${h}${H}Perl", "Correct quotation");
+ }
@@ -3,7 +3,7 @@
use strict;
use warnings;
-use Test::More tests => 9074;
+use Test::More tests => 25102;
BEGIN {
require_ok "Text::CSV_XS";
@@ -29,20 +29,50 @@ sub combi
my $combi = join " ", "--",
map { sprintf "%6s", _readable $attr{$_} } @attrib, "always_quote";
ok (1, $combi);
- foreach my $attr (sort keys %attr) {
- $csv->$attr ($attr{$attr});
- is ($csv->$attr (), $attr{$attr}, "check $attr");
- }
- my $ret = $csv->combine (@input);
+ # use legal non-special characters
+ is ($csv->allow_whitespace (0), 0, "Reset allow WS");
+ is ($csv->sep_char ("\x03"), "\x03", "Reset sep");
+ is ($csv->quote_char ("\x04"), "\x04", "Reset quo");
+ is ($csv->escape_char ("\x05"), "\x05", "Reset esc");
+ # Set the attributes and check failure
+ my %state;
+ foreach my $attr (sort keys %attr) {
+ eval { $csv->$attr ($attr{$attr}); };
+ $@ or next;
+ $state{0 + $csv->error_diag} ||= $@;
+ }
if ($attr{sep_char} eq $attr{quote_char} ||
$attr{sep_char} eq $attr{escape_char}) {
- is ($ret, undef, "Illegal combo for combine");
-
- ok (!$csv->parse ("foo"), "illegal combo for parse");
- return;
+ ok (exists $state{1001}, "Illegal combo");
+ like ($state{1001}, qr{sep_char is equal to}, "Illegal combo");
}
+ else {
+ ok (!exists $state{1001}, "No char conflict");
+ }
+ if (!exists $state{1001} and
+ $attr{sep_char} =~ m/[\r\n]/ ||
+ $attr{quote_char} =~ m/[\r\n]/ ||
+ $attr{escape_char} =~ m/[\r\n]/
+ ) {
+ ok (exists $state{1003}, "Special contains eol");
+ like ($state{1003}, qr{in main attr not}, "Illegal combo");
+ }
+ if ($attr{allow_whitespace} and
+ $attr{quote_char} =~ m/^[ \t]/ ||
+ $attr{escape_char} =~ m/^[ \t]/
+ ) {
+ #diag (join " -> ** " => $combi, join ", " => sort %state);
+ ok (exists $state{1002}, "Illegal combo under allow_whitespace");
+ like ($state{1002}, qr{allow_whitespace with}, "Illegal combo");
+ }
+ %state and return;
+
+ # Check success
+ is ($csv->$_ (), $attr{$_}, "check $_") for sort keys %attr;
+
+ my $ret = $csv->combine (@input);
ok ($ret, "combine");
ok (my $str = $csv->string, "string");
@@ -71,16 +101,19 @@ sub combi
}
} # combi
+foreach my $aw (0, 1) {
foreach my $aq (0, 1) {
foreach my $qc (@special) {
foreach my $ec (@special, "+") {
foreach my $sc (@special, "\0") {
combi (
- quote_char => $qc,
- escape_char => $ec,
- sep_char => $sc,
- always_quote => $aq,
+ sep_char => $sc,
+ quote_char => $qc,
+ escape_char => $ec,
+ always_quote => $aq,
+ allow_whitespace => $aw,
);
+ }
}
}
}
@@ -483,7 +483,7 @@ code,name,price,description
«43927» - Is bind_columns broken or am I using it wrong?
1,2
«44402» - Unexpected results parsing tab-separated spaces
-«x1000» - Detlev reported inconsisten behavior between XS and PP
+«x1000» - Detlev reported inconsistent behavior between XS and PP
B:033_02_ -drop, +drop animal legs @p 02-033.bmp @p 02-033.bmp \x{A} 1 :c/b01:!1 ! 13 !6.!6 :b/b01:0 B:033_02_ R#012a 2
B:034_02c diagonal, trac -bound up @p 02-034c.bmp @p 02-034c.bmp Found through e_sect2.pdf as U+F824 ( ,) and U+2E88 (⺈,) but won't display \x{A} 1 :c/b01:!1 ! 11 !10 :b/b01:0 2E88 B:034_02c R#018b 2
B:035_02_ +drop, -drop fission 丷 Aufgrund folgender Fälle definiere ich einen neuen Baustein, der simp. mit "horns&" identisch ist.\x{A}隊队 (jap.: pinnacle, horns&sow)\x{A}曾曾兌兑\x{A}über "golden calf":\x{A}送送 1 :c/b01:!1 ! 11 !10 :b/b01:0 4E37 B:035_02_ 2
@@ -4,7 +4,7 @@ use strict;
use warnings;
#use Test::More "no_plan";
- use Test::More tests => 75;
+ use Test::More tests => 79;
BEGIN {
use_ok "Text::CSV_XS", ();
@@ -129,10 +129,16 @@ ok ($csv->column_names (sort keys %$hr), "set column names");
ok ($csv->eol ("\n"), "set eol for output");
ok ($csv->print ($fh, [ $csv->column_names ]), "print header");
ok ($csv->print_hr ($fh, $hr), "print_hr");
+ok ($csv->print ($fh, []), "empty print");
close $fh;
+ok ($csv->keep_meta_info (1), "keep meta info");
open $fh, "<", "_75test.csv";
ok ($csv->column_names ($csv->getline ($fh)), "get column names");
is_deeply ($csv->getline_hr ($fh), $hr, "compare to written hr");
+
+is_deeply ($csv->getline_hr ($fh),
+ { c_foo => "", foo => undef, zebra => undef }, "compare to written hr");
+is ($csv->is_missing (1), 1, "No col 1");
close $fh;
unlink "_75test.csv";
@@ -0,0 +1,101 @@
+#!/pro/bin/perl
+
+use strict;
+use warnings;
+$| = 1;
+
+use Config;
+use Test::More;
+
+BEGIN {
+ unless (exists $Config{useperlio} &&
+ defined $Config{useperlio} &&
+ $] >= 5.008 && # perlio was experimental in 5.6.2, but not reliable
+ $Config{useperlio} eq "define") {
+ plan skip_all => "No reliable perlIO available";
+ }
+ else {
+ plan tests => 18;
+ }
+ }
+
+use Text::CSV_XS;
+my $csv = Text::CSV_XS->new ();
+
+my @test = (
+ "row=1" => [[ 11,12,13,14,15,16,17,18,19 ]],
+ "row=2-3" => [[ 21,22,23,24,25,26,27,28,29 ],
+ [ 31,32,33,34,35,36,37,38,39 ]],
+ "row=2;4;6" => [[ 21,22,23,24,25,26,27,28,29 ],
+ [ 41,42,43,44,45,46,47,48,49 ],
+ [ 61,62,63,64,65,66,67,68,69 ]],
+ "row=1-2;4;6-*" => [[ 11,12,13,14,15,16,17,18,19 ],
+ [ 21,22,23,24,25,26,27,28,29 ],
+ [ 41,42,43,44,45,46,47,48,49 ],
+ [ 61,62,63,64,65,66,67,68,69 ],
+ [ 71,72,73,74,75,76,77,78,79 ],
+ [ 81,82,83,84,85,86,87,88,89 ],
+ [ 91,92,93,94,95,96,97,98,99 ]],
+ "col=1" => [[11],[21],[31],[41],[51],[61],[71],[81],[91]],
+ "col=2-3" => [[12,13],[22,23],[32,33],[42,43],[52,53],
+ [62,63],[72,73],[82,83],[92,93]],
+ "col=2;4;6" => [[12,14,16],[22,24,26],[32,34,36],[42,44,46],[52,54,56],
+ [62,64,66],[72,74,76],[82,84,86],[92,94,96]],
+ "col=1-2;4;6-*" => [[11,12,14,16,17,18,19], [21,22,24,26,27,28,29],
+ [31,32,34,36,37,38,39], [41,42,44,46,47,48,49],
+ [51,52,54,56,57,58,59], [61,62,64,66,67,68,69],
+ [71,72,74,76,77,78,79], [81,82,84,86,87,88,89],
+ [91,92,94,96,97,98,99]],
+ #cell=R,C
+ "cell=7,7" => [[ 77 ]],
+ "cell=7,7-8,8" => [[ 77,78 ], [ 87,88 ]],
+ "cell=7,7-*,8" => [[ 77,78 ], [ 87,88 ], [ 97,98 ]],
+ "cell=7,7-8,*" => [[ 77,78,79 ], [ 87,88,89 ]],
+ "cell=7,7-*,*" => [[ 77,78,79 ], [ 87,88,89 ], [ 97,98,99 ]],
+
+ "cell=1,1-2,2;3,3-4,4" => [
+ [11,12],
+ [21,22],
+ [33,34],
+ [43,44]],
+ "cell=1,1-3,3;2,3-4,4" => [
+ [11,12,13],
+ [21,22,23,24],
+ [31,32,33,34],
+ [43,44]],
+ "cell=1,1-3,3;2,2-4,4;2,3;4,2" => [
+ [11,12,13],
+ [21,22,23,24],
+ [31,32,33,34],
+ [42,43,44]],
+ "cell=1,1-2,2;3,3-4,4;1,4;4,1" => [
+ [11,12, 14],
+ [21,22],
+ [33,34],
+ [41, 43,44]],
+ );
+my $todo = "";
+my $data = join "" => <DATA>;
+while (my ($spec, $expect) = splice @test, 0, 2) {
+ open my $io, "<", \$data;
+ my $aoa = $csv->fragment ($io, $spec);
+ is_deeply ($aoa, $expect, "${todo}Fragment $spec");
+ }
+
+$csv->column_names ("c3","c4");
+open my $io, "<", \$data;
+is_deeply ($csv->fragment ($io, "cell=3,2-4,3"),
+ [ { c3 => 32, c4 =>33 }, { c3 => 42, c4 => 43 }], "Fragment to AoH");
+
+#$csv->eol ("\n");
+#foreach my $r (1..9){$csv->print(*STDOUT,[map{$r.$_}1..9])}
+__END__
+11,12,13,14,15,16,17,18,19
+21,22,23,24,25,26,27,28,29
+31,32,33,34,35,36,37,38,39
+41,42,43,44,45,46,47,48,49
+51,52,53,54,55,56,57,58,59
+61,62,63,64,65,66,67,68,69
+71,72,73,74,75,76,77,78,79
+81,82,83,84,85,86,87,88,89
+91,92,93,94,95,96,97,98,99
@@ -0,0 +1,163 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+ use Test::More tests => 110;
+#use Test::More "no_plan";
+
+BEGIN {
+ require_ok "Text::CSV_XS";
+ plan skip_all => "Cannot load Text::CSV_XS" if $@;
+ require "t/util.pl";
+ }
+
+$| = 1;
+
+my $csv;
+
+# These tests are for the constructor
+{ my $warn;
+ local $SIG{__WARN__} = sub { $warn = shift; };
+ ok ($csv = Text::CSV_XS->new ({ callbacks => undef }), "new");
+ is ($warn, undef, "no warn for undef");
+ is ($csv->callbacks, $warn = undef, "no callbacks for undef");
+ ok ($csv = Text::CSV_XS->new ({ callbacks => 0 }), "new");
+ like ($warn, qr{: ignored$}, "warn for 0");
+ is ($csv->callbacks, $warn = undef, "no callbacks for 0");
+ ok ($csv = Text::CSV_XS->new ({ callbacks => 1 }), "new");
+ like ($warn, qr{: ignored$}, "warn for 1");
+ is ($csv->callbacks, $warn = undef, "no callbacks for 1");
+ ok ($csv = Text::CSV_XS->new ({ callbacks => \1 }), "new");
+ like ($warn, qr{: ignored$}, "warn for \\1");
+ is ($csv->callbacks, $warn = undef, "no callbacks for \\1");
+ ok ($csv = Text::CSV_XS->new ({ callbacks => "" }), "new");
+ like ($warn, qr{: ignored$}, "warn for ''");
+ is ($csv->callbacks, $warn = undef, "no callbacks for ''");
+ ok ($csv = Text::CSV_XS->new ({ callbacks => [] }), "new");
+ like ($warn, qr{: ignored$}, "warn for []");
+ is ($csv->callbacks, $warn = undef, "no callbacks for []");
+ ok ($csv = Text::CSV_XS->new ({ callbacks => sub {} }), "new");
+ like ($warn, qr{: ignored$}, "warn for sub {}");
+ is ($csv->callbacks, $warn = undef, "no callbacks for sub {}");
+ }
+
+ok ($csv = Text::CSV_XS->new (), "new");
+is ($csv->callbacks, undef, "no callbacks");
+ok ($csv->bind_columns (\my ($c, $s)), "bind");
+ok ($csv->getline (*DATA), "parse ok");
+is ($c, 1, "key");
+is ($s, "foo", "value");
+$s = "untouched";
+ok ($csv->getline (*DATA), "parse bad");
+is ($c, 1, "key");
+is ($s, "untouched", "untouched");
+ok ($csv->getline (*DATA), "parse bad");
+is ($c, "foo", "key");
+is ($s, "untouched", "untouched");
+ok ($csv->getline (*DATA), "parse good");
+is ($c, 2, "key");
+is ($s, "bar", "value");
+eval { is ($csv->getline (*DATA), undef,"parse bad"); };
+my @diag = $csv->error_diag;
+is ($diag[0], 3006, "too many values");
+
+# These tests are for the method
+foreach my $args ([""], [1], [[]], [sub{}], [1,2], [1,2,3],
+ [undef,"error"], ["error",undef],
+ ["%23bad",sub {}],["error",sub{0;},undef,1],
+ ["error",[]],["error","error"],["",sub{0;}],
+ [sub{0;},0],[[],""]) {
+ eval { $csv->callbacks (@$args); };
+ my @diag = $csv->error_diag;
+ is ($diag[0], 1004, "invalid callbacks");
+ is ($csv->callbacks, undef, "not set");
+ }
+
+# These tests are for invalid arguments *inside* the hash
+foreach my $arg (undef, 0, 1, \1, "", [], $csv) {
+ eval { $csv->callbacks ({ error => $arg }); };
+ my @diag = $csv->error_diag;
+ is ($diag[0], 1004, "invalid callbacks");
+ is ($csv->callbacks, undef, "not set");
+ }
+ok ($csv->callbacks (bogus => sub { 0; }), "useless callback");
+
+my $error = 3006;
+sub ignore
+{
+ is ($_[0], $error, "Caught error $error");
+ $csv->SetDiag (0); # Ignore this error
+ } # ignore
+
+my $idx = 1;
+ok ($csv->auto_diag (1), "set auto_diag");
+my $callbacks = {
+ error => \&ignore,
+ after_parse => sub {
+ my ($c, $av) = @_;
+ # Just add a field
+ push @$av, "NEW";
+ },
+ before_print => sub {
+ my ($c, $av) = @_;
+ # First field set to line number
+ $av->[0] = $idx++;
+ # Maximum 2 fields
+ @{$av} > 2 and splice @{$av}, 2;
+ # Minimum 2 fields
+ @{$av} < 2 and push @{$av}, "";
+ },
+ };
+is (ref $csv->callbacks ($callbacks), "HASH", "callbacks set");
+ok ($csv->getline (*DATA), "parse ok");
+is ($c, 1, "key");
+is ($s, "foo", "value");
+ok ($csv->getline (*DATA), "parse bad, skip 3006");
+ok ($csv->getline (*DATA), "parse good");
+is ($c, 2, "key");
+is ($s, "bar", "value");
+
+$csv->bind_columns (undef);
+ok (my $row = $csv->getline (*DATA), "get row");
+is_deeply ($row, [ 1, 2, 3, "NEW" ], "fetch + value from hook");
+
+$error = 2012; # EOF
+ok ($csv->getline (*DATA), "parse past eof");
+
+my $fn = "_79test.csv";
+END { unlink $fn; }
+
+ok ($csv->eol ("\n"), "eol for output");
+open my $fh, ">", $fn or die "$fn: $!";
+ok ($csv->print ($fh, [ 0, "foo" ]), "print OK");
+ok ($csv->print ($fh, [ 0, "bar", 3 ]), "print too many");
+ok ($csv->print ($fh, [ 0 ]), "print too few");
+close $fh;
+
+open $fh, "<", $fn or die "$fn: $!";
+is (do { local $/; <$fh> }, "1,foo\n2,bar\n3,\n", "Modified output");
+close $fh;
+
+# Test the non-IO interface
+ok ($csv->parse ("10,blah,33\n"), "parse");
+is_deeply ([ $csv->fields ], [ 10, "blah", 33, "NEW" ], "fields");
+
+ok ($csv->combine (11, "fri", 22, 18), "combine - no hook");
+is ($csv->string, qq{11,fri,22,18\n}, "string");
+
+is ($csv->callbacks (undef), undef, "clear callbacks");
+
+is_deeply (Text::CSV_XS::csv (in => $fn, callbacks => $callbacks),
+ [[1,"foo","NEW"],[2,"bar","NEW"],[3,"","NEW"]], "using getline_all");
+
+__END__
+1,foo
+1
+foo
+2,bar
+3,baz,2
+1,foo
+3,baz,2
+2,bar
+1,2,3
@@ -3,7 +3,7 @@
use strict;
use warnings;
- use Test::More tests => 143;
+ use Test::More tests => 201;
#use Test::More "no_plan";
my %err;
@@ -64,6 +64,10 @@ parse_err 2037, 1, qq{\0 };
like ($warn[0], qr{^# CSV_XS ERROR: 2037 - EIF}, "error content");
}
+is ($csv->eof, "", "No EOF");
+$csv->SetDiag (2012);
+is ($csv->eof, 1, "EOF caused by 2012");
+
is (Text::CSV_XS->new ({ ecs_char => ":" }), undef, "Unsupported option");
{ my @warn;
@@ -146,6 +150,35 @@ $csv = Text::CSV_XS->new ({ auto_diag => 1 });
is ($csv->{_ERROR_INPUT}, undef, "Undefined error_input");
}
+foreach my $spec (
+ undef, # No spec at all
+ "", # No spec at all
+ "row=0", # row > 0
+ "col=0", # col > 0
+ "cell=0", # cell = r,c
+ "cell=0,0", # col & row > 0
+ "row=*", # * only after n-
+ "col=3-1", # to >= from
+ "cell=4,1;1", # cell has no ;
+ "cell=3,3-2,1", # bottom-right should be right to and below top-left
+ "cell=1,*", # * in single cell col
+ "cell=*,1", # * in single cell row
+ "cell=*,*", # * in single cell row and column
+ "cell=1,*-8,9", # * in cell range top-left cell col
+ "cell=*,1-8,9", # * in cell range top-left cell row
+ "cell=*,*-8,9", # * in cell range top-left cell row and column
+ "row=/", # illegal character
+ "col=4;row=3", # cannot combine rows and columns
+ ) {
+ my $csv = Text::CSV_XS->new ();
+ my $r;
+ eval { $r = $csv->fragment (undef, $spec); };
+ is ($r, undef, "Cannot do fragment with bad RFC7111 spec");
+ my ($c_diag, $s_diag, $p_diag) = $csv->error_diag ();
+ is ($c_diag, 2013, "Illegal RFC7111 spec");
+ is ($p_diag, 0, "Position");
+ }
+
my $diag_file = "_$$.out";
open EH, ">&STDERR";
open STDERR, ">", $diag_file;
@@ -7,7 +7,7 @@ use warnings;
use base "Text::CSV_XS";
-use Test::More tests => 5;
+use Test::More tests => 6;
ok (1, "Subclassed");
@@ -22,4 +22,6 @@ is ($@, "", "error");
is (Text::CSV_XS::Subclass->error_diag (),
"INI - Unknown attribute 'ecs_char'", "Last failure for new () - FAIL");
+is (Text::CSV_XS::Subclass->new ({ fail_me => "now" }), undef, "bad new ()");
+
1;
@@ -0,0 +1,102 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Config;
+
+#use Test::More "no_plan";
+ use Test::More tests => 28;
+
+BEGIN {
+ use_ok "Text::CSV_XS", ("csv");
+ plan skip_all => "Cannot load Text::CSV_XS" if $@;
+ require "t/util.pl";
+ }
+
+my $file = "_90test.csv"; END { -f $file and unlink $file }
+my $data =
+ "foo,bar,baz\n".
+ "1,2,3\n".
+ "2,a b,\n";
+open FH, ">", $file or die "_90test.csv: $!";
+print FH $data;
+close FH;
+
+my @hdr = qw( foo bar baz );
+my $aoa = [
+ \@hdr,
+ [ 1, 2, 3 ],
+ [ 2, "a b", "" ],
+ ];
+my $aoh = [
+ { foo => 1, bar => 2, baz => 3 },
+ { foo => 2, bar => "a b", baz => "" },
+ ];
+
+SKIP: for my $io ([ $file, "file" ], [ \*FH, "globref" ], [ *FH, "glob" ], [ \$data, "ScalarIO"] ) {
+ $] < 5.008 && ref $io->[0] eq "SCALAR" and skip "No ScalarIO support for $]", 1;
+ open FH, "<", $file;
+ is_deeply (csv ({ in => $io->[0] }), $aoa, "AOA $io->[1]");
+ close FH;
+ }
+
+SKIP: for my $io ([ $file, "file" ], [ \*FH, "globref" ], [ *FH, "glob" ], [ \$data, "ScalarIO"] ) {
+ $] < 5.008 && ref $io->[0] eq "SCALAR" and skip "No ScalarIO support for $]", 1;
+ open FH, "<", $file;
+ is_deeply (csv (in => $io->[0], headers => "auto"), $aoh, "AOH $io->[1]");
+ close FH;
+ }
+
+my @aoa = @{$aoa}[1,2];
+is_deeply (csv (file => $file, headers => "skip"), \@aoa, "AOA skip");
+is_deeply (csv (file => $file, fragment => "row=2-3"), \@aoa, "AOA fragment");
+
+if ($] >= 5.008) {
+ is_deeply (csv (in => $file, encoding => "utf-8", headers => ["a", "b", "c"],
+ fragment => "row=2", sep_char => ","),
+ [{ a => 1, b => 2, c => 3 }], "AOH headers fragment");
+ }
+else {
+ ok (1, q{This perl does not support open with "<:encoding(...)"});
+ }
+
+ok (csv (in => $aoa, out => $file), "AOA out file");
+is_deeply (csv (in => $file), $aoa, "AOA parse out");
+
+ok (csv (in => $aoh, out => $file, headers => "auto"), "AOH out file");
+is_deeply (csv (in => $file, headers => "auto"), $aoh, "AOH parse out");
+
+ok (csv (in => $aoh, out => $file, headers => "skip"), "AOH out file no header");
+is_deeply (csv (in => $file, headers => [keys %{$aoh->[0]}]),
+ $aoh, "AOH parse out no header");
+
+my $idx = 0;
+sub getrowa { return $aoa->[$idx++]; }
+sub getrowh { return $aoh->[$idx++]; }
+
+ok (csv (in => \&getrowa, out => $file), "out from CODE/AR");
+is_deeply (csv (in => $file), $aoa, "data from CODE/AR");
+
+$idx = 0;
+ok (csv (in => \&getrowh, out => $file, headers => \@hdr), "out from CODE/HR");
+is_deeply (csv (in => $file, headers => "auto"), $aoh, "data from CODE/HR");
+
+$idx = 0;
+ok (csv (in => \&getrowh, out => $file), "out from CODE/HR (auto headers)");
+is_deeply (csv (in => $file, headers => "auto"), $aoh, "data from CODE/HR");
+
+unlink $file;
+
+eval {
+ exists $Config{useperlio} &&
+ defined $Config{useperlio} &&
+ $] >= 5.008 &&
+ $Config{useperlio} eq "define" or skip "No scalar ref in this perl", 4;
+ my $out = "";
+ open my $fh, ">", \$out;
+ ok (csv (in => [[ 1, 2, 3 ]], out => $fh), "out to fh to scalar ref");
+ is ($out, "1,2,3\r\n", "Scalar out");
+ $out = "";
+ ok (csv (in => [[ 1, 2, 3 ]], out => \$out), "out to scalar ref");
+ is ($out, "1,2,3\r\n", "Scalar out");
+ };
@@ -0,0 +1,59 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+#use Test::More "no_plan";
+ use Test::More tests => 17;
+
+BEGIN {
+ use_ok "Text::CSV_XS", ("csv");
+ plan skip_all => "Cannot load Text::CSV_XS" if $@;
+ require "t/util.pl";
+ }
+
+my $file = "_90test.csv"; END { -f $file and unlink $file }
+my $data =
+ "foo,bar,baz\n".
+ "1,2,3\n".
+ "2,a b,\n";
+open FH, ">", $file or die "_90test.csv: $!";
+print FH $data;
+close FH;
+
+my $aoa = [
+ [qw( foo bar baz )],
+ [ 1, 2, 3 ],
+ [ 2, "a b", "" ],
+ ];
+my $aoh = [
+ { foo => 1, bar => 2, baz => 3 },
+ { foo => 2, bar => "a b", baz => "" },
+ ];
+
+for (qw( after_in on_in before_out )) {
+ is_deeply (csv (in => $file, $_ => sub {}), $aoa, "callback $_ on AOA with empty sub");
+ is_deeply (csv (in => $file, callbacks => { $_ => sub {} }), $aoa, "callback $_ on AOA with empty sub");
+ }
+is_deeply (csv (in => $file, after_in => sub {},
+ callbacks => { on_in => sub {} }), $aoa, "callback after_in and on_in on AOA");
+
+for (qw( after_in on_in before_out )) {
+ is_deeply (csv (in => $file, headers => "auto", $_ => sub {}), $aoh, "callback $_ on AOH with empty sub");
+ is_deeply (csv (in => $file, headers => "auto", callbacks => { $_ => sub {} }), $aoh, "callback $_ on AOH with empty sub");
+ }
+is_deeply (csv (in => $file, headers => "auto", after_in => sub {},
+ callbacks => { on_in => sub {} }), $aoh, "callback after_in and on_in on AOH");
+
+is_deeply (csv (in => $file, after_in => sub { push @{$_[1]}, "A" }), [
+ [qw( foo bar baz A )],
+ [ 1, 2, 3, "A" ],
+ [ 2, "a b", "", "A" ],
+ ], "AOA ith after_in callback");
+
+is_deeply (csv (in => $file, headers => "auto", after_in => sub { $_[1]{baz} = "A" }), [
+ { foo => 1, bar => 2, baz => "A" },
+ { foo => 2, bar => "a b", baz => "A" },
+ ], "AOH with after_in callback");
+
+unlink $file;