# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Catmandu::Importer::CSV;

use Catmandu::Sane;

our $VERSION = '1.0002';

use Text::CSV;
use List::Util qw(reduce);
use Moo;
use namespace::clean;

with 'Catmandu::Importer';

# Text::CSV parser, built lazily by _build_csv from the attributes below.
has csv => (is => 'ro', lazy => 1, builder => '_build_csv');

# Control characters denoted by the backslash escapes accepted in sep_char.
my %control_char_for = (
    a => "\a",
    b => "\b",
    e => "\e",
    f => "\f",
    n => "\n",
    r => "\r",
    t => "\t",
);

# Column separator. A backslash escape written out as two characters
# (for example '\t') is translated into the control character it denotes.
has sep_char => (
    is      => 'ro',
    default => sub {','},
    coerce  => sub {
        my $sep_char = $_[0];

        # A fixed lookup table is used rather than evaluating the matched
        # text as Perl code; only the seven escapes listed are recognised.
        $sep_char =~ s/\\([abefnrt])/$control_char_for{$1}/g;
        return $sep_char;
    }
);

# Quote and escape characters; _build_csv hands a false value to Text::CSV
# as undef.
has quote_char => (is => 'ro', default => sub { '"' });
has escape_char => (is => 'ro', default => sub { '"' });

# Passed through unchanged to the Text::CSV options of the same name.
has allow_loose_quotes => (is => 'ro', default => sub { 0 });
has allow_loose_escapes => (is => 'ro', default => sub { 0 });

# True if the first input line is a header line.
has header => (is => 'ro', default => sub { 1 });

# Column names. Accepts an array reference, a hash reference (its sorted
# keys are used) or a comma-separated string. The private writer
# _set_fields is used by generator to store names taken from the input.
has fields => (
    is     => 'rwp',
    coerce => sub {
        my $fields = $_[0];
        if (ref $fields eq 'ARRAY') { return $fields }
        if (ref $fields eq 'HASH')  { return [sort keys %$fields] }
        return [split ',', $fields];
    },
);

# Builder for the lazy "csv" attribute.
#
# Creates a Text::CSV parser in binary mode from sep_char, quote_char,
# escape_char, allow_loose_quotes and allow_loose_escapes. A false
# quote_char or escape_char is passed on as undef.
#
# Returns the parser object. Dies with the Text::CSV diagnostic if the
# parser cannot be constructed.
sub _build_csv {
    my ($self) = @_;

    my $quote_char  = $self->quote_char;
    my $escape_char = $self->escape_char;

    my $csv = Text::CSV->new({
        binary              => 1,
        sep_char            => $self->sep_char,
        quote_char          => $quote_char ? $quote_char : undef,
        escape_char         => $escape_char ? $escape_char : undef,
        allow_loose_quotes  => $self->allow_loose_quotes,
        allow_loose_escapes => $self->allow_loose_escapes,
    });

    # Text::CSV->new returns undef instead of throwing when it rejects the
    # options; report that here rather than failing later on an undefined
    # parser.
    defined $csv
        or die "cannot create Text::CSV parser: " . Text::CSV->error_diag;

    return $csv;
}

# Returns a closure that yields one record (hash reference) per call and
# undef once the input is exhausted.
#
# Field names are taken, in this order, from the "fields" attribute, from
# the header line (if "header" is true), or generated as column indexes
# 0 .. n-1 from the first data row. Dies with line number, byte position
# and the Text::CSV diagnostic if a data row cannot be parsed.
sub generator {
    my ($self) = @_;
    sub {
        state $line = 0;
        state $fh  = $self->fh;

        # One-time setup on the first call: consume the header line and
        # register the column names with the parser.
        state $csv = do {
            if ($self->header) {
                # The header line is always skipped; its content only
                # supplies the field names when none were given.
                my $header = $self->csv->getline($fh);
                $line++;
                $self->_set_fields($header) unless $self->fields;
            }
            if ($self->fields) {
                $self->csv->column_names($self->fields);
            }
            $self->csv;
        };

        # generate field names if needed
        unless ($self->fields) {
            my $row = $csv->getline($fh) // return;
            $line++;
            my $fields = [0 .. $#$row];
            $self->_set_fields($fields);
            $csv->column_names($fields);

            # Keep every column, including empty ones, so that this first
            # record has the same keys as the records that getline_hr
            # produces for all following rows.
            return +{ map { ($_ => $row->[$_]) } @$fields };
        }

        my $rec = $csv->getline_hr($fh);
        $line++;

        # An undefined record at end of input is the normal end of the
        # stream; anywhere else it is a parse error.
        if (defined $rec || $csv->eof()) {
            return $rec;
        }
        else {
            my ($cde, $str, $pos) = $csv->error_diag ();
            die "at line $line (byte $pos) found a Text::CSV parse error($cde) $str";
        }
    };
}

1;

__END__

=pod

=head1 NAME

Catmandu::Importer::CSV - Package that imports CSV data

=head1 SYNOPSIS

    # From the command line

    # convert a CSV file to JSON
    catmandu convert CSV to JSON < journals.csv

    # set column names if CSV file has no header line
    echo '12157,"The Journal of Headache and Pain",2193-1801' | \
      catmandu convert CSV --header 0 --fields 'id,title,issn' to YAML
    
    # set field separator and quote character 
    echo '12157;$The Journal of Headache and Pain$;2193-1801' | \
      catmandu convert CSV --header 0 --fields 'id,title,issn' --sep_char ';' --quote_char '$' to XLSX --file journal.xlsx

    # In a Perl script

    use Catmandu;

    my $importer = Catmandu->importer('CSV', file => "/foo/bar.csv");

    my $n = $importer->each(sub {
        my $hashref = $_[0];
        # ...
    });

=head1 DESCRIPTION

The package imports comma-separated values (CSV).  The object
fields are read from the CSV header line or given via the C<fields> parameter.
Strings in CSV are quoted by C<quote_char> and fields are separated by
C<sep_char>.

=head1 CONFIGURATION

=over

=item file

Read input from a local file given by its path. Alternatively a scalar
reference can be passed to read from a string.

=item fh

Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io> is used to
create the input stream from the C<file> argument or by using STDIN.

=item encoding

Binmode of the input stream C<fh>. Set to C<:utf8> by default.

=item fix

An ARRAY of one or more fixes or file scripts to be applied to imported items.

=item fields

List of fields to be used as columns, given as array reference, comma-separated
string, or hash reference. If C<header> is C<0> and C<fields> is C<undef> the
fields will be named by column index ("0", "1", "2", ...).

=item header

Read fields from a header line with the column names, if set to C<1> (the
default).

=item sep_char

Column separator (C<,> by default)

=item quote_char

Quotation character (C<"> by default)

=item escape_char

Character for escaping inside quoted field (C<"> by default)

=item allow_loose_quotes

=item allow_loose_escapes

Allow common bad practice in CSV quoting and escaping.

=back

=head1 METHODS

Every L<Catmandu::Importer> is a L<Catmandu::Iterable>, so all its methods are
inherited.  The methods are not idempotent: CSV streams can only be read once.

=head1 SEE ALSO

L<Catmandu::Exporter::CSV>, L<Catmandu::Importer::XLS>

=cut