The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Net::Amazon::MechanicalTurk::DelimitedReader;
use strict;
use warnings;
use IO::File;
use Carp;
use Net::Amazon::MechanicalTurk::BaseObject;

our $VERSION = '1.01_01';

our @ISA = qw{ Net::Amazon::MechanicalTurk::BaseObject };

Net::Amazon::MechanicalTurk::DelimitedReader->attributes(qw{
    fieldSeparator
    input
    file
    utf8
    autoclose
});

# The following CPAN modules do not support new lines in a column:
# (So I wrote this class)
# Text::CSV
# Text::CSV_XS
# Text::CSV_PP

sub init {
    my $self = shift;
    $self->setAttributes(@_);
    $self->setAttributesIfNotDefined(
        fieldSeparator => ',',
        utf8 => 1,
        autoclose => 0
    );
    $self->assertRequiredAttributes(qw{
        fieldSeparator
    });
    if (!defined $self->input) {
        if (!defined $self->file) {
            Carp::croak("Either input or file must be specified.");
        }
        my $in = IO::File->new($self->file, "r");
        if (!$in) {
            Carp::croak("Couldn't open " . $self->file . " - $!.");
        }
        if ($self->utf8) {
            # By using utf8 these modules should be able to handle
            # non-english answers with recent versions of perl.
            eval { binmode($in, ":utf8") };
            warn "Couldn't set filehandle to utf8." if $@;
        }
        $self->autoclose(1);
        $self->input($in);
    }
    else {
        $self->file(sprintf "%s", $self->input);
    }
}

sub DESTROY {
    my $self = shift;
    if ($self->autoclose) {
        $self->close;
    }
}

sub close {
    my $self = shift;
    if ($self->input) {
        $self->input->close;
        $self->input(undef);
    }
}

sub next {
    my $self = shift;
    my $in = $self->input;
    my $fs = $self->fieldSeparator;
    my $row = [];
    my $lastWasQuote = 0;
    my $quotedCell = 0;
    my $cell = '';
    
    return undef unless $self->input;
    
    while (1) {
        my $c = getc($in);

        # Handle end of input        
        if (!defined($c)) {
            push(@$row, $cell);
            $self->close;
            $self->input(undef);
            return $row;
        }
        
        next if ($c eq "\r"); # just throw away \r
        
        if ($quotedCell) {
           if ($c eq "\n") {
               if ($lastWasQuote) {
                   push(@$row, $cell);
                   return $row;
               }
               $cell .= "\n";
               $lastWasQuote = 0;
           }
           elsif ($c eq $fs) {
               if ($lastWasQuote) {
                   push(@$row, $cell);
                   $cell = '';
                   $lastWasQuote = 0;
                   $quotedCell = 0;
               }
               else {
                   $cell .= $c;
                   $lastWasQuote = 0;
               }
           }
           elsif ($c eq '"') {
               if ($lastWasQuote) {
                   $cell .= $c;
                   $lastWasQuote = 0;
               }
               else {
                   $lastWasQuote = 1;
               }
           }
           else {
               if ($lastWasQuote) {
                   warn "Single quote found in cell which was not escaped.\n";
               }
               $cell .= $c;
               $lastWasQuote = 0;
           }
        }
        else {
           if ($cell eq '' and $c eq '"') {
               $quotedCell = 1;
           }
           elsif ($c eq "\n") {
               push(@$row, $cell);
               return $row;
           }
           elsif ($c eq $fs) {
               push(@$row, $cell);
               $cell = '';
           }
           else {
               $cell .= $c;
           }
        }
    }
}

return 1;