package Net::Amazon::MechanicalTurk::DelimitedReader;
use strict;
use warnings;
use IO::File;
use Carp;
use Net::Amazon::MechanicalTurk::BaseObject;
our $VERSION = '1.01_01';
our @ISA = qw{ Net::Amazon::MechanicalTurk::BaseObject };
Net::Amazon::MechanicalTurk::DelimitedReader->attributes(qw{
fieldSeparator
input
file
utf8
autoclose
});
# The following CPAN modules do not support new lines in a column:
# (So I wrote this class)
# Text::CSV
# Text::CSV_XS
# Text::CSV_PP
sub init {
my $self = shift;
$self->setAttributes(@_);
$self->setAttributesIfNotDefined(
fieldSeparator => ',',
utf8 => 1,
autoclose => 0
);
$self->assertRequiredAttributes(qw{
fieldSeparator
});
if (!defined $self->input) {
if (!defined $self->file) {
Carp::croak("Either input or file must be specified.");
}
my $in = IO::File->new($self->file, "r");
if (!$in) {
Carp::croak("Couldn't open " . $self->file . " - $!.");
}
if ($self->utf8) {
# By using utf8 these modules should be able to handle
# non-english answers with recent versions of perl.
eval { binmode($in, ":utf8") };
warn "Couldn't set filehandle to utf8." if $@;
}
$self->autoclose(1);
$self->input($in);
}
else {
$self->file(sprintf "%s", $self->input);
}
}
sub DESTROY {
my $self = shift;
if ($self->autoclose) {
$self->close;
}
}
sub close {
my $self = shift;
if ($self->input) {
$self->input->close;
$self->input(undef);
}
}
sub next {
my $self = shift;
my $in = $self->input;
my $fs = $self->fieldSeparator;
my $row = [];
my $lastWasQuote = 0;
my $quotedCell = 0;
my $cell = '';
return undef unless $self->input;
while (1) {
my $c = getc($in);
# Handle end of input
if (!defined($c)) {
push(@$row, $cell);
$self->close;
$self->input(undef);
return $row;
}
next if ($c eq "\r"); # just throw away \r
if ($quotedCell) {
if ($c eq "\n") {
if ($lastWasQuote) {
push(@$row, $cell);
return $row;
}
$cell .= "\n";
$lastWasQuote = 0;
}
elsif ($c eq $fs) {
if ($lastWasQuote) {
push(@$row, $cell);
$cell = '';
$lastWasQuote = 0;
$quotedCell = 0;
}
else {
$cell .= $c;
$lastWasQuote = 0;
}
}
elsif ($c eq '"') {
if ($lastWasQuote) {
$cell .= $c;
$lastWasQuote = 0;
}
else {
$lastWasQuote = 1;
}
}
else {
if ($lastWasQuote) {
warn "Single quote found in cell which was not escaped.\n";
}
$cell .= $c;
$lastWasQuote = 0;
}
}
else {
if ($cell eq '' and $c eq '"') {
$quotedCell = 1;
}
elsif ($c eq "\n") {
push(@$row, $cell);
return $row;
}
elsif ($c eq $fs) {
push(@$row, $cell);
$cell = '';
}
else {
$cell .= $c;
}
}
}
}
return 1;