The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Net::Amazon::MechanicalTurk::XMLParser;
use strict;
use warnings;
use Carp;
use Net::Amazon::MechanicalTurk::BaseObject;
use Net::Amazon::MechanicalTurk::DataStructure;
use Net::Amazon::MechanicalTurk::ModuleUtil;
use Net::Amazon::MechanicalTurk::IOUtil;
use IO::File;

our $VERSION = '1.01_01';

our @ISA = qw{ Net::Amazon::MechanicalTurk::BaseObject };
our @XML_MODULES = qw{
    XML::Parser
    XML::Parser::Lite
};

Net::Amazon::MechanicalTurk::XMLParser->attributes(qw{
    parser
});

sub init {
    my $self = shift;
    if ($#_ >= 0) {
        $self->parser(shift);
    }
    else {
        $self->parser(newParser());
    }
}

sub parseURL {
    my ($self, $url) = @_;
    require LWP::UserAgent;
    my $userAgent = LWP::UserAgent->new;
    # Not available on all LWP's
    #$userAgent->default_headers->push_header("Connection" => "close");
    my $response = $userAgent->get($url);
    if (!$response->is_success) {
        Carp::croak("Could not retrieve url $url - " . $response->status_line);
    }
    return $self->parse($response->content);
}

sub parseFile {
    my ($self, $file) = @_;
    my $in = IO::File->new($file, "r");
    if (!$in) {
        Carp::croak("Could not open file $file - $!");
    }
    return $self->parse($in);
}

sub parse {
    my ($self, $xml) = @_;

    if (UNIVERSAL::isa($xml, "GLOB")) {
        $xml = Net::Amazon::MechanicalTurk::IOUtil->readContents($xml);
    }

    my $context = { root => undef, rootElement => undef, stack => [] };
    my $parser = $self->newParser();
    $parser->setHandlers(
        Start => sub { $self->xmlOnStart($context, @_); },
        End   => sub { $self->xmlOnEnd($context, @_); },
        Char  => sub { $self->xmlOnChar($context, @_); }
    );

    $parser->parse($xml);
    my $data = Net::Amazon::MechanicalTurk::DataStructure->wrap(xmlCondenseText($context->{root})); 

    return (wantarray) ? ($data, $context->{rootElement}) : $data;
}

sub newParser {
    return Net::Amazon::MechanicalTurk::ModuleUtil->requireFirst(@XML_MODULES)->new;
}

sub xmlOnStart {
    my $self = shift;
    my $context = shift;
    my $parser = shift;
    my $element = shift;
    my %attrs = @_;
    
    my $stack = $context->{stack};

    my $node = {};
    if ($#${stack} >= 0) {
        my $parent = $stack->[$#{$stack}];
        if (!exists $parent->{$element}) {
            $parent->{$element} = [];
        }
        push(@{$parent->{$element}}, $node);
        push(@{$stack}, $node);
    }
    else {
        $context->{root} = $node;
        $context->{rootElement} = $element;
        push(@{$stack}, $node);
    } 

    if (%attrs) {
        while (my ($name,$value) = each %attrs) {
            $self->xmlOnStart($context, $parser, $name);
            $self->xmlOnChar($context, $parser, $value);
            $self->xmlOnEnd($context, $parser, $name);
        }
    }
}

sub xmlOnChar {
    my ($self, $context, $parser, $text) = @_;
    my $parent = $context->{stack}[$#{$context->{stack}}];
    if (!exists $parent->{_value}) {
        $parent->{_value} = $text; 
    }
    else {
        $parent->{_value} .= $text;
    }
}

sub xmlOnEnd {
    my ($self, $context, $parser, $element) = @_;
    pop(@{$context->{stack}}); 
}

sub xmlCondenseText {
    my ($node) = @_;

    return unless defined ($node);

    while (my ($name, $array) = each(%$node)) {
        if ($name eq "_value") {
            if ($array =~ /^\s*$/) {
                delete $node->{$name};
            }
            next;
        }

        next unless UNIVERSAL::isa($array, "ARRAY");

        for (my $i=0; $i<=$#{$array}; $i++) {
            my $subNode = $array->[$i];
            if (UNIVERSAL::isa($subNode, 'HASH')) {
                if (exists $subNode->{_value} and $subNode->{_value} =~ /^\s*$/) {
                    delete $subNode->{_value};
                }
                if (exists $subNode->{_value} and (scalar keys %$subNode) == 1) {
                    $array->[$i] = $subNode->{_value}; 
                }
                elsif ((scalar keys %$subNode) == 0) {
                    $array->[$i] = undef;
                }
                else {
                    xmlCondenseText($subNode);
                }
            }
        }
    }

    return $node;
}

return 1;