The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Catmandu::Importer::OAI::DC;

use Catmandu::Sane;
use Moo;

has metadataPrefix => (is => 'ro' , default => sub { "oai_dc" });

sub parse {
    my ($self,$dom) = @_;

    return undef unless defined $dom;

    my $rec = {};

    for ($dom->findnodes("./*")) {
        my $name  = $_->localName;
        my $value = $_->textContent;
        push(@{$rec->{$name}}, $value);
    }

    $rec;
}

package Catmandu::Importer::OAI;

use Catmandu::Sane;
use Moo;
use HTTP::OAI;
use Data::Dumper;

with 'Catmandu::Importer';

has url     => (is => 'ro', required => 1);
has metadataPrefix => (is => 'ro' , default => sub { "oai_dc" });
has handler => (is => 'ro', default => sub { Catmandu::Importer::OAI::DC->new });
has set     => (is => 'ro');
has from    => (is => 'ro');
has until   => (is => 'ro');
has oai     => (is => 'ro', lazy => 1, builder => '_build_oai');

sub _build_oai {
    my ($self) = @_;
    HTTP::OAI::Harvester->new(baseURL => $_[0]->url, resume => 1);
}

sub _map_record {
    my ($self, $rec) = @_;

    my $sets       = [ $rec->header->setSpec ];
    my $identifier = $rec->identifier;
    my $datestamp  = $rec->datestamp;
    my $status     = $rec->status // "";
    my $dom        = $rec->metadata ? $rec->metadata->dom->nonBlankChildNodes->[0]->nonBlankChildNodes->[0] : undef;
    my $metadata   = $dom ? $dom->toString : "";
    my $about      = [];

    for ($rec->about) {
        push(@$about , $_->dom->nonBlankChildNodes->[0]->nonBlankChildNodes->[0]->toString);
    }

    my $values  = {};

    if (defined $self->handler && $self->metadataPrefix eq $self->handler->metadataPrefix) {
        $values  = $self->handler->parse($dom) // {};
    }

    my $data = {
        _id => $identifier ,
        _identifier => $identifier ,
        _datestamp  => $datestamp ,
        _status     => $status ,
        _metadata   => $metadata ,
        _setSpec    => $sets ,
        _about      => $about ,
        %$values
    };

    $data;
}

sub generator {
    my ($self) = @_;
    sub {
        state $res = $self->oai->ListRecords(
                                metadataPrefix => $self->metadataPrefix,
                                set => $self->set ,
                                from => $self->from ,
                                until => $self->until ,
                    );

        if ($res->is_error) {
            warn $res->message;
            return;
        }
        if (my $rec = $res->next) {
            return $self->_map_record($rec);
        }
        return;
    };
}

=head1 NAME

Catmandu::Importer::OAI - Package that imports OAI-PMH feeds

=head1 SYNOPSIS

    use Catmandu::Importer::OAI;

    my $importer = Catmandu::Importer::OAI->new(
                    url => "...",
                    metadataPrefix => "..." ,
                    from => "..." ,
                    until => "..." ,
                    set => "...",
                    handler => "..." );

    my $n = $importer->each(sub {
        my $hashref = $_[0];
        # ...
    });

=head1 METHODS

=head2 new(url => URL, %options)

Create a new OAI-PMH importer for the URL. Optionally provide OAI-PMH parameters:
metadataPrefix, from, until and set. To parse metadata records into Perl hashes optionally
a handler can be provided. This is a Perl package that implements two methods:

  * metadataPrefix  - which should return a metadataPrefix string for which it can parse
  the metadata
  * parse($dom) - which recieves a XML::LibXML::DOM object and should return a Perl hash

E.g.

  package MyHandler;

  use Moo;

  has metadataPrefix => (is => 'ro' , default => sub { "oai_dc" });

  sub parse {
      my ($self,$dom) = @_;
      return {};
  }

=head2 count

=head2 each(&callback)

=head2 ...

Every Catmandu::Importer is a Catmandu::Iterable all its methods are inherited. The
Catmandu::Importer::OAI methods are not idempotent: OAI-PMH feeds can only be read once.

=head1 SEE ALSO

L<Catmandu::Iterable>

=cut

1;