The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Dancer::Plugin::Catmandu::OAI; # TODO hierarchical sets, setDescription

=head1 NAME

Dancer::Plugin::Catmandu::OAI - OAI-PMH provider backed by a searchable Catmandu::Store

=cut

our $VERSION = '0.0306';

use Catmandu::Sane;
use Catmandu::Util qw(:is);
use Catmandu;
use Catmandu::Fix;
use Catmandu::Exporter::Template;
use Dancer::Plugin;
use Dancer qw(:syntax);
use DateTime;
use DateTime::Format::Strptime;
use Clone qw(clone);

my $DEFAULT_LIMIT = 100;

my $VERBS = {
    GetRecord => {
        valid    => {metadataPrefix => 1, identifier => 1},
        required => [qw(metadataPrefix identifier)],
    },
    Identify => {
        valid    => {},
        required => [],
    },
    ListIdentifiers => {
        valid    => {metadataPrefix => 1, from => 1, until => 1, set => 1, resumptionToken => 1},
        required => [qw(metadataPrefix)],
    },
    ListMetadataFormats => {
        valid    => {identifier => 1, resumptionToken => 1},
        required => [],
    },
    ListRecords => {
        valid    => {metadataPrefix => 1, from => 1, until => 1, set => 1, resumptionToken => 1},
        required => [qw(metadataPrefix)],
    },
    ListSets => {
        valid    => {resumptionToken => 1},
        required => [],
    },
};

sub parse_oai_datestamp {
    my ($date) = @_;
    my @d = $date =~ /^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})Z$/;
    DateTime->new(
        year      => $d[0],
        month     => $d[1],
        day       => $d[2],
        hour      => $d[3],
        minute    => $d[4],
        second    => $d[5],
        time_zone => 'UTC',
    );
}

sub render {
    my ($tmpl, $data) = @_;
    my $out = "";
    my $exporter = Catmandu::Exporter::Template->new(template => $tmpl, file => \$out);
    $exporter->add($data);
    $exporter->commit;
    $out;
}

sub oai_provider {
    my ($path, %opts) = @_;

    my $setting = clone(plugin_setting);

    $setting->{granularity} //= "YYYY-MM-DDThh:mm:ssZ";
    $setting->{get_record_cql_pattern} //= '_id exact "%s"';

    # TODO this was for backwards compatibility. Remove?
    if ($setting->{filter}) {
        $setting->{cql_filter} = delete $setting->{filter};
    }

    $setting->{default_search_params} ||= {};

    my $datestamp_parser;
    if ($setting->{datestamp_pattern}) {
        $datestamp_parser = DateTime::Format::Strptime->new(
            pattern  => $setting->{datestamp_pattern},
            on_error => 'undef',
        );
    }

    my $format_datestamp = $datestamp_parser ? sub {
        $datestamp_parser->parse_datetime($_[0])->iso8601.'Z';
    } : sub {
        $_[0];
    };

    my $metadata_formats = do {
        my $list = $setting->{metadata_formats};
        my $hash = {};
        for my $format (@$list) {
            my $prefix = $format->{metadataPrefix};
            $format = {%$format};
            if (my $fix = $format->{fix}) {
                $format->{fix} = Catmandu::Fix->new(fixes => $fix);
            }
            $hash->{$prefix} = $format;
        }
        $hash;
    };

    my $sets = do {
        if (my $list = $setting->{sets}) {
            my $hash = {};
            for my $set (@$list) {
                my $key = $set->{setSpec};
                $hash->{$key} = $set;
            }
            $hash;
        } else {
            0;
        }
    };

    my $ns = "oai:$setting->{repositoryIdentifier}:";

    my $branding = "";
    if (my $icon = $setting->{collectionIcon}) {
        if (my $url = $icon->{url}) {
            $branding .= <<TT;
<description>
<branding xmlns="http://www.openarchives.org/OAI/2.0/branding/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/branding/ http://www.openarchives.org/OAI/2.0/branding.xsd">
<collectionIcon>
<url>$url</url>
TT
            for my $tag (qw(link title width height)) {
                my $val = $icon->{$tag} // next;
                $branding .= "<$tag>$val</$tag>\n";
            }

            $branding .= <<TT;
</collectionIcon>
</branding>
</description>
TT
        }
    }

    my $template_header = <<TT;
<?xml version="1.0" encoding="UTF-8"?>
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
<responseDate>[% response_date %]</responseDate>
[%- IF params.resumptionToken %]
<request verb="[% params.verb %]" resumptionToken="[% params.resumptionToken %]">[% uri_base %]</request>
[%- ELSE %]
<request[% FOREACH param IN params %] [% param.key %]="[% param.value | xml %]"[% END %]>[% uri_base %]</request>
[%- END %]
TT

    my $template_footer = <<TT;
</OAI-PMH>
TT

    my $template_error = <<TT;
$template_header
[%- FOREACH error IN errors %]
<error code="[% error.0 %]">[% error.1 | xml %]</error>
[%- END %]
$template_footer
TT

    my $template_record_header = <<TT;
<header[% IF deleted %] status="deleted"[% END %]>
    <identifier>${ns}[% id %]</identifier>
    <datestamp>[% datestamp %]</datestamp>
    [%- FOREACH s IN setSpec %]
    <setSpec>[% s %]</setSpec>
    [%- END %]
</header>
TT

    my $template_get_record = <<TT;
$template_header
<GetRecord>
<record>
$template_record_header
[%- UNLESS deleted %]
<metadata>
[% metadata %]
</metadata>
[%- END %]
</record>
</GetRecord>
$template_footer
TT

    my $template_identify = <<TT;
$template_header
<Identify>
<repositoryName>$setting->{repositoryName}</repositoryName>
<baseURL>[% uri_base %]</baseURL>
<protocolVersion>2.0</protocolVersion>
<adminEmail>$setting->{adminEmail}</adminEmail>
<earliestDatestamp>$setting->{earliestDatestamp}</earliestDatestamp>
<deletedRecord>$setting->{deletedRecord}</deletedRecord>
<granularity>$setting->{granularity}</granularity>
<description>
    <oai-identifier xmlns="http://www.openarchives.org/OAI/2.0/oai-identifier"
                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                    xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd">
        <scheme>oai</scheme>
        <repositoryIdentifier>$setting->{repositoryIdentifier}</repositoryIdentifier>
        <delimiter>$setting->{delimiter}</delimiter>
        <sampleIdentifier>$setting->{sampleIdentifier}</sampleIdentifier>
    </oai-identifier>
</description>
$branding
</Identify>
$template_footer
TT

    my $template_list_identifiers = <<TT;
$template_header
<ListIdentifiers>
[%- FOREACH records %]
$template_record_header
[%- END %]
[%- IF token %]
<resumptionToken cursor="[% start %]" completeListSize="[% total %]">[% token %]</resumptionToken>
[%- ELSE %]
<resumptionToken cursor="[% start %]" completeListSize="[% total %]"/>
[%- END %]
</ListIdentifiers>
$template_footer
TT

    my $template_list_records = <<TT;
$template_header
<ListRecords>
[%- FOREACH records %]
<record>
$template_record_header
[%- UNLESS deleted %]
<metadata>
[% metadata %]
</metadata>
[%- END %]
</record>
[%- END %]
[%- IF token %]
<resumptionToken cursor="[% start %]" completeListSize="[% total %]">[% token %]</resumptionToken>
[%- ELSE %]
<resumptionToken cursor="[% start %]" completeListSize="[% total %]"/>
[%- END %]
</ListRecords>
$template_footer
TT

    my $template_list_metadata_formats = "";
    $template_list_metadata_formats .= <<TT;
$template_header
<ListMetadataFormats>
TT
    for my $format (values %$metadata_formats) {
        $template_list_metadata_formats .= <<TT;
<metadataFormat>
    <metadataPrefix>$format->{metadataPrefix}</metadataPrefix>
    <schema>$format->{schema}</schema>
    <metadataNamespace>$format->{metadataNamespace}</metadataNamespace>
</metadataFormat>
TT
    }
    $template_list_metadata_formats .= <<TT;
</ListMetadataFormats>
$template_footer
TT

    my $template_list_sets = <<TT;
$template_header
<ListSets>
TT
    for my $set (values %$sets) {
        $template_list_sets .= <<TT;
<set>
    <setSpec>$set->{setSpec}</setSpec>
    <setName>$set->{setName}</setName>
</set>
TT
    }
    $template_list_sets .= <<TT;
</ListSets>
$template_footer
TT

    my $fix = $opts{fix} || $setting->{fix};
    my $sub_deleted = $opts{deleted} || sub { 0 };
    my $sub_set_specs_for = $opts{set_specs_for} || sub { [] };

    my $bag = Catmandu->store($opts{store} || $setting->{store})->bag($opts{bag} || $setting->{bag});

    any ['get', 'post'] => $path => sub {
        my $uri_base = $setting->{uri_base} // request->uri_base;
        my $response_date = DateTime->now->iso8601.'Z';
        my $params = request->is_get ? params('query') : params('body');
        my $errors = [];
        my $format;
        my $set;
        my $verb = $params->{verb};
        my $vars = {
            uri_base => $uri_base,
            request_uri => $uri_base . $path,
            response_date => $response_date,
            errors => $errors,
        };

        if ($verb and my $spec = $VERBS->{$verb}) {
            my $valid = $spec->{valid};
            my $required = $spec->{required};

            if ($valid->{resumptionToken} and exists $params->{resumptionToken}) {
                if (keys(%$params) > 2) {
                    push @$errors, [badArgument => "resumptionToken cannot be combined with other parameters"];
                }
            } else {
                for my $key (keys %$params) {
                    next if $key eq 'verb';
                    unless ($valid->{$key}) {
                        push @$errors, [badArgument => "parameter $key is illegal"];
                    }
                }
                for my $key (@$required) {
                    unless (exists $params->{$key}) {
                        push @$errors, [badArgument => "parameter $key is missing"];
                    }
                }
            }
        } else {
            push @$errors, [badVerb => "illegal OAI verb"];
        }

        if (@$errors) {
            return render(\$template_error, $vars);
        }

        $vars->{params} = $params;

        if ($params->{resumptionToken}) {
            unless (is_string($params->{resumptionToken})) {
                push @$errors, [badResumptionToken => "resumptionToken is not in the correct format"];
            }

            if ($verb eq 'ListSets') {
                push @$errors, [badResumptionToken => "resumptionToken isn't necessary"];
            } else {
                my @parts = split '!', $params->{resumptionToken};

                unless (@parts == 5) {
                    push @$errors, [badResumptionToken => "resumptionToken is not in the correct format"];
                }

                $params->{set}            = $parts[0];
                $params->{from}           = $parts[1];
                $params->{until}          = $parts[2];
                $params->{metadataPrefix} = $parts[3];
                $vars->{start}            = $parts[4];
            }
        }

        if ($params->{set}) {
            unless ($sets) {
                push @$errors, [noSetHierarchy => "sets are not supported"];
            }
            unless ($set = $sets->{$params->{set}}) {
                push @$errors, [badArgument => "set does not exist"];
            }
        }

        if (my $prefix = $params->{metadataPrefix}) {
            unless ($format = $metadata_formats->{$prefix}) {
                push @$errors, [cannotDisseminateFormat => "metadataPrefix $prefix is not supported"];
            }
        }

        if (@$errors) {
            return render(\$template_error, $vars);
        }

        content_type 'xml';

        if ($verb eq 'GetRecord') {
            my $id = $params->{identifier};
            $id =~ s/^$ns//;

            my $rec = $bag->search(
                %{ $setting->{default_search_params} },
                cql_query => sprintf($setting->{get_record_cql_pattern}, $id),
                start     => 0,
                limit     => 1,

            )->first;

            if (defined $rec) {
                if ($fix) {
                    $rec = Catmandu->fixer($fix)->fix($rec);
                }

                $vars->{id} = $id;
                $vars->{datestamp} = $format_datestamp->($rec->{$setting->{datestamp_field}});
                $vars->{deleted} = $sub_deleted->($rec);
                $vars->{setSpec} = $sub_set_specs_for->($rec);
                my $metadata = "";
                my $exporter = Catmandu::Exporter::Template->new(
                    template => $format->{template},
                    file => \$metadata,
                    fix => $format->{fix},
                );
                $exporter->add($rec);
                $exporter->commit;
                $vars->{metadata} = $metadata;
                unless ($vars->{deleted} and $setting->{deletedRecord} eq 'no') {
                    return render(\$template_get_record, $vars);
                }
            }
            push @$errors, [idDoesNotExist => "identifier $params->{identifier} is unknown or illegal"];
            return render(\$template_error, $vars);

        } elsif ($verb eq 'Identify') {
            return render(\$template_identify, $vars);

        } elsif ($verb eq 'ListIdentifiers' || $verb eq 'ListRecords') {
            my $limit = $setting->{limit} // $DEFAULT_LIMIT;
            my $start = $vars->{start} //= 0;
            my $from  = $params->{from};
            my $until = $params->{until};

            for my $datestamp (($from, $until)) {
                $datestamp || next;
                if ($datestamp !~ /^\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2}:\d{2}Z)?$/) {
                    push @$errors, [badArgument => "datestamps must have the format YYYY-MM-DD or YYYY-MM-DDThh:mm:ssZ"];
                    return render(\$template_error, $vars);
                };
            }

            if ($from && $until && length($from) != length($until)) {
                push @$errors, [badArgument => "datestamps must have the same granularity"];
                return render(\$template_error, $vars);
            }

            if ($from && $until && $from gt $until) {
                push @$errors, [badArgument => "from is more recent than until"];
                return render(\$template_error, $vars);
            }

            if ($from && length($from) == 10) {
                $from = "${from}T00:00:00Z";
            }
            if ($until && length($until) == 10) {
                $until = "${until}T00:00:00Z";
            }

            my @cql;
            my $cql_from  = $from;
            my $cql_until = $until;
            if (my $pattern = $setting->{datestamp_pattern}) {
                $cql_from  = DateTime::Format::Strptime::strftime($pattern, parse_oai_datestamp($cql_from))  if $cql_from;
                $cql_until = DateTime::Format::Strptime::strftime($pattern, parse_oai_datestamp($cql_until)) if $cql_until;
            }

            push @cql, qq|($setting->{cql_filter})| if $setting->{cql_filter};
            push @cql, qq|($format->{cql})| if $format->{cql};
            push @cql, qq|($set->{cql})| if $set && $set->{cql};
            push @cql, qq|($setting->{datestamp_field} >= "$cql_from")| if $cql_from;
            push @cql, qq|($setting->{datestamp_field} <= "$cql_until")| if $cql_until;
            unless (@cql) {
                push @cql, "(cql.allRecords)";
            }

            my $search = $bag->search(
                %{ $setting->{default_search_params} },
                cql_query => join(' and ', @cql),
                limit     => $limit,
                start     => $start,
            );

            unless ($search->total) {
                push @$errors, [noRecordsMatch => "no records found"];
                return render(\$template_error, $vars);
            }

            if ($start + $limit < $search->total) {
                $vars->{token} = join '!',
                    $params->{set} || '',
                    $from ? $from : '',
                    $until ? $until : '',
                    $params->{metadataPrefix},
                    $start + $limit;
            }

            $vars->{total} = $search->total;

            if ($verb eq 'ListIdentifiers') {
                $vars->{records} = [map {
                    my $rec = $_;
                    if ($fix) {
                        $rec = Catmandu->fixer($fix)->fix($rec);
                    }
                    {
                        id        => $rec->{_id},
                        datestamp => $format_datestamp->($rec->{$setting->{datestamp_field}}),
                        deleted   => $sub_deleted->($rec),
                        setSpec   => $sub_set_specs_for->($rec),
                    };
                } @{$search->hits}];
                return render(\$template_list_identifiers, $vars);
            } else {
                $vars->{records} = [map {
                    my $rec = $_;

                    if ($fix) {
                        $rec = Catmandu->fixer($fix)->fix($rec);
                    }

                    my $deleted = $sub_deleted->($rec);
                    my $metadata;
                    unless ($deleted) {
                        $metadata = "";
                        my $exporter = Catmandu::Exporter::Template->new(
                            template => $format->{template},
                            file     => \$metadata,
                            fix      => $format->{fix},
                        );
                        $exporter->add($rec);
                        $exporter->commit;
                    }
                    {
                        id        => $rec->{_id},
                        datestamp => $format_datestamp->($rec->{$setting->{datestamp_field}}),
                        deleted   => $deleted,
                        setSpec   => $sub_set_specs_for->($rec),
                        metadata  => $metadata,
                    };
                } @{$search->hits}];
                return render(\$template_list_records, $vars);
            }

        } elsif ($verb eq 'ListMetadataFormats') {
            if (my $id = $params->{identifier}) {
                $id =~ s/^$ns//;
                unless ($bag->get($id)) {
                    push @$errors, [idDoesNotExist => "identifier $params->{identifier} is unknown or illegal"];
                    return render(\$template_error, $vars);
                }
            }
            return render(\$template_list_metadata_formats, $vars);

        } elsif ($verb eq 'ListSets') {
            return render(\$template_list_sets, $vars);
        }
    }
};

register oai_provider => \&oai_provider;

register_plugin;

1;

=head1 SYNOPSIS

    use Dancer;
    use Dancer::Plugin::Catmandu::SRU;

    oai_provider '/oai';


=head1 CONFIGURATION

    plugins:
        'Catmandu::OAI':
            store: oai
            bag: publication
            datestamp_field: date_updated
            datestamp_pattern: "%Y-%H-%M %H:%M:%S"
            repositoryName: "My OAI Service Provider"
            uri_base: "http://oai.service.com/oai"
            adminEmail: me@example.com
            earliestDatestamp: "1970-01-01T00:00:01Z"
            deletedRecord: persistent
            repositoryIdentifier: oai.service.com
            limit: 200
            delimiter: ":"
            sampleIdentifier: "oai:oai.service.com:1585315"
            cql_filter: 'status exact public'
            get_record_cql_pattern: 'id exact "%s"'
            metadata_formats:
                -
                    metadataPrefix: oai_dc
                    schema: "http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
                    metadataNamespace: "http://www.openarchives.org/OAI/2.0/oai_dc/"
                    template: views/oai_dc.tt
                    cql: 'status exact public OR status exact deleted'
                    fix:
                      - publication_to_dc()
                -
                    metadataPrefix: mods
                    schema: "http://www.loc.gov/standards/mods/v3/mods-3-0.xsd"
                    metadataNamespace: "http://www.loc.gov/mods/v3"
                    template: views/mods.tt
                    cql: 'status exact public'
                    fix:
                      - publication_to_mods()
            sets:
                -
                    setSpec: openaccess
                    setName: Open Access
                    cql: 'oa=1'
                -
                    setSpec: journal_article
                    setName: Journal article
                    cql: 'documenttype exact journal_article'
                -
                    setSpec: book
                    setName: Book
                    cql: 'documenttype exact book'

=head1 SEE ALSO

L<Dancer::Plugin::Catmandu::SRU>, L<Catmandu>, L<Catmandu::Store>

=head1 AUTHOR

Nicolas Steenlant, C<< <nicolas.steenlant at ugent.be> >>

=head1 CONTRIBUTORS

Nicolas Franck, C<< <nicolas.franck at ugent.be> >>

Vitali Peil, C<< <vitali.peil at uni-bielefeld.de> >>

=head1 LICENSE

This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=cut