The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package HTTP::OAI::Repository;

use strict;
use warnings;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
require Exporter;

@ISA = qw(Exporter);

@EXPORT = qw();
@EXPORT_OK = qw( &validate_request &validate_request_1_1 &validate_date &validate_metadataPrefix &validate_responseDate &validate_setSpec );
%EXPORT_TAGS = (validate=>[qw(&validate_request &validate_date &validate_metadataPrefix &validate_responseDate &validate_setSpec)]);

use HTTP::OAI::Error qw(%OAI_ERRORS);

# Copied from Simeon Warner's tutorial at
# http://library.cern.ch/HEPLW/4/papers/3/OAIServer.pm
# (note: corrected grammer for ListSets)
# 0 = optional, 1 = required, 2 = exclusive
my %grammer = (
	'GetRecord' =>
	{
		'identifier' => [1, \&validate_identifier],
		'metadataPrefix' => [1, \&validate_metadataPrefix]
	},
	'Identify' => {},
	'ListIdentifiers' =>
	{
		'from' => [0, \&validate_date],
		'until' => [0, \&validate_date],
		'set' => [0, \&validate_setSpec_2_0],
		'metadataPrefix' => [1, \&validate_metadataPrefix],
		'resumptionToken' => [2, sub { 0 }]
	},
	'ListMetadataFormats' =>
	{
		'identifier' => [0, \&validate_identifier]
	},
	'ListRecords' =>
	{
		'from' => [0, \&validate_date],
		'until' => [0, \&validate_date],
		'set' => [0, \&validate_setSpec_2_0],
		'metadataPrefix' => [1, \&validate_metadataPrefix],
		'resumptionToken' => [2, sub { 0 }]
	},
	'ListSets' =>
	{
		'resumptionToken' => [2, sub { 0 }]
	}
);

sub new {
	my ($class,%args) = @_;
	my $self = bless {}, $class;
	$self;
}

sub validate_request { validate_request_2_0(@_); }

sub validate_request_2_0 {
	my %params = @_;
	my $verb = $params{'verb'};
	delete $params{'verb'};

	my @errors;

	return (new HTTP::OAI::Error(code=>'badVerb',message=>'No verb supplied')) unless defined $verb;

	my $grm = $grammer{$verb} or return (new HTTP::OAI::Error(code=>'badVerb',message=>"Unknown verb '$verb'"));

	if( defined $params{'from'} && defined $params{'until'} ) {
		if( granularity($params{'from'}) ne granularity($params{'until'}) ) {
			return (new HTTP::OAI::Error(
				code=>'badArgument',
				message=>'Granularity used in from and until must be the same'
			));
		}
	}

	# Check exclusivity
	foreach my $arg (keys %$grm) {
		my ($type, $valid_func) = @{$grm->{$arg}};
		next unless ($type == 2 && defined($params{$arg}));

		if( my $err = &$valid_func($params{$arg}) ) {
			return (new HTTP::OAI::Error(
					code=>'badArgument', 
					message=>("Bad argument ($arg): " . $err)
				));
		}

		delete $params{$arg};
		if( %params ) {
			for(keys %params) {
				push @errors, new HTTP::OAI::Error(
					code=>'badArgument',
					message=>"'$_' can not be used in conjunction with $arg"
				);
			}
			return @errors;
		} else {
			return ();
		}
	}

	# Check required/optional
	foreach my $arg (keys %$grm) {
		my ($type, $valid_func) = @{$grm->{$arg}};

		if( $params{$arg} ) {
			if( my $err = &$valid_func($params{$arg}) ) {
				return (new HTTP::OAI::Error(code=>'badArgument',message=>"Bad argument ($arg): " . $err))
			}
		}
		if( $type == 1 && (!defined($params{$arg}) || $params{$arg} eq '') ) {
			return (new HTTP::OAI::Error(code=>'badArgument',message=>"Required argument '$arg' was undefined"));
		}
		delete $params{$arg};
	}

	if( %params ) {
		for(keys %params) {
			push @errors, new HTTP::OAI::Error(
				code=>'badArgument',
				message=>"'$_' is not a recognised argument for $verb"
			);
		}
		return @errors;
	} else {
		return ();
	}
}

sub granularity {
	my $date = shift;
	return 'year' if $date =~ /^\d{4}-\d{2}-\d{2}$/;
	return 'seconds' if $date =~ /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/;
}

sub validate_date {
	my $date = shift;
	return "Date not in OAI format (yyyy-mm-dd or yyyy-mm-ddThh:mm:ssZ)" unless $date =~ /^(\d{4})-(\d{2})-(\d{2})(T\d{2}:\d{2}:\d{2}Z)?$/;
	my( $y, $m, $d ) = ($1,($2||1),($3||1));
	return "Month in date is not in range 1-12" if ($m < 1 || $m > 12);
	return "Day in date is not in range 1-31" if ($d < 1 || $d > 31);
	0;
}

sub validate_responseDate {
	return 
		shift =~ /^(\d{4})\-([01][0-9])\-([0-3][0-9])T([0-2][0-9]):([0-5][0-9]):([0-5][0-9])[\+\-]([0-2][0-9]):([0-5][0-9])$/ ?
		0 :
		"responseDate not in OAI format (yyyy-mm-ddThh:mm:dd:ss[+-]hh:mm)";
}

sub validate_setSpec {
	return 
		shift =~ /^([A-Za-z0-9])+(:[A-Za-z0-9]+)*$/ ?
		0 :
		"Set spec not in OAI format, must match ^([A-Za-z0-9])+(:[A-Za-z0-9]+)*\$";
}

sub validate_setSpec_2_0 {
	return
		shift =~ /^([A-Za-z0-9_!'\$\(\)\+\-\.\*])+(:[A-Za-z0-9_!'\$\(\)\+\-\.\*]+)*$/ ?
		0 :
		"Set spec not in OAI format, must match ([A-Za-z0-9_!'\\\$\(\\)\\+\\-\\.\\*])+(:[A-Za-z0-9_!'\\$\\(\\)\\+\\-\\.\\*]+)*";
}

sub validate_metadataPrefix {
	return
		shift =~ /^[\w]+$/ ?
		0 :
		"Metadata prefix not in OAI format, must match regexp ^[\\w]+\$";
}

# OAI 2 requires identifiers by valid URIs
# This doesn't check for invalid chars, merely <sheme>:<scheme-specific>
sub validate_identifier {
	return
		shift =~ /^[[:alpha:]][[:alnum:]\+\-\.]*:.+/ ?
		0 :
		"Identifier not in OAI format, must match regexp ^[[:alpha:]][[:alnum:]\\+\\-\\.]*:.+";
}

1;

__END__

=head1 NAME

HTTP::OAI::Repository - Documentation for building an OAI compliant repository using OAI-PERL

=head1 DESCRIPTION

Using the OAI-PERL library in a repository context requires the user to build the OAI responses to be sent to OAI harvesters.

=head1 SYNOPSIS 1

	use HTTP::OAI::Harvester;
	use HTTP::OAI::Metadata::OAI_DC;
	use XML::SAX::Writer;
	use XML::LibXML;

	# (all of these options _must_ be supplied to comply with the OAI protocol)
	# (protocolVersion and responseDate both have sensible defaults)
	my $r = new HTTP::OAI::Identify(
		baseURL=>'http://yourhost/cgi/oai',
		adminEmail=>'youremail@yourhost',
		repositoryName=>'agoodname',
		requestURL=>self_url()
	);

	# Include a description (an XML::LibXML Dom object)
	$r->description(new HTTP::OAI::Metadata(dom=>$dom));

	my $r = HTTP::OAI::Record->new(
		header=>HTTP::OAI::Header->new(
			identifier=>'oai:myrepo:10',
			datestamp=>'2004-10-01'
			),
		metadata=>HTTP::OAI::Metadata::OAI_DC->new(
			dc=>{title=>['Hello, World!'],description=>['My Record']}
			)
	);
	$r->about(HTTP::OAI::Metadata->new(dom=>$dom));

	my $writer = XML::SAX::Writer->new();
	$r->set_handler($writer);
	$r->generate;

=head1 Building an OAI compliant repository

The validation scripts included in this module provide the repository admin with a number of tools for helping with being OAI compliant, however they can not be exhaustive in themselves.

=head1 METHODS

=over 4

=item $r = HTTP::OAI::Repository::validate_request(%paramlist)

=item $r = HTTP::OAI::Repository::validate_request_2_0(%paramlist)

These functions, exported by the Repository module, validate an OAI request against the protocol requirements. Returns an L<HTTP::Response|HTTP::Response> object, with the code set to 200 if the request is well-formed, or an error code and the message set.

e.g:

	my $r = validate_request(%paramlist);

	print header(-status=>$r->code.' '.$r->message),
		$r->error_as_HTML;

Note that validate_request attempts to be as strict to the Protocol as possible.

=item $b = HTTP::OAI::Repository::validate_date($date)

=item $b = HTTP::OAI::Repository::validate_metadataPrefix($mdp)

=item $b = HTTP::OAI::Repository::validate_responseDate($date)

=item $b = HTTP::OAI::Repository::validate_setSpec($set)

These functions, exported by the Repository module, validate the given type of OAI data. Returns true if the given value is sane, false otherwise.

=back

=head1 EXAMPLE

See the bin/gateway.pl for an example implementation (it's actually for creating a static repository gateway, but you get the idea!).