package SemanticWeb::OAI::ORE::Model;
#$Id: Model.pm,v 1.16 2010-12-06 14:44:15 simeon Exp $

=head1 NAME

SemanticWeb::OAI::ORE::Model - Module for model component of an OAI-ORE Resource Map

=head1 SYNOPSIS

In essence, the model is simply a set of triples and we thus store them in
a triple store provided by L<RDF::Core::Model>, L<RDF::Core::Storage> etc..

=cut

use strict;
use warnings;
use Carp qw(croak carp);

use SemanticWeb::OAI::ORE::Constant qw(:all);
use SemanticWeb::OAI::ORE::N3;

use RDF::Core::Model;
use RDF::Core::Model::Serializer;
use RDF::Core::Storage;
use RDF::Core::Storage::Memory;
use RDF::Core::Resource;
use RDF::Core::Literal;
use RDF::Core::Statement;
use Class::Accessor;

use base qw(Class::Accessor RDF::Core::Model);
SemanticWeb::OAI::ORE::Model->mk_accessors(qw(die_level));

=head1 METHODS

=head2 CREATION AND MODIFICATION

=head3 SemanticWeb::OAI::ORE::Model->new(%args) or SemanticWeb::OAI::ORE::Model->new($rdf_model)

Create new relationships object as part of a resource map.

If supplied with a single argument that is a L<RDF::Core::Model> then that
object is blessed into this class an returned. Otherwise a new  
L<RDF::Core::Model> object is created and any %args are passed to the 
creator.

=cut

sub new {
  my $class=shift;
  my $self;
  if (ref($_[0]) and $_[0]->isa('RDF::Core::Model')) {
    $self=$_[0];
  } else {
    $self=RDF::Core::Model->new(Storage=>RDF::Core::Storage::Memory->new(),@_);
  }
  bless $self, $class;
  $self->die_level(FATAL);
  return($self);
}


=head3 $model->add($model_or_statement)

Add either another model object or a single statement to this $model.
Returns the number of statements added.

=cut

sub add {
  my $self=shift;
  my $count=0;
  foreach my $to_add (@_) {
    if ($to_add->isa('RDF::Core::Model')) {
      my $enum=$to_add->getStmts(undef,undef,undef);
      my $statement=$enum->getFirst();
      while ($statement) {
        $self->addStmt($statement);
        $count++;
        $statement=$enum->getNext();
      }
      $enum->close();
    } elsif ($to_add->isa('RDF::Core::Statement')) {
      $self->addStmt($to_add);
      $count++;
    } else {
      die "Don't know how to add a ".ref($to_add)." to the Model";
    }
  }
  return($count);
}


=head3 $model->add_rel_to_resource($subject,$predicate,$object)

Add relationship where the object is a resource (URI).

=cut

sub add_rel_to_resource {
  my $self=shift;
  my ($subject,$predicate,$object)=@_;
  $subject=RDF::Core::Resource->new($subject);
  $predicate=RDF::Core::Resource->new($predicate);
  $object=RDF::Core::Resource->new($object);
  $self->addStmt(RDF::Core::Statement->new($subject,$predicate,$object));
}


=head3 $model->add_rel_to_literal($subject,$predicate,$object)

Add relationship where the object is a literal.

=cut

sub add_rel_to_literal {
  my $self=shift;
  my ($subject,$predicate,$object)=@_;
  $subject=RDF::Core::Resource->new($subject);
  $predicate=RDF::Core::Resource->new($predicate);
  $object=RDF::Core::Literal->new($object);
  $self->addStmt(RDF::Core::Statement->new($subject,$predicate,$object));
}


=head3 $model->count()

Returns the number of statements or relationships.

=cut

sub count {
  my $self=shift;
  return($self->countStmts(undef,undef,undef));
}


=head3 $model->as_array()

Return an array reference to all triples each as a four element array with
[subject, predicate, object, object_is_literal] for each statement.

FIXME - should perhaps implement iterator or similar...

=cut

sub as_array {
  my $self=shift;
  my $enum=$self->getStmts(undef,undef,undef);
  my $statement=$enum->getFirst();
  my @triples=();
  while ($statement) {
    push(@triples,[$statement->getSubject()->getLabel(),
                   $statement->getPredicate()->getLabel(),
                   $statement->getObject()->getLabel(),
                   $statement->getObject()->isLiteral()]);
    $statement=$enum->getNext();
  }
  return(\@triples);
}


=head3 $model->objects_matching($subject,$predicate,$only)

Return an array of objects from triples where the subject and predicate 
are as specified. Will return an empty array if there are no matches.

If $only is not specified then the objects matching will be returned.

If $only is RESOURCE then only resource labels will be included, if 
LITERAL then only literal labels will be returned.

=cut

sub objects_matching {
  my $self=shift;
  my ($subject,$predicate,$only)=@_;

  if (not defined($subject)) {
    #empty list
    return([]);
  } elsif (not ref($subject)) {
    $subject=RDF::Core::Resource->new($subject);
  }
  if (not defined($predicate)) {
    #leave undef so we match any
  } elsif (not ref($predicate)) {
    $predicate=expand_qname($predicate);
    $predicate=RDF::Core::Resource->new($predicate);
  }

  my $enum=$self->getStmts($subject,$predicate,undef);
  my $statement=$enum->getFirst();
  my @matching=();
  while ($statement) {
    my $obj=$statement->getObject();
    if ($only) {
      if ($only==RESOURCE) {
        next if ($obj->isLiteral());
      } elsif ($only==LITERAL) {
        next if (not $obj->isLiteral());
      } 
      push(@matching,$obj->getLabel());
    } else {
      push(@matching,$obj);
    }
    $statement=$enum->getNext();
  }
  return(@matching);
}



=head3 $model->literal_matching($subject,$predicate)

Wrapper around objects_matching to get the first literal matching the
specified condition, else retursn undef. Ignores any other matches.

=cut

sub literal_matching {
  my $self=shift;
  my ($subject,$predicate)=@_;
  my @objects=$self->objects_matching($subject,$predicate,LITERAL);
  return(@objects ? $objects[0] : undef );
}



# Return URI or literal based on whether string looks like a URI
#
sub __uri_or_literal {
  my ($str)=@_;
  my $ul;
  if ($str=~/^[a-z]+:\S+$/) {
    $ul=RDF::Core::Resource->new($str);
  } else {
    $ul=RDF::Core::Literal->new($str);
  }
  return($ul);
}


=head2 VALIDATION

=head3 $model->check_model($uri_rem,$rem)

Take an RDF model of type RDF::Core::Model in $self and a Resource
Map URI $uri_rem. Attempt to parse/interpret it as a resource map. Will 
croak if parsing fails so usual call would be to wrap in an eval:

  eval {
    $model->check_model($uri_rem,$rem);
  };
  if ($@) {
    # oops
  }

If $rem is supplied then this is expected to be a SemanticWeb::OAI::ORE::ReM object
with methods uri(), aggregation(), creator() and 
timestamp_as_iso8601() which are used to set these values for easy reference.

The requirements are based mainly on the table given in 
L<http://www.openarchives.org/ore/1.0/datamodel#Constraints>.

=cut

sub check_model {
  my $self=shift;
  my ($uri_rem,$rem)=@_;

  my $resource_map=RDF::Core::Resource->new(RESOURCE_MAP);
  my $aggregation=RDF::Core::Resource->new(AGGREGATION);
  my $has_type=RDF::Core::Resource->new(HAS_TYPE);
  my $describes=RDF::Core::Resource->new(DESCRIBES);
  my $aggregates=RDF::Core::Resource->new(AGGREGATES);

  # First, work out what the Resource Map URI (URI-R) is
  {
    my $statement=undef;
    my $uri=undef;
    my $cnt=$self->countStmts(undef,$has_type,$resource_map);
    if ($cnt==0) {
      $self->err(FATAL,"No resource map node defined as such and not URI-R supplied") if (not defined $uri_rem);
      #if FATAL turned off or $uri_rem supplied then just assume $uri_rem as given
      $self->err(WARN,"Using supplied URI-R ($uri_rem) as resource map URI") if (defined $uri_rem);
      $uri=$uri_rem;
    } elsif ($cnt==1) {
      my $enum=$self->getStmts(undef,$has_type,$resource_map);
      $statement=$enum->getFirst;
      $enum->close();
      $uri=$statement->getSubject->getURI;
    } else {
      # more than one match, can't handle that yet so barf.
      # can probably work it out by looking for an AGGREGATES arc from the same Subject
      $self->err(FATAL,"Got $cnt candidates for resourceMap node");
      return(0); #if FATAL turned off
    }
    # Only get here if we found $statement and extracted $uri
    if (defined $rem) {
      $rem->uri($uri);
    }
    if (defined $uri_rem and $uri_rem ne $uri) {
      $self->err(WARN,"URI for ReM supplied ($uri_rem) but does not match that inside object ($uri)");
    }  
    $uri_rem=$uri;
  }

  # Second, work out what the Aggregation URI (URI-A) is. First look for a DESCRIBES
  # predicate, look for a node typed as an aggregation if that fails.
  my $uri_agg=undef;
  {
    my $statement=undef;
    my $rem_resource=RDF::Core::Resource->new($uri_rem);
    my $cnt=$self->countStmts($rem_resource,$describes,undef);
    if ($cnt==1) {
      my $enum=$self->getStmts($rem_resource,$describes,undef);
      $statement=$enum->getFirst();
      $enum->close();
      $uri_agg=$statement->getObject()->getURI();
    } elsif ($cnt==0) {
      # Any describes statement..    
      my $cnt=$self->countStmts(undef,$describes,undef);
      if ($cnt==1) {
        my $enum=$self->getStmts(undef,$describes,undef);
        $statement=$enum->getFirst();
        $enum->close();
        $uri_agg=$statement->getObject()->getURI();
      }
    }
    # If that did not work, try typed node
    if (not defined $uri_agg) {
      my $cnt=$self->countStmts(undef,$has_type,$aggregation);
      if ($cnt==0) {
        $self->err(FATAL,"Failed to find an Aggregation node!");
        return(0);
      } elsif ($cnt==1) {
        my $enum=$self->getStmts(undef,$has_type,$aggregation);
        $statement=$enum->getFirst();
        $enum->close();
        $uri_agg=$statement->getSubject()->getURI();
      } else {
        # more than one match, can't handle that yet so barf.
        $self->err(FATAL,"Got $cnt candidates for Aggregation node");
        return(0);
      }
    }
    # Only get here if we found $statement and extracted $uri_agg, record 
    # in model.
    if (defined $rem) {
      $rem->aggregation($uri_agg);
    }
  }

  # Now look for $uri_agg AGGREGATES <blah> statements and add to 
  # the list of aggregated resources
  my $uri_agg_resource=RDF::Core::Resource->new($uri_agg);
  {
    my $cnt=$self->countStmts($uri_agg_resource,$aggregates,undef);
    if ($cnt==0) {
      $self->err(WARN,"No resources aggregated by Aggregation $uri_agg. This is legal but perhaps not what is intended.");
    } else {
      carp "Found $cnt aggregated resources" if ($self->{debug});
    }
  }

  # Now look for essential metadata: creator and modified
  {
    if (scalar($self->creators($uri_rem))==0) {
      $self->err(FATAL,"Resource map must have at least one ".CREATOR);
      return(0);
    }
  }

  my $uri_rem_resource=RDF::Core::Resource->new($uri_rem);
  if (my $timestamp=$self->get_timestamp($uri_rem_resource,1)) {
    $rem->timestamp_as_iso8601(MODIFIED,$timestamp);
  } else {
    # Will have already thrown error
    return(0);
  }
  
  return(1);
}


=head3 $model->creators($uri)

Find all the CREATOR objects (resources or literals) for $uri.

=cut

sub creators {
  my $self=shift;
  my ($uri_rem)=@_;
  my $uri_rem_resource=RDF::Core::Resource->new($uri_rem);
  return($self->objects_matching($uri_rem_resource,CREATOR));
}


=head3 $model->get_timestamp($uri_rem,$throw_error)

Return timestamp literal associated with $uri_rem. There must be
just one otherwise nothing (error) will be returned.

=cut

sub get_timestamp {
  my $self=shift;
  my ($uri_rem,$throw_error)=@_;
  my @timestamps=$self->objects_matching($uri_rem,MODIFIED);
  if (scalar(@timestamps)!=1) {
    if ($throw_error) {
      $self->err(FATAL,"Resource map must have one and only one ".MODIFIED);
    }
    return();
  }
  my $timestamp=$timestamps[0];
  if (not $timestamp->isLiteral()) {
    if ($throw_error) {
      $self->err(FATAL,"Resource map timestamp must be a literal value");
    }
    return();
  }
  return($timestamp->getLabel());
}


=head3 $model->err($level,$msg)

Error handling. Will use similar error method of $self->{errobj} if
that is set. Otherwise handles here.

=cut

sub err {
  my $self=shift;
  if ($self->{errobj}) {
    return($self->{errobj}->err(@_));
  }
  my ($level,$msg)=@_;
  if ($level>=$self->die_level) {
    croak "ERROR: $msg";
  }
  $self->add_errstr($msg);
}


=head2 INTROSPECTION

These routines support examination of the model to pull out key reference
points and information such as the Resource Map URI or the Aggregation URI.

=head3 $model->find_rem

Attempt to find the Resource Map. Returns the appropriate Resource object 
if successful, nothing otherwise. 

=cut

sub find_rem {
  my $self=shift;

  my $rem=undef;

  my $resource_map=RDF::Core::Resource->new(RESOURCE_MAP);
  my $has_type=RDF::Core::Resource->new(HAS_TYPE);
  my $enum=$self->getStmts(undef,$has_type,$resource_map);
  if (my $statement=$enum->getFirst) {
    # If more than one match, recklessly pick the 'first'
    # FIXME - could look for one with describes link
    $rem=$statement->getSubject;
    $enum->close;
  } else {
    # None found from that test, try looking for something 
    # that DESCRIBES
    my $describes=RDF::Core::Resource->new(DESCRIBES);
    my $cnt=$self->countStmts(undef,$describes,undef);
    if ($cnt==1) {
      # Just one so we take it
      $enum=$self->getStmts(undef,$describes,undef);
      $rem=$enum->getFirst->getSubject;
      $enum->close;
    } else {
      # FIXME - look for one with other matches
    }
  }

  # Now have Resource in $rem if we found it
  return( $rem || () );
}


=head3 $model->find_rem_uri(%opts)

Wrapper around $model->find_rem that returns a URI on
success, nothing otherwise.

=cut

sub find_rem_uri {
  my $self=shift;
  my $agg=$self->find_rem(@_);
  return($agg ? $agg->getURI : () );
}


=head3 $model->find_aggregation(%opts)

Find the Aggregation in this Resource Map. Returns the appropriate 
Resource object if successful, nothing otherwise.

Valid options are:

 uri_rem -> Resurce Map URI,

=cut 

sub find_aggregation {
  my $self=shift;

  my $agg=undef;

  my $aggregation=RDF::Core::Resource->new(AGGREGATION);
  my $has_type=RDF::Core::Resource->new(HAS_TYPE);
  my $cnt=$self->countStmts(undef,$has_type,$aggregation);
  if ($cnt==1) {
    my $enum=$self->getStmts(undef,$has_type,$aggregation);
    $agg=$enum->getFirst->getSubject;
    $enum->close;
  } elsif ($cnt>1) {
    # FIXME - do something smarter than taking the first
    my $enum=$self->getStmts(undef,$has_type,$aggregation);
    $agg=$enum->getFirst->getSubject;
    $enum->close;
  } else { # ($cnt==0)
    # None found from that test, try looking for something 
    # that the rem DESCRIBES
    my $describes=RDF::Core::Resource->new(DESCRIBES);
    my $cnt=$self->countStmts(undef,$describes,undef);
    if ($cnt==1) {
      # Just one so we take it
      my $enum=$self->getStmts(undef,$describes,undef);
      $agg=$enum->getFirst->getObject;
      $enum->close;
    } else {
      # FIXME - look for one with other matches
    }
    # ???
  }

  return( $agg || () );
}


=head3 $model->find_aggregation_uri(%opts)

Wrapper around $model->find_aggregation that returns a URI on
success, nothing otherwise.

=cut

sub find_aggregation_uri {
  my $self=shift;
  my $agg=$self->find_aggregation(@_);
  return($agg ? $agg->getURI : () );
}


=head2 DATA DUMP 

These are low-level data dump methods. It is expected that normally
the methods provided via L<SemanticWeb::OAI::ORE::ReM>::serialize will be used.

=head3 $model->as_n3($unsorted)

Very simple dump of this object as N3. No prefixes are used and the triples
are sorted alphabetically by line unless $unsorted is set true (in which case 
the output will be essentially random).

See L<SemanticWeb::OAI::ORE::N3> for "pretty printing" methods.

=cut

sub as_n3 {
  my $self=shift;
  my ($unsorted)=@_;

  my @triples=();
  my $enum=$self->getStmts(undef,undef,undef);
  my $statement=$enum->getFirst();
  while ($statement) {
    my $subject='<'.$statement->getSubject()->getLabel().'>';
    my $predicate='<'.$statement->getPredicate()->getLabel().'>';
    my $obj=$statement->getObject();
    my $object=$obj->getLabel();
    if ($obj->isa('RDF::Core::Resource')) {
      $object='<'.$object.'>';
    } else {
      $object='"'.SemanticWeb::OAI::ORE::N3::_n3_escape($object).'"';
    }
    push(@triples,"$subject $predicate $object.\n");
    $statement=$enum->getNext();
  }

  my $str="# Dump of OAI-ORE Resource Map model as N3\n";
  if ($unsorted) {
    $str.=join('',@triples);
  } else {
    $str.=join('',sort(@triples));
  }
  return($str);
}


=head3 $model->as_rdfxml

Simple RDF XML dump, returns string. For more sophisticated output
see L<SemanticWeb::OAI::ORE::RDFXML>.

=cut

sub as_rdfxml {
  my $self=shift;
  my $xml = '';
  my $serializer = new RDF::Core::Model::Serializer(Model=>$self,
                                                    Output=>\$xml,
                                                    BaseURI => 'http://example.com/',
                                                   );
  $serializer->serialize;
  return($xml);
}


=head1 SEE ALSO

L<SemanticWeb::OAI::ORE::ReM>

=head1 AUTHOR

Simeon Warner

=head1 LICENSE AND COPYRIGHT

Copyright 2007-2010 Simeon Warner.

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself. See L<perlartistic>.

=cut

1;