#-----------------------------------------------------------------
# MOBY::RDF::Ontologies::Cache::ServiceTypeCache
# Author: Edward Kawas <edward.kawas@gmail.com>,
# For copyright and disclaimer see below.
#
# $Id: ServiceTypeCache.pm,v 1.4 2008/09/02 13:12:33 kawas Exp $
#-----------------------------------------------------------------
package MOBY::RDF::Ontologies::Cache::ServiceTypeCache;
use XML::LibXML;
use RDF::Core::Model::Parser;
use RDF::Core::Storage::Memory;
use RDF::Core::Model;
use RDF::Core::Resource;
use RDF::Core::Literal;
use RDF::Core::Statement;
use RDF::Core::Model::Serializer;
use Fcntl ':flock';
use MOBY::RDF::Utils;
use MOBY::RDF::Ontologies::ServiceTypes;
use MOBY::RDF::Ontologies::Cache::CacheUtils;
use MOBY::Client::Central;
use SOAP::Lite;
use Data::Dumper;
use strict;
# Module version, derived from the CVS "Revision" keyword: the major and
# minor numbers are captured and formatted as "major.minor" with a
# two-digit minor part.
our $VERSION = sprintf( "%d.%02d", q$Revision: 1.4 $ =~ /: (\d+)\.(\d+)/ );
=head1 NAME
MOBY::RDF::Ontologies::Cache::ServiceTypeCache - Module for creating a cache of service types for use when generating RDF
=head1 SYNOPSIS
use MOBY::RDF::Ontologies::Cache::ServiceTypeCache;
# required
my $cachedir = "/tmp/";
# optional - gets default values from MOBY::Client::Central
my $url = "http://moby.ucalgary.ca/moby/MOBY-Central.pl";
my $uri = "http://moby.ucalgary.ca/MOBY/Central";
my $x = MOBY::RDF::Ontologies::Cache::ServiceTypeCache->new(
endpoint => $url,
namespace => $uri,
cache => $cachedir,
);
# create the service type cache
$x->create_service_type_cache();
# update the cache
$x->update_service_type_cache();
# obtain the RDF in a thread safe manner
my $rdf = $x->get_rdf;
=head1 DESCRIPTION
This module aids in the creation and maintenance of a service type cache for use in generating service type RDF
=cut
=head1 AUTHORS
Edward Kawas (edward.kawas [at] gmail [dot] com)
=cut
#-----------------------------------------------------------------
=head1 SUBROUTINES
=cut
#-----------------------------------------------------------------
# new
#-----------------------------------------------------------------
=head2 new
Instantiate a ServiceTypeCache object.
Parameters:
* A Hash with keys:
-> endpoint => the BioMOBY registry endpoint to use <optional>
-> namespace => the BioMOBY registry namespace to use <optional>
-> cache => the directory to store the cache <REQUIRED>
This subroutine attempts to create the cache directories right away
and if any problems occur then an Exception is thrown.
=cut
# Constructor. Accepts a hash with the keys 'endpoint', 'namespace' and
# 'cache'; any key that is missing (or false) falls back to a default.
# The resolved values are stored on the object together with a CacheUtils
# helper, and the cache directories are created when the helper reports
# that the cache does not exist yet.
sub new {
    my ( $class, %args ) = @_;

    # start from what the caller supplied; false values count as "not given"
    my $endpoint  = $args{endpoint};
    my $namespace = $args{namespace};
    my $cachedir  = $args{cache};

    # no endpoint given: ask MOBY::Client::Central for its default server.
    # A failure here is ignored, the hard-coded default below takes over.
    if ( !$args{endpoint} ) {
        eval {
            $endpoint = MOBY::Client::Central->new()->{default_MOBY_server};
        };
    }

    # anything still unset gets its default value
    $endpoint  ||= "http://moby.ucalgary.ca/moby/MOBY-Central.pl";
    $namespace ||= "http://moby.ucalgary.ca/MOBY/Central";
    $cachedir  ||= "/tmp/";

    my $utils = MOBY::RDF::Ontologies::Cache::CacheUtils->new(
        cache     => $cachedir,
        endpoint  => $endpoint,
        namespace => $namespace
    );

    my $self = bless {
        endpoint  => $endpoint,
        namespace => $namespace,
        cachedir  => $cachedir,
        utils     => $utils,
    }, ref($class) || $class;

    # create the cache directories if they are not there yet
    if ( !$self->{utils}->cache_exists ) {
        $self->{utils}->create_cache_dirs;
    }

    return $self;
}
#-----------------------------------------------------------------
# create_service_type_cache
#-----------------------------------------------------------------
=head2 create_service_type_cache
Create the service type cache. This will overwrite any pre-existing
cache that it finds.
This method is not thread safe.
Throw an exception if any of the following occurs:
* A SOAP error as a result of calling the registry
* Problems writing to the cache directory
=cut
# Build the service type cache from scratch, overwriting existing files.
#
# Step 1 fetches the list of service types from the registry and stores
# it as the LIST file (see _create_list_file). Step 2 generates the RDF
# for every service type named in that list and writes it to a file
# named after the service type inside the service type cache directory.
#
# Dies when the registry call fails or when a cache file cannot be
# opened, written or closed. Not thread safe.
sub create_service_type_cache {
    my ($self) = @_;

    # step 1: create the LIST file; its XML content is returned as a string
    my $list_xml = $self->_create_list_file;

    # step 2: for each service type in the list, store its RDF
    my $parser = XML::LibXML->new();
    my $doc    = $parser->parse_string($list_xml);
    my $nodes  = $doc->documentElement()->getChildrenByTagName('serviceType');

    my $cache_dir = File::Spec->catfile(
        $self->{utils}->cachedir,
        $self->{utils}->_clean( $self->{utils}->_endpoint ),
        $self->{utils}->SERVICETYPES_CACHE
    );

    my %completed = ();

    # XML::LibXML::NodeList::get_node is 1-based
    for my $index ( 1 .. $nodes->size() ) {
        my $name = $nodes->get_node($index)->getAttribute('name');

        # every service type is processed only once
        next if $completed{$name};
        $completed{$name} = 1;

        my $generator = MOBY::RDF::Ontologies::ServiceTypes->new(
            endpoint => $self->{utils}->_endpoint );
        my $rdf = $generator->createByName( { term => $name } );

        my $file = File::Spec->catfile( $cache_dir, $name );

        # three-argument open: the name comes from the registry and must
        # never be interpreted as part of the open mode
        open( my $fh, '>', $file )
          or die("Can't open file '$file' for writing: $!");
        print {$fh} $rdf;

        # buffered write errors only surface when the handle is closed
        close($fh)
          or die("Can't close file '$file' after writing: $!");
    }
    return;
}
#-----------------------------------------------------------------
# update_service_type_cache
#-----------------------------------------------------------------
=head2 update_service_type_cache
Update the service type cache. This will update any items that are 'old',
by relying on the LSID for the service type. This method is not thread safe.
This method returns the number of changed resources.
To update the cache with a thread safe method, call C<get_rdf>.
Throw an exception if any of the following occur:
* There is a SOAP error calling the registry
* There were read/write errors on the cache directory or its contents
=cut
# Bring the service type cache in line with the registry.
#
# The LSIDs recorded in the cached LIST file are compared with the LSIDs
# the registry currently reports. Every service type that has an LSID
# missing from the cached list is downloaded again (and the LIST file is
# replaced); cached files whose name is not a service type reported by
# the registry are deleted.
#
# Returns -1 when the cache directory or its LIST file was missing and
# the cache was created from scratch; otherwise the number of service
# types that were refreshed plus the number of obsolete files found
# (0 means the cache was already up to date).
#
# Dies on a SOAP fault or when a cache file cannot be written.
# Not thread safe; get_rdf calls this sub while holding a file lock.
sub update_service_type_cache {
    my ($self) = @_;

    my $cachedir = File::Spec->catfile(
        $self->{utils}->cachedir,
        $self->{utils}->_clean( $self->{utils}->_endpoint ),
        $self->{utils}->SERVICETYPES_CACHE
    );
    my $list_file =
      File::Spec->catfile( $cachedir, $self->{utils}->LIST_FILE );

    # no cache directory at all: build everything
    if ( !-e $cachedir ) {
        $self->create_service_type_cache;
        return -1;
    }

    # cache directory without a LIST file: build everything as well
    if ( !-e $list_file ) {
        warn( "service type LIST_FILE doesn't exist, so I created the cache from scratch!"
        );
        $self->create_service_type_cache;
        return -1;
    }

    # read the cached LIST file and extract the lsids of all service types
    my $parser = XML::LibXML->new();
    my $doc = eval { $parser->parse_file($list_file) };
    if ($@) {
        # unreadable LIST file: fetch a fresh one (this also rewrites it)
        warn "There was something wrong with '$list_file' and we couldn't parse it.\nWill attempt to create from scratch.\n";
        $doc = $parser->parse_string( $self->_create_list_file );
    }
    my %old_services = %{ _lsids_by_name($doc) };

    # get the current list from the registry and extract its lsids
    my $soap =
      SOAP::Lite->uri( $self->{utils}->_namespace )
      ->proxy( $self->{utils}->_endpoint )->on_fault(
        sub {
            my $soap = shift;
            my $res  = shift;
            die(    "There was a problem calling the registry: "
                  . $self->{utils}->_endpoint . "\@ "
                  . $self->{utils}->_namespace . ".\n"
                  . $res );
        }
      );
    my $xml          = $soap->retrieveServiceTypes()->result;
    my %new_services = %{ _lsids_by_name( $parser->parse_string($xml) ) };

    # a service type is queued for download when the registry reports an
    # lsid for it that the cached list does not know
    my %changed_services = ();
    foreach my $name ( keys %new_services ) {
        foreach my $lsid ( keys %{ $new_services{$name} } ) {
            next if $old_services{$name} && $old_services{$name}{$lsid};
            $changed_services{$name} = 1;
            last;
        }
    }

    my $wasOld = 0;

    # if there were changes, save the new LIST file over the old one and
    # download the changed service types
    if ( keys %changed_services ) {
        open( my $list_fh, '>', $list_file )
          or die("Can't open file '$list_file' for writing: $!");
        print {$list_fh} $xml;
        close($list_fh)
          or die("Can't close file '$list_file' after writing: $!");

        foreach my $name ( keys %changed_services ) {
            $wasOld++;
            my $generator = MOBY::RDF::Ontologies::ServiceTypes->new(
                endpoint => $self->{utils}->_endpoint, );
            my $rdf = $generator->createByName( { term => $name } );
            my $file = File::Spec->catfile( $cachedir, $name );
            open( my $fh, '>', $file )
              or die("Can't open file '$file' for writing: $!");
            print {$fh} $rdf;
            close($fh)
              or die("Can't close file '$file' after writing: $!");
        }
    }

    # remove any old files that should not be cached. This is best
    # effort: problems while cleaning up are deliberately ignored.
    eval {
        my @files = $self->{utils}->plainfiles($cachedir);
        foreach my $path (@files) {

            # test the real path, not a name relative to the current
            # working directory
            next if -d $path;

            # strip the directory part and the separator that follows it
            my $filename = substr $path, length($cachedir) + 1;
            next
              if $filename eq $self->{utils}->RDF_FILE
              or $filename eq $self->{utils}->LIST_FILE
              or $filename eq $self->{utils}->UPDATE_FILE;

            # files of service types the registry still knows are kept
            # and do NOT count as a change
            next if $new_services{$filename};

            # obsolete file: count it even when the unlink fails, so that
            # the caller still knows the merged RDF is out of date
            unlink($path);
            $wasOld++;
        }
    };
    return $wasOld;
}

# Index the <serviceType> children of a parsed LIST document.
# Takes an XML::LibXML document and returns a hash reference of the form
# { name => { lsid => 1 } } built from the 'name' and 'lsid' attributes.
sub _lsids_by_name {
    my ($doc) = @_;
    my %lsids_of = ();
    my $nodes = $doc->documentElement()->getChildrenByTagName('serviceType');

    # XML::LibXML::NodeList::get_node is 1-based
    for my $index ( 1 .. $nodes->size() ) {
        my $node = $nodes->get_node($index);
        my $name = $node->getAttribute('name');
        my $lsid = $node->getAttribute('lsid');
        $lsids_of{$name}{$lsid} = 1;
    }
    return \%lsids_of;
}
#-----------------------------------------------------------------
# get_rdf
# Return a cached copy of the RDF
#-----------------------------------------------------------------
=head2 get_rdf
Gets the cached copy of the RDF for all service types. This subroutine
is thread safe as it performs a flock on a Lock file in the
directory while performing operations.
Throw an exception if any of the following occur:
* There was a SOAP problem communicating with a registry
* There was a file read/write error while performing cache related
activities
* There was a problem parsing XML
=cut
# Return the RDF describing all cached service types as a string.
#
# The work is done while holding an exclusive flock on the UPDATE file of
# the service type cache. The cache is first brought up to date with
# update_service_type_cache. When that reports changes, or when no merged
# RDF file exists yet, the per-service-type RDF files are merged into one
# document, which is saved as the RDF file and returned. Otherwise the
# content of the existing RDF file is returned.
#
# Dies (after releasing the lock) when the registry cannot be reached,
# when XML cannot be processed, or when the merged RDF file cannot be
# read or written. A file that fails to parse during the merge is only
# reported with a warning and left out.
sub get_rdf {
    my ($self) = @_;
    my $xml = "";

    my $dir = File::Spec->catfile(
        $self->{utils}->cachedir,
        $self->{utils}->_clean( $self->{utils}->_endpoint ),
        $self->{utils}->SERVICETYPES_CACHE
    );
    my $lock = File::Spec->catfile( $dir, $self->{utils}->UPDATE_FILE );
    my $file = File::Spec->catfile( $dir, $self->{utils}->RDF_FILE );

    # Take the lock. When the lock file cannot be opened (for example
    # because the cache directory does not exist yet) carry on without
    # one rather than failing the whole request.
    my $locked = open( my $lock_fh, '>', $lock );
    if ($locked) {
        flock( $lock_fh, LOCK_EX );
    }
    else {
        warn "Could not open lock file '$lock' ($!), continuing without a lock\n";
    }

    eval {
        # check if we need to re-merge the RDF
        my $isStale = $self->update_service_type_cache;
        if ( $isStale or !( -e $file ) ) {

            # names of the service types the registry currently knows
            my $providers = $self->_get_object_names;

            my $parser = XML::LibXML->new();
            my $doc    = undef;

            opendir( my $dh, $dir )
              or die "Could not open directory for reading: $!\n";
            my @entries = readdir $dh;
            closedir $dh;

            # parse the rdf of every service type and collect everything
            # in a single document
            foreach my $RDF (@entries) {
                my $path = File::Spec->catfile( $dir, $RDF );

                # test the real path, not a name relative to the current
                # working directory (this also skips '.' and '..')
                next if -d $path;
                next
                  if $RDF eq $self->{utils}->RDF_FILE
                  or $RDF eq $self->{utils}->LIST_FILE
                  or $RDF eq $self->{utils}->UPDATE_FILE;

                # files of service types that no longer exist are removed
                # and must not end up in the merged document
                unless ( $providers->{$RDF} ) {
                    unlink($path);
                    next;
                }

                my $parsed = eval { $parser->parse_file($path) };
                if ($@) {
                    warn $@;
                    next;
                }

                # the first readable document is the merge target ...
                unless ($doc) {
                    $doc = $parsed;
                    next;
                }

                # ... and the descriptions of all others are added to it
                foreach my $service (
                    $parsed->findnodes('/rdf:RDF/rdf:Description') )
                {
                    $doc->documentElement->appendChild($service);
                }
            }

            $xml = $doc
              ? $doc->toString()
              : MOBY::RDF::Utils->new->empty_rdf;

            # save new RDF file
            open( my $out, '>', $file )
              or die("Can't open file '$file' for writing: $!");
            print {$out} $xml;
            close($out)
              or die("Can't close file '$file' after writing: $!");
        }
        else {
            # send existing rdf
            open( my $in, '<', $file )
              or die("Can't open file '$file' for reading: $!");
            $xml = join "", <$in>;
            close($in);
        }
    };

    # remember the error before anything else can touch $@, and always
    # release the lock before reporting it
    my $error = $@;
    if ($locked) {
        flock( $lock_fh, LOCK_UN );
        close($lock_fh);
    }
    die $error if $error;
    return $xml;
}
# Ask the registry for its service types and return a hash reference
# whose keys are the 'name' attributes of the <serviceType> elements in
# the answer (every value is 1). Dies when the registry reports a fault;
# the message names the endpoint but not the namespace.
sub _get_object_names {
    my ($self) = @_;

    # turn every SOAP fault into an exception
    my $fault_handler = sub {
        my ( $client, $response ) = @_;
        die(    "There was a problem calling the registry: "
              . $self->{utils}->_endpoint . "\@ "
              . $response );
    };

    my $registry = SOAP::Lite->uri( $self->{utils}->_namespace );
    $registry = $registry->proxy( $self->{utils}->_endpoint );
    $registry = $registry->on_fault($fault_handler);

    my $listing  = $registry->retrieveServiceTypes()->result;
    my $document = XML::LibXML->new()->parse_string($listing);

    # in list context getChildrenByTagName returns the nodes themselves
    my @service_types =
      $document->documentElement()->getChildrenByTagName('serviceType');

    my %providers =
      map { ( scalar( $_->getAttribute('name') ) => 1 ) } @service_types;

    return \%providers;
}
# Fetch the list of service types from the registry, store it as the
# LIST file of the service type cache (creating the cache directories
# when necessary) and return the list as an XML string.
#
# Dies when the registry reports a SOAP fault or when the LIST file
# cannot be opened, written or closed.
sub _create_list_file {
    my ($self) = @_;

    my $soap =
      SOAP::Lite->uri( $self->{utils}->_namespace )
      ->proxy( $self->{utils}->_endpoint )->on_fault(
        sub {
            my $soap = shift;
            my $res  = shift;
            die(    "There was a problem calling the registry: "
                  . $self->{utils}->_endpoint . "\@ "
                  . $self->{utils}->_namespace . ".\n"
                  . $res );
        }
      );
    my $xml = $soap->retrieveServiceTypes()->result;

    # create cache dirs as needed
    $self->{utils}->create_cache_dirs;

    my $file = File::Spec->catfile(
        $self->{utils}->cachedir,
        $self->{utils}->_clean( $self->{utils}->_endpoint ),
        $self->{utils}->SERVICETYPES_CACHE,
        $self->{utils}->LIST_FILE
    );

    # three-argument open with a lexical handle: the path is never
    # interpreted as part of the open mode and the handle cannot clash
    # with a global FILE handle used elsewhere
    open( my $fh, '>', $file )
      or die("Can't open file '$file' for writing: $!");
    print {$fh} $xml;

    # buffered write errors only surface when the handle is closed
    close($fh)
      or die("Can't close file '$file' after writing: $!");

    return $xml;
}
1;
__END__