The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package RDF::RDB2RDF::R2RML;

use 5.010;
use strict;
use utf8;

use Digest::MD5 qw[md5_hex];
use RDF::Trine qw[statement blank literal];
use RDF::Trine::Namespace qw[rdf rdfs owl xsd];
use Scalar::Util qw[blessed];
use Storable qw[dclone];

our $rr = RDF::Trine::Namespace->new('http://www.w3.org/ns/r2rml#');

use namespace::clean;
use base qw[
	RDF::RDB2RDF::Simple
];

our $AUTHORITY = 'cpan:TOBYINK';
our $VERSION   = '0.007';

sub new
{
	my ($class, $r2rml) = @_;
	my $self = $class->SUPER::new();
	$self->_r2rml($r2rml);
	return $self;
}

sub process_turtle
{
	my ($self, $dbh, %options) = @_;
	my $rv = $self->SUPER::process_turtle($dbh, %options);
	
	unless ($options{no_r2rml})
	{
		my $r2rml = RDF::Trine::Serializer
			->new('Turtle', namespaces => { $self->namespaces })
			->serialize_model_to_string($self->{r2rml});
		$r2rml =~ s/^/# /gm;
		$rv = "# R2RML\n#\n${r2rml}\n${rv}";
	}
}

sub _r2rml
{
	my ($self, $r2rml) = @_;
	
	unless (blessed($r2rml) and $r2rml->isa('RDF::Trine::Model'))
	{
		$self->{namespaces} = RDF::Trine::NamespaceMap->new;
		my $parser = RDF::Trine::Parser->new('Turtle', namespaces=>$self->{namespaces});
		my $model  = RDF::Trine::Model->temporary_model;
		$parser->parse_into_model('http://example.com/', $r2rml, $model);
		$r2rml = $model;
	}
	
	my @TMC = values %{ {
			map { $_->as_ntriples => $_ }
			(
				$r2rml->subjects($rdf->type, $rr->TriplesMap),
				$r2rml->subjects($rdf->type, $rr->TriplesMapClass),
				$r2rml->subjects($rr->subjectMap, undef),
			)
		} };
		
	foreach my $tmc (@TMC)
	{
		$self->_r2rml_TriplesMapClass($r2rml, $tmc);
	}
	
	$self->{r2rml} = $r2rml;
}

sub _r2rml_TriplesMapClass
{
	my ($self, $r2rml, $tmc) = @_;
	my $mapping = {};
		
	if ( $self->{tmc}{$tmc} )
	{
		return $self->{tmc}{$tmc};
	}
	
	my ($tablename, $sqlquery);
	foreach ($r2rml->objects_for_predicate_list($tmc, $rr->SQLQuery, $rr->sqlQuery))
	{
		next unless $_->is_literal;
		$sqlquery = $_->literal_value;
		last;
	}
	if ($sqlquery)
	{
		$tablename = sprintf('+q%s', md5_hex($sqlquery));
		$mapping->{sql} = $sqlquery;
	}
	else
	{
		foreach ($r2rml->objects($tmc, $rr->tableName))
		{
			next unless $_->is_literal;
			$tablename = $_->literal_value;
			last;
		}
		if ($tablename)
		{
			foreach ($r2rml->objects($tmc, $rr->tableOwner))
			{
				next unless $_->is_literal;
				$tablename = sprintf('%s.%s', $_->literal_value, $tablename);
				last;
			}
		}
	}
	
	unless ($tablename)
	{
		LOGICALTABLE: foreach my $lt ($r2rml->objects($tmc, $rr->logicalTable))
		{
			next LOGICALTABLE if $lt->is_literal;

			foreach ($r2rml->objects_for_predicate_list($lt, $rr->sqlQuery, $rr->SQLQuery))
			{
				next unless $_->is_literal;
				$sqlquery = $_->literal_value;
				last;
			}
			if ($sqlquery)
			{
				$tablename = sprintf('+q%s', md5_hex($sqlquery));
				$mapping->{sql} = $sqlquery;
				last LOGICALTABLE;
			}

			TABLENAME: foreach ($r2rml->objects($lt, $rr->tableName))
			{
				next TABLENAME unless $_->is_literal;
				$tablename = $_->literal_value;
				last TABLENAME;
			}
			if ($tablename)
			{
				TABLEOWNER: foreach ($r2rml->objects($tmc, $rr->tableOwner))
				{
					next TABLEOWNER unless $_->is_literal;
					$tablename = sprintf('%s.%s', $_->literal_value, $tablename);
					last TABLEOWNER ;
				}
				last LOGICALTABLE;
			}
		}
	}
	
	return unless $tablename;
	$self->{tmc}{$tmc} = $mapping;
	$mapping->{from} = $tablename unless defined $mapping->{sql};
	
	foreach ($r2rml->objects($tmc, $rr->subjectMap))
	{
		next if $_->is_literal;
		$self->_r2rml_SubjectMapClass($r2rml, $_, $mapping);
		last;
	}
	
	unless ($mapping->{about})
	{
		($mapping->{about}) = grep { !$_->is_literal } $r2rml->objects_for_predicate_list($tmc, $rr->subject);
	}

	foreach ($r2rml->objects($tmc, $rr->predicateObjectMap))
	{
		next if $_->is_literal;
		$self->_r2rml_PredicateObjectMapClass($r2rml, $_, $mapping);
	}	

	my $key = $tablename;
	while (defined $self->{mappings}{$key})
	{
		$key = sprintf('+t%s', md5_hex($key));
	}
	
	$self->{mappings}{$key} = $mapping;
	
	return $mapping;
}

sub _r2rml_graph
{
	my ($self, $r2rml, $thing) = @_;
	
	my ($graph) =
		map { $_->is_resource ? $_->uri : $_->as_ntriples }
		grep { !$_->is_literal }
		$r2rml->objects($thing, $rr->graph);
	return $graph if $graph;

	foreach my $map ($r2rml->objects($thing, $rr->graphMap))
	{
		($graph) =
			map { $_->is_resource ? $_->uri : $_->as_ntriples }
			grep { !$_->is_literal }
			$r2rml->objects_for_predicate_list($thing, $rr->constant, $rr->graph);
		return $graph if $graph;
		
		($graph) =
			map { sprintf('{%s}', $_->literal_value) }
			grep { $_->is_literal }
			$r2rml->objects($thing, $rr->column);
		return $graph if $graph;

		($graph) =
			map { $_->literal_value }
			grep { $_->is_literal }
			$r2rml->objects($thing, $rr->template);
		return $graph if $graph;
	}
	
	return;
}

sub _r2rml_SubjectMapClass
{
	my ($self, $r2rml, $smc, $mapping) = @_;
	
	# the easy bit
	$mapping->{typeof} = [ grep { !$_->is_literal } $r2rml->objects($smc, $rr->class) ];
	
	# graph
	$mapping->{graph} = $self->_r2rml_graph($r2rml, $smc);

	# subject
	($mapping->{about}) =
		map { $_->is_resource ? $_->uri : $_->as_ntriples }
		grep { !$_->is_literal }
		$r2rml->objects_for_predicate_list($smc, $rr->constant, $rr->subject);
	unless ($mapping->{about})
	{
		my ($col) = grep { $_->is_literal } $r2rml->objects($smc, $rr->column);
		$mapping->{about} = sprintf('{%s}', $col->literal_value) if $col;
		$mapping->{_about_is_column} = 1 if $col;
	}
	unless ($mapping->{about})
	{
		my ($tmpl) = grep { $_->is_literal } $r2rml->objects($smc, $rr->template);
		$mapping->{about} = $tmpl->literal_value if $tmpl;
		$mapping->{_about_is_template} = 1 if $tmpl;
	}
	
	# termtype
	my ($termtype) =
		map {
				if ($_->as_ntriples =~ /(uri|iri|blank|blanknode|literal).?$/i)
					{ { uri=>'IRI', iri=>'IRI', blank=>'BlankNode', blanknode=>'BlankNode', literal=>'Literal' }->{lc $1} }
				else
					{ $_->as_ntriples }
			}
		$r2rml->objects_for_predicate_list($smc, $rr->termType, $rr->termtype);
	$termtype //= '';
	
	if ($mapping->{about} and $termtype =~ /^blank/i)
	{
		$mapping->{about} = sprintf('_:%s', $mapping->{about})
			unless $mapping->{about} =~ /^_:/;
	}
}

sub _r2rml_PredicateObjectMapClass
{
	my ($self, $r2rml, $pomc, $mapping) = @_;
	
	# graph
	my $graph = $self->_r2rml_graph($r2rml, $pomc);

	# predicates
	my @predicates;
	foreach ($r2rml->objects($pomc, $rr->predicateMap))
	{
		next if $_->is_literal;
		push @predicates, $self->_r2rml_PredicateMapClass($r2rml, $_);
	}

	push @predicates,
		map { $_->uri } 
		grep { $_->is_resource }
		$r2rml->objects_for_predicate_list($pomc, $rr->predicate);

	# objects
	my @objects;
	foreach ($r2rml->objects($pomc, $rr->objectMap))
	{
		next if $_->is_literal;
		my $obj = $self->_r2rml_ObjectMapClass($r2rml, $_);
		push @objects, $obj if defined $obj;
	}

	push @objects,
		map {
				my $x = {};
				if ($_->is_literal)
				{
					$x->{content}  = $_->literal_value;
					$x->{lang}     = $_->literal_value_language;
					$x->{datatype} = $_->literal_datatype;
					$x->{kind}     = 'property';
				}
				elsif ($_->is_resource)
				{
					$x->{resource} = $_->uri;
					$x->{kind}     = 'rel';
				}
				elsif ($_->is_blank)
				{
					$x->{resource} = $_->as_ntriples;
					$x->{kind}     = 'rel';
				}
				$x;
			} 
		$r2rml->objects_for_predicate_list($pomc, $rr->object);

	foreach ($r2rml->objects($pomc, $rr->refObjectMap))
	{
		next if $_->is_literal;
		my $obj = $self->_r2rml_RefObjectMapClass($r2rml, $_);
		push @objects, $obj if defined $obj;
	}

	foreach my $obj (@objects)
	{
		foreach my $p (@predicates)
		{
			my $o = dclone($obj);
			my $column = delete $o->{column} || '_';
			my $kind   = delete $o->{kind}   || 'property';
			$o->{$kind} = $p;
			
			push @{ $mapping->{columns}{$column} }, $o;
		}
	}
}

sub _r2rml_PredicateMapClass
{
	my ($self, $r2rml, $pmc) = @_;
	
	my ($p) = map { $_->uri } grep { $_->is_resource } $r2rml->objects_for_predicate_list($pmc, $rr->constant, $rr->predicate);
	unless ($p)
	{
		my ($col) = grep { $_->is_literal } $r2rml->objects($pmc, $rr->column);
		$p = sprintf('{%s}', $col->literal_value) if $col;
	}
	unless ($p)
	{
		my ($tmpl) = grep { $_->is_literal } $r2rml->objects($pmc, $rr->template);
		$p = $tmpl->literal_value if $tmpl;
	}

	return ($p);
}

sub _r2rml_ObjectMapClass
{
	my ($self, $r2rml, $omc) = @_;
	
	my ($datatype, $language, $termtype, $column);
	my ($o) = map {
			if ($_->is_resource)   { $termtype = 'IRI'; $_->value; }
			elsif ($_->is_blank)   { $termtype = 'BlankNode'; $_->as_ntriples; }
			elsif ($_->is_literal) { $datatype = $_->literal_datatype; $language = $_->literal_value_language; $termtype = 'Literal'; $_->literal_value; }
			else                   { $_->as_ntriples; }
		}
		$r2rml->objects_for_predicate_list($omc, $rr->constant, $rr->object);
	unless (defined $o)
	{
		my ($col) = grep { $_->is_literal } $r2rml->objects($omc, $rr->column);
		$o        = sprintf('{%s}', $col->literal_value) if $col;
		$column   = $col->literal_value if $col;
	}
	unless (defined $o)
	{
		my ($tmpl) = grep { $_->is_literal } $r2rml->objects($omc, $rr->template);
		$o = $tmpl->literal_value if $tmpl;
	}

	($datatype) =
		map { $_->uri }
		grep { $_->is_resource }
		$r2rml->objects($omc, $rr->datatype)
		unless $datatype;
	($language) =
		map { $_->literal_value }
		grep {  $_->is_literal }
		$r2rml->objects($omc, $rr->language)
		unless $language;
	($termtype) =
		map {
				if ($_->as_ntriples =~ /(uri|iri|blank|blanknode|literal).?$/i)
					{ { uri=>'IRI', iri=>'IRI', blank=>'BlankNode', blanknode=>'BlankNode', literal=>'Literal' }->{lc $1} }
				else
					{ $_->as_ntriples }
			}
		$r2rml->objects_for_predicate_list($omc, $rr->termType, $rr->termtype)
		unless $termtype;
	
	$termtype ||= 'Literal' if $datatype || $language || defined $column;
	$termtype ||= 'IRI';
	
	$o = sprintf('_:%s', $o)
		if (!ref $o) && $termtype =~ /^blank/i && $o !~ /^_:/;
		
	my $map = {};
	
	if ($column)
	{
		#$column = $1 if $column =~ m{^"(.+)"$};
		$map->{column} = $column;
	}
	else
	{
		my $x = ($termtype =~ /literal/i) ? 'content' : 'resource';
		$map->{$x} = $o;
	}
	
	$map->{datatype} = $datatype if $datatype;
	$map->{lang}     = $language if $language;
	$map->{kind}     = ($termtype =~ /literal/i) ? 'property' : 'rel';

	return $map;
}

sub _r2rml_RefObjectMapClass
{
	my ($self, $r2rml, $romc) = @_;
		
	my $parent;
	PARENT: foreach my $ptm ($r2rml->objects($romc, $rr->parentTriplesMap))
	{
		next PARENT if $ptm->is_literal;
		$parent = $self->_r2rml_TriplesMapClass($r2rml, $ptm);
		last PARENT if $parent;
	}
	return unless $parent;
	
	my $joins = [];
	JOIN: foreach my $jc ($r2rml->objects($romc, $rr->joinCondition))
	{
		my ($p) = grep { $_->is_literal }
			$r2rml->objects($jc, $rr->parent);
		my ($c) = grep { $_->is_literal }
			$r2rml->objects($jc, $rr->child);
		
		if ($p && $c)
		{
			push @$joins, { parent => $p->literal_value, child => $c->literal_value };
		}
	}
	
	return {
		column   => '_',
		join     => $parent->{sql} || $parent->{from},
		on       => $joins,
		resource => $parent->{about},
		method   => $parent->{sql} ? 'subquery' : 'table',
		};
}

1;

__END__

=encoding utf8

=head1 NAME

RDF::RDB2RDF::R2RML - map relational database to RDF using R2RML

=head1 SYNOPSIS

 my $mapper = RDF::RDB2RDF->new('R2RML', $r2rml);
 print $mapper->process_turtle($dbh);

=head1 DESCRIPTION

This class offers support for W3C R2RML, based on the 29 May 2012 working
draft. See the COMPLIANCE AND COMPATIBILITY section below for a list on
unimplemented areas.

This is a subclass of RDF::RDB2RDF::Simple. Differences noted below...

=head2 Constructor

=over 

=item * C<< RDF::RDB2RDF::R2RML->new($r2rml) >>

=item * C<< RDF::RDB2RDF->new('R2RML', $r2rml) >>

A single parameter is expected, this can either be an R2RML document as a
Turtle string, or an L<RDF::Trine::Model> containing R2RML data. If a Turtle
string, then the namespaces from it are also kept.

=back

=head2 Methods

=over

=item * C<< process_turtle($dbh, %options) >>

The mapping is included as an R2RML comment at the top of the Turtle. Passing
C<< no_r2rml => 1 >> can disable that feature.

=back


=head1 COMPLIANCE AND COMPATIBILITY

This implementation should be mostly compliant with the Direct Mapping
specification, with the following provisos:

=over

=item * rr:RefObjectMap, rr:parentTriplesMap, rr:joinCondition,
rr:JoinCondition, rr:child, rr:parent are only partially working.

=item * rr:defaultGraph is not understood.

=back

Other quirks are database-specific:

=over

=item * This module expects DBI to return utf8 character strings. Depending
on your database engine, you might need to play games with DBI and
your database server to get this working. If you're using 7-bit safe
ASCII, then this probably doesn't concern you.

=item * Different databases support different SQL datatypes. This module
attempts to map them to their XSD equivalents, but may not recognise
some exotic ones.

=item * This module has only been extensively tested on SQLite 3.6.23.1
and PostgreSQL 8.4.4. I know of no reason it shouldn't work with other
relational database engines, provided they are supported by DBI, but as
with all things SQL, I wouldn't be surprised if there were one or two
problems. Patches welcome.

=back

=head1 BUGS

Please report any bugs to
L<http://rt.cpan.org/Dist/Display.html?Queue=RDF-RDB2RDF>.

=head1 SEE ALSO

L<RDF::Trine>, L<RDF::RDB2RDF>, L<RDF::RDB2RDF::Simple>.

L<http://www.perlrdf.org/>.

L<http://www.w3.org/TR/2012/WD-r2rml-20120529/>.

=head1 AUTHOR

Toby Inkster E<lt>tobyink@cpan.orgE<gt>.

=head1 COPYRIGHT

Copyright 2011-2012 Toby Inkster

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 DISCLAIMER OF WARRANTIES

THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.