The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package XML::GRDDL::Transformation::RDF_EASE::Functional;

use 5.008;
use base qw[Exporter];
use strict;

use CSS;
use CSS::Parse::PRDGrammar;
use XML::GRDDL::Transformation::RDF_EASE::Selector;
use Exporter;
use XML::LibXML;

our $VERSION = '0.004';

our @EXPORT_OK = qw(&rdfease_to_rdfa &parse_sheet &parse_value &bnode_for_element &rule_matches_node);
our %EXPORT_TAGS = (
		'standard' => [qw(&rdfease_to_rdfa)],
		'extended' => [qw(&rdfease_to_rdfa &parse_sheet &parse_value &bnode_for_element &rule_matches_node)]
	);

our $_RDFEASE_MatcherCacher   = {};
our $_RDFEASE_BlankNodes      = {};
our $_RDFEASE_BlankNode_Count = 0;
our $_RDFEASE_Protocols_Seen  = {};
	
sub rdfease_to_rdfa
{
	my $ease  = shift;
	my $html  = shift;
	my $asDOM = shift || 0;
	
	# Initialise shared variables
	$_RDFEASE_MatcherCacher   = {};
	$_RDFEASE_BlankNodes      = {};
	$_RDFEASE_BlankNode_Count = 0;
	$_RDFEASE_Protocols_Seen  = {};

	# Parse XHTML into DOM tree
	my $parser  = XML::LibXML->new();
	my $DOMTree = $parser->parse_string($html);
	
	# RDF-EASE Algorithm: step 2.
	# Generate a 'kwijibo' string
	my $kwijibo = 'RDFEASE';
	while ($html =~ /$kwijibo/i)
		{ $kwijibo = 'RDFEASE'.int(rand(900000)+100000); }
	
	# RDF-EASE Algorithm, steps 1, 3, 4 and 5.
	# Parse RDF-EASE into structure
	my $ParsedEASE = parse_sheet($ease);
	
	# RDF-EASE Algorithm: step 6 and a little of step 2.
	# Process tree
	process_tree($DOMTree, $ParsedEASE, $kwijibo);
	
	# RDF-EASE Algorithm: we don't do step 7, as we want to return RDFa.
	{} ;
	
	# If they requested the DOM representation, then return it
	return $DOMTree if $asDOM;
	
	# Otherwise, return the result as an XHTML string.
	return $DOMTree->documentElement->toString;
}

sub process_tree
{
	my $DOM     = shift;
	my $EASE    = shift;
	my $kwijibo = shift;
	
	process_element($DOM->documentElement, $DOM, $EASE, $kwijibo);

	foreach my $proto (keys %$_RDFEASE_Protocols_Seen)
	{
		$DOM->documentElement->setAttribute('xmlns:'.$kwijibo.$proto, $proto.':');
	}
}

sub process_element
{
	my $elem    = shift; # 'E'
	my $DOM     = shift;
	my $CSS     = shift;
	my $kwijibo = shift;

	# For each rule set rs in RuleList
	foreach my $rule_block (@{$CSS->{'data'}})
	{
		# If the selector of rule set rs does not match element E, move on to the
		# next rule set in RuleList
		next unless rule_matches_node($rule_block, $elem, $DOM);

		# Each property value pair (p, v) within rs should be handled as follows
		foreach my $rule (@{$rule_block->{'properties'}})
		{
			# Skip non "-rdf-" rules.
			my $prop;
			if ($rule->{'property'} =~ /^\-rdf\-(.*)$/i)
				{ $prop = lc($1); }
			else
				{ next; }

			my @vals = parse_value($rule->{'value'}, $CSS->{'prefixes'});

			if ($prop =~ /^(typeof|rel|rev|property|role)$/)
			{
				if (grep {/^reset$/i} @vals)
					{ $elem->setAttribute('x-rdf-'.$prop, undef); }
					
				my $new = $elem->getAttribute('x-rdf-'.$prop);
				$new .= ' ' if ($new);
				foreach my $v (@vals)
				{
					next if ($v eq 'reset');
					$_RDFEASE_Protocols_Seen->{$1} = 1
						if ($v =~ /^([^:]+)/);
					$new .= "$kwijibo$v ";
				}
				$new =~ s/ $//;
				$elem->setAttribute('x-rdf-'.$prop, $new);
			}
			elsif ($prop eq 'about')
			{
				my $v = $vals[0];

				if (lc($v) eq 'reset')
					{ $elem->removeAttribute('x-rdf-about'); }
				elsif (lc($v) eq 'document')
				{
					$elem->setAttribute('x-rdf-about', '')
						unless (defined $elem->getAttribute($prop));
				}
				elsif ($v =~ /^NEAR:\s+(.+)$/)
				{
					my @matched = $DOM->documentElement->findnodes(XML::GRDDL::Transformation::RDF_EASE::Selector::to_xpath($1));
					my $best_match;
					foreach my $matching_node (@matched)
					{
						if (substr($elem->nodePath, 0, length($matching_node->nodePath)) eq $matching_node->nodePath)
						{
							$best_match = $matching_node
								if ((!$best_match)
								||  (length($matching_node->nodePath) > length($best_match->nodePath)));
						}
					}
					if ($best_match)
					{
						$elem->setAttribute('x-rdf-about', '['.bnode_for_element($best_match, $kwijibo).']');
					}
				}
			}
			elsif ($prop eq 'content')
			{
				if ($rule->{'value'} =~ /^\s*attr\([\'\"]?(.+)[\'\"]?\)\s*$/i)
					{ $elem->setAttribute('x-rdf-content', $elem->getAttribute($1)); }
			}
			elsif ($prop eq 'datatype')
			{
				my $v = $vals[0];
				
				if (lc($v) eq 'reset')
					{ $elem->removeAttribute('x-rdf-datatype'); }
				elsif (lc($v) eq 'string')
					{ $elem->setAttribute('x-rdf-datatype', ''); }
				elsif ($v =~ /\:/)
				{
					$_RDFEASE_Protocols_Seen->{$1} = 1
						if ($v =~ /^([^:]+)/);
					$elem->setAttribute('x-rdf-datatype', "$kwijibo$v");
				}
			}
		}
	}
	
	foreach my $prop (qw(about content datatype))
	{
		if (defined $elem->getAttribute('x-rdf-'.$prop))
		{
			$elem->setAttribute($prop, $elem->getAttribute('x-rdf-'.$prop))
				if (!defined $elem->getAttribute($prop));
			$elem->removeAttribute('x-rdf-'.$prop);
		}
	}
	foreach my $prop (qw(typeof rel rev property role))
	{
		if ($elem->getAttribute('x-rdf-'.$prop))
		{
			if ($elem->getAttribute($prop))
			{
				$elem->setAttribute($prop,
					$elem->getAttribute($prop).' '.
					$elem->getAttribute('x-rdf-'.$prop));
			}
			else
			{
				$elem->setAttribute($prop,
					$elem->getAttribute('x-rdf-'.$prop));
			}
			$elem->removeAttribute('x-rdf-'.$prop);
		}
	}

	my $recurse = 1;
	if (length $elem->getAttribute('property'))
	{
		$recurse = 0 if (!defined $elem->getAttribute('datatype'));
		$recurse = 0 if ($elem->getAttribute('datatype') =~ /XMLLiteral\s*$/);
		$recurse = 1 if (defined $elem->getAttribute('content'));
	}
	
	if ($recurse)
	{
		foreach my $child ($elem->getChildrenByTagName('*'))
		{
			process_element($child, $DOM, $CSS, $kwijibo);
		}
	}
}

sub bnode_for_element
{
	my $elem     = shift;
	my $kwijibo  = shift;
	my $nodepath = $elem->nodePath;
	
	unless (defined $_RDFEASE_BlankNodes->{$nodepath})
	{
		$_RDFEASE_BlankNode_Count++;
		$_RDFEASE_BlankNodes->{$nodepath} = sprintf('%s_Node%s',
			$kwijibo, $_RDFEASE_BlankNode_Count);
	}

	return '_:'.$_RDFEASE_BlankNodes->{$nodepath};
}

sub rule_matches_node
{
	my $rule = shift;
	my $elem = shift;
	my $dom  = shift;
	
	my $rulepath = $rule->{'xpath'};
	my $elempath = $elem->nodePath;
	
	return $_RDFEASE_MatcherCacher->{'Answers'}->{$rulepath}->{$elempath}
		if defined $_RDFEASE_MatcherCacher->{'Answers'}->{$rulepath}->{$elempath};
		
	unless (defined $_RDFEASE_MatcherCacher->{'Lists'}->{$rulepath})
	{
		my $xpc = XML::LibXML::XPathContext->new;
		$xpc->registerNs(xhtml => 'http://www.w3.org/1999/xhtml');
		$_RDFEASE_MatcherCacher->{'Lists'}->{$rulepath} = $xpc->findnodes($rulepath, $dom);
	}
	
	my $rv = 0;
	foreach my $match ($_RDFEASE_MatcherCacher->{'Lists'}->{$rulepath}->get_nodelist)
	{
		if ($match->isSameNode($elem))
		{
			$rv++;
			last;
		}
	}
	
	#warn sprintf("%s %s %s\n", $rulepath, ($rv?'matches':'DOES NOT MATCH'), $elempath);
	
	$_RDFEASE_MatcherCacher->{'Answers'}->{$rulepath}->{$elempath} = $rv;
	
	return $rv;
}

sub parse_value
{
	my $vals = shift;
	my $pfxs = shift;
	my @rv;
	
	return @rv
		if ($vals =~ /^ \s* normal \s* $/i); 
	
	while (length $vals)
	{
		if ($vals =~ /^ \s* (reset|document|string) \s* (.*) $/x)
		{
			push @rv, $1;
			$vals = $2;
		}
		elsif ($vals =~ /^ \s* url\(\s*\'([^\']*)\'\s*\) \s* (.*) $/ix)
		{
			push @rv, $1;
			$vals = $2;
		}
		elsif ($vals =~ /^ \s* url\(\s*\"([^\"]*)\"\s*\) \s* (.*) $/ix)
		{
			push @rv, $1;
			$vals = $2;
		}
		elsif ($vals =~ /^ \s* url\(\s*([^\"\'\)]*)\s*\) \s* (.*) $/ix)
		{
			push @rv, $1;
			$vals = $2;
		}
		elsif ($vals =~ /^ \s* nearest\-ancestor\(\s*\'([^\']*)\'\s*\) \s* (.*) $/ix)
		{
			push @rv, "NEAR: $1";
			$vals = $2;
		}
		elsif ($vals =~ /^ \s* nearest\-ancestor\(\s*\"([^\"]*)\"\s*\) \s* (.*) $/ix)
		{
			push @rv, "NEAR: $1";
			$vals = $2;
		}
		elsif ($vals =~ /^ \s* nearest\-ancestor\(\s*([^\"\'\)]*)\s*\) \s* (.*) $/ix)
		{
			push @rv, "NEAR: $1";
			$vals = $2;
		}
		elsif ($vals =~ /^ \s* \'([^\'\:]*)\:([^\']*)\' \s* (.*) $/ix)
		{
			push @rv, $pfxs->{$1}.$2;
			$vals = $3;
		}
		elsif ($vals =~ /^ \s* \"([^\"\:]*)\:([^\"]*)\" \s* (.*) $/ix)
		{
			push @rv, $pfxs->{$1}.$2;
			$vals = $3;
		}
		elsif ($vals =~ /^ \s* ([^\"\'\:\s]*)\:([^\"\'\s]*) \s* (.*) $/ix)
		{
			push @rv, $pfxs->{$1}.$2;
			$vals = $3;
		}
		else
		{
			my @null;
			return @null;
		}
	}
	
	return @rv;
}

sub parse_sheet
{
	my $css = shift;

	my @data;
	my ($prefixes, $i) = ({
		'dc'    => 'http://purl.org/dc/terms/',
		'foaf'  => 'http://xmlns.com/foaf/0.1/',
		'owl'   => 'http://www.w3.org/2002/07/owl#',
		'rdf'   => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
		'rdfs'  => 'http://www.w3.org/2000/01/rdf-schema#',
		'sioc'  => 'http://rdfs.org/sioc/ns#',
		'skos'  => 'http://www.w3.org/2004/02/skos/core#',
		'xsd'   => 'http://www.w3.org/2001/XMLSchema#'
	}, 0);

	# Handle at-rules in advance, as CSS::Parse::Heavy doesn't support them.
	while ($css =~ /^\s*(\@\S+)\s+([^\;]+)\s*\;\s*(.*)$/s)
	{
		$css = $3;

		my $atrule = $1;
		my $stuff  = $2;
		
		if ($atrule =~ /^\@(prefix|namespace)$/i)
		{
			if ($stuff =~ /^\s*([A-Za-z0-9\._-]+)\s+(.+)\s*$/)
			{
				my $pfx = $1;
				my $uri = $2;
				
				if ($uri =~ /^url\((.*)\)$/)
					{ $uri = $1; }
				if ($uri =~ /^\"(.*)\"$/)
					{ $uri = $1; }
				elsif ($uri =~ /^\'(.*)\'$/)
					{ $uri = $1; }
					
				$prefixes->{ $pfx } = $uri;
			}
		}
	}

	# Patch CSS::Parse::Heavy because it doesn't support CSS properties that
	# start with a dash.
	$CSS::Parse::PRDGrammar::GRAMMAR =~ s#macro_nmstart:\s+/\[a-zA-Z\]/
	                                     #macro_nmstart:   /[a-zA-Z_-]/
	                                     #x;	

	# Actually parse the CSS, using CSS::Parse::Heavy.
	my $parser = CSS->new( { 'parser' => 'CSS::Parse::Heavy' } )->read_string($css);

	foreach my $block (@$parser)
	{
		foreach my $selector (@{ $block->{selectors} })
		{
			if ($selector->{name} eq '_')
			{
				foreach my $property (@{ $block->{properties} })
				{
					my $prefix = $property->{options}->{property};
					my $url    = $property->{options}->{value};
					
					$url = $1 if ($url =~ /url\([\'\"]?([^\'\"]+)[\'\"]?\)/i);
					$prefixes->{$prefix} = $url;
				}
				next;
			}
			
			my $x = {};
			foreach my $property (@{ $block->{properties} })
			{
				push @{ $x->{properties} }, $property->{options};
			}
			$x->{selector}    = $selector->{name};
			$x->{order}       = ++$i;
			$x->{tokens}      = XML::GRDDL::Transformation::RDF_EASE::Selector::get_tokens($x->{selector});
			$x->{specificity} = XML::GRDDL::Transformation::RDF_EASE::Selector::specificity(@{ $x->{tokens} });
			$x->{xpath}       = XML::GRDDL::Transformation::RDF_EASE::Selector::to_xpath(@{ $x->{tokens} });
			push @data, $x;
		}
	}
	
	my @sorted = sort css21_cascade_order @data;
	
	return {
		prefixes => $prefixes,
		data     => \@sorted
	};
}

sub css21_cascade_order
{
	return ($a->{order} <=> $b->{order})
		if $a->{specificity} == $b->{specificity};

	return ($a->{specificity} <=> $b->{specificity});
}

1;

__END__

=head1 NAME

XML::GRDDL::Transformation::RDF_EASE::Functional - stand-alone RDF-EASE module

=head1 DESCRIPTION

This module exports one function:

=over 4

=item C<< rdfease_to_rdfa( $css, $xhtml, $as_dom ) >>

Takes an RDF-EASE (CSS) transformation and an XHTML document (well-formed
string) and returns the resulting XHTML+RDFa document, which can then be
fed to L<RDF::RDFa::Parser>.

If $as_dom is true, returns an XML::LibXML::Document; otherwise, a string.

=back

=head1 SEE ALSO

L<XML::GRDDL>, L<XML::GRDDL::Transformation::RDF_EASE>.

L<RDF::RDFa::Parser>.

=head1 AUTHOR

Toby Inkster E<lt>tobyink@cpan.orgE<gt>.

=head1 COPYRIGHT AND LICENCE

Copyright 2008-2012 Toby Inkster

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 DISCLAIMER OF WARRANTIES

THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.