lib/XML/Atom/OWL.pm - metacpan.org

package XML::Atom::OWL;

use 5.010;
use common::sense;

use Carp 1.00;
use DateTime 0;
use Encode 0 qw(encode_utf8);
use HTTP::Link::Parser 0.100;
use LWP::UserAgent 0;
use MIME::Base64 0 qw(decode_base64);
use RDF::Trine 0.135;
use Scalar::Util 0 qw(blessed);
use URI 1.30;
use URI::URL 0;
use XML::LibXML 1.70 qw(:all);

use constant AAIR_NS  => 'http://xmlns.notu.be/aair#';
use constant ATOM_NS  => 'http://www.w3.org/2005/Atom';
use constant AWOL_NS  => 'http://bblfish.net/work/atom-owl/2006-06-06/#';
use constant AS_NS    => 'http://activitystrea.ms/spec/1.0/';
use constant AX_NS    => 'http://buzzword.org.uk/rdf/atomix#';
use constant FH_NS    => 'http://purl.org/syndication/history/1.0';
use constant FOAF_NS  => 'http://xmlns.com/foaf/0.1/';
use constant IANA_NS  => 'http://www.iana.org/assignments/relation/';
use constant RDF_NS   => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
use constant RDF_TYPE => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
use constant THR_NS   => 'http://purl.org/syndication/thread/1.0';
use constant XSD_NS   => 'http://www.w3.org/2001/XMLSchema#';

our $VERSION = '0.103';

sub new
{
	my $class   = shift;
	my $content = shift;
	my $baseuri = shift;
	my $options = shift || undef;
	my $store   = shift || undef;
	my $domtree;
	
	unless (defined $content)
	{
		my $ua = LWP::UserAgent->new;
		$ua->agent(sprintf('%s/%s ', __PACKAGE__, $VERSION));
		$ua->default_header("Accept" => "application/atom+xml, application/xml;q=0.1, text/xml;q=0.1");
		my $response = $ua->get($baseuri);
		croak "HTTP response not successful\n"
			unless $response->is_success;
		croak "Non-Atom HTTP response\n"
			unless $response->content_type =~ m`^(text/xml)|(application/(atom\+xml|xml))$`;
		$content = $response->decoded_content;
	}

	if (blessed($content) and $content->isa('XML::LibXML::Document'))
	{
		($domtree, $content) = ($content, $content->toString);
	}
	else
	{
		my $xml_parser = XML::LibXML->new;
		$domtree = $xml_parser->parse_string($content);
	}

	$store = RDF::Trine::Store::DBI->temporary_store
		unless defined $store;

	my $self = bless {
		'content'   => $content,
		'baseuri'   => $baseuri,
		'options'   => $options,
		'DOM'       => $domtree,
		'sub'       => {},
		'RESULTS'   => RDF::Trine::Model->new($store),
		}, $class;

	return $self;
}

sub uri
{
	my $this  = shift;
	my $param = shift || '';
	my $opts  = shift || {};
	
	if ((ref $opts) =~ /^XML::LibXML/)
	{
		my $x = {'element' => $opts};
		$opts = $x;
	}
	
	if ($param =~ /^([a-z][a-z0-9\+\.\-]*)\:/i)
	{
		# seems to be an absolute URI, so can safely return "as is".
		return $param;
	}
	elsif ($opts->{'require-absolute'})
	{
		return undef;
	}
	
	my $base = $this->{baseuri};
	if ($opts->{'element'})
	{
		$base = $this->get_node_base($opts->{'element'});
	}
	
	my $url = url $param, $base;
	my $rv  = $url->abs->as_string;

	while ($rv =~ m!^(http://.*)(\.\./|\.)+(\.\.|\.)?$!i)
	{
		$rv = $1;
	}
	
	return $rv;
}

sub dom
{
	my $this = shift;
	return $this->{DOM};
}


sub graph
{
	my $this = shift;
	$this->consume;
	return $this->{RESULTS};
}

sub graphs
{
	my $this = shift;
	$this->consume;
	return { $this->{'baseuri'} => $this->{RESULTS} };
}

sub root_identifier
{
	my $self = shift;
	$self->consume;
	if ($self->{'root_identifier'} =~ /^_:(.*)/)
	{
		return RDF::Trine::Node::Blank->new($1);
	}
	else
	{
		return RDF::Trine::Node::Resource->new($self->{'root_identifier'});
	}
}

sub set_callbacks
# Set callback functions for handling RDF triples.
{
	my $this = shift;

	if ('HASH' eq ref $_[0])
	{
		$this->{'sub'} = $_[0];
	}
	elsif (defined $_[0])
	{
		die("What kind of callback hashref was that??\n");
	}
	else
	{
		$this->{'sub'} = undef;
	}
	
	return $this;
}

sub consume
{
	my $self = shift;

	return $self if $self->{'comsumed'};

	my $root = $self->dom->documentElement;
	
	if ($root->namespaceURI eq ATOM_NS and $root->localname eq 'feed')
	{
		$self->{'root_identifier'} = $self->consume_feed($root);
	}
	elsif ($root->namespaceURI eq ATOM_NS and $root->localname eq 'entry')
	{
		$self->{'root_identifier'} = $self->consume_entry($root);
	}
	
	$self->{'comsumed'}++;
	
	return $self;
}

sub consume_feed
{
	my $self = shift;
	my $feed = shift;
	my $skip_entries = shift || 0;
	
	# Feed
	my $feed_identifier = $self->bnode($feed);
	$self->rdf_triple($feed, $feed_identifier, RDF_TYPE, AWOL_NS.'Feed');

	# Common stuff
	$self->consume_feed_or_entry($feed, $feed_identifier);

	# fh:archive and fh:complete
	if ($feed->getChildrenByTagNameNS(FH_NS, 'archive'))
	{
		$self->rdf_triple($feed, $feed_identifier, RDF_TYPE, AX_NS.'ArchiveFeed');
	}
	my $complete = 0;
	if ($feed->getChildrenByTagNameNS(FH_NS, 'complete'))
	{
		$complete = 1;
		$self->rdf_triple($feed, $feed_identifier, RDF_TYPE, AX_NS.'CompleteFeed');
	}
	
	my $last_listid;

	# entry
	unless ($skip_entries)
	{
		my @elems = $feed->getChildrenByTagNameNS(ATOM_NS, 'entry');
		foreach my $e (@elems)
		{
			my $entry_identifier = $self->consume_entry($e);
			$self->rdf_triple($e, $feed_identifier, AWOL_NS.'entry', $entry_identifier);

			# If this feed is known to be complete, include an rdf:List
			# to assist in open-world reasoning.
			if ($complete)
			{
				my $listid = $self->bnode;
				if (defined $last_listid)
				{
					$self->rdf_triple($e, $last_listid, RDF_NS.'rest', $listid);
				}
				else
				{
					$self->rdf_triple($e, $feed_identifier, AX_NS.'entry-list', $listid);
				}
				$self->rdf_triple($e, $listid, RDF_TYPE, RDF_NS.'List');
				$self->rdf_triple($e, $listid, RDF_NS.'first', $entry_identifier);
				$last_listid = $listid;
			}
		}
	}
	if ($complete)
	{
		if (defined $last_listid)
		{
			$self->rdf_triple($feed, $last_listid, RDF_NS.'rest', RDF_NS.'nil');
		}
		else
		{
			$self->rdf_triple($feed, $feed_identifier, AX_NS.'entry-list', RDF_NS.'nil');
		}
	}
	
	# icon and logo
	foreach my $role (qw(icon logo))
	{
		my @elems = $feed->getChildrenByTagNameNS(ATOM_NS, $role);
		foreach my $e (@elems)
		{
			my $img = $self->uri($e->textContent, $e);
			$self->rdf_triple($e, $feed_identifier, AWOL_NS.$role, $img);
			$self->rdf_triple($e, $img, RDF_TYPE, FOAF_NS.'Image');
		}
	}

	# generator
	{
		my @elems = $feed->getChildrenByTagNameNS(ATOM_NS, 'generator');
		foreach my $e (@elems)
		{
			my $gen_identifier = $self->consume_generator($e);
			$self->rdf_triple($e, $feed_identifier, AWOL_NS.'generator', $gen_identifier);
		}
	}
	
	# subtitle
	{
		my @elems = $feed->getChildrenByTagNameNS(ATOM_NS, 'subtitle');
		foreach my $e (@elems)
		{
			my $content_identifier = $self->consume_textconstruct($e);
			$self->rdf_triple($e, $feed_identifier, AWOL_NS.'subtitle', $content_identifier);
		}
	}

	return $feed_identifier;
}

sub consume_entry
{
	my $self  = shift;
	my $entry = shift;
	
	# Entry
	my $entry_identifier = $self->bnode($entry);
	$self->rdf_triple($entry, $entry_identifier, RDF_TYPE, AWOL_NS.'Entry');

	# Common stuff
	$self->consume_feed_or_entry($entry, $entry_identifier);
	
	# published
	{
		my @elems = $entry->getChildrenByTagNameNS(ATOM_NS, 'published');
		foreach my $e (@elems)
		{
			$self->rdf_triple_literal($e, $entry_identifier, AWOL_NS.'published', $e->textContent, XSD_NS.'dateTime');
		}
	}

	# summary
	{
		my @elems = $entry->getChildrenByTagNameNS(ATOM_NS, 'content');
		foreach my $e (@elems)
		{
			my $content_identifier = $self->consume_content($e);
			$self->rdf_triple($e, $entry_identifier, AWOL_NS.'content', $content_identifier);
		}
	}
	
	# source
	{
		my @elems = $entry->getChildrenByTagNameNS(ATOM_NS, 'source');
		foreach my $e (@elems)
		{
			my $feed_identifier = $self->consume_feed($e, 1);
			$self->rdf_triple($e, $entry_identifier, AWOL_NS.'source', $feed_identifier);
		}
	}

	# summary
	{
		my @elems = $entry->getChildrenByTagNameNS(ATOM_NS, 'summary');
		foreach my $e (@elems)
		{
			my $content_identifier = $self->consume_textconstruct($e);
			$self->rdf_triple($e, $entry_identifier, AWOL_NS.'summary', $content_identifier);
		}
	}

	# thr:in-reply-to
	{
		my @elems = $entry->getChildrenByTagNameNS(THR_NS, 'in-reply-to');
		foreach my $e (@elems)
		{
			my $irt_id = $self->consume_inreplyto($e);
			$self->rdf_triple($e, $entry_identifier, AX_NS.'in-reply-to', $irt_id);
		}
	}

	# thr:total
	{
		my @elems = $entry->getChildrenByTagNameNS(THR_NS, 'total');
		foreach my $e (@elems)
		{
			my $total = $e->textContent;
			$self->rdf_triple_literal($e, $entry_identifier, AX_NS.'total', $total, XSD_NS.'integer');
		}
	}
	
	return $entry_identifier;
}

sub consume_feed_or_entry
{
	my $self = shift;
	my $fore = shift;
	my $id   = shift;
	
	my @elems = $fore->getChildrenByTagNameNS(ATOM_NS, 'id');
	foreach my $e (@elems)
	{
		my $_id = $self->uri($e->textContent, $e);
		$self->rdf_triple_literal($e, $id, AWOL_NS.'id', $_id, XSD_NS.'anyURI');
	}
	
	my $is_as = 0;
	
	# activitystreams:object, activitystreams:target
	foreach my $role (qw(object target))
	{
		my @elems = $fore->getChildrenByTagNameNS(AS_NS, $role);
		foreach my $e (@elems)
		{
			$is_as++;
			my $obj_id = $self->consume_entry($e, $id);
			$self->rdf_triple($e, $id, AAIR_NS.'activity'.ucfirst($role), $obj_id);
		}
	}

	# activitystreams:verb
	{
		my @elems = $fore->getChildrenByTagNameNS(AS_NS, 'verb');
		foreach my $e (@elems)
		{
			$is_as++;
			my $url = $e->textContent;
			$url =~ s/(^\s*)|(\s*$)//g;
			$url = url $url, 'http://activitystrea.ms/schema/1.0/';
			$self->rdf_triple($e, $id, AAIR_NS.'activityVerb', "$url");
		}
		if ($is_as && !@elems)
		{
			$self->rdf_triple($fore, $id, AAIR_NS.'activityVerb', "http://activitystrea.ms/schema/1.0/post");
		}
	}

	# activitystreams:object-type
	{
		my @elems = $fore->getChildrenByTagNameNS(AS_NS, 'object-type');
		foreach my $e (@elems)
		{
			my $url = $e->textContent;
			$url =~ s/(^\s*)|(\s*$)//g;
			$url = url $url, 'http://activitystrea.ms/schema/1.0/';
			$self->rdf_triple($e, $id, RDF_NS.'type', "$url");
		}
	}

	# authors and contributors
	foreach my $role (qw(author contributor))
	{
		my @elems = $fore->getChildrenByTagNameNS(ATOM_NS, $role);
		foreach my $e (@elems)
		{
			my $person_identifier = $self->consume_person($e);
			$self->rdf_triple($e, $id, AWOL_NS.$role, $person_identifier);
			
			if ($role eq 'author' and $is_as)
			{
				$self->rdf_triple($e, $person_identifier, RDF_NS.'type', AAIR_NS.'Actor');
				$self->rdf_triple($e, $id, AAIR_NS.'activityActor', $person_identifier);
			}
		}
	}

	# updated
	{
		my @elems = $fore->getChildrenByTagNameNS(ATOM_NS, 'updated');
		foreach my $e (@elems)
		{
			$self->rdf_triple_literal($e, $id, AWOL_NS.'updated', $e->textContent, XSD_NS.'dateTime');
		}
	}

	# link
	{
		my @elems = $fore->getChildrenByTagNameNS(ATOM_NS, 'link');
		foreach my $e (@elems)
		{
			my $link_identifier = $self->consume_link($e, $id);
			$self->rdf_triple($e, $id, AWOL_NS.'link', $link_identifier);
		}
	}

	# title and rights
	foreach my $role (qw(title rights))
	{
		my @elems = $fore->getChildrenByTagNameNS(ATOM_NS, $role);
		foreach my $e (@elems)
		{
			my $content_identifier = $self->consume_textconstruct($e);
			$self->rdf_triple($e, $id, AWOL_NS.$role, $content_identifier);
		}
	}
	
	# category
	{
		my @elems = $fore->getChildrenByTagNameNS(ATOM_NS, 'category');
		foreach my $e (@elems)
		{
			my $cat_identifier = $self->consume_category($e, $id);
			$self->rdf_triple($e, $id, AWOL_NS.'category', $cat_identifier);
		}
	}

	# Unknown Extensions!
	{
		my @elems = $fore->getChildrenByTagName('*');
		foreach my $e (@elems)
		{
			next if $e->namespaceURI eq ATOM_NS;
			next if $e->namespaceURI eq AS_NS;
			next if $e->namespaceURI eq FH_NS;
			next if $e->namespaceURI eq THR_NS;
			
			my $xml = $self->xmlify_inclusive($e);
			$self->rdf_triple_literal($e, $id, AX_NS.'extension-element', $xml, RDF_NS.'XMLLiteral');
		}
	}

	return $id;
}

sub consume_textconstruct
{
	my $self = shift;
	my $elem = shift;
	
	my $id = $self->bnode($elem);
	$self->rdf_triple($elem, $id, RDF_TYPE, AWOL_NS.'TextContent');
	
	my $lang = $self->get_node_lang($elem);
	
	if (lc $elem->getAttribute('type') eq 'xhtml')
	{
		my $cnt = $self->xmlify($elem, $lang);
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'xhtml', $cnt, RDF_NS.'XMLLiteral');
	}

	elsif (lc $elem->getAttribute('type') eq 'html')
	{
		my $cnt = $elem->textContent;
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'html', $cnt, undef, $lang);
	}

	else
	{
		my $cnt = $elem->textContent;
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'text', $cnt, undef, $lang);
	}
	
	return $id;
}

sub consume_content
{
	my $self = shift;
	my $elem = shift;
	
	my $id = $self->bnode($elem);
	$self->rdf_triple($elem, $id, RDF_TYPE, AWOL_NS.'Content');
	
	my $lang = $self->get_node_lang($elem);
	my $base = $self->get_node_base($elem);
	
	if ($elem->hasAttribute('src'))
	{
		my $link = $self->uri($elem->getAttribute('src'), $elem);
		$self->rdf_triple($elem, $id, AWOL_NS.'src', $link);
		
		if ($self->{'options'}->{'no_fetch_content_src'})
		{
			$self->rdf_triple_literal($elem, $id, AWOL_NS.'type', $elem->getAttribute('type'))
				if $elem->hasAttribute('type');
		}
		else
		{
			my $ua = LWP::UserAgent->new;
			$ua->agent(sprintf('%s/%s ', __PACKAGE__, $VERSION));
			if ($elem->hasAttribute('type'))
			{
				$ua->default_header("Accept" => $elem->getAttribute('type').", */*;q=0.1");
			}
			else
			{
				$ua->default_header("Accept" => "application/xhtml+xml, text/html, text/plain, */*;q=0.1");
			}
			my $response = $ua->get($self->uri($elem->getAttribute('src'), $elem));
			if ($response->is_success)
			{
				$self->rdf_triple_literal($elem, $id, AWOL_NS.'body', $response->decoded_content);
				
				if ($response->content_type)
					{ $self->rdf_triple_literal($elem, $id, AWOL_NS.'type', $response->content_type); }
				elsif ($elem->hasAttribute('type'))
					{ $self->rdf_triple_literal($elem, $id, AWOL_NS.'type', $elem->getAttribute('type')); }

				if ($response->content_language =~ /^\s*([a-z]{2,3})\b/i)
					{ $self->rdf_triple_literal($elem, $id, AWOL_NS.'lang', lc $1, XSD_NS.'language'); }

				if ($response->base)
					{ $self->rdf_triple($elem, $id, AWOL_NS.'base', $response->base); }
			}
			else
			{
				$self->rdf_triple_literal($elem, $id, AWOL_NS.'type', $elem->getAttribute('type'))
					if $elem->hasAttribute('type');
			}
		}
	}
	
	elsif (lc $elem->getAttribute('type') eq 'text' or !$elem->hasAttribute('type'))
	{
		my $cnt = $elem->textContent;
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'body', $cnt, undef, $lang);
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'type', 'text/plain');
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'lang', $lang, XSD_NS.'language') if $lang;
		$self->rdf_triple($elem, $id, AWOL_NS.'base', $base) if $base;
	}
	
	elsif (lc $elem->getAttribute('type') eq 'xhtml')
	{
		my $cnt = $self->xmlify($elem, $lang);
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'body', $cnt, RDF_NS.'XMLLiteral');
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'type', 'application/xhtml+xml');
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'lang', $lang, XSD_NS.'language') if $lang;
		$self->rdf_triple($elem, $id, AWOL_NS.'base', $base) if $base;
	}

	elsif (lc $elem->getAttribute('type') eq 'html')
	{
		my $cnt = $elem->textContent;
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'body', $cnt, undef, $lang);
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'type', 'text/html');
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'lang', $lang, XSD_NS.'language') if $lang;
		$self->rdf_triple($elem, $id, AWOL_NS.'base', $base) if $base;
	}

	elsif ($elem->getAttribute('type') =~ m'([\+\/]xml)$'i)
	{
		my $cnt = $self->xmlify($elem, $lang);
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'body', $cnt, RDF_NS.'XMLLiteral');
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'type', $elem->getAttribute('type'));
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'lang', $lang, XSD_NS.'language') if $lang;
		$self->rdf_triple($elem, $id, AWOL_NS.'base', $base) if $base;
	}
	
	elsif ($elem->getAttribute('type') =~ m'^text\/'i)
	{
		my $cnt = $elem->textContent;
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'body', $cnt, undef, $lang);
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'type', $elem->getAttribute('type'));
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'lang', $lang, XSD_NS.'language') if $lang;
		$self->rdf_triple($elem, $id, AWOL_NS.'base', $base) if $base;
	}
	
	elsif ($elem->hasAttribute('type'))
	{
		my $cnt = $elem->textContent;
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'body', decode_base64($cnt));
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'type', $elem->getAttribute('type'));
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'lang', $lang, XSD_NS.'language') if $lang;
		$self->rdf_triple($elem, $id, AWOL_NS.'base', $base) if $base;
	}

	return $id;
}

sub consume_person
{
	my $self   = shift;
	my $person = shift;
	
	# Person
	my $person_identifier = $self->bnode($person);
	$self->rdf_triple($person, $person_identifier, RDF_TYPE, AWOL_NS.'Person');
	
	# name
	{
		my @elems = $person->getChildrenByTagNameNS(ATOM_NS, 'name');
		foreach my $e (@elems)
		{
			$self->rdf_triple_literal($e, $person_identifier, AWOL_NS.'name', $e->textContent);
		}
	}

	# uri
	{
		my @elems = $person->getChildrenByTagNameNS(ATOM_NS, 'uri');
		foreach my $e (@elems)
		{
			my $link = $self->uri($e->textContent, $e);
			$self->rdf_triple($e, $person_identifier, AWOL_NS.'uri', $link);
		}
	}

	# email
	{
		my @elems = $person->getChildrenByTagNameNS(ATOM_NS, 'email');
		foreach my $e (@elems)
		{
			$self->rdf_triple($e, $person_identifier, AWOL_NS.'email', 'mailto:'.$e->textContent);
		}
	}

	return $person_identifier;
}

sub consume_generator
{
	my $self   = shift;
	my $elem   = shift;
	
	# Person
	my $identifier = $self->bnode($elem);
	$self->rdf_triple($elem, $identifier, RDF_TYPE, AWOL_NS.'Generator');
	
	# name
	{
		my $lang = $self->get_node_lang($elem);
		$self->rdf_triple_literal($elem, $identifier, AWOL_NS.'name', $elem->textContent, undef, $lang);
	}

	# uri
	if ($elem->hasAttribute('uri'))
	{
		my $link = $self->uri($elem->getAttribute('uri'), $elem);
		$self->rdf_triple($elem, $identifier, AWOL_NS.'uri', $link);
	}

	# version
	if ($elem->hasAttribute('uri'))
	{
		$self->rdf_triple($elem, $identifier, AWOL_NS.'version', $elem->getAttribute('version'));
	}

	return $identifier;
}

sub consume_inreplyto
{
	my $self    = shift;
	my $link    = shift;
	
	my $id = $self->bnode($link);
	$self->rdf_triple($link, $id, RDF_TYPE, AWOL_NS.'Entry');
	
	if ($link->hasAttribute('ref'))
	{
		$self->rdf_triple_literal($link, $id, AWOL_NS.'id', $link->getAttribute('ref'), XSD_NS.'anyURI');
	}
	
	if ($link->hasAttribute('href'))
	{
		my $href = $self->uri($link->getAttribute('href'), $link);
		$self->rdf_triple($link, $id, IANA_NS.'self', $href);
	}
	
	# TODO: "type".
	
	if ($link->hasAttribute('source'))
	{
		my $fid  = $self->bnode;
		my $href = $self->uri($link->getAttribute('href'), $link);
		$self->rdf_triple($link, $id, AWOL_NS.'source', $fid);
		$self->rdf_triple($link, $fid, RDF_TYPE, AWOL_NS.'Feed');
		$self->rdf_triple($link, $fid, IANA_NS.'self', $href);
	}
	
	return $id;
}

sub consume_link
{
	my $self    = shift;
	my $link    = shift;
	my $subject = shift || undef;
	
	# Link
	my $link_identifier = $self->bnode($link);
	$self->rdf_triple($link, $link_identifier, RDF_TYPE, AWOL_NS.'Link');

	# Destination
	my $destination_identifier = $self->bnode;
	$self->rdf_triple($link, $destination_identifier, RDF_TYPE, AWOL_NS.'Content');
	$self->rdf_triple($link, $link_identifier, AWOL_NS.'to', $destination_identifier);

	# rel
	{
		my $rel = HTTP::Link::Parser::relationship_uri(
			$link->hasAttribute('rel') ? $link->getAttribute('rel') : 'alternate');
		$self->rdf_triple($link, $link_identifier, AWOL_NS.'rel', $rel);
		
		if ($link->hasAttribute('href') and defined $subject)
		{
			my $href = $self->uri($link->getAttribute('href'), $link);
			$self->rdf_triple($link, $subject, $rel, $href);
		}
	}
	
	# href
	if ($link->hasAttribute('href'))
	{
		my $href = $self->uri($link->getAttribute('href'), $link);
		$self->rdf_triple($link, $destination_identifier, AWOL_NS.'src', $href);
	}

	# hreflang
	if ($link->hasAttribute('hreflang'))
	{
		my $hreflang = $link->getAttribute('hreflang');
		$self->rdf_triple_literal($link, $destination_identifier, AWOL_NS.'lang', $hreflang);
	}

	# length
	if ($link->hasAttribute('length'))
	{
		my $length = $link->getAttribute('length');
		$self->rdf_triple_literal($link, $destination_identifier, AWOL_NS.'length', $length, XSD_NS.'integer');
	}

	# type
	if ($link->hasAttribute('type'))
	{
		my $type = $link->getAttribute('type');
		$self->rdf_triple_literal($link, $destination_identifier, AWOL_NS.'type', $type);
	}

	# title: TODO - check this uses AWOL properly.
	if ($link->hasAttribute('title'))
	{
		my $lang  = $self->get_node_lang($link);
		my $title = $link->getAttribute('title');
		$self->rdf_triple_literal($link, $link_identifier, AWOL_NS.'title', $title, undef, $lang);
	}

	# thr:count
	if ($link->hasAttributeNS(THR_NS, 'count'))
	{
		my $count = $link->getAttributeNS(THR_NS, 'count');
		$self->rdf_triple_literal($link, $link_identifier, AX_NS.'count', $count, XSD_NS.'integer');
	}

	# thr:updated
	if ($link->hasAttributeNS(THR_NS, 'updated'))
	{
		my $u = $link->getAttributeNS(THR_NS, 'updated');
		$self->rdf_triple_literal($link, $link_identifier, AX_NS.'updated', $u, XSD_NS.'dateTime');
	}

	return $link_identifier;
}

sub consume_category
{
	my $self    = shift;
	my $elem    = shift;
	
	# Link
	my $id = $self->bnode($elem);
	$self->rdf_triple($elem, $id, RDF_TYPE, AWOL_NS.'Category');

	# term
	if ($elem->hasAttribute('term'))
	{
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'term', $elem->getAttribute('term'));
	}
	
	# label
	if ($elem->hasAttribute('label'))
	{
		my $lang = $self->get_node_lang($elem);
		$self->rdf_triple_literal($elem, $id, AWOL_NS.'label', $elem->getAttribute('label'), undef, $lang);
	}

	# scheme
	if ($elem->hasAttribute('scheme'))
	{
		my $link = $self->uri($elem->getAttribute('scheme'), $elem);
		$self->rdf_triple($elem, $id, AWOL_NS.'scheme', $link);
	}

	return $id;
}

sub xmlify
# Function only used internally.
{
	my $this = shift;
	my $dom  = shift;
	my $lang = shift;
	my $rv;

	$lang = $this->get_node_lang($dom)
		unless $lang;

	foreach my $kid ($dom->childNodes)
	{
		my $fakelang = 0;
		if (($kid->nodeType == XML_ELEMENT_NODE) && defined $lang)
		{
			unless ($kid->hasAttributeNS(XML_XML_NS, 'lang'))
			{
				$kid->setAttributeNS(XML_XML_NS, 'lang', $lang);
				$fakelang++;
			}
		}
		
		$rv .= $kid->toStringEC14N(1);
		
		if ($fakelang)
		{
			$kid->removeAttributeNS(XML_XML_NS, 'lang');
		}
	}
	
	return $rv;
}


sub xmlify_inclusive
# Function only used internally.
{
	my $this = shift;
	my $dom  = shift;
	my $lang = shift;
	my $rv;
	
	$lang = $this->get_node_lang($dom)
		unless $lang;
	
	my $fakelang = 0;
	if (($dom->nodeType == XML_ELEMENT_NODE) && defined $lang)
	{
		unless ($dom->hasAttributeNS(XML_XML_NS, 'lang'))
		{
			$dom->setAttributeNS(XML_XML_NS, 'lang', $lang);
			$fakelang++;
		}
	}
	
	$rv = $dom->toStringEC14N(1);
	
	if ($fakelang)
	{
		$dom->removeAttributeNS(XML_XML_NS, 'lang');
	}
	
	return $rv;
}

sub get_node_lang
{
	my $this = shift;
	my $node = shift;

	if ($node->hasAttributeNS(XML_XML_NS, 'lang'))
	{
		return valid_lang($node->getAttributeNS(XML_XML_NS, 'lang')) ?
			$node->getAttributeNS(XML_XML_NS, 'lang'):
			undef;
	}

	if ($node != $this->{'DOM'}->documentElement
	&&  defined $node->parentNode
	&&  $node->parentNode->nodeType == XML_ELEMENT_NODE)
	{
		return $this->get_node_lang($node->parentNode);
	}
	
	return undef;
}

sub get_node_base
{
	my $this = shift;
	my $node = shift;

	my @base;

	while (1)
	{
		push @base, $node->getAttributeNS(XML_XML_NS, 'base')
			if $node->hasAttributeNS(XML_XML_NS, 'base');
			
		$node = $node->parentNode;
		last unless blessed($node) && $node->isa('XML::LibXML::Element');
	}
	
	my $rv = url $this->uri; # document URI.
	
	while (my $b = pop @base)
	{
		$rv = url $b, $rv->abs->as_string;
	}
	
	return $rv->abs->as_string;
}

sub rdf_triple
# Function only used internally.
{
	my $this = shift;

	my $suppress_triple = 0;
	$suppress_triple = $this->{'sub'}->{'pretriple_resource'}($this, @_)
		if defined $this->{'sub'}->{'pretriple_resource'};
	return if $suppress_triple;
	
	my $element   = shift;  # A reference to the XML::LibXML element being parsed
	my $subject   = shift;  # Subject URI or bnode
	my $predicate = shift;  # Predicate URI
	my $object    = shift;  # Resource URI or bnode
	my $graph     = shift;  # Graph URI or bnode (if named graphs feature is enabled)

	# First make sure the object node type is ok.
	my $to;
	if ($object =~ m/^_:(.*)/)
	{
		$to = RDF::Trine::Node::Blank->new($1);
	}
	else
	{
		$to = RDF::Trine::Node::Resource->new($object);
	}

	# Run the common function
	return $this->rdf_triple_common($element, $subject, $predicate, $to, $graph);
}

sub rdf_triple_literal
# Function only used internally.
{
	my $this = shift;

	my $suppress_triple = 0;
	$suppress_triple = $this->{'sub'}->{'pretriple_literal'}($this, @_)
		if defined $this->{'sub'}->{'pretriple_literal'};
	return if $suppress_triple;

	my $element   = shift;  # A reference to the XML::LibXML element being parsed
	my $subject   = shift;  # Subject URI or bnode
	my $predicate = shift;  # Predicate URI
	my $object    = shift;  # Resource Literal
	my $datatype  = shift;  # Datatype URI (possibly undef or '')
	my $language  = shift;  # Language (possibly undef or '')
	my $graph     = shift;  # Graph URI or bnode (if named graphs feature is enabled)

	# Now we know there's a literal
	my $to;
	
	# Work around bad Unicode handling in RDF::Trine.
	$object = encode_utf8($object);

	if (defined $datatype)
	{
		if ($datatype eq 'http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral')
		{
			if ($this->{'options'}->{'use_rtnlx'})
			{
				eval
				{
					require RDF::Trine::Node::Literal::XML;
					$to = RDF::Trine::Node::Literal::XML->new($element->childNodes);
				};
			}
			
			if ( $@ || !defined $to)
			{
				my $orig = $RDF::Trine::Node::Literal::USE_XMLLITERALS;
				$RDF::Trine::Node::Literal::USE_XMLLITERALS = 0;
				$to = RDF::Trine::Node::Literal->new($object, undef, $datatype);
				$RDF::Trine::Node::Literal::USE_XMLLITERALS = $orig;
			}
		}
		else
		{
			$to = RDF::Trine::Node::Literal->new($object, undef, $datatype);
		}
	}
	else
	{
		$to = RDF::Trine::Node::Literal->new($object, $language, undef);
	}

	# Run the common function
	$this->rdf_triple_common($element, $subject, $predicate, $to, $graph);
}

sub rdf_triple_common
# Function only used internally.
{
	my $this      = shift;  # A reference to the Parser object
	my $element   = shift;  # A reference to the XML::LibXML element being parsed
	my $subject   = shift;  # Subject URI or bnode
	my $predicate = shift;  # Predicate URI
	my $to        = shift;  # RDF::Trine::Node Resource URI or bnode
	my $graph     = shift;  # Graph URI or bnode (if named graphs feature is enabled)

	# First, make sure subject and predicates are the right kind of nodes
	my $tp = RDF::Trine::Node::Resource->new($predicate);
	my $ts;
	if ($subject =~ m/^_:(.*)/)
	{
		$ts = RDF::Trine::Node::Blank->new($1);
	}
	else
	{
		$ts = RDF::Trine::Node::Resource->new($subject);
	}

	my $statement;

	# If we are configured for it, and graph name can be found, add it.
	if (ref($this->{'options'}->{'named_graphs'}) && ($graph))
	{
		$this->{Graphs}->{$graph}++;
		
		my $tg;
		if ($graph =~ m/^_:(.*)/)
		{
			$tg = RDF::Trine::Node::Blank->new($1);
		}
		else
		{
			$tg = RDF::Trine::Node::Resource->new($graph);
		}

		$statement = RDF::Trine::Statement::Quad->new($ts, $tp, $to, $tg);
	}
	else
	{
		$statement = RDF::Trine::Statement->new($ts, $tp, $to);
	}

	my $suppress_triple = 0;
	$suppress_triple = $this->{'sub'}->{'ontriple'}($this, $element, $statement)
		if ($this->{'sub'}->{'ontriple'});
	return if $suppress_triple;

	$this->{RESULTS}->add_statement($statement);
}

sub bnode
# Function only used internally.
{
	my $this    = shift;
	my $element = shift;
	
	if (defined $this->{'bnode_generator'})
	{
		return $this->{'bnode_generator'}->bnode($element);
	}
	
	return sprintf('_:AwolAutoNode%03d', $this->{bnodes}++);
}

sub valid_lang
{
	my $value_to_test = shift;

	return 1 if (defined $value_to_test) && ($value_to_test eq '');
	return 0 unless defined $value_to_test;
	
	# Regex for recognizing RFC 4646 well-formed tags
	# http://www.rfc-editor.org/rfc/rfc4646.txt
	# http://tools.ietf.org/html/draft-ietf-ltru-4646bis-21

	# The structure requires no forward references, so it reverses the order.
	# It uses Java/Perl syntax instead of the old ABNF
	# The uppercase comments are fragments copied from RFC 4646

	# Note: the tool requires that any real "=" or "#" or ";" in the regex be escaped.

	my $alpha      = '[a-z]';      # ALPHA
	my $digit      = '[0-9]';      # DIGIT
	my $alphanum   = '[a-z0-9]';   # ALPHA / DIGIT
	my $x          = 'x';          # private use singleton
	my $singleton  = '[a-wyz]';    # other singleton
	my $s          = '[_-]';       # separator -- lenient parsers will use [_-] -- strict will use [-]

	# Now do the components. The structure is slightly different to allow for capturing the right components.
	# The notation (?:....) is a non-capturing version of (...): so the "?:" can be deleted if someone doesn't care about capturing.

	my $language   = '([a-z]{2,8}) | ([a-z]{2,3} $s [a-z]{3})';
	
	# ABNF (2*3ALPHA) / 4ALPHA / 5*8ALPHA  --- note: because of how | works in regex, don't use $alpha{2,3} | $alpha{4,8} 
	# We don't have to have the general case of extlang, because there can be only one extlang (except for zh-min-nan).

	# Note: extlang invalid in Unicode language tags

	my $script = '[a-z]{4}' ;   # 4ALPHA 

	my $region = '(?: [a-z]{2}|[0-9]{3})' ;    # 2ALPHA / 3DIGIT

	my $variant    = '(?: [a-z0-9]{5,8} | [0-9] [a-z0-9]{3} )' ;  # 5*8alphanum / (DIGIT 3alphanum)

	my $extension  = '(?: [a-wyz] (?: [_-] [a-z0-9]{2,8} )+ )' ; # singleton 1*("-" (2*8alphanum))

	my $privateUse = '(?: x (?: [_-] [a-z0-9]{1,8} )+ )' ; # "x" 1*("-" (1*8alphanum))

	# Define certain grandfathered codes, since otherwise the regex is pretty useless.
	# Since these are limited, this is safe even later changes to the registry --
	# the only oddity is that it might change the type of the tag, and thus
	# the results from the capturing groups.
	# http://www.iana.org/assignments/language-subtag-registry
	# Note that these have to be compared case insensitively, requiring (?i) below.

	my $grandfathered  = '(?:
			  (en [_-] GB [_-] oed)
			| (i [_-] (?: ami | bnn | default | enochian | hak | klingon | lux | mingo | navajo | pwn | tao | tay | tsu ))
			| (no [_-] (?: bok | nyn ))
			| (sgn [_-] (?: BE [_-] (?: fr | nl) | CH [_-] de ))
			| (zh [_-] min [_-] nan)
			)';

	# old:         | zh $s (?: cmn (?: $s Hans | $s Hant )? | gan | min (?: $s nan)? | wuu | yue );
	# For well-formedness, we don't need the ones that would otherwise pass.
	# For validity, they need to be checked.

	# $grandfatheredWellFormed = (?:
	#         art $s lojban
	#     | cel $s gaulish
	#     | zh $s (?: guoyu | hakka | xiang )
	# );

	# Unicode locales: but we are shifting to a compatible form
	# $keyvalue = (?: $alphanum+ \= $alphanum+);
	# $keywords = ($keyvalue (?: \; $keyvalue)*);

	# We separate items that we want to capture as a single group

	my $variantList   = $variant . '(?:' . $s . $variant . ')*' ;     # special for multiples
	my $extensionList = $extension . '(?:' . $s . $extension . ')*' ; # special for multiples

	my $langtag = "
			($language)
			($s ( $script ) )?
			($s ( $region ) )?
			($s ( $variantList ) )?
			($s ( $extensionList ) )?
			($s ( $privateUse ) )?
			";

	# Here is the final breakdown, with capturing groups for each of these components
	# The variants, extensions, grandfathered, and private-use may have interior '-'
	
	my $r = ($value_to_test =~ 
		/^(
			($langtag)
		 | ($privateUse)
		 | ($grandfathered)
		 )$/xi);
	return $r;
}

'A man, a plan, a canal: Panama'; # E, r u true?

__END__

=head1 NAME

XML::Atom::OWL - parse an Atom file into RDF

=head1 SYNOPSIS

 use XML::Atom::OWL;
 
 $parser = XML::Atom::OWL->new($xml, $baseuri);
 $graph  = $parser->graph;

=head1 DESCRIPTION

This has a pretty similar interface to L<RDF::RDFa::Parser>.

=head2 Constructor

=over 4

=item C<< new($xml, $baseuri, \%options, $storage) >>

This method creates a new XML::Atom::OWL object and returns it.

The $xml variable may contain an XML (Atom) string, or an
L<XML::LibXML::Document> object. If a string, the document is parsed
using L<XML::LibXML>, which will throw an exception if it is not
well-formed. XML::Atom::OWL does not catch the exception.

The base URI is used to resolve relative URIs found in the document.

Currently only one option is defined, 'no_fetch_content_src', a boolean
indicating whether <content src> URLs should be automatically fetched
and added to the model as if inline content had been provided. They are
fetched by default, but it's pretty rare for feeds to include this attribute.

$storage is an RDF::Trine::Storage object. If undef, then a new
temporary store is created.

=back

=head2 Public Methods

=over 4

=item C<< uri >>

Returns the base URI of the document being parsed. This will usually be the
same as the base URI provided to the constructor.

Optionally it may be passed a parameter - an absolute or relative URI - in
which case it returns the same URI which it was passed as a parameter, but
as an absolute URI, resolved relative to the document's base URI.

This seems like two unrelated functions, but if you consider the consequence
of passing a relative URI consisting of a zero-length string, it in fact makes
sense.

=item C<< dom >>

Returns the parsed XML::LibXML::Document.

=item C<< graph >>

This method will return an RDF::Trine::Model object with all
statements of the full graph.

This method automatically calls C<consume>.

=item C<< root_identifier >>

Returns the blank node or URI for the root element of the Atom
document as an RDF::Trine::Node

Calls C<consume> automatically.

=item C<< set_callbacks(\%callbacks) >>

Set callback functions for the parser to call on certain events. These are only necessary if
you want to do something especially unusual.

  $p->set_callbacks({
    'pretriple_resource' => sub { ... } ,
    'pretriple_literal'  => sub { ... } ,
    'ontriple'           => undef ,
    });

For details of the callback functions, see the section CALLBACKS. C<set_callbacks> must
be used I<before> C<consume>. C<set_callbacks> itself returns a reference to the parser
object itself.

=item C<< consume >>

The document is parsed. Triples extracted from the document are passed
to the callbacks as each one is found; triples are made available in the
model returned by the C<graph> method.

This function returns the parser object itself, making it easy to
abbreviate several of XML::Atom::OWL's functions:

  my $iterator = XML::Atom::OWL->new(undef, $uri)
                 ->consume->graph->as_stream;

You probably only need to call this explicitly if you're using callbacks.

=back

=head1 CALLBACKS

Several callback functions are provided. These may be set using the C<set_callbacks> function,
which taskes a hashref of keys pointing to coderefs. The keys are named for the event to fire the
callback on.

=head2 pretriple_resource

This is called when a triple has been found, but before preparing the triple for
adding to the model. It is only called for triples with a non-literal object value.

The parameters passed to the callback function are:

=over 4

=item * A reference to the C<XML::Atom::OWL> object

=item * A reference to the C<XML::LibXML::Element> being parsed

=item * Subject URI or bnode (string)

=item * Predicate URI (string)

=item * Object URI or bnode (string)

=item * Graph URI or bnode (string or undef)

=back

The callback should return 1 to tell the parser to skip this triple (not add it to
the graph); return 0 otherwise.

=head2 pretriple_literal

This is the equivalent of pretriple_resource, but is only called for triples with a
literal object value.

The parameters passed to the callback function are:

=over 4

=item * A reference to the C<XML::Atom::OWL> object

=item * A reference to the C<XML::LibXML::Element> being parsed

=item * Subject URI or bnode (string)

=item * Predicate URI (string)

=item * Object literal (string)

=item * Datatype URI (string or undef)

=item * Language (string or undef)

=item * Graph URI or bnode (string or undef)

=back

Beware: sometimes both a datatype I<and> a language will be passed. 
This goes beyond the normal RDF data model.)

The callback should return 1 to tell the parser to skip this triple (not add it to
the graph); return 0 otherwise.

=head2 ontriple

This is called once a triple is ready to be added to the graph. (After the pretriple
callbacks.) The parameters passed to the callback function are:

=over 4

=item * A reference to the C<XML::Atom::OWL> object

=item * A reference to the C<XML::LibXML::Element> being parsed

=item * An RDF::Trine::Statement object.

=back

The callback should return 1 to tell the parser to skip this triple (not add it to
the graph); return 0 otherwise. The callback may modify the RDF::Trine::Statement
object.

=head1 BUGS

Please report any bugs to L<http://rt.cpan.org/>.

=head1 SEE ALSO

L<RDF::Trine>, L<XML::Atom::FromOWL>.

L<http://www.perlrdf.org/>.

=head1 AUTHOR

Toby Inkster E<lt>tobyink@cpan.orgE<gt>.

=head1 COPYRIGHT AND LICENCE

Copyright 2010-2011 Toby Inkster

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=head1 DISCLAIMER OF WARRANTIES

THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)