The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# RDF::Trine::Parser::NTriples
# -----------------------------------------------------------------------------

=head1 NAME

RDF::Trine::Parser::NTriples - N-Triples Parser

=head1 VERSION

This document describes RDF::Trine::Parser::NTriples version 1.018

=head1 SYNOPSIS

 use RDF::Trine::Parser;
 my $parser	= RDF::Trine::Parser->new( 'ntriples' );
 $parser->parse_into_model( $base_uri, $data, $model );

=head1 DESCRIPTION

...

=head1 METHODS

Beyond the methods documented below, this class inherits methods from the
L<RDF::Trine::Parser> class.

=over 4

=cut

package RDF::Trine::Parser::NTriples;

use strict;
use warnings;
use utf8;

use base qw(RDF::Trine::Parser);

use Carp;
use Encode qw(decode);
use Data::Dumper;
use Log::Log4perl;
use Scalar::Util qw(blessed reftype);

use RDF::Trine qw(literal);
use RDF::Trine::Node;
use RDF::Trine::Statement;
use RDF::Trine::Error qw(:try);

######################################################################

our ($VERSION);
BEGIN {
	$VERSION	= '1.018';
	$RDF::Trine::Parser::parser_names{ 'ntriples' }	= __PACKAGE__;
	foreach my $ext (qw(nt)) {
		$RDF::Trine::Parser::file_extensions{ $ext }	= __PACKAGE__;
	}
	my $class										= __PACKAGE__;
	$RDF::Trine::Parser::canonical_media_types{ $class }	= 'text/plain';
	foreach my $type (qw(text/plain)) {
		$RDF::Trine::Parser::media_types{ $type }	= __PACKAGE__;
	}
	$RDF::Trine::Parser::format_uris{ 'http://www.w3.org/ns/formats/N-Triples' }	= __PACKAGE__;
}

######################################################################

=item C<< new >>

=cut

sub new {
	my $class	= shift;
	my $self = bless( {@_}, $class);
	return $self;
}

=item C<< parse_into_model ( $base_uri, $data, $model [, context => $context] ) >>

Parses the bytes in C<< $data >>.
For each RDF statement parsed, will call C<< $model->add_statement( $statement ) >>.

=item C<< parse_file_into_model ( $base_uri, $fh, $model [, context => $context] ) >>

Parses all data read from the filehandle C<< $fh >>.
For each RDF statement parsed, will call C<< $model->add_statement( $statement ) >>.

=cut


=item C<< parse ( $base_uri, $rdf, \&handler ) >>

=cut

sub parse {
	my $self	= shift;
	my $base	= shift;
	my $string	= shift;
	my $handler	= shift;
	open( my $fh, '<:encoding(UTF-8)', \$string );
	return $self->parse_file( $base, $fh, $handler );
}

=item C<< parse_node ( $string, $base ) >>

Returns the RDF::Trine::Node object corresponding to the node whose N-Triples
serialization is found at the beginning of C<< $string >>.

=cut

sub parse_node {
	my $self	= shift;
	my $string	= shift;
	my $uri		= shift;
	my $n		= $self->_eat_node( $uri, 0, $string );
	return $n;
}

=item C<< parse_file ( $base, $fh, \&handler ) >>

=cut

sub parse_file {
	my $self	= shift;
	my $base	= shift;
	my $fh		= shift;
	my $handler	= shift;
	
	unless (ref($fh)) {
		my $filename	= $fh;
		undef $fh;
		open( $fh, '<:encoding(UTF-8)', $filename ) or throw RDF::Trine::Error::ParserError -text => $!;
	}
	
	my $lineno	= 0;
	no warnings 'uninitialized';
	while (defined(my $line = <$fh>)) {
LINE:
		($line, my @extra)	= split(/\r\n|\r|\n/, $line, 2);
		$lineno++;
		
		next unless (defined($line) and length($line));
		next unless ($line =~ /\S/);
		chomp($line);
		$line	=~ s/^\s*//;
		$line	=~ s/\s*$//;
		next if ($line =~ /^#/);
		
		my @nodes	= ();
		try {
			while (my $n = $self->_eat_node( $base, $lineno, $line )) {
				push(@nodes, $n);
				$line	=~ s/^\s*//;
			}
		};
		$line	=~ s/^\s//g;
		unless ($line eq '.') {
# 			Carp::cluck 'N-Triples parser failed: ' . Dumper(\@nodes, $line);
			throw RDF::Trine::Error::ParserError -text => "Missing expected '.' at line $lineno";
		}
		
		$self->_emit_statement( $handler, \@nodes, $lineno );
		if (@extra) {
			$line	= shift(@extra);
			goto LINE;
		}
	}
}

sub _emit_statement {
	my $self	= shift;
	my $handler	= shift;
	my $nodes	= shift;
	my $lineno	= shift;
	my $st;
	if (scalar(@$nodes) == 3) {
		if ($self->{canonicalize}) {
			if ($nodes->[2]->isa('RDF::Trine::Node::Literal') and $nodes->[2]->has_datatype) {
				my $value	= $nodes->[2]->literal_value;
				my $dt		= $nodes->[2]->literal_datatype;
				my $canon	= RDF::Trine::Node::Literal->canonicalize_literal_value( $value, $dt, 1 );
				$nodes->[2]	= literal( $canon, undef, $dt );
			}
		}
		$st	= RDF::Trine::Statement->new( @$nodes );
# 	} elsif (scalar(@$nodes) == 4) {
# 		$st	= RDF::Trine::Statement::Quad->new( @$nodes );
	} else {
# 		warn Dumper($nodes);
		throw RDF::Trine::Error::ParserError -text => qq[Not valid N-Triples data at line $lineno];
	}
	$handler->( $st );
}

sub _eat_node {
	my $self	= shift;
	my $base	= shift;
	my $lineno	= shift;
	$_[0]	=~ s/^\s*//;
	return unless length($_[0]);
	my $char	= substr($_[0], 0, 1);
	return if ($char eq '.');
	
	if ($char eq '<') {
		my ($uri)	= $_[0] =~ m/^<([^>]*)>/;
		substr($_[0], 0, length($uri)+2)	= '';
		return RDF::Trine::Node::Resource->new( _unescape($uri, $lineno) );
	} elsif ($char eq '_') {
		my ($name)	= $_[0] =~ m/^_:([A-Za-z][A-Za-z0-9]*)/;
		substr($_[0], 0, length($name)+2)	= '';
		return RDF::Trine::Node::Blank->new( $name );
	} elsif ($char eq '"') {
		substr($_[0], 0, 1)	= '';
		my $value	= decode('utf8', '');
		while (length($_[0]) and substr($_[0], 0, 1) ne '"') {
			while ($_[0] =~ m/^([^"\\]+)/) {
				$value	.= $1;
				substr($_[0],0,length($1))	= '';
			}
			if (substr($_[0],0,1) eq '\\') {
				while ($_[0] =~ m/^\\(.)/) {
					if ($1 eq 't') {
						$value	.= "\t";
						substr($_[0],0,2)	= '';
					} elsif ($1 eq 'r') {
						$value	.= "\r";
						substr($_[0],0,2)	= '';
					} elsif ($1 eq 'n') {
						$value	.= "\n";
						substr($_[0],0,2)	= '';
					} elsif ($1 eq '"') {
						$value	.= '"';
						substr($_[0],0,2)	= '';
					} elsif ($1 eq '\\') {
						$value	.= "\\";
						substr($_[0],0,2)	= '';
					} elsif ($1 eq 'u') {
						$_[0] =~ m/^\\u([0-9A-Fa-f]{4})/ or throw RDF::Trine::Error::ParserError -text => qq[Bad N-Triples \\u escape at line $lineno, near "$_[0]"];
						$value	.= chr(oct('0x' . $1));
						substr($_[0],0,6)	= '';
					} elsif ($1 eq 'U') {
						$_[0] =~ m/^\\U([0-9A-Fa-f]{8})/ or throw RDF::Trine::Error::ParserError -text => qq[Bad N-Triples \\U escape at line $lineno, near "$_[0]"];
						$value	.= chr(oct('0x' . $1));
						substr($_[0],0,10)	= '';
					} else {
						throw RDF::Trine::Error::ParserError -text => qq[Not valid N-Triples escape character '\\$1' at line $lineno, near "$_[0]"];
					}
				}
			}
		}
		if (substr($_[0],0,1) eq '"') {
			substr($_[0],0,1)	= '';
		} else {
			throw RDF::Trine::Error::ParserError -text => qq[Ending double quote not found at line $lineno];
		}
		
		if ($_[0] =~ m/^@([a-z]+(-[a-zA-Z0-9]+)*)/) {
			my $lang	= $1;
			substr($_[0],0,1+length($lang))	= '';
			return RDF::Trine::Node::Literal->new($value, $lang);
		} elsif (substr($_[0],0,3) eq '^^<') {
			substr($_[0],0,3)	= '';
			my ($uri)	= $_[0] =~ m/^([^>]*)>/;
			substr($_[0], 0, length($uri)+1)	= '';
			return RDF::Trine::Node::Literal->new($value, undef, $uri);
		} else {
			return RDF::Trine::Node::Literal->new($value);
		}
	} else {
		throw RDF::Trine::Error::ParserError -text => qq[Not valid N-Triples node start character '$char' at line $lineno, near "$_[0]"];
	}
}

sub _unescape {
	my $string	= shift;
	my $lineno	= shift;
	my $value	= '';
	while (length($string)) {
		while ($string =~ m/^([^\\]+)/) {
			$value	.= $1;
			substr($string,0,length($1))	= '';
		}
		if (length($string)) {
			if ($string eq '\\') {
				throw RDF::Trine::Error::ParserError -text => qq[Backslash in N-Triples node without escaped character at line $lineno];
			}
			if ($string =~ m/^\\([tbnrf"'uU])/) {
				while ($string =~ m/^\\([tbnrf"'uU])/) {
					if ($1 eq 't') {
						$value	.= "\t";
						substr($string,0,2)	= '';
					} elsif ($1 eq 'b') {
						$value	.= "\b";
						substr($string,0,2)	= '';
					} elsif ($1 eq 'n') {
						$value	.= "\n";
						substr($string,0,2)	= '';
					} elsif ($1 eq 'r') {
						$value	.= "\r";
						substr($string,0,2)	= '';
					} elsif ($1 eq 'f') {
						$value	.= "\f";
						substr($string,0,2)	= '';
					} elsif ($1 eq '"') {
						$value	.= '"';
						substr($string,0,2)	= '';
					} elsif ($1 eq '\\') {
						$value	.= "\\";
						substr($string,0,2)	= '';
					} elsif ($1 eq 'u') {
						$string =~ m/^\\u([0-9A-F]{4})/ or throw RDF::Trine::Error::ParserError -text => qq[Bad N-Triples \\u escape at line $lineno, near "$string"];
						$value	.= chr(oct('0x' . $1));
						substr($string,0,6)	= '';
					} elsif ($1 eq 'U') {
						$string =~ m/^\\U([0-9A-F]{8})/ or throw RDF::Trine::Error::ParserError -text => qq[Bad N-Triples \\U escape at line $lineno, near "$string"];
						$value	.= chr(oct('0x' . $1));
						substr($string,0,10)	= '';
					}
				}
			} else {
				my $esc	= substr($string, 0, 2);
				throw RDF::Trine::Error::ParserError -text => qq[Not a valid N-Triples escape sequence '$esc' at line $lineno, near "$string"];
			}
		}
	}
	return $value;
}

1;

__END__

=back

=head1 BUGS

Please report any bugs or feature requests to through the GitHub web interface
at L<https://github.com/kasei/perlrdf/issues>.

=head1 AUTHOR

Gregory Todd Williams  C<< <gwilliams@cpan.org> >>

=head1 COPYRIGHT

Copyright (c) 2006-2012 Gregory Todd Williams. This
program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

=cut