use strict;
use warnings;
package Data::Beacon;
#ABSTRACT: BEACON format validating parser and serializer
use Time::Piece;
use Scalar::Util qw(blessed);
use URI::Escape;
use Carp;
use base 'Exporter';
our @EXPORT = qw(plainbeaconlink beacon);
=head1 DESCRIPTION
THIS MODULE DOES NOT REFLECT THE CURRENT STATE OF BEACON SPECIFICATION!
This package implements a validating L</BEACON format> parser and serializer.
In short, a "Beacon" is a set of links, together with some meta fields. Each
link at least consists of "source" URI (also referred to as "id") and a "target"
URI. In addition it has a "label" and a "description", which are both Unicode
strings, being the empty string by default.
=head1 SYNOPSIS
use Data::Beacon;
$beacon = beacon(); # create new Beacon object
$beacon = beacon( { # create new Beacon with meta fields
PREFIX => $p, TARGET => $t, XY => $z
} );
$beacon = beacon( $filename ); # open file and parse its meta fields
if ( $b->errors ) { # check for errors
print STDERR $b->lasterror . "\n"
($msg,$lineno,$line) = $b->lasterror;
}
$beacon->parse; # parse all links from opened file
$beacon->parse( links => \&handler ); # parse links, call handler for each
$beacon->parse( errors => \&handler ); # parse links, on error call handler
$beacon->parse( errors => 'print' ); # parse links, print errors to STDERR
$beacon->parse( errors => 'warn' ); # parse links, warn on errors
$beacon->parse( errors => 'die' ); # parse links, die on errors
while ( $beacon->nextlink ) { # parse and iterate all valid links
($source,$label,$descr,$target) = $beacon->link; # raw link as read
($source,$label,$descr,$target) = $beacon->expanded; # full link expanded
}
$descr = $beacon->meta( 'DESCRIPTION' ); # get meta field
$beacon->meta( DESCRIPTION => 'my links' ); # set meta fields
%meta = $beacon->meta; # get all meta fields
print $beacon->metafields; # serialize meta fields
$beacon->parse( \$beaconstring ); # parse from string
$beacon->parse( sub { return $nextline } ); # parse from callback
$beacon->count; # number of parsed links
$beacon->appendline( $line );
$beacon->appendlink( $source, $label, $descr, $target );
=head1 BEACON format
B<BEACON format> is the serialization format for Beacons. It defines a very
condense syntax to express links without having to deal much with technical
specifications.
See L<http://gbv.github.com/beaconspec/beacon.html> for a more detailed
description.
=head1 USAGE
=head2 Serializing
To serialize only BEACON meta fields, create a new Beacon object, and set its
meta fields (passed to the constructor, or with L</meta>). You can then get
the meta fields in BEACON format with L</metafields>:
my $beacon = beacon( { PREFIX => ..., TARGET => ... } );
print $beacon->metafields;
The easiest way to serialize links in BEACON format, is to set your Beacon
object's link handler to C<print>, so each link is directly printed to STDOUT.
my $beacon = beacon( \%metafields, links => 'print' );
print $b->metafields();
while ( ... ) {
$beacon->appendlink( $source, $label, $description, $target );
}
Alternatively you can use the function L</plainbeaconlink>. In this case you
should validate links before printing:
if ( $beacon->appendlink( $source, $label, $description, $target ) ) {
print plainbeaconlink( $beacon->link ) . "\n";
}
=head2 Parsing
You can parse BEACON format either as iterator:
my $beacon = beacon( $file );
while ( $beacon->nextlink ) {
my ($source, $label, $description, $target) = $beacon->link;
...
}
Or by push parsing with handler callbacks:
my $beacon = beacon( $file );
$beacon->parse( 'link' => \link_handler );
$errors = $beacon->errors;
Instead of a filename, you can also provide a scalar reference, to parse
from a string. The meta fields are parsed immediately:
my $beacon = beacon( $file );
print $beacon->metafields . "\n";
my $errors = $beacon->errors;
To quickly parse a BEACON file:
use Data::Beacon;
beacon($file)->parse();
=head2 Querying
Data::Beacon does only read or write links. To store links, use one of
its subclasses (to be described later).
=head2 Handlers
To handle errors and links, you can pass handler arguments to the constructor
and to the L</parse> method.
=over
=item C<errors>
By default, errors are silently ignored (C<errors =E<gt> 0>. You should enable
one of the error handlers C<warn> (errors create a warning with C<carp>),
C<die> (errors will let the program die with C<croak>), or C<print> (error
messages will be print to STDERR). Alternatively you can provide a custom error
handler function as code reference. On error this function is provided one to
three arguments: first an error message, second a line number, and third the
content of the current line, if the error resulted in parsing a line of BEACON
format.
=item C<links>
See the L</parse> method for description.
=back
=head1 METHODS
=head2 new ( [ $from ] [, $metafields ] [, $handlers ] )
Create a new Beacon object, optionally from a given file. If you specify a
source via C<$from> argument or as handler C<from =E<gt> $from>, it will be
opened for parsing and all meta fields will immediately be read from it.
Otherwise a new Beacon object will be created, optionally with given meta
fields.
=cut
sub new {
my $class = shift;
my $self = bless { }, $class;
$self->_initparams( @_ );
$self->_startparsing;
return $self;
}
=head2 meta ( [ $key [ => $value [ ... ] ] ] )
Get and/or set one or more meta fields. Returns a hash (no arguments),
or string or undef (one argument), or croaks on invalid arguments. A
meta field can be unset by setting its value to the empty string.
The FORMAT field cannot be unset. This method may also croak if a known
fields, such as FORMAT, PREFIX, FEED, EXAMPLES, REVISIT, TIMESTAMP is
tried to set to an invalid value. Such an error will not change the
error counter of this object or modify C<lasterror>.
=cut
sub meta { # TODO: document meta fields
my $self = shift;
return %{$self->{meta}} unless @_;
if (@_ == 1) {
my $key = uc(shift @_);
$key =~ s/^\s+|\s+$//g;
return $self->{meta}->{$key};
}
croak('Wrong number of arguments in SeeAlso::Beacon->meta') if @_ % 2;
my %list = (@_);
foreach my $key (keys %list) {
croak('invalid meta name: "'.$key.'"')
unless $key =~ /^\s*([a-zA-Z_-]+)\s*$/;
my $value = $list{$key};
$key = uc($1);
if ( defined $value ) {
$value =~ s/^\s+|\s+$|\n//g;
} else {
$value = '';
}
if ($value eq '') { # empty field: unset
croak 'You cannot unset meta field #FORMAT' if $key eq 'FORMAT';
delete $self->{meta}->{$key};
} else { # check format of known meta fields
if ($key eq 'TARGET') {
$value =~ s/{id}/{ID}/g;
# TODO: document that {ID} in target is optional (will be appended)
$value .= '{ID}' unless $value =~ /{ID}}/;
my $uri = $value;
$uri =~ s/{ID}//g;
croak 'Invalid #TARGET field: must be an URI pattern'
unless _is_uri($uri);
} elsif ($key eq 'FEED') {
croak 'FEED meta value must be a HTTP/HTTPS URL'
unless $value =~
/^http(s)?:\/\/[a-z0-9-]+(.[a-z0-9-]+)*(:[0-9]+)?(\/[^#|]*)?(\?[^#|]*)?$/i;
} elsif ($key eq 'PREFIX') {
croak 'PREFIX meta value must be a URI'
unless _is_uri($value);
} elsif ( $key =~ /^(REVISIT|TIMESTAMP)$/) {
if ($value =~ /^[0-9]+$/) { # seconds since epoch
$value = gmtime($value)->datetime() . 'Z';
# Note that this conversion does not trigger an error
# or warning, but may be dropped in a future version
} else {
# ISO 8601 combined date and time in UTC
$value =~ s/Z$//;
croak $key . ' meta value must be of form YYYY-MM-DDTHH:MM:SSZ'
unless $value = Time::Piece->strptime(
$value, '%Y-%m-%dT%T' );
$value = $value->datetime();
}
} elsif ( $key eq 'FORMAT' ) {
croak 'Invalid FORMAT, must be BEACON or end with -BEACON'
unless $value =~ /^([A-Z]+-)?BEACON$/;
} elsif ( $key eq 'EXAMPLES' ) {
my @examples = map { s/^\s+|\s+$//g; $_ } split '\|', $value;
$self->{examples} = [ grep { $_ ne '' } @examples ];
%{$self->{expected_examples}} =
map { $_ => 1 } @{$self->{examples}};
$value = join '|', @{$self->{examples}};
if ($value eq '') { # yet another edge case: "EXAMPLES: |" etc.
delete $self->{meta}->{EXAMPLES};
$self->{expected_examples} = undef;
next;
}
# Note that examples are not checked for validity,
# because PREFIX may not be set yet.
}
$self->{meta}->{$key} = $value;
}
}
}
=head2 count
If parsing has been started, returns the number of links, successfully read so
far (or zero). If only the meta fields have been parsed, this returns the value
of the meta field. In contrast to C<meta('count')>, this method always returns
a number. Note that all valid links that could be parsed are included, no matter
if processed by a link handler or not.
=cut
sub count {
my $count = $_[0]->meta('COUNT');
return defined $count ? $count : 0;
}
=head2 line
Returns the current line number or zero.
=cut
sub line {
return $_[0]->{line};
}
=head2 lasterror
Returns the last parsing error message (if any). Errors triggered by directly
calling C<meta> are not included. In list context returns a list of error
message, line number, and current line content.
=cut
sub lasterror {
return wantarray ? @{$_[0]->{lasterror}} : $_[0]->{lasterror}->[0];
}
=head2 errors
Returns the number of parsing errors or zero.
=cut
sub errors {
return $_[0]->{errors};
}
=head2 metafields
Return all meta fields, serialized and sorted as string. Althugh the order of
fields is irrelevant, but this implementation always returns the same fields
in same order. To get all meta fields as hash, use the C<meta> method.
=cut
sub metafields {
my $self = shift;
my %meta = $self->meta();
my %fields = %meta;
# determine default order
my @order = (qw(FORMAT PREFIX TARGET MESSAGE RELATION ANNOTATION),
qw(DESCRIPTION CREATOR CONTACT HOMEPAGE FEED TIMESTAMP UPDATE),
qw(SOURCESET TARGETSET NAME INSTITUTION));
my @lines = map { "#$_: " . $meta{$_} } grep { defined $meta{$_} } @order;
return @lines ? join ("\n", @lines) . "\n" : "";
}
=head2 parse ( [ $from ] { handler => coderef | option => $value } )
Parse all remaining links (push parsing). If provided a C<from> parameter,
this starts a new Beacon. That means the following three are equivalent:
$b = new SeeAlso::Beacon( $from );
$b = new SeeAlso::Beacon( from => $from );
$b = new SeeAlso::Beacon;
$b->parse( $from );
If C<from> is a scalar, it is used as file to parse from. Alternatively you
can supply a string reference, or a code reference.
The C<pre> option can be used to set some meta fields before parsing starts.
These fields are cached and reused every time you call C<parse>.
If the C<mtime> option is given, the TIMESTAMP meta value will be initialized
as last modification time of the given file.
By default, all errors are silently ignored, unless you specifiy an error handler
The last error can be retrieved with the C<lasterror> method. The current number
of errors by C<errors>.
Finally, the C<link> handler can be a code reference to a method that is
called for each link (that is each line in the input that contains a valid
link). The following arguments are passed to the handler:
=over
=item C<$source>
Link source as given in BEACON format.
This may be abbreviated but not the empty string.
=item C<$label>
Label as string. This may be the empty string.
=item C<$description>
Description as string. This may be the empty string.
=item C<$target>
Link target as given in BEACON format.
This may be abbreviated or the empty string.
=back
The number of sucessfully parsed links is returned by C<count>.
Errors in link handler and input handler are catched, and produce an
error that is given to the error handler.
=cut
sub parse {
my $self = shift;
$self->_initparams( @_ );
$self->_startparsing if defined $self->{from}; # start from new source
my $line = $self->{lookaheadline};
$line = $self->_readline() unless defined $line;
while (defined $line) {
$self->appendline( $line );
$line = $self->_readline();
}
return $self->errors == 0;
}
=head2 nextlink
Read from the input stream until the next link has been parsed. Empty lines
and invalid lines are skipped, but the error handler is called on invalid
lines. This method can be used for pull parsing. Always returns either the
link as list or an empty list if the end of input has been reached.
=cut
sub nextlink {
my $self = shift;
my $line = $self->{lookaheadline};
if (defined $line) {
$self->{lookaheadline} = undef;
} else {
$line = $self->_readline();
return unless defined $line; # undef => EOF
}
do {
my @link = $self->appendline( $line );
return @link if @link; # proceed on empty lines or errors
} while($line = $self->_readline());
return; # EOF
}
=head2 link
Returns the last valid link, that has been read. The link is returned
as list of four values (source, label, description, target) without
expansion. Use the L</expanded> method to get the link with full URIs.
=cut
sub link {
my $self = shift;
return @{$self->{link}} if $self->{link};
}
=head2 expanded
Returns the last valid link, that has been read in expanded form. The
link is returned as list of four values (source, label, description,
target), possibly expanded by the meta fields PREFIX, TARGET/TARGETPREFIX,
MESSAGE etc. Use L</expand> to expand an arbitrary link.
=cut
sub expanded {
my $self = shift;
if ( $self->{link} ) {
unless ( $self->{expanded} ) {
@{$self->{expanded}} = @{$self->{link}};
$self->_expandlink( $self->{expanded} )
}
return @{$self->{expanded}};
}
}
=head2 expand ( $source, $label, $description, $target )
Expand a link, consisting of source (mandatory), and label, description,
and target (all optional). Returns the expanded link as array with four
values, or an empty list. This method does append the link to the Beacon
object, nor call any handlers.
=cut
sub expand {
my $self = shift;
my @fields = @_ > 0 ? @_ : '';
@fields = map { s/^\s+|\s+$//g; $_ }
map { defined $_ ? $_ : '' } @fields;
return if $fields[0] eq '' or (grep { $_ =~ /\||\n|\r/ } @fields);
$self->_expandlink( \@fields );
return unless _is_uri($fields[0]) && _is_uri($fields[3]);
return @fields;
}
=head2 expandsource( $source )
Expand the source part of a link, by prepending the PREFIX meta field, if
given. This method always returns a string, which is the empty string, if
the source parameter could not be expanded to a valid URI.
=cut
sub expandsource {
my ($self, $source) = @_;
return '' unless defined $source;
$source =~ s/^\s+|\s+$//g;
return '' if $source eq '';
$source = $self->{meta}->{PREFIX} . $source
if defined $self->{meta}->{PREFIX};
return _is_uri($source) ? $source : '';
}
=head2 appendline( $line )
Append a line of of BEACON format. This method parses the line, and calls the
link handler, or error handler. In scalar context returns whether a link has
been read (that can then be accessed with C<link>). In list context, returns
the parsed link as list, or the empty list, if the line could not be parsed.
=cut
sub appendline {
my ($self, $line) = @_;
return unless defined $line;
chomp $line;
$self->{line}++;
$self->{currentline} = $line;
my @parts = split ('\|',$line);
return if (@parts < 1 || $parts[0] =~ /^\s*$/ );
my $link = $self->_fields( @parts );
my $has_link = $self->appendlink( @$link );
$self->{currentline} = undef;
if ( $has_link ) {
return wantarray ? @{ $self->{link} } : 1;
}
return;
}
=head2 appendlink ( $source [, $label [, $description [, $target ] ] ] )
Append a link. The link is validated and returned as list of four values.
On error the error handler is called and an empty list is returned.
On success the link handler is called.
=cut
sub appendlink {
my $self = shift;
my @fields = map { defined $_ ? $_ : '' } @_[0..3];
@fields = map { s/^\s+|\s+$//g; $_ } @fields;
if ( $fields[0] eq '' ) {
$self->_handle_error( 'missing source' );
return;
} elsif ( grep { $_ =~ /\|/ } @fields ) {
$self->_handle_error( 'link fields must not contain \'|\'' );
return;
} elsif ( grep { $_ =~ /[\n\r]/ } @fields ) {
$self->_handle_error( 'link fields must not contain line breaks' );
return;
}
my $msg = $self->_checklink( @fields );
if ( $msg ) {
$self->_handle_error( $msg );
return;
}
# finally got a valid link
$self->{link} = \@fields;
$self->{expanded} = undef;
$self->{meta}->{COUNT}++;
if ( defined $self->{expected_examples} ) { # examples may contain prefix
my @idforms = $fields[0];
my $prefix = $self->{meta}->{PREFIX};
push @idforms, $prefix . $fields[0] if defined $prefix;
foreach my $source (@idforms) {
if ( $self->{expected_examples}->{$source} ) {
delete $self->{expected_examples}->{$source};
$self->{expected_examples} = undef
unless keys %{ $self->{expected_examples} };
}
}
}
if ( $self->{link_handler} ) {
if ( $self->{link_handler} eq 'print' ) {
print plainbeaconlink( @fields ) . "\n";
} elsif ( $self->{link_handler} eq 'expand' ) {
print join('|',$self->expanded) . "\n";
} else {
# TODO: call with expanded link on request
eval { $self->{link_handler}->( @fields ); };
if ( $@ ) {
$self->_handle_error( "link handler died: $@" );
return;
}
}
}
return @fields; # TODO: return expanded on request
}
=head1 FUNCTIONS
The following functions are exported by default.
=head2 beacon ( [ $from ] { handler => coderef } )
Shortcut for C<Data::Beacon-E<gt>new>.
=cut
sub beacon {
return Data::Beacon->new( @_ );
}
=head2 plainbeaconlink ( $source, $label, $description, $target )
Serialize a link, consisting of source (mandatory), label, description,
and target (all optional) as condensed string in BEACON format. This
function does not check whether the arguments form a valid link or not.
You can pass a simple link, as returned by the L</link> method, or an
expanded link, as returned by L</expanded>.
This function will be removed or renamed.
=cut
sub plainbeaconlink {
shift if ref($_[0]) and UNIVERSAL::isa($_[0],'Data::Beacon');
return '' unless @_;
my @link = map { defined $_ ? $_ : '' } @_[0..3];
@link = map { s/^\s+|\s+$//g; $_; } @link;
return '' if $link[0] eq '';
if ( $link[3] eq '' ){
pop @link;
if ($link[2] eq '') {
pop @link;
pop @link if ($link[1] eq '');
}
} elsif ( _is_uri($link[3]) ) { # only position of _is_uri where argument may be undefined
my $uri = pop @link;
if ($link[2] eq '') {
pop @link;
pop @link if ($link[1] eq '');
}
push @link, $uri;
}
return join('|', @link);
}
=head1 INTERNAL METHODS
If you directly call any of this methods, puppies will die.
=head2 _initparams ( [ $from ] { handler => coderef | option => value } | $metafield )
Initialize parameters as passed to C<new> or C<parse>. Known parameters
are C<from>, C<error>, and C<link> (C<from> is not checked here). In
addition you cann pass C<pre> and C<mtime> as options.
=cut
sub _initparams {
my $self = shift;
my %param;
if ( @_ % 2 && !blessed($_[0]) && ref($_[0]) && ref($_[0]) eq 'HASH' ) {
my $pre = shift;
%param = @_;
$param{pre} = $pre;
} else {
$self->{from} = (@_ % 2) ? shift(@_) : undef;
%param = @_;
}
$self->{from} = $param{from}
if exists $param{from};
if ( $param{errors} ) {
my $handler = $param{errors};
$handler = $Data::Beacon::ERROR_HANDLERS{lc($handler)}
unless ref($handler);
unless ( ref($handler) and ref($handler) eq 'CODE' ) {
my $msg = 'error handler must be code or '
. join('/',keys %Data::Beacon::ERROR_HANDLERS)
. ', got '
. (defined $handler ? $handler : 'undef');
croak $msg;
}
$self->{error_handler} = $handler;
}
if ( $param{links} ) {
my $handler = $param{links};
croak 'link handler must be code or \'print\' or \'expand\''
unless $handler =~ /^(print|expand)$/
or (ref($handler) and ref($handler) eq 'CODE');
$self->{link_handler} = $handler;
}
if ( defined $param{pre} ) {
croak "pre option must be a hash reference"
unless ref($param{pre}) and ref($param{pre}) eq 'HASH';
$self->{pre} = $param{pre};
} elsif ( exists $param{pre} ) {
$self->{pre} = undef;
}
$self->{mtime} = $param{mtime};
}
=head2 _startparsing
Open a BEACON file and parse all meta fields. Calling this method will reset
the whole object but not the parameters as set with C<_initparams>. If no
source had been specified (with parameter C<from>), this is all the method
does. If a source is given, it is opened and parsed. Parsing stops when the
first non-empty and non-meta field line is encountered. This line is internally
stored as lookahead.
=cut
sub _startparsing {
my $self = shift;
# we do not init $self->{meta} because it is set in initparams;
$self->{meta} = { 'FORMAT' => 'BEACON' };
$self->meta( %{ $self->{pre} } ) if $self->{pre};
$self->{line} = 0;
$self->{link} = undef;
$self->{expanded} = undef;
$self->{errors} = 0;
$self->{lasterror} = [];
$self->{lookaheadline} = undef;
$self->{fh} = undef;
$self->{inputlines} = [];
$self->{examples} = [];
return unless defined $self->{from};
# decide where to parse from
my $type = ref($self->{from});
if ($type) {
if ($type eq 'SCALAR') {
$self->{inputlines} = [ split("\n",${$self->{from}}) ];
} elsif ($type ne 'CODE') {
$self->_handle_error( "Unknown input $type" );
return;
}
} elsif( $self->{from} eq '-' ) {
$self->{fh} = \*STDIN;
} else {
if(!(open $self->{fh}, $self->{from})) {
$self->_handle_error( 'Failed to open ' . $self->{from} );
return;
}
}
# initlialize TIMESTAMP
if ($self->{mtime}) {
my @stat = stat( $self->{from} );
$self->meta('TIMESTAMP', gmtime( $stat[9] )->datetime() . 'Z' );
}
# start parsing
my $line = $self->_readline();
return unless defined $line;
$line =~ s/^\xEF\xBB\xBF//; # UTF-8 BOM (optional)
do {
$line =~ s/^\s+|\s*\n?$//g;
if ($line eq '') {
$self->{line}++;
} elsif ($line =~ /^#([^:=\s]+)(\s*[:=]?\s*|\s+)(.*)$/) {
$self->{line}++;
eval { $self->meta($1,$3); };
if ($@) {
my $msg = $@; $msg =~ s/ at .*$//;
$self->_handle_error( $msg, $line );
}
} else {
$self->{lookaheadline} = $line;
return;
}
$line = $self->_readline();
} while (defined $line);
}
=head2 _handle_error ( $msg [, $line ] )
Internal error handler that calls a custom error handler,
increases the error counter and stores the last error.
=cut
sub _handle_error {
my $self = shift;
my $msg = shift;
my $line = shift || $self->{currentline} || '';
chomp $line;
$self->{lasterror} = [ $msg, $self->{line}, $line ];
$self->{errors}++;
$self->{error_handler}->( $msg, $self->{line}, $line ) if $self->{error_handler};
}
our %ERROR_HANDLERS = (
'print' => sub {
my ($msg, $lineno) = @_;
$msg .= " at line $lineno" if $lineno ;
print STDERR "$msg\n";
},
'warn' => sub {
my ($msg, $lineno) = @_;
$msg .= " at line $lineno" if $lineno;
carp $msg;
},
'die' => sub {
my ($msg, $lineno) = @_;
$msg .= " at line $lineno" if $lineno;
croak $msg;
}
);
=head2 _readline
Internally read and return a line for parsing afterwards. May trigger an error.
=cut
sub _readline {
my $self = shift;
if ($self->{fh}) {
return eval { no warnings; readline $self->{fh} };
} elsif (ref($self->{from}) && ref($self->{from}) eq 'CODE') {
my $line = eval { $self->{from}->(); };
if ($@) { # input handler died
$self->_handle_error( $@, '' );
$self->{from} = undef;
}
return $line;
} else {
return @{$self->{inputlines}} ? shift(@{$self->{inputlines}}) : undef;
}
}
=head2 _fields
Gets one or more fields, that are strings, which do not contain C<|> or
newlines. The first string is not empty. Returns a reference to an array
of four fields.
=cut
sub _fields {
my $self = shift;
my @parts = @_;
my $n = scalar @parts;
my $link = [shift @parts,"","",""];
my $target = $self->{meta}->{TARGET};
my $targetprefix = $self->{meta}->{TARGETPREFIX};
if ($target or $targetprefix) {
$link->[1] = shift @parts if @parts;
$link->[2] = shift @parts if @parts;
# TODO: do we want both #TARGET links and explicit links in one file?
$link->[3] = shift @parts if @parts;
} else {
$link->[3] = pop @parts
if ($n > 1 && _is_uri($parts[$n-2]));
$link->[1] = shift @parts if @parts;
$link->[2] = shift @parts if @parts;
}
return $link
}
sub _checklink {
my ($self, @fields) = @_;
my @exp = @fields;
# TODO: check only - we don't need full expansion
$self->_expandlink( \@exp );
return "source is no URI: ".$exp[0]
unless _is_uri($exp[0]);
# TODO: we could encode bad characters etc.
return "target is no URI: ".$exp[3]
unless _is_uri($exp[3]);
return undef;
}
=head2 _expandlink ( $link )
Expand a link, provided as array reference without validation. The link
must have four defined, trimmed fields. After expansion, source and target
must still be checked whether they are valid URIs.
=cut
sub _expandlink {
my ($self, $link) = @_;
my $prefix = $self->{meta}->{PREFIX};
my $source = $link->[0];
# TODO: document this expansion
if ( $link->[1] =~ /^[0-9]*$/ ) { # if label is number (of hits) or empty
my $label = $link->[1];
my $descr = $link->[2];
# TODO: handle zero hits
my $msg = $self->{meta}->{$label eq '1' ? 'ONEMESSAGE' : 'SOMEMESSAGE'}
|| $self->{meta}->{'MESSAGE'};
if ( defined $msg ) {
_str_replace( $msg, '{id}', $link->[0] ); # unexpanded
_str_replace( $msg, '{hits}', $link->[1] );
_str_replace( $msg, '{label}', $link->[1] );
_str_replace( $msg, '{description}', $link->[2] );
_str_replace( $msg, '{target}', $link->[3] ); # unexpanded
} else {
$msg = $self->{meta}->{'NAME'} || $self->{meta}->{'INSTITUTION'};
}
if ( defined $msg && $msg ne '' ) {
# if ( $link->[1] == "") $descr = $label;
$link->[1] = $msg;
$link->[1] =~ s/^\s+|\s+$//g;
$link->[1] =~ s/\s+/ /g;
}
} else {
_str_replace( $link->[1], '{id}', $link->[0] ); # unexpanded
_str_replace( $link->[1], '{description}', $link->[2] );
_str_replace( $link->[1], '{target}', $link->[3] ); # unexpanded
# trim label, because it may have changed
$link->[1] =~ s/^\s+|\s+$//g;
$link->[1] =~ s/\s+/ /g;
}
# expand source
$link->[0] = $prefix . $link->[0] if defined $prefix;
# expand target
my $target = $self->{meta}->{TARGET};
my $targetprefix = $self->{meta}->{TARGETPREFIX};
if (defined $target) {
$link->[3] = $target;
my $label = $link->[1];
$link->[3] =~ s/{ID}/$source/g;
} elsif( defined $targetprefix ) {
$link->[3] = $targetprefix . $link->[3];
}
return @$link;
}
sub _str_replace {
$_[0] =~ s/\Q$_[1]\E/$_[2]/g;
}
=head2 _is_uri
Check whether a given string is an URI. This function is based on code of
L<Data::Validate::URI>, adopted for performance.
=cut
sub _is_uri {
my $value = $_[0];
return unless defined($value);
# check for illegal characters
return if $value =~ /[^a-z0-9\:\/\?\#\[\]\@\!\$\&\'\(\)\*\+\,\;\=\.\-\_\~\%]/i;
# check for hex escapes that aren't complete
return if $value =~ /%[^0-9a-f]/i;
return if $value =~ /%[0-9a-f](:?[^0-9a-f]|$)/i;
# split uri (from RFC 3986)
my($scheme, $authority, $path, $query, $fragment)
= $value =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
# scheme and path are required, though the path can be empty
return unless (defined($scheme) && length($scheme) && defined($path));
# if authority is present, the path must be empty or begin with a /
if(defined($authority) && length($authority)){
return unless(length($path) == 0 || $path =~ m!^/!);
} else {
# if authority is not present, the path must not start with //
return if $path =~ m!^//!;
}
# scheme must begin with a letter, then consist of letters, digits, +, ., or -
return unless lc($scheme) =~ m!^[a-z][a-z0-9\+\-\.]*$!;
return 1;
}
1;
=head1 DEVELOPMENT
Please visit http://github.com/nichtich/p5-data-beacon for the latest
development snapshot, bug reports, feature requests, and such.
=head1 SEE ALSO
See also L<SeeAlso::Server> for an API to exchange single sets of
beacon links, based on the same source identifier.
=cut