package PPIx::XPath;
use strict;
use warnings;
use PPI;
use Carp;
use Scalar::Util qw(reftype blessed);
use Tree::XPathEngine;
use 5.006;
our $VERSION = '2.02'; # VERSION
# ABSTRACT: an XPath implementation for the PDOM
sub new {
my ($class,$source) = @_;
croak "PPIx::XPath->new needs a source document" unless defined($source);
my $doc;
if (blessed($source) && $source->isa('PPI::Node')) {
$doc = $source;
}
elsif (reftype($source) eq 'SCALAR'
or (!ref($source) && -f $source)) {
$doc = PPI::Document->new($source);
}
else {
croak "PPIx::XPath expects either a PPI::Node or a file" .
" got a: [" .( ref($source) || $source ). ']';
}
return bless {doc=>$doc},$class;
}
{
my $legacy_names_rx;my %new_name_for;
sub clean_xpath_expr {
my (undef,$expr)=@_;
$expr =~ s{$legacy_names_rx}{$new_name_for{$1}}ge;
return $expr;
}
my @PPI_Packs;
# taken from Devel::Symdump
# breadth-first search of packages under C<PPI>
my @packages=('PPI');
while (my $pack=shift(@packages)) {
my %pack_symbols = do {
no strict 'refs'; ## no critic(ProhibitNoStrict)
%{*{"$pack\::"}}
};
while (my ($key,$val)=each(%pack_symbols)) {
# that {HASH} lookup is special for typeglobs, so we have
# to alias a local typeglob to make it work
local *ENTRY=$val;
# does this symbol table entry look like a sub-package?
if (defined $val && defined *ENTRY{HASH} && $key=~/::$/
&& $key !~ /^::/
&& $key ne 'main::' && $key ne '<none>::') {
# add it to the search list
my $p = "$pack\::$key";$p =~ s{::$}{};
push @packages,$p;
# and add it to the map of names
$p =~ s{^PPI::}{};
next unless $p=~/::/;
my $newname=$p;
$newname =~ s{::}{-}g;
push @PPI_Packs,$p;
$new_name_for{$p}=$newname;
}
}
}
# @PPI_Packs now contains all the old-style names, build a regex
# to match them (the sort is important, we want to match longer
# names first!)
$legacy_names_rx='\b('.join(q{|},
sort {length($b) <=> length($a)} @PPI_Packs
).')\b';
$legacy_names_rx=qr{$legacy_names_rx};
}
sub match {
my ($self,$expr) = @_;
$expr=$self->clean_xpath_expr($expr);
Tree::XPathEngine->new()->findnodes($expr,$self->{doc});
}
package PPI::Element; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_name { my $pack_name=substr($_[0]->class,5);
$pack_name =~ s/::/-/g;
$pack_name }
sub xpath_get_next_sibling { $_[0]->snext_sibling }
sub xpath_get_previous_sibling { $_[0]->sprevious_sibling }
sub xpath_get_root_node { $_[0]->top }
sub xpath_get_parent_node { $_[0]->parent }
sub xpath_is_element_node { 1 }
sub xpath_is_attribute_node { 0 }
sub xpath_is_document_node { 0 }
sub xpath_get_attributes {
return
PPIx::XPath::Attr->new($_[0],'significant'),
PPIx::XPath::Attr->new($_[0],'content'),
}
sub xpath_to_literal { "$_[0]" }
sub xpath_get_value { "$_[0]" }
sub xpath_string_value { "$_[0]" }
sub xpath_cmp {
my( $a, $b)= @_;
if ( UNIVERSAL::isa( $b, 'PPIx::XPath::Attr')) {
# elt <=> att, compare the elt to the att->{elt}
# if the elt is the att->{elt} (cmp return 0) then -1, elt is before att
return ($a->_xpath_elt_cmp( $b->{parent}) ) || -1 ;
}
elsif ( UNIVERSAL::isa( $b, 'PPI::Document')) {
# elt <=> document, elt is after document
return 1;
} else {
# 2 elts, compare them
return $a->_xpath_elt_cmp( $b);
}
}
sub _xpath_elt_cmp {
my ($a,$b)=@_;
# easy cases
return 0 if( $a == $b);
return 1 if( $a->_xpath_in($b)); # a starts after b
return -1 if( $b->_xpath_in($a)); # a starts before b
# ancestors does not include the element itself
my @a_pile= ($a, $a->_xpath_ancestors);
my @b_pile= ($b, $b->_xpath_ancestors);
# the 2 elements are not in the same twig
return undef unless( $a_pile[-1] == $b_pile[-1]);
# find the first non common ancestors (they are siblings)
my $a_anc= pop @a_pile;
my $b_anc= pop @b_pile;
while( $a_anc == $b_anc) {
$a_anc= pop @a_pile;
$b_anc= pop @b_pile;
}
# from there move left and right and figure out the order
my( $a_prev, $a_next, $b_prev, $b_next)= ($a_anc, $a_anc, $b_anc, $b_anc);
while () {
$a_prev= $a_prev->sprevious_sibling || return( -1);
return 1 if( $a_prev == $b_next);
$a_next= $a_next->snext_sibling || return( 1);
return -1 if( $a_next == $b_prev);
$b_prev= $b_prev->sprevious_sibling || return( 1);
return -1 if( $b_prev == $a_next);
$b_next= $b_next->snext_sibling || return( -1);
return 1 if( $b_next == $a_prev);
}
}
sub _xpath_in {
my ($self, $ancestor)= @_;
while ( $self= $self->parent) {
return $self if ( $self == $ancestor);
}
}
sub _xpath_ancestors {
my( $self)= @_;
my @ancestors;
while ( $self= $self->parent) {
push @ancestors, $self;
}
return @ancestors;
}
package PPI::Token; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_child_nodes { return }
package PPI::Token::Quote::Double; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'interpolations'),
}
package PPI::Token::Number; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'base'),
}
package PPI::Token::Word; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'method-call'),
}
package PPI::Token::Comment; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'line'),
}
package PPI::Token::HereDoc; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
# TODO: add access to the contents of the heredoc (->heredoc method)
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'terminator'),
}
package PPI::Token::Prototype; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_to_literal { $_[0]->prototype }
sub xpath_get_value { $_[0]->prototype }
sub xpath_string_value { $_[0]->prototype }
package PPI::Node; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_child_nodes { $_[0]->schildren }
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'scope'),
}
package PPI::Token::Attribute; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'identifier'),
PPIx::XPath::Attr->new($_[0],'parameters'),
}
package PPI::Token::Symbol; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'symbol'),
PPIx::XPath::Attr->new($_[0],'canonical'),
PPIx::XPath::Attr->new($_[0],'raw_type'),
PPIx::XPath::Attr->new($_[0],'symbol_typel'),
}
package PPI::Statement; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'label'),
PPIx::XPath::Attr->new($_[0],'stable'),
PPIx::XPath::Attr->new($_[0],'type'),
}
package PPI::Statement::Sub; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'name'),
PPIx::XPath::Attr->new($_[0],'prototype'),
PPIx::XPath::Attr->new($_[0],'forward'),
PPIx::XPath::Attr->new($_[0],'reserved'),
}
package PPI::Statement::Package; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'namespace'),
PPIx::XPath::Attr->new($_[0],'file-scoped'),
}
package PPI::Statement::Include; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'module'),
PPIx::XPath::Attr->new($_[0],'module-version'),
PPIx::XPath::Attr->new($_[0],'version'),
PPIx::XPath::Attr->new($_[0],'version-literal'),
PPIx::XPath::Attr->new($_[0],'pragma'),
}
package PPI::Structure; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_attributes {
return $_[0]->SUPER::xpath_get_attributes,
PPIx::XPath::Attr->new($_[0],'start'),
PPIx::XPath::Attr->new($_[0],'finish'),
PPIx::XPath::Attr->new($_[0],'braces'),
}
package PPI::Document; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub xpath_get_root_node { $_[0] }
sub xpath_get_parent_node { return }
sub xpath_is_attribute_node { 0 }
sub xpath_is_document_node { 1 }
package PPIx::XPath::Attr; ## no critic(ProhibitMultiplePackages)
use strict;
use warnings;
sub new {
my ($class,$parent,$name)=@_;
my $meth=$parent->can($name);
return unless $meth;
my $value;
eval {$value=$meth->($parent);1} or return;
return unless defined $value;
return bless {parent=>$parent,name=>$name,value=>$value},$class;
}
sub xpath_get_name { $_[0]->{name} }
sub xpath_get_root_node { $_[0]->{parent}->top }
sub xpath_get_parent_node { $_[0]->{parent} }
sub xpath_is_element_node { 0 }
sub xpath_is_attribute_node { 1 }
sub xpath_is_document_node { 0 }
sub xpath_to_literal { $_[0]->{value} }
sub xpath_get_value { $_[0]->{value} }
sub xpath_string_value { $_[0]->{value} }
sub xpath_to_number { Tree::XPathEngine::Number->new($_[0]->{value}) }
sub xpath_cmp {
my( $a, $b)= @_;
if ( UNIVERSAL::isa( $b, 'PPIx::XPath::Attr')) {
# 2 attributes, compare their elements, then their name
return ($a->{parent}->_xpath_elt_cmp( $b->{parent}) )
|| ($a->{name} cmp $b->{name});
}
elsif ( UNIVERSAL::isa( $b, 'PPI::Document')) {
# att <=> document, att is after document
return 1;
}
else {
# att <=> elt : compare the att->elt and the elt
# if att->elt is the elt (cmp returns 0) then 1 (elt is before att)
return ($a->{parent}->_xpath_elt_cmp( $b) ) || 1 ;
}
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
PPIx::XPath - an XPath implementation for the PDOM
=head1 VERSION
version 2.02
=head1 SYNOPSIS
use PPI;
use PPIx::XPath;
use Tree::XPathEngine;
my $pdom = PPI::Document->new('some_code.pl');
my $xpath = Tree::XPathEngine->new();
my @subs = $xpath->findnodes('//Statement-Sub',$pdom);
my @vars = $xpath->findnodes('//Token-Symbol',$pdom);
Deprecated interface, backward-compatible with C<PPIx::XPath> version
1:
use PPIx::XPath;
my $pxp = PPIx::XPath->new("some_code.pl");
my @subs = $pxp->match("//Statement::Sub");
my $vars = $pxp->match("//Token::Symbol");
=head1 DESCRIPTION
This module augments L<PPI>'s classes with the methods required by
L<Tree::XPathEngine>, allowing you to perform complex XPath matches
against any PDOM tree.
See L<Tree::XPathEngine> for details about its methods.
=head2 Mapping the PDOM to the XPath data model
=over 4
=item *
Each node in the PDOM is an element as seen by XPath
=item *
The name of the element is the class name of the node, minus the
initial C<PPI::>, with C<::> replaced by C<->. That is:
($xpath_name = substr($pdom_node->class,5)) =~ s/::/-/g;
=item *
Only "significant" nodes are seen by XPath
=item *
all scalar-valued accessors of PDOM nodes are visible as attributes
=item *
"here-docs" contents are I<not> mapped
=back
=head1 METHODS
=head2 C<new>
my $pxp = PPIx::XPath->new("some_code.pl");
my $pxp = PPIx::XPath->new($pdom);
Only useful for the backward-compatible, and deprecated, interface.
Returns an instance of C<PPIx::XPath> tied to the given document.
=head2 C<clean_xpath_expr>
my $new_xpath_expr = $pxp->clean_xpath_expr($old_xpath_expr);
C<PPIx::XPath> version 1.0.0 allowed the use of partial package names
(like C<Token::Number>) as element names: this collides with the axis
specification of proper XPath. For this reason, in newer version of
C<PPIx::XPath>, the element name is the class name of the PDOM node,
minus the initial C<PPI::>, with C<::> replaced by C<-> (like
C<Token-Number>).
This method replaces all occurrences of PPI package names in the given
string with the new names.
=head2 C<match>
my @subs = $pxp->match("//Statement::Sub");
my $vars = $pxp->match("//Token::Symbol");
Only useful for the backward-compatible, and deprecated,
interface. From the document this instance was built against, returns
the nodes that match the given XPath expression.
You should not use this method, you should call L<<
C<findnodes>|Tree::XPathEngine/findnodes >> instead:
my $xpath = Tree::XPathEngine->new();
my @subs = $xpath->findnodes('//Statement-Sub',$pdom);
my @vars = $xpath->findnodes('//Token-Symbol',$pdom);
=head1 BUGS and LIMITATIONS
=over 4
=item *
"here-docs" contents are I<not> mapped
=item *
node ordering is slow, because I could not find a way in PPI to
compare two nodes for document order; suggestions are most welcome
=back
=head1 SEE ALSO
L<PPI>
L<Tree::XPathEngine>
L<http://www.w3.org/TR/xpath> (the XPath specification)
=head1 AUTHORS
Dan Brook <cpan@broquaint.com> original author
Gianni Ceccarelli <dakkar@thenautilus.net> Tree::XPathEngine-based re-implementation
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2016 by Gianni Ceccarelli <dakkar@thenautilus.net>.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut