# Copyrights 2008-2013 by [Mark Overmeer].
# For other contributors see ChangeLog.
# See the manual pages for details on the licensing terms.
# Pod stripped from pm file by OODoc 2.01.
package XML::LibXML::Simple;
use vars '$VERSION';
$VERSION = '0.93';
use base 'Exporter';
use strict;
use warnings;
our @EXPORT = qw(XMLin);
our @EXPORT_OK = qw(xml_in);
use XML::LibXML ();
use File::Slurp qw/read_file/;
use File::Basename qw/fileparse/;
use File::Spec ();
use Carp;
use Data::Dumper; #to be removed
my %known_opts = map { ($_ => 1) }
qw(keyattr keeproot forcecontent contentkey noattr searchpath
forcearray grouptags nsexpand normalisespace normalizespace
valueattr nsstrip parser parseropts);
my @DefKeyAttr = qw(name key id);
my $DefContentKey = qq(content);
sub new(@)
{ my $class = shift;
my $self = bless {}, $class;
my $opts = $self->{opts} = $self->_take_opts(@_);
# parser object cannot be reused
!defined $opts->{parser}
or error __x"parser option for XMLin only";
$self;
}
sub XMLin
{ my $self = @_ > 1 && UNIVERSAL::isa($_[0], __PACKAGE__) ? shift
: __PACKAGE__->new;
my $target = shift;
my $this = $self->_take_opts(@_);
my $opts = $self->_init($self->{opts}, $this);
my $xml = $self->_get_xml($target, $opts)
or return;
my $top = $self->collapse($xml, $opts);
if($opts->{keeproot})
{ my $subtop
= $opts->{forcearray_always} && ref $top ne 'ARRAY' ? [$top] : $top;
$top = { $xml->localName => $subtop };
}
$top;
}
*xml_in = \&XMLin;
sub _get_xml($$)
{ my ($self, $source, $opts) = @_;
$source = $self->default_data_source($opts)
unless defined $source;
$source = \*STDIN
if $source eq '-';
my $parser = $opts->{parser}
|| $self->_create_parser($opts->{parseropts});
my $xml
= UNIVERSAL::isa($source,'XML::LibXML::Document') ? $source
: UNIVERSAL::isa($source,'XML::LibXML::Element' ) ? $source
: ref $source eq 'SCALAR' ? $parser->parse_string($$source)
: ref $source ? $parser->parse_fh($source)
: $source =~ m{^\s*\<.*?\>\s*$}s ? $parser->parse_string($source)
: $parser->parse_file
($self->find_xml_file($source, @{$opts->{searchpath}}));
$xml = $xml->documentElement
if $xml->isa('XML::LibXML::Document');
$xml;
}
sub _create_parser(@)
{ my $self = shift;
my @popt = @_ != 1 ? @_ : ref $_[0] eq 'HASH' ? %{$_[0]} : @{$_[0]};
XML::LibXML->new
( line_numbers => 1
, no_network => 1
, expand_xinclude => 0
, expand_entities => 1
, load_ext_dtd => 0
, ext_ent_handler =>
sub { alert __x"parsing external entities disabled"; '' }
, @popt
);
}
sub _take_opts(@)
{ my $self = shift;
my %opts;
@_ % 2==0
or die "ERROR: odd number of options.\n";
while(@_)
{ my ($key, $val) = (shift, shift);
my $lkey = lc $key;
$lkey =~ s/_//g;
$known_opts{$lkey} or croak "Unrecognised option: $key";
$opts{$lkey} = $val;
}
\%opts;
}
# Returns the name of the XML file to parse if no filename or XML string
# was provided explictly.
sub default_data_source($)
{ my ($self, $opts) = @_;
my ($basename, $script_dir, $ext) = fileparse $0, qr[\.[^\.]+];
# Add script directory to searchpath
unshift @{$opts->{searchpath}}, $script_dir
if $script_dir;
"$basename.xml";
}
sub _init($$)
{ my ($self, $global, $this) = @_;
my %opt = (%$global, %$this);
if(defined $opt{contentkey})
{ $opt{collapseagain} = $opt{contentkey} =~ s/^\-// }
else { $opt{contentkey} = $DefContentKey }
$opt{normalisespace} ||= $opt{normalizespace} || 0;
$opt{searchpath} ||= [];
ref $opt{searchpath} eq 'ARRAY'
or $opt{searchpath} = [ $opt{searchpath} ];
my $fa = delete $opt{forcearray} || 0;
my (@fa_regex, %fa_elem);
if(ref $fa)
{ foreach (ref $fa eq 'ARRAY' ? @$fa : $fa)
{ if(ref $_ eq 'Regexp') { push @fa_regex, $_ }
else { $fa_elem{$_} = 1 }
}
}
else { $opt{forcearray_always} = $fa }
$opt{forcearray_regex} = \@fa_regex;
$opt{forcearray_elem} = \%fa_elem;
# Special cleanup for {keyattr} which could be arrayref or hashref,
# which behave differently.
my $ka = $opt{keyattr} || \@DefKeyAttr;
$ka = [ $ka ] unless ref $ka;
if(ref $ka eq 'ARRAY')
{ if(@$ka) { $opt{keyattr} = $ka }
else { delete $opt{keyattr} }
}
elsif(ref $ka eq 'HASH')
{ # Convert keyattr => { elem => '+attr' }
# to keyattr => { elem => [ 'attr', '+' ] }
my %at;
while(my($k,$v) = each %$ka)
{ $v =~ /^(\+|-)?(.*)$/;
$at{$k} = [ $2, $1 || '' ];
}
$opt{keyattr} = \%at;
}
# Special cleanup for {valueattr} which could be arrayref or hashref
my $va = delete $opt{valueattr} || {};
$va = +{ map +($_ => 1), @$va } if ref $va eq 'ARRAY';
$opt{valueattrlist} = $va;
# make sure there's nothing weird in {grouptags}
!$opt{grouptags} || ref $opt{grouptags} eq 'HASH'
or croak "Illegal value for 'GroupTags' option -expected a hashref";
$opt{parseropts} ||= {};
\%opt;
}
sub find_xml_file($@)
{ my ($self, $file) = (shift, shift);
my @search_path = @_ ? @_ : '.';
my ($filename, $filedir) = fileparse $file;
if($filename eq $file)
{ foreach my $path (@search_path)
{ my $fullpath = File::Spec->catfile($path, $file);
return $fullpath if -e $fullpath;
}
}
elsif(-e $file) # Ignore searchpath if dir component
{ return $file;
}
local $" = ':';
die "data source $file not found in @search_path\n";
}
sub _add_kv($$$$)
{ my ($d, $k, $v, $opts) = @_;
if(defined $d->{$k})
{ # Combine duplicate attributes into arrayref if required
if(ref $d->{$k} eq 'ARRAY') { push @{$d->{$k}}, $v }
else { $d->{$k} = [ $d->{$k}, $v ] } }
elsif(ref $v eq 'ARRAY') { push @{$d->{$k}}, $v }
elsif(ref $v eq 'HASH'
&& $k ne $opts->{contentkey}
&& $opts->{forcearray_always}) { push @{$d->{$k}}, $v }
elsif($opts->{forcearray_elem}{$k}
|| grep $k =~ $_, @{$opts->{forcearray_regex}}
) { push @{$d->{$k}}, $v }
else { $d->{$k} = $v }
$d->{$k};
}
# Takes the parse tree that XML::LibXML::Parser produced from the supplied
# XML and recurse through it 'collapsing' unnecessary levels of indirection
# (nested arrays etc) to produce a data structure that is easier to work with.
sub _expand_name($)
{ my $node = shift;
my $uri = $node->namespaceURI || '';
(length $uri ? "{$uri}" : '') . $node->localName;
}
sub collapse($$)
{ my ($self, $xml, $opts) = @_;
$xml->isa('XML::LibXML::Element') or return;
my (%data, $text);
unless($opts->{noattr})
{ foreach my $attr ($xml->attributes)
{ my $value = $attr->value;
$value = $self->normalise_space($value)
if $opts->{normalisespace}==2;
my $n = !$attr->isa('XML::LibXML::Attr') ? $attr->nodeName
: $opts->{nsexpand} ? _expand_name($attr)
: $opts->{nsstrip} ? $attr->localName
: $attr->nodeName;
_add_kv \%data, $n, $value, $opts;
}
}
my $nr_attrs = keys %data;
my $nr_elems = 0;
CHILD:
foreach my $child ($xml->childNodes)
{
if($child->isa('XML::LibXML::Element'))
{ $nr_elems++;
my $v = $self->collapse($child, $opts);
my $n = $opts->{nsexpand} ? _expand_name($child)
: $opts->{nsstrip} ? $child->localName
: $child->nodeName;
_add_kv \%data, $n, $v, $opts if defined $v;
}
elsif($child->isa('XML::LibXML::Text'))
{ $text .= $child->data;
}
}
$text = $self->normalise_space($text)
if defined $text && $opts->{normalisespace}==2;
return $opts->{forcecontent} ? { $opts->{contentkey} => $text } : $text
if $nr_attrs+$nr_elems==0 && defined $text;
$data{$opts->{contentkey}} = $text
if defined $text && $nr_elems==0;
# Roll up 'value' attributes (but only if no nested elements)
if(keys %data==1)
{ my ($k) = keys %data;
return $data{$k} if $opts->{valueattrlist}{$k};
}
# Turn arrayrefs into hashrefs if key fields present
if($opts->{keyattr})
{ while(my ($key, $val) = each %data)
{ $data{$key} = $self->array_to_hash($key, $val, $opts)
if ref $val eq 'ARRAY';
}
}
# disintermediate grouped tags
if(my $gr = $opts->{grouptags})
{
ELEMENT:
while(my ($key, $val) = each %data)
{ my $sub = $gr->{$key} or next;
if(ref $val eq 'ARRAY')
{ next ELEMENT
if grep { keys %$_!=1 || !exists $_->{$sub} } @$val;
$data{$key} = { map { %{$_->{$sub}} } @$val };
}
else
{ ref $val eq 'HASH' && keys %$val==1 or next;
my ($child_key, $child_val) = %$val;
$data{$key} = $child_val
if $gr->{$key} eq $child_key;
}
}
}
# Fold hashes containing a single anonymous array up into just the array
return $data{anon}
if keys %data == 1
&& exists $data{anon}
&& ref $data{anon} eq 'ARRAY';
# Roll up named elements with named nested 'value' attributes
if(my $va = $opts->{valueattrlist})
{ while(my($key, $val) = each %data)
{ $va->{$key} && ref $val eq 'HASH' && keys %$val==1 or next;
$data{$key} = $val->{$va->{$key}};
}
}
$nr_elems+$nr_attrs ? \%data
: !defined $text ? {}
: $opts->{forcecontent} ? { $opts->{contentkey} => $text }
: $text;
}
sub normalise_space($)
{ my $self = shift;
local $_ = shift;
s/^\s+//s;
s/\s+$//s;
s/\s\s+/ /sg;
$_;
}
# Attempts to 'fold' an array of hashes into an hash of hashes. Returns a
# reference to the hash on success or the original array if folding is
# not possible. Behaviour is controlled by 'keyattr' option.
#
sub array_to_hash($$$$)
{ my ($self, $name, $in, $opts) = @_;
my %out;
my $ka = $opts->{keyattr} or return $in;
if(ref $ka eq 'HASH')
{ my $newkey = $ka->{$name} or return $in;
my ($key, $flag) = @$newkey;
foreach my $h (@$in)
{ unless(ref $h eq 'HASH' && defined $h->{$key})
{ warn "<$name> element has no '$key' key attribute\n" if $^W;
return $in;
}
my $val = $h->{$key};
if(ref $val)
{ warn "<$name> element has non-scalar '$key' key attribute\n" if $^W;
return $in;
}
$val = $self->normalise_space($val)
if $opts->{normalisespace}==1;
warn "<$name> element has non-unique value in '$key' "
. "key attribute: $val\n" if $^W && defined $out{$val};
$out{$val} = { %$h };
$out{$val}{"-$key"} = $out{$val}{$key} if $flag eq '-';
delete $out{$val}{$key} if $flag ne '+';
}
}
else # Arrayref
{ my $default_keys = "@DefKeyAttr" eq "@$ka";
ELEMENT:
foreach my $h (@$in)
{ ref $h eq 'HASH' or return $in;
foreach my $key (@$ka)
{ my $val = $h->{$key};
defined $val or next;
if(ref $val)
{ warn "<$name> element has non-scalar '$key' key attribute"
if $^W && ! $default_keys;
return $in;
}
$val = $self->normalise_space($val)
if $opts->{normalisespace} == 1;
warn "<$name> element has non-unique value in '$key' "
. "key attribute: $val" if $^W && $out{$val};
$out{$val} = { %$h };
delete $out{$val}{$key};
next ELEMENT;
}
return $in; # No keyfield matched
}
}
$opts->{collapseagain}
or return \%out;
# avoid over-complicated structures like
# dir => { libexecdir => { content => '$exec_prefix/libexec' },
# localstatedir => { content => '$prefix' },
# }
# into
# dir => { libexecdir => '$exec_prefix/libexec',
# localstatedir => '$prefix',
# }
my $contentkey = $opts->{contentkey};
# first go through the values, checking that they are fit to collapse
foreach my $v (values %out)
{ next if ref $v eq 'HASH' && keys %$v == 1 && $v->{$contentkey};
return \%out;
}
$out{$_} = $out{$_}{$contentkey} for keys %out;
\%out;
}
1;
__END__