# Copyright (c) 2004 Timothy Appnel
# http://www.timaoutloud.org/
# This code is released under the Artistic License.
#
# XML::Parser::Style::Elemental - A slightly more advanced object
# tree style for XML::Parser.
#
package XML::Parser::Style::Elemental;
use strict;
use vars qw($VERSION);
$VERSION = '0.50';
sub Init {
my $xp = shift;
$xp->{Elemental} ||= {};
my $e = $xp->{Elemental};
$e->{Document} ?
eval "use $e->{Document};" :
compile_class($xp,'Document');
$e->{Element} ?
eval "use $e->{Element};" :
compile_class($xp,'Element');
$e->{Characters} ?
eval "use $e->{Characters};" :
compile_class($xp,'Characters');
$xp->{__doc} = $e->{Document}->new();
push( @{ $xp->{__stack} }, $xp->{__doc} );
# $xp->{__NSMAP} = {} if ($xp->{NSMap});
}
sub Start {
my $xp = shift;
my $tag = shift;
my $node = $xp->{Elemental}->{Element}->new();
$node->name( ns_qualify($xp,$tag) );
$node->parent( $xp->{__stack}->[-1] );
if (@_) {
$node->attributes({});
while (@_) {
my($key,$value) = (shift @_,shift @_);
$node->attributes->{ns_qualify($xp,$key,$tag)} = $value
}
}
$node->parent->contents([]) unless $node->parent->contents;
push( @{ $node->parent->contents }, $node);
push( @{ $xp->{__stack} }, $node);
#if ($xp->{NSMap} && $xp->new_ns_prefixes) {
# my %newns;
# map { $newns{$_} = $xp->expand_ns_prefix($_) }
# $xp->new_ns_prefixes;
# $xp->{__NSMAP}->{$node} = \%newns;
#}
}
sub Char {
my ($xp,$data)=@_;
my $parent = $xp->{__stack}->[-1];
$parent->contents([]) unless $parent->contents;
my $contents = $parent->contents();
my $class = $xp->{Elemental}->{Characters};
unless ($contents && ref($contents->[-1]) eq $class) {
return if ($xp->{Elemental}->{No_Whitespace} && $data!~/\S/);
my $node = $class->new();
$node->parent($parent);
$node->data($data);
push ( @{ $contents }, $node );
} else {
my $d = $contents->[-1]->data() || '';
return if ( $xp->{Elemental}->{No_Whitespace} && $d!~/\S/ );
$contents->[-1]->data("$d$data");
}
}
sub End { pop( @{ $_[0]->{__stack} } ) }
sub Final {
delete $_[0]->{__stack};
$_[0]->{__doc}; # , $_[0]->{__NSMAP};
}
sub ns_qualify {
return $_[1] unless $_[0]->{Namespaces};
my $ns=$_[0]->namespace($_[1]) ||
( $_[2] ? $_[0]->namespace($_[2]) : return $_[1] );
$ns=~m!(/|#)$! ? "$ns$_[1]" : "$ns/$_[1]";
}
#--- Dynamic Class Factory
{
my $methods = {
# All get root methods through special handling
# Element gets a text_content method also through
# special handling
Document => [ qw(contents) ],
Element => [ qw(name parent contents attributes) ],
Characters => [ qw(parent data) ]
};
sub compile_class {
my $xp = shift;
my $type = shift;
my $class = "${$xp}{Pkg}::$type";
no strict 'refs';
*{"${class}::new"} = sub { bless { }, $class };
foreach my $field ( @{$methods->{$type}} ) {
*{"${class}::${field}"} =
sub {
$_[0]->{$field} = $_[1] if defined $_[1];
$_[0]->{$field};
};
}
*{"${class}::root"} =
sub {
my $o=shift;
while($o->can('parent') && $o->parent) { $o = $o->parent }
$o;
};
if ($type eq 'Element') {
*{"${class}::text_content"} =
sub {
return '' unless ref($_[0]->contents);
join('', map { ref($_) eq $class ?
$_->text_content : $_->data }
@{ $_[0]->contents } );
};
}
$xp->{Elemental}->{$type} = $class;
}
}
1;
__END__
=begin
=head1 NAME
XML::Parser::Style::Elemental - a slightly more advanced and flexible
object tree style for XML::Parser
=head1 SYNOPSIS
#!/usr/bin/perl -w
use XML::Parser;
use Data::Dumper;
my $p = XML::Parser->new( Style => 'Elemental', Pkg => 'E' );
my $doc = <<DOC;
<foo>
<bar key="value">The world is foo enough.</bar>
</foo>
DOC
my ($e) = $p->parse($doc);
print Data::Dumper->Dump( [$e] );
my $test_node = $e->contents->[0];
print "root: ".$test_node->root." is ".$e."\n";
print "text content of ".$test_node->name."\n";
print $test_node->text_content;
=head1 DESCRIPTION
This module is similar to the L<XML::Parser> Objects style, but
slightly more advanced and flexible. Like the Objects style, an
object is created for each element. Elemental uses a dynamic class
factory to create objects with accessor methods or can use any
supplied classes that support the same method signatures. This module
also provides full namespace support when the C<Namespace> option is in
use in addition to a C<No_Whitespace> option for stripping out
extraneous non-markup characters that are commonly introduced
when formatting XML to be human readable.
=head1 CLASS TYPES
Elemental style creates its parse tree with three class types --
Document, Element and Character. Developers have the option
of using the built-in dynamic classes or registering their own.
The following explains the purpose and method prototypes of each
class type.
=item Document - The root of the tree.
=over 4
=item contents - An array reference of direct decendents.
=item root - Return reference of itself.
=back
=item Element - The tags in the document.
=over 4
=item name - The tag name. If the Namespace options is set to true,
the extend name is stored.
=item parent - A reference to the parent object.
=item contents - An ordered array reference of direct
descendents/children objects.
=item attributes - A hash reference of key-value pairs representing
the tags attributes.
=item text_content - The text content of all siblings, whitespace included.
=item root - A reference to the Document object.
=back
=item Characters - Non-markup text.
=over 4
=item data - A string of non-markup characters.
=item parent - A reference to the parent object.
=item root - A reference to the Document object.
=back
=head1 OPTIONS
Elemental specific options are set in the L<XML::Parser> constructor
through a hash element with a key of 'Elemental', The value of
Elemental is expected to be a hash reference with one of more of the
option keys detailed in the following sections.
=head2 USING DYNAMIC CLASS OBJECTS
When parsing a document, Elemental uses a dynamic class factory to
create minimal lightweight objects with accessor methods. These
classes implement the pattern detailed in L<CLASS TYPES> in addition
to a parameterless constructor method of C<new>. Similar to the
Objects style these classes are blessed into the package set with
the C<Pkg> option.
Here we create a parser that uses Elemental to create Document, Element
and Characters objects in the E package.
my $p = XML::Parser->new( Style => 'Elemental', Pkg => 'E' );
=head2 REGISTERING CLASSES
If you require something more functional then the generated dynamic
classes you can register your own with Elemental. Like the Elemental
class types, the option keys are C<Document>, C<Element> and
C<Characters>. Here we register three classes and turn on the
C<No_Whitespace> option.
my $p = XML::Parser->new( Style => 'Elemental',
Namespace => 1,
Elemental=>{
Document=>'Foo::Doc',
Element=>'Foo::El',
Characters=>'Foo::Chars',
No_Whitespace=>1
}
);
Note that, the same class can be registered for more then one class type as long
as it supports all of the necessary method prototypes it is being
registered to handle. See L<CLASS TYPES> for more detail.
=head2 NO_WHITESPACE
When set to true, C<No_Whitespace> causes Elemental to pass over character
strings of all whitespace instead of creating a new Character object. This
options is helpful in stripping out extraneous non-markup characters that
are commonly introduced when formatting XML to be human readable.
=head1 SEE ALSO
L<XML::Parser::Style::Objects>
=head1 TO DO
=item * Implement xml::base support instead of No_Whitespace.
=head1 LICENSE
The software is released under the Artistic License. The terms of
the Artistic License are described at
L<http://www.perl.com/language/misc/Artistic.html>.
=head1 AUTHOR & COPYRIGHT
Except where otherwise noted, XML::Parser::Style::Elemental is
Copyright 2004, Timothy Appnel, cpan@timaoutloud.org. All rights
reserved.
=cut
=end