@@ -1,5 +1,10 @@
# XML::TreePP Changes
+2014/11/17 (0.43)
+ * Sync OpenBSD patches from their Ports tree. thanks to kucharskim
+ https://github.com/kawanet/XML-TreePP/pull/3
+ * README.md added
+
2013/11/07 (0.42)
* add empty_element_tag_end option. thanks to Songmu
https://github.com/kawanet/XML-TreePP/pull/2
@@ -3,6 +3,7 @@ MANIFEST
META.yml
Makefile.PL
README
+README.md
example/envxml.cgi
lib/XML/TreePP.pm
make-dist.sh
@@ -38,5 +38,5 @@
}
},
"release_status" : "stable",
- "version" : "0.42"
+ "version" : "0.43"
}
@@ -20,4 +20,4 @@ no_index:
requires:
LWP: 5.811
Test::More: 0
-version: 0.42
+version: 0.43
@@ -381,6 +381,9 @@ OPTIONS FOR BOTH
AUTHOR
Yusuke Kawasaki, http://www.kawa.net/
+REPOSITORY
+ https://github.com/kawanet/XML-TreePP
+
COPYRIGHT
The following copyright notice applies to all the files provided in this
distribution, including binary files, unless explicitly noted otherwise.
@@ -0,0 +1,437 @@
+# NAME
+
+XML::TreePP -- Pure Perl implementation for parsing/writing XML documents
+
+# SYNOPSIS
+
+parse an XML document from file into hash tree:
+
+ use XML::TreePP;
+ my $tpp = XML::TreePP->new();
+ my $tree = $tpp->parsefile( "index.rdf" );
+ print "Title: ", $tree->{"rdf:RDF"}->{item}->[0]->{title}, "\n";
+ print "URL: ", $tree->{"rdf:RDF"}->{item}->[0]->{link}, "\n";
+
+write an XML document as string from hash tree:
+
+ use XML::TreePP;
+ my $tpp = XML::TreePP->new();
+ my $tree = { rss => { channel => { item => [ {
+ title => "The Perl Directory",
+ link => "http://www.perl.org/",
+ }, {
+ title => "The Comprehensive Perl Archive Network",
+ link => "http://cpan.perl.org/",
+ } ] } } };
+ my $xml = $tpp->write( $tree );
+ print $xml;
+
+get a remote XML document by HTTP-GET and parse it into hash tree:
+
+ use XML::TreePP;
+ my $tpp = XML::TreePP->new();
+ my $tree = $tpp->parsehttp( GET => "http://use.perl.org/index.rss" );
+ print "Title: ", $tree->{"rdf:RDF"}->{channel}->{title}, "\n";
+ print "URL: ", $tree->{"rdf:RDF"}->{channel}->{link}, "\n";
+
+get a remote XML document by HTTP-POST and parse it into hash tree:
+
+ use XML::TreePP;
+ my $tpp = XML::TreePP->new( force_array => [qw( item )] );
+ my $cgiurl = "http://search.hatena.ne.jp/keyword";
+ my $keyword = "ajax";
+ my $cgiquery = "mode=rss2&word=".$keyword;
+ my $tree = $tpp->parsehttp( POST => $cgiurl, $cgiquery );
+ print "Link: ", $tree->{rss}->{channel}->{item}->[0]->{link}, "\n";
+ print "Desc: ", $tree->{rss}->{channel}->{item}->[0]->{description}, "\n";
+
+# DESCRIPTION
+
+XML::TreePP module parses an XML document and expands it for a hash tree.
+This generates an XML document from a hash tree as the opposite way around.
+This is a pure Perl implementation and requires no modules depended.
+This can also fetch and parse an XML document from remote web server
+like the XMLHttpRequest object does at JavaScript language.
+
+# EXAMPLES
+
+## Parse XML file
+
+Sample XML document:
+
+ <?xml version="1.0" encoding="UTF-8"?>
+ <family name="Kawasaki">
+ <father>Yasuhisa</father>
+ <mother>Chizuko</mother>
+ <children>
+ <girl>Shiori</girl>
+ <boy>Yusuke</boy>
+ <boy>Kairi</boy>
+ </children>
+ </family>
+
+Sample program to read a xml file and dump it:
+
+ use XML::TreePP;
+ use Data::Dumper;
+ my $tpp = XML::TreePP->new();
+ my $tree = $tpp->parsefile( "family.xml" );
+ my $text = Dumper( $tree );
+ print $text;
+
+Result dumped:
+
+ $VAR1 = {
+ 'family' => {
+ '-name' => 'Kawasaki',
+ 'father' => 'Yasuhisa',
+ 'mother' => 'Chizuko',
+ 'children' => {
+ 'girl' => 'Shiori'
+ 'boy' => [
+ 'Yusuke',
+ 'Kairi'
+ ],
+ }
+ }
+ };
+
+Details:
+
+ print $tree->{family}->{father}; # the father's given name.
+
+The prefix '-' is added on every attribute's name.
+
+ print $tree->{family}->{"-name"}; # the family name of the family
+
+The array is used because the family has two boys.
+
+ print $tree->{family}->{children}->{boy}->[1]; # The second boy's name
+ print $tree->{family}->{children}->{girl}; # The girl's name
+
+## Text node and attributes:
+
+If a element has both of a text node and attributes
+or both of a text node and other child nodes,
+value of a text node is moved to `#text` like child nodes.
+
+ use XML::TreePP;
+ use Data::Dumper;
+ my $tpp = XML::TreePP->new();
+ my $source = '<span class="author">Kawasaki Yusuke</span>';
+ my $tree = $tpp->parse( $source );
+ my $text = Dumper( $tree );
+ print $text;
+
+The result dumped is following:
+
+ $VAR1 = {
+ 'span' => {
+ '-class' => 'author',
+ '#text' => 'Kawasaki Yusuke'
+ }
+ };
+
+The special node name of `#text` is used because this elements
+has attribute(s) in addition to the text node.
+See also ["text\_node\_key"](#text_node_key) option.
+
+# METHODS
+
+## new
+
+This constructor method returns a new XML::TreePP object with `%options`.
+
+ $tpp = XML::TreePP->new( %options );
+
+## set
+
+This method sets a option value for `option_name`.
+If `$option_value` is not defined, its option is deleted.
+
+ $tpp->set( option_name => $option_value );
+
+See OPTIONS section below for details.
+
+## get
+
+This method returns a current option value for `option_name`.
+
+ $tpp->get( 'option_name' );
+
+## parse
+
+This method reads an XML document by string and returns a hash tree converted.
+The first argument is a scalar or a reference to a scalar.
+
+ $tree = $tpp->parse( $source );
+
+## parsefile
+
+This method reads an XML document by file and returns a hash tree converted.
+The first argument is a filename.
+
+ $tree = $tpp->parsefile( $file );
+
+## parsehttp
+
+This method receives an XML document from a remote server via HTTP and
+returns a hash tree converted.
+
+ $tree = $tpp->parsehttp( $method, $url, $body, $head );
+
+`$method` is a method of HTTP connection: GET/POST/PUT/DELETE
+`$url` is an URI of an XML file.
+`$body` is a request body when you use POST method.
+`$head` is a request headers as a hash ref.
+[LWP::UserAgent](https://metacpan.org/pod/LWP::UserAgent) module or [HTTP::Lite](https://metacpan.org/pod/HTTP::Lite) module is required to fetch a file.
+
+ ( $tree, $xml, $code ) = $tpp->parsehttp( $method, $url, $body, $head );
+
+In array context, This method returns also raw XML document received
+and HTTP response's status code.
+
+## write
+
+This method parses a hash tree and returns an XML document as a string.
+
+ $source = $tpp->write( $tree, $encode );
+
+`$tree` is a reference to a hash tree.
+
+## writefile
+
+This method parses a hash tree and writes an XML document into a file.
+
+ $tpp->writefile( $file, $tree, $encode );
+
+`$file` is a filename to create.
+`$tree` is a reference to a hash tree.
+
+# OPTIONS FOR PARSING XML
+
+This module accepts option parameters following:
+
+## force\_array
+
+This option allows you to specify a list of element names which
+should always be forced into an array representation.
+
+ $tpp->set( force_array => [ 'rdf:li', 'item', '-xmlns' ] );
+
+The default value is null, it means that context of the elements
+will determine to make array or to keep it scalar or hash.
+Note that the special wildcard name `'*'` means all elements.
+
+## force\_hash
+
+This option allows you to specify a list of element names which
+should always be forced into an hash representation.
+
+ $tpp->set( force_hash => [ 'item', 'image' ] );
+
+The default value is null, it means that context of the elements
+will determine to make hash or to keep it scalar as a text node.
+See also ["text\_node\_key"](#text_node_key) option below.
+Note that the special wildcard name `'*'` means all elements.
+
+## cdata\_scalar\_ref
+
+This option allows you to convert a cdata section into a reference
+for scalar on parsing an XML document.
+
+ $tpp->set( cdata_scalar_ref => 1 );
+
+The default value is false, it means that each cdata section is converted into a scalar.
+
+## user\_agent
+
+This option allows you to specify a HTTP\_USER\_AGENT string which
+is used by parsehttp() method.
+
+ $tpp->set( user_agent => 'Mozilla/4.0 (compatible; ...)' );
+
+The default string is `'XML-TreePP/#.##'`, where `'#.##'` is
+substituted with the version number of this library.
+
+## http\_lite
+
+This option forces pasrsehttp() method to use a [HTTP::Lite](https://metacpan.org/pod/HTTP::Lite) instance.
+
+ my $http = HTTP::Lite->new();
+ $tpp->set( http_lite => $http );
+
+## lwp\_useragent
+
+This option forces parsehttp() method to use a [LWP::UserAgent](https://metacpan.org/pod/LWP::UserAgent) instance.
+
+ my $ua = LWP::UserAgent->new();
+ $ua->timeout( 60 );
+ $ua->env_proxy;
+ $tpp->set( lwp_useragent => $ua );
+
+You may use this with [LWP::UserAgent::WithCache](https://metacpan.org/pod/LWP::UserAgent::WithCache).
+
+## base\_class
+
+This blesses class name for each element's hashref.
+Each class is named straight as a child class of it parent class.
+
+ $tpp->set( base_class => 'MyElement' );
+ my $xml = '<root><parent><child key="val">text</child></parent></root>';
+ my $tree = $tpp->parse( $xml );
+ print ref $tree->{root}->{parent}->{child}, "\n";
+
+A hash for <child> element above is blessed to `MyElement::root::parent::child`
+class. You may use this with [Class::Accessor](https://metacpan.org/pod/Class::Accessor).
+
+## elem\_class
+
+This blesses class name for each element's hashref.
+Each class is named horizontally under the direct child of `MyElement`.
+
+ $tpp->set( base_class => 'MyElement' );
+ my $xml = '<root><parent><child key="val">text</child></parent></root>';
+ my $tree = $tpp->parse( $xml );
+ print ref $tree->{root}->{parent}->{child}, "\n";
+
+A hash for <child> element above is blessed to `MyElement::child` class.
+
+## xml\_deref
+
+This option dereferences the numeric character references, like ë,
+漢, etc., in an XML document when this value is true.
+
+ $tpp->set( xml_deref => 1 );
+
+Note that, for security reasons and your convenient,
+this module dereferences the predefined character entity references,
+&, <, >, ' and ", and the numeric character
+references up to U+007F without xml\_deref per default.
+
+## require\_xml\_decl
+
+This option requires XML declaration at the top of XML document to parse.
+
+ $tpp->set( require_xml_decl => 1 );
+
+This will die when <?xml .../?> declration not found.
+
+# OPTIONS FOR WRITING XML
+
+## first\_out
+
+This option allows you to specify a list of element/attribute
+names which should always appears at first on output XML document.
+
+ $tpp->set( first_out => [ 'link', 'title', '-type' ] );
+
+The default value is null, it means alphabetical order is used.
+
+## last\_out
+
+This option allows you to specify a list of element/attribute
+names which should always appears at last on output XML document.
+
+ $tpp->set( last_out => [ 'items', 'item', 'entry' ] );
+
+## indent
+
+This makes the output more human readable by indenting appropriately.
+
+ $tpp->set( indent => 2 );
+
+This doesn't strictly follow the XML specification but does looks nice.
+
+## xml\_decl
+
+This module inserts an XML declaration on top of the XML document generated
+per default. This option forces to change it to another or just remove it.
+
+ $tpp->set( xml_decl => '' );
+
+## output\_encoding
+
+This option allows you to specify a encoding of the XML document generated
+by write/writefile methods.
+
+ $tpp->set( output_encoding => 'UTF-8' );
+
+On Perl 5.8.0 and later, you can select it from every
+encodings supported by Encode.pm. On Perl 5.6.x and before with
+Jcode.pm, you can use `Shift_JIS`, `EUC-JP`, `ISO-2022-JP` and
+`UTF-8`. The default value is `UTF-8` which is recommended encoding.
+
+## empty\_element\_tag\_end
+
+ $tpp->set( empty_element_tag_end => '>' );
+
+Set characters which close empty tag. The default value is ' />'.
+
+# OPTIONS FOR BOTH
+
+## utf8\_flag
+
+This makes utf8 flag on for every element's value parsed
+and makes it on for the XML document generated as well.
+
+ $tpp->set( utf8_flag => 1 );
+
+Perl 5.8.1 or later is required to use this.
+
+## attr\_prefix
+
+This option allows you to specify a prefix character(s) which
+is inserted before each attribute names.
+
+ $tpp->set( attr_prefix => '@' );
+
+The default character is `'-'`.
+Or set `'@'` to access attribute values like E4X, ECMAScript for XML.
+Zero-length prefix `''` is available as well, it means no prefix is added.
+
+## text\_node\_key
+
+This option allows you to specify a hash key for text nodes.
+
+ $tpp->set( text_node_key => '#text' );
+
+The default key is `#text`.
+
+## ignore\_error
+
+This module calls Carp::croak function on an error per default.
+This option makes all errors ignored and just returns.
+
+ $tpp->set( ignore_error => 1 );
+
+## use\_ixhash
+
+This option keeps the order for each element appeared in XML.
+[Tie::IxHash](https://metacpan.org/pod/Tie::IxHash) module is required.
+
+ $tpp->set( use_ixhash => 1 );
+
+This makes parsing performance slow.
+(about 100% slower than default)
+
+# AUTHOR
+
+Yusuke Kawasaki, http://www.kawa.net/
+
+# REPOSITORY
+
+https://github.com/kawanet/XML-TreePP
+
+# COPYRIGHT
+
+The following copyright notice applies to all the files provided in
+this distribution, including binary files, unless explicitly noted
+otherwise.
+
+Copyright 2006-2010 Yusuke Kawasaki
+
+# LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
@@ -419,6 +419,10 @@ This makes parsing performance slow.
Yusuke Kawasaki, http://www.kawa.net/
+=head1 REPOSITORY
+
+https://github.com/kawanet/XML-TreePP
+
=head1 COPYRIGHT
The following copyright notice applies to all the files provided in
@@ -440,7 +444,7 @@ use Carp;
use Symbol;
use vars qw( $VERSION );
-$VERSION = '0.42';
+$VERSION = '0.43';
my $XML_ENCODING = 'UTF-8';
my $INTERNAL_ENCODING = 'UTF-8';
@@ -635,7 +639,6 @@ sub parsehttp_lwp {
my $ua = $self->{lwp_useragent} if exists $self->{lwp_useragent};
if ( ! ref $ua ) {
$ua = LWP::UserAgent->new();
- $ua->timeout(10);
$ua->env_proxy();
$ua->agent( $self->{__user_agent} ) if defined $self->{__user_agent};
} else {
@@ -36,10 +36,15 @@ doit make disttest
name=`grep '^name:' META.yml | sed 's#^.*: *##; s#-#/#g;'`
main=`grep "$name.pm$" < MANIFEST | head -1`
[ "$main" == "" ] && die "main module is not found in MANIFEST"
+
doit pod2text $main > README~
diff README README~ > /dev/null || doit /bin/mv -f README~ README
/bin/rm -f README~
+doit pod2markdown $main > README.md~
+diff README.md README.md~ > /dev/null || doit /bin/mv -f README.md~ README.md
+/bin/rm -f README.md~
+
doit make dist
[ -d blib ] && doit /bin/rm -fr blib
[ -f pm_to_blib ] && doit /bin/rm -f pm_to_blib
@@ -21,10 +21,10 @@ sub parsehttp_get {
my $tpp = XML::TreePP->new();
my $name = ( $0 =~ m#([^/:\\]+)$# )[0];
$tpp->set( user_agent => "$name " );
- my $url = "http://use.perl.org/index.rss";
+ my $url = "http://rss.slashdot.org/Slashdot/slashdot";
my $tree = $tpp->parsehttp( GET => $url );
ok( ref $tree, $url );
- like( $tree->{"rdf:RDF"}->{channel}->{link}, qr{^http://}, "$url link" );
+ like( $tree->{"rss"}->{channel}->{link}, qr{^http://}, "$url link" );
}
# ----------------------------------------------------------------
sub parsehttp_post {
@@ -21,10 +21,10 @@ sub parsehttp_get {
my $tpp = XML::TreePP->new();
my $name = ( $0 =~ m#([^/:\\]+)$# )[0];
$tpp->set( user_agent => "$name " );
- my $url = "http://use.perl.org/index.rss";
+ my $url = "http://rss.slashdot.org/Slashdot/slashdot";
my $tree = $tpp->parsehttp( GET => $url );
ok( ref $tree, $url );
- like( $tree->{"rdf:RDF"}->{channel}->{link}, qr{^http://}, "$url link" );
+ like( $tree->{"rss"}->{channel}->{link}, qr{^http://}, "$url link" );
}
# ----------------------------------------------------------------
sub parsehttp_post {