Search the CPAN - metacpan.org

Changes	0 5
MANIFEST	0 1
META.json	1 1
META.yml	1 1
README	0 3
README.md	0 437
lib/XML/TreePP.pm	2 5
make-dist.sh	0 5
t/09_http-lite.t	2 2
t/10_http-lwp.t	2 2
10 files changed (This is a version diff)	8462

Changes

@@ -1,5 +1,10 @@
 # XML::TreePP Changes
 
+2014/11/17 (0.43)
+    * Sync OpenBSD patches from their Ports tree. thanks to kucharskim
+      https://github.com/kawanet/XML-TreePP/pull/3
+    * README.md added
+
 2013/11/07 (0.42)
     * add empty_element_tag_end option. thanks to Songmu
       https://github.com/kawanet/XML-TreePP/pull/2

MANIFEST

@@ -3,6 +3,7 @@ MANIFEST
 META.yml
 Makefile.PL
 README
+README.md
 example/envxml.cgi
 lib/XML/TreePP.pm
 make-dist.sh

META.json

@@ -38,5 +38,5 @@
       }
    },
    "release_status" : "stable",
-   "version" : "0.42"
+   "version" : "0.43"
 }

META.yml

@@ -20,4 +20,4 @@ no_index:
 requires:
   LWP: 5.811
   Test::More: 0
-version: 0.42
+version: 0.43

README

@@ -381,6 +381,9 @@ OPTIONS FOR BOTH
 AUTHOR
     Yusuke Kawasaki, http://www.kawa.net/
 
+REPOSITORY
+    https://github.com/kawanet/XML-TreePP
+
 COPYRIGHT
     The following copyright notice applies to all the files provided in this
     distribution, including binary files, unless explicitly noted otherwise.

README.md

@@ -0,0 +1,437 @@
+# NAME
+
+XML::TreePP -- Pure Perl implementation for parsing/writing XML documents
+
+# SYNOPSIS
+
+parse an XML document from file into hash tree:
+
+    use XML::TreePP;
+    my $tpp = XML::TreePP->new();
+    my $tree = $tpp->parsefile( "index.rdf" );
+    print "Title: ", $tree->{"rdf:RDF"}->{item}->[0]->{title}, "\n";
+    print "URL:   ", $tree->{"rdf:RDF"}->{item}->[0]->{link}, "\n";
+
+write an XML document as string from hash tree:
+
+    use XML::TreePP;
+    my $tpp = XML::TreePP->new();
+    my $tree = { rss => { channel => { item => [ {
+        title   => "The Perl Directory",
+        link    => "http://www.perl.org/",
+    }, {
+        title   => "The Comprehensive Perl Archive Network",
+        link    => "http://cpan.perl.org/",
+    } ] } } };
+    my $xml = $tpp->write( $tree );
+    print $xml;
+
+get a remote XML document by HTTP-GET and parse it into hash tree:
+
+    use XML::TreePP;
+    my $tpp = XML::TreePP->new();
+    my $tree = $tpp->parsehttp( GET => "http://use.perl.org/index.rss" );
+    print "Title: ", $tree->{"rdf:RDF"}->{channel}->{title}, "\n";
+    print "URL:   ", $tree->{"rdf:RDF"}->{channel}->{link}, "\n";
+
+get a remote XML document by HTTP-POST and parse it into hash tree:
+
+    use XML::TreePP;
+    my $tpp = XML::TreePP->new( force_array => [qw( item )] );
+    my $cgiurl = "http://search.hatena.ne.jp/keyword";
+    my $keyword = "ajax";
+    my $cgiquery = "mode=rss2&word=".$keyword;
+    my $tree = $tpp->parsehttp( POST => $cgiurl, $cgiquery );
+    print "Link: ", $tree->{rss}->{channel}->{item}->[0]->{link}, "\n";
+    print "Desc: ", $tree->{rss}->{channel}->{item}->[0]->{description}, "\n";
+
+# DESCRIPTION
+
+XML::TreePP module parses an XML document and expands it into a hash tree.
+It also generates an XML document from a hash tree, the other way around.
+This is a pure Perl implementation and has no required module dependencies.
+It can also fetch and parse an XML document from a remote web server,
+like the XMLHttpRequest object does in JavaScript.
+
+# EXAMPLES
+
+## Parse XML file
+
+Sample XML document:
+
+    <?xml version="1.0" encoding="UTF-8"?>
+    <family name="Kawasaki">
+        <father>Yasuhisa</father>
+        <mother>Chizuko</mother>
+        <children>
+            <girl>Shiori</girl>
+            <boy>Yusuke</boy>
+            <boy>Kairi</boy>
+        </children>
+    </family>
+
+Sample program to read a xml file and dump it:
+
+    use XML::TreePP;
+    use Data::Dumper;
+    my $tpp = XML::TreePP->new();
+    my $tree = $tpp->parsefile( "family.xml" );
+    my $text = Dumper( $tree );
+    print $text;
+
+Result dumped:
+
+    $VAR1 = {
+        'family' => {
+            '-name' => 'Kawasaki',
+            'father' => 'Yasuhisa',
+            'mother' => 'Chizuko',
+            'children' => {
+                'girl' => 'Shiori',
+                'boy' => [
+                    'Yusuke',
+                    'Kairi'
+                ],
+            }
+        }
+    };
+
+Details:
+
+    print $tree->{family}->{father};        # the father's given name.
+
+The prefix '-' is added on every attribute's name.
+
+    print $tree->{family}->{"-name"};       # the family name of the family
+
+The array is used because the family has two boys.
+
+    print $tree->{family}->{children}->{boy}->[1];  # The second boy's name
+    print $tree->{family}->{children}->{girl};      # The girl's name
+
+## Text node and attributes:
+
+If an element has both a text node and attributes,
+or both a text node and other child nodes,
+the value of the text node is moved to `#text` like child nodes.
+
+    use XML::TreePP;
+    use Data::Dumper;
+    my $tpp = XML::TreePP->new();
+    my $source = '<span class="author">Kawasaki Yusuke</span>';
+    my $tree = $tpp->parse( $source );
+    my $text = Dumper( $tree );
+    print $text;
+
+The result dumped is following:
+
+    $VAR1 = {
+        'span' => {
+            '-class' => 'author',
+            '#text'  => 'Kawasaki Yusuke'
+        }
+    };
+
+The special node name of `#text` is used because this element
+has attribute(s) in addition to the text node.
+See also ["text\_node\_key"](#text_node_key) option.
+
+# METHODS
+
+## new
+
+This constructor method returns a new XML::TreePP object with `%options`.
+
+    $tpp = XML::TreePP->new( %options );
+
+## set
+
+This method sets an option value for `option_name`.
+If `$option_value` is not defined, its option is deleted.
+
+    $tpp->set( option_name => $option_value );
+
+See OPTIONS section below for details.
+
+## get
+
+This method returns a current option value for `option_name`.
+
+    $tpp->get( 'option_name' );
+
+## parse
+
+This method reads an XML document by string and returns a hash tree converted.
+The first argument is a scalar or a reference to a scalar.
+
+        $tree = $tpp->parse( $source );
+
+## parsefile
+
+This method reads an XML document by file and returns a hash tree converted.
+The first argument is a filename.
+
+    $tree = $tpp->parsefile( $file );
+
+## parsehttp
+
+This method receives an XML document from a remote server via HTTP and
+returns a hash tree converted.
+
+    $tree = $tpp->parsehttp( $method, $url, $body, $head );
+
+`$method` is a method of HTTP connection: GET/POST/PUT/DELETE
+`$url` is a URI of an XML file.
+`$body` is a request body when you use POST method.
+`$head` is the request headers as a hash ref.
+[LWP::UserAgent](https://metacpan.org/pod/LWP::UserAgent) module or [HTTP::Lite](https://metacpan.org/pod/HTTP::Lite) module is required to fetch a file.
+
+    ( $tree, $xml, $code ) = $tpp->parsehttp( $method, $url, $body, $head );
+
+In array context, this method also returns the raw XML document received
+and the HTTP response's status code.
+
+## write
+
+This method parses a hash tree and returns an XML document as a string.
+
+    $source = $tpp->write( $tree, $encode );
+
+`$tree` is a reference to a hash tree.
+
+## writefile
+
+This method parses a hash tree and writes an XML document into a file.
+
+    $tpp->writefile( $file, $tree, $encode );
+
+`$file` is a filename to create.
+`$tree` is a reference to a hash tree.
+
+# OPTIONS FOR PARSING XML
+
+This module accepts option parameters following:
+
+## force\_array
+
+This option allows you to specify a list of element names which
+should always be forced into an array representation.
+
+    $tpp->set( force_array => [ 'rdf:li', 'item', '-xmlns' ] );
+
+The default value is null, it means that context of the elements
+will determine to make array or to keep it scalar or hash.
+Note that the special wildcard name `'*'` means all elements.
+
+## force\_hash
+
+This option allows you to specify a list of element names which
+should always be forced into a hash representation.
+
+    $tpp->set( force_hash => [ 'item', 'image' ] );
+
+The default value is null, it means that context of the elements
+will determine to make hash or to keep it scalar as a text node.
+See also ["text\_node\_key"](#text_node_key) option below.
+Note that the special wildcard name `'*'` means all elements.
+
+## cdata\_scalar\_ref
+
+This option allows you to convert a cdata section into a reference
+for scalar on parsing an XML document.
+
+    $tpp->set( cdata_scalar_ref => 1 );
+
+The default value is false, it means that each cdata section is converted into a scalar.
+
+## user\_agent
+
+This option allows you to specify a HTTP\_USER\_AGENT string which
+is used by parsehttp() method.
+
+    $tpp->set( user_agent => 'Mozilla/4.0 (compatible; ...)' );
+
+The default string is `'XML-TreePP/#.##'`, where `'#.##'` is
+substituted with the version number of this library.
+
+## http\_lite
+
+This option forces parsehttp() method to use a [HTTP::Lite](https://metacpan.org/pod/HTTP::Lite) instance.
+
+    my $http = HTTP::Lite->new();
+    $tpp->set( http_lite => $http );
+
+## lwp\_useragent
+
+This option forces parsehttp() method to use a [LWP::UserAgent](https://metacpan.org/pod/LWP::UserAgent) instance.
+
+    my $ua = LWP::UserAgent->new();
+    $ua->timeout( 60 );
+    $ua->env_proxy;
+    $tpp->set( lwp_useragent => $ua );
+
+You may use this with [LWP::UserAgent::WithCache](https://metacpan.org/pod/LWP::UserAgent::WithCache).
+
+## base\_class
+
+This blesses class name for each element's hashref.
+Each class is named straight as a child class of its parent class.
+
+    $tpp->set( base_class => 'MyElement' );
+    my $xml  = '<root><parent><child key="val">text</child></parent></root>';
+    my $tree = $tpp->parse( $xml );
+    print ref $tree->{root}->{parent}->{child}, "\n";
+
+A hash for the `<child>` element above is blessed into the `MyElement::root::parent::child`
+class. You may use this with [Class::Accessor](https://metacpan.org/pod/Class::Accessor).
+
+## elem\_class
+
+This blesses class name for each element's hashref.
+Each class is named horizontally under the direct child of `MyElement`.
+
+    $tpp->set( elem_class => 'MyElement' );
+    my $xml  = '<root><parent><child key="val">text</child></parent></root>';
+    my $tree = $tpp->parse( $xml );
+    print ref $tree->{root}->{parent}->{child}, "\n";
+
+A hash for the `<child>` element above is blessed into the `MyElement::child` class.
+
+## xml\_deref
+
+This option dereferences the numeric character references, like &#xEB;,
+&#28450;, etc., in an XML document when this value is true.
+
+    $tpp->set( xml_deref => 1 );
+
+Note that, for security reasons and your convenience,
+this module dereferences the predefined character entity references,
+&amp;, &lt;, &gt;, &apos; and &quot;, and the numeric character
+references up to U+007F without xml\_deref per default.
+
+## require\_xml\_decl
+
+This option requires XML declaration at the top of XML document to parse.
+
+    $tpp->set( require_xml_decl => 1 );
+
+This will die when the `<?xml ... ?>` declaration is not found.
+
+# OPTIONS FOR WRITING XML
+
+## first\_out
+
+This option allows you to specify a list of element/attribute
+names which should always appear first in the output XML document.
+
+    $tpp->set( first_out => [ 'link', 'title', '-type' ] );
+
+The default value is null, it means alphabetical order is used.
+
+## last\_out
+
+This option allows you to specify a list of element/attribute
+names which should always appear last in the output XML document.
+
+    $tpp->set( last_out => [ 'items', 'item', 'entry' ] );
+
+## indent
+
+This makes the output more human readable by indenting appropriately.
+
+    $tpp->set( indent => 2 );
+
+This doesn't strictly follow the XML specification but does look nice.
+
+## xml\_decl
+
+This module inserts an XML declaration on top of the XML document generated
+per default. This option forces to change it to another or just remove it.
+
+    $tpp->set( xml_decl => '' );
+
+## output\_encoding
+
+This option allows you to specify an encoding of the XML document generated
+by write/writefile methods.
+
+    $tpp->set( output_encoding => 'UTF-8' );
+
+On Perl 5.8.0 and later, you can select any of the
+encodings supported by Encode.pm. On Perl 5.6.x and before with
+Jcode.pm, you can use `Shift_JIS`, `EUC-JP`, `ISO-2022-JP` and
+`UTF-8`. The default value is `UTF-8`, which is the recommended encoding.
+
+## empty\_element\_tag\_end
+
+    $tpp->set( empty_element_tag_end => '>' );
+
+Set characters which close empty tag. The default value is ' />'.
+
+# OPTIONS FOR BOTH
+
+## utf8\_flag
+
+This makes utf8 flag on for every element's value parsed
+and makes it on for the XML document generated as well.
+
+    $tpp->set( utf8_flag => 1 );
+
+Perl 5.8.1 or later is required to use this.
+
+## attr\_prefix
+
+This option allows you to specify a prefix character(s) which
+is inserted before each attribute name.
+
+    $tpp->set( attr_prefix => '@' );
+
+The default character is `'-'`.
+Or set `'@'` to access attribute values like E4X, ECMAScript for XML.
+Zero-length prefix `''` is available as well, it means no prefix is added.
+
+## text\_node\_key
+
+This option allows you to specify a hash key for text nodes.
+
+    $tpp->set( text_node_key => '#text' );
+
+The default key is `#text`.
+
+## ignore\_error
+
+This module calls Carp::croak function on an error per default.
+This option makes all errors ignored and just returns.
+
+    $tpp->set( ignore_error => 1 );
+
+## use\_ixhash
+
+This option keeps the order for each element appeared in XML.
+[Tie::IxHash](https://metacpan.org/pod/Tie::IxHash) module is required.
+
+    $tpp->set( use_ixhash => 1 );
+
+This makes parsing performance slow.
+(about 100% slower than default)
+
+# AUTHOR
+
+Yusuke Kawasaki, http://www.kawa.net/
+
+# REPOSITORY
+
+https://github.com/kawanet/XML-TreePP
+
+# COPYRIGHT
+
+The following copyright notice applies to all the files provided in
+this distribution, including binary files, unless explicitly noted
+otherwise.
+
+Copyright 2006-2010 Yusuke Kawasaki
+
+# LICENSE
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.

lib/XML/TreePP.pm

@@ -419,6 +419,10 @@ This makes parsing performance slow.
 
 Yusuke Kawasaki, http://www.kawa.net/
 
+=head1 REPOSITORY
+
+https://github.com/kawanet/XML-TreePP
+
 =head1 COPYRIGHT
 
 The following copyright notice applies to all the files provided in
@@ -440,7 +444,7 @@ use Carp;
 use Symbol;
 
 use vars qw( $VERSION );
-$VERSION = '0.42';
+$VERSION = '0.43';
 
 my $XML_ENCODING      = 'UTF-8';
 my $INTERNAL_ENCODING = 'UTF-8';
@@ -635,7 +639,6 @@ sub parsehttp_lwp {
     my $ua = $self->{lwp_useragent} if exists $self->{lwp_useragent};
     if ( ! ref $ua ) {
         $ua = LWP::UserAgent->new();
-        $ua->timeout(10);
         $ua->env_proxy();
         $ua->agent( $self->{__user_agent} ) if defined $self->{__user_agent};
     } else {

make-dist.sh

@@ -36,10 +36,15 @@ doit make disttest
 name=`grep '^name:' META.yml | sed 's#^.*: *##; s#-#/#g;'`
 main=`grep "$name.pm$" < MANIFEST | head -1`
 [ "$main" == "" ] && die "main module is not found in MANIFEST"
+
 doit pod2text $main > README~
 diff README README~ > /dev/null || doit /bin/mv -f README~ README
 /bin/rm -f README~
 
+doit pod2markdown $main > README.md~
+diff README.md README.md~ > /dev/null || doit /bin/mv -f README.md~ README.md
+/bin/rm -f README.md~
+
 doit make dist
 [ -d blib ] && doit /bin/rm -fr blib
 [ -f pm_to_blib ] && doit /bin/rm -f pm_to_blib

t/09_http-lite.t

@@ -21,10 +21,10 @@ sub parsehttp_get {
     my $tpp = XML::TreePP->new();
     my $name = ( $0 =~ m#([^/:\\]+)$# )[0];
     $tpp->set( user_agent => "$name " );
-    my $url = "http://use.perl.org/index.rss";
+    my $url = "http://rss.slashdot.org/Slashdot/slashdot";
     my $tree = $tpp->parsehttp( GET => $url );
     ok( ref $tree, $url );
-    like( $tree->{"rdf:RDF"}->{channel}->{link}, qr{^http://}, "$url link" );
+    like( $tree->{"rss"}->{channel}->{link}, qr{^http://}, "$url link" );
 }
 # ----------------------------------------------------------------
 sub parsehttp_post {

t/10_http-lwp.t

@@ -21,10 +21,10 @@ sub parsehttp_get {
     my $tpp = XML::TreePP->new();
     my $name = ( $0 =~ m#([^/:\\]+)$# )[0];
     $tpp->set( user_agent => "$name " );
-    my $url = "http://use.perl.org/index.rss";
+    my $url = "http://rss.slashdot.org/Slashdot/slashdot";
     my $tree = $tpp->parsehttp( GET => $url );
     ok( ref $tree, $url );
-    like( $tree->{"rdf:RDF"}->{channel}->{link}, qr{^http://}, "$url link" );
+    like( $tree->{"rss"}->{channel}->{link}, qr{^http://}, "$url link" );
 }
 # ----------------------------------------------------------------
 sub parsehttp_post {

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)