The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
Build.PL 2013
Changes 08
MANIFEST 04
META.json 88
META.yml 76
README.md 625
lib/Web/Query/LibXML.pm 19
lib/Web/Query.pm 2873
t/06_new_from_url_error_handling.t 08
t/bad-url-with-options.t 024
t/no_space_compacting.t 031
t/tagname.t 023
t/xpath.t 022
13 files changed (This is a version diff) 70254
@@ -12,8 +12,6 @@ use utf8;
 use Module::Build;
 use File::Basename;
 use File::Spec;
-use CPAN::Meta;
-use CPAN::Meta::Prereqs;
 
 my %args = (
     license              => 'perl',
@@ -34,7 +32,7 @@ my %args = (
     test_files           => ((-d '.git' || $ENV{RELEASE_TESTING}) && -d 'xt') ? 't/ xt/' : 't/',
     recursive_test_files => 1,
 
-    
+
 );
 if (-d 'share') {
     $args{share_dir} = 'share';
@@ -53,20 +51,15 @@ my $builder = Module::Build->subclass(
 )->new(%args);
 $builder->create_build_script();
 
-my $mbmeta = CPAN::Meta->load_file('MYMETA.json');
-my $meta = CPAN::Meta->load_file('META.json');
-my $prereqs_hash = CPAN::Meta::Prereqs->new(
-    $meta->prereqs
-)->with_merged_prereqs(
-    CPAN::Meta::Prereqs->new($mbmeta->prereqs)
-)->as_string_hash;
-my $mymeta = CPAN::Meta->new(
-    {
-        %{$meta->as_struct},
-        prereqs => $prereqs_hash
-    }
-);
-print "Merging cpanfile prereqs to MYMETA.yml\n";
-$mymeta->save('MYMETA.yml', { version => 1.4 });
-print "Merging cpanfile prereqs to MYMETA.json\n";
-$mymeta->save('MYMETA.json', { version => 2 });
+use File::Copy;
+
+print "cp META.json MYMETA.json\n";
+copy("META.json","MYMETA.json") or die "Copy failed(META.json): $!";
+
+if (-f 'META.yml') {
+    print "cp META.yml MYMETA.yml\n";
+    copy("META.yml","MYMETA.yml") or die "Copy failed(META.yml): $!";
+} else {
+    print "There is no META.yml... You may install this module from the repository...\n";
+}
+
@@ -1,5 +1,13 @@
 Revision history for Perl extension Web::Query
 
+0.27 2014-12-24T00:52:33Z
+
+    - new() with a bad url wasn't returning 'undef'
+        when options were given. (yanick)
+    - Add 'no_space_compacting' option. #33 (yanick)
+    - Add 'tagname' to query/modify tag names. #34 (yanick)
+    - XPath expressions can now be used as well. #35 (yanick)
+
 0.26 2014-03-31T08:23:34Z
 
     - impl prev() and next() method #31
@@ -23,6 +23,7 @@ t/add.t
 t/add_class.t
 t/after.t
 t/append.t
+t/bad-url-with-options.t
 t/before.t
 t/clone.t
 t/contents.t
@@ -37,11 +38,14 @@ t/insert_before.t
 t/lib/My/TreeBuilder.pm
 t/lib/My/Web/Query.pm
 t/next.t
+t/no_space_compacting.t
 t/prepend.t
 t/prev.t
 t/remove_class.t
 t/replace_with.t
 t/store_comments.t
+t/tagname.t
+t/xpath.t
 xt/02_perlcritic.t
 xt/live/01_simple.t
 META.yml
@@ -4,7 +4,7 @@
       "Tokuhiro Matsuno <tokuhirom AAJKLFJEF@ GMAIL COM>"
    ],
    "dynamic_config" : 0,
-   "generated_by" : "Minilla/v0.13.0",
+   "generated_by" : "Minilla/v2.3.0",
    "license" : [
       "perl_5"
    ],
@@ -28,8 +28,6 @@
    "prereqs" : {
       "configure" : {
          "requires" : {
-            "CPAN::Meta" : "0",
-            "CPAN::Meta::Prereqs" : "0",
             "Module::Build" : "0.38"
          }
       },
@@ -37,6 +35,7 @@
          "requires" : {
             "Test::CPAN::Meta" : "0",
             "Test::MinimumVersion::Fast" : "0.04",
+            "Test::PAUSE::Permissions" : "0.04",
             "Test::Pod" : "1.41",
             "Test::Spellunker" : "v0.2.7"
          }
@@ -61,11 +60,11 @@
    "provides" : {
       "Web::Query" : {
          "file" : "lib/Web/Query.pm",
-         "version" : "0.26"
+         "version" : "0.27"
       },
       "Web::Query::LibXML" : {
          "file" : "lib/Web/Query/LibXML.pm",
-         "version" : "0.26"
+         "version" : "0.27"
       }
    },
    "release_status" : "stable",
@@ -79,7 +78,7 @@
          "web" : "https://github.com/tokuhirom/Web-Query"
       }
    },
-   "version" : "0.26",
+   "version" : "0.27",
    "x_authority" : "cpan:TOKUHIROM",
    "x_contributors" : [
       "Hiroki Honda <cside.story@gmail.com>",
@@ -88,8 +87,9 @@
       "Carlos Fernando Avila Gratz <cafe01@gmail.com>",
       "DQNEO <dqneoo@gmail.com>",
       "Carlos Fernando Avila Gratz <cafe@q1software.com>",
-      "Yanick Champoux <yanick@babyl.dyndns.org>",
       "xaicron <xaicron@gmail.com>",
-      "Tokuhiro Matsuno <tokuhirom@gmail.com>"
+      "moznion <moznion@gmail.com>",
+      "Tokuhiro Matsuno <tokuhirom@gmail.com>",
+      "Yanick Champoux <yanick@babyl.dyndns.org>"
    ]
 }
@@ -5,11 +5,9 @@ author:
 build_requires:
   Test::More: '0.98'
 configure_requires:
-  CPAN::Meta: '0'
-  CPAN::Meta::Prereqs: '0'
   Module::Build: '0.38'
 dynamic_config: 0
-generated_by: 'Minilla/v0.13.0, CPAN::Meta::Converter version 2.133380'
+generated_by: 'Minilla/v2.3.0, CPAN::Meta::Converter version 2.142690'
 license: perl
 meta-spec:
   url: http://module-build.sourceforge.net/META-spec-v1.4.html
@@ -28,10 +26,10 @@ no_index:
 provides:
   Web::Query:
     file: lib/Web/Query.pm
-    version: '0.26'
+    version: '0.27'
   Web::Query::LibXML:
     file: lib/Web/Query/LibXML.pm
-    version: '0.26'
+    version: '0.27'
 requires:
   HTML::Entities: '0'
   HTML::Selector::XPath: '0.06'
@@ -44,7 +42,7 @@ resources:
   bugtracker: https://github.com/tokuhirom/Web-Query/issues
   homepage: https://github.com/tokuhirom/Web-Query
   repository: git://github.com/tokuhirom/Web-Query.git
-version: '0.26'
+version: '0.27'
 x_authority: cpan:TOKUHIROM
 x_contributors:
   - 'Hiroki Honda <cside.story@gmail.com>'
@@ -53,6 +51,7 @@ x_contributors:
   - 'Carlos Fernando Avila Gratz <cafe01@gmail.com>'
   - 'DQNEO <dqneoo@gmail.com>'
   - 'Carlos Fernando Avila Gratz <cafe@q1software.com>'
-  - 'Yanick Champoux <yanick@babyl.dyndns.org>'
   - 'xaicron <xaicron@gmail.com>'
+  - 'moznion <moznion@gmail.com>'
   - 'Tokuhiro Matsuno <tokuhirom@gmail.com>'
+  - 'Yanick Champoux <yanick@babyl.dyndns.org>'
@@ -22,9 +22,13 @@ Web::Query built at top of the CPAN modules, [HTML::TreeBuilder::XPath](https://
 
 So, this module uses [HTML::Selector::XPath](https://metacpan.org/pod/HTML::Selector::XPath) and only supports the CSS 3
 selector supported by that module.
-Web::Query doesn't support jQuery's extended queries(yet?).
+Web::Query doesn't support jQuery's extended queries (yet?). If a selector is
+passed as a scalar ref, it is taken as a raw XPath expression.
 
-__THIS LIBRARY IS UNDER DEVELOPMENT. ANY API MAY CHANGE WITHOUT NOTICE__.
+    wq( '<div><p>hello</p><p>there</p></div>' )->find( 'p' );       # css selector
+    wq( '<div><p>hello</p><p>there</p></div>' )->find( \'/div/p' ); # xpath selector
+
+**THIS LIBRARY IS UNDER DEVELOPMENT. ANY API MAY CHANGE WITHOUT NOTICE**.
 
 # FUNCTIONS
 
@@ -44,8 +48,9 @@ __THIS LIBRARY IS UNDER DEVELOPMENT. ANY API MAY CHANGE WITHOUT NOTICE__.
 
     This method returns undefined value on non-successful response with URL.
 
-    Currently, the only option valid option is _indent_, which will be used as
-    the indentation string if the object is printed.
+    Currently, the only two valid options are _indent_, which will be used as
+    the indentation string if the object is printed, and _no\_space\_compacting_, 
+    which will prevent the compaction of whitespace characters in text blocks.
 
 - my $q = Web::Query->new\_from\_element($element: HTML::Element)
 
@@ -125,9 +130,9 @@ Get the descendants of each element in the current set of matched elements, filt
     my $q2 = $q->find($selector); # $selector is a CSS3 selector.
     
 
-__NOTE__ If you want to match the element itself, use ["filter"](#filter).
+**NOTE** If you want to match the element itself, use ["filter"](#filter).
 
-__INCOMPATIBLE CHANGE__ 
+**INCOMPATIBLE CHANGE** 
 From v0.14 to v0.19 (inclusive) find() also matched the element itself, which is not jQuery compatible.
 You can achieve that result using `filter()`, `add()` and `find()`:
 
@@ -212,6 +217,14 @@ Get/Set the attribute value in element.
 
     $q->attr($name, $val);
 
+### tagname
+
+Get/Set the tag name of elements.
+
+    my $name = $q->tagname;
+
+    $q->tagname($new_name);
+
 ### before
 
 Insert content, specified by the parameter, before each element in the set of matched elements.
@@ -310,6 +323,12 @@ Get/Set the text.
 If called in a scalar context, only return the string representation
 of the first element
 
+## OTHERS
+
+- Web::Query->last\_response()
+
+    Returns the last HTTP response (an `HTTP::Response` object) generated by `new_from_url()`.
+
 # HOW DO I CUSTOMIZE USER AGENT?
 
 You can specify your own instance of [LWP::UserAgent](https://metacpan.org/pod/LWP::UserAgent).
@@ -6,7 +6,7 @@ use parent qw/Web::Query Exporter/;
 use HTML::TreeBuilder::LibXML;
 
 
-our $VERSION = "0.26";
+our $VERSION = "0.27";
 
 our @EXPORT = qw/wq/;
 
@@ -58,6 +58,14 @@ sub next {
     return (ref $self || $self)->new_from_element(\@new, $self);
 }
 
+sub tagname {
+    my $self = shift;
+    my $method = @_ ? 'setNodeName' : 'nodeName';
+    
+    my @retval = map { $_->{node}->$method(@_) } @{$self->{trees}};
+    return wantarray ? @retval : $retval[0];
+}
+
 1;
 __END__
 
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use 5.008001;
 use parent qw/Exporter/;
-our $VERSION = '0.26';
+our $VERSION = '0.27';
 use HTML::TreeBuilder::XPath;
 use LWP::UserAgent;
 use HTML::Selector::XPath 0.06 qw/selector_to_xpath/;
@@ -24,77 +24,86 @@ sub __ua {
 }
 
 sub _build_tree {
-    my $class = shift;
-    my $tree = HTML::TreeBuilder::XPath->new();
+    my( $self, $options ) = @_;
+
+    my $no_space_compacting = ref $self ? $self->{no_space_compacting} 
+    : ref $options eq 'HASH' ? $options->{no_space_compacting} : 0;
+
+    my $tree = HTML::TreeBuilder::XPath->new( 
+        no_space_compacting => $no_space_compacting
+    );
     $tree->ignore_unknown(0);
     $tree->store_comments(1);
-    $tree;    
+    $tree;
 }
 
 sub new {
     my ($class, $stuff, $options) = @_;
 
-    my $self = $class->_resolve_new($stuff);
+    my $self = $class->_resolve_new($stuff,$options)
+        or return undef;
 
     $self->{indent} = $options->{indent} if $options->{indent};
 
+    $self->{no_space_compacting} = $options->{no_space_compacting};
+
     return $self;
 }
 
 sub _resolve_new {
-    my( $class, $stuff ) = @_;
+    my( $class, $stuff, $options) = @_;
 
     if (blessed $stuff) {
         if ($stuff->isa('HTML::Element')) {
-            return $class->new_from_element([$stuff]);
+            return $class->new_from_element([$stuff],$options);
         } 
         
         if ($stuff->isa('URI')) {
-            return $class->new_from_url($stuff->as_string);
+            return $class->new_from_url($stuff->as_string,$options);
         } 
         
         if ($stuff->isa($class)) {
-            return $class->new_from_element($stuff->{trees});
+            return $class->new_from_element($stuff->{trees}, $options);
         } 
 
         die "Unknown source type: $stuff";
     }
 
-    return $class->new_from_element($stuff) if ref $stuff eq 'ARRAY';
+    return $class->new_from_element($stuff,$options) if ref $stuff eq 'ARRAY';
 
-    return $class->new_from_url($stuff) if $stuff =~ m{^(?:https?|file)://};
+    return $class->new_from_url($stuff,$options) if $stuff =~ m{^(?:https?|file)://};
 
-    return $class->new_from_html($stuff) if $stuff =~ /<.*?>/;
+    return $class->new_from_html($stuff,$options) if $stuff =~ /<.*?>/;
 
-    return $class->new_from_file($stuff) if $stuff !~ /\n/ && -f $stuff;
+    return $class->new_from_file($stuff,$options) if $stuff !~ /\n/ && -f $stuff;
 
     die "Unknown source type: $stuff";
 }
 
 sub new_from_url {
-    my ($class, $url) = @_;
+    my ($class, $url,$options) = @_;
     $RESPONSE = __ua()->get($url);
     if ($RESPONSE->is_success) {
-        return $class->new_from_html($RESPONSE->decoded_content);
+        return $class->new_from_html($RESPONSE->decoded_content,$options);
     } else {
         return undef;
     }
 }
 
 sub new_from_file {
-    my ($class, $fname) = @_;
-    my $tree = $class->_build_tree;
+    my ($class, $fname, $options) = @_;
+    my $tree = $class->_build_tree($options);
     $tree->parse_file($fname);
-    my $self = $class->new_from_element([$tree->disembowel]);
+    my $self = $class->new_from_element([$tree->disembowel],$options);
     $self->{need_delete}++;
     return $self;
 }
 
 sub new_from_html {
-    my ($class, $html) = @_;
-    my $tree = $class->_build_tree;
+    my ($class, $html,$options) = @_;
+    my $tree = $class->_build_tree($options);
     $tree->parse_content($html);
-    my $self = $class->new_from_element([$tree->disembowel]);
+    my $self = $class->new_from_element([$tree->disembowel],$options);
     $self->{need_delete}++;
     return $self;
 }
@@ -147,7 +156,7 @@ sub eq {
 sub find {
     my ($self, $selector) = @_;
     
-    my $xpath = selector_to_xpath($selector, root => './');    
+    my $xpath = ref $selector ? $$selector : selector_to_xpath($selector, root => './');
     my @new = map { $_->findnodes($xpath) } @{$self->{trees}};
     
     return (ref $self || $self)->new_from_element(\@new, $self);
@@ -159,7 +168,7 @@ sub contents {
     my @new = map { $_->content_list } @{$self->{trees}};
     
     if ($selector) {
-        my $xpath = selector_to_xpath($selector);
+        my $xpath = ref $selector ? $$selector : selector_to_xpath($selector);
         @new = grep { $_->matches($xpath) } @new;        
     }
     
@@ -218,6 +227,12 @@ sub attr {
     return wantarray ? @retval : $retval[0];
 }
 
+sub tagname {
+    my $self = shift;
+    my @retval = map { $_->tag(@_) } @{$self->{trees}};
+    return wantarray ? @retval : $retval[0];
+}
+
 sub each {
     my ($self, $code) = @_;
     my $i = 0;
@@ -252,7 +267,7 @@ sub filter {
         return $self;
 
     } else {
-        my $xpath = selector_to_xpath($_[0]);
+        my $xpath = ref $_[0] ? ${$_[0]} : selector_to_xpath($_[0]);
         my @new = grep { $_->matches($xpath) } @{$self->{trees}};        
         return (ref $self || $self)->new_from_element(\@new, $self);
     }
@@ -448,7 +463,8 @@ sub add {
     
     # add(selector, context)
     if (@stuff == 2 && !ref $stuff[0] && $stuff[1]->isa('HTML::Element')) {
-        push @nodes, $stuff[1]->findnodes(selector_to_xpath($stuff[0]), root => './');        
+        my $xpath = ref $stuff[0] ? ${$stuff[0]} : selector_to_xpath($stuff[0]);
+        push @nodes, $stuff[1]->findnodes( $xpath, root => './');
     }
     else {
         # handle any combination of html string, element object and web::query object
@@ -481,6 +497,11 @@ sub next {
     return (ref $self || $self)->new_from_element(\@new, $self);
 }
 
+sub last_response {
+    my ($class) = @_;
+    return $RESPONSE;
+}
+
 sub DESTROY {
     return unless $_[0]->{need_delete};
 
@@ -519,7 +540,12 @@ Web::Query built at top of the CPAN modules, L<HTML::TreeBuilder::XPath>, L<LWP:
 
 So, this module uses L<HTML::Selector::XPath> and only supports the CSS 3
 selector supported by that module.
-Web::Query doesn't support jQuery's extended queries(yet?).
+Web::Query doesn't support jQuery's extended queries (yet?). If a selector is
+passed as a scalar ref, it is taken as a raw XPath expression.
+
+    wq( '<div><p>hello</p><p>there</p></div>' )->find( 'p' );       # css selector
+    wq( '<div><p>hello</p><p>there</p></div>' )->find( \'/div/p' ); # xpath selector
+
 
 B<THIS LIBRARY IS UNDER DEVELOPMENT. ANY API MAY CHANGE WITHOUT NOTICE>.
 
@@ -547,8 +573,9 @@ This method throw the exception on unknown $stuff.
 
 This method returns undefined value on non-successful response with URL.
 
-Currently, the only option valid option is I<indent>, which will be used as
-the indentation string if the object is printed.
+Currently, the only two valid options are I<indent>, which will be used as
+the indentation string if the object is printed, and I<no_space_compacting>, 
+which will prevent the compaction of whitespace characters in text blocks.
 
 =item my $q = Web::Query->new_from_element($element: HTML::Element)
 
@@ -718,6 +745,14 @@ Get/Set the attribute value in element.
 
     $q->attr($name, $val);
 
+=head3 tagname
+
+Get/Set the tag name of elements.
+
+    my $name = $q->tagname;
+
+    $q->tagname($new_name);
+
 =head3 before
 
 Insert content, specified by the parameter, before each element in the set of matched elements.
@@ -814,6 +849,16 @@ Get/Set the text.
 If called in a scalar context, only return the string representation
 of the first element
 
+=head2 OTHERS
+
+=over 4
+
+=item Web::Query->last_response()
+
+Returns the last HTTP response (an L<HTTP::Response> object) generated by C<new_from_url()>.
+
+=back
+
 =head1 HOW DO I CUSTOMIZE USER AGENT?
 
 You can specify your own instance of L<LWP::UserAgent>.
@@ -19,14 +19,22 @@ $ua->add_handler(request_send => sub {
 subtest 'bad status code' => sub {
     my $q = wq('http://bad.com/');
     is($q, undef);
+
     isa_ok($Web::Query::RESPONSE, 'HTTP::Response');
     is($Web::Query::RESPONSE->code, 500);
+
+    isa_ok(Web::Query->last_response, 'HTTP::Response');
+    is(Web::Query::last_response->code, 500);
 };
 subtest 'good status code' => sub {
     my $q = wq('http://good.com/');
     ok($q);
+
     isa_ok($Web::Query::RESPONSE, 'HTTP::Response');
     is($Web::Query::RESPONSE->code, 200);
+
+    isa_ok(Web::Query->last_response, 'HTTP::Response');
+    is(Web::Query::last_response->code, 200);
 };
 
 done_testing;
@@ -0,0 +1,24 @@
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use LWP::UserAgent;
+use Web::Query;
+
+my $ua = LWP::UserAgent->new( agent => 'Mozilla/5.0' );
+$Web::Query::UserAgent = $ua;
+$ua->add_handler(request_send => sub {
+    my ($request, $ua, $h) = @_;
+    if ($request->uri->host eq 'bad.com') {
+        return HTTP::Response->new(500);
+    } else {
+        return HTTP::Response->new(200);
+    }
+});
+
+plan tests => 2;
+
+is( Web::Query->new('http://bad.com/'), undef, 'without options' );
+
+is( Web::Query->new('http://bad.com/', { indent => 3 }) => undef, 'with options' );
+
@@ -0,0 +1,31 @@
+use strict;
+use warnings;
+
+use Test::More tests => 3;
+
+use Web::Query;
+
+is( Web::Query->new_from_html(<<'END')->as_html, '<div><span><p> hello there </p></span></div>', 'spaces trimmed' );
+<div> <span> <p> hello  there </p> </span> </div>
+END
+
+is( Web::Query->new_from_html(<<'END', {no_space_compacting => 1})->as_html, '<div><span><p> hello  there </p></span></div>', 'spaces left' );
+<div> <span> <p> hello  there </p> </span> </div>
+END
+
+subtest 'LibXML' => sub {
+    eval "require Web::Query::LibXML; 1" 
+        or plan skip_all => "couldn't load Web::Query::LibXML";
+
+    # LibXML doesn't trim by default
+
+    is( Web::Query::LibXML->new_from_html(<<'END')->as_html, '<div> <span> <p> hello  there </p> </span> </div>' );
+<div> <span> <p> hello  there </p> </span> </div>
+END
+
+    is( Web::Query::LibXML->new_from_html(<<'END', {no_space_compacting => 1})->as_html, '<div> <span> <p> hello  there </p> </span> </div>' );
+<div> <span> <p> hello  there </p> </span> </div>
+END
+};
+
+
@@ -0,0 +1,23 @@
+use strict;
+use warnings;
+
+use Test::More;
+
+my @modules = qw/ Web::Query Web::Query::LibXML /;
+
+plan tests => scalar @modules;
+
+for my $module ( @modules ) {
+    subtest $module => sub {
+        eval "require $module; 1" 
+            or plan skip_all => "couldn't load $module";
+
+        my $wq = $module->new_from_html(<<'END');
+        <div><p><b>hello</b></p><p>there</p></div>
+END
+
+        $wq->find('p')->each(sub{ $_->tagname('q') });
+
+        is $wq->as_html, '<div><q><b>hello</b></q><q>there</q></div>', 'p -> q';
+    };
+}
@@ -0,0 +1,22 @@
+use strict;
+use warnings;
+
+use Test::More;
+
+my @modules = qw/ Web::Query Web::Query::LibXML /;
+
+plan tests => scalar @modules;
+
+for my $module ( @modules ) {
+    subtest $module => sub {
+        eval "require $module; 1" 
+            or plan skip_all => "couldn't load $module";
+
+        my $wq = $module->new_from_html(<<'END');
+        <div><p><b>hello</b></p><p>there</p></div>
+END
+
+        is $wq->find('b')->html => 'hello', 'css';
+        is $wq->find('//b')->text => 'hello', 'xpath';
+    };
+}