@@ -12,8 +12,6 @@ use utf8;
use Module::Build;
use File::Basename;
use File::Spec;
-use CPAN::Meta;
-use CPAN::Meta::Prereqs;
my %args = (
license => 'perl',
@@ -34,7 +32,7 @@ my %args = (
test_files => ((-d '.git' || $ENV{RELEASE_TESTING}) && -d 'xt') ? 't/ xt/' : 't/',
recursive_test_files => 1,
-
+
);
if (-d 'share') {
$args{share_dir} = 'share';
@@ -53,20 +51,15 @@ my $builder = Module::Build->subclass(
)->new(%args);
$builder->create_build_script();
-my $mbmeta = CPAN::Meta->load_file('MYMETA.json');
-my $meta = CPAN::Meta->load_file('META.json');
-my $prereqs_hash = CPAN::Meta::Prereqs->new(
- $meta->prereqs
-)->with_merged_prereqs(
- CPAN::Meta::Prereqs->new($mbmeta->prereqs)
-)->as_string_hash;
-my $mymeta = CPAN::Meta->new(
- {
- %{$meta->as_struct},
- prereqs => $prereqs_hash
- }
-);
-print "Merging cpanfile prereqs to MYMETA.yml\n";
-$mymeta->save('MYMETA.yml', { version => 1.4 });
-print "Merging cpanfile prereqs to MYMETA.json\n";
-$mymeta->save('MYMETA.json', { version => 2 });
+use File::Copy;
+
+# Copy the shipped META.json over MYMETA.json; a failed copy aborts the script.
+print "cp META.json MYMETA.json\n";
+copy("META.json","MYMETA.json") or die "Copy failed(META.json): $!";
+
+# META.yml is treated as optional: it is copied only when present,
+# otherwise a notice is printed and the script carries on.
+if (-f 'META.yml') {
+ print "cp META.yml MYMETA.yml\n";
+ copy("META.yml","MYMETA.yml") or die "Copy failed(META.yml): $!";
+} else {
+ print "There is no META.yml... You may install this module from the repository...\n";
+}
+
@@ -1,5 +1,13 @@
Revision history for Perl extension Web::Query
+0.27 2014-12-24T00:52:33Z
+
+ - new() with a bad url wasn't returning 'undef'
+ when options were given. (yanick)
+ - Add 'no_space_compacting' option. #33 (yanick)
+ - Add 'tagname' to query/modify tag names. #34 (yanick)
+ - XPath expressions can now be used as well. #35 (yanick)
+
0.26 2014-03-31T08:23:34Z
- impl prev() and next() method #31
@@ -23,6 +23,7 @@ t/add.t
t/add_class.t
t/after.t
t/append.t
+t/bad-url-with-options.t
t/before.t
t/clone.t
t/contents.t
@@ -37,11 +38,14 @@ t/insert_before.t
t/lib/My/TreeBuilder.pm
t/lib/My/Web/Query.pm
t/next.t
+t/no_space_compacting.t
t/prepend.t
t/prev.t
t/remove_class.t
t/replace_with.t
t/store_comments.t
+t/tagname.t
+t/xpath.t
xt/02_perlcritic.t
xt/live/01_simple.t
META.yml
@@ -4,7 +4,7 @@
"Tokuhiro Matsuno <tokuhirom AAJKLFJEF@ GMAIL COM>"
],
"dynamic_config" : 0,
- "generated_by" : "Minilla/v0.13.0",
+ "generated_by" : "Minilla/v2.3.0",
"license" : [
"perl_5"
],
@@ -28,8 +28,6 @@
"prereqs" : {
"configure" : {
"requires" : {
- "CPAN::Meta" : "0",
- "CPAN::Meta::Prereqs" : "0",
"Module::Build" : "0.38"
}
},
@@ -37,6 +35,7 @@
"requires" : {
"Test::CPAN::Meta" : "0",
"Test::MinimumVersion::Fast" : "0.04",
+ "Test::PAUSE::Permissions" : "0.04",
"Test::Pod" : "1.41",
"Test::Spellunker" : "v0.2.7"
}
@@ -61,11 +60,11 @@
"provides" : {
"Web::Query" : {
"file" : "lib/Web/Query.pm",
- "version" : "0.26"
+ "version" : "0.27"
},
"Web::Query::LibXML" : {
"file" : "lib/Web/Query/LibXML.pm",
- "version" : "0.26"
+ "version" : "0.27"
}
},
"release_status" : "stable",
@@ -79,7 +78,7 @@
"web" : "https://github.com/tokuhirom/Web-Query"
}
},
- "version" : "0.26",
+ "version" : "0.27",
"x_authority" : "cpan:TOKUHIROM",
"x_contributors" : [
"Hiroki Honda <cside.story@gmail.com>",
@@ -88,8 +87,9 @@
"Carlos Fernando Avila Gratz <cafe01@gmail.com>",
"DQNEO <dqneoo@gmail.com>",
"Carlos Fernando Avila Gratz <cafe@q1software.com>",
- "Yanick Champoux <yanick@babyl.dyndns.org>",
"xaicron <xaicron@gmail.com>",
- "Tokuhiro Matsuno <tokuhirom@gmail.com>"
+ "moznion <moznion@gmail.com>",
+ "Tokuhiro Matsuno <tokuhirom@gmail.com>",
+ "Yanick Champoux <yanick@babyl.dyndns.org>"
]
}
@@ -5,11 +5,9 @@ author:
build_requires:
Test::More: '0.98'
configure_requires:
- CPAN::Meta: '0'
- CPAN::Meta::Prereqs: '0'
Module::Build: '0.38'
dynamic_config: 0
-generated_by: 'Minilla/v0.13.0, CPAN::Meta::Converter version 2.133380'
+generated_by: 'Minilla/v2.3.0, CPAN::Meta::Converter version 2.142690'
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
@@ -28,10 +26,10 @@ no_index:
provides:
Web::Query:
file: lib/Web/Query.pm
- version: '0.26'
+ version: '0.27'
Web::Query::LibXML:
file: lib/Web/Query/LibXML.pm
- version: '0.26'
+ version: '0.27'
requires:
HTML::Entities: '0'
HTML::Selector::XPath: '0.06'
@@ -44,7 +42,7 @@ resources:
bugtracker: https://github.com/tokuhirom/Web-Query/issues
homepage: https://github.com/tokuhirom/Web-Query
repository: git://github.com/tokuhirom/Web-Query.git
-version: '0.26'
+version: '0.27'
x_authority: cpan:TOKUHIROM
x_contributors:
- 'Hiroki Honda <cside.story@gmail.com>'
@@ -53,6 +51,7 @@ x_contributors:
- 'Carlos Fernando Avila Gratz <cafe01@gmail.com>'
- 'DQNEO <dqneoo@gmail.com>'
- 'Carlos Fernando Avila Gratz <cafe@q1software.com>'
- - 'Yanick Champoux <yanick@babyl.dyndns.org>'
- 'xaicron <xaicron@gmail.com>'
+ - 'moznion <moznion@gmail.com>'
- 'Tokuhiro Matsuno <tokuhirom@gmail.com>'
+ - 'Yanick Champoux <yanick@babyl.dyndns.org>'
@@ -22,9 +22,13 @@ Web::Query built at top of the CPAN modules, [HTML::TreeBuilder::XPath](https://
So, this module uses [HTML::Selector::XPath](https://metacpan.org/pod/HTML::Selector::XPath) and only supports the CSS 3
selector supported by that module.
-Web::Query doesn't support jQuery's extended queries(yet?).
+Web::Query doesn't support jQuery's extended queries(yet?). If a selector is
+passed as a scalar ref, it'll be taken as a straight XPath expression.
-__THIS LIBRARY IS UNDER DEVELOPMENT. ANY API MAY CHANGE WITHOUT NOTICE__.
+ $wq( '<div><p>hello</p><p>there</p></div>' )->find( 'p' ); # css selector
+ $wq( '<div><p>hello</p><p>there</p></div>' )->find( \'/div/p' ); # xpath selector
+
+**THIS LIBRARY IS UNDER DEVELOPMENT. ANY API MAY CHANGE WITHOUT NOTICE**.
# FUNCTIONS
@@ -44,8 +48,9 @@ __THIS LIBRARY IS UNDER DEVELOPMENT. ANY API MAY CHANGE WITHOUT NOTICE__.
This method returns undefined value on non-successful response with URL.
- Currently, the only option valid option is _indent_, which will be used as
- the indentation string if the object is printed.
+ Currently, the only two valid options are _indent_, which will be used as
+ the indentation string if the object is printed, and _no\_space\_compacting_,
+ which will prevent the compaction of whitespace characters in text blocks.
- my $q = Web::Query->new\_from\_element($element: HTML::Element)
@@ -125,9 +130,9 @@ Get the descendants of each element in the current set of matched elements, filt
my $q2 = $q->find($selector); # $selector is a CSS3 selector.
-__NOTE__ If you want to match the element itself, use ["filter"](#filter).
+**NOTE** If you want to match the element itself, use ["filter"](#filter).
-__INCOMPATIBLE CHANGE__
+**INCOMPATIBLE CHANGE**
From v0.14 to v0.19 (inclusive) find() also matched the element itself, which is not jQuery compatible.
You can achieve that result using `filter()`, `add()` and `find()`:
@@ -212,6 +217,14 @@ Get/Set the attribute value in element.
$q->attr($name, $val);
+### tagname
+
+Get/Set the tag name of elements.
+
+ my $name = $q->tagname;
+
+ $q->tagname($new_name);
+
### before
Insert content, specified by the parameter, before each element in the set of matched elements.
@@ -310,6 +323,12 @@ Get/Set the text.
If called in a scalar context, only return the string representation
of the first element
+## OTHERS
+
+- Web::Query->last\_response()
+
+ Returns the last HTTP response (an HTTP::Response object) generated by `new_from_url()`.
+
# HOW DO I CUSTOMIZE USER AGENT?
You can specify your own instance of [LWP::UserAgent](https://metacpan.org/pod/LWP::UserAgent).
@@ -6,7 +6,7 @@ use parent qw/Web::Query Exporter/;
use HTML::TreeBuilder::LibXML;
-our $VERSION = "0.26";
+our $VERSION = "0.27";
our @EXPORT = qw/wq/;
@@ -58,6 +58,14 @@ sub next {
return (ref $self || $self)->new_from_element(\@new, $self);
}
+# Get or set the node name of every matched element.
+# With arguments each wrapped node is renamed through setNodeName();
+# without arguments the current name is read through nodeName().
+# Returns all per-node results in list context, only the first in scalar context.
+sub tagname {
+    my ($self, @args) = @_;
+
+    my @results;
+    for my $tree (@{ $self->{trees} }) {
+        my $node = $tree->{node};
+        push @results, @args ? $node->setNodeName(@args) : $node->nodeName();
+    }
+
+    return @results if wantarray;
+    return $results[0];
+}
+
1;
__END__
@@ -3,7 +3,7 @@ use strict;
use warnings;
use 5.008001;
use parent qw/Exporter/;
-our $VERSION = '0.26';
+our $VERSION = '0.27';
use HTML::TreeBuilder::XPath;
use LWP::UserAgent;
use HTML::Selector::XPath 0.06 qw/selector_to_xpath/;
@@ -24,77 +24,86 @@ sub __ua {
}
+# Create the HTML::TreeBuilder::XPath parser used by new_from_file and
+# new_from_html.
+# The no_space_compacting flag is taken from the object when called on an
+# instance; on a class call it comes from the $options hash ref, and is 0
+# when $options is not a hash ref.
sub _build_tree {
- my $class = shift;
- my $tree = HTML::TreeBuilder::XPath->new();
+ my( $self, $options ) = @_;
+
+ my $no_space_compacting = ref $self ? $self->{no_space_compacting}
+ : ref $options eq 'HASH' ? $options->{no_space_compacting} : 0;
+
+ my $tree = HTML::TreeBuilder::XPath->new(
+ no_space_compacting => $no_space_compacting
+ );
$tree->ignore_unknown(0);
$tree->store_comments(1);
- $tree;
+ $tree;
}
+# Construct a query object from $stuff; the source type is resolved by
+# _resolve_new. $options is an optional hash ref: 'indent' is stored on the
+# object when it is true, 'no_space_compacting' is always stored.
+# Returns undef when _resolve_new yields a false value.
sub new {
my ($class, $stuff, $options) = @_;
- my $self = $class->_resolve_new($stuff);
+ # An explicit undef (rather than a bare return) keeps list-context
+ # callers receiving exactly one value on failure.
+ my $self = $class->_resolve_new($stuff,$options)
+ or return undef;
$self->{indent} = $options->{indent} if $options->{indent};
+ $self->{no_space_compacting} = $options->{no_space_compacting};
+
return $self;
}
+# Choose the constructor that matches the type of $stuff and forward
+# $options to it. Accepted sources: HTML::Element, URI and same-class
+# objects, array refs, http/https/file URLs, strings containing markup, and
+# single-line names of existing files. Dies with "Unknown source type" on
+# anything else.
+# NOTE(review): other callers in this file pass the originating query object
+# as the second argument of new_from_element; confirm that it also accepts
+# an options hash in that position.
sub _resolve_new {
- my( $class, $stuff ) = @_;
+ my( $class, $stuff, $options) = @_;
if (blessed $stuff) {
if ($stuff->isa('HTML::Element')) {
- return $class->new_from_element([$stuff]);
+ return $class->new_from_element([$stuff],$options);
}
if ($stuff->isa('URI')) {
- return $class->new_from_url($stuff->as_string);
+ return $class->new_from_url($stuff->as_string,$options);
}
if ($stuff->isa($class)) {
- return $class->new_from_element($stuff->{trees});
+ return $class->new_from_element($stuff->{trees}, $options);
}
die "Unknown source type: $stuff";
}
- return $class->new_from_element($stuff) if ref $stuff eq 'ARRAY';
+ return $class->new_from_element($stuff,$options) if ref $stuff eq 'ARRAY';
- return $class->new_from_url($stuff) if $stuff =~ m{^(?:https?|file)://};
+ return $class->new_from_url($stuff,$options) if $stuff =~ m{^(?:https?|file)://};
- return $class->new_from_html($stuff) if $stuff =~ /<.*?>/;
+ return $class->new_from_html($stuff,$options) if $stuff =~ /<.*?>/;
- return $class->new_from_file($stuff) if $stuff !~ /\n/ && -f $stuff;
+ return $class->new_from_file($stuff,$options) if $stuff !~ /\n/ && -f $stuff;
die "Unknown source type: $stuff";
}
+# Fetch $url with the agent returned by __ua() and keep the response in
+# $RESPONSE. On success the decoded body is handed to new_from_html together
+# with $options; on failure undef is returned.
sub new_from_url {
- my ($class, $url) = @_;
+ my ($class, $url,$options) = @_;
$RESPONSE = __ua()->get($url);
if ($RESPONSE->is_success) {
- return $class->new_from_html($RESPONSE->decoded_content);
+ return $class->new_from_html($RESPONSE->decoded_content,$options);
} else {
return undef;
}
}
+# Parse the file $fname with a tree from _build_tree($options) and wrap the
+# resulting elements in a query object.
sub new_from_file {
- my ($class, $fname) = @_;
- my $tree = $class->_build_tree;
+ my ($class, $fname, $options) = @_;
+ my $tree = $class->_build_tree($options);
$tree->parse_file($fname);
- my $self = $class->new_from_element([$tree->disembowel]);
+ my $self = $class->new_from_element([$tree->disembowel],$options);
+ # need_delete is the flag DESTROY checks before doing any cleanup.
$self->{need_delete}++;
return $self;
}
+# Parse the markup string $html with a tree from _build_tree($options) and
+# wrap the resulting elements in a query object.
sub new_from_html {
- my ($class, $html) = @_;
- my $tree = $class->_build_tree;
+ my ($class, $html,$options) = @_;
+ my $tree = $class->_build_tree($options);
$tree->parse_content($html);
- my $self = $class->new_from_element([$tree->disembowel]);
+ my $self = $class->new_from_element([$tree->disembowel],$options);
+ # need_delete is the flag DESTROY checks before doing any cleanup.
$self->{need_delete}++;
return $self;
}
@@ -147,7 +156,7 @@ sub eq {
sub find {
my ($self, $selector) = @_;
- my $xpath = selector_to_xpath($selector, root => './');
+ my $xpath = ref $selector ? $$selector : selector_to_xpath($selector, root => './');
my @new = map { $_->findnodes($xpath) } @{$self->{trees}};
return (ref $self || $self)->new_from_element(\@new, $self);
@@ -159,7 +168,7 @@ sub contents {
my @new = map { $_->content_list } @{$self->{trees}};
if ($selector) {
- my $xpath = selector_to_xpath($selector);
+ my $xpath = ref $selector ? $$selector : selector_to_xpath($selector);
@new = grep { $_->matches($xpath) } @new;
}
@@ -218,6 +227,12 @@ sub attr {
return wantarray ? @retval : $retval[0];
}
+# Get or set the tag name of every matched element by forwarding the
+# caller's arguments to each element's tag() method.
+# Returns every result in list context, only the first in scalar context.
+sub tagname {
+    my ($self, @args) = @_;
+
+    my @names;
+    foreach my $element (@{ $self->{trees} }) {
+        push @names, $element->tag(@args);
+    }
+
+    return @names if wantarray;
+    return $names[0];
+}
+
sub each {
my ($self, $code) = @_;
my $i = 0;
@@ -252,7 +267,7 @@ sub filter {
return $self;
} else {
- my $xpath = selector_to_xpath($_[0]);
+ my $xpath = ref $_[0] ? ${$_[0]} : selector_to_xpath($_[0]);
my @new = grep { $_->matches($xpath) } @{$self->{trees}};
return (ref $self || $self)->new_from_element(\@new, $self);
}
@@ -448,7 +463,8 @@ sub add {
# add(selector, context)
if (@stuff == 2 && !ref $stuff[0] && $stuff[1]->isa('HTML::Element')) {
- push @nodes, $stuff[1]->findnodes(selector_to_xpath($stuff[0]), root => './');
+ my $xpath = ref $stuff[0] ? ${$stuff[0]} : selector_to_xpath($stuff[0]);
+ push @nodes, $stuff[1]->findnodes( $xpath, root => './');
}
else {
# handle any combination of html string, element object and web::query object
@@ -481,6 +497,11 @@ sub next {
return (ref $self || $self)->new_from_element(\@new, $self);
}
+# Accessor for the response object that new_from_url() stored in $RESPONSE.
+# The invocant is not used, so class and instance calls behave the same.
+sub last_response {
+    return $RESPONSE;
+}
+
sub DESTROY {
return unless $_[0]->{need_delete};
@@ -519,7 +540,12 @@ Web::Query built at top of the CPAN modules, L<HTML::TreeBuilder::XPath>, L<LWP:
So, this module uses L<HTML::Selector::XPath> and only supports the CSS 3
selector supported by that module.
-Web::Query doesn't support jQuery's extended queries(yet?).
+Web::Query doesn't support jQuery's extended queries(yet?). If a selector is
+passed as a scalar ref, it'll be taken as a straight XPath expression.
+
+ $wq( '<div><p>hello</p><p>there</p></div>' )->find( 'p' ); # css selector
+ $wq( '<div><p>hello</p><p>there</p></div>' )->find( \'/div/p' ); # xpath selector
+
B<THIS LIBRARY IS UNDER DEVELOPMENT. ANY API MAY CHANGE WITHOUT NOTICE>.
@@ -547,8 +573,9 @@ This method throw the exception on unknown $stuff.
This method returns undefined value on non-successful response with URL.
-Currently, the only option valid option is I<indent>, which will be used as
-the indentation string if the object is printed.
+Currently, the only two valid options are I<indent>, which will be used as
+the indentation string if the object is printed, and I<no_space_compacting>,
+which will prevent the compaction of whitespace characters in text blocks.
=item my $q = Web::Query->new_from_element($element: HTML::Element)
@@ -718,6 +745,14 @@ Get/Set the attribute value in element.
$q->attr($name, $val);
+=head3 tagname
+
+Get/Set the tag name of elements.
+
+ my $name = $q->tagname;
+
+ $q->tagname($new_name);
+
=head3 before
Insert content, specified by the parameter, before each element in the set of matched elements.
@@ -814,6 +849,16 @@ Get/Set the text.
If called in a scalar context, only return the string representation
of the first element
+=head2 OTHERS
+
+=over 4
+
+=item Web::Query->last_response()
+
+Returns the last HTTP response (an L<HTTP::Response> object) generated by C<new_from_url()>.
+
+=back
+
=head1 HOW DO I CUSTOMIZE USER AGENT?
You can specify your own instance of L<LWP::UserAgent>.
@@ -19,14 +19,22 @@ $ua->add_handler(request_send => sub {
+# A failed fetch returns undef, yet the response must still be recorded.
subtest 'bad status code' => sub {
my $q = wq('http://bad.com/');
is($q, undef);
+
isa_ok($Web::Query::RESPONSE, 'HTTP::Response');
is($Web::Query::RESPONSE->code, 500);
+
+ # Use the documented class-method form for both checks instead of
+ # relying on how Perl parses a qualified bareword followed by '->'.
+ isa_ok(Web::Query->last_response, 'HTTP::Response');
+ is(Web::Query->last_response->code, 500);
};
+# A successful fetch returns a query object and records the response.
subtest 'good status code' => sub {
my $q = wq('http://good.com/');
ok($q);
+
isa_ok($Web::Query::RESPONSE, 'HTTP::Response');
is($Web::Query::RESPONSE->code, 200);
+
+ # Use the documented class-method form for both checks instead of
+ # relying on how Perl parses a qualified bareword followed by '->'.
+ isa_ok(Web::Query->last_response, 'HTTP::Response');
+ is(Web::Query->last_response->code, 200);
};
done_testing;
@@ -0,0 +1,24 @@
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+use LWP::UserAgent;
+use Web::Query;
+
+# Replace the network with a stub: requests to bad.com answer 500,
+# every other host answers 200.
+my $ua = LWP::UserAgent->new( agent => 'Mozilla/5.0' );
+$Web::Query::UserAgent = $ua;
+$ua->add_handler(request_send => sub {
+ my ($request, $ua, $h) = @_;
+ if ($request->uri->host eq 'bad.com') {
+ return HTTP::Response->new(500);
+ } else {
+ return HTTP::Response->new(200);
+ }
+});
+
+plan tests => 2;
+
+# new() must give undef for a failed fetch both without and with options.
+is( Web::Query->new('http://bad.com/'), undef, 'without options' );
+
+is( Web::Query->new('http://bad.com/', { indent => 3 }) => undef, 'with options' );
+
@@ -0,0 +1,31 @@
+use strict;
+use warnings;
+
+use Test::More tests => 3;
+
+use Web::Query;
+
+# Markup parsed with the default settings.
+is( Web::Query->new_from_html(<<'END')->as_html, '<div><span><p> hello there </p></span></div>', 'spaces trimmed' );
+<div> <span> <p> hello there </p> </span> </div>
+END
+
+# Same markup parsed with no_space_compacting enabled.
+# NOTE(review): as shown, the expected string is identical to the default
+# case above; confirm the fixture still distinguishes the two modes.
+is( Web::Query->new_from_html(<<'END', {no_space_compacting => 1})->as_html, '<div><span><p> hello there </p></span></div>', 'spaces left' );
+<div> <span> <p> hello there </p> </span> </div>
+END
+
+# Repeat both checks with the LibXML backend; skipped when it cannot be loaded.
+subtest 'LibXML' => sub {
+ eval "require Web::Query::LibXML; 1"
+ or plan skip_all => "couldn't load Web::Query::LibXML";
+
+ # LibXML doesn't trim by default
+
+ is( Web::Query::LibXML->new_from_html(<<'END')->as_html, '<div> <span> <p> hello there </p> </span> </div>' );
+<div> <span> <p> hello there </p> </span> </div>
+END
+
+ is( Web::Query::LibXML->new_from_html(<<'END', {no_space_compacting => 1})->as_html, '<div> <span> <p> hello there </p> </span> </div>' );
+<div> <span> <p> hello there </p> </span> </div>
+END
+};
+
+
@@ -0,0 +1,23 @@
+use strict;
+use warnings;
+
+use Test::More;
+
+# Both backends are exercised with the same scenario, one subtest each.
+my @modules = qw/ Web::Query Web::Query::LibXML /;
+
+plan tests => scalar @modules;
+
+for my $module ( @modules ) {
+ subtest $module => sub {
+ # Skip this backend when its module cannot be loaded.
+ eval "require $module; 1"
+ or plan skip_all => "couldn't load $module";
+
+ my $wq = $module->new_from_html(<<'END');
+ <div><p><b>hello</b></p><p>there</p></div>
+END
+
+ # Rename every <p> to <q> and check the serialized result.
+ $wq->find('p')->each(sub{ $_->tagname('q') });
+
+ is $wq->as_html, '<div><q><b>hello</b></q><q>there</q></div>', 'p -> q';
+ };
+}
@@ -0,0 +1,22 @@
+use strict;
+use warnings;
+
+use Test::More;
+
+# Both backends are exercised with the same selector checks, one subtest each.
+my @modules = qw/ Web::Query Web::Query::LibXML /;
+
+plan tests => scalar @modules;
+
+for my $module ( @modules ) {
+    subtest $module => sub {
+        # Skip this backend when its module cannot be loaded.
+        eval "require $module; 1"
+            or plan skip_all => "couldn't load $module";
+
+        my $wq = $module->new_from_html(<<'END');
+ <div><p><b>hello</b></p><p>there</p></div>
+END
+
+        # A plain string is a CSS selector; an XPath expression has to be
+        # passed as a scalar ref, otherwise find() runs it through the
+        # CSS-to-XPath translation.
+        is $wq->find('b')->html => 'hello', 'css';
+        is $wq->find(\'//b')->text => 'hello', 'xpath';
+    };
+}