package LWP::Protocol::Net::Curl;
# ABSTRACT: the power of libcurl in the palm of your hands!
use strict;
use utf8;
use warnings qw(all);
use base qw(LWP::Protocol);
use Carp qw(carp);
use Config;
use Fcntl;
use HTTP::Date;
use LWP::UserAgent;
use Net::Curl::Easy qw(:constants);
use Net::Curl::Multi qw(:constants);
use Net::Curl::Share qw(:constants);
use Scalar::Util qw(looks_like_number);
use URI;
our $VERSION = '0.021'; # VERSION
my %curlopt;
my $share;
unless (defined $Config{usethreads}) {
$share = Net::Curl::Share->new({ started => time });
$share->setopt(CURLSHOPT_SHARE ,=> CURL_LOCK_DATA_COOKIE);
$share->setopt(CURLSHOPT_SHARE ,=> CURL_LOCK_DATA_DNS);
## no critic (RequireCheckingReturnValueOfEval)
eval { $share->setopt(CURLSHOPT_SHARE ,=> CURL_LOCK_DATA_SSL_SESSION) };
}
## no critic (ProhibitPackageVars)
my %protocols = map { ($_) x 2 } @{Net::Curl::version_info()->{protocols}};
our @implements =
sort grep { defined }
@protocols
{qw{ftp ftps gopher http https sftp scp}};
our %implements = map { $_ => 1 } @implements;
our $use_select = Net::Curl::Multi->can(q(wait)) ? 0 : 1;
# Resolve libcurl constants by string
sub _curlopt {
my ($key, $no_carp) = @_;
return 0 + $key if looks_like_number($key);
$key =~ s/^Net::Curl::Easy:://ix;
$key =~ y/-/_/;
$key =~ s/\W//gx;
$key = uc $key;
$key = qq(CURLOPT_${key}) if $key !~ /^CURL(?:M|SH)?OPT_/x;
my $const = eval {
## no critic (ProhibitNoStrict ProhibitNoWarnings)
no strict qw(refs);
no warnings qw(once);
return *$key->();
};
carp qq(Invalid libcurl constant: $key)
if $@
and not defined $no_carp;
return $const;
}
# Sugar for a common setopt() pattern
sub _setopt_ifdef {
my ($curl, $key, $value, $no_carp) = @_;
my $curlopt_key = _curlopt($key, $no_carp);
$curl->setopt($curlopt_key => $value)
if defined $curlopt_key
and defined $value;
return;
}
# Pre-configure the module
sub import {
my ($class, @args) = @_;
my $takeover = 1;
if (@args) {
my %args = @args;
while (my ($key, $value) = each %args) {
if ($key eq q(takeover)) {
$takeover = $value;
} else {
my $const = _curlopt($key);
$curlopt{$const} = $value
if defined $const;
}
}
}
if ($takeover) {
LWP::Protocol::implementor($_ => $class)
for @implements;
}
return;
}
# Properly setup libcurl to handle each method in a compatible way
sub _handle_method {
my ($ua, $easy, $request) = @_;
my $method = uc $request->method;
my %dispatch = (
GET => sub {
$easy->setopt(CURLOPT_HTTPGET ,=> 1);
}, POST => sub {
$easy->setopt(CURLOPT_POST ,=> 1);
$easy->setopt(CURLOPT_POSTFIELDS,=> $request->content);
$easy->setopt(CURLOPT_POSTFIELDSIZE,=> length $request->content);
}, HEAD => sub {
$easy->setopt(CURLOPT_NOBODY ,=> 1);
}, DELETE => sub {
$easy->setopt(CURLOPT_CUSTOMREQUEST ,=> $method);
}, PUT => sub {
$easy->setopt(CURLOPT_UPLOAD ,=> 1);
my $buf = $request->content;
my $off = 0;
$easy->setopt(CURLOPT_INFILESIZE,=> length $buf);
$easy->setopt(CURLOPT_READFUNCTION ,=> sub {
my (undef, $maxlen) = @_;
my $chunk = substr $buf, $off, $maxlen;
$off += length $chunk;
return \$chunk;
});
},
);
my $method_ref = $dispatch{$method};
if (defined $method_ref) {
$method_ref->();
} else {
## no critic (RequireCarping)
die HTTP::Response->new(
&HTTP::Status::RC_BAD_REQUEST,
qq(Bad method '$method')
);
}
# handle redirects internally (except POST, greatly fsck'd up by IIS servers)
if ($method ne q(POST) and grep { $method eq uc } @{$ua->requests_redirectable}) {
$easy->setopt(CURLOPT_AUTOREFERER ,=> 1);
$easy->setopt(CURLOPT_FOLLOWLOCATION,=> 1);
$easy->setopt(CURLOPT_MAXREDIRS ,=> $ua->max_redirect);
} else {
$easy->setopt(CURLOPT_FOLLOWLOCATION,=> 0);
}
return $method;
}
# Compatibilize request headers
sub _fix_headers {
my ($ua, $easy, $key, $value) = @_;
return 0 unless defined $value;
# stolen from LWP::Protocol::http
$key =~ s/^://x;
$value =~ s/\n/ /gx;
my $encoding = 0;
if ($key =~ /^accept-encoding$/ix) {
my @encoding =
map { /^(?:x-)?(deflate|gzip|identity)$/ix ? lc $1 : () }
split /\s*,\s*/x, $value;
if (@encoding) {
++$encoding;
$easy->setopt(CURLOPT_ENCODING ,=> join(q(,) => @encoding));
}
} elsif ($key =~ /^user-agent$/ix) {
# While we try our best to look like LWP on the client-side,
# it's *definitely* different on the server-site!
# I guess it would be nice to introduce ourselves in a polite way.
$value =~ s/\b(\Q@{[ $ua->_agent ]}\E)\b/qq($1 ) . Net::Curl::version()/egx;
$easy->setopt(CURLOPT_USERAGENT ,=> $value);
} elsif ($key =~ /^x[-_](curlopt[-\w]+)$/ix) {
_setopt_ifdef($easy, $1 => $value);
} else {
$easy->pushopt(CURLOPT_HTTPHEADER ,=> [qq[$key: $value]]);
}
return $encoding;
}
# Wrap libcurl perform() in a (potentially) non-blocking way
sub _perform_loop {
my ($multi) = @_;
my $running = 0;
do {
my $timeout = $multi->timeout;
if ($running and $timeout > 9) {
if ($use_select) {
my ($r, $w, $e) = $multi->fdset;
select($r, $w, $e, $timeout / 1000);
} else {
$multi->wait($timeout);
}
}
$running = $multi->perform;
while (my (undef, $easy, $result) = $multi->info_read) {
$multi->remove_handle($easy);
if ($result == CURLE_TOO_MANY_REDIRECTS) {
# will return the last request
} elsif ($result) {
## no critic (RequireCarping)
die HTTP::Response->new(
&HTTP::Status::RC_BAD_REQUEST,
qq($result),
);
}
}
} while ($running);
return $running;
}
## no critic (ProhibitManyArgs)
sub request {
my ($self, $request, $proxy, $arg, $size, $timeout) = @_;
my $ua = $self->{ua};
unless (q(Net::Curl::Multi) eq ref $ua->{curl_multi}) {
$ua->{curl_multi} = Net::Curl::Multi->new({ def_headers => $ua->{def_headers} });
# avoid "callback function is not set" warning
_setopt_ifdef(
$ua->{curl_multi},
q(CURLMOPT_SOCKETFUNCTION) => sub { return 0 },
1,
);
}
my $data = '';
my $header = '';
my $writedata;
my $easy = Net::Curl::Easy->new({ request => $request });
$ua->{curl_multi}->add_handle($easy);
my $previous = undef;
my $response = HTTP::Response->new(&HTTP::Status::RC_OK);
$response->request($request);
$easy->setopt(CURLOPT_HEADERFUNCTION ,=> sub {
my ($_easy, $line) = @_;
$header .= $line;
# I hope only HTTP sends "empty line" as delimiters
if ($line =~ /^\s*$/sx) {
$response = HTTP::Response->parse($header);
my $msg = $response->message;
$msg = '' unless defined $msg;
$msg =~ s/^\s+|\s+$//gsx;
$response->message($msg);
$response->request($request->clone);
my $effective_url = URI->new('' . $_easy->getinfo(CURLINFO_EFFECTIVE_URL));
$response->request->uri($effective_url);
$response->previous($previous) if defined $previous;
$previous = $response;
$header = '';
}
return length $line;
});
if (q(CODE) eq ref $arg) {
$easy->setopt(CURLOPT_WRITEFUNCTION ,=> sub {
my (undef, $chunk) = @_;
$arg->($chunk, $response, $self);
return length $chunk;
});
$writedata = undef;
} elsif (defined $arg) {
# will die() later
sysopen $writedata, $arg, O_CREAT | O_NONBLOCK | O_WRONLY;
binmode $writedata;
} else {
$writedata = \$data;
}
my $encoding = 0;
while (my ($key, $value) = each %curlopt) {
++$encoding if $key == CURLOPT_ENCODING;
$easy->setopt($key, $value);
}
# SSL stuff, may not be compiled
if ($request->uri->scheme =~ /s$/ix) {
_setopt_ifdef($easy, CAINFO => $ua->{ssl_opts}{SSL_ca_file});
_setopt_ifdef($easy, CAPATH => $ua->{ssl_opts}{SSL_ca_path});
# fixes a security flaw denied by libcurl v7.28.1
_setopt_ifdef($easy, SSL_VERIFYHOST => (!!$ua->{ssl_opts}{verify_hostname}) << 1);
_setopt_ifdef($easy, SSL_VERIFYPEER => 0) unless $ua->{ssl_opts}{verify_hostname};
}
$easy->setopt(CURLOPT_FILETIME ,=> 1);
$easy->setopt(CURLOPT_URL ,=> $request->uri);
_setopt_ifdef($easy, CURLOPT_BUFFERSIZE ,=> $size);
_setopt_ifdef($easy, CURLOPT_INTERFACE ,=> $ua->{local_address});
_setopt_ifdef($easy, CURLOPT_MAXFILESIZE,=> $ua->max_size);
_setopt_ifdef($easy, q(CURLOPT_NOPROXY) => join(q(,) => @{$ua->{no_proxy}}), 1);
_setopt_ifdef($easy, CURLOPT_PROXY ,=> $proxy);
_setopt_ifdef($easy, CURLOPT_SHARE ,=> $share);
_setopt_ifdef($easy, CURLOPT_TIMEOUT ,=> $timeout);
_setopt_ifdef($easy, CURLOPT_WRITEDATA ,=> $writedata);
if ($ua->{show_progress}) {
$easy->setopt(CURLOPT_NOPROGRESS ,=> 0);
_setopt_ifdef(
$easy,
q(CURLOPT_PROGRESSFUNCTION) => sub {
my (undef, $dltotal, $dlnow) = @_;
$ua->progress($dltotal ? $dlnow / $dltotal : q(tick));
return 0;
},
1,
);
}
_handle_method($ua, $easy, $request);
$request->headers->scan(sub { $encoding += _fix_headers($ua, $easy, @_) });
_perform_loop($ua->{curl_multi});
$response->code($easy->getinfo(CURLINFO_RESPONSE_CODE) || 200);
my $time = $easy->getinfo(CURLINFO_FILETIME);
$response->headers->header(last_modified => time2str($time))
if $time > 0;
# handle decoded_content() & direct file write
if (q(GLOB) eq ref $writedata) {
close $writedata;
# avoid truncate by collect()
$arg = undef;
} elsif ($encoding) {
$response->headers->header(content_encoding => q(identity));
}
return $self->collect_once($arg, $response, $data);
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
LWP::Protocol::Net::Curl - the power of libcurl in the palm of your hands!
=head1 VERSION
version 0.021
=head1 SYNOPSIS
#!/usr/bin/env perl;
use common::sense;
use LWP::Protocol::Net::Curl;
use WWW::Mechanize;
...
=head1 DESCRIPTION
Drop-in replacement for L<LWP>, L<WWW::Mechanize> and their derivatives to use L<Net::Curl> as a backend.
Advantages:
=over 4
=item *
support ftp/ftps/http/https/sftp/scp protocols out-of-box (secure layer require L<libcurl|http://curl.haxx.se/> to be compiled with TLS/SSL/libssh2 support)
=item *
support SOCKS4/5 proxy out-of-box
=item *
connection persistence and DNS cache (independent from L<LWP::ConnCache>)
=item *
lightning-fast L<HTTP compression|https://en.wikipedia.org/wiki/Http_compression> and redirection
=item *
lower CPU usage: this matters if you C<fork()> multiple downloader instances
=item *
asynchronous threading via L<Coro::Select> (see F<eg/async.pl>)
=item *
at last but not least: B<100% compatible> with both L<LWP> and L<WWW::Mechanize> test suites!
=back
=head1 LIBCURL INTERFACE
You may query which L<LWP> protocols are implemented through L<Net::Curl> by accessing C<@LWP::Protocol::Net::Curl::implements> or C<%LWP::Protocol::Net::Curl::implements>.
By default, B<every protocol> listed in that array will be implemented via L<LWP::Protocol::Net::Curl>.
It is possible to import only specific protocols:
use LWP::Protocol::Net::Curl takeover => 0;
LWP::Protocol::implementor(https => 'LWP::Protocol::Net::Curl');
The default value of C<takeover> option is I<true>, resulting in exactly the same behavior as in:
use LWP::Protocol::Net::Curl takeover => 0;
LWP::Protocol::implementor($_ => 'LWP::Protocol::Net::Curl')
for @LWP::Protocol::Net::Curl::implements;
Default L<curl_easy_setopt() options|http://curl.haxx.se/libcurl/c/curl_easy_setopt.html> can be set during initialization:
use LWP::Protocol::Net::Curl
encoding => '', # use HTTP compression by default
referer => 'http://google.com/',
verbose => 1; # make libcurl print lots of stuff to STDERR
Or during runtime, using special HTTP headers (prefixed by C<X-CurlOpt->):
use LWP::Protocol::Net::Curl;
use LWP::UserAgent;
my $ua = LWP::UserAgent->new;
my $res = $ua->get(
'https://metacpan.org/',
X_CurlOpt_Verbose => 1,
);
Options set this way have the lowest precedence.
For instance, if L<WWW::Mechanize> sets the I<Referer:> by it's own, the value you defined above won't be used.
=head1 DEBUGGING
Quickly enable libcurl I<verbose> mode via C<PERL5OPT> environment variable:
PERL5OPT=-MLWP::Protocol::Net::Curl=verbose,1 perl your-script.pl
B<Bonus:> it works even if you don't include the C<use LWP::Protocol::Net::Curl> line!
=for Pod::Coverage import
request
=head1 TODO
=over 4
=item *
better implementation for non-HTTP protocols
=item *
more tests
=item *
expose the inner guts of libcurl while handling encoding/redirects internally
=item *
revise L<Net::Curl::Multi> "event loop" code
=back
=head1 BUGS
=over 4
=item *
sometimes still complains about I<Attempt to free unreferenced scalar: SV 0xdeadbeef during global destruction.>
=item *
in "async mode", each L<LWP::UserAgent> instance "blocks" until all requests finish
=item *
parallel requests via L<Coro::Select> are B<very inefficient>; consider using L<YADA> if you're into event-driven parallel user agents
=item *
L<Net::Curl::Share> support is disabled on threaded Perl builds
=back
=head1 SEE ALSO
=over 4
=item *
L<LWP::Protocol::GHTTP> - used as a reference for L<LWP::Protocol> implementation
=item *
L<LWP::Protocol::AnyEvent::http> - another L<LWP::Protocol> reference
=item *
L<YADA> - L<Net::Curl> usage reference
=item *
L<Net::Curl> - backend for this module
=item *
L<LWP::Curl> - provides L<LWP::UserAgent>-compatible API via L<WWW::Curl>
=back
=head1 AUTHOR
Stanislaw Pusep <stas@sysd.org>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2014 by Stanislaw Pusep.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut