lib/WWW/Curl/UserAgent.pm

package WWW::Curl::UserAgent;
{
  $WWW::Curl::UserAgent::VERSION = '0.9.4';
}

# ABSTRACT: UserAgent based on libcurl

use Moose;
use v5.10;

use WWW::Curl::Easy;
use WWW::Curl::Multi;
use HTTP::Response;
use Time::HiRes;
use IO::Select;

use WWW::Curl::UserAgent::Handler;
use WWW::Curl::UserAgent::Request;

# timeout in milliseconds
has timeout => (
    is      => 'rw',
    isa     => 'Int',
    default => 0,
);

# connection timeout in milliseconds
has connect_timeout => (
    is      => 'rw',
    isa     => 'Int',
    default => 300,
);

# maximum of requests done in parallel
has parallel_requests => (
    is      => 'rw',
    isa     => 'Int',
    default => 5,
);

# use connection keep-alive
has keep_alive => (
    is      => 'rw',
    isa     => 'Bool',
    default => 1,
);

# follow redirects
has followlocation => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# maximum number of redirects
has max_redirects => (
    is      => 'ro',
    isa     => 'Int',
    default => -1,
);

# identifier in each request
has user_agent_string => (
    is      => 'rw',
    isa     => 'Str',
    default => sub { "www.curl.useragent/$WWW::Curl::UserAgent::VERSION" },
);

has _curl_multi => (
    is      => 'ro',
    isa     => 'WWW::Curl::Multi',
    default => sub { WWW::Curl::Multi->new },
);

has _handler_queue => (
    is      => 'ro',
    isa     => 'ArrayRef[WWW::Curl::UserAgent::Handler]',
    default => sub { [] },
    traits  => ['Array'],
    handles => {
        add_handler             => 'push',
        _get_handler_from_queue => 'shift',
        _has_handler_in_queue   => 'count',
        request_queue_size      => 'count',
    }
);

has _active_handler_map => (
    is      => 'ro',
    isa     => 'HashRef[WWW::Curl::UserAgent::Handler]',
    default => sub { {} },
    traits  => ['Hash'],
    handles => {
        _active_handlers    => 'count',
        _set_active_handler => 'set',
        _get_active_handler => 'delete',
    }
);

has _max_private_id => (
    is      => 'ro',
    isa     => 'Num',
    default => 1,
    traits  => ['Counter'],
    handles => { _inc_private_id => 'inc', }
);

sub request {
    my ( $self, $request, %args ) = @_;

    my $timeout         = $args{timeout}         // $self->timeout;
    my $connect_timeout = $args{connect_timeout} // $self->connect_timeout;
    my $keep_alive      = $args{keep_alive}      // $self->keep_alive;
    my $followlocation  = $args{followlocation}  // $self->followlocation;
    my $max_redirects   = $args{max_redirects}   // $self->max_redirects;

    my $response;
    $self->add_handler(
        WWW::Curl::UserAgent::Handler->new(
            on_success => sub {
                my ( $req, $res ) = @_;
                $response = $res;
            },
            on_failure => sub {
                my ( $req, $msg, $desc ) = @_;
                $response = HTTP::Response->new( 500, $msg, [], $desc );
            },
            request => WWW::Curl::UserAgent::Request->new(
                http_request    => $request,
                connect_timeout => $connect_timeout,
                timeout         => $timeout,
                keep_alive      => $keep_alive,
                followlocation  => $followlocation,
                max_redirects   => $max_redirects,
            ),
        )
    );
    $self->perform;

    return $response;
}

sub add_request {
    my ( $self, %args ) = @_;

    my $on_success      = $args{on_success};
    my $on_failure      = $args{on_failure};
    my $request         = $args{request};
    my $timeout         = $args{timeout}         // $self->timeout;
    my $connect_timeout = $args{connect_timeout} // $self->connect_timeout;
    my $keep_alive      = $args{keep_alive}      // $self->keep_alive;
    my $followlocation  = $args{followlocation}  // $self->followlocation;
    my $max_redirects   = $args{max_redirects}   // $self->max_redirects;

    my $handler = WWW::Curl::UserAgent::Handler->new(
        on_success => $on_success,
        on_failure => $on_failure,
        request    => WWW::Curl::UserAgent::Request->new(
            http_request    => $request,
            connect_timeout => $connect_timeout,
            timeout         => $timeout,
            keep_alive      => $keep_alive,
            followlocation  => $followlocation,
            max_redirects   => $max_redirects,
        ),
    );
    $self->add_handler($handler);

    return $handler;
}

sub perform {
    my $self = shift;

    my $active_handlers;

    # activate handlers by draining the queue
    while ( $active_handlers = $self->_drain_handler_queue ) {

        # loop until there is a response available
        $self->_wait_for_response($active_handlers);

        # execute callbacks for all received responses
        $self->_perform_callbacks;
    }
}

sub _wait_for_response {
    my $self            = shift;
    my $active_handlers = shift;

    my $curl_multi = $self->_curl_multi;

    while ( $curl_multi->perform == $active_handlers ) {
        Time::HiRes::nanosleep(1);
        my @select = map {
            my $s = IO::Select->new;
            $s->add( @{$_} );
            $s;
        } ( $curl_multi->fdset );
        IO::Select->select( @select, 0.1 );
    }
}

sub _perform_callbacks {
    my $self = shift;

    while ( my ( $active_transfer_id, $return_code ) = $self->_curl_multi->info_read ) {

        unless ($active_transfer_id) {
            Time::HiRes::nanosleep(1);    # do not eat the whole cpu
            next;
        }

        my $handler   = $self->_get_active_handler($active_transfer_id);
        my $request   = $handler->request;
        my $curl_easy = $request->curl_easy;

        if ( $return_code == 0 ) {
            my $response = $self->_build_http_response( ${ $request->header_ref }, ${ $request->content_ref } );
            $handler->on_success->( $request->http_request, $response, $curl_easy );
        }
        else {
            $handler->on_failure->(
                $request->http_request, $curl_easy->strerror($return_code),
                $curl_easy->errbuf, $curl_easy
            );
        }
    }
}

sub _drain_handler_queue {
    my $self = shift;

    while ( $self->_has_handler_in_queue && $self->_active_handlers < $self->parallel_requests ) {
        $self->_activate_handler( $self->_get_handler_from_queue );
    }

    return $self->_active_handlers;
}

sub _activate_handler {
    my $self    = shift;
    my $handler = shift;

    # set up curl easy
    $self->_inc_private_id;
    my $private_id = $self->_max_private_id;
    my $easy       = $handler->request->curl_easy;
    $easy->setopt( CURLOPT_PRIVATE,   $private_id );
    $easy->setopt( CURLOPT_USERAGENT, $self->user_agent_string );

    # reference the handler on its handler id (CURLOPT_PRIVATE)
    $self->_set_active_handler( $private_id => $handler );

    # finally add the curl easy to curl multi
    $self->_curl_multi->add_handle($easy);
}

sub _build_http_response {
    my $self    = shift;
    my $header  = shift;
    my $content = shift;

    # PUT requests may contain continue header
    my @header = split "\r\n\r\n", $header;

    my $response = HTTP::Response->parse($header[-1]);
    $response->content($content) if defined $content;

    # message might include a bad char
    my $message = $response->message;
    $response->message($message)
        if $message =~ s/\r//g;

    return $response;
}

no Moose;
__PACKAGE__->meta->make_immutable;
1;

__END__

=pod

=head1 NAME

WWW::Curl::UserAgent - UserAgent based on libcurl

=head1 VERSION

version 0.9.4

=head1 SYNOPSIS

    use HTTP::Request;
    use WWW::Curl::UserAgent;

    my $ua = WWW::Curl::UserAgent->new(
        timeout         => 10000,
        connect_timeout => 1000,
    );

    $ua->add_request(
        request    => HTTP::Request->new('http://search.cpan.org/'),
        on_success => sub {
            my ( $request, $response ) = @_;
            if ($response->is_success) {
                print $response->content;
            }
            else {
                die $response->status_line;
            }
        },
        on_failure => sub {
            my ( $request, $error_msg, $error_desc ) = @_;
            die "$error_msg: $error_desc";
        },
    );
    $ua->perform;

=head1 DESCRIPTION

C<WWW::Curl::UserAgent> is a web user agent based on libcurl. It can be used
easily with C<HTTP::Request> and C<HTTP::Response> objects and handler
callbacks. For an easier interface there is also a method to map a single
request to a response.

C<WWW::Curl> is used for the power of libcurl, which e.g. handles connection
keep-alive, parallel requests, asynchronous callbacks and much more. This
package was written, because C<WWW::Curl::Simple> does not handle keep-alive
correctly and also does not consider PUT, HEAD and other request methods like
DELETE.

There is a simpler interface too, which just returns a C<HTTP::Response> for a
given C<HTTP::Request>, named request(). The normal approach to use this
library is to add as many requests with callbacks as your code allows to do and
run C<perform> afterwards. Then the callbacks will be excecuted sequentially
when the responses arrive beginning with the first received response. The
simple method request() does not support this of course, because there are no
callbacks defined.

This library is in production use on L<https://www.xing.com>.

=head1 CONSTRUCTOR METHODS

The following constructor methods are available:

=over 4

=item $ua = WWW::Curl::UserAgent->new( %options )

This method constructs a new C<WWW::Curl::UserAgent> object and returns it.
Key/value pair arguments may be provided to set up the initial state.
The default values should be based on the default values of libcurl.
The following options correspond to attribute methods described below:

    KEY                     DEFAULT
    -----------             --------------------
    user_agent_string       www.curl.useragent/$VERSION
    connect_timeout         300
    timeout                 0
    parallel_requests       5
    keep_alive              1
    followlocation          0
    max_redirects           -1

=back

=head1 ATTRIBUTES

=over

=item $ua->connect_timeout / $ua->connect_timeout($connect_timeout)

Get/set the timeout in milliseconds waiting for the response to be received. If the
response is not received within the timeout the on_failure handler is called.

=item $ua->timeout / $ua->timeout($timeout)

Get/set the timeout in milliseconds waiting for the response to be received. If the
response is not received within the timeout the on_failure handler is called.

=item $ua->parallel_requests / $ua->parallel_requests($parallel_requests)

Get/set the number of the maximum of requests performed in parallel. libcurl
itself may use less requests than this number but not more.

=item $ua->keep_alive / $ua->keep_alive($boolean)

Get/set if TCP connections should be reused with keep-alive. Therefor the
TCP connection is forced to be closed after receiving the response and the
corresponding header "Connection: close" is set. If keep-alive is enabled
(default) libcurl will handle the connections.

=item $ua->followlocation / $ua->followlocation($boolean)

Get/set if curl should follow redirects. The headers of the redirect respones
are thrown away while redirecting, so that the final response will be passed
into the corresponding handler.

=item $ua->max_redirects / $ua->max_redirects($max_redirects)

Get/set the maximum amount of redirects. -1 (default) means infinite redirects.
0 means no redirects at all. If the maximum redirect is reached the on_failure
handler will be called.

=item $ua->user_agent_string / $ua->user_agent_string($user_agent)

Get/set the user agent submitted in each request.

=item $ua->request_queue_size

Get the size of the not performed requests.

=item $ua->request( $request, %args )

Perform immediately a single C<HTTP::Request>. Parameters can be submitted
optionally, which will override the user agents settings for this single
request. Possible options are:

    connect_timeout
    timeout
    keep_alive
    followlocation
    max_redirects

Some examples for a request

    my $request = HTTP::Request->new('http://search.cpan.org/');

    $response = $ua->request($request);
    $response = $ua->request($request,
        timeout    => 3000,
        keep_alive => 0,
    );

If there is an error e.g. like a timeout the corresponding C<HTTP::Response>
object will have the statuscode 500, the short error description as message
and a longer message description as content. It runs perform() internally, so
queued requests will be performed, too.

=item $ua->add_request(%args)

Adds a request with some callback handler on receiving messages. The on_success
callback will be called for every successful read response, even those
containing error codes. The on_failure handler will be called when libcurl
reports errors, e.g. timeouts or bad curl settings. The parameters
C<request>, C<on_success> and C<on_failure> are mandatory. Optional are
C<timeout>, C<connect_timeout>, C<keep_alive>, C<followlocation> and
C<max_redirects>.

    $ua->add_request(
        request    => HTTP::Request->new('http://search.cpan.org/'),
        on_success => sub {
            my ( $request, $response, $easy ) = @_;
            print $request->as_string;
            print $response->as_string;
        },
        on_failure => sub {
            my ( $request, $err_msg, $err_desc, $easy ) = @_;
            # error handling
        }
    );

The callbacks provide as last parameter a C<WWW:Curl::Easy> object which was
used to perform the request. This can be used to obtain some informations like
statistical data about the request.

Chaining of C<add_request> calls is a feature of this module. If you add a
request within an C<on_success> handler it will be immediately executed when
the callback is executed. This can be useful to immediately react on a
response:

    $ua->add_request(
        request    => HTTP::Request->new( POST => 'http://search.cpan.org/', [], $form ),
        on_failure => sub { die },
        on_success => sub {
            my ( $request, $response ) = @_;

            my $target_url = get_target_from($response);
            $ua->add_request(
                request    => HTTP::Request->new( GET => $target_url ),
                on_failure => sub { die },
                on_success => sub {
                    my ( $request, $response ) = @_;
                    # actually do sth.
                }
            );
        },
    );
    $ua->perform; # executes both requests

=item $ua->add_handler($handler)

To have more control over the handler you can add a C<WWW::Curl::UserAgent::Handler>
by yourself. The C<WWW::Curl::UserAgent::Request> inside of the handler needs
all parameters provided to libcurl as mandatory to prevent defining duplicates of
default values. Within the C<WWW::Curl::UserAgent::Request> is the possiblity to
modify the C<WWW::Curl::Easy> object before it gets performed.

    my $handler = WWW::Curl::UserAgent::Handler->new(
        on_success => sub {
            my ( $request, $response, $easy ) = @_;
            print $request->as_string;
            print $response->as_string;
        },
        on_failure => sub {
            my ( $request, $err_msg, $err_desc, $easy ) = @_;
            # error handling
        }
        request    => WWW::Curl::UserAgent::Request->new(
            http_request    => HTTP::Request->new('http://search.cpan.org/'),
            connect_timeout => $ua->connect_timeout,
            timeout         => $ua->timeout,
            keep_alive      => $ua->keep_alive,
            followlocation  => $ua->followlocation,
            max_redirects   => $ua->max_redirects,
        ),
    );

    $handler->request->curl_easy->setopt( ... );

    $ua->add_handler($handler);

=item $ua->perform

Perform all queued requests. This method will return after all responses have
been received and handler have been processed.

=back

=head1 SEE ALSO

See L<HTTP::Request> and L<HTTP::Response> for a description of the
message objects dispatched and received.  See L<HTTP::Request::Common>
and L<HTML::Form> for other ways to build request objects.

See L<WWW::Curl> for a description of the settings and options possible
on libcurl.

=head1 AUTHORS

=over 4

=item *

Julian Knocke

=item *

Othello Maurer

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2013 by XING AG.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)