The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Plagger::Plugin::Filter::FetchEnclosure::ParallelUA;
use strict;
use base qw(Plagger::Plugin::Filter::FetchEnclosure);

use LWP::Parallel::UserAgent;
use HTTP::Request;
use Plagger::Cookies;

sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'update.entry.fixup' => \&enqueue,
        'update.fixup'       => \&fetch,
        'plugin.init'        => \&plugin_init,
    );
}

sub plugin_init {
    my $self = shift;
    $self->{ua} = LWP::Parallel::UserAgent->new;
    $self->{ua}->max_hosts( $self->conf->{concurrency} || 10 );
    $self->{ua}->max_req( $self->conf->{max_requests_per_host} || 2 );

    my $conf = Plagger->context->conf->{user_agent};
    if ($conf->{cookies}) {
        $self->{ua}->cookie_jar( Plagger::Cookies->create($conf->{cookies}) );
    }
}

sub enqueue {
    my($self, $context, $args) = @_;

    for my $enclosure ($args->{entry}->enclosures) {
        # TODO: do all of this in the base class ::Command
        my $feed_dir = File::Spec->catfile($self->conf->{dir}, $args->{feed}->id_safe);
        unless (-e $feed_dir && -d _) {
            $context->log(info => "mkdir $feed_dir");
            mkdir $feed_dir, 0777;
        }

        my $path = File::Spec->catfile($feed_dir, $enclosure->filename);

        if ($enclosure->length && -e $path && -s _ == $enclosure->length) {
            # TODO: if-none-match
            $context->log(debug => $enclosure->url . "is already stored in $path");
            next;
        }

        $context->log(info => "fetch " . $enclosure->url . " to " . $path);

        my $req = HTTP::Request->new(GET => $enclosure->url);

        if ($self->conf->{fake_referer}) {
            $context->log(debug => "Sending Referer: " . $args->{entry}->permalink);
            $req->header('Referer' => $args->{entry}->permalink);
        }

        $self->{ua}->register($req, $path);
        $self->{callback}->{$enclosure->url} = sub {
            my $response = shift;

            if ($response->code =~ /^[23]/) {
                if (my $length = $response->header('Content-Length')) {
                    $enclosure->length($length);
                    $enclosure->local_path($path);
                }
            } else {
                # xxx
            }
        };
    }
}

sub fetch {
    my($self, $context) = @_;

    $context->log(debug => "wait for responses from Parallel UA ...");
    my $entries = $self->{ua}->wait;

    for my $entry (values %$entries) {
        my $response = $entry->response;

        if (my $cb = $self->{callback}->{$response->request->url}) {
            $cb->($response);
        }
    }
}

1;

__END__

=head1 NAME

Plagger::Plugin::Filter::FetchEnclosure::ParallelUA - Fetch enclosures using Parallel UA

=head1 SYNOPSIS

  - module: Filter::FetchEnclosure::ParallelUA
    config:
      dir: /path/to/download
      concurrency: 5
      max_requests_per_host: 2

=head1 DESCRIPTION

This plugin uses LWP::Parallel UA to download enclosures from multiple hosts in parallel.

=head1 AUTHOR

Tatsuhiko Miyagawa

=head1 SEE ALSO

L<Plagger>, L<LWP::Parallel::UserAgent>

=cut