#!/usr/bin/env perl
# ABSTRACT: Yet Another Download Accelerator CLI wrapper
# PODNAME: yada
use strict;
use utf8;
use warnings qw(all);
use Config;
use Fcntl;
use File::Copy;
use Getopt::Long;
use Pod::Usage;
use YADA;
our $VERSION = '0.047'; # VERSION
GetOptions(
q(help) => \my $help,
q(dups!) => \my $dups,
q(encoding=s) => \my $encoding,
q(max=i) => \my $max,
q(maxredirs=i) => \my $maxredirs,
q(proxy=s) => \my $proxy,
q(quiet!) => \my $quiet,
q(referer=s) => \my $referer,
q(tcp_nodelay) => \my $tcp_nodelay,
q(timeout=i) => \my $timeout,
q(useragent=s) => \my $useragent,
q(verbose) => \my $verbose,
) or pod2usage(q(-verbose) => 1);
pod2usage(q(-verbose) => 1) if $help;
my @urls = <>;
chomp @urls;
local $| = 1;
my $q = YADA->new({
allow_dups => $dups // 1,
max => $max // 4,
timeout => $timeout // 600,
});
for my $url (@urls) {
my ($fh, $name);
$q->append(
$url,
sub {
my ($self) = @_;
$name = get_filename($self->initial_url);
# will die() later
sysopen($fh, $name, O_CREAT | O_NONBLOCK | O_WRONLY);
binmode $fh;
$self->setopt(
encoding => $encoding // q(),
maxredirs => $maxredirs // 5,
noprogress => $quiet,
proxy => $proxy,
referer => $referer,
tcp_nodelay => $tcp_nodelay // 0,
useragent => $useragent // qq(yada/$VERSION ($Config{archname}; Perl/$Config{version}) @{[ Net::Curl::version() ]}),
verbose => $verbose,
autoreferer => 1,
ssl_verifyhost => 0,
ssl_verifypeer => 0,
unrestricted_auth => 1,
writedata => $fh,
);
},
sub {
my ($self) = @_;
close $fh;
## no critic (ProhibitComplexRegexes)
#if ($self->has_error or $self->getinfo(q(response_code)) =~ m{^5[0-9]{2}$}) {
if ($self->has_error) {
unlink $name;
} elsif (${$self->header} =~ m{\bContent-Disposition:\s*attachment;\s*filename=("?[\w\.\-]+"?);?}isx) {
my $filename = $1;
$filename =~ s/^"//x;
$filename =~ s/"$//x;
move($name, $name . q(.tmp));
my $new_name = get_filename(URI->new(q(file:///) . $filename));
move($name . q(.tmp), $new_name);
} elsif ($self->final_url ne $self->initial_url) {
move($name, $name . q(.tmp));
my $new_name = get_filename($self->final_url);
move($name . q(.tmp), $new_name);
}
},
);
}
$q->wait;
sub get_filename {
my $url = shift->clone->canonical;
my $orig = ($url->path_segments)[-1] || q(index.html);
$orig .= q(?) . $url->query
if $url->query;
$orig =~ s{[^\w\.\-]}{_}gsx;
my $i = 1;
my $name = $orig;
for (;;) {
-e $name
? $name = $orig . q(.) . $i++
: last;
}
return $name;
}
__END__
=pod
=encoding UTF-8
=head1 NAME
yada - Yet Another Download Accelerator CLI wrapper
=head1 VERSION
version 0.047
=head1 SYNOPSIS
yada [options] URLs_file
yada [options] < URLs_file
=head1 WARNING: GONE MOO!
This module isn't using L<Any::Moose> anymore due to the announced deprecation status of that module.
The switch to the L<Moo> is known to break modules that do C<extend 'AnyEvent::Net::Curl::Queued::Easy'> / C<extend 'YADA::Worker'>!
To keep the compatibility, make sure that you are using L<MooseX::NonMoose>:
package YourSubclassingModule;
use Moose;
use MooseX::NonMoose;
extends 'AnyEvent::Net::Curl::Queued::Easy';
...
Or L<MouseX::NonMoose>:
package YourSubclassingModule;
use Mouse;
use MouseX::NonMoose;
extends 'AnyEvent::Net::Curl::Queued::Easy';
...
Or the L<Any::Moose> equivalent:
package YourSubclassingModule;
use Any::Moose;
use Any::Moose qw(X::NonMoose);
extends 'AnyEvent::Net::Curl::Queued::Easy';
...
However, the recommended approach is to switch your subclassing module to L<Moo> altogether (you can use L<MooX::late> to smoothen the transition):
package YourSubclassingModule;
use Moo;
use MooX::late;
extends 'AnyEvent::Net::Curl::Queued::Easy';
...
=head1 DESCRIPTION
Employs L<AnyEvent::Net::Curl::Queued> to download a set of URLs in batch mode (parallelizing connections).
Downloading the same file in the same directory will result in the original copy of file being preserved and the second copy being named F<file.1>.
If that file is downloaded yet again, the third copy will be named F<file.2>, and so on.
(behavior/description copied from L<wget>)
=head1 OPTIONS
=over 4
=item --help
This.
=item --[no]dups
Allow repeated requests to the same URLs (default: true).
=item --encoding
Sets the contents of the C<Accept-Encoding:> header sent in a HTTP request, and enables decoding of a response when a C<Content-Encoding:> header is received.
Three encodings are supported: I<identity>, which does nothing, I<deflate> which requests the server to compress its response using the L<zlib> algorithm, and I<gzip> which requests the L<gzip> algorithm.
If a zero-length string is set, then an C<Accept-Encoding:> header containing all supported encodings is sent (default).
=item --max
Download that many files in parallel (default: 4).
=item --maxredirs
Redirection limit (default: 5).
If that many redirections have been followed, the next redirect will cause an error.
=item --proxy
Set HTTP proxy to use.
The proxy string may be specified with a C<protocol://> prefix to specify alternative proxy protocols.
Use C<socks4://>, C<socks4a://>, C<socks5://> or C<socks5h://> (the last one to enable socks5 and asking the proxy to do the resolving) to request the specific SOCKS version to be used.
No protocol specified, C<http://> and all others will be treated as HTTP proxies.
=item --[no]quiet
Turn off progress output.
=item --referer
Used to set the C<Referer:> header in the http request sent to the remote server.
=item --tcp_nodelay
Setting this option will disable TCP's Nagle algorithm.
The purpose of this algorithm is to try to minimize the number of small packets on the network
(where "small packets" means TCP segments less than the Maximum Segment Size (MSS) for the network).
=item --timeout
The maximum time in seconds that you allow the transfer operation to take (default: 600 seconds).
=item --useragent
Used to set the C<User-Agent:> header in the http request sent to the remote server.
=item --verbose
Set the parameter to 1 to get the utility to display a lot of verbose information about its operations.
Very useful for L<libcurl> and/or protocol debugging and understanding.
The verbose information will be sent to C<STDERR>.
=back
=head1 SEE ALSO
=over 4
=item *
L<AnyEvent::Net::Curl::Queued>
=item *
L<curl>
=back
=head1 AUTHOR
Stanislaw Pusep <stas@sysd.org>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2014 by Stanislaw Pusep.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut