#line 1 "inc/File/Fetch.pm - /Users/kane/sources/p4/other/file-fetch/lib/File/Fetch.pm"
package File::Fetch;
use strict;
use FileHandle;
use File::Copy;
use File::Spec 0.82;
use File::Spec::Unix;
use File::Fetch::Item;
use File::Basename qw[dirname];
use Cwd qw[cwd];
use IPC::Cmd qw[can_run run];
use File::Path qw[mkpath];
use Params::Check qw[check];
use Module::Load::Conditional qw[can_load];
use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
$BLACKLIST $METHOD_FAIL $VERSION $METHODS
$FTP_PASSIVE $DEBUG
];
$VERSION = 0.02;
$PREFER_BIN = 0; # XXX TODO implement
$FROM_EMAIL = 'File-Fetch@example.com';
$USER_AGENT = 'File::Fetch/$VERSION';
$BLACKLIST = [qw|ftp|];
$METHOD_FAIL = { };
$FTP_PASSIVE = 1;
$DEBUG = 0;
### methods available to fetch the file depending on the scheme
$METHODS = {
http => [ qw|lwp wget curl lynx| ],
ftp => [ qw|lwp netftp wget curl ncftp ftp| ],
file => [ qw|lwp| ],
#rsync => [ qw|rsync| ], # XXX TODO
};
### silly warnings ###
local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
local $Module::Load::Conditional = $Module::Load::Conditional = 0;
#line 88
sub new {
my $class = shift;
my %hash = @_;
my ($uri);
my $tmpl = {
uri => { required => 1, store => \$uri },
};
check( $tmpl, \%hash ) or return;
### parse the uri to usable parts ###
my $href = __PACKAGE__->_parse_uri( $uri ) or return;
### make it into a FFI object ###
my $ffi = File::Fetch::Item->new( %$href ) or return;
### return the object ###
return $ffi;
}
### parses an uri to a hash structure:
###
### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
###
### becomes:
###
### $href = {
### scheme => 'ftp',
### host => 'ftp.cpan.org',
### path => '/pub/mirror',
### file => 'index.html'
### };
###
sub _parse_uri {
my $self = shift;
my $uri = shift or return;
my $href = { uri => $uri };
### find the scheme ###
$uri =~ s|^(\w+)://||;
$href->{scheme} = $1;
### file:// paths have no host ###
if( $href->{scheme} eq 'file' ) {
$href->{path} = $uri;
$href->{host} = '';
} else {
@{$href}{qw|host path|} = $uri =~ m|([^/]*)(/.*)$|s;
}
### split the path into file + dir ###
{ my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
$href->{path} = $parts[1];
$href->{file} = $parts[2];
}
return $href;
}
#line 162
sub fetch {
my $self = shift or return;
my %hash = @_;
my $to;
my $tmpl = {
to => { default => cwd(), store => \$to },
};
check( $tmpl, \%hash ) or return;
### create the path if it doesn't exist yet ###
unless( -d $to ) {
eval { mkpath( $to ) };
if( $@ ) {
warn "Could not create path '$to'\n";
return;
}
}
### set passive ftp if required ###
local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
###
for my $method ( @{ $METHODS->{$self->scheme} } ) {
my $sub = '_'.$method.'_fetch';
unless( __PACKAGE__->can($sub) ) {
warn "Can not call method for '$method' -- WEIRD!\n";
next;
}
### method is blacklisted ###
next if grep { lc $_ eq $method } @$BLACKLIST;
### method is known to fail ###
next if $METHOD_FAIL->{$method};
if(my $file = $self->$sub(to=>File::Spec->catfile($to,$self->file))){
unless( -e $file && -s _ ) {
warn "'$method' said it fetched '$file', ".
"but it was not created\n";
### mark the failure ###
$METHOD_FAIL->{$method} = 1;
next;
} else {
my $abs = File::Spec->rel2abs( $file );
return $abs;
}
}
}
### if we got here, we looped over all methods, but we weren't able
### to fetch it.
return;
}
#line 256
########################
### _*_fetch methods ###
########################
### LWP fetching ###
sub _lwp_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### modules required to download with lwp ###
my $use_list = {
LWP => '0.0',
'LWP::UserAgent' => '0.0',
'HTTP::Request' => '0.0',
'HTTP::Status' => '0.0',
URI => '0.0',
};
if( can_load(modules => $use_list) ) {
### setup the uri object
my $uri = URI->new( File::Spec::Unix->catfile(
$self->path, $self->file
) );
### special rules apply for file:// uris ###
$uri->scheme( $self->scheme );
$uri->host( $self->scheme eq 'file' ? '' : $self->host );
$uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
### set up the useragent object
my $ua = LWP::UserAgent->new();
$ua->agent( $USER_AGENT );
$ua->from( $FROM_EMAIL );
$ua->env_proxy;
my $res = $ua->mirror($uri, $to) or return;
### uptodate or fetched ok ###
if ( $res->code == 304 or $res->code == 200 ) {
return $to;
} else {
warn "Fetch failed! HTTP response code: '". $res->code ."' [".
HTTP::Status::status_message($res->code). "]\n";
return;
}
} else {
$METHOD_FAIL->{'lwp'} = 1;
return;
}
}
### Net::FTP fetching
sub _netftp_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### required modules ###
my $use_list = { 'Net::FTP' => 0 };
if( can_load( modules => $use_list ) ) {
### make connection ###
my $ftp;
unless( $ftp = Net::FTP->new( $self->host ) ) {
warn "Ftp creation failed: $@";
return;
}
### login ###
unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
warn "Could not login to '".$self->host."'\n";
return;
}
### set binary mode, just in case ###
$ftp->binary;
### create the remote path ###
my $remote = File::Spec->catfile( $self->path, $self->file );
### fetch the file ###
my $target;
unless( $target = $ftp->get( $remote, $to ) ) {
warn "Could not fetch '$remote' from '".$self->host."'\n";
return;
}
### log out ###
$ftp->quit;
return $target;
} else {
$METHOD_FAIL->{'netftp'} = 1;
return;
}
}
### /bin/wget fetch ###
sub _wget_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### see if we have a wget binary ###
if( my $wget = can_run('wget') ) {
### no verboseness, thanks ###
my $cmd = [ $wget, '--quiet' ];
### run passsive if specified ###
push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
### set the output document, add the uri ###
push @$cmd, '--output-document', $to, $self->uri;
### shell out ###
my $captured;
unless( run( command => $cmd, buffer => \$captured, verbose => 0 ) ) {
warn "Command failed: $captured";
return;
}
return $to;
} else {
$METHOD_FAIL->{'wget'} = 1;
return;
}
}
### /bin/ftp fetch ###
sub _ftp_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### see if we have a wget binary ###
if( my $ftp = can_run('ftp') ) {
my $fh = FileHandle->new;
local $SIG{CHLD} = 'IGNORE';
unless ($fh->open("|$ftp -n")) {
warn "/bin/ftp creation failed: $!\n";
return;
}
my @dialog = (
"lcd " . dirname($to),
"open " . $self->host,
"user anonymous $FROM_EMAIL",
"cd /",
"cd " . $self->path,
"binary",
"get " . $self->file . " " . $self->file,
"quit",
);
foreach (@dialog) { $fh->print($_, "\n") }
$fh->close;
return $to;
}
}
### lynx is stupid - it decompresses any .gz file it finds to be text
### use /bin/lynx to fetch files
sub _lynx_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### see if we have a wget binary ###
if( my $lynx = can_run('lynx') ) {
### write to the output file ourselves, since lynx ass_u_mes to much
my $local = FileHandle->new(">$to")
or (
warn ("Could not open '$to' for writing: $!\n"),
return
);
### dump to stdout ###
my $cmd = [
$lynx,
'-source',
"-auth=anonymous:$FROM_EMAIL",
$self->uri
];
### shell out ###
my $captured;
unless(run( command => $cmd,
buffer => \$captured,
verbose => $DEBUG )
) {
warn "Command failed: $captured";
return;
}
### print to local file ###
$local->print( $captured );
$local->close or return;
return $to;
} else {
$METHOD_FAIL->{'lynx'} = 1;
return;
}
}
### use /bin/ncftp to fetch files
sub _ncftp_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
### we can only set passive mode in interactive sesssions, so bail out
### if $FTP_PASSIVE is set
return if $FTP_PASSIVE;
### see if we have a wget binary ###
if( my $ncftp = can_run('ncftp') ) {
my $cmd = [
$ncftp,
'-V', # do not be verbose
'-p', $FROM_EMAIL, # email as password
$self->host, # hostname
dirname($to), # local dir for the file
# remote path to the file
File::Spec::Unix->catdir( $self->path, $self->file ),
];
### shell out ###
my $captured;
unless(run( command => $cmd,
buffer => \$captured,
verbose => $DEBUG )
) {
warn "Command failed: $captured";
return;
}
return $to;
} else {
$METHOD_FAIL->{'ncftp'} = 1;
return;
}
}
### use /bin/curl to fetch files
sub _curl_fetch {
my $self = shift;
my %hash = @_;
my ($to);
my $tmpl = {
to => { required => 1, store => \$to }
};
check( $tmpl, \%hash ) or return;
if (my $curl = can_run('curl')) {
### these long opts are self explanatory - I like that -jmb
my $cmd = [ $curl ];
push(@$cmd, '--silent') unless $DEBUG;
### curl does the right thing with passive, regardless ###
if ($self->scheme eq 'ftp') {
push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
}
push @$cmd, '--fail', '--output', $to, $self->uri;
my $captured;
unless(run( command => $cmd,
buffer => \$captured,
verbose => $DEBUG )
) {
warn "command failed: $captured";
return;
}
return $to;
} else {
$METHOD_FAIL->{'curl'} = 1;
return;
}
}
### use File::Copy for fetching file:// urls ###
### XXX file:// uri to local path conversion is just too weird...
### depend on LWP to do it for us
# sub _file_fetch {
# my $self = shift;
# my %hash = @_;
#
# my ($to);
# my $tmpl = {
# to => { required => 1, store => \$to }
# };
# check( $tmpl, \%hash ) or return;
#
# my $remote = File::Spec->catfile( $self->path, $self->file );
#
# ### File::Copy is littered with 'die' statements :( ###
# my $rv = eval { File::Copy::copy( $remote, $to ) };
#
# ### something went wrong ###
# if( !$rv or $@ ) {
# warn "Could not copy '$remote' to '$to': $! $@";
# return;
# }
#
# return $to;
# }
1;
#line 778
# Local variables:
# c-indentation-style: bsd
# c-basic-offset: 4
# indent-tabs-mode: nil
# End:
# vim: expandtab shiftwidth=4: