The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#line 1 "inc/File/Fetch.pm - /Users/kane/sources/p4/other/file-fetch/lib/File/Fetch.pm"
package File::Fetch;

use strict;
use FileHandle;
use File::Copy;
use File::Spec 0.82;
use File::Spec::Unix;
use File::Fetch::Item;
use File::Basename              qw[dirname];

use Cwd                         qw[cwd];
use IPC::Cmd                    qw[can_run run];
use File::Path                  qw[mkpath];
use Params::Check               qw[check];
use Module::Load::Conditional   qw[can_load];

use vars    qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
                $BLACKLIST $METHOD_FAIL $VERSION $METHODS
                $FTP_PASSIVE $DEBUG
            ];

$VERSION        = 0.02;
$PREFER_BIN     = 0;        # XXX TODO implement
$FROM_EMAIL     = 'File-Fetch@example.com';
$USER_AGENT     = 'File::Fetch/$VERSION';
$BLACKLIST      = [qw|ftp|];
$METHOD_FAIL    = { };
$FTP_PASSIVE    = 1;
$DEBUG          = 0;

### methods available to fetch the file depending on the scheme
$METHODS = {
    http    => [ qw|lwp wget curl lynx| ],
    ftp     => [ qw|lwp netftp wget curl ncftp ftp| ],
    file    => [ qw|lwp| ],
    #rsync   => [ qw|rsync| ], # XXX TODO
};

### silly warnings ###
local $Params::Check::VERBOSE     = $Params::Check::VERBOSE     = 1;
local $Module::Load::Conditional  = $Module::Load::Conditional  = 0;

#line 88

sub new {
    my $class = shift;
    my %hash  = @_;
    
    my ($uri);
    my $tmpl = {
        uri => { required => 1, store => \$uri },
    };

    check( $tmpl, \%hash ) or return;

    ### parse the uri to usable parts ###
    my $href    = __PACKAGE__->_parse_uri( $uri ) or return;
    
    ### make it into a FFI object ###
    my $ffi     = File::Fetch::Item->new( %$href ) or return;


    ### return the object ###
    return $ffi;
}

### parses an uri to a hash structure:
###
### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
###
### becomes:
###
### $href = {
###     scheme  => 'ftp',
###     host    => 'ftp.cpan.org',
###     path    => '/pub/mirror',
###     file    => 'index.html'
### };
###
sub _parse_uri {
    my $self = shift;
    my $uri  = shift or return;
    
    my $href = { uri => $uri };
    
    ### find the scheme ###
    $uri            =~ s|^(\w+)://||;
    $href->{scheme} = $1;
  
    ### file:// paths have no host ###
    if( $href->{scheme} eq 'file' ) {
        $href->{path} = $uri;
        $href->{host} = '';
    
    } else {
        @{$href}{qw|host path|} = $uri =~ m|([^/]*)(/.*)$|s;
    }
 
    ### split the path into file + dir ###
    {   my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
        $href->{path} = $parts[1];
        $href->{file} = $parts[2];
    }       
 
 
    return $href;
}

#line 162

sub fetch {
    my $self = shift or return;
    my %hash = @_;
    
    my $to;
    my $tmpl = {
        to  => { default => cwd(), store => \$to },       
    };
    
    check( $tmpl, \%hash ) or return;
    
    ### create the path if it doesn't exist yet ###
    unless( -d $to ) {             
        eval { mkpath( $to ) };
        if( $@ ) {
            warn "Could not create path '$to'\n";
            return;
        }
    }               
   
    ### set passive ftp if required ###
    local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;

    ### 
    for my $method ( @{ $METHODS->{$self->scheme} } ) {
        my $sub =  '_'.$method.'_fetch';
        
        unless( __PACKAGE__->can($sub) ) {
            warn "Can not call method for '$method' -- WEIRD!\n";
            next;
        }

        ### method is blacklisted ###
        next if grep { lc $_ eq $method } @$BLACKLIST;

        ### method is known to fail ###
        next if $METHOD_FAIL->{$method};
             
        if(my $file = $self->$sub(to=>File::Spec->catfile($to,$self->file))){
            
            unless( -e $file && -s _ ) {
                warn "'$method' said it fetched '$file', ".
                     "but it was not created\n";

                ### mark the failure ###
                $METHOD_FAIL->{$method} = 1;
            
                next;
            
            } else {
            
                my $abs = File::Spec->rel2abs( $file );
                return $abs;
            }
        } 
    }
    
    
    ### if we got here, we looped over all methods, but we weren't able 
    ### to fetch it.
    return;
}        

#line 256

########################
### _*_fetch methods ###
########################

### LWP fetching ###
sub _lwp_fetch {
    my $self = shift;
    my %hash = @_;
    
    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };     
    check( $tmpl, \%hash ) or return;

    ### modules required to download with lwp ###
    my $use_list = {
        LWP                 => '0.0',
        'LWP::UserAgent'    => '0.0',
        'HTTP::Request'     => '0.0',
        'HTTP::Status'      => '0.0',
        URI                 => '0.0',

    };

    if( can_load(modules => $use_list) ) {
      
        ### setup the uri object
        my $uri = URI->new( File::Spec::Unix->catfile(
                                    $self->path, $self->file
                        ) );

        ### special rules apply for file:// uris ###
        $uri->scheme( $self->scheme );
        $uri->host( $self->scheme eq 'file' ? '' : $self->host );                        
        $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';

        ### set up the useragent object 
        my $ua = LWP::UserAgent->new();
        $ua->agent( $USER_AGENT );
        $ua->from( $FROM_EMAIL );
        $ua->env_proxy;
    
        my $res = $ua->mirror($uri, $to) or return;

        ### uptodate or fetched ok ###
        if ( $res->code == 304 or $res->code == 200 ) {
            return $to;
        
        } else {
            warn "Fetch failed! HTTP response code: '". $res->code ."' [".
            HTTP::Status::status_message($res->code). "]\n";     
            return;            
        }
    
    } else {       
        $METHOD_FAIL->{'lwp'} = 1;
        return;
    }
}          

### Net::FTP fetching    
sub _netftp_fetch {
    my $self = shift;
    my %hash = @_;
    
    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };     
    check( $tmpl, \%hash ) or return;
    
    ### required modules ###
    my $use_list = { 'Net::FTP' => 0 };
                
    if( can_load( modules => $use_list ) ) {

        ### make connection ###    
        my $ftp;
        unless( $ftp = Net::FTP->new( $self->host ) ) {
            warn "Ftp creation failed: $@";
            return;
        }
        
        ### login ###
        unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
            warn "Could not login to '".$self->host."'\n";
            return;
        }
        
        ### set binary mode, just in case ###
        $ftp->binary;
        
        ### create the remote path ###
        my $remote = File::Spec->catfile( $self->path, $self->file );
    
        ### fetch the file ###
        my $target;
        unless( $target = $ftp->get( $remote, $to ) ) {
            warn "Could not fetch '$remote' from '".$self->host."'\n";
            return;
        }
        
        ### log out ###
        $ftp->quit;
        
        return $target;
        
    } else {       
        $METHOD_FAIL->{'netftp'} = 1;
        return;
    }    
}    

### /bin/wget fetch ###
sub _wget_fetch {    
    my $self = shift;
    my %hash = @_;
    
    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };     
    check( $tmpl, \%hash ) or return;
    
    ### see if we have a wget binary ###
    if( my $wget = can_run('wget') ) {
        
        ### no verboseness, thanks ###
        my $cmd = [ $wget, '--quiet' ];
        
        ### run passsive if specified ###
        push @$cmd, '--passive-ftp' if $FTP_PASSIVE; 
        
        ### set the output document, add the uri ###
        push @$cmd, '--output-document', $to, $self->uri;                      

        ### shell out ###
        my $captured;
        unless( run( command => $cmd, buffer => \$captured, verbose => 0 ) ) {
            warn "Command failed: $captured";
            return;
        } 
    
        return $to;
    
    } else {
        $METHOD_FAIL->{'wget'} = 1;
        return;
    }            
}    


### /bin/ftp fetch ###
sub _ftp_fetch {    
    my $self = shift;
    my %hash = @_;
    
    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };     
    check( $tmpl, \%hash ) or return;
    
    ### see if we have a wget binary ###
    if( my $ftp = can_run('ftp') ) {
        
        my $fh = FileHandle->new;

        local $SIG{CHLD} = 'IGNORE';
        
        unless ($fh->open("|$ftp -n")) {
            warn "/bin/ftp creation failed: $!\n";
            return;
        }

        my @dialog = (
            "lcd " . dirname($to),
            "open " . $self->host,
            "user anonymous $FROM_EMAIL",
            "cd /",
            "cd " . $self->path,
            "binary",
            "get " . $self->file . " " . $self->file,
            "quit",
        );

        foreach (@dialog) { $fh->print($_, "\n") }
        $fh->close;

        return $to;
    }
}

### lynx is stupid - it decompresses any .gz file it finds to be text
### use /bin/lynx to fetch files
sub _lynx_fetch {
    my $self = shift;
    my %hash = @_;
    
    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };     
    check( $tmpl, \%hash ) or return;
    
    ### see if we have a wget binary ###
    if( my $lynx = can_run('lynx') ) {
        
        
        ### write to the output file ourselves, since lynx ass_u_mes to much 
        my $local = FileHandle->new(">$to") 
                        or (
                            warn ("Could not open '$to' for writing: $!\n"), 
                            return 
                        );
        
        ### dump to stdout ###
        my $cmd = [
            $lynx,
            '-source',
            "-auth=anonymous:$FROM_EMAIL",
            $self->uri
        ];
        
        ### shell out ###
        my $captured;
        unless(run( command => $cmd, 
                    buffer  => \$captured, 
                    verbose => $DEBUG ) 
        ) {
            warn "Command failed: $captured";
            return;
        } 
    
        ### print to local file ###
        $local->print( $captured );
        $local->close or return;
    
        return $to;
    
    } else {
        $METHOD_FAIL->{'lynx'} = 1;
        return;
    }            
}    

### use /bin/ncftp to fetch files
sub _ncftp_fetch {
    my $self = shift;
    my %hash = @_;
    
    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };     
    check( $tmpl, \%hash ) or return;
    
    ### we can only set passive mode in interactive sesssions, so bail out
    ### if $FTP_PASSIVE is set
    return if $FTP_PASSIVE;
    
    ### see if we have a wget binary ###
    if( my $ncftp = can_run('ncftp') ) {
        
        my $cmd = [
            $ncftp,
            '-V',                   # do not be verbose
            '-p', $FROM_EMAIL,      # email as password
            $self->host,            # hostname
            dirname($to),           # local dir for the file
                                    # remote path to the file
            File::Spec::Unix->catdir( $self->path, $self->file ),
        ];
        
        ### shell out ###
        my $captured;
        unless(run( command => $cmd, 
                    buffer  => \$captured, 
                    verbose => $DEBUG ) 
        ) {
            warn "Command failed: $captured";
            return;
        } 
    
        return $to;
    
    } else {
        $METHOD_FAIL->{'ncftp'} = 1;
        return;
    }            
}    

### use /bin/curl to fetch files
sub _curl_fetch {
    my $self = shift;
    my %hash = @_;
    
    my ($to);
    my $tmpl = {
        to  => { required => 1, store => \$to }
    };     
    check( $tmpl, \%hash ) or return;
    
    if (my $curl = can_run('curl')) {

        ### these long opts are self explanatory - I like that -jmb
	    my $cmd = [ $curl ];

	    push(@$cmd, '--silent') unless $DEBUG;

        ### curl does the right thing with passive, regardless ###
    	if ($self->scheme eq 'ftp') {
    		push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
    	}

        push @$cmd, '--fail', '--output', $to, $self->uri;

        my $captured;
        unless(run( command => $cmd,
                    buffer  => \$captured,
                    verbose => $DEBUG ) 
        ) {
        
            warn "command failed: $captured";
            return;
        }

        return $to;

    } else {
        $METHOD_FAIL->{'curl'} = 1;
        return;
    }
}


### use File::Copy for fetching file:// urls ###
### XXX file:// uri to local path conversion is just too weird...
### depend on LWP to do it for us
# sub _file_fetch {
#     my $self = shift;
#     my %hash = @_;
#     
#     my ($to);
#     my $tmpl = {
#         to  => { required => 1, store => \$to }
#     };     
#     check( $tmpl, \%hash ) or return;
#     
#     my $remote = File::Spec->catfile( $self->path, $self->file );
#     
#     ### File::Copy is littered with 'die' statements :( ###
#     my $rv = eval { File::Copy::copy( $remote, $to ) };
#     
#     ### something went wrong ###
#     if( !$rv or $@ ) {
#         warn "Could not copy '$remote' to '$to': $! $@";
#         return;
#     }
#     
#     return $to;
# }

1;

#line 778

# Local variables:
# c-indentation-style: bsd
# c-basic-offset: 4
# indent-tabs-mode: nil
# End:
# vim: expandtab shiftwidth=4: