The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package URL::Grab;

use 5.008;
use strict;
use warnings;
require Carp;
require LWP::UserAgent;

use Carp qw/carp/;
use LWP::UserAgent;

require Exporter;

our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( 'all' => [ qw() ] );

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );

our @EXPORT = qw();

(our $VERSION) = '$Revision: 1.4 $' =~ /([\d.]+)/;

sub new {
	my $class = shift;
	my $args = shift;
	my $self = { };

	$self->{retries} = $args->{retries} || 0;
	$self->{ua} = LWP::UserAgent->new(agent => "URL::Grab $VERSION");
	$self->{ua}->{timeout} = $args->{timeout} || 30;
	
	bless($self, $class);
	return $self;
}

sub grab_single {
	my $self = shift;
	my $url = shift;

	my $retval;
	if($url =~ /^https?:\/\// || $url =~ /^ftp:\/\//) {
		my $res = $self->{ua}->get($url);
		my $retries = 0;
		while($self->{retries} >= $retries) {
			if($res->is_success()) {
				return { $url => $res->content() };
			}
			$retries++;
		}
	} else {
		if($url =~ /^file:\/\// || $url =~ /^\//) {
			my $tmp_url = $url;
			$tmp_url =~ s/^file://;
			if(-f $tmp_url) {
				my $res;
				open(FH, $tmp_url);
				$res .= $_ while(<FH>);
				close(FH);
				return { $url => $res };
			} else {
				carp "No such file or directory";
			}
		} else {
			carp "Unknown transport protocol";
			return undef;
		}
	}
	return undef;
}

sub grab {
	my $self = shift;
	my @urls;
	while(my $arg = shift) {
		if(ref $arg eq 'ARRAY') { push @urls, $_ foreach(@{$arg});
		} else { push @urls, $arg; }
	}
	$self->{retval}->{$_} = $self->grab_single($_) foreach (@urls);
	return $self->{retval};
}

sub grab_failover {
	my $self = shift;
	my @urls;

	while (my $arg = shift) {
		push @urls, $arg;
	}

	foreach my $url (@urls) {
		my $content = $self->grab_single($url);
		return $content if $content;
	}
}

sub grab_mirrorlist {
	my $self = shift;
	my @urls;
	while(my $arg = shift) {
		push @urls, $arg;
	}
	foreach my $mirror (@urls) {
		if(ref $mirror eq 'SCALAR' || ref \$mirror eq 'SCALAR') {
			$self->{retval} = $self->grab_single($mirror)
		}
		$self->{retval} = $self->grab_failover(@{$mirror}) if ref $mirror eq 'ARRAY';
	}
	return $self->{retval};
}

1;
__END__
=head1 NAME

URL::Grab - Perl extension for blah blah blah

=head1 SYNOPSIS

  use URL::Grab;
  my $cnt_hsh;

  # IMPORTANT note (see also #32434):
  # Please note, that URL::Grab doesn't return you the content itself as a
  # scalar, but instead, returns a hash-reference. The keys of the
  # hash are the URLs.

  $cnt_hsh = $urlgrabber->grab('http://google.at');

  # The content then is available in $cnt_hsh->{'http://google.at'}->{'http://google.at'}.
  # Sorry, this is a design issue, that cannot be changed any more :-)
  # If you are fetching only one URL, you would better use grab_single!

  $cnt_hsh = $urlgrabber->grab(qw(http://google.at));
  $cnt_hsh = $urlgrabber->grab([ qw(http://google.at http://asdf.org) ]);
  $cnt_hsh = $urlgrabber->grab([ qw(http://google.at http://asdf.org) ], 'http://perl.com');

  $cnt_hsh = $urlgrabber->grab_mirrorlist(
    'http://linux.duke.edu/projects/yum/',
    [qw(http://www.netfilter.org http://www.at.netfilter.org)]
  );

  # Please note, the following example will return only *one* hash-reference - it will use the
  # first that works!!!
  $cnt_hsh = $urlgrabber->grab_mirrorlist([qw(
     http://www.netfilter.org http://www.at.netfilter.org
  )]);

  $cnt_hsh = $urlgrabber->grab_mirrorlist([qw(
    ftp://linux-kernel.at/packages/yum.conf2
    http://filelister.linux-kernel.at/downloads/packages/yum.conf
  )]);

  $cnt_hsh = $urlgrabber->grab_mirrorlist(
    'ftp://linux-kernel.at/packages/yum.conf'
  );

  $cnt_hsh = $urlgrabber->grab_mirrorlist([qw(
    /etc/yum.conf
    ftp://linux-kernel.at/packages/yum.conf
  )]);

=head1 DESCRIPTION

URL::Grab is a perl module that drastically simplifies the fetching of files
from within a local source (eg. local filesystem) and/or remote sources
(eg. http, ftp). It is designed to be used in programs that need common (but
not necessarily simple) url-fetching features. It is extremely simple to drop
into an existing program and provides a clean interface to protocol-independant
file-access. Best of all, URL::Grab takes care of all those pesky
file-fetching details, and lets you focus on whatever it is that your program
is written to do!

=head2 EXPORT

None by default.

=head1 SEE ALSO

LWP::UserAgent

Project mailinglist:
	http://lists.linux-kernel.at/wwsympa.fcgi/info/url-grab

Project website:
	http://projects.linux-kernel.at/URL-Grab/

=head1 AUTHOR

Oliver Falk, E<lt>oliver@linux-kernel.atE<gt>

=head1 THANKS

Gary Krueger E<lt>gkrueger@browsermedia.comE<gt> for pointing out some issues
 - #32434
 - #32433

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006-2008 by Oliver Falk E<lt>oliver@linux-kernel.atE<gt>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.0 or,
at your option, any later version of Perl 5 you may have available.

=cut