The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
############################################################
#
#   $Id: VenusEnvy.pm,v 1.10 2006/01/28 13:17:40 nicolaw Exp $
#   WWW::VenusEnvy - Retrieve VenusEnvy comic strip images
#
#   Copyright 2005,2006 Nicola Worthington
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
#
############################################################

package WWW::VenusEnvy;
# vim:ts=4:sw=4:tw=78

use strict;
use Exporter;
use LWP::UserAgent qw();
use HTTP::Request qw();
use Carp qw(carp croak);
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

$VERSION     = '1.10' || sprintf('%d.%02d', q$Revision$ =~ /(\d+)/g);
@ISA         = qw(Exporter);
@EXPORT      = ();
@EXPORT_OK   = qw(&get_strip &strip_url &mirror_strip);
%EXPORT_TAGS = ( all => \@EXPORT_OK );

sub mirror_strip {
	my $filename = shift;
	my $url = shift || strip_url();

	my $blob = get_strip($url);
	return undef if !defined($blob);

	if ((!defined($filename) || !length($filename)) && defined($url)) {
		($filename = $url) =~ s#.*/##;
	}
	my $ext = _image_format($blob);
	$filename =~ s/(\.(jpe?g|gif|png))?$/.$ext/i;

	open(FH,">$filename") ||
		croak "Unable to open file handle FH for file '$filename': $!";
	binmode FH;
	print FH $blob;
	close(FH) ||
		carp "Unable to close file handle FH for file '$filename': $!";
	return $filename;
}

sub get_strip {
	my $url = shift || strip_url() || '';

	if ($url =~ /^(\d{8}\d*(\.(jpg|gif|png))?)$/i) {
		$url = "http://venusenvy.keenspace.com/comics/$1";
		$url .= '.jpg' unless $url =~ /\.(jpg|gif|png)$/i;
	}

	my $ua = _new_agent();
	my $req = HTTP::Request->new(GET => $url); 
	#$req->referer('http://venusenvy.keenspace.com/');
	$req->referer('http://venusenvy.comicgenesis.com/');
	my $response = $ua->request($req);

	my $status;
	unless ($response->is_success) {
		$status = $response->status_line;
		unless ($url =~ s/\.gif$/.jpg/i) { $url =~ s/\.jpg$/.gif/i; }
		$req = HTTP::Request->new(GET => $url); 
		#$req->referer('http://venusenvy.keenspace.com/');
		$req->referer('http://venusenvy.comicgenesis.com/');
		$response = $ua->request($req);
	}

	if ($response->is_success) {
		unless (_image_format($response->content)) {
			carp('Unrecognised image format') if $^W;
			return undef;
		}
		if (length($response->content) < 1300) {
			if ($response->content =~ /(anti\-?)?hotlinking/i) {
				carp('Image has been blocked by anti-hotlinking server') if $^W;
				return undef;
			}
			carp('Image data is too') if $^W;
			return undef;
		}
		return $response->content;
	} elsif ($^W) {
		carp($status);
	}
	return undef;
}

sub strip_url {
	my $ua = _new_agent();

	my $response = $ua->get('http://venusenvy.keenspace.com');
	if ($response->is_success) {
		my $html = $response->content;
		if ($html =~ m#<img\s+.*?src="((https?://venusenvy\.(keenspace|comicgenesis)\.com)?
						/comics/\d{8}\d*\.(gif|jpg|png))".*?>#imsx) {
			my $url = $1;
			$url = "http://venusenvy.keenspace.com$1" unless $url =~ /^https?:\/\//i;
			return $url;
		}

	} elsif ($^W) {
		carp($response->status_line);
	}

	return undef;
}

sub _image_format {
	local $_ = shift || '';
	return 'gif' if /^GIF8[79]a/;
	return 'jpg' if /^\xFF\xD8/;
	return 'png' if /^\x89PNG\x0d\x0a\x1a\x0a/;
	return undef;
}

sub _new_agent {
	my @agents = (
			'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1).',
			'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.8) '.
			'Gecko/20050718 Firefox/1.0.4 (Debian package 1.0.4-2sarge1)',
			'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.7.5) '.
			'Gecko/20041110 Firefox/1.0',
			'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) '.
			'AppleWebKit/125.5.5 (KHTML, like Gecko) Safari/125.12',
			'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)',
		);

	my $ua = LWP::UserAgent->new(
			agent => $agents[int(rand(@agents))],
			timeout => 20
		);
	$ua->env_proxy;
	$ua->max_size(1024*500);
	return $ua;
}


1;

=pod

=head1 NAME

WWW::VenusEnvy - Retrieve VenusEnvy comic strip images

=head1 SYNOPSIS

 use WWW::VenusEnvy qw(get_strip mirror_strip strip_url);
 
 # Get the URL for todays strip
 my $image_url = strip_url();
 
 # Get todays strip
 my $image_blob = get_strip();
 
 # Get a specific strip by specifying the ID
 my $christmas_kiss = get_strip("20051229");
 
 # Write todays strip to local_filename.gif on disk
 my $filename_written = mirror_strip("local_filename.gif");
 
 # Write a specific strip to mystrip.gif on disk
 my $filename_written = mirror_strip("mystrip.gif","20051229");

=head1 DESCRIPTION

This module will download the latest VenusEnvy comic strip from
the Keenspace website and return a binary blob of the image, or
write it to disk. 

=head1 EXPORTS

The following functions can be exported with the C<:all> export
tag, or individually as is show in the above example.

=head2 strip_url

 # Return todays strip URL
 my $url = strip_url();
 
 # Return the strip URL for 19th August 2005
 $url = strip_url("20050819");

Accepts an optional argument specifying the date of the comic
strip in ISO format C<YYYYMMDD>.

=head2 get_strip

 # Get todays comic strip image
 my $image_blob = get_strip();

Accepts an optional argument specifying the date of the comic
strip in ISO format C<YYYYMMDD>.

=head2 mirror_strip

 # Write todays comic strip to "mystrip.gif" on disk
 my $filename_written = mirror_strip("mystrip.gif");

Accepts two optional arguments. The first is the filename that
the comic strip should be written to on disk. The second specifies
the date of the comic strip in ISO format C<YYYYMMDD>.

Returns the name of the file that was written to disk.

=head1 VERSION

$Id: VenusEnvy.pm,v 1.10 2006/01/28 13:17:40 nicolaw Exp $

=head1 AUTHOR

Nicola Worthington <nicolaw@cpan.org>

L<http://perlgirl.org.uk>

=head1 COPYRIGHT

Copyright 2005,2006 Nicola Worthington.

This software is licensed under The Apache Software License, Version 2.0.

L<http://www.apache.org/licenses/LICENSE-2.0>

=cut