package CGI::BasePlus;
require 5.001;
use CGI::Base;
use URI::Escape qw(uri_escape uri_unescape);
use CGI::Carp;
@ISA = qw(CGI::Base);
$revision='$Id: BasePlus.pm,v 2.76 1997/4/5 08:20:00 lstein Exp $';
($VERSION=$revision)=~s/.*(\d+\.\d+).*/$1/;
=head1 NAME
CGI::BasePlus - HTTP CGI Base Class with Handling of Multipart Forms
=head1 DESCRIPTION
This module implements a CGI::BasePlus object that is identical in
behavior to CGI::Base except that it provides special handling for
postings of MIME type multipart/form-data (which may get very long).
In the case of these types of postings, parts that are described
as being from a file upload are copied into a temporary file in
/usr/tmp, a filehandle is opened on the temporary files, and the name
of the filehandle is returned to the caller in the
$CGI::Base:QUERY_STRING variable.
Please see L<CGI::Base> for more information.
=head2 SEE ALSO
URI::URL, CGI::Request, CGI::MiniSvr, CGI::Base
=cut
;
############ SUPPORT ROUTINES FOR THE NEW MULTIPART ENCODING ##########
package MultipartBuffer;
# how many bytes to read at a time. We use
# a 5K buffer by default.
$FILLUNIT = 1024 * 5;
$TIMEOUT = 10*60; # 10 minute timeout
$SPIN_LOOP_MAX = 1000; # bug fix for some Netscape servers
$CRLF="\015\012";
sub new {
my($package,$boundary,$length,$filehandle) = @_;
my $IN;
if ($filehandle) {
my($package) = caller;
# force into caller's package if necessary
$IN = $filehandle=~/[':]/ ? $filehandle : "$package\:\:$filehandle";
}
$IN = "main::STDIN" unless $IN;
binmode($IN);
# Netscape seems to be a little bit unreliable
# about providing boundary strings.
if ($boundary) {
# Under the MIME spec, the boundary consists of the
# characters "--" PLUS the Boundary string
$boundary = "--$boundary";
# Read the topmost (boundary) line plus the CRLF
my($null) = '';
$length -= read($IN,$null,length($boundary)+2,0);
} else { # otherwise we find it ourselves
my($old);
($old,$/) = ($/,$CRLF); # read a CRLF-delimited line
$boundary = <$IN>; # BUG: This won't work correctly under mod_perl
$length -= length($boundary);
chomp($boundary); # remove the CRLF
$/ = $old; # restore old line separator
}
my $self = {LENGTH=>$length,
BOUNDARY=>$boundary,
IN=>$IN,
BUFFER=>'',
};
$FILLUNIT = length($boundary) if length($boundary) > $FILLUNIT;
return bless $self,$package;
}
# This reads and returns the header as an associative array.
# It looks for the pattern CRLF/CRLF to terminate the header.
sub readHeader {
my($self) = @_;
my($end);
my($ok) = 0;
do {
$self->fillBuffer($FILLUNIT);
$ok++ if ($end = index($self->{BUFFER},"${CRLF}${CRLF}")) >= 0;
$ok++ if $self->{BUFFER} eq '';
$FILLUNIT *= 2 if length($self->{BUFFER}) >= $FILLUNIT;
} until $ok;
my($header) = substr($self->{BUFFER},0,$end+2);
substr($self->{BUFFER},0,$end+4) = '';
my %return;
while ($header=~/^([\w-]+): (.*)$CRLF/mog) {
$return{$1}=$2;
}
return %return;
}
# This reads and returns the body as a single scalar value.
sub readBody {
my($self) = @_;
my($data);
my($returnval)='';
while (defined($data = $self->read)) {
$returnval .= $data;
}
return $returnval;
}
# This will read $bytes or until the boundary is hit, whichever happens
# first. After the boundary is hit, we return undef. The next read will
# skip over the boundary and begin reading again;
sub read {
my($self,$bytes) = @_;
# default number of bytes to read
$bytes = $bytes || $FILLUNIT;
# Fill up our internal buffer in such a way that the boundary
# is never split between reads.
$self->fillBuffer($bytes);
# Find the boundary in the buffer (it may not be there).
my $start = index($self->{BUFFER},$self->{BOUNDARY});
# If the boundary begins the data, then skip past it
# and return undef. The +2 here is a fiendish plot to
# remove the CR/LF pair at the end of the boundary.
if ($start == 0) {
# clear us out completely if we've hit the last boundary.
if (index($self->{BUFFER},"$self->{BOUNDARY}--")==0) {
$self->{BUFFER}='';
$self->{LENGTH}=0;
return undef;
}
# just remove the boundary.
substr($self->{BUFFER},0,length($self->{BOUNDARY})+2)='';
return undef;
}
my $bytesToReturn;
if ($start > 0) { # read up to the boundary
$bytesToReturn = $start > $bytes ? $bytes : $start;
} else { # read the requested number of bytes
# leave enough bytes in the buffer to allow us to read
# the boundary. Thanks to Kevin Hendrick for finding
# this one.
$bytesToReturn = $bytes - (length($self->{BOUNDARY})+1);
}
my $returnval=substr($self->{BUFFER},0,$bytesToReturn);
substr($self->{BUFFER},0,$bytesToReturn)='';
# If we hit the boundary, remove the CRLF from the end.
return ($start > 0) ? substr($returnval,0,-2) : $returnval;
}
# This fills up our internal buffer in such a way that the
# boundary is never split between reads
sub fillBuffer {
my($self,$bytes) = @_;
return unless $self->{LENGTH};
my($boundaryLength) = length($self->{BOUNDARY});
my($bufferLength) = length($self->{BUFFER});
my($bytesToRead) = $bytes - $bufferLength + $boundaryLength + 2;
$bytesToRead = $self->{LENGTH} if $self->{LENGTH} < $bytesToRead;
# Try to read some data. We may hang here if the browser is screwed up.
my $bytesRead = read($self->{IN},$self->{BUFFER},$bytesToRead,$bufferLength);
# An apparent bug in the Netscape Commerce server causes the read()
# to return zero bytes repeatedly without blocking if the
# remote user aborts during a file transfer. I don't know how
# they manage this, but the workaround is to abort if we get
# more than SPIN_LOOP_MAX consecutive zero reads.
if ($bytesRead == 0) {
die "CGI::BasePlus: Server closed socket during multipart read (client aborted?).\n"
if ($self->{ZERO_LOOP_COUNTER}++ >= $SPIN_LOOP_MAX);
} else {
$self->{ZERO_LOOP_COUNTER}=0;
}
$self->{LENGTH} -= $bytesRead;
}
# Return true when we've finished reading
sub eof {
my($self) = @_;
return 1 if (length($self->{BUFFER}) == 0)
&& ($self->{LENGTH} <= 0);
}
package TempFile;
@TEMP=('/usr/tmp','/var/tmp','/tmp',);
unshift(@TEMP,$ENV{TMPDIR}) if defined($ENV{TMPDIR});
foreach (@TEMP) {
do {$TMPDIRECTORY = $_; last} if -w $_;
}
$TMPDIRECTORY = "." unless $TMPDIRECTORY;
$SEQUENCE="CGItemp${$}0000";
# cute feature, but no longer supported
# %OVERLOAD = ('""'=>'as_string');
# Create a temporary file that will be automatically
# unlinked when finished.
sub new {
my($package) = @_;
$SEQUENCE++;
my $directory = "${TMPDIRECTORY}/${SEQUENCE}";
return bless \$directory;
}
sub DESTROY {
my($self) = @_;
unlink $$self; # get rid of the file
}
sub as_string {
my($self) = @_;
return $$self;
}
############ OVERRIDDEN ROUTINES IN CGI::Base ##########
package CGI::BasePlus;
# Read entity body in such a way that file uploads are stored
# to temporary disk files. See below.
sub read_post_body {
my $self = shift;
# Use parent's read_post_body() method unless we have a
# new multipart/form-data type of body to deal with.
return &CGI::Base::read_post_body($self)
unless $CGI::Base::CONTENT_TYPE =~ m|^multipart/form-data|;
# Handle multipart/form-data postings. For compatability
# with the Request.pm module, the name/value pairs are
# converted into canonical (URL-encoded) form and stored
# into $CGI::Base::QUERY_STRING.
my($boundary) = $ENV{'CONTENT_TYPE'}=~/boundary=(\S+)/;
$self->read_multipart($boundary,$ENV{'CONTENT_LENGTH'});
}
sub read_multipart {
my($self,$boundary,$length) = @_;
my($buffer) = new MultipartBuffer($boundary,$length);
my(%header,$body);
while (!$buffer->eof) {
%header = $buffer->readHeader;
# In beta1 it was "Content-disposition". In beta2 it's "Content-Disposition"
# Sheesh.
my($key) = $header{'Content-disposition'} ? 'Content-disposition' : 'Content-Disposition';
my($param) = $header{$key}=~/ name="(.*?)"/;
my($filename) = $header{$key}=~/ filename="(.*?)"/;
my($value);
if ($filename) {
# If we get here, then we are dealing with a potentially large
# uploaded file. Save the data to a temporary file, then open
# the file for reading, and stash the filehandle name inside
# the query string.
my($tmpfile) = new TempFile;
my $tmp = $tmpfile->as_string;
open (OUT,">$tmp") || croak "CGI open of $tmpfile: $!\n";
chmod 0666,$tmp; # make sure anyone can delete it.
binmode(OUT);
my $data;
while ($data = $buffer->read) {
print OUT $data;
}
close OUT;
# Now create a new filehandle in the caller's namespace.
# The name of this filehandle just happens to be identical
# to the original filename (NOT the name of the temporary
# file, which is hidden!)
my($filehandle);
if ($filename=~/^[a-zA-Z_]/) {
my($frame,$cp) = (1);
do { $cp = caller($frame++); } until $cp!~/^CGI/;
$filehandle = "$cp\:\:$filename";
} else {
$filehandle = "\:\:$filename";
}
warn "Filehandle = $filehandle tmpfile = $tmp";
open($filehandle,$tmp) || croak "CGI open of $tmpfile: $!\n";
binmode($filehandle);
$value = $filename;
# Under Unix, it is safe to let the temporary file be deleted
# when it goes out of scope. The storage is not deallocated
# until the last file descriptor is closed. So we do nothing
# special here.
}
# If we get here then we're dealing a non-file form field, which we
# will assume can fit into memory OK.
else {
$value = $buffer->readBody;
}
# Now we store the parameter name and the value into our
# query string for later retrieval
$CGI::Base::QUERY_STRING .= '&' if $CGI::Base::QUERY_STRING;
$CGI::Base::QUERY_STRING .= uri_escape($param) . '=' . uri_escape($value);
}
1;
}
$VERSION; # prevent spurious warning message