The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package HTTP::Rollup;

require 5.005;

use strict;
use CGI::Util qw( unescape );
use Exporter;

use vars qw($VERSION @ISA @EXPORT_OK);

@ISA = qw(Exporter);
@EXPORT_OK = qw(RollupQueryString);

$VERSION = '0.8';

my $DEFAULT_DELIMITER = "&";

# Turn on special checking for Doug MacEachern's modperl
my $MOD_PERL = 0;
if (exists $ENV{MOD_PERL}) {
    if ($ENV{MOD_PERL_API_VERSION} == 2) {
        $MOD_PERL = 2;
        require Apache2::RequestUtil;
        require APR::Table;
    } else {
        $MOD_PERL = 1;
        require Apache;
    }
}

=head1 NAME

HTTP::Rollup - translate an HTTP query string to a hierarchical structure

=head1 SYNOPSIS

 use HTTP::Rollup qw(RollupQueryString);

 my $rollup = new HTTP::Rollup;

 my $hashref = $rollup->RollupQueryString($query_string);

=head1 DESCRIPTION

Given input text of the format:

  employee.name.first=Jane
  employee.name.last=Smith
  employee.address=123%20Main%20St.
  employee.city=New%20York
  id=444
  phone=(212)123-4567
  phone=(212)555-1212
  @fax=(212)999-8877

Construct an output data structure like this:

  $hashref = {
    employee => {
		  name => {
			   "first" => "Jane",
			   "last" => "Smith",
			  },
		  address => "123 Main St.",
		  city => "New York"
		},
    phone => [
	       "(212)123-4567",
	       "(212)555-1212"
	     ],
    fax => [
	     "(212)999-8877"
	   ],
    id => 444
  };

This is intended as a drop-in replacement for the HTTP query string
parsing implemented in CGI.pm, adding the ability to assemble a nested
data structure (CGI.pm constructs purely flat structures).

e.g. given the sample input above, CGI.pm would produce:

  $hashref = {
    "employee.name.first" => [ "Jason" ],
    "employee.name.last" => [ "Smith" ],
    "employee.name.address" => [ "123 Main St." ],
    "employee.name.city" => [ "New York" ],
    "phone" => [ "(212)123-4567", "(212)555-1212" ],
    "@fax"=> [ "(212)999-8877" ],
    "id" => [ 444 ]
  };

If no $query_string parameter is provided, HTTP::Rollup will attempt to find
the input in the same manner used by CGI.pm (the internal _query_string
function is pretty much cloned from CGI.pm).

HTTP::Rollup runs under both CGI or mod_perl contexts, and from the
command line (reads from @ARGV or stdin).

=head1 FEATURES

=over

=item *

Data nesting using dot notation

=item *

Recognizes a list if there is more than one value with the same name

=item *

Lists can be forced with a leading @-sign, to allow for lists that could
have just one element (eliminating ambiguity between scalar and single-
element list).  The @ will be stripped.

=back

=head1 FUNCTIONS

=item new([ FORCE_LIST => 1 ], [ DELIM => ";" ])

The FORCE_LIST switch causes CGI.pm-style behavior, as above,
for backward compatibility.

The DELIM option specifies the input field delimiter.  This is not
auto-detected.  Default is the standard ampersand, though semicolon has
been proposed as a replacement to avoid conflict with the ampersand used
for character entities.

Specifying "\n" for the delimiter is helpful for parsing parameters on stdin.

=item RollupQueryString()

Workhorse function.

=begin testing

use lib "./blib/lib";
use HTTP::Rollup qw(RollupQueryString);
use Data::Dumper;

my $s1 = "one=abc&two=def&three=ghi";
my $r1 = new HTTP::Rollup;
my $hr = $r1->RollupQueryString($s1); # default delimiter
ok ($hr->{one} eq "abc");
ok ($hr->{two} eq "def");
ok ($hr->{three} eq "ghi");

my $string = <<_END_;
employee.name.first=Jane
employee.name.last=Smith
employee.address=123%20Main%20St.
employee.city=New%20York
id=444
phone=(212)123-4567
phone=(212)555-1212
\@fax=(212)999-8877
_END_

my $r2 = new HTTP::Rollup(DELIM => "\n");
my $hashref = $r2->RollupQueryString($string);
ok($hashref->{employee}->{name}->{first} eq "Jane",
   "2-nested scalar");
ok($hashref->{employee}->{city} eq "New York",
   "1-nested scalar, with unescape");
ok($hashref->{id} eq "444",
   "top-level scalar");
ok($hashref->{phone}->[1] eq "(212)555-1212",
   "auto-list");
ok($hashref->{fax}->[0] eq "(212)999-8877",
   "\@-list");

my $string2 = "employee.name.first=Jane;employee.name.last=Smith;employee.address=123%20Main%20St.;employee.city=New%York;id=444;phone=(212)123-4567;phone=(212)555-1212;\@fax=(212)999-8877";

my $r3 = new HTTP::Rollup(DELIM => ";");
$hashref = $r3->RollupQueryString($string2);
ok($hashref->{employee}->{name}->{first} eq "Jane",
   "nested scalar");
ok($hashref->{id} eq "444",
   "top-level scalar");
ok($hashref->{phone}->[1] eq "(212)555-1212",
   "auto-list");
ok($hashref->{fax}->[0] eq "(212)999-8877",
   "\@-list");

my $r4 = new HTTP::Rollup(FORCE_LIST => 1, DELIM => "\n");
my $hashref2 = $r4->RollupQueryString($string);
ok($hashref2->{'employee.name.first'}->[0] eq "Jane",
   "nested scalar");
ok($hashref2->{id}->[0] eq "444",
   "top-level scalar");
ok($hashref2->{phone}->[1] eq "(212)555-1212",
   "auto-list");
ok($hashref2->{'@fax'}->[0] eq "(212)999-8877",
   "\@-list");

=end testing

=cut

my %legal_parameters = (
			FORCE_LIST => 1,
			DELIM => 1,
		       );
sub new {
    my $cl  = shift;
    my $class = ref($cl) || $cl;
    my %params = @_;

    my $self = {};
    bless $self, $class;

    for my $param (keys %params) {
	if ($legal_parameters{$param}) {
	    $self->{$param} = $params{$param};
	} else {
	    print STDERR __PACKAGE__, ": illegal config parameter $param\n";
	}
    }

    return $self;
}

sub RollupQueryString {
    my $self = shift;
    my $input = shift;

    my $delimiter = $self->{DELIM} || $DEFAULT_DELIMITER;

    if (!defined $input) {
	$input = _query_string();
    }

    my $root = {};

    return $root if !$input;

    # query strings are name-value pairs delimited by & or by newline or semicolon
    foreach my $nvp (split(/$delimiter/, $input)) {
	last if $nvp eq "=";	# sometimes appears as query string terminator

      PARSE:
	my ($name, $value) = split /=/, $nvp;
	my @levels = split /\./, $name;
	$value = CGI::Util::unescape($value);

	if ($self->{FORCE_LIST}) {
	    # always use a list, for CGI.pm-style behavior
	    if (ref $root->{$name}) {
		# there's already a list there
		push @{$root->{$name}}, $value;
	    } else {
		$root->{$name} = [ $value ];
	    }
	    next;
	}

      TRAVERSE:
	my $node = $root;
	my $leaf;
	for ($leaf = shift @levels;
	     scalar(@levels) >= 1;
	     $leaf = shift @levels) {
	    $node->{$leaf} = {}
	      unless defined $node->{$leaf};	# vivify
	    $node = $node->{$leaf};
	}

      SAVE:
	if (ref $node->{$leaf}) {
	    # there's already a list there
	    $leaf =~ s/^@//;
	    push @{$node->{$leaf}}, $value;
	} elsif (defined $node->{$leaf}) {
	    # scalar now, convert to a list
	    $node->{$leaf} = [ $node->{$leaf}, $value ];
	} elsif ($leaf =~ /^\@/) {
	    # leading @ forces list
	    $leaf =~ s/^@//;
	    $node->{$leaf} = [ $value ];
	} else {
	    $node->{$leaf} = $value;
	}
    }

    return $root;
}


# Most of the following was copied from CGI.pm (some version <2.8).
# Frozen here to avoid breakage on CGI changes, and to allow local
# alterations (e.g. support for PUT).

sub _query_string {
    my $meth = $ENV{'REQUEST_METHOD'};
    my $query_string;

    if (!defined $meth) {
	# no REQUEST_METHOD, so must be command-line usage

	return _read_from_cmdline();
    }

    if ($meth =~ /^(GET|HEAD)$/o) {
	if ($MOD_PERL == 1) {
	    return Apache->request->args;
	} elsif ($MOD_PERL ==2) {
	    return Apache2::RequestUtil->request->args;
	} else {
	    # CGI mode, not mod_perl
	    return $ENV{QUERY_STRING} ||  $ENV{REDIRECT_QUERY_STRING};
	}
    }

    # this is a POST

    my $content_length = $ENV{CONTENT_LENGTH} || 0;

    _read_from_client(\*STDIN,
		      \$query_string,
		      $content_length,
		      0)
      if $content_length > 0;

    # Have our cake and eat it too! (see CGI.pm)
    # Append query string contents to the POST data.
    if ($ENV{QUERY_STRING}) {
	$query_string .= (length($query_string) ? '&' : '') . $ENV{QUERY_STRING};
    }
    return $query_string;
}

sub _read_from_client {
    my($fh, $buff, $len, $offset) = @_;
    local $^W=0;                # prevent a warning
    return undef unless defined($fh);
    return read($fh, $$buff, $len, $offset);
}

# Note: multiple parameters on cmdline are always linked with ampersand;
# so better not change DELIM for this input style.

sub _read_from_cmdline {
    my($input,@words);
    my($query_string);

    if (@ARGV) {
	@words = @ARGV;
    } else {
	my @lines;
	chomp(@lines = <STDIN>); # remove newlines
	$input = join(" ",@lines);
	@words = _shellwords($input);    
    }
    foreach (@words) {
	s/\\=/%3D/g;
	s/\\&/%26/g;	    
    }

    if ("@words"=~/=/) {
	$query_string = join('&',@words);
    } else {
	$query_string = join('+',@words);
    }

    return $query_string;
}

# Taken from shellwords.pl in the Perl 5.6 distribution.
#
# Usage:
#	@words = &shellwords($line);
#	or
#	@words = &shellwords(@lines);
#	or
#	@words = &shellwords;		# defaults to $_ (and clobbers it)

sub _shellwords {
    local ($_) = join('', @_) if @_;
    my (@words, $snippet, $field);

    s/^\s+//;
    if ($_ ne '') {
	$field = '';
	for (;;) {
	    if (s/^"(([^"\\]|\\.)*)"//) {
		($snippet = $1) =~ s#\\(.)#$1#g;
	    }
	    elsif (/^"/) {
		die "Unmatched double quote: $_\n";
	    }
	    elsif (s/^'(([^'\\]|\\.)*)'//) {
		($snippet = $1) =~ s#\\(.)#$1#g;
	    }
	    elsif (/^'/) {
		die "Unmatched single quote: $_\n";
	    }
	    elsif (s/^\\(.)//) {
		$snippet = $1;
	    }
	    elsif (s/^([^\s\\'"]+)//) {
		$snippet = $1;
	    }
	    else {
		s/^\s+//;
		last;
	    }
	    $field .= $snippet;
	}
	push(@words, $field);
    }
    @words;
}

1;

=head1 AUTHOR

Jason W. May <jmay@pobox.com>

=head1 COPYRIGHT

Copyright (C) 2002-2005 Jason W. May.  All rights reserved.
This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut