The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Sniffer::Connection::HTTP;
use strict;
use Sniffer::Connection;
use HTTP::Request;
use HTTP::Response;

=head1 NAME

Sniffer::Connection::HTTP - Callbacks for a HTTP connection

=head1 SYNOPSIS

You shouldn't use this directly but via L<Sniffer::HTTP>
which encapsulates most of this.

  my $sniffer = Sniffer::Connection::HTTP->new(
    callbacks => {
      request  => sub { my ($req,$conn) = @_; print $req->uri,"\n" if $req },
      response => sub { my ($res,$req,$conn) = @_; print $res->code,"\n" },
    }
  );

  # retrieve TCP packet in $tcp, for example via Net::Pcap
  my $tcp = sniff_tcp_packet;

  $sniffer->handle_packet($tcp);

=cut

use base 'Class::Accessor';

use vars qw($VERSION);

$VERSION = '0.23';

my @callbacks = qw(request response closed log);
__PACKAGE__->mk_accessors(qw(tcp_connection sent_buffer recv_buffer _response _response_chunk_size _response_len _request prev_request),
                          @callbacks);

sub new {
  my ($class,%args) = @_;

  my $packet = delete $args{tcp};

  # Set up dummy callbacks as the default
  for (@callbacks) { $args{$_} ||= sub {}; };

  for (qw(sent_buffer recv_buffer)) {
    $args{$_} ||= \(my $buffer);
  };

  my $tcp_log = delete $args{tcp_log} || sub {};

  my $self = $class->SUPER::new(\%args);
  $self->tcp_connection(Sniffer::Connection->new(
    tcp           => $packet,
    sent_data     => sub { $self->sent_data(@_) },
    received_data => sub { $self->received_data(@_) },
    closed        => sub {},
    teardown      => sub { $self->closed->($self) },
    log           => $tcp_log,
  ));

  $self;
};

sub sent_data {
  my ($self,$data,$conn) = @_;
  $self->flush_received;
  ${$self->{sent_buffer}} .= $data;
  $self->flush_sent;
};

sub received_data {
  my ($self,$data,$conn) = @_;
  $self->flush_sent;
  ${$self->{recv_buffer}} .= $data;
  #warn $data;
  $self->flush_received;
};

sub extract_chunksize {
  my ($self,$buffer) = @_;
  my $chunksize;
  #$self->log->("---Extracting from\n$$buffer\n---");
  if (! ($$buffer =~ s!^\s*([a-f0-9]+)[ \t]*\r\n!!si)) {
    $self->log->("Extracting chunked size failed.");
    #$self->log->($$buffer);
    (my $copy = $$buffer) =~ s!\n!\\n\n!gs;
    $copy =~ s!\r!\\r!gs;
    $self->log->($copy);
  } else {
    $chunksize = hex $1;
    #$self->log->(sprintf "Found chunked size %s (%s remaining)\n", $chunksize, length $$buffer);
    #$self->log->(length $$buffer);
    $self->_response_chunk_size($chunksize);
  };
  #$self->log->("---Buffer is now\n$$buffer\n---");
  return $chunksize
};

sub flush_received {
  my ($self) = @_;
  my $buffer = $self->recv_buffer;
  #$self->log->($$buffer);
  while ($$buffer) {
    if (! (my $res = $self->_response)) {
      # We need to find something that looks like a valid HTTP request in our stream
      if (not $$buffer =~ s!.*^(HTTP/\d\..*? [12345]\d\d\b)!$1!m) {
        # Need to discard-and-sync
        $$buffer = "";
        #$self->recv_buffer(undef);
        return;
      };

      if (! ($$buffer =~ s!^(.*?\r?\n\r?\n)!!sm)) {
        # need more data before header is complete
        $self->log->("Need more header data");
        #$self->recv_buffer($buffer);
        return;
      };

      my $h = $1;
      $res = HTTP::Response->parse($h);
      $self->_response($res);

      my $len = $res->header('Content-Length');

      $self->_response_len( $len );
    };

    my $res = $self->_response;
    my $len = $self->_response_len;
    my $chunksize = $self->_response_chunk_size;

    my $te = lc ($res->header('Transfer-Encoding') || '');
    if ($te and $te eq 'chunked') {
      if (! defined $chunksize) {
        $chunksize = $self->extract_chunksize($buffer);
      };

      if (defined $chunksize) {
        #$self->log->("Chunked size: $chunksize\n");
        #$self->log->("Got buffer of size " + length $$buffer);

        while (defined $chunksize and length $$buffer >= $chunksize) {
          #$self->log->("Got chunk of size $chunksize");
          #$self->log->(">>$$buffer<<");
          $self->_response->add_content(substr($$buffer,0,$chunksize));
          #$self->log->(substr($$buffer,0,$chunksize));
          $$buffer = substr($$buffer,$chunksize);
          $$buffer =~ s!^\r\n!!;
          #$self->log->(sprintf "Remaining are %s bytes ($$buffer)", length $$buffer);

          $self->_response_chunk_size(undef);
          if ($chunksize == 0) {
            $self->log->("Got chunksize 0, reporting response");
            $self->report_response($res);
            #$$buffer =~ s!^\r\n!!;

            if ($$buffer eq '') {
              return;
            };
          } elsif (length $$buffer) {
            # Get next chunksize, if available
            $chunksize = $self->extract_chunksize($buffer);
            #$self->log->("Next size is $chunksize");
          } else {
            # We've read/received exactly the chunk.
          };

          return
            if ! defined $chunksize;
        };
      };
      return
    };

    # Non-chunked handling:
    if (defined $len and length $$buffer < $len) {
      # need more data before header is complete
      $self->log->(sprintf "Need more response body data (%0.0f%%)\r", 100 * ((length $$buffer) / $len))
        if $len;
      return;
    };

    if (defined $len and $len == 0) {
      # can only flush at closing of connection
      $self->log->("Would need to collect whole buffer in connection (unimplemented, taking what I've got)" );
      $len = length $$buffer;
    };

    $self->report_response_buffer($buffer,$len);
  };
};

sub report_response_buffer {
  my ($self,$buffer,$len) = @_;
  my $res = $self->_response;

  $len = length $$buffer
    if (! defined $len);

  $res->content(substr($$buffer,0,$len));
  $self->log->("Response header and content are ready ($len bytes)");

  $$buffer = substr($$buffer,$len);
  if (length $$buffer) {
    $self->log->("Leftover data: $$buffer");
  };
  $self->report_response($res);
};

sub report_response {
  my ($self,$res) = @_;
  $self->response->($res,$self->prev_request,$self);
  $self->_response(undef);
  $self->_response_len(undef);
};

sub flush_sent {
  my ($self) = @_;
  my $buffer = $self->sent_buffer;
  while ($$buffer) {
    if (! (my $req = $self->_request)) {
      # We need to find something that looks like a valid HTTP request in our stream
      $$buffer =~ s!.*^(GET|POST)!$1!m;

      if (! ($$buffer =~ s!^(.*?\r?\n\r?\n)!!sm)) {
        # need more data before header is complete
        $self->log->("Need more header data");
        #$self->sent_buffer($buffer);
        return;
      };

      # Consider prepending the hostname in front of
      # the URI for nicer equivalence with HTTP::Proxy?

      $self->log->("Got header");
      my $h = $1;
      $req = HTTP::Request->parse($h);

      my $host;
      # should be the IP address of some TCP packet if we don't find the header ...
      if ($req->header('Host')) {
        $host = $req->header('Host');
      } else {
        warn "Missing Host: header. Don't know how to determine hostname";
        $host = "???"
      };
      $req->uri->scheme('http');
      $req->uri->host($host);
      #$req->uri->port(80); # fix from TCP packet!

      $self->_request($req);
    };

    my $req = $self->_request;
    my $len = $req->header('Content-Length') || 0; # length $$buffer; # not clean

    if (length $$buffer < $len) {
      # need more data before header is complete
      return;
    };

    $self->_request->content(substr($$buffer,0,$len));
    $self->log->("Request header and content are ready ($len bytes)");

    $self->request->($req,$self);

    $$buffer = substr($$buffer,$len);

    # Tie request and response together in a better way than serial request->response->request ...
    $self->prev_request($req);
    $self->_request(undef);
  };
};

# Delegate some methods
sub handle_packet { my $self = shift;$self->tcp_connection->handle_packet(@_); };
sub flow { my $self = shift; return $self->tcp_connection->flow(@_);};
sub last_activity { my $self = shift; $self->tcp_connection->last_activity(@_) }

1;

=head1 TODO

=over 4

=item *

Think about pipelined connections. These are not easily massaged into the
request/response scheme. Well, maybe they are, with a bit of hidden
logic here.

=item *

Every response accumulates all data in memory instead of
giving the user the partial response so it can be written
to disk. This should maybe later be improved.

=back

=head1 BUGS

The whole module suite has almost no tests.

If you experience problems, I<please> supply me with a complete,
relevant packet dump as the included C<dump-raw.pl> creates. Even
better, supply me with (failing) tests.

=head1 AUTHOR

Max Maischein (corion@cpan.org)

=head1 COPYRIGHT

Copyright (C) 2005-2011 Max Maischein.  All Rights Reserved.

This code is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut