# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Regexp::Log::Monster;

use strict;
use warnings;

use base qw( Regexp::Log );
use vars qw( %DEFAULT %FORMAT %REGEXP );

our $VERSION = 0.05;

=head1 NAME

Regexp::Log::Monster - A regexp parser for the Extended Log Format + vhost

=head1 SYNOPSIS

    my $foo = Regexp::Log::Monster->new(
        format  => ':logmonster',
        capture => [qw( ts request )],
    );

    # the format() and capture() methods can be used to set or get
    $foo->format('custom %date %request %status %bytes');
    $foo->capture(qw( ts req ));

    # this is necessary to know in which order
    # we will receive the captured fields from the regexp
    my @fields = $foo->capture;

    # the all-powerful capturing regexp :-)
    my $re = $foo->regexp;

    while (<>) {
        my %data;
        @data{@fields} = /$re/;    # no need for /o, it's a compiled regexp

        # now munge the fields
        ...
    }

=head1 DESCRIPTION

Regexp::Log::Monster uses Regexp::Log as a base class, to generate regular
expressions for performing the usual data munging tasks on log files that
cannot be simply split().

This specific module enables the computation of regular expressions for
parsing log files written in the Common Log Format (for example, the logs
generated by the httpd web server using the keyword 'common'), in the
Extended Log Format, and in the Log Monster format, which is the Extended
Log Format with the name of the virtual host appended to each line.

For more information on how to use this module, please see Regexp::Log.

=head1 ABSTRACT

Regexp::Log::Monster enables simple parsing of log files created using the
Extended Log Format, such as the logs generated by the httpd web server 
using the keyword 'common'.

=cut

# Default values: the format string and the list of fields to capture.
# The capture list names both the outer fields (e.g. 'date') and the
# sub-fields nested inside them in %REGEXP (e.g. 'ts').
%DEFAULT = (
    format => join(
        ' ', qw(
            %host %rfc %authuser %date %request
            %status %bytes %referer %useragent %vhost
        )
    ),
    capture => [
        qw(
            host rfc authuser date ts request req
            status bytes referer ref useragent ua vhost
        )
    ],
);

# Predefined format strings, keyed by name. Each richer format is the
# previous one with extra fields appended; ':default' and ':logmonster'
# are the same string.
%FORMAT = do {
    my $common     = '%host %rfc %authuser %date %request %status %bytes';
    my $extended   = $common . ' %referer %useragent';
    my $logmonster = $extended . ' %vhost';

    (
        ':default'    => $logmonster,
        ':common'     => $common,
        ':extended'   => $extended,
        ':logmonster' => $logmonster,
    );
};

# The regexps that match the various fields, keyed by the %name placeholder
# used in format strings.
#
# Each pattern is bracketed by a pair of regexp comments, (?#=name) and
# (?#!name), marking where the field called 'name' starts and ends. The
# markers may nest: 'ts' lies inside 'date', 'req' inside 'request', 'ref'
# inside 'referer' and 'ua' inside 'useragent', so the inner names select
# the value without its surrounding brackets or quotes.
# NOTE(review): the markers are presumably turned into capturing parentheses
# by the Regexp::Log base class -- see its documentation.
%REGEXP = (

    # IPv4-only alternative for %host, kept for reference:
    #   '%host' => '(?#=host)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?#!host)',

    # numeric address or name of the remote host
    '%host' => '(?#=host)\S+(?#!host)',

    # rfc931 identity
    '%rfc' => '(?#=rfc).*?(?#!rfc)',

    # authenticated user
    '%authuser' => '(?#=authuser).*?(?#!authuser)',

    # [date], e.g. [01/Jan/1997:13:07:21 -0600]; 'ts' excludes the brackets
    '%date' =>
        '(?#=date)\[(?#=ts)\d{2}\/\w{3}\/\d{4}(?::\d{2}){3} [-+]\d{4}(?#!ts)\](?#!date)',

    # "request"; 'req' excludes the quotes
    '%request' => '(?#=request)\"(?#=req).*?(?#!req)\"(?#!request)',

    # status code
    '%status' => '(?#=status)\d+(?#!status)',

    # bytes sent: either '-' or a number. The alternation is wrapped in a
    # non-capturing group so that its '|' stays local to this field; left
    # ungrouped, it could split the whole line regexp into two alternatives
    # once this pattern is embedded in it.
    '%bytes' => '(?#=bytes)(?:-|\d+)(?#!bytes)',

    # "referer"; 'ref' excludes the quotes
    '%referer' => '(?#=referer)\"(?#=ref).*?(?#!ref)\"(?#!referer)',

    # "user_agent"; 'ua' excludes the quotes
    '%useragent' => '(?#=useragent)\"(?#=ua).*?(?#!ua)\"(?#!useragent)',

    # name of the local vhost
    '%vhost' => '(?#=vhost)\S+(?#!vhost)',
);

# note: date is in the format [01/Jan/1997:13:07:21 -0600]

1;

=head1 SEE ALSO

  Regexp::Log

=cut