The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Copyright (c) 2002i-2003 the World Wide Web Consortium :
#	Keio University,
#	European Research Consortium for Informatics and Mathematics
#	Massachusetts Institute of Technology.
# written by Olivier Thereaux <ot@w3.org> for W3C
#
# $Id: Config.pm,v 1.7 2004/09/10 00:41:24 ot Exp $

package W3C::LogValidator::Config;
use strict;

require Exporter;
our @ISA = qw(Exporter);
our %EXPORT_TAGS = ( 'all' => [ qw() ] );
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw();
our $VERSION = sprintf "%d.%03d",q$Revision: 1.7 $ =~ /(\d+)\.(\d+)/;

our $config_filename;
our %conf;

###########################
# usual package interface #
###########################
sub new
{
        my $self  = {};
        my $proto = shift;
        my $class = ref($proto) || $proto;
        $self->{NAME}   = undef;
        $self->{SERVERADMIN}    = undef;
        $self->{DOCUMENTROOT}   = undef;
        $self->{LOGFILES}       = undef;
        $self->{LOGFORMAT}      = undef;
        $self->{LOGTYPE}        = undef;
        $self->{MODULES}        = undef;
        $self->{MAX_INVALID}    = undef;
	$self->{DIRECTORYINDEX}	= undef;
        bless($self, $class);
	# default values

	# did we get a config file as an argument?
	if (@_)
	{
		$config_filename = shift;
		$self->config_file;
	}
	else
	{
		$self->config_default;
	}
#	$self->give_config;	# debug
        return $self;
}

sub configure
{
	return %conf;
}

sub config_default
{
	use Sys::Hostname qw(hostname);
	my $self = shift;
	# setting default values for LogProcessor
	if (!exists $conf{LogProcessor}{DirectoryIndex})
	{	
		$conf{LogProcessor}{DirectoryIndex}=("index.html index.htm index");
	}
	if (!exists $conf{LogProcessor}{MaxInvalid})
	{
		$conf{LogProcessor}{MaxInvalid}=10;
	}
	if (!exists $conf{LogProcessor}{ServerName})
	{
		$conf{LogProcessor}{ServerName}=hostname();
	}
	if (!exists $conf{LogProcessor}{DocumentRoot})
	{
		$conf{LogProcessor}{DocumentRoot}="/var/www/";
	}
	if (!exists $conf{LogProcessor}{LogFiles})
	{	
		push @{$conf{LogProcessor}{LogFiles}}, "/var/log/apache/access.log";
		$conf{LogProcessor}{LogType}{"/var/log/apache/access.log"}="common";
	}

	if (!exists $conf{LogProcessor}{UseValidationModule})
	{
		#adding modules and options for the modules
		push @{$conf{LogProcessor}{UseValidationModule}}, "W3C::LogValidator::HTMLValidator";
		push @{$conf{LogProcessor}{UseValidationModule}}, "W3C::LogValidator::Basic";
		$conf{"W3C::LogValidator::HTMLValidator"}{max_invalid}=10;
	}
	# adding default limit  - useful for very (too) big logfiles
	if (!exists $conf{LogProcessor}{EntriesPerLogfile})
	{
		$conf{LogProcessor}{EntriesPerLogfile}=100000; 
	}
}



sub config_file
{
	my $self = shift;
	use Config::General;
	my $config_read = new Config::General(-ConfigFile => "$config_filename")
	|| die "could not load config $config_filename : $!";
	# using C::General to read logfile
	my %tmpconf = $config_read->getall;
	# extracting modules config
	# Config::General will give the hash this structure

	# HASH {
	# foo -> valfoo
	# bar -> valbar
	# Module {
	#	module1 {
	#			ga -> valga
	#		}
	#	}
	# }

	# 	and we want 

	
	# HASH {
	# LogProcessor {
	#	foo -> valfoo
	#	bar -> valbar
	#	}
	# module1 {
	#	ga -> valga
	#	}
	# }

	
	# so First we extract what's in the Module subhash
	if (exists($tmpconf{Module}))
	{
		%conf = %{$tmpconf{Module}};
	}
	# remove it
	delete $tmpconf{Module};
	# and merging with the global values we put in the LogProcessor subhash
	%{$conf{LogProcessor}} = %tmpconf;

	# specific action is needed for "CustomLog"
	if (exists($tmpconf{CustomLog}))
	{
		# if there are several log files, $tmpconf{CustomLog} is an array
		if (defined @{ $tmpconf{CustomLog} })
		{ 
		   foreach my $customlog (@{ $tmpconf{CustomLog} })
		   {
			$_ = $customlog;
			if (/^(.*) (.*)$/) 
			{
				# only supported (so far) is the syntax:
				# CustomLog path/file nickname
				push @{$conf{LogProcessor}{LogFiles}}, $1;
				$conf{LogProcessor}{LogType}{$1}=$2;
			}
		   }

		}
		else # one log file, $tmpconf{CustomLog} is not an array
		{ 
			$_ = $tmpconf{CustomLog};
			if (/^(.*) (.*)$/) 
			{
				push @{$conf{LogProcessor}{LogFiles}}, $1;
				$conf{LogProcessor}{LogType}{$1}=$2;			
			}
		}
		delete $conf{LogProcessor}{CustomLog};
	}
	
	# add default values for variables not included in the config file
	$self->config_default();	
}

package W3C::LogValidator::Config;
1;

__END__

=head1 NAME

W3C::LogValidator::Config - Configuration parsing for the Log Validator

=head1 SYNOPSIS

    use W3C::LogValidator::Config;
    if ($config_filename)
    { # parse configuration file and populate config hash
        %config = W3C::LogValidator::Config->new($config_filename)->configure();
    }
    else
    { # populate config hash with "default" values
        %config = W3C::LogValidator::Config->new()->configure();
    }

=head1 DESCRIPTION

C<W3C::LogValidator::Config> parses configuration files or directives for the Log Validator


=head1 API

=head2 Constructor

=over 2

=item $c = W3C::LogValidator::Config->new

Constructs a new C<W3C::LogValidator::Config> configuration processor.
A file name may be passed as a variable, as follows:

  $c = W3C::LogValidator::Config->new("path/to/file.conf")

=back

=head2 General methods

=over 4

=item $c->configure

Returns a hash containing configuration variables for the main module and for each processing module.

Hash structure as follows:
$conf{LogProcessor} is a hash containing configuration info for the main Log Validator process
e.g : 

    $conf{LogProcessor}{MaxInvalid} is an int with the general setting for MaxInvalid, 
    $conf{LogProcessor}{UseValidationModule} is an array with all processing modules used

for each processing module, $conf{ProcessingModuleX} is a hash containing configuration info specific to that processing module.
Typically this is used to override general setting.

=item $c->config_default

Populates the configuration hash (which will then be returned by C<$c-E<gt>configure>) with reasonable default values

=item $c->config_file

Populates the configuration hash by parsing the configuration file given while constructing C<W3C::LogValidator::Config>
Does not work if that parameter was not passed during construction

The configuration file uses a syntax very similar to the one used by the Apache Web server.
Both syntax and vocabulary are documented in the sample configuration file (F<samples/logprocess.conf>) 
distributed with the module.

=back

=head1 BUGS

Public bug-tracking interface at http://www.w3.org/Bugs/Public/

=head1 AUTHOR

Olivier Thereaux <ot@w3.org> for The World Wide Web Consortium


=head1 SEE ALSO

perl(1).
Up-to-date information on this tool at http://www.w3.org/QA/Tools/LogValidator/

=cut