The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Sman::Autoconfig;
use Sman::Man::Convert;
use Storable;


#$Id: Autoconfig.pm,v 1.12 2008/05/25 02:40:42 joshr Exp $

use strict;
use warnings;

# this package finds which man command works best on this system 
# Chooses 'best' man command for Sman.
# Our logic is that either 'man %F' or 'man %S %C' will work
# given a list of manfiles, we deterministically pick
# representatives, and see which man command works best 
# on each, XML-wise.
	# this works for most linuxes we've tested
	#MANCMD man -c %F 
	# this works for freebsd 4.4 and Mac OS X up to 10.3
	#MANCMD man -c %S %C

	# these are the man commands we try
#my @tries = ( 'man -c %F', 'man -c %S %C', 'cat %F | gunzip -f --stdout | man -c' );	
	# The last option above does not work (it needs a temp file in the middle), so it has been removed.
	# We once left it in anyway, since it won't get used if it doesn't work, but it caused warnings under cron.
my @tries = (
	# Candidate man command templates, simplest first.  Order matters:
	# GetBestManCommand() breaks ties in favor of the earlier entry.
	# NOTE(review): %F, %S and %C are placeholders expanded elsewhere
	# (presumably by Sman::Man::Convert) -- confirm their meaning there.
	'man %F',          # debian 4.0 needs this, the simplest one, which none supported for years.
	'man -c %F',
	'man -c %S %C',
);
	# man -c means to reparse manpage input (and not use the manpage cache)
	# The gunzip notes below describe the disabled pipeline candidate:
	# gunzip -f means just cat it if it's not compressed
	# gunzip --stdout means put the output to stdout (I think this is the default)


# GetBestManCommand($smanconfig, $manfilesref)
#
# Chooses the man command template from @tries that works best here.
#
#   $smanconfig  - config object; it is deep-copied with Storable::dclone()
#                  and must provide GetConfigData() and SetConfigData().
#   $manfilesref - reference to an array of man files (undef is treated
#                  as an empty list).
#
# Up to 10 evenly spaced files are sampled from the list.  Every candidate
# command converts every sampled file, and the command yielding the longest
# output for a file earns one point.  Files for which no command yields any
# output award no point.
#
# Returns the command with the most points; ties (per file and overall) go
# to the command listed first in @tries, so the result is deterministic.
# Returns 'man %S %C' when no command earned a point.
sub GetBestManCommand {
	my ($smanconfig, $manfilesref) = @_;
	$manfilesref = [] unless defined $manfilesref;

	# One converter per candidate, each working on its own copy of the config
	# so the caller's config object is never modified.
	my %converters = ();
	for my $cmd (@tries) {
		my $newconfig = Storable::dclone($smanconfig);
		$newconfig->SetConfigData("MANCMD", $cmd);
		$newconfig->SetConfigData("AUTOCONFIGURING", 1);	# internal flag
		$converters{ $cmd } = Sman::Man::Convert->new($newconfig, { nocache=>1 } );
	}

	# Pick evenly spaced representatives from the list (at most 10).
	my $total    = scalar(@$manfilesref);
	my $numfiles = $total < 10 ? $total : 10;
	my @testfiles = map { $manfilesref->[ int( $_ / $numfiles * $total ) ] }
	                0 .. $numfiles - 1;

	my $verbose = @testfiles ? $smanconfig->GetConfigData("VERBOSE") : 0;
	my $debug   = @testfiles ? $smanconfig->GetConfigData("DEBUG")   : 0;

	my %cmdwins = ();	# hash of cmd -> number of files this command won
	for my $file (@testfiles) {
		warn "Testing $file" if $verbose;
		my ($maxlen, $winningcmd) = (0, undef);
		# Walk @tries (not keys %converters) so ties resolve in a fixed order.
		for my $mancmd (@tries) {
			my (undef, $contentref) = $converters{$mancmd}->ConvertManfile($file);
			# Treat a missing or undefined result as zero bytes of output.
			my $len = ( ref($contentref) && defined($$contentref) )
			        ? length($$contentref)
			        : 0;
			printf( "%s: Got %d bytes from %s\n", $0, $len, $mancmd ) if $debug;
			if ($len > $maxlen) {	# record the largest output and its cmd
				$maxlen     = $len;
				$winningcmd = $mancmd;
			}
		}
		# Only award a point when some command actually produced output.
		$cmdwins{$winningcmd}++ if defined $winningcmd;
	}

	# Highest score wins; the strict '>' keeps the earliest entry on a tie.
	my ($best, $bestwins) = (undef, 0);
	for my $cmd (@tries) {
		my $wins = $cmdwins{$cmd} || 0;
		($best, $bestwins) = ($cmd, $wins) if $wins > $bestwins;
	}
	return $best if defined $best;
	return 'man %S %C';  # or 'man %F'
}

1;
__END__ 

=head1 NAME

Sman::Autoconfig - Automatically choose the 'best' man command

=head1 SYNOPSIS 

	...
	my $mancmd = Sman::Autoconfig::GetBestManCommand(
		$smanconfig, \@manfiles);
	...
	
=head1 DESCRIPTION

Chooses a representative sample of the manfiles passed and tests
which usual man command seems to work best on this system's man
files.

=head1 AUTHOR

Josh Rabinowitz <joshr>

=head1 SEE ALSO

L<Sman::Man::Convert>, L<Sman::Config>, L<sman.conf>

=cut