The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w
# $Id: sman-update,v 1.47 2008/05/25 02:41:17 joshr Exp $
# man indexing SWISH-E prog 

# Copyright (c) Josh Rabinowitz 2004-2007

# A descendent of the example in the Linux Journal 
# article "How To Index Anything".

use strict;
use warnings;
use bytes; # NOTE: swish-e won't understand UTF8 nor multi-byte chars 
use Getopt::Long qw(:config no_ignore_case); 
use Sman;   # for $VERSION
use Sman::Util;
use Sman::Config;
use Sman::Swishe;
use Sman::Man::Find;
use Sman::Man::Convert;
use Sman::Autoconfig;
use Sman::IndexVersion;

#use Data::Dumper;
use vars qw( $swisheconfigfile );    
    # if this is defined, we created this tmp file

# Compile-time setup, run before the rest of the file executes.
BEGIN {
    # Pin PATH to a fixed list of directories for the programs we spawn.
    $ENV{PATH} = "/bin:/usr/bin:/usr/local/bin:/sw/bin";

    # Turn on autoflush for the currently selected output handle.
    $| = 1;

    # No temporary swish-e config file has been written yet; the END block
    # only cleans up when this holds a non-empty path.
    $swisheconfigfile = "";
}

# Config file named on the command line. main() hands a reference to this
# file-scoped variable to GetOptions() and, when it is non-empty, reads only
# that file.
my $configfile = "";
# When it is left empty, main() asks Sman::Config for the default config
# file instead. Per the original note here, the candidates are
# /etc/sman.conf, then /usr/local/etc/sman.conf, then $FindBin::Bin/sman.conf;
# NOTE(review): that search is implemented in Sman::Config, not in this
# file -- confirm the order there.
#
# Settings given as command-line options overwrite the corresponding
# config settings (see main()).

#################################################
main();
#################################################

# main() - command-line driver for sman-update.
#
# Parses the command line, loads the sman configuration (the file named with
# --configfile, otherwise the default one), applies command-line overrides,
# autoconfigures MANCMD when it is empty or 'AUTOCONFIG', writes a temporary
# swish-e config file, then converts each man page and pipes the result to
# swish-e (run with "-S prog -i stdin") to build the index. After a
# successful run the index version information is updated.
#
# Takes no arguments; the return value is not meaningful.
# Exits with status 0 after handling --help, --VERSION or --clearcache.
# Dies on invalid options, when the swish-e version check fails, or when the
# pipe to swish-e cannot be opened or closed cleanly.
sub main {
    my $help = 0;
    my $verbose;        # undef means "not given on the command line"
    my $dryrun = 0;
    my $warn;
    my $debug;
    my $debugxml;
    my $rman = "";
    my $zcat = "";
    my $swishe = "";
    my $testfile = "";
    my $index;
    my $showversion = "";
    my $clearcache = 0;
    my $max = 0;        # set to non-zero for testing that many files
    my $progress = 0;   # like rsync, kind of

    GetOptions(
        "help"         => \$help,
        # "=s" is required: without it the option is a bare flag, so
        # --config=/file is rejected and $configfile could only ever be 1.
        "configfile=s" => \$configfile,
        "n"            => \$dryrun,
        "dryrun"       => \$dryrun,
        "clearcache"   => \$clearcache,
        "verbose!"     => \$verbose,
        "VERSION"      => \$showversion,
        "warn!"        => \$warn,
        "debug!"       => \$debug,
        "debugxml!"    => \$debugxml,
        "index=s"      => \$index,
        "rman=s"       => \$rman,
        "zcat=s"       => \$zcat,
        "swishe=s"     => \$swishe,
        "testfile=s"   => \$testfile,    # run just this file through.
        "max=i"        => \$max,
        "progress!"    => \$progress,
    ) || die Usage();

    if ($help) {
        print Usage();
        exit(0);
    }

    my $versionok = Sman::Util::CheckSwisheVersion();
    die "sman-update: swish-e not in PATH, /usr/local/lib not in ldconfig, or need newer version?: $!" unless $versionok;

    my $smanconfig = Sman::Config->new();
    if ($configfile) {
        $smanconfig->ReadSingleConfigFile($configfile);
    } else {    # otherwise fall back to the default config file
        $smanconfig->ReadDefaultConfigFile($verbose);
    }

    if ($showversion) {
        $| = 1;
        my $str = Sman::Util::GetVersionString(
            "sman-update",
            $smanconfig->GetConfigData("SWISHECMD") || 'swish-e');
        print "$str\n";
        print Sman::Util::GetIndexDescriptionString(
            $smanconfig->GetConfigData("SWISHE_IndexFile")
        );
        exit(0);
    }

    # overwrite settings with command line values if present
    if ($rman)              { $smanconfig->SetConfigData("RMANCMD",          $rman); }
    if ($zcat)              { $smanconfig->SetConfigData("ZCATCMD",          $zcat); }
    if ($swishe)            { $smanconfig->SetConfigData("SWISHECMD",        $swishe); }
    if (defined($verbose))  { $smanconfig->SetConfigData("VERBOSE",          $verbose); }
    if (defined($warn))     { $smanconfig->SetConfigData("WARN",             $warn); }
    if (defined($debug))    { $smanconfig->SetConfigData("DEBUG",            $debug); }
    if (defined($debugxml)) { $smanconfig->SetConfigData("DEBUGXML",         $debugxml); }
    if (defined($index))    { $smanconfig->SetConfigData("SWISHE_IndexFile", $index); }

    # Either the single --testfile, or every man page found on the system.
    my @files = $testfile ? ($testfile) : Sman::Man::Find::FindManFiles();

    # An unset MANCMD is treated like an empty one (and no longer triggers an
    # "uninitialized value" warning in the match below).
    my $mancmd = $smanconfig->GetConfigData("MANCMD");
    if (!defined($mancmd) || $mancmd =~ /(^AUTOCONFIG$)|(^$)/) {
        print "sman-update: Autoconfiguring MANCMD...\n" if $smanconfig->GetConfigData("VERBOSE");
        my $newmancmd = Sman::Autoconfig::GetBestManCommand($smanconfig, \@files);
        print "sman-update: MANCMD autoconfigured to '$newmancmd'\n" if $smanconfig->GetConfigData("VERBOSE");
        $smanconfig->SetConfigData("MANCMD", $newmancmd);
    }

    print $smanconfig->Dump() if $smanconfig->GetConfigData("VERBOSE");

    # set environment variables. Affects children from here forward.
    my @envs_set = $smanconfig->SetEnvironmentVariablesFromConfig();

    my $converter = Sman::Man::Convert->new($smanconfig);
    if ($clearcache) {
        print "sman-update: Clearing Sman cache...\n";
        $converter->ClearCache();
        exit(0);
    }

    my $smanswishe = Sman::Swishe->new($smanconfig);

    # Remember the temp file in the file-level variable so the END block can
    # remove it if we die before reaching the bottom of this sub.
    $swisheconfigfile = $smanswishe->WriteConfigFile();

    if ($verbose) {
        print "sman-update: SWISHE CONFIG FILE:\n";
        print "=======================\n";
        print Sman::Util::ReadFile($swisheconfigfile);
        print "=======================\n";
    }

    my $swishecmd = $smanconfig->GetConfigData("SWISHECMD");
    # SWISHECMD may carry its own options, so the command stays a single
    # string that the shell splits.
    my $pipecmd = "$swishecmd -S prog -c $swisheconfigfile -i stdin";
    my $cmd = "| $pipecmd";     # display form, used in messages only
    print "Running '$cmd'\n" if $debug;

    my $swishe_fh;
    unless ($dryrun) {
        # Three-arg open with an explicit mode and a lexical handle.
        open($swishe_fh, '|-', $pipecmd)
            || die "sman-update: couldn't open '$cmd': $!";
    }

    my $total = scalar @files;
    print "sman-update: $total man pages to index...\n" if $verbose || $progress;

    # --max (when non-zero) caps how many files are processed.
    my $limit = $total;
    $limit = $max if $max != 0 && $max < $total;

    for my $i (0 .. $limit - 1) {
        my $f = $files[$i];
        print "** processing $i\n" if ($progress && $i % 500 == 0);
        print "** working on $f\n" if $debug;
        my ($type, $outputref) = $converter->ConvertManfile($f);

        # next two lines are from swish-e 2.4.0's 'spider.pl':
        # 'ugly and maybe expensive, but perhaps more portable than "use bytes"'
        #my $bytecount = length pack 'C0a*', $$outputref;

        # this fails on Redhat 9 and ES3 if LANG is not C (or LC_ALL is not C)
        my $bytecount = length $$outputref;

        unless ($dryrun) {
            # One document in swish-e's "prog" input format: headers, a blank
            # line, then exactly Content-Length bytes of content.
            print {$swishe_fh} "Path-Name: $f\n",
                "Document-Type: $type\n",
                "Content-Length: $bytecount\n\n", $$outputref;
        }
        if ($smanconfig->GetConfigData("DEBUGXML")) {
            print "**==== BEGIN XML of $f =========\n" .
                    $$outputref .
                    " **====  END  XML of $f =========\n\n";
        }
    }

    unless ($dryrun) {
        # Closing a pipe waits for the child; a false return with $! == 0
        # means the child exited with a non-zero status (left in $?).
        close($swishe_fh)
            || die "sman-update: Failure closing pipe to $swishecmd: "
                 . ($! ? "$!" : "exit status " . ($? >> 8));

        # update the sman.index.version file
        my $index_versions = Sman::IndexVersion->new($smanconfig);
        $index_versions->set_versions(
            { VERSION => $Sman::VERSION, SMAN_DATA_VERSION => $Sman::SMAN_DATA_VERSION }
        );
    }
    # note that the swisheconfig file is in Sman::Swishe for now
    #unlink($swisheconfigfile) || warn "sman-update: Couldn't delete $swisheconfigfile: $!";
    $swisheconfigfile = "";
}
# Usage() - returns the command-line help text as one newline-terminated
# string. It is printed for --help and used as the die message when option
# parsing fails.
#
# The text lists the options that main() passes to GetOptions(). --config is
# an abbreviation of the real option name, --configfile.
sub Usage {
    my @lines = (
        "sman-update: [--help] [--VERSION] [--config=s] [--index=s]",
        "             [--rman=s] [--zcat=s] [--swishe=s]",
        "             [--(no)verbose] [--(no)warn] [--(no)debug] [--(no)debugxml]",
        "             [--(no)progress] [--max=n] [--testfile=s]",
        "             [--clearcache] [--dryrun|-n]",
        "Builds index for sman.",
        "  --config=/file/sman.conf     config file to read",
        "  --index=/file/sman.index     index file to use",
        "  --rman='/path/to/rman -opt'  path to prog like 'rman -f XML'",
        "  --zcat='/path/to/zcat -f'    path to prog like 'zcat -f'",
        "  --swishe=/path/to/swish-e    path to the swish-e program",
        "  --verbose/--noverbose        verbosity, default off",
        "  --warn   /--nowarn           warnings from children, default off",
        "  --debug  /--nodebug          debug output, default off",
        "  --debugxml/--nodebugxml      print the converted XML of each page",
        "  --progress/--noprogress      print a progress line every 500 pages",
        "  --max=n                      process at most n pages (0 means all)",
        "  --clearcache                 clear the cache of converted pages",
        "  --testfile=/path/to/page     just one file, for testing",
        "  --dryrun  (or -n)            don't write anything to the index.",
        "  --VERSION                    print version information and exit",
        "  --help: this text. For more info, see 'perldoc sman-update'",
    );
    return join("", map { "$_\n" } @lines);
}
# Exit-time cleanup: remove the temporary swish-e config file if its path is
# still recorded in $swisheconfigfile and the file exists. main() resets the
# variable to "" once it has run to completion.
END {
    my $have_tmpfile = $swisheconfigfile && -e $swisheconfigfile;
    if ($have_tmpfile) {
        unlink($swisheconfigfile)
            or warn "sman-update: Couldn't delete $swisheconfigfile: $!";
        $swisheconfigfile = "";
    }
}

__END__

=head1 NAME

sman-update - Perl program to index man pages (for searching with sman program)

=head1 SYNOPSIS

  % sman-update --conf=/my/dir/sman.conf --verbose

or just

  % sman-update

=head1 ABSTRACT

 sman-update: [--help] [--VERSION] [--config=s] [--index=s]
              [--rman=s] [--zcat=s] [--swishe=s]
              [--(no)verbose] [--(no)warn] [--(no)debug] [--(no)debugxml]
              [--(no)progress] [--max=n] [--testfile=s]
              [--clearcache] [--dryrun|-n]

 Builds index for sman.
  --config=/file/sman.conf     config file to read
  --index=/file/sman.index     index file to use
  --rman='/path/to/rman -opt'  path to prog like 'rman -f XML'
  --zcat='/path/to/zcat -f'    path to prog like 'zcat -f'
  --swishe=/path/to/swish-e    path to the swish-e program
  --verbose/--noverbose        verbosity, default off
  --warn   /--nowarn           warnings from children, default off
  --debug  /--nodebug          debug output, default off
  --debugxml/--nodebugxml      print the converted XML of each page
  --progress/--noprogress      print a progress line every 500 pages
  --max=n                      process at most n pages (0 means all)
  --clearcache                 clear the cache of converted pages
  --testfile=/path/to/page     just one file, for testing
  --dryrun  (or -n)            don't write anything to the index.
  --VERSION                    print version information and exit
  --help: this text. For more info, see 'perldoc sman-update'

=head1 DESCRIPTION

Sman-update creates the index of man pages for the sman program,
which searches on that index.
By default the index is stored in /var/lib/sman.

Sman-update should be run periodically to keep your sman index in sync
with your system's man pages.

Both sman and sman-update search for the first configuration file
named sman.conf in /etc, /usr/local/etc/, $HOME, and the directory
containing sman. If no sman.conf file is found (and none is specified
through sman's or sman-update's -conf option), then the default
configuration in /usr/local/etc/sman-defaults.conf will be used.

In all cases command line options take precedence over directives read from
configuration files.

=head1 SECURITY

For increased security, sman-update can be run as a non-privileged user. To
do so, chown the directory /var/lib/sman and its contents to the
appropriate user.

=head1 AUTHOR

Josh Rabinowitz <joshr>

=head1 SEE ALSO

L<sman>, L<sman-update>, L<sman.conf>

=cut