The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#AnyDBM handling from perlindex:
# NDBM_File as LAST resort

package
    AnyDBM_File; # hide from indexer
use vars '@ISA';
my @try; @try = qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File) unless @ISA;
my $mod;
for $mod (@try) {
    if (eval "require $mod") {
	@ISA = $mod;
	last;
    }
};

package Tk::Pod::Search_db;

use strict;
use vars qw($VERSION);

$VERSION = '5.09';

use Carp;
use Fcntl;
use File::Basename qw(dirname);
use File::Spec;
use Text::English;
use Config;

my $PREFIX = $Config::Config{prefix};
# Bug in perlindex: because of assuming Unix directory separators the
# index files are stored in man/man1, not in man on Windows:
my $IDXDIR = $^O eq 'MSWin32' ? $Config::Config{man1dir} : dirname $Config::Config{man1dir};
$IDXDIR ||= $PREFIX; # use perl directory if no manual directory exists
# Debian uses a non-standard directory:
if (-e "/etc/debian_version" && -d "/var/cache/perlindex") {
    $IDXDIR = "/var/cache/perlindex";
    # XXX What to do if perlindex is installed by the user and uses
    # the man directory for storing the index files?
}
# Deliberately ignore the INDEXDIR environment variable which is used
# by perlindex

sub new {
    my $class = shift;
    my $idir  = shift;
    $idir ||= $IDXDIR;
    my (%self, %IF, %IDF, %FN);
    my $if_file  = File::Spec->catfile($idir, "index_if");
    tie (%IF,   'AnyDBM_File', $if_file,   O_RDONLY, 0644)
        	or confess "Could not tie $if_file: $!\n".
           	"Did you install Text::English and run 'perlindex -index'?\n";
    my $idf_file = File::Spec->catfile($idir, "index_idf");
    tie (%IDF,  'AnyDBM_File', $idf_file,   O_RDONLY, 0644)
	       	or confess "Could not tie $idf_file: $!\n";
    my $fn_file  = File::Spec->catfile($idir, "index_fn");
    tie (%FN,   'AnyDBM_File', $fn_file,   O_RDONLY, 0644)
	       	or confess "Could not tie $fn_file: $!\n";

    $self{IF}  = \%IF;
    $self{IDF} = \%IDF;
    $self{FN}  = \%FN;
    #xxx: -idir depended but where can I get this info?
    #	o A fourth index file?
    #   o todo: check perlindex index routine
    $self{PREFIX} = $PREFIX;

    bless \%self, $class;
}

# changes to perlindex's normalize
#	o removed useless(?) stemmer check
#	o lexicalized $word

sub normalize {
    my $line = join ' ', @_;
    my @result;

    $line =~ tr/A-Z/a-z/;
    $line =~ tr/a-z0-9_/ /cs;

    my $word;
    for $word (split / /, $line ) {
        $word =~ s/^\d+//;
        next unless length($word) > 2;
        push @result, &Text::English::stem($word);
    }
    @result;
}

# changes for perlindex's search slightly modified
sub searchWords {
    my($self, $term, %args) = @_;

    my @words = split / /, $term;

    my $restrict_pod = $args{-restrictpod};
    if (defined $restrict_pod) {
	my(@modparts) = split /::/, $restrict_pod;
	$restrict_pod = join('[/\\\\]', map { quotemeta } @modparts);
    }

    #print "try words|", join('|',@_),"\n";
    my $p = 'w';
    my %score;
    my %termhits;
    my $maxhits = 50;
    my (@unknown, @stop);

    my $IF  = $self->{IF};
    my $IDF = $self->{IDF};
    my $FN  = $self->{FN};

    #&initstop if $opt_verbose;
    for my $word (normalize(@words)) {
        unless ($IF->{$word}) {
#             if ($stop{$word}) {
#                 push @stop, $word;
#             } else {
#                 push @unknown, $word;
#             }
            next;
        }
        my %post = unpack($p.'*',$IF->{$word});
        my $idf = log($FN->{'last'}/$IDF->{$word});
        for my $did (keys %post) {
            my ($maxtf) = unpack($p, $FN->{$did});
            $score{$did} = 0 unless defined $score{$did}; # perl -w 
            $score{$did} += $post{$did} / $maxtf * $idf;
	    $termhits{$did}++;
        }
    }

    my @results;
    for my $did (sort {	$termhits{$b} <=> $termhits{$a} || $score{$b} <=> $score{$a} } keys %score) {
	my ($mtf, $path) = unpack($p.'a*', $FN->{$did});
	# XXX Should not use Tk::Pod::Search::split_path, or split_path should be moved to another package
	if ($restrict_pod) {
	    my($check_path) = Tk::Pod::Search::split_path($path);
	    next if $check_path !~ /^$restrict_pod/;
	}
	#next if ($restrict_pod && $path !~ /$restrict_pod/);
	$path = File::Spec->catfile($self->prefix, $path) unless $^O eq 'MSWin32'; # This seems to be a perlindex bug in MSWin32
	push @results, { termhits => $termhits{$did}, score => $score{$did}, path => $path };
	last unless --$maxhits;
    }

    #print "results|", join('|',@results),"\n";
    @results;
}

sub prefix {
    shift->{PREFIX};
}

1;
__END__

=head1 NAME

Tk::Pod::Search_db - dirty OO wrapper for C<perlindex>'s search functionality

=head1 SYNOPSIS

    ** THIS IS ALPHA SOFTWARE everything may and should change **
    **   stuff here is more a scratch pad than docomentation!  **

    use Tk::Pod::Search_db;
    ...
    $idx = Tk::Pod::Search_db->new?(INDEXDIR)?;
    ...
    @hits = $idx->searchWords(WORD1,...); # @hits is a list of
                                             # relpath1,score1,...  where
                                             # score is increasing
    $prefix = $idx->prefix();

    @word = Tk::Pod::Search_db::normalize(STRING1,...);

=head1 DESCRIPTION

Module to search Pod documentation.  Before you can use
the module one should create the indices with C<perlindex -index>.

=head1 MISSING

Enable options like -maxhits (currently = 15).  Solve PREFIX
dependency.  Interface for @stop and @unknown also as methods
return lists for last searchWords call?

Lots more ...

=head1 METHODS

=over 4

=item $idx = Tk::Pod::Search_db->new(INDEXDIR)

Interface may change to support options like -maxhits

=item $idx->seachWords(WORD1?,...?)

search for WORD(s). Return a list of

  relpath1, score1, relpath2, score2, ...

or empty list if no match is found.

=item $pathprefix = $idx->pathprefix()

The return path prefix and C<$relpath> give together the full path
name of the Pod documentation.

	$fullpath = $patchprefix . '/' . $relpath

B<Note:> Should make it easy to use Tk::Pod::Search with perlindex but
index specific prefix handling is a mess up to know.

=back

=head1 SEE ALSO

L<tkpod>, L<perlindex>, L<perlpod>, L<Tk::Pod::Search>

=head1 AUTHORS

Achim Bohnet  <F<ach@mpe.mpg.de>>

Most of the code here is borrowed from L<perlindex> written by
Ulrich Pfeifer <F<Ulrich.Pfeifer@de.uu.net>>.

Current maintainer is Slaven ReziE<0x0107> <F<slaven@rezic.de>>.

Copyright (c) 1997-1998 Achim Bohnet. All rights reserved.  This program is
free software; you can redistribute it and/or modify it under the same
terms as Perl itself.

=cut