The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
#
# Simple example script demonstrating query expansion.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2006,2007,2009 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

use 5.006;
use strict;
use warnings;

use Search::Xapian (':all');

# We require at least two command line arguments.
if (scalar @ARGV < 2) {
    print STDERR "Usage: $0 PATH_TO_DATABASE QUERY [-- [DOCID...]]\n";
    exit(1);
}

eval {
    # Open the database for searching.
    my $database = Search::Xapian::Database->new(shift @ARGV);

    # Start an enquire session.
    my $enquire = Search::Xapian::Enquire->new($database);

    # Combine command line arguments up to "--" with spaces between
    # them, so that simple queries don't have to be quoted at the shell
    # level.
    my $sep_index = 0;
    while (exists $ARGV[$sep_index] && $ARGV[$sep_index] ne '--') {
	$sep_index++;
    }

    my $query_string = join ' ', @ARGV[0 .. $sep_index - 1];

    # Create an RSet with the listed docids in.
    my $reldocs = Search::Xapian::RSet->new();
    foreach my $did (@ARGV[$sep_index + 1 .. $#ARGV]) {
        $reldocs->add_document($did);
    }

    # Parse the query string to produce a Xapian::Query object.
    my $qp = Search::Xapian::QueryParser->new();
    my $stemmer = Search::Xapian::Stem->new("english");
    $qp->set_stemmer($stemmer);
    $qp->set_database($database);
    $qp->set_stemming_strategy(STEM_SOME);
    my $query = $qp->parse_query($query_string);

    my $mset;
    if (!$query->empty()) {
	print "Parsed query is: $query\n";

        # Find the top 10 results for the query.
	$enquire->set_query($query);
	$mset = $enquire->get_mset(0, 10, $reldocs);

        # Display the results.
	printf "%i results found.\n", $mset->get_matches_estimated();
	printf "Results 1-%i:\n", $mset->size();

	foreach my $m ($mset->items()) {
	    printf "%i: %i%% docid=%i [%s]\n", $m->get_rank() + 1, $m->get_percent(), $m->get_docid(), $m->get_document()->get_data();
	}
    }

    # Put the top 5 (at most) docs into the rset if rset is empty
    if ($reldocs->empty() && defined $mset) {
	my $last = $mset->size() - 1;
	if ($last > 4) {
	   $last = 4;
	} 
	foreach my $m (($mset->items())[0..$last]) {
            $reldocs->add_document($m->get_docid());
	}
    }

    # Get the suggested expand terms
    my $eset = $enquire->get_eset(10, $reldocs);
    printf "%i suggested additional terms\n", $eset->size();
    for my $k ($eset->items()) {
        printf "%s: %f\n", $k->get_termname(), $k->get_weight();
    }
};
if ($@) {
    print STDERR "Exception: $@\n";
    exit(1);
}