The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
#
# A sample indexer which demonstrates many of Xapian's commonly used features.
#
# Copyright (C) 2009 Olly Betts
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

use 5.006;
use strict;
use warnings;

use Search::Xapian (':all');
use POSIX;

# Value slot numbers: each piece of per-document metadata that we store as a
# Xapian document value gets its own numbered slot.
my (
    $SLOT_DATE,      # slot 0
    $SLOT_DOCNUM,    # slot 1
    $SLOT_TYPE,      # slot 2
    $SLOT_AUTHOR,    # slot 3
    $SLOT_TITLE,     # slot 4
) = (0 .. 4);

# The database path must be the one and only command line argument; anything
# else gets the usage message and a failure exit status.
unless (@ARGV == 1) {
    print STDERR "Usage: $0 PATH_TO_DATABASE\n";
    exit 1;
}

# Shared by index_document(): the writable database handle and the term
# generator used to turn text into index terms.
my ($database, $indexer);

eval {
    # Open the database named on the command line for writing, creating it
    # first if it is not already there.
    $database = Search::Xapian::WritableDatabase->new($ARGV[0],
                                                      DB_CREATE_OR_OPEN);

    # Build the TermGenerator and give it an "english" stemmer so that the
    # text it indexes is stemmed.
    my $english_stemmer = Search::Xapian::Stem->new("english");
    $indexer = Search::Xapian::TermGenerator->new();
    $indexer->set_stemmer($english_stemmer);
};
if ($@) {
    # Setup failed: show the caught exception and give up.
    print STDERR "Exception: $@\n";
    exit 1;
}

# Sample documents to add to the index (a real indexer would read this data
# from an external source such as a file or a database).  Each row holds the
# arguments for one index_document() call, in order:
#   title, author, keywords, document number, date (YYYY-MM-DD), type
my @sample_documents = (
    [
        "The Old Man and the Sea",
        "Ernest Hemingway",
        "Santiago goes fishing, without much success.",
        "978-0-684-80122-3",
        "1952-09-01",
        "book",
    ],
    [
        "Star Wars",
        "George Lucas",
        "Luke goes to meet his destiny in the stars.",
        "tt0076759",
        "1977-05-25",
        "film",
    ],
    [
        "Accidental Death of an Anarchist",
        "Dario Fo",
        "An anarchist dies, accidentally!",
        "12345",
        "1970-12-10",
        "play",
    ],
);

for my $record (@sample_documents) {
    index_document(@{$record});
}

# Build one Xapian document from the given fields and add it to the database.
#
# Arguments (in this order):
#   $doc_name   - title; stored as the document data, indexed both with the
#                 "S" prefix and unprefixed, and stored in $SLOT_TITLE.
#   $author     - indexed with the "A" prefix and stored in $SLOT_AUTHOR.
#   $keywords   - free text, indexed without a prefix.
#   $doc_number - unique identifier; added as a "Q"-prefixed term and stored
#                 in $SLOT_DOCNUM.
#   $date       - date in the form "YYYY-MM-DD" (ASCII digits); stored as
#                 "YYYYMMDD" in $SLOT_DATE.  If it is missing or in any other
#                 form, a warning is printed and the document is still added,
#                 just without a date value.
#   $type       - document type; lower-cased, then added as an
#                 "XTYPE"-prefixed term and stored in $SLOT_TYPE.
#
# Uses the file-level $indexer and $database.  If anything inside throws, the
# exception is printed to STDERR and the program exits with status 1.
sub index_document {
    my ($doc_name, $author, $keywords, $doc_number, $date, $type) = @_;

    eval {
        my $doc = Search::Xapian::Document->new();
        $indexer->set_document($doc);

        # Set the document data to the doc_name so we can show it for matches.
        $doc->set_data($doc_name);

        # Index the author to allow fielded free-text searching.
        $indexer->index_text($author, 1, "A");

        # Index the title to allow fielded free-text searching.
        $indexer->index_text($doc_name, 1, "S");

        # Index the title without a prefix too.
        $indexer->index_text($doc_name);

        # Increase the term position so that phrases can't straddle the
        # doc_name and keywords.
        $indexer->increase_termpos();

        # Index the keywords as free-text.
        $indexer->index_text($keywords);

        # Unique ID.
        $doc->add_term("Q" . $doc_number);

        # To allow boolean filtering by type.
        $doc->add_term("XTYPE" . lc $type);

        # To allow date range searching and sorting by date.
        # [0-9] rather than \d: \d can also match non-ASCII Unicode digits,
        # which would put something other than "YYYYMMDD" into the slot.
        if (defined $date && $date =~ /^([0-9]{4})-([0-9]{2})-([0-9]{2})$/) {
            # DateValueRangeProcessor wants values in the form "YYYYMMDD".
            $doc->add_value($SLOT_DATE, "$1$2$3");
        }
        else {
            # Don't drop the date silently: the document is still indexed,
            # but say why it won't show up in date range searches.
            my $shown = defined $date ? "\"$date\"" : "an undefined value";
            warn "No date value stored for \"$doc_name\": "
                . "expected YYYY-MM-DD, got $shown\n";
        }

        # To allow sorting by document number.
        $doc->add_value($SLOT_DOCNUM, $doc_number);

        # To allow sorting by document type.
        $doc->add_value($SLOT_TYPE, lc $type);

        # To allow sorting by author.
        $doc->add_value($SLOT_AUTHOR, $author);

        # To allow sorting by title.
        $doc->add_value($SLOT_TITLE, $doc_name);

        # Add the document to the database.
        $database->add_document($doc);
    };
    if ($@) {
        # Report the exception which we've caught.
        print STDERR "Exception: $@\n";
        exit 1;
    }
}