The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Search::Mousse;
use strict;
our $VERSION = '0.32';
use base qw(Class::Accessor::Chained::Fast);
__PACKAGE__->mk_accessors(
  qw(directory name stemmer key_to_id id_to_key id_to_value word_to_id
    id_to_related and
  )
);
use CDB_File;
use CDB_File_Thawed;
use List::Uniq qw(uniq);
use Path::Class;
use Search::QueryParser;
use Set::Scalar;

sub new {
  my $class = shift;
  my $self  = {};
  bless $self, $class;

	my %args = @_;
	$self->directory($args{directory});
  $self->name($args{name});
  $self->stemmer(
    $args{stemmer} ||
    sub {
      my $words = lc shift;
      return uniq(split / /, $words);
    }
  );
  $self->and($args{and} || 0);

	$self->_init;
  return $self;
}

sub _init {
  my ($self) = @_;
  my $name   = $self->name;
  my $dir    = $self->directory;

  my $filename = file($dir, "${name}_key_to_id.cdb");
  tie my %cdb1, 'CDB_File', $filename or die "tie failed: $!\n";
  $self->key_to_id(\%cdb1);

  $filename = file($dir, "${name}_id_to_key.cdb");
  tie my %cdb2, 'CDB_File', $filename or die "tie failed: $!\n";
  $self->id_to_key(\%cdb2);

  $filename = file($dir, "${name}_id_to_value.cdb");
  tie my %cdb3, 'CDB_File_Thawed', $filename or die "tie failed: $!\n";
  $self->id_to_value(\%cdb3);

  $filename = file($dir, "${name}_word_to_id.cdb");
  tie my %cdb4, 'CDB_File_Thawed', $filename or die "tie failed: $!\n";
  $self->word_to_id(\%cdb4);

  $filename = file($dir, "${name}_id_to_related.cdb");
  if (-f $filename) {
    tie my %cdb7, 'CDB_File_Thawed', $filename or die "tie failed: $!\n";
    $self->id_to_related(\%cdb7);
  }
}

sub fetch {
  my ($self, $key) = @_;

  my $id = $self->key_to_id->{$key};
  return unless $id;
  return $self->id_to_value->{$id};
}

sub fetch_related {
  my ($self, $key) = @_;
  my $id_to_value = $self->id_to_value;
  
  my $id = $self->key_to_id->{$key};
  return unless $id;
  my $ids = $self->id_to_related->{$id} || [];
  return map { $id_to_value->{$_} } @$ids;
}

sub fetch_related_keys {
  my ($self, $key) = @_;
  my $id_to_key = $self->id_to_key;
  
  my $id = $self->key_to_id->{$key};
  return unless $id;
  my $ids = $self->id_to_related->{$id} || [];
  return map { $id_to_key->{$_} } @$ids;
}

sub search {
  my ($self, $words) = @_;

  my @ids = $self->_search_ids($words);

  my @values = map { $self->id_to_value->{$_} } @ids;
  return @values;
}

sub search_keys {
  my ($self, $words) = @_;
  my @ids = $self->_search_ids($words);

  my @keys = map { $self->id_to_key->{$_} } @ids;
  return @keys;
}

sub _search_ids {
  my ($self, $words) = @_;

  my $qp = Search::QueryParser->new;
  my $query = $qp->parse($words);
  return unless $query;

  my @union;
  foreach my $term (@{$query->{""}}) {
    my $value = $term->{value};
    my @values = $self->stemmer->($value);
    push @union, $values[0];
  }
  
  my @plus;
  foreach my $term (@{$query->{"+"}}) {
    my $value = $term->{value};
    my @values = $self->stemmer->($value);
    push @plus, $values[0];
  }
  
  my @minus;
  foreach my $term (@{$query->{"-"}}) {
    my $value = $term->{value};
    my @values = $self->stemmer->($value);
    push @minus, $values[0];
  }
  
  if ($self->and) {
    push @plus, @union;
    @union = ();
  }
  
  my $s = Set::Scalar->new;

	if (@union) {
    foreach my $word (@union) {
      next unless exists $self->word_to_id->{$word};
      my @ids = @{ $self->word_to_id->{$word} };
      $s->insert(@ids);
    }
  
    foreach my $word (@plus) {
      return unless exists $self->word_to_id->{$word};
      my @ids = @{ $self->word_to_id->{$word} };
      my $s2 = Set::Scalar->new(@ids);
      $s = $s->intersection($s2);
    }
  } else {
    my $word = pop @plus;
    my @ids = @{ $self->word_to_id->{$word} };
    $s->insert(@ids);

    foreach my $word (@plus) {
      return unless exists $self->word_to_id->{$word};
      my @ids = @{ $self->word_to_id->{$word} };
      my $s2 = Set::Scalar->new(@ids);
      $s = $s->intersection($s2);
    }   
  }

  foreach my $word (@minus) {
    next unless exists $self->word_to_id->{$word};
    my @ids = @{ $self->word_to_id->{$word} };
    $s = $s->delete(@ids);
  }
  
  return $s->members;
}

1;

__END__

=head1 NAME

Search::Mousse - A simple and fast inverted index

=head1 SYNOPSIS

  my $mousse = Search::Mousse->new(
    directory => $directory,
    name      => 'recipes',
  );
  my $recipe = $mousse->fetch("Hearty Russian Beet Soup");
  my @recipes = $mousse->search("crumb");
  my @recipe_keys = $mousse->search_keys("italian soup");
  
=head1 DESCRIPTION

L<Search::Mousse> provides a simple and fast inverted index.

It is intended for constant databases (this is why it can be fast).
Documents have a key, keywords (which the document can later be search
for with) and a value (which can be a Perl data structure or object).

Use L<Search::Mousse::Writer> to construct a database.

The default stemmer is:

  sub {
    my $words = lc shift;
    return uniq(split / /, $words);
  }

Why is it called Search::Mousse? Well, in culinary terms, mousses are
simple to make, can include quite complicated ingredients, and are
inverted before presentation.

=head1 CONSTRUCTOR

=head2 new

The constructor takes a few arguments: the directory to store files in,
and a name for the database. If you have a custom stemmer, also pass it in:

  my $mousse = Search::Mousse->new(
    directory => $directory,
    name      => 'recipes',
  );
  
  my $mousse2 = Search::Mousse->new(
    directory => $directory,
    name      => 'photos',
    stemmer   => \&stemmer,
  );

=head1 METHODS

=head2 and

If this is set to true, query terms are ANDed by default. Thus "white
bread" would be parsed the same as "+white +bread":

  $mousse->and(1);

=head2 fetch

Returns a value from the database, given a key:

  my $recipe = $mousse->fetch("Hearty Russian Beet Soup");

=head2 fetch_related

If you have used L<Search::Mousse::Writer::Related> to analyse the
database, the fetch_related() method returns a list of values that are
similar to the given key:

  my @recipes = $mousse->fetch_related("Hearty Russian Beet Soup");

=head2 fetch_related_keys

If you have used L<Search::Mousse::Writer::Related> to analyse the
database, the fetch_related_keys() method returns a list of keys that
are similar to the given key:

  my @keys = $mousse->fetch_related_keys("Hearty Russian Beet Soup");
  
=head2 search

Returns a list of values that match the search terms (but see and()):

  my @white_or_bread = $mousse->search("white bread");
  my @white_bread    = $mousse->search("+white +bread");
  my @nonwhite_bread = $mousse->search("-white +bread");

=head2 search_keys

Returns a list of keys that have all the keywords passed:

  my @recipe_keys = $mousse->search_keys("italian soup");

=head1 SEE ALSO

L<Search::Mousse::Writer>, L<Search::Mousse::Writer::Related>

=head1 AUTHOR

Leon Brocard, C<< <acme@astray.com> >>

=head1 COPYRIGHT

Copyright (C) 2005, Leon Brocard

This module is free software; you can redistribute it or modify it
under the same terms as Perl itself.