package Plucene::Search::PhraseQuery;
=head1 NAME
Plucene::Search::PhraseQuery - A query that matches a phrase
=head1 SYNOPSIS
# isa Plucene::Search::Query
=head1 DESCRIPTION
A Query that matches documents containing a particular sequence of terms.
A phrase query represents a query that is matched against a consecutive
sequence of terms in the field. For example, the phrase query 'winding road'
should match 'winding road' but not 'road winding' (with the exception of
more relaxed slop factors).
Phrase queries are represented in Plucene's API by instances of the
PhraseQuery class. These instances contain an ordered list of Term objects
that represent the terms to match. For obvious reasons, all terms in a
PhraseQuery must refer to the same field.
A phrase query may have an optional boost factor and an optional slop
parameter (default = 0). The slop parameter can be used to relax the phrase
matching by accepting somewhat out of order sequences of the terms.
=head1 METHODS
=cut
use strict;
use warnings;
use Carp;
use Plucene::Search::Similarity;
use Plucene::Search::TermQuery;
use Plucene::Search::TermScorer;
use Plucene::Search::PhraseScorer::Exact;
use Plucene::Search::PhraseScorer::Sloppy;
use base 'Plucene::Search::Query';
# Declare accessor methods for the per-query state. mk_accessors is inherited
# through the base class (presumably Class::Accessor -- confirm in
# Plucene::Search::Query).
#   slop   - set to 0 by new(); selects the Exact or Sloppy scorer in _scorer()
#   terms  - array ref of term objects, appended to by add()
#   field  - field name taken from the first term passed to add()
#   idf    - accumulated from the terms by sum_squared_weights()
#   weight - set by sum_squared_weights() and rescaled by normalize()
__PACKAGE__->mk_accessors(qw(slop terms field idf weight));
# Constructor. Builds the object through the parent class constructor,
# forwarding all arguments, then starts the phrase with a slop of 0 and an
# empty term list. Returns the new query object.
# NOTE(review): slop and terms are assigned unconditionally, so values for
# them supplied to the parent constructor would be replaced -- confirm that
# this is intended.
sub new {
    my ($class, @args) = @_;
    my $query = $class->SUPER::new(@args);
    $query->slop(0);
    $query->terms([]);
    return $query;
}
=head2 add
Adds a term to the end of the query phrase.
=cut
# Append $term to the phrase.
# The first term added fixes the query's field. A later term whose field
# differs only triggers a warning (via carp); it is still appended.
# Returns the result of push, i.e. the number of terms now in the phrase.
sub add {
    my ($self, $term) = @_;
    my $phrase = $self->terms;

    if (!@$phrase) {
        # Empty phrase: adopt the field of the first term.
        $self->field($term->field);
    }
    elsif ($self->field ne $term->field) {
        my $expected = $self->field;
        carp "All terms in this phrase should be in the same field: "
            . $expected;
    }

    return push @$phrase, $term;
}
=head2 sum_squared_weights
The sum squared weights of this query.
=cut
# Compute this query's raw weight and return its square.
#
#   $searcher - passed through to Plucene::Search::Similarity->idf for each
#               term of the phrase.
#
# Side effects: stores the summed term idf in $self->{idf} and
# idf * boost in $self->{weight}.
# Returns weight ** 2.
sub sum_squared_weights {
    my ($self, $searcher) = @_;

    # Sum into a fresh local so that calling this method again does not
    # keep accumulating on top of the previous idf.
    my $idf = 0;
    $idf += Plucene::Search::Similarity->idf($_, $searcher)
        for @{ $self->terms };

    $self->{idf}    = $idf;
    $self->{weight} = $idf * $self->boost;

    # The contribution is the squared *weight* (idf * boost), not the
    # squared boost alone.
    return $self->{weight}**2;
}
=head2 normalize
Normalize the query.
=cut
# Rescale the stored weight in place.
#
#   $norm - normalisation factor supplied by the caller.
#
# The weight is multiplied by $norm times the stored idf; the updated
# weight is returned.
sub normalize {
    my ($self, $norm) = @_;
    my $scale = $norm * $self->idf;
    return $self->{weight} *= $scale;
}
# Build the scorer for this phrase against $reader.
#
#   - no terms:        returns nothing.
#   - exactly one term: returns a Plucene::Search::TermScorer built from the
#                       reader's term_docs for that term.
#   - several terms:    collects term_positions for every term and returns a
#                       PhraseScorer::Exact (slop == 0) or PhraseScorer::Sloppy.
#
# Returns nothing as soon as the reader yields no docs/positions for a term.
sub _scorer {
    my ($self, $reader) = @_;
    my @terms = @{ $self->{terms} };
    return unless @terms;

    # A one-term "phrase" is scored like a plain term query.
    if (@terms == 1) {
        my ($only) = @terms;
        my $docs = $reader->term_docs($only) or return;
        return Plucene::Search::TermScorer->new({
            term_docs => $docs,
            norms     => $reader->norms($only->field),
            weight    => $self->weight
        });
    }

    # Every term must have positions, otherwise there is nothing to score.
    my @positions;
    for my $term (@terms) {
        my $tp = $reader->term_positions($term) or return;
        push @positions, $tp;
    }

    my $slop = $self->slop;
    my $kind = $slop == 0 ? "Exact" : "Sloppy";
    my $scorer_class = "Plucene::Search::PhraseScorer::" . $kind;

    return $scorer_class->new({
        tps    => \@positions,
        norms  => $reader->norms($self->field),
        weight => $self->weight,
        slop   => $slop
    });
}
=head2 to_string
Returns a user-readable version of this query as a string.
=cut
# Return a user-readable rendering of this query.
#
#   $field - optional default field name. When the query's own field differs
#            from it, the result is prefixed with "<field>:".
#
# Format: [field:]"term1 term2 ..."[~slop][^boost]
# The slop suffix is added only when slop is non-zero, the boost suffix only
# when boost differs from 1.
sub to_string {
    my ($self, $field) = @_;

    # Either side may be undef (no default field given, or no term added
    # yet). Treat undef as the empty string so the comparison and the prefix
    # do not raise "uninitialized value" warnings; the resulting text is the
    # same as an undef-as-"" comparison would produce.
    my $own_field = $self->field;
    $own_field = "" unless defined $own_field;
    $field     = "" unless defined $field;

    my $buffer = $own_field ne $field ? $own_field . ":" : "";
    $buffer .= sprintf('"%s"', join(" ", map { $_->text } @{ $self->terms }));
    $buffer .= "~" . $self->slop if $self->slop;
    $buffer .= "^" . $self->boost if $self->boost != 1;
    return $buffer;
}
1;