The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Plucene::Search::PhraseQuery;

=head1 NAME 

Plucene::Search::PhraseQuery - A query that matches a phrase

=head1 SYNOPSIS

	# isa Plucene::Search::Query

=head1 DESCRIPTION

A Query that matches documents containing a particular sequence of terms.

A phrase query represents a query that is matched against a consecutive 
sequence of terms in the field. For example, the phrase query 'winding road' 
should match 'winding road' but not 'road winding' (with the exception of 
more relaxed slop factors).

Phrase queries are represented in Plucene's API by instances of the 
PhraseQuery class.  These instances contain an ordered list of Term objects 
that represent the terms to match. For obvious reasons, all terms in a 
PhraseQuery must refer to the same field.

A phrase query may have an optional boost factor and an optional slop 
parameter (default = 0). The slop parameter can be used to relax the phrase 
matching by accepting somewhat out of order sequences of the terms. 

=head1 METHODS

=cut

use strict;
use warnings;

use Carp;

use Plucene::Search::Similarity;
use Plucene::Search::TermQuery;
use Plucene::Search::TermScorer;
use Plucene::Search::PhraseScorer::Exact;
use Plucene::Search::PhraseScorer::Sloppy;

use base 'Plucene::Search::Query';

# Generate accessor methods for the query's state:
#   slop   - set to 0 by new()
#   terms  - array reference of terms, appended to by add()
#   field  - field of the first term added, set by add()
#   idf    - written by sum_squared_weights()
#   weight - written by sum_squared_weights() and scaled by normalize()
# NOTE(review): mk_accessors is not defined in this file; it is presumably
# inherited through Plucene::Search::Query (Class::Accessor style) - confirm.
__PACKAGE__->mk_accessors(qw(slop terms field idf weight));

=head2 new

	my $query = Plucene::Search::PhraseQuery->new;

Builds the query through the parent class constructor, forwarding any
arguments unchanged, and then sets C<slop> to 0 and C<terms> to a fresh empty
array reference. These two defaults are applied after the parent constructor
has run. Terms are appended afterwards with C<add>.

=cut

sub new {
	my ($class, @args) = @_;
	my $query = $class->SUPER::new(@args);

	# Every new phrase starts exact (no slop) and with no terms.
	$query->slop(0);
	$query->terms([]);

	return $query;
}

=head2 add

	$query->add($term);

Appends a term to the end of the query phrase.

The first term added fixes the field of the whole phrase. If a later term
belongs to a different field a warning is issued (via C<carp>), but the term
is still appended. Returns the number of terms now in the phrase.

=cut

sub add {
	my ($self, $term) = @_;
	my $phrase = $self->terms;

	unless (@$phrase) {
		# First term: it decides which field the phrase searches.
		$self->field($term->field);
	} else {
		my $phrase_field = $self->field;
		my $term_field   = $term->field;
		carp "All terms in this phrase should be in the same field: "
			. $phrase_field
			if $phrase_field ne $term_field;
	}

	return push @$phrase, $term;
}

=head2 sum_squared_weights

	my $sum = $query->sum_squared_weights($searcher);

Recomputes the query's C<idf> as the sum of
C<< Plucene::Search::Similarity->idf($term, $searcher) >> over every term in
the phrase, stores C<idf * boost> as the query's C<weight>, and returns the
square of the query's boost.

The idf is rebuilt from zero on every call, so running the same query object
through several searches does not inflate its idf and weight. A phrase with
no terms gets an idf and weight of 0.

=cut

sub sum_squared_weights {
	my ($self, $searcher) = @_;

	# Sum into a fresh total rather than into the stored value: adding to
	# the previous result would grow the idf on each repeated call.
	my $idf = 0;
	for my $term (@{ $self->terms }) {
		$idf += Plucene::Search::Similarity->idf($term, $searcher);
	}

	$self->{idf}    = $idf;
	$self->{weight} = $idf * $self->boost;

	# NOTE(review): this returns boost squared, whereas Lucene's PhraseQuery
	# appears to return weight squared here. Left unchanged because callers
	# may rely on the current value - confirm which is intended.
	return $self->boost * $self->boost;
}

=head2 normalize

	$query->normalize($norm);

Scales the query's stored C<weight> by C<$norm> multiplied by the query's
C<idf>, and returns the updated weight.

=cut

sub normalize {
	my ($self, $norm) = @_;

	# The normalisation factor and the idf are applied together.
	my $scale = $norm * $self->idf;
	return $self->{weight} *= $scale;
}

# Build the scorer for this phrase against $reader.
#
# - No terms: returns nothing.
# - One term: returns a Plucene::Search::TermScorer built from the reader's
#   term_docs for that term.
# - Several terms: returns a Plucene::Search::PhraseScorer::Exact when slop is
#   0, otherwise a Plucene::Search::PhraseScorer::Sloppy, built from the
#   reader's term_positions for each term.
#
# Returns nothing as soon as the reader gives a false value for any term.
sub _scorer {
	my ($self, $reader) = @_;

	my $term_count = scalar @{ $self->{terms} };
	return if $term_count == 0;

	if ($term_count == 1) {
		# A one-word "phrase" is scored as a plain term.
		my ($only_term) = @{ $self->{terms} };
		my $term_docs = $reader->term_docs($only_term) or return;
		return Plucene::Search::TermScorer->new({
				term_docs => $term_docs,
				norms     => $reader->norms($only_term->field),
				weight    => $self->weight
			});
	}

	# Collect positions for every term, giving up at the first one the
	# reader cannot supply.
	my @positions;
	foreach my $term (@{ $self->terms }) {
		my $term_positions = $reader->term_positions($term) or return;
		push @positions, $term_positions;
	}

	my $flavour = "Sloppy";
	$flavour = "Exact" if $self->slop == 0;
	my $scorer_class = "Plucene::Search::PhraseScorer::" . $flavour;

	return $scorer_class->new({
			tps    => \@positions,
			norms  => $reader->norms($self->field),
			weight => $self->weight,
			slop   => $self->slop
		});
}

=head2 to_string

	my $string = $query->to_string($field);

Returns a user-readable version of this query as a string.

The result is the phrase's terms joined by single spaces and wrapped in
double quotes. It is prefixed with C<field:> when the query's field differs
from C<$field>, followed by C<~slop> when the slop is true, and by C<^boost>
when the boost is not 1.

C<$field> may be omitted, and the query may have no terms (and therefore no
field) yet; an undefined field on either side is treated as the empty string.

=cut

sub to_string {
	my ($self, $field) = @_;

	# Either field can be undef: the caller may pass no default field, and a
	# query with no terms has no field yet. Compare them as empty strings so
	# that these cases do not raise "uninitialized value" warnings.
	my $own_field = $self->field;
	$own_field = "" unless defined $own_field;
	$field     = "" unless defined $field;

	my $buffer = ($field ne $own_field) ? $own_field . ":" : "";
	$buffer .= sprintf('"%s"', join(" ", map { $_->text } @{ $self->terms }));
	$buffer .= "~" . $self->slop  if $self->slop;
	$buffer .= "^" . $self->boost if $self->boost != 1;
	return $buffer;
}

1;