# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Plucene::Index::TermInfosReader;

=head1 NAME 

Plucene::Index::TermInfosReader - read the term infos file

=head1 SYNOPSIS

	my $reader = Plucene::Index::TermInfosReader->new(
			$dir_name, $segment, $fis);

	my Plucene::Index::TermInfo $term_info = 
		$reader->get(Plucene::Index::Term $term);
		
	my Plucene::Index::SegmentTermEnum $enum = 
		$reader->terms(Plucene::Index::Term $term);
	
=head1 DESCRIPTION

This reads a term infos file.

=head1 METHODS

=cut

use strict;
use warnings;

use Memoize;

use Carp qw/confess/;

use Plucene::Index::SegmentTermEnum;
use Plucene::Index::TermInfosWriter;
use Plucene::Store::InputStream;

=head2 new

	my $reader = Plucene::Index::TermInfosReader->new(
			$dir_name, $segment, $fis);

This will create a new Plucene::Index::TermInfosReader object with
the passed directory name, segment name and field infos.
			
=cut

# Build a reader over one segment's term infos.
#
#   $class - class to bless the reader into
#   $dir   - directory name; "$dir/$seg.tis" is opened here and
#            "$dir/$seg.tii" is read by _read_index
#   $seg   - segment name
#   $fis   - field infos, handed on to Plucene::Index::SegmentTermEnum
#
# Returns the new reader, with {size} taken from the term enumeration and
# the index arrays already filled in by _read_index.
# Confesses when the .tis file is missing or has zero length.
sub new {
	my ($class, $dir, $seg, $fis) = @_;
	my $file = "$dir/$seg.tis";

	# -s is only true for an existing file of non-zero size, so failing it
	# means there is nothing to read -- say that, rather than claiming the
	# file is open.
	confess("$file does not exist or is empty!") unless -s $file;

	my $self = bless {
		directory   => $dir,
		segment     => $seg,
		field_infos => $fis,
		enum        => Plucene::Index::SegmentTermEnum->new(
			Plucene::Store::InputStream->new($file),
			$fis, 0
		),
	}, $class;
	$self->{size} = $self->{enum}->size;
	$self->_read_index;
	return $self;
}

# Read "$self->{directory}/$self->{segment}.tii" completely into memory.
#
# Three parallel arrays are (re)created on $self, with one slot for every
# entry the index enumeration yields:
#   index_terms    - the entry's term
#   index_infos    - a new Plucene::Index::TermInfo built from a copy of
#                    the entry's term info hash
#   index_pointers - the entry's index pointer
sub _read_index {
	my $self = shift;

	my $tii_file   = "$self->{directory}/$self->{segment}.tii";
	my $index_enum = Plucene::Index::SegmentTermEnum->new(
		Plucene::Store::InputStream->new($tii_file),
		$self->{field_infos}, 1);

	# NOTE(review): the size is fetched but never used in this sub.
	my $size = $index_enum->size;

	my $terms    = $self->{index_terms}    = [];
	my $infos    = $self->{index_infos}    = [];
	my $pointers = $self->{index_pointers} = [];

	my $slot = 0;
	while ($index_enum->next) {
		$terms->[$slot] = $index_enum->term;

		# Store a copy of the term info, not the enumeration's own object
		# (presumably the enumeration reuses it as it advances -- confirm).
		my %info_copy = %{ $index_enum->term_info };
		$infos->[$slot] = Plucene::Index::TermInfo->new({%info_copy});

		$pointers->[$slot] = $index_enum->index_pointer;
		$slot++;
	}
}

# Binary-search the in-memory index terms for $term.
#
#   $self - reader whose {index_terms} array is searched (the search
#           assumes the array is ordered consistently with _cmp)
#   $term - object providing _cmp($other), returning a negative, zero or
#           positive number
#
# Returns the slot of the entry that compares equal to $term.  When no
# entry is equal, returns the slot of the last entry that compares below
# $term, which is -1 if there is no such entry or the index is empty.
#
# The result is deliberately not memoized.  Memoize's default cache key is
# the stringified argument list, which for object arguments is built from
# their reference addresses rather than their content, so a cached offset
# could be replayed after the reader's index or the term had changed, or
# for a new object that reuses a freed address.
sub _get_index_offset {
	my ($self, $term) = @_;
	my $lo = 0;
	my $hi = $#{ $self->{index_terms} };

	while ($hi >= $lo) {
		my $mid = ($lo + $hi) >> 1;

		# Terms know how to compare themselves to one another.
		my $delta = $term->_cmp($self->{index_terms}->[$mid]);
		if    ($delta < 0) { $hi = $mid - 1; }
		elsif ($delta > 0) { $lo = $mid + 1; }
		else { return $mid }
	}

	# No exact hit: $hi has settled just below where $term would sit.
	return $hi;
}

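=head2 get

	my Plucene::Index::TermInfo $term_info =
		$reader->get(Plucene::Index::Term $term);

This will return the Plucene::Index::TermInfo for the passed-in
Plucene::Index::Term, or nothing if the term is not found.

=cut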

# Find the term info stored for $term.
#
# Returns nothing straight away when the reader's size is false.
# Otherwise positions the enumeration at the index slot chosen by
# _get_index_offset and returns whatever _scan_enum finds from there.
sub get {
	my ($self, $term) = @_;

	if (!$self->{size}) {
		return;
	}

	my ($offset) = $self->_get_index_offset($term);
	$self->_seek_enum($offset);

	return $self->_scan_enum($term);
}

# Reposition the term enumeration at index slot $offset.
#
# Passes to the enumeration's seek: the slot's index pointer, the position
# $offset * INDEX_INTERVAL - 1, the slot's term and the slot's term info.
# Returns whatever seek returns.
sub _seek_enum {
	my ($self, $offset) = @_;

	my $interval = Plucene::Index::TermInfosWriter::INDEX_INTERVAL();
	my $position = $offset * $interval - 1;

	return $self->{enum}->seek(
		$self->{index_pointers}->[$offset],
		$position,
		$self->{index_terms}->[$offset],
		$self->{index_infos}->[$offset],
	);
}

# Walk the enumeration forward from its current position looking for $term.
#
# Advances while $term is greater than the enumeration's current term and
# the enumeration can still move on.  Returns the enumeration's term info
# when it stops on a term equal to $term; returns nothing otherwise.
sub _scan_enum {
	my ($self, $term) = @_;
	my $enum = $self->{enum};

	while ($term->gt($enum->term)) {
		last unless $enum->next;
	}

	my $current = $enum->term;
	if ($current && $current->eq($term)) {
		return $enum->term_info;
	}

	return;
}

=head2 get_int / get_position

These are never called.

=cut

# Stub: does nothing and returns nothing.
sub get_int {
	return;
}

# Stub: does nothing and returns nothing.
sub get_position {
	return;
}

=head2 terms

	my Plucene::Index::SegmentTermEnum $enum = 
		$reader->terms(Plucene::Index::Term $term);

This will return the Plucene::Index::SegmentTermEnum for the passed-in
Plucene::Index::Term.
		
=cut

# Hand back a clone of the reader's term enumeration.
#
# With a true $term the enumeration is first positioned by calling get for
# that term; without one it is sought to index slot 0.  The return value is
# whatever the enumeration's clone method returns.
sub terms {
	my ($self, $term) = @_;

	if ($term) {
		$self->get($term);
	}
	else {
		$self->_seek_enum(0);
	}

	return $self->{enum}->clone;
}

1;