The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Search::Estraier;

use 5.008;
use strict;
use warnings;

our $VERSION = '0.09';

=head1 NAME

Search::Estraier - pure perl module to use Hyper Estraier search engine

=head1 SYNOPSIS

=head2 Simple indexer

	use Search::Estraier;

	# create and configure node
	my $node = new Search::Estraier::Node(
		url => 'http://localhost:1978/node/test',
		user => 'admin',
		passwd => 'admin',
		create => 1,
		label => 'Label for node',
		croak_on_error => 1,
	);

	# create document
	my $doc = new Search::Estraier::Document;

	# add attributes
	$doc->add_attr('@uri', "http://estraier.gov/example.txt");
	$doc->add_attr('@title', "Over the Rainbow");

	# add body text to document
	$doc->add_text("Somewhere over the rainbow.  Way up high.");
	$doc->add_text("There's a land that I heard of once in a lullaby.");

	die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });

=head2 Simple searcher

	use Search::Estraier;

	# create and configure node
	my $node = new Search::Estraier::Node(
		url => 'http://localhost:1978/node/test',
		user => 'admin',
		passwd => 'admin',
		croak_on_error => 1,
	);

	# create condition
	my $cond = new Search::Estraier::Condition;

	# set search phrase
	$cond->set_phrase("rainbow AND lullaby");

	my $nres = $node->search($cond, 0);

	if (defined($nres)) {
		print "Got ", $nres->hits, " results\n";

		# for each document in results
		for my $i ( 0 ... $nres->doc_num - 1 ) {
			# get result document
			my $rdoc = $nres->get_doc($i);
			# display attribte
			print "URI: ", $rdoc->attr('@uri'),"\n";
			print "Title: ", $rdoc->attr('@title'),"\n";
			print $rdoc->snippet,"\n";
		}
	} else {
		die "error: ", $node->status,"\n";
	}

=head1 DESCRIPTION

This module is implementation of node API of Hyper Estraier. Since it's
perl-only module with dependencies only on standard perl modules, it will
run on all platforms on which perl runs. It doesn't require compilation
or Hyper Estraier development files on target machine.

It is implemented as multiple packages which closly resamble Ruby
implementation. It also includes methods to manage nodes.

There are few examples in C<scripts> directory of this distribution.

=cut

=head1 Inheritable common methods

This methods should really move somewhere else.

=head2 _s

Remove multiple whitespaces from string, as well as whitespaces at beginning or end

 my $text = $self->_s(" this  is a text  ");
 $text = 'this is a text';

=cut

sub _s {
	my $text = $_[1];
	return unless defined($text);
	$text =~ s/\s\s+/ /gs;
	$text =~ s/^\s+//;
	$text =~ s/\s+$//;
	return $text;
}

package Search::Estraier::Document;

use Carp qw/croak confess/;

use Search::Estraier;
our @ISA = qw/Search::Estraier/;

=head1 Search::Estraier::Document

This class implements Document which is single item in Hyper Estraier.

It's is collection of:

=over 4

=item attributes

C<< 'key' => 'value' >> pairs which can later be used for filtering of results

You can add common filters to C<attrindex> in estmaster's C<_conf>
file for better performance. See C<attrindex> in
L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>.

=item vectors

also C<< 'key' => 'value' >> pairs

=item display text

Text which will be used to create searchable corpus of your index and
included in snippet output.

=item hidden text

Text which will be searchable, but will not be included in snippet.

=back

=head2 new

Create new document, empty or from draft.

  my $doc = new Search::HyperEstraier::Document;
  my $doc2 = new Search::HyperEstraier::Document( $draft );

=cut

sub new {
	my $class = shift;
	my $self = {};
	bless($self, $class);

	$self->{id} = -1;

	my $draft = shift;

	if ($draft) {
		my $in_text = 0;
		foreach my $line (split(/\n/, $draft)) {

			if ($in_text) {
				if ($line =~ /^\t/) {
					push @{ $self->{htexts} }, substr($line, 1);
				} else {
					push @{ $self->{dtexts} }, $line;
				}
				next;
			}

			if ($line =~ m/^%VECTOR\t(.+)$/) {
				my @fields = split(/\t/, $1);
				if ($#fields % 2 == 1) {
					$self->{kwords} = { @fields };
				} else {
					warn "can't decode $line\n";
				}
				next;
			} elsif ($line =~ m/^%SCORE\t(.+)$/) {
			    $self->{score} = $1;
			    next;
			} elsif ($line =~ m/^%/) {
				# What is this? comment?
				#warn "$line\n";
				next;
			} elsif ($line =~ m/^$/) {
				$in_text = 1;
				next;
			} elsif ($line =~ m/^(.+)=(.*)$/) {
				$self->{attrs}->{ $1 } = $2;
				next;
			}

			warn "draft ignored: '$line'\n";
		}
	}

	$self ? return $self : return undef;
}


=head2 add_attr

Add an attribute.

  $doc->add_attr( name => 'value' );

Delete attribute using

  $doc->add_attr( name => undef );

=cut

sub add_attr {
	my $self = shift;
	my $attrs = {@_};

	while (my ($name, $value) = each %{ $attrs }) {
		if (! defined($value)) {
			delete( $self->{attrs}->{ $self->_s($name) } );
		} else {
			$self->{attrs}->{ $self->_s($name) } = $self->_s($value);
		}
	}

	return 1;
}


=head2 add_text

Add a sentence of text.

  $doc->add_text('this is example text to display');

=cut

sub add_text {
	my $self = shift;
	my $text = shift;
	return unless defined($text);

	push @{ $self->{dtexts} }, $self->_s($text);
}


=head2 add_hidden_text

Add a hidden sentence.

  $doc->add_hidden_text('this is example text just for search');

=cut

sub add_hidden_text {
	my $self = shift;
	my $text = shift;
	return unless defined($text);

	push @{ $self->{htexts} }, $self->_s($text);
}

=head2 add_vectors

Add a vectors

  $doc->add_vector(
  	'vector_name' => 42,
	'another' => 12345,
  );

=cut

sub add_vectors {
	my $self = shift;
	return unless (@_);

	# this is ugly, but works
	die "add_vector needs HASH as argument" unless ($#_ % 2 == 1);

	$self->{kwords} = {@_};
}

=head2 set_score

Set the substitute score

  $doc->set_score(12345);

=cut

sub set_score {
    my $self = shift;
    my $score = shift;
    return unless (defined($score));
    $self->{score} = $score;
}

=head2 score

Get the substitute score

=cut

sub score {
    my $self = shift;
    return -1 unless (defined($self->{score}));
    return $self->{score};
}

=head2 id

Get the ID number of document. If the object has never been registred, C<-1> is returned.

  print $doc->id;

=cut

sub id {
	my $self = shift;
	return $self->{id};
}


=head2 attr_names

Returns array with attribute names from document object.

  my @attrs = $doc->attr_names;

=cut

sub attr_names {
	my $self = shift;
	return unless ($self->{attrs});
	#croak "attr_names return array, not scalar" if (! wantarray);
	return sort keys %{ $self->{attrs} };
}


=head2 attr

Returns value of an attribute.

  my $value = $doc->attr( 'attribute' );

=cut

sub attr {
	my $self = shift;
	my $name = shift;
	return unless (defined($name) && $self->{attrs});
	return $self->{attrs}->{ $name };
}


=head2 texts

Returns array with text sentences.

  my @texts = $doc->texts;

=cut

sub texts {
	my $self = shift;
	#confess "texts return array, not scalar" if (! wantarray);
	return @{ $self->{dtexts} } if ($self->{dtexts});
}


=head2 cat_texts

Return whole text as single scalar.

 my $text = $doc->cat_texts;

=cut

sub cat_texts {
	my $self = shift;
	return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
}


=head2 dump_draft

Dump draft data from document object.

  print $doc->dump_draft;

=cut

sub dump_draft {
	my $self = shift;
	my $draft;

	foreach my $attr_name (sort keys %{ $self->{attrs} }) {
		next unless defined(my $v = $self->{attrs}->{$attr_name});
		$draft .= $attr_name . '=' . $v . "\n";
	}

	if ($self->{kwords}) {
		$draft .= '%VECTOR';
		while (my ($key, $value) = each %{ $self->{kwords} }) {
			$draft .= "\t$key\t$value";
		}
		$draft .= "\n";
	}

	if (defined($self->{score}) && $self->{score} >= 0) {
	    $draft .= "%SCORE\t" . $self->{score} . "\n";
	}

	$draft .= "\n";

	$draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
	$draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});

	return $draft;
}


=head2 delete

Empty document object

  $doc->delete;

This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
convinience. Document objects which go out of scope will be destroyed
automatically.

=cut

sub delete {
	my $self = shift;

	foreach my $data (qw/attrs dtexts stexts kwords/) {
		delete($self->{$data});
	}

	$self->{id} = -1;

	return 1;
}



package Search::Estraier::Condition;

use Carp qw/carp confess croak/;

use Search::Estraier;
our @ISA = qw/Search::Estraier/;

=head1 Search::Estraier::Condition

=head2 new

  my $cond = new Search::HyperEstraier::Condition;

=cut

sub new {
	my $class = shift;
	my $self = {};
	bless($self, $class);

	$self->{max} = -1;
	$self->{options} = 0;

	$self ? return $self : return undef;
}


=head2 set_phrase

  $cond->set_phrase('search phrase');

=cut

sub set_phrase {
	my $self = shift;
	$self->{phrase} = $self->_s( shift );
}


=head2 add_attr

  $cond->add_attr('@URI STRINC /~dpavlin/');

=cut

sub add_attr {
	my $self = shift;
	my $attr = shift || return;
	push @{ $self->{attrs} }, $self->_s( $attr );
}


=head2 set_order

  $cond->set_order('@mdate NUMD');

=cut

sub set_order {
	my $self = shift;
	$self->{order} = shift;
}


=head2 set_max

  $cond->set_max(42);

=cut

sub set_max {
	my $self = shift;
	my $max = shift;
	croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
	$self->{max} = $max;
}


=head2 set_options

  $cond->set_options( 'SURE' );

  $cond->set_options( qw/AGITO NOIDF SIMPLE/ );

Possible options are:

=over 8

=item SURE

check every N-gram

=item USUAL

check every second N-gram

=item FAST

check every third N-gram

=item AGITO

check every fourth N-gram

=item NOIDF

don't perform TF-IDF tuning

=item SIMPLE

use simplified query phrase

=back

Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
options;

This option changed in version C<0.04> of this module. It's backwards compatibile.

=cut

my $options = {
	SURE => 1 << 0,
	USUAL => 1 << 1,
	FAST => 1 << 2,
	AGITO => 1 << 3,
	NOIDF => 1 << 4,
	SIMPLE => 1 << 10,
};

sub set_options {
	my $self = shift;
	my $opt = 0;
	foreach my $option (@_) {
		my $mask;
		unless ($mask = $options->{$option}) {
			if ($option eq '1') {
				next;
			} else {
				croak "unknown option $option";
			}
		}
		$opt += $mask;
	}
	$self->{options} = $opt;
}


=head2 phrase

Return search phrase.

  print $cond->phrase;

=cut

sub phrase {
	my $self = shift;
	return $self->{phrase};
}


=head2 order

Return search result order.

  print $cond->order;

=cut

sub order {
	my $self = shift;
	return $self->{order};
}


=head2 attrs

Return search result attrs.

  my @cond_attrs = $cond->attrs;

=cut

sub attrs {
	my $self = shift;
	#croak "attrs return array, not scalar" if (! wantarray);
	return @{ $self->{attrs} } if ($self->{attrs});
}


=head2 max

Return maximum number of results.

  print $cond->max;

C<-1> is returned for unitialized value, C<0> is unlimited.

=cut

sub max {
	my $self = shift;
	return $self->{max};
}


=head2 options

Return options for this condition.

  print $cond->options;

Options are returned in numerical form.

=cut

sub options {
	my $self = shift;
	return $self->{options};
}


=head2 set_skip

Set number of skipped documents from beginning of results

  $cond->set_skip(42);

Similar to C<offset> in RDBMS.

=cut

sub set_skip {
	my $self = shift;
	$self->{skip} = shift;
}

=head2 skip

Return skip for this condition.

  print $cond->skip;

=cut

sub skip {
	my $self = shift;
	return $self->{skip};
}


=head2 set_distinct

  $cond->set_distinct('@author');

=cut

sub set_distinct {
	my $self = shift;
	$self->{distinct} = shift;
}

=head2 distinct

Return distinct attribute

  print $cond->distinct;

=cut

sub distinct {
	my $self = shift;
	return $self->{distinct};
}

=head2 set_mask

Filter out some links when searching.

Argument array of link numbers, starting with 0 (current node).

  $cond->set_mask(qw/0 1 4/);

=cut

sub set_mask {
	my $self = shift;
	return unless (@_);
	$self->{mask} = \@_;
}


package Search::Estraier::ResultDocument;

use Carp qw/croak/;

#use Search::Estraier;
#our @ISA = qw/Search::Estraier/;

=head1 Search::Estraier::ResultDocument

=head2 new

  my $rdoc = new Search::HyperEstraier::ResultDocument(
  	uri => 'http://localhost/document/uri/42',
	attrs => {
		foo => 1,
		bar => 2,
	},
	snippet => 'this is a text of snippet'
	keywords => 'this\tare\tkeywords'
  );

=cut

sub new {
	my $class = shift;
	my $self = {@_};
	bless($self, $class);

	croak "missing uri for ResultDocument" unless defined($self->{uri});

	$self ? return $self : return undef;
}


=head2 uri

Return URI of result document

  print $rdoc->uri;

=cut

sub uri {
	my $self = shift;
	return $self->{uri};
}


=head2 attr_names

Returns array with attribute names from result document object.

  my @attrs = $rdoc->attr_names;

=cut

sub attr_names {
	my $self = shift;
	croak "attr_names return array, not scalar" if (! wantarray);
	return sort keys %{ $self->{attrs} };
}


=head2 attr

Returns value of an attribute.

  my $value = $rdoc->attr( 'attribute' );

=cut

sub attr {
	my $self = shift;
	my $name = shift || return;
	return $self->{attrs}->{ $name };
}


=head2 snippet

Return snippet from result document

  print $rdoc->snippet;

=cut

sub snippet {
	my $self = shift;
	return $self->{snippet};
}


=head2 keywords

Return keywords from result document

  print $rdoc->keywords;

=cut

sub keywords {
	my $self = shift;
	return $self->{keywords};
}


package Search::Estraier::NodeResult;

use Carp qw/croak/;

#use Search::Estraier;
#our @ISA = qw/Search::Estraier/;

=head1 Search::Estraier::NodeResult

=head2 new

  my $res = new Search::HyperEstraier::NodeResult(
  	docs => @array_of_rdocs,
	hits => %hash_with_hints,
  );

=cut

sub new {
	my $class = shift;
	my $self = {@_};
	bless($self, $class);

	foreach my $f (qw/docs hints/) {
		croak "missing $f for ResultDocument" unless defined($self->{$f});
	}

	$self ? return $self : return undef;
}


=head2 doc_num

Return number of documents

  print $res->doc_num;

This will return real number of documents (limited by C<max>).
If you want to get total number of hits, see C<hits>.

=cut

sub doc_num {
	my $self = shift;
	return $#{$self->{docs}} + 1;
}


=head2 get_doc

Return single document

  my $doc = $res->get_doc( 42 );

Returns undef if document doesn't exist.

=cut

sub get_doc {
	my $self = shift;
	my $num = shift;
	croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
	return undef if ($num < 0 || $num > $self->{docs});
	return $self->{docs}->[$num];
}


=head2 hint

Return specific hint from results.

  print $res->hint( 'VERSION' );

Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
C<TIME>, C<LINK#n>, C<VIEW>.

=cut

sub hint {
	my $self = shift;
	my $key = shift || return;
	return $self->{hints}->{$key};
}

=head2 hints

More perlish version of C<hint>. This one returns hash.

  my %hints = $res->hints;

=cut

sub hints {
	my $self = shift;
	return $self->{hints};
}

=head2 hits

Syntaxtic sugar for total number of hits for this query

  print $res->hits;

It's same as

  print $res->hint('HIT');

but shorter.

=cut

sub hits {
	my $self = shift;
	return $self->{hints}->{'HIT'} || 0;
}

package Search::Estraier::Node;

use Carp qw/carp croak confess/;
use URI;
use MIME::Base64;
use IO::Socket::INET;
use URI::Escape qw/uri_escape/;

=head1 Search::Estraier::Node

=head2 new

  my $node = new Search::HyperEstraier::Node;

or optionally with C<url> as parametar

  my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );

or in more verbose form

  my $node = new Search::HyperEstraier::Node(
  	url => 'http://localhost:1978/node/test',
	user => 'admin',
	passwd => 'admin'
	create => 1,
	label => 'optional node label',
	debug => 1,
	croak_on_error => 1
  );

with following arguments:

=over 4

=item url

URL to node

=item user

specify username for node server authentication

=item passwd

password for authentication

=item create

create node if it doesn't exists

=item label

optional label for new node if C<create> is used

=item debug

dumps a B<lot> of debugging output

=item croak_on_error

very helpful during development. It will croak on all errors instead of
silently returning C<-1> (which is convention of Hyper Estraier API in other
languages).

=back

=cut

sub new {
	my $class = shift;
	my $self = {
		pxport => -1,
		timeout => 0,	# this used to be -1
		wwidth => 480,
		hwidth => 96,
		awidth => 96,
		status => -1,
	};

	bless($self, $class);

	if ($#_ == 0) {
		$self->{url} = shift;
	} else {
		%$self = ( %$self, @_ );

		$self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});

		warn "## Node debug on\n" if ($self->{debug});
	}

	$self->{inform} = {
		dnum => -1,
		wnum => -1,
		size => -1.0,
	};

	if ($self->{create}) {
		if (! eval { $self->name } || $@) {
			my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
			croak "can't find node name in '$self->{url}'" unless ($name);
			my $label = $self->{label} || $name;
			$self->master(
				action => 'nodeadd',
				name => $name,
				label => $label,
			) || croak "can't create node $name ($label)";
		}
	}

	$self ? return $self : return undef;
}


=head2 set_url

Specify URL to node server

  $node->set_url('http://localhost:1978');

=cut

sub set_url {
	my $self = shift;
	$self->{url} = shift;
}


=head2 set_proxy

Specify proxy server to connect to node server

  $node->set_proxy('proxy.example.com', 8080);

=cut

sub set_proxy {
	my $self = shift;
	my ($host,$port) = @_;
	croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
	$self->{pxhost} = $host;
	$self->{pxport} = $port;
}


=head2 set_timeout

Specify timeout of connection in seconds

  $node->set_timeout( 15 );

=cut

sub set_timeout {
	my $self = shift;
	my $sec = shift;
	croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
	$self->{timeout} = $sec;
}


=head2 set_auth

Specify name and password for authentication to node server.

  $node->set_auth('clint','eastwood');

=cut

sub set_auth {
	my $self = shift;
	my ($login,$passwd) = @_;
	my $basic_auth = encode_base64( "$login:$passwd" );
	chomp($basic_auth);
	$self->{auth} = $basic_auth;
}


=head2 status

Return status code of last request.

  print $node->status;

C<-1> means connection failure.

=cut

sub status {
	my $self = shift;
	return $self->{status};
}


=head2 put_doc

Add a document

  $node->put_doc( $document_draft ) or die "can't add document";

Return true on success or false on failure.

=cut

sub put_doc {
	my $self = shift;
	my $doc = shift || return;
	return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
	if ($self->shuttle_url( $self->{url} . '/put_doc',
		'text/x-estraier-draft',
		$doc->dump_draft,
		undef
	) == 200) {
		$self->_clear_info;
		return 1;
	}
	return undef;
}


=head2 out_doc

Remove a document

  $node->out_doc( document_id ) or "can't remove document";

Return true on success or false on failture.

=cut

sub out_doc {
	my $self = shift;
	my $id = shift || return;
	return unless ($self->{url});
	croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
	if ($self->shuttle_url( $self->{url} . '/out_doc',
		'application/x-www-form-urlencoded',
		"id=$id",
		undef
	) == 200) {
		$self->_clear_info;
		return 1;
	}
	return undef;
}


=head2 out_doc_by_uri

Remove a registrated document using it's uri

  $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";

Return true on success or false on failture.

=cut

sub out_doc_by_uri {
	my $self = shift;
	my $uri = shift || return;
	return unless ($self->{url});
	if ($self->shuttle_url( $self->{url} . '/out_doc',
		'application/x-www-form-urlencoded',
		"uri=" . uri_escape($uri),
		undef
	) == 200) {
		$self->_clear_info;
		return 1;
	}
	return undef;
}


=head2 edit_doc

Edit attributes of a document

  $node->edit_doc( $document_draft ) or die "can't edit document";

Return true on success or false on failture.

=cut

sub edit_doc {
	my $self = shift;
	my $doc = shift || return;
	return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
	if ($self->shuttle_url( $self->{url} . '/edit_doc',
		'text/x-estraier-draft',
		$doc->dump_draft,
		undef
	) == 200) {
		$self->_clear_info;
		return 1;
	}
	return undef;
}


=head2 get_doc

Retreive document

  my $doc = $node->get_doc( document_id ) or die "can't get document";

Return true on success or false on failture.

=cut

sub get_doc {
	my $self = shift;
	my $id = shift || return;
	return $self->_fetch_doc( id => $id );
}


=head2 get_doc_by_uri

Retreive document

  my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";

Return true on success or false on failture.

=cut

sub get_doc_by_uri {
	my $self = shift;
	my $uri = shift || return;
	return $self->_fetch_doc( uri => $uri );
}


=head2 get_doc_attr

Retrieve the value of an atribute from object

  my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
  	die "can't get document attribute";

=cut

sub get_doc_attr {
	my $self = shift;
	my ($id,$name) = @_;
	return unless ($id && $name);
	return $self->_fetch_doc( id => $id, attr => $name );
}


=head2 get_doc_attr_by_uri

Retrieve the value of an atribute from object

  my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
  	die "can't get document attribute";

=cut

sub get_doc_attr_by_uri {
	my $self = shift;
	my ($uri,$name) = @_;
	return unless ($uri && $name);
	return $self->_fetch_doc( uri => $uri, attr => $name );
}


=head2 etch_doc

Exctract document keywords

  my $keywords = $node->etch_doc( document_id ) or die "can't etch document";

=cut

sub etch_doc {
	my $self = shift;
	my $id = shift || return;
	return $self->_fetch_doc( id => $id, etch => 1 );
}

=head2 etch_doc_by_uri

Retreive document

  my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";

Return true on success or false on failture.

=cut

sub etch_doc_by_uri {
	my $self = shift;
	my $uri = shift || return;
	return $self->_fetch_doc( uri => $uri, etch => 1 );
}


=head2 uri_to_id

Get ID of document specified by URI

  my $id = $node->uri_to_id( 'file:///document/uri/42' );

This method won't croak, even if using C<croak_on_error>.

=cut

sub uri_to_id {
	my $self = shift;
	my $uri = shift || return;
	return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
}


=head2 _fetch_doc

Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
C<etch_doc>, C<etch_doc_by_uri>.

 # this will decode received draft into Search::Estraier::Document object
 my $doc = $node->_fetch_doc( id => 42 );
 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );

 # to extract keywords, add etch
 my $doc = $node->_fetch_doc( id => 42, etch => 1 );
 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );

 # to get document attrubute add attr
 my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );

 # more general form which allows implementation of
 # uri_to_id
 my $id = $node->_fetch_doc(
 	uri => 'file:///document/uri/42',
	path => '/uri_to_id',
	chomp_resbody => 1
 );

=cut

sub _fetch_doc {
	my $self = shift;
	my $a = {@_};
	return unless ( ($a->{id} || $a->{uri}) && $self->{url} );

	my ($arg, $resbody);

	my $path = $a->{path} || '/get_doc';
	$path = '/etch_doc' if ($a->{etch});

	if ($a->{id}) {
		croak "id must be number not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
		$arg = 'id=' . $a->{id};
	} elsif ($a->{uri}) {
		$arg = 'uri=' . uri_escape($a->{uri});
	} else {
		confess "unhandled argument. Need id or uri.";
	}

	if ($a->{attr}) {
		$path = '/get_doc_attr';
		$arg .= '&attr=' . uri_escape($a->{attr});
		$a->{chomp_resbody} = 1;
	}

	my $rv = $self->shuttle_url( $self->{url} . $path,
		'application/x-www-form-urlencoded',
		$arg,
		\$resbody,
		$a->{croak_on_error},
	);

	return if ($rv != 200);

	if ($a->{etch}) {
		$self->{kwords} = {};
		return +{} unless ($resbody);
		foreach my $l (split(/\n/, $resbody)) {
			my ($k,$v) = split(/\t/, $l, 2);
			$self->{kwords}->{$k} = $v if ($v);
		}
		return $self->{kwords};
	} elsif ($a->{chomp_resbody}) {
		return unless (defined($resbody));
		chomp($resbody);
		return $resbody;
	} else {
		return new Search::Estraier::Document($resbody);
	}
}


=head2 name

  my $node_name = $node->name;

=cut

sub name {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{name});
	return $self->{inform}->{name};
}


=head2 label

  my $node_label = $node->label;

=cut

sub label {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{label});
	return $self->{inform}->{label};
}


=head2 doc_num

  my $documents_in_node = $node->doc_num;

=cut

sub doc_num {
	my $self = shift;
	$self->_set_info if ($self->{inform}->{dnum} < 0);
	return $self->{inform}->{dnum};
}


=head2 word_num

  my $words_in_node = $node->word_num;

=cut

sub word_num {
	my $self = shift;
	$self->_set_info if ($self->{inform}->{wnum} < 0);
	return $self->{inform}->{wnum};
}


=head2 size

  my $node_size = $node->size;

=cut

sub size {
	my $self = shift;
	$self->_set_info if ($self->{inform}->{size} < 0);
	return $self->{inform}->{size};
}


=head2 search

Search documents which match condition

  my $nres = $node->search( $cond, $depth );

C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
depth for meta search.

Function results C<Search::Estraier::NodeResult> object.

=cut

sub search {
	my $self = shift;
	my ($cond, $depth) = @_;
	return unless ($cond && defined($depth) && $self->{url});
	croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
	croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);

	my $resbody;

	my $rv = $self->shuttle_url( $self->{url} . '/search',
		'application/x-www-form-urlencoded',
		$self->cond_to_query( $cond, $depth ),
		\$resbody,
	);
	return if ($rv != 200);

	my @records 	= split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
	my $hintsText	= splice @records, 0, 2; # starts with empty record
	my $hints		= { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };

	# process records
	my $docs = [];
	foreach my $record (@records)
	{
		# split into keys and snippets
		my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;

		# create document hash
		my $doc				= { $keys =~ m/^(.*?)=(.*?)$/gsm };
		$doc->{'@keywords'}	= $doc->{keywords};
		($doc->{keywords})	= $keys =~ m/^%VECTOR\t(.*?)$/gm;
		$doc->{snippet}		= $snippet;

		push @$docs, new Search::Estraier::ResultDocument(
			attrs 		=> $doc,
			uri 		=> $doc->{'@uri'},
			snippet 	=> $snippet,
			keywords 	=> $doc->{'keywords'},
		);
	}

	return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
}


=head2 cond_to_query

Return URI encoded string generated from Search::Estraier::Condition

  my $args = $node->cond_to_query( $cond, $depth );

=cut

sub cond_to_query {
	my $self = shift;

	my $cond = shift || return;
	croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
	my $depth = shift;

	my @args;

	if (my $phrase = $cond->phrase) {
		push @args, 'phrase=' . uri_escape($phrase);
	}

	if (my @attrs = $cond->attrs) {
		for my $i ( 0 .. $#attrs ) {
			push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
		}
	}

	if (my $order = $cond->order) {
		push @args, 'order=' . uri_escape($order);
	}
		
	if (my $max = $cond->max) {
		push @args, 'max=' . $max;
	} else {
		push @args, 'max=' . (1 << 30);
	}

	if (my $options = $cond->options) {
		push @args, 'options=' . $options;
	}

	push @args, 'depth=' . $depth if ($depth);
	push @args, 'wwidth=' . $self->{wwidth};
	push @args, 'hwidth=' . $self->{hwidth};
	push @args, 'awidth=' . $self->{awidth};
	push @args, 'skip=' . $cond->{skip} if ($cond->{skip});

	if (my $distinct = $cond->distinct) {
		push @args, 'distinct=' . uri_escape($distinct);
	}

	if ($cond->{mask}) {
		my $mask = 0;
		map { $mask += ( 2 ** $_ ) } @{ $cond->{mask} };

		push @args, 'mask=' . $mask if ($mask);
	}

	return join('&', @args);
}


=head2 shuttle_url

This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
master.

  my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );

C<$resheads> and C<$resbody> booleans controll if response headers and/or response
body will be saved within object.

=cut

use LWP::UserAgent;

sub shuttle_url {
	my $self = shift;

	my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;

	$croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);

	$self->{status} = -1;

	warn "## $url\n" if ($self->{debug});

	$url = new URI($url);
	if (
			!$url || !$url->scheme || !$url->scheme eq 'http' ||
			!$url->host || !$url->port || $url->port < 1
		) {
		carp "can't parse $url\n";
		return -1;
	}

	my $ua = LWP::UserAgent->new;
	$ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );

	my $req;
	if ($reqbody) {
		$req = HTTP::Request->new(POST => $url);
	} else {
		$req = HTTP::Request->new(GET => $url);
	}

	$req->headers->header( 'Host' => $url->host . ":" . $url->port );
	$req->headers->header( 'Connection', 'close' );
	$req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
	$req->content_type( $content_type );

	warn $req->headers->as_string,"\n" if ($self->{debug});

	if ($reqbody) {
		warn "$reqbody\n" if ($self->{debug});
		$req->content( $reqbody );
	}

	my $res = $ua->request($req) || croak "can't make request to $url: $!";

	warn "## response status: ",$res->status_line,"\n" if ($self->{debug});

	($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);

	if (! $res->is_success) {
		if ($croak_on_error) {
			croak("can't get $url: ",$res->status_line);
		} else {
			return -1;
		}
	}

	$$resbody .= $res->content;

	warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});

	return $self->{status};
}


=head2 set_snippet_width

Set width of snippets in results

  $node->set_snippet_width( $wwidth, $hwidth, $awidth );

C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
is not sent with results. If it is negative, whole document text is sent instead of snippet.

C<$hwidth> specified width of strings from beginning of string. Default
value is C<96>. Negative or zero value keep previous value.

C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
If negative of zero value is provided previous value is kept unchanged.

=cut

sub set_snippet_width {
	my $self = shift;

	my ($wwidth, $hwidth, $awidth) = @_;
	$self->{wwidth} = $wwidth;
	$self->{hwidth} = $hwidth if ($hwidth >= 0);
	$self->{awidth} = $awidth if ($awidth >= 0);
}


=head2 set_user

Manage users of node

  $node->set_user( 'name', $mode );

C<$mode> can be one of:

=over 4

=item 0

delete account

=item 1

set administrative right for user

=item 2

set user account as guest

=back

Return true on success, otherwise false.

=cut

sub set_user {
	my $self = shift;
	my ($name, $mode) = @_;

	return unless ($self->{url});
	croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);

	$self->shuttle_url( $self->{url} . '/_set_user',
		'application/x-www-form-urlencoded',
		'name=' . uri_escape($name) . '&mode=' . $mode,
		undef
	) == 200;
}


=head2 set_link

Manage node links

  $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);

If C<$credit> is negative, link is removed.

=cut

sub set_link {
	my $self = shift;
	my ($url, $label, $credit) = @_;

	return unless ($self->{url});
	croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);

	my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
	$reqbody .= '&credit=' . $credit if ($credit > 0);

	if ($self->shuttle_url( $self->{url} . '/_set_link',
		'application/x-www-form-urlencoded',
		$reqbody,
		undef
	) == 200) {
		# refresh node info after adding link
		$self->_clear_info;
		return 1;
	}
	return undef;
}

=head2 admins

 my @admins = @{ $node->admins };

Return array of users with admin rights on node

=cut

sub admins {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{name});
	return $self->{inform}->{admins};
}

=head2 guests

 my @guests = @{ $node->guests };

Return array of users with guest rights on node

=cut

sub guests {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{name});
	return $self->{inform}->{guests};
}

=head2 links

 my $links = @{ $node->links };

Return array of links for this node

=cut

sub links {
	my $self = shift;
	$self->_set_info unless ($self->{inform}->{name});
	return $self->{inform}->{links};
}

=head2 cacheusage

Return cache usage for a node

  my $cache = $node->cacheusage;

=cut

sub cacheusage {
	my $self = shift;

	return unless ($self->{url});

	my $resbody;
	my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
		'text/plain',
		undef,
		\$resbody,
	);

	return if ($rv != 200 || !$resbody);

	return $resbody;
}

=head2 master

Set actions on Hyper Estraier node master (C<estmaster> process)

  $node->master(
  	action => 'sync'
  );

All available actions are documented in
L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>

=cut

my $estmaster_rest = {
	shutdown => {
		status => 202,
	},
	sync => {
		status => 202,
	},
	backup => {
		status => 202,
	},
	userlist => {
		status => 200,
		returns => [ qw/name passwd flags fname misc/ ],
	},
	useradd => {
		required => [ qw/name passwd flags/ ],
		optional => [ qw/fname misc/ ],
		status => 200,
	},
	userdel => {
		required => [ qw/name/ ],
		status => 200,
	},
	nodelist => {
		status => 200,
		returns => [ qw/name label doc_num word_num size/ ],
	},
	nodeadd => {
		required => [ qw/name/ ],
		optional => [ qw/label/ ],
		status => 200,
	},
	nodedel => {
		required => [ qw/name/ ],
		status => 200,
	},
	nodeclr => {
		required => [ qw/name/ ],
		status => 200,
	},
	nodertt => {
		status => 200,	
	},
};

sub master {
	my $self = shift;

	my $args = {@_};

	# have action?
	my $action = $args->{action} || croak "need action, available: ",
		join(", ",keys %{ $estmaster_rest });

	# check if action is valid
	my $rest = $estmaster_rest->{$action};
	croak "action '$action' is not supported, available actions: ",
		join(", ",keys %{ $estmaster_rest }) unless ($rest);

	croak "BUG: action '$action' needs return status" unless ($rest->{status});

	my @args;

	if ($rest->{required} || $rest->{optional}) {

		map {
			croak "need parametar '$_' for action '$action'" unless ($args->{$_});
			push @args, $_ . '=' . uri_escape( $args->{$_} );
		} ( @{ $rest->{required} } );

		map {
			push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
		} ( @{ $rest->{optional} } );

	}

	my $uri = new URI( $self->{url} );

	my $resbody;

	my $status = $self->shuttle_url(
		'http://' . $uri->host_port . '/master?action=' . $action ,
		'application/x-www-form-urlencoded',
		join('&', @args),
		\$resbody,
		1,
	) or confess "shuttle_url failed";

	if ($status == $rest->{status}) {

		# refresh node info after sync
		$self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);

		if ($rest->{returns} && wantarray) {

			my @results;
			my $fields = $#{$rest->{returns}};

			foreach my $line ( split(/[\r\n]/,$resbody) ) {
				my @e = split(/\t/, $line, $fields + 1);
				my $row;
				foreach my $i ( 0 .. $fields) {
					$row->{ $rest->{returns}->[$i] } = $e[ $i ];
				}
				push @results, $row;
			}

			return @results;

		} elsif ($resbody) {
			chomp $resbody;
			return $resbody;
		} else {
			return 0E0;
		}
	}

	carp "expected status $rest->{status}, but got $status";
	return undef;
}

=head1 PRIVATE METHODS

You could call those directly, but you don't have to. I hope.

=head2 _set_info

Set information for node

  $node->_set_info;

=cut

sub _set_info {
	my $self = shift;

	$self->{status} = -1;
	return unless ($self->{url});

	my $resbody;
	my $rv = $self->shuttle_url( $self->{url} . '/inform',
		'text/plain',
		undef,
		\$resbody,
	);

	return if ($rv != 200 || !$resbody);

	my @lines = split(/[\r\n]/,$resbody);

	$self->_clear_info;

	( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
		$self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);

	return $resbody unless (@lines);

	shift @lines;

	while(my $admin = shift @lines) {
		push @{$self->{inform}->{admins}}, $admin;
	}

	while(my $guest = shift @lines) {
		push @{$self->{inform}->{guests}}, $guest;
	}

	while(my $link = shift @lines) {
		push @{$self->{inform}->{links}}, $link;
	}

	return $resbody;

}

=head2 _clear_info

Clear information for node

  $node->_clear_info;

On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node
info will be fetch again from Hyper Estraier.

=cut
sub _clear_info {
	my $self = shift;
	$self->{inform} = {
		dnum => -1,
		wnum => -1,
		size => -1.0,
	};
}

###

=head1 EXPORT

Nothing.

=head1 SEE ALSO

L<http://hyperestraier.sourceforge.net/>

Hyper Estraier Ruby interface on which this module is based.

Hyper Estraier now also has pure-perl binding included in distribution. It's
a faster way to access databases directly if you are not running
C<estmaster> P2P server.

=head1 AUTHOR

Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>

Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2005-2006 by Dobrica Pavlinusic

This library is free software; you can redistribute it and/or modify
it under the GPL v2 or later.

=cut

1;