@@ -220,7 +220,6 @@ my $builder = Module::Build->new(
'Compress::Zlib' => 0,
'Lingua::Stem::Snowball' => 0.94,
'Lingua::StopWords' => 0.02,
- 'Clone' => 0.18,
},
build_requires => {
'ExtUtils::CBuilder' => 0,
@@ -1,5 +1,22 @@
Revision history for KinoSearch
+0.161 2007-09-16
+ * Update ppport.h.
+ * Fix copyright dates.
+
+0.16 2007-09-16
+ * Remove dependency on Clone.
+ * Improve compatibility with blead perl 5.10.
+ * Fix underflow bug in PhraseScorer (isolated by Matthew O'Connor, patch
+ by Nathan Kurz).
+ * Extend QueryParser language to include 'field:(foo bar)'
+ (Matthew O'Connor).
+ * Fix occasional crash in Highlighter (reported by Henry Combrinck).
+ * Fix PhraseQuery crashes with empty indexes / nonexistent fields
+ (reported by Dmitri Tikhonov, Mike Andrews).
+ * Fix a memory leak in BitVector (Matthew Berk).
+ * Attempt to fix locking test failures reported by Andreas Koenig.
+
0.15 2006-12-04
* Remove dead lock files when possible (with a warning), rather than failing
outright. (Credit to Matthew O'Connor, Luke Closs, Socialtext for
@@ -1,6 +1,6 @@
---
name: KinoSearch
-version: 0.15
+version: 0.161
author:
- 'Marvin Humphrey <marvin at rectangular dot com>'
abstract: search engine library
@@ -8,7 +8,6 @@ license: perl
resources:
license: http://dev.perl.org/licenses/
requires:
- Clone: 0.18
Compress::Zlib: 0
Lingua::Stem::Snowball: 0.94
Lingua::StopWords: 0.02
@@ -18,7 +17,7 @@ build_requires:
provides:
KinoSearch:
file: lib/KinoSearch.pm
- version: 0.15
+ version: 0.161
KinoSearch::Analysis::Analyzer:
file: lib/KinoSearch/Analysis/Analyzer.pm
KinoSearch::Analysis::LCNormalizer:
@@ -209,7 +208,7 @@ provides:
file: lib/KinoSearch/Util/ToolSet.pm
KinoSearch::Util::VerifyArgs:
file: lib/KinoSearch/Util/VerifyArgs.pm
-generated_by: Module::Build version 0.2805
+generated_by: Module::Build version 0.2808
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.2.html
version: 1.2
@@ -18,7 +18,7 @@ KinoSearch.xs gets written on the fly when 'perl Makefile.PL' is invoked.
COPYRIGHT AND LICENCE
-Copyright (C) 2005-2006 Marvin Humphrey
+Copyright (C) 2005-2007 Marvin Humphrey
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
\ No newline at end of file
@@ -47,11 +47,11 @@ or a new one.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -49,11 +49,11 @@ though it's a no-op for now.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -120,11 +120,11 @@ sequence should be: normalize, tokenize, stopalize, stem.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -77,11 +77,11 @@ be an ISO two-letter code that Lingua::Stem::Snowball understands.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -170,11 +170,11 @@ L<Lingua::StopWords|Lingua::StopWords>
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -106,11 +106,11 @@ match for the query '"three blind mice"'.
=head1 COPYRIGHT
-Copyright 2006 Marvin Humphrey
+Copyright 2006-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -669,11 +669,11 @@ Set/get the position increment of the current Token.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -146,10 +146,10 @@ B<token_re> - must be a pre-compiled regular expression matching one token.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -287,11 +287,11 @@ each "paragraph".
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -231,9 +231,9 @@ L<KinoSearch::Docs::DevGuide|KinoSearch::Docs::DevGuide> for a discussion.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
@@ -165,7 +165,7 @@ L<HTML::Parser|HTML::Parser>.
|;
}
- $q =~ s/"/"/g;
+ $q = CGI::escapeHTML($q);
# display info about the number of hits, paging links
my $total_hits = $hits->total_hits;
@@ -282,9 +282,9 @@ L<HTML::Parser|HTML::Parser>.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
@@ -98,10 +98,10 @@ other documents.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -47,6 +47,8 @@ use KinoSearch::Index::FieldsReader;
use KinoSearch::Index::FieldInfos;
use KinoSearch::Index::TermVector;
+use Storable qw( dclone );
+
sub init_instance {
my $self = shift;
@@ -61,6 +63,11 @@ sub init_instance {
}
}
+sub clone {    # Return a deep copy of this object.
+ my $self = shift;
+ return dclone($self);    # Storable::dclone recursively copies the whole structure
+}
+
# Given two Field objects, return a child which has all the positive
# attributes of both parents (meaning: values are OR'd).
sub breed_with {
@@ -277,11 +284,11 @@ Fields can only be defined or manipulated indirectly, via InvIndexer and Doc.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -37,10 +37,10 @@ KinoSearch::Highlight::SimpleHTMLEncoder.
=head1 COPYRIGHT
-Copyright 2006 Marvin Humphrey
+Copyright 2006-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -38,10 +38,10 @@ etc.
=head1 COPYRIGHT
-Copyright 2006 Marvin Humphrey
+Copyright 2006-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -12,7 +12,7 @@ BEGIN {
analyzer => undef,
formatter => undef,
encoder => undef,
- terms => [],
+ terms => undef,
excerpt_length => 200,
pre_tag => undef, # back compat
post_tag => undef, # back compat
@@ -31,6 +31,7 @@ sub init_instance {
my $self = shift;
croak("Missing required arg 'excerpt_field'")
unless defined $self->{excerpt_field};
+ $self->{terms} = [];
# assume HTML
if ( !defined $self->{encoder} ) {
@@ -260,9 +261,15 @@ TERM: for my $term ( @{ $self->{terms} } ) {
}
}
- # sort and return
- @posits = sort { $a->[0] <=> $b->[0] } @posits;
- return \@posits;
+ # sort, uniquify and return
+ @posits = sort { $a->[0] <=> $b->[0] || $b->[1] <=> $a->[1] } @posits;
+ my @unique;
+ my $last = ~0;
+ for (@posits) {
+ push @unique, $_ if $_->[0] != $last;
+ $last = $_->[0];
+ }
+ return \@unique;
}
=for comment
@@ -380,10 +387,10 @@ B<post_tag> - deprecated.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -40,10 +40,10 @@ entities. Currently, this module takes a minimal approach, encoding only
=head1 COPYRIGHT
-Copyright 2006 Marvin Humphrey
+Copyright 2006-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -69,10 +69,10 @@ highlightable text.
=head1 COPYRIGHT
-Copyright 2006 Marvin Humphrey
+Copyright 2006-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -93,11 +93,11 @@ package.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -10,11 +10,16 @@ BEGIN {
invindex => undef,
filename => undef,
# members
- entries => {},
+ entries => undef,
merged => 0,
);
}
+sub init_instance {    # Per-instance setup; presumably invoked by the inherited constructor -- confirm.
+ my $self = shift;
+ $self->{entries} = {};    # fresh hash for every instance (the declared default is undef)
+}
+
# Add a file to the list of files-to-merge.
sub add_file {
my ( $self, $filename ) = @_;
@@ -78,11 +83,11 @@ original files are not deleted, so cleanup must be done externally.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -211,11 +211,11 @@ right away without having to reread the .del file.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -13,8 +13,8 @@ our @EXPORT_OK;
BEGIN {
__PACKAGE__->init_instance_vars(
# members
- by_name => {},
- by_num => [],
+ by_name => undef,
+ by_num => undef,
from_file => 0,
);
__PACKAGE__->ready_get_set(qw( from_file ));
@@ -27,7 +27,28 @@ BEGIN {
}
use KinoSearch::Document::Field;
-use Clone qw( clone );
+
+sub init_instance {    # Per-instance setup; presumably invoked by the inherited constructor -- confirm.
+ my $self = shift;
+ $self->{by_name} = {};    # fresh hash for every instance (the declared default is undef)
+ $self->{by_num} = [];    # fresh array for every instance (the declared default is undef)
+}
+
+sub clone {    # Return a copy of this collection in which every contained field info is itself cloned.
+ my $self = shift;
+ my $evil_twin = __PACKAGE__->new;    # always built as this package, not as ref($self)
+ $evil_twin->{from_file} = $self->{from_file};
+ my @by_num;
+ my %by_name;
+ for my $finfo ( @{ $self->{by_num} } ) {    # walk by_num so the copy keeps the same order
+ my $dupe = $finfo->clone;
+ push @by_num, $dupe;
+ $by_name{ $finfo->get_name } = $dupe;    # both indexes point at the same cloned object
+ }
+ $evil_twin->{by_num} = \@by_num;
+ $evil_twin->{by_name} = \%by_name;
+ return $evil_twin;
+}
# Add a user-supplied Field object to the collection.
sub add_field {
@@ -138,7 +159,7 @@ sub consolidate {
$infos->{$name} = $other_finfo->breed_with( $infos->{$name} );
}
else {
- $infos->{$name} = clone($other_finfo);
+ $infos->{$name} = $other_finfo->clone;
}
}
}
@@ -204,11 +225,11 @@ have this property.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -138,11 +138,11 @@ merging segments efficiently.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -107,11 +107,11 @@ field index files.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -66,11 +66,11 @@ similarly to the way InStream and OutStream abstract filehandle operations.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -230,11 +230,11 @@ IndexReader.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -7,10 +7,10 @@ use base qw( KinoSearch::Index::IndexReader );
BEGIN {
__PACKAGE__->init_instance_vars(
invindex => undef,
- sub_readers => [],
- starts => [],
+ sub_readers => undef,
+ starts => undef,
max_doc => 0,
- norms_cache => {},
+ norms_cache => undef,
);
}
@@ -24,6 +24,9 @@ use KinoSearch::Index::MultiTermDocs;
sub init_instance {
my $self = shift;
+ $self->{sub_readers} ||= [];
+ $self->{starts} ||= [];
+ $self->{norms_cache} ||= {};
$self->_init_sub_readers;
}
@@ -213,11 +216,11 @@ Multi-segment implementation of IndexReader.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -6,8 +6,8 @@ use base qw( KinoSearch::Index::TermDocs );
BEGIN {
__PACKAGE__->init_instance_vars(
- sub_readers => [],
- starts => [],
+ sub_readers => undef,
+ starts => undef,
);
}
our %instance_vars;
@@ -18,8 +18,10 @@ sub new {
my %args = ( %instance_vars, @_ );
# get a SegTermDocs for each segment
- my @sub_term_docs = map { $_->term_docs } @{ $args{sub_readers} };
- _init_child( $self, \@sub_term_docs, $args{starts} );
+ my $sub_readers = $args{sub_readers} || [];
+ my $starts = $args{starts} || [];
+ my @sub_term_docs = map { $_->term_docs } @$sub_readers;
+ _init_child( $self, \@sub_term_docs, $starts );
return $self;
}
@@ -350,11 +352,11 @@ Multi-segment implementation of KinoSearch::Index::TermDocs.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -62,11 +62,11 @@ id fields.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -436,11 +436,11 @@ positional data files, plus feeds data to TermInfosWriter.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -9,15 +9,21 @@ use constant FORMAT => -1;
BEGIN {
__PACKAGE__->init_instance_vars(
# members
- infos => {},
+ infos => undef,
counter => 0,
- version => ( time * 1000 ),
+ version => undef,
);
__PACKAGE__->ready_get_set(qw( counter ));
}
use Time::HiRes qw( time );
+sub init_instance {    # Per-instance setup; presumably invoked by the inherited constructor -- confirm.
+ my $self = shift;
+ $self->{infos} = {};    # fresh hash for every instance (the declared default is undef)
+ $self->{version} ||= int(time * 1000);    # keep a true supplied value; else Time::HiRes time in whole milliseconds
+}
+
# Add a SegInfo to the collection.
sub add_info {
my ( $self, $info ) = @_;
@@ -127,11 +133,11 @@ counter used to name new segments.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -18,7 +18,7 @@ BEGIN {
freq_stream => undef,
prox_stream => undef,
deldocs => undef,
- norms_readers => {},
+ norms_readers => undef,
);
__PACKAGE__->ready_get(
@@ -48,6 +48,7 @@ use KinoSearch::Index::DelDocs;
sub init_instance {
my $self = shift;
my ( $seg_name, $invindex ) = @{$self}{ 'seg_name', 'invindex' };
+ $self->{norms_readers} = {};
# initialize DelDocs
$self->{deldocs} = KinoSearch::Index::DelDocs->new(
@@ -218,11 +219,11 @@ Single-segment implementation of IndexReader.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -501,11 +501,11 @@ Single-segment implementation of KinoSearch::Index::TermDocs.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -581,11 +581,11 @@ Single-segment implementation of KinoSearch::Index::TermEnum.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -12,7 +12,7 @@ BEGIN {
finfos => undef,
field_sims => undef,
# members
- norm_outstreams => [],
+ norm_outstreams => undef,
fields_writer => undef,
postings_writer => undef,
doc_count => 0,
@@ -29,10 +29,11 @@ use KinoSearch::Index::IndexFileNames
sub init_instance {
my $self = shift;
- my ( $invindex, $norm_outstreams, $seg_name, $finfos )
- = @{$self}{ 'invindex', 'norm_outstreams', 'seg_name', 'finfos' };
+ my ( $invindex, $seg_name, $finfos )
+ = @{$self}{ 'invindex', 'seg_name', 'finfos' };
# init norms
+ my $norm_outstreams = $self->{norm_outstreams} = [];
my @indexed_field_nums = map { $_->get_field_num }
grep { $_->get_indexed } $finfos->get_infos;
for my $field_num (@indexed_field_nums) {
@@ -269,11 +270,11 @@ its way to low-level writers such as FieldsWriter and TermInfosWriter.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -96,11 +96,11 @@ Returns a string representation of the Term object.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -187,11 +187,11 @@ buffer.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -326,11 +326,11 @@ term appears in the document.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -83,11 +83,11 @@ iterate through the array while loading as little as possible into memory.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -220,11 +220,11 @@ Constructor. All 5 arguments are required.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -133,11 +133,11 @@ index.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -340,11 +340,11 @@ Find the optimum TermIndexInterval.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -9,9 +9,9 @@ BEGIN {
# params / members
field => undef,
text => undef,
- positions => [],
- start_offsets => [],
- end_offsets => [],
+ positions => undef,
+ start_offsets => undef,
+ end_offsets => undef,
);
__PACKAGE__->ready_get_set(
qw(
@@ -24,6 +24,10 @@ BEGIN {
);
}
+sub init_instance {    # Per-instance setup; presumably invoked by the inherited constructor -- confirm.
+ my $self = shift;
+ $self->{$_} ||= [] for qw( positions start_offsets end_offsets );    # keep values already set; otherwise a fresh empty array each
+}
1;
__END__
@@ -42,11 +46,11 @@ Ancillary information about a Term.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -17,19 +17,20 @@ BEGIN {
# members
reader => undef,
- analyzers => {},
+ analyzers => undef,
sinfos => undef,
finfos => undef,
doc_template => undef,
+ frozen_doc => undef,
similarity => undef,
- field_sims => {},
+ field_sims => undef,
seg_writer => undef,
write_lock => undef,
state => UNINITIALIZED,
);
}
-use Clone qw( clone );
+use Storable qw( freeze thaw );
use File::Spec::Functions qw( catfile tmpdir );
use KinoSearch::Document::Doc;
@@ -48,6 +49,8 @@ use KinoSearch::Search::Similarity;
sub init_instance {
my $self = shift;
+ $self->{analyzers} = {};
+ $self->{field_sims} = {};
# use a no-op Analyzer if not supplied
$self->{analyzer} ||= KinoSearch::Analysis::Analyzer->new;
@@ -136,6 +139,7 @@ sub _delayed_init {
for my $field ( $doc->get_fields ) {
$field->set_field_num( $finfos->get_field_num( $field->get_name ) );
}
+ $self->{frozen_doc} = freeze($doc);
# set sim for each field
my $main_sim = $self->{similarity};
@@ -191,7 +195,7 @@ sub spec_field {
sub new_doc {
my $self = shift;
$self->_delayed_init unless $self->{state} == INITIALIZED;
- return clone( $self->{doc_template} );
+ return thaw( $self->{frozen_doc} );
}
sub set_similarity {
@@ -601,11 +605,11 @@ most compact form, which will yield the fastest queries.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -12,8 +12,8 @@ BEGIN {
default_field => undef, # back compat
fields => undef,
# members
- bool_groups => {},
- phrases => {},
+ bool_groups => undef,
+ phrases => undef,
bool_group_re => undef,
phrase_re => undef,
label_inc => 0,
@@ -29,6 +29,8 @@ use KinoSearch::Index::Term;
sub init_instance {
my $self = shift;
+ $self->{bool_groups} = {};
+ $self->{phrases} = {};
croak("default_boolop must be either 'AND' or 'OR'")
unless $self->{default_boolop} =~ /^(?:AND|OR)$/;
@@ -96,9 +98,9 @@ my $field_re = qr/^
/xsm;
sub parse {
- my ( $self, $qstring_orig ) = @_;
+ my ( $self, $qstring_orig, $default_fields ) = @_;
$qstring_orig = '' unless defined $qstring_orig;
- my $default_fields = $self->{fields};
+ $default_fields ||= $self->{fields};
my $default_boolop = $self->{default_boolop};
my @clauses;
@@ -158,7 +160,7 @@ sub parse {
elsif (s/$self->{bool_group_re}//) {
# parse boolean subqueries recursively
my $inner_text = delete $self->{bool_groups}{$1};
- my $query = $self->parse($inner_text);
+ my $query = $self->parse($inner_text, $fields);
push @clauses, { query => $query, occur => $occur };
}
# what's left is probably a term
@@ -331,6 +333,10 @@ Field-specific terms, in the form of C<fieldname:termtext>. (The field
specified by fieldname will be used instead of the QueryParser's default
fields).
+A field can also be given to a logical group, in which case it is the same as
+if the field had been prepended onto every term in the group. For example:
+C<foo:(bar baz)> is the same as C<foo:bar foo:baz>.
+
=back
=head1 METHODS
@@ -381,11 +387,11 @@ L<KinoSearch::Search::Query|KinoSearch::Search::Query>.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -52,11 +52,11 @@ A clause in a BooleanQuery.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -9,7 +9,7 @@ BEGIN {
# constructor args / members
disable_coord => 0,
# members
- clauses => [],
+ clauses => undef,
max_clause_count => 1024,
);
__PACKAGE__->ready_get(qw( clauses ));
@@ -17,6 +17,11 @@ BEGIN {
use KinoSearch::Search::BooleanClause;
+sub init_instance {    # Per-instance setup; presumably invoked by the inherited constructor -- confirm.
+ my $self = shift;
+ $self->{clauses} = [];    # fresh array for every instance (the declared default is undef)
+}
+
# Add an subquery tagged with boolean characteristics.
sub add_clause {
my $self = shift;
@@ -57,20 +62,7 @@ sub create_weight {
}
-sub clone {
- my $self = shift;
-
- # remove then restore clauses in case some queries aren't clone-safe.
- my $clauses = delete $self->{clauses};
- my $evil_twin = Clone::clone($self);
- $self->{clauses} = $clauses;
-
- # clone each Clause in turn
- my @cloned_clauses = map { $_->clone } @$clauses;
- $evil_twin->{clauses} = \@cloned_clauses;
-
- return $evil_twin;
-}
+sub clone { shift->todo_death }    # NOTE(review): only delegates to todo_death, which is defined outside this view; presumably it dies as "not implemented" -- confirm
package KinoSearch::Search::BooleanWeight;
use strict;
@@ -81,7 +73,7 @@ use base qw( KinoSearch::Search::Weight );
BEGIN {
__PACKAGE__->init_instance_vars(
# members
- weights => [],
+ weights => undef,
);
}
@@ -89,6 +81,7 @@ use KinoSearch::Search::BooleanScorer;
sub init_instance {
my $self = shift;
+ $self->{weights} = [];
my ( $weights, $searcher ) = @{$self}{ 'weights', 'searcher' };
$self->{similarity} = $self->{parent}->get_similarity($searcher);
@@ -225,11 +218,11 @@ B<occur> - must be one of three possible values: 'SHOULD', 'MUST', or
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -381,11 +381,11 @@ Implementation of Scorer for BooleanQuery.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -76,9 +76,9 @@ Return the values of the Hit's constituent fields as a hashref.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
@@ -420,11 +420,11 @@ collector to "see" doc_num/score pairs which make it through the filter.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -105,11 +105,11 @@ The encoding algorithm is functionally equivalent to this:
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -187,11 +187,11 @@ L<KinoSearch::Highlight::Highlighter|KinoSearch::Highlight::Highlighter>.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -18,6 +18,7 @@ use KinoSearch::Search::Similarity;
sub init_instance {
my $self = shift;
+ $self->{field_sims} = {};
# derive max_doc, relative start offsets
my $max_doc = 0;
@@ -245,11 +246,11 @@ B<searchables> - a reference to an array of searchers.
=head1 COPYRIGHT
-Copyright 2006 Marvin Humphrey
+Copyright 2006-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -10,8 +10,8 @@ BEGIN {
slop => 0,
# members
field => undef,
- terms => [],
- positions => [],
+ terms => undef,
+ positions => undef,
);
__PACKAGE__->ready_get_set(qw( slop ));
__PACKAGE__->ready_get(qw( terms ));
@@ -21,6 +21,12 @@ use KinoSearch::Search::TermQuery;
use KinoSearch::Document::Field;
use KinoSearch::Util::ToStringUtils qw( boost_to_string );
+sub init_instance {    # Per-instance setup; presumably invoked by the inherited constructor -- confirm.
+ my $self = shift;
+ $self->{terms} = [];    # fresh array for every instance (the declared default is undef)
+ $self->{positions} = [];    # fresh array for every instance (the declared default is undef)
+}
+
# Add a term/position combo to the query. The position is specified
# explicitly in order to allow for phrases with gaps, two terms at the same
# position, etc.
@@ -100,10 +106,11 @@ sub scorer {
# look up each term
my @term_docs;
for my $term ( @{ $query->{terms} } ) {
- my $td = $reader->term_docs($term);
# bail if any one of the terms isn't in the index
- return unless defined $td;
+ return unless $reader->doc_freq($term);;
+
+ my $td = $reader->term_docs($term);
push @term_docs, $td;
# turn on positions
@@ -113,11 +120,12 @@ sub scorer {
# bail if there are no terms
return unless @term_docs;
+ my $norms_reader = $reader->norms_reader( $query->{field} );
return KinoSearch::Search::PhraseScorer->new(
weight => $self,
slop => $query->{slop},
similarity => $self->{similarity},
- norms_reader => $reader->norms_reader( $query->{field} ),
+ norms_reader => $norms_reader,
term_docs => \@term_docs,
phrase_offsets => $query->{positions},
);
@@ -163,11 +171,11 @@ L<KinoSearch::Index::Term|KinoSearch::Index::Term> object.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -8,8 +8,8 @@ BEGIN {
__PACKAGE__->init_instance_vars(
# constructor params
weight => undef,
- term_docs => [],
- phrase_offsets => [],
+ term_docs => undef,
+ phrase_offsets => undef,
norms_reader => undef,
slop => 0,
);
@@ -17,10 +17,10 @@ BEGIN {
our %instance_vars;
sub new {
- my $self = shift->SUPER::new;
+ my $either = shift;
confess kerror() unless verify_args( \%instance_vars, @_ );
my %args = ( %instance_vars, @_ );
-
+ my $self = $either->SUPER::new;
$self->_init_child;
# set/derive some member vars
@@ -281,7 +281,6 @@ Kino_PhraseScorer_calc_phrase_freq(Scorer *scorer) {
U32 *new_anchors;
U32 *candidates;
U32 *candidates_end;
- U32 target;
U32 phrase_offset;
U32 i;
STRLEN len;
@@ -314,6 +313,24 @@ Kino_PhraseScorer_calc_phrase_freq(Scorer *scorer) {
= (U32*)SvEND( term_docs[i]->get_positions(term_docs[i]) );
while (anchors < anchors_end) {
+ U32 target;
+
+ /* Discard positions that occur too early in the field to match as
+ * a part of the phrase. For example, if the field begins with
+ * "The ants go marching one by one", that initial "the" cannot
+ * match as the second term in a phrase search for
+ * "fight the power".
+ */
+ target = phrase_offset;
+ while (candidates < candidates_end && *candidates < target) {
+ candidates++;
+ }
+ if (candidates == candidates_end)
+ break;
+
+ /* Discard partial matches which seemed promising earlier but
+ * which fail on this go-round.
+ */
target = *candidates - phrase_offset;
while (anchors < anchors_end && *anchors < target) {
anchors++;
@@ -321,14 +338,19 @@ Kino_PhraseScorer_calc_phrase_freq(Scorer *scorer) {
if (anchors == anchors_end)
break;
+ /* Blast past any positions for the current term which are too low
+ * for the partial phrase matched in earlier iters.
+ */
target = *anchors + phrase_offset;
while (candidates < candidates_end && *candidates < target) {
candidates++;
}
if (candidates == candidates_end)
break;
- if (*candidates == *anchors + phrase_offset) {
- /* the anchor has made it through another elimination round */
+
+ /* Does the current position fall into the slot? */
+ if (*candidates == target) {
+ /* The anchor has made it through another elimination round. */
*new_anchors = *anchors;
new_anchors++;
}
@@ -400,11 +422,11 @@ Score phrases.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -148,11 +148,11 @@ Here's another way of looking at the divided responsibilities:
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -84,10 +84,10 @@ L<KinoSearch::Search::Query|KinoSearch::Search::Query>.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -282,11 +282,11 @@ further processing, typically by a HitCollector.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -23,6 +23,7 @@ sub init_instance {
my $self = shift;
$self->{similarity} ||= KinoSearch::Search::Similarity->new;
+ $self->{field_sims} = {};
# establish a connection
my $sock = IO::Socket::INET->new(
@@ -195,11 +196,11 @@ Limiting search results with a QueryFilter is not yet supported.
=head1 COPYRIGHT
-Copyright 2006 Marvin Humphrey
+Copyright 2006-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -219,11 +219,11 @@ Open a listening socket on localhost and wait for SearchClients to connect.
=head1 COPYRIGHT
-Copyright 2006 Marvin Humphrey
+Copyright 2006-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -8,7 +8,7 @@ BEGIN {
__PACKAGE__->init_instance_vars(
# members
similarity => undef,
- field_sims => {},
+ field_sims => undef, # {}
);
}
@@ -138,11 +138,11 @@ subclass is KinoSearch::Searcher.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -357,11 +357,11 @@ Lucene scoring algorithm, which KinoSearch implements.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -134,11 +134,11 @@ B<term> - a L<KinoSearch::Index::Term>.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -374,11 +374,11 @@ Subclass of Scorer which scores individual Terms.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -91,11 +91,11 @@ are encapsulated within a class.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -30,6 +30,7 @@ sub init_instance {
$self->{analyzer} ||= KinoSearch::Analysis::Analyzer->new;
$self->{similarity} = KinoSearch::Search::Similarity->new;
+ $self->{field_sims} = {};
if ( !defined $self->{reader} ) {
# confirm or create an InvIndex object
@@ -250,8 +251,8 @@ process completes.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
@@ -192,10 +192,10 @@ invindex should be opened.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -86,11 +86,11 @@ L<KinoSearch::Store::Lock|KinoSearch::Store::Lock>.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -609,11 +609,11 @@ L<OutStream|KinoSearch::Store::OutStream>.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -176,11 +176,11 @@ L<KinoSearch::Docs::FileFormat|KinoSearch::Docs::FileFormat>
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -100,11 +100,11 @@ C<make_lock> factory method of KinoSearch::Store::InvIndex.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -587,11 +587,11 @@ TEMPLATE.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -7,7 +7,7 @@ use base qw( KinoSearch::Store::InvIndex );
BEGIN {
__PACKAGE__->init_instance_vars(
# members
- ramfiles => {},
+ ramfiles => undef,
);
}
@@ -19,6 +19,7 @@ use KinoSearch::Store::RAMLock;
sub init_instance {
my $self = shift;
+ $self->{ramfiles} = {};
# read in an FSInvIndex if specified
$self->_read_invindex if defined $self->{path};
@@ -143,10 +144,10 @@ location into memory.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.07.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -46,11 +46,11 @@ Implementation of KinoSearch::Store::Lock entirely in memory.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -183,7 +183,7 @@ PREINIT:
AV *out_av;
PPCODE:
out_av = Kino_BitVec_to_array(bit_vec);
- XPUSHs(newRV_noinc( (SV*)out_av ));
+ XPUSHs( sv_2mortal(newRV_noinc( (SV*)out_av )) );
XSRETURN(1);
@@ -604,11 +604,11 @@ Accessible from both C and Perl.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -219,11 +219,11 @@ size and capacity, so it can contain arbitrary binary data.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -83,11 +83,11 @@ KinoSearch::Util::Class.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -62,11 +62,11 @@ KinoSearch::Util::ToolSet;".
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -3,7 +3,6 @@ use strict;
use warnings;
use KinoSearch::Util::ToolSet;
-use Clone 'clone';
use KinoSearch::Util::VerifyArgs qw( verify_args kerror );
sub new {
@@ -27,12 +26,8 @@ sub new {
confess kerror() unless verify_args( $defaults, @_ );
}
- # merge var => val pairs into new object
- my $self = clone($defaults);
- bless $self, $class;
- %$self = ( %$self, @_ );
-
- # call customizable initialization routine
+ # shallow-copy defaults, overlay var => val pairs, then call customizable init
+ my $self = bless { %$defaults, @_ }, $class;
$self->init_instance;
return $self;
@@ -59,6 +54,7 @@ sub init_instance_vars {
my $package = shift;
no strict 'refs';
+ no warnings 'once';
my $first_isa = ${ $package . '::ISA' }[0];
%{ $package . '::instance_vars' }
= ( %{ $first_isa . '::instance_vars' }, @_ );
@@ -190,7 +186,7 @@ as arguments to new().
__PACKAGE__->init_instance_vars(
# constructor params / members
foo => undef,
- bar => {},
+ bar => 10,
# members
baz => '',
);
@@ -208,8 +204,8 @@ as arguments to new().
boffo => $boffo,
);
-%instance_vars may contain hashrefs and array-refs, as L<Clone|Clone>'s
-C<clone()> method is used to produce a deep copy.
+%instance_vars may only contain non-reference scalars: defaults are merged in
+with a shallow copy, so a hashref or arrayref would be shared by all objects.
init_instance_vars() must be called from within a BEGIN block and before any
C<use> directives load a child class -- if children are born before their
@@ -223,9 +219,9 @@ A generic constructor with basic argument checking. new() expects hash-style
labeled parameters; the label names must be present in the %instance_vars
hash, or it will croak().
-After verifying the labeled parameters, new() creates a deep clone of
-%instance_vars, and merges in the labeled arguments. It then calls
-$self->init_instance() before returning the blessed reference.
+After verifying the labeled parameters, new() merges %instance_vars and @_
+into a new object. It then calls $self->init_instance() before returning the
+blessed reference.
=head2 init_instance
@@ -273,11 +269,11 @@ todo_death indicates a feature that might get implemented someday.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -84,11 +84,11 @@ is out of range or the number at the index is -1.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -97,11 +97,11 @@ in guaranteed Big-endian byte order.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -105,11 +105,11 @@ No Perl interface.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -454,11 +454,11 @@ default behavior is to compare the SvIV value of two scalars.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -829,11 +829,11 @@ External sorting implementation, using lexical comparison.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -85,11 +85,11 @@ String related utilities, e.g. string comparison functions.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -28,11 +28,11 @@ Provide functions which help with to_string.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -43,6 +43,7 @@ our @EXPORT = qw(
K_DEBUG
kdump
+ kerror
verify_args
a_isa_b
@@ -111,10 +112,10 @@ explanation.
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS, etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=cut
@@ -158,11 +158,11 @@ Provide some utility functions under the general heading of "verification".
=head1 COPYRIGHT
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
=head1 LICENSE, DISCLAIMER, BUGS etc.
-See L<KinoSearch|KinoSearch> version 0.15.
+See L<KinoSearch|KinoSearch> version 0.161.
=end devdocs
=cut
@@ -4,7 +4,7 @@ use warnings;
use 5.008003;
-our $VERSION = '0.15';
+our $VERSION = '0.161';
use constant K_DEBUG => 0;
@@ -66,7 +66,7 @@ KinoSearch - search engine library
=head1 VERSION
-0.15
+0.161
=head1 BACKWARDS COMPATIBILITY POLICY
@@ -255,7 +255,7 @@ L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=KinoSearch>.
=head1 COPYRIGHT & LICENSE
-Copyright 2005-2006 Marvin Humphrey
+Copyright 2005-2007 Marvin Humphrey
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
@@ -4,7 +4,7 @@
/*
----------------------------------------------------------------------
- ppport.h -- Perl/Pollution/Portability Version 3.10
+ ppport.h -- Perl/Pollution/Portability Version 3.11
Automatically created by Devel::PPPort running under perl 5.008006.
@@ -21,7 +21,7 @@ SKIP
=head1 NAME
-ppport.h - Perl/Pollution/Portability version 3.10
+ppport.h - Perl/Pollution/Portability version 3.11
=head1 SYNOPSIS
@@ -340,7 +340,7 @@ module from CPAN.
=head1 COPYRIGHT
-Version 3.x, Copyright (c) 2004-2006, Marcus Holland-Moritz.
+Version 3.x, Copyright (c) 2004-2007, Marcus Holland-Moritz.
Version 2.x, Copyright (C) 2001, Paul Marquess.
@@ -357,7 +357,7 @@ See L<Devel::PPPort>.
use strict;
-my $VERSION = 3.10;
+my $VERSION = 3.11;
my %opt = (
quiet => 0,
@@ -492,7 +492,7 @@ IVdf|5.006000||p
LEAVE|||
LVRET|||
MARK|||
-MULTICALL||5.009004|
+MULTICALL||5.009005|
MY_CXT_CLONE|5.009002||p
MY_CXT_INIT|5.007003||p
MY_CXT|5.007003||p
@@ -530,7 +530,7 @@ PAD_SET_CUR|||
PAD_SVl|||
PAD_SV|||
PERL_ABS|5.008001||p
-PERL_BCDVERSION|5.009004||p
+PERL_BCDVERSION|5.009005||p
PERL_GCC_BRACE_GROUPS_FORBIDDEN|5.008001||p
PERL_INT_MAX|5.004000||p
PERL_INT_MIN|5.004000||p
@@ -547,7 +547,7 @@ PERL_MAGIC_envelem|5.007002||p
PERL_MAGIC_env|5.007002||p
PERL_MAGIC_ext|5.007002||p
PERL_MAGIC_fm|5.007002||p
-PERL_MAGIC_glob|5.007002||p
+PERL_MAGIC_glob|5.009005||p
PERL_MAGIC_isaelem|5.007002||p
PERL_MAGIC_isa|5.007002||p
PERL_MAGIC_mutex|5.007002||p
@@ -602,9 +602,10 @@ PERL_USE_GCC_BRACE_GROUPS|5.009004||p
PERL_USHORT_MAX|5.004000||p
PERL_USHORT_MIN|5.004000||p
PERL_VERSION|5.006000||p
+PL_DBsignal|5.005000||p
PL_DBsingle|||pn
PL_DBsub|||pn
-PL_DBtrace|||n
+PL_DBtrace|||pn
PL_Sv|5.005000||p
PL_compiling|5.004050||p
PL_copline|5.005000||p
@@ -619,6 +620,7 @@ PL_errgv|5.004050||p
PL_hexdigit|5.005000||p
PL_hints|5.005000||p
PL_last_in_gv|||n
+PL_laststatval|5.005000||p
PL_modglobal||5.005000|n
PL_na|5.004050||pn
PL_no_modify|5.006000||p
@@ -632,6 +634,7 @@ PL_rs|||n
PL_signals|5.008001||p
PL_stack_base|5.004050||p
PL_stack_sp|5.004050||p
+PL_statcache|5.005000||p
PL_stdingv|5.004050||p
PL_sv_arenaroot|5.004050||p
PL_sv_no|5.004050||pn
@@ -639,7 +642,7 @@ PL_sv_undef|5.004050||pn
PL_sv_yes|5.004050||pn
PL_tainted|5.004050||p
PL_tainting|5.004050||p
-POP_MULTICALL||5.009004|
+POP_MULTICALL||5.009005|
POPi|||n
POPl|||n
POPn|||n
@@ -653,7 +656,7 @@ PTR2UV|5.006000||p
PTR2ul|5.007001||p
PTRV|5.006000||p
PUSHMARK|||
-PUSH_MULTICALL||5.009004|
+PUSH_MULTICALL||5.009005|
PUSHi|||
PUSHmortal|5.009002||p
PUSHn|||
@@ -917,6 +920,8 @@ _aMY_CXT|5.007003||p
_pMY_CXT|5.007003||p
aMY_CXT_|5.007003||p
aMY_CXT|5.007003||p
+aTHXR_|||p
+aTHXR|||p
aTHX_|5.006000||p
aTHX|5.006000||p
add_data|||n
@@ -1016,7 +1021,6 @@ ck_return|||
ck_rfun|||
ck_rvconst|||
ck_sassign|||
-ck_say|||
ck_select|||
ck_shift|||
ck_sort|||
@@ -1067,6 +1071,7 @@ dNOOP|5.006000||p
dORIGMARK|||
dSP|||
dTHR|5.004050||p
+dTHXR|||p
dTHXa|5.006000||p
dTHXoa|5.006000||p
dTHX|5.006000||p
@@ -1294,7 +1299,7 @@ he_dup|||
hek_dup|||
hfreeentries|||
hsplit|||
-hv_assert||5.009001|
+hv_assert||5.009005|
hv_auxinit|||n
hv_backreferences_p|||
hv_clear_placeholders||5.009001|
@@ -1739,7 +1744,7 @@ pad_peg|||n
pad_push|||
pad_reset|||
pad_setsv|||
-pad_sv||5.009004|
+pad_sv||5.009005|
pad_swipe|||
pad_tidy|||
pad_undef|||
@@ -1791,6 +1796,7 @@ re_croak2|||
re_dup|||
re_intuit_start||5.006000|
re_intuit_string||5.006000|
+readpipe_override|||
realloc||5.007002|n
reentrant_free|||
reentrant_init|||
@@ -1805,7 +1811,13 @@ refcounted_he_value|||
refkids|||
refto|||
ref||5.009003|
+reg_check_named_buff_matched|||
+reg_named_buff_sv|||
+reg_namedseq|||
reg_node|||
+reg_recode|||
+reg_scan_name|||
+reg_stringify|||
reganode|||
regatom|||
regbranch|||
@@ -1815,8 +1827,10 @@ regcppop|||
regcppush|||
regcurly|||n
regdump||5.005000|
+regdupe|||
regexec_flags||5.005000|
reghop3|||n
+reghop4|||n
reghopmaybe3|||n
reginclass|||
reginitcolors||5.006000|
@@ -1964,7 +1978,7 @@ stack_grow|||
start_force|||
start_glob|||
start_subparse||5.004000|
-stashpv_hvname_match||5.009004|
+stashpv_hvname_match||5.009005|
stdize_locale|||
strEQ|||
strGE|||
@@ -1973,7 +1987,6 @@ strLE|||
strLT|||
strNE|||
str_to_version||5.006000|
-stringify_regexp|||
strip_return|||
strnEQ|||
strnNE|||
@@ -2066,7 +2079,6 @@ sv_nosharing||5.007003|
sv_nounlocking|||
sv_nv||5.005000|
sv_peek||5.005000|
-sv_pos_b2u_forwards|||
sv_pos_b2u_midway|||
sv_pos_b2u||5.006000|
sv_pos_u2b_cached|||
@@ -3409,42 +3421,6 @@ __DATA__
# define Newxz(v,n,t) Newz(0,v,n,t)
#endif
-#if ((PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION <= 5)))
-/* Replace: 1 */
-# define PL_DBsingle DBsingle
-# define PL_DBsub DBsub
-# define PL_Sv Sv
-# define PL_compiling compiling
-# define PL_copline copline
-# define PL_curcop curcop
-# define PL_curstash curstash
-# define PL_debstash debstash
-# define PL_defgv defgv
-# define PL_diehook diehook
-# define PL_dirty dirty
-# define PL_dowarn dowarn
-# define PL_errgv errgv
-# define PL_hexdigit hexdigit
-# define PL_hints hints
-# define PL_na na
-# define PL_no_modify no_modify
-# define PL_perl_destruct_level perl_destruct_level
-# define PL_perldb perldb
-# define PL_ppaddr ppaddr
-# define PL_rsfp_filters rsfp_filters
-# define PL_rsfp rsfp
-# define PL_stack_base stack_base
-# define PL_stack_sp stack_sp
-# define PL_stdingv stdingv
-# define PL_sv_arenaroot sv_arenaroot
-# define PL_sv_no sv_no
-# define PL_sv_undef sv_undef
-# define PL_sv_yes sv_yes
-# define PL_tainted tainted
-# define PL_tainting tainting
-/* Replace: 0 */
-#endif
-
#ifndef PERL_UNUSED_DECL
# ifdef HASATTRIBUTE
# if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER)
@@ -3661,16 +3637,74 @@ typedef NVTYPE NV;
#define PERL_SIGNALS_UNSAFE_FLAG 0x0001
+#if ((PERL_VERSION < 8) || ((PERL_VERSION == 8) && (PERL_SUBVERSION < 0)))
+# define D_PPP_PERL_SIGNALS_INIT PERL_SIGNALS_UNSAFE_FLAG
+#else
+# define D_PPP_PERL_SIGNALS_INIT 0
+#endif
+
#if defined(NEED_PL_signals)
-static U32 DPPP_(my_PL_signals) = PERL_SIGNALS_UNSAFE_FLAG;
+static U32 DPPP_(my_PL_signals) = D_PPP_PERL_SIGNALS_INIT;
#elif defined(NEED_PL_signals_GLOBAL)
-U32 DPPP_(my_PL_signals) = PERL_SIGNALS_UNSAFE_FLAG;
+U32 DPPP_(my_PL_signals) = D_PPP_PERL_SIGNALS_INIT;
#else
extern U32 DPPP_(my_PL_signals);
#endif
#define PL_signals DPPP_(my_PL_signals)
#endif
+
+/* Hint: PL_ppaddr
+ * Calling an op via PL_ppaddr requires passing a context argument
+ * for threaded builds. Since the context argument is different for
+ * 5.005 perls, you can use aTHXR (supplied by ppport.h), which will
+ * automatically be defined as the correct argument.
+ */
+
+#if ((PERL_VERSION < 5) || ((PERL_VERSION == 5) && (PERL_SUBVERSION <= 4)))
+/* Replace: 1 */
+# define PL_ppaddr ppaddr
+# define PL_no_modify no_modify
+/* Replace: 0 */
+#endif
+
+#if ((PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION <= 5)))
+/* Replace: 1 */
+# define PL_DBsignal DBsignal
+# define PL_DBsingle DBsingle
+# define PL_DBsub DBsub
+# define PL_DBtrace DBtrace
+# define PL_Sv Sv
+# define PL_compiling compiling
+# define PL_copline copline
+# define PL_curcop curcop
+# define PL_curstash curstash
+# define PL_debstash debstash
+# define PL_defgv defgv
+# define PL_diehook diehook
+# define PL_dirty dirty
+# define PL_dowarn dowarn
+# define PL_errgv errgv
+# define PL_hexdigit hexdigit
+# define PL_hints hints
+# define PL_laststatval laststatval
+# define PL_na na
+# define PL_perl_destruct_level perl_destruct_level
+# define PL_perldb perldb
+# define PL_rsfp_filters rsfp_filters
+# define PL_rsfp rsfp
+# define PL_stack_base stack_base
+# define PL_stack_sp stack_sp
+# define PL_statcache statcache
+# define PL_stdingv stdingv
+# define PL_sv_arenaroot sv_arenaroot
+# define PL_sv_no sv_no
+# define PL_sv_undef sv_undef
+# define PL_sv_yes sv_yes
+# define PL_tainted tainted
+# define PL_tainting tainting
+/* Replace: 0 */
+#endif
#ifndef dTHR
# define dTHR dNOOP
#endif
@@ -3696,6 +3730,21 @@ extern U32 DPPP_(my_PL_signals);
#ifndef aTHX_
# define aTHX_
#endif
+
+#if ((PERL_VERSION < 6) || ((PERL_VERSION == 6) && (PERL_SUBVERSION < 0)))
+# ifdef USE_THREADS
+# define aTHXR thr
+# define aTHXR_ thr,
+# else
+# define aTHXR
+# define aTHXR_
+# endif
+# define dTHXR dTHR
+#else
+# define aTHXR aTHX
+# define aTHXR_ aTHX_
+# define dTHXR dTHX
+#endif
#ifndef dTHXoa
# define dTHXoa(x) dTHXa(x)
#endif
@@ -36,24 +36,30 @@ Dead_locks_are_removed: {
}
# Fork a process that will create a lock and then exit
- if ( fork() == 0 ) { # child
- # double fork to daemonize
- if ( fork() == 0 ) { # sub child
- make_lock();
- }
+ my $pid = fork();
+ if ( $pid == 0 ) { # child
+ make_lock();
exit;
}
-
- # wait for the daemon to secure the lock, then a little longer for exit
- for ( 0 .. 20 ) {
- sleep .1 unless -e $lock_path;
+ else {
+ waitpid( $pid, 0 );
}
- sleep .1;
- ok( -e $lock_path, "daemon secured lock" );
+ ok( -e $lock_path, "child secured lock" );
+
+ # The locking attempt will fail if the pid from the process that made the
+ # lock is active, so do the best we can to see whether another process
+ # started up with the child's pid (which would be weird).
+ my $pid_active = kill( 0, $pid );
eval { make_lock() };
warn $@ if $@;
- ok( !$@, 'second lock attempt did not die' );
+ my $saved_err = $@;
+ $pid_active ||= kill( 0, $pid );
+ SKIP: {
+ skip( "Child's pid is active", 1 ) if $pid_active;
+ ok( !$saved_err,
+ 'second lock attempt clobbered dead lock file and did not die' );
+ }
}
package MockIndex;
@@ -2,7 +2,7 @@ use strict;
use warnings;
use lib 't';
-use Test::More tests => 7;
+use Test::More tests => 9;
BEGIN {
use_ok('KinoSearch::Searcher');
@@ -10,16 +10,36 @@ BEGIN {
use_ok('KinoSearch::Highlight::Highlighter');
}
-use KinoSearchTestInvIndex qw( create_invindex );
+use KinoSearch::InvIndexer;
+use KinoSearch::Store::RAMInvIndex;
+my $tokenizer = KinoSearch::Analysis::Tokenizer->new;
+my $invindex = KinoSearch::Store::RAMInvIndex->new( create => 1 );
+my $invindexer = KinoSearch::InvIndexer->new(
+ invindex => $invindex,
+ analyzer => $tokenizer,
+);
+$invindexer->spec_field( name => 'content' );
+$invindexer->spec_field( name => 'alt', boost => 0.1 );
my $string = '1 2 3 4 5 ' x 20; # 200 characters
$string .= 'a b c d x y z h i j k ';
$string .= '6 7 8 9 0 ' x 20;
my $with_quotes = '"I see," said the blind man.';
-my $invindex = create_invindex( $string, $with_quotes );
-my $tokenizer = KinoSearch::Analysis::Tokenizer->new;
-my $searcher = KinoSearch::Searcher->new(
+for ( $string, $with_quotes ) {
+ my $doc = $invindexer->new_doc;
+ $doc->set_value( content => $_ );
+ $invindexer->add_doc($doc);
+}
+{
+ my $doc = $invindexer->new_doc;
+ $doc->set_value( alt => $string . " and extra stuff so it scores lower" );
+ $doc->set_value( content => "x but not why or 2ee" );
+ $invindexer->add_doc($doc);
+}
+$invindexer->finish;
+
+my $searcher = KinoSearch::Searcher->new(
invindex => $invindex,
analyzer => $tokenizer,
);
@@ -28,7 +48,7 @@ my $highlighter
my $hits = $searcher->search( query => '"x y z" AND b' );
$hits->create_excerpts( highlighter => $highlighter );
-$hits->seek( 0, 1 );
+$hits->seek( 0, 2 );
my $hit = $hits->fetch_hit_hashref;
like( $hit->{excerpt}, qr/b.*?z/, "excerpt contains all relevant terms" );
like(
@@ -39,6 +59,17 @@ like(
like( $hit->{excerpt}, qr#<strong>b</strong>#,
"highlighter tagged the single term" );
+like( $hits->fetch_hit_hashref()->{excerpt},
+ qr/x/,
+ "excerpt field with partial hit doesn't cause highlighter freakout" );
+
+$hits = $searcher->search( query => 'x "x y z" AND b' );
+$hits->create_excerpts( highlighter => $highlighter );
+$hits->seek( 0, 2 );
+like( $hits->fetch_hit_hashref()->{excerpt},
+ qr/x y z/,
+ "query with same word in both phrase and term doesn't cause freakout" );
+
$hits = $searcher->search( query => 'blind' );
$hits->create_excerpts( highlighter => $highlighter );
like( $hits->fetch_hit_hashref()->{excerpt},
@@ -1,7 +1,7 @@
#!/usr/bin/perl
use lib 't';
-use Test::More tests => 4;
+use Test::More tests => 5;
use File::Spec::Functions qw( catfile );
BEGIN { use_ok('KinoSearch::Search::PhraseQuery') }
@@ -39,3 +39,11 @@ my $second_hit = $hits->fetch_hit_hashref;
ok( $first_hit->{score} > $second_hit->{score},
"best match scores higher: $first_hit->{score} > $second_hit->{score}" );
+$phrase_query = KinoSearch::Search::PhraseQuery->new( slop => 0 );
+for (qw( c a )) {
+ my $term = KinoSearch::Index::Term->new( 'content', $_ );
+ $phrase_query->add_term($term);
+}
+$hits = $searcher->search( query => $phrase_query );
+is( $hits->total_hits, 1, 'avoid underflow when subtracting offset' );
+
@@ -4,7 +4,7 @@ use warnings;
use lib 't';
use KinoSearch qw( kdump );
-use Test::More tests => 205;
+use Test::More tests => 217;
use File::Spec::Functions qw( catfile );
BEGIN { use_ok('KinoSearch::QueryParser::QueryParser') }
@@ -138,6 +138,10 @@ my @logical_tests = (
'bogusfield:a' => [ 0, 0, 0, 0, ],
'bogusfield:a content:b' => [ 3, 0, 3, 0, ],
+ 'content:b content:c' => [ 3, 2, 3, 2 ],
+ 'content:(b c)' => [ 3, 2, 3, 2 ],
+ 'bogusfield:(b c)' => [ 0, 0, 0, 0 ],
+
);
my $i = 0;