The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

use strict;
use warnings;

package LucyX::Search::Filter;
BEGIN { our @ISA = qw( Lucy::Search::Query ) }
our $VERSION = '0.003003';
$VERSION = eval $VERSION;
use Carp;
use Storable qw( nfreeze thaw );
use Scalar::Util qw( blessed weaken );
use bytes;
no bytes;

# Inside-out member vars.
our %query;
our %cached_bits;

sub new {
    my ( $either, %args ) = @_;
    my $query = delete $args{query};
    confess("required parameter query is not a Lucy::Search::Query")
        unless ( blessed($query)
        && $query->isa('Lucy::Search::Query') );
    my $self = $either->SUPER::new(%args);
    $self->_init_cache;
    $query{$$self} = $query;
    $self->set_boost(0);
    return $self;
}

sub DESTROY {
    my $self = shift;
    delete $query{$$self};
    delete $cached_bits{$$self};
    $self->SUPER::DESTROY;
}

sub make_compiler {
    my ( $self, %args ) = @_;
    my $subordinate = delete $args{subordinate};
    my $compiler
        = LucyX::Search::FilterCompiler->new( %args, parent => $self );
    $compiler->normalize unless $subordinate;
    return $compiler;
}

sub serialize {
    my ( $self, $outstream ) = @_;
    $self->SUPER::serialize($outstream);
    my $frozen = nfreeze( $query{$$self} );
    $outstream->write_c32( bytes::length($frozen) );
    $outstream->print($frozen);
}

sub deserialize {
    my ( $self, $instream ) = @_;
    $self->SUPER::deserialize($instream);
    my $len = $instream->read_c32;
    my $frozen;
    $instream->read( $frozen, $len );
    $query{$$self} = thaw($frozen);
    return $self;
}

sub equals {
    my ( $self, $other ) = @_;
    return 0 unless $other->isa(__PACKAGE__);
    return 0 unless $query{$$self}->equals( $query{$$other} );
    return 0 unless $self->get_boost == $other->get_boost;
    return 1;
}

sub to_string {
    my $self = shift;
    return 'Filter(' . $query{$$self}->to_string . ')';
}

sub _bits {
    my ( $self, $seg_reader ) = @_;

    my $cached_bits = $self->_fetch_cached_bits($seg_reader);

    # Fill the cache.
    if ( !defined $cached_bits ) {
        $cached_bits = Lucy::Object::BitVector->new(
            capacity => $seg_reader->doc_max + 1 );
        $self->_store_cached_bits( $seg_reader, $cached_bits );

        my $collector = Lucy::Search::Collector::BitCollector->new(
            bit_vector => $cached_bits );

        my $polyreader = Lucy::Index::PolyReader->new(
            schema      => $seg_reader->get_schema,
            folder      => $seg_reader->get_folder,
            snapshot    => $seg_reader->get_snapshot,
            sub_readers => [$seg_reader],
        );
        my $searcher
            = Lucy::Search::IndexSearcher->new( index => $polyreader );

        # Perform the search.
        $searcher->collect(
            query     => $query{$$self},
            collector => $collector,
        );
    }

    return $cached_bits;
}

# Store a cached BitVector associated with a particular SegReader.  Store a
# weak reference to the SegReader as an indicator of cache validity.
sub _store_cached_bits {
    my ( $self, $seg_reader, $bits ) = @_;
    my $pair = { seg_reader => $seg_reader, bits => $bits };
    weaken( $pair->{seg_reader} );
    $cached_bits{$$self}{ $seg_reader->hash_sum } = $pair;
}

# Retrieve a cached BitVector associated with a particular SegReader.  As a
# side effect, clear away any BitVectors which are no longer valid because
# their SegReaders have gone away.
sub _fetch_cached_bits {
    my ( $self, $seg_reader ) = @_;
    my $cached_bits = $cached_bits{$$self};

    # Sweep.
    while ( my ( $hash_sum, $pair ) = each %$cached_bits ) {
        # If weak ref has decomposed into undef, SegReader is gone... so
        # delete.
        next if defined $pair->{seg_reader};
        delete $cached_bits->{$hash_sum};
    }

    # Fetch.
    my $pair = $cached_bits->{ $seg_reader->hash_sum };
    return $pair->{bits} if defined $pair;
    return;
}

# Kill any existing cached filters.
sub _init_cache {
    my $self = shift;
    $cached_bits{$$self} = {};
}

# Testing only.
sub _cached_count {
    my $self = shift;
    return scalar grep { defined $cached_bits{$$self}{$_}{seg_reader} }
        keys %{ $cached_bits{$$self} };
}

package LucyX::Search::FilterCompiler;
our $VERSION = '0.003003';
$VERSION = eval $VERSION;
BEGIN { our @ISA = qw( Lucy::Search::Compiler ) }

sub new {
    my ( $class, %args ) = @_;
    $args{similarity} ||= $args{searcher}->get_schema->get_similarity;
    return $class->SUPER::new(%args);
}

sub make_matcher {
    my ( $self, %args ) = @_;
    my $seg_reader = $args{reader};
    my $bits       = $self->get_parent->_bits($seg_reader);
    return LucyX::Search::FilterMatcher->new(
        bits    => $bits,
        doc_max => $seg_reader->doc_max,
    );
}

package LucyX::Search::FilterMatcher;
our $VERSION = '0.003003';
$VERSION = eval $VERSION;
BEGIN { our @ISA = qw( Lucy::Search::Matcher ) }

1;

__END__

__BINDING__

Clownfish::CFC::Binding::Perl::Class->register(
    parcel            => "Lucy",
    class_name        => "LucyX::Search::FilterMatcher",
    bind_constructors => ["new"],
);

__POD__

=head1 NAME

LucyX::Search::Filter - Build a caching filter based on results of a Query.

=head1 SYNOPSIS

    my %category_filters;
    for my $category (qw( sweet sour salty bitter )) {
        my $cat_query = Lucy::Search::TermQuery->new(
            field => 'category',
            term  => $category,
        );
        $category_filters{$category} = LucyX::Search::Filter->new( 
            query => $cat_query, 
        );
    }
    
    while ( my $cgi = CGI::Fast->new ) {
        my $user_query = $cgi->param('q');
        my $filter     = $category_filters{ $cgi->param('category') };
        my $and_query  = Lucy::Search::ANDQuery->new;
        $and_query->add_child($user_query);
        $and_query->add_child($filter);
        my $hits = $searcher->hits( query => $and_query );
        ...

=head1 DESCRIPTION 

A Filter is a L<Lucy::Search::Query> subclass that can be used to filter
the results of another Query.  The effect is very similar to simply using the
wrapped inner query, but there are two important differences:

=over

=item

A Filter does not contribute to the score of the documents it matches.  

=item

A Filter caches its results, so it is more efficient if you use it more than
once.

=back

To obtain logically equivalent results to the Filter but avoid the caching,
substitute the wrapped query but use set_boost() to set its C<boost> to 0.

=head1 METHODS

=head2 new

    my $filter = LucyX::Search::Filter->new(
        query => $query;
    );

Constructor.  Takes one hash-style parameter, C<query>, which must be an
object belonging to a subclass of L<Lucy::Search::Query>.

=head1 BUGS

Filters do not cache when used in a search cluster with LucyX::Remote's
SearchServer and SearchClient.

=cut