The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package LucyX::Search::WildcardQuery;
use strict;
use warnings;
use base qw( Lucy::Search::Query );
use Carp;
use Scalar::Util qw( blessed );
use LucyX::Search::WildcardCompiler;

our $VERSION = '0.05';

=head1 NAME

LucyX::Search::WildcardQuery - Lucy query extension

=head1 SYNOPSIS

 my $query = LucyX::Search::WildcardQuery->new(
    term    => 'green*',
    field   => 'color',
 );
 my $hits = $searcher->hits( query => $query );
 

=head1 DESCRIPTION

LucyX::Search::WildcardQuery extends the 
Lucy::QueryParser syntax
to support wildcards. This code is similar to the sample PrefixQuery
code in the Lucy distribution and the KinoSearch::Search::WildCardQuery
and Search::Query::Dialect::KSx::WildcardQuery module on CPAN.

=head1 METHODS

This class is a subclass of Lucy::Search::Query. Only new or overridden
methods are documented here.

=cut

# Inside-out member vars
my %term;
my %field;
my %regex;
my %prefix;
my %suffix;
my %lex_terms;

=head2 new( I<args> )

Create a new WildcardQuery object. I<args> must contain key/value pairs
for C<field> and C<term>.

=cut

sub new {
    my ( $class, %args ) = @_;
    my $term  = delete $args{term};
    my $field = delete $args{field};
    my $self  = $class->SUPER::new(%args);
    confess("'term' param is required")
        unless defined $term;
    confess("Invalid term: '$term'")
        unless $term =~ /[\*\?]/;
    confess("'field' param is required")
        unless defined $field;
    $term{$$self}  = $term;
    $field{$$self} = $field;
    $self->_build_regex($term);
    return $self;
}

sub _build_regex {
    my ( $self, $term ) = @_;
    $term = quotemeta($term);  # turn into a regexp that matches a literal str
    $term =~ s/\\\*/.*/g;          # convert wildcards into regex
    $term =~ s/\\\?/.?/g;          # convert wildcards into regex
    $term =~ s/(?:\.\*){2,}/.*/g;  # eliminate multiple consecutive wild cards
    $term =~ s/(?:\.\?){2,}/.?/g;  # eliminate multiple consecutive wild cards
    $term =~ s/^/^/;    # unless $term =~ s/^\.\*//;    # anchor the regexp to
    $term
        =~ s/\z/\\z/;  # unless $term =~ s/\.\*\z//;    # the ends of the term
    $regex{$$self} = qr/$term/;

    # get the literal prefix of the regexp, if any.
    if ($regex{$$self} =~ m<^
            (?:    # prefix for qr//'s, without allowing /i :
                \(\? ([a-hj-z]*) (?:-[a-z]*)?:
            )?
            (\\[GA]|\^) # anchor
            ([^#\$()*+.?[\]\\^]+) # literal pat (no metachars or comments)
        >x
        )
    {
        {
            my ( $mod, $anchor, $prefix ) = ( $1 || '', $2, $3 );
            $anchor eq '^' and $mod =~ /m/ and last;
            for ($prefix) {
                $mod =~ /x/ and s/\s+//g;
            }
            $prefix{$$self} = $prefix;
        }
    }

    if ( $term =~ m/\.[\?\*](\w+)/ ) {
        my $suffix = $1;
        $suffix{$$self} = $suffix;
    }

}

=head2 get_term

=head2 get_field

Retrieve the value set in new().

=head2 get_regex

Retrieve the qr// object representing I<term>.

=head2 get_prefix

Retrieve the literal string (if any) that precedes the wildcards
in I<term>.

=head2 get_suffix

Retrieve the literal string (if any) that follows the wildcards
in I<term>.

=cut

sub get_term   { my $self = shift; return $term{$$self} }
sub get_field  { my $self = shift; return $field{$$self} }
sub get_regex  { my $self = shift; return $regex{$$self} }
sub get_prefix { my $self = shift; return $prefix{$$self} }
sub get_suffix { my $self = shift; return $suffix{$$self} }

=head2 add_lex_term( I<term> )

Push I<term> onto the stack of lexicon terms that this Query matches.

=cut

sub add_lex_term {
    my $self = shift;
    my $t    = shift;
    croak "term required" unless defined $t;
    $lex_terms{$$self}->{$t}++;
}

=head2 get_lex_terms

Returns array ref of terms in the lexicons that this
query matches.

=cut

sub get_lex_terms {
    my $self = shift;
    return [ keys %{ $lex_terms{$$self} } ];
}

sub DESTROY {
    my $self = shift;
    delete $term{$$self};
    delete $field{$$self};
    delete $prefix{$$self};
    delete $suffix{$$self};
    delete $regex{$$self};
    delete $lex_terms{$$self};
    $self->SUPER::DESTROY;
}

=head2 equals

Returns true (1) if the object represents the same kind of query
clause as another WildcardQuery.

NOTE: Currently a NOTWildcardQuery and a WildcardQuery object will
evaluate as equal if they have the same terma and field. This is a bug.

=cut

sub equals {
    my ( $self, $other ) = @_;
    return 0 unless blessed($other);
    return 0
        unless $other->isa( blessed($self) );
    return 0 unless $self->get_field eq $other->get_field;
    return 0 unless $self->get_term  eq $other->get_term;
    return 1;
}

=head2 to_string

Returns the query clause the object represents.

=cut

sub to_string {
    my $self = shift;
    return "$field{$$self}:$term{$$self}";
}

=head2 make_compiler

Returns a LucyX::Search::WildcardCompiler object.

=cut

sub make_compiler {
    my $self        = shift;
    my %args        = @_;
    my $subordinate = delete $args{subordinate};    # new in Lucy 0.2.2
    $args{parent} = $self;
    my $compiler = LucyX::Search::WildcardCompiler->new(%args);

    # unlike Search::Query synopsis, normalize()
    # is called internally in $compiler.
    # This should be fixed in a C re-write.
    #$compiler->normalize unless $subordinate;

    return $compiler;
}

1;

__END__

=head1 AUTHOR

Peter Karman, C<< <karman at cpan.org> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-lucyx-search-wildcardquery at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=LucyX-Search-WildcardQuery>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc LucyX::Search::WildcardQuery


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=LucyX-Search-WildcardQuery>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/LucyX-Search-WildcardQuery>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/LucyX-Search-WildcardQuery>

=item * Search CPAN

L<http://search.cpan.org/dist/LucyX-Search-WildcardQuery/>

=back

=head1 COPYRIGHT & LICENSE

Copyright 2011 Peter Karman.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See http://dev.perl.org/licenses/ for more information.


=cut