The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package WebService::Solr::Query;

use Moo;

use Types::Standard qw(ArrayRef);

use overload q("") => 'stringify';

my $escape_chars = quotemeta( '+-&|!(){}[]^"~*?:\\' );

has 'query' => ( is => 'ro', isa => ArrayRef, default => sub { [] } );

use constant D => 0;

sub BUILDARGS {
    my $class = shift;

    if ( @_ == 1 && ref $_[ 0 ] && ref $_[ 0 ] eq 'ARRAY' ) {
        return { query => $_[ 0 ] };
    }

    return { query => \@_ };
}

sub stringify {
    my $self = shift;

    return $self->_dispatch_struct( $self->query );
}

sub _dispatch_struct {
    my ( $self, $struct ) = @_;

    my $method = '_struct_' . ref $struct;

    D && $self->___log( "Dispatching to ->$method " . __dumper( $struct ) );

    my $rv = $self->$method( $struct );

    D && $self->___log( "Returned: $rv" );

    return $rv;
}

sub _struct_HASH {
    my ( $self, $struct ) = @_;

    my @clauses;

    for my $k ( sort keys %$struct ) {
        my $v = $struct->{ $k };

        D && $self->___log( "Key => $k, value => " . __dumper( $v ) );

        if ( $k =~ m{^-(.+)} ) {
            my $method = "_op_$1";

            D && $self->___log( "Dispatch ->$method " . __dumper( $v ) );
            push @clauses, $self->$method( $v );
        }
        else {
            D
                && $self->___log(
                "Dispatch ->_dispatch_value $k, " . __dumper( $v ) );
            push @clauses, $self->_dispatch_value( $k, $v );
        }
    }

    my $rv = join( ' AND ', @clauses );

    D && $self->___log( "Returning: $rv" );

    return $rv;
}

sub _struct_ARRAY {
    my ( $self, $struct ) = @_;

    my $rv
        = '('
        . join( " OR ", map { $self->_dispatch_struct( $_ ) } @$struct )
        . ')';

    D && $self->___log( "Returning: $rv" );

    return $rv;
}

sub _dispatch_value {
    my ( $self, $k, $v ) = @_;

    my $rv;
    ### it's an array ref, the first element MAY be an operator!
    ### it would look something like this:
    # [ '-and',
    #   { '-require' => 'star' },
    #   { '-require' => 'wars' }
    # ];
    if (    ref $v
        and UNIVERSAL::isa( $v, 'ARRAY' )
        and defined $v->[ 0 ]
        and $v->[ 0 ] =~ /^ - ( AND|OR ) $/ix )
    {
        ### XXX we're assuming that all the next statements MUST
        ### be hashrefs. is this correct?
        $v = [ @$v ]; # Copy the array because we're going to be modifying it.
        shift @$v;
        my $op = uc $1;

        D
            && $self->___log(
            "Special operator detected: $op " . __dumper( $v ) );

        my @clauses;
        for my $href ( @$v ) {
            D
                && $self->___log( "Dispatch ->_dispatch_struct({ $k, "
                    . __dumper( $href )
                    . '})' );

            ### the individual directive ($href) pertains to the key,
            ### so we should send that along.
            my $part = $self->_dispatch_struct( { $k => $href } );

            D && $self->___log( "Returned $part" );

            push @clauses, '(' . $part . ')';
        }

        $rv = '(' . join( " $op ", @clauses ) . ')';

        ### nothing special about this combo, so do a usual dispatch
    }
    else {
        my $method = '_value_' . ( ref $v || 'SCALAR' );

        D && $self->___log( "Dispatch ->$method $k, " . __dumper( $v ) );

        $rv = $self->$method( $k, $v );
    }

    D && $self->___log( "Returning: $rv" );

    return $rv;
}

sub _value_SCALAR {
    my ( $self, $k, $v ) = @_;

    if ( ref $v ) {
        $v = $$v;
    }
    else {
        $v = '"' . $self->escape( $v ) . '"';
    }

    my $r = qq($k:$v);
    $r =~ s{^:}{};

    D && $self->___log( "Returning: $r" );

    return $r;
}

sub _value_HASH {
    my ( $self, $k, $v ) = @_;

    my @clauses;

    for my $op ( sort keys %$v ) {
        my $struct = $v->{ $op };
        $op =~ s{^-(.+)}{_op_$1};

        D && $self->___log( "Dispatch ->$op $k, " . __dumper( $v ) );

        push @clauses, $self->$op( $k, $struct );
    }

    my $rv = join( ' AND ', @clauses );

    D && $self->___log( "Returning: $rv" );

    return $rv;
}

sub _value_ARRAY {
    my ( $self, $k, $v ) = @_;

    my $rv = '('
        . join( ' OR ', map { $self->_value_SCALAR( $k, $_ ) } @$v ) . ')';

    D && $self->___log( "Returning: $rv" );

    return $rv;
}

sub _op_default {
    my ( $self, $v ) = @_;
    return $self->_dispatch_value( '', $v );
}

sub _op_range {
    my ( $self, $k ) = ( shift, shift );
    my @v = @{ shift() };
    return "$k:[$v[ 0 ] TO $v[ 1 ]]";
}

*_op_range_inc = \&_op_range;

sub _op_range_exc {
    my ( $self, $k ) = ( shift, shift );
    my @v = @{ shift() };
    return "$k:{$v[ 0 ] TO $v[ 1 ]}";
}

sub _op_boost {
    my ( $self, $k ) = ( shift, shift );
    my ( $v, $boost ) = @{ shift() };
    $v = $self->escape( $v );
    return qq($k:"$v"^$boost);
}

sub _op_fuzzy {
    my ( $self, $k ) = ( shift, shift );
    my ( $v, $distance ) = @{ shift() };
    $v = $self->escape( $v );
    return qq($k:$v~$distance);
}

sub _op_proximity {
    my ( $self, $k ) = ( shift, shift );
    my ( $v, $distance ) = @{ shift() };
    $v = $self->escape( $v );
    return qq($k:"$v"~$distance);
}

sub _op_require {
    my ( $self, $k, $v ) = @_;
    $v = $self->escape( $v );
    return qq(+$k:"$v");
}

sub _op_prohibit {
    my ( $self, $k, $v ) = @_;
    $v = $self->escape( $v );
    return qq(-$k:"$v");
}

sub escape {
    my ( $self, $text ) = @_;
    $text =~ s{([$escape_chars])}{\\$1}g;
    return $text;
}

sub unescape {
    my ( $self, $text ) = @_;
    $text =~ s{\\([$escape_chars])}{$1}g;
    return $text;
}

sub ___log {
    my $self = shift;
    my $msg  = shift;

    ### subroutine the log call came from, and line number the log
    ### call came from. that's 2 different caller frames :(
    my $who = join ':', [ caller( 1 ) ]->[ 3 ], [ caller( 0 ) ]->[ 2 ];

    ### make sure we prefix every line with a #
    $msg =~ s/\n/\n#/g;

    print "# $who: $msg\n";
}

sub __dumper {
    require Data::Dumper;

    return Data::Dumper::Dumper( @_ );
}

no Moo;

1;

__END__

=head1 NAME

WebService::Solr::Query - Abstract query syntax for Solr queries

=head1 SYNOPSIS

    my $query  = WebService::Solr::Query->new( { foo => 'bar' } );
    my $result = $solr->search( $query );

=head1 DESCRIPTION

WebService::Solr::Query provides a programmatic way to generate
queries to be sent to Solr. Syntax wise, it attempts to be as close to 
L<SQL::Abstract> WHERE clauses as possible, with obvious exceptions for 
idioms that do not exist in SQL. Just as values in SQL::Abstract are 
SQL-escaped, this module does the appropriate Solr-escaping on all values 
passed to the object (see C<escape()>).

=head1 QUERY SYNTAX

=head2 Key-Value Pairs

The simplest way to search is with key value pairs.

    my $q = WebService::Solr::Query->new( { foo => 'bar' } );
    # RESULT: (foo:"bar")

=head2 Implicit AND and OR

By default, data received as a HASHREF is AND'ed together.

    my $q = WebService::Solr::Query->new( { foo => 'bar', baz => 'quux' } );
    # RESULT: (foo:"bar" AND baz:"quux")

Furthermore, data received as an ARRAYREF is OR'ed together.

    my $q = WebService::Solr::Query->new( { foo => [ 'bar', 'baz' ] } );
    # RESULT: (foo:"bar" OR foo:"baz")

=head2 Nested AND and OR

The ability to nest AND and OR boolean operators is essential to express
complex queries. The C<-and> and C<-or> prefixes have been provided for this
need.

    my $q = WebService::Solr::Query->new( { foo => [
        -and => { -prohibit => 'bar' }, { -require => 'baz' }
    ] } );
    # RESULT: (((-foo:"bar") AND (+foo:"baz")))
    
    my $q = WebService::Solr::Query->new( { foo => [
        -or => { -require => 'bar' }, { -prohibit => 'baz' }
    ] } );
    # RESULT: (((+foo:"bar") OR (-foo:"baz")))

=head2 Default Field

To search the default field, use the C<-default> prefix.

    my $q = WebService::Solr::Query->new( { -default => 'bar' } );
    # RESULT: ("bar")

=head2 Require/Prohibit

    my $q = WebService::Solr::Query->new( { foo => { -require => 'bar' } } );
    # RESULT: (+foo:"bar")
    
    my $q = WebService::Solr::Query->new( { foo => { -prohibit => 'bar' } } );
    # RESULT: (-foo:"bar")

=head2 Range

There are two types of range queries, inclusive (C<-range_inc>) and 
exclusive (C<-range_exc>). The C<-range> prefix can be used in place of
C<-range_inc>.

    my $q = WebService::Solr::Query->new( { foo => { -range => ['a', 'z'] } } );
    # RESULT: (+foo:[a TO z])
    
    my $q = WebService::Solr::Query->new( { foo => { -range_exc => ['a', 'z'] } } );
    # RESULT: (+foo:{a TO z})

=head2 Boost

    my $q = WebService::Solr::Query->new( { foo => { -boost => [ 'bar', '2.0' ] } } );
    # RESULT: (foo:"bar"^2.0)

=head2 Proximity

    my $q = WebService::Solr::Query->new( { foo => { -proximity => [ 'bar baz', 10 ] } } );
    # RESULT: (foo:"bar baz"~10)

=head2 Fuzzy

    my $q = WebService::Solr::Query->new( { foo => { -fuzzy => [ 'bar', '0.8' ] } } );
    # RESULT: (foo:bar~0.8)

=head2 Literal Queries

Specifying a scalar ref as a value in a key-value pair will allow arbitrary
queries to be sent across the line. B<NB:> This will bypass any data
massaging done on regular strings, thus the onus of properly escaping the
data is left to the user.

    my $q = WebService::Solr::Query->new( { '*' => \'*' } )
    # RESULT (*:*)

=head1 ACCESSORS

=over 4

=item * query - stores the original query structure

=back

=head1 METHODS

=head2 new( \%query )

Creates a new query object with the given hashref.

=head2 stringify( )

Converts the supplied structure into a Solr/Lucene query.

=head2 escape( $value )

The following values must be escaped in a search value:

    + - & | ! ( ) { } [ ] ^ " ~ * ? : \

B<NB:> Values sent to C<new()> are automatically escaped for you.

=head2 unescape( $value )

Unescapes values escaped in C<escape()>.

=head2 D

Debugging constant, default: off.

=head2 BUILDARGS

Moo method to handle input to C<new()>.

=head1 SEE ALSO

=over 4

=item * L<WebService::Solr>

=item * http://wiki.apache.org/solr/SolrQuerySyntax

=back

=head1 AUTHORS

Brian Cassidy E<lt>bricas@cpan.orgE<gt>

Jos Boumans E<lt>kane@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2008-2016 National Adult Literacy Database

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut