The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
use strict;
use warnings;
use Test::More tests => 34;
use strict;
use Data::Dump qw( dump );
use Search::Tools::UTF8;

#binmode Test::More->builder->output,         ":utf8";
#binmode Test::More->builder->failure_output, ":utf8";

use_ok('SWISH::Prog');
use_ok('SWISH::Prog::KSx::InvIndex');
use_ok('SWISH::Prog::KSx::Searcher');

ok( my $invindex = SWISH::Prog::KSx::InvIndex->new(
        clobber => 0,                 # KS handles this
        path    => 't/index.swish',
    ),
    "new invindex"
);

my $program = make_program();

ok( $program->index('t/'), "run program" );

is( $program->count, 3, "indexed test docs" );

ok( my $searcher
        = SWISH::Prog::KSx::Searcher->new( invindex => 't/index.swish', ),
    "new searcher"
);

ok( my $results = $searcher->search('test'), "search()" );

#diag( dump $results );

is( $results->hits, 1, "1 hit" );

ok( my $result = $results->next, "next result" );

is( $result->uri, 't/test.html', 'get uri' );

is( $result->title, "test html doc", "get title" );

diag( $result->score );

# test limit
ok( my $results2 = $searcher->search(
        'some', { limit => [ [qw( date 2010-01-01 2010-12-31 )] ] }
    ),
    "search()"
);
is( $results2->hits, 1, "1 hit" );

my $utf8_title = sprintf( "%c%s%c", 8220, qq/ima xml doc/, 8221 );

#Search::Tools::describe($utf8_title);
#diag($utf8_title);

while ( my $result2 = $results2->next ) {
    my $title = sprintf( "%s %s", $result2->title, "AND MORE" );

    #print STDERR $title . "\n";
    if ( !is_flagged_utf8($title) ) {
        warn("not flagged utf8");
    }

    #Search::Tools::describe($title);
    #diag( $result2->uri );
    #diag( $result2->title );
    #diag( $result2->score );
    #diag($title);
    is( $result2->uri,   't/test.xml', 'get uri' );
    is( $result2->title, $utf8_title,  "get title" );

}

# test sort
ok( my $results3 = $searcher->search(
        'some', { order => 'swishdocpath asc swishrank desc' }
    ),
    "search()"
);
is( $results3->hits, 2, "2 hits" );
my @results;
while ( my $result3 = $results3->next ) {
    push @results, $result3->swishdocpath;
}
is_deeply( \@results, [qw( t/test.html t/test.xml )], "results sorted ok" );

# test wildcard query
ok( my $results4 = $searcher->search('S?M*'), "search()" );
is( $results4->hits, 2, "2 hits" );

ok( my $results5 = $searcher->search('running*'),
    "search stemmable wildcard" );
is( $results5->hits, 1, "1 hit" );

ok( my $results6 = $searcher->search(qq/"text here"~4/), "search proximity" );
is( $results6->hits, 1, "1 hit" );

ok( my $results7 = $searcher->search(qq/(som* or word*) and here/),
    "compound wildcard" );
is( $results7->hits, 2, "2 hits for compound wildcard query" );

# break the query parser
eval { $results7 = $searcher->search(qq/"out touch~2/); };
ok( $@, "query parser catches poor syntax" );

# boolop
ok( my $results_OR
        = $searcher->search( qq/some words/, { default_boolop => 'OR' } ),
    "search with boolop=OR"
);
ok( my $results_AND
        = $searcher->search( qq/some words/, { default_boolop => 'AND' } ),
    "search with boolop=AND"
);
cmp_ok( $results_OR->hits, '>', $results_AND->hits,
    "OR gives more hits than AND" );

# properties/aliases
ok( my $sorted_by_title = $searcher->search( qq/some/, { order => 'title' } ),
    "search sorted by title"
);
show_results_by_uri($sorted_by_title);
ok( my $sorted_by_lastmod
        = $searcher->search( qq/some/, { order => 'lastmod' } ),
    "search sorted by lastmod"
);
show_results_by_uri($sorted_by_lastmod);

sub make_program {
    ok( my $program = SWISH::Prog->new(
            invindex   => $invindex,
            aggregator => 'fs',
            indexer    => 'ks',
            config     => 't/config.xml',

            #verbose    => 1,
            #debug      => 1,
        ),
        "new program"
    );

    # skip the index dir every time
    # the '1' arg indicates to append the value, not replace.
    $program->config->FileRules( 'dirname is index.swish',               1 );
    $program->config->FileRules( 'filename is config.xml',               1 );
    $program->config->FileRules( 'filename is config-nostemmer.xml',     1 );
    $program->config->FileRules( 'filename contains \.t',                1 );
    $program->config->FileRules( 'dirname contains (testindex|\.index)', 1 );
    $program->config->FileRules( 'filename contains \.conf',             1 );
    $program->config->FileRules( 'dirname contains mailfs',              1 );

    return $program;
}

sub show_results_by_uri {
    my ($results) = @_;
    while ( my $r = $results->next ) {
        diag( $r->uri );
    }
}

END {
    unless ( $ENV{PERL_DEBUG} ) {
        $invindex->path->rmtree;
    }
}