The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package SnipHelp;
use Test::More;
use strict;
use warnings;
use Data::Dump qw( dump );
use File::Slurp;
use Search::Tools::XML;
use Search::Tools::Snipper;
use Search::Tools::UTF8;

# Number of Test::More assertions a single call to test() runs (every
# code path through test() emits exactly this many). It is handed back
# as the last element of test()'s return list -- presumably so calling
# test scripts can compute their plan; confirm against the callers.
my $num_tests = 18;

# test( $file, $q, $snipper_type )
#
# Shared test driver: reads $file, strips markup from it, parses the
# query string $q, then snips and hilites the stripped text with a
# Snipper of the given $snipper_type, emitting Test::More assertions
# along the way.
#
# Returns the list ( $snip, $hilited, $query, $plain, $num_tests ),
# where $num_tests is the file-level assertion count.
sub test {
    my ( $file, $q, $snipper_type ) = @_;

    # Each module must load cleanly before anything else is attempted.
    for my $module (
        'Search::Tools',          'Search::Tools::Snipper',
        'Search::Tools::HiLiter', 'Search::Tools::XML'
        )
    {
        use_ok($module);
    }

    my $XML = Search::Tools::XML->new;
    ok( $XML, "new XML object" );
    my $html = read_file($file);
    ok( $html, "read buf" );
    my $plain = $XML->strip_html($html);
    ok( $plain, "strip_html" );

    # Both branches emit exactly two assertions so the total stays fixed.
    if ( $XML->looks_like_html($html) ) {
        cmp_ok( $html, 'ne', $plain, "strip_html ok" );
        ok( !$XML->looks_like_html($plain), "plain text has no html" );
    }
    else {
        pass("strip_html skipped") for 1 .. 2;
    }

    my $qparser = Search::Tools->parser();
    ok( $qparser, "new qparser" );
    my $query = $qparser->parse($q);
    ok( $query, "new query" );

    # Constructor arguments are kept as ordered lists so they reach new()
    # in the same order as written here.
    my @snipper_args = (
        query     => $query,
        occur     => 1,
        context   => 25,
        max_chars => 190,
        type      => $snipper_type,    # make explicit
                                       #escape    => 1,
    );
    my $snipper = Search::Tools::Snipper->new(@snipper_args);
    ok( $snipper, "new snipper" );

    my @hiliter_args = (
        query => $query,
        tag   => "b",
        class => "x",
        tty   => $snipper->debug,
    );
    my $hiliter = Search::Tools::HiLiter->new(@hiliter_args);
    ok( $hiliter, "new hiliter" );

    my $snip = $snipper->snip($plain);
    ok( $snip, "snip plain" );
    my $hilited = $hiliter->hilite($snip);
    ok( $hilited, "hilite" );

    # Rough word counts: split on runs of non-word characters.
    my @snip_words = split( m/\W+/, $snip );
    my $snip_count = scalar @snip_words;
    ok( $snip_count, "split snipped words" );
    my @plain_words = split( m/\W+/, $plain );
    my $plain_count = scalar @plain_words;
    ok( $plain_count, "split plain words" );

    if ( $plain_count > $snipper->context ) {

        # The -5 slack covers edge cases with many treat_like_phrase
        # matches (email addresses, URLs, etc.). Those produce a lot of
        # tokens in the tokenizer, so the context is fairly high, but
        # neither the QueryParser regex nor the split above catches them.
        cmp_ok( $snip_count, '>=', ( $snipper->context - 5 ),
            "context length >=" );

        #diag( "context == " . scalar(@snip_words) );
    }
    else {
        cmp_ok( $snip_count, '==', $plain_count, "context length ==" );
    }

    return ( $snip, $hilited, $query, $plain, $num_tests );
}

1;