t/27-bigfile.t - metacpan.org

use strict;
use Test::More;
use lib 't';
plan skip_all => "set TEST_BIG_FILE to run $0"
    unless $ENV{TEST_BIG_FILE};

SKIP: {
    eval 'use IO::Uncompress::Gunzip ()';
    skip 'IO::Uncompress::Gunzip is required to test bigfile', 2
        if $@;

    use Data::Dump qw( dump );
    use Search::Tools::XML;
    use Search::Tools::Snipper;
    use Search::Tools::Tokenizer;

    my $file = 't/docs/bigfile.html.gz';
    my $q    = qq/child adoption/;
    my $fh   = IO::Uncompress::Gunzip->new($file);
    my $buf;
    {
        local $/;
        $buf = <$fh>;
        $buf = $buf x 20;    # 20x for big fun
    }
    diag( "working on " . length($buf) . " html bytes" );
    my $plain = Search::Tools::XML->strip_html($buf);
    diag( "working on " . length($plain) . " plain bytes" );
    my $snipper = Search::Tools::Snipper->new(
        query        => $q,
        occur        => 1,
        context      => 25,
        max_chars    => 190,
        as_sentences => 1,
        type         => 'offset',    # because we want to profile
    );

    #my $tokenizer = Search::Tools::Tokenizer->new();
    #diag("get_offsets");
    #my $offsets = $tokenizer->get_offsets($plain, qr/child/i);
    #diag("get_offsets done");
    #diag(dump $offsets);

    my $snip = $snipper->snip($plain);

    like( $snip, qr/child .+ child/, "match snip" );
    cmp_ok( length $snip, '<', 200, "length is sane" );
    
    done_testing(2);
    #sleep 5;   # just so we can measure mem use manually

}

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)