The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
use strict;
use warnings;
use Search::Tools::Tokenizer;
use Search::Tools::UTF8;
use Benchmark qw(:all);
use File::Slurp;

my $greek     = to_utf8( read_file('t/docs/greek_and_ojibwe.txt') );
my $ascii     = to_utf8( read_file('t/docs/ascii.txt') );
my $tokenizer = Search::Tools::Tokenizer->new();

cmpthese(
    10000,
    {   'pure-perl-greek' => sub {
            my $tokens = $tokenizer->tokenize_pp( $greek, \&heat_seeker );
        },
        'xs-greek' => sub {
            my $tokens = $tokenizer->tokenize( $greek, \&heat_seeker );
        },
        'pure-perl-ascii' => sub {
            my $tokens = $tokenizer->tokenize_pp( $ascii, \&heat_seeker );
        },
        'xs-ascii' => sub {
            my $tokens = $tokenizer->tokenize( $ascii, \&heat_seeker );
        },
        'xs-ascii-heatseeker-qr' => sub {
            my $tokens = $tokenizer->tokenize( $ascii, qr/\w/ );
        },
    }
);

sub heat_seeker {

    # trivial case to measure sub call overhead vs qr//
    $_[0]->set_hot( $_[0] =~ m/\w/ );
}