# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl t/getsimilaritystrings.t'
# Note that because of the file paths used this must be run from the
# directory in which /t resides
#
# Last modified by : '$Id: getsimilaritystrings.t,v 1.3 2008/04/04 18:33:52 tpederse Exp $'
#########################

# change 'tests => 1' to 'tests => last_test_to_print';

use strict;
use warnings;

# The plan is fixed at 22: 2 module loads, 2 constructor checks, and
# 9 assertions for each of the normalized and unnormalized passes.
use Test::More tests => 22;

# Quote the module names so they are unambiguous strings under strict.
BEGIN { use_ok('Text::Similarity') }
BEGIN { use_ok('Text::Similarity::Overlaps') }

# test cases

my $string1 = 'this is our          test case today';
my $string2 = '   this is our     test case                      today           ';
my $string3 = ' winston churchill winston churchill ';
my $string4 = ' winston churchill';
my $string5 = ' WINSTON CHURCHILL';
my $string6 = ' our test case today is winston churchill';
my $string7 = ' ';

# deliberately left undefined, to exercise the undefined-input case
my $string99;

my $score;

# -----------------------------------------------------------------

# these results should be normalized

my %opt_hash = ('normalize' => 1);

my $overlapmod = Text::Similarity::Overlaps->new(\%opt_hash);
ok($overlapmod, "constructed Overlaps object, normalized");

# exact matching between two identical strings

$score = $overlapmod->getSimilarityStrings($string1, $string1);
is($score, 1, "self similarity of string1, normalized");

# differ only by spaces

$score = $overlapmod->getSimilarityStrings($string1, $string2);
is($score, 1, "similarity of string1 and string2, normalized");

# differ by half the number of words

$score = $overlapmod->getSimilarityStrings($string3, $string4);

# answer is around .666

cmp_ok($score, '<', .7, "similarity of string3 and string4 below .7, normalized");
cmp_ok($score, '>', .6, "similarity of string3 and string4 above .6, normalized");

# differ due to case, but that is ignored

$score = $overlapmod->getSimilarityStrings($string4, $string5);
is($score, 1, "similarity of string4 and string5, normalized");

# partial match

$score = $overlapmod->getSimilarityStrings($string1, $string6);
cmp_ok($score, '<', .8, "similarity of string1 and string6 below .8, normalized");
cmp_ok($score, '>', .7, "similarity of string1 and string6 above .7, normalized");

# test on empty string

$score = $overlapmod->getSimilarityStrings($string7, $string7);
is($score, 0, "empty string7, normalized");

# test on empty string with non-empty

$score = $overlapmod->getSimilarityStrings($string7, $string1);
is($score, 0, "empty string7 with string1, normalized");

# test on undefined strings

$score = $overlapmod->getSimilarityStrings($string99, $string99);
is($score, undef, "undefined string99, normalized");

# -----------------------------------------------------------------

# these results should NOT be normalized

%opt_hash = ('normalize' => 0);

$overlapmod = Text::Similarity::Overlaps->new(\%opt_hash);
ok($overlapmod, "constructed Overlaps object, unnormalized");

# exact matching between two identical strings

$score = $overlapmod->getSimilarityStrings($string1, $string1);
is($score, 6, "self similarity of string1, unnormalized");

# differ only by spaces

$score = $overlapmod->getSimilarityStrings($string1, $string2);
is($score, 6, "similarity of string1 and string2, unnormalized");

# differ by half the number of words

$score = $overlapmod->getSimilarityStrings($string3, $string4);
is($score, 2, "similarity of string3 and string4, unnormalized");

# differ due to case, but that is ignored

$score = $overlapmod->getSimilarityStrings($string4, $string5);
is($score, 2, "similarity of string4 and string5, unnormalized");

# partial match

$score = $overlapmod->getSimilarityStrings($string1, $string6);
is($score, 5, "similarity of string1 and string6, unnormalized");

# test on empty string

$score = $overlapmod->getSimilarityStrings($string7, $string7);
is($score, 0, "empty string7, unnormalized");

# test on empty string with non-empty

$score = $overlapmod->getSimilarityStrings($string7, $string1);
is($score, 0, "empty string7 with string1, unnormalized");

# test on undefined strings

$score = $overlapmod->getSimilarityStrings($string99, $string99);
is($score, undef, "undefined string99, unnormalized");