# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl t/getsimilaritystrings.t'
# Note that because of the file paths used this must be run from the
# directory in which /t resides
#
# Last modified by : '$Id: getsimilaritystrings.t,v 1.3 2008/04/04 18:33:52 tpederse Exp $'
#########################
# change 'tests => 1' to 'tests => last_test_to_print';
use strict;
use warnings;

use Test::More tests => 22;

# Module names are quoted so that they remain valid under 'use strict'.
BEGIN { use_ok('Text::Similarity') }
BEGIN { use_ok('Text::Similarity::Overlaps') }

# -----------------------------------------------------------------
# Test fixtures, shared by the normalized and unnormalized sections.
# -----------------------------------------------------------------
my $string1 = 'this is our test case today';                 # six words
my $string2 = ' this is our test case today ';               # string1 plus padding
my $string3 = ' winston churchill winston churchill ';       # string4 repeated twice
my $string4 = ' winston churchill';
my $string5 = ' WINSTON CHURCHILL';                          # string4 in upper case
my $string6 = ' our test case today is winston churchill';   # overlaps string1 in part
my $string7 = ' ';                                           # whitespace only ("empty")
my $string99;                                                # deliberately left undefined

my $score;

# -----------------------------------------------------------------
# these results should be normalized
# -----------------------------------------------------------------
my $normalized = Text::Similarity::Overlaps->new({ normalize => 1 });
ok($normalized, 'created Overlaps object with normalize => 1');

# exact matching between two identical strings
$score = $normalized->getSimilarityStrings($string1, $string1);
is($score, 1, 'self similarity of string1, normalized');

# differ only by spaces
$score = $normalized->getSimilarityStrings($string1, $string2);
is($score, 1, 'similarity of string1 and string2, normalized');

# differ by half the number of words; the answer is around .666, so the
# score is bracketed rather than compared exactly
$score = $normalized->getSimilarityStrings($string3, $string4);
cmp_ok($score, '<', .7, 'similarity of string3 and string4 is below .7, normalized');
cmp_ok($score, '>', .6, 'similarity of string3 and string4 is above .6, normalized');

# differ due to case, but that is ignored
$score = $normalized->getSimilarityStrings($string4, $string5);
is($score, 1, 'similarity of string4 and string5, normalized');

# partial match; again bracketed rather than compared exactly
$score = $normalized->getSimilarityStrings($string1, $string6);
cmp_ok($score, '<', .8, 'similarity of string1 and string6 is below .8, normalized');
cmp_ok($score, '>', .7, 'similarity of string1 and string6 is above .7, normalized');

# test on empty (whitespace-only) string
$score = $normalized->getSimilarityStrings($string7, $string7);
is($score, 0, 'empty string7, normalized');

# test on empty string with non-empty
$score = $normalized->getSimilarityStrings($string7, $string1);
is($score, 0, 'empty string7 with string1, normalized');

# test on undefined strings
$score = $normalized->getSimilarityStrings($string99, $string99);
is($score, undef, 'undefined string99, normalized');

# -----------------------------------------------------------------
# these results should NOT be normalized
# -----------------------------------------------------------------
my $unnormalized = Text::Similarity::Overlaps->new({ normalize => 0 });
ok($unnormalized, 'created Overlaps object with normalize => 0');

# exact matching between two identical strings (string1 has six words)
$score = $unnormalized->getSimilarityStrings($string1, $string1);
is($score, 6, 'self similarity of string1, unnormalized');

# differ only by spaces
$score = $unnormalized->getSimilarityStrings($string1, $string2);
is($score, 6, 'similarity of string1 and string2, unnormalized');

# differ by half the number of words
$score = $unnormalized->getSimilarityStrings($string3, $string4);
is($score, 2, 'similarity of string3 and string4, unnormalized');

# differ due to case, but that is ignored
$score = $unnormalized->getSimilarityStrings($string4, $string5);
is($score, 2, 'similarity of string4 and string5, unnormalized');

# partial match
$score = $unnormalized->getSimilarityStrings($string1, $string6);
is($score, 5, 'similarity of string1 and string6, unnormalized');

# test on empty (whitespace-only) string
$score = $unnormalized->getSimilarityStrings($string7, $string7);
is($score, 0, 'empty string7, unnormalized');

# test on empty string with non-empty
$score = $unnormalized->getSimilarityStrings($string7, $string1);
is($score, 0, 'empty string7 with string1, unnormalized');

# test on undefined strings
$score = $unnormalized->getSimilarityStrings($string99, $string99);
is($score, undef, 'undefined string99, unnormalized');