# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl t/text_similarity.t'
# Note that because of the file paths used this must be run from the 
# directory in which /t resides 
#
# Last modified by : $Id: text_similarity.t,v 1.1.1.1 2013/06/26 02:38:12 tpederse Exp $
#########################

# The plan declared below must equal the total number of assertions in this file.

use strict;
use warnings;

use Test::More tests => 28;

# set up file access in an OS neutral way
use File::Spec;

# All paths are relative, so this script must be run from the directory
# that contains t/, bin/ and samples/.
my $text_similarity_pl = File::Spec->catfile('bin',     'text_similarity.pl');
my $stoplist_txt       = File::Spec->catfile('samples', 'stoplist-nsp.regex');
my $file1_txt          = File::Spec->catfile('t',       'file1.txt');
my $file11_txt         = File::Spec->catfile('t',       'file11.txt');
my $file2_txt          = File::Spec->catfile('t',       'file2.txt');
my $file22_txt         = File::Spec->catfile('t',       'file22.txt');

# ---------------------------------------------------------------------
# every fixture must exist before any comparison is attempted (6 tests)

for my $path (
    $text_similarity_pl, $stoplist_txt,
    $file1_txt,          $file11_txt,
    $file2_txt,          $file22_txt,
  )
{
    ok(-e $path, "$path exists");
}

# use this to find Text::Similarity::Overlaps module

my $inc = '-Iblib/lib';

# Run text_similarity.pl on two files with the Text::Similarity::Overlaps
# measure and return whatever it printed, minus the trailing newline.
#
#   $options - array ref of extra command line words, placed before --type
#   $first   - path of the first file to compare
#   $second  - path of the second file to compare
#
# Always returns a defined scalar: if the command could not be run at all
# a diagnostic is printed and the empty string is returned, so the calling
# assertion fails with a visible reason.
sub run_similarity {
    my ($options, $first, $second) = @_;

    my $command = join ' ',
        $^X, $inc, $text_similarity_pl, @{$options},
        '--type', 'Text::Similarity::Overlaps',
        $first, $second;

    my $output = `$command`;
    if (!defined $output) {
        diag("could not run '$command': $!");
        $output = '';
    }
    chomp $output;

    return $output;
}

# ---------------------------------------------------------------------
# pairs of *different* files; the same expectations hold for each pair
# (6 tests per pair, 18 in total)

my @different_pairs = (
    [ $file1_txt,  $file2_txt  ],    # baseline pair
    [ $file1_txt,  $file22_txt ],    # one file has all content on one line
    [ $file11_txt, $file22_txt ],    # both files have all content on one line
);

for my $pair (@different_pairs) {
    my ($first, $second) = @{$pair};
    my $label = "$first vs $second";
    my $score;

    # default operation: normalized result is around .5
    $score = run_similarity([], $first, $second);
    cmp_ok($score, '>', .4, "default score is above .4 ($label)");
    cmp_ok($score, '<', .6, "default score is below .6 ($label)");

    # no normalization: raw score
    $score = run_similarity(['--nonormalize'], $first, $second);
    is($score, 40, "basic file comparison with nonormalize ($label)");

    # normalization and stoplist: result is again around .5
    $score = run_similarity(['--stoplist', $stoplist_txt], $first, $second);
    cmp_ok($score, '>', .4, "score with stoplist is above .4 ($label)");
    cmp_ok($score, '<', .6, "score with stoplist is below .6 ($label)");

    # no normalization, with stoplist: raw score
    $score = run_similarity(['--stoplist', $stoplist_txt, '--nonormalize'],
        $first, $second);
    is($score, 21,
        "basic file comparison with nonormalize and stoplist ($label)");
}

# ---------------------------------------------------------------------
# same option combinations as above, except the two files are identical
# (4 tests)

# default operation: normalized result is exactly 1
is(run_similarity([], $file1_txt, $file1_txt),
    1, "test on identical files");

# no normalization
is(run_similarity(['--nonormalize'], $file1_txt, $file1_txt),
    80, "basic file comparison with nonormalize on identical files");

# normalization and stoplist: result is exactly 1
is(run_similarity(['--stoplist', $stoplist_txt], $file1_txt, $file1_txt),
    1, "test on identical files w stoplist");

# no normalization, with stoplist
is(
    run_similarity(['--stoplist', $stoplist_txt, '--nonormalize'],
        $file1_txt, $file1_txt),
    44,
    "basic file comparison with nonormalize and stoplist on identical files"
);