#!/usr/bin/perl
use strict;
use warnings;
use Test::More tests => 61;
use lib 't/lib';
use Utils qw/is_between compare_hash_by_ranges/;
use Benchmark;
use Statistics::Descriptive;
{
# test #1
my $stat = Statistics::Descriptive::Full->new();
my @results = $stat->least_squares_fit();
# TEST
ok (!scalar(@results), "Least-squares results on a non-filled object are empty.");
# test #2
# data are y = 2*x - 1
$stat->add_data( 1, 3, 5, 7 );
@results = $stat->least_squares_fit();
# TEST
is_deeply (
[@results[0..1]],
[-1, 2],
"least_squares_fit returns the correct result."
);
}
{
# test #3
# test error condition on harmonic mean : one element zero
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data( 1.1, 2.9, 4.9, 0.0 );
my $single_result = $stat->harmonic_mean();
# TEST
ok (!defined($single_result),
"harmonic_mean is undefined if there's a 0 datum."
);
}
{
# test #4
# test error condition on harmonic mean : sum of elements zero
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data( 1.0, -1.0 );
my $single_result = $stat->harmonic_mean();
# TEST
ok (!defined($single_result),
"harmonic_mean is undefined if the sum of the reciprocals is zero."
);
}
{
# test #5
# test error condition on harmonic mean : sum of elements near zero
my $stat = Statistics::Descriptive::Full->new();
local $Statistics::Descriptive::Tolerance = 0.1;
$stat->add_data( 1.01, -1.0 );
my $single_result = $stat->harmonic_mean();
# TEST
ok (! defined( $single_result ),
"test error condition on harmonic mean : sum of elements near zero"
);
}
{
# test #6
# test normal function of harmonic mean
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data( 1,2,3 );
my $single_result = $stat->harmonic_mean();
# TEST
ok (scalar(abs( $single_result - 1.6363 ) < 0.001),
"test normal function of harmonic mean",
);
}
{
# test #7
# test stringification of hash keys in frequency distribution
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data(0.1,
0.15,
0.16,
1/3);
my %f = $stat->frequency_distribution(2);
# TEST
compare_hash_by_ranges(
\%f,
[[0.216666,0.216667,3],[0.3333,0.3334,1]],
"Test stringification of hash keys in frequency distribution",
);
# test #8
##Test memorization of last frequency distribution
my %g = $stat->frequency_distribution();
# TEST
is_deeply(
\%f,
\%g,
"memorization of last frequency distribution"
);
}
{
# test #9
# test the frequency distribution with specified bins
my $stat = Statistics::Descriptive::Full->new();
my @freq_bins=(20,40,60,80,100);
$stat->add_data(23.92,
32.30,
15.27,
39.89,
8.96,
40.71,
16.20,
34.61,
27.98,
74.40);
my %f = $stat->frequency_distribution(\@freq_bins);
# TEST
is_deeply(
\%f,
{
20 => 3,
40 => 5,
60 => 1,
80 => 1,
100 => 0,
},
"Test the frequency distribution with specified bins"
);
}
{
# test #10 and #11
# Test the percentile function and caching
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data(-5,-2,4,7,7,18);
##Check algorithm
# TEST
is ($stat->percentile(50),
4,
"percentile function and caching - 1",
);
# TEST
is ($stat->percentile(25),
-2,
"percentile function and caching - 2",
);
}
{
# tests #12 and #13
# Check correct parsing of method parameters
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data(1,2,3,4,5,6,7,8,9,10);
# TEST
is(
$stat->trimmed_mean(0.1,0.1),
$stat->trimmed_mean(0.1),
"correct parsing of method parameters",
);
# TEST
is ($stat->trimmed_mean(0.1,0),
6,
"correct parsing of method parameters - 2",
);
}
{
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data((0.001) x 6);
# TEST
is_between ($stat->variance(),
0,
0.00001,
"Workaround to avoid rounding errors that yield negative variance."
);
# TEST
is_between ($stat->standard_deviation(),
0,
0.00001,
"Workaround to avoid rounding errors that yield negative std-dev."
);
}
{
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data(1, 2, 3, 5);
# TEST
is ($stat->count(),
4,
"There are 4 elements."
);
# TEST
is ($stat->sum(),
11,
"The sum is 11",
);
# TEST
is ($stat->sumsq(),
39,
"The sum of squares is 39"
);
# TEST
is ($stat->min(),
1,
"The minimum is 1."
);
# TEST
is ($stat->max(),
5,
"The maximum is 5."
);
}
{
# test #9
# test the frequency distribution with specified bins
my $stat = Statistics::Descriptive::Full->new();
my @freq_bins=(20,40,60,80,100);
$stat->add_data(23.92,
32.30,
15.27,
39.89,
8.96,
40.71,
16.20,
34.61,
27.98,
74.40);
my $f_d = $stat->frequency_distribution_ref(\@freq_bins);
# TEST
is_deeply(
$f_d,
{
20 => 3,
40 => 5,
60 => 1,
80 => 1,
100 => 0,
},
"Test the frequency distribution returned as a scalar reference"
);
}
{
# test #9
# test the frequency distribution with specified bins
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data(2, 4, 8);
# TEST
is_between(
$stat->geometric_mean(),
(4-1e-4),
(4+1e-4),
"Geometric Mean Test #1",
)
}
{
my $stat = Statistics::Descriptive::Full->new();
my $expected;
$stat->add_data(1 .. 9, 100);
# TEST
$expected = 3.11889574523909;
is_between ($stat->skewness(),
$expected - 1E-13,
$expected + 1E-13,
"Skewness of $expected +/- 1E-13"
);
# TEST
$expected = 9.79924471616366;
is_between ($stat->kurtosis(),
$expected - 1E-13,
$expected + 1E-13,
"Kurtosis of $expected +/- 1E-13"
);
$stat->add_data(100 .. 110);
# now check that cached skew and kurt values are recalculated
# TEST
$expected = -0.306705104889384;
is_between ($stat->skewness(),
$expected - 1E-13,
$expected + 1E-13,
"Skewness of $expected +/- 1E-13"
);
# TEST
$expected = -2.09839497356215;
is_between ($stat->kurtosis(),
$expected - 1E-13,
$expected + 1E-13,
"Kurtosis of $expected +/- 1E-13"
);
}
{
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data(1,2);
my $def;
# TEST
$def = defined $stat->skewness() ? 1 : 0;
is ($def,
0,
'Skewness is undef for 2 samples'
);
$stat->add_data (1);
# TEST
$def = defined $stat->kurtosis() ? 1 : 0;
is ($def,
0,
'Kurtosis is undef for 3 samples'
);
}
{
# This is a fix for:
# https://rt.cpan.org/Ticket/Display.html?id=72495
# Thanks to Robert Messer
my $stat = Statistics::Descriptive::Full->new();
my $ret = $stat->percentile(100);
# TEST
ok (!defined($ret), 'Returns undef and does not die.');
}
# test stats when no data have been added
{
my $stat = Statistics::Descriptive::Full->new();
my ($result, $str);
# An accessor method for _permitted would be handy,
# or one to get all the stats methods
my @methods = qw {
mean sum variance standard_deviation
min mindex max maxdex sample_range
skewness kurtosis median
harmonic_mean geometric_mean
mode least_squares_fit
percentile frequency_distribution
};
# least_squares_fit is handled in an earlier test, so is actually a duplicate here
#diag 'Results are undef when no data added';
# need to update next line when new methods are tested here
# TEST:$method_count=18
foreach my $method (sort @methods) {
$result = $stat->$method;
# TEST*$method_count
ok (!defined ($result), "$method is undef when object has no data.");
}
# quantile and trimmed_mean require valid args, so don't test in the method loop
my $method = 'quantile';
$result = $stat->$method(1);
# TEST
ok (!defined ($result), "$method is undef when object has no data.");
$method = 'trimmed_mean';
$result = $stat->$method(0.1);
# TEST
ok (!defined ($result), "$method is undef when object has no data.");
}
# test SD when only one value added
{
my $stat = Statistics::Descriptive::Full->new();
$stat->add_data( 1 );
my $result = $stat->standard_deviation();
# TEST
ok ($result == 0, "SD is zero when object has one record.");
}
# Test function returns undef in list context when no data added.
# The test itself is almost redundant.
# Fixes https://rt.cpan.org/Ticket/Display.html?id=74890
{
my $stat = Statistics::Descriptive::Full->new();
# TEST
is_deeply(
[ $stat->median(), ],
[ undef() ],
"->median() Returns undef in list-context.",
);
# TEST
is_deeply(
[ $stat->standard_deviation(), ],
[ undef() ],
"->standard_deviation() Returns undef in list-context.",
);
}
{
my $stats = Statistics::Descriptive::Full->new();
$stats->add_data_with_samples([{1 => 10}, {2 => 20}, {3 => 30}, {4 => 40}, {5 => 50}]);
# TEST
is_deeply(
$stats->_data(),
[ 1, 2, 3, 4, 5 ],
'add_data_with_samples: data set is correct',
);
# TEST
is_deeply(
$stats->_samples(),
[ 10, 20, 30, 40, 50 ],
'add_data_with_samples: samples are correct',
);
}
# Tests for mindex and maxdex on unsorted data,
# including when new data are added which should not change the values
{
my $stats_class = 'Statistics::Descriptive::Full';
my $stat1 = $stats_class->new();
my @data1 = (20, 1 .. 3, 100, 1..5);
my @data2 = (25, 30);
my $e_maxdex = 4;
my $e_mindex = 1;
$stat1->add_data(@data1); # initialise
# TEST*2
is ($stat1->mindex, $e_mindex, "initial mindex is correct");
is ($stat1->maxdex, $e_maxdex, "initial maxdex is correct");
# TEST*2
$stat1->add_data(@data2); # add new data
is ($stat1->mindex, $e_mindex, "mindex is correct after new data added");
is ($stat1->maxdex, $e_maxdex, "maxdex is correct after new data added");
# TEST*2
$stat1->median; # trigger a sort
$e_maxdex = scalar @data1 + scalar @data2 - 1;
is ($stat1->mindex, 0, "mindex is correct after sorting");
is ($stat1->maxdex, $e_maxdex, "maxdex is correct after sorting");
}
# what happens when we add new data?
# Recycle the same data so mean, sd etc remain the same
{
my $stats_class = 'Statistics::Descriptive::Full';
my $stat1 = $stats_class->new();
my $stat2 = $stats_class->new();
my @data1 = (1 .. 9, 100);
my @data2 = (100 .. 110);
# sample of methods
my @methods = qw /mean standard_deviation count skewness kurtosis median/;
$stat1->add_data(@data1); # initialise
foreach my $meth (@methods) { # run some methods
$stat1->$meth;
}
$stat1->add_data(@data2); # add new data
foreach my $meth (@methods) { # re-run some methods
$stat1->$meth;
}
$stat2->add_data(@data1, @data2); # initialise with all data
foreach my $meth (@methods) { # run some methods
$stat2->$meth;
}
# TEST
is_deeply (
$stat1,
$stat2,
'stats consistent after adding new data',
);
}