@@ -3,7 +3,7 @@ use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
require Exporter;
-$VERSION = '3.2';
+$VERSION = '3.6';
@ISA = qw(Exporter);
@EXPORT = ();
@EXPORT_OK = qw(min max range sum count mean median mode variance stddev variancep stddevp statshash statsinfo frequencies);
@@ -14,69 +14,83 @@ $VERSION = '3.2';
stats => [qw<statshash statsinfo>],
);
+sub definedvals
+{
+ return grep{defined}@_;
+}
+
sub count
-{ return scalar @_; }
+{
+ return scalar definedvals @_;
+}
sub min
-{
- return unless @_;
- return $_[0] unless @_ > 1;
- my $min= shift;
- foreach(@_) { $min= $_ if $_ < $min; }
+{
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
+ my $min= shift @data;
+ foreach(@data) { $min= $_ if $_ < $min; }
return $min;
}
sub max
{
- return unless @_;
- return $_[0] unless @_ > 1;
- my $max= shift;
- foreach(@_) { $max= $_ if $_ > $max; }
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
+ my $max= shift @data;
+ foreach(@data) { $max= $_ if $_ > $max; }
return $max;
}
sub range
{
- return unless @_;
- return 0 unless @_ > 1;
- return abs($_[1]-$_[0]) unless @_ > 2;
- my $min= shift; my $max= $min;
- foreach(@_) { $min= $_ if $_ < $min; $max= $_ if $_ > $max; }
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ return abs($data[1]-$data[0]) unless @data > 2;
+ my $min= shift @data; my $max= $min;
+ foreach(@data) { $min= $_ if $_ < $min; $max= $_ if $_ > $max; }
return $max - $min;
}
sub sum
{
- return unless @_;
- return $_[0] unless @_ > 1;
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
my $sum;
- foreach(@_) { $sum+= $_; }
+ foreach(@data) { $sum+= $_; }
return $sum;
}
sub mean
{
- return unless @_;
- return $_[0] unless @_ > 1;
- return sum(@_)/scalar(@_);
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
+ return sum(@data)/scalar(@data);
}
sub median
{
- return unless @_;
- return $_[0] unless @_ > 1;
- @_= sort{$a<=>$b}@_;
- return $_[$#_/2] if @_&1;
- my $mid= @_/2;
- return ($_[$mid-1]+$_[$mid])/2;
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
+ @data= sort{$a<=>$b}@data;
+ return $data[$#data/2] if @data&1;
+ my $mid= @data/2;
+ return ($data[$mid-1]+$data[$mid])/2;
}
sub mode
{
- return unless @_;
- return $_[0] unless @_ > 1;
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
my %count;
- foreach(@_) { $count{$_}++; }
+ foreach(@data) { $count{$_}++; }
my $maxhits= max(values %count);
foreach(keys %count) { delete $count{$_} unless $count{$_} == $maxhits; }
return mean(keys %count);
@@ -84,60 +98,65 @@ sub mode
sub variance
{
- return unless @_;
- return 0 unless @_ > 1;
- my $mean= mean @_;
- return (sum map { ($_ - $mean)**2 } @_) / $#_;
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ my $mean= mean @data;
+ return (sum map { ($_ - $mean)**2 } @data) / $#data;
}
sub variancep
{
- return unless @_;
- return 0 unless @_ > 1;
- my $mean= mean @_;
- return (sum map { ($_ - $mean)**2 } @_) / ( $#_ +1 );
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ my $mean= mean @data;
+ return (sum map { ($_ - $mean)**2 } @data) / ( $#data +1 );
}
sub stddev
{
- return unless @_;
- return 0 unless @_ > 1;
- return sqrt variance @_;
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ return sqrt variance @data;
}
sub stddevp
{
- return unless @_;
- return 0 unless @_ > 1;
- return sqrt variancep @_;
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ return sqrt variancep @data;
}
sub statshash
{
- return unless @_;
+ my @data = definedvals @_;
+ return unless @data;
return
(
count => 1,
- min => $_[0],
- max => $_[0],
+ min => $data[0],
+ max => $data[0],
range => 0,
- sum => $_[0],
- mean => $_[0],
- median => $_[0],
- mode => $_[0],
+ sum => $data[0],
+ mean => $data[0],
+ median => $data[0],
+ mode => $data[0],
variance => 0,
stddev => 0,
variancep => 0,
stddevp => 0
- ) unless @_ > 1;
- my $count= scalar(@_);
- @_= sort{$a<=>$b}@_;
+ ) unless @data > 1;
+ my $count= scalar(@data);
+ @data= sort{$a<=>$b}@data;
my $median;
- if(@_&1) { $median= $_[$#_/2]; }
- else { my $mid= @_/2; $median= ($_[$mid-1]+$_[$mid])/2; }
+ if(@data&1) { $median= $data[$#data/2]; }
+ else { my $mid= @data/2; $median= ($data[$mid-1]+$data[$mid])/2; }
my $sum= 0;
my %count;
- foreach(@_) { $sum+= $_; $count{$_}++; }
+ foreach(@data) { $sum+= $_; $count{$_}++; }
my $mean= $sum/$count;
my $maxhits= max(values %count);
foreach(keys %count)
@@ -145,17 +164,17 @@ sub statshash
return
(
count => $count,
- min => $_[0],
- max => $_[-1],
- range => ($_[-1] - $_[0]),
+ min => $data[0],
+ max => $data[-1],
+ range => ($data[-1] - $data[0]),
sum => $sum,
mean => $mean,
median => $median,
mode => mean(keys %count),
- variance => variance(@_),
- stddev => stddev(@_),
- variancep => variancep(@_),
- stddevp => stddevp(@_)
+ variance => variance(@data),
+ stddev => stddev(@data),
+ variancep => variancep(@data),
+ stddevp => stddevp(@data)
);
}
@@ -180,10 +199,11 @@ stddevp = $stats{stddevp}
sub frequencies
{
- return unless @_;
- return ( $_[0], 1 ) unless @_ > 1;
+ my @data = definedvals @_;
+ return unless @data;
+ return ( $data[0], 1 ) unless @data > 1;
my %count;
- foreach(@_) { $count{$_}++; }
+ foreach(@data) { $count{$_}++; }
return %count;
}
@@ -217,13 +237,9 @@ This is also a module for dilettantes.
When you just want something to give some very basic, high-school-level statistical values,
without having to set up and populate an object first, this module may be useful.
-=over 6
-
=head2 NOTE
-This version now implements standard deviation and variance calculated by both the unbiased and biased estimators.
-
-=back
+This module implements standard deviation and variance calculated by both the unbiased and biased estimators.
=head1 FUNCTIONS
@@ -232,21 +248,27 @@ This version now implements standard deviation and variance calculated by both t
=item C<min(@data)>, C<max(@data)>, C<range(@data)>, C<sum(@data)>, C<count(@data)>
Return the minimum value, maximum value, range (max - min),
-sum, or count of values in C<@data>.
-(Count simply returns C<scalar(@data)>.)
+sum, or count of values in C<@data>. Undefined values are ignored.
+(Count returns the number of defined values in C<@data>, not C<scalar(@data)>: undefined values
+are skipped by these functions rather than being treated as zero.)
=item C<mean(@data)>, C<median(@data)>, C<mode(@data)>
-Calculates the mean, median, or mode average of the values in C<@data>.
+Calculates the mean, median, or mode average of the values in C<@data>. Undefined values are ignored.
(In the event of ties in the mode average, their mean is returned.)
=item C<variance(@data)>, C<stddev(@data)>
Return the standard deviation or variance of C<@data> for a sample (same as Excel's STDEV).
+The variance is also called the Unbiased Sample Variance: the sum of the squared
+deviations from the sample mean, divided by N-1 (the sample count minus 1).
+The standard deviation is just the square root of the variance.
=item C<variancep(@data)>, C<stddevp(@data)>
Return the standard deviation or variance of C<@data> for the population (same as Excel's STDEVP).
+The variance is the sum of the squared deviations from the population mean, divided by N (the population size).
+The standard deviation is just the square root of the variance.
=item C<statshash(@data)>
@@ -271,11 +293,27 @@ current namespace (use with caution).
To import the individual statistical funcitons, use the import tag C<:funcs>;
use C<:stats> to import C<statshash(@data)> and C<statsinfo(@data)>.
+=head1 REPOSITORY
+
+L<https://github.com/brianary/Statistics-Lite>
+
=head1 AUTHOR
Brian Lalonde E<lt>brian@webcoder.infoE<gt>,
C<stddev(@data)>, C<stddevp(@data)>, C<variance(@data)>, C<variancep(@data)>,
-and additional motivation by Nathan Haigh.
+and additional motivation by Nathan Haigh, with kind support from Alexander Zangerl.
+
+The project lives at https://github.com/brianary/Statistics-Lite
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2000 Brian Lalonde E<lt>brian@webcoder.infoE<gt>, Nathan Haigh,
+Alexander Zangerl, and Ton Voon.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
=head1 SEE ALSO
@@ -1,7 +1,7 @@
-#!perl
+#!/usr/bin/perl
use strict;
use warnings;
-use Test::More tests => 28;
+use Test::More tests => 60;
BEGIN { use_ok( 'Statistics::Lite', ':all' ); }
@@ -16,13 +16,46 @@ is(max(1,2,3), 3, "call max - functional interface");
is(range(1,2,3), 2, "call range - functional interface");
is(sum(1,2,3), 6, "call sum - functional interface");
is(count(1,2,3), 3, "call count - functional interface");
+is(count(undef,1,2,3), 3, "call count with undef - functional interface");
is(mean(1,2,3), 2, "call mean - functional interface");
is(median(1,2,3), 2, "call median - functional interface");
+is(median(2,4,6,8), 5, "call median with even number of values - functional interface");
is(mode(1,2,3), 2, "call mode - functional interface");
-is(variance(1,2,3), 1, "call variance - functional interface");
-is(stddev(1,2,3), 1, "call stddev - functional interface");
+is(min(1,-5,8), -5, "call min with negative numbers" );
+is(range(-6,-9), 3, "call range with negative values" );
+is(range(6,-9), 15, "call range with data crossing 0" );
+# undef checking
+is(min(undef), undef, "call min with only single undefined value" );
+is(max(undef), undef, "call max with only single undefined value" );
+is(min(), undef, "call min without values" );
+is(max(), undef, "call max without values" );
+is(min(6,undef,10), 6, "call min with undefined value" );
+is(max(-6,-10,undef), -6, "call max with undefined value" );
+is(min(undef, 7, -5), -5, "call min with initial undefined value" );
+is(max(undef, 7, -5), 7, "call max with initial undefined value" );
+is(min(undef,undef,undef), undef, "call min with only undefined values" );
+is(max(undef,undef,undef), undef, "call max with only undefined values" );
+is(count(undef, 7, -5), 2, "call count with undefined value" );
+is(sum(undef, 7, -5), 2, "call sum with undefined value" );
+is(mean(undef, 7, -5), 1, "call mean with undefined value" );
+is(count(undef,undef,undef), 0, "call count with only undefined values" );
+is(mean(undef,undef,undef), undef, "call mean with only undefined values" );
+is(range(6,9,undef), 3, "call range with undefined value" );
+is(range(undef,6,9), 3, "call range with leading undefined value" );
+is(range(undef,undef,undef,7), 0, "call range with single defined value" );
+is(range(undef,undef,undef), undef, "call range with only undefined values" );
+
+# unbiased sample test
+my @values = (3, -10, 8, undef, 7, undef, 8, 3, 6, 3);
+is(mean(@values), 3.5, "call unbiased sample set mean" );
+is(median(@values), 4.5, "call unbiased sample set median" );
+is(mode(@values), 3, "call unbiased sample set mode" );
+is(variance(1,2,3), 1, "call unbiased sample set variance");
+is(stddev(1,2,3), 1, "call unbiased sample set standard deviation");
+
+# population sample test
is(variancep(2,4,2,4), 1, "call variancep - functional interface");
is(stddevp(2,4,2,4), 1, "call stddevp - functional interface");
@@ -41,12 +74,21 @@ is($stats{mode}, 2, "call mode - hash-based interface");
is($stats{variance}, 1, "call variance - hash-based interface");
is($stats{stddev}, 1, "call stddev - hash-based interface");
+# a tiny bit more substantial data set
+%stats = statshash(0..10,1);
+is($stats{sum},56,"call sum - hash-based");
+is($stats{mean},4+2/3,"call mean - hash-based");
+is($stats{variance},11+1/3,"call variance - hash-based");
+is($stats{variancep},10.3+8/90,"call variancep - hash-based");
+
%stats= statshash(2,4,2,4);
ok($stats{variancep}, "call variancep - hash-based interface");
ok($stats{stddevp}, "call stddevp - hash-based interface");
%stats= frequencies(1,2,3,3);
-is($stats{1},1, "frequencies matched correctly");
-is($stats{2},1, "frequencies matched correctly");
-is($stats{3},2, "frequencies matched correctly");
+is($stats{1}, 1, "frequencies matched correctly for 1");
+is($stats{2}, 1, "frequencies matched correctly for 2");
+is($stats{3}, 2, "frequencies matched correctly for 3");
+is($stats{4}, undef, "frequencies matched correctly for 4");
+