@@ -1,23 +1,35 @@
Revision history for Perl extension Statistics::Lite.
-0.01 Thu Sep 21 09:56:43 2000
- - original version; created by h2xs 1.19
-0.05 Mon Jan 22 08:13:00 2001
- - various optimizations
-1.00 Thu Jan 10 14:25:38 2002
- - fixed median algorithm
-1.01 Thu Jan 10 14:59:52 2002
- - bugfix version
-1.02 Fri Nov 4 12:25:00 2005
- - expanded explanation of limitations
-
- ... time passed, people laughed and loved and sat, and we all learned something about the true meaning of New Year's Day ...
-
-3.00 Sat Mar 3 16:48:00 2007
- - after much delay, stddev has been restored to the sample or biased version of the formula; if you need the unbiased version, use stddev(0,@data)
- - added frequencies() function from Nathan Haigh
+3.61 Fri 27 Feb 2015 01:14:44 PM PST
+ - Upgraded ExtUtils::MakeMaker and repackaged (no functionality changes)
+3.6 Sun 22 Feb 2015 09:59:02 PM PST
+ - Added github repo to dist metadata and pod
+3.5 Sat 21 Feb 2015 11:38:52 PM PST
+ - Exclude undefined values, per CPAN bug # 50448
+3.4 Mon Feb 16 10:04:50 2015
+ - documentation fixes and additional tests
+ - now version controlled! https://github.com/brianary/Statistics-Lite
+3.3 Fri Feb 13 10:03:45 2015
+ - added license info
+3.2 Sun Jun 24 11:13:00 2007
+ - updated tests
3.1
- fixed and renamed biased (population) versions
- added unbiased versions
-3.2 Sun Jun 24 11:13:00 2007
- - updated tests
+3.00 Sat Mar 3 16:48:00 2007
+ - after much delay, stddev has been restored to the sample or biased version of the formula; if you need the unbiased version, use stddev(0,@data)
+ - added frequencies() function from Nathan Haigh
+
+ ... time passed, people laughed and loved and sat, and we all learned something about the true meaning of New Year's Day ...
+
+1.02 Fri Nov 4 12:25:00 2005
+ - expanded explanation of limitations
+1.01 Thu Jan 10 14:59:52 2002
+ - bugfix version
+1.00 Thu Jan 10 14:25:38 2002
+ - fixed median algorithm
+0.05 Mon Jan 22 08:13:00 2001
+ - various optimizations
+0.01 Thu Sep 21 09:56:43 2000
+ - original version; created by h2xs 1.19
+
@@ -3,7 +3,7 @@ use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
require Exporter;
-$VERSION = '3.2';
+$VERSION = '3.61';
@ISA = qw(Exporter);
@EXPORT = ();
@EXPORT_OK = qw(min max range sum count mean median mode variance stddev variancep stddevp statshash statsinfo frequencies);
@@ -14,69 +14,83 @@ $VERSION = '3.2';
stats => [qw<statshash statsinfo>],
);
+sub definedvals
+{
+ return grep{defined}@_;
+}
+
sub count
-{ return scalar @_; }
+{
+ return scalar definedvals @_;
+}
sub min
-{
- return unless @_;
- return $_[0] unless @_ > 1;
- my $min= shift;
- foreach(@_) { $min= $_ if $_ < $min; }
+{
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
+ my $min= shift @data;
+ foreach(@data) { $min= $_ if $_ < $min; }
return $min;
}
sub max
{
- return unless @_;
- return $_[0] unless @_ > 1;
- my $max= shift;
- foreach(@_) { $max= $_ if $_ > $max; }
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
+ my $max= shift @data;
+ foreach(@data) { $max= $_ if $_ > $max; }
return $max;
}
sub range
{
- return unless @_;
- return 0 unless @_ > 1;
- return abs($_[1]-$_[0]) unless @_ > 2;
- my $min= shift; my $max= $min;
- foreach(@_) { $min= $_ if $_ < $min; $max= $_ if $_ > $max; }
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ return abs($data[1]-$data[0]) unless @data > 2;
+ my $min= shift @data; my $max= $min;
+ foreach(@data) { $min= $_ if $_ < $min; $max= $_ if $_ > $max; }
return $max - $min;
}
sub sum
{
- return unless @_;
- return $_[0] unless @_ > 1;
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
my $sum;
- foreach(@_) { $sum+= $_; }
+ foreach(@data) { $sum+= $_; }
return $sum;
}
sub mean
{
- return unless @_;
- return $_[0] unless @_ > 1;
- return sum(@_)/scalar(@_);
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
+ return sum(@data)/scalar(@data);
}
sub median
{
- return unless @_;
- return $_[0] unless @_ > 1;
- @_= sort{$a<=>$b}@_;
- return $_[$#_/2] if @_&1;
- my $mid= @_/2;
- return ($_[$mid-1]+$_[$mid])/2;
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
+ @data= sort{$a<=>$b}@data;
+ return $data[$#data/2] if @data&1;
+ my $mid= @data/2;
+ return ($data[$mid-1]+$data[$mid])/2;
}
sub mode
{
- return unless @_;
- return $_[0] unless @_ > 1;
+ my @data = definedvals @_;
+ return unless @data;
+ return $data[0] unless @data > 1;
my %count;
- foreach(@_) { $count{$_}++; }
+ foreach(@data) { $count{$_}++; }
my $maxhits= max(values %count);
foreach(keys %count) { delete $count{$_} unless $count{$_} == $maxhits; }
return mean(keys %count);
@@ -84,60 +98,65 @@ sub mode
sub variance
{
- return unless @_;
- return 0 unless @_ > 1;
- my $mean= mean @_;
- return (sum map { ($_ - $mean)**2 } @_) / $#_;
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ my $mean= mean @data;
+ return (sum map { ($_ - $mean)**2 } @data) / $#data;
}
sub variancep
{
- return unless @_;
- return 0 unless @_ > 1;
- my $mean= mean @_;
- return (sum map { ($_ - $mean)**2 } @_) / ( $#_ +1 );
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ my $mean= mean @data;
+ return (sum map { ($_ - $mean)**2 } @data) / ( $#data +1 );
}
sub stddev
{
- return unless @_;
- return 0 unless @_ > 1;
- return sqrt variance @_;
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ return sqrt variance @data;
}
sub stddevp
{
- return unless @_;
- return 0 unless @_ > 1;
- return sqrt variancep @_;
+ my @data = definedvals @_;
+ return unless @data;
+ return 0 unless @data > 1;
+ return sqrt variancep @data;
}
sub statshash
{
- return unless @_;
+ my @data = definedvals @_;
+ return unless @data;
return
(
count => 1,
- min => $_[0],
- max => $_[0],
+ min => $data[0],
+ max => $data[0],
range => 0,
- sum => $_[0],
- mean => $_[0],
- median => $_[0],
- mode => $_[0],
+ sum => $data[0],
+ mean => $data[0],
+ median => $data[0],
+ mode => $data[0],
variance => 0,
stddev => 0,
variancep => 0,
stddevp => 0
- ) unless @_ > 1;
- my $count= scalar(@_);
- @_= sort{$a<=>$b}@_;
+ ) unless @data > 1;
+ my $count= scalar(@data);
+ @data= sort{$a<=>$b}@data;
my $median;
- if(@_&1) { $median= $_[$#_/2]; }
- else { my $mid= @_/2; $median= ($_[$mid-1]+$_[$mid])/2; }
+ if(@data&1) { $median= $data[$#data/2]; }
+ else { my $mid= @data/2; $median= ($data[$mid-1]+$data[$mid])/2; }
my $sum= 0;
my %count;
- foreach(@_) { $sum+= $_; $count{$_}++; }
+ foreach(@data) { $sum+= $_; $count{$_}++; }
my $mean= $sum/$count;
my $maxhits= max(values %count);
foreach(keys %count)
@@ -145,17 +164,17 @@ sub statshash
return
(
count => $count,
- min => $_[0],
- max => $_[-1],
- range => ($_[-1] - $_[0]),
+ min => $data[0],
+ max => $data[-1],
+ range => ($data[-1] - $data[0]),
sum => $sum,
mean => $mean,
median => $median,
mode => mean(keys %count),
- variance => variance(@_),
- stddev => stddev(@_),
- variancep => variancep(@_),
- stddevp => stddevp(@_)
+ variance => variance(@data),
+ stddev => stddev(@data),
+ variancep => variancep(@data),
+ stddevp => stddevp(@data)
);
}
@@ -180,10 +199,11 @@ stddevp = $stats{stddevp}
sub frequencies
{
- return unless @_;
- return ( $_[0], 1 ) unless @_ > 1;
+ my @data = definedvals @_;
+ return unless @data;
+ return ( $data[0], 1 ) unless @data > 1;
my %count;
- foreach(@_) { $count{$_}++; }
+ foreach(@data) { $count{$_}++; }
return %count;
}
@@ -217,13 +237,9 @@ This is also a module for dilettantes.
When you just want something to give some very basic, high-school-level statistical values,
without having to set up and populate an object first, this module may be useful.
-=over 6
-
=head2 NOTE
-This version now implements standard deviation and variance calculated by both the unbiased and biased estimators.
-
-=back
+This module implements standard deviation and variance calculated by both the unbiased and biased estimators.
=head1 FUNCTIONS
@@ -232,21 +248,27 @@ This version now implements standard deviation and variance calculated by both t
=item C<min(@data)>, C<max(@data)>, C<range(@data)>, C<sum(@data)>, C<count(@data)>
Return the minimum value, maximum value, range (max - min),
-sum, or count of values in C<@data>.
-(Count simply returns C<scalar(@data)>.)
+sum, or count of values in C<@data>. Undefined values are ignored.
+(Count returns the number of defined values in C<@data>, which can be smaller than C<scalar(@data)>
+when the data contains undefined values.)
=item C<mean(@data)>, C<median(@data)>, C<mode(@data)>
-Calculates the mean, median, or mode average of the values in C<@data>.
+Calculates the mean, median, or mode average of the values in C<@data>. Undefined values are ignored.
(In the event of ties in the mode average, their mean is returned.)
=item C<variance(@data)>, C<stddev(@data)>
Return the standard deviation or variance of C<@data> for a sample (same as Excel's STDEV).
+This is also called the Unbiased Sample Variance and involves dividing the
+sum of the sample's squared deviations from the mean by N-1 (the sample count minus 1).
+The standard deviation is just the square root of the variance.
=item C<variancep(@data)>, C<stddevp(@data)>
Return the standard deviation or variance of C<@data> for the population (same as Excel's STDEVP).
+This involves dividing the sum of the population's squared deviations from the mean by N (the population size).
+The standard deviation is just the square root of the variance.
=item C<statshash(@data)>
@@ -271,11 +293,27 @@ current namespace (use with caution).
To import the individual statistical funcitons, use the import tag C<:funcs>;
use C<:stats> to import C<statshash(@data)> and C<statsinfo(@data)>.
+=head1 REPOSITORY
+
+L<https://github.com/brianary/Statistics-Lite>
+
=head1 AUTHOR
Brian Lalonde E<lt>brian@webcoder.infoE<gt>,
C<stddev(@data)>, C<stddevp(@data)>, C<variance(@data)>, C<variancep(@data)>,
-and additional motivation by Nathan Haigh.
+and additional motivation by Nathan Haigh, with kind support from Alexander Zangerl.
+
+The project lives at https://github.com/brianary/Statistics-Lite
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2000 Brian Lalonde E<lt>brian@webcoder.infoE<gt>, Nathan Haigh,
+Alexander Zangerl, and Ton Voon.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
=head1 SEE ALSO
@@ -1,6 +1,7 @@
-Changes
-Lite.pm
-Makefile.PL
-MANIFEST
-test.pl
-META.yml Module meta-data (added by MakeMaker)
+Changes
+Lite.pm
+Makefile.PL
+MANIFEST
+test.pl
+META.yml Module meta-data (added by MakeMaker)
+META.json Module JSON meta-data (added by MakeMaker)
@@ -0,0 +1,43 @@
+{
+ "abstract" : "Small stats stuff.",
+ "author" : [
+ "Brian Lalonde (brian@webcoder.info)"
+ ],
+ "dynamic_config" : 1,
+ "generated_by" : "ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.120921",
+ "license" : [
+ "perl_5"
+ ],
+ "meta-spec" : {
+ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+ "version" : "2"
+ },
+ "name" : "Statistics-Lite",
+ "no_index" : {
+ "directory" : [
+ "t",
+ "inc"
+ ]
+ },
+ "prereqs" : {
+ "build" : {
+ "requires" : {
+ "ExtUtils::MakeMaker" : "0"
+ }
+ },
+ "configure" : {
+ "requires" : {
+ "ExtUtils::MakeMaker" : "0"
+ }
+ }
+ },
+ "release_status" : "stable",
+ "resources" : {
+ "repository" : {
+ "type" : "git",
+ "url" : "https://github.com/brianary/Statistics-Lite.git",
+ "web" : "https://github.com/brianary/Statistics-Lite"
+ }
+ },
+ "version" : "3.61"
+}
@@ -1,10 +1,22 @@
-# http://module-build.sourceforge.net/META-spec.html
-#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
-name: Statistics-Lite
-version: 3.2
-version_from: Lite.pm
-installdirs: site
-requires:
-
-distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.30
+---
+abstract: 'Small stats stuff.'
+author:
+ - 'Brian Lalonde (brian@webcoder.info)'
+build_requires:
+ ExtUtils::MakeMaker: 0
+configure_requires:
+ ExtUtils::MakeMaker: 0
+dynamic_config: 1
+generated_by: 'ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.120921'
+license: perl
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
+name: Statistics-Lite
+no_index:
+ directory:
+ - t
+ - inc
+resources:
+ repository: https://github.com/brianary/Statistics-Lite.git
+version: 3.61
@@ -1,27 +1,30 @@
use ExtUtils::MakeMaker;
-sub MY::postamble { <<'.'; }
-
-dist_both : dist ppmdist
-
-ppmdist : ppd pure_all
- $(TAR) $(TARFLAGS) $(DISTNAME).ppm.tar blib
- $(RM_RF) blib
- $(RM_RF) pm_to_blib
- $(COMPRESS) $(DISTNAME).ppm.tar
-.
+my $mm_ver = $ExtUtils::MakeMaker::VERSION;
+if ($mm_ver =~ /_/) { # developer release
+ $mm_ver = eval $mm_ver;
+ die $@ if $@;
+}
WriteMakefile(
NAME => 'Statistics::Lite',
AUTHOR => 'Brian Lalonde (brian@webcoder.info)',
+ LICENSE => 'perl',
VERSION_FROM => 'Lite.pm',
ABSTRACT_FROM => 'Lite.pm',
- BINARY_LOCATION => 'Statistics-Lite.ppm.tar.gz',
- dist =>
- {
- TAR => 'C:\\Tools\\cygwin\\bin\\tar.exe',
- TARFLAGS => 'cvf',
- COMPRESS => 'C:\\Tools\\cygwin\\bin\\gzip.exe --best',
- SUFFIX => '.gz',
- },
+
+ ($mm_ver <= 6.45
+ ? ()
+ : (META_MERGE => {
+ 'meta-spec' => { version => 2 },
+ resources => {
+ repository => {
+ type => 'git',
+ web => 'https://github.com/brianary/Statistics-Lite',
+ url => 'https://github.com/brianary/Statistics-Lite.git',
+ },
+ },
+ })
+ ),
+
);
@@ -1,7 +1,7 @@
-#!perl
+#!/usr/bin/perl
use strict;
use warnings;
-use Test::More tests => 28;
+use Test::More tests => 60;
BEGIN { use_ok( 'Statistics::Lite', ':all' ); }
@@ -16,13 +16,46 @@ is(max(1,2,3), 3, "call max - functional interface");
is(range(1,2,3), 2, "call range - functional interface");
is(sum(1,2,3), 6, "call sum - functional interface");
is(count(1,2,3), 3, "call count - functional interface");
+is(count(undef,1,2,3), 3, "call count with undef - functional interface");
is(mean(1,2,3), 2, "call mean - functional interface");
is(median(1,2,3), 2, "call median - functional interface");
+is(median(2,4,6,8), 5, "call median with even number of values - functional interface");
is(mode(1,2,3), 2, "call mode - functional interface");
-is(variance(1,2,3), 1, "call variance - functional interface");
-is(stddev(1,2,3), 1, "call stddev - functional interface");
+is(min(1,-5,8), -5, "call min with negative numbers" );
+is(range(-6,-9), 3, "call range with negative values" );
+is(range(6,-9), 15, "call range with data crossing 0" );
+# undef checking
+is(min(undef), undef, "call min with only single undefined value" );
+is(max(undef), undef, "call max with only single undefined value" );
+is(min(), undef, "call min without values" );
+is(max(), undef, "call max without values" );
+is(min(6,undef,10), 6, "call min with undefined value" );
+is(max(-6,-10,undef), -6, "call max with undefined value" );
+is(min(undef, 7, -5), -5, "call min with initial undefined value" );
+is(max(undef, 7, -5), 7, "call max with initial undefined value" );
+is(min(undef,undef,undef), undef, "call min with only undefined values" );
+is(max(undef,undef,undef), undef, "call max with only undefined values" );
+is(count(undef, 7, -5), 2, "call count with undefined value" );
+is(sum(undef, 7, -5), 2, "call sum with undefined value" );
+is(mean(undef, 7, -5), 1, "call mean with undefined value" );
+is(count(undef,undef,undef), 0, "call count with only undefined values" );
+is(mean(undef,undef,undef), undef, "call mean with only undefined values" );
+is(range(6,9,undef), 3, "call range with undefined value" );
+is(range(undef,6,9), 3, "call range with leading undefined value" );
+is(range(undef,undef,undef,7), 0, "call range with single defined value" );
+is(range(undef,undef,undef), undef, "call range with only undefined values" );
+
+# unbiased sample test
+my @values = (3, -10, 8, undef, 7, undef, 8, 3, 6, 3);
+is(mean(@values), 3.5, "call unbiased sample set mean" );
+is(median(@values), 4.5, "call unbiased sample set median" );
+is(mode(@values), 3, "call unbiased sample set mode" );
+is(variance(1,2,3), 1, "call unbiased sample set variance");
+is(stddev(1,2,3), 1, "call unbiased sample set standard deviation");
+
+# population tests (variancep/stddevp divide by N rather than N-1)
is(variancep(2,4,2,4), 1, "call variancep - functional interface");
is(stddevp(2,4,2,4), 1, "call stddevp - functional interface");
@@ -41,12 +74,21 @@ is($stats{mode}, 2, "call mode - hash-based interface");
is($stats{variance}, 1, "call variance - hash-based interface");
is($stats{stddev}, 1, "call stddev - hash-based interface");
+# a tiny bit more substantial data set
+%stats = statshash(0..10,1);
+is($stats{sum},56,"call sum - hash-based");
+is($stats{mean},4+2/3,"call mean - hash-based");
+is($stats{variance},11+1/3,"call variance - hash-based");
+is($stats{variancep},10.3+8/90,"call variancep - hash-based");
+
%stats= statshash(2,4,2,4);
ok($stats{variancep}, "call variancep - hash-based interface");
ok($stats{stddevp}, "call stddevp - hash-based interface");
%stats= frequencies(1,2,3,3);
-is($stats{1},1, "frequencies matched correctly");
-is($stats{2},1, "frequencies matched correctly");
-is($stats{3},2, "frequencies matched correctly");
+is($stats{1}, 1, "frequencies matched correctly for 1");
+is($stats{2}, 1, "frequencies matched correctly for 2");
+is($stats{3}, 2, "frequencies matched correctly for 3");
+is($stats{4}, undef, "frequencies matched correctly for 4");
+