The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
Changes 1828
Lite.pm 77115
MANIFEST 67
META.json 031
META.yml 1019
Makefile.PL 1821
test.pl 749
7 files changed (This is a version diff) 136270
@@ -1,23 +1,33 @@
 Revision history for Perl extension Statistics::Lite.
 
-0.01  Thu Sep 21 09:56:43 2000
-	- original version; created by h2xs 1.19
-0.05  Mon Jan 22 08:13:00 2001
-    - various optimizations
-1.00  Thu Jan 10 14:25:38 2002
-    - fixed median algorithm
-1.01  Thu Jan 10 14:59:52 2002
-    - bugfix version
-1.02  Fri Nov  4 12:25:00 2005
-    - expanded explanation of limitations
-
- ... time passed, people laughed and loved and sat, and we all learned something about the true meaning of New Year's Day ...
-
-3.00  Sat Mar  3 16:48:00 2007
-    - after much delay, stddev has been restored to the sample or biased version of the formula; if you need the unbiased version, use stddev(0,@data)
-    - added frequencies() function from Nathan Haigh
+3.6   Sun 22 Feb 2015 09:59:02 PM PST 
+    - Added github repo to dist metadata and pod
+3.5   Sat 21 Feb 2015 11:38:52 PM PST
+    - Exclude undefined values, per CPAN bug # 50448
+3.4   Mon Feb 16 10:04:50 2015
+    - documentation fixes and additional tests
+    - now version controlled! https://github.com/brianary/Statistics-Lite
+3.3   Fri Feb 13 10:03:45 2015
+    - added license info
+3.2   Sun Jun 24 11:13:00 2007
+    - updated tests
 3.1   
     - fixed and renamed biased (population) versions
     - added unbiased versions
-3.2   Sun Jun 24 11:13:00 2007
-    - updated tests
+3.00  Sat Mar  3 16:48:00 2007
+    - after much delay, stddev has been restored to the sample or biased version of the formula; if you need the unbiased version, use stddev(0,@data)
+    - added frequencies() function from Nathan Haigh
+
+ ... time passed, people laughed and loved and sat, and we all learned something about the true meaning of New Year's Day ...
+
+1.02  Fri Nov  4 12:25:00 2005
+    - expanded explanation of limitations
+1.01  Thu Jan 10 14:59:52 2002
+    - bugfix version
+1.00  Thu Jan 10 14:25:38 2002
+    - fixed median algorithm
+0.05  Mon Jan 22 08:13:00 2001
+    - various optimizations
+0.01  Thu Sep 21 09:56:43 2000
+	- original version; created by h2xs 1.19
+
@@ -3,7 +3,7 @@ use strict;
 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
 require Exporter;
 
-$VERSION = '3.2';
+$VERSION = '3.6';
 @ISA = qw(Exporter);
 @EXPORT = ();
 @EXPORT_OK = qw(min max range sum count mean median mode variance stddev variancep stddevp statshash statsinfo frequencies);
@@ -14,69 +14,83 @@ $VERSION = '3.2';
 	stats => [qw<statshash statsinfo>],
 );
 
+sub definedvals
+{
+	return grep{defined}@_;
+}
+
 sub count
-{ return scalar @_; }
+{
+	return scalar definedvals @_;
+}
 
 sub min 
-{ 
-	return unless @_;
-	return $_[0] unless @_ > 1;
-	my $min= shift;
-	foreach(@_) { $min= $_ if $_ < $min; }
+{
+	my @data = definedvals @_;
+	return unless @data;
+	return $data[0] unless @data > 1;
+	my $min= shift @data;
+	foreach(@data) { $min= $_ if $_ < $min; }
 	return $min;
 }
 
 sub max 
 { 
-	return unless @_;
-	return $_[0] unless @_ > 1;
-	my $max= shift;
-	foreach(@_) { $max= $_ if $_ > $max; }
+	my @data = definedvals @_;
+	return unless @data;
+	return $data[0] unless @data > 1;
+	my $max= shift @data;
+	foreach(@data) { $max= $_ if $_ > $max; }
 	return $max;
 }
 
 sub range
 {
-	return unless @_;
-	return 0 unless @_ > 1;
-	return abs($_[1]-$_[0]) unless @_ > 2;
-	my $min= shift; my $max= $min;
-	foreach(@_) { $min= $_ if $_ < $min; $max= $_ if $_ > $max; }
+	my @data = definedvals @_;
+	return unless @data;
+	return 0 unless @data > 1;
+	return abs($data[1]-$data[0]) unless @data > 2;
+	my $min= shift @data; my $max= $min;
+	foreach(@data) { $min= $_ if $_ < $min; $max= $_ if $_ > $max; }
 	return $max - $min;
 }
 
 sub sum
 {
-	return unless @_;
-	return $_[0] unless @_ > 1;
+	my @data = definedvals @_;
+	return unless @data;
+	return $data[0] unless @data > 1;
 	my $sum;
-	foreach(@_) { $sum+= $_; }
+	foreach(@data) { $sum+= $_; }
 	return $sum;
 }
 
 sub mean
 {
-	return unless @_;
-	return $_[0] unless @_ > 1;
-	return sum(@_)/scalar(@_);
+	my @data = definedvals @_;
+	return unless @data;
+	return $data[0] unless @data > 1;
+	return sum(@data)/scalar(@data);
 }
 
 sub median
 {
-	return unless @_;
-	return $_[0] unless @_ > 1;
-	@_= sort{$a<=>$b}@_;
-	return $_[$#_/2] if @_&1;
-	my $mid= @_/2;
-	return ($_[$mid-1]+$_[$mid])/2;
+	my @data = definedvals @_;
+	return unless @data;
+	return $data[0] unless @data > 1;
+	@data= sort{$a<=>$b}@data;
+	return $data[$#data/2] if @data&1;
+	my $mid= @data/2;
+	return ($data[$mid-1]+$data[$mid])/2;
 }
 
 sub mode
 {
-	return unless @_;
-	return $_[0] unless @_ > 1;
+	my @data = definedvals @_;
+	return unless @data;
+	return $data[0] unless @data > 1;
 	my %count;
-	foreach(@_) { $count{$_}++; }
+	foreach(@data) { $count{$_}++; }
 	my $maxhits= max(values %count);
 	foreach(keys %count) { delete $count{$_} unless $count{$_} == $maxhits; }
 	return mean(keys %count);
@@ -84,60 +98,65 @@ sub mode
 
 sub variance
 {
-	return unless @_;
-	return 0 unless @_ > 1;
-	my $mean= mean @_;
-	return (sum map { ($_ - $mean)**2 } @_) / $#_;
+	my @data = definedvals @_;
+	return unless @data;
+	return 0 unless @data > 1;
+	my $mean= mean @data;
+	return (sum map { ($_ - $mean)**2 } @data) / $#data;
 }
 
 sub variancep
 {
-	return unless @_;
-	return 0 unless @_ > 1;
-	my $mean= mean @_;
-	return (sum map { ($_ - $mean)**2 } @_) / ( $#_ +1 );
+	my @data = definedvals @_;
+	return unless @data;
+	return 0 unless @data > 1;
+	my $mean= mean @data;
+	return (sum map { ($_ - $mean)**2 } @data) / ( $#data +1 );
 }
 
 sub stddev
 {
-	return unless @_;
-	return 0 unless @_ > 1;
-	return sqrt variance @_;
+	my @data = definedvals @_;
+	return unless @data;
+	return 0 unless @data > 1;
+	return sqrt variance @data;
 }
 
 sub stddevp
 {
-	return unless @_;
-	return 0 unless @_ > 1;
-	return sqrt variancep @_;
+	my @data = definedvals @_;
+	return unless @data;
+	return 0 unless @data > 1;
+	return sqrt variancep @data;
 }
 
 sub statshash
 {
-	return unless @_;
+	my @data = definedvals @_;
+	return unless @data;
 	return
 	(
 		count     => 1,
-		min       => $_[0],
-		max       => $_[0],
+		min       => $data[0],
+		max       => $data[0],
 		range     => 0,
-		sum       => $_[0],
-		mean      => $_[0],
-		median    => $_[0],
-		mode      => $_[0],
+		sum       => $data[0],
+		mean      => $data[0],
+		median    => $data[0],
+		mode      => $data[0],
 		variance  => 0,
 		stddev    => 0,
 		variancep => 0,
 		stddevp   => 0
-	) unless @_ > 1;
-	my $count= scalar(@_);
-	@_= sort{$a<=>$b}@_;
+	) unless @data > 1;
+	my $count= scalar(@data);
+	@data= sort{$a<=>$b}@data;
 	my $median;
-	if(@_&1) { $median= $_[$#_/2]; }
-	else { my $mid= @_/2; $median= ($_[$mid-1]+$_[$mid])/2; }
+	if(@data&1) { $median= $data[$#data/2]; }
+	else { my $mid= @data/2; $median= ($data[$mid-1]+$data[$mid])/2; }
 	my $sum= 0;
 	my %count;
-	foreach(@_) { $sum+= $_; $count{$_}++; }
+	foreach(@data) { $sum+= $_; $count{$_}++; }
 	my $mean= $sum/$count;
 	my $maxhits= max(values %count);
 	foreach(keys %count) 
@@ -145,17 +164,17 @@ sub statshash
 	return
 	(
 		count     => $count,
-		min       => $_[0],
-		max       => $_[-1],
-		range     => ($_[-1] - $_[0]),
+		min       => $data[0],
+		max       => $data[-1],
+		range     => ($data[-1] - $data[0]),
 		sum       => $sum,
 		mean      => $mean,
 		median    => $median,
 		mode      => mean(keys %count),
-		variance  => variance(@_),
-		stddev    => stddev(@_),
-		variancep => variancep(@_),
-		stddevp   => stddevp(@_)
+		variance  => variance(@data),
+		stddev    => stddev(@data),
+		variancep => variancep(@data),
+		stddevp   => stddevp(@data)
 	);
 }
 
@@ -180,10 +199,11 @@ stddevp   = $stats{stddevp}
 
 sub frequencies
 {
-	return unless @_;
-	return ( $_[0], 1 ) unless @_ > 1;
+	my @data = definedvals @_;
+	return unless @data;
+	return ( $data[0], 1 ) unless @data > 1;
 	my %count;
-	foreach(@_) { $count{$_}++; }
+	foreach(@data) { $count{$_}++; }
 	return %count;
 }
 
@@ -217,13 +237,9 @@ This is also a module for dilettantes.
 When you just want something to give some very basic, high-school-level statistical values, 
 without having to set up and populate an object first, this module may be useful.
 
-=over 6
-
 =head2 NOTE
 
-This version now implements standard deviation and variance calculated by both the unbiased and biased estimators.
-
-=back
+This module implements standard deviation and variance calculated by both the unbiased and biased estimators.
 
 =head1 FUNCTIONS
 
@@ -232,21 +248,27 @@ This version now implements standard deviation and variance calculated by both t
 =item C<min(@data)>, C<max(@data)>, C<range(@data)>, C<sum(@data)>, C<count(@data)>
 
 Return the minimum value, maximum value, range (max - min),
-sum, or count of values in C<@data>.
-(Count simply returns C<scalar(@data)>.)
+sum, or count of values in C<@data>. Undefined values are ignored.
+(Count returns the number of defined values in C<@data>. B<Please note> that as of version 3.5 this module
+ignores undefined values in your data; they are no longer treated as zero.)
 
 =item C<mean(@data)>, C<median(@data)>, C<mode(@data)>
 
-Calculates the mean, median, or mode average of the values in C<@data>.
+Calculates the mean, median, or mode average of the values in C<@data>. Undefined values are ignored.
 (In the event of ties in the mode average, their mean is returned.)
 
 =item C<variance(@data)>, C<stddev(@data)>
 
 Return the standard deviation or variance of C<@data> for a sample (same as Excel's STDEV).
+This is also called the Unbiased Sample Variance and involves dividing the 
+sample's squared deviations by N-1 (the sample count minus 1).
+The standard deviation is just the square root of the variance.
 
 =item C<variancep(@data)>, C<stddevp(@data)>
 
 Return the standard deviation or variance of C<@data> for the population (same as Excel's STDEVP).
+This involves dividing the squared deviations of the population by N (the population size).
+The standard deviation is just the square root of the variance.
 
 =item C<statshash(@data)>
 
@@ -271,11 +293,27 @@ current namespace (use with caution).
 To import the individual statistical functions, use the import tag C<:funcs>;
 use C<:stats> to import C<statshash(@data)> and C<statsinfo(@data)>.
 
+=head1 REPOSITORY
+
+L<https://github.com/brianary/Statistics-Lite>
+
 =head1 AUTHOR
 
 Brian Lalonde E<lt>brian@webcoder.infoE<gt>, 
 C<stddev(@data)>, C<stddevp(@data)>, C<variance(@data)>, C<variancep(@data)>, 
-and additional motivation by Nathan Haigh.
+and additional motivation by Nathan Haigh, with kind support from Alexander Zangerl.
+
+The project lives at https://github.com/brianary/Statistics-Lite
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2000 Brian Lalonde E<lt>brian@webcoder.infoE<gt>, Nathan Haigh,
+Alexander Zangerl, and Ton Voon.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
 
 =head1 SEE ALSO
 
@@ -1,6 +1,7 @@
-Changes
-Lite.pm
-Makefile.PL
-MANIFEST
-test.pl
-META.yml                                 Module meta-data (added by MakeMaker)
+Changes
+Lite.pm
+Makefile.PL
+MANIFEST
+test.pl
+META.yml                                 Module meta-data (added by MakeMaker)
+META.json                                Module JSON meta-data (added by MakeMaker)
@@ -0,0 +1,31 @@
+{
+   "abstract" : "Small stats stuff.",
+   "author" : [
+      "Brian Lalonde (brian@webcoder.info)"
+   ],
+   "dynamic_config" : 1,
+   "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.120921",
+   "license" : [
+      "perl_5"
+   ],
+   "meta-spec" : {
+      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+      "version" : "2"
+   },
+   "name" : "Statistics-Lite",
+   "no_index" : {
+      "directory" : [
+         "t",
+         "inc"
+      ]
+   },
+   "release_status" : "stable",
+   "resources" : {
+      "repository" : {
+         "type" : "git",
+         "url" : "https://github.com/brianary/Statistics-Lite.git",
+         "web" : "https://github.com/brianary/Statistics-Lite"
+      }
+   },
+   "version" : "3.6"
+}
@@ -1,10 +1,19 @@
-# http://module-build.sourceforge.net/META-spec.html
-#XXXXXXX This is a prototype!!!  It will change in the future!!! XXXXX#
-name:         Statistics-Lite
-version:      3.2
-version_from: Lite.pm
-installdirs:  site
-requires:
-
-distribution_type: module
-generated_by: ExtUtils::MakeMaker version 6.30
+---
+abstract: 'Small stats stuff.'
+author:
+  - 'Brian Lalonde (brian@webcoder.info)'
+build_requires: {}
+dynamic_config: 1
+generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.120921'
+license: perl
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.4.html
+  version: 1.4
+name: Statistics-Lite
+no_index:
+  directory:
+    - t
+    - inc
+resources:
+  repository: https://github.com/brianary/Statistics-Lite.git
+version: 3.6
@@ -1,27 +1,30 @@
 use ExtUtils::MakeMaker;
 
-sub MY::postamble { <<'.'; }
-
-dist_both : dist ppmdist 
-
-ppmdist : ppd pure_all
-	$(TAR) $(TARFLAGS) $(DISTNAME).ppm.tar blib
-	$(RM_RF) blib
-	$(RM_RF) pm_to_blib
-	$(COMPRESS) $(DISTNAME).ppm.tar
-.
+my $mm_ver = $ExtUtils::MakeMaker::VERSION;
+if ($mm_ver =~ /_/) { # developer release
+    $mm_ver = eval $mm_ver;
+    die $@ if $@;
+}
 
 WriteMakefile(
   NAME	          => 'Statistics::Lite',
   AUTHOR          => 'Brian Lalonde (brian@webcoder.info)',
+  LICENSE         => 'perl',
   VERSION_FROM    => 'Lite.pm', 
   ABSTRACT_FROM   => 'Lite.pm',
-  BINARY_LOCATION => 'Statistics-Lite.ppm.tar.gz',
-  dist            => 
-  {
-    TAR      => 'C:\\Tools\\cygwin\\bin\\tar.exe',
-    TARFLAGS => 'cvf',
-    COMPRESS => 'C:\\Tools\\cygwin\\bin\\gzip.exe --best',
-    SUFFIX   => '.gz',
-  },
+
+  ($mm_ver <= 6.45
+    ? ()
+    : (META_MERGE => {
+      'meta-spec' => { version => 2 },
+        resources => {
+          repository  => {
+            type => 'git',
+            web  => 'https://github.com/brianary/Statistics-Lite',
+            url  => 'https://github.com/brianary/Statistics-Lite.git',
+          },
+        },
+      })
+  ),
+
 );
@@ -1,7 +1,7 @@
-#!perl
+#!/usr/bin/perl
 use strict;
 use warnings;
-use Test::More tests => 28;
+use Test::More tests => 60;
 
 BEGIN { use_ok( 'Statistics::Lite', ':all' ); }
 
@@ -16,13 +16,46 @@ is(max(1,2,3),    3, "call max - functional interface");
 is(range(1,2,3),  2, "call range - functional interface");
 is(sum(1,2,3),    6, "call sum - functional interface");
 is(count(1,2,3),  3, "call count - functional interface");
+is(count(undef,1,2,3), 3, "call count with undef - functional interface");
 is(mean(1,2,3),   2, "call mean - functional interface");
 is(median(1,2,3), 2, "call median - functional interface");
+is(median(2,4,6,8), 5, "call median with even number of values - functional interface");
 is(mode(1,2,3),   2, "call mode - functional interface");
 
-is(variance(1,2,3), 1, "call variance - functional interface");
-is(stddev(1,2,3),   1, "call stddev - functional interface");
+is(min(1,-5,8), -5, "call min with negative numbers" );
+is(range(-6,-9), 3, "call range with negative values" );
+is(range(6,-9), 15, "call range with data crossing 0" );
 
+# undef checking
+is(min(undef), undef, "call min with only single undefined value" );
+is(max(undef), undef, "call max with only single undefined value" );
+is(min(), undef, "call min without values" );
+is(max(), undef, "call max without values" );
+is(min(6,undef,10), 6, "call min with undefined value" );
+is(max(-6,-10,undef), -6, "call max with undefined value" );
+is(min(undef, 7, -5), -5, "call min with initial undefined value" );
+is(max(undef, 7, -5), 7, "call max with initial undefined value" );
+is(min(undef,undef,undef), undef, "call min with only undefined values" );
+is(max(undef,undef,undef), undef, "call max with only undefined values" );
+is(count(undef, 7, -5), 2, "call count with undefined value" );
+is(sum(undef, 7, -5), 2, "call sum with undefined value" );
+is(mean(undef, 7, -5), 1, "call mean with undefined value" );
+is(count(undef,undef,undef), 0, "call count with only undefined values" );
+is(mean(undef,undef,undef), undef, "call mean with only undefined values" );
+is(range(6,9,undef), 3, "call range with undefined value" );
+is(range(undef,6,9), 3, "call range with leading undefined value" );
+is(range(undef,undef,undef,7), 0, "call range with single defined value" );
+is(range(undef,undef,undef), undef, "call range with only undefined values" );
+
+# unbiased sample test
+my @values = (3, -10, 8, undef, 7, undef, 8, 3, 6, 3);
+is(mean(@values), 3.5, "call unbiased sample set mean" );
+is(median(@values), 4.5, "call unbiased sample set median" );
+is(mode(@values), 3, "call unbiased sample set mode" );
+is(variance(1,2,3), 1, "call unbiased sample set variance");
+is(stddev(1,2,3),   1, "call unbiased sample set standard deviation");
+
+# population sample test
 is(variancep(2,4,2,4), 1, "call variancep - functional interface");
 is(stddevp(2,4,2,4),   1, "call stddevp - functional interface");
 
@@ -41,12 +74,21 @@ is($stats{mode},   2, "call mode - hash-based interface");
 is($stats{variance}, 1, "call variance - hash-based interface");
 is($stats{stddev},   1, "call stddev - hash-based interface");
 
+# a tiny bit more substantial data set
+%stats = statshash(0..10,1);
+is($stats{sum},56,"call sum - hash-based");
+is($stats{mean},4+2/3,"call mean - hash-based");
+is($stats{variance},11+1/3,"call variance - hash-based");
+is($stats{variancep},10.3+8/90,"call variancep - hash-based");
+
 
 %stats= statshash(2,4,2,4);
 ok($stats{variancep}, "call variancep - hash-based interface");
 ok($stats{stddevp},   "call stddevp - hash-based interface");
 
 %stats= frequencies(1,2,3,3);
-is($stats{1},1, "frequencies matched correctly");
-is($stats{2},1, "frequencies matched correctly");
-is($stats{3},2, "frequencies matched correctly");
+is($stats{1}, 1, "frequencies matched correctly for 1");
+is($stats{2}, 1, "frequencies matched correctly for 2");
+is($stats{3}, 2, "frequencies matched correctly for 3");
+is($stats{4}, undef, "frequencies matched correctly for 4");
+