The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Statistics::Robust::Scale;

use strict;
use warnings;
use Carp;

use POSIX qw(floor);
use Math::CDF qw(qnorm pbeta);
use Math::Round qw(round_even);
use Statistics::Robust::Location qw(median);

use base qw(Statistics::Robust);

our @EXPORT = ();
our @EXPORT_OK = (qw(
variance
MAD
MADN
idealf
winvar
trimvar
msmedse
pbvar
));
our %EXPORT_TAGS = ( all => \@EXPORT_OK );

# We need to implement variance since Statistics::Basic
# currently is messing up the unbiased variance implementation.
sub
variance
{
 my($x) = @_;

 my $length = scalar @$x;
 my $sum = Statistics::Robust::_sum($x);
 my $mean = $sum/$length;

 my $var = 0;
 for(my $i=0;$i<@$x;$i++)
 {
  $var += ($mean-$x->[$i])**2;
 }

 return $var/($length-1);
}

# The Median Absolute Deviation
sub
MAD
{
 my($x) = @_;

 my @ad = ();

 my($median) = Statistics::Robust::Location::median($x);

 foreach my $xi (@$x)
 {
  my($adi) = abs($xi - $median);
  push(@ad,$adi);
 }

 my($mad) = Statistics::Robust::Location::median(\@ad) + 0.0;


 return $mad;
}

# The rescaled Median Absolute Deviation
sub
MADN
{
 my($x) = @_;
 
 my $mad = MAD($x);
 $mad /= qnorm(0.75);

 return $mad;
}

sub
idealf
{
#
# Compute the ideal fourths for data in x
# and return the lower and upper quartiles
#
 my($x) = @_;

 my $n = scalar @$x;

 my $j = floor($n/4 + 5/12) - 1;
 my @y = sort {$a <=> $b} @$x;
 my $g = ($n/4) - $j + (5/12) - 1;
 my $ql = (1-$g)*$y[$j] + $g*$y[$j+1];
 my $k = $n - $j - 1;
 my $qu = (1-$g)*$y[$k] + $g*$y[$k-1];

 return ($ql,$qu); 
}

sub
winvar
{
 my($x,$tr) = @_;

 if ( not defined $tr )
 {
  $tr = 0.2;
 }
 my @y = sort {$a <=> $b} @$x;

 my $n = scalar @$x;
 my $ibot = floor($tr*$n);# + 1;
 my $itop = $n - $ibot;# + 1;
 my $xbot = $y[$ibot]; 
 my $xtop = $y[$itop]; 
 for(my $i=0;$i<$n;$i++)
 {
  if ( $y[$i] <= $xbot )
  {
   $y[$i] = $xbot;
  }
  if ( $y[$i] >= $xtop )
  {
   $y[$i] = $xtop;
  }
 }
 my $winvar = variance(\@y);

 return $winvar;
}

# The variance of the trimmed mean
sub
trimvar
{
 my($x,$tr) = @_;

 if ( not defined $tr )
 {
  $tr = 0.2;
 }

 my $n = scalar @$x;
 my $winvar = winvar($x,$tr);
 my $trimvar = $winvar/((1-2*$tr)**2*$n);

 return $trimvar;
}

# Given an array, estimate the standard error of the sample median from pg. 65
# of Wilcox, "Introduction to Robust Estimation and Hypothesis Testing", 2005

sub
msmedse
{
 my($x) = @_;
 
 my @sorted = sort {$a <=> $b} @$x;
 unshift @sorted, 0.0;
 my $n = @sorted -1;
 my $av = round_even(($n+1)/2.0 - 2.575829*sqrt($n/4.0));
 if ( $av == 0 )
 {
  $av = 1.0; 
 }
 my $top = ($n - $av + 1);
 my $sqse = (($sorted[$top] - $sorted[$av])/(2*2.575829))**2;
 $sqse = sqrt($sqse);

 return $sqse;
}

# The percentage bend midvariance
sub
pbvar
{
 my($x,$beta) = @_;

 if ( not defined $beta )
 {
  $beta = 0.2;
 }
 my $pbvar = 0;
 my $n = scalar @$x;
 my $median = Statistics::Robust::Location::median($x) + 0.0;

 my @w = ();
 for(my $i=0;$i<@$x;$i++)
 {
  $w[$i] = ($x->[$i] - $median);
 }

 my @sorted = sort {abs($a) <=> abs($b)} @w;
 my $m = floor((1-$beta)*$n + 0.5); 
 my $omega = $sorted[$m];

 if ( $omega > 0 )
 {
  my @z=0;
  my $np = 0;

  for(my $i=0;$i<@w;$i++)
  {
   my $y = $w[$i]/$omega; 
   if ( $y >= 1.0 )
   {
    $y = 1.0;
   }
   elsif ( $y <= -1 )
   {
    $y = -1.0;
   }
   else
   {
    $np++;
   }
   $z[$i] = $y**2;
  }
  
  $pbvar = $n*$omega**2*Statistics::Robust::_sum(\@z)/($np**2);
 }

 return $pbvar;
}

1;

=head1 NAME

Statistics::Robust::Scale - Robust Measures of Scale

=head1 SYNOPSIS

 my @x = (1,4,5,3,7,2,4);

 my($mad) = MAD(\@x);
 my($madn) = MADN(\@x);
 my($ql,qu) = idealf(\@x);
 my($winvar) = winvar(\@x);

=head1 FUNCTIONS

=head2 MAD

 my($mad) = MAD(\@x);

 Return the non-normalized Median Absolute Deviation.

=head2 MADN

 my($madn) = MADN(\@x);

 Return the Median Absolute Deviation normalized by the 0.75 quartile of the normal distribution.

=head2 idealf

 my($ql,$qu) = idealf(\@x); 

 Return the Ideal Fourths estimate of the lower and upper quartiles (in that order).

=head2 winvar

 my($winvar) = winvar(\@x,$tr);

 Return the Winsorized variance.  If the amount of trimming ($tr) is not specified, it defaults to 0.2.

=head2 trimvar

 my($trimvar) = trimvar(\@x,$tr);

 Return the variance for the trimmed mean with $tr trimming.  If the amount of trimming is not specified,
 it defaults to 0.2.

=head2 variance

 my($var) = variance(\@x);

 The unbiased sample variance.

=head2 msmedse

 my($mse) = msmedse(\@x);

 An estimate of the standard error of the median.

=head2 pbvar

 my($pb) = pbvar(\@x, $beta);

 The percentage-bend midvariance

=head1 AUTHOR

Walter Szeliga C<< <walter@geology.cwu.edu> >>

=cut