The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Statistics::Robust::Density;

use strict;
use warnings;
use Carp;

use Math::CDF qw(qnorm);
use Statistics::Robust::Location qw(mean);
use Statistics::Robust::Scale qw(variance);

use base 'Statistics::Robust';

our @EXPORT = ();
our @EXPORT_OK = (qw(
rdplot
akerd
));
our %EXPORT_TAGS = ( all => \@EXPORT_OK );

# Expected Frequency Curve
sub
rdplot
{
 my($x,$fr) = @_;

 if ( not defined $fr )
 {
  $fr = 0.8;
 }

 my $n = scalar @$x;
 my @y = sort {$a <=> $b} @$x;
 my $mad = Statistics::Robust::Scale::MADN(\@y);
 my @rmd = ();

 for(my $i=0;$i<$n;$i++)
 {
  my $near = _near(\@y,$y[$i],$mad,$fr);
  $rmd[$i] = Statistics::Robust::_sum($near);
 } 
 if ( $mad != 0 )
 {
  for(my $i=0;$i<$n;$i++)
  {
   $rmd[$i] = $rmd[$i]/(2*$fr*$mad);
  }
 }
 for(my $i=0;$i<$n;$i++)
 {
  $rmd[$i] = $rmd[$i]/$n;
 }

 return \@y,\@rmd;
}

# Adaptive kernel density estimate
sub
akerd
{
 my($xx,$fr,$aval) = @_;

 # Set the variance calculation to use the un-biased estimate
 my $prior_value = 0;
 if ( defined $ENV{UNBIAS} )
 {
  $prior_value = $ENV{UNBIAS};
 }
 $ENV{UNBIAS} = 1; 

 if ( not defined $fr )
 {
  $fr = 0.8;
 }
 if ( not defined $aval )
 {
  $aval = 0.5;
 }

 my @x = sort {$a <=> $b} @$xx;

 # First, a measure of dispersion
 my $m = 0;
 $m = Statistics::Robust::Scale::MADN(\@x);
 if ($m == 0)
 {
  my($ql,$qu) = Statistics::Robust::Scale::idealf(\@x);
  $m = ($qu - $ql)/(qnorm(0.75) - qnorm(0.25));
 }
 if ( $m == 0 )
 {
  $m = sqrt(Statistics::Robust::Scale::winvar(\@x)/0.4129);
 }
 if ( $m == 0 )
 {
  carp "All measures of dispersion are equal to 0\n";
  return undef;
 }

 # Estimate the density using the Expected Frequency Curve 
 my(undef,$fhat) = rdplot(\@x);
 if ( $m > 0 )
 {
  for(my $i=0;$i<@$fhat;$i++)
  { 
   $fhat->[$i] = $fhat->[$i]/(2*$fr*$m);
  }
 }

 # Calculate the span
 my $n = scalar @x;
 my $sig = sqrt(variance(\@x));
 my($ql,$qu) = Statistics::Robust::Scale::idealf(\@x);
 my $iqr = ($qu-$ql)/1.34;
 my $A = Statistics::Robust::_min($sig,$iqr);
 if( $A == 0 )
 {
  $A = sqrt(Statistics::Robust::Scale::winvar(\@x))/0.64;
 }
 my $hval = 1.06*$A/($n**0.2);
 # See Silverman, 1986, pp. 47-48

 my @log_fhat = ();
 my @alam = ();
 for(my $i=0;$i<scalar @$fhat;$i++)
 {
  if ( $fhat > 0 )
  {
   $log_fhat[$i] = log($fhat->[$i]);
  }
 }
 my $gm = exp(mean(\@log_fhat));
 for(my $i=0;$i<scalar @$fhat;$i++)
 {
  $alam[$i] = ($fhat->[$i]/$gm)**(0.0-$aval);
 }

 my @dhat = ();
 my @temp = ();
 for(my $j=0;$j<$n;$j++)
 { 
  # Calculate t
  for(my $i=0;$i<$n;$i++)
  {
   $temp[$i] = ($x[$j]-$x[$i])/($hval*$alam[$i]);
  }
 
  # Calculate the Epanechnikov kernel
  for(my $i=0;$i<$n;$i++)
  {
   my $epan = 0;
   if ( abs($temp[$i]) < sqrt(5) )
   {
    $epan = 0.75*(1-0.2*$temp[$i]**2)/sqrt(5);
   }
   else
   {
    $epan = 0.0;
   }
   $temp[$i] = ($epan/($alam[$i]*$hval));
  }
  $dhat[$j] = mean(\@temp) + 0.0;
 }

 # Principal of least surprise
 $ENV{UNBIAS} = $prior_value; 
 
 return \@x,\@dhat;
}

# Determine which values in $x are near $pt
sub
_near
{
 my($x,$pt,$m,$fr) = @_;

 if ( not defined $fr )
 {
  $fr = 1;
 }
 if ( $m == 0 )
 {
  my($ql,$qu) = idealf($x);
  $m = ($qu-$ql)/(qnorm(0.75) - qnorm(0.25));
 }
 if ( $m == 0 )
 {
  $m = $m = sqrt(winvar($x)/0.4129);
 }
 if ( $m == 0 )
 {
  carp "All measures of dispersion are equal to 0\n";
 }

 my @dflag = ();
 for(my $i=0;$i<scalar @$x;$i++)
 {
  my $dis = abs($x->[$i]-$pt);
  push(@dflag, ($dis <= $fr*$m));
 }

 return \@dflag;
}

1;

=head1 NAME

Statistics::Robust::Density - Robust Probability Density Estimators

=head1 SYNOPSIS

 my @x = (1,4,5,3,7,2,4);
 
 my($pts,$akerd) = akerd(\@x);

=head1 FUNCTIONS

=head2 rdplot

 my($pts,$rdplot) = rdplot(\@x);

 Return the expected frequency curve for the data in \@x.

=head2 akerd

 my($pts,$akerd) = akerd(\@x);

 Return the adaptive kernel density estimate for the data in \@x.

=head1 AUTHOR

Walter Szeliga C<< <walter@geology.cwu.edu> >>

=cut