#!/usr/bin/perl

pp_add_exported( );

pp_addpm({At=>'Top'}, <<'EOD');

use strict;
use warnings;

use Carp;
use PDL::LiteF;

$PDL::onlinedoc->scan(__FILE__) if $PDL::onlinedoc;

eval {
  require PDL::Graphics::PGPLOT::Window;
  PDL::Graphics::PGPLOT::Window->import( 'pgwin' );
};
my $PGPLOT = 1 if !$@;

my $DEV = ($^O =~ /win/i)? '/png' : '/xs';

=head1 NAME

PDL::Stats::Distr -- parameter estimations and probability density functions for distributions.

=head1 DESCRIPTION

Parameter estimate is maximum likelihood estimate when there is closed form estimate, otherwise it is method of moments estimate.

=head1 SYNOPSIS

    use PDL::LiteF;
    use PDL::Stats::Distr;

      # do a frequency (probability) plot with fitted normal curve

    my ($xvals, $hist) = $data->hist;

      # turn frequency into probability
    $hist /= $data->nelem;

      # get maximum likelihood estimates of normal curve parameters
    my ($m, $v) = $data->mle_gaussian();

      # fitted normal curve probabilities
    my $p = $xvals->pdf_gaussian($m, $v);

    use PDL::Graphics::PGPLOT::Window;
    my $win = pgwin( Dev=>"/xs" );

    $win->bin( $hist );
    $win->hold;
    $win->line( $p, {COLOR=>2} );
    $win->close;

Or, play with different distributions with B<plot_distr> :)

    $data->plot_distr( 'gaussian', 'lognormal' );

=cut

EOD

pp_addhdr('
#include <math.h>
#include <gsl/gsl_sf_gamma.h>

');

pp_def('mme_beta',
  Pars      => 'a(n); float+ [o]alpha(); float+ [o]beta()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(alpha) sa, a2, m, v;
    sa = 0; a2 = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
      a2 += pow($a(), 2);
    %}
    m = sa / N;
    v = a2 / N - pow(m, 2);
    $alpha() = m * ( m * (1 - m) / v - 1 ); 
    $beta()  = (1 - m) * ( m * (1 - m) / v - 1 );
  ',
  BadCode   => '
    $GENERIC(alpha) sa, a2, m, v;
    sa = 0; a2 = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
	sa += $a();
	a2 += pow($a(), 2);
        N ++;
      }
    %}
    if (N) {
      m = sa / N;
      v = a2 / N - pow(m, 2);
      $alpha() = m * ( m * (1 - m) / v - 1 ); 
      $beta()  = (1 - m) * ( m * (1 - m) / v - 1 );
    }
    else {
      $SETBAD(alpha());
      $SETBAD(beta());
    }
  ',
  Doc      => '

=for usage

    my ($a, $b) = $data->mme_beta();

=for ref

beta distribution. pdf: f(x; a,b) = 1/B(a,b) x^(a-1) (1-x)^(b-1)

=cut

  ',

);

pp_def('pdf_beta',
  Pars      => 'x(); a(); b(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '

  if ($x()>=0 && $x()<=1) {
    double B_1 = 1 / gsl_sf_beta( $a(), $b() );
    $p() = B_1 * pow($x(), $a()-1) * pow(1-$x(), $b()-1);
  }
  else {
    barf("x out of range [0,1]");
  }
  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($a()) || $ISBAD($b()) ) {
  $SETBAD( $p() );
}
else {
  if ($x()>=0 && $x()<=1) {
    double B_1 = 1 / gsl_sf_beta( $a(), $b() );
    $p() = B_1 * pow($x(), $a()-1) * pow(1-$x(), $b()-1);
  }
  else {
    barf("x out of range [0,1]");
  }
}

  ',
  Doc      => '

=for ref

probability density function for beta distribution. x defined on [0,1].

=cut

  ',

);

pp_def('mme_binomial',
  Pars      => 'a(n); int [o]n_(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(p) sa, a2, m, v;
    sa = 0; a2 = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
      a2 += pow($a(), 2);
    %}
    m = sa / N;
    v = a2 / N - pow(m, 2);
    $p()  = 1 - v/m;
    $n_() = m / $p() >= 0? (int) (m / $p() + .5) : (int) (m / $p() - .5);
    $p()  = m / $n_(); 
  ',
  BadCode   => '
    $GENERIC(p) sa, a2, m, v;
    sa = 0; a2 = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
	sa += $a();
	a2 += pow($a(), 2);
        N ++;
      }
    %}
    if (N) {
      m = sa / N;
      v = a2 / N - pow(m, 2);
      $p()  = 1 - v/m;
      $n_() = m / $p() >= 0? (int) (m / $p() + .5) : (int) (m / $p() - .5);
      $p()  = m / $n_();
    }
    else {
      $SETBAD(n_());
      $SETBAD(p());
    }
  ',
  Doc      => '

=for usage

    my ($n, $p) = $data->mme_binomial;

=for ref

binomial distribution. pmf: f(k; n,p) = (n k) p^k (1-p)^(n-k) for k = 0,1,2..n 

=cut

  ',

);

pp_def('pmf_binomial',
  Pars      => 'ushort x(); ushort n(); p(); float+ [o]out()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '

    $GENERIC(out) bc = gsl_sf_choose($n(), $x());
    $out() = bc * pow($p(), $x()) * pow(1-$p(), $n() - $x());
  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($n()) || $ISBAD($p()) ) {
  $SETBAD( $out() );
}
else {
  $GENERIC(out) bc = gsl_sf_choose($n(), $x());
  $out() = bc * pow($p(), $x()) * pow(1-$p(), $n() - $x());
}

  ',
  Doc      => '

=for ref

probability mass function for binomial distribution.

=cut

  ',

);

pp_def('mle_exp',
  Pars      => 'a(n); float+ [o]l()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(l) sa = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
    %}
    $l() = N / sa;
  ',
  BadCode   => '
    $GENERIC(l) sa = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
	sa += $a();
        N ++;
      }
    %}
    if (sa > 0) {  $l() = N / sa;  }
    else        {  $SETBAD(l());   }
  ',
  Doc      => '

=for usage

    my $lamda = $data->mle_exp;

=for ref

exponential distribution. mle same as method of moments estimate.

=cut

  ',

);

pp_def('pdf_exp',
  Pars      => 'x(); l(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '

  $p() = $l() * exp( -1 * $l() * $x() );

  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($l()) ) {
  $SETBAD( $p() );
}
else {
  $p() = $l() * exp( -1 * $l() * $x() );
}

  ',
  Doc      => '

=for ref

probability density function for exponential distribution.

=cut

  ',

);

pp_def('mme_gamma',
  Pars      => 'a(n); float+ [o]shape(); float+ [o]scale()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(shape) sa, a2, m, v;
    sa = 0; a2 = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
      a2 += pow($a(), 2);
    %}
    m = sa / N;
    v = a2 / N - pow(m, 2);
    $shape() = pow(m, 2) / v; 
    $scale() = v / m;
  ',
  BadCode   => '
    $GENERIC(shape) sa, a2, m, v;
    sa = 0; a2 = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
	sa += $a();
	a2 += pow($a(), 2);
        N ++;
      }
    %}
    if (N) {
      m = sa / N;
      v = a2 / N - pow(m, 2);
      $shape() = pow(m, 2) / v; 
      $scale() = v / m;
    }
    else {
      $SETBAD(shape());
      $SETBAD(scale());
    }
  ',
  Doc      => '

=for usage

    my ($shape, $scale) = $data->mme_gamma();

=for ref

two-parameter gamma distribution

=cut

  ',

);

pp_def('pdf_gamma',
  Pars      => 'x(); a(); t(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '

  double g = gsl_sf_gamma( $a() );
  $p() = pow($x(), $a()-1) * exp(-1*$x() / $t()) / (pow($t(), $a()) * g);

  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($a()) || $ISBAD($t()) ) {
  $SETBAD( $p() );
}
else {
  double g = gsl_sf_gamma( $a() );
  $p() = pow($x(), $a()-1) * exp(-1*$x() / $t()) / (pow($t(), $a()) * g);
}

  ',
  Doc      => '

=for ref

probability density function for two-parameter gamma distribution.

=cut

  ',

);

pp_def('mle_gaussian',
  Pars      => 'a(n); float+ [o]m(); float+ [o]v()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(m) sa, a2;
    sa = 0; a2 = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
      a2 += pow($a(), 2);
    %}
    $m()  = sa / N;
    $v() = a2 / N - pow($m(),2);
  ',
  BadCode   => '
    $GENERIC(m) sa, a2;
    sa = 0; a2 = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
	sa += $a();
	a2 += pow($a(), 2);
        N ++;
      }
    %}
    if (N) {
      $m()  = sa / N;
      $v() = a2 / N - pow($m(),2);
    }
    else {
      $SETBAD(m());
      $SETBAD(v());
    }
  ',
  Doc      => '

=for usage

    my ($m, $v) = $data->mle_gaussian();

=for ref

gaussian aka normal distribution. same results as $data->average and $data->var. mle same as method of moments estimate.

=cut

  ',

);

pp_def('pdf_gaussian',
  Pars      => 'x(); m(); v(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '

  $p() = 1 / sqrt($v() * 2 * M_PI)
       * exp( -1 * pow($x() - $m(), 2) / (2*$v()) );

  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($m()) || $ISBAD($v()) ) {
  $SETBAD( $p() );
}
else {
  $p() = 1 / sqrt($v() * 2 * M_PI)
       * exp( -1 * pow($x() - $m(), 2) / (2*$v()) );
}

  ',
  Doc      => '

=for ref

probability density function for gaussian distribution.

=cut

  ',

);

pp_def('mle_geo',
  Pars      => 'a(n); float+ [o]p();',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(p) sa = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
    %}
    $p()  = 1 / (1 + sa/N);
  ',
  BadCode   => '
    $GENERIC(p) sa = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
	sa += $a();
        N ++;
      }
    %}
    if (N) {  $p()  = 1 / (1 + sa/N);  }
    else   {  $SETBAD(p());  }
  ',
  Doc      => '

=for ref

geometric distribution. mle same as method of moments estimate.

=cut

  ',

);

pp_def('pmf_geo',
  Pars      => 'ushort x(); p(); float+ [o]out()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '

  $out() = pow(1-$p(), $x()) * $p();

  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($p()) ) {
  $SETBAD( $out() );
}
else {
  $out() = pow(1-$p(), $x()) * $p();
}

  ',
  Doc      => '

=for ref

probability mass function for geometric distribution. x >= 0.

=cut

  ',

);

pp_def('mle_geosh',
  Pars      => 'a(n); float+ [o]p();',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(p) sa = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
    %}
    $p()  = N / sa;
  ',
  BadCode   => '
    $GENERIC(p) sa = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
	sa += $a();
        N ++;
      }
    %}
    if (sa > 0) { $p()  = N / sa; }
    else        { $SETBAD(p()); }
  ',
  Doc      => '

=for ref

shifted geometric distribution. mle same as method of moments estimate.

=cut

  ',

);

pp_def('pmf_geosh',
  Pars      => 'ushort x(); p(); float+ [o]out()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
  if ( $x() >= 1 ) {
    $out() = pow(1-$p(), $x()-1) * $p();
  }
  else {
    barf( "x >= 1 please" );
  }

  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($p()) ) {
  $SETBAD( $out() );
}
else {
  if ( $x() >= 1 ) {
    $out() = pow(1-$p(), $x()-1) * $p();
  }
  else {
    barf( "x >= 1 please" );
  }
}

  ',
  Doc      => '

=for ref

probability mass function for shifted geometric distribution. x >= 1.

=cut

  ',

);

pp_def('mle_lognormal',
  Pars      => 'a(n); float+ [o]m(); float+ [o]v()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(m) sa, a2;
    sa = 0; a2 = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += log($a());
    %}
    $m() = sa / N;
    loop (n) %{
      a2 += pow(log($a()) - $m(), 2);
    %}
    $v() = a2 / N;
  ',
  BadCode   => '
    $GENERIC(m) sa, a2;
    sa = 0; a2 = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
        sa += log($a());
        N ++;
      }
    %}
    if (N) {
      $m() = sa / N;
      loop (n) %{
        if ($ISGOOD( $a() )) {
          a2 += pow(log($a()) - $m(), 2);
        }
      %}
      $v() = a2 / N;
    }
    else {
      $SETBAD(m());
      $SETBAD(v());
    }
    
  ',
  Doc      => '

=for usage

    my ($m, $v) = $data->mle_lognormal();

=for ref

lognormal distribution. maximum likelihood estimation.

=cut

  ',

);

pp_def('mme_lognormal',
  Pars      => 'a(n); float+ [o]m(); float+ [o]v()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(m) sa, a2;
    sa = 0; a2 = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
      a2 += pow($a(), 2);
    %}
    $m()  = 2 * log(sa / N) - 1/2 * log( a2 / N );
    $v() = log( a2 / N ) - 2 * log( sa / N );
  ',
  BadCode   => '
    $GENERIC(m) sa, a2;
    sa = 0; a2 = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
	sa += $a();
	a2 += pow($a(), 2);
        N ++;
      }
    %}
    if (N) {
      $m()  = 2 * log(sa / N) - 1/2 * log( a2 / N );
      $v() = log( a2 / N ) - 2 * log( sa / N );
    }
    else {
      $SETBAD(m());
      $SETBAD(v());
    }
  ',
  Doc      => '

=for usage

    my ($m, $v) = $data->mme_lognormal();

=for ref

lognormal distribution. method of moments estimation.

=cut

  ',

);

pp_def('pdf_lognormal',
  Pars      => 'x(); m(); v(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '

  if ( $x() > 0 && $v() > 0 ) {
    $p() = 1 / ($x() * sqrt($v() * 2 * M_PI))
	 * exp( -1 * pow(log($x()) - $m(), 2) / (2*$v()) );
  }
  else {
    barf( "x and v > 0 please" );
  }

  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($m()) || $ISBAD($v()) ) {
  $SETBAD( $p() );
}
else {
  if ( $x() > 0 && $v() > 0 ) {
    $p() = 1 / ($x() * sqrt($v() * 2 * M_PI))
	 * exp( -1 * pow(log($x()) - $m(), 2) / (2*$v()) );
  }
  else {
    barf( "x and v > 0 please" );
  }
}

  ',
  Doc      => '

=for ref

probability density function for lognormal distribution. x > 0. v > 0.

=cut

  ',

);


pp_def('mme_nbd',
  Pars      => 'a(n); float+ [o]r(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(p) sa, a2, m, v;
    sa = 0; a2 = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
      a2 += pow($a(), 2);
    %}
    m = sa / N;
    v = a2 / N - pow(m, 2);
    $r() = pow(m, 2) / (v - m);
    $p() = m / v; 
  ',
  BadCode   => '
    $GENERIC(p) sa, a2, m, v;
    sa = 0; a2 = 0;
    long N = 0;
    loop (n) %{
      if ($ISGOOD( $a() )) {
	sa += $a();
	a2 += pow($a(), 2);
        N ++;
      }
    %}
    if (N) {
      m = sa / N;
      v = a2 / N - pow(m, 2);
      $r() = pow(m, 2) / (v - m);
      $p() = m / v; 
    }
    else {
      $SETBAD(r());
      $SETBAD(p());
    }
  ',
  Doc      => '

=for usage

    my ($r, $p) = $data->mme_nbd();

=for ref

negative binomial distribution. pmf: f(x; r,p) = (x+r-1  r-1) p^r (1-p)^x for x=0,1,2...

=cut

  ',

);

pp_def('pmf_nbd',
  Pars      => 'ushort x(); r(); p(); float+ [o]out()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '

  $GENERIC(out) nbc
    = gsl_sf_gamma($x()+$r()) / (gsl_sf_fact($x()) * gsl_sf_gamma($r()));
  $out() = nbc * pow($p(),$r()) * pow(1-$p(), $x());

  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($r()) || $ISBAD($p()) ) {
  $SETBAD( $out() );
}
else {
  $GENERIC(out) nbc
    = gsl_sf_gamma($x()+$r()) / (gsl_sf_fact($x()) * gsl_sf_gamma($r()));
  $out() = nbc * pow($p(),$r()) * pow(1-$p(), $x());
}

  ',
  Doc      => '

=for ref

probability mass function for negative binomial distribution.

=cut

  ',

);


pp_def('mme_pareto',
  Pars      => 'a(n); float+ [o]k(); float+ [o]xm()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(xm) sa, min;
    sa = 0; min = $a(n=>0);
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
      if (min > $a())
        min = $a();
    %}
    if (min > 0) {
      $k()  = (sa - min) / ( N*( sa/N - min ) );
      $xm() = (N * $k() - 1) * min / ( N * $k() );
    }
    else {
      barf("min <= 0!");
    }
  ',
  BadCode   => '
    $GENERIC(xm) sa, min;
    sa = 0; min = $a(n=>0);
    long N = 0;
    loop (n) %{
      if ( $ISGOOD($a()) ) {
        sa += $a();
	if (min > $a())
	  min = $a();
        N ++;
      }
    %}
    if (min > 0) {
      $k()  = (sa - min) / ( N*( sa/N - min ) );
      $xm() = (N * $k() - 1) * min / ( N * $k() );
    }
    else {
      barf("min <= 0!");
    }
  ',
  Doc      => '

=for usage

    my ($k, $xm) = $data->mme_pareto();

=for ref

pareto distribution. pdf: f(x; k,xm) = k xm^k / x^(k+1) for x >= xm > 0.

=cut

  ',

);

pp_def('pdf_pareto',
  Pars      => 'x(); k(); xm(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '

  if ( $xm() > 0 && $x() >= $xm() ) {
    $p() = $k() * pow($xm(),$k()) / pow($x(), $k()+1);
  }
  else {
    barf("x >= xm > 0 please");
  }

  ',
  BadCode   => '

if ( $ISBAD($x()) || $ISBAD($k()) || $ISBAD($xm()) ) {
  $SETBAD( $p() );
}
else {
  if ( $xm() > 0 && $x() >= $xm() ) {
    $p() = $k() * pow($xm(),$k()) / pow($x(), $k()+1);
  }
  else {
    barf("x >= xm > 0 please");
  }
}

  ',
  Doc      => '

=for ref

probability density function for pareto distribution. x >= xm > 0.

=cut

  ',

);

pp_def('mle_poisson',
  Pars      => 'a(n); float+ [o]l();',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => '
    $GENERIC(l) sa;
    sa = 0;
    long N = $SIZE(n);
    loop (n) %{
      sa += $a();
    %}
    $l()  = sa / N;
  ',
  BadCode   => '
    $GENERIC(l) sa;
    sa = 0;
    long N = 0;
    loop (n) %{
      if ( $ISGOOD($a()) ) {
        sa += $a();
        N ++;
      }
    %}
    if (N) { $l()  = sa / N; }
    else   { $SETBAD(l()); }
  ',
  Doc      => '

=for usage

    my $lamda = $data->mle_poisson();

=for ref

poisson distribution. pmf: f(x;l) = e^(-l) * l^x / x!

=cut

  ',

);

pp_def('pmf_poisson',
  Pars      => 'x(); l(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => q{

  if ($x() < 0) {
    $p() = 0;
  }
  else if ($x() < GSL_SF_FACT_NMAX / 2) {
    /* Exact formula */
    $p() = exp( -1 * $l()) * pow($l(),$x()) / gsl_sf_fact( (unsigned int) $x() );
  }
  else {
    /* Use Stirling's approximation. See
     * http://en.wikipedia.org/wiki/Stirling%27s_approximation
     */
    double log_p = $x() - $l() + $x() * log($l() / $x())
      - 0.5 * log(2*M_PI * $x()) - 1. / 12. / $x()
      + 1 / 360. / $x()/$x()/$x() - 1. / 1260. / $x()/$x()/$x()/$x()/$x();
    $p() = exp(log_p);
  }

  },
  BadCode   => q{

  if ( $ISBAD($x()) || $ISBAD($l()) ) {
    $SETBAD( $p() );
  }
  else {
    if ($x() < 0) {
      $p() = 0;
    }
    else if ($x() < GSL_SF_FACT_NMAX / 2) {
      /* Exact formula */
      $p() = exp( -1 * $l()) * pow($l(),$x()) / gsl_sf_fact( (unsigned int) $x() );
    }
    else {
      /* Use Stirling's approximation. See
       * http://en.wikipedia.org/wiki/Stirling%27s_approximation
       */
      double log_p = $x() - $l() + $x() * log($l() / $x())
        - 0.5 * log(2*M_PI * $x()) - 1. / 12. / $x()
        + 1 / 360. / $x()/$x()/$x() - 1. / 1260. / $x()/$x()/$x()/$x()/$x();
      $p() = exp(log_p);
    }
  }

  },
  Doc      => q{

=for ref

Probability mass function for poisson distribution. Uses Stirling's formula for x > 85.

=cut

  },

);

pp_def('pmf_poisson_stirling',
  Pars      => 'x(); l(); [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => q{

  if ($x() < 0) {
    $p() = 0;
  }
  else if ($x() == 0) {
    $p() = exp(-$l());
  }
  else {
    /* Use Stirling's approximation. See
     * http://en.wikipedia.org/wiki/Stirling%27s_approximation
     */
    double log_p = $x() - $l() + $x() * log($l() / $x())
      - 0.5 * log(2*M_PI * $x()) - 1. / 12. / $x()
      + 1 / 360. / $x()/$x()/$x() - 1. / 1260. / $x()/$x()/$x()/$x()/$x();
    $p() = exp(log_p);
  }

  },
  BadCode   => q{

  if ( $ISBAD($x()) || $ISBAD($l()) ) {
    $SETBAD( $p() );
  }
  else if ($x() < 0) {
    $p() = 0;
  }
  else if ($x() == 0) {
    $p() = exp(-$l());
  }
  else {
    /* Use Stirling's approximation. See
     * http://en.wikipedia.org/wiki/Stirling%27s_approximation
     */
    double log_p = $x() - $l() + $x() * log($l() / $x())
      - 0.5 * log(2*M_PI * $x()) - 1. / 12. / $x()
      + 1 / 360. / $x()/$x()/$x() - 1. / 1260. / $x()/$x()/$x()/$x()/$x();
    $p() = exp(log_p);
  }

  },
  Doc      => q{

=for ref

Probability mass function for poisson distribution. Uses Stirling's formula for all values of the input. See http://en.wikipedia.org/wiki/Stirling's_approximation for more info.

=cut

  },

);



pp_addpm(<<'EOD');

=head2 pmf_poisson_factorial

=for sig

  Signature: ushort x(); l(); float+ [o]p()

=for ref

Probability mass function for poisson distribution. Input is limited to x < 170 to avoid gsl_sf_fact() overflow.

=cut

*pmf_poisson_factorial = \&PDL::pmf_poisson_factorial;
sub PDL::pmf_poisson_factorial {
	my ($x, $l) = @_;

	my $pdlx = pdl($x);
	if (any( $pdlx >= 170 )) {
		croak "Does not support input greater than 170. Please use pmf_poisson or pmf_poisson_stirling instead.";
	} else {
		return _pmf_poisson_factorial(@_);
	}
}

EOD

pp_def('_pmf_poisson_factorial',
  Pars      => 'ushort x(); l(); float+ [o]p()',
  GenericTypes => [F,D],
  HandleBad => 1,
  Code      => q{

  if ($x() < GSL_SF_FACT_NMAX) {
    $p() = exp( -1 * $l()) * pow($l(),$x()) / gsl_sf_fact( $x() );
  }
  else {
    /* bail out */
    $p() = 0;
  }

  },
  BadCode   => q{

  if ( $ISBAD($x()) || $ISBAD($l()) ) {
    $SETBAD( $p() );
  }
  else {
    if ($x() < GSL_SF_FACT_NMAX) {
      $p() = exp( -1 * $l()) * pow($l(),$x()) / gsl_sf_fact( $x() );
    }
    else {
      $p() = 0;
    }
  }

  },
  Doc      => undef,

);

pp_addpm({At=>'Bot'}, <<'EOD');

=head2 plot_distr

=for ref

Plots data distribution. When given specific distribution(s) to fit, returns % ref to sum log likelihood and parameter values under fitted distribution(s). See FUNCTIONS above for available distributions. 

=for options

Default options (case insensitive):

    MAXBN => 20, 
      # see PDL::Graphics::PGPLOT::Window for next options
    WIN   => undef,   # pgwin object. not closed here if passed
                      # allows comparing multiple distr in same plot
                      # set env before passing WIN
    DEV   => '/xs' ,  # open and close dev for plotting if no WIN
                      # defaults to '/png' in Windows
    COLOR => 1,       # color for data distr

=for usage

Usage:

      # yes it threads :)
    my $data = grandom( 500, 3 )->abs;
      # ll on plot is sum across 3 data curves
    my ($ll, $pars)
      = $data->plot_distr( 'gaussian', 'lognormal', {DEV=>'/png'} );

      # pars are from normalized data (ie data / bin_size)
    print "$_\t@{$pars->{$_}}\n" for (sort keys %$pars);
    print "$_\t$ll->{$_}\n" for (sort keys %$ll);

=cut

*plot_distr = \&PDL::plot_distr;
sub PDL::plot_distr {
  if (!$PGPLOT) {
    carp "No PDL::Graphics::PGPLOT, no plot :(";
    return;
  }
  my ($self, @distr) = @_;

  my %opt = (
    MAXBN => 20, 
    WIN   => undef,     # pgwin object. not closed here if passed
    DEV   => $DEV,      # open and close default win if no WIN
    COLOR => 1,         # color for data distr
  );
  my $opt = pop @distr
    if ref $distr[-1] eq 'HASH';
  $opt and $opt{uc $_} = $opt->{$_} for (keys %$opt);

  $self = $self->squeeze;

    # use int range, step etc for int xvals--pmf compatible
  my $INT = 1
    if grep { /(?:binomial)|(?:geo)|(?:nbd)|(?:poisson)/ } @distr;

  my ($range, $step, $step_int);
  $range = $self->max - $self->min;
  $step  = $range / $opt{MAXBN};
  $step_int = ($range <= $opt{MAXBN})? 1 
            :                          PDL::ceil( $range / $opt{MAXBN} )
            ;
    # use min to make it pure scalar for sequence()
  $opt{MAXBN} = PDL::ceil( $range / $step )->min;

  my $hist = $self->double->histogram($step, $self->min, $opt{MAXBN});
    # turn fre into prob
  $hist /= $self->dim(0);

  my $xvals = $self->min + sequence( $opt{MAXBN} ) * $step;
  my $xvals_int
    = PDL::ceil($self->min) + sequence( $opt{MAXBN} ) * $step_int;
  $xvals_int = $xvals_int->where( $xvals_int <= $xvals->max )->sever;

  my $win = $opt{WIN};
  if (!$win) {
    $win = pgwin( Dev=>$opt{DEV} );
    $win->env($xvals->minmax,0,1, {XTitle=>'xvals', YTitle=>'probability'});
  }

  $win->line( $xvals, $hist, { COLOR=>$opt{COLOR} } );

  if (!@distr) {
    $win->close
      unless defined $opt{WIN};
    return;
  }

  my (%ll, %pars, @text, $c);
  $c = $opt{COLOR};        # fitted lines start from ++$c
  for my $distr ( @distr ) {
      # find mle_ or mme_$distr;
    my @funcs = grep { /_$distr$/ } (keys %PDL::Stats::Distr::);
    if (!@funcs) {
      carp "Do not recognize $distr distribution!";
      next;
    }
      # might have mle and mme for a distr. sort so mle comes first 
    @funcs = sort @funcs;
    my ($f_para, $f_prob) = @funcs[0, -1];

    my $nrmd = $self / $step;
    eval {
      my @paras = $nrmd->$f_para();
      $pars{$distr} = \@paras;
  
      @paras = map { $_->dummy(0) } @paras;
      $ll{$distr} = $nrmd->$f_prob( @paras )->log->sumover;
      push @text, sprintf "$distr  LL = %.2f", $ll{$distr}->sum;

      if ($f_prob =~ /^pdf/) { 
        $win->line( $xvals, ($xvals/$step)->$f_prob(@paras), {COLOR=>++$c} );
      }
      else {
        $win->points( $xvals_int, ($xvals_int/$step_int)->$f_prob(@paras), {COLOR=>++$c} );
      }
    };
    carp $@ if $@;
  }
  $win->legend(\@text, ($xvals->min + $xvals->max)/2, .95,
               {COLOR=>[$opt{COLOR}+1 .. $c], TextFraction=>.75} );
  $win->close
    unless defined $opt{WIN};
  return (\%ll, \%pars);
}

=head1 DEPENDENCIES

GSL - GNU Scientific Library

=head1 SEE ALSO

PDL::Graphics::PGPLOT

PDL::GSL::CDF

=head1 AUTHOR

Copyright (C) 2009 Maggie J. Xiong <maggiexyz users.sourceforge.net>, David Mertens

All rights reserved. There is no warranty. You are allowed to redistribute this software / documentation as described in the file COPYING in the PDL distribution.

=cut

EOD

pp_done();