The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# BioPerl module for Bio::Community::Tools::Accumulator
#
# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Copyright 2011-2014 Florent Angly <florent.angly@gmail.com>
#
# You may distribute this module under the same terms as perl itself


=head1 NAME

Bio::Community::Tools::Accumulator - Species accumulation curves

=head1 SYNOPSIS

  use Bio::Community::Tools::Accumulator;

  # A collector curve 
  my $collector = Bio::Community::Tools::Accumulator->new(
     -metacommunity => $meta,
     -type          => 'collector',
  );
  my $numbers = $collector->get_numbers;
  # or
  my $strings = $collector->get_strings;

  # A rarefaction curve, with custom parameters
  my $rarefaction = Bio::Community::Tools::Accumulator->new(
     -metacommunity   => $meta,
     -type            => 'rarefaction',
     -num_repetitions => 100,
     -num_ticks       => 8,
     -tick_spacing    => 'linear', 
     -alpha_types     => ['simpson', 'shannon'],
  );
  my $numbers = $rarefaction->get_numbers;

=head1 DESCRIPTION

This module takes a metacommunity and produces one of two types of species
accumulation curves: a rarefaction curve or a collector curve.

In a rarefaction curve, an increasing number of randomly drawn members is
sampled from the given communities and alpha diversity is calculated. In a
collector curve, an increasing number of communities is randomly drawn and
combined and their cumulative alpha diversity is determined. 

The average alpha diversity for the different sampling sizes is reported either
as array references or a tab-delimited string. Note that no plot is actually
drawn.

=head1 AUTHOR

Florent Angly L<florent.angly@gmail.com>

=head1 SUPPORT AND BUGS

User feedback is an integral part of the evolution of this and other Bioperl
modules. Please direct usage questions or support issues to the mailing list, 
L<bioperl-l@bioperl.org>, rather than to the module maintainer directly. Many
experienced and reponsive experts will be able look at the problem and quickly 
address it. Please include a thorough description of the problem with code and
data examples if at all possible.

If you have found a bug, please report it on the BioPerl bug tracking system
to help us keep track the bugs and their resolution:
L<https://redmine.open-bio.org/projects/bioperl/>

=head1 COPYRIGHT

Copyright 2011-2014 by Florent Angly <florent.angly@gmail.com>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=head2 new

 Function: Create a new Bio::Community::Tool::Accumulator object
 Usage   : my $accumulator = Bio::Community::Tool::Accumulator->new( );
 Args    : -metacommunity: see metacommunity()
           -type            : 'rarefaction' or 'collector'
           -num_repetitions : see num_repetitions()
           -num_ticks       : see num_ticks()
           -tick_spacing    : see tick_spacing()
           -alpha_types     : see alpha_types()
           -seed            : see set_seed()
 Returns : a new Bio::Community::Tools::Accumulator object

=cut


package Bio::Community::Tools::Accumulator;

use Moose;
use MooseX::NonMoose;
use MooseX::StrictConstructor;
use namespace::autoclean;
use Bio::Community::Alpha;
use Bio::Community::Meta;
use Bio::Community::Tools::Rarefier;
use List::Util qw( shuffle );
use Method::Signatures;

extends 'Bio::Root::Root';


=head2 metacommunity

 Function: Get or set the metacommunity to normalize.
 Usage   : my $meta = $accumulator->metacommunity;
 Args    : A Bio::Community::Meta object
 Returns : A Bio::Community::Meta object

=cut

has metacommunity => (
   is => 'rw',
   isa => 'Maybe[Bio::Community::Meta]',
   required => 0,
   default => undef,
   lazy => 1,
   init_arg => '-metacommunity',
);


=head2 type

 Function: Get or set the type of accumulation curve to produce.
 Usage   : my $type = $accumulator->type;
 Args    : String of the accumulation type: 'rarefaction' (default) or 'collector'
 Returns : String of the accumulation type

=cut

has type => (
   is => 'rw',
   isa => 'AccumulationType',
   required => 0,
   default => 'rarefaction',
   lazy => 1,
   init_arg => '-type',
);


=head2 num_repetitions

 Function: Get or set the number of num_repetitions to do at each sampling depth.
 Usage   : my $num_repetitions = $accumulator->num_repetitions;
 Args    : positive integer for the number of repetitions
 Returns : positive integer for the number of repetitions

=cut

has num_repetitions => (
   is => 'rw',
   isa => 'Maybe[PositiveInt | Str]',
   required => 0, 
   default => 10,
   lazy => 1,
   init_arg => '-num_repetitions',
);


=head2 num_ticks

 Function: For rarefaction curves, get or set how many different numbers of
           individuals to sample, for the smallest community. This number may
           not always be honored because ticks have to be integer numbers.
 Usage   : my $num_ticks = $accumulator->num_ticks;
 Args    : positive integer for the number of ticks (default: 10)
 Returns : positive integer for the number of ticks

=cut

has num_ticks => (
   is => 'rw',
   isa => 'NumTicks',
   required => 0, 
   default => 12,
   lazy => 1,
   init_arg => '-num_ticks',
);


=head2 tick_spacing

 Function: Get or set the type of spacing between the ticks of a rarefaction
           curve.
 Usage   : my $tick_spacing = $accumulator->tick_spacing;
 Args    : String, either 'logarithmic' (default) or 'linear'
 Returns : String

=cut

has tick_spacing => (
   is => 'rw',
   isa => 'SpacingType',
   required => 0, 
   default => 'logarithmic',
   lazy => 1,
   init_arg => '-tick_spacing',
);


=head2 alpha_types

 Function: Get or set the type of alpha diversity to calculate.
 Usage   : my $alphas = $accumulator->alpha_types;
 Args    : Arrayref of alpha diversity types (['observed'] by default).
           See C<type()> in L<Bio::Community::Alpha> for details.
 Returns : Arrayref of alpha diversity types.

=cut

has alpha_types => (
   is => 'rw',
   isa => 'ArrayRef[AlphaType]',
   required => 0, 
   default => sub { ['observed'] },
   lazy => 1,
   init_arg => '-alpha_types',
);


=head2 verbose

 Function: Get or set verbose mode. This displays the number of ticks to use
           before the accumulation curve itself is computed.
 Usage   : $accumulator->verbose(1);
 Args    : 0 (default) or 1
 Returns : 0 or 1

=cut

has verbose => (
   is => 'rw',
   isa => 'Bool',
   required => 0, 
   default => 0,
   lazy => 1,
   init_arg => '-verbose',
);


=head2 get_numbers

 Function: Calculate the accumulation curve and return the numbers.
 Usage   : my $nums = $accumulator->get_numbers;
 Args    : none
 Returns : A structure containing the average alpha diversity of the communities
           for each tick value, for each requested alpha type:
              { alpha_type => [ [tick, alpha1, alpha2, ... ], ... ], ... }

=cut

method get_numbers {

   # Sanity check
   my $meta = $self->metacommunity;
   if ( (not $meta) || ($meta->get_communities_count == 0) ) {
      $self->throw('Should have a metacommunity containing at least one community');
   }

   my $ticks = $self->_get_ticks(); # Determine range of sample sizes
   print "Using ticks: ".join(' ', @$ticks)."\n" if $self->verbose;

   my $rarefier;
   my $comm_names = [map { $_->name } @{$meta->get_all_communities}];
   if ($self->type eq 'rarefaction') {
      # Rarefaction curve
      $rarefier = Bio::Community::Tools::Rarefier->new(
         -metacommunity   => $meta,
         -num_repetitions => 1,
         -drop            => 1,
         -verbose         => 0,
      );
   } else {
      # Collector curve
      $comm_names = ['collector'];
   }

   my $avg_alphas;
   for my $tick (@$ticks) {

      for my $rep (1 .. $self->num_repetitions) {

         # Rarefy or collect communities
         my $acc_meta;
         if ($rarefier) {
            $rarefier->sample_size($tick);
            $acc_meta = $rarefier->get_repr_meta;
         } else {
            $acc_meta = $self->_collect_comms($tick);
         }

         for my $alpha (@{$self->alpha_types}) {

            # Calculate the alpha diversity of the communities
            for my $i (0 .. $#$comm_names) {
               my $acc_comm = $rarefier ?
                          $acc_meta->get_community_by_name($comm_names->[$i]) :
                          $acc_meta->get_metacommunity;

               my $val = Bio::Community::Alpha->new(
                              -community => $acc_comm,
                              -type      => $alpha,
                           )->get_alpha 
                           if $acc_comm;

               if (defined $val) {
                  $avg_alphas->{$alpha}->{$tick}->[$i] += $val;
               } else {
                  $avg_alphas->{$alpha}->{$tick}->[$i] = undef;
               }

            }

         }

      }

   }

   # Divide results to obtain averages and format in a friendly way
   my $res;
   for my $alpha (@{$self->alpha_types}) {
      for my $tick (@$ticks) {
         my @vals = map { defined($_) ? $_ / $self->num_repetitions : '' } @{$avg_alphas->{$alpha}->{$tick}};
         push @{$res->{$alpha}}, [$tick, @vals];
      }
   }

   return $res;
}


=head2 get_strings

 Function: Calculate the accumulation curves and return them as strings.
 Usage   : my $strings = $accumulator->get_strings;
 Args    : none
 Returns : A arrayref of strings, each of which represents the accumulation
           curve for a given alpha diversity type.

=cut

method get_strings {
   my $comm_names = [map {$_->name} @{$self->metacommunity->get_all_communities}];
   my $header = join("\t", '', @$comm_names)."\n";
   my $nums = $self->get_numbers;
   my @res;
   for my $alpha (@{$self->alpha_types}) {
      my $str = '# '.$alpha.$header;
      for my $row (@{$nums->{$alpha}}) {
         $str .= join("\t", @$row)."\n";
      }
      push @res, $str;
   }
   return \@res;
}


method _collect_comms ($num) {
   # Create a community that is the combination of a random subset of the
   # communities in the given metacommunity
   my @rand_comms = shuffle @{$self->metacommunity->get_all_communities};
   @rand_comms = @rand_comms[0..$num-1];
   my $meta = Bio::Community::Meta->new( -communities => \@rand_comms );   
   return $meta;
}


method _get_ticks {
   # Calculate which number of members or communities to sample from for the
   # accumulation curve.

   # Sanity check
   my $meta = $self->metacommunity;
   if ( (not $meta) || ($meta->get_communities_count == 0) ) {
      $self->throw('Should have a metacommunity containing at least one community');
   }

   my @ticks;
   if ($self->type eq 'collector') {
      # Ticks for a collector curve, i.e. number of communities
      @ticks = 0 .. $meta->get_communities_count;

   } else {
      # Ticks for a rarefaction curve, i.e. number of individuals
      my $comms = $meta->get_all_communities;

      my $counts = [ map {$_->get_members_count} @$comms ];
      my $sort_order = [ sort { $counts->[$a] <=> $counts->[$b] } 0..$#$comms ];
      my $min_count = $counts->[0];
      my $max_count = $counts->[1];

      @ticks = (0);
      my $num_ticks = $self->num_ticks - 1;
      my $linear_spacing = $self->tick_spacing eq 'linear';

      my $param = $linear_spacing ?
                  ($min_count-1) / ($num_ticks-1) :
                  ($min_count-1) / (exp($num_ticks-1)-1);

      my $tick_num = -1;
      for my $i (@$sort_order) {
         my $count = $counts->[$i];
         $count = int( $count + 0.5 ); # round
         my $tick;
         while (1) {
            $tick_num++;
            $tick = $linear_spacing ?
                    1 + $tick_num * $param :
                    $param*(exp($tick_num)-1)+1;
            $tick = int( $tick + 0.5 ); # round
            if ($tick < $count) {
               push @ticks, $tick  if $tick  != $ticks[-1]; # avoid dups
            } else {
               push @ticks, $count if $count != $ticks[-1]; # avoid dups
               $tick_num-- if $tick > $count;
               last;
            }
         }
      }

   }

   return \@ticks;
}

__PACKAGE__->meta->make_immutable;

1;