The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# BioPerl module for Bio::Community::Tools::ShrapnelCleaner
#
# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Copyright 2011-2014 Florent Angly <florent.angly@gmail.com>
#
# You may distribute this module under the same terms as perl itself


=head1 NAME

Bio::Community::Tools::ShrapnelCleaner - Remove low-count, low-abundance community members

=head1 SYNOPSIS

  use Bio::Community::Tools::ShrapnelCleaner;

  # Remove singletons the communities in the given metacommunity
  my $cleaner = Bio::Community::Tools::ShrapnelCleaner->new(
     -metacommunity => $meta,
  );
  $cleaner->clean;

=head1 DESCRIPTION

This module takes biological communities (contained in a metacommunity) and
removes shrapnel, low abundance, low prevalence members that are likely to be
the result of sequencing errors (when doing sequence-based analyses). By default,
the cleaner removes only singletons, i.e. community members that appear in only
one community (prevalence of 1) and have only 1 count. You can specify your own
count and prevalence thresholds though.

=head1 AUTHOR

Florent Angly L<florent.angly@gmail.com>

=head1 SUPPORT AND BUGS

User feedback is an integral part of the evolution of this and other Bioperl
modules. Please direct usage questions or support issues to the mailing list, 
L<bioperl-l@bioperl.org>, rather than to the module maintainer directly. Many
experienced and reponsive experts will be able look at the problem and quickly 
address it. Please include a thorough description of the problem with code and
data examples if at all possible.

If you have found a bug, please report it on the BioPerl bug tracking system
to help us keep track the bugs and their resolution:
L<https://redmine.open-bio.org/projects/bioperl/>

=head1 COPYRIGHT

Copyright 2011-2014 by Florent Angly <florent.angly@gmail.com>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.10.1 or,
at your option, any later version of Perl 5 you may have available.

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=head2 new

 Function: Create a new Bio::Community::Tool::ShrapnelCleaner object
 Usage   : my $cleaner = Bio::Community::Tool::ShrapnelCleaner->new( );
 Args    : -metacommunity       : See metacommunity().
           -count_threshold     : See count_threshold().
           -prevalence_threshold: See prevalence_threshold().
 Returns : a new Bio::Community::Tools::ShrapnelCleaner object

=cut


package Bio::Community::Tools::ShrapnelCleaner;

use Moose;
use MooseX::NonMoose;
use MooseX::StrictConstructor;
use Method::Signatures;
use namespace::autoclean;

extends 'Bio::Root::Root';


=head2 metacommunity

 Function: Get or set the communities to process.
 Usage   : my $communities = $cleaner->communities;
 Args    : A Bio::Community::Meta object
 Returns : A Bio::Community::Meta object

=cut

has metacommunity => (
   is => 'rw',
   isa => 'Maybe[Bio::Community::Meta]',
   required => 0,
   default => undef,
   lazy => 1,
   init_arg => '-metacommunity',
);


=head2 count_threshold

 Function: Get or set the count threshold. Community members with a count equal
           or lower than this threshold are removed (provided they also meet the
           prevalence_threshold).
 Usage   : my $count_thresh = $cleaner->count_threshold;
 Args    : positive integer for the count
 Returns : positive integer for the count

=cut

has count_threshold => (
   is => 'rw',
   isa => 'Maybe[PositiveNum]',
   required => 0, 
   default => 1,
   lazy => 1,
   init_arg => '-count_threshold',
);


=head2 prevalence_threshold

 Function: Get or set the prevalence threshold. Community members with a
           prevalence (number of communities that the member is found in) equal
           or lower than this threshold are removed (provided they also meet the
           count_threshold).
 Usage   : my $prevalence_thresh = $cleaner->prevalence_threshold;
 Args    : positive integer for the prevalence
 Returns : positive integer for the prevalence

=cut

has prevalence_threshold => (
   is => 'rw',
   isa => 'Maybe[PositiveNum]',
   required => 0, 
   default => 1,
   lazy => 1,
   init_arg => '-prevalence_threshold',
);


=head2 clean

 Function: Remove singletons from the communities and return the updated
           metacommunity.
 Usage   : my $meta = $cleaner->clean;
 Args    : none
 Returns : arrayref of Bio::Community objects

=cut

method clean () {
   # Sanity check
   my $meta = $self->metacommunity;
   if ( (not $meta) || ($meta->get_communities_count == 0) ) {
      $self->throw('Should have a metacommunity containing at least one community');
   }

   # Remove singletons
   my $count_thres = $self->count_threshold;
   my $prevalence_thres = $self->prevalence_threshold;
   for my $member ( @{$meta->get_all_members} ) {
      my $total_count = 0; # sum of member counts in all communities
      my $prevalence  = 0; # in how many communities was the member seen
      while (my $community = $meta->next_community) {
         my $count = $community->get_count($member);
         if ($count > 0) {
            $prevalence++;
            $total_count += $count;
         }
      }
      if ( ($total_count <= $count_thres) && ($prevalence <= $prevalence_thres) ) {
         # Remove all of this member
         while (my $community = $meta->next_community) {
            $community->remove_member($member);
         }
      }
   }

   return $meta;
}


__PACKAGE__->meta->make_immutable;

1;