The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
use strict;

use InSilicoSpectro;
use InSilicoSpectro::InSilico::MassCalculator;


package InSilicoSpectro::Spectra::Filter::MSFilterAlgorithm;


use InSilicoSpectro::Spectra::MSSpectra;
#use InSilicoSpectro::Spectra::MSMSSpectra;
#use InSilicoSpectro::Spectra::MSRun;

require Exporter;
our (@ISA,@EXPORT,@EXPORT_OK, $VERSION);
@ISA=qw (Exporter InSilicoSpectro::Spectra::Filter::MSFilter);
@EXPORT=qw($VERSION);
@EXPORT_OK=qw();


=head1 NAME

InSilicoSpectro::Spectra::Filter::MSFilterAlgorithm

=head1 SYNOPSIS

my $file= "a.mgf";
my $format="mgf";
my $sp=InSilicoSpectro::Spectra::MSSpectra->new(source=>$file, format=>$format);
$sp->open();

my $sf = new InSilicoSpectro::Spectra::Filter::MSFilter();
$sf->readXml('a.xml');
$sf->filterSpectra($sp);

$sp->write('mgf', "another.mgf");

=head1 DESCRIPTION

This class allows for quality assasement and filtering of spectra or peaks using algorithms mainly presented in the following paper:

M. Bern et al.
Vol. 20 Suppl. 1 2004, pages i49-i54
Bioinformatics

=head1 FUNCTIONS

=item compare_float($float_A, $comparator, $float_B, $precision)

This function is used to corretctly compare two floating-point values using a certain precision and the comparator (<, >, <=, ..). Makes it slower but it's still faster than using Math::FixedPrecision.

=head1 METHODS

=over 4

=item my $sf=InSilicoSpectro::Spectra::Filter::MSFilterAlogrithm->new()

create a new object.

=item $sf->readXml($filename)

opens the provided XML-file containing the information which filter to use and its parameters. 

=item $sf->readTwigEl($el)

reads the TwigEl passed by $el and executes twig_addSpectrumFilter() with this values.

=item $sf->filterSpectra($Spectra)

apply the XML-file previously loaded on the Spectra (can be MSRun, MS, MSMS, or MSCmpd-Spectra).

=item $sf->applyAction();

applies the action on the currentSpectra under the threshold conditions.

=item $sf->computeFilterValue($sp);

only prepared for further classes inheriting from this one.

=item $sf->checkValidity();

checks if the different values provided by the xml are valid.

=item $sf->smartPeaks();

puts a score for each peak into filterValue. The score is higher for peaks with high intensities but gets lower if they're in a region with high-intensity peaks and or a lot of peaks. A high weightIntensity-value puts more weight into regions of high peaks. A high weightDensity-value puts  more weight into regions of high peak density.

=item $sf->selectPeakWindow();

divides the whole a moz-adefined number of bands and makes a normalization of the intensities in each of those bands. 


=item $sf->fragment([$val]);

gets the values (moz, intensity) from the fragments of the currentSpectra (has to be an MSCmpd). The results are stored into filterValue. Which values should be extracted can be selected by $val, otherwise the next name in the filterName is used.

=item $sf->precursor([$val]);

gets the values (moz, intensity) from the precursors of the currentSpectra (has to be an MSMSSpectra). The The results are stored into filterValue. Which values should be extracted can be selected by $val, otherwise the next name in the filterName is used.

=item $sf->intensity();

calls fragment("intensity"). Allows you just to write intensity as a filter-name instead.

=item $sf->moz();

calls fragment("moz"). Allows you just to write moz as a filter-name instead.

=item $sf->normRank();

calculates the rank from the intensity of the fragments from the smallest first and biggest last (1, 3, 3, 4, 5, ..) and divides it by the total number of peaks.

=item $sf->normRankBern();

makes the normalisation of the int-peaks as described in the paper from M. Bern et al. Because we can make a selection of peaks to be tested before, it doesnt make a lot of sense in this context.

=item $sf->banishNeighbors();

takes off the neighbouring peaks close to the selected ones (change number to select by selectStrongest). The region where the peaks should be taken off can be adjusted by banishRange

=item $sf->currentSpectra([$sp]);

set or get a current spectra.

=item $sf->thresholdValue([$val]);

set or get the threshold value.

=item $sf->filterValue([$val]);

set or get the filter value.

=head1 EXAMPLES

see the MSFilterAlgorithm.t for an example.

=head1 SEE ALSO

search.cpan.org

=head1 COPYRIGHT

Copyright (C) 2004-2005  Geneva Bioinformatics www.genebio.com

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

=head1 AUTHORS

Roman Mylonas, www.genebio.com

=cut



use File::Basename;
use Carp;
use List::Util qw(max min sum first);


$VERSION = "0.9";

###############################



sub new{
  
 my $pkg = shift;

 my $self = {};
 my $class = ref($pkg) || $pkg;
 bless $self, $class;
 return $self;
}


##############################
### read xml 

#get the parameters used by this algorithm

sub readXmlFilterType{
  my ($this)= @_;


   my $el= $this->currentTwigEl();


  my %param;
  
  foreach($el->get_xpath('*')){
    $param{$_->atts->{name}}= $_->text;
  }
  
  my $filter_name=  $param{''};
  delete $param{''};
  
  my @names= split(/\./, $filter_name);

  #if we need to come back to the original
  $this->originalFilterName(\@names);

  $this->param(\%param);


}


#################################################
### computeFilterValue and the methods it uses

#to set the precision of float-values after comma used by method compare_float
my $precision = 5;

############################################################  

#uses parameter weightIntensity to put more weight into regions near very high peaks
#parameter weightDensity to put more weight into regions with a lot of peaks

sub smartPeaks{
  my $this= shift;
  require Statistics::Basic::Mean;
  require Statistics::Basic::StdDev;

  my @moz;
  my @int;

  #use this variable in case, there's no peak at all in the spectra
  my $peaks=0;

  foreach(@{$this->currentSpectra()->spectrum()}){
    #push @{$this->filterValue()}, $_->[$pdInd];
    push @int, $_->[$this->{fragPD}->{intensity}];
    push @moz, $_->[$this->{fragPD}->{moz}];
  }

  my $win_size= $this->param()->{winSize};
  my $step_size= $this->param()->{stepSize};
  my ($min_moz,$max_moz) = (min(@moz), max(@moz));
  my ($max_int) = (max(@int));
  my @mean_win_int;

  my $last=0;

  #go through the whole spectrum
  #for(my $i=$min_moz; &compare_float($i+$win_size, '<=', $max_moz+$step_size, $precision); $i+= $step_size){
  for(my $i=$min_moz; $i+$win_size <= $max_moz+$step_size; $i+= $step_size){
    my $found=0;
    my ($start_pos, $end_pos)= (undef, undef);

    #to prevent the window of going further than the last peak of the spectra
    if($i+$win_size > $max_moz){
      $i= $max_moz-$win_size;
      $last= 1;
    }

    #get the start and end-index of the peaks in the current-window
    #my @current_win= map {&compare_float($_, '>=', $i, $precision) and &compare_float($_, '<=', $i+$win_size, $precision)} @moz;
    my @current_win= map {($_ >=$i) and ($_<=$i+$win_size)} @moz;

    foreach(0..$#current_win){
      if(!defined $start_pos){
	$start_pos= $_ if($current_win[$_]);
	next;
      }elsif(!$current_win[$_]){
	$end_pos= $_-1;
	last;
      }
    }

    #to get the last position if this is a true value
    $end_pos= $#current_win unless defined $end_pos;

    #if there are no peaks in the current window
    next unless defined $start_pos;

    #calculate the mean of the current window
    my @actual_window=@int[$start_pos..$end_pos];
    my $sum= sum @actual_window;

    #weightDensity to put more weight into windows with a lot of peaks
    my $mean= $sum/ ((scalar @actual_window)**(1/$this->param()->{weightDensity}));

    #the mean intensity of this window is stored for all the peaks ($j) in the current window
    for(my $j=$start_pos; $j<=$end_pos; $j++){
	push @{$mean_win_int[$j]}, $mean;
    }

    #ok, there was a peak
    $peaks=1;

    last if $last;
  }

  #use $this->{copy} in MSFilter::applyAction for debugging..
  #my @cp = @int;
  #$this->{copy}= \@cp;

  #no peaks, so just leave this function without touching anything!
  return $this unless $peaks;

  #the mean of all the windows in which the peak $i was found
  my @peak;
  for(my $i=0; $i<=$#int; $i++){
    my @peak= @{$mean_win_int[$i]};

    my $mean = Statistics::Basic::Mean->new(\@peak)->query;
    #weightIntensity to put more weight onto peaks in a region of high peaks
    $int[$i]/= $mean**($this->param()->{weightIntensity});
  }

  #change the filterValue, so other methods can use this values
  $this->filterValue(\@int);


  #to have a value between 0..1 as a result
  # my $max= max (@{$this->filterValue()});
  #  for(my $i=0; $i<=$#{$this->filterValue()}; $i++){
  #    $this->filterValue()->[$i]/= $max;
  #  }

  if($this->{actionType} eq 'algorithm'){
    foreach(0..$#int){
      $this->currentSpectra()->spectra()->[$_]->[$this->{fragPD}->{intensity}]= $int[$_];
    }
  }


}


#########################

#calculates the balance of the peaks using relative std-dev

sub balance{
 my $this= shift;
  require Statistics::Basic::Mean;
  require Statistics::Basic::StdDev;


 if(ref($this->currentSpectra()) ne 'InSilicoSpectro::Spectra::MSMSCmpd'){

   #have to backup the currentSpectra
   my $backup_sp= $this->currentSpectra();
  
   my @tmp;
   foreach my $a_cmpnd (@{$this->currentSpectra()->spectrum()}) {
     $this->currentSpectra($a_cmpnd);

     #go recursively through all the cmpds and get the results of all of them
     push @tmp, $this->balance();
   }

   $this->currentSpectra($backup_sp);
   #the way to pass the obtained values
   $this->filterValue(\@tmp);


   # go through the cmpd
 } else {
   my @moz;
   my @int;
   
   foreach(@{$this->currentSpectra()->spectrum()}){
     push @int, $_->[$this->{fragPD}->{intensity}];
     push @moz, $_->[$this->{fragPD}->{moz}];
     
   }

   my $division= $this->param()->{bands};

   #get min/max moz values. if there's no value as low/high as chosen by the param, the lowest/highest are selected
   my ($min_moz,$max_moz);
   ($this->param()->{minMoz}) > (min(@moz))?($min_moz=$this->param()->{minMoz}):($min_moz=min(@moz));
   ($this->param()->{maxMoz}) < (max(@moz))?($max_moz=$this->param()->{maxMoz}):($max_moz=max(@moz));
   $min_moz=min(@moz) unless defined ($this->param()->{minMoz});
   $max_moz=max(@moz) unless defined ($this->param()->{maxMoz});



   #the size of one part
   my $part_size= ($max_moz-$min_moz)/$division;
   my @sums;

   #go through all the parts of the spectrum
   for(my $i=$min_moz; &compare_float($i+$part_size, '<=', $max_moz, $precision); $i+= $part_size){
     #for(my $i=$min_moz; $i+$part_size <= $max_moz; $i+= $part_size){

     #make an array with true values for all the moz values inside the current part
     #gives the same result in the test without compare_float and is a lot faster
     #my @keep= map {&compare_float($i, '<=', $_, $precision) and &compare_float($_, '<=', $i+$part_size, $precision)} @moz;
     my @keep= map {($i<= $_) and ($_<= $i+$part_size)} @moz;

     #part contains all the intensities of current part
     #use the found, so it's not going meaningless trough the whole end of the array
     my @part;
     my $found=0;
     foreach (0..$#keep) {
       if ($keep[$_]) {
	 push @part, $int[$_];
	 $found++ unless $found;
       } elsif ($found) {
	 last;
       }
     }

     #put a 0 into undef part
     push @part, 0 unless $part[0];

     #the sum of all the intensities of a part
     my $sum = sum (@part);
     push @sums, $sum; 
   }
 
   my $std_dev= Statistics::Basic::StdDev->new(\@sums)->query;
   my $mean= Statistics::Basic::Mean->new(\@sums)->query;


   #divide one by rel-stdev, because the smaller the stddev, the better the result
   return 0 unless $mean;
   return 1 unless $std_dev;
   return 1/($std_dev/$mean);

 }
 
}



#need this function in order to compare float-point values correctly
#makes it slow but still faster than using Math::FixedPrecision

sub compare_float{
  my ($A, $comparator,  $B, $dp)= @_;

  my $result= eval "sprintf(\"%.${dp}g\", $A) $comparator sprintf(\"%.${dp}g\", $B)";
  carp($@) if $@;

  return $result;
}



#the balance algorithm described in the paper from Bern et al.
#should be possible to make a lot faster one. But first I want to see if it's worth something..

sub balanceBern{
 my $this= shift;

 if(ref($this->currentSpectra()) ne 'InSilicoSpectro::Spectra::MSMSCmpd'){

   my $backup_sp= $this->currentSpectra();
   my @tmp;
   my $i=0;
   foreach my $a_cmpnd (@{$this->currentSpectra()->spectrum()}) {
     $this->currentSpectra($a_cmpnd);
     $tmp[$i++]=$this->balanceBern();

    
   }
    
   $this->currentSpectra($backup_sp);
   $this->filterValue(\@tmp);

 
 } else {

   my @moz;
   my @tmp;
   
   foreach(@{$this->currentSpectra()->spectrum()}){
     push @tmp, $_->[$this->{fragPD}->{intensity}];
     push @moz, $_->[$this->{fragPD}->{moz}];
     
   }

     
   $this->filterValue(\@tmp);

   my $division= 10;
   
 #get min/max moz values. if there's no value as low/high as chosen by the param, the lowest/highest are selected
   my ($min_moz,$max_moz);
   ($this->param()->{minMoz}) > (min(@moz))?($min_moz=$this->param()->{minMoz}):($min_moz=min(@moz));
   ($this->param()->{maxMoz}) < (max(@moz))?($max_moz=$this->param()->{maxMoz}):($max_moz=max(@moz));
   $min_moz=min(@moz) unless defined ($this->param()->{minMoz});
   $max_moz=max(@moz) unless defined ($this->param()->{maxMoz});

   
   my $part_size= ($max_moz-$min_moz)/$division;
   my @sums;


   #for(my $i=$min_moz; &compare_float($i+$part_size, '<=', $max_moz, $precision); $i+= $part_size){
   for (my $i=$min_moz; $i+$part_size <= $max_moz; $i+= $part_size){
     
     #my @keep= map {&compare_float($i, '<=', $_, $precision) and &compare_float($_, '<=', $i+$part_size, $precision)} @moz;
     my @keep= map {($i <= $_) and ($_<=$i+$part_size)} @moz;
     
     #use the found, so it's not going meaningless trough the whole end of the array
     my @part;
     my $found=0;
     foreach (0..$#keep) {
       if ($keep[$_]) {
	 push @part, $this->filterValue()->[$_];
	 $found++ unless $found;
       } elsif ($found) {
	 last;
       }
     }

     #put a 0 into undef part
     push @part, 0 unless $part[0];

     my $sum = sum (@part);
     #print "sum: $sum\t\n";
     push @sums, $sum; 
   }


   my @sorted= sort{$b <=> $a} @sums;
   #print ($sorted[0]+$sorted[1]-$sorted[-7]-$sorted[-6]-$sorted[-5]-$sorted[-4]-$sorted[-3]-$sorted[-2]-$sorted[-1], "\n");
 
   return $sorted[0]+$sorted[1]-$sorted[-7]-$sorted[-6]-$sorted[-5]-$sorted[-4]-$sorted[-3]-$sorted[-2]-$sorted[-1];

 }
 
}



########################################


sub goodDiff{
  my $this= shift;
  #if current spectra is MSMSSpectra we call goodDiff recursively for each compound
  if(ref($this->currentSpectra()) ne 'InSilicoSpectro::Spectra::MSMSCmpd'){

    my $backup_sp= $this->currentSpectra();
    my @tmp;
    my $i=0;
    foreach my $a_cmpnd(@{$this->currentSpectra()->spectrum()}){
      $this->currentSpectra($a_cmpnd);

      #because this method already used the first name we leave this one away
      my @double= @{$this->originalFilterName()}[1..$#{$this->originalFilterName()}];
      $this->filterName(\@double);

      #call this function recursively
      $tmp[$i++]=$this->goodDiff();

    }

    $this->currentSpectra($backup_sp);
    $this->filterValue(\@tmp);

    return $this;

  #calculate goodDiff for actual compound 
  }else{

    #load the amino acid-masses from insilicodef.xml
    if(!defined $this->{aaMasses}){

#    print "".__FILE__.":".__LINE__.": problem between here\n";

    InSilicoSpectro::init();
    
 #   print "".__FILE__.":".__LINE__.": and here??\n";



      my @aa_list= qw/A C D E F G H I K L M N P Q R S T V W Y/;

      my @mono;
      my @average;

      foreach (@aa_list){
	push @mono, InSilicoSpectro::InSilico::MassCalculator::getMass("aa_$_", 0);
	push @average, InSilicoSpectro::InSilico::MassCalculator::getMass("aa_$_", 1);

      }

      $this->{aaMasses}->{mono}= \@mono;
      $this->{aaMasses}->{average}= \@average;



    }

    my @moz;
    my @int;
    my @singly_charged;

    #use singly_charged to keep only singly charged values
    my $i=0;
    if(defined $this->{fragPD}->{charge}){
      foreach(@{$this->currentSpectra()->spectra()}){
	if ($_->[$this->{fragPD}->{charge}] <= 1) {
	  push @singly_charged, $i;
	  push @moz, $_->[$this->{fragPD}->{moz}];	
	}
	$i++;
      }
    }else{
      foreach(@{$this->currentSpectra()->spectra()}){
	if ($_->[$this->{fragPD}->{chargemask}] <= 3) {
	  push @singly_charged, $i;
	  push @moz, $_->[$this->{fragPD}->{moz}];	
	}
	$i++;
      }
    }

    #because I make a copy of the current-spectrum, I can delete peaks out of it
    my @tmp = @{$this->currentSpectra->spectrum()}[@singly_charged];
    $this->currentSpectra->spectrum(\@tmp);

    

    #get the int-values from the following filter-name (so we can use normalisation eg: goodDiff.normRank) 
    my $current_name= shift @{$this->filterName()};

    eval "\$this->".$current_name."()";
    carp($@) if $@;
    @int=@{$this->filterValue()};

    my $tolerance= $this->param()->{tolerance};

    #select if avarage or monoisotopic mass
    my $mass_id= $this->param()->{mass};

    #if we want to apply this function just on a defined number of peaks using a filter
    if(defined $this->param()->{filter}){
      
      my $filter= $this->param()->{filter};
    
      eval "\$this->".$filter."()";
      carp($@) if $@;

      my $backup= $this->thresholdValue();
      $this->thresholdValue(1+$this->param()->{peakNr});
      my $threshold= $this->nFix();
      $this->thresholdValue($backup);

      my @keep= map {$_ >= $threshold} @{$this->filterValue()};

      my @keep_id;
     
      foreach(0..$#keep){
	push @keep_id, $_ if $keep[$_]==1;
	
      }

      #keep only the int and moz values of the selected peaks
      @int = @int[@keep_id];
      @moz= @moz[@keep_id];

     

    }

    #for debugging..
   # my @count;
#    foreach(0..$#moz){
#      $count[$_]='';
#    }


    

    my $count=0;
      
    #it gives a different result, by using the &compare_float function in this algorithm.
    #but because the errors are few and it's a lot faster, I leave it like this..
   
    for(my $current_peak=0; $current_peak<$#moz; $current_peak++){
    #to consider each peak_pair only once, even if several aa-masses (eg. leu/ile) match
      my %used_i;
      #check all the 20 amino-acids
      for(my $j=0; $j<=$#{$this->{aaMasses}->{$mass_id}}; $j++){
	#if there's one that matches the distance between the peaks within the tolerance..
	for(my $i=$current_peak+1; ($moz[$i]-$moz[$current_peak] <= $this->{aaMasses}->{$mass_id}->[$j]+$tolerance) and $i<=$#moz; $i++){
	  if(($moz[$i]-$moz[$current_peak] >= $this->{aaMasses}->{$mass_id}->[$j]-$tolerance)and !defined($used_i{$i})){
	    #for debugging only..
	    #$count[$current_peak].="<".$this->filterValue()->[$i].">".$this->{aaMasses}->{$mass_id}->[$j];

	    #add the values of these peaks to the total
	    $count+= $int[$i] + $int[$current_peak];
	    $used_i{$i}=1;
	  }
	
	}
      }
    }

    #for debugging only..
    #foreach(0..$#count){
#      printf ("%f\t%f\t%s\n", $this->filterValue()->[$_], $moz[$_], $count[$_]);
#    }


    return $count;

  }

}



########################################
#basically the same as goodDiff.. but looks only for the distance of the mass of water..
#have a look at goodDiff for better documentation..


sub waterLosses{
  my $this= shift; 

  #if current spectra is MSSpectra we call waterLosses recursively for each compound
  if(ref($this->currentSpectra()) ne 'InSilicoSpectro::Spectra::MSMSCmpd'){

   
    my $backup_sp= $this->currentSpectra();
    my @tmp;
    my $i=0;
    foreach my $a_cmpnd(@{$this->currentSpectra()->spectrum()}){
      $this->currentSpectra($a_cmpnd);
      my @double= @{$this->originalFilterName()}[1..$#{$this->originalFilterName()}];
      $this->filterName(\@double);
      $tmp[$i++]=$this->waterLosses();
      
    }

    $this->currentSpectra($backup_sp);
    $this->filterValue(\@tmp);

       
  #calculate waterLosses for actual compound
  }else{


    my %waterMasses= (
		      'mono' => 18.01056, 
		      'average' => 18.01524, 
		     );

    my @moz;
    my @int;
    my @singly_charged;

   
    my $i=0;
    if(defined $this->{fragPD}->{charge}){
      foreach(@{$this->currentSpectra()->spectra()}){

	#print ("chargemask: ",$this->{fragPD}->{chargemask}, "\n");
	#print ($_->[$this->{fragPD}->{charge}], "\n");

	if ($_->[$this->{fragPD}->{charge}] <= 1) {
	  push @singly_charged, $i;
	  push @moz, $_->[$this->{fragPD}->{moz}];	
	}
	$i++;
      }
    }else{
      foreach(@{$this->currentSpectra()->spectra()}){

	#print ("chargemask: ",$this->{fragPD}->{chargemask}, "\n");
	#print ($_->[$this->{fragPD}->{chargemask}], "\n");

	if ($_->[$this->{fragPD}->{chargemask}] <= 3) {
	  push @singly_charged, $i;
	  push @moz, $_->[$this->{fragPD}->{moz}];	
	}
	$i++;
      }
    }

    my @tmp = @{$this->currentSpectra->spectrum()}[@singly_charged];
    $this->currentSpectra->spectrum(\@tmp);

    my $current_name= shift @{$this->filterName()};
    eval "\$this->".$current_name."()";
    carp($@) if $@;
    @int=@{$this->filterValue()};

    my $tolerance= $this->param()->{tolerance};
    my $mass_id= $this->param()->{mass};

    #if we want to apply this function just on a defined number of peaks using a filter
    if(defined $this->param()->{filter}){

      my $filter= $this->param()->{filter};

      eval "\$this->".$filter."()";
      carp($@) if $@;

      my $backup= $this->thresholdValue();
      $this->thresholdValue(1+$this->param()->{peakNr});
      my $threshold= $this->nFix();
      $this->thresholdValue($backup);


      my @keep= map {$_ >= $threshold} @{$this->filterValue()};
      
      my @keep_id;

      foreach(0..$#keep){
	
	push @keep_id, $_ if $keep[$_]==1;
	
      }

      @int = @int[@keep_id];

      @moz= @moz[@keep_id];
    }


     #for debugging..
    # my @count;
#        foreach(0..$#moz){
#          $count[$_]='';
#        }



    my $count=0;

    for(my $current_peak=0; $current_peak<$#moz; $current_peak++){
      #because a tolerance is used we have to watch out to consider each peak_pair only once
      
      for(my $i=$current_peak+1; ($moz[$i]-$moz[$current_peak]<=$waterMasses{$mass_id}+$tolerance) and $i<=$#moz; $i++){
	if($moz[$i]-$moz[$current_peak]>=$waterMasses{$mass_id}-$tolerance){
	  #$count[$current_peak].="<$moz[$i]>".$waterMasses{$mass_id};

	  #there's a good distance, so add the value of both the peaks to the total count.
	  $count+= $int[$i]+ $int[$current_peak];
	  
	}
	
      }
    }
    
    #for debugging only..
    #foreach(0..$#count){
#      print "$count[$_]\n";
#    }

    return $count;
    
  }
  
}



########################################3

# will be implemented later.. probably..


sub isotopes{
  my $this= shift;

  croak "filter \'isotopes\' not implemented yet!\n";

}


###########################################

# takes off the neighbouring peaks close to the selected ones (change number to select by selectStrongest)
# the region where the peaks should be taken off can be adjusted by banishRange


sub selectPeakWindow{
  my $this= shift;

#  print ($this->currentSpectra()->{title}, "\n");

  croak "banishNeighbors can only be applied on level: peaks\n" unless($this->{level} eq 'peaks');


  my $action_type= $this->{actionType};
  my $window_nr= $this->param()->{nrWindows};


  #get moz and int
    my @moz;
    my @int;

    foreach (@{$this->currentSpectra()->spectrum()}) {
      #push @{$this->filterValue()}, $_->[$pdInd];
      push @int, $_->[$this->{fragPD}->{intensity}];
      push @moz, $_->[$this->{fragPD}->{moz}];
    }

    my $moz_min= min(@moz);
    my $moz_max= max(@moz);
    my $moz_range= $moz[$#moz]-$moz[0];

    my $win_size= $moz_range/$window_nr;

    my $pos= 0;
    my $current_max_moz= $moz_min+ $win_size;


  if($action_type eq 'algorithm'){
    my $peak_nr = $this->param()->{nrPeaksTotal};
    my $peak_nr_window= int($this->param()->{nrPeaksTotal}/$window_nr);

    #print "peaks to selcet: $peak_nr_window\n";

    my @to_keep;

    #loop through all the windows
    for (my $j=0; $j<$window_nr; $j++) {
      my $old_pos= $pos;

      #print ("part from ", $moz[$old_pos], " to ", $current_max_moz, ": \n");
      my $win_int_max= 0;

      my @win_int;

      #loop trouhg all the peaks in one window
      while (($moz[$pos]<=$current_max_moz) and ($pos<$#moz)) {
	push @win_int, $int[$pos];
	$pos++;
      }

      my $threshold= ((sort{$b <=> $a} @win_int)[$peak_nr_window-1]);
      #if there are less nr than u want
      $threshold= min(@win_int) if(! defined $threshold);
      #if theres nothing at all
      next if(! defined $threshold);

      #print ("pekas in wind: ", scalar(@win_int), "\n");
      #print ("higest", max(@win_int), "\n");

      #print ("lowest", min(@win_int), "\n");

      #print "threshold: $threshold\n";

      for (my $i=$old_pos; $i<$pos; $i++) {
	push @to_keep, $i if $int[$i]>=$threshold;
	#print "$int[$i]\t$threshold\n";
      }
      $current_max_moz+= $win_size;
    }

    #print "*******************************************\n";
    #foreach(@to_keep){
        #print "$_\n";
    #}
    

    @{$this->currentSpectra()->spectra}=  @{$this->currentSpectra()->spectra}[@to_keep];

  }else{

    my @filter_value;

    #loop through all the windows
    for (my $j=0; $j<$window_nr; $j++) {
      my $old_pos= $pos;

      #print ("part from ", $moz[$old_pos], " to ", $current_max_moz, ": \n");
      my $win_int_max= 0;

      #loop trouhg all the peaks in one window
      while (($moz[$pos]<=$current_max_moz) and ($pos<$#moz)) {
	#     print ($moz[$pos], "\t", $int[$pos], "\n");
	$win_int_max= $int[$pos] if $int[$pos]>$win_int_max;
	$pos++;
      }

      #loop again trough peaks to divide by max-int
      for (my $i=$old_pos; $i<$pos; $i++) {
	push @filter_value, ($int[$i]/$win_int_max);
	#print ($moz[$i], "\t", $int[$i], "\n");
      }

      $current_max_moz+= $win_size;
    }

    $this->filterValue(\@filter_value);

  }

}



sub banishNeighbors{
  my $this= shift;

  croak "banishNeighbors can only be applied on level: peaks\n" unless($this->{level} eq 'peaks');


  #backup the parameters
  my $action_type= $this->{actionType};
  
  #get the parameters
  my $select_quantile= 1-$this->param()->{selectStrongest};
  my $win_size= $this->param()->{banishRange};
  my $skip_spectra_below= $this->param()->{skipSpectraBelow} if ($action_type eq 'algorithm');
  my $banish_limit= $this->param()->{banishLimit};


  #first we have to use a seperate filter on the currentSpectra to get
  #get the intensities
  $this->fragment("intensity");
  my @int= @{$this->filterValue()};


  my @ranks;
  if($action_type eq 'algorithm'){
    return $this->filterValue() if (scalar(@int) < $skip_spectra_below);
  }
  else{
    #get the ranks.. need them to make the peaks without interest smaller
    
    my %rank;
    my $i=1;
    my $j=0;
    my $last=0;

    foreach (sort {$b <=> $a} @{$this->filterValue()}) {
      if ($_==$last) {
	$j++;
      } else {
	$rank{$_}= $j+$i++;
	$j=0 if $j;
	$last=$_;
      }
    }
    foreach (@{$this->filterValue()}) {
      push @ranks, $rank{$_};
    }
  }
 
  my $qtIndx=int ($select_quantile*@int);
  my $threshold= (sort {$a <=> $b} @int)[$qtIndx];
  my @keep= map {$_ >= $threshold} @int;

  my @selected_peaks_moz;

  foreach(0..$#keep){
    push @selected_peaks_moz, $this->currentSpectra()->spectra()->[$_]->[$this->{fragPD}->{'moz'}] if $keep[$_];
  }


  $this->fragment("moz");
  my @moz= @{$this->filterValue()};


  my $min_int= min(@int);
  my $min_moz= min(@moz);
  my $max_moz= max(@moz);


  my %to_delete;

  #go through peaks considered as 'strong'..
  foreach my $j(0..$#selected_peaks_moz){
    my $left_border= $selected_peaks_moz[$j]-$win_size>=$min_moz ? $selected_peaks_moz[$j]-$win_size : $min_moz;
    my $right_border= $selected_peaks_moz[$j]+$win_size<=$max_moz ? $selected_peaks_moz[$j]+$win_size : $max_moz;

    #ze window
    my @current_win= map {($_ >=$left_border) and ($_<=$right_border)}@moz;
    my %current_win_values;
    my $after_window=0;

    foreach my $i(0..$#current_win){
       last if ($current_win[$i]==0 and $after_window);
       next unless $current_win[$i]==1;

       $current_win_values{$i}=$int[$i];
       $after_window=1 unless $after_window;
     }

    my $peak_limit= max(values %current_win_values)*$banish_limit;


    foreach(keys %current_win_values){
      if($action_type eq 'algorithm'){
	$to_delete{$_}=1 if($current_win_values{$_}<$peak_limit);

      }else{
	#make division by the rank to make them smaller than even the smallest peak
	$int[$_]= $min_int/$ranks[$_] if($current_win_values{$_}<$peak_limit);
      }

    }

  }

  if($action_type eq 'algorithm'){

    my @to_keep;
    foreach (0..$#int) {
      push @to_keep, $_ unless defined $to_delete{$_};
    }
    @{$this->currentSpectra()->spectra}=  @{$this->currentSpectra()->spectra}[@to_keep];
  }

  $this->filterValue(\@int);
}



#############################################################
#looks if the moz of two peaks add up to the precursor-ion

#the first part is essentially the same as goodDiff, so have a look there for better documentation


sub complements{
  my $this= shift;

 #if current spectra is MSSpectra we call recursively for each compound
  if(ref($this->currentSpectra()) ne 'InSilicoSpectro::Spectra::MSMSCmpd'){

    my $backup_sp= $this->currentSpectra();
    my @tmp;
    my $i=0;
    foreach my $a_cmpnd(@{$this->currentSpectra()->spectrum()}){
      $this->currentSpectra($a_cmpnd);
      my @double= @{$this->originalFilterName()}[1..$#{$this->originalFilterName()}];
      $this->filterName(\@double);
      $tmp[$i++]=$this->complements();
    }

    $this->currentSpectra($backup_sp);
    $this->filterValue(\@tmp);

  #calculate complements for actual compound
  }else{

    my @moz;
    my @int;
    my @singly_charged;
    my @parent_charge;

    #to get the different charge states of the precursor into an array
    my $parent_moz= $this->currentSpectra()->getParentData($this->{parentPD}->{moz});

    if(defined $this->{parentPD}->{chargemask}){
      my $mask= $this->currentSpectra()->getParentData($this->{parentPD}->{chargemask});

      #get the different probable charges of the precursor-ion into array
      my $charges= chargemask2string($mask);

      @parent_charge = split(',',$charges);

    }else{
      @parent_charge = split(',',$this->currentSpectra()->getParentData($this->{parentPD}->{charge}));
    }
      

    #get a list with all the singly charged values and the moz-values
    my $i=0;

    if(defined $this->{fragPD}->{charge}){
      foreach(@{$this->currentSpectra()->spectra()}){
	#print ("chargemask: ",$this->{fragPD}->{chargemask}, "\n");
	#print ($_->[$this->{fragPD}->{charge}], "\n");

	if ($_->[$this->{fragPD}->{charge}] <= 1) {
	  push @singly_charged, $i;
	  push @moz, $_->[$this->{fragPD}->{moz}];	
	}
	$i++;
      }
    }else{
      foreach(@{$this->currentSpectra()->spectra()}){

	#print ("chargemask: ",$this->{fragPD}->{chargemask}, "\n");
	#print ($_->[$this->{fragPD}->{chargemask}], "\n");

	if ($_->[$this->{fragPD}->{chargemask}] <= 3) {
	  push @singly_charged, $i;
	  push @moz, $_->[$this->{fragPD}->{moz}];	
	}
	$i++;
      }
    }

    my @tmp = @{$this->currentSpectra->spectrum()}[@singly_charged];
    $this->currentSpectra->spectrum(\@tmp);

    #get the int-values
    my $current_name= shift @{$this->filterName()};
    #print "current_name: $current_name\n";
    eval "\$this->".$current_name."()";
    carp($@) if $@;
    @int=@{$this->filterValue()};


    my $tolerance= $this->param()->{tolerance};
    my $mass_id= $this->param()->{mass};

    #if we want to apply this function just on a defined number of peaks using a filter
    if(defined $this->param()->{filter}){

      my $filter= $this->param()->{filter};

      eval "\$this->".$filter."()";
      carp($@) if $@;

      my $backup= $this->thresholdValue();
      $this->thresholdValue(1+$this->param()->{peakNr});
      my $threshold= $this->nFix();
      $this->thresholdValue($backup);


      my @keep= map {$_ >= $threshold} @{$this->filterValue()};
      my @keep_id;

      foreach(0..$#keep){
	push @keep_id, $_ if $keep[$_]==1;
      }

      @int = @int[@keep_id];
      @moz= @moz[@keep_id];
    }

    #only for debugging
    #my @print_count;
    #foreach(0..$#moz){
    #      $print_count[$_]='';
    #    }
    #    my $j=0;
    
    my @count;


    #set all to 0
    foreach(@parent_charge){
      $count[$_]=0;
    }


    #go through all the peaks..
    for(my $current_peak=0; $current_peak<$#moz; $current_peak++){
      #..and compare them with all following peaks
      for(my $i=$current_peak+1; $i<=$#moz; $i++){
	#consider the different parent-charge states
	foreach(@parent_charge){
	  my $sum_moz= $moz[$i]+$moz[$current_peak];
	
	  #if the sum is equal (with tolerance) the parent-moz
	  if (($sum_moz>=($parent_moz*$_)-$tolerance)and ($sum_moz<=($parent_moz*$_)+$tolerance)){

	    #only for debugging..
	    #    $print_count[$j++]=sprintf ("sum: %f\t-t: %f\t+t: %f p1/p2/chr: %i/%i/%i m1/m2: %i/%i\n", $sum_moz, ($parent_moz*$_)-$tolerance, ($parent_moz*$_)+$tolerance, $current_peak, $i, $_, $moz[$current_peak], $moz[$i]);
	
	    $count[$_]= $int[$i] + $int[$current_peak];
	
	  }
	}
	
      }
    }

    #only for debbuging..
   # foreach(0..$#print_count){
    #  print "$print_count[$_]\n";
    #}


    #and only return the bigger result for the possible parent-charge states
    return (max(@count));

  }

}





##########################################
###   setters and getters


sub param{
  my ($this, $sp)=@_;

  if(defined $sp){
    $this->{param}= $sp;
    return $this;
  }
  else{
    return $this->{param};
  }
} 










return 1;