The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Mecom::Align::Subset;


use 5.006;
use strict;
no strict "refs";
use warnings;
use Carp;
use Bio::SeqIO;
use base("Bio::Root::Root");


###############################################################################
# Class data and methods
###############################################################################
{  
    # A list of all attributes wiht default values and read/write/required properties
    my %_attribute_properties = (
        _file => ["????", "read.required"],
        _format    => ["????", "read.required"],
        _identifiers   => ["????", "read.write"   ],
        _sequences => ["????", "read.write"   ],
        _seq_length=> [0     , "read.write"   ]
    );
    
    # Global variable to keep count of existing objects
    my $_count = 0;
    
    # The list of all attributes
    sub _all_attributes {
        keys %_attribute_properties;
    }
    
    # Check if a given property is set for a given attribute
    sub _permissions{
        my ($self,$attribute, $permissions) = @_;
        $_attribute_properties{$attribute}[1] =~/$permissions/;
    }
    
    # Return the default value for a given attribute
    sub _attribute_default{
        my ($self,$attribute) = @_;
        $_attribute_properties{$attribute}[0];
    }
    
    # Manage the count of existing objects
    sub get_count{
        $_count;
    }
    sub _incr_count{
        ++$_count;
    }
    sub _decr_count{
        --$_count;
    }
    
}
#
# The constructor of the class
#
sub new {
    
    my ($class, %arg) = @_;
    my $self = bless {}, $class;
    
    foreach my $attribute ($self->_all_attributes()){
        
        # E.g. attribute = "_name", argument = "name"
        my ($argument) = ($attribute =~ /^_(.*)/);
        
        # If explicitly given
        if(exists $arg{$argument}){
            $self->{$attribute} = $arg{$argument};
        }
        
        # If not given but required
        elsif($self->_permissions($attribute, 'required')){
            croak("No $argument attribute as required");
        }
        
        # Set to default
        else{
            $self->{$attribute} = $self->_attribute_default($attribute);            
        }
        
    }
    
    # Called $class because it is a gobal method
    $class->_incr_count;
    
    $self->_extract_sequences;
    return $self;
    
}


#
# Obtaining the sequences in a Array
#
sub _extract_sequences{
    
    my $self = $_[0];
        
    my @identifiers;
    my @sequences;
    
    my $seqIO = Bio::SeqIO->new(
                             -file   => $self->get_file,
                             -format => $self->get_format
                            );
    
    while( my $seq = $seqIO->next_seq){
        
        my $sequence_string = $seq->seq;
        $sequence_string =~ s/\s//g;
        
        push(@identifiers, $seq->id);
        $self->_verify_chain($sequence_string);
        push(@sequences, $sequence_string);
        
    }
    
    $self->set_identifiers(\@identifiers);
    $self->set_sequences(\@sequences);
    
}



#
# Build a subset
#
sub build_subset{
    
    my ($self, $subset) = @_;
    
    
    # Initialite array for the new sequences
    my @new_sequences = ();
    
    for(my $i=0;$i<=$#{$self->get_sequences};$i++){
        # Initialite a new string for the new sequence
        my $new_sequence = "";
        for my $index (@{$subset}){
            if(($index-1)*3 > length(${$self->get_sequences}[$i])){ last }
            $new_sequence.= substr(${$self->get_sequences}[$i],($index-1)*3,3);
        }
        push(@new_sequences, $new_sequence);
    }
    
    my @identifiers   = @{$self->get_identifiers};
    # Create the new align object
    my $aln_obj = Bio::SimpleAlign->new();
    
    # Build a new Bio::LocatableSeq obj for each sequence
    for(my $i=0;$i<=$#identifiers;$i++){
        
        my $id = substr($identifiers[$i],0,9);
        my $iden_plus_num = $i.$id;
        
        # Create such object
        my $newSeq = Bio::LocatableSeq->new(-seq   => $new_sequences[$i],
                                            -id    => substr($iden_plus_num,0,9),
                                            -start => 0,
                                            -end   => length($new_sequences[$i]));
        
        # Append the new sequence object to the new alignmen object
        $aln_obj->add_seq($newSeq);
        
    }
    
    # Once the loop is finished, return the alignment object
    # with all the sequences appended.
    return $aln_obj;
    
}

###############################################################################
# Auxiliary methods
###############################################################################
{
    #
    # Set the sequence length of the whole alignment
    #
    sub _set_sequence_length{
        my $self = $_[0];
        $self->{_seq_length} = $_[1];
    }
    
    #
    # Check if a the length of a given sequence match with the length of
    # the whole alignment.
    #
    sub _check_sequence_length{
        my $self = $_[0];
        my $tested_sequence_length = $_[1];
        $tested_sequence_length == $self->get_seq_length ? return 1 : return 0;
    }
    
    #
    # Verifies the integrity of a given sequence
    #
    sub _verify_chain{
        
        my ($self,$sequence) = @_;
        my $seq_length = length($sequence);
        
        
        # 1. The chain must be a DNA sequence
        $self->_isdna($sequence) ? 1 : $self->warn("\nThe following sequence does not seems as a dna/rna (ATGCU) sequence:\n\n<< $sequence >>\n");
        
        # 2. Also, all the sequences must be equal. But if $_sequence_length
        # has not been updated, it takes the value of the length of this sequence.
        if($self->get_seq_length == 0){
            # The input file must be wrapped (non untermitated codons)
            $seq_length % 3 == 0 ? 1 : $self->throw("The sequence length is not multiple of 3 ($seq_length)");
            $self->_set_sequence_length($seq_length);
        }else{
            $self->_check_sequence_length($seq_length) ? 1 : croak("A sequence length does not match with the length of the whole alignment");
        }
        return 1;
        
    }
    
    #
    # Verifies if a given string is a DNA sequence
    #
    sub _isdna{
        my ($self,$sequence) = ($_[0],uc($_[1]));
        if($sequence =~ /^[ACGTU]+$/){
             return 1;
        }else{
             return 0;
        }
    }
    
    
}
###############################################################################


###############################################################################
# Accessor Methods
###############################################################################
# This kind of method is called Accesor
# Method. It returns the value of a key
# and avoid the direct acces to the inner
# value of $obj->{_file}.
###############################################################################
sub get_file { $_[0] -> {_file} }
sub get_format    { $_[0] -> {_format}    }
sub get_sequences { $_[0] -> {_sequences} }
sub get_identifiers   { $_[0] -> {_identifiers}   }
sub get_seq_length{ $_[0] -> {_seq_length}}
###############################################################################


###############################################################################
# Mutator Methods
###############################################################################
sub set_file { my ($self, $file) = @_;
                    $self-> {_file} = $file if $file;
                  }
sub set_format    { my ($self, $format) = @_;
                    $self-> {_format} = $format if $format;
                  }
sub set_identifiers   { my ($self, $identifiers) = @_;
                    $self-> {_identifiers} = $identifiers if $identifiers;
                  }
sub set_sequences { my ($self, $sequences) = @_;
                    $self-> {_sequences} = $sequences if $sequences;
                  }
###############################################################################





1;