# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
=head1 NAME

Bio::Polloc::Genome - A group of sequences from the same organism

=head1 AUTHOR - Luis M. Rodriguez-R

Email lmrodriguezr at gmail dot com

=head1 LICENSE

This package is licensed under the Artistic License - see LICENSE.txt

=head1 IMPLEMENTS OR EXTENDS

=over

=item *

L<Bio::Polloc::Polloc::Root>

=back

=cut

package Bio::Polloc::Genome;
use strict;
use base qw(Bio::Polloc::Polloc::Root);
use Bio::SeqIO;
our $VERSION = 1.0503; # [a-version] from Bio::Polloc::Polloc::Version


=head1 PUBLIC METHODS

Methods provided by the package

=head2 new

=over

=item 

The basic initialization method

=item Arguments

=over

=item -name I<str>

The name of the genome (from file if not provided).

=item -file I<str>

The file containing the (multi-)fasta with the genome.

=back

=back

=cut

sub new {
   my $class  = shift;
   my @params = @_;
   # The parent constructor creates the object; this package then applies
   # its own initialization using the very same argument list.
   my $self = $class->SUPER::new(@params);
   $self->_initialize(@params);
   return $self;
}

=head2 build_set

=over

=item 

Builds a set of genomes.

=item Arguments

=over

=item -files I<arrayref>

An arrayref containing the files from which genomes must be built.

=item -names I<arrayref>

An arrayref containing the names of the genomes (in the same order
as the files).

=back

=back

=cut

sub build_set {
   my $self   = shift;
   my @params = @_;
   my($file_list, $name_list) = $self->_rearrange([qw(FILES NAMES)], @params);
   # Nothing to build without a list of files.
   return unless defined $file_list;
   $name_list = [] unless $name_list;
   # One genome per file; names are matched by position, and a missing
   # name is simply passed along as undef.
   my @genomes = map {
      my $idx = $_;
      Bio::Polloc::Genome->new(-file=>$file_list->[$idx], -name=>$name_list->[$idx]);
   } 0 .. $#$file_list;
   return \@genomes;
}

=head2 file

=over

=item 

Sets/gets the file containing the genome

=back

=cut

sub file {
   my $self = shift;
   my $path = shift;
   # Pure getter when no (defined) value is supplied.
   return $self->{'_file'} unless defined $path;
   # A new file invalidates any sequences cached from the previous one.
   @{$self}{'_file', '_sequences'} = ($path, undef);
   return $self->{'_file'};
}

=head2 get_sequences

=over

=item 

Gets the collection of sequences.  The sequences are read from the file
on the first call and kept in memory for the following calls.

=item Returns

In list context, a list of L<Bio::Seq> objects.  In scalar context, a
reference to the array holding them.

=back

=cut

sub get_sequences {
   my $self = shift;
   unless(defined $self->{'_sequences'}){
      # Collect into a local array first: if opening or parsing the file
      # throws, no empty-but-defined cache may be left behind, because it
      # would make every later call silently report "no sequences".
      my @loaded = ();
      my $seqIO = Bio::SeqIO->new(-file=>$self->file, -format=>'Fasta');
      while(my $seq = $seqIO->next_seq){
         push @loaded, $seq;
      }
      $self->{'_sequences'} = \@loaded;
   }
   # Single exit point, so cached and freshly loaded results honor the
   # calling context in exactly the same way.
   return wantarray ? @{ $self->{'_sequences'} } : $self->{'_sequences'};
}

=head2 search_sequence

=over

=item 

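Searches for a sequence by ID.  Either the full display ID or its last
pipe-delimited field (e.g. C<NC_0001> in C<gi|123|ref|NC_0001|>) can be
used.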

=item Arguments

The id (I<str>) of the sequence.

=item Returns

The sequence (L<Bio::Seq>) or C<undef>.

=back

=cut

sub search_sequence {
   my $self   = shift;
   my $wanted = shift;
   return unless defined $wanted;
   my $pool = $self->get_sequences;
   foreach my $candidate (@$pool){
      # Reduce IDs such as 'gi|123|ref|NC_0001|' to their last
      # pipe-delimited field ('NC_0001'); IDs that do not fit the pattern
      # are left untouched.  Note that the '^' inside the character class
      # is a literal caret, so IDs containing one are never shortened.
      my $short_id = $candidate->display_id;
      $short_id =~ s/^(?:[^\s]*\|)?([^\s^\|]+?)\|?$/$1/;
      next unless $candidate->display_id eq $wanted or $short_id eq $wanted;
      return $candidate;
   }
   # No sequence matched.
   return;
}

=head2 name

=over

=item 

Gets/sets the name of the genome.  If no name is set, it is derived from
the file name (without directory and extension); if there is no file
either, nothing is returned.

=back

=cut

sub name {
   my $self     = shift;
   my $new_name = shift;
   $self->{'_name'} = $new_name if defined $new_name;
   return $self->{'_name'} if defined $self->{'_name'};
   # No explicit name: fall back to the file, if there is one.
   my $path = $self->file;
   return unless defined $path;
   # Drop the directory part, then the last extension.
   (my $base = $path) =~ s/.*\///;
   $base =~ s/\.[^\.]+$//;
   # The derived name is stored, so it is computed only once.
   return $self->{'_name'} = $base;
}

=head1 INTERNAL METHODS

Methods intended to be used only within the scope of Bio::Polloc::*

=head2 _initialize

=cut

sub _initialize {
   my $self   = shift;
   my @params = @_;
   my($genome_name, $genome_file) = $self->_rearrange([qw(NAME FILE)], @params);
   # The name is applied before the file, in this order.
   $self->name($genome_name);
   # Last statement on purpose: its value is the sub's return value.
   $self->file($genome_file);
}

1;