The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
use strict;

package InSilicoSpectro::Databanks::DBEntry;
require Exporter;
use Carp;

=head1 NAME

InSilico::Databanks::DBEntry - fasta databank entry + header's annotated info

=head1 SYNOPSIS


=head1 DESCRIPTION

describes an databnk entry (header + sequence)

=head1 FUNCTIONS


=head1 METHODS


=head3 my $dbe=InSilico::Databanks::DBEntry->new([\%h]);

=head2 Accessors/Setters

=head3 $dbe->AC([val])

get/set accession code

=head3 $dbe->ID([val])

get/set accession ID

=head3 $dbe->dbName([val])

[GS]et databank name

=head3 $dbe->taxoID([val])

[GS]et taxonomy if (typically NCBI taxid

=head3 $dbe->description([val])

=head3 $dbe->sequence([val])

Get/Set AC, ID, ...

=head3 $dbe->seqType(["AA"|"DNA"])

Get or set the sequence type

=head3 $dbe->annotatedModRes(string|[[pos1, mod1], [pos2, mod2]]);

get or set annotated PTM from a string (e.g. (1|ACET_nterm)(2|ACET_nterm)(185|PHOS)) or an array. All previously set TPM are removed

=head3 $dbe->add_annotatedModRes(pos, modkey)

add an annotated PTM

=head3 $dbe->clear_annotatedModRes()

remove all annotated modres

=head3 $dbe->variants(string|[[pos1, seq1a, seq1b], [pos2, seq2a, seq2b]]);

get or set variant a string (e.g. ("(9|F|Y)(30|D|N)(41|A|G)(43|Q|R)") or an array. All previously set TPM are removed

=head3 $dbe->add_variant(pos, seqa, seqb)

add a VARIANT, replacing seqa by seqb (often just one aminoacid) at pos

=head3 $dbe->clear_variants()

remove all Variants

=head2 I/O

=head3 $dbe->readFasta($fastacontent);

read info from fasta contents (fitrs line with '>' and info + remaining is sequence.

=head3 $dbe->printFasta();

print the entry under fasta format (use SelectSaver or whatever select method to redirect towards a file descriptor);

=head3 $dbe->printHtml();

Print the entry under a html format.

=head1 EXAMPLES


=head1 SEE ALSO

=head1 COPYRIGHT

Copyright (C) 2004-2005  Geneva Bioinformatics www.genebio.com

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

=head1 AUTHORS

Alexandre Masselot, www.genebio.com

=cut

use File::Basename;
{
  use Object::InsideOut;

  my @AC :Field(Accessor => 'AC' );
  my @ID :Field(Accessor => 'ID' );
  my @ACorig :Field(Accessor => 'ACorig' );
  my @dbName :Field(Accessor => 'dbName' );
  my @taxoID :Field(Accessor => 'taxoID' );
  my @__sequence :Field(Accessor => '__sequence',  permission=>'Private');
  my @description :Field(Accessor => 'description' );
  my @ncbiTaxid :Field(Accessor => 'ncbiTaxid' );
  my @annotatedModRes :Field(Accessor => '__annotatedModRes', Type=>'List', permission=>'Private');
  my @variants :Field(Accessor => '__variants', Type=>'List', permission=>'Private');
  my @PI :Field(Accessor => 'PI' );
  my @mass :Field(Accessor => 'mass' );

  my %init_args :InitArgs = (
			     AC=>'AC',
			     COPY=>qr/^copy$/i,
			    );
  sub _init :Init{
    my ($self, $h) = @_;
    $self->__annotatedModRes([]);
    $self->__variants([]);
    if($h->{COPY}){
      my $src=$h->{COPY};
      foreach(qw(AC ID ACorig __sequence description ncbiTaxid __annotatedModRes __variants)){
	$self->$_($src->$_());
      }
    }else{
      foreach (keys %$h) {
	$self->$_($h->{$_});
      }
    }
    return $self;
  };


  sub sequence{
    my ($self, $val)=@_;
    if (defined $val) {
      $val=~s/\s+//g;
      $self->__sequence($val);
      return $val;
    }
    return $self->__sequence;
  }

  sub seqType{
    my ($self, $val)=@_;
    if (defined $val) {
      croak "DBEntry seqtype must be of (AA|DNA)" unless $val=~/^(AA|DNA)$/;
      $self->{seqType}=$val;
      return $val;
    }
    return $self->{seqType};
  }

  sub annotatedModRes{
    my $self=shift;
    my $set=exists $_[0];
    if ($set) {

      if ((ref $_[0]) eq 'ARRAY') {
	my @tmp=@{$_[0]};
	$self->__annotatedModRes(\@tmp);
      } else {
	my @tmp;
	my $s=$_[0];
	while ($s=/\((\d+)\|([^\)]+)\)/g) {
	  push @tmp, [$1, $2];
	}
	$self->__annotatedModRes(\@tmp);
      }
      return;
    }
    return undef unless defined  $self->__annotatedModRes;
    my @tmp=@{$self->__annotatedModRes};
    if (wantarray) {
      return @tmp;
    } else {
      my $ret;
      foreach (@tmp) {
	$ret.="($_->[0]|$_->[1])";
      }
      return $ret;
    }
  }

  sub add_annotatedModRes{
    my ($self, $p, $m)=@_;

    push @{$self->__annotatedModRes}, [$p, $m];
  }


  sub clear_annotatedModRes{
    my $self=shift;
    $self->__annotatedModRes([]);
  }

  sub variants{
    my $self=shift;
    my $set=exists $_[0];
    if ($set) {

      if ((ref $_[0]) eq 'ARRAY') {
	my @tmp=@{$_[0]};
	$self->__variants(\@tmp);
      } else {
	my @tmp;
	my $s=$_[0];
	while ($s=/\((\d+)\|(\w+)\|(\w+)\)/g) {
	  push @tmp, [$1, $2, $3];
	}
	$self->__variants(\@tmp);
      }
      return;
    }
    return undef unless defined  $self->__variants;
    my @tmp=@{$self->__variants};
    if (wantarray) {
      return @tmp;
    } else {
      my $ret;
      foreach (@tmp) {
	$ret.="($_->[0]|$_->[1]|$_->[2])";
      }
      return $ret;
    }
  }

  sub add_variant{
    my ($self, $p, $s1, $s2)=@_;

    push @{$self->__variants}, [$p, $s1, $s2];
  }


  sub clear_variants{
    my $self=shift;
    $self->__variants([]);
  }

  # I/O
  sub readFasta{
    my $self=shift;
    my ($header, $seq)=split /\n/, shift, 2;
    $header=~s/^>//;
    $header=~s/^(\S+)\s*// or croak "fasta header [$header] does not start with an no empty string for AC";
    my $ac=$1;
    $self->AC($ac);
    foreach (split /\\(?=\w+=)/, $header) {
      my ($key, $val)=split /=/, $_, 2;
      $val=~s/\s+$// if $val;
      next unless $key;
      if ($key eq 'AC') {
	$self->AC($val);
      } elsif ($key eq 'ACOR') {
	$self->ACorig($val);
      } elsif ($key eq 'ID') {
	$self->ID($val);
      } elsif ($key eq 'DE') {
	$self->description($val);
      } elsif ($key eq 'NCBITAXID') {
	$self->ncbiTaxid($val);
      } elsif ($key eq 'MODRES') {
	$self->annotatedModRes($val);
      } elsif ($key eq 'VARIANT') {
	$self->variants($val);
      } elsif ($key eq 'LENGTH') {
      } else {
	carp "DBEntry::readFasta no function handler for fasta head [$key]";
      }
    }
    $self->description($header) unless $self->description;
    $seq=~s/\s+//g;
    $self->sequence($seq);
  }

  sub printFasta{
    my $self=shift;
    print ">".$self->AC()." \\ID=".($self->ID|| '')." \\MODRES=".($self->annotatedModRes() ||'');
    print " \\VARIANT=".$self->variants if $self->variants;
    print " \\ACOR=".$self->ACorig if $self->ACorig;
    print " \\NCBITAXID=".$self->ncbiTaxid if $self->ncbiTaxid;
    print " \\DE=".$self->description."\n";
    my $seq=$self->sequence();
    $seq=~s/(.{60})(?=.)/$1\n/g;
    #$seq=~s/(.{10})(?=.)/$1 /g;

    print $seq."\n";
  }

  sub printHtml{
    my $self=shift;
    print "<h3>".$self->AC."</h3>\n";
    print "
    <table border=1 cellspacing=0>
      <tr><td><b>AC</b></td><td>".$self->AC."</td></tr>
      <tr><td><b>ID</b></td><td>".$self->ID."</td></tr>
";
    print "      <tr><td><b>original AC</b></td><td>".$self->ACorig."</td></tr>\n" if $self->ACorig;
    print "      <tr><td><b>description</b></td><td>".$self->description."</td></tr>
    </table>
";
    my @seq=split //, $self->sequence();
    my @annot;
    if ($self->annotatedModRes) {
      my @tmp=$self->annotatedModRes;
      foreach (@tmp) {
	my ($p, $mr)=($_->[0]-1, $_->[1]);
	$annot[$p].="</br>\n" if $annot[$p];
	$annot[$p].="MOD_RES $mr";
      }
    }
    if ($self->variants) {
      my @tmp=$self->variants;
      foreach (@tmp) {
	my ($p, $s, $u)=($_->[0]-1, $_->[1], $_->[2]);
	$annot[$p].="</br>" if $annot[$p];
	$annot[$p].="VARIANT $s -> $u";
      }
    }
    print "<div class='sequence'>\n";
    foreach (0..$#seq) {
      print " " unless $_%10;
      print "</br>\n" unless $_%60;
      if ($annot[$_]) {
	print "<b><a onmouseover=\"return escape('$annot[$_]')\">$seq[$_]</a></b>";
      } else {
	print $seq[$_];
      }
    }
    print "</div>\n";
  }
}

return 1;