The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# $Id: GeneProduct.pm,v 1.11 2007/09/13 12:44:47 girlwithglasses Exp $
#
# This GO module is maintained by Chris Mungall <cjm@fruitfly.org>
#
# see also - http://www.geneontology.org
#          - http://www.godatabase.org/dev
#
# You may distribute this module under the same terms as perl itself

package GO::Model::GeneProduct;

=head1 NAME

  GO::Model::GeneProduct;

=head1 DESCRIPTION

represents a gene product in a particular species (this will
effectively always be refered to implicitly by the gene symbol even
though a gene may have >1 product)

=cut


use Carp;
use Exporter;
use GO::Utils qw(rearrange);
use GO::Model::Root;
use strict;
use vars qw(@ISA);
use Data::Dumper;

@ISA = qw(GO::Model::Root Exporter);

sub _valid_params {
    return qw(id acc symbol properties full_name type_id type xref speciesdb synonym_list seq_list species);
}

sub _initialize 
{
    my $self = shift;
    my $paramh = shift;

    my $db;
    if ($paramh->{speciesdb}) {
	$db = $paramh->{speciesdb};
    }
    else {
	$db = $paramh->{xref_dbname};
    }

    my $xref = 
      GO::Model::Xref->new({xref_key=>$paramh->{acc},
			    xref_keytype=>"acc",
                            xref_dbname=>$db});

    $self->xref($xref);
    delete $paramh->{acc};
    delete $paramh->{speciesdb};

    $self->SUPER::_initialize($paramh);
}

=head2 acc

  Usage   -
  Returns -
  Args    -

=cut

sub acc {
    my $self = shift;
    my $acc = shift;
    if ($acc) {
	$self->xref->xref_key($acc);
    }
    return $self->xref->xref_key;
}


=head2 symbol

  Usage   -
  Returns -
  Args    -

=cut

# AUTOGENERATED

=head2 type

  Usage   -
  Returns -
  Args    -

=cut

# AUTOGENERATED

=head2 full_name

  Usage   -
  Returns -
  Args    -

=cut

# AUTOGENERATED

=head2 as_str

  Usage   -
  Returns -
  Args    -

=cut

sub as_str {
    my $self = shift;
    return "GP-".$self->xref->as_str;
}

=head2 add_synonym

=cut

sub add_synonym {
    my $self = shift;
    if (!$self->{synonym_list}) {
	$self->{synonym_list} = [];
    }
    push(@{$self->{synonym_list}}, (shift));
    return $self->{synonym_list};
}


=head2 synonym_list

accessor: gets/set list of synonyms [array reference]

=cut

# AUTOGENERATED

=head2 speciesdb

  Usage   -
  Returns -
  Args    -

=cut

sub speciesdb {
    my $self = shift;
    my $db = shift;
    if ($db) {
	$self->xref->xref_dbname ($db);
    }
    return $self->xref->xref_dbname;
}


=head2 add_seq

  Usage   -
  Returns -
  Args    - GO::Model::Seq

=cut

sub add_seq {
    my $self = shift;
    my $seq = shift;
    
    if ($seq->isa("Bio::SeqI")) {
        my $bpseq = $seq;
        $seq = GO::Model::Seq->new;
        $seq->pseq($bpseq);
    }
    $seq->isa("GO::Model::Seq") or confess ("Not a seq object");
    $self->{seq_list} = [] unless $self->{seq_list};

    push(@{$self->{seq_list}}, $seq);
    $self->{seq_list};
}

#indicate fetching seqs has been done: no need to try even if no seq (see seq_list)
sub _seqs_obtained {
    my $self = shift;
    $self->{_seqs_obtained} = shift if @_;
    return $self->{_seqs_obtained};
}

=head2 seq_list

  Usage   -
  Returns - GO::Model::Seq listref
  Args    -

=cut

sub seq_list {
    my $self = shift;
    if (@_) {
        $self->{seq_list} = shift;
    }
    else {
        if (!defined($self->{seq_list})) {
            $self->{seq_list} =
              $self->apph->get_seqs({product=>$self}) unless ($self->_seqs_obtained);
        }
    }
    return $self->{seq_list};
}


=head2 seq

  Usage   -
  Returns - GO::Model::Seq
  Args    -

returns representative sequence object for this product

=cut

sub seq {
    my $self = shift;
    my $seqs = $self->seq_list;
    my $str = "";
    # longest by default
    my $longest;
    
    foreach my $seq (@$seqs) {
        if (!defined($longest) || $seq->length > $longest->length) {
            $longest = $seq;
        }
    }
    return $longest;
}

=head2 properties

  Usage   -
  Returns - hashref
  Args    - hashref

=cut


=head2 set_property

  Usage   - $sf->set_property("wibble", "on");
  Returns -
  Args    - property key, property scalar

note: the property is assumed to be multivalued, therefore
  $sf->set_property($k, $scalar) will add to the array, and
  $sf->set_property($k, $arrayref) will set the array

=cut

sub set_property {
    my $self = shift;
    my $p = shift;
    my $v = shift;
    if (!$self->properties) {
        $self->properties({});
    }
    if (ref($v) eq 'ARRAY') {
        confess("@$v is not all scalar") if grep {ref($_)} @$v;
        $self->properties->{$p} = $v;
    }
    else {
        push(@{$self->properties->{$p}}, $v);
    }
    # uniqify
    my @vals = @{$self->properties->{$p}};
    my %h = ();
    my @nu_vals = ();
    foreach (@vals) {
        next if $h{$_};
        $h{$_} = 1;
        push(@nu_vals, $_);
    }
    $self->properties->{$p} = \@nu_vals;
    $v;
}

=head2 get_property

  Usage   -
  Returns - first element of the property
  Args    - property key

=cut

sub get_property {
    my $self = shift;
    my $p = shift;
    if (!$self->properties) {
        $self->properties({});
    }
    my $val = $self->properties->{$p};
    if ($val) {
        $val = $val->[0];
    }
    return $val;
}

=head2 get_property_list

  Usage   -
  Returns - the property arrayref
  Args    - property key

=cut

sub get_property_list {
    my $self = shift;
    my $p = shift;
    if (!$self->properties) {
        $self->properties({});
    }
    $self->properties->{$p};
}


=head2 to_fasta

  Usage   -
  Returns -
  Args    -

returns the longest seq by default

=cut

sub to_fasta {
    my $self = shift;
    my ($fullhdr, $hdrinfo, $gethdr) = 
      rearrange([qw(fullheader headerinfo getheader)], @_);
    $hdrinfo = " " . ($hdrinfo || "");
    my $seqs = $self->seq_list;
    my $str = "";
    # longest by default
    my $longest;
    
    return "" unless @{$seqs || []};

    foreach my $seq (@$seqs) {
        if (!defined($longest) || $seq->length > $longest->length) {
            $longest = $seq;
        } 
    }
    $seqs = [$longest];
    if ($gethdr) {
      my $apph = $self->get_apph;
      my $terms = $apph->get_terms({product=>$self});
      my @h_elts = ();
      foreach my $term (@$terms) {
	my $al = $term->selected_association_list;
	my %codes = ();
	map { $codes{$_->code} = 1 } map { @{$_->evidence_list} } @$al;
	push(@h_elts,
	     sprintf("%s evidence=%s",
		     $term->public_acc,
		     join(";", keys %codes),
		    )
	    );
      }
      $hdrinfo = join(", ", @h_elts);
    }
    foreach my $seq (@$seqs) {
        my $desc;
        if ($fullhdr) {
            $desc = $fullhdr;
        }
        else {
            $desc =
              sprintf("%s|%s symbol:%s %s %s %s",
                      uc($self->xref->xref_dbname),
                      $self->xref->xref_key,
                      $self->symbol,
                      $self->species ? sprintf("species:%s \"%s\"", $self->species->ncbi_taxa_id, $self->species->binomial) : '-',                      
                      $hdrinfo,
                      join(" ",
                           map {$_->as_str} @{$seq->xref_list || []})
                     );
        }
        $seq->description($desc);
        $str.= $seq->to_fasta;
    }
    return $str;
}

sub to_idl_struct {
    my $self = shift;
    return 
      {
	  "symbol"=>$self->symbol,
	  "full_name"=>$self->full_name,
	  "acc"=>$self->xref->xref_key,
	  "speciesdb"=>$self->xref->xref_dbname,
      };
}

sub to_ptuples {
    my $self = shift;
    my ($th) =
      rearrange([qw(tuples)], @_);
    my @s = ();
    push(@s,
         ["product",
          $self->xref->as_str,
          $self->symbol,
          $self->full_name,
          ]);
    push(@s, $self->xref->to_ptuples(-tuples=>$th));
    @s;
}

# **** EXPERIMENTAL CODE ****
# the idea is to be homogeneous and use graphs for
# everything; eg gene products are nodes in a graph,
# associations are arcs
# cf rdf, daml+oil etc

# args - optional graph to add to
sub graphify {
    my $self = shift;
    my ($ref, $subg, $opts) =
      rearrange([qw(ref graph opts)], @_);

    $opts = {} unless $opts;
    $subg = $self->apph->create_graph_obj unless $subg;

    my $t =
      $self->apph->create_term_obj({name=>$self->as_str,
                                    acc=>$self->as_str});
    $subg->add_node($t);
    $subg->add_arc($t, $ref, "hasProduct");
    return $subg;
}

1;