The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::GMOD::GenericGenePage;
use strict;
use warnings;
use English;
use Carp;

our $VERSION = 0.12;


=head1 NAME

Bio::GMOD::GenericGenePage - Generic GMOD gene page base class

=head1 SYNOPSIS

    my $page = Bio::GMOD::GenericGenePage->new( $gene_identifier );
    my $xml = $page->render_xml();

=head1 DESCRIPTION

Bio::GMOD::GenericGenePage is an abstract class to make it easier for 
Model Organism Databases (MODs) to serve up a simple XML that describes
attributes of their gene models.  In order to implement this, the user
needs to subclass Bio::GMOD::GenericGenePage and provide the methods
listed below as abstract classes.  These methods are then used by
the render_xml method to create XML for a given gene.

There is one example implementation included with this distribution,
Bio::GMOD::GenericGenePage::Chado, which is a Chado adapter for a 
yeast database derived from SGD's GFF3.  In order to implement this for
another Chado database it should be fairly easy to modify the provided
methods to create your own adaptor.  For example, ParameciumDB could
subclass Bio::GMOD::GenericGenePage::Chado and create
Bio::GMOD::GenericGenePage::Chado::ParameciumDB and only override
the data_provider and organism methods to have a working adaptor.
Databases not based on Chado will only have slightly more work, in order
to implement all of the abstract classes in Bio::GMOD::GenericGenePage.

Another example implementation is included, CXGN::Phenome::GenericGenePage,
however this is only a partial implementation and will not work with 
the current release of Bio::GMOD::GenericGenePage.

=head1 BASE CLASS(ES)

none

=head1 SUBCLASSES

Bio::GMOD::GenericGenePage::Chado
CXGN::Phenome::GenericGenePage

=head1 BUGS AND SUPPORT

Please report bugs and make support requests on the GMOD developers list, 
gmod-devel@lists.sourceforge.net.

=head1 AUTHOR

    Scott Cain
    CPAN ID: SCAIN
    Cold Spring Harbor Laboratory
    scain@cpan.org
    http://www.gmod.org/

and Robert Buels.

=head1 COPYRIGHT

Copyright (c) 2008 Scott Cain and Robert Buels.  All rights reserved.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.

The full text of the license can be found in the
LICENSE file included with this module.


=head1 SEE ALSO

perl(1).

=head1 PROVIDED METHODS

=cut

=head2 new

  Usage: my $genepage = MyGenePage->new( -id => $gene_identifier );
  Desc : create a new gene page object.  should be overridden
  Args : not specified
  Ret  : a new gene page object
  Side Effects: none as implemented here, but subclass
                implementations may have side effects
  Example:

=cut

sub new {
  my ($class,%args) = @_;
  return bless {}, $class;
}

sub _counter {
  my $self = shift;
  $self->{'counter'}++;
  return $self->{'counter'};
}

=head2 render_xml

  Usage: my $xml = $page->render_xml();
  Desc : render the XML for this generic gene page
  Args : none
  Ret  : string of xml
  Side Effects: none

=cut

sub render_xml {
  my ($self) = @_;

  my $data_provider = "  <data_provider>".$self->data_provider."</data_provider>\n";

  my @accs = $self->accessions;
  my $accession  = join "\n", map {
    qq|  <accession>$_</accession>|
  } @accs;


  my $name = $self->name;

  my @syn = $self->synonyms;
  #shift @syn; #should the real name be in there too?
  #I don't think so--querying the synonym table is easy, why bother with
  #a join when we don't need it

  #also, synonym should probably be a hash to optionally allow for a 
  #synonym type

  my $synonyms = join "\n", map {
    qq|  <name type="synonym">$_</name>|
  } @syn;

  my $dbreferences   = $self->_xml_render_colon_separated_dbrefs( 2, $self->dbxrefs);
  my $organism       = $self->_xml_render_organism();
  my $ontology_terms = $self->_xml_render_ontology_terms( 4, $self->ontology_terms);
  my $literature     = $self->_xml_render_colon_separated_dbrefs( 4, $self->literature_references);

  my $maplocations = join "\n", map {
    qq|    <mapLocation map="$_->{map_name}" chromosome="$_->{chromosome}" position="$_->{position}" units="$_->{units}" />|
  } $self->map_locations;

  my $comments       = $self->_xml_render_comments($self->comments);

  return <<EOXML;
<gene>
$data_provider
$accession

  <name type="primary">$name</name>

$synonyms

$dbreferences

$organism

  <mapLocations>
$maplocations
  </mapLocations>

  <ontology>
$ontology_terms
  </ontology>

  <literature>
$literature
  </literature>

$comments

</gene>
EOXML
}

sub _xml_render_organism {
  my $self = shift;
  my $counter = $self->_counter;
  my $org = $self->organism;
  my $organism = <<END;
  <organism>
    <name type="common">$org->{common}</name>
    <name type="scientific">$org->{binomial}</name>
    <dbReference type="NCBI Taxonomy" key="$counter" id="$org->{ncbi_taxon_id}" />
  </organism>
END
  return $organism;
}

sub _xml_render_colon_separated_dbrefs {
  my ($self,$spaces,@refs) = @_;
  my $refstring = '';
  for my $ref (@refs) {
    my $counter = $self->_counter;
    my ($type,$id) = split /:/,$ref,2;
    $refstring .= (' 'x$spaces).qq|<dbReference type="$type" key="$counter" id="$id" />\n|
  }
  return $refstring;
}

sub _xml_render_ontology_terms {
  my ($self,$spaces,%term) = @_;
 
  my $xml_string = ''; 
  for my $key (keys %term) {
      my ($type,$id) = split /:/,$key;
      my $value = $term{$key};
      my $counter = $self->_counter;

      # the low casing of the 'o' seems inconsistent, so go with the upper
      #$type = "Go" if ($type eq "GO");
      $xml_string .= (' 'x$spaces).qq|<dbReference type="$type" key="$counter" id="$key">\n|; 
      $xml_string .= (' 'x($spaces+2)).qq|<property value="$value" type="term"/>\n|;
      $xml_string .= (' 'x$spaces).qq|</dbReference>\n|;
  }
  return $xml_string;
}

sub _xml_render_comments {
  my ($self, %comments) = @_;

  my $xml_string = '';

  for my $key (keys %comments) {
    my $value = $comments{$key} || "miscellaneous";
    $xml_string .= "  <comment type=\"$value\">\n";
    $xml_string .= "    <text>$key</text>\n";
    $xml_string .= "  </comment>\n\n"; 
  }

  return $xml_string;
}

=head2 render_html NOT IMPLEMENTED!

  Usage: my $html = $page->render_html();
  Desc : render HTML for this generic gene page.  you may want to
         override this method for your implementation
  Args : none
  Ret  : string of html
  Side Effects: none

=cut

sub render_html {
  my ($self) = @_;

  return <<EOHTML;

EOHTML
}

#helper method that calls all those functions
sub _info {
  my ($self) = @_;

  return
    { name => $self->name,
      syn  => [$self->synonyms],
      loc  => [$self->map_locations],
      ont  => [$self->ontology_terms],
      dbx  => [$self->dbxrefs],
      lit  => [$self->lit_refs],
      comments => [$self->comment_text],
      species => $self->species,
    };
}


=head1 ABSTRACT METHODS

Methods below should be overridden by each GenericGenePage implementation.

=head2 name

  Usage: my $name = $genepage->name();
  Desc : get the string name of this gene
  Args : none
  Ret  : string gene name, e.g. 'Pax6'
  Side Effects: none

=cut

sub name {
  my ($self) = @_;
  die 'name() method is abstract, must be implemented in a subclass;'
}

=head2 accessions

  Usage: my @accessions = $genepage->accessions();
  Desc : get a list of local accession values
  Args : none
  Ret  : a list of local accessions
  Side Effects: none

Note that these are the accessions that are used by the MOD providing the
information, not accessions in external databases like GenBank.

=cut

sub accessions {
  my ($self) = @_;
  die 'accession() method is abstract, must be implemented in a subclass;'
}

=head2 data_provider

  Usage: my $data_provider = $genepage->data_provider();
  Desc : The name of the data providing authority (ie, WormBase, SGD, etc)
  Args : none
  Ret  : string, name of the data provider
  Side Effects: none

=cut

sub data_provider {
  my ($self) = @_;
  die 'data_provider() method is abstract, must be implemented in a subclass;'
}


=head2 synonyms

  Usage: my @syn = $genepage->synonyms();
  Desc : get a list of synonyms for this gene
  Args : none

  Ret : list of strings,
        e.g. (  '1500038E17Rik',
                'AEY11',
                'Dey',
                "Dickie's small eye",
                'Gsfaey11',
                'Pax-6',
             )
  Side Effects: none

=cut

sub synonyms {
  my ($self) = @_;
  die 'synonyms() method is abstract, must be implemented in a subclass;'
}

=head2 map_locations

  Usage: my @locs = $genepage->map_locations()
  Desc : get a list of known map locations for this gene
  Args : none
  Ret  : list of map locations, each a hashref as:
         {  map_name => string map name,
            chromosome => string chromosome name,
            marker     => (optional) associated marker name,
            position   => numerical position on the map,
            units      => map units, either 'cm', for centimorgans,
                          or 'b', for bases
         }
  Side Effects: none

=cut

sub map_locations {
  my ($self) = @_;
  die 'map_locations() method is abstract, must be implemented in a subclass;'
}


=head2 ontology_terms

  Usage: my @terms = $genepage->ontology_terms();
  Desc : get a list of ontology terms
  Args : none
  Ret  : hash-style list as:
           termname => human-readable description,
  Side Effects: none
  Example:

     my %terms = $genepage->ontology_terms()

     # and %terms is now
     (  GO:0016711 => 'F:flavonoid 3'-monooxygenase activity',
        ...
     )

Note that the value in that has is the the concatenation of F:, B: or C:
for molecular_function, biological_process, or cellular_component GO terms
respectively.  If the term does not belong to GO, there is no prepended
identifier.

=cut

sub ontology_terms {
  my ($self) = @_;
  die 'go_terms() method is abstract, must be implemented in a subclass'
}

=head2 dbxrefs

  Usage: my @dbxrefs = $genepage->dbxrefs();
  Desc : get a list of database cross-references for info related to this gene
  Args : none
  Ret  : list of strings, like type:id e.g. ('PFAM:00012')
  Side Effects: none

=cut

sub dbxrefs {
  my ($self) = @_;
  die 'dbxrefs() method is abstract, must be implemented in a subclass'
}

=head2 comments

  Usage: my @comments = $genepage->comments();
  Desc : get a list of comments with types
  Args : none
  Ret  : a hash of comment=>type, where type is optional (empty string)
  Side Effects: none

=cut

sub comments {
  my ($self) = @_;
  die 'comments() method is abstract, must be impemented in a subclass';
}


=head2 literature_references

  Usage: my @refs = $genepage->lit_refs();
  Desc : get a list of literature references for this gene
  Args : none
  Ret  : list of literature reference identifers, as type:id,
         like ('PMID:0023423',...)
  Side Effects: none

=cut

sub literature_references {
  my ($self) = @_;
  die 'lit_refs() method is abstract, must be implemented in a subclass'
}


=head2 summary_text

  Usage: my $summary = $page->summary_text();
  Desc : get a text string of plain-English summary text for this gene
  Args : none
  Ret  : string of summary text
  Side Effects: none

=cut

sub summary_text {
  my ($self) = @_;
  die 'summary_text() method is abstract, must be implemented in a subclass'
}

=head2 organism

  Usage: my $species_info = $genepage->organism
  Desc : get a handful of species-related information
  Args : none
  Ret  : hashref as:
         { ncbi_taxon_id => ncbi taxon id, (e.g. 3702),
           binomial      => e.g. 'Arabidopsis thaliana',
           common        => e.g. 'Mouse-ear cress',
         }
  Side Effects: none

=cut

sub organism {
  my ($self) = @_;
  die 'organism() method is abstract, must be implemented in a subclass';
}


1;
# The preceding line will help the module return a true value