package GO::TermFinderReport::Html;
=pod
=head1 NAME
GO::TermFinderReport::Html - prints an html table of the results of GO::TermFinder
=head1 DESCRIPTION
This print() method of this Perl module receives a reference to an the
array that is the return value from the findTerms method of
GO::TermFinder, the aspect for which terms were found, the number of
genes that were used to generate the terms, and the number of genes
that were said to be in the genome. It will then generate an html
table that summarizes those results. Optionally, filehandle, p-value
cutoff, gene URL, and GOID URL arguments may also be passed in. Url
links should have the string <REPLACE_THIS> to indicate where the gene
name, or GOID should be put.
=head1 SYNOPSIS
use GO::TermFinder;
use GO::TermFinderReport::Html;
.
.
.
my @pvalues = $termFinder->findTerms(genes=>\@genes);
my $report = GO::TermFinderReport::Html->new();
open (HTML, ">blah.html");
print HTML "<html><body>";
my $numRows = $report->print(pvalues => \@pvalues,
aspect => $aspect,
numGenes => scalar(@genes),
totalNum => $totalNum,
fh => \*HTML,
cutoff => 0.01,
geneUrl => 'http://db.yeastgenome.org/cgi-bin/SGD/locus.pl?locus=<REPLACE_THIS>',
goidUrl => 'http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&depth=0&query=<REPLACE_THIS>');
print HTML "</body></html>";
close HTML;
=cut
use strict;
use warnings;
use diagnostics;
use vars qw ($VERSION);
$VERSION = 0.12;
use CGI qw/:all :html3/;
######################################################################################
sub new{
######################################################################################
=head2 new
This is the constructor.
Usage:
my $report = GO::TermFinderReport::Html->new();
A GO::TermFinderReport::Html object is returned.
=cut
######################################################################################
my $self = {};
bless $self, shift;
return $self;
}
######################################################################################
sub print{
######################################################################################
=head2 print
This method prints out the report, in the form of an html table. The
table is ordered in ascending order of p-value (i.e. most significant
first), and will print out the GO node, the frequency of use of that
node within the selected group of genes, and the population as a
whole, the corrected p-value of that, and a list of the genes
annotated to that node. If the FDR was calculated, the FDR will also
be printed. It returns the number of annotation rows in the table
that exceed the provided p-value cutoff (which may even be zero, in
which case nothing is printed).
Usage:
my $numRows = $report->print(pvalues => \@pvalues,
aspect => $aspect, # P, C, or F
numGenes => scalar(@genes),
totalNum => $totalNum,
fh => \*HTML,
pvalueCutOff => 0.01,
geneUrl => 'http://db.yeastgenome.org/cgi-bin/SGD/locus.pl?locus=<REPLACE_THIS>',
goidUrl => 'http://amigo.geneontology.org/cgi-bin/amigo/go.cgi?view=details&search_constraint=terms&depth=0&query=<REPLACE_THIS>');
Required arguments:
pvalues : A reference to the array returned by the findTerms() method of GO::TermFinder
aspect : The aspect of the Gene Ontology for which terms were found (C, F or P)
numGenes : The number of genes that were in the list passed to the findTerms method
totalNum : The total number of genes that were indicated to be in the genome for finding terms.
Optional arguments:
fh : A reference to a file handle to which the table should be
printed. Defaults to standard out.
pvalueCutOff : The p-value cutoff, above which p-values and associated
information will not be printed. Default is no cutoff.
geneUrl : A url to which you want genes linked. Must contain the
text '<REPLACE_THIS>', which will be replaced with the
gene name.
goidUrl : A url to which you want the GOIDs linked. Must contain the
text '<REPLACE_THIS>', which will be replaced with the
goid.
=cut
######################################################################################
my ($self, %args) = @_;
if (!exists($args{'pvalues'})){
die "You must supply a pvalues argument to the print method.";
}
if (!exists($args{'aspect'})){
die "You must supply a aspect argument to the print method.";
}
if (!exists $args{'numGenes'}){
die "You must supply a numGene argument to the print method.";
}
if (!exists $args{'totalNum'}){
die "You must supply a totalNum argument to the print method.";
}
my $pvalues = $args{'pvalues'};
my $aspect = $args{'aspect'};
my $numGenes = $args{'numGenes'};
my $totalNum = $args{'totalNum'};
my $fh = $args{'fh'} || \*STDOUT;
my $cutoff = $args{'pvalueCutOff'} || 1;
my $geneUrl = $args{'geneUrl'};
my $goidUrl = $args{'goidUrl'};
my $replacementText = "<REPLACE_THIS>";
my $rows;
my $numRows = 0;
my $hasFdr = 0;
foreach my $pvalue (@{$pvalues}){
# skip if above cutoff
next if ($pvalue->{CORRECTED_PVALUE} > $cutoff);
# now generate a list of loci annotated to this node, with
# links if requested
my @loci;
foreach my $databaseId (keys %{$pvalue->{ANNOTATED_GENES}}){
my $gene = $pvalue->{ANNOTATED_GENES}->{$databaseId};
if (defined $geneUrl) {
my $url = $geneUrl;
$url =~ s/$replacementText/$gene/;
$gene = a({-href=>$url,
-target=>'infowin'}, $gene);
}
push (@loci, $gene);
}
my $loci = join(", ", @loci);
# now calculate the frequency for annotation for the list and the genome
my $frequencyPercent = sprintf("%.1f", ($pvalue->{NUM_ANNOTATIONS}/$numGenes) * 100);
my $frequency = $pvalue->{NUM_ANNOTATIONS}." out of $numGenes genes, $frequencyPercent\%";
my $geneFrequencyPercent = sprintf("%.1f", ($pvalue->{TOTAL_NUM_ANNOTATIONS}/$totalNum) * 100);
my $genomeFrequency = $pvalue->{TOTAL_NUM_ANNOTATIONS}." out of $totalNum genes, $geneFrequencyPercent\%";
# now format the p-value
my $value = $pvalue->{CORRECTED_PVALUE};
# if it's in scientific notation, we want up to two of the decimal places
$value =~ s/^(.*\.[0-9]{2}).*(e.+)$/$1$2/;
# otherwise, we'll take up to five decimal places
$value =~ s/^(0\.[0-9]{5})[0-9]*$/$1/;
if (defined ($pvalue->{NUM_OBSERVATIONS}) && $pvalue->{NUM_OBSERVATIONS} == 0){
# simulations were used to generate the corrected p-value.
# If we never saw anything better than this p-value in the
# simulations, then prepend a less than sign to the
# corrected p-value
$value = "<".$value;
}
# now deal with the GOID column
my $goColumn;
if (defined $goidUrl){
my $url = $goidUrl;
my $goid = $pvalue->{NODE}->goid;
$url =~ s/$replacementText/$goid/;
# make link with name of term as the text
$goColumn = a({-href=>$url,
-target=>'infowin'}, $pvalue->{NODE}->term);
}else{
# if no link, just use term, and parenthetical GOID
$goColumn = $pvalue->{NODE}->term." (".$pvalue->{NODE}->goid.")";
}
# deal with FDR
my ($fdr, $falsePositives);
if (exists ($pvalue->{FDR_RATE})){
$hasFdr = 1;
$fdr = sprintf ("%.2f%%", $pvalue->{FDR_RATE} * 100);
$falsePositives = sprintf ("%.2f", $pvalue->{EXPECTED_FALSE_POSITIVES});
}
$rows .= $self->_oneRow($goColumn, $frequency,
$genomeFrequency, $value, $loci, $fdr,
$falsePositives);
$numRows++;
}
# print the table out, if there were any rows
$self->_printTable($fh, $rows, $aspect, $cutoff, $hasFdr) if ($numRows > 0);
return $numRows;
}
###################################################################
sub _oneRow{
###################################################################
# This protected method simply returns a row from the html table,
# based on what was passed in.
my ($self, $goColumn, $frequency, $genomeFrequency, $pvalue,
$loci, $fdr, $falsePositives) = @_;
my $row = td($goColumn).
td($frequency).
td($genomeFrequency).
td($pvalue);
if (defined($fdr)){
$row .= td($fdr).td($falsePositives);
}
td($loci);
$row .= td($loci);
return Tr($row);
}
###################################################################
sub _printTable{
###################################################################
# This method prints out the actual html table
my ($self, $fh, $rows, $aspect, $cutoff, $hasFdr) = @_;
$aspect =~ s/^F/Function/i;
$aspect =~ s/^P/Process/i;
$aspect =~ s/^C/Component/i;
print $fh a({-name=>'table'});
print $fh center(h3("Result Table")), p;
print $fh table({-align => 'center',
-border => 1,
-cellpadding => 2,
-width => 400},
Tr(td({-bgcolor => '#FFCC99',
-align => 'center',
-width => '100%',
-nowrap => undef},
b("Terms from the $aspect Ontology with p-value as good or better than $cutoff"))));
my $headings = th({-align => 'center'}, "Gene Ontology term").
th({-align => 'center'}, "Cluster frequency").
th({-align => 'center'}, "Genome frequency of use").
th({-align => 'center'}, "Corrected P-value");
if ($hasFdr){
$headings .= th({-align => 'center'}, "FDR").
th({-align => 'center'}, "False Positives");
}
$headings .= th({-align => 'center'}, "Genes annotated to the term");
print $fh table({-align => 'center',
-border => 2},
Tr({-bgcolor => '#CCCCFF'},
$headings).
$rows), p;
}
1; # to keep Perl happy
=pod
=head1 AUTHOR
Gavin Sherlock
sherlock@genome.stanford.edu
=cut