package GO::TermFinderReport::Text;
=pod
=head1 NAME
GO::TermFinderReport::Text - prints results of GO::TermFinder as a text report
=head1 DESCRIPTION
The print() method of this Perl module receives a reference to the
array that is the return value from the findTerms method of
GO::TermFinder, the number of genes that were used to generate the
terms, and the number of genes that were said to be in the genome. It
will then generate a text report that summarizes those results.
Optionally, filehandle and p-value cutoff arguments may also be passed
in. It will return the number of hypotheses whose corrected p-values
were as good as or better than the cutoff.
=head1 SYNOPSIS
use GO::TermFinder;
use GO::TermFinderReport::Text;
.
.
.
my @pvalues = $termFinder->findTerms(genes=>\@genes);
my $report = GO::TermFinderReport::Text->new();
open (OUT, ">report.text");
my $numHypotheses = $report->print(pvalues => \@pvalues,
aspect => $aspect,
numGenes => scalar(@genes),
totalNum => $totalNum,
cutoff => 0.01,
fh => \*OUT);
close OUT;
=cut
use strict;
use warnings;
use diagnostics;
# Module version, declared as a package global with "use vars" so that it
# can be assigned under "use strict".
# NOTE(review): the unquoted numeric literal 0.10 stringifies as "0.1";
# quote it if the trailing zero is meant to be significant.
use vars qw ($VERSION);
$VERSION = 0.10;
######################################################################################
sub new {
######################################################################################

=head2 new

Constructor.  It takes no arguments other than the class it is invoked
on, and hands back an empty hash-based object blessed into that class.

Usage:

    my $report = GO::TermFinderReport::Text->new();

Returns a GO::TermFinderReport::Text object.

=cut

######################################################################################

    # the invocant (normally the class name) is the first and only argument
    my $class = shift;

    return bless {}, $class;

}
######################################################################################
sub print {
######################################################################################

=head2 print

This method prints out the text report of the passed in hypotheses.
Hypotheses are printed in the order in which they appear in the
pvalues array, which findTerms() returns in ascending order of p-value
(i.e. most significant first).  If the FDR was calculated, the FDR
will also be printed.  It returns the number of hypotheses that had
corrected p-values as good or better than the passed in cutoff, which
is the number of hypotheses that were printed.

In the standard (non-table) report the corrected p-value is
abbreviated for readability (two decimal places for values in
scientific notation, five otherwise), and is prefixed with '<' when it
was derived from simulations in which nothing better was ever observed
(NUM_OBSERVATIONS of 0).  The tab delimited table always contains the
unmodified corrected p-value, so that the column stays numeric.

The annotated genes of each hypothesis are listed in sorted order, so
that the same input always produces the same report.

Usage:

    my $numHypotheses = $report->print(pvalues  => \@pvalues,
                                       numGenes => scalar(@genes),
                                       totalNum => $totalNum,
                                       cutoff   => 0.01,
                                       fh       => \*OUT,
                                       table    => 0 );

Required arguments:

pvalues  : A reference to the array returned by the findTerms() method
           of GO::TermFinder

numGenes : The number of genes that were in the list passed to the
           findTerms method

totalNum : The total number of genes that were indicated to be in the
           genome for finding terms.

Optional arguments:

fh       : A reference to a file handle to which the table should be
           printed.  Defaults to standard out.

cutoff   : The p-value cutoff, above which p-values and associated
           information will not be printed.  Default is no cutoff.
           A cutoff of 0 is honoured, and only lets through
           hypotheses whose corrected p-value is 0.

table    : 0 for standard output, 1 for tab delimited table.  Default is 0

The method dies if any of the required arguments is missing.

=cut

######################################################################################

    my ($self, %args) = @_;

    foreach my $required (qw(pvalues numGenes totalNum)){

	if (!exists $args{$required}){

	    die "You must supply a $required argument to the print method.";

	}

    }

    my $pvalues  = $args{'pvalues'};
    my $numGenes = $args{'numGenes'};
    my $totalNum = $args{'totalNum'};
    my $fh       = $args{'fh'}    || \*STDOUT;
    my $table    = $args{'table'} || 0;

    # a plain "|| 1" would turn an explicit cutoff of 0 into "no
    # cutoff", so only fall back to 1 when no usable cutoff was given

    my $cutoff = (defined $args{'cutoff'} && $args{'cutoff'} ne '')
	? $args{'cutoff'}
	: 1;

    my @header = ("GOID", "TERM", "CORRECTED_PVALUE",
		  "UNCORRECTED_PVALUE", "NUM_LIST_ANNOTATIONS",
		  "LIST_SIZE", "TOTAL_NUM_ANNOTATIONS",
		  "POPULATION_SIZE", "FDR_RATE",
		  "EXPECTED_FALSE_POSITIVES", "ANNOTATED_GENES");

    print $fh join("\t", @header), "\n" if ($table);

    my $numHypotheses = scalar @{$pvalues};
    my $numPrinted    = 0;

    foreach my $pvalue (@{$pvalues}){

	# skip if above cutoff

	next if ($pvalue->{CORRECTED_PVALUE} > $cutoff);

	$numPrinted++;

	my $node = $pvalue->{NODE};

	# hash order is arbitrary, so sort to keep reports reproducible

	my $genes = join(", ", sort values %{$pvalue->{ANNOTATED_GENES}});

	# the FDR is only present if it was calculated

	my $hasFdr = exists $pvalue->{FDR_RATE};

	my ($fdr, $falsePositives) = ("", "");

	if ($hasFdr){

	    $fdr            = sprintf ("%.2f%%", $pvalue->{FDR_RATE} * 100);
	    $falsePositives = sprintf ("%.2f", $pvalue->{EXPECTED_FALSE_POSITIVES});

	}

	if ($table){

	    # one row per hypothesis; the FDR columns are left empty
	    # when there was no FDR, so that the columns still line up
	    # with the header

	    print $fh join("\t", ($node->goid,
				  $node->term,
				  $pvalue->{CORRECTED_PVALUE},
				  $pvalue->{PVALUE},
				  $pvalue->{NUM_ANNOTATIONS},
				  $numGenes,
				  $pvalue->{TOTAL_NUM_ANNOTATIONS},
				  $totalNum,
				  $fdr,
				  $falsePositives,
				  $genes)), "\n";

	    next;

	}

	print $fh
	    "-- $numPrinted of $numHypotheses --\n",
	    "GOID\t", $node->goid, "\n",
	    "TERM\t", $node->term, "\n",
	    "CORRECTED P-VALUE\t", _formatCorrectedPvalue($pvalue), "\n",
	    "UNCORRECTED P-VALUE\t", $pvalue->{PVALUE}, "\n";

	if ($hasFdr){

	    print $fh
		"FDR_RATE\t", $fdr, "\n",
		"EXPECTED_FALSE_POSITIVES\t", $falsePositives, "\n";

	}

	print $fh
	    "NUM_ANNOTATIONS\t",
	    $pvalue->{NUM_ANNOTATIONS},
	    " of $numGenes in the list, vs ",
	    $pvalue->{TOTAL_NUM_ANNOTATIONS},
	    " of $totalNum in the genome\n",
	    "The genes annotated to this node are:\n",
	    $genes, "\n",
	    "\n";

    }

    return $numPrinted;

}

######################################################################################
sub _formatCorrectedPvalue{
######################################################################################
# This private function returns the corrected p-value of the passed in
# hypothesis (a hash reference, as found in the pvalues array),
# formatted for display in the standard report.

    my ($pvalue) = @_;

    my $value = $pvalue->{CORRECTED_PVALUE};

    # if it's in scientific notation, we want up to two of the decimal places

    $value =~ s/^(.*\.[0-9]{2}).*(e.+)$/$1$2/;

    # otherwise, we'll take up to five decimal places

    $value =~ s/^(0\.[0-9]{5})[0-9]*$/$1/;

    if (defined ($pvalue->{NUM_OBSERVATIONS}) && $pvalue->{NUM_OBSERVATIONS} == 0){

	# simulations were used to generate the corrected p-value.
	# If we never saw anything better than this p-value in the
	# simulations, then prepend a less than sign to the
	# corrected p-value

	$value = "<".$value;

    }

    return $value;

}
1; # to keep Perl happy
=pod
=head1 AUTHOR
Gavin Sherlock
sherlock@genome.stanford.edu
=cut