The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package GO::TermFinderReport::Text;

=pod

=head1 NAME

GO::TermFinderReport::Text - prints results of GO::TermFinder as a text report

=head1 DESCRIPTION

The print() method of this Perl module receives a reference to the
array that is the return value from the findTerms method of
GO::TermFinder, the number of genes that were used to generate the
terms, and the number of genes that were said to be in the genome.  It
will then generate a text report that summarizes those results.
Optionally, filehandle and p-value cutoff arguments may also be passed
in.  It will return the number of hypotheses whose corrected p-values
were as good as or better than the cutoff.

=head1 SYNOPSIS

    use GO::TermFinder;
    use GO::TermFinderReport::Text;

    .
    .
    .

    my @pvalues = $termFinder->findTerms(genes=>\@genes);

    my $report  = GO::TermFinderReport::Text->new();

    open (OUT, ">report.text");

    my $numHypotheses = $report->print(pvalues  => \@pvalues,
                                       aspect   => $aspect,
                                       numGenes => scalar(@genes),
                                       totalNum => $totalNum,
                                       cutoff   => 0.01,
                                       fh       => \*OUT);

    close OUT;

=cut

use strict;
use warnings;
use diagnostics;

# Declare $VERSION as a package global via the vars pragma so that it
# can be assigned below under 'use strict'.
use vars qw ($VERSION);

# Version of this module.  NOTE: this is an unquoted numeric literal, so
# it stringifies as 0.1 rather than 0.10.
$VERSION = 0.10;

######################################################################################
sub new{
######################################################################################

=head2 new

Constructs a report object.  The object carries no state of its own:
it is an empty hash blessed into the invoking class.  Any arguments
after the class name are ignored.

Usage:

    my $report = GO::TermFinderReport::Text->new();

Returns the newly created GO::TermFinderReport::Text object.

=cut

######################################################################################

    # Only the invocant is of interest; everything else in @_ is ignored.

    my ($class) = @_;

    return bless {}, $class;

}

######################################################################################
sub print{
######################################################################################

=head2 print

This method prints out the text report of the passed in hypotheses.
Hypotheses are reported in the order in which they appear in the
pvalues array; this method does not sort them, so the report is in
ascending order of p-value (i.e. most significant first) only when the
array is already ordered that way.  Hypotheses whose corrected p-value
is greater than the cutoff are skipped.  If the FDR was calculated for
a hypothesis (i.e. it has an FDR_RATE entry), the FDR rate and the
expected number of false positives will also be printed.  P-values are
printed exactly as supplied, without any reformatting.  The genes
annotated to each node are printed in sorted order, so that the report
is reproducible from run to run.

It returns the number of hypotheses that had corrected p-values as
good or better than the passed in cutoff, i.e. the number of
hypotheses that were printed.

Usage:

    my $numHypotheses = $report->print(pvalues  => \@pvalues,
                                       numGenes => scalar(@genes),
                                       totalNum => $totalNum,
                                       cutoff   => 0.01,
                                       fh       => \*OUT,
                                       table    => 0 );

Required arguments:

pvalues  : A reference to the array returned by the findTerms() method
           of GO::TermFinder

numGenes : The number of genes that were in the list passed to the
           findTerms method

totalNum : The total number of genes that were indicated to be in the
           genome for finding terms.

The method dies if any of the required arguments is missing.

Optional arguments:

fh       : A reference to a file handle to which the table should be
           printed.  Defaults to standard out.

cutoff   : The p-value cutoff, above which p-values and associated
           information will not be printed.  A cutoff of 0 is
           honored.  Defaults to 1 when not supplied (or supplied as
           undef or an empty string).

table    : 0 for standard output, 1 for tab delimited table.  Default is 0

In table mode a header line is printed first, followed by one line of
eleven tab delimited columns per hypothesis; the two FDR columns are
left empty for hypotheses that have no FDR_RATE entry.

=cut

######################################################################################

    my ($self, %args) = @_;

    # All three required arguments must be present before anything is
    # written to the filehandle.  They are checked in the same order
    # as they are documented.

    foreach my $required (qw(pvalues numGenes totalNum)){

        if (!exists $args{$required}){

            die "You must supply a $required argument to the print method.";

        }

    }

    my $pvalues  = $args{'pvalues'};
    my $numGenes = $args{'numGenes'};
    my $totalNum = $args{'totalNum'};
    my $fh       = $args{'fh'}    || \*STDOUT;
    my $table    = $args{'table'} || 0;

    # A cutoff of 0 is a legitimate value, so only fall back to the
    # default when no usable cutoff was supplied at all ('||' would
    # silently turn 0 into 1).

    my $cutoff = (defined $args{'cutoff'} && $args{'cutoff'} ne '')
        ? $args{'cutoff'}
        : 1;

    if ($table){

        my @header = ("GOID", "TERM", "CORRECTED_PVALUE",
                      "UNCORRECTED_PVALUE", "NUM_LIST_ANNOTATIONS",
                      "LIST_SIZE", "TOTAL_NUM_ANNOTATIONS",
                      "POPULATION_SIZE", "FDR_RATE",
                      "EXPECTED_FALSE_POSITIVES", "ANNOTATED_GENES");

        print $fh join("\t", @header), "\n";

    }

    # number of hypotheses that passed the cutoff and were printed

    my $numReported = 0;

    foreach my $pvalue (@{$pvalues}){

        # skip if above cutoff

        next if ($pvalue->{CORRECTED_PVALUE} > $cutoff);

        $numReported++;

        my $node = $pvalue->{NODE};

        # deal with FDR - the formatted values stay empty when no FDR
        # was calculated, which fills in the blanks in table mode

        my $hasFdr = exists ($pvalue->{FDR_RATE});

        my ($fdr, $falsePositives) = ("", "");

        if ($hasFdr){

            $fdr = sprintf ("%.2f%%", $pvalue->{FDR_RATE} * 100);

            $falsePositives = sprintf ("%.2f", $pvalue->{EXPECTED_FALSE_POSITIVES});

        }

        # The values of a hash come back in no particular order, so
        # sort them to make the report the same from run to run.

        my $genes = join(", ", sort { $a cmp $b } values(%{$pvalue->{ANNOTATED_GENES}}));

        if ($table){

            # one row, with columns in the same order as the header

            print $fh join("\t", ($node->goid,
                                  $node->term,
                                  $pvalue->{CORRECTED_PVALUE},
                                  $pvalue->{PVALUE},
                                  $pvalue->{NUM_ANNOTATIONS},
                                  $numGenes,
                                  $pvalue->{TOTAL_NUM_ANNOTATIONS},
                                  $totalNum,
                                  $fdr,
                                  $falsePositives,
                                  $genes)), "\n";

            next;

        }

        # standard (non-table) output.  Note that the "of" count is
        # the size of the whole pvalues array, not just the number of
        # hypotheses that pass the cutoff.

        print $fh

            "-- $numReported of ", scalar @{$pvalues}, " --\n",
            "GOID\t", $node->goid, "\n",
            "TERM\t", $node->term, "\n",
            "CORRECTED P-VALUE\t", $pvalue->{CORRECTED_PVALUE}, "\n",
            "UNCORRECTED P-VALUE\t", $pvalue->{PVALUE}, "\n";

        if ($hasFdr){

            print $fh

                "FDR_RATE\t", $fdr, "\n",
                "EXPECTED_FALSE_POSITIVES\t", $falsePositives, "\n";

        }

        print $fh

            "NUM_ANNOTATIONS\t", $pvalue->{NUM_ANNOTATIONS},
            " of $numGenes in the list, vs ", $pvalue->{TOTAL_NUM_ANNOTATIONS},
            " of $totalNum in the genome\n",
            "The genes annotated to this node are:\n",
            $genes, "\n",
            "\n";

    }

    return $numReported;

}
    
1; # to keep Perl happy

=pod

=head1 AUTHOR

Gavin Sherlock

sherlock@genome.stanford.edu

=cut