The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
#!/usr/bin/perl

# $Id: termFinderClient.pl,v 1.9 2007/03/18 05:46:39 sherlock Exp $

# License information (the MIT license)

# Copyright (c) 2003-2007 Gavin Sherlock; Stanford University

# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:

# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

use strict;
use warnings;
use diagnostics;

use GO::TermFinder;
use GO::AnnotationProvider::AnnotationParser;
use GO::OntologyProvider::OboParser;

use GO::TermFinderReport::Text;

use GO::Utils::File qw (GenesFromFile);

print "Enter the fully qualified name of your obo file:\n";

chomp(my $ontologyFile = <STDIN>);

print "What is the aspect of this ontology (F, P, or C)?\n";

chomp(my $aspect = uc(<STDIN>));

print "Enter the fully qualified name of your associations file:\n";

chomp(my $annotationFile = <STDIN>); 

print "Enter a the fully qualified name of your file with a list of genes for which to find term:\n";

chomp(my $genesFile = <STDIN>);

print "How many genes (roughly) exist within the organism?\n";

chomp(my $totalNum = <STDIN>);

print "Finding terms...\n";

my $ontology   = GO::OntologyProvider::OboParser->new(ontologyFile => $ontologyFile,
						      aspect       => $aspect);

my $annotation = GO::AnnotationProvider::AnnotationParser->new(annotationFile=>$annotationFile);

my $termFinder = GO::TermFinder->new(annotationProvider => $annotation,
				     ontologyProvider   => $ontology,
				     totalNumGenes      => $totalNum,
				     aspect             => $aspect);

my @genes = GenesFromFile($genesFile);

my @pvalues    = $termFinder->findTerms(genes        => \@genes,
					calculateFDR => 1);

# now just print the info back to the client

my $report = GO::TermFinderReport::Text->new();

my $cutoff = 0.05;

my $numHypotheses = $report->print(pvalues  => \@pvalues,
				   numGenes => scalar(@genes),
				   totalNum => $totalNum,
				   cutoff   => $cutoff);

# if they had no significant P-values

if ($numHypotheses == 0){
    
    print "No terms were found for this aspect with a corrected P-value <= $cutoff.\n";
    
}

=pod

=head1 NAME

termFinderClient.pl - interactive client to find significant GO terms for a list of genes

=head1 SYNOPSIS

This program is a very simply client for the GO::TermFinder object,
that prompts a user for the various pieces of information that are
required to determine significant GO terms associated with the list of
genes.  It uses a p-value cut-off (for the corrected p-value) of .05,
and simply prints the information back to the screen.  An example
below uses the genes : YPL250C, MET11, MXR1, MET17, SAM3, MET28, STR3,
MMP1, MET1, YIL074C, MHT1, MET14, MET16, MET3, MET10, ECM17, MET2,
MUP1 and MET6, which formed the so called methionine cluster from
Spellman et al, 1998:

    > termFinderClient.pl
    Enter the fully qualified name of your obo file:
    ../t/gene_ontology_edit.obo
    What is the aspect of this ontology you want to use (F, P, or C)?
    P
    Enter the fully qualified name of your associations file:
    ../t/gene_association.sgd
    Enter a the fully qualified name of your file with a list of genes for which to find term:
    genes.txt
    How many genes (roughly) exist within the organism?
    7300
    Finding terms...
    -- 1 of 37 --
    GOID    GO:0006790
    TERM    sulfur metabolism
    CORRECTED P-VALUE       1.47659150177758e-22
    UNCORRECTED P-VALUE     3.99078784264211e-24
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 13 of 19 in the list, vs 59 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET3, STR3, ECM17, MET28, MHT1, MET6, MET10, MET1, MET11, MET2, MET16, MET17
    
    -- 2 of 37 --
    GOID    GO:0000096
    TERM    sulfur amino acid metabolism
    CORRECTED P-VALUE       4.5287552955708e-21
    UNCORRECTED P-VALUE     1.22398791772184e-22
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 11 of 19 in the list, vs 32 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET3, STR3, MET28, MHT1, MET6, MET1, MET11, MET2, MET16, MET17
    
    -- 3 of 37 --
    GOID    GO:0006555
    TERM    methionine metabolism
    CORRECTED P-VALUE       1.70050438966192e-17
    UNCORRECTED P-VALUE     4.59595780989708e-19
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 9 of 19 in the list, vs 23 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET1, MET11, MET3, STR3, MET2, MET16, MET17, MET6
    
    -- 4 of 37 --
    GOID    GO:0006520
    TERM    amino acid metabolism
    CORRECTED P-VALUE       9.54842088570323e-16
    UNCORRECTED P-VALUE     2.58065429343331e-17
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 13 of 19 in the list, vs 184 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET3, STR3, ECM17, MET28, MHT1, MET6, MET1, MET11, MET2, MET16, YIL074C, MET17
    
    -- 5 of 37 --
    GOID    GO:0000097
    TERM    sulfur amino acid biosynthesis
    CORRECTED P-VALUE       1.01918487720888e-15
    UNCORRECTED P-VALUE     2.75455372218618e-17
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 7 of 19 in the list, vs 10 of 7300 in the genome
    The genes annotated to this node are:
    MET1, MET11, STR3, MET2, MET28, MET17, MET6
    
    -- 6 of 37 --
    GOID    GO:0006519
    TERM    amino acid and derivative metabolism
    CORRECTED P-VALUE       2.70276218187099e-15
    UNCORRECTED P-VALUE     7.30476265370537e-17
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 13 of 19 in the list, vs 199 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET3, STR3, ECM17, MET28, MHT1, MET6, MET1, MET11, MET2, MET16, YIL074C, MET17
    
    -- 7 of 37 --
    GOID    GO:0009308
    TERM    amine metabolism
    CORRECTED P-VALUE       1.14774161056767e-14
    UNCORRECTED P-VALUE     3.10200435288561e-16
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 13 of 19 in the list, vs 222 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET3, STR3, ECM17, MET28, MHT1, MET6, MET1, MET11, MET2, MET16, YIL074C, MET17
    
    -- 8 of 37 --
    GOID    GO:0009066
    TERM    aspartate family amino acid metabolism
    CORRECTED P-VALUE       1.79446311877879e-14
    UNCORRECTED P-VALUE     4.84990032102375e-16
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 9 of 19 in the list, vs 45 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET1, MET11, MET3, STR3, MET2, MET16, MET17, MET6
    
    -- 9 of 37 --
    GOID    GO:0006807
    TERM    nitrogen compound metabolism
    CORRECTED P-VALUE       3.77265283361447e-14
    UNCORRECTED P-VALUE     1.01963590097688e-15
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 13 of 19 in the list, vs 243 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET3, STR3, ECM17, MET28, MHT1, MET6, MET1, MET11, MET2, MET16, YIL074C, MET17
    
    -- 10 of 37 --
    GOID    GO:0044272
    TERM    sulfur compound biosynthesis
    CORRECTED P-VALUE       2.67188341651488e-13
    UNCORRECTED P-VALUE     7.2213065311213e-15
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 7 of 19 in the list, vs 18 of 7300 in the genome
    The genes annotated to this node are:
    MET1, MET11, STR3, MET2, MET28, MET17, MET6
    
    -- 11 of 37 --
    GOID    GO:0006082
    TERM    organic acid metabolism
    CORRECTED P-VALUE       7.06399274294626e-13
    UNCORRECTED P-VALUE     1.90918722782331e-14
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 13 of 19 in the list, vs 304 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET3, STR3, ECM17, MET28, MHT1, MET6, MET1, MET11, MET2, MET16, YIL074C, MET17
    
    -- 12 of 37 --
    GOID    GO:0019752
    TERM    carboxylic acid metabolism
    CORRECTED P-VALUE       7.06399274294626e-13
    UNCORRECTED P-VALUE     1.90918722782331e-14
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 13 of 19 in the list, vs 304 of 7300 in the genome
    The genes annotated to this node are:
    MET14, MET3, STR3, ECM17, MET28, MHT1, MET6, MET1, MET11, MET2, MET16, YIL074C, MET17
    
    -- 13 of 37 --
    GOID    GO:0000103
    TERM    sulfate assimilation
    CORRECTED P-VALUE       9.98928836937253e-13
    UNCORRECTED P-VALUE     2.69980766739798e-14
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 6 of 19 in the list, vs 10 of 7300 in the genome
    The genes annotated to this node are:
    MET10, MET14, MET1, MET3, ECM17, MET16
    
    -- 14 of 37 --
    GOID    GO:0006791
    TERM    sulfur utilization
    CORRECTED P-VALUE       9.98928836937253e-13
    UNCORRECTED P-VALUE     2.69980766739798e-14
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 6 of 19 in the list, vs 10 of 7300 in the genome
    The genes annotated to this node are:
    MET10, MET14, MET1, MET3, ECM17, MET16
    
    -- 15 of 37 --
    GOID    GO:0008652
    TERM    amino acid biosynthesis
    CORRECTED P-VALUE       4.728301511661e-11
    UNCORRECTED P-VALUE     1.27791932747595e-12
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 9 of 19 in the list, vs 103 of 7300 in the genome
    The genes annotated to this node are:
    MET1, MET11, STR3, MET2, ECM17, YIL074C, MET28, MET17, MET6
    
    -- 16 of 37 --
    GOID    GO:0009309
    TERM    amine biosynthesis
    CORRECTED P-VALUE       9.42530020402032e-11
    UNCORRECTED P-VALUE     2.54737843351901e-12
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 9 of 19 in the list, vs 111 of 7300 in the genome
    The genes annotated to this node are:
    MET1, MET11, STR3, MET2, ECM17, YIL074C, MET28, MET17, MET6
    
    -- 17 of 37 --
    GOID    GO:0044271
    TERM    nitrogen compound biosynthesis
    CORRECTED P-VALUE       9.42530020402032e-11
    UNCORRECTED P-VALUE     2.54737843351901e-12
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 9 of 19 in the list, vs 111 of 7300 in the genome
    The genes annotated to this node are:
    MET1, MET11, STR3, MET2, ECM17, YIL074C, MET28, MET17, MET6
    
    -- 18 of 37 --
    GOID    GO:0009086
    TERM    methionine biosynthesis
    CORRECTED P-VALUE       1.81352587001198e-08
    UNCORRECTED P-VALUE     4.90142127030265e-10
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 4 of 19 in the list, vs 6 of 7300 in the genome
    The genes annotated to this node are:
    MET1, STR3, MET2, MET6
    
    -- 19 of 37 --
    GOID    GO:0009067
    TERM    aspartate family amino acid biosynthesis
    CORRECTED P-VALUE       4.58690073266533e-06
    UNCORRECTED P-VALUE     1.23970290072036e-07
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 4 of 19 in the list, vs 19 of 7300 in the genome
    The genes annotated to this node are:
    MET1, STR3, MET2, MET6
    
    -- 20 of 37 --
    GOID    GO:0000101
    TERM    sulfur amino acid transport
    CORRECTED P-VALUE       5.51387718608536e-06
    UNCORRECTED P-VALUE     1.49023707732037e-07
    FDR_RATE        0.00%
    EXPECTED_FALSE_POSITIVES        0.00
    NUM_ANNOTATIONS 3 of 19 in the list, vs 5 of 7300 in the genome
    The genes annotated to this node are:
    MMP1, MUP1, SAM3
    
    -- 21 of 37 --
    GOID    GO:0009069
    TERM    serine family amino acid metabolism
    CORRECTED P-VALUE       0.00108164983836796
    UNCORRECTED P-VALUE     2.92337794153502e-05
    FDR_RATE        0.10%
    EXPECTED_FALSE_POSITIVES        0.02
    NUM_ANNOTATIONS 3 of 19 in the list, vs 24 of 7300 in the genome
    The genes annotated to this node are:
    MET2, YIL074C, MET17
    
    -- 22 of 37 --
    GOID    GO:0006865
    TERM    amino acid transport
    CORRECTED P-VALUE       0.00406448231248132
    UNCORRECTED P-VALUE     0.000109850873310306
    FDR_RATE        0.18%
    EXPECTED_FALSE_POSITIVES        0.04
    NUM_ANNOTATIONS 3 of 19 in the list, vs 37 of 7300 in the genome
    The genes annotated to this node are:
    MMP1, MUP1, SAM3
    
    -- 23 of 37 --
    GOID    GO:0044249
    TERM    cellular biosynthesis
    CORRECTED P-VALUE       0.00858173589265904
    UNCORRECTED P-VALUE     0.000231938807909704
    FDR_RATE        0.26%
    EXPECTED_FALSE_POSITIVES        0.06
    NUM_ANNOTATIONS 9 of 19 in the list, vs 927 of 7300 in the genome
    The genes annotated to this node are:
    MET1, MET11, STR3, MET2, ECM17, YIL074C, MET28, MET17, MET6
    
    -- 24 of 37 --
    GOID    GO:0015837
    TERM    amine transport
    CORRECTED P-VALUE       0.00888521048932255
    UNCORRECTED P-VALUE     0.000240140824035745
    FDR_RATE        0.33%
    EXPECTED_FALSE_POSITIVES        0.08
    NUM_ANNOTATIONS 3 of 19 in the list, vs 48 of 7300 in the genome
    The genes annotated to this node are:
    MMP1, MUP1, SAM3
    
    -- 25 of 37 --
    GOID    GO:0046942
    TERM    carboxylic acid transport
    CORRECTED P-VALUE       0.0100357228763322
    UNCORRECTED P-VALUE     0.000271235753414384
    FDR_RATE        0.40%
    EXPECTED_FALSE_POSITIVES        0.10
    NUM_ANNOTATIONS 3 of 19 in the list, vs 50 of 7300 in the genome
    The genes annotated to this node are:
    MMP1, MUP1, SAM3
    
    -- 26 of 37 --
    GOID    GO:0015849
    TERM    organic acid transport
    CORRECTED P-VALUE       0.0106454218366372
    UNCORRECTED P-VALUE     0.000287714103692899
    FDR_RATE        0.46%
    EXPECTED_FALSE_POSITIVES        0.12
    NUM_ANNOTATIONS 3 of 19 in the list, vs 51 of 7300 in the genome
    The genes annotated to this node are:
    MMP1, MUP1, SAM3
    
    -- 27 of 37 --
    GOID    GO:0009070
    TERM    serine family amino acid biosynthesis
    CORRECTED P-VALUE       0.0154325886116854
    UNCORRECTED P-VALUE     0.000417096989505011
    FDR_RATE        0.44%
    EXPECTED_FALSE_POSITIVES        0.12
    NUM_ANNOTATIONS 2 of 19 in the list, vs 12 of 7300 in the genome
    The genes annotated to this node are:
    YIL074C, MET17
    
    -- 28 of 37 --
    GOID    GO:0009058
    TERM    biosynthesis
    CORRECTED P-VALUE       0.0156316633253993
    UNCORRECTED P-VALUE     0.000422477387172954
    FDR_RATE        0.43%
    EXPECTED_FALSE_POSITIVES        0.12
    NUM_ANNOTATIONS 9 of 19 in the list, vs 1002 of 7300 in the genome
    The genes annotated to this node are:
    MET1, MET11, STR3, MET2, ECM17, YIL074C, MET28, MET17, MET6

=head1 AUTHORS

Gavin Sherlock, sherlock@genome.stanford.edu

=cut