The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl 

=head1 NAME

findPathToRoot.pl - This program returns all possible paths 
from a given CUI to the root.

=head1 SYNOPSIS

This program takes a CUI or a term and returns all of the possible 
paths to the root.

=head1 USAGE

Usage: findPathToRoot.pl [OPTIONS] [CUI|TERM|ST]

=head1 INPUT

=head2 Required Arguments:

=head3 [CUI|TERM|ST]

A concept (CUI), a term or a semantic type (using the --st option) from the 
Unified Medical Language System. 

=head2 Optional Arguments:

=head3 --st 

The input is a semantic type and the path to root information is obtained 
from the semantic network. This can either be a TUI or the abbreviation of 
the semantic type. 

=head3 --config FILE

This is the configuration file. The format of the configuration 
file is as follows:

SAB :: <include|exclude> <source1, source2, ... sourceN>

REL :: <include|exclude> <relation1, relation2, ... relationN>

RELA :: <include|exclude> <rela1, rela2, ... relaN>  (optional)

For example, if we wanted to use the MSH vocabulary with only 
the RB/RN relations, the configuration file would be:

SAB :: include MSH
REL :: include RB, RN
RELA :: include inverse_isa, isa

or 

SAB :: include MSH
REL :: exclude PAR, CHD

If you go to the configuration file directory, there will 
be example configuration files for the different runs that 
you have performed.

=head3 --infile

File containing a list of CUIs or terms. Each CUI or term 
is required to be on its own line. For example:

CUI1
term1
CUI2
CUI3
...

=head3 --debug

Sets the debug flag for testing

=head3 --username STRING

Username is required to access the umls database on MySql
unless it was specified in the my.cnf file at installation

=head3 --password STRING

Password is required to access the umls database on MySql
unless it was specified in the my.cnf file at installation

=head3 --hostname STRING

Hostname where mysql is located. DEFAULT: localhost

=head3 --socket STRING

The socket your mysql is using. DEFAULT: /tmp/mysql.sock

=head3 --database STRING        

Database containing the UMLS. DEFAULT: umls

=head3 --realtime

This option will not create a database of the path information
for all of the concepts in the specified set of sources and relations 
in the config file but will obtain the information for just the 
input concept.

=head3 --forcerun

This option will bypass any command prompts such as asking 
if you would like to continue with the index creation. 

=head3 --verbose

This option will print out the table information to the 
config file that you specified.

=head3 --cuilist FILE

This option takes in a file containing a list of CUIs (one CUI 
per line) and stores only the path information for those CUIs 
rather than for all of the CUIs given the specified set of 
sources and relations

=head3 --info

This prints out the relation and source information between the 
CUIs in the path. This option is only available for the CUI 
network. 

=head3 --icpropagation FILE

Takes in a propagation file and then outputs the information 
content of the CUIs in the shortest path

=head3 --help

Displays the quick summary of program options.

=head3 --version

Displays the version information.

=head1 OUTPUT

Path(s) from the given CUI or term to the root

=head1 SYSTEM REQUIREMENTS

=over

=item * Perl (version 5.8.5 or better) - http://www.perl.org

=back

=head1 AUTHOR

 Bridget T. McInnes, University of Minnesota

=head1 COPYRIGHT

Copyright (c) 2007-2009,

 Bridget T. McInnes, University of Minnesota
 bthomson at cs.umn.edu
    
 Ted Pedersen, University of Minnesota Duluth
 tpederse at d.umn.edu

 Siddharth Patwardhan, University of Utah, Salt Lake City
 sidd@cs.utah.edu
 
 Serguei Pakhomov, University of Minnesota Twin Cities
 pakh0002@umn.edu

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to:

 The Free Software Foundation, Inc.,
 59 Temple Place - Suite 330,
 Boston, MA  02111-1307, USA.

=cut

###############################################################################

#                               THE CODE STARTS HERE
###############################################################################

#                           ================================
#                            COMMAND LINE OPTIONS AND USAGE
#                           ================================

use UMLS::Interface;
use Getopt::Long;

use strict;
use warnings;

#  GetOptions is called without explicit destinations, so Getopt::Long
#  stores each option in the package variable $opt_<name>. They are
#  declared here so the script runs under strict.
#  NOTE(review): $opt_driver is read below, but no "driver" option is
#  registered with GetOptions, so it is currently never set -- confirm
#  whether a --driver option was intended.
our (
    $opt_version,  $opt_help,     $opt_forcerun,      $opt_debug,
    $opt_infile,   $opt_username, $opt_password,      $opt_hostname,
    $opt_database, $opt_socket,   $opt_config,        $opt_verbose,
    $opt_debugpath, $opt_cuilist, $opt_realtime,      $opt_icpropagation,
    $opt_info,     $opt_st,       $opt_driver,
);

#  GetOptions returns false when it could not parse the command line.
GetOptions(
    "version",     "help",       "forcerun",   "debug",
    "infile=s",    "username=s", "password=s", "hostname=s",
    "database=s",  "socket=s",   "config=s",   "verbose",
    "debugpath=s", "cuilist=s",  "realtime",   "icpropagation=s",
    "info",        "st",
) or die("Please check the above mentioned option(s).\n");

#  if help is defined, print out help
if ( defined $opt_help ) {
    &showHelp();
    exit;
}

#  if version is requested, show version
if ( defined $opt_version ) {
    &showVersion();
    exit;
}

#  At least 1 term should be given on the command line (or via --infile).
if ( !defined $opt_infile and scalar(@ARGV) < 1 ) {
    print STDERR "No term was specified on the command line\n";
    &minimalUsageNotes();
    exit 1;    # safety net; minimalUsageNotes exits on its own
}

if ( defined $opt_info and defined $opt_st ) {
    print STDERR "The --info option is not available for the semantic network.\n";
    &minimalUsageNotes();
    exit 1;    # safety net; minimalUsageNotes exits on its own
}

#  Collect the options handed to UMLS::Interface. Only options that were
#  actually supplied are forwarded, so the module's defaults stay in effect
#  for everything else.
my %supplied_value_for = (
    icpropagation => $opt_icpropagation,
    debug         => $opt_debug,
    realtime      => $opt_realtime,
    config        => $opt_config,
    forcerun      => $opt_forcerun,
    debugpath     => $opt_debugpath,
    verbose       => $opt_verbose,
    cuilist       => $opt_cuilist,
    username      => $opt_username,
    driver        => $opt_driver,
    database      => $opt_database,
    password      => $opt_password,
    hostname      => $opt_hostname,
    socket        => $opt_socket,
);

my %option_hash = ();
foreach my $name ( keys %supplied_value_for ) {
    next if !defined $supplied_value_for{$name};
    $option_hash{$name} = $supplied_value_for{$name};
}

my $umls = UMLS::Interface->new( \%option_hash );
die "Unable to create UMLS::Interface object.\n" if ( !$umls );

#  Gather the inputs: one per non-blank line of --infile, otherwise the
#  first command line argument.
my @inputarray = ();
if ( defined $opt_infile ) {
    open( my $in_fh, '<', $opt_infile )
        or die "Could not open infile $opt_infile: $!\n";
    while ( my $line = <$in_fh> ) {
        chomp $line;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        next if $line eq '';    # a blank line is not a CUI or a term
        push @inputarray, $line;
    }
    close $in_fh;
}
else {
    push @inputarray, shift @ARGV;
}

#  Format used to print information content values (--icpropagation).
my $precision   = 4;
my $floatformat = join '', '%', '.', $precision, 'f';

foreach my $input (@inputarray) {
    my $term = $input;

    #  $c holds the identifiers (CUIs or TUIs) whose paths are printed.
    my $c = [];
    if ( defined $opt_st ) {
        #  The whole input must look like a TUI; otherwise it is handed to
        #  getStTui to be mapped to one.
        if ( $input =~ /\AT[0-9]+\z/ ) {
            push @{$c}, $input;
            my $abbreviation = $umls->getStAbr($input);
            $term = $abbreviation if defined $abbreviation;
        }
        else {
            push @{$c}, grep { defined } $umls->getStTui($input);
        }
    }
    else {
        #  The whole input must look like a CUI; a term that merely
        #  contains "C<digits>" is looked up as a term instead.
        if ( $input =~ /\AC[0-9]+\z/ ) {
            push @{$c}, $input;
            my $terms = $umls->getTermList($input);
            if ( ref $terms eq 'ARRAY' and defined $terms->[0] ) {
                $term = $terms->[0];
            }
        }
        else {
            my $concepts = $umls->getConceptList($input);
            $c = $concepts if ref $concepts eq 'ARRAY';
        }
    }

    #  Set once anything has been reported for this input.
    my $printFlag = 0;

    foreach my $cui ( @{$c} ) {

        #  make certain cui exists in this view
        next if !defined $opt_st and !$umls->exists($cui);

        my $found =
            defined $opt_st
            ? $umls->stPathsToRoot($cui)
            : $umls->pathsToRoot($cui);
        my @paths = ref $found eq 'ARRAY' ? @{$found} : ();

        if ( !@paths ) {
            print "There are no paths between $term ($cui) and the root.\n";
            $printFlag = 1;
            next;
        }

        print "The paths between $term ($cui) and the root:\n";
        foreach my $path (@paths) {

            #  Each path is a whitespace separated list of identifiers;
            #  split ' ' also ignores any leading whitespace.
            my @array = split ' ', $path;
            print "  => ";
            foreach my $i ( 0 .. $#array ) {
                my $element = $array[$i];

                #  Label for the element: the semantic type abbreviation,
                #  or the first term listed for the CUI.
                my $t;
                if ( defined $opt_st ) {
                    $t = $umls->getStAbr($element);
                }
                else {
                    my $terms = $umls->getTermList($element);
                    $t = $terms->[0] if ref $terms eq 'ARRAY';
                }
                $t = "" if !defined $t;
                print "$element ($t) ";

                if ( defined $opt_icpropagation ) {
                    my $value = $umls->getIC($element);
                    $value = 0 if !defined $value;
                    my $ic = sprintf $floatformat, $value;
                    print "($ic) ";
                }

                #  Relations are reported between this element and the next
                #  one, so the last element has nothing to report.
                if ( defined $opt_info and $i < $#array ) {
                    my $second = $array[ $i + 1 ];
                    my $found_relations =
                        $umls->getRelationsBetweenCuis( $element, $second );
                    my @relations =
                        ref $found_relations eq 'ARRAY'
                        ? @{$found_relations}
                        : ();
                    print " => @relations => ";
                }
            }
            print "\n";

            $printFlag = 1;
        }
    }

    if ( !$printFlag ) {
        print "There are no paths from the given $input to the root.\n";
    }
}

##############################################################################
#  function to output minimal usage notes
#
#  Called when the command line is unusable. Prints the one-line usage and
#  the "ask for help" hint on STDERR and terminates the program with a
#  non-zero exit status so that calling scripts can detect the failure.
#  Does not return.
##############################################################################
sub minimalUsageNotes {

    print STDERR "Usage: findPathToRoot.pl [OPTIONS] [CUI|TERM]\n";
    &askHelp();
    exit 1;
}

##############################################################################
#  function to output help messages for this program
#
#  Prints the description, usage line and the list of command line options
#  on STDOUT. Takes no arguments.
##############################################################################
sub showHelp {

    print "This is a utility that takes as input a CUI or a \n";
    print "term and returns all possible paths to the root.\n\n";

    print "Usage: findPathToRoot.pl [OPTIONS] [CUI|TERM]\n\n";

    print "Options:\n\n";

    print "--st                     The input is a semantic type\n\n";

    print "--debug                  Sets the debug flag for testing\n\n";

    print "--username STRING        Username required to access mysql\n\n";

    print "--password STRING        Password required to access mysql\n\n";

    print "--hostname STRING        Hostname for mysql (DEFAULT: localhost)\n\n";

    print "--database STRING        Database contain UMLS (DEFAULT: umls)\n\n";

    #  The default socket is /tmp/mysql.sock, as stated in the POD above.
    print "--socket STRING          Socket used by mysql (DEFAULT: /tmp/mysql.sock)\n\n";

    print "--config FILE            Configuration file\n\n";

    print "--realtime               This option will not create a database of the\n";
    print "                         path information for all of concepts but just\n";
    print "                         obtain the information for the input concept\n\n";

    print "--forcerun               This option will bypass any command \n";
    print "                         prompts such as asking if you would \n";
    print "                         like to continue with the index \n";
    print "                         creation. \n\n";

    print "--debugpath FILE         This option prints out the path\n";
    print "                         information for debugging purposes\n\n";

    print "--verbose                This option prints out the table information\n";
    print "                         to a file in your config directory.\n\n";

    print "--cuilist FILE           This option takes in a file containing a \n";
    print "                         list of CUIs (one CUI per line) and stores\n";
    print "                         only the path information for those CUIs\n";
    print "                         rather than for all of the CUIs\n\n";

    print "--info                   This prints out the relation and source \n";
    print "                         information between the CUIs in the path\n\n";

    print "--icpropagation FILE     This option returns the information content\n";
    print "                         of the CUIs in the path based on the counts\n";
    print "                         from the propagation file\n\n";

    print "--version                Prints the version number\n\n";

    print "--help                   Prints this help message.\n\n";
}

##############################################################################
#  function to output the version number
#
#  Writes the RCS identification string followed by the copyright line
#  to STDOUT.
##############################################################################
sub showVersion {
    my $rcs_id    = '$Id: findPathToRoot.pl,v 1.25 2011/08/29 16:37:03 btmcinnes Exp $';
    my $copyright = "\nCopyright (c) 2008, Ted Pedersen & Bridget McInnes\n";

    print $rcs_id . $copyright;
}

##############################################################################
#  function to output "ask for help" message when user's goofed
#
#  Writes a one-line hint pointing at the --help option to STDERR.
##############################################################################
sub askHelp {
    my $hint = "Type findPathToRoot.pl --help for help.\n";
    print {*STDERR} $hint;
}