=head1 NAME
Bio::Prospect::LocalClient -- execute Prospect locally
$Id: LocalClient.pm,v 1.32 2003/11/18 19:45:45 rkh Exp $
=head1 SYNOPSIS
my $in = new Bio::SeqIO( -format=> 'Fasta', '-file' => $ARGV[0] );
my $po = new Bio::Prospect::Options( seq=>1, svm=>1, global_local=>1,
templates=>['1alu', '1bgc','1eera']);
my $pf = new Bio::Prospect::LocalClient( {options=>$po );
while ( my $s = $in->next_seq() ) {
my @threads = $pf->thread( $s );
}
=head1 DESCRIPTION
B<Bio::Prospect::LocalClient> is runs Prospect locally. It is intended to be
used to facilitate high-throughput protein sequence threading and as the
server-side component of B<Bio::Prospect::SoapClient>, with which it is API
compatible.
=head1 ROUTINES & METHODS
=cut
package Bio::Prospect::LocalClient;
use base Bio::Prospect::Client;
use warnings;
use strict;
use File::Temp qw( tempfile tempdir );
use Carp qw(cluck);
use IO::File;
use Bio::Prospect::Exceptions;
use Bio::Prospect::utilities;
use Bio::Prospect::ThreadSummary;
use Bio::Prospect::Init;
use Digest::MD5;
use vars qw( $VERSION );
$VERSION = sprintf( "%d.%02d", q$Revision: 1.32 $ =~ /(\d+)\.(\d+)/ );
#-------------------------------------------------------------------------------
# new()
#-------------------------------------------------------------------------------
=head2 new()
Name: new()
Purpose: constructor
Arguments: hash reference with following key/value pairs
options => Bio::Prospect::Options object (required)
Returns: Bio::Prospect::LocalClient
=cut
sub new(;%) {
my $self = shift->SUPER::new(@_);
$self->_setenv();
$self->_prepare_options();
$self->{'xmlCacheName'} = 'xmlCache'; # name of xml file cache
$self->{'sortCacheName'} = 'sortCache'; # name of sort file cache
return $self;
}
#-------------------------------------------------------------------------------
# thread()
#-------------------------------------------------------------------------------
=head2 thread()
Name: thread()
Purpose: return a list of Thread objects
Arguments: scalar sequence or Bio::PrimarySeqI-derived object
Returns: list of Bio::Prospect::Thread objects
=cut
sub thread($$) {
my ($self,$s) = @_;
if ( not defined $s or (ref $s and not $s->isa('Bio::PrimarySeqI')) ) {
throw Bio::Prospect::BadUsage(
"Bio::Prospect::LocalClient::thread() requires one Bio::PrimarySeqI subclass or " .
"scalar sequence argument" );
}
my $seq = ref $s ? $s->seq() : $s;
my $xfn = $self->_thread_to_file( $seq );
my $pf = new Bio::Prospect::File;
$pf->open( "<$xfn" ) || throw Bio::Prospect::RuntimeError("$xfn: $!\n");
$self->{'threads'} = [];
while( my $t = $pf->next_thread() ) {
push @{$self->{'threads'}}, $t;
}
return( @{$self->{'threads'}} );
}
#-------------------------------------------------------------------------------
# thread_summary()
#-------------------------------------------------------------------------------
=head2 thread_summary()
Name: thread_summary()
Purpose: return a list of ThreadSummary objects
Arguments: Bio::Seq object
Returns: list of rospect2::ThreadSummary objects
=cut
sub thread_summary($$) {
my ($self,$s) = @_;
my @summary;
foreach my $t ( $self->thread($s) ) {
push @summary, new Bio::Prospect::ThreadSummary( $t );
}
return( @summary );
}
#-------------------------------------------------------------------------------
# xml()
#-------------------------------------------------------------------------------
=head2 xml()
Name: xml()
Purpose: return xml string
Arguments: Bio::Seq object
Returns: string
=cut
sub xml($$) {
my ($self,$s) = @_;
my $xfn = $self->_thread_to_file( $s );
my $in = new IO::File "<$xfn" or throw
Bio::Prospect::RuntimeError( "can't open $xfn for reading");
my $xml='';
while(<$in>){ $xml .= $_; }
return( $xml );
}
#-------------------------------------------------------------------------------
# DEPRECATED METHODS - will be removed in subsequent releases.
#-------------------------------------------------------------------------------
sub score_summary($$) {
cluck("This function is deprecated on Oct-23-2003:\n");
my ($self,$s) = @_;
my $xfn = $self->thread_to_file( $s );
return Bio::Prospect::utilities::score_summary( $xfn );
}
sub thread_to_file($$) {
cluck("This function is deprecated on Oct-23-2003:\n");
return _thread_to_file($_[0],$_[1]);
}
#-------------------------------------------------------------------------------
# INTERNAL METHODS: not intended for use outside this module
#-------------------------------------------------------------------------------
=pod
=head1 INTERNAL METHODS & ROUTINES
The following functions are documented for developers' benefit. THESE
SHOULD NOT BE CALLED OUTSIDE OF THIS MODULE. YOU'VE BEEN WARNED.
=cut
#-------------------------------------------------------------------------------
# _get_svm_scores()
#-------------------------------------------------------------------------------
=head2 _get_svm_scores()
Name: _get_svm_scores()
Purpose: return a hash of svm scores from a prospect sort file
Arguments: sort filename
Returns: hash
=cut
sub _get_svm_scores($$) {
my ($self,$fn) = @_;
my %retval;
my $in = new IO::File $fn || throw Bio::Prospect::RuntimeError( "can't open $fn for reading" );
my @fld;
while(<$in>) {
next if m/^:Protein/;
@fld = split /\s+/;
$retval{$fld[0]} = $fld[3];
}
if ( scalar (keys %retval) == 0 ) {
throw Bio::Prospect::RuntimeError
( 'Sort file is empty',
"The sort file for this sequence is empty. sortProspect likely failed!",
"Execute sortProspect on the command-line and check output messages. sortProspect " .
"can fail because of erroneous characters in the output xml file (e.g. null character)."
);
}
close($in);
return %retval;
}
sub _thread_to_file($$)
{
my ($self,$s) = @_;
my $xfn;
my $seq = ref $s ? $s->seq() : $s;
# check the cache for a cached file cooresponding to this sequence.
# if available then return it rather than running prospect
my $cached = $self->_get_cache_file( Digest::MD5::md5_hex( $seq ), $self->{'xmlCacheName'} );
if ( defined $cached and -e $cached ) {
warn("retrieved cache threading info $cached\n") if $ENV{DEBUG};
return $cached;
}
my $ifn = $self->_write_seqfile( $seq );
$xfn = $self->_thread1( $ifn );
unlink( $ifn );
# new version of prospect outputs svm score when threading. no
# longer need to run sortProspect in this case. for backwards
# compatibility, check the xml file for svmScore tag. If
# not present, then run sortProspect.
if ( $self->_hasSvmScore( $xfn ) ) {
print(STDERR "xml file ($xfn) already contains svm scores - skip sortProspect step\n") if $ENV{DEBUG};
$self->_put_cache_file( Digest::MD5::md5_hex( $seq ), $self->{'xmlCacheName'}, $xfn );
return $xfn;
} else {
print(STDERR "xml file ($xfn) doesn't contain svm scores - run sortProspect\n") if $ENV{DEBUG};
# run sortProspect so as to get svm score
my $sfn = $self->_sort1( $xfn );
# insert svm score into the prospect output
my $ffn = $self->_output_svm_score( $xfn, $sfn );
unlink( $xfn );
unlink( $sfn );
# cache the prospect output filename
$self->_put_cache_file( Digest::MD5::md5_hex( $seq ), $self->{'xmlCacheName'}, $ffn );
return $ffn;
}
=pod
=over
=item B<::_thread_to_file( Bio::Seq | scalar )>
Thread one sequence in the Bio::Seq object or the scalar string. The xml
ouptut filename is returned. Threading results are cached by sequence for
the lifetime of the LocalClient object. See also B<::thread>.
=back
=cut
}
sub _thread1($$)
{
my ($self,$ifn) = @_;
my $xfn = "$ifn.xml";
my @cl = @{$self->{commandline}};
$cl[1] = sprintf($cl[1],$ifn);
$cl[2] = sprintf($cl[2],$xfn);
print(STDERR "about to @cl\n") if $ENV{DEBUG};
if ( eval { system("@cl") } ) {
my $s = $?;
if ($s & 127) {
$s &= 127;
my $sn = Bio::Prospect::utilities::signame($s);
throw Bio::Prospect::RuntimeError
( 'failed to execute Prospect',
"received signal $s ($sn)" );
}
$s >>= 8;
throw Bio::Prospect::RuntimeError
( 'failed to execute Prospect',
"system(@cl) exited with status $s",
'check your prospect installation manually' );
}
my $fh = new IO::File;
$fh->open("<$xfn")
|| throw Bio::Prospect::Exception("Prospect failed",
"prospect completed but didn't create an output file");
while(<$fh>) { # ugh-prospect sometimes barfs
if (m/<scoreInfo>/) { # and completes with status 0
$fh->close(); return $xfn; } # (e.g., large sequences)
}
throw Bio::Prospect::Exception("Prospect failed",
"prospect completed but the output wasn't valid",
"prospect may fail if the sequence is "
."too large or there's not enough memory. Try "
."running the sequence manually.");
return undef;
=pod
=over
=item B<::_thread1( filename )>
Threads the fasta-formatted sequence in C<filename> which is passed
directly to prospect. The name of a temporary file which contains the raw
xml output is returned. This method will work with multiple sequences in
C<filename>, but other routines in this module will not understand
multi-query xml output reliably. Most callers should use thread()
instead.
=back
=cut
}
#-------------------------------------------------------------------------------
# _hasSvmScore()
#-------------------------------------------------------------------------------
=head2 _hasSvmScore()
Name: _hasSvmScore()
Purpose: check whether the prospect xml file already contains a svmScore tag
Arguments: prospect xml file
Returns: 1 (has svm score) or 0 (no svm score)
=cut
sub _hasSvmScore {
my ($self,$xmlFile) = @_;
my $in = new IO::File "$xmlFile" or
throw Bio::Prospect::RuntimeError("can't open $xmlFile for reading");
my $retval = 0;
while(<$in>) {
if ( m/svmScore/ ) {
$retval = 1;
last;
}
}
$in->close();
return $retval;
}
#-------------------------------------------------------------------------------
# _output_svm_score()
#-------------------------------------------------------------------------------
=head2 _output_svm_score()
Name: _output_svm_score()
Purpose: output the svm score in the propsect output file
Arguments: prospect xml file, prospect sort file
Returns: prospect xml file with svm score
=cut
sub _output_svm_score {
my ($self,$xmlFile,$sortFile) = @_;
my %svm = $self->_get_svm_scores( $sortFile );
my $outFile = "$xmlFile.svm";
my $in = new IO::File "$xmlFile" or throw Bio::Prospect::RuntimeError("can't open $xmlFile for reading");
my $out = new IO::File ">$outFile" or throw Bio::Prospect::RuntimeError("can't open $outFile for reading");
local $/ = '</threading>';
while(<$in>) {
next if ! m/threading/; # make sure that we have valid prospect thread
m#template="(\w+)"#;
my $t = $1;
if ( ! defined $svm{$t} or $svm{$t} eq '') {
throw Bio::Prospect::RuntimeError
( 'Unable to retrieve svm sort',
"no svm score for template=$t" );
}
s#(<rawScore>.*?</rawScore>)#$1\n<svmScore>$svm{$t}</svmScore>#g;
print $out $_;
}
close($in);
return( $outFile );
}
#-------------------------------------------------------------------------------
# _sort1()
#-------------------------------------------------------------------------------
=head2 _sort1()
Name: _sort1()
Purpose: run sortProspect on threading file
Arguments: prospect xml file
Returns: filename of sortProspect results
=cut
sub _sort1($$) {
my ($self,$xfn) = @_;
my $sfn = "$xfn.sort";
my $cmd = "sortProspect $xfn 2>/dev/null 1>$sfn";
print(STDERR "about to $cmd\n") if $ENV{DEBUG};
if ( eval { system("$cmd") } )
{
my $s = $?;
if ($s & 127)
{
$s &= 127;
my $sn = Bio::Prospect::utilities::signame($s);
throw Bio::Prospect::RuntimeError
( 'failed to execute Prospect',
"received signal $s ($sn)" );
}
$s >>= 8;
throw Bio::Prospect::RuntimeError
( 'failed to execute Prospect',
"system($cmd) exited with status $s",
'check your prospect installation manually' );
}
# sanity checks on the sort output??
return $sfn;
}
sub _setenv {
if (not -d $Bio::Prospect::Init::PROSPECT_PATH ) {
throw Bio::Prospect::Exception
( "PROSPECT_PATH is not set correctly",
"PROSPECT_PATH ($Bio::Prospect::Init::PROSPECT_PATH}) is not a valid directory",
"Check your prospect installation and set PROSPECT_PATH in Bio::Prospect::Init or as an environment variable" );
} else {
$ENV{'PROSPECT_PATH'} = $Bio::Prospect::Init::PROSPECT_PATH;
}
if (not -d $Bio::Prospect::Init::PDB_PATH) {
throw Bio::Prospect::Exception
( "PDB_PATH is not set correctly",
"PDB_PATH ($Bio::Prospect::Init::PDB_PATH) is not a valid directory",
"Check your prospect installation and set PDB_PATH in Bio::Prospect::Init or as an environment variable" );
} else {
$ENV{'PDB_PATH'} = $Bio::Prospect::Init::PDB_PATH;
}
}
sub _prepare_options($$) {
my $self = shift;
my $opts = $self->{options};
(ref $opts eq 'Bio::Prospect::Options')
|| throw Bio::Prospect::BadUsage('Bio::Prospect::Options argument is missing');
my @cl = ( "$Bio::Prospect::Init::PROSPECT_PATH/bin/prospect" );
if (exists $opts->{phd}) {
throw Exception::NotYetSupported
( "phd threading isn't implemented" );
} elsif (exists $opts->{ssp}) {
throw Exception::NotYetSupported
( "ssp threading isn't implemented" );
} elsif (exists $opts->{seq}) {
push( @cl, '-seqfile %s' );
} else {
throw Bio::Prospect::BadUsage("Bio::Prospect::Options doesn't specify input type");
}
push(@cl, '-o %s');
push(@cl, '-ncpus '.($opts->{ncpus}||2) );
push(@cl, '-freqfile',$opts->{freqfile} ) if ( exists $opts->{freqfile} );
push(@cl, '-reliab') if $opts->{zscore};
push(@cl, '-3d') if $opts->{'3d'};
push(@cl, $opts->{global_local} ? '-global_local' : '-global');
# template set selection
# ONE of -scop, -tfile, -templates (array), or -fssp (default)
if ($opts->{scop}) {
push(@cl, '-scop')
} elsif (exists $opts->{tfile}) {
push(@cl, '-tfile', $opts->{tfile})
} elsif (exists $opts->{templates}) {
my ($fh,$fn) = $self->_tempfile('lst');
$fh->print(join("\n",@{$opts->{templates}}),"\n");
$fh->close();
push(@cl, '-tfile', $fn);
} else {
push(@cl, '-fssp');
}
push(@cl, '2> /dev/null' ) unless (defined $ENV{DEBUG} and $ENV{DEBUG}>5);
push(@cl, '1>&2');
@{$self->{commandline}} = @cl;
return @cl;
=pod
=over
=item B<::_prepare_options()>
Prepares temporary files based on options (e.g., writes a temporary
`tfile') and generates an array of command line options in
@{$self->{commandline}}. Args 1 and 2 are input and output respectively
and MUST be sprintf'd before use. See thread_1_file().
=back
=cut
}
sub _write_seqfile($$)
{
my ($self,$seq) = @_;
throw Exception ('seq undefined') unless defined $seq;
my ($fh,$fn) = $self->_tempfile('fa');
$seq =~ s/\s//g;
my $len = length($seq);
$seq =~ s/.{60}/$&\n/g; # wrap at 60 cols
$fh->print( ">LocalClient /len=$len\n$seq\n");
$fh->close();
return $fn;
}
=pod
=head1 SEE ALSO
B<Bio::Prospect::Options>, B<Bio::Prospect::File>,
B<Bio::Prospect::Client>, B<Bio::Prospect::SoapClient>,
B<Bio::Prospect::Thread>, B<Bio::Prospect::ThreadSummary>
http://www.bioinformaticssolutions.com/
=cut
1;