# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# $Id$
#
# BioPerl module for Bio::Tools::Run::StandAloneBlastPlus::BlastMethods
#
# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Cared for by Mark A. Jensen <maj -at- fortinbras -dot- us>
#
# Copyright Mark A. Jensen
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::Tools::Run::StandAloneBlastPlus::BlastMethods - Provides BLAST methods to StandAloneBlastPlus

=head1 SYNOPSIS

 # create a factory:
 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
    -db_name => 'testdb'
 );
 # get your results
 $result = $fac->blastn( -query => 'query_seqs.fas',
                         -outfile => 'query.bls',
                         -method_args => [ '-num_alignments' => 10 ] );

 $result = $fac->tblastx( -query => $an_alignment_object,
                          -outfile => 'query.bls',
                          -outformat => 7 );
 # do a bl2seq
 $fac->bl2seq( -method => 'blastp',
               -query => $seq_object_1,
               -subject => $seq_object_2 );

=head1 DESCRIPTION

This module provides the BLAST methods (blastn, blastp, psiblast,
etc.) to the L<Bio::Tools::Run::StandAloneBlastPlus> object.

=head1 USAGE

This POD describes the use of BLAST methods against a
L<Bio::Tools::Run::StandAloneBlastPlus> factory object. The object
itself has extensive facilities for creating, formatting, and masking
BLAST databases; please refer to
L<Bio::Tools::Run::StandAloneBlastPlus> POD for these details.

Given a C<StandAloneBlastPlus> factory, such as

 $fac = Bio::Tools::Run::StandAloneBlastPlus->new(
    -db_name => 'testdb'
 );

you can run the desired BLAST method directly from the factory object,
against the database currently attached to the factory (in the
example, C<testdb>). C<-query> is a required argument:

 $result = $fac->blastn( -query => 'query_seqs.fas' );

Here, C<$result> is a L<Bio::Search::Result::BlastResult> object.

Other details:

=over

=item * The blast output file can be named explicitly:

 $result = $fac->blastn( -query => 'query_seqs.fas',
                         -outfile => 'query.bls' );

=item * The output format can be specified:

 $result = $fac->blastn( -query => 'query_seqs.fas',
                         -outfile => 'query.bls',
                         -outformat => 7 ); #tabular

=item * Additional arguments to the method can be specified:

 $result = $fac->blastn( -query => 'query_seqs.fas',
                         -outfile => 'query.bls',
                         -method_args => [ '-num_alignments' => 10 ,
                                           '-evalue' => 100 ]);

=item * To get the name of the blast output file, do 

 $file = $fac->blast_out;

=item * To clean up the temp files (you must do this explicitly):

 $fac->cleanup;

=back

=head2 bl2seq()

Running C<bl2seq> is similar, but both C<-query> and C<-subject> are
required, and the attached database is ignored. The blast method must
be specified explicitly with the C<-method> parameter:

 $fac->bl2seq( -method => 'blastp',
               -query => $seq_object_1,
               -subject => $seq_object_2 );

Other parameters ( C<-method_args>, C<-outfile>, and C<-outformat> ) are valid. 

=head2 Return values

The return value is always a L<Bio::Search::Result::BlastResult>
object on success, undef on failure.

=head1 SEE ALSO

L<Bio::Tools::Run::StandAloneBlastPlus>, L<Bio::Tools::Run::BlastPlus>

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l@bioperl.org                  - General discussion
  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support

Please direct usage questions or support issues to the mailing list:

L<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:

  http://redmine.open-bio.org/projects/bioperl/

=head1 AUTHOR - Mark A. Jensen

Email maj -at- fortinbras -dot- us

Describe contact details here

=head1 CONTRIBUTORS

Additional contributors names and emails here

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

# Let the code begin...

# note: providing methods directly to the namespace...
package Bio::Tools::Run::StandAloneBlastPlus;
use strict;
use warnings;

use Bio::SearchIO;
use lib '../../../..';
use Bio::Tools::Run::BlastPlus;
use File::Temp;
use File::Copy;
use File::Spec;
# Names of the BLAST+ programs, kept as a package variable of
# Bio::Tools::Run::StandAloneBlastPlus. Nothing in this file reads the
# list; presumably the main factory module uses it to route calls such as
# $fac->blastn(...) to run() -- TODO confirm against StandAloneBlastPlus.pm.
our @BlastMethods = qw( blastp blastn blastx tblastn tblastx 
                       psiblast rpsblast rpstblastn );

=head2 run()

 Title   : run
 Usage   : $result = $fac->run( -method => 'blastn',
                                -query  => 'query_seqs.fas' );
 Function: Query the attached database using a specified blast
           method
 Returns : Bio::Search::Result::BlastResult object
 Args    : key => value:
           -method => $method [blastp|blastn|blastx|tblastx|tblastn|
                               rpsblast|psiblast|rpstblastn]
           -query => $query_sequences (a fasta file name or BioPerl sequence
                      object or sequence collection object)
           -outfile => $blast_report_file (optional: default creates a tempfile)
           -outformat => $format_code (integer in [0..10], see blast+ docs)
           -method_args => [ -key1 => $value1, ... ] (additional arguments
                         for the given method)

=cut

# Run one BLAST+ program against the attached database (or against a
# -subject sequence when one is supplied through -method_args) and parse
# the report.
#
# Named arguments (see the POD for run()):
#   -method      BLAST program name (required)
#   -query       query data (required); handed to $self->_fastize
#   -outfile     report file name; a temp file in $self->db_dir is created
#                and registered for cleanup when omitted
#   -outformat   value for the blast+ -outfmt option
#   -method_args array ref of name => value pairs forwarded to the program
#
# Returns the first result read by Bio::SearchIO from the report, the
# false status from $self->_run when the run did not succeed, or undef
# when the method name does not look like a blast program.
# Throws (via $self->throw) on missing/invalid arguments.
sub run {
    my ($self, @args) = @_;
    my ($method, $query, $outfile, $outformat, $method_args) =
        $self->_rearrange(
            [qw( METHOD QUERY OUTFILE OUTFORMAT METHOD_ARGS )], @args );
    my (%blast_args, %usr_args);

    $self->throw("Blast run: method not specified, use -method")
        unless $method;
    $self->throw("Blast run: query data required, use -query")
        unless $query;

    unless ($outfile) {
        # No report name given: reserve a temp file name next to the db.
        # UNLINK => 0 keeps the file after the handle goes away; removal
        # is left to whatever _register_temp_for_cleanup arranges.
        my $fh = File::Temp->new(TEMPLATE => 'BLOXXXXX',
                                 DIR      => $self->db_dir,
                                 UNLINK   => 0);
        $outfile = $fh->filename;
        $fh->close;
        $self->_register_temp_for_cleanup($outfile);
    }

    if ($outformat) {
        # Only the leading format code is checked (one or two digits,
        # optionally after a double quote); anything following it is
        # passed through untouched.
        unless ($outformat =~ /^"?[0-9]{1,2}/) {
            $self->throw("Blast run: output format code should be integer 0-10");
        }
        $blast_args{'-outfmt'} = $outformat;
    }

    if ($method_args) {
        $self->throw("Blast run: method arguments must be name => value pairs")
            if @$method_args % 2;
        %usr_args = @$method_args;
    }

    # make db if necessary; no db is needed when a subject sequence is
    # given (bl2seq emulation) or the factory is remote
    $self->make_db
        unless $self->check_db
            or $self->is_remote
            or $usr_args{'-subject'}
            or $usr_args{'-SUBJECT'};

    $self->{_factory} = Bio::Tools::Run::BlastPlus->new( -command => $method );

    if (%usr_args) {
        my @avail_parms = $self->factory->available_parameters('all');
        for my $arg_name (keys %usr_args) {
            (my $key = $arg_name) =~ s/^-//;
            # Exact string comparison: the user-supplied name must not be
            # interpreted as a regex (a name like '.*' would otherwise
            # match every available parameter and slip through).
            unless (grep { $_ eq $key } @avail_parms) {
                $self->throw("Blast run: parameter '$key' is not available for method '$method'");
            }
        }
    }

    # remove a leading ./ on remote databases. Something adds that in the
    # factory, easier to remove here.
    my $db = $self->db_path;
    if ($self->is_remote) {
        $db =~ s#^\./##;
    }
    $blast_args{-db}    = $db;
    $blast_args{-query} = $self->_fastize($query);
    $blast_args{-out}   = $outfile;

    # user-supplied arguments override the defaults set above
    $blast_args{$_} = $usr_args{$_} for keys %usr_args;

    # bl2seq emulation: -db and -subject must not both be passed
    if ($blast_args{'-db'} && $blast_args{'-subject'}) {
        delete $blast_args{'-db'};
    }

    $self->factory->set_parameters( %blast_args );
    $self->factory->no_throw_on_crash( $self->no_throw_on_crash );
    my $status = $self->_run;

    return $status unless $status;

    # kludge to demodernize the bl2seq output: rewrite the "Subject="
    # lines of the report before it is parsed
    if ($blast_args{'-subject'}) {
        $self->throw("Ack! demodernization failed!")
            unless _demodernize($outfile);
    }

    # if here, success: parse the report for recognised blast programs
    my $ret;
    if ($method =~ m/^(t|psi|rps|rpst)?blast[npx]?/) {
        my $searchio = Bio::SearchIO->new(-file => $outfile);

        # remember the report and its parser for blast_out(),
        # next_result() and rewind_results()
        $self->{_blastout} = $outfile;
        $self->{_results}  = $searchio;
        $ret = $searchio->next_result;
    }

    return $ret;
}

=head2 bl2seq()

 Title   : bl2seq
 Usage   : $result = $fac->bl2seq( -method  => 'blastp',
                                   -query   => $seq_object_1,
                                   -subject => $seq_object_2 );
 Function: emulate bl2seq using blast+ programs
 Returns : Bio::Search::Result::BlastResult object
 Args    : key => value
           -method => $blast_method [blastn|blastp|blastx|
                                     tblastn|tblastx]
           -query => $query (fasta file or BioPerl sequence object)
           -subject => $subject (fasta file or BioPerl sequence object)
           -outfile => $blast_report_file
           -method_args => [ $key1 => $value1, ... ] (additional method 
                        parameters)

=cut

# Emulate bl2seq: validate -method, -query and -subject, pass the subject
# through $self->_fastize, and delegate to run() with the subject appended
# to the caller's -method_args as '-subject'. Returns whatever run()
# returns; throws via $self->throw when a required argument is missing.
sub bl2seq {
    my ($self, @args) = @_;
    my ($method, $query, $subject, $outfile, $outformat, $method_args) =
        $self->_rearrange(
            [qw( METHOD QUERY SUBJECT OUTFILE OUTFORMAT METHOD_ARGS )],
            @args );

    $self->throw("bl2seq: blast method not specified, use -method")
        if !$method;
    $self->throw("bl2seq: query data required, use -query")
        if !$query;
    $self->throw("bl2seq: subject data required, use -subject")
        if !$subject;

    my $subject_data = $self->_fastize($subject);

    # Copy the caller's extra arguments so their array is never modified.
    my @extra_args = $method_args ? @$method_args : ();

    return $self->run(
        -method      => $method,
        -query       => $query,
        -outfile     => $outfile,
        -outformat   => $outformat,
        -method_args => [ @extra_args, '-subject' => $subject_data ],
    );
}

=head2 next_result()

 Title   : next_result
 Usage   : $result = $fac->next_result;
 Function: get the next BLAST result
 Returns : Bio::Search::Result::BlastResult object
 Args    : none

=cut

# Return the next result from the parser stored in $self->{_results}
# (set by run() and rewind_results()). Returns nothing (empty list /
# undef) when no parser has been stored yet.
#
# The former empty prototype "()" was dropped: prototypes are ignored for
# method calls, and it made a plain function call with the object as an
# argument a compile-time error.
sub next_result {
    my ($self) = @_;
    my $results = $self->{_results} or return;
    return $results->next_result;
}

=head2 rewind_results()

 Title   : rewind_results
 Usage   : $fac->rewind_results;
 Function: rewind BLAST results
 Returns : true on success
 Args    : 

=cut

# Re-open the current blast report with a fresh Bio::SearchIO parser so
# that next_result() starts again from the first result. Returns 1 on
# success, nothing when no report file name has been recorded.
sub rewind_results {
    my ($self) = @_;
    if ( my $report_file = $self->blast_out ) {
        $self->{_results} = Bio::SearchIO->new( -file => $report_file );
        return 1;
    }
    return;
}


=head2 blast_out()

 Title   : blast_out
 Usage   : $file = $fac->blast_out
 Function: get the filename of the blast report file
 Returns : scalar string
 Args    : none

=cut

# Accessor: file name of the most recent blast report, as stored in
# $self->{_blastout} by run(); undef when nothing has been stored.
sub blast_out {
    my ($self) = @_;
    return $self->{_blastout};
}

# =head2 _demodernize()

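#  Title   : _demodernize
#  Usage   : _demodernize($blast_report_file)
#  Function: Rewrite the report file in place, replacing "Subject="
#            plus following whitespace at the start of a line with ">"
#            (used by run() on bl2seq-style output)
#  Returns : the result of File::Copy::copy (true on success)
#  Args    : name of the blast report file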

# =cut

# Rewrite a blast report in place so that every line starting with
# "Subject=" followed by whitespace starts with ">" instead. Called by
# run() on the output of a subject (bl2seq-style) search before parsing.
#
# Args    : $file - name of the report file to rewrite
# Returns : true on success; false (undef) when the report cannot be
#           read, the temporary copy cannot be written, or the copy back
#           fails. The caller turns a false value into an exception.
sub _demodernize {
    my $file = shift;

    # 3-arg open with an explicit check: an unreadable report must be
    # reported as a failure rather than silently replaced by an empty file.
    open( my $in, '<', $file ) or return;

    # The File::Temp object removes its file when it goes out of scope.
    my $tf = File::Temp->new();
    while ( my $line = <$in> ) {
        $line =~ s/^Subject=\s+/>/;
        print {$tf} $line or return;
    }

    # Release the input before overwriting it, and flush the temp file so
    # the copy sees the complete content.
    close $in;
    $tf->close or return;

    return copy( $tf->filename, $file );
}
1;