The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
#
#   Usage: ./mrsblast -h
#
#   martin.senger@gmail.com
#   February 2010
#
# ABSTRACT: program running BLAST using the MRS Retrieval System
# PODNAME: MRS::Client
#-----------------------------------------------------------------------------
use warnings;
use strict;

our $VERSION = '1.0.1'; # VERSION

use FindBin;
use lib "$FindBin::Bin/../lib";
use lib "$FindBin::Bin/../lib/perl5";
use MRS::Client;

# Print the given list to the currently selected output handle and
# finish it with a newline. Returns whatever print() returns.
sub say {
    my @items = @_;
    return print @items, "\n";
}

# Return the complete help text of this program as a single string.
# The text is only returned, never printed; the caller decides where
# it goes. It takes no arguments.
sub get_usage {
    return <<"END_OF_USAGE";
Usage:
   mrsblast [options] -d <databank> -i <fasta_file>

where
   "fasta_file" is a file with one or more protein sequences
                in FASTA format, and
   "databank" is a databank ID to search against

where 'options' are:
   -e <endpoint>  URL of the MRS blast service
   -S <name>      service name of the MRS blast service
   -H <hostname>  endpoint host (with standard ports)
   -V <5|6>       version of the MRS service
   -E             show what endpoint and service name are used

   Parameters for Blast run:
   -c <float>     e-value cutoff (default 10.0)
   -w <size>      word size (default 3)
   -b <matrix>    scoring matrix (default BLOSUM62)
   -a <cost>      gap opening cost (default 11)
   -z <cost>      gap extension cost (default 1)
   -F             do NOT apply low complexity filter
   -g             do NOT perform gapped alignment
   -m <maxhits>   maximum number of returned hits
   -q <query>     MRS boolean query to limit the search space
   -P             do not run Blast; only show parameters

   Parameters for exploring Blast run/results:
   -J             start Blast but do not wait for it; print
                  the Job Id that can be used later (-j option)
   -j <job-ID>    use this job ID (do not run a new Blast)
   -t <seconds>   how often (in seconds) to poll for results
                  (default 10; not used when -J or -j)
   -s             show Blast run status (only with -j)
   -r             show results (only with -j)
   -R             forget/remove/clear the job (only with -j)

   By default, the results are shown as hits with titles and
   scores. It can be changed by -x (making an XML output) or by -N
   (showing only statistics) or by -f (showing full results):
   -x <output>    create XML <output> file with results
   -N             do NOT show hits, only Blast statistics
   -f             show full results (hits and HSPs)

   -h             this help
   -v             show version

END_OF_USAGE
}

# be prepared for command-line options/arguments
my @all_args = @ARGV;   # keep a copy; getopt() below removes switches from @ARGV
use Getopt::Std;

# Getopt::Std stores the value of each switch -X in the package
# variable $opt_X, so all of them have to be declared here
use vars qw/ $opt_h $opt_v /;                       # general
use vars qw/ $opt_e $opt_S $opt_H $opt_E /;         # endpoints
use vars qw/ $opt_d $opt_i $opt_c $opt_w /;         # blast run
use vars qw/ $opt_b $opt_a $opt_z $opt_F $opt_g $opt_q /;
use vars qw/ $opt_m $opt_P $opt_J $opt_j /;         # results,status
use vars qw/ $opt_t $opt_s $opt_x $opt_N $opt_f $opt_p $opt_r $opt_R/;
use vars qw/ $opt_V /;

# switches taking an argument; any other switch is a boolean flag.
# 't' has to be listed: without it '-t 30' is parsed as a bare flag
# ($opt_t becomes 1) and the requested polling interval is lost.
my $switches = 'abcdeHijmpqStwVxz';
getopt ($switches);

# help wanted? (also shown when the program is started without arguments)
if ($opt_h or @all_args == 0) {
    print get_usage;
    exit 0;
}

# print version and exit
if ($opt_v) {
    ## no critic
    no strict;    # because the $VERSION will be added only when
    no warnings;  # the distribution is fully built up
    print "$0 using MRS::Client version $MRS::Client::VERSION\n";
    exit(0);
}

# use UTF8 for output
binmode STDOUT, ":encoding(UTF-8)";

# Fetch the results of a Blast job and emit them.
#
# Argument: a job object providing a results($format) method.
#
# The output format is chosen from the command-line switches, in this
# order of precedence: -N (statistics), -x (XML), -f (full results),
# otherwise hits. Whenever -x is given, the results are written to the
# file named by -x; otherwise they are printed to STDOUT.
#
# Nothing is emitted when the job has no results. Dies if the output
# file cannot be created, written or closed.
sub print_results {
    my $job = shift;

    my $format;
    if    ($opt_N) { $format = MRS::BlastOutputFormat->STATS; }
    elsif ($opt_x) { $format = MRS::BlastOutputFormat->XML; }
    elsif ($opt_f) { $format = MRS::BlastOutputFormat->FULL; }
    else {           $format = MRS::BlastOutputFormat->HITS; }

    my $results = $job->results ($format);
    return unless $results;

    if ($opt_x) {
        # write results to a file
        open my $xml, '>', $opt_x
            or die "Cannot create file '" . $opt_x . "': $!\n";
        print {$xml} $results
            or die "Cannot write to file '" . $opt_x . "': $!\n";
        # buffered write errors may only show up when closing
        close $xml
            or die "Cannot close file '" . $opt_x . "': $!\n";
    } else {
        print $results;
    }
    return;
}

# create the main worker; only options given on the command line are
# passed on, everything else is left to MRS::Client
my @args = ();
push (@args, blast_url => $opt_e) if defined $opt_e;
push (@args, host => $opt_H) if defined $opt_H;
push (@args, blast_service => $opt_S) if defined $opt_S;
push (@args, mrs_version => $opt_V) if defined $opt_V;
our $client = MRS::Client->new (@args);

# print environment (where to find server, etc.)
if (defined $opt_E) {
    say 'Blast URL:            ' . $client->blast_url       if $client->blast_url;
    say 'Blast service name:   ' . $client->blast_service   if $client->blast_service;
    say 'Blast WSDL:           ' . $client->blast_wsdl      if $client->blast_wsdl;
}

# collect parameters for a blast run
my @run_args = ();
push (@run_args, db => $opt_d) if $opt_d;
push (@run_args, filter => 0) if $opt_F;
push (@run_args, gapped => 0) if $opt_g;
push (@run_args, expect => $opt_c) if defined $opt_c;
push (@run_args, word_size => $opt_w) if defined $opt_w;
push (@run_args, matrix => $opt_b) if $opt_b;
push (@run_args, open_cost => $opt_a) if defined $opt_a;
push (@run_args, extend_cost => $opt_z) if defined $opt_z;
push (@run_args, query => $opt_q) if $opt_q;
push (@run_args, max_hits => $opt_m) if defined $opt_m;
push (@run_args, program => $opt_p) if $opt_p;
push (@run_args, fasta_file => $opt_i) if $opt_i;

# show run parameters (including default values)
if ($opt_P) {
    my $job = MRS::Client::Blast::Job->_new (client => $client);
    print $job->_set_parameters (@run_args);
}

# play with an existing job ID
if ($opt_j) {
    my $job = $client->blast->job ($opt_j, @run_args);
    if ($opt_s) {
        say 'Status: ' . $job->status;
    }
    if ($opt_r or $opt_x or $opt_f or $opt_N) {
        print_results ($job);
    }
    if ($opt_R) {
        $client->blast->remove_job ($opt_j);
        say 'Job removed: ' . $opt_j;
    }
}

# real run (skipped when only showing parameters or using an existing job)
if (not $opt_P and not $opt_j) {
    # polling interval: a positive whole number of seconds, otherwise
    # the default; zero, negative and non-numeric values are rejected
    # so that they never reach sleep()
    my $sleep = 10;
    if (defined $opt_t and $opt_t =~ /\A\+?(\d+)\z/ and $1 > 0) {
        $sleep = $1 + 0;
    }

    my $job = $client->blast->run (@run_args);
    print STDERR 'JOB ID: ' . $job->id . ' [' . $job->status . "]\n";
    print $job;

    # with -J the job is only started; its ID was reported above
    unless ($opt_J) {
        while (not $job->completed) {
            print STDERR 'Waiting for ' . $sleep . ' seconds... [status: ' . $job->status . "]\n";
            sleep $sleep;
        }
        say $job->error if $job->failed;
        print_results ($job);
    }
}



=pod

=head1 NAME

mrsblast - program running BLAST using the MRS Retrieval System

=head1 VERSION

version 1.0.1

=head1 AUTHOR

Martin Senger <martin.senger@gmail.com>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2013 by Martin Senger, CBRC - KAUST (Computational Biology Research Center - King Abdullah University of Science and Technology) All Rights Reserved.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut


__END__