# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# -*-Perl-*- Test Harness script for Bioperl
# $Id: SearchIO_infernal.t 14672 2008-04-22 21:42:50Z cjfields $

use strict;
use warnings;

# Compile-time setup: add the current directory to @INC, import the test
# helpers from Bio::Root::Test, declare the plan and check that the module
# under test loads.
use lib '.';
use Bio::Root::Test;

BEGIN {
    test_begin( -tests => 496 );
    use_ok('Bio::SearchIO');
}

my ($in, $result, $iter, $hit, $hsp, $algorithm, $meta);

### Infernal v. 1.1 ###
{
    # Single query model searched against a single-sequence database.
    $in = Bio::SearchIO->new(
        -format => 'infernal',
        -file   => test_input_file('cmsearch_output.txt'),
    );
    $result = $in->next_result;

    # Expected values that recur below: both hits are expected to report the
    # same target sequence, and both HSPs the same query id.
    my $target_id   = 'gi|556503834|ref|NC_000913.3|';
    my $target_desc = 'Escherichia coli str. K-12 substr. MG1655, complete genome';
    my $query_id    = 'Cobalamin';

    # --- result-level metadata ---
    isa_ok($result, 'Bio::Search::Result::ResultI');
    is(ref($result), 'Bio::Search::Result::INFERNALResult',
        'Check for the correct Result reference type');
    is($result->algorithm,         'CMSEARCH',      'Check algorithm');
    is($result->algorithm_version, '1.1.1',         'Check cmsearch algorithm version');
    is($result->cm_name,           'RF00174.cm',    'Check cm_name');
    is($result->database_name,     'NC_000913.fna', 'Check database_name');
    is($result->database_entries,  1,               'Check database_entries');
    is($result->database_letters,  9283304,         'Check database_letters');
    is($result->query_name,        $query_id,       'Check query_name');
    is($result->query_length,      191,             'Check query_length');
    is($result->query_accession,   'RF00174',       'Check query_accession');
    is($result->query_description, '',              'Check query_description');
    is($result->num_hits,          2,               'Check num_hits');

    # --- first hit ---
    $hit = $result->next_hit;
    is(ref($hit), 'Bio::Search::Hit::ModelHit',
        'Check for the correct hit reference type');
    is($hit->algorithm,   'CMSEARCH',   'Hit algorithm');
    is($hit->name,        $target_id,   'Check hit name');
    is($hit->description, $target_desc, 'Check hit description');
    is($hit->length,      0,            'Check hit length');
    is($hit->score,       98.2,         'Check hit score');
    is($hit->bits,        98.2,         'Check hit bits');
    is($hit->num_hsps,    1,            'Check number of HSPs');
    float_is($hit->significance, 8.7e-16, 'Check hit significance');
    is($hit->rank,        1,            'Check hit rank');

    # --- first hit, its only HSP ---
    $hsp = $hit->next_hsp;
    is(ref($hsp), 'Bio::Search::HSP::ModelHSP',
        'Check for correct hsp reference type');
    isa_ok($hsp,          'Bio::Search::HSP::HSPI');
    isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
    isa_ok($hsp->hit,     'Bio::SeqFeature::Similarity',
        'Check for hsp hit isa seqfeature similarity');

    is($hsp->hit->seq_id,   $target_id, 'Check for HSP hit seq_id');
    is($hsp->query->seq_id, $query_id,  'Check for HSP query seq_id');
    is($hsp->start('query'), 1,         'Check hsp query start');
    is($hsp->end('query'),   191,       'Check hsp query end');
    is($hsp->start('hit'),   4163384,   'Check hsp hit start');
    is($hsp->end('hit'),     4163574,   'Check hsp hit end');
    is($hsp->score,          98.2,      'Check hsp score');
    is($hsp->bits,           98.2,      'Check hsp bits');
    float_is($hsp->significance, 8.7e-16, 'Check hsp evalue');

    is($hsp->length('query'), 191, 'Check for hsp query length');
    is($hsp->length('hit'),   191, 'Check for hsp hit length');
    is($hsp->length,          207, 'Check for hsp total length');
    is($hsp->gaps('query'),   16,  'Check for hsp query gaps');
    is($hsp->gaps('hit'),     16,  'Check for hsp hit gaps');
    is($hsp->gaps,            32,  'Check for hsp total gaps');
    is($hsp->strand('hit'),   1,   'Check hsp hit strand');

    # --- second hit ---
    $hit = $result->next_hit;
    is($hit->name,        $target_id,   'Check hit name');
    is($hit->description, $target_desc, 'Check hit description');
    is($hit->score,       8.4,          'Check hit score');
    is($hit->raw_score,   8.4,          'Check hit raw_score');
    is($hit->bits,        8.4,          'Check hit bits');
    float_is($hit->significance, 0.63,  'Check hit significance');
    is($hit->length,      0,            'Check hit length');
    is($hit->rank,        2,            'Hit rank');

    # --- second hit, first HSP ---
    $hsp = $hit->next_hsp;
    is($hsp->hit->seq_id,   $target_id, 'Check for hit seq_id');
    is($hsp->query->seq_id, $query_id,  'Check for query seq_id');
    is($hsp->start('query'), 1,         'Check hsp query start');
    is($hsp->end('query'),   191,       'Check hsp query end');
    is($hsp->start('hit'),   4593356,   'Check hsp hit start');
    is($hsp->end('hit'),     4593565,   'Check hsp hit end');
    is($hsp->score,          8.4,       'Check hsp score');
    is($hsp->bits,           8.4,       'Check hsp bits');
    float_is($hsp->significance, 0.63,  'Check hsp evalue');

    is($hsp->gaps('query'),  67,  'Check for hsp query gaps');
    is($hsp->gaps('hit'),    48,  'Check for hsp hit gaps');
    is($hsp->gaps,           115, 'Check for hsp total gaps');
    is($hsp->strand('hit'),  1,   'Check hsp hit strand');
    # Alignment rows of the current HSP (second hit). Each accessor is
    # compared with is() against one literal, so every character of the
    # expected strings, including runs of spaces, is significant.
    # Do not re-wrap or re-indent the literals.
    is( $hsp->noncanonical_string,
        '               v                                    v             v   v        v       v  v       vvvvvv     vvv    vvv                                   vvv      vvvvvvvvv                                              v  v                    v               ',
        'Check for NC string');
    is( $hsp->meta,
        ':::::::::::::::[[[[[[,<<<____________>>>,,,,,(((,,,<<<<<_______>>>>>,,<<<____>>>,<<<---<<<<.------<<<<<<-----<<<-<<<<<<_____............................._>>>>>>--->>>>>>>>>----------....................................>>>>----.>>>,,,,)))]]]]]]:::::::::::::::',
        'Check for CS string');
    is( $hsp->query_string,
        'uuaaauugaaacgaugauGGUuccccuuuaaagugaaggguuAAaaGGGAAcccGGUGaaAaUCCgggGCuGcCCCCgCaACuGUAAgcGg.agagcaccccccAauAaGCCACUggcccgcaa.............................gggccGGGAAGGCggggggaaggaaugac....................................cCgcgAGc.CaGGAGACCuGCCaucaguuuuugaaucucc',
        'Check for query string');
    is( $hsp->homology_string,
        '  A AUU+A+++    :UGG  :C +U ++  G     G: +AA : GGAA:  G         C  :+  GCCCCCGC +C GU+A ::     GCA ++ ++ A   GCCA   G+C G                                                                                                  :: +AG+ C GGA AC : CCA:  + + + + AU    ',
        'Check for homology string');
    is( $hsp->hit_string,
        'GGAGAUUAAUCUUUACGUGGG-UCGUUGAUCGG---CUGACGAACCAGGAAGAUGU-------ACGCCAGUGCCCCCGCUGCGGUGACGCAa-CCGCAGAUGAUUAGU-GCCA---GACGG---aaugagugggugguaucaacaauaaaacc-----------------------------aguaaugaucggcgcaaaagaggcgcagaugaagcuGGCAAAGUuCUGGAUACUGCCCACCGACGCAGUCAUGCGA',
        'Check for hit string');
    is( $hsp->posterior_string,
        '*********************.88877554444...5777779*********9996.......7999********************88873.333333333333333.4544...33333...44566655444444444444444444444.............................566666666666666666666666677777777776788899966*******************************',
        'Check for posterior probability string');

    # Both similarity features of the HSP are expected to carry a 'meta' tag
    # whose first value equals the CS string checked via $hsp->meta above.
    isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
    isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
    ($meta) = $hsp->feature1->get_tag_values('meta');
    is($meta, ':::::::::::::::[[[[[[,<<<____________>>>,,,,,(((,,,<<<<<_______>>>>>,,<<<____>>>,<<<---<<<<.------<<<<<<-----<<<-<<<<<<_____............................._>>>>>>--->>>>>>>>>----------....................................>>>>----.>>>,,,,)))]]]]]]:::::::::::::::', "Check hsp feature1 get_tag_values");
    ($meta) = $hsp->feature2->get_tag_values('meta');
    is($meta, ':::::::::::::::[[[[[[,<<<____________>>>,,,,,(((,,,<<<<<_______>>>>>,,<<<____>>>,<<<---<<<<.------<<<<<<-----<<<-<<<<<<_____............................._>>>>>>--->>>>>>>>>----------....................................>>>>----.>>>,,,,)))]]]]]]:::::::::::::::', "Check hsp feature2 get_tag_values");

    # No further result is expected from the single-query report.
    $result = $in->next_result;
    is($result, undef, 'Check for undefined result');

    # Several queries searched against a multi-sequence database.
    $in = Bio::SearchIO->new(
        -format => 'infernal',
        -file   => test_input_file('cmsearch.multi.out'),
    );

    # Query 1: only the hit count, the query name and the first hit's
    # length are checked.
    $result = $in->next_result;
    is($result->num_hits,   12,      'Check result num_hits - multi report');
    is($result->query_name, 'tRNA5', 'Check result query_name - multi report');
    $hit = $result->next_hit;
    is($hit->length, 72, 'Check hit length - multi report');

    # Query 2: additionally checks the hit strand of its first HSP.
    $result = $in->next_result;
    is($result->num_hits,   1,           'Check result#2 num_hits - multi report');
    is($result->query_name, 'Cobalamin', 'Check result#2 query_name - multi report');
    $hit = $result->next_hit;
    is($hit->length, 0, 'Check result#2 hit length - multi report');
    $hsp = $hit->next_hsp;
    is($hsp->strand('hit'), -1, 'Check result#2 hsp hit strand - multi report');

    # A report without hits still yields a result object, but no hit.
    $in = Bio::SearchIO->new(
        -format => 'infernal',
        -file   => test_input_file('cmsearch.nohit.out'),
    );
    $result = $in->next_result;
    is($result->cm_name, 'Cobalamin.c.cm', 'Check cm_name');
    $hit = $result->next_hit;
    is($hit, undef, 'Check for undefined hit');

}


### Infernal v. 1.0 ####

# Accession and description are handed to the constructor and are expected
# to come back unchanged from the result. The expected query name below is
# 'trna.5-1', not the -model value.
my %v1_0_args = (
    -format     => 'infernal',
    -file       => test_input_file('test2.infernal'),
    -model      => 'tRNAtest',
    -query_acc  => 'RF01234',
    -query_desc => 'tRNA',
);
my $searchio = Bio::SearchIO->new(%v1_0_args);

# --- result-level metadata ---
$result = $searchio->next_result;
isa_ok($result, 'Bio::Search::Result::ResultI');
is($result->algorithm,            'CMSEARCH',          'Result');
is($result->algorithm_reference,  undef,               'Result reference');
is($result->algorithm_version,    '1.0',               'Result version');
is($result->available_parameters, 0,                   'Result parameters');
is($result->available_statistics, 0,                   'Result statistics');
is($result->database_entries,     '',                  'Result entries');
is($result->database_letters,     600000,              'Result letters');
is($result->database_name,        'tosearch.300Kb.db', 'Result database_name');
is($result->num_hits,             1,                   'Result num_hits');
is($result->program_reference,    undef,               'Result program_reference');
is($result->query_accession,      'RF01234',           'Result query_accession');
is($result->query_description,    'tRNA',              'Result query_description');
is($result->query_length,         72,                  'Result query_length');
is($result->query_name,           'trna.5-1',          'Result query_name');

# --- the only hit ---
$hit = $result->next_hit;

isa_ok($hit, 'Bio::Search::Hit::HitI');
is($hit->ncbi_gi,     '',         'Hit GI');
is($hit->accession,   'example',  'Hit accession');
is($hit->algorithm,   'CMSEARCH', 'Hit algorithm');
is($hit->bits,        '78.06',    'Hit bits');
is($hit->description, '',         'Hit description');    # no hit descs yet
is($hit->locus,       '',         'Hit locus');
is($hit->n,           3,          'Hit n');
is($hit->name,        'example',  'Hit name');
is($hit->num_hsps,    3,          'Hit num_hsps');

# Bio::Search::Hit::HitI methods that Bio::Search::Hit::ModelHit does not
# implement yet; they may be integrated over time but will require some
# reconfiguring for Model-based searches. Each call below is expected to
# emit a matching "not implemented" warning.
#
# Row layout: [ method name, expected warning pattern, test name ]
for my $row (
    [ 'length_aln',
      qr'length_aln not implemented for Model-based searches',
      "Hit length_aln() not implemented" ],
    [ 'num_unaligned_hit',
      qr'num_unaligned_hit/num_unaligned_sbjct not implemented for Model-based searches',
      "Hit num_unaligned_hit() not implemented" ],
    [ 'num_unaligned_query',
      qr'num_unaligned_query not implemented for Model-based searches',
      "Hit num_unaligned_query() not implemented" ],
    [ 'num_unaligned_sbjct',
      qr'num_unaligned_hit/num_unaligned_sbjct not implemented for Model-based searches',
      "Hit num_unaligned_sbjct() not implemented" ],
    [ 'start',
      qr'start not implemented for Model-based searches',
      'Hit start not implemented' ],
    [ 'end',
      qr'end not implemented for Model-based searches',
      'Hit end not implemented' ],
    [ 'strand',
      qr'strand not implemented for Model-based searches',
      'Hit strand not implemented' ],
    [ 'logical_length',
      qr'logical_length not implemented for Model-based searches',
      'Hit logical_length not implemented' ],
    [ 'frac_aligned_hit',
      qr'frac_aligned_hit not implemented for Model-based searches',
      'Hit frac_aligned_hit not implemented' ],
    [ 'frac_aligned_query',
      qr'frac_aligned_query not implemented for Model-based searches',
      'Hit frac_aligned_query not implemented' ],
    [ 'frac_conserved',
      qr'frac_conserved not implemented for Model-based searches',
      'Hit frac_conserved not implemented' ],
    [ 'frac_identical',
      qr'frac_identical not implemented for Model-based searches',
      'Hit frac_identical not implemented' ],
    [ 'matches',
      qr'matches not implemented for Model-based searches',
      'Hit matches not implemented' ],
    [ 'gaps',
      qr'gaps not implemented for Model-based searches',
      'Hit gaps not implemented' ],
    [ 'frame',
      qr'frame not implemented for Model-based searches',
      'Hit frame not implemented' ],
    [ 'range',
      qr'range not implemented for Model-based searches',
      'Hit range not implemented' ],
    [ 'seq_inds',
      qr'seq_inds not implemented for Model-based searches',
      'Hit seq_inds not implemented' ],
) {
    my ($method, $pattern, $label) = @{$row};
    warning_like { $hit->$method } $pattern, $label;
}

# Implemented hit-level accessors for the v1.0 report. Every assertion
# carries a description so a failure can be identified from the TAP output.
is($hit->length, 0, "Hit length");
is($hit->overlap, 0, "Hit overlap");
is($hit->query_length, 72, "Hit query_length");
is($hit->rank, 1, "Hit rank");
is($hit->raw_score, '78.06', "Hit raw_score");
is($hit->score, '78.06', "Hit score");
float_is($hit->p, '2.906e-26', "Hit p");
float_is($hit->significance, '3.133e-21', "Hit significance");

# First HSP of the hit.
$hsp = $hit->next_hsp;
isa_ok($hsp, 'Bio::Search::HSP::HSPI');
is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
float_is($hsp->evalue, '3.133e-21', "HSP evalue");
isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');

# Both features are expected to carry the same 'meta' tag value.
($meta) = $hsp->feature1->get_tag_values('meta');
is($meta, '(((((((,,<<<<___.____>>>>,<<<<<_______>>>>>,,,,,<<<<<_______>>>>>))))))):',
   "HSP feature1 meta");
($meta) = $hsp->feature2->get_tag_values('meta');
is($meta, '(((((((,,<<<<___.____>>>>,<<<<<_______>>>>>,,,,,<<<<<_______>>>>>))))))):',
   "HSP feature2 meta");

is($hsp->frame('query'), 0, "HSP frame");
is($hsp->gaps, 1, "HSP gaps");
# Repeated hit-level check; kept because the test plan counts it.
is($hit->length, 0, "Hit length");
isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
is($hsp->hit_string,
   'GCGGAUUUAGCUCAGUuGGGAGAGCGCCAGACUGAAGAUCUGGAGGUCCUGUGUUCGAUCCACAGAAUUCGCA',
   "HSP hit_string");
is($hsp->homology_string,
   'GC::A::UAGC:CAGU GG AG:GCGCCAG:CUG+++A:CUGGAGGUCC:G:GUUCGAU C:C:G::U::GCA',
   "HSP homology_string");
is($hsp->hsp_group, undef, "HSP hsp_group");
is($hsp->hsp_length, 73, "HSP hsp_length");
is($hsp->length, 73, "HSP length");
is($hsp->links, undef, "HSP links");
is($hsp->n, 1, "HSP n");
float_is($hsp->pvalue, 2.906e-26, "HSP pvalue");
isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
is($hsp->query_string,
   'gCcgacAUaGcgcAgU.GGuAgcgCgccagccUgucAagcuggAGgUCCgggGUUCGAUuCcccGUgucgGca',
   "HSP query_string");
is($hsp->range, 72, "HSP range");
is($hsp->rank, 1, "HSP rank");
float_is($hsp->significance, 3.133e-21, "HSP significance");
is($hsp->end, 72, "HSP end");
float_is($hsp->expect, '3.133e-21', "HSP expect");

# Bio::Search::HSP::HSPI methods that Bio::Search::HSP::ModelHSP does not
# implement yet; they may be integrated over time but will require some
# reconfiguring for Model-based searches.
#
# Each call is expected to emit "<method> not implemented for Model-based
# searches". The test name is derived from the method name, so every test
# is labelled with the method it actually exercises.
for my $method (qw(
    seq_inds
    matches
    frac_conserved
    frac_identical
    num_conserved
    num_identical
    percent_identity
    cigar_string
    generate_cigar_string
)) {
    warning_like { $hsp->$method }
        qr/\Q$method\E not implemented for Model-based searches/,
        "HSP $method not implemented";
}

# Remaining checks on the first v1.0 HSP.
isa_ok($hsp->seq, 'Bio::LocatableSeq');
is($hsp->seq_str,
   'gCcgacAUaGcgcAgU.GGuAgcgCgccagccUgucAagcuggAGgUCCgggGUUCGAUuCcccGUgucgGca',
   "HSP seq_str");
is($hsp->start, 1, "HSP start");
is($hsp->custom_score, undef, "HSP custom_score");
is($hsp->meta,
   '(((((((,,<<<<___.____>>>>,<<<<<_______>>>>>,,,,,<<<<<_______>>>>>))))))):',
   "HSP meta");
is($hsp->strand('hit'), 1, "HSP strand");

# Second HSP of the same hit. The expected rows contain '*[19]*' / '*[17]*'
# markers and the expected meta line contains '~' characters.
$hsp = $hit->next_hsp;
isa_ok($hsp, 'Bio::Search::HSP::HSPI');
is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
float_is($hsp->evalue, 0.6752, "HSP evalue");
isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
is($hsp->frame('query'), 0, "HSP frame");
is($hsp->gaps, 4, "HSP gaps");
# infernal can return alignment data
isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
is($hsp->hit_string,
   'UCUGCUAUGGCGUAAUGGCCACGCGC----CCAUCAACAAAGAUAUC*[19]*UAACAGGA',
   "HSP hit_string");
is($hsp->homology_string,
   ' C:G :AU+GCG:A+UGG  :CGCGC    C  UCAA +++GA +UC      U: C:G A',
   "HSP homology_string");
is($hsp->hsp_group, undef, "HSP hsp_group");
is($hsp->hsp_length, 73, "HSP hsp_length");
is($hsp->length, 73, "HSP length");
is($hsp->links, undef, "HSP links");
is($hsp->n, 1, "HSP n");
float_is($hsp->pvalue, 6.263e-06, "HSP pvalue");
isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
is($hsp->query_string,
   'gCcgacAUaGcgcAgUGGuAgcgCgccagccUgucAagcuggAGgUC*[17]*UgucgGca',
   "HSP query_string");
is($hsp->range, 72, "HSP range");
is($hsp->rank, 2, "HSP rank");
float_is($hsp->significance, 0.6752, "HSP significance");
is($hsp->end, 72, "HSP end");
float_is($hsp->expect, 0.6752, "HSP expect");
isa_ok($hsp->seq, 'Bio::LocatableSeq');
# this should probably default to the hit string
is($hsp->seq_str,
   'gCcgacAUaGcgcAgUGGuAgcgCgccagccUgucAagcuggAGgUC*[17]*UgucgGca',
   "HSP seq_str");
is($hsp->start, 1, "HSP start");
is($hsp->custom_score, undef, "HSP custom_score");
is($hsp->meta,
   '(((((((,,<<<<_______>>>>,<<<<<_______>>>>>,,,,,~~~~~~))))))):',
   "HSP meta");
is($hsp->strand('hit'), 1, "HSP strand");

### Infernal pre-v. 1.0 ####

# Model name, accession, description and database name are supplied through
# the constructor and are expected back from the result below.
# -convert_meta is off here; the same file is parsed again further down
# with it switched on.
$searchio = Bio::SearchIO->new( -format => 'infernal',
                                -file   => test_input_file('test.infernal'),
                                # version is reset to the correct one by parser
                                -version => 0.7,
                                -model => 'Purine',
                                -query_acc => 'RF00167',
                                -query_desc => 'Purine riboswitch',
                                -database => 'b_sub.fas',
                                -hsp_minscore => 40,
                                -convert_meta => 0,
                               );

# --- result-level metadata ---
$result = $searchio->next_result;
isa_ok($result, 'Bio::Search::Result::ResultI');
$algorithm = $result->algorithm;
is($result->algorithm, 'CMSEARCH', "Result $algorithm");
is($result->algorithm_reference, undef, "Result $algorithm reference");
is($result->algorithm_version, 0.7, "Result $algorithm version");
is($result->available_parameters, 0, "Result parameters");
is($result->available_statistics, 0, "Result statistics");
is($result->database_entries, '', "Result entries");
is($result->database_letters, '', "Result letters");
is($result->database_name, 'b_sub.fas', "Result database_name");
is($result->num_hits, 2, "Result num_hits");
is($result->program_reference, undef, "Result program_reference");
is($result->query_accession, 'RF00167', "Result query_accession");
is($result->query_description, 'Purine riboswitch', "Result query_description");
is($result->query_length, 102, "Result query_length");
is($result->query_name, 'Purine', "Result query_name");

# --- first hit ---
$hit = $result->next_hit;

isa_ok($hit, 'Bio::Search::Hit::HitI');
is($hit->ncbi_gi, '2239287', "Hit GI");
is($hit->accession, 'U51115.1', "Hit accession");
is($hit->algorithm, 'CMSEARCH', "Hit algorithm");
is($hit->bits, 81.29, "Hit bits");
is($hit->description, '', "Hit description"); # no hit descs yet
is($hit->locus, 'BSU51115', "Hit locus");
is($hit->n, 2, "Hit n");
is($hit->name, 'gi|2239287|gb|U51115.1|BSU51115', "Hit name");
is($hit->num_hsps, 2, "Hit num_hsps");

# p() works but there are no evalues yet for Infernal output, so catch and check...
warning_like {$hit->p}
    qr'P-value not defined. Using significance\(\) instead',
    "No p values";

is($hit->length, 0, "Hit length");
is($hit->overlap, 0, "Hit overlap");
is($hit->query_length, 102, "Hit query_length");
is($hit->rank, 1, "Hit rank");
is($hit->raw_score, 81.29, "Hit raw_score");
is($hit->score, 81.29, "Hit score");
# Significance is expected to be undefined for this report.
float_is($hit->significance, undef, "Hit significance");

# First HSP of the first pre-1.0 hit. The statistical values (evalue,
# pvalue, significance, expect) are all expected to be undefined here.
$hsp = $hit->next_hsp;
isa_ok($hsp, 'Bio::Search::HSP::HSPI');
is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
float_is($hsp->evalue, undef, "HSP evalue");
isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');

# Both features are expected to carry the same 'meta' tag value.
($meta) = $hsp->feature1->get_tag_values('meta');
is($meta, ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,)))).))))::::::::::::::',
   "HSP feature1 meta");
($meta) = $hsp->feature2->get_tag_values('meta');
is($meta, ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,)))).))))::::::::::::::',
   "HSP feature2 meta");

is($hsp->frame('query'), 0, "HSP frame");
is($hsp->gaps, 1, "HSP gaps");
# Repeated hit-level check; kept because the test plan counts it.
is($hit->length, 0, "Hit length");
isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
is($hsp->hit_string,
   'CAUGAAAUCAAAACACGACCUCAUAUAAUCUUGGGAAUAUGGCCCAUAAGUUUCUACCCGGCAACCGUAAAUUGCCGGACUAUGcAGGGAAGUGAUCGAUAAA',
   "HSP hit_string");
is($hsp->homology_string,
   ' A+ A+A+ AAAA A   :CUC:UAUAAU: :GGGAAUAUGGCCC: :AGUUUCUACC:GGCAACCGUAAAUUGCC:GACUA:G AG: AA + ++  +++++',
   "HSP homology_string");
is($hsp->hsp_group, undef, "HSP hsp_group");
is($hsp->hsp_length, 103, "HSP hsp_length");
is($hsp->length, 103, "HSP length");
is($hsp->links, undef, "HSP links");
is($hsp->n, 1, "HSP n");
float_is($hsp->pvalue, undef, "HSP pvalue");
isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
is($hsp->query_string,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcG.aGuaAauauuaaauauuu',
   "HSP query_string");
is($hsp->range, 102, "HSP range");
is($hsp->rank, 1, "HSP rank");
float_is($hsp->significance, undef, "HSP significance");
is($hsp->end, 102, "HSP end");
float_is($hsp->expect, undef, "HSP expect");

isa_ok($hsp->seq, 'Bio::LocatableSeq');
is($hsp->seq_str,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcG.aGuaAauauuaaauauuu',
   "HSP seq_str");
is($hsp->start, 1, "HSP start");
is($hsp->custom_score, undef, "HSP custom_score");
is($hsp->meta,
   ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,)))).))))::::::::::::::',
   "HSP meta");
is($hsp->strand('hit'), 1, "HSP strand");

# Second HSP of the first pre-1.0 hit (expected to contain no gaps).
$hsp = $hit->next_hsp;
isa_ok($hsp, 'Bio::Search::HSP::HSPI');
is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
float_is($hsp->evalue, undef, "HSP evalue");
isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
is($hsp->frame('query'), 0, "HSP frame");
is($hsp->gaps, 0, "HSP gaps");
# infernal can return alignment data
isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
is($hsp->hit_string,
   'AGAAAUCAAAUAAGAUGAAUUCGUAUAAUCGCGGGAAUAUGGCUCGCAAGUCUCUACCAAGCUACCGUAAAUGGCUUGACUACGUAAACAUUUCUUUCGUUU',
   "HSP hit_string");
is($hsp->homology_string,
   'A AAAU AAA+AA A+   : CGUAUAAU::CG:GAAUAUGGC:CG::AGU UCUACCA:GC ACCGUAAAU GC:UGACUACG :   AU+U +++  UUU',
   "HSP homology_string");
is($hsp->hsp_group, undef, "HSP hsp_group");
is($hsp->hsp_length, 103, "HSP hsp_length");
is($hsp->length, 103, "HSP length");
is($hsp->links, undef, "HSP links");
is($hsp->n, 1, "HSP n");
float_is($hsp->pvalue, undef, "HSP pvalue");
isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
is($hsp->query_string,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
   "HSP query_string");
is($hsp->range, 102, "HSP range");
is($hsp->rank, 2, "HSP rank");
float_is($hsp->significance, undef, "HSP significance");
is($hsp->end, 102, "HSP end");
float_is($hsp->expect, undef, "HSP expect");
# Disabled check, kept for reference; enabling it would change the test plan.
#is($hsp->matches, 2, "HSP matches");
isa_ok($hsp->seq, 'Bio::LocatableSeq');
# this should probably default to the hit string
is($hsp->seq_str,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
   "HSP seq_str");
is($hsp->start, 1, "HSP start");
is($hsp->custom_score, undef, "HSP custom_score");
is($hsp->meta,
   ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
   "HSP meta");
is($hsp->strand('hit'), 1, "HSP strand");

# Second (and last expected) hit of the pre-1.0 report.

$hit = $result->next_hit;
isa_ok($hit, 'Bio::Search::Hit::HitI');
is($hit->accession, 'X83878.1', "Hit accession");
is($hit->ncbi_gi, '633168', "Hit GI");
is($hit->algorithm, 'CMSEARCH', "Hit algorithm");
is($hit->bits, 79.36, "Hit bits");
is($hit->description, '', "Hit description"); # no hit descs yet
is($hit->length, 0, "Hit length");
is($hit->locus, '', "Hit locus");
is($hit->n, 1, "Hit n");
is($hit->name, 'gi|633168|emb|X83878.1|', "Hit name");
is($hit->num_hsps, 1, "Hit num_hsps");
is($hit->overlap, 0, "Hit overlap");
is($hit->query_length, 102, "Hit query_length");
is($hit->rank, 2, "Hit rank");
is($hit->raw_score, 79.36, "Hit raw_score");
is($hit->score, 79.36, "Hit score");
float_is($hit->significance, undef, "Hit significance");

# Its single HSP.

$hsp = $hit->next_hsp;
isa_ok($hsp, 'Bio::Search::HSP::HSPI');
is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
float_is($hsp->evalue, undef, "HSP evalue");
isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
is($hsp->frame('query'), 0, "HSP frame");
is($hsp->gaps, 2, "HSP gaps");
isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
is($hsp->hit_string,
   'UUACAAUAUAAUAGGAACACUCAUAUAAUCGCGUGGAUAUGGCACGCAAGUUUCUACCGGGCA-CCGUAAA-UGUCCGACUAUGGGUGAGCAAUGGAACCGC',
   "HSP hit_string");
is($hsp->homology_string,
   '+ A A++A AA A  AA:AC+C:UAUAAU::CG:G AUAUGGC:CG::AGUUUCUACC:G CA CCGUAAA UG C:GACUA:G+GU:A  A+U  A+    ',
   "HSP homology_string");
is($hsp->hsp_group, undef, "HSP hsp_group");
is($hsp->hsp_length, 103, "HSP hsp_length");
is($hsp->length, 103, "HSP length");
is($hsp->links, undef, "HSP links");
is($hsp->n, 1, "HSP n");
isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
is($hsp->query_string,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
   "HSP query_string");
is($hsp->range, 102, "HSP range");
is($hsp->rank, 1, "HSP rank");
float_is($hsp->significance, undef, "HSP significance");
is($hsp->end, 102, "HSP end");
float_is($hsp->expect, undef, "HSP expect");
isa_ok($hsp->seq, 'Bio::LocatableSeq');
is($hsp->seq_str,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
   "HSP seq_str");
is($hsp->start, 1, "HSP start");
is($hsp->custom_score, undef, "HSP custom_score");
is($hsp->meta,
   ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
   "HSP meta");
is($hsp->strand('hit'), 1, "HSP strand");

# Symbol table handed to the parser via -symbols. With -convert_meta => 1
# the expected meta strings below contain only '(', ')', ':' and '-'.
my $symbols = {
            '5-prime'        => '(',
            '3-prime'        => ')',
            'single-strand'  => ':',
            'unknown'        => '?',
            'gap'            => '-'
             # may add more for quartets, triplets
              };

# Same input file and options as the pre-1.0 section, except that meta
# conversion is switched on and the symbol table is supplied.
$searchio = Bio::SearchIO->new( -format => 'infernal',
                                -file   => test_input_file('test.infernal'),
                                # version is reset to the correct one by parser
                                -version => 0.7,
                                -model => 'Purine',
                                -query_acc => 'RF00167',
                                -query_desc => 'Purine riboswitch',
                                -database => 'b_sub.fas',
                                -hsp_minscore => 40,
                                -convert_meta => 1,
                                -symbols => $symbols,
                               );

# Hit 1, HSP 1: the expected string has the 'gap' symbol '-' inside the
# closing run of ')' (see the test name "HSP meta gap bug").
$result = $searchio->next_result;
$hit = $result->next_hit;
$hsp = $hit->next_hsp;
is($hsp->meta,
   ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))-))))::::::::::::::',
   "HSP meta gap bug");
# Hit 1, HSP 2.
$hsp = $hit->next_hsp;
is($hsp->meta,
   ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))))))::::::::::::::',
   "HSP meta");
# Hit 2, HSP 1: the converted string is also expected on both features.
$hit = $result->next_hit;
$hsp = $hit->next_hsp;
is($hsp->meta,
   ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))))))::::::::::::::',
   "HSP meta");
($meta) = $hsp->feature1->get_tag_values('meta');
is($meta, ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))))))::::::::::::::',
   "HSP feature1 converted meta");
($meta) = $hsp->feature2->get_tag_values('meta');
is($meta, ':::::::::::::::::((((((((:::(((((((:::::::)))))))::::::::(((((((:::::::)))))))::))))))))::::::::::::::',
   "HSP feature2 converted meta");

## Infernal 0.81 parsing ##

# Open the Infernal 0.81 report and verify the result-level accessors.
# The bare block only limits the scope of the helper lexicals; $searchio,
# $result and $algorithm are the file-level variables and stay set afterwards.
{
    # NOTE(review): the previous inline comment here said the version "is
    # reset to the correct one by parser", but no -version argument is
    # passed -- confirm whether that remark is still relevant.
    my @parser_args = (
        -format       => 'infernal',
        -file         => test_input_file('purine_v081.infernal'),
        -query_acc    => 'RF00167',
        -query_desc   => 'Purine riboswitch',
        -database     => 'b_sub.fas',
        -convert_meta => 0,
    );

    $searchio = Bio::SearchIO->new(@parser_args);
    $result   = $searchio->next_result;

    isa_ok($result, 'Bio::Search::Result::ResultI');

    # The algorithm name is interpolated into some of the test names below.
    $algorithm = $result->algorithm;

    # Each row: accessor name, expected value, test name.  Rows are run in
    # the listed order.
    my @result_checks = (
        [ algorithm            => 'CMSEARCH',          "Result $algorithm" ],
        [ algorithm_reference  => undef,               "Result $algorithm reference" ],
        [ algorithm_version    => 0.81,                "Result $algorithm version" ],
        [ available_parameters => 0,                   "Result parameters" ],
        [ available_statistics => 0,                   "Result statistics" ],
        [ database_entries     => '',                  "Result entries" ],
        [ database_letters     => '',                  "Result letters" ],
        [ database_name        => 'b_sub.fas',         "Result database_name" ],
        [ num_hits             => 3,                   "Result num_hits" ],
        [ program_reference    => undef,               "Result program_reference" ],
        [ query_accession      => 'RF00167',           "Result query_accession" ],
        [ query_description    => 'Purine riboswitch', "Result query_description" ],
        [ query_length         => 102,                 "Result query_length" ],
        [ query_name           => 'Purine',            "Result query_name" ],
    );

    for my $check (@result_checks) {
        my ($accessor, $expected, $test_name) = @{$check};
        # The accessor is called directly as is()'s first argument, exactly
        # as in a hand-written assertion, so calling context is unchanged.
        is($result->$accessor, $expected, $test_name);
    }
}

# First hit of the Infernal 0.81 result: identifiers, scores and counts.
$hit = $result->next_hit;
isa_ok($hit, 'Bio::Search::Hit::HitI');
is($hit->ncbi_gi, '633168', "Hit GI");
is($hit->accession, 'X83878.1', "Hit accession");
is($hit->algorithm, 'CMSEARCH', "Hit algorithm");
is($hit->bits, 79.36, "Hit bits");
is($hit->description, '', "Hit description"); # no hit descs yet
is($hit->locus, '', "Hit locus");
is($hit->n, 2, "Hit n");
is($hit->name, 'gi|633168|emb|X83878.1|', "Hit name");
is($hit->num_hsps, 2, "Hit num_hsps");

# p() works but there are no evalues yet for Infernal output, so catch and check...
warnings_like {$hit->p} qr'P-value not defined. Using significance\(\) instead',
     "No p values";

is($hit->length, 0, "Hit length");
is($hit->overlap, 0, "Hit overlap");
is($hit->query_length, 102, "Hit query_length");
is($hit->rank, 1, "Hit rank");
is($hit->raw_score, 79.36, "Hit raw_score");
is($hit->score, 79.36, "Hit score");
# Named so a failure can be identified in the TAP output without counting tests.
float_is($hit->significance, 1.945e-07, "Hit significance");

# First HSP of the first hit: significance values, similarity features,
# alignment strings and coordinates.
$hsp = $hit->next_hsp;
isa_ok($hsp, 'Bio::Search::HSP::HSPI');
is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
float_is($hsp->evalue, 1.945e-07, "HSP evalue");
isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
# The parser was created with -convert_meta => 0; the 'meta' tag on both
# features is expected to match the string returned by $hsp->meta below.
($meta) = $hsp->feature1->get_tag_values('meta');
is($meta,
   ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
   "HSP feature1 meta tag");
($meta) = $hsp->feature2->get_tag_values('meta');
is($meta,
   ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
   "HSP feature2 meta tag");

is($hsp->frame('query'), 0, "HSP frame");
is($hsp->gaps, 2, "HSP gaps");
# Hit-level check repeated among the HSP checks; kept as-is because the
# script declares a fixed number of planned tests.
is($hit->length, 0, "Hit length");
isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
is($hsp->hit_string,
   'UUACAAUAUAAUAGGAACACUCAUAUAAUCGCGUGGAUAUGGCACGCAAGUUUCUACCGGGCA-CCGUAAA-UGUCCGACUAUGGGUGAGCAAUGGAACCGC',
   "HSP hit_string");
is($hsp->homology_string,
   '+ A A++A AA A  AA:AC+C:UAUAAU::CG:G AUAUGGC:CG::AGUUUCUACC:G CA CCGUAAA UG C:GACUA:G+GU:A  A+U  A+    ',
   "HSP homology_string");
is($hsp->hsp_group, undef, "HSP hsp_group");
is($hsp->hsp_length,102, "HSP hsp_length");
is($hsp->length, 102, "HSP length");
is($hsp->links, undef, "HSP links");
is($hsp->n, 1, "HSP n");
float_is($hsp->pvalue, 1.945e-07, "HSP pvalue");
isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
is($hsp->query_string,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
   "HSP query_string");
is($hsp->range, 102, "HSP range");
is($hsp->rank, 1, "HSP rank");
float_is($hsp->significance, 1.945e-07, "HSP significance");
is($hsp->end, 102, "HSP end");
float_is($hsp->expect, 1.945e-07, "HSP expect");

isa_ok($hsp->seq, 'Bio::LocatableSeq');
# seq_str is expected to equal the query string checked above.
is($hsp->seq_str,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
   "HSP seq_str");
is($hsp->start, 1, "HSP start");
is($hsp->custom_score, undef, "HSP custom_score");
is($hsp->meta,
   ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
   "HSP meta");
is($hsp->strand('hit'), 1, "HSP strand");

# Second HSP of the first hit; its hit strand is expected to be -1 and the
# alignment strings contain gap characters ('-' in the hit, '.' in the query).
$hsp = $hit->next_hsp;
isa_ok($hsp, 'Bio::Search::HSP::HSPI');
is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
float_is($hsp->evalue, 6.802, "HSP evalue");
isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
is($hsp->frame('query'), 0, "HSP frame");
is($hsp->gaps, 4, "HSP gaps");
# infernal can return alignment data
isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
is($hsp->hit_string,
   'CGUGCGGUUCCAUUGCUCACCCAUA-GUCGGACAU-UUACGG-UGCCCGGUAGAAACUUGCGUGCCAUAUCCACGCGAUUaUAUGAGUGUUCCUAUUAUAUUG',
   "HSP hit_string");
is($hsp->homology_string,
   '  +    +   A    +:AC C:UA  +::: ::   UA GG :: :::GU    AC: G::::CC UA  ::::C :   UA:G GU: +  U+++AUAUU ',
   "HSP homology_string");
is($hsp->hsp_group, undef, "HSP hsp_group");
is($hsp->hsp_length, 102, "HSP hsp_length");
is($hsp->length, 102, "HSP length");
is($hsp->links, undef, "HSP links");
is($hsp->n, 1, "HSP n");
float_is($hsp->pvalue, 0.9989, "HSP pvalue");
isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
is($hsp->query_string,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGAC.UAcGaGuaAauauuaaauauuu',
   "HSP query_string");
is($hsp->range, 102, "HSP range");
is($hsp->rank, 2, "HSP rank");
float_is($hsp->significance, 6.802, "HSP significance");
is($hsp->end, 102, "HSP end");
float_is($hsp->expect, 6.802, "HSP expect");
#is($hsp->matches, 2, "HSP matches");
isa_ok($hsp->seq, 'Bio::LocatableSeq');
# this should probably default to the hit string
is($hsp->seq_str,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGAC.UAcGaGuaAauauuaaauauuu',
   "HSP seq_str");
is($hsp->start, 1, "HSP start");
is($hsp->custom_score, undef, "HSP custom_score");
is($hsp->meta,
   ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,.))))))))::::::::::::::',
   "HSP meta");
is($hsp->strand('hit'), -1, "HSP strand");

# one more hit...

# Second hit of the Infernal 0.81 result: identifiers, scores and counts.
$hit = $result->next_hit;
isa_ok($hit, 'Bio::Search::Hit::HitI');
is($hit->accession, 'U51115.1', "Hit accession");
is($hit->ncbi_gi, '2239287', "Hit GI");
is($hit->algorithm, 'CMSEARCH', "Hit algorithm");
is($hit->bits, 81.29, "Hit bits");
is($hit->description, '', "Hit description"); # no hit descs yet
is($hit->length, 0, "Hit length");
is($hit->locus, 'BSU51115', "Hit locus");
is($hit->n, 11, "Hit n");
is($hit->name, 'gi|2239287|gb|U51115.1|BSU51115', "Hit name");
is($hit->num_hsps, 11, "Hit num_hsps");
is($hit->overlap, 0, "Hit overlap");
is($hit->query_length, 102, "Hit query_length");
is($hit->rank, 2, "Hit rank");
is($hit->raw_score, 81.29, "Hit raw_score");
is($hit->score, 81.29, "Hit score");
# Named so a failure can be identified in the TAP output without counting tests.
float_is($hit->significance, 1.259e-07, "Hit significance");

# one more HSP...

# First HSP of the second hit: significance values, similarity features,
# alignment strings and coordinates.  No gaps are expected in this alignment.
$hsp = $hit->next_hsp;
isa_ok($hsp, 'Bio::Search::HSP::HSPI');
is($hsp->algorithm, 'CMSEARCH', "HSP algorithm");
float_is($hsp->evalue, 1.259e-07, "HSP evalue");
isa_ok($hsp->feature1, 'Bio::SeqFeature::Similarity');
isa_ok($hsp->feature2, 'Bio::SeqFeature::Similarity');
is($hsp->frame('query'), 0, "HSP frame");
is($hsp->gaps, 0, "HSP gaps");
isa_ok($hsp->get_aln, 'Bio::Align::AlignI');
isa_ok($hsp->hit, 'Bio::SeqFeature::Similarity', "HSP hit");
is($hsp->hit_string,
   'AGAAAUCAAAUAAGAUGAAUUCGUAUAAUCGCGGGAAUAUGGCUCGCAAGUCUCUACCAAGCUACCGUAAAUGGCUUGACUACGUAAACAUUUCUUUCGUUU',
   "HSP hit_string");
is($hsp->homology_string,
   'A AAAU AAA+AA A+   : CGUAUAAU::CG:GAAUAUGGC:CG::AGU UCUACCA:GC ACCGUAAAU GC:UGACUACG :   AU+U +++  UUU',
   "HSP homology_string");
is($hsp->hsp_group, undef, "HSP hsp_group");
is($hsp->hsp_length, 102, "HSP hsp_length");
is($hsp->length, 102, "HSP length");
is($hsp->links, undef, "HSP links");
is($hsp->n, 1, "HSP n");
isa_ok($hsp->query, 'Bio::SeqFeature::Similarity', "HSP query");
is($hsp->query_string,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
   "HSP query_string");
is($hsp->range, 102, "HSP range");
is($hsp->rank, 1, "HSP rank");
float_is($hsp->significance, 1.259e-07, "HSP significance");
is($hsp->end, 102, "HSP end");
float_is($hsp->expect, 1.259e-07, "HSP expect");
isa_ok($hsp->seq, 'Bio::LocatableSeq');
# seq_str is expected to equal the query string checked above.
is($hsp->seq_str,
   'aAaaauaaAaaaaaaaauaCuCgUAUAaucucgggAAUAUGGcccgagaGUuUCUACCaGgcaaCCGUAAAuugcCuGACUAcGaGuaAauauuaaauauuu',
   "HSP seq_str");
is($hsp->start, 1, "HSP start");
is($hsp->custom_score, undef, "HSP custom_score");
is($hsp->meta,
   ':::::::::::::::::((((((((,,,<<<<<<<_______>>>>>>>,,,,,,,,<<<<<<<_______>>>>>>>,,))))))))::::::::::::::',
   "HSP meta");
is($hsp->strand('hit'), 1, "HSP strand");