The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# -*-Perl-*- Test Harness script for Bioperl
# $Id$

use strict;

BEGIN {
	use lib '.';
	use Bio::Root::Test;

	test_begin(-tests => 44,
			   -requires_modules => [qw(IO::String
									    LWP::UserAgent
										HTTP::Request::Common)],
			   -requires_networking => 1);

	use_ok('Bio::DB::GenBank');
}

my %expected_lengths = (
    'MUSIGHBA1' => 408,
	'J00522.1' => 408,
    'AF303112'  => 1611,
    'AF303112.1' => 1611,
    'AF041456'  => 1156,
    'CELRABGDI' => 1743,
    'JH374761'  => 38055,
);

my ($gb, $seq, $seqio, $seqin);

#
# Bio::DB::GenBank
#

ok $gb = Bio::DB::GenBank->new('-delay'=>0), 'Bio::DB::GenBank';

# get a single seq
SKIP: {
    eval {$seq = $gb->get_Seq_by_id('J00522');1};
    skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Do you have network access? Skipping GenBank tests: $@", 4 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
    eval {$seq = $gb->get_Seq_by_acc('AF303112');};
    skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests: $@", 3 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
    eval {$seq = $gb->get_Seq_by_version('AF303112.1');};
    skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests: $@", 2 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
    eval {$seq = $gb->get_Seq_by_gi('405830');};
    skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests: $@", 1 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
}

$seq = $seqio = undef;

# batch mode
SKIP: {
    eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
    skip "Batch access test failed for Genbank. Skipping those tests", 4 if $@;
    my $done = 0;
    while (my $s = $seqio->next_seq) {
        is $s->length, $expected_lengths{$s->display_id}, $s->display_id;
        $done++;
    }
    skip('No seqs returned', 4) if !$done;
    is $done, 3;
}

$seq = $seqio = undef;

# test the temporary file creation and fasta
ok $gb = Bio::DB::GenBank->new('-format' => 'fasta', '-retrievaltype' => 'tempfile', '-delay' => 0);
SKIP: {
    eval {$seq = $gb->get_Seq_by_id('J00522');};
    skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
    # last part of id holds the key
    is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]}, $seq->display_id;
    eval {$seq = $gb->get_Seq_by_acc('AF303112');};
    skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
    # last part of id holds the key
    is $seq->length, $expected_lengths{(split(/\|/,$seq->display_id))[-1]}, $seq->display_id;
    # batch mode requires genbank format
    $gb->request_format("gb");
    eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
    skip "Couldn't connect to complete GenBank batch tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;
    my $done = 0;
    while (my $s = $seqio->next_seq) {
        is $s->length, $expected_lengths{$s->display_id};
        undef $gb; # test the case where the db is gone,
        # but a temp file should remain until seqio goes away.
        $done++;
    }
    skip('No seqs returned', 4) if !$done;
    is $done, 3;
}

$seq = $seqio = undef;

# test pipeline creation
ok $gb = Bio::DB::GenBank->new('-retrievaltype' => 'pipeline', '-delay' => 0);
SKIP: {
    eval {$seq = $gb->get_Seq_by_id('J00522');};
    skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
    eval {$seq = $gb->get_Seq_by_acc('AF303112');};
    skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
    eval {$seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]);};
    skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;
    my $done = 0;
    while (my $s = $seqio->next_seq) {
        is $s->length, $expected_lengths{$s->display_id}, $s->display_id;
        undef $gb; # test the case where the db is gone,
        # but the pipeline should remain until seqio goes away
        $done++;
    }
    skip('No seqs returned', 4) if !$done;
    is $done, 3;
}

$seq = $seqio = undef;

# test contig retrieval
ok $gb = Bio::DB::GenBank->new('-delay'  => 0, '-format' => 'gbwithparts');
SKIP: {
    eval {$seq = $gb->get_Seq_by_id('JH374761');};
    skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 3 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
    # now to check that postprocess_data in NCBIHelper catches CONTIG...
    ok $gb = Bio::DB::GenBank->new('-delay' => 0, '-format' => 'gb');
    eval {$seq = $gb->get_Seq_by_id('JH374761');};
    skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 1 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
}

$seq = $seqio = undef;

# bug 1405
my @result;
ok $gb = Bio::DB::GenBank->new(-format => 'Fasta', -seq_start  => 2, -seq_stop   => 7);
SKIP: {
    eval {$seq = $gb->get_Seq_by_acc("A11111");};
    skip "Couldn't connect to complete GenBank tests. Skipping those tests", 15 if $@;
    is $seq->length, 6;
    # complexity tests
    ok $gb = Bio::DB::GenBank->new(-format => 'Fasta', -complexity => 0);
    eval {$seqin = $gb->get_Stream_by_acc("5");};
    skip "Couldn't connect to complete GenBank tests. Skipping those tests", 13 if $@;
    @result = (1136, 'dna', 342, 'protein');
    while ($seq = $seqin->next_seq) {
        is $seq->length, shift(@result);
        is $seq->alphabet, shift(@result);
    }
    is @result, 0;
    # Real batch retrieval using epost/efetch
    # these tests may change if integrated further into Bio::DB::Gen*
    # Currently only useful for retrieving GI's via get_seq_stream
    $gb = Bio::DB::GenBank->new();
    eval {$seqin = $gb->get_seq_stream(-uids => [4887706 ,431229, 147460], -mode => 'batch');};
    skip "Couldn't connect to complete GenBank batchmode epost/efetch tests. Skipping those tests", 8 if $@;
    my %result = ('M59757' => 12611 ,'X76083'=> 3140, 'J01670'=> 1593);
	my $ct = 0;
    while ($seq = $seqin->next_seq) {
		$ct++;
		my $acc = $seq->accession;
        ok exists $result{ $acc };
        is $seq->length, $result{ $acc };
		delete $result{$acc};
    }
    skip('No seqs returned', 8) if !$ct;
	is $ct, 3;
    is %result, 0;
}