The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# -*-Perl-*- Test Harness script for Bioperl
# $Id$

use strict;
use warnings;

# Compile-time setup: declare the test plan and check that every class
# exercised by this script can be loaded.
BEGIN {
    use lib '.';
    use Bio::Root::Test;
    # The planned count has to cover the 5 use_ok() checks in this block as
    # well as every assertion that follows; update it when adding tests.
    test_begin(-tests => 194);

    # One load check per class used as a reference object or under test.
    use_ok('Bio::Seq');
    use_ok('Bio::Seq::Quality');
    use_ok('Bio::PrimarySeq');
    use_ok('Bio::LocatableSeq');
    use_ok('Bio::Seq::SimulatedRead');
}

# Debug flag from the test framework; passed as -verbose to some constructors.
my $VERBOSE = test_debug();

# Scratch variables shared by the assertions below. $ref5 is not used in the
# visible part of the script but stays declared so later code can rely on it.
my ($ref5, $read, $errors);

# Reference with id, description, quality scores and trace values.
my $ref = Bio::Seq::Quality->new(
    -id    => 'human_id',
    -seq   => 'TAAAAAAACCCC',
    -qual  => '1 2 3 4 5 6 7 8 9 10 11 12',
    -trace => '0 5 10 15 20 25 30 35 40 45 50 55',
    -desc  => 'The human genome',
);

# Reference whose description contains double quotes.
my $ref2 = Bio::Seq->new(
    -id   => 'other_genome',
    -seq  => 'ACGTACGT',
    -desc => '"Secret" sequence',
);

# Reference with neither id nor description.
my $ref3 = Bio::PrimarySeq->new(
    -seq => 'ACACTGATCTAGCGTCGTGCTAGCTGACGTAGCTGAT',
);

# Reference with an id but no description.
my $ref4 = Bio::LocatableSeq->new(
    -id  => 'a_thaliana',
    -seq => 'CGTATTCTGAGGAGAGCTCT',
);


# Basic object
#
# The assertions in this section are unnamed and reuse $read / $errors, so a
# failure is identified by its line number and the statement order matters.

# An empty read can be built and has the expected class hierarchy.
ok $read = Bio::Seq::SimulatedRead->new();
isa_ok $read, 'Bio::Seq::SimulatedRead';
isa_ok $read, 'Bio::LocatableSeq';
isa_ok $read, 'Bio::Seq::Quality';

# An error given as a plain string is read back as the first element of an
# array reference.
$errors->{'1'}->{'+'} = 'T';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->reference, $ref;
ok $read->errors;
is $read->errors->{'1'}->{'+'}->[0], 'T';

# Tracking disabled: the read spans the whole reference and has no description.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 0 );
is $read->start, 1;
is $read->end, 12;
is $read->seq, 'TAAAAAAACCCC';
is $read->track, 0;
is $read->desc, undef;
is $read->revcom->seq, 'GGGGTTTTTTTA';

# Tracking enabled: the description records reference id, coordinates, strand
# and the reference description. No quality levels given, so qual is empty.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1 );
is $read->start, 1;
is $read->end, 12;
is $read->seq, 'TAAAAAAACCCC';
is join(' ',@{$read->qual}), '';
is $read->track, 1;
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';

# 'bioperl' coordinate style yields the same description as the default above.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1, -coord_style => 'bioperl' );
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';

# 'genbank' coordinate style reports a position=start..end field instead.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -track => 1, -coord_style => 'genbank' );
is $read->desc, 'reference=human_id position=1..12 description="The human genome"';

# With quality levels [30, 10] an error-free read gets 30 for every residue.
# track() reports 1 here although -track was not passed.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -qual_levels => [30, 10]);
is $read->start, 1;
is $read->end, 12;
is $read->seq, 'TAAAAAAACCCC';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30';
is $read->track, 1;
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';
is $read->revcom->seq, 'GGGGTTTTTTTA';

# Bio::Seq reference: double quotes of the reference description are
# backslash-escaped inside the read description.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref2 );
is $read->start, 1;
is $read->end, 8;
is $read->seq, 'ACGTACGT';
is join(' ',@{$read->qual}), '';
is $read->desc, 'reference=other_genome start=1 end=8 strand=+1 description="\"Secret\" sequence"';

# Reference without id or description: only coordinates and strand are tracked.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref3 );
is $read->start, 1;
is $read->end, 37;
is $read->seq, 'ACACTGATCTAGCGTCGTGCTAGCTGACGTAGCTGAT';
is join(' ',@{$read->qual}), '';
is $read->desc, 'start=1 end=37 strand=+1';

# Minus strand: the read is the reverse complement of the reference while the
# reported coordinates stay 1..12.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -qual_levels => [30, 10]);
is $read->seq, 'GGGGTTTTTTTA';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30';
is $read->desc, 'reference=human_id start=1 end=12 strand=-1 description="The human genome"';

# Minus strand in 'genbank' style is written as complement(start..end).
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -qual_levels => [30, 10], -coord_style => 'genbank' );
is $read->desc, 'reference=human_id position=complement(1..12) description="The human genome"';

# Sub-range of the reference (positions 2 to 8) on the plus strand.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -start => 2, -end => 8, -qual_levels => [30, 10]);
is $read->seq, 'AAAAAAA';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30';
is $read->desc, 'reference=human_id start=2 end=8 strand=+1 description="The human genome"';

# Same sub-range on the minus strand.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -start => 2, -end => 8, -qual_levels => [30, 10]);
is $read->seq, 'TTTTTTT';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30';
is $read->desc, 'reference=human_id start=2 end=8 strand=-1 description="The human genome"';

# Insertion of 'GG' after read position 6 on the minus-strand sub-range: the
# inserted residues get the second quality level (10), start/end are unchanged
# and the description lists one entry per inserted residue.
$errors = {};
$errors->{'6'}->{'+'} = 'GG';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => -1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]);
is $read->start, 2;
is $read->end, 8;
is $read->seq, 'TTTTTTGGT';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30';
is $read->desc, 'reference=human_id start=2 end=8 strand=-1 errors=6+G,6+G description="The human genome"';

# Mixed error types on one read: substitution (%), deletion (-) and
# insertion (+). Substituted and inserted residues carry quality 10.
$errors = {};
$errors->{'6'}->{'+'} = 'GG';
$errors->{'1'}->{'%'} = 'T';
$errors->{'3'}->{'-'} = undef;
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => 1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]);
is $read->start, 2;
is $read->end, 8;
is $read->seq, 'TAAAAGGA';
is join(' ', @{$read->qual}), '10 30 30 30 30 10 10 30';
is $read->desc, 'reference=human_id start=2 end=8 strand=+1 errors=1%T,3-,6+G,6+G description="The human genome"';

# Insertion on a full-length read.
$errors = {};
$errors->{'6'}->{'+'} = 'GG';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors, -qual_levels => [30, 10]);
is $read->start, 1;
is $read->end, 12;
is $read->seq, 'TAAAAAGGAACCCC';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30 30 30 30 30 30';
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 errors=6+G,6+G description="The human genome"';
# MID combined with an insertion error. $errors still holds the 6+GG insertion
# assigned just before this point. The expected sequence shows the insertion
# landing after the 6th residue of the MID-prefixed read, while start/end keep
# describing the reference span only. (-errors is passed once; the original
# call listed the same named argument twice.)
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => 'ACGT', -errors => $errors, -qual_levels => [30, 10]);
is $read->start, 1;
is $read->end, 12;
is $read->seq, 'ACGTTAGGAAAAAACCCC';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 10 10 30 30 30 30 30 30 30 30 30 30';
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=ACGT errors=6+G,6+G description="The human genome"';

# MID without errors: the MID is prepended to the read, gets the first quality
# level like every other residue, and is reported in the description.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => 'TTTAAA', -qual_levels => [30, 10]);
is $read->start, 1;
is $read->end, 12;
is $read->seq, 'TTTAAATAAAAAAACCCC';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=TTTAAA description="The human genome"';

# Empty MID and empty quality levels behave like not specifying them at all:
# no mid= field in the description and no quality scores.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => '', -qual_levels => []);
is $read->start, 1;
is $read->end, 12;
is $read->seq, 'TAAAAAAACCCC';
is join(' ', @{$read->qual}), '';
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';


# Redundant errors
#
# Errors are given as array references with several entries per position and
# type. The expected read is the same as for a single 1%T substitution and a
# single deletion at position 3, yet the description lists every entry.

$errors = {};
$errors->{'6'}->{'+'} = ['G', 'G'];
$errors->{'1'}->{'%'} = ['A', 'G', 'T'];
$errors->{'3'}->{'-'} = [undef, undef];
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -strand => 1, -start => 2, -end => 8, -errors => $errors, -qual_levels => [30, 10]), 'redundant errors';
is $read->start, 2;
is $read->end, 8;
is $read->seq, 'TAAAAGGA';
is join(' ', @{$read->qual}), '10 30 30 30 30 10 10 30';
is $read->desc, 'reference=human_id start=2 end=8 strand=+1 errors=1%A,1%G,1%T,3-,3-,6+G,6+G description="The human genome"';


# Specifying errors() after new()
#
# The three groups below act on the same $read object, so their order matters.

# Baseline: error-free read without MID or quality levels.
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => '', -qual_levels => []);
is $read->start, 1;
is $read->end, 12;
is $read->seq, 'TAAAAAAACCCC';
is join(' ', @{$read->qual}), '';
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';

# Setting an empty error specification returns a true value and leaves the
# sequence, coordinates and description untouched.
$errors = {};
ok $read->errors($errors), 'errors()';
is $read->start, 1;
is $read->end, 12;
is $read->seq, 'TAAAAAAACCCC';
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 description="The human genome"';

# Setting a real error on the existing read updates both the sequence and the
# description; start/end still refer to the reference span.
$errors = {};
$errors->{'6'}->{'+'} = 'GG';
ok $read->errors($errors);
is $read->seq, 'TAAAAAGGAACCCC';
is $read->start, 1;
is $read->end, 12;
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 errors=6+G,6+G description="The human genome"';


# More tracking tests
#
# All assertions in this section act on one $read object, so their order
# matters: each step checks that the description follows the latest change.

ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -mid => 'ACGT', -qual_levels => [], -coord_style => 'genbank' );
is $read->desc, 'reference=human_id position=1..12 mid=ACGT description="The human genome"';

# Changing the MID is reflected in the description.
ok $read->mid('AAAA');
is $read->desc, 'reference=human_id position=1..12 mid=AAAA description="The human genome"';

# Adding errors appends an errors= field after the mid= field.
$errors = {};
$errors->{'6'}->{'+'} = 'GG';
ok $read->errors($errors);
is $read->desc, 'reference=human_id position=1..12 mid=AAAA errors=6+G,6+G description="The human genome"';

# Turning tracking off returns a false value and clears the description;
# turning it back on restores the full description.
ok not($read->track(0)), 'track()';
is $read->track, 0;
is $read->desc, undef;
ok $read->track(1);
is $read->track, 1;
is $read->desc, 'reference=human_id position=1..12 mid=AAAA errors=6+G,6+G description="The human genome"';


# qual_levels() method
#
# The setter returns a true value and the getter hands back the two levels.

ok $read = Bio::Seq::SimulatedRead->new(-verbose => $VERBOSE, );
ok $read->qual_levels([30, 10]), 'qual_levels()';
is join(' ', @{$read->qual_levels}), '30 10';

# reference() method
#
# The getter must return the very object that was set.

ok $read->reference($ref), 'reference()';
is $read->reference(), $ref;

# mid() method
#
# Build the read step by step through the accessors, then check that setting
# a MID prepends it to the sequence, extends the quality scores and shows up
# in the description.

ok $read = Bio::Seq::SimulatedRead->new(-verbose => $VERBOSE, ), 'mid()';
ok $read->qual_levels([30, 10]);
ok $read->reference($ref);
ok $read->mid('ACGT');
# Use is() for the getter: with ok() the second argument is only the test
# name, so the expected MID value would never be compared.
is $read->mid, 'ACGT';

is $read->seq, 'ACGTTAAAAAAACCCC';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=ACGT description="The human genome"';

# Replacing the MID swaps the prefix of the read; the read left here is
# reused by the statements that follow this section.
ok $read->mid('TTTAAA');
is $read->mid, 'TTTAAA';
is $read->seq, 'TTTAAATAAAAAAACCCC';
is join(' ', @{$read->qual}), '30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30';
is $read->desc, 'reference=human_id start=1 end=12 strand=+1 mid=TTTAAA description="The human genome"';


# Edge case... mutation of the last bases of a simulated read with MID
#
# Relies on $read still being the 18-residue read with MID 'TTTAAA' left by
# the mid() tests. Position 18 is the last residue of the MID-prefixed read,
# and the substitution must turn its final C into a T.

$errors = {};
$errors->{'18'}->{'%'} = 'T';
$read->errors($errors);
is $read->seq, 'TTTAAATAAAAAAACCCT';


# Try different BioPerl object types

# Every kind of reference object has to be accepted by the constructor. Each
# pair holds the reference to try and the test name (the class it belongs to).
for my $case (
    [ $ref,  'Bio::Seq::Quality' ],
    [ $ref2, 'Bio::Seq' ],
    [ $ref3, 'Bio::PrimarySeq' ],
    [ $ref4, 'Bio::LocatableSeq' ],
) {
    my ($reference, $test_name) = @{$case};
    ok $read = Bio::Seq::SimulatedRead->new(-reference => $reference ), $test_name;
}


# More detailed tests of the error specifications
#
# Every case starts from a fresh $errors hash and the 12-residue reference
# 'TAAAAAAACCCC'. The assertions are unnamed, so failures are identified by
# line number. Where a warning is expected, the read must come out unchanged.

# Deletions: position 0 is rejected with a warning.
$errors = {};
$errors->{'0'}->{'-'} = undef;
warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
    qr/Positions of substitutions and deletions have to be strictly positive but got 0/;
is $read->seq, 'TAAAAAAACCCC';

# Deletion of the first residue.
$errors = {};
$errors->{'1'}->{'-'} = undef;
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'AAAAAAACCCC';

# Deletion of the last residue.
$errors = {};
$errors->{'12'}->{'-'} = undef;
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'TAAAAAAACCC';

# Deletion past the end of the read.
$errors = {};
$errors->{'13'}->{'-'} = undef;
warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
    qr/Position 13 is beyond end of read \(12 residues\)/; # out-of-range position must warn
is $read->seq, 'TAAAAAAACCCC';

# Substitutions: position 0 is rejected with a warning.
$errors = {};
$errors->{'0'}->{'%'} = 'G';
warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors )}
    qr/Positions of substitutions and deletions have to be strictly positive/; # position 0 must warn
is $read->seq, 'TAAAAAAACCCC';

# Substitution of the first residue.
$errors = {};
$errors->{'1'}->{'%'} = 'G';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'GAAAAAAACCCC';

# Substitution of the last residue.
$errors = {};
$errors->{'12'}->{'%'} = 'G';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'TAAAAAAACCCG';

# Substitution past the end of the read.
$errors = {};
$errors->{'13'}->{'%'} = 'G';
warning_like { $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors ) }
    qr/Position 13 is beyond end of read \(12 residues\)/; # out-of-range position must warn
is $read->seq, 'TAAAAAAACCCC';

# Insertions: unlike deletions and substitutions, position 0 is accepted and
# places the new residue in front of the read.
$errors = {};
$errors->{'0'}->{'+'} = 'A';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'ATAAAAAAACCCC';

# Insertion after the first residue.
$errors = {};
$errors->{'1'}->{'+'} = 'A';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'TAAAAAAAACCCC';

# Insertion after the last residue.
$errors = {};
$errors->{'12'}->{'+'} = 'A';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'TAAAAAAACCCCA';

# Insertion past the end of the read.
$errors = {};
$errors->{'13'}->{'+'} = 'A';
warning_like {$read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors ) }
    qr/Position 13 is beyond end of read \(12 residues\)/; # out-of-range position must warn
is $read->seq, 'TAAAAAAACCCC';

# Several substitutions at consecutive positions.
$errors = {};
$errors->{'1'}->{'%'} = 'G';
$errors->{'2'}->{'%'} = 'G';
$errors->{'3'}->{'%'} = 'G';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'GGGAAAAACCCC';

# Several insertions at consecutive positions: each one lands after the
# original residue at its position, so they interleave with the reference.
$errors = {};
$errors->{'1'}->{'+'} = 'G';
$errors->{'2'}->{'+'} = 'G';
$errors->{'3'}->{'+'} = 'G';
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'TGAGAGAAAAACCCC';

# Several deletions at consecutive positions.
$errors = {};
$errors->{'1'}->{'-'} = undef;
$errors->{'2'}->{'-'} = undef;
$errors->{'3'}->{'-'} = undef;
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'AAAAACCCC';

# Multi-residue insertion followed by a deletion at the next position.
$errors = {};
$errors->{'1'}->{'+'} = 'GGG';
$errors->{'2'}->{'-'} = undef;
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'TGGGAAAAAACCCC';

# Insertion and deletion at the same position: the residue is removed and the
# inserted residues take its place.
$errors = {};
$errors->{'2'}->{'+'} = 'CC';
$errors->{'2'}->{'-'} = undef;
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'TCCAAAAAACCCC';

# Substitution and deletion at the same position: the expected read has the
# residue removed and no trace of the substituted 'C'.
$errors = {};
$errors->{'2'}->{'%'} = 'C';
$errors->{'2'}->{'-'} = undef;
ok $read = Bio::Seq::SimulatedRead->new(-reference => $ref, -errors => $errors );
is $read->seq, 'TAAAAAACCCC';