The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# -*-Perl-*- Test Harness script for Bioperl
# $Id$

use strict;

BEGIN {
    use lib '.';
    use Bio::Root::Test;

    test_begin(-tests => 325);

    use_ok('Bio::Factory::FTLocationFactory');
}

my $simple_impl = "Bio::Location::Simple";
my $fuzzy_impl  = "Bio::Location::Fuzzy";
my $split_impl  = "Bio::Location::Split";

# Holds strings and results. The latter is an array of expected class name,
# min/max start position and position type, min/max end position and position
# type, location type, the number of locations, and the strand.
#
my %testcases = (
    # note: the following are directly taken from
    # http://www.insdc.org/documents/feature_table.html#3.4.3
    "467"
        => [$simple_impl, 467, 467, "EXACT",
                          467, 467, "EXACT",
                          "EXACT", 1, 1],
    "340..565"
        => [$simple_impl, 340, 340, "EXACT",
                          565, 565, "EXACT",
                          "EXACT", 1, 1],
    "<345..500"
        => [$fuzzy_impl, undef, 345, "BEFORE",
                         500,   500, "EXACT",
                         "EXACT", 1, 1],
    "<1..888"
        => [$fuzzy_impl, undef, 1,   "BEFORE",
                         888,   888, "EXACT",
                         "EXACT", 1, 1],
    "1..>888"
        => [$fuzzy_impl, 1,   1,     "EXACT",
                         888, undef, "AFTER",
                         "EXACT", 1, 1],
    "(102.110)"
        => [$fuzzy_impl, 102, 102, "EXACT",
                         110, 110, "EXACT",
                         "WITHIN", 1, 1],
    "(23.45)..600"
        => [$fuzzy_impl, 23,  45,  "WITHIN",
                         600, 600, "EXACT",
                         "EXACT", 1, 1],
    "(122.133)..(204.221)"
        => [$fuzzy_impl, 122, 133, "WITHIN",
                         204, 221, "WITHIN",
                         "EXACT", 1, 1],
    "123^124"
        => [$simple_impl, 123, 123, "EXACT",
                          124, 124, "EXACT",
                          "IN-BETWEEN", 1, 1],
    "145^177"
        => [$fuzzy_impl, 145, 145, "EXACT",
                         177, 177, "EXACT",
                         "IN-BETWEEN", 1, 1],
    "join(12..78,134..202)"
        => [$split_impl, 12,  12,  "EXACT",
                         202, 202, "EXACT",
                         "EXACT", 2, 1],
    "complement(join(2691..4571,4918..5163))"
        => [$split_impl, 2691, 2691, "EXACT",
                         5163, 5163, "EXACT",
                         "EXACT", 2, -1],
    # Partial frameshifted gene at the end of a contig
    "complement(join(94468..94578,94578..>94889))"
        => [$split_impl, 94468, 94468, "EXACT",
                         94889, undef, "AFTER",
                         "EXACT", 2, -1],
    "complement(34..(122.126))"
        => [$fuzzy_impl, 34,  34,  "EXACT",
                         122, 126, "WITHIN",
                         "EXACT", 1, -1],
    "J00194:100..202"
        => [$simple_impl, 100, 100, "EXACT",
                          202, 202, "EXACT",
                          "EXACT", 1, 1],
    "join(1..100,J00194.1:100..202)"
        => [$split_impl, 1,   1,   "EXACT",
                         100, 100, "EXACT",
                         "EXACT", 2, 1],

    # this variant is not really allowed by the FT definition
    # document but we want to be able to cope with it
    "J00194:(100..202)"
        => [$simple_impl, 100, 100, "EXACT",
                          202, 202, "EXACT",
                          "EXACT", 1, 1],
    "((122.133)..(204.221))"
        => [$fuzzy_impl, 122, 133, "WITHIN",
                         204, 221, "WITHIN",
                         "EXACT", 1, 1],
    "join(AY016290.1:108..185,AY016291.1:1546..1599)"
        => [$split_impl, 108, 108, "EXACT",
                         185, 185, "EXACT",
                         "EXACT", 2, 1],

    # UNCERTAIN locations and positions (Swissprot)
    "?2465..2774"
        => [$fuzzy_impl, 2465, 2465, "UNCERTAIN",
                         2774, 2774, "EXACT",
                         "EXACT", 1, 1],
    "22..?64"
        => [$fuzzy_impl, 22, 22, "EXACT",
                         64, 64, "UNCERTAIN",
                         "EXACT", 1, 1],
    "?22..?64"
        => [$fuzzy_impl, 22, 22, "UNCERTAIN",
                         64, 64, "UNCERTAIN",
                         "EXACT", 1, 1],
    "?..>393"
        => [$fuzzy_impl, undef, undef, "UNCERTAIN",
                         393,   undef, "AFTER",
                         "UNCERTAIN", 1, 1],
    "<1..?"
        => [$fuzzy_impl, undef, 1,     "BEFORE",
                         undef, undef, "UNCERTAIN",
                         "UNCERTAIN", 1, 1],
    "?..536"
        => [$fuzzy_impl, undef, undef, "UNCERTAIN",
                         536,   536,   "EXACT",
                         "UNCERTAIN", 1, 1],
    "1..?"
        => [$fuzzy_impl, 1,     1,     "EXACT",
                         undef, undef, "UNCERTAIN",
                         "UNCERTAIN", 1, 1],
    "?..?"
        => [$fuzzy_impl, undef, undef, "UNCERTAIN",
                         undef, undef, "UNCERTAIN",
                         "UNCERTAIN", 1, 1],
    "?1..12"
        => [$fuzzy_impl, 1,  1,  "UNCERTAIN",
                         12, 12, "EXACT",
                         "EXACT", 1, 1]
);

my $locfac = Bio::Factory::FTLocationFactory->new();
isa_ok($locfac,'Bio::Factory::LocationFactoryI');

# sorting is to keep the order constant from one run to the next
foreach my $locstr (keys %testcases) {
    my $loc = $locfac->from_string($locstr);
    if($locstr eq "join(AY016290.1:108..185,AY016291.1:1546..1599)") {
        $loc->seq_id("AY016295.1");
    }
    if($locstr eq "join(1..100,J00194.1:100..202)") {
        $loc->seq_id("unknown");
    }
    my @res = @{$testcases{$locstr}};
    is(ref($loc), $res[0], $res[0]);
    is($loc->min_start(), $res[1]);
    is($loc->max_start(), $res[2]);
    is($loc->start_pos_type(), $res[3]);
    is($loc->min_end(), $res[4]);
    is($loc->max_end(), $res[5]);
    is($loc->end_pos_type(), $res[6]);
    is($loc->location_type(), $res[7]);
    my @locs = $loc->each_Location();
    is(@locs, $res[8]);
    my $ftstr = $loc->to_FTstring();
    # this is a somewhat ugly hack, but we want clean output from to_FTstring()
    # Umm, then these should really fail, correct?
    # Should we be engineering workarounds for tests?
    $locstr = "J00194:100..202"      if $locstr eq "J00194:(100..202)";
    $locstr = "(122.133)..(204.221)" if $locstr eq "((122.133)..(204.221))";
    # now test
    is($ftstr, $locstr, "Location String: $locstr");
    # test strand production
    is($loc->strand(), $res[9]);
}

SKIP: {
    skip('nested matches in regex only supported in v5.6.1 and higher', 8) unless $^V gt v5.6.0;

    # Tests based on location definition (http://www.insdc.org/documents/feature_table.html#3.4)
    my $string1  = 'complement(join(2691..4571,4918..5163))';
    my $string2  = 'join(complement(4918..5163),complement(2691..4571))';
    my $loc1     = $locfac->from_string($string1);
    my $loc2     = $locfac->from_string($string2);
    my $loc1_str = $loc1->to_FTstring;
    my $loc2_str = $loc2->to_FTstring;
    is($loc1_str, $string1, $string1);
    is($loc2_str, $string1, $string2);
    is($loc1_str, $loc2_str, 'equivalent remote location strings');

    # Test for equivalent reverse strand locations adding one remote component
    $string1  = 'complement(join(TEST0001.1:2691..4571,4918..5163))';
    $string2  = 'join(complement(4918..5163),complement(TEST0001.1:2691..4571))';
    $loc1     = $locfac->from_string($string1);
    $loc2     = $locfac->from_string($string2);
    $loc1_str = $loc1->to_FTstring;
    $loc2_str = $loc2->to_FTstring;
    is($loc1_str, $string1, $string1);
    is($loc2_str, $string1, $string2);
    is($loc1_str, $loc2_str, 'equivalent remote location strings');

    # Test for equivalent reverse strand locations adding two remote components
    $string1  = 'complement(join(TEST0001.1:2691..4571,TEST0008.1:4918..5163))';
    $string2  = 'join(complement(TEST0008.1:4918..5163),complement(TEST0001.1:2691..4571))';
    $loc1     = $locfac->from_string($string1);
    $loc2     = $locfac->from_string($string2);
    $loc1_str = $loc1->to_FTstring;
    $loc2_str = $loc2->to_FTstring;
    is($loc1_str, $string1, $string1);
    is($loc2_str, $string1, $string2);
    is($loc1_str, $loc2_str, 'equivalent remote location strings');

    # bug #1674, #1765, 2101
    # EMBL-like (BAC19856.3 protein)
    # join(20464..20694,21548..22763,join(complement(314652..314672),complement(232596..232990),complement(231520..231669)))
    # GenBank-like
    # join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))
    # Note that
    # join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)
    # is the same as
    # join(1000..2000,3000..4000,5000..6000,7000..8000,9000..10000)

    my @expected = (# intentionally testing same expected string twice
                    # as I am providing two different encodings
                    # that should mean the same thing
    'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
    'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
    # ditto
    'join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))',
    'join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))',
    # this is just seen once
    'join(1000..2000,3000..4000,5000..6000,7000..8000,9000..10000)',
    'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
   );

    for my $locstr (
        'join(11025..11049,join(complement(239890..240081),complement(241499..241580),complement(251354..251412),complement(315036..315294)))',
        'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
        'join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))',
        'join(20464..20694,21548..22763,join(complement(314652..314672),complement(232596..232990),complement(231520..231669)))',
        'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)',
        'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
       ) {
        my $loc = $locfac->from_string($locstr);
        my $ftstr = $loc->to_FTstring();
        is($ftstr, shift @expected, $locstr);
    }
}