The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
use version;
our $API_VERSION = $Bio::SeqIO::msout::API_VERSION;
use strict;
use File::Path qw(mkpath rmtree);

BEGIN {
    use lib '.';    # for core package test scripts only
    use Bio::Root::Test;

    test_begin(
        -tests               => 165,
        -requires_modules    => [q(Bio::SeqIO::msout)],
        -requires_networking => 0
    );

    use_ok('Bio::SeqIO::msout');

}

# skip tests if the msout.pm module is too old.
my $api_version = $Bio::SeqIO::msout::API_VERSION;
cmp_ok( $api_version, '>=', qv('1.1.5'),
    "Bio::SeqIO::msout is at least api version 1.1.5" );

test_file_1( 0, "msout/msout_infile1" );    # 23 tests
test_file_2( 0, "msout/msout_infile2" );    # 22 tests
test_file_3( 0, "msout/msout_infile3" );    # 17 tests

# tests to run for api versions >= 1.1.6
SKIP: {
    skip q($Bio::SeqIO::msout::API_VERSION < 1.1.6), 22
      unless $api_version >= qv('1.1.6');
    test_file_1( 0, q(msout/msout_infile4) );
}

# tests to run for api versions >= 1.1.7
SKIP: {
    skip q($Bio::SeqIO::msout::API_VERSION < 1.1.7), 4
      unless $api_version >= qv('1.1.7');
    bad_test_file_1( 0, q(msout/bad_msout_infile1) ); # 2 tests
    bad_test_file_2( 0, q(msout/bad_msout_infile2) ); # 2 tests
}

# tests to run for api version >= 1.1.8
SKIP: {
    skip q($Bio::SeqIO::msout::API_VERSION < 1.1.8), 75
      unless $api_version >= qv('1.1.8');

    test_file_1( 0, "msout/msout_infile1", 100 );
    test_file_2( 0, "msout/msout_infile2", 10 );
    test_file_1( 0, q(msout/msout_infile4), 100 );
    bad_test_file_1( 0, q(msout/bad_msout_infile1), 1000 );
    bad_test_file_2( 0, q(msout/bad_msout_infile2), 1000 );
    bad_n_sites( 0, q(msout/msout_infile1) ); # 2 tests
}

sub create_dir {

    my $dir = shift;

    $dir = Bio::Root::Test::test_input_file($dir);

    unless ( -d $dir ) {
        mkpath($dir);
    }
}

sub remove_dir {

    my $dir = shift;

    $dir = Bio::Root::Test::test_input_file($dir);

    if ( -d $dir ) {
        rmtree($dir);
    }
    else { warn "Tried to remove $dir, but it does not exist" }
}

sub test_file_1 {
##############################################################################
## Test file 1
##############################################################################

    my $gzip    = shift;
    my $infile  = shift;
    my $n_sites = shift;
    $infile = Bio::Root::Test::test_input_file($infile);

    # the files are now part of the git repo and don't have to be printed
    #    print_file1( $infile, $gzip );

    my $file_sequence = $infile;
    if ($gzip) {
        $file_sequence = "gzip -dc <$file_sequence |";
    }
    my $msout = Bio::SeqIO->new(
        -file    => "$file_sequence",
        -format  => 'msout',
        -n_sites => $n_sites,
    );

    isa_ok( $msout, 'Bio::SeqIO::msout' );

    my $rh_base_conversion_table = $msout->get_base_conversion_table;

    my %attributes = (
        RUNS                 => 3,
        SEGSITES             => 7,
        N_SITES              => $n_sites,
        SEEDS                => [qw(1 1 1)],
        MS_INFO_LINE         => 'ms 6 3 -s 7 -I 3 3 2 1',
        TOT_RUN_HAPS         => 6,
        POPS                 => [qw(3 2 1)],
        NEXT_RUN_NUM         => 1,
        LAST_READ_HAP_NUM    => 0,
        POSITIONS            => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
        CURRENT_RUN_SEGSITES => 7
    );

    foreach my $attribute ( keys %attributes ) {
        my $func = lc($attribute);

        if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
            $func = ucfirst($func);
        }

        $func = 'get_' . $func;
        my @returns = $msout->$func();
        my ( $return, $got );

        # If there were more than one return value, then compare references to
        # arrays instead of scalars
        unless ( @returns > 1 ) {
            $got = shift @returns;
        }
        else { $got = \@returns }

        my $expected = $attributes{$attribute};

        if ( defined $got && defined $expected ) {
            is_deeply( $got, $expected, "Get $attribute" );
        }
        else { is_deeply( $got, $expected, "Get $attribute" ) }
    }

    # Testing next_hap at beginning of run
    my @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
    my @data_expected;
    if ( !defined($n_sites) ) {
        @data_expected = qw(1111111);
    }
    else {
        @data_expected =
          qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap at beginning of run" );

    # Testing next_hap after beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
    if ( !defined($n_sites) ) {
        @data_expected = qw(5555555);
    }
    else {
        @data_expected =
          qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap after beginning of run" );

    # Surprise test! testing msout::outgroup
    my $outgroup = $msout->outgroup;
    is( $outgroup, 1, "Testing msout::outgroup" );

    # Testing next_pop after beginning of pop
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
    if ( !defined($n_sites) ) {
        @data_expected = qw(4444444);
    }
    else {
        @data_expected =
          qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_pop after beginning of pop" );

    # Testing next_pop at beginning of pop
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
    if ( !defined($n_sites) ) {
        @data_expected = qw(4444444 5555555);
    }
    else {
        @data_expected =
          qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_pop at beginning of pop" );

    # Testing next_run after beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
    if ( !defined($n_sites) ) {
        @data_expected = qw(4444444);
    }
    else {
        @data_expected =
          qw(4000000000000000000000004000004000400000000000000000000000000000000400000004000000004000000000000000);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_run after beginning of run" );

    # Testing next_pop at beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
    if ( !defined($n_sites) ) {
        @data_expected = qw(5555555 5555555 5555555);
    }
    else {
        @data_expected =
          qw(5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000 5000000000000000000000005000005000500000000000000000000000000000000500000005000000005000000000000000);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_pop at beginning of run" );

    # Testing next_hap after pop
    @data_got      = $msout->get_next_hap;
    @data_expected = qw(1010101);
    is_deeply( \@data_got, \@data_expected, "Get next_hap after pop" );

    # Testing next_run after pop and hap
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
    if ( !defined($n_sites) ) {
        @data_expected = qw(1111111 1515151);
    }
    else {
        @data_expected =
          qw(1000000000000000000000001000001000100000000000000000000000000000000100000001000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
    }
    is_deeply( \@data_got, \@data_expected, "Get next_run after pop and hap" );

    # Testing next_run at beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
    if ( !defined($n_sites) ) {
        @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
    }
    else {
        @data_expected =
          qw(1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000004000001000400000000000000000000000000000000100000004000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000 1000000000000000000000005000001000500000000000000000000000000000000100000005000000001000000000000000);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_run at beginning of run" );

    is( $msout->get_next_run_num, undef, 'have all lines been read?' );
}

sub test_file_2 {
##############################################################################
## Test file 2
##############################################################################

    my $gzip    = shift;
    my $infile  = shift;
    my $n_sites = shift;
    $infile = Bio::Root::Test::test_input_file($infile);

    # the files are now part of the git repo and don't have to be printed
    #    print_file2( $infile, $gzip );

    my $file_sequence = $infile;
    if ($gzip) {
        $file_sequence = "gzip -dc <$file_sequence |";
    }

    my $msout = Bio::SeqIO->new(
        -file    => "$file_sequence",
        -format  => 'msout',
        -n_sites => $n_sites,
    );

    isa_ok( $msout, 'Bio::SeqIO::msout' );

    my %attributes = (
        RUNS                 => 3,
        SEGSITES             => 7,
        N_SITES              => $n_sites,
        SEEDS                => [qw(1 1 1)],
        MS_INFO_LINE         => 'ms 6 3',
        TOT_RUN_HAPS         => 6,
        POPS                 => 6,
        NEXT_RUN_NUM         => 1,
        LAST_READ_HAP_NUM    => 0,
        POSITIONS            => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
        CURRENT_RUN_SEGSITES => 7
    );

    foreach my $attribute ( keys %attributes ) {
        my $func = lc($attribute);

        if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
            $func = ucfirst($func);
        }

        $func = 'get_' . $func;
        my @returns = $msout->$func();
        my ( $return, $got );

        # If there were more than one return value, then compare references to
        # arrays instead of scalars
        unless ( @returns > 1 ) {
            $got = shift @returns;
        }
        else { $got = \@returns }

        my $expected = $attributes{$attribute};

        if ( defined $got && defined $expected ) {
            is_deeply( $got, $expected, "Get $attribute" );
        }
        else { is_deeply( $got, $expected, "Get $attribute" ) }
    }

    my $rh_base_conversion_table = $msout->get_base_conversion_table;

    # Testing next_hap at beginning of run
    my @data_got      = $msout->get_next_hap;
    my @data_expected = '1111111';
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap at beginning of run" );

    # Testing next_hap after beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
    if ( !defined($n_sites) ) {
        @data_expected = '5555555';
    }
    else {
        @data_expected = '5555055500';
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap after beginning of run" );

    # Surprise test! testing msout::outgroup
    my $outgroup = $msout->outgroup;
    is( $outgroup, 0, "Testing msout::outgroup" );

    # Testing next_pop after beginning of pop
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
    if ( !defined($n_sites) ) {
        @data_expected = qw(4444444 4444444 5555555 4444444);
    }
    else {
        @data_expected = qw(4444044400 4444044400 5555055500 4444044400);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_pop after beginning of pop" );

    # Testing next_pop at beginning of pop/run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
    if ( !defined($n_sites) ) {
        @data_expected = qw(5555555 5555555 5555555 1010101 1111111 1515151);
    }
    else {
        @data_expected =
          qw(5555055500 5555055500 5555055500 1010010100 1111011100 1515015100);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_pop at beginning of pop/run" );

    # Testing next_run at beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
    if ( !defined($n_sites) ) {
        @data_expected = qw(1414141 1414141 1515151 1414141 1515151 1515151);
    }
    else {
        @data_expected =
          qw(1414014100 1414014100 1515015100 1414014100 1515015100 1515015100);
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_run at beginning of run" );

    # Testing next_hap at beginning of run 2
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_seq );
    if ( !defined($n_sites) ) {
        @data_expected = '1515151';
    }
    else {
        @data_expected = '1515015100';
    }
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap at beginning of run 2" );

    # Testing next_run after hap
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_run );
    if ( !defined($n_sites) ) {
        @data_expected = qw(5050505 5151515 5555555 5454545 5454545);
    }
    else {
        @data_expected =
          qw(5050050500 5151051500 5555055500 5454054500 5454054500);
    }
    is_deeply( \@data_got, \@data_expected, "Get next_run after hap" );

    is( $msout->get_next_run_num, 5, 'next run should be 5.' );

    # getting the last hap of the file via next hap
    # Testing next_run after hap
    @data_got      = $msout->get_next_hap;
    @data_expected = qw( 5555555 );
    is_deeply( \@data_got, \@data_expected, "Get last hap through next_hap" );

}

sub test_file_3 {
##############################################################################
## Test file 3
##############################################################################

    my $gzip    = shift;
    my $infile  = shift;
    $infile = Bio::Root::Test::test_input_file($infile);

    # the files are now part of the git repo and don't have to be printed
    #    print_file3( $infile, $gzip );

    my $file_sequence = $infile;
    if ($gzip) {
        $file_sequence = "gzip -dc <$file_sequence |";
    }
    my $msout = Bio::SeqIO->new(
        -file   => "$file_sequence",
        -format => 'msout',
    );

    isa_ok( $msout, 'Bio::SeqIO::msout' );

    my $rh_base_conversion_table = $msout->get_base_conversion_table;

    my %attributes = (
        RUNS                 => 1,
        SEGSITES             => 7,
        SEEDS                => [qw(1 1 1)],
        MS_INFO_LINE         => 'ms 3 1',
        TOT_RUN_HAPS         => 3,
        POPS                 => 3,
        NEXT_RUN_NUM         => 1,
        LAST_READ_HAP_NUM    => 0,
        POSITIONS            => [qw(0.01 0.25 0.31 0.35 0.68 0.76 0.85)],
        CURRENT_RUN_SEGSITES => 7
    );

    foreach my $attribute ( keys %attributes ) {
        my $func = lc($attribute);

        if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
            $func = ucfirst($func);
        }

        $func = 'get_' . $func;
        my @returns = $msout->$func();
        my ( $return, $got );

        # If there were more than one return value, then compare references to
        # arrays instead of scalars
        unless ( @returns > 1 ) {
            $got = shift @returns;
        }
        else { $got = \@returns }

        my $expected = $attributes{$attribute};

        if ( defined $got && defined $expected ) {
            is_deeply( $got, $expected, "Get $attribute" );
        }
        else { is_deeply( $got, $expected, "Get $attribute" ) }
    }

    # Testing next_hap at beginning of run
    my @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $msout->get_next_pop );
    my @data_expected = qw(1111111 5555555 4444444);
    is_deeply( \@data_got, \@data_expected, "Get next_pop at end of run" );

    is( $msout->get_next_run_num, undef, 'have all lines been read?' );

    # Testing what happens when we read from empty stream
    @data_got      = $msout->get_next_pop;
    @data_expected = ();
    is_deeply( \@data_got, \@data_expected, "Get next_pop at eof" );

    # Testing what happens when we read from empty stream
    @data_got      = $msout->get_next_run;
    @data_expected = ();
    is_deeply( \@data_got, \@data_expected, "Get next_run at eof" );

    # Testing what happens when we read from empty stream
    @data_got      = $msout->get_next_hap;
    @data_expected = undef;
    is_deeply( \@data_got, \@data_expected, "Get next_hap at eof" );

    # Testing what happens when we read from empty stream
    @data_got      = $msout->get_next_seq;
    @data_expected = ();
    is_deeply( \@data_got, \@data_expected, "Get next_seq at eof" );

}

sub print_to_file {
    my ( $ra_in, $out ) = @_;
    unless ( open OUT, ">$out" ) {
        die "\nCould not open outfile $out!!\n\n";
    }
    print OUT ("@$ra_in");
    close OUT;
}

sub convert_bases_to_nums {

    my ( $rh_base_conversion_table, @seqs ) = @_;

    my @out_seqstrings;
    foreach my $seq (@seqs) {
        my $seqstring = $seq->seq;
        foreach my $base ( keys %{$rh_base_conversion_table} ) {
            $seqstring =~ s/($base)/$rh_base_conversion_table->{$base}/g;
        }
        push @out_seqstrings, $seqstring;
    }

    return @out_seqstrings;

}

sub bad_test_file_1 {
##############################################################################
## Bad Test file 1
##############################################################################

    # This sub tests to see if msout.pm will catch if the msinfo line's
    # advertized haps are less than are actually in the file

    my $gzip    = shift;
    my $infile  = shift;
    my $n_sites = shift;
    $infile = test_input_file($infile);

    my $file_sequence = $infile;
    if ($gzip) {
        $file_sequence = "gunzip -c <$file_sequence |";
    }
    my $msout = Bio::SeqIO->new(
        -file    => "$file_sequence",
        -format  => 'msout',
        -n_sites => $n_sites,
    );

    isa_ok( $msout, 'Bio::SeqIO::msout' );

    throws_ok { $msout->get_next_run }
qr/msout file has only 2 hap\(s\), which is less than indicated in msinfo line \( 9 \)/,
      q(Caught error in bad msout file 1);

}

sub bad_test_file_2 {
##############################################################################
## Bad Test file 2
##############################################################################

    # This sub tests to see if msout.pm will catch if the msinfo line's
    # advertized haps are more than are actually in the file

    my $gzip    = shift;
    my $infile  = shift;
    my $n_sites = shift;
    $infile = test_input_file($infile);

    my $file_sequence = $infile;
    if ($gzip) {
        $file_sequence = "gunzip -c <$file_sequence |";
    }
    my $msout = Bio::SeqIO->new(
        -file    => "$file_sequence",
        -format  => 'msout',
        -n_sites => $n_sites,
    );

    isa_ok( $msout, 'Bio::SeqIO::msout' );

    throws_ok { $msout->get_next_run }
qr/\'\/\/\' not encountered when expected. There are more haplos in one of the msOUT runs than advertised in the msinfo line/,
      q(Caught error in bad msout file 2);

}

sub bad_n_sites {
##############################################################################
## Bad n_sites
##############################################################################

    # this sub tests if msout.pm dies when n_sites is smaller than segsites
    my $gzip    = shift;
    my $infile  = shift;
    $infile = Bio::Root::Test::test_input_file($infile);

    my $file_sequence = $infile;
    if ($gzip) {
        $file_sequence = "gzip -dc <$file_sequence |";
    }
    my $msout = Bio::SeqIO->new(
            -file    => "$file_sequence",
            -format  => 'msout',
    );

    # test nsites -1
    throws_ok { $msout->set_n_sites(-1) } qr|first argument needs to be a positive integer. argument supplied: -1|;

    # test nsites smaller than next hap
    $msout->set_n_sites(1);
    throws_ok{$msout->get_next_seq} qr/n_sites needs to be at least the number of segsites of every run/, 'too few n_sites failed OK';
    
}