The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
use version;
our $API_VERSION = qv('1.1.3');

use strict;
use File::Path qw(mkpath rmtree);
use Carp;

BEGIN {
    use lib '.';    # for core package test scripts only
    use Bio::Root::Test;

    test_begin(
        -tests               => 74,
        -requires_modules    => [q(Bio::SeqIO::mbsout)],
        -requires_networking => 0
    );

    use_ok('Bio::SeqIO::mbsout');
}

# skip tests if the msout.pm module is too old.
cmp_ok( $Bio::SeqIO::mbsout::API_VERSION,
    '>=', qv('1.1.3'), "Bio::SeqIO::mbsout is at least api version 1.1.3" );

create_dir("mbsout");
test_file_1( 0, "mbsout/mbsout_infile1" );
test_file_2( 0, "mbsout/mbsout_infile2" );
test_file_3( 0, "mbsout/mbsout_infile3" );

sub create_dir {

    my $dir = shift;

    $dir = test_input_file($dir);

    unless ( -d $dir ) {
        mkpath($dir);
    }
}

sub test_file_1 {
##############################################################################
## Test file 1
##############################################################################

    my $gzip   = shift;
    my $infile = shift;
    $infile = test_input_file($infile);

    #print_file1( $infile, $gzip );

    my $file_sequence = $infile;
    if ($gzip) {
        $file_sequence = "gunzip -c <$file_sequence |";
    }
    my $mbsout = Bio::SeqIO->new(
        -file   => "$file_sequence",
        -format => 'mbsout',
    );

    isa_ok( $mbsout, 'Bio::SeqIO::mbsout' );

    my $rh_base_conversion_table = $mbsout->get_base_conversion_table;

    isa_ok( $mbsout, 'Bio::SeqIO::mbsout' );

    my %attributes = (
        RUNS     => 3,
        SEGSITES => 7,
        MBS_INFO_LINE =>
          'command:        mbs 6 -t 0.001 -r 0.00025 -s 5000 2500 -f 3 1 traj ',
        TOT_RUN_HAPS      => 6,
        NEXT_RUN_NUM      => 1,
        LAST_READ_HAP_NUM => 0,
        POSITIONS => [qw(79.1001 80.1001 81.101 82.101 83.10001 84.801 85)],
        CURRENT_RUN_SEGSITES      => 7,
        POP_MUT_PARAM_PER_SITE    => 0.001,
        POP_RECOMB_PARAM_PER_SITE => 0.00025,
        NSITES                    => 5000,
        SELPOS                    => 2500,
        NFILES                    => 3,
        NREPS                     => 1,
        TRAJ_FILENAME             => 'traj'
    );

    foreach my $attribute ( keys %attributes ) {
        my $func = lc($attribute);

        if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
            $func = ucfirst($func);
        }

        $func = 'get_' . $func;
        my @returns = $mbsout->$func();
        my ( $return, $got );

        # If there were more than one return value, then compare references to
        # arrays instead of scalars
        unless ( @returns > 1 ) {
            $got = shift @returns;
        }
        else { $got = \@returns }

        my $expected = $attributes{$attribute};

        if ( defined $got && defined $expected ) {
            is_deeply( $got, $expected, "Get $attribute" );
        }
        else { is_deeply( $got, $expected, "Get $attribute" ) }
    }

    # Testing next_hap at beginning of run
    my @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_seq );
    my @data_expected = qw(1111111);
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap at beginning of run" );

    # Testing next_hap after beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_seq );
    @data_expected = qw(5555555);
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap after beginning of run" );

    # Testing next_pop after beginning of pop
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_seq );
    @data_expected = qw(4444444);
    is_deeply( \@data_got, \@data_expected,
        "Get next_pop after beginning of pop" );

    # Testing next_hap
    @data_got      = $mbsout->get_next_hap;
    @data_expected = qw(4444444);
    is_deeply( \@data_got, \@data_expected, "Get next_hap" );

    # Testing next_hap
    @data_got      = $mbsout->get_next_hap;
    @data_expected = qw(5555555);
    is_deeply( \@data_got, \@data_expected, "Get next_hap" );

    # Testing next_run after beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_run );
    @data_expected = qw(4444444);
    is_deeply( \@data_got, \@data_expected,
        "Get next_run after beginning of run" );

    # Testing next_run at beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_run );
    @data_expected = qw(5555555 5555555 5555555 1010101 1111111 1515151);
    is_deeply( \@data_got, \@data_expected,
        "Get next_run at beginning of run" );

    # Testing next_run at beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_run );
    @data_expected = qw(
      1414141
      1414141
      1515151
      1414141
      1515151
      1515151);
    is_deeply( \@data_got, \@data_expected,
        "Get next_run at beginning of run" );

    is( $mbsout->get_next_run_num, undef, 'have all lines been read?' );
}

sub test_file_2 {
##############################################################################
## Test file 2
##############################################################################

    my $gzip   = shift;
    my $infile = shift;
    $infile = test_input_file($infile);

    #print_file2( $infile, $gzip );

    my $file_sequence = $infile;
    if ($gzip) {
        $file_sequence = "gunzip -c <$file_sequence |";
    }

    my $mbsout = Bio::SeqIO->new(
        -file   => "$file_sequence",
        -format => 'mbsout',
    );

    isa_ok( $mbsout, 'Bio::SeqIO::mbsout' );

    my %attributes = (
        RUNS     => 5,
        SEGSITES => 7,
        MBS_INFO_LINE =>
          'command:        mbs 6 -t 0.001 -r 0.00025 -s 5000 2500 -f 5 1 traj ',
        TOT_RUN_HAPS      => 6,
        NEXT_RUN_NUM      => 1,
        LAST_READ_HAP_NUM => 0,
        POSITIONS => [qw(79.1001 80.1001 81.101 82.101 83.10001 84.801 85)],
        CURRENT_RUN_SEGSITES      => 7,
        POP_MUT_PARAM_PER_SITE    => 0.001,
        POP_RECOMB_PARAM_PER_SITE => 0.00025,
        NSITES                    => 5000,
        SELPOS                    => 2500,
        NFILES                    => 5,
        NREPS                     => 1,
        TRAJ_FILENAME             => 'traj'
    );

    foreach my $attribute ( keys %attributes ) {
        my $func = lc($attribute);
        if ( $attribute =~ m/POSITIONS/ ) {
            $func = ucfirst($func);
        }
        elsif ( $attribute =~ m/\_file/ ) {
            $func = q(infile);
        }

        $func = 'get_' . $func;
        my @returns = $mbsout->$func();
        my ( $return, $got );

        # If there were more than one return value, then compare references to
        # arrays instead of scalars
        unless ( @returns > 1 ) {
            $got = shift @returns;
        }
        else { $got = \@returns }

        my $expected = $attributes{$attribute};

        if ( defined $got && defined $expected ) {
            is_deeply( $got, $expected, "Get $attribute" );
        }
        else { is_deeply( $got, $expected, "Get $attribute" ) }
    }

    my $rh_base_conversion_table = $mbsout->get_base_conversion_table;

    # Testing next_hap at beginning of run
    my @data_got      = $mbsout->get_next_hap;
    my @data_expected = qw(1111111);
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap at beginning of run" );

    # Testing next_hap after beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_seq );
    @data_expected = '5555555';
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap after beginning of run" );

    # Surprise test! testing mbsout::outgroup
    my $outgroup = $mbsout->outgroup;
    is( $outgroup, 0, "Testing mbsout::outgroup" );

    # Testing next_run after beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_run );
    @data_expected = qw(
      4444444
      4444444
      5555555
      4444444);
    is_deeply( \@data_got, \@data_expected,
        "Get next_run after beginning of run" );

    # Testing next_run after beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_run );
    @data_expected = qw(
      5555555
      5555555
      5555555
      1010101
      1111111
      1515151);
    is_deeply( \@data_got, \@data_expected,
        "Get next_run after beginning of run" );

    # Testing next_run at beginning of run
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_run );
    @data_expected = qw(
      1414141
      1414141
      1515151
      1414141
      1515151
      1515151);
    is_deeply( \@data_got, \@data_expected,
        "Get next_run at beginning of run" );

    # Testing next_hap at beginning of run 2
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_seq );
    @data_expected = '1515151';
    is_deeply( \@data_got, \@data_expected,
        "Get next_hap at beginning of run 2" );

    # Testing next_run after hap
    @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_run );
    @data_expected = qw(
      5050505
      5151515
      5555555
      5454545
      5454545);
    is_deeply( \@data_got, \@data_expected, "Get next_run after hap" );

    is( $mbsout->get_next_run_num, 5, 'next run should be 5.' );

}

sub test_file_3 {
##############################################################################
## Test file 3
##############################################################################

    my $gzip   = shift;
    my $infile = shift;
    $infile = test_input_file($infile);

    #print_file3( $infile, $gzip );

    my $file_sequence = $infile;
    if ($gzip) {
        $file_sequence = "gunzip -c <$file_sequence |";
    }
    my $mbsout = Bio::SeqIO->new(
        -file   => "$file_sequence",
        -format => 'mbsout',
    );

    isa_ok( $mbsout, 'Bio::SeqIO::mbsout' );

    my $rh_base_conversion_table = $mbsout->get_base_conversion_table;

    my %attributes = (
        RUNS     => 1,
        SEGSITES => 7,
        MBS_INFO_LINE =>
          'command:        mbs 3 -t 0.001 -r 0.00025 -s 5000 2500 -f 1 1 traj ',
        TOT_RUN_HAPS      => 3,
        NEXT_RUN_NUM      => 1,
        LAST_READ_HAP_NUM => 0,
        POSITIONS => [qw(79.1001 80.1001 81.101 82.101 83.10001 84.801 85)],
        CURRENT_RUN_SEGSITES      => 7,
        POP_MUT_PARAM_PER_SITE    => 0.001,
        POP_RECOMB_PARAM_PER_SITE => 0.00025,
        NSITES                    => 5000,
        SELPOS                    => 2500,
        NFILES                    => 1,
        NREPS                     => 1,
        TRAJ_FILENAME             => 'traj'
    );

    foreach my $attribute ( keys %attributes ) {
        my $func = lc($attribute);

        if ( $attribute =~ m/POPS|SEEDS|POSITIONS/ ) {
            $func = ucfirst($func);
        }

        $func = 'get_' . $func;
        my @returns = $mbsout->$func();
        my ( $return, $got );

        # If there were more than one return value, then compare references to
        # arrays instead of scalars
        unless ( @returns > 1 ) {
            $got = shift @returns;
        }
        else { $got = \@returns }

        my $expected = $attributes{$attribute};

        if ( defined $got && defined $expected ) {
            is_deeply( $got, $expected, "Get $attribute" );
        }
        else { is_deeply( $got, $expected, "Get $attribute" ) }
    }

    # Testing next_run at beginning of run
    my @data_got =
      convert_bases_to_nums( $rh_base_conversion_table, $mbsout->get_next_run );
    my @data_expected = qw(1111111 5555555 4444444);
    is_deeply( \@data_got, \@data_expected,
        "Get next_run at end/beginning of run" );

    is( $mbsout->get_next_run_num, undef, 'have all lines been read?' );

    # Testing what happens when we read from empty stream
    @data_got      = $mbsout->get_next_run;
    @data_expected = ();
    is_deeply( \@data_got, \@data_expected, "Get next_run at eof" );

    # Testing what happens when we read from empty stream
    @data_got      = $mbsout->get_next_hap;
    @data_expected = undef;
    is_deeply( \@data_got, \@data_expected, "Get next_hap at eof" );

    # Testing what happens when we read from empty stream
    @data_got      = $mbsout->get_next_seq;
    @data_expected = ();
    is_deeply( \@data_got, \@data_expected, "Get next_seq at eof" );

}

sub print_file1 {

    my $destination = shift;
    my $gzip        = shift;

    my $out = <<END
command:        mbs 6 -t 0.001 -r 0.00025 -s 5000 2500 -f 3 1 traj 

//0-1   allele: a a a d d d d 
segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
1111111
5555555
4444444
4444444
5555555
4444444
//1-1   allele: a a a a d d d 
segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
5555555
5555555
5555555
1010101
1111111
1515151
//2-1   allele: a d d d d d d 
segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
1414141
1414141
1515151
1414141
1515151
1515151
END
      ;

    if ($gzip) {
        $gzip = "| gzip";
    }
    else { $gzip = ' '; }
    open OUT, "$gzip >$destination" or croak("Unable to open $destination\n");

    print OUT $out;
    close OUT;
}

sub print_file2 {

    my $destination = shift;
    my $gzip        = shift;

    my $out = <<END
command:        mbs 6 -t 0.001 -r 0.00025 -s 5000 2500 -f 5 1 traj 

//0-1   allele: a a a d d d d 
segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
1111111
5555555
4444444
4444444
5555555
4444444
//1-1   allele: a a a d d d d 
segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
5555555
5555555
5555555
1010101
1111111
1515151
//2-1   allele: a a a d d d d 

segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
1414141
1414141
1515151
1414141
1515151
1515151
//3-1   allele: a a a a a d d 

segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
1515151
5050505
5151515
5555555
5454545
5454545
//4-1   allele: a a d d d d d 

segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
5555555
END
      ;

    if ($gzip) {
        $gzip = "| gzip";
    }
    else { $gzip = ' '; }

    open OUT, "$gzip >$destination" or croak("Unable to open $destination\n");

    print OUT $out;
    close OUT;
}

sub print_file3 {

    my $destination = shift;
    my $gzip        = shift;

    my $out = <<END ;
command:        mbs 3 -t 0.001 -r 0.00025 -s 5000 2500 -f 1 1 traj 

//0-1   allele: a a a d d d d 
segsites: 7
positions: 79.1001 80.1001 81.101 82.101 83.10001 84.801 85
1111111
5555555
4444444
END

    if ($gzip) {
        $gzip = "| gzip";
    }
    else { $gzip = ' '; }

    open OUT, "$gzip >$destination" or croak("Unable to open $destination\n");

    print OUT $out;
    close OUT;
}

sub print_to_file {
    my ( $ra_in, $out ) = @_;
    unless ( open OUT, ">$out" ) {
        croak("\nCould not open outfile $out!!\n\n");
    }
    print OUT ("@$ra_in");
    close OUT;
}

sub convert_bases_to_nums {

    my ( $rh_base_conversion_table, @seqs ) = @_;

    my @out_seqstrings;
    foreach my $seq (@seqs) {
        my $seqstring = $seq->seq;
        foreach my $base ( keys %{$rh_base_conversion_table} ) {
            $seqstring =~ s/($base)/$rh_base_conversion_table->{$base}/g;
        }
        push @out_seqstrings, $seqstring;
    }

    return @out_seqstrings;

}