The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# -*-Perl-*- Test Harness script for Bioperl
# $Id$

use strict;

BEGIN { 
    use lib '.';
    use Bio::Root::Test;
    
    test_begin(-tests => 12);
	
	use_ok('Bio::SeqIO');
	use_ok('Bio::SeqFeature::Tools::Unflattener');
}

my $verbosity = test_debug();

my ($seq, @sfs);
my $unflattener = Bio::SeqFeature::Tools::Unflattener->new;
$unflattener->verbose($verbosity);

if (1) {
    
    # this is an arabidopsise gbk record. it has no mRNA features.
    # it has explicit exon/intron records

    my @path = ("ATF14F8.gbk");
    $seq = getseq(@path);
    
    is ($seq->accession_number, 'AL391144');
    my @topsfs = $seq->get_SeqFeatures;
    my @cdss = grep {$_->primary_tag eq 'CDS'} @topsfs;
    my $n = scalar(@topsfs);
    if( $verbosity > 0 ) {
	warn sprintf "TOP:%d\n", scalar(@topsfs);
	write_hier(@topsfs);
    }
    # UNFLATTEN
    @sfs = $unflattener->unflatten_seq(-seq=>$seq,
				       -use_magic=>1,
				      );
    @sfs = $seq->get_SeqFeatures;
    if( $verbosity > 0 ) {
	warn "\n\nPOST PROCESSING:\n";
	write_hier(@sfs);
	warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
    }
    is(@sfs,28);
    my @allsfs = $seq->get_all_SeqFeatures;
    is(@allsfs,202);
    my @mrnas = grep {$_->primary_tag eq 'mRNA'} @allsfs;
    if( $verbosity > 0 ) {
	warn sprintf "ALL:%d\n", scalar(@allsfs);
	warn sprintf "mRNAs:%d\n", scalar(@mrnas);
    }

    # relationship between mRNA and CDS should be one-one
    is(@mrnas,@cdss);
}

if (1) {
    
    # this is a record from FlyBase
    # it has mRNA features, and explicit exon/intron records

    my @path = ("AnnIX-v003.gbk");
    $seq = getseq(@path);
    
    my @topsfs = $seq->get_SeqFeatures;
    if( $verbosity > 0 ) {
	warn sprintf "TOP:%d\n", scalar(@topsfs);
	write_hier(@topsfs);
    }
    # UNFLATTEN
    @sfs = $unflattener->unflatten_seq(-seq=>$seq,
				       -use_magic=>1,
				      );
    @sfs = $seq->get_SeqFeatures;
    if( $verbosity > 0 ) {
	warn "\n\nPOST PROCESSING:\n";
	write_hier(@sfs);
	warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
    }
    is scalar(@sfs), 1;
    my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
    is scalar(@exons), 6;    # total number of exons per splice
    my %numberh = map {$_->get_tag_values("number") => 1} @exons;
    my @numbers = keys %numberh;
    if( $verbosity > 0 ) {
	warn sprintf "DISTINCT EXONS: %d [@numbers]\n", scalar(@numbers);
    }
    is scalar(@numbers), 6;  # distinct exons
}

if (1) {
    
    # example of a BAD genbank entry

    my @path = ("dmel_2Lchunk.gb");
    $seq = getseq(@path);
    
    my @topsfs = $seq->get_SeqFeatures;
    if( $verbosity > 0 ) {
	warn sprintf "TOP:%d\n", scalar(@topsfs);
	write_hier(@topsfs);
    }
    # UNFLATTEN
    #
    # we EXPECT problems with this erroneous record
    $unflattener->error_threshold(2);
    @sfs = $unflattener->unflatten_seq(-seq=>$seq,
                                       -use_magic=>1,
                                      );
    my @probs = $unflattener->get_problems;
    $unflattener->report_problems(\*STDERR) if $verbosity > 0;
    $unflattener->clear_problems;
    @sfs = $seq->get_SeqFeatures;
    if( $verbosity > 0 ) {
	warn "\n\nPOST PROCESSING:\n";
	write_hier(@sfs);
	warn sprintf "PROCESSED/TOP:%d\n", scalar(@sfs);
    }
    is scalar(@sfs), 2;
    my @exons = grep {$_->primary_tag eq 'exon'} $seq->get_all_SeqFeatures;
    is scalar(@exons), 2;    # total number of exons per splice
    if( $verbosity > 0 ) {
	warn sprintf "PROBLEMS ENCOUNTERED: %d (EXPECTED: 6)\n", scalar(@probs);
    }
    is scalar(@probs), 6;
}


sub write_hier {
    my @sfs = @_;
    _write_hier(0, @sfs);
}

sub _write_hier {
    my $indent = shift;
    my @sfs = @_;
    foreach my $sf (@sfs) {
        my $label = '?';
        if ($sf->has_tag('gene')) {
            ($label) = $sf->get_tag_values('gene');
        }
        if ($sf->has_tag('product')) {
            ($label) = $sf->get_tag_values('product');
        }
        if ($sf->has_tag('number')) {
            $label = join("; ", $sf->get_tag_values('number'));
        }
        printf "%s%s $label\n", '  ' x $indent, $sf->primary_tag;
        my @sub_sfs = $sf->sub_SeqFeature;
        _write_hier($indent+1, @sub_sfs);
    }
}

sub getseq {
    my @path = @_;
    my $seqio =
      Bio::SeqIO->new('-file'=> test_input_file(@path), 
                      '-format' => 'GenBank');
    $seqio->verbose($verbosity);

    my $seq = $seqio->next_seq();
    return $seq;
}