The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#
# BioPerl module for Bio::AlignIO::xmfa
#
# Copyright Chris Fields
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code

=head1 NAME

Bio::AlignIO::xmfa - XMFA MSA Sequence input/output stream

=head1 SYNOPSIS

Do not use this module directly.  Use it via the L<Bio::AlignIO> 
class.

=head1 DESCRIPTION

This object can transform L<Bio::SimpleAlign> objects from
XMFA flat file databases.  For more information, see:

  http://asap.ahabs.wisc.edu/mauve-aligner/mauve-user-guide/mauve-output-file-formats.html

This module is based on the AlignIO::fasta parser written by
Peter Schattner

=head1 TODO

Finish write_aln(), clean up code, allow LargeLocatableSeq (ie for
very large sequences a'la Mauve)

=head1 FEEDBACK

=head2 Support 

Please direct usage questions or support issues to the mailing list:

I<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and 
reponsive experts will be able look at the problem and quickly 
address it. Please include a thorough description of the problem 
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution.  Bug reports can be submitted via the
web:

  https://redmine.open-bio.org/projects/bioperl/

=head1 AUTHORS

Chris Fields

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut

# Let the code begin...

package Bio::AlignIO::xmfa;
use strict;

use base qw(Bio::AlignIO);
our $WIDTH = 60;

=head2 next_aln

 Title   : next_aln
 Usage   : $aln = $stream->next_aln
 Function: returns the next alignment in the stream.
 Returns : Bio::Align::AlignI object - returns 0 on end of file
            or on error
 Args    : -width => optional argument to specify the width sequence
           will be written (60 chars by default)

See L<Bio::Align::AlignI>

=cut

sub next_aln {
    my $self = shift;
    my ($width) = $self->_rearrange([qw(WIDTH)],@_);
    $self->width($width || $WIDTH);

    my ($name, $tempname, $seqchar);
    my $aln = Bio::SimpleAlign->new();
    my $seqs = 0;
    # alignments
    while (defined (my $entry = $self->_readline) ) {
        chomp $entry;
        if ( index($entry, '=') == 0 ) {
            if (defined $name && $seqchar) {
                my $seq = $self->_process_seq($name, $seqchar);
                $aln->add_seq($seq);
            }
            if ($aln && $entry =~ m{score\s*=\s*(\d+)}) {
                $aln->score($1);
            }
            $seqchar = '';
            undef $name;
            last;
        } elsif ( $entry =~ m{^>.+$}xms) {
            if ( defined $name ) {
                my $seq = $self->_process_seq($name, $seqchar);
                $aln->add_seq($seq);
            }
            $seqchar = '';
            $name = $entry;
        } else {
            $seqchar .= $entry;
        }
    }
    
    # this catches last sequence if '=' is not present (Mauve)
    if ( defined $name ) {
        my $seq = $self->_process_seq($name, $seqchar);
        $aln->add_seq($seq);
    }
    $aln->num_sequences ? return $aln : return;
}

=head2 write_aln

 Title   : write_aln
 Usage   : $stream->write_aln(@aln)
 Function: writes the $aln object into the stream in xmfa format
 Returns : 1 for success and 0 for error
 Args    : L<Bio::Align::AlignI> object

See L<Bio::Align::AlignI>

=cut

sub write_aln {
    my ($self,@aln) = @_;
    my $width = $self->width;
    my ($seq,$desc,$rseq,$name,$count,$length,$seqsub,$start,$end,$strand,$id);

    foreach my $aln (@aln) {
        if( ! $aln || ! $aln->isa('Bio::Align::AlignI')  ) { 
            $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
            next;
        }
        #if( $self->force_displayname_flat ) {
        #    $aln->set_displayname_flat(1);
        #}
        my $seqct = 1;
        foreach $rseq ( $aln->each_seq() ) {
            ($start, $end, $strand, $id) = ($rseq->start, $rseq->end, $rseq->strand || 0,
                                            $rseq->display_id);
            $strand = ($strand == 1)  ? '+' :
                      ($strand == -1) ? '-' :
                      '';
            $name = sprintf("%d:%d-%d %s %s",$seqct,$start,$end,$strand,$id);
            $seq  = $rseq->seq();
            $desc = $rseq->description || '';
            $self->_print (">$name $desc\n") or return ;	
            $count = 0;
            $length = length($seq);
            if(defined $seq && $length > 0) {
            $seq =~ s/(.{1,$width})/$1\n/g;
            } else {
            $seq = "\n";
            }
            $self->_print($seq) || return 0;
            $seqct++;
        }
        my $alndesc = '';
        $alndesc = "score = ".$aln->score if ($aln->score);
        $self->_print("= $alndesc\n") || return 0;
        
    }
    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return 1;
}

=head2 _get_len

 Title   : _get_len
 Usage   : 
 Function: determine number of alphabetic chars
 Returns : integer
 Args    : sequence string

=cut

sub _get_len {
    my ($self,$seq) = @_;
    $seq =~ s/[^A-Z]//gi;
    return CORE::length($seq);
}

=head2 width

 Title   : width
 Usage   : $obj->width($newwidth)
           $width = $obj->width;
 Function: Get/set width of alignment
 Returns : integer value of width 
 Args    : on set, new value (a scalar or undef, optional)


=cut

sub width{
    my $self = shift;

    return $self->{'_width'} = shift if @_;
    return $self->{'_width'} || $WIDTH;
}

####### PRIVATE #######

sub _process_seq {
    my ($self, $entry, $seq) = @_;
    my ($start, $end, $strand, $seqname, $desc, $all);
    # put away last name and sequence
    if ( $entry =~ m{^>\s*\d+:(\d+)-(\d+)\s([+-]{1})(?:\s+(\S+)\s*(\S\.*)?)?} ) {
        ($start, $end, $seqname, $desc) = ($1, $2, $4, $5);
        $strand = ($3 eq '+')  ?  1  : -1;
    } else {
        $self->throw("Line does not comform to XMFA format:\n$entry");
    }
    my $seqobj = Bio::LocatableSeq->new(
					-nowarnonempty => 1,
					-strand      => $strand,
					-seq         => $seq,
					-display_id  => $seqname,
					-description => $desc || $all,
					-start       => $start,
					-end         => $end,
					-alphabet    => $self->alphabet,
					);
    $self->debug("Reading $seqname\n");
    return $seqobj;
}

1;