The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl

use warnings;
use strict;

use Carp;
use Pod::Usage;
use Getopt::Long;

use Bio::SeqFeature::Generic;
use Bio::FeatureIO;
use Bio::SeqFeature::Annotated;
use Bio::FeatureHolderI;
use Bio::SeqFeatureI;
use Bio::Annotation::OntologyTerm;

my %a; # commandline %a rguments

GetOptions( \%a, 'help|h|?', 'switch-query-reference|switch|s' ) or pod2usage( -verbose => 2 );

pod2usage( -verbose => 2 ) if ( $a{help} );

my ( $coords_file, $gff_file ) = @ARGV;
pod2usage(
    -msg       => "ERROR: Coordinate file not supplied or non-existent\n",
    -verbose   => 1,
    -noperldoc => 1
) unless ( $coords_file && -f $coords_file );

my %gff_opt = (
    -format  => 'gff',
    -version => 3,
    #-validate_terms => 1,
);

if ( $gff_file && $gff_file ne '-' ) {
    # real file
    $gff_opt{'-file'} = ">$gff_file";
}
else {
    # no file/stdout
    $gff_opt{'-fh'} = \*STDOUT;
}

my $feat_out = Bio::FeatureIO->new(%gff_opt);

open my $coords_fh, '<', $coords_file or confess "Can't open filehandle: $!";

#fast forward to NUCMER tag
while ( <$coords_fh> !~ /^NUCMER/ ) { }

my @header;
my $i = 0;
while ( my $c = <$coords_fh> ) {
        $c =~ s/\r\n/\n/;
    chomp $c;

    #skip empty lines
    next if ( $c =~ /^\s*$/ );

    #we found the header line
    if ( $c =~ /^\[/ ) {
        $c =~ y/[]//d;
        @header = split /\t/, $c;
        next;
    }

    #skip lines that don't start with coordinates(numbers)
    next unless ( $c =~ /^\d/ );

    #split line into columns
    my @contents = split /\t/, $c;

    if ( $a{'switch-query-reference'} ) {
        #FIXME quick and dirty, reorder fields, so that seq1 => seq2 and seq2 => seq1
        @contents = @contents[ 2, 3, 0, 1, 5, 4, 6, 8, 7 ];
    }

    my %coords;
    #figure out correct orientation
    if ( $contents[0] > $contents[1] && $contents[2] > $contents[3] ) {
        %coords = ( r_range => [ 1, 0 ], strand => 1, q_range => [ 3, 2 ] );
    }
    elsif ( $contents[0] < $contents[1] && $contents[2] > $contents[3] ) {
        %coords = ( r_range => [ 0, 1 ], strand => -1, q_range => [ 3, 2 ] );
    }
    elsif ( $contents[0] < $contents[1] && $contents[2] < $contents[3] ) {
        %coords = ( r_range => [ 0, 1 ], strand => 1, q_range => [ 2, 3 ] );
    }
    elsif ( $contents[0] > $contents[1] && $contents[2] < $contents[3] ) {
        %coords = ( r_range => [ 1, 0 ], strand => -1, q_range => [ 2, 3 ] );
    }
    else {
        die "error in coordinate parsing";
    }

    my $b = create_feat( \@contents, \%coords );
    $b->add_Annotation( Bio::Annotation::SimpleValue->new( -tagname => 'ID', -value => sprintf( "align%09d", $i++ ) ) );

    $feat_out->write_feature($b);
}

sub create_feat {
    my ( $c, $ranges ) = @_;

    my $qid   = $c->[8];
    my $rid   = $c->[7];
    my $score = $c->[6];

    my $strand = $ranges->{'strand'};

    my ( $qstart, $qend ) = @{$c}[ @{ $ranges->{'q_range'} } ];
    my ( $rstart, $rend ) = @{$c}[ @{ $ranges->{'r_range'} } ];

    my %orientation = (
        -score  => $score,
        -source => 'coords2gff',
        -seq_id => $rid,
        -type   => Bio::Annotation::OntologyTerm->new( -identifier => 'SO:0000343', -name => 'match' ),
    );
    my $b = Bio::SeqFeature::Annotated->new(
        %orientation,
        -start  => $rstart,
        -end    => $rend,
        -strand => $strand,
    );
    $b->add_Annotation( 'Target',
        Bio::Annotation::Target->new( -target_id => $qid, -start => $qstart, -end => $qend ) );
    return $b;
}
__END__

=head1 NAME

coords2gff.pl - convert mummer coords files to gff3 format

=head1 SYNOPSIS
    
    # store results in OUTPUT.gff3
    perl coords2gff.pl [OPTIONS] <MUMMER_ALIGNMENT.coords> <OUTPUT.gff3>

    # print results to standard output
    perl coords2gff.pl [OPTIONS] <MUMMER_ALIGNMENT.coords>
    # or
    perl coords2gff.pl [OPTIONS] <MUMMER_ALIGNMENT.coords> -

=head1 DESCRIPTION

This script converts mummer output in coords files to gff3 formatted output.

=head1 OPTIONS

Alternative option names are separated by "|".

=over 4

=item B<< --switch-query-reference|switch|s >>

Take the query (2nd sequence in the alignment) as reference and
the reference (1st sequence in the alignment) as query. Adjust coordinates
accordingly.

=item B<< --help|h|? >>

Complete help.

=back

=cut
 

=head1 SEE ALSO

L<Bio::FeatureIO>, L<Bio::SeqFeature::Generic>

=head1 AUTHOR

jw bargsten, C<< <joachim.bargsten at wur.nl> >>

=cut