# bin/aln2aln.pl - metacpan.org

#!/usr/bin/env perl

use warnings;
use strict;

use Getopt::Long;
use Pod::Usage;
use Bio::AlignIO;
use Data::Dumper;

# Command line options and their defaults.
my $help;                       # --help: print the full documentation
my $informat  = 'fasta';        # --from: format of the input alignment
my $outformat = 'clustalw';     # --to: format of the output alignment
my $id;                         # --id: id to set on the converted alignments
my $shorten_id_from_end;        # --shorten-id-from-end / -s

# GetOptions reports the offending option on STDERR by itself and does not
# set $!, so on failure show the usage summary instead of appending a stale
# errno string to the error message.
GetOptions(
    'id=s'                  => \$id,
    'to=s'                  => \$outformat,
    'from=s'                => \$informat,
    'help'                  => \$help,
    'shorten-id-from-end|s' => \$shorten_id_from_end,
) or pod2usage(
    -message => 'There is a problem with parsing the options.',
    -verbose => 1,
    -exitval => 2,
);

# --help prints the complete POD of this script (without invoking perldoc).
pod2usage( -verbose => 2, -noperldoc => 1 ) if $help;

# Get the input and output file names from the remaining arguments.
my ( $in_file, $out_file ) = @ARGV;

# Check for a missing input file name before using it: otherwise -f and the
# error message below operate on undef and only produce "uninitialized
# value" warnings followed by a cryptic ">><< is no file".
pod2usage(
    -message => 'No input alignment file given.',
    -verbose => 1,
    -exitval => 2,
) unless ( defined $in_file && length $in_file );

# Die if the input file doesn't exist or is not a plain file.
die ">>$in_file<< is no file"
    unless ( -f $in_file );

# Die if the output file name is missing or empty. defined/length is used
# instead of a truth test so that a file literally named "0" is accepted.
die ">>" . ( defined $out_file ? $out_file : '' )
    . "<< is not a valid output file name"
    unless ( defined $out_file && length $out_file );

# Reader for the input alignment, parsed as $informat.
my $aln_in_fh = Bio::AlignIO->new( -file => $in_file, -format => $informat );

# Writer for the converted alignment in $outformat; the file name is
# prefixed with '>' before it is handed to Bio::AlignIO.
my $aln_out_fh
    = Bio::AlignIO->new( -file => ">$out_file", -format => $outformat );

no warnings 'redefine';

# Replacement for Bio::SimpleAlign::set_displayname_flat.
#
# For every sequence returned by each_seq(), registers a display name made of
# the last 10 characters of the sequence id (substr with offset -10), keyed
# by the value of get_nse(). Always returns 1.
sub Bio::SimpleAlign::set_displayname_flat {
    my ($aln) = @_;

    for my $member ( $aln->each_seq() ) {
        my $name_key   = $member->get_nse();
        my $short_name = substr( $member->id, -10 );
        $aln->displayname( $name_key, $short_name );
    }

    return 1;
}

# Read every alignment from the input stream and write it to the output
# stream in the requested format.
while ( my $align_object = $aln_in_fh->next_aln ) {

    # Override the alignment id only if --id was given. Testing with
    # defined/length (instead of truth) also accepts an id of "0".
    $align_object->id($id)
        if ( defined $id && length $id );

    # --shorten-id-from-end: use the last 10 characters of each sequence id
    # as display name (see the set_displayname_flat override in this file).
    # The former per-sequence loop that followed this call only computed a
    # value into an unused variable and has been removed as dead code.
    $align_object->set_displayname_flat
        if ($shorten_id_from_end);

    $aln_out_fh->write_aln($align_object);
}

__END__

=head1 NAME

aln2aln - convert different alignment formats to another alignment format

=head1 SYNOPSIS

    perl aln2aln.pl [OPTIONS] [--help] <input_alignment_file> <file_in_other_format>

=head1 DESCRIPTION

Takes <input_alignment_file> and converts it to <file_in_other_format>.
By default, it assumes 'fasta' as input format, but you can
use the --from option to make aln2aln read a different
format. The same applies to --to for the output format.

Some formats cannot handle sequence identifiers longer than about 30
characters, so check the identifier length first if strange problems occur.

=head1 OPTIONS

=over 4

=item B<< --shorten-id-from-end >>, B<< -s >>

If the sequence ids from the input alignment are too long, use only the last 10 characters of each id.

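=item B<< --from <FORMAT> >> and B<< --to <FORMAT> >>

Read the input alignment in the format given by --from (default: fasta) and
write the output alignment in the format given by --to (default: clustalw).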

FORMATS:

    fasta       FASTA format
    pfam        pfam format
    selex       selex (hmmer) format
    stockholm   stockholm format
    prodom      prodom (protein domain) format
    msf         msf (GCG) format
    mase        mase (seaview) format
    bl2seq      Bl2seq Blast output
    nexus       Swofford et al NEXUS format
    pfam        Pfam sequence alignment format
    phylip      Felsenstein's PHYLIP format
    clustalw    ClustalW format

=back

=head1 SEE ALSO

L<Bio::AlignIO>

=head1 AUTHOR

jw bargsten, C<< <joachim.bargsten at wur.nl> >>

=cut

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)