The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# $Id: Config.pm kortsch $
#
# BioPerl module for Bio::Tools::Run::Bowtie::Config
#
# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Cared for by Dan Kortschak <dan.kortschak@adelaide.edu.au>
#
# Copyright Dan Kortschak and Mark A. Jensen
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::Tools::Run::Bowtie::Config - Configuration data for bowtie commands

=head1 SYNOPSIS

Used internally by L<Bio::Tools::Run::Bowtie>.

=head1 DESCRIPTION

This package exports information describing bowtie commands, parameters,
switches, and input and output filetypes for individual bowtie commands.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

bioperl-l@bioperl.org                  - General discussion
http://bioperl.org/wiki/Mailing_lists  - About the mailing lists

=head2 Support

Please direct usage questions or support issues to the mailing list:

L<bioperl-l@bioperl.org>

rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
address it. Please include a thorough description of the problem
with code and data examples if at all possible.

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:

  http://redmine.open-bio.org/projects/bioperl/

=head1 AUTHOR - Dan Kortschak

Email dan.kortschak adelaide.edu.au

Describe contact details here

=head1 CONTRIBUTORS

Additional contributors names and emails here

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

# Let the code begin...


package Bio::Tools::Run::Bowtie::Config;
use strict;
use warnings;
no warnings qw(qw);
use Bio::Root::Root;
use Exporter;
use base qw(Bio::Root::Root);

# Exporter is appended to whatever @ISA already holds so that the
# configuration tables below can be imported by name.
our @ISA;
push @ISA, qw(Exporter);

# Every table defined in this file is exported by default.
our @EXPORT = (
    qw( @program_commands %command_executables %format_lookup ),
    qw( %command_prefixes %composite_commands ),
    qw( @program_params   @program_switches ),
    qw( %incompat_params  %corequisite_switches ),
    qw( %param_translation %command_files %accepted_types ),
);

# Nothing is exported on request only.
our @EXPORT_OK = ();



# Names of the commands this configuration describes; the same names are
# used as keys of %command_executables, %command_prefixes and %command_files.
our @program_commands = ( 'single', 'paired', 'crossbow', 'build', 'inspect' );


# Executable invoked for each command name.
our %command_executables = (
    ( map { $_ => 'bowtie' } qw( single paired crossbow ) ),
    build   => 'bowtie-build',
    inspect => 'bowtie-inspect',
);

# Format name associated with an option name (presumably the format of the
# output the option produces -- confirm against the caller); undef where no
# named format is given.
# The entries are written in the intended "clobbering order", more delicate
# formats first, but note that a Perl hash does not retain that order.
our %format_lookup = (
    sam_format       => 'sam',
    refidx           => 'bowtie',
    concise          => undef,
    suppress_columns => undef,
    refout           => undef,
);


# Composite commands are pseudo-commands that run a sequence of commands.
# Layout:  composite command prefix => list of the prefixes of the commands
#          that the composite command runs.
# No composite commands are defined for bowtie.
our %composite_commands = ();

# Three-letter prefix for each command that takes parameters or switches.
# The prefix is what appears before the '|' in @program_params,
# @program_switches and the keys of %param_translation.
our %command_prefixes = (
    single   => 'one',
    paired   => 'par',
    crossbow => 'crb',
    build    => 'bld',
    inspect  => 'ins',
);

# Options that take a value, written as "<command prefix>|<option name>".
# Only the single-end ('one') names and the options specific to the other
# commands are listed; the code at the end of this file appends 'par' and
# 'crb' copies of the 'one' entries.
# Several names are misspelled (supress, defaul_mapq, alignmed_file); they
# are kept exactly as they are because they are the option names callers use.
our @program_params = (
    'command',

    # single-end alignment
    ( map { "one|$_" } qw(
        qualities skip upto trim5 trim3
        max_seed_mismatches max_qual_mismatch max_quality_sum
        snp_penalty snp_frac seed_length
        max_mismatches max_backtracks max_search_ram
        report_n_alignments supress supress_random
        offset_base defaul_mapq sam_rg suppress_columns
        alignmed_file unaligned_file excess_file
        threads offrate random_seed
    ) ),

    # paired-end alignment only
    ( map { "par|$_" } qw(
        qualities1 qualities2
        min_insert_size max_insert_size max_mate_attempts
    ) ),

    # index building
    ( map { "bld|$_" } qw(
        max_bucket_block max_bucket_div diff_cover off_rate ftabchars
        seed cutoff
    ) ),

    # index inspection
    ( map { "ins|$_" } qw( seq_width ) ),
);

# Options that take no value (flags), written as
# "<command prefix>|<option name>".
# Only the single-end ('one') names and the flags specific to the other
# commands are listed; the code at the end of this file appends 'par' and
# 'crb' copies of the 'one' entries.
our @program_switches = (
    # single-end alignment
    qw(
        one|fastq
        one|fasta
        one|raw
        one|inline
        one|color_space
        one|phred33
        one|phred64
        one|solexa
        one|solexa1_3
        one|integer_qual
        one|no_maq_rounding
        one|no_forward_alignment
        one|no_reverse_alignment
        one|try_hard
        one|all
        one|best
        one|strata
        one|fix_strand_bias
        one|print_color
        one|color_quals
        one|color_keep_ends
        one|sam_format
        one|sam_no_head
        one|sam_no_sq
        one|concise
        one|time
        one|be_quiet
        one|ref_map
        one|ref_index
        one|full_ref_name
        one|memory_mapped_io
        one|shared_memory
    ),

    # paired-end mate orientation.
    # 'reverse_forward' is the name %param_translation maps to bowtie's
    # 'rf' option, so it has to be registered here to be usable.
    # 'reverse_reverse' has no translation entry; it stays registered so
    # that callers already passing it are not rejected.
    qw(
        par|forward_reverse
        par|reverse_forward
        par|reverse_reverse
        par|forward_forward
    ),

    # index building
    qw(
        bld|fasta
        bld|inline
        bld|color_space
        bld|both
        bld|no_auto
        bld|packed
        bld|no_diff_cover
        bld|no_ref
        bld|just_ref
        bld|NtoA
        bld|big_endian
        bld|little_endian
    ),

    # index inspection
    qw(
        ins|names_only
        ins|summary
        ins|reconstruct
    ),
);

# Options that must not be combined: each key lists the options it is
# incompatible with.  Names are given without a command prefix, so be
# careful of collisions between commands - this could do with command
# specification.
#
# The mate-orientation options are mutually exclusive.  The group contains
# both 'reverse_forward' (the name %param_translation maps to bowtie's 'rf')
# and 'reverse_reverse' (the name registered in @program_switches).  An
# option never lists itself.
our %incompat_params = (
    qualities                => [qw( qualities1 qualities2 )],
    qualities1               => [qw( qualities )],
    qualities2               => [qw( qualities )],
    max_seed_mismatches      => [qw( max_mismatches )],
    max_mismatches           => [qw( max_seed_mismatches )],
    fastq                    => [qw( fasta raw inline )],
    fasta                    => [qw( fastq raw inline )],
    raw                      => [qw( fastq fasta inline )],
    inline                   => [qw( fastq fasta raw )],
    phred33                  => [qw( phred64 solexa solexa1_3 integer_qual )],
    phred64                  => [qw( phred33 solexa solexa1_3 integer_qual )],
    solexa                   => [qw( phred33 phred64 solexa1_3 integer_qual )],
    solexa1_3                => [qw( phred33 phred64 solexa integer_qual )],
    integer_qual             => [qw( phred33 phred64 solexa solexa1_3 )],
    no_forward_alignment     => [qw( no_reverse_alignment )],
    no_reverse_alignment     => [qw( no_forward_alignment )],
    all                      => [qw( report_n_alignments )],
    report_n_alignments      => [qw( all )],
    forward_reverse          => [qw( reverse_forward reverse_reverse forward_forward )],
    reverse_forward          => [qw( forward_reverse reverse_reverse forward_forward )],
    reverse_reverse          => [qw( forward_reverse reverse_forward forward_forward )],
    forward_forward          => [qw( forward_reverse reverse_forward reverse_reverse )],
    color_space              => [qw( both )],
    both                     => [qw( color_space )]
);

# Options that only make sense together with others: each key lists the
# options that have to accompany it.  Names carry no command prefix.
# NOTE(review): the key 'suppress_random' is spelled differently from the
# 'supress_random' entry in @program_params, and 'defaul_mapq' matches the
# (misspelled) registered name - confirm which spellings the caller checks.
our %corequisite_switches = (
    # both quality files of a pair are needed
    qualities1      => ['qualities2'],
    qualities2      => ['qualities1'],

    # need 'best'
    strata          => ['best'],
    suppress_random => ['best'],

    # need 'color_space'
    snp_penalty     => ['color_space'],
    snp_frac        => ['color_space'],
    print_color     => ['color_space'],
    color_quals     => ['color_space'],
    color_keep_ends => ['color_space'],

    # need 'sam_format'
    defaul_mapq     => ['sam_format'],
    sam_no_head     => ['sam_format'],
    sam_no_sq       => ['sam_format'],
    sam_rg          => ['sam_format'],
);



# Maps "<command prefix>|<option name>" to the option string handed to the
# corresponding bowtie program, written without leading dashes.
#
# Option spellings follow the bowtie manual; in particular the colorspace
# quality option is 'col-cqual' and the SAM header options are 'sam-nohead'
# and 'sam-nosq' (with dashes, like 'sam-RG').
#
# The 'par' and 'crb' sections carry both spellings of the -m / -M options:
# @program_params registers the misspelled 'one|supress' and
# 'one|supress_random', and the names derived from them for paired and
# crossbow runs inherit that spelling, so the misspelled keys have to
# resolve as well as the correctly spelled ones.
our %param_translation = (
    'one|fastq'                    => 'q',
    'one|fasta'                    => 'f',
    'one|raw'                      => 'r',
    'one|inline'                   => 'c',
    'one|color_space'              => 'C',
    'one|qualities'                => 'Q',
    'one|skip'                     => 's',
    'one|upto'                     => 'u',
    'one|trim5'                    => '5',
    'one|trim3'                    => '3',
    'one|phred33'                  => 'phred33-quals',
    'one|phred64'                  => 'phred64-quals',
    'one|solexa'                   => 'solexa-quals',
    'one|solexa1_3'                => 'solexa1.3-quals',
    'one|integer_qual'             => 'integer-quals',
    'one|max_seed_mismatches'      => 'n',
    'one|max_qual_mismatch'        => 'e',
    'one|seed_length'              => 'l',
    'one|no_maq_rounding'          => 'nomaqround',
    'one|max_mismatches'           => 'v',
    'one|no_forward_alignment'     => 'nofw',
    'one|no_reverse_alignment'     => 'norc',
    'one|max_backtracks'           => 'maxbts',
    'one|try_hard'                 => 'y',
    'one|max_search_ram'           => 'chunkmbs',
    'one|report_n_alignments'      => 'k',
    'one|all'                      => 'a',
    'one|supress'                  => 'm',
    'one|supress_random'           => 'M',
    'one|best'                     => 'best',
    'one|strata'                   => 'strata',
    'one|snp_penalty'              => 'snpphred',
    'one|snp_frac'                 => 'snpfrac',
    'one|print_color'              => 'col-cseq',
    'one|color_quals'              => 'col-cqual',
    'one|color_keep_ends'          => 'col-keepends',
    'one|sam_format'               => 'S',
    'one|defaul_mapq'              => 'mapq',
    'one|sam_no_head'              => 'sam-nohead',
    'one|sam_no_sq'                => 'sam-nosq',
    'one|sam_rg'                   => 'sam-RG',
    'one|suppress_columns'         => 'suppress',
    'one|time'                     => 't',
    'one|offset_base'              => 'B',
    'one|be_quiet'                 => 'quiet',
    'one|ref_map'                  => 'refout',
    'one|ref_index'                => 'refidx',
    'one|alignmed_file'            => 'al',
    'one|unaligned_file'           => 'un',
    'one|excess_file'              => 'max',
    'one|full_ref_name'            => 'fullref',
    'one|threads'                  => 'p',
    'one|offrate'                  => 'o',
    'one|memory_mapped_io'         => 'mm',
    'one|shared_memory'            => 'shmem',
    'one|random_seed'              => 'seed',
    'one|version'                  => 'version',

    'par|fastq'                    => 'q',
    'par|fasta'                    => 'f',
    'par|raw'                      => 'r',
    'par|inline'                   => 'c',
    'par|color_space'              => 'C',
    'par|qualities'                => 'Q', # Don't know if bowtie will accept this - won't break if left in
    'par|qualities1'               => 'Q1',
    'par|qualities2'               => 'Q2',
    'par|skip'                     => 's',
    'par|upto'                     => 'u',
    'par|trim5'                    => '5',
    'par|trim3'                    => '3',
    'par|phred33'                  => 'phred33-quals',
    'par|phred64'                  => 'phred64-quals',
    'par|solexa'                   => 'solexa-quals',
    'par|solexa1_3'                => 'solexa1.3-quals',
    'par|integer_qual'             => 'integer-quals',
    'par|max_seed_mismatches'      => 'n',
    'par|max_qual_mismatch'        => 'e',
    'par|seed_length'              => 'l',
    'par|no_maq_rounding'          => 'nomaqround',
    'par|max_mismatches'           => 'v',
    'par|min_insert_size'          => 'I',
    'par|max_insert_size'          => 'X',
    'par|forward_reverse'          => 'fr',
    'par|reverse_forward'          => 'rf',
    'par|forward_forward'          => 'ff',
    'par|no_forward_alignment'     => 'nofw',
    'par|no_reverse_alignment'     => 'norc',
    'par|max_backtracks'           => 'maxbts',
    'par|max_mate_attempts'        => 'pairtries',
    'par|try_hard'                 => 'y',
    'par|max_search_ram'           => 'chunkmbs',
    'par|report_n_alignments'      => 'k',
    'par|all'                      => 'a',
    'par|supress'                  => 'm', # spelling derived from 'one|supress'
    'par|supress_random'           => 'M', # spelling derived from 'one|supress_random'
    'par|suppress'                 => 'm',
    'par|suppress_random'          => 'M',
    'par|best'                     => 'best',
    'par|strata'                   => 'strata',
    'par|snp_penalty'              => 'snpphred',
    'par|snp_frac'                 => 'snpfrac',
    'par|print_color'              => 'col-cseq',
    'par|color_quals'              => 'col-cqual',
    'par|color_keep_ends'          => 'col-keepends',
    'par|sam_format'               => 'S',
    'par|defaul_mapq'              => 'mapq',
    'par|sam_no_head'              => 'sam-nohead',
    'par|sam_no_sq'                => 'sam-nosq',
    'par|sam_rg'                   => 'sam-RG',
    'par|suppress_columns'         => 'suppress',
    'par|time'                     => 't',
    'par|offset_base'              => 'B',
    'par|be_quiet'                 => 'quiet',
    'par|ref_map'                  => 'refout',
    'par|ref_index'                => 'refidx',
    'par|alignmed_file'            => 'al',
    'par|unaligned_file'           => 'un',
    'par|excess_file'              => 'max',
    'par|full_ref_name'            => 'fullref',
    'par|threads'                  => 'p',
    'par|offrate'                  => 'o',
    'par|memory_mapped_io'         => 'mm',
    'par|shared_memory'            => 'shmem',
    'par|random_seed'              => 'seed',
    'par|version'                  => 'version',

    'crb|fastq'                    => 'q',
    'crb|fasta'                    => 'f',
    'crb|raw'                      => 'r',
    'crb|inline'                   => 'c',
    'crb|color_space'              => 'C',
    'crb|qualities'                => 'Q',
    'crb|skip'                     => 's',
    'crb|upto'                     => 'u',
    'crb|trim5'                    => '5',
    'crb|trim3'                    => '3',
    'crb|phred33'                  => 'phred33-quals',
    'crb|phred64'                  => 'phred64-quals',
    'crb|solexa'                   => 'solexa-quals',
    'crb|solexa1_3'                => 'solexa1.3-quals',
    'crb|integer_qual'             => 'integer-quals',
    'crb|max_seed_mismatches'      => 'n',
    'crb|max_qual_mismatch'        => 'e',
    'crb|seed_length'              => 'l',
    'crb|no_maq_rounding'          => 'nomaqround',
    'crb|max_mismatches'           => 'v',
    'crb|min_insert_size'          => 'I',
    'crb|max_insert_size'          => 'X',
    'crb|forward_reverse'          => 'fr',
    'crb|reverse_forward'          => 'rf',
    'crb|forward_forward'          => 'ff',
    'crb|no_forward_alignment'     => 'nofw',
    'crb|no_reverse_alignment'     => 'norc',
    'crb|max_backtracks'           => 'maxbts',
    'crb|max_mate_attempts'        => 'pairtries',
    'crb|try_hard'                 => 'y',
    'crb|max_search_ram'           => 'chunkmbs',
    'crb|report_n_alignments'      => 'k',
    'crb|all'                      => 'a',
    'crb|supress'                  => 'm', # spelling derived from 'one|supress'
    'crb|supress_random'           => 'M', # spelling derived from 'one|supress_random'
    'crb|suppress'                 => 'm',
    'crb|suppress_random'          => 'M',
    'crb|best'                     => 'best',
    'crb|strata'                   => 'strata',
    'crb|snp_penalty'              => 'snpphred',
    'crb|snp_frac'                 => 'snpfrac',
    'crb|print_color'              => 'col-cseq',
    'crb|color_quals'              => 'col-cqual',
    'crb|color_keep_ends'          => 'col-keepends',
    'crb|sam_format'               => 'S',
    'crb|defaul_mapq'              => 'mapq',
    'crb|sam_no_head'              => 'sam-nohead',
    'crb|sam_no_sq'                => 'sam-nosq',
    'crb|sam_rg'                   => 'sam-RG',
    'crb|suppress_columns'         => 'suppress',
    'crb|time'                     => 't',
    'crb|offset_base'              => 'B',
    'crb|be_quiet'                 => 'quiet',
    'crb|ref_map'                  => 'refout',
    'crb|ref_index'                => 'refidx',
    'crb|alignmed_file'            => 'al',
    'crb|unaligned_file'           => 'un',
    'crb|excess_file'              => 'max',
    'crb|full_ref_name'            => 'fullref',
    'crb|threads'                  => 'p',
    'crb|offrate'                  => 'o',
    'crb|memory_mapped_io'         => 'mm',
    'crb|shared_memory'            => 'shmem',
    'crb|random_seed'              => 'seed',
    'crb|version'                  => 'version',

    'bld|fasta'                    => 'f',
    'bld|inline'                   => 'c',
    'bld|color_space'              => 'C',
    'bld|both'                     => 'B',
    'bld|no_auto'                  => 'a',
    'bld|packed'                   => 'p',
    'bld|max_bucket_block'         => 'bmax',
    'bld|max_bucket_div'           => 'bmaxdivn',
    'bld|diff_cover'               => 'dcv',
    'bld|no_diff_cover'            => 'nodc',
    'bld|no_ref'                   => 'r',
    'bld|just_ref'                 => '3',
    'bld|off_rate'                 => 'o',
    'bld|ftabchars'                => 't',
    'bld|NtoA'                     => 'ntoa',
    'bld|big_endian'               => 'big',
    'bld|little_endian'            => 'little',
    'bld|seed'                     => 'seed',
    'bld|cutoff'                   => 'cutoff',
    'bld|version'                  => 'version',

    'ins|seq_width'                => 'a',
    'ins|names_only'               => 'n',
    'ins|summary'                  => 's',
    'ins|reconstruct'              => 'e',
    'ins|version'                  => 'version'
    );

# File arguments of each command.
#
# Each list gives the files in the order in which they are required on the
# command line.  With the special characters removed, the strings are the
# keys of the named parameters to run_bowtie.
#
# Special characters:
#   '#'         the file is optional
#   '*'         a variable number of files of this type
#   '<' or '>'  the file is redirected from stdin / to stdout
#
our %command_files = (
    single   => [ 'ind', 'seq', '#out' ],
    paired   => [ 'ind', '-1|seq', '-2|seq2', '#out' ],
    crossbow => [ 'ind', '-12|seq', '#out' ],
    build    => [ 'ref', '#out' ],
    inspect  => [ 'ind', '>#out' ],
);

# File types accepted for each file argument named in %command_files.
# 'ind' is not a single file, so it has no entry here.
our %accepted_types = (
    seq  => [ 'fasta', 'fastq', 'raw', 'crossbow' ],
    seq2 => [ 'fasta', 'fastq', 'raw' ],
    ref  => [ 'fasta' ],
);

# Derive the option names for paired-end ('par') and crossbow ('crb') runs
# from the names already in @program_params and @program_switches:
#
#   * every "one|NAME" also becomes "par|NAME";
#   * every "par|NAME" - listed explicitly or just derived - also becomes
#     "crb|NAME", unless it matches the exclusion pattern for that list.
#
# The additions are collected first and appended afterwards, because perlsyn
# warns against adding elements to an array while foreach iterates over it.
# The resulting element order is the same as the in-loop push produced:
# original entries, then the derived 'par' names, then the 'crb' names.
for my $derivation (
    [ \@program_params,   qr/^par\|(?:fasta|fastq|raw|qualities[12])/ ],
    [ \@program_switches, qr/^par\|(?:fasta|fastq|raw)/ ],
) {
    my ($options, $not_for_crossbow) = @{$derivation};

    my @paired = map { m/^one\|(.*)/ ? "par|$1" : () } @{$options};
    push @{$options}, @paired;

    # Runs after the 'par' names were appended so that they are included.
    my @crossbow = map  { m/^par\|(.*)/ ? "crb|$1" : () }
                   grep { !m/$not_for_crossbow/ } @{$options};
    push @{$options}, @crossbow;
}

1;