The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl

#-----------------------------------------------------------------------------
# PROGRAM : seq_pattern.pl 
# PURPOSE : This is a simple driver used to test the Bio::Tools::SeqPattern.pm
#           module for working with sequence patterns (regexps that recognize
#           nucleotide or peptide sequences).
# AUTHOR  : Steve Chervitz (sac@bioperl.org)
# CREATED : 28 Aug 1997
# USAGE   : seq_pattern.pl -h
# COMMENTS: 
#    This is a driver script for the Bio::Tools::SeqPattern.pm Bioperl module
#    that can be used for working with both nucleotide and peptide sequence and 
#    offers features such as:
#    
#      -- generate reverse complement of sequence pattern
#      -- ensuring pattern has no invalid characters
#      -- untainting pattern
#      -- expanding ambiguity codes. 
#    
#     Functionality is not yet complete but it may be of use as-is. 
# 
# INSTALLATION
#    Edit the use lib "...." line to point the directory
#    containing your Bioperl modules.
#
# DOCUMENTATION:
#  http://genome-www.stanford.edu/perlOOP/bioperl/lib/Bio/Tools/SeqPattern.pm.html
#
#-----------------------------------------------------------------------------

use lib "/Users/steve/lib/perl";
use Bio::Tools::SeqPattern ();
use Getopt::Std;

$opt_h = 0;
$opt_n = 0;
$opt_p = 0;
$opt_r = 0;

getopts('hnprv:');
$pat = $ARGV[0] || '';

$opt_h and die <<"QQ_USAGE_QQ";

Usage: seq_pattern.pl [-n|p|r|h] 'REGEXP' 

 regexp : full-regular expression for a nucleotide or peptide sequence.
          Must be listed *after* one of the following options:
 -n     : interpret regexp as a nucleotide pattern.
 -p     : interpret regexp as a peptide pattern.
 -r     : output only the reverse complement of the nucleotide pattern.
 -h     : print usage.
 
QQ_USAGE_QQ


## Nucleotide test patterns (most are based on actual patterns submitted by users):

%nucpat = (1 =>'YR...CG(CCG){5,7}CG[^G]TN{10,}[SA]{4}NN(ACA){2,}GCGTTT.{20,40}GT>',
	   2 =>'cggnnn[ta][ta][ta]n{3,5}[ta][ta][ta]nnnccg',
	   3 =>'<ATGX{6,10}RTTRTT',
	   4 =>'cggnnnwwwn{3,5}wwwnnnccg',
	   5 =>'(CCCCT)N{1,200}(agggg)N{1,200}(agggg)', 
	   6 =>'cccct{2,}', 
	   7 =>'(a){10,40}', 
	   8 =>'(cag){36,}', 
	   9 =>'rgaatgx{2,}ygtttca(cag){5,}',
	   10 =>'yattgtt(n){20,80}yattgtt', 
	   11 =>'yattgtt(aca){20,80}yattgtt', 
	   12 =>'TATAAAN{30,100}[AT][CAT][AT]YCAAR[CAT][AT][CAT]', 
	   13 =>'TGACTC[N]{1,300}TGACTC',
	   14 =>'TGACTCN*GAGTCAN*GAGTCAN*TGACTC',
	   15 =>'TGACTC(TCA)*GAGTCA',
	   16 =>'TGACTCN*GAG(TCA)*GAGTCA',
	   17 =>'[at][at]ttcacatgy',
	   );

%peppat = (1 =>'<X{10,}[WFY]XXXDN[BK][ST]Z{5,}>',
	   2 =>'<x{10,40}[gas]x[gasct]x*[gascdn]x[gas]x{0,10}[bst]{8,}x{0,8}>', 
	   );

#----------------------
# Main

if($opt_r) {
    print Bio::Tools::SeqPattern->new(-SEQ =>$pat, -TYPE =>'Dna')->revcom->str,"\n";
    
} else {
    test_nuc($pat) if ($opt_n and !$opt_p);
    test_pep($pat) if ($opt_p and !$opt_n);
    (test_nuc($pat), test_pep($pat)) if !($opt_p or $opt_n);
}
    
exit 0;

#----------------------

sub test_nuc {
# Create nucleotide pattern object:
    my $pat = shift;
    $pat ||= $nucpat{9};

    $npat = new Bio::Tools::SeqPattern(-seq =>$pat, -type =>'Dna');

    print "\nNucleotide Pattern:\n";
    print "-----------------------\n";
    printf "%18s: %s\n", 'Type',    $npat->type;
    printf "%18s: %s\n", 'Original',$npat->str;
    printf "%18s: %s\n", 'Expanded', $npat->expand;
    printf "%18s: %s\n", 'Reverse-Comp', $npat->revcom->str;
    printf "%18s: %s\n", 'Rev-Comp+Expanded', $npat->revcom(1)->str; # Hate this syntax. May change.
    print "\n";
}


sub test_pep {
# Create peptide pattern object:
    my $pat = shift;
    $pat ||= $peppat{1};

    $ppat = new Bio::Tools::SeqPattern(-seq =>$pat, -type =>'Amino');

    print "\nPeptide Pattern:\n";
    print "-----------------------\n";
    printf "%18s: %s\n", 'Type',    $ppat->type;
    printf "%18s: %s\n", 'Original',$ppat->str;
    printf "%18s: %s\n", 'Expanded', $ppat->expand;
    print "\n";
}