#
# GeneDesign engine
#
=head1 NAME
Bio::GeneDesign
=head1 VERSION
Version 5.51
=head1 DESCRIPTION
=head1 AUTHOR
Sarah Richardson <smrichardson@lbl.gov>
=cut
package Bio::GeneDesign;
use base qw(Bio::Root::Root);
use Bio::GeneDesign::ConfigData;
use Bio::GeneDesign::Basic qw(:GD);
use Bio::GeneDesign::CodonJuggle qw(:GD);
use Bio::GeneDesign::Codons qw(:GD);
use Bio::GeneDesign::Oligo qw(:GD);
use Bio::GeneDesign::Random qw(:GD);
use Bio::GeneDesign::RestrictionEnzymes qw(:GD);
use Bio::GeneDesign::ReverseTranslate qw(:GD);
use Bio::GeneDesign::PrefixTree;
use File::Basename;
use Bio::SeqIO;
use Carp;
use strict;
use warnings;
my $VERSION = 5.51;
=head1 CONSTRUCTORS
=head2 new
Returns an initialized Bio::GeneDesign object.
This function reads the ConfigData written at installation, imports the
relevant sublibraries, and sets the relevant paths.
my $GD = Bio::GeneDesign->new();
=cut
sub new
{
my ($class, @args) = @_;
my $self = $class->SUPER::new(@args);
bless $self, $class;
$self->{script_path} = Bio::GeneDesign::ConfigData->config('script_path');
$self->{conf} = Bio::GeneDesign::ConfigData->config('conf_path');
$self->{conf} .= q{/} unless substr($self->{conf}, -1, 1) eq q{/};
$self->{tmp_path} = Bio::GeneDesign::ConfigData->config('tmp_path');
$self->{tmp_path} .= q{/} unless substr($self->{tmp_path}, -1, 1) eq q{/};
$self->{graph} = Bio::GeneDesign::ConfigData->config('graphing_support');
if ($self->{graph})
{
require Bio::GeneDesign::Graph;
import Bio::GeneDesign::Graph qw(:GD);
}
$self->{EMBOSS} = Bio::GeneDesign::ConfigData->config('EMBOSS_support');
if ($self->{EMBOSS})
{
require Bio::GeneDesign::Palindrome;
import Bio::GeneDesign::Palindrome qw(:GD);
}
$self->{BLAST} = Bio::GeneDesign::ConfigData->config('BLAST_support');
if ($self->BLAST)
{
#$ENV{BLASTPLUSDIR} = Bio::GeneDesign::ConfigData->config('blast_path');
require Bio::GeneDesign::Blast;
import Bio::GeneDesign::Blast qw(:GD);
}
$self->{vmatch} = Bio::GeneDesign::ConfigData->config('vmatch_support');
if ($self->{vmatch})
{
require Bio::GeneDesign::Vmatch;
import Bio::GeneDesign::Vmatch qw(:GD);
}
$self->{codon_path} = $self->{conf} . 'codon_tables/';
$self->{organism} = undef;
$self->{codontable} = undef;
$self->{enzyme_set} = undef;
$self->{version} = $VERSION;
$self->{amb_trans_memo} = {};
return $self;
}
=head1 ACCESSORS
=cut
=head2 EMBOSS
returns a value if EMBOSS_support was vetted and approved during installation.
=cut
sub EMBOSS
{
my ($self) = @_;
return $self->{'EMBOSS'};
}
=head2 BLAST
returns a value if BLAST_support was vetted and approved during installation.
=cut
sub BLAST
{
my ($self) = @_;
return $self->{'BLAST'};
}
=head2 graph
returns a value if graphing_support was vetted and approved during installation.
=cut
sub graph
{
my ($self) = @_;
return $self->{'graph'};
}
=head2 vmatch
returns a value if vmatch_support was vetted and approved during installation.
=cut
sub vmatch
{
my ($self) = @_;
return $self->{'vmatch'};
}
=head2 enzyme_set
Returns a hash reference where the keys are enzyme names and the values are
L<RestrictionEnzyme|Bio::GeneDesign::RestrictionEnzyme> objects, if the enzyme
set has been defined.
To set this value, use L<set_restriction_enzymes|/set_restriction_enzymes>.
=cut
sub enzyme_set
{
my ($self) = @_;
return $self->{'enzyme_set'};
}
=head2 enzyme_set_name
Returns the name of the enzyme set in use, if there is one.
To set this value, use L<set_restriction_enzymes|/set_restriction_enzymes>.
=cut
sub enzyme_set_name
{
my ($self) = @_;
return $self->{'enzyme_set_name'};
}
=head2 all_enzymes
Returns a hash reference where the keys are enzyme names and the values are
L<RestrictionEnzyme|Bio::GeneDesign::RestrictionEnzyme> objects
To set this value, use L<set_restriction_enzymes|/set_restriction_enzymes>.
=cut
sub all_enzymes
{
my ($self) = @_;
return $self->{'all_enzymes'};
}
=head2 organism
Returns the name of the organism in use, if there is one.
To set this value, use L<set_organism|/set_organism>.
=cut
sub organism
{
my ($self) = @_;
return $self->{'organism'};
}
=head2 codontable
Returns the codon table in use, if there is one.
The codon table is a hash reference where the keys are upper case nucleotides
and the values are upper case single letter amino acids.
my $codon_t = $GD->codontable();
$codon_t->{"ATG"} eq "M" || die;
To set this value, use L<set_codontable|/set_codontable>.
=cut
sub codontable
{
my ($self) = @_;
return $self->{'codontable'};
}
=head2 reversecodontable
Returns the reverse codon table in use, if there is one.
The reverse codon table is a hash reference where the keys are upper case single
letter amino acids and the values are upper case nucleotides.
my $revcodon_t = $GD->reversecodontable();
$revcodon_t->{"M"} eq "ATG" || die;
This value is set automatically when L<set_codontable|/set_codontable> is run.
=cut
sub reversecodontable
{
my ($self) = @_;
return $self->{'reversecodontable'};
}
=head2 rscutable
Returns the RSCU table in use, if there is one.
The RSCU codon table is a hash reference where the keys are upper case
nucleotides and the values are floats.
my $rscu_t = $GD->rscutable();
$rscu_t->{"ATG"} eq 1.00 || die;
To set this value, use L<set_rscu_table|/set_rscutable>.
=cut
sub rscutable
{
my ($self) = @_;
return $self->{'rscutable'};
}
=head1 FUNCTIONS
=cut
=head2 melt
my $Tm = $GD->melt(-sequence => $myseq);
The -sequence argument is required.
Returns the melting temperature of a DNA sequence.
You can set the salt and DNA concentrations with the -salt and -concentration
arguments; they are 50mm (.05) and 100 pm (.0000001) respectively.
You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object to be analyzed with the -sequence flag.
There are four different formulae to choose from. If you wish to use the nearest
neighbor method, use the -nearest_neighbor flag. Otherwise the appropriate
formula will be determined by the length of your -sequence argument.
For sequences under 14 base pairs:
Tm = (4 * #GC) + (2 * #AT).
For sequences between 14 and 50 base pairs:
Tm = 100.5 + (41 * #GC / length) - (820 / length) + 16.6 * log10(salt)
For sequences over 50 base pairs:
Tm = 81.5 + (41 * #GC / length) - (500 / length) + 16.6 * log10(salt) - .62;
=cut
sub melt
{
my ($self, @args) = @_;
my ($seq, $salt, $conc, $nnflag)
= $self->_rearrange([qw(
sequence
salt
concentration
nearest_neighbor)], @args);
$self->throw("No sequence provided for the melt function")
unless ($seq);
$nnflag = $nnflag ? 1 : 0;
$salt = $salt || .05;
$conc = $conc || .0000001;
my $str = $self->_stripdown($seq, q{}, 1);
if ($nnflag)
{
return _ntherm($str, $salt, $conc);
}
return _melt($str, $salt, $conc);
}
=head2 complement
$my_seq = "AATTCG";
my $complemented_seq = $GD->complement($my_seq);
$complemented_seq eq "TTAAGC" || die;
my $reverse_complemented_seq = $GD->complement($my_seq, 1);
$reverse_complemented_seq eq "CGAATT" || die;
#clean
my $complemented_seq = $GD->complement(-sequence => $my_seq);
$complemented_seq eq "TTAAGC" || die;
my $reverse_complemented_seq = $GD->complement(-sequence => $my_seq,
-reverse => 1);
$reverse_complemented_seq eq "CGAATT" || die;
The -sequence argument is required.
Complements or reverse complements a DNA sequence.
You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object to be processed.
If you also pass along a true statement, the sequence will be reversed and
complemented.
=cut
sub complement
{
my ($self, @args) = @_;
my ($seq, $swit)
= $self->_rearrange([qw(
sequence
reverse)], @args);
$self->throw("No sequence provided for the complement function")
unless $seq;
$swit = $swit || 0;
my $str = $self->_stripdown($seq, q{}, 1);
return _complement($str, $swit);
}
=head2 count
$my_seq = "AATTCG";
my $count = $GD->count($my_seq);
$count->{C} == 1 || die;
$count->{G} == 1 || die;
$count->{A} == 2 || die;
$count->{GCp} == 33.3 || die;
$count->{ATp} == 66.7 || die;
#clean
my $count = $GD->count(-sequence => $my_seq);
You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object.
the count function counts the bases in a DNA sequence and returns a hash
reference where each base (including the ambiguous bases) are keys and the
values are the number of times they appear in the sequence. There are also the
special values GCp and ATp for GC and AT percentage.
=cut
sub count
{
my ($self, @args) = @_;
my ($seq) = $self->_rearrange([qw(sequence)], @args);
$self->throw("No sequence provided for the count function")
unless ($seq);
my $str = $self->_stripdown($seq, q{}, 1);
return _count($str);
}
=head2 regex_nt
my $my_seq = "ABC";
my $regex = $GD->regex_nt(-sequence => $my_seq);
# $regex is qr/A[CGT]C/;
my $regarr = $GD->regex_nt(-sequence => $my_seq --reverse_complement => 1);
# $regarr is [qr/A[CGT]C/, qr/G[ACG]T/]
You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object to be processed with the -sequence flag.
regex_nt creates a compiled regular expression or a set of them that can be used
to query large nucleotide sequences for possibly ambiguous subsequences.
If you want to get regular expressions for both the forward and reverse senses
of the DNA, use the -reverse_complement flag and expect a reference to an array
of compiled regexes.
=cut
sub regex_nt
{
my ($self, @args) = @_;
my ($seq, $arrswit)
= $self->_rearrange([qw(
sequence
reverse_complement)], @args
);
$self->throw("no sequence provided the regex_nt function")
unless ($seq);
my $str = $self->_stripdown($seq, q{}, 1);
if ($arrswit)
{
return _regarr($str);
}
else
{
return _regres($str, 1);
}
}
=head2 regex_aa
my $my_pep = "AEQ*";
my $regex = $GD->regex_aa(-sequence => $my_pep);
$regex == qr/AEQ[\*]/ || die;
Creates a compiled regular expression or a set of them that can be used to query
large amino acid sequences for smaller subsequences.
You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object to be processed with the -sequence flag.
=cut
sub regex_aa
{
my ($self, $seq) = @_;
$self->throw("no sequence provided for the regex_aa function")
unless ($seq);
my $str = $self->_stripdown($seq, q{}, 1);
return _regres($str, 2);
}
=head2 sequence_is_ambiguous
my $my_seq = "ABC";
my $flag = $GD->sequence_is_ambiguous($my_seq);
$flag == 1 || die;
$my_seq = "ATC";
$flag = $GD->sequence_is_ambiguous($my_seq);
$flag == 0 || die;
Checks to see if a DNA sequence contains ambiguous bases (RYMKWSBDHVN) and
returns true if it does.
You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object to be processed.
=cut
sub sequence_is_ambiguous
{
my ($self, $seq) = @_;
$self->throw("no sequence provided for the sequence_is_ambiguous function")
unless ($seq);
my $str = $self->_stripdown($seq, q{}, 1);
return _is_ambiguous($str);
}
=head2 ambiguous_translation
my $my_seq = "ABC";
my @peps = $GD->ambiguous_translation(-sequence => $my_seq, -frame => 1);
# @peps is qw(I T C)
You must pass a string variable, a Bio::Seq object, or a Bio::SeqFeatureI object
to be processed.
Translates a nucleotide sequence that may have ambiguous bases and returns an
array of possible peptides.
The frame argument may be 1, 2, 3, -1, -2, or -3.
It may also be t (three, 1, 2, 3), or s (six, 1, 2, 3, -1, -2, -3).
It defaults to 1.
=cut
sub ambiguous_translation
{
my ($self, @args) = @_;
my ($seq, $frame)
= $self->_rearrange([qw(sequence frame)], @args);
$self->throw("no sequence provided for the ambiguous_translation function")
unless ($seq);
$frame = $frame || 1;
my %ambtransswits = map {$_ => 1} qw(1 2 3 -1 -2 -3 s t);
$self->throw("Bad frame argument to ambiguous_translation")
unless (exists $ambtransswits{$frame});
my $str = $self->_stripdown($seq, q{}, 1);
return _amb_translation($str,
$self->{codontable},
$frame,
$self->{amb_trans_memo});
}
=head2 ambiguous_transcription
my $my_seq = "ABC";
my $seqs = $GD->ambiguous_transcription($my_seq);
# $seqs is [qw(ACC AGC ATC)]
Deambiguates a nucleotide sequence that may have ambiguous bases and returns a
reference to a sorted array of possible unambiguous sequences.
You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
object to be processed.
=cut
sub ambiguous_transcription
{
my ($self, $seq) = @_;
$self->throw("no sequence provided for the ambiguous_transcription function")
unless ($seq);
my $str = $self->_stripdown($seq, q{}, 1);
return _amb_transcription($str);
}
=head2 positions
my $seq = "TGCTGACTGCAGTCAGTACACTACGTACGTGCATGAC";
my $seek = "CWC";
my $positions = $GD->positions(-sequence => $seq,
-query => $seek);
# $positions is {18 => "CAC"}
$positions = $GD->positions(-sequence => $seq,
-query => $seek,
-reverse_complement => 1);
# $positions is {18 => "CAC", 28 => "GTG"}
Finds and returns all the positions and sequences of a potentially ambiguous
subsequence in a larger sequence. The reverse_complement flag is off by default.
You can pass either string variables, Bio::Seq objects, or Bio::SeqFeatureI
objects as the sequence and query arguments; additionally you may pass a
L<RestrictionEnzyme|Bio::GeneDesign::RestrictionEnzyme> object as the query
argument.
=cut
sub positions
{
my ($self, @args) = @_;
my ($seq, $seek, $revcom)
= $self->_rearrange([qw(
sequence
query
reverse_complement)], @args);
$self->throw("no sequence provided for the positions function")
unless ($seq);
$self->throw("no query provided for the positions function")
unless ($seek);
my $base = $self->_stripdown($seq, q{}, 1);
my $regarr = [];
my $query = $seek;
my $qref = ref($seek);
if ($qref)
{
$self->throw("object $qref is not a Bio::Seq or Bio::SeqFeature, or " .
"Bio::GeneDesign::RestrictionEnzyme object")
unless ($seek->isa("Bio::Seq")
|| $seek->isa("Bio::SeqFeatureI")
|| $seek->isa("Bio::GeneDesign::RestrictionEnzyme"));
if ($seek->isa("Bio::GeneDesign::RestrictionEnzyme"))
{
$regarr = $seek->regex;
}
else
{
$query = ref($seek->seq) ? $seek->seq->seq : $seek->seq;
$regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
}
}
else
{
$regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
}
return _positions($base, $regarr);
}
=head2 set_codontable
# load a codon table from the GeneDesign configuration directory
$GD->set_codontable(-organism_name => "yeast");
# load a codon table from an arbitrary path and catch it in a variable
my $codon_t = $GD->set_codontable(-organism_name => "custom",
-table_path => "/path/to/table.ct");
The -organism_name argument is required.
This function loads, sets, and returns a codon definition table. After it is run
the accessor L<codontable|/codontable> will return the hash reference that
represents the codon table.
If no path is provided, the configuration directory /codon_tables is checked for
tables that match the provided organism name. If there is no table in that
directory, a warning will appear and the standard codon table will be
used.
Any codon table that is missing a definition for a codon will cause a warning to
be issued. The table format for codon tables is
# Standard genetic code
{TTT} = F
{TTC} = F
{TTA} = L
...
See L<NCBI's table|http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG1>
=cut
sub set_codontable
{
my ($self, @args) = @_;
my ($orgname, $table_path)
= $self->_rearrange([qw(
organism_name
table_path)], @args);
$self->throw("No organsim name provided") unless ($orgname);
$self->{organism} = $orgname;
$self->throw("$table_path does not exist")
if ($table_path && ! -e $table_path);
if (! $table_path)
{
$table_path = $self->{codon_path} . $orgname . ".ct";
if (! -e $table_path)
{
warn "No Codon table for $orgname found. Using Standard values\n";
$table_path = $self->{codon_path} . "Standard.ct";
}
}
$self->{codontable} = _parse_codon_file($table_path);
$self->{reversecodontable} = _reverse_codon_table($self->{codontable});
return $self->{codontable};
}
=head2 set_rscutable
# load a RSCU table from the GeneDesign configuration directory
$GD->set_rscutable(-organism_name => "yeast");
# load an RSCU table from an arbitrary path and catch it in a variable
my $rscu_t = $GD->set_rscutable(-organism_name => "custom",
-table_path => "/path/to/table.rscu");
The -organism_name argument is required.
This function loads, sets, and returns an RSCU table. After it is run
the accessor L<rscutable|/rscutable> will return the hash reference that
represents the RSCU table.
If no path is provided, the configuration directory /codon_tables is checked for
tables that match the provided organism name. If there is no table in that
directory, a warning will appear and the flat RSCU table will be used.
Any RSCU table that is missing a definition for a codon will cause a warning to
be issued. The table format for RSCU tables is
# Saccharomyces cerevisiae (Highly expressed genes)
# Nucleic Acids Res 16, 8207-8211 (1988)
{TTT} = 0.19
{TTC} = 1.81
{TTA} = 0.49
...
See L<Sharp et al. 1986|http://www.ncbi.nlm.nih.gov/pubmed/3526280>.
=cut
sub set_rscutable
{
my ($self, @args) = @_;
my ($orgname, $rscu_path)
= $self->_rearrange([qw(
organism_name
rscu_path)], @args);
$self->throw("No organsim name provided") unless ($orgname);
$self->{organism} = $orgname;
$self->throw("$rscu_path does not exist")
if ($rscu_path && ! -e $rscu_path);
if (! $rscu_path)
{
$rscu_path = $self->{codon_path} . $orgname . ".rscu";
if (! -e $rscu_path)
{
warn "No RSCU table for $orgname found. Using Flat values\n";
$rscu_path = $self->{codon_path} . "Flat.rscu";
}
}
$self->{rscutable} = _parse_codon_file($rscu_path);
return $self->{rscutable};
}
=head2 set_organism
# load both codon tables and RSCU tables simultaneously
$GD->set_organism(-organism_name => "yeast");
# with arguments
$GD->set_organism(-organism_name => "custom",
-table_path => "/path/to/table.ct",
-rscu_path => "/path/to/table.rscu");
The -organism_name argument is required.
This function is just a shortcut; it runs L<set_codontable/set_codontable> and
L<set_rscutable/set_rscutable>. See those functions for details.
=cut
sub set_organism
{
my ($self, @args) = @_;
my ($orgname, $table_path, $rscu_path)
= $self->_rearrange([qw(
organism_name
table_path
rscu_path)], @args);
$self->throw("No organsim name provided") unless ($orgname);
$self->{organism} = $orgname;
$self->set_codontable(-organism_name => $orgname, -table_path => $table_path);
$self->set_rscutable(-organism_name => $orgname, -rscu_path=> $rscu_path);
return;
}
=head2 codon_count
# count the codons in a list of sequences
my $tally = $GD->codon_count(-input => \@sequences);
# add a gene to an existing codon count
$tally = $GD->codon_count(-input => $sequence,
-count => $tally);
# add a list of Bio::Seq objects to an existing codon count
$tally = $GD->codon_count(-input => \@seqobjects,
-count => $tally);
The -input argument is required and will take a string variable, a Bio::Seq
object, a Bio::SeqFeatureI object, or a reference to an array full of any
combination of those things.
The codon_count function takes a set of sequences and counts how often each
codon appears in them. It returns a hash reference where the keys are upper case
nucleotide codons and the values are integers. If you pass a hash reference
containing codon counts with the -count argument, new counts will be added to
the old values.
This function will warn you if non nucleotide codons are found.
TODO: what about ambiguous codons?
=cut
sub codon_count
{
my ($self, @args) = @_;
my ($input, $count) = $self->_rearrange([qw(input count)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no sequences were provided to count")
unless $input;
my $list = $self->_stripdown($input, 'ARRAY', 0);
my $cod_count = _codon_count($list,
$self->{codontable},
$count);
warn("There are bad codons in codon count\n") if (exists $cod_count->{XXX});
return $cod_count;
}
=head2 generate_RSCU_table
my $rscu_t = $GD->generate_RSCU_table(-sequences => \@list_of_sequences);
The -sequences argument is required and will take a string variable, a Bio::Seq
object, a Bio::SeqFeatureI object, or a reference to an array full of any
combination of those things.
The generate_RSCU_table function takes a set of sequences, counts how often each
codon appears, and returns an RSCU table as a hash reference where the keys are
upper case nucleotide codons and the values are floats.
See L<Sharp et al. 1986|http://www.ncbi.nlm.nih.gov/pubmed/3526280>.
=cut
sub generate_RSCU_table
{
my ($self, @args) = @_;
my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("Sequence set must be provided")
unless ($seqobjs);
return _generate_RSCU_table(
$self->codon_count(-input => $seqobjs),
$self->{codontable},
$self->{reversecodontable}
);
}
=head2 generate_codon_report
my $report = $GD->generate_codon_report(-sequences => \@list_of_sequences);
The report will have the format
TTT (F) 12800 0.74
TTC (F) 21837 1.26
TTA (L) 4859 0.31
TTG (L) 18806 1.22
where the first column in each group is the codon, the second column is the one
letter amino acid abbreviation in parentheses, the third column is the number of
times that codon has been seen, and the fourth column is the RSCU value for that
codon.
This report comes in a 4x4 layout, as would a standard genetic code table in a
textbook.
NO TEST
=cut
sub generate_codon_report
{
my ($self, @args) = @_;
my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("Sequence set must be provided")
unless ($seqobjs);
my $count_t = $self->codon_count(-input => $seqobjs);
my $rscu_t = _generate_RSCU_table(
$count_t,
$self->{codontable},
$self->{reversecodontable}
);
my $report = _generate_codon_report(
$count_t,
$self->{codontable},
$rscu_t
);
return $report;
}
=head2 generate_RSCU_file
my $contents = $GD->generate_RSCU_file(
-sequences => \@seqs,
-comments => ["Got these codons from mice"]
);
open (my $OUT, '>', '/path/to/cods') || die "can't write to /path/to/cods";
print $OUT $contents;
close $OUT;
This function generates a string that can be written to file to serve as a
GeneDesign RSCU table. Provide a set of sequences and an optional array
reference of comments to prepend to the file.
The file will have the format
# Comment 1
# ...
# Comment n
{TTT} = 0.19
{TTC} = 1.81
...
NO TEST
=cut
sub generate_RSCU_file
{
my ($self, @args) = @_;
my ($seqobjs, $comments) = $self->_rearrange([qw(sequences comments)], @args);
$self->throw('No codon table has been defined')
unless $self->{codontable};
$self->throw('Sequence set must be provided')
unless ($seqobjs);
$self->throw('Comment argument must be array reference')
unless ref($comments) eq 'ARRAY';
my $count_t = $self->codon_count(-input => $seqobjs);
my $rscu_t = _generate_RSCU_table(
$count_t,
$self->{codontable},
$self->{reversecodontable}
);
my $report = _generate_codon_file(
$rscu_t,
$self->{reversecodontable},
$comments
);
return $report;
}
=head2 list_enzyme_sets
my @available_enzlists = $GD->list_enzyme_sets();
# @available_enzlists == ('standard_and_IIB', 'blunts', 'IIB', 'nonpal', ...)
Returns an array containing the names of every restriction enzyme recognition
list GeneDesign knows about.
=cut
sub list_enzyme_sets
{
my ($self) = @_;
my $epath = $self->{conf} . 'enzymes/';
my @sets = ();
opendir (my $ENZDIR, $epath) || $self->throw("can't opendir $epath");
foreach my $list (readdir($ENZDIR))
{
my $name = basename($list);
$name =~ s{\.[^.]+$}{}x;
next if ($name eq q{.} || $name eq q{..});
push @sets, $name;
}
closedir($ENZDIR);
return @sets;
}
=head2 set_restriction_enzymes
$GD->set_restriction_enzymes(-enzyme_set => 'blunts');
or
$GD->set_restriction_enzymes(-list_path => '/path/to/enzyme_file');
or even
$GD->set_restriction_enzymes(
-list_path => '/path/to/enzyme_file',
-enzyme_set => 'custom_enzymes'
);
All will return a hash structure full of restriction enzymes.
Tell GeneDesign which set of restriction enzymes to use. If you provide only a
set name with the -enzyme_set flag, GeneDesign will check its config path for a
matching file. Otherwise you must provide a path to a file (and optionally a
name for the set).
=cut
sub set_restriction_enzymes
{
my ($self, @args) = @_;
my ($set_name, $set_path)
= $self->_rearrange([qw(enzyme_set list_path)], @args);
my $def = 'all_enzymes';
my $defpath = $self->{conf} . 'enzymes/' . $def;
$self->{all_enzymes} = _define_sites($defpath);
if (! $set_name && ! $set_path)
{
$self->{enzyme_set} = $self->{all_enzymes};
$self->{enzyme_set_name} = $def;
}
elsif ($set_name && ! $set_path)
{
$set_path = $self->{conf} . "enzymes/$set_name";
$self->throw("No enzyme set found that matches $set_name\n")
unless (-e $set_path);
my $list = _parse_enzyme_list($set_path);
my $set = {};
foreach my $id (@$list)
{
if (! exists $self->{all_enzymes}->{$id})
{
warn "$id was not recognized as an enzyme... skipping\n";
next;
}
$set->{$id} = $self->{all_enzymes}->{$id};
}
$self->{enzyme_set} = $set;
$self->{enzyme_set_name} = $set_name;
}
elsif ($set_path)
{
$self->throw("No enzyme list found at $set_path\n")
unless (-e $set_path);
my $list = _parse_enzyme_list($set_path);
my $set = {};
foreach my $id (@$list)
{
if (! exists $self->{all_enzymes}->{$id})
{
warn "$id was not recognized as an enzyme... skipping\n";
next;
}
$set->{$id} = $self->{all_enzymes}->{$id};
}
$self->{enzyme_set} = $set;
$self->{enzyme_set_name} = $set_name || basename($set_path);
}
return $self->{enzyme_set};
}
=head2 remove_from_enzyme_set
Removes a subset of enzymes from an enzyme list. This only happens in memory, no
files will be altered. The argument is an array reference of enzyme names.
$GD->set_restriction_enzymes(-enzyme_set => 'blunts');
$GD->remove_from_enzyme_set(-enzymes => ['SmaI', 'MlyI']);
NO TEST
=cut
sub remove_from_enzyme_set
{
my ($self, @args) = @_;
my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
return unless ($enzes);
$self->throw("no enzyme set has been defined")
unless $self->{enzyme_set};
my @toremove = ();
my $ref = ref ($enzes);
if ($ref eq "ARRAY")
{
push @toremove, @$enzes;
}
else
{
push @toremove, $enzes;
}
foreach my $enz (@toremove)
{
my $eid = $enz;
my $eref = ref $enz;
if ($eref)
{
$self->throw("object in enzymes is not a " .
"Bio::GeneDesign::RestrictionEnzyme object")
unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
$eid = $enz->id;
}
if (exists $self->{enzyme_set}->{$eid})
{
delete $self->{enzyme_set}->{$eid};
}
}
return;
}
=head2 add_to_enzyme_set
Adds a subset of enzymes to an enzyme list. This only happens in memory, no
files will be altered. The argument is an array reference of RestrictionEnzyme
objects.
#Grab all known enzymes
my $allenz = $GD->set_restriction_enzymes(-enzyme_set => 'standard_and_IIB');
#Pull out a few
my @keepers = ($allenz->{'BmrI'}, $allenz->{'HphI'});
#Give GeneDesign the enzyme set you want
$GD->set_restriction_enzymes(-enzyme_set => 'blunts');
#Add the few enzymes it didn't have before
$GD->add_to_enzyme_set(-enzymes => \@keepers);
NO TEST
=cut
sub add_to_enzyme_set
{
my ($self, @args) = @_;
my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
return unless ($enzes);
$self->throw("no enzyme set has been defined")
unless $self->{enzyme_set};
my @toadds = ();
my $ref = ref ($enzes);
if ($ref eq "ARRAY")
{
push @toadds, @$enzes;
}
else
{
push @toadds, $enzes;
}
foreach my $enz (@toadds)
{
$self->throw("object in enzymes is not a " .
"Bio::GeneDesign::RestrictionEnzyme object")
unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
next if (exists $self->{enzyme_set}->{$enz->id});
$self->{enzyme_set}->{$enz->id} = $enz;
}
return;
}
=head2 restriction_status
=cut
sub restriction_status
{
my ($self, @args) = @_;
my ($seq) = $self->_rearrange([qw(sequence)], @args);
$self->throw("no enzyme set has been defined")
unless $self->{enzyme_set};
$self->throw("No arguments provided for set_restriction_enzymes")
unless ($seq);
my $str = $self->_stripdown($seq, q{}, 0);
my @reslist = values %{$self->{enzyme_set}};
return _define_site_status($str, \@reslist);
}
=head2 build_prefix_tree
Take an array reference of nucleotide sequences (they can be strings, Bio::Seq
objects, or Bio::GeneDesign::RestrictionEnzyme objects) and create a suffix
tree. If you add the peptide flag, the sequences will be ambiguously translated
before they are added to the suffix tree. Otherwise they will be ambiguously
transcribed. It will add the reverse complement of any non peptide sequence as
long as the reverse complement is different.
my $tree = $GD->build_prefix_tree(-input => ['GGATCC']);
my $ptree = $GD->build_prefix_tree(
-input => ['GGCCNNNNNGGCC'],
-peptide => 1
);
=cut
sub build_prefix_tree
{
my ($self, @args) = @_;
my ($list, $pep) = $self->_rearrange([qw(input peptide)], @args);
$self->throw("no input provided")
unless ($list);
my $tree = Bio::GeneDesign::PrefixTree->new();
foreach my $seq (@$list)
{
my $base = $seq;
my $id = $seq;
my $ref = ref($seq);
if ($ref)
{
$self->throw("object in input is not a Bio::Seq, Bio::SeqFeature, or " .
"Bio::GeneDesign::RestrictionEnzyme object")
unless ($seq->isa("Bio::Seq")
|| $seq->isa("Bio::SeqFeatureI")
|| $seq->isa("Bio::GeneDesign::RestrictionEnzyme")
);
$base = ref($seq->seq) ? $seq->seq->seq : $seq->seq;
$id = $seq->id;
}
if ($pep)
{
$self->throw('No codon table has been defined')
unless $self->{codontable};
my @fpeptides = _amb_translation($base, $self->{codontable},
't', $self->{amb_trans_memo});
$tree->add_prefix($_, $id, $base) foreach (@fpeptides);
my $esab = _complement($base, 1);
my $lagcheck = $esab;
while (substr($lagcheck, -1) eq "N")
{
$lagcheck = substr($lagcheck, 0, length($lagcheck) - 1);
}
if ($esab ne $base && $lagcheck eq $esab)
{
my @rpeptides = _amb_translation($esab, $self->{codontable},
't', $self->{amb_trans_memo});
$tree->add_prefix($_, $id, $esab) foreach (@rpeptides);
}
}
else
{
my $fnucs = _amb_transcription($base);
$tree->add_prefix($_, $id, undef) foreach (@$fnucs);
my $esab = _complement($base, 1);
if ($esab ne $base)
{
my $rnucs = _amb_transcription($esab);
$tree->add_prefix($_, $id, undef) foreach (@$rnucs);
}
}
}
return $tree;
}
=head2 search_prefix_tree
Takes a suffix tree and a sequence and searches for results, which are returned
as in the Bio::GeneDesign::PrefixTree documentation.
my $hits = $GD->search_prefix_tree(-tree => $ptree, -sequence => $mygeneseq);
# @$hits = (['BamHI', 4, 'GGATCC', 'i hope this didn't pop up'],
# ['OhnoI', 21, 'GGCCC', 'I hope these pop up'],
# ['WoopsII', 21, 'GGCCC', 'I hope these pop up']
#);
=cut
sub search_prefix_tree
{
my ($self, @args) = @_;
my ($tree, $seq) = $self->_rearrange([qw(tree sequence)], @args);
$self->throw("no query sequence provided")
unless ($seq);
$self->throw("no suffix tree provided")
unless ($tree);
$self->throw("tree is not a Bio::GeneDesign::PrefixTree")
unless ($tree->isa("Bio::GeneDesign::PrefixTree"));
my $str = $self->_stripdown($seq, q{}, 0);
my @hits = $tree->find_prefixes($str);
return \@hits;
}
=head2 pattern_aligner
=cut
sub pattern_aligner
{
my ($self, @args) = @_;
my ($seq, $pattern, $peptide, $re)
= $self->_rearrange([qw(sequence pattern peptide offset)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no nucleotide sequence provided")
unless $seq;
$re = $re || 0;
my $str = $self->_stripdown($seq, q{}, 1);
$peptide = $peptide || $self->translate(-sequence => $str);
my ($newpattern, $offset) = _pattern_aligner($str,
$pattern,
$peptide,
$self->{codontable},
$self->{amb_trans_memo}
);
return $re ? ($newpattern, $offset) : $newpattern;
}
=head2 pattern_adder
=cut
sub pattern_adder
{
my ($self, @args) = @_;
my ($seq, $pattern) = $self->_rearrange([qw(sequence pattern)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no nucleotide sequence provided")
unless $seq;
my $str = $self->_stripdown($seq, q{}, 1);
my $pat = $self->_stripdown($pattern, q{}, 1);
my $newsequence = _pattern_adder($str,
$pat,
$self->{codontable},
$self->{reversecodontable},
$self->{amb_trans_memo}
);
return $newsequence;
}
=head2 codon_change_type
=cut
sub codon_change_type
{
my ($self, @args) = @_;
my ($codold, $codnew) = $self->_rearrange([qw(from to)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no from sequence provided")
unless $codold;
$self->throw("no to sequence provided")
unless $codnew;
my $changetype = _codon_change_type($codold, $codnew, $self->{codontable});
return $changetype;
}
=head2 translate
=cut
sub translate
{
my ($self, @args) = @_;
my ($seq, $frame) = $self->_rearrange([qw(sequence frame)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no nucleotide sequence provided")
unless $seq;
my $str = $self->_stripdown($seq, q{}, 1);
$frame = $frame ? $frame : 1;
$self->throw("frame must be -3, -2, -1, 1, 2, or 3")
if (abs($frame) > 4 || abs($frame) < 0);
my $peptide = _translate($str, $frame, $self->{codontable});
if (ref $seq)
{
my $newobj = $seq->clone();
my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
$desc = "translated with " . $self->{organism} . " codon values";
$newobj->seq($peptide);
$newobj->alphabet("protein");
$newobj->desc($desc);
return $newobj;
}
else
{
return $peptide;
}
}
=head2 reverse_translate
=cut
sub reverse_translate
{
my ($self, @args) = @_;
my ($pep, $algorithm)
= $self->_rearrange([qw(
peptide
algorithm)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no RSCU table has been defined")
unless $self->{rscutable};
$self->throw("no peptide sequence provided")
unless $pep;
my $str = $self->_stripdown($pep, q{}, 1);
my ($newstr, @baddies) = _sanitize($str, 'pep');
$str = $newstr;
if (scalar @baddies)
{
print "\nGD_WARNING: removed bad characters (", join q{, }, @baddies;
print ") from input sequence\n";
}
$algorithm = $algorithm || "balanced";
$algorithm = lc $algorithm;
$algorithm =~ s{\;}{}xg;
my $name = "_reversetranslate" . "_" . $algorithm;
my $subref = \&$name;
my $seq = &$subref($self->{reversecodontable}, $self->{rscutable}, $str)
|| $self->throw("Can't reverse translate with $algorithm? $!");
if (ref $pep)
{
my $newobj = $pep->clone();
my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
$desc .= "$algorithm reverse translated with " . $self->{organism};
$desc .= " RSCU values";
$newobj->seq($seq);
$newobj->desc($desc);
return $newobj;
}
else
{
return $seq;
}
}
=head2 codon_juggle
=cut
sub codon_juggle
{
my ($self, @args) = @_;
my ($seq, $algorithm)
= $self->_rearrange([qw(
sequence
algorithm)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no RSCU table has been defined")
unless $self->{rscutable};
$self->throw("no nucleotide sequence provided")
unless $seq;
$self->throw("no algorithm provided")
unless $algorithm;
my $str = $self->_stripdown($seq, q{}, 0);
##REPLACE THIS WITH CODE THAT GRACEFULLY JUGGLES JUST CDSES OR GENES
$self->throw("sequence does not appear to be the right length to be a gene")
unless length($str) % 3 == 0;
$algorithm = lc $algorithm;
$algorithm =~ s/\W//xg;
my $name = "_codonJuggle_" . $algorithm;
my $subref = \&$name;
my $newseq = &$subref($self->{codontable},
$self->{reversecodontable},
$self->{rscutable},
$str
) || $self->throw("Can't run $algorithm? $!");
if (ref $seq)
{
my $newobj = $seq->clone();
my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
$desc .= "$algorithm codon juggled with " . $self->{organism};
$desc .= " RSCU values";
$newobj->seq($newseq);
$newobj->desc($desc);
return $newobj;
}
else
{
return $newseq;
}
}
=head2 subtract_sequence
=cut
sub subtract_sequence
{
my ($self, @args) = @_;
my ($seq, $rem) = $self->_rearrange([qw(sequence remove)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no RSCU table has been defined")
unless $self->{rscutable};
$self->throw("no nucleotide sequence provided")
unless $seq;
$self->throw("no sequence to be removed was defined")
unless ($rem);
my $str = $self->_stripdown($seq, q{}, 0);
my $regarr;
my $less = $rem;
if (ref($rem))
{
if ($rem->isa("Bio::Seq") || $rem->isa("Bio::SeqFeatureI"))
{
$less = ref($rem->seq) ? $rem->seq->seq : $rem->seq;
$regarr = _regarr($less, 1);
}
elsif ($rem->isa("Bio::GeneDesign::RestrictionEnzyme"))
{
$less = $rem->seq;
$regarr = $rem->regex;
}
else
{
$self->throw("removal argument is not a Bio::Seq, Bio::SeqFeature, or "
. "Bio::GeneDesign::RestrictionEnzyme");
}
}
else
{
$regarr = _regarr($less, 1);
}
my $newseq = _subtract( uc $str,
uc $less,
$regarr,
$self->{codontable},
$self->{rscutable},
$self->{reversecodontable}
);
if (ref $seq)
{
my $newobj = $seq->clone();
my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
$desc .= $rem->id . " subtracted with " . $self->{organism};
$desc .= " RSCU values";
$newobj->seq($newseq);
$newobj->desc($desc);
return $newobj;
}
else
{
return $newseq;
}
}
=head2 repeat_smash
=cut
sub repeat_smash
{
my ($self, @args) = @_;
my ($seq) = $self->_rearrange([qw(sequence)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no RSCU table has been defined")
unless $self->{rscutable};
$self->throw("no nucleotide sequence provided")
unless $seq;
my $str = $self->_stripdown($seq, q{}, 0);
return _minimize_local_alignment_dp(
$str,
$self->{codontable},
$self->{reversecodontable},
$self->{rscutable}
);
}
=head2 make_amplification_primers
NO TEST
=cut
sub make_amplification_primers
{
my ($self, @args) = @_;
my ($seq, $temp) = $self->_rearrange([qw(sequence temperature)], @args);
$self->throw("no sequence provided") unless ($seq);
$temp = $temp || 60;
my $str = $self->_stripdown($seq, q{}, 0);
return _make_amplification_primers($str, $temp);
}
=head2 contains_homopolymer
Returns 1 if the sequence contains a homopolymer of the provided length (default
is 5bp) and 0 else
=cut
sub contains_homopolymer
{
my ($self, @args) = @_;
my ($seq, $length) = $self->_rearrange([qw(sequence length)], @args);
$self->throw("no sequence provided") unless ($seq);
$length = $length || 5;
my $str = $self->_stripdown($seq, q{}, 0);
return _check_for_homopolymer($str, $length);
}
=head2 filter_homopolymers
=cut
sub filter_homopolymers
{
my ($self, @args) = @_;
my ($seqs, $length) = $self->_rearrange([qw(sequences length)], @args);
$self->throw("no argument provided to filter_homopolymers")
unless $seqs;
$length = $length || 5;
my $arrref = $self->_stripdown($seqs, 'ARRAY', 1);
my $seqarr = _filter_homopolymer( $arrref, $length);
return $seqarr;
}
=head2 search_vmatch
=cut
sub search_vmatch
{
my ($self, @args) = @_;
$self->throw("vmatch searching is not available")
unless $self->vmatch;
my ($parent, $seqobj, $mismatches, $revcom)
= $self->_rearrange([qw(
parent
sequence
mismatches
revcom)], @args);
$self->throw("no nucleotide sequence provided")
unless $seqobj;
$self->throw("sequence argument is not a Bio::Seq object")
unless $seqobj->isa("Bio::Seq");
$self->throw("$seqobj is not a nucleotide sequence")
unless $seqobj->alphabet eq "dna";
$self->throw("no parent sequence provided")
unless $parent;
$self->throw("parent argument is not a Bio::Seq object")
unless $parent->isa("Bio::Seq");
$self->throw("$parent is not a nucleotide sequence")
unless $parent->alphabet eq "dna";
my $writedir = $self->{tmp_path} || "./";
$mismatches = $mismatches || 1;
$revcom = $revcom || 1;
my $hits = _search_vmatch( $parent,
$seqobj,
$mismatches,
$revcom,
$writedir);
return $hits;
}
=head2 filter_blast
=cut
sub filter_blast
{
my ($self, @args) = @_;
$self->throw("BLAST+ filtering is not available")
unless $self->BLAST;
my ($parent, $seqobjs, $percent, $revcom)
= $self->_rearrange([qw(
parent
sequences
percent_identity
revcom)], @args);
$self->throw("no nucleotide sequences provided")
unless $seqobjs;
$self->throw("sequences argument is not an array reference")
unless ref($seqobjs) eq "ARRAY";
foreach my $seqobj (@$seqobjs)
{
$self->throw(ref($seqobj) . " is not a Bio::Seq object")
unless $seqobj->isa("Bio::Seq");
$self->throw("$seqobj is not a nucleotide sequence")
unless $seqobj->alphabet eq "dna";
}
$self->throw("no parent sequence provided")
unless $parent;
$self->throw(ref($parent) . " is not a Bio::Seq object")
unless $parent->isa("Bio::Seq");
$self->throw("$parent is not a nucleotide sequence")
unless $parent->alphabet eq "dna";
my $writedir = $self->{tmp_path} || "./";
$percent = $percent || 75;
$revcom = $revcom || 1;
my $seqarr = _filter_blast( $parent,
$seqobjs,
$percent,
$writedir);
return $seqarr;
}
=head2 carve_building_blocks
NO TEST
=cut
sub carve_building_blocks
{
my ($self, @args) = @_;
my ($chobj, $algorithm, $tarbblen, $maxbblen,
$minbblen, $tarbblap, $stitch, $excludes,
$fpexcludes, $tpexcludes, $verbose)
= $self->_rearrange([qw(
sequence
algorithm
target_length
max_length
min_length
overlap_length
stitch_temp
excludes
fpexcludes
tpexcludes
verbose)], @args);
$self->throw("no nucleotide sequence provided")
unless $chobj;
$self->throw("object " . ref($chobj) . " is not a Bio::Seq")
unless $chobj->isa("Bio::Seq");
$tarbblen = $tarbblen || 1000;
$maxbblen = $maxbblen || 1023;
$minbblen = $minbblen || 200;
$self->throw("Target building block size $tarbblen is outside of allowable " .
"range min $minbblen to max $maxbblen")
if ($tarbblen < $minbblen || $tarbblen > $maxbblen);
$self->throw("Minimum and maximum building block sizes don't make sense")
if ($maxbblen < $minbblen || $maxbblen < 0 || $minbblen < 0 );
$tarbblap = $tarbblap || 40;
$algorithm = $algorithm || "overlap";
$algorithm =~ s/\;//xg;
my $module = "Bio::GeneDesign::BuildingBlocks::$algorithm";
(my $require_name = $module . ".pm") =~ s{::}{/}xg;
my $req = eval
{
require $require_name;
};
if (! $req)
{
$self->throw("BuildingBlocks module for $algorithm not found\n$@");
}
my $name = $module . "::_carve_building_blocks";
my $subref = \&$name;
my $BB_list = [];
my $run = eval
{
$BB_list = &$subref( $self, $chobj, $tarbblen, $maxbblen,
$minbblen, $tarbblap, $stitch,
$excludes, $fpexcludes, $tpexcludes,
$verbose
);
};
my $e;
if ($e = Bio::GeneDesign::Exception::UnBBable->caught())
{
print $chobj->id . " cannot be carved into building blocks: "
. $e->error . "\n";
}
return $BB_list;
}
=head2 chop_oligos
NO TEST
=cut
sub chop_oligos
{
my ($self, @args) = @_;
my ($bbobj, $algorithm, $olilenmin, $olilenmax, $olilen, $laptemp, $laplenmin,
$tmtol, $poolsize, $poolnummax, $verbose)
= $self->_rearrange([qw(
building_block
algorithm
oligo_len_min
oligo_len_max
oligo_len
overlap_tm
overlap_len_min
tm_tolerance
pool_size
pool_num_max
verbose)], @args);
$self->throw("no building block provided")
unless $bbobj;
$self->throw("object " . ref($bbobj) . " is not a Bio::SeqFeatureI")
unless $bbobj->isa("Bio::SeqFeatureI");
$olilen = $olilen || 180;
$olilenmin = $olilenmin || 45;
$olilenmax = $olilenmax || 200;
$laptemp = $laptemp || 70;
$tmtol = $tmtol || .5;
$verbose = $verbose || undef;
$self->throw("maximum oligo length is less than the target oligo length")
if ($olilenmax && $olilen && $olilenmax < $olilen);
$algorithm = $algorithm || "JGI";
$algorithm =~ s/\;//xg;
my $module = "Bio::GeneDesign::Oligos::$algorithm";
(my $require_name = $module . ".pm") =~ s{::}{/}xg;
my $req = eval
{
require $require_name;
};
if (! $req)
{
$self->throw("Oligos module for algorithm $algorithm not found:\n$@\n\n");
}
my $name = $module . "::_chop_oligos";
my $subref = \&$name;
my $OL_list = [];
my $run = eval
{
$OL_list = &$subref( $self, $bbobj, $olilenmax, $olilenmin, $olilen,
$laptemp, $laplenmin, $tmtol, $poolsize,
$poolnummax, $verbose
);
};
my $e;
if ($e = Bio::GeneDesign::Exception::UnOLable->caught())
{
print "Cannot chop into oligos: " . $e->error . "\n";
}
return $OL_list;
}
=head2 make_graph
=cut
sub make_graph
{
my ($self, @args) = @_;
$self->throw("Graphing is not available")
unless $self->{graph};
my ($seqobjs, $window)
= $self->_rearrange([qw(sequences window)], @args);
$self->throw("no codon table has been defined")
unless $self->{codontable};
$self->throw("no RSCU table has been defined")
unless $self->{rscutable};
$self->throw("no nucleotide sequences provided")
unless $seqobjs;
$self->throw("sequences argument is not an array reference")
unless ref($seqobjs) eq "ARRAY";
foreach my $seqobj (@$seqobjs)
{
$self->throw(ref($seqobj) . " is not a Bio::Seq object")
unless $seqobj->isa("Bio::Seq");
$self->throw("$seqobj is not a nucleotide sequence")
unless $seqobj->alphabet eq "dna";
}
my ($graph, $format) = _make_graph( $seqobjs,
$window,
$self->{organism},
$self->{codontable},
$self->{rscutable},
$self->{reversecodontable});
return ($graph, $format);
}
=head2 make_dotplot
=cut
sub make_dotplot
{
my ($self, @args) = @_;
$self->throw("Graphing is not available")
unless $self->{graph};
my ($seq1, $seq2, $window, $stringency)
= $self->_rearrange([qw(first second window stringency)], @args);
$window = $window || 10;
$stringency = $stringency || 10;
$self->throw("no nucleotide sequences provided")
unless ($seq1 && $seq2);
foreach my $seqobj ($seq1, $seq2)
{
$self->throw(ref($seqobj) . " is not a Bio::Seq object")
unless $seqobj->isa("Bio::Seq");
$self->throw("$seqobj is not a nucleotide sequence")
unless $seqobj->alphabet eq "dna";
}
my $graph = _dotplot(
$seq1->seq,
$seq2->seq,
$window,
$stringency
);
return $graph;
}
=head2 import_seqs
NO TEST
=cut
sub import_seqs
{
my ($self, $path) = @_;
$self->throw("$path does not exist.")
if (! -e $path);
my $iterator = Bio::SeqIO->new(-file => $path)
|| $self->throw("Can't parse $path!");
my ($filename, $dirs, $suffix) = fileparse($path, qr/\.[^.]*/x);
$suffix = substr($suffix, 1) if (substr($suffix, 0, 1) eq ".");
$suffix = 'fasta' if ($suffix eq 'fa');
return ($iterator, $filename, $suffix);
}
=head2 export_seqs
NO TEST
=cut
sub export_seqs
{
my ($self, @args) = @_;
my ($filename, $outpath, $outformat, $seqarr)
= $self->_rearrange([qw(
filename
path
format
sequences)], @args);
$outpath = $outpath ? $outpath : ".";
$outpath .= "/" if (substr($outpath, -1, 1) !~ /[ \/ ]/x);
$self->throw("$outpath does not exist") if (! -e $outpath);
$outformat = $outformat ? $outformat : "genbank";
my $module = "Bio::SeqIO::$outformat";
(my $require_name = $module . ".pm") =~ s{::}{/}xg;
my $flag = eval
{
require $require_name;
};
$self->throw("$outformat is not a format recognized by BioPerl")
unless ($outformat && $flag);
#Long attributes that come in from a genbank file will have corruption
#remove spaces and reattribute to fix bbs in genbank file ):
if ($outformat eq 'genbank')
{
foreach my $seq (@{$seqarr})
{
foreach my $feat ($seq->get_SeqFeatures)
{
foreach my $tag ($feat->get_all_tags())
{
my $value = join(q{}, $feat->get_tag_values($tag));
$value =~ s/\s//xg;
$feat->remove_tag($tag);
$feat->add_tag_value($tag, $value);
}
}
}
}
my $path = $outpath . $filename;
open (my $OUTFH, '>', $path ) || $self->throw("Can't write to $path? $!");
my $FOUT = Bio::SeqIO->new(-fh => $OUTFH, -format => $outformat);
$FOUT->write_seq($_) foreach (@$seqarr);
close $OUTFH;
return;
}
=head2 random_dna
=cut
sub random_dna
{
my ($self, @args) = @_;
my ($rlen, $rgc, $rstop)
= $self->_rearrange([qw(
length
gc_percentage
no_stops)], @args);
$self->throw("no codon table has been defined")
if ($rstop && ! $self->{codontable});
$rgc = $rgc || 50;
$self->throw("gc_percentage must be between 0 and 100")
if ($rgc && ($rgc < 0 || $rgc > 100));
if (! $rlen || $rlen < 1)
{
return q{};
}
elsif ($rlen == 1)
{
return $rgc ? _randombase_weighted($rgc) : _randombase;
}
return _randomDNA($rlen, $rgc, $rstop, $self->{codontable});
}
=head2 replace_ambiguous_bases
=cut
sub replace_ambiguous_bases
{
my ($self, $seq) = @_;
$self->throw("no sequence provided ")
unless ($seq);
my $str = $self->_stripdown($seq, q{}, 1);
my $newstr = _replace_ambiguous_bases($str);
if (ref $seq)
{
my $newobj = $seq->clone();
my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
$desc .= "deambiguated";
$newobj->seq($newstr);
$newobj->desc($desc);
return $newobj;
}
else
{
return $newstr;
}
}
=head1 PLEASANTRIES
=head2 pad
my $name = 5;
my $nice = $GD->pad($name, 3);
$nice == "005" || die;
$name = "oligo";
$nice = $GD->pad($name, 7, "_");
$nice == "__oligo" || die;
Pads an integer with leading zeroes (by default) or any provided set of
characters. This is useful both to make reports pretty and to standardize the
length of designations.
=cut
sub pad
{
my ($self, $num, $thickness, $chars) = @_;
my $t = $num;
$chars = $chars || "0";
$t = $chars . $t while (length($t) < $thickness);
return $t;
}
=head2 attitude
my $adverb = $GD->attitude();
Ask GeneDesign how it handled your request.
=cut
sub attitude
{
my @adverbs = qw(Elegantly Energetically Enthusiastically Excitedly Daintily
Deliberately Diligently Dreamily Courageously Cooly Cleverly Cheerfully
Carefully Calmly Briskly Blindly Bashfully Absentmindedly Awkwardly
Faithfully Ferociously Fervently Fiercely Fondly Gently Gleefully Gratefully
Gracefully Happily Helpfully Heroically Honestly Joyfully Jubilantly
Jovially Keenly Kindly Knowingly Kookily Loftily Lovingly Loyally
Majestically Mechanically Merrily Mostly Neatly Nicely Obediently Officially
Optimistically Patiently Perfectly Playfully Positively Powerfully
Punctually Properly Promptly Quaintly Quickly Quirkily Rapidly Readily
Reassuringly Righteously Sedately Seriously Sharply Shyly Silently Smoothly
Solemnly Speedily Strictly Successfully Suddenly Sweetly Swiftly Tenderly
Thankfully Throroughly Thoughtfully Triumphantly Ultimately Unabashedly
Utterly Upliftingly Urgently Usefully Valiantly Victoriously Vivaciously
Warmly Wholly Wisely Wonderfully Yawningly Zealously Zestfully
);
my $index = _random_index(scalar(@adverbs));
return $adverbs[$index];
}
=head2 endslash
=cut
sub endslash
{
my ($self, $path) = @_;
if (substr($path, -1, 1) ne q{/})
{
$path .= q{/};
}
return $path;
}
=head2 _stripdown
=cut
sub _stripdown
{
my ($self, $seqarg, $type, $enz_allowed) = @_;
$enz_allowed = $enz_allowed || 0;
my @seqs = ref($seqarg) eq "ARRAY" ? @$seqarg : ($seqarg);
my @list;
foreach my $seq (@seqs)
{
my $str = $seq;
my $ref = ref($seq);
if ($ref)
{
my $bit = $self->_checkref($seq, $enz_allowed);
$self->throw("object $ref is not a compatible object $bit") if ($bit < 1);
$str = ref($seq->seq) ? $seq->seq->seq : $seq->seq;
}
push @list, uc $str;
}
return \@list if ($type eq "ARRAY");
return $list[0];
}
=head2 _checkref
=cut
sub _checkref
{
my ($self, $pobj, $enz_allowed) = @_;
my $ref = ref $pobj;
return -1 if (! $ref);
$enz_allowed = $enz_allowed || 0;
my ($bioseq, $bioseqfeat) = (0, 0);
$bioseq = $pobj->isa("Bio::Seq");
$bioseqfeat = $pobj->isa("Bio::SeqFeatureI");
if ($enz_allowed)
{
$enz_allowed = $pobj->isa("Bio::GeneDesign::RestrictionEnzyme");
}
return $bioseq + $bioseqfeat + $enz_allowed;
}
1;
__END__
=head1 COPYRIGHT AND LICENSE
Copyright (c) 2013, GeneDesign developers
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
* The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
National Laboratory, the Department of Energy, and the GeneDesign developers may
not be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=cut