The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
package Bio::Tools::DNAGen;
use 5.006;
use strict;

our $VERSION = '0.02';

use XSLoader;
XSLoader::load 'Bio::Tools::DNAGen';

use Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(calc_gcratio calc_mt);

use List::Util qw(shuffle);

# Standalone Functions
sub calc_gcratio { gcratio($_[0]) }
sub calc_mt { mt($_[0]) }

sub subseq {
    map{substr($_, length($_)-1)}grep{!is_selfcplm($_)}map{substr($_[0],1).$_} shuffle qw/a c g t/;
}


sub new {
    my $pkg = shift;
    my $arg = ref($_[0]) ? $_[0] : {@_};
    bless {
	gcratio => $arg->{gcratio},
	mt => $arg->{mt},
	limit => $arg->{limit} || 1,
	prefix => $arg->{prefix} || join(q//, subseq),
	len => $arg->{len} || 10,
	_result => '',
	_seqcnt => 0,
    }, $pkg;
}

sub set_limit   { $_[0]->{limit} = $_[1] || 1 }
sub set_gcratio { $_[0]->{gcratio} = (ref($_[1]) ? $_[1] : [@_[1..$#_]]) || undef }
sub set_mt      { $_[0]->{mt} = (ref($_[1]) ? $_[1] : [@_[1..$#_]]) || undef }
sub set_prefix  { $_[0]->{prefix} = $_[1] || join (q//, subseq) }
sub set_len     { $_[0]->{len} = $_[1] || 10 }


sub genseq($) {
    $_[0]->{_seqcnt} = 0;
    $_[0]->{_result} = undef;
    die "Prefix's length is greater than sequence's length\n" if length($_[0]->{prefix}) > $_[0]->{len};
    _genseq($_[0], $_[0]->{prefix});
    grep{$_}split /\n/, $_[0]->{_result};
}

use subs qw/_genseq/;
sub _genseq {
    my $prefix = $_[1];
    return if length $prefix == $_[0]->{len};
    if(length $prefix == $_[0]->{len}-1){
	for (
	     grep {
		 if(defined $_[0]->{mt} && ref($_[0]->{mt})){
		     if(@{$_[0]->{mt}} >= 2){
			 mt($_) >= $_[0]->{mt}->[0] && mt($_) <= $_[0]->{mt}->[1];
		     }
		     else{
			 mt($_) == $_[0]->{mt}->[0];
		     }
		 }
		 else{
		     $_;
		 }
	     }
	     grep {
		 if(defined $_[0]->{gcratio} && ref($_[0]->{gcratio})){
		     if(@{$_[0]->{gcratio}} >= 2){
			 gcratio($_) >= $_[0]->{gcratio}->[0] && gcratio($_) <= $_[0]->{gcratio}->[1];
		     }
		     else{
			 gcratio($_) == $_[0]->{gcratio}->[0];
		     }
		 }
		 else{
		     gcratio($_);
		 }
	     }
	     map{$prefix.$_} subseq $prefix){

	    if(++$_[0]->{_seqcnt} <= $_[0]->{limit}){
		$_[0]->{_result} .= $_."\n";
	    }
	    else{
		return;
	    }
	}
    }
    return if $_[0]->{_seqcnt} > $_[0]->{limit};
    map { _genseq($_[0], $prefix.$_) } subseq($prefix);
    return;
}

1;
__END__
# Below is stub documentation for your module. You better edit it!

=head1 NAME

Bio::Tools::DNAGen - Generating a pool of DNA sequences

=head1 SYNOPSIS

  use Bio::Tools::DNAGen;
  $gen = Bio::Tools::DNAGen;

  $gen->set_gcratio(50);

  $gen->set_prefix('acgt');

  $gen->getseq($len) for 1..10;

=head1 DESCRIPTION

This module is a tool for generating a pool of DNA sequences which meet some logical and physical requirements such as uniqueness, melting temperature and GC ratio. When you have set the parameters, this module will create an array of sequences for you.

=head1 USAGE

=head2 new

Constructor.

You may specify all the parameters here.

 $gen = Bio::Tools::DNAGen->new(
				gcratio => 50,
				mt => 30,
				limit => 10,
				prefix => 'acgt',
				len => 10,
				);


=head2 set_gcratio

Setting for the GC ratio.

You can give it a specific value, like

    $gen->set_gcratio(50);

or an array to say a range

    $gen->set_gcratio([40, 60]);

    # or

    $gen->set_gcratio(40, 60);

Default is 'undef', which means gc-ratio is not related to sequence selection.

=head2 set_mt

Setting for the melting temperature. For now, Wallace formula is adopted for calculation.

You can give it a specific value, like

    $gen->set_mt(30);

or an array to say a range

    $gen->set_mt([20, 30]);

    # or

    $gen->set_mt(20, 30);

Default is 'undef', which means melting temperature is not related to sequence selection.

=head2 set_len

    $gen->set_len(15);

Setting for the length of sequences to be generated.

Default is '10'.

=head2 set_prefix

    $gen->set_prefix('acgt');

Setting for the common prefix of sequences to be generated.

Default is a random prefix of length 4 composed of qw/a c g t/, and all the substrings of prefix's length in generated sequences will be checked for their uniqueness.

=head2 set_limit

    $gen->set_limit(20);

Setting for the number of sequences to be generated.

Default is '1'.

=head2 genseq

    print join $/, $gen->genseq;

Generating DNA sequences that meet your requirements.

=head1 EXPORTS

This module also automatically exports two utilities.

=head2 calc_gcratio

    print calc_gcratio($seq);

It returns the GC ratio of $seq 

=head2 calc_mt

    print calc_mt($seq);

It returns the melting temperature of $seq 

=head1 SEE ALSO

B<DNASequenceGenerator: A Program for the Construction of DNA Sequences> by I<Udo Feldkamp, Sam Saghafi, Wolfgang Banzhaf, and Rauhe>

=head1 COPYRIGHT

xern E<lt>xern@cpan.orgE<gt>

This module is free software; you can redistribute it or modify it under the same terms as Perl itself.

=cut