The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl

# see http://zfish.nichd.nih.gov/tools/subsequence.cgi

# uncomment and modify the next two lines 
#  if your perl is in a nonstandard directory
#use lib '/disk3/local/lib/perl5/site_perl';
#use lib '/disk3/local/lib/perl5/';

use CGI qw/:standard :html3/;
use Bio::DB::GenBank;
use File::Temp;
use FileHandle;

print header,
  start_html(-title => 'find subsequence of large GenBank entries',-author => 'Jonathan_Epstein\@nih.gov');
print_form()    unless param;
print_results() if param;

sub print_results {
  $gb = new Bio::DB::GenBank;
  $accession = param('accession');
  eval {
    $seq = $gb->get_Seq_by_acc($accession); # Accession Number
  };
  if ($@) {
    print "***ERROR: accession $accession not found***\n";
    return;
  }
  $segment_start = param('start');
  $segment_end = param('length_or_end_value');
  $segment_end = $segment_start+$segment_end-1 if param('length_or_end_choice') eq 'Length';
  if ($segment_end<$segment_start || $segment_start<0) {
    print "***ERROR: invalid segment start and end values:$segment_start,$segment_end***\n";
    return;
  }
  $len = $seq->length();
  if ($segment_end>$len) {
    print "***ERROR: maximum length $len exceeded***\n";
    return;
  }
  $subseq = $seq->subseq ($segment_start,$segment_end);
  
  $name = "subsequence of $accession";
  $strand = "+";
  $strand = "-" if (param('reverse'));
  
  # For some reason, there seems to be a problem if you use the file
  # handle provided by File::Temp.  Similarly, there's a problem if you
  # pass a filename to BioPerl below rather than a file handle.  However,
  # constructing our own file handle and then passing it to BioPerl works
  # fine.
  (undef, $filename) = File::Temp::tempfile();
  $fh = new FileHandle "> $filename";
  $seqoutlong = Bio::SeqIO->new( '-format' => 'Fasta',-fh => $fh);
  $seqobj = Bio::PrimarySeq->new ( -seq => $subseq,
				   -id  => $name . "[length:$len]:" . $segment_start . "-" . $segment_end . "(" . $strand . "strand)",
				   -moltype => 'dna'
				 );
  $seqobj = $seqobj->revcom if ($strand ne "+");
  $seqoutlong->write_seq($seqobj);
  $fh->close;
  undef $fh;
  
  # Now we parse the FASTA file which was just generated, and perform
  # some simple conversions to HTML.   
  open (TEMPORARY, "<$filename") or die "unable to open temporary file $filename\n";
  print "<tt>\n";
  while (<TEMPORARY>) {
    print $_;
    print "<br>\n";
  }
  close TEMPORARY;
  print "</tt>\n";
  unlink $filename;
}

sub print_form {
  print p("This web page permits you to extract a short subsequence of DNA from a large GenBank entry.  This is especially useful in an era of huge \"contigs\" of genomic DNA, where you only want to extract a few hundred base pairs for subsequent analysis.\n");
  
  print p,"This program also illustrates the power of ",a({-href => 'http://www.BioPerl.org/'}, "BioPerl"), ", a powerful set of tools for molecular biology analysis.  The ", a({-href => 'subsequence.pl.txt'}, "source code"), " for this program is less than 90 lines long.\n";
  
  print p,"You must specify the GenBank accession number along with a start position.  You may specify either the length of the subsequence you wish to extract or, equivalently, the endpoint.\n";
  
  print "The sequence may be reverse-complemented if you wish, e.g., the reverse complement of <font color=green>ATCGC</font> is <font color=yellow>GCGAT</font>.\n";
  
  print p,"To test this web page, try accession NT_004002, start 50000, length 400.\n";
  
  print start_form,table(
			 Tr(td("Enter your GenBank accession"),td(textfield(-name => 'accession',-size => 20))),
			 Tr(td("Start position"),td(textfield(-name => 'start',-size => 10))),
			 Tr(td("Specify length or end position"), td(radio_group (-name => 'length_or_end_choice',-values => [Length, End], default => Length))),
			 Tr(td("Length or end position"), td(textfield (-name => length_or_end_value,-size => 20))),
			 Tr(td("Reverse complement?"), td(checkbox (-name => 'reverse')))),
    submit ("Find my subsequence");
  
  print hr(),"Credits: Jonathan Epstein (Jonathan_Epstein\@nih.gov)";

}