The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl
# Brian Osborne
# script to run genscan on all nucleotide sequences in a fasta file
# and save results as the fasta files <file>.gs.pept and <file>.gs.cds,
# and <file>.gs.exons

use Bio::SeqIO;
use Bio::Seq;
use Getopt::Long;
use Bio::Tools::Genscan;
use strict;

# GENSCAN matrix
my $matrix = "/home/bosborne/src/genscan/HumanIso.smat";

my ($file,$i);

GetOptions( "f|file=s" => \$file );
usage() if ( !$file );

my $pept_out = Bio::SeqIO->new(-file   => ">$file.gs.pept",
			       -format => "fasta");
my $cds_out = Bio::SeqIO->new(-file   => ">$file.gs.cds",
			      -format => "fasta");
my $exons_out = Bio::SeqIO->new(-file   => ">$file.gs.exons",
				-format => "fasta");

my $in = Bio::SeqIO->new(-file => $file , -format => 'Fasta');

while ( my $seq = $in->next_seq() ) {
   die "Input sequence is protein\n" if ( $seq->alphabet eq 'protein' );

   # create temp file, input to GENSCAN
   my $temp_out = Bio::SeqIO->new(-file   => ">temp.fa",
				   -format => "fasta");
   $temp_out->write_seq($seq);

   my $file_id = $seq->display_id;
   $file_id =~ s/\|/-/g;

   system "genscan $matrix temp.fa -cds > $file_id.gs.raw";
   unlink "temp.fa";

   my $genscan = Bio::Tools::Genscan->new( -file => "$file_id.gs.raw");
   while ( my $gene = $genscan->next_prediction() ) {
      $i++;
      my $pept = $gene->predicted_protein;
      my $cds = $gene->predicted_cds;
      my @exon_arr = $gene->exons;

      if ( defined $cds  ) {
	 my $cds_seq = Bio::Seq->new(-seq => $cds->seq,
				     -display_id => $cds->display_id);
	 $cds_out->write_seq($cds_seq);
      }

      if ( defined $pept ) {
	 my $pept_seq = Bio::Seq->new(-seq => $pept->seq,
				      -display_id => $pept->display_id);
	 $pept_out->write_seq($pept_seq);
      }

      for my $exon (@exon_arr) {
	 my $desc = $exon->strand . " " . $exon->start . "-" . $exon->end .
	   " " . $exon->primary_tag . " " . "GENSCAN_predicted_$i";
	 my $exon_seq = Bio::Seq->new(-seq => $seq->subseq($exon->start,
							   $exon->end),
				      -display_id => $seq->display_id,
				      -desc => $desc );
	 $exons_out->write_seq($exon_seq);
      }
   }
   $genscan->close();
   unlink "$file_id.gs.raw";
}

sub usage {
    print "
Usage    : $0 -f <file>
Function : run genscan on all nucleotide sequences in a multiple fasta file
Output   : <file>.gs.pept, <file>.gs.cds, <file>.gs.exons

";
    exit;
}