The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::Graphics::Browser2::Plugin::OligoFinder;
# $Id: OligoFinder.pm,v 1.9 2004-08-23 15:56:31 lstein Exp $
# test plugin
use strict;
use Bio::Graphics::Browser2::Plugin;
use Bio::Graphics::Feature;
use Text::Shellwords;
use DBI;
use CGI qw(:standard *table);

use vars '$VERSION','@ISA';
$VERSION = '0.15';

@ISA = qw(Bio::Graphics::Browser2::Plugin);

sub name { "Short Oligos" }

sub description {
  p("The oligo finder plugin finds oligos between 11 and 50 bp in length.",
    "It does a slow search, making it suitable only for small (<150 MB) genomes.",
    "[NOTE TO SYSADMINS: The browser must use the Bio::DB::GFF schema for this plugin to work.]").
  p("This plugin was written by Lincoln Stein.");
}

sub type { 'finder' }

sub config_defaults {
  my $self = shift;
  return { };
}

# we have no stable configuration
# sub reconfigure { }

sub configure_form {
  my $self = shift;
  my $oligo = $self->config_param('searcholigo');
  my $msg  =  $oligo && !$self->valid_oligo($oligo)
              ? font({-color=>'red'},"Invalid oligo: either too short or not DNA")
	      : '';
  return $msg .
    table(TR({-class=>'searchtitle'},
	     th({-colspan=>2,-align=>'LEFT'},
		'Enter an oligonucleotide between 12 and 50 bp in length.',
		'The browser will identify all genomic regions that contain',
		'this oligo.  This is NOT a fast algorithm, so have patience.')),
	  TR({-class=>'searchbody'},
	     th('Enter oligo:'),
	     td(textfield(-name=>$self->config_name('searcholigo'),
			  -size=>50,-width=>50))
	    )
	 );
}

# find() returns undef unless the OligoFinder.searcholigo parameter
# is specified and valid.  Returning undef signals the browser to invoke the
# configure_form() method.
# If successful, it returns an array ref of Bio::SeqFeatureI objects.
sub find {
  my $self     = shift;
  my $segments = shift; # current segments - can search inside them or ignore
                        # In this example we do a global search.

  my $oligo = lc $self->config_param('searcholigo');
  $self->auto_find($oligo);
}

# auto_find() does the actual work
# It is also called by the main page as a last resort when the user
# types something into the search box that isn't recognized.
sub auto_find {
  my $self  = shift;
  my $oligo = lc shift;

  $self->valid_oligo($oligo) or return;

  (my $reversec = $oligo) =~ tr/gatcGATC/ctagCTAG/;
  $reversec = reverse $reversec;
  my $length = length $oligo;

  my $db    = $self->database or die "I do not have a database";
  my $dbi   = $db->features_db;

  my @chroms = $self->get_chroms($db);
  my $in     = join ',',map {$dbi->quote($_)} @chroms;
  my $sth =
    $dbi->prepare("select fref,foffset,fdna from fdna where fref in ($in) order by fref,foffset",
		  { 'mysql_use_result' => 1}) or die "Couldn't prepare ",$db->errstr;
  $sth->execute or die "Couldn't execute ",$db->errstr;

  my $bit_to_keep = length($oligo) - 1;
  my ($current_ref,$offset,$dna,@results);
  
  while (my ($ref,$off,$d) = $sth->fetchrow_array) {
    if (!defined($current_ref) or $current_ref ne $ref) {
      $offset = 0;
      $dna    = '';
    }
    $current_ref = $ref;

    # truncate all but the last length(oligo)-1 bases
    substr($dna,0,length($dna)-$bit_to_keep) = '';
    $offset  = $off - length($dna);
    $dna    .= lc $d;

    my @forward = $self->exact_matches($dna,$oligo);
    my @reverse = $self->exact_matches($dna,$reversec);

    for ([$oligo=>\@forward],[$reversec=>\@reverse]) {
      my ($name,$pos) = @$_;
      for my $p (@$pos) {
	push @results, Bio::Graphics::Feature->new(-ref   => $ref,
						   -type  => 'oligo',
						   -name  => $name,
						   -start => $offset+$p+1,
						   -score => '100%',
						   -end   => $offset+$p+length($oligo),
						   -factory=> $db,
						  )
      }
    }
  }
  return \@results;
}

sub exact_matches {
  my $self   = shift;
  my ($dna,$oligo) = @_;
  my @results;
  my $offset = 0;
  while ((my $i = index($dna,$oligo,$offset)) >= 0) {
    push @results,$i;
    $offset = $i+length($oligo);
  }
  @results;
}

# This is a slowish query, so cache the results per-source.
# Bio::DB::GFF has no concept of the reference DNA, so we just look
# inside segments of DNA that are more than 20K in length and hence
# likely to represent genomic.
sub get_chroms {
  my $self = shift;
  my $db   = shift;
  my $dbi  = $db->features_db;
  my $source = $self->browser_config();
  my $chroms;
  my @chroms = shellwords($self->browser_config->plugin_setting('search_segments'));
  if (@chroms) {
    $chroms = \@chroms;
  }
  else {
    return @{$self->{chroms}{$source}} if ref($self->{chroms}{$source});
    $chroms = $dbi->selectcol_arrayref('select fref from fdna group by fref having count(fref)>10');
  }
  $self->{chroms}{$source} = $chroms;
  return @$chroms;
}

# return true if a valid oligo
sub valid_oligo {
  my $self = shift;
  my $oligo = shift or return;
  return $oligo =~ /^[gatcn]{11,50}$/i
}

1;