The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::Graphics::Browser2::Plugin::Aligner;
# $Id: Aligner.pm,v 1.13 2008-09-18 15:27:07 lstein Exp $

use strict;
use Bio::Graphics::Browser2::Plugin;
use CGI qw(table a TR td th p popup_menu radio_group checkbox checkbox_group h1 h2 pre);
use Bio::Graphics::Browser2::Realign 'align_segs';
use Bio::Graphics::Browser2::PadAlignment;
use Bio::Graphics::Browser2::Util 'shellwords';

use constant DEBUG => 0;
use constant DEFAULT_RAGGED_ENDS => (0,10,25,50,100,150,500);

use vars '$VERSION','@ISA';
$VERSION = '0.23';
@ISA = qw(Bio::Graphics::Browser2::Plugin);

use constant TARGET    => 0;
use constant SRC_START => 1;
use constant SRC_END   => 2;
use constant TGT_START => 3;
use constant TGT_END   => 4;

sub name { "Alignments" }

sub description {
  p("This plugin prints out a multiple alignment of the selected features.",
    'It was written by',a({-href=>'mailto:lstein@cshl.org'},'Lincoln Stein.')
   );
}

sub init {
  my $self = shift;
  my $browser_conf = $self->browser_config;
  my @alignable       = shellwords($browser_conf->plugin_setting('alignable_tracks'));
  @alignable = grep {$browser_conf->setting($_=>'draw_target') } $browser_conf->labels
    unless @alignable;
  $self->{alignable} = \@alignable;

  my @upcase          = shellwords($browser_conf->plugin_setting('upcase_tracks'));
  $self->{upcase}     = \@upcase;

  my @ragged          = shellwords($browser_conf->plugin_setting('ragged_ends'));
  @ragged             = DEFAULT_RAGGED_ENDS unless @ragged;
  $self->{ragged}     = \@ragged;

  $self->{upcase_default} = $browser_conf->plugin_setting('upcase_default');
  $self->{align_default}  = $browser_conf->plugin_setting('align_default')
                           ? [shellwords($browser_conf->plugin_setting('align_default'))]
			   : \@alignable;
  $self->{ragged_default} = $browser_conf->plugin_setting('ragged_default');

}

sub config_defaults {
  my $self = shift;
  return { align  => @{$self->{align_default}} ? $self->{align_default} : $self->{alignable},
	   upcase => $self->{upcase}[0]
	 };
}

sub configure_form {
  my $self    = shift;
  my $current = $self->configuration;
  my $browser = $self->browser_config;
  my $html;
  if ($self->{upcase}) {
    my %labels = map {$_ => $browser->setting($_=>'key') || $_} @{$self->{upcase}};
    $html .= TR(
		th('Features to render uppercase:'),
		td(radio_group(-name    => $self->config_name('upcase'),
			       -values  => ['none',@{$self->{upcase}}],
			       -default  => $current->{upcase} || $self->{upcase_default} || 'none',
			       -labels   => \%labels,
			       @{$self->{upcase}} > 4 ? (-cols     => 4) : ()
			      ))
	       );
  }
  if ($self->{alignable} && @{$self->{alignable}}) {
    my %labels = map {$_ => $browser->setting($_=>'key') || $_} @{$self->{alignable}};
    $html .= TR(
		th('Features to include in alignment:'),
		td(checkbox_group(-name     => $self->config_name('align'),
				  -values   => $self->{alignable},
				  -defaults => $current->{align},
				  -labels   => \%labels,
				  @{$self->{alignable}} > 4 ? (-cols     => 4) : ()
				 )));
  }
  $html .= TR(
	      th({-colspan=>2,-align=>'left'},
		 'Allow up to',popup_menu(-name     => $self->config_name('ragged'),
					  -values   => $self->{ragged},
					  -default  => $current->{ragged} || $self->{ragged_default} || 0),
		 ' bp of unaligned sequence at ends.')
	      );
  return $html ? table({-class=>'searchtitle'},$html) : undef;
}

sub reconfigure {
  my $self = shift;
  my $current = $self->configuration;
  my @align   = $self->config_param('align');
  my $upcase  = $self->config_param('upcase');
  $current->{align}  = \@align;
  $current->{upcase} = $upcase eq 'none' ? undef : $upcase;
  $current->{ragged} = $self->config_param('ragged');
  $current->{flip} = $self->config_param('flip');
}

sub mime_type { 'text/html' }

sub dump {
  my $self    = shift;
  my $segment = shift;

  unless ($segment) {
    print "No sequence specified.\n";
    exit 0;
  }

  my $database      = $self->database;
  my $browser       = $self->browser_config;
  my $configuration = $self->configuration;

#  $configuration->{flip} = $self->page_settings->{flip};

  my $flipped = $configuration->{flip} ? " (reverse complemented)" :'';
  print h1("Alignments for $segment$flipped");

  my $ref_dna = lc $segment->dna;

  if ($segment->strand < 0) {  # don't ask
    $ref_dna    = reversec($ref_dna);
    $configuration->{flip} = 1;
  }

  my ($abs_start,$abs_end) = ($segment->start,$segment->end);

  # do upcasing
  if (my $upcase_track  = $configuration->{upcase}) {
    my @upcase_types    = shellwords($browser->setting($upcase_track=>'feature'));
    my @upcase_features = $segment->features(-types=>\@upcase_types);
    for my $f (@upcase_features) {
      my @segments = $f->segments;
      @segments    = $f unless @segments;
      for my $s (@segments) {
	my $upstart   = $s->low-$abs_start;
	my $uplength  = $s->length;
	$upstart      = 0 if $upstart < 0;
	$uplength     = length($ref_dna) if $uplength > length($ref_dna);
	substr($ref_dna,$upstart,$uplength) =~ tr/a-z/A-Z/;
      }
    }
  }

  # here's where we handle aligned objects
  my @feature_types = map {shellwords($browser->setting($_=>'feature'))} @{$configuration->{align}};
  my @features      = $segment->features(-types=>\@feature_types);

  my (@segments,%strands);

  for my $f (@features) {
    warn "f strand = ",$f->strand if DEBUG;
    my @s = $f->segments;
    @s    = $f unless @s;
    @s    = grep {$abs_start<=$_->abs_end && $abs_end>=$_->abs_start} @s;

    for my $s (@s) {
      my $target = $s->target;
      my ($src_start,$src_end) = ($s->start,$s->end);
      my ($tgt_start,$tgt_end) = ($target->start,$target->end);

      my $flip_bug;

      unless (exists $strands{$target}) {
	my $strand = $f->strand;
	if ($tgt_start > $tgt_end) {
	  $strand    = -1;
	  ($tgt_start,$tgt_end) = ($tgt_end,$tgt_start);
	  $flip_bug++;
	}
	$strands{$target}         = $strand;
	$strands{$target->seq_id} = $strand;
      }

      # Realign the segment a bit
      my ($sdna,$tdna) = ($s->dna,$target->dna);
      if ($flip_bug) {
	$sdna = reversec($sdna);
	$tdna = reversec($tdna);
      }
      warn "raw alignment:\n" if DEBUG;
      warn   $sdna,"\n",$tdna,"\n" if DEBUG;
      warn   "Realigning [$target,$src_start,$src_end,$tgt_start,$tgt_end].\n" if DEBUG;
      my @result = $self->realign($sdna,$tdna);
      foreach (@result) {
	warn "=========> [$target,@$_]\n" if DEBUG;
	my $a = $strands{$target} >= 0 ? [$target->seq_id,$_->[0]+$src_start,$_->[1]+$src_start,$_->[2]+$tgt_start,$_->[3]+$tgt_start]
	                               : [$target->seq_id,$src_end-$_->[1],$src_end-$_->[0],$_->[2]+$tgt_start,$_->[3]+$tgt_start];
	warn "[$target,$_->[0]+$src_start,$_->[1]+$src_start,$tgt_end-$_->[3],$tgt_end-$_->[2]]" if DEBUG;
	warn "=========> [@$a]\n" if DEBUG;
	warn substr($sdna,     $_->[0],$_->[1]-$_->[0]+1),"\n" if DEBUG;
	warn substr($tdna,$_->[2],$_->[3]-$_->[2]+1),"\n"      if DEBUG;
	push @segments,$a;
      }
    }
  }

  # We're now going to do all the alignments
  my %clip;
  for my $seg (@segments) {

    warn "clipping [@$seg]\n" if DEBUG;
    my $target = $seg->[TARGET];

    # left clipping
    if ( (my $delta = $seg->[SRC_START] - $abs_start) < 0 ) {
      warn "clip left $delta" if DEBUG;
      $seg->[SRC_START] = $abs_start;
      if ($strands{$target} >= 0) {
	$seg->[TGT_START] -= $delta;
      }
      warn "Left clipping gives [@$seg]\n" if DEBUG;
    }

    # right clipping
    if ( (my $delta = $abs_end - $seg->[SRC_END]) < 0) {
      warn "clip right $delta" if DEBUG;
      $seg->[SRC_END] = $abs_end;
      if ($strands{$target} < 0) {
	$seg->[TGT_START] -= $delta;
      }
      warn "Right clipping gives [@$seg]\n" if DEBUG;
    }

    my $length = $seg->[SRC_END]-$seg->[SRC_START]+1;
    $seg->[TGT_END] = $seg->[TGT_START]+$length-1;

    warn "Clipping gives [@$seg]\n" if DEBUG;
    $clip{$target}{low} = $seg->[TGT_START]
      if !defined $clip{$target}{low} || $clip{$target}{low} > $seg->[TGT_START];
    $clip{$target}{high} = $seg->[TGT_END]
      if !defined $clip{$target}{high} || $seg->[TGT_END] > $clip{$target}{high};
  }

  my $ragged = $configuration->{ragged} || 0;

  # sort aligned sequences from left to right and store them in the data structure
  # needed by Bio::Graphics::Browser2::PadAlignment
  my @sequences = ($segment->seq_id => $ref_dna);

  my %seqs;
  for my $t (sort {$clip{$a}{low}<=>$clip{$b}{low}} keys %clip) {

    # adjust for ragged ends
    $clip{$t}{low}  -= $ragged;
    $clip{$t}{high} += $ragged;

    $clip{$t}{low}   = 1 if $clip{$t}{low} < 1;

    my @order = $strands{$t}>=0?('low','high'):('high','low');
    my $dna = lc $database->dna($t,@{$clip{$t}}{@order});
    push @sequences,($t => $dna);  # dna() api gives implicit reversec

    # sanity check - needed for adjusting for ragged ends
    warn "$t low = $clip{$t}{low}, dna = $dna\n" if DEBUG;
    warn "expected ",$clip{$t}{high}-$clip{$t}{low}+1," and got ",length($dna) if DEBUG;
    $clip{$t}{high} = $clip{$t}{low}+length($dna)-1 if $clip{$t}{high} > $clip{$t}{low}+length($dna)-1;
  }

  for my $seg (@segments) {
    my ($target,$src_start,$src_end,$tgt_start,$tgt_end) = @$seg;
    warn "clip high = $clip{$target}{high}" if DEBUG;
    warn "was [$target,$src_start,$src_end,$tgt_start,$tgt_end]" if DEBUG;
    $seg->[SRC_START] -= $abs_start;
    $seg->[SRC_END]   -= $abs_start;

    if ($strands{$target} >= 0) {
      $seg->[TGT_START] -= $clip{$target}{low};
      $seg->[TGT_END]   -= $clip{$target}{low};
    } else {
      @{$seg}[TGT_START,TGT_END] = ($clip{$target}{high} - $seg->[TGT_END],
				    $clip{$target}{high} - $seg->[TGT_START]);
    }
    ($target,$src_start,$src_end,$tgt_start,$tgt_end) = @$seg;
    warn "is  [$target,$src_start,$src_end,$tgt_start,$tgt_end]" if DEBUG;
  }

  # remove segments that got clipped out of existence
  @segments = grep { $_->[SRC_START]<=$_->[SRC_END] } @segments;

  if (DEBUG) {
    warn "DEBUG:";
    my %sequences = @sequences;
    foreach (@segments) {
      my ($t,$s,$e,$ts,$te) = @$_;
      warn "[@$_]\n";
      warn substr($sequences{$segment->display_name},$s,$e-$s+1),"\n";
      warn substr($sequences{$t},$ts,$te-$ts+1),"\n";
    }
  }

  my $align = Bio::Graphics::Browser2::PadAlignment->new(\@sequences,\@segments);
  my %offsets = map {$_ => $strands{$_} >= 0 ? $clip{$_}{low} : -$clip{$_}{low}} keys %clip;
  $offsets{$segment->display_name} = $abs_start;

  print pre($align->alignment(\%offsets,{show_mismatches => 1,
					 flip            => $configuration->{flip}}
			     ));
}

sub realign {
  my $self = shift;
  my ($src,$tgt) = @_;
  warn join "\n",Bio::Graphics::Browser2::Realign::align($src,$tgt) if DEBUG;
  return align_segs($src,$tgt);
}

sub reversec {
  my $dna = reverse shift;
  $dna =~ tr/gatcGATC/ctagCTAG/;
  $dna;
}

1;

__END__

=head1 NAME

Bio::Graphics::Browser2::Plugin::Aligner - Dump multiple alignments from GBrowse

=head1 SYNOPSIS

In the appropriate gbrowse configuration file:

 plugins = Aligner

 # and later
 [Aligner:plugin]
 alignable_tracks   = EST
 upcase_tracks      = CDS Motifs
 upcase_default     = CDS

=head1 DESCRIPTION

The Aligner plugin dumps multiple nucleotide-to-nucleotide alignments
in text form.  For it to work properly, the genomic DNA must be
loaded, as well as the DNAs for each of the aligned objects.  In
addition, the GFF load file must represent both the source and the
target of the alignment using the Target notation.  For example:

  ctgA  est  match  1050  3202  .  +  .  Target EST:agt830.5 1 554
  ctgA  est  HSP    1050  1500  .  +  .  Target EST:agt830.5 1 451
  ctgA  est  HSP    3000  3202  .  +  .  Target EST:agt830.5 452 654

=head1 OPTIONS

The following options are recognized.  They must be placed into a
configuration file section named [Aligner:plugin].

 Option             Description

 alignable_tracks   Space-delimited list of tracks to include in
                    the multiple alignment. The genome is always
                    included. If this option is not present, then
                    gbrowse will automatically include any track
                    that has the "draw_target" option set.

 upcase_tracks      Space-delimited list of tracks that will be used
                    to UPCASE the genomic DNA. This is very useful if
                    you want to embed the positions of coding regions
                    or other features inside the multiple alignment.
                    Uppercasing will not be turned on by default. The
                    user must press the "Configure" button, and select
                    which of the uppercase tracks are to be activated
                    from a radiolist.
 upcase_default     A space-delimited list of tracks that will be uppercased
                    by default.


 ragged_default     A small integer indicating that the aligner should
                    include some unaligned bases from the end of each sequence.
                    This is useful for seeing the sequencing primer or cloning
                    site in ESTs.

=head1 BUGS

None known yet.

=head1 SEE ALSO

L<Bio::Graphics::Browser2::Plugin>

=head1 AUTHOR

Lincoln Stein E<lt>lstein@cshl.orgE<gt>.

Copyright (c) 2001 Cold Spring Harbor Laboratory.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut