The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Bio::Graphics::Glyph::dna;

use strict;
use base qw(Bio::Graphics::Glyph::generic);

my %complement = (g=>'c',a=>'t',t=>'a',c=>'g',n=>'n',
		  G=>'C',A=>'T',T=>'A',C=>'G',N=>'N');

sub my_description {
    return <<END;
This glyph draws DNA sequences.  At high magnifications, this glyph
will draw the actual base pairs of the sequence (both strands).  At
low magnifications, the glyph will plot the GC content.  By default,
the GC calculation will use non-overlapping bins, but this can be
changed by specifying the gc_window option, in which case, a 
sliding window calculation will be used.

For this glyph to work, the attached feature must return 
a Bio::PrimarySeq DNA in response to the seq() method.
END
}

sub my_options {
    return {
	do_gc => [
	    'boolean',
	    1,
	    'Whether to draw the GC graph at low magnifications.'],
	gc_window=> [
	    'integer',
	    undef,
	    'Size of the sliding window to use in the GC content',
	    'calculation.  If this is not defined, non-overlapping',
	    'bins will be used. If this is set to "auto", then the',
	    'glyph will choose a window equal to 1% of the interval.'],
	gc_bins => [
	    'integer',
	    100,
	    'Fixed number of intervals to sample across the track.'],
	axis_color => [
	    'color',
	    'black',
	    'Color of the vertical axes in the GC content graph.'],
	strand => [
	    [qw(forward reverse both auto)],
	    'auto',
	    'Show both forward and reverse strand, one of "forward", "reverse",',
	    '"both" or "auto". In "auto" mode, +1 strand features will',
	    'show the plus strand, -1 strand features will',
	    'show the reverse complement and strandless features will show both.'],
    };
}

# turn off description
sub description { 0 }

# turn off label
# sub label { 1 }

sub pad_top {
    my $self = shift;
    my $font = $self->font;
    my $pt   = $self->SUPER::pad_top;
    return $self->dna_fits 
	   ? $pt + $font->height+5
           : 16;
}

sub height {
  my $self = shift;
  my $font = $self->font;
  return $self->dna_fits ? 2*$font->height
       : $self->do_gc    ? $self->SUPER::height
       : 0;
}

sub do_gc {
  my $self = shift;
  my $do_gc = $self->option('do_gc');
  return  if defined($do_gc) && !$do_gc;
  return  1;
}

sub draw_component {
  my $self = shift;
  my $gd = shift;
  my ($x1,$y1,$x2,$y2) = $self->bounds(@_);

  my $dna        = eval { $self->feature->seq };
  $dna           = $dna->seq if ref($dna) and $dna->can('seq'); # to catch Bio::PrimarySeqI objects
  $dna or return;

  # workaround for my misreading of interface -- LS
  $dna = $dna->seq if ref($dna) && $dna->can('seq');

  if ($self->dna_fits) {
    $self->draw_dna($gd,$dna,$x1,$y1,$x2,$y2);
  } elsif ($self->do_gc) {
    $self->draw_gc_content($gd,$dna,$x1,$y1,$x2,$y2);
  }
}

sub draw_dna {
  my $self = shift;

  my ($gd,$dna,$x1,$y1,$x2,$y2) = @_;
  my $pixels_per_base = $self->scale;
  my $feature = $self->feature;

  my $strand = $feature->strand || 1;
  $strand *= -1 if $self->{flip};

  my @bases = split '',$strand >= 0 ? $dna : $self->reversec($dna);

  my $color = $self->fgcolor;
  my $font  = $self->font;
  my $lineheight = $font->height;
  $y1 -= $lineheight/2 - 3;
  my $strands = $self->option('strand') || 'auto';

  my ($forward,$reverse);
  if ($strands eq 'auto') {
    $forward = $feature->strand >= 0;
    $reverse = $feature->strand <= 0;
  } elsif ($strands eq 'both') {
    $forward = $reverse = 1;
  } elsif ($strands eq 'reverse') {
    $reverse = 1;
  } else {
    $forward = 1;
  }
  # minus strand features align right, not left
  $x1 += $pixels_per_base - $font->width - 1 if $strand < 0;
  for (my $i=0;$i<@bases;$i++) {
    my $x = $x1 + $i * $pixels_per_base;
    $gd->char($font,$x+2,$y1,$bases[$i],$color)                                   if $forward;
    $gd->char($font,$x+2,$y1+($forward ? $lineheight:0),
	      $complement{$bases[$i]}||$bases[$i],$color)                         if $reverse;
  }

}

sub draw_gc_content {
  my $self     = shift;
  my $gd       = shift;
  my $dna      = shift;
  my ($x1,$y1,$x2,$y2) = @_;

  $dna = $self->reversec($dna) if $self->{flip};

# get the options that tell us how to draw the GC content

  my $bin_size = length($dna) / ($self->option('gc_bins') || 100);
  $bin_size = 10 if $bin_size < 10;
  my $gc_window = $self->option('gc_window');
#  if ($gc_window && $gc_window eq 'auto' or $gc_window <= length($dna)) {
  if ($gc_window) {
    if ($gc_window eq 'auto') {
      $gc_window = length($dna)/100;
    } elsif ($gc_window > length($dna)) {
      $gc_window = length($dna);
    }
  }

# Calculate the GC content...

  my (@bins, @datapoints, $i);
  my $gc = 0;
  my $maxgc = -1000;
  my $mingc = +1000;

  if ($gc_window) { # ...using a sliding window...

    my @dna = split '', $dna;
    for ($i = 0; $i < @dna; $i++) {
      if ($dna[$i] eq 'G' or $dna[$i] eq 'C' or $dna[$i] eq 'g' or $dna[$i] eq 'c') {
	$dna[$i] = 1;
      } else {
	$dna[$i] = 0;
      }
    }

    for ($i = 0; $i < $gc_window; $i++) {$gc += $dna[$i]}

    push @datapoints, $gc;
    $datapoints[$#datapoints] > $maxgc and $maxgc = $datapoints[$#datapoints];
    $datapoints[$#datapoints] < $mingc and $mingc = $datapoints[$#datapoints];
    for ($i = 0; $i < @dna - $gc_window; $i++) {
      $gc -= $dna[$i];
      $gc += $dna[$i + $gc_window];
      push @datapoints, $gc;
      $datapoints[$#datapoints] > $maxgc and $maxgc = $datapoints[$#datapoints];
      $datapoints[$#datapoints] < $mingc and $mingc = $datapoints[$#datapoints];
    }

    my $scale = $maxgc - $mingc;
    $scale    = 1 unless $scale;
    for ($i = 0; $i < @datapoints; $i++) {
      $datapoints[$i] = ($datapoints[$i] - $mingc) / $scale;
    }
    $maxgc = int($maxgc * 100 / $gc_window);
    $mingc = int($mingc * 100 / $gc_window);

  } else { # ...or a fixed number of bins.

    for (my $i = 0; $i < length($dna) - $bin_size; $i+= $bin_size) {
      my $subseq  = substr($dna,$i,$bin_size);
      my $gc      = $subseq =~ tr/gcGC/gcGC/;
      my $content = $gc/$bin_size;
      $maxgc = $content if ($content > $maxgc);
      $mingc = $content if ($content < $mingc);
      push @bins,$content;
    }

    my $scale = $maxgc - $mingc;
    foreach (my $i; $i < @bins; $i++)
      {
	$bins[$i] = $scale != 0 ? ($bins[$i] - $mingc) / $scale : 0;
      }
    $maxgc = int($maxgc * 100);
    $mingc = int($mingc * 100);

  }

# Calculate values that will be used in the layout

  push @bins,0.5 unless @bins;  # avoid div by zero
  my $bin_width  = ($x2-$x1)/@bins;
  my $bin_height = $y2-$y1;
  my $fgcolor    = $self->fgcolor;
  my $bgcolor    = $self->factory->translate_color(
                      $self->option('grid color') || $self->panel->gridmajorcolor
                   );
  my $axiscolor  = $self->color('axis_color') || $fgcolor;

# Draw the axes
  my $fontwidth = $self->font->width;
  $gd->line($x1,  $y1,        $x1,  $y2,        $axiscolor);
  $gd->line($x2-2,$y1,        $x2-2,$y2,        $axiscolor);
  $gd->line($x1,  $y1,        $x1+3,$y1,        $axiscolor);
  $gd->line($x1,  $y2,        $x1+3,$y2,        $axiscolor);
  $gd->line($x1,  ($y2+$y1)/2,$x1+3,($y2+$y1)/2,$axiscolor);
  $gd->line($x2-4,$y1,        $x2-1, $y1,       $axiscolor);
  $gd->line($x2-4,$y2,        $x2-1, $y2,       $axiscolor);
  $gd->line($x2-4,($y2+$y1)/2,$x2-1,($y2+$y1)/2,$axiscolor);
  $gd->line($x1+5,$y2,        $x2-5,$y2,        $bgcolor);
  $gd->line($x1+5,($y2+$y1)/2,$x2-5,($y2+$y1)/2,$bgcolor);
  $gd->line($x1+5,$y1,        $x2-5,$y1,        $bgcolor);
  $gd->string($self->font,$x1-length('% gc')*$fontwidth,$y1,'% gc',$axiscolor) if $bin_height > $self->font->height*2;

# If we are using a sliding window, the GC graph will be scaled to use the full
# height of the glyph, so label the right vertical axis to show the scaling that# is in effect

  $gd->string($self->font,$x2+3,$y1,"${maxgc}%",$axiscolor) 
    if $bin_height > $self->font->height*2.5;
  $gd->string($self->font,$x2+3,$y2-$self->font->height,"${mingc}%",$axiscolor) 
    if $bin_height > $self->font->height*2.5;

# Draw the GC content graph itself

  if ($gc_window) {

    my $graphwidth = $x2 - $x1;

    # the $points_to_draw variable here can be used in various ways to adjust the
    # sampling of the graphic output. It could be converted to a glphy
    # option that can be set by user.  Here we are just using $graphwidth

#    my $points_to_draw = 1800;
    my $points_to_draw = $graphwidth;

    # if $points_to_draw is taken to mean the total number of points to show along the graph then
    my $inc = int (length($dna) / $points_to_draw) + 1;
    # it might be useful to set this to the x pixel resolution of your
    # screen or the "default width" option in the gbrowse config file or current pixel width

    # if $points_to_draw is taken to mean the number of points per $gc_window length then
#    my $inc = int ($gc_window / $points_to_draw) + 1;

    my $gc_window_width = $gc_window * $self->panel->scale; # pixels for the gc_window length
    my $scale = ($graphwidth - $gc_window_width) / @datapoints;
    for ($i = $inc; $i < @datapoints; $i += $inc) {
      my $x = $i + $gc_window / 2; # the base at the center of the gc_window
      my $xlo = $x1 + ($x - $inc) * $scale; # pixel coordinate for previous point
      my $xhi = $x1 + $x * $scale;
      $gd->line($xlo, $y2 - ($bin_height * $datapoints[$i-$inc]),
		$xhi, $y2 - ($bin_height * $datapoints[$i]),
		$fgcolor);
    }
  }
  else
  {
    for (my $i = 0; $i < @bins; $i++) 
      {
	  my $bin_start  = $x1+$i*$bin_width;
	  my $bin_stop   = $bin_start + $bin_width;
	  my $y          = $y2 - ($bin_height*$bins[$i]);
	  $gd->line($bin_start,$y,
		    $bin_stop,$y,
		    $fgcolor);
	  $gd->line($bin_stop,$y,
		    $bin_stop,$y2 - ($bin_height*$bins[$i+1]),
		    $fgcolor)
	      if $i < @bins-1;
      }
  }
}

sub make_key_feature {
  my $self = shift;
  my @gatc = qw(g a t c);
  my $offset = $self->panel->offset;
  my $scale = 1/$self->scale;  # base pairs/pixel

  my $start = $offset+1;
  my $stop  = $offset+100*$scale;
  my $feature =
    Bio::Graphics::Feature->new(-start=> $start,
				-stop => $stop,
				-seq  => join('',map{$gatc[rand 4]} (1..500)),
				-name => $self->option('key'),
				-strand => '+1',
			       );
  $feature;
}

1;

__END__

=head1 NAME

Bio::Graphics::Glyph::dna - The "dna" glyph

=head1 SYNOPSIS

  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.

=head1 DESCRIPTION

This glyph draws DNA sequences.  At high magnifications, this glyph
will draw the actual base pairs of the sequence (both strands).  At
low magnifications, the glyph will plot the GC content.  By default,
the GC calculation will use non-overlapping bins, but this can be
changed by specifying the gc_window option, in which case, a 
sliding window calculation will be used.

For this glyph to work, the feature must return a Bio::PrimarySeq DNA
object in response to the seq() method. For example, you can use a
Bio::SeqFeature::Generic object with an attached Bio::PrimarySeq like
this:

  my $dna = Bio::PrimarySeq->new( -seq => 'A' x 1000 );
  my $feature = Bio::SeqFeature::Generic->new( -start => 1, -end => 800 );
  $feature->attach_seq($dna);
  $panel->add_track( $feature, -glyph => 'dna' );

A Bio::Graphics::Feature object may also be used.

=head2 OPTIONS

The following options are standard among all Glyphs.  See
L<Bio::Graphics::Glyph> for a full explanation.

  Option      Description                      Default
  ------      -----------                      -------

  -fgcolor      Foreground color	       black

  -outlinecolor	Synonym for -fgcolor

  -bgcolor      Background color               turquoise

  -fillcolor    Synonym for -bgcolor

  -linewidth    Line width                     1

  -height       Height of glyph		       10

  -font         Glyph font		       gdSmallFont

  -connector    Connector type                 0 (false)

  -connector_color
                Connector color                black

  -label        Whether to draw a label	       0 (false)

  -description  Whether to draw a description  0 (false)

  -hilite       Highlight color                undef (no color)

In addition to the common options, the following glyph-specific
options are recognized:

  Option      Description               Default
  ------      -----------               -------

  -do_gc      Whether to draw the GC      true
              graph at low mags

  -gc_window  Size of the sliding window  E<lt>noneE<gt>
  	      to use in the GC content 
	      calculation.  If this is 
	      not defined, non-
	      overlapping bins will be 
	      used. If this is set to
              "auto", then the glyph will
              choose a window equal to
              1% of the interval.

  -gc_bins    Fixed number of intervals   100
              to sample across the
              panel.

  -axis_color Color of the vertical axes  fgcolor
              in the GC content graph

  -strand      Show both forward and      auto
              reverse strand, one of
              "forward", "reverse",
              "both" or "auto".
              In "auto" mode,
              +1 strand features will
              show the plus strand
              -1 strand features will
              show the reverse complement
              and strandless features will
              show both

NOTE: -gc_window=E<gt>'auto' gives nice results and is recommended for
drawing GC content. The GC content axes draw slightly outside the
panel, so you may wish to add some extra padding on the right and
left.

=head1 BUGS

Please report them.

=head1 SEE ALSO

L<Bio::Graphics::Panel>,
L<Bio::Graphics::Glyph>,
L<Bio::Graphics::Glyph::arrow>,
L<Bio::Graphics::Glyph::cds>,
L<Bio::Graphics::Glyph::crossbox>,
L<Bio::Graphics::Glyph::diamond>,
L<Bio::Graphics::Glyph::dna>,
L<Bio::Graphics::Glyph::dot>,
L<Bio::Graphics::Glyph::ellipse>,
L<Bio::Graphics::Glyph::extending_arrow>,
L<Bio::Graphics::Glyph::generic>,
L<Bio::Graphics::Glyph::graded_segments>,
L<Bio::Graphics::Glyph::heterogeneous_segments>,
L<Bio::Graphics::Glyph::line>,
L<Bio::Graphics::Glyph::pinsertion>,
L<Bio::Graphics::Glyph::primers>,
L<Bio::Graphics::Glyph::rndrect>,
L<Bio::Graphics::Glyph::segments>,
L<Bio::Graphics::Glyph::ruler_arrow>,
L<Bio::Graphics::Glyph::toomany>,
L<Bio::Graphics::Glyph::transcript>,
L<Bio::Graphics::Glyph::transcript2>,
L<Bio::Graphics::Glyph::translation>,
L<Bio::Graphics::Glyph::triangle>,
L<Bio::DB::GFF>,
L<Bio::SeqI>,
L<Bio::SeqFeatureI>,
L<Bio::Das>,
L<GD>

=head1 AUTHOR

Lincoln Stein E<lt>lstein@cshl.orgE<gt>.

Sliding window GC calculation added by Peter Ashton E<lt>pda@sanger.ac.ukE<gt>.

Copyright (c) 2001 Cold Spring Harbor Laboratory

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.  See DISCLAIMER.txt for
disclaimers of warranty.

=cut