The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package IntervalTree;

use 5.006;
use POSIX qw(ceil); 
use List::Util qw(max min);
use strict;
use warnings;
no warnings 'once';

our $VERSION = '0.04';

=head1 NAME

IntervalTree.pm

=head1 VERSION

Version 0.01

=head1 DESCRIPTION

Data structure for performing intersect queries on a set of intervals which
preserves all information about the intervals (unlike bitset projection methods).

=cut

# Historical note:
#    This module original contained an implementation based on sorted endpoints
#    and a binary search, using an idea from Scott Schwartz and Piotr Berman.
#    Later an interval tree implementation was implemented by Ian for Galaxy's
#    join tool (see `bx.intervals.operations.quicksect.py`). This was then
#    converted to Cython by Brent, who also added support for
#    upstream/downstream/neighbor queries. This was modified by James to
#    handle half-open intervals strictly, to maintain sort order, and to
#    implement the same interface as the original Intersecter.

=head1 SYNOPSIS


Data structure for performing window intersect queries on a set of 
of possibly overlapping 1d intervals.

Usage
=====

Create an empty IntervalTree

    >>> use IntervalTree;
    >>> my $intersecter = IntervalTree->new();

An interval is a start and end position and a value (possibly None).
You can add any object as an interval:

    >>> $intersecter->insert( 0, 10, "food" );
    >>> $intersecter->insert( 3, 7, {foo=>'bar'} );

    >>> $intersecter->find( 2, 5 );
    ['food', {'foo'=>'bar'}]

If the object has start and end attributes (like the IntervalTree::Interval class) there
is are some shortcuts:

    >>> my $intersecter = IntervalTree->new();
    >>> $intersecter->insert_interval( IntervalTree::Interval->new( 0, 10 ) );
    >>> $intersecter->insert_interval( IntervalTree::Interval->new( 3, 7 ) );
    >>> $intersecter->insert_interval( IntervalTree::Interval->new( 3, 40 ) );
    >>> $intersecter->insert_interval( IntervalTree::Interval->new( 13, 50 ) );

    >>> $intersecter->find( 30, 50 );
    [IntervalTree::Interval(3, 40), IntervalTree::Interval(13, 50)]
    >>> $intersecter->find( 100, 200 );
    []

Before/after for intervals

    >>> $intersecter->before_interval( IntervalTree::Interval->new( 10, 20 ) );
    [IntervalTree::Interval(3, 7)]
    >>> $intersecter->before_interval( IntervalTree::Interval->new( 5, 20 ) );
    []

Upstream/downstream

    >>> $intersecter->upstream_of_interval(IntervalTree::Interval->new(11, 12));
    [IntervalTree::Interval(0, 10)]
    >>> $intersecter->upstream_of_interval(IntervalTree::Interval->new(11, 12, undef, undef, "-"));
    [IntervalTree::Interval(13, 50)]

    >>> intersecter.upstream_of_interval(IntervalTree::Interval(1, 2, undef, undef, "-"), 3);
    [IntervalTree::Interval(3, 7), IntervalTree::Interval(3, 40), IntervalTree::Interval(13, 50)]

=cut
  
sub new {
  my ( $class ) = @_;
  my $self = {};
  $self->{root} = undef;
  return bless $self, $class;
}
    
# ---- Position based interfaces -----------------------------------------

=head2 insert

Insert the interval [start,end) associated with value `value`.

=cut

sub insert {
  my ( $self, $start, $end, $value) = @_;
  if (!defined $self->{root}) {
    $self->{root} = IntervalTree::Node->new( $start, $end, $value );
  }
  else {
    $self->{root} = $self->{root}->insert( $start, $end, $value );
  }
}

*add = \&insert;

=head2 find

Return a sorted list of all intervals overlapping [start,end).

=cut

sub find {
  my ( $self, $start, $end ) = @_;
  if (!defined $self->{root}) {
    return [];
  }
  return $self->{root}->find( $start, $end );
}

=head2 before

Find `num_intervals` intervals that lie before `position` and are no
further than `max_dist` positions away

=cut
    
sub before {
  my ( $self, $position, $num_intervals, $max_dist ) = @_;
  $num_intervals = 1 if !defined $num_intervals;
  $max_dist = 2500 if !defined $max_dist;
  
  if (!defined $self->{root}) {
    return [];
  }
  return $self->{root}->left( $position, $num_intervals, $max_dist );
}

=head2 after

Find `num_intervals` intervals that lie after `position` and are no
further than `max_dist` positions away

=cut

sub after {
  my ( $self, $position, $num_intervals, $max_dist) = @_;
  $num_intervals = 1 if !defined $num_intervals;
  $max_dist = 2500 if !defined $max_dist;

  if (!defined $self->{root}) {
    return [];
  }
  return $self->{root}->right( $position, $num_intervals, $max_dist );
}

# ---- Interval-like object based interfaces -----------------------------

=head2 insert_interval

Insert an "interval" like object (one with at least start and end
attributes)

=cut

sub insert_interval {
  my ( $self, $interval ) = @_;
  $self->insert( $interval->{start}, $interval->{end}, $interval );
}

*add_interval = \&insert_interval;

=head2 before_interval

Find `num_intervals` intervals that lie completely before `interval`
and are no further than `max_dist` positions away

=cut

sub before_interval {
  my ( $self, $interval, $num_intervals, $max_dist ) = @_;
  $num_intervals = 1 if !defined $num_intervals;
  $max_dist = 2500 if !defined $max_dist;
  
  if (!defined $self->{root}) {
    return [];
  }
  return $self->{root}->left( $interval->{start}, $num_intervals, $max_dist );
}

=head2 after_interval

Find `num_intervals` intervals that lie completely after `interval` and
are no further than `max_dist` positions away

=cut

sub after_interval {
  my ( $self, $interval, $num_intervals, $max_dist ) = @_;
  $num_intervals = 1 if !defined $num_intervals;
  $max_dist = 2500 if !defined $max_dist;

  if (!defined $self->{root}) {
    return [];
  }
  return $self->{root}->right( $interval->{end}, $num_intervals, $max_dist );
}

=head2 upstream_of_interval

Find `num_intervals` intervals that lie completely upstream of
`interval` and are no further than `max_dist` positions away

=cut

sub upstream_of_interval {
  my ( $self, $interval, $num_intervals, $max_dist ) = @_;
  $num_intervals = 1 if !defined $num_intervals;
  $max_dist = 2500 if !defined $max_dist;

  if (!defined $self->{root}) {
    return [];
  }
  if ($interval->{strand} == -1 || $interval->{strand} eq "-") {
    return $self->{root}->right( $interval->{end}, $num_intervals, $max_dist );
  }
  else {
    return $self->{root}->left( $interval->{start}, $num_intervals, $max_dist );
  }
}

=head2 downstream_of_interval

Find `num_intervals` intervals that lie completely downstream of
`interval` and are no further than `max_dist` positions away

=cut


sub downstream_of_interval {
  my ( $self, $interval, $num_intervals, $max_dist ) = @_;
  $num_intervals = 1 if !defined $num_intervals;
  $max_dist = 2500 if !defined $max_dist;

  if (!defined $self->{root}) {
    return [];
  }
  if ($interval->{strand} == -1 || $interval->{strand} eq "-") {
    return $self->{root}->left( $interval->{start}, $num_intervals, $max_dist );
  }
  else {
    return $self->{root}->right( $interval->{end}, $num_intervals, $max_dist );
  }
}

=head2 traverse
  
call fn for each element in the tree

=cut

sub traverse {
  my ($self, $fn) = @_;
  if (!defined $self->{root}) {
    return undef;
  }
  return $self->{root}->traverse($fn);
}

=head2 IntervalTree::Node

A single node of an `IntervalTree`.

NOTE: Unless you really know what you are doing, you probably should us
      `IntervalTree` rather than using this directly. 

=cut

package IntervalTree::Node;
use List::Util qw(min max);

our $EmptyNode = IntervalTree::Node->new( 0, 0, IntervalTree::Interval->new(0, 0));

sub nlog {
  return -1.0 / log(0.5);
}

sub left_node {
  my ($self) = @_;
  return $self->{cleft} != $EmptyNode ? $self->{cleft} : undef;
}

sub right_node {
  my ($self) = @_;
  return $self->{cright} != $EmptyNode ? $self->{cright}  : undef;
}

sub root_node {
  my ($self) = @_;
  return $self->{croot} != $EmptyNode ? $self->{croot} : undef;
}
    
sub str {
  my ($self) = @_;
  return "Node($self->{start}, $self->{end})";
}

sub new {
  my ($class, $start, $end, $interval) = @_;
  # Perl lacks the binomial distribution, so we convert a
  # uniform into a binomial because it naturally scales with
  # tree size.  Also, perl's uniform is perfect since the
  # upper limit is not inclusive, which gives us undefined here.
  my $self = {};
  $self->{priority} = POSIX::ceil(nlog() * log(-1.0/(1.0 * rand() - 1)));
  $self->{start}    = $start;
  $self->{end}      = $end;
  $self->{interval} = $interval;
  $self->{maxend}   = $end;
  $self->{minstart} = $start;
  $self->{minend}   = $end;
  $self->{cleft}    = $EmptyNode;
  $self->{cright}   = $EmptyNode;
  $self->{croot}    = $EmptyNode;
  return bless $self, $class;
}

=head2 insert
  
Insert a new IntervalTree::Node into the tree of which this node is
currently the root. The return value is the new root of the tree (which
may or may not be this node!)

=cut

sub insert {
  my ($self, $start, $end, $interval) = @_;
  my $croot = $self;
  # If starts are the same, decide which to add interval to based on
  # end, thus maintaining sortedness relative to start/end
  my $decision_endpoint = $start;
  if ($start == $self->{start}) {
    $decision_endpoint = $end;
  }

  if ($decision_endpoint > $self->{start}) {
    # insert to cright tree
    if ($self->{cright} != $EmptyNode) {
      $self->{cright} = $self->{cright}->insert( $start, $end, $interval );
    }
    else {
      $self->{cright} = IntervalTree::Node->new( $start, $end, $interval );
    }
    # rebalance tree
    if ($self->{priority} < $self->{cright}{priority}) {
      $croot = $self->rotate_left();
    }
  }
  else {
    # insert to cleft tree
    if ($self->{cleft} != $EmptyNode) {
      $self->{cleft} = $self->{cleft}->insert( $start, $end, $interval);
    }
    else {
      $self->{cleft} = IntervalTree::Node->new( $start, $end, $interval);
    }
    # rebalance tree
    if ($self->{priority} < $self->{cleft}{priority}) {
      $croot = $self->rotate_right();
    }
  }

  $croot->set_ends();
  $self->{cleft}{croot}  = $croot;
  $self->{cright}{croot} = $croot;
  return $croot;
}

sub rotate_right {
  my ($self) = @_;
  my $croot = $self->{cleft};
  $self->{cleft}  = $self->{cleft}{cright};
  $croot->{cright} = $self;
  $self->set_ends();
  return $croot;
}

sub rotate_left {
  my ($self) = @_;
  my $croot = $self->{cright};
  $self->{cright} = $self->{cright}{cleft};
  $croot->{cleft}  = $self;
  $self->set_ends();
  return $croot;
}

sub set_ends {
  my ($self) = @_;
  if ($self->{cright} != $EmptyNode && $self->{cleft} != $EmptyNode) {
    $self->{maxend} = max($self->{end}, $self->{cright}{maxend}, $self->{cleft}{maxend});
    $self->{minend} = min($self->{end}, $self->{cright}{minend}, $self->{cleft}{minend});
    $self->{minstart} = min($self->{start}, $self->{cright}{minstart}, $self->{cleft}{minstart});
  }
  elsif ( $self->{cright} != $EmptyNode) {
    $self->{maxend} = max($self->{end}, $self->{cright}{maxend});
    $self->{minend} = min($self->{end}, $self->{cright}{minend});
    $self->{minstart} = min($self->{start}, $self->{cright}{minstart});
  }
  elsif ( $self->{cleft} != $EmptyNode) {
    $self->{maxend} = max($self->{end}, $self->{cleft}{maxend});
    $self->{minend} = min($self->{end}, $self->{cleft}{minend});
    $self->{minstart} = min($self->{start}, $self->{cleft}{minstart});
  }
}


=head2 intersect

given a start and a end, return a list of features
falling within that range

=cut

sub intersect {
  my ( $self, $start, $end, $sort ) = @_;
  $sort = 1 if !defined $sort;
  my $results = [];
  $self->_intersect( $start, $end, $results );
  return $results;
}

*find = \&intersect;

sub _intersect {
  my ( $self, $start, $end, $results) = @_;
  # Left subtree
  if ($self->{cleft} != $EmptyNode && $self->{cleft}{maxend} > $start) {
    $self->{cleft}->_intersect( $start, $end, $results );
  }
  # This interval
  if (( $self->{end} > $start ) && ( $self->{start} < $end )) {
    push @$results, $self->{interval};
  }
  # Right subtree
  if ($self->{cright} != $EmptyNode && $self->{start} < $end) {
    $self->{cright}->_intersect( $start, $end, $results );
  }
}
    

sub _seek_left {
  my ($self, $position, $results, $n, $max_dist) = @_;
  # we know we can bail in these 2 cases.
  if ($self->{maxend} + $max_dist < $position) {
    return;
  }
  if ($self->{minstart} > $position) { 
    return;
  }

  # the ordering of these 3 blocks makes it so the results are
  # ordered nearest to farest from the query position
  if ($self->{cright} != $EmptyNode) {
    $self->{cright}->_seek_left($position, $results, $n, $max_dist);
  }

  if (-1 < $position - $self->{end} && $position - $self->{end} < $max_dist) {
    push @$results, $self->{interval};
  }

  # TODO: can these conditionals be more stringent?
  if ($self->{cleft} != $EmptyNode) {
    $self->{cleft}->_seek_left($position, $results, $n, $max_dist);
  }
}


    
sub _seek_right {
  my ($self, $position, $results, $n, $max_dist) = @_;
  # we know we can bail in these 2 cases.
  return if $self->{maxend} < $position;
  return if $self->{minstart} - $max_dist > $position;

  #print "SEEK_RIGHT:",self, self.cleft, self.maxend, self.minstart, position

  # the ordering of these 3 blocks makes it so the results are
  # ordered nearest to farest from the query position
  if ($self->{cleft} != $EmptyNode) {
    $self->{cleft}->_seek_right($position, $results, $n, $max_dist);
  }

  if (-1 < $self->{start} - $position && $self->{start} - $position < $max_dist) {
    push @$results, $self->{interval};
  }

  if ($self->{cright} != $EmptyNode) {
    $self->{cright}->_seek_right($position, $results, $n, $max_dist);
  }
}

=head2 left

find n features with a start > than `position`
    f: a IntervalTree::Interval object (or anything with an `end` attribute)
    n: the number of features to return
    max_dist: the maximum distance to look before giving up.

=cut
    
sub left {
  my ($self, $position, $n, $max_dist) = @_;
  $n = 1 if !defined $n;
  $max_dist = 2500 if !defined $max_dist;

  my $results = [];
  # use start - 1 becuase .left() assumes strictly left-of
  $self->_seek_left( $position - 1, $results, $n, $max_dist );
  return $results if length(@$results) == $n;

  my $r = $results;
  @$r = sort {$b->{end} <=> $a->{end}} @$r;
  return @$r[0..$n-1];
}

=head2 right

find n features with a end < than position
    f: a IntervalTree::Interval object (or anything with a `start` attribute)
    n: the number of features to return
    max_dist: the maximum distance to look before giving up.

=cut

sub right {
  my ($self, $position, $n, $max_dist) = @_;
  $n = 1 if !defined $n;
  $max_dist = 2500 if !defined $max_dist;

  my $results = [];
  # use end + 1 because .right() assumes strictly right-of
  $self->_seek_right($position + 1, $results, $n, $max_dist);
  return $results if length(@$results) == $n;

  my $r = $results;
  @$r = sort {$a->{start} <=> $b->{start}} @$r;
  return @$r[0..$n-1];
}

sub traverse {
  my ($self, $func) = @_;
  $self->_traverse($func);
}

sub _traverse {
  my ($self, $func) = @_;
  $self->{cleft}->_traverse($func) if $self->{cleft} != $EmptyNode;
  $func->($self);
  $self->{cright}->_traverse($func) if $self->{cright} != $EmptyNode;
}

## ---- Wrappers that retain the old interface -------------------------------

=head2 IntervalTree::Interval

Basic feature, with required integer start and end properties.
Also accepts optional strand as +1 or -1 (used for up/downstream queries),
a name, and any arbitrary data is sent in on the info keyword argument

    >>> from bx.intervals.intersection import IntervalTree::Interval

    >>> f1 = IntervalTree::Interval->new(23, 36);
    >>> f2 = IntervalTree::Interval->new(34, 48, {'chr':12, 'anno':'transposon'});
    >>> f2
    Interval(34, 48, {'anno': 'transposon', 'chr': 12})

=cut

package IntervalTree::Interval;

sub new {
  my ($class, $start, $end, $value, $chrom, $strand) = @_;
  die "start must be less than end" unless $start <= $end;
  my $self = {};
  $self->{start}  = $start;
  $self->{end}   = $end;
  $self->{value} = $value;
  $self->{chrom} = $chrom;
  $self->{strand} = $strand;
  return bless $self, $class;
}

sub str {
  my ($self) = @_;
  my $fstr = "Interval($self->{start}, $self->{end}";
  if (defined($self->{value})) {
    $fstr .= ", value=$self->{value}";
  }
  $fstr .= ")";
  return $fstr;
}

=head2 AUTHOR

Ben Booth, C<< <benwbooth at gmail.com> >>

Original Authors: 

    James Taylor (james@jamestaylor.org),
    Ian Schenk (ian.schenck@gmail.com),
    Brent Pedersen (bpederse@gmail.com)

=head2 BUGS

Please report any bugs or feature requests to C<bug-intervaltree at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=IntervalTree>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head2 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc IntervalTree


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker (report bugs here)

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=IntervalTree>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/IntervalTree>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/IntervalTree>

=item * Search CPAN

L<http://search.cpan.org/dist/IntervalTree/>

=back

=head2 ACKNOWLEDGEMENTS

This code was directly ported from the bx-python project:

https://bitbucket.org/james_taylor/bx-python/src/tip/lib/bx/intervals/intersection.pyx

Original Authors: 

    James Taylor (james@jamestaylor.org),
    Ian Schenk (ian.schenck@gmail.com),
    Brent Pedersen (bpederse@gmail.com)

=head2 LICENSE AND COPYRIGHT

Copyright 2012 Ben Booth.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See http://dev.perl.org/licenses/ for more information.


=cut

1; # End of IntervalTree