lib/Math/Vector/Real/kdTree.pm

package Math::Vector::Real::kdTree;

our $VERSION = '0.15';

use 5.010;
use strict;
use warnings;
use Carp;

use Math::Vector::Real;
use Sort::Key::Top qw(nkeypartref nhead ntail nkeyhead);
use Hash::Util::FieldHash qw(idhash);

our $max_per_pole = 12;
our $recommended_per_pole = 6;

use constant _n    => 0; # elements on subtree
use constant _c0   => 1; # corner 0
use constant _c1   => 2; # corner 1
use constant _sum  => 3; # centroid * n
use constant _s0   => 4; # subtree 0
use constant _s1   => 5; # subtree 1
use constant _axis => 6; # cut axis
use constant _cut  => 7; # cut point (mediam)

# on leaf nodes:
use constant _ixs   => 4;
use constant _leaf_size => _ixs + 1;

sub new {
    my $class = shift;
    my @v = map V(@$_), @_;
    my $self = { vs   => \@v,
                 tree => (@v ? _build(\@v, [0..$#v]) : undef) };
    bless $self, $class;
}

sub clone {
    my $self = shift;
    require Storable;
    my $clone = { vs   => [@{$self->{vs}}],
                  tree => Storable::dclone($self->{tree}) };
    $clone->{hidden} = { %{$self->{hidden}} } if $self->{hidden};
    bless $clone, ref $self;
}

sub _build {
    my ($v, $ixs) = @_;
    if (@$ixs > $recommended_per_pole) {
        my ($b, $t) = Math::Vector::Real->box(@$v[@$ixs]);
        my $axis = ($t - $b)->max_component_index;
        my $bstart = @$ixs >> 1;
        my ($p0, $p1) = nkeypartref { $v->[$_][$axis] } $bstart => @$ixs;
        my $s0 = _build($v, $p0);
        my $s1 = _build($v, $p1);
        my ($c0, $c1) = Math::Vector::Real->box(@{$s0}[_c0, _c1], @{$s1}[_c0, _c1]);
        my $cut = 0.5 * ($s0->[_c1][$axis] + $s1->[_c0][$axis]);
        # warn "b: $b, t: $t, axis: $axis, p0: $p0, p1: $p1, s0: $s0, s1: $s1, c0: $c0, c1: $c1, cut: $cut\n";
        # [n sum s0 s1 axis cut]
        [scalar(@$ixs), $c0, $c1, $s0->[_sum] + $s1->[_sum], $s0, $s1, $axis, $cut];
    }
    else {
        # [n, sum, ixs]
        my @vs = @{$v}[@$ixs];
        my ($c0, $c1) = Math::Vector::Real->box(@vs);
        [scalar(@$ixs), $c0, $c1, Math::Vector::Real->sum(@vs), $ixs];
    }
}

sub size { scalar @{shift->{vs}} }

sub at {
    my ($self, $ix) = @_;
    Math::Vector::Real::clone($self->{vs}[$ix]);
}

sub insert {
    my $self = shift;
    @_ or return;
    my $vs = $self->{vs};
    my $ix = @$vs;
    if (my $tree = $self->{tree}) {
        for (@_) {
            my $v = V(@$_);
            push @$vs, $v;
            _insert($vs, $self->{tree}, $#$vs)
        }
    }
    else {
        @$vs = map V(@$_), @_;
        $self->{tree} = _build($vs, [0..$#$vs]);
    }
    return $ix;
}

# _insert does not return anything but modifies its $t argument in
# place. This is really ugly but done to improve performance.

sub _insert {
    my ($vs, $t, $ix) = @_;
    my $v = $vs->[$ix];

    # update aggregated values
    my $n = $t->[_n]++;
    @{$t}[_c0, _c1] = Math::Vector::Real->box($v, @{$t}[_c0, _c1]);
    $t->[_sum] += $v;

    if (defined (my $axis = $t->[_axis])) {
        my $cut = $t->[_cut];
        my $c = $v->[$axis];

        my $n0 = $t->[_s0][_n];
        my $n1 = $t->[_s1][_n];

        if ($c <= $cut) {
            if (2 * $n1 + $max_per_pole >= $n0) {
                _insert($vs, $t->[_s0], $ix);
                return;
            }
        }
        else {
            if (2 * $n0 + $max_per_pole >= $n1) {
                _insert($vs, $t->[_s1], $ix);
                return;
            }
        }

        # tree needs rebalancing
        my @store;
        $#store = $n; # preallocate space
        @store = ($ix);
        _push_all($t, \@store);
        $_[1] = _build($vs, \@store);
    }
    else {
        my $ixs = $t->[_ixs];
        push @$ixs, $ix;
        if ($n > $max_per_pole) {
            $_[1] = _build($vs, $ixs);
        }
    }
}

sub move {
    my ($self, $ix, $v) = @_;
    my $vs = $self->{vs};
    ($ix >= 0 and $ix < @$vs) or croak "index out of range";
    _delete($vs, $self->{tree}, $ix);
    $vs->[$ix] = Math::Vector::Real::clone($v);
    _insert($vs, $self->{tree}, $ix);
}

sub _delete {
    my ($vs, $t, $ix) = @_;
    if (defined (my $axis = $t->[_axis])) {
        my $v = $vs->[$ix];
        my $c = $v->[$axis];
        my ($s0, $s1, $cut) = @{$t}[_s0, _s1, _cut];
        if ($c <= $cut and _delete($vs, $s0, $ix)) {
            if ($s0->[_n]) {
                $t->[_n]--;
                $t->[_sum] -= $v;
            }
            else {
                # when one subnode becomes empty, the other gets promoted up:
                @$t = @$s1;
            }
            return 1;
        }
        elsif ($c >= $cut and _delete($vs, $s1, $ix)) {
            if ($s1->[_n]) {
                $t->[_n]--;
                $t->[_sum] -= $v;
            }
            else {
                @$t = @$s0;
            }
            return 1;
        }
    }
    else {
        my $ixs = $t->[_ixs];
        for (0..$#$ixs) {
            if ($ixs->[$_] == $ix) {
                splice(@$ixs, $_, 1);
                $t->[_n]--;
                $t->[_sum] -= $vs->[$ix];
                return 1;
            }
        }
    }
    return 0;
}

sub hide {
    my ($self, $ix) = @_;
    my $vs = $self->{vs};
    ($ix >= 0 and $ix < @$vs) or croak "index out of range";
    _delete($vs, $self->{tree}, $ix);
    ($self->{hidden} //= {})->{$ix} = 1;
}

sub _push_all {
    my ($t, $store) = @_;
    my @q;
    while ($t) {
        if (defined $t->[_axis]) {
            push @q, $t->[_s1];
            $t = $t->[_s0];
        }
        else {
            push @$store, @{$t->[_ixs]};
            $t = pop @q;
        }
    }
}

sub path {
    my ($self, $ix) = @_;
    my $p = _path($self->{vs}, $self->{tree}, $ix);
    my $l = 1;
    $l = (($l << 1) | $_) for @$p;
    $l
}

sub _path {
    my ($vs, $t, $ix) = @_;
    if (defined (my $axis = $t->[_axis])) {
        my $v = $vs->[$ix];
        my $c = $v->[$axis];
        my $cut = $t->[_cut];
        my $p;
        if ($c <= $cut) {
            if ($p = _path($vs, $t->[_s0], $ix)) {
                unshift @$p, 0;
                return $p;
            }
        }
        if ($c >= $cut) {
            if ($p = _path($vs, $t->[_s1], $ix)) {
                unshift @$p, 1;
                return $p;
            }
        }
    }
    else {
        return [] if grep $_ == $ix, @{$t->[_ixs]}
    }
    ()
}

sub find {
    my ($self, $v) = @_;
    _find($self->{vs}, $self->{tree}, $v);
}

sub _find {
    my ($vs, $t, $v) = @_;
    while (defined (my $axis = $t->[_axis])) {
        my $cut = $t->[_cut];
        my $c = $v->[$axis];
        if ($c < $cut) {
            $t = $t->[_s0];
        }
        else {
            if ($c == $cut) {
                my $ix = _find($vs, $t->[_s0], $v);
                return $ix if defined $ix;
            }
            $t = $t->[_s1];
        }
    }

    for (@{$t->[_ixs]}) {
        return $_ if $vs->[$_] == $v;
    }
    ()
}

sub find_nearest_vector {
    my ($self, $v, $d, @but) = @_;
    my $t = $self->{tree} or return;
    my $vs = $self->{vs};
    my $d2 = (defined $d ? $d * $d : 'inf');

    my $but;
    if (@but) {
        if (@but == 1 and ref $but[0] eq 'HASH') {
            $but = $but[0];
        }
        else {
            my %but = map { $_ => 1 } @but;
            $but = \%but;
        }
    }

    my ($rix, $rd2) = _find_nearest_vector($vs, $t, $v, $d2, undef, $but);
    $rix // return;
    wantarray ? ($rix, sqrt($rd2)) : $rix;
}

*find_nearest_neighbor = \&find_nearest_vector; # for backwards compatibility

sub find_nearest_vector_internal {
    my ($self, $ix, $d) = @_;
    $ix >= 0 or croak "index out of range";
    $self->find_nearest_vector($self->{vs}[$ix], $d, $ix);
}

*find_nearest_neighbor_internal = \&find_nearest_vector_internal; # for backwards compatibility

sub _find_nearest_vector {
    my ($vs, $t, $v, $best_d2, $best_ix, $but) = @_;

    my @queue;
    my @queue_d2;

    while (1) {
        if (defined (my $axis = $t->[_axis])) {
            # substitute the current one by the best subtree and queue
            # the worst for later
            ($t, my ($q)) = @{$t}[($v->[$axis] <= $t->[_cut]) ? (_s0, _s1) : (_s1, _s0)];
            my $q_d2 = $v->dist2_to_box(@{$q}[_c0, _c1]);
            if ($q_d2 <= $best_d2) {
                my $j;
                for ($j = $#queue_d2; $j >= 0; $j--) {
                    last if $queue_d2[$j] >= $q_d2;
                }
                splice @queue, ++$j, 0, $q;
                splice @queue_d2, $j, 0, $q_d2;
            }
        }
        else {
            for (@{$t->[_ixs]}) {
                next if $but and $but->{$_};
                my $d21 = $vs->[$_]->dist2($v);
                if ($d21 <= $best_d2) {
                    $best_d2 = $d21;
                    $best_ix = $_;
                }
            }

            if ($t = pop @queue) {
                if ($best_d2 >= pop @queue_d2) {
                    next;
                }
            }

            return ($best_ix, $best_d2);
        }
    }
}

sub find_nearest_vector_in_box {
    my ($self, $v, $a, $b, $d, @but) = @_;

    my $t = $self->{tree} or return;
    my $vs = $self->{vs};
    my ($a1, $b1) = Math::Vector::Real->box($a, $b);
    my $d2 = (defined $d ? $d * $d : $v->max_dist2_to_box($a1, $b1));
    my $but;
    if (@but) {
        if (@but == 1 and ref $but[0] eq 'HASH') {
            $but = $but[0];
        }
        else {
            my %but = map { $_ => 1 } @but;
            $but = \%but;
        }
    }
    my ($rix, $rd2) = _find_nearest_vector_in_box($vs, $t, $v, $a1, $b1, $d2, $but);
    $rix // return;
    wantarray ? ($rix, sqrt($rd2)) : $rix;
}

sub _find_nearest_vector_in_box {
    my ($vs, $t, $v, $a, $b, $best_d2, $but) = @_;
    my $best_ix;
    my @queue = $t;
    my @queue_d2 = 0;

    while (my $t = pop @queue) {
        last if $best_d2 < pop @queue_d2;
        if (defined (my $axis = $t->[_axis])) {
            my @sides;
            push @sides, $t->[_s0] if $a->[$axis] <= $t->[_cut];
            push @sides, $t->[_s1] if $b->[$axis] >= $t->[_cut];
            for my $s (@sides) {
                my $d2 = $v->dist2_to_box(@$s[_c0, _c1]);
                if ($d2 <= $best_d2) {
                    my $j;
                    for ($j = $#queue_d2; $j >= 0; $j--) {
                        last if $queue_d2[$j] >= $d2;
                    }
                    splice @queue, ++$j, 0, $s;
                    splice @queue_d2, $j, 0, $d2;
                }
            }
        }
        else {
            for (@{$t->[_ixs]}) {
                next if $but and $but->{$_};
                my $v1 = $vs->[$_];
                my $d2 = $v1->dist2($v);
                if ($d2 <= $best_d2 and $v1->dist2_to_box($a, $b) == 0) {
                    $best_d2 = $d2;
                    $best_ix = $_;
                }
            }
        }
    }
    return ($best_ix, $best_d2);
}

sub find_nearest_vector_in_box_chebyshev {
    my ($self, $v, $a, $b, $d, @but) = @_;

    my $t = $self->{tree} or return;
    my $vs = $self->{vs};
    my ($a1, $b1) = Math::Vector::Real->box($a, $b);
    my $d2 = (defined $d ? $d * $d : $v->max_dist2_to_box($a1, $b1));
    my $but;
    if (@but) {
        if (@but == 1 and ref $but[0] eq 'HASH') {
            $but = $but[0];
        }
        else {
            my %but = map { $_ => 1 } @but;
            $but = \%but;
        }
    }
    my ($rix, $rd2) = _find_nearest_vector_in_box($vs, $t, $v, $a1, $b1, $d2, $but);
    $rix // return;
    wantarray ? ($rix, sqrt($rd2)) : $rix;
}

sub _find_nearest_vector_in_box_chebyshev {
    my ($vs, $t, $v, $a, $b, $best_d, $but) = @_;
    my $best_ix;
    my @queue = $t;
    my @queue_d = 0;

    while (my $t = pop @queue) {
        last if $best_d < pop @queue_d;
        if (defined (my $axis = $t->[_axis])) {
            my @sides;
            push @sides, $t->[_s0] if $a->[$axis] <= $t->[_cut];
            push @sides, $t->[_s1] if $b->[$axis] >= $t->[_cut];
            for my $s (@sides) {
                my $d = $v->chebyshev_dist_to_box(@$s[_c0, _c1]);
                if ($d <= $best_d) {
                    my $j;
                    for ($j = $#queue_d; $j >= 0; $j--) {
                        last if $queue_d[$j] >= $d;
                    }
                    splice @queue, ++$j, 0, $s;
                    splice @queue_d, $j, 0, $d;
                }
            }
        }
        else {
            for (@{$t->[_ixs]}) {
                next if $but and $but->{$_};
                my $v1 = $vs->[$_];
                my $d = $v1->chebyshev_dist($v);
                if ($d <= $best_d and $v1->chebyshev_dist_to_box($a, $b) == 0) {
                    $best_d = $d;
                    $best_ix = $_;
                }
            }
        }
    }
    return ($best_ix, $best_d);
}

sub find_nearest_vector_all_internal {
    my ($self, $d) = @_;
    my $vs = $self->{vs};
    return unless @$vs > 1;
    my $d2 = (defined $d ? $d * $d : 'inf');

    my @best = ((undef) x @$vs);
    my @d2   = (($d2)   x @$vs);
    _find_nearest_vector_all_internal($vs, $self->{tree}, \@best, \@d2);
    return @best;
}

*find_nearest_neighbor_all_internal = \&find_nearest_vector_all_internal; # for backwards compatibility

sub _find_nearest_vector_all_internal {
    my ($vs, $t, $bests, $d2s) = @_;
    if (defined (my $axis = $t->[_axis])) {
        my @all_leafs;
        for my $side (0, 1) {
            my @leafs = _find_nearest_vector_all_internal($vs, $t->[_s0 + $side], $bests, $d2s);
            my $other = $t->[_s1 - $side];
            my ($c0, $c1) = @{$other}[_c0, _c1];
            for my $leaf (@leafs) {
                for my $ix (@{$leaf->[_ixs]}) {
                    my $v = $vs->[$ix];
                    if ($v->dist2_to_box($c0, $c1) < $d2s->[$ix]) {
                        ($bests->[$ix], $d2s->[$ix]) =
                            _find_nearest_vector($vs, $other, $v, $d2s->[$ix], $bests->[$ix]);
                    }
                }
            }
            push @all_leafs, @leafs;
        }
        return @all_leafs;
    }
    else {
        my $ixs = $t->[_ixs];
        for my $i (1 .. $#$ixs) {
            my $ix_i = $ixs->[$i];
            my $v_i = $vs->[$ix_i];
            for my $ix_j (@{$ixs}[0 .. $i - 1]) {
                my $d2 = $v_i->dist2($vs->[$ix_j]);
                if ($d2 < $d2s->[$ix_i]) {
                    $d2s->[$ix_i] = $d2;
                    $bests->[$ix_i] = $ix_j;
                }
                if ($d2 < $d2s->[$ix_j]) {
                    $d2s->[$ix_j] = $d2;
                    $bests->[$ix_j] = $ix_i;
                }
            }
        }
        return $t;
    }
}

sub find_two_nearest_vectors {
    my $self = shift;
    my $t = $self->{tree} or return;
    my $vs = $self->{vs};
    if (my ($rix0, $rix1, $rd2) = _find_two_nearest_vectors($vs, $t)) {
        return wantarray ? ($rix0, $rix1, sqrt($rd2)) : sqrt($rd2)
    }
    ()
}

sub _pole_id {
    my ($id, $deep) = __pole_id(@_);
    "$id/$deep";
}

sub __pole_id {
    my ($vs, $t) = @_;
    if (defined $t->[_axis]) {
        my ($id, $deep) = __pole_id($vs, $t->[_s0]);
        return ($id, $deep+1);
    }
    return ($t->[_ixs][0], 0)
}

sub _find_two_nearest_vectors {
    my ($vs, $t) = @_;

    my @best_ixs = (undef, undef);
    my $best_d2 = 'inf' + 0;

    my @inner;
    my @queue_t1;
    my @queue_t2;
    while ($t) {
        if (defined $t->[_axis]) {
            my ($s0, $s1) = @{$t}[_s0, _s1];
            push @inner, $s1;
            push @queue_t1, $s0;
            push @queue_t2, $s1;
            $t = $s0;
        }
        else {
            my $ixs = $t->[_ixs];
            for my $i (1 .. $#$ixs) {
                my $ix1 = $ixs->[$i];
                my $v1 = $vs->[$ix1];
                for my $j (0 .. $i - 1) {
                    my $ix2 = $ixs->[$j];
                    my $d2 = Math::Vector::Real::dist2($v1, $vs->[$ix2]);
                    if ($d2 < $best_d2) {
                        $best_d2 = $d2;
                        @best_ixs = ($ix1, $ix2);
                    }
                }
            }
            $t = pop @inner;
        }
    }

    my @queue_d2 = (0) x @queue_t1;
    while (my $t1 = pop @queue_t1) {
        my $t2 = pop @queue_t2;
        my $d2 = pop @queue_d2;
        if ($d2 < $best_d2) {
            unless (defined $t1->[_axis]) {
                unless (defined $t2->[_axis]) {
                    for my $ix1 (@{$t1->[_ixs]}) {
                        my $v1 = $vs->[$ix1];
                        for my $ix2 (@{$t2->[_ixs]}) {
                            my $d2 = Math::Vector::Real::dist2($v1, $vs->[$ix2]);
                            if ($d2 < $best_d2) {
                                $best_d2 = $d2;
                                @best_ixs = ($ix1, $ix2);
                            }
                        }
                    }
                    next;
                }
                ($t1, $t2) = ($t2, $t1);
            }
            for my $s (@{$t1}[_s0, _s1]) {
                my $d2 = Math::Vector::Real->dist2_between_boxes(@{$s}[_c0, _c1], @{$t2}[_c0, _c1]);
                if ($d2) {
                    if ($d2 < $best_d2) {
                        unshift @queue_t1, $t2;
                        unshift @queue_t2, $s;
                        unshift @queue_d2, $d2;
                    }
                }
                else {
                    push @queue_t1, $t2;
                    push @queue_t2, $s;
                    push @queue_d2, 0;
                }
            }
        }
    }
    (@best_ixs, $best_d2)
}

sub find_in_ball {
    my ($self, $z, $d, $but) = @_;
    if (defined $but and ref $but ne 'HASH') {
        $but = { $but => 1 };
    }
    _find_in_ball($self->{vs}, $self->{tree}, $z, $d * $d, $but);
}

sub _find_in_ball {
    my ($vs, $t, $z, $d2, $but) = @_;
    my (@queue, @r);
    my $r = 0;

    while (1) {
        if (defined (my $axis = $t->[_axis])) {
            my $c = $z->[$axis];
            my $cut = $t->[_cut];
            ($t, my ($q)) = @{$t}[$c <= $cut ? (_s0, _s1) : (_s1, _s0)];
            push @queue, $q if $z->dist2_to_box(@{$q}[_c0, _c1]) <= $d2;
        }
        else {
            my $ixs = $t->[_ixs];
            if (wantarray) {
                push @r, grep { $vs->[$_]->dist2($z) <= $d2 } @$ixs;
            }
            else {
                $r += ( $but
                        ? grep { !$but->{$_} and $vs->[$_]->dist2($z) <= $d2 } @$ixs
                        : grep { $vs->[$_]->dist2($z) <= $d2 } @$ixs );
            }

            $t = pop @queue or last;
        }
    }

    if (wantarray) {
        if ($but) {
            return grep !$but->{$_}, @r;
        }
        return @r;
    }
    return $r;
}

sub find_in_box {
    my ($self, $a, $b, $but) = @_;
    my ($a1, $b1) = Math::Vector::Real->box($a, $b);
    if (defined $but and ref $but ne 'HASH') {
        $but = { $but => 1 };
    }
    _find_in_box($self->{vs}, $self->{tree}, $a1, $b1, $but);
}

sub _find_in_box {
    my ($vs, $t, $a, $b, $but) = @_;
    my (@r, $r);
    my @queue;
    while (1) {
        if (defined (my $axis = $t->[_axis])) {
            my $cut = $t->[_cut];
            push @queue, $t->[_s0] if $cut >= $a->[$axis];
            push @queue, $t->[_s1] if $cut <= $b->[$axis];
        }
        else {
            my $ixs = $t->[_ixs];
            if (wantarray) {
                push @r, grep { $vs->[$_]->dist2_to_box($a, $b) <= 0 } @$ixs;
            }
            else {
                $r += ( $but
                        ? grep { !$but->{$_} and $vs->[$_]->dist2_to_box($a, $b) <= 0 } @$ixs
                        : grep { $vs->[$_]->dist2_to_box($a, $b) <= 0 } @$ixs );
            }
        }
        $t = pop @queue or last;
    }

    if (wantarray) {
        if ($but) {
            return grep !$but->{$_}, @r;
        }
        return @r;
    }
    return $r;
}

sub find_farthest_vector {
    my ($self, $v, $d, @but) = @_;
    my $t = $self->{tree} or return;
    my $vs = $self->{vs};
    my $d2 = ($d ? $d * $d : -1);
    my $but;
    if (@but) {
        if (@but == 1 and ref $but[0] eq 'HASH') {
            $but = $but[0];
        }
        else {
            my %but = map { $_ => 1 } @but;
            $but = \%but;
        }
    }

    my ($rix, $rd2) = _find_farthest_vector($vs, $t, $v, $d2, undef, $but);
    $rix // return;
    wantarray ? ($rix, sqrt($d2)) : $rix;
}

sub find_farthest_vector_internal {
    my ($self, $ix, $d) = @_;
    $ix >= 0 or croak "index out of range";
    $self->find_farthest_vector($self->{vs}[$ix], $d, $ix);
}

sub _find_farthest_vector {
    my ($vs, $t, $v, $best_d2, $best_ix, $but) = @_;

    my @queue;
    my @queue_d2;

    while (1) {
        if (defined (my $axis = $t->[_axis])) {
            # substitute the current one by the best subtree and queue
            # the worst for later
            ($t, my ($q)) = @{$t}[($v->[$axis] >= $t->[_cut]) ? (_s0, _s1) : (_s1, _s0)];
            my $q_d2 = $v->max_dist2_to_box(@{$q}[_c0, _c1]);
            if ($q_d2 >= $best_d2) {
                my $j;
                for ($j = $#queue_d2; $j >= 0; $j--) {
                    last if $queue_d2[$j] <= $q_d2;
                }
                splice @queue, ++$j, 0, $q;
                splice @queue_d2, $j, 0, $q_d2;
            }
        }
        else {
            for (@{$t->[_ixs]}) {
                next if $but and $but->{$_};
                my $d21 = $vs->[$_]->dist2($v);
                if ($d21 >= $best_d2) {
                    $best_d2 = $d21;
                    $best_ix = $_;
                }
            }

            if ($t = pop @queue) {
                if ($best_d2 <= pop @queue_d2) {
                    next;
                }
            }
            return ($best_ix, $best_d2);
        }
    }
}

sub find_random_vector {
    my $self = shift;
    my $t = $self->{tree} or return;
    my $vs = $self->{vs};
    my $hidden = $self->{hidden};
    if (not $hidden or @$vs > 20 * keys(%$hidden)) {
        # pick directly when the hidden elements are less than 5% of the total
        while (1) {
            my $ix = int rand @$vs;
            return $ix unless $hidden and $hidden->{$ix};
        }
    }
    _find_random_vector($vs, $t);
}

sub _find_random_vector {
    my ($vs, $t) = @_;
    while (defined $t->[_axis]) {
        $t = $t->[rand($t->[_n]) < $t->[_s0][_n] ? _s0 : _s1];
    }
    $t->[_ixs][rand $t->[_n]]
}

sub k_means_seed {
    my ($self, $n_req) = @_;
    $n_req = int($n_req) or return;
    my $t = $self->{tree} or return;
    my $vs = $self->{vs};
    _k_means_seed($vs, $t, $n_req);
}

*k_means_start = \&k_means_seed;

sub _k_means_seed {
    my ($vs, $t, $n_req) = @_;
    if ($n_req <= 1) {
        return if $n_req < 1;
        # print STDERR "returning centroid\n";
        return $t->[_sum] / $t->[_n];
    }
    else {
        my $n = $t->[_n];
        if (defined $t->[_axis]) {
            my ($s0, $s1) = @{$t}[_s0, _s1];
            my $n0 = $s0->[_n];
            my $n1 = $s1->[_n];
            my $n0_req = int(0.5 + $n_req * ($n0 / $n));
            $n0_req = $n0 if $n0_req > $n0;
            return (_k_means_seed($vs, $s0, $n0_req),
                    _k_means_seed($vs, $s1, $n_req - $n0_req));
        }
        else {
            my $ixs = $t->[_ixs];
            my @out;
            for (0..$#$ixs) {
                push @out, $vs->[$ixs->[$_]]
                    if rand($n - $_) < ($n_req - @out);
            }
            # print STDERR "asked for $n_req elements, returning ".scalar(@out)."\n";

            return @out;
        }
    }
}

our $k_means_seed_pp_test;

sub _k_means_seed_pp_test {
    my ($self, $err, $kms, $players, $weights) = @_;
    my @w;
    my $last = 0;
    for my $i (0..$#$players) {
        my $p = $players->[$i];
        my $w = $weights->[$i] - $last;
        $last = $weights->[$i];

        my @store;
        if (ref $p) {
            _push_all($p, \@store);
        }
        else {
            @store = $p
        }
        if (@store) {
            $w /= @store;
            $w[$_] = $w for @store;
        }
    }
    my $vs = $self->{vs};
    $w[$_] //= 0 for 0..$#$vs;

    $k_means_seed_pp_test->($self, $err, [map $self->{vs}[$_], @$kms], \@w);
}

sub k_means_seed_pp {
    my ($self, $n_req, $err) = @_;
    $n_req = int($n_req) or return;
    $err ||= 0.5;
    my $t = $self->{tree} or return;
    my $vs = $self->{vs};
    my $km = $self->find_random_vector;

    my (@km, @d2);
    idhash my %extra; # [$min_d2, $max_d2]

    # my (@player, @weight, @queue);
    # $#player = @$vs; # preallocate memory

    my (@weight, @queue);
    $#weight = @$vs; # preallocate memory

    while (1) {
        push @km, $km;
        last unless @km < $n_req;

        # update distances
        @queue = $t;
        while (my $p = pop @queue) {
            my $kmv = $vs->[$km];
            my ($c0, $c1) = @{$p}[_c0, _c1];
            my $extra = $extra{$p} //= ['inf', 'inf'];
            my ($min_d2, $max_d2) = @$extra;
            my $min_d2_to_box = $kmv->dist2_to_box($c0, $c1);
            if ($max_d2 > $min_d2_to_box) {
                if (defined $p->[_axis]) {
                    push @queue, @{$p}[_s0, _s1];
                }
                else {
                    for (@{$p->[_ixs]}) {
                        my $d2 = $kmv->dist2($vs->[$_]);
                        if ($d2 < ($d2[$_] //= $d2)) {
                            $d2[$_] = $d2;
                        }
                    }
                }

                if ($min_d2_to_box < $min_d2) {
                    $extra->[0] = $min_d2_to_box;
                }

                my $max_d2_to_box = $kmv->max_dist2_to_box($c0, $c1);
                if ($max_d2_to_box < $max_d2) {
                    $extra->[1] = $max_d2_to_box;
                }
            }
        }

        # find players and weight them
        my $weight = 0;
        # @player = ();
        @weight = ();

        # @queue = $t;
        # while (my $p = pop @queue) {
        #     my $extra = $extra{$p} or die "internal error: extra information missing for $p";
        #     my ($min_d2, $max_d2) = @$extra;

        #     if ($max_d2 * $err < $min_d2) {
        #         $weight += $p->[_n] * ($min_d2 + $max_d2) * 0.5;
        #         push @weight, $weight;
        #         push @player, $p;
        #     }
        #     else {
        #         if (defined $p->[_axis]) {
        #             push @queue, @{$p}[_s0, _s1];
        #         }
        #         else {
        #             for (@{$p->[_ixs]}) {
        #                 if (my $d2 = $d2[$_]) {
        #                     $weight += $d2;
        #                     push @weight, $weight;
        #                     push @player, $_;
        #                 }
        #             }
        #         }
        #     }
        # }

        for my $ix (0..@$vs) {
            $weight += $d2[$ix] // 0;
            $weight[$ix] += $weight;
        }

        # in order to check the algorithm we have to tap it here
        # $k_means_seed_pp_test and @km > 1 and
        #    $self->_k_means_seed_pp_test($err, \@km, \@player, \@weight);

        # to many k-means requested?
        # @player or last;

        # select a position on the weight queue:
        my $dice = rand($weight);

        # and use binary search to look for it:
        my $i = 0;
        my $j = @weight;
        while ($i < $j) {
            my $pivot = (($i + $j) >> 1);
            if ($weight[$pivot] < $dice) {
                $i = $pivot + 1;
            }
            else {
                $j = $pivot;
           }
        }
        #my $player = $player[$i];
        #$km = (ref $player ? _find_random_vector($vs, $player) : $player);
        $km = $i;
    }
    return @{$vs}[@km];
}

sub k_means_loop {
    my ($self, @k) = @_;
    @k or next;
    my $t = $self->{tree} or next;
    my $vs = $self->{vs};
    while (1) {
        my $diffs;
        my @n = ((0) x @k);
        my @sum = ((undef) x @k);

        _k_means_step($vs, $t, \@k, [0..$#k], \@n, \@sum);

        for (0..$#k) {
            if (my $n = $n[$_]) {
                my $k = $sum[$_] / $n;
                $diffs++ if $k != $k[$_];
                $k[$_] = $k;
            }
        }
        unless ($diffs) {
            return (wantarray ? @k : $k[0]);
        }
    }
}

sub k_means_step {
    my $self = shift;
    @_ or return;
    my $t = $self->{tree} or return;
    my $vs = $self->{vs};

    my @n = ((0) x @_);
    my @sum = ((undef) x @_);

   _k_means_step($vs, $t, \@_, [0..$#_], \@n, \@sum);

    for (0..$#n) {
        if (my $n = $n[$_]) {
            $sum[$_] /= $n;
        }
        else {
            # otherwise let the original value stay
            $sum[$_] = $_[$_];
        }
    }
    wantarray ? @sum : $sum[0];
}

sub _k_means_step {
    my ($vs, $t, $centers, $cixs, $ns, $sums) = @_;
    my ($n, $sum, $c0, $c1) = @{$t}[_n, _sum, _c0, _c1];
    if ($n) {
        my $centroid = $sum/$n;
        my $best = nkeyhead { $centroid->dist2($centers->[$_]) } @$cixs;
        my $max_d2 = Math::Vector::Real::max_dist2_to_box($centers->[$best], $c0, $c1);
        my @down = grep { Math::Vector::Real::dist2_to_box($centers->[$_], $c0, $c1) <= $max_d2 } @$cixs;
        if (@down <= 1) {
            $ns->[$best] += $n;
            # FIXME: M::V::R objects should support this undef + vector logic natively!
            if (defined $sums->[$best]) {
                $sums->[$best] += $sum;
            }
            else {
                $sums->[$best] = V(@$sum);
            }
        }
        else {
            if (defined (my $axis = $t->[_axis])) {
                my ($s0, $s1) = @{$t}[_s0, _s1];
                _k_means_step($vs, $t->[_s0], $centers, \@down, $ns, $sums);
                _k_means_step($vs, $t->[_s1], $centers, \@down, $ns, $sums);
            }
            else {
                for my $ix (@{$t->[_ixs]}) {
                    my $v = $vs->[$ix];
                    my $best = nkeyhead { $v->dist2($centers->[$_]) } @down;
                    $ns->[$best]++;
                    if (defined $sums->[$best]) {
                        $sums->[$best] += $v;
                    }
                    else {
                        $sums->[$best] = V(@$v);
                    }
                }
            }
        }
    }
}

sub k_means_assign {
    my $self = shift;
    @_ or return;
    my $t = $self->{tree} or return;
    my $vs = $self->{vs};

    my @out = ((undef) x @$vs);
   _k_means_assign($vs, $t, \@_, [0..$#_], \@out);
    @out;
}

sub _k_means_assign {
    my ($vs, $t, $centers, $cixs, $outs) = @_;
    my ($n, $sum, $c0, $c1) = @{$t}[_n, _sum, _c0, _c1];
        if ($n) {
        my $centroid = $sum/$n;
        my $best = nkeyhead { $centroid->dist2($centers->[$_]) } @$cixs;
        my $max_d2 = Math::Vector::Real::max_dist2_to_box($centers->[$best], $c0, $c1);
        my @down = grep { Math::Vector::Real::dist2_to_box($centers->[$_], $c0, $c1) <= $max_d2 } @$cixs;
        if (@down <= 1) {
            _k_means_assign_1($t, $best, $outs);
        }
        else {
            if (defined (my $axis = $t->[_axis])) {
                my ($s0, $s1) = @{$t}[_s0, _s1];
                _k_means_assign($vs, $t->[_s0], $centers, \@down, $outs);
                _k_means_assign($vs, $t->[_s1], $centers, \@down, $outs);
            }
            else {
                for my $ix (@{$t->[_ixs]}) {
                    my $v = $vs->[$ix];
                    my $best = nkeyhead { $v->dist2($centers->[$_]) } @down;
                    $outs->[$ix] = $best;
                }
            }
        }
    }
}

sub _k_means_assign_1 {
    my ($t, $best, $outs) = @_;
    if (defined (my $axis = $t->[_axis])) {
        _k_means_assign_1($t->[_s0], $best, $outs);
        _k_means_assign_1($t->[_s1], $best, $outs);
    }
    else {
        $outs->[$_] = $best for @{$t->[_ixs]};
    }
}

sub ordered_by_proximity {
    my $self = shift;
    my @r;
    $#r = $#{$self->{vs}}; $#r = -1; # preallocate
    _ordered_by_proximity($self->{tree}, \@r);
    return @r;
}

sub _ordered_by_proximity {
    my $t = shift;
    my $r = shift;
    if (defined $t->[_axis]) {
        _ordered_by_proximity($t->[_s0], $r);
        _ordered_by_proximity($t->[_s1], $r);
    }
    else {
        push @$r, @{$t->[_ixs]}
    }
}

sub _dump_to_string {
    my ($vs, $t, $indent, $opts) = @_;
    my ($n, $c0, $c1, $sum) = @{$t}[_n, _c0, _c1, _sum];
    my $id = ($opts->{pole_id} ? _pole_id($vs, $t)." " : '');
    if (defined (my $axis = $t->[_axis])) {
        my ($s0, $s1, $cut) = @{$t}[_s0, _s1, _cut];
        return ( "${indent}${id}n: $n, c0: $c0, c1: $c1, sum: $sum, axis: $axis, cut: $cut\n" .
                 _dump_to_string($vs, $s0, "$indent$opts->{tab}", $opts) .
                 _dump_to_string($vs, $s1, "$indent$opts->{tab}", $opts) );
    }
    else {
        my $remark = $opts->{remark} // [];
        my $o = ( "${indent}${id}n: $n, c0: $c0, c1: $c1, sum: $sum\n" .
                  "${indent}$opts->{tab}ixs: [" );
        my @str;
        for my $ix (@{$t->[_ixs]}) {
            my $colored_ix = (@$remark and grep($ix == $_, @$remark)
                              ? Term::ANSIColor::colored($ix, 'red')
                              : $ix);
            if ($opts->{dump_vectors} // 1) {
                push @str, "$colored_ix $vs->[$ix]";
            }
            else {
                push @str, $colored_ix;
            }
        }
        return $o . join(', ', @str) . "]\n";
    }
}

sub dump_to_string {
    my ($self, %opts) = @_;
    my $tab = $opts{tab} //= '    ';
    my $vs = $self->{vs};
    my $nvs = @$vs;
    my $hidden = join ", ", keys %{$self->{hidden} || {}};
    my $o = "tree: n: $nvs, hidden: {$hidden}\n";
    if (my $t = $self->{tree}) {
        require Term::ANSIColor if $opts{remark};
        return $o . _dump_to_string($vs, $t, $tab, \%opts);
    }
    else {
        return "$o${tab}(empty)\n";
    }
}

sub dump {
    my $self = shift;
    print $self->dump_to_string(@_);
}

1;
__END__

=head1 NAME

Math::Vector::Real::kdTree - kd-Tree implementation on top of Math::Vector::Real

=head1 SYNOPSIS

  use Math::Vector::Real::kdTree;

  use Math::Vector::Real;
  use Math::Vector::Real::Random;

  my @v = map Math::Vector::Real->random_normal(4), 1..1000;

  my $tree = Math::Vector::Real::kdTree->new(@v);

  my $ix = $tree->find_nearest_vector(V(0, 0, 0, 0));

  say "nearest vector is $ix, $v[$ix]";

=head1 DESCRIPTION

This module implements a kd-Tree data structure in Perl and common
algorithms on top of it.

=head2 Methods

The following methods are provided:

=over 4

=item $t = Math::Vector::Real::kdTree->new(@points)

Creates a new kd-Tree containing the given points.

=item $t2 = $t->clone

Creates a duplicate of the tree. The two trees will share internal
read only data so this method is more efficient in terms of memory
usage than others performing a deep copy.

=item my $ix = $t->insert($p0, $p1, ...)

Inserts the given points into the kd-Tree.

Returns the index assigned to the first point inserted.

=item $s = $t->size

Returns the number of points inside the tree.

=item $p = $t->at($ix)

Returns the point at the given index inside the tree.

=item $t->move($ix, $p)

Moves the point at index C<$ix> to the new given position readjusting
the tree structure accordingly.

=item ($ix, $d) = $t->find_nearest_vector($p, $max_d, @but_ix)

=item ($ix, $d) = $t->find_nearest_vector($p, $max_d, \%but_ix)

Find the nearest vector for the given point C<$p> and returns its
index and the distance between the two points (in scalar context the
index is returned).

If C<$max_d> is defined, the search is limited to the points within that distance

Optionally, a list of point indexes to be excluded from the search can be
passed or, alternatively, a reference to a hash containing the indexes
of the points to be excluded.

=item @ix = $t->find_nearest_vector_all_internal

Returns the index of the nearest vector from the tree.

It is equivalent to the following code (though, it uses a better
algorithm):

  @ix = map {
            scalar $t->nearest_vector($t->at($_), undef, $_)
        } 0..($t->size - 1);

=item $ix = $t->find_nearest_vector_in_box($p, $a, $b, $max_d, @but_ix)

=item $ix = $t->find_nearest_vector_in_box($p, $a, $b, $max_d, \%but_ix)

Returns the nearest vector for the given point from those that are
also inside the box defined by C<$a> and C<$b>.

The other arguments have the same meaning as for the method
C<find_nearest_vector>.

=item $ix = $t->find_nearest_vector_in_box_chebyshev($p, $a, $b, $max_d, @but_ix)

=item $ix = $t->find_nearest_vector_in_box_chebyshev($p, $a, $b, $max_d, \%but_ix)

This method is similar to C<find_nearest_vector_in_box> but using the
Chebyshev metric.

=item $ix = $t->find_farthest_vector($p, $min_d, @but_ix)

Find the point from the tree farthest from the given C<$p>.

The optional argument C<$min_d> specifies a minimal distance. Undef is
returned when not point farthest that it is found.

C<@but_ix> specifies points that should not be considered when looking
for the farthest point.

=item $ix = $t->find_farthest_vector_internal($ix, $min_d, @but_ix)

Given the index of a point on the tree this method returns the index
of the farthest vector also from the tree.

=item ($ix0, $ix1, $d) = $t->find_two_nearest_vectors

This method returns the indexes of two vectors from the three such
that the distance between them is minimal. The distance is returned as
the third output value.

In scalar context, just the distance is returned.

=item @k = $t->k_means_seed($n)

This method uses the internal tree structure to generate a set of
point that can be used as seeds for other C<k_means> methods.

There isn't any guarantee on the quality of the generated seeds, but
the used algorithm seems to perform well in practice.

=item @k = $t->k_means_step(@k)

Performs a step of the L<Lloyd's
algorithm|http://en.wikipedia.org/wiki/Lloyd%27s_algorithm> for
k-means calculation.

=item @k = $t->k_means_loop(@k)

Iterates until the Lloyd's algorithm converges and returns the final
means.

=item @ix = $t->k_means_assign(@k)

Returns for every point in the three the index of the cluster it
belongs to.

=item @ix = $t->find_in_ball($z, $d, $but)

=item $n = $t->find_in_ball($z, $d, $but)

Finds the points inside the tree contained in the hypersphere with
center C<$z> and radius C<$d>.

In scalar context returns the number of points found. In list context
returns the indexes of the points.

If the extra argument C<$but> is provided. The point with that index
is ignored.

=item @ix = $t->find_in_box($a, $b, $but)

=item $n = $t->find_in_box($a, $b, $but)

Finds the points inside the tree contained in the axis-aligned box
defined by two opposite vertices C<$a> and C<$b>.

In scalar context returns the number of points found. In list context
returns the indexes of the points.

If the extra argument C<$but> is provided. The point with that index
is ignored.

=item @ix = $t->ordered_by_proximity

Returns the indexes of the points in an ordered where is likely that
the indexes of near vectors are also in near positions in the list.

=back

=head2 k-means

The module can be used to calculate the k-means of a set of vectors as follows:

  # inputs
  my @v = ...; my $k = ...;
  
  # k-mean calculation
  my $t = Math::Vector::Real::kdTree->new(@v);
  my @means = $t->k_means_seed($k);
  @means = $t->k_means_loop(@means);
  @assign = $t->k_means_assign(@means);
  my @cluster = map [], 1..$k;
  for (0..$#assign) {
    my $cluster_ix = $assign[$_];
    my $cluster = $cluster[$cluster_ix];
    push @$cluster, $t->at($_);
  }
  
  use Data::Dumper;
  print Dumper \@cluster;

=head1 SEE ALSO

L<Wikipedia k-d Tree entry|http://en.wikipedia.org/wiki/K-d_tree>.

L<K-means filtering algorithm|https://www.cs.umd.edu/~mount/Projects/KMeans/pami02.pdf>.

L<Math::Vector::Real>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2011-2015 by Salvador FandiE<ntilde>o E<lt>sfandino@yahoo.comE<gt>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.12.3 or,
at your option, any later version of Perl 5 you may have available.


=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)