Math-Vector-Real-kdTree-0.15/0000755000175000017500000000000012513224505014662 5ustar salvasalvaMath-Vector-Real-kdTree-0.15/t/0000755000175000017500000000000012513224505015125 5ustar salvasalvaMath-Vector-Real-kdTree-0.15/t/count.t0000644000175000017500000000054312340612161016441 0ustar salvasalva#!/usr/bin/perl use strict; use warnings; use Test::More tests => 40; use Math::Vector::Real::kdTree; my $tree = Math::Vector::Real::kdTree->new(); my @is; for my $i (0..39) { push @is, $i; $tree->insert(Math::Vector::Real->new($i)); my @all = sort { $a <=> $b } $tree->ordered_by_proximity; is ("@all", "@is", "count indexes - $i"); } Math-Vector-Real-kdTree-0.15/t/find_two_nearest_vectors.t0000644000175000017500000000311412500022660022402 0ustar salvasalva#!/usr/bin/perl use strict; use warnings; use Test::More tests => 379; use_ok('Math::Vector::Real::kdTree'); use Sort::Key::Top qw(nhead); use Math::Vector::Real; use Math::Vector::Real::Test qw(eq_vector); sub find_two_nearest_vectors_bruteforce { my @best_ix = (undef, undef); my $best_d2 = 'inf' + 0; for my $i (1..$#_) { my $v = $_[$i]; for my $j (0..$i - 1) { my $d2 = Math::Vector::Real::dist2($v, $_[$j]); if ($d2 < $best_d2) { $best_d2 = $d2; @best_ix = ($i, $j); } } } (@best_ix, sqrt($best_d2)) } my %gen = ( num => sub { V(map rand, 1..$_[0]) }, int => sub { V(map int(rand 10), 1..$_[0]) }, dia => sub { V((rand) x $_[0]) } ); #srand 318275924; diag "srand: " . srand; for my $g (keys %gen) { for my $d (1, 2, 3, 4, 5, 6, 10) { for my $n (2, 5, 10, 20, 40, 50, 60, 70, 80, 90, 100, 120, 150, 180, 200, 250, 500, 1000) { my $id = "gen: $g, d: $d, n: $n"; my @o = map $gen{$g}->($d), 1..$n; my $t = Math::Vector::Real::kdTree->new(@o); my ($b1, $b2, $min_d2) = $t->find_two_nearest_vectors; my ($b1bf, $b2bf, $min_d2_bf) = find_two_nearest_vectors_bruteforce(@o); is($min_d2, $min_d2_bf, "nearest_two_vectors - $id") or do { diag "values differ: $min_d2 $min_d2_bf best: $b1, $b2, best_bf: $b1bf, $b2bf\n"; diag $t->dump_to_string(pole_id => 1, remark => [$b1, $b2, $b1bf, $b2bf]); diag "end"; }; } } } Math-Vector-Real-kdTree-0.15/t/Math-Vector-Real-kdTree.t0000644000175000017500000002361612477774301021565 0ustar salvasalva#!/usr/bin/perl use strict; use warnings; use Test::More tests => 21209; use_ok('Math::Vector::Real::kdTree'); use Sort::Key::Top qw(nhead); use Math::Vector::Real; use Math::Vector::Real::Test qw(eq_vector); sub find_in_ball_bruteforce { my ($vs, $ix, $d) = @_; my $d2 = $d * $d; grep { $ix <=> $_ and $vs->[$ix]->dist2($vs->[$_]) <= $d2 } 0..$#$vs; } sub nearest_vectors_bruteforce { my ($bottom, $top) = Math::Vector::Real->box(@_); my $box = $top - $bottom; my $v = [map $_ - $bottom, @_]; my $ixs = [0..$#_]; my $dist2 = [($box->abs2 * 10 + 1) x @_]; my $neighbors = [(undef) x @_]; _nearest_vectors_bruteforce($v, $ixs, $dist2, $neighbors, $box, 0); return @$neighbors; } sub _nearest_vectors_bruteforce { my ($v, $ixs, $dist2, $neighbors) = @_; my $ixix = 0; for my $i (@$ixs) { $ixix++; my $v0 = $v->[$i]; for my $j (@$ixs[$ixix..$#$ixs]) { my $d2 = $v0->dist2($v->[$j]); if ($dist2->[$i] > $d2) { $dist2->[$i] = $d2; $neighbors->[$i] = $j; } if ($dist2->[$j] > $d2) { $dist2->[$j] = $d2; $neighbors->[$j] = $i; } } } } sub farthest_vectors_bruteforce { my @best_ix; my @best_d2 = ((-1) x @_); for my $i (1..$#_) { my $v = $_[$i]; for my $j (0..$i - 1) { my $d2 = Math::Vector::Real::dist2($v, $_[$j]); if ($d2 > $best_d2[$i]) { $best_d2[$i] = $d2; $best_ix[$i] = $j; } if ($d2 > $best_d2[$j]) { $best_d2[$j] = $d2; $best_ix[$j] = $i; } } } return 
@best_ix; } sub find_two_nearest_vectors_bruteforce { my @best_ix = (undef, undef); my $best_d2 = 'inf' + 0; for my $i (1..$#_) { my $v = $_[$i]; for my $j (0..$i - 1) { my $d2 = Math::Vector::Real::dist2($v, $_[$j]); if ($d2 < $best_d2) { $best_d2 = $d2; @best_ix = ($i, $j); } } } (@best_ix, sqrt($best_d2)) } sub test_neighbors { unshift @_, $_[0]; goto &test_neighbors_indirect; } sub test_neighbors_indirect { my ($o1, $o2, $n1, $n2, $msg) = @_; my (@d1, @d2); for my $ix (0..$#$o1) { my $eo = $o1->[$ix]; my $ixn1 = $n1->[$ix]; defined $ixn1 or do { fail($msg); diag("expected index for element $ix is undefined"); goto break_me; }; my $ixn2 = $n2->[$ix]; defined $ixn2 or do { fail($msg); diag("template index for element $ix is undefined"); goto break_me; }; $ixn1 < @$o2 or do { fail($msg); diag("expected index $ixn1 out of range"); goto break_me; }; $ixn2 < @$o2 or do { fail($msg); diag("template index $ixn1 out of range"); goto break_me; }; my $en1 = $o2->[$ixn1]; my $en2 = $o2->[$ixn2]; push @d1, $eo->dist2($en1); push @d2, $eo->dist2($en2); } is "@d1", "@d2", $msg and return 1; break_me: diag "break me!"; 0; } my %gen = ( num => sub { rand }, int => sub { int rand(10) } ); #srand 318275924; diag "srand: " . srand; for my $g (keys %gen) { for my $d (1, 2, 3, 10) { for my $n (2, 10, 50, 250, 500) { # for my $n ((2) x 100) { my $id = "gen: $g, d: $d, n: $n"; my @o = map V(map $gen{$g}->(), 1..$d), 1..$n; my @nbf = nearest_vectors_bruteforce(@o); my $t = Math::Vector::Real::kdTree->new(@o); my @n = map scalar($t->find_nearest_vector_internal($_)), 0..$#o; is ($#n, $#o, "count find_nearest_vector_internal - build - $id"); test_neighbors(\@o, \@n, \@nbf, "find_nearest_vector_internal - build - $id"); is_deeply([map $t->at($_), 0..$#o], \@o , "at - build - after find_nearest_vector_internal - $id"); @n = $t->find_nearest_vector_all_internal; is ($#n, $#o, "count find_nearest_vector_all_internal - build - $id"); test_neighbors(\@o, \@n, \@nbf, "find_nearest_vector_all_internal - build - $id"); is_deeply([map $t->at($_), 0..$#o], \@o , "at - build - after find_nearest_vector_all_internal - $id"); $t = Math::Vector::Real::kdTree->new; for my $ix (0..$#o) { $t->insert($o[$ix]); my @obp = $t->ordered_by_proximity; is ($ix, $#obp, "ordered_by_proxymity - count - $id, ix: $ix"); } is_deeply([map $t->at($_), 0..$#o], \@o , "at - insert - after insert - $id"); @n = map scalar($t->find_nearest_vector_internal($_)), 0..$#o; test_neighbors(\@o, \@n, \@nbf, "find_nearest_vector_internal - insert - $id"); is_deeply([map $t->at($_), 0..$#o], \@o , "at - insert - after find_nearest_vector_internal - $id"); @n = $t->find_nearest_vector_all_internal; test_neighbors(\@o, \@n, \@nbf, "find_nearest_vector_all_internal - insert - $id"); is_deeply([map $t->at($_), 0..$#o], \@o , "at - insert - after find_nearest_vector_all_internal - $id"); my @fbf = farthest_vectors_bruteforce(@o); @n = map scalar($t->find_farthest_vector_internal($_)), 0..$#o; test_neighbors(\@o, \@n, \@fbf, "find_farthest_vector_internal - insert - $id"); is_deeply([map $t->at($_), 0..$#o], \@o , "at - insert - after find_farthest_vector_internal - $id"); my ($b1, $b2, $min_d2) = $t->find_two_nearest_vectors; my ($b1bf, $b2bf, $min_d2_bf) = find_two_nearest_vectors_bruteforce(@o); is($min_d2, $min_d2_bf, "nearest_two_vectors") or do { diag "values differ: $min_d2 $min_d2_bf best: $b1, $b2, best_bf: $b1bf, $b2bf\n"; diag $t->dump_to_string(pole_id => 1, remark => [$b1, $b2, $b1bf, $b2bf]); }; my %seed_errs = (k_means_seed => [1], k_means_seed_pp => 
[1, 0.9, 0.5]); my $k; for ($k = 1; $k < @n; $k *= 2) { for my $seed_method (qw(k_means_seed)) { # k_means_seed_pp)) { for my $err (@{$seed_errs{$seed_method}}) { no warnings 'once'; local $Math::Vector::Real::kdTree::k_means_seed_pp_test = sub { my ($t, $err, $kmvs, $ws) = @_; # use Data::Dumper; # diag Dumper $ws; # diag Dumper $kmvs; my @error; for my $ix (0..$#o) { my $w = nhead map { $o[$ix]->dist2($_) } @$kmvs; # diag "checking element $ix, o: $o[$ix] ws: $ws->[$ix], w: $w"; if ($ws->[$ix] + 0.0001 < $w * $err or $ws->[$ix] * $err > $w + 0.0001) { push @error, "weight calculation failed for ix $ix: precise: $w, estimated: $ws->[$ix], err: $err" } } ok(@error == 0, "k_means_seed_pp_test, k: $k, err: $err, $id"); diag $_ for @error; }; my @kms = $t->$seed_method($k, $err); my $k_gen = scalar(@kms); if ($seed_method eq 'k_means_seed') { is ($k_gen, $k, "$seed_method generates $k results - err: $err, $id"); } else { ok(1, "keep number of tests unchanged") for $k_gen..$k-1; ok($k_gen >= 1, "$seed_method generates at least one result"); ok($k_gen <= $k, "$seed_method generates $k or less results"); } my @km = $t->k_means_loop(@kms); is (scalar(@km), $k_gen, "k_means_loop generates $k_gen results - err: $err, $id") or do { diag "break me 2"; }; my @kma = $t->k_means_assign(@km); my $t1 = Math::Vector::Real::kdTree->new(@km); my @n = map scalar($t1->find_nearest_vector($_)), @o; test_neighbors_indirect(\@o, \@km, \@kma, \@n, "k_means_assign - err: $err, k: $k, $id"); my @sum = map V((0) x $d), 1..$k_gen; my @count = ((0) x $k_gen); for my $ix (0..$#kma) { my $cluster = $kma[$ix]; $count[$cluster]++; $sum[$cluster] += $o[$ix]; } for my $cluster (0..$#sum) { if ($count[$cluster]) { $sum[$cluster] /= $count[$cluster]; } else { $sum[$cluster] = $km[$cluster]; } eq_vector($sum[$cluster], $km[$cluster], "cluster centroid - $cluster - k: $k, $id"); } ok (1, "keep number of tests unchanged") for $#sum..$k-1; } } } for my $ix (0..$#o) { my $r = 0.0001 + rand(1); my @bix = sort { $a <=> $b } $t->find_in_ball($o[$ix], $r, $ix); my @bixbf = find_in_ball_bruteforce(\@o, $ix, $r); is_deeply (\@bix, \@bixbf, "find_in_ball - $ix - $id") or do {; diag "break me 3"; } } } } } Math-Vector-Real-kdTree-0.15/Makefile.PL0000644000175000017500000000070712500013527016634 0ustar salvasalvause 5.010; use ExtUtils::MakeMaker; WriteMakefile( NAME => 'Math::Vector::Real::kdTree', VERSION_FROM => 'lib/Math/Vector/Real/kdTree.pm', PREREQ_PM => { 'Math::Vector::Real' => '0.16', 'Sort::Key::Top' => '0.08' }, ABSTRACT_FROM => 'lib/Math/Vector/Real/kdTree.pm', AUTHOR => 'Salvador Fandiño ' ); Math-Vector-Real-kdTree-0.15/sample/0000755000175000017500000000000012513224505016143 5ustar salvasalvaMath-Vector-Real-kdTree-0.15/sample/boxes.pl0000644000175000017500000000377612340612161017632 0ustar salvasalva#!/usr/bin/perl use 5.010; use strict; use warnings; use GD; use Math::Vector::Real; use Math::Vector::Real::Random; use Math::Vector::Real::kdTree; my $p = 10000; my $w = 1024; my $im = GD::Image->new($w, $w); my $white = $im->colorAllocate(255,255,255); my $black = $im->colorAllocate(0, 0, 0); # $im->transparent($white); $im->interlaced('true'); sub scl { my $p = shift; @{$w * (0.3 * $p + [0.5, 0.5])}[0, 1]; } sub sscl { my $s = shift; $w * (0.3*$s); } my @p; while () { s/\s//g; push @p, V($1, $2) if /\{(-?[\d\.]+)\,(-?[\d\.]+)}/; } if (@p < $p) { @p = map Math::Vector::Real->random_normal(2, 0.6), 1..$p; } # @p = @p[0..$#p]; my $tree = Math::Vector::Real::kdTree->new(@p); my %path; for (0..$#p) { push 
@{$path{$tree->path($_)}}, $_; } my @colors; for my $set (values %path) { my $color = $im->colorAllocate(map int $_, @{Math::Vector::Real->random_versor(3, 255.9)}); if ($color >= 0) { push @colors, $color; } else { $color = $colors[rand @colors]; } my ($b, $t) = Math::Vector::Real->box(@p[@$set]); $im->rectangle(scl($b), scl($t), $color); $im->filledEllipse(scl($_), 3, 3, $color) for @p[@$set]; } open my $fh, ">output.png"; print $fh $im->png; __DATA__ {0.360273672659563, 0.681459947082673} {0.176663207393249, 0.404644066772445} {1.05728054075933, 0.14704076332749} {0.553760099403691, 0.0547824407714363} {0.0479743023870717, 0.963824108595659} {0.776144419292398, 0.10956787723834} {1.0209388373113, 0.971510537877524} {0.0706926934034146, -0.114056543099417} {0.515397128075519, 0.612716041689772} {0.26975413715681, 1.03607341990943} {0.402473337889129, 0.292854432427144} {0.502995986052089, 0.713098206321628} {0.654221365535431, 0.413225433599031} {0.809595420704002, 0.23428087884566} {0.727126702840134, 0.589085324802046} {0.495470529749333, 0.16063680625426} {0.290835931539363, 1.06150585371687} {0.159456912007944, 0.897423580738299} {0.160445827140042, 0.503289145929537} {0.26744213070957, 0.147348345161297} Math-Vector-Real-kdTree-0.15/README0000644000175000017500000000116612477772065015570 0ustar salvasalvaMath-Vector-Real-kdTree ======================= k-d Trees in Perl INSTALLATION To install this module type the following: perl Makefile.PL make make test make install DEPENDENCIES This module requires these other modules and libraries: Math::Vector::Real Sort::Key::Top COPYRIGHT AND LICENCE Put the correct copyright and licence information here. Copyright (C) 2011-2015 by Salvador Fandiño This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.12.3 or, at your option, any later version of Perl 5 you may have available. Math-Vector-Real-kdTree-0.15/Changes0000644000175000017500000000370612513224055016163 0ustar salvasalvaRevision history for Perl extension Math::Vector::Real::kdTree. 
0.15 Apr 14, 2015 - add find_nearest_vector_in_box_chebyshev and find_in_box methods 0.14 Apr 14, 2015 - add find_nearest_vector_in_box 0.13 Mar 11, 2015 - improve find_two_nearest_vectors algorithm - add tests 0.12 Mar 11, 2015 - add method find_two_nearest_vectors - dump_to_string now supports the options remark and pole_id - find_in_ball was broken - rename k_means_start to k_means_seed 0.11 Jun 2, 2014 - find_farthest_vector was failing for the case where all the vectors on the tree were equal 0.10 Jun 1, 2014 - add k_means* methods - add find_farthest_vector* methods - rename find_nearest_neighbor* methods to find_nearest_vector* (though, the old names are keep in order to preserve backward compatibility) - most methods have been rewritten from scratch using better algorithms and removing recursion when possible - change internal data structure - lots of new tests 0.09 Jun 17, 2013 - add support for excluding more than one point in neighbor searching methods 0.08 Feb 16, 2013 - add test for RT but #83330 0.07 Feb 15, 2013 - insert method was broken (bug report by Greg Bronevetsky) - add test suite 0.06 Oct 22, 2012 - add a clone method 0.05 Sep 21, 2012 - insert now accepts several points and returns the index of the first element inserted 0.04 Jun 19, 2012 - add hide method - move and _delete methods were broken - add ordered_by_proximity method 0.03 Jun 22, 2011 - solve bug on find_nearest_neighbor when distance is limited 0.02 Jun 16, 2011 - add find_in_ball method 0.01 Wed Jun 15 18:09:43 2011 - original version; created by h2xs 1.23 with options -AXn Math::Vector::Real::kdTree Math-Vector-Real-kdTree-0.15/MANIFEST0000644000175000017500000000047612513224505016022 0ustar salvasalvaChanges Makefile.PL MANIFEST README t/Math-Vector-Real-kdTree.t t/count.t lib/Math/Vector/Real/kdTree.pm sample/boxes.pl t/find_two_nearest_vectors.t META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) Math-Vector-Real-kdTree-0.15/lib/0000755000175000017500000000000012513224505015430 5ustar salvasalvaMath-Vector-Real-kdTree-0.15/lib/Math/0000755000175000017500000000000012513224505016321 5ustar salvasalvaMath-Vector-Real-kdTree-0.15/lib/Math/Vector/0000755000175000017500000000000012513224505017563 5ustar salvasalvaMath-Vector-Real-kdTree-0.15/lib/Math/Vector/Real/0000755000175000017500000000000012513224505020446 5ustar salvasalvaMath-Vector-Real-kdTree-0.15/lib/Math/Vector/Real/kdTree.pm0000644000175000017500000011661712513224067022241 0ustar salvasalvapackage Math::Vector::Real::kdTree; our $VERSION = '0.15'; use 5.010; use strict; use warnings; use Carp; use Math::Vector::Real; use Sort::Key::Top qw(nkeypartref nhead ntail nkeyhead); use Hash::Util::FieldHash qw(idhash); our $max_per_pole = 12; our $recommended_per_pole = 6; use constant _n => 0; # elements on subtree use constant _c0 => 1; # corner 0 use constant _c1 => 2; # corner 1 use constant _sum => 3; # centroid * n use constant _s0 => 4; # subtree 0 use constant _s1 => 5; # subtree 1 use constant _axis => 6; # cut axis use constant _cut => 7; # cut point (mediam) # on leaf nodes: use constant _ixs => 4; use constant _leaf_size => _ixs + 1; sub new { my $class = shift; my @v = map V(@$_), @_; my $self = { vs => \@v, tree => (@v ? 
_build(\@v, [0..$#v]) : undef) }; bless $self, $class; } sub clone { my $self = shift; require Storable; my $clone = { vs => [@{$self->{vs}}], tree => Storable::dclone($self->{tree}) }; $clone->{hidden} = { %{$self->{hidden}} } if $self->{hidden}; bless $clone, ref $self; } sub _build { my ($v, $ixs) = @_; if (@$ixs > $recommended_per_pole) { my ($b, $t) = Math::Vector::Real->box(@$v[@$ixs]); my $axis = ($t - $b)->max_component_index; my $bstart = @$ixs >> 1; my ($p0, $p1) = nkeypartref { $v->[$_][$axis] } $bstart => @$ixs; my $s0 = _build($v, $p0); my $s1 = _build($v, $p1); my ($c0, $c1) = Math::Vector::Real->box(@{$s0}[_c0, _c1], @{$s1}[_c0, _c1]); my $cut = 0.5 * ($s0->[_c1][$axis] + $s1->[_c0][$axis]); # warn "b: $b, t: $t, axis: $axis, p0: $p0, p1: $p1, s0: $s0, s1: $s1, c0: $c0, c1: $c1, cut: $cut\n"; # [n sum s0 s1 axis cut] [scalar(@$ixs), $c0, $c1, $s0->[_sum] + $s1->[_sum], $s0, $s1, $axis, $cut]; } else { # [n, sum, ixs] my @vs = @{$v}[@$ixs]; my ($c0, $c1) = Math::Vector::Real->box(@vs); [scalar(@$ixs), $c0, $c1, Math::Vector::Real->sum(@vs), $ixs]; } } sub size { scalar @{shift->{vs}} } sub at { my ($self, $ix) = @_; Math::Vector::Real::clone($self->{vs}[$ix]); } sub insert { my $self = shift; @_ or return; my $vs = $self->{vs}; my $ix = @$vs; if (my $tree = $self->{tree}) { for (@_) { my $v = V(@$_); push @$vs, $v; _insert($vs, $self->{tree}, $#$vs) } } else { @$vs = map V(@$_), @_; $self->{tree} = _build($vs, [0..$#$vs]); } return $ix; } # _insert does not return anything but modifies its $t argument in # place. This is really ugly but done to improve performance. sub _insert { my ($vs, $t, $ix) = @_; my $v = $vs->[$ix]; # update aggregated values my $n = $t->[_n]++; @{$t}[_c0, _c1] = Math::Vector::Real->box($v, @{$t}[_c0, _c1]); $t->[_sum] += $v; if (defined (my $axis = $t->[_axis])) { my $cut = $t->[_cut]; my $c = $v->[$axis]; my $n0 = $t->[_s0][_n]; my $n1 = $t->[_s1][_n]; if ($c <= $cut) { if (2 * $n1 + $max_per_pole >= $n0) { _insert($vs, $t->[_s0], $ix); return; } } else { if (2 * $n0 + $max_per_pole >= $n1) { _insert($vs, $t->[_s1], $ix); return; } } # tree needs rebalancing my @store; $#store = $n; # preallocate space @store = ($ix); _push_all($t, \@store); $_[1] = _build($vs, \@store); } else { my $ixs = $t->[_ixs]; push @$ixs, $ix; if ($n > $max_per_pole) { $_[1] = _build($vs, $ixs); } } } sub move { my ($self, $ix, $v) = @_; my $vs = $self->{vs}; ($ix >= 0 and $ix < @$vs) or croak "index out of range"; _delete($vs, $self->{tree}, $ix); $vs->[$ix] = Math::Vector::Real::clone($v); _insert($vs, $self->{tree}, $ix); } sub _delete { my ($vs, $t, $ix) = @_; if (defined (my $axis = $t->[_axis])) { my $v = $vs->[$ix]; my $c = $v->[$axis]; my ($s0, $s1, $cut) = @{$t}[_s0, _s1, _cut]; if ($c <= $cut and _delete($vs, $s0, $ix)) { if ($s0->[_n]) { $t->[_n]--; $t->[_sum] -= $v; } else { # when one subnode becomes empty, the other gets promoted up: @$t = @$s1; } return 1; } elsif ($c >= $cut and _delete($vs, $s1, $ix)) { if ($s1->[_n]) { $t->[_n]--; $t->[_sum] -= $v; } else { @$t = @$s0; } return 1; } } else { my $ixs = $t->[_ixs]; for (0..$#$ixs) { if ($ixs->[$_] == $ix) { splice(@$ixs, $_, 1); $t->[_n]--; $t->[_sum] -= $vs->[$ix]; return 1; } } } return 0; } sub hide { my ($self, $ix) = @_; my $vs = $self->{vs}; ($ix >= 0 and $ix < @$vs) or croak "index out of range"; _delete($vs, $self->{tree}, $ix); ($self->{hidden} //= {})->{$ix} = 1; } sub _push_all { my ($t, $store) = @_; my @q; while ($t) { if (defined $t->[_axis]) { push @q, $t->[_s1]; $t = $t->[_s0]; } else { push 
@$store, @{$t->[_ixs]}; $t = pop @q; } } } sub path { my ($self, $ix) = @_; my $p = _path($self->{vs}, $self->{tree}, $ix); my $l = 1; $l = (($l << 1) | $_) for @$p; $l } sub _path { my ($vs, $t, $ix) = @_; if (defined (my $axis = $t->[_axis])) { my $v = $vs->[$ix]; my $c = $v->[$axis]; my $cut = $t->[_cut]; my $p; if ($c <= $cut) { if ($p = _path($vs, $t->[_s0], $ix)) { unshift @$p, 0; return $p; } } if ($c >= $cut) { if ($p = _path($vs, $t->[_s1], $ix)) { unshift @$p, 1; return $p; } } } else { return [] if grep $_ == $ix, @{$t->[_ixs]} } () } sub find { my ($self, $v) = @_; _find($self->{vs}, $self->{tree}, $v); } sub _find { my ($vs, $t, $v) = @_; while (defined (my $axis = $t->[_axis])) { my $cut = $t->[_cut]; my $c = $v->[$axis]; if ($c < $cut) { $t = $t->[_s0]; } else { if ($c == $cut) { my $ix = _find($vs, $t->[_s0], $v); return $ix if defined $ix; } $t = $t->[_s1]; } } for (@{$t->[_ixs]}) { return $_ if $vs->[$_] == $v; } () } sub find_nearest_vector { my ($self, $v, $d, @but) = @_; my $t = $self->{tree} or return; my $vs = $self->{vs}; my $d2 = (defined $d ? $d * $d : 'inf'); my $but; if (@but) { if (@but == 1 and ref $but[0] eq 'HASH') { $but = $but[0]; } else { my %but = map { $_ => 1 } @but; $but = \%but; } } my ($rix, $rd2) = _find_nearest_vector($vs, $t, $v, $d2, undef, $but); $rix // return; wantarray ? ($rix, sqrt($rd2)) : $rix; } *find_nearest_neighbor = \&find_nearest_vector; # for backwards compatibility sub find_nearest_vector_internal { my ($self, $ix, $d) = @_; $ix >= 0 or croak "index out of range"; $self->find_nearest_vector($self->{vs}[$ix], $d, $ix); } *find_nearest_neighbor_internal = \&find_nearest_vector_internal; # for backwards compatibility sub _find_nearest_vector { my ($vs, $t, $v, $best_d2, $best_ix, $but) = @_; my @queue; my @queue_d2; while (1) { if (defined (my $axis = $t->[_axis])) { # substitute the current one by the best subtree and queue # the worst for later ($t, my ($q)) = @{$t}[($v->[$axis] <= $t->[_cut]) ? (_s0, _s1) : (_s1, _s0)]; my $q_d2 = $v->dist2_to_box(@{$q}[_c0, _c1]); if ($q_d2 <= $best_d2) { my $j; for ($j = $#queue_d2; $j >= 0; $j--) { last if $queue_d2[$j] >= $q_d2; } splice @queue, ++$j, 0, $q; splice @queue_d2, $j, 0, $q_d2; } } else { for (@{$t->[_ixs]}) { next if $but and $but->{$_}; my $d21 = $vs->[$_]->dist2($v); if ($d21 <= $best_d2) { $best_d2 = $d21; $best_ix = $_; } } if ($t = pop @queue) { if ($best_d2 >= pop @queue_d2) { next; } } return ($best_ix, $best_d2); } } } sub find_nearest_vector_in_box { my ($self, $v, $a, $b, $d, @but) = @_; my $t = $self->{tree} or return; my $vs = $self->{vs}; my ($a1, $b1) = Math::Vector::Real->box($a, $b); my $d2 = (defined $d ? $d * $d : $v->max_dist2_to_box($a1, $b1)); my $but; if (@but) { if (@but == 1 and ref $but[0] eq 'HASH') { $but = $but[0]; } else { my %but = map { $_ => 1 } @but; $but = \%but; } } my ($rix, $rd2) = _find_nearest_vector_in_box($vs, $t, $v, $a1, $b1, $d2, $but); $rix // return; wantarray ? 
($rix, sqrt($rd2)) : $rix; } sub _find_nearest_vector_in_box { my ($vs, $t, $v, $a, $b, $best_d2, $but) = @_; my $best_ix; my @queue = $t; my @queue_d2 = 0; while (my $t = pop @queue) { last if $best_d2 < pop @queue_d2; if (defined (my $axis = $t->[_axis])) { my @sides; push @sides, $t->[_s0] if $a->[$axis] <= $t->[_cut]; push @sides, $t->[_s1] if $b->[$axis] >= $t->[_cut]; for my $s (@sides) { my $d2 = $v->dist2_to_box(@$s[_c0, _c1]); if ($d2 <= $best_d2) { my $j; for ($j = $#queue_d2; $j >= 0; $j--) { last if $queue_d2[$j] >= $d2; } splice @queue, ++$j, 0, $s; splice @queue_d2, $j, 0, $d2; } } } else { for (@{$t->[_ixs]}) { next if $but and $but->{$_}; my $v1 = $vs->[$_]; my $d2 = $v1->dist2($v); if ($d2 <= $best_d2 and $v1->dist2_to_box($a, $b) == 0) { $best_d2 = $d2; $best_ix = $_; } } } } return ($best_ix, $best_d2); } sub find_nearest_vector_in_box_chebyshev { my ($self, $v, $a, $b, $d, @but) = @_; my $t = $self->{tree} or return; my $vs = $self->{vs}; my ($a1, $b1) = Math::Vector::Real->box($a, $b); my $d2 = (defined $d ? $d * $d : $v->max_dist2_to_box($a1, $b1)); my $but; if (@but) { if (@but == 1 and ref $but[0] eq 'HASH') { $but = $but[0]; } else { my %but = map { $_ => 1 } @but; $but = \%but; } } my ($rix, $rd2) = _find_nearest_vector_in_box($vs, $t, $v, $a1, $b1, $d2, $but); $rix // return; wantarray ? ($rix, sqrt($rd2)) : $rix; } sub _find_nearest_vector_in_box_chebyshev { my ($vs, $t, $v, $a, $b, $best_d, $but) = @_; my $best_ix; my @queue = $t; my @queue_d = 0; while (my $t = pop @queue) { last if $best_d < pop @queue_d; if (defined (my $axis = $t->[_axis])) { my @sides; push @sides, $t->[_s0] if $a->[$axis] <= $t->[_cut]; push @sides, $t->[_s1] if $b->[$axis] >= $t->[_cut]; for my $s (@sides) { my $d = $v->chebyshev_dist_to_box(@$s[_c0, _c1]); if ($d <= $best_d) { my $j; for ($j = $#queue_d; $j >= 0; $j--) { last if $queue_d[$j] >= $d; } splice @queue, ++$j, 0, $s; splice @queue_d, $j, 0, $d; } } } else { for (@{$t->[_ixs]}) { next if $but and $but->{$_}; my $v1 = $vs->[$_]; my $d = $v1->chebyshev_dist($v); if ($d <= $best_d and $v1->chebyshev_dist_to_box($a, $b) == 0) { $best_d = $d; $best_ix = $_; } } } } return ($best_ix, $best_d); } sub find_nearest_vector_all_internal { my ($self, $d) = @_; my $vs = $self->{vs}; return unless @$vs > 1; my $d2 = (defined $d ? $d * $d : 'inf'); my @best = ((undef) x @$vs); my @d2 = (($d2) x @$vs); _find_nearest_vector_all_internal($vs, $self->{tree}, \@best, \@d2); return @best; } *find_nearest_neighbor_all_internal = \&find_nearest_vector_all_internal; # for backwards compatibility sub _find_nearest_vector_all_internal { my ($vs, $t, $bests, $d2s) = @_; if (defined (my $axis = $t->[_axis])) { my @all_leafs; for my $side (0, 1) { my @leafs = _find_nearest_vector_all_internal($vs, $t->[_s0 + $side], $bests, $d2s); my $other = $t->[_s1 - $side]; my ($c0, $c1) = @{$other}[_c0, _c1]; for my $leaf (@leafs) { for my $ix (@{$leaf->[_ixs]}) { my $v = $vs->[$ix]; if ($v->dist2_to_box($c0, $c1) < $d2s->[$ix]) { ($bests->[$ix], $d2s->[$ix]) = _find_nearest_vector($vs, $other, $v, $d2s->[$ix], $bests->[$ix]); } } } push @all_leafs, @leafs; } return @all_leafs; } else { my $ixs = $t->[_ixs]; for my $i (1 .. $#$ixs) { my $ix_i = $ixs->[$i]; my $v_i = $vs->[$ix_i]; for my $ix_j (@{$ixs}[0 .. 
$i - 1]) { my $d2 = $v_i->dist2($vs->[$ix_j]); if ($d2 < $d2s->[$ix_i]) { $d2s->[$ix_i] = $d2; $bests->[$ix_i] = $ix_j; } if ($d2 < $d2s->[$ix_j]) { $d2s->[$ix_j] = $d2; $bests->[$ix_j] = $ix_i; } } } return $t; } } sub find_two_nearest_vectors { my $self = shift; my $t = $self->{tree} or return; my $vs = $self->{vs}; if (my ($rix0, $rix1, $rd2) = _find_two_nearest_vectors($vs, $t)) { return wantarray ? ($rix0, $rix1, sqrt($rd2)) : sqrt($rd2) } () } sub _pole_id { my ($id, $deep) = __pole_id(@_); "$id/$deep"; } sub __pole_id { my ($vs, $t) = @_; if (defined $t->[_axis]) { my ($id, $deep) = __pole_id($vs, $t->[_s0]); return ($id, $deep+1); } return ($t->[_ixs][0], 0) } sub _find_two_nearest_vectors { my ($vs, $t) = @_; my @best_ixs = (undef, undef); my $best_d2 = 'inf' + 0; my @inner; my @queue_t1; my @queue_t2; while ($t) { if (defined $t->[_axis]) { my ($s0, $s1) = @{$t}[_s0, _s1]; push @inner, $s1; push @queue_t1, $s0; push @queue_t2, $s1; $t = $s0; } else { my $ixs = $t->[_ixs]; for my $i (1 .. $#$ixs) { my $ix1 = $ixs->[$i]; my $v1 = $vs->[$ix1]; for my $j (0 .. $i - 1) { my $ix2 = $ixs->[$j]; my $d2 = Math::Vector::Real::dist2($v1, $vs->[$ix2]); if ($d2 < $best_d2) { $best_d2 = $d2; @best_ixs = ($ix1, $ix2); } } } $t = pop @inner; } } my @queue_d2 = (0) x @queue_t1; while (my $t1 = pop @queue_t1) { my $t2 = pop @queue_t2; my $d2 = pop @queue_d2; if ($d2 < $best_d2) { unless (defined $t1->[_axis]) { unless (defined $t2->[_axis]) { for my $ix1 (@{$t1->[_ixs]}) { my $v1 = $vs->[$ix1]; for my $ix2 (@{$t2->[_ixs]}) { my $d2 = Math::Vector::Real::dist2($v1, $vs->[$ix2]); if ($d2 < $best_d2) { $best_d2 = $d2; @best_ixs = ($ix1, $ix2); } } } next; } ($t1, $t2) = ($t2, $t1); } for my $s (@{$t1}[_s0, _s1]) { my $d2 = Math::Vector::Real->dist2_between_boxes(@{$s}[_c0, _c1], @{$t2}[_c0, _c1]); if ($d2) { if ($d2 < $best_d2) { unshift @queue_t1, $t2; unshift @queue_t2, $s; unshift @queue_d2, $d2; } } else { push @queue_t1, $t2; push @queue_t2, $s; push @queue_d2, 0; } } } } (@best_ixs, $best_d2) } sub find_in_ball { my ($self, $z, $d, $but) = @_; if (defined $but and ref $but ne 'HASH') { $but = { $but => 1 }; } _find_in_ball($self->{vs}, $self->{tree}, $z, $d * $d, $but); } sub _find_in_ball { my ($vs, $t, $z, $d2, $but) = @_; my (@queue, @r); my $r = 0; while (1) { if (defined (my $axis = $t->[_axis])) { my $c = $z->[$axis]; my $cut = $t->[_cut]; ($t, my ($q)) = @{$t}[$c <= $cut ? (_s0, _s1) : (_s1, _s0)]; push @queue, $q if $z->dist2_to_box(@{$q}[_c0, _c1]) <= $d2; } else { my $ixs = $t->[_ixs]; if (wantarray) { push @r, grep { $vs->[$_]->dist2($z) <= $d2 } @$ixs; } else { $r += ( $but ? grep { !$but->{$_} and $vs->[$_]->dist2($z) <= $d2 } @$ixs : grep { $vs->[$_]->dist2($z) <= $d2 } @$ixs ); } $t = pop @queue or last; } } if (wantarray) { if ($but) { return grep !$but->{$_}, @r; } return @r; } return $r; } sub find_in_box { my ($self, $a, $b, $but) = @_; my ($a1, $b1) = Math::Vector::Real->box($a, $b); if (defined $but and ref $but ne 'HASH') { $but = { $but => 1 }; } _find_in_box($self->{vs}, $self->{tree}, $a1, $b1, $but); } sub _find_in_box { my ($vs, $t, $a, $b, $but) = @_; my (@r, $r); my @queue; while (1) { if (defined (my $axis = $t->[_axis])) { my $cut = $t->[_cut]; push @queue, $t->[_s0] if $cut >= $a->[$axis]; push @queue, $t->[_s1] if $cut <= $b->[$axis]; } else { my $ixs = $t->[_ixs]; if (wantarray) { push @r, grep { $vs->[$_]->dist2_to_box($a, $b) <= 0 } @$ixs; } else { $r += ( $but ? 
grep { !$but->{$_} and $vs->[$_]->dist2_to_box($a, $b) <= 0 } @$ixs : grep { $vs->[$_]->dist2_to_box($a, $b) <= 0 } @$ixs ); } } $t = pop @queue or last; } if (wantarray) { if ($but) { return grep !$but->{$_}, @r; } return @r; } return $r; } sub find_farthest_vector { my ($self, $v, $d, @but) = @_; my $t = $self->{tree} or return; my $vs = $self->{vs}; my $d2 = ($d ? $d * $d : -1); my $but; if (@but) { if (@but == 1 and ref $but[0] eq 'HASH') { $but = $but[0]; } else { my %but = map { $_ => 1 } @but; $but = \%but; } } my ($rix, $rd2) = _find_farthest_vector($vs, $t, $v, $d2, undef, $but); $rix // return; wantarray ? ($rix, sqrt($d2)) : $rix; } sub find_farthest_vector_internal { my ($self, $ix, $d) = @_; $ix >= 0 or croak "index out of range"; $self->find_farthest_vector($self->{vs}[$ix], $d, $ix); } sub _find_farthest_vector { my ($vs, $t, $v, $best_d2, $best_ix, $but) = @_; my @queue; my @queue_d2; while (1) { if (defined (my $axis = $t->[_axis])) { # substitute the current one by the best subtree and queue # the worst for later ($t, my ($q)) = @{$t}[($v->[$axis] >= $t->[_cut]) ? (_s0, _s1) : (_s1, _s0)]; my $q_d2 = $v->max_dist2_to_box(@{$q}[_c0, _c1]); if ($q_d2 >= $best_d2) { my $j; for ($j = $#queue_d2; $j >= 0; $j--) { last if $queue_d2[$j] <= $q_d2; } splice @queue, ++$j, 0, $q; splice @queue_d2, $j, 0, $q_d2; } } else { for (@{$t->[_ixs]}) { next if $but and $but->{$_}; my $d21 = $vs->[$_]->dist2($v); if ($d21 >= $best_d2) { $best_d2 = $d21; $best_ix = $_; } } if ($t = pop @queue) { if ($best_d2 <= pop @queue_d2) { next; } } return ($best_ix, $best_d2); } } } sub find_random_vector { my $self = shift; my $t = $self->{tree} or return; my $vs = $self->{vs}; my $hidden = $self->{hidden}; if (not $hidden or @$vs > 20 * keys(%$hidden)) { # pick directly when the hidden elements are less than 5% of the total while (1) { my $ix = int rand @$vs; return $ix unless $hidden and $hidden->{$ix}; } } _find_random_vector($vs, $t); } sub _find_random_vector { my ($vs, $t) = @_; while (defined $t->[_axis]) { $t = $t->[rand($t->[_n]) < $t->[_s0][_n] ? 
_s0 : _s1]; } $t->[_ixs][rand $t->[_n]] } sub k_means_seed { my ($self, $n_req) = @_; $n_req = int($n_req) or return; my $t = $self->{tree} or return; my $vs = $self->{vs}; _k_means_seed($vs, $t, $n_req); } *k_means_start = \&k_means_seed; sub _k_means_seed { my ($vs, $t, $n_req) = @_; if ($n_req <= 1) { return if $n_req < 1; # print STDERR "returning centroid\n"; return $t->[_sum] / $t->[_n]; } else { my $n = $t->[_n]; if (defined $t->[_axis]) { my ($s0, $s1) = @{$t}[_s0, _s1]; my $n0 = $s0->[_n]; my $n1 = $s1->[_n]; my $n0_req = int(0.5 + $n_req * ($n0 / $n)); $n0_req = $n0 if $n0_req > $n0; return (_k_means_seed($vs, $s0, $n0_req), _k_means_seed($vs, $s1, $n_req - $n0_req)); } else { my $ixs = $t->[_ixs]; my @out; for (0..$#$ixs) { push @out, $vs->[$ixs->[$_]] if rand($n - $_) < ($n_req - @out); } # print STDERR "asked for $n_req elements, returning ".scalar(@out)."\n"; return @out; } } } our $k_means_seed_pp_test; sub _k_means_seed_pp_test { my ($self, $err, $kms, $players, $weights) = @_; my @w; my $last = 0; for my $i (0..$#$players) { my $p = $players->[$i]; my $w = $weights->[$i] - $last; $last = $weights->[$i]; my @store; if (ref $p) { _push_all($p, \@store); } else { @store = $p } if (@store) { $w /= @store; $w[$_] = $w for @store; } } my $vs = $self->{vs}; $w[$_] //= 0 for 0..$#$vs; $k_means_seed_pp_test->($self, $err, [map $self->{vs}[$_], @$kms], \@w); } sub k_means_seed_pp { my ($self, $n_req, $err) = @_; $n_req = int($n_req) or return; $err ||= 0.5; my $t = $self->{tree} or return; my $vs = $self->{vs}; my $km = $self->find_random_vector; my (@km, @d2); idhash my %extra; # [$min_d2, $max_d2] # my (@player, @weight, @queue); # $#player = @$vs; # preallocate memory my (@weight, @queue); $#weight = @$vs; # preallocate memory while (1) { push @km, $km; last unless @km < $n_req; # update distances @queue = $t; while (my $p = pop @queue) { my $kmv = $vs->[$km]; my ($c0, $c1) = @{$p}[_c0, _c1]; my $extra = $extra{$p} //= ['inf', 'inf']; my ($min_d2, $max_d2) = @$extra; my $min_d2_to_box = $kmv->dist2_to_box($c0, $c1); if ($max_d2 > $min_d2_to_box) { if (defined $p->[_axis]) { push @queue, @{$p}[_s0, _s1]; } else { for (@{$p->[_ixs]}) { my $d2 = $kmv->dist2($vs->[$_]); if ($d2 < ($d2[$_] //= $d2)) { $d2[$_] = $d2; } } } if ($min_d2_to_box < $min_d2) { $extra->[0] = $min_d2_to_box; } my $max_d2_to_box = $kmv->max_dist2_to_box($c0, $c1); if ($max_d2_to_box < $max_d2) { $extra->[1] = $max_d2_to_box; } } } # find players and weight them my $weight = 0; # @player = (); @weight = (); # @queue = $t; # while (my $p = pop @queue) { # my $extra = $extra{$p} or die "internal error: extra information missing for $p"; # my ($min_d2, $max_d2) = @$extra; # if ($max_d2 * $err < $min_d2) { # $weight += $p->[_n] * ($min_d2 + $max_d2) * 0.5; # push @weight, $weight; # push @player, $p; # } # else { # if (defined $p->[_axis]) { # push @queue, @{$p}[_s0, _s1]; # } # else { # for (@{$p->[_ixs]}) { # if (my $d2 = $d2[$_]) { # $weight += $d2; # push @weight, $weight; # push @player, $_; # } # } # } # } # } for my $ix (0..@$vs) { $weight += $d2[$ix] // 0; $weight[$ix] += $weight; } # in order to check the algorithm we have to tap it here # $k_means_seed_pp_test and @km > 1 and # $self->_k_means_seed_pp_test($err, \@km, \@player, \@weight); # to many k-means requested? 
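# The active code below does k-means++ style seeding: @d2 tracks each point's
# squared distance to the nearest seed chosen so far, @weight accumulates those
# distances as a running (cumulative) sum, $dice is a uniform draw over the
# total weight, and the binary search locates the first point whose cumulative
# weight reaches $dice, so the next seed is selected with probability
# proportional to its squared distance to the already-chosen seeds.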
# @player or last; # select a position on the weight queue: my $dice = rand($weight); # and use binary search to look for it: my $i = 0; my $j = @weight; while ($i < $j) { my $pivot = (($i + $j) >> 1); if ($weight[$pivot] < $dice) { $i = $pivot + 1; } else { $j = $pivot; } } #my $player = $player[$i]; #$km = (ref $player ? _find_random_vector($vs, $player) : $player); $km = $i; } return @{$vs}[@km]; } sub k_means_loop { my ($self, @k) = @_; @k or next; my $t = $self->{tree} or next; my $vs = $self->{vs}; while (1) { my $diffs; my @n = ((0) x @k); my @sum = ((undef) x @k); _k_means_step($vs, $t, \@k, [0..$#k], \@n, \@sum); for (0..$#k) { if (my $n = $n[$_]) { my $k = $sum[$_] / $n; $diffs++ if $k != $k[$_]; $k[$_] = $k; } } unless ($diffs) { return (wantarray ? @k : $k[0]); } } } sub k_means_step { my $self = shift; @_ or return; my $t = $self->{tree} or return; my $vs = $self->{vs}; my @n = ((0) x @_); my @sum = ((undef) x @_); _k_means_step($vs, $t, \@_, [0..$#_], \@n, \@sum); for (0..$#n) { if (my $n = $n[$_]) { $sum[$_] /= $n; } else { # otherwise let the original value stay $sum[$_] = $_[$_]; } } wantarray ? @sum : $sum[0]; } sub _k_means_step { my ($vs, $t, $centers, $cixs, $ns, $sums) = @_; my ($n, $sum, $c0, $c1) = @{$t}[_n, _sum, _c0, _c1]; if ($n) { my $centroid = $sum/$n; my $best = nkeyhead { $centroid->dist2($centers->[$_]) } @$cixs; my $max_d2 = Math::Vector::Real::max_dist2_to_box($centers->[$best], $c0, $c1); my @down = grep { Math::Vector::Real::dist2_to_box($centers->[$_], $c0, $c1) <= $max_d2 } @$cixs; if (@down <= 1) { $ns->[$best] += $n; # FIXME: M::V::R objects should support this undef + vector logic natively! if (defined $sums->[$best]) { $sums->[$best] += $sum; } else { $sums->[$best] = V(@$sum); } } else { if (defined (my $axis = $t->[_axis])) { my ($s0, $s1) = @{$t}[_s0, _s1]; _k_means_step($vs, $t->[_s0], $centers, \@down, $ns, $sums); _k_means_step($vs, $t->[_s1], $centers, \@down, $ns, $sums); } else { for my $ix (@{$t->[_ixs]}) { my $v = $vs->[$ix]; my $best = nkeyhead { $v->dist2($centers->[$_]) } @down; $ns->[$best]++; if (defined $sums->[$best]) { $sums->[$best] += $v; } else { $sums->[$best] = V(@$v); } } } } } } sub k_means_assign { my $self = shift; @_ or return; my $t = $self->{tree} or return; my $vs = $self->{vs}; my @out = ((undef) x @$vs); _k_means_assign($vs, $t, \@_, [0..$#_], \@out); @out; } sub _k_means_assign { my ($vs, $t, $centers, $cixs, $outs) = @_; my ($n, $sum, $c0, $c1) = @{$t}[_n, _sum, _c0, _c1]; if ($n) { my $centroid = $sum/$n; my $best = nkeyhead { $centroid->dist2($centers->[$_]) } @$cixs; my $max_d2 = Math::Vector::Real::max_dist2_to_box($centers->[$best], $c0, $c1); my @down = grep { Math::Vector::Real::dist2_to_box($centers->[$_], $c0, $c1) <= $max_d2 } @$cixs; if (@down <= 1) { _k_means_assign_1($t, $best, $outs); } else { if (defined (my $axis = $t->[_axis])) { my ($s0, $s1) = @{$t}[_s0, _s1]; _k_means_assign($vs, $t->[_s0], $centers, \@down, $outs); _k_means_assign($vs, $t->[_s1], $centers, \@down, $outs); } else { for my $ix (@{$t->[_ixs]}) { my $v = $vs->[$ix]; my $best = nkeyhead { $v->dist2($centers->[$_]) } @down; $outs->[$ix] = $best; } } } } } sub _k_means_assign_1 { my ($t, $best, $outs) = @_; if (defined (my $axis = $t->[_axis])) { _k_means_assign_1($t->[_s0], $best, $outs); _k_means_assign_1($t->[_s1], $best, $outs); } else { $outs->[$_] = $best for @{$t->[_ixs]}; } } sub ordered_by_proximity { my $self = shift; my @r; $#r = $#{$self->{vs}}; $#r = -1; # preallocate _ordered_by_proximity($self->{tree}, \@r); return 
@r; } sub _ordered_by_proximity { my $t = shift; my $r = shift; if (defined $t->[_axis]) { _ordered_by_proximity($t->[_s0], $r); _ordered_by_proximity($t->[_s1], $r); } else { push @$r, @{$t->[_ixs]} } } sub _dump_to_string { my ($vs, $t, $indent, $opts) = @_; my ($n, $c0, $c1, $sum) = @{$t}[_n, _c0, _c1, _sum]; my $id = ($opts->{pole_id} ? _pole_id($vs, $t)." " : ''); if (defined (my $axis = $t->[_axis])) { my ($s0, $s1, $cut) = @{$t}[_s0, _s1, _cut]; return ( "${indent}${id}n: $n, c0: $c0, c1: $c1, sum: $sum, axis: $axis, cut: $cut\n" . _dump_to_string($vs, $s0, "$indent$opts->{tab}", $opts) . _dump_to_string($vs, $s1, "$indent$opts->{tab}", $opts) ); } else { my $remark = $opts->{remark} // []; my $o = ( "${indent}${id}n: $n, c0: $c0, c1: $c1, sum: $sum\n" . "${indent}$opts->{tab}ixs: [" ); my @str; for my $ix (@{$t->[_ixs]}) { my $colored_ix = (@$remark and grep($ix == $_, @$remark) ? Term::ANSIColor::colored($ix, 'red') : $ix); if ($opts->{dump_vectors} // 1) { push @str, "$colored_ix $vs->[$ix]"; } else { push @str, $colored_ix; } } return $o . join(', ', @str) . "]\n"; } } sub dump_to_string { my ($self, %opts) = @_; my $tab = $opts{tab} //= ' '; my $vs = $self->{vs}; my $nvs = @$vs; my $hidden = join ", ", keys %{$self->{hidden} || {}}; my $o = "tree: n: $nvs, hidden: {$hidden}\n"; if (my $t = $self->{tree}) { require Term::ANSIColor if $opts{remark}; return $o . _dump_to_string($vs, $t, $tab, \%opts); } else { return "$o${tab}(empty)\n"; } } sub dump { my $self = shift; print $self->dump_to_string(@_); } 1; __END__ =head1 NAME Math::Vector::Real::kdTree - kd-Tree implementation on top of Math::Vector::Real =head1 SYNOPSIS use Math::Vector::Real::kdTree; use Math::Vector::Real; use Math::Vector::Real::Random; my @v = map Math::Vector::Real->random_normal(4), 1..1000; my $tree = Math::Vector::Real::kdTree->new(@v); my $ix = $tree->find_nearest_vector(V(0, 0, 0, 0)); say "nearest vector is $ix, $v[$ix]"; =head1 DESCRIPTION This module implements a kd-Tree data structure in Perl and common algorithms on top of it. =head2 Methods The following methods are provided: =over 4 =item $t = Math::Vector::Real::kdTree->new(@points) Creates a new kd-Tree containing the given points. =item $t2 = $t->clone Creates a duplicate of the tree. The two trees will share internal read only data so this method is more efficient in terms of memory usage than others performing a deep copy. =item my $ix = $t->insert($p0, $p1, ...) Inserts the given points into the kd-Tree. Returns the index assigned to the first point inserted. =item $s = $t->size Returns the number of points inside the tree. =item $p = $t->at($ix) Returns the point at the given index inside the tree. =item $t->move($ix, $p) Moves the point at index C<$ix> to the new given position readjusting the tree structure accordingly. =item ($ix, $d) = $t->find_nearest_vector($p, $max_d, @but_ix) =item ($ix, $d) = $t->find_nearest_vector($p, $max_d, \%but_ix) Find the nearest vector for the given point C<$p> and returns its index and the distance between the two points (in scalar context the index is returned). If C<$max_d> is defined, the search is limited to the points within that distance Optionally, a list of point indexes to be excluded from the search can be passed or, alternatively, a reference to a hash containing the indexes of the points to be excluded. =item @ix = $t->find_nearest_vector_all_internal Returns the index of the nearest vector from the tree. 
It is equivalent to the following code (though it uses a better algorithm):

  @ix = map { scalar $t->find_nearest_vector($t->at($_), undef, $_) } 0..($t->size - 1);

=item $ix = $t->find_nearest_vector_in_box($p, $a, $b, $max_d, @but_ix)

=item $ix = $t->find_nearest_vector_in_box($p, $a, $b, $max_d, \%but_ix)

Returns the vector nearest to the given point among those that are also inside the box defined by C<$a> and C<$b>. The other arguments have the same meaning as for the method C<find_nearest_vector>.

=item $ix = $t->find_nearest_vector_in_box_chebyshev($p, $a, $b, $max_d, @but_ix)

=item $ix = $t->find_nearest_vector_in_box_chebyshev($p, $a, $b, $max_d, \%but_ix)

This method is similar to C<find_nearest_vector_in_box> but uses the Chebyshev metric.

=item $ix = $t->find_farthest_vector($p, $min_d, @but_ix)

Finds the point from the tree farthest from the given C<$p>. The optional argument C<$min_d> specifies a minimal distance; undef is returned when no point farther than that is found. C<@but_ix> specifies points that should not be considered when looking for the farthest point.

=item $ix = $t->find_farthest_vector_internal($ix, $min_d, @but_ix)

Given the index of a point in the tree, this method returns the index of the vector in the tree farthest from it.

=item ($ix0, $ix1, $d) = $t->find_two_nearest_vectors

This method returns the indexes of two vectors from the tree such that the distance between them is minimal. The distance is returned as the third output value. In scalar context, just the distance is returned.

=item @k = $t->k_means_seed($n)

This method uses the internal tree structure to generate a set of points that can be used as seeds for the other k-means methods. There is no guarantee on the quality of the generated seeds, but the algorithm used seems to perform well in practice.

=item @k = $t->k_means_step(@k)

Performs a step of Lloyd's algorithm for the k-means calculation.

=item @k = $t->k_means_loop(@k)

Iterates until Lloyd's algorithm converges and returns the final means.

=item @ix = $t->k_means_assign(@k)

Returns for every point in the tree the index of the cluster it belongs to.

=item @ix = $t->find_in_ball($z, $d, $but)

=item $n = $t->find_in_ball($z, $d, $but)

Finds the points inside the tree contained in the hypersphere with center C<$z> and radius C<$d>. In scalar context returns the number of points found; in list context returns the indexes of the points. If the extra argument C<$but> is provided, the point with that index is ignored.

=item @ix = $t->find_in_box($a, $b, $but)

=item $n = $t->find_in_box($a, $b, $but)

Finds the points inside the tree contained in the axis-aligned box defined by the two opposite vertices C<$a> and C<$b>. In scalar context returns the number of points found; in list context returns the indexes of the points. If the extra argument C<$but> is provided, the point with that index is ignored.

=item @ix = $t->ordered_by_proximity

Returns the indexes of the points in an order where it is likely that near vectors also end up in near positions in the list.

=back

=head2 k-means

The module can be used to calculate the k-means of a set of vectors as follows:

  # inputs
  my @v = ...;
  my $k = ...;

  # k-means calculation
  my $t = Math::Vector::Real::kdTree->new(@v);
  my @means = $t->k_means_seed($k);
  @means = $t->k_means_loop(@means);
  my @assign = $t->k_means_assign(@means);

  my @cluster = map [], 1..$k;
  for (0..$#assign) {
      my $cluster_ix = $assign[$_];
      my $cluster = $cluster[$cluster_ix];
      push @$cluster, $t->at($_);
  }

  use Data::Dumper;
  print Dumper \@cluster;

=head1 SEE ALSO

L. L.
L

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2011-2015 by Salvador Fandiño <sfandino@yahoo.com>

This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.12.3 or, at your option, any later version of Perl 5 you may have available.

=cut

Math-Vector-Real-kdTree-0.15/META.yml0000664000175000017500000000110312513224505016130 0ustar salvasalva---
abstract: 'kd-Tree implementation on top of Math::Vector::Real'
author:
  - 'Salvador Fandiño <sfandino@yahoo.com>'
build_requires:
  ExtUtils::MakeMaker: '0'
configure_requires:
  ExtUtils::MakeMaker: '0'
dynamic_config: 1
generated_by: 'ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.150001'
license: unknown
meta-spec:
  url: http://module-build.sourceforge.net/META-spec-v1.4.html
  version: '1.4'
name: Math-Vector-Real-kdTree
no_index:
  directory:
    - t
    - inc
requires:
  Math::Vector::Real: '0.16'
  Sort::Key::Top: '0.08'
version: '0.15'
Math-Vector-Real-kdTree-0.15/META.json0000664000175000017500000000171112513224505016305 0ustar salvasalva{
  "abstract" : "kd-Tree implementation on top of Math::Vector::Real",
  "author" : [ "Salvador Fandiño <sfandino@yahoo.com>" ],
  "dynamic_config" : 1,
  "generated_by" : "ExtUtils::MakeMaker version 7.04, CPAN::Meta::Converter version 2.150001",
  "license" : [ "unknown" ],
  "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" },
  "name" : "Math-Vector-Real-kdTree",
  "no_index" : { "directory" : [ "t", "inc" ] },
  "prereqs" : {
    "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } },
    "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } },
    "runtime" : { "requires" : { "Math::Vector::Real" : "0.16", "Sort::Key::Top" : "0.08" } }
  },
  "release_status" : "stable",
  "version" : "0.15"
}