RcppAnnoy/ 0000755 0001762 0000144 00000000000 15131171252 012161 5 ustar ligges users RcppAnnoy/tests/ 0000755 0001762 0000144 00000000000 15037214756 013337 5 ustar ligges users RcppAnnoy/tests/tinytest.R 0000644 0001762 0000144 00000000232 15037214756 015342 0 ustar ligges users if (requireNamespace("tinytest", quietly=TRUE)) {
set.seed(42) # Set a seed to make the test deterministic
tinytest::test_package("RcppAnnoy")
}
RcppAnnoy/MD5 0000644 0001762 0000144 00000004501 15131171252 012471 0 ustar ligges users a43ff1d8733252cf2f5e6378b9e1a6b0 *ChangeLog
b11974ab08d58c3cde5740573f0500d7 *DESCRIPTION
dae4f8ca1beeb667ee6121de3c975d2c *NAMESPACE
7ae968c5de1e6252a2c1ab5a6568517d *R/RcppExports.R
7514e414b0e89bf1533417a714243518 *R/annoy.R
4d86ce7f605efd09238d1259c3edf71b *R/version.R
30a96085112d8ac88486f48c22163cd4 *README.md
f386779b34155e79f467f3703f65213d *build/vignette.rds
febc42ec3b6fd6c342a9fd8d38f20cd0 *cleanup
b9f392bb4c54af8b21cf529aca47bbb8 *demo/00Index
2dbd8f45f0fba5eea1284ac50ad8984a *demo/irisExample.R
626d1ee412f1f366f860e2fa1b7e15a1 *demo/simpleExample.R
fb6df02390e9a84cf135c6e38c3a2621 *inst/NEWS.Rd
bb51c8add2af3e7a174c1c84db46f2c3 *inst/doc/UsingAnnoyInCpp.pdf
609af8779f245aa9cabadd01378bdd41 *inst/doc/UsingAnnoyInCpp.pdf.asis
17668ef628922f5e2a5a50b934c11d74 *inst/include/RcppAnnoy.h
1fc7d61f9386cf81b7e7be80ab27dd4a *inst/include/annoylib.h
d71f69b770dcc346f36da24381e45814 *inst/include/kissrandom.h
bb5e4ec24ecaed6c71be0d76836eedba *inst/include/mman.h
31469ffae9d97a5c2db428d7a4f9ead2 *inst/rmd/UsingAnnoyInCpp.Rmd
59f4b2571897b164e2f6cceeb6f75555 *inst/rmd/rcppannoy.bib
14c740fb1a1c4d78afc7d3b76641d9e6 *inst/tinytest/data/test.tree
4bb99f63e5ba7c7ae5b730625cf8e2eb *inst/tinytest/testAngular.R
4aae48f11cb4f25bdf0fdcc82ae35fef *inst/tinytest/testDotProduct.R
56f9fece1ea637c58c269f6b530712b5 *inst/tinytest/testEuclidean.R
1ec108d7dc0af03ced95ccf131c6daf0 *inst/tinytest/testHamming.R
7d8dae4a58700885f9f424d9f38c63aa *inst/tinytest/testIndex.R
bfc5aa0efdfe1fde02cc8fdb51e6b447 *inst/tinytest/testManhattan.R
15e6bfe848de489a6e6e0b972980c159 *inst/tinytest/testOnDiskBuild.R
c31e274a819e1b945f2fcef39c3e9d51 *inst/tinytest/testSeeds.R
ebb89d9c7e2a0af6a3ca60edfd328a54 *inst/tinytest/testVignette.R
df2116ebcbed31030d069c79c47f26a0 *man/AnnoyIndex.Rd
cbe66b243d9b4737c44ce13d427c2554 *man/RcppAnnoy-package.Rd
5ac2065ddceaf546aa75a251678ac58e *man/getAnnoyVersion.Rd
eb1a07c50cc3ab66aad1f84abf7cd18c *man/getArchictectureStatus.Rd
ea3b103fe89cb728f9d81adc5c2a1380 *src/Makevars
20d086e35aec627fea427ae2a810a4db *src/RcppExports.cpp
3fe2f1942958f63e8f5070916e941cab *src/annoy.cpp
f61d5764f4ba6cf6e90127398a30e7be *src/arch.cpp
10f6597510bb81d9b8bbbb5735eae48f *src/init.c
95527d46e099ce463e3654a399b64b6f *src/version.cpp
4259ac998e053bf1c769663d5298dfd5 *tests/tinytest.R
609af8779f245aa9cabadd01378bdd41 *vignettes/UsingAnnoyInCpp.pdf.asis
RcppAnnoy/R/ 0000755 0001762 0000144 00000000000 14725061646 012377 5 ustar ligges users RcppAnnoy/R/RcppExports.R 0000644 0001762 0000144 00000001064 14653660622 015013 0 ustar ligges users # Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#' Report CPU Architecture and Compiler
#'
#' @return A constant string created at compile-time describing
#' the extent of AVX instructions (512 bit, 128 bit, or none)
#' and the compiler used; currently recognised are MSC (unlikely
#' for R), GCC, Clang, or \sQuote{other}.
getArchictectureStatus <- function() {
# Thin wrapper: forwards to the registered native routine of the same name.
.Call(`_RcppAnnoy_getArchictectureStatus`)
}
# Internal accessor: forwards to the registered native routine
# `_RcppAnnoy_annoy_version` and returns its result unchanged.
.annoy_version <- function() {
.Call(`_RcppAnnoy_annoy_version`)
}
RcppAnnoy/R/version.R 0000644 0001762 0000144 00000001071 13766143727 014213 0 ustar ligges users #' Get the Annoy library version
#'
#' Get the version of the Annoy C++ library that RcppAnnoy was compiled with.
#'
#' @param compact Logical scalar indicating whether a compact
#' \code{\link{package_version}} should be returned.
#'
#' @return An integer vector containing the major, minor and patch version numbers;
#' or if \code{compact=TRUE}, a \code{\link{package_version}} object.
#'
#' @author Aaron Lun
getAnnoyVersion <- function(compact=FALSE) {
    ## The native accessor is always queried first; 'compact' only selects
    ## the shape of the value handed back.
    version <- .annoy_version()
    if (compact) {
        ## Drop the names, join the components with dots, and convert the
        ## resulting string to a package_version object.
        dotted <- paste(unname(version), collapse = ".")
        as.package_version(dotted)
    } else {
        version
    }
}
RcppAnnoy/R/annoy.R 0000644 0001762 0000144 00000013324 14725061646 013651 0 ustar ligges users #' @name AnnoyIndex
#'
#' @aliases
#' AnnoyEuclidean Rcpp_AnnoyEuclidean-class Rcpp_AnnoyEuclidean
#' AnnoyAngular Rcpp_AnnoyAngular-class Rcpp_AnnoyAngular
#' AnnoyManhattan Rcpp_AnnoyManhattan-class Rcpp_AnnoyManhattan
#' AnnoyHamming Rcpp_AnnoyHamming-class Rcpp_AnnoyHamming
#' AnnoyDotProduct Rcpp_AnnoyDotProduct-class Rcpp_AnnoyDotProduct
#'
#' @title Approximate Nearest Neighbors with Annoy
#'
#' @description
#' Annoy is a small library written to provide fast and memory-efficient
#' nearest neighbor lookup from a possibly static index which can be
#' shared across processes.
#'
#' @section Usage:
#' \preformatted{
#' a <- new(AnnoyEuclidean, vectorsz)
#'
#' a$setSeed(0)
#' a$setVerbose(0)
#'
#' a$addItem(i, dv)
#'
#' a$getNItems()
#'
#' a$getItemsVector(i)
#' a$getDistance(i, j)
#'
#' a$build(n_trees)
#'
#' a$getNNsByItem(i, n)
#' a$getNNsByItemList(i, n, search_k, include_distances)
#'
#' a$getNNsByVector(v, n)
#' a$getNNsByVectorList(v, n, search_k, include_distances)
#'
#' a$save(fn)
#' a$load(fn)
#' a$unload()
#' }
#'
#' @section Details:
#'
#' \code{new(Class, vectorsz)}
#' Create a new Annoy instance of type \code{Class} where \code{Class}
#' is one of the following:
#' \code{AnnoyEuclidean},
#' \code{AnnoyAngular},
#' \code{AnnoyManhattan},
#' \code{AnnoyHamming},
#' \code{AnnoyDotProduct}.
#' \code{vectorsz} denotes the length of the vectors that the Annoy instance
#' will be indexing.
#'
#' \code{$addItem(i, v)}
#' Adds item \code{i} (any nonnegative integer) with vector \code{v}.
#' Note that it will allocate memory for \code{max(i) + 1} items.
#'
#' \code{$build(n_trees)}
#' Builds a forest of \code{n_trees} trees.
#' More trees gives higher precision when querying.
#' After calling \code{build}, no more items can be added.
#'
#' \code{$save(fn)}
#' Saves the index to disk as filename \code{fn}.
#' After saving, no more items can be added.
#'
#' \code{$load(fn)}
#' Loads (mmaps) an index from filename \code{fn} on disk.
#'
#' \code{$unload()}
#' Unloads index.
#'
#' \code{$getDistance(i, j)}
#' Returns the distance between items \code{i} and \code{j}
#'
#' \code{$getNNsByItem(i, n)}
#' Returns the \code{n} closest items as an integer vector of indices.
#'
#' \code{$getNNsByVector(v, n)}
#' Same as \code{$getNNsByItem}, but queries by vector \code{v} rather than
#' index \code{i}.
#'
#' \code{$getNNsByItemList(i, n, search_k = -1, include_distances = FALSE)}
#' Returns the n closest items to item \code{i} as a list.
#' During the query it will inspect up to \code{search_k} nodes which
#' defaults to \code{n_trees * n} if not provided.
#' \code{search_k} gives you a run-time tradeoff between better accuracy and
#' speed.
#' If you set \code{include_distances} to \code{TRUE},
#' it will return a length 2 list with elements \code{"item"} &
#' \code{"distance"}.
#' The \code{"item"} element contains the \code{n} closest items as an integer
#' vector of indices.
#' The optional \code{"distance"} element contains the corresponding distances
#' to \code{"item"} as a numeric vector.
#'
#' \code{$getNNsByVectorList(v, n, search_k = -1, include_distances = FALSE)}
#' Same as \code{$getNNsByItemList}, but queries by vector \code{v} rather than
#' index \code{i}
#'
#' \code{$getItemsVector(i)}
#' Returns the vector for item \code{i} that was previously added.
#'
#' \code{$getNItems()}
#' Returns the number of items in the index.
#'
#' \code{$setVerbose()}
#' If \code{1} then messages will be printed during processing.
#' If \code{0} then messages will be suppressed during processing.
#'
#' \code{$setSeed()}
#' Set random seed for annoy (integer).
#'
#' @examples
#' library(RcppAnnoy)
#'
#' # BUILDING ANNOY INDEX ---------------------------------------------------------
#' vector_size <- 10
#' a <- new(AnnoyEuclidean, vector_size)
#'
#' a$setSeed(42)
#'
#' # Turn on verbose status messages (0 to turn off)
#' a$setVerbose(1)
#'
#' # Load 100 random vectors into index
#' for (i in 1:100) a$addItem(i - 1, runif(vector_size)) # Annoy uses zero indexing
#'
#' # Display number of items in index
#' a$getNItems()
#'
#' # Retrieve item at position 0 in index
#' a$getItemsVector(0)
#'
#' # Calculate distance between items at positions 0 & 1 in index
#' a$getDistance(0, 1)
#'
#' # Build forest with 50 trees
#' a$build(50)
#'
#'
#' # PERFORMING ANNOY SEARCH ------------------------------------------------------
#'
#' # Retrieve 5 nearest neighbors to item 0
#' # Returned as integer vector of indices
#' a$getNNsByItem(0, 5)
#'
#' # Retrieve 5 nearest neighbors to item 0
#' # search_k = -1 will invoke default search_k value of n_trees * n
#' # Return results as list with an element for distance
#' a$getNNsByItemList(0, 5, -1, TRUE)
#'
#' # Retrieve 5 nearest neighbors to item 0
#' # search_k = -1 will invoke default search_k value of n_trees * n
#' # Return results as list without an element for distance
#' a$getNNsByItemList(0, 5, -1, FALSE)
#'
#'
#' v <- runif(vector_size)
#' # Retrieve 5 nearest neighbors to vector v
#' # Returned as integer vector of indices
#' a$getNNsByVector(v, 5)
#'
#' # Retrieve 5 nearest neighbors to vector v
#' # search_k = -1 will invoke default search_k value of n_trees * n
#' # Return results as list with an element for distance
#' a$getNNsByVectorList(v, 5, -1, TRUE)
#'
#' # SAVING/LOADING ANNOY INDEX ---------------------------------------------------
#'
#' # Create a tempfile, replace with a local file to keep
#' treefile <- tempfile(pattern="annoy", fileext=".tree")
#'
#' # Save annoy tree to disk
#' a$save(treefile)
#'
#' # Load annoy tree from disk
#' a$load(treefile)
#'
#' # Unload index from memory
#' a$unload()
NULL
## Ensure every Rcpp module gets loaded: one loadModule() call per
## RCPP_MODULE defined in src/annoy.cpp, using the same module names.
## The second argument (TRUE) is loadModule()'s 'what' argument; per the
## Rcpp documentation it requests all objects of the module.
loadModule("AnnoyAngular", TRUE)
loadModule("AnnoyEuclidean", TRUE)
loadModule("AnnoyManhattan", TRUE)
loadModule("AnnoyHamming", TRUE)
loadModule("AnnoyDotProduct", TRUE)
RcppAnnoy/cleanup 0000755 0001762 0000144 00000000245 15131164023 013535 0 ustar ligges users #!/bin/sh
# Remove editor backup files, build products under src/ (objects, shared
# libraries, symbols.rds) and auxiliary files under vignettes/.
# -f keeps rm quiet when a pattern matches nothing.
rm -rf *~ */*~ src/*.o src/*.so src/*.dll src/*.dylib src/symbols.rds \
vignettes/jss.bst vignettes/pinp.cls vignettes/auto \
vignettes/annoy.index
RcppAnnoy/demo/ 0000755 0001762 0000144 00000000000 15005765312 013113 5 ustar ligges users RcppAnnoy/demo/simpleExample.R 0000644 0001762 0000144 00000001046 12427546253 016052 0 ustar ligges users
## cf the simple example at https://github.com/spotify/annoy
library(RcppAnnoy)
set.seed(123) # be reproducible
f <- 40 # length of each item vector
a <- new(AnnoyEuclidean, f)
n <- 50 # not specified
for (i in seq_len(n)) {
    v <- rnorm(f)
    a$addItem(i-1, v) # Annoy uses zero-based item indices
}
a$build(50) # 50 trees
## Write the index to a session-specific temporary file rather than a
## hard-coded "/tmp/..." path, which does not exist on every platform.
## Replace 'treefile' with a path of your own to keep the index.
treefile <- tempfile(pattern = "annoy", fileext = ".tree")
a$save(treefile)
b <- new(AnnoyEuclidean, f) # new object, could be in another process
b$load(treefile) # super fast, will just mmap the file
print(b$getNNsByItem(0, 40))
RcppAnnoy/demo/irisExample.R 0000644 0001762 0000144 00000003071 14725062313 015520 0 ustar ligges users
## Suggested by @SamGG in https://github.com/eddelbuettel/rcppannoy/issues/79#issuecomment-2518597494
library(RcppAnnoy)
# IRIS EXAMPLE -----------------------------------------------------------------
data(iris)
# Converts to numeric, ignoring the species (fifth column)
X <- as.matrix(iris[,-5])
# Building index
a <- new(AnnoyEuclidean, ncol(X))
a$setSeed(42)
# Load dataset into index; Annoy uses zero indexing
for (i in seq_len(nrow(X)))
    a$addItem(i - 1, X[i,])
# Build forest with 50 trees
a$build(50)
# Reports about the forest
a$getNItems()
a$getNTrees()
# Performing search
k <- 5 # number of nearest neighbors
nn.index <- matrix(nrow = nrow(X), ncol = k)
for (i in seq_len(nrow(X)))
    nn.index[i,] <- a$getNNsByVector(X[i,], k)
# Annoy uses zero indexing, so index must be incremented
nn.index <- nn.index + 1
# The first match is the query itself most of the time
plot(1:nrow(X), nn.index[,1])
# Explore the second nearest neighbor: one panel per measurement column
opar <- par(mfrow = c(2, 2))
for (i in seq_len(ncol(X)))
    plot(X[, i], X[nn.index[,2], i], xlab = colnames(X)[i], ylab = "nearest")
par(opar) # restore the previous graphics settings
# Perform search with distance
k <- 5
nn.index <- matrix(nrow = nrow(X), ncol = k)
nn.distance <- matrix(nrow = nrow(X), ncol = k)
for (i in seq_len(nrow(X))) {
    res <- a$getNNsByVectorList(X[i,], k, -1, TRUE)
    nn.index[i,] <- res$item
    nn.distance[i,] <- res$distance
}
# Annoy uses zero indexing, so index must be incremented
nn.index <- nn.index + 1
# Explore distance to the second nearest neighbor
hist(nn.distance[,2], xlab = "Distance to the 2nd NN",
     main = "Histogram of distance")
# Unload index from memory
a$unload()
rm(a)
RcppAnnoy/demo/00Index 0000644 0001762 0000144 00000000173 15005765312 014246 0 ustar ligges users simpleExample A first introductory example
irisExample A more comprehensive example using the 'iris' data set
RcppAnnoy/vignettes/ 0000755 0001762 0000144 00000000000 15131164023 014167 5 ustar ligges users RcppAnnoy/vignettes/UsingAnnoyInCpp.pdf.asis 0000644 0001762 0000144 00000000305 15037456253 020660 0 ustar ligges users %\VignetteIndexEntry{Using Annoy in C++}
%\VignetteKeywords{Rcpp, Annoy, R, Cpp, Approximate Nearest Neighbours}
%\VignettePackage{RcppAnnoy}
%\VignetteEncoding{UTF-8}
%\VignetteEngine{Rcpp::asis}
RcppAnnoy/src/ 0000755 0001762 0000144 00000000000 15131164023 012746 5 ustar ligges users RcppAnnoy/src/annoy.cpp 0000644 0001762 0000144 00000034310 14653657263 014624 0 ustar ligges users
// RcppAnnoy -- Rcpp bindings to Annoy library for Approximate Nearest Neighbours
//
// Copyright (C) 2014 - 2023 Dirk Eddelbuettel
//
// This file is part of RcppAnnoy
//
// RcppAnnoy is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 2 of the License, or
// (at your option) any later version.
//
// RcppAnnoy is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with RcppAnnoy. If not, see <http://www.gnu.org/licenses/>.
// simple C++ modules to wrap to templated classes from Annoy
//
// uses annoylib.h (from Annoy) and provides R access via Rcpp
//
// Dirk Eddelbuettel, Nov 2014
#include "RcppAnnoy.h"
#include <cstdlib>              // std::free
namespace Annoy {
template< typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy >
class Annoy
{
protected:
AnnoyIndex *ptr;
unsigned int vectorsz;
public:
Annoy(int n) : vectorsz(n) {
ptr = new AnnoyIndex(n);
}
~Annoy() { if (ptr != NULL) delete ptr; }
void addItem(S item, Rcpp::NumericVector dv) {
if (item < 0) Rcpp::stop("Inadmissible item value %d", item);
std::vector fv(dv.size());
std::copy(dv.begin(), dv.end(), fv.begin());
char *errormsg;
if (!ptr->add_item(item, &fv[0], &errormsg)) Rcpp::stop(errormsg);
}
void callBuild(int n) { ptr->build(n); }
void callUnbuild() { ptr->unbuild(); }
void callSave(std::string filename) { ptr->save(filename.c_str()); }
void callLoad(std::string filename) { ptr->load(filename.c_str()); }
void callUnload() { ptr->unload(); }
int getNItems() { return ptr->get_n_items(); }
int getNTrees() { return ptr->get_n_trees(); }
double getDistance(int i, int j) { return ptr->get_distance(i, j); }
void verbose(bool v) { ptr->verbose(v); }
void setSeed(int s) { ptr->set_seed(s); }
std::vector getNNsByItem(S item, size_t n) {
std::vector result;
ptr->get_nns_by_item(item, n, -1, &result, NULL);
return result;
}
Rcpp::List getNNsByItemList(S item, size_t n, int search_k, bool include_distances) {
if (include_distances) {
std::vector result;
std::vector distances;
ptr->get_nns_by_item(item, n, search_k, &result, &distances);
return Rcpp::List::create(Rcpp::Named("item") = result,
Rcpp::Named("distance") = distances);
} else {
std::vector result;
ptr->get_nns_by_item(item, n, search_k, &result, NULL);
return Rcpp::List::create(Rcpp::Named("item") = result);
}
}
std::vector getNNsByVector(std::vector dv, size_t n) {
std::vector fv(dv.size());
std::copy(dv.begin(), dv.end(), fv.begin());
std::vector result;
ptr->get_nns_by_vector(&fv[0], n, -1, &result, NULL);
return result;
}
Rcpp::List getNNsByVectorList(std::vector fv, size_t n, int search_k, bool include_distances) {
if (fv.size() != vectorsz) {
Rcpp::stop("fv.size() != vector_size");
}
if (include_distances) {
std::vector result;
std::vector distances;
ptr->get_nns_by_vector(&fv[0], n, search_k, &result, &distances);
return Rcpp::List::create(
Rcpp::Named("item") = result,
Rcpp::Named("distance") = distances);
} else {
std::vector result;
ptr->get_nns_by_vector(&fv[0], n, search_k, &result, NULL);
return Rcpp::List::create(Rcpp::Named("item") = result);
}
}
std::vector getItemsVector(S item) {
std::vector fv(vectorsz);
ptr->get_item(item, &fv[0]);
std::vector dv(fv.size());
std::copy(fv.begin(), fv.end(), dv.begin());
return dv;
}
bool onDiskBuild(std::string fname) {
char *errormsg;
if (!ptr->on_disk_build(fname.c_str(), &errormsg)) Rcpp::stop(errormsg);
return true;
}
};
}
// One alias per distance metric; each alias is exposed to R by the
// RCPP_MODULE of the same name further down in this file.
// NOTE(review): Annoy::Annoy is declared with five template parameters, yet
// no template argument list is visible on these aliases. The <...> lists
// appear to have been lost; confirm against the repository before building.
typedef Annoy::Annoy AnnoyDotProduct;
typedef Annoy::Annoy AnnoyAngular;
typedef Annoy::Annoy AnnoyEuclidean;
typedef Annoy::Annoy AnnoyManhattan;
typedef Annoy::Annoy AnnoyHamming;
// Rcpp module exposing the AnnoyDotProduct alias to R under the name
// "AnnoyDotProduct". In each .method() call the first argument is the
// R-level method name, the second the member function it invokes and the
// third a short description string.
RCPP_EXPOSED_CLASS_NODECL(AnnoyDotProduct)
RCPP_MODULE(AnnoyDotProduct) {
    Rcpp::class_<AnnoyDotProduct>("AnnoyDotProduct")
        // matches Annoy(int n): n is the length of the indexed vectors
        .constructor<int>("constructor with integer count")
        .method("addItem", &AnnoyDotProduct::addItem, "add item")
        .method("build", &AnnoyDotProduct::callBuild, "build an index")
        .method("unbuild", &AnnoyDotProduct::callUnbuild, "unbuild an index")
        .method("save", &AnnoyDotProduct::callSave, "save index to file")
        .method("load", &AnnoyDotProduct::callLoad, "load index from file")
        .method("unload", &AnnoyDotProduct::callUnload, "unload index")
        .method("getDistance", &AnnoyDotProduct::getDistance, "get distance between i and j")
        .method("getNNsByItem", &AnnoyDotProduct::getNNsByItem,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByItemList", &AnnoyDotProduct::getNNsByItemList,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByVector", &AnnoyDotProduct::getNNsByVector,
                "retrieve Nearest Neigbours given vector")
        .method("getNNsByVectorList", &AnnoyDotProduct::getNNsByVectorList,
                "retrieve Nearest Neigbours given vector")
        .method("getItemsVector", &AnnoyDotProduct::getItemsVector, "retrieve item vector")
        .method("getNItems", &AnnoyDotProduct::getNItems, "get number of items")
        .method("getNTrees", &AnnoyDotProduct::getNTrees, "get number of trees")
        .method("setVerbose", &AnnoyDotProduct::verbose, "set verbose")
        .method("setSeed", &AnnoyDotProduct::setSeed, "set seed")
        .method("onDiskBuild", &AnnoyDotProduct::onDiskBuild, "build in given file")
        ;
}
// Rcpp module exposing the AnnoyAngular alias to R under the name
// "AnnoyAngular". In each .method() call the first argument is the R-level
// method name, the second the member function it invokes and the third a
// short description string.
RCPP_EXPOSED_CLASS_NODECL(AnnoyAngular)
RCPP_MODULE(AnnoyAngular) {
    Rcpp::class_<AnnoyAngular>("AnnoyAngular")
        // matches Annoy(int n): n is the length of the indexed vectors
        .constructor<int>("constructor with integer count")
        .method("addItem", &AnnoyAngular::addItem, "add item")
        .method("build", &AnnoyAngular::callBuild, "build an index")
        .method("unbuild", &AnnoyAngular::callUnbuild, "unbuild an index")
        .method("save", &AnnoyAngular::callSave, "save index to file")
        .method("load", &AnnoyAngular::callLoad, "load index from file")
        .method("unload", &AnnoyAngular::callUnload, "unload index")
        .method("getDistance", &AnnoyAngular::getDistance, "get distance between i and j")
        .method("getNNsByItem", &AnnoyAngular::getNNsByItem,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByItemList", &AnnoyAngular::getNNsByItemList,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByVector", &AnnoyAngular::getNNsByVector,
                "retrieve Nearest Neigbours given vector")
        .method("getNNsByVectorList", &AnnoyAngular::getNNsByVectorList,
                "retrieve Nearest Neigbours given vector")
        .method("getItemsVector", &AnnoyAngular::getItemsVector, "retrieve item vector")
        .method("getNItems", &AnnoyAngular::getNItems, "get number of items")
        .method("getNTrees", &AnnoyAngular::getNTrees, "get number of trees")
        .method("setVerbose", &AnnoyAngular::verbose, "set verbose")
        .method("setSeed", &AnnoyAngular::setSeed, "set seed")
        .method("onDiskBuild", &AnnoyAngular::onDiskBuild, "build in given file")
        ;
}
// Rcpp module exposing the AnnoyEuclidean alias to R under the name
// "AnnoyEuclidean". In each .method() call the first argument is the R-level
// method name, the second the member function it invokes and the third a
// short description string.
RCPP_EXPOSED_CLASS_NODECL(AnnoyEuclidean)
RCPP_MODULE(AnnoyEuclidean) {
    Rcpp::class_<AnnoyEuclidean>("AnnoyEuclidean")
        // matches Annoy(int n): n is the length of the indexed vectors
        .constructor<int>("constructor with integer count")
        .method("addItem", &AnnoyEuclidean::addItem, "add item")
        .method("build", &AnnoyEuclidean::callBuild, "build an index")
        .method("unbuild", &AnnoyEuclidean::callUnbuild, "unbuild an index")
        .method("save", &AnnoyEuclidean::callSave, "save index to file")
        .method("load", &AnnoyEuclidean::callLoad, "load index from file")
        .method("unload", &AnnoyEuclidean::callUnload, "unload index")
        .method("getDistance", &AnnoyEuclidean::getDistance, "get distance between i and j")
        .method("getNNsByItem", &AnnoyEuclidean::getNNsByItem,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByItemList", &AnnoyEuclidean::getNNsByItemList,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByVector", &AnnoyEuclidean::getNNsByVector,
                "retrieve Nearest Neigbours given vector")
        .method("getNNsByVectorList", &AnnoyEuclidean::getNNsByVectorList,
                "retrieve Nearest Neigbours given vector")
        .method("getItemsVector", &AnnoyEuclidean::getItemsVector, "retrieve item vector")
        .method("getNItems", &AnnoyEuclidean::getNItems, "get number of items")
        .method("getNTrees", &AnnoyEuclidean::getNTrees, "get number of trees")
        .method("setVerbose", &AnnoyEuclidean::verbose, "set verbose")
        .method("setSeed", &AnnoyEuclidean::setSeed, "set seed")
        .method("onDiskBuild", &AnnoyEuclidean::onDiskBuild, "build in given file")
        ;
}
// Rcpp module exposing the AnnoyManhattan alias to R under the name
// "AnnoyManhattan". In each .method() call the first argument is the R-level
// method name, the second the member function it invokes and the third a
// short description string.
RCPP_EXPOSED_CLASS_NODECL(AnnoyManhattan)
RCPP_MODULE(AnnoyManhattan) {
    Rcpp::class_<AnnoyManhattan>("AnnoyManhattan")
        // matches Annoy(int n): n is the length of the indexed vectors
        .constructor<int>("constructor with integer count")
        .method("addItem", &AnnoyManhattan::addItem, "add item")
        .method("build", &AnnoyManhattan::callBuild, "build an index")
        .method("unbuild", &AnnoyManhattan::callUnbuild, "unbuild an index")
        .method("save", &AnnoyManhattan::callSave, "save index to file")
        .method("load", &AnnoyManhattan::callLoad, "load index from file")
        .method("unload", &AnnoyManhattan::callUnload, "unload index")
        .method("getDistance", &AnnoyManhattan::getDistance, "get distance between i and j")
        .method("getNNsByItem", &AnnoyManhattan::getNNsByItem,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByItemList", &AnnoyManhattan::getNNsByItemList,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByVector", &AnnoyManhattan::getNNsByVector,
                "retrieve Nearest Neigbours given vector")
        .method("getNNsByVectorList", &AnnoyManhattan::getNNsByVectorList,
                "retrieve Nearest Neigbours given vector")
        .method("getItemsVector", &AnnoyManhattan::getItemsVector, "retrieve item vector")
        .method("getNItems", &AnnoyManhattan::getNItems, "get number of items")
        .method("getNTrees", &AnnoyManhattan::getNTrees, "get number of trees")
        .method("setVerbose", &AnnoyManhattan::verbose, "set verbose")
        .method("setSeed", &AnnoyManhattan::setSeed, "set seed")
        .method("onDiskBuild", &AnnoyManhattan::onDiskBuild, "build in given file")
        ;
}
// Rcpp module exposing the AnnoyHamming alias to R under the name
// "AnnoyHamming". In each .method() call the first argument is the R-level
// method name, the second the member function it invokes and the third a
// short description string.
RCPP_EXPOSED_CLASS_NODECL(AnnoyHamming)
RCPP_MODULE(AnnoyHamming) {
    Rcpp::class_<AnnoyHamming>("AnnoyHamming")
        // matches Annoy(int n): n is the length of the indexed vectors
        .constructor<int>("constructor with integer count")
        .method("addItem", &AnnoyHamming::addItem, "add item")
        .method("build", &AnnoyHamming::callBuild, "build an index")
        .method("unbuild", &AnnoyHamming::callUnbuild, "unbuild an index")
        .method("save", &AnnoyHamming::callSave, "save index to file")
        .method("load", &AnnoyHamming::callLoad, "load index from file")
        .method("unload", &AnnoyHamming::callUnload, "unload index")
        .method("getDistance", &AnnoyHamming::getDistance, "get distance between i and j")
        .method("getNNsByItem", &AnnoyHamming::getNNsByItem,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByItemList", &AnnoyHamming::getNNsByItemList,
                "retrieve Nearest Neigbours given item")
        .method("getNNsByVector", &AnnoyHamming::getNNsByVector,
                "retrieve Nearest Neigbours given vector")
        .method("getNNsByVectorList", &AnnoyHamming::getNNsByVectorList,
                "retrieve Nearest Neigbours given vector")
        .method("getItemsVector", &AnnoyHamming::getItemsVector, "retrieve item vector")
        .method("getNItems", &AnnoyHamming::getNItems, "get number of items")
        .method("getNTrees", &AnnoyHamming::getNTrees, "get number of trees")
        .method("setVerbose", &AnnoyHamming::verbose, "set verbose")
        .method("setSeed", &AnnoyHamming::setSeed, "set seed")
        .method("onDiskBuild", &AnnoyHamming::onDiskBuild, "build in given file")
        ;
}
RcppAnnoy/src/RcppExports.cpp 0000644 0001762 0000144 00000001660 14653660171 015763 0 ustar ligges users // Generated by using Rcpp::compileAttributes() -> do not edit by hand
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#include "../inst/include/RcppAnnoy.h"
#include
using namespace Rcpp;
// Only when RCPP_USE_GLOBAL_ROSTREAM is defined: define the Rcpp::Rcout and
// Rcpp::Rcerr references in this translation unit, bound to the streams
// returned by Rcpp_cout_get() and Rcpp_cerr_get().
#ifdef RCPP_USE_GLOBAL_ROSTREAM
Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get();
Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
#endif
// getArchictectureStatus
// .Call entry point (registered in src/init.c) for getArchictectureStatus():
// calls the C++ function and converts its std::string result to an R object
// with Rcpp::wrap(). Takes no arguments.
std::string getArchictectureStatus();
RcppExport SEXP _RcppAnnoy_getArchictectureStatus() {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
rcpp_result_gen = Rcpp::wrap(getArchictectureStatus());
return rcpp_result_gen;
END_RCPP
}
// annoy_version
// .Call entry point (registered in src/init.c) for annoy_version():
// calls the C++ function and converts its Rcpp::IntegerVector result to an
// R object with Rcpp::wrap(). Takes no arguments.
Rcpp::IntegerVector annoy_version();
RcppExport SEXP _RcppAnnoy_annoy_version() {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
rcpp_result_gen = Rcpp::wrap(annoy_version());
return rcpp_result_gen;
END_RCPP
}
RcppAnnoy/src/arch.cpp 0000644 0001762 0000144 00000001645 13753267105 014412 0 ustar ligges users
#include "RcppAnnoy.h"
// AVX_INFO: which AVX variant the build uses, chosen from the USE_AVX512 /
// USE_AVX128 macros.
// NOTE(review): confirm that USE_AVX128 is the macro name the Annoy headers
// actually define for the non-512-bit AVX path.
#if defined(USE_AVX512)
#define AVX_INFO "Using 512-bit AVX instructions"
#elif defined(USE_AVX128)
#define AVX_INFO "Using 128-bit AVX instructions"
#else
#define AVX_INFO "Not using AVX instructions"
#endif

// COMPILER_INFO: which compiler built this file.
// Clang also defines __GNUC__ for GCC compatibility, so __clang__ must be
// tested before __GNUC__; otherwise Clang builds are reported as GCC.
#if defined(_MSC_VER)
#define COMPILER_INFO "Compiled using MSC"
#elif defined(__clang__)
#define COMPILER_INFO "Compiled using Clang"
#elif defined(__GNUC__)
#define COMPILER_INFO "Compiled using GCC"
#else
#define COMPILER_INFO "Compiled on unknown platform"
#endif

// ANNOY_DOC: the two notes joined into one string literal,
// "<compiler note>. <AVX note>."
#define ANNOY_DOC (COMPILER_INFO ". " AVX_INFO ".")
//' Report CPU Architecture and Compiler
//'
//' @return A constant string assembled at compile-time that states
//' which AVX instructions are in use (512 bit, 128 bit, or none)
//' and which compiler was used; compilers currently recognised
//' are MSC (unlikely for R), GCC, Clang, or \sQuote{other}.
// [[Rcpp::export]]
std::string getArchictectureStatus() {
    // ANNOY_DOC is a single string literal built by the preprocessor.
    const std::string status = ANNOY_DOC;
    return status;
}
RcppAnnoy/src/init.c 0000644 0001762 0000144 00000002533 14653657263 014105 0 ustar ligges users #include
#include
#include // for NULL
#include
/* FIXME:
Check these declarations against the C/Fortran source code.
*/
/* .Call calls: one declaration per native entry point registered below.
   The five _rcpp_module_boot_* symbols correspond to the five Rcpp modules
   (one per distance metric); the two _RcppAnnoy_* symbols are the exported
   helper functions. */
extern SEXP _rcpp_module_boot_AnnoyDotProduct(void);
extern SEXP _rcpp_module_boot_AnnoyAngular(void);
extern SEXP _rcpp_module_boot_AnnoyEuclidean(void);
extern SEXP _rcpp_module_boot_AnnoyManhattan(void);
extern SEXP _rcpp_module_boot_AnnoyHamming(void);
extern SEXP _RcppAnnoy_getArchictectureStatus(void);
extern SEXP _RcppAnnoy_annoy_version(void);
/* Registration table: { name as seen from R, function pointer, number of
   arguments }. Every routine here takes zero arguments; the all-NULL row
   terminates the table. */
static const R_CallMethodDef CallEntries[] = {
{"_rcpp_module_boot_AnnoyDotProduct",(DL_FUNC) &_rcpp_module_boot_AnnoyDotProduct,0},
{"_rcpp_module_boot_AnnoyAngular", (DL_FUNC) &_rcpp_module_boot_AnnoyAngular, 0},
{"_rcpp_module_boot_AnnoyEuclidean", (DL_FUNC) &_rcpp_module_boot_AnnoyEuclidean, 0},
{"_rcpp_module_boot_AnnoyManhattan", (DL_FUNC) &_rcpp_module_boot_AnnoyManhattan, 0},
{"_rcpp_module_boot_AnnoyHamming", (DL_FUNC) &_rcpp_module_boot_AnnoyHamming, 0},
{"_RcppAnnoy_getArchictectureStatus",(DL_FUNC) &_RcppAnnoy_getArchictectureStatus,0},
{"_RcppAnnoy_annoy_version",(DL_FUNC) &_RcppAnnoy_annoy_version,0},
{NULL, NULL, 0}
};
/* Library initialisation hook, named after R's R_init_<package> convention.
   Registers the .Call routines listed in CallEntries (the NULL arguments
   are the other routine tables, unused here), then passes FALSE to
   R_useDynamicSymbols() for this DLL. */
void R_init_RcppAnnoy(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE);
}
RcppAnnoy/src/version.cpp 0000644 0001762 0000144 00000000544 13766143544 015163 0 ustar ligges users #include "RcppAnnoy.h"
// Returns the Annoy version as a named integer vector with elements
// "major", "minor" and "patch", taken from the RCPPANNOY_VERSION_* macros.
// [[Rcpp::export(.annoy_version)]]
Rcpp::IntegerVector annoy_version() {
    using Rcpp::Named;
    Rcpp::IntegerVector version = Rcpp::IntegerVector::create(
        Named("major") = RCPPANNOY_VERSION_MAJOR,
        Named("minor") = RCPPANNOY_VERSION_MINOR,
        Named("patch") = RCPPANNOY_VERSION_PATCH);
    return version;
}
RcppAnnoy/src/Makevars 0000644 0001762 0000144 00000000634 15131024050 014440 0 ustar ligges users
## Multithreaded indexing for Annoy (version 1.17 or later)
## requires this define to be set
# USE_MULTITTHREADING=-DANNOYLIB_MULTITHREADED_BUILD
## So to use multithreaded indexing, uncomment the setting above
## Note that it may alter your result (or their order) slightly due
## to the use of multithreading which alters the sequence of RNG draws
PKG_CPPFLAGS = -I../inst/include/ ${USE_MULTITTHREADING}
RcppAnnoy/ChangeLog 0000644 0001762 0000144 00000051074 15131161754 013750 0 ustar ligges users 2026-01-12 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.23
* inst/include/RcppAnnoy.h: Idem
2026-01-11 Dirk Eddelbuettel
* src/Makevars: No longer need to (explicitly) request C++17
2026-01-10 Dirk Eddelbuettel
* inst/rmd/rcppannoy.bib: Updated references
* vignettes/UsingAnnoyInCpp.pdf: Regenerated under updated pinp
package to get improved DOI links
* DESCRIPTION (Description): No longer require
Additional_repositories as Rcpp 1.1.1 is on CRAN
2026-01-07 Dirk Eddelbuettel
* .github/workflows/ci.yaml: Switch to actions/checkout@v6
2025-11-23 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll micro version and date
2025-11-22 Amos Elberg
* inst/include/RcppAnnoy.h: Turn __ERROR_PRINTER_OVERRIDE__ into
function to quash a warning emitted from 64-bit integer vertex
2025-07-21 Dirk Eddelbuettel
* vignettes/UsingAnnoyInCpp.pdf: Regenerated
* inst/rmd/UsingAnnoyInCpp.Rmd: One linebreak-avoidance edit
* inst/rmd/rcppannoy.bib: Update references, add DOIs fields
2025-07-20 Dirk Eddelbuettel
* vignettes/UsingAnnoyInCpp.pdf.asis: Switch to 'asis' vignette builder
* DESCRIPTION (VignetteBuilder): Add 'VignetteBuilder: Rcpp' along
with versioned Imports: on version 1.1.0.1 or newer
* .github/workflows/ci.yaml: (Temporary) installation of Rcpp 1.1.0.1
in order to access the 'asis' vignette builder provided by it
* DESCRIPTION: (Temporary) addition of Additional_repositories to
ensure updated Rcpp is used during r-universe processing
* tests/tinytest.R: Small refresh and edit
2025-05-04 Dirk Eddelbuettel
* demo/00Index: Add missing newline
2025-03-08 Dirk Eddelbuettel
* .github/workflows/ci.yaml: Use r-ci action with embedded bootstrap
2024-12-07 Dirk Eddelbuettel
* demo/irisExample.R: New contributed demo
* demo/00Index: Indexed
* R/annoy.R: Remove duplicate example
* man/AnnoyIndex.Rd: Regenerated to update example
2024-12-04 Dirk Eddelbuettel
* R/annoy.R: Add missing dot to fileext argument in Rd part
* man/AnnoyIndex.Rd: Regenerated to update example
2024-09-06 Dirk Eddelbuettel
* DESCRIPTION (Authors@R): Added
2024-08-04 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll micro version
* inst/include/RcppAnnoy.h: Idem
* R/annoy.R: Add alias for AnnoyDotProduct
* man/AnnoyIndex.Rd: Re-rendered
* README.md: Switch two more http:// URLs to https://
2024-08-03 Benjamin James
* R/annoy.R: Add AnnoyDotProduct to namespace
* inst/tinytest/testDotProduct.R: Unit tests for 'AnnoyDotProduct'
* src/init.c: Added new dot product distance measure (via template)
* src/annoy.cpp: Added template and module for AnnoyDotProduct
2024-05-20 Dirk Eddelbuettel
* README.md: Use tinyverse.netlify.app for dependency badge
2024-02-23 Dirk Eddelbuettel
* .github/workflows/ci.yaml (jobs): Update to actions/checkout@v4,
add r-ci-setup actions
2024-01-23 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.22
* inst/include/RcppAnnoy.h: Idem
* man/RcppAnnoy-package.Rd: Remove reference to example left from
auto-generated stanza (to satisfy a CRAN request)
2023-07-02 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.21
* inst/include/RcppAnnoy.h: Idem
2023-07-01 Dirk Eddelbuettel
* src/Makevars (USE_MULTITTHREADING): Revert back to not enabling
multithreading by default to remain consistent with prior behaviour
2023-06-30 Dirk Eddelbuettel
* README.md: Add r-universe badge
2023-06-15 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll micro version
* inst/include/RcppAnnoy.h: Idem
* inst/include/annoylib.h: Sync with upstream 1.17.3
* src/annoy.cpp: Accommodate updated upstream changes
* inst/include/RcppAnnoy.h: Idem
* inst/rmd/UsingAnnoyInCpp.Rmd (Annoy): Idem
2023-03-17 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/RcppAnnoy.h: Idem
* src/Makevars: No longer restrict build to C++11, also turn on
multithreading and switch to C++17 (instead of minimum C++14)
2022-10-27 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.20
* src/init.c: Add 'void' to six prototypes to make clang-15 happy
* inst/include/annoylib.h (set_error_from_errno): Use snprintf to make
xcode/macos 14 happy
* .github/workflows/ci.yaml (jobs): Update to actions/checkout@v3
2021-11-30 Dirk Eddelbuettel
* README.md: Remove deprecated CI badge
* .travis/: Removed
2021-07-30 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.19
2021-04-13 Dirk Eddelbuettel
* DESCRIPTION (URL, BugReports): Added to DESCRIPTION file
2020-12-25 Dirk Eddelbuettel
* .github/workflows/ci.yaml: Small tweaks to CI YAML file
2020-12-15 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.18
* src/version.cpp: Export as .annoy_version as we have an R wrapper
* .github/workflows/ci.yaml: Add CI runner using r-ci
* README.md: Add new badge
2020-12-06 Aaron Lun
* src/version.cpp: Helper function to report Annoy version
* src/init.c: Register helper
* R/version.R: R wrapper
* man/getAnnoyVersion.Rd: Documentation
2020-12-04 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/RcppAnnoy.h: Idem
* inst/include/annoylib.h: Upstream sync post PR #522
* inst/include/kissrandom.h: Idem
2020-11-23 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/RcppAnnoy.h: Idem
* .travis.yml: Switch to r-ci
2020-11-22 Dirk Eddelbuettel
* inst/tinytest/testVignette.R: New test file
* inst/rmd/: Moved from vignettes/rmd
* inst/rmd/UsingAnnoyInCpp.Rmd: Allow for index file
2020-11-22 Aaron Lun
* vignettes/rmd/UsingAnnoyInCpp.Rmd: Allow compilation of vignette
without adding more dependencies
2020-11-19 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/RcppAnnoy.h: Idem
* inst/include/RcppAnnoy.h: Additional typedefs for threading policy
* src/annoy.cpp: Threading policy typedef removed
2020-11-15 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.17
2020-11-12 Dirk Eddelbuettel
* inst/include/RcppAnnoy.h: New header file for includes and defines
* src/annoy.cpp: Use new header file RcppAnnoy.h
* inst/include/annoylib.h: Add a temporary define to identify version
2020-10-19 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* src/Makevars: Default to serial use (and C++11) but add comment to
detail how enable multithreaded indexing
* src/annoy.cpp: Default to serial use
* inst/tinytest/testSeeds.R: Re-enable test
2020-10-18 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/annoylib.h: Update to Annoy 1.17
* inst/include/mman.h: Idem
* src/annoy.cpp: Idem, also enable multithreaded build
* src/Makevars: Switch to C++14 for shared_timed_mutex
* .travis.yml: Switch Travis CI to bspm use and focal
* README.md: Update two URLs
* inst/tinytest/testSeeds.R: Disable (already optional) test on
seeding does not pass in multi-threaded mode
2020-05-30 Dirk Eddelbuettel
* README.md: Add 'last commit' badge
* .travis.yml: Switch to bionic and R 4.0.0
2020-03-12 Dirk Eddelbuettel
* README.md: Standardize header and badges
2020-03-06 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.16
2020-03-03 Dirk Eddelbuettel
* src/arch.cpp (getArchictectureStatus): Add simple helper function
to show AVX and compiler status now that pragma has been removed
2020-03-02 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/annoylib.h: Updated to upstream PR #462 which removes
the (optional and no longer needed) packing and one set of warnings
2020-03-01 Dirk Eddelbuettel
* inst/include/annoylib.h: Updated upstream PRs #460 (which uses
int not size_t in one interface) and #461 (which inlines two helpers)
2020-02-27 Aaron Lun
* inst/include/annoylib.h: Replace two size_t interfaces with int
2020-02-25 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.15
* inst/include/annoylib.h: Use alloca() portably
2020-02-24 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/annoylib.h: Updated upstream master post PR455
* inst/include/mmap.h: Idem
* .travis.yml: Use r-cran-tinytest, no longer need edd/r-3.5
2019-11-12 Dirk Eddelbuettel
* R/annoy.R: Small help page correction thanks to Bill Venables
* man/AnnoyIndex.Rd: Idem
2019-11-11 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.14
2019-11-10 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* cleanup: Make safe for checkbashism by using only /bin/sh
* inst/include/annoylib.h: New upstream v1.16.2 (plus up to pr436)
2019-09-23 Dirk Eddelbuettel
* DESCRIPTION (Date, Version): Release 0.0.13
2019-09-22 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* src/annoy.cpp: Add unbuild() and onDiskBuild() functions
* inst/tinytest/testOnDiskBuild.R: Add tests for onDiskBuild()
* inst/tinytest/testIndex.R: Can now use tinytest::exit_file()
* inst/tinytest/testSeeds.R: Idem
* inst/include/annoylib.h: Use return code
* inst/include/mman.h: Only conditionally define ftruncate
* README.md: Small edits
2019-09-21 Peter Hickey
* vignettes/rmd/UsingAnnoyInCpp.Rmd: Remove spurious comma
2019-09-21 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/annoylib.h: New upstream v1.16 (plus up to pr410)
* inst/include/mman.h: Idem
* src/annoy.cpp: Add getNTrees(), use error message in addItem()
2019-09-15 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* R/annoy.R: Use a tempfile() in example to save + load an annoy tree
* man/AnnoyIndex.Rd: Ditto
2019-05-12 Dirk Eddelbuettel
* DESCRIPTION (Date, Version): Release 0.0.12
2019-05-11 Dirk Eddelbuettel
* vignettes/UsingAnnoyInCcppWrapper.Rnw: Vignette wrapper with tip of
the hat to Mark van der Loo for his January 2019 blog post
* vignettes/rmd/UsingAnnoyInCpp.Rmd (vignette): Moved
* vignettes/rmd/rcppannoy.bib: Idem
* DESCRIPTION (Suggests): Remove three packages needed for vignette
* .travis.yml (install): Idem
* .Rbuildignore: Exclude vignettes/rmd/ from build
2019-05-10 Dirk Eddelbuettel
* tests/tinytest.R: New test runner using tinytest
* inst/tinytest/testIndex.R: New test file using tinytest
* inst/tinytest/testAngular.R: Idem
* inst/tinytest/testEuclidean.R: Idem
* inst/tinytest/testHamming.R: Idem
* inst/tinytest/testManhattan.R: Idem
* .travis.yml (install): Add tinytest to Travis setup
* local/: Old RUnit test files in source but not in package
* .Rbuildignore: Exclude local/ from build
2019-05-06 Adam Spannbauer
* R/annoy.R: Added documentation
* man/AnnoyIndex.Rd: Rendered documentation
2019-04-12 Dirk Eddelbuettel
* inst/tests/runit.seeds.R (test01seeds): Simplified
2019-04-11 Dirk Eddelbuettel
* inst/tests/runit.seeds.R (test01seeds): Add tests
2019-04-11 James Melville
* vignettes/UsingAnnoyInCpp.Rmd: Document setSeed
2019-04-10 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* src/annoy.cpp: Support setting of seed for KISS RNG
2018-10-30 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.11
2018-10-29 Dirk Eddelbuettel
* inst/include/annoylib.h: Only define NOMINMAX if not defined
* vignettes/UsingAnnoyInCpp.Rmd: Simplified by having C++ snippets
typeset by pandoc instead of attempting OS-dependent compilation
2018-10-28 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/annoylib.h: New upstream version 1.13 past pr325
* inst/include/kissrandom.h: Idem
* vignettes/UsingAnnoyInCpp: Renamed vignette, one size_t use
2018-10-17 Dirk Eddelbuettel
* README.md: Added dependency count badge
2018-10-16 Dirk Eddelbuettel
* vignettes/UsingAnnoyInC++.Rmd: Renamed vignette, minor edits
* vignettes/rcppannoy.bib: Expanded, sorted (thanks, Emacs)
2018-10-14 Dirk Eddelbuettel
* vignettes/rcppannoy.Rmd: Minor edits
2018-10-09 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
2018-10-09 Aaron Lun
* vignettes/rcppannoy.Rmd: Minor fixes
2018-10-07 Dirk Eddelbuettel
* inst/include/annoylib.h: New upstream version 1.13 plus two PRs by
Aaron and Dirk, respectively.
* inst/include/mman.h: Idem
2018-10-06 Aaron Lun
* vignettes/rcppannoy.Rmd: New vignette on using Annoy from C++
* DESCRIPTION: Added required Suggests: and VignetteBuilder:
* .travis.yml: Added r-cran-knitr and r-cran-rmarkdown
2018-09-01 Dirk Eddelbuettel
* .travis.yml: Switch Travis CI to R 3.5 repo
2017-12-16 Dirk Eddelbuettel
* inst/include/kissrandom.h: New upstream version
* inst/include/annoylib.h: Idem; plus some small changes to avoid
g++ warnings, also sent upstream
* inst/tests/runit.euclidean.R: Relaxed one '<' comparison to '<='
* inst/tests/runit.manhattan.R: Idem
* src/init.c: Added new Hamming distance measure (via template)
* src/annoy.cpp: Idem; plus more use of template type
* inst/tests/runit.hamming.R: Unit tests for 'AnnoyHamming'
* man/RcppAnnoy-package.Rd: Documentation alias for 'AnnoyHamming'
* .Rbuildignore: Ignore top-level *tar.gz file
* .gitignore: Idem
2017-09-25 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.10
* inst/tests/runit.index.R (test03getVectors): New test function
* tests/doRUnit.R: Small edits and improvements
2017-09-23 Dirk Eddelbuettel
* src/annoy.cpp (getItemsVector): Initialized vector (#24)
2017-08-31 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.9
2017-07-21 Dirk Eddelbuettel
* inst/tests/runit.angular.R: Additional tests from upstream
* inst/tests/runit.euclidean.R: Idem
* inst/tests/runit.manhattan.R: Idem
2017-07-18 Dirk Eddelbuettel
* src/annoy.cpp (RCPP_MODULE): New Manhattan distance module
* inst/tests/runit.manhattan.R: New test file
* R/annoy.R: Load module AnnoyManhattan
* src/init.c: Register module boot function
* man/RcppAnnoy-package.Rd: Aliases for AnnoyManhattan
2017-07-17 Dirk Eddelbuettel
* inst/include/annoylib.h: New upstream version annoy 1.9.1
* inst/include/kissrandom.h: Idem
* src/annoy.cpp (Annoy): Update call to match updated interface
* inst/tests/runit.angular.R: Update three tests as the returned
distance metric is now the square root of the previous value
* .travis.yml (group): Added per Travis blog
2017-07-14 Dirk Eddelbuettel
* .travis.yml (before_install): Use https for curl fetch
2017-07-02 Dirk Eddelbuettel
* README.md: Use alternate for img.shields.io GPL-2+ badge
2017-07-01 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* inst/include/annoylib.h (>): Ensure we write as binary
2017-04-09 Dirk Eddelbuettel
* src/init.c (R_init_RcppAnnoy): Call R_registerRoutines()
and R_useDynamicSymbols()
* NAMESPACE: Use .registration=TRUE on useDynLib
2016-10-01 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Release 0.0.8
* .travis.yml: Switch to using run.sh for Travis CI
* README.md: More canonical URLs
* src/annoy.cpp: Use unsigned int for vector size
* src/annoy.cpp: Added index admissibility test to addItem()
* inst/tests/runit.index.R (test02badvalues): New test
2016-09-29 Dirk Eddelbuettel
* inst/NEWS.Rd: Added
2016-09-28 Dirk Eddelbuettel
* DESCRIPTION (Version, Date): Roll minor version
* src/annoy.cpp (RCPP_MODULE): New List returning functions from #12
2016-02-02 Michael Phan-Ba
* src/annoy.cpp: Added explicit destructor
2015-11-17 Daniel C. Dillon
* src/annoy.cpp: Templating Annoy classes
* inst/include/annoylib.h: Ditto
2015-11-15 Dirk Eddelbuettel
* DESCRIPTION: Version 0.0.7
* src/annoy.cpp: Changes to adapt to new Annoy interface
2015-11-14 Dirk Eddelbuettel
* DESCRIPTION: Rolled Date and Version
* inst/include/annoylib.h: Upgraded to new version from Annoy 1.6.2
2015-05-26 Dirk Eddelbuettel
* DESCRIPTION: Rolled Date and Version
* inst/include/annoylib.h: Upgraded to new version from Annoy 1.3.1
2015-05-03 Dirk Eddelbuettel
* DESCRIPTION: Version 0.0.6
* inst/include/annoylib.h: Upgraded to new version from Annoy 1.2.2
based on our pull request (and fix) to support supplying an RNG
* src/annoy.cpp: Supply R's own RNG instead of the default of rand
* .travis.yml: No longer need BH, and install Rcpp via PPA
2015-05-02 Dirk Eddelbuettel
* inst/include/annoylib.h: Upgraded to Annoy 1.1.1 (which no longer
needs Boost)
* inst/include/annoylib.h: Replace rand() with random()
* src/annoy.cpp: Call save() + load() with const char* arguments
* DESCRIPTION: Roll Version: and Date:, remove BH dependency
* cleanup: Clean a bit more in src/
2015-01-22 Dirk Eddelbuettel
* DESCRIPTION: Version 0.0.5
2015-01-21 Dirk Eddelbuettel
* inst/include/annoylib.h: Synced once more with upstream; this
version addresses the UBSAN runtime issue
2015-01-10 Dirk Eddelbuettel
* inst/include/annoylib.h: Synced with upstream repo
2015-01-06 Dirk Eddelbuettel
* DESCRIPTION: Bumped Version: and Date:
* src/Makevars: One char correction requested by CRAN Maintainers
2014-12-07 Dirk Eddelbuettel
* DESCRIPTION: Version 0.0.4
* DESCRIPTION: Added Depends: R (>= 3.1) as need for C++11
prohibits deployment on R-oldrel
2014-11-17 Dirk Eddelbuettel
* DESCRIPTION: Version 0.0.3
* inst/include/annoylib.h: Updated to new version from Annoy
2014-11-16 Qiang Kou
* inst/include/annoylib.h: Added Windows support for mmap
* inst/include/mman.h: Windows implementation of mmap
* src/annoy.cpp: Add two #undef needed on Windows
2014-11-16 Dirk Eddelbuettel
* README.md: Updated to note CRAN package, possible Windows port
2014-11-15 Dirk Eddelbuettel
* inst/include/annoylib.h: New upstream version
2014-11-14 Dirk Eddelbuettel
* inst/include/annoylib.h: New upstream version with templated int
* src/annoy.cpp: Updated for new Annoy library
2014-11-13 Dirk Eddelbuettel
* DESCRIPTION: Version 0.0.2
* inst/include/annoylib.h: New version with verbosity toggle
* src/annoy.cpp: Added setter for verbosity
2014-11-12 Dirk Eddelbuettel
* .travis.yml: Enable binary index test
2014-11-11 Dirk Eddelbuettel
* inst/tests/runit.index.R: New test against binary index
* inst/tests/data/test.tree: Binary file used by test
* inst/tests/runit.euclidean.R: Added more tests
2014-11-10 Dirk Eddelbuettel
* tests/runUnitTests.R: Added RUnit unit test caller
* inst/tests/runit.angular.R: First set of unit tests
* inst/tests/runit.euclidean.R: Idem
* DESCRIPTION: Added Suggests: RUnit
* .travis.yml: Added to enable Travis CI
2014-11-09 Dirk Eddelbuettel
* inst/include/annoylib.h: Updated to new version from annoy
2014-11-08 Dirk Eddelbuettel
* DESCRIPTION: Initial version 0.0.1
RcppAnnoy/NAMESPACE 0000644 0001762 0000144 00000000226 13132210432 013371 0 ustar ligges users useDynLib(RcppAnnoy, .registration=TRUE)
import(methods, Rcpp)
exportPattern("^[[:alpha:]]+") # export all identifiers starting with letters
RcppAnnoy/inst/ 0000755 0001762 0000144 00000000000 15131164023 013134 5 ustar ligges users RcppAnnoy/inst/include/ 0000755 0001762 0000144 00000000000 15131017650 014562 5 ustar ligges users RcppAnnoy/inst/include/annoylib.h 0000644 0001762 0000144 00000131704 15110617765 016566 0 ustar ligges users // Copyright (c) 2013 Spotify AB
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
#ifndef ANNOY_ANNOYLIB_H
#define ANNOY_ANNOYLIB_H
#include
#include
#ifndef _MSC_VER
#include
#endif
#include
#include
#include
#include
#include
#if defined(_MSC_VER) && _MSC_VER == 1500
typedef unsigned char uint8_t;
typedef signed __int32 int32_t;
typedef unsigned __int64 uint64_t;
typedef signed __int64 int64_t;
#else
#include
#endif
#if defined(_MSC_VER) || defined(__MINGW32__)
// a bit hacky, but override some definitions to support 64 bit
#define off_t int64_t
#define lseek_getsize(fd) _lseeki64(fd, 0, SEEK_END)
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include "mman.h"
#include
#else
#include
#define lseek_getsize(fd) lseek(fd, 0, SEEK_END)
#endif
#include
#include
#include
#include
#include
#include
#include
#if __cplusplus >= 201103L
#include
#endif
#ifdef ANNOYLIB_MULTITHREADED_BUILD
#include
#include
#include
#endif
#ifdef _MSC_VER
// Needed for Visual Studio to disable runtime checks for memcpy
#pragma runtime_checks("s", off)
#endif
// This allows others to supply their own logger / error printer without
// requiring Annoy to import their headers. See RcppAnnoy for a use case.
#ifndef __ERROR_PRINTER_OVERRIDE__
#define annoylib_showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); }
#else
#define annoylib_showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); }
#endif
// Portable alloca definition, cf Writing R Extensions, Section 1.6.4
#ifdef __GNUC__
// Includes GCC, clang and Intel compilers
# undef alloca
# define alloca(x) __builtin_alloca((x))
#elif defined(__sun) || defined(_AIX)
// this is necessary (and sufficient) for Solaris 10 and AIX 6:
# include
#endif
// We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number.
// Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low.
#define ANNOYLIB_V_ARRAY_SIZE 65536
#ifndef _MSC_VER
#define annoylib_popcount __builtin_popcountll
#else // See #293, #358
#define annoylib_popcount cole_popcount
#endif
#if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402
#define ANNOYLIB_USE_AVX512
#elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__)
#define ANNOYLIB_USE_AVX
#else
#endif
#if defined(ANNOYLIB_USE_AVX) || defined(ANNOYLIB_USE_AVX512)
#if defined(_MSC_VER)
#include
#elif defined(__GNUC__)
#include
#endif
#endif
#if !defined(__MINGW32__)
#define ANNOYLIB_FTRUNCATE_SIZE(x) static_cast(x)
#else
#define ANNOYLIB_FTRUNCATE_SIZE(x) (x)
#endif
namespace Annoy {
// Report a failure described by errno.
// Logs "<msg>: <strerror(errno)> (<errno>)" through annoylib_showUpdate and, when
// `error` is non-null, stores the same text (without the trailing newline) in a
// newly malloc()ed buffer assigned to *error. Text longer than the buffer is
// truncated. *error is left null if the allocation fails.
// NOTE(review): the buffer is presumably released by the caller with free() - confirm.
inline void set_error_from_errno(char **error, const char* msg) {
  // Snapshot errno first: the logging call below may itself modify it.
  const int saved_errno = errno;
  annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(saved_errno), saved_errno);
  if (error) {
    const size_t capacity = 256;
    *error = (char *)malloc(capacity);
    if (*error) {
      // snprintf writes at most `capacity` bytes including the terminating NUL.
      snprintf(*error, capacity, "%s: %s (%d)", msg, strerror(saved_errno), saved_errno);
    }
  }
}
// Report a failure described by a fixed message.
// Logs `msg` followed by a newline through annoylib_showUpdate and, when `error`
// is non-null, stores a malloc()ed copy of `msg` in *error. *error is left null
// if the allocation fails.
// NOTE(review): the copy is presumably released by the caller with free() - confirm.
inline void set_error_from_string(char **error, const char* msg) {
  annoylib_showUpdate("%s\n", msg);
  if (error) {
    *error = (char *)malloc(strlen(msg) + 1);
    // Only copy when the allocation succeeded; strcpy into null would crash.
    if (*error) {
      strcpy(*error, msg);
    }
  }
}
using std::vector;
using std::pair;
using std::numeric_limits;
using std::make_pair;
// Resize the mapping at *_ptr from old_size to new_size bytes and set the
// length of the file behind _fd to new_size.
//   _ptr      in/out: address of the current mapping; overwritten with the
//             address returned by mremap()/mmap()
//   _fd       descriptor of the file that is truncated (and, outside Linux, mapped again)
//   old_size  size in bytes of the existing mapping
//   new_size  requested size in bytes
// Returns true when ftruncate() did not return -1.
// NOTE(review): the value returned by mremap()/mmap() is stored without being
// compared against MAP_FAILED, so the result reflects the ftruncate() call only.
inline bool remap_memory_and_truncate(void** _ptr, int _fd, size_t old_size, size_t new_size) {
#ifdef __linux__
// Linux: resize the existing mapping (it is allowed to move), then resize the file.
*_ptr = mremap(*_ptr, old_size, new_size, MREMAP_MAYMOVE);
bool ok = ftruncate(_fd, new_size) != -1;
#else
// Elsewhere: drop the old mapping, resize the file, then map it again, passing the old address to mmap().
munmap(*_ptr, old_size);
bool ok = ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(new_size)) != -1;
#ifdef MAP_POPULATE
// MAP_POPULATE is added to the flags only on platforms that define it.
*_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0);
#else
*_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0);
#endif
#endif
return ok;
}
namespace {
// Return a pointer to the i-th node of a packed node array.
//   _nodes  base address of the node storage
//   _s      size in bytes of one node slot
//   i       zero-based node index
// Template parameters: S is the index type, Node the node struct type.
template<typename S, typename Node>
inline Node* get_node_ptr(const void* _nodes, const size_t _s, const S i) {
  // Nodes are addressed by byte offset because the slot size is a runtime value.
  const uint8_t* base = static_cast<const uint8_t*>(_nodes);
  return reinterpret_cast<Node*>(const_cast<uint8_t*>(base + (_s * i)));
}
// Inner product of the first f elements of x and y.
// The sum is accumulated in T, in index order; returns 0 when f <= 0.
template<typename T>
inline T dot(const T* x, const T* y, int f) {
  T s = 0;
  for (int z = 0; z < f; z++)
    s += x[z] * y[z];
  return s;
}
// Manhattan (L1) distance: the sum of |x[i] - y[i]| over the first f elements.
// Returns 0 when f <= 0.
template<typename T>
inline T manhattan_distance(const T* x, const T* y, int f) {
  T d = 0.0;
  for (int i = 0; i < f; i++)
    d += fabs(x[i] - y[i]);
  return d;
}
// Squared Euclidean distance: the sum of (x[i] - y[i])^2 over the first f
// elements. No square root is taken here.
template<typename T>
inline T euclidean_distance(const T* x, const T* y, int f) {
  // Don't use dot-product: avoid catastrophic cancellation in #314.
  T d = 0.0;
  for (int i = 0; i < f; ++i) {
    const T tmp = x[i] - y[i];
    d += tmp * tmp;
  }
  return d;
}
#ifdef ANNOYLIB_USE_AVX
// Horizontal single sum of 256bit vector.
// Adds the eight float lanes of v and returns the total as a scalar.
inline float hsum256_ps_avx(__m256 v) {
// Add the upper 128-bit half to the lower half: four partial sums remain.
const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v));
// Add the upper two of those to the lower two: two partial sums remain.
const __m128 x64 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128));
// Add lane 1 to lane 0 (shuffle mask 0x55 selects lane 1 for every position).
const __m128 x32 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55));
// The total now sits in lane 0.
return _mm_cvtss_f32(x32);
}
// AVX specialisation of dot() for float: inner product of the first f elements
// of x and y. Blocks of eight elements are multiplied and accumulated in a
// 256-bit register using unaligned loads; the remaining (at most seven)
// elements are handled by the scalar loop.
template<>
inline float dot(const float* x, const float *y, int f) {
float result = 0;
if (f > 7) {
__m256 d = _mm256_setzero_ps();
// f counts the elements still to be processed; x and y advance with it.
for (; f > 7; f -= 8) {
d = _mm256_add_ps(d, _mm256_mul_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y)));
x += 8;
y += 8;
}
// Sum all floats in dot register.
result += hsum256_ps_avx(d);
}
// Don't forget the remaining values.
for (; f > 0; f--) {
result += *x * *y;
x++;
y++;
}
return result;
}
// AVX specialisation of manhattan_distance() for float: sum of |x[i] - y[i]|
// over the first f elements. Blocks of eight elements are processed in a
// 256-bit register; the remaining elements are handled by the scalar loop.
template<>
inline float manhattan_distance(const float* x, const float* y, int f) {
float result = 0;
// i counts the elements still to be processed; f itself is left unchanged.
int i = f;
if (f > 7) {
__m256 manhattan = _mm256_setzero_ps();
// -0.0f has only the sign bit set, so it serves as a sign-bit mask.
__m256 minus_zero = _mm256_set1_ps(-0.0f);
for (; i > 7; i -= 8) {
const __m256 x_minus_y = _mm256_sub_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y));
const __m256 distance = _mm256_andnot_ps(minus_zero, x_minus_y); // Absolute value of x_minus_y (forces sign bit to zero)
manhattan = _mm256_add_ps(manhattan, distance);
x += 8;
y += 8;
}
// Sum all floats in manhattan register.
result = hsum256_ps_avx(manhattan);
}
// Don't forget the remaining values.
for (; i > 0; i--) {
result += fabsf(*x - *y);
x++;
y++;
}
return result;
}
// AVX specialisation of euclidean_distance() for float: sum of
// (x[i] - y[i])^2 over the first f elements (no square root is taken).
// Blocks of eight elements are processed in a 256-bit register; the remaining
// elements are handled by the scalar loop.
template<>
inline float euclidean_distance(const float* x, const float* y, int f) {
float result=0;
if (f > 7) {
__m256 d = _mm256_setzero_ps();
// f counts the elements still to be processed; x and y advance with it.
for (; f > 7; f -= 8) {
const __m256 diff = _mm256_sub_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y));
d = _mm256_add_ps(d, _mm256_mul_ps(diff, diff)); // no support for fmadd in AVX...
x += 8;
y += 8;
}
// Sum all floats in dot register.
result = hsum256_ps_avx(d);
}
// Don't forget the remaining values.
for (; f > 0; f--) {
float tmp = *x - *y;
result += tmp * tmp;
x++;
y++;
}
return result;
}
#endif
#ifdef ANNOYLIB_USE_AVX512
// AVX-512 specialisation of dot() for float: inner product of the first f
// elements of x and y. Blocks of sixteen elements are accumulated with a fused
// multiply-add in a 512-bit register using unaligned loads; the remaining
// (at most fifteen) elements are handled by the scalar loop.
template<>
inline float dot(const float* x, const float *y, int f) {
float result = 0;
if (f > 15) {
__m512 d = _mm512_setzero_ps();
// f counts the elements still to be processed; x and y advance with it.
for (; f > 15; f -= 16) {
//AVX512F includes FMA
d = _mm512_fmadd_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y), d);
x += 16;
y += 16;
}
// Sum all floats in dot register.
result += _mm512_reduce_add_ps(d);
}
// Don't forget the remaining values.
for (; f > 0; f--) {
result += *x * *y;
x++;
y++;
}
return result;
}
// AVX-512 specialisation of manhattan_distance() for float: sum of
// |x[i] - y[i]| over the first f elements. Blocks of sixteen elements are
// processed in a 512-bit register; the remaining elements are handled by the
// scalar loop.
template<>
inline float manhattan_distance(const float* x, const float* y, int f) {
float result = 0;
// i counts the elements still to be processed; f itself is left unchanged.
int i = f;
if (f > 15) {
__m512 manhattan = _mm512_setzero_ps();
for (; i > 15; i -= 16) {
const __m512 x_minus_y = _mm512_sub_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y));
manhattan = _mm512_add_ps(manhattan, _mm512_abs_ps(x_minus_y));
x += 16;
y += 16;
}
// Sum all floats in manhattan register.
result = _mm512_reduce_add_ps(manhattan);
}
// Don't forget the remaining values.
for (; i > 0; i--) {
result += fabsf(*x - *y);
x++;
y++;
}
return result;
}
// AVX-512 specialisation of euclidean_distance() for float: sum of
// (x[i] - y[i])^2 over the first f elements (no square root is taken).
// Blocks of sixteen elements are accumulated with a fused multiply-add in a
// 512-bit register; the remaining elements are handled by the scalar loop.
template<>
inline float euclidean_distance(const float* x, const float* y, int f) {
float result=0;
if (f > 15) {
__m512 d = _mm512_setzero_ps();
// f counts the elements still to be processed; x and y advance with it.
for (; f > 15; f -= 16) {
const __m512 diff = _mm512_sub_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y));
d = _mm512_fmadd_ps(diff, diff, d);
x += 16;
y += 16;
}
// Sum all floats in dot register.
result = _mm512_reduce_add_ps(d);
}
// Don't forget the remaining values.
for (; f > 0; f--) {
float tmp = *x - *y;
result += tmp * tmp;
x++;
y++;
}
return result;
}
#endif
// Euclidean (L2) norm of the first f elements of v: sqrt(dot(v, v, f)).
template<typename T>
inline T get_norm(T* v, int f) {
  return sqrt(dot(v, v, f));
}
// Heuristically derive two centroids from a set of nodes; the results are
// written into the caller-provided nodes p and q.
//   nodes   candidate nodes to sample from
//   f       number of vector components used per node
//   random  random source; random.index(n) is used to draw node indices
//   cosine  when true, p and q are normalised after copying and each sampled
//           vector is divided by its norm before being averaged in
//   p, q    output centroids
// NOTE(review): random.index(count-1) is called unconditionally, so this
// presumably expects nodes.size() >= 2 - confirm against callers.
template
inline void two_means(const vector& nodes, int f, Random& random, bool cosine, Node* p, Node* q) {
/*
This algorithm is a huge heuristic. Empirically it works really well, but I
can't motivate it well. The basic idea is to keep two centroids and assign
points to either one of them. We weight each centroid by the number of points
assigned to it, so to balance it.
*/
static int iteration_steps = 200;
size_t count = nodes.size();
// Seed the two centroids with two different randomly chosen nodes.
size_t i = random.index(count);
size_t j = random.index(count-1);
j += (j >= i); // ensure that i != j
Distance::template copy_node(p, nodes[i], f);
Distance::template copy_node(q, nodes[j], f);
if (cosine) { Distance::template normalize(p, f); Distance::template normalize(q, f); }
Distance::init_node(p, f);
Distance::init_node(q, f);
// ic / jc count how many points have been merged into p / q (the seed counts as one).
int ic = 1, jc = 1;
for (int l = 0; l < iteration_steps; l++) {
size_t k = random.index(count);
// Distances are scaled by the centroid's point count, which favours the smaller cluster.
T di = ic * Distance::distance(p, nodes[k], f),
dj = jc * Distance::distance(q, nodes[k], f);
T norm = cosine ? get_norm(nodes[k]->v, f) : 1;
// Skip samples whose norm is not strictly positive (this also rejects NaN); they cannot be divided by norm.
if (!(norm > T(0))) {
continue;
}
// Fold the sample into the closer centroid as a running mean; when di == dj neither centroid changes.
if (di < dj) {
for (int z = 0; z < f; z++)
p->v[z] = (p->v[z] * ic + nodes[k]->v[z] / norm) / (ic + 1);
Distance::init_node(p, f);
ic++;
} else if (dj < di) {
for (int z = 0; z < f; z++)
q->v[z] = (q->v[z] * jc + nodes[k]->v[z] / norm) / (jc + 1);
Distance::init_node(q, f);
jc++;
}
}
}
} // namespace
// Default implementations of per-metric hooks. Angular derives from this struct
// and calls Base::normalize explicitly.
struct Base {
// Hook run over the whole node storage; does nothing by default.
//   nodes       base address of the node storage
//   _s          size in bytes of one node
//   node_count  number of nodes
//   f           number of vector components per node
template
static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) {
// Override this in specific metric structs below if you need to do any pre-processing
// on the entire set of nodes passed into this index.
}
// Hook for resetting metric-specific node fields; does nothing by default.
template
static inline void zero_value(Node* dest) {
// Initialize any fields that require sane defaults within this node.
}
// Copy the first f vector components (f * sizeof(T) bytes) from source->v to dest->v.
template
static inline void copy_node(Node* dest, const Node* source, const int f) {
memcpy(dest->v, source->v, f * sizeof(T));
}
// Scale node->v in place to unit length; a vector whose norm is not positive is left untouched.
template
static inline void normalize(Node* node, int f) {
T norm = get_norm(node->v, f);
if (norm > 0) {
for (int z = 0; z < f; z++)
node->v[z] /= norm;
}
}
};
// Angular metric: node layout plus the distance, split and priority-queue
// helpers used with it. name() identifies the metric as "angular".
struct Angular : Base {
template
struct Node {
/*
* We store a binary tree where each node has two things
* - A vector associated with it
* - Two children
* All nodes occupy the same amount of memory
* All nodes with n_descendants == 1 are leaf nodes.
* A memory optimization is that for nodes with 2 <= n_descendants <= K,
* we skip the vector. Instead we store a list of all descendants. K is
* determined by the number of items that fits in the space of the vector.
* For nodes with n_descendants == 1 the vector is a data point.
* For nodes with n_descendants > K the vector is the normal of the split plane.
* Note that we can't really do sizeof(node) because we cheat and allocate
* more memory to be able to fit the vector outside
*/
S n_descendants;
// children and norm share storage; init_node() writes dot(v, v, f) into norm.
union {
S children[2]; // Will possibly store more than 2
T norm;
};
T v[ANNOYLIB_V_ARRAY_SIZE];
};
// Angular distance between x and y expressed as 2 - 2*cos(angle); returns 2
// when the product of the two squared norms is not positive.
template
static inline T distance(const Node* x, const Node* y, int f) {
// want to calculate (a/|a| - b/|b|)^2
// = a^2 / a^2 + b^2 / b^2 - 2ab/|a||b|
// = 2 - 2cos
T pp = x->norm ? x->norm : dot(x->v, x->v, f); // For backwards compatibility reasons, we need to fall back and compute the norm here
T qq = y->norm ? y->norm : dot(y->v, y->v, f);
T pq = dot(x->v, y->v, f);
T ppqq = pp * qq;
if (ppqq > 0) return 2.0 - 2.0 * pq / sqrt(ppqq);
else return 2.0; // cos is 0
}
// Dot product of the vector stored in n with y.
template
static inline T margin(const Node* n, const T* y, int f) {
return dot(n->v, y, f);
}
// True when the margin of y against n is positive, false when it is negative;
// a margin of exactly zero is decided by random.flip().
template
static inline bool side(const Node* n, const T* y, int f, Random& random) {
T dot = margin(n, y, f);
if (dot != 0)
return (dot > 0);
else
return (bool)random.flip();
}
// Fill n->v with a normalised split vector: the difference of the two
// centroids produced by two_means (called with cosine == true).
//   s is the size in bytes of one node; it sizes the two alloca() scratch nodes.
template
static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) {
Node* p = (Node*)alloca(s);
Node* q = (Node*)alloca(s);
two_means >(nodes, f, random, true, p, q);
for (int z = 0; z < f; z++)
n->v[z] = p->v[z] - q->v[z];
Base::normalize >(n, f);
}
// Square root of the distance, with negative inputs clamped to zero first.
template
static inline T normalized_distance(T distance) {
// Used when requesting distances from Python layer
// Turns out sometimes the squared distance is -0.0
// so we have to make sure it's a positive number.
return sqrt(std::max(distance, T(0)));
}
// Smaller of `distance` and the margin, where the margin is negated for child 0.
template
static inline T pq_distance(T distance, T margin, int child_nr) {
if (child_nr == 0)
margin = -margin;
return std::min(distance, margin);
}
// Starting value for pq_distance chains: positive infinity.
template
static inline T pq_initial_value() {
return numeric_limits::infinity();
}
// Cache dot(v, v, f) in n->norm so that distance() does not have to recompute it.
template
static inline void init_node(Node* n, int f) {
n->norm = dot(n->v, n->v, f);
}
// Identifier string of this metric.
static const char* name() {
return "angular";
}
};
// Metric policy for inner-product ("dot") search, derived from Angular.
//
// Each node carries an extra scalar, dot_factor, next to its vector v.
// preprocess() fills dot_factor for every node, and the split-related helpers
// (create_split, normalize, margin, copy_node) handle it alongside v.
// distance() is the negated inner product and normalized_distance() negates
// it again.
//
// NOTE(review): template parameter lists and template arguments (after
// `template`, `Node`, `vector`, `static_cast`, `two_means`, `get_node_ptr`)
// appear to be missing from this text -- restore before compiling.
struct DotProduct : Angular {
template
struct Node {
/*
* This is an extension of the Angular node with an extra attribute for the scaled norm.
*/
S n_descendants;
S children[2]; // Will possibly store more than 2
T dot_factor; // Extra scalar; written by preprocess(), zero_value(), create_split() and normalize()
T v[ANNOYLIB_V_ARRAY_SIZE];
};
// Returns the identifier string of this metric.
static const char* name() {
return "dot";
}
// Returns the negated result of dot(x->v, y->v, f), so a larger inner
// product gives a smaller distance. dot_factor is not used here.
template
static inline T distance(const Node* x, const Node* y, int f) {
return -dot(x->v, y->v, f);
}
// Resets dest->dot_factor to 0; v is left untouched.
template
static inline void zero_value(Node* dest) {
dest->dot_factor = 0;
}
// No-op. Hides Angular::init_node, which writes n->norm; the Node declared
// in this struct has no norm member.
template
static inline void init_node(Node* n, int f) {
}
// Copies f components of source->v (f * sizeof(T) bytes) and the dot_factor
// from source into dest.
template
static inline void copy_node(Node* dest, const Node* source, const int f) {
memcpy(dest->v, source->v, f * sizeof(T));
dest->dot_factor = source->dot_factor;
}
// Writes a split for `nodes` into n: both n->v and n->dot_factor receive the
// difference of the two scratch nodes p and q, and n is then normalized with
// DotProduct::normalize.
//
// s is the size in bytes of one node and sizes the alloca'd scratch nodes.
// NOTE(review): two_means is defined outside this chunk; presumably it fills
// p and q with two cluster centroids -- confirm.
template
static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) {
Node* p = (Node*)alloca(s);
Node* q = (Node*)alloca(s);
// alloca memory is uninitialized; give dot_factor a defined start value.
DotProduct::zero_value(p);
DotProduct::zero_value(q);
two_means >(nodes, f, random, true, p, q);
for (int z = 0; z < f; z++)
n->v[z] = p->v[z] - q->v[z];
n->dot_factor = p->dot_factor - q->dot_factor;
DotProduct::normalize >(n, f);
}
// Scales node->v (f components) and node->dot_factor by
// 1 / sqrt(dot(v, v, f) + dot_factor^2), i.e. dot_factor is treated as one
// more component of the vector. Nothing is changed when that norm is not
// greater than zero.
template
static inline void normalize(Node* node, int f) {
T norm = sqrt(dot(node->v, node->v, f) + pow(node->dot_factor, 2));
if (norm > 0) {
for (int z = 0; z < f; z++)
node->v[z] /= norm;
node->dot_factor /= norm;
}
}
// Returns dot(n->v, y, f) plus the square of n->dot_factor.
// y is a raw vector, so only the node's own dot_factor enters the result.
template
static inline T margin(const Node* n, const T* y, int f) {
return dot(n->v, y, f) + (n->dot_factor * n->dot_factor);
}
// Returns true when margin(n, y, f) is positive, false when it is negative,
// and the result of random.flip() (cast to bool) when it is exactly zero.
template
static inline bool side(const Node* n, const T* y, int f, Random& random) {
// The local `dot` shadows the dot() helper for the rest of this function.
T dot = margin(n, y, f);
if (dot != 0)
return (dot > 0);
else
return (bool)random.flip();
}
// Returns -distance, undoing the negation applied in distance().
template
static inline T normalized_distance(T distance) {
return -distance;
}
// Fills dot_factor for the first node_count nodes stored in `nodes`.
//
// nodes:      node storage, accessed through get_node_ptr.
// _s:         forwarded to get_node_ptr (NOTE(review): presumably the size in
//             bytes of one node -- confirm against get_node_ptr).
// node_count: number of nodes to process, indices 0 .. node_count-1.
// f:          number of vector components used for the norm.
//
// After this call each node's dot_factor is sqrt(max_norm^2 - norm^2),
// where norm is that node's own vector norm and max_norm is the largest norm
// among the processed nodes.
template
static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) {
// This uses a method from Microsoft Research for transforming inner product spaces to cosine/angular-compatible spaces.
// (Bachrach et al., 2014, see https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/XboxInnerProduct.pdf)
// Step one: compute the norm of each vector and store it temporarily in the node's dot_factor field
for (S i = 0; i < node_count; i++) {
Node* node = get_node_ptr(nodes, _s, i);
T d = dot(node->v, node->v, f);
// Guard against a negative value before taking the square root.
T norm = d < 0 ? 0 : sqrt(d);
node->dot_factor = norm;
}
// Step two: find the maximum norm
T max_norm = 0;
for (S i = 0; i < node_count; i++) {
Node* node = get_node_ptr(nodes, _s, i);
if (node->dot_factor > max_norm) {
max_norm = node->dot_factor;
}
}
// Step three: overwrite each node's dot_factor with sqrt(max_norm^2 - norm^2)
for (S i = 0; i < node_count; i++) {
Node* node = get_node_ptr(nodes, _s, i);
// dot_factor still holds the norm written in step one.
T node_norm = node->dot_factor;
T squared_norm_diff = pow(max_norm, static_cast(2.0)) - pow(node_norm, static_cast(2.0));
// Clamp to zero so a negative difference never reaches sqrt.
T dot_factor = squared_norm_diff < 0 ? 0 : sqrt(squared_norm_diff);
node->dot_factor = dot_factor;
}
}
};
struct Hamming : Base {
template
struct Node {
S n_descendants;
S children[2];
T v[ANNOYLIB_V_ARRAY_SIZE];
};
static const size_t max_iterations = 20;
template