RcppAnnoy/0000755000176200001440000000000015131171252012161 5ustar liggesusersRcppAnnoy/tests/0000755000176200001440000000000015037214756013337 5ustar liggesusersRcppAnnoy/tests/tinytest.R0000644000176200001440000000023215037214756015342 0ustar liggesusersif (requireNamespace("tinytest", quietly=TRUE)) { set.seed(42) # Set a seed to make the test deterministic tinytest::test_package("RcppAnnoy") } RcppAnnoy/MD50000644000176200001440000000450115131171252012471 0ustar liggesusersa43ff1d8733252cf2f5e6378b9e1a6b0 *ChangeLog b11974ab08d58c3cde5740573f0500d7 *DESCRIPTION dae4f8ca1beeb667ee6121de3c975d2c *NAMESPACE 7ae968c5de1e6252a2c1ab5a6568517d *R/RcppExports.R 7514e414b0e89bf1533417a714243518 *R/annoy.R 4d86ce7f605efd09238d1259c3edf71b *R/version.R 30a96085112d8ac88486f48c22163cd4 *README.md f386779b34155e79f467f3703f65213d *build/vignette.rds febc42ec3b6fd6c342a9fd8d38f20cd0 *cleanup b9f392bb4c54af8b21cf529aca47bbb8 *demo/00Index 2dbd8f45f0fba5eea1284ac50ad8984a *demo/irisExample.R 626d1ee412f1f366f860e2fa1b7e15a1 *demo/simpleExample.R fb6df02390e9a84cf135c6e38c3a2621 *inst/NEWS.Rd bb51c8add2af3e7a174c1c84db46f2c3 *inst/doc/UsingAnnoyInCpp.pdf 609af8779f245aa9cabadd01378bdd41 *inst/doc/UsingAnnoyInCpp.pdf.asis 17668ef628922f5e2a5a50b934c11d74 *inst/include/RcppAnnoy.h 1fc7d61f9386cf81b7e7be80ab27dd4a *inst/include/annoylib.h d71f69b770dcc346f36da24381e45814 *inst/include/kissrandom.h bb5e4ec24ecaed6c71be0d76836eedba *inst/include/mman.h 31469ffae9d97a5c2db428d7a4f9ead2 *inst/rmd/UsingAnnoyInCpp.Rmd 59f4b2571897b164e2f6cceeb6f75555 *inst/rmd/rcppannoy.bib 14c740fb1a1c4d78afc7d3b76641d9e6 *inst/tinytest/data/test.tree 4bb99f63e5ba7c7ae5b730625cf8e2eb *inst/tinytest/testAngular.R 4aae48f11cb4f25bdf0fdcc82ae35fef *inst/tinytest/testDotProduct.R 56f9fece1ea637c58c269f6b530712b5 *inst/tinytest/testEuclidean.R 1ec108d7dc0af03ced95ccf131c6daf0 *inst/tinytest/testHamming.R 7d8dae4a58700885f9f424d9f38c63aa *inst/tinytest/testIndex.R bfc5aa0efdfe1fde02cc8fdb51e6b447 
*inst/tinytest/testManhattan.R 15e6bfe848de489a6e6e0b972980c159 *inst/tinytest/testOnDiskBuild.R c31e274a819e1b945f2fcef39c3e9d51 *inst/tinytest/testSeeds.R ebb89d9c7e2a0af6a3ca60edfd328a54 *inst/tinytest/testVignette.R df2116ebcbed31030d069c79c47f26a0 *man/AnnoyIndex.Rd cbe66b243d9b4737c44ce13d427c2554 *man/RcppAnnoy-package.Rd 5ac2065ddceaf546aa75a251678ac58e *man/getAnnoyVersion.Rd eb1a07c50cc3ab66aad1f84abf7cd18c *man/getArchictectureStatus.Rd ea3b103fe89cb728f9d81adc5c2a1380 *src/Makevars 20d086e35aec627fea427ae2a810a4db *src/RcppExports.cpp 3fe2f1942958f63e8f5070916e941cab *src/annoy.cpp f61d5764f4ba6cf6e90127398a30e7be *src/arch.cpp 10f6597510bb81d9b8bbbb5735eae48f *src/init.c 95527d46e099ce463e3654a399b64b6f *src/version.cpp 4259ac998e053bf1c769663d5298dfd5 *tests/tinytest.R 609af8779f245aa9cabadd01378bdd41 *vignettes/UsingAnnoyInCpp.pdf.asis RcppAnnoy/R/0000755000176200001440000000000014725061646012377 5ustar liggesusersRcppAnnoy/R/RcppExports.R0000644000176200001440000000106414653660622015013 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #' Report CPU Architecture and Compiler #' #' @return A constant direct created at compile-time describing #' the extent of AVX instructions (512 bit, 128 bit, or none) #' and compiler use where currently recognised are MSC (unlikely #' for R), GCC, Clang, or \sQuote{other}. getArchictectureStatus <- function() { .Call(`_RcppAnnoy_getArchictectureStatus`) } .annoy_version <- function() { .Call(`_RcppAnnoy_annoy_version`) } RcppAnnoy/R/version.R0000644000176200001440000000107113766143727014213 0ustar liggesusers#' Get the Annoy library version #' #' Get the version of the Annoy C++ library that RcppAnnoy was compiled with. #' #' @param compact Logical scalar indicating whether a compact #' \code{\link{package_version}} should be returned. 
#' #' @return An integer vector containing the major, minor and patch version numbers; #' or if \code{compact=TRUE}, a \code{\link{package_version}} object. #' #' @author Aaron Lun getAnnoyVersion <- function(compact=FALSE) { v <- .annoy_version() if (compact) as.package_version(paste(unname(v), collapse = ".")) else v } RcppAnnoy/R/annoy.R0000644000176200001440000001332414725061646013651 0ustar liggesusers#' @name AnnoyIndex #' #' @aliases #' AnnoyEuclidean Rcpp_AnnoyEuclidean-class Rcpp_AnnoyEuclidean #' AnnoyAngular Rcpp_AnnoyAngular-class Rcpp_AnnoyAngular #' AnnoyManhattan Rcpp_AnnoyManhattan-class Rcpp_AnnoyManhattan #' AnnoyHamming Rcpp_AnnoyHamming-class Rcpp_AnnoyHamming #' AnnoyDotProduct Rcpp_AnnoyDotProduct-class Rcpp_AnnoyDotProduct #' #' @title Approximate Nearest Neighbors with Annoy #' #' @description #' Annoy is a small library written to provide fast and memory-efficient #' nearest neighbor lookup from a possibly static index which can be #' shared across processes. #' #' @section Usage: #' \preformatted{ #' a <- new(AnnoyEuclidean, vectorsz) #' #' a$setSeed(0) #' a$setVerbose(0) #' #' a$addItem(i, dv) #' #' a$getNItems() #' #' a$getItemsVector(i) #' a$getDistance(i, j) #' #' a$build(n_trees) #' #' a$getNNsByItem(i, n) #' a$getNNsByItemList(i, n, search_k, include_distances) #' #' a$getNNsByVector(v, n) #' a$getNNsByVectorList(v, n, search_k, include_distances) #' #' a$save(fn) #' a$load(fn) #' a$unload() #' } #' #' @section Details: #' #' \code{new(Class, vectorsz)} #' Create a new Annoy instance of type \code{Class} where \code{Class} #' is on of the following: #' \code{AnnoyEuclidean}, #' \code{AnnoyAngular}, #' \code{AnnoyManhattan}, #' \code{AnnoyHamming}. #' \code{vectorsz} denotes the length of the vectors that the Annoy instance #' will be indexing. #' #' \code{$addItem(i, v)} #' Adds item \code{i} (any nonnegative integer) with vector \code{v}. #' Note that it will allocate memory for \code{max(i) + 1} items. 
#' #' \code{$build(n_trees)} #' Builds a forest of \code{n_trees} trees. #' More trees gives higher precision when querying. #' After calling \code{build}, no more items can be added. #' #' \code{$save(fn)} #' Saves the index to disk as filename \code{fn}. #' After saving, no more items can be added. #' #' \code{$load(fn)} #' Loads (mmaps) an index from filename \code{fn} on disk. #' #' \code{$unload()} #' Unloads index. #' #' \code{$getDistance(i, j)} #' Returns the distance between items \code{i} and \code{j} #' #' \code{$getNNsByItem(i, n)} #' Returns the \code{n} closest items as an integer vector of indices. #' #' \code{$getNNsByVector(v, n)} #' Same as \code{$getNNsByItem}, but queries by vector \code{v} rather than #' index \code{i}. #' #' \code{$getNNsByItemList(i, n, search_k = -1, include_distances = FALSE)} #' Returns the n closest items to item \code{i} as a list. #' During the query it will inspect up to \code{search_k} nodes which #' defaults to \code{n_trees * n} if not provided. #' \code{search_k} gives you a run-time tradeoff between better accuracy and #' speed. #' If you set \code{include_distances} to \code{TRUE}, #' it will return a length 2 list with elements \code{"item"} & #' \code{"distance"}. #' The \code{"item"} element contains the \code{n} closest items as an integer #' vector of indices. #' The optional \code{"distance"} element contains the corresponding distances #' to \code{"item"} as a numeric vector. #' #' \code{$getNNsByVectorList(i, n, search_k = -1, include_distances = FALSE)} #' Same as \code{$getNNsByItemList}, but queries by vector \code{v} rather than #' index \code{i} #' #' \code{$getItemsVector(i)} #' Returns the vector for item \code{i} that was previously added. #' #' \code{$getNItems()} #' Returns the number of items in the index. #' #' \code{$setVerbose()} #' If \code{1} then messages will be printed during processing. #' If \code{0} then messages will be suppressed during processing. 
#' #' \code{$setSeed()} #' Set random seed for annoy (integer). #' #' @examples #' library(RcppAnnoy) #' #' # BUILDING ANNOY INDEX --------------------------------------------------------- #' vector_size <- 10 #' a <- new(AnnoyEuclidean, vector_size) #' #' a$setSeed(42) #' #' # Turn on verbose status messages (0 to turn off) #' a$setVerbose(1) #' #' # Load 100 random vectors into index #' for (i in 1:100) a$addItem(i - 1, runif(vector_size)) # Annoy uses zero indexing #' #' # Display number of items in index #' a$getNItems() #' #' # Retrieve item at postition 0 in index #' a$getItemsVector(0) #' #' # Calculate distance between items at postitions 0 & 1 in index #' a$getDistance(0, 1) #' #' # Build forest with 50 trees #' a$build(50) #' #' #' # PERFORMING ANNOY SEARCH ------------------------------------------------------ #' #' # Retrieve 5 nearest neighbors to item 0 #' # Returned as integer vector of indices #' a$getNNsByItem(0, 5) #' #' # Retrieve 5 nearest neighbors to item 0 #' # search_k = -1 will invoke default search_k value of n_trees * n #' # Return results as list with an element for distance #' a$getNNsByItemList(0, 5, -1, TRUE) #' #' # Retrieve 5 nearest neighbors to item 0 #' # search_k = -1 will invoke default search_k value of n_trees * n #' # Return results as list without an element for distance #' a$getNNsByItemList(0, 5, -1, FALSE) #' #' #' v <- runif(vector_size) #' # Retrieve 5 nearest neighbors to vector v #' # Returned as integer vector of indices #' a$getNNsByVector(v, 5) #' #' # Retrieve 5 nearest neighbors to vector v #' # search_k = -1 will invoke default search_k value of n_trees * n #' # Return results as list with an element for distance #' a$getNNsByVectorList(v, 5, -1, TRUE) #' #' # SAVING/LOADING ANNOY INDEX --------------------------------------------------- #' #' # Create a tempfile, replace with a local file to keep #' treefile <- tempfile(pattern="annoy", fileext=".tree") #' #' # Save annoy tree to disk #' a$save(treefile) #' #' 
# Load annoy tree from disk #' a$load(treefile) #' #' # Unload index from memory #' a$unload() NULL ## ensure module gets loaded loadModule("AnnoyAngular", TRUE) loadModule("AnnoyEuclidean", TRUE) loadModule("AnnoyManhattan", TRUE) loadModule("AnnoyHamming", TRUE) loadModule("AnnoyDotProduct", TRUE) RcppAnnoy/cleanup0000755000176200001440000000024515131164023013535 0ustar liggesusers#!/bin/sh rm -rf *~ */*~ src/*.o src/*.so src/*.dll src/*.dylib src/symbols.rds \ vignettes/jss.bst vignettes/pinp.cls vignettes/auto \ vignettes/annoy.index RcppAnnoy/demo/0000755000176200001440000000000015005765312013113 5ustar liggesusersRcppAnnoy/demo/simpleExample.R0000644000176200001440000000104612427546253016052 0ustar liggesusers ## cf the simple example at https://github.com/spotify/annoy library(RcppAnnoy) set.seed(123) # be reproducible f <- 40 a <- new(AnnoyEuclidean, f) n <- 50 # not specified for (i in seq(n)) { v <- rnorm(f) a$addItem(i-1, v) } a$build(50) # 50 trees a$save("/tmp/test.tree") b <- new(AnnoyEuclidean, f) # new object, could be in another process b$load("/tmp/test.tree") # super fast, will just mmap the file print(b$getNNsByItem(0, 40)) RcppAnnoy/demo/irisExample.R0000644000176200001440000000307114725062313015520 0ustar liggesusers ## Suggested by @SamGG in https://github.com/eddelbuettel/rcppannoy/issues/79#issuecomment-2518597494 library(RcppAnnoy) # IRIS EXAMPLE ----------------------------------------------------------------- data(iris) # Converts to numeric, ignoring the species X <- as.matrix(iris[,-5]) # BuildinG index a <- new(AnnoyEuclidean, ncol(X)) a$setSeed(42) # Load dataset into index; Annoy uses zero indexing for (i in 1:nrow(X)) a$addItem(i - 1, X[i,]) # Build forest with 20 trees a$build(50) # Reports about the forest a$getNItems() a$getNTrees() # Performing search k <- 5 # number of nearest neighbors nn.index <- matrix(nrow = nrow(X), ncol = k) for (i in 1:nrow(X)) nn.index[i,] <- a$getNNsByVector(X[i,], k) # Annoy uses zero indexing, so 
index must be incremented nn.index = nn.index + 1 # The first match is the query itself most of the time plot(1:nrow(X), nn.index[,1]) # Explore the second nearest neighbor opar = par(mfrow = c(2, 2)) for (i in 1:ncol(X)) plot(X[, i], X[nn.index[,2], i], xlab = colnames(X)[i], ylab = "nearest") par(opar) # Perform search with distance k <- 5 nn.index <- matrix(nrow = nrow(X), ncol = k) nn.distance <- matrix(nrow = nrow(X), ncol = k) for (i in 1:nrow(X)) { res <- a$getNNsByVectorList(X[i,], k, -1, TRUE) nn.index[i,] <- res$item nn.distance[i,] <- res$distance } # Annoy uses zero indexing, so index must be incremented nn.index = nn.index + 1 # Explore distance to the second nearest neighbor hist(nn.distance[,2], xlab = "Distance to the 2nd NN", main = "Histogram of distance") # Unload index from memory a$unload() rm(a) RcppAnnoy/demo/00Index0000644000176200001440000000017315005765312014246 0ustar liggesuserssimpleExample A first introductory example irisExample A more comprehensive example using the 'iris' data set RcppAnnoy/vignettes/0000755000176200001440000000000015131164023014167 5ustar liggesusersRcppAnnoy/vignettes/UsingAnnoyInCpp.pdf.asis0000644000176200001440000000030515037456253020660 0ustar liggesusers%\VignetteIndexEntry{Using Annoy in C++} %\VignetteKeywords{Rcpp, Annoy, R, Cpp, Approximate Nearest Neighbours} %\VignettePackage{RcppAnnoy} %\VignetteEncoding{UTF-8} %\VignetteEngine{Rcpp::asis} RcppAnnoy/src/0000755000176200001440000000000015131164023012746 5ustar liggesusersRcppAnnoy/src/annoy.cpp0000644000176200001440000003431014653657263014624 0ustar liggesusers // RcppAnnoy -- Rcpp bindings to Annoy library for Approximate Nearest Neighbours // // Copyright (C) 2014 - 2023 Dirk Eddelbuettel // // This file is part of RcppAnnoy // // RcppAnnoy is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 2 of the License, or // (at your 
option) any later version. // // RcppAnnoy is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with RcppAnnoy. If not, see . // simple C++ modules to wrap to templated classes from Annoy // // uses annoylib.h (from Annoy) and provides R access via Rcpp // // Dirk Eddelbuettel, Nov 2014 #include "RcppAnnoy.h" namespace Annoy { template< typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy > class Annoy { protected: AnnoyIndex *ptr; unsigned int vectorsz; public: Annoy(int n) : vectorsz(n) { ptr = new AnnoyIndex(n); } ~Annoy() { if (ptr != NULL) delete ptr; } void addItem(S item, Rcpp::NumericVector dv) { if (item < 0) Rcpp::stop("Inadmissible item value %d", item); std::vector fv(dv.size()); std::copy(dv.begin(), dv.end(), fv.begin()); char *errormsg; if (!ptr->add_item(item, &fv[0], &errormsg)) Rcpp::stop(errormsg); } void callBuild(int n) { ptr->build(n); } void callUnbuild() { ptr->unbuild(); } void callSave(std::string filename) { ptr->save(filename.c_str()); } void callLoad(std::string filename) { ptr->load(filename.c_str()); } void callUnload() { ptr->unload(); } int getNItems() { return ptr->get_n_items(); } int getNTrees() { return ptr->get_n_trees(); } double getDistance(int i, int j) { return ptr->get_distance(i, j); } void verbose(bool v) { ptr->verbose(v); } void setSeed(int s) { ptr->set_seed(s); } std::vector getNNsByItem(S item, size_t n) { std::vector result; ptr->get_nns_by_item(item, n, -1, &result, NULL); return result; } Rcpp::List getNNsByItemList(S item, size_t n, int search_k, bool include_distances) { if (include_distances) { std::vector result; std::vector distances; ptr->get_nns_by_item(item, n, search_k, &result, &distances); return 
Rcpp::List::create(Rcpp::Named("item") = result, Rcpp::Named("distance") = distances); } else { std::vector result; ptr->get_nns_by_item(item, n, search_k, &result, NULL); return Rcpp::List::create(Rcpp::Named("item") = result); } } std::vector getNNsByVector(std::vector dv, size_t n) { std::vector fv(dv.size()); std::copy(dv.begin(), dv.end(), fv.begin()); std::vector result; ptr->get_nns_by_vector(&fv[0], n, -1, &result, NULL); return result; } Rcpp::List getNNsByVectorList(std::vector fv, size_t n, int search_k, bool include_distances) { if (fv.size() != vectorsz) { Rcpp::stop("fv.size() != vector_size"); } if (include_distances) { std::vector result; std::vector distances; ptr->get_nns_by_vector(&fv[0], n, search_k, &result, &distances); return Rcpp::List::create( Rcpp::Named("item") = result, Rcpp::Named("distance") = distances); } else { std::vector result; ptr->get_nns_by_vector(&fv[0], n, search_k, &result, NULL); return Rcpp::List::create(Rcpp::Named("item") = result); } } std::vector getItemsVector(S item) { std::vector fv(vectorsz); ptr->get_item(item, &fv[0]); std::vector dv(fv.size()); std::copy(fv.begin(), fv.end(), dv.begin()); return dv; } bool onDiskBuild(std::string fname) { char *errormsg; if (!ptr->on_disk_build(fname.c_str(), &errormsg)) Rcpp::stop(errormsg); return true; } }; } typedef Annoy::Annoy AnnoyDotProduct; typedef Annoy::Annoy AnnoyAngular; typedef Annoy::Annoy AnnoyEuclidean; typedef Annoy::Annoy AnnoyManhattan; typedef Annoy::Annoy AnnoyHamming; RCPP_EXPOSED_CLASS_NODECL(AnnoyDotProduct) RCPP_MODULE(AnnoyDotProduct) { Rcpp::class_("AnnoyDotProduct") .constructor("constructor with integer count") .method("addItem", &AnnoyDotProduct::addItem, "add item") .method("build", &AnnoyDotProduct::callBuild, "build an index") .method("unbuild", &AnnoyDotProduct::callUnbuild, "unbuild an index") .method("save", &AnnoyDotProduct::callSave, "save index to file") .method("load", &AnnoyDotProduct::callLoad, "load index from file") .method("unload", 
&AnnoyDotProduct::callUnload, "unload index") .method("getDistance", &AnnoyDotProduct::getDistance, "get distance between i and j") .method("getNNsByItem", &AnnoyDotProduct::getNNsByItem, "retrieve Nearest Neigbours given item") .method("getNNsByItemList", &AnnoyDotProduct::getNNsByItemList, "retrieve Nearest Neigbours given item") .method("getNNsByVector", &AnnoyDotProduct::getNNsByVector, "retrieve Nearest Neigbours given vector") .method("getNNsByVectorList", &AnnoyDotProduct::getNNsByVectorList, "retrieve Nearest Neigbours given vector") .method("getItemsVector", &AnnoyDotProduct::getItemsVector, "retrieve item vector") .method("getNItems", &AnnoyDotProduct::getNItems, "get number of items") .method("getNTrees", &AnnoyDotProduct::getNTrees, "get number of trees") .method("setVerbose", &AnnoyDotProduct::verbose, "set verbose") .method("setSeed", &AnnoyDotProduct::setSeed, "set seed") .method("onDiskBuild", &AnnoyDotProduct::onDiskBuild, "build in given file") ; } RCPP_EXPOSED_CLASS_NODECL(AnnoyAngular) RCPP_MODULE(AnnoyAngular) { Rcpp::class_("AnnoyAngular") .constructor("constructor with integer count") .method("addItem", &AnnoyAngular::addItem, "add item") .method("build", &AnnoyAngular::callBuild, "build an index") .method("unbuild", &AnnoyAngular::callUnbuild, "unbuild an index") .method("save", &AnnoyAngular::callSave, "save index to file") .method("load", &AnnoyAngular::callLoad, "load index from file") .method("unload", &AnnoyAngular::callUnload, "unload index") .method("getDistance", &AnnoyAngular::getDistance, "get distance between i and j") .method("getNNsByItem", &AnnoyAngular::getNNsByItem, "retrieve Nearest Neigbours given item") .method("getNNsByItemList", &AnnoyAngular::getNNsByItemList, "retrieve Nearest Neigbours given item") .method("getNNsByVector", &AnnoyAngular::getNNsByVector, "retrieve Nearest Neigbours given vector") .method("getNNsByVectorList", &AnnoyAngular::getNNsByVectorList, "retrieve Nearest Neigbours given vector") 
.method("getItemsVector", &AnnoyAngular::getItemsVector, "retrieve item vector") .method("getNItems", &AnnoyAngular::getNItems, "get number of items") .method("getNTrees", &AnnoyAngular::getNTrees, "get number of trees") .method("setVerbose", &AnnoyAngular::verbose, "set verbose") .method("setSeed", &AnnoyAngular::setSeed, "set seed") .method("onDiskBuild", &AnnoyAngular::onDiskBuild, "build in given file") ; } RCPP_EXPOSED_CLASS_NODECL(AnnoyEuclidean) RCPP_MODULE(AnnoyEuclidean) { Rcpp::class_("AnnoyEuclidean") .constructor("constructor with integer count") .method("addItem", &AnnoyEuclidean::addItem, "add item") .method("build", &AnnoyEuclidean::callBuild, "build an index") .method("unbuild", &AnnoyEuclidean::callUnbuild, "unbuild an index") .method("save", &AnnoyEuclidean::callSave, "save index to file") .method("load", &AnnoyEuclidean::callLoad, "load index from file") .method("unload", &AnnoyEuclidean::callUnload, "unload index") .method("getDistance", &AnnoyEuclidean::getDistance, "get distance between i and j") .method("getNNsByItem", &AnnoyEuclidean::getNNsByItem, "retrieve Nearest Neigbours given item") .method("getNNsByItemList", &AnnoyEuclidean::getNNsByItemList, "retrieve Nearest Neigbours given item") .method("getNNsByVector", &AnnoyEuclidean::getNNsByVector, "retrieve Nearest Neigbours given vector") .method("getNNsByVectorList",&AnnoyEuclidean::getNNsByVectorList, "retrieve Nearest Neigbours given vector") .method("getItemsVector", &AnnoyEuclidean::getItemsVector, "retrieve item vector") .method("getNItems", &AnnoyEuclidean::getNItems, "get number of items") .method("getNTrees", &AnnoyEuclidean::getNTrees, "get number of trees") .method("setVerbose", &AnnoyEuclidean::verbose, "set verbose") .method("setSeed", &AnnoyEuclidean::setSeed, "set seed") .method("onDiskBuild", &AnnoyEuclidean::onDiskBuild, "build in given file") ; } RCPP_EXPOSED_CLASS_NODECL(AnnoyManhattan) RCPP_MODULE(AnnoyManhattan) { Rcpp::class_("AnnoyManhattan") 
.constructor("constructor with integer count") .method("addItem", &AnnoyManhattan::addItem, "add item") .method("build", &AnnoyManhattan::callBuild, "build an index") .method("unbuild", &AnnoyManhattan::callUnbuild, "unbuild an index") .method("save", &AnnoyManhattan::callSave, "save index to file") .method("load", &AnnoyManhattan::callLoad, "load index from file") .method("unload", &AnnoyManhattan::callUnload, "unload index") .method("getDistance", &AnnoyManhattan::getDistance, "get distance between i and j") .method("getNNsByItem", &AnnoyManhattan::getNNsByItem, "retrieve Nearest Neigbours given item") .method("getNNsByItemList", &AnnoyManhattan::getNNsByItemList, "retrieve Nearest Neigbours given item") .method("getNNsByVector", &AnnoyManhattan::getNNsByVector, "retrieve Nearest Neigbours given vector") .method("getNNsByVectorList",&AnnoyManhattan::getNNsByVectorList, "retrieve Nearest Neigbours given vector") .method("getItemsVector", &AnnoyManhattan::getItemsVector, "retrieve item vector") .method("getNItems", &AnnoyManhattan::getNItems, "get number of items") .method("getNTrees", &AnnoyManhattan::getNTrees, "get number of trees") .method("setVerbose", &AnnoyManhattan::verbose, "set verbose") .method("setSeed", &AnnoyManhattan::setSeed, "set seed") .method("onDiskBuild", &AnnoyManhattan::onDiskBuild, "build in given file") ; } RCPP_EXPOSED_CLASS_NODECL(AnnoyHamming) RCPP_MODULE(AnnoyHamming) { Rcpp::class_("AnnoyHamming") .constructor("constructor with integer count") .method("addItem", &AnnoyHamming::addItem, "add item") .method("build", &AnnoyHamming::callBuild, "build an index") .method("unbuild", &AnnoyHamming::callUnbuild, "unbuild an index") .method("save", &AnnoyHamming::callSave, "save index to file") .method("load", &AnnoyHamming::callLoad, "load index from file") .method("unload", &AnnoyHamming::callUnload, "unload index") .method("getDistance", &AnnoyHamming::getDistance, "get distance between i and j") .method("getNNsByItem", 
&AnnoyHamming::getNNsByItem, "retrieve Nearest Neigbours given item") .method("getNNsByItemList", &AnnoyHamming::getNNsByItemList, "retrieve Nearest Neigbours given item") .method("getNNsByVector", &AnnoyHamming::getNNsByVector, "retrieve Nearest Neigbours given vector") .method("getNNsByVectorList",&AnnoyHamming::getNNsByVectorList, "retrieve Nearest Neigbours given vector") .method("getItemsVector", &AnnoyHamming::getItemsVector, "retrieve item vector") .method("getNItems", &AnnoyHamming::getNItems, "get number of items") .method("getNTrees", &AnnoyHamming::getNTrees, "get number of trees") .method("setVerbose", &AnnoyHamming::verbose, "set verbose") .method("setSeed", &AnnoyHamming::setSeed, "set seed") .method("onDiskBuild", &AnnoyHamming::onDiskBuild, "build in given file") ; } RcppAnnoy/src/RcppExports.cpp0000644000176200001440000000166014653660171015763 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include "../inst/include/RcppAnnoy.h" #include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // getArchictectureStatus std::string getArchictectureStatus(); RcppExport SEXP _RcppAnnoy_getArchictectureStatus() { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = Rcpp::wrap(getArchictectureStatus()); return rcpp_result_gen; END_RCPP } // annoy_version Rcpp::IntegerVector annoy_version(); RcppExport SEXP _RcppAnnoy_annoy_version() { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = Rcpp::wrap(annoy_version()); return rcpp_result_gen; END_RCPP } RcppAnnoy/src/arch.cpp0000644000176200001440000000164513753267105014412 0ustar liggesusers #include "RcppAnnoy.h" #if defined(USE_AVX512) #define AVX_INFO "Using 512-bit AVX instructions" #elif defined(USE_AVX128) #define 
AVX_INFO "Using 128-bit AVX instructions" #else #define AVX_INFO "Not using AVX instructions" #endif #if defined(_MSC_VER) #define COMPILER_INFO "Compiled using MSC" #elif defined(__GNUC__) #define COMPILER_INFO "Compiled using GCC" #elif defined(__clang__) #define COMPILER_INFO "Compiled using Clang" #else #define COMPILER_INFO "Compiled on unknown platform" #endif #define ANNOY_DOC (COMPILER_INFO ". " AVX_INFO ".") //' Report CPU Architecture and Compiler //' //' @return A constant direct created at compile-time describing //' the extent of AVX instructions (512 bit, 128 bit, or none) //' and compiler use where currently recognised are MSC (unlikely //' for R), GCC, Clang, or \sQuote{other}. // [[Rcpp::export]] std::string getArchictectureStatus() { return std::string(ANNOY_DOC); } RcppAnnoy/src/init.c0000644000176200001440000000253314653657263014105 0ustar liggesusers#include #include #include // for NULL #include /* FIXME: Check these declarations against the C/Fortran source code. */ /* .Call calls */ extern SEXP _rcpp_module_boot_AnnoyDotProduct(void); extern SEXP _rcpp_module_boot_AnnoyAngular(void); extern SEXP _rcpp_module_boot_AnnoyEuclidean(void); extern SEXP _rcpp_module_boot_AnnoyManhattan(void); extern SEXP _rcpp_module_boot_AnnoyHamming(void); extern SEXP _RcppAnnoy_getArchictectureStatus(void); extern SEXP _RcppAnnoy_annoy_version(void); static const R_CallMethodDef CallEntries[] = { {"_rcpp_module_boot_AnnoyDotProduct",(DL_FUNC) &_rcpp_module_boot_AnnoyDotProduct,0}, {"_rcpp_module_boot_AnnoyAngular", (DL_FUNC) &_rcpp_module_boot_AnnoyAngular, 0}, {"_rcpp_module_boot_AnnoyEuclidean", (DL_FUNC) &_rcpp_module_boot_AnnoyEuclidean, 0}, {"_rcpp_module_boot_AnnoyManhattan", (DL_FUNC) &_rcpp_module_boot_AnnoyManhattan, 0}, {"_rcpp_module_boot_AnnoyHamming", (DL_FUNC) &_rcpp_module_boot_AnnoyHamming, 0}, {"_RcppAnnoy_getArchictectureStatus",(DL_FUNC) &_RcppAnnoy_getArchictectureStatus,0}, {"_RcppAnnoy_annoy_version",(DL_FUNC) &_RcppAnnoy_annoy_version,0}, 
{NULL, NULL, 0} }; void R_init_RcppAnnoy(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } RcppAnnoy/src/version.cpp0000644000176200001440000000054413766143544015163 0ustar liggesusers#include "RcppAnnoy.h" // [[Rcpp::export(.annoy_version)]] Rcpp::IntegerVector annoy_version() { return Rcpp::IntegerVector::create(Rcpp::Named("major")=RCPPANNOY_VERSION_MAJOR, Rcpp::Named("minor")=RCPPANNOY_VERSION_MINOR, Rcpp::Named("patch")=RCPPANNOY_VERSION_PATCH); } RcppAnnoy/src/Makevars0000644000176200001440000000063415131024050014440 0ustar liggesusers ## Multithreaded indexing for Annoy (version 1.17 or later) ## requires this define to be set # USE_MULTITTHREADING=-DANNOYLIB_MULTITHREADED_BUILD ## So to use multithreaded indexing, comment out the setting above ## Note that it may alter your result (or their order) slightly due ## to the use of multithreading which alters the sequence of RNG draws PKG_CPPFLAGS = -I../inst/include/ ${USE_MULTITTHREADING} RcppAnnoy/ChangeLog0000644000176200001440000005107415131161754013750 0ustar liggesusers2026-01-12 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.23 * inst/include/RcppAnnoy.h: Idem 2026-01-11 Dirk Eddelbuettel * src/Makevars: No longer need to (explicitly) request C++17 2026-01-10 Dirk Eddelbuettel * inst/rmd/rcppannoy.bib: Updated references * vignettes/UsingAnnoyInCpp.pdf: Regenerated under updated pinp package to get improved DOI links * DESCRIPTION (Description): No longer require Additional_repositories as Rcpp 1.1.1 is on CRAN 2026-01-07 Dirk Eddelbuettel * .github/workflows/ci.yaml: Switch to actions/checkout@v6 2025-11-23 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll micro version and date 2025-11-22 Amos Elberg * inst/include/RcppAnnoy.h: Turn __ERROR_PRINTER_OVERRIDE__ into function to quash a warning emitted from 64-bit integer vertex 2025-07-21 Dirk Eddelbuettel * vignettes/UsingAnnoyInCpp.pdf: Regenerated * 
inst/rmd/UsingAnnoyInCpp.Rmd: One linebreak-avoidance edit * inst/rmd/rcppannoy.bib: Update references, add DOIs fields 2025-07-20 Dirk Eddelbuettel * vignettes/UsingAnnoyInCpp.pdf.asis: Switch to 'asis' vignette builder * DESCRIPTION (VignetteBuilder): Add 'VignetteBuilder: Rcpp' along with versioned Imports: on version 1.1.0.1 or newer * .github/workflows/ci.yaml: (Temporary) installation of Rcpp 1.1.0.1 in order to access the 'asis' vignette builder provided by it * DESCRIPTION: (Temporary) addition of Additional_repositories to ensure updated Rcpp is used during r-universe processing * tests/tinytest.R: Small refresh and edit 2025-05-04 Dirk Eddelbuettel * demo/00Index: Add missing newline 2025-03-08 Dirk Eddelbuettel * .github/workflows/ci.yaml: Use r-ci action with embedded bootstrap 2024-12-07 Dirk Eddelbuettel * demo/irisExample.R: New contributed demo * demo/00Index: Indexed * R/annoy.R: Remove duplicate example * man/AnnoyIndex.Rd: Regenerated to update example 2024-12-04 Dirk Eddelbuettel * R/annoy.R: Add missing dot to fileext argument in Rd part * man/AnnoyIndex.Rd: Regenerated to update example 2024-09-06 Dirk Eddelbuettel * DESCRIPTION (Authors@R): Added 2024-08-04 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll micro version * inst/include/RcppAnnoy.h: Idem * R/annoy.R: Add alias for AnnoyDotProduct * man/AnnoyIndex.Rd: Re-rendered * README.md: Switch two more http:// URLs to https:// 2024-08-03 Benjamin James * R/annoy.R: Add AnnoyDotProduct to namespace * inst/tinytest/testDotProduct.R: Unit tests for 'AnnoyDotProduct' * src/init.c: Added new dot product distance measure (via template) * src/annoy.cpp: Added template and module for AnnoyDotProduct 2024-05-20 Dirk Eddelbuettel * README.md: Use tinyverse.netlify.app for dependency badge 2024-02-23 Dirk Eddelbuettel * .github/workflows/ci.yaml (jobs): Update to actions/checkout@v4, add r-ci-setup actions 2024-01-23 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.22 * 
inst/include/RcppAnnoy.h: Idem * man/RcppAnnoy-package.Rd: Remove reference to example left from auto-generated stanza (to satisfy a CRAN request) 2023-07-02 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.21 * inst/include/RcppAnnoy.h: Idem 2023-07-01 Dirk Eddelbuettel * src/Makevars (USE_MULTITTHREADING): Revert back to not enabling multithreading by default to remain consistent with prior behaviour 2023-06-30 Dirk Eddelbuettel * README.md: Add r-universe badge 2023-06-15 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll micro version * inst/include/RcppAnnoy.h: Idem * inst/include/annoylib.h: Sync with upstream 1.17.3 * src/annoy.cpp: Accomodate updated upstream changes * inst/include/RcppAnnoy.h: Idem * inst/rmd/UsingAnnoyInCpp.Rmd (Annoy): Idem 2023-03-17 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/RcppAnnoy.h: Idem * src/Makevars: No longer restrict build to C++11, also turn on multithreading and switch to C++17 (instead of minimum C++14) 2022-10-27 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.20 * src/init.c: And 'void' to six prototypes to make clang-15 happy * inst/include/annoylib.h (set_error_from_errno): Use snprint to make xcode/macos 14 happy * .github/workflows/ci.yaml (jobs): Update to actions/checkout@v3 2021-11-30 Dirk Eddelbuettel * README.md: Remove depcreated CI badge * .travis/: Removed 2021-07-30 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.19 2021-04-13 Dirk Eddelbuettel * DESCRIPTION (URL, BugRreports): Added to DESCRIPTION file 2020-12-25 Dirk Eddelbuettel * .github/workflows/ci.yaml: Small tweaks to CI YAML file 2020-12-15 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.18 * src/version.cpp: Export as .annoy_version as we have an R wrapper * .github/workflows/ci.yaml: Add CI runner using r-ci * README.md: Add new badge 2020-12-06 Aaron Lun * src/version.cpp: Helper function to report Annoy version * src/init.c: Register helper * 
R/version.R: R wrapper * man/getAnnoyVersion.Rd: Documentation 2020-12-04 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/RcppAnnoy.h: Idem * inst/include/annoylib.h: Upstream sync post PR #522 * inst/include/kissrandom.h: Idem 2020-11-23 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/RcppAnnoy.h: Idem * .travis.yml: Switch to r-ci 2020-11-22 Dirk Eddelbuettel * inst/tinytest/testVignette.R: New test file * inst/rmd/: Moved from vignettes/rmd * inst/rmd/UsingAnnoyInCpp.Rmd: Allow for index file 2020-11-22 Aaron Lun * vignettes/rmd/UsingAnnoyInCpp.Rmd: Allow compilation of vignette without adding more dependencies 2020-11-19 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/RcppAnnoy.h: Idem * inst/include/RcppAnnoy.h: Additional typedefs for threading policy * src/annoy.cpp: Threading policy typedef removed 2020-11-15 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.17 2020-11-12 Dirk Eddelbuettel * inst/include/RcppAnnoy.h: New header file for includes and defines * src/annoy.cpp: Use new header file RcppAnnoy.h * inst/include/annoylib.h: Add a temporary define to identify version 2020-10-19 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * src/Makevars: Default to serial use (and C++11) but add comment to detail how to enable multithreaded indexing * src/annoy.cpp: Default to serial use * inst/tinytest/testSeeds.R: Re-enable test 2020-10-18 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/annoylib.h: Update to Annoy 1.17 * inst/include/mman.h: Idem * src/annoy.cpp: Idem, also enable multithreaded build * src/Makevars: Switch to C++14 for shared_timed_mutex * .travis.yml: Switch Travis CI to bspm use and focal * README.md: Update two URLs * inst/tinytest/testSeeds.R: Disable (already optional) test on seeding which does not pass in multi-threaded mode 2020-05-30 Dirk Eddelbuettel * README.md: Add 'last
commit' badge * .travis.yml: Switch to bionic and R 4.0.0 2020-03-12 Dirk Eddelbuettel * README.md: Standardize header and badges 2020-03-06 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.16 2020-03-03 Dirk Eddelbuettel * src/arch.cpp (getArchictectureStatus): Add simple helper function to show AVX and compiler status now that pragma has been removed 2020-03-02 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/annoylib.h: Updated to upstream PR #462 which removes the (optional and no longer needed) packing and one set of warnings 2020-03-01 Dirk Eddelbuettel * inst/include/annoylib.h: Updated to upstream PRs #460 (which uses int not size_t in one interface) and #461 (which inlines two helpers) 2020-02-27 Aaron Lun * inst/include/annoylib.h: Replace two size_t interfaces with int 2020-02-25 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.15 * inst/include/annoylib.h: Use alloca() portably 2020-02-24 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/annoylib.h: Updated upstream master post PR455 * inst/include/mman.h: Idem * .travis.yml: Use r-cran-tinytest, no longer need edd/r-3.5 2019-11-12 Dirk Eddelbuettel * R/annoy.R: Small help page correction thanks to Bill Venables * man/AnnoyIndex.Rd: Idem 2019-11-11 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.14 2019-11-10 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * cleanup: Make safe for checkbashism by using only /bin/sh * inst/include/annoylib.h: New upstream v1.16.2 (plus up to pr436) 2019-09-23 Dirk Eddelbuettel * DESCRIPTION (Date, Version): Release 0.0.13 2019-09-22 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * src/annoy.cpp: Add unbuild() and onDiskBuild() functions * inst/tinytest/testOnDiskBuild.R: Add tests for onDiskBuild() * inst/tinytest/testIndex.R: Can now use tinytest::exit_file() * inst/tinytest/testSeeds.R: Idem * inst/include/annoylib.h: Use
return code * inst/include/mman.h: Only conditionally define ftruncate * README.md: Small edits 2019-09-21 Peter Hickey * vignettes/rmd/UsingAnnoyInCpp.Rmd: Remove spurious comma 2019-09-21 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/annoylib.h: New upstream v1.16 (plus up to pr410) * inst/include/mman.h: Idem * src/annoy.cpp: Add getNTrees(), use error message in addItem() 2019-09-15 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * R/annoy.R: Use a tempfile() in example to save + load an annoy tree * man/AnnoyIndex.Rd: Ditto 2019-05-12 Dirk Eddelbuettel * DESCRIPTION (Date, Version): Release 0.0.12 2019-05-11 Dirk Eddelbuettel * vignettes/UsingAnnoyInCcppWrapper.Rnw: Vignette wrapper with tip of the hat to Mark van der Loo for his January 2019 blog post * vignettes/rmd/UsingAnnoyInCpp.Rmd (vignette): Moved * vignettes/rmd/rcppannoy.bib: Idem * DESCRIPTION (Suggests): Remove three packages needed for vignette * .travis.yml (install): Idem * .Rbuildignore: Exclude vignettes/rmd/ from build 2019-05-10 Dirk Eddelbuettel * tests/tinytest.R: New test runner using tinytest * inst/tinytest/testIndex.R: New test file using tinytest * inst/tinytest/testAngular.R: Idem * inst/tinytest/testEuclidean.R: Idem * inst/tinytest/testHamming.R: Idem * inst/tinytest/testManhattan.R: Idem * .travis.yml (install): Add tinytest to Travis setup * local/: Old RUnit test files in source but not in package * .Rbuildignore: Exclude local/ from build 2019-05-06 Adam Spannbauer * R/annoy.R: Added documentation * man/AnnoyIndex.Rd: Rendered documentation 2019-04-12 Dirk Eddelbuettel * inst/tests/runit.seeds.R (test01seeds): Simplified 2019-04-11 Dirk Eddelbuettel * inst/tests/runit.seeds.R (test01seeds): Add tests 2019-04-11 James Melville * vignettes/UsingAnnoyInCpp.Rmd: Document setSeed 2019-04-10 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * src/annoy.cpp: Support setting of seed for KISS RNG 2018-10-30 Dirk 
Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.11 2018-10-29 Dirk Eddelbuettel * inst/include/annoylib.h: Only define NOMINMAX if not defined * vignettes/UsingAnnoyInCpp.Rmd: Simplified by having C++ snippets typeset by pandoc instead of attempting OS-dependent compilation 2018-10-28 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/annoylib.h: New upstream version 1.13 past pr325 * inst/include/kissrandom.h: Idem * vignettes/UsingAnnoyInCpp: Renamed vignette, one size_t use 2018-10-17 Dirk Eddelbuettel * README.md: Added dependency count badge 2018-10-16 Dirk Eddelbuettel * vignettes/UsingAnnoyInC++.Rmd: Renamed vignette, minor edits * vignettes/rcppannoy.bib: Expanded, sorted (thanks, Emacs) 2018-10-14 Dirk Eddelbuettel * vignettes/rcppannoy.Rmd: Minor edits 2018-10-09 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version 2018-10-09 Aaron Lun * vignettes/rcppannoy.Rmd: Minor fixes 2018-10-07 Dirk Eddelbuettel * inst/include/annoylib.h: New upstream version 1.13 plus two PRs by Aaron and Dirk, respectively. 
* inst/include/mman.h: Idem 2018-10-06 Aaron Lun * vignettes/rcppannoy.Rmd: New vignette on using Annoy from C++ * DESCRIPTION: Added required Suggests: and VignetteBuilder: * .travis.yml: Added r-cran-knitr and r-cran-rmarkdown 2018-09-01 Dirk Eddelbuettel * .travis.yml: Switch Travis CI to R 3.5 repo 2017-12-16 Dirk Eddelbuettel * inst/include/kissrandom.h: New upstream version * inst/include/annoylib.h: Idem; plus some small changes to avoid g++ warnings, also sent upstream * inst/tests/runit.euclidean.R: Relaxed one '<' comparison to '<=' * inst/tests/runit.manhattan.R: Idem * src/init.c: Added new Hamming distance measure (via template) * src/annoy.cpp: Idem; plus more use of template type * inst/tests/runit.hamming.R: Unit tests for 'AnnoyHamming' * man/RcppAnnoy-package.Rd: Documentation alias for 'AnnoyHamming' * .Rbuildignore: Ignore top-level *tar.gz file * .gitignore: Idem 2017-09-25 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.10 * inst/tests/runit.index.R (test03getVectors): New test function * tests/doRUnit.R: Small edits and improvements 2017-09-23 Dirk Eddelbuettel * src/annoy.cpp (getItemsVector): Initialized vector (#24) 2017-08-31 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.9 2017-07-21 Dirk Eddelbuettel * inst/tests/runit.angular.R: Additional tests from upstream * inst/tests/runit.euclidean.R: Idem * inst/tests/runit.manhattan.R: Idem 2017-07-18 Dirk Eddelbuettel * src/annoy.cpp (RCPP_MODULE): New Manhattan distance module * inst/tests/runit.manhattan.R: New test file * R/annoy.R: Load module AnnoyManhattan * src/init.c: Register module boot function * man/RcppAnnoy-package.Rd: Aliases for AnnoyManhattan 2017-07-17 Dirk Eddelbuettel * inst/include/annoylib.h: New upstream version annoy 1.9.1 * inst/include/kissrandom.h: Idem * src/annoy.cpp (Annoy): Update call to match updated interface * inst/tests/runit.angular.R: Update three tests as the returned distance metric is now the square root of the previous value 
* .travis.yml (group): Added per Travis blog 2017-07-14 Dirk Eddelbuettel * .travis.yml (before_install): Use https for curl fetch 2017-07-02 Dirk Eddelbuettel * README.md: Use alternate for img.shields.io GPL-2+ badge 2017-07-01 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * inst/include/annoylib.h (>): Ensure we write as binary 2017-04-09 Dirk Eddelbuettel * src/init.c (R_init_RcppAnnoy): Call R_registerRoutines() and R_useDynamicSymbols() * NAMESPACE: Use .registration=TRUE on useDynLib 2016-10-01 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Release 0.0.8 * .travis.yml: Switch to using run.sh for Travis CI * README.md: More canonical URLs * src/annoy.cpp: Use unsigned int for vector size * src/annoy.cpp: Added index admissibility test to addItem() * inst/tests/runit.index.R (test02badvalues): New test 2016-09-29 Dirk Eddelbuettel * inst/NEWS.Rd: Added 2016-09-28 Dirk Eddelbuettel * DESCRIPTION (Version, Date): Roll minor version * src/annoy.cpp (RCPP_MODULE): New List returning functions from #12 2016-02-02 Michael Phan-Ba * src/annoy.cpp: Added explicit destructor 2015-11-17 Daniel C. 
Dillon * src/annoy.cpp: Templating Annoy classes * inst/include/annoylib.h: Ditto 2015-11-15 Dirk Eddelbuettel * DESCRIPTION: Version 0.0.7 * src/annoy.cpp: Changes to adapt to new Annoy interface 2015-11-14 Dirk Eddelbuettel * DESCRIPTION: Rolled Date and Version * inst/include/annoylib.h: Upgraded to new version from Annoy 1.6.2 2015-05-26 Dirk Eddelbuettel * DESCRIPTION: Rolled Date and Version * inst/include/annoylib.h: Upgraded to new version from Annoy 1.3.1 2015-05-03 Dirk Eddelbuettel * DESCRIPTION: Version 0.0.6 * inst/include/annoylib.h: Upgraded to new version from Annoy 1.2.2 based on our pull request (and fix) to support supplying an RNG * src/annoy.cpp: Supply R's own RNG instead of the default of rand * .travis.yml: No longer need BH, and install Rcpp via PPA 2015-05-02 Dirk Eddelbuettel * inst/include/annoylib.h: Upgraded to Annoy 1.1.1 (which no longer needs Boost) * inst/include/annoylib.h: Replace rand() with random() * src/annoy.cpp: Call save() + load() with use const char* arguments * DESCRIPTION: Roll Version: and Date:, remove BH dependency * cleanup: Clean a bit more in src/ 2015-01-22 Dirk Eddelbuettel * DESCRIPTION: Version 0.0.5 2015-01-21 Dirk Eddelbuettel * inst/include/annoylib.h: Synced once more with upstream; this version addresses the UBSAN runtime issue 2015-01-10 Dirk Eddelbuettel * inst/include/annoylib.h: Synced with upstream repo 2015-01-06 Dirk Eddelbuettel * DESCRIPTION: Bumped Version: and Date: * src/Makevars: One char correction requested by CRAN Maintainers 2014-12-07 Dirk Eddelbuettel * DESCRIPTION: Version 0.0.4 * DESCRIPTION: Added Depends: R (>= 3.1) as need for C++11 prohibits deployment on R-oldrel 2014-11-17 Dirk Eddelbuettel * DESCRIPTION: Version 0.0.3 * inst/include/annoylib.h: Updated to new version from Annoy 2014-11-16 Qiang Kou * inst/include/annoylib.h: Added Windows support for mmap * inst/include/mman.h: Windows implementation of mmap * src/annoy.cpp: Add two #undef needed on Windows 2014-11-16 Dirk 
Eddelbuettel * README.md: Updated to note CRAN package, possible Windows port 2014-11-15 Dirk Eddelbuettel * inst/include/annoylib.h: New upstream version 2014-11-14 Dirk Eddelbuettel * inst/include/annoylib.h: New upstream version with templated int * src/annoy.cpp: Updated for new Annoy library 2014-11-13 Dirk Eddelbuettel * DESCRIPTION: Version 0.0.2 * inst/include/annoylib.h: New version with verbosity toggle * src/annoy.cpp: Added setter for verbosity 2014-11-12 Dirk Eddelbuettel * .travis.yml: Enable binary index test 2014-11-11 Dirk Eddelbuettel * inst/tests/runit.index.R: New test against binary index * inst/tests/data/test.tree: Binary file used by test * inst/tests/runit.euclidean.R: Added more tests 2014-11-10 Dirk Eddelbuettel * tests/runUnitTests.R: Added RUnit unit test caller * inst/tests/runit.angular.R: First set of unit tests * inst/tests/runit.euclidean.R: Idem * DESCRIPTION: Added Suggests: RUnit * .travis.yml: Added to enable Travis CI 2014-11-09 Dirk Eddelbuettel * inst/include/annoylib.h: Updated to new version from annoy 2014-11-08 Dirk Eddelbuettel * DESCRIPTION: Initial version 0.0.1 RcppAnnoy/NAMESPACE0000644000176200001440000000022613132210432013371 0ustar liggesusersuseDynLib(RcppAnnoy, .registration=TRUE) import(methods, Rcpp) exportPattern("^[[:alpha:]]+") # export all identifiers starting with letters RcppAnnoy/inst/0000755000176200001440000000000015131164023013134 5ustar liggesusersRcppAnnoy/inst/include/0000755000176200001440000000000015131017650014562 5ustar liggesusersRcppAnnoy/inst/include/annoylib.h0000644000176200001440000013170415110617765016566 0ustar liggesusers// Copyright (c) 2013 Spotify AB // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy of // the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. #ifndef ANNOY_ANNOYLIB_H #define ANNOY_ANNOYLIB_H #include #include #ifndef _MSC_VER #include #endif #include #include #include #include #include #if defined(_MSC_VER) && _MSC_VER == 1500 typedef unsigned char uint8_t; typedef signed __int32 int32_t; typedef unsigned __int64 uint64_t; typedef signed __int64 int64_t; #else #include #endif #if defined(_MSC_VER) || defined(__MINGW32__) // a bit hacky, but override some definitions to support 64 bit #define off_t int64_t #define lseek_getsize(fd) _lseeki64(fd, 0, SEEK_END) #ifndef NOMINMAX #define NOMINMAX #endif #include "mman.h" #include #else #include #define lseek_getsize(fd) lseek(fd, 0, SEEK_END) #endif #include #include #include #include #include #include #include #if __cplusplus >= 201103L #include #endif #ifdef ANNOYLIB_MULTITHREADED_BUILD #include #include #include #endif #ifdef _MSC_VER // Needed for Visual Studio to disable runtime checks for mempcy #pragma runtime_checks("s", off) #endif // This allows others to supply their own logger / error printer without // requiring Annoy to import their headers. See RcppAnnoy for a use case. #ifndef __ERROR_PRINTER_OVERRIDE__ #define annoylib_showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); } #else #define annoylib_showUpdate(...) 
{ __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); } #endif // Portable alloc definition, cf Writing R Extensions, Section 1.6.4 #ifdef __GNUC__ // Includes GCC, clang and Intel compilers # undef alloca # define alloca(x) __builtin_alloca((x)) #elif defined(__sun) || defined(_AIX) // this is necessary (and sufficient) for Solaris 10 and AIX 6: # include #endif // We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number. // Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low. #define ANNOYLIB_V_ARRAY_SIZE 65536 #ifndef _MSC_VER #define annoylib_popcount __builtin_popcountll #else // See #293, #358 #define annoylib_popcount cole_popcount #endif #if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402 #define ANNOYLIB_USE_AVX512 #elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__) #define ANNOYLIB_USE_AVX #else #endif #if defined(ANNOYLIB_USE_AVX) || defined(ANNOYLIB_USE_AVX512) #if defined(_MSC_VER) #include #elif defined(__GNUC__) #include #endif #endif #if !defined(__MINGW32__) #define ANNOYLIB_FTRUNCATE_SIZE(x) static_cast(x) #else #define ANNOYLIB_FTRUNCATE_SIZE(x) (x) #endif namespace Annoy { inline void set_error_from_errno(char **error, const char* msg) { annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno); if (error) { *error = (char *)malloc(256); // TODO: win doesn't support snprintf snprintf(*error, 255, "%s: %s (%d)", msg, strerror(errno), errno); } } inline void set_error_from_string(char **error, const char* msg) { annoylib_showUpdate("%s\n", msg); if (error) { *error = (char *)malloc(strlen(msg) + 1); strcpy(*error, msg); } } using std::vector; using std::pair; using std::numeric_limits; using std::make_pair; inline bool remap_memory_and_truncate(void** _ptr, int _fd, size_t old_size, 
size_t new_size) { #ifdef __linux__ *_ptr = mremap(*_ptr, old_size, new_size, MREMAP_MAYMOVE); bool ok = ftruncate(_fd, new_size) != -1; #else munmap(*_ptr, old_size); bool ok = ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(new_size)) != -1; #ifdef MAP_POPULATE *_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0); #else *_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0); #endif #endif return ok; } namespace { template inline Node* get_node_ptr(const void* _nodes, const size_t _s, const S i) { return (Node*)((uint8_t *)_nodes + (_s * i)); } template inline T dot(const T* x, const T* y, int f) { T s = 0; for (int z = 0; z < f; z++) { s += (*x) * (*y); x++; y++; } return s; } template inline T manhattan_distance(const T* x, const T* y, int f) { T d = 0.0; for (int i = 0; i < f; i++) d += fabs(x[i] - y[i]); return d; } template inline T euclidean_distance(const T* x, const T* y, int f) { // Don't use dot-product: avoid catastrophic cancellation in #314. T d = 0.0; for (int i = 0; i < f; ++i) { const T tmp=*x - *y; d += tmp * tmp; ++x; ++y; } return d; } #ifdef ANNOYLIB_USE_AVX // Horizontal single sum of 256bit vector. inline float hsum256_ps_avx(__m256 v) { const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v)); const __m128 x64 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); const __m128 x32 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); return _mm_cvtss_f32(x32); } template<> inline float dot(const float* x, const float *y, int f) { float result = 0; if (f > 7) { __m256 d = _mm256_setzero_ps(); for (; f > 7; f -= 8) { d = _mm256_add_ps(d, _mm256_mul_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y))); x += 8; y += 8; } // Sum all floats in dot register. result += hsum256_ps_avx(d); } // Don't forget the remaining values. 
for (; f > 0; f--) { result += *x * *y; x++; y++; } return result; } template<> inline float manhattan_distance(const float* x, const float* y, int f) { float result = 0; int i = f; if (f > 7) { __m256 manhattan = _mm256_setzero_ps(); __m256 minus_zero = _mm256_set1_ps(-0.0f); for (; i > 7; i -= 8) { const __m256 x_minus_y = _mm256_sub_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y)); const __m256 distance = _mm256_andnot_ps(minus_zero, x_minus_y); // Absolute value of x_minus_y (forces sign bit to zero) manhattan = _mm256_add_ps(manhattan, distance); x += 8; y += 8; } // Sum all floats in manhattan register. result = hsum256_ps_avx(manhattan); } // Don't forget the remaining values. for (; i > 0; i--) { result += fabsf(*x - *y); x++; y++; } return result; } template<> inline float euclidean_distance(const float* x, const float* y, int f) { float result=0; if (f > 7) { __m256 d = _mm256_setzero_ps(); for (; f > 7; f -= 8) { const __m256 diff = _mm256_sub_ps(_mm256_loadu_ps(x), _mm256_loadu_ps(y)); d = _mm256_add_ps(d, _mm256_mul_ps(diff, diff)); // no support for fmadd in AVX... x += 8; y += 8; } // Sum all floats in dot register. result = hsum256_ps_avx(d); } // Don't forget the remaining values. for (; f > 0; f--) { float tmp = *x - *y; result += tmp * tmp; x++; y++; } return result; } #endif #ifdef ANNOYLIB_USE_AVX512 template<> inline float dot(const float* x, const float *y, int f) { float result = 0; if (f > 15) { __m512 d = _mm512_setzero_ps(); for (; f > 15; f -= 16) { //AVX512F includes FMA d = _mm512_fmadd_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y), d); x += 16; y += 16; } // Sum all floats in dot register. result += _mm512_reduce_add_ps(d); } // Don't forget the remaining values. 
for (; f > 0; f--) { result += *x * *y; x++; y++; } return result; } template<> inline float manhattan_distance(const float* x, const float* y, int f) { float result = 0; int i = f; if (f > 15) { __m512 manhattan = _mm512_setzero_ps(); for (; i > 15; i -= 16) { const __m512 x_minus_y = _mm512_sub_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y)); manhattan = _mm512_add_ps(manhattan, _mm512_abs_ps(x_minus_y)); x += 16; y += 16; } // Sum all floats in manhattan register. result = _mm512_reduce_add_ps(manhattan); } // Don't forget the remaining values. for (; i > 0; i--) { result += fabsf(*x - *y); x++; y++; } return result; } template<> inline float euclidean_distance(const float* x, const float* y, int f) { float result=0; if (f > 15) { __m512 d = _mm512_setzero_ps(); for (; f > 15; f -= 16) { const __m512 diff = _mm512_sub_ps(_mm512_loadu_ps(x), _mm512_loadu_ps(y)); d = _mm512_fmadd_ps(diff, diff, d); x += 16; y += 16; } // Sum all floats in dot register. result = _mm512_reduce_add_ps(d); } // Don't forget the remaining values. for (; f > 0; f--) { float tmp = *x - *y; result += tmp * tmp; x++; y++; } return result; } #endif template inline T get_norm(T* v, int f) { return sqrt(dot(v, v, f)); } template inline void two_means(const vector& nodes, int f, Random& random, bool cosine, Node* p, Node* q) { /* This algorithm is a huge heuristic. Empirically it works really well, but I can't motivate it well. The basic idea is to keep two centroids and assign points to either one of them. We weight each centroid by the number of points assigned to it, so to balance it. 
*/ static int iteration_steps = 200; size_t count = nodes.size(); size_t i = random.index(count); size_t j = random.index(count-1); j += (j >= i); // ensure that i != j Distance::template copy_node(p, nodes[i], f); Distance::template copy_node(q, nodes[j], f); if (cosine) { Distance::template normalize(p, f); Distance::template normalize(q, f); } Distance::init_node(p, f); Distance::init_node(q, f); int ic = 1, jc = 1; for (int l = 0; l < iteration_steps; l++) { size_t k = random.index(count); T di = ic * Distance::distance(p, nodes[k], f), dj = jc * Distance::distance(q, nodes[k], f); T norm = cosine ? get_norm(nodes[k]->v, f) : 1; if (!(norm > T(0))) { continue; } if (di < dj) { for (int z = 0; z < f; z++) p->v[z] = (p->v[z] * ic + nodes[k]->v[z] / norm) / (ic + 1); Distance::init_node(p, f); ic++; } else if (dj < di) { for (int z = 0; z < f; z++) q->v[z] = (q->v[z] * jc + nodes[k]->v[z] / norm) / (jc + 1); Distance::init_node(q, f); jc++; } } } } // namespace struct Base { template static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) { // Override this in specific metric structs below if you need to do any pre-processing // on the entire set of nodes passed into this index. } template static inline void zero_value(Node* dest) { // Initialize any fields that require sane defaults within this node. } template static inline void copy_node(Node* dest, const Node* source, const int f) { memcpy(dest->v, source->v, f * sizeof(T)); } template static inline void normalize(Node* node, int f) { T norm = get_norm(node->v, f); if (norm > 0) { for (int z = 0; z < f; z++) node->v[z] /= norm; } } }; struct Angular : Base { template struct Node { /* * We store a binary tree where each node has two things * - A vector associated with it * - Two children * All nodes occupy the same amount of memory * All nodes with n_descendants == 1 are leaf nodes. * A memory optimization is that for nodes with 2 <= n_descendants <= K, * we skip the vector. 
Instead we store a list of all descendants. K is * determined by the number of items that fits in the space of the vector. * For nodes with n_descendants == 1 the vector is a data point. * For nodes with n_descendants > K the vector is the normal of the split plane. * Note that we can't really do sizeof(node) because we cheat and allocate * more memory to be able to fit the vector outside */ S n_descendants; union { S children[2]; // Will possibly store more than 2 T norm; }; T v[ANNOYLIB_V_ARRAY_SIZE]; }; template static inline T distance(const Node* x, const Node* y, int f) { // want to calculate (a/|a| - b/|b|)^2 // = a^2 / a^2 + b^2 / b^2 - 2ab/|a||b| // = 2 - 2cos T pp = x->norm ? x->norm : dot(x->v, x->v, f); // For backwards compatibility reasons, we need to fall back and compute the norm here T qq = y->norm ? y->norm : dot(y->v, y->v, f); T pq = dot(x->v, y->v, f); T ppqq = pp * qq; if (ppqq > 0) return 2.0 - 2.0 * pq / sqrt(ppqq); else return 2.0; // cos is 0 } template static inline T margin(const Node* n, const T* y, int f) { return dot(n->v, y, f); } template static inline bool side(const Node* n, const T* y, int f, Random& random) { T dot = margin(n, y, f); if (dot != 0) return (dot > 0); else return (bool)random.flip(); } template static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { Node* p = (Node*)alloca(s); Node* q = (Node*)alloca(s); two_means >(nodes, f, random, true, p, q); for (int z = 0; z < f; z++) n->v[z] = p->v[z] - q->v[z]; Base::normalize >(n, f); } template static inline T normalized_distance(T distance) { // Used when requesting distances from Python layer // Turns out sometimes the squared distance is -0.0 // so we have to make sure it's a positive number. 
return sqrt(std::max(distance, T(0))); } template static inline T pq_distance(T distance, T margin, int child_nr) { if (child_nr == 0) margin = -margin; return std::min(distance, margin); } template static inline T pq_initial_value() { return numeric_limits::infinity(); } template static inline void init_node(Node* n, int f) { n->norm = dot(n->v, n->v, f); } static const char* name() { return "angular"; } }; struct DotProduct : Angular { template struct Node { /* * This is an extension of the Angular node with an extra attribute for the scaled norm. */ S n_descendants; S children[2]; // Will possibly store more than 2 T dot_factor; T v[ANNOYLIB_V_ARRAY_SIZE]; }; static const char* name() { return "dot"; } template static inline T distance(const Node* x, const Node* y, int f) { return -dot(x->v, y->v, f); } template static inline void zero_value(Node* dest) { dest->dot_factor = 0; } template static inline void init_node(Node* n, int f) { } template static inline void copy_node(Node* dest, const Node* source, const int f) { memcpy(dest->v, source->v, f * sizeof(T)); dest->dot_factor = source->dot_factor; } template static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { Node* p = (Node*)alloca(s); Node* q = (Node*)alloca(s); DotProduct::zero_value(p); DotProduct::zero_value(q); two_means >(nodes, f, random, true, p, q); for (int z = 0; z < f; z++) n->v[z] = p->v[z] - q->v[z]; n->dot_factor = p->dot_factor - q->dot_factor; DotProduct::normalize >(n, f); } template static inline void normalize(Node* node, int f) { T norm = sqrt(dot(node->v, node->v, f) + pow(node->dot_factor, 2)); if (norm > 0) { for (int z = 0; z < f; z++) node->v[z] /= norm; node->dot_factor /= norm; } } template static inline T margin(const Node* n, const T* y, int f) { return dot(n->v, y, f) + (n->dot_factor * n->dot_factor); } template static inline bool side(const Node* n, const T* y, int f, Random& random) { T dot = margin(n, y, f); if (dot != 0) return 
(dot > 0); else return (bool)random.flip(); } template static inline T normalized_distance(T distance) { return -distance; } template static inline void preprocess(void* nodes, size_t _s, const S node_count, const int f) { // This uses a method from Microsoft Research for transforming inner product spaces to cosine/angular-compatible spaces. // (Bachrach et al., 2014, see https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/XboxInnerProduct.pdf) // Step one: compute the norm of each vector and store that in its extra dimension (f-1) for (S i = 0; i < node_count; i++) { Node* node = get_node_ptr(nodes, _s, i); T d = dot(node->v, node->v, f); T norm = d < 0 ? 0 : sqrt(d); node->dot_factor = norm; } // Step two: find the maximum norm T max_norm = 0; for (S i = 0; i < node_count; i++) { Node* node = get_node_ptr(nodes, _s, i); if (node->dot_factor > max_norm) { max_norm = node->dot_factor; } } // Step three: set each vector's extra dimension to sqrt(max_norm^2 - norm^2) for (S i = 0; i < node_count; i++) { Node* node = get_node_ptr(nodes, _s, i); T node_norm = node->dot_factor; T squared_norm_diff = pow(max_norm, static_cast(2.0)) - pow(node_norm, static_cast(2.0)); T dot_factor = squared_norm_diff < 0 ? 0 : sqrt(squared_norm_diff); node->dot_factor = dot_factor; } } }; struct Hamming : Base { template struct Node { S n_descendants; S children[2]; T v[ANNOYLIB_V_ARRAY_SIZE]; }; static const size_t max_iterations = 20; template static inline T pq_distance(T distance, T margin, int child_nr) { return distance - (margin != (unsigned int) child_nr); } template static inline T pq_initial_value() { return numeric_limits::max(); } template static inline int cole_popcount(T v) { // Note: Only used with MSVC 9, which lacks intrinsics and fails to // calculate std::bitset::count for v > 32bit. Uses the generalized // approach by Eric Cole. 
// See https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64 v = v - ((v >> 1) & (T)~(T)0/3); v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3); v = (v + (v >> 4)) & (T)~(T)0/255*15; return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8; } template static inline T distance(const Node* x, const Node* y, int f) { size_t dist = 0; for (int i = 0; i < f; i++) { dist += annoylib_popcount(x->v[i] ^ y->v[i]); } return dist; } template static inline bool margin(const Node* n, const T* y, int f) { static const size_t n_bits = sizeof(T) * 8; T chunk = n->v[0] / n_bits; return (y[chunk] & (static_cast(1) << (n_bits - 1 - (n->v[0] % n_bits)))) != 0; } template static inline bool side(const Node* n, const T* y, int f, Random& random) { return margin(n, y, f); } template static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { size_t cur_size = 0; size_t i = 0; int dim = f * 8 * sizeof(T); for (; i < max_iterations; i++) { // choose random position to split at n->v[0] = random.index(dim); cur_size = 0; for (typename vector*>::const_iterator it = nodes.begin(); it != nodes.end(); ++it) { if (margin(n, (*it)->v, f)) { cur_size++; } } if (cur_size > 0 && cur_size < nodes.size()) { break; } } // brute-force search for splitting coordinate if (i == max_iterations) { int j = 0; for (; j < dim; j++) { n->v[0] = j; cur_size = 0; for (typename vector*>::const_iterator it = nodes.begin(); it != nodes.end(); ++it) { if (margin(n, (*it)->v, f)) { cur_size++; } } if (cur_size > 0 && cur_size < nodes.size()) { break; } } } } template static inline T normalized_distance(T distance) { return distance; } template static inline void init_node(Node* n, int f) { } static const char* name() { return "hamming"; } }; struct Minkowski : Base { template struct Node { S n_descendants; T a; // need an extra constant term to determine the offset of the plane S children[2]; T v[ANNOYLIB_V_ARRAY_SIZE]; }; template static inline T margin(const Node* 
n, const T* y, int f) { return n->a + dot(n->v, y, f); } template static inline bool side(const Node* n, const T* y, int f, Random& random) { T dot = margin(n, y, f); if (dot != 0) return (dot > 0); else return (bool)random.flip(); } template static inline T pq_distance(T distance, T margin, int child_nr) { if (child_nr == 0) margin = -margin; return std::min(distance, margin); } template static inline T pq_initial_value() { return numeric_limits::infinity(); } }; struct Euclidean : Minkowski { template static inline T distance(const Node* x, const Node* y, int f) { return euclidean_distance(x->v, y->v, f); } template static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { Node* p = (Node*)alloca(s); Node* q = (Node*)alloca(s); two_means >(nodes, f, random, false, p, q); for (int z = 0; z < f; z++) n->v[z] = p->v[z] - q->v[z]; Base::normalize >(n, f); n->a = 0.0; for (int z = 0; z < f; z++) n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2; } template static inline T normalized_distance(T distance) { return sqrt(std::max(distance, T(0))); } template static inline void init_node(Node* n, int f) { } static const char* name() { return "euclidean"; } }; struct Manhattan : Minkowski { template static inline T distance(const Node* x, const Node* y, int f) { return manhattan_distance(x->v, y->v, f); } template static inline void create_split(const vector*>& nodes, int f, size_t s, Random& random, Node* n) { Node* p = (Node*)alloca(s); Node* q = (Node*)alloca(s); two_means >(nodes, f, random, false, p, q); for (int z = 0; z < f; z++) n->v[z] = p->v[z] - q->v[z]; Base::normalize >(n, f); n->a = 0.0; for (int z = 0; z < f; z++) n->a += -n->v[z] * (p->v[z] + q->v[z]) / 2; } template static inline T normalized_distance(T distance) { return std::max(distance, T(0)); } template static inline void init_node(Node* n, int f) { } static const char* name() { return "manhattan"; } }; template class AnnoyIndexInterface { public: // Note that the 
methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL virtual ~AnnoyIndexInterface() {}; virtual bool add_item(S item, const T* w, char** error=NULL) = 0; virtual bool build(int q, int n_threads=-1, char** error=NULL) = 0; virtual bool unbuild(char** error=NULL) = 0; virtual bool save(const char* filename, bool prefault=false, char** error=NULL) = 0; virtual void unload() = 0; virtual bool load(const char* filename, bool prefault=false, char** error=NULL) = 0; virtual T get_distance(S i, S j) const = 0; virtual void get_nns_by_item(S item, size_t n, int search_k, vector* result, vector* distances) const = 0; virtual void get_nns_by_vector(const T* w, size_t n, int search_k, vector* result, vector* distances) const = 0; virtual S get_n_items() const = 0; virtual S get_n_trees() const = 0; virtual void verbose(bool v) = 0; virtual void get_item(S item, T* v) const = 0; virtual void set_seed(R q) = 0; virtual bool on_disk_build(const char* filename, char** error=NULL) = 0; }; template class AnnoyIndex : public AnnoyIndexInterface= 201103L typename std::remove_const::type #else typename Random::seed_type #endif > { /* * We use random projection to build a forest of binary trees of all items. * Basically just split the hyperspace into two sides by a hyperplane, * then recursively split each of those subtrees etc. * We create a tree like this q times. The default q is determined automatically * in such a way that we at most use 2x as much memory as the vectors take. 
*/ public: typedef Distance D; typedef typename D::template Node Node; #if __cplusplus >= 201103L typedef typename std::remove_const::type R; #else typedef typename Random::seed_type R; #endif protected: const int _f; size_t _s; S _n_items; void* _nodes; // Could either be mmapped, or point to a memory buffer that we reallocate S _n_nodes; S _nodes_size; vector _roots; S _K; R _seed; bool _loaded; bool _verbose; int _fd; bool _on_disk; bool _built; public: AnnoyIndex(int f) : _f(f), _seed(Random::default_seed) { _s = offsetof(Node, v) + _f * sizeof(T); // Size of each node _verbose = false; _built = false; _K = (S) (((size_t) (_s - offsetof(Node, children))) / sizeof(S)); // Max number of descendants to fit into node reinitialize(); // Reset everything } ~AnnoyIndex() { unload(); } int get_f() const { return _f; } bool add_item(S item, const T* w, char** error=NULL) { return add_item_impl(item, w, error); } template bool add_item_impl(S item, const W& w, char** error=NULL) { if (_loaded) { set_error_from_string(error, "You can't add an item to a loaded index"); return false; } _allocate_size(item + 1); Node* n = _get(item); D::zero_value(n); n->children[0] = 0; n->children[1] = 0; n->n_descendants = 1; for (int z = 0; z < _f; z++) n->v[z] = w[z]; D::init_node(n, _f); if (item >= _n_items) _n_items = item + 1; return true; } bool on_disk_build(const char* file, char** error=NULL) { _on_disk = true; #ifndef _MSC_VER _fd = open(file, O_RDWR | O_CREAT | O_TRUNC, (int) 0600); #else _fd = _open(file, _O_RDWR | _O_CREAT | _O_TRUNC, (int) 0600); #endif if (_fd == -1) { set_error_from_errno(error, "Unable to open"); _fd = 0; return false; } _nodes_size = 1; if (ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(_s) * ANNOYLIB_FTRUNCATE_SIZE(_nodes_size)) == -1) { set_error_from_errno(error, "Unable to truncate"); return false; } #ifdef MAP_POPULATE _nodes = (Node*) mmap(0, _s * _nodes_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0); #else _nodes = (Node*) mmap(0, _s * 
_nodes_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0); #endif return true; } bool build(int q, int n_threads=-1, char** error=NULL) { if (_loaded) { set_error_from_string(error, "You can't build a loaded index"); return false; } if (_built) { set_error_from_string(error, "You can't build a built index"); return false; } D::template preprocess(_nodes, _s, _n_items, _f); _n_nodes = _n_items; ThreadedBuildPolicy::template build(this, q, n_threads); // Also, copy the roots into the last segment of the array // This way we can load them faster without reading the whole file _allocate_size(_n_nodes + (S)_roots.size()); for (size_t i = 0; i < _roots.size(); i++) memcpy(_get(_n_nodes + (S)i), _get(_roots[i]), _s); _n_nodes += _roots.size(); if (_verbose) annoylib_showUpdate("has %d nodes\n", _n_nodes); if (_on_disk) { if (!remap_memory_and_truncate(&_nodes, _fd, static_cast(_s) * static_cast(_nodes_size), static_cast(_s) * static_cast(_n_nodes))) { // TODO: this probably creates an index in a corrupt state... 
not sure what to do set_error_from_errno(error, "Unable to truncate"); return false; } _nodes_size = _n_nodes; } _built = true; return true; } bool unbuild(char** error=NULL) { if (_loaded) { set_error_from_string(error, "You can't unbuild a loaded index"); return false; } _roots.clear(); _n_nodes = _n_items; _built = false; return true; } bool save(const char* filename, bool prefault=false, char** error=NULL) { if (!_built) { set_error_from_string(error, "You can't save an index that hasn't been built"); return false; } if (_on_disk) { return true; } else { // Delete file if it already exists (See issue #335) #ifndef _MSC_VER unlink(filename); #else _unlink(filename); #endif FILE *f = fopen(filename, "wb"); if (f == NULL) { set_error_from_errno(error, "Unable to open"); return false; } if (fwrite(_nodes, _s, _n_nodes, f) != (size_t) _n_nodes) { set_error_from_errno(error, "Unable to write"); return false; } if (fclose(f) == EOF) { set_error_from_errno(error, "Unable to close"); return false; } unload(); return load(filename, prefault, error); } } void reinitialize() { _fd = 0; _nodes = NULL; _loaded = false; _n_items = 0; _n_nodes = 0; _nodes_size = 0; _on_disk = false; _seed = Random::default_seed; _roots.clear(); } void unload() { if (_on_disk && _fd) { #ifndef _MSC_VER close(_fd); #else _close(_fd); #endif munmap(_nodes, _s * _nodes_size); } else { if (_fd) { // we have mmapped data #ifndef _MSC_VER close(_fd); #else _close(_fd); #endif munmap(_nodes, _n_nodes * _s); } else if (_nodes) { // We have heap allocated data free(_nodes); } } reinitialize(); if (_verbose) annoylib_showUpdate("unloaded\n"); } bool load(const char* filename, bool prefault=false, char** error=NULL) { #ifndef _MSC_VER _fd = open(filename, O_RDONLY, (int)0400); #else _fd = _open(filename, _O_RDONLY, (int)0400); #endif if (_fd == -1) { set_error_from_errno(error, "Unable to open"); _fd = 0; return false; } off_t size = lseek_getsize(_fd); if (size == -1) { set_error_from_errno(error, 
"Unable to get size"); return false; } else if (size == 0) { set_error_from_errno(error, "Size of file is zero"); return false; } else if (size % _s) { // Something is fishy with this index! set_error_from_errno(error, "Index size is not a multiple of vector size. Ensure you are opening using the same metric you used to create the index."); return false; } int flags = MAP_SHARED; if (prefault) { #ifdef MAP_POPULATE flags |= MAP_POPULATE; #else annoylib_showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform"); #endif } _nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0); _n_nodes = (S)(size / _s); // Find the roots by scanning the end of the file and taking the nodes with most descendants _roots.clear(); S m = -1; for (S i = _n_nodes - 1; i >= 0; i--) { S k = _get(i)->n_descendants; if (m == -1 || k == m) { _roots.push_back(i); m = k; } else { break; } } // hacky fix: since the last root precedes the copy of all roots, delete it if (_roots.size() > 1 && _get(_roots.front())->children[0] == _get(_roots.back())->children[0]) _roots.pop_back(); _loaded = true; _built = true; _n_items = m; if (_verbose) annoylib_showUpdate("found %zu roots with degree %d\n", _roots.size(), m); return true; } T get_distance(S i, S j) const { return D::normalized_distance(D::distance(_get(i), _get(j), _f)); } void get_nns_by_item(S item, size_t n, int search_k, vector* result, vector* distances) const { // TODO: handle OOB const Node* m = _get(item); _get_all_nns(m->v, n, search_k, result, distances); } void get_nns_by_vector(const T* w, size_t n, int search_k, vector* result, vector* distances) const { _get_all_nns(w, n, search_k, result, distances); } S get_n_items() const { return _n_items; } S get_n_trees() const { return (S)_roots.size(); } void verbose(bool v) { _verbose = v; } void get_item(S item, T* v) const { // TODO: handle OOB Node* m = _get(item); memcpy(v, m->v, (_f) * sizeof(T)); } void set_seed(R seed) { _seed = seed; } void 
thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) { // Each thread needs its own seed, otherwise each thread would be building the same tree(s) Random _random(_seed + thread_idx); vector thread_roots; while (1) { if (q == -1) { threaded_build_policy.lock_n_nodes(); if (_n_nodes >= 2 * _n_items) { threaded_build_policy.unlock_n_nodes(); break; } threaded_build_policy.unlock_n_nodes(); } else { if (thread_roots.size() >= (size_t)q) { break; } } if (_verbose) annoylib_showUpdate("pass %zd...\n", thread_roots.size()); vector indices; threaded_build_policy.lock_shared_nodes(); for (S i = 0; i < _n_items; i++) { if (_get(i)->n_descendants >= 1) { // Issue #223 indices.push_back(i); } } threaded_build_policy.unlock_shared_nodes(); thread_roots.push_back(_make_tree(indices, true, _random, threaded_build_policy)); } threaded_build_policy.lock_roots(); _roots.insert(_roots.end(), thread_roots.begin(), thread_roots.end()); threaded_build_policy.unlock_roots(); } protected: void _reallocate_nodes(S n) { const double reallocation_factor = 1.3; S new_nodes_size = std::max(n, (S) ((_nodes_size + 1) * reallocation_factor)); void *old = _nodes; if (_on_disk) { if (!remap_memory_and_truncate(&_nodes, _fd, static_cast(_s) * static_cast(_nodes_size), static_cast(_s) * static_cast(new_nodes_size)) && _verbose) annoylib_showUpdate("File truncation error\n"); } else { _nodes = realloc(_nodes, _s * new_nodes_size); memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s); } _nodes_size = new_nodes_size; if (_verbose) annoylib_showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes); } void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) { if (n > _nodes_size) { threaded_build_policy.lock_nodes(); _reallocate_nodes(n); threaded_build_policy.unlock_nodes(); } } void _allocate_size(S n) { if (n > _nodes_size) { _reallocate_nodes(n); } } Node* _get(const S i) 
const { return get_node_ptr(_nodes, _s, i); } double _split_imbalance(const vector& left_indices, const vector& right_indices) { double ls = (float)left_indices.size(); double rs = (float)right_indices.size(); float f = ls / (ls + rs + 1e-9); // Avoid 0/0 return std::max(f, 1-f); } S _make_tree(const vector& indices, bool is_root, Random& _random, ThreadedBuildPolicy& threaded_build_policy) { // The basic rule is that if we have <= _K items, then it's a leaf node, otherwise it's a split node. // There's some regrettable complications caused by the problem that root nodes have to be "special": // 1. We identify root nodes by the arguable logic that _n_items == n->n_descendants, regardless of how many descendants they actually have // 2. Root nodes with only 1 child need to be a "dummy" parent // 3. Due to the _n_items "hack", we need to be careful with the cases where _n_items <= _K or _n_items > _K if (indices.size() == 1 && !is_root) return indices[0]; if (indices.size() <= (size_t)_K && (!is_root || (size_t)_n_items <= (size_t)_K || indices.size() == 1)) { threaded_build_policy.lock_n_nodes(); _allocate_size(_n_nodes + 1, threaded_build_policy); S item = _n_nodes++; threaded_build_policy.unlock_n_nodes(); threaded_build_policy.lock_shared_nodes(); Node* m = _get(item); m->n_descendants = is_root ? _n_items : (S)indices.size(); // Using std::copy instead of a loop seems to resolve issues #3 and #13, // probably because gcc 4.8 goes overboard with optimizations. // Using memcpy instead of std::copy for MSVC compatibility. #235 // Only copy when necessary to avoid crash in MSVC 9. 
#293 if (!indices.empty()) memcpy(m->children, &indices[0], indices.size() * sizeof(S)); threaded_build_policy.unlock_shared_nodes(); return item; } threaded_build_policy.lock_shared_nodes(); vector children; for (size_t i = 0; i < indices.size(); i++) { S j = indices[i]; Node* n = _get(j); if (n) children.push_back(n); } vector children_indices[2]; Node* m = (Node*)alloca(_s); for (int attempt = 0; attempt < 3; attempt++) { children_indices[0].clear(); children_indices[1].clear(); D::create_split(children, _f, _s, _random, m); for (size_t i = 0; i < indices.size(); i++) { S j = indices[i]; Node* n = _get(j); if (n) { bool side = D::side(m, n->v, _f, _random); children_indices[side].push_back(j); } else { annoylib_showUpdate("No node for index %d?\n", j); } } if (_split_imbalance(children_indices[0], children_indices[1]) < 0.95) break; } threaded_build_policy.unlock_shared_nodes(); // If we didn't find a hyperplane, just randomize sides as a last option while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) { if (_verbose) annoylib_showUpdate("\tNo hyperplane found (left has %zu children, right has %zu children)\n", children_indices[0].size(), children_indices[1].size()); children_indices[0].clear(); children_indices[1].clear(); // Set the vector to 0.0 for (int z = 0; z < _f; z++) m->v[z] = 0; for (size_t i = 0; i < indices.size(); i++) { S j = indices[i]; // Just randomize... children_indices[_random.flip()].push_back(j); } } int flip = (children_indices[0].size() > children_indices[1].size()); m->n_descendants = is_root ? 
_n_items : (S)indices.size(); for (int side = 0; side < 2; side++) { // run _make_tree for the smallest child first (for cache locality) m->children[side^flip] = _make_tree(children_indices[side^flip], false, _random, threaded_build_policy); } threaded_build_policy.lock_n_nodes(); _allocate_size(_n_nodes + 1, threaded_build_policy); S item = _n_nodes++; threaded_build_policy.unlock_n_nodes(); threaded_build_policy.lock_shared_nodes(); memcpy(_get(item), m, _s); threaded_build_policy.unlock_shared_nodes(); return item; } void _get_all_nns(const T* v, size_t n, int search_k, vector* result, vector* distances) const { Node* v_node = (Node *)alloca(_s); D::template zero_value(v_node); memcpy(v_node->v, v, sizeof(T) * _f); D::init_node(v_node, _f); std::priority_queue > q; if (search_k == -1) { search_k = n * _roots.size(); } for (size_t i = 0; i < _roots.size(); i++) { q.push(make_pair(Distance::template pq_initial_value(), _roots[i])); } std::vector nns; while (nns.size() < (size_t)search_k && !q.empty()) { const pair& top = q.top(); T d = top.first; S i = top.second; Node* nd = _get(i); q.pop(); if (nd->n_descendants == 1 && i < _n_items) { nns.push_back(i); } else if (nd->n_descendants <= _K) { const S* dst = nd->children; nns.insert(nns.end(), dst, &dst[nd->n_descendants]); } else { T margin = D::margin(nd, v, _f); q.push(make_pair(D::pq_distance(d, margin, 1), static_cast(nd->children[1]))); q.push(make_pair(D::pq_distance(d, margin, 0), static_cast(nd->children[0]))); } } // Get distances for all items // To avoid calculating distance multiple times for any items, sort by id std::sort(nns.begin(), nns.end()); vector > nns_dist; S last = -1; for (size_t i = 0; i < nns.size(); i++) { S j = nns[i]; if (j == last) continue; last = j; if (_get(j)->n_descendants == 1) // This is only to guard a really obscure case, #284 nns_dist.push_back(make_pair(D::distance(v_node, _get(j), _f), j)); } size_t m = nns_dist.size(); size_t p = n < m ? 
n : m; // Return this many items std::partial_sort(nns_dist.begin(), nns_dist.begin() + p, nns_dist.end()); for (size_t i = 0; i < p; i++) { if (distances) distances->push_back(D::normalized_distance(nns_dist[i].first)); result->push_back(nns_dist[i].second); } } }; class AnnoyIndexSingleThreadedBuildPolicy { public: template static void build(AnnoyIndex* annoy, int q, int n_threads) { AnnoyIndexSingleThreadedBuildPolicy threaded_build_policy; annoy->thread_build(q, 0, threaded_build_policy); } void lock_n_nodes() {} void unlock_n_nodes() {} void lock_nodes() {} void unlock_nodes() {} void lock_shared_nodes() {} void unlock_shared_nodes() {} void lock_roots() {} void unlock_roots() {} }; #ifdef ANNOYLIB_MULTITHREADED_BUILD class AnnoyIndexMultiThreadedBuildPolicy { private: std::shared_timed_mutex nodes_mutex; std::mutex n_nodes_mutex; std::mutex roots_mutex; public: template static void build(AnnoyIndex* annoy, int q, int n_threads) { AnnoyIndexMultiThreadedBuildPolicy threaded_build_policy; if (n_threads == -1) { // If the hardware_concurrency() value is not well defined or not computable, it returns 0. // We guard against this by using at least 1 thread. n_threads = std::max(1, (int)std::thread::hardware_concurrency()); } vector threads(n_threads); for (int thread_idx = 0; thread_idx < n_threads; thread_idx++) { int trees_per_thread = q == -1 ? 
-1 : (int)floor((q + thread_idx) / n_threads); threads[thread_idx] = std::thread( &AnnoyIndex::thread_build, annoy, trees_per_thread, thread_idx, std::ref(threaded_build_policy) ); } for (auto& thread : threads) { thread.join(); } } void lock_n_nodes() { n_nodes_mutex.lock(); } void unlock_n_nodes() { n_nodes_mutex.unlock(); } void lock_nodes() { nodes_mutex.lock(); } void unlock_nodes() { nodes_mutex.unlock(); } void lock_shared_nodes() { nodes_mutex.lock_shared(); } void unlock_shared_nodes() { nodes_mutex.unlock_shared(); } void lock_roots() { roots_mutex.lock(); } void unlock_roots() { roots_mutex.unlock(); } }; #endif } #endif // vim: tabstop=2 shiftwidth=2 RcppAnnoy/inst/include/RcppAnnoy.h0000644000176200001440000000373515131017650016654 0ustar liggesusers// Emacs make this -*- mode: C++; -*- // RcppAnnoy // // R bindings for the 'Annoy' Approximate Nearest Neighbor Library #ifndef RCPPANNOY_H #define RCPPANNOY_H // -- include Rcpp headers #define R_NO_REMAP #define STRICT_R_HEADERS #include // -- define a few things needed for Annoy #if defined(__MINGW32__) #undef Realloc #undef Free #endif // define a typesafe function to wrap annoylib.h's debug output #include inline void rcpp_annoy_printer(const char* fmt, ...) 
{ char buffer[1024]; va_list args; va_start(args, fmt); vsnprintf(buffer, sizeof(buffer), fmt, args); va_end(args); REprintf("%s", buffer); } #define __ERROR_PRINTER_OVERRIDE__ rcpp_annoy_printer #include "annoylib.h" #include "kissrandom.h" // -- some version housekeeping not provided by Annoy #define ANNOY_VERSION_MAJOR 1 #define ANNOY_VERSION_MINOR 17 #define ANNOY_VERSION_PATCH 3 // create a single 'comparable' number out of version, minor and patch #define Annoy_Version(v,m,p) (((v) * 65536) + ((m) * 256) + (p)) // current build is encoded in ANNOY_VERSION #define ANNOY_VERSION Annoy_Version(ANNOY_VERSION_MAJOR,ANNOY_VERSION_MINOR,ANNOY_VERSION_PATCH) // -- same for RcppAnnoy #define RCPPANNOY_VERSION_MAJOR 0 #define RCPPANNOY_VERSION_MINOR 0 #define RCPPANNOY_VERSION_PATCH 23 #define RCPPANNOY_VERSION_MICRO 0 #define RcppAnnoyVersion(maj, min, rev, dev) (((maj)*1000000) + ((min)*10000) + ((rev)*100) + (dev)) #define RCPPANNOY_VERSION RcppAnnoyVersion(RCPPANNOY_VERSION_MAJOR,RCPPANNOY_VERSION_MINOR,RCPPANNOY_VERSION_PATCH,RCPPANNOY_VERSION_MICRO) // -- convenience typedefs // prefixed with Rcpp to ensure we are most unlikely to clash with upstream defines // usage of these is entirely optional #ifdef ANNOYLIB_MULTITHREADED_BUILD typedef Annoy::AnnoyIndexMultiThreadedBuildPolicy RcppAnnoyIndexThreadPolicy; #else typedef Annoy::AnnoyIndexSingleThreadedBuildPolicy RcppAnnoyIndexThreadPolicy; #endif #endif RcppAnnoy/inst/include/kissrandom.h0000644000176200001440000000504713763170231017117 0ustar liggesusers#ifndef KISSRANDOM_H #define KISSRANDOM_H #if defined(_MSC_VER) && _MSC_VER == 1500 typedef unsigned __int32 uint32_t; typedef unsigned __int64 uint64_t; #else #include #endif // KISS = "keep it simple, stupid", but high quality random number generator // http://www0.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf -> "Use a good RNG and build it into your code" // http://mathforum.org/kb/message.jspa?messageID=6627731 //
https://de.wikipedia.org/wiki/KISS_(Zufallszahlengenerator) // 32 bit KISS struct Kiss32Random { uint32_t x; uint32_t y; uint32_t z; uint32_t c; static const uint32_t default_seed = 123456789; #if __cplusplus < 201103L typedef uint32_t seed_type; #endif // seed must be != 0 Kiss32Random(uint32_t seed = default_seed) { x = seed; y = 362436000; z = 521288629; c = 7654321; } uint32_t kiss() { // Linear congruence generator x = 69069 * x + 12345; // Xor shift y ^= y << 13; y ^= y >> 17; y ^= y << 5; // Multiply-with-carry uint64_t t = 698769069ULL * z + c; c = t >> 32; z = (uint32_t) t; return x + y + z; } inline int flip() { // Draw random 0 or 1 return kiss() & 1; } inline size_t index(size_t n) { // Draw random integer between 0 and n-1 where n is at most the number of data points you have return kiss() % n; } inline void set_seed(uint32_t seed) { x = seed; } }; // 64 bit KISS. Use this if you have more than about 2^24 data points ("big data" ;) ) struct Kiss64Random { uint64_t x; uint64_t y; uint64_t z; uint64_t c; static const uint64_t default_seed = 1234567890987654321ULL; #if __cplusplus < 201103L typedef uint64_t seed_type; #endif // seed must be != 0 Kiss64Random(uint64_t seed = default_seed) { x = seed; y = 362436362436362436ULL; z = 1066149217761810ULL; c = 123456123456123456ULL; } uint64_t kiss() { // Linear congruence generator z = 6906969069LL*z+1234567; // Xor shift y ^= (y<<13); y ^= (y>>17); y ^= (y<<43); // Multiply-with-carry (uint128_t t = (2^58 + 1) * x + c; c = t >> 64; x = (uint64_t) t) uint64_t t = (x<<58)+c; c = (x>>6); x += t; c += (x #include #include #include #define PROT_NONE 0 #define PROT_READ 1 #define PROT_WRITE 2 #define PROT_EXEC 4 #define MAP_FILE 0 #define MAP_SHARED 1 #define MAP_PRIVATE 2 #define MAP_TYPE 0xf #define MAP_FIXED 0x10 #define MAP_ANONYMOUS 0x20 #define MAP_ANON MAP_ANONYMOUS #define MAP_FAILED ((void *)-1) /* Flags for msync. 
*/ #define MS_ASYNC 1 #define MS_SYNC 2 #define MS_INVALIDATE 4 #ifndef FILE_MAP_EXECUTE #define FILE_MAP_EXECUTE 0x0020 #endif static int __map_mman_error(const DWORD err, const int deferr) { if (err == 0) return 0; //TODO: implement return err; } static DWORD __map_mmap_prot_page(const int prot) { DWORD protect = 0; if (prot == PROT_NONE) return protect; if ((prot & PROT_EXEC) != 0) { protect = ((prot & PROT_WRITE) != 0) ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ; } else { protect = ((prot & PROT_WRITE) != 0) ? PAGE_READWRITE : PAGE_READONLY; } return protect; } static DWORD __map_mmap_prot_file(const int prot) { DWORD desiredAccess = 0; if (prot == PROT_NONE) return desiredAccess; if ((prot & PROT_READ) != 0) desiredAccess |= FILE_MAP_READ; if ((prot & PROT_WRITE) != 0) desiredAccess |= FILE_MAP_WRITE; if ((prot & PROT_EXEC) != 0) desiredAccess |= FILE_MAP_EXECUTE; return desiredAccess; } inline void* mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off) { HANDLE fm, h; void * map = MAP_FAILED; #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable: 4293) #endif const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD)) ? (DWORD)off : (DWORD)(off & 0xFFFFFFFFL); const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD)) ? (DWORD)0 : (DWORD)((off >> 32) & 0xFFFFFFFFL); const DWORD protect = __map_mmap_prot_page(prot); const DWORD desiredAccess = __map_mmap_prot_file(prot); const off_t maxSize = off + (off_t)len; const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD)) ? (DWORD)maxSize : (DWORD)(maxSize & 0xFFFFFFFFL); const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD)) ? (DWORD)0 : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL); #ifdef _MSC_VER #pragma warning(pop) #endif errno = 0; if (len == 0 /* Unsupported flag combinations */ || (flags & MAP_FIXED) != 0 /* Usupported protection combinations */ || prot == PROT_EXEC) { errno = EINVAL; return MAP_FAILED; } h = ((flags & MAP_ANONYMOUS) == 0) ? 
(HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE; if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE) { errno = EBADF; return MAP_FAILED; } fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL); if (fm == NULL) { errno = __map_mman_error(GetLastError(), EPERM); return MAP_FAILED; } map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len); CloseHandle(fm); if (map == NULL) { errno = __map_mman_error(GetLastError(), EPERM); return MAP_FAILED; } return map; } inline int munmap(void *addr, size_t len) { if (UnmapViewOfFile(addr)) return 0; errno = __map_mman_error(GetLastError(), EPERM); return -1; } inline int mprotect(void *addr, size_t len, int prot) { DWORD newProtect = __map_mmap_prot_page(prot); DWORD oldProtect = 0; if (VirtualProtect(addr, len, newProtect, &oldProtect)) return 0; errno = __map_mman_error(GetLastError(), EPERM); return -1; } inline int msync(void *addr, size_t len, int flags) { if (FlushViewOfFile(addr, len)) return 0; errno = __map_mman_error(GetLastError(), EPERM); return -1; } inline int mlock(const void *addr, size_t len) { if (VirtualLock((LPVOID)addr, len)) return 0; errno = __map_mman_error(GetLastError(), EPERM); return -1; } inline int munlock(const void *addr, size_t len) { if (VirtualUnlock((LPVOID)addr, len)) return 0; errno = __map_mman_error(GetLastError(), EPERM); return -1; } #if !defined(__MINGW32__) inline int ftruncate(const int fd, const int64_t size) { if (fd < 0) { errno = EBADF; return -1; } HANDLE h = reinterpret_cast(_get_osfhandle(fd)); LARGE_INTEGER li_start, li_size; li_start.QuadPart = static_cast(0); li_size.QuadPart = size; if (SetFilePointerEx(h, li_start, NULL, FILE_CURRENT) == ~0 || SetFilePointerEx(h, li_size, NULL, FILE_BEGIN) == ~0 || !SetEndOfFile(h)) { unsigned long error = GetLastError(); fprintf(stderr, "I/O error while truncating: %lu\n", error); switch (error) { case ERROR_INVALID_HANDLE: errno = EBADF; break; default: errno = EIO; break; } 
return -1; } return 0; } #endif #endif RcppAnnoy/inst/rmd/0000755000176200001440000000000015130530730013717 5ustar liggesusersRcppAnnoy/inst/rmd/UsingAnnoyInCpp.Rmd0000644000176200001440000002027515130530624017417 0ustar liggesusers--- title: Using Annoy in package C++ code author: - name: Aaron Lun affiliation: a address: - code: a address: \url{https://github.com/LTLA} lead_author_surname: Lun doi: "https://cran.r-project.org/package=RcppAnnoy" keywords: - Rcpp - Annoy - Approximate Nearest Neighbours footer_contents: "RcppAnnoy Vignette" date: \today output: pinp::pinp: collapse: true keep_tex: false bibliography: rcppannoy.bib skip_final_break: true abstract: | This note shows how to use the Annoy library for _Approximate Nearest Neighbours (Oh Yeah)_ from C++ code using the headers provided by the RcppAnnoy package. header-includes: > \newcommand{\proglang}[1]{\textsf{#1}} \newcommand{\pkg}[1]{\textbf{#1}} vignette: > %\VignetteIndexEntry{Using Annoy in C++} %\VignetteEngine{knitr::rmarkdown} %\VignetteKeywords{Rcpp, Annoy, R, Cpp, Approximate Nearest Neighbours} %\VignettePackage{RcppAnnoy} %\usepackage[utf8]{inputenc} --- ```{r, echo=FALSE, results="hide"} knitr::opts_chunk$set(error=FALSE, warning=FALSE, message=FALSE, eval=FALSE) ``` # Setting up your package The [\pkg{Annoy}](https://github.com/spotify/annoy) \proglang{C++} library \citep{Github:annoy} implements a quick and simple method for _approximate nearest neighbor (oh yeah)_ searching. The \pkg{RcppAnnoy} package \citep{CRAN:RcppAnnoy} provides a centralized resource for developers to use this code in their own \proglang{R} packages by relying on \pkg{Rcpp} \citep{TAS:Rcpp,CRAN:Rcpp}. To use \pkg{Annoy} in \proglang{C++} code, simply put in your `DESCRIPTION` the line ``` LinkingTo: RcppAnnoy ``` and the header files will be available for inclusion into your package's source files. Note that \pkg{Annoy} is a header-only library so no additional commands are necessary for the linker. 
# Including the header files

Obviously, the header files need to be `include`d in any \proglang{C++} source file that uses \pkg{Annoy}. A few macros also need to be added to handle Windows-specific behaviour and to ensure that error messages are printed through R. Version number comparison macros help in conditioning changes on a particular version. Since release 0.0.17 all this is now expressed centrally in a header in the package so users can just use this one-liner:

```{Rcpp, eval=FALSE}
#include "RcppAnnoy.h"
```

# Defining the search type

The `AnnoyIndex` template class can accommodate different data types, distance metrics, random number generators, and threading policies (where the latter are a choice between sequential or multithreaded). Here, we will consider the most common application of a nearest-neighbor search on floating-point data with Euclidean distance. We `typedef` the type and realized template for convenience:

```{Rcpp, eval=FALSE}
typedef float ANNOYTYPE;
typedef Annoy::AnnoyIndex<int, ANNOYTYPE, Annoy::Euclidean, Annoy::Kiss64Random, RcppAnnoyIndexThreadPolicy> MyAnnoyIndex;
```

Note that we use `float` by default, rather than the more conventional `double`. This is chosen for speed and to be consistent with the original Python implementation.

The \pkg{Annoy} library uses random number generation during index creation (via the `Kiss64Random` class), with a seed that is separate from R's RNG seed. By default, the seed is fixed and results will be "deterministic" in the sense that repeated runs on the same data will yield the same result. They will also be unresponsive to the state of R's RNG seed. The seed used by `AnnoyIndex` can be specified by the `set_seed` method, which should be called before adding items to the index.

# Building an index

Let's say we have an `Rcpp::NumericMatrix` named `mat`, where each row corresponds to a sample and each column corresponds to a dimension/variable.
```{Rcpp, eval=FALSE}
const size_t nsamples=mat.nrow();
const size_t ndims=mat.ncol();
```

It is simple to build a `MyAnnoyIndex` containing the data in this matrix. Note the copy from the double-precision matrix into a `float` vector before calling `add_item()`.

```{Rcpp, eval=FALSE}
MyAnnoyIndex obj(ndims);
// from <vector>
std::vector<ANNOYTYPE> tmp(ndims);
for (size_t i=0; i<nsamples; ++i) {
    Rcpp::NumericMatrix::Row cr=mat.row(i);
    // from <algorithm>
    std::copy(cr.begin(), cr.end(), tmp.begin());
    obj.add_item(i, tmp.data());
}
obj.build(50);
```

The `build()` method accepts an integer argument specifying the number of trees to use to construct the index. Indices with more trees are larger (in memory and on file) but yield greater search accuracy. The index can also be saved to file via

```{Rcpp, eval=FALSE}
obj.save(indexfile.c_str());
```

and reloaded in some other context:

```{Rcpp, eval=FALSE}
MyAnnoyIndex obj2(ndims);
obj2.load(indexfile.c_str()); // same as 'obj'.
```

This is helpful for parallelization across workers running in different \proglang{R} sessions. It also allows us to avoid rebuilding the index in applications where the same data set is to be queried multiple times.

# Searching for nearest neighbors

Let's say that we want to find the `K` (approximate) nearest neighbors of sample `c` in the original data set used to construct `obj`. To do this, we write:

```{Rcpp, eval=FALSE}
std::vector<int> neighbor_index;
std::vector<ANNOYTYPE> neighbor_dist;
obj.get_nns_by_item(c, K + 1, -1, &neighbor_index, &neighbor_dist);
```

Upon return, the `neighbor_index` vector will be filled with the sample numbers of the `K` nearest neighbors (i.e., rows of the original `mat`, in this case). The `neighbor_dist` vector will be filled with the distances to each of those neighbors. Note that:

- We ask for the `K+1` nearest neighbors, as the set returned in `neighbor_index` will usually include `c` itself. This should be taken into consideration when the results are used in downstream calculations.
- The returned neighbors are sorted by increasing distance from `c`.
However, note that `c` itself may not necessarily be at the start if there is another point with the same coordinates. - `get_nns_by_item()` requires pointers to the vectors rather than the vectors themselves. If the pointer to the output vector for distances is `NULL`, distances will not be returned. This provides a slight performance boost when only the identities of the neighbors are of interest. - The `-1` is the default value for a tuning parameter that specifies how many samples should be collected from the trees for exhaustive distance calculations. This defaults to the number of trees multiplied by the number of requested neighbors; larger values will increase accuracy at the cost of speed. Another application is to query the index for the neighbors of a new sample given its coordinates. Assuming we have a `float*` to an array of coordinates of length `ndims`, we do: ```{Rcpp, eval=FALSE} obj.get_nns_by_vector(query, K+1, -1, &neighbor_index, &neighbor_dist); ``` # Further information The [Annoy repository](https://github.com/spotify/Annoy) is the canonical source of all things Annoying. Questions or issues related to the \pkg{Annoy} \proglang{C++} library itself should be posted there. Any issues specific to the \pkg{RcppAnnoy} interface should be posted at its separate [Github](https://github.com/eddelbuettel/rcppannoy) repository. An example of using the Annoy library via \pkg{RcppAnnoy} is available in the [\pkg{BiocNeighbors}](https://bioconductor.org/packages/BiocNeighbors) package \citep{Bioc:BiocNeighbors}. 
RcppAnnoy/inst/rmd/rcppannoy.bib0000644000176200001440000000617015130476621016422 0ustar liggesusers@String{CRAN = "https://CRAN.R-Project.org/" } @Manual{Bioc:BiocNeighbors, title = {BiocNeighbors: Nearest Neighbor Detection for Bioconductor Packages}, author = {Aaron Lun}, year = 2025, doi = {10.18129/B9.bioc.BiocNeighbors}, url = {https://bioconductor.org/packages/BiocNeighbors}, note = {R package version 2.4.0}, } @Manual{CRAN:Rcpp, title = {{Rcpp}: Seamless {R} and {C++} Integration}, author = {Dirk Eddelbuettel and Romain Fran\c{c}ois and JJ Allaire and Kevin Ushey and Qiang Kou and Nathan Russell and John Chambers and Douglas Bates}, year = 2026, note = {R package version 1.1.1}, doi = {10.32614/CRAN.package.Rcpp}, url = CRAN # "package=Rcpp" } @Manual{CRAN:RcppAnnoy, title = {RcppAnnoy: {Rcpp} Bindings for {Annoy}, a Library for Approximate Nearest Neighbors}, author = {Dirk Eddelbuettel}, year = 2026, note = {R package version 0.0.23}, doi = {10.32614/CRAN.package.RcppAnnoy}, url = CRAN # "package=RcppAnnoy" } @Book{Eddelbuettel:2013:Rcpp, author = {Dirk Eddelbuettel}, title = {Seamless R and C++ Integration with Rcpp}, publisher = {Springer}, series = {Use R!}, year = 2013, address = {New York}, doi = {10.1007/978-1-4614-6868-4}, isbn = {978-1-4614-6867-7} } @Manual{Github:annoy, author = {Erik Bernhardsson}, title = {Annoy: Approximate Nearest Neighbors in C++/Python}, year = 2023, note = {Python package version 1.17.2}, url = {https://github.com/spotify/annoy} } @Article{JSS:Rcpp, title = {{Rcpp}: Seamless {R} and {C++} Integration}, author = {Dirk Eddelbuettel and Romain Fran\c{c}ois}, journal = {Journal of Statistical Software}, year = 2011, volume = 40, number = 8, pages = {1--18}, url = {http://www.jstatsoft.org/v40/i08/}, doi = {10.18637/jss.v040.i08} } @Article{Lun+Richard+Marioni:2017, author = {Aaron T. L. Lun and Arianne C. Richard and John C. Marioni}, title = {Testing for differential abundance in mass cytometry data}, journal = {Nat. 
Methods}, year = 2017, volume = 14, number = 7, pages = {707--709}, month = {Jul}, doi = {10.1038/nmeth.4295} } @Article{TAS:Rcpp, author = {Dirk Eddelbuettel and James Joseph Balamuta}, title = {Extending R with C++: A Brief Introduction to Rcpp}, journal = {The American Statistician}, volume = 72, number = 1, year = 2018, month = {August}, doi = {10.1080/00031305.2017.1375990} } @Article{Wang:2012, author = {Xueyi Wang}, title = {A fast exact k-nearest neighbors algorithm for high dimensional search using k-means clustering and triangle inequality}, journal = {Proc Int Jt Conf Neural Netw}, year = 2012, volume = 43, number = 6, pages = {2351--2358}, month = {Feb}, doi = {10.1016/j.patcog.2010.01.003} } RcppAnnoy/inst/doc/0000755000176200001440000000000015131164023013701 5ustar liggesusersRcppAnnoy/inst/doc/UsingAnnoyInCpp.pdf.asis0000644000176200001440000000030515037456253020372 0ustar liggesusers%\VignetteIndexEntry{Using Annoy in C++} %\VignetteKeywords{Rcpp, Annoy, R, Cpp, Approximate Nearest Neighbours} %\VignettePackage{RcppAnnoy} %\VignetteEncoding{UTF-8} %\VignetteEngine{Rcpp::asis} RcppAnnoy/inst/doc/UsingAnnoyInCpp.pdf0000644000176200001440000017373715131164023017442 0ustar liggesusers%PDF-1.5 % 1 0 obj << /Type /ObjStm /Length 4409 /Filter /FlateDecode /N 78 /First 652 >> stream x\YsƖ~_K@˭$+"6evn"! E*$; Bpxd ttݠX$#m"y#iDd"td#8SE\("8E 99F*BGBIHxv6R*4>RHH3cP\&Id4:2Ynй..^ JDb. 
}&oNb9Wf,O(sQ<Ń<[@L,ڙ,c9J@yyUzuŘ͘Ux|R|,cձLuW=Q@)([}~3K*6UL r̺=Or XPLʘAq_{2uƭMAɒtL\̈́fV­;gB|(p ?ºE+O褴dZ7E)_B$m>:^.9kKDh4F< {m?%.>EU SÖR_3u1vL9~#| e褒#7{%ǽ mAyRڴNfUXeEXN .E29KzEs LooAɰ?I&8 b5$Zl46ӿI2}I${,/ٴ&\MM|^I Q]2X `,lAy)"BDw{GY:;ywԛ'UMtmtMPDX׉#8.ݼO0”qv)*h_jMU 9]AH"+qīD…o~)k[~(z}'8V@1+kIYш]IGJG xGDFPyi SOPVOϟgsYrN$o !}|zٟ%,Hx:.4F,| %eqzY??,^C"obnG955mp3KӦNhki:0j=xuPڻZ]Ff#ioڨ^/vYk U AM i2;2xi+#S⼇BhSڪ M|a=V|˼[ `wH{}s4O EFI~UeNf|5r=>Ks[Ll>M~M^ QV]S;jx_GW<-YmJwi<9>-l?!·M$ŵA(7EMQj&Y>E>.kdPHurM'9Y'C|dbG¬LI!Bv{Fv+Ў[:%mo?^֭,HJKqc$ny\TBsӹbrmrLL;L.E&ɃFb^Lދ{y/6ؼRdHSd8YX]LK HmSb>Vq8Z/)bC.f*ǔ%N(+@oI1t!a+gYL'@}L;Š؆C%NXь%qt&Mc͑jw Ȩ_Ѧt,m;?gGw)Og@%wopww|?# 3^`ǕkVcd:OluP%eځ$a=%!}0\eQAqp_%T5Tm$gY^G*DzY6*ʎ@Pry?~|g}W﹓Yy\9ѽм==T-nM esM|O R׎Ծj=6i߷\(6#$m]{lfyhfamIUMfRT :`kiz{n~,ISz .\!&hNt|H,'38Rp鈌)|!-e\x`:.`u''9'^Ng!5(D ⛇kޓwBy;zS cw% ;r&J=I)rL9#\taNG$eUw| Cɝ,N}i(zB}іtc@=#5a‘#/X;wendstream endobj 80 0 obj << /Subtype /XML /Type /Metadata /Length 1658 >> stream GPL Ghostscript 10.05.0 2026-01-12T06:24:51-06:00 2026-01-12T06:24:51-06:00 2026-01-12T06:24:51-06:00 LaTeX with hyperref endstream endobj 81 0 obj << /Filter /FlateDecode /Length 7222 >> stream x=ےƕUyښG"JUv*qdGM*%m0$f$(^fL9MF0 6 ΓUCD7cUr׶sLmk7aݼ}ȓ/Qv#G+%{MOMxjK_כDr4c XEs,WU;LuBLj_WU}3jZEMB(aE6vd1-B&[J5U.i8A&Fe ߩd^ Hx执FHxl ZϘ}'*Pz^ejySrsa@L@iv4wH@\\ Q D,aء =EUF ٴpcc@e3nd{){P" r2cR[k%iBZDq k #8IڔAÂ:B =&բY Jt O?38|zgp.!ITZa_mpQ^Wއ/D[2!ADTT9Mfۦ]: !+"YkLrbF1& qs#&Agz+#4mKXɭ x˝7v2^?D ^Dȟ0"2j>eT1 GBO9aA2Ţڞg1 ΀1O}{u[7-@o|g^ؖ`+"t4- ,(s L>a;p ~[ bٙU}5BaVc4D C`Zs5Tbɋ%H BTk$lAPFLr9Ad0@cR-ؖ\~HJAՉ ovd`<Wn;ٮi40%r膣 49,1M׶Zmk)L-l8,QJ,QU[*PR>ހ"ǹ+f3(i/cQAHw"S2rR.=]}"WlæǴY1D" g.i|b^vO~Q떧AſQnP@4TqȂU5"hgD4 {܅T?!R[T2D>)ߣdڭ:s.PjqFKbKt1t̳5f٥i,㞳l8655&5m^tJ݅@Gp5GGS8vF- Jpkk-Ԛhh;˜ h+GRn2<=WghICЧ!BNW@}^xŠsd:>'>f;@ey_v#%H ,5T5o(ӏy ׏n5ѐ,!r ђu \ͪ7-4Aˊ3D4~[FLvL` m|drb3]Y}}]m@?Oj$",Rs~5efvW:\a xSO i7`hMr]M~٬iȑ 6ZHYwΝ|\P(X`ܞy%,;R_p%pzVd1N!=aʴ+CzQOZ\ma/ߚlCg'QݰrD &jJܢ kI;hnޢ~a؉d:TiT- Ā"g/uV]L.~o:w Ku _jh:=bNqO/Vu>j߄8ֈX@(w=܃,\k٠Al(nز 
`kɡ/0=PA&:/_+@Ԗ9s~|񣴟Q=_z.ב"p?u")ʪN+ZޓQ ~vܱvb@ ŋlT16 S 38b&ZxɀBxvI F<B9G+}ْ4Dd&D2 ɑ.Y1o1-Z 7kswȸ%fuSRWf\˿-L@X!h}S5fυ^ʬϛ>C'`961XWIn7ߋqW~wsAjҊ­Md!1mn })f%o3n3_(O9C0Ϧ YS#@TPM$81 # Sl<4F}T JهKcZTmL %۷c(c)4kpp"-QJ5n('Ìui= i*\9XHxR$_>65 etW [ ,3'b9J)oH䳟ۿJݕ"$jp`Xf=5 05Y6zSOzq;gh?d[m'OHɗM!<(j0c%?B;M"T{&P,^d&?C0gn/ uڢV;M!4+`'*yvh6ݵ,ys5P yuI dY#]CIZbƒrbI5(f5ÜíS՟g*iR`#KGmPiw@T82 .׷>C"IkLI*xdO<昭 ]5jv)pq Ve|eDIOZi6[V(j_/=|,Xagc@;>3M\ؖx*& wFz7"t/ȋ~YmݦWa u3;jamZ!Dzd edA^@)Cb a-rڞ}H[=͆p򗙐[vU<_ycO ru_} @(}]P,e nt N6C@gڳEI1 s \bX씶qɻ @ \QZ͉RS /44q5dGdm;*p)' A fWtwOR<;){߫H*`nQޘ+!.hqqUo^4Dy'wzV) fPMގP\@f?^ 7 ,)A9Yb簼E`Eq} ,O Ob`M"،<{IۻEu*é09It9GCoy20lbO[) Qe]Ys 䤯}m38mM UZ"# u$gׄaPUw 36{>W )*>6Yo "GHA:=Iv2'-nz0s)a.4t[Mwa˜" sYNzmm:j=R7t­ e?zKkz201T!8@_[ *@O`}J7 ;Bg쇫kFRWsx,Q '_G^q0+wEً^9v=X>cAcMCc߲sdR=*2 W#rql|yVpY 'x O$tV\ݏxΥcgX?᮪z NJɌ3gZ=z(Jk ~F=1$ʴ+$z NT03gQA8GBN㭳aE0/Y;#"hl3pZg{ѩe_*G{>QZ6z Z]ߴ7gd:]wpθt9p7,*77?%TBy}&9+_DTOġbe:Nݦ⻠1j/bn]rx'FM/WzPu{X8&LeΩddIܛZen0}hlЬߟ-*#b~ ֽWkX1fm:o)^<\)*d_2.\bm|jk-*fٖ|C/Z=@rxn#DH?pWhH]6E | |u#G tf#=x8\'2@r#cݻp@ G'G-}s`Lw:KjєVCw5Y%M8Jlw/i cUTv8o*c dG茇A<^eХ*ؐWl(~f% v}<]멃ږOal{@ۼ+$~ ,>~\Nj{Sy>-JKX:Zn: E-|yxן;U ,:!N' ü1ǁBtet Wx|nP1 ӥNLĀA9'Қ>d?:H]AW{gQ)pK]T~wm0rxW9M7,t2L[fc5ݥyS׵lg|OfQ}{Y!([7~!RK~%?W%?ɯ6ggy(O g;9OCql1g1.Eendstream endobj 82 0 obj << /Filter /FlateDecode /Length 262 >> stream x]n0 Dw}RP`hI -?`t!8CG:x@QryG]vX> stream xeUyPwauYb[QID]B)XdW ,1QddapkDE.U<+d(fM+yM?{}OBP}H:/0`qPvfĉ0W"xHďwonRps=|%o0t:%H&f)ڝ\tbJ_ WnP%ddk2Ҕ*ur&_5araZLVf'+)#7G*9,:<75 (N>'OUGQ0*Z@EPQT BPj#F-vIRwoUu.wۤw]\oN h^&dn4uƄxH; 7g4,58⩧~/@A#CNh<ُRO&$vbj=ʹ%L|FD;Npz|^fC䧞ü?lrmⅫ,;I9+u3m9&˰ . 
kS@/Є,\3FV5/!Ɋ N\zv-(  ~A ljNiO$G:>*olRe4UWB&]MNM%tF`oX#!_?<^38GWKAk"ɺBȉK~%Yȃ%o9$;s Ts eb S˹9LRD~%}K%愞THC?3eճ(@-8f,7oi&es%sޚ՞诼П]̎] wpz*LeAwxZ9PUħ_Hp& D-: E1PBo-TZ#U( 3t\ `.6+ReD|/Ĩafd cf]0w¨F#|(l{ (|~5Շ2մlq?f^L.]r |e^/+4jűѐZ7YCxbES3t8Gp4@ XD_H~~BA#JVHD__`}He\]{ , s> stream x]MN09EnQ6,@@8( ( n̴`16/vv|w/)s=˸e˥>yڮ|sXӰ eYv_k'lQCӤ4,㿿Bmۭ1)M+0'%)\ipI.)hIѓt`@/ uO 'O zr>)=qH p &((bHԠACE5(raEi虺ƮgkƤ#5ٸfÛ0 ۰1 c#lLFF#.Ӗ̦q ss6.#e4rظFa2r9l\FN#iq91]:'Gme9繟 ~"endstream endobj 85 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 3317 >> stream xW{TSWֿroxEz}䪈؊"GEQEIh4 B1GU*hPQ>fij;beN:;7][ʺk{={o'"ͅDokt+M J 䇉\ᮅw"^rE^nUo4/ŧϥ? "ѿ.\8`ܸY,Qi VXW+\ڤsdZ5dzs0VsL5ѪYGscrjڨr +%7GTM.`NiЫ4fAo f8gR+5d:W,Q1ȘӘ BoV8ļf.h :BL%LfҨ2scB܉ќ0 ~Mɗ*2[85B7qfuYBͩ4,BSYFBI{gTg(*+ߝZ8_1`gT9V7B EQA3YYFɜ:.GbERgd.$&ƹ ;C)j$5E%P~|j4H"*ZLPRj5J"(JNES1T,5BIj.OyQ:eS>ʗD Rn jŋf!%emWQ׋F%=ŻqF)ǔ2%Nr_npQQȳ+K5} d[a یLj!;tb<%0>dz-x-ԋy@V><r6u6n#{M'oW-_2{3YR)==طh)d6lۄ6(QNx8Tg }Nt^IZ"j&vޅ n< >Y]3j-/.|"$eKVr2J,ҵ{?15{k.LAwЕچQ Y. VF%յs]IKKxD)#49wA:d틲:j4}:Ja}U/B{8T{,Q7lid*{ZV~P'|cO;Fdx?!0q2]v!."eQ26KI~lSt2ل> 2?{bĘ%1]~Yx2[C5Z `47)DOb:t,> stream x]nPw?0Β.Zm_@%C / IǧW7uv{ߧ\u=t}3_uz۸?_g׶{imK=۶:U \YjSRT$X}; v;"V%+lG ;z`6K&-<0 v k`aa`aa4s ! c!d 211111100ҰH8  [`}]\]!`P $ APP0( BBA@H((N!VAVN!VAV Q:(HInmPC``PC``PS``[S{{[S{{[S{{[S{{[S{{[S{{[S{{[S{{[S{{[S{{RO@E {O@m|vt=Zh}]/,fmWͲendstream endobj 87 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 5181 >> stream xWTTWp e vcAD1AiJ,4A)"M5& X"jE,bt\潼ks.2J%r rlZ!=!q5I4#G}2 @B?Xԟ!9_d0it#@WAz}_@/!>kB x,R|ky. 
==`ZCj oW_6oU_a~@ EC\7Y{e>v~_M04a2dԯ͘9:y E 5eCR2j eG}ISc)ʑC9Qs rjLͧ&R (#j!52EdʜBYP&bj*Q(?/~VP<D tJH SC(zRsH)-rD*{áG\[Ei}uPKC=*figX{ݳI/wxk7MkioG+3CR_F\R|ÀxUH~5Bm *̖jFɡL3S?26Qpd͓_4D37Ҹ%DRZ8 C; d˨T (޴w_[ sRX& ܲ":7Ga'Q`:JߜOMYa<}?]<-A_ 5?Æ_m_k& n칭YtGi+Y7OE;Aozyc)^܋O:n/]<"J\jpX+[X{ aYwů4AFퟜ鯌z@;c]dsMI^瞗EW;˖VTnlqJ.o`ĺP/19 Չ ΔSúb }$(63^+׋\b Q;1Eݧv,vZ4XXxL( pP3@+E kҷ:fw^^!+{*Nk" _tbì t6cKF|5U]hrtVqlPVS'Z]QF3|#=58@8>?bU=ږN@spv@\lDʨ擠+JCQk& ؚOdP ~3 ٕ'Ur:[f&)1m"nh<<<7:k,غJ[بYX%q"a$Í/X:[K dKl9>U0VVe[w'č2 G(QmLB*JA)w ˰xt t9v 8]-[5#mEbFiYEy gecf5 a)1aQ68[ٗG[d?]jqvZ !`I$K2=v`$I]"RWPjjFٸ")d֩@g2<;H34!La"~݅!ݗ X$7)EPcv$}f!ߝ )YϢLJB/cIPܹdfV8B6P(lXs(P^QцC>$upYkh%Hm[BQ 睟ögd̿-ԅ4G k;(SVe:kbsU'E)32ӧ`"cebzlȝp2iƮtOP+7gEK[3ڰGݞ<n< į,5M( 9}@U̅;N%M%9 {.EB- dyNy̟y&I6<æ?OϠ 4^ɚ% tC -,z|XwaK׹u&c{zc-ˢo[q Up>,ɉ1xb/%jJf]~Dd_}k;*dA1NSZ @F9, )xQGRӛ:_ٖVI\4j,Ga㢵?li֔pd϶Y*6Xn*kթ6hlSȈ2d9^e`X_w?00Yx58^J};4`,k/% 2DaU*l| /Ԭ\Y" @8(K{ dq ;x#~R[;܇!?-= qr 5F_JY)%)Hױ6i&?Q0YP?p H_pZ k8"k-IR'uIQFꀄxILY3d] F:o#[Z4?Fo?o,9Dv|`9(TbK97pX0DVG堂^3}Iɑ#%GTq5e//r( $06Lrzvb$^;8 ,oI*,["gN|N:BwQaJU,2hpv2rƿY$ψ?"$2Ew'Z= + A_(8MoysK23Rҕ؂k?t<# ,$LTGcJcENb~+w4Iخ1^|N^t3=~5\#'ϡ67Wqcb'7(so4 6?pO N8ΰsT%iOzyjF~d+_}%! V;޵`HLKůFO-<1.5;7Ͱĸlnb2OT.# -&%&n  un]{0S`}*pwHdGpPVK-E9Og`OTbԞXb6l\-Z:o/gs![fw1x:{8l`0|o)mmAY ޾)(ᓸU,8qq[j5k`~3GGzZ.Eg&?g2pw?sh;wSEM6z#ikʩS7f;:x+B[$:ԾUVXcLgO+yGtg G trKLY|RB 7H-B4ǗR=N&KzO#꒒Ra/?#FI^>* ']ߜtp7>$4+ K+d--/=w4u[j^#Fɨ~R+\jE]x{ jq0'#̡5eዃvx '3ս{S&:خendstream endobj 88 0 obj << /Filter /FlateDecode /Length 559 >> stream x]APD7fE(XAfۧ"TW>=}^.ncοڽ;_ޮܺc{,aם.:Nt:ڶ?~Gzjoin봼͡| 㧻/{^iբE{E;˞E{mբmj0I{^ijjdjюQ-ڐ-բ--dZ)p 8Nբd?:¨A0U*H QT! Q*D$ SDaUpbaFpϡ{,5\l,jFcQ(0:N'Cdh28 OB@K/Z X \TX \TX \TX \T=x஠~0Y[Tup逩p逩p逩p逩p逩p逩p逩p逩ItT=S-SUƩ͓y+9y,tćΈ7Mwu/endstream endobj 89 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 4027 >> stream xmW XSֽ!ޫ"*Q (NLDT8rJ-5΢Z"*8V*RH-V7 {'/{k24u9aqC֬~ghh EH $΄=P X830;%)9">U$% B .(Mkv!:&jDQ6:qe֛=:"|D#:2ld'F9 3ꝑHjkTftf/glh<0&m1g5vH+\g@&ׁ3E4>ˇd_N F$Cۓ&mn`O6'Z{2l}X/->֮2;U {^iҥ.uLwnE_ejoM@F*V?<{Ԙ_x.|ވ ?`=2Q'?xP6? 
crO|l{0-iJGںu9+Km:U}'|9Zb/쥲0xRACJj,A |FaC韅BrXg*Lt_34 /7 &,/0R MK`(~jQncnwt) %O)x'/L  C6 szh"0bxt{P!'xcm,)MI_qb6ި>\:^tu=SFWv h{UiE"^dK/q`[M TdCb\\s3#u nrAS\6d$_K JN=ҀKQJ #tHt|kuJ{8IZ<đo'#݅{Ybsfx*R^ 㠐p S$IfDC8̋ Dp@:= Q/w]B͵7CttC6A^WeK_UJD)e_*|Y`щsz` 1 otfx\2лxAqxxըCd-fwp%Ɯ Kk\ k7q䝳A$`U~jI,2vSArAl}fg{J9tC,MϤb7tfԩEeh ;l:T7.Za +l@ @:AZV~[.31:;Jj9ůLb+ ~!8Sr,D^y*e]baM|᳹LR#=Ff88]S!Թ&*x1e;?)-Gˆ讀 εCO(9!?59(Qj[aױ6:YE?o\-zKT" glެh$rs5?R-tS3+-?,I:]Tn[n ^?L69C2d N!pIc͓x,ݟNo,wpn˽<[}[Ty~}w\^p$fqb֮guOH\H"ҋ 8{ fGf2ZBDv®Nؒ&6!xo㽁HH?(䬚 dC~N6i;mt~)lx¼!TrN=0ի?Ur*;ry#=ls]I[ۿ 2(][Xt޼ezP_u6DdVT/ly, P 8y1l3!,u r=bx& 6i61 A]?T &[2D ln,$jP u?E'Ys54WWp=6WY,*!ws*Md"֟ԯ\fk/jE:Z">N;>pkt;97jjoZBPv؝Yʾʃǎ]>وPIRxU%ޜOcN?4Ka-(]"qE͊vв5iU>md~[ W??<'v<[Y;$*zW_tb19ҷzL`f ,a7WІllι2 S]_@ "ZSSsW~SfE_#A )o˨}<2* +!m#.ֽSy'.'{Mtkx5{/Z(nCu pm hQ|+B3/+޺OK俀 )pl_=Iu_?73[~"?_:^hэsr¥ǓOvMq ;.H0 ;.YhQԥG- HUC[Us9Aa.AzzonaZ`@:L=S& ;- 詆W]9Or+[NaF5{Mv-l*LӰ?Ca5a `!gN o endstream endobj 90 0 obj << /Filter /FlateDecode /Length 284 >> stream x]1n0 EwB7R\%C MVV܈ Z9Gݓgqmendstream endobj 91 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 2129 >> stream xTiTTW~Mϧ" MGڥED L(0BСt?6V!DEAhEQ\"(.ȉ fP3j4p qs21zxqhοnU}JBP)LcC?7x)̔l\2\ Tv";ۃ&~8r^MNKT)9"鐼t_1م>>>lJ;aWqfmG\Ng0fszދ8tc#ׄAa Nϙ4:6"'EMeCy;}f/jXKՒ .?33ekfbZ3ay. 
VI˓tg&gIa0TȳbĪ1|뚵4Cj|F7<ϋuR86Mk64.Ie4iG!䘵&.CcJqѼbW}QW0k}[}-ot^cehl(+MQg7|&?% f{S*ZBERT x*`j &Q)eKTU)yca_ -vmi0#+[*ZMшd9-~7m 8(p$ ^&[[I?@HE\pCob -b뻩}``Gx**nI^`5kԺd]b:ѯ:`'l4NŽ>+:!7&P /PL]Y/zS.3CJXD@2ȃx?8:;"Y R^qXElY[ P9a]Lx:d ϬG{Km_(h9xI=`&0{|PlU{P |E7v 66r(Ϧ=?19ˇn+gdm2uu*KmIMy3gӥ%oM`:wB ,nzO-{Uyԁ+*yY%ˊ0[AI28HtUHyҫ񏥖2 ڬGU֟-eʷ"%_~gemUtc|g)Nv1aiI\Z:Ҭ?#v Prg鹪* zej3]~W8Ԡ(l(هѮݕշኳ|vj-87e$ԮFx!Ze10n>nA僛s-9yE꼭E;'nKrbP8V/ΣWPi!/3PQeέ ي $NvSTUʚP(dؗDşb)n|DĞ `_|(,l8[E tm{@8kZ^ n)ۯGYpŢE- _͡d2IeC@ VADC`{uF7u2:#eh.)!$8ODn 6)C= Vam0%"&kރwUC='5:ԁNM +,~'a'8I^ RpOC}ܗy'aG'08QS/{f_U>vR1\:4s9bgn/~1|ky:Do@h%&rU:A ytVG7=?[FGJPBVu\l Q6ĵpg .l` wG & ݄\m~dV.e S, Y4N3Qޡw=HbL\fٞߊUMQ$ǫ pHfI>p8[F _O+MvpOC; .|/߽WyӇ-Ee[~?:Fy[ K-<8lWke,ka:STr)ğZdX?l#0'8IqmO""PM/c` Fwa:䅰s:v^ged%.wa㟒px-+ 0#L]ky@aV[O[3|nefdu=uvv?y~endstream endobj 92 0 obj << /Filter /FlateDecode /Length 260 >> stream x]1n0 EwB7Ԓ\%C ߏi^MYͫs ߖ{ l|Sմf$:>|7+_We[1]!F8MR*y!TVId`LNN)H vvT@/ؓ I ӾNH* <Y6ՆzQ6]#k~mՄ{)V} ݵxN\yReRkendstream endobj 93 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 1810 >> stream xuTiTTp޺ ̌Bc1,1&E‬3( PPuDe"E18Ȩ0*`<6քkJԞN?!@Io~:ŸKA!# Wbp#>%s\9;/?Ò&'O EYKte~RԞqAJ0?8rxOo7Ru8'00"@ 9ŬSX> zl$|G:/;\HDPjB*sm#MiroX%د34idfZĎ 612jXKΟ%oR$X?tD胕/"%' ۞ uV/-_2A#ѝ1bR4 ,uWx[FTnvdYXecVt ĩZ=Fmf }U4dw)Q*?znp䰁:k7 VY fBهhl]s $(LT'u3/v2sg2_ߚйeP߭<;ۍ' 8'e,lMsjJ>l6g0)}%TZZMiM*X,+KuX b Ku\Հ㬞_f1Qx%J:~ KA*;EmXdC0%qP5RTy𻺦+U %d ay} %K ωB,ꚲ:Z">mLHF#8ܚh%av{~e;l@;,n!X ~?K%DpX y ké-9cY2Vqo:LBƶ;kL `g`7.83N>_UTQ [xv~6LN@=f/\G7ʽplKȮg^> BGnگj,-'Xg69Q{9x MJd[UEtɣ}Vn:V@|&ءc ueuh3viǑ9 B&s/C0^pw{1ss.4$ B>[gxVT:}_5"8M%%4q\v]H=oe4͡ڈy=WISŨ(9ݤ\8x1Wy"ȶ1 _cWS3$]C> stream x]1n0 EwB7$R \%C LkNioS)lpӕ.%L/܌Ҙ5_ݰ8kQ)AI2uX IRQO ԋFN94qI#HH 4"Nc$RxZZT(yM%ߑ݉IEk*/<yM*{I&);y{W].F1gmY2tendstream endobj 95 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 2182 >> stream xeyPS!$9M+1S4PEתuVlAYDTl," D-,Vn]EAzkmn=X3|3y=qB&F;pF(E-%\PbFIB290}BC0n">Y%oB9_+FJE긘h~B%G&w4j*F({ɣpCD @$ٍ"{>D|?d >‬@3]T=7WϘ Rs"" FGbz tU̙_^GcßAc2)ȏqpes*.c0Ā ֨ t_ ͒^Y ]kx1xN: p{`zak '?|N宿nhx_뉯wIx#J!>D 
*H815tmjS ]˜U89' \pKxlopK, kۍ$6tp~j/䜧Զg~m>hq`172q`Ϸr/[ā~ ѨNG`35#^1s3+;Iʼr hʞGy{zߜخ {R*+M$l UEi2ʑ,}!k}\Dƶ;a%7󖇃*" cοv9u;ܱ){(Riz5fODeϐȇNU'%~"X-\kaHH`LOCӯ_7yҗÃF+m i'dl>rI D4-K-U&^[6cig w,إ'>0 "hlDhB GVˤ sscY(%*&+ON Et΂M} +T`ܻd+/2> Ru_Ւ/ u)&U益x!‚K8ka7˚7eЗvoGXΤ|ڝ|IgKG&.m\MO^2?_0n S%sTCt]I9j*5QIRN۩ "KXo7-%տ8ͤkBU7 SfhZH HGő+x U L҇܅{KR[R #VƋ&j2Z&1wNi> =(E'dgu;{NUw +}"uH}%3KysYendstream endobj 96 0 obj << /Filter /FlateDecode /Length 519 >> stream x]nP~ `O"YwlhU} E0"΢oߙqE49g˺ܚ:f^i׏}ͥ.;52>۰O߆6AV?G:m>pnrrW;.OO"-E}},#P$؁R$ T$؉ D-]`;ZDgފk^$X"-J3aiqEi148048j5LF@;҂k5x6 O@w;.^'#+3#+3#+3#+3cksQ=, - . , - . , - . , - . t9sd`IU *DTTz(h1d@&MZ 5l2b``0bd o7M&XSIk7ɛ`M&y)$o5śMx o7ŕ 4ftuyWخj_` endstream endobj 97 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 4999 >> stream xXXT׶>0gFAƣX7V,A,Fo {7(hHk5իǛh:yfL{0>P&#(@`I.ϟW'T$dQ̄,7 H k)@gryP_p3gΙ1of@ydX:y$oOfV׍+6lYe;%EQۖ; W|gM}N^뼝}\|6o f9kyオ` EE9S.ʖZOQFjJmR[2j:ZN͠S=FޥVR3U,j55rRkyZ=ʉZGL)3j 5ZBYPbJHMFR H)*‚ FL0]hXqJR=R22;7z?9=vlJ =`F3Fy9^> ;'ܴiu5HTE y?!ʴvGI :G IAxT>:6ԣ2tijmS ?"? 
l@Cw961؂qg7ud`q8k,jE;Az/&F.~boVz1܃]Luǽ W]HvBvRZ==%a., qqSqd \>BVC3~",1cgSL=svi.3yٲe7>kf[(-MNv&+N5'sr扞4:,d$ޤ7TES^&  }XI0XEF*IqGN- 2QB^&o)dW` {5wUؼ =VrKQC +}@1;\B^Wу?ӭC50]S&Zq{MDXqٲג_Ir[wI@XrKv\뛳5t=_5鯵P@bz;bk{@Xݯ0\' 7a.Kc3]^,FB(ޠOD>>(2 lBbCMǖx)@8u^nFZOKM?\|͕kn^?5m ~#-$ͧcKG+p1R썢:tyb ދf%l]rPq[b??p~pstD[~!Ψ`*Yz&KlXƶW&&ǿƽ bZ1pD]8,SZj=LƤb$.oW pp htP j +<=tOGCRQBft S#~gxJ|ֻr;& X;S3(;јsL}Wqh$H7mgH6*P$$Lq, n--mc?;=>:X506i6s=)c"47O?,EuD0poJoqL`ű7߻ڧeqօd8Y%Ȉ~|hijEϸr3.ٛU*&4#FܪG2xPFy6pP L]wsLY/`!K2 m4zG&rȧU#JtU]hBL.*xa(Nm,92Rcc+}SC%68&hzeC9|/׻$ݺe~Rɔ;(ʝ3PAG=: %l򴖄qԝ/k-&6F+Klc2Z"WW֒sm̬@Hɀu^;rUE^(^6i`QgDNrhyqՆ,$h/ EDAt( u@`3*./jq1_KzxL"l`QW OY [QAPrW> no$t ¶y3dW KD!aK X$d7GU qdccBuvâؕ8W|j.o8~7Gj*4z[+ o9f1MG]/MHvOIy6Zx_RiإRR39ٹTBbOt ITT*yRe!0V)Cr+C !|_Tgz;rԈ32JrQd]B!:6_)TlRlHʫϴq%(JSU eHPUp9xXxR{AKF+uFh=Ge^*3#9!)EnV M|_$YGpj:[=ɜgeWUV|^H3jFyYE ڛuu/n٣6pNɏ1 ŠbR(hU&Vz"C*&WoT2v9!5AN+*=k7N-8ip*P'&u%+]|-x?PTW5an`@ d`|Wf)#{̗xW:dPxuبK0Z#K(-AFD@a RC$pAG@fv6`≇ubep]ţkkGɐ!|rRm~pc ;En|:`mMc3hZ9-L1C5ʓgר]D7Ni*A-Ů;rnRH#Y# |hwm N6R&4M^i0xIu3\S$%CPt@۩J"lb_9<2 Hן<&\ct^ϺVmKºK/KNw~Ӡh+Y+ des|+8ULB A{ipD9:,_}OڗH:ҳ(%fƠTԑT1*FY(;!EՂ$O7oU5Fs5E%꜐++8[>s{++'LGDK> !ip!@(sFcyW ˴Rg|3H4PtQձqmLζ5tE,^ZuCuM!6:P( liEn?1ٕu~sڴ/-b AW֐F^t/c^ēj1V.kkkV5ȹi0 Ԫ :uʽ7/"{Yy\B踤ԩ+ .b zQ*noJN\| ⹮]I \$Aӥʭ޸3,јڕ^^^713`ksٓb$78A]LN?ۗKHXb_ ~((yˈgۏQ:&#]$RIRsEhL 'M_#0 +UO4bz*ׂ#Sx%3MЫbEb  { ^p}֠y S0̷x:~[?7N+܌(may[2SD:MmB% 6 FpC : ՄҶkl޼,/uKfVx{MWΣUl^(f!';>4v#u>㓓``mя0%6g6x…qRsքRV:17%+?~0V:{p8'&%%Xk=mx8.] 
z^ /C~EqXy+UJCY#,miAZ`6OE=R4B"9D60/ HQfa-(T؛b("; m>|4WYe疲nV32:3>*$J[S[z(@ӿn㻠J+ɦQl)X?!ATt@IU^PU|2D~b7_XE&o ˧g|}q52NB3*LgגZCEE: l/ Xa( ^Yڪ Y3͛TV=E՚{NfJR\D&{uMYAiTpcB$SMkFiMQ&f#I.yffYe^ИDendstream endobj 98 0 obj << /Filter /FlateDecode /Length 306 >> stream x]n@ DA !_¡UA9B8;KLt> stream xV{TTu0U!:db薲M4#% a0Oafx31 Yh!)YhYZsk;mv/jF{s=sw}rYùdq7I2gO8Y\WlA4FF<̝%*^'.I\,y͙مbYya~|f͉ɉ[2֘T\y@/ŧOKIڙa綴)KT?>X\V.ffe(JoW ۉ"H#%vkuD@l$6-D2"&hB*'s~UrQwEQ|%O)9+\;gn<޼y(r;f@'8S\f+u䗘Tr##ͭ`vvx|3ghlHD)ERyAU hIMm?cQ4uZ{M*PW+gZţ-sAnTLL>Y+ ;( / ,maq$ dCIlZހC&%Z} M4E:TL)kw]S*l*d**>/ђ*F^s_<Blzp?:-/mh߽dAew0|*beFD}PgG#>gc~*eW2U ¤s%'9!]l(@3t#cmg` gJ)ch*dCL2wVZ PF:?j%1acGu|yU\&(Z-gm_hendstream endobj 100 0 obj << /Filter /FlateDecode /Length 298 >> stream x]1n@E{N7`am֖iE(`, /"ϟo;E?ifx>cҗnsm/I]1U1ObMv.[;~oZ~V~GPzmҥ-a{U׏n3 v;$ 0{C =*@U0U&PL TShUTFQ[a[CLp&ja!b5Q0Hs4sG?Ȟ`dRmvr,W2o.o#lRQendstream endobj 101 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 2117 >> stream x} PW{1Dv@C  98AYOx &1i`^ñdlg( OxpҗXSl5‡g @!O[m"3+^upm8%&])73A p RS;: TRП}߽uZ#jׇe->9Jbsi3C޸ҞX$4Y~@s['EFhOÛ~m%dYp;Onݸ}?x.q9($ʌ9T m xokxs9W oQdC7NyJA-5p~wVr;nplj"+Éy%y"F2{^AwlM|f/ԗ'gJ"N,Q8wWy;稤WUnWA[ᮾ6YJHy#{p]("V34J2&/R%9VEIy,~#Js̘|GEL'7s>&SQeeȽ GuGT"c\6+3@ze_zt  JI -(v`@ ݠ{:lnq?k!3%֍0)f0߉:ذf\2#kT Q-*,.zBoE1| rjs #T,h[;}\p y F?q}x[nCTcl%5!Rۘmhڡ娱 ׄ<޲VgYWմ29PdR 7^Ǒ!Go7Ք{#NhD+U%Iyܜzü{lj[ 2p;ՆoMLH@jJCΐ̅#ͭM`w!UP: /?O`gL9u;!9dHVQ{g|[D|Y@çx|q:>= -\O7j3NЎ+nlph0R=ݽ%] UeZk$xꇾpmUFԞhKQ8jЬKzRW7= Zz®uJ 55KOq^i?UM'==jj1xa(!}endstream endobj 102 0 obj << /Filter /FlateDecode /Length 160 >> stream x]O1 y%ʒ.ZUm?@b!C@Hg|wYud@>%Mo &0UV&.:9txn {^H>u)75$:^?&[ jh.qb$Nii 83 [-Sendstream endobj 103 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 210 >> stream xcd`ab`ddM,pI-Lu JM/I, 154kf!CwџXyyXzx8&}-݃3#OFB@biB@frFbiQjbBi^JjBIFBzABNfrj^rvg ! 
(c```g```bddw@ٯr!?'χ }ṈEendstream endobj 104 0 obj << /Filter /FlateDecode /Length 161 >> stream x]O10 XЪj8(NߗNN>ˮGֱ4%"@cQ`}*Bv7ޟ@n]O$sT[9hy$(6ֶI{`>Jqp8H.~r V/wS endstream endobj 105 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 317 >> stream xcd`ab`dd74,ILf!CGO&nnW }O=J1<-9(3=DXWHZ*$U*8)x%&ggg*$(x)34R3sBR#B]܃C5旤g200030v1012|_cew3qEt"깥rmlٝ%-Y%:;8j&,YٿcD9f{orߔ)| ~xNn;n9.|z@g <<{'}=s&20> wendstream endobj 106 0 obj << /Filter /FlateDecode /Length 4791 >> stream x[r6_[썛"BVى璤ԔZĤTl;ڷglPnɲ՚Is۬Ŭ#mt^I93N\,KrVV*f+[]<`Go]lKm$ƻ\2R(ʉr.T%fy@W m,f/Of.)g'G^n1.w YY楙\ewr/GNh܉BSfݤTr%rN -+YI+)R:z8(KldBw>u&ng]enD=|G*Zs (G=I܋ׯ'a67N(=^E!'ˍsnkR"8USa0 b-םrP+b5>g%]~^7?aAZ5S{T˧³w7eؚ\)_$ 2T>98$j5=qX+A51ǷPBK<$ҨqwHI%T%g2:HR=`!-*Ɠ#_f*[: DkOEA:^0?5 73p {u/TQȬom(+l6\6l w'L.%.k씥_xT]3%J ?SQ,vuQGf_[>QQP:]m45GzvWz!9j{vryI _ˏ"QjTVaӬpaMxet춫4 ԳG[tCk{需_-=kzhvfUŭ9'v5lz08B~[iqκ#0o1jܮ7x' ŌvQa4,ΖaT~^yJ 7]?sz)0iaczk#.KJ"Npw3ޒpYڇvwM?@ )\E \ `rSh;{hdzSj| `vUVytoO\c!ڒ١LGL-œƱuGkc؟a8]錹@ñ hܝRW[EOs)ʙ. Em>jN,Wb 9`" fN|įpUt =U.DZ01 UGU`'+ʻF4,\75XRa>>}"vp)oqdsiEs *l#p]T%?~ǶNMyh"Ma#Bm?ACY3Nbk2J2wC|D5A 8&Nt["ns^#=^W՗7zvW+G0Ulh,T8Y'6{bZ%D_rVNrJ_Q ~0Y5}U{BŧIGe C,WÛ)$RB䑚Xa|DKaB֪'E4UvBe7aEe3A-xӵ(7)^+w}U>[:fXga/2`1^.}9e=l;ͯKDN1^7&:)) ^7 \Cw?^{|}-3(?T xI-BbJ1'5.$&e,PJOWֻEz>;A$0\řfϓMX`~/$f竮uxeU:K'BT*bJ9zр&X5pu[.sQ`|(¦dI<VixIW;lV)b(YizG;`tbmYP'9PUdުF %\Ǖ -Cx(uTh;i駷e-݈oSE b -JXP ɿxx;)0fpݔ4yI̔3"{P62MijaM}Wt;FY|ήN 8PJ {vcw99J*_다b?5yB5v|)dgt{3݄,~˜Ɋt[pTeY|.oa(ʚ۝H^2)w6 ;^QA4VۄO!kJuv%*{q}wsW^}>`nc{M[@)9OOsfsqJF"-JllwsR+i_6g͍LFYXq7~~HrBNY G|׭ewu渎;RY=vB* hwDE<Κ)kԁm3  3s 'ư3Id$}$Gzw$@?HnzIHP& ݢMV}j/Rm=E_Eg_ N0ab~ us<71%(|bM:y,%ϰˤyY  ]Iٛj׶mU&02&sd|}HpGBQ[\;jֱq!KǏ=ШU)b0ZRRLȋ(/;|ї&pдptv8(A<yf3&? 
L[S f yi2/ú%0མtaRpht'R6y$%"k$TSOχz@DЋvJ!]*kkb/5\ӇۭX/ҷzWP">M-F%(%nU`Ǐ{T:23 ;ަ@\hڇscesPtŸQ/EDd E~bTu3Qα :jP _> stream x]ANP D9En66,@@hLLq{Ky^^q~?Rc}fk\~H=R[PC )%T.$ 'v!;mH["F4F13Q)$Qf&Y2e4j0iHk|&k&o ЈЉ7!og h hXiƵVnZq/` Ư؄km8Ҹ99Ҹ99Ҹ99Ҹ99u1kfTxٖuEg3uNsB7Jendstream endobj 108 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 3040 >> stream xUViTg֮TB;АfF˸8b". (,͎@D$ [H= B xμIrGW]އGP<[iWtzUg2Θ}e8 s`1y%Gy<i̎а@ۚ k+V~MLl71Lа_Y|<&$HkMH&Q6aQV^q>~vRA!+VZmJQRK(;j)rSNvP(ʜI,]ZʒSO 5eCR;eL-#Et)S$]^ oZVLg5Dw֧Srzcژdپ[,,\ؼh"E .BՠD%@J,SX[:E4b9SڀJa8@HЂ !UO;G:UόU-H8.l?cN?Pq Eif,њ@&Q$J\̍|$xC3 =׺Y˕}Fp'ig+xCk|r>FYl,bjpPhd|f. N"Tx(d" w$5ƭ=B#S|Yy`=X>c1 6:㸉qm,Dpwidkdu8o6XCܡbcłkwx}FCwqQdR"x1gJ' <3\|erH0pS<(d-;x" lda iGRN&Aj[)!icRz#L%=|>ǜHJTKc e}vNd$8(:$[qJ+40M=sG>D|xƪ(L Du(NKqzĴaޓDz=Uu[q=Mz~VW1x5oܯMg|0q8lxr2ْ^t=E%a~ W{ۗgiJ=C=S]C`볦Ӆ 4zj#: !KIE.XL! azU57ԕIt`>S EJ!JDroף?t͠v{2h S|K-t08/8ATMBPCY=Y}89.D-5|=2W.g cvy-픴޻t[;p`RG CHDX6xNhП8^ }l)OQ*d(²[ Ϟ(!> rp "PDbr@ UlkVyJ\j=.$?zu]IcMv#iފ ݪ;%Nd$^]m"l?^oK%ueEr3 p^юmC%>.R[`y,"~1_`ܿp6/z|B'_绊,њ1ޓ~% K:OAh0F/˃b7k?̀)8$"[8[+9Œaa `.nM8!hYyCz)_*+miȅ rRrb0~Y>wCSf~%M<2KߟtWb\rG&_o`1q(FNx: 90*DG3,FanoԠEBCjQt(<@JӰż!{kcecGǿhwEIޙzYIYp PUEN 4aoVm.@[;߽Zz.=Uwg %\4E%VAT! (= p4LA?=:pyV;k-t](\+94tF9%>3XD* A"9u.lz9TҜ 2 \-3 ʒV0uKM]_uxmp&"1g ܣCe,odl2ZK4] OȜAWg^v skOB1\ @ M IM1akx/SU}5ZdHT_Pk}kQ^K3~J`8cuvGϢ!3eVD_Kހ MPendstream endobj 109 0 obj << /Filter /FlateDecode /Length 367 >> stream x];n0D{B7ЏZڀn\$\@PaI"NbH@O # #'W9rtɕ+Ny(U\pc5ݢ!)1!)1!)1!)1!)1AD444 4P<5ϡx2iàIO?N.k nkqڴZ5n8ſm]慯R(endstream endobj 110 0 obj << /Filter /FlateDecode /Subtype /Type1C /Length 3454 >> stream xW TWڭ(QKUD%., ѸfG_7.]I BEcx:&Irsfy_ֽ[ZBdZww\hW@Ploqo0Q"L2&K~s3CRdi6i]k81aB*|ᬈL ٽ7g/A4WS|dns<qf-\(Sʼ66nXAn熷 )r'sd/G@PWHwٿ7 -^BSu4“N'6ބLl%Vw b5!F%Fc5Ao 1Ob"1U0%Wݒ/LJߖ^1]oja%J/Q#l"H-#{-3-_RыG@oځ@S^'iRAgQ(#(نD [ B:t-T#Q--M]^?H&4 4nZ+$q~@ ;iy't^B<ل9{ @&b "! 2gu뵟Ng=l82%b_)C+@$',8\NM0], GҦ/G Bᡋ\2@A*( 9/193|6@>c;>us|vt^O  )iVܘ|6 3@qG SPWG T_dr cwY(lx2v`' | tl.JCYWe7l_"N"{UJ8xQ }=܊qvkV9?4߸r__u]?E_o 4$^MѬΜy4(Œ! 
}0J)0IooՔYڪCB<!TA'J"ϊ)h^Y,%(4O_k@h3eav4&dWfڔ:URUgsM ÿdjrByQd$$DUۨĖ+ѵGv~:ޕjmK*jxc> s0Hb2؄iAZö#}2_+Pk(C+Ml@`ixb}l|S<FcPAhdzHc{#E'JߚiVO g+PY@aC|:=m_碠+;:5 =F5idg!־ TNOmRNŠXD~BZLsSS v-ʯ@{a+'Ȗ^T9 Zs!]`K.| eC\'cٸ[ÐrjMVKggaL>[o2,y=Au `QWkxwY:5e8!P*X36Ǧ f3 6#j(JdTL@REi2E_OUʷn?ĵGW|҂}ڽ\e>n~؜zxw5WS"fPYVz˘|wlbbbˎ3/;sMc`}g+Ϟ:y w|hPg0oY_֚ |@[g1Hd2923(LiiYA)N/ ^tPUA.-ۮMx>`lF'F3n`a7-߻*!kQEŋ۟ v"W\KGk~7'g|a- "낯HcJag@@Fۑ֟ LR"0:e}DRQu!$!MpWkLC:9C4< t\3r0( dM~fM|b>YD lRxs*)_D|H jwoUq!iL}\yBtfbZ)+IP&ae^洯B~aDvbQܧxlax0% S˗ }7fiCKʃUr>^("v\unG.TZ*)c_Ͷ7bh9@) G:4WOvwEzL^7Q)j13Vnn}i V[lKKC= u`,LٳL D{D恏qtIf'hFQNFlu a&##7 7 3nC^S7YdlZ.zO^Rx-}scw̍qS Iwgs<E5W)*bTn.m<ٳ SUJ.JZC0crVBuV=z}BTJ opl sMGMG { Gt<Н>\bœv0=}=kǙXZvҧ==|pkK{-w]{kA^N~@&66|x}l*`*PQYECB]DDJ}zӯhq-pqqso@z unڗ^,8 [F '5v`RMAFS)L,G?endstream endobj 111 0 obj << /Type /XRef /Length 134 /Filter /FlateDecode /DecodeParms << /Columns 4 /Predictor 12 >> /W [ 1 2 1 ] /Info 3 0 R /Root 2 0 R /Size 112 /ID [<9a5fa9f64ae90fa4eee29c353d1b7b3e><2b51c579ae70541efce161094902574f>] >> stream xcb&F~ c%/n3`T &? b}@ HLʁ(`LA,+$M!U$dA @q${"@& 3. 
kk endstream endobj startxref 63047 %%EOF RcppAnnoy/inst/tinytest/0000755000176200001440000000000014653657263015044 5ustar liggesusersRcppAnnoy/inst/tinytest/testOnDiskBuild.R0000644000176200001440000000211213541674477020233 0ustar liggesusers suppressMessages(library(RcppAnnoy)) f <- 2 # arbitrary a <- new(AnnoyEuclidean, f) diskfile <- tempfile(pattern="annoy", fileext="bin") expect_true(a$onDiskBuild(diskfile)) a$addItem(0, c(2, 2)) a$addItem(1, c(3, 2)) a$addItem(2, c(3, 3)) a$build(10) expect_equal(a$getNNsByVector(c(4,4), 3), c(2,1,0)) #, msg="getNNsByVector check 1") expect_equal(a$getNNsByVector(c(1,1), 3), c(0,1,2)) #, msg="getNNsByVector check 2") expect_equal(a$getNNsByVector(c(5,3), 3), c(2,1,0)) #, msg="getNNsByVector check 3") a$unload() a$load(diskfile) expect_equal(a$getNNsByVector(c(4,4), 3), c(2,1,0)) #, msg="getNNsByVector check 1") expect_equal(a$getNNsByVector(c(1,1), 3), c(0,1,2)) #, msg="getNNsByVector check 2") expect_equal(a$getNNsByVector(c(5,3), 3), c(2,1,0)) #, msg="getNNsByVector check 3") b <- new(AnnoyEuclidean, f) b$load(diskfile) expect_equal(b$getNNsByVector(c(4,4), 3), c(2,1,0)) #, msg="getNNsByVector check 1") expect_equal(b$getNNsByVector(c(1,1), 3), c(0,1,2)) #, msg="getNNsByVector check 2") expect_equal(b$getNNsByVector(c(5,3), 3), c(2,1,0)) #, msg="getNNsByVector check 3") RcppAnnoy/inst/tinytest/testManhattan.R0000644000176200001440000000460113465550702017771 0ustar liggesusers suppressMessages(library(RcppAnnoy)) ## test01getNNsByVector f <- 2 a <- new(AnnoyManhattan, f) a$addItem(0, c(2, 2)) a$addItem(1, c(3, 2)) a$addItem(2, c(3, 3)) a$build(10) checkEqual(a$getNNsByVector(c(4,4), 3), c(2,1,0), msg="getNNsByVector check 1") checkEqual(a$getNNsByVector(c(1,1), 3), c(0,1,2), msg="getNNsByVector check 2") checkEqual(a$getNNsByVector(c(5,3), 3), c(2,1,0), msg="getNNsByVector check 3") ## test02getNNsByItem f <- 2 a <- new(AnnoyManhattan, f) a$addItem(0, c(2, 2)) a$addItem(1, c(3, 2)) a$addItem(2, c(3, 3)) a$build(10) 
checkEqual(a$getNNsByItem(0, 3), c(0, 1, 2), msg="getNNsByItem check 1") checkEqual(a$getNNsByItem(2, 3), c(2, 1, 0), msg="getNNsByItem check 2") ## test03dist f <- 2 a <- new(AnnoyManhattan, f) a$addItem(0, c(0, 1)) a$addItem(1, c(1, 1)) a$addItem(2, c(0, 0)) checkEqual(a$getDistance(0, 1), 1.0, msg="distance 1")# checkEqual(a$getDistance(1, 2), 2.0, msg="distance 2")# ## test04largeIndex ## Generate pairs of random points where the pair is super close f <- 10 ##q <- rnorm(f, 0, 10) a <- new(AnnoyManhattan, f) set.seed(123) for (j in seq(0, 10000, by=2)) { p <- rnorm(f) x <- 1 + p + rnorm(f, 0, 1.0e-2) y <- 1 + p + rnorm(f, 0, 1.0e-2) a$addItem(j, x) a$addItem(j+1, y) } a$build(10) res <- TRUE for (j in seq(0, 10000, by=2)) { res <- res && all.equal(a$getNNsByItem(j, 2), c(j, j+1)) && all.equal(a$getNNsByItem(j+1, 2), c(j+1, j)) } checkTrue(res) ## test05precision precision <- function(n, nTrees=10, nPoints=10000, nRounds=3) { found <- 0 for (r in 1:nRounds) { ## create random points at distance x f <- 10 a <- new(AnnoyManhattan, f) for (j in seq(nPoints)) { p <- rnorm(f, 0, 1) nrm <- sqrt(sum(p^2)) x <- p / nrm + j a$addItem(j, x) } a$build(nTrees) nns <- a$getNNsByVector(rep(0, f), n) checkEqual(nns, nns[order(nns)], msg="checking precision order") # should be in order ## The number of gaps should be equal to the last item minus n-1 found <- found + length(nns[nns <= n]) } return(1.0 * found / (n * nRounds)) } checkTrue(precision(1) >= 0.98) #, msg="precision at 1") checkTrue(precision(10) >= 0.98) #, msg="precision at 10") checkTrue(precision(100) >= 0.98) #, msg="precision at 100") checkTrue(precision(1000) >= 0.98) #, msg="precision at 1000") RcppAnnoy/inst/tinytest/testEuclidean.R0000644000176200001440000000506113465550702017750 0ustar liggesusers suppressMessages(library(RcppAnnoy)) ## getNNsByVector f <- 2 a <- new(AnnoyEuclidean, f) a$addItem(0, c(2, 2)) a$addItem(1, c(3, 2)) a$addItem(2, c(3, 3)) a$build(10) checkEqual(a$getNNsByVector(c(4,4), 3), 
c(2,1,0), msg="getNNsByVector check 1") checkEqual(a$getNNsByVector(c(1,1), 3), c(0,1,2), msg="getNNsByVector check 1") checkEqual(a$getNNsByVector(c(4,2), 3), c(1,2,0), msg="getNNsByVector check 1") ## getNNsByItem f <- 2 a <- new(AnnoyEuclidean, f) a$addItem(0, c(2, 2)) a$addItem(1, c(3, 2)) a$addItem(2, c(3, 3)) a$build(10) checkEqual(a$getNNsByItem(0, 3), c(0, 1, 2), msg="getNNsByItem check 1") checkEqual(a$getNNsByItem(2, 3), c(2, 1, 0), msg="getNNsByItem check 2") ### test03dist f <- 2 a <- new(AnnoyEuclidean, f) a$addItem(0, c(0, 1)) a$addItem(1, c(1, 1)) a$addItem(2, c(0, 0)) checkEqual(a$getDistance(0, 1), 1.0^0.5, msg="distance 1") checkEqual(a$getDistance(1, 2), 2.0^0.5, msg="distance 2", tolerance=1e-6) ## test04largeIndex ## Generate pairs of random points where the pair is super close f <- 10 #q <- rnorm(f, 0, 10) a <- new(AnnoyEuclidean, f) set.seed(123) for (j in seq(0, 10000, by=2)) { p <- rnorm(f) x <- 1 + p + rnorm(f, 0, 1.0e-2) y <- 1 + p + rnorm(f, 0, 1.0e-2) a$addItem(j, x) a$addItem(j+1, y) } a$build(10) res <- TRUE for (j in seq(0, 10000, by=2)) { #expect_equal(a$getNNsByItem(j, 2), c(j, j+1), msg="getNNsByItem check1") #expect_equal(a$getNNsByItem(j+1, 2), c(j+1, j), msg="getNNsByItem check1") res <- res && all.equal(a$getNNsByItem(j, 2), c(j, j+1)) && all.equal(a$getNNsByItem(j+1, 2), c(j+1, j)) } expect_true(res) ## test05precision precision <- function(n, nTrees=10, nPoints=10000, nRounds=3) { found <- 0 for (r in 1:nRounds) { ## create random points at distance x f <- 10 a <- new(AnnoyEuclidean, f) for (j in seq(nPoints)) { p <- rnorm(f, 0, 1) nrm <- sqrt(sum(p^2)) x <- p / nrm * j a$addItem(j, x) } a$build(nTrees) nns <- a$getNNsByVector(rep(0, f), n) checkEqual(nns, nns[order(nns)], msg="checking precision order") # should be in order ## The number of gaps should be equal to the last item minus n-1 found <- found + length(nns[ nns <= n]) } return(1.0 * found / (n * nRounds)) } checkTrue(precision(1) >= 0.98)#, msg="precision at 1") 
checkTrue(precision(10) >= 0.98)#, msg="precision at 10") checkTrue(precision(100) >= 0.98)#, msg="precision at 100") checkTrue(precision(1000) >= 0.98)#, msg="precision at 1000") RcppAnnoy/inst/tinytest/testSeeds.R0000644000176200001440000000152313743414424017120 0ustar liggesusers ## if this is set (eg .travis.yml) then run the test if (Sys.getenv("RunAllRcppAnnoyTests") != "yes") exit_file("Skip this test") suppressMessages(library(RcppAnnoy)) f <- 2 set.seed(123456) # R Seed for next two vectors n <- 100 x <- rnorm(n) y <- rnorm(n) v1 <- new(AnnoyHamming, f) v1$setSeed(123) for (i in 1:n) v1$addItem(i-1, c(x[i], y[i])) v1$build(f) v2 <- new(AnnoyHamming, f) v2$setSeed(456) # different for (i in 1:n) v2$addItem(i-1, c(x[i], y[i])) v2$build(f) v3 <- new(AnnoyHamming, f) v3$setSeed(123) # as first for (i in 1:n) v3$addItem(i-1, c(x[i], y[i])) v3$build(f) checkEqual(v1$getNNsByVector(c(0.5,0.5), 20), v3$getNNsByVector(c(0.5,0.5), 20)) # msg="v1 and v3 are equal") checkTrue(any(v1$getNNsByVector(c(0.5,0.5), 20) != v2$getNNsByVector(c(0.5,0.5), 20))) # msg="v1 and v2 are not equal") RcppAnnoy/inst/tinytest/testHamming.R0000644000176200001440000000527113465550702017442 0ustar liggesusers suppressMessages(library(RcppAnnoy)) # test01getNNsByVector f <- 2 a <- new(AnnoyHamming, f) a$addItem(0, c(2, 2)) a$addItem(1, c(3, 2)) a$addItem(2, c(3, 3)) a$build(10) checkEqual(a$getNNsByVector(c(4,4), 3), c(0,1,2), msg="getNNsByVector check 1") checkEqual(a$getNNsByVector(c(1,1), 3), c(2,1,0), msg="getNNsByVector check 2") checkEqual(a$getNNsByVector(c(5,3), 3), c(2,1,0), msg="getNNsByVector check 3") # test02getNNsByItem f <- 2 a <- new(AnnoyHamming, f) a$addItem(0, c(2, 2)) a$addItem(1, c(3, 2)) a$addItem(2, c(3, 3)) a$build(10) checkEqual(a$getNNsByItem(0, 3), c(0, 1, 2), msg="getNNsByItem check 1") checkEqual(a$getNNsByItem(2, 3), c(2, 1, 0), msg="getNNsByItem check 2") # test03dist f <- 2 a <- new(AnnoyHamming, f) a$addItem(0, c(0, 1)) a$addItem(1, c(1, 1)) a$addItem(2, 
c(0, 0)) checkEqual(a$getDistance(0, 1), 1.0, msg="distance 1")# checkEqual(a$getDistance(1, 2), 2.0, msg="distance 2")# ## test04largeIndex <- function() { ## ## Generate pairs of random points where the pair is super close ## f <- 10 ## #q <- rnorm(f, 0, 10) ## a <- new(AnnoyHamming, f) ## set.seed(123) ## for (j in seq(0, 10000, by=2)) { ## p <- rnorm(f) ## x <- 1 + p + rnorm(f, 0, 1.0e-2) ## y <- 1 + p + rnorm(f, 0, 1.0e-2) ## a$addItem(j, x) ## a$addItem(j+1, y) ## } ## a$build(10) ## for (j in seq(0, 10000, by=2)) { ## checkEquals(a$getNNsByItem(j, 2), c(j, j+1), msg="getNNsByItem check 1") ## checkEquals(a$getNNsByItem(j+1, 2), c(j+1, j), msg="getNNsByItem check 2") ## } ## } ## test05precision <- function() { ## precision <- function(n, nTrees=10, nPoints=10000, nRounds=3) { ## found <- 0 ## for (r in 1:nRounds) { ## ## create random points at distance x ## f <- 10 ## a <- new(AnnoyHamming, f) ## for (j in seq(nPoints)) { ## p <- rnorm(f, 0, 1) ## nrm <- sqrt(sum(p^2)) ## x <- p / nrm + j ## a$addItem(j, x) ## } ## a$build(nTrees) ## nns <- a$getNNsByVector(rep(0, f), n) ## checkEquals(nns, nns[order(nns)], msg="checking precision order") # should be in order ## ## The number of gaps should be equal to the last item minus n-1 ## found <- found + length(nns[nns <= n]) ## } ## return(1.0 * found / (n * nRounds)) ## } ## checkTrue(precision(1) >= 0.98, msg="precision at 1") ## checkTrue(precision(10) >= 0.98, msg="precision at 10") ## checkTrue(precision(100) >= 0.98, msg="precision at 100") ## checkTrue(precision(1000) >= 0.98, msg="precision at 1000") ## } RcppAnnoy/inst/tinytest/testDotProduct.R0000644000176200001440000000401014653657263020151 0ustar liggesusers suppressMessages(library(RcppAnnoy)) f <- 3 a <- new(AnnoyDotProduct, f) a$addItem(0, c(0,0,1)) a$addItem(1, c(0,1,0)) a$addItem(2, c(1,0,0)) a$build(10) checkEqual(a$getNNsByVector(c(3,2,1), 3), c(2,1,0), msg="getNNsByVector check 1") checkEqual(a$getNNsByVector(c(1,2,3), 3), c(0,1,2), 
msg="getNNsByVector check 1") checkEqual(a$getNNsByVector(c(2,0,1), 3), c(2,0,1), msg="getNNsByVector check 1") f <- 3 a <- new(AnnoyDotProduct, f) a$addItem(0, c(2,1,0)) a$addItem(1, c(1,2,0)) a$addItem(2, c(0,0,1)) a$build(10) checkEqual(a$getNNsByItem(0, 3), c(0,1,2), msg="getNNsByItem check1") checkEqual(a$getNNsByItem(1, 3), c(1,0,2), msg="getNNsByItem check2") f <- 2 a <- new(AnnoyDotProduct, f) a$addItem(0, c(0, 1)) a$addItem(1, c(1, 1)) checkEqual(a$getDistance(0, 1), 0 * 1 + 1 * 1, msg="distance 1", tolerance=1e-6) f <- 2 a <- new(AnnoyDotProduct, f) a$addItem(0, c(1000, 0)) a$addItem(1, c(10, 0)) checkEqual(a$getDistance(0, 1), 1000 * 10 + 0 * 0, msg="distance 2", tolerance=1e-6) f <- 2 a <- new(AnnoyDotProduct, f) a$addItem(0, c(97, 0)) a$addItem(1, c(42, 42)) d <- 97 * 42 + 0 * 42 checkEqual(a$getDistance(0, 1), d, msg="distance 3", tolerance=1.0e-6) f <- 2 a <- new(AnnoyDotProduct, f) a$addItem(0, c(1, 0)) a$addItem(1, c(0, 0)) checkEqual(a$getDistance(0, 1), 0, msg="distance 4", tolerance=1.0e-6) ## Generate pairs of random points where the pair is super close f <- 10 a <- new(AnnoyDotProduct, f) set.seed(123) for (j in seq(0, 10000, by=2)) { p <- rnorm(f) f1 <- runif(1) + 1 f2 <- runif(1) + 1 x <- f1 * p + rnorm(f, 0, 1.0e-2) y <- f2 * p + rnorm(f, 0, 1.0e-2) a$addItem(j, x / norm(x, "2")) a$addItem(j+1, y/norm(y, "2")) } a$build(10) res <- TRUE for (j in seq(0, 10000, by=2)) { #expect_equal(a$getNNsByItem(j, 2), c(j, j+1), msg="getNNsByItem check1") #expect_equal(a$getNNsByItem(j+1, 2), c(j+1, j), msg="getNNsByItem check1") res <- res && all.equal(a$getNNsByItem(j, 2), c(j, j+1)) && all.equal(a$getNNsByItem(j+1, 2), c(j+1, j)) } checkTrue(res) RcppAnnoy/inst/tinytest/testAngular.R0000644000176200001440000000374213465550702017454 0ustar liggesusers suppressMessages(library(RcppAnnoy)) f <- 3 a <- new(AnnoyAngular, f) a$addItem(0, c(0,0,1)) a$addItem(1, c(0,1,0)) a$addItem(2, c(1,0,0)) a$build(10) checkEqual(a$getNNsByVector(c(3,2,1), 3), c(2,1,0), 
msg="getNNsByVector check 1") checkEqual(a$getNNsByVector(c(1,2,3), 3), c(0,1,2), msg="getNNsByVector check 1") checkEqual(a$getNNsByVector(c(2,0,1), 3), c(2,0,1), msg="getNNsByVector check 1") f <- 3 a <- new(AnnoyAngular, f) a$addItem(0, c(2,1,0)) a$addItem(1, c(1,2,0)) a$addItem(2, c(0,0,1)) a$build(10) checkEqual(a$getNNsByItem(0, 3), c(0,1,2), msg="getNNsByItem check1") checkEqual(a$getNNsByItem(1, 3), c(1,0,2), msg="getNNsByItem check2") f <- 2 a <- new(AnnoyAngular, f) a$addItem(0, c(0, 1)) a$addItem(1, c(1, 1)) checkEqual(a$getDistance(0, 1), (2.0 * (1.0 - 2^(-0.5)))^0.5, msg="distance 1", tolerance=1e-6) f <- 2 a <- new(AnnoyAngular, f) a$addItem(0, c(1000, 0)) a$addItem(1, c(10, 0)) checkEqual(a$getDistance(0, 1), 0, msg="distance 2") f <- 2 a <- new(AnnoyAngular, f) a$addItem(0, c(97, 0)) a$addItem(1, c(42, 42)) d <- ((1 - 2^(-0.5))^2 + (2^(-0.5))^2)^0.5 checkEqual(a$getDistance(0, 1), d, msg="distance 3", tolerance=1.0e-6) f <- 2 a <- new(AnnoyAngular, f) a$addItem(0, c(1, 0)) a$addItem(1, c(0, 0)) checkEqual(a$getDistance(0, 1), 2.0^0.5, msg="distance 4", tolerance=1.0e-6) ## Generate pairs of random points where the pair is super close f <- 10 a <- new(AnnoyAngular, f) set.seed(123) for (j in seq(0, 10000, by=2)) { p <- rnorm(f) f1 <- runif(1) + 1 f2 <- runif(1) + 1 x <- f1 * p + rnorm(f, 0, 1.0e-2) y <- f2 * p + rnorm(f, 0, 1.0e-2) a$addItem(j, x) a$addItem(j+1, y) } a$build(10) res <- TRUE for (j in seq(0, 10000, by=2)) { #expect_equal(a$getNNsByItem(j, 2), c(j, j+1), msg="getNNsByItem check1") #expect_equal(a$getNNsByItem(j+1, 2), c(j+1, j), msg="getNNsByItem check1") res <- res && all.equal(a$getNNsByItem(j, 2), c(j, j+1)) && all.equal(a$getNNsByItem(j+1, 2), c(j+1, j)) } checkTrue(res) RcppAnnoy/inst/tinytest/testVignette.R0000644000176200001440000000130613756552014017643 0ustar liggesusers ## See #66 for the idea and discussion vigfile <- system.file("rmd", "UsingAnnoyInCpp.Rmd", package="RcppAnnoy") if (!file.exists(vigfile)) exit_file("No 
vignette source found. What's up with that?") lines <- readLines(vigfile) starts <- which(lines == "```{Rcpp, eval=FALSE}") ends <- which(lines=="```") ends <- ends[findInterval(starts, ends)+1] code <- lines[unlist(mapply(seq, starts+1, ends-1))] res <- Rcpp::sourceCpp(code=paste(code, collapse="\n")) # checks everything is compileable. expect_equal(res$functions, "thingy") # check we got a function compiled set.seed(42) mat <- matrix(runif(1000), 100) Q <- matrix(runif(100), 10) res <- thingy(mat, 1, 10, Q, tempfile()) expect_equal(res, 1) RcppAnnoy/inst/tinytest/data/0000755000176200001440000000000013465550702015743 5ustar liggesusersRcppAnnoy/inst/tinytest/data/test.tree0000644000176200001440000004461013465550702017610 0ustar liggesusers>?O>a8վ+x2;w? r[?vVz?| sT7>^C>3>L8?ɺUE!=?]=A>o?׿1UD A>V>i.<\;┿pњ}Ҋj鿫v^ j>?2T?M?'OV45@\?@Q e?5?<= &#?Q?{?iҾI?{Z忏??⼟]?-'?8%>Ԛ:o>\??bZ~ؾ>B` Pgmc?֤>6x??ޒ??z2 ?vUp?s? $>:?&$) >:=>>QN?ۿh}>0B?qLp>BI?k^>q(u?ޜ%>P%-K?@)B=5@Ym>yn>̔1>+-T+N? ?K{=kE>)=ntq?= N>#>e?ܻTu?e"=ܺ>< >EV{R{?#>?v>z{,=(?=E=D?ˎc~?D?a%濨4?-S tо~5(9)?)m>f>=␾$$̜پ?\?yCc=߉*ג>T>[@X> ?2/SKo->> =6?h,%?1ܿC(>IzffuBo-Կ I?(ҿ g>蝎?@]?s.?HBK>9?Kd- ??q[`?A{F?W @q>Mtal(W=n?R,?UqT͗ -R?⳿C7@phh?t>1>V?>8\?͓@褁=?t??:"T>@Y<,ǿ>??x6xþ>@gO>G?sm d==?7U"RB?x @LL *C?=T?#Y??iT >Zt? UjB?$""Of@!?-9oC`{?5=#? Yf>>jO @Ŀ>+p)VF>/|?E˽bz\?iɿ1}f>}aٿ*/o=$?bvɽҬ,?W=jLU?~qv? "r?>?e ?Ǔ@?ah^81?!s?J_)?-ξ݌?I+>G ¿:毲>̍?40?ж\jݮ:>W?3?l?>@ھ>B~0>)S= ?k?5K =v?J=*2ۿ`B?>d^UܿS[?v>Htk2h_@ɠ~Y=[\i:??n?{?bò>㳿biyqs?$)Ć?>`=c<0?sojg?*J?rfxT?'4>8M >y<q|.i?F79>wܾmKg> +?7r> 6>[ >ɪTU5?'?}}IokZ?@yKt?a?=?"U>: >jd?܈>Np??Mv;Ay*>f%AK?q+]?ϔ?V?^S[a*ayUa??>R>;NP?mƿMe,s>Z6þ{?8,)oUL4?!3>wzx?">=%??F2MM\?V_ƀO?Dd? ?)U uDZ~naF?f:?mDb?fkֿ\R?mB?Ͼ42 @(&>X-hSC? FQChj!"@ݬMh?>>ɑL0ռ(?x(?e@v=Ȣ?Yg=s?ܿ8ÞR}M?ys~ó?Ad˶6w??>lۂ>\sx[#!&ן=2#-?M?j>_+?4̽*doK>zȥb?νSQ?ty?w9>a4@r@T@4Xؾ8?x~d?ȗCK/>Ͽ ?K?پ?XF? =>?R?^ο`?;S^??g?@yn? 
`?oi?L!?3q惿2?{ ?=F=Av?h]?#=(?٭>bb?,> ȿQ?6=~!=/R&>.{>?>?޿A>X?7>/§?>s}?e=V!P{ƾ}B*>O=i:7?A<?a:^ _!/?: ϗyI<̿I1f[>j9.6.-?# A?\L?>{>_zXMk۾Y)<Ƿ?IV<{v>Ma$@ʰ?Yw"99>KAžO?5=&Z?.?Ů>? l?&3k>r񿧘,S?ڳ&ڎܼ}rcmǾƽA4=nZ3?)xH?f.?yJU>4?S" >ȆS?6Y?/?Kw>T0?򒗿u_vĿ"@?z<>Gb$?L n?@?stތ m4#W?X ?aZ>C:|]/%>{`#4+>쿺>P?8!?>1>?=.@[K:^a=m?b?#??cϾh=,S hB?U%>h@o߾ ` ި?lKWlտc?8=Z?yd/ ?Y?>5myo]?n>W?5?8b~a>࿁?{??E'X>yJ|\=c̍?͟k ?%?҇?߷?-\>ߚ>`!0o=<?y@Yu? rGVդ?RZ۾D?Ntֽj~17S$L><Ҿ֠)T?t@Ĉ|X̛N?ed?*m#!=܌6der8{=e *>q? ?92>׽+p* :5fi_@>F>Z=ʌ *Z?S%>8ֽgh!k>ܳ>%9?;k Ż=6 ;+x/==eU  #'+283@AOV%jk?< <=>kC>pa4t>Cٽ &146T[lo!Yk>w=>BL>\^Y᥾mn=Q(@km>+jR/%>> */;EU\b-57BLpq'罓˾J>i$=g)>>/=?>?MPQa !%KRXY`/sxc> ba{_:Q>=±>S>=twSo >J>(<]k^=>k> g,?uv:Jcy~ ?7¼5>/>Ĉ>>z{:A;8N>(N>BS(}< [= $9CFS|}q|;}>logڽw~ႾMY>1 ()G  ".:=^ZdN>r@AxlV>>>fI>Q]?5O>}{B}A>(=p;K=VC> ~>q= g>W>Հ_0f^= &*?MPUXbg=^>s\- O>?v"2;_ ,.BDQ[c0z}?%OyپrEB'>1>տ1̾=W)?9K=ϤY> -4:ENRTY\%9K`'/CHOVZa/>= ?=~Xq?žwpo=='oZY2̜Lo>7"^¾ lD4ᾝ$> ?>9=ݮ>> D iJ>+ď>  <JL  +F]^ !$1W'>蟢Ⱦ#Ž߲>, (?W>DѾΝ> `>NJ1>p4%?NK!&=:W=F%涽=R #6=>AI(57 )038@GSd5R>Cޅ>>l>bʻ= >,0߾M>>,l>??ol7> >G>>X> >xg=wٽ8fyW3dh(&>Ã>  <CDPVa^'>d&}FKAd]>;þcҊ> */?OU38@AM$?:>ö;=H@><>(> >^%=#?  '2W]c OK/>>u=#>;F>h \=y*WE<#)+>GI&(5J_4m=-о9)?v> MK!BE=.>D> >[@d;JQ'>~V|>_>s>.9QZ.xY>im>N!?Ru> L۽Ͼ yT>~= $6=KLT ,07:HS,=8>=LJ>p? >| =h ;?TO>  "1;FN[^>?d7 1zj4¼tRSj = (W!+τ>z 뾆6f>?nS !B %-4ERX`bY\dM >H>^8\o>MF=Ho>t(>.;ۚ>3`v)=)})[a=ꤾA >ˇ*> KS >eᘾ傼L7X'==>$=+qԀ½=@v=_cz۾2? 
>F]_ '2<GJNW^",9V>J-[=gX>Ǿ23Vd;y #+11=>SYb=!# =0< I>>X  $()DI0:=H6>GO>9=`> >FB>D =91?ˎоT2>)*b.^ >j>chܬ߾Q> 8>{>#J>.J>}BrTg>>F>yE\s>CF8f= %-CMY`c !57;?@L\7 XD= ̊NQOj=J$?Zx>hb> ?%>?>g9п *.KQRSX4BU[b TR#7?d8>Uy)>L>F7ؾO>q3=-5>ӵ &/368AEOPad" X/2P=*1D0\2.+QYt'>B\",>9-?Q?h=*>پxm*=μ$n=YJo'>c>2;?NW5Y^_c|Zݼ9"> ߽:B<>|W?<?zY{l?z>tֽz;.3=?Q(.GR ,7>KMUZ` "*BCQX !/9:S1֩>j>=x(L?V)C:">3>>E=}ü4]$?>9ؾF[=5>=kT=})gD>= )&  $'DJP]a >Nї><P>cK>?[*=f׾<%4OV[\b 038<AE>QB)?tZ>4뼬`CV%J>u?=  #+-1=IL )6@FHTd?5 R>)肾_>Oc=N>rk>᪾;<c5>gk>a=&3>#?3>A>}>aB4>I:ꮽ:.>$(#=&+IT쓹= |v#=C>>p*?2>0== #=BN > ?f׾~>&g=\b!>=c^>3= '1DJR["4PQX\b K美>Oz6lս&H>`ʽ(rL{'-?Гb>?pv=\ݎ>H=¾7> %(6:YZ^`*5OUck >UC>_$)>se>)?q{<C>-.2KM ,7;(͎>P>?d:>.5?-۾}g>Eb?>ؾdv>>aP$9a/?CEV]>ʽU>rbj2j}z[!"L̽a0k=zw>x>>7>+0x>3o>'>K\=s <q>]=>1B?W_ !)<>FGS038@AHdڟľB>=>Q\gL>㇙>D?>4}u/4?Q>">->R?h>(7T= >I> ,n}҂= >\~C=b t>6ͼr=4T@S?u > . -.5KMSY`c %36U!79Z4T?倽gkJ$Ͻq*>,35P>'> ;Nm?:BY>;M=7)>ey#(Gt=1Ћ<} >c.>n>m=>=3>> 0:@HTW^ )IL5+b=>:Hv>h ȾFо:= 𽷇 UXs &L;4U$Q?=>l> )K>BE  >4r\⾄L%>z$>)>p1P6 FJ   |>+ykxt>x>`>kܽ=Ƹ2?C "$+18<=>]=3> >k=ʉ%>Y>ϙ>]>'%>ga†=F8a>+`DV_ *,OQX[43Oa>P 5$hS>J$潉<>/4A\ab &'PRd! 
*婾pۼn=YC˾o !%?EZ>(ƴCd?3=?lV߽DA>¾>Խ>d=ܒQt;IF> H]> ?˲>'%;#i=U'i~<[_\='<D $)0:@IL i=ATYܽ`>*1=FGBZ=?UW=H>>þ!3=d>>(p?"QW_  8AFTaE<"%֐X*A-' >T>%+&[?Wp#$?H;\y\z>eH?Ƚ>=V=  .17Sc#(,5;G*&+uHo,IK-lj8>;u'(]i߾3Ymﵾnо>=ż= \>!/B)*VV{۾#<->Ze=B=(yɽ,IX> &?U 3Y^`,-M=r!=D7=n c46NZ./ _>WE=d5#=EԮ: ?Mžc%-KMRX\ *+2CJOPV[bd1>$.- 8A>ۛ6ৼTa>%>229 >L>!> dW{`⾿Q3=sdɾRS38'w'ML X;נ/ +Hi'Ͼ45i#?μ =g>>XI>a>6q9>A[>=(67$v>>32>< pw !7FS5=>B"KL:==NJ>2$o>Su% >c_>r>8 ?;<~/%dDq7>>=ns&{μ>bv/0;@ ,9:EQTXZ\ 6HUY_2?B*Nm=2l>wt/(i?Ʀ޾a@Ais>OJc>i>0JT(Q]+?\S>|r=).<G^%*34IOV$CFM<ȫ/ ;[>}:==s rP<0>DE(O4[jWT>-wcCřLk>&]b 28?ACJP[aGH7>;<Ҿ=?ʾ&P>vͼF8H>$=Ժ= DMNWc   #$'+-1R`dJQs> (\>z>„U쾤>$K=2Ӿ)KNf>VH=$>LM1>=+!f>7ARý%8>ϋ(86 z^>  "5BXZ )79;STW^OP=z5=$c>?0I>m>J姼 &*+?QUY_ (.28GMc;R]\ݼ>ᖾ彘?>iu>)r>i:?-SZ:- >S9J=ǧĽ4>=9fl?TW>F>z&4>P>o9>U>,OUV]4.<݅>x((?ᖾ<2;>y>M>Β=!%/0KL-E\`aXYNh>:`${.ɽҜ=.*ДƊE[a% =FHPVb 1:N[\Q>;> ?2ӭ)>>:>0B $'@A[,<>C]^_M;h~>ԨndE c{o=p>='4 #36I 4DJORder8{=e *>q? ?92>׽+p* :dN>r@AxlV>>>fI>Q]?d5R>Cޅ>>l>bʻ= >,dM >H>^8\o>MF=Ho>t(>d" X/2)肾_>Oc=N>rk>᪾;dڟľB>=>Q\gL>㇙>D?>4}ud! 
*婾pۼn=YC˾o !%?EZ>d1>$.- 8A>ۛ6ৼTa>%>dJQs> (\>z>„U쾤>$K=2ӾRcppAnnoy/inst/tinytest/testIndex.R0000644000176200001440000000215113541674477017136 0ustar liggesusers ## if this is set (eg .travis.yml) then run the test if (Sys.getenv("RunAllRcppAnnoyTests") != "yes") exit_file("Skip this test") suppressMessages(library(RcppAnnoy)) a <- new(AnnoyAngular, 10) a$load(system.file("tinytest", "data", "test.tree", package="RcppAnnoy")) ## This might change in the future if we change the search ## algorithm, but in that case let's update the test checkEqual(a$getNNsByItem(0, 10), c(0, 85, 42, 11, 54, 38, 53, 66, 19, 31), msg="check loaded index") a <- new(AnnoyEuclidean, 10) v <- rnorm(10) expect_error(a$addItem(-2, v))#, msg="check negative index", silent=TRUE) expect_error(a$addItem(NA, v))#, msg="check NA index", silent=TRUE) ## modeled after annoy_test.py() and its t i <- new(AnnoyAngular, 10) i$load(system.file("tinytest", "data", "test.tree", package="RcppAnnoy")) u <- i$getItemsVector(99) i$save(tempfile()) v <- i$getItemsVector(99) checkEqual(u, v, msg="getItemVector comparison") j <- new(AnnoyAngular, 10) j$load(system.file("tinytest", "data", "test.tree", package="RcppAnnoy")) w <- i$getItemsVector(99) checkEqual(u, w, msg="getItemVector comparison") RcppAnnoy/inst/NEWS.Rd0000644000176200001440000001670715131020653014212 0ustar liggesusers\name{NEWS} \title{News for Package \pkg{RcppAnnoy}} \newcommand{\ghpr}{\href{https://github.com/eddelbuettel/rcppannoy/pull/#1}{##1}} \newcommand{\ghit}{\href{https://github.com/eddelbuettel/rcppannoy/issues/#1}{##1}} \section{Changes in version 0.0.23 (2026-01-12)}{ \itemize{ \item Add dot product distance metrics (Benjamin James in \ghpr{78}) \item Apply small polish to the documentation (Dirk closing \ghit{79}) \item A new \code{demo()} has been added (Samuel Granjeaud in \ghit{79}) \item Switch to Authors@R in DESCRIPTION \item Several updates to continuous integration and README.md \item Small enhancements to package help files 
\item Updates to vignettes and references \item Vignette now uses \code{Rcpp::asis} builder (Dirk in \ghpr{80}) \item Switch one macro to a function to avoid a compiler nag (Amos Elberg in \ghpr{81}) } } \section{Changes in version 0.0.22 (2024-01-23)}{ \itemize{ \item Replace empty examples macro to satisfy CRAN request. } } \section{Changes in version 0.0.21 (2023-07-02)}{ \itemize{ \item The build setup switched from C++11 to C++17 which offers threading support (which remains off by default to ensure consistent results) \item Upstream code was updated to Annoy 1.17.3; the switch to an explicit C++ namespace has been accommodated (Dirk in \ghpr{75}) } } \section{Changes in version 0.0.20 (2022-10-27)}{ \itemize{ \item Minor tweaks to appease \code{clang-15} and \code{Xcode 14} } } \section{Changes in version 0.0.19 (2021-07-30)}{ \itemize{ \item Minor tweaks to default CI setup and DESCRIPTION file } } \section{Changes in version 0.0.18 (2020-12-15)}{ \itemize{ \item Small tweaks to threading policy header defines (Dirk closing \ghit{65}) \item Vignette code is again compiled during testing (Aaron Lun and Dirk in \ghpr{66} addressing \ghit{64}) \item Upstream code (with Aaron's PR) was synchronized once more (Dirk in \ghpr{67}) \item A new helper function was added to report the Annoy version (Aaron in \ghpr{68}) } } \section{Changes in version 0.0.17 (2020-11-15)}{ \itemize{ \item Upgrade to Annoy 1.17, but default to serial use. \item Add new header file to regroup includes and defines. \item Upgrade CI script to use R with bspm on focal. } } \section{Changes in version 0.0.16 (2020-03-06)}{ \itemize{ \item Use \code{int} in two interfaces (Dirk in \ghpr{59} for upstream PR 460 and closing \ghit{56}). \item Use \code{inline} for two helper functions (Dirk in \ghpr{59} for upstream PR 461 and closing \ghit{57}; also Aaron in \ghpr{58} after earlier discussion). \item Removed a noisy \code{pragma} (Dirk in \ghpr{60} for upstream PR 462).
\item Add a simple helper function displaying compiler status. } } \section{Changes in version 0.0.15 (2020-02-25)}{ \itemize{ \item RcppAnnoy synchronized with upstream PR 455 (Dirk in \ghpr{55}). \item The help page has a small correction thanks to Bill Venables. \item The \code{alloca()} function is now declared portably thanks to a working example in \emph{Writing R Extensions}. } } \section{Changes in version 0.0.14 (2019-11-11)}{ \itemize{ \item RcppAnnoy again synchronized with upstream to ensure builds with older compilers without AVX512 instructions (Dirk in \ghpr{53}). \item The \code{cleanup} script only uses \code{/bin/sh}. } } \section{Changes in version 0.0.13 (2019-09-23)}{ \itemize{ \item In example(), the saved and loaded filename is now obtained via tempfile() to not touch user directories per CRAN Policy (Dirk). \item RcppAnnoy was again synchronized with Annoy upstream leading to enhanced performance and more features (Dirk in \ghpr{48}). \item Minor changes made (and sent as PRs upstream) to adapt both \code{annoylib.h} and \code{mman.h} changes (Dirk). \item A spurious command was removed from one vignette (Peter Hickey in \ghpr{49}). \item Two new user-facing functions onDiskBuild() and unbuild() were added (Dirk in \ghpr{50}). \item Minor tweaks were made to two tinytest-using test files (Dirk). } } \section{Changes in version 0.0.12 (2019-05-12)}{ \itemize{ \item Allow setting of seed (Dirk in \ghpr{41} fixing \ghit{40}). \item Document \code{setSeed} (James Melville in \ghpr{42} documenting \ghit{41}). \item Added documentation (Adam Spannbauer in \ghpr{44} closing \ghit{43}). \item Switched unit testing to the new \pkg{tinytest} package (Dirk in \ghpr{45}). \item The vignette is now pre-made and included as-is in a Sweave document, reducing the number of suggested packages. } } \section{Changes in version 0.0.11 (2018-10-30)}{ \itemize{ \item Synchronized with Annoy upstream (\ghpr{26}, \ghpr{30}, \ghpr{36}).
\item Added new Hamming distance measure functionality; should be considered experimental as the functionality depends on integer values. \item Travis CI use was updated to the R 3.5 PPA (\ghpr{28}) \item New vignette about Annoy use from C++ via Rcpp (Aaron Lun in \ghpr{29} addressing \ghit{19}; also \ghpr{32}, \ghpr{33}) \item The vignette was rewritten using \pkg{pinp} (\ghpr{34}, \ghpr{35}). } } \section{Changes in version 0.0.10 (2017-09-25)}{ \itemize{ \item The \code{getItemsVector()} function no longer crashes (\ghit{24}) } } \section{Changes in version 0.0.9 (2017-08-31)}{ \itemize{ \item Synchronized with Annoy upstream version 1.9.1 \item Minor updates in calls and tests as required by Annoy 1.9.1 \item New Manhattan distance modules along with unit test code \item Additional unit tests from upstream test code carried over \item Binary mode is used for \code{save} (as suggested by @khoran in \ghit{21}) \item A new file \code{init.c} was added with calls to \code{R_registerRoutines()} and \code{R_useDynamicSymbols()} \item Symbol registration is enabled in \code{useDynLib} } } \section{Changes in version 0.0.8 (2016-10-01)}{ \itemize{ \item New functions \code{getNNsByItemList} and \code{getNNsByVectorList}, contributed by Michael Phan-Ba in \ghit{12} \item Added destructor (PR \ghpr{14} by Michael Phan-Ba) \item Extended templatization (PR \ghpr{11} by Dan Dillon) \item Switched to \code{run.sh} for Travis (PR \ghpr{17}) \item Added test for admissible value to \code{addItem} (PR \ghpr{18} closing issue \ghit{13}) } } \section{Changes in version 0.0.7 (2015-11-15)}{ \itemize{ \item Synchronized with Annoy upstream changes \item Minor internal changes required by Annoy changes } } \section{Changes in version 0.0.6 (2015-05-03)}{ \itemize{ \item Synchronized with Annoy upstream changes \item Permit supplying our own RNG \item Minor internal changes } } \section{Changes in version 0.0.5 (2015-01-22)}{ \itemize{ \item Synchronized with Annoy upstream changes } }
\section{Changes in version 0.0.4 (2015-01-22)}{ \itemize{ \item Synchronized with Annoy upstream changes \item Depends on R (>= 3.1) to permit C++11 builds } } \section{Changes in version 0.0.3 (2014-11-17)}{ \itemize{ \item Synchronized with Annoy upstream changes \item Windows support added (Qiang Kou in \ghpr{2}) } } \section{Changes in version 0.0.2 (2014-11-13)}{ \itemize{ \item Added verbosity toggle \item Added regression tests } } \section{Changes in version 0.0.1 (2014-11-08)}{ \itemize{ \item Initial release } } RcppAnnoy/README.md0000644000176200001440000000467515131017743013460 0ustar liggesusers## RcppAnnoy: Rcpp bindings for [Annoy](https://github.com/spotify/annoy) [![CI](https://github.com/eddelbuettel/rcppannoy/workflows/ci/badge.svg)](https://github.com/eddelbuettel/rcppannoy/actions?query=workflow%3Aci) [![License](https://eddelbuettel.github.io/badges/GPL2+.svg)](https://www.r-project.org/Licenses/GPL-2) [![CRAN](https://www.r-pkg.org/badges/version/RcppAnnoy)](https://cran.r-project.org/package=RcppAnnoy) [![r-universe](https://eddelbuettel.r-universe.dev/badges/RcppAnnoy)](https://eddelbuettel.r-universe.dev/RcppAnnoy) [![Dependencies](https://tinyverse.netlify.app/badge/RcppAnnoy)](https://cran.r-project.org/package=RcppAnnoy) [![Downloads](https://cranlogs.r-pkg.org/badges/RcppAnnoy?color=brightgreen)](https://www.r-pkg.org:443/pkg/RcppAnnoy) [![Last Commit](https://img.shields.io/github/last-commit/eddelbuettel/rcppannoy)](https://github.com/eddelbuettel/rcppannoy) ### What is Annoy? [Annoy](https://github.com/spotify/annoy) is a small, fast and lightweight library for Approximate Nearest Neighbours with a particular focus on efficient memory use and the ability to load a pre-saved index. [Annoy](https://github.com/spotify/annoy) is written by [Erik Bernhardsson](https://erikbern.com/). See its page for more on features, its (Python) API, and the other language ports. 
[Annoy](https://github.com/spotify/annoy) is part of the esteemed _let us find other music you may like_ algorithm by [Spotify](https://github.com/spotify/). ### Why this package? It provides a nice example for Rcpp Modules and use of templates: Annoy uses a clean C++ core with templated data type, as well as several distance measures. This package shows that it is easy to wrap both aspects from R giving us multi-lingual approaches to data discovery and machine learning. ### Status The package matches the behaviour of the original Python package in the original Python wrapper for the [Annoy](https://github.com/spotify/annoy) library. It also replicates all unit tests written for the Python frontend, including a test for efficiently `mmap`-ing a binary index file. The package originally built on Linux and OS X, and thanks to a patch by [Qiang Kou](https://github.com/thirdwing) now also builds on Windows. ### Installation You can either install from source via this repo, or install [the CRAN package](https://cran.r-project.org/package=RcppAnnoy) the usual way from [R](https://www.r-project.org). ### Author Dirk Eddelbuettel ### License GPL (>= 2) RcppAnnoy/build/0000755000176200001440000000000015131164023013256 5ustar liggesusersRcppAnnoy/build/vignette.rds0000644000176200001440000000037715131164023015624 0ustar liggesusersm 0Eӗ/n\IBA*"6ڨMBRQw~:M@[K\B/!>i[.C!m"bbjAn aE H$9OXsqzb)&R/JA82J*啟hƢ v?lY7D)]_#KU71Q_4֖vn' H Ҍ; 9 $;iRcppAnnoy/man/0000755000176200001440000000000013763265274012755 5ustar liggesusersRcppAnnoy/man/RcppAnnoy-package.Rd0000644000176200001440000000123214553527625016543 0ustar liggesusers\name{RcppAnnoy-package} \alias{RcppAnnoy-package} \alias{RcppAnnoy} \alias{Rcpp_Annoy} \docType{package} \title{ Rcpp bindings for the Annoy C++ library for approximate nearest neighbors. } \description{ Annoy is a small library written to provide fast and memory-efficient nearest neigbor lookup from a possibly static index which can be shared across processes. 
} \details{ Details about Annoy are available at the reference listed below. } \author{ Dirk Eddelbuettel for the R interface; Erik Bernhardsson for Annoy itself. Maintainer: Dirk Eddelbuettel } \references{ \url{https://github.com/spotify/annoy} } \keyword{package} RcppAnnoy/man/getArchictectureStatus.Rd0000644000176200001440000000077513627327743017745 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/RcppExports.R \name{getArchictectureStatus} \alias{getArchictectureStatus} \title{Report CPU Architecture and Compiler} \usage{ getArchictectureStatus() } \value{ A constant direct created at compile-time describing the extent of AVX instructions (512 bit, 128 bit, or none) and compiler use where currently recognised are MSC (unlikely for R), GCC, Clang, or \sQuote{other}. } \description{ Report CPU Architecture and Compiler } RcppAnnoy/man/getAnnoyVersion.Rd0000644000176200001440000000113213763265274016373 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/version.R \name{getAnnoyVersion} \alias{getAnnoyVersion} \title{Get the Annoy library version} \usage{ getAnnoyVersion(compact = FALSE) } \arguments{ \item{compact}{Logical scalar indicating whether a compact \code{\link{package_version}} should be returned.} } \value{ An integer vector containing the major, minor and patch version numbers; or if \code{compact=TRUE}, a \code{\link{package_version}} object. } \description{ Get the version of the Annoy C++ library that RcppAnnoy was compiled with. 
} \author{ Aaron Lun } RcppAnnoy/man/AnnoyIndex.Rd0000644000176200001440000001234614725061714015316 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/annoy.R \name{AnnoyIndex} \alias{AnnoyIndex} \alias{AnnoyEuclidean} \alias{Rcpp_AnnoyEuclidean-class} \alias{Rcpp_AnnoyEuclidean} \alias{AnnoyAngular} \alias{Rcpp_AnnoyAngular-class} \alias{Rcpp_AnnoyAngular} \alias{AnnoyManhattan} \alias{Rcpp_AnnoyManhattan-class} \alias{Rcpp_AnnoyManhattan} \alias{AnnoyHamming} \alias{Rcpp_AnnoyHamming-class} \alias{Rcpp_AnnoyHamming} \alias{AnnoyDotProduct} \alias{Rcpp_AnnoyDotProduct-class} \alias{Rcpp_AnnoyDotProduct} \title{Approximate Nearest Neighbors with Annoy} \description{ Annoy is a small library written to provide fast and memory-efficient nearest neighbor lookup from a possibly static index which can be shared across processes. } \section{Usage}{ \preformatted{ a <- new(AnnoyEuclidean, vectorsz) a$setSeed(0) a$setVerbose(0) a$addItem(i, dv) a$getNItems() a$getItemsVector(i) a$getDistance(i, j) a$build(n_trees) a$getNNsByItem(i, n) a$getNNsByItemList(i, n, search_k, include_distances) a$getNNsByVector(v, n) a$getNNsByVectorList(v, n, search_k, include_distances) a$save(fn) a$load(fn) a$unload() } } \section{Details}{ \code{new(Class, vectorsz)} Create a new Annoy instance of type \code{Class} where \code{Class} is on of the following: \code{AnnoyEuclidean}, \code{AnnoyAngular}, \code{AnnoyManhattan}, \code{AnnoyHamming}. \code{vectorsz} denotes the length of the vectors that the Annoy instance will be indexing. \code{$addItem(i, v)} Adds item \code{i} (any nonnegative integer) with vector \code{v}. Note that it will allocate memory for \code{max(i) + 1} items. \code{$build(n_trees)} Builds a forest of \code{n_trees} trees. More trees gives higher precision when querying. After calling \code{build}, no more items can be added. \code{$save(fn)} Saves the index to disk as filename \code{fn}. 
After saving, no more items can be added. \code{$load(fn)} Loads (mmaps) an index from filename \code{fn} on disk. \code{$unload()} Unloads index. \code{$getDistance(i, j)} Returns the distance between items \code{i} and \code{j}. \code{$getNNsByItem(i, n)} Returns the \code{n} closest items as an integer vector of indices. \code{$getNNsByVector(v, n)} Same as \code{$getNNsByItem}, but queries by vector \code{v} rather than index \code{i}. \code{$getNNsByItemList(i, n, search_k = -1, include_distances = FALSE)} Returns the n closest items to item \code{i} as a list. During the query it will inspect up to \code{search_k} nodes which defaults to \code{n_trees * n} if not provided. \code{search_k} gives you a run-time tradeoff between better accuracy and speed. If you set \code{include_distances} to \code{TRUE}, it will return a length 2 list with elements \code{"item"} & \code{"distance"}. The \code{"item"} element contains the \code{n} closest items as an integer vector of indices. The optional \code{"distance"} element contains the corresponding distances to \code{"item"} as a numeric vector. \code{$getNNsByVectorList(v, n, search_k = -1, include_distances = FALSE)} Same as \code{$getNNsByItemList}, but queries by vector \code{v} rather than index \code{i}. \code{$getItemsVector(i)} Returns the vector for item \code{i} that was previously added. \code{$getNItems()} Returns the number of items in the index. \code{$setVerbose()} If \code{1} then messages will be printed during processing. If \code{0} then messages will be suppressed during processing. \code{$setSeed()} Set random seed for annoy (integer).
} \examples{ library(RcppAnnoy) # BUILDING ANNOY INDEX --------------------------------------------------------- vector_size <- 10 a <- new(AnnoyEuclidean, vector_size) a$setSeed(42) # Turn on verbose status messages (0 to turn off) a$setVerbose(1) # Load 100 random vectors into index for (i in 1:100) a$addItem(i - 1, runif(vector_size)) # Annoy uses zero indexing # Display number of items in index a$getNItems() # Retrieve item at position 0 in index a$getItemsVector(0) # Calculate distance between items at positions 0 & 1 in index a$getDistance(0, 1) # Build forest with 50 trees a$build(50) # PERFORMING ANNOY SEARCH ------------------------------------------------------ # Retrieve 5 nearest neighbors to item 0 # Returned as integer vector of indices a$getNNsByItem(0, 5) # Retrieve 5 nearest neighbors to item 0 # search_k = -1 will invoke default search_k value of n_trees * n # Return results as list with an element for distance a$getNNsByItemList(0, 5, -1, TRUE) # Retrieve 5 nearest neighbors to item 0 # search_k = -1 will invoke default search_k value of n_trees * n # Return results as list without an element for distance a$getNNsByItemList(0, 5, -1, FALSE) v <- runif(vector_size) # Retrieve 5 nearest neighbors to vector v # Returned as integer vector of indices a$getNNsByVector(v, 5) # Retrieve 5 nearest neighbors to vector v # search_k = -1 will invoke default search_k value of n_trees * n # Return results as list with an element for distance a$getNNsByVectorList(v, 5, -1, TRUE) # SAVING/LOADING ANNOY INDEX --------------------------------------------------- # Create a tempfile, replace with a local file to keep treefile <- tempfile(pattern="annoy", fileext=".tree") # Save annoy tree to disk a$save(treefile) # Load annoy tree from disk a$load(treefile) # Unload index from memory a$unload() } RcppAnnoy/DESCRIPTION0000644000176200001440000000306515131171252013673 0ustar liggesusersPackage: RcppAnnoy Type: Package Title: 'Rcpp' Bindings for 'Annoy', a Library
for Approximate Nearest Neighbors Version: 0.0.23 Date: 2026-01-12 Authors@R: c(person("Dirk", "Eddelbuettel", role = c("aut", "cre"), email = "edd@debian.org", comment = c(ORCID = "0000-0001-6419-907X")), person("Erik", "Bernhardsson", role = c("aut"), comment = "Principal author of Annoy")) Description: 'Annoy' is a small C++ library for Approximate Nearest Neighbors written for efficient memory usage as well as an ability to load from / save to disk. This package provides an R interface by relying on the 'Rcpp' package, exposing the same interface as the original Python wrapper to 'Annoy'. See <https://github.com/spotify/annoy> for more on 'Annoy'. 'Annoy' is released under Version 2.0 of the Apache License. Also included is a small Windows port of 'mmap' which is released under the MIT license. License: GPL (>= 2) Depends: R (>= 3.1) Imports: methods, Rcpp LinkingTo: Rcpp Suggests: tinytest URL: https://github.com/eddelbuettel/rcppannoy, https://dirk.eddelbuettel.com/code/rcpp.annoy.html BugReports: https://github.com/eddelbuettel/rcppannoy/issues NeedsCompilation: yes RoxygenNote: 7.3.2 Encoding: UTF-8 VignetteBuilder: Rcpp Packaged: 2026-01-12 12:24:51 UTC; edd Author: Dirk Eddelbuettel [aut, cre] (ORCID: <https://orcid.org/0000-0001-6419-907X>), Erik Bernhardsson [aut] (Principal author of Annoy) Maintainer: Dirk Eddelbuettel <edd@debian.org> Repository: CRAN Date/Publication: 2026-01-12 13:10:02 UTC