Rtsne/0000755000176200001440000000000013371603635011354 5ustar liggesusersRtsne/inst/0000755000176200001440000000000013034657072012331 5ustar liggesusersRtsne/inst/CITATION0000644000176200001440000000263012662606547013476 0ustar liggesuserscitHeader("t-SNE is described in (Van der Maaten & Hinton 2008), while the Barnes-Hut t-SNE implementation is described in (Van der Maaten 2014). To cite the Rtsne package specifically, use (Krijthe 2015).") citEntry( entry="Article", title="Visualizing High-Dimensional Data Using t-SNE", volume= "9", pages="2579-2605", year="2008", author="L.J.P. van der Maaten and G.E. Hinton", journal="Journal of Machine Learning Research", textVersion="L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. Journal of Machine Learning Research 9(Nov):2579-2605, 2008.") citEntry( entry="Article", title="Accelerating t-SNE using Tree-Based Algorithms", volume= "15", pages="3221-3245", year="2014", author="L.J.P. van der Maaten", journal="Journal of Machine Learning Research", textVersion="L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms. Journal of Machine Learning Research 15(Oct):3221-3245, 2014." ) citEntry( entry="Manual", title = "{Rtsne}: T-Distributed Stochastic Neighbor Embedding using Barnes-Hut Implementation", author = "Jesse H. Krijthe", year = "2015", note = sprintf("R package version %s", meta$Version), url = "https://github.com/jkrijthe/Rtsne", textVersion="Jesse H. Krijthe (2015). Rtsne: T-Distributed Stochastic Neighbor Embedding using a Barnes-Hut Implementation, URL: https://github.com/jkrijthe/Rtsne" ) Rtsne/tests/0000755000176200001440000000000013366634402012516 5ustar liggesusersRtsne/tests/testthat.R0000644000176200001440000000004512511021433014461 0ustar liggesuserslibrary(testthat) test_check("Rtsne")Rtsne/tests/testthat/0000755000176200001440000000000013371603635014356 5ustar liggesusersRtsne/tests/testthat/test_neighbors.R0000644000176200001440000000674713366030705017532 0ustar liggesuserscontext("Rtsne neighbor input") set.seed(101) # The original iris_matrix has a few tied distances, which alters the order of nearest neighbours. # This then alters the order of addition when computing various statistics; # which results in small rounding errors that are amplified across t-SNE iterations. # Hence, I have to simulate a case where no ties are possible. simdata <- matrix(rnorm(234*5), nrow=234) NNFUN <- function(D, K) # A quick reference function for computing NNs, to avoid depending on other packages. { all.indices <- matrix(0L, nrow(D), K) all.distances <- matrix(0, nrow(D), K) for (i in seq_len(nrow(D))) { current <- D[i,] by.dist <- setdiff(order(current), i) all.indices[i,] <- head(by.dist, ncol(all.indices)) all.distances[i,] <- current[all.indices[i,]] } list(index=all.indices, distance=all.distances) } test_that("Rtsne with nearest-neighbor input compares to distance matrix input", { D <- as.matrix(dist(simdata)) out <- NNFUN(D, 90) # 3 * perplexity all.indices <- out$index all.distances <- out$distance # The vptree involves a few R::runif calls, which alters the seed in the precomputed distance case. # This results in different random initialization of Y. # Thus, we need to supply a pre-computed Y as well. Y_in <- matrix(runif(nrow(simdata)*2), ncol=2) out <- Rtsne_neighbors(all.indices, all.distances, Y_init=Y_in, perplexity=30, max_iter=1) blah <- Rtsne(D, is_distance=TRUE, Y_init=Y_in, perplexity=30, max_iter=1) expect_equal(out$Y, blah$Y) # Trying again with a different number of neighbors. Y_in <- matrix(runif(nrow(simdata)*2), ncol=2) out <- Rtsne_neighbors(all.indices[,1:30], all.distances[,1:30], Y_init=Y_in, perplexity=10) blah <- Rtsne(D, is_distance=TRUE, Y_init=Y_in, perplexity=10) expect_equal(out$Y, blah$Y) }) test_that("Rtsne with nearest-neighbor input behaves upon normalization", { D <- as.matrix(dist(normalize_input(simdata))) out <- NNFUN(D, 90) # 3 * perplexity all.indices <- out$index all.distances <- out$distance # The vptree involves a few R::runif calls, which alters the seed in the precomputed distance case. # This results in different random initialization of Y. # Thus, we need to supply a pre-computed Y as well. Y_in <- matrix(runif(nrow(simdata)*2), ncol=2) out <- Rtsne_neighbors(all.indices, all.distances, Y_init=Y_in, perplexity=30, max_iter=1) blah <- Rtsne(D, is_distance=TRUE, Y_init=Y_in, perplexity=30, max_iter=1) expect_equal(out$Y, blah$Y) # Trying again with a different number of neighbors. Y_in <- matrix(runif(nrow(simdata)*2), ncol=2) out <- Rtsne_neighbors(all.indices[,1:30], all.distances[,1:30], Y_init=Y_in, perplexity=10) blah <- Rtsne(D, is_distance=TRUE, Y_init=Y_in, perplexity=10) expect_equal(out$Y, blah$Y) }) test_that("error conditions are correctly explored", { expect_error(Rtsne_neighbors("yay", matrix(0, 50, 10), Y_init=Y_in, perplexity=10), "matrix") expect_error(Rtsne_neighbors(matrix(0L, 50, 5), matrix(0, 50, 10), Y_init=Y_in, perplexity=10), "same dimensions") expect_error(Rtsne_neighbors(matrix(0L, 50, 10), matrix(0, 50, 10), Y_init=Y_in, perplexity=10), "invalid indices") expect_error(Rtsne_neighbors(matrix(51L, 50, 10), matrix(0, 50, 10), Y_init=Y_in, perplexity=10), "invalid indices") expect_error(Rtsne_neighbors(matrix(NA_real_, 50, 10), matrix(0, 50, 10), Y_init=Y_in, perplexity=10), "invalid indices") }) Rtsne/tests/testthat/test_Rtsne.R0000644000176200001440000001401513366625604016640 0ustar liggesuserscontext("Rtsne main function") # Prepare iris dataset iris_unique <- unique(iris) # Remove duplicates Xscale<-normalize_input(as.matrix(iris_unique[,1:4])) distmat <- as.matrix(dist(Xscale)) # Run models to compare to iter_equal <- 500 test_that("Scaling gives the expected result", { Xscale2 <- scale(as.matrix(iris_unique[,1:4]), center = TRUE, scale=FALSE) Xscale2 <- scale(Xscale, center=FALSE, scale=rep(max(abs(Xscale)),4)) expect_equivalent(Xscale,Xscale2) }) test_that("Manual distance calculation equals C++ distance calculation", { # Does not work on 32 bit windows skip_on_cran() # Exact set.seed(50) tsne_matrix <- Rtsne(as.matrix(iris_unique[,1:4]),verbose=FALSE, is_distance = FALSE,theta=0.0,max_iter=iter_equal, pca=FALSE, normalize=TRUE) set.seed(50) tsne_dist <- Rtsne(distmat, verbose=FALSE, is_distance = TRUE, theta = 0.0, max_iter=iter_equal) expect_equal(tsne_matrix$Y,tsne_dist$Y) # Inexact set.seed(50) tsne_matrix <- Rtsne(as.matrix(iris_unique[,1:4]),verbose=FALSE, is_distance = FALSE, theta=0.1,max_iter=iter_equal,pca=FALSE) set.seed(50) tsne_dist <- Rtsne(distmat, verbose=FALSE, is_distance = TRUE, theta = 0.1, max_iter=iter_equal, pca=FALSE) expect_equal(tsne_matrix$Y,tsne_dist$Y) }) test_that("Accepts dist", { # Exact set.seed(50) tsne_out_dist_matrix <- Rtsne(distmat, is_distance = TRUE, theta=0.0, max_iter=iter_equal) set.seed(50) tsne_out_dist <- Rtsne(dist(Xscale),theta=0.0,max_iter=iter_equal) expect_equal(tsne_out_dist$Y,tsne_out_dist_matrix$Y) # Inexact set.seed(50) tsne_out_dist_matrix <- Rtsne(distmat, is_distance = TRUE, theta=0.1, max_iter=iter_equal) set.seed(50) tsne_out_dist <- Rtsne(dist(Xscale), theta=0.1, max_iter=iter_equal) expect_equal(tsne_out_dist$Y,tsne_out_dist_matrix$Y) }) test_that("Accepts data.frame", { # Exact set.seed(50) tsne_out_matrix <- Rtsne(as.matrix(iris_unique[,1:4]),dims=1,verbose=FALSE, is_distance = FALSE,theta=0.0,max_iter=iter_equal,pca=FALSE) set.seed(50) tsne_out_df <- Rtsne(iris_unique[,1:4],dims=1,verbose=FALSE, is_distance = FALSE,theta=0.0,pca=FALSE,max_iter=iter_equal,num_threads=1) expect_equal(tsne_out_matrix$Y,tsne_out_df$Y) # Inexact set.seed(50) tsne_out_matrix_bh <- Rtsne(as.matrix(iris_unique[,1:4]),verbose=FALSE, is_distance = FALSE,theta=0.1,pca=FALSE,max_iter=iter_equal) set.seed(50) tsne_out_df <- Rtsne(iris_unique[,1:4],verbose=FALSE, is_distance = FALSE,theta=0.1,pca=FALSE,max_iter=iter_equal,num_threads=1) expect_equal(tsne_out_matrix_bh$Y,tsne_out_df$Y) }) test_that("OpenMP with different threads returns same result",{ # Does not work on windows skip_on_cran() set.seed(50) tsne_out_df1 <- Rtsne(iris_unique[,1:4],dims=3,verbose=FALSE, is_distance = FALSE, theta=0.1,pca=FALSE,max_iter=iter_equal,num_threads=1) set.seed(50) tsne_out_df2 <- Rtsne(iris_unique[,1:4],dims=3,verbose=FALSE, is_distance = FALSE, theta=0.1,pca=FALSE,max_iter=iter_equal,num_threads=2) set.seed(50) tsne_out_df3 <- Rtsne(iris_unique[,1:4],dims=3,verbose=FALSE, is_distance = FALSE, theta=0.1,pca=FALSE,max_iter=iter_equal,num_threads=3) expect_equal(tsne_out_df1$Y,tsne_out_df2$Y) expect_equal(tsne_out_df2$Y,tsne_out_df3$Y) }) test_that("Continuing from initialization gives approximately the same result as direct run", { # Does not work exactly due to resetting of "gains". iter_equal <- 1000 extra_iter <- 200 #Exact set.seed(50) tsne_out_full <- Rtsne(iris_unique[,1:4], perplexity=3,theta=0.0,pca=FALSE, max_iter=iter_equal,final_momentum = 0) set.seed(50) tsne_out_part1 <- Rtsne(iris_unique[,1:4], perplexity=3,theta=0.0,pca=FALSE, max_iter=iter_equal-extra_iter,final_momentum = 0) tsne_out_part2 <- Rtsne(iris_unique[,1:4], perplexity=3,theta=0.0,pca=FALSE, max_iter=extra_iter,Y_init=tsne_out_part1$Y,final_momentum = 0) expect_equivalent(dist(tsne_out_full$Y),dist(tsne_out_part2$Y),tolerance=0.01) #Inexact set.seed(50) tsne_out_full <- Rtsne(iris_unique[,1:4],final_momentum=0,theta=0.1,pca=FALSE,max_iter=iter_equal) set.seed(50) tsne_out_part1 <- Rtsne(iris_unique[,1:4],final_momentum=0,theta=0.1,pca=FALSE,max_iter=iter_equal-extra_iter) set.seed(50) tsne_out_part2 <- Rtsne(iris_unique[,1:4],final_momentum=0,theta=0.1,pca=FALSE,max_iter=extra_iter,Y_init=tsne_out_part1$Y) expect_equivalent(dist(tsne_out_full$Y),dist(tsne_out_part2$Y),tolerance=0.01) }) test_that("partial_pca FALSE and TRUE give similar results", { # Only first few iterations iter_equal <- 5 set.seed(42) fat_data <- rbind(sapply(runif(200,-1,1), function(x) rnorm(200,x)), sapply(runif(200,-1,1), function(x) rnorm(200,x))) set.seed(42) tsne_out_prcomp <- Rtsne(fat_data, max_iter = iter_equal) set.seed(42) tsne_out_irlba <- Rtsne(fat_data, partial_pca = T, max_iter = iter_equal) # Sign of principal components are arbitrary so even with same seed tSNE coordinates are not the same expect_equal(tsne_out_prcomp$costs, tsne_out_irlba$costs, tolerance = .01, scale = 1) }) test_that("Error conditions", { expect_error(Rtsne("test", matrix(0, 50, 10), Y_init=Y_in, perplexity=10), "matrix") expect_error(Rtsne(distmat,is_distance = 3),"logical") expect_error(Rtsne(matrix(0,2,3),is_distance = TRUE),"Input") expect_error(Rtsne(matrix(0,100,3)),"duplicates") expect_error(Rtsne(matrix(0,2,3),pca_center = 2),"TRUE") expect_error(Rtsne(matrix(0,2,3),initial_dims=1.3),"dimensionality") expect_error(Rtsne(matrix(0,2,3),dims=4),"dims") expect_error(Rtsne(matrix(0,2,3),max_iter=1.5),"should") }) test_that("Verbose option", { expect_output(Rtsne(iris_unique[,1:4],pca=TRUE,verbose=TRUE,max_iter=150),"Fitting performed") }) Rtsne/src/0000755000176200001440000000000013366634402012143 5ustar liggesusersRtsne/src/Makevars0000644000176200001440000000212113366634402013633 0ustar liggesusers## Use the R_HOME indirection to support installations of multiple R version ## PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS) PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) ## As an alternative, one can also add this code in a file 'configure' ## ## PKG_LIBS=`${R_HOME}/bin/Rscript -e "Rcpp:::LdFlags()"` ## ## sed -e "s|@PKG_LIBS@|${PKG_LIBS}|" \ ## src/Makevars.in > src/Makevars ## ## which together with the following file 'src/Makevars.in' ## ## PKG_LIBS = @PKG_LIBS@ ## ## can be used to create src/Makevars dynamically. This scheme is more ## powerful and can be expanded to also check for and link with other ## libraries. It should be complemented by a file 'cleanup' ## ## rm src/Makevars ## ## which removes the autogenerated file src/Makevars. ## ## Of course, autoconf can also be used to write configure files. This is ## done by a number of packages, but recommended only for more advanced users ## comfortable with autoconf and its related tools. Rtsne/src/tsne.h0000644000176200001440000001044113366634402013265 0ustar liggesusers/* * * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Delft University of Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * */ #include "datapoint.h" #include #ifndef TSNE_H #define TSNE_H static inline double sign_tsne(double x) { return (x == .0 ? .0 : (x < .0 ? -1.0 : 1.0)); } template class TSNE { public: TSNE(double perplexity, double theta, bool verbose, int max_iter, bool init, int stop_lying_iter, int mom_switch_iter, double momentum, double final_momentum, double eta, double exaggeration_factor,int num_threads); void run(double* X, unsigned int N, int D, double* Y, bool distance_precomputed, double* cost, double* itercost); void run(const int* nn_index, const double* nn_dist, unsigned int N, int K, double* Y, double* cost, double* itercost); private: void symmetrizeMatrix(unsigned int N); void trainIterations(unsigned int N, double* Y, double* cost, double* itercost); void computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, unsigned int N, int D, double* dC, double theta); void computeExactGradient(double* P, double* Y, unsigned int N, int D, double* dC); double evaluateError(double* P, double* Y, unsigned int N, int D); double evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, unsigned int N, int D, double theta); void getCost(double* P, double* Y, unsigned int N, int D, double* costs); void getCost(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, unsigned int N, int D, double theta, double* costs); void zeroMean(double* X, unsigned int N, int D); void computeGaussianPerplexity(double* X, unsigned int N, int D, bool distance_precomputed); template void computeGaussianPerplexity(double* X, unsigned int N, int D, int K); void computeGaussianPerplexity(const int* nn_dex, const double* nn_dist, unsigned int N, int K); void setupApproximateMemory(unsigned int N, int K); void computeProbabilities(const double perplexity, const int K, const double* distances, double* cur_P); void computeSquaredEuclideanDistance(double* X, unsigned int N, int D, double* DD); void computeSquaredEuclideanDistanceDirect(double* X, unsigned int N, int D, double* DD); double randn(); // Member variables. double perplexity, theta, momentum, final_momentum, eta, exaggeration_factor; int max_iter, stop_lying_iter, mom_switch_iter, num_threads; bool verbose, init, exact; std::vector row_P, col_P; std::vector val_P, P; }; #endif Rtsne/src/sptree.cpp0000644000176200001440000003173413366634402014161 0ustar liggesusers/* * * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Delft University of Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * */ #include #include #include #include #include #include #include #include "sptree.h" // Constructs cell template Cell::Cell() { } template Cell::Cell(double* inp_corner, double* inp_width) { for(int d = 0; d < NDims; d++) setCorner(d, inp_corner[d]); for(int d = 0; d < NDims; d++) setWidth( d, inp_width[d]); } // Destructs cell template Cell::~Cell() { } template double Cell::getCorner(unsigned int d) const { return corner[d]; } template double Cell::getWidth(unsigned int d) const { return width[d]; } template void Cell::setCorner(unsigned int d, double val) { corner[d] = val; } template void Cell::setWidth(unsigned int d, double val) { width[d] = val; } // Checks whether a point lies in a cell template bool Cell::containsPoint(double point[]) const { for(int d = 0; d < NDims; d++) { if(corner[d] - width[d] > point[d]) return false; if(corner[d] + width[d] < point[d]) return false; } return true; } // Default constructor for SPTree -- build tree, too! template SPTree::SPTree(double* inp_data, unsigned int N) { // Compute mean, width, and height of current map (boundaries of SPTree) int nD = 0; double* mean_Y = (double*) calloc(NDims, sizeof(double)); double* min_Y = (double*) malloc(NDims * sizeof(double)); double* max_Y = (double*) malloc(NDims * sizeof(double)); for(unsigned int d = 0; d < NDims; d++) { min_Y[d] = DBL_MAX; max_Y[d] = -DBL_MAX; } for(unsigned int n = 0; n < N; n++) { for(unsigned int d = 0; d < NDims; d++) { mean_Y[d] += inp_data[n * NDims + d]; if(inp_data[nD + d] < min_Y[d]) min_Y[d] = inp_data[nD + d]; if(inp_data[nD + d] > max_Y[d]) max_Y[d] = inp_data[nD + d]; } nD += NDims; } for(int d = 0; d < NDims; d++) mean_Y[d] /= (double) N; // Construct SPTree double* width = (double*) malloc(NDims * sizeof(double)); for(int d = 0; d < NDims; d++) width[d] = max_tsne(max_Y[d] - mean_Y[d], mean_Y[d] - min_Y[d]) + 1e-5; init(NULL, inp_data, mean_Y, width); fill(N); // Clean up memory free(mean_Y); free(max_Y); free(min_Y); free(width); } // Constructor for SPTree with particular size and parent -- build the tree, too! template SPTree::SPTree(double* inp_data, unsigned int N, double* inp_corner, double* inp_width) { init(NULL, inp_data, inp_corner, inp_width); fill(N); } // Constructor for SPTree with particular size (do not fill the tree) template SPTree::SPTree(double* inp_data, double* inp_corner, double* inp_width) { init(NULL, inp_data, inp_corner, inp_width); } // Constructor for SPTree with particular size and parent (do not fill tree) template SPTree::SPTree(SPTree* inp_parent, double* inp_data, double* inp_corner, double* inp_width) { init(inp_parent, inp_data, inp_corner, inp_width); } // Constructor for SPTree with particular size and parent -- build the tree, too! template SPTree::SPTree(SPTree* inp_parent, double* inp_data, unsigned int N, double* inp_corner, double* inp_width) { init(inp_parent, inp_data, inp_corner, inp_width); fill(N); } // Main initialization function template void SPTree::init(SPTree* inp_parent, double* inp_data, double* inp_corner, double* inp_width) { parent = inp_parent; data = inp_data; is_leaf = true; size = 0; cum_size = 0; for(unsigned int d = 0; d < NDims; d++) boundary.setCorner(d, inp_corner[d]); for(unsigned int d = 0; d < NDims; d++) boundary.setWidth( d, inp_width[d]); for(unsigned int i = 0; i < no_children; i++) children[i] = NULL; for(unsigned int d = 0; d < NDims; d++) center_of_mass[d] = .0; } // Destructor for SPTree template SPTree::~SPTree() { for(unsigned int i = 0; i < no_children; i++) { if(children[i] != NULL) delete children[i]; } } // Update the data underlying this tree template void SPTree::setData(double* inp_data) { data = inp_data; } // Get the parent of the current tree template SPTree* SPTree::getParent() { return parent; } // Insert a point into the SPTree template bool SPTree::insert(unsigned int new_index) { // Ignore objects which do not belong in this quad tree double* point = data + new_index * NDims; if(!boundary.containsPoint(point)) return false; // Online update of cumulative size and center-of-mass cum_size++; double mult1 = (double) (cum_size - 1) / (double) cum_size; double mult2 = 1.0 / (double) cum_size; for(unsigned int d = 0; d < NDims; d++) { center_of_mass[d] = center_of_mass[d] * mult1 + mult2 * point[d]; } // If there is space in this quad tree and it is a leaf, add the object here if(is_leaf && size < QT_NODE_CAPACITY) { index[size] = new_index; size++; return true; } // Don't add duplicates for now (this is not very nice) bool any_duplicate = false; for(unsigned int n = 0; n < size; n++) { bool duplicate = true; for(unsigned int d = 0; d < NDims; d++) { if(point[d] != data[index[n] * NDims + d]) { duplicate = false; break; } } any_duplicate = any_duplicate | duplicate; } if(any_duplicate) return true; // Otherwise, we need to subdivide the current cell if(is_leaf) subdivide(); // Find out where the point can be inserted for(unsigned int i = 0; i < no_children; i++) { if(children[i]->insert(new_index)) return true; } // Otherwise, the point cannot be inserted (this should never happen) return false; } // Create four children which fully divide this cell into four quads of equal area template void SPTree::subdivide() { // Create new children double new_corner[NDims]; double new_width[NDims]; for(unsigned int i = 0; i < no_children; i++) { unsigned int div = 1; for(unsigned int d = 0; d < NDims; d++) { new_width[d] = .5 * boundary.getWidth(d); if((i / div) % 2 == 1) new_corner[d] = boundary.getCorner(d) - .5 * boundary.getWidth(d); else new_corner[d] = boundary.getCorner(d) + .5 * boundary.getWidth(d); div *= 2; } children[i] = new SPTree(this, data, new_corner, new_width); } // Move existing points to correct children for(unsigned int i = 0; i < size; i++) { bool success = false; for(unsigned int j = 0; j < no_children; j++) { if(!success) success = children[j]->insert(index[i]); } index[i] = -1; } // Empty parent node size = 0; is_leaf = false; } // Build SPTree on dataset template void SPTree::fill(unsigned int N) { for(unsigned int i = 0; i < N; i++) insert(i); } // Checks whether the specified tree is correct template bool SPTree::isCorrect() { for(unsigned int n = 0; n < size; n++) { double* point = data + index[n] * NDims; if(!boundary.containsPoint(point)) return false; } if(!is_leaf) { bool correct = true; for(int i = 0; i < no_children; i++) correct = correct && children[i]->isCorrect(); return correct; } else return true; } // Build a list of all indices in SPTree template void SPTree::getAllIndices(unsigned int* indices) { getAllIndices(indices, 0); } // Build a list of all indices in SPTree template unsigned int SPTree::getAllIndices(unsigned int* indices, unsigned int loc) { // Gather indices in current quadrant for(unsigned int i = 0; i < size; i++) indices[loc + i] = index[i]; loc += size; // Gather indices in children if(!is_leaf) { for(int i = 0; i < no_children; i++) loc = children[i]->getAllIndices(indices, loc); } return loc; } template unsigned int SPTree::getDepth() { if(is_leaf) return 1; int depth = 0; for(unsigned int i = 0; i < no_children; i++) depth = max_tsne(depth, children[i]->getDepth()); return 1 + depth; } // Compute non-edge forces using Barnes-Hut algorithm template double SPTree::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[]) const { double resultSum = 0; double buff[NDims]; // make buff local for parallelization // Make sure that we spend no time on empty nodes or self-interactions if(cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) return resultSum; // Compute distance between point and center-of-mass double sqdist = .0; unsigned int ind = point_index * NDims; for(unsigned int d = 0; d < NDims; d++) { buff[d] = data[ind + d] - center_of_mass[d]; sqdist += buff[d] * buff[d]; } // Check whether we can use this node as a "summary" double max_width = 0.0; double cur_width; for(unsigned int d = 0; d < NDims; d++) { cur_width = boundary.getWidth(d); max_width = (max_width > cur_width) ? max_width : cur_width; } if(is_leaf || max_width / sqrt(sqdist) < theta) { // Compute and add t-SNE force between point and current node sqdist = 1.0 / (1.0 + sqdist); double mult = cum_size * sqdist; resultSum += mult; mult *= sqdist; for(unsigned int d = 0; d < NDims; d++) neg_f[d] += mult * buff[d]; } else { // Recursively apply Barnes-Hut to children for(unsigned int i = 0; i < no_children; i++){ resultSum += children[i]->computeNonEdgeForces(point_index, theta, neg_f); } } return resultSum; } // Computes edge forces template void SPTree::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, unsigned int N, double* pos_f, int num_threads) const { // Loop over all edges in the graph #pragma omp parallel for schedule(static) num_threads(num_threads) for(unsigned int n = 0; n < N; n++) { unsigned int ind1 = n * NDims; for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { double buff[NDims]; // make buff local for parallelization // Compute pairwise distance and Q-value double sqdist = 1.0; unsigned int ind2 = col_P[i] * NDims; for(unsigned int d = 0; d < NDims; d++) { buff[d] = data[ind1 + d] - data[ind2 + d]; sqdist += buff[d] * buff[d]; } sqdist = val_P[i] / sqdist; // Sum positive force for(unsigned int d = 0; d < NDims; d++) pos_f[ind1 + d] += sqdist * buff[d]; } } } // Print out tree template void SPTree::print() { if(cum_size == 0) { Rprintf("Empty node\n"); return; } if(is_leaf) { Rprintf("Leaf node; data = ["); for(unsigned int i = 0; i < size; i++) { double* point = data + index[i] * NDims; for(int d = 0; d < NDims; d++) Rprintf("%f, ", point[d]); Rprintf(" (index = %d)", index[i]); if(i < size - 1) Rprintf("\n"); else Rprintf("]\n"); } } else { Rprintf("Intersection node with center-of-mass = ["); for(int d = 0; d < NDims; d++) Rprintf("%f, ", center_of_mass[d]); Rprintf("]; children are:\n"); for(int i = 0; i < no_children; i++) children[i]->print(); } } // declare templates explicitly template class SPTree<1>; template class SPTree<2>; template class SPTree<3>; Rtsne/src/normalize_input.cpp0000644000176200001440000000206413366634402016070 0ustar liggesusers#include "Rcpp.h" #include // Function that performs the matrix normalization. // [[Rcpp::export]] Rcpp::NumericMatrix normalize_input_cpp(Rcpp::NumericMatrix input) { // Rows are observations, columns are variables. Rcpp::NumericMatrix output=Rcpp::clone(input); const int N=output.nrow(), D=output.ncol(); // Running through each column and centering it. Rcpp::NumericMatrix::iterator oIt=output.begin(); for (int d=0; d max_X) max_X = tmp; } for (oIt=output.begin(); oIt!=output.end(); ++oIt) { *oIt /= max_X; } return output; } Rtsne/src/sptree.h0000644000176200001440000001044513366634402013622 0ustar liggesusers/* * * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Delft University of Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * */ #ifndef SPTREE_H #define SPTREE_H using namespace std; static inline double max_tsne(double x, double y) { return (x <= y ? y : x); } template class Cell { double corner[NDims]; double width[NDims]; public: Cell(); Cell(double* inp_corner, double* inp_width); ~Cell(); double getCorner(unsigned int d) const; double getWidth(unsigned int d) const; void setCorner(unsigned int d, double val); void setWidth(unsigned int d, double val); bool containsPoint(double point[]) const; }; template class SPTree { public: enum { no_children = 2 * SPTree::no_children }; private: // Fixed constants static const unsigned int QT_NODE_CAPACITY = 1; // Properties of this node in the tree SPTree* parent; unsigned int dimension; bool is_leaf; unsigned int size; unsigned int cum_size; int num_threads; // Axis-aligned bounding box stored as a center with half-dimensions to represent the boundaries of this quad tree Cell boundary; // Indices in this space-partitioning tree node, corresponding center-of-mass, and list of all children double* data; double center_of_mass[NDims]; unsigned int index[QT_NODE_CAPACITY]; // Children SPTree* children[no_children]; public: SPTree(double* inp_data, unsigned int N); SPTree(double* inp_data, double* inp_corner, double* inp_width); SPTree(double* inp_data, unsigned int N, double* inp_corner, double* inp_width); SPTree(SPTree* inp_parent, double* inp_data, unsigned int N, double* inp_corner, double* inp_width); SPTree(SPTree* inp_parent, double* inp_data, double* inp_corner, double* inp_width); ~SPTree(); void setData(double* inp_data); SPTree* getParent(); void construct(Cell boundary); bool insert(unsigned int new_index); void subdivide(); bool isCorrect(); void rebuildTree(); void getAllIndices(unsigned int* indices); unsigned int getDepth(); double computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[]) const; void computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, unsigned int N, double* pos_f, int num_threads) const; void print(); private: void init(SPTree* inp_parent, double* inp_data, double* inp_corner, double* inp_width); void fill(unsigned int N); unsigned int getAllIndices(unsigned int* indices, unsigned int loc); bool isChild(unsigned int test_index, unsigned int start, unsigned int end); }; template <> struct SPTree<0> { enum { no_children = 1 }; }; #endif Rtsne/src/tsne.cpp0000644000176200001440000007227313366634402013633 0ustar liggesusers/* * * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Delft University of Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * */ #include #include #include #include #include #include #include #include "sptree.h" #include "vptree.h" #include "tsne.h" #ifdef _OPENMP #include #endif extern "C" { #include } using namespace std; template TSNE::TSNE(double Perplexity, double Theta, bool Verbose, int Max_iter, bool Init, int Stop_lying_iter, int Mom_switch_iter, double Momentum, double Final_momentum, double Eta, double Exaggeration_factor, int Num_threads) : perplexity(Perplexity), theta(Theta), momentum(Momentum), final_momentum(Final_momentum), eta(Eta), exaggeration_factor(Exaggeration_factor), max_iter(Max_iter), stop_lying_iter(Stop_lying_iter), mom_switch_iter(Mom_switch_iter), num_threads(Num_threads), verbose(Verbose), init(Init), exact(theta==.0) { #ifdef _OPENMP int threads = num_threads; if (num_threads==0) { threads = omp_get_max_threads(); } // Print notice whether OpenMP is used if (verbose) Rprintf("OpenMP is working. %d threads.\n", threads); #endif return; } // Perform t-SNE template void TSNE::run(double* X, unsigned int N, int D, double* Y, bool distance_precomputed, double* cost, double* itercost) { if(N - 1 < 3 * perplexity) { Rcpp::stop("Perplexity too large for the number of data points!\n"); } if (verbose) Rprintf("Using no_dims = %d, perplexity = %f, and theta = %f\n", NDims, perplexity, theta); if (verbose) Rprintf("Computing input similarities...\n"); clock_t start = clock(); // Compute input similarities for exact t-SNE if(exact) { // Compute similarities computeGaussianPerplexity(X, N, D, distance_precomputed); // Symmetrize input similarities if (verbose) Rprintf("Symmetrizing...\n"); for(unsigned long n = 0; n < N; n++) { for(unsigned long m = n + 1; m < N; m++) { P[n * N + m] += P[m * N + n]; P[m * N + n] = P[n * N + m]; } } double sum_P = 0; for(size_t i = 0; i < P.size(); i++) sum_P += P[i]; for(size_t i = 0; i < P.size(); i++) P[i] /= sum_P; } // Compute input similarities for approximate t-SNE else { int K=3*perplexity; // Compute asymmetric pairwise input similarities if (distance_precomputed) { computeGaussianPerplexity(X, N, D, K); } else { computeGaussianPerplexity(X, N, D, K); } // Symmetrize input similarities symmetrizeMatrix(N); double sum_P = .0; for(unsigned int i = 0; i < row_P[N]; i++) sum_P += val_P[i]; for(unsigned int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P; } if (verbose) { clock_t end = clock(); if(exact) Rprintf("Done in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC); else Rprintf("Done in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N)); } trainIterations(N, Y, cost, itercost); return; } // Perform t-SNE with nearest neighbor results. template void TSNE::run(const int* nn_index, const double* nn_dist, unsigned int N, int K, double* Y, double* cost, double* itercost) { if(N - 1 < 3 * perplexity) { Rcpp::stop("Perplexity too large for the number of data points!\n"); } if (verbose) Rprintf("Using no_dims = %d, perplexity = %f, and theta = %f\n", NDims, perplexity, theta); if (verbose) Rprintf("Computing input similarities...\n"); clock_t start = clock(); // Compute asymmetric pairwise input similarities computeGaussianPerplexity(nn_index, nn_dist, N, K); // Symmetrize input similarities symmetrizeMatrix(N); double sum_P = .0; for(unsigned int i = 0; i < row_P[N]; i++) sum_P += val_P[i]; for(unsigned int i = 0; i < row_P[N]; i++) val_P[i] /= sum_P; if (verbose) { clock_t end = clock(); if(exact) Rprintf("Done in %4.2f seconds!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC); else Rprintf("Done in %4.2f seconds (sparsity = %f)!\nLearning embedding...\n", (float) (end - start) / CLOCKS_PER_SEC, (double) row_P[N] / ((double) N * (double) N)); } trainIterations(N, Y, cost, itercost); return; } // Perform main training loop template void TSNE::trainIterations(unsigned int N, double* Y, double* cost, double* itercost) { // Allocate some memory double* dY = (double*) malloc(N * NDims * sizeof(double)); double* uY = (double*) malloc(N * NDims * sizeof(double)); double* gains = (double*) malloc(N * NDims * sizeof(double)); if(dY == NULL || uY == NULL || gains == NULL) { Rcpp::stop("Memory allocation failed!\n"); } for(unsigned int i = 0; i < N * NDims; i++) uY[i] = .0; for(unsigned int i = 0; i < N * NDims; i++) gains[i] = 1.0; // Lie about the P-values if(exact) { for(unsigned long i = 0; i < (unsigned long)N * N; i++) P[i] *= exaggeration_factor; } else { for(unsigned long i = 0; i < row_P[N]; i++) val_P[i] *= exaggeration_factor; } // Initialize solution (randomly), if not already done if (!init) { for(unsigned int i = 0; i < N * NDims; i++) Y[i] = randn() * .0001; } clock_t start = clock(), end; float total_time=0; int costi = 0; //iterator for saving the total costs for the iterations for(int iter = 0; iter < max_iter; iter++) { // Stop lying about the P-values after a while, and switch momentum if(iter == stop_lying_iter) { if(exact) { for(unsigned long i = 0; i < (unsigned long)N * N; i++) P[i] /= exaggeration_factor; } else { for(unsigned int i = 0; i < row_P[N]; i++) val_P[i] /= exaggeration_factor; } } if(iter == mom_switch_iter) momentum = final_momentum; // Compute (approximate) gradient if(exact) computeExactGradient(P.data(), Y, N, NDims, dY); else computeGradient(P.data(), row_P.data(), col_P.data(), val_P.data(), Y, N, NDims, dY, theta); // Update gains for(unsigned int i = 0; i < N * NDims; i++) gains[i] = (sign_tsne(dY[i]) != sign_tsne(uY[i])) ? (gains[i] + .2) : (gains[i] * .8); for(unsigned int i = 0; i < N * NDims; i++) if(gains[i] < .01) gains[i] = .01; // Perform gradient update (with momentum and gains) for(unsigned int i = 0; i < N * NDims; i++) uY[i] = momentum * uY[i] - eta * gains[i] * dY[i]; for(unsigned int i = 0; i < N * NDims; i++) Y[i] = Y[i] + uY[i]; // Make solution zero-mean zeroMean(Y, N, NDims); // Print out progress if((iter > 0 && (iter+1) % 50 == 0) || iter == max_iter - 1) { end = clock(); double C = .0; if(exact) C = evaluateError(P.data(), Y, N, NDims); else C = evaluateError(row_P.data(), col_P.data(), val_P.data(), Y, N, NDims, theta); // doing approximate computation here! if(iter == 0) { if (verbose) Rprintf("Iteration %d: error is %f\n", iter + 1, C); } else { total_time += (float) (end - start) / CLOCKS_PER_SEC; if (verbose) Rprintf("Iteration %d: error is %f (50 iterations in %4.2f seconds)\n", iter+1, C, (float) (end - start) / CLOCKS_PER_SEC); } itercost[costi] = C; itercost++; start = clock(); } } end = clock(); total_time += (float) (end - start) / CLOCKS_PER_SEC; if(exact) getCost(P.data(), Y, N, NDims, cost); else getCost(row_P.data(), col_P.data(), val_P.data(), Y, N, NDims, theta, cost); // doing approximate computation here! // Clean up memory free(dY); free(uY); free(gains); if (verbose) Rprintf("Fitting performed in %4.2f seconds.\n", total_time); return; } // Compute gradient of the t-SNE cost function (using Barnes-Hut algorithm) template void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp_col_P, double* inp_val_P, double* Y, unsigned int N, int D, double* dC, double theta) { // Construct space-partitioning tree on current map SPTree* tree = new SPTree(Y, N); // Compute all terms required for t-SNE gradient double* pos_f = (double*) calloc(N * D, sizeof(double)); double* neg_f = (double*) calloc(N * D, sizeof(double)); if(pos_f == NULL || neg_f == NULL) { Rcpp::stop("Memory allocation failed!\n"); } tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f, num_threads); // Storing the output to sum in single-threaded mode; avoid randomness in rounding errors. std::vector output(N); #pragma omp parallel for schedule(guided) num_threads(num_threads) for (unsigned int n = 0; n < N; n++) { output[n]=tree->computeNonEdgeForces(n, theta, neg_f + n * D); } double sum_Q = .0; for (unsigned int n=0; n void TSNE::computeExactGradient(double* P, double* Y, unsigned int N, int D, double* dC) { // Make sure the current gradient contains zeros for(unsigned int i = 0; i < N * D; i++) dC[i] = 0.0; // Compute the squared Euclidean distance matrix double* DD = (double*) malloc((unsigned long)N * N * sizeof(double)); if(DD == NULL) { Rcpp::stop("Memory allocation failed!\n"); } computeSquaredEuclideanDistance(Y, N, D, DD); // Compute Q-matrix and normalization sum double* Q = (double*) malloc((unsigned long)N * N * sizeof(double)); if(Q == NULL) { Rcpp::stop("Memory allocation failed!\n"); } double sum_Q = .0; for(unsigned long n = 0; n < N; n++) { for(unsigned long m = 0; m < N; m++) { if(n != m) { Q[n * N + m] = 1 / (1 + DD[n * N + m]); sum_Q += Q[n * N + m]; } } } // Perform the computation of the gradient for(unsigned long n = 0; n < N; n++) { for(unsigned long m = 0; m < N; m++) { if(n != m) { double mult = (P[n * N + m] - (Q[n * N + m] / sum_Q)) * Q[n * N + m]; for(int d = 0; d < D; d++) { dC[n * D + d] += (Y[n * D + d] - Y[m * D + d]) * mult; } } } } // Free memory free(DD); DD = NULL; free(Q); Q = NULL; } // Evaluate t-SNE cost function (exactly) template double TSNE::evaluateError(double* P, double* Y, unsigned int N, int D) { // Compute the squared Euclidean distance matrix double* DD = (double*) malloc((unsigned long)N * N * sizeof(double)); double* Q = (double*) malloc((unsigned long)N * N * sizeof(double)); if(DD == NULL || Q == NULL) { Rcpp::stop("Memory allocation failed!\n"); } computeSquaredEuclideanDistance(Y, N, D, DD); // Compute Q-matrix and normalization sum double sum_Q = DBL_MIN; for(unsigned long n = 0; n < N; n++) { for(unsigned long m = 0; m < N; m++) { if(n != m) { Q[n * N + m] = 1 / (1 + DD[n * N + m]); sum_Q += Q[n * N + m]; } else Q[n * N + m] = DBL_MIN; } } for(unsigned long i = 0; i < (unsigned long)N * N; i++) Q[i] /= sum_Q; // Sum t-SNE error double C = .0; for(unsigned long n = 0; n < N; n++) { for(unsigned long m = 0; m < N; m++) { C += P[n * N + m] * log((P[n * N + m] + 1e-9) / (Q[n * N + m] + 1e-9)); } } // Clean up memory free(DD); free(Q); return C; } // Evaluate t-SNE cost function (approximately) template double TSNE::evaluateError(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, unsigned int N, int D, double theta) { // Get estimate of normalization term SPTree* tree = new SPTree(Y, N); double* buff = (double*) calloc(D, sizeof(double)); double sum_Q = .0; for(unsigned int n = 0; n < N; n++) sum_Q += tree->computeNonEdgeForces(n, theta, buff); // Loop over all edges to compute t-SNE error int ind1, ind2; double C = .0, Q; for(unsigned int n = 0; n < N; n++) { ind1 = n * D; for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { Q = .0; ind2 = col_P[i] * D; for(int d = 0; d < D; d++) buff[d] = Y[ind1 + d]; for(int d = 0; d < D; d++) buff[d] -= Y[ind2 + d]; for(int d = 0; d < D; d++) Q += buff[d] * buff[d]; Q = (1.0 / (1.0 + Q)) / sum_Q; C += val_P[i] * log((val_P[i] + FLT_MIN) / (Q + FLT_MIN)); } } // Clean up memory free(buff); delete tree; return C; } // Evaluate t-SNE cost function (exactly) template void TSNE::getCost(double* P, double* Y, unsigned int N, int D, double* costs) { // Compute the squared Euclidean distance matrix double* DD = (double*) malloc((unsigned long)N * N * sizeof(double)); double* Q = (double*) malloc((unsigned long)N * N * sizeof(double)); if(DD == NULL || Q == NULL) { Rcpp::stop("Memory allocation failed!\n"); } computeSquaredEuclideanDistance(Y, N, D, DD); // Compute Q-matrix and normalization sum double sum_Q = DBL_MIN; for(unsigned long n = 0; n < N; n++) { for(unsigned long m = 0; m < N; m++) { if(n != m) { Q[n * N + m] = 1 / (1 + DD[n * N + m]); sum_Q += Q[n * N + m]; } else Q[n * N + m] = DBL_MIN; } } for(unsigned long i = 0; i < (unsigned long)N * N; i++) Q[i] /= sum_Q; // Sum t-SNE error for(unsigned long n = 0; n < N; n++) { costs[n] = 0.0; for(unsigned long m = 0; m < N; m++) { costs[n] += P[n * N + m] * log((P[n * N + m] + 1e-9) / (Q[n * N + m] + 1e-9)); } } // Clean up memory free(DD); free(Q); } // Evaluate t-SNE cost function (approximately) template void TSNE::getCost(unsigned int* row_P, unsigned int* col_P, double* val_P, double* Y, unsigned int N, int D, double theta, double* costs) { // Get estimate of normalization term SPTree* tree = new SPTree(Y, N); double* buff = (double*) calloc(D, sizeof(double)); double sum_Q = .0; for(unsigned int n = 0; n < N; n++) sum_Q += tree->computeNonEdgeForces(n, theta, buff); // Loop over all edges to compute t-SNE error int ind1, ind2; double Q; for(unsigned int n = 0; n < N; n++) { ind1 = n * D; costs[n] = 0.0; for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { Q = .0; ind2 = col_P[i] * D; for(int d = 0; d < D; d++) buff[d] = Y[ind1 + d]; for(int d = 0; d < D; d++) buff[d] -= Y[ind2 + d]; for(int d = 0; d < D; d++) Q += buff[d] * buff[d]; Q = (1.0 / (1.0 + Q)) / sum_Q; costs[n] += val_P[i] * log((val_P[i] + FLT_MIN) / (Q + FLT_MIN)); } } // Clean up memory free(buff); delete tree; } // Compute input similarities with a fixed perplexity template void TSNE::computeGaussianPerplexity(double* X, unsigned int N, int D, bool distance_precomputed) { size_t N2=N; N2*=N; P.resize(N2); // Compute the squared Euclidean distance matrix double* DD = (double*) malloc(N2 * sizeof(double)); if(DD == NULL) { Rcpp::stop("Memory allocation failed!\n"); } if (distance_precomputed) { DD = X; } else { computeSquaredEuclideanDistanceDirect(X, N, D, DD); // Needed to make sure the results are exactly equal to distance calculation in R for (size_t n=0; n 0) { min_beta = beta; if(max_beta == DBL_MAX || max_beta == -DBL_MAX) beta *= 2.0; else beta = (beta + max_beta) / 2.0; } else { max_beta = beta; if(min_beta == -DBL_MAX || min_beta == DBL_MAX) beta /= 2.0; else beta = (beta + min_beta) / 2.0; } } // Update iteration counter iter++; } // Row normalize P for(unsigned long m = 0; m < N; m++) P[n * N + m] /= sum_P; } // Clean up memory if (!distance_precomputed) { free(DD); } DD = NULL; } // Compute input similarities with a fixed perplexity using ball trees (this function allocates memory another function should free) template template void TSNE::computeGaussianPerplexity(double* X, unsigned int N, int D, int K) { if(perplexity > K) Rprintf("Perplexity should be lower than K!\n"); // Allocate the memory we need setupApproximateMemory(N, K); // Build ball tree on data set VpTree* tree = new VpTree(); vector obj_X(N, DataPoint(D, -1, X)); for(unsigned int n = 0; n < N; n++) obj_X[n] = DataPoint(D, n, X + n * D); tree->create(obj_X); // Loop over all points to find nearest neighbors if (verbose) Rprintf("Building tree...\n"); int steps_completed = 0; #pragma omp parallel for schedule(guided) num_threads(num_threads) for(unsigned int n = 0; n < N; n++) { vector indices; vector distances; indices.reserve(K+1); distances.reserve(K+1); // Find nearest neighbors tree->search(obj_X[n], K + 1, &indices, &distances); double * cur_P = val_P.data() + row_P[n]; computeProbabilities(perplexity, K, distances.data()+1, cur_P); // +1 to avoid self. unsigned int * cur_col_P = col_P.data() + row_P[n]; for (int m=0; m void TSNE::computeGaussianPerplexity(const int* nn_idx, const double* nn_dist, unsigned int N, int K) { if(perplexity > K) Rprintf("Perplexity should be lower than K!\n"); // Allocate the memory we need setupApproximateMemory(N, K); // Loop over all points to find nearest neighbors int steps_completed = 0; #pragma omp parallel for schedule(guided) num_threads(num_threads) for(unsigned int n = 0; n < N; n++) { double * cur_P = val_P.data() + row_P[n]; computeProbabilities(perplexity, K, nn_dist + row_P[n], cur_P); const int * cur_idx = nn_idx + row_P[n]; unsigned int * cur_col_P = col_P.data() + row_P[n]; for (int m=0; m void TSNE::setupApproximateMemory(unsigned int N, int K) { row_P.resize(N+1); col_P.resize(N*K); val_P.resize(N*K); row_P[0] = 0; for(unsigned int n = 0; n < N; n++) row_P[n + 1] = row_P[n] + K; return; } template void TSNE::computeProbabilities (const double perplexity, const int K, const double* distances, double* cur_P) { // Initialize some variables for binary search bool found = false; double beta = 1.0; double min_beta = -DBL_MAX; double max_beta = DBL_MAX; double tol = 1e-5; // Iterate until we found a good perplexity int iter = 0; double sum_P; while(!found && iter < 200) { // Compute Gaussian kernel row for(int m = 0; m < K; m++) cur_P[m] = exp(-beta * distances[m] * distances[m]); // Compute entropy of current row sum_P = DBL_MIN; for(int m = 0; m < K; m++) sum_P += cur_P[m]; double H = .0; for(int m = 0; m < K; m++) H += beta * (distances[m] * distances[m] * cur_P[m]); H = (H / sum_P) + log(sum_P); // Evaluate whether the entropy is within the tolerance level double Hdiff = H - log(perplexity); if(Hdiff < tol && -Hdiff < tol) { found = true; } else { if(Hdiff > 0) { min_beta = beta; if(max_beta == DBL_MAX || max_beta == -DBL_MAX) beta *= 2.0; else beta = (beta + max_beta) / 2.0; } else { max_beta = beta; if(min_beta == -DBL_MAX || min_beta == DBL_MAX) beta /= 2.0; else beta = (beta + min_beta) / 2.0; } } // Update iteration counter iter++; } // Row-normalize current row of P. for(int m = 0; m < K; m++) { cur_P[m] /= sum_P; } return; } template void TSNE::symmetrizeMatrix(unsigned int N) { // Count number of elements and row counts of symmetric matrix int* row_counts = (int*) calloc(N, sizeof(int)); if(row_counts == NULL) { Rcpp::stop("Memory allocation failed!\n"); } for(unsigned int n = 0; n < N; n++) { for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { // Check whether element (col_P[i], n) is present bool present = false; for(unsigned int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { if(col_P[m] == n) present = true; } if(present) row_counts[n]++; else { row_counts[n]++; row_counts[col_P[i]]++; } } } int no_elem = 0; for(unsigned int n = 0; n < N; n++) no_elem += row_counts[n]; // Allocate memory for symmetrized matrix std::vector sym_row_P(N+1), sym_col_P(no_elem); std::vector sym_val_P(no_elem); // Construct new row indices for symmetric matrix sym_row_P[0] = 0; for(unsigned int n = 0; n < N; n++) sym_row_P[n + 1] = sym_row_P[n] + row_counts[n]; // Fill the result matrix int* offset = (int*) calloc(N, sizeof(int)); if(offset == NULL) { Rcpp::stop("Memory allocation failed!\n"); } for(unsigned int n = 0; n < N; n++) { for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) { // considering element(n, col_P[i]) // Check whether element (col_P[i], n) is present bool present = false; for(unsigned int m = row_P[col_P[i]]; m < row_P[col_P[i] + 1]; m++) { if(col_P[m] == n) { present = true; if(n <= col_P[i]) { // make sure we do not add elements twice sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; sym_val_P[sym_row_P[n] + offset[n]] = val_P[i] + val_P[m]; sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i] + val_P[m]; } } } // If (col_P[i], n) is not present, there is no addition involved if(!present) { sym_col_P[sym_row_P[n] + offset[n]] = col_P[i]; sym_col_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = n; sym_val_P[sym_row_P[n] + offset[n]] = val_P[i]; sym_val_P[sym_row_P[col_P[i]] + offset[col_P[i]]] = val_P[i]; } // Update offsets if(!present || (present && n <= col_P[i])) { offset[n]++; if(col_P[i] != n) offset[col_P[i]]++; } } } // Divide the result by two for(int i = 0; i < no_elem; i++) sym_val_P[i] /= 2.0; // Return symmetrized matrices row_P.swap(sym_row_P); col_P.swap(sym_col_P); val_P.swap(sym_val_P); // Free up some memery free(offset); offset = NULL; free(row_counts); row_counts = NULL; } // Compute squared Euclidean distance matrix (using BLAS) template void TSNE::computeSquaredEuclideanDistance(double* X, unsigned int N, int D, double* DD) { double* dataSums = (double*) calloc(N, sizeof(double)); if(dataSums == NULL) { Rcpp::stop("Memory allocation failed!\n"); } for(unsigned int n = 0; n < N; n++) { for(int d = 0; d < D; d++) { dataSums[n] += (X[n * D + d] * X[n * D + d]); } } for(unsigned long n = 0; n < N; n++) { for(unsigned long m = 0; m < N; m++) { DD[n * N + m] = dataSums[n] + dataSums[m]; } } double a1 = -2.0; double a2 = 1.0; int Nsigned = N; dgemm_("T", "N", &Nsigned, &Nsigned, &D, &a1, X, &D, X, &D, &a2, DD, &Nsigned); free(dataSums); dataSums = NULL; } // Compute squared Euclidean distance matrix (No BLAS) template void TSNE::computeSquaredEuclideanDistanceDirect(double* X, unsigned int N, int D, double* DD) { const double* XnD = X; for(unsigned int n = 0; n < N; ++n, XnD += D) { const double* XmD = XnD + D; double* curr_elem = &DD[n*N + n]; *curr_elem = 0.0; double* curr_elem_sym = curr_elem + N; for(unsigned int m = n + 1; m < N; ++m, XmD+=D, curr_elem_sym+=N) { *(++curr_elem) = 0.0; for(int d = 0; d < D; ++d) { *curr_elem += (XnD[d] - XmD[d]) * (XnD[d] - XmD[d]); } *curr_elem_sym = *curr_elem; } } } // Makes data zero-mean template void TSNE::zeroMean(double* X, unsigned int N, int D) { // Compute data mean double* mean = (double*) calloc(D, sizeof(double)); if(mean == NULL) { Rcpp::stop("Memory allocation failed!\n"); } int nD = 0; for(unsigned int n = 0; n < N; n++) { for(int d = 0; d < D; d++) { mean[d] += X[nD + d]; } nD += D; } for(int d = 0; d < D; d++) { mean[d] /= (double) N; } // Subtract data mean nD = 0; for(unsigned int n = 0; n < N; n++) { for(int d = 0; d < D; d++) { X[nD + d] -= mean[d]; } nD += D; } free(mean); mean = NULL; } // Generates a Gaussian random number template double TSNE::randn() { Rcpp::RNGScope scope; double x, y, radius; do { x = 2 * (double)R::runif(0,1) - 1; y = 2 * (double)R::runif(0,1) - 1; radius = (x * x) + (y * y); } while((radius >= 1.0) || (radius == 0.0)); radius = sqrt(-2 * log(radius) / radius); x *= radius; y *= radius; return x; } // declare templates explicitly template class TSNE<1>; template class TSNE<2>; template class TSNE<3>; Rtsne/src/vptree.h0000644000176200001440000001740313366634402013626 0ustar liggesusers/* * * Copyright (c) 2014, Laurens van der Maaten (Delft University of Technology) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the Delft University of Technology. * 4. Neither the name of the Delft University of Technology nor the names of * its contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY LAURENS VAN DER MAATEN ''AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL LAURENS VAN DER MAATEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * */ /* This code was adopted with minor modifications from Steve Hanov's great tutorial at http://stevehanov.ca/blog/index.php?id=130 */ #include #include #include #include #include #include #include #include #include "datapoint.h" #ifndef VPTREE_H #define VPTREE_H double euclidean_distance(const DataPoint &t1, const DataPoint &t2) { double dd = .0; for(int d = 0; d < t1.dimensionality(); d++) dd += (t1.x(d) - t2.x(d)) * (t1.x(d) - t2.x(d)); return sqrt(dd); } double precomputed_distance(const DataPoint &t1, const DataPoint &t2) { double dd = .0; dd = t1.x(t2.index()); return dd; } template class VpTree { public: // Default constructor VpTree() : _root(0) {} // Destructor ~VpTree() { delete _root; } // Function to create a new VpTree from data void create(const std::vector& items) { delete _root; _items = items; _root = buildFromPoints(0, items.size()); } // Function that uses the tree to find the k nearest neighbors of target void search(const T& target, int k, std::vector* results, std::vector* distances) { // Use a priority queue to store intermediate results on std::priority_queue heap; // Variable that tracks the distance to the farthest point in our results double tau = DBL_MAX; // Perform the search search(_root, target, k, heap, &tau); // Gather final results results->clear(); distances->clear(); while(!heap.empty()) { results->push_back(_items[heap.top().index]); distances->push_back(heap.top().dist); heap.pop(); } // Results are in reverse order std::reverse(results->begin(), results->end()); std::reverse(distances->begin(), distances->end()); } private: std::vector _items; // Single node of a VP tree (has a point and radius; left children are closer to point than the radius) struct Node { int index; // index of point in node double threshold; // radius(?) Node* left; // points closer by than threshold Node* right; // points farther away than threshold Node() : index(0), threshold(0.), left(0), right(0) {} ~Node() { // destructor delete left; delete right; } }* _root; // An item on the intermediate result queue struct HeapItem { HeapItem( int index, double dist) : index(index), dist(dist) {} int index; double dist; bool operator<(const HeapItem& o) const { return dist < o.dist; } }; // Distance comparator for use in std::nth_element struct DistanceComparator { const T& item; DistanceComparator(const T& item) : item(item) {} bool operator()(const T& a, const T& b) { return distance(item, a) < distance(item, b); } }; // Function that (recursively) fills the tree Node* buildFromPoints( int lower, int upper ) { if (upper == lower) { // indicates that we're done here! return NULL; } // Lower index is center of current node Node* node = new Node(); node->index = lower; if (upper - lower > 1) { // if we did not arrive at leaf yet // Choose an arbitrary point and move it to the start Rcpp::RNGScope scope; int i = (int) ((double)R::runif(0,1) * (upper - lower - 1)) + lower; std::swap(_items[lower], _items[i]); // Partition around the median distance int median = (upper + lower) / 2; std::nth_element(_items.begin() + lower + 1, _items.begin() + median, _items.begin() + upper, DistanceComparator(_items[lower])); // Threshold of the new node will be the distance to the median node->threshold = distance(_items[lower], _items[median]); // Recursively build tree node->index = lower; node->left = buildFromPoints(lower + 1, median); node->right = buildFromPoints(median, upper); } // Return result return node; } // Helper function that searches the tree void search(Node* node, const T& target, unsigned int k, std::priority_queue& heap, double* ptau) { if(node == NULL) return; // indicates that we're done here // Compute distance between target and current node double dist = distance(_items[node->index], target); // If current node within radius tau if(dist < (*ptau)) { if(heap.size() == k) heap.pop(); // remove furthest node from result list (if we already have k results) heap.push(HeapItem(node->index, dist)); // add current node to result list if(heap.size() == k) *ptau = heap.top().dist; // update value of tau (farthest point in result list) } // Return if we arrived at a leaf if(node->left == NULL && node->right == NULL) { return; } // If the target lies within the radius of ball if(dist < node->threshold) { if(dist - (*ptau) <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child first search(node->left, target, k, heap, ptau); } if(dist + (*ptau) >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child search(node->right, target, k, heap, ptau); } // If the target lies outsize the radius of the ball } else { if(dist + (*ptau) >= node->threshold) { // if there can still be neighbors outside the ball, recursively search right child first search(node->right, target, k, heap, ptau); } if(dist - (*ptau) <= node->threshold) { // if there can still be neighbors inside the ball, recursively search left child search(node->left, target, k, heap, ptau); } } } }; #endif Rtsne/src/datapoint.h0000644000176200001440000000223213366634402014276 0ustar liggesusers#ifndef DATAPOINT_H #define DATAPOINT_H class DataPoint { int _ind; public: double* _x; int _D; DataPoint() { _D = 1; _ind = -1; _x = NULL; } DataPoint(int D, int ind, double* x) { _D = D; _ind = ind; _x = (double*) malloc(_D * sizeof(double)); for(int d = 0; d < _D; d++) _x[d] = x[d]; } DataPoint(const DataPoint& other) { // this makes a deep copy -- should not free anything if(this != &other) { _D = other.dimensionality(); _ind = other.index(); _x = (double*) malloc(_D * sizeof(double)); for(int d = 0; d < _D; d++) _x[d] = other.x(d); } } ~DataPoint() { if(_x != NULL) free(_x); } DataPoint& operator= (const DataPoint& other) { // asignment should free old object if(this != &other) { if(_x != NULL) free(_x); _D = other.dimensionality(); _ind = other.index(); _x = (double*) malloc(_D * sizeof(double)); for(int d = 0; d < _D; d++) _x[d] = other.x(d); } return *this; } int index() const { return _ind; } int dimensionality() const { return _D; } double x(int d) const { return _x[d]; } }; #endif Rtsne/src/Rtsne.cpp0000644000176200001440000001153213366634402013744 0ustar liggesusers#include #include "tsne.h" using namespace Rcpp; Rcpp::List save_results(int N, int no_dims, const std::vector& Y, const std::vector& costs, const std::vector& itercosts, double theta, double perplexity, int D, int stop_lying_iter, int mom_switch_iter, double momentum, double final_momentum, double eta, double exaggeration_factor); // Function that runs the Barnes-Hut implementation of t-SNE // [[Rcpp::export]] Rcpp::List Rtsne_cpp(NumericMatrix X, int no_dims, double perplexity, double theta, bool verbose, int max_iter, bool distance_precomputed, NumericMatrix Y_in, bool init, int stop_lying_iter, int mom_switch_iter, double momentum, double final_momentum, double eta, double exaggeration_factor, unsigned int num_threads) { size_t N = X.ncol(), D = X.nrow(); double * data=X.begin(); if (verbose) Rprintf("Read the %i x %i data matrix successfully!\n", N, D); std::vector Y(N * no_dims), costs(N), itercosts(static_cast(std::ceil(max_iter/50.0))); // Providing user-supplied solution. if (init) { for (size_t i = 0; i < Y.size(); i++) Y[i] = Y_in[i]; if (verbose) Rprintf("Using user supplied starting positions\n"); } // Run tsne if (no_dims==1) { TSNE<1> tsne(perplexity, theta, verbose, max_iter, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads); tsne.run(data, N, D, Y.data(), distance_precomputed, costs.data(), itercosts.data()); } else if (no_dims==2) { TSNE<2> tsne(perplexity, theta, verbose, max_iter, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads); tsne.run(data, N, D, Y.data(), distance_precomputed, costs.data(), itercosts.data()); } else if (no_dims==3) { TSNE<3> tsne(perplexity, theta, verbose, max_iter, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads); tsne.run(data, N, D, Y.data(), distance_precomputed, costs.data(), itercosts.data()); } else { Rcpp::stop("Only 1, 2 or 3 dimensional output is suppported.\n"); } return Rcpp::List::create(Rcpp::_["Y"]=Rcpp::NumericMatrix(no_dims, N, Y.data()), Rcpp::_["costs"]=Rcpp::NumericVector(costs.begin(), costs.end()), Rcpp::_["itercosts"]=Rcpp::NumericVector(itercosts.begin(), itercosts.end())); } // Function that runs the Barnes-Hut implementation of t-SNE on nearest neighbor results. // [[Rcpp::export]] Rcpp::List Rtsne_nn_cpp(IntegerMatrix nn_dex, NumericMatrix nn_dist, int no_dims, double perplexity, double theta, bool verbose, int max_iter, NumericMatrix Y_in, bool init, int stop_lying_iter, int mom_switch_iter, double momentum, double final_momentum, double eta, double exaggeration_factor, unsigned int num_threads) { size_t N = nn_dex.ncol(), K=nn_dex.nrow(); // transposed - columns are points, rows are neighbors. if (verbose) Rprintf("Read the NN results for %i points successfully!\n", N); std::vector Y(N * no_dims), costs(N), itercosts(static_cast(std::ceil(max_iter/50.0))); // Providing user-supplied solution. if (init) { for (size_t i = 0; i < Y.size(); i++) Y[i] = Y_in[i]; if (verbose) Rprintf("Using user supplied starting positions\n"); } // Run tsne if (no_dims==1) { TSNE<1> tsne(perplexity, theta, verbose, max_iter, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads); tsne.run(nn_dex.begin(), nn_dist.begin(), N, K, Y.data(), costs.data(), itercosts.data()); } else if (no_dims==2) { TSNE<2> tsne(perplexity, theta, verbose, max_iter, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads); tsne.run(nn_dex.begin(), nn_dist.begin(), N, K, Y.data(), costs.data(), itercosts.data()); } else if (no_dims==3) { TSNE<3> tsne(perplexity, theta, verbose, max_iter, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads); tsne.run(nn_dex.begin(), nn_dist.begin(), N, K, Y.data(), costs.data(), itercosts.data()); } else { Rcpp::stop("Only 1, 2 or 3 dimensional output is suppported.\n"); } return Rcpp::List::create(Rcpp::_["Y"]=Rcpp::NumericMatrix(no_dims, N, Y.data()), Rcpp::_["costs"]=Rcpp::NumericVector(costs.begin(), costs.end()), Rcpp::_["itercosts"]=Rcpp::NumericVector(itercosts.begin(), itercosts.end())); } Rtsne/src/Makevars.win0000644000176200001440000000047313366634402014437 0ustar liggesusers ## Use the R_HOME indirection to support installations of multiple R version ## PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "Rcpp:::LdFlags()") $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS) PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) Rtsne/src/RcppExports.cpp0000644000176200001440000001265713366634402015153 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include using namespace Rcpp; // Rtsne_cpp Rcpp::List Rtsne_cpp(NumericMatrix X, int no_dims, double perplexity, double theta, bool verbose, int max_iter, bool distance_precomputed, NumericMatrix Y_in, bool init, int stop_lying_iter, int mom_switch_iter, double momentum, double final_momentum, double eta, double exaggeration_factor, unsigned int num_threads); RcppExport SEXP _Rtsne_Rtsne_cpp(SEXP XSEXP, SEXP no_dimsSEXP, SEXP perplexitySEXP, SEXP thetaSEXP, SEXP verboseSEXP, SEXP max_iterSEXP, SEXP distance_precomputedSEXP, SEXP Y_inSEXP, SEXP initSEXP, SEXP stop_lying_iterSEXP, SEXP mom_switch_iterSEXP, SEXP momentumSEXP, SEXP final_momentumSEXP, SEXP etaSEXP, SEXP exaggeration_factorSEXP, SEXP num_threadsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP); Rcpp::traits::input_parameter< int >::type no_dims(no_dimsSEXP); Rcpp::traits::input_parameter< double >::type perplexity(perplexitySEXP); Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP); Rcpp::traits::input_parameter< int >::type max_iter(max_iterSEXP); Rcpp::traits::input_parameter< bool >::type distance_precomputed(distance_precomputedSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type Y_in(Y_inSEXP); Rcpp::traits::input_parameter< bool >::type init(initSEXP); Rcpp::traits::input_parameter< int >::type stop_lying_iter(stop_lying_iterSEXP); Rcpp::traits::input_parameter< int >::type mom_switch_iter(mom_switch_iterSEXP); Rcpp::traits::input_parameter< double >::type momentum(momentumSEXP); Rcpp::traits::input_parameter< double >::type final_momentum(final_momentumSEXP); Rcpp::traits::input_parameter< double >::type eta(etaSEXP); Rcpp::traits::input_parameter< double >::type exaggeration_factor(exaggeration_factorSEXP); Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); rcpp_result_gen = Rcpp::wrap(Rtsne_cpp(X, no_dims, perplexity, theta, verbose, max_iter, distance_precomputed, Y_in, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads)); return rcpp_result_gen; END_RCPP } // Rtsne_nn_cpp Rcpp::List Rtsne_nn_cpp(IntegerMatrix nn_dex, NumericMatrix nn_dist, int no_dims, double perplexity, double theta, bool verbose, int max_iter, NumericMatrix Y_in, bool init, int stop_lying_iter, int mom_switch_iter, double momentum, double final_momentum, double eta, double exaggeration_factor, unsigned int num_threads); RcppExport SEXP _Rtsne_Rtsne_nn_cpp(SEXP nn_dexSEXP, SEXP nn_distSEXP, SEXP no_dimsSEXP, SEXP perplexitySEXP, SEXP thetaSEXP, SEXP verboseSEXP, SEXP max_iterSEXP, SEXP Y_inSEXP, SEXP initSEXP, SEXP stop_lying_iterSEXP, SEXP mom_switch_iterSEXP, SEXP momentumSEXP, SEXP final_momentumSEXP, SEXP etaSEXP, SEXP exaggeration_factorSEXP, SEXP num_threadsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< IntegerMatrix >::type nn_dex(nn_dexSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type nn_dist(nn_distSEXP); Rcpp::traits::input_parameter< int >::type no_dims(no_dimsSEXP); Rcpp::traits::input_parameter< double >::type perplexity(perplexitySEXP); Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP); Rcpp::traits::input_parameter< int >::type max_iter(max_iterSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type Y_in(Y_inSEXP); Rcpp::traits::input_parameter< bool >::type init(initSEXP); Rcpp::traits::input_parameter< int >::type stop_lying_iter(stop_lying_iterSEXP); Rcpp::traits::input_parameter< int >::type mom_switch_iter(mom_switch_iterSEXP); Rcpp::traits::input_parameter< double >::type momentum(momentumSEXP); Rcpp::traits::input_parameter< double >::type final_momentum(final_momentumSEXP); Rcpp::traits::input_parameter< double >::type eta(etaSEXP); Rcpp::traits::input_parameter< double >::type exaggeration_factor(exaggeration_factorSEXP); Rcpp::traits::input_parameter< unsigned int >::type num_threads(num_threadsSEXP); rcpp_result_gen = Rcpp::wrap(Rtsne_nn_cpp(nn_dex, nn_dist, no_dims, perplexity, theta, verbose, max_iter, Y_in, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads)); return rcpp_result_gen; END_RCPP } // normalize_input_cpp Rcpp::NumericMatrix normalize_input_cpp(Rcpp::NumericMatrix input); RcppExport SEXP _Rtsne_normalize_input_cpp(SEXP inputSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type input(inputSEXP); rcpp_result_gen = Rcpp::wrap(normalize_input_cpp(input)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { {"_Rtsne_Rtsne_cpp", (DL_FUNC) &_Rtsne_Rtsne_cpp, 16}, {"_Rtsne_Rtsne_nn_cpp", (DL_FUNC) &_Rtsne_Rtsne_nn_cpp, 16}, {"_Rtsne_normalize_input_cpp", (DL_FUNC) &_Rtsne_normalize_input_cpp, 1}, {NULL, NULL, 0} }; RcppExport void R_init_Rtsne(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } Rtsne/NAMESPACE0000644000176200001440000000047213366631434012600 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(Rtsne,data.frame) S3method(Rtsne,default) S3method(Rtsne,dist) export(Rtsne) export(Rtsne_neighbors) export(normalize_input) import(Rcpp) importFrom(stats,model.matrix) importFrom(stats,na.fail) importFrom(stats,prcomp) useDynLib(Rtsne, .registration = TRUE) Rtsne/NEWS.md0000644000176200001440000000105713366566146012466 0ustar liggesusers# Rtsne 0.15 * Substantial speed increase by fixing the possible embedding dimensionalities to 1, 2 or 3 * Clarification of the licensing of different parts of the package * Support for using, more efficient, partial PCA (by Daniel Wells) * Made the normalization optional and the normalization function used available (by Aaron Lun) * Support for using precomputed nearest neighbour matrices (by Aaron Lun) * Added OpenMP support * Default verbose value is now the global setting (by Richard Cotton) * Added a `NEWS.md` file to track changes to the package. Rtsne/R/0000755000176200001440000000000013366634402011555 5ustar liggesusersRtsne/R/utils.R0000644000176200001440000000724313366024117013042 0ustar liggesusers#' Normalize input data matrix #' #' Mean centers each column of an input data matrix so that it has a mean of zero. #' Scales the entire matrix so that the largest absolute of the centered matrix is equal to unity. #' #' @param X matrix; Input data matrix with rows as observations and columns as variables/dimensions. #' #' @details #' Normalization avoids numerical problems when the coordinates (and thus the distances between observations) are very large. #' Directly computing distances on this scale may lead to underflow when computing the probabilities in the t-SNE algorithm. #' Rescaling the input values mitigates these problems to some extent. #' #' @return A numeric matrix of the same dimensions as \code{X} but centred by column and scaled to have a maximum deviation of 1. #' #' @author #' Aaron Lun #' #' @examples #' iris_unique <- unique(iris) # Remove duplicates #' iris_matrix <- as.matrix(iris_unique[,1:4]) #' X <- normalize_input(iris_matrix) #' colMeans(X) #' range(X) #' @export normalize_input <- function(X) { # Using the original C++ code from bhtsne to do mean centering, even though it would be simple to do with sweep(). # This is because R's sums are more numerically precise, so for consistency with the original code, we need to use the naive C++ version. normalize_input_cpp(X) } is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) # Checks if the input is a whole number. { abs(x - round(x)) < tol } .check_tsne_params <- function(nsamples, dims, perplexity, theta, max_iter, verbose, Y_init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor) # Checks parameters for the t-SNE algorithm that are independent of # the format of the input data (e.g., distance matrix or neighbors). { if (!is.wholenumber(dims) || dims < 1 || dims > 3) { stop("dims should be either 1, 2 or 3") } if (!is.wholenumber(max_iter) || !(max_iter>0)) { stop("number of iterations should be a positive integer")} if (!is.null(Y_init) && (nsamples!=nrow(Y_init) || ncol(Y_init)!=dims)) { stop("incorrect format for Y_init") } if (!is.numeric(perplexity) || perplexity <= 0) { stop("perplexity should be a positive number") } if (!is.numeric(theta) || (theta<0.0) || (theta>1.0) ) { stop("theta should lie in [0, 1]")} if (!is.wholenumber(stop_lying_iter) || stop_lying_iter<0) { stop("stop_lying_iter should be a positive integer")} if (!is.wholenumber(mom_switch_iter) || mom_switch_iter<0) { stop("mom_switch_iter should be a positive integer")} if (!is.numeric(momentum) || momentum < 0) { stop("momentum should be a positive number") } if (!is.numeric(final_momentum) || final_momentum < 0) { stop("final momentum should be a positive number") } if (!is.numeric(eta) || eta <= 0) { stop("eta should be a positive number") } if (!is.numeric(exaggeration_factor) || exaggeration_factor <= 0) { stop("exaggeration_factor should be a positive number")} if (nsamples - 1 < 3 * perplexity) { stop("perplexity is too large for the number of samples")} init <- !is.null(Y_init) if (init) { Y_init <- t(Y_init) # transposing for rapid column-major access. } else { Y_init <- matrix() } list(no_dims=dims, perplexity=perplexity, theta=theta, max_iter=max_iter, verbose=verbose, init=init, Y_in=Y_init, stop_lying_iter=stop_lying_iter, mom_switch_iter=mom_switch_iter, momentum=momentum, final_momentum=final_momentum, eta=eta, exaggeration_factor=exaggeration_factor) } .clear_unwanted_params <- function(args) # Removing parameters that we do not want to store in the output. { args$Y_in <- args$init <- args$no_dims <- args$verbose <- NULL args } Rtsne/R/RcppExports.R0000644000176200001440000000203613366631400014165 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 Rtsne_cpp <- function(X, no_dims, perplexity, theta, verbose, max_iter, distance_precomputed, Y_in, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads) { .Call(`_Rtsne_Rtsne_cpp`, X, no_dims, perplexity, theta, verbose, max_iter, distance_precomputed, Y_in, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads) } Rtsne_nn_cpp <- function(nn_dex, nn_dist, no_dims, perplexity, theta, verbose, max_iter, Y_in, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads) { .Call(`_Rtsne_Rtsne_nn_cpp`, nn_dex, nn_dist, no_dims, perplexity, theta, verbose, max_iter, Y_in, init, stop_lying_iter, mom_switch_iter, momentum, final_momentum, eta, exaggeration_factor, num_threads) } normalize_input_cpp <- function(input) { .Call(`_Rtsne_normalize_input_cpp`, input) } Rtsne/R/neighbors.R0000644000176200001440000000253113364660053013660 0ustar liggesusers#' @rdname Rtsne #' @export Rtsne_neighbors <- function(index, distance, dims=2, perplexity=30, theta=0.5, max_iter=1000,verbose=getOption("verbose", FALSE), Y_init=NULL, stop_lying_iter=ifelse(is.null(Y_init),250L,0L), mom_switch_iter=ifelse(is.null(Y_init),250L,0L), momentum=0.5, final_momentum=0.8, eta=200.0, exaggeration_factor=12.0, num_threads=1, ...) { if (!is.matrix(index)) { stop("Input index is not a matrix") } if (!identical(dim(index), dim(distance))) { stop("index and distance matrices should have the same dimensions") } R <- range(index) if (any(R < 1 | R > nrow(index) | !is.finite(R))) { stop("invalid indices") } tsne.args <- .check_tsne_params(nrow(index), dims=dims, perplexity=perplexity, theta=theta, max_iter=max_iter, verbose=verbose, Y_init=Y_init, stop_lying_iter=stop_lying_iter, mom_switch_iter=mom_switch_iter, momentum=momentum, final_momentum=final_momentum, eta=eta, exaggeration_factor=exaggeration_factor) # Transposing is necessary for fast column-major access to each sample, -1 for zero-indexing. out <- do.call(Rtsne_nn_cpp, c(list(nn_dex=t(index - 1L), nn_dist=t(distance), num_threads=num_threads), tsne.args)) out$Y <- t(out$Y) # Transposing back. c(list(N=nrow(index)), out, .clear_unwanted_params(tsne.args)) } Rtsne/R/Rtsne.R0000644000176200001440000003336413366604706013010 0ustar liggesusers#' Barnes-Hut implementation of t-Distributed Stochastic Neighbor Embedding #' #' Wrapper for the C++ implementation of Barnes-Hut t-Distributed Stochastic Neighbor Embedding. t-SNE is a method for constructing a low dimensional embedding of high-dimensional data, distances or similarities. Exact t-SNE can be computed by setting theta=0.0. #' #' Given a distance matrix \eqn{D} between input objects (which by default, is the euclidean distances between two objects), we calculate a similarity score in the original space p_ij. \deqn{ p_{j | i} = \frac{\exp(-\|D_{ij}\|^2 / 2 \sigma_i^2)}{\sum_{k \neq i} \exp(-\|D_{ij}\|^2 / 2 \sigma_i^2)} } which is then symmetrized using: \deqn{ p_{i j}=\frac{p_{j|i} + p_{i|j}}{2n}}. The \eqn{\sigma} for each object is chosen in such a way that the perplexity of p_{j|i} has a value that is close to the user defined perplexity. This value effectively controls how many nearest neighbours are taken into account when constructing the embedding in the low-dimensional space. #' For the low-dimensional space we use the Cauchy distribution (t-distribution with one degree of freedom) as the distribution of the distances to neighbouring objects: #' \deqn{ q_{i j} = \frac{(1+ \| y_i-y_j\|^2)^{-1}}{\sum_{k \neq l} 1+ \| y_k-y_l\|^2)^{-1}}}. #' By changing the location of the objects y in the embedding to minimize the Kullback-Leibler divergence between these two distributions \eqn{ q_{i j}} and \eqn{ p_{i j}}, we create a map that focusses on small-scale structure, due to the assymetry of the KL-divergence. The t-distribution is chosen to avoid the crowding problem: in the original high dimensional space, there are potentially many equidistant objects with moderate distance from a particular object, more than can be accounted for in the low dimensional representation. The t-distribution makes sure that these objects are more spread out in the new representation. #' #' For larger datasets, a problem with the a simple gradient descent to minimize the Kullback-Leibler divergence is the computational complexity of each gradient step (which is \eqn{O(n^2)}). The Barnes-Hut implementation of the algorithm attempts to mitigate this problem using two tricks: (1) approximating small similarities by 0 in the \eqn{p_{ij}} distribution, where the non-zero entries are computed by finding 3*perplexity nearest neighbours using an efficient tree search. (2) Using the Barnes-Hut algorithm in the computation of the gradient which approximates large distance similarities using a quadtree. This approximation is controlled by the \code{theta} parameter, with smaller values leading to more exact approximations. When \code{theta=0.0}, the implementation uses a standard t-SNE implementation. The Barnes-Hut approximation leads to a \eqn{O(n log(n))} computational complexity for each iteration. #' #' During the minimization of the KL-divergence, the implementation uses a trick known as early exaggeration, which multiplies the \eqn{p_{ij}}'s by 12 during the first 250 iterations. This leads to tighter clustering and more distance between clusters of objects. This early exaggeration is not used when the user gives an initialization of the objects in the embedding by setting \code{Y_init}. During the early exaggeration phase, a momentum term of 0.5 is used while this is changed to 0.8 after the first 250 iterations. All these default parameters can be changed by the user. #' #' After checking the correctness of the input, the \code{Rtsne} function (optionally) does an initial reduction of the feature space using \code{\link{prcomp}}, before calling the C++ TSNE implementation. Since R's random number generator is used, use \code{\link{set.seed}} before the function call to get reproducible results. #' #' If \code{X} is a data.frame, it is transformed into a matrix using \code{\link{model.matrix}}. If \code{X} is a \code{\link{dist}} object, it is currently first expanded into a full distance matrix. #' #' @param X matrix; Data matrix (each row is an observation, each column is a variable) #' @param index integer matrix; Each row contains the identity of the nearest neighbors for each observation #' @param distance numeric matrix; Each row contains the distance to the nearest neighbors in \code{index} for each observation #' @param dims integer; Output dimensionality (default: 2) #' @param initial_dims integer; the number of dimensions that should be retained in the initial PCA step (default: 50) #' @param perplexity numeric; Perplexity parameter (should not be bigger than 3 * perplexity < nrow(X) - 1, see details for interpretation) #' @param theta numeric; Speed/accuracy trade-off (increase for less accuracy), set to 0.0 for exact TSNE (default: 0.5) #' @param check_duplicates logical; Checks whether duplicates are present. It is best to make sure there are no duplicates present and set this option to FALSE, especially for large datasets (default: TRUE) #' @param pca logical; Whether an initial PCA step should be performed (default: TRUE) #' @param partial_pca logical; Whether truncated PCA should be used to calculate principal components (requires the irlba package). This is faster for large input matrices (default: FALSE) #' @param max_iter integer; Number of iterations (default: 1000) #' @param verbose logical; Whether progress updates should be printed (default: global "verbose" option, or FALSE if that is not set) #' @param ... Other arguments that can be passed to Rtsne #' @param is_distance logical; Indicate whether X is a distance matrix (experimental, default: FALSE) #' @param Y_init matrix; Initial locations of the objects. If NULL, random initialization will be used (default: NULL). Note that when using this, the initial stage with exaggerated perplexity values and a larger momentum term will be skipped. #' @param pca_center logical; Should data be centered before pca is applied? (default: TRUE) #' @param pca_scale logical; Should data be scaled before pca is applied? (default: FALSE) #' @param normalize logical; Should data be normalized internally prior to distance calculations with \code{\link{normalize_input}}? (default: TRUE) #' @param stop_lying_iter integer; Iteration after which the perplexities are no longer exaggerated (default: 250, except when Y_init is used, then 0) #' @param mom_switch_iter integer; Iteration after which the final momentum is used (default: 250, except when Y_init is used, then 0) #' @param momentum numeric; Momentum used in the first part of the optimization (default: 0.5) #' @param final_momentum numeric; Momentum used in the final part of the optimization (default: 0.8) #' @param eta numeric; Learning rate (default: 200.0) #' @param exaggeration_factor numeric; Exaggeration factor used to multiply the P matrix in the first part of the optimization (default: 12.0) #' @param num_threads integer; Number of threads to use using OpenMP, default 1. 0 corresponds to using all available cores #' #' @return List with the following elements: #' \item{Y}{Matrix containing the new representations for the objects} #' \item{N}{Number of objects} #' \item{origD}{Original Dimensionality before TSNE (only when \code{X} is a data matrix)} #' \item{perplexity}{See above} #' \item{theta}{See above} #' \item{costs}{The cost for every object after the final iteration} #' \item{itercosts}{The total costs (KL-divergence) for all objects in every 50th + the last iteration} #' \item{stop_lying_iter}{Iteration after which the perplexities are no longer exaggerated} #' \item{mom_switch_iter}{Iteration after which the final momentum is used} #' \item{momentum}{Momentum used in the first part of the optimization} #' \item{final_momentum}{Momentum used in the final part of the optimization} #' \item{eta}{Learning rate} #' \item{exaggeration_factor}{Exaggeration factor used to multiply the P matrix in the first part of the optimization} #' #' @section Supplying precomputed distances: #' If a distance matrix is already available, this can be directly supplied to \code{Rtsne} by setting \code{is_distance=TRUE}. #' This improves efficiency by avoiding recalculation of distances, but requires some work to get the same results as running default \code{Rtsne} on a data matrix. #' Specifically, Euclidean distances should be computed from a normalized data matrix - see \code{\link{normalize_input}} for details. #' PCA arguments will also be ignored if \code{is_distance=TRUE}. #' #' NN search results can be directly supplied to \code{Rtsne_neighbors} to avoid repeating the (possibly time-consuming) search. #' To achieve the same results as \code{Rtsne} on the data matrix, the search should be conducted on the normalized data matrix. #' The number of nearest neighbors should also be equal to three-fold the \code{perplexity}, rounded down to the nearest integer. #' Note that pre-supplied NN results cannot be used when \code{theta=0} as they are only relevant for the approximate algorithm. #' #' Any kind of distance metric can be used as input. #' In contrast, running \code{Rtsne} on a data matrix will always use Euclidean distances. #' #' @references Maaten, L. Van Der, 2014. Accelerating t-SNE using Tree-Based Algorithms. Journal of Machine Learning Research, 15, p.3221-3245. #' @references van der Maaten, L.J.P. & Hinton, G.E., 2008. Visualizing High-Dimensional Data Using t-SNE. Journal of Machine Learning Research, 9, pp.2579-2605. #' #' @examples #' iris_unique <- unique(iris) # Remove duplicates #' iris_matrix <- as.matrix(iris_unique[,1:4]) #' #' # Set a seed if you want reproducible results #' set.seed(42) #' tsne_out <- Rtsne(iris_matrix,pca=FALSE,perplexity=30,theta=0.0) # Run TSNE #' #' # Show the objects in the 2D tsne representation #' plot(tsne_out$Y,col=iris_unique$Species, asp=1) #' #' # data.frame as input #' tsne_out <- Rtsne(iris_unique,pca=FALSE, theta=0.0) #' #' # Using a dist object #' set.seed(42) #' tsne_out <- Rtsne(dist(normalize_input(iris_matrix)), theta=0.0) #' plot(tsne_out$Y,col=iris_unique$Species, asp=1) #' #' set.seed(42) #' tsne_out <- Rtsne(as.matrix(dist(normalize_input(iris_matrix))),theta=0.0) #' plot(tsne_out$Y,col=iris_unique$Species, asp=1) #' #' # Supplying starting positions (example: continue from earlier embedding) #' set.seed(42) #' tsne_part1 <- Rtsne(iris_unique[,1:4], theta=0.0, pca=FALSE, max_iter=350) #' tsne_part2 <- Rtsne(iris_unique[,1:4], theta=0.0, pca=FALSE, max_iter=650, Y_init=tsne_part1$Y) #' plot(tsne_part2$Y,col=iris_unique$Species, asp=1) #' \dontrun{ #' # Fast PCA and multicore #' #' tsne_out <- Rtsne(iris_matrix, theta=0.1, partial_pca = TRUE, initial_dims=3) #' tsne_out <- Rtsne(iris_matrix, theta=0.1, num_threads = 2) #' } #' @useDynLib Rtsne, .registration = TRUE #' @import Rcpp #' @importFrom stats model.matrix na.fail prcomp #' #' @export Rtsne <- function (X, ...) { UseMethod("Rtsne", X) } #' @describeIn Rtsne Default Interface #' @export Rtsne.default <- function(X, dims=2, initial_dims=50, perplexity=30, theta=0.5, check_duplicates=TRUE, pca=TRUE, partial_pca=FALSE, max_iter=1000,verbose=getOption("verbose", FALSE), is_distance=FALSE, Y_init=NULL, pca_center=TRUE, pca_scale=FALSE, normalize=TRUE, stop_lying_iter=ifelse(is.null(Y_init),250L,0L), mom_switch_iter=ifelse(is.null(Y_init),250L,0L), momentum=0.5, final_momentum=0.8, eta=200.0, exaggeration_factor=12.0, num_threads=1, ...) { if (!is.logical(is_distance)) { stop("is_distance should be a logical variable")} if (!is.matrix(X)) { stop("Input X is not a matrix")} if (is_distance & !(is.matrix(X) & (nrow(X)==ncol(X)))) { stop("Input is not an accepted distance matrix") } if (!(is.logical(pca_center) && is.logical(pca_scale)) ) { stop("pca_center and pca_scale should be TRUE or FALSE")} if (!is.wholenumber(initial_dims) || initial_dims<=0) { stop("Incorrect initial dimensionality.")} tsne.args <- .check_tsne_params(nrow(X), dims=dims, perplexity=perplexity, theta=theta, max_iter=max_iter, verbose=verbose, Y_init=Y_init, stop_lying_iter=stop_lying_iter, mom_switch_iter=mom_switch_iter, momentum=momentum, final_momentum=final_momentum, eta=eta, exaggeration_factor=exaggeration_factor) # Check for missing values X <- na.fail(X) # Apply PCA if (!is_distance) { if (pca) { if(verbose) cat("Performing PCA\n") if(partial_pca){ if (!requireNamespace("irlba", quietly = TRUE)) {stop("Package \"irlba\" is required for partial PCA. Please install it.", call. = FALSE)} X <- irlba::prcomp_irlba(X, n = initial_dims, center = pca_center, scale = pca_scale)$x }else{ if(verbose & min(dim(X))>2500) cat("Consider setting partial_pca=TRUE for large matrices\n") X <- prcomp(X, retx=TRUE, center = pca_center, scale. = pca_scale, rank. = initial_dims)$x } } if (check_duplicates) { if (any(duplicated(X))) { stop("Remove duplicates before running TSNE.") } } if (normalize) { X <- normalize_input(X) } X <- t(X) # transposing for rapid column-major access. } else { # Compute Squared distance if we are using exact TSNE if (theta==0.0) { X <- X^2 } } out <- do.call(Rtsne_cpp, c(list(X=X, distance_precomputed=is_distance, num_threads=num_threads), tsne.args)) out$Y <- t(out$Y) # Transposing back. info <- list(N=ncol(X)) if (!is_distance) { out$origD <- nrow(X) } # 'origD' is unknown for distance matrices. c(info, out, .clear_unwanted_params(tsne.args)) } #' @describeIn Rtsne tsne on given dist object #' @export Rtsne.dist <- function(X,...,is_distance=TRUE) { X <- as.matrix(na.fail(X)) Rtsne(X, ..., is_distance=is_distance) } #' @describeIn Rtsne tsne on data.frame #' @export Rtsne.data.frame <- function(X,...) { X <- model.matrix(~.-1,na.fail(X)) Rtsne(X, ...) } Rtsne/README.md0000644000176200001440000000365613366606175012653 0ustar liggesusers [![CRAN version](http://www.r-pkg.org/badges/version/Rtsne)](https://cran.r-project.org/package=Rtsne/) [![Travis-CI Build Status](https://travis-ci.org/jkrijthe/Rtsne.png?branch=master)](https://travis-ci.org/jkrijthe/Rtsne) [![codecov.io](https://codecov.io/github/jkrijthe/Rtsne/coverage.svg?branch=master)](https://codecov.io/github/jkrijthe/Rtsne?branch=master) [![CRAN mirror downloads](http://cranlogs.r-pkg.org/badges/Rtsne)](https://cran.r-project.org/package=Rtsne/) # R wrapper for Van der Maaten’s Barnes-Hut implementation of t-Distributed Stochastic Neighbor Embedding ## Installation To install from CRAN: ``` r install.packages("Rtsne") # Install Rtsne package from CRAN ``` To install the latest version from the github repository, use: ``` r if(!require(devtools)) install.packages("devtools") # If not already installed devtools::install_github("jkrijthe/Rtsne") ``` ## Usage After installing the package, use the following code to run a simple example (to install, see below). ``` r library(Rtsne) # Load package iris_unique <- unique(iris) # Remove duplicates set.seed(42) # Sets seed for reproducibility tsne_out <- Rtsne(as.matrix(iris_unique[,1:4])) # Run TSNE plot(tsne_out$Y,col=iris_unique$Species,asp=1) # Plot the result ``` ![](tools/example-1.png) # Details This R package offers a wrapper around the Barnes-Hut TSNE C++ implementation of \[2\] \[3\]. Changes were made to the original code to allow it to function as an R package and to add additional functionality and speed improvements. # References \[1\] L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. Journal of Machine Learning Research 9(Nov):2579-2605, 2008. \[2\] L.J.P. van der Maaten. Barnes-Hut-SNE. In Proceedings of the International Conference on Learning Representations, 2013. \[3\] Rtsne/MD50000644000176200001440000000246413371603635011672 0ustar liggesusersec323add137c937c250edbb200e5d61d *DESCRIPTION 49fc1d4c2793cb7e3f05d44249b7e27b *LICENSE 1c735967f4f81eb8e783fd0034e71403 *NAMESPACE b945559fefc649a5c564c892b2da8ca3 *NEWS.md 084cfe0c64d846b9cc5f37ed17057d8a *R/RcppExports.R e8002282b7d26686d2f63033a60a9b6d *R/Rtsne.R 4e9ec6d249295c36908a210edf740e53 *R/neighbors.R acc89602417f900edc1155767873f277 *R/utils.R 92ffbe0ac5fa08c85885a9d8fc34e9c0 *README.md 42ae2f3c4872e03c5e401d949a86cad9 *inst/CITATION 1064b7fbe0f6a9fc5c85ce0cdd70c7a3 *man/Rtsne.Rd 534b6db9f30e8d2ccc33ce366180e0e8 *man/normalize_input.Rd fccdbee9a90c6ec50412daaf3f641201 *src/Makevars cd626b9e988ce6dd307bf23e23e5f26c *src/Makevars.win fadd501c152c2941c91ad84ccaf2ea63 *src/RcppExports.cpp d5cd293f39fb84f7d1d8ef9f30196182 *src/Rtsne.cpp 18ddc006d1ce092a4636748c5873a4a4 *src/datapoint.h 4c911f51eb82db749ffb654223fd6b9c *src/normalize_input.cpp 965a08920d0d7afc760a154f84c24c2c *src/sptree.cpp 8cc6063c4e6b8d4f51fe30a8a9436ebb *src/sptree.h 037cb4a0b5ddd290cc9d3026aa59802c *src/tsne.cpp e99c1b8c5682d35a6a5c698f9bcad673 *src/tsne.h 9b4912c92ddeb9daef0a3178680e6580 *src/vptree.h 99cf22df0ba208958da6b07a31a21a74 *tests/testthat.R c92846ddfe6947b533b7cbcfe3fb302e *tests/testthat/test_Rtsne.R cd0c1d978634e0ab81753305e3de6c43 *tests/testthat/test_neighbors.R dd21ca933ce8580c0492d2e092f4cbc6 *tools/example-1.png Rtsne/DESCRIPTION0000644000176200001440000000174613371603635013072 0ustar liggesusersPackage: Rtsne Type: Package Title: T-Distributed Stochastic Neighbor Embedding using a Barnes-Hut Implementation Version: 0.15 Authors@R: c( person("Jesse", "Krijthe", ,"jkrijthe@gmail.com", role = c("aut", "cre")), person("Laurens", "van der Maaten", role = c("cph"), comment = "Author of original C++ code") ) Description: An R wrapper around the fast T-distributed Stochastic Neighbor Embedding implementation by Van der Maaten (see for more information on the original implementation). License: file LICENSE URL: https://github.com/jkrijthe/Rtsne Imports: Rcpp (>= 0.11.0), stats LinkingTo: Rcpp Suggests: irlba, testthat RoxygenNote: 6.1.0 NeedsCompilation: yes Packaged: 2018-11-01 17:33:54 UTC; jkrijthe Author: Jesse Krijthe [aut, cre], Laurens van der Maaten [cph] (Author of original C++ code) Maintainer: Jesse Krijthe License_is_FOSS: yes Repository: CRAN Date/Publication: 2018-11-10 16:30:21 UTC Rtsne/man/0000755000176200001440000000000013364660053012126 5ustar liggesusersRtsne/man/normalize_input.Rd0000644000176200001440000000216213364660053015635 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{normalize_input} \alias{normalize_input} \title{Normalize input data matrix} \usage{ normalize_input(X) } \arguments{ \item{X}{matrix; Input data matrix with rows as observations and columns as variables/dimensions.} } \value{ A numeric matrix of the same dimensions as \code{X} but centred by column and scaled to have a maximum deviation of 1. } \description{ Mean centers each column of an input data matrix so that it has a mean of zero. Scales the entire matrix so that the largest absolute of the centered matrix is equal to unity. } \details{ Normalization avoids numerical problems when the coordinates (and thus the distances between observations) are very large. Directly computing distances on this scale may lead to underflow when computing the probabilities in the t-SNE algorithm. Rescaling the input values mitigates these problems to some extent. } \examples{ iris_unique <- unique(iris) # Remove duplicates iris_matrix <- as.matrix(iris_unique[,1:4]) X <- normalize_input(iris_matrix) colMeans(X) range(X) } \author{ Aaron Lun } Rtsne/man/Rtsne.Rd0000644000176200001440000002711113366604716013520 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/Rtsne.R, R/neighbors.R \name{Rtsne} \alias{Rtsne} \alias{Rtsne.default} \alias{Rtsne.dist} \alias{Rtsne.data.frame} \alias{Rtsne_neighbors} \title{Barnes-Hut implementation of t-Distributed Stochastic Neighbor Embedding} \usage{ Rtsne(X, ...) \method{Rtsne}{default}(X, dims = 2, initial_dims = 50, perplexity = 30, theta = 0.5, check_duplicates = TRUE, pca = TRUE, partial_pca = FALSE, max_iter = 1000, verbose = getOption("verbose", FALSE), is_distance = FALSE, Y_init = NULL, pca_center = TRUE, pca_scale = FALSE, normalize = TRUE, stop_lying_iter = ifelse(is.null(Y_init), 250L, 0L), mom_switch_iter = ifelse(is.null(Y_init), 250L, 0L), momentum = 0.5, final_momentum = 0.8, eta = 200, exaggeration_factor = 12, num_threads = 1, ...) \method{Rtsne}{dist}(X, ..., is_distance = TRUE) \method{Rtsne}{data.frame}(X, ...) Rtsne_neighbors(index, distance, dims = 2, perplexity = 30, theta = 0.5, max_iter = 1000, verbose = getOption("verbose", FALSE), Y_init = NULL, stop_lying_iter = ifelse(is.null(Y_init), 250L, 0L), mom_switch_iter = ifelse(is.null(Y_init), 250L, 0L), momentum = 0.5, final_momentum = 0.8, eta = 200, exaggeration_factor = 12, num_threads = 1, ...) } \arguments{ \item{X}{matrix; Data matrix (each row is an observation, each column is a variable)} \item{...}{Other arguments that can be passed to Rtsne} \item{dims}{integer; Output dimensionality (default: 2)} \item{initial_dims}{integer; the number of dimensions that should be retained in the initial PCA step (default: 50)} \item{perplexity}{numeric; Perplexity parameter (should not be bigger than 3 * perplexity < nrow(X) - 1, see details for interpretation)} \item{theta}{numeric; Speed/accuracy trade-off (increase for less accuracy), set to 0.0 for exact TSNE (default: 0.5)} \item{check_duplicates}{logical; Checks whether duplicates are present. It is best to make sure there are no duplicates present and set this option to FALSE, especially for large datasets (default: TRUE)} \item{pca}{logical; Whether an initial PCA step should be performed (default: TRUE)} \item{partial_pca}{logical; Whether truncated PCA should be used to calculate principal components (requires the irlba package). This is faster for large input matrices (default: FALSE)} \item{max_iter}{integer; Number of iterations (default: 1000)} \item{verbose}{logical; Whether progress updates should be printed (default: global "verbose" option, or FALSE if that is not set)} \item{is_distance}{logical; Indicate whether X is a distance matrix (experimental, default: FALSE)} \item{Y_init}{matrix; Initial locations of the objects. If NULL, random initialization will be used (default: NULL). Note that when using this, the initial stage with exaggerated perplexity values and a larger momentum term will be skipped.} \item{pca_center}{logical; Should data be centered before pca is applied? (default: TRUE)} \item{pca_scale}{logical; Should data be scaled before pca is applied? (default: FALSE)} \item{normalize}{logical; Should data be normalized internally prior to distance calculations with \code{\link{normalize_input}}? (default: TRUE)} \item{stop_lying_iter}{integer; Iteration after which the perplexities are no longer exaggerated (default: 250, except when Y_init is used, then 0)} \item{mom_switch_iter}{integer; Iteration after which the final momentum is used (default: 250, except when Y_init is used, then 0)} \item{momentum}{numeric; Momentum used in the first part of the optimization (default: 0.5)} \item{final_momentum}{numeric; Momentum used in the final part of the optimization (default: 0.8)} \item{eta}{numeric; Learning rate (default: 200.0)} \item{exaggeration_factor}{numeric; Exaggeration factor used to multiply the P matrix in the first part of the optimization (default: 12.0)} \item{num_threads}{integer; Number of threads to use using OpenMP, default 1. 0 corresponds to using all available cores} \item{index}{integer matrix; Each row contains the identity of the nearest neighbors for each observation} \item{distance}{numeric matrix; Each row contains the distance to the nearest neighbors in \code{index} for each observation} } \value{ List with the following elements: \item{Y}{Matrix containing the new representations for the objects} \item{N}{Number of objects} \item{origD}{Original Dimensionality before TSNE (only when \code{X} is a data matrix)} \item{perplexity}{See above} \item{theta}{See above} \item{costs}{The cost for every object after the final iteration} \item{itercosts}{The total costs (KL-divergence) for all objects in every 50th + the last iteration} \item{stop_lying_iter}{Iteration after which the perplexities are no longer exaggerated} \item{mom_switch_iter}{Iteration after which the final momentum is used} \item{momentum}{Momentum used in the first part of the optimization} \item{final_momentum}{Momentum used in the final part of the optimization} \item{eta}{Learning rate} \item{exaggeration_factor}{Exaggeration factor used to multiply the P matrix in the first part of the optimization} } \description{ Wrapper for the C++ implementation of Barnes-Hut t-Distributed Stochastic Neighbor Embedding. t-SNE is a method for constructing a low dimensional embedding of high-dimensional data, distances or similarities. Exact t-SNE can be computed by setting theta=0.0. } \details{ Given a distance matrix \eqn{D} between input objects (which by default, is the euclidean distances between two objects), we calculate a similarity score in the original space p_ij. \deqn{ p_{j | i} = \frac{\exp(-\|D_{ij}\|^2 / 2 \sigma_i^2)}{\sum_{k \neq i} \exp(-\|D_{ij}\|^2 / 2 \sigma_i^2)} } which is then symmetrized using: \deqn{ p_{i j}=\frac{p_{j|i} + p_{i|j}}{2n}}. The \eqn{\sigma} for each object is chosen in such a way that the perplexity of p_{j|i} has a value that is close to the user defined perplexity. This value effectively controls how many nearest neighbours are taken into account when constructing the embedding in the low-dimensional space. For the low-dimensional space we use the Cauchy distribution (t-distribution with one degree of freedom) as the distribution of the distances to neighbouring objects: \deqn{ q_{i j} = \frac{(1+ \| y_i-y_j\|^2)^{-1}}{\sum_{k \neq l} 1+ \| y_k-y_l\|^2)^{-1}}}. By changing the location of the objects y in the embedding to minimize the Kullback-Leibler divergence between these two distributions \eqn{ q_{i j}} and \eqn{ p_{i j}}, we create a map that focusses on small-scale structure, due to the assymetry of the KL-divergence. The t-distribution is chosen to avoid the crowding problem: in the original high dimensional space, there are potentially many equidistant objects with moderate distance from a particular object, more than can be accounted for in the low dimensional representation. The t-distribution makes sure that these objects are more spread out in the new representation. For larger datasets, a problem with the a simple gradient descent to minimize the Kullback-Leibler divergence is the computational complexity of each gradient step (which is \eqn{O(n^2)}). The Barnes-Hut implementation of the algorithm attempts to mitigate this problem using two tricks: (1) approximating small similarities by 0 in the \eqn{p_{ij}} distribution, where the non-zero entries are computed by finding 3*perplexity nearest neighbours using an efficient tree search. (2) Using the Barnes-Hut algorithm in the computation of the gradient which approximates large distance similarities using a quadtree. This approximation is controlled by the \code{theta} parameter, with smaller values leading to more exact approximations. When \code{theta=0.0}, the implementation uses a standard t-SNE implementation. The Barnes-Hut approximation leads to a \eqn{O(n log(n))} computational complexity for each iteration. During the minimization of the KL-divergence, the implementation uses a trick known as early exaggeration, which multiplies the \eqn{p_{ij}}'s by 12 during the first 250 iterations. This leads to tighter clustering and more distance between clusters of objects. This early exaggeration is not used when the user gives an initialization of the objects in the embedding by setting \code{Y_init}. During the early exaggeration phase, a momentum term of 0.5 is used while this is changed to 0.8 after the first 250 iterations. All these default parameters can be changed by the user. After checking the correctness of the input, the \code{Rtsne} function (optionally) does an initial reduction of the feature space using \code{\link{prcomp}}, before calling the C++ TSNE implementation. Since R's random number generator is used, use \code{\link{set.seed}} before the function call to get reproducible results. If \code{X} is a data.frame, it is transformed into a matrix using \code{\link{model.matrix}}. If \code{X} is a \code{\link{dist}} object, it is currently first expanded into a full distance matrix. } \section{Methods (by class)}{ \itemize{ \item \code{default}: Default Interface \item \code{dist}: tsne on given dist object \item \code{data.frame}: tsne on data.frame }} \section{Supplying precomputed distances}{ If a distance matrix is already available, this can be directly supplied to \code{Rtsne} by setting \code{is_distance=TRUE}. This improves efficiency by avoiding recalculation of distances, but requires some work to get the same results as running default \code{Rtsne} on a data matrix. Specifically, Euclidean distances should be computed from a normalized data matrix - see \code{\link{normalize_input}} for details. PCA arguments will also be ignored if \code{is_distance=TRUE}. NN search results can be directly supplied to \code{Rtsne_neighbors} to avoid repeating the (possibly time-consuming) search. To achieve the same results as \code{Rtsne} on the data matrix, the search should be conducted on the normalized data matrix. The number of nearest neighbors should also be equal to three-fold the \code{perplexity}, rounded down to the nearest integer. Note that pre-supplied NN results cannot be used when \code{theta=0} as they are only relevant for the approximate algorithm. Any kind of distance metric can be used as input. In contrast, running \code{Rtsne} on a data matrix will always use Euclidean distances. } \examples{ iris_unique <- unique(iris) # Remove duplicates iris_matrix <- as.matrix(iris_unique[,1:4]) # Set a seed if you want reproducible results set.seed(42) tsne_out <- Rtsne(iris_matrix,pca=FALSE,perplexity=30,theta=0.0) # Run TSNE # Show the objects in the 2D tsne representation plot(tsne_out$Y,col=iris_unique$Species, asp=1) # data.frame as input tsne_out <- Rtsne(iris_unique,pca=FALSE, theta=0.0) # Using a dist object set.seed(42) tsne_out <- Rtsne(dist(normalize_input(iris_matrix)), theta=0.0) plot(tsne_out$Y,col=iris_unique$Species, asp=1) set.seed(42) tsne_out <- Rtsne(as.matrix(dist(normalize_input(iris_matrix))),theta=0.0) plot(tsne_out$Y,col=iris_unique$Species, asp=1) # Supplying starting positions (example: continue from earlier embedding) set.seed(42) tsne_part1 <- Rtsne(iris_unique[,1:4], theta=0.0, pca=FALSE, max_iter=350) tsne_part2 <- Rtsne(iris_unique[,1:4], theta=0.0, pca=FALSE, max_iter=650, Y_init=tsne_part1$Y) plot(tsne_part2$Y,col=iris_unique$Species, asp=1) \dontrun{ # Fast PCA and multicore tsne_out <- Rtsne(iris_matrix, theta=0.1, partial_pca = TRUE, initial_dims=3) tsne_out <- Rtsne(iris_matrix, theta=0.1, num_threads = 2) } } \references{ Maaten, L. Van Der, 2014. Accelerating t-SNE using Tree-Based Algorithms. Journal of Machine Learning Research, 15, p.3221-3245. van der Maaten, L.J.P. & Hinton, G.E., 2008. Visualizing High-Dimensional Data Using t-SNE. Journal of Machine Learning Research, 9, pp.2579-2605. } Rtsne/tools/0000755000176200001440000000000013366606175012522 5ustar liggesusersRtsne/tools/example-1.png0000644000176200001440000012044513366606175015027 0ustar liggesusersPNG  IHDRz4iCCPkCGColorSpaceGenericRGB8U]hU>sg#$Sl4t? % V46nI6"dΘ83OEP|1Ŀ (>/ % (>P苦;3ie|{g蹪X-2s=+WQ+]L6O w[C{_F qb Uvz?Zb1@/zcs>~if,ӈUSjF 1_Mjbuݠpamhmçϙ>a\+5%QKFkm}ۖ?ޚD\!~6,-7SثŜvķ5Z;[rmS5{yDyH}r9|-ăFAJjI.[/]mK 7KRDrYQO-Q||6 (0 MXd(@h2_f<:”_δ*d>e\c?~,7?& ك^2Iq2"y@g|U\ @IDATx|ƿ { (M@Q((>z(V "" {Nvsîd ,$|_;3r9߱     t^HHHH  P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1     P~HHHHJ4y1    ">F‡~t]<HHHnɓ_E4in3ȈYTRY;HHn{ŋQ\^Ώ&$$iӦ]wu34   EIyF3PNG$@$@$@$= p/ @<gHHHH z^    x&@@9 @(@ý$@$@$@$@L4r:    PFχ{IHHHh<t$@$@$@$@ 3 xHHHH'@=%   g ӑ DO4z>K$@$@$@$(@(#   h|HHHH P3PNG$@$@$@$= p/ @<gHHHH z^    x&@@9 @(@ý$@$@$@$@L4r:    PFχ{IHHHh<t$@$@$@$@ 3 xHHHH'@=%   g ӑ DO4z>K$@$@$@$(@(#   h|HHHH P3PNG$@$@$@$= p/ @<gHHHH z^    x&@@9 @(@ý$@$@$@$@L4r:    PFχ{IHH03'PvyNZ$'J8OA$@$@$ \\Ŕc_`y񪥩Y;eps dG& I$˞(<3eh,Gڐv| m EQfYx& 1&IHH#8j)ImN, zouzHaD]E=/ݽ)t€}P0yL(:uQ,eI<>L[iBEP5D$@$@$ݷYYBVY-3?VkˣzںZ߾&ZՆ'Lsk/ʦ)a娕~|AK čqc֩_H'N-',OrϏ⿡̊ ?NݐNΥ`g+ -%_hAG&B=<ƈU\7F$@$@A#6E[t_J+s(x{7_ ߒ,Ԯqr*dl|^۟D-G/pa_`Ɩ[ kٞD~;{;>6/:Ω_&[#m=ܓ؜cWHH)]Hg8j?:BO S,|lscrH* lmo9C ~$^E}Bkn5GCqBrTflmF+KͿ-|Ls+S & KKzqιϢhFHx[y_콼hN䉫ǐ1If+rXcoFV3F*5ޟzgzhU˧jl3>pw45▀Φi_+{{:Go\.ƠK:lWNBjn{%* N DA1HH,ί"cHf߳Uvrq#HL c.x5&VXF ljd5v/iIr'HA#<_XeX1h쿲/N*5ba{SE巠Sk%@Hs M3xh{}=n> 8=vuU0DST3 +r#OXθ]1(Y |,8XUky .nǩN+񿽯VW#& K#yy*&ɎٻNQYt)hBd=I2dMߺ-+,C9|۽/^iµE=\$;71!IHPe_hRlT YaHRѦbJǀCqs$;"ZpDkxO~QqQl,$+~(or+rrL͈7ש{GYf5 ^g} .K;ư1TK8\#x95U|qΔ6$1MkD:}q'&KO=fTKvYDw{ HH5<=. ]5#Kl"DA#s3׫~܎o*;6ao0SJUOGI^U4S[2K~4ŝs>|~&9hk"2 +#^;D+bރ>2EY%gY .^[K+ۯ(-}fYQ-mms _'3)\aJ |rlݺn!~H$@$@A$ggƕǮ2xhC}t^ ;v}Z3XxO6˹*4tzD?ciC5R?N+ r%͋ev +_~z N~ kC6M>xp]MS?$q_tMnhUq64stf3'8:tPߏàA1cFT\EEb0{lcHH 6˶៧^D H]-g/%0-Rܲ|5$J,n{Si'{Η0Bx B2*8x*B:wz Vٲ= }K:0wq_&#opmJ[%g=KUϭtr}E=ڔS"I3]ftz?'_~U&LÇ#SLh޼9VZ=IHH -SRbYf켴͈9k<9gQ}/'tx6g_,-go% FQkuq[{tq=r$ɍ>ėŧcg.RcWx5_rfi̶Ǡs("00BS#g%kFo{:ɝ}ET*QskB ޓb#J++ޣݐOg$Ӗk/>N"\*AMpT5S 4Sk4rFN>}Yܹspx$@$@$'3`CU>~imzi~YlZ@JI7 2ʭȉ*4b|}]=jZzs=v4`uC>Bt!#?%f͐MW_XfήhY?py{SD(-`ƚ'GmRJiڞJ^FW4l /8'>uίLC#UNٞ5~gK͕,^'84iR/_tM4<ܹs $V {<]VT+zϘ=@i8jt6綷7S=>J\ș, t ߏvœzN(4|8O2*hǣoόVPW+6i٠OV̶nNޡ]CplD_IzzcC<ʤz9'@4 #gΜQ$@$@V婾%%jK u, ֜_!m'ă[I'ݞ#p*ݻ|4]V}oTNC#oԒbeŽ\l/2(.!IF-.!wU'aLIx1ϛ{zt.>'ӼW+RttGotk~x}W~8^9~%W(1P{V4G\x-{&82ΟX~S&OA$@F6a7Ujb 5Q|qDA3xld XO>&{y7-hėBՆMos5a?ϼa*MͶ[R[ 9BYySxMN;b։_PSSuyb?y6OXwaΜՒ)^}:tߡO{^!1oQL(:y$Uk*nf +&HDҊ|;3MDž[{S?2yupaWwD4~|$ HNÁ+0ԢHX %ք9̘ O^p[4k܇;ލ6v5l^ [tr_֙޵M. x p)ko[KChz9lX!V[(PKdnI{7GZku ǽ}MevR NdKG+g^jZp<4i ԮBgʱ/J5OJ){ sCr~/n}y;;96퐔)$i+.'*H49 N[.FG?T@%*O_>VQts /Tx}>UCW#W|R)we0y|.]O`wx S~6u tۊ%E񔥱2^ºG$2EDfz<4PO*lPz4K'SՅw.c?!q_Ҋ%/'GP!UU|-H*~7XzmL?D%SWZIbҞה5UeJRҷ^;eKף-b+-w hx$@$p'P=騾[6*]̜ j?꫋bBm5)$~OJ[gR%}5<zO|%QyE+UMjESDtwo@Mv/t){t:rjAN[U:4^amҟ]+5ZZ()K:}ͧ̒4g4Eho.[*9S9xSM6VT>[73Ol,X(I @U8r6eH T5/;{?(yx,ۓfIx%@L?kY}CYWw=ݗwHB HH wx'@w]e 7·->~>\_EGyplzk#>g--bQ롕:O] RR:k5ů%s~\ kpS{b6ZTgPT3֜_Fȝ4t;ǺJe<#꥿oځ{^/iO.ؖaq[ ^GgIQ'Fz'ӎJ*y.?QՅbH?p-n*0^=l*>7i#$Uu|X+PW>ħ4,If$TkڊN7Wb|HH xXS)Vה6xwOOm?,ŋp&})"_Iaح JJe͆F=XU Mu)ͭq6f|MjwXO>ldrg/86m>+ $UeS:WM5U bOt<?OaϚ+.yU\;6™#GQNg*U o4*#r֩_jgA&oS-?,5ٳԪSt33@̳zsO|V's]z"ڮ2cߕ홀UݒDGEe@7}l?csEL+)ǿ@եa/ՎEeyT{狯;.mV{ŧΫ UW2*'kɄ.<)n'QC>N> @TG+95ga3. <߄kzavͰ2jr'vz@Xφ7;N1 ˹=0̗<`?qr]O**μCejKYjv/`*oiM7"K-44:,wF!1|\a/~['|}[v3y/$M5 P­$@$@p?\}pdٵȝ V*Rtl\68am gbXU*s~#\wCh[rq+hj*f|0WIie)8S:"m7Ssnƫ^>0#9glT|#sG75ĢSKn_݅JH'p> C8\gsc=JWxOY}^XT U>#k{m<=%-^ U%gsoOs9]Gl^lP0y0BN >{Ҝ4FXt/|2?z=0L;iRs`}Ë$vo(n ,r4H#@z}x @B#GClZI޹| YY+H׆Ζ`eqZɾo\rK^s <#F5bh\g;^z%TL^O|z"}ҫ\x#=PO7xT i7،++ƛSkm3DJMqoY&snU>(ܧ'@op٤@j; ܖDsUPI-i1 \&U*f JSmg4Z#;mkvP0EQQfx^'s yHHȒֽ .^!Êy@wS4'V~~bY ]ħu[{Ucq{fI<4E:xoy7P:}"gs7 HD!Md|S=9y VZOuM0 ډ5QY=Mm|yxFgؾw1?#j$ckh{Ϸv==VSJdz=eWj:hflFTMJ+ɫ'bwOuIs^|?+cS(PGzCS 2&lL5Ǎ dzIHn!Gl53x $ws-EKcP[&!_MscjZ }}i_ϰՁ#ټ%rL# v;e5>b~ˡs7>Ixsơ_΁ielDO3 7"x]" uy|rԽt) ݶ=_JHo'2͗\Q=p x$@$@q$pH,b f~}=;Zdh ؏z ;v®~OʘQZT^`ܸxs/O)tjD\:={dM!-+7W9]y>Y·|bd;EJ*Z+i%wnmSfSUG\h|}As =/Ȗ48Wn44UOo5^S:uU,:?g/vmA&hr$@$@ hQkwJxhp7}(`eˁti~fLg޸_}`}DπoΔ&2znbvo!_~m= };074bb;ZEfXu&wk)Gv̓Q2i-/F+Uj矈c>0Uihl-$>wUl翾hv P @N$@$pcLwtRM| \aUj"ܑ@htv\ݡ"ypqg+O.@,<{d,\i7לgBV.b3H!ߩ kdޮ] ζ:A Nϕ{2>dzɇ?xPuU!%`e~c󵚿=8Bwㅵ.rDۘ8 `d  &pNӥ yS8 Y籽[_uUs26C=t;%RWi .]TO$@$a=vᶪWI~XWB{f̄3O҈X&92^@!C);z"D]T2QOK]r Z ]W V]_Ԃk~{h9*6恽d#y |y  hX5kb1b%A #  @ y:,( 2L ϋKX6]6l>{$=rDr"H9z W^6nͼ"[鋞ACn-Gnx~=?L K=}֓1wFp-Y!LUYbx'H$]|mj $<&όwL$@g( ?r`%Z|;[C{mۛږS}=bYƇF4w~ KzFyɏ;|"WrkvG:}θO*^l&}m 41of{Zo1WIJtKz}P*$򼶈`w$|-<xw@%@`?:8 UUPh4K!MKm g\MFfP?Ps8aۣq6sX[N-))UR ooZctB*C>/a9k$IrEn8GZBrBuU~ɸn3foHSE5MZ<OD@Ύp?8FzuDhp$Ӻ*G 04֝)kp[rcs@<y0  3™ e٪+K{xEeh[ v9-RFSF!_L=#c?g= Y9 S,'%ZP$]Nax!EY "7.U HNi:SX݈lF3#4[*wpdޞ)% v )upFGD$X K=UQ6zy(_DtkzI]Qɥ$p},#x> $[\'By#@u ]#ќL}NG"zBghaQPKebo'ܻX(y$*hRDZ5aj .YFnR.o#זf! % tz{EcGH,韒Ȯ໓QHnu7-BeIݞ,} 44hP3&6l%u>DJYj/OvzCoCΌ4UNXd[" $3 И  pS"py_) $ ec@ei{)O2;\=XxJb ؿO7RsK1Q~Dd> )Jr:Zy7~/LbK#Z0Pn%( N~ַ@IDATftEӳ~Q)d,5}Ȥ$&c#/Fm)8\Қ]B"bloKQ[RbW%=1;=9^#6P(M?=8Hp d  @Q[,MԵ{xvмh=1Zמ"qfρk?Z^M5R-Q<^GjuZźw))9!5iTğ^RSzkTW^+ֿ׮d'>Ý6Rj'qwy">5jKFҸ7 }/ )&6ڿ>bT;ҁ@ 0zju~#{  ەaKR3uYvj[]MHR3~Z!.o ,[ҷ֙t^ggNv&Li(bL>ChKeH~8f19kq%vRmYz?RwE,k@f6G>swv/fo J 3gs]j֬Iz]x $ٳKðEyݝ[>;>!FNrD$eXw>Z3sX&5ݺ%[j?5ਭS|z?reaEXj[N#R]{ЎIϯ]A=UkՖ\Rɽq1U-jD jTkVXZzc/)9Z5ELᇶִ$ !pwS"%ǨaR恺[/`009y2okZG3x%@@1C*0`x( mJ@vmi$%M K.hh!OS]s,acZIct%)b5E,VNCk&IkM7Vҽ g,<*> nR3dw;wӢ|oɝAZOPs`{8L4u0*bGe|S*yegVCOO)cLO%-<=ߋPWs17=w0Saݰ/a!Z!wg(@ I/Ǚ̃>(y<HH aǎF]xe=@bL^Uܸ,ʇ<L*ue)^ėz%ղ,~^cĸlccd*I+㝮bzNm1q?e%΀^}^#LL"}?%׮-ޢ*Bu?w4Ej"~ҺYʖ{8xY`KzPg?ay^DspaCy59W^3s HHeaP:M-y3{%"~H"tBZA/&z*-qD<"4>IDħ4Ξ2 kwƼ\ΆwG{3\b q}Y $uXS<׌TQaIzj(*jj&[Ħ#id)YHr$WX=jcP.SQDUS.rNB5Bk1RA?޽{C/i \/-BêVUԥgIm g4>KmED,!w"dwu)q['%l,K"7ET-50)¢YN*UKEM^'nUdrVk~oѕSsZ!/.g$~Of9"T ?TK+;@͛EM6ҼwIH *$3PHt;SŧL%c]TNSts%9,KUG:09.1<`;f{{HkġIΏ?K4-4"\Kj356kLxTݽ$@۬QX`aITGĩohh\[R:-57}ȅS{Qyڵ wuD\u̙3ѠA?f̘'r @@е?c#Zl%9-b?]סE}Zbdf[:G%-&֟䍺0hX#ú{^mtp&M6Z$iAYkw)2^ɓ0)ޘ(yq(F>[H _nNibvƯHEɓѺuk8HHH .L t,adȥD5q\Hy-|E1ݸ%*\ڲiY;'ԥ^AV&MӸ$ZfRnwֆS+CthV{ΛZ )jր=gV9XmlU,'=ŇU"f^+uW[,U`9+l>n"K7F2Ӝϯ-Z/bС~HHbC~]XuD >ÈO=ג>Z~SDv[6v̏}oC, ͵o1*&9Ru\["~ǦSoRZ`yw-K+LmRa#P ;>eRE\_k2VNv摢L}J"ZM|͜nd}ϼN%P}{îW7z7 DQ0_xqus?7KBmOK$p+ gRm?k+nx>,/&Ҙ<쓨Dl**¸ދfz'i a= cT)e<1wg-fͲ:ٳpgcĮ.a#Hw)dHlnICC}SJ !0*ubX`A@۷͛1rA$@$@7ܵ֫/Gձ PyͥP2܏u3]#GVŦ11/Ï9oFؒjZwJ\/S %" #ԉ\z6k"3^>2NKQ P=_˧-%EB9HHHPh Y4۴ZaMp;ǚ:D^5H~d ǰ$OXl]oG5꾿iXǣ3n&imd>ey~{r;Dԯ_I=zh|駲q ӧ<HHbE@\q=l)^s<KZpF Q \wm} +Xb;È~}n1w'*yAu~Ӿ\|"X!SN($  nKiΣ>ZCۿ&:QC3]($9l"-΂*<ՔnX29mzj+V bHH⛀UIz;";K  4v5r&gs._7IE|@ޒmVߌ~&Mg}>/*W[pG$@$@q&`|B燥?H|M9ɪZxT-J2]ճY"HN 0m6[qb  Sfj|YNC yo…ҥK;w.r1v99/$$)S 4 DC ]b~BFj~e˖-VL~!vڅ''4\#^wi^Pª%xjm&Tkɘ1ctHHHH"P~駨[.fΜԜP̓.SP!جY3?~܈`J@F*D5:? 6jgz*rܹ̉sM Yy    Hl" nݺ(kwZxqh>hΝ} ,Xhc @-"Es# D$chڴi1uTk*T0y >8'ߓ @bzi!Ғ%Ki&$O%KDժU&>uꔱgey    D VTOL2%իg~Mj֬oeʔn'    Ęz 3 xHHHH'@=%   g ӑ DO4z>K$@$@$@$(@(#   h|HHHH P3PNG$@$@$@$=8 c͚58w'|1i$,X0f'    X }UVH:5ʕ+ 6o߾ӧ.\RJH*=_ @D1 +WiӦزe nZr$kѳgψs= DI ^sM.x 3Y˖-6mZo(/$@$@$@$@$%ct֭(UT|FUTk׮HHHHQ(P .ѣG#M7 $$r労HHHHH 4hܹsqի<4*믿⣏>B۶m4i@ss D"SCFl ڵ35o#G4)7 @Tbzbٲel2,_D?5y%Jj^n'    b%@L۶EGA$@$@$@$@K VtѢE1bԐ^}A#r ;0) ̢ bV"*b¸kwI" &T̊k\uQ1oBP $wv0 3C{gnݺu뭞o=\@< YfYv< Ӂ?1m @ @^ꫯրdM2A @M oPr4Lr3! @@yGaӦMC @(Sv|n ]vJW_}u>; @F ' hF,TB d Wm־ R@ U'4RHqꫯgyUB @\%@ bw8uTk֬i^zr]F @tv饗d0`թSǴ:҅^h{.$!@ Pny{gM4{jժe=uѴ*ҵ^s~ !@J@^ZÆ VC;w)0I姟~rk @@>yh˖-?7xh׷6mآE_~>|> @HțI+!wqvIwqmvYgtСC@ |Z@.]jv7f͚e]t$5h 58@ ^:X}̽!/ 5d=zM2V\!@r8l2;m_n۷OI)&NlIʱܑ… O>zAQTԾrs16l@ zYvlu(w_vڴnچ陪v/Kg\z!},< 46tSO?vظ @B TcS~}6l~2aw}v؈#ufZ?g @`hM~WSt3K @@1TǏoZ@ Pj-\Č  @ 4 @UN-zmo}C @'PtQnu-={f;L= @@1@x0/_n&L7on뮻*׿ګj?xvynQ @Yj|z,u7׿lvN;?zg=*DVK/I&/buŋWC P*e?uY6Fmd[6洺?ۻO??&N2dH?}Ek?|olƌ>F-*A} '`z=#F h=Jъ+l 7/E9ssα:Ȟ}պ<`GqDf͚lϚo7xUW]e~kϟo|/ڰaCWmڴ1j+MS @(.z^W\C⋞%`w'q'pB߶][ozZtرGzCM7Ec}MkY=#ommִiSҥOxvgUT馛Rc '-gTj[g{3!b{[顇jVjժABK.Dx,5VMb^4iv뭷aÆֵGaR @OZϨB#l}QGUFĩc >~VVɓ'ZkOSeUVvѣߓ;swONl+iܹ}6 @(a*!Mֲe˼-ߐj_^=޽G4ȏGyOkw:!@(|5|M얕O= EEyKj0`͛7/Gܜ{\*HS>ue|2n-Z}Gu mѦ);̃P9s WI{.uĬR0}%wvx\d+G4;辭\E= @H DS*@ DCR);z!j낕0QW`/eQQߌByzq`!f{m7BڣoN*vkp 7D['MQXJ5 M`B~(XHK.$dM;3&}#D(Xw},!*) E >W46  zqew"5h ={vF^{=Q&)S:O` o?9AG5X)S'as5aH" S 0$q7>(뮫DZrJ["[oB|Le֬Y>BШq9!SA*2hTt+{QHXrq\l/3 Qv6|h-^uo#17eHm%UN]j;DG!![CHNTxR+y뮻.чw~I4(j&y'|2nx]`A\ iC|EA]`KH|YIkzK?Q @ ATך*J I/qckrbԩSS'羘Z讻0e~}KmfJ?uT?\~jj[mGW !VҸ{QDz4}ڨQCֵ\IE>A<]5jW_|3SN,þ|d>|9]k9Og*p}'c=nAf|Χ 'V*Њ>^xa3RKZJ+ zI:&sa*\, Sy-@%$c*}%=u-K.*ZըO>֫W/ ]ӽK)SO=e_)ɼlzQQϞ=]|UVt{49LETZU//*S?A jyMCeLkZQd4}(OkMhz=DvGY[# /Z{]FAEAy}:֎Bz~U  <}_p~FS!A,bVQRDQ[ Mi'pF Z',M KFaiyAzR&;A\ԇ^0  @ <{QݞHkZV>4*OQ#-{)kbSrIKijZA5}ݪU+"s1駟z%%O/jd_SVP3ğ(ŒҮ]; APJrP*&7~n ˥R/Ek+˪Bhe}ܒ>,fB>e唫AOP_m{Ww}wJ=; TbWkja5vYx(0V([Q4"[ģCyt0R&Ȱg$KhrifZG%裏kz zR$,OO}|ZOW8)LGa4Q,p\ZnP˪*]_md-XHS2(E+ITRau "DsM6,o~J(!-I홊|ۨnݺ^is95۵Ԧwd}e-L{RK+XIk+xHAHhOT CĻ*N/ܭ84-ݩ{}F^AS>kPYiU3*_MQ &ZڗV!7'1a-x:yidjQ@@QIlUZ@SIe틋ru)fXyʇuo" l+ESlEL9=OYGY/SZʊ)Ir*h)ms-(LV>%LG龰JD[Ke n Kz龥S n=RSKW(hyB{G>&!@N h5v @җRtJ-LrVOM*;J"S )]KeKs =K}zj-2~xO$?ΰ*G3=]kիEEKm4hy[/_PYWF v 顶rK_|IC[v>J%?Z@ PRsT`$M}Ko䑠SPOs!qy] Hqh31h}޻5'ҽiJ@0ұEuJڏ4S0cZ45%6u$`uO ~ )}hG.X;RN5n)}?b8>9DG7twyQƇ`B~b @@@[V nQ,LoxN.$$LUb_)”G!\?ݻ{${1ݻ*J_$k"Տ/L8я'׉$2D;,*!ɯmـ ;ǍhP5P\cƌuC2wӊC:)ܟS90y|*! >i2e`B~7*HL/ ӫ}?}mux>RByO*Gkc TN*r^qyShzKbؕ GUWiDc=,$w_V*2 h5`uMe '|n` D 1M;a%!_SA5J$Q!-Qd炮wމ1(H:&A !5Hx 7LEҏ _ P_9ETIP+)ՒTUR 8 6y?3|yͰ~ V!IX*9\IAY ,9c!Ւ&B PU <[c)xM˟1.J&/#-^!ݗqj*&(W]c*$16H ǔzS;EADF'M/!ޗtZ3.o4( kGyai/ >` `9a]w׶~jOC H~վ( DTH! h-7&OE%ZQn2يwJV)t%T?IV 2{چh[WSFOw_q /xRN;MkZ[ZM+gĞ5w̘]Y0[ܗ|8{١Cw|SjlҴV7s?UCkk]wJ뮅tO-@"[oEpC @ Z3^1DNhZG\)k<0M?  Ȑº\=DUQ)+,Jn!*CJ2%>J1 SW^ֲx2V[lQOw7H T#Zr՘K2'1V{R*&EtKiHJ _%Jd^Jm-ٵkWӺ%x%M'N,E[Zj^Xj4(]KcJ<+K/O\MkitkOe0`@[܅@KR @5ڤ Z h>q'矷RMI'SZ']يrH|4^4M|LLMTB/b̥͟wWҺP5k+#)]KpK.)3F@2JMROQ׌KAFM65Qƫ( U+6߿Ƙ @,V,wM6)ScWz$Mg+ T7hu)SNL'NxJ@ PUIw sfr T7huu-~S @@u@V7j뮻LA^}UkҤI ~| @(>E'@ #FXݺu3cG}zꕱ 4IDAT @k@M;N?S^{m;3^Z3T* @@VE'@: 5jTk6ĻB ?SL"t֥KTZk3f̰^|E4= @s=O>?8 _=W^;1* @,µN;ym̙&+l뭷mS2Y\ @ (Ĩ~( @AhU hvd;wnv4 @H%Ptt} &E;ڰaò\#@^{eZ@ N4:udѣGgMf;^z'_Q kq@ 4E'@9{M+W+2؅ @(dE^0eӧ׾B @4nݬUVnx @(|E+@k׮mGqDf @)r >؁ @(*5FN4Zhacǎ-`!@@1tٲe6~xSx  @@1p32@ ddlC T9#@rKݻmU @ T@ѦaJ-z^> @@1p @@fC5 @@@V Wz @B  @UCZ5\ @ h0TC T hpW@ ,YP @ P5UÕ^!@@fC5 @@1KqV Oѣ7p_~GyĚ4i(U" FӧOQ͘17nUim֬Y;6w/Fg϶FhUڇVXasεvءA˗ۗ_~io}V}HaѢE֩Sj`j\ZAzC ~Y5+W>KL@>Qf<^ѤILj$Lb˖-܀Z6m-]YZ/Y$K 94sL>LͳK/ZhQVroP*OA@_J>6lȇYw… ݲwhUڇϟo-[tLi~|.*#SNm^ r s6~Jرck׮6f̘t&` @ @1W @H"M& @@@V=c@ DM@ 'z\ @ 4  @UOZ @@'O?v}\]Gk#(q&ƫEUתk^+zx%]wݵ].5^+zJH͚5h5<_XӦMkV~y+E;Z)7@ S| @q5rC @ @ 0:@ P @k# @@a@at @@ָG A &- @@#q @MZχA GZ)7@ WzK:i_Wx+TmʕYO}eQVx%rZ+ҶzkSv7?rGy?p[wum#G/|6l2xbu]m]vI9s˴y>}?3e)=]s5;[ cǎߖ22>|՗z>}KeP3ƺtbl5nSM)TdcwٯOίlU .{9GyoO2Y?7|;<8p <^;c޳=أ*U}?Vv27nM8ѺuflAN;.\x} rv?k_j;_v=:c\r}GYZJ G}wlɒ%vgqSKmGªCawl^xZֽ{AeIo`&T F/ꪫRЩSh7O^cLkEYgRWSw{𶌚6mկ_̭QQ)s*èG`ѓ?K /*O>??3 &LpVA'J}㨣,u )?dȐh֋5k>x0Dna ,^zEaV+ZlY&ocTeϞ)$1^ٛݪܷ,<={}gv'&7N8-)5tgo{2Z`{IǗLjQ'駟z^{寥)K[=5,_P7;o~__ٻz%Nw7|c|A&ocTeL[)} >>E)SUfk믿RlժMX$l7n# >JzwSZUCN:ʶ.?6l"4>䪒/}"NIPo*Y&qw߿{'V2R(wY{L:7o']NPM7__r6hh1~.'`k7ѯ>*QԩS-Lz۴i3zlO5fѢEx~WAyQ5j|v 7 TqUJ.?~*ǨԿ˒?!%X9s؊+=.Ë+%n0`G^*ơ0t WE$G[ ~N-2*QC ֦(HVY%6% T  P$XGA!EA5&Ve_. CΝ;'>ǕQAV޽{SO=*ݕL[ҿǒv]vw~ elFe>ڵs뜦s"ԳgO|*-S\j+LUw}_k¯|ݣ)q;}9v4iR\UeTJ|w퉡J֭3>';vl\Ur[ne%S>omU~{L6"lv衇zdW*3%#>K,3cDuJF;k[sРA^I>E*O.Q>̚GQokzY%!3@d^RTQ)J &5BTɩbRj?+/Or3}(1l̙6w\;Rp3&}ޤ\%s(3nkҤI%\(r=x+@'߲v޼yO9ޣ0ERW ;_?VC4OYJ}Y2=HxńT!nJC=g}6v⿧#F$n;:'|2QWJ'$! >N;-QWfʚ)@Fa9榛nL?4QW {|T@*"R㸘g}OT'[c_Q|a$חBlܩO?t2 }RCec(v/I* AlQ#{(dHm\b{s HS!& . %%~(kD!&:SuFǏk&q%j:Z`AVB6(l.EovmL_%6U_j+MXRS fWaJ̿ b5!֥X1}n 0*:1eIttA .ZJFO_gwpH6(lJHMY-R,-#~}fxP jOL@Jj53Ef>jy>oEvϘ1D{GߒRȃ|D)q/o.Q3Ke!t) @(a* @@ @V!@@!@S`  @ @Kas @ @ )0@ PB%U@ P @(!z* @(Bx  h =ln h!<@J6 @ c @%DZB[ @@ZO1@ "-ͭB B - @@ @V!@@!@S`  @ @Kas @ @ )0@ PB%U@ P @(!z* @(Bx *n/ؖ.]g^˗ʕ+WΆJ?7&/wm˖-+1L8`r-O>v[lawqpBzm]v姟~c.jժ?ݻw1>}K/%6,QGg@6ld w j5.?kK/{$/r믿7N4i ثW/2e=CV^=?;۸q:to6ڎ?x?cIg@6ld 5o񪫮:uZkeW^y|&ұcG;MO>$hر^׭[7;uֵ͛oKߐyw+V6+@y @C6@xgG>֭[c=va؎;hgy-X 1~{MSmH% ŋeQFfo[/}wc~m6lh'pM:5qzϞ=sIk^}Ǿ;t|>͛8o/Qw]wٺk{EQdVƍo߾vׯ=v@fE@ nֶm[^h{キ־}{ +b]tI܊g}[%;88p 4Hɏqvg}bp}MlHЪ7xnF2dFsܹ~,*O.=z)(=i4h`{lVn 8m̘1Υ~NZso묳N6 PnÖ@( `W_}6{@/ererQN3c C>8#Ѹ*~^gz9sҫ+N:4~-R `MV2R@ @+"]@C@FY%$HRqe0[tMmvN?ӦMvZz%? ~i|WH~RezUb:LǻۚIS@ @ a0@`B2^54)gH'=."床e wN9T~y4qB+ˬ3m y@%d~\%!J P # 4U{A*I!eUetAHi!P)y}-riмKl̙K׸ofo#͇~ϋߣ[e5Udٳ] Yy}׮]VU+r!9p(lvs==gR))bNIӕTKF_<$4bo*j-T (M7 D\ȑ#-D[n$"[6v[~*-KW_~g$k׮NAWZ),lA@ PKa8S  t,qQT*Ejy:E._hth l%՟r]@+U4?+WiԿG*%rV v  \~Պc Ei:%S/=>MS7Ti6iҤ3;X%wه 4!CH"!9|RMM