---- tsne/NAMESPACE ----
exportPattern("^[[:alpha:]]+")
importFrom("stats", "dist", "rnorm")

---- tsne/man/tsne.Rd ----
\name{tsne}
\Rdversion{1.1}
\alias{tsne}
\title{
The t-SNE method for dimensionality reduction
}
\description{
Provides a simple function interface for running t-SNE dimensionality reduction on R matrices or "dist" objects.
}
\usage{
tsne(X, initial_config = NULL, k = 2, initial_dims = 30, perplexity = 30,
     max_iter = 1000, min_cost = 0, epoch_callback = NULL, whiten = TRUE,
     epoch = 100)
}
\arguments{
  \item{X}{ The R matrix or "dist" object. }
  \item{initial_config}{ A matrix specifying the initial embedding for X. See Details. }
  \item{k}{ The dimension of the resulting embedding. }
  \item{initial_dims}{ The number of dimensions retained in the whitening step before the reduction is run. }
  \item{perplexity}{ The perplexity parameter; roughly, the effective number of neighbors considered for each point. }
  \item{max_iter}{ The maximum number of iterations to perform. }
  \item{min_cost}{ The minimum cost value (error) below which iteration halts. }
  \item{epoch_callback}{ A callback function invoked after each epoch (an epoch here means a set number of iterations). }
  \item{whiten}{ A boolean value indicating whether the matrix data should be whitened. }
  \item{epoch}{ The number of iterations between update messages. }
}
\value{
The \emph{ydata} embedding matrix, with one row per observation and \emph{k} columns.
}
\details{
When the initial_config argument is specified, the algorithm automatically enters the \emph{final momentum} stage. This stage makes smaller, fine-grained adjustments to the embedding rather than large-scale rearrangements, and is intended for fine-tuning point positions. This can greatly speed up the generation of embeddings for a series of similar X datasets, while also preserving the overall orientation of the embedding.
}
\references{
L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. \emph{Journal of Machine Learning Research} 9 (Nov): 2579-2605, 2008.

L.J.P. van der Maaten. Learning a Parametric Embedding by Preserving Local Structure. In \emph{Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics} (AISTATS), JMLR W&CP 5:384-391, 2009.
}
\author{
Justin Donaldson (jdonaldson@gmail.com)
}
\seealso{
\link{dist}
}
\examples{
\dontrun{
colors = rainbow(length(unique(iris$Species)))
names(colors) = unique(iris$Species)
ecb = function(x) {
  plot(x, t = 'n')
  text(x, labels = iris$Species, col = colors[iris$Species])
}
tsne_iris = tsne(iris[, 1:4], epoch_callback = ecb, perplexity = 50)

# compare to PCA
dev.new()
pca_iris = princomp(iris[, 1:4])$scores[, 1:2]
plot(pca_iris, t = 'n')
text(pca_iris, labels = iris$Species, col = colors[iris$Species])
}
}
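The warm-start behaviour described in the Details section above is easiest to see in a short script. The sketch below is illustrative only: it is not part of the package sources, and the even/odd iris split is an assumption made for the example. It embeds half of the iris measurements, then reuses that embedding as initial_config for the other half.

# Illustrative warm-start sketch (not part of the package sources).
library(tsne)
first_half  <- iris[seq(1, 150, by = 2), 1:4]   # hypothetical split
second_half <- iris[seq(2, 150, by = 2), 1:4]
y1 <- tsne(first_half, perplexity = 25)
# Reusing y1 as the initial configuration skips the early-exaggeration
# phase, so y2 tends to keep roughly the same orientation as y1.
y2 <- tsne(second_half, initial_config = y1, perplexity = 25)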
---- tsne/man/tsne-package.Rd ----
\name{tsne-package}
\Rdversion{1.1}
\alias{tsne-package}
\docType{package}
\title{The tsne package for multidimensional scaling}
\description{
This package contains a single function, \link[tsne]{tsne}, which provides all of the functionality.
}
\details{
\tabular{ll}{
Package: \tab tsne\cr
Type: \tab Package\cr
Version: \tab 0.1\cr
Date: \tab 2010-02-19\cr
License: \tab GPL\cr
LazyLoad: \tab yes\cr
}
}
\author{
Justin Donaldson
https://github.com/jdonaldson/rtsne

Maintainer: Justin Donaldson (jdonaldson@gmail.com)
}
\references{
L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. \emph{Journal of Machine Learning Research} 9 (Nov): 2579-2605, 2008.

L.J.P. van der Maaten. Learning a Parametric Embedding by Preserving Local Structure. In \emph{Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics} (AISTATS), JMLR W&CP 5:384-391, 2009.
}
\keyword{ package }

---- tsne/DESCRIPTION ----
Package: tsne
Type: Package
Title: T-Distributed Stochastic Neighbor Embedding for R (t-SNE)
Version: 0.1-3.1
Date: 2016-06-04
Author: Justin Donaldson
Maintainer: Justin Donaldson <jdonaldson@gmail.com>
Description: A "pure R" implementation of the t-SNE algorithm.
License: GPL
LazyLoad: yes
NeedsCompilation: no
URL: https://github.com/jdonaldson/rtsne/
BugReports: https://github.com/jdonaldson/rtsne/issues
Packaged: 2022-03-28 07:38:37 UTC; hornik
Repository: CRAN
Date/Publication: 2022-03-28 07:52:34 UTC

---- tsne/NEWS ----
* version 0.1
  - Initial release; main tsne function included.
* version 0.1-1
  - Fixed miscellaneous documentation omissions.
* version 0.1-2
  - Using an initial configuration parameter automatically places tsne in
    the 'late stage' mode, which focuses on small-scale adjustments.
* version 0.1-3
  - Fixed a broken gain equation and made distance matrix checks more reliable.

---- tsne/R/tsne.R ----
tsne <- function(X, initial_config = NULL, k = 2, initial_dims = 30,
                 perplexity = 30, max_iter = 1000, min_cost = 0,
                 epoch_callback = NULL, whiten = TRUE, epoch = 100) {
  if (inherits(X, 'dist')) {
    n = attr(X, 'Size')
  } else {
    # Rescale matrix input to [0, 1], then optionally whiten it down to
    # initial_dims components before the embedding is optimized.
    X = as.matrix(X)
    X = X - min(X)
    X = X / max(X)
    initial_dims = min(initial_dims, ncol(X))
    if (whiten) X <- .whiten(as.matrix(X), n.comp = initial_dims)
    n = nrow(X)
  }

  momentum = .5           # initial momentum
  final_momentum = .8     # momentum after mom_switch_iter
  mom_switch_iter = 250
  epsilon = 500           # learning rate
  min_gain = .01
  initial_P_gain = 4      # "early exaggeration" factor applied to P
  eps = 2^(-52)           # typical machine precision

  if (!is.null(initial_config) && is.matrix(initial_config)) {
    if (nrow(initial_config) != n || ncol(initial_config) != k) {
      stop('initial_config argument does not match necessary configuration for X')
    }
    # Warm start: skip early exaggeration and go straight to fine-tuning.
    ydata = initial_config
    initial_P_gain = 1
  } else {
    ydata = matrix(rnorm(k * n), n)
  }

  # Input affinities: symmetrize, floor at eps, normalize, then exaggerate.
  P = .x2p(X, perplexity, 1e-5)$P
  P = .5 * (P + t(P))
  P[P < eps] <- eps
  P = P / sum(P)
  P = P * initial_P_gain

  grads = matrix(0, nrow(ydata), ncol(ydata))
  incs  = matrix(0, nrow(ydata), ncol(ydata))
  gains = matrix(1, nrow(ydata), ncol(ydata))

  for (iter in 1:max_iter) {
    if (iter %% epoch == 0) {
      # Report the KL divergence (Q is carried over from the previous iteration).
      cost = sum(apply(P * log((P + eps) / (Q + eps)), 1, sum))
      message("Epoch: Iteration #", iter, " error is: ", cost)
      if (cost < min_cost) break
      if (!is.null(epoch_callback)) epoch_callback(ydata)
    }

    # Student-t affinities in the embedding: num[i, j] = 1 / (1 + ||y_i - y_j||^2).
    sum_ydata = apply(ydata^2, 1, sum)
    num = 1 / (1 + sum_ydata + sweep(-2 * ydata %*% t(ydata), 2, -t(sum_ydata)))
    diag(num) = 0
    Q = num / sum(num)
    if (any(is.nan(num))) message('NaN in grad. descent')
    Q[Q < eps] = eps

    # Gradient of the KL divergence with respect to each embedded point.
    stiffnesses = 4 * (P - Q) * num
    for (i in 1:n) {
      grads[i, ] = apply(sweep(-ydata, 2, -ydata[i, ]) * stiffnesses[, i], 2, sum)
    }

    # Per-parameter adaptive gains, momentum update, and re-centering.
    gains = ((gains + .2) * abs(sign(grads) != sign(incs)) +
             gains * .8 * abs(sign(grads) == sign(incs)))
    gains[gains < min_gain] = min_gain
    incs = momentum * incs - epsilon * (gains * grads)
    ydata = ydata + incs
    ydata = sweep(ydata, 2, apply(ydata, 2, mean))

    if (iter == mom_switch_iter) momentum = final_momentum
    # End early exaggeration after 100 iterations (cold starts only).
    if (iter == 100 && is.null(initial_config)) P = P / 4
  }
  ydata
}
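The 'num' computation in the main loop builds the Student-t kernel matrix 1 / (1 + ||y_i - y_j||^2) with a sweep trick rather than an explicit double loop. The standalone check below is illustrative only (not part of the package sources) and confirms the equivalence against a direct distance computation:

# Illustrative check that the vectorized 'num' expression equals the
# elementwise Student-t kernel 1 / (1 + squared Euclidean distance).
set.seed(1)
ydata <- matrix(rnorm(10 * 2), 10, 2)
sum_ydata <- apply(ydata^2, 1, sum)
num <- 1 / (1 + sum_ydata + sweep(-2 * ydata %*% t(ydata), 2, -t(sum_ydata)))
ref <- 1 / (1 + as.matrix(dist(ydata))^2)
max(abs(num - ref))  # ~1e-16, i.e. equal up to floating-point error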
---- tsne/R/tsne-internal.R ----
# Entropy (H) and conditional probabilities (P) for one point, given its
# distances D to the other points and a precision parameter beta.
.Hbeta <- function(D, beta) {
  P = exp(-D * beta)
  sumP = sum(P)
  if (sumP == 0) {
    H = 0
    P = D * 0
  } else {
    H = log(sumP) + beta * sum(D %*% P) / sumP
    P = P / sumP
  }
  r = list()
  r$H = H
  r$P = P
  r
}

# Convert input data (matrix or "dist" object) into the conditional
# probability matrix P, calibrating each point's beta so that the entropy
# of its row matches log(perplexity).
.x2p <- function(X, perplexity = 15, tol = 1e-5) {
  if (inherits(X, 'dist')) {
    D = X
  } else {
    D = dist(X)
  }
  n = attr(D, 'Size')
  D = as.matrix(D)
  P = matrix(0, n, n)
  beta = rep(1, n)
  logU = log(perplexity)

  for (i in 1:n) {
    betamin = -Inf
    betamax = Inf
    Di = D[i, -i]
    hbeta = .Hbeta(Di, beta[i])
    H = hbeta$H
    thisP = hbeta$P
    Hdiff = H - logU
    tries = 0
    # Binary search on beta until the entropy matches log(perplexity).
    while (abs(Hdiff) > tol && tries < 50) {
      if (Hdiff > 0) {
        betamin = beta[i]
        if (is.infinite(betamax)) beta[i] = beta[i] * 2
        else beta[i] = (beta[i] + betamax) / 2
      } else {
        betamax = beta[i]
        if (is.infinite(betamin)) beta[i] = beta[i] / 2
        else beta[i] = (beta[i] + betamin) / 2
      }
      hbeta = .Hbeta(Di, beta[i])
      H = hbeta$H
      thisP = hbeta$P
      Hdiff = H - logU
      tries = tries + 1
    }
    P[i, -i] = thisP
  }

  r = list()
  r$P = P
  r$beta = beta
  sigma = sqrt(1 / beta)
  message('sigma summary: ', paste(names(summary(sigma)), ':', summary(sigma), '|', collapse = ''))
  r
}

# PCA whitening: center X, project onto the top n.comp principal directions,
# and rescale so each retained component has unit variance.
.whiten <- function(X, row.norm = FALSE, verbose = FALSE, n.comp = ncol(X)) {
  n.comp  # force evaluation of n.comp before X is modified
  if (verbose) message("Centering")
  n = nrow(X)
  p = ncol(X)
  X <- scale(X, scale = FALSE)
  X <- if (row.norm) t(scale(X, scale = row.norm)) else t(X)
  if (verbose) message("Whitening")
  V <- X %*% t(X) / n
  s <- La.svd(V)
  D <- diag(c(1 / sqrt(s$d)))
  K <- D %*% t(s$u)
  K <- matrix(K[1:n.comp, ], n.comp, p)
  X = t(K %*% X)
  X
}
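A final illustrative check (again, not part of the package sources) of the perplexity calibration performed by .x2p: each row of the returned P is normalized so that its Shannon entropy H satisfies exp(H) ~= perplexity, which is exactly what the per-point binary search over beta enforces. The snippet reaches the internal function via R's ::: operator.

# Illustrative only: verify that each row of P produced by .x2p has
# perplexity (exp of its Shannon entropy) close to the requested value.
library(tsne)
X <- as.matrix(iris[, 1:4])
r <- tsne:::.x2p(X, perplexity = 20, tol = 1e-5)
H <- apply(r$P, 1, function(p) { p <- p[p > 0]; -sum(p * log(p)) })
summary(exp(H))  # values should be concentrated near 20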