tsne/0000755000175100001440000000000012742222450011241 5ustar hornikuserstsne/NAMESPACE0000644000175100001440000000010412736526705012470 0ustar hornikusersexportPattern("^[[:alpha:]]+") importFrom("stats", "dist", "rnorm") tsne/NEWS0000644000175100001440000000057112736246723011757 0ustar hornikusers* version 0.1 - Initial release. main tsne function included. * version 0.1-1 - fixed misc. documentation omissions. * version 0.1-2 - using an initial configuration parameter automatically places the tsne in the 'late stage' mode, which is mainly focused on small scale adjustments. * version 0.1-3 - fix a broken gain equation, and made distance matrix checks more reliable tsne/R/0000755000175100001440000000000012736247440011453 5ustar hornikuserstsne/R/tsne.R0000644000175100001440000000407212736247160012551 0ustar hornikuserstsne <- function(X, initial_config = NULL, k=2, initial_dims=30, perplexity=30, max_iter = 1000, min_cost=0, epoch_callback=NULL,whiten=TRUE, epoch=100 ){ if ('dist' %in% class(X)) { n = attr(X,'Size') } else { X = as.matrix(X) X = X - min(X) X = X/max(X) initial_dims = min(initial_dims,ncol(X)) if (whiten) X<-.whiten(as.matrix(X),n.comp=initial_dims) n = nrow(X) } momentum = .5 final_momentum = .8 mom_switch_iter = 250 epsilon = 500 min_gain = .01 initial_P_gain = 4 eps = 2^(-52) # typical machine precision if (!is.null(initial_config) && is.matrix(initial_config)) { if (nrow(initial_config) != n | ncol(initial_config) != k){ stop('initial_config argument does not match necessary configuration for X') } ydata = initial_config initial_P_gain = 1 } else { ydata = matrix(rnorm(k * n),n) } P = .x2p(X,perplexity, 1e-5)$P P = .5 * (P + t(P)) P[P < eps]<-eps P = P/sum(P) P = P * initial_P_gain grads = matrix(0,nrow(ydata),ncol(ydata)) incs = matrix(0,nrow(ydata),ncol(ydata)) gains = matrix(1,nrow(ydata),ncol(ydata)) for (iter in 1:max_iter){ if (iter %% epoch == 0) { # epoch cost = sum(apply(P * log((P+eps)/(Q+eps)),1,sum)) message("Epoch: Iteration #",iter," error is: ",cost) if (cost < min_cost) break if (!is.null(epoch_callback)) epoch_callback(ydata) } sum_ydata = apply(ydata^2, 1, sum) num = 1/(1 + sum_ydata + sweep(-2 * ydata %*% t(ydata),2, -t(sum_ydata))) diag(num)=0 Q = num / sum(num) if (any(is.nan(num))) message ('NaN in grad. descent') Q[Q < eps] = eps stiffnesses = 4 * (P-Q) * num for (i in 1:n){ grads[i,] = apply(sweep(-ydata, 2, -ydata[i,]) * stiffnesses[,i],2,sum) } gains = ((gains + .2) * abs(sign(grads) != sign(incs)) + gains * .8 * abs(sign(grads) == sign(incs))) gains[gains < min_gain] = min_gain incs = momentum * incs - epsilon * (gains * grads) ydata = ydata + incs ydata = sweep(ydata,2,apply(ydata,2,mean)) if (iter == mom_switch_iter) momentum = final_momentum if (iter == 100 && is.null(initial_config)) P = P/4 } ydata } tsne/R/tsne-internal.R0000644000175100001440000000322612736246377014374 0ustar hornikusers.Hbeta <- function(D, beta){ P = exp(-D * beta) sumP = sum(P) if (sumP == 0){ H = 0 P = D * 0 } else { H = log(sumP) + beta * sum(D %*% P) /sumP P = P/sumP } r = {} r$H = H r$P = P r } .x2p <- function(X,perplexity = 15,tol = 1e-5){ if (class(X) == 'dist') { D = X n = attr(D,'Size') } else{ D = dist(X) n = attr(D,'Size') } D = as.matrix(D) P = matrix(0, n, n ) beta = rep(1, n) logU = log(perplexity) for (i in 1:n){ betamin = -Inf betamax = Inf Di = D[i, -i] hbeta = .Hbeta(Di, beta[i]) H = hbeta$H; thisP = hbeta$P Hdiff = H - logU; tries = 0; while(abs(Hdiff) > tol && tries < 50){ if (Hdiff > 0){ betamin = beta[i] if (is.infinite(betamax)) beta[i] = beta[i] * 2 else beta[i] = (beta[i] + betamax)/2 } else{ betamax = beta[i] if (is.infinite(betamin)) beta[i] = beta[i]/ 2 else beta[i] = ( beta[i] + betamin) / 2 } hbeta = .Hbeta(Di, beta[i]) H = hbeta$H thisP = hbeta$P Hdiff = H - logU tries = tries + 1 } P[i,-i] = thisP } r = {} r$P = P r$beta = beta sigma = sqrt(1/beta) message('sigma summary: ', paste(names(summary(sigma)),':',summary(sigma),'|',collapse='')) r } .whiten <- function(X, row.norm=FALSE, verbose=FALSE, n.comp=ncol(X)) { n.comp; # forces an eval/save of n.comp if (verbose) message("Centering") n = nrow(X) p = ncol(X) X <- scale(X, scale = FALSE) X <- if (row.norm) t(scale(X, scale = row.norm)) else t(X) if (verbose) message("Whitening") V <- X %*% t(X)/n s <- La.svd(V) D <- diag(c(1/sqrt(s$d))) K <- D %*% t(s$u) K <- matrix(K[1:n.comp, ], n.comp, p) X = t(K %*% X) X } tsne/MD50000644000175100001440000000050412742222450011550 0ustar hornikusers89ec2b9a3dc063fc9141420faf288338 *DESCRIPTION d65aaf552fc71192a426c3b41f3b0c48 *NAMESPACE a63efcd4c418f6199317015c874204de *NEWS 5b83328297146a191456b5e2299c5123 *R/tsne-internal.R a444f7bd180097f3bb7f3b1b61d77d43 *R/tsne.R bb49103a06187a42f001e2671f2072f7 *man/tsne-package.Rd b085bd2cdbbc2f88aab4172f4312be83 *man/tsne.Rd tsne/DESCRIPTION0000644000175100001440000000102612742222450012746 0ustar hornikusersPackage: tsne Type: Package Title: T-Distributed Stochastic Neighbor Embedding for R (t-SNE) Version: 0.1-3 Date: 2016-06-04 Author: Justin Donaldson Maintainer: Justin Donaldson Description: A "pure R" implementation of the t-SNE algorithm. License: GPL LazyLoad: yes NeedsCompilation: no URL: https://github.com/jdonaldson/rtsne/ BugReports: https://github.com/jdonaldson/rtsne/issues Packaged: 2016-07-15 15:40:42 UTC; jdonaldson Repository: CRAN Date/Publication: 2016-07-15 20:02:16 tsne/man/0000755000175100001440000000000012736527345012032 5ustar hornikuserstsne/man/tsne-package.Rd0000644000175100001440000000215512736361373014663 0ustar hornikusers\name{tsne-package} \Rdversion{1.1} \alias{tsne-package} \docType{package} \title{The tsne-package for multidimensional scaling} \description{ This package contains one function called \link[tsne]{tsne} which contains all the functionality. } \details{ \tabular{ll}{ Package: \tab tsne\cr Type: \tab Package\cr Version: \tab 0.1\cr Date: \tab 2010-02-19\cr License: \tab GPL\cr LazyLoad: \tab yes\cr } } \author{ Justin Donaldson https://github.com/jdonaldson/rtsne Maintainer: Justin Donaldson (jdonaldson@gmail.com) } \references{ L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. \emph{Journal of Machine Learning Research} 9 (Nov) : 2579-2605, 2008. L.J.P. van der Maaten. Learning a Parametric Embedding by Preserving Local Structure. In \emph{Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics} (AISTATS), JMLR W&CP 5:384-391, 2009. } \keyword{ package } % \seealso{ % ~~ Optional links to other man pages, e.g. ~~ % ~~ \code{\link[:-package]{}} ~~ % } % \examples{ % ~~ simple examples of the most important functions ~~ % } tsne/man/tsne.Rd0000644000175100001440000000601512736527345013274 0ustar hornikusers\name{tsne} \Rdversion{1.1} \alias{tsne} %- Also NEED an '\alias' for EACH other topic documented here. \title{ The t-SNE method for dimensionality reduction } \description{ Provides a simple function interface for specifying t-SNE dimensionality reduction on R matrices or "dist" objects. } \usage{ tsne(X, initial_config = NULL, k = 2, initial_dims = 30, perplexity = 30, max_iter = 1000, min_cost = 0, epoch_callback = NULL, whiten = TRUE, epoch=100) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{ The R matrix or "dist" object } \item{initial_config}{ an argument providing a matrix specifying the initial embedding for X. See Details. } \item{k}{ the dimension of the resulting embedding. } \item{initial_dims}{ The number of dimensions to use in reduction method. } \item{perplexity}{ Perplexity parameter. (optimal number of neighbors) } \item{max_iter}{ Maximum number of iterations to perform. } \item{min_cost}{ The minimum cost value (error) to halt iteration. } \item{epoch_callback}{ A callback function used after each epoch (an epoch here means a set number of iterations) } \item{whiten}{ A boolean value indicating whether the matrix data should be whitened. } \item{epoch}{ The number of iterations in between update messages. } } %%\details{ %% ~~ If necessary, more details than the description above ~~ %%} \value{ An R object containing a \emph{ydata} embedding matrix, as well as a the matrix of probabilities \emph{P} } \details{ When the initial_config argument is specified, the algorithm will automatically enter the \emph{final momentum} stage. This stage has less large scale adjustment to the embedding, and is intended for small scale tweaking of positioning. This can greatly speed up the generation of embeddings for various similar X datasets, while also preserving overall embedding orientation. } \references{ L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. \emph{Journal of Machine Learning Research} 9 (Nov) : 2579-2605, 2008. L.J.P. van der Maaten. Learning a Parametric Embedding by Preserving Local Structure. In \emph{Proceedings of the Twelfth International Conference on Artificial Intelligence and Statistics} (AISTATS), JMLR W&CP 5:384-391, 2009. } \author{ Justin Donaldson (jdonaldson@gmail.com) } %%\note{ %% ~~further notes~~ %%} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \link{dist} } \examples{\dontrun{ colors = rainbow(length(unique(iris$Species))) names(colors) = unique(iris$Species) ecb = function(x,y){ plot(x,t='n'); text(x,labels=iris$Species, col=colors[iris$Species]) } tsne_iris = tsne(iris[,1:4], epoch_callback = ecb, perplexity=50) # compare to PCA dev.new() pca_iris = princomp(iris[,1:4])$scores[,1:2] plot(pca_iris, t='n') text(pca_iris, labels=iris$Species,col=colors[iris$Species]) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. % \keyword{ ~kwd1 } % \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line