dimRed/0000755000176200001440000000000013102645305011455 5ustar liggesusersdimRed/tests/0000755000176200001440000000000013102640400012606 5ustar liggesusersdimRed/tests/testthat.R0000644000176200001440000000007013024276154014604 0ustar liggesuserslibrary(testthat) library(dimRed) test_check("dimRed") dimRed/tests/testthat/0000755000176200001440000000000013102645305014457 5ustar liggesusersdimRed/tests/testthat/test_drr.R0000644000176200001440000000041213024273620016425 0ustar liggesusers context("drr") test_that("drr forward and backward passes", { spiral <- loadDataSet("Helix", n = 500) drr.spiral <- embed(spiral, "DRR", ndim = 3) dsa <- drr.spiral@apply(spiral) dsi <- drr.spiral@inverse(dsa) expect_equal(dsi, spiral) }) dimRed/tests/testthat/test_dimRedData.R0000644000176200001440000000232013037635505017644 0ustar liggesusers context("the dimRedData class") test_that("constructor", { expect_equal(dimRedData(), new("dimRedData", data = matrix(numeric(0), nrow = 0, ncol = 0), meta = data.frame())) expect_error(dimRedData(iris)) expect_s4_class(dimRedData(iris[, 1:4], iris[, 5]), "dimRedData") expect_s4_class(dimRedData(iris[, 1:4]), "dimRedData") expect_error(dimRedData(iris)) }) test_that("conversion functions", { expect_equal(as(iris[, 1:4], "dimRedData"), dimRedData(iris[, 1:4])) expect_error(as(iris, "dimRedData")) expect_equal(as(loadDataSet("Iris"), "data.frame"), as.data.frame(loadDataSet("Iris"))) expect_equivalent(as.dimRedData(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, iris), loadDataSet("Iris")) }) test_that("misc functions", { Iris <- loadDataSet("Iris") expect_equal(getData(Iris), Iris@data) expect_equal(getMeta(Iris), Iris@meta) expect_equal(nrow(Iris), 150) expect_equal(Iris[1:4], Iris[1:4, ]) expect_equal(Iris[1:4], Iris[c(rep(TRUE, 4), rep(FALSE, 146))]) expect_equal(Iris[1:4], Iris[c(rep(TRUE, 4), rep(FALSE, 146)), ]) }) dimRed/tests/testthat/test_dimRedResult.R0000644000176200001440000000055213042052045020242 0ustar liggesusers context("dimRedResult-class") test_that("predict/inverse methods", { dat <- loadDataSet("Iris") emb <- embed(dat, "PCA", ndim = 4) pred <- predict(emb, dat) inv <- inverse(emb, pred) expect_equal(getDimRedData(emb), pred) expect_equal(dat, inv) emb2 <- embed(dat, "tSNE") expect_error(predict(emb2, dat)) expect_error(inverse(emb2, dat)) }) dimRed/tests/testthat/test_all.R0000644000176200001440000000216713037650325016424 0ustar liggesusers context("high level functions") test_that("high level functions working?", { embed_methods <- dimRedMethodList() quality_methods <- dimRedQualityList() scurve <- loadDataSet("3D S Curve", n = 500) quality_results <- matrix(NA, length(embed_methods), length(quality_methods), dimnames = list(embed_methods, quality_methods)) embedded_data <- list() for (e in embed_methods) { message("embedding: ", e) suppressWarnings( embedded_data[[e]] <- embed(scurve, e, .mute = c("message", "output"))) for (q in quality_methods) { message(" quality: ", q) quality_results[e, q] <- tryCatch( suppressWarnings(quality(embedded_data[[e]], q, .mute = c("message", "output"))), error = function (e) NA ) } } expect(inherits(quality_results, "matrix"), "should be matrix") expect(storage.mode(quality_results) == "double", 'storage should be "double"') }) dimRed/tests/testthat/test_misc.R0000644000176200001440000000140013037635410016572 0ustar liggesuserscontext("misc functions") a <- matrix(rnorm(25), 5, 5) b <- matrix(rnorm(25), 5, 5) test_that("squared euclidean distance", { expect_equivalent( 
t(as.matrix(dist(rbind(a, b)))[6:10, 1:5] ^ 2), pdist2(a, b) ) }) test_that("formula functions", { expect_equal(rhs(a + b ~ c + d), ~ c + d + 0) expect_equal(lhs(a + b ~ c + d), ~ a + b + 0) }) test_that("makeEpsGraph", { check_makeEpsGraph <- function(x, eps){ naive <- as.matrix(dist(x)) naive[naive >= eps] <- 0 epsSp <- as.matrix(makeEpsSparseMatrix(x, eps)) all(naive == epsSp) } expect_true(check_makeEpsGraph(iris[1:4], 1000)) expect_true(check_makeEpsGraph(iris[1:4], 1)) expect_true(check_makeEpsGraph(iris[1:4], 0.5)) }) dimRed/tests/testthat/test_isomap.R0000644000176200001440000000135413102620660017131 0ustar liggesusers context("isomap") ## no isomap specific tests, because forward method is not really ## exact. test_that("check vs vegan isomap", { eps <- 1e-13 a <- loadDataSet("3D S Curve", n = 200) vegiso <- vegan::isomap(dist(getData(a)), k = 8, ndim = 2) vegy <- vegan::scores(vegiso) drdiso <- embed(a, "Isomap", knn = 8, ndim = 2) drdy <- drdiso@data@data ## Randomly fails: ## expect_equivalent(drdy, vegy) err1 <- max(abs(drdy - vegy)) drdy[, 2] <- -drdy[, 2] err2 <- max(abs(drdy - vegy)) err <- min(err1, err2) expect_true(err < eps, info = paste0("err = ", err, ", eps = ", eps, ", expected err < eps")) }) dimRed/tests/testthat/test_dataSets.R0000644000176200001440000000030413024272076017412 0ustar liggesuserscontext("dataSets") test_that("datasets load", { for (d in dataSetList()) { ds <- loadDataSet(d) expect(inherits(ds, "dimRedData"), "must be of class 'dimRedData'") } }) dimRed/tests/testthat/test_PCA.R0000644000176200001440000000462213065032123016244 0ustar liggesusers data(iris) context("PCA") test_that("general data conversions", { irisData <- as(iris[, 1:4], "dimRedData") expect_equal(class(irisData)[1], "dimRedData") irisParsCS <- list(center = TRUE, scale. = TRUE) irisParsC <- list(center = TRUE, scale. = FALSE) irisParsS <- list(center = FALSE, scale. = TRUE) irisPars <- list(center = FALSE, scale. = FALSE) irisResCS <- do.call(function(...) embed(irisData, "PCA", ...), irisParsCS) irisResS <- do.call(function(...) embed(irisData, "PCA", ...), irisParsS) irisResC <- do.call(function(...) embed(irisData, "PCA", ...), irisParsC) irisRes <- do.call(function(...) embed(irisData, "PCA", ...), irisPars) expect_equal(class(irisResCS)[1], "dimRedResult") expect_equal(class(irisResS)[1], "dimRedResult") expect_equal(class(irisResC)[1], "dimRedResult") expect_equal(class(irisRes)[1], "dimRedResult") expect_equal(irisResCS@apply(irisData), irisResCS@data) expect_equal(irisResS@apply(irisData), irisResS@data) expect_equal(irisResC@apply(irisData), irisResC@data) expect_equal(irisRes@apply(irisData), irisRes@data) expect(sqrt(mean( (irisResCS@inverse(irisResCS@data)@data - irisData@data) ^ 2 )) < 0.3, "error too large" ) expect(sqrt(mean( (irisResS@inverse(irisResS@data)@data - irisData@data) ^ 2 )) < 0.3, "error too large" ) expect(sqrt(mean( (irisResC@inverse(irisResC@data)@data - irisData@data) ^ 2 )) < 0.3, "error too large" ) expect(sqrt(mean( (irisRes@inverse(irisRes@data)@data - irisData@data) ^ 2 )) < 0.3, "error too large" ) scale2 <- function(x, center, scale.) scale(x, center, scale.) expect_equal( do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisResCS), irisParsCS), getData( getDimRedData(irisResCS) ) ) expect_equal( do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisResS), irisParsS), getData( getDimRedData(irisResS) ) ) expect_equal( do.call(function(...) scale2(iris[1:4], ...) 
%*% getRotationMatrix(irisResC), irisParsC), getData( getDimRedData(irisResC) ) ) expect_equal( do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisRes), irisPars), getData( getDimRedData(irisRes) ) ) }) dimRed/tests/testthat/test_quality.R0000644000176200001440000000265713102622330017334 0ustar liggesusers context("quality") test_that("quality", { irisData <- loadDataSet("Iris") parsPCA <- list(center = TRUE, scale. = TRUE) resPCA <- do.call(function(...) embed(irisData, "PCA", ...), parsPCA) suppressWarnings( resQual <- list( Q_local(resPCA), Q_global(resPCA), mean_R_NX(resPCA), total_correlation(resPCA), cophenetic_correlation(resPCA), distance_correlation(resPCA), reconstruction_rmse(resPCA) ) ) lapply(resQual, function(x) expect_true(is.numeric(x))) }) test_that("rmse_by_ndim", { ir <- loadDataSet("Iris") ir.drr <- embed(ir, "DRR", ndim = ndims(ir)) ir.pca <- embed(ir, "PCA", ndim = ndims(ir)) rmse_res <- data.frame( drr = reconstruction_error(ir.drr), pca = reconstruction_error(ir.pca) ) for (i in 1:length(rmse_res$pca)) { expect_true(rmse_res$pca[i] - rmse_res$drr[i] + 1e-12 > 0, info = paste0( "ndim = ", i, ", rmse pca = ", rmse_res$pca[i], ", rmse drr = ", rmse_res$drr[i] )) } # expect_true(all((rmse_res$pca - rmse_res$drr) + 1e-12 > 0)) expect_error(reconstruction_error(ir.pca, 5)) expect_error(reconstruction_error(ir.pca, 0)) }) dimRed/tests/testthat/test_dimRedMethod-class.R0000644000176200001440000000050213041740150021302 0ustar liggesusers context("dimRedMethod-class") test_that("pars matching", { for (m in dimRedMethodList()) { mo <- getMethodObject(m) expect( all.equal( mo@stdpars, matchPars(mo, list()) ), paste("par matching for", m, "failed") ) } }) dimRed/tests/testthat/test_kPCA.R0000644000176200001440000000465413037634054016436 0ustar liggesusers data(iris) context("kPCA") test_that("general data conversions", { irisData <- loadDataSet("Iris") expect_equal(class(irisData)[1], "dimRedData") irisPars <- list() irisPars[[length(irisPars) + 1]] <- list(kernel = "rbfdot", kpar = list(sigma = 0.1)) irisPars[[length(irisPars) + 1]] <- list(kernel = "rbfdot", kpar = list(sigma = 1)) irisPars[[length(irisPars) + 1]] <- list(kernel = "polydot", kpar = list(degree = 3)) irisPars[[length(irisPars) + 1]] <- list(kernel = "vanilladot", kpar = list()) irisPars[[length(irisPars) + 1]] <- list(kernel = "laplacedot", kpar = list(sigma = 1)) irisPars[[length(irisPars) + 1]] <- list(kernel = "laplacedot", kpar = list(sigma = 0.1)) irisPars[[length(irisPars) + 1]] <- list(kernel = "besseldot", kpar = list(sigma = 0.1, order = 1, degree = 1)) irisPars[[length(irisPars) + 1]] <- list(kernel = "besseldot", kpar = list(sigma = 1, order = 2, degree = 3)) irisPars[[length(irisPars) + 1]] <- list(kernel = "splinedot", kpar = list()) irisRes <- lapply(irisPars, function(x) do.call( function(...) 
tryCatch(embed(.data = irisData, .method = "kPCA", ...), error = function(e) as.character(e)), x ) ) for (i in 1:length(irisRes)) { if (inherits(irisRes[[i]], "character")){ expect(grepl("singular", irisRes[[i]]), "singular") } else { expect(inherits(irisRes[[i]], "dimRedResult"), 'should be of class "dimRedResult"') } } for (i in 1:length(irisRes)){ if (inherits(irisRes[[i]], "dimRedResult")){ expect_equal(irisRes[[i]]@apply(irisData)@data[, 1:2], irisRes[[i]]@data@data) ## the reverse is an approximate: expect( max( irisRes[[i]]@inverse(irisRes[[i]]@data)@data - irisData@data ) < 200, paste0("inverse of kpca is an approximate, ", "so this may fail due to numerical inaccuracy") ) } } }) dimRed/tests/testthat/test_ICA.R0000644000176200001440000000115513065034141016235 0ustar liggesusers data(iris) context("FastICA") test_that("general data conversions", { irisData <- as(iris[, 1:4], "dimRedData") expect_equal(class(irisData)[1], "dimRedData") irisRes <- embed(irisData, "FastICA") expect_equal(class(irisRes)[1], "dimRedResult") expect_equal(irisRes@apply(irisData), irisRes@data) expect(sqrt(mean( (irisRes@inverse(irisRes@data)@data - irisData@data) ^ 2 )) < 0.3, "error too large" ) expect_equal( scale(iris[1:4], TRUE, FALSE) %*% getRotationMatrix(irisRes), unname(as.matrix(getData( getDimRedData(irisRes) )) ) ) }) dimRed/NAMESPACE0000644000176200001440000000423013065033470012675 0ustar liggesusers# Generated by roxygen2: do not edit by hand export(AUC_lnK_R_NX) export(DRR) export(DiffusionMaps) export(DrL) export(FastICA) export(FruchtermanReingold) export(HLLE) export(Isomap) export(KamadaKawai) export(LCMC) export(LLE) export(LaplacianEigenmaps) export(MDS) export(PCA) export(Q_NX) export(Q_global) export(Q_local) export(R_NX) export(dataSetList) export(dimRedData) export(dimRedMethodList) export(dimRedQualityList) export(dimRedResult) export(distance_correlation) export(embed) export(getRotationMatrix) export(installSuggests) export(inverse) export(kPCA) export(loadDataSet) export(mean_R_NX) export(mixColor1Ramps) export(mixColor2Ramps) export(mixColor3Ramps) export(mixColorRamps) export(nMDS) export(plot) export(plot_R_NX) export(predict) export(quality) export(reconstruction_error) export(reconstruction_rmse) export(tSNE) export(total_correlation) exportClasses(DRR) exportClasses(DiffusionMaps) exportClasses(DrL) exportClasses(FastICA) exportClasses(FruchtermanReingold) exportClasses(HLLE) exportClasses(Isomap) exportClasses(KamadaKawai) exportClasses(LLE) exportClasses(LaplacianEigenmaps) exportClasses(MDS) exportClasses(PCA) exportClasses(dimRedData) exportClasses(dimRedMethod) exportClasses(dimRedResult) exportClasses(kPCA) exportClasses(nMDS) exportClasses(tSNE) exportMethods("[") exportMethods(AUC_lnK_R_NX) exportMethods(LCMC) exportMethods(Q_NX) exportMethods(Q_global) exportMethods(Q_local) exportMethods(R_NX) exportMethods(as.data.frame) exportMethods(as.dimRedData) exportMethods(cophenetic_correlation) exportMethods(distance_correlation) exportMethods(embed) exportMethods(getData) exportMethods(getDimRedData) exportMethods(getMeta) exportMethods(getOrgData) exportMethods(getPars) exportMethods(inverse) exportMethods(maximize_correlation) exportMethods(mean_R_NX) exportMethods(ndims) exportMethods(nrow) exportMethods(plot) exportMethods(predict) exportMethods(print) exportMethods(quality) exportMethods(reconstruction_error) exportMethods(reconstruction_rmse) exportMethods(total_correlation) import(DRR) import(methods) import(utils) importFrom(grDevices,colorRamp) 
importFrom(grDevices,rgb) importFrom(graphics,plot) importFrom(stats,predict) dimRed/NEWS.md0000644000176200001440000000037013066671545012571 0ustar liggesusers # dimRed 0.0.3.9001 * Fixed kPCA predict function and documentation typos (@topepo #2) * Added predict and inverse functions * Added a function to extract rotation matrices from PCA and FastICA # dimRed 0.0.3 * First version on CRAN dimRed/R/0000755000176200001440000000000013102640400011645 5ustar liggesusersdimRed/R/dataSets.R0000644000176200001440000001432013033377101013550 0ustar liggesusers#' Example Data Sets for dimensionality reduction #' #' A compilation of standard data sets that are often being used to #' showcase dimensionality reduction techniques. #' #' The argument \code{name} should be one of #' \code{dataSetList()}. Partial matching is possible, see #' \code{\link{match.arg}}. Generated data sets contain the internal #' coordinates of the manifold in the \code{meta} slot. Call #' \code{dataSetList()} to see what data sets are available. #' #' #' #' @param name A character vector that specifies the name of the data #' set. #' @param n In generated data sets the number of points to be #' generated, else ignored. #' @param sigma In generated data sets the standard deviation of the #' noise added, else ignored. #' @return \code{loadDataSet} an object of class #' \code{\link{dimRedData}}. \code{dataSetList()} return a #' character string with the implemented data sets #' #' @examples #' ## a list of available data sets: #' dataSetList() #' #' ## Load a data set: #' swissRoll <- loadDataSet("Swiss Roll") #' \donttest{plot(swissRoll, type = "3vars")} #' #' ## Load Iris data set, partial matching: #' loadDataSet("I") #' #' @name dataSets NULL #' @include dimRedData-class.R #' @rdname dataSets #' @export loadDataSet <- function (name = dataSetList(), n = 2000, sigma = 0.05) { name <- match.arg(name) switch( name, "Swiss Roll" = swissRoll(n, sigma), "Broken Swiss Roll" = brokenSwissRoll(n, sigma), "Helix" = helix(n, sigma), "Twin Peaks" = twinPeaks(n, sigma), "Sphere" = sphere(n, sigma), "FishBowl" = fishbowl(n, sigma), "Ball" = ball(n, sigma), "3D S Curve" = sCurve(n, sigma), "variable Noise Helix" = noisyHelix(n, sigma), "Cube" = cube(n, sigma), "Iris" = irisdata() ) } #' @rdname dataSets #' @export dataSetList <- function () { return(c( "Swiss Roll", "Broken Swiss Roll", "Helix", "Twin Peaks", "Sphere", "Ball", "FishBowl", "3D S Curve", "variable Noise Helix", "Iris", "Cube" )) } irisdata <- function() { dd <- as.matrix(datasets::iris[, 1:4]) new("dimRedData", data = dd, meta = datasets::iris[, 5, drop = FALSE]) } swissRoll <- function (n = 2000, sigma = 0.05) { x <- stats::runif(n, 1.5 * pi, 4.5 * pi) y <- stats::runif(n, 0, 30) new("dimRedData", data = swissRollMapping(x, y) + stats::rnorm(3 * n, sd = sigma), meta = data.frame(x = x, y = y)) } brokenSwissRoll <- function (n = 2000, sigma = 0.05) { x <- c( stats::runif(floor(n / 2), 1.5 * pi, 2.7 * pi), stats::runif(ceiling(n / 2), 3.3 * pi, 4.5 * pi) ) y <- stats::runif(n, 0, 30) new("dimRedData", data = swissRollMapping(x, y) + stats::rnorm(3 * n, sd = sigma), meta = data.frame(x = x, y = y)) } swissRollMapping <- function (x, y) { cbind(x = x * cos(x), y = y, z = x * sin(x)) } helix <- function (n = 2000, sigma = 0.05) { t <- stats::runif(n, 0, 2 * pi) new("dimRedData", data = helixMapping(t) + stats::rnorm(3 * n, sd = sigma), meta = data.frame(t = t)) } helixMapping <- function (x) { cbind(x = (2 + cos(8 * x)) * cos(x), y = (2 + cos(8 * x)) * sin(x), z = (sin(8 * x))) 
} twinPeaks <- function (n = 2000, sigma = 0.05) { x <- stats::runif(n, -1, 1) y <- stats::runif(n, -1, 1) new("dimRedData", data = twinPeaksMapping(x, y) + stats::rnorm(3 * n, sd = sigma), meta = data.frame(x = x, y = y)) } twinPeaksMapping <- function (x, y) { cbind(x = x, y = y, z = sin(pi * x) * tanh(3 * y)) } sphere <- function (n = 2000, sigma = 0.05) { phi <- stats::runif(n, 0, 2 * pi) psi <- acos(stats::runif(n, -1, 1)) new("dimRedData", data = sphereMapping(phi, psi) + stats::rnorm(3 * n, sd = sigma), meta = data.frame(phi = phi, psi = psi)) } fishbowl <- function (n = 2000, sigma = 0.05) { phi <- stats::runif(n, 0, 2 * pi) psi <- acos(stats::runif(n, -1, 0.8)) new("dimRedData", data = sphereMapping(phi, psi) + stats::rnorm(3 * n, sd = sigma), meta = data.frame(psi = psi)) } sphereMapping <- function (phi, psi) { cbind(x = cos(phi) * sin(psi), y = sin(phi) * sin(psi), z = cos(psi)) } ball <- function (n = 2000, sigma = 0.05) { phi <- stats::runif(n, 0, 2 * pi) psi <- acos(stats::runif(n, -1, 1)) ## make it uniformly distributed inside the sphere r <- stats::runif(n) ^ (1 / 3) new("dimRedData", data = ballMapping(phi, psi, r) + stats::rnorm(3 * n, sd = sigma), meta = data.frame(phi = phi, psi = psi, r = r)) } ballMapping <- function (phi, psi, r) { cbind(x = r * cos(phi) * sin(psi), y = r * sin(phi) * sin(psi), z = r * cos(psi)) } sCurve <- function (n = 2000, sigma = 0.05) { t <- stats::runif(n, -1.5 * pi, 1.5 * pi) y <- stats::runif(n, 0, 2) new("dimRedData", data = sCurveMapping(t, y) + stats::rnorm(3 * n, sd = sigma), meta = data.frame(x = t, y = y)) } sCurveMapping <- function (t, y) { cbind(x = sin(t), y = y, z = sign(t) * (cos(t) - 1)) } noisyHelix <- function (n = 2000, sigma = 0.05) { t <- stats::runif(n, 0, 4 * pi) min_noise <- 0.1 max_noise <- 1.4 new("dimRedData", data = noisyHelixMapping(t, min_noise, max_noise) + stats::rnorm(3 * n, sd = sigma), meta = data.frame(t = t)) } noisyHelixMapping <- function(t, min_noise, max_noise) { make_noise <- function (t){ stats::rnorm(length(t), sd = t * max_noise / max(t) + min_noise) } cbind(x = 3 * cos(t) + make_noise(t), y = 3 * sin(t) + make_noise(t), z = 2 * t + make_noise(t)) } cube <- function(n = 2000, sigma = 0.05){ tmp <- cbind(x = stats::runif(n) + stats::rnorm(n, sd = sigma), y = stats::runif(n) + stats::rnorm(n, sd = sigma), z = stats::runif(n) + stats::rnorm(n, sd = sigma)) new("dimRedData", data = tmp, meta = tmp) } dimRed/R/get_info.R0000644000176200001440000000176213065035422013602 0ustar liggesusers #' getRotationMatrix #' #' Extract the rotation matrix from \code{\link{dimRedResult}} objects derived from PCA and FastICA #' #' The data has to be pre-processed the same way as the method does, e.g. #' centering and/or scaling. 
#' #' @param x of type \code{\link{dimRedResult}} #' @return a matrix #' #' @examples #' dat <- loadDataSet("Iris") #' #' pca <- embed(dat, "PCA") #' ica <- embed(dat, "FastICA") #' #' rot_pca <- getRotationMatrix(pca) #' rot_ica <- getRotationMatrix(ica) #' #' scale(getData(dat), TRUE, FALSE) %*% rot_pca - getData(getDimRedData(pca)) #' scale(getData(dat), TRUE, FALSE) %*% rot_ica - getData(getDimRedData(ica)) #' #' @family convenience functions #' @export getRotationMatrix <- function(x) { if(!inherits(x, "dimRedResult")) stop("x must be of type 'dimRedResult'") if(x@method == "PCA") return(environment(x@apply)$rot) if(x@method == "FastICA") return(environment(x@apply)$res$K %*% environment(x@apply)$res$W) stop(paste("Not implemented for", x@method)) } dimRed/R/pca.R0000644000176200001440000001005513065025601012544 0ustar liggesusers#' Principal Component Analysis #' #' S4 Class implementing PCA. #' #' PCA transforms the data in orthogonal components so that the first #' axis accounts for the larges variance in the data, all the #' following axes account for the highest variance under the #' constraint that they are orthogonal to the preceding axes. PCA is #' sensitive to the scaling of the variables. PCA is by far the #' fastest and simples method of dimensionality reduction and should #' probably always be applied as a baseline if other methods are tested. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' PCA can take the following parameters: #' \describe{ #' \item{ndim}{The number of output dimensions.} #' \item{center}{logical, should the data be centered, defaults to \code{TRUE}.} #' \item{scale.}{logical, should the data be scaled, defaults to \code{FALSE}.} #' } #' #' @section Implementation: #' #' Wraps around \code{\link{prcomp}}. Because PCA can be reduced to a #' simple rotation, forward and backward projection functions are #' supplied. . #' #' @examples #' dat <- loadDataSet("Iris") #' #' ## using the S4 Class #' pca <- PCA() #' emb <- pca@fun(dat, pca@stdpars) #' #' ## using embed() #' emb2 <- embed(dat, "PCA") #' #' plot(emb, type = "2vars") #' plot(emb@inverse(emb@data), type = "3vars") #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export PCA #' @exportClass PCA PCA <- setClass( "PCA", contains = "dimRedMethod", prototype = list( stdpars = list(ndim = 2, center = TRUE, scale. 
= FALSE), fun = function (data, pars, keep.org.data = TRUE) { ndim <- pars$ndim pars$ndim <- NULL meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL data <- data@data res <- do.call( prcomp, c(list(x = data), pars) ) # evaluate results here for functions data <- res$x[, seq_len(ndim), drop = FALSE] ce <- res$center sc <- res$scale rot <- res$rotation[, seq_len(ndim)] rerot <- t(rot) appl <- function(x) { appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() proj <- if (inherits(x, "dimRedData")) x@data else x if (ncol(proj) != ncol(orgdata)) stop("x must have the same number of dimensions ", "as the original data") if (ce[1] != FALSE) proj <- t(apply(proj, 1, function(x) x - ce)) if (sc[1] != FALSE) proj <- t(apply(proj, 1, function(x) x / sc)) proj <- proj %*% rot proj <- new("dimRedData", data = proj, meta = appl.meta) return(proj) } inv <- function(x) { appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() proj <- if (inherits(x, "dimRedData")) x@data else x if (ncol(proj) > ncol(data)) stop("x must have less or equal number of dimensions ", "as the original data") d <- ncol(proj) reproj <- proj %*% rerot[seq_len(d), ] if (sc[1] != FALSE) reproj <- t(apply(reproj, 1, function(x) x * sc)) if (ce[1] != FALSE) reproj <- t(apply(reproj, 1, function(x) x + ce)) reproj <- new("dimRedData", data = reproj, meta = appl.meta) return(reproj) } res <- new( "dimRedResult", data = new("dimRedData", data = data, meta = meta), org.data = orgdata, apply = appl, inverse = inv, has.org.data = keep.org.data, has.apply = TRUE, has.inverse = TRUE, method = "PCA", pars = pars ) return(res) }) ) dimRed/R/dimRedData-class.R0000644000176200001440000001360513024300244015100 0ustar liggesusers#' @include misc.R NULL #' Class "dimRedData" #' #' A class to hold data for dimensionality reduction and methods. #' #' The class hast two slots, \code{data} and \code{meta}. The #' \code{data} slot contains a \code{numeric matrix} with variables in #' columns and observations in rows. The \code{meta} slot may contain #' a \code{data.frame} with additional information. Both slots need to #' have the same number of rows or the \code{meta} slot needs to #' contain an empty \code{data.frame}. #' #' See examples for easy conversion from and to \code{data.frame}. #' #' For plotting functions see \code{\link{plot.dimRedData}}. #' #' @slot data of class \code{matrix}, holds the data, observations in #' rows, variables in columns #' @slot meta of class \code{data.frame}, holds meta data such as #' classes, internal manifold coordinates, or simply additional #' data of the data set. Must have the same number of rows as the #' \code{data} slot or be an empty data frame. 
#' #' #' @examples #' ## Load an example data set: #' s3d <- loadDataSet("3D S Curve") #' #' ## Create using a constructor: #' #' ### without meta information: #' dimRedData(iris[, 1:4]) #' #' ### with meta information: #' dimRedData(iris[, 1:4], iris[, 5]) #' #' ### using slot names: #' dimRedData(data = iris[, 1:4], meta = iris[, 5]) #' #' ## Convert to a dimRedData objects: #' Iris <- as(iris[, 1:4], "dimRedData") #' #' ## Convert to data.frame: #' head(as(s3d, "data.frame")) #' head(as.data.frame(s3d)) #' head(as.data.frame(as(iris[, 1:4], "dimRedData"))) #' #' ## Extract slots: #' head(getData(s3d)) #' head(getMeta(s3d)) #' #' ## Get the number of observations: #' nrow(s3d) #' #' ## Subset: #' s3d[1:5, ] #' #' @family dimRedData #' @import methods #' @export dimRedData #' @exportClass dimRedData dimRedData <- setClass( "dimRedData", slots = c(data = "matrix", meta = "data.frame"), prototype = prototype(data = matrix(numeric(0), 0, 0), meta = data.frame()), validity = function (object) { retval <- NULL if (!is.matrix(object@data)) { retval <- c( retval, c("data must be a matrix with ", "observations in rows and dimensions in columns") ) } if (!is.numeric(object@data)) { retval <- c( retval, c("data must be numeric") ) } if ((nrow(object@meta) != 0) && (nrow(object@meta) != nrow(object@data))){ retval <- c( retval, c("data and meta must have the same numbers of rows") ) } return(if (is.null(retval)) TRUE else retval) } ) setMethod("initialize", signature = c("dimRedData"), function (.Object, data = matrix(numeric(0), 0, 0), meta = data.frame()) { data <- as.matrix(data) meta <- as.data.frame(meta) .Object <- callNextMethod() return(.Object) }) setAs(from = "ANY", to = "dimRedData", def = function(from) new("dimRedData", data = as.matrix(from))) setAs(from = "dimRedData", to = "data.frame", def = function(from) as.data.frame(from)) #' @param meta.prefix Prefix for the columns of the meta data names. #' @param data.prefix Prefix for the columns of the variable names. #' #' @family dimRedData #' @describeIn dimRedData convert to data.frame #' @export setMethod(f = "as.data.frame", signature = c("dimRedData"), definition = function(x, meta.prefix = "meta.", data.prefix = "") { tmp <- list() if (nrow(x@meta) > 0){ tmp$meta <- as.data.frame(x@meta, stringsAsFactors = FALSE) names(tmp$meta) <- paste0(meta.prefix, colnames(x@meta)) } tmp$data <- as.data.frame(x@data, stringsAsFactors = FALSE) names(tmp$data) <- paste0(data.prefix, colnames(x@data)) names(tmp) <- NULL data.frame(tmp, stringsAsFactors = FALSE) }) #' @param formula The formula, left hand side is assigned to the meta slot #' right hand side is assigned to the data slot. #' @param data A data frame #' #' @examples #' ## create a dimRedData object using a formula #' as.dimRedData(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, #' iris)[1:5] #' #' @include misc.R #' @family dimRedData #' @describeIn dimRedData Convert a \code{data.frame} to a dimRedData #' object using a formula #' @export setMethod(f = "as.dimRedData", signature = c("formula"), definition = function(formula, data) { data <- as.data.frame(data) meta <- stats::model.frame(lhs(formula), data) data <- stats::model.matrix(rhs(formula), data) return(new("dimRedData", data = data, meta = meta)) }) #' @param object Of class dimRedData. #' @describeIn dimRedData Get the data slot. #' @export setMethod("getData", "dimRedData", function(object) object@data) #' @describeIn dimRedData Get the meta slot. 
#' @export setMethod("getMeta", "dimRedData", function(object) object@meta) #' @param x Of class dimRedData #' @describeIn dimRedData Get the number of observations. #' @export setMethod("nrow", "dimRedData", function(x) nrow(x@data)) #' @param i a valid index for subsetting rows. #' @examples #' ## Shuffle data: #' s3 <- s3d[nrow(s3d)] #' #' @describeIn dimRedData Subset rows. #' @export setMethod("[", signature(x = "dimRedData", i = "ANY"), function(x, i) { x@data <- x@data[i, , drop = FALSE] if (nrow(x@meta) != 0) x@meta <- x@meta[i, , drop = FALSE] vv <- validObject(x) if (vv == TRUE) return(x) else stop("cannot subset dimRedData object: \n", paste(vv, collapse = "\n")) }) #' @describeIn dimRedData Extract the number of Variables from the data. #' #' @examples #' ## Get the number of variables: #' ndims(s3d) #' #' @export setMethod("ndims", "dimRedData", function(object) ncol(object@data)) dimRed/R/hlle.R0000644000176200001440000001040013037636032012724 0ustar liggesusers#' Hessian Locally Linear Embedding #' #' An S4 Class implementing Hessian Locally Linear Embedding (HLLE) #' #' HLLE uses local hessians to approximate the curvines and is an #' extension to non-convex subsets in lowdimensional space. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' HLLE can take the following parameters: #' \describe{ #' \item{knn}{neighborhood size} #' \item{ndim}{number of output dimensions} #' } #' #' @section Implementation: #' Own implementation, sticks to the algorithm in Donoho and Grimes #' (2003). Makes use of sparsity to speed up final embedding. #' #' @references #' Donoho, D.L., Grimes, C., 2003. Hessian eigenmaps: Locally linear #' embedding techniques for high-dimensional data. PNAS 100, #' 5591-5596. 
doi:10.1073/pnas.1031596100 #' #' @examples #' dat <- loadDataSet("3D S Curve", n = 1500) #' #' ## directy use the S4 class: #' hlle <- HLLE() #' emb <- hlle@fun(dat, hlle@stdpars) #' #' ## using embed(): #' emb2 <- embed(dat, "HLLE", knn = 45) #' #' plot(emb, type = "2vars") #' plot(emb2, type = "2vars") #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export HLLE #' @exportClass HLLE HLLE <- setClass( "HLLE", contains = "dimRedMethod", prototype = list( stdpars = list(knn = 50, ndim = 2), fun = function(data, pars, keep.org.data = TRUE) { chckpkg("RSpectra") chckpkg("Matrix") chckpkg("RANN") if (is.null(pars$knn)) pars$knn <- 50 if (is.null(pars$ndim)) pars$ndim <- 2 indata <- data@data n <- nrow(indata) hs <- pars$ndim * (pars$ndim + 1) / 2 W <- Matrix::sparseMatrix(i = numeric(0), j = numeric(0), x = numeric(0), dims = c(n, hs * n)) ii <- jj <- ww <- list() ## Identify neighbors: message(Sys.time(), ": Finding nearest neighbors", sep = "") nnidx <- RANN::nn2(data = indata, query = indata, k = pars$knn + 1, treetype = "kd", "standard", eps = 0)$nn.idx#[, -1] message(Sys.time(), ": Calculating Hessian", sep = "") for (i in seq_len(n)) { cat(i, "/", n, "\r", sep = "") ## get neighborhood Nui <- indata[nnidx[i, ], , drop = FALSE] ## Form tangent coordinates: Nui <- sweep(Nui, 2, colMeans(Nui), "-") tc <- svd(Nui, nu = pars$ndim, nv = 0)$u ## Develop Hessian Estimator Xi <- cbind( 1, tc, tc ^ 2, apply(combn(seq_len(pars$ndim), 2), 2, function(x) tc[, x[1]] * tc[, x[2]]) ) tHi <- qr.Q(qr(Xi))[, -(1:(pars$ndim + 1)), drop = FALSE] ## Add quadratic form to hessian ii[[i]] <- rep(nnidx[i, ], hs) jj[[i]] <- rep((i - 1) * hs + (1:hs), each = ncol(nnidx)) ww[[i]] <- as.vector(tHi) } H <- as(Matrix::tcrossprod(Matrix::spMatrix( i = unlist(ii, FALSE, FALSE), j = unlist(jj, FALSE, FALSE), x = unlist(ww, FALSE, FALSE), nrow = n, ncol = n * hs) ), "dgCMatrix") ## Find null space: message(Sys.time(), ": Embedding", sep = "") ## eigs and eigs_sym converges much more reliably and faster ## with sigma = -eps than with which = "L*" outdata <- RSpectra::eigs_sym(H, k = pars$ndim + 1, sigma = -1e-5) message(paste(c("Eigenvalues:", format(outdata$values)), collapse = " ")) outdata <- outdata$vectors[, order(outdata$values)[-1], drop = FALSE] colnames(outdata) <- paste0("HLLE", seq_len(ncol(outdata))) message(Sys.time(), ": DONE", sep = "") return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = data@meta), org.data = if (keep.org.data) data@data else NULL, has.org.data = keep.org.data, method = "HLLE", pars = pars )) }) ) dimRed/R/dimRed.R0000644000176200001440000000224213037627617013222 0ustar liggesusers#' @title #' The dimRed package #' #' @description This package simplifies dimensionality reduction in R by #' providing a framework of S4 classes and methods. dimRed collects #' dimensionality reduction methods that are implemented in R and implements #' others. It gives them a common interface and provides plotting #' functions for visualization and functions for quality assessment. #' #' Funding provided by the Department for Biogeochemical Integration, #' Empirical Inference of the Earth System Group, at the Max Plack #' Institute for Biogeochemistry, Jena. #' #' @references #' #' Lee, J.A., Renard, E., Bernard, G., Dupont, P., Verleysen, M., #' 2013. Type 1 and 2 mixtures of Kullback-Leibler divergences as cost #' functions in dimensionality reduction based on similarity #' preservation. Neurocomputing. 
112, #' 92-107. doi:10.1016/j.neucom.2012.12.036 #' #' Lee, J.A., Lee, J.A., Verleysen, M., 2008. Rank-based quality #' assessment of nonlinear dimensionality reduction. Proceedings of #' ESANN 2008 49-54. #' #' Chen, L., Buja, A., 2006. Local Multidimensional Scaling for #' Nonlinear Dimension Reduction, Graph Layout and Proximity Analysis. #' "_PACKAGE" dimRed/R/loe.R0000644000176200001440000000310713024273620012561 0ustar liggesusers ## this function produces segfaults and is super slow ## #' Local Ordinal Embedding ## #' ## #' Instance of \code{\link{dimRedMethod}} for Local Ordinal Embedding. ## #' ## #' For details see \code{\link[loe]{LOE}} ## #' ## #' @examples ## #' # for whatever reason the loe package has problems if I run this ## #' # with R CMD check, running it in the REPL works just fine ## #' dat <- loadDataSet("Iris")[sample(20)] ## #' loe <- LOE() ## #' emb <- loe@fun(dat, loe@stdpars) ## #' ## #' ## #' plot(emb@data@data) ## #' ## #' @include dimRedResult-class.R ## #' @include dimRedMethod-class.R ## #' @export ## LOE <- setClass( ## "LOE", ## contains = "dimRedMethod", ## prototype = list( ## stdpars = list(d = stats::dist, knn = 50, ndim = 2), ## fun = function (data, pars, ## keep.org.data = TRUE) { ## chckpkg("loe") ## meta <- data@meta ## orgdata <- if (keep.org.data) data@data else NULL ## indata <- data@data ## data.adj <- loe:::make.kNNG(as.matrix(pars$d(indata)), k = pars$knn) ## outdata <- loe::LOE(data.adj, p = pars$ndim, method = "MM")$X ## colnames(outdata) <- paste0("LOE", 1:ncol(outdata)) ## return(new( ## "dimRedResult", ## data = new("dimRedData", ## data = outdata, ## meta = meta), ## org.data = orgdata, ## has.org.data = keep.org.data, ## method = "loe", ## pars = pars ## )) ## }) ## ) dimRed/R/quality.R0000644000176200001440000004456713054044534013514 0ustar liggesusers#' @include dimRedResult-class.R #' @include dimRedData-class.R #' @export setGeneric("quality", function (.data, ...) standardGeneric("quality"), valueClass = "numeric") #' Quality Criteria for dimensionality reduction. #' #' A collection of functions to compute quality measures on #' \code{\link{dimRedResult}} objects. #' #' @section Implemented methods: #' #' Method must be one of \code{"\link{Q_local}", "\link{Q_global}", #' "\link{mean_R_NX}", "\link{total_correlation}", #' "\link{cophenetic_correlation}", "\link{distance_correlation}", #' "\link{reconstruction_rmse}"} #' #' @section Rank based criteria: #' #' \code{Q_local}, \code{Q_global}, and \code{mean_R_nx} are #' quality criteria based on the Co-ranking matrix. \code{Q_local} #' and \code{Q_global} determine the local/global quality of the #' embedding, while \code{mean_R_nx} determines the quality of the #' overall embedding. They are parameter free and return a single #' number. The object must include the original data. The number #' returns is in the range [0, 1], higher values mean a better #' local/global embedding. #' #' @section Correlation based criteria: #' #' \code{total_correlation} calculates the sum of the mean squared #' correlations of the original axes with the axes in reduced #' dimensions, because some methods do not care about correlations #' with axes, there is an option to rotate data in reduced space to #' maximize this criterium. The number may be greater than one if more #' dimensions are summed up. #' #' \code{cophenetic_correlation} calculate the correlation between the #' lower triangles of distance matrices, the correlation and distance #' methods may be specified. 
The result is in range [-1, 1]. #' #' \code{distance_correlation} measures the independes of samples by #' calculating the correlation of distances. For details see #' \code{\link[energy]{dcor}}. #' #' @section Reconstruction error: #' #' \code{reconstruction_rmse} calculates the root mean squared error #' of the reconstrucion. \code{object} requires an inverse function. #' #' #' @references #' #' Lueks, W., Mokbel, B., Biehl, M., Hammer, B., 2011. How #' to Evaluate Dimensionality Reduction? - Improving the #' Co-ranking Matrix. arXiv:1110.3917 [cs]. #' #' Szekely, G.J., Rizzo, M.L., Bakirov, N.K., 2007. Measuring and #' testing dependence by correlation of distances. Ann. Statist. 35, #' 2769-2794. doi:10.1214/009053607000000505 #' #' Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015. Multi-scale #' similarities in stochastic neighbour embedding: Reducing #' dimensionality while preserving both local and global #' structure. Neurocomputing, 169, #' 246-261. doi:10.1016/j.neucom.2014.12.095 #' #' #' #' @param .data object of class \code{dimRedResult} #' @param .method character vector naming one of the methods #' @param .mute what output from the embedding method should be muted. #' @param ... the pameters, internally passed as a list to the #' quality method as \code{pars = list(...)} #' @return a number #' #' @examples #' \dontrun{ #' embed_methods <- dimRedMethodList() #' quality_methods <- dimRedQualityList() #' scurve <- loadDataSet("3D S Curve", n = 500) #' #' quality_results <- matrix(NA, length(embed_methods), length(quality_methods), #' dimnames = list(embed_methods, quality_methods)) #' embedded_data <- list() #' #' for (e in embed_methods) { #' message("embedding: ", e) #' embedded_data[[e]] <- embed(scurve, e, .mute = c("message", "output")) #' for (q in quality_methods) { #' message(" quality: ", q) #' quality_results[e, q] <- tryCatch( #' quality(embedded_data[[e]], q), #' error = function (e) NA #' ) #' } #' } #' #' print(quality_results) #' } #' @author Guido Kraemer #' @aliases quality quality.dimRedResult #' @family Quality scores for dimensionality reduction #' @describeIn quality Calculate a quality index from a dimRedResult object. #' @export setMethod( "quality", "dimRedResult", function (.data, .method = dimRedQualityList(), .mute = character(0), # c("output", "message"), ...) { method <- match.arg(.method) methodFunction <- getQualityFunction(method) args <- c(list(object = .data), list(...)) devnull <- if (Sys.info()["sysname"] != "Windows") "/dev/null" else "NUL" if ("message" %in% .mute){ devnull1 <- file(devnull, "wt") sink(devnull1, type = "message") on.exit({ sink(file = NULL, type = "message") close(devnull1) }, add = TRUE) } if ("output" %in% .mute) { devnull2 <- file(devnull, "wt") sink(devnull2, type = "output") on.exit({ sink() close(devnull2) }, add = TRUE) } do.call(methodFunction, args) } ) getQualityFunction <- function (method) { switch( method, Q_local = Q_local, Q_global = Q_global, mean_R_NX = mean_R_NX, AUC_lnK_R_NX = AUC_lnK_R_NX, total_correlation = total_correlation, cophenetic_correlation = cophenetic_correlation, distance_correlation = distance_correlation, reconstruction_rmse = reconstruction_rmse ) } #' @export setGeneric( "Q_local", function(object, ...) standardGeneric("Q_local"), valueClass = "numeric" ) #' Method Q_local #' #' Calculate the Q_local score to assess the quality of a dimensionality reduction. 
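#'
#' See the examples for a minimal usage sketch in the spirit of the package
#' tests; it assumes the suggested \code{coRanking} package is installed.
#' @examples
#' \dontrun{
#' emb <- embed(loadDataSet("Iris"), "PCA")
#' Q_local(emb)
#' }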
#' #' @param object of class dimRedResult #' @family Quality scores for dimensionality reduction #' @aliases Q_local #' @export setMethod( "Q_local", "dimRedResult", function (object) { if (!object@has.org.data) stop("object requires original data") chckpkg("coRanking") Q <- coRanking::coranking(object@org.data, object@data@data) nQ <- nrow(Q) N <- nQ + 1 Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N lcmc <- Qnx - seq_len(nQ) / nQ Kmax <- which.max(lcmc) Qlocal <- sum(lcmc[1:Kmax]) / Kmax return(Qlocal) } ) #' @export setGeneric( "Q_global", function(object, ...) standardGeneric("Q_global"), valueClass = "numeric" ) #' Method Q_global #' #' Calculate the Q_global score to assess the quality of a dimensionality reduction. #' #' @param object of class dimRedResult #' @family Quality scores for dimensionality reduction #' @aliases Q_global #' @export setMethod( "Q_global", "dimRedResult", function(object){ if (!object@has.org.data) stop("object requires original data") chckpkg("coRanking") Q <- coRanking::coranking(object@org.data, object@data@data) nQ <- nrow(Q) N <- nQ + 1 Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N lcmc <- Qnx - seq_len(nQ) / nQ Kmax <- which.max(lcmc) Qglobal <- sum(lcmc[(Kmax + 1):nQ]) / (N - Kmax) return(Qglobal) } ) #' @export setGeneric( "mean_R_NX", function(object, ...) standardGeneric("mean_R_NX"), valueClass = "numeric" ) #' Method mean_R_NX #' #' Calculate the mean_R_NX score to assess the quality of a dimensionality reduction. #' #' @param object of class dimRedResult #' @family Quality scores for dimensionality reduction #' @aliases mean_R_NX #' @export setMethod( "mean_R_NX", "dimRedResult", function(object) mean(R_NX(object)) ) #' @export setGeneric( "AUC_lnK_R_NX", function(object, ...) standardGeneric("AUC_lnK_R_NX"), valueClass = "numeric" ) #' Method AUC_lnK_R_NX #' #' Calculate the Area under the R_NX(ln K), used in Lee et. al. (2013). #' #' @references #' #' Lee, J.A., Renard, E., Bernard, G., Dupont, P., Verleysen, M., #' 2013. Type 1 and 2 mixtures of Kullback-Leibler divergences as cost #' functions in dimensionality reduction based on similarity #' preservation. Neurocomputing. 112, #' 92-107. doi:10.1016/j.neucom.2012.12.036 #' #' @param object of class dimRedResult #' @family Quality scores for dimensionality reduction #' @aliases AUC_lnK_R_NX #' @export setMethod( "AUC_lnK_R_NX", "dimRedResult", function(object) { rnx <- R_NX(object) auc_lnK(rnx) } ) auc_lnK <- function(rnx) { Ks <- seq_along(rnx) return (sum(rnx / Ks) / sum(1 / Ks)) ## in my intuition it should be the folowing: ## N <- length(rnx) ## sum((rnx[-N] + rnx[-1]) / 2 * (log(2:N) - log(seq_len(N - 1)))) } #' @export setGeneric( "total_correlation", function(object, ...) standardGeneric("total_correlation"), valueClass = "numeric" ) #' Method total_correlation #' #' Calculate the total correlation of the variables with the axes to #' assess the quality of a dimensionality reduction. #' #' @param object of class dimRedResult #' @param naxes the number of axes to use for optimization. #' @param cor_method the correlation method to use. #' @param is.rotated if FALSE the object is rotated. 
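#'
#' See the examples for a small usage sketch analogous to the call in the
#' package tests (the rotation step may emit warnings for some embeddings).
#' @examples
#' \dontrun{
#' total_correlation(embed(loadDataSet("Iris"), "PCA"))
#' }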
#' #' @family Quality scores for dimensionality reduction #' @aliases total_correlation #' @export setMethod( "total_correlation", "dimRedResult", function(object, naxes = ndims(object), cor_method = "pearson", is.rotated = FALSE){ if (!object@has.org.data) stop("object requires original data") if (length(naxes) != 1 || naxes < 1 || naxes > ncol(object@data@data)) stop("naxes must specify the numbers of axes to optimize for, ", "i.e. a single integer between 1 and ncol(object@data@data)") ## try to partially match cor_method: cor_methods <- c("pearson", "kendall", "spearman") cor_method <- cor_methods[pmatch(cor_method, cor_methods)] if (is.na(cor_method)) stop("cor_method must match one of ", "'pearson', 'kendall', or 'spearman', ", "at least partially.") if (!is.rotated) { rotated_result <- maximize_correlation( object, naxes, cor_method ) } else { rotated_result <- object } res <- 0 for (i in 1:naxes) res <- res + mean(correlate( rotated_result@data@data, rotated_result@org.data, cor_method )[i, ] ^ 2) return(res) } ) setGeneric("cophenetic_correlation", function(object, ...) standardGeneric("cophenetic_correlation"), valueClass = "numeric") #' Method cophenetic_correlation #' #' Calculate the correlation between the distance matrices in high and #' low dimensioal space. #' #' @param object of class dimRedResult #' @param d the distance function to use. #' @param cor_method The correlation method. #' @aliases cophenetic_correlation #' @family Quality scores for dimensionality reduction #' @export setMethod( "cophenetic_correlation", "dimRedResult", function(object, d = stats::dist, cor_method = "pearson"){ ## if (missing(d)) d <- stats::dist ## if (missing(cor_method)) cor_method <- "pearson" if (!object@has.org.data) stop("object requires original data") cor_methods <- c("pearson", "kendall", "spearman") cor_method <- cor_methods[pmatch(cor_method, cor_methods)] if (is.na(cor_method)) stop("cor_method must match one of ", "'pearson', 'kendall', or 'spearman', ", "at least partially.") d.org <- d(object@org.data) d.emb <- d(object@data@data) if (!inherits(d.org, "dist") || !inherits(d.emb, "dist")) stop("d must return a dist object") res <- correlate( d(object@org.data), d(object@data@data), cor_method ) return(res) } ) #' @export setGeneric( "distance_correlation", function(object) standardGeneric("distance_correlation"), valueClass = "numeric" ) #' Method distance_correlation #' #' Calculate the distance correlation between the distance matrices in #' high and low dimensioal space. #' #' @param object of class dimRedResult #' @aliases distance_correlation #' @family Quality scores for dimensionality reduction #' @export setMethod( "distance_correlation", "dimRedResult", function(object){ if (!object@has.org.data) stop("object requires original data") if (!requireNamespace("energy")) stop("package energy required.") energy::dcor(object@org.data, object@data@data) } ) #' @export setGeneric( "reconstruction_rmse", function(object) standardGeneric("reconstruction_rmse"), valueClass = "numeric" ) #' Method reconstruction_rmse #' #' Calculate the reconstruction root mean squared error a dimensionality reduction, the method must have an inverse mapping. 
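#'
#' See the examples for a minimal usage sketch mirroring the package tests;
#' PCA is used there because it provides the required inverse mapping.
#' @examples
#' \dontrun{
#' reconstruction_rmse(embed(loadDataSet("Iris"), "PCA"))
#' }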
#' #' @param object of class dimRedResult #' @aliases reconstruction_rmse #' @family Quality scores for dimensionality reduction #' @export setMethod( "reconstruction_rmse", "dimRedResult", function(object){ if (!object@has.org.data) stop("object requires original data") if (!object@has.inverse) stop("object requires an inverse function") recon <- object@inverse(object@data) sqrt(mean((recon@data - object@org.data) ^ 2)) } ) #' @rdname quality #' #' @export dimRedQualityList <- function () { return(c("Q_local", "Q_global", "mean_R_NX", "AUC_lnK_R_NX", "total_correlation", "cophenetic_correlation", "distance_correlation", "reconstruction_rmse")) } #' @export setGeneric( "R_NX", function(object) standardGeneric("R_NX"), valueClass = "numeric" ) #' Method R_NX #' #' Calculate the R_NX score from Lee et. al. (2013) which shows the #' neighborhood preservation for the Kth nearest neighbors, #' corrected for random point distributions and scaled to range [0, 1]. #' @param object of class dimRedResult #' @family Quality scores for dimensionality reduction #' @aliases R_NX #' @export setMethod( "R_NX", "dimRedResult", function(object) { chckpkg("coRanking") if (!object@has.org.data) stop("object requires original data") Q <- coRanking::coranking(object@org.data, object@data@data) nQ <- nrow(Q) N <- nQ + 1 Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N Rnx <- ((N - 1) * Qnx - seq_len(nQ)) / (N - 1 - seq_len(nQ)) Rnx[-nQ] } ) #' @export setGeneric( "Q_NX", function(object, ...) standardGeneric("Q_NX"), valueClass = "numeric" ) #' Method Q_NX #' #' Calculate the Q_NX score (Chen & Buja 2006, the notation in the #' publication is M_k). Which is the fraction of points that remain inside #' the same K-ary neighborhood in high and low dimensional space. #' #' @param object of class dimRedResult #' @family Quality scores for dimensionality reduction #' @aliases Q_NX #' @export setMethod( "Q_NX", "dimRedResult", function(object) { chckpkg("coRanking") Q <- coRanking::coranking(object@org.data, object@data@data) nQ <- nrow(Q) N <- nQ + 1 Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N Qnx } ) #'@export setGeneric( "LCMC", function(object, ...) standardGeneric("LCMC"), valueClass = "numeric" ) #' Method LCMC #' #' Calculates the Local Continuity Meta Criterion, which is #' \code{\link{Q_NX}} adjusted for random overlap inside the K-ary #' neighborhood. #' #' @param object of class dimRedResult #' @family Quality scores for dimensionality reduction #' @aliases LCMC #' @export setMethod( "LCMC", "dimRedResult", function(object) { chckpkg("coRanking") Q <- coRanking::coranking(object@org.data, object@data@data) nQ <- nrow(Q) N <- nQ + 1 lcmc <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N - seq_len(nQ) / nQ lcmc } ) rnx2qnx <- function(rnx, K = seq_along(rnx), N = length(rnx) + 1) { (rnx * (N - 1 - K) + K) / (N - 1) } qnx2rnx <- function(qnx, K = seq_along(qnx), N = length(qnx) + 1) { ((N - 1) * qnx - K) / (N - 1 - K) } #' @export setGeneric( "reconstruction_error", function(object, ...) standardGeneric("reconstruction_error"), valueClass = "numeric" ) #' Method reconstruction_error #' #' Calculate the error using only the first \code{n} dimensions of the embedded #' data. \code{error_fun} can either be one of \code{c("rmse", "mae")} to #' calculate the root mean square error or the mean absolute error respectively, #' or a function that takes to equally sized vectors as input and returns a #' single number as output. 
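#'
#' For illustration, the documented contract would allow a hypothetical custom
#' error function such as \code{error_fun = function(x1, x2) max(abs(x1 - x2))},
#' giving the maximum absolute reconstruction error instead of \code{"rmse"} or
#' \code{"mae"}.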
#' #' @param object of class dimRedResult #' @param n a positive integer or vector of integers \code{<= ndims(object)} #' @param error_fun a function or string indicating an error function. #' @return a vector of number with the same length as \code{n} with the #' #' @examples #' \dontrun{ #' ir <- loadDataSet("Iris") #' ir.drr <- embed(ir, "DRR", ndim = ndims(ir)) #' ir.pca <- embed(ir, "PCA", ndim = ndims(ir)) #' #' rmse <- data.frame( #' rmse_drr = reconstruction_error(ir.drr), #' rmse_pca = reconstruction_error(ir.pca) #' ) #' #' matplot(rmse, type = "l") #' plot(ir) #' plot(ir.drr) #' plot(ir.pca) #' } #' @author Guido Kraemer #' @family Quality scores for dimensionality reduction #' @aliases reconstruction_error #' @export setMethod( "reconstruction_error", c("dimRedResult"), function (object, n = seq_len(ndims(object)), error_fun = "rmse") { if (any(n > ndims(object))) stop("n > ndims(object)") if (any(n < 1)) stop("n < 1") if (inherits(error_fun, "character")) { switch( error_fun, rmse = rmse, mae = mae ) } else if (inherits(error_fun, "function")) { error_fun } else { stop("error_fun must be a string or function, see documentation for details") } res <- numeric(length(n)) org <- getData(getOrgData(object)) for (i in n) { rec <- getData(inverse( object , getData(getDimRedData(object))[, seq_len(i), drop = FALSE] )) res[i] <- sqrt(mean((org - rec) ^ 2)) } res } ) rmse <- function (x1, x2) sqrt(mean((x1 - x2) ^ 2)) mae <- function (x1, x2) mean(abs(x1 - x2)) dimRed/R/plot.R0000644000176200001440000001466013026500147012765 0ustar liggesusers#' Plotting of dimRed* objects #' #' Plots a object of class dimRedResult and dimRedData. For the #' documentation of the plotting function in base see here: #' \code{\link{plot.default}}. #' #' Plotting functions for the classes usind in \code{dimRed}. they are #' intended to give a quick overview over the results, so they are #' somewhat inflexible, e.g. it is hard to modify color scales or #' plotting parameters. #' #' If you require more control over plotting, it is better to convert #' the object to a \code{data.frame} first and use the standard #' functions for plotting. #' #' @param x dimRedResult/dimRedData class, e.g. output of #' embedded/loadDataSet #' @param y Ignored #' @param type plot type, one of \code{c("pairs", "parallel", "2vars", #' "3vars", "3varsrgl")} #' @param col the columns of the meta slot to use for coloring, can be #' referenced as the column names or number of x@data #' @param vars the axes of the embedding to use for plotting #' @param ... handed over to the underlying plotting function. #' #' @examples #' scurve = loadDataSet("3D S Curve") #' plot(scurve, type = "pairs", main = "pairs plot of S curve") #' plot(scurve, type = "parpl") #' plot(scurve, type = "2vars", vars = c("y", "z")) #' plot(scurve, type = "3vars") #' #' @include mixColorSpaces.R #' @include dimRedData-class.R #' @importFrom graphics plot #' #' @aliases plot.dimRed #' @export setGeneric( "plot", function(x, y, ...) standardGeneric("plot"), useAsDefault = graphics::plot ) #' @describeIn plot Ploting of dimRedData objects #' @aliases plot.dimRedData #' @export setMethod( f = "plot", signature = c("dimRedData"), definition = function(x, type = "pairs", vars = seq_len(ncol(x@data)), col = seq_len(min(3, ncol(x@meta))), ...) { cols <- colorize(x@meta[, col, drop = FALSE]) switch( type, "pairs" = { chckpkg("graphics") graphics::pairs(x@data[, vars], col = cols, ... ) }, "parpl" = { chckpkg("MASS") MASS::parcoord(x@data[, vars], col = cols, ... 
) }, "2vars" = { chckpkg("graphics") graphics::plot(x@data[, vars[1:2]], col = cols, ... ) }, "3vars" = { chckpkg("scatterplot3d") scatterplot3d::scatterplot3d(x@data[, vars[1:3]], color = cols, ...) }, "3varsrgl" = { chckpkg("rgl") rgl::plot3d(x@data[, vars[1:3]], col = cols, ... ) }, stop("wrong argument to plot.dimRedData") ) } ) #' @describeIn plot Ploting of dimRedResult objects. #' @aliases plot.dimRedResult #' @export setMethod( f = "plot", signature = c("dimRedResult"), definition = function (x, type = "pairs", vars = seq_len(ncol(x@data@data)), col = seq_len(min(3, ncol(x@data@meta))), ...) { plot(x = x@data, type = type, vars = vars, col = col, ...) } ) #' plot_R_NX #' #' Plot the R_NX curve for different embeddings. Takes a list of #' \code{\link{dimRedResult}} objects as input. #' Also the Area under the curve values are computed for logarithmic K #' (AUC_lnK) and appear in the legend. #' #' @param x a list of \code{\link{dimRedResult}} objects. The names of #' the list will appear in the legend with the AUC_lnK value. #' @return A ggplot object, the design can be changed by appending #' \code{theme(...)} #' #' @examples #' #' ## define which methods to apply #' embed_methods <- c("Isomap", "PCA") #' ## load test data set #' data_set <- loadDataSet("3D S Curve", n = 1000) #' ## apply dimensionality reduction #' data_emb <- lapply(embed_methods, function(x) embed(data_set, x)) #' names(data_emb) <- embed_methods #' ## plot the R_NX curves: #' plot_R_NX(data_emb) + #' ggplot2::theme(legend.title = ggplot2::element_blank(), #' legend.position = c(0.5, 0.1), #' legend.justification = c(0.5, 0.1)) #' #' @export plot_R_NX <- function(x) { chckpkg("ggplot2") chckpkg("tidyr") chckpkg("scales") lapply( x, function(x) if (!inherits(x, "dimRedResult")) stop("x must be a list and ", "all items must inherit from 'dimRedResult'") ) rnx <- lapply(x, R_NX) auc <- sapply(rnx, auc_lnK) df <- as.data.frame(rnx) names(df) <- names(x) df$K <- seq_len(nrow(df)) qnxgrid <- expand.grid(K = df$K, rnx = seq(0.1, 0.9, by = 0.1)) ## TODO: FIND OUT WHY THIS AS IN THE PUBLICATION BUT IS WRONG! qnxgrid$qnx <- rnx2qnx(qnxgrid$rnx, K = qnxgrid$K, N = nrow(df)) # qnxgrid$rnx_group <- factor(qnxgrid$rnx) df <- tidyr::gather_(df, key_col = "embedding", value_col = "R_NX", names(x)) ggplot2::ggplot(df) + ggplot2::geom_line(ggplot2::aes_string(y = "R_NX", x = "K", color = "embedding")) + ## TODO: find out if this is wrong: ## ggplot2::geom_line(data = qnxgrid, ## mapping = ggplot2::aes_string(x = "K", y = "qnx", ## group = "rnx_group"), ## linetype = 2, ## size = 0.1) + ggplot2::geom_line(data = qnxgrid, mapping = ggplot2::aes_string(x = "K", y = "rnx", group = "rnx_group"), linetype = 3, size = 0.1) + ggplot2::scale_x_log10( labels = scales::trans_format("log10", scales::math_format()), expand = c(0, 0) ) + ggplot2::scale_y_continuous(expression(R[NX]), limits = c(0, 1), expand = c(0, 0)) + ggplot2::annotation_logticks(sides = "b") + ggplot2::scale_color_discrete( breaks = names(x), labels = paste(format(auc, digits = 3), names(x))) + ggplot2::theme_classic() } dimRed/R/embed.R0000644000176200001440000001153113046704516013065 0ustar liggesusers#' dispatches the different methods for dimensionality reduction #' #' wraps around all dimensionality reduction functions. #' #' Method must be one of \code{dimRedMethodList()}, partial matching #' is performed. 
All parameters start with a dot, to avoid clashes #' with partial argument matching (see the R manual section 4.3.2), if #' there should ever occur any clashes in the arguments, call the #' function with all arguments named, e.g. \code{embed(.data = dat, #' .method = "mymethod", .d = "some parameter")}. #' #' @param .data object of class \code{dimRedData} #' @param .method character vector naming one of the dimensionality #' reduction techniques. #' @param .mute a character vector containing the elements you want to #' mute (\code{c("message", "output")}), defaults to #' \code{character(0)}. #' @param .keep.org.data TRUE/FALSE keep the original data. #' @param ... the pameters, internally passed as a list to the #' dimensionality reduction method as \code{pars = list(...)} #' @return an object of class \code{dimRedResult} #' #' @examples #' \dontrun{ #' embed_methods <- dimRedMethodList() #' quality_methods <- dimRedQualityList() #' dataset <- loadDataSet("Iris") #' #' quality_results <- matrix(NA, length(embed_methods), length(quality_methods), #' dimnames = list(embed_methods, quality_methods)) #' embedded_data <- list() #' #' for (e in embed_methods) { #' message("embedding: ", e) #' embedded_data[[e]] <- embed(dataset, e, .mute = c("message", "output")) #' for (q in quality_methods) { #' message(" quality: ", q) #' quality_results[e, q] <- tryCatch( #' quality(embedded_data[[e]], q), #' error = function(e) NA #' ) #' } #' } #' #' print(quality_results) #' } #' ## embed a data.frame using a formula: #' head(as.data.frame( #' embed(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, #' iris, "PCA") #' )) #' #' head(as.data.frame( #' embed(iris[, 1:4], "PCA") #' )) #' head(as.data.frame( #' embed(as.matrix(iris[, 1:4]), "PCA") #' )) #' @export setGeneric("embed", function(.data, ...) standardGeneric("embed"), valueClass = "dimRedResult") #' @describeIn embed embed a data.frame using a formula. #' @param .formula a formula, see \code{\link{as.dimRedData}}. #' @export setMethod( "embed", "formula", function(.formula, .data, .method = dimRedMethodList(), .mute = character(0), .keep.org.data = TRUE, ...) { if (!is.data.frame(.data)) stop(".data must be a data.frame") .data <- as.dimRedData(.formula, .data) embed(.data, .method, .mute, .keep.org.data, ...) } ) #' @describeIn embed Embed anything as long as it can be coerced to #' \code{dimRedData}. #' @export setMethod( "embed", "ANY", function(.data, .method = dimRedMethodList(), .mute = character(0), .keep.org.data = TRUE, ...) { embed(as(.data, "dimRedData"), .method, .mute, .keep.org.data, ...) 
} ) #' @describeIn embed Embed a dimRedData object #' @export setMethod( "embed", "dimRedData", function(.data, .method = dimRed::dimRedMethodList(), .mute = character(0), #c("message", "output"), .keep.org.data = TRUE, ...){ .method <- match.arg(.method) methodObject <- getMethodObject(.method) args <- list( data = as(.data, "dimRedData"), keep.org.data = .keep.org.data ) args$pars <- matchPars(methodObject, list(...)) devnull <- if (Sys.info()["sysname"] != "Windows") "/dev/null" else "NUL" if ("message" %in% .mute){ devnull1 <- file(devnull, "wt") sink(devnull1, type = "message") on.exit({ sink(file = NULL, type = "message") close(devnull1) }, add = TRUE) } if ("output" %in% .mute) { devnull2 <- file(devnull, "wt") sink(devnull2, type = "output") on.exit({ sink() close(devnull2) }, add = TRUE) } do.call(methodObject@fun, args) } ) getMethodObject <- function (method) { ## switch( ## method, ## graph_kk = kamada_kawai, ## graph_drl = drl, ## graph_fr = fruchterman_reingold, ## drr = drr, ## isomap = isomap, ## diffmap = diffmap, ## tsne = tsne, ## nmds = nmds, ## mds = mds, ## ica = fastica, ## pca = pca, ## lle = lle, ## loe = loe, ## soe = soe, ## leim = leim, ## kpca = kpca ## ) method <- match.arg(method, dimRedMethodList()) do.call(method, list()) } dimRed/R/dimRedResult-class.R0000644000176200001440000001425613042052030015505 0ustar liggesusers#' @include misc.R #' @include dimRedData-class.R NULL #' Class "dimRedResult" #' #' A class to hold the results of of a dimensionality reduction. #' #' @slot data Output data of class dimRedData. #' @slot org.data original data, a matrix. #' @slot apply a function to apply the method to out-of-sampledata, #' may not exist. #' @slot inverse a function to calculate the original coordinates from #' reduced space, may not exist. #' @slot has.org.data logical, if the original data is included in the object. #' @slot has.apply logical, if a forward method is exists. #' @slot has.inverse logical if an inverse method exists. #' @slot method saves the method used. #' @slot pars saves the parameters used. 
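#'
#' @section Applying and inverting:
#'
#' A minimal sketch (not run) of the \code{predict} and \code{inverse}
#' generics documented below. Both stop with an error when the
#' corresponding \code{has.apply} or \code{has.inverse} slot is
#' \code{FALSE}; PCA is chosen here only because it provides both
#' functions:
#'
#' \preformatted{
#' emb  <- embed(loadDataSet("Iris"), "PCA")
#' pred <- predict(emb, loadDataSet("Iris"))  # forward mapping
#' inv  <- inverse(emb, pred)                 # back to the original space
#' }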
#' #' @examples #' ## Create object by embedding data #' iris.pca <- embed(loadDataSet("Iris"), "PCA") #' #' ## Convert the result to a data.frame #' head(as(iris.pca, "data.frame")) #' head(as.data.frame(iris.pca)) #' #' ## There are no nameclashes to avoid here: #' head(as.data.frame(iris.pca, #' org.data.prefix = "", #' meta.prefix = "", #' data.prefix = "")) #' #' ## Print it more or less nicely: #' print(iris.pca) #' #' ## Get the embedded data as a dimRedData object: #' getDimRedData(iris.pca) #' #' ## Get the original data including meta information: #' getOrgData(iris.pca) #' #' @family dimRedResult #' @export dimRedResult #' @exportClass dimRedResult dimRedResult <- setClass( "dimRedResult", slots = c( data = "dimRedData", org.data = "matrix", apply = "function", inverse = "function", has.org.data = "logical", has.apply = "logical", has.inverse = "logical", method = "character", pars = "list" ), prototype = list( data = new("dimRedData"), org.data = matrix(numeric(0), 0, 0), apply = function(x) NA, inverse = function(x) NA, has.org.data = FALSE, has.apply = FALSE, has.inverse = FALSE, method = "", pars = list() ) ) setAs( from = "dimRedResult", to = "data.frame", def = function(from){ if (from@has.org.data) { org.data <- from@org.data names(org.data) <- paste("org", names(org.data), sep = ".") cbind(as(from@data, "data.frame"), as.data.frame(org.data)) } else { as(from@data, "data.frame") } } ) #' @importFrom stats predict #' @export setGeneric( "predict", function(object, ...) standardGeneric("predict"), useAsDefault = stats::predict ) #' @describeIn dimRedResult apply a trained method to new data, does not work #' with all methods, will give an error if there is no \code{apply}. #' In some cases the apply function may only be an approximation. #' @param xnew new data, of type \code{\link{dimRedData}} #' #' @export setMethod(f = "predict", signature = "dimRedResult", definition = function(object, xnew) { if (object@has.apply) object@apply(xnew) else stop("object does not have an apply function") }) #' @export setGeneric( "inverse", function(object, ...) standardGeneric("inverse") ) #' @describeIn dimRedResult inverse transformation of embedded data, does not #' work with all methods, will give an error if there is no \code{inverse}. #' In some cases the apply function may only be an approximation. #' @param ynew embedded data, of type \code{\link{dimRedData}} #' #' @aliases inverse #' @export setMethod(f = "inverse", signature = c("dimRedResult"), definition = function(object, ynew) { if (object@has.inverse) object@inverse(ynew) else stop("object does not have an inverse function") }) #' @param x Of class \code{dimRedResult} #' @param org.data.prefix Prefix for the columns of the org.data slot. #' @param meta.prefix Prefix for the columns of \code{x@@data@@meta}. #' @param data.prefix Prefix for the columns of \code{x@@data@@data}. 
#' #' @describeIn dimRedResult convert to \code{data.frame} #' @export setMethod(f = "as.data.frame", signature = c("dimRedResult"), definition = function(x, org.data.prefix = "org.", meta.prefix = "meta.", data.prefix = "") { tmp <- list() if (nrow(x@data@meta) > 0){ tmp$meta <- as.data.frame(x@data@meta) names(tmp$meta) <- paste0(meta.prefix, colnames(x@data@meta)) } tmp$data <- as.data.frame(x@data@data) names(tmp$data) <- paste0(data.prefix, colnames(x@data@data)) if (x@has.org.data){ tmp$org.data <- as.data.frame(x@org.data) names(tmp$org.data) <- paste0(org.data.prefix, colnames(x@org.data)) } names(tmp) <- NULL data.frame(tmp, stringsAsFactors = FALSE) }) #' @param object Of class \code{dimRedResult} #' @describeIn dimRedResult Get the parameters with which the method #' was called. #' @export setMethod( f = "getPars", signature = "dimRedResult", definition = function (object) { object@pars } ) #' @describeIn dimRedResult Method for printing. #' @import utils #' @export setMethod( f = "print", signature = "dimRedResult", definition = function(x) { cat("Method:\n") cat(x@method, "\n") cat("Parameters:\n") utils::str(x@pars) } ) #' @describeIn dimRedResult Get the original data and meta.data #' @export setMethod( f = "getOrgData", signature = "dimRedResult", definition = function(object) { return(new("dimRedData", data = object@org.data, meta = object@data@meta)) } ) #' @describeIn dimRedResult Get the embedded data #' @export setMethod( f = "getDimRedData", signature = "dimRedResult", definition = function(object) { return(object@data) } ) #' @describeIn dimRedResult Extract the number of embedding dimensions. #' #' @examples #' ## Get the number of variables: #' ndims(iris.pca) #' #' @export setMethod( "ndims", "dimRedResult", function(object) ncol(object@data@data) ) dimRed/R/fastica.R0000644000176200001440000000655213065033245013425 0ustar liggesusers#' Independent Component Analysis #' #' An S4 Class implementing the FastICA algorithm for Indepentend #' Component Analysis. #' #' ICA is used for blind signal separation of different sources. It is #' a linear Projection. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' FastICA can take the following parameters: #' \describe{ #' \item{ndim}{The number of output dimensions. Defaults to \code{2}} #' } #' #' @section Implementation: #' Wraps around \code{\link[fastICA]{fastICA}}. FastICA uses a very #' fast approximation for negentropy to estimate statistical #' independences between signals. Because it is a simple #' rotation/projection, forward and backward functions can be given. 
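#'
#' A rough sketch (not run) of the forward and backward mappings through
#' the \code{predict} and \code{inverse} generics; the data set and the
#' \code{ndim} value are arbitrary choices for illustration:
#'
#' \preformatted{
#' dat <- loadDataSet("3D S Curve")
#' emb <- embed(dat, "FastICA", ndim = 2)
#' y   <- predict(emb, dat)   # forward projection
#' x2  <- inverse(emb, y)     # linear reconstruction in the input space
#' }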
#' #' #' @examples #' dat <- loadDataSet("3D S Curve") #' #' ## use the S4 Class directly: #' fastica <- FastICA() #' emb <- fastica@fun(dat, pars = list(ndim = 2)) #' #' ## simpler, use embed(): #' emb2 <- embed(dat, "FastICA", ndim = 2) #' #' #' plot(emb@data@data) #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export FastICA #' @exportClass FastICA FastICA <- setClass( "FastICA", contains = "dimRedMethod", prototype = list( stdpars = list(ndim = 2), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("fastICA") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL orgdata.colmeans <- colMeans(orgdata) indata <- data@data res <- fastICA::fastICA(indata, n.comp = pars$ndim, method = "C") outdata <- res$S colnames(outdata) <- paste0("ICA", 1:ncol(outdata)) appl <- function(x){ appl.meta <- if (inherits(x, "dimRedData")) x@meta else matrix(numeric(0), 0, 0) proj <- if (inherits(x, "dimRedData")) x@data else x out <- scale(proj, center = orgdata.colmeans, scale = FALSE) %*% res$K %*% res$W colnames(out) <- paste0("ICA", 1:ncol(out)) return(new("dimRedData", data = out, meta = appl.meta)) } inv <- function(x){ appl.meta <- if (inherits(x, "dimRedData")) x@meta else matrix(numeric(0), 0, 0) proj <- if (inherits(x, "dimRedData")) x@data else x out <- scale(proj %*% res$A[1:ncol(proj), ], center = -orgdata.colmeans, scale = FALSE) reproj <- new("dimRedData", data = out, meta = appl.meta) return(reproj) } return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, has.org.data = keep.org.data, apply = appl, inverse = inv, has.apply = TRUE, has.inverse = TRUE, method = "FastICA", pars = pars )) }) ) dimRed/R/mds.R0000644000176200001440000001105413040077042012563 0ustar liggesusers#' Metric Dimensional Scaling #' #' An S4 Class implementing classical scaling (MDS). #' #' MDS tries to maintain distances in high- and low-dimensional space, #' it has the advantage over PCA that arbitrary distance functions can #' be used, but it is computationally more demanding. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' MDS can take the following parameters: #' \describe{ #' \item{ndim}{The number of dimensions.} #' \item{d}{The function to calculate the distance matrix from the input coordinates, defaults to euclidean distances.} #' } #' #' @section Implementation: #' #' Wraps around \code{\link[stats]{cmdscale}}. The implementation also #' provides an out-of-sample extension which is not completely #' optimized yet. 
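#'
#' A small usage sketch (not run) of the out-of-sample extension; note
#' that the \code{apply} function is only created for the default
#' euclidean \code{d}, and the sample sizes below are arbitrary:
#'
#' \preformatted{
#' dat  <- loadDataSet("3D S Curve", n = 500)
#' samp <- sample(nrow(dat), 100)
#' emb  <- embed(dat[samp], "MDS", ndim = 2)
#' rest <- predict(emb, dat[-samp])   # embed the remaining points
#' }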
#' #' @examples #' \dontrun{ #' dat <- loadDataSet("3D S Curve") #' #' ## Use the S4 Class directly: #' mds <- MDS() #' emb <- mds@fun(dat, mds@stdpars) #' #' ## use embed(): #' emb2 <- embed(dat, "MDS", d = function(x) exp(stats::dist(x))) #' #' #' plot(emb, type = "2vars") #' plot(emb2, type = "2vars") #' } #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export MDS #' @exportClass MDS MDS <- setClass( "MDS", contains = "dimRedMethod", prototype = list( stdpars = list(d = stats::dist, ndim = 2), fun = function (data, pars, keep.org.data = TRUE) { ## meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data ## there are only efficient implementations for euclidean ## distances: extra efficient implementation for euclidean ## distances are possible, D is quared several times, it would be ## much faster to compute the squared distance right away. has.apply <- identical(all.equal(pars$d, dist), TRUE) # == TRUE # necessary, # because # all.equal # returns # TRUE or an # error # string!!!! D <- as.matrix(pars$d(indata)) if (has.apply) mD2 <- mean(D ^ 2) ## cmdscale square the matrix internally res <- stats::cmdscale(D, k = pars$ndim) outdata <- res D <- NULL ## Untested: remove that from environment before creating ## appl function, else it will stay in its environment ## forever appl <- if (!has.apply) function(x) NA else function(x) { appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() proj <- if (inherits(x, "dimRedData")) x@data else x ## double center new data with respect to old: TODO: optimize ## this method, according to the de Silva, Tenenbaum(2004) ## paper. Need an efficient method to calculate the distance ## matrices between different point sets and arbitrary ## distances. Kab <- as.matrix(pars$d(proj) ^ 2) Exa <- colMeans(pdist2(indata, proj)) Kab <- sweep(Kab, 1, Exa) #, "-") Kab <- sweep(Kab, 2, Exa) #, "-") Kab <- -0.5 * (Kab + mD2) ## Eigenvalue decomposition tmp <- eigen(Kab, symmetric = TRUE) ev <- tmp$values[seq_len(pars$ndim)] evec <- tmp$vectors[, seq_len(pars$ndim), drop = FALSE] k1 <- sum(ev > 0) if (k1 < pars$ndim) { warning(gettextf("only %d of the first %d eigenvalues are > 0", k1, k), domain = NA) evec <- evec[, ev > 0, drop = FALSE] ev <- ev[ev > 0] } points <- evec * rep(sqrt(ev), each = nrow(proj)) dimnames(points) <- list(NULL, paste0("MDS", seq_len(ncol(points)))) new("dimRedData", data = points, meta = appl.meta) } colnames(outdata) <- paste0("MDS", seq_len(ncol(outdata))) return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, apply = appl, has.org.data = keep.org.data, has.apply = has.apply, method = "mds", pars = pars )) }) ) dimRed/R/rotate.R0000644000176200001440000001670413032675517013321 0ustar liggesusers ## rotate X in such a way that the values of Y have maximum squared ## correlation with the dimensions specified in axes. We optimize ## axes[1] first, then axes[2] without axes[1], ... ## we maximize the squared correlations of the original variables ## with the axis of the embeding and the final result is the sum_{axes} sum(squared(correlation(variables, axis))) setGeneric( "maximize_correlation", function(object, ...) standardGeneric("maximize_correlation"), valueClass = "dimRedResult" ) #' Maximize Correlation with the Axes #' #' Rotates the data in such a way that the correlation with the first #' \code{naxes} axes is maximized. 
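#'
#' A rough usage sketch (not run); it requires the original data to be
#' kept in the result and the \pkg{optimx} package to be installed. Why
#' t-SNE is a natural candidate is explained in the next paragraph:
#'
#' \preformatted{
#' emb  <- embed(loadDataSet("Iris"), "tSNE")
#' emb2 <- maximize_correlation(emb)
#' }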
#' #' Methods that do not use eigenvector decomposition, like t-SNE often #' do not align the data with axes according to the correlation of #' variables with the data. \code{maximize_correlation} uses the #' \code{\link[optimx]{optimx}} package to rotate the data in such a #' way that the original variables have maximum correlation with the #' embedding axes. #' #' @param object A dimRedResult object #' @param naxes the number of axes to optimize for. #' @param cor_method which correlation method to use #' #' @aliases maximize_correlation #' @export setMethod( "maximize_correlation", "dimRedResult", function(object, naxes = ncol(object@data@data), cor_method = "pearson"){ ## if (missing(naxes)) naxes <- ncol(object@data@data) ## if (missing(cor_method)) cor_method <- "pearson" if (!object@has.org.data) stop("object requires original data") if (length(naxes) != 1 || naxes < 1 || naxes > ncol(object@data@data)) stop("naxes must specify the numbers of axes to optimize for, ", "i.e. a single integer between 1 and ncol(object@data@data)") ## try to partially match cor_method: cor_method <- cor_method[pmatch(cor_method, c("pearson", "kendall", "spearman"))] if (is.na(cor_method)) stop("cor_method must match one of ", "'pearson', 'kendall', or 'spearman', ", "at least partially.") mcres <- .maximize_correlation(object@data@data, object@org.data, 1:naxes, cor_method) res <- object res@data@data <- mcres$rotated return(res) } ) .maximize_correlation <- function(X, Y, axes = 1:ncol(X), cor_method = "pearson"){ if (nrow(X) != nrow(Y)) stop("'X' and 'Y' must have the same number of rows") if (max(axes) > ncol(X)){ axes <- axes[ axes <= ncol(X) ] warning("'max(axes)' must be <= 'ncol(X)', removing some axes") } chckpkg("optimx") xndim <- ncol(X) without_axes <- integer(0) res <- list() for (axis in axes){ without_axes <- c(without_axes, axis) nplanes <- xndim - length(without_axes) planes <- matrix(NA, 2, nplanes) planes[1, ] <- axis planes[2, ] <- (1:xndim)[-without_axes] if (ncol(planes) == 0) break o <- optimx::optimx( par = rep(0, nplanes), fn = obj, ## method = c("Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "nlm", ## "nlminb", "spg", "ucminf", "newuoa", "bobyqa", "nmkb", ## "hjkb", "Rcgmin", "Rvmmin"), lower = 0, upper = 2 * pi, control = list(all.methods = T), X = as.matrix(X), Y = as.matrix(Y), axis = axis, without_axes = without_axes, cor_method = cor_method ) best_idx <- which.min(o$value) if (length(best_idx) == 0) best_idx <- NA res_idx <- length(res) + 1 res[[res_idx]] <- list() res[[res_idx]]$axis <- axis res[[res_idx]]$without_axes <- without_axes res[[res_idx]]$angs <- unname( unlist(o[best_idx, 1:nplanes]) ) res[[res_idx]]$planes <- planes res[[res_idx]]$X <- rotate(res[[res_idx]]$angs, planes, X) ## this is the mean squared correlation of the original variables ## with "axis", see return value of "obj": res[[res_idx]]$cor <- -o$value[best_idx] } ## calculate the correlation for axes nres <- length(res) if (nres > 0) { ## the result is the sum of the mean squared correlations of the ## original variables with the axes. 
"res[[i]]$cor" contains the ## mean squared correlation of the variables with axis "i" res$result <- 0 for (i in 1:nres) res$result <- res$result + res[[i]]$cor ^ 2 ## res$result <- res$result / length(res) ## rotate the input to maximize correlations res$rotated <- X for (i in 1:nres) res$rotated <- rotate(res[[i]]$angs, res[[i]]$planes, res$rotated) } else { ## if we only had one dimension, simply return the means squared ## correlation and don't rotate res$result <- sum(correlate(X, Y, cor_method) ^ 2) res$rotated <- X } res } #### helper functions for rotation ## we create a number or rotation matrices around the 2d planes ## spanned by the orthonormal matrices, multiply them for a general ## rotation which is then applied to the data X rotate <- function (angs, planes, X) { ndim <- ncol(X) nplanes <- ncol(planes) if (length(angs) != nplanes) stop("length(angs) not equal to chose(ndim, 2)") ## loop over the planes to construct general rotation matrix rotmat <- diag(ndim) for (p in 1:nplanes) { ## 2d rotation ## possible optimization: create large rotation matrix ## directly and insert values linearly without a for loop rotmat2d <- matrix( c(cos(angs[p]), -sin(angs[p]), sin(angs[p]), cos(angs[p])), 2, 2, byrow = T ) p_rotmat <- diag(ndim) for (i in 1:2) for (j in 1:2) p_rotmat[ planes[i, p], planes[j, p] ] <- rotmat2d[i, j] rotmat <- rotmat %*% p_rotmat } t(rotmat %*% t(X)) } get_planes <- function(ndims, axis, without_axes){ nplanes <- ndims - length(without_axes) planes <- matrix(NA, 2, nplanes) planes[1, ] <- axis planes[2, ] <- (1:ndims)[c(-axis, -without_axes)] planes } obj <- function(alpha, X, Y, axis, without_axes, cor_method = "pearson"){ ## correlation with first axis xndim <- ncol(X) planes <- get_planes(xndim, axis, without_axes) X2 <- rotate(alpha, planes, X) ## cor(x, y) returns a matrix with the correlations between the ## columns of x = X2 (rows) and the columns of y = Y (columns) we ## want the mean of squared correlations of all variables original ## variables with the first axis, i.e. we require the relevant ## (axis) column of the resulting matrix. ## Possible optimization: use only the relevant column of Y -mean(correlate( X2, Y, #use = "pairwise.complete.obs", method = cor_method )[axis, ] ^ 2) } correlate <- function (x, y, method, ...) { if (method != "kendall"){ return(stats::cor(x, y, method = method, ...)) } else { chckpkg("pcaPP") ## make the cor.fk method behave like cor for matrices: if (is.matrix(x) && is.matrix(y)) { res <- matrix( NA, nrow = ncol(x), ncol = ncol(y), dimnames = list(colnames(x), colnames(y)) ) for (i in 1:ncol(x)) { for (j in 1:ncol(y)){ res[i, j] <- pcaPP::cor.fk(x[, i], y[, j]) } } return(res) } else if (is.null(dim(x)) && is.null(dim(y))){ return(pcaPP::cor.fk(x, y)) } else { stop("something is wrong with the input of 'correlate()'") } } } dimRed/R/tsne.R0000644000176200001440000000556413040077066012770 0ustar liggesusers#' t-Distributed Stochastic Neighborhood Embedding #' #' An S4 Class for t-SNE. #' #' t-SNE is a method that uses Kullback-Leibler divergence between the #' distance matrices in high and low-dimensional space to embed the #' data. The method is very well suited to visualize complex #' structures in low dimensions. 
#' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' t-SNE can take the following parameters: #' \describe{ #' \item{d}{A distance function, defaults to euclidean distances} #' \item{perplexity}{The perplexity parameter, roughly equivalent to neighborhood size.} #' \item{theta}{Approximation for the nearest neighbour search, large values are more inaccurate.} #' \item{ndim}{The number of embedding dimensions.} #' } #' #' @section Implementation: #' #' Wraps around \code{\link[Rtsne]{Rtsne}}, which is very well #' documented. Setting \code{theta = 0} does a normal t-SNE, larger #' values for \code{theta < 1} use the Barnes-Hut algorithm which #' scales much nicer with data size. Larger values for perplexity take #' larger neighborhoods into account. #' #' @references #' Maaten, L. van der, 2014. Accelerating t-SNE using Tree-Based #' Algorithms. Journal of Machine Learning Research 15, 3221-3245. #' #' van der Maaten, L., Hinton, G., 2008. Visualizing Data using #' t-SNE. J. Mach. Learn. Res. 9, 2579-2605. #' #' @examples #' \dontrun{ #' dat <- loadDataSet("3D S Curve", n = 500) #' #' ## using the S4 class directly: #' tsne <- tSNE() #' emb <- tsne@fun(dat, tsne@stdpars) #' #' ## using embed() #' emb2 <- embed(dat, "tSNE", perplexity = 80) #' #' plot(emb, type = "2vars") #' plot(emb2, type = "2vars") #' } #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export tSNE #' @exportClass tSNE tSNE <- setClass( "tSNE", contains = "dimRedMethod", prototype = list( stdpars = list(d = stats::dist, perplexity = 30, theta = 0.5, ndim = 2), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("Rtsne") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data outdata <- Rtsne::Rtsne(pars$d(indata), perplexity = pars$perplexity, theta = pars$theta, dims = pars$ndim)$Y colnames(outdata) <- paste0("tSNE", 1:ncol(outdata)) return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, has.org.data = keep.org.data, method = "tsne", pars = pars )) }) ) dimRed/R/diffmap.R0000644000176200001440000001101013037665163013413 0ustar liggesusers#' Diffusion Maps #' #' An S4 Class implementing Diffusion Maps #' #' Diffusion Maps uses a diffusion probability matrix to robustly #' approximate a manifold. #' #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' Diffusion Maps can take the following parameters: #' \describe{ #' \item{d}{a function transforming a matrix row wise into a #' distance matrix or \code{dist} object, #' e.g. \code{\link[stats]{dist}}.} #' \item{ndim}{The number of dimensions} #' \item{eps}{The epsilon parameter that determines the #' diffusion weight matrix from a distance matrix \code{d}, #' \eqn{exp(-d^2/eps)}, if set to \code{"auto"} it will #' be set to the median distance to the 0.01*n nearest #' neighbor.} #' \item{t}{Time-scale parameter. The recommended value, 0, #' uses multiscale geometry.} #' \item{delta}{Sparsity cut-off for the symmetric graph Laplacian, #' a higher value results in more sparsity and faster calculation. #' The predefined value is 10^-5.} #' } #' #' @section Implementation: #' Wraps around \code{\link[diffusionMap]{diffuse}}, see there for #' details. It uses the notation of Richards et al. (2009) which is #' slightly different from the one in the original paper (Coifman and #' Lafon, 2006) and there is no \eqn{\alpha} parameter. 
#' There is also an out-of-sample extension, see examples. #' #' #' @references #' Richards, J.W., Freeman, P.E., Lee, A.B., Schafer, #' C.M., 2009. Exploiting Low-Dimensional Structure in #' Astronomical Spectra. ApJ 691, #' 32. doi:10.1088/0004-637X/691/1/32 #' #' Coifman, R.R., Lafon, S., 2006. Diffusion maps. Applied and #' Computational Harmonic Analysis 21, #' 5-30. doi:10.1016/j.acha.2006.04.006 #' #' @examples #' dat <- loadDataSet("3D S Curve") #' #' ## use the S4 Class directly: #' diffmap <- DiffusionMaps() #' emb <- diffmap@fun(dat, diffmap@stdpars) #' #' ## simpler, use embed(): #' emb2 <- embed(dat, "DiffusionMaps") #' #' plot(emb, type = "2vars") #' #' samp <- sample(floor(nrow(dat) / 10)) #' embsamp <- diffmap@fun(dat[samp], diffmap@stdpars) #' embother <- embsamp@apply(dat[-samp]) #' plot(embsamp, type = "2vars") #' points(embother) #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export DiffusionMaps #' @exportClass DiffusionMaps DiffusionMaps <- setClass( "DiffusionMaps", contains = "dimRedMethod", prototype = list( stdpars = list(d = stats::dist, ndim = 2, eps = "auto", t = 0, delta = 1e-5), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("diffusionMap") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data distmat <- pars$d(indata) if (pars$eps == "auto") pars$eps <- diffusionMap::epsilonCompute(distmat) diffres <- diffusionMap::diffuse( D = distmat, t = pars$t, eps.val = pars$eps, neigen = pars$ndim, maxdim = pars$ndim, delta = pars$delta ) outdata <- diffres$X appl <- function(x) { appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() proj <- if (inherits(x, "dimRedData")) x@data else x if (ncol(proj) != ncol(data@data)) stop("x must have the same number of dimensions ", "as the original data") dd <- sqrt(pdist2(proj, indata)) appl.res <- diffusionMap::nystrom(diffres, dd, sigma = diffres$epsilon) dimnames(appl.res) <- list( rownames(x), paste0("diffMap", seq_len(ncol(outdata))) ) return(appl.res) } colnames(outdata) <- paste0("diffMap", seq_len(ncol(outdata))) return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, apply = appl, has.apply = TRUE, has.org.data = keep.org.data, method = "diffmap", pars = pars )) }) ) dimRed/R/drr.R0000644000176200001440000001450413040076314012573 0ustar liggesusers#' Dimensionality Reduction via Regression #' #' An S4 Class implementing Dimensionality Reduction via Regression (DRR). #' #' DRR is a non-linear extension of PCA that uses Kernel Ridge regression. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' DRR can take the following parameters: #' \describe{ #' \item{ndim}{The number of dimensions} #' \item{lambda}{The regularization parameter for the ridge #' regression.} #' \item{kernel}{The kernel to use for KRR, defaults to #' \code{"rbfdot"}.} #' \item{kernel.pars}{A list with kernel parameters, elements depend #' on the kernel used, \code{"rbfdot"} uses \code{"sigma"}.} #' \item{pca}{logical, should an initial pca step be performed, #' defaults to \code{TRUE}.} #' \item{pca.center}{logical, should the data be centered before the #' pca step. Defaults to \code{TRUE}.} #' \item{pca.scale}{logical, should the data be scaled before the #' pca ste. 
Defaults to \code{FALSE}.} #' \item{fastcv}{logical, should \code{\link[CVST]{fastCV}} from the #' CVST package be used instead of normal cross-validation.} #' \item{fastcv.test}{If \code{fastcv = TRUE}, separate test data set for fastcv.} #' \item{cv.folds}{if \code{fastcv = FALSE}, specifies the number of #' folds for crossvalidation.} #' \item{fastkrr.nblocks}{integer, higher values sacrifice numerical #' accuracy for speed and less memory, see below for details.} #' \item{verbose}{logical, should the cross-validation results be #' printed out.} #' } #' #' @section Implementation: #' Wraps around \code{\link[DRR]{drr}}, see there for details. DRR is #' a non-linear extension of principal components analysis using Kernel #' Ridge Regression (KRR, details see \code{\link[CVST]{constructKRRLearner}} #' and \code{\link[DRR]{constructFastKRRLearner}}). Non-linear #' regression is used to explain more variance than PCA. DRR provides #' an out-of-sample extension and a backward projection. #' #' The most expensive computations are matrix inversions therefore the #' implementation profits a lot from a multithreaded BLAS library. #' The best parameters for each KRR are determined by cross-validaton #' over all parameter combinations of \code{lambda} and #' \code{kernel.pars}, using less parameter values will speed up #' computation time. Calculation of KRR can be accelerated by #' increasing \code{fastkrr.nblocks}, it should be smaller than #' n^{1/3} up to sacrificing some accuracy, for details see #' \code{\link[DRR]{constructFastKRRLearner}}. Another way to speed up #' is to use \code{pars$fastcv = TRUE} which might provide a more #' efficient way to search the parameter space but may also miss the #' global maximum, I have not ran tests on the accuracy of this method. #' #' #' #' @references #' Laparra, V., Malo, J., Camps-Valls, G., #' 2015. Dimensionality Reduction via Regression in Hyperspectral #' Imagery. IEEE Journal of Selected Topics in Signal Processing #' 9, 1026-1036. 
doi:10.1109/JSTSP.2015.2417833 #' #' @examples #' \dontrun{ #' dat <- loadDataSet("variable Noise Helix", n = 200)[sample(200)] #' #' ## use the S4 Class directly: #' drr <- DRR() #' pars <- drr@stdpars #' pars$ndim <- 3 #' emb <- drr@fun(dat, pars) #' #' ## simpler, use embed(): #' emb2 <- embed(dat, "DRR", ndim = 3) #' #' #' plot(dat, type = "3vars") #' plot(emb, type = "3vars") #' plot(emb@inverse(emb@data@data[, 1, drop = FALSE]), type = "3vars") #' } #' #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @import DRR #' @family dimensionality reduction methods #' @export DRR #' @exportClass DRR DRR <- setClass( "DRR", contains = "dimRedMethod", prototype = list( stdpars = list(ndim = 2, lambda = c(0, 10 ^ (-3:2)), kernel = "rbfdot", kernel.pars = list(sigma = 10 ^ (-3:4)), pca = TRUE, pca.center = TRUE, pca.scale = FALSE, fastcv = FALSE, cv.folds = 5, fastcv.test = NULL, fastkrr.nblocks = 4, verbose = TRUE), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("DRR") chckpkg("kernlab") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data res <- do.call(DRR::drr, c(list(X = indata), pars)) outdata <- res$fitted.data colnames(outdata) <- paste0("DRR", 1:ncol(outdata)) appl <- function(x){ appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() proj <- if (inherits(x, "dimRedData")) x@data else x if (ncol(proj) != ncol(data@data)) stop("x must have the same number of dimensions ", "as the original data") appl.out <- new("dimRedData", data = res$apply(proj), meta = appl.meta) dimnames(appl.out@data) <- list( rownames(x), paste0("DRR", seq_len(ncol(appl.out@data))) ) return(appl.out) } inv <- function(x) { appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() proj <- if (inherits(x, "dimRedData")) x@data else x if (ncol(proj) > ncol(data@data)) stop("x must have less or equal number of dimensions ", "as the original data") inv.out <- new("dimRedData", data = res$inverse(proj), meta = appl.meta) dimnames(inv.out@data) <- list(rownames(proj), colnames(data@data)) return(inv.out) } return( new("dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, apply = appl, inverse = inv, has.org.data = keep.org.data, has.apply = TRUE, has.inverse = TRUE, method = "drr", pars = pars ) ) }) ) dimRed/R/kpca.R0000644000176200001440000000731413042054106012720 0ustar liggesusers#' Kernel PCA #' #' An S4 Class implementing Kernel PCA #' #' Kernel PCA is a nonlinear extension of PCA using kernel methods. #' #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' Kernel PCA can take the following parameters: #' \describe{ #' \item{ndim}{the number of output dimensions, defaults to 2} #' \item{kernel}{The kernel function, either as a function or a #' character vector with the name of the kernel. Defaults to #' \code{"rbfdot"}} #' \item{kpar}{A list with the parameters for the kernel function} #' } #' #' @section Implementation: #' #' Wraps around \code{\link[kernlab]{kpca}}, but provides additionally #' forward and backward projections. 
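#'
#' A minimal sketch (not run) of the forward and backward projections;
#' as with most kernel methods, the backward projection should be read
#' as an approximate pre-image:
#'
#' \preformatted{
#' dat <- loadDataSet("3D S Curve", n = 500)
#' emb <- embed(dat, "kPCA", ndim = 2)
#' y   <- predict(emb, dat)   # forward projection
#' x2  <- inverse(emb, y)     # approximate pre-images in input space
#' }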
#' #' @examples #' \dontrun{ #' dat <- loadDataSet("3D S Curve") #' #' ## use the S4 class directly: #' kpca <- kPCA() #' emb <- kpca@fun(dat, kpca@stdpars) #' #' ## simpler, use embed(): #' emb2 <- embed(dat, "kPCA") #' #' plot(emb, type = "2vars") #' } #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export kPCA #' @exportClass kPCA kPCA <- setClass( "kPCA", contains = "dimRedMethod", prototype = list( stdpars = list(kernel = "rbfdot", kpar = list(sigma = 0.1), ndim = 2), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("kernlab") if (is.null(pars$ndim)) pars$ndim <- 2 meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data res <- do.call(kernlab::kpca, c(list(x = indata), pars)) kernel <- get_kernel_fun(pars$kernel, pars$kpar) # for the inverse: K_rev <- kernlab::kernelMatrix(kernel, res@rotated) diag(K_rev) <- 0.1 + diag(K_rev) dual_coef <- solve(K_rev, indata) appl <- function (x) { appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() proj <- if (inherits(x, "dimRedData")) x@data else x proj <- kernlab::predict(res, proj)[, 1:pars$ndim, drop = FALSE] colnames(proj) <- paste0("kPCA", 1:ncol(proj)) new("dimRedData", data = proj, meta = appl.meta) } inv <- function (x) { appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() proj <- if (inherits(x, "dimRedData")) x@data else x resrot <- res@rotated[, 1:ncol(proj)] rot <- kernlab::kernelMatrix(kernel, proj, resrot) proj <- rot %*% dual_coef new("dimRedData", data = proj, meta = appl.meta) } outdata <- res@rotated[, 1:pars$ndim, drop = FALSE] colnames(outdata) <- paste0("kPCA", 1:ncol(outdata)) return( new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, apply = appl, inverse = inv, has.org.data = keep.org.data, has.apply = TRUE, has.inverse = TRUE, method = "kpca", pars = pars ) ) }) ) ## get the kernel function out of the kernlab namespace: get_kernel_fun <- function (kernel, pars) { if (!is(kernel, "kernel")) { if (is(kernel, "function")) { kernel <- deparse(substitute(kernel)) } else { kernel <- get(kernel, asNamespace("kernlab")) } kernel <- do.call(kernel, pars) } return(kernel) } dimRed/R/dimRedMethod-class.R0000644000176200001440000000647513024300420015452 0ustar liggesusers#' Class "dimRedMethod" #' #' A virtual class "dimRedMethod" to serve as a template to implement #' methods for dimensionality reduction. #' #' Implementations of dimensionality reductions should inherit from #' this class. #' #' The \code{fun} slot should be a function that takes three arguments #' \describe{ #' \item{data}{An object of class \code{\link{dimRedData}}.} #' \item{pars}{A list with the standard parameters.} #' \item{keep.org.data}{Logical. If the original data should be kept in the output.} #' } #' and returns an object of class \code{\link{dimRedResult}}. #' #' The \code{stdpars} slot should take a list that contains standard #' parameters for the implemented methods. #' #' This way the method can be called by \code{embed(data, #' "method-name", ...)}, where \code{...} can be used to to change #' single parameters. #' #' #' @slot fun A function that does the embedding. #' @slot stdpars A list with the default parameters for the \code{fun} #' slot. 
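#'
#' @section A sketch of a new method:
#'
#' The following is an illustrative, untested sketch; the class name
#' \code{naivePCA} and everything inside \code{fun} are made up for this
#' example and are not part of the package:
#'
#' \preformatted{
#' naivePCA <- setClass(
#'     "naivePCA",
#'     contains = "dimRedMethod",
#'     prototype = list(
#'         stdpars = list(ndim = 2),
#'         fun = function (data, pars, keep.org.data = TRUE) {
#'             pc  <- stats::prcomp(getData(data))
#'             out <- pc$x[, seq_len(pars$ndim), drop = FALSE]
#'             new("dimRedResult",
#'                 data = new("dimRedData", data = out, meta = getMeta(data)),
#'                 org.data = if (keep.org.data) getData(data)
#'                            else matrix(numeric(0), 0, 0),
#'                 has.org.data = keep.org.data,
#'                 method = "naivePCA",
#'                 pars = pars)
#'         }
#'     )
#' )
#'
#' ## the new class is then listed by dimRedMethodList() and can be
#' ## used as embed(loadDataSet("Iris"), "naivePCA")
#' }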
#' #' @family dimensionality reduction methods #' @seealso \link{dimRedMethodList} #' @export setClass("dimRedMethod", contains = "VIRTUAL", slots = c(fun = "function", stdpars = "list")) #' dimRedMethodList #' #' Get the names of all methods for dimensionality reduction. #' #' Returns the name of all classes that inherit from #' \code{\link{dimRedMethod-class}} to use with \code{\link{embed}}. #' #' @return a character vector with the names of classes that inherit #' from \code{dimRedMethod}. #' #' @examples #' dimRedMethodList() #' #' @export dimRedMethodList <- function () { ## return(c( ## "graph_kk", ## "graph_drl", ## "graph_fr", ## "drr", ## "isomap", ## "diffmap", ## "tsne", ## "nmds", ## "mds", ## "ica", ## "pca", ## "lle", ## ## those two methods are buggy and can produce segfaults: ## ## "loe", "soe", ## "leim", ## "kpca" ## )) names(completeClassDefinition("dimRedMethod", doExtends = FALSE)@subclasses) } # to put standard values for omitted arguments setGeneric("matchPars", function(object, pars) standardGeneric("matchPars"), valueClass = c("list")) setMethod("matchPars", signature(object = "dimRedMethod", pars = "list"), definition = function(object, pars) { nsp <- names(object@stdpars) ncp <- names(pars) nap <- union(nsp, ncp) res <- list() ## exists can deal with elements being NULL ## to assign list@el <- NULL do: ## list["el"] <- list(NULL) for (np in nap) { miss.std <- !exists(np, where = object@stdpars) miss.par <- !exists(np, where = pars) if (miss.std) { warning("Parameter matching: ", np, " is not a standard parameter, ignoring.") } else if (miss.par) { res[np] <- object@stdpars[np] } else { res[np] <- pars[np] } } ## if the method does not accept parameters we have to return ## null, so in embed there is no args$par created. and passed by ## do.call in the embed() function. if (length(res) != 0) ## return(res) else return(NULL) ## first try without the above, all methods should have a pars ## argument. return(res) }) dimRed/R/nmds.R0000644000176200001440000000352113024273620012743 0ustar liggesusers#' Non-Metric Dimensional Scaling #' #' An S4 Class implementing Non-Metric Dimensional Scaling. #' #' A non-linear extension of MDS using monotonic regression #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' nMDS can take the following parameters: #' \describe{ #' \item{d}{A distance function.} #' \item{ndim}{The number of embedding dimensions.} #' } #' #' @section Implementation: #' Wraps around the #' \code{\link[vegan]{monoMDS}}. For parameters that are not #' available here, the standard configuration is used. 
#' #' @examples #' dat <- loadDataSet("3D S Curve", n = 1000) #' #' ## using the S4 classes: #' nmds <- nMDS() #' emb <- nmds@fun(dat, nmds@stdpars) #' #' #' ## using embed() #' emb2 <- embed(dat, "nMDS", d = function(x) exp(dist(x))) #' #' #' plot(emb, type = "2vars") #' plot(emb2, type = "2vars") #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export nMDS #' @exportClass nMDS nMDS <- setClass( "nMDS", contains = "dimRedMethod", prototype = list( stdpars = list(d = stats::dist, ndim = 2), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("vegan") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data outdata <- vegan::monoMDS(pars$d(indata), k = pars$ndim)$points colnames(outdata) <- paste0("NMDS", 1:ncol(outdata)) return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, has.org.data = keep.org.data, method = "nmds", pars = pars )) }) ) dimRed/R/lle.R0000644000176200001440000000435613024273620012565 0ustar liggesusers#' Locally Linear Embedding #' #' An S4 Class implementing Locally Linear Embedding (LLE) #' #' LLE approximates the points in the manifold by linear combination #' of its neighbors. These linear combinations are the same inside the #' manifold and in highdimensional space. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' LLE can take the following parameters: #' \describe{ #' \item{knn}{the number of neighbors for the knn graph., defaults to 50.} #' \item{ndim}{the number of embedding dimensions, defaults to 2.} #' } #' #' @section Implementation: #' Wraps around \code{\link[lle]{lle}}, only #' exposes the parameters \code{k} and \code{m}. #' #' @references #' Roweis, S.T., Saul, L.K., 2000. Nonlinear Dimensionality Reduction #' by Locally Linear Embedding. Science 290, #' 2323-2326. doi:10.1126/science.290.5500.2323 #' #' @examples #' dat <- loadDataSet("3D S Curve", n = 500) #' #' ## directy use the S4 class: #' lle <- LLE() #' emb <- lle@fun(dat, lle@stdpars) #' #' ## using embed(): #' emb2 <- embed(dat, "LLE", knn = 45) #' #' plot(emb, type = "2vars") #' plot(emb2, type = "2vars") #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export LLE #' @exportClass LLE LLE <- setClass( "LLE", contains = "dimRedMethod", prototype = list( stdpars = list(knn = 50, ndim = 2), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("lle") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data outdata <- lle::lle(indata, k = pars$knn, m = pars$ndim)$Y if (is.null(dim(outdata))) { dim(outdata) <- c(length(outdata), 1) } colnames(outdata) <- paste0("LLE", 1:ncol(outdata)) return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, has.org.data = keep.org.data, method = "lle", pars = pars )) }) ) dimRed/R/soe.R0000644000176200001440000000264213024273620012573 0ustar liggesusers## #' Soft Ordinal Embedding ## #' ## #' Instance of \code{\link{dimRedMethod}} for Soft Ordinal Embedding. ## #' ## #' For details see \code{\link[loe]{SOE}}. 
## #' ## #' ## #' @examples ## #' dat <- loadDataSet("3D S Curve", n = 50) ## #' soe <- SOE() ## #' emb <- soe@fun(dat, soe@stdpars) ## #' ## #' ## #' plot(emb@data@data) ## #' ## #' ## #' @include dimRedResult-class.R ## #' @include dimRedMethod-class.R ## #' @export ## SOE <- setClass( ## "SOE", ## contains = "dimRedMethod", ## prototype = list( ## stdpars = list(d = stats::dist, knn = 50, ndim = 2), ## fun = function (data, ## pars, ## keep.org.data = TRUE) { ## chckpkg("loe") ## meta <- data@meta ## orgdata <- if (keep.org.data) data@data else NULL ## indata <- data@data ## outdata <- loe::SOE(loe::get.order(as.matrix(pars$d(indata))), ## N = nrow(indata), p = pars$ndim)$X ## colnames(outdata) <- paste0("SOE", 1:ncol(outdata)) ## return(new( ## "dimRedResult", ## data = new("dimRedData", ## data = outdata, ## meta = meta), ## org.data = orgdata, ## has.org.data = keep.org.data, ## method = "soe", ## pars = pars ## )) ## }) ## ) dimRed/R/misc.R0000644000176200001440000002066413037630230012742 0ustar liggesusers## if (!isClassUnion("missingORnumeric")) setClassUnion("missingORnumeric", c("numeric", "missing")) ## if (!isClassUnion("missingORcharacter")) setClassUnion("missingORcharacter", c("character", "missing")) ## if (!isClassUnion("missingORlogical")) setClassUnion("missingORlogical", c("logical", "missing")) ## if (!isClassUnion("missingORfunction")) setClassUnion("missingORfunction", c("function", "missing")) # Squared euclidean distance between points in A and B # taken from http://blog.felixriedel.com/2013/05/pairwise-distances-in-r/ pdist2 <- function (A, B) { an <- rowSums(A ^ 2) # apply(A, 1, function(rvec) crossprod(rvec, rvec)) bn <- rowSums(B ^ 2) # apply(B, 1, function(rvec) crossprod(rvec, rvec)) m <- nrow(A) n <- nrow(B) matrix(rep(an, n), nrow = m) + matrix(rep(bn, m), nrow = m, byrow = TRUE) - 2 * tcrossprod(A, B) } ## a + b ~ c + d ## becomes ## ~ c + d + 0 rhs <- function (formula) { fs <- as.character(formula)[3] stats::as.formula(paste("~", fs, "+ 0")) } ## a + b ~ c + d ## becomes ## ~ a + b + 0 lhs <- function (formula) { fs <- as.character(formula)[2] stats::as.formula(paste("~", fs, "+ 0")) } ## check if a package is installed chckpkg <- function (pkg) { if (!requireNamespace(pkg, quietly = TRUE)) { stop(paste0("require '", pkg, "' package, install it using install.packages('", pkg, "')")) } } ## create generics that appear in several different places #' Converts to data.frame #' #' General conversions of objects created by \code{dimRed} to \code{data.frame}. #' See class documentations for details (\code{\link{dimRedData}}, #' \code{\link{dimRedResult}}). For the documentation of this function in base #' package, see here: \code{\link[base]{as.data.frame.default}}. #' #' @param x The object to be converted #' @param row.names unused in \code{dimRed} #' @param optional unused in \code{dimRed} #' @param ... other arguments. setGeneric( "as.data.frame", function(x, row.names, optional, ...) standardGeneric("as.data.frame"), useAsDefault = base::as.data.frame, valueClass = "data.frame" ) #' Converts to dimRedData #' #' Conversion functions to dimRedData. #' #' @param formula a formula object. #' @param ... other arguments. setGeneric( "as.dimRedData", function(formula, ...) standardGeneric("as.dimRedData"), valueClass = "dimRedData" ) #' Method getData #' #' Extracts the data slot. #' #' @param object The object to be converted. setGeneric("getData", function(object) standardGeneric("getData")) #' Method getMeta #' #' Extracts the meta slot. 
#' #' @param object The object to be converted. #' @param ... other arguments. setGeneric("getMeta", function(object, ...) standardGeneric("getMeta")) #' Method getPars #' #' Extracts the pars slot. #' #' @param object The object to be converted. #' @param ... other arguments. setGeneric("getPars", function (object, ...) standardGeneric("getPars")) #' Method getOrgData #' #' Extract the Original data. #' #' @param object The object to extract data from. #' @param ... other arguments. setGeneric("getOrgData", function (object, ...) standardGeneric("getOrgData")) #' Method getDimRedData #' #' Extract dimRedData. #' @param object The object to extract data from. #' @param ... other arguments. setGeneric("getDimRedData", function (object, ...) standardGeneric("getDimRedData")) #' Method print #' #' Imports the print method into the package namespace. #' #' @param x The object to be printed. #' @param ... Other arguments for printing. setGeneric("print", function(x, ...) standardGeneric("print")) #' Method ndims #' #' Extract the number of dimensions. #' #' @param object To extract the number of dimensions from. #' @param ... Arguments for further methods setGeneric("ndims", function (object, ...) standardGeneric("ndims"), valueClass = "integer") #' getSuggests #' #' Install packages wich are suggested by dimRed. #' #' By default dimRed will not install all the dependencies, because #' there are quite a lot and in case some of them are not available #' for your platform you will not be able to install dimRed without #' problems. #' #' To solve this I provide a function which automatically installes #' all the suggested packages. #' #' @examples #' \dontrun{ #' installSuggests() #' } #' @export installSuggests <- function () { "%w/o%" <- function(x, y) x[!x %in% y] pkgString <- installed.packages()["dimRed", "Suggests"] deps <- strsplit(pkgString, ", |,\n")[[1]] deps <- gsub("\n", "", deps) # Windows needs this installedPkgs <- rownames(installed.packages()) missingPkgs <- deps %w/o% installedPkgs if (length(missingPkgs) > 0) { message("The following packages are missing: ") cat(missingPkgs, "\n") message("installing ...") install.packages(missingPkgs) pkgString <- installed.packages()["dimRed", "Suggests"] installedPkgs <- rownames(installed.packages()) missingPkgs <- deps %w/o% installedPkgs if (length(missingPkgs) > 0) { message("Could not install the following packages:") cat(missingPkgs, "\n") message("please install manually or some methods will not work.") } else { message("All necessary packages installed") message("If things still don't work try 'update.package()'") message("If it still does not work file a bugreport!!") } } else { message("All necessary packages installed") message("If things still don't work try 'update.package()'") message("If it still does not work file a bugreport!!") } } ## input data(matrix or data frame) return knn graph implements ## "smart" choices on RANN::nn2 parameters we ignore radius search ## TODO: find out a good limit to switch from kd to bd trees COMMENT: ## bd trees are buggy, they dont work if there are duplicated data ## points and checking would neutralize the performance gain, so bd ## trees are not really usable. #' makeKNNgraph #' #' Create a K-nearest neighbor graph from data x. Uses #' \code{\link[RANN]{nn2}} as a fast way to find the neares neighbors. #' #' @param x data, a matrix, observations in rows, dimensions in #' columns #' @param k the number of nearest neighbors. 
#' @param eps number, if \code{eps > 0} the KNN search is approximate, #' see \code{\link[RANN]{nn2}} #' @param diag logical, if \code{TRUE} every edge of the returned #' graph will have an edge with weight \code{0} to itself. #' #' @return an object of type \code{\link[igraph]{igraph}} with edge #' weight being the distances. #' #' #' makeKNNgraph <- function(x, k, eps = 0, diag = FALSE){ ## requireNamespace("RANN") ## requireNamespace("igraph") ## consts INF_VAL <- 1.340781e+15 NA_IDX <- 0 BDKD_LIM <- 1000000 #todo: figure out a good value here ## select parameters M <- nrow(x) treetype <- "kd" # if (M < BDKD_LIM) "kd" else "bd" # see: # https://github.com/jefferis/RANN/issues/19 searchtype <- if (eps == 0) "standard" else "priority" ## RANN::nn2 returns the points in data with respect to query ## e.g. the rows in the output are the points in query and the ## columns the points in data. nn2res <- RANN::nn2(data = x, query = x, k = k + 1, treetype = treetype, searchtype = searchtype, eps = eps) ## create graph: the first ny nodes will be y, the last nx nodes ## will be x, if x != y g <- igraph::make_empty_graph(M, directed = FALSE) g[from = if (diag) rep(seq_len(M), times = k + 1) else rep(seq_len(M), times = k), to = if (diag) as.vector(nn2res$nn.idx) else as.vector(nn2res$nn.idx[, -1]), attr = "weight"] <- if (diag) as.vector(nn2res$nn.dists) else as.vector(nn2res$nn.dists[, -1]) return(g) } makeEpsSparseMatrix <- function(x, eps) { chckpkg("Matrix") n <- nrow(x) dd <- stats::dist(x) ddind <- dd < eps rows <- unlist(lapply(2:n, function(x) x:n), use.names = FALSE) cols <- rep(seq_len(n - 1), times = (n - 1):1) Matrix::sparseMatrix(i = rows[ddind], j = cols[ddind], x = dd[ddind], dims = c(n, n), symmetric = TRUE) } dimRed/R/leim.R0000644000176200001440000001316513033413630012732 0ustar liggesusers#' Laplacian Eigenmaps #' #' An S4 Class implementing Laplacian Eigenmaps #' #' Laplacian Eigenmaps use a kernel and were originally developed to #' separate non-convex clusters under the name spectral clustering. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' \code{LaplacianEigenmaps} can take the following parameters: #' \describe{ #' \item{ndim}{the number of output dimensions.} #' #' \item{sparse}{A character vector specifying hot to make the graph #' sparse, \code{"knn"} means that a K-nearest neighbor graph is #' constructed, \code{"eps"} an epsilon neighborhood graph is #' constructed, else a dense distance matrix is used.} #' #' \item{knn}{The number of nearest neighbors to use for the knn graph.} #' \item{eps}{The distance for the epsilon neighborhood graph.} #' #' \item{t}{Parameter for the transformation of the distance matrix #' by \eqn{w=exp(-d^2/t)}, larger values give less weight to #' differences in distance, \code{t == Inf} treats all distances != 0 equally.} #' \item{norm}{logical, should the normed laplacian be used?} #' } #' #' @section Implementation: #' Wraps around \code{\link[loe]{spec.emb}}. #' #' @references #' Belkin, M., Niyogi, P., 2003. Laplacian Eigenmaps for #' Dimensionality Reduction and Data Representation. Neural #' Computation 15, 1373. 
#' #' @examples #' dat <- loadDataSet("3D S Curve") #' leim <- LaplacianEigenmaps() #' emb <- leim@fun(dat, leim@stdpars) #' #' #' plot(emb@data@data) #' #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @export LaplacianEigenmaps #' @exportClass LaplacianEigenmaps LaplacianEigenmaps <- setClass( "LaplacianEigenmaps", contains = "dimRedMethod", prototype = list( stdpars = list(ndim = 2, sparse = "knn", knn = 50, eps = 0.1, t = Inf, norm = T), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("loe") chckpkg("RSpectra") chckpkg("Matrix") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data if (is.null(pars$d)) pars$d <- dist if (is.null(pars$knn)) pars$knn <- 50 if (is.null(pars$ndim)) pars$ndim <- 2 if (is.null(pars$t)) pars$t <- Inf if (is.null(pars$norm)) pars$norm <- TRUE message(Sys.time(), ": Creating weight matrix") W <- if (pars$sparse == "knn") { knng <- makeKNNgraph(indata, k = pars$knn, eps = 0, diag = TRUE) if (is.infinite(pars$t)){ igraph::set_edge_attr(knng, name = "weight", value = 1) } else { igraph::set_edge_attr( knng, name = "weight", value = exp( -( igraph::edge_attr( knng, name = "weight" ) ^ 2 ) / pars$t ) ) } igraph::as_adj(knng, sparse = TRUE, attr = "weight", type = "both") } else if (pars$sparse == "eps") { tmp <- makeEpsSparseMatrix(indata, pars$eps) tmp@x <- if (is.infinite(pars$t)) rep(1, length(tmp@i)) else exp(- (tmp@x ^ 2) / pars$t) ## diag(tmp) <- 1 as(tmp, "dgCMatrix") } else { # dense case tmp <- dist(indata) tmp[] <- if (is.infinite(pars$t)) 1 else exp( -(tmp ^ 2) / pars$t) tmp <- as.matrix(tmp) diag(tmp) <- 1 tmp } ## we don't need to test for symmetry, because we know the ## matrix is symmetric D <- Matrix::Diagonal(x = Matrix::rowSums(W)) L <- D - W ## for the generalized eigenvalue problem, we do not have a solver ## use A u = \lambda B u ## Lgen <- Matrix::Diagonal(x = 1 / Matrix::diag(D) ) %*% L ## but then we get negative eigenvalues and complex eigenvalues Lgen <- L message(Sys.time(), ": Eigenvalue decomposition") outdata <- if (pars$norm) { DS <- Matrix::Diagonal(x = 1 / sqrt(Matrix::diag(D))) RSpectra::eigs_sym(DS %*% Lgen %*% DS, k = pars$ndim + 1, sigma = -1e-5) } else { RSpectra::eigs_sym(Lgen, k = pars$ndim + 1, sigma = -1e-5) } message("Eigenvalues: ", paste(format(outdata$values), collapse = " ")) ## The eigenvalues are in decreasing order and we remove the ## smallest, which should be approx 0: outdata <- outdata$vectors[, order(outdata$values)[-1], drop = FALSE] colnames(outdata) <- paste0("LEIM", 1:ncol(outdata)) message(Sys.time(), ": DONE") return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, has.org.data = keep.org.data, method = "leim", pars = pars )) }) ) dimRed/R/mixColorSpaces.R0000644000176200001440000000452313042052034014732 0ustar liggesusers#' Mixing color ramps #' #' mix different color ramps #' #' automatically create colors to represent a varying number of #' dimensions. #' #' @param vars a list of variables #' @param ramps a list of color ramps, one for each variable. 
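#' @return a character vector of hexadecimal color strings as produced by
#'     \code{\link[grDevices]{rgb}}, one element per row of the input
#'     variables.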
#' #' @examples #' cols <- expand.grid(x = seq(0, 1, length.out = 10), #' y = seq(0, 1, length.out = 10), #' z = seq(0, 1, length.out = 10)) #' mixed <- mixColor3Ramps(cols) #' #' \dontrun{ #' library(rgl) #' plot3d(cols$x, cols$y, cols$z, col = mixed, pch = 15) #' #' cols <- expand.grid(x = seq(0, 1, length.out = 10), #' y = seq(0, 1, length.out = 10)) #' mixed <- mixColor2Ramps(cols) #' } #' #' plot(cols$x, cols$y, col = mixed, pch = 15) #' @importFrom grDevices colorRamp #' @importFrom grDevices rgb #' @export mixColorRamps <- function (vars, ramps) { if (length(vars) > length(ramps)) stop("need more or equal ramps than vars") nvars <- length(vars) rgbs <- list() for (i in 1:nvars){ rgbs[[i]] <- ramps[[i]](scale01(as.numeric(vars[[i]]))) } retrgb <- Reduce(`+`, rgbs) res <- apply(retrgb, 2, function(x) (x - min(x)) / (max(x) - min(x))) res[is.nan(res)] <- 0 return(rgb(res)) } #' @rdname mixColorRamps #' @export mixColor1Ramps <- function (vars, ramps = colorRamp(c("blue", "black", "red"))) { mixColorRamps(vars, list(ramps)) } #' @rdname mixColorRamps #' @export mixColor2Ramps <- function (vars, ramps = list(colorRamp(c("blue", "green")), colorRamp(c("blue", "red")))) { mixColorRamps(vars, ramps) } #' @rdname mixColorRamps #' @export mixColor3Ramps <- function (vars, ramps = list(colorRamp(c("#001A00", "#00E600")), colorRamp(c("#00001A", "#0000E6")), colorRamp(c("#1A0000", "#E60000")))) { mixColorRamps(vars, ramps) } colorize <- function (vars) { l <- length(vars) if (l == 1) return(mixColor1Ramps(vars)) if (l == 2) return(mixColor2Ramps(vars)) if (l == 3) return(mixColor3Ramps(vars)) return("#000000") } scale01 <- function(x, low = min(x, na.rm = TRUE), high = max(x, na.rm = FALSE)) { x <- (x - low) / (high - low) x } dimRed/R/isomap.R0000644000176200001440000001650313026512501013272 0ustar liggesusers#' Isomap embedding #' #' An S4 Class implementing the Isomap Algorithm #' #' The Isomap algorithm approximates a manifold using geodesic #' distances on a k nearest neighbor graph. Then classical scaling is #' performed on the resulting distance matrix. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' Isomap can take the following parameters: #' \describe{ #' \item{knn}{The number of nearest neighbors in the graph. Defaults to 50.} #' \item{ndim}{The number of embedding dimensions, defaults to 2.} #' } #' #' @section Implementation: #' #' The dimRed package uses its own implementation of Isomap which also #' comes with an out of sample extension (known as landmark #' Isomap). The default Isomap algorithm scales computationally not #' very well, the implementation here uses \code{\link[RANN]{nn2}} for #' a faster search of the neares neighbors. If data are too large it #' may be useful to fit a subsample of the data and use the #' out-of-sample extension for the other points. 
#' #' @examples #' dat <- loadDataSet("3D S Curve", n = 500) #' #' ## use the S4 Class directly: #' isomap <- Isomap() #' emb <- isomap@fun(dat, isomap@stdpars) #' #' ## or simpler, use embed(): #' samp <- sample(nrow(dat), size = 200) #' emb2 <- embed(dat[samp], "Isomap", mute = NULL, knn = 10) #' emb3 <- emb2@apply(dat[-samp]) #' #' plot(emb2, type = "2vars") #' plot(emb3, type = "2vars") #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export Isomap #' @exportClass Isomap Isomap <- setClass( "Isomap", contains = "dimRedMethod", prototype = list( stdpars = list(knn = 50, ndim = 2), fun = function (data, pars, keep.org.data = TRUE) { message(Sys.time(), ": Isomap START") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data if (is.null(pars$eps)) pars$eps <- 0 ## geodesic distances message(Sys.time(), ": constructing knn graph") knng <- makeKNNgraph(x = indata, k = pars$knn, eps = pars$eps) message(Sys.time(), ": calculating geodesic distances") geodist <- igraph::distances(knng, algorithm = "dijkstra") message(Sys.time(), ": cmdscale") cmdout <- stats::cmdscale(geodist, k = pars$ndim, eig = TRUE) message(Sys.time(), ": post processing") neig <- sum(cmdout$eig > 0) if (neig < pars$ndim) { warning("Isomap: eigenvalues < 0, returning less dimensions!") cmdout$points <- cmdout$points[, seq_len(neig), drop = FALSE] cmdout$eig <- cmdout$eig[seq_len(neig)] } else { cmdout$eig <- cmdout$eig[seq_len(pars$ndim)] } colnames(cmdout$points) <- paste0("iso", seq_len(ncol(cmdout$points))) appl <- function (x) { message(Sys.time(), ": L-Isomap embed START") appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame() indata <- if (inherits(x, "dimRedData")) x@data else x if (ncol(indata) != ncol(data@data)) stop("x must have the same number of dimensions as the original data") nindata <- nrow(indata) norg <- nrow(orgdata) message(Sys.time(), ": constructing knn graph") lknng <- makeKNNgraph(rbind(indata, orgdata), k = pars$knn, eps = pars$eps) message(Sys.time(), ": calculating geodesic distances") lgeodist <- igraph::distances(lknng, seq_len(nindata), nindata + seq_len(norg)) message(Sys.time(), ": embedding") dammu <- sweep(lgeodist ^ 2, 2, colMeans(geodist ^ 2), "-") Lsharp <- sweep(cmdout$points, 2, cmdout$eig, "/") out <- -0.5 * (dammu %*% Lsharp) message(Sys.time(), ": DONE") return(new("dimRedData", data = out, meta = appl.meta)) } return(new( "dimRedResult", data = new("dimRedData", data = cmdout$points, meta = meta), org.data = orgdata, has.org.data = keep.org.data, apply = appl, has.apply = TRUE, method = "isomap", pars = pars )) }) ) ## input data(matrix or data frame) return knn graph implements ## "smart" choices on RANN::nn2 parameters we ignore radius search ## TODO: find out a good limit to switch from kd to bd trees COMMENT: ## bd trees are buggy, they dont work if there are duplicated data ## points and checking would neutralize the performance gain, so bd ## trees are not really usable. makeKNNgraph <- function (x, k, eps = 0, diag = FALSE){ ## requireNamespace("RANN") ## requireNamespace("igraph") ## consts INF_VAL <- 1.340781e+15 NA_IDX <- 0 BDKD_LIM <- 1000000 #todo: figure out a good value here ## select parameters M <- nrow(x) treetype <- "kd" # if (M < BDKD_LIM) "kd" else "bd" # see: # https://github.com/jefferis/RANN/issues/19 searchtype <- if (eps == 0) "standard" else "priority" ## RANN::nn2 returns the points in data with respect to query ## e.g. 
the rows in the output are the points in query and the ## columns the points in data. nn2res <- RANN::nn2(data = x, query = x, k = k + 1, treetype = treetype, searchtype = searchtype, eps = eps) ## create graph: the first ny nodes will be y, the last nx nodes ## will be x, if x != y ## it is not really pretty to create a ## directed graph first and then make it undirected. g <- igraph::make_empty_graph(M, directed = TRUE) g[from = if (diag) rep(seq_len(M), times = k + 1) else rep(seq_len(M), times = k), to = if (diag) as.vector(nn2res$nn.idx) else as.vector(nn2res$nn.idx[, -1]), attr = "weight"] <- if (diag) as.vector(nn2res$nn.dists) else as.vector(nn2res$nn.dists[, -1]) return(igraph::as.undirected(g, mode = "collapse", edge.attr.comb = "first")) } ## the original isomap method I'll keep it here for completeness: ## isomap <- new("dimRedMethod", ## stdpars = list(knn = 50, ## d = dist, ## ndim = 2) ## fun = function (data, pars, ## keep.org.data = TRUE) { ## chckpkg("vegan") ## meta <- data@meta ## orgdata <- if (keep.org.data) data@data else NULL ## indata <- data@data ## outdata <- vegan::isomap(pars$d(indata), ## ndim = pars$ndim, ## k = pars$knn)$points ## colnames(outdata) <- paste0("Iso", 1:ncol(outdata)) ## return(new( ## "dimRedResult", ## data = new("dimRedData", ## data = outdata, ## meta = meta), ## org.data = orgdata, ## has.org.data = keep.org.data, ## method = "isomap", ## pars = pars ## )) ## }) dimRed/R/graph_embed.R0000644000176200001440000002123613040076764014252 0ustar liggesusers#' Graph Embedding via the Kamada Kawai Algorithm #' #' An S4 Class implementing the Kamada Kawai Algorithm for graph embedding. #' #' Graph embedding algorithms se the data as a graph. Between the #' nodes of the graph exist attracting and repelling forces which can #' be modeled as electrical fields or springs connecting the #' nodes. The graph is then forced into a lower dimensional #' representation that tries to represent the forces betweent he nodes #' accurately by minimizing the total energy of the attracting and #' repelling forces. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' KamadaKawai can take the following parameters: #' \describe{ #' \item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3} #' \item{knn}{Reduce the graph to keep only the neares neighbors. Defaults to 100.} #' \item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.} #' } #' #' @section Implementation: #' Wraps around \code{\link[igraph]{layout_with_kk}}. The parameters #' maxiter, epsilon and kkconst are set to the default values and #' cannot be set, this may change in a future release. The DimRed #' Package adds an extra sparsity parameter by constructing a knn #' graph which also may improve visualization quality. 
#' #' @examples #' dat <- loadDataSet("Swiss Roll", n = 500) #' kamada_kawai <- KamadaKawai() #' kk <- kamada_kawai@fun(dat, kamada_kawai@stdpars) #' #' plot(kk@data@data) #' #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export KamadaKawai #' @exportClass KamadaKawai KamadaKawai <- setClass( "KamadaKawai", contains = "dimRedMethod", prototype = list( stdpars = list(ndim = 2, knn = 100, d = stats::dist), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("igraph") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data outdata <- em_graph_layout( indata, graph_em_method = igraph::layout_with_kk, knn = pars$knn, d = pars$d, ndim = pars$ndim, weight.trans = I #pars$weight.trans ) colnames(outdata) <- paste0("KK", 1:ncol(outdata)) return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, has.org.data = keep.org.data, method = "graph_kk", pars = pars )) }) ) #' Distributed Recursive Graph Layout #' #' An S4 Class implementing Distributed recursive Graph Layout. #' #' DrL uses a complex algorithm to avoid local minima in the graph #' embedding which uses several steps. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' DrL can take the following parameters: #' \describe{ #' \item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3} #' \item{knn}{Reduce the graph to keep only the neares neighbors. Defaults to 100.} #' \item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.} #' } #' #' @section Implementation: #' Wraps around \code{\link[igraph]{layout_with_drl}}. The parameters #' maxiter, epsilon and kkconst are set to the default values and #' cannot be set, this may change in a future release. The DimRed #' Package adds an extra sparsity parameter by constructing a knn #' graph which also may improve visualization quality. #' #' @examples #' \dontrun{ #' dat <- loadDataSet("Swiss Roll", n = 500) #' #' ## use the S4 Class directly: #' drl <- DrL() #' emb <- drl@fun(dat, drl@stdpars) #' #' ## simpler, use embed(): #' emb2 <- embed(dat, "DrL") #' #' #' plot(emb) #' } #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export DrL #' @exportClass DrL DrL <- setClass( "DrL", contains = "dimRedMethod", prototype = list( stdpars = list(ndim = 2, knn = 100, d = stats::dist), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("igraph") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data outdata <- em_graph_layout( indata, graph_em_method = igraph::layout_with_drl, knn = pars$knn, d = pars$d, ndim = pars$ndim, weight.trans = I #pars$weight.trans ) colnames(outdata) <- paste0("DrL", 1:ncol(outdata)) return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, has.org.data = keep.org.data, method = "graph_drl", pars = pars )) }) ) #' Fruchterman Reingold Graph Layout #' #' An S4 Class implementing the Fruchterman Reingold Graph Layout #' algorithm. #' #' @template dimRedMethodSlots #' #' @template dimRedMethodGeneralUsage #' #' @section Parameters: #' \describe{ #' \item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3} #' \item{knn}{Reduce the graph to keep only the neares neighbors. 
Defaults to 100.} #' \item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.} #' } #' #' @section Implementation: #' Wraps around \code{\link[igraph]{layout_with_fr}}, see there for #' details. The Fruchterman Reingold algorithm puts the data into #' a circle and puts connected points close to each other. #' #' @examples #' dat <- loadDataSet("Swiss Roll", n = 100) #' #' ## use the S4 Class directly: #' fruchterman_reingold <- FruchtermanReingold() #' pars <- fruchterman_reingold@stdpars #' pars$knn <- 5 #' emb <- fruchterman_reingold@fun(dat, pars) #' #' ## simpler, use embed(): #' emb2 <- embed(dat, "FruchtermanReingold", knn = 5) #' #' plot(emb, type = "2vars") #' #' @include dimRedResult-class.R #' @include dimRedMethod-class.R #' @family dimensionality reduction methods #' @export FruchtermanReingold #' @exportClass FruchtermanReingold FruchtermanReingold <- setClass( "FruchtermanReingold", contains = "dimRedMethod", prototype = list( stdpars = list(ndim = 2, knn = 100, d = stats::dist), fun = function (data, pars, keep.org.data = TRUE) { chckpkg("igraph") meta <- data@meta orgdata <- if (keep.org.data) data@data else NULL indata <- data@data outdata <- em_graph_layout( indata, graph_em_method = igraph::layout_with_fr, knn = pars$knn, d = pars$d, ndim = pars$ndim, weight.trans = I #pars$weight.trans ) colnames(outdata) <- paste0("FR", 1:ncol(outdata)) return(new( "dimRedResult", data = new("dimRedData", data = outdata, meta = meta), org.data = orgdata, has.org.data = keep.org.data, method = "graph_fr", pars = pars )) }) ) em_graph_layout <- function(data, graph_em_method, knn = 50, d = stats::dist, ndim = 2, weight.trans = I){ chckpkg("igraph") data.dist <- as.matrix(d(data)) data.graph <- construct_knn_graph(data.dist, knn) embed_graph(data.graph, graph_em_method, ndim = ndim) } embed_graph <- function(graph, f, weight.trans = I, ndim = 2){ f(graph, weights = weight.trans(igraph::E(graph)$weight), dim = ndim) } construct_knn_graph <- function (data.dist, knn) { chckpkg("igraph") chckpkg("coRanking") data.graph <- igraph::graph_from_adjacency_matrix( adjmatrix = data.dist, mode = "undirected", weighted = T ) if (is.infinite(knn) || is.na(knn)) return(data.graph) ## else: remove all unnecessary edges data.rankm <- coRanking::rankmatrix(data.dist, input = "dist") data.rankm.ind <- data.rankm <= knn + 1 inds <- which( !(data.rankm.ind | t(data.rankm.ind)), arr.ind = TRUE ) data.graph[ from = inds[, 1], to = inds[, 2] ] <- FALSE return(data.graph) } dimRed/README.md0000644000176200001440000000163513102620674012743 0ustar liggesusers# dimRed [![Travis Build Status](https://travis-ci.org/gdkrmr/dimRed.svg?branch=master)](https://travis-ci.org/gdkrmr/dimRed) [![Coverage Status](https://img.shields.io/codecov/c/github/gdkrmr/dimRed/master.svg)](https://codecov.io/github/gdkrmr/dimRed?branch=master) [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/dimRed)](https://cran.r-project.org/package=dimRed) [![License GPL 3][badge-license]](http://www.gnu.org/licenses/gpl-3.0.txt) A Framework for Dimensionality Reduction for the R language. A collection of dimensionality reduction techniques from R packages and provides a common interface for calling the methods. 
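## Quick example:

A minimal sketch of a typical workflow (after installing, see below). It only uses the example data sets, methods, and quality measures that ship with the package and appear in its documentation:

```R
library(dimRed)

## load one of the example data sets
dat <- loadDataSet("3D S Curve", n = 500)

## embed it with one of the methods listed by dimRedMethodList()
emb <- embed(dat, "Isomap", knn = 10)

## plot the first two embedding dimensions
plot(emb, type = "2vars")

## assess the embedding with one of the measures from dimRedQualityList()
quality(emb, "Q_local")
```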
## Installing: ```R ## install.packages("devtools") devtools::install_github("gdkrmr/dimRed") ``` Install from CRAN ```R install.packages("dimRed") ``` Load it: ```R library(dimRed) ``` Install dependencies: ```R ## To install all dependencies: dimRed::installSuggests() ``` dimRed/MD50000644000176200001440000001224513102645305011771 0ustar liggesusersc7b3e295d89f6e97bc745992a143773a *DESCRIPTION ae5a59342168733d9988af23c3ca4c2a *LICENSE 77cbd3a1b622ef691d993b22c4039aaf *NAMESPACE 5067a3c6ecb862f898c7b3094b66a09d *NEWS.md d4e3e654a96439e20fb41d3e10985af0 *R/dataSets.R a8a220d351916a77648c15687f5fad12 *R/diffmap.R a34a8b34ff42a13f8b9ee4912aa01dae *R/dimRed.R 48ee826c737eb4d753d1df0a9d6729f2 *R/dimRedData-class.R bf1cbcd95d3724a522ae6ea3e6a4cd5a *R/dimRedMethod-class.R 17bb09661cb24363c2cdac31a4330b8b *R/dimRedResult-class.R daac94506b94b9c2b608750722d6cd33 *R/drr.R b1f35affd19b1986560bcf547451ff40 *R/embed.R d3378abe5bdbdd5b03117fe557f20ab7 *R/fastica.R ea38f53e7cbec827875840cb5277c0ab *R/get_info.R 216998ec0b2205f77993cf38ee1a9f82 *R/graph_embed.R 435c95619278e2cb8de7d622f637fd30 *R/hlle.R 3831d18ceaaed7e4397b00b50856f9ff *R/isomap.R cb5b7b219e8709c0d1fc73635daa5d18 *R/kpca.R 0e58f3d58b6f59534d873ca2f19cc64f *R/leim.R 2a67073beb66e4ea066383b5edbd8868 *R/lle.R d188fc370cdc5ea1094613ec0b3972da *R/loe.R 17f107848e2652bbf102cba936f0c34e *R/mds.R fafc0f2fe12ea0f91c8d7d226eaaafa8 *R/misc.R c115a187e0c2cbbb12219ac0db73e66d *R/mixColorSpaces.R 67173bbfb27be200e376d715fad2da6f *R/nmds.R 74f56b12deeb233b42ce28591e672606 *R/pca.R aa156efc841228cd96387b77565392d8 *R/plot.R 420d8194383b385e053a60536ca954bf *R/quality.R f00fdc1888d4597dff509ecf5f683b85 *R/rotate.R 546e6d5cf4d954c002b0e5d2031eb69f *R/soe.R 62a94fd820564ab64411436b955e77d4 *R/tsne.R e28a127a366285bfbf7f762c8b81cd3c *README.md 73a75dba40967ca0357c33e5b1da9ef1 *man/AUC_lnK_R_NX-dimRedResult-method.Rd 546812a3d6f2c7b6a6a564c4fd66c9b6 *man/DRR-class.Rd 33f691fe27eadb829abc537b121fe883 *man/DiffusionMaps-class.Rd 67e2ba5085da53bd715d8405eb021cca *man/DrL-class.Rd 0f62d721f07139725993a10c1a3f8047 *man/FastICA-class.Rd 068460ab5d8b2996dce0946f71122b01 *man/FruchtermanReingold-class.Rd 0005c7e3d7166f61784a76e29a5777ce *man/HLLE-class.Rd 425e0ca9c6b690d919098601db8848b6 *man/Isomap-class.Rd 2b15c8d6e0176753a6f84d349d86b824 *man/KamadaKawai-class.Rd 15bdac771579710d7551c3f3b8de90fe *man/LCMC-dimRedResult-method.Rd f5d63c2c71cce3db3af9839ceaf55953 *man/LLE-class.Rd e1f54c4f9b999f4b005b93b0f59739d9 *man/LaplacianEigenmaps-class.Rd f62d0594c6bd4378dc528d23aca1c8bd *man/MDS-class.Rd bb462f63627b279182795661d519eb2f *man/PCA-class.Rd 8b9542c8ebd403c5c2926c88e9ae6daf *man/Q_NX-dimRedResult-method.Rd 08fafe7719021786cba3241d23b5370b *man/Q_global-dimRedResult-method.Rd ac002031fde956c6e559519a60c7733c *man/Q_local-dimRedResult-method.Rd 7f5feebe4a0a82f07bfce627bbc2358f *man/R_NX-dimRedResult-method.Rd 718f4e21d3332957a02215ba01e9125f *man/as.data.frame.Rd ac6b39dee6b874508744ff2f22750ad6 *man/as.dimRedData.Rd b1b8ef56480a491d235266037f3e5620 *man/cophenetic_correlation-dimRedResult-method.Rd 419144b1768266c642462c3c959788ff *man/dataSets.Rd 685e9ac76e6c37e56393adb612749f12 *man/dimRed-package.Rd fd7b86897e1520063e2bc9770bf13764 *man/dimRedData-class.Rd 1f5bde35b17f59bd0ae6c880beb59852 *man/dimRedMethod-class.Rd 5e4f3b8f84657467b64741981c70e5ae *man/dimRedMethodList.Rd c6e467c6f28ebd8715fcef62ada71e9b *man/dimRedResult-class.Rd 3a453f9b2c5162a3a12064aa24e9b0fd *man/distance_correlation-dimRedResult-method.Rd 1f438c13a06bbf3741a30f970f0448e5 *man/embed.Rd 
af8863f32422517f0758352f5773d916 *man/getData.Rd beb9c9c0b2b9069d2cfde3a7ef78f810 *man/getDimRedData.Rd 3d700516a933d7d8fe95cebd9e3e0365 *man/getMeta.Rd 54af2a8843ad16e4c8daf4b75f80fd51 *man/getOrgData.Rd b6a82a60362da823b309497c27b54d61 *man/getPars.Rd 4d1954e5a6fab70d8f45a7f60f6642ef *man/getRotationMatrix.Rd 6df2e37941539cd6055499ba4a0eca83 *man/installSuggests.Rd ba375cec94cd53337880246f00bb3a1a *man/kPCA-class.Rd e6fda0f5f8483f08ff20d467eab06cac *man/makeKNNgraph.Rd 0337738c6a82a8f3a041a337d8af7f6f *man/maximize_correlation-dimRedResult-method.Rd 860fa7ff7d368a39ff42056c02718313 *man/mean_R_NX-dimRedResult-method.Rd ca6d375b0f0df0043fa5ba19785024f6 *man/mixColorRamps.Rd 089af589aaf022e6966ae337c64e19ae *man/nMDS-class.Rd d129fc72b1b204e139d23885ccd064bc *man/ndims.Rd 683794831183ff840cc4324512214b0b *man/plot.Rd b10d102f9ac43f93f2123e9cda417822 *man/plot_R_NX.Rd ecdac99d07501417ba48b07a28ec5f5d *man/print.Rd 049c05bcc6354b06e3e663a2efbea8a0 *man/quality.Rd c53cf2173361a9843d283b8a21410a45 *man/reconstruction_error-dimRedResult-method.Rd f3744f1920c66aba45b71ad76966dc3b *man/reconstruction_rmse-dimRedResult-method.Rd 74b844c5429203e1e64909353b84bd05 *man/tSNE-class.Rd 1d4eee0894789fc7fadd3dbf62a2628c *man/total_correlation-dimRedResult-method.Rd 90e7032d9dab3cdce1f4cdab889c1e5e *tests/testthat.R 9133c54653cdd0ab58c3a076c400c8e0 *tests/testthat/test_ICA.R 17815ed1ccb390b8d0c55a94f51bd3a9 *tests/testthat/test_PCA.R b98cb07dc62f09c7e5fd031ae25ead10 *tests/testthat/test_all.R eb2b0ed077c46c957b4069250febb644 *tests/testthat/test_dataSets.R d9b466e7fbdf77244760e4227e7f4646 *tests/testthat/test_dimRedData.R 1e8ad507fae940db54517de856e4695d *tests/testthat/test_dimRedMethod-class.R 9edf9c5c225b71ef4e0420120c0e3357 *tests/testthat/test_dimRedResult.R f12ae7a9eeca865384b07b7fe4319523 *tests/testthat/test_drr.R 9c8250ccc583d5455ed262bbe13dcee9 *tests/testthat/test_isomap.R 75ff819a82bd02b26f7b6aae2131ff82 *tests/testthat/test_kPCA.R 534d8537c124bd16da79633add3fea2d *tests/testthat/test_misc.R 839120972699a0ec977201ae7e9e3d1f *tests/testthat/test_quality.R dimRed/DESCRIPTION0000644000176200001440000000237713102645305013174 0ustar liggesusersPackage: dimRed Title: A Framework for Dimensionality Reduction Version: 0.1.0 Authors@R: person("Guido", "Kraemer", email = "gkraemer@bgc-jena.mpg.de", role = c("aut", "cre")) Description: A collection of dimensionality reduction techniques from R packages and provides a common interface for calling the methods. 
Depends: R (>= 3.0.0), methods, DRR Suggests: MASS, Matrix, RANN, RSpectra, Rtsne, coRanking, diffusionMap, energy, fastICA, ggplot2, graphics, igraph, kernlab, lle, loe, optimx, pcaPP, rgl, scales, scatterplot3d, stats, testthat, tidyr, vegan License: GPL-3 | file LICENSE URL: https://github.com/gdkrmr/dimRed LazyData: true Collate: 'misc.R' 'dimRedData-class.R' 'dataSets.R' 'dimRedMethod-class.R' 'dimRedResult-class.R' 'diffmap.R' 'dimRed.R' 'drr.R' 'embed.R' 'fastica.R' 'get_info.R' 'graph_embed.R' 'hlle.R' 'isomap.R' 'kpca.R' 'leim.R' 'lle.R' 'loe.R' 'mds.R' 'mixColorSpaces.R' 'nmds.R' 'pca.R' 'plot.R' 'quality.R' 'rotate.R' 'soe.R' 'tsne.R' RoxygenNote: 6.0.1 NeedsCompilation: yes Packaged: 2017-05-04 14:56:00 UTC; gkraemer Author: Guido Kraemer [aut, cre] Maintainer: Guido Kraemer Repository: CRAN Date/Publication: 2017-05-04 15:37:41 UTC dimRed/man/0000755000176200001440000000000013065033470012232 5ustar liggesusersdimRed/man/getDimRedData.Rd0000644000176200001440000000050213065033470015154 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/misc.R \name{getDimRedData} \alias{getDimRedData} \title{Method getDimRedData} \usage{ getDimRedData(object, ...) } \arguments{ \item{object}{The object to extract data from.} \item{...}{other arguments.} } \description{ Extract dimRedData. } dimRed/man/embed.Rd0000644000176200001440000000547213065033470013605 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/embed.R \docType{methods} \name{embed} \alias{embed} \alias{embed,formula-method} \alias{embed,ANY-method} \alias{embed,dimRedData-method} \title{dispatches the different methods for dimensionality reduction} \usage{ embed(.data, ...) \S4method{embed}{formula}(.formula, .data, .method = dimRedMethodList(), .mute = character(0), .keep.org.data = TRUE, ...) \S4method{embed}{ANY}(.data, .method = dimRedMethodList(), .mute = character(0), .keep.org.data = TRUE, ...) \S4method{embed}{dimRedData}(.data, .method = dimRed::dimRedMethodList(), .mute = character(0), .keep.org.data = TRUE, ...) } \arguments{ \item{.data}{object of class \code{dimRedData}} \item{...}{the pameters, internally passed as a list to the dimensionality reduction method as \code{pars = list(...)}} \item{.formula}{a formula, see \code{\link{as.dimRedData}}.} \item{.method}{character vector naming one of the dimensionality reduction techniques.} \item{.mute}{a character vector containing the elements you want to mute (\code{c("message", "output")}), defaults to \code{character(0)}.} \item{.keep.org.data}{TRUE/FALSE keep the original data.} } \value{ an object of class \code{dimRedResult} } \description{ wraps around all dimensionality reduction functions. } \details{ Method must be one of \code{dimRedMethodList()}, partial matching is performed. All parameters start with a dot, to avoid clashes with partial argument matching (see the R manual section 4.3.2), if there should ever occur any clashes in the arguments, call the function with all arguments named, e.g. \code{embed(.data = dat, .method = "mymethod", .d = "some parameter")}. } \section{Methods (by class)}{ \itemize{ \item \code{formula}: embed a data.frame using a formula. \item \code{ANY}: Embed anything as long as it can be coerced to \code{dimRedData}. 
\item \code{dimRedData}: Embed a dimRedData object }} \examples{ \dontrun{ embed_methods <- dimRedMethodList() quality_methods <- dimRedQualityList() dataset <- loadDataSet("Iris") quality_results <- matrix(NA, length(embed_methods), length(quality_methods), dimnames = list(embed_methods, quality_methods)) embedded_data <- list() for (e in embed_methods) { message("embedding: ", e) embedded_data[[e]] <- embed(dataset, e, .mute = c("message", "output")) for (q in quality_methods) { message(" quality: ", q) quality_results[e, q] <- tryCatch( quality(embedded_data[[e]], q), error = function(e) NA ) } } print(quality_results) } ## embed a data.frame using a formula: head(as.data.frame( embed(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, iris, "PCA") )) head(as.data.frame( embed(iris[, 1:4], "PCA") )) head(as.data.frame( embed(as.matrix(iris[, 1:4]), "PCA") )) } dimRed/man/getRotationMatrix.Rd0000644000176200001440000000146113065035446016214 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_info.R \name{getRotationMatrix} \alias{getRotationMatrix} \title{getRotationMatrix} \usage{ getRotationMatrix(x) } \arguments{ \item{x}{of type \code{\link{dimRedResult}}} } \value{ a matrix } \description{ Extract the rotation matrix from \code{\link{dimRedResult}} objects derived from PCA and FastICA } \details{ The data has to be pre-processed the same way as the method does, e.g. centering and/or scaling. } \examples{ dat <- loadDataSet("Iris") pca <- embed(dat, "PCA") ica <- embed(dat, "FastICA") rot_pca <- getRotationMatrix(pca) rot_ica <- getRotationMatrix(ica) scale(getData(dat), TRUE, FALSE) \%*\% rot_pca - getData(getDimRedData(pca)) scale(getData(dat), TRUE, FALSE) \%*\% rot_ica - getData(getDimRedData(ica)) } dimRed/man/LLE-class.Rd0000644000176200001440000000431513065033470014243 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lle.R \docType{class} \name{LLE-class} \alias{LLE-class} \alias{LLE} \title{Locally Linear Embedding} \description{ An S4 Class implementing Locally Linear Embedding (LLE) } \details{ LLE approximates the points in the manifold by linear combination of its neighbors. These linear combinations are the same inside the manifold and in highdimensional space. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ LLE can take the following parameters: \describe{ \item{knn}{the number of neighbors for the knn graph., defaults to 50.} \item{ndim}{the number of embedding dimensions, defaults to 2.} } } \section{Implementation}{ Wraps around \code{\link[lle]{lle}}, only exposes the parameters \code{k} and \code{m}. } \examples{ dat <- loadDataSet("3D S Curve", n = 500) ## directy use the S4 class: lle <- LLE() emb <- lle@fun(dat, lle@stdpars) ## using embed(): emb2 <- embed(dat, "LLE", knn = 45) plot(emb, type = "2vars") plot(emb2, type = "2vars") } \references{ Roweis, S.T., Saul, L.K., 2000. 
Nonlinear Dimensionality Reduction by Locally Linear Embedding. Science 290, 2323-2326. doi:10.1126/science.290.5500.2323 } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/cophenetic_correlation-dimRedResult-method.Rd0000644000176200001440000000237213065033470023126 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{cophenetic_correlation,dimRedResult-method} \alias{cophenetic_correlation,dimRedResult-method} \alias{cophenetic_correlation} \title{Method cophenetic_correlation} \usage{ \S4method{cophenetic_correlation}{dimRedResult}(object, d = stats::dist, cor_method = "pearson") } \arguments{ \item{object}{of class dimRedResult} \item{d}{the distance function to use.} \item{cor_method}{The correlation method.} } \description{ Calculate the correlation between the distance matrices in high and low dimensioal space. } \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } dimRed/man/Q_NX-dimRedResult-method.Rd0000644000176200001440000000223713065033470017211 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{Q_NX,dimRedResult-method} \alias{Q_NX,dimRedResult-method} \alias{Q_NX} \title{Method Q_NX} \usage{ \S4method{Q_NX}{dimRedResult}(object) } \arguments{ \item{object}{of class dimRedResult} } \description{ Calculate the Q_NX score (Chen & Buja 2006, the notation in the publication is M_k). Which is the fraction of points that remain inside the same K-ary neighborhood in high and low dimensional space. 
} \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } dimRed/man/nMDS-class.Rd0000644000176200001440000000367713065033470014442 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nmds.R \docType{class} \name{nMDS-class} \alias{nMDS-class} \alias{nMDS} \title{Non-Metric Dimensional Scaling} \description{ An S4 Class implementing Non-Metric Dimensional Scaling. } \details{ A non-linear extension of MDS using monotonic regression } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ nMDS can take the following parameters: \describe{ \item{d}{A distance function.} \item{ndim}{The number of embedding dimensions.} } } \section{Implementation}{ Wraps around the \code{\link[vegan]{monoMDS}}. For parameters that are not available here, the standard configuration is used. } \examples{ dat <- loadDataSet("3D S Curve", n = 1000) ## using the S4 classes: nmds <- nMDS() emb <- nmds@fun(dat, nmds@stdpars) ## using embed() emb2 <- embed(dat, "nMDS", d = function(x) exp(dist(x))) plot(emb, type = "2vars") plot(emb2, type = "2vars") } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{tSNE-class}} } dimRed/man/maximize_correlation-dimRedResult-method.Rd0000644000176200001440000000177313065033470022634 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/rotate.R \docType{methods} \name{maximize_correlation,dimRedResult-method} \alias{maximize_correlation,dimRedResult-method} \alias{maximize_correlation} \title{Maximize Correlation with the Axes} \usage{ \S4method{maximize_correlation}{dimRedResult}(object, naxes = ncol(object@data@data), cor_method = "pearson") } \arguments{ \item{object}{A dimRedResult object} \item{naxes}{the number of axes to optimize for.} \item{cor_method}{which correlation method to use} } \description{ Rotates the data in such a way that the correlation with the first \code{naxes} axes is maximized. 
}
\details{
Methods that do not use eigenvector decomposition, like t-SNE, often do
not align the data with the axes according to the correlation of the
variables with the data. \code{maximize_correlation} uses the
\code{\link[optimx]{optimx}} package to rotate the data in such a way
that the original variables have maximum correlation with the embedding
axes.
}
dimRed/man/as.data.frame.Rd0000644000176200001440000000124213065033470015124 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{as.data.frame}
\alias{as.data.frame}
\title{Converts to data.frame}
\usage{
as.data.frame(x, row.names, optional, ...)
}
\arguments{
\item{x}{The object to be converted}

\item{row.names}{unused in \code{dimRed}}

\item{optional}{unused in \code{dimRed}}

\item{...}{other arguments.}
}
\description{
General conversions of objects created by \code{dimRed} to
\code{data.frame}. See class documentations for details
(\code{\link{dimRedData}}, \code{\link{dimRedResult}}). For the
documentation of this function in the base package, see
\code{\link[base]{as.data.frame.default}}.
}
dimRed/man/total_correlation-dimRedResult-method.Rd0000644000176200001440000000254713065033470022134 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\docType{methods}
\name{total_correlation,dimRedResult-method}
\alias{total_correlation,dimRedResult-method}
\alias{total_correlation}
\title{Method total_correlation}
\usage{
\S4method{total_correlation}{dimRedResult}(object, naxes = ndims(object),
  cor_method = "pearson", is.rotated = FALSE)
}
\arguments{
\item{object}{of class dimRedResult}

\item{naxes}{the number of axes to use for optimization.}

\item{cor_method}{the correlation method to use.}

\item{is.rotated}{logical, if \code{FALSE} the object will be rotated
before computing the correlation.}
}
\description{
Calculate the total correlation of the variables with the axes to
assess the quality of a dimensionality reduction.
}
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}}
}
dimRed/man/PCA-class.Rd0000644000176200001440000000461013065033470014230 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pca.R
\docType{class}
\name{PCA-class}
\alias{PCA-class}
\alias{PCA}
\title{Principal Component Analysis}
\description{
S4 Class implementing PCA.
}
\details{
PCA transforms the data into orthogonal components so that the first
axis accounts for the largest variance in the data; all following axes
account for the highest remaining variance under the constraint that
they are orthogonal to the preceding axes. PCA is sensitive to the
scaling of the variables. PCA is by far the fastest and simplest method
of dimensionality reduction and should probably always be applied as a
baseline when other methods are tested.
} \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ PCA can take the following parameters: \describe{ \item{ndim}{The number of output dimensions.} \item{center}{logical, should the data be centered, defaults to \code{TRUE}.} \item{scale.}{logical, should the data be scaled, defaults to \code{FALSE}.} } } \section{Implementation}{ Wraps around \code{\link{prcomp}}. Because PCA can be reduced to a simple rotation, forward and backward projection functions are supplied. . } \examples{ dat <- loadDataSet("Iris") ## using the S4 Class pca <- PCA() emb <- pca@fun(dat, pca@stdpars) ## using embed() emb2 <- embed(dat, "PCA") plot(emb, type = "2vars") plot(emb@inverse(emb@data), type = "3vars") } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/HLLE-class.Rd0000644000176200001440000000423713065033470014356 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/hlle.R \docType{class} \name{HLLE-class} \alias{HLLE-class} \alias{HLLE} \title{Hessian Locally Linear Embedding} \description{ An S4 Class implementing Hessian Locally Linear Embedding (HLLE) } \details{ HLLE uses local hessians to approximate the curvines and is an extension to non-convex subsets in lowdimensional space. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ HLLE can take the following parameters: \describe{ \item{knn}{neighborhood size} \item{ndim}{number of output dimensions} } } \section{Implementation}{ Own implementation, sticks to the algorithm in Donoho and Grimes (2003). Makes use of sparsity to speed up final embedding. } \examples{ dat <- loadDataSet("3D S Curve", n = 1500) ## directy use the S4 class: hlle <- HLLE() emb <- hlle@fun(dat, hlle@stdpars) ## using embed(): emb2 <- embed(dat, "HLLE", knn = 45) plot(emb, type = "2vars") plot(emb2, type = "2vars") } \references{ Donoho, D.L., Grimes, C., 2003. Hessian eigenmaps: Locally linear embedding techniques for high-dimensional data. PNAS 100, 5591-5596. 
doi:10.1073/pnas.1031596100 } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/DiffusionMaps-class.Rd0000644000176200001440000000631713065033470016402 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/diffmap.R \docType{class} \name{DiffusionMaps-class} \alias{DiffusionMaps-class} \alias{DiffusionMaps} \title{Diffusion Maps} \description{ An S4 Class implementing Diffusion Maps } \details{ Diffusion Maps uses a diffusion probability matrix to robustly approximate a manifold. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ Diffusion Maps can take the following parameters: \describe{ \item{d}{a function transforming a matrix row wise into a distance matrix or \code{dist} object, e.g. \code{\link[stats]{dist}}.} \item{ndim}{The number of dimensions} \item{eps}{The epsilon parameter that determines the diffusion weight matrix from a distance matrix \code{d}, \eqn{exp(-d^2/eps)}, if set to \code{"auto"} it will be set to the median distance to the 0.01*n nearest neighbor.} \item{t}{Time-scale parameter. The recommended value, 0, uses multiscale geometry.} \item{delta}{Sparsity cut-off for the symmetric graph Laplacian, a higher value results in more sparsity and faster calculation. The predefined value is 10^-5.} } } \section{Implementation}{ Wraps around \code{\link[diffusionMap]{diffuse}}, see there for details. It uses the notation of Richards et al. (2009) which is slightly different from the one in the original paper (Coifman and Lafon, 2006) and there is no \eqn{\alpha} parameter. There is also an out-of-sample extension, see examples. } \examples{ dat <- loadDataSet("3D S Curve") ## use the S4 Class directly: diffmap <- DiffusionMaps() emb <- diffmap@fun(dat, diffmap@stdpars) ## simpler, use embed(): emb2 <- embed(dat, "DiffusionMaps") plot(emb, type = "2vars") samp <- sample(floor(nrow(dat) / 10)) embsamp <- diffmap@fun(dat[samp], diffmap@stdpars) embother <- embsamp@apply(dat[-samp]) plot(embsamp, type = "2vars") points(embother) } \references{ Richards, J.W., Freeman, P.E., Lee, A.B., Schafer, C.M., 2009. Exploiting Low-Dimensional Structure in Astronomical Spectra. ApJ 691, 32. doi:10.1088/0004-637X/691/1/32 Coifman, R.R., Lafon, S., 2006. Diffusion maps. Applied and Computational Harmonic Analysis 21, 5-30. 
doi:10.1016/j.acha.2006.04.006
}
\seealso{
Other dimensionality reduction methods: \code{\link{DRR-class}},
\code{\link{DrL-class}}, \code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}},
\code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}},
\code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}},
\code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}},
\code{\link{nMDS-class}}, \code{\link{tSNE-class}}
}
dimRed/man/makeKNNgraph.Rd0000644000176200001440000000141513065033470015030 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{makeKNNgraph}
\alias{makeKNNgraph}
\title{makeKNNgraph}
\usage{
makeKNNgraph(x, k, eps = 0, diag = FALSE)
}
\arguments{
\item{x}{data, a matrix, observations in rows, dimensions in columns}

\item{k}{the number of nearest neighbors.}

\item{eps}{number, if \code{eps > 0} the KNN search is approximate,
see \code{\link[RANN]{nn2}}}

\item{diag}{logical, if \code{TRUE} every node of the returned graph
will have an edge with weight \code{0} to itself.}
}
\value{
an object of type \code{\link[igraph]{igraph}} with the edge weights
being the distances.
}
\description{
Create a K-nearest neighbor graph from data x. Uses
\code{\link[RANN]{nn2}} as a fast way to find the nearest neighbors.
}
dimRed/man/AUC_lnK_R_NX-dimRedResult-method.Rd0000644000176200001440000000252513065033470020506 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\docType{methods}
\name{AUC_lnK_R_NX,dimRedResult-method}
\alias{AUC_lnK_R_NX,dimRedResult-method}
\alias{AUC_lnK_R_NX}
\title{Method AUC_lnK_R_NX}
\usage{
\S4method{AUC_lnK_R_NX}{dimRedResult}(object)
}
\arguments{
\item{object}{of class dimRedResult}
}
\description{
Calculate the area under R_NX(ln K), as used in Lee et al. (2013).
}
\references{
Lee, J.A., Renard, E., Bernard, G., Dupont, P., Verleysen, M., 2013.
Type 1 and 2 mixtures of Kullback-Leibler divergences as cost functions
in dimensionality reduction based on similarity preservation.
Neurocomputing 112, 92-107. doi:10.1016/j.neucom.2012.12.036
}
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
dimRed/man/installSuggests.Rd0000644000176200001440000000112513065033470015713 0ustar liggesusers% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{installSuggests}
\alias{installSuggests}
\title{installSuggests}
\usage{
installSuggests()
}
\description{
Install packages which are suggested by dimRed.
}
\details{
By default dimRed will not install all the dependencies, because there
are quite a lot of them, and if some are not available for your
platform, you would not be able to install dimRed without problems. To
solve this I provide a function which automatically installs all the
suggested packages.
} \examples{ \dontrun{ installSuggests() } } dimRed/man/getPars.Rd0000644000176200001440000000045113065033470014126 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/misc.R \name{getPars} \alias{getPars} \title{Method getPars} \usage{ getPars(object, ...) } \arguments{ \item{object}{The object to be converted.} \item{...}{other arguments.} } \description{ Extracts the pars slot. } dimRed/man/distance_correlation-dimRedResult-method.Rd0000644000176200001440000000217313065033470022576 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{distance_correlation,dimRedResult-method} \alias{distance_correlation,dimRedResult-method} \alias{distance_correlation} \title{Method distance_correlation} \usage{ \S4method{distance_correlation}{dimRedResult}(object) } \arguments{ \item{object}{of class dimRedResult} } \description{ Calculate the distance correlation between the distance matrices in high and low dimensioal space. } \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } dimRed/man/DrL-class.Rd0000644000176200001440000000446013065033470014311 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/graph_embed.R \docType{class} \name{DrL-class} \alias{DrL-class} \alias{DrL} \title{Distributed Recursive Graph Layout} \description{ An S4 Class implementing Distributed recursive Graph Layout. } \details{ DrL uses a complex algorithm to avoid local minima in the graph embedding which uses several steps. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ DrL can take the following parameters: \describe{ \item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3} \item{knn}{Reduce the graph to keep only the neares neighbors. Defaults to 100.} \item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.} } } \section{Implementation}{ Wraps around \code{\link[igraph]{layout_with_drl}}. The parameters maxiter, epsilon and kkconst are set to the default values and cannot be set, this may change in a future release. The DimRed Package adds an extra sparsity parameter by constructing a knn graph which also may improve visualization quality. 
} \examples{ \dontrun{ dat <- loadDataSet("Swiss Roll", n = 500) ## use the S4 Class directly: drl <- DrL() emb <- drl@fun(dat, drl@stdpars) ## simpler, use embed(): emb2 <- embed(dat, "DrL") plot(emb) } } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/reconstruction_rmse-dimRedResult-method.Rd0000644000176200001440000000221613065033470022510 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{reconstruction_rmse,dimRedResult-method} \alias{reconstruction_rmse,dimRedResult-method} \alias{reconstruction_rmse} \title{Method reconstruction_rmse} \usage{ \S4method{reconstruction_rmse}{dimRedResult}(object) } \arguments{ \item{object}{of class dimRedResult} } \description{ Calculate the reconstruction root mean squared error a dimensionality reduction, the method must have an inverse mapping. } \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } dimRed/man/dimRed-package.Rd0000644000176200001440000000263013065033470015317 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dimRed.R \docType{package} \name{dimRed-package} \alias{dimRed} \alias{dimRed-package} \title{The dimRed package} \description{ This package simplifies dimensionality reduction in R by providing a framework of S4 classes and methods. dimRed collects dimensionality reduction methods that are implemented in R and implements others. It gives them a common interface and provides plotting functions for visualization and functions for quality assessment. Funding provided by the Department for Biogeochemical Integration, Empirical Inference of the Earth System Group, at the Max Plack Institute for Biogeochemistry, Jena. } \references{ Lee, J.A., Renard, E., Bernard, G., Dupont, P., Verleysen, M., 2013. Type 1 and 2 mixtures of Kullback-Leibler divergences as cost functions in dimensionality reduction based on similarity preservation. Neurocomputing. 112, 92-107. doi:10.1016/j.neucom.2012.12.036 Lee, J.A., Lee, J.A., Verleysen, M., 2008. Rank-based quality assessment of nonlinear dimensionality reduction. Proceedings of ESANN 2008 49-54. Chen, L., Buja, A., 2006. Local Multidimensional Scaling for Nonlinear Dimension Reduction, Graph Layout and Proximity Analysis. 
} \seealso{ Useful links: \itemize{ \item \url{https://github.com/gdkrmr/dimRed} } } \author{ \strong{Maintainer}: Guido Kraemer \email{gkraemer@bgc-jena.mpg.de} } dimRed/man/getOrgData.Rd0000644000176200001440000000047513065033470014550 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/misc.R \name{getOrgData} \alias{getOrgData} \title{Method getOrgData} \usage{ getOrgData(object, ...) } \arguments{ \item{object}{The object to extract data from.} \item{...}{other arguments.} } \description{ Extract the Original data. } dimRed/man/dimRedResult-class.Rd0000644000176200001440000000674413065033470016242 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dimRedResult-class.R \docType{class} \name{dimRedResult-class} \alias{dimRedResult-class} \alias{dimRedResult} \alias{predict,dimRedResult-method} \alias{inverse,dimRedResult-method} \alias{inverse} \alias{as.data.frame,dimRedResult-method} \alias{getPars,dimRedResult-method} \alias{print,dimRedResult-method} \alias{getOrgData,dimRedResult-method} \alias{getDimRedData,dimRedResult-method} \alias{ndims,dimRedResult-method} \title{Class "dimRedResult"} \usage{ \S4method{predict}{dimRedResult}(object, xnew) \S4method{inverse}{dimRedResult}(object, ynew) \S4method{as.data.frame}{dimRedResult}(x, org.data.prefix = "org.", meta.prefix = "meta.", data.prefix = "") \S4method{getPars}{dimRedResult}(object) \S4method{print}{dimRedResult}(x) \S4method{getOrgData}{dimRedResult}(object) \S4method{getDimRedData}{dimRedResult}(object) \S4method{ndims}{dimRedResult}(object) } \arguments{ \item{object}{Of class \code{dimRedResult}} \item{xnew}{new data, of type \code{\link{dimRedData}}} \item{ynew}{embedded data, of type \code{\link{dimRedData}}} \item{x}{Of class \code{dimRedResult}} \item{org.data.prefix}{Prefix for the columns of the org.data slot.} \item{meta.prefix}{Prefix for the columns of \code{x@data@meta}.} \item{data.prefix}{Prefix for the columns of \code{x@data@data}.} } \description{ A class to hold the results of of a dimensionality reduction. } \section{Methods (by generic)}{ \itemize{ \item \code{predict}: apply a trained method to new data, does not work with all methods, will give an error if there is no \code{apply}. In some cases the apply function may only be an approximation. \item \code{inverse}: inverse transformation of embedded data, does not work with all methods, will give an error if there is no \code{inverse}. In some cases the apply function may only be an approximation. \item \code{as.data.frame}: convert to \code{data.frame} \item \code{getPars}: Get the parameters with which the method was called. \item \code{print}: Method for printing. \item \code{getOrgData}: Get the original data and meta.data \item \code{getDimRedData}: Get the embedded data \item \code{ndims}: Extract the number of embedding dimensions. 
}} \section{Slots}{ \describe{ \item{\code{data}}{Output data of class dimRedData.} \item{\code{org.data}}{original data, a matrix.} \item{\code{apply}}{a function to apply the method to out-of-sample data, may not exist.} \item{\code{inverse}}{a function to calculate the original coordinates from reduced space, may not exist.} \item{\code{has.org.data}}{logical, if the original data is included in the object.} \item{\code{has.apply}}{logical, if a forward method exists.} \item{\code{has.inverse}}{logical, if an inverse method exists.} \item{\code{method}}{saves the method used.} \item{\code{pars}}{saves the parameters used.} }} \examples{ ## Create object by embedding data iris.pca <- embed(loadDataSet("Iris"), "PCA") ## Convert the result to a data.frame head(as(iris.pca, "data.frame")) head(as.data.frame(iris.pca)) ## There are no name clashes to avoid here: head(as.data.frame(iris.pca, org.data.prefix = "", meta.prefix = "", data.prefix = "")) ## Print it more or less nicely: print(iris.pca) ## Get the embedded data as a dimRedData object: getDimRedData(iris.pca) ## Get the original data including meta information: getOrgData(iris.pca) ## Get the number of variables: ndims(iris.pca) } dimRed/man/Q_local-dimRedResult-method.Rd0000644000176200001440000000206513065033470017755 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{Q_local,dimRedResult-method} \alias{Q_local,dimRedResult-method} \alias{Q_local} \title{Method Q_local} \usage{ \S4method{Q_local}{dimRedResult}(object) } \arguments{ \item{object}{of class dimRedResult} } \description{ Calculate the Q_local score to assess the quality of a dimensionality reduction. } \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } dimRed/man/MDS-class.Rd0000644000176200001440000000422713065033470014254 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mds.R \docType{class} \name{MDS-class} \alias{MDS-class} \alias{MDS} \title{Metric Dimensional Scaling} \description{ An S4 Class implementing classical scaling (MDS). } \details{ MDS tries to maintain distances in high- and low-dimensional space; it has the advantage over PCA that arbitrary distance functions can be used, but it is computationally more demanding. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}.
} \section{Parameters}{ MDS can take the following parameters: \describe{ \item{ndim}{The number of dimensions.} \item{d}{The function to calculate the distance matrix from the input coordinates, defaults to euclidean distances.} } } \section{Implementation}{ Wraps around \code{\link[stats]{cmdscale}}. The implementation also provides an out-of-sample extension which is not completely optimized yet. } \examples{ \dontrun{ dat <- loadDataSet("3D S Curve") ## Use the S4 Class directly: mds <- MDS() emb <- mds@fun(dat, mds@stdpars) ## use embed(): emb2 <- embed(dat, "MDS", d = function(x) exp(stats::dist(x))) plot(emb, type = "2vars") plot(emb2, type = "2vars") } } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/KamadaKawai-class.Rd0000644000176200001440000000512213065033470015757 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/graph_embed.R \docType{class} \name{KamadaKawai-class} \alias{KamadaKawai-class} \alias{KamadaKawai} \title{Graph Embedding via the Kamada Kawai Algorithm} \description{ An S4 Class implementing the Kamada Kawai Algorithm for graph embedding. } \details{ Graph embedding algorithms see the data as a graph. Between the nodes of the graph exist attracting and repelling forces which can be modeled as electrical fields or springs connecting the nodes. The graph is then forced into a lower dimensional representation that tries to represent the forces between the nodes accurately by minimizing the total energy of the attracting and repelling forces. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ KamadaKawai can take the following parameters: \describe{ \item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3} \item{knn}{Reduce the graph to keep only the nearest neighbors. Defaults to 100.} \item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.} } } \section{Implementation}{ Wraps around \code{\link[igraph]{layout_with_kk}}. The parameters maxiter, epsilon and kkconst are set to the default values and cannot be set; this may change in a future release. The dimRed package adds an extra sparsity parameter by constructing a knn graph, which may also improve visualization quality.
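As a rough sketch of how the sparsity parameter can be used (this snippet is illustrative only and not part of the original package examples; the \code{knn} value is arbitrary): \preformatted{
dat <- loadDataSet("Swiss Roll", n = 500)
## a smaller knn gives a sparser graph and a faster layout
emb_sparse <- embed(dat, "KamadaKawai", knn = 30)
plot(emb_sparse, type = "2vars")
}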
} \examples{ dat <- loadDataSet("Swiss Roll", n = 500) kamada_kawai <- KamadaKawai() kk <- kamada_kawai@fun(dat, kamada_kawai@stdpars) plot(kk@data@data) } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/mean_R_NX-dimRedResult-method.Rd0000644000176200001440000000207713065033470020214 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{mean_R_NX,dimRedResult-method} \alias{mean_R_NX,dimRedResult-method} \alias{mean_R_NX} \title{Method mean_R_NX} \usage{ \S4method{mean_R_NX}{dimRedResult}(object) } \arguments{ \item{object}{of class dimRedResult} } \description{ Calculate the mean_R_NX score to assess the quality of a dimensionality reduction. } \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } dimRed/man/LCMC-dimRedResult-method.Rd0000644000176200001440000000213713065033470017121 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{LCMC,dimRedResult-method} \alias{LCMC,dimRedResult-method} \alias{LCMC} \title{Method LCMC} \usage{ \S4method{LCMC}{dimRedResult}(object) } \arguments{ \item{object}{of class dimRedResult} } \description{ Calculates the Local Continuity Meta Criterion, which is \code{\link{Q_NX}} adjusted for random overlap inside the K-ary neighborhood. } \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } dimRed/man/FruchtermanReingold-class.Rd0000644000176200001440000000430113065033470017564 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/graph_embed.R \docType{class} \name{FruchtermanReingold-class} \alias{FruchtermanReingold-class} \alias{FruchtermanReingold} \title{Fruchterman Reingold Graph Layout} \description{ An S4 Class implementing the Fruchterman Reingold Graph Layout algorithm. 
} \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ \describe{ \item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3} \item{knn}{Reduce the graph to keep only the nearest neighbors. Defaults to 100.} \item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.} } } \section{Implementation}{ Wraps around \code{\link[igraph]{layout_with_fr}}, see there for details. The Fruchterman Reingold algorithm puts the data into a circle and puts connected points close to each other. } \examples{ dat <- loadDataSet("Swiss Roll", n = 100) ## use the S4 Class directly: fruchterman_reingold <- FruchtermanReingold() pars <- fruchterman_reingold@stdpars pars$knn <- 5 emb <- fruchterman_reingold@fun(dat, pars) ## simpler, use embed(): emb2 <- embed(dat, "FruchtermanReingold", knn = 5) plot(emb, type = "2vars") } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/tSNE-class.Rd0000644000176200001440000000531113065033470014435 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tsne.R \docType{class} \name{tSNE-class} \alias{tSNE-class} \alias{tSNE} \title{t-Distributed Stochastic Neighborhood Embedding} \description{ An S4 Class for t-SNE. } \details{ t-SNE is a method that uses Kullback-Leibler divergence between the distance matrices in high- and low-dimensional space to embed the data. The method is very well suited to visualize complex structures in low dimensions. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ t-SNE can take the following parameters: \describe{ \item{d}{A distance function, defaults to euclidean distances} \item{perplexity}{The perplexity parameter, roughly equivalent to neighborhood size.} \item{theta}{Approximation for the nearest neighbour search; larger values are more inaccurate.} \item{ndim}{The number of embedding dimensions.} } } \section{Implementation}{ Wraps around \code{\link[Rtsne]{Rtsne}}, which is very well documented.
Setting \code{theta = 0} does a normal t-SNE; larger values for \code{theta < 1} use the Barnes-Hut algorithm, which scales much better with data size. Larger values for perplexity take larger neighborhoods into account. } \examples{ \dontrun{ dat <- loadDataSet("3D S Curve", n = 500) ## using the S4 class directly: tsne <- tSNE() emb <- tsne@fun(dat, tsne@stdpars) ## using embed() emb2 <- embed(dat, "tSNE", perplexity = 80) plot(emb, type = "2vars") plot(emb2, type = "2vars") } } \references{ Maaten, L. van der, 2014. Accelerating t-SNE using Tree-Based Algorithms. Journal of Machine Learning Research 15, 3221-3245. van der Maaten, L., Hinton, G., 2008. Visualizing Data using t-SNE. J. Mach. Learn. Res. 9, 2579-2605. } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}} } dimRed/man/Isomap-class.Rd0000644000176200001440000000474613065033470015067 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/isomap.R \docType{class} \name{Isomap-class} \alias{Isomap-class} \alias{Isomap} \title{Isomap embedding} \description{ An S4 Class implementing the Isomap Algorithm } \details{ The Isomap algorithm approximates a manifold using geodesic distances on a k nearest neighbor graph. Then classical scaling is performed on the resulting distance matrix. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ Isomap can take the following parameters: \describe{ \item{knn}{The number of nearest neighbors in the graph. Defaults to 50.} \item{ndim}{The number of embedding dimensions, defaults to 2.} } } \section{Implementation}{ The dimRed package uses its own implementation of Isomap which also comes with an out-of-sample extension (known as landmark Isomap). The default Isomap algorithm does not scale well computationally; the implementation here uses \code{\link[RANN]{nn2}} for a faster search of the nearest neighbors. If data are too large it may be useful to fit a subsample of the data and use the out-of-sample extension for the other points.
} \examples{ dat <- loadDataSet("3D S Curve", n = 500) ## use the S4 Class directly: isomap <- Isomap() emb <- isomap@fun(dat, isomap@stdpars) ## or simpler, use embed(): samp <- sample(nrow(dat), size = 200) emb2 <- embed(dat[samp], "Isomap", mute = NULL, knn = 10) emb3 <- emb2@apply(dat[-samp]) plot(emb2, type = "2vars") plot(emb3, type = "2vars") } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/mixColorRamps.Rd0000644000176200001440000000237513065033470015327 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mixColorSpaces.R \name{mixColorRamps} \alias{mixColorRamps} \alias{mixColor1Ramps} \alias{mixColor2Ramps} \alias{mixColor3Ramps} \title{Mixing color ramps} \usage{ mixColorRamps(vars, ramps) mixColor1Ramps(vars, ramps = colorRamp(c("blue", "black", "red"))) mixColor2Ramps(vars, ramps = list(colorRamp(c("blue", "green")), colorRamp(c("blue", "red")))) mixColor3Ramps(vars, ramps = list(colorRamp(c("#001A00", "#00E600")), colorRamp(c("#00001A", "#0000E6")), colorRamp(c("#1A0000", "#E60000")))) } \arguments{ \item{vars}{a list of variables} \item{ramps}{a list of color ramps, one for each variable.} } \description{ mix different color ramps } \details{ automatically create colors to represent a varying number of dimensions. } \examples{ cols <- expand.grid(x = seq(0, 1, length.out = 10), y = seq(0, 1, length.out = 10), z = seq(0, 1, length.out = 10)) mixed <- mixColor3Ramps(cols) \dontrun{ library(rgl) plot3d(cols$x, cols$y, cols$z, col = mixed, pch = 15) cols <- expand.grid(x = seq(0, 1, length.out = 10), y = seq(0, 1, length.out = 10)) mixed <- mixColor2Ramps(cols) } plot(cols$x, cols$y, col = mixed, pch = 15) } dimRed/man/reconstruction_error-dimRedResult-method.Rd0000644000176200001440000000366713065033470022706 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{reconstruction_error,dimRedResult-method} \alias{reconstruction_error,dimRedResult-method} \alias{reconstruction_error} \title{Method reconstruction_error} \usage{ \S4method{reconstruction_error}{dimRedResult}(object, n = seq_len(ndims(object)), error_fun = "rmse") } \arguments{ \item{object}{of class dimRedResult} \item{n}{a positive integer or vector of integers \code{<= ndims(object)}} \item{error_fun}{a function or string indicating an error function.} } \value{ a vector of number with the same length as \code{n} with the } \description{ Calculate the error using only the first \code{n} dimensions of the embedded data. \code{error_fun} can either be one of \code{c("rmse", "mae")} to calculate the root mean square error or the mean absolute error respectively, or a function that takes to equally sized vectors as input and returns a single number as output. 
} \examples{ \dontrun{ ir <- loadDataSet("Iris") ir.drr <- embed(ir, "DRR", ndim = ndims(ir)) ir.pca <- embed(ir, "PCA", ndim = ndims(ir)) rmse <- data.frame( rmse_drr = reconstruction_error(ir.drr), rmse_pca = reconstruction_error(ir.pca) ) matplot(rmse, type = "l") plot(ir) plot(ir.drr) plot(ir.pca) } } \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } \author{ Guido Kraemer } dimRed/man/dimRedMethodList.Rd0000644000176200001440000000103013065033470015714 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dimRedMethod-class.R \name{dimRedMethodList} \alias{dimRedMethodList} \title{dimRedMethodList} \usage{ dimRedMethodList() } \value{ a character vector with the names of classes that inherit from \code{dimRedMethod}. } \description{ Get the names of all methods for dimensionality reduction. } \details{ Returns the name of all classes that inherit from \code{\link{dimRedMethod-class}} to use with \code{\link{embed}}. } \examples{ dimRedMethodList() } dimRed/man/R_NX-dimRedResult-method.Rd0000644000176200001440000000222713065033470017211 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{R_NX,dimRedResult-method} \alias{R_NX,dimRedResult-method} \alias{R_NX} \title{Method R_NX} \usage{ \S4method{R_NX}{dimRedResult}(object) } \arguments{ \item{object}{of class dimRedResult} } \description{ Calculate the R_NX score from Lee et. al. (2013) which shows the neighborhood preservation for the Kth nearest neighbors, corrected for random point distributions and scaled to range [0, 1]. } \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } dimRed/man/DRR-class.Rd0000644000176200001440000001053713065033470014261 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/drr.R \docType{class} \name{DRR-class} \alias{DRR-class} \alias{DRR} \title{Dimensionality Reduction via Regression} \description{ An S4 Class implementing Dimensionality Reduction via Regression (DRR). } \details{ DRR is a non-linear extension of PCA that uses Kernel Ridge regression. 
} \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ DRR can take the following parameters: \describe{ \item{ndim}{The number of dimensions} \item{lambda}{The regularization parameter for the ridge regression.} \item{kernel}{The kernel to use for KRR, defaults to \code{"rbfdot"}.} \item{kernel.pars}{A list with kernel parameters, elements depend on the kernel used, \code{"rbfdot"} uses \code{"sigma"}.} \item{pca}{logical, should an initial pca step be performed, defaults to \code{TRUE}.} \item{pca.center}{logical, should the data be centered before the pca step. Defaults to \code{TRUE}.} \item{pca.scale}{logical, should the data be scaled before the pca step. Defaults to \code{FALSE}.} \item{fastcv}{logical, should \code{\link[CVST]{fastCV}} from the CVST package be used instead of normal cross-validation.} \item{fastcv.test}{If \code{fastcv = TRUE}, separate test data set for fastcv.} \item{cv.folds}{if \code{fastcv = FALSE}, specifies the number of folds for cross-validation.} \item{fastkrr.nblocks}{integer, higher values sacrifice numerical accuracy for speed and lower memory use, see below for details.} \item{verbose}{logical, should the cross-validation results be printed out.} } } \section{Implementation}{ Wraps around \code{\link[DRR]{drr}}, see there for details. DRR is a non-linear extension of principal components analysis using Kernel Ridge Regression (KRR, for details see \code{\link[CVST]{constructKRRLearner}} and \code{\link[DRR]{constructFastKRRLearner}}). Non-linear regression is used to explain more variance than PCA. DRR provides an out-of-sample extension and a backward projection. The most expensive computations are matrix inversions; therefore the implementation profits a lot from a multithreaded BLAS library. The best parameters for each KRR are determined by cross-validation over all parameter combinations of \code{lambda} and \code{kernel.pars}; using fewer parameter values will speed up computation time. Calculation of KRR can be accelerated by increasing \code{fastkrr.nblocks}; it should be smaller than n^{1/3}, and higher values sacrifice some accuracy. For details see \code{\link[DRR]{constructFastKRRLearner}}. Another way to speed up is to use \code{pars$fastcv = TRUE}, which might provide a more efficient way to search the parameter space but may also miss the global maximum; I have not run tests on the accuracy of this method. } \examples{ \dontrun{ dat <- loadDataSet("variable Noise Helix", n = 200)[sample(200)] ## use the S4 Class directly: drr <- DRR() pars <- drr@stdpars pars$ndim <- 3 emb <- drr@fun(dat, pars) ## simpler, use embed(): emb2 <- embed(dat, "DRR", ndim = 3) plot(dat, type = "3vars") plot(emb, type = "3vars") plot(emb@inverse(emb@data@data[, 1, drop = FALSE]), type = "3vars") } } \references{ Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality Reduction via Regression in Hyperspectral Imagery. IEEE Journal of Selected Topics in Signal Processing 9, 1026-1036.
doi:10.1109/JSTSP.2015.2417833 } \seealso{ Other dimensionality reduction methods: \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/print.Rd0000644000176200001440000000047713065033470013665 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/misc.R \name{print} \alias{print} \title{Method print} \usage{ print(x, ...) } \arguments{ \item{x}{The object to be printed.} \item{...}{Other arguments for printing.} } \description{ Imports the print method into the package namespace. } dimRed/man/quality.Rd0000644000176200001440000001076613065033470014223 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{quality,dimRedResult-method} \alias{quality,dimRedResult-method} \alias{quality} \alias{quality.dimRedResult} \alias{dimRedQualityList} \title{Quality Criteria for dimensionality reduction.} \usage{ \S4method{quality}{dimRedResult}(.data, .method = dimRedQualityList(), .mute = character(0), ...) dimRedQualityList() } \arguments{ \item{.data}{object of class \code{dimRedResult}} \item{.method}{character vector naming one of the methods} \item{.mute}{what output from the embedding method should be muted.} \item{...}{the parameters, internally passed as a list to the quality method as \code{pars = list(...)}} } \value{ a number } \description{ A collection of functions to compute quality measures on \code{\link{dimRedResult}} objects. } \section{Methods (by class)}{ \itemize{ \item \code{dimRedResult}: Calculate a quality index from a dimRedResult object. }} \section{Implemented methods}{ Method must be one of \code{"\link{Q_local}", "\link{Q_global}", "\link{mean_R_NX}", "\link{total_correlation}", "\link{cophenetic_correlation}", "\link{distance_correlation}", "\link{reconstruction_rmse}"} } \section{Rank based criteria}{ \code{Q_local}, \code{Q_global}, and \code{mean_R_NX} are quality criteria based on the Co-ranking matrix. \code{Q_local} and \code{Q_global} determine the local/global quality of the embedding, while \code{mean_R_NX} determines the quality of the overall embedding. They are parameter free and return a single number. The object must include the original data. The number returned is in the range [0, 1]; higher values mean a better local/global embedding. } \section{Correlation based criteria}{ \code{total_correlation} calculates the sum of the mean squared correlations of the original axes with the axes in reduced dimensions; because some methods do not care about correlations with axes, there is an option to rotate data in reduced space to maximize this criterion. The number may be greater than one if more dimensions are summed up. \code{cophenetic_correlation} calculates the correlation between the lower triangles of the distance matrices; the correlation and distance methods may be specified. The result is in range [-1, 1]. \code{distance_correlation} measures the independence of samples by calculating the correlation of distances. For details see \code{\link[energy]{dcor}}. } \section{Reconstruction error}{ \code{reconstruction_rmse} calculates the root mean squared error of the reconstruction.
\code{object} requires an inverse function. } \examples{ \dontrun{ embed_methods <- dimRedMethodList() quality_methods <- dimRedQualityList() scurve <- loadDataSet("3D S Curve", n = 500) quality_results <- matrix(NA, length(embed_methods), length(quality_methods), dimnames = list(embed_methods, quality_methods)) embedded_data <- list() for (e in embed_methods) { message("embedding: ", e) embedded_data[[e]] <- embed(scurve, e, .mute = c("message", "output")) for (q in quality_methods) { message(" quality: ", q) quality_results[e, q] <- tryCatch( quality(embedded_data[[e]], q), error = function (e) NA ) } } print(quality_results) } } \references{ Lueks, W., Mokbel, B., Biehl, M., Hammer, B., 2011. How to Evaluate Dimensionality Reduction? - Improving the Co-ranking Matrix. arXiv:1110.3917 [cs]. Szekely, G.J., Rizzo, M.L., Bakirov, N.K., 2007. Measuring and testing dependence by correlation of distances. Ann. Statist. 35, 2769-2794. doi:10.1214/009053607000000505 Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015. Multi-scale similarities in stochastic neighbour embedding: Reducing dimensionality while preserving both local and global structure. Neurocomputing, 169, 246-261. doi:10.1016/j.neucom.2014.12.095 } \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_global,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } \author{ Guido Kraemer } dimRed/man/as.dimRedData.Rd0000644000176200001440000000050713065033470015123 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/misc.R \name{as.dimRedData} \alias{as.dimRedData} \title{Converts to dimRedData} \usage{ as.dimRedData(formula, ...) } \arguments{ \item{formula}{a formula object.} \item{...}{other arguments.} } \description{ Conversion functions to dimRedData. } dimRed/man/Q_global-dimRedResult-method.Rd0000644000176200001440000000207213065033470020121 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality.R \docType{methods} \name{Q_global,dimRedResult-method} \alias{Q_global,dimRedResult-method} \alias{Q_global} \title{Method Q_global} \usage{ \S4method{Q_global}{dimRedResult}(object) } \arguments{ \item{object}{of class dimRedResult} } \description{ Calculate the Q_global score to assess the quality of a dimensionality reduction. 
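A minimal usage sketch (illustrative only; it simply mirrors the \code{\link{quality}} interface documented elsewhere in this package): \preformatted{
## embed a data set and score the global quality of the embedding
emb <- embed(loadDataSet("3D S Curve", n = 500), "PCA")
Q_global(emb)             # call the method directly
quality(emb, "Q_global")  # or go through the quality() wrapper
}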
} \seealso{ Other Quality scores for dimensionality reduction: \code{\link{AUC_lnK_R_NX,dimRedResult-method}}, \code{\link{LCMC,dimRedResult-method}}, \code{\link{Q_NX,dimRedResult-method}}, \code{\link{Q_local,dimRedResult-method}}, \code{\link{R_NX,dimRedResult-method}}, \code{\link{cophenetic_correlation,dimRedResult-method}}, \code{\link{distance_correlation,dimRedResult-method}}, \code{\link{mean_R_NX,dimRedResult-method}}, \code{\link{quality,dimRedResult-method}}, \code{\link{reconstruction_error,dimRedResult-method}}, \code{\link{reconstruction_rmse,dimRedResult-method}}, \code{\link{total_correlation,dimRedResult-method}} } dimRed/man/dimRedData-class.Rd0000644000176200001440000000656613065033470015633 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dimRedData-class.R \docType{class} \name{dimRedData-class} \alias{dimRedData-class} \alias{dimRedData} \alias{as.data.frame,dimRedData-method} \alias{as.dimRedData,formula-method} \alias{getData,dimRedData-method} \alias{getMeta,dimRedData-method} \alias{nrow,dimRedData-method} \alias{[,dimRedData,ANY,ANY,ANY-method} \alias{ndims,dimRedData-method} \title{Class "dimRedData"} \usage{ \S4method{as.data.frame}{dimRedData}(x, meta.prefix = "meta.", data.prefix = "") \S4method{as.dimRedData}{formula}(formula, data) \S4method{getData}{dimRedData}(object) \S4method{getMeta}{dimRedData}(object) \S4method{nrow}{dimRedData}(x) \S4method{[}{dimRedData,ANY,ANY,ANY}(x, i) \S4method{ndims}{dimRedData}(object) } \arguments{ \item{x}{Of class dimRedData} \item{meta.prefix}{Prefix for the columns of the meta data names.} \item{data.prefix}{Prefix for the columns of the variable names.} \item{formula}{The formula; the left hand side is assigned to the meta slot, the right hand side is assigned to the data slot.} \item{data}{A data frame} \item{object}{Of class dimRedData.} \item{i}{a valid index for subsetting rows.} } \description{ A class to hold data for dimensionality reduction and methods. } \details{ The class has two slots, \code{data} and \code{meta}. The \code{data} slot contains a \code{numeric matrix} with variables in columns and observations in rows. The \code{meta} slot may contain a \code{data.frame} with additional information. Both slots need to have the same number of rows or the \code{meta} slot needs to contain an empty \code{data.frame}. See examples for easy conversion from and to \code{data.frame}. For plotting functions see \code{\link{plot.dimRedData}}. } \section{Methods (by generic)}{ \itemize{ \item \code{as.data.frame}: convert to data.frame \item \code{as.dimRedData}: Convert a \code{data.frame} to a dimRedData object using a formula \item \code{getData}: Get the data slot. \item \code{getMeta}: Get the meta slot. \item \code{nrow}: Get the number of observations. \item \code{[}: Subset rows. \item \code{ndims}: Extract the number of variables from the data. }} \section{Slots}{ \describe{ \item{\code{data}}{of class \code{matrix}, holds the data, observations in rows, variables in columns} \item{\code{meta}}{of class \code{data.frame}, holds meta data such as classes, internal manifold coordinates, or simply additional data of the data set.
Must have the same number of rows as the \code{data} slot or be an empty data frame.} }} \examples{ ## Load an example data set: s3d <- loadDataSet("3D S Curve") ## Create using a constructor: ### without meta information: dimRedData(iris[, 1:4]) ### with meta information: dimRedData(iris[, 1:4], iris[, 5]) ### using slot names: dimRedData(data = iris[, 1:4], meta = iris[, 5]) ## Convert to a dimRedData object: Iris <- as(iris[, 1:4], "dimRedData") ## Convert to data.frame: head(as(s3d, "data.frame")) head(as.data.frame(s3d)) head(as.data.frame(as(iris[, 1:4], "dimRedData"))) ## Extract slots: head(getData(s3d)) head(getMeta(s3d)) ## Get the number of observations: nrow(s3d) ## Subset: s3d[1:5, ] ## create a dimRedData object using a formula as.dimRedData(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, iris)[1:5] ## Shuffle data: s3 <- s3d[sample(nrow(s3d))] ## Get the number of variables: ndims(s3d) } dimRed/man/FastICA-class.Rd0000644000176200001440000000411613065033470015040 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/fastica.R \docType{class} \name{FastICA-class} \alias{FastICA-class} \alias{FastICA} \title{Independent Component Analysis} \description{ An S4 Class implementing the FastICA algorithm for Independent Component Analysis. } \details{ ICA is used for blind signal separation of different sources. It is a linear projection. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ FastICA can take the following parameters: \describe{ \item{ndim}{The number of output dimensions. Defaults to \code{2}} } } \section{Implementation}{ Wraps around \code{\link[fastICA]{fastICA}}. FastICA uses a very fast approximation for negentropy to estimate statistical independence between signals. Because it is a simple rotation/projection, forward and backward functions can be given. } \examples{ dat <- loadDataSet("3D S Curve") ## use the S4 Class directly: fastica <- FastICA() emb <- fastica@fun(dat, pars = list(ndim = 2)) ## simpler, use embed(): emb2 <- embed(dat, "FastICA", ndim = 2) plot(emb@data@data) } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/plot.Rd0000644000176200001440000000370313065033470013502 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R \docType{methods} \name{plot} \alias{plot} \alias{plot.dimRed} \alias{plot,dimRedData,ANY-method} \alias{plot.dimRedData} \alias{plot,dimRedResult,ANY-method} \alias{plot.dimRedResult} \title{Plotting of dimRed* objects} \usage{ plot(x, y, ...)
\S4method{plot}{dimRedData,ANY}(x, type = "pairs", vars = seq_len(ncol(x@data)), col = seq_len(min(3, ncol(x@meta))), ...) \S4method{plot}{dimRedResult,ANY}(x, type = "pairs", vars = seq_len(ncol(x@data@data)), col = seq_len(min(3, ncol(x@data@meta))), ...) } \arguments{ \item{x}{dimRedResult/dimRedData class, e.g. output of embedded/loadDataSet} \item{y}{Ignored} \item{...}{handed over to the underlying plotting function.} \item{type}{plot type, one of \code{c("pairs", "parallel", "2vars", "3vars", "3varsrgl")}} \item{vars}{the axes of the embedding to use for plotting} \item{col}{the columns of the meta slot to use for coloring, can be referenced as the column names or number of x@data} } \description{ Plots a object of class dimRedResult and dimRedData. For the documentation of the plotting function in base see here: \code{\link{plot.default}}. } \details{ Plotting functions for the classes usind in \code{dimRed}. they are intended to give a quick overview over the results, so they are somewhat inflexible, e.g. it is hard to modify color scales or plotting parameters. If you require more control over plotting, it is better to convert the object to a \code{data.frame} first and use the standard functions for plotting. } \section{Methods (by class)}{ \itemize{ \item \code{x = dimRedData,y = ANY}: Ploting of dimRedData objects \item \code{x = dimRedResult,y = ANY}: Ploting of dimRedResult objects. }} \examples{ scurve = loadDataSet("3D S Curve") plot(scurve, type = "pairs", main = "pairs plot of S curve") plot(scurve, type = "parpl") plot(scurve, type = "2vars", vars = c("y", "z")) plot(scurve, type = "3vars") } dimRed/man/ndims.Rd0000644000176200001440000000050613065033470013634 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/misc.R \name{ndims} \alias{ndims} \title{Method ndims} \usage{ ndims(object, ...) } \arguments{ \item{object}{To extract the number of dimensions from.} \item{...}{Arguments for further methods} } \description{ Extract the number of dimensions. } dimRed/man/getMeta.Rd0000644000176200001440000000045113065033470014107 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/misc.R \name{getMeta} \alias{getMeta} \title{Method getMeta} \usage{ getMeta(object, ...) } \arguments{ \item{object}{The object to be converted.} \item{...}{other arguments.} } \description{ Extracts the meta slot. } dimRed/man/plot_R_NX.Rd0000644000176200001440000000213513065033470014366 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R \name{plot_R_NX} \alias{plot_R_NX} \title{plot_R_NX} \usage{ plot_R_NX(x) } \arguments{ \item{x}{a list of \code{\link{dimRedResult}} objects. The names of the list will appear in the legend with the AUC_lnK value.} } \value{ A ggplot object, the design can be changed by appending \code{theme(...)} } \description{ Plot the R_NX curve for different embeddings. Takes a list of \code{\link{dimRedResult}} objects as input. Also the Area under the curve values are computed for logarithmic K (AUC_lnK) and appear in the legend. 
} \examples{ ## define which methods to apply embed_methods <- c("Isomap", "PCA") ## load test data set data_set <- loadDataSet("3D S Curve", n = 1000) ## apply dimensionality reduction data_emb <- lapply(embed_methods, function(x) embed(data_set, x)) names(data_emb) <- embed_methods ## plot the R_NX curves: plot_R_NX(data_emb) + ggplot2::theme(legend.title = ggplot2::element_blank(), legend.position = c(0.5, 0.1), legend.justification = c(0.5, 0.1)) } dimRed/man/dataSets.Rd0000644000176200001440000000253413065033470014275 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dataSets.R \name{dataSets} \alias{dataSets} \alias{loadDataSet} \alias{dataSetList} \title{Example Data Sets for dimensionality reduction} \usage{ loadDataSet(name = dataSetList(), n = 2000, sigma = 0.05) dataSetList() } \arguments{ \item{name}{A character vector that specifies the name of the data set.} \item{n}{In generated data sets the number of points to be generated, else ignored.} \item{sigma}{In generated data sets the standard deviation of the noise added, else ignored.} } \value{ \code{loadDataSet} an object of class \code{\link{dimRedData}}. \code{dataSetList()} return a character string with the implemented data sets } \description{ A compilation of standard data sets that are often being used to showcase dimensionality reduction techniques. } \details{ The argument \code{name} should be one of \code{dataSetList()}. Partial matching is possible, see \code{\link{match.arg}}. Generated data sets contain the internal coordinates of the manifold in the \code{meta} slot. Call \code{dataSetList()} to see what data sets are available. } \examples{ ## a list of available data sets: dataSetList() ## Load a data set: swissRoll <- loadDataSet("Swiss Roll") \donttest{plot(swissRoll, type = "3vars")} ## Load Iris data set, partial matching: loadDataSet("I") } dimRed/man/getData.Rd0000644000176200001440000000040613065033470014072 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/misc.R \name{getData} \alias{getData} \title{Method getData} \usage{ getData(object) } \arguments{ \item{object}{The object to be converted.} } \description{ Extracts the data slot. } dimRed/man/LaplacianEigenmaps-class.Rd0000644000176200001440000000420313065033470017340 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/leim.R \docType{class} \name{LaplacianEigenmaps-class} \alias{LaplacianEigenmaps-class} \alias{LaplacianEigenmaps} \title{Laplacian Eigenmaps} \description{ An S4 Class implementing Laplacian Eigenmaps } \details{ Laplacian Eigenmaps use a kernel and were originally developed to separate non-convex clusters under the name spectral clustering. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. 
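For this method the \code{embed()} route looks roughly like the following sketch (illustrative only; the method name string follows the package-wide convention of passing the class name, and the parameter values are arbitrary): \preformatted{
dat <- loadDataSet("3D S Curve")
## knn graph with 50 neighbors, 2 output dimensions
emb <- embed(dat, "LaplacianEigenmaps", ndim = 2, sparse = "knn", knn = 50)
plot(emb, type = "2vars")
}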
} \section{Parameters}{ \code{LaplacianEigenmaps} can take the following parameters: \describe{ \item{ndim}{the number of output dimensions.} \item{sparse}{A character vector specifying how to make the graph sparse: \code{"knn"} means that a K-nearest neighbor graph is constructed, \code{"eps"} means that an epsilon neighborhood graph is constructed, otherwise a dense distance matrix is used.} \item{knn}{The number of nearest neighbors to use for the knn graph.} \item{eps}{The distance for the epsilon neighborhood graph.} \item{t}{Parameter for the transformation of the distance matrix by \eqn{w=exp(-d^2/t)}; larger values give less weight to differences in distance, \code{t == Inf} treats all distances != 0 equally.} \item{norm}{logical, should the normed Laplacian be used?} } } \section{Implementation}{ Wraps around \code{\link[loe]{spec.emb}}. } \examples{ dat <- loadDataSet("3D S Curve") leim <- LaplacianEigenmaps() emb <- leim@fun(dat, leim@stdpars) plot(emb@data@data) } \references{ Belkin, M., Niyogi, P., 2003. Laplacian Eigenmaps for Dimensionality Reduction and Data Representation. Neural Computation 15, 1373. } dimRed/man/dimRedMethod-class.Rd0000644000176200001440000000316113065033470016172 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dimRedMethod-class.R \docType{class} \name{dimRedMethod-class} \alias{dimRedMethod-class} \title{Class "dimRedMethod"} \description{ A virtual class "dimRedMethod" to serve as a template to implement methods for dimensionality reduction. } \details{ Implementations of dimensionality reductions should inherit from this class. The \code{fun} slot should be a function that takes three arguments \describe{ \item{data}{An object of class \code{\link{dimRedData}}.} \item{pars}{A list with the standard parameters.} \item{keep.org.data}{Logical. If the original data should be kept in the output.} } and returns an object of class \code{\link{dimRedResult}}. The \code{stdpars} slot should take a list that contains standard parameters for the implemented methods. This way the method can be called by \code{embed(data, "method-name", ...)}, where \code{...} can be used to change single parameters. } \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding.} \item{\code{stdpars}}{A list with the default parameters for the \code{fun} slot.} }} \seealso{ \link{dimRedMethodList} Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{kPCA-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/man/kPCA-class.Rd0000644000176200001440000000403113065033470014400 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/kpca.R \docType{class} \name{kPCA-class} \alias{kPCA-class} \alias{kPCA} \title{Kernel PCA} \description{ An S4 Class implementing Kernel PCA } \details{ Kernel PCA is a nonlinear extension of PCA using kernel methods.
} \section{Slots}{ \describe{ \item{\code{fun}}{A function that does the embedding and returns a dimRedResult object.} \item{\code{stdpars}}{The standard parameters for the function.} }} \section{General usage}{ Dimensionality reduction methods are S4 Classes that either be used directly, in which case they have to be initialized and a full list with parameters has to be handed to the \code{@fun()} slot, or the method name be passed to the embed function and parameters can be given to the \code{...}, in which case missing parameters will be replaced by the ones in the \code{@stdpars}. } \section{Parameters}{ Kernel PCA can take the following parameters: \describe{ \item{ndim}{the number of output dimensions, defaults to 2} \item{kernel}{The kernel function, either as a function or a character vector with the name of the kernel. Defaults to \code{"rbfdot"}} \item{kpar}{A list with the parameters for the kernel function} } } \section{Implementation}{ Wraps around \code{\link[kernlab]{kpca}}, but provides additionally forward and backward projections. } \examples{ \dontrun{ dat <- loadDataSet("3D S Curve") ## use the S4 class directly: kpca <- kPCA() emb <- kpca@fun(dat, kpca@stdpars) ## simpler, use embed(): emb2 <- embed(dat, "kPCA") plot(emb, type = "2vars") } } \seealso{ Other dimensionality reduction methods: \code{\link{DRR-class}}, \code{\link{DiffusionMaps-class}}, \code{\link{DrL-class}}, \code{\link{FastICA-class}}, \code{\link{FruchtermanReingold-class}}, \code{\link{HLLE-class}}, \code{\link{Isomap-class}}, \code{\link{KamadaKawai-class}}, \code{\link{LLE-class}}, \code{\link{MDS-class}}, \code{\link{PCA-class}}, \code{\link{dimRedMethod-class}}, \code{\link{nMDS-class}}, \code{\link{tSNE-class}} } dimRed/LICENSE0000644000176200001440000007674512772463050012514 0ustar liggesusersGNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright © 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. 
You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. “This License” refers to version 3 of the GNU General Public License. “Copyright” also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. “The Program” refers to any copyrightable work licensed under this License. Each licensee is addressed as “you”. “Licensees” and “recipients” may be individuals or organizations. To “modify” a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a “modified version” of the earlier work or a work “based on” the earlier work. A “covered work” means either the unmodified Program or a work based on the Program. To “propagate” a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To “convey” a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays “Appropriate Legal Notices” to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. 
If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The “source code” for a work means the preferred form of the work for making modifications to it. “Object code” means any non-source form of a work. A “Standard Interface” means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The “System Libraries” of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A “Major Component”, in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The “Corresponding Source” for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. 
Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to “keep intact all notices”. c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an “aggregate” if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. 
You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A “User Product” is either (1) a “consumer product”, which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, “normally used” refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. 
“Installation Information” for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. “Additional permissions” are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered “further restrictions” within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. 
Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An “entity transaction” is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A “contributor” is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's “contributor version”. A contributor's “essential patent claims” are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, “control” includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a “patent license” is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To “grant” such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. “Knowingly relying” means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is “discriminatory” if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. 
The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License “or any later version” applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee.