DRR/inst/doc/comparePCA.Rmd
---
title: "Comparing DRR and PCA"
author: "Guido Kraemer"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Compare DRR and PCA}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
This is an example application that compares the accuracy and
computational speed of DRR, for different parameter settings, against PCA.
## Load libraries
```{r, echo = TRUE, message = FALSE}
library(DRR)
```
## Read in data
```{r, echo = TRUE, warning = FALSE, error = FALSE}
data(iris)
in_data <- iris[,1:4]
npoints <- nrow(in_data)
nvars <- ncol(in_data)
for (i in seq_len(nvars)) in_data[[i]] <- as.numeric(in_data[[i]])
my_data <- scale(in_data[sample(npoints),], scale = FALSE)
```
## Fit the dimensionality reductions
```{r, echo = TRUE, results = "hide", warning = FALSE, error = FALSE, message = FALSE}
t0 <- system.time(pca <- prcomp(my_data, center = FALSE, scale. = FALSE))
t1 <- system.time(drr.1 <- drr(my_data))
t2 <- system.time(drr.2 <- drr(my_data, fastkrr.nblocks = 2))
t3 <- system.time(drr.3 <- drr(my_data, fastkrr.nblocks = 5))
t4 <- system.time(drr.4 <- drr(my_data, fastkrr.nblocks = 2, fastcv = TRUE))
```
## Plot the data
```{r, echo = FALSE, results = "hold"}
pairs(my_data , gap = 0, main = "iris")
pairs(pca$x , gap = 0, main = "pca")
pairs(drr.1$fitted.data, gap = 0, main = "drr.1")
pairs(drr.2$fitted.data, gap = 0, main = "drr.2")
pairs(drr.3$fitted.data, gap = 0, main = "drr.3")
pairs(drr.4$fitted.data, gap = 0, main = "drr.4")
```
## Calculate RMSE
```{r, echo = TRUE, tidy = TRUE}
rmse <- matrix(NA_real_, nrow = 5, ncol = nvars,
               dimnames = list(c("pca", "drr.1", "drr.2", "drr.3", "drr.4"),
                               seq_len(nvars)))
for(i in seq_len(nvars)){
pca_inv <- pca$x[, 1:i, drop = FALSE] %*% t(pca$rotation[, 1:i, drop = FALSE])
rmse["pca", i] <- sqrt( sum( (my_data - pca_inv )^2) )
rmse["drr.1",i] <- sqrt( sum( (my_data - drr.1$inverse(drr.1$fitted.data[, 1:i, drop = FALSE]) )^2) )
rmse["drr.2",i] <- sqrt( sum( (my_data - drr.2$inverse(drr.2$fitted.data[, 1:i, drop = FALSE]) )^2) )
rmse["drr.3",i] <- sqrt( sum( (my_data - drr.3$inverse(drr.3$fitted.data[, 1:i, drop = FALSE]) )^2) )
rmse["drr.4",i] <- sqrt( sum( (my_data - drr.4$inverse(drr.4$fitted.data[, 1:i, drop = FALSE]) )^2) )
}
```
## The Results
More blocks for fastkrr speed up the calculation, but too many blocks
hurt accuracy.
### RMSE
```{r, echo = FALSE}
print(rmse)
```
### Processing time
```{r, echo = FALSE}
print(rbind(pca = t0, drr.1 = t1, drr.2 = t2, drr.3 = t3, drr.4 = t4)[,1:3])
```
DRR/NAMESPACE
# Generated by roxygen2: do not edit by hand
export(constructFastKRRLearner)
export(drr)
import(CVST)
import(Matrix)
import(kernlab)
DRR/R/DRR-package.R
#' Dimensionality Reduction via Regression.
#'
#' DRR implements Dimensionality Reduction via Regression using
#' Kernel Ridge Regression. It also adds a faster implementation of
#' Kernel Ridge Regression that can be used with the CVST package.
#'
#' Thanks to the Max Planck Institute for Biogeochemistry in Jena for
#' the funding.
#'
#' @references
#' Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
#' Reduction via Regression in Hyperspectral Imagery. IEEE Journal
#' of Selected Topics in Signal Processing 9,
#' 1026-1036. doi:10.1109/JSTSP.2015.2417833
#' Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
#' Kernel Ridge Regression: A Distributed Algorithm with Minimax
#' Optimal Rates. arXiv:1305.5029 [cs, math, stat].
#'
#' @docType package
"_PACKAGE"
DRR/R/fastKRR.R
#' Fast implementation for Kernel Ridge Regression.
#'
#' Constructs a learner for the divide and conquer version of KRR.
#'
#' This function is to be used with the CVST package as a drop in
#' replacement for \code{\link[CVST]{constructKRRLearner}}. The
#' implementation approximates the inversion of the kernel matrix
#' using a divide and conquer scheme, lowering computational and
#' memory complexity from \eqn{O(n^3)} and \eqn{O(n^2)} to
#' \eqn{O(n^3/m^2)} and \eqn{O(n^2/m^2)} respectively, where \eqn{m} is
#' the number of blocks to be used (parameter nblocks). Theoretically
#' safe values for \eqn{m} are \eqn{< n^{1/3}}, but in practice \eqn{m}
#' may be a little larger. The function will issue a warning if the
#' value for \eqn{m} is too large.
#'
#'
#'
#' @return Returns a learner similar to \code{\link[CVST]{constructKRRLearner}}
#' suitable for the use with \code{\link[CVST]{CV}} and
#' \code{\link[CVST]{fastCV}}.
#'
#' @seealso \code{\link[CVST]{constructLearner}}
#'
#' @references
#' Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
#' Kernel Ridge Regression: A Distributed Algorithm with Minimax
#' Optimal Rates. arXiv:1305.5029 [cs, math, stat].
#'
#' @examples
#' ns <- noisySinc(1000)
#' nsTest <- noisySinc(1000)
#'
#' fast.krr <- constructFastKRRLearner()
#' fast.p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns), nblocks = 4)
#' system.time(fast.m <- fast.krr$learn(ns, fast.p))
#' fast.pred <- fast.krr$predict(fast.m, nsTest)
#' sum((fast.pred - nsTest$y)^2) / getN(nsTest)
#'
#' \dontrun{
#' krr <- CVST::constructKRRLearner()
#' p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns))
#' system.time(m <- krr$learn(ns, p))
#' pred <- krr$predict(m, nsTest)
#' sum((pred - nsTest$y)^2) / getN(nsTest)
#'
#' plot(ns, col = '#00000030', pch = 19)
#' lines(sort(nsTest$x), fast.pred[order(nsTest$x)], col = '#00C000', lty = 2)
#' lines(sort(nsTest$x), pred[order(nsTest$x)], col = '#0000C0', lty = 2)
#' legend('topleft', legend = c('fast KRR', 'KRR'),
#' col = c('#00C000', '#0000C0'), lty = 2)
#' }
#'
#' @import Matrix
#' @import CVST
#' @import kernlab
#' @export
constructFastKRRLearner <- function () {
if(!requireNamespace("CVST")) stop("require the 'CVST' package")
if(!requireNamespace("kernlab")) stop("require 'kernlab' package")
learn.krr <- function(data, params) {
stopifnot(CVST::isRegression(data))
nblocks <- params$nblocks
nobs <- nrow(data$x)
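## Zhang et al. (2013): the split is theoretically safe for nblocks < nobs^(1/3), i.e. log(m)/log(N) < 1/3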
if (log(nblocks) / log(nobs) > 1/3) {
warning(
"Number of blocks too large wrt. number of observations, log(m)/log(N) = ",
sprintf("%.2f", log(nblocks)), "/", sprintf("%.2f", log(nobs)),
" = ", sprintf("%.2f", log(nblocks)/log(nobs)),
", should be < 1/3, you results may suffer numerical inaccurracy. ",
"For detail see Zhang et. al. (2013)"
)
}
## make kernel function
kpar <- params[setdiff(names(params), c("kernel", "lambda", "nblocks"))]
kernel <- get_kernel_fun(params$kernel, kpar)
## make blocks for samples
shuff <- sample(1:nobs)
blocksizes <- makeBlocks(nobs, nblocks)
bends <- cumsum(blocksizes)
bstarts <- c(1, bends[-nblocks] + 1)
## we make nblock models for the subsamples
## this can be parallelized:
models <- list()
for(i in 1:nblocks) {
iIndices <- shuff[ bstarts[i]:bends[i] ]
models[[i]] <- krr(data$x[iIndices,], kernel, data$y[iIndices], params$lambda)
}
return(models)
} # end learn.krr
predict.krr <- function(models, newData) {
stopifnot(CVST::isRegression(newData))
nModels <- length(models)
pred <- rep(0, nrow(newData$x))
for(i in 1:nModels) {
pred <- pred + krr.predict(newData$x, models[[i]])
}
pred <- pred / nModels
return(as.matrix(pred))
}
return(CVST::constructLearner(learn.krr, predict.krr))
}
# dividing nobs into nblocks parts that have approximately the same size
#
# @param nobs total number of observations
# @param nblocks number of blocks
#
# @return vector of integers of length \code{nblocks} that sums up to
# \code{nobs}
#
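# e.g. makeBlocks(10, 3) gives c(4, 3, 3): block sizes differ by at
# most one and sum to \code{nobs}
#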
makeBlocks <- function (nobs, nblocks) {
maxbs <- nobs %/% nblocks
rest <- nobs %% nblocks
res <- rep(maxbs, nblocks)
if(rest > 0) res[1:rest] <- maxbs + 1
return(res)
}
## get the kernel function out of the kernlab namespace:
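## e.g. get_kernel_fun("rbfdot", list(sigma = 1)) returns a kernlab "rbfkernel" object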
get_kernel_fun <- function (kernel, pars) {
if (!methods::is(kernel,"kernel")) {
if (methods::is(kernel,"function")) {
kernel <- deparse(substitute(kernel))
} else {
kernel <- get(kernel, asNamespace('kernlab'))
}
kernel <- do.call(kernel, pars)
}
return(kernel)
}
## internal functions from the CVST package; they have to be copied here
## because CVST does not export them and CRAN does not allow the use of
## unexported functions.
## CVST:::.krr
krr <- function (data, kernel, y, lambda) {
K <- kernlab::kernelMatrix(kernel, data)
N <- nrow(K)
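## ridge solution of (K + lambda * I) %*% alpha = y via explicit inversion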
alpha <- solve(Matrix(K + diag(lambda, N))) %*% y
return(list(data = data, kernel = kernel, alpha = alpha))
}
## CVST:::.krr.predict
krr.predict <- function (newData, krr) {
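## kernel matrix between new and training data, multiplied by the fitted coefficients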
k <- kernlab::kernelMatrix(krr$kernel, newData, krr$data)
return(k %*% krr$alpha)
}
DRR/R/DRR.R
#' Dimensionality Reduction via Regression
#'
#'
#' \code{drr} implements Dimensionality Reduction via Regression using
#' Kernel Ridge Regression.
#'
#'
#' Parameter combinations will be formed and cross-validation used to
#' select the best combination. Cross-validation uses
#' \code{\link[CVST]{CV}} or \code{\link[CVST]{fastCV}}.
#'
#' Pre-treatment of the data is done using a PCA and scaling,
#' \eqn{\alpha = Vx}. The representation in reduced dimensions is
#'
#' \deqn{y_i = \alpha_i - f_i(\alpha_1, \ldots, \alpha_{i-1})}
#'
#' then the final DRR representation is:
#'
#' \deqn{r = (\alpha_1, y_2, y_3, \ldots,y_d)}
#'
#' DRR is invertible by
#'
#' \deqn{\alpha_i = y_i + f_i(\alpha_1, \alpha_2, \ldots, \alpha_{i-1})}
#'
#' If fewer dimensions are estimated, there will be fewer inverse
#' functions and calculating the inverse will be inaccurate.
#'
#'
#' @references
#' Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
#' Reduction via Regression in Hyperspectral Imagery. IEEE Journal
#' of Selected Topics in Signal Processing 9,
#' 1026-1036. doi:10.1109/JSTSP.2015.2417833
#'
#' @param X input data, a matrix.
#' @param ndim the number of output dimensions and regression
#' functions to be estimated, see details for inversion.
#' @param lambda the penalty term for the Kernel Ridge Regression.
#' @param kernel a kernel function or string, see
#' \code{\link[kernlab]{kernel-class}} for details.
#' @param kernel.pars a list with parameters for the kernel. Each
#' parameter can be a vector; crossvalidation will choose the best
#' combination.
#' @param pca logical, do a preprocessing using pca.
#' @param pca.center logical, center data before applying pca.
#' @param pca.scale logical, scale data before applying pca.
#' @param fastcv if \code{TRUE} uses \code{\link[CVST]{fastCV}}, if
#' \code{FALSE} uses \code{\link[CVST]{CV}} for crossvalidation.
#' @param cv.folds if using normal crossvalidation, the number of
#' folds to be used.
#' @param fastcv.test an optional separate test data set to be used
#' for \code{\link[CVST]{fastCV}}, handed over as option
#' \code{test} to \code{\link[CVST]{fastCV}}.
#' @param fastkrr.nblocks the number of blocks used for fast KRR,
#' higher numbers are faster to compute but may introduce
#' numerical inaccuracies, see
#' \code{\link{constructFastKRRLearner}} for details.
#' @param verbose logical, should the crossvalidation report back.
#'
#' @return A list with the following items:
#' \itemize{
#' \item {"fitted.data"} The data in reduced dimensions.
#' \item {"pca.means"} The means used to center the original data.
#' \item {"pca.scale"} The standard deviations used to scale the original data.
#' \item {"pca.rotation"} The rotation matrix of the PCA.
#' \item {"models"} A list of models used to estimate each dimension.
#' \item {"apply"} A function to fit new data to the estimated model.
#' \item {"inverse"} A function to untransform data.
#' }
#'
#' @examples
#' tt <- seq(0,4*pi, length.out = 200)
#' helix <- cbind(
#' x = 3 * cos(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
#' y = 3 * sin(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
#' z = 2 * tt + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt)))
#' )
#' system.time(
#' drr.fit <- drr(helix, ndim = 3, cv.folds = 4,
#' lambda = 10^(-2:1),
#' kernel.pars = list(sigma = 10^(0:3)),
#' fastkrr.nblocks = 2, verbose = TRUE,
#' fastcv = FALSE)
#' )
#'
#' \dontrun{
#' library(rgl)
#' plot3d(helix)
#' points3d(drr.fit$inverse(drr.fit$fitted.data[,1,drop = FALSE]), col = 'blue')
#' points3d(drr.fit$inverse(drr.fit$fitted.data[,1:2]), col = 'red')
#'
#' plot3d(drr.fit$fitted.data)
#' pad <- -3
#' fd <- drr.fit$fitted.data
#' xx <- seq(min(fd[,1]) , max(fd[,1]) , length.out = 25)
#' yy <- seq(min(fd[,2]) - pad, max(fd[,2]) + pad, length.out = 5)
#' zz <- seq(min(fd[,3]) - pad, max(fd[,3]) + pad, length.out = 5)
#'
#' dd <- as.matrix(expand.grid(xx, yy, zz))
#' plot3d(helix)
#' for(y in yy) for(x in xx)
#' rgl.linestrips(drr.fit$inverse(cbind(x, y, zz)), col = 'blue')
#' for(y in yy) for(z in zz)
#' rgl.linestrips(drr.fit$inverse(cbind(xx, y, z)), col = 'blue')
#' for(x in xx) for(z in zz)
#' rgl.linestrips(drr.fit$inverse(cbind(x, yy, z)), col = 'blue')
#' }
#'
#' @import Matrix
#' @import kernlab
#' @import CVST
#' @export
drr <- function (X, ndim = ncol(X),
lambda = c(0, 10^(-3:2)),
kernel = 'rbfdot',
kernel.pars = list(sigma = 10^(-3:4)),
pca = TRUE,
pca.center = TRUE,
pca.scale = FALSE,
fastcv = FALSE,
cv.folds = 5,
fastcv.test = NULL,
fastkrr.nblocks = 4,
verbose = TRUE) {
if((!fastcv) && (cv.folds <= 1)) stop("need more than one fold for crossvalidation")
if(cv.folds %% 1 != 0) stop("cv.folds must be a whole number")
if(fastkrr.nblocks < 1) stop("fastkrr.nblocks must be at least 1")
if(fastkrr.nblocks %% 1 != 0) stop('fastkrr.nblocks must be a whole number')
if(!requireNamespace("CVST")) stop("require the 'CVST' package")
if(!requireNamespace("kernlab")) stop("require 'kernlab' package")
if(ndim < ncol(X)) warning('ndim < data dimensionality, the inverse functions will be incomplete!')
if(ndim > ncol(X)) ndim <- ncol(X)
if (pca) {
pca <- stats::prcomp(X, center = pca.center, scale. = pca.scale)
if (!pca.center) pca$center <- rep(0, ncol(X))
if (!pca.scale) pca$scale <- rep(1, ncol(X))
} else {
pca <- list()
pca$x <- X
pca$rotation <- diag(1, ncol(X), ncol(X))
pca$center <- rep(0, ncol(X))
pca$scale <- rep(1, ncol(X))
}
alpha <- pca$x
d <- ndim
kpars <- kernel.pars
kpars$kernel <- kernel
kpars$lambda <- lambda
kpars$nblocks <- fastkrr.nblocks
krrl <- constructFastKRRLearner()
p <- do.call(CVST::constructParams, kpars)
Y <- matrix(NA_real_, nrow = nrow(X), ncol = d)
models <- list()
if (d > 1) for (i in d:2) {
message(Sys.time(), ": Constructing Axis ", d-i+1, "/", d)
data <- CVST::constructData(
x = alpha[,1:(i-1), drop = FALSE],
y = alpha[,i]
)
cat("predictors: ", colnames(alpha)[1:(i-1)], "dependent: ", colnames(alpha)[i], '\n')
res <- if (fastcv) {
CVST::fastCV(
data, krrl, p,
CVST::constructCVSTModel(),
test = fastcv.test,
verbose = verbose
)
} else {
CVST::CV(
data, krrl, p,
fold = cv.folds,
verbose = verbose
)
}
model <- krrl$learn(data, res[[1]])
models[[i]] <- model
Y[,i] <- as.matrix(alpha[,i] - krrl$predict(model, data))
}
## we don't need to construct the very last dimension
message(Sys.time(), ": Constructing Axis ", d, "/", d)
Y[,1] <- alpha[,1]
models[[1]] <- list()
appl <- function(x) {
## apply PCA
dat <- scale(x, pca$center, pca$scale)
dat <- dat %*% pca$rotation
## apply KRR
outdat <- matrix(NA_real_, ncol = d, nrow = nrow(x))
if(d > 1) for (i in d:2)
outdat[,i] <-
dat[,i] - krrl$predict(
models[[i]],
CVST::constructData(x = dat[,1:(i-1), drop = FALSE],
y = NA)
)
outdat[,1] <- dat[,1]
return(outdat)
}
inv <- function(x){
dat <- cbind(x, matrix(0, nrow(x), ncol(X)-ncol(x)))
outdat <- dat #matrix(NA_real_, nrow(x), ncol(X))
## krr
#outdat[,1] <- dat[,1]
if(d > 1) for (i in 2:d)
outdat[,i] <- dat[,i] + krrl$predict(
models[[i]],
CVST::constructData(x = outdat[,1:(i-1), drop = FALSE],
y = NA)
)
## inverse pca
outdat <- outdat %*% t(pca$rotation)
outdat <- sweep(outdat, 2L, pca$scale, "*")
outdat <- sweep(outdat, 2L, pca$center, "+")
return(outdat)
}
return(list(
fitted.data = Y,
pca.means = pca$center,
pca.scale = pca$scale,
pca.rotation = pca$rotation,
models = models,
apply = appl,
inverse = inv
))
}
DRR/vignettes/comparePCA.Rmd
---
title: "Comparing DRR and PCA"
author: "Guido Kraemer"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Compare DRR and PCA}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
This is an example application that compares the accuracy and
computational speed of DRR, for different parameter settings, against PCA.
## Load libraries
```{r, echo = TRUE, message = FALSE}
library(DRR)
```
## Read in data
```{r, echo = TRUE, warning = FALSE, error = FALSE}
data(iris)
in_data <- iris[,1:4]
npoints <- nrow(in_data)
nvars <- ncol(in_data)
for (i in seq_len(nvars)) in_data[[i]] <- as.numeric(in_data[[i]])
my_data <- scale(in_data[sample(npoints),], scale = FALSE)
```
## Fit the dimensionality reductions
```{r, echo = TRUE, results = "hide", warning = FALSE, error = FALSE, message = FALSE}
t0 <- system.time(pca <- prcomp(my_data, center = FALSE, scale. = FALSE))
t1 <- system.time(drr.1 <- drr(my_data))
t2 <- system.time(drr.2 <- drr(my_data, fastkrr.nblocks = 2))
t3 <- system.time(drr.3 <- drr(my_data, fastkrr.nblocks = 5))
t4 <- system.time(drr.4 <- drr(my_data, fastkrr.nblocks = 2, fastcv = TRUE))
```
## Plot the data
```{r, echo = FALSE, results = "hold"}
pairs(my_data , gap = 0, main = "iris")
pairs(pca$x , gap = 0, main = "pca")
pairs(drr.1$fitted.data, gap = 0, main = "drr.1")
pairs(drr.2$fitted.data, gap = 0, main = "drr.2")
pairs(drr.3$fitted.data, gap = 0, main = "drr.3")
pairs(drr.4$fitted.data, gap = 0, main = "drr.4")
```
## Calculate RMSE
```{r, echo = TRUE, tidy = TRUE}
rmse <- matrix(NA_real_, nrow = 5, ncol = nvars,
               dimnames = list(c("pca", "drr.1", "drr.2", "drr.3", "drr.4"),
                               seq_len(nvars)))
for(i in seq_len(nvars)){
pca_inv <- pca$x[, 1:i, drop = FALSE] %*% t(pca$rotation[, 1:i, drop = FALSE])
rmse["pca", i] <- sqrt( sum( (my_data - pca_inv )^2) )
rmse["drr.1",i] <- sqrt( sum( (my_data - drr.1$inverse(drr.1$fitted.data[, 1:i, drop = FALSE]) )^2) )
rmse["drr.2",i] <- sqrt( sum( (my_data - drr.2$inverse(drr.2$fitted.data[, 1:i, drop = FALSE]) )^2) )
rmse["drr.3",i] <- sqrt( sum( (my_data - drr.3$inverse(drr.3$fitted.data[, 1:i, drop = FALSE]) )^2) )
rmse["drr.4",i] <- sqrt( sum( (my_data - drr.4$inverse(drr.4$fitted.data[, 1:i, drop = FALSE]) )^2) )
}
```
## The Results
More blocks for fastkrr speed up the calculation, but too many blocks
hurt accuracy.
### RMSE
```{r, echo = FALSE}
print(rmse)
```
### Processing time
```{r, echo = FALSE}
print(rbind(pca = t0, drr.1 = t1, drr.2 = t2, drr.3 = t3, drr.4 = t4)[,1:3])
```
DRR/README.md
# DRR
Dimensionality Reduction via Regression
An implementation of Dimensionality Reduction
via Regression using Kernel Ridge Regression.
## Installing:
```R
## install.packages("devtools")
devtools::install_github("gdkrmr/DRR")
```
Install from CRAN:
```R
install.packages("DRR")
```
Load it:
```R
library(DRR)
```
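Use it (a minimal sketch in the spirit of the vignette in
`vignettes/comparePCA.Rmd`; cross-validation over the default parameter
grid can take a little while):
```R
data(iris)
X <- scale(as.matrix(iris[, 1:4]), scale = FALSE)  # centered numeric data
fit <- drr(X, fastkrr.nblocks = 2)    # DRR with fast KRR, parameters chosen by CV
reduced  <- fit$fitted.data           # the data in DRR coordinates
restored <- fit$inverse(reduced)      # map back to the original space
```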
DRR/MD5
4e03d25f1635cbcc23e5326330fdaa24 *DESCRIPTION
bab4ad6c5e310c1e95b701ae12c97e71 *NAMESPACE
1ab6357662737c5bffc2d64479f18e6b *R/DRR-package.R
d841bef331530ab81836262ebea7970b *R/DRR.R
35a4d665232367ed6a64040f4d29756e *R/fastKRR.R
6cdb32c1ae5ab787e1ffd9a85d0b23e8 *README.md
7ca7ab51dbac1c5e41c58202c8dc7d9b *build/vignette.rds
e72a819e472d18f7fe5e9f74831912c6 *inst/doc/comparePCA.R
3994f2898362cd62287d6ec1161d534b *inst/doc/comparePCA.Rmd
b698ae948c254d9f7c4ef4b8901dee8a *inst/doc/comparePCA.html
08cf8625d95ab5e61ceadf890ed9ecca *man/DRR-package.Rd
45275666649ffc029ec40838c775b04a *man/constructFastKRRLearner.Rd
d12026ad13f5f628325934d8c7360937 *man/drr.Rd
3994f2898362cd62287d6ec1161d534b *vignettes/comparePCA.Rmd
DRR/build/vignette.rds
(binary file, contents omitted)
DRR/DESCRIPTION
Package: DRR
Title: Dimensionality Reduction via Regression
Version: 0.0.2
Authors@R: person("Guido", "Kraemer",
email = "gkraemer@bgc-jena.mpg.de",
role = c("aut","cre"))
Description: An Implementation of Dimensionality Reduction
via Regression using Kernel Ridge Regression.
License: GPL-3
Imports: stats, methods
Suggests: knitr
VignetteBuilder: knitr
LazyData: true
Depends: kernlab, CVST, Matrix
RoxygenNote: 5.0.1
NeedsCompilation: no
Packaged: 2016-09-15 09:54:12 UTC; gkraemer
Author: Guido Kraemer [aut, cre]
Maintainer: Guido Kraemer <gkraemer@bgc-jena.mpg.de>
Repository: CRAN
Date/Publication: 2016-09-15 15:46:27
DRR/man/constructFastKRRLearner.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fastKRR.R
\name{constructFastKRRLearner}
\alias{constructFastKRRLearner}
\title{Fast implementation for Kernel Ridge Regression.}
\usage{
constructFastKRRLearner()
}
\value{
Returns a learner similar to \code{\link[CVST]{constructKRRLearner}}
suitable for the use with \code{\link[CVST]{CV}} and
\code{\link[CVST]{fastCV}}.
}
\description{
Constructs a learner for the divide and conquer version of KRR.
}
\details{
This function is to be used with the CVST package as a drop in
replacement for \code{\link[CVST]{constructKRRLearner}}. The
implementation approximates the inversion of the kernel matrix
using a divide and conquer scheme, lowering computational and
memory complexity from \eqn{O(n^3)} and \eqn{O(n^2)} to
\eqn{O(n^3/m^2)} and \eqn{O(n^2/m^2)} respectively, where \eqn{m} is
the number of blocks to be used (parameter nblocks). Theoretically
safe values for \eqn{m} are \eqn{< n^{1/3}}, but in practice \eqn{m}
may be a little larger. The function will issue a warning if the
value for \eqn{m} is too large.
}
\examples{
ns <- noisySinc(1000)
nsTest <- noisySinc(1000)
fast.krr <- constructFastKRRLearner()
fast.p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns), nblocks = 4)
system.time(fast.m <- fast.krr$learn(ns, fast.p))
fast.pred <- fast.krr$predict(fast.m, nsTest)
sum((fast.pred - nsTest$y)^2) / getN(nsTest)
\dontrun{
krr <- CVST::constructKRRLearner()
p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns))
system.time(m <- krr$learn(ns, p))
pred <- krr$predict(m, nsTest)
sum((pred - nsTest$y)^2) / getN(nsTest)
plot(ns, col = '#00000030', pch = 19)
lines(sort(nsTest$x), fast.pred[order(nsTest$x)], col = '#00C000', lty = 2)
lines(sort(nsTest$x), pred[order(nsTest$x)], col = '#0000C0', lty = 2)
legend('topleft', legend = c('fast KRR', 'KRR'),
col = c('#00C000', '#0000C0'), lty = 2)
}
}
\references{
Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
Kernel Ridge Regression: A Distributed Algorithm with Minimax
Optimal Rates. arXiv:1305.5029 [cs, math, stat].
}
\seealso{
\code{\link[CVST]{constructLearner}}
}
DRR/man/drr.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DRR.R
\name{drr}
\alias{drr}
\title{Dimensionality Reduction via Regression}
\usage{
drr(X, ndim = ncol(X), lambda = c(0, 10^(-3:2)), kernel = "rbfdot",
kernel.pars = list(sigma = 10^(-3:4)), pca = TRUE, pca.center = TRUE,
pca.scale = FALSE, fastcv = FALSE, cv.folds = 5, fastcv.test = NULL,
fastkrr.nblocks = 4, verbose = TRUE)
}
\arguments{
\item{X}{input data, a matrix.}
\item{ndim}{the number of output dimensions and regression
functions to be estimated, see details for inversion.}
\item{lambda}{the penalty term for the Kernel Ridge Regression.}
\item{kernel}{a kernel function or string, see
\code{\link[kernlab]{kernel-class}} for details.}
\item{kernel.pars}{a list with parameters for the kernel. Each
parameter can be a vector; crossvalidation will choose the best
combination.}
\item{pca}{logical, do a preprocessing using pca.}
\item{pca.center}{logical, center data before applying pca.}
\item{pca.scale}{logical, scale data before applying pca.}
\item{fastcv}{if \code{TRUE} uses \code{\link[CVST]{fastCV}}, if
\code{FALSE} uses \code{\link[CVST]{CV}} for crossvalidation.}
\item{cv.folds}{if using normal crossvalidation, the number of
folds to be used.}
\item{fastcv.test}{an optional separate test data set to be used
for \code{\link[CVST]{fastCV}}, handed over as option
\code{test} to \code{\link[CVST]{fastCV}}.}
\item{fastkrr.nblocks}{the number of blocks used for fast KRR,
higher numbers are faster to compute but may introduce
numerical inaccuracies, see
\code{\link{constructFastKRRLearner}} for details.}
\item{verbose}{logical, should the crossvalidation report back.}
}
\value{
A list with the following items:
\itemize{
\item {"fitted.data"} The data in reduced dimensions.
\item {"pca.means"} The means used to center the original data.
\item {"pca.scale"} The standard deviations used to scale the original data.
\item {"pca.rotation"} The rotation matrix of the PCA.
\item {"models"} A list of models used to estimate each dimension.
\item {"apply"} A function to fit new data to the estimated model.
\item {"inverse"} A function to untransform data.
}
}
\description{
\code{drr} implements Dimensionality Reduction via Regression using
Kernel Ridge Regression.
}
\details{
Parameter combinations will be formed and cross-validation used to
select the best combination. Cross-validation uses
\code{\link[CVST]{CV}} or \code{\link[CVST]{fastCV}}.
Pre-treatment of the data is done using a PCA and scaling,
\eqn{\alpha = Vx}. The representation in reduced dimensions is
\deqn{y_i = \alpha_i - f_i(\alpha_1, \ldots, \alpha_{i-1})}
then the final DRR representation is:
\deqn{r = (\alpha_1, y_2, y_3, \ldots,y_d)}
DRR is invertible by
\deqn{\alpha_i = y_i + f_i(\alpha_1, \alpha_2, \ldots, \alpha_{i-1})}
If fewer dimensions are estimated, there will be fewer inverse
functions and calculating the inverse will be inaccurate.
}
\examples{
tt <- seq(0,4*pi, length.out = 200)
helix <- cbind(
x = 3 * cos(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
y = 3 * sin(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
z = 2 * tt + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt)))
)
system.time(
drr.fit <- drr(helix, ndim = 3, cv.folds = 4,
lambda = 10^(-2:1),
kernel.pars = list(sigma = 10^(0:3)),
fastkrr.nblocks = 2, verbose = TRUE,
fastcv = FALSE)
)
\dontrun{
library(rgl)
plot3d(helix)
points3d(drr.fit$inverse(drr.fit$fitted.data[,1,drop = FALSE]), col = 'blue')
points3d(drr.fit$inverse(drr.fit$fitted.data[,1:2]), col = 'red')
plot3d(drr.fit$fitted.data)
pad <- -3
fd <- drr.fit$fitted.data
xx <- seq(min(fd[,1]) , max(fd[,1]) , length.out = 25)
yy <- seq(min(fd[,2]) - pad, max(fd[,2]) + pad, length.out = 5)
zz <- seq(min(fd[,3]) - pad, max(fd[,3]) + pad, length.out = 5)
dd <- as.matrix(expand.grid(xx, yy, zz))
plot3d(helix)
for(y in yy) for(x in xx)
rgl.linestrips(drr.fit$inverse(cbind(x, y, zz)), col = 'blue')
for(y in yy) for(z in zz)
rgl.linestrips(drr.fit$inverse(cbind(xx, y, z)), col = 'blue')
for(x in xx) for(z in zz)
rgl.linestrips(drr.fit$inverse(cbind(x, yy, z)), col = 'blue')
}
}
\references{
Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
Reduction via Regression in Hyperspectral Imagery. IEEE Journal
of Selected Topics in Signal Processing 9,
1026-1036. doi:10.1109/JSTSP.2015.2417833
}
DRR/man/DRR-package.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DRR-package.R
\docType{package}
\name{DRR-package}
\alias{DRR}
\alias{DRR-package}
\title{Dimensionality Reduction via Regression.}
\description{
DRR implements Dimensionality Reduction via Regression using
Kernel Ridge Regression. It also adds a faster implementation of
Kernel Ridge Regression that can be used with the CVST package.
}
\details{
Thanks to the Max Planck Institute for Biogeochemistry in Jena for
the funding.
}
\references{
Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
Reduction via Regression in Hyperspectral Imagery. IEEE Journal
of Selected Topics in Signal Processing 9,
1026-1036. doi:10.1109/JSTSP.2015.2417833
Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
Kernel Ridge Regression: A Distributed Algorithm with Minimax
Optimal Rates. arXiv:1305.5029 [cs, math, stat].
}