[r-cran-drr] 01/02: New upstream version 0.0.2
Andreas Tille
tille at debian.org
Sun Oct 22 21:26:22 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-drr.
commit 5f50baaa4c26b069b5d1c74a928b122e526d2db1
Author: Andreas Tille <tille at debian.org>
Date: Sun Oct 22 23:25:49 2017 +0200
New upstream version 0.0.2
---
DESCRIPTION | 21 ++++
MD5 | 14 +++
NAMESPACE | 7 ++
R/DRR-package.R | 21 ++++
R/DRR.R | 248 +++++++++++++++++++++++++++++++++++++++++
R/fastKRR.R | 161 ++++++++++++++++++++++++++
README.md | 21 ++++
build/vignette.rds | Bin 0 -> 211 bytes
inst/doc/comparePCA.R | 48 ++++++++
inst/doc/comparePCA.Rmd | 84 ++++++++++++++
inst/doc/comparePCA.html | 159 ++++++++++++++++++++++++++
man/DRR-package.Rd | 26 +++++
man/constructFastKRRLearner.Rd | 62 +++++++++++
man/drr.Rd | 132 ++++++++++++++++++++++
vignettes/comparePCA.Rmd | 84 ++++++++++++++
15 files changed, 1088 insertions(+)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..5c8679c
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,21 @@
+Package: DRR
+Title: Dimensionality Reduction via Regression
+Version: 0.0.2
+Authors at R: person("Guido", "Kraemer",
+ email = "gkraemer at bgc-jena.mpg.de",
+ role = c("aut","cre"))
+Description: An Implementation of Dimensionality Reduction
+ via Regression using Kernel Ridge Regression.
+License: GPL-3
+Imports: stats, methods
+Suggests: knitr
+VignetteBuilder: knitr
+LazyData: true
+Depends: kernlab, CVST, Matrix
+RoxygenNote: 5.0.1
+NeedsCompilation: no
+Packaged: 2016-09-15 09:54:12 UTC; gkraemer
+Author: Guido Kraemer [aut, cre]
+Maintainer: Guido Kraemer <gkraemer at bgc-jena.mpg.de>
+Repository: CRAN
+Date/Publication: 2016-09-15 15:46:27
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..eff6112
--- /dev/null
+++ b/MD5
@@ -0,0 +1,14 @@
+4e03d25f1635cbcc23e5326330fdaa24 *DESCRIPTION
+bab4ad6c5e310c1e95b701ae12c97e71 *NAMESPACE
+1ab6357662737c5bffc2d64479f18e6b *R/DRR-package.R
+d841bef331530ab81836262ebea7970b *R/DRR.R
+35a4d665232367ed6a64040f4d29756e *R/fastKRR.R
+6cdb32c1ae5ab787e1ffd9a85d0b23e8 *README.md
+7ca7ab51dbac1c5e41c58202c8dc7d9b *build/vignette.rds
+e72a819e472d18f7fe5e9f74831912c6 *inst/doc/comparePCA.R
+3994f2898362cd62287d6ec1161d534b *inst/doc/comparePCA.Rmd
+b698ae948c254d9f7c4ef4b8901dee8a *inst/doc/comparePCA.html
+08cf8625d95ab5e61ceadf890ed9ecca *man/DRR-package.Rd
+45275666649ffc029ec40838c775b04a *man/constructFastKRRLearner.Rd
+d12026ad13f5f628325934d8c7360937 *man/drr.Rd
+3994f2898362cd62287d6ec1161d534b *vignettes/comparePCA.Rmd
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..836f87d
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,7 @@
+# Generated by roxygen2: do not edit by hand
+
+export(constructFastKRRLearner)
+export(drr)
+import(CVST)
+import(Matrix)
+import(kernlab)
diff --git a/R/DRR-package.R b/R/DRR-package.R
new file mode 100644
index 0000000..e96ca18
--- /dev/null
+++ b/R/DRR-package.R
@@ -0,0 +1,21 @@
+#' Dimensionality Reduction via Regression.
+#'
+#' DRR implements the Dimensionality Reduction via Regression using
+#' Kernel Ridge Regression. It also adds a faster implementation of
+#' Kernel Ridge regression that can be used with the CVST package.
+#'
+#' Thanks to the Max Planck Institute for Biogeochemistry in Jena for
+#' the funding.
+#'
+#' @references
+#' Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
+#' Reduction via Regression in Hyperspectral Imagery. IEEE Journal
+#' of Selected Topics in Signal Processing 9,
+#' 1026-1036. doi:10.1109/JSTSP.2015.2417833
+#' Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
+#' Kernel Ridge Regression: A Distributed Algorithm with Minimax
+#' Optimal Rates. arXiv:1305.5029 [cs, math, stat].
+#'
+#' @docType package
+"_PACKAGE"
+
diff --git a/R/DRR.R b/R/DRR.R
new file mode 100644
index 0000000..2ed3450
--- /dev/null
+++ b/R/DRR.R
@@ -0,0 +1,248 @@
+#' Dimensionality Reduction via Regression
+#'
+#'
+#' \code{drr} Implements Dimensionality Reduction via Regression using
+#' Kernel Ridge Regression.
+#'
+#'
+#' Parameter combination will be formed and cross-validation used to
+#' select the best combination. Cross-validation uses
+#' \code{\link[CVST]{CV}} or \code{\link[CVST]{fastCV}}.
+#'
+#' Pre-treatment of the data using a PCA and scaling is made
+#' \eqn{\alpha = Vx}. the representation in reduced dimensions is
+#'
+#' \deqn{y_i = \alpha - f_i(\alpha_1, \ldots, \alpha_{i-1})}
+#'
+#' then the final DRR representation is:
+#'
+#' \deqn{r = (\alpha_1, y_2, y_3, \ldots,y_d)}
+#'
+#' DRR is invertible by
+#'
+#' \deqn{\alpha_i = y_i + f_i(\alpha_1,\alpha_2, \ldots, alpha_{i-1})}
+#'
+#' If less dimensions are estimated, there will be less inverse
+#' functions and calculating the inverse will be inaccurate.
+#'
+#'
+#' @references
+#' Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
+#' Reduction via Regression in Hyperspectral Imagery. IEEE Journal
+#' of Selected Topics in Signal Processing 9,
+#' 1026-1036. doi:10.1109/JSTSP.2015.2417833
+#'
+#' @param X input data, a matrix.
+#' @param ndim the number of output dimensions and regression
+#' functions to be estimated, see details for inversion.
+#' @param lambda the penalty term for the Kernel Ridge Regression.
+#' @param kernel a kernel function or string, see
+#' \code{\link[kernlab]{kernel-class}} for details.
+#' @param kernel.pars a list with parameters for the kernel. each
+#' parameter can be a vector, crossvalidation will choose the best
+#' combination.
+#' @param pca logical, do a preprocessing using pca.
+#' @param pca.center logical, center data before applying pca.
+#' @param pca.scale logical, scale data before applying pca.
+#' @param fastcv if \code{TRUE} uses \code{\link[CVST]{fastCV}}, if
+#' \code{FALSE} uses \code{\link[CVST]{CV}} for crossvalidation.
+#' @param cv.folds if using normal crossvalidation, the number of
+#' folds to be used.
+#' @param fastcv.test an optional separate test data set to be used
+#' for \code{\link[CVST]{fastCV}}, handed over as option
+#' \code{test} to \code{\link[CVST]{fastCV}}.
+#' @param fastkrr.nblocks the number of blocks used for fast KRR,
+#' higher numbers are faster to compute but may introduce
+#' numerical inaccurracies, see
+#' \code{\link{constructFastKRRLearner}} for details.
+#' @param verbose logical, should the crossvalidation report back.
+#'
+#' @return A list the following items:
+#' \itemize{
+#' \item {"fitted.data"} The data in reduced dimensions.
+#' \item {"pca.means"} The means used to center the original data.
+#' \item {"pca.scale"} The standard deviations used to scale the original data.
+#' \item {"pca.rotation"} The rotation matrix of the PCA.
+#' \item {"models"} A list of models used to estimate each dimension.
+#' \item {"apply"} A function to fit new data to the estimated model.
+#' \item {"inverse"} A function to untransform data.
+#' }
+#'
+#' @examples
+#' tt <- seq(0,4*pi, length.out = 200)
+#' helix <- cbind(
+#' x = 3 * cos(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
+#' y = 3 * sin(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
+#' z = 2 * tt + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt)))
+#' )
+#' system.time(
+#' drr.fit <- drr(helix, ndim = 3, cv.folds = 4,
+#' lambda = 10^(-2:1),
+#' kernel.pars = list(sigma = 10^(0:3)),
+#' fastkrr.nblocks = 2, verbose = TRUE,
+#' fastcv = FALSE)
+#' )
+#'
+#' \dontrun{
+#' library(rgl)
+#' plot3d(helix)
+#' points3d(drr.fit$inverse(drr.fit$fitted.data[,1,drop = FALSE]), col = 'blue')
+#' points3d(drr.fit$inverse(drr.fit$fitted.data[,1:2]), col = 'red')
+#'
+#' plot3d(drr.fit$fitted.data)
+#' pad <- -3
+#' fd <- drr.fit$fitted.data
+#' xx <- seq(min(fd[,1]) , max(fd[,1]) , length.out = 25)
+#' yy <- seq(min(fd[,2]) - pad, max(fd[,2]) + pad, length.out = 5)
+#' zz <- seq(min(fd[,3]) - pad, max(fd[,3]) + pad, length.out = 5)
+#'
+#' dd <- as.matrix(expand.grid(xx, yy, zz))
+#' plot3d(helix)
+#' for(y in yy) for(x in xx)
+#' rgl.linestrips(drr.fit$inverse(cbind(x, y, zz)), col = 'blue')
+#' for(y in yy) for(z in zz)
+#' rgl.linestrips(drr.fit$inverse(cbind(xx, y, z)), col = 'blue')
+#' for(x in xx) for(z in zz)
+#' rgl.linestrips(drr.fit$inverse(cbind(x, yy, z)), col = 'blue')
+#' }
+#'
+#' @import Matrix
+#' @import kernlab
+#' @import CVST
+#' @export
+drr <- function (X, ndim = ncol(X),
+ lambda = c(0, 10^(-3:2)),
+ kernel = 'rbfdot',
+ kernel.pars = list(sigma = 10^(-3:4)),
+ pca = TRUE,
+ pca.center = TRUE,
+ pca.scale = FALSE,
+ fastcv = FALSE,
+ cv.folds = 5,
+ fastcv.test = NULL,
+ fastkrr.nblocks = 4,
+ verbose = TRUE) {
+ if((!fastcv) && (cv.folds <= 1)) stop("need more than one fold for crossvalidation")
+ if(cv.folds %% 1 != 0) stop("cv.folds must be a whole number")
+ if(fastkrr.nblocks < 1) stop("fastkrr.nblocks must be at least 1")
+ if(fastkrr.nblocks %% 1 != 0) stop('fastkrr.nblocks must be a whole number')
+ if(!requireNamespace("CVST")) stop("require the 'CVST' package")
+ if(!requireNamespace("kernlab")) stop("require 'kernlab' package")
+ if(ndim < ncol(X)) warning('ndim < data dimensionality, the inverse functions will be incomplete!')
+ if(ndim > ncol(X)) ndim <- ncol(X)
+
+ if (pca) {
+ pca <- stats::prcomp(X, center = pca.center, scale. = pca.scale)
+ if (!pca.center) pca$center <- rep(0, ncol(X))
+ if (!pca.scale) pca$scale <- rep(1, ncol(X))
+ } else {
+ pca <- list()
+ pca$x <- X
+ pca$rotation <- diag(1, ncol(X), ncol(X))
+ pca$center <- rep(0, ncol(X))
+ pca$scale <- rep(1, ncol(X))
+ }
+
+
+ alpha <- pca$x
+ d <- ndim
+
+ kpars <- kernel.pars
+ kpars$kernel <- kernel
+ kpars$lambda <- lambda
+ kpars$nblocks <- fastkrr.nblocks
+
+ krrl <- constructFastKRRLearner()
+
+ p <- do.call(CVST::constructParams, kpars)
+
+ Y <- matrix(NA_real_, nrow = nrow(X), ncol = d)
+ models <- list()
+ if (d > 1) for (i in d:2) {
+ message(Sys.time(), ": Constructing Axis ", d-i+1, "/", d)
+ data <- CVST::constructData(
+ x = alpha[,1:(i-1), drop = FALSE],
+ y = alpha[,i]
+ )
+
+ cat("predictors: ", colnames(alpha)[1:(i-1)], "dependent: ", colnames(alpha)[i], '\n')
+
+
+ res <- if (fastcv) {
+ CVST::fastCV(
+ data, krrl, p,
+ CVST::constructCVSTModel(),
+ test = fastcv.test,
+ verbose = verbose
+ )
+ } else {
+ CVST::CV(
+ data, krrl, p,
+ fold = cv.folds,
+ verbose = verbose
+ )
+ }
+ model <- krrl$learn(data, res[[1]])
+
+ models[[i]] <- model
+ Y[,i] <- as.matrix(alpha[,i] - krrl$predict(model, data))
+ }
+ ## we don't need to construct the very last dimension
+ message(Sys.time(), ": Constructing Axis ", d, "/", d)
+ Y[,1] <- alpha[,1]
+ models[[1]] <- list()
+
+
+ appl <- function(x) {
+ ## apply PCA
+ dat <- scale(x, pca$center, pca$scale)
+ dat <- dat %*% pca$rotation
+
+ ## apply KRR
+ outdat <- matrix(NA_real_, ncol = d, nrow = nrow(x))
+ if(d > 1) for (i in d:2)
+ outdat[,i] <-
+ dat[,i] - krrl$predict(
+ models[[i]],
+ CVST::constructData(x = dat[,1:(i-1), drop = FALSE],
+ y = NA)
+ )
+
+ outdat[,1] <- dat[,1]
+
+ return(outdat)
+ }
+
+ inv <- function(x){
+ dat <- cbind(x, matrix(0, nrow(x), ncol(X)-ncol(x)))
+
+ outdat <- dat #matrix(NA_real_, nrow(x), ncol(X))
+
+ ## krr
+ #outdat[,1] <- dat[,1]
+ if(d > 1) for (i in 2:d)
+ outdat[,i] <- dat[,i] + krrl$predict(
+ models[[i]],
+ CVST::constructData(x = outdat[,1:(i-1), drop = FALSE],
+ y = NA)
+ )
+
+ ## inverse pca
+ outdat <- outdat %*% t(pca$rotation)
+ outdat <- sweep(outdat, 2L, pca$scale, "*")
+ outdat <- sweep(outdat, 2L, pca$center, "+")
+
+ return(outdat)
+ }
+
+ return(list(
+ fitted.data = Y,
+ pca.means = pca$center,
+ pca.scale = pca$scale,
+ pca.rotation = pca$rotation,
+ models = models,
+ apply = appl,
+ inverse = inv
+ ))
+}
+
diff --git a/R/fastKRR.R b/R/fastKRR.R
new file mode 100644
index 0000000..0928278
--- /dev/null
+++ b/R/fastKRR.R
@@ -0,0 +1,161 @@
+#' Fast implementation for Kernel Ridge Regression.
+#'
+#' Constructs a learner for the divide and conquer version of KRR.
+#'
+#' This function is to be used with the CVST package as a drop in
+#' replacement for \code{\link[CVST]{constructKRRLearner}}. The
+#' implementation approximates the inversion of the kernel Matrix
+#' using the divide an conquer scheme, lowering computational and
+#' memory complexity from \eqn{O(n^3)} and \eqn{O(n^2)} to
+#' \eqn{O(n^3/m^2)} and \eqn{O(n^2/m^2)} respectively, where m are the
+#' number of blocks to be used (parameter nblocks). Theoretically safe
+#' values for \eqn{m} are \eqn{< n^{1/3}}, but practically \eqn{m} may
+#' be a little bit larger. The function will issue a warning, if the
+#' value for \eqn{m} is too large.
+#'
+#'
+#'
+#' @return Returns a learner similar to \code{\link[CVST]{constructKRRLearner}}
+#' suitable for the use with \code{\link[CVST]{CV}} and
+#' \code{\link[CVST]{fastCV}}.
+#'
+#' @seealso \code{\link[CVST]{constructLearner}}
+#'
+#' @references
+#' Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
+#' Kernel Ridge Regression: A Distributed Algorithm with Minimax
+#' Optimal Rates. arXiv:1305.5029 [cs, math, stat].
+#'
+#' @examples
+#' ns <- noisySinc(1000)
+#' nsTest <- noisySinc(1000)
+#'
+#' fast.krr <- constructFastKRRLearner()
+#' fast.p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns), nblocks = 4)
+#' system.time(fast.m <- fast.krr$learn(ns, fast.p))
+#' fast.pred <- fast.krr$predict(fast.m, nsTest)
+#' sum((fast.pred - nsTest$y)^2) / getN(nsTest)
+#'
+#' \dontrun{
+#' krr <- CVST::constructKRRLearner()
+#' p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns))
+#' system.time(m <- krr$learn(ns, p))
+#' pred <- krr$predict(m, nsTest)
+#' sum((pred - nsTest$y)^2) / getN(nsTest)
+#'
+#' plot(ns, col = '#00000030', pch = 19)
+#' lines(sort(nsTest$x), fast.pred[order(nsTest$x)], col = '#00C000', lty = 2)
+#' lines(sort(nsTest$x), pred[order(nsTest$x)], col = '#0000C0', lty = 2)
+#' legend('topleft', legend = c('fast KRR', 'KRR'),
+#' col = c('#00C000', '#0000C0'), lty = 2)
+#' }
+#'
+#' @import Matrix
+#' @import CVST
+#' @import kernlab
+#' @export
+constructFastKRRLearner <- function () {
+ if(!requireNamespace("CVST")) stop("require the 'CVST' package")
+ if(!requireNamespace("kernlab")) stop("require 'kernlab' package")
+
+ learn.krr <- function(data, params) {
+ stopifnot(CVST::isRegression(data))
+ nblocks <- params$nblocks
+ nobs <- nrow(data$x)
+ if (log(nblocks) / log(nobs) > 1/3) {
+ warning(
+ "Number of blocks too large wrt. number of observations, log(m)/log(N) = ",
+ sprintf("%.2f", log(nblocks)), "/", sprintf("%.2f", log(nobs)),
+ " = ", sprintf("%.2f", log(nblocks)/log(nobs)),
+ ", should be < 1/3, you results may suffer numerical inaccurracy. ",
+ "For detail see Zhang et. al. (2013)"
+ )
+ }
+
+ ## make kernel function
+ kpar <- params[setdiff(names(params), c("kernel", "lambda", "nblocks"))]
+ kernel <- get_kernel_fun(params$kernel, kpar)
+
+ ## make blocks for samples
+ shuff <- sample(1:nobs)
+ blocksizes <- makeBlocks(nobs, nblocks)
+ bends <- cumsum(blocksizes)
+ bstarts <- c(1, bends[-nblocks] + 1)
+
+ ## we make nblock models for the subsamples
+ ## this can be parallelized:
+ models <- list()
+ for(i in 1:nblocks) {
+ iIndices <- shuff[ bstarts[i]:bends[i] ]
+ models[[i]] <- krr(data$x[iIndices,], kernel, data$y[iIndices], params$lambda)
+ }
+ return(models)
+ } # end learn.krr
+
+ predict.krr <- function(models, newData) {
+ stopifnot(CVST::isRegression(newData))
+
+ nModels <- length(models)
+ pred <- rep(0, nrow(newData$x))
+ for(i in 1:nModels) {
+ pred <- pred + krr.predict(newData$x, models[[i]])
+ }
+ pred <- pred / nModels
+ return(as.matrix(pred))
+ }
+
+
+ return(CVST::constructLearner(learn.krr, predict.krr))
+}
+
+# dividing nobs into nblocks parts that have approximately the same size
+#
+# @param nobs total number of observations
+# @param nblocks number of blocks
+#
+# @return vector of integers of length \code{nblocks} that sums up to
+# \code{nobs}
+#
+makeBlocks <- function (nobs, nblocks) {
+
+ maxbs <- nobs %/% nblocks
+ rest <- nobs %% nblocks
+
+ res <- rep(maxbs, nblocks)
+ if(rest > 0) res[1:rest] <- maxbs + 1
+ return(res)
+}
+
+
+## get the kernel function out of the kernlab namespace:
+get_kernel_fun <- function (kernel, pars) {
+ if (!methods::is(kernel,"kernel")) {
+ if (methods::is(kernel,"function")) {
+ kernel <- deparse(substitute(kernel))
+ } else {
+ kernel <- get(kernel, asNamespace('kernlab'))
+ }
+ kernel <- do.call(kernel, pars)
+ }
+ return(kernel)
+}
+
+
+## internal functions from cvst package, have to be here, because CVST
+## does not export them and CRAN does not allow the use of unexported
+## functions.
+
+## CVST:::.krr
+krr <- function (data, kernel, y, lambda) {
+ K <- kernlab::kernelMatrix(kernel, data)
+ N <- nrow(K)
+ alpha <- solve(Matrix(K + diag(lambda, N))) %*% y
+ return(list(data = data, kernel = kernel, alpha = alpha))
+}
+
+## CVST:::.krr.predict
+krr.predict <- function (newData, krr) {
+ k <- kernlab::kernelMatrix(krr$kernel, newData, krr$data)
+ return(k %*% krr$alpha)
+}
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..15d6a1e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# DRR
+Dimensionality Reduction via Regression
+
+An implementation of Dimensionality Reduction
+via Regression using Kernel Ridge Regression.
+
+## Installing:
+```R
+## install.packages("devtools")
+devtools::install_github("gdkrmr/DRR")
+```
+
+Install from CRAN:
+```R
+install.packages("DRR")
+```
+
+Load it:
+```R
+library(DRR)
+```
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..6cec450
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/inst/doc/comparePCA.R b/inst/doc/comparePCA.R
new file mode 100644
index 0000000..a69db66
--- /dev/null
+++ b/inst/doc/comparePCA.R
@@ -0,0 +1,48 @@
+## ---- echo = TRUE, message = FALSE---------------------------------------
+library(DRR)
+
+## ---- echo = TRUE, warning = FALSE, error = FALSE------------------------
+data(iris)
+
+in_data <- iris[,1:4]
+
+npoints <- nrow(in_data)
+nvars <- ncol(in_data)
+for (i in seq_len(nvars)) in_data[[i]] <- as.numeric(in_data[[i]])
+my_data <- scale(in_data[sample(npoints),], scale = FALSE)
+
+## ---- echo = TRUE, results = "hide", warning = FALSE, error = FALSE, message = FALSE----
+t0 <- system.time(pca <- prcomp(my_data, center = FALSE, scale. = FALSE))
+t1 <- system.time(drr.1 <- drr(my_data))
+t2 <- system.time(drr.2 <- drr(my_data, fastkrr = 2))
+t3 <- system.time(drr.3 <- drr(my_data, fastkrr = 5))
+t4 <- system.time(drr.4 <- drr(my_data, fastkrr = 2, fastcv = TRUE))
+
+## ---- echo = FALSE, results = "hold"-------------------------------------
+pairs(my_data , gap = 0, main = "iris")
+pairs(pca$x , gap = 0, main = "pca")
+pairs(drr.1$fitted.data, gap = 0, main = "drr.1")
+pairs(drr.2$fitted.data, gap = 0, main = "drr.2")
+pairs(drr.3$fitted.data, gap = 0, main = "drr.3")
+pairs(drr.4$fitted.data, gap = 0, main = "drr.4")
+
+## ---- echo = TRUE, tidy = TRUE-------------------------------------------
+rmse <- matrix(NA_real_, nrow = 5, ncol = nvars,
+ dimnames = list(c("pca", "drr.1", "drr.2", "drr.3", "drr.4"),
+ seq_len(nvars)))
+
+for(i in seq_len(nvars)){
+ pca_inv <- pca$x[, 1:i, drop = FALSE] %*% t(pca$rotation[, 1:i, drop = FALSE])
+ rmse["pca", i] <- sqrt( sum( (my_data - pca_inv )^2) )
+ rmse["drr.1",i] <- sqrt( sum( (my_data - drr.1$inverse(drr.1$fitted.data[, 1:i, drop = FALSE]) )^2) )
+ rmse["drr.2",i] <- sqrt( sum( (my_data - drr.2$inverse(drr.2$fitted.data[, 1:i, drop = FALSE]) )^2) )
+ rmse["drr.3",i] <- sqrt( sum( (my_data - drr.3$inverse(drr.3$fitted.data[, 1:i, drop = FALSE]) )^2) )
+ rmse["drr.4",i] <- sqrt( sum( (my_data - drr.4$inverse(drr.4$fitted.data[, 1:i, drop = FALSE]) )^2) )
+}
+
+## ---- echo = FALSE-------------------------------------------------------
+print(rmse)
+
+## ---- echo = FALSE-------------------------------------------------------
+print(rbind(pca = t0, drr.1 = t1, drr.2 = t2, drr.3 = t3, drr.4 = t4)[,1:3])
+
diff --git a/inst/doc/comparePCA.Rmd b/inst/doc/comparePCA.Rmd
new file mode 100644
index 0000000..8d7cf47
--- /dev/null
+++ b/inst/doc/comparePCA.Rmd
@@ -0,0 +1,84 @@
+---
+title: "Comparing DRR and PCA"
+author: "Guido Kraemer"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Compare DRR and PCA}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+This is an example application to compare the accuracy and
+computational speed of DRR for different parameters to PCA.
+
+## Load libraries
+```{r, echo = TRUE, message = FALSE}
+library(DRR)
+```
+
+## Read in data
+```{r, echo = TRUE, warning = FALSE, error = FALSE}
+data(iris)
+
+in_data <- iris[,1:4]
+
+npoints <- nrow(in_data)
+nvars <- ncol(in_data)
+for (i in seq_len(nvars)) in_data[[i]] <- as.numeric(in_data[[i]])
+my_data <- scale(in_data[sample(npoints),], scale = FALSE)
+```
+
+## Fit the dimensionality reductions.
+```{r, echo = TRUE, results = "hide", warning = FALSE, error = FALSE, message = FALSE}
+t0 <- system.time(pca <- prcomp(my_data, center = FALSE, scale. = FALSE))
+t1 <- system.time(drr.1 <- drr(my_data))
+t2 <- system.time(drr.2 <- drr(my_data, fastkrr = 2))
+t3 <- system.time(drr.3 <- drr(my_data, fastkrr = 5))
+t4 <- system.time(drr.4 <- drr(my_data, fastkrr = 2, fastcv = TRUE))
+```
+
+
+## Plot the data
+```{r, echo = FALSE, results = "hold"}
+pairs(my_data , gap = 0, main = "iris")
+pairs(pca$x , gap = 0, main = "pca")
+pairs(drr.1$fitted.data, gap = 0, main = "drr.1")
+pairs(drr.2$fitted.data, gap = 0, main = "drr.2")
+pairs(drr.3$fitted.data, gap = 0, main = "drr.3")
+pairs(drr.4$fitted.data, gap = 0, main = "drr.4")
+```
+
+## Calculate RMSE
+```{r, echo = TRUE, tidy = TRUE}
+rmse <- matrix(NA_real_, nrow = 5, ncol = nvars,
+ dimnames = list(c("pca", "drr.1", "drr.2", "drr.3", "drr.4"),
+ seq_len(nvars)))
+
+for(i in seq_len(nvars)){
+ pca_inv <- pca$x[, 1:i, drop = FALSE] %*% t(pca$rotation[, 1:i, drop = FALSE])
+ rmse["pca", i] <- sqrt( sum( (my_data - pca_inv )^2) )
+ rmse["drr.1",i] <- sqrt( sum( (my_data - drr.1$inverse(drr.1$fitted.data[, 1:i, drop = FALSE]) )^2) )
+ rmse["drr.2",i] <- sqrt( sum( (my_data - drr.2$inverse(drr.2$fitted.data[, 1:i, drop = FALSE]) )^2) )
+ rmse["drr.3",i] <- sqrt( sum( (my_data - drr.3$inverse(drr.3$fitted.data[, 1:i, drop = FALSE]) )^2) )
+ rmse["drr.4",i] <- sqrt( sum( (my_data - drr.4$inverse(drr.4$fitted.data[, 1:i, drop = FALSE]) )^2) )
+}
+```
+
+## The Results
+More blocks for fastkrr speed up calculation, too are bad for
+accuracy.
+
+### RMSE
+```{r, echo = FALSE}
+print(rmse)
+```
+
+### Processing time
+```{r, echo = FALSE}
+print(rbind(pca = t0, drr.1 = t1, drr.2 = t2, drr.3 = t3, drr.4 = t4)[,1:3])
+```
+
+
+
+
diff --git a/inst/doc/comparePCA.html b/inst/doc/comparePCA.html
new file mode 100644
index 0000000..9f25d48
--- /dev/null
+++ b/inst/doc/comparePCA.html
@@ -0,0 +1,159 @@
+<!DOCTYPE html>
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+
+<meta charset="utf-8">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="pandoc" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1">
+
+<meta name="author" content="Guido Kraemer" />
+
+<meta name="date" content="2016-09-15" />
+
+<title>Comparing DRR and PCA</title>
+
+
+
+<style type="text/css">code{white-space: pre;}</style>
+<style type="text/css">
+div.sourceCode { overflow-x: auto; }
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+ margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal */
+code > span.bn { color: #40a070; } /* BaseN */
+code > span.fl { color: #40a070; } /* Float */
+code > span.ch { color: #4070a0; } /* Char */
+code > span.st { color: #4070a0; } /* String */
+code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* Other */
+code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code > span.fu { color: #06287e; } /* Function */
+code > span.er { color: #ff0000; font-weight: bold; } /* Error */
+code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+code > span.cn { color: #880000; } /* Constant */
+code > span.sc { color: #4070a0; } /* SpecialChar */
+code > span.vs { color: #4070a0; } /* VerbatimString */
+code > span.ss { color: #bb6688; } /* SpecialString */
+code > span.im { } /* Import */
+code > span.va { color: #19177c; } /* Variable */
+code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code > span.op { color: #666666; } /* Operator */
+code > span.bu { } /* BuiltIn */
+code > span.ex { } /* Extension */
+code > span.pp { color: #bc7a00; } /* Preprocessor */
+code > span.at { color: #7d9029; } /* Attribute */
+code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+</style>
+
+
+
+<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20bot [...]
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">Comparing DRR and PCA</h1>
+<h4 class="author"><em>Guido Kraemer</em></h4>
+<h4 class="date"><em>2016-09-15</em></h4>
+
+
+
+<p>This is an example application to compare the accuracy and computational speed of DRR for different parameters to PCA.</p>
+<div id="load-libraries" class="section level2">
+<h2>Load libraries</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(DRR)</code></pre></div>
+</div>
+<div id="read-in-data" class="section level2">
+<h2>Read in data</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">data</span>(iris)
+
+in_data <-<span class="st"> </span>iris[,<span class="dv">1</span>:<span class="dv">4</span>]
+
+npoints <-<span class="st"> </span><span class="kw">nrow</span>(in_data)
+nvars <-<span class="st"> </span><span class="kw">ncol</span>(in_data)
+for (i in <span class="kw">seq_len</span>(nvars)) in_data[[i]] <-<span class="st"> </span><span class="kw">as.numeric</span>(in_data[[i]])
+my_data <-<span class="st"> </span><span class="kw">scale</span>(in_data[<span class="kw">sample</span>(npoints),], <span class="dt">scale =</span> <span class="ot">FALSE</span>)</code></pre></div>
+</div>
+<div id="fit-the-dimensionality-reductions." class="section level2">
+<h2>Fit the dimensionality reductions.</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t0 <-<span class="st"> </span><span class="kw">system.time</span>(pca <-<span class="st"> </span><span class="kw">prcomp</span>(my_data, <span class="dt">center =</span> <span class="ot">FALSE</span>, <span class="dt">scale. =</span> <span class="ot">FALSE</span>))
+t1 <-<span class="st"> </span><span class="kw">system.time</span>(drr<span class="fl">.1</span> <-<span class="st"> </span><span class="kw">drr</span>(my_data))
+t2 <-<span class="st"> </span><span class="kw">system.time</span>(drr<span class="fl">.2</span> <-<span class="st"> </span><span class="kw">drr</span>(my_data, <span class="dt">fastkrr =</span> <span class="dv">2</span>))
+t3 <-<span class="st"> </span><span class="kw">system.time</span>(drr<span class="fl">.3</span> <-<span class="st"> </span><span class="kw">drr</span>(my_data, <span class="dt">fastkrr =</span> <span class="dv">5</span>))
+t4 <-<span class="st"> </span><span class="kw">system.time</span>(drr<span class="fl">.4</span> <-<span class="st"> </span><span class="kw">drr</span>(my_data, <span class="dt">fastkrr =</span> <span class="dv">2</span>, <span class="dt">fastcv =</span> <span class="ot">TRUE</span>))</code></pre></div>
+</div>
+<div id="plot-the-data" class="section level2">
+<h2>Plot the data</h2>
+<p><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAASAAAAEgCAMAAAAjXV6yAAADAFBMVEUAAAABAQECAgIDAwMEBAQFBQUGBgYHBwcICAgJCQkKCgoLCwsMDAwNDQ0ODg4PDw8QEBARERESEhITExMUFBQVFRUWFhYXFxcYGBgZGRkaGhobGxscHBwdHR0eHh4fHx8gICAhISEiIiIjIyMkJCQlJSUmJiYnJycoKCgpKSkqKiorKyssLCwtLS0uLi4vLy8wMDAxMTEyMjIzMzM0NDQ1NTU2NjY3Nzc4ODg5OTk6Ojo7Ozs8PDw9PT0+Pj4/Pz9AQEBBQUFCQkJDQ0NERERFRUVGRkZHR0dISEhJSUlKSkpLS0tMTExNTU1OTk5PT09QUFBRUVFSUlJTU1NUVFRVVVVWVlZXV1dYWFhZWVlaWlpbW1tcXFxdXV1eXl5fX19gYGBhYWFiYmJjY2NkZ [...]
+</div>
+<div id="calculate-rmse" class="section level2">
+<h2>Calculate RMSE</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rmse <-<span class="st"> </span><span class="kw">matrix</span>(<span class="ot">NA_real_</span>, <span class="dt">nrow =</span> <span class="dv">5</span>, <span class="dt">ncol =</span> nvars, <span class="dt">dimnames =</span> <span class="kw">list</span>(<span class="kw">c</span>(<span class="st">"pca"</span>, <span class="st">"drr.1"</span>,
+ <span class="st">"drr.2"</span>, <span class="st">"drr.3"</span>, <span class="st">"drr.4"</span>), <span class="kw">seq_len</span>(nvars)))
+
+for (i in <span class="kw">seq_len</span>(nvars)) {
+ pca_inv <-<span class="st"> </span>pca$x[, <span class="dv">1</span>:i, drop =<span class="st"> </span><span class="ot">FALSE</span>] %*%<span class="st"> </span><span class="kw">t</span>(pca$rotation[, <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>])
+ rmse[<span class="st">"pca"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>pca_inv)^<span class="dv">2</span>))
+ rmse[<span class="st">"drr.1"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>drr<span class="fl">.1</span>$<span class="kw">inverse</span>(drr<span class="fl">.1</span>$fitted.data[,
+ <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>]))^<span class="dv">2</span>))
+ rmse[<span class="st">"drr.2"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>drr<span class="fl">.2</span>$<span class="kw">inverse</span>(drr<span class="fl">.2</span>$fitted.data[,
+ <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>]))^<span class="dv">2</span>))
+ rmse[<span class="st">"drr.3"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>drr<span class="fl">.3</span>$<span class="kw">inverse</span>(drr<span class="fl">.3</span>$fitted.data[,
+ <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>]))^<span class="dv">2</span>))
+ rmse[<span class="st">"drr.4"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>drr<span class="fl">.4</span>$<span class="kw">inverse</span>(drr<span class="fl">.4</span>$fitted.data[,
+ <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>]))^<span class="dv">2</span>))
+}</code></pre></div>
+</div>
+<div id="the-results" class="section level2">
+<h2>The Results</h2>
+<p>More blocks for fastkrr speed up calculation, too are bad for accuracy.</p>
+<div id="rmse" class="section level3">
+<h3>RMSE</h3>
+<pre><code>## 1 2 3 4
+## pca 7.166770 3.899313 1.884524 1.732772e-14
+## drr.1 5.469965 3.458503 1.709825 1.675732e-14
+## drr.2 5.478541 3.487811 1.645224 1.674559e-14
+## drr.3 5.552791 3.499912 1.668842 1.674177e-14
+## drr.4 5.570917 3.690015 1.642986 1.674143e-14</code></pre>
+</div>
+<div id="processing-time" class="section level3">
+<h3>Processing time</h3>
+<pre><code>## user.self sys.self elapsed
+## pca 0.000 0.000 0.001
+## drr.1 30.412 51.860 24.329
+## drr.2 20.200 34.372 15.491
+## drr.3 39.788 67.140 32.411
+## drr.4 22.648 21.956 19.565</code></pre>
+</div>
+</div>
+
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+ (function () {
+ var script = document.createElement("script");
+ script.type = "text/javascript";
+ script.src = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
+ document.getElementsByTagName("head")[0].appendChild(script);
+ })();
+</script>
+
+</body>
+</html>
diff --git a/man/DRR-package.Rd b/man/DRR-package.Rd
new file mode 100644
index 0000000..6070b69
--- /dev/null
+++ b/man/DRR-package.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/DRR-package.R
+\docType{package}
+\name{DRR-package}
+\alias{DRR}
+\alias{DRR-package}
+\title{Dimensionality Reduction via Regression.}
+\description{
+DRR implements the Dimensionality Reduction via Regression using
+Kernel Ridge Regression. It also adds a faster implementation of
+Kernel Ridge regression that can be used with the CVST package.
+}
+\details{
+Thanks to the Max Planck Institute for Biogeochemistry in Jena for
+the funding.
+}
+\references{
+Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
+ Reduction via Regression in Hyperspectral Imagery. IEEE Journal
+ of Selected Topics in Signal Processing 9,
+ 1026-1036. doi:10.1109/JSTSP.2015.2417833
+Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
+ Kernel Ridge Regression: A Distributed Algorithm with Minimax
+ Optimal Rates. arXiv:1305.5029 [cs, math, stat].
+}
+
diff --git a/man/constructFastKRRLearner.Rd b/man/constructFastKRRLearner.Rd
new file mode 100644
index 0000000..8f1a616
--- /dev/null
+++ b/man/constructFastKRRLearner.Rd
@@ -0,0 +1,62 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fastKRR.R
+\name{constructFastKRRLearner}
+\alias{constructFastKRRLearner}
+\title{Fast implementation for Kernel Ridge Regression.}
+\usage{
+constructFastKRRLearner()
+}
+\value{
+Returns a learner similar to \code{\link[CVST]{constructKRRLearner}}
+ suitable for the use with \code{\link[CVST]{CV}} and
+ \code{\link[CVST]{fastCV}}.
+}
+\description{
+Constructs a learner for the divide and conquer version of KRR.
+}
+\details{
+This function is to be used with the CVST package as a drop in
+replacement for \code{\link[CVST]{constructKRRLearner}}. The
+implementation approximates the inversion of the kernel Matrix
+using the divide an conquer scheme, lowering computational and
+memory complexity from \eqn{O(n^3)} and \eqn{O(n^2)} to
+\eqn{O(n^3/m^2)} and \eqn{O(n^2/m^2)} respectively, where m are the
+number of blocks to be used (parameter nblocks). Theoretically safe
+values for \eqn{m} are \eqn{< n^{1/3}}, but practically \eqn{m} may
+be a little bit larger. The function will issue a warning, if the
+value for \eqn{m} is too large.
+}
+\examples{
+ns <- noisySinc(1000)
+nsTest <- noisySinc(1000)
+
+fast.krr <- constructFastKRRLearner()
+fast.p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns), nblocks = 4)
+system.time(fast.m <- fast.krr$learn(ns, fast.p))
+fast.pred <- fast.krr$predict(fast.m, nsTest)
+sum((fast.pred - nsTest$y)^2) / getN(nsTest)
+
+\dontrun{
+krr <- CVST::constructKRRLearner()
+p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns))
+system.time(m <- krr$learn(ns, p))
+pred <- krr$predict(m, nsTest)
+sum((pred - nsTest$y)^2) / getN(nsTest)
+
+plot(ns, col = '#00000030', pch = 19)
+lines(sort(nsTest$x), fast.pred[order(nsTest$x)], col = '#00C000', lty = 2)
+lines(sort(nsTest$x), pred[order(nsTest$x)], col = '#0000C0', lty = 2)
+legend('topleft', legend = c('fast KRR', 'KRR'),
+ col = c('#00C000', '#0000C0'), lty = 2)
+}
+
+}
+\references{
+Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
+ Kernel Ridge Regression: A Distributed Algorithm with Minimax
+ Optimal Rates. arXiv:1305.5029 [cs, math, stat].
+}
+\seealso{
+\code{\link[CVST]{constructLearner}}
+}
+
diff --git a/man/drr.Rd b/man/drr.Rd
new file mode 100644
index 0000000..7c08026
--- /dev/null
+++ b/man/drr.Rd
@@ -0,0 +1,132 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/DRR.R
+\name{drr}
+\alias{drr}
+\title{Dimensionality Reduction via Regression}
+\usage{
+drr(X, ndim = ncol(X), lambda = c(0, 10^(-3:2)), kernel = "rbfdot",
+ kernel.pars = list(sigma = 10^(-3:4)), pca = TRUE, pca.center = TRUE,
+ pca.scale = FALSE, fastcv = FALSE, cv.folds = 5, fastcv.test = NULL,
+ fastkrr.nblocks = 4, verbose = TRUE)
+}
+\arguments{
+\item{X}{input data, a matrix.}
+
+\item{ndim}{the number of output dimensions and regression
+functions to be estimated, see details for inversion.}
+
+\item{lambda}{the penalty term for the Kernel Ridge Regression.}
+
+\item{kernel}{a kernel function or string, see
+\code{\link[kernlab]{kernel-class}} for details.}
+
+\item{kernel.pars}{a list with parameters for the kernel. each
+parameter can be a vector, crossvalidation will choose the best
+combination.}
+
+\item{pca}{logical, do a preprocessing using pca.}
+
+\item{pca.center}{logical, center data before applying pca.}
+
+\item{pca.scale}{logical, scale data before applying pca.}
+
+\item{fastcv}{if \code{TRUE} uses \code{\link[CVST]{fastCV}}, if
+\code{FALSE} uses \code{\link[CVST]{CV}} for crossvalidation.}
+
+\item{cv.folds}{if using normal crossvalidation, the number of
+folds to be used.}
+
+\item{fastcv.test}{an optional separate test data set to be used
+for \code{\link[CVST]{fastCV}}, handed over as option
+\code{test} to \code{\link[CVST]{fastCV}}.}
+
+\item{fastkrr.nblocks}{the number of blocks used for fast KRR,
+higher numbers are faster to compute but may introduce
+numerical inaccurracies, see
+\code{\link{constructFastKRRLearner}} for details.}
+
+\item{verbose}{logical, should the crossvalidation report back.}
+}
+\value{
+A list the following items:
+\itemize{
+ \item {"fitted.data"} The data in reduced dimensions.
+ \item {"pca.means"} The means used to center the original data.
+ \item {"pca.scale"} The standard deviations used to scale the original data.
+ \item {"pca.rotation"} The rotation matrix of the PCA.
+ \item {"models"} A list of models used to estimate each dimension.
+ \item {"apply"} A function to fit new data to the estimated model.
+ \item {"inverse"} A function to untransform data.
+}
+}
+\description{
+\code{drr} Implements Dimensionality Reduction via Regression using
+Kernel Ridge Regression.
+}
+\details{
+Parameter combination will be formed and cross-validation used to
+select the best combination. Cross-validation uses
+\code{\link[CVST]{CV}} or \code{\link[CVST]{fastCV}}.
+
+Pre-treatment of the data using a PCA and scaling is made
+\eqn{\alpha = Vx}. the representation in reduced dimensions is
+
+\deqn{y_i = \alpha - f_i(\alpha_1, \ldots, \alpha_{i-1})}
+
+then the final DRR representation is:
+
+\deqn{r = (\alpha_1, y_2, y_3, \ldots,y_d)}
+
+DRR is invertible by
+
+\deqn{\alpha_i = y_i + f_i(\alpha_1,\alpha_2, \ldots, alpha_{i-1})}
+
+If less dimensions are estimated, there will be less inverse
+functions and calculating the inverse will be inaccurate.
+}
+\examples{
+tt <- seq(0,4*pi, length.out = 200)
+helix <- cbind(
+ x = 3 * cos(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
+ y = 3 * sin(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
+ z = 2 * tt + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt)))
+)
+system.time(
+drr.fit <- drr(helix, ndim = 3, cv.folds = 4,
+ lambda = 10^(-2:1),
+ kernel.pars = list(sigma = 10^(0:3)),
+ fastkrr.nblocks = 2, verbose = TRUE,
+ fastcv = FALSE)
+)
+
+\dontrun{
+library(rgl)
+plot3d(helix)
+points3d(drr.fit$inverse(drr.fit$fitted.data[,1,drop = FALSE]), col = 'blue')
+points3d(drr.fit$inverse(drr.fit$fitted.data[,1:2]), col = 'red')
+
+plot3d(drr.fit$fitted.data)
+pad <- -3
+fd <- drr.fit$fitted.data
+xx <- seq(min(fd[,1]) , max(fd[,1]) , length.out = 25)
+yy <- seq(min(fd[,2]) - pad, max(fd[,2]) + pad, length.out = 5)
+zz <- seq(min(fd[,3]) - pad, max(fd[,3]) + pad, length.out = 5)
+
+dd <- as.matrix(expand.grid(xx, yy, zz))
+plot3d(helix)
+for(y in yy) for(x in xx)
+ rgl.linestrips(drr.fit$inverse(cbind(x, y, zz)), col = 'blue')
+for(y in yy) for(z in zz)
+ rgl.linestrips(drr.fit$inverse(cbind(xx, y, z)), col = 'blue')
+for(x in xx) for(z in zz)
+ rgl.linestrips(drr.fit$inverse(cbind(x, yy, z)), col = 'blue')
+}
+
+}
+\references{
+Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
+ Reduction via Regression in Hyperspectral Imagery. IEEE Journal
+ of Selected Topics in Signal Processing 9,
+ 1026-1036. doi:10.1109/JSTSP.2015.2417833
+}
+
diff --git a/vignettes/comparePCA.Rmd b/vignettes/comparePCA.Rmd
new file mode 100644
index 0000000..8d7cf47
--- /dev/null
+++ b/vignettes/comparePCA.Rmd
@@ -0,0 +1,84 @@
+---
+title: "Comparing DRR and PCA"
+author: "Guido Kraemer"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{Compare DRR and PCA}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+This is an example application to compare the accuracy and
+computational speed of DRR for different parameters to PCA.
+
+## Load libraries
+```{r, echo = TRUE, message = FALSE}
+library(DRR)
+```
+
+## Read in data
+```{r, echo = TRUE, warning = FALSE, error = FALSE}
+data(iris)
+
+in_data <- iris[,1:4]
+
+npoints <- nrow(in_data)
+nvars <- ncol(in_data)
+for (i in seq_len(nvars)) in_data[[i]] <- as.numeric(in_data[[i]])
+my_data <- scale(in_data[sample(npoints),], scale = FALSE)
+```
+
+## Fit the dimensionality reductions.
+```{r, echo = TRUE, results = "hide", warning = FALSE, error = FALSE, message = FALSE}
+t0 <- system.time(pca <- prcomp(my_data, center = FALSE, scale. = FALSE))
+t1 <- system.time(drr.1 <- drr(my_data))
+t2 <- system.time(drr.2 <- drr(my_data, fastkrr = 2))
+t3 <- system.time(drr.3 <- drr(my_data, fastkrr = 5))
+t4 <- system.time(drr.4 <- drr(my_data, fastkrr = 2, fastcv = TRUE))
+```
+
+
+## Plot the data
+```{r, echo = FALSE, results = "hold"}
+pairs(my_data , gap = 0, main = "iris")
+pairs(pca$x , gap = 0, main = "pca")
+pairs(drr.1$fitted.data, gap = 0, main = "drr.1")
+pairs(drr.2$fitted.data, gap = 0, main = "drr.2")
+pairs(drr.3$fitted.data, gap = 0, main = "drr.3")
+pairs(drr.4$fitted.data, gap = 0, main = "drr.4")
+```
+
+## Calculate RMSE
+```{r, echo = TRUE, tidy = TRUE}
+rmse <- matrix(NA_real_, nrow = 5, ncol = nvars,
+ dimnames = list(c("pca", "drr.1", "drr.2", "drr.3", "drr.4"),
+ seq_len(nvars)))
+
+for(i in seq_len(nvars)){
+ pca_inv <- pca$x[, 1:i, drop = FALSE] %*% t(pca$rotation[, 1:i, drop = FALSE])
+ rmse["pca", i] <- sqrt( sum( (my_data - pca_inv )^2) )
+ rmse["drr.1",i] <- sqrt( sum( (my_data - drr.1$inverse(drr.1$fitted.data[, 1:i, drop = FALSE]) )^2) )
+ rmse["drr.2",i] <- sqrt( sum( (my_data - drr.2$inverse(drr.2$fitted.data[, 1:i, drop = FALSE]) )^2) )
+ rmse["drr.3",i] <- sqrt( sum( (my_data - drr.3$inverse(drr.3$fitted.data[, 1:i, drop = FALSE]) )^2) )
+ rmse["drr.4",i] <- sqrt( sum( (my_data - drr.4$inverse(drr.4$fitted.data[, 1:i, drop = FALSE]) )^2) )
+}
+```
+
+## The Results
+More blocks for fastkrr speed up calculation, too are bad for
+accuracy.
+
+### RMSE
+```{r, echo = FALSE}
+print(rmse)
+```
+
+### Processing time
+```{r, echo = FALSE}
+print(rbind(pca = t0, drr.1 = t1, drr.2 = t2, drr.3 = t3, drr.4 = t4)[,1:3])
+```
+
+
+
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/r-cran-drr.git
More information about the debian-science-commits
mailing list