[r-cran-drr] 01/02: New upstream version 0.0.2

Andreas Tille tille at debian.org
Sun Oct 22 21:26:22 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-cran-drr.

commit 5f50baaa4c26b069b5d1c74a928b122e526d2db1
Author: Andreas Tille <tille at debian.org>
Date:   Sun Oct 22 23:25:49 2017 +0200

    New upstream version 0.0.2
---
 DESCRIPTION                    |  21 ++++
 MD5                            |  14 +++
 NAMESPACE                      |   7 ++
 R/DRR-package.R                |  21 ++++
 R/DRR.R                        | 248 +++++++++++++++++++++++++++++++++++++++++
 R/fastKRR.R                    | 161 ++++++++++++++++++++++++++
 README.md                      |  21 ++++
 build/vignette.rds             | Bin 0 -> 211 bytes
 inst/doc/comparePCA.R          |  48 ++++++++
 inst/doc/comparePCA.Rmd        |  84 ++++++++++++++
 inst/doc/comparePCA.html       | 159 ++++++++++++++++++++++++++
 man/DRR-package.Rd             |  26 +++++
 man/constructFastKRRLearner.Rd |  62 +++++++++++
 man/drr.Rd                     | 132 ++++++++++++++++++++++
 vignettes/comparePCA.Rmd       |  84 ++++++++++++++
 15 files changed, 1088 insertions(+)

diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..5c8679c
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,21 @@
+Package: DRR
+Title: Dimensionality Reduction via Regression
+Version: 0.0.2
+Authors at R: person("Guido", "Kraemer",
+                  email = "gkraemer at bgc-jena.mpg.de",
+		  role = c("aut","cre"))
+Description: An implementation of Dimensionality Reduction
+    via Regression using Kernel Ridge Regression.
+License: GPL-3
+Imports: stats, methods
+Suggests: knitr
+VignetteBuilder: knitr
+LazyData: true
+Depends: kernlab, CVST, Matrix
+RoxygenNote: 5.0.1
+NeedsCompilation: no
+Packaged: 2016-09-15 09:54:12 UTC; gkraemer
+Author: Guido Kraemer [aut, cre]
+Maintainer: Guido Kraemer <gkraemer at bgc-jena.mpg.de>
+Repository: CRAN
+Date/Publication: 2016-09-15 15:46:27
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..eff6112
--- /dev/null
+++ b/MD5
@@ -0,0 +1,14 @@
+4e03d25f1635cbcc23e5326330fdaa24 *DESCRIPTION
+bab4ad6c5e310c1e95b701ae12c97e71 *NAMESPACE
+1ab6357662737c5bffc2d64479f18e6b *R/DRR-package.R
+d841bef331530ab81836262ebea7970b *R/DRR.R
+35a4d665232367ed6a64040f4d29756e *R/fastKRR.R
+6cdb32c1ae5ab787e1ffd9a85d0b23e8 *README.md
+7ca7ab51dbac1c5e41c58202c8dc7d9b *build/vignette.rds
+e72a819e472d18f7fe5e9f74831912c6 *inst/doc/comparePCA.R
+3994f2898362cd62287d6ec1161d534b *inst/doc/comparePCA.Rmd
+b698ae948c254d9f7c4ef4b8901dee8a *inst/doc/comparePCA.html
+08cf8625d95ab5e61ceadf890ed9ecca *man/DRR-package.Rd
+45275666649ffc029ec40838c775b04a *man/constructFastKRRLearner.Rd
+d12026ad13f5f628325934d8c7360937 *man/drr.Rd
+3994f2898362cd62287d6ec1161d534b *vignettes/comparePCA.Rmd
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..836f87d
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,7 @@
+# Generated by roxygen2: do not edit by hand
+
+export(constructFastKRRLearner)
+export(drr)
+import(CVST)
+import(Matrix)
+import(kernlab)
diff --git a/R/DRR-package.R b/R/DRR-package.R
new file mode 100644
index 0000000..e96ca18
--- /dev/null
+++ b/R/DRR-package.R
@@ -0,0 +1,21 @@
+#' Dimensionality Reduction via Regression.
+#'
+#' DRR implements Dimensionality Reduction via Regression using
+#' Kernel Ridge Regression. It also adds a faster implementation of
+#' Kernel Ridge Regression that can be used with the CVST package.
+#'
+#' Thanks to the Max Planck Institute for Biogeochemistry in Jena for
+#' the funding.
+#'
+#' @references
+#' Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
+#'     Reduction via Regression in Hyperspectral Imagery. IEEE Journal
+#'     of Selected Topics in Signal Processing 9,
+#'     1026-1036. doi:10.1109/JSTSP.2015.2417833
+#' Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
+#'     Kernel Ridge Regression: A Distributed Algorithm with Minimax
+#'     Optimal Rates. arXiv:1305.5029 [cs, math, stat].
+#'
+#' @docType package
+"_PACKAGE"
+
diff --git a/R/DRR.R b/R/DRR.R
new file mode 100644
index 0000000..2ed3450
--- /dev/null
+++ b/R/DRR.R
@@ -0,0 +1,248 @@
+#' Dimensionality Reduction via Regression
+#'
+#' 
+#' \code{drr} implements Dimensionality Reduction via Regression using
+#' Kernel Ridge Regression.
+#'
+#' 
+#' Parameter combinations are formed and cross-validation is used to
+#' select the best one. Cross-validation uses
+#' \code{\link[CVST]{CV}} or \code{\link[CVST]{fastCV}}.
+#'
+#' The data is pre-treated with scaling and a PCA,
+#' \eqn{\alpha = Vx}. The representation in reduced dimensions is
+#' 
+#' \deqn{y_i = \alpha_i - f_i(\alpha_1, \ldots, \alpha_{i-1})}
+#'
+#' Then the final DRR representation is:
+#' 
+#' \deqn{r = (\alpha_1, y_2, y_3, \ldots, y_d)}
+#'
+#' DRR is invertible by
+#'
+#' \deqn{\alpha_i = y_i + f_i(\alpha_1, \alpha_2, \ldots, \alpha_{i-1})}
+#'
+#' If fewer dimensions are estimated, there will be fewer inverse
+#' functions and calculating the inverse will be inaccurate.
+#' 
+#' 
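+#' As a minimal sketch (assuming \code{fit} is the list returned by
+#' \code{drr}), the forward and inverse transforms are applied as:
+#' \preformatted{
+#' r    <- fit$apply(X)       # map new data to reduced dimensions
+#' Xrec <- fit$inverse(r)     # approximately reconstruct the original data
+#' }
+#'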
+#' @references
+#' Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
+#'     Reduction via Regression in Hyperspectral Imagery. IEEE Journal
+#'     of Selected Topics in Signal Processing 9,
+#'     1026-1036. doi:10.1109/JSTSP.2015.2417833
+#'
+#' @param X input data, a matrix.
+#' @param ndim the number of output dimensions and regression
+#'     functions to be estimated, see details for inversion.
+#' @param lambda the penalty term for the Kernel Ridge Regression.
+#' @param kernel a kernel function or string, see
+#'     \code{\link[kernlab]{kernel-class}} for details.
+#' @param kernel.pars a list with parameters for the kernel. Each
+#'     parameter can be a vector; cross-validation will choose the
+#'     best combination.
+#' @param pca logical, whether to preprocess the data using PCA.
+#' @param pca.center logical, center the data before applying PCA.
+#' @param pca.scale logical, scale the data before applying PCA.
+#' @param fastcv if \code{TRUE} uses \code{\link[CVST]{fastCV}}, if
+#'     \code{FALSE} uses \code{\link[CVST]{CV}} for cross-validation.
+#' @param cv.folds if using normal cross-validation, the number of
+#'     folds to be used.
+#' @param fastcv.test an optional separate test data set to be used
+#'     for \code{\link[CVST]{fastCV}}, handed over as option
+#'     \code{test} to \code{\link[CVST]{fastCV}}.
+#' @param fastkrr.nblocks the number of blocks used for fast KRR,
+#'     higher numbers are faster to compute but may introduce
+#'     numerical inaccuracies, see
+#'     \code{\link{constructFastKRRLearner}} for details.
+#' @param verbose logical, whether cross-validation should report progress.
+#'
+#' @return A list with the following items:
+#' \itemize{
+#'  \item {"fitted.data"} The data in reduced dimensions.
+#'  \item {"pca.means"} The means used to center the original data.
+#'  \item {"pca.scale"} The standard deviations used to scale the original data.
+#'  \item {"pca.rotation"} The rotation matrix of the PCA.
+#'  \item {"models"} A list of models used to estimate each dimension.
+#'  \item {"apply"} A function to fit new data to the estimated model.
+#'  \item {"inverse"} A function to untransform data. 
+#' }
+#' 
+#' @examples
+#' tt <- seq(0,4*pi, length.out = 200)
+#' helix <- cbind(
+#'   x = 3 * cos(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
+#'   y = 3 * sin(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
+#'   z = 2 * tt      + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt)))
+#' )
+#' system.time(
+#' drr.fit  <- drr(helix, ndim = 3, cv.folds = 4,
+#'                 lambda = 10^(-2:1),
+#'                 kernel.pars = list(sigma = 10^(0:3)),
+#'                 fastkrr.nblocks = 2, verbose = TRUE,
+#'                 fastcv = FALSE)
+#' )
+#'
+#' \dontrun{
+#' library(rgl)
+#' plot3d(helix)
+#' points3d(drr.fit$inverse(drr.fit$fitted.data[,1,drop = FALSE]), col = 'blue')
+#' points3d(drr.fit$inverse(drr.fit$fitted.data[,1:2]),             col = 'red')
+#' 
+#' plot3d(drr.fit$fitted.data)
+#' pad <- -3
+#' fd <- drr.fit$fitted.data
+#' xx <- seq(min(fd[,1])      , max(fd[,1])      , length.out = 25)
+#' yy <- seq(min(fd[,2]) - pad, max(fd[,2]) + pad, length.out = 5)
+#' zz <- seq(min(fd[,3]) - pad, max(fd[,3]) + pad, length.out = 5)
+#'
+#' dd <- as.matrix(expand.grid(xx, yy, zz))
+#' plot3d(helix)
+#' for(y in yy) for(x in xx)
+#'   rgl.linestrips(drr.fit$inverse(cbind(x, y, zz)), col = 'blue')
+#' for(y in yy) for(z in zz)
+#'   rgl.linestrips(drr.fit$inverse(cbind(xx, y, z)), col = 'blue')
+#' for(x in xx) for(z in zz)
+#'   rgl.linestrips(drr.fit$inverse(cbind(x, yy, z)), col = 'blue')
+#' }
+#'
+#' @import Matrix
+#' @import kernlab
+#' @import CVST
+#' @export
+drr <- function (X, ndim         = ncol(X),
+                 lambda          = c(0, 10^(-3:2)),
+                 kernel          = 'rbfdot',
+                 kernel.pars     = list(sigma = 10^(-3:4)),
+                 pca             = TRUE,
+                 pca.center      = TRUE,
+                 pca.scale       = FALSE,
+                 fastcv          = FALSE,
+                 cv.folds        = 5,
+                 fastcv.test     = NULL,
+                 fastkrr.nblocks = 4,
+                 verbose         = TRUE)  {
+    if((!fastcv) && (cv.folds <= 1)) stop("need more than one fold for crossvalidation")
+    if(cv.folds %% 1 != 0)           stop("cv.folds must be a whole number")
+    if(fastkrr.nblocks < 1)          stop("fastkrr.nblocks must be at least 1")
+    if(fastkrr.nblocks %% 1 != 0)    stop('fastkrr.nblocks must be a whole number')
+    if(!requireNamespace("CVST"))    stop("require the 'CVST' package")
+    if(!requireNamespace("kernlab")) stop("require 'kernlab' package")
+    if(ndim < ncol(X))               warning('ndim < data dimensionality, the inverse functions will be incomplete!')
+    if(ndim > ncol(X))               ndim <- ncol(X)
+
+    if (pca) {
+        pca <- stats::prcomp(X, center = pca.center, scale. = pca.scale)
+        if (!pca.center) pca$center <- rep(0, ncol(X))
+        if (!pca.scale) pca$scale   <- rep(1, ncol(X))
+    } else {
+        pca <- list()
+        pca$x <- X
+        pca$rotation <- diag(1, ncol(X), ncol(X))
+        pca$center <- rep(0, ncol(X))
+        pca$scale <- rep(1, ncol(X))
+    }
+
+    
+    alpha <- pca$x
+    d <- ndim
+
+    kpars <- kernel.pars
+    kpars$kernel <- kernel
+    kpars$lambda <- lambda
+    kpars$nblocks <- fastkrr.nblocks
+
+    krrl <- constructFastKRRLearner()
+       
+    p <- do.call(CVST::constructParams, kpars)
+    
+    Y <- matrix(NA_real_, nrow = nrow(X), ncol = d)
+    models <- list()
+    if (d > 1) for (i in d:2) {
+        message(Sys.time(), ": Constructing Axis ", d-i+1, "/", d)
+        data <- CVST::constructData(
+            x = alpha[,1:(i-1), drop = FALSE],
+            y = alpha[,i]
+        )
+
+        cat("predictors: ", colnames(alpha)[1:(i-1)], "dependent: ", colnames(alpha)[i], '\n')
+
+     
+        res <- if (fastcv) {
+                   CVST::fastCV(
+                       data, krrl, p,
+                       CVST::constructCVSTModel(),
+                       test = fastcv.test,
+                       verbose = verbose
+                   )
+               } else {
+                   CVST::CV(
+                       data, krrl, p,
+                       fold = cv.folds,
+                       verbose = verbose
+                   )
+               }
+        model <- krrl$learn(data, res[[1]])
+
+        models[[i]] <- model
+        Y[,i] <- as.matrix(alpha[,i] - krrl$predict(model, data))
+    }
+    ## the remaining dimension needs no regression model, it is copied as-is
+    message(Sys.time(), ": Constructing Axis ", d, "/", d)
+    Y[,1] <- alpha[,1]
+    models[[1]] <- list()
+
+    
+    appl <- function(x) {
+        ## apply PCA
+        dat <- scale(x, pca$center, pca$scale)
+        dat <-  dat %*% pca$rotation
+        
+        ## apply KRR
+        outdat <- matrix(NA_real_, ncol = d, nrow = nrow(x))
+        if(d > 1) for (i in d:2)
+            outdat[,i] <-
+                dat[,i] - krrl$predict(
+                              models[[i]],
+                              CVST::constructData(x = dat[,1:(i-1), drop = FALSE],
+                                                  y = NA)
+                          )
+        
+        outdat[,1] <- dat[,1]
+    
+        return(outdat)                
+    }
+
+    inv <- function(x){
+        dat <- cbind(x, matrix(0, nrow(x), ncol(X)-ncol(x)))
+
+        outdat <- dat
+
+        ## invert the KRR step dimension by dimension
+        if(d > 1) for (i in 2:d)
+            outdat[,i] <- dat[,i] + krrl$predict(
+                              models[[i]],
+                              CVST::constructData(x = outdat[,1:(i-1), drop = FALSE],
+                                                  y = NA)
+                          )
+        
+        ## inverse pca
+        outdat <- outdat %*% t(pca$rotation)
+        outdat <- sweep(outdat, 2L, pca$scale, "*")
+        outdat <- sweep(outdat, 2L, pca$center, "+")
+        
+        return(outdat)        
+    }
+                                   
+    return(list(
+        fitted.data = Y,
+        pca.means = pca$center,
+        pca.scale = pca$scale,
+        pca.rotation = pca$rotation,
+        models = models,
+        apply = appl,
+        inverse = inv
+    ))
+}
+
diff --git a/R/fastKRR.R b/R/fastKRR.R
new file mode 100644
index 0000000..0928278
--- /dev/null
+++ b/R/fastKRR.R
@@ -0,0 +1,161 @@
+#' Fast implementation for Kernel Ridge Regression.
+#' 
+#' Constructs a learner for the divide and conquer version of KRR.
+#'
+#' This function is to be used with the CVST package as a drop in
+#' replacement for \code{\link[CVST]{constructKRRLearner}}. The
+#' implementation approximates the inversion of the kernel matrix
+#' using the divide and conquer scheme, lowering computational and
+#' memory complexity from \eqn{O(n^3)} and \eqn{O(n^2)} to
+#' \eqn{O(n^3/m^2)} and \eqn{O(n^2/m^2)} respectively, where \eqn{m} is
+#' the number of blocks to be used (parameter \code{nblocks}).
+#' Theoretically safe values for \eqn{m} are \eqn{< n^{1/3}}, but in
+#' practice \eqn{m} may be a little larger. The function will issue a
+#' warning if the value for \eqn{m} is too large.
+#'
+#' 
+#'
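+#' As a minimal sketch, the scheme is: split the \eqn{n} observations
+#' into \eqn{m} blocks, fit an ordinary KRR model on each block, and
+#' average the block predictions (\code{krr_fit} and \code{krr_predict}
+#' are illustrative names, not the exact implementation):
+#' \preformatted{
+#' models <- lapply(blocks, function(b) krr_fit(x[b, ], y[b], lambda))
+#' pred   <- Reduce(`+`, lapply(models, krr_predict, newx)) / length(models)
+#' }
+#'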
+#' @return Returns a learner similar to \code{\link[CVST]{constructKRRLearner}}
+#'     suitable for the use with \code{\link[CVST]{CV}} and
+#'     \code{\link[CVST]{fastCV}}.
+#'
+#' @seealso \code{\link[CVST]{constructLearner}}
+#' 
+#' @references
+#' Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
+#'     Kernel Ridge Regression: A Distributed Algorithm with Minimax
+#'     Optimal Rates. arXiv:1305.5029 [cs, math, stat].
+#' 
+#' @examples
+#' ns <- noisySinc(1000)
+#' nsTest <- noisySinc(1000)
+#' 
+#' fast.krr <- constructFastKRRLearner()
+#' fast.p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns), nblocks = 4)
+#' system.time(fast.m <- fast.krr$learn(ns, fast.p))
+#' fast.pred <- fast.krr$predict(fast.m, nsTest)
+#' sum((fast.pred - nsTest$y)^2) / getN(nsTest)
+#'
+#' \dontrun{
+#' krr <- CVST::constructKRRLearner()
+#' p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns))
+#' system.time(m <- krr$learn(ns, p))
+#' pred <- krr$predict(m, nsTest)
+#' sum((pred - nsTest$y)^2) / getN(nsTest)
+#'
+#' plot(ns, col = '#00000030', pch = 19)
+#' lines(sort(nsTest$x), fast.pred[order(nsTest$x)], col = '#00C000', lty = 2)
+#' lines(sort(nsTest$x), pred[order(nsTest$x)], col = '#0000C0', lty = 2)
+#' legend('topleft', legend = c('fast KRR', 'KRR'),
+#'        col = c('#00C000', '#0000C0'), lty = 2)
+#' }
+#'
+#' @import Matrix
+#' @import CVST
+#' @import kernlab
+#' @export
+constructFastKRRLearner <- function () {
+    if(!requireNamespace("CVST")) stop("require the 'CVST' package")
+    if(!requireNamespace("kernlab")) stop("require 'kernlab' package")
+
+    learn.krr <- function(data, params) {
+        stopifnot(CVST::isRegression(data))
+        nblocks <- params$nblocks
+        nobs <- nrow(data$x)
+        if (log(nblocks) / log(nobs) > 1/3) {
+            warning(
+                "Number of blocks too large wrt. number of observations, log(m)/log(N) = ",
+                sprintf("%.2f", log(nblocks)), "/", sprintf("%.2f", log(nobs)),
+                " = ", sprintf("%.2f", log(nblocks)/log(nobs)),
+                ", should be < 1/3, you results may suffer numerical inaccurracy. ",
+                "For detail see Zhang et. al. (2013)"
+            )
+        }
+
+        ## make kernel function
+        kpar <- params[setdiff(names(params), c("kernel", "lambda", "nblocks"))]
+        kernel <- get_kernel_fun(params$kernel, kpar)
+
+        ## make blocks for samples
+        shuff <- sample(1:nobs)
+        blocksizes <- makeBlocks(nobs, nblocks)
+        bends <- cumsum(blocksizes)
+        bstarts <- c(1, bends[-nblocks] + 1)
+
+        ## we make nblock models for the subsamples
+        ## this can be parallelized:
+        models <- list()
+        for(i in 1:nblocks) {
+            iIndices <- shuff[ bstarts[i]:bends[i] ]
+            models[[i]] <- krr(data$x[iIndices,], kernel, data$y[iIndices], params$lambda)
+        }
+        return(models)
+    } # end learn.krr
+    
+    predict.krr <- function(models, newData) {
+        stopifnot(CVST::isRegression(newData))
+
+        nModels <- length(models)
+        pred <- rep(0, nrow(newData$x))
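+        ## sum the predictions of all block models, then average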
+        for(i in 1:nModels) {
+            pred <- pred + krr.predict(newData$x, models[[i]])
+        }
+        pred <- pred / nModels
+        return(as.matrix(pred))
+    }
+        
+               
+  return(CVST::constructLearner(learn.krr, predict.krr))
+}
+
+# dividing nobs into nblocks parts that have approximately the same size
+#
+# @param nobs total number of observations
+# @param nblocks number of blocks
+#
+# @return vector of integers of length \code{nblocks} that sums up to
+#   \code{nobs}
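+#
+# @examples
+# makeBlocks(10, 3) # returns c(4, 3, 3)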
+# 
+makeBlocks <- function (nobs, nblocks) {
+ 
+  maxbs <- nobs %/% nblocks
+  rest <-  nobs %% nblocks
+
+  res <- rep(maxbs, nblocks)
+  if(rest > 0) res[1:rest] <- maxbs + 1
+  return(res)
+}
+
+
+## get the kernel function out of the kernlab namespace:
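+## e.g. get_kernel_fun("rbfdot", list(sigma = 1)) returns kernlab::rbfdot(sigma = 1)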
+get_kernel_fun <- function (kernel, pars) {
+    if (!methods::is(kernel,"kernel")) {
+        if (methods::is(kernel,"function")) {
+            kernel <- deparse(substitute(kernel))
+        } else {
+            kernel <- get(kernel, asNamespace('kernlab'))
+        }
+        kernel <- do.call(kernel, pars)
+    }
+    return(kernel)
+}
+
+
+## internal functions from the CVST package; they have to be here because
+## CVST does not export them and CRAN does not allow the use of unexported
+## functions.
+
+## CVST:::.krr
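+## closed-form solution: alpha = (K + lambda * I)^{-1} y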
+krr <- function (data, kernel, y, lambda) {
+    K <- kernlab::kernelMatrix(kernel, data)
+    N <- nrow(K)
+    alpha <- solve(Matrix(K + diag(lambda, N))) %*% y
+    return(list(data = data, kernel = kernel, alpha = alpha))
+}
+
+## CVST:::.krr.predict
+krr.predict <- function (newData, krr) {
+    k <- kernlab::kernelMatrix(krr$kernel, newData, krr$data)
+    return(k %*% krr$alpha)
+}
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..15d6a1e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,21 @@
+# DRR
+Dimensionality Reduction via Regression
+
+An implementation of Dimensionality Reduction
+via Regression using Kernel Ridge Regression.
+
+## Installing:
+```R
+## install.packages("devtools")
+devtools::install_github("gdkrmr/DRR")
+```
+
+Install from CRAN:
+```R
+install.packages("DRR")
+```
+
+Load it:
+```R
+library(DRR)
+```
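+
+## Usage:
+A minimal sketch (`iris[, 1:4]` is just a stand-in for your own numeric
+matrix; the defaults cross-validate over many parameter combinations, so
+this may take a moment):
+```R
+fit <- drr(as.matrix(iris[, 1:4]))
+head(fit$fitted.data)               # data in reduced dimensions
+rec <- fit$inverse(fit$fitted.data) # approximate reconstruction
+```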
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..6cec450
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/inst/doc/comparePCA.R b/inst/doc/comparePCA.R
new file mode 100644
index 0000000..a69db66
--- /dev/null
+++ b/inst/doc/comparePCA.R
@@ -0,0 +1,48 @@
+## ---- echo = TRUE, message = FALSE---------------------------------------
+library(DRR)
+
+## ---- echo = TRUE, warning = FALSE, error = FALSE------------------------
+data(iris)
+
+in_data <- iris[,1:4]
+
+npoints <- nrow(in_data)
+nvars <- ncol(in_data)
+for (i in seq_len(nvars)) in_data[[i]] <- as.numeric(in_data[[i]])
+my_data <- scale(in_data[sample(npoints),], scale = FALSE)
+
+## ---- echo = TRUE, results = "hide", warning = FALSE, error = FALSE, message = FALSE----
+t0 <- system.time(pca   <- prcomp(my_data, center = FALSE, scale. = FALSE))
+t1 <- system.time(drr.1 <- drr(my_data))
+t2 <- system.time(drr.2 <- drr(my_data, fastkrr = 2))
+t3 <- system.time(drr.3 <- drr(my_data, fastkrr = 5))
+t4 <- system.time(drr.4 <- drr(my_data, fastkrr = 2, fastcv = TRUE))
+
+## ---- echo = FALSE, results = "hold"-------------------------------------
+pairs(my_data          , gap = 0, main = "iris")
+pairs(pca$x            , gap = 0, main = "pca")
+pairs(drr.1$fitted.data, gap = 0, main = "drr.1")
+pairs(drr.2$fitted.data, gap = 0, main = "drr.2")
+pairs(drr.3$fitted.data, gap = 0, main = "drr.3")
+pairs(drr.4$fitted.data, gap = 0, main = "drr.4")
+
+## ---- echo = TRUE, tidy = TRUE-------------------------------------------
+rmse <- matrix(NA_real_, nrow = 5, ncol = nvars,
+               dimnames = list(c("pca", "drr.1", "drr.2", "drr.3", "drr.4"),
+                               seq_len(nvars)))
+
+for(i in seq_len(nvars)){
+    pca_inv <- pca$x[, 1:i, drop = FALSE] %*% t(pca$rotation[, 1:i, drop = FALSE])
+    rmse["pca",  i] <- sqrt( sum( (my_data - pca_inv                                               )^2) )
+    rmse["drr.1",i] <- sqrt( sum( (my_data - drr.1$inverse(drr.1$fitted.data[, 1:i, drop = FALSE]) )^2) )
+    rmse["drr.2",i] <- sqrt( sum( (my_data - drr.2$inverse(drr.2$fitted.data[, 1:i, drop = FALSE]) )^2) )
+    rmse["drr.3",i] <- sqrt( sum( (my_data - drr.3$inverse(drr.3$fitted.data[, 1:i, drop = FALSE]) )^2) )
+    rmse["drr.4",i] <- sqrt( sum( (my_data - drr.4$inverse(drr.4$fitted.data[, 1:i, drop = FALSE]) )^2) )
+}
+
+## ---- echo = FALSE-------------------------------------------------------
+print(rmse)
+
+## ---- echo = FALSE-------------------------------------------------------
+print(rbind(pca = t0, drr.1 = t1, drr.2 = t2, drr.3 = t3, drr.4 = t4)[,1:3])
+
diff --git a/inst/doc/comparePCA.Rmd b/inst/doc/comparePCA.Rmd
new file mode 100644
index 0000000..8d7cf47
--- /dev/null
+++ b/inst/doc/comparePCA.Rmd
@@ -0,0 +1,84 @@
+---
+title: "Comparing DRR and PCA"
+author: "Guido Kraemer"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+    %\VignetteIndexEntry{Compare DRR and PCA}
+    %\VignetteEngine{knitr::rmarkdown}
+    %\VignetteEncoding{UTF-8}
+---
+
+This is an example application to compare the accuracy and
+computational speed of DRR for different parameters to PCA.
+
+## Load libraries
+```{r, echo = TRUE, message = FALSE}
+library(DRR)
+```
+
+## Read in data
+```{r, echo = TRUE, warning = FALSE, error = FALSE}
+data(iris)
+
+in_data <- iris[,1:4]
+
+npoints <- nrow(in_data)
+nvars <- ncol(in_data)
+for (i in seq_len(nvars)) in_data[[i]] <- as.numeric(in_data[[i]])
+my_data <- scale(in_data[sample(npoints),], scale = FALSE)
+```
+
+## Fit the dimensionality reductions.
+```{r, echo = TRUE, results = "hide", warning = FALSE, error = FALSE, message = FALSE}
+t0 <- system.time(pca   <- prcomp(my_data, center = FALSE, scale. = FALSE))
+t1 <- system.time(drr.1 <- drr(my_data))
+t2 <- system.time(drr.2 <- drr(my_data, fastkrr = 2))
+t3 <- system.time(drr.3 <- drr(my_data, fastkrr = 5))
+t4 <- system.time(drr.4 <- drr(my_data, fastkrr = 2, fastcv = TRUE))
+```
+
+
+## Plot the data
+```{r, echo = FALSE, results = "hold"}
+pairs(my_data          , gap = 0, main = "iris")
+pairs(pca$x            , gap = 0, main = "pca")
+pairs(drr.1$fitted.data, gap = 0, main = "drr.1")
+pairs(drr.2$fitted.data, gap = 0, main = "drr.2")
+pairs(drr.3$fitted.data, gap = 0, main = "drr.3")
+pairs(drr.4$fitted.data, gap = 0, main = "drr.4")
+```
+
+## Calculate RMSE
+```{r, echo = TRUE, tidy = TRUE}
+rmse <- matrix(NA_real_, nrow = 5, ncol = nvars,
+               dimnames = list(c("pca", "drr.1", "drr.2", "drr.3", "drr.4"),
+                               seq_len(nvars)))
+
+for(i in seq_len(nvars)){
+    pca_inv <- pca$x[, 1:i, drop = FALSE] %*% t(pca$rotation[, 1:i, drop = FALSE])
+    rmse["pca",  i] <- sqrt( sum( (my_data - pca_inv                                               )^2) )
+    rmse["drr.1",i] <- sqrt( sum( (my_data - drr.1$inverse(drr.1$fitted.data[, 1:i, drop = FALSE]) )^2) )
+    rmse["drr.2",i] <- sqrt( sum( (my_data - drr.2$inverse(drr.2$fitted.data[, 1:i, drop = FALSE]) )^2) )
+    rmse["drr.3",i] <- sqrt( sum( (my_data - drr.3$inverse(drr.3$fitted.data[, 1:i, drop = FALSE]) )^2) )
+    rmse["drr.4",i] <- sqrt( sum( (my_data - drr.4$inverse(drr.4$fitted.data[, 1:i, drop = FALSE]) )^2) )
+}
+```
+
+## The Results
+More blocks for fastkrr speed up the calculation, but too many
+blocks hurt accuracy.
+
+### RMSE
+```{r, echo = FALSE}
+print(rmse)
+```
+
+### Processing time
+```{r, echo = FALSE}
+print(rbind(pca = t0, drr.1 = t1, drr.2 = t2, drr.3 = t3, drr.4 = t4)[,1:3])
+```
+
+
+
+
diff --git a/inst/doc/comparePCA.html b/inst/doc/comparePCA.html
new file mode 100644
index 0000000..9f25d48
--- /dev/null
+++ b/inst/doc/comparePCA.html
@@ -0,0 +1,159 @@
+<!DOCTYPE html>
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+
+<head>
+
+<meta charset="utf-8">
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<meta name="generator" content="pandoc" />
+
+<meta name="viewport" content="width=device-width, initial-scale=1">
+
+<meta name="author" content="Guido Kraemer" />
+
+<meta name="date" content="2016-09-15" />
+
+<title>Comparing DRR and PCA</title>
+
+
+
+<style type="text/css">code{white-space: pre;}</style>
+<style type="text/css">
+div.sourceCode { overflow-x: auto; }
+table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
+  margin: 0; padding: 0; vertical-align: baseline; border: none; }
+table.sourceCode { width: 100%; line-height: 100%; }
+td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
+td.sourceCode { padding-left: 5px; }
+code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
+code > span.dt { color: #902000; } /* DataType */
+code > span.dv { color: #40a070; } /* DecVal */
+code > span.bn { color: #40a070; } /* BaseN */
+code > span.fl { color: #40a070; } /* Float */
+code > span.ch { color: #4070a0; } /* Char */
+code > span.st { color: #4070a0; } /* String */
+code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
+code > span.ot { color: #007020; } /* Other */
+code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
+code > span.fu { color: #06287e; } /* Function */
+code > span.er { color: #ff0000; font-weight: bold; } /* Error */
+code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
+code > span.cn { color: #880000; } /* Constant */
+code > span.sc { color: #4070a0; } /* SpecialChar */
+code > span.vs { color: #4070a0; } /* VerbatimString */
+code > span.ss { color: #bb6688; } /* SpecialString */
+code > span.im { } /* Import */
+code > span.va { color: #19177c; } /* Variable */
+code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
+code > span.op { color: #666666; } /* Operator */
+code > span.bu { } /* BuiltIn */
+code > span.ex { } /* Extension */
+code > span.pp { color: #bc7a00; } /* Preprocessor */
+code > span.at { color: #7d9029; } /* Attribute */
+code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
+code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
+code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
+code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
+</style>
+
+
+
+<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20bot [...]
+
+</head>
+
+<body>
+
+
+
+
+<h1 class="title toc-ignore">Comparing DRR and PCA</h1>
+<h4 class="author"><em>Guido Kraemer</em></h4>
+<h4 class="date"><em>2016-09-15</em></h4>
+
+
+
+<p>This is an example application to compare the accuracy and computational speed of DRR for different parameters to PCA.</p>
+<div id="load-libraries" class="section level2">
+<h2>Load libraries</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(DRR)</code></pre></div>
+</div>
+<div id="read-in-data" class="section level2">
+<h2>Read in data</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">data</span>(iris)
+
+in_data <-<span class="st"> </span>iris[,<span class="dv">1</span>:<span class="dv">4</span>]
+
+npoints <-<span class="st"> </span><span class="kw">nrow</span>(in_data)
+nvars <-<span class="st"> </span><span class="kw">ncol</span>(in_data)
+for (i in <span class="kw">seq_len</span>(nvars)) in_data[[i]] <-<span class="st"> </span><span class="kw">as.numeric</span>(in_data[[i]])
+my_data <-<span class="st"> </span><span class="kw">scale</span>(in_data[<span class="kw">sample</span>(npoints),], <span class="dt">scale =</span> <span class="ot">FALSE</span>)</code></pre></div>
+</div>
+<div id="fit-the-dimensionality-reductions." class="section level2">
+<h2>Fit the dimensionality reductions.</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">t0 <-<span class="st"> </span><span class="kw">system.time</span>(pca   <-<span class="st"> </span><span class="kw">prcomp</span>(my_data, <span class="dt">center =</span> <span class="ot">FALSE</span>, <span class="dt">scale. =</span> <span class="ot">FALSE</span>))
+t1 <-<span class="st"> </span><span class="kw">system.time</span>(drr<span class="fl">.1</span> <-<span class="st"> </span><span class="kw">drr</span>(my_data))
+t2 <-<span class="st"> </span><span class="kw">system.time</span>(drr<span class="fl">.2</span> <-<span class="st"> </span><span class="kw">drr</span>(my_data, <span class="dt">fastkrr =</span> <span class="dv">2</span>))
+t3 <-<span class="st"> </span><span class="kw">system.time</span>(drr<span class="fl">.3</span> <-<span class="st"> </span><span class="kw">drr</span>(my_data, <span class="dt">fastkrr =</span> <span class="dv">5</span>))
+t4 <-<span class="st"> </span><span class="kw">system.time</span>(drr<span class="fl">.4</span> <-<span class="st"> </span><span class="kw">drr</span>(my_data, <span class="dt">fastkrr =</span> <span class="dv">2</span>, <span class="dt">fastcv =</span> <span class="ot">TRUE</span>))</code></pre></div>
+</div>
+<div id="plot-the-data" class="section level2">
+<h2>Plot the data</h2>
+<p><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAASAAAAEgCAMAAAAjXV6yAAADAFBMVEUAAAABAQECAgIDAwMEBAQFBQUGBgYHBwcICAgJCQkKCgoLCwsMDAwNDQ0ODg4PDw8QEBARERESEhITExMUFBQVFRUWFhYXFxcYGBgZGRkaGhobGxscHBwdHR0eHh4fHx8gICAhISEiIiIjIyMkJCQlJSUmJiYnJycoKCgpKSkqKiorKyssLCwtLS0uLi4vLy8wMDAxMTEyMjIzMzM0NDQ1NTU2NjY3Nzc4ODg5OTk6Ojo7Ozs8PDw9PT0+Pj4/Pz9AQEBBQUFCQkJDQ0NERERFRUVGRkZHR0dISEhJSUlKSkpLS0tMTExNTU1OTk5PT09QUFBRUVFSUlJTU1NUVFRVVVVWVlZXV1dYWFhZWVlaWlpbW1tcXFxdXV1eXl5fX19gYGBhYWFiYmJjY2NkZ [...]
+</div>
+<div id="calculate-rmse" class="section level2">
+<h2>Calculate RMSE</h2>
+<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">rmse <-<span class="st"> </span><span class="kw">matrix</span>(<span class="ot">NA_real_</span>, <span class="dt">nrow =</span> <span class="dv">5</span>, <span class="dt">ncol =</span> nvars, <span class="dt">dimnames =</span> <span class="kw">list</span>(<span class="kw">c</span>(<span class="st">"pca"</span>, <span class="st">"drr.1"</span>, 
+    <span class="st">"drr.2"</span>, <span class="st">"drr.3"</span>, <span class="st">"drr.4"</span>), <span class="kw">seq_len</span>(nvars)))
+
+for (i in <span class="kw">seq_len</span>(nvars)) {
+    pca_inv <-<span class="st"> </span>pca$x[, <span class="dv">1</span>:i, drop =<span class="st"> </span><span class="ot">FALSE</span>] %*%<span class="st"> </span><span class="kw">t</span>(pca$rotation[, <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>])
+    rmse[<span class="st">"pca"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>pca_inv)^<span class="dv">2</span>))
+    rmse[<span class="st">"drr.1"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>drr<span class="fl">.1</span>$<span class="kw">inverse</span>(drr<span class="fl">.1</span>$fitted.data[, 
+        <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>]))^<span class="dv">2</span>))
+    rmse[<span class="st">"drr.2"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>drr<span class="fl">.2</span>$<span class="kw">inverse</span>(drr<span class="fl">.2</span>$fitted.data[, 
+        <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>]))^<span class="dv">2</span>))
+    rmse[<span class="st">"drr.3"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>drr<span class="fl">.3</span>$<span class="kw">inverse</span>(drr<span class="fl">.3</span>$fitted.data[, 
+        <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>]))^<span class="dv">2</span>))
+    rmse[<span class="st">"drr.4"</span>, i] <-<span class="st"> </span><span class="kw">sqrt</span>(<span class="kw">sum</span>((my_data -<span class="st"> </span>drr<span class="fl">.4</span>$<span class="kw">inverse</span>(drr<span class="fl">.4</span>$fitted.data[, 
+        <span class="dv">1</span>:i, <span class="dt">drop =</span> <span class="ot">FALSE</span>]))^<span class="dv">2</span>))
+}</code></pre></div>
+</div>
+<div id="the-results" class="section level2">
+<h2>The Results</h2>
+<p>More blocks for fastkrr speed up the calculation, but too many blocks hurt accuracy.</p>
+<div id="rmse" class="section level3">
+<h3>RMSE</h3>
+<pre><code>##              1        2        3            4
+## pca   7.166770 3.899313 1.884524 1.732772e-14
+## drr.1 5.469965 3.458503 1.709825 1.675732e-14
+## drr.2 5.478541 3.487811 1.645224 1.674559e-14
+## drr.3 5.552791 3.499912 1.668842 1.674177e-14
+## drr.4 5.570917 3.690015 1.642986 1.674143e-14</code></pre>
+</div>
+<div id="processing-time" class="section level3">
+<h3>Processing time</h3>
+<pre><code>##       user.self sys.self elapsed
+## pca       0.000    0.000   0.001
+## drr.1    30.412   51.860  24.329
+## drr.2    20.200   34.372  15.491
+## drr.3    39.788   67.140  32.411
+## drr.4    22.648   21.956  19.565</code></pre>
+</div>
+</div>
+
+
+
+<!-- dynamically load mathjax for compatibility with self-contained -->
+<script>
+  (function () {
+    var script = document.createElement("script");
+    script.type = "text/javascript";
+    script.src  = "https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
+    document.getElementsByTagName("head")[0].appendChild(script);
+  })();
+</script>
+
+</body>
+</html>
diff --git a/man/DRR-package.Rd b/man/DRR-package.Rd
new file mode 100644
index 0000000..6070b69
--- /dev/null
+++ b/man/DRR-package.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/DRR-package.R
+\docType{package}
+\name{DRR-package}
+\alias{DRR}
+\alias{DRR-package}
+\title{Dimensionality Reduction via Regression.}
+\description{
+DRR implements Dimensionality Reduction via Regression using
+Kernel Ridge Regression. It also adds a faster implementation of
+Kernel Ridge Regression that can be used with the CVST package.
+}
+\details{
+Thanks to the Max Planck Institute for Biogeochemistry in Jena for
+the funding.
+}
+\references{
+Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
+    Reduction via Regression in Hyperspectral Imagery. IEEE Journal
+    of Selected Topics in Signal Processing 9,
+    1026-1036. doi:10.1109/JSTSP.2015.2417833
+Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
+    Kernel Ridge Regression: A Distributed Algorithm with Minimax
+    Optimal Rates. arXiv:1305.5029 [cs, math, stat].
+}
+
diff --git a/man/constructFastKRRLearner.Rd b/man/constructFastKRRLearner.Rd
new file mode 100644
index 0000000..8f1a616
--- /dev/null
+++ b/man/constructFastKRRLearner.Rd
@@ -0,0 +1,62 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fastKRR.R
+\name{constructFastKRRLearner}
+\alias{constructFastKRRLearner}
+\title{Fast implementation for Kernel Ridge Regression.}
+\usage{
+constructFastKRRLearner()
+}
+\value{
+Returns a learner similar to \code{\link[CVST]{constructKRRLearner}}
+    suitable for the use with \code{\link[CVST]{CV}} and
+    \code{\link[CVST]{fastCV}}.
+}
+\description{
+Constructs a learner for the divide and conquer version of KRR.
+}
+\details{
+This function is to be used with the CVST package as a drop in
+replacement for \code{\link[CVST]{constructKRRLearner}}. The
+implementation approximates the inversion of the kernel matrix
+using the divide and conquer scheme, lowering computational and
+memory complexity from \eqn{O(n^3)} and \eqn{O(n^2)} to
+\eqn{O(n^3/m^2)} and \eqn{O(n^2/m^2)} respectively, where \eqn{m} is
+the number of blocks to be used (parameter \code{nblocks}).
+Theoretically safe values for \eqn{m} are \eqn{< n^{1/3}}, but in
+practice \eqn{m} may be a little larger. The function will issue a
+warning if the value for \eqn{m} is too large.
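+
+As a minimal sketch, the scheme is: split the \eqn{n} observations
+into \eqn{m} blocks, fit an ordinary KRR model on each block, and
+average the block predictions (\code{krr_fit} and \code{krr_predict}
+are illustrative names, not the exact implementation):
+\preformatted{
+models <- lapply(blocks, function(b) krr_fit(x[b, ], y[b], lambda))
+pred   <- Reduce(`+`, lapply(models, krr_predict, newx)) / length(models)
+}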
+}
+\examples{
+ns <- noisySinc(1000)
+nsTest <- noisySinc(1000)
+
+fast.krr <- constructFastKRRLearner()
+fast.p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns), nblocks = 4)
+system.time(fast.m <- fast.krr$learn(ns, fast.p))
+fast.pred <- fast.krr$predict(fast.m, nsTest)
+sum((fast.pred - nsTest$y)^2) / getN(nsTest)
+
+\dontrun{
+krr <- CVST::constructKRRLearner()
+p <- list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns))
+system.time(m <- krr$learn(ns, p))
+pred <- krr$predict(m, nsTest)
+sum((pred - nsTest$y)^2) / getN(nsTest)
+
+plot(ns, col = '#00000030', pch = 19)
+lines(sort(nsTest$x), fast.pred[order(nsTest$x)], col = '#00C000', lty = 2)
+lines(sort(nsTest$x), pred[order(nsTest$x)], col = '#0000C0', lty = 2)
+legend('topleft', legend = c('fast KRR', 'KRR'),
+       col = c('#00C000', '#0000C0'), lty = 2)
+}
+
+}
+\references{
+Zhang, Y., Duchi, J.C., Wainwright, M.J., 2013. Divide and Conquer
+    Kernel Ridge Regression: A Distributed Algorithm with Minimax
+    Optimal Rates. arXiv:1305.5029 [cs, math, stat].
+}
+\seealso{
+\code{\link[CVST]{constructLearner}}
+}
+
diff --git a/man/drr.Rd b/man/drr.Rd
new file mode 100644
index 0000000..7c08026
--- /dev/null
+++ b/man/drr.Rd
@@ -0,0 +1,132 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/DRR.R
+\name{drr}
+\alias{drr}
+\title{Dimensionality Reduction via Regression}
+\usage{
+drr(X, ndim = ncol(X), lambda = c(0, 10^(-3:2)), kernel = "rbfdot",
+  kernel.pars = list(sigma = 10^(-3:4)), pca = TRUE, pca.center = TRUE,
+  pca.scale = FALSE, fastcv = FALSE, cv.folds = 5, fastcv.test = NULL,
+  fastkrr.nblocks = 4, verbose = TRUE)
+}
+\arguments{
+\item{X}{input data, a matrix.}
+
+\item{ndim}{the number of output dimensions and regression
+functions to be estimated, see details for inversion.}
+
+\item{lambda}{the penalty term for the Kernel Ridge Regression.}
+
+\item{kernel}{a kernel function or string, see
+\code{\link[kernlab]{kernel-class}} for details.}
+
+\item{kernel.pars}{a list with parameters for the kernel. Each
+parameter can be a vector; cross-validation will choose the best
+combination.}
+
+\item{pca}{logical, whether to preprocess the data using PCA.}
+
+\item{pca.center}{logical, center the data before applying PCA.}
+
+\item{pca.scale}{logical, scale the data before applying PCA.}
+
+\item{fastcv}{if \code{TRUE} uses \code{\link[CVST]{fastCV}}, if
+\code{FALSE} uses \code{\link[CVST]{CV}} for cross-validation.}
+
+\item{cv.folds}{if using normal cross-validation, the number of
+folds to be used.}
+
+\item{fastcv.test}{an optional separate test data set to be used
+for \code{\link[CVST]{fastCV}}, handed over as option
+\code{test} to \code{\link[CVST]{fastCV}}.}
+
+\item{fastkrr.nblocks}{the number of blocks used for fast KRR,
+higher numbers are faster to compute but may introduce
+numerical inaccuracies, see
+\code{\link{constructFastKRRLearner}} for details.}
+
+\item{verbose}{logical, whether cross-validation should report progress.}
+}
+\value{
+A list with the following items:
+\itemize{
+ \item {"fitted.data"} The data in reduced dimensions.
+ \item {"pca.means"} The means used to center the original data.
+ \item {"pca.scale"} The standard deviations used to scale the original data.
+ \item {"pca.rotation"} The rotation matrix of the PCA.
+ \item {"models"} A list of models used to estimate each dimension.
+ \item {"apply"} A function to fit new data to the estimated model.
+ \item {"inverse"} A function to untransform data. 
+}
+}
+\description{
+\code{drr} implements Dimensionality Reduction via Regression using
+Kernel Ridge Regression.
+}
+\details{
+Parameter combinations are formed and cross-validation is used to
+select the best one. Cross-validation uses
+\code{\link[CVST]{CV}} or \code{\link[CVST]{fastCV}}.
+
+The data is pre-treated with scaling and a PCA,
+\eqn{\alpha = Vx}. The representation in reduced dimensions is
+
+\deqn{y_i = \alpha_i - f_i(\alpha_1, \ldots, \alpha_{i-1})}
+
+Then the final DRR representation is:
+
+\deqn{r = (\alpha_1, y_2, y_3, \ldots, y_d)}
+
+DRR is invertible by
+
+\deqn{\alpha_i = y_i + f_i(\alpha_1, \alpha_2, \ldots, \alpha_{i-1})}
+
+If fewer dimensions are estimated, there will be fewer inverse
+functions and calculating the inverse will be inaccurate.
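+
+As a minimal sketch (assuming \code{fit} is the list returned by
+\code{drr}), the forward and inverse transforms are applied as:
+\preformatted{
+r    <- fit$apply(X)       # map new data to reduced dimensions
+Xrec <- fit$inverse(r)     # approximately reconstruct the original data
+}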
+}
+\examples{
+tt <- seq(0,4*pi, length.out = 200)
+helix <- cbind(
+  x = 3 * cos(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
+  y = 3 * sin(tt) + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt))),
+  z = 2 * tt      + rnorm(length(tt), sd = seq(0.1, 1.4, length.out = length(tt)))
+)
+system.time(
+drr.fit  <- drr(helix, ndim = 3, cv.folds = 4,
+                lambda = 10^(-2:1),
+                kernel.pars = list(sigma = 10^(0:3)),
+                fastkrr.nblocks = 2, verbose = TRUE,
+                fastcv = FALSE)
+)
+
+\dontrun{
+library(rgl)
+plot3d(helix)
+points3d(drr.fit$inverse(drr.fit$fitted.data[,1,drop = FALSE]), col = 'blue')
+points3d(drr.fit$inverse(drr.fit$fitted.data[,1:2]),             col = 'red')
+
+plot3d(drr.fit$fitted.data)
+pad <- -3
+fd <- drr.fit$fitted.data
+xx <- seq(min(fd[,1])      , max(fd[,1])      , length.out = 25)
+yy <- seq(min(fd[,2]) - pad, max(fd[,2]) + pad, length.out = 5)
+zz <- seq(min(fd[,3]) - pad, max(fd[,3]) + pad, length.out = 5)
+
+dd <- as.matrix(expand.grid(xx, yy, zz))
+plot3d(helix)
+for(y in yy) for(x in xx)
+  rgl.linestrips(drr.fit$inverse(cbind(x, y, zz)), col = 'blue')
+for(y in yy) for(z in zz)
+  rgl.linestrips(drr.fit$inverse(cbind(xx, y, z)), col = 'blue')
+for(x in xx) for(z in zz)
+  rgl.linestrips(drr.fit$inverse(cbind(x, yy, z)), col = 'blue')
+}
+
+}
+\references{
+Laparra, V., Malo, J., Camps-Valls, G., 2015. Dimensionality
+    Reduction via Regression in Hyperspectral Imagery. IEEE Journal
+    of Selected Topics in Signal Processing 9,
+    1026-1036. doi:10.1109/JSTSP.2015.2417833
+}
+
diff --git a/vignettes/comparePCA.Rmd b/vignettes/comparePCA.Rmd
new file mode 100644
index 0000000..8d7cf47
--- /dev/null
+++ b/vignettes/comparePCA.Rmd
@@ -0,0 +1,84 @@
+---
+title: "Comparing DRR and PCA"
+author: "Guido Kraemer"
+date: "`r Sys.Date()`"
+output: rmarkdown::html_vignette
+vignette: >
+    %\VignetteIndexEntry{Compare DRR and PCA}
+    %\VignetteEngine{knitr::rmarkdown}
+    %\VignetteEncoding{UTF-8}
+---
+
+This is an example application to compare the accuracy and
+computational speed of DRR for different parameters to PCA.
+
+## Load libraries
+```{r, echo = TRUE, message = FALSE}
+library(DRR)
+```
+
+## Read in data
+```{r, echo = TRUE, warning = FALSE, error = FALSE}
+data(iris)
+
+in_data <- iris[,1:4]
+
+npoints <- nrow(in_data)
+nvars <- ncol(in_data)
+for (i in seq_len(nvars)) in_data[[i]] <- as.numeric(in_data[[i]])
+my_data <- scale(in_data[sample(npoints),], scale = FALSE)
+```
+
+## Fit the dimensionality reductions.
+```{r, echo = TRUE, results = "hide", warning = FALSE, error = FALSE, message = FALSE}
+t0 <- system.time(pca   <- prcomp(my_data, center = FALSE, scale. = FALSE))
+t1 <- system.time(drr.1 <- drr(my_data))
+t2 <- system.time(drr.2 <- drr(my_data, fastkrr = 2))
+t3 <- system.time(drr.3 <- drr(my_data, fastkrr = 5))
+t4 <- system.time(drr.4 <- drr(my_data, fastkrr = 2, fastcv = TRUE))
+```
+
+
+## Plot the data
+```{r, echo = FALSE, results = "hold"}
+pairs(my_data          , gap = 0, main = "iris")
+pairs(pca$x            , gap = 0, main = "pca")
+pairs(drr.1$fitted.data, gap = 0, main = "drr.1")
+pairs(drr.2$fitted.data, gap = 0, main = "drr.2")
+pairs(drr.3$fitted.data, gap = 0, main = "drr.3")
+pairs(drr.4$fitted.data, gap = 0, main = "drr.4")
+```
+
+## Calculate RMSE
+```{r, echo = TRUE, tidy = TRUE}
+rmse <- matrix(NA_real_, nrow = 5, ncol = nvars,
+               dimnames = list(c("pca", "drr.1", "drr.2", "drr.3", "drr.4"),
+                               seq_len(nvars)))
+
+for(i in seq_len(nvars)){
+    pca_inv <- pca$x[, 1:i, drop = FALSE] %*% t(pca$rotation[, 1:i, drop = FALSE])
+    rmse["pca",  i] <- sqrt( sum( (my_data - pca_inv                                               )^2) )
+    rmse["drr.1",i] <- sqrt( sum( (my_data - drr.1$inverse(drr.1$fitted.data[, 1:i, drop = FALSE]) )^2) )
+    rmse["drr.2",i] <- sqrt( sum( (my_data - drr.2$inverse(drr.2$fitted.data[, 1:i, drop = FALSE]) )^2) )
+    rmse["drr.3",i] <- sqrt( sum( (my_data - drr.3$inverse(drr.3$fitted.data[, 1:i, drop = FALSE]) )^2) )
+    rmse["drr.4",i] <- sqrt( sum( (my_data - drr.4$inverse(drr.4$fitted.data[, 1:i, drop = FALSE]) )^2) )
+}
+```
+
+## The Results
+More blocks for fastkrr speed up the calculation, but too many
+blocks hurt accuracy.
+
+### RMSE
+```{r, echo = FALSE}
+print(rmse)
+```
+
+### Processing time
+```{r, echo = FALSE}
+print(rbind(pca = t0, drr.1 = t1, drr.2 = t2, drr.3 = t3, drr.4 = t4)[,1:3])
+```
+
+
+
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/r-cran-drr.git


