[r-cran-cvst] 01/02: New upstream version 0.2-1

Andreas Tille tille at debian.org
Sun Oct 22 21:36:18 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-cran-cvst.

commit eb4cfc29b331b1e4dbc5bc21f9dedb376425a38a
Author: Andreas Tille <tille at debian.org>
Date:   Sun Oct 22 23:35:46 2017 +0200

    New upstream version 0.2-1
---
 DESCRIPTION                    |  14 ++
 MD5                            |  17 +++
 NAMESPACE                      |   1 +
 R/CV.R                         | 244 ++++++++++++++++++++++++++++++++++
 R/methods.R                    | 127 ++++++++++++++++++
 R/util.R                       | 295 +++++++++++++++++++++++++++++++++++++++++
 README.md                      |  12 ++
 man/CV.Rd                      |  60 +++++++++
 man/CVST-package.Rd            |  67 ++++++++++
 man/cochranq.test.Rd           |  50 +++++++
 man/constructCVSTModel.Rd      |  63 +++++++++
 man/constructData.Rd           |  62 +++++++++
 man/constructLearner.Rd        | 102 ++++++++++++++
 man/constructParams.Rd         |  37 ++++++
 man/constructSequentialTest.Rd |  83 ++++++++++++
 man/fastCV.Rd                  |  73 ++++++++++
 man/noisyDonoho.Rd             |  64 +++++++++
 man/noisySine.Rd               |  57 ++++++++
 18 files changed, 1428 insertions(+)

diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..c968535
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,14 @@
+Package: CVST
+Type: Package
+Title: Fast Cross-Validation via Sequential Testing
+Version: 0.2-1
+Date: 2013-12-10
+Depends: kernlab,Matrix
+Author: Tammo Krueger, Mikio Braun
+Maintainer: Tammo Krueger <tammokrueger at googlemail.com>
+Description: This package implements the fast cross-validation via sequential testing (CVST) procedure. CVST is an improved cross-validation procedure which uses non-parametric testing coupled with sequential analysis to determine the best parameter set on linearly increasing subsets of the data. By eliminating underperforming candidates quickly and keeping promising candidates as long as possible, the method speeds up the computation while preserving the capability of a full cross-valid [...]
+License: GPL (>= 2.0)
+Packaged: 2013-12-10 13:06:36 UTC; tammok
+NeedsCompilation: no
+Repository: CRAN
+Date/Publication: 2013-12-10 14:50:04
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..f861177
--- /dev/null
+++ b/MD5
@@ -0,0 +1,17 @@
+635d31fd315c55401a3ce30a4f096481 *DESCRIPTION
+8b54e5a89fbda3af5e077053d40bec76 *NAMESPACE
+209666bfdd1d76a79ee11af8fa919a61 *R/CV.R
+807866a5ca52a90f7ce267e28e41704b *R/methods.R
+cf33cdc93c9b2aa2929369a658eab977 *R/util.R
+d397398aac7e3352aa7e1ed6234dc491 *README.md
+889feb1c2dba681c0b85199ec33693b9 *man/CV.Rd
+ebb263537ff83916c7c7ffd97b89e97d *man/CVST-package.Rd
+22f0b3ced43046e9c6c832213c612a99 *man/cochranq.test.Rd
+2c840dfb7798526a5726f93bbc8e5d50 *man/constructCVSTModel.Rd
+101d0d1145f69a67cc133b224ac3a5f0 *man/constructData.Rd
+bc6a2af0e2ba849b839fd41a26768399 *man/constructLearner.Rd
+53cdd42ecc89dd8f52f45af2367fcc78 *man/constructParams.Rd
+1a7e9e2bc332e7bae0a611cfe5797269 *man/constructSequentialTest.Rd
+5f8fb944813989e373ff9c9f24513cc7 *man/fastCV.Rd
+d4befef987e30fa1eaa09def9e5dc3f1 *man/noisyDonoho.Rd
+3bfc25412298e3b10174bbf080dc2573 *man/noisySine.Rd
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..d75f824
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/R/CV.R b/R/CV.R
new file mode 100644
index 0000000..19bc455
--- /dev/null
+++ b/R/CV.R
@@ -0,0 +1,244 @@
+CV = function(data, learner, params, fold=5, verbose=TRUE) {
+  stopifnot(class(learner) == "CVST.learner" &&
+            class(data) == "CVST.data" &&
+            class(params) == "CVST.params")
+  nParams = length(params)
+  dimnames = list(as.character(1:fold), names(params))
+
+  results = matrix(0, fold, nParams, dimnames=dimnames)
+  size = getN(data) / fold
+
+  for (ind in 1:nParams) {
+    p = params[[ind]]
+    for (f in 1:fold) {
+      validationIndex = seq((f-1)*size + 1, f*size)
+      curTrain = getSubset(data, -validationIndex)
+      curTest = getSubset(data, validationIndex)
+      # either mean squared error or mean classification error
+      results[f, ind] = mean(.getResult(curTrain, curTest, learner, p))
+    }
+    if (verbose) {
+      cat(names(params)[ind], "(", mean(results[, ind]), ")\n")
+    }
+  }
+  winner = which.min(apply(results, 2, mean))
+  if (length(winner) == 0) {
+    return(NULL)
+  }
+  else {
+    return(params[winner])
+  }
+}
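
As a quick orientation, a minimal sketch of how the fold indices above partition a data set, assuming getN(data) is divisible by fold (which the code above implicitly requires):

    # with getN(data) = 100 and fold = 5, each fold holds size = 20 points
    size = 100 / 5
    lapply(1:5, function(f) range(seq((f - 1) * size + 1, f * size)))
    # fold 1 -> 1..20, fold 2 -> 21..40, ..., fold 5 -> 81..100
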
+
+# the function to perform fast cross-validation:
+#
+# train: training data CVST.data
+#
+# learner: the learner as CVST.learner
+#
+# params: list of parameters for the learner as CVST.params
+#
+# setup: setup of the CVST as CVST.setup
+#
+# test: either the test data for fixed test error setting or NULL, if
+# the adjusted test error setting should be used
+fastCV = function(train, learner, params, setup, test=NULL, verbose=TRUE) {
+  stopifnot(class(learner) == "CVST.learner" && class(train) == "CVST.data" &&
+            class(params) == "CVST.params" && class(setup) == "CVST.setup" &&
+            (is.null(test) || class(test) == "CVST.data"))
+  isClassificationTask = isClassification(train)
+  regressionSimilarityViaOutliers = setup$regressionSimilarityViaOutliers
+  earlyStopping = setup$earlyStoppingSignificance
+  similarity = setup$similaritySignificance
+  # use nested modeling, i.e. we start with the first minimalModel number of
+  # data points and in each step subsequently add minimalModel data points to it
+  nestModel = TRUE
+  earlyStoppingWindow = setup$earlyStoppingWindow
+
+  if (is.null(test)) {
+    # we are in the adjusted test error setting, therefore we have to keep
+    # an additional slice of the data for the last test
+    minimalModel = getN(train) / (setup$steps + 1)
+    n = getN(train) - minimalModel
+  }
+  else {
+    minimalModel = getN(train) / setup$steps
+    n = getN(train)
+  }
+
+  N = seq(minimalModel, n, by=minimalModel)
+  st = getCVSTTest(setup$steps, setup$beta, setup$alpha)
+  nParams = length(params)
+  if (verbose) {
+    cat("Total number of params:", nParams, "\n")
+  }
+  dimnames = list(names(params), as.character(N))
+  traces = matrix(0, nParams, length(N), dimnames=dimnames)
+  success = matrix(0, nParams, length(N), dimnames=dimnames)
+  skipCalculation = rep(FALSE, nParams)
+  isEarlyStopping = FALSE
+  stoppedAt = length(N)
+  activeConfigurations = matrix(FALSE, nParams, length(N), dimnames=dimnames)
+  configurationsLeft = nParams
+  
+  for (ind in 1:length(N)) {
+    n = N[ind]
+    if (!isClassificationTask && regressionSimilarityViaOutliers) {
+      err = .calculateErrors(train, test, n, learner, params, skipCalculation, squared=FALSE)
+      success[, ind] = apply(err^2, 1, mean)
+    }
+    else {
+      err = .calculateErrors(train, test, n, learner, params, skipCalculation)
+      success[, ind] = apply(err, 1, mean)
+    }
+    
+    indByError = sort.list(success[, ind], decreasing=FALSE, na.last=TRUE)
+    traces[indByError[1], ind] = 1
+    sortedErrors = t(err[indByError, ])
+    if (!isClassificationTask && regressionSimilarityViaOutliers) {
+      s = apply(sortedErrors, 2, sd)
+      sortedErrors = t(abs(t(sortedErrors)) > s * qnorm(1 - (similarity / 2)))
+    }
+    adjustedSignificance = similarity / (configurationsLeft - 1)
+    for (k in 2:length(indByError)) {
+      if (is.na(success[indByError[k], ind])) {
+        # we either have an insufficient model, which gives us NA as result ...
+        # ... or we reached a configuration flagged in skipCalculation, so we can stop the procedure
+        break
+      }
+      if (isClassificationTask) {
+        pvalue = cochranq.test(sortedErrors[, 1:k])$p.value
+      }
+      else {
+        if (regressionSimilarityViaOutliers) {
+          pvalue = cochranq.test(sortedErrors[, 1:k])$p.value
+        }
+        else {
+          pvalue = friedman.test(sortedErrors[, 1:k])$p.value
+        }
+      }
+      if (!is.nan(pvalue) && pvalue <= adjustedSignificance) {
+        break
+      }
+      traces[indByError[k], ind] = 1
+    }
+    if (verbose) {
+      cat("(sim:", sum(traces[, ind]), "alpha:", similarity, "left:", configurationsLeft, ")")
+    }
+    # do the testing here...
+    # check for losers
+    if (ind > 1) {
+      testResults = apply(traces[, 1:ind], 1, testSequence, st=st)
+      # check for losers
+      skipCalculation = (testResults == -1)
+      if (verbose) {
+        cat("Skipped configurations:", sum(skipCalculation), " ")
+      }
+    }
+    configurationsLeft = nParams - sum(skipCalculation)
+    activeConfigurations[, ind] = !skipCalculation
+    # check for early stopping
+    if (earlyStoppingWindow >= 2 && ind > earlyStoppingWindow && earlyStopping < 1.0) {
+      # check, whether all remaining parameters perform similar
+      if (sum(!skipCalculation) > 1)
+        pvalue = cochranq.test(t(traces[!skipCalculation, (ind-earlyStoppingWindow+1):ind]))$p.value
+      else {
+        pvalue = 1.0
+      }
+      if (!is.nan(pvalue) && pvalue > earlyStopping) {
+        if (verbose) {
+          cat("EARLY STOPPING!")
+        }
+        isEarlyStopping = TRUE
+        stoppedAt = ind
+        break
+      }
+      # just go on if they are significantly dissimilar!
+    }
+  }
+  if (verbose) {
+    cat("\n")
+  }
+  theWinners = !skipCalculation
+  ret = list(traces=traces, success=success)
+  ret$numberOfPotentialWinners = sum(theWinners)
+  ret$isEarlyStopping = isEarlyStopping
+  ret$stoppedAt = stoppedAt
+  ret$activeConfigurations = activeConfigurations
+  ret$earlyStoppingWindow = earlyStoppingWindow
+  winningConfiguration = .getOptimalSolution(ret)
+
+  ret$param = params[winningConfiguration]
+  ret$winningConfiguration = winningConfiguration
+
+  return(params[winningConfiguration])
+}
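
A typical call, mirroring the example in man/fastCV.Rd (a sketch; it assumes the kernlab package is available, since the SVM learner wraps ksvm):

    ns = noisySine(100)
    svm = constructSVMLearner()
    params = constructParams(kernel="rbfdot", sigma=10^(-3:3), nu=c(0.05, 0.1, 0.2, 0.3))
    opt = fastCV(ns, svm, params, constructCVSTModel())
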
+
+# returns a (# configurations) x (# test samples) matrix: entry (i, j) holds the 0/1
+# misclassification indicator or the (squared) error obtained when the model learned
+# on N data points of traindata with configuration i predicts point j of testdata.
+# skipCalculation controls which configurations should be skipped; an NA row in the
+# returned matrix corresponds to a skipped configuration.
+.calculateErrors = function(traindata, testdata, N, learner, params, skipCalculation, squared=TRUE) {
+  nestModel = TRUE
+  nPars = length(params)
+
+  if (nestModel) {
+    sampleIndex = 1:N
+  }
+  else {
+    sampleIndex = sample.int(getN(traindata), N)
+  }
+  # if no test data is available, we are in the adjusted test error setting,
+  # i.e. we use the rest of the train data, which is not used for model building,
+  # to determine the test error
+  if (is.null(testdata)) {
+    testdata = getSubset(traindata, -sampleIndex)
+  }
+  # initialize results
+  results = matrix(NA, nPars, getN(testdata))
+  # calculate results
+  curTrain = getSubset(traindata, sampleIndex)
+  for (ind in 1:nPars) {
+    param = params[[ind]]
+    if (!is.null(skipCalculation) && skipCalculation[ind]) {
+      next
+    }
+    results[ind, ] = as.vector(.getResult(curTrain, testdata, learner, param, squared=squared))
+  }
+  return(results)
+}
+
+.getOptimalSolution = function(paramRace) {
+  remainingConfs = paramRace$activeConfigurations[, paramRace$stoppedAt]
+  if (sum(remainingConfs) == 1) {
+    return(remainingConfs)
+  }
+  # pick the configuration which has the smallest mean rank inside
+  # the earlyStoppingWindow:
+  lastSuccess = paramRace$success[remainingConfs, (paramRace$stoppedAt - paramRace$earlyStoppingWindow + 1):paramRace$stoppedAt]
+  meanRank = apply(apply(lastSuccess, 2, rank), 1, mean)
+  # breaks ties at random
+  overallWinner = which(remainingConfs)[.which.is.min(meanRank)]
+
+  ret = rep(FALSE, nrow(paramRace$traces))
+  names(ret) = rownames(paramRace$traces)
+  ret[overallWinner] = TRUE
+  return(ret)
+}
+
+.which.is.min = function (x) {
+  y = seq_along(x)[x == min(x)]
+  if (length(y) > 1) {
+    y = sample(y, 1)
+  }
+  return(y)
+}
+
+getCVSTTest = function(steps, beta=.1, alpha=.01) {
+  pi1 = .5 * ((1 - beta) / alpha)^(1/steps)
+  sst = constructSequentialTest(.5, pi1, beta, alpha)
+  sst$steps = steps
+  return(sst)
+}
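
For the default setup (steps=10, beta=0.1, alpha=0.01) the H1 probability pi1 computed above works out to roughly 0.78 (approximate value, shown only as a sanity check):

    0.5 * ((1 - 0.1) / 0.01)^(1 / 10)   # = 0.5 * 90^0.1, approximately 0.784
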
diff --git a/R/methods.R b/R/methods.R
new file mode 100644
index 0000000..6bd64a8
--- /dev/null
+++ b/R/methods.R
@@ -0,0 +1,127 @@
+constructSVRLearner = function() {
+  learn.svr = function(data, params) {
+    #require(kernlab)
+    stopifnot(isRegression(data))
+    kpar=params[setdiff(names(params), c("kernel", "nu", "C"))]
+    return(ksvm(data$x, data$y, kernel=params$kernel, kpar=kpar, type="nu-svr", nu=params$nu, C=params$C / getN(data), scale=FALSE))
+  }
+  
+  predict.svr = function(model, newData) {
+    stopifnot(isRegression(newData))
+    return(predict(model, newData$x))
+  }
+  return(constructLearner(learn.svr, predict.svr))
+}
+
+constructSVMLearner = function() {
+  learn.svm = function(data, params) {
+    #require(kernlab)
+    stopifnot(isClassification(data))    
+    kpar=params[setdiff(names(params), c("kernel", "nu"))]
+    return(ksvm(data$x, data$y, kernel=params$kernel, kpar=kpar, type="nu-svc", nu=params$nu, scale=FALSE))
+  }
+  
+  predict.svm = function(model, newData) {
+    stopifnot(isClassification(newData))    
+    return(predict(model, newData$x))
+  }
+  return(constructLearner(learn.svm, predict.svm))
+}
+
+constructKlogRegLearner = function() {
+  learn.klogreg = function(data, params) {
+    #require(kernlab)
+    stopifnot(isClassification(data))    
+    # convert the factor to numeric 0/1
+    if (nlevels(data$y) > 2) {
+      stop("klogreg does not support multiclass experiments")
+    }
+    y = (data$y != levels(data$y)[1]) + 0
+    kpar = params[setdiff(names(params), c("kernel", "lambda", "tol", "maxiter"))]
+    kernel = do.call(params$kernel, kpar)
+    model = .klogreg(data$x, kernel, y, getN(data) * params$lambda, params$tol, params$maxiter)
+    model$yLevels = levels(data$y)
+    return(model)
+  }
+  
+  predict.klogreg = function(model, newData) {
+    stopifnot(isClassification(newData))    
+    pred = .klogreg.predict(model, newData$x)
+    f = factor(pred, c("0", "1"), model$yLevels, ordered=FALSE)
+    return(f)
+  }
+  return(constructLearner(learn.klogreg, predict.klogreg))
+}
+
+constructKRRLearner = function() {
+  learn.krr = function(data, params) {
+    #require(kernlab)
+    stopifnot(isRegression(data))
+    kpar = params[setdiff(names(params), c("kernel", "lambda"))]
+    kernel = do.call(params$kernel, kpar)
+    return(.krr(data$x, kernel, data$y, getN(data) * params$lambda))
+  }
+  
+  predict.krr = function(model, newData) {
+    stopifnot(isRegression(newData))
+    return(as.matrix(.krr.predict(newData$x, model)))
+  }
+  return(constructLearner(learn.krr, predict.krr))
+}
+
+.krr = function(data, kernel, y, lambda) {
+  #require(kernlab)
+  #require(Matrix)
+  K = kernelMatrix(kernel, data)
+  N = nrow(K)
+  alpha = solve(Matrix(K + diag(lambda, N))) %*% y
+  return(list(data=data, kernel=kernel, alpha=alpha))
+}
+
+.krr.predict = function(newData, krr) {
+  #require(kernlab)
+  k = kernelMatrix(krr$kernel, newData, krr$data)
+  return(k %*% krr$alpha)
+}
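
The two helpers above are plain kernel ridge regression: alpha = (K + lambda*I)^(-1) y, with predictions k(x*, X) alpha. A minimal sketch of a direct call, assuming kernlab and Matrix are attached and the (internal, unexported) helpers above are in scope:

    d = noisySinc(50)
    kern = rbfdot(sigma = 10)
    fit = .krr(d$x, kern, d$y, lambda = 0.1 * getN(d))
    mean((as.matrix(.krr.predict(d$x, fit)) - d$y)^2)   # in-sample squared error
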
+
+.klogreg = function(data, kernel, labels, lambda, tol, maxiter) {
+  # labels should be 0/1
+  #require(kernlab)
+  #require(Matrix)
+  K = Matrix(kernelMatrix(kernel, data)@.Data)
+  N = nrow(K)
+  alpha = rep(1/N, N)
+  iter = 1
+  while (TRUE) {
+    Kalpha = as.vector(K %*% alpha)
+    spec = 1 + exp(-Kalpha)
+    pi = 1 / spec
+    diagW = pi * (1 - pi)
+    e = (labels - pi) / diagW
+    q = Kalpha + e
+    theSol = try(solve(K + lambda * Diagonal(x=1/diagW), q))
+    if (class(theSol) == "try-error") {
+      break
+    }
+    alphan = as.vector(theSol)
+    if (any(is.nan(alphan)) || all(abs(alphan - alpha) <= tol)) {
+      break
+    }
+    else if (iter > maxiter) {
+      cat("klogreg:maxiter!")
+      break
+    }
+    else {
+      alpha = alphan
+      iter = iter + 1
+    }
+  }
+  return(list(data=data, kernel=kernel, alpha=as.vector(alpha), pi=pi))
+}
+
+.klogreg.predict = function(klogreg, newData) {
+  #require(kernlab)
+  K = kernelMult(klogreg$kernel, newData, klogreg$data, klogreg$alpha)
+  pi = 1 / (1 + exp(-as.vector(K)))
+  return((pi >= .5) + 0)
+}
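
constructLearner only requires a learn and a predict function, so learners beyond the four templates above can be plugged into CV and fastCV. A toy sketch with a hypothetical constant-mean regressor (the names meanLearner and dummy are illustrative only):

    meanLearner = constructLearner(
      learn = function(data, params) list(mu = mean(data$y)),
      predict = function(model, newData) rep(model$mu, getN(newData)))
    d = noisySinc(100)
    CV(d, meanLearner, constructParams(dummy = 1), fold = 5, verbose = FALSE)
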
diff --git a/R/util.R b/R/util.R
new file mode 100644
index 0000000..943c5c2
--- /dev/null
+++ b/R/util.R
@@ -0,0 +1,295 @@
+# data is a list
+# x: either a list or a matrix containing the data rowwise
+# y: vector of labels/values
+constructData = function(x, y) {
+  stopifnot(is.list(x) || is.vector(x) || is.matrix(x))
+  stopifnot(is.list(y) || is.vector(y) || is.factor(y))
+  data = list(x=x, y=y)
+  class(data) = "CVST.data"
+  return(data)
+}
+
+getN = function(data) {
+  stopifnot(class(data) == "CVST.data")
+  if (is.list(data$x) || is.vector(data$x)) {
+    N = length(data$x)
+  }
+  else {
+    N = nrow(data$x)
+  }
+  return(N)
+}
+
+shuffleData = function(data) {
+  stopifnot(class(data) == "CVST.data")
+  shuffle = sample.int(getN(data))
+  return(getSubset(data, shuffle))
+}
+
+getSubset = function(data, subset) {
+  stopifnot(class(data) == "CVST.data")
+  x = getX(data, subset)
+  y = data$y[subset]
+  ret = constructData(x=x, y=y)
+  return(ret)
+}
+
+getX = function(data, subset=NULL) {
+  stopifnot(class(data) == "CVST.data")
+  if (is.null(subset)) {
+    ret = data$x
+  }
+  else {
+    if (is.list(data$x) || is.vector(data$x)) {
+      ret = data$x[subset]
+    }
+    else {
+      ret = data$x[subset, ,drop=FALSE]
+    }
+  }
+  return(ret)
+}
+
+isClassification = function(data) {
+  stopifnot(class(data) == "CVST.data")
+  return(is.factor(data$y))
+}
+
+isRegression = function(data) {
+  stopifnot(class(data) == "CVST.data")
+  return(!isClassification(data))
+}
+
+constructLearner = function(learn, predict) {
+  stopifnot(is.function(learn) && is.function(predict))
+  learner = list(learn=learn, predict=predict)
+  class(learner) = "CVST.learner"
+  return(learner)
+}
+
+constructCVSTModel = function(steps=10, beta=.1, alpha=.01, similaritySignificance=.05, earlyStoppingSignificance=.05, earlyStoppingWindow=3, regressionSimilarityViaOutliers=FALSE) {
+  ret = list(steps=steps,
+    beta=beta,
+    alpha=alpha,
+    similaritySignificance=similaritySignificance,
+    earlyStoppingSignificance=earlyStoppingSignificance,
+    earlyStoppingWindow=earlyStoppingWindow,
+    regressionSimilarityViaOutliers=regressionSimilarityViaOutliers)
+  class(ret) = "CVST.setup"
+  return(ret)
+}
+
+constructParams = function(...) {
+  pn = names(substitute(c(...)))[-1]
+  ret = expand.grid(..., stringsAsFactors=FALSE, KEEP.OUT.ATTRS = FALSE)
+  params = lapply(1:nrow(ret), function(ind) as.list(ret[ind, ]))
+  paramNames = lapply(1:nrow(ret), function(ind) paste(pn, ret[ind, ], sep="=", collapse=" "))
+  names(params) = paramNames
+  class(params) = "CVST.params"
+  return(params)
+}
+
+
+.getResult = function(train, test, learner, param, squared=TRUE) {
+  stopifnot(class(learner) == "CVST.learner" && class(train) == "CVST.data" && class(test) == "CVST.data")
+  model = try(learner$learn(train, param))
+  if (class(model) == "try-error") {
+    pred = rep(NA, length(test$y))
+  }
+  else {
+    pred = try(learner$predict(model, test))
+    if (class(pred) == "try-error") {
+      pred = rep(NA, length(test$y))
+    }
+  }
+  if (isClassification(test)) {
+    res = (test$y != pred)
+  }
+  else {
+    if (squared) {
+      res = (pred - test$y)^2
+    }
+    else {
+      res = (pred - test$y)
+    }
+  }
+  return(res)
+}
+
+cochranq.test = function(mat) {
+  cochransQtest = list(statistic = 0, parameter = 0, p.value = 1,
+    method = "Cochran's Q Test",
+    data.name = deparse(substitute(mat)))
+  class(cochransQtest) = "htest"
+
+  if (is.vector(mat) || any(dim(mat) <= 1)) {
+    return(cochransQtest)
+  }
+
+  # we expect the individuals in the rows, repetitions/treatments in the columns
+  m = ncol(mat)
+  df = m - 1
+  L = apply(mat, 1, sum)
+  index = (L > 0 & L < m)
+  if (sum(index) <= 1) {
+    # all rows are either one or zero... no effect!
+    return(cochransQtest)
+  }
+  
+  if (sum(index) * m <= 24) {
+    return(.perm.cochranq.test(mat[index, ]))
+  }
+
+  L = L[index]
+  T = apply(mat[index, ], 2, sum)
+  Q = ((m-1) * (m * sum(T^2) - sum(T)^2)) / (m * sum(L) - sum(L^2))
+  names(df) = "df"
+  names(Q) = "Cochran's Q"
+
+  if (is.nan(Q)) {
+    p.val = 1.0
+  }
+  else {
+    p.val = pchisq(Q, df, lower.tail=FALSE)
+  }
+  cochransQtest$statistic = Q
+  cochransQtest$parameter = df
+  cochransQtest$p.value = p.val
+  return(cochransQtest)
+} 
+
+.perm.cochranq.test = function(mat, nperm=1000) {
+  if (is.vector(mat) || any(dim(mat) <= 1)) {
+    cochransQtest = list(statistic = 0, parameter = 0, p.value = 1,
+      method = "Cochran's Q Test",
+      data.name = deparse(substitute(mat)))
+    class(cochransQtest) = "htest"
+    return(cochransQtest)
+  }
+  # we expect no all-zero or all-one rows in mat
+  m = ncol(mat)
+  df = m - 1
+  L = apply(mat, 1, sum)
+  T = apply(mat, 2, sum)
+  quot = (m * sum(L) - sum(L^2))
+  Q = ((m-1) * (m * sum(T^2) - sum(T)^2)) / quot
+  names(df) = "df"
+  names(Q) = "Cochran's Q"
+  
+  permFun = function() {
+    newPerm = mat
+    for (i in 1:nrow(mat)) {
+        newPerm[i, ] = mat[i, sample(m)]
+      }
+    T = apply(newPerm, 2, sum)
+    Q = ((m-1) * (m * sum(T^2) - sum(T)^2)) / quot
+    return(Q)
+  }
+  
+  QS = replicate(nperm, permFun())
+  p.value = mean(QS >= Q)
+  cochransQtest = list(statistic = Q, parameter = df, p.value = p.value,
+    method = "Cochran's Q Test (monte-carlo)",
+    data.name = deparse(substitute(mat)))
+  class(cochransQtest) = "htest"
+  return(cochransQtest)
+}
+
+constructSequentialTest = function(piH0=.5, piH1=.9, beta, alpha) {
+  a1 = log((1 - beta) / alpha) / (log(piH1 / piH0) + log((1 - piH0) / (1 - piH1)))
+  a0 = -log(beta / (1 - alpha)) / (log(piH1 / piH0) + log((1 - piH0) / (1 - piH1)))
+  b = log((1 - piH0) / (1 - piH1)) / (log(piH1 / piH0) + log((1 - piH0) / (1 - piH1)))
+  ret = list(a1=a1, a0=a0, b=b, piH0=piH0, piH1=piH1, alpha=alpha, beta=beta)
+  class(ret) = "CVST.sequentialTest"
+  return(ret)
+}
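
For the test returned by getCVSTTest(10) (piH0=0.5, piH1 roughly 0.784, beta=0.1, alpha=0.01), the Wald boundaries come out at approximately a1 = 3.49, a0 = 1.78 and b = 0.65 (approximate values, shown only for orientation):

    st = getCVSTTest(10)
    round(c(a1 = st$a1, a0 = st$a0, b = st$b), 2)
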
+
+plotSequence = function(st, s) {
+  y = cumsum(s)
+  if (!is.null(st$steps)) {
+    plot(y, xlim=c(1, st$steps), ylim=c(1, st$steps))
+  }
+  else {
+    plot(y)
+  }
+  abline(a=st$a1, b=st$b, col="red")
+  abline(a=-st$a0, b=st$b, col="red", lty=2)
+
+  abline(h=0)
+  abline(a=0, b=1)
+  title(sprintf("one-sided H0:%0.2f; H1:%0.2f", st$piH0, st$piH1))
+}
+
+
+testSequence = function(st, s) {
+  stopifnot(class(st) == "CVST.sequentialTest")  
+  n = length(s)
+  y = cumsum(s)
+  ret = 0
+  if (y[n] >= st$b * n + st$a1) {
+    ret = 1
+  }
+  else if (y[n] <= st$b * n - st$a0) {
+    ret = -1
+  }
+  return(ret)
+}
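
A short sketch of how the decision rule behaves on two extreme traces, using the getCVSTTest(10) boundaries from above:

    st = getCVSTTest(10)
    testSequence(st, rep(0, 10))       # all losses: returns -1, the configuration is dropped
    testSequence(st, rep(c(1, 0), 5))  # mixed trace: returns 0, the test stays undecided
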
+
+noisySinc = function(n, dim=2, sigma=0.1) {
+  if (length(n) > 1) {
+    x = n
+  }
+  else {
+    x = runif(n, -pi, pi)
+  }
+  sinc = function(d) sin(d) / (d)
+  y = sinc(4 * x) + 0.2 * sin(15 * x * dim) + sigma*rnorm(n)
+  y[is.nan(y)] = 1
+  return(constructData(x=as.matrix(x), y=y))
+}
+
+noisySine = function(n, dim=5, sigma=.25) {
+  x = runif(n, 0, 2 * pi * dim)
+  y = sin(x)
+  if (!is.null(sigma) && sigma > 0) {
+    y = y + rnorm(n, sd=sigma)
+  }
+  label = factor(y == abs(y))  
+  return(constructData(x=as.matrix(x), y=label))
+}
+
+noisyDonoho = function(n, fun=doppler, sigma=1) {
+  x = matrix(runif(n, 0, 1), n, 1)
+  y = as.vector(fun(x)) + rnorm(n, sd=sigma)
+  return(constructData(x=x, y=y))
+}
+
+blocks = function(x, scale=3.656993) {
+  t = c(0.1, 0.13, 0.15, 0.23, 0.25, 0.40, 0.44, 0.65, 0.76, 0.78, 0.81)
+  h = c(4, -5, 3, -4, 5, -4.2, 2.1, 4.3, -3.1, 2.1, -4.2)
+  ret = t(sapply(x, function(xx) (1 + sign(xx - t)) / 2)) %*% h
+  
+  ret = ret * scale
+  return(ret)
+}
+
+bumps = function(x, scale=10.52884) {
+  t = c(0.1, 0.13, 0.15, 0.23, 0.25, 0.40, 0.44, 0.65, 0.76, 0.78, 0.81)
+  h = c(4, 5, 3, 4, 5, 4.2, 2.1, 4.3, 3.1, 5.1, 4.2)
+  w = c(0.005, 0.005, 0.006, 0.01, 0.01, 0.03, 0.01, 0.01, 0.005, 0.008, 0.005)
+  ret = t(sapply(x, function(xx) (1 + abs((xx - t) / w))^-4 )) %*% h
+  ret = ret * scale
+  return(ret)
+}
+
+heavisine = function(x, scale=2.356934) {
+  ret = 4 * sin(4 * pi * x) - sign(x - 0.3) - sign(0.72 - x)
+  ret = ret * scale
+  return(ret)
+}
+
+doppler = function(x, scale=24.22172) {
+  ret = sqrt(x * (1 - x)) * sin((2.1 * pi) / (x + 0.05)) 
+  ret = ret * scale
+  return(ret)
+}
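
Putting the helpers together, an illustrative end-to-end sketch on a Donoho toy set with the kernel ridge regression learner (assumes kernlab is attached; the parameter grid is only an example):

    d = noisyDonoho(550, fun = doppler, sigma = 1)
    krr = constructKRRLearner()
    params = constructParams(kernel = "rbfdot", sigma = 10^(-3:3), lambda = 10^(-4:-1))
    opt = fastCV(d, krr, params, constructCVSTModel())
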
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..448e61f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,12 @@
+CVST
+====
+
+Fast Cross-Validation via Sequential Testing
+
+The package CVST is hosted on CRAN, so
+
+    install.packages("CVST")
+    library(CVST)
+    example(CVST)
+
+will give you a first impression.
\ No newline at end of file
diff --git a/man/CV.Rd b/man/CV.Rd
new file mode 100644
index 0000000..27878e9
--- /dev/null
+++ b/man/CV.Rd
@@ -0,0 +1,60 @@
+\name{CV}
+\alias{CV}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  Perform a k-fold Cross-validation
+}
+\description{
+  Performs the usual k-fold cross-validation procedure on a given data
+  set, parameter grid and learner.
+}
+\usage{
+CV(data, learner, params, fold = 5, verbose = TRUE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{data}{
+    The data set as \code{CVST.data} object.
+}
+  \item{learner}{
+    The learner as \code{CVST.learner} object.
+}
+  \item{params}{
+    the parameter grid as \code{CVST.params} object.
+}
+  \item{fold}{
+    The number of folds that should be generated for each set of parameters.
+}
+  \item{verbose}{
+    Should the procedure report the performance for each model?
+}
+}
+\value{
+  Returns the optimal parameter settings as determined by k-fold cross-validation.
+}
+\references{
+M. Stone.
+Cross-validatory choice and assessment of statistical predictions.
+\emph{Journal of the Royal Statistical Society. Series B}, 36(2):111--147, 1974.
+
+Sylvain Arlot, Alain Celisse, and Paul Painleve.
+A survey of cross-validation procedures for model selection.
+\emph{Statistics Surveys}, 4:40--79, 2010.
+}
+
+\author{
+Tammo Krueger <tammokrueger at googlemail.com>
+}
+
+\seealso{
+  \code{\link{fastCV}}
+  \code{\link{constructData}}
+  \code{\link{constructLearner}}
+  \code{\link{constructParams}}
+}
+\examples{
+ns = noisySine(100)
+svm = constructSVMLearner()
+params = constructParams(kernel="rbfdot", sigma=10^(-3:3), nu=c(0.05, 0.1, 0.2, 0.3))
+opt = CV(ns, svm, params)
+}
diff --git a/man/CVST-package.Rd b/man/CVST-package.Rd
new file mode 100644
index 0000000..28716e1
--- /dev/null
+++ b/man/CVST-package.Rd
@@ -0,0 +1,67 @@
+\name{CVST-package}
+\alias{CVST-package}
+\alias{CVST}
+\docType{package}
+\title{
+Fast Cross-Validation via Sequential Testing
+}
+\description{
+  This package implements the fast cross-validation via sequential
+  testing (CVST) procedure. CVST is an improved cross-validation procedure which uses non-parametric
+  testing coupled with sequential analysis to determine the best
+  parameter set on linearly increasing subsets of the data. By
+  eliminating underperforming candidates quickly and keeping promising
+  candidates as long as possible, the method speeds up the computation
+  while preserving the capability of a full cross-validation.
+  In addition to CVST, the package contains an implementation of
+  ordinary k-fold cross-validation with a flexible and powerful set of
+  helper objects and methods to handle the overall model selection
+  process. The implementations of Cochran's Q test with permutations and
+  the sequential testing framework of Wald are generic and can therefore
+  also be used in other contexts.
+}
+\details{
+\tabular{ll}{
+Package: \tab CVST\cr
+Type: \tab Package\cr
+Version: \tab 0.2\cr
+Date: \tab 2013-03-25\cr
+License: \tab GPL (>=2.0)\cr
+}
+}
+\author{Tammo Krueger, Mikio Braun
+
+Maintainer: Tammo Krueger <tammokrueger at googlemail.com>
+}
+\references{
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential analysis.
+\emph{Neural Information Processing Systems (NIPS), Big Learning
+  Workshop}, 2011.
+URL \url{http://biglearn.org/2011/index.php/Papers\#paper2}.
+  
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential testing.
+\emph{CoRR}, abs/1206.2248, 2012.
+URL \url{http://arxiv.org/abs/1206.2248}.
+
+Abraham Wald.
+\emph{Sequential Analysis}.
+Wiley, 1947.
+
+W. G. Cochran.
+The comparison of percentages in matched samples.
+\emph{Biometrika}, 37 (3-4):256--266, 1950.
+
+M. Friedman.
+The use of ranks to avoid the assumption of normality implicit in the analysis of variance.
+\emph{Journal of the American Statistical Association}, 32 (200):675--701, 1937.
+
+}
+\keyword{ package }
+\examples{
+ns = noisySine(100)
+svm = constructSVMLearner()
+params = constructParams(kernel="rbfdot", sigma=10^(-3:3), nu=c(0.05, 0.1, 0.2, 0.3))
+opt = fastCV(ns, svm, params, constructCVSTModel())
+}
diff --git a/man/cochranq.test.Rd b/man/cochranq.test.Rd
new file mode 100644
index 0000000..10a6c7e
--- /dev/null
+++ b/man/cochranq.test.Rd
@@ -0,0 +1,50 @@
+\name{cochranq.test}
+\alias{cochranq.test}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  Cochran's Q Test with Permutation
+}
+\description{
+  Performs the Cochran's Q test on the data. If the data matrix contains
+  too few elements, the chi-squared distribution of the test statistic is
+  replaced by a permutation variant.
+}
+\usage{
+cochranq.test(mat)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{mat}{
+    The data matrix with the individuals in the rows and treatments in the columns.
+}
+}
+\value{
+  Returns a \code{htest} object with the usual entries.
+}
+\references{
+W. G. Cochran.
+The comparison of percentages in matched samples.
+\emph{Biometrika}, 37 (3-4):256--266, 1950.
+
+Kashinath D. Patil.
+Cochran's Q test: Exact distribution.
+\emph{Journal of the American Statistical Association}, 70 (349):186--189, 1975.
+
+Merle W. Tate and Sara M. Brown.
+Note on the {Cochran Q} test.
+\emph{Journal of the American Statistical Association}, 65 (329):155--160, 1970.
+}
+\author{
+Tammo Krueger <tammokrueger at googlemail.com>
+}
+
+\examples{
+mat = matrix(c(rep(0, 10), 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
+1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,
+0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
+1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1), ncol=4)
+cochranq.test(mat)
+mat = matrix(c(rep(0, 7), 1, rep(0, 12), 1, 1, 0, 1,
+rep(0, 5), 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1), nrow=8)
+cochranq.test(mat)
+}
diff --git a/man/constructCVSTModel.Rd b/man/constructCVSTModel.Rd
new file mode 100644
index 0000000..3a84b8d
--- /dev/null
+++ b/man/constructCVSTModel.Rd
@@ -0,0 +1,63 @@
+\name{constructCVSTModel}
+\alias{constructCVSTModel}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  Setup for a CVST Run.
+}
+\description{
+  This is a helper object of type \code{CVST.setup} containing all
+  necessary parameters for a CVST run.
+}
+\usage{
+constructCVSTModel(steps = 10, beta = 0.1, alpha = 0.01,
+similaritySignificance = 0.05, earlyStoppingSignificance = 0.05,
+earlyStoppingWindow = 3, regressionSimilarityViaOutliers = FALSE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{steps}{
+    Number of steps CVST should run
+  }
+\item{beta}{
+  Significance level for H0.
+}
+\item{alpha}{
+    Significance level for H1.
+}
+  \item{similaritySignificance}{
+    Significance level of the similarity test.
+}
+  \item{earlyStoppingSignificance}{
+    Significance level of the early stopping test.
+}
+  \item{earlyStoppingWindow}{
+    Size of the early stopping window.
+}
+  \item{regressionSimilarityViaOutliers}{
+    Should the less strict outlier-based similarity measure for
+    regression tasks be used?
+}
+}
+\value{
+  A \code{CVST.setup} object suitable for \code{\link{fastCV}}.
+}
+
+\references{
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential analysis.
+\emph{Neural Information Processing Systems (NIPS), Big Learning
+  Workshop}, 2011.
+URL \url{http://biglearn.org/2011/index.php/Papers\#paper2}.
+  
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential testing.
+\emph{CoRR}, abs/1206.2248, 2012.
+URL \url{http://arxiv.org/abs/1206.2248}.
+}
+\author{
+Tammo Krueger <tammokrueger at googlemail.com>
+}
+
+\seealso{
+  \code{\link{fastCV}}
+}
diff --git a/man/constructData.Rd b/man/constructData.Rd
new file mode 100644
index 0000000..15fedcf
--- /dev/null
+++ b/man/constructData.Rd
@@ -0,0 +1,62 @@
+\name{constructData}
+\alias{constructData}
+\alias{getN}
+\alias{getSubset}
+\alias{getX}
+\alias{shuffleData}
+\alias{isClassification}
+\alias{isRegression}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  Construction and Handling of \code{CVST.data} Objects
+}
+\description{
+  The CVST methods need a structured interface to both regression and
+  classification data sets. These helper methods allow the construction
+  and consistent handling of these types of data sets.
+}
+\usage{
+constructData(x, y)
+getN(data)
+getSubset(data, subset)
+getX(data, subset = NULL)
+shuffleData(data)
+isClassification(data)
+isRegression(data)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{x}{
+    The feature data as vector or matrix.
+}
+  \item{y}{
+    The observed values (regressands/labels) as list, vector or factor.
+}
+  \item{data}{
+    A \code{CVST.data} object generated via \code{constructData}.
+}
+\item{subset}{
+  An index set.
+}
+
+}
+\value{
+  \code{constructData} returns a \code{CVST.data} object. \code{getN}
+  returns the number of data points in the data set. \code{getSubset}
+  returns a subset of the data as a \code{CVST.data} object, while
+  \code{getX} just returns the feature data. \code{shuffleData} returns a
+  randomly shuffled instance of the data.
+}
+\author{
+Tammo Krueger <tammokrueger at googlemail.com>
+}
+\examples{
+nsine = noisySine(10)
+isClassification(nsine)
+isRegression(nsine)
+getN(nsine)
+getX(nsine)
+nsineShuffeled = shuffleData(nsine)
+getX(nsineShuffeled)
+getSubset(nsineShuffeled, 1:3)
+}
diff --git a/man/constructLearner.Rd b/man/constructLearner.Rd
new file mode 100644
index 0000000..3578c9f
--- /dev/null
+++ b/man/constructLearner.Rd
@@ -0,0 +1,102 @@
+\name{constructLearner}
+\alias{constructLearner}
+\alias{constructKlogRegLearner}
+\alias{constructKRRLearner}
+\alias{constructSVMLearner}
+\alias{constructSVRLearner}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  Construction of Specific Learners for CVST
+}
+\description{
+  These methods construct a \code{CVST.learner} object suitable for the
+  CVST method. These objects provide the common interface needed for the
+  \code{\link{CV}} and \code{\link{fastCV}} methods. We provide kernel
+  logistic regression, kernel ridge regression, support vector machines
+  and support vector regression as fully functional implementation templates.
+}
+\usage{
+constructLearner(learn, predict)
+constructKlogRegLearner()
+constructKRRLearner()
+constructSVMLearner()
+constructSVRLearner()
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{learn}{
+    The learning method, which takes a \code{CVST.data} object and a list of
+    parameters and returns a model.
+}
+\item{predict}{
+  The prediction method, which takes a model and a \code{CVST.data} object and
+  returns the corresponding predictions.
+}
+}
+\details{
+  The nu-SVM and nu-SVR are built on top of the corresponding implementations of
+  the \code{kernlab} package (see reference). In the list of parameters these
+  implementations expect an entry named \code{kernel}, which gives the
+  name of the kernel that should be used, an entry named \code{nu}
+  specifying the nu parameter, and an entry named \code{C} giving the C
+  parameter for the nu-SVR.
+
+  The KRR and KLR learners also expect a \code{kernel} entry and any other
+  parameters necessary to construct the kernel. Both methods expect a \code{lambda}
+  parameter, and KLR additionally expects \code{tol} and \code{maxiter} parameters in
+  the parameter list.
+
+  Note that the lambda of KRR/KLR and the C parameter of SVR are scaled
+  by the data set size to allow for comparable results in the fast CV loop.
+}
+\value{
+  Returns a learner of type \code{CVST.learner} suitable for \code{\link{CV}} and \code{\link{fastCV}}.
+}
+\references{
+  Alexandros Karatzoglou, Alexandros Smola, Kurt Hornik, Achim Zeileis.
+  kernlab - An S4 Package for Kernel Methods in R
+  \emph{Journal of Statistical Software} Vol. 11, Issue 9, Nov 2004.
+  URL: \url{http://www.jstatsoft.org/v11/i09}.
+  
+  Volker Roth.
+  Probabilistic discriminative kernel classifiers for multi-class problems.
+  In \emph{Proceedings of the 23rd DAGM-Symposium on Pattern Recognition}, pages 246--253, 2001.
+}
+\author{
+  Tammo Krueger <tammokrueger at googlemail.com>
+}
+
+\seealso{
+  \code{\link{CV}}
+  \code{\link{fastCV}}
+}
+\examples{
+# SVM
+ns = noisySine(100)
+svm = constructSVMLearner()
+p = list(kernel="rbfdot", sigma=100, nu=.1)
+m = svm$learn(ns, p)
+nsTest = noisySine(1000)
+pred = svm$predict(m, nsTest)
+sum(pred != nsTest$y) / getN(nsTest)
+# Kernel logistic regression
+klr = constructKlogRegLearner()
+p = list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns), tol=10e-6, maxiter=100)
+m = klr$learn(ns, p)
+pred = klr$predict(m, nsTest)
+sum(pred != nsTest$y) / getN(nsTest)
+# SVR
+ns = noisySinc(100)
+svr = constructSVRLearner()
+p = list(kernel="rbfdot", sigma=100, nu=.1, C=1*getN(ns))
+m = svr$learn(ns, p)
+nsTest = noisySinc(1000)
+pred = svr$predict(m, nsTest)
+sum((pred - nsTest$y)^2) / getN(nsTest)
+# Kernel ridge regression
+krr = constructKRRLearner()
+p = list(kernel="rbfdot", sigma=100, lambda=.1/getN(ns))
+m = krr$learn(ns, p)
+pred = krr$predict(m, nsTest)
+sum((pred - nsTest$y)^2) / getN(nsTest)
+}
diff --git a/man/constructParams.Rd b/man/constructParams.Rd
new file mode 100644
index 0000000..2265dce
--- /dev/null
+++ b/man/constructParams.Rd
@@ -0,0 +1,37 @@
+\name{constructParams}
+\alias{constructParams}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  Construct a Grid of Parameters
+}
+\description{
+  This is a helper function which, given a named list of parameter
+  choices, expands the complete grid and returns a \code{CVST.params}
+  object suitable for \code{\link{CV}} and \code{\link{fastCV}}.
+}
+\usage{
+constructParams(...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{\dots}{
+    The parameters that should be expanded.
+}
+}
+\value{
+  Returns a \code{CVST.params} object, which is basically a named list of
+  possible parameter values.
+}
+
+\author{
+Tammo Krueger <tammokrueger at googlemail.com>
+}
+
+\seealso{
+  \code{\link{fastCV}}
+}
+\examples{
+params = constructParams(kernel="rbfdot", sigma=10^(-1:5), nu=c(0.1, 0.2))
+# the expanded grid contains 14 parameter lists:
+length(params)
+}
diff --git a/man/constructSequentialTest.Rd b/man/constructSequentialTest.Rd
new file mode 100644
index 0000000..5d05607
--- /dev/null
+++ b/man/constructSequentialTest.Rd
@@ -0,0 +1,83 @@
+\name{constructSequentialTest}
+\alias{constructSequentialTest}
+\alias{getCVSTTest}
+\alias{testSequence}
+\alias{plotSequence}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  Construct and Handle Sequential Tests.
+}
+\description{
+  These functions handle the construction and calculation with
+  sequential tests as introduced by Wald (1947). \code{getCVSTTest}
+  constructs a special sequential test as introduced in Krueger
+  (2011). \code{testSequence} tests whether a sequence of 0/1 values is
+  distributed according to H0 or H1.
+}
+\usage{
+constructSequentialTest(piH0 = 0.5, piH1 = 0.9, beta, alpha)
+getCVSTTest(steps, beta = 0.1, alpha = 0.01)
+testSequence(st, s)
+plotSequence(st, s)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{piH0}{
+    Probability of the binomial distribution for H0.
+}
+\item{piH1}{
+  Probability of the binomial distribution for H1.
+}
+\item{beta}{
+  Significance level for H0.
+}
+\item{alpha}{
+    Significance level for H1.
+}
+\item{steps}{
+  Number of steps for which the CVST procedure should be executed.
+}
+\item{st}{
+  A sequential test of type \code{CVST.sequentialTest}.
+}
+  \item{s}{
+    A sequence of 0/1 values.
+}
+}
+\value{
+  \code{constructSequentialTest} and \code{getCVSTTest} return a
+  \code{CVST.sequentialTest} with the specified
+  properties. \code{testSequence} returns 1 if H1 can be accepted, -1
+  if H0 can be accepted, and 0 if the test needs more data for a
+  decision. \code{plotSequence} gives a graphical impression of
+  this testing procedure.
+}
+\references{
+Abraham Wald.
+\emph{Sequential Analysis}.
+Wiley, 1947.
+
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential analysis.
+\emph{Neural Information Processing Systems (NIPS), Big Learning
+  Workshop}, 2011.
+URL \url{http://biglearn.org/2011/index.php/Papers\#paper2}.
+  
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential testing.
+\emph{CoRR}, abs/1206.2248, 2012.
+URL \url{http://arxiv.org/abs/1206.2248}.
+}
+\author{
+Tammo Krueger <tammokrueger at googlemail.com>
+}
+
+\seealso{
+  \code{\link{fastCV}}
+}
+\examples{
+st = getCVSTTest(10)
+s = rbinom(10,1, .5)
+plotSequence(st, s)
+testSequence(st, s)
+}
diff --git a/man/fastCV.Rd b/man/fastCV.Rd
new file mode 100644
index 0000000..6db8350
--- /dev/null
+++ b/man/fastCV.Rd
@@ -0,0 +1,73 @@
+\name{fastCV}
+\alias{fastCV}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  The Fast Cross-Validation via Sequential Testing (CVST) Procedure
+}
+\description{
+  CVST is an improved cross-validation procedure which uses non-parametric
+  testing coupled with sequential analysis to determine the best
+  parameter set on linearly increasing subsets of the data. By
+  eliminating underperforming candidates quickly and keeping promising
+  candidates as long as possible, the method speeds up the computation
+  while preserving the capability of a full cross-validation.  
+}
+\usage{
+fastCV(train, learner, params, setup, test = NULL, verbose = TRUE)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{train}{
+    The data set as \code{CVST.data} object.
+}
+  \item{learner}{
+    The learner as \code{CVST.learner} object.
+}
+  \item{params}{
+    the parameter grid as \code{CVST.params} object.
+}
+  \item{setup}{
+    A \code{CVST.setup} object containing the necessary parameters for
+    the CVST procedure.
+}
+  \item{test}{
+    An independent test set that should be used at each step. If
+    \code{NULL} then the remaining data after learning a model
+    at each step is used instead.
+}
+  \item{verbose}{
+    Should the procedure report the performance after each step?
+}
+}
+\value{
+  Returns the optimal parameter settings as determined by fast
+  cross-validation via sequential testing.
+}
+\references{
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential analysis.
+\emph{Neural Information Processing Systems (NIPS), Big Learning
+  Workshop}, 2011.
+URL \url{http://biglearn.org/2011/index.php/Papers\#paper2}.
+  
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential testing.
+\emph{CoRR}, abs/1206.2248, 2012.
+URL \url{http://arxiv.org/abs/1206.2248}.
+}
+\author{
+Tammo Krueger <tammokrueger at googlemail.com>
+}
+\seealso{
+  \code{\link{CV}}
+  \code{\link{constructCVSTModel}}
+  \code{\link{constructData}}
+  \code{\link{constructLearner}}
+  \code{\link{constructParams}}
+}
+\examples{
+ns = noisySine(100)
+svm = constructSVMLearner()
+params = constructParams(kernel="rbfdot", sigma=10^(-3:3), nu=c(0.05, 0.1, 0.2, 0.3))
+opt = fastCV(ns, svm, params, constructCVSTModel())
+}
diff --git a/man/noisyDonoho.Rd b/man/noisyDonoho.Rd
new file mode 100644
index 0000000..17919d0
--- /dev/null
+++ b/man/noisyDonoho.Rd
@@ -0,0 +1,64 @@
+\name{noisyDonoho}
+\alias{noisyDonoho}
+\alias{heavisine}
+\alias{doppler}
+\alias{bumps}
+\alias{blocks}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  Generate Donoho's Toy Data Sets 
+}
+\description{
+  This function generates noisy variants of the toy signals
+  introduced by Donoho (see reference section). The scaling is chosen to
+  reflect the setting as discussed in the original paper.
+}
+\usage{
+noisyDonoho(n, fun = doppler, sigma = 1)
+blocks(x, scale = 3.656993)
+bumps(x, scale = 10.52884)
+doppler(x, scale = 24.22172)
+heavisine(x, scale = 2.356934)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{n}{
+    Number of data points that should be generated.
+}
+  \item{fun}{
+    Function to use to generate the data.
+}
+  \item{sigma}{
+    Standard deviation of the noise component.
+}
+   \item{x}{
+    The points at which the function should be evaluated.
+}
+  \item{scale}{
+    Scaling parameter.
+}
+}
+\value{
+  Returns a data set of type \code{CVST.data}.
+}
+\references{
+David L. Donoho and Jain M. Johnstone.
+Ideal spatial adaptation by wavelet shrinkage.
+\emph{Biometrika}, 81 (3) 425--455, 1994.
+}
+\author{
+Tammo Krueger <tammokrueger at googlemail.com>
+}
+
+\seealso{
+  \code{\link{constructData}}
+}
+\examples{
+bumpsSet = noisyDonoho(1000, fun=bumps)
+plot(bumpsSet)
+dopplerSet = noisyDonoho(1000, fun=doppler)
+plot(dopplerSet)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{datasets}
diff --git a/man/noisySine.Rd b/man/noisySine.Rd
new file mode 100644
index 0000000..d8e5691
--- /dev/null
+++ b/man/noisySine.Rd
@@ -0,0 +1,57 @@
+\name{noisySine}
+\alias{noisySine}
+\alias{noisySinc}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+  Regression and Classification Toy Data Set
+}
+\description{
+  Regression and Classification Toy Data Set based on the sine and sinc function.
+}
+\usage{
+noisySine(n, dim = 5, sigma = 0.25)
+noisySinc(n, dim = 2, sigma = 0.1)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{n}{
+    Number of data points that should be generated.
+}
+  \item{dim}{
+    Intrinsic dimensionality of the data set (see references for details).
+}
+  \item{sigma}{
+    Standard deviation of the noise component.
+}
+}
+\value{
+  Returns a data set of type \code{CVST.data}.
+}
+\references{
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential analysis.
+\emph{Neural Information Processing Systems (NIPS), Big Learning
+  Workshop}, 2011.
+URL \url{http://biglearn.org/2011/index.php/Papers\#paper2}.
+  
+Tammo Krueger, Danny Panknin, and Mikio Braun.
+Fast cross-validation via sequential testing.
+\emph{CoRR}, abs/1206.2248, 2012.
+URL \url{http://arxiv.org/abs/1206.2248}.
+}
+\author{
+Tammo Krueger <tammokrueger at googlemail.com>
+}
+
+\seealso{
+  \code{\link{constructData}}
+}
+\examples{
+nsine = noisySine(1000)
+plot(nsine, col=nsine$y)
+nsinc = noisySinc(1000)
+plot(nsinc)
+}
+% Add one or more standard keywords, see file 'KEYWORDS' in the
+% R documentation directory.
+\keyword{datasets}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/r-cran-cvst.git


