[r-cran-mlbench] 03/05: New upstream version 2.1-1

Andreas Tille tille at debian.org
Sat Oct 21 14:44:18 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-cran-mlbench.

commit 4ac351d8cc1437f94bfe0f62cdb23ff95666f814
Author: Andreas Tille <tille at debian.org>
Date:   Sat Oct 21 16:40:03 2017 +0200

    New upstream version 2.1-1
---
 DESCRIPTION                   |  16 ++
 Data-Administration.R         |  93 ++++++++
 MD5                           |  66 ++++++
 NAMESPACE                     |  18 ++
 NEWS                          | 102 +++++++++
 R/mlbench-class.R             | 500 ++++++++++++++++++++++++++++++++++++++++++
 R/mlbench-regression.R        |  78 +++++++
 README                        |  11 +
 data/BostonHousing.rda        | Bin 0 -> 12932 bytes
 data/BostonHousing2.rda       | Bin 0 -> 16660 bytes
 data/BreastCancer.rda         | Bin 0 -> 5460 bytes
 data/DNA.rda                  | Bin 0 -> 81208 bytes
 data/Glass.rda                | Bin 0 -> 4144 bytes
 data/HouseVotes84.rda         | Bin 0 -> 2264 bytes
 data/Ionosphere.rda           | Bin 0 -> 34524 bytes
 data/LetterRecognition.rda    | Bin 0 -> 162580 bytes
 data/Ozone.rda                | Bin 0 -> 5224 bytes
 data/PimaIndiansDiabetes.rda  | Bin 0 -> 8424 bytes
 data/PimaIndiansDiabetes2.rda | Bin 0 -> 8284 bytes
 data/Satellite.rda            | Bin 0 -> 109096 bytes
 data/Servo.rda                | Bin 0 -> 1060 bytes
 data/Shuttle.rda              | Bin 0 -> 371492 bytes
 data/Sonar.rda                | Bin 0 -> 26152 bytes
 data/Soybean.rda              | Bin 0 -> 4732 bytes
 data/Vehicle.rda              | Bin 0 -> 15220 bytes
 data/Vowel.rda                | Bin 0 -> 17800 bytes
 data/Zoo.rda                  | Bin 0 -> 1268 bytes
 debian/changelog              |   5 -
 debian/compat                 |   1 -
 debian/control                |  23 --
 debian/copyright              |  31 ---
 debian/rules                  |   3 -
 debian/source/format          |   1 -
 debian/watch                  |   2 -
 inst/CITATION                 |  31 +++
 man/BostonHousing.Rd          |  83 +++++++
 man/BreastCancer.Rd           |  86 ++++++++
 man/DNA.Rd                    |  80 +++++++
 man/Glass.Rd                  |  55 +++++
 man/HouseVotes84.Rd           |  64 ++++++
 man/Ionosphere.Rd             |  74 +++++++
 man/LetterRecognition.Rd      |  76 +++++++
 man/Ozone.Rd                  |  38 ++++
 man/PimaIndiansDiabetes.Rd    |  74 +++++++
 man/Satellite.Rd              | 113 ++++++++++
 man/Servo.Rd                  |  56 +++++
 man/Shuttle.Rd                |  42 ++++
 man/Sonar.Rd                  |  60 +++++
 man/Soybean.Rd                | 106 +++++++++
 man/Vehicle.Rd                |  80 +++++++
 man/Vowel.Rd                  |  53 +++++
 man/Zoo.Rd                    |  48 ++++
 man/as.data.frame.mlbench.Rd  |  19 ++
 man/bayesclass.Rd             |  39 ++++
 man/mlbench.2dnormals.Rd      |  30 +++
 man/mlbench.cassini.Rd        |  28 +++
 man/mlbench.circle.Rd         |  32 +++
 man/mlbench.cuboids.Rd        |  32 +++
 man/mlbench.friedman1.Rd      |  34 +++
 man/mlbench.friedman2.Rd      |  39 ++++
 man/mlbench.friedman3.Rd      |  40 ++++
 man/mlbench.hypercube.Rd      |  33 +++
 man/mlbench.peak.Rd           |  23 ++
 man/mlbench.ringnorm.Rd       |  31 +++
 man/mlbench.shapes.Rd         |  19 ++
 man/mlbench.simplex.Rd        |  37 ++++
 man/mlbench.smiley.Rd         |  24 ++
 man/mlbench.spirals.Rd        |  32 +++
 man/mlbench.threenorm.Rd      |  33 +++
 man/mlbench.twonorm.Rd        |  31 +++
 man/mlbench.waveform.Rd       |  52 +++++
 man/mlbench.xor.Rd            |  30 +++
 man/plot.mlbench.Rd           |  28 +++
 src/waveform.c                | 126 +++++++++++
 74 files changed, 2895 insertions(+), 66 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..c2023d5
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,16 @@
+Package: mlbench
+Version: 2.1-1
+Title: Machine Learning Benchmark Problems
+Date: 2010-12-10
+Author: Friedrich Leisch and Evgenia Dimitriadou.
+Maintainer: Friedrich Leisch <Friedrich.Leisch at R-project.org>
+Description: A collection of artificial and real-world machine learning
+        benchmark problems, including, e.g., several data sets from the
+        UCI repository.
+Depends: R (>= 2.10)
+License: GPL-2
+Suggests: lattice
+ZipData: No
+Packaged: 2012-07-10 08:19:15 UTC; leisch
+Repository: CRAN
+Date/Publication: 2012-07-10 11:51:32
diff --git a/Data-Administration.R b/Data-Administration.R
new file mode 100644
index 0000000..9bd247b
--- /dev/null
+++ b/Data-Administration.R
@@ -0,0 +1,93 @@
+## This file keeps record of reading the data into R and
+## transformations (if any) that have been applied. All
+## transformations are indicated in the respective help pages.
+
+###**********************************************************
+
+LetterRecognition <- scan(file="LetterRecognition.data")
+
+LetterRecognition <- matrix(LetterRecognition,ncol=17,byrow=TRUE)
+LetterRecognition <- as.data.frame(LetterRecognition)
+colnames(LetterRecognition) <-
+    c("lettr", "x.box", "y.box", "width", "high", "onpix", "x.bar",
+      "y.bar", "x2bar", "y2bar", "xybar", "x2ybr", "xy2br", "x.ege",
+      "xegvy", "y.ege", "yegvx")
+LetterRecognition$lettr <- factor(LetterRecognition$lettr,
+                                  labels=LETTERS)
+
+save(LetterRecognition, file="data/LetterRecognition.rda")
+
+
+###**********************************************************
+
+### PimaIndiansDiabetes2
+
+load("data/PimaIndiansDiabetes.rda")
+PimaIndiansDiabetes2 = PimaIndiansDiabetes
+
+for(n in c("glucose", "pressure","triceps", "insulin",  "mass")){
+    PimaIndiansDiabetes2[[n]][PimaIndiansDiabetes[[n]]==0] <- NA
+}
+
+save(PimaIndiansDiabetes2, file="data/PimaIndiansDiabetes2.rda")
+
+###**********************************************************
+
+Satellite <- scan("Satellite.data")
+
+Satellite <- matrix(Satellite,ncol=37,byrow=TRUE)
+Satellite <- data.frame(x=Satellite[,1:36], classes=factor(Satellite[,37]))
+levels(Satellite$classes) <- c("red soil",
+                               "cotton crop",
+                               "grey soil",
+                               "damp grey soil",
+                               "vegetation stubble",
+                               "very damp grey soil")
+
+save(Satellite, file="data/Satellite.rda")
+
+###**********************************************************
+
+### Zoo
+
+## download zoo.data from UCI repository (2007-02-02)
+## edit zoo.data from UCI repository: two rows have name "frog"
+## -> frog.1 and frog.2
+
+Zoo <- read.csv("zoo.data", header=FALSE, row.names=1)
+
+colnames(Zoo) <- c("hair",
+                   "feathers",	
+                   "eggs",		
+                   "milk",		
+                   "airborne",	
+                   "aquatic",	
+                   "predator",	
+                   "toothed",	
+                   "backbone",	
+                   "breathes",	
+                   "venomous",	
+                   "fins",		
+                   "legs",		
+                   "tail",		
+                   "domestic",	
+                   "catsize",	
+                   "type")
+
+Zoo[,1:12] <- lapply(Zoo[,1:12], as.logical)
+Zoo[,14:16] <- lapply(Zoo[,14:16], as.logical)
+Zoo[,17] <- factor(Zoo[,17],
+                   labels=c("mammal","bird","reptile","fish",
+                   "amphibian","insect","mollusc.et.al"))
+
+save(Zoo, file="Zoo.rda")
+
+###**********************************************************
+
+## change compression type
+
+for(f in list.files("data")){
+    n <- sub(".rda", "", f)
+    load(file.path("data", f))
+    save(list=n, file=f, compress="xz")
+}
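
The PimaIndiansDiabetes2 transformation recorded above can be spot-checked
against the data sets shipped in the package; a minimal sketch, assuming the
package is installed so that both objects are loaded via data() rather than
rebuilt from the raw UCI files:

    library(mlbench)
    data(PimaIndiansDiabetes)
    data(PimaIndiansDiabetes2)
    ## every zero in these clinical measurements should have become NA
    for (n in c("glucose", "pressure", "triceps", "insulin", "mass")) {
        stopifnot(all(is.na(PimaIndiansDiabetes2[[n]]) ==
                      (PimaIndiansDiabetes[[n]] == 0)))
    }
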
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..4b816ba
--- /dev/null
+++ b/MD5
@@ -0,0 +1,66 @@
+f2a9aa569a77ebfe29653e2bf16ec0eb *DESCRIPTION
+6776e69974f0f642d95de03376502981 *Data-Administration.R
+8ee1257469e0ad8511789428c5d466c2 *NAMESPACE
+a7f407e416bcd81e0e2697cfb6eeb714 *NEWS
+0000c717c763ca757f9a2b6bedb67c78 *R/mlbench-class.R
+1bee7ea370a599f2609f58254396e178 *R/mlbench-regression.R
+4069da5d5f4e156b36697681600a3224 *README
+c47b4e6ddc106f9e2f0450d1b1fc23cb *data/BostonHousing.rda
+25209554a682562de2c0018f673f1584 *data/BostonHousing2.rda
+eae7e006a671d988711f7b8a2901d94f *data/BreastCancer.rda
+cb9ee3f094336cdb4476d824bbac2375 *data/DNA.rda
+73889627f545aa8c233869679aa5aaec *data/Glass.rda
+0473be45f18e80533003e21648c7c718 *data/HouseVotes84.rda
+9ca65dd2bb66a3f37d21ae0fec773229 *data/Ionosphere.rda
+e987247fa4b45c92191efd9322952737 *data/LetterRecognition.rda
+4b195a02c1b60d550803163f81f4f63c *data/Ozone.rda
+faf2bcc583d54f91a923786c58658584 *data/PimaIndiansDiabetes.rda
+96b377e07072cf0f789cd05d464db58c *data/PimaIndiansDiabetes2.rda
+7a34cc4153a892f6be26d2aacaa8b119 *data/Satellite.rda
+76d6d7760bc46ad2c658144248c68b64 *data/Servo.rda
+2dd38ffe9dda175c3c7609881a7b8f3f *data/Shuttle.rda
+802ff940ad8fe915446f93df51bbeda9 *data/Sonar.rda
+a6824b09d2afe54c3aebd8594797edd8 *data/Soybean.rda
+0e1aafd30e104a7807375132d647bf13 *data/Vehicle.rda
+4f980bc3c5691f964e72cd20fa56b9fb *data/Vowel.rda
+2a152427c8e223a7bd47395246f829a3 *data/Zoo.rda
+e90f25e177787d2fc29a5a05ac44a009 *inst/CITATION
+59dbb880b61f993c3cbb4ec427d36ed7 *man/BostonHousing.Rd
+9b97c1fded837b56696de3b4d82c81cc *man/BreastCancer.Rd
+bf5315d9df64f445e8cab7aa58043d4d *man/DNA.Rd
+94d8156f7120639b6ce988f69f13b566 *man/Glass.Rd
+a5025b39f090875ce1e4bddc8122ea8f *man/HouseVotes84.Rd
+d35fdfe73ad6292958190fb7fb1defd5 *man/Ionosphere.Rd
+524a0475ba287619cb88e539215c8d7e *man/LetterRecognition.Rd
+665705ea72f9d2bfbfb150cfbd406f21 *man/Ozone.Rd
+1d836860da4733a38a8b8b92cfcedc5a *man/PimaIndiansDiabetes.Rd
+9da4e9e39e0aefa7898bd0c799d4098c *man/Satellite.Rd
+5aa1a174b6de6c679baa66c5cebfaa6c *man/Servo.Rd
+c387b8164d53b9b741bf1df404bf1cc5 *man/Shuttle.Rd
+1770f07a5eef3b970353ca4f611d9383 *man/Sonar.Rd
+33db769ef50c9eb74343b51684d489a2 *man/Soybean.Rd
+be1022555ec0d16c199d542581e9cf5a *man/Vehicle.Rd
+7c68cb63adc44fd3aacf3481286c4a7b *man/Vowel.Rd
+289cc87ccdbe2ff2af466ce41b73deee *man/Zoo.Rd
+29a301b7f3aa4f802e66d9d85aac2017 *man/as.data.frame.mlbench.Rd
+9a0737fde2383cf20f14acf2f75ca92d *man/bayesclass.Rd
+97181d5baeeec757207efbf463b7c56d *man/mlbench.2dnormals.Rd
+68fe245c3189dc82befbdcba7ab5c753 *man/mlbench.cassini.Rd
+67107266a500f068db39255e9cb18561 *man/mlbench.circle.Rd
+2741fe81b108ad224e7728ce777898ce *man/mlbench.cuboids.Rd
+77f68ba4e4176203a1b1538a86b0a515 *man/mlbench.friedman1.Rd
+32273c6b6d722cc0b9e7305862d69ee7 *man/mlbench.friedman2.Rd
+694df9ab1efd67f92ddba2532cb1b24d *man/mlbench.friedman3.Rd
+407d84c1c4ad6207992bb3f1382e92b8 *man/mlbench.hypercube.Rd
+30a96c293a1a6787354d3b1b78d35db5 *man/mlbench.peak.Rd
+1d54014c9a19032f4095c6dc1dec2a55 *man/mlbench.ringnorm.Rd
+057d71cdf6ab03943252656547e515de *man/mlbench.shapes.Rd
+b54f8ecd12955cde96dbd8825f129369 *man/mlbench.simplex.Rd
+dae712045090b90ae7ba5f4ed4dadaf3 *man/mlbench.smiley.Rd
+0e0386db8230f0143e22c990b9180b09 *man/mlbench.spirals.Rd
+176063258f3cd02e90f6a25909dd0145 *man/mlbench.threenorm.Rd
+1bf9aee1bd5830130abfb9d60efd9875 *man/mlbench.twonorm.Rd
+7d899328b07839fd51caf4bc635d67bb *man/mlbench.waveform.Rd
+3f946079c1693b5fc45a6af3e7df9032 *man/mlbench.xor.Rd
+ab08b35363d5fa1dc40b68c91d68888e *man/plot.mlbench.Rd
+97c51b84051dd25177fd0ca9437826ee *src/waveform.c
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..8671fc5
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,18 @@
+useDynLib(mlbench)
+
+export(bayesclass)
+exportPattern("^mlbench.*")
+
+S3method(as.data.frame, mlbench)
+S3method(plot, mlbench)
+
+S3method(bayesclass, noerr)
+S3method(bayesclass, mlbench.2dnormals)
+S3method(bayesclass, mlbench.circle)
+S3method(bayesclass, mlbench.xor)
+S3method(bayesclass, mlbench.cassini)
+S3method(bayesclass, mlbench.cuboids)
+S3method(bayesclass, mlbench.twonorm)
+S3method(bayesclass, mlbench.threenorm)
+S3method(bayesclass, mlbench.ringnorm)
+
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..7e23d3b
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,102 @@
+Changes in Version 2.1-1
+
+  o Added a NAMESPACE.
+
+
+Changes in Version 2.1-0
+
+  o Removed dependencies on e1071 and scatterplot3d (the latter replaced by 
+    cloud() from lattice). 
+
+  o mlbench.corners() has been renamed to mlbench.hypercube().
+
+  o New function mlbench.simplex() by Manuel Eugster and Sebastian Kaiser.
+
+  o Bugfixes in the bayesclass() methods for ringnorm and threenorm 
+    submitted by Julia Schiffner.
+
+
+Changes in Version 2.0-0
+
+  o Changed license of complete package to GPL-2.
+
+  o Recompressed all data sets using xz.
+
+  o Added examples to all help pages.
+
+  o Added info about UCI repository to mlbench.waveform.Rd.
+
+
+Changes in Version 1.1-6
+
+  o Improve CITATION.
+
+
+Changes in Version 1.1-5
+
+  o Fixed Rd bug and renamed README to LICENSE.
+
+
+Changes in Version 1.1-4
+
+  o Fixed documentation of S3 objects.
+
+
+Changes in Version 1.1-3
+
+  o Bugfix: data sets that are loaded using R code did not close the
+    corresponding connections.
+
+
+Changes in Version 1.1-2
+
+  o New data set Zoo.
+
+
+Changes in Version 1.1-2
+
+  o Converted the 'chas' columns of BostonHousing2 to a factor.
+
+  o Added a corrected version of PimaIndiansDiabetes as PimaIndiansDiabetes2.
+ 
+  o Added correct citation for UCI repository in various places.
+
+
+Changes in Version 1.1-1
+
+  o Add '...' argument to as.data.frame() methods for consistency
+    with R 2.4.0 or later.
+
+
+Changes in Version 1.1-0
+
+  o New artificial classification problems mlbench.shapes() and
+    mlbench.corners()
+
+  o Added corrected version of Boston housing data (thanks to
+    John Maindonald). 
+
+	
+Changes in Version 1.0-1
+
+  o Minor fixes in BostonHousing.Rd.
+
+
+Changes in Version 1.0-1
+
+  o Fixed a Bug in the waveform function (thanks to Karsten Lübke).
+
+  o Added a "ZipData: No" to the DESCRIPTION file.
+
+  o All data files are now in compressed format.
+  
+
+Changes in Version 1.0-0
+
+  o New function mlbench.smiley().
+
+  o Version 1.0-0 is not really different from the previous release
+    (version 0.5-10), but we decided that it is stupid to have a
+    version number smaller than 1.0 for a package that is stable and
+    useful.
+
diff --git a/R/mlbench-class.R b/R/mlbench-class.R
new file mode 100644
index 0000000..ac3c5f8
--- /dev/null
+++ b/R/mlbench-class.R
@@ -0,0 +1,500 @@
+#
+#  Copyright (C) 1997-2010 Friedrich Leisch
+#  $Id: mlbench-class.R 4612 2010-10-08 09:51:20Z leisch $
+#
+
+mlbench.xor <- function(n, d=2){
+
+  x <- matrix(runif(n*d,-1,1),ncol=d,nrow=n)
+  if((d != as.integer(d)) || (d<2))
+    stop("d must be an integer >=2")
+
+  z <- rep(0, length=n)
+  for(k in 1:n){
+    if(x[k,1]>=0){
+      tmp <- (x[k,2:d] >=0)
+      z[k] <- 1+sum(tmp*2^(0:(d-2)))
+    }
+    else {
+      tmp <- !(x[k,2:d] >=0)
+      z[k] <- 1 + sum(tmp*2^(0:(d-2)))
+    }
+  }
+
+  retval <- list(x=x, classes=factor(z))
+  class(retval) <- c("mlbench.xor", "mlbench")
+  retval
+}
+
+mlbench.circle <- function(n, d=2){
+
+  x <- matrix(runif(n*d,-1,1),ncol=d,nrow=n)
+  if((d != as.integer(d)) || (d<2))
+    stop("d must be an integer >=2")
+
+  z <- rep(1, length=n)
+
+  r <- (2^(d-1) * gamma(1+d/2) / (pi^(d/2)))^(1/d)
+  z[apply(x, 1, function(x) sum(x^2)) > r^2] <- 2
+
+  retval <- list(x=x, classes=factor(z))
+  class(retval) <- c("mlbench.circle", "mlbench")
+  retval
+}
+
+mlbench.2dnormals <- function(n, cl=2, r=sqrt(cl), sd=1){
+  
+  e <- sample(0:(cl-1), size=n, replace=TRUE)
+  m <- r * cbind(cos(pi/4 + e*2*pi/cl), sin(pi/4 + e*2*pi/cl))
+  x <- matrix(rnorm(2*n, sd=sd), ncol=2) + m
+
+  retval <- list(x=x, classes=factor(e+1))
+  class(retval) <- c("mlbench.2dnormals", "mlbench")
+  retval
+}
+
+
+mlbench.1spiral <- function(n, cycles=1, sd=0)
+{
+    w <- seq(0, by=cycles/n, length=n)
+    x <- matrix(0, nrow=n, ncol=2)
+
+    x[,1] <- (2*w+1)*cos(2*pi*w)/3;
+    x[,2] <- (2*w+1)*sin(2*pi*w)/3;
+
+    if(sd>0){
+        e <- rnorm(n, sd=sd)
+
+        xs <- cos(2*pi*w)-pi*(2*w+1)*sin(2*pi*w)
+        ys <- sin(2*pi*w)+pi*(2*w+1)*cos(2*pi*w)
+  
+        nrm <- sqrt(xs^2+ys^2)
+        x[,1] <- x[,1] + e*ys/nrm
+        x[,2] <- x[,2] - e*xs/nrm
+    }
+    x
+}
+
+mlbench.spirals <- function(n, cycles=1, sd=0)
+{
+    x <-  matrix(0, nrow=n, ncol=2)
+    c2 <- sample(1:n, size=n/2, replace=FALSE)
+    cl <- factor(rep(1, length=n), levels=as.character(1:2))
+    cl[c2] <- 2
+
+    x[-c2,] <- mlbench.1spiral(n=n-length(c2), cycles=cycles, sd=sd)
+    x[c2,] <- - mlbench.1spiral(n=length(c2), cycles=cycles, sd=sd)
+        
+    retval <- list(x=x, classes=cl)
+    class(retval) <- c("mlbench.spirals", "mlbench")
+    retval
+}
+
+mlbench.ringnorm <- function(n, d=20)
+{
+    x <-  matrix(0, nrow=n, ncol=d)
+    c2 <- sample(1:n, size=n/2, replace=FALSE)
+    cl <- factor(rep(1, length=n), levels=as.character(1:2))
+    cl[c2] <- 2
+
+    a <- 1/sqrt(d)
+    x[-c2,] <- matrix(rnorm(n=d*(n-length(c2)), sd=2), ncol=d)
+    x[c2,]  <- matrix(rnorm(n=d*length(c2), mean=a), ncol=d)
+    
+    retval <- list(x=x, classes=cl)
+    class(retval) <- c("mlbench.ringnorm", "mlbench")
+    retval
+}
+
+mlbench.twonorm <- function (n, d = 20) 
+{
+    x <- matrix(0, nrow = n, ncol = d)
+    c2 <- sample(1:n, size = n/2, replace = FALSE)
+    cl <- factor(rep(1, length = n), levels = as.character(1:2))
+    cl[c2] <- 2
+    a <- 2/sqrt(d)
+    x[-c2, ] <- matrix(rnorm(n = d * (n - length(c2)), mean = a, sd = 1), 
+        ncol = d)
+    x[c2, ] <- matrix(rnorm(n = d * length(c2), mean = -a), ncol = d)
+    retval <- list(x = x, classes = cl)
+    class(retval) <- c("mlbench.twonorm", "mlbench")
+    retval
+}
+
+mlbench.threenorm <- function (n, d = 20) 
+{
+    x <- matrix(0, nrow = n, ncol = d)
+    c2 <- sample(1:n, size = n/2, replace = FALSE)
+    cl <- factor(rep(1, length = n), levels = as.character(1:2))
+    cl[c2] <- 2
+    c1 <- (1:n)[-c2]
+    a <- 2/sqrt(d)
+    for (i in c1)
+      {
+        distr <- as.logical(round(runif(1,0,1)))
+        if ( distr )
+          x[i, ] <- rnorm(n = d, mean = a) 
+        else
+          x[i, ] <- rnorm(n = d, mean = -a) 
+      }
+    m <- rep(c(a, -a), d/2)
+    if ((d %% 2)==1)
+      m <- c(m, a)
+    x[c2, ] <- matrix(rnorm(n = d * length(c2), mean = m),
+                      ncol = d, byrow=TRUE)
+    retval <- list(x = x, classes = cl)
+    class(retval) <- c("mlbench.threenorm", "mlbench")
+    retval
+}
+
+mlbench.waveform <- function (n)
+{
+    Rnuminstances <- n
+    retval <- .C("waveform",
+                 Rnuminstances = as.integer(Rnuminstances),
+                 x = double(21*n),
+                 type = integer(n),
+                 PACKAGE = "mlbench")
+    x <- matrix (retval$x, ncol=21, byrow = TRUE)
+    retval <- list (x=x, classes=as.factor(retval$type+1))
+    class(retval) <- c("mlbench.waveform","mlbench")
+    return(retval)
+}
+
+mlbench.cassini <- function(n,relsize=c(2,2,1))
+{
+    cassinib <- function(x, a, c)
+    {
+        y <- numeric(2)
+        y[1] <- -sqrt(-c^2  - x^2  + sqrt(a^4  + 4*c^2*x^2))
+        y[2] <- sqrt(-c^2  - x^2  + sqrt(a^4  + 4*c^2*x^2))
+        y
+    }
+    
+    circle <- function(x, r)
+        sqrt(r^2-x^2)
+    
+    
+    big1<-relsize[1]
+    big2<-relsize[2]
+    small<-relsize[3]
+    parts<-big1+big2+small
+    npiece<-n/parts
+    n1<-round(big1*npiece)
+    n2<-round(big2*npiece)
+    n3<-round(small*npiece)
+    if ((n1+n2+n3)!=n) n3<-n3+1
+    a<-1
+    C<-0.97
+    Cell<-sqrt((1+C^2)/3)
+    aell <- Cell*sqrt(2)
+    transl <- 1.1
+    r <- 0.6
+    tmima1<-matrix(0,ncol=2,nrow=n1)
+    tmima2<-matrix(0,ncol=2,nrow=n2)
+    tmima3<-matrix(0,ncol=2,nrow=n3)
+    n1found <- 0
+    while(n1found < n1)
+    {
+        x1 <- runif(1,min=-sqrt(a^2+C^2),max=sqrt(a^2+C^2))
+        y1 <- runif(1,min=-transl-1,max=-transl+0.6)
+        if ((y1 < cassinib(x1,a,C)[2]-transl) &&
+            (y1 > cassinib(x1,aell,Cell)[1]-transl))
+        {
+            n1found <- n1found +1
+            tmima1[n1found,]<-c(x1,y1)
+        }
+    }
+    
+    n2found <- 0
+    while(n2found < n2)
+    {
+        x2 <- runif(1,min=-sqrt(a^2+C^2),max=sqrt(a^2+C^2))
+        y2 <- runif(1,max= transl+1,min=transl-0.6)
+        if ((y2 > cassinib(x2,a,C)[1]+transl) &&
+            (y2 < cassinib(x2,aell,Cell)[2]+transl))
+        {
+            n2found <- n2found +1
+            tmima2[n2found,]<-c(x2,y2)
+        }
+    }
+    
+    n3found <- 0
+    while(n3found < n3)
+    {
+        x3<-runif(1,min=-r,max=r)
+        y3<-runif(1,min=-r,max=r)
+        if ((y3 > -circle(x3,r)) &&
+            (y3 < circle(x3,r)))
+        {
+            n3found <- n3found +1
+            tmima3[n3found,]<-c(x3,y3) 
+        }
+    }
+    teliko <- rbind(tmima1,tmima2,tmima3)
+    cl <- factor(c(rep(1,n1),rep(2,n2),rep(3,n3)))
+    retval<-list(x=teliko,classes=cl)
+    class(retval) <- c("mlbench.cassini","mlbench")
+    retval
+}
+
+mlbench.cuboids <- function (n, relsize=c(2,2,2,1))
+{
+    big1 <- relsize[1]
+    big2 <- relsize[2]
+    big3 <- relsize[3]
+    small <- relsize[4]
+    parts<-big1+big2+big3+small
+    npiece<-n/parts
+    n1<-round(big1*npiece)
+    n2<-round(big2*npiece)
+    n3<-round(big3*npiece)
+    n4<-round(small*npiece)
+    if ((n1+n2+n3+n4)!=n) n4<-n4+1
+    
+    x1 <- cbind(runif(n1,min=0,max=1),runif(n1,min=0.75,max=1.0),runif(n1,min=0.75,max=1))
+    
+    x2 <- cbind(runif(n2,min=0.75,max=1.0),runif(n2,min=0,max=0.25),runif(n2,min=0,max=1))
+    
+    x3 <- cbind(runif(n3,min=0.0,max=0.25),runif(n3,min=0.0,max=1),runif(n3,min=0,max=0.25))
+    
+    x4 <- cbind(runif(n4,min=0.4,max=0.6),runif(n4,min=0.4,max=0.6),runif(n4,min=0.4,max=0.6))
+    
+    x<-rbind(x1,x2,x3,x4)
+    retval <-list(x=x,classes=factor(c(rep(1,n1),rep(2,n2),
+                      rep(3,n3),rep(4,n4))))
+    class(retval) <- c("mlbench.cuboids","mlbench")
+    return(retval)
+}
+
+
+mlbench.smiley <- function(n=500, sd1=.1, sd2=.05)
+{
+    n1 <- round(n/6)
+    n2 <- round(n/4)
+    n3 <- n - 2 * n1 - n2
+
+    x1 <- cbind(rnorm(n1, -.8, sd1), rnorm(n1, 1, sd1))
+    x2 <- cbind(rnorm(n1,  .8, sd1), rnorm(n1, 1, sd1))
+
+    x3 <- cbind(runif(n2, -.2, .2), runif(n2, 0, .75))
+    x3[,1] <- x3[,1]*(1-x3[,2])
+    
+    x4 <- runif(n3, -1, 1)
+    x4 <- cbind(x4, x4^2 - 1 + rnorm(n3, 0, sd2))
+
+    retval <- list(x = rbind(x1, x2, x3, x4),
+                   classes=factor(c(rep(1,n1),rep(2,n1),rep(3,n2),rep(4,n3))))
+    class(retval) <- c("mlbench.smiley", "mlbench")
+    retval
+}
+    
+                           
+mlbench.shapes <- function(n=500)
+{
+    n1 <- round(n/4)
+    n2 <- n-3*n1
+    
+    x1 <- cbind(rnorm(n1, -1, .2), rnorm(n1, 1.5, .2))
+    x2 <- cbind(runif(n1, -1.5, -0.5), runif(n1, -2, 0))
+
+    x3 <- cbind(runif(n1, -1, 1), runif(n1, 1, 2))
+    x3[,1] <- x3[,1]*(2-x3[,2])+1
+    
+    x4 <- runif(n2, 0.5, 2)
+    x4 <- cbind(x4, cos(4*x4)-x4+runif(n2,-.2,.2))
+
+    retval <- list(x = rbind(x1, x2, x3, x4),
+                   classes=factor(c(rep(1,n1),rep(2,n1),rep(3,n1),rep(4,n2))))
+    class(retval) <- c("mlbench.shapes", "mlbench")
+    retval
+}
+
+###**********************************************************
+
+## Original is bincombinations in e1071
+hypercube <- function(d) {
+
+  retval <- matrix(0, nrow=2^d, ncol=d)
+  
+  for(n in 1:d){
+    retval[,n] <- rep(c(rep(0, (2^d/2^n)), rep(1, (2^d/2^n))),
+                      length=2^d)
+  }
+  retval
+}
+
+
+
+mlbench.hypercube <- function(n=800, d=3, sides=rep(1,d), sd=0.1)
+{
+    m <- hypercube(d)
+    n1 <- round(n/2^d)
+    
+    sides <- rep(sides, length=d)
+    z <- NULL
+    
+    for(k in 1:nrow(m))
+    {
+        m[k,] <- m[k,]*sides
+        z1 <- matrix(rnorm(d*n1, sd=sd), ncol=d)
+        z1 <- sweep(z1, 2, m[k,], "+")
+        z <- rbind(z, z1)
+    }
+    retval <- list(x=z,
+                   classes=factor(rep(1:nrow(m), rep(n1, nrow(m)))))
+    class(retval) <- c("mlbench.hypercube", "mlbench")
+    retval
+}
+
+## for backwards compatibility
+mlbench.corners <- function(...) mlbench.hypercube(...)
+
+###**********************************************************
+
+simplex <- function(d, sides, center = TRUE)
+{
+    m <- matrix(0, d+1, d)
+    cent <- rep(0,d)
+
+    m[2,1] <- sides
+    cent[1] <- sides/2
+    b <- sides/2
+
+    if(d>=2)
+    {
+        for(i in 2:d)
+        {
+            m[i+1,] <- cent
+            m[i+1,i] <- sqrt(sides^2-b^2)
+            cent[i] <- 1/(i+1)* m[i+1,i]
+            b <- (1- 1/(i+1)) * m[i+1,i]
+        }
+    }
+    if(center)
+      m <- t(t(m) - cent)
+    m
+}
+
+mlbench.simplex <- function (n = 800, d = 3, sides = 1, sd = 0.1, center=TRUE)
+{
+    m <- simplex(d=d , sides=sides, center=center)
+    n1 <- round(n/2^d)
+    z <- NULL
+    for (k in 1:nrow(m)) {
+        z1 <- matrix(rnorm(d * n1, sd = sd), ncol = d)
+        z1 <- sweep(z1, 2, m[k, ], "+")
+        z <- rbind(z, z1)
+    }
+    retval <- list(x = z, classes = factor(rep(1:nrow(m), rep(n1,
+        nrow(m)))))
+    class(retval) <- c("mlbench.simplex", "mlbench")
+    retval
+}
+
+                           
+###**********************************************************
+                           
+
+bayesclass <- function(z) UseMethod("bayesclass")
+
+bayesclass.noerr <- function(z) z$classes
+
+bayesclass.mlbench.xor <- bayesclass.noerr
+bayesclass.mlbench.circle <- bayesclass.noerr
+bayesclass.mlbench.cassini <- bayesclass.noerr
+bayesclass.mlbench.cuboids <- bayesclass.noerr
+
+    
+bayesclass.mlbench.2dnormals <- function(z){
+
+    ncl <- length(levels(z$classes))
+    z <- z$x
+    for(k in 1:nrow(z)){
+        z[k,] <- z[k,] / sqrt(sum(z[k,]^2))
+    }        
+    winkel <- acos(z[,1] * sign(z[,2])) + pi * (z[,2]<0)
+    winkel <- winkel - pi/ncl - pi/4
+    winkel[winkel < 0] <- winkel[winkel<0] + 2*pi
+    retval <- (winkel)%/%(2 * pi/ncl)
+    factor((retval+1)%%ncl+1)
+}
+
+bayesclass.mlbench.ringnorm <- function (z)
+  {
+    z <- z$x
+    ndata <- dim(z)[1]
+    ndim <- dim(z)[2]
+    a <- 1/sqrt(ndim)
+    center1 <- rep(0,ndim)
+    center2 <- rep(a,ndim)
+    m1 <- mahalanobis(z, center1, (4*diag(ndim)), inverted=FALSE) +
+        ndim*log(4)
+    m2 <- mahalanobis(z, center2, diag(ndim), inverted=FALSE)
+    as.factor ((m1 > m2) +1)
+  }
+
+bayesclass.mlbench.twonorm <- function (z)
+  {
+    z <- z$x
+    ndata <- dim(z)[1]
+    bayesclass <- integer(ndata)
+    ndim <- dim(z)[2]
+    a <- 2/sqrt(ndim)
+    center1 <- rep(a,ndim)
+    center2 <- rep(-a,ndim)
+    for (i in 1:ndata)
+      {
+        dist1 <- sum((z[i, ] - center1) ^2)
+        dist2 <- sum((z[i, ] - center2) ^2)
+        bayesclass[i] <- (dist1 > dist2) +1
+      }
+    as.factor(bayesclass)
+  }
+
+## Code by Julia Schiffner
+bayesclass.mlbench.threenorm <- function(z)
+{
+    z <- z$x
+    ndim <- dim(z)[2]
+    a <- 2/sqrt(ndim)
+
+    center1a <- rep(a, ndim)
+    center1b <- rep(-a, ndim)
+    center2 <- rep(c(a, -a), ndim/2)
+    
+    if ((ndim%%2) == 1)
+        center2 <- c(center2, a)
+
+    m1 <- 0.5 * exp(-0.5 * mahalanobis(z, center1a, diag(ndim),
+                                       inverted = FALSE)) +
+          0.5 * exp(-0.5 * mahalanobis(z, center1b,
+                                       diag(ndim), inverted = FALSE))
+
+    m2 <- exp(-0.5 * mahalanobis(z, center2, diag(ndim), inverted = FALSE))
+    
+    as.factor((m1 < m2) + 1)
+}
+
+###**********************************************************
+
+as.data.frame.mlbench <- function(x, row.names=NULL, optional=FALSE, ...)
+{
+    data.frame(x=x$x, classes=x$classes)
+}
+
+
+plot.mlbench <- function(x, xlab="", ylab="", ...)
+{
+    if(ncol(x$x)>2){
+        pairs(x$x, col=as.integer(x$classes), ...)
+    }
+    else{
+        plot(x$x, col=as.integer(x$classes), xlab=xlab, ylab=ylab, ...)
+    }
+}        
+
+
+
+
diff --git a/R/mlbench-regression.R b/R/mlbench-regression.R
new file mode 100644
index 0000000..e90de12
--- /dev/null
+++ b/R/mlbench-regression.R
@@ -0,0 +1,78 @@
+#
+#  Copyright (C) 1997-2010 Friedrich Leisch
+#  $Id: mlbench-regression.R 4612 2010-10-08 09:51:20Z leisch $
+#
+
+mlbench.friedman1 <- function(n, sd=1){
+
+  x <- matrix(runif(10*n),ncol=10)
+
+  y <- 10 * sin(pi * x[,1] * x[,2])
+  y <- y + 20 * ( x[,3] - 0.5)^2 + 10 * x[,4] + 5 * x[,5]
+
+  if(sd>0){
+    y <- y + rnorm(n, sd=sd)
+  }
+
+  list(x=x, y=y)
+}
+
+mlbench.friedman2 <- function(n, sd=125){
+
+  x <- cbind(runif(n,min=0,max=100),
+	     runif(n,min=40*pi,max=560*pi),
+	     runif(n,min=0,max=1),
+	     runif(n,min=1,max=11))
+
+  y <- sqrt(x[,1]^2 + (x[,2]*x[,3] - 1/(x[,2]*x[,4]))^2)
+
+  if(sd>0){
+    y <- y + rnorm(n, sd=sd)
+  }
+
+  list(x=x, y=y)
+}
+
+mlbench.friedman3 <- function(n, sd=0.1){
+
+  x <- cbind(runif(n,min=0,max=100),
+	     runif(n,min=40*pi,max=560*pi),
+	     runif(n,min=0,max=1),
+	     runif(n,min=1,max=11))
+
+  y <- atan( (x[,2]*x[,3] - 1/(x[,2]*x[,4])) / x[,1] )
+
+  if(sd>0){
+    y <- y + rnorm(n, sd=sd)
+  }
+
+  list(x=x, y=y)
+}
+
+mlbench.peak <- function(n, d=20)
+  {
+    metro <- numeric(n)
+    y <- numeric(n)
+    x <- matrix(0, nrow=n, ncol=d)
+    for (ndata in 1:n)
+      {
+        radius <- runif(1, min=0, max=3)
+        x[ndata,] <- rnorm(d)
+        metro[ndata] <- sqrt(sum(x[ndata,]^2))
+        x[ndata,] <- radius * (x[ndata,]/metro[ndata])
+        y[ndata] <- 25 * exp(-0.5* radius^2)
+      }
+    list(x=x, y=y)
+  }
+
+
+
+
+
+
+          
+        
+
+
+
+
diff --git a/README b/README
new file mode 100644
index 0000000..0b397a5
--- /dev/null
+++ b/README
@@ -0,0 +1,11 @@
+This package contains a collection of real-world datasets and
+functions for creating artificial datasets that work as benchmarks for
+machine learning methods.
+
+Most datasets have been taken from the UCI repository at
+    ftp://ftp.ics.uci.edu/pub/machine-learning-databases
+    http://www.ics.uci.edu/~mlearn/MLRepository.html
+See the corresponding help files for original data sources. 
+
+Some conversions are done for smoother usage in R (like conversions to
+factors); all changes are recorded in the file Data-Administration.R.
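
A minimal usage sketch of the two kinds of benchmarks described above: the
real-world data sets are loaded with data(), the artificial problems are drawn
from the mlbench.* generators, and the plot() and as.data.frame() calls
dispatch to the S3 methods registered in the NAMESPACE (data set and parameter
choices here are only illustrative):

    library(mlbench)
    data(Glass)                                   # real-world data set from UCI
    summary(Glass)
    p <- mlbench.spirals(300, cycles = 1.5, sd = 0.05)  # artificial 2-class problem
    plot(p)                                       # plot.mlbench()
    head(as.data.frame(p))                        # as.data.frame.mlbench()
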
diff --git a/data/BostonHousing.rda b/data/BostonHousing.rda
new file mode 100644
index 0000000..73fe114
Binary files /dev/null and b/data/BostonHousing.rda differ
diff --git a/data/BostonHousing2.rda b/data/BostonHousing2.rda
new file mode 100644
index 0000000..6df4a37
Binary files /dev/null and b/data/BostonHousing2.rda differ
diff --git a/data/BreastCancer.rda b/data/BreastCancer.rda
new file mode 100644
index 0000000..7c6cc7f
Binary files /dev/null and b/data/BreastCancer.rda differ
diff --git a/data/DNA.rda b/data/DNA.rda
new file mode 100644
index 0000000..4774e9d
Binary files /dev/null and b/data/DNA.rda differ
diff --git a/data/Glass.rda b/data/Glass.rda
new file mode 100644
index 0000000..68986af
Binary files /dev/null and b/data/Glass.rda differ
diff --git a/data/HouseVotes84.rda b/data/HouseVotes84.rda
new file mode 100644
index 0000000..a38fef3
Binary files /dev/null and b/data/HouseVotes84.rda differ
diff --git a/data/Ionosphere.rda b/data/Ionosphere.rda
new file mode 100644
index 0000000..2799202
Binary files /dev/null and b/data/Ionosphere.rda differ
diff --git a/data/LetterRecognition.rda b/data/LetterRecognition.rda
new file mode 100644
index 0000000..755abdd
Binary files /dev/null and b/data/LetterRecognition.rda differ
diff --git a/data/Ozone.rda b/data/Ozone.rda
new file mode 100644
index 0000000..3811b79
Binary files /dev/null and b/data/Ozone.rda differ
diff --git a/data/PimaIndiansDiabetes.rda b/data/PimaIndiansDiabetes.rda
new file mode 100644
index 0000000..b3632f2
Binary files /dev/null and b/data/PimaIndiansDiabetes.rda differ
diff --git a/data/PimaIndiansDiabetes2.rda b/data/PimaIndiansDiabetes2.rda
new file mode 100644
index 0000000..8e0fe0c
Binary files /dev/null and b/data/PimaIndiansDiabetes2.rda differ
diff --git a/data/Satellite.rda b/data/Satellite.rda
new file mode 100644
index 0000000..88228a2
Binary files /dev/null and b/data/Satellite.rda differ
diff --git a/data/Servo.rda b/data/Servo.rda
new file mode 100644
index 0000000..6416be8
Binary files /dev/null and b/data/Servo.rda differ
diff --git a/data/Shuttle.rda b/data/Shuttle.rda
new file mode 100644
index 0000000..26da74f
Binary files /dev/null and b/data/Shuttle.rda differ
diff --git a/data/Sonar.rda b/data/Sonar.rda
new file mode 100644
index 0000000..660d834
Binary files /dev/null and b/data/Sonar.rda differ
diff --git a/data/Soybean.rda b/data/Soybean.rda
new file mode 100644
index 0000000..0c04d73
Binary files /dev/null and b/data/Soybean.rda differ
diff --git a/data/Vehicle.rda b/data/Vehicle.rda
new file mode 100644
index 0000000..04d8a6b
Binary files /dev/null and b/data/Vehicle.rda differ
diff --git a/data/Vowel.rda b/data/Vowel.rda
new file mode 100644
index 0000000..7c1718f
Binary files /dev/null and b/data/Vowel.rda differ
diff --git a/data/Zoo.rda b/data/Zoo.rda
new file mode 100644
index 0000000..306685f
Binary files /dev/null and b/data/Zoo.rda differ
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 49e36f1..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-r-cran-mlbench (2.1-1-1) unstable; urgency=low
-
-  * Initial release (Closes: #826861).
-
- -- Andreas Tille <tille at debian.org>  Thu, 09 Jun 2016 17:16:06 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index e59d2b2..0000000
--- a/debian/control
+++ /dev/null
@@ -1,23 +0,0 @@
-Source: r-cran-mlbench
-Maintainer: Debian Science Team <debian-science-maintainers at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Section: gnu-r
-Testsuite: autopkgtest
-Priority: optional
-Build-Depends: debhelper (>= 9),
-               cdbs,
-               r-base-dev (>= 3.0.0)
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-science/packages/R/r-cran-mlbench/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-science/packages/R/r-cran-mlbench/trunk/
-Homepage: https://cran.r-project.org/web/packages/mlbench(
-
-Package: r-cran-mlbench
-Architecture: any
-Depends: ${shlibs:Depends},
-         ${misc:Depends},
-         ${R:Depends}
-Description: GNU R Machine Learning Benchmark Problems
- This GNU R package provices a collection of artificial and real-world
- machine learning benchmark problems, including, e.g., several data sets
- from the UCI repository.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 7fbc7ea..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,31 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: mlbench
-Upstream-Contact: Friedrich Leisch <Friedrich.Leisch at R-project.org>
-Source: http://cran.r-project.org/web/packages/mlbench/
-
-Files: *
-Copyright: 2011-2016 Friedrich Leisch <Friedrich.Leisch at R-project.org>
-                     Evgenia Dimitriadou
-License: GPL-2
-
-Files: debian/*
-Copyright: 2016 Andreas Tille <tille at debian.org>
-License: GPL-2
-
-License: GPL-2
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License.
- .
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU General Public License for more details.
- .
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- .
- Comment: On Debian systems, the complete text of the GNU General Public
- License can be found in `/usr/share/common-licenses/GPL-2'.
-
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 2fbba2d..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/make -f
-
-include /usr/share/R/debian/r-cran.mk
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 0c207f8..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,2 +0,0 @@
-version=3
-http://cran.r-project.org/src/contrib/mlbench_([-\d.]*)\.tar\.gz
diff --git a/inst/CITATION b/inst/CITATION
new file mode 100644
index 0000000..a22f741
--- /dev/null
+++ b/inst/CITATION
@@ -0,0 +1,31 @@
+citHeader("To cite package mlbench in publications use:")
+
+## R >= 2.8.0 passes package metadata to citation().
+if(!exists("meta") || is.null(meta)) meta <- packageDescription("mlbench")
+year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date)
+vers <- paste("R package version", meta$Version)
+
+citEntry(entry="Manual",
+         title = "mlbench: Machine Learning Benchmark Problems",
+         author = "Friedrich Leisch and Evgenia Dimitriadou",
+         year = year,
+         note = vers,
+         
+         textVersion =
+         paste("Friedrich Leisch & Evgenia Dimitriadou (", year,
+               "). mlbench: Machine Learning Benchmark Problems. ",
+               vers, ".", sep=""))
+
+citEntry(entry="Misc",
+         header="To cite data sets from the UCI repository (as indicated in the help pages) use:",
+         author = "D.J. Newman, S. Hettich, C.L. Blake and C.J. Merz",
+         year = 1998,
+         title = "UCI Repository of machine learning databases",
+         url = "http://www.ics.uci.edu/~mlearn/MLRepository.html",
+         institution = "University of California, Irvine, Dept. of Information and Computer Sciences",
+         textVersion =
+         paste("Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).",
+               "UCI Repository of machine learning databases",
+               "[http://www.ics.uci.edu/~mlearn/MLRepository.html].",
+               "Irvine, CA: University of California,",
+               "Department of Information and Computer Science."))
diff --git a/man/BostonHousing.Rd b/man/BostonHousing.Rd
new file mode 100644
index 0000000..e393578
--- /dev/null
+++ b/man/BostonHousing.Rd
@@ -0,0 +1,83 @@
+\name{BostonHousing}
+\alias{BostonHousing}
+\alias{BostonHousing2}
+\title{Boston Housing Data}
+\usage{
+data(BostonHousing)
+data(BostonHousing2)
+}
+\description{Housing data for 506 census tracts of Boston from the 1970
+  census. The dataframe
+  \code{BostonHousing} contains the original data by Harrison and
+  Rubinfeld (1978), the dataframe \code{BostonHousing2} the corrected
+  version with additional spatial information (see references below).
+}
+\format{The original data are 506 observations on 14 variables,
+    \code{medv} being the target variable:
+    \tabular{ll}{
+      crim \tab per capita crime rate by town \cr
+      zn \tab proportion of residential land zoned for lots over 25,000 sq.ft \cr
+      indus \tab proportion of non-retail business acres per town \cr
+      chas \tab Charles River dummy variable (= 1 if tract bounds river; 0 otherwise) \cr
+      nox \tab nitric oxides concentration (parts per 10 million) \cr
+      rm \tab average number of rooms per dwelling \cr
+      age \tab proportion of owner-occupied units built prior to 1940 \cr
+      dis \tab weighted distances to five Boston employment centres \cr
+      rad \tab index of accessibility to radial highways \cr
+      tax \tab full-value property-tax rate per USD 10,000 \cr
+      ptratio \tab pupil-teacher ratio by town \cr
+      b \tab \eqn{1000(B - 0.63)^2} where \eqn{B} is the proportion of blacks by town\cr
+      lstat \tab percentage of lower status of the population \cr
+      medv \tab median value of owner-occupied homes in USD 1000's
+    }
+    The corrected data set has the following additional columns:
+    \tabular{ll}{
+      cmedv \tab corrected median value of owner-occupied homes in USD 1000's \cr
+      town \tab name of town \cr
+      tract \tab census tract \cr
+      lon \tab longitude of census tract \cr
+      lat \tab latitude of census tract \cr
+    }
+}
+\references{  
+  Harrison, D. and Rubinfeld, D.L. (1978).
+  Hedonic prices and the demand for clean air.
+  \emph{Journal of Environmental Economics and Management}, \bold{5},
+  81--102.
+
+  Gilley, O.W., and R. Kelley Pace (1996). On the Harrison and Rubinfeld  
+  Data. \emph{Journal of Environmental Economics and Management}, \bold{31},  
+  403--405. [Provided corrections and examined censoring.]
+
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+
+  Pace, R. Kelley, and O.W. Gilley (1997). Using the Spatial Configuration of  
+  the Data to Improve Estimation. \emph{Journal of the Real Estate Finance  
+  and Economics}, \bold{14}, 333--340. [Added georeferencing and spatial
+  estimation.]
+}  
+\source{
+  The original data have been taken from the UCI Repository Of Machine Learning
+  Databases at
+  \itemize{
+    \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html},
+  }
+  the corrected data have been taken from Statlib at
+  \itemize{
+    \item \url{http://lib.stat.cmu.edu/datasets/}
+  }
+  See Statlib and references there for details on the corrections.
+  Both were converted to R format by Friedrich Leisch.
+}
+\keyword{datasets}
+\examples{
+data(BostonHousing)
+summary(BostonHousing)
+
+data(BostonHousing2)
+summary(BostonHousing2)
+}
diff --git a/man/BreastCancer.Rd b/man/BreastCancer.Rd
new file mode 100644
index 0000000..375bf84
--- /dev/null
+++ b/man/BreastCancer.Rd
@@ -0,0 +1,86 @@
+\name{BreastCancer}
+\title{Wisconsin Breast Cancer Database}
+\usage{data(BreastCancer)}
+\alias{BreastCancer}
+\format{A data frame with 699 observations on 11 variables, one being a
+    character variable, 9 being ordered or nominal, and 1 target class.
+
+    \tabular{cll}{ 
+ [,1] \tab Id \tab Sample code number\cr
+ [,2] \tab Cl.thickness \tab Clump Thickness\cr
+ [,3] \tab Cell.size \tab Uniformity of Cell Size\cr
+ [,4] \tab Cell.shape \tab Uniformity of Cell Shape\cr
+ [,5] \tab Marg.adhesion  \tab Marginal Adhesion\cr
+ [,6] \tab Epith.c.size \tab Single Epithelial Cell Size\cr
+ [,7] \tab Bare.nuclei \tab Bare Nuclei\cr
+ [,8] \tab Bl.cromatin \tab Bland Chromatin\cr
+ [,9] \tab Normal.nucleoli \tab Normal Nucleoli\cr
+[,10] \tab Mitoses \tab Mitoses\cr
+[,11] \tab Class \tab Class
+}
+    }
+\description{
+    The objective is to identify each sample as benign or
+    malignant. Samples arrive periodically as
+    Dr. Wolberg reports his clinical cases.
+    The database therefore reflects this chronological grouping of the
+    data.  This grouping information appears immediately below, having been
+    removed from the data itself.  Each variable except for the first was
+    converted into 11 primitive numerical attributes with values ranging
+    from 0 through 10.  There are 16 missing attribute values. See the
+    references cited below for more details.}
+\source{
+    \itemize{
+        \item Creator: Dr. William H. Wolberg (physician); University of
+        Wisconsin Hospital; Madison; Wisconsin; USA
+        \item Donor: Olvi Mangasarian (mangasarian at cs.wisc.edu)
+	\item Received: David W. Aha (aha at cs.jhu.edu)
+    }
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+     }
+    and were converted to R format by Evgenia Dimitriadou.
+}
+\references{   
+   1. Wolberg,W.H., \& Mangasarian,O.L. (1990). Multisurface method of 
+      pattern separation for medical diagnosis applied to breast cytology. In
+      Proceedings of the National Academy of Sciences, 87,
+      9193-9196.\cr
+      - Size of data set: only 369 instances (at that point in time)\cr
+      - Collected classification results: 1 trial only\cr
+      - Two pairs of parallel hyperplanes were found to be consistent with
+         50\% of the data\cr
+         - Accuracy on remaining 50\% of dataset: 93.5\%\cr
+      - Three pairs of parallel hyperplanes were found to be consistent with
+         67\% of data\cr
+         - Accuracy on remaining 33\% of dataset: 95.9\%
+
+   2. Zhang,J. (1992). Selecting typical instances in instance-based
+      learning.  In Proceedings of the Ninth International Machine
+      Learning Conference (pp. 470-479).  Aberdeen, Scotland: Morgan
+      Kaufmann.\cr
+      - Size of data set: only 369 instances (at that point in time)\cr
+      - Applied 4 instance-based learning algorithms\cr
+      - Collected classification results averaged over 10 trials\cr
+      - Best accuracy result: \cr
+         - 1-nearest neighbor: 93.7\%\cr
+         - trained on 200 instances, tested on the other 169\cr
+      - Also of interest:\cr
+         - Using only typical instances: 92.2\% (storing only 23.1 instances)\cr
+         - trained on 200 instances, tested on the other 169
+
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+}
+\keyword{datasets}
+\examples{
+data(BreastCancer)
+summary(BreastCancer)
+}
+    
diff --git a/man/DNA.Rd b/man/DNA.Rd
new file mode 100644
index 0000000..b7af715
--- /dev/null
+++ b/man/DNA.Rd
@@ -0,0 +1,80 @@
+\name{DNA}
+\title{Primate splice-junction gene sequences (DNA)} 
+\usage{data(DNA)}
+\alias{DNA}
+\format{A data frame with 3,186 observations on 180 variables, all
+nominal and a target class.}
+
+\description{It consists of 3,186 data points (splice junctions). The
+    data points are described by 180 indicator binary
+    variables and the problem is to recognize the 3 classes (ei, ie,
+    neither), i.e., the boundaries between exons (the parts of the DNA
+    sequence retained after splicing) and introns (the parts of the DNA
+    sequence that are spliced out).
+    
+    The StatLog dna dataset is a processed version of the Irvine
+    database described below. The main difference is that the 
+    symbolic variables representing the nucleotides (only A,G,T,C) 
+    were replaced by 3 binary indicator variables. Thus the original 
+    60 symbolic attributes were changed into 180 binary attributes.  
+    The names of the examples were removed. The examples with
+    ambiguities were removed (there were very few of them, 4).
+    The StatLog version of this dataset was produced by Ross King
+    at Strathclyde University. For original details see the Irvine 
+    database documentation.
+
+    The nucleotides A,C,G,T were given indicator values as follows:
+    \tabular{cl}{
+    	\tab A -> 1 0 0\cr
+    	\tab C -> 0 1 0\cr
+    	\tab G -> 0 0 1\cr
+    	\tab T -> 0 0 0\cr
+    }
+    Hint. Much better performance is generally observed if attributes
+    closest to the junction are used. In the StatLog version, this
+    means using attributes A61 to A120 only.   
+}
+\source{
+    \itemize{
+       	\item Source:\cr
+  	- all examples taken from Genbank 64.1 (ftp site:
+	genbank.bio.net)\cr
+       	- categories "ei" and "ie" include every "split-gene" 
+        for primates in Genbank 64.1\cr
+       	- non-splice examples taken from sequences known not to include
+        a splicing site\cr
+   	\item Donor: G. Towell, M. Noordewier, and J. Shavlik, 
+        {towell,shavlik}@cs.wisc.edu, noordewi at cs.rutgers.edu
+    }
+    These data have been taken from: 
+    \itemize{
+    	\item ftp.stams.strath.ac.uk/pub/Statlog
+    	    }
+    and were converted to R format by Evgenia Dimitriadou.
+}
+\references{
+     machine learning:\cr
+       	-- M. O. Noordewier and G. G. Towell and J. W. Shavlik, 1991; 
+           "Training Knowledge-Based Neural Networks to Recognize Genes in 
+           DNA Sequences".  Advances in Neural Information Processing Systems,
+           volume 3, Morgan Kaufmann.
+
+	-- G. G. Towell and J. W. Shavlik and M. W. Craven, 1991;  
+           "Constructive Induction in Knowledge-Based Neural Networks",  
+           In Proceedings of the Eighth International Machine Learning
+	   Workshop, Morgan Kaufmann.
+
+        -- G. G. Towell, 1991;
+           "Symbolic Knowledge and Neural Networks: Insertion, Refinement, and
+           Extraction", PhD Thesis, University of Wisconsin - Madison.
+
+        -- G. G. Towell and J. W. Shavlik, 1992;
+           "Interpretation of Artificial Neural Networks: Mapping 
+           Knowledge-based Neural Networks into Rules", In Advances in Neural
+           Information Processing Systems, volume 4, Morgan Kaufmann.  
+}
+\keyword{datasets}
+\examples{
+data(DNA)
+summary(DNA)
+}
diff --git a/man/Glass.Rd b/man/Glass.Rd
new file mode 100644
index 0000000..bfdfb1d
--- /dev/null
+++ b/man/Glass.Rd
@@ -0,0 +1,55 @@
+\name{Glass}
+\alias{Glass}
+\title{Glass Identification Database}
+\usage{data(Glass)}
+\keyword{datasets}
+\description{A data frame with 214 observations containing examples of
+  the chemical analysis of 7 different types of glass. The problem is to
+  forecast the type of glass on the basis of the chemical analysis.  The
+  study of classification of types of glass was motivated by
+  criminological investigation.  At the scene of the crime, the glass left
+  can be used as evidence (if it is correctly identified!).
+}
+\format{
+    A data frame with 214 observations on 10 variables:
+    \tabular{cll}{
+ [,1] \tab RI \tab refractive index\cr
+ [,2] \tab Na \tab Sodium\cr
+ [,3] \tab Mg \tab Magnesium\cr
+ [,4] \tab Al \tab Aluminum\cr
+ [,5] \tab Si \tab Silicon\cr
+ [,6] \tab K  \tab Potassium\cr
+ [,7] \tab Ca \tab Calcium\cr
+ [,8] \tab Ba \tab Barium\cr
+ [,9] \tab Fe \tab Iron \cr
+[,10] \tab Type \tab Type of glass (class attribute) \cr 
+}
+}   
+\source{
+    \itemize{
+       	\item Creator: B. German, Central Research Establishment, Home
+	Office Forensic Science Service, Aldermaston, Reading, Berkshire
+	RG7 4PN 
+   	\item Donor: Vina Spiehler, Ph.D., DABFT, Diagnostic Products
+	Corporation
+    }
+    
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Friedrich Leisch.
+}
+\references{  
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+}
+\examples{
+data(Glass)
+summary(Glass)
+}
diff --git a/man/HouseVotes84.Rd b/man/HouseVotes84.Rd
new file mode 100644
index 0000000..c71cd9b
--- /dev/null
+++ b/man/HouseVotes84.Rd
@@ -0,0 +1,64 @@
+\name{HouseVotes84}
+\alias{HouseVotes84}
+\title{United States Congressional Voting Records 1984}
+\usage{data(HouseVotes84)}
+\description{
+    This data set includes votes for each of the U.S. House of
+    Representatives Congressmen on the 16 key votes identified by the
+    CQA.  The CQA lists nine different types of votes: voted for, paired
+    for, and announced for (these three simplified to yea), voted
+    against, paired against, and announced against (these three
+    simplified to nay), voted present, voted present to avoid conflict
+    of interest, and did not vote or otherwise make a position known
+    (these three simplified to an unknown disposition).
+}
+\keyword{datasets}
+\format{
+    A data frame with 435 observations on 17 variables:
+    \tabular{rl}{
+   1 \tab Class Name: 2 (democrat, republican)\cr
+   2 \tab handicapped-infants: 2 (y,n)\cr
+   3 \tab water-project-cost-sharing: 2 (y,n)\cr
+   4 \tab adoption-of-the-budget-resolution: 2 (y,n)\cr
+   5 \tab physician-fee-freeze: 2 (y,n)\cr
+   6 \tab el-salvador-aid: 2 (y,n)\cr
+   7 \tab religious-groups-in-schools: 2 (y,n)\cr
+   8 \tab anti-satellite-test-ban: 2 (y,n)\cr
+   9 \tab aid-to-nicaraguan-contras: 2 (y,n)\cr
+  10 \tab mx-missile: 2 (y,n)\cr
+  11 \tab immigration: 2 (y,n)\cr
+  12 \tab synfuels-corporation-cutback: 2 (y,n)\cr
+  13 \tab education-spending: 2 (y,n)\cr
+  14 \tab superfund-right-to-sue: 2 (y,n)\cr
+  15 \tab crime: 2 (y,n)\cr
+  16 \tab duty-free-exports: 2 (y,n)\cr
+  17 \tab export-administration-act-south-africa: 2 (y,n)\cr
+  }
+}
+\source{
+    \itemize{
+	\item Source: Congressional Quarterly Almanac, 98th Congress,
+	2nd session 1984, Volume XL: Congressional Quarterly Inc.,
+        Washington, D.C., 1985
+	\item Donor: Jeff Schlimmer (Jeffrey.Schlimmer at a.gp.cs.cmu.edu)
+    }
+
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Friedrich Leisch.
+}
+\references{  
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+}
+\examples{
+data(HouseVotes84)
+summary(HouseVotes84)
+}
diff --git a/man/Ionosphere.Rd b/man/Ionosphere.Rd
new file mode 100644
index 0000000..7cb5edb
--- /dev/null
+++ b/man/Ionosphere.Rd
@@ -0,0 +1,74 @@
+\name{Ionosphere}
+\title{Johns Hopkins University Ionosphere database}
+\usage{data(Ionosphere)}
+\alias{Ionosphere}
+\format{A data frame with 351 observations on 35 variables: 34 predictor
+    variables (2 of them nominal, the rest numerical) and one last defining the class.}
+
+\description{
+    This radar data was collected by a system in Goose Bay, Labrador.  This
+   system consists of a phased array of 16 high-frequency antennas with a
+   total transmitted power on the order of 6.4 kilowatts.  See the paper
+   for more details.  The targets were free electrons in the ionosphere.
+   "good" radar returns are those showing evidence of some type of structure 
+   in the ionosphere.  "bad" returns are those that do not; their signals pass
+   through the ionosphere.  
+
+   Received signals were processed using an autocorrelation function whose
+   arguments are the time of a pulse and the pulse number.  There were 17
+   pulse numbers for the Goose Bay system.  Instances in this database are
+   described by 2 attributes per pulse number, corresponding to the complex
+   values returned by the function resulting from the complex electromagnetic
+   signal. See cited below for more details.}
+\source{
+    \itemize{
+       	\item Source: Space Physics Group; Applied Physics Laboratory;
+	Johns Hopkins University; Johns Hopkins Road; Laurel; MD 20723 
+        \item Donor: Vince Sigillito (vgs at aplcen.apl.jhu.edu)
+    }
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Evgenia Dimitriadou.
+}
+\references{
+   Sigillito, V. G., Wing, S. P., Hutton, L. V., \& Baker, K. B. (1989).
+      Classification of radar returns from the ionosphere using neural 
+      networks. Johns Hopkins APL Technical Digest, 10, 262-266.
+
+      They investigated using backprop and the perceptron training algorithm
+      on this database.  Using the first 200 instances for training, which
+      were carefully split almost 50\% positive and 50\% negative, they found
+      that a "linear" perceptron attained 90.7\%, a "non-linear" perceptron
+      attained 92\%, and backprop an average of over 96\% accuracy on the 
+      remaining 150 test instances, consisting of 123 "good" and only 24 "bad"
+      instances.  (There was a counting error or some mistake somewhere; there
+      are a total of 351 rather than 350 instances in this domain.) Accuracy
+      on "good" instances was much higher than for "bad" instances.  Backprop
+      was tested with several different numbers of hidden units (in [0,15])
+      and incremental results were also reported (corresponding to how well
+      the different variants of backprop did after a periodic number of 
+      epochs).
+
+      David Aha (aha at ics.uci.edu) briefly investigated this database.
+      He found that nearest neighbor attains an accuracy of 92.1\%, that
+      Ross Quinlan's C4 algorithm attains 94.0\% (no windowing), and that
+      IB3 (Aha \& Kibler, IJCAI-1989) attained 96.7\% (parameter settings:
+      70\% and 80\% for acceptance and dropping respectively).
+      
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+
+}
+\keyword{datasets}
+\examples{
+data(Ionosphere)
+summary(Ionosphere)
+}
+    
diff --git a/man/LetterRecognition.Rd b/man/LetterRecognition.Rd
new file mode 100644
index 0000000..95ca0bb
--- /dev/null
+++ b/man/LetterRecognition.Rd
@@ -0,0 +1,76 @@
+\name{LetterRecognition}
+\title{Letter Image Recognition Data}
+\usage{data(LetterRecognition)}
+\alias{LetterRecognition}
+\format{A data frame with 20,000 observations on 17 variables: the first
+    is a factor with levels A-Z, the remaining 16 are numeric.
+
+    \tabular{rll}{
+ [,1] \tab lettr \tab  capital letter\cr
+ [,2] \tab x.box \tab  horizontal position of box\cr
+ [,3] \tab y.box \tab  vertical position of box\cr
+ [,4] \tab width \tab  width of box\cr
+ [,5] \tab high  \tab  height of box\cr
+ [,6] \tab onpix \tab  total number of on pixels\cr
+ [,7] \tab x.bar \tab  mean x of on pixels in box\cr
+ [,8] \tab y.bar \tab  mean y of on pixels in box\cr
+ [,9] \tab x2bar \tab  mean x variance\cr
+[,10] \tab y2bar \tab  mean y variance\cr
+[,11] \tab xybar \tab  mean x y correlation\cr
+[,12] \tab x2ybr \tab  mean of \eqn{x^2 y} \cr
+[,13] \tab xy2br \tab  mean of \eqn{x y^2} \cr
+[,14] \tab x.ege \tab  mean edge count left to right\cr
+[,15] \tab xegvy \tab  correlation of x.ege with y\cr
+[,16] \tab y.ege \tab  mean edge count bottom to top\cr
+[,17] \tab yegvx \tab  correlation of y.ege with x\cr
+    }
+}
+\description{
+   The objective is to identify each of a large number of black-and-white
+   rectangular pixel displays as one of the 26 capital letters in the English
+   alphabet.  The character images were based on 20 different fonts and each
+   letter within these 20 fonts was randomly distorted to produce a file of
+   20,000 unique stimuli.  Each stimulus was converted into 16 primitive
+   numerical attributes (statistical moments and edge counts) which were then
+   scaled to fit into a range of integer values from 0 through 15.  We
+   typically train on the first 16000 items and then use the resulting model
+   to predict the letter category for the remaining 4000.  See the article
+   cited below for more details.
+}
+\source{
+    \itemize{
+       	\item Creator: David J. Slate
+     	\item Odesta Corporation; 1890 Maple Ave; Suite 115; Evanston, IL 60201
+   	\item Donor: David J. Slate (dave at math.nwu.edu) (708) 491-3867   
+    }
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Friedrich Leisch.
+}
+\references{
+    P. W. Frey and D. J. Slate (Machine Learning Vol 6/2 March 91):
+    "Letter Recognition Using Holland-style Adaptive Classifiers".
+
+    The research for this article investigated the ability of several
+    variations of Holland-style adaptive classifier systems to learn to
+    correctly guess the letter categories associated with vectors of 16
+    integer attributes extracted from raster scan images of the letters.
+    The best accuracy obtained was a little over 80\%.  It would be
+    interesting to see how well other methods do with the same data.
+
+    Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+    UCI Repository of machine learning databases
+    [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+    University of California, Department of Information and Computer
+    Science.
+}
+\keyword{datasets}
+\examples{
+data(LetterRecognition)
+summary(LetterRecognition)
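+
+## A sketch of the train/test split described above (first 16000 rows
+## for training, the remaining 4000 for testing). rpart is used only as
+## an illustrative classifier; it is not part of mlbench.
+\dontrun{
+library("rpart")
+train <- LetterRecognition[1:16000, ]
+test  <- LetterRecognition[16001:20000, ]
+fit <- rpart(lettr ~ ., data = train)
+pred <- predict(fit, newdata = test, type = "class")
+mean(pred == test$lettr)   ## test set accuracy
+}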
+}
+    
diff --git a/man/Ozone.Rd b/man/Ozone.Rd
new file mode 100644
index 0000000..4fad4df
--- /dev/null
+++ b/man/Ozone.Rd
@@ -0,0 +1,38 @@
+\name{Ozone}
+\alias{Ozone}
+\title{Los Angeles ozone pollution data, 1976}
+\usage{data(Ozone)}
+\keyword{datasets}
+\description{A data frame with 366 observations on 13 variables; each
+  observation corresponds to one day.}
+\format{
+    \tabular{rl}{
+   1 \tab Month: 1 = January, ..., 12 = December\cr
+   2 \tab Day of month\cr
+   3 \tab Day of week: 1 = Monday, ..., 7 = Sunday\cr
+   4 \tab Daily maximum one-hour-average ozone reading\cr
+   5 \tab 500 millibar pressure height (m) measured at Vandenberg AFB\cr
+   6 \tab Wind speed (mph) at Los Angeles International Airport (LAX)\cr
+   7 \tab Humidity (\%) at LAX\cr
+   8 \tab Temperature (degrees F) measured at Sandburg, CA\cr
+   9 \tab Temperature (degrees F) measured at El Monte, CA\cr
+  10 \tab Inversion base height (feet) at LAX\cr
+  11 \tab Pressure gradient (mm Hg) from LAX to Daggett, CA\cr
+  12 \tab Inversion base temperature (degrees F) at LAX\cr
+  13 \tab Visibility (miles) measured at LAX\cr
+  }
+}
+\details{
+The problem is to predict the daily maximum one-hour-average
+ozone reading (V4).
+}
+\source{
+    Leo Breiman, Department of Statistics, UC Berkeley.  Data used in
+    Leo Breiman and Jerome H. Friedman (1985), Estimating optimal
+    transformations for multiple regression and correlation, JASA, 80, pp.
+    580-598.
+}
+\examples{
+data(Ozone)
+summary(Ozone)
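+
+## A minimal sketch of the regression task described above: predict V4
+## from the meteorological variables. This assumes the columns are named
+## V1, ..., V13; the date columns V1-V3 are dropped and incomplete rows
+## are omitted by lm's default na.action.
+fit <- lm(V4 ~ ., data = Ozone[, -(1:3)])
+summary(fit)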
+}
diff --git a/man/PimaIndiansDiabetes.Rd b/man/PimaIndiansDiabetes.Rd
new file mode 100644
index 0000000..04f0057
--- /dev/null
+++ b/man/PimaIndiansDiabetes.Rd
@@ -0,0 +1,74 @@
+\name{PimaIndiansDiabetes}
+\alias{PimaIndiansDiabetes}
+\alias{PimaIndiansDiabetes2}
+\title{Pima Indians Diabetes Database}
+\usage{
+  data(PimaIndiansDiabetes)
+  data(PimaIndiansDiabetes2)
+}
+\keyword{datasets}
+\description{
+    A data frame with 768 observations on 9 variables.}
+\format{
+    \tabular{rl}{
+      pregnant \tab Number of times pregnant\cr
+      glucose \tab Plasma glucose concentration (glucose tolerance test)\cr
+      pressure \tab Diastolic blood pressure (mm Hg)\cr
+      triceps \tab Triceps skin fold thickness (mm)\cr
+      insulin \tab 2-Hour serum insulin (mu U/ml)\cr
+      mass \tab Body mass index (weight in kg/(height in m)\^2)\cr
+      pedigree \tab Diabetes pedigree function\cr
+      age \tab Age (years)\cr
+      diabetes \tab Class variable (test for diabetes)\cr
+  }
+}
+\details{The data set \code{PimaIndiansDiabetes2} contains a corrected
+  version of the original data set. While the UCI repository index
+  claims that there are no missing values, closer inspection of the data
+  shows several physical impossibilities, e.g., blood pressure or body
+  mass index of 0. In \code{PimaIndiansDiabetes2}, all zero values of
+  \code{glucose}, \code{pressure}, \code{triceps}, \code{insulin} and
+  \code{mass} have been set to \code{NA}, see also Wahba et al (1995)
+  and Ripley (1996).
+}
+\source{
+  \itemize{
+    \item Original owners: National Institute of Diabetes and Digestive and
+    Kidney Diseases
+    \item Donor of database: Vincent Sigillito
+    (vgs at aplcen.apl.jhu.edu)
+  }
+    
+  These data have been taken from the UCI Repository Of Machine Learning
+  Databases at
+  \itemize{
+    \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+    \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+  }
+  and were converted to R format by Friedrich Leisch.
+}
+\references{
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+
+  Brian D. Ripley (1996), Pattern Recognition and Neural Networks,
+  Cambridge University Press, Cambridge.
+    
+  Grace Wahba, Chong Gu, Yuedong Wang, and Richard Chappell (1995),
+  Soft Classification a.k.a. Risk Estimation via Penalized Log
+  Likelihood and Smoothing Spline Analysis of Variance, in D. H.
+  Wolpert (1995), The Mathematics of Generalization, 331-359,
+  Addison-Wesley, Reading, MA.
+}
+\examples{
+  data(PimaIndiansDiabetes)
+  summary(PimaIndiansDiabetes)
+
+  data(PimaIndiansDiabetes2)
+  summary(PimaIndiansDiabetes2)
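+
+  ## Sketch of the correction described in the Details section: zero
+  ## values in the original data appear as NAs in PimaIndiansDiabetes2.
+  vars <- c("glucose", "pressure", "triceps", "insulin", "mass")
+  colSums(PimaIndiansDiabetes[, vars] == 0)
+  colSums(is.na(PimaIndiansDiabetes2[, vars]))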
+}
+
+    
diff --git a/man/Satellite.Rd b/man/Satellite.Rd
new file mode 100644
index 0000000..e2a0d10
--- /dev/null
+++ b/man/Satellite.Rd
@@ -0,0 +1,113 @@
+\name{Satellite}
+\alias{Satellite}
+\title{Landsat Multi-Spectral Scanner Image Data}
+\description{
+  The database consists of the multi-spectral values of pixels in 3x3
+  neighbourhoods in a satellite image, and the classification associated
+  with the central pixel in each neighbourhood.  The aim is to predict
+  this classification, given the multi-spectral values.
+}
+\usage{data(Satellite)}
+\format{
+  A data frame with 36 inputs (\code{x.1 \ldots x.36}) and one target
+  (\code{classes}).
+}
+\details{
+  One frame of Landsat MSS imagery consists of four digital images of
+  the same scene in different spectral bands.  Two of these are in the
+  visible region (corresponding approximately to green and red regions
+  of the visible spectrum) and two are in the (near) infra-red.  Each
+  pixel is an 8-bit binary word, with 0 corresponding to black and 255 to
+  white. The spatial resolution of a pixel is about 80m x 80m.  Each
+  image contains 2340 x 3380 such pixels.
+    
+  The database is a (tiny) sub-area of a scene, consisting of 82 x 100
+  pixels. Each line of data corresponds to a 3x3 square neighbourhood of
+  pixels completely contained within the 82x100 sub-area.  Each line
+  contains the pixel values in the four spectral bands (converted to
+  ASCII) of each of the 9 pixels in the 3x3 neighbourhood and a number
+  indicating the classification label of the central pixel.
+
+  The classes are
+  \tabular{l}{
+    red soil\cr
+    cotton crop\cr
+    grey soil\cr
+    damp grey soil\cr
+    soil with vegetation stubble\cr
+    very damp grey soil\cr
+  }
+
+  The data is given in random order and certain lines of data have been
+  removed so you cannot reconstruct the original image from this
+  dataset.
+	
+  In each line of data the four spectral values for the top-left pixel
+  are given first followed by the four spectral values for the
+  top-middle pixel and then those for the top-right pixel, and so on
+  with the pixels read out in sequence left-to-right and top-to-bottom.
+  Thus, the four spectral values for the central pixel are given by
+  attributes 17,18,19 and 20.  If you like you can use only these four
+  attributes, while ignoring the others.  This avoids the problem which
+  arises when a 3x3 neighbourhood straddles a boundary.
+}
+\section{Origin}{
+  The original Landsat data for this database was generated from data
+  purchased from NASA by the Australian Centre for Remote Sensing, and
+  used for research at: The Centre for Remote Sensing, University of New
+  South Wales, Kensington, PO Box 1, NSW 2033, Australia.
+
+  The sample database was generated taking a small section (82 rows and
+  100 columns) from the original data.  The binary values were converted
+  to their present ASCII form by Ashwin Srinivasan.  The classification
+  for each pixel was performed on the basis of an actual site visit by
+  Ms. Karen Hall, when working for Professor John A. Richards, at the
+  Centre for Remote Sensing at the University of New South Wales,
+  Australia. Conversion to 3x3 neighbourhoods and splitting into test
+  and training sets was done by Alistair Sutherland.
+}
+\section{History}{
+  The Landsat satellite data is one of the many sources of information
+  available for a scene. The interpretation of a scene by integrating
+  spatial data of diverse types and resolutions including multispectral
+  and radar data, maps indicating topography, land use etc. is expected
+  to assume significant importance with the onset of an era characterised
+  by integrative approaches to remote sensing (for example, NASA's Earth
+  Observing System commencing this decade). Existing statistical methods 
+  are ill-equipped for handling such diverse data types. Note that this
+  is not true for Landsat MSS data considered in isolation (as in
+  this sample database). This data satisfies the important requirements
+  of being numerical and at a single resolution, and standard
+  maximum-likelihood classification performs very well. Consequently,
+  for this data, it should be interesting to compare the performance
+  of other methods against the statistical approach.
+}
+\source{
+  Ashwin Srinivasan,
+  Department of Statistics and Data Modeling,
+  University of Strathclyde,
+  Glasgow,
+  Scotland,
+  UK,
+  \email{ross at uk.ac.turing}
+
+  These data have been taken from the UCI Repository Of Machine Learning
+  Databases at
+  \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+  }
+  and were converted to R format by Friedrich Leisch.
+}
+\keyword{datasets}
+\references{  
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+}
+\examples{
+data(Satellite)
+summary(Satellite)
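+
+## As noted in the Details section, the spectral values of the central
+## pixel are attributes 17-20; a reduced data set using only these four
+## inputs plus the target can be built like this.
+SatCentral <- Satellite[, c(paste("x", 17:20, sep = "."), "classes")]
+summary(SatCentral)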
+}
diff --git a/man/Servo.Rd b/man/Servo.Rd
new file mode 100644
index 0000000..3556a31
--- /dev/null
+++ b/man/Servo.Rd
@@ -0,0 +1,56 @@
+\name{Servo}
+\title{Servo Data}
+\usage{data(Servo)}
+\alias{Servo}
+\format{A data frame with 167 observations on 5 variables: 4 nominal
+    variables and 1 target (\code{Class}).}
+
+\description{This data set is from a simulation of a servo system
+    involving a servo amplifier, a motor, a lead screw/nut, and a
+    sliding carriage of some sort. It may have been one of the
+    translational axes of a robot on the 9th floor of the AI lab. In any
+    case, the output value is almost certainly a rise time, or the time
+    required for the system to respond to a step change in a position
+    set point. The variables that describe the data set and their values
+    are the following:
+    
+    \tabular{cll}{
+	[,1] \tab Motor \tab A,B,C,D,E\cr
+	[,2] \tab Screw \tab A,B,C,D,E\cr
+    	[,3] \tab Pgain \tab 3,4,5,6\cr
+    	[,4] \tab Vgain \tab 1,2,3,4,5\cr
+    	[,5] \tab Class \tab 0.13 to 7.10
+    }
+}
+\source{
+    \itemize{
+       	\item Creator: Karl Ulrich (MIT) in 1986
+	\item Donor: Ross Quinlan 
+    }
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Evgenia Dimitriadou.
+}
+\references{
+    1. Quinlan, J.R., "Learning with continuous classes", Proc. 5th
+    Australian Joint Conference on AI (eds A. Adams and L. Sterling),
+    Singapore: World Scientific, 1992.
+
+    2. Quinlan, J.R., "Combining instance-based and model-based
+    learning", Proc. ML'93 (ed P.E. Utgoff), San Mateo: Morgan Kaufmann,
+    1993.
+
+    Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+    UCI Repository of machine learning databases
+    [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+    University of California, Department of Information and Computer
+    Science.
+}
+\keyword{datasets}
+\examples{
+data(Servo)
+summary(Servo)
+}
diff --git a/man/Shuttle.Rd b/man/Shuttle.Rd
new file mode 100644
index 0000000..8ddecb8
--- /dev/null
+++ b/man/Shuttle.Rd
@@ -0,0 +1,42 @@
+\name{Shuttle}
+\title{Shuttle Dataset (Statlog version)}
+\usage{data(Shuttle)}
+\alias{Shuttle}
+\format{A data frame with 58,000 observations on 9 numerical independent
+    variables and 1 target class.}
+
+\description{The shuttle dataset contains 9 attributes, all of which are
+    numerical, the first one being time.  The last column is the class
+    with the following 7 levels: Rad.Flow, Fpv.Close, Fpv.Open, High, Bypass,
+    Bpv.Close, Bpv.Open.
+    
+    Approximately 80\% of the data belongs to class 1. Therefore the
+    default accuracy is about 80\%. The aim here is to obtain an
+    accuracy of 99 - 99.9\%.
+
+}
+\source{
+    \itemize{
+       	\item Source: Jason Catlett of Basser Department of Computer
+	Science; University of Sydney; N.S.W.; Australia.
+    }
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Evgenia Dimitriadou.
+}
+\keyword{datasets}
+\references{  
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+}
+\examples{
+data(Shuttle)
+summary(Shuttle)
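+
+## class frequencies: roughly 80 percent of the observations fall into
+## the first class, as described above (this assumes the target column
+## is named Class)
+round(prop.table(table(Shuttle$Class)), 3)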
+}
diff --git a/man/Sonar.Rd b/man/Sonar.Rd
new file mode 100644
index 0000000..20fe344
--- /dev/null
+++ b/man/Sonar.Rd
@@ -0,0 +1,60 @@
+\name{Sonar}
+\title{Sonar, Mines vs. Rocks}
+\usage{data(Sonar)}
+\alias{Sonar}
+\format{A data frame with 208 observations on 61 variables: 60 numerical variables and one nominal variable (the \code{Class}).}
+
+\description{This is the data set used by Gorman and Sejnowski in their
+    study of the classification of sonar signals using a neural network
+    [1]. The task is to train a network to discriminate between sonar
+    signals bounced off a metal cylinder and those bounced off a roughly
+    cylindrical rock.  
+    
+    Each pattern is a set of 60 numbers in the range 0.0 to 1.0. Each
+    number represents the energy within a particular frequency band,
+    integrated over a certain period of time. The integration aperture
+    for higher frequencies occurs later in time, since these frequencies
+    are transmitted later during the chirp.
+    
+    The label associated with each record contains the letter "R" if the
+    object is a rock and "M" if it is a mine (metal cylinder). The
+    numbers in the labels are in increasing order of aspect angle, but
+    they do not encode the angle directly. 
+}
+
+\source{
+    \itemize{
+       	\item Contribution: Terry Sejnowski, Salk Institute and
+	University of California, San Diego.
+	\item Development: R. Paul Gorman, Allied-Signal Aerospace
+	Technology Center. 
+	\item Maintainer: Scott E. Fahlman 
+	
+    }
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Evgenia Dimitriadou.
+}
+
+\references{
+  Gorman, R. P., and Sejnowski, T. J. (1988). "Analysis of Hidden
+  Units in a Layered Network Trained to Classify Sonar Targets" in
+  Neural Networks, Vol. 1, pp. 75-89.
+
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+}
+\keyword{datasets}
+\examples{
+data(Sonar)
+summary(Sonar)
+}
+    
+
diff --git a/man/Soybean.Rd b/man/Soybean.Rd
new file mode 100644
index 0000000..d07efb3
--- /dev/null
+++ b/man/Soybean.Rd
@@ -0,0 +1,106 @@
+\name{Soybean}
+\title{Soybean Database}
+\usage{data(Soybean)}
+\alias{Soybean}
+\format{A data frame with 683 observations on 36 variables: 35
+    categorical attributes (coded numerically) and a nominal variable
+    denoting the class.
+    \tabular{cll}{
+	[,1] \tab Class \tab the 19 classes\cr
+	[,2] \tab date \tab
+	apr(0),may(1),june(2),july(3),aug(4),sept(5),oct(6).\cr
+	[,3] \tab plant.stand \tab normal(0),lt-normal(1).\cr
+    	[,4] \tab precip \tab lt-norm(0),norm(1),gt-norm(2).\cr
+    	[,5] \tab temp \tab lt-norm(0),norm(1),gt-norm(2).\cr
+    	[,6] \tab hail \tab yes(0),no(1).\cr
+    	[,7] \tab crop.hist \tab dif-lst-yr(0),s-l-y(1),s-l-2-y(2),
+	s-l-7-y(3).\cr
+    	[,8] \tab area.dam \tab
+	scatter(0),low-area(1),upper-ar(2),whole-field(3).\cr
+    	[,9] \tab sever \tab minor(0),pot-severe(1),severe(2).\cr
+    	[,10] \tab seed.tmt \tab none(0),fungicide(1),other(2).\cr
+   	[,11] \tab germ \tab 90-100\%(0),80-89\%(1),lt-80\%(2).\cr
+   	[,12] \tab plant.growth \tab norm(0),abnorm(1).\cr
+   	[,13] \tab leaves \tab norm(0),abnorm(1).\cr
+   	[,14] \tab leaf.halo \tab
+	absent(0),yellow-halos(1),no-yellow-halos(2).\cr
+   	[,15] \tab leaf.marg \tab w-s-marg(0),no-w-s-marg(1),dna(2).\cr
+   	[,16] \tab leaf.size \tab lt-1/8(0),gt-1/8(1),dna(2).\cr
+   	[,17] \tab leaf.shread \tab absent(0),present(1).\cr
+   	[,18] \tab leaf.malf \tab absent(0),present(1).\cr
+   	[,19] \tab leaf.mild \tab absent(0),upper-surf(1),lower-surf(2).\cr
+   	[,20] \tab stem \tab norm(0),abnorm(1).\cr
+   	[,21] \tab lodging \tab	yes(0),no(1).\cr
+   	[,22] \tab stem.cankers \tab
+	absent(0),below-soil(1),above-s(2),ab-sec-nde(3).\cr
+   	[,23] \tab canker.lesion \tab dna(0),brown(1),dk-brown-blk(2),tan(3).\cr
+   	[,24] \tab fruiting.bodies \tab absent(0),present(1).\cr
+   	[,25] \tab ext.decay \tab absent(0),firm-and-dry(1),watery(2).\cr
+   	[,26] \tab mycelium \tab absent(0),present(1).\cr
+   	[,27] \tab int.discolor \tab none(0),brown(1),black(2).\cr
+   	[,28] \tab sclerotia \tab absent(0),present(1).\cr
+   	[,29] \tab fruit.pods \tab norm(0),diseased(1),few-present(2),dna(3).\cr
+   	[,30] \tab fruit.spots \tab
+	absent(0),col(1),br-w/blk-speck(2),distort(3),dna(4).\cr
+   	[,31] \tab seed \tab norm(0),abnorm(1).\cr
+   	[,32] \tab mold.growth \tab absent(0),present(1).\cr
+   	[,33] \tab seed.discolor \tab absent(0),present(1).\cr
+   	[,34] \tab seed.size \tab norm(0),lt-norm(1).\cr
+   	[,35] \tab shriveling \tab absent(0),present(1).\cr
+   	[,36] \tab roots \tab norm(0),rotted(1),galls-cysts(2).
+
+}
+    }
+
+\description{
+    There are 19 classes, only the first 15 of which have been used in prior
+    work.  The folklore seems to be that the last four classes are
+    unjustified by the data since they have so few examples.
+    There are 35 categorical attributes, some nominal and some ordered.  The
+    value ``dna'' means does not apply.  The values for attributes are
+    encoded numerically, with the first value encoded as ``0,'' the second as
+    ``1,'' and so forth. 
+  }
+\source{
+    \itemize{
+       	\item Source: R.S. Michalski and R.L. Chilausky "Learning by
+	Being Told and Learning from Examples: An Experimental
+	Comparison of the Two Methods of Knowledge Acquisition in the
+	Context of Developing an Expert System for Soybean Disease
+	Diagnosis", International Journal of Policy Analysis and
+	Information Systems, Vol. 4, No. 2, 1980.
+        \item Donor: Ming Tan & Jeff Schlimmer (Jeff.Schlimmer\%cs.cmu.edu)
+    }
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Evgenia Dimitriadou.
+}
+\references{
+    Tan, M., & Eshelman, L. (1988). Using weighted networks to represent
+    classification knowledge in noisy domains.  Proceedings of the Fifth
+    International Conference on Machine Learning (pp. 121-134). Ann Arbor,
+    Michigan: Morgan Kaufmann.
+    -- IWN recorded a 97.1\% classification accuracy 
+    -- 290 training and 340 test instances
+	    
+    Fisher,D.H. & Schlimmer,J.C. (1988). Concept Simplification and
+    Predictive Accuracy. Proceedings of the Fifth
+    International Conference on Machine Learning (pp. 22-28). Ann Arbor,
+    Michigan: Morgan Kaufmann.
+    -- Notes why this database is highly predictable
+
+    Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+    UCI Repository of machine learning databases
+    [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+    University of California, Department of Information and Computer
+    Science.    
+}
+\keyword{datasets}
+\examples{
+data(Soybean)
+summary(Soybean)
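+
+## class frequencies and number of missing values per variable; the last
+## four classes have very few examples, as noted in the Description
+table(Soybean$Class)
+colSums(is.na(Soybean))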
+}    
diff --git a/man/Vehicle.Rd b/man/Vehicle.Rd
new file mode 100644
index 0000000..b0f5991
--- /dev/null
+++ b/man/Vehicle.Rd
@@ -0,0 +1,80 @@
+\name{Vehicle}
+\alias{Vehicle}
+\title{Vehicle Silhouettes}
+\usage{data(Vehicle)}
+
+\keyword{datasets}
+\format{
+    A data frame with 846 observations on 19 variables: 18 numerical
+    variables and one nominal variable defining the class of the objects.
+    
+    \tabular{cll}{
+   [,1] \tab Comp \tab Compactness\cr
+   [,2] \tab Circ \tab Circularity\cr
+   [,3] \tab D.Circ \tab Distance Circularity\cr
+   [,4] \tab Rad.Ra \tab Radius ratio\cr
+   [,5] \tab Pr.Axis.Ra \tab pr.axis aspect ratio\cr
+   [,6] \tab Max.L.Ra \tab max.length aspect ratio\cr
+   [,7] \tab Scat.Ra \tab scatter ratio\cr
+   [,8] \tab Elong \tab elongatedness\cr
+   [,9] \tab Pr.Axis.Rect \tab pr.axis rectangularity\cr
+  [,10] \tab Max.L.Rect \tab max.length rectangularity\cr
+  [,11] \tab Sc.Var.Maxis \tab scaled variance along major axis\cr
+  [,12] \tab Sc.Var.maxis \tab scaled variance along minor axis\cr
+  [,13] \tab Ra.Gyr \tab scaled radius of gyration\cr
+  [,14] \tab Skew.Maxis \tab skewness about major axis\cr
+  [,15] \tab Skew.maxis \tab skewness about minor axis\cr
+  [,16] \tab Kurt.maxis \tab kurtosis about minor axis\cr
+  [,17] \tab Kurt.Maxis \tab kurtosis about major axis\cr
+  [,18] \tab Holl.Ra \tab hollows ratio\cr
+  [,19] \tab Class \tab type
+  }
+}
+\description{
+    The purpose is to classify a given silhouette as one of four types
+    of vehicle, using a set of features extracted from the
+    silhouette. The vehicle may be viewed from one of many different
+    angles. The features were extracted from the silhouettes by the HIPS
+    (Hierarchical Image Processing System) extension BINATTS, which
+    extracts a combination of scale independent features utilising both
+    classical moments based measures such as scaled variance, skewness
+    and kurtosis about the major/minor axes and heuristic measures such
+    as hollows, circularity, rectangularity and compactness. 
+    
+    Four "Corgie" model vehicles were used for the experiment: a double
+    decker bus, a Chevrolet van, a Saab 9000 and an Opel Manta 400. This
+    particular combination of vehicles was chosen with the expectation
+    that the bus, van and either one of the cars would be readily
+    distinguishable, but it would be more difficult to distinguish
+    between the cars. 
+}
+\source{
+    \itemize{
+	\item Creator: Drs.Pete Mowforth and Barry Shepherd, Turing
+	Institute, Glasgow, Scotland.   
+    }
+
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Evgenia Dimitriadou.
+}
+
+\references{
+    Turing Institute Research Memorandum TIRM-87-018 "Vehicle
+    Recognition Using Rule Based Methods" by Siebert,JP (March 1987)
+
+    Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+    UCI Repository of machine learning databases
+    [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+    University of California, Department of Information and Computer
+    Science.
+}
+
+\examples{
+data(Vehicle)
+summary(Vehicle)
+}
diff --git a/man/Vowel.Rd b/man/Vowel.Rd
new file mode 100644
index 0000000..6263ee9
--- /dev/null
+++ b/man/Vowel.Rd
@@ -0,0 +1,53 @@
+\name{Vowel}
+\alias{Vowel}
+\title{Vowel Recognition (Deterding data)}
+\usage{data(Vowel)}
+\keyword{datasets}
+\format{
+    A data frame with 990 observations on 10 independent variables (one
+    nominal, the others numerical) and 1 target class.}
+   
+\description{Speaker independent recognition of the eleven steady state
+    vowels of British English using a specified training set of
+    LPC-derived log area ratios. The vowels are indexed by integers
+    0-10. For each utterance, there are ten floating-point input values,
+    with array indices 0-9. The vowels are the following: hid, hId, hEd,
+    hAd, hYd, had, hOd, hod, hUd, hud, hed. 
+}
+\source{
+    \itemize{
+	\item Creator: Tony Robinson 
+	\item Maintainer: Scott E. Fahlman, CMU
+    }
+    
+    These data have been taken from the UCI Repository Of Machine Learning
+    Databases at
+    \itemize{
+      \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+      \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+    }
+    and were converted to R format by Evgenia Dimitriadou.
+}
+
+\references{
+    D. H. Deterding, 1989, University of Cambridge, "Speaker
+    Normalisation for Automatic Speech Recognition", submitted for PhD.
+    
+    M. Niranjan and F. Fallside, 1988, Cambridge University Engineering
+    Department, "Neural Networks and Radial Basis Functions in
+    Classifying Static Speech Patterns", CUED/F-INFENG/TR.22.
+    
+    Steve Renals and Richard Rohwer, "Phoneme Classification Experiments
+    Using Radial Basis Functions", Submitted to the International Joint
+    Conference on Neural Networks, Washington, 1989.
+
+    Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+    UCI Repository of machine learning databases
+    [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+    University of California, Department of Information and Computer
+    Science.
+}
+\examples{
+data(Vowel)
+summary(Vowel)
+}
diff --git a/man/Zoo.Rd b/man/Zoo.Rd
new file mode 100644
index 0000000..6b06cf5
--- /dev/null
+++ b/man/Zoo.Rd
@@ -0,0 +1,48 @@
+\name{Zoo}
+\alias{Zoo}
+\title{Zoo Data}
+\usage{
+data(Zoo)
+}
+\description{A simple dataset containing 17 (mostly logical) variables
+  on 101 animals.}
+\format{A data frame with 17 columns: hair, feathers, eggs, milk,
+  airborne, aquatic, predator, toothed, backbone, breathes, venomous,
+  fins, legs, tail, domestic, catsize, type.
+
+  Most variables are logical and indicate whether the corresponding
+  animal has the corresponding characteristic or not. The only 2
+  exceptions are: \code{legs} takes
+  values 0, 2, 4, 5, 6, and 8. \code{type} is a grouping of the animals
+  into 7 groups, see the example section for the detailed list.
+}
+\details{
+  Ask the original donor of the data why \emph{girl} is an animal.
+}
+\references{
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+}  
+\source{
+  The original data have been donated by Richard S. Forsyth to the UCI
+  Repository Of Machine Learning
+  Databases at
+  \itemize{
+    \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}.
+  }
+  and were converted to R format by Friedrich Leisch.
+}
+\keyword{datasets}
+\examples{
+data(Zoo)
+summary(Zoo)
+
+## see the animals grouped by type
+tapply(rownames(Zoo), Zoo$type, function(x) x)
+
+## which animals have fins?
+rownames(Zoo)[Zoo$fins]
+}
diff --git a/man/as.data.frame.mlbench.Rd b/man/as.data.frame.mlbench.Rd
new file mode 100644
index 0000000..e8ba7b4
--- /dev/null
+++ b/man/as.data.frame.mlbench.Rd
@@ -0,0 +1,19 @@
+\name{as.data.frame.mlbench}
+\alias{as.data.frame.mlbench}
+\title{Convert an mlbench object to a data frame}
+\description{
+  Converts \code{x} (which is basically a list) to a data frame.
+}
+\usage{
+\method{as.data.frame}{mlbench}(x, row.names=NULL, optional=FALSE, \dots)
+}
+\arguments{
+  \item{x}{Object of class \code{"mlbench"}.}
+  \item{row.names,optional,\dots}{currently ignored.}
+}
+\examples{
+p <- mlbench.xor(5)
+p
+as.data.frame(p)
+}
+\keyword{manip}
diff --git a/man/bayesclass.Rd b/man/bayesclass.Rd
new file mode 100644
index 0000000..5a0bdb2
--- /dev/null
+++ b/man/bayesclass.Rd
@@ -0,0 +1,39 @@
+\name{bayesclass}
+\alias{bayesclass}
+\alias{bayesclass.noerr}
+\alias{bayesclass.mlbench.2dnormals}
+\alias{bayesclass.mlbench.circle}
+\alias{bayesclass.mlbench.xor}
+\alias{bayesclass.mlbench.cassini}
+\alias{bayesclass.mlbench.cuboids}
+\alias{bayesclass.mlbench.twonorm}
+\alias{bayesclass.mlbench.threenorm}
+\alias{bayesclass.mlbench.ringnorm}
+
+\title{Bayes classifier}
+\usage{
+bayesclass(z)
+}
+\arguments{
+ \item{z}{An object of class \code{"mlbench"}.}
+}
+\description{
+    Returns the decision of the (optimal) Bayes classifier for a given
+    data set. This is a generic function, i.e., there are different
+    methods for the various mlbench problems.
+
+    If the classes of the problem do not overlap, then the Bayes
+    decision is identical to the true classification, which is
+    implemented as the dummy function \code{bayesclass.noerr} (which
+    simply returns \code{z$classes} and is used for all problems with
+    disjunct classes).
+}
+\examples{
+# 6 overlapping classes
+p <- mlbench.2dnormals(500,6)
+plot(p)
+
+plot(p$x, col=as.numeric(bayesclass(p)))
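+
+# empirical error rate of the Bayes decision on this sample
+mean(bayesclass(p) != p$classes)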
+}
+
+\keyword{classif}
diff --git a/man/mlbench.2dnormals.Rd b/man/mlbench.2dnormals.Rd
new file mode 100644
index 0000000..3f9407f
--- /dev/null
+++ b/man/mlbench.2dnormals.Rd
@@ -0,0 +1,30 @@
+\name{mlbench.2dnormals}
+\alias{mlbench.2dnormals}
+\title{2-dimensional Gaussian Problem}
+\usage{
+mlbench.2dnormals(n, cl=2, r=sqrt(cl), sd=1)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{cl}{number of classes}
+    \item{r}{radius at which the centers of the classes are located}
+    \item{sd}{standard deviation of the Gaussians}
+}
+\value{Returns an object of class \code{"bayes.2dnormals"} with components
+    \item{x}{input values}
+    \item{classes}{factor vector of length \code{n} with target classes} 
+}
+\description{
+    Each of the \code{cl} classes consists of a 2-dimensional
+    Gaussian. The centers are equally spaced on a circle around the
+    origin with radius \code{r}.
+}
+\examples{
+# 2 classes
+p <- mlbench.2dnormals(500,2)
+plot(p)
+# 6 classes
+p <- mlbench.2dnormals(500,6)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.cassini.Rd b/man/mlbench.cassini.Rd
new file mode 100644
index 0000000..57dc977
--- /dev/null
+++ b/man/mlbench.cassini.Rd
@@ -0,0 +1,28 @@
+\name{mlbench.cassini}
+\alias{mlbench.cassini}
+\title{Cassini: A 2 Dimensional Problem}
+\usage{
+mlbench.cassini(n, relsize=c(2,2,1))
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{relsize}{relative size of the classes (vector of length 3)}
+}
+\value{Returns an object of class \code{"mlbench.cassini"}  with components
+    \item{x}{input values}
+    \item{classes}{vector of length \code{n} with target classes} 
+}
+\description{
+    The inputs of the cassini problem are uniformly distributed on
+    a \code{2}-dimensional space within 3 structures: the 2 outer
+    structures (classes) are banana-shaped, and the middle structure
+    (class) between them is a circle.
+}
+
+\author{Evgenia Dimitriadou and Andreas Weingessel}
+
+\examples{
+p <- mlbench.cassini(5000)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.circle.Rd b/man/mlbench.circle.Rd
new file mode 100644
index 0000000..5de02ea
--- /dev/null
+++ b/man/mlbench.circle.Rd
@@ -0,0 +1,32 @@
+\name{mlbench.circle}
+\alias{mlbench.circle}
+\title{Circle in a Square Problem}
+\usage{
+mlbench.circle(n, d=2)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{d}{dimension of the circle problem}
+}
+\value{Returns an object of class \code{"mlbench.circle"}  with components
+    \item{x}{input values}
+    \item{classes}{factor vector of length \code{n} with target classes} 
+}
+\description{
+    The inputs of the circle problem are uniformly distributed on
+    the \code{d}-dimensional cube with corners \eqn{\{\pm 1\}}{\{+-1\}}. 
+    This is a 2-class problem: The first class is a \code{d}-dimensional
+    ball in the middle of the cube, the remainder forms the second
+    class. The size of the ball is chosen such that both classes have equal
+    prior probability 0.5.
+}
+\examples{
+# 2d example
+p<-mlbench.circle(300,2)
+plot(p)
+#
+# 3d example
+p<-mlbench.circle(300,3)
+plot(p)
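+#
+# both classes should occur with roughly equal frequency
+# (prior probability 0.5 each, see above)
+table(p$classes)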
+}
+\keyword{datagen}
diff --git a/man/mlbench.cuboids.Rd b/man/mlbench.cuboids.Rd
new file mode 100644
index 0000000..e5a09fa
--- /dev/null
+++ b/man/mlbench.cuboids.Rd
@@ -0,0 +1,32 @@
+\name{mlbench.cuboids}
+\alias{mlbench.cuboids}
+\title{Cuboids: A 3 Dimensional Problem}
+\usage{
+mlbench.cuboids(n, relsize=c(2,2,2,1))
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{relsize}{relative size of the classes (vector of length 4)}
+}
+\value{Returns an object of class \code{"mlbench.cuboids"}  with components
+    \item{x}{input values}
+    \item{classes}{vector of length \code{n} with target classes} 
+}
+\description{
+    The inputs of the cuboids problem are uniformly distributed on
+    a \code{3}-dimensional space within 3 cuboids and a small
+    cube in the middle of them. 
+}
+
+\author{Evgenia Dimitriadou, and Andreas Weingessel}
+
+\examples{
+p <- mlbench.cuboids(7000)
+plot(p)
+\dontrun{
+library(Rggobi)
+g <- ggobi(p$x)
+g$setColors(p$class)
+g$setMode("2D Tour")
+}}
+\keyword{datagen}
diff --git a/man/mlbench.friedman1.Rd b/man/mlbench.friedman1.Rd
new file mode 100644
index 0000000..1c44fd4
--- /dev/null
+++ b/man/mlbench.friedman1.Rd
@@ -0,0 +1,34 @@
+\name{mlbench.friedman1}
+\alias{mlbench.friedman1}
+\title{Benchmark Problem Friedman 1}
+\usage{
+mlbench.friedman1(n, sd=1)
+}
+\arguments{
+\item{n}{number of patterns to create}
+\item{sd}{Standard deviation of noise}
+}
+\description{
+The regression problem Friedman 1 as described in Friedman (1991) and
+Breiman (1996). Inputs are 10 independent variables uniformly
+distributed on the interval \eqn{[0,1]}, only 5 out of these 10 are actually
+used. Outputs are created according to
+the formula
+\deqn{y = 10 \sin(\pi x1 x2) + 20 (x3 - 0.5)^2 + 10 x4 + 5 x5 + e}{
+  y = 10 sin(\pi x1 x2) + 20 (x3 - 0.5)^2
+  + 10 x4 + 5 x5 + e}
+
+where e is N(0,sd).
+}
+\value{Returns a list with components
+\item{x}{input values (independent variables)}
+\item{y}{output values (dependent variable)}
+}
+\references{
+Breiman, Leo (1996) Bagging predictors. Machine Learning 24, pages
+123-140.
+
+Friedman, Jerome H. (1991) Multivariate adaptive regression
+splines. The Annals of Statistics 19 (1), pages 1-67. 
+}
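+\examples{
+# a minimal usage sketch (the function returns a list with components
+# x and y, as described above): generate data and fit a linear model
+p <- mlbench.friedman1(200)
+summary(lm(p$y ~ p$x))
+}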
+\keyword{datagen}
diff --git a/man/mlbench.friedman2.Rd b/man/mlbench.friedman2.Rd
new file mode 100644
index 0000000..9d23d62
--- /dev/null
+++ b/man/mlbench.friedman2.Rd
@@ -0,0 +1,39 @@
+\name{mlbench.friedman2}
+\alias{mlbench.friedman2}
+\title{Benchmark Problem Friedman 2}
+\usage{
+mlbench.friedman2(n, sd=125)
+}
+\arguments{
+\item{n}{number of patterns to create}
+\item{sd}{Standard deviation of noise. The default value of 125 gives
+a signal to noise ratio (i.e., the ratio of the standard deviations) of
+3:1. Thus, the variance of the function itself (without noise)
+accounts for 90\% of the total variance.}
+}
+\description{
+The regression problem Friedman 2 as described in Friedman (1991) and
+Breiman (1996). Inputs are 4 independent variables uniformly
+distributed over the ranges
+\deqn{0 \le x1 \le 100}
+\deqn{40 \pi \le x2 \le 560 \pi}
+\deqn{0 \le x3 \le 1}
+\deqn{1 \le x4 \le 11}
+
+The outputs are created according to the formula
+\deqn{y = (x1^2 + (x2 x3 - (1/(x2 x4)))^2)^{0.5} + e}
+where e is N(0,sd).
+}
+\value{Returns a list with components
+\item{x}{input values (independent variables)}
+\item{y}{output values (dependent variable)}
+}
+\references{
+Breiman, Leo (1996) Bagging predictors. Machine Learning 24, pages
+123-140.
+
+Friedman, Jerome H. (1991) Multivariate adaptive regression
+splines. The Annals of Statistics 19 (1), pages 1-67. 
+}
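+\examples{
+# minimal usage sketch (list with components x and y, see above)
+p <- mlbench.friedman2(200)
+summary(lm(p$y ~ p$x))
+}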
+\keyword{datagen}
+
diff --git a/man/mlbench.friedman3.Rd b/man/mlbench.friedman3.Rd
new file mode 100644
index 0000000..07e6285
--- /dev/null
+++ b/man/mlbench.friedman3.Rd
@@ -0,0 +1,40 @@
+\name{mlbench.friedman3}
+\alias{mlbench.friedman3}
+\title{Benchmark Problem Friedman 3}
+\usage{
+mlbench.friedman3(n, sd=0.1)
+}
+\arguments{
+\item{n}{number of patterns to create}
+\item{sd}{Standard deviation of noise. The default value of 0.1 gives
+a signal to noise ratio (i.e., the ratio of the standard deviations) of
+3:1. Thus, the variance of the function itself (without noise)
+accounts for 90\% of the total variance.}
+}
+\description{
+The regression problem Friedman 3 as described in Friedman (1991) and
+Breiman (1996). Inputs are 4 independent variables uniformly
+distributed over the ranges
+\deqn{0 \le x1 \le 100}
+\deqn{40 \pi \le x2 \le 560 \pi}
+\deqn{0 \le x3 \le 1}
+\deqn{1 \le x4 \le 11}
+
+The outputs are created according to the formula
+\deqn{y = \mbox{atan}((x2 x3 - (1/(x2 x4)))/x1) + e}{
+  y = atan ((x2 x3 - (1/(x2 x4)))/x1) + e}
+
+where e is N(0,sd).
+}
+\value{Returns a list with components
+\item{x}{input values (independent variables)}
+\item{y}{output values (dependent variable)}
+}
+\references{
+Breiman, Leo (1996) Bagging predictors. Machine Learning 24, pages
+123-140.
+
+Friedman, Jerome H. (1991) Multivariate adaptive regression
+splines. The Annals of Statistics 19 (1), pages 1-67. 
+}
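+\examples{
+# minimal usage sketch (list with components x and y, see above):
+# generate data and fit a linear model to the inputs
+p <- mlbench.friedman3(200)
+summary(lm(p$y ~ p$x))
+}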
+\keyword{datagen}
diff --git a/man/mlbench.hypercube.Rd b/man/mlbench.hypercube.Rd
new file mode 100644
index 0000000..57e841b
--- /dev/null
+++ b/man/mlbench.hypercube.Rd
@@ -0,0 +1,33 @@
+\name{mlbench.hypercube}
+\alias{mlbench.corners}
+\alias{mlbench.hypercube}
+\alias{hypercube}
+\title{Corners of Hypercube}
+\usage{
+mlbench.hypercube(n=800, d=3, sides=rep(1,d), sd=0.1)
+hypercube(d)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{d}{dimensionality of hypercube, default is 3}
+    \item{sides}{lengths of the sides of the hypercube, default is to
+      create a unit hypercube}
+    \item{sd}{standard deviation}
+}
+\value{Returns an object of class \code{"mlbench.hypercube"}  with components
+    \item{x}{input values}
+    \item{classes}{factor of length \code{n} with target classes} }
+\description{
+    The created data are \code{d}-dimensional spherical Gaussians with standard
+    deviation \code{sd} and means at the corners of a
+    \code{d}-dimensional hypercube. The number of classes is \eqn{2^d}.
+    
+}
+\examples{
+p <- mlbench.hypercube()
+plot(p)
+
+library("lattice")
+cloud(x.3~x.1+x.2, groups=classes, data=as.data.frame(p))
+}
+\keyword{datagen}
diff --git a/man/mlbench.peak.Rd b/man/mlbench.peak.Rd
new file mode 100644
index 0000000..8e06aed
--- /dev/null
+++ b/man/mlbench.peak.Rd
@@ -0,0 +1,23 @@
+\name{mlbench.peak}
+\alias{mlbench.peak}
+\title{Peak Benchmark Problem}
+\usage{
+mlbench.peak(n, d=20)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{d}{dimension of the problem}
+}
+\description{
+    Let \eqn{r=3u} where \eqn{u} is uniform on
+    [0,1]. Take \eqn{x} to be uniformly distributed on the
+    \eqn{d}-dimensional sphere of radius \eqn{r}. Let
+    \eqn{y=25\exp(-0.5r^2)}{y = 25 exp(-0.5 r^2)}. This data set is not a
+    classification problem but a regression problem where \eqn{y} is the
+    dependent variable.
+}
+\value{Returns a list with components
+\item{x}{input values (independent variables)}
+\item{y}{output values (dependent variable)}
+}
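+
+\examples{
+# minimal usage sketch (list with components x and y, see above)
+p <- mlbench.peak(200, d = 5)
+summary(p$y)
+}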
+
+\keyword{datagen}
diff --git a/man/mlbench.ringnorm.Rd b/man/mlbench.ringnorm.Rd
new file mode 100644
index 0000000..fada12a
--- /dev/null
+++ b/man/mlbench.ringnorm.Rd
@@ -0,0 +1,31 @@
+\name{mlbench.ringnorm}
+\alias{mlbench.ringnorm}
+\title{Ringnorm Benchmark Problem}
+\usage{
+mlbench.ringnorm(n, d=20)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{d}{dimension of the ringnorm problem}
+}
+\value{Returns an object of class \code{"mlbench.ringnorm"} with components
+    \item{x}{input values}
+    \item{classes}{factor vector of length \code{n} with target classes} 
+}
+\description{
+    The inputs of the ringnorm problem are points from two Gaussian
+    distributions. Class 1 is multivariate normal with mean 0 and
+    covariance 4 times the identity matrix. Class 2 has unit covariance
+    and mean \eqn{(a,a,\ldots,a)}, \eqn{a=d^{-0.5}}.
+
+}
+\references{
+    Breiman, L. (1996). Bias, variance, and arcing classifiers.
+    Tech. Rep. 460, Statistics Department, University of California,
+    Berkeley, CA, USA.
+}
+\examples{
+p<-mlbench.ringnorm(1000, d=2)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.shapes.Rd b/man/mlbench.shapes.Rd
new file mode 100644
index 0000000..60a1913
--- /dev/null
+++ b/man/mlbench.shapes.Rd
@@ -0,0 +1,19 @@
+\name{mlbench.shapes}
+\alias{mlbench.shapes}
+\title{Shapes in 2d}
+\usage{
+mlbench.shapes(n=500)
+}
+\arguments{
+  \item{n}{number of patterns to create}
+}
+\value{Returns an object of class \code{"mlbench.shapes"}  with components
+  \item{x}{input values}
+  \item{classes}{factor of length \code{n} with target classes} 
+}
+\description{A Gaussian, square, triangle and wave in 2 dimensions.}
+\examples{
+p<-mlbench.shapes()
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.simplex.Rd b/man/mlbench.simplex.Rd
new file mode 100644
index 0000000..2fde33d
--- /dev/null
+++ b/man/mlbench.simplex.Rd
@@ -0,0 +1,37 @@
+\name{mlbench.simplex}
+\alias{mlbench.simplex}
+\alias{simplex}
+\title{Corners of d-dimensional Simplex}
+\usage{
+mlbench.simplex(n = 800, d = 3, sides = 1, sd = 0.1, center=TRUE)
+simplex(d, sides, center=TRUE)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{d}{dimensionality of simplex, default is 3}
+    \item{sides}{lengths of the sides of the simplex, default is to
+      create a unit simplex}
+    \item{sd}{standard deviation}
+    \item{center}{If \code{TRUE}, the origin is the center of gravity of
+      the simplex. If \code{FALSE}, the origin is a corner of the
+      simplex and all coordinates of the simplex are positive.}
+}
+\value{Returns an object of class \code{"mlbench.simplex"}  with components
+    \item{x}{input values}
+    \item{classes}{factor of length \code{n} with target classes} }
+\description{
+    The created data are \code{d}-dimensional spherical Gaussians with standard
+    deviation \code{sd} and means at the corners of a
+    \code{d}-dimensional simplex. The number of classes is \code{d+1}.
+}    
+\author{
+  Manuel Eugster and Sebastian Kaiser 
+  }
+\examples{
+p <- mlbench.simplex()
+plot(p)
+
+library("lattice")
+cloud(x.3~x.1+x.2, groups=classes, data=as.data.frame(p))
+}
+\keyword{datagen}
diff --git a/man/mlbench.smiley.Rd b/man/mlbench.smiley.Rd
new file mode 100644
index 0000000..4083429
--- /dev/null
+++ b/man/mlbench.smiley.Rd
@@ -0,0 +1,24 @@
+\name{mlbench.smiley}
+\alias{mlbench.smiley}
+\title{The Smiley}
+\usage{
+mlbench.smiley(n=500, sd1 = 0.1, sd2 = 0.05)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{sd1}{standard deviation for eyes}
+    \item{sd2}{standard deviation for mouth}
+}
+\value{Returns an object of class \code{"mlbench.smiley"}  with components
+    \item{x}{input values}
+    \item{classes}{factor vector of length \code{n} with target classes} 
+}
+\description{
+    The smiley consists of 2 Gaussian eyes, a trapezoid nose and a
+    parabola mouth (with vertical Gaussian noise).
+}
+\examples{
+p<-mlbench.smiley()
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.spirals.Rd b/man/mlbench.spirals.Rd
new file mode 100644
index 0000000..55f99c6
--- /dev/null
+++ b/man/mlbench.spirals.Rd
@@ -0,0 +1,32 @@
+\name{mlbench.spirals}
+\alias{mlbench.spirals}
+\alias{mlbench.1spiral}
+\title{Two Spirals Benchmark Problem}
+\usage{
+mlbench.spirals(n, cycles=1, sd=0)
+mlbench.1spiral(n, cycles=1, sd=0)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{cycles}{the number of cycles each spiral makes}
+    \item{sd}{standard deviation of data points around the spirals}
+}
+\value{Returns an object of class \code{"mlbench.spirals"} with components
+    \item{x}{input values}
+    \item{classes}{factor vector of length \code{n} with target classes} 
+}
+\description{
+    The inputs of the spirals problem are points on two entangled spirals. If
+    \code{sd>0}, then Gaussian noise is added to each data
+    point. \code{mlbench.1spiral} creates a single spiral.
+}
+\examples{
+# 1 cycle each, no noise
+p<-mlbench.spirals(300)
+plot(p)
+#
+# 1.5 cycles each, with noise
+p<-mlbench.spirals(300,1.5,0.05)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.threenorm.Rd b/man/mlbench.threenorm.Rd
new file mode 100644
index 0000000..9fe00d2
--- /dev/null
+++ b/man/mlbench.threenorm.Rd
@@ -0,0 +1,33 @@
+\name{mlbench.threenorm}
+\alias{mlbench.threenorm}
+\title{Threenorm Benchmark Problem}
+\usage{
+mlbench.threenorm(n, d=20)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{d}{dimension of the threenorm problem}
+}
+\value{Returns an object of class \code{"mlbench.threenorm"} with components
+    \item{x}{input values}
+    \item{classes}{factor vector of length \code{n} with target classes} 
+}
+\description{
+    The inputs of the threenorm problem are points from two Gaussian
+    distributions with unit covariance matrix. Class 1 is drawn with
+    equal probability from a unit multivariate normal with mean
+    \eqn{(a,a,\ldots,a)} and from a unit multivariate normal with mean 
+    \eqn{(-a,-a,\ldots,-a)}. Class 2 is drawn from a multivariate normal
+    with mean at \eqn{(a,-a,a, \ldots,-a)}, \eqn{a=2/d^{0.5}}. 
+
+}
+\references{
+    Breiman, L. (1996). Bias, variance, and arcing classifiers.
+    Tech. Rep. 460, Statistics Department, University of California,
+    Berkeley, CA, USA.
+}
+\examples{
+p<-mlbench.threenorm(1000, d=2)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.twonorm.Rd b/man/mlbench.twonorm.Rd
new file mode 100644
index 0000000..77e9784
--- /dev/null
+++ b/man/mlbench.twonorm.Rd
@@ -0,0 +1,31 @@
+\name{mlbench.twonorm}
+\alias{mlbench.twonorm}
+\title{Twonorm Benchmark Problem}
+\usage{
+mlbench.twonorm(n, d=20)
+}
+\arguments{
+    \item{n}{number of patterns to create}
+    \item{d}{dimension of the twonorm problem}
+}
+\value{Returns an object of class \code{"mlbench.twonorm"} with components
+    \item{x}{input values}
+    \item{classes}{factor vector of length \code{n} with target classes} 
+}
+\description{
+    The inputs of the twonorm problem are points from two Gaussian
+    distributions with unit covariance matrix. Class 1 is multivariate
+    normal with mean \eqn{(a,a,\ldots,a)} and class 2 with mean
+    \eqn{(-a,-a,\ldots,-a)}, \eqn{a=2/d^{0.5}}. 
+
+}
+\references{
+    Breiman, L. (1996). Bias, variance, and arcing classifiers.
+    Tech. Rep. 460, Statistics Department, University of California,
+    Berkeley, CA, USA.
+}
+\examples{
+p<-mlbench.twonorm(1000, d=2)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.waveform.Rd b/man/mlbench.waveform.Rd
new file mode 100644
index 0000000..5a57c8f
--- /dev/null
+++ b/man/mlbench.waveform.Rd
@@ -0,0 +1,52 @@
+\name{mlbench.waveform}
+\alias{mlbench.waveform}
+\title{Waveform Database Generator}
+\usage{
+  mlbench.waveform(n)
+}
+\arguments{
+  \item{n}{number of patterns to create}
+}
+
+\value{
+  Returns an object of class \code{"mlbench.waveform"} with components
+  \item{x}{input values}
+  \item{classes}{factor vector of length \code{n} with target classes}
+}
+
+\description{
+    The generated data set consists of 21 attributes with continuous
+    values and a variable showing the 3 classes (with approximately
+    33\% of the observations in each class). Each class is generated
+    from a combination of 2 of 3 "base" waves. 
+  }
+\source{
+  The original C code for the waveform generator has been taken
+  from the UCI Repository
+  of Machine Learning Databases at
+  \itemize{
+    \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+    \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+  }
+  The C code has been modified to use R's random number generator
+  by Friedrich Leisch, who also wrote the R interface.
+}
+  
+\references{
+  Breiman, L. (1996). Bias, variance, and arcing
+  classifiers. Tech. Rep. 460, Statistics Department, University of
+  California, Berkeley, CA, USA.
+
+  Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+  UCI Repository of machine learning databases
+  [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+  University of California, Department of Information and Computer
+  Science.
+}
+
+\examples{
+  p<-mlbench.waveform(100)
+  plot(p)
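+
+  # each of the 3 classes should appear with roughly equal frequency
+  table(p$classes)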
+}
+
+\keyword{datagen}
diff --git a/man/mlbench.xor.Rd b/man/mlbench.xor.Rd
new file mode 100644
index 0000000..b3f57d6
--- /dev/null
+++ b/man/mlbench.xor.Rd
@@ -0,0 +1,30 @@
+\name{mlbench.xor}
+\alias{mlbench.xor}
+\title{Continuous XOR Benchmark Problem}
+\usage{
+mlbench.xor(n, d=2)
+}
+\arguments{
+\item{n}{number of patterns to create}
+\item{d}{dimension of the XOR problem}
+}
+\value{Returns an object of class \code{"mlbench.xor"} with components
+\item{x}{input values}
+\item{classes}{factor vector of length \code{n} with target classes} 
+}
+\description{
+    The inputs of the XOR problem are uniformly distributed on
+    the \code{d}-dimensional cube with corners \eqn{\{\pm 1\}}{\{+-1\}}. Each pair of
+    opposite corners forms one class, hence the total number of classes is
+    \eqn{2^{d-1}}{2^(d-1)}.
+}
+\examples{
+# 2d example
+p<-mlbench.xor(300,2)
+plot(p)
+#
+# 3d example
+p<-mlbench.xor(300,3)
+plot(p)
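+#
+# the d-dimensional problem has 2^(d-1) classes, here 4
+nlevels(p$classes)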
+}
+\keyword{datagen}
diff --git a/man/plot.mlbench.Rd b/man/plot.mlbench.Rd
new file mode 100644
index 0000000..19532c1
--- /dev/null
+++ b/man/plot.mlbench.Rd
@@ -0,0 +1,28 @@
+\name{plot.mlbench}
+\alias{plot.mlbench}
+\title{Plot mlbench objects}
+\usage{
+\S3method{plot}{mlbench}(x, xlab="", ylab="", ...)
+}
+\arguments{
+ \item{x}{Object of class \code{"mlbench"}.}
+ \item{xlab}{Label for x-axis.}
+ \item{ylab}{Label for y-axis.}
+ \item{\dots}{Further plotting options.}
+}
+\description{
+    Plots the data of an mlbench object using different colors for each
+    class. If the dimension of the input space is larger than 2, a
+    scatter plot matrix is used.
+}
+\examples{
+# 6 normal classes
+p <- mlbench.2dnormals(500,6)
+plot(p)
+
+# 4-dimensional XOR
+p <- mlbench.xor(500,4)
+plot(p)
+}
+
+\keyword{hplot}
diff --git a/src/waveform.c b/src/waveform.c
new file mode 100644
index 0000000..d70bb70
--- /dev/null
+++ b/src/waveform.c
@@ -0,0 +1,126 @@
+/* =====================================================================
+   David Aha
+   August 1988
+   Creates waveform domain data
+   Usage: create-waveform number num-attributes
+   See CART book, page 49 for details
+   This is for the 21-attribute problem.
+
+   Requires use of the UNIXSTAT tool named "probdist".
+
+   modified by Friedrich Leisch on 2000/12/11 to use R's random number
+   generator
+   ===================================================================== */
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <R_ext/Random.h>
+
+#define NUMBER_OF_ATTRIBUTES 21
+#define NUMBER_OF_CLASSES 3
+
+int num_instances;
+double h[NUMBER_OF_CLASSES][NUMBER_OF_ATTRIBUTES];
+
+/* =====================================================================
+   Main Function
+   ===================================================================== */
+void waveform(int *R_num_instances, double *x, int *type)
+{
+   void execute(double *x, int *type);
+   void initialize();
+
+   num_instances = *R_num_instances;
+
+   GetRNGstate();
+   initialize();
+   execute(x, type);
+   PutRNGstate();
+}
+
+/* =====================================================================
+   Initializes the algorithm.
+   ==================================================================== */
+void initialize()
+{
+   int i,j;
+
+   /*==== Setup for waveform of types 1 through 3 ====*/
+   for(i=0; i<3; i++)
+      for(j=0; j<21; j++)
+	h[i][j] = 0.0;
+
+   /*==== Waveform 1 ====*/
+   for(i=1; i<=6; i++)
+     h[0][i] = (double)i;
+   j=1;
+   for(i=11; i>=7; i--)
+     { h[0][i] = (double)j;
+       j++;
+     }
+
+   /*==== Waveform 2 ====*/
+   j = 1;
+   for(i=9; i<=14; i++)
+     { h[1][i] = (double)j;
+       j++;
+     }
+   j=1;
+   for(i=19; i>=15; i--)
+     { h[1][i] = (double)j;
+       j++;
+     }
+
+   /*==== Waveform 3 ====*/
+   j = 1;
+   for(i=5; i<=10; i++)
+     { h[2][i] = (double)j;
+       j++;
+     }
+   j=1;
+   for(i=15; i>=11; i--)
+     { h[2][i] = (double)j;
+       j++;
+     }
+
+}
+   
+/* =====================================================================
+   Executes the algorithm.
+   ===================================================================== */
+void execute(double *x, int *type)
+{
+    int num_instance, num_attribute;
+    int waveform_type, choice[2];
+    double random_attribute_value, multiplier[2];
+
+    
+    for(num_instance=0; num_instance<num_instances; num_instance++)
+    {  /*==== Set up class type ====*/
+	waveform_type = floor(3*unif_rand());
+	switch (waveform_type)
+	{ case 0: choice[0] = 0; choice[1] = 1; break;
+	case 1: choice[0] = 0; choice[1] = 2; break;
+	case 2: choice[0] = 1; choice[1] = 2; break;
+	}
+	
+	/*==== Set up u and (1-u) for this call ====*/
+	multiplier[0] = unif_rand();
+	multiplier[1] = 1.0 - multiplier[0];
+	
+	/*==== Create the instance ====*/
+	for(num_attribute=0; num_attribute<NUMBER_OF_ATTRIBUTES;
+	    num_attribute++)
+	{
+	    random_attribute_value = norm_rand();
+	    /*==== Calculate the value ====*/
+	    x[num_instance*NUMBER_OF_ATTRIBUTES + num_attribute] =
+		(multiplier[0] * h[choice[0]][num_attribute]) +
+		(multiplier[1] * h[choice[1]][num_attribute]) +
+		random_attribute_value;
+	}
+
+	type[num_instance] = waveform_type;	
+    }
+}
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/r-cran-mlbench.git



More information about the debian-science-commits mailing list