[r-cran-mlbench] 03/05: New upstream version 2.1-1
Andreas Tille
tille at debian.org
Sat Oct 21 14:44:18 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-mlbench.
commit 4ac351d8cc1437f94bfe0f62cdb23ff95666f814
Author: Andreas Tille <tille at debian.org>
Date: Sat Oct 21 16:40:03 2017 +0200
New upstream version 2.1-1
---
DESCRIPTION | 16 ++
Data-Administration.R | 93 ++++++++
MD5 | 66 ++++++
NAMESPACE | 18 ++
NEWS | 102 +++++++++
R/mlbench-class.R | 500 ++++++++++++++++++++++++++++++++++++++++++
R/mlbench-regression.R | 78 +++++++
README | 11 +
data/BostonHousing.rda | Bin 0 -> 12932 bytes
data/BostonHousing2.rda | Bin 0 -> 16660 bytes
data/BreastCancer.rda | Bin 0 -> 5460 bytes
data/DNA.rda | Bin 0 -> 81208 bytes
data/Glass.rda | Bin 0 -> 4144 bytes
data/HouseVotes84.rda | Bin 0 -> 2264 bytes
data/Ionosphere.rda | Bin 0 -> 34524 bytes
data/LetterRecognition.rda | Bin 0 -> 162580 bytes
data/Ozone.rda | Bin 0 -> 5224 bytes
data/PimaIndiansDiabetes.rda | Bin 0 -> 8424 bytes
data/PimaIndiansDiabetes2.rda | Bin 0 -> 8284 bytes
data/Satellite.rda | Bin 0 -> 109096 bytes
data/Servo.rda | Bin 0 -> 1060 bytes
data/Shuttle.rda | Bin 0 -> 371492 bytes
data/Sonar.rda | Bin 0 -> 26152 bytes
data/Soybean.rda | Bin 0 -> 4732 bytes
data/Vehicle.rda | Bin 0 -> 15220 bytes
data/Vowel.rda | Bin 0 -> 17800 bytes
data/Zoo.rda | Bin 0 -> 1268 bytes
debian/changelog | 5 -
debian/compat | 1 -
debian/control | 23 --
debian/copyright | 31 ---
debian/rules | 3 -
debian/source/format | 1 -
debian/watch | 2 -
inst/CITATION | 31 +++
man/BostonHousing.Rd | 83 +++++++
man/BreastCancer.Rd | 86 ++++++++
man/DNA.Rd | 80 +++++++
man/Glass.Rd | 55 +++++
man/HouseVotes84.Rd | 64 ++++++
man/Ionosphere.Rd | 74 +++++++
man/LetterRecognition.Rd | 76 +++++++
man/Ozone.Rd | 38 ++++
man/PimaIndiansDiabetes.Rd | 74 +++++++
man/Satellite.Rd | 113 ++++++++++
man/Servo.Rd | 56 +++++
man/Shuttle.Rd | 42 ++++
man/Sonar.Rd | 60 +++++
man/Soybean.Rd | 106 +++++++++
man/Vehicle.Rd | 80 +++++++
man/Vowel.Rd | 53 +++++
man/Zoo.Rd | 48 ++++
man/as.data.frame.mlbench.Rd | 19 ++
man/bayesclass.Rd | 39 ++++
man/mlbench.2dnormals.Rd | 30 +++
man/mlbench.cassini.Rd | 28 +++
man/mlbench.circle.Rd | 32 +++
man/mlbench.cuboids.Rd | 32 +++
man/mlbench.friedman1.Rd | 34 +++
man/mlbench.friedman2.Rd | 39 ++++
man/mlbench.friedman3.Rd | 40 ++++
man/mlbench.hypercube.Rd | 33 +++
man/mlbench.peak.Rd | 23 ++
man/mlbench.ringnorm.Rd | 31 +++
man/mlbench.shapes.Rd | 19 ++
man/mlbench.simplex.Rd | 37 ++++
man/mlbench.smiley.Rd | 24 ++
man/mlbench.spirals.Rd | 32 +++
man/mlbench.threenorm.Rd | 33 +++
man/mlbench.twonorm.Rd | 31 +++
man/mlbench.waveform.Rd | 52 +++++
man/mlbench.xor.Rd | 30 +++
man/plot.mlbench.Rd | 28 +++
src/waveform.c | 126 +++++++++++
74 files changed, 2895 insertions(+), 66 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..c2023d5
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,16 @@
+Package: mlbench
+Version: 2.1-1
+Title: Machine Learning Benchmark Problems
+Date: 2010-12-10
+Author: Friedrich Leisch and Evgenia Dimitriadou.
+Maintainer: Friedrich Leisch <Friedrich.Leisch at R-project.org>
+Description: A collection of artificial and real-world machine learning
+ benchmark problems, including, e.g., several data sets from the
+ UCI repository.
+Depends: R (>= 2.10)
+License: GPL-2
+Suggests: lattice
+ZipData: No
+Packaged: 2012-07-10 08:19:15 UTC; leisch
+Repository: CRAN
+Date/Publication: 2012-07-10 11:51:32
diff --git a/Data-Administration.R b/Data-Administration.R
new file mode 100644
index 0000000..9bd247b
--- /dev/null
+++ b/Data-Administration.R
@@ -0,0 +1,93 @@
+## This file keeps record of reading the data into R and
+## transformations (if any) that have been applied. All
+## transformations are indicated in the respective help pages.
+
+###**********************************************************
+
+LetterRecognition <- scan(file="LetterRecognition.data")
+
+LetterRecognition <- matrix(LetterRecognition,ncol=17,byrow=TRUE)
+LetterRecognition <- as.data.frame(LetterRecognition)
+colnames(LetterRecognition) <-
+ c("lettr", "x.box", "y.box", "width", "high", "onpix", "x.bar",
+ "y.bar", "x2bar", "y2bar", "xybar", "x2ybr", "xy2br", "x.ege",
+ "xegvy", "y.ege", "yegvx")
+LetterRecognition$lettr <- factor(LetterRecognition$lettr,
+ labels=LETTERS)
+
+save(LetterRecognition, file="data/LetterRecognition.rda")
+
+
+###**********************************************************
+
+### PimaIndiansDiabetes2
+
+load("data/PimaIndiansDiabetes.rda")
+PimaIndiansDiabetes2 = PimaIndiansDiabetes
+
+for(n in c("glucose", "pressure","triceps", "insulin", "mass")){
+ PimaIndiansDiabetes2[[n]][PimaIndiansDiabetes[[n]]==0] <- NA
+}
+
+save(PimaIndiansDiabetes2, file="data/PimaIndiansDiabetes2.rda")
+
+###**********************************************************
+
+Satellite <- scan("Satellite.data")
+
+Satellite <- matrix(Satellite,ncol=37,byrow=TRUE)
+Satellite <- data.frame(x=Satellite[,1:36], classes=factor(Satellite[,37]))
+levels(Satellite$classes) <- c("red soil",
+ "cotton crop",
+ "grey soil",
+ "damp grey soil",
+ "vegetation stubble",
+ "very damp grey soil")
+
+save(Satellite, file="data/Satellite.rda")
+
+###**********************************************************
+
+### Zoo
+
+## download zoo.data from UCI repository (2007-02-02)
+## edit zoo.data from UCI repository: two rows have name "frog"
+## -> frog.1 and frog.2
+
+Zoo <- read.csv("zoo.data", header=FALSE, row.names=1)
+
+colnames(Zoo) <- c("hair",
+ "feathers",
+ "eggs",
+ "milk",
+ "airborne",
+ "aquatic",
+ "predator",
+ "toothed",
+ "backbone",
+ "breathes",
+ "venomous",
+ "fins",
+ "legs",
+ "tail",
+ "domestic",
+ "catsize",
+ "type")
+
+Zoo[,1:12] <- lapply(Zoo[,1:12], as.logical)
+Zoo[,14:16] <- lapply(Zoo[,14:16], as.logical)
+Zoo[,17] <- factor(Zoo[,17],
+ labels=c("mammal","bird","reptile","fish",
+ "amphibian","insect","mollusc.et.al"))
+
+save(Zoo, file="Zoo.rda")
+
+###**********************************************************
+
+## change compression type
+
+for(f in list.files("data")){
+ n <- sub(".rda", "", f)
+ load(file.path("data", f))
+ save(list=n, file=f, compress="xz")
+}
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..4b816ba
--- /dev/null
+++ b/MD5
@@ -0,0 +1,66 @@
+f2a9aa569a77ebfe29653e2bf16ec0eb *DESCRIPTION
+6776e69974f0f642d95de03376502981 *Data-Administration.R
+8ee1257469e0ad8511789428c5d466c2 *NAMESPACE
+a7f407e416bcd81e0e2697cfb6eeb714 *NEWS
+0000c717c763ca757f9a2b6bedb67c78 *R/mlbench-class.R
+1bee7ea370a599f2609f58254396e178 *R/mlbench-regression.R
+4069da5d5f4e156b36697681600a3224 *README
+c47b4e6ddc106f9e2f0450d1b1fc23cb *data/BostonHousing.rda
+25209554a682562de2c0018f673f1584 *data/BostonHousing2.rda
+eae7e006a671d988711f7b8a2901d94f *data/BreastCancer.rda
+cb9ee3f094336cdb4476d824bbac2375 *data/DNA.rda
+73889627f545aa8c233869679aa5aaec *data/Glass.rda
+0473be45f18e80533003e21648c7c718 *data/HouseVotes84.rda
+9ca65dd2bb66a3f37d21ae0fec773229 *data/Ionosphere.rda
+e987247fa4b45c92191efd9322952737 *data/LetterRecognition.rda
+4b195a02c1b60d550803163f81f4f63c *data/Ozone.rda
+faf2bcc583d54f91a923786c58658584 *data/PimaIndiansDiabetes.rda
+96b377e07072cf0f789cd05d464db58c *data/PimaIndiansDiabetes2.rda
+7a34cc4153a892f6be26d2aacaa8b119 *data/Satellite.rda
+76d6d7760bc46ad2c658144248c68b64 *data/Servo.rda
+2dd38ffe9dda175c3c7609881a7b8f3f *data/Shuttle.rda
+802ff940ad8fe915446f93df51bbeda9 *data/Sonar.rda
+a6824b09d2afe54c3aebd8594797edd8 *data/Soybean.rda
+0e1aafd30e104a7807375132d647bf13 *data/Vehicle.rda
+4f980bc3c5691f964e72cd20fa56b9fb *data/Vowel.rda
+2a152427c8e223a7bd47395246f829a3 *data/Zoo.rda
+e90f25e177787d2fc29a5a05ac44a009 *inst/CITATION
+59dbb880b61f993c3cbb4ec427d36ed7 *man/BostonHousing.Rd
+9b97c1fded837b56696de3b4d82c81cc *man/BreastCancer.Rd
+bf5315d9df64f445e8cab7aa58043d4d *man/DNA.Rd
+94d8156f7120639b6ce988f69f13b566 *man/Glass.Rd
+a5025b39f090875ce1e4bddc8122ea8f *man/HouseVotes84.Rd
+d35fdfe73ad6292958190fb7fb1defd5 *man/Ionosphere.Rd
+524a0475ba287619cb88e539215c8d7e *man/LetterRecognition.Rd
+665705ea72f9d2bfbfb150cfbd406f21 *man/Ozone.Rd
+1d836860da4733a38a8b8b92cfcedc5a *man/PimaIndiansDiabetes.Rd
+9da4e9e39e0aefa7898bd0c799d4098c *man/Satellite.Rd
+5aa1a174b6de6c679baa66c5cebfaa6c *man/Servo.Rd
+c387b8164d53b9b741bf1df404bf1cc5 *man/Shuttle.Rd
+1770f07a5eef3b970353ca4f611d9383 *man/Sonar.Rd
+33db769ef50c9eb74343b51684d489a2 *man/Soybean.Rd
+be1022555ec0d16c199d542581e9cf5a *man/Vehicle.Rd
+7c68cb63adc44fd3aacf3481286c4a7b *man/Vowel.Rd
+289cc87ccdbe2ff2af466ce41b73deee *man/Zoo.Rd
+29a301b7f3aa4f802e66d9d85aac2017 *man/as.data.frame.mlbench.Rd
+9a0737fde2383cf20f14acf2f75ca92d *man/bayesclass.Rd
+97181d5baeeec757207efbf463b7c56d *man/mlbench.2dnormals.Rd
+68fe245c3189dc82befbdcba7ab5c753 *man/mlbench.cassini.Rd
+67107266a500f068db39255e9cb18561 *man/mlbench.circle.Rd
+2741fe81b108ad224e7728ce777898ce *man/mlbench.cuboids.Rd
+77f68ba4e4176203a1b1538a86b0a515 *man/mlbench.friedman1.Rd
+32273c6b6d722cc0b9e7305862d69ee7 *man/mlbench.friedman2.Rd
+694df9ab1efd67f92ddba2532cb1b24d *man/mlbench.friedman3.Rd
+407d84c1c4ad6207992bb3f1382e92b8 *man/mlbench.hypercube.Rd
+30a96c293a1a6787354d3b1b78d35db5 *man/mlbench.peak.Rd
+1d54014c9a19032f4095c6dc1dec2a55 *man/mlbench.ringnorm.Rd
+057d71cdf6ab03943252656547e515de *man/mlbench.shapes.Rd
+b54f8ecd12955cde96dbd8825f129369 *man/mlbench.simplex.Rd
+dae712045090b90ae7ba5f4ed4dadaf3 *man/mlbench.smiley.Rd
+0e0386db8230f0143e22c990b9180b09 *man/mlbench.spirals.Rd
+176063258f3cd02e90f6a25909dd0145 *man/mlbench.threenorm.Rd
+1bf9aee1bd5830130abfb9d60efd9875 *man/mlbench.twonorm.Rd
+7d899328b07839fd51caf4bc635d67bb *man/mlbench.waveform.Rd
+3f946079c1693b5fc45a6af3e7df9032 *man/mlbench.xor.Rd
+ab08b35363d5fa1dc40b68c91d68888e *man/plot.mlbench.Rd
+97c51b84051dd25177fd0ca9437826ee *src/waveform.c
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..8671fc5
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,18 @@
+useDynLib(mlbench)
+
+export(bayesclass)
+exportPattern("^mlbench.*")
+
+S3method(as.data.frame, mlbench)
+S3method(plot, mlbench)
+
+S3method(bayesclass, noerr)
+S3method(bayesclass, mlbench.2dnormals)
+S3method(bayesclass, mlbench.circle)
+S3method(bayesclass, mlbench.xor)
+S3method(bayesclass, mlbench.cassini)
+S3method(bayesclass, mlbench.cuboids)
+S3method(bayesclass, mlbench.twonorm)
+S3method(bayesclass, mlbench.threenorm)
+S3method(bayesclass, mlbench.ringnorm)
+
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..7e23d3b
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,102 @@
+Changes in Version 2.1-1
+
+ o Added a NAMESPACE.
+
+
+Changes in Version 2.1-0
+
+ o Removed dependencies on e1071 and scatterplot3d (the latter replaced by
+ cloud() from lattice).
+
+ o mlbench.corners() has been renamed to mlbench.hypercube().
+
+ o New function mlbench.simplex() by Manuel Eugster and Sebastian Kaiser.
+
+ o Bugfixes in the bayesclass() methods for ringnorm and threenorm
+ submitted by Julia Schiffer.
+
+
+Changes in Version 2.0-0
+
+ o Changed license of complete package to GPL-2.
+
+ o Recompressed all data sets using xz.
+
+ o Added examples to all help pages.
+
+ o Added info about UCI repository to mlbench.waveform.Rd.
+
+
+Changes in Version 1.1-6
+
+ o Improve CITATION.
+
+
+Changes in Version 1.1-5
+
+ o Fixed Rd bug and renamed README to LICENSE.
+
+
+Changes in Version 1.1-4
+
+ o Fixed documentation of S3 objects.
+
+
+Changes in Version 1.1-3
+
+ o Bugfix: data sets that are loaded using R code did not close the
+ corresponding connections.
+
+
+Changes in Version 1.1-2
+
+ o New data set Zoo.
+
+
+Changes in Version 1.1-2
+
+ o Converted the 'chas' columns of BostonHousing2 to a factor.
+
+ o Added a corrected version of PimaIndiansDiabetes as PimaIndiansDiabetes2.
+
+ o Added correct citation for UCI repository in various places.
+
+
+Changes in Version 1.1-1
+
+ o Add '...' argument to as.data.frame() methods for consistency
+ with R 2.4.0 or later.
+
+
+Changes in Version 1.1-0
+
+ o New artificial classification problems mlbench.shapes() and
+ mlbench.corners()
+
+ o Added corrected version of Boston housing data (thanks to
+ John Maindonald).
+
+
+Changes in Version 1.0-1
+
+ o Minor fixes in BostonHousing.Rd.
+
+
+Changes in Version 1.0-1
+
+  o Fixed a Bug in the waveform function (thanks to Karsten Lübke).
+
+ o Added a "ZipData: No" to the DESCRIPTION file.
+
+ o All data files are now in compressed format.
+
+
+Changes in Version 1.0-0
+
+ o New function mlbench.smiley().
+
+ o Version 1.0-0 is not really different from the previous release
+ (version 0.5-10), but we decided that it is stupid to have a
+ version number smaller than 1.0 for a package that is stable and
+ useful.
+
diff --git a/R/mlbench-class.R b/R/mlbench-class.R
new file mode 100644
index 0000000..ac3c5f8
--- /dev/null
+++ b/R/mlbench-class.R
@@ -0,0 +1,500 @@
+#
+# Copyright (C) 1997-2010 Friedrich Leisch
+# $Id: mlbench-class.R 4612 2010-10-08 09:51:20Z leisch $
+#
+
+mlbench.xor <- function(n, d=2){
+
+ x <- matrix(runif(n*d,-1,1),ncol=d,nrow=n)
+ if((d != as.integer(d)) || (d<2))
+ stop("d must be an integer >=2")
+
+ z <- rep(0, length=n)
+ for(k in 1:n){
+ if(x[k,1]>=0){
+ tmp <- (x[k,2:d] >=0)
+ z[k] <- 1+sum(tmp*2^(0:(d-2)))
+ }
+ else {
+ tmp <- !(x[k,2:d] >=0)
+ z[k] <- 1 + sum(tmp*2^(0:(d-2)))
+ }
+ }
+
+ retval <- list(x=x, classes=factor(z))
+ class(retval) <- c("mlbench.xor", "mlbench")
+ retval
+}
+
+mlbench.circle <- function(n, d=2){
+
+ x <- matrix(runif(n*d,-1,1),ncol=d,nrow=n)
+ if((d != as.integer(d)) || (d<2))
+ stop("d must be an integer >=2")
+
+ z <- rep(1, length=n)
+
+ r <- (2^(d-1) * gamma(1+d/2) / (pi^(d/2)))^(1/d)
+ z[apply(x, 1, function(x) sum(x^2)) > r^2] <- 2
+
+ retval <- list(x=x, classes=factor(z))
+ class(retval) <- c("mlbench.circle", "mlbench")
+ retval
+}
+
+mlbench.2dnormals <- function(n, cl=2, r=sqrt(cl), sd=1){
+
+ e <- sample(0:(cl-1), size=n, replace=TRUE)
+ m <- r * cbind(cos(pi/4 + e*2*pi/cl), sin(pi/4 + e*2*pi/cl))
+ x <- matrix(rnorm(2*n, sd=sd), ncol=2) + m
+
+ retval <- list(x=x, classes=factor(e+1))
+ class(retval) <- c("mlbench.2dnormals", "mlbench")
+ retval
+}
+
+
+mlbench.1spiral <- function(n, cycles=1, sd=0)
+{
+ w <- seq(0, by=cycles/n, length=n)
+ x <- matrix(0, nrow=n, ncol=2)
+
+ x[,1] <- (2*w+1)*cos(2*pi*w)/3;
+ x[,2] <- (2*w+1)*sin(2*pi*w)/3;
+
+ if(sd>0){
+ e <- rnorm(n, sd=sd)
+
+ xs <- cos(2*pi*w)-pi*(2*w+1)*sin(2*pi*w)
+ ys <- sin(2*pi*w)+pi*(2*w+1)*cos(2*pi*w)
+
+ nrm <- sqrt(xs^2+ys^2)
+ x[,1] <- x[,1] + e*ys/nrm
+ x[,2] <- x[,2] - e*xs/nrm
+ }
+ x
+}
+
+mlbench.spirals <- function(n, cycles=1, sd=0)
+{
+ x <- matrix(0, nrow=n, ncol=2)
+ c2 <- sample(1:n, size=n/2, replace=FALSE)
+ cl <- factor(rep(1, length=n), levels=as.character(1:2))
+ cl[c2] <- 2
+
+ x[-c2,] <- mlbench.1spiral(n=n-length(c2), cycles=cycles, sd=sd)
+ x[c2,] <- - mlbench.1spiral(n=length(c2), cycles=cycles, sd=sd)
+
+ retval <- list(x=x, classes=cl)
+ class(retval) <- c("mlbench.spirals", "mlbench")
+ retval
+}
+
+mlbench.ringnorm <- function(n, d=20)
+{
+ x <- matrix(0, nrow=n, ncol=d)
+ c2 <- sample(1:n, size=n/2, replace=FALSE)
+ cl <- factor(rep(1, length=n), levels=as.character(1:2))
+ cl[c2] <- 2
+
+ a <- 1/sqrt(d)
+ x[-c2,] <- matrix(rnorm(n=d*(n-length(c2)), sd=2), ncol=d)
+ x[c2,] <- matrix(rnorm(n=d*length(c2), mean=a), ncol=d)
+
+ retval <- list(x=x, classes=cl)
+ class(retval) <- c("mlbench.ringnorm", "mlbench")
+ retval
+}
+
+mlbench.twonorm <- function (n, d = 20)
+{
+ x <- matrix(0, nrow = n, ncol = d)
+ c2 <- sample(1:n, size = n/2, replace = FALSE)
+ cl <- factor(rep(1, length = n), levels = as.character(1:2))
+ cl[c2] <- 2
+ a <- 2/sqrt(d)
+ x[-c2, ] <- matrix(rnorm(n = d * (n - length(c2)), mean = a, sd = 1),
+ ncol = d)
+ x[c2, ] <- matrix(rnorm(n = d * length(c2), mean = -a), ncol = d)
+ retval <- list(x = x, classes = cl)
+ class(retval) <- c("mlbench.twonorm", "mlbench")
+ retval
+}
+
+mlbench.threenorm <- function (n, d = 20)
+{
+ x <- matrix(0, nrow = n, ncol = d)
+ c2 <- sample(1:n, size = n/2, replace = FALSE)
+ cl <- factor(rep(1, length = n), levels = as.character(1:2))
+ cl[c2] <- 2
+ c1 <- (1:n)[-c2]
+ a <- 2/sqrt(d)
+ for (i in c1)
+ {
+ distr <- as.logical(round(runif(1,0,1)))
+ if ( distr )
+ x[i, ] <- rnorm(n = d, mean = a)
+ else
+ x[i, ] <- rnorm(n = d, mean = -a)
+ }
+ m <- rep(c(a, -a), d/2)
+ if ((d %% 2)==1)
+ m <- c(m, a)
+ x[c2, ] <- matrix(rnorm(n = d * length(c2), mean = m),
+ ncol = d, byrow=TRUE)
+ retval <- list(x = x, classes = cl)
+ class(retval) <- c("mlbench.threenorm", "mlbench")
+ retval
+}
+
+mlbench.waveform <- function (n)
+{
+ Rnuminstances <- n
+ retval <- .C("waveform",
+ Rnuminstances = as.integer(Rnuminstances),
+ x = double(21*n),
+ type = integer(n),
+ PACKAGE = "mlbench")
+ x <- matrix (retval$x, ncol=21, byrow = TRUE)
+ retval <- list (x=x, classes=as.factor(retval$type+1))
+ class(retval) <- c("mlbench.waveform","mlbench")
+ return(retval)
+}
+
+mlbench.cassini <- function(n,relsize=c(2,2,1))
+{
+ cassinib <- function(x, a, c)
+ {
+ y <- numeric(2)
+ y[1] <- -sqrt(-c^2 - x^2 + sqrt(a^4 + 4*c^2*x^2))
+ y[2] <- sqrt(-c^2 - x^2 + sqrt(a^4 + 4*c^2*x^2))
+ y
+ }
+
+ circle <- function(x, r)
+ sqrt(r^2-x^2)
+
+
+ big1<-relsize[1]
+ big2<-relsize[2]
+ small<-relsize[3]
+ parts<-big1+big2+small
+ npiece<-n/parts
+ n1<-round(big1*npiece)
+ n2<-round(big2*npiece)
+ n3<-round(small*npiece)
+ if ((n1+n2+n3)!=n) n3<-n3+1
+ a<-1
+ C<-0.97
+ Cell<-sqrt((1+C^2)/3)
+ aell <- Cell*sqrt(2)
+ transl <- 1.1
+ r <- 0.6
+ tmima1<-matrix(0,ncol=2,nrow=n1)
+ tmima2<-matrix(0,ncol=2,nrow=n2)
+ tmima3<-matrix(0,ncol=2,nrow=n3)
+ n1found <- 0
+ while(n1found < n1)
+ {
+ x1 <- runif(1,min=-sqrt(a^2+C^2),max=sqrt(a^2+C^2))
+ y1 <- runif(1,min=-transl-1,max=-transl+0.6)
+ if ((y1 < cassinib(x1,a,C)[2]-transl) &&
+ (y1 > cassinib(x1,aell,Cell)[1]-transl))
+ {
+ n1found <- n1found +1
+ tmima1[n1found,]<-c(x1,y1)
+ }
+ }
+
+ n2found <- 0
+ while(n2found < n2)
+ {
+ x2 <- runif(1,min=-sqrt(a^2+C^2),max=sqrt(a^2+C^2))
+ y2 <- runif(1,max= transl+1,min=transl-0.6)
+ if ((y2 > cassinib(x2,a,C)[1]+transl) &&
+ (y2 < cassinib(x2,aell,Cell)[2]+transl))
+ {
+ n2found <- n2found +1
+ tmima2[n2found,]<-c(x2,y2)
+ }
+ }
+
+ n3found <- 0
+ while(n3found < n3)
+ {
+ x3<-runif(1,min=-r,max=r)
+ y3<-runif(1,min=-r,max=r)
+ if ((y3 > -circle(x3,r)) &&
+ (y3 < circle(x3,r)))
+ {
+ n3found <- n3found +1
+ tmima3[n3found,]<-c(x3,y3)
+ }
+ }
+ teliko <- rbind(tmima1,tmima2,tmima3)
+ cl <- factor(c(rep(1,n1),rep(2,n2),rep(3,n3)))
+ retval<-list(x=teliko,classes=cl)
+ class(retval) <- c("mlbench.cassini","mlbench")
+ retval
+}
+
+mlbench.cuboids <- function (n, relsize=c(2,2,2,1))
+{
+ big1 <- relsize[1]
+ big2 <- relsize[2]
+ big3 <- relsize[3]
+ small <- relsize[4]
+ parts<-big1+big2++big3+small
+ npiece<-n/parts
+ n1<-round(big1*npiece)
+ n2<-round(big2*npiece)
+ n3<-round(big3*npiece)
+ n4<-round(small*npiece)
+ if ((n1+n2+n3+n4)!=n) n4<-n4+1
+
+ x1 <- cbind(runif(n1,min=0,max=1),runif(n1,min=0.75,max=1.0),runif(n1,min=0.75,max=1))
+
+ x2 <- cbind(runif(n2,min=0.75,max=1.0),runif(n2,min=0,max=0.25),runif(n2,min=0,max=1))
+
+ x3 <- cbind(runif(n3,min=0.0,max=0.25),runif(n3,min=0.0,max=1),runif(n3,min=0,max=0.25))
+
+ x4 <- cbind(runif(n4,min=0.4,max=0.6),runif(n4,min=0.4,max=0.6),runif(n4,min=0.4,max=0.6))
+
+ x<-rbind(x1,x2,x3,x4)
+ retval <-list(x=x,classes=factor(c(rep(1,n1),rep(2,n2),
+ rep(3,n3),rep(4,n4))))
+ class(retval) <- c("mlbench.cuboids","mlbench")
+ return(retval)
+}
+
+
+mlbench.smiley <- function(n=500, sd1=.1, sd2=.05)
+{
+ n1 <- round(n/6)
+ n2 <- round(n/4)
+ n3 <- n - 2 * n1 - n2
+
+ x1 <- cbind(rnorm(n1, -.8, sd1), rnorm(n1, 1, sd1))
+ x2 <- cbind(rnorm(n1, .8, sd1), rnorm(n1, 1, sd1))
+
+ x3 <- cbind(runif(n2, -.2, .2), runif(n2, 0, .75))
+ x3[,1] <- x3[,1]*(1-x3[,2])
+
+ x4 <- runif(n3, -1, 1)
+ x4 <- cbind(x4, x4^2 - 1 + rnorm(n3, 0, sd2))
+
+ x <-
+ retval <- list(x = rbind(x1, x2, x3, x4),
+ classes=factor(c(rep(1,n1),rep(2,n1),rep(3,n2),rep(4,n3))))
+ class(retval) <- c("mlbench.smiley", "mlbench")
+ retval
+}
+
+
+mlbench.shapes <- function(n=500)
+{
+ n1 <- round(n/4)
+ n2 <- n-3*n1
+
+ x1 <- cbind(rnorm(n1, -1, .2), rnorm(n1, 1.5, .2))
+ x2 <- cbind(runif(n1, -1.5, -0.5), runif(n1, -2, 0))
+
+ x3 <- cbind(runif(n1, -1, 1), runif(n1, 1, 2))
+ x3[,1] <- x3[,1]*(2-x3[,2])+1
+
+ x4 <- runif(n2, 0.5, 2)
+ x4 <- cbind(x4, cos(4*x4)-x4+runif(n2,-.2,.2))
+
+ retval <- list(x = rbind(x1, x2, x3, x4),
+ classes=factor(c(rep(1,n1),rep(2,n1),rep(3,n1),rep(4,n2))))
+ class(retval) <- c("mlbench.shapes", "mlbench")
+ retval
+}
+
+###**********************************************************
+
+## Original ist bincombinations in e1071
+hypercube <- function(d) {
+
+ retval <- matrix(0, nrow=2^d, ncol=d)
+
+ for(n in 1:d){
+ retval[,n] <- rep(c(rep(0, (2^d/2^n)), rep(1, (2^d/2^n))),
+ length=2^d)
+ }
+ retval
+}
+
+
+
+mlbench.hypercube <- function(n=800, d=3, sides=rep(1,d), sd=0.1)
+{
+ m <- hypercube(d)
+ n1 <- round(n/2^d)
+
+ sides <- rep(sides, length=d)
+ z <- NULL
+
+ for(k in 1:nrow(m))
+ {
+ m[k,] <- m[k,]*sides
+ z1 <- matrix(rnorm(d*n1, sd=sd), ncol=d)
+ z1 <- sweep(z1, 2, m[k,], "+")
+ z <- rbind(z, z1)
+ }
+ retval <- list(x=z,
+ classes=factor(rep(1:nrow(m), rep(n1, nrow(m)))))
+ class(retval) <- c("mlbench.hypercube", "mlbench")
+ retval
+}
+
+## for backwards compatibility
+mlbench.corners <- function(...) mlbench.hypercube(...)
+
+###**********************************************************
+
+simplex <- function(d, sides, center = TRUE)
+{
+ m <- matrix(0, d+1, d)
+ cent <- rep(0,d)
+
+ m[2,1] <- sides
+ cent[1] <- sides/2
+ b <- sides/2
+
+ if(d>=2)
+ {
+ for(i in 2:d)
+ {
+ m[i+1,] <- cent
+ m[i+1,i] <- sqrt(sides^2-b^2)
+ cent[i] <- 1/(i+1)* m[i+1,i]
+ b <- (1- 1/(i+1)) * m[i+1,i]
+ }
+ }
+ if(center)
+ m <- t(t(m) - cent)
+ m
+}
+
+mlbench.simplex <- function (n = 800, d = 3, sides = 1, sd = 0.1, center=TRUE)
+{
+ m <- simplex(d=d , sides=sides, center=center)
+ n1 <- round(n/2^d)
+ z <- NULL
+ for (k in 1:nrow(m)) {
+ z1 <- matrix(rnorm(d * n1, sd = sd), ncol = d)
+ z1 <- sweep(z1, 2, m[k, ], "+")
+ z <- rbind(z, z1)
+ }
+ retval <- list(x = z, classes = factor(rep(1:nrow(m), rep(n1,
+ nrow(m)))))
+ class(retval) <- c("mlbench.simplex", "mlbench")
+ retval
+}
+
+
+###**********************************************************
+
+
+bayesclass <- function(z) UseMethod("bayesclass")
+
+bayesclass.noerr <- function(z) z$classes
+
+bayesclass.mlbench.xor <- bayesclass.noerr
+bayesclass.mlbench.circle <- bayesclass.noerr
+bayesclass.mlbench.cassini <- bayesclass.noerr
+bayesclass.mlbench.cuboids <- bayesclass.noerr
+
+
+bayesclass.mlbench.2dnormals <- function(z){
+
+ ncl <- length(levels(z$classes))
+ z <- z$x
+ for(k in 1:nrow(z)){
+ z[k,] <- z[k,] / sqrt(sum(z[k,]^2))
+ }
+ winkel <- acos(z[,1] * sign(z[,2])) + pi * (z[,2]<0)
+ winkel <- winkel - pi/ncl - pi/4
+ winkel[winkel < 0] <- winkel[winkel<0] + 2*pi
+ retval <- (winkel)%/%(2 * pi/ncl)
+ factor((retval+1)%%ncl+1)
+}
+
+bayesclass.mlbench.ringnorm <- function (z)
+ {
+ z <- z$x
+ ndata <- dim(z)[1]
+ ndim <- dim(z)[2]
+ a <- 1/sqrt(ndim)
+ center1 <- rep(0,ndim)
+ center2 <- rep(a,ndim)
+ m1 <- mahalanobis(z, center1, (4*diag(ndim)), inverted=FALSE) +
+ ndim*log(4)
+ m2 <- mahalanobis(z, center2, diag(ndim), inverted=FALSE)
+ as.factor ((m1 > m2) +1)
+ }
+
+bayesclass.mlbench.twonorm <- function (z)
+ {
+ z <- z$x
+ ndata <- dim(z)[1]
+ bayesclass <- integer(ndata)
+ ndim <- dim(z)[2]
+ a <- 2/sqrt(ndim)
+ center1 <- rep(a,ndim)
+ center2 <- rep(-a,ndim)
+ for (i in 1:ndata)
+ {
+ dist1 <- sum((z[i, ] - center1) ^2)
+ dist2 <- sum((z[i, ] - center2) ^2)
+ bayesclass[i] <- (dist1 > dist2) +1
+ }
+ as.factor(bayesclass)
+ }
+
+## Code by Julia Schiffner
+bayesclass.mlbench.threenorm <- function(z)
+{
+ z <- z$x
+ ndim <- dim(z)[2]
+ a <- 2/sqrt(ndim)
+
+ center1a <- rep(a, ndim)
+ center1b <- rep(-a, ndim)
+ center2 <- rep(c(a, -a), ndim/2)
+
+ if ((ndim%%2) == 1)
+ center2 <- c(center2, a)
+
+ m1 <- 0.5 * exp(-0.5 * mahalanobis(z, center1a, diag(ndim),
+ inverted = FALSE)) +
+ 0.5 * exp(-0.5 * mahalanobis(z, center1b,
+ diag(ndim), inverted = FALSE))
+
+ m2 <- exp(-0.5 * mahalanobis(z, center2, diag(ndim), inverted = FALSE))
+
+ as.factor((m1 < m2) + 1)
+}
+
+###**********************************************************
+
+as.data.frame.mlbench <- function(x, row.names=NULL, optional=FALSE, ...)
+{
+ data.frame(x=x$x, classes=x$classes)
+}
+
+
+plot.mlbench <- function(x, xlab="", ylab="", ...)
+{
+ if(ncol(x$x)>2){
+ pairs(x$x, col=as.integer(x$classes), ...)
+ }
+ else{
+ plot(x$x, col=as.integer(x$classes), xlab=xlab, ylab=ylab, ...)
+ }
+}
+
+
+
+
diff --git a/R/mlbench-regression.R b/R/mlbench-regression.R
new file mode 100644
index 0000000..e90de12
--- /dev/null
+++ b/R/mlbench-regression.R
@@ -0,0 +1,78 @@
+#
+# Copyright (C) 1997-2010 Friedrich Leisch
+# $Id: mlbench-regression.R 4612 2010-10-08 09:51:20Z leisch $
+#
+
+mlbench.friedman1 <- function(n, sd=1){
+
+ x <- matrix(runif(10*n),ncol=10)
+
+ y <- 10 * sin(pi * x[,1] * x[,2])
+ y <- y + 20 * ( x[,3] - 0.5)^2 + 10 * x[,4] + 5 * x[,5]
+
+ if(sd>0){
+ y <- y + rnorm(n, sd=sd)
+ }
+
+ list(x=x, y=y)
+}
+
+mlbench.friedman2 <- function(n, sd=125){
+
+ x <- cbind(runif(n,min=0,max=100),
+ runif(n,min=40*pi,max=560*pi),
+ runif(n,min=0,max=1),
+ runif(n,min=1,max=11))
+
+ y <- sqrt(x[,1]^2 + (x[,2]*x[,3] - 1/(x[,2]*x[,4]))^2)
+
+ if(sd>0){
+ y <- y + rnorm(n, sd=sd)
+ }
+
+ list(x=x, y=y)
+}
+
+mlbench.friedman3 <- function(n, sd=0.1){
+
+ x <- cbind(runif(n,min=0,max=100),
+ runif(n,min=40*pi,max=560*pi),
+ runif(n,min=0,max=1),
+ runif(n,min=1,max=11))
+
+ y <- atan( (x[,2]*x[,3] - 1/(x[,2]*x[,4])) / x[,1] )
+
+ if(sd>0){
+ y <- y + rnorm(n, sd=sd)
+ }
+
+ list(x=x, y=y)
+}
+
+mlbench.peak <- function(n, d=20)
+ {
+ metro <- numeric(n)
+ y <- numeric(n)
+ x <- matrix(0, nrow=n, ncol=d)
+ for (ndata in 1:n)
+ {
+ radius <- runif(1, min=0, max=3)
+ x[ndata,] <- rnorm(d)
+ metro[ndata] <- sqrt(sum(x[ndata,]^2))
+ x[ndata,] <- radius * (x[ndata,]/metro[ndata])
+ y[ndata] <- 25 * exp(-0.5* radius^2)
+ }
+ list(x=x, y=y)
+ }
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/README b/README
new file mode 100644
index 0000000..0b397a5
--- /dev/null
+++ b/README
@@ -0,0 +1,11 @@
+This package contains a collection of real-world datasets and
+functions for creating artificial datasets that work as benchmarks for
+machine learning methods.
+
+Most datasets have been taken from the UCI repository at
+ ftp://ftp.ics.uci.edu/pub/machine-learning-databases
+ http://www.ics.uci.edu/~mlearn/MLRepository.html
+See the corresponding help files for original data sources.
+
+Some conversions are done for smoother usage in R (like conversions to
+factors), all changes are recorded in file Data-Administration.R.
diff --git a/data/BostonHousing.rda b/data/BostonHousing.rda
new file mode 100644
index 0000000..73fe114
Binary files /dev/null and b/data/BostonHousing.rda differ
diff --git a/data/BostonHousing2.rda b/data/BostonHousing2.rda
new file mode 100644
index 0000000..6df4a37
Binary files /dev/null and b/data/BostonHousing2.rda differ
diff --git a/data/BreastCancer.rda b/data/BreastCancer.rda
new file mode 100644
index 0000000..7c6cc7f
Binary files /dev/null and b/data/BreastCancer.rda differ
diff --git a/data/DNA.rda b/data/DNA.rda
new file mode 100644
index 0000000..4774e9d
Binary files /dev/null and b/data/DNA.rda differ
diff --git a/data/Glass.rda b/data/Glass.rda
new file mode 100644
index 0000000..68986af
Binary files /dev/null and b/data/Glass.rda differ
diff --git a/data/HouseVotes84.rda b/data/HouseVotes84.rda
new file mode 100644
index 0000000..a38fef3
Binary files /dev/null and b/data/HouseVotes84.rda differ
diff --git a/data/Ionosphere.rda b/data/Ionosphere.rda
new file mode 100644
index 0000000..2799202
Binary files /dev/null and b/data/Ionosphere.rda differ
diff --git a/data/LetterRecognition.rda b/data/LetterRecognition.rda
new file mode 100644
index 0000000..755abdd
Binary files /dev/null and b/data/LetterRecognition.rda differ
diff --git a/data/Ozone.rda b/data/Ozone.rda
new file mode 100644
index 0000000..3811b79
Binary files /dev/null and b/data/Ozone.rda differ
diff --git a/data/PimaIndiansDiabetes.rda b/data/PimaIndiansDiabetes.rda
new file mode 100644
index 0000000..b3632f2
Binary files /dev/null and b/data/PimaIndiansDiabetes.rda differ
diff --git a/data/PimaIndiansDiabetes2.rda b/data/PimaIndiansDiabetes2.rda
new file mode 100644
index 0000000..8e0fe0c
Binary files /dev/null and b/data/PimaIndiansDiabetes2.rda differ
diff --git a/data/Satellite.rda b/data/Satellite.rda
new file mode 100644
index 0000000..88228a2
Binary files /dev/null and b/data/Satellite.rda differ
diff --git a/data/Servo.rda b/data/Servo.rda
new file mode 100644
index 0000000..6416be8
Binary files /dev/null and b/data/Servo.rda differ
diff --git a/data/Shuttle.rda b/data/Shuttle.rda
new file mode 100644
index 0000000..26da74f
Binary files /dev/null and b/data/Shuttle.rda differ
diff --git a/data/Sonar.rda b/data/Sonar.rda
new file mode 100644
index 0000000..660d834
Binary files /dev/null and b/data/Sonar.rda differ
diff --git a/data/Soybean.rda b/data/Soybean.rda
new file mode 100644
index 0000000..0c04d73
Binary files /dev/null and b/data/Soybean.rda differ
diff --git a/data/Vehicle.rda b/data/Vehicle.rda
new file mode 100644
index 0000000..04d8a6b
Binary files /dev/null and b/data/Vehicle.rda differ
diff --git a/data/Vowel.rda b/data/Vowel.rda
new file mode 100644
index 0000000..7c1718f
Binary files /dev/null and b/data/Vowel.rda differ
diff --git a/data/Zoo.rda b/data/Zoo.rda
new file mode 100644
index 0000000..306685f
Binary files /dev/null and b/data/Zoo.rda differ
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 49e36f1..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-r-cran-mlbench (2.1-1-1) unstable; urgency=low
-
- * Initial release (Closes: #826861).
-
- -- Andreas Tille <tille at debian.org> Thu, 09 Jun 2016 17:16:06 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index e59d2b2..0000000
--- a/debian/control
+++ /dev/null
@@ -1,23 +0,0 @@
-Source: r-cran-mlbench
-Maintainer: Debian Science Team <debian-science-maintainers at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Section: gnu-r
-Testsuite: autopkgtest
-Priority: optional
-Build-Depends: debhelper (>= 9),
- cdbs,
- r-base-dev (>= 3.0.0)
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-science/packages/R/r-cran-mlbench/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-science/packages/R/r-cran-mlbench/trunk/
-Homepage: https://cran.r-project.org/web/packages/mlbench/
-
-Package: r-cran-mlbench
-Architecture: any
-Depends: ${shlibs:Depends},
- ${misc:Depends},
- ${R:Depends}
-Description: GNU R Machine Learning Benchmark Problems
- This GNU R package provides a collection of artificial and real-world
- machine learning benchmark problems, including, e.g., several data sets
- from the UCI repository.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 7fbc7ea..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,31 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: mlbench
-Upstream-Contact: Friedrich Leisch <Friedrich.Leisch at R-project.org>
-Source: http://cran.r-project.org/web/packages/mlbench/
-
-Files: *
-Copyright: 2011-2016 Friedrich Leisch <Friedrich.Leisch at R-project.org>
- Evgenia Dimitriadou
-License: GPL-2
-
-Files: debian/*
-Copyright: 2016 Andreas Tille <tille at debian.org>
-License: GPL-2
-
-License: GPL-2
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License.
- .
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- .
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- .
- Comment: On Debian systems, the complete text of the GNU General Public
- License can be found in `/usr/share/common-licenses/GPL-2'.
-
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 2fbba2d..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/make -f
-
-include /usr/share/R/debian/r-cran.mk
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 0c207f8..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,2 +0,0 @@
-version=3
-http://cran.r-project.org/src/contrib/mlbench_([-\d.]*)\.tar\.gz
diff --git a/inst/CITATION b/inst/CITATION
new file mode 100644
index 0000000..a22f741
--- /dev/null
+++ b/inst/CITATION
@@ -0,0 +1,31 @@
+citHeader("To cite package mlbench in publications use:")
+
+## R >= 2.8.0 passes package metadata to citation().
+if(!exists("meta") || is.null(meta)) meta <- packageDescription("mlbench")
+year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date)
+vers <- paste("R package version", meta$Version)
+
+citEntry(entry="Manual",
+ title = "mlbench: Machine Learning Benchmark Problems",
+ author = "Friedrich Leisch and Evgenia Dimitriadou",
+ year = year,
+ note = vers,
+
+ textVersion =
+ paste("Friedrich Leisch & Evgenia Dimitriadou (", year,
+ "). mlbench: Machine Learning Benchmark Problems. ",
+ vers, ".", sep=""))
+
+citEntry(entry="Misc",
+ header="To cite data sets from the UCI repository (as indicated in the help pages) use:",
+ author = "D.J. Newman, S. Hettich, C.L. Blake and C.J. Merz",
+ year = 1998,
+ title = "UCI Repository of machine learning databases",
+ url = "http://www.ics.uci.edu/~mlearn/MLRepository.html",
+ institution = "University of California, Irvine, Dept. of Information and Computer Sciences",
+ textVersion =
+ paste("Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).",
+ "UCI Repository of machine learning databases",
+ "[http://www.ics.uci.edu/~mlearn/MLRepository.html].",
+ "Irvine, CA: University of California,",
+ "Department of Information and Computer Science."))
diff --git a/man/BostonHousing.Rd b/man/BostonHousing.Rd
new file mode 100644
index 0000000..e393578
--- /dev/null
+++ b/man/BostonHousing.Rd
@@ -0,0 +1,83 @@
+\name{BostonHousing}
+\alias{BostonHousing}
+\alias{BostonHousing2}
+\title{Boston Housing Data}
+\usage{
+data(BostonHousing)
+data(BostonHousing2)
+}
+\description{Housing data for 506 census tracts of Boston from the 1970
+ census. The dataframe
+ \code{BostonHousing} contains the original data by Harrison and
+ Rubinfeld (1979), the dataframe \code{BostonHousing2} the corrected
+ version with additional spatial information (see references below).
+}
+\format{The original data are 506 observations on 14 variables,
+ \code{medv} being the target variable:
+ \tabular{ll}{
+ crim \tab per capita crime rate by town \cr
+ zn \tab proportion of residential land zoned for lots over 25,000 sq.ft \cr
+ indus \tab proportion of non-retail business acres per town \cr
+ chas \tab Charles River dummy variable (= 1 if tract bounds river; 0 otherwise) \cr
+ nox \tab nitric oxides concentration (parts per 10 million) \cr
+ rm \tab average number of rooms per dwelling \cr
+ age \tab proportion of owner-occupied units built prior to 1940 \cr
+ dis \tab weighted distances to five Boston employment centres \cr
+ rad \tab index of accessibility to radial highways \cr
+ tax \tab full-value property-tax rate per USD 10,000 \cr
+ ptratio \tab pupil-teacher ratio by town \cr
+ b \tab \eqn{1000(B - 0.63)^2} where \eqn{B} is the proportion of blacks by town\cr
+ lstat \tab percentage of lower status of the population \cr
+ medv \tab median value of owner-occupied homes in USD 1000's
+ }
+ The corrected data set has the following additional columns:
+ \tabular{ll}{
+ cmedv \tab corrected median value of owner-occupied homes in USD 1000's \cr
+ town \tab name of town \cr
+ tract \tab census tract \cr
+ lon \tab longitude of census tract \cr
+ lat \tab latitude of census tract \cr
+ }
+}
+\references{
+ Harrison, D. and Rubinfeld, D.L. (1978).
+ Hedonic prices and the demand for clean air.
+ \emph{Journal of Environmental Economics and Management}, \bold{5},
+ 81--102.
+
+ Gilley, O.W., and R. Kelley Pace (1996). On the Harrison and Rubinfeld
+ Data. \emph{Journal of Environmental Economics and Management}, \bold{31},
+ 403--405. [Provided corrections and examined censoring.]
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+
+ Pace, R. Kelley, and O.W. Gilley (1997). Using the Spatial Configuration of
+ the Data to Improve Estimation. \emph{Journal of the Real Estate Finance
+ and Economics}, \bold{14}, 333--340. [Added georeferencing and spatial
+ estimation.]
+}
+\source{
+ The original data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html},
+ }
+ the corrected data have been taken from Statlib at
+ \itemize{
+ \item \url{http://lib.stat.cmu.edu/datasets/}
+ }
+ See Statlib and references there for details on the corrections.
+ Both were converted to R format by Friedrich Leisch.
+}
+\keyword{datasets}
+\examples{
+data(BostonHousing)
+summary(BostonHousing)
+
+data(BostonHousing2)
+summary(BostonHousing2)
+}
diff --git a/man/BreastCancer.Rd b/man/BreastCancer.Rd
new file mode 100644
index 0000000..375bf84
--- /dev/null
+++ b/man/BreastCancer.Rd
@@ -0,0 +1,86 @@
+\name{BreastCancer}
+\title{Wisconsin Breast Cancer Database}
+\usage{data(BreastCancer)}
+\alias{BreastCancer}
+\format{A data frame with 699 observations on 11 variables, one being a
+ character variable, 9 being ordered or nominal, and 1 target class.
+
+ \tabular{cll}{
+ [,1] \tab Id \tab Sample code number\cr
+ [,2] \tab Cl.thickness \tab Clump Thickness\cr
+ [,3] \tab Cell.size \tab Uniformity of Cell Size\cr
+ [,4] \tab Cell.shape \tab Uniformity of Cell Shape\cr
+ [,5] \tab Marg.adhesion \tab Marginal Adhesion\cr
+ [,6] \tab Epith.c.size \tab Single Epithelial Cell Size\cr
+ [,7] \tab Bare.nuclei \tab Bare Nuclei\cr
+ [,8] \tab Bl.cromatin \tab Bland Chromatin\cr
+ [,9] \tab Normal.nucleoli \tab Normal Nucleoli\cr
+[,10] \tab Mitoses \tab Mitoses\cr
+[,11] \tab Class \tab Class
+}
+ }
+\description{
+ The objective is to identify each of a number of benign or malignant
+ classes. Samples arrive periodically as
+ Dr. Wolberg reports his clinical cases.
+ The database therefore reflects this chronological grouping of the
+ data. This grouping information appears immediately below, having been
+ removed from the data itself. Each variable except for the first was
+ converted into 11 primitive numerical attributes with values ranging
+ from 0 through 10. There are 16 missing attribute values. See cited
+ below for more details.}
+\source{
+ \itemize{
+ \item Creator: Dr. William H. Wolberg (physician); University of
+ Wisconsin Hospital; Madison; Wisconsin; USA
+ \item Donor: Olvi Mangasarian (mangasarian at cs.wisc.edu)
+ \item Received: David W. Aha (aha at cs.jhu.edu)
+ }
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Evgenia Dimitriadou.
+}
+\references{
+ 1. Wolberg,W.H., \& Mangasarian,O.L. (1990). Multisurface method of
+ pattern separation for medical diagnosis applied to breast cytology. In
+ Proceedings of the National Academy of Sciences, 87,
+ 9193-9196.\cr
+ - Size of data set: only 369 instances (at that point in time)\cr
+ - Collected classification results: 1 trial only\cr
+ - Two pairs of parallel hyperplanes were found to be consistent with
+ 50\% of the data\cr
+ - Accuracy on remaining 50\% of dataset: 93.5\%\cr
+ - Three pairs of parallel hyperplanes were found to be consistent with
+ 67\% of data\cr
+ - Accuracy on remaining 33\% of dataset: 95.9\%
+
+ 2. Zhang,J. (1992). Selecting typical instances in instance-based
+ learning. In Proceedings of the Ninth International Machine
+ Learning Conference (pp. 470-479). Aberdeen, Scotland: Morgan
+ Kaufmann.\cr
+ - Size of data set: only 369 instances (at that point in time)\cr
+ - Applied 4 instance-based learning algorithms\cr
+ - Collected classification results averaged over 10 trials\cr
+ - Best accuracy result: \cr
+ - 1-nearest neighbor: 93.7\%\cr
+ - trained on 200 instances, tested on the other 169\cr
+ - Also of interest:\cr
+ - Using only typical instances: 92.2\% (storing only 23.1 instances)\cr
+ - trained on 200 instances, tested on the other 169
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\keyword{datasets}
+\examples{
+data(BreastCancer)
+summary(BreastCancer)
+}
+
diff --git a/man/DNA.Rd b/man/DNA.Rd
new file mode 100644
index 0000000..b7af715
--- /dev/null
+++ b/man/DNA.Rd
@@ -0,0 +1,80 @@
+\name{DNA}
+\title{Primate splice-junction gene sequences (DNA)}
+\usage{data(DNA)}
+\alias{DNA}
+\format{A data frame with 3,186 observations on 180 variables, all
+nominal and a target class.}
+
+\description{It consists of 3,186 data points (splice junctions). The
+ data points are described by 180 indicator binary
+ variables and the problem is to recognize the 3 classes (ei, ie,
+ neither), i.e., the boundaries between exons (the parts of the DNA
+ sequence retained after splicing) and introns (the parts of the DNA
+ sequence that are spliced out).
+
+ The StatLog dna dataset is a processed version of the Irvine
+ database described below. The main difference is that the
+ symbolic variables representing the nucleotides (only A,G,T,C)
+ were replaced by 3 binary indicator variables. Thus the original
+ 60 symbolic attributes were changed into 180 binary attributes.
+ The names of the examples were removed. The examples with
+ ambiguities were removed (there were very few of them, 4).
+ The StatLog version of this dataset was produced by Ross King
+ at Strathclyde University. For original details see the Irvine
+ database documentation.
+
+ The nucleotides A,C,G,T were given indicator values as follows:
+ \tabular{cl}{
+ \tab A -> 1 0 0\cr
+ \tab C -> 0 1 0\cr
+ \tab G -> 0 0 1\cr
+ \tab T -> 0 0 0\cr
+ }
+ Hint. Much better performance is generally observed if attributes
+ closest to the junction are used. In the StatLog version, this
+ means using attributes A61 to A120 only.
+}
+\source{
+ \itemize{
+ \item Source:\cr
+ - all examples taken from Genbank 64.1 (ftp site:
+ genbank.bio.net)\cr
+ - categories "ei" and "ie" include every "split-gene"
+ for primates in Genbank 64.1\cr
+ - non-splice examples taken from sequences known not to include
+ a splicing site\cr
+ \item Donor: G. Towell, M. Noordewier, and J. Shavlik,
+ {towell,shavlik}@cs.wisc.edu, noordewi at cs.rutgers.edu
+ }
+ These data have been taken from:
+ \itemize{
+ \item ftp.stams.strath.ac.uk/pub/Statlog
+ }
+ and were converted to R format by Evgenia Dimitriadou.
+}
+\references{
+ machine learning:\cr
+ -- M. O. Noordewier and G. G. Towell and J. W. Shavlik, 1991;
+ "Training Knowledge-Based Neural Networks to Recognize Genes in
+ DNA Sequences". Advances in Neural Information Processing Systems,
+ volume 3, Morgan Kaufmann.
+
+ -- G. G. Towell and J. W. Shavlik and M. W. Craven, 1991;
+ "Constructive Induction in Knowledge-Based Neural Networks",
+ In Proceedings of the Eighth International Machine Learning
+ Workshop, Morgan Kaufmann.
+
+ -- G. G. Towell, 1991;
+ "Symbolic Knowledge and Neural Networks: Insertion, Refinement, and
+ Extraction", PhD Thesis, University of Wisconsin - Madison.
+
+ -- G. G. Towell and J. W. Shavlik, 1992;
+ "Interpretation of Artificial Neural Networks: Mapping
+ Knowledge-based Neural Networks into Rules", In Advances in Neural
+ Information Processing Systems, volume 4, Morgan Kaufmann.
+}
+\keyword{datasets}
+\examples{
+data(DNA)
+summary(DNA)
+}
diff --git a/man/Glass.Rd b/man/Glass.Rd
new file mode 100644
index 0000000..bfdfb1d
--- /dev/null
+++ b/man/Glass.Rd
@@ -0,0 +1,55 @@
+\name{Glass}
+\alias{Glass}
+\title{Glass Identification Database}
+\usage{data(Glass)}
+\keyword{datasets}
+\description{A data frame with 214 observations containing examples of
+ the chemical analysis of 7 different types of glass. The problem is to
+ forecast the type of glass on the basis of the chemical analysis. The
+ study of classification of types of glass was motivated by
+ criminological investigation. At the scene of the crime, the glass left
+ can be used as evidence (if it is correctly identified!).
+}
+\format{
+ A data frame with 214 observations on 10 variables:
+ \tabular{cll}{
+ [,1] \tab RI \tab refractive index\cr
+ [,2] \tab Na \tab Sodium\cr
+ [,3] \tab Mg \tab Magnesium\cr
+ [,4] \tab Al \tab Aluminum\cr
+ [,5] \tab Si \tab Silicon\cr
+ [,6] \tab K \tab Potassium\cr
+ [,7] \tab Ca \tab Calcium\cr
+ [,8] \tab Ba \tab Barium\cr
+ [,9] \tab Fe \tab Iron \cr
+[,10] \tab Type \tab Type of glass (class attribute) \cr
+}
+}
+\source{
+ \itemize{
+ \item Creator: B. German, Central Research Establishment, Home
+ Office Forensic Science Service, Aldermaston, Reading, Berkshire
+ RG7 4PN
+ \item Donor: Vina Spiehler, Ph.D., DABFT, Diagnostic Products
+ Corporation
+ }
+
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Friedrich Leisch.
+}
+\references{
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\examples{
+data(Glass)
+summary(Glass)
+}
diff --git a/man/HouseVotes84.Rd b/man/HouseVotes84.Rd
new file mode 100644
index 0000000..c71cd9b
--- /dev/null
+++ b/man/HouseVotes84.Rd
@@ -0,0 +1,64 @@
+\name{HouseVotes84}
+\alias{HouseVotes84}
+\title{United States Congressional Voting Records 1984}
+\usage{data(HouseVotes84)}
+\description{
+ This data set includes votes for each of the U.S. House of
+ Representatives Congressmen on the 16 key votes identified by the
+ CQA. The CQA lists nine different types of votes: voted for, paired
+ for, and announced for (these three simplified to yea), voted
+ against, paired against, and announced against (these three
+ simplified to nay), voted present, voted present to avoid conflict
+ of interest, and did not vote or otherwise make a position known
+ (these three simplified to an unknown disposition).
+}
+\keyword{datasets}
+\format{
+ A data frame with 435 observations on 17 variables:
+ \tabular{rl}{
+ 1 \tab Class Name: 2 (democrat, republican)\cr
+ 2 \tab handicapped-infants: 2 (y,n)\cr
+ 3 \tab water-project-cost-sharing: 2 (y,n)\cr
+ 4 \tab adoption-of-the-budget-resolution: 2 (y,n)\cr
+ 5 \tab physician-fee-freeze: 2 (y,n)\cr
+ 6 \tab el-salvador-aid: 2 (y,n)\cr
+ 7 \tab religious-groups-in-schools: 2 (y,n)\cr
+ 8 \tab anti-satellite-test-ban: 2 (y,n)\cr
+ 9 \tab aid-to-nicaraguan-contras: 2 (y,n)\cr
+ 10 \tab mx-missile: 2 (y,n)\cr
+ 11 \tab immigration: 2 (y,n)\cr
+ 12 \tab synfuels-corporation-cutback: 2 (y,n)\cr
+ 13 \tab education-spending: 2 (y,n)\cr
+ 14 \tab superfund-right-to-sue: 2 (y,n)\cr
+ 15 \tab crime: 2 (y,n)\cr
+ 16 \tab duty-free-exports: 2 (y,n)\cr
+ 17 \tab export-administration-act-south-africa: 2 (y,n)\cr
+ }
+}
+\source{
+ \itemize{
+ \item Source: Congressional Quarterly Almanac, 98th Congress,
+ 2nd session 1984, Volume XL: Congressional Quarterly Inc.,
+ Washington, D.C., 1985
+ \item Donor: Jeff Schlimmer (Jeffrey.Schlimmer at a.gp.cs.cmu.edu)
+ }
+
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Friedrich Leisch.
+}
+\references{
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\examples{
+data(HouseVotes84)
+summary(HouseVotes84)
+}
diff --git a/man/Ionosphere.Rd b/man/Ionosphere.Rd
new file mode 100644
index 0000000..7cb5edb
--- /dev/null
+++ b/man/Ionosphere.Rd
@@ -0,0 +1,74 @@
+\name{Ionosphere}
+\title{Johns Hopkins University Ionosphere database}
+\usage{data(Ionosphere)}
+\alias{Ionosphere}
+\format{A data frame with 351 observations on 35 independent variables, some
+ numerical and 2 nominal, and one last defining the class.}
+
+\description{
+ This radar data was collected by a system in Goose Bay, Labrador. This
+ system consists of a phased array of 16 high-frequency antennas with a
+ total transmitted power on the order of 6.4 kilowatts. See the paper
+ for more details. The targets were free electrons in the ionosphere.
+ "good" radar returns are those showing evidence of some type of structure
+ in the ionosphere. "bad" returns are those that do not; their signals pass
+ through the ionosphere.
+
+ Received signals were processed using an autocorrelation function whose
+ arguments are the time of a pulse and the pulse number. There were 17
+ pulse numbers for the Goose Bay system. Instances in this database are
+ described by 2 attributes per pulse number, corresponding to the complex
+ values returned by the function resulting from the complex electromagnetic
+ signal. See cited below for more details.}
+\source{
+ \itemize{
+ \item Source: Space Physics Group; Applied Physics Laboratory;
+ Johns Hopkins University; Johns Hopkins Road; Laurel; MD 20723
+ \item Donor: Vince Sigillito (vgs at aplcen.apl.jhu.edu)
+ }
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Evgenia Dimitriadou.
+}
+\references{
+ Sigillito, V. G., Wing, S. P., Hutton, L. V., \& Baker, K. B. (1989).
+ Classification of radar returns from the ionosphere using neural
+ networks. Johns Hopkins APL Technical Digest, 10, 262-266.
+
+ They investigated using backprop and the perceptron training algorithm
+ on this database. Using the first 200 instances for training, which
+ were carefully split almost 50\% positive and 50\% negative, they found
+ that a "linear" perceptron attained 90.7\%, a "non-linear" perceptron
+ attained 92\%, and backprop an average of over 96\% accuracy on the
+ remaining 150 test instances, consisting of 123 "good" and only 24 "bad"
+ instances. (There was a counting error or some mistake somewhere; there
+ are a total of 351 rather than 350 instances in this domain.) Accuracy
+ on "good" instances was much higher than for "bad" instances. Backprop
+ was tested with several different numbers of hidden units (in [0,15])
+ and incremental results were also reported (corresponding to how well
+ the different variants of backprop did after a periodic number of
+ epochs).
+
+ David Aha (aha at ics.uci.edu) briefly investigated this database.
+ He found that nearest neighbor attains an accuracy of 92.1\%, that
+ Ross Quinlan's C4 algorithm attains 94.0\% (no windowing), and that
+ IB3 (Aha \& Kibler, IJCAI-1989) attained 96.7\% (parameter settings:
+ 70\% and 80\% for acceptance and dropping respectively).
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+
+}
+\keyword{datasets}
+\examples{
+data(Ionosphere)
+summary(Ionosphere)
+}
+
diff --git a/man/LetterRecognition.Rd b/man/LetterRecognition.Rd
new file mode 100644
index 0000000..95ca0bb
--- /dev/null
+++ b/man/LetterRecognition.Rd
@@ -0,0 +1,76 @@
+\name{LetterRecognition}
+\title{Letter Image Recognition Data}
+\usage{data(LetterRecognition)}
+\alias{LetterRecognition}
+\format{A data frame with 20,000 observations on 17 variables, the first
+ is a factor with levels A-Z, the remaining 16 are numeric.
+
+ \tabular{rll}{
+ [,1] \tab lettr \tab capital letter\cr
+ [,2] \tab x.box \tab horizontal position of box\cr
+ [,3] \tab y.box \tab vertical position of box\cr
+ [,4] \tab width \tab width of box\cr
+ [,5] \tab high \tab height of box\cr
+ [,6] \tab onpix \tab total number of on pixels\cr
+ [,7] \tab x.bar \tab mean x of on pixels in box\cr
+ [,8] \tab y.bar \tab mean y of on pixels in box\cr
+ [,9] \tab x2bar \tab mean x variance\cr
+[,10] \tab y2bar \tab mean y variance\cr
+[,11] \tab xybar \tab mean x y correlation\cr
+[,12] \tab x2ybr \tab mean of \eqn{x^2 y} \cr
+[,13] \tab xy2br \tab mean of \eqn{x y^2} \cr
+[,14] \tab x.ege \tab mean edge count left to right\cr
+[,15] \tab xegvy \tab correlation of x.ege with y\cr
+[,16] \tab y.ege \tab mean edge count bottom to top\cr
+[,17] \tab yegvx \tab correlation of y.ege with x\cr
+ }
+}
+\description{
+ The objective is to identify each of a large number of black-and-white
+ rectangular pixel displays as one of the 26 capital letters in the English
+ alphabet. The character images were based on 20 different fonts and each
+ letter within these 20 fonts was randomly distorted to produce a file of
+ 20,000 unique stimuli. Each stimulus was converted into 16 primitive
+ numerical attributes (statistical moments and edge counts) which were then
+ scaled to fit into a range of integer values from 0 through 15. We
+ typically train on the first 16000 items and then use the resulting model
+ to predict the letter category for the remaining 4000. See the article
+ cited below for more details.
+}
+\source{
+ \itemize{
+ \item Creator: David J. Slate
+ \item Odesta Corporation; 1890 Maple Ave; Suite 115; Evanston, IL 60201
+ \item Donor: David J. Slate (dave at math.nwu.edu) (708) 491-3867
+ }
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Friedrich Leisch.
+}
+\references{
+ P. W. Frey and D. J. Slate (Machine Learning Vol 6/2 March 91):
+ "Letter Recognition Using Holland-style Adaptive Classifiers".
+
+ The research for this article investigated the ability of several
+ variations of Holland-style adaptive classifier systems to learn to
+ correctly guess the letter categories associated with vectors of 16
+ integer attributes extracted from raster scan images of the letters.
+ The best accuracy obtained was a little over 80\%. It would be
+ interesting to see how well other methods do with the same data.
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\keyword{datasets}
+\examples{
+data(LetterRecognition)
+summary(LetterRecognition)
+}
+
diff --git a/man/Ozone.Rd b/man/Ozone.Rd
new file mode 100644
index 0000000..4fad4df
--- /dev/null
+++ b/man/Ozone.Rd
@@ -0,0 +1,38 @@
+\name{Ozone}
+\alias{Ozone}
+\title{Los Angeles ozone pollution data, 1976}
+\usage{data(Ozone)}
+\keyword{datasets}
+\description{A data frame with 366 observations on 13 variables, each
+ observation is one day}
+\format{
+ \tabular{rl}{
+ 1 \tab Month: 1 = January, ..., 12 = December\cr
+ 2 \tab Day of month\cr
+ 3 \tab Day of week: 1 = Monday, ..., 7 = Sunday\cr
+ 4 \tab Daily maximum one-hour-average ozone reading\cr
+ 5 \tab 500 millibar pressure height (m) measured at Vandenberg AFB\cr
+ 6 \tab Wind speed (mph) at Los Angeles International Airport (LAX)\cr
+ 7 \tab Humidity (\%) at LAX\cr
+ 8 \tab Temperature (degrees F) measured at Sandburg, CA\cr
+ 9 \tab Temperature (degrees F) measured at El Monte, CA\cr
+ 10 \tab Inversion base height (feet) at LAX\cr
+ 11 \tab Pressure gradient (mm Hg) from LAX to Daggett, CA\cr
+ 12 \tab Inversion base temperature (degrees F) at LAX\cr
+ 13 \tab Visibility (miles) measured at LAX\cr
+ }
+}
+\details{
+The problem is to predict the daily maximum one-hour-average
+ozone reading (V4).
+}
+\source{
+ Leo Breiman, Department of Statistics, UC Berkeley. Data used in
+ Leo Breiman and Jerome H. Friedman (1985), Estimating optimal
+ transformations for multiple regression and correlation, JASA, 80, pp.
+ 580-598.
+}
+\examples{
+data(Ozone)
+summary(Ozone)
+}
diff --git a/man/PimaIndiansDiabetes.Rd b/man/PimaIndiansDiabetes.Rd
new file mode 100644
index 0000000..04f0057
--- /dev/null
+++ b/man/PimaIndiansDiabetes.Rd
@@ -0,0 +1,74 @@
+\name{PimaIndiansDiabetes}
+\alias{PimaIndiansDiabetes}
+\alias{PimaIndiansDiabetes2}
+\title{Pima Indians Diabetes Database}
+\usage{
+ data(PimaIndiansDiabetes)
+ data(PimaIndiansDiabetes2)
+}
+\keyword{datasets}
+\description{
+ A data frame with 768 observations on 9 variables.}
+\format{
+ \tabular{rl}{
+ pregnant \tab Number of times pregnant\cr
+ glucose \tab Plasma glucose concentration (glucose tolerance test)\cr
+ pressure \tab Diastolic blood pressure (mm Hg)\cr
+ triceps \tab Triceps skin fold thickness (mm)\cr
+ insulin \tab 2-Hour serum insulin (mu U/ml)\cr
+ mass \tab Body mass index (weight in kg/(height in m)\^2)\cr
+ pedigree \tab Diabetes pedigree function\cr
+ age \tab Age (years)\cr
+ diabetes \tab Class variable (test for diabetes)\cr
+ }
+}
+\details{The data set \code{PimaIndiansDiabetes2} contains a corrected
+ version of the original data set. While the UCI repository index
+ claims that there are no missing values, closer inspection of the data
+ shows several physical impossibilities, e.g., blood pressure or body
+ mass index of 0. In \code{PimaIndiansDiabetes2}, all zero values of
+ \code{glucose}, \code{pressure}, \code{triceps}, \code{insulin} and
+ \code{mass} have been set to \code{NA}, see also Wahba et al (1995)
+ and Ripley (1996).
+}
+\source{
+ \itemize{
+ \item Original owners: National Institute of Diabetes and Digestive and
+ Kidney Diseases
+ \item Donor of database: Vincent Sigillito
+ (vgs at aplcen.apl.jhu.edu)
+ }
+
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Friedrich Leisch.
+}
+\references{
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+
+ Brian D. Ripley (1996), Pattern Recognition and Neural Networks,
+ Cambridge University Press, Cambridge.
+
+ Grace Whaba, Chong Gu, Yuedong Wang, and Richard Chappell (1995),
+ Soft Classification a.k.a. Risk Estimation via Penalized Log
+ Likelihood and Smoothing Spline Analysis of Variance, in D. H.
+ Wolpert (1995), The Mathematics of Generalization, 331-359,
+ Addison-Wesley, Reading, MA.
+}
+\examples{
+ data(PimaIndiansDiabetes)
+ summary(PimaIndiansDiabetes)
+
+ data(PimaIndiansDiabetes2)
+ summary(PimaIndiansDiabetes2)
+}
+
+
diff --git a/man/Satellite.Rd b/man/Satellite.Rd
new file mode 100644
index 0000000..e2a0d10
--- /dev/null
+++ b/man/Satellite.Rd
@@ -0,0 +1,113 @@
+\name{Satellite}
+\alias{Satellite}
+\title{Landsat Multi-Spectral Scanner Image Data}
+\description{
+ The database consists of the multi-spectral values of pixels in 3x3
+ neighbourhoods in a satellite image, and the classification associated
+ with the central pixel in each neighbourhood. The aim is to predict
+ this classification, given the multi-spectral values.
+}
+\usage{data(Satellite)}
+\format{
+ A data frame with 36 inputs (\code{x.1 \ldots x.36}) and one target
+ (\code{classes}).
+}
+\details{
+ One frame of Landsat MSS imagery consists of four digital images of
+ the same scene in different spectral bands. Two of these are in the
+ visible region (corresponding approximately to green and red regions
+ of the visible spectrum) and two are in the (near) infra-red. Each
+ pixel is a 8-bit binary word, with 0 corresponding to black and 255 to
+ white. The spatial resolution of a pixel is about 80m x 80m. Each
+ image contains 2340 x 3380 such pixels.
+
+ The database is a (tiny) sub-area of a scene, consisting of 82 x 100
+ pixels. Each line of data corresponds to a 3x3 square neighbourhood of
+ pixels completely contained within the 82x100 sub-area. Each line
+ contains the pixel values in the four spectral bands (converted to
+ ASCII) of each of the 9 pixels in the 3x3 neighbourhood and a number
+ indicating the classification label of the central pixel.
+
+ The classes are
+ \tabular{l}{
+ red soil\cr
+ cotton crop\cr
+ grey soil\cr
+ damp grey soil\cr
+ soil with vegetation stubble\cr
+ very damp grey soil\cr
+ }
+
+ The data is given in random order and certain lines of data have been
+ removed so you cannot reconstruct the original image from this
+ dataset.
+
+ In each line of data the four spectral values for the top-left pixel
+ are given first followed by the four spectral values for the
+ top-middle pixel and then those for the top-right pixel, and so on
+ with the pixels read out in sequence left-to-right and top-to-bottom.
+ Thus, the four spectral values for the central pixel are given by
+ attributes 17,18,19 and 20. If you like you can use only these four
+ attributes, while ignoring the others. This avoids the problem which
+ arises when a 3x3 neighbourhood straddles a boundary.
+}
+\section{Origin}{
+ The original Landsat data for this database was generated from data
+ purchased from NASA by the Australian Centre for Remote Sensing, and
+ used for research at: The Centre for Remote Sensing, University of New
+ South Wales, Kensington, PO Box 1, NSW 2033, Australia.
+
+ The sample database was generated taking a small section (82 rows and
+ 100 columns) from the original data. The binary values were converted
+ to their present ASCII form by Ashwin Srinivasan. The classification
+ for each pixel was performed on the basis of an actual site visit by
+ Ms. Karen Hall, when working for Professor John A. Richards, at the
+ Centre for Remote Sensing at the University of New South Wales,
+ Australia. Conversion to 3x3 neighbourhoods and splitting into test
+ and training sets was done by Alistair Sutherland.
+}
+\section{History}{
+ The Landsat satellite data is one of the many sources of information
+ available for a scene. The interpretation of a scene by integrating
+ spatial data of diverse types and resolutions including multispectral
+ and radar data, maps indicating topography, land use etc. is expected
+ to assume significant importance with the onset of an era characterised
+ by integrative approaches to remote sensing (for example, NASA's Earth
+ Observing System commencing this decade). Existing statistical methods
+ are ill-equipped for handling such diverse data types. Note that this
+ is not true for Landsat MSS data considered in isolation (as in
+ this sample database). This data satisfies the important requirements
+ of being numerical and at a single resolution, and standard
+ maximum-likelihood classification performs very well. Consequently,
+ for this data, it should be interesting to compare the performance
+ of other methods against the statistical approach.
+}
+\source{
+ Ashwin Srinivasan,
+ Department of Statistics and Data Modeling,
+ University of Strathclyde,
+ Glasgow,
+ Scotland,
+ UK,
+ \email{ross at uk.ac.turing}
+
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Friedrich Leisch.
+}
+\keyword{datasets}
+\references{
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\examples{
+data(Satellite)
+summary(Satellite)
+}
diff --git a/man/Servo.Rd b/man/Servo.Rd
new file mode 100644
index 0000000..3556a31
--- /dev/null
+++ b/man/Servo.Rd
@@ -0,0 +1,56 @@
+\name{Servo}
+\title{Servo Data}
+\usage{data(Servo)}
+\alias{Servo}
+\format{A data frame with 167 observations on 5 variables, 4 nominal and
+ 1 as the target class.}
+
+\description{This data set is from a simulation of a servo system
+ involving a servo amplifier, a motor, a lead screw/nut, and a
+  sliding carriage of some sort. It may have been one of the
+ translational axes of a robot on the 9th floor of the AI lab. In any
+ case, the output value is almost certainly a rise time, or the time
+ required for the system to respond to a step change in a position
+ set point. The variables that describe the data set and their values
+ are the following:
+
+ \tabular{cll}{
+ [,1] \tab Motor \tab A,B,C,D,E\cr
+ [,2] \tab Screw \tab A,B,C,D,E\cr
+ [,3] \tab Pgain \tab 3,4,5,6\cr
+ [,4] \tab Vgain \tab 1,2,3,4,5\cr
+ [,5] \tab Class \tab 0.13 to 7.10
+ }
+}
+\source{
+ \itemize{
+ \item Creator: Karl Ulrich (MIT) in 1986
+ \item Donor: Ross Quinlan
+ }
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Evgenia Dimitriadou.
+}
+\references{
+ 1. Quinlan, J.R., "Learning with continuous classes", Proc. 5th
+ Australian Joint Conference on AI (eds A. Adams and L. Sterling),
+ Singapore: World Scientific, 1992
+ 2. Quinlan, J.R., "Combining instance-based and model-based
+ learning", Proc. ML'93 (ed P.E. Utgoff), San Mateo: Morgan Kaufmann
+ 1993
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\keyword{datasets}
+\examples{
+data(Servo)
+summary(Servo)
+}
diff --git a/man/Shuttle.Rd b/man/Shuttle.Rd
new file mode 100644
index 0000000..8ddecb8
--- /dev/null
+++ b/man/Shuttle.Rd
@@ -0,0 +1,42 @@
+\name{Shuttle}
+\title{Shuttle Dataset (Statlog version)}
+\usage{data(Shuttle)}
+\alias{Shuttle}
+\format{A data frame with 58,000 observations on 9 numerical independent
+ variables and 1 target class.}
+
+\description{The shuttle dataset contains 9 attributes all of which are
+ numerical with the first one being time. The last column is the class
+ with the following 7 levels: Rad.Flow, Fpv.Close, Fpv.Open, High, Bypass,
+ Bpv.Close, Bpv.Open.
+
+ Approximately 80\% of the data belongs to class 1. Therefore the
+ default accuracy is about 80\%. The aim here is to obtain an
+ accuracy of 99 - 99.9\%.
+
+}
+\source{
+ \itemize{
+ \item Source: Jason Catlett of Basser Department of Computer
+ Science; University of Sydney; N.S.W.; Australia.
+ }
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Evgenia Dimitriadou.
+}
+\keyword{datasets}
+\references{
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\examples{
+data(Shuttle)
+summary(Shuttle)
+}
diff --git a/man/Sonar.Rd b/man/Sonar.Rd
new file mode 100644
index 0000000..20fe344
--- /dev/null
+++ b/man/Sonar.Rd
@@ -0,0 +1,60 @@
+\name{Sonar}
+\title{Sonar, Mines vs. Rocks}
+\usage{data(Sonar)}
+\alias{Sonar}
+\format{A data frame with 208 observations on 61 variables, all numerical and one (the Class) nominal.}
+
+\description{This is the data set used by Gorman and Sejnowski in their
+ study of the classification of sonar signals using a neural network
+ [1]. The task is to train a network to discriminate between sonar
+ signals bounced off a metal cylinder and those bounced off a roughly
+ cylindrical rock.
+
+ Each pattern is a set of 60 numbers in the range 0.0 to 1.0. Each
+ number represents the energy within a particular frequency band,
+ integrated over a certain period of time. The integration aperture
+ for higher frequencies occur later in time, since these frequencies
+ are transmitted later during the chirp.
+
+ The label associated with each record contains the letter "R" if the
+ object is a rock and "M" if it is a mine (metal cylinder). The
+ numbers in the labels are in increasing order of aspect angle, but
+ they do not encode the angle directly.
+}
+
+\source{
+ \itemize{
+ \item Contribution: Terry Sejnowski, Salk Institute and
+	University of California, San Diego.
+ \item Development: R. Paul Gorman, Allied-Signal Aerospace
+ Technology Center.
+ \item Maintainer: Scott E. Fahlman
+
+ }
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Evgenia Dimitriadou.
+}
+
+\references{
+ Gorman, R. P., and Sejnowski, T. J. (1988). "Analysis of Hidden
+ Units in a Layered Network Trained to Classify Sonar Targets" in
+ Neural Networks, Vol. 1, pp. 75-89.
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\keyword{datasets}
+\examples{
+data(Sonar)
+summary(Sonar)
+}
+
+
diff --git a/man/Soybean.Rd b/man/Soybean.Rd
new file mode 100644
index 0000000..d07efb3
--- /dev/null
+++ b/man/Soybean.Rd
@@ -0,0 +1,106 @@
+\name{Soybean}
+\title{Soybean Database}
+\usage{data(Soybean)}
+\alias{Soybean}
+\format{A data frame with 683 observations on 36 variables. There are 35
+  categorical attributes, some nominal and some ordered, and one
+  factor denoting the class.
+ \tabular{cll}{
+ [,1] \tab Class \tab the 19 classes\cr
+ [,2] \tab date \tab
+ apr(0),may(1),june(2),july(3),aug(4),sept(5),oct(6).\cr
+ [,3] \tab plant.stand \tab normal(0),lt-normal(1).\cr
+ [,4] \tab precip \tab lt-norm(0),norm(1),gt-norm(2).\cr
+ [,5] \tab temp \tab lt-norm(0),norm(1),gt-norm(2).\cr
+ [,6] \tab hail \tab yes(0),no(1).\cr
+ [,7] \tab crop.hist \tab dif-lst-yr(0),s-l-y(1),s-l-2-y(2),
+ s-l-7-y(3).\cr
+ [,8] \tab area.dam \tab
+ scatter(0),low-area(1),upper-ar(2),whole-field(3).\cr
+ [,9] \tab sever \tab minor(0),pot-severe(1),severe(2).\cr
+ [,10] \tab seed.tmt \tab none(0),fungicide(1),other(2).\cr
+ [,11] \tab germ \tab 90-100\%(0),80-89\%(1),lt-80\%(2).\cr
+ [,12] \tab plant.growth \tab norm(0),abnorm(1).\cr
+ [,13] \tab leaves \tab norm(0),abnorm(1).\cr
+ [,14] \tab leaf.halo \tab
+ absent(0),yellow-halos(1),no-yellow-halos(2).\cr
+ [,15] \tab leaf.marg \tab w-s-marg(0),no-w-s-marg(1),dna(2).\cr
+ [,16] \tab leaf.size \tab lt-1/8(0),gt-1/8(1),dna(2).\cr
+ [,17] \tab leaf.shread \tab absent(0),present(1).\cr
+ [,18] \tab leaf.malf \tab absent(0),present(1).\cr
+ [,19] \tab leaf.mild \tab absent(0),upper-surf(1),lower-surf(2).\cr
+ [,20] \tab stem \tab norm(0),abnorm(1).\cr
+ [,21] \tab lodging \tab yes(0),no(1).\cr
+ [,22] \tab stem.cankers \tab
+ absent(0),below-soil(1),above-s(2),ab-sec-nde(3).\cr
+ [,23] \tab canker.lesion \tab dna(0),brown(1),dk-brown-blk(2),tan(3).\cr
+ [,24] \tab fruiting.bodies \tab absent(0),present(1).\cr
+ [,25] \tab ext.decay \tab absent(0),firm-and-dry(1),watery(2).\cr
+ [,26] \tab mycelium \tab absent(0),present(1).\cr
+ [,27] \tab int.discolor \tab none(0),brown(1),black(2).\cr
+ [,28] \tab sclerotia \tab absent(0),present(1).\cr
+ [,29] \tab fruit.pods \tab norm(0),diseased(1),few-present(2),dna(3).\cr
+ [,30] \tab fruit.spots \tab
+ absent(0),col(1),br-w/blk-speck(2),distort(3),dna(4).\cr
+ [,31] \tab seed \tab norm(0),abnorm(1).\cr
+ [,32] \tab mold.growth \tab absent(0),present(1).\cr
+ [,33] \tab seed.discolor \tab absent(0),present(1).\cr
+ [,34] \tab seed.size \tab norm(0),lt-norm(1).\cr
+ [,35] \tab shriveling \tab absent(0),present(1).\cr
+ [,36] \tab roots \tab norm(0),rotted(1),galls-cysts(2).
+
+}
+ }
+
+\description{
+ There are 19 classes, only the first 15 of which have been used in prior
+ work. The folklore seems to be that the last four classes are
+ unjustified by the data since they have so few examples.
+ There are 35 categorical attributes, some nominal and some ordered. The
+ value ``dna'' means does not apply. The values for attributes are
+ encoded numerically, with the first value encoded as ``0,'' the second as
+ ``1,'' and so forth.
+ }
+\source{
+ \itemize{
+ \item Source: R.S. Michalski and R.L. Chilausky "Learning by
+ Being Told and Learning from Examples: An Experimental
+ Comparison of the Two Methods of Knowledge Acquisition in the
+ Context of Developing an Expert System for Soybean Disease
+ Diagnosis", International Journal of Policy Analysis and
+ Information Systems, Vol. 4, No. 2, 1980.
+ \item Donor: Ming Tan & Jeff Schlimmer (Jeff.Schlimmer\%cs.cmu.edu)
+ }
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Evgenia Dimitriadou.
+}
+\references{
+ Tan, M., & Eshelman, L. (1988). Using weighted networks to represent
+ classification knowledge in noisy domains. Proceedings of the Fifth
+ International Conference on Machine Learning (pp. 121-134). Ann Arbor,
+ Michigan: Morgan Kaufmann.
+ -- IWN recorded a 97.1\% classification accuracy
+ -- 290 training and 340 test instances
+
+ Fisher,D.H. & Schlimmer,J.C. (1988). Concept Simplification and
+ Predictive Accuracy. Proceedings of the Fifth
+ International Conference on Machine Learning (pp. 22-28). Ann Arbor,
+ Michigan: Morgan Kaufmann.
+ -- Notes why this database is highly predictable
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\keyword{datasets}
+\examples{
+data(Soybean)
+summary(Soybean)
+}
diff --git a/man/Vehicle.Rd b/man/Vehicle.Rd
new file mode 100644
index 0000000..b0f5991
--- /dev/null
+++ b/man/Vehicle.Rd
@@ -0,0 +1,80 @@
+\name{Vehicle}
+\alias{Vehicle}
+\title{Vehicle Silhouettes}
+\usage{data(Vehicle)}
+
+\keyword{datasets}
+\format{
+ A data frame with 846 observations on 19 variables, all numerical
+ and one nominal defining the class of the objects.
+
+ \tabular{cll}{
+ [,1] \tab Comp \tab Compactness\cr
+ [,2] \tab Circ \tab Circularity\cr
+ [,3] \tab D.Circ \tab Distance Circularity\cr
+ [,4] \tab Rad.Ra \tab Radius ratio\cr
+ [,5] \tab Pr.Axis.Ra \tab pr.axis aspect ratio\cr
+ [,6] \tab Max.L.Ra \tab max.length aspect ratio\cr
+ [,7] \tab Scat.Ra \tab scatter ratio\cr
+ [,8] \tab Elong \tab elongatedness\cr
+ [,9] \tab Pr.Axis.Rect \tab pr.axis rectangularity\cr
+ [,10] \tab Max.L.Rect \tab max.length rectangularity\cr
+ [,11] \tab Sc.Var.Maxis \tab scaled variance along major axis\cr
+ [,12] \tab Sc.Var.maxis \tab scaled variance along minor axis\cr
+ [,13] \tab Ra.Gyr \tab scaled radius of gyration\cr
+ [,14] \tab Skew.Maxis \tab skewness about major axis\cr
+ [,15] \tab Skew.maxis \tab skewness about minor axis\cr
+ [,16] \tab Kurt.maxis \tab kurtosis about minor axis\cr
+ [,17] \tab Kurt.Maxis \tab kurtosis about major axis\cr
+ [,18] \tab Holl.Ra \tab hollows ratio\cr
+ [,19] \tab Class \tab type
+ }
+}
+\description{
+ The purpose is to classify a given silhouette as one of four types
+ of vehicle, using a set of features extracted from the
+ silhouette. The vehicle may be viewed from one of many different
+ angles. The features were extracted from the silhouettes by the HIPS
+ (Hierarchical Image Processing System) extension BINATTS, which
+ extracts a combination of scale independent features utilising both
+ classical moments based measures such as scaled variance, skewness
+ and kurtosis about the major/minor axes and heuristic measures such
+ as hollows, circularity, rectangularity and compactness.
+
+ Four "Corgie" model vehicles were used for the experiment: a double
+  decker bus, Chevrolet van, Saab 9000 and an Opel Manta 400. This
+ particular combination of vehicles was chosen with the expectation
+ that the bus, van and either one of the cars would be readily
+ distinguishable, but it would be more difficult to distinguish
+ between the cars.
+}
+\source{
+ \itemize{
+ \item Creator: Drs.Pete Mowforth and Barry Shepherd, Turing
+ Institute, Glasgow, Scotland.
+ }
+
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Evgenia Dimitriadou.
+}
+
+\references{
+ Turing Institute Research Memorandum TIRM-87-018 "Vehicle
+ Recognition Using Rule Based Methods" by Siebert,JP (March 1987)
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+ }
+
+ \examples{
+data(Vehicle)
+summary(Vehicle)
+}
diff --git a/man/Vowel.Rd b/man/Vowel.Rd
new file mode 100644
index 0000000..6263ee9
--- /dev/null
+++ b/man/Vowel.Rd
@@ -0,0 +1,53 @@
+\name{Vowel}
+\alias{Vowel}
+\title{Vowel Recognition (Deterding data)}
+\usage{data(Vowel)}
+\keyword{datasets}
+\format{
+ A data frame with 990 observations on 10 independent variables, one
+  nominal and the others numerical, and 1 as the target class.}
+
+\description{Speaker independent recognition of the eleven steady state
+ vowels of British English using a specified training set of lpc
+ derived log area ratios. The vowels are indexed by integers
+ 0-10. For each utterance, there are ten floating-point input values,
+ with array indices 0-9. The vowels are the following: hid, hId, hEd,
+ hAd, hYd, had, hOd, hod, hUd, hud, hed.
+}
+\source{
+ \itemize{
+ \item Creator: Tony Robinson
+ \item Maintainer: Scott E. Fahlman, CMU
+ }
+
+ These data have been taken from the UCI Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ and were converted to R format by Evgenia Dimitriadou.
+}
+
+\references{
+ D. H. Deterding, 1989, University of Cambridge, "Speaker
+ Normalisation for Automatic Speech Recognition", submitted for PhD.
+
+ M. Niranjan and F. Fallside, 1988, Cambridge University Engineering
+ Department, "Neural Networks and Radial Basis Functions in
+ Classifying Static Speech Patterns", CUED/F-INFENG/TR.22.
+
+ Steve Renals and Richard Rohwer, "Phoneme Classification Experiments
+ Using Radial Basis Functions", Submitted to the International Joint
+ Conference on Neural Networks, Washington, 1989.
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\examples{
+data(Vowel)
+summary(Vowel)
+}
diff --git a/man/Zoo.Rd b/man/Zoo.Rd
new file mode 100644
index 0000000..6b06cf5
--- /dev/null
+++ b/man/Zoo.Rd
@@ -0,0 +1,48 @@
+\name{Zoo}
+\alias{Zoo}
+\title{Zoo Data}
+\usage{
+data(Zoo)
+}
+\description{A simple dataset containing 17 (mostly logical) variables
+ on 101 animals.}
+\format{A data frame with 17 columns: hair, feathers, eggs, milk,
+ airborne, aquatic, predator, toothed, backbone, breathes, venomous,
+ fins, legs, tail, domestic, catsize, type.
+
+ Most variables are logical and indicate whether the corresponding
+  animal has the corresponding characteristic or not. The only 2
+ exceptions are: \code{legs} takes
+ values 0, 2, 4, 5, 6, and 8. \code{type} is a grouping of the animals
+ into 7 groups, see the example section for the detailed list.
+}
+\details{
+ Ask the original donor of the data why \emph{girl} is an animal.
+}
+\references{
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+\source{
+ The original data have been donated by Richard S. Forsyth to the UCI
+ Repository Of Machine Learning
+ Databases at
+ \itemize{
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}.
+ }
+ and were converted to R format by Friedrich Leisch.
+}
+\keyword{datasets}
+\examples{
+data(Zoo)
+summary(Zoo)
+
+## see the animals grouped by type
+tapply(rownames(Zoo), Zoo$type, function(x) x)
+
+## which animals have fins?
+rownames(Zoo)[Zoo$fins]
+}
diff --git a/man/as.data.frame.mlbench.Rd b/man/as.data.frame.mlbench.Rd
new file mode 100644
index 0000000..e8ba7b4
--- /dev/null
+++ b/man/as.data.frame.mlbench.Rd
@@ -0,0 +1,19 @@
+\name{as.data.frame.mlbench}
+\alias{as.data.frame.mlbench}
+\title{Convert an mlbench object to a dataframe}
+\description{
+ Converts \code{x} (which is basically a list) to a dataframe.
+}
+\usage{
+\method{as.data.frame}{mlbench}(x, row.names=NULL, optional=FALSE, \dots)
+}
+\arguments{
+ \item{x}{Object of class \code{"mlbench"}.}
+ \item{row.names,optional,\dots}{currently ignored.}
+}
+\examples{
+p <- mlbench.xor(5)
+p
+as.data.frame(p)
+}
+\keyword{manip}
diff --git a/man/bayesclass.Rd b/man/bayesclass.Rd
new file mode 100644
index 0000000..5a0bdb2
--- /dev/null
+++ b/man/bayesclass.Rd
@@ -0,0 +1,39 @@
+\name{bayesclass}
+\alias{bayesclass}
+\alias{bayesclass.noerr}
+\alias{bayesclass.mlbench.2dnormals}
+\alias{bayesclass.mlbench.circle}
+\alias{bayesclass.mlbench.xor}
+\alias{bayesclass.mlbench.cassini}
+\alias{bayesclass.mlbench.cuboids}
+\alias{bayesclass.mlbench.twonorm}
+\alias{bayesclass.mlbench.threenorm}
+\alias{bayesclass.mlbench.ringnorm}
+
+\title{Bayes classifier}
+\usage{
+bayesclass(z)
+}
+\arguments{
+ \item{z}{An object of class \code{"mlbench"}.}
+}
+\description{
+ Returns the decision of the (optimal) Bayes classifier for a given
+ data set. This is a generic function, i.e., there are different
+ methods for the various mlbench problems.
+
+ If the classes of the problem do not overlap, then the Bayes
+ decision is identical to the true classification, which is
+ implemented as the dummy function \code{bayesclass.noerr} (which
+ simply returns \code{z$classes} and is used for all problems with
+ disjunct classes).
+}
+\examples{
+# 6 overlapping classes
+p <- mlbench.2dnormals(500,6)
+plot(p)
+
+plot(p$x, col=as.numeric(bayesclass(p)))
+}
+
+\keyword{classif}
diff --git a/man/mlbench.2dnormals.Rd b/man/mlbench.2dnormals.Rd
new file mode 100644
index 0000000..3f9407f
--- /dev/null
+++ b/man/mlbench.2dnormals.Rd
@@ -0,0 +1,30 @@
+\name{mlbench.2dnormals}
+\alias{mlbench.2dnormals}
+\title{2-dimensional Gaussian Problem}
+\usage{
+mlbench.2dnormals(n, cl=2, r=sqrt(cl), sd=1)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{cl}{number of classes}
+ \item{r}{radius at which the centers of the classes are located}
+ \item{sd}{standard deviation of the Gaussians}
+}
+\value{Returns an object of class \code{"bayes.2dnormals"} with components
+ \item{x}{input values}
+ \item{classes}{factor vector of length \code{n} with target classes}
+}
+\description{
+ Each of the \code{cl} classes consists of a 2-dimensional
+ Gaussian. The centers are equally spaced on a circle around the
+ origin with radius \code{r}.
+}
+\examples{
+# 2 classes
+p <- mlbench.2dnormals(500,2)
+plot(p)
+# 6 classes
+p <- mlbench.2dnormals(500,6)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.cassini.Rd b/man/mlbench.cassini.Rd
new file mode 100644
index 0000000..57dc977
--- /dev/null
+++ b/man/mlbench.cassini.Rd
@@ -0,0 +1,28 @@
+\name{mlbench.cassini}
+\alias{mlbench.cassini}
+\title{Cassini: A 2 Dimensional Problem}
+\usage{
+mlbench.cassini(n, relsize=c(2,2,1))
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{relsize}{relative size of the classes (vector of length 3)}
+}
+\value{Returns an object of class \code{"mlbench.cassini"} with components
+ \item{x}{input values}
+ \item{classes}{vector of length \code{n} with target classes}
+}
+\description{
+ The inputs of the cassini problem are uniformly distributed on
+ a \code{2}-dimensional space within 3 structures. The 2 external
+ structures (classes) are banana-shaped structures and in between them, the
+ middle structure (class) is a circle.
+}
+
+\author{Evgenia Dimitriadou and Andreas Weingessel}
+
+\examples{
+p <- mlbench.cassini(5000)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.circle.Rd b/man/mlbench.circle.Rd
new file mode 100644
index 0000000..5de02ea
--- /dev/null
+++ b/man/mlbench.circle.Rd
@@ -0,0 +1,32 @@
+\name{mlbench.circle}
+\alias{mlbench.circle}
+\title{Circle in a Square Problem}
+\usage{
+mlbench.circle(n, d=2)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{d}{dimension of the circle problem}
+}
+\value{Returns an object of class \code{"mlbench.circle"} with components
+ \item{x}{input values}
+ \item{classes}{factor vector of length \code{n} with target classes}
+}
+\description{
+ The inputs of the circle problem are uniformly distributed on
+ the \code{d}-dimensional cube with corners \eqn{\{\pm 1\}}{\{+-1\}}.
+ This is a 2-class problem: The first class is a \code{d}-dimensional
+ ball in the middle of the cube, the remainder forms the second
+ class. The size of the ball is chosen such that both classes have equal
+ prior probability 0.5.
+}
+\examples{
+# 2d example
+p<-mlbench.circle(300,2)
+plot(p)
+#
+# 3d example
+p<-mlbench.circle(300,3)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.cuboids.Rd b/man/mlbench.cuboids.Rd
new file mode 100644
index 0000000..e5a09fa
--- /dev/null
+++ b/man/mlbench.cuboids.Rd
@@ -0,0 +1,32 @@
+\name{mlbench.cuboids}
+\alias{mlbench.cuboids}
+\title{Cuboids: A 3 Dimensional Problem}
+\usage{
+mlbench.cuboids(n, relsize=c(2,2,2,1))
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{relsize}{relative size of the classes (vector of length 4)}
+}
+\value{Returns an object of class \code{"mlbench.cuboids"} with components
+ \item{x}{input values}
+ \item{classes}{vector of length \code{n} with target classes}
+}
+\description{
+ The inputs of the cuboids problem are uniformly distributed on
+ a \code{3}-dimensional space within 3 cuboids and a small
+ cube in the middle of them.
+}
+
+\author{Evgenia Dimitriadou, and Andreas Weingessel}
+
+\examples{
+p <- mlbench.cuboids(7000)
+plot(p)
+\dontrun{
+library(Rggobi)
+g <- ggobi(p$x)
+g$setColors(p$class)
+g$setMode("2D Tour")
+}}
+\keyword{datagen}
diff --git a/man/mlbench.friedman1.Rd b/man/mlbench.friedman1.Rd
new file mode 100644
index 0000000..1c44fd4
--- /dev/null
+++ b/man/mlbench.friedman1.Rd
@@ -0,0 +1,34 @@
+\name{mlbench.friedman1}
+\alias{mlbench.friedman1}
+\title{Benchmark Problem Friedman 1}
+\usage{
+mlbench.friedman1(n, sd=1)
+}
+\arguments{
+\item{n}{number of patterns to create}
+\item{sd}{Standard deviation of noise}
+}
+\description{
+The regression problem Friedman 1 as described in Friedman (1991) and
+Breiman (1996). Inputs are 10 independent variables uniformly
+distributed on the interval \eqn{[0,1]}, only 5 out of these 10 are actually
+used. Outputs are created according to
+the formula
+\deqn{y = 10 \sin(\pi x1 x2) + 20 (x3 - 0.5)^2 + 10 x4 + 5 x5 + e}{
+ y = 10 sin(\pi x1 x2) + 20 (x3 - 0.5)^2
+ + 10 x4 + 5 x5 + e}
+
+where e is N(0,sd).
+}
+\value{Returns a list with components
+\item{x}{input values (independent variables)}
+\item{y}{output values (dependent variable)}
+}
+\references{
+Breiman, Leo (1996) Bagging predictors. Machine Learning 24, pages
+123-140.
+
+Friedman, Jerome H. (1991) Multivariate adaptive regression
+splines. The Annals of Statistics 19 (1), pages 1-67.
+}
+\keyword{datagen}
diff --git a/man/mlbench.friedman2.Rd b/man/mlbench.friedman2.Rd
new file mode 100644
index 0000000..9d23d62
--- /dev/null
+++ b/man/mlbench.friedman2.Rd
@@ -0,0 +1,39 @@
+\name{mlbench.friedman2}
+\alias{mlbench.friedman2}
+\title{Benchmark Problem Friedman 2}
+\usage{
+mlbench.friedman2(n, sd=125)
+}
+\arguments{
+\item{n}{number of patterns to create}
+\item{sd}{Standard deviation of noise. The default value of 125 gives
+a signal to noise ratio (i.e., the ratio of the standard deviations) of
+3:1. Thus, the variance of the function itself (without noise)
+accounts for 90\% of the total variance.}
+}
+\description{
+The regression problem Friedman 2 as described in Friedman (1991) and
+Breiman (1996). Inputs are 4 independent variables uniformly
+distributed over the ranges
+\deqn{0 \le x1 \le 100}
+\deqn{40 \pi \le x2 \le 560 \pi}
+\deqn{0 \le x3 \le 1}
+\deqn{1 \le x4 \le 11}
+
+The outputs are created according to the formula
+\deqn{y = (x1^2 + (x2 x3 - (1/(x2 x4)))^2)^{0.5} + e}
+where e is N(0,sd).
+}
+\value{Returns a list with components
+\item{x}{input values (independent variables)}
+\item{y}{output values (dependent variable)}
+}
+\references{
+Breiman, Leo (1996) Bagging predictors. Machine Learning 24, pages
+123-140.
+
+Friedman, Jerome H. (1991) Multivariate adaptive regression
+splines. The Annals of Statistics 19 (1), pages 1-67.
+}
+\keyword{datagen}
+
diff --git a/man/mlbench.friedman3.Rd b/man/mlbench.friedman3.Rd
new file mode 100644
index 0000000..07e6285
--- /dev/null
+++ b/man/mlbench.friedman3.Rd
@@ -0,0 +1,40 @@
+\name{mlbench.friedman3}
+\alias{mlbench.friedman3}
+\title{Benchmark Problem Friedman 3}
+\usage{
+mlbench.friedman3(n, sd=0.1)
+}
+\arguments{
+\item{n}{number of patterns to create}
+\item{sd}{Standard deviation of noise. The default value of 0.1 gives
+a signal to noise ratio (i.e., the ratio of the standard deviations) of
+3:1. Thus, the variance of the function itself (without noise)
+accounts for 90\% of the total variance.}
+}
+\description{
+The regression problem Friedman 3 as described in Friedman (1991) and
+Breiman (1996). Inputs are 4 independent variables uniformly
+distributed over the ranges
+\deqn{0 \le x1 \le 100}
+\deqn{40 \pi \le x2 \le 560 \pi}
+\deqn{0 \le x3 \le 1}
+\deqn{1 \le x4 \le 11}
+
+The outputs are created according to the formula
+\deqn{y = \mbox{atan}((x2 x3 - (1/(x2 x4)))/x1) + e}{
+ y = atan ((x2 x3 - (1/(x2 x4)))/x1) + e}
+
+where e is N(0,sd).
+}
+\value{Returns a list with components
+\item{x}{input values (independent variables)}
+\item{y}{output values (dependent variable)}
+}
+\references{
+Breiman, Leo (1996) Bagging predictors. Machine Learning 24, pages
+123-140.
+
+Friedman, Jerome H. (1991) Multivariate adaptive regression
+splines. The Annals of Statistics 19 (1), pages 1-67.
+}
+\keyword{datagen}
diff --git a/man/mlbench.hypercube.Rd b/man/mlbench.hypercube.Rd
new file mode 100644
index 0000000..57e841b
--- /dev/null
+++ b/man/mlbench.hypercube.Rd
@@ -0,0 +1,33 @@
+\name{mlbench.hypercube}
+\alias{mlbench.corners}
+\alias{mlbench.hypercube}
+\alias{hypercube}
+\title{Corners of Hypercube}
+\usage{
+mlbench.hypercube(n=800, d=3, sides=rep(1,d), sd=0.1)
+hypercube(d)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{d}{dimensionality of hypercube, default is 3}
+ \item{sides}{lengths of the sides of the hypercube, default is to
+ create a unit hypercube}
+ \item{sd}{standard deviation}
+}
+\value{Returns an object of class \code{"mlbench.hypercube"} with components
+ \item{x}{input values}
+ \item{classes}{factor of length \code{n} with target classes} }
+\description{
+ The created data are \code{d}-dimensional spherical Gaussians with standard
+ deviation \code{sd} and means at the corners of a
+ \code{d}-dimensional hypercube. The number of classes is \eqn{2^d}.
+
+}
+\examples{
+p <- mlbench.hypercube()
+plot(p)
+
+library("lattice")
+cloud(x.3~x.1+x.2, groups=classes, data=as.data.frame(p))
+}
+\keyword{datagen}
diff --git a/man/mlbench.peak.Rd b/man/mlbench.peak.Rd
new file mode 100644
index 0000000..8e06aed
--- /dev/null
+++ b/man/mlbench.peak.Rd
@@ -0,0 +1,23 @@
+\name{mlbench.peak}
+\alias{mlbench.peak}
+\title{Peak Benchmark Problem}
+\usage{
+mlbench.peak(n, d=20)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{d}{dimension of the problem}
+}
+\description{
+ Let \eqn{r=3u} where u is uniform on
+ [0,1]. Take x to be uniformly distributed on the d-dimensional
+  sphere of radius r. Let \eqn{y = 25\exp(-0.5 r^2)}{y = 25 exp(-0.5 r^2)}. This data set is not a
+ classification problem but a regression problem where y is the
+ dependent variable.
+}
+\value{Returns a list with components
+\item{x}{input values (independent variables)}
+\item{y}{output values (dependent variable)}
+}
+
+\keyword{datagen}
diff --git a/man/mlbench.ringnorm.Rd b/man/mlbench.ringnorm.Rd
new file mode 100644
index 0000000..fada12a
--- /dev/null
+++ b/man/mlbench.ringnorm.Rd
@@ -0,0 +1,31 @@
+\name{mlbench.ringnorm}
+\alias{mlbench.ringnorm}
+\title{Ringnorm Benchmark Problem}
+\usage{
+mlbench.ringnorm(n, d=20)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{d}{dimension of the ringnorm problem}
+}
+\value{Returns an object of class \code{"mlbench.ringnorm"} with components
+ \item{x}{input values}
+ \item{classes}{factor vector of length \code{n} with target classes}
+}
+\description{
+ The inputs of the ringnorm problem are points from two Gaussian
+ distributions. Class 1 is multivariate normal with mean 0 and
+ covariance 4 times the identity matrix. Class 2 has unit covariance
+ and mean \eqn{(a,a,\ldots,a)}, \eqn{a=d^{-0.5}}.
+
+}
+\references{
+ Breiman, L. (1996). Bias, variance, and arcing classifiers.
+ Tech. Rep. 460, Statistics Department, University of California,
+ Berkeley, CA, USA.
+}
+\examples{
+p<-mlbench.ringnorm(1000, d=2)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.shapes.Rd b/man/mlbench.shapes.Rd
new file mode 100644
index 0000000..60a1913
--- /dev/null
+++ b/man/mlbench.shapes.Rd
@@ -0,0 +1,19 @@
+\name{mlbench.shapes}
+\alias{mlbench.shapes}
+\title{Shapes in 2d}
+\usage{
+mlbench.shapes(n=500)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+}
+\value{Returns an object of class \code{"mlbench.shapes"} with components
+ \item{x}{input values}
+ \item{classes}{factor of length \code{n} with target classes}
+}
+\description{A Gaussian, square, triangle and wave in 2 dimensions.}
+\examples{
+p<-mlbench.shapes()
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.simplex.Rd b/man/mlbench.simplex.Rd
new file mode 100644
index 0000000..2fde33d
--- /dev/null
+++ b/man/mlbench.simplex.Rd
@@ -0,0 +1,37 @@
+\name{mlbench.simplex}
+\alias{mlbench.simplex}
+\alias{simplex}
+\title{Corners of d-dimensional Simplex}
+\usage{
+mlbench.simplex(n = 800, d = 3, sides = 1, sd = 0.1, center=TRUE)
+simplex(d, sides, center=TRUE)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{d}{dimensionality of simplex, default is 3}
+ \item{sides}{lengths of the sides of the simplex, default is to
+ create a unit simplex}
+ \item{sd}{standard deviation}
+ \item{center}{If \code{TRUE}, the origin is the center of gravity of
+ the simplex. If \code{FALSE}, the origin is a corner of the
+ simplex and all coordinates of the simplex are positive.}
+}
+\value{Returns an object of class \code{"mlbench.simplex"} with components
+ \item{x}{input values}
+ \item{classes}{factor of length \code{n} with target classes} }
+\description{
+ The created data are \code{d}-dimensional spherical Gaussians with standard
+ deviation \code{sd} and means at the corners of a
+ \code{d}-dimensional simplex. The number of classes is \code{d+1}.
+}
+\author{
+ Manuel Eugster and Sebastian Kaiser
+ }
+\examples{
+p <- mlbench.simplex()
+plot(p)
+
+library("lattice")
+cloud(x.3~x.1+x.2, groups=classes, data=as.data.frame(p))
+}
+\keyword{datagen}
diff --git a/man/mlbench.smiley.Rd b/man/mlbench.smiley.Rd
new file mode 100644
index 0000000..4083429
--- /dev/null
+++ b/man/mlbench.smiley.Rd
@@ -0,0 +1,24 @@
+\name{mlbench.smiley}
+\alias{mlbench.smiley}
+\title{The Smiley}
+\usage{
+mlbench.smiley(n=500, sd1 = 0.1, sd2 = 0.05)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{sd1}{standard deviation for eyes}
+ \item{sd2}{standard deviation for mouth}
+}
+\value{Returns an object of class \code{"mlbench.smiley"} with components
+ \item{x}{input values}
+ \item{classes}{factor vector of length \code{n} with target classes}
+}
+\description{
+ The smiley consists of 2 Gaussian eyes, a trapezoid nose and a
+ parabola mouth (with vertical Gaussian noise).
+}
+\examples{
+p<-mlbench.smiley()
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.spirals.Rd b/man/mlbench.spirals.Rd
new file mode 100644
index 0000000..55f99c6
--- /dev/null
+++ b/man/mlbench.spirals.Rd
@@ -0,0 +1,32 @@
+\name{mlbench.spirals}
+\alias{mlbench.spirals}
+\alias{mlbench.1spiral}
+\title{Two Spirals Benchmark Problem}
+\usage{
+mlbench.spirals(n, cycles=1, sd=0)
+mlbench.1spiral(n, cycles=1, sd=0)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{cycles}{the number of cycles each spiral makes}
+ \item{sd}{standard deviation of data points around the spirals}
+}
+\value{Returns an object of class \code{"mlbench.spirals"} with components
+ \item{x}{input values}
+ \item{classes}{factor vector of length \code{n} with target classes}
+}
+\description{
+ The inputs of the spirals problem are points on two entangled spirals. If
+ \code{sd>0}, then Gaussian noise is added to each data
+ point. \code{mlbench.1spiral} creates a single spiral.
+}
+\examples{
+# 1 cycle each, no noise
+p<-mlbench.spirals(300)
+plot(p)
+#
+# 1.5 cycles each, with noise
+p<-mlbench.spirals(300,1.5,0.05)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.threenorm.Rd b/man/mlbench.threenorm.Rd
new file mode 100644
index 0000000..9fe00d2
--- /dev/null
+++ b/man/mlbench.threenorm.Rd
@@ -0,0 +1,33 @@
+\name{mlbench.threenorm}
+\alias{mlbench.threenorm}
+\title{Threenorm Benchmark Problem}
+\usage{
+mlbench.threenorm(n, d=20)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{d}{dimension of the threenorm problem}
+}
+\value{Returns an object of class \code{"mlbench.threenorm"} with components
+ \item{x}{input values}
+ \item{classes}{factor vector of length \code{n} with target classes}
+}
+\description{
+ The inputs of the threenorm problem are points from two Gaussian
+ distributions with unit covariance matrix. Class 1 is drawn with
+ equal probability from a unit multivariate normal with mean
+ \eqn{(a,a,\ldots,a)} and from a unit multivariate normal with mean
+ \eqn{(-a,-a,\ldots,-a)}. Class 2 is drawn from a multivariate normal
+ with mean at \eqn{(a,-a,a, \ldots,-a)}, \eqn{a=2/d^{0.5}}.
+
+}
+\references{
+ Breiman, L. (1996). Bias, variance, and arcing classifiers.
+ Tech. Rep. 460, Statistics Department, University of California,
+ Berkeley, CA, USA.
+}
+\examples{
+p<-mlbench.threenorm(1000, d=2)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.twonorm.Rd b/man/mlbench.twonorm.Rd
new file mode 100644
index 0000000..77e9784
--- /dev/null
+++ b/man/mlbench.twonorm.Rd
@@ -0,0 +1,31 @@
+\name{mlbench.twonorm}
+\alias{mlbench.twonorm}
+\title{Twonorm Benchmark Problem}
+\usage{
+mlbench.twonorm(n, d=20)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+ \item{d}{dimension of the twonorm problem}
+}
+\value{Returns an object of class \code{"mlbench.twonorm"} with components
+ \item{x}{input values}
+ \item{classes}{factor vector of length \code{n} with target classes}
+}
+\description{
+ The inputs of the twonorm problem are points from two Gaussian
+ distributions with unit covariance matrix. Class 1 is multivariate
+ normal with mean \eqn{(a,a,\ldots,a)} and class 2 with mean
+ \eqn{(-a,-a,\ldots,-a)}, \eqn{a=2/d^{0.5}}.
+
+}
+\references{
+ Breiman, L. (1996). Bias, variance, and arcing classifiers.
+ Tech. Rep. 460, Statistics Department, University of California,
+ Berkeley, CA, USA.
+}
+\examples{
+p<-mlbench.twonorm(1000, d=2)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/mlbench.waveform.Rd b/man/mlbench.waveform.Rd
new file mode 100644
index 0000000..5a57c8f
--- /dev/null
+++ b/man/mlbench.waveform.Rd
@@ -0,0 +1,52 @@
+\name{mlbench.waveform}
+\alias{mlbench.waveform}
+\title{Waveform Database Generator}
+\usage{
+ mlbench.waveform(n)
+}
+\arguments{
+ \item{n}{number of patterns to create}
+}
+
+\value{
+ Returns an object of class \code{"mlbench.waveform"} with components
+ \item{x}{input values}
+ \item{classes}{factor vector of length \code{n} with target classes}
+}
+
+\description{
+ The generated data set consists of 21 attributes with continuous
+ values and a variable showing the 3 classes (33\% for each of 3
+ classes). Each class is generated from a combination of 2 of 3
+ "base" waves.
+ }
+\source{
+ The original C code for the waveform generator has been taken
+ from the UCI Repository
+ of Machine Learning Databases at
+ \itemize{
+ \item \url{ftp://ftp.ics.uci.edu/pub/machine-learning-databases}
+ \item \url{http://www.ics.uci.edu/~mlearn/MLRepository.html}
+ }
+ The C code has been modified to use R's random number generator
+ by Friedrich Leisch, who also wrote the R interface.
+}
+
+\references{
+ Breiman, L. (1996). Bias, variance, and arcing
+ classifiers. Tech. Rep. 460, Statistics Department, University of
+ California, Berkeley, CA, USA.
+
+ Newman, D.J. & Hettich, S. & Blake, C.L. & Merz, C.J. (1998).
+ UCI Repository of machine learning databases
+ [http://www.ics.uci.edu/~mlearn/MLRepository.html]. Irvine, CA:
+ University of California, Department of Information and Computer
+ Science.
+}
+
+\examples{
+ p<-mlbench.waveform(100)
+ plot(p)
+}
+
+\keyword{datagen}
diff --git a/man/mlbench.xor.Rd b/man/mlbench.xor.Rd
new file mode 100644
index 0000000..b3f57d6
--- /dev/null
+++ b/man/mlbench.xor.Rd
@@ -0,0 +1,30 @@
+\name{mlbench.xor}
+\alias{mlbench.xor}
+\title{Continuous XOR Benchmark Problem}
+\usage{
+mlbench.xor(n, d=2)
+}
+\arguments{
+\item{n}{number of patterns to create}
+\item{d}{dimension of the XOR problem}
+}
+\value{Returns an object of class \code{"mlbench.xor"} with components
+\item{x}{input values}
+\item{classes}{factor vector of length \code{n} with target classes}
+}
+\description{
+ The inputs of the XOR problem are uniformly distributed on
+ the \code{d}-dimensional cube with corners \eqn{\{\pm 1\}}{\{+-1\}}. Each pair of
+ opposite corners form one class, hence the total number of classes is
+ \eqn{2^{d-1}}.
+}
+\examples{
+# 2d example
+p<-mlbench.xor(300,2)
+plot(p)
+#
+# 3d example
+p<-mlbench.xor(300,3)
+plot(p)
+}
+\keyword{datagen}
diff --git a/man/plot.mlbench.Rd b/man/plot.mlbench.Rd
new file mode 100644
index 0000000..19532c1
--- /dev/null
+++ b/man/plot.mlbench.Rd
@@ -0,0 +1,28 @@
+\name{plot.mlbench}
+\alias{plot.mlbench}
+\title{Plot mlbench objects}
+\usage{
+\S3method{plot}{mlbench}(x, xlab="", ylab="", ...)
+}
+\arguments{
+ \item{x}{Object of class \code{"mlbench"}.}
+ \item{xlab}{Label for x-axis.}
+ \item{ylab}{Label for y-axis.}
+ \item{\dots}{Further plotting options.}
+}
+\description{
+ Plots the data of an mlbench object using different colors for each
+ class. If the dimension of the input space is larger than 2, a
+ scatter plot matrix is used.
+}
+\examples{
+# 6 normal classes
+p <- mlbench.2dnormals(500,6)
+plot(p)
+
+# 4-dimensional XOR
+p <- mlbench.xor(500,4)
+plot(p)
+}
+
+\keyword{hplot}
diff --git a/src/waveform.c b/src/waveform.c
new file mode 100644
index 0000000..d70bb70
--- /dev/null
+++ b/src/waveform.c
@@ -0,0 +1,126 @@
+/* =====================================================================
+ David Aha
+ August 1988
+ Creates waveform domain data
+ Usage: create-waveform number num-attributes
+ See CART book, page 49 for details
+ This is for the 21-attribute problem.
+
+ Requires use of the UNIXSTAT tool named "probdist".
+
+ modified by Friedrich Leisch on 2000/12/11 to use R's random number
+ generator
+ ===================================================================== */
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <R_ext/Random.h>
+
+#define NUMBER_OF_ATTRIBUTES 21
+#define NUMBER_OF_CLASSES 3
+
+int num_instances;
+double h[NUMBER_OF_CLASSES][NUMBER_OF_ATTRIBUTES];
+
+/* =====================================================================
+ Main Function
+ ===================================================================== */
+void waveform(int *R_num_instances, double *x, int *type)
+{
+ void execute(double *x, int *type);
+ void initialize();
+
+ num_instances = *R_num_instances;
+
+ GetRNGstate();
+ initialize();
+ execute(x, type);
+ PutRNGstate();
+}
+
+/* =====================================================================
+ Initializes the algorithm.
+ ==================================================================== */
+void initialize()
+{
+ int i,j;
+
+ /*==== Setup for waveform of types 1 through 3 ====*/
+ for(i=0; i<3; i++)
+ for(j=0; j<21; j++)
+ h[i][j] = 0.0;
+
+ /*==== Waveform 1 ====*/
+ for(i=1; i<=6; i++)
+ h[0][i] = (double)i;
+ j=1;
+ for(i=11; i>=7; i--)
+ { h[0][i] = (double)j;
+ j++;
+ }
+
+ /*==== Waveform 2 ====*/
+ j = 1;
+ for(i=9; i<=14; i++)
+ { h[1][i] = (double)j;
+ j++;
+ }
+ j=1;
+ for(i=19; i>=15; i--)
+ { h[1][i] = (double)j;
+ j++;
+ }
+
+ /*==== Waveform 3 ====*/
+ j = 1;
+ for(i=5; i<=10; i++)
+ { h[2][i] = (double)j;
+ j++;
+ }
+ j=1;
+ for(i=15; i>=11; i--)
+ { h[2][i] = (double)j;
+ j++;
+ }
+
+}
+
+/* =====================================================================
+ Executes the algorithm.
+ ===================================================================== */
+void execute(double *x, int *type)
+{
+ int num_instance, num_attribute;
+ int waveform_type, choice[2];
+ double random_attribute_value, multiplier[2];
+
+
+ for(num_instance=0; num_instance<num_instances; num_instance++)
+ { /*==== Set up class type ====*/
+ waveform_type = floor(3*unif_rand());
+ switch (waveform_type)
+ { case 0: choice[0] = 0; choice[1] = 1; break;
+ case 1: choice[0] = 0; choice[1] = 2; break;
+ case 2: choice[0] = 1; choice[1] = 2; break;
+ }
+
+ /*==== Set up u and (1-u) for this call ====*/
+ multiplier[0] = unif_rand();
+ multiplier[1] = 1.0 - multiplier[0];
+
+ /*==== Create the instance ====*/
+ for(num_attribute=0; num_attribute<NUMBER_OF_ATTRIBUTES;
+ num_attribute++)
+ {
+ random_attribute_value = norm_rand();
+ /*==== Calculate the value ====*/
+ x[num_instance*NUMBER_OF_ATTRIBUTES + num_attribute] =
+ (multiplier[0] * h[choice[0]][num_attribute]) +
+ (multiplier[1] * h[choice[1]][num_attribute]) +
+ random_attribute_value;
+ }
+
+ type[num_instance] = waveform_type;
+ }
+}
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/r-cran-mlbench.git
More information about the debian-science-commits
mailing list