[mlpack] 243/324: More tests for Adaboost added, with tolerance for change in rt also provided.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:15 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 2d5872c6455e79731febc1b16901c59b23dabb1a
Author: saxena.udit <saxena.udit at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Tue Jul 29 18:56:10 2014 +0000
More tests for Adaboost added, with tolerance for change in rt also provided.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16922 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/adaboost/adaboost.hpp | 5 +-
src/mlpack/methods/adaboost/adaboost_impl.hpp | 13 +-
src/mlpack/methods/adaboost/adaboost_main.cpp | 5 +-
.../methods/decision_stump/decision_stump.hpp | 9 +-
.../methods/decision_stump/decision_stump_impl.hpp | 18 +-
src/mlpack/tests/adaboost_test.cpp | 99 +++++++++-
src/mlpack/tests/data/nonlinsepdata.txt | 200 +++++++++++++++++++++
src/mlpack/tests/data/nonlinsepdata_labels.txt | 200 +++++++++++++++++++++
8 files changed, 532 insertions(+), 17 deletions(-)
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
index cfca3bc..56a7b98 100644
--- a/src/mlpack/methods/adaboost/adaboost.hpp
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -25,10 +25,11 @@ public:
* @param data Input data
* @param labels Corresponding labels
* @param iterations Number of boosting rounds
+ * @param tol The tolerance for change in values of rt.
* @param other Weak Learner, which has been initialized already
*/
Adaboost(const MatType& data, const arma::Row<size_t>& labels,
- int iterations, const WeakLearner& other);
+ int iterations, double tol, const WeakLearner& other);
/**
* This function helps in building a classification Matrix which is of
@@ -59,6 +60,8 @@ public:
// To check for the bound for the hammingLoss.
double ztAccumulator;
+ // The tolerance for change in rt and when to stop.
+ double tolerance;
}; // class Adaboost
} // namespace adaboost
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
index 3d0d663..a6ed804 100644
--- a/src/mlpack/methods/adaboost/adaboost_impl.hpp
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -46,15 +46,15 @@ namespace adaboost {
*/
template<typename MatType, typename WeakLearner>
Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
- const arma::Row<size_t>& labels, int iterations,
+ const arma::Row<size_t>& labels, int iterations, double tol,
const WeakLearner& other)
{
// Counting the number of classes into numClasses.
size_t numClasses = (arma::max(labels) - arma::min(labels)) + 1;
-
+ tolerance = tol;
int i, j, k;
double rt, crt, alphat = 0.0, zt;
- double tolerance = 1e-20;
+ // double tolerance = 1e-8;
// std::cout<<"Tolerance is "<<tolerance<<"\n";
// crt is for stopping the iterations when rt
// stops changing by less than a tolerant value.
@@ -127,11 +127,8 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
if (i > 0)
{
- if ( (rt - crt) < tolerance)
- {
- // std::cout<<(rt-crt)<<"\n";
- i = iterations;
- }
+ if ( std::abs(rt - crt) < tolerance )
+ break;
}
crt = rt;
diff --git a/src/mlpack/methods/adaboost/adaboost_main.cpp b/src/mlpack/methods/adaboost/adaboost_main.cpp
index 3cb9028..82e25b7 100644
--- a/src/mlpack/methods/adaboost/adaboost_main.cpp
+++ b/src/mlpack/methods/adaboost/adaboost_main.cpp
@@ -27,6 +27,7 @@ PARAM_STRING("output", "The file in which the predicted labels for the test set"
PARAM_INT("iterations","The maximum number of boosting iterations "
"to be run", "i", 1000);
PARAM_INT_REQ("classes","The number of classes in the input label set.","c");
+PARAM_DOUBLE("tolerance","The tolerance for change in values of rt","e",1e-10);
int main(int argc, char *argv[])
{
@@ -75,6 +76,8 @@ int main(int argc, char *argv[])
mat testingData;
data::Load(testingDataFilename, testingData, true);
+ const double tolerance = CLI::GetParam<double>("tolerance");
+
if (testingData.n_rows != trainingData.n_rows)
Log::Fatal << "Test data dimensionality (" << testingData.n_rows << ") "
<< "must be the same as training data (" << trainingData.n_rows - 1
@@ -88,7 +91,7 @@ int main(int argc, char *argv[])
perceptron::Perceptron<> p(trainingData, labels.t(), iter);
Timer::Start("Training");
- Adaboost<> a(trainingData, labels.t(), iterations, p);
+ Adaboost<> a(trainingData, labels.t(), iterations, tolerance, p);
Timer::Stop("Training");
return 0;
diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index 3c0adcb..5255670 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -110,7 +110,8 @@ class DecisionStump
* candidate for the splitting attribute.
*/
double SetupSplitAttribute(const arma::rowvec& attribute,
- const arma::Row<size_t>& labels);
+ const arma::Row<size_t>& labels,
+ const arma::rowvec& D);
/**
* After having decided the attribute on which to split, train on that
@@ -151,6 +152,12 @@ class DecisionStump
*/
template <typename AttType, typename LabelType>
double CalculateEntropy(arma::subview_row<LabelType> labels);
+
+ /**
+ *
+ *
+ */
+ void Train(const MatType& data, const arma::Row<size_t>& labels, const arma::rowvec& D);
};
}; // namespace decision_stump
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index 80d961c..089415f 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -34,6 +34,15 @@ DecisionStump<MatType>::DecisionStump(const MatType& data,
numClass = classes;
bucketSize = inpBucketSize;
+ arma::rowvec D(data.n_cols);
+ D.fill(1.0);
+
+ Train(data, labels, D);
+}
+
+template<typename MatType>
+void DecisionStump<MatType>::Train(const MatType& data, const arma::Row<size_t>& labels, const arma::rowvec& D)
+{
// If classLabels are not all identical, proceed with training.
int bestAtt = 0;
double entropy;
@@ -48,7 +57,7 @@ DecisionStump<MatType>::DecisionStump(const MatType& data,
{
// For each attribute with non-identical values, treat it as a potential
// splitting attribute and calculate entropy if split on it.
- entropy = SetupSplitAttribute(data.row(i), labels);
+ entropy = SetupSplitAttribute(data.row(i), labels, D);
// Log::Debug << "Entropy for attribute " << i << " is " << entropy << ".\n";
gain = rootEntropy - entropy;
@@ -145,7 +154,8 @@ DecisionStump<MatType>::ModifyData(MatType& data, const arma::Row<double>& D)
template <typename MatType>
double DecisionStump<MatType>::SetupSplitAttribute(
const arma::rowvec& attribute,
- const arma::Row<size_t>& labels)
+ const arma::Row<size_t>& labels,
+ const arma::rowvec& D)
{
int i, count, begin, end;
double entropy = 0.0;
@@ -160,8 +170,12 @@ double DecisionStump<MatType>::SetupSplitAttribute(
arma::Row<size_t> sortedLabels(attribute.n_elem);
sortedLabels.fill(0);
+ arma::rowvec dTemp(D.n_cols);
for (i = 0; i < attribute.n_elem; i++)
+ {
sortedLabels(i) = labels(sortedIndexAtt(i));
+ dTemp(i) = D(sortedIndexAtt(i));
+ }
i = 0;
count = 0;
diff --git a/src/mlpack/tests/adaboost_test.cpp b/src/mlpack/tests/adaboost_test.cpp
index d613e21..703889f 100644
--- a/src/mlpack/tests/adaboost_test.cpp
+++ b/src/mlpack/tests/adaboost_test.cpp
@@ -44,7 +44,8 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundIris)
// Define parameters for the adaboost
int iterations = 100;
- Adaboost<> a(inputData, labels.row(0), iterations, p);
+ double tolerance = 1e-10;
+ Adaboost<> a(inputData, labels.row(0), iterations, tolerance, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
if(labels(i) != a.finalHypothesis(i))
@@ -90,7 +91,8 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorIris)
// Define parameters for the adaboost
int iterations = 100;
- Adaboost<> a(inputData, labels.row(0), iterations, p);
+ double tolerance = 1e-10;
+ Adaboost<> a(inputData, labels.row(0), iterations, tolerance, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
if(labels(i) != a.finalHypothesis(i))
@@ -128,7 +130,8 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn)
// Define parameters for the adaboost
int iterations = 50;
- Adaboost<> a(inputData, labels.row(0), iterations, p);
+ double tolerance = 1e-10;
+ Adaboost<> a(inputData, labels.row(0), iterations, tolerance, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
if(labels(i) != a.finalHypothesis(i))
@@ -175,7 +178,95 @@ BOOST_AUTO_TEST_CASE(WeakLearnerErrorVertebralColumn)
// Define parameters for the adaboost
int iterations = 50;
- Adaboost<> a(inputData, labels.row(0), iterations, p);
+ double tolerance = 1e-10;
+ Adaboost<> a(inputData, labels.row(0), iterations, tolerance, p);
+ int countError = 0;
+ for (size_t i = 0; i < labels.n_cols; i++)
+ if(labels(i) != a.finalHypothesis(i))
+ countError++;
+ double error = (double) countError / labels.n_cols;
+
+ BOOST_REQUIRE(error <= weakLearnerErrorRate);
+}
+
+/**
+ * This test case runs the Adaboost.mh algorithm on non-linearly
+ * separable dataset.
+ * It checks whether the hamming loss breaches the upperbound, which
+ * is provided by ztAccumulator.
+ */
+BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData)
+{
+ arma::mat inputData;
+
+ if (!data::Load("nonlinsepdata.txt", inputData))
+ BOOST_FAIL("Cannot load test dataset nonlinsepdata.txt!");
+
+ arma::Mat<size_t> labels;
+
+ if (!data::Load("nonlinsepdata_labels.txt",labels))
+ BOOST_FAIL("Cannot load labels for nonlinsepdata_labels.txt");
+
+ // no need to map the labels here
+
+ // Define your own weak learner, perceptron in this case.
+ // Run the perceptron for perceptron_iter iterations.
+ int perceptron_iter = 800;
+
+ perceptron::Perceptron<> p(inputData, labels.row(0), perceptron_iter);
+
+ // Define parameters for the adaboost
+ int iterations = 50;
+ double tolerance = 1e-10;
+ Adaboost<> a(inputData, labels.row(0), iterations, tolerance, p);
+ int countError = 0;
+ for (size_t i = 0; i < labels.n_cols; i++)
+ if(labels(i) != a.finalHypothesis(i))
+ countError++;
+ double hammingLoss = (double) countError / labels.n_cols;
+
+ BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+}
+
+/**
+ * This test case runs the Adaboost.mh algorithm on a non-linearly
+ * separable dataset.
+ * It checks if the error returned by running a single instance of the
+ * weak learner is worse than running the boosted weak learner using
+ * adaboost.
+ */
+BOOST_AUTO_TEST_CASE(WeakLearnerErrorNonLinearSepData)
+{
+ arma::mat inputData;
+
+ if (!data::Load("nonlinsepdata.txt", inputData))
+ BOOST_FAIL("Cannot load test dataset nonlinsepdata.txt!");
+
+ arma::Mat<size_t> labels;
+
+ if (!data::Load("nonlinsepdata_labels.txt",labels))
+ BOOST_FAIL("Cannot load labels for nonlinsepdata_labels.txt");
+
+ // no need to map the labels here
+
+ // Define your own weak learner, perceptron in this case.
+ // Run the perceptron for perceptron_iter iterations.
+ int perceptron_iter = 800;
+
+ arma::Row<size_t> perceptronPrediction(labels.n_cols);
+ perceptron::Perceptron<> p(inputData, labels.row(0), perceptron_iter);
+ p.Classify(inputData, perceptronPrediction);
+
+ int countWeakLearnerError = 0;
+ for (size_t i = 0; i < labels.n_cols; i++)
+ if(labels(i) != perceptronPrediction(i))
+ countWeakLearnerError++;
+ double weakLearnerErrorRate = (double) countWeakLearnerError / labels.n_cols;
+
+ // Define parameters for the adaboost
+ int iterations = 50;
+ double tolerance = 1e-10;
+ Adaboost<> a(inputData, labels.row(0), iterations, tolerance, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
if(labels(i) != a.finalHypothesis(i))
diff --git a/src/mlpack/tests/data/nonlinsepdata.txt b/src/mlpack/tests/data/nonlinsepdata.txt
new file mode 100644
index 0000000..aae2e37
--- /dev/null
+++ b/src/mlpack/tests/data/nonlinsepdata.txt
@@ -0,0 +1,200 @@
+-0.299105532 0.572326729
+-0.836483249 -0.14359759
+0.008063874 -0.007024867
+-0.343167143 0.42961481
+0.32154837 -0.208236731
+-0.217072934 -0.212645094
+0.429448087 0.042831669
+0.531008929 -0.252171061
+-0.228575587 -0.586958836
+0.733937526 -0.012346747
+0.303535501 0.571892222
+0.539039196 0.628216718
+0.412215252 -0.185824745
+-0.307331594 -0.885248399
+0.151464995 0.066677945
+0.02810081 -0.818338472
+0.015731441 -0.645799755
+0.661724665 -0.347577756
+-0.089698701 -0.363787138
+0.316161623 0.313278339
+0.88787143 0.111484946
+0.970688757 0.161322385
+0.55101173 0.099046711
+0.019806601 0.831823487
+0.596855325 -0.721216246
+-0.917824177 -0.228485105
+-0.27570897 -0.095019869
+0.108012122 -0.1654937
+0.292911812 0.289884615
+-0.170870048 0.921382619
+-0.269166632 -0.160922833
+0.637122848 -0.123215673
+-0.907638043 -0.366173065
+0.725175629 0.08535568
+0.260180377 -0.790052711
+0.059974592 0.558496803
+0.290949275 -0.775789564
+-0.696033218 0.591746086
+0.088498834 -0.102255727
+-0.015662941 -0.865967082
+-0.932972607 -0.344126727
+0.566055791 0.654023314
+0.492482144 0.194692911
+0.64246928 0.613606187
+0.308782039 0.088767443
+0.522317298 0.41842343
+0.144916266 0.942122486
+-0.124875724 -0.52727621
+-0.499126147 -0.196592035
+0.359473181 0.028418378
+0.125175757 0.060811083
+0.806560524 -0.16821312
+-0.418556149 -0.19088967
+0.149065896 0.493280871
+-0.010245202 0.701047926
+-0.008959135 -0.230829545
+0.254738853 0.713600534
+-0.334082867 -0.187395872
+0.754083034 -0.143750879
+0.478110324 -0.686417423
+0.020391919 0.319246924
+-0.642978854 0.025182941
+-0.607613175 -0.570268698
+0.306423566 -0.565936275
+0.123538578 0.186257023
+0.72706875 0.085249277
+0.737146843 0.347000254
+-0.714918394 0.061488226
+-0.762951097 -0.602080044
+-0.695314036 -0.321081659
+-0.390384895 0.866603455
+0.369827758 -0.026497804
+-0.327322825 -0.17926549
+-0.655559774 0.614518243
+-0.598580033 -0.198440877
+-0.011918728 -0.406462964
+-0.468026012 -0.841700798
+-0.055200503 0.587127554
+0.792314883 0.350783736
+-0.583600401 0.62081282
+-0.64458229 -0.68397351
+0.195561235 0.765132675
+0.015002733 -0.866439468
+0.539805874 0.239808175
+-0.033649068 0.518455043
+0.319246005 -0.891048936
+0.227677062 0.61339575
+-0.628677954 -0.754721192
+-0.804753322 -0.12575209
+0.702105593 -0.358808384
+-0.028267632 0.337560238
+0.891922873 0.392380999
+-0.06284684 -0.667804169
+0.105634707 -0.753147345
+0.534655451 -0.349686075
+0.443053473 -0.201818235
+0.946813295 0.083211205
+0.074300943 0.75376313
+-0.039789138 -0.035876894
+0.745621743 -0.247372451
+0.743619596 -1.81591802
+1.155134773 -1.155691663
+-0.730475116 -1.792547114
+-1.549717954 -0.723237783
+0.781720033 1.24857396
+-1.533967653 0.323906224
+-0.944213794 1.386142763
+0.254188113 1.607492509
+-1.624602712 -1.067337954
+-0.739085468 1.791634164
+1.278347298 -0.741926562
+1.662805028 -1.060597485
+-0.055156833 1.232406071
+0.900610262 0.753584576
+0.41579285 1.11011431
+-1.586495923 0.004145099
+-0.138408011 -1.022456668
+0.509090382 1.893340333
+0.02305279 1.865694236
+0.609535781 -0.820684516
+-1.067528965 0.623675136
+-1.049854852 -0.945513359
+0.120914993 1.174040076
+1.484419861 1.106783517
+-1.4191842 -0.141866709
+-1.934438955 0.279764857
+0.951389268 1.356798069
+-1.375209835 -1.321095706
+-1.808529076 -0.513250346
+-1.092175944 -1.588498871
+0.948952249 1.216195061
+0.015594362 -1.624437522
+0.321967496 1.814427652
+-1.352713814 -0.16529335
+0.345744458 -1.533369276
+1.656086814 0.233896107
+-0.460792886 0.913383678
+1.163585001 -1.194367686
+0.850341757 -1.256163979
+-1.796190808 -0.442470528
+0.932987115 -1.036952001
+1.82338821 -0.594913071
+0.763599686 -1.466453943
+0.758080019 -0.988607753
+1.492253497 1.066286642
+-0.978672999 -1.370508188
+-0.310337647 -1.032191049
+-1.157032863 -0.960133321
+0.534533646 -1.65875866
+1.892449042 0.417662491
+0.126897732 -0.992523572
+-0.227095688 -1.969442814
+-0.0888739 1.456654514
+-0.087928817 1.97828692
+-1.13053945 -1.239366312
+-1.052421933 1.542383058
+0.476271294 -1.580284838
+-1.602001068 0.563756307
+0.278412374 -1.333872331
+1.658498678 -0.099409044
+-0.55976856 1.713687384
+1.177482802 1.479349776
+1.470575571 -1.050073357
+-1.699394959 -0.454123902
+1.871811971 -0.299596785
+-0.917079278 -1.061606607
+1.73434779 -0.066363522
+-1.470010888 -0.437823934
+-1.444880111 -0.24213575
+0.985071168 1.123418374
+-0.915193748 1.169127518
+1.378278589 0.22912872
+0.903525291 1.431572615
+-0.0414839 -1.512951056
+1.944240347 0.375946415
+-1.164397187 1.295214988
+0.819463581 -0.961347546
+-0.532169769 -1.413116543
+-0.516650608 -1.327821851
+0.692724746 1.706765607
+-1.78217547 0.681062009
+-1.678951498 -0.944800369
+1.725046866 -0.111232858
+0.384280254 1.289555533
+0.018881382 1.325422045
+-0.144267356 1.88251979
+0.001741105 -1.607215265
+-1.778635709 0.492887681
+1.544133898 0.577162072
+-1.786134271 -0.095665791
+0.378568063 1.553615494
+0.547230462 -1.735854416
+-0.482079187 1.572950613
+1.525034275 0.267207967
+-1.185673846 -0.788824603
+0.610287572 -0.912237428
+-1.071198843 0.993129184
+-0.000990903 -1.113048629
+-1.982204157 -0.138989282
+-1.201996822 -0.316294472
diff --git a/src/mlpack/tests/data/nonlinsepdata_labels.txt b/src/mlpack/tests/data/nonlinsepdata_labels.txt
new file mode 100644
index 0000000..cbfde7b
--- /dev/null
+++ b/src/mlpack/tests/data/nonlinsepdata_labels.txt
@@ -0,0 +1,200 @@
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits mailing list