[mlpack] 211/324: Adaboost test improved and now works. Improved adaboost.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:11 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 317f2201da78dd649d6728754f1ab28a41e20a39
Author: saxena.udit <saxena.udit at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Thu Jul 24 20:34:38 2014 +0000
Adaboost test improved and now works. Improved adaboost.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16853 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/adaboost/adaboost.hpp | 37 ++++++++++++++++--
src/mlpack/methods/adaboost/adaboost_impl.hpp | 55 ++++++++-------------------
src/mlpack/methods/adaboost/adaboost_main.cpp | 15 ++------
src/mlpack/tests/adaboost_test.cpp | 21 +++++-----
4 files changed, 66 insertions(+), 62 deletions(-)
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
index 267a6a0..cfca3bc 100644
--- a/src/mlpack/methods/adaboost/adaboost.hpp
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -19,15 +19,46 @@ template <typename MatType = arma::mat, typename WeakLearner =
class Adaboost
{
public:
- arma::Row<size_t> finalHypothesis;
-
+ /**
+ * Constructor. Currently runs the AdaBoost.MH algorithm.
+ *
+ * @param data Input data.
+ * @param labels Corresponding labels.
+ * @param iterations Number of boosting rounds.
+ * @param other Weak learner, which has already been initialized.
+ */
Adaboost(const MatType& data, const arma::Row<size_t>& labels,
- int iterations, size_t classes, const WeakLearner& other);
+ int iterations, const WeakLearner& other);
+ /**
+ * Builds a classification matrix of the form:
+ * -1 if l is not the correct label
+ * 1 if l is the correct label
+ *
+ * @param t The classification matrix to be built.
+ * @param l The labels from which the classification matrix is built.
+ */
void buildClassificationMatrix(arma::mat& t, const arma::Row<size_t>& l);
+ /**
+ * Builds the weight vector, which is updated during every iteration.
+ * It measures the "difficulty" of classifying each instance by
+ * summing, over all labels, that instance's weights in D.
+ *
+ * @param D The two-dimensional weight matrix from which the weights
+ * are to be calculated.
+ * @param weights The output weight vector.
+ */
void buildWeightMatrix(const arma::mat& D, arma::rowvec& weights);
+ // Stores the final classification of the labels.
+ arma::Row<size_t> finalHypothesis;
+
+ // Product of the zt values; an upper bound on the training Hamming loss.
+ double ztAccumulator;
+
}; // class Adaboost
} // namespace adaboost
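For reference, the classification matrix described above is just a dense
+1/-1 encoding of the labels: row i carries +1 in the column of its true
label and -1 everywhere else. A minimal standalone sketch, assuming
Armadillo and 0-based contiguous labels (the explicit numClasses parameter
is illustrative; the class computes it internally):

    #include <armadillo>

    // Sketch of the +1/-1 encoding produced by buildClassificationMatrix.
    void BuildClassificationMatrix(arma::mat& t,
                                   const arma::Row<size_t>& l,
                                   const size_t numClasses)
    {
      t.set_size(l.n_cols, numClasses);
      t.fill(-1.0);              // -1 everywhere...
      for (size_t i = 0; i < l.n_cols; ++i)
        t(i, l(i)) = 1.0;        // ...except +1 at the correct label.
    }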
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
index 9f225f9..b39d229 100644
--- a/src/mlpack/methods/adaboost/adaboost_impl.hpp
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -42,19 +42,21 @@ namespace adaboost {
* @param data Input data
* @param labels Corresponding labels
* @param iterations Number of boosting rounds
- * @param classes Number of classes in labels
* @param other Weak Learner, which has been initialized already
*/
template<typename MatType, typename WeakLearner>
Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
const arma::Row<size_t>& labels, int iterations,
- size_t classes, const WeakLearner& other)
+ const WeakLearner& other)
{
- // note: put a fail safe for the variable 'classes' or
- // remove it entirely by using unique function.
+ // Count the number of classes into numClasses (assumes contiguous labels).
+ size_t numClasses = (arma::max(labels) - arma::min(labels)) + 1;
+
int i, j, k;
double rt, alphat = 0.0, zt;
+ ztAccumulator = 1.0;
+
// To be used for prediction by the weak learner.
arma::Row<size_t> predictedLabels(labels.n_cols);
@@ -62,7 +64,7 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
MatType tempData(data);
// Build the classification matrix yt from the labels.
- arma::mat yt(predictedLabels.n_cols, classes);
+ arma::mat yt(predictedLabels.n_cols, numClasses);
// Build a classification matrix of the form D(i,l)
// where i is the ith instance
@@ -71,37 +73,32 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
// ht(x), filled in after every round of weak learner prediction
// by using the buildClassificationMatrix function
- arma::mat ht(predictedLabels.n_cols, classes);
+ arma::mat ht(predictedLabels.n_cols, numClasses);
// This matrix is a helper matrix used to calculate the final hypothesis.
- arma::mat sumFinalH(predictedLabels.n_cols, classes);
+ arma::mat sumFinalH(predictedLabels.n_cols, numClasses);
sumFinalH.fill(0.0);
// load the initial weights into a 2-D matrix
- const double initWeight = (double) 1 / (data.n_cols * classes);
- arma::mat D(data.n_cols, classes);
+ const double initWeight = (double) 1 / (data.n_cols * numClasses);
+ arma::mat D(data.n_cols, numClasses);
D.fill(initWeight);
- // D.print("The value of D after initialization.");
// The 2-D weights are compressed into this row vector,
// which is then fed to the weak learner (the perceptron).
arma::rowvec weights(predictedLabels.n_cols);
- // weights.print("This is the value of weight just after initialization.");
+
// This is the final hypothesis.
arma::Row<size_t> finalH(predictedLabels.n_cols);
-
- // int localErrorCount;
// now start the boosting rounds
for (i = 0; i < iterations; i++)
{
-
// Initialized to zero in every round.
rt = 0.0;
zt = 0.0;
// Build the weight vectors
-
buildWeightMatrix(D, weights);
// call the other weak learner and train the labels.
@@ -111,15 +108,6 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
// Now, from predictedLabels, build ht, the weak hypothesis.
buildClassificationMatrix(ht, predictedLabels);
-/* localErrorCount = 0;
- for (int m = 0; m < labels.n_cols; m++)
- if (labels(m) != predictedLabels(m))
- {
- localErrorCount++;
- // std::cout<<m<<"th error.\n";
- }
- std::cout<<"Local Error is: "<<localErrorCount<<"\n";
- std::cout<<"Local Error Rate: "<<(double)localErrorCount/predictedLabels.n_cols<<"\n";*/
// Now, start calculation of alpha(t) using ht
// begin calculation of rt
@@ -140,7 +128,6 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
{
for (k = 0;k < D.n_cols; k++)
{
-
// we calculate zt, the normalization constant
zt += D(j,k) * exp(-1 * alphat * yt(j,k) * ht(j,k));
D(j,k) = D(j,k) * exp(-1 * alphat * yt(j,k) * ht(j,k));
@@ -149,15 +136,17 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
sumFinalH(j,k) += (alphat * ht(j,k));
}
}
+
// normalization of D
-
D = D / zt;
+
+ // Accumulate the value of zt for the Hamming loss bound.
+ ztAccumulator *= zt;
}
// Iterations are over, now build a strong hypothesis
// from a weighted combination of these weak hypotheses.
- // This step of storing it in a temporary row vector can be improved upon ?
arma::rowvec tempSumFinalH;
arma::uword max_index;
for (i = 0;i < sumFinalH.n_rows; i++)
@@ -167,18 +156,6 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
finalH(i) = max_index;
}
finalHypothesis = finalH;
- // labels.print("These are the labels.");
- // finalH.print("This is the final hypothesis.");
- /*int counterror = 0;
- for (i = 0; i < labels.n_cols; i++)
- if(labels(i) != finalH(i))
- {
- std::cout<<i<<"th prediction not correct!\n";
- counterror++;
- }
- std::cout<<"\nFinally - There are "<<counterror<<" number of misclassified records.\n";
- std::cout<<"The error rate is: "<<(double)counterror/labels.n_cols;*/
- //finalH is the final hypothesis.
}
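For reference, the loop above implements the standard AdaBoost.MH
distribution update of Schapire & Singer, with i indexing instances and
l indexing labels; in LaTeX form:

    D_{t+1}(i,l) = \frac{D_t(i,l)\, e^{-\alpha_t y_t(i,l) h_t(i,l)}}{Z_t},
    \qquad
    Z_t = \sum_{i,l} D_t(i,l)\, e^{-\alpha_t y_t(i,l) h_t(i,l)}

The training Hamming loss is bounded above by the product of the Z_t
values, which is exactly what ztAccumulator tracks and what the new test
checks.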
/**
diff --git a/src/mlpack/methods/adaboost/adaboost_main.cpp b/src/mlpack/methods/adaboost/adaboost_main.cpp
index fbd256d..3cb9028 100644
--- a/src/mlpack/methods/adaboost/adaboost_main.cpp
+++ b/src/mlpack/methods/adaboost/adaboost_main.cpp
@@ -2,7 +2,7 @@
* @file: adaboost_main.cpp
* @author: Udit Saxena
*
- *
+ *
*/
#include <mlpack/core.hpp>
@@ -81,22 +81,15 @@ int main(int argc, char *argv[])
<< ")!" << std::endl;
int iterations = CLI::GetParam<int>("iterations");
- int classes = 3;
-
// Define your own weak learner; a perceptron, in this case.
- int iter = 4000;
+ // Define the number of iterations of the perceptron.
+ int iter = 400;
perceptron::Perceptron<> p(trainingData, labels.t(), iter);
Timer::Start("Training");
- Adaboost<> a(trainingData, labels.t(), iterations, classes, p);
+ Adaboost<> a(trainingData, labels.t(), iterations, p);
Timer::Stop("Training");
- // vec results;
- // data::RevertLabels(predictedLabels, mappings, results);
-
- // const string outputFilename = CLI::GetParam<string>("output");
- // data::Save(outputFilename, results, true, true);
-
return 0;
}
\ No newline at end of file
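The hard-coded classes = 3 is gone: the constructor now derives the class
count itself via max - min + 1. The note removed from adaboost_impl.hpp
suggested the unique function as the more robust route; a hedged sketch of
that alternative (not what this commit implements):

    #include <armadillo>

    // Count classes with arma::unique; unlike max - min + 1, this
    // tolerates gaps in the label values.
    size_t CountClasses(const arma::Row<size_t>& labels)
    {
      const arma::Row<size_t> uniqueLabels = arma::unique(labels);
      return uniqueLabels.n_elem;
    }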
diff --git a/src/mlpack/tests/adaboost_test.cpp b/src/mlpack/tests/adaboost_test.cpp
index c173937..880c678 100644
--- a/src/mlpack/tests/adaboost_test.cpp
+++ b/src/mlpack/tests/adaboost_test.cpp
@@ -17,7 +17,12 @@ using namespace mlpack::adaboost;
BOOST_AUTO_TEST_SUITE(AdaboostTest);
-BOOST_AUTO_TEST_CASE(IrisSet)
+/**
+ * This test case runs the AdaBoost.MH algorithm on the UCI Iris dataset.
+ * It checks that the Hamming loss does not breach the upper bound
+ * provided by ztAccumulator.
+ */
+BOOST_AUTO_TEST_CASE(HammingLossBound)
{
arma::mat inputData;
@@ -38,17 +43,15 @@ BOOST_AUTO_TEST_CASE(IrisSet)
perceptron::Perceptron<> p(inputData, labels.row(0), perceptron_iter);
// Define parameters for AdaBoost.
- int iterations = 15;
- int classes = 3;
- Adaboost<> a(inputData, labels.row(0), iterations, classes, p);
+ int iterations = 100;
+ Adaboost<> a(inputData, labels.row(0), iterations, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
if(labels(i) != a.finalHypothesis(i))
- {
- std::cout<<i<<" prediction not correct!\n";
countError++;
- }
- std::cout<<"\nFinally - There are "<<countError<<" number of misclassified records.\n";
- std::cout<<"The error rate is: "<<(double)countError * 100/labels.n_cols<<"%\n";
+ double hammingLoss = (double) countError / labels.n_cols;
+
+ BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
}
+
BOOST_AUTO_TEST_SUITE_END();
\ No newline at end of file
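Putting it together, the new interface can be exercised end to end much
as the test does; a minimal sketch (file names and iteration counts are
illustrative, not part of the commit):

    #include <mlpack/core.hpp>
    #include <mlpack/methods/adaboost/adaboost.hpp>
    #include <mlpack/methods/perceptron/perceptron.hpp>

    using namespace mlpack;

    int main()
    {
      arma::mat data;
      data::Load("iris.csv", data, true);           // illustrative path

      arma::Mat<size_t> labels;
      data::Load("iris_labels.txt", labels, true);  // illustrative path

      // A perceptron weak learner, then 100 rounds of AdaBoost.MH.
      perceptron::Perceptron<> p(data, labels.row(0), 400);
      adaboost::Adaboost<> a(data, labels.row(0), 100, p);

      // The training Hamming loss should respect the zt product bound.
      size_t errors = 0;
      for (size_t i = 0; i < labels.n_cols; ++i)
        if (labels(i) != a.finalHypothesis(i))
          ++errors;
      const double hammingLoss = (double) errors / labels.n_cols;
      return (hammingLoss <= a.ztAccumulator) ? 0 : 1;
    }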
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git