[mlpack] 211/324: Adaboost test improved and now works. Improved adaboost.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:11 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 317f2201da78dd649d6728754f1ab28a41e20a39
Author: saxena.udit <saxena.udit at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Thu Jul 24 20:34:38 2014 +0000
Adaboost test improved and now works. Improved adaboost.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16853 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/adaboost/adaboost.hpp | 37 ++++++++++++++++--
src/mlpack/methods/adaboost/adaboost_impl.hpp | 55 ++++++++-------------------
src/mlpack/methods/adaboost/adaboost_main.cpp | 15 ++------
src/mlpack/tests/adaboost_test.cpp | 21 +++++-----
4 files changed, 66 insertions(+), 62 deletions(-)
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
index 267a6a0..cfca3bc 100644
--- a/src/mlpack/methods/adaboost/adaboost.hpp
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -19,15 +19,46 @@ template <typename MatType = arma::mat, typename WeakLearner =
class Adaboost
{
public:
- arma::Row<size_t> finalHypothesis;
-
+ /**
+ * Constructor. Currently runs the AdaBoost.MH algorithm.
+ *
+ * @param data Input data.
+ * @param labels Corresponding labels.
+ * @param iterations Number of boosting rounds.
+ * @param other Weak learner, which has already been initialized.
+ */
Adaboost(const MatType& data, const arma::Row<size_t>& labels,
- int iterations, size_t classes, const WeakLearner& other);
+ int iterations, const WeakLearner& other);
+ /**
+ * Builds a classification matrix of the form:
+ * -1 if l is not the correct label
+ * 1 if l is the correct label
+ *
+ * @param t The classification matrix to be built.
+ * @param l The labels from which the classification matrix is built.
+ */
void buildClassificationMatrix(arma::mat& t, const arma::Row<size_t>& l);
+ /**
+ * Builds the weight vector, which is updated during every iteration.
+ * It measures the "difficulty" of classifying each instance by
+ * summing, over all labels, that instance's weights in D.
+ *
+ * @param D The two-dimensional weight matrix from which the weights
+ * are to be calculated.
+ * @param weights The output weight vector.
+ */
void buildWeightMatrix(const arma::mat& D, arma::rowvec& weights);
+ // Stores the final classification of the labels.
+ arma::Row<size_t> finalHypothesis;
+
+ // Product of the zt values; an upper bound on the training Hamming loss.
+ double ztAccumulator;
+
}; // class Adaboost
} // namespace adaboost
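For reference, the classification matrix described above is just a dense
+1/-1 encoding of the labels: row i carries +1 in the column of its true
label and -1 everywhere else. A minimal standalone sketch, assuming
Armadillo and 0-based contiguous labels (the explicit numClasses parameter
is illustrative; the class computes it internally):

    #include <armadillo>

    // Sketch of the +1/-1 encoding produced by buildClassificationMatrix.
    void BuildClassificationMatrix(arma::mat& t,
                                   const arma::Row<size_t>& l,
                                   const size_t numClasses)
    {
      t.set_size(l.n_cols, numClasses);
      t.fill(-1.0);              // -1 everywhere...
      for (size_t i = 0; i < l.n_cols; ++i)
        t(i, l(i)) = 1.0;        // ...except +1 at the correct label.
    }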
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
index 9f225f9..b39d229 100644
--- a/src/mlpack/methods/adaboost/adaboost_impl.hpp
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -42,19 +42,21 @@ namespace adaboost {
* @param data Input data
* @param labels Corresponding labels
* @param iterations Number of boosting rounds
- * @param classes Number of classes in labels
* @param other Weak Learner, which has been initialized already
*/
template<typename MatType, typename WeakLearner>
Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
const arma::Row<size_t>& labels, int iterations,
- size_t classes, const WeakLearner& other)
+ const WeakLearner& other)
{
- // note: put a fail safe for the variable 'classes' or
- // remove it entirely by using unique function.
+ // Count the number of classes into numClasses (assumes contiguous labels).
+ size_t numClasses = (arma::max(labels) - arma::min(labels)) + 1;
+
int i, j, k;
double rt, alphat = 0.0, zt;
+ ztAccumulator = 1.0;
+
// To be used for prediction by the weak learner.
arma::Row<size_t> predictedLabels(labels.n_cols);
@@ -62,7 +64,7 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
MatType tempData(data);
// Build the classification matrix yt from the labels.
- arma::mat yt(predictedLabels.n_cols, classes);
+ arma::mat yt(predictedLabels.n_cols, numClasses);
// Build a classification matrix of the form D(i,l)
// where i is the ith instance
@@ -71,37 +73,32 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
// ht(x), filled in after every round of weak learner prediction
// by using the buildClassificationMatrix function
- arma::mat ht(predictedLabels.n_cols, classes);
+ arma::mat ht(predictedLabels.n_cols, numClasses);
// This matrix is a helper matrix used to calculate the final hypothesis.
- arma::mat sumFinalH(predictedLabels.n_cols, classes);
+ arma::mat sumFinalH(predictedLabels.n_cols, numClasses);
sumFinalH.fill(0.0);
// load the initial weights into a 2-D matrix
- const double initWeight = (double) 1 / (data.n_cols * classes);
- arma::mat D(data.n_cols, classes);
+ const double initWeight = (double) 1 / (data.n_cols * numClasses);
+ arma::mat D(data.n_cols, numClasses);
D.fill(initWeight);
- // D.print("The value of D after initialization.");
// The 2-D weights are compressed into this row vector,
// which is then fed to the weak learner (the perceptron).
arma::rowvec weights(predictedLabels.n_cols);
- // weights.print("This is the value of weight just after initialization.");
+
// This is the final hypothesis.
arma::Row<size_t> finalH(predictedLabels.n_cols);
-
- // int localErrorCount;
// now start the boosting rounds
for (i = 0; i < iterations; i++)
{
-
// Initialized to zero in every round.
rt = 0.0;
zt = 0.0;
// Build the weight vectors
-
buildWeightMatrix(D, weights);
// call the other weak learner and train the labels.
@@ -111,15 +108,6 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
// Now, from predictedLabels, build ht, the weak hypothesis.
buildClassificationMatrix(ht, predictedLabels);
-/* localErrorCount = 0;
- for (int m = 0; m < labels.n_cols; m++)
- if (labels(m) != predictedLabels(m))
- {
- localErrorCount++;
- // std::cout<<m<<"th error.\n";
- }
- std::cout<<"Local Error is: "<<localErrorCount<<"\n";
- std::cout<<"Local Error Rate: "<<(double)localErrorCount/predictedLabels.n_cols<<"\n";*/
// Now, start calculation of alpha(t) using ht
// begin calculation of rt
@@ -140,7 +128,6 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
{
for (k = 0;k < D.n_cols; k++)
{
-
// we calculate zt, the normalization constant
zt += D(j,k) * exp(-1 * alphat * yt(j,k) * ht(j,k));
D(j,k) = D(j,k) * exp(-1 * alphat * yt(j,k) * ht(j,k));
@@ -149,15 +136,17 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
sumFinalH(j,k) += (alphat * ht(j,k));
}
}
+
// normalization of D
-
D = D / zt;
+
+ // Accumulate the value of zt for the Hamming loss bound.
+ ztAccumulator *= zt;
}
// Iterations are over, now build a strong hypothesis
// from a weighted combination of these weak hypotheses.
- // This step of storing it in a temporary row vector can be improved upon ?
arma::rowvec tempSumFinalH;
arma::uword max_index;
for (i = 0;i < sumFinalH.n_rows; i++)
@@ -167,18 +156,6 @@ Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
finalH(i) = max_index;
}
finalHypothesis = finalH;
- // labels.print("These are the labels.");
- // finalH.print("This is the final hypothesis.");
- /*int counterror = 0;
- for (i = 0; i < labels.n_cols; i++)
- if(labels(i) != finalH(i))
- {
- std::cout<<i<<"th prediction not correct!\n";
- counterror++;
- }
- std::cout<<"\nFinally - There are "<<counterror<<" number of misclassified records.\n";
- std::cout<<"The error rate is: "<<(double)counterror/labels.n_cols;*/
- //finalH is the final hypothesis.
}
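For reference, the loop above implements the standard AdaBoost.MH
distribution update of Schapire & Singer, with i indexing instances and
l indexing labels; in LaTeX form:

    D_{t+1}(i,l) = \frac{D_t(i,l)\, e^{-\alpha_t y_t(i,l) h_t(i,l)}}{Z_t},
    \qquad
    Z_t = \sum_{i,l} D_t(i,l)\, e^{-\alpha_t y_t(i,l) h_t(i,l)}

The training Hamming loss is bounded above by the product of the Z_t
values, which is exactly what ztAccumulator tracks and what the new test
checks.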
/**
diff --git a/src/mlpack/methods/adaboost/adaboost_main.cpp b/src/mlpack/methods/adaboost/adaboost_main.cpp
index fbd256d..3cb9028 100644
--- a/src/mlpack/methods/adaboost/adaboost_main.cpp
+++ b/src/mlpack/methods/adaboost/adaboost_main.cpp
@@ -2,7 +2,7 @@
* @file: adaboost_main.cpp
* @author: Udit Saxena
*
- *
+ *
*/
#include <mlpack/core.hpp>
@@ -81,22 +81,15 @@ int main(int argc, char *argv[])
<< ")!" << std::endl;
int iterations = CLI::GetParam<int>("iterations");
- int classes = 3;
-
// Define your own weak learner; a perceptron, in this case.
- int iter = 4000;
+ // Define the number of iterations of the perceptron.
+ int iter = 400;
perceptron::Perceptron<> p(trainingData, labels.t(), iter);
Timer::Start("Training");
- Adaboost<> a(trainingData, labels.t(), iterations, classes, p);
+ Adaboost<> a(trainingData, labels.t(), iterations, p);
Timer::Stop("Training");
- // vec results;
- // data::RevertLabels(predictedLabels, mappings, results);
-
- // const string outputFilename = CLI::GetParam<string>("output");
- // data::Save(outputFilename, results, true, true);
-
return 0;
}
\ No newline at end of file
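The hard-coded classes = 3 is gone: the constructor now derives the class
count itself via max - min + 1. The note removed from adaboost_impl.hpp
suggested the unique function as the more robust route; a hedged sketch of
that alternative (not what this commit implements):

    #include <armadillo>

    // Count classes with arma::unique; unlike max - min + 1, this
    // tolerates gaps in the label values.
    size_t CountClasses(const arma::Row<size_t>& labels)
    {
      const arma::Row<size_t> uniqueLabels = arma::unique(labels);
      return uniqueLabels.n_elem;
    }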
diff --git a/src/mlpack/tests/adaboost_test.cpp b/src/mlpack/tests/adaboost_test.cpp
index c173937..880c678 100644
--- a/src/mlpack/tests/adaboost_test.cpp
+++ b/src/mlpack/tests/adaboost_test.cpp
@@ -17,7 +17,12 @@ using namespace mlpack::adaboost;
BOOST_AUTO_TEST_SUITE(AdaboostTest);
-BOOST_AUTO_TEST_CASE(IrisSet)
+/**
+ * This test case runs the AdaBoost.MH algorithm on the UCI Iris dataset.
+ * It checks that the Hamming loss does not breach the upper bound
+ * provided by ztAccumulator.
+ */
+BOOST_AUTO_TEST_CASE(HammingLossBound)
{
arma::mat inputData;
@@ -38,17 +43,15 @@ BOOST_AUTO_TEST_CASE(IrisSet)
perceptron::Perceptron<> p(inputData, labels.row(0), perceptron_iter);
// Define parameters for AdaBoost.
- int iterations = 15;
- int classes = 3;
- Adaboost<> a(inputData, labels.row(0), iterations, classes, p);
+ int iterations = 100;
+ Adaboost<> a(inputData, labels.row(0), iterations, p);
int countError = 0;
for (size_t i = 0; i < labels.n_cols; i++)
if(labels(i) != a.finalHypothesis(i))
- {
- std::cout<<i<<" prediction not correct!\n";
countError++;
- }
- std::cout<<"\nFinally - There are "<<countError<<" number of misclassified records.\n";
- std::cout<<"The error rate is: "<<(double)countError * 100/labels.n_cols<<"%\n";
+ double hammingLoss = (double) countError / labels.n_cols;
+
+ BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
}
+
BOOST_AUTO_TEST_SUITE_END();
\ No newline at end of file
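Putting it together, the new interface can be exercised end to end much
as the test does; a minimal sketch (file names and iteration counts are
illustrative, not part of the commit):

    #include <mlpack/core.hpp>
    #include <mlpack/methods/adaboost/adaboost.hpp>
    #include <mlpack/methods/perceptron/perceptron.hpp>

    using namespace mlpack;

    int main()
    {
      arma::mat data;
      data::Load("iris.csv", data, true);           // illustrative path

      arma::Mat<size_t> labels;
      data::Load("iris_labels.txt", labels, true);  // illustrative path

      // A perceptron weak learner, then 100 rounds of AdaBoost.MH.
      perceptron::Perceptron<> p(data, labels.row(0), 400);
      adaboost::Adaboost<> a(data, labels.row(0), 100, p);

      // The training Hamming loss should respect the zt product bound.
      size_t errors = 0;
      for (size_t i = 0; i < labels.n_cols; ++i)
        if (labels(i) != a.finalHypothesis(i))
          ++errors;
      const double hammingLoss = (double) errors / labels.n_cols;
      return (hammingLoss <= a.ztAccumulator) ? 0 : 1;
    }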
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git