[mlpack] 305/324: Refactoring and optimizations on Adaboost.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:21 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit c56d84a9ad08a8f59d3e45deb40c7c566c4a134e
Author: saxena.udit <saxena.udit at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Sat Aug 9 17:36:40 2014 +0000
Refactoring and optimizations on Adaboost.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16990 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/adaboost/adaboost.hpp | 28 ++---
src/mlpack/methods/adaboost/adaboost_impl.hpp | 125 +++++++++++----------
src/mlpack/methods/adaboost/adaboost_main.cpp | 40 ++++++-
.../methods/decision_stump/decision_stump_impl.hpp | 3 -
4 files changed, 117 insertions(+), 79 deletions(-)
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
index 5d26e27..b013355 100644
--- a/src/mlpack/methods/adaboost/adaboost.hpp
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -50,17 +50,16 @@ class Adaboost
const double tol,
const WeakLearner& other);
- /**
- * This function helps in building a classification Matrix which is of
- * form:
- * -1 if l is not the correct label
- * 1 if l is the correct label
- *
- * @param t The classification matrix to be built
- * @param l The labels from which the classification matrix is to be built.
- */
- void BuildClassificationMatrix(arma::mat& t, const arma::Row<size_t>& l);
+ // Stores the final classification of the Labels.
+ arma::Row<size_t> finalHypothesis;
+ // To check for the bound for the hammingLoss.
+ double ztAccumulator;
+
+ // The tolerance for change in rt and when to stop.
+ double tolerance;
+
+private:
/**
* This function helps in building the Weight Distribution matrix
* which is updated during every iteration. It calculates the
@@ -73,14 +72,7 @@ class Adaboost
*/
void BuildWeightMatrix(const arma::mat& D, arma::rowvec& weights);
- // Stores the final classification of the Labels.
- arma::Row<size_t> finalHypothesis;
-
- // To check for the bound for the hammingLoss.
- double ztAccumulator;
-
- // The tolerance for change in rt and when to stop.
- double tolerance;
+
}; // class Adaboost
} // namespace adaboost
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
index d420880..70cc9a7 100644
--- a/src/mlpack/methods/adaboost/adaboost_impl.hpp
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -45,7 +45,7 @@ Adaboost<MatType, WeakLearner>::Adaboost(
// Count the number of classes.
const size_t numClasses = (arma::max(labels) - arma::min(labels)) + 1;
tolerance = tol;
- int i, j, k;
+
double rt, crt, alphat = 0.0, zt;
// double tolerance = 1e-8;
// std::cout<<"Tolerance is "<<tolerance<<"\n";
@@ -53,25 +53,19 @@ Adaboost<MatType, WeakLearner>::Adaboost(
// stops changing by less than a tolerant value.
ztAccumulator = 1.0;
-
+
+ // crt is the cumulative rt, used to stop the iterations when rt
+ // changes by less than the tolerance value.
+
+ ztAccumulator = 1.0;
+ // ztAccumulator is used to check the bound for the hammingLoss.
+
// To be used for prediction by the Weak Learner for prediction.
arma::Row<size_t> predictedLabels(labels.n_cols);
// Use tempData to modify input Data for incorporating weights.
MatType tempData(data);
- // Build the classification Matrix yt from labels
- arma::mat yt(predictedLabels.n_cols, numClasses);
-
- // Build a classification matrix of the form D(i,l)
- // where i is the ith instance
- // l is the lth class.
- BuildClassificationMatrix(yt, labels);
-
- // ht(x), to be loaded after a round of prediction every time the weak
- // learner is run, by using the BuildClassificationMatrix function
- arma::mat ht(predictedLabels.n_cols, numClasses);
-
// This matrix is a helper matrix used to calculate the final hypothesis.
arma::mat sumFinalH(predictedLabels.n_cols, numClasses);
sumFinalH.fill(0.0);
@@ -89,11 +83,15 @@ Adaboost<MatType, WeakLearner>::Adaboost(
arma::Row<size_t> finalH(predictedLabels.n_cols);
// now start the boosting rounds
- for (i = 0; i < iterations; i++)
+ for (int i = 0; i < iterations; i++)
{
// std::cout<<"Run "<<i<<" times.\n";
// Initialized to zero in every round.
- rt = 0.0;
+ // rt is used for calculation of alphat; it is the weighted error
+ // rt = (sum)D(i)y(i)ht(xi)
+ rt = 0.0;
+
+ // zt is used for weight normalization.
zt = 0.0;
// Build the weight vectors
@@ -104,11 +102,27 @@ Adaboost<MatType, WeakLearner>::Adaboost(
w.Classify(tempData, predictedLabels);
//Now from predictedLabels, build ht, the weak hypothesis
- BuildClassificationMatrix(ht, predictedLabels);
-
+ // buildClassificationMatrix(ht, predictedLabels);
+
// Now, start calculation of alpha(t) using ht
- rt += arma::accu(D % yt % ht);
+ for (int j = 0;j < D.n_rows; j++) // instead of D, ht
+ {
+ if (predictedLabels(j) == labels(j))
+ {
+ for (int k = 0;k < numClasses; k++)
+ rt += D(j,k);
+ }
+
+ else
+ {
+ for (int k = 0;k < numClasses; k++)
+ rt -= D(j,k);
+ }
+ }
+ // end calculation of rt
+ // std::cout<<"Value of rt is: "<<rt<<"\n";
+
if (i > 0)
{
@@ -117,21 +131,45 @@ Adaboost<MatType, WeakLearner>::Adaboost(
}
crt = rt;
+ // our goal is to find alphat which minimizes or approximately minimizes
+ // the value of Z as a function of alpha.
alphat = 0.5 * log((1 + rt) / (1 - rt));
// end calculation of alphat
// now start modifying weights
-
- for (j = 0; j < D.n_rows; j++)
+ for (int j = 0;j < D.n_rows; j++)
{
- for (k = 0; k < D.n_cols; k++)
+ double expo = exp(alphat);
+ if (predictedLabels(j) == labels(j))
{
- // we calculate zt, the normalization constant
- zt += D(j, k) * exp(-alphat * yt(j, k) * ht(j, k));
- D(j, k) = D(j, k) * exp(-alphat * yt(j, k) * ht(j, k));
-
- // adding to the matrix of FinalHypothesis
- sumFinalH(j, k) += (alphat * ht(j, k));
+ for (int k = 0;k < D.n_cols; k++)
+ {
+ // we calculate zt, the normalization constant
+ zt += D(j,k) / expo; // * exp(-1 * alphat * yt(j,k) * ht(j,k));
+ D(j,k) = D(j,k) / expo;
+
+ // adding to the matrix of FinalHypothesis
+ // sumFinalH(j,k) += (alphat * ht(j,k));
+ if (k == labels(j))
+ sumFinalH(j,k) += (alphat);// * ht(j,k));
+ else
+ sumFinalH(j,k) -= (alphat);
+ }
+ }
+ else
+ {
+ for (int k = 0;k < D.n_cols; k++)
+ {
+ // we calculate zt, the normalization constant
+ zt += D(j,k) * expo;
+ D(j,k) = D(j,k) * expo;
+
+ // adding to the matrix of FinalHypothesis
+ if (k == labels(j))
+ sumFinalH(j,k) += (alphat);// * ht(j,k));
+ else
+ sumFinalH(j,k) -= (alphat);
+ }
}
}
@@ -147,7 +185,8 @@ Adaboost<MatType, WeakLearner>::Adaboost(
arma::rowvec tempSumFinalH;
arma::uword max_index;
- for (i = 0; i < sumFinalH.n_rows; i++)
+
+ for (int i = 0;i < sumFinalH.n_rows; i++)
{
tempSumFinalH = sumFinalH.row(i);
tempSumFinalH.max(max_index);
@@ -157,34 +196,6 @@ Adaboost<MatType, WeakLearner>::Adaboost(
}
/**
- * This function helps in building a classification Matrix which is of
- * form:
- * -1 if l is not the correct label
- * 1 if l is the correct label
- *
- * @param t The classification matrix to be built
- * @param l The labels from which the classification matrix is to be built.
- */
-template <typename MatType, typename WeakLearner>
-void Adaboost<MatType, WeakLearner>::BuildClassificationMatrix(
- arma::mat& t,
- const arma::Row<size_t>& l)
-{
- int i, j;
-
- for (i = 0; i < t.n_rows; i++)
- {
- for (j = 0; j < t.n_cols; j++)
- {
- if (j == l(i))
- t(i, j) = 1.0;
- else
- t(i, j) = -1.0;
- }
- }
-}
-
-/**
* This function helps in building the Weight Distribution matrix
* which is updated during every iteration. It calculates the
* "difficulty" in classifying a point by adding the weights for all
diff --git a/src/mlpack/methods/adaboost/adaboost_main.cpp b/src/mlpack/methods/adaboost/adaboost_main.cpp
index 82e25b7..d6a1c12 100644
--- a/src/mlpack/methods/adaboost/adaboost_main.cpp
+++ b/src/mlpack/methods/adaboost/adaboost_main.cpp
@@ -2,6 +2,30 @@
* @file: adaboost_main.cpp
* @author: Udit Saxena
*
+ * Implementation of the Adaboost main file
+ *
+ * @code
+ * @article{Schapire:1999:IBA:337859.337870,
+ * author = {Schapire, Robert E. and Singer, Yoram},
+ * title = {Improved Boosting Algorithms Using Confidence-rated Predictions},
+ * journal = {Mach. Learn.},
+ * issue_date = {Dec. 1999},
+ * volume = {37},
+ * number = {3},
+ * month = dec,
+ * year = {1999},
+ * issn = {0885-6125},
+ * pages = {297--336},
+ * numpages = {40},
+ * url = {http://dx.doi.org/10.1023/A:1007614523901},
+ * doi = {10.1023/A:1007614523901},
+ * acmid = {337870},
+ * publisher = {Kluwer Academic Publishers},
+ * address = {Hingham, MA, USA},
+ * keywords = {boosting algorithms, decision trees, multiclass classification,
+ * output coding
+ * }
+ * @endcode
*
*/
@@ -13,7 +37,21 @@ using namespace std;
using namespace arma;
using namespace mlpack::adaboost;
-PROGRAM_INFO("","");
+PROGRAM_INFO("Adaboost","This program implements the Adaboost (or Adaptive Boost)"
+ " algorithm. The variant of Adaboost implemented here is Adaboost.mh. It uses a"
+ " weak learner, either of Decision Stumps or a Perceptron, and over many"
+ " iterations, creates a strong learner. It runs these iterations till a tolerance"
+ " value is crossed for change in the value of rt."
+ "\n"
+ "This program allows training of a adaboost object, and then application of "
+ "the strong learner to a test dataset. To train "
+ "a training dataset must be passed to --train_file (-t). Labels can either"
+ " be present as the last dimension of the training dataset, or given "
+ "explicitly with the --labels_file (-l) parameter.\n"
+ "\n"
+ "A test file is given through the --test_file (-T) parameter. The "
+ "predicted labels for the test set will be stored in the file specified by "
+ "the --output_file (-o) parameter.");
//necessary parameters
PARAM_STRING_REQ("train_file", "A file containing the training set.", "t");
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index ee095ff..348ab9a 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -11,9 +11,6 @@
// In case it hasn't been included yet.
#include "decision_stump.hpp"
-#include <set>
-#include <algorithm>
-
namespace mlpack {
namespace decision_stump {
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list