[mlpack] 09/58: Changes to Decision stump and AdaBoost.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Tue Sep 9 13:19:39 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 244175c0147a23d1d825ab7e1cbbeb82f6a1c8bd
Author: saxena.udit <saxena.udit at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Sun Aug 17 07:09:20 2014 +0000
Changes to Decision stump and AdaBoost.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17054 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/adaboost/adaboost.hpp | 7 ++--
src/mlpack/methods/adaboost/adaboost_impl.hpp | 20 +++++++----
.../methods/decision_stump/decision_stump.hpp | 18 ++++------
.../methods/decision_stump/decision_stump_impl.hpp | 42 +++++++++++-----------
src/mlpack/tests/adaboost_test.cpp | 18 ++++++----
5 files changed, 58 insertions(+), 47 deletions(-)
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
index 58ee336..96cfc04 100644
--- a/src/mlpack/methods/adaboost/adaboost.hpp
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -53,8 +53,8 @@ class AdaBoost
// Stores the final classification of the Labels.
arma::Row<size_t> finalHypothesis;
- // To check for the bound for the hammingLoss.
- double ztAccumulator;
+ // Return the value of ztProduct
+ double GetztProduct() { return ztProduct; }
// The tolerance for change in rt and when to stop.
double tolerance;
@@ -78,8 +78,9 @@ private:
std::vector<WeakLearner> wl;
std::vector<double> alpha;
- std::vector<double> z;
+ // To check for the bound for the hammingLoss.
+ double ztProduct;
}; // class AdaBoost
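Note on the hunk above: ztAccumulator, formerly a public data member, becomes a
private ztProduct exposed through the GetztProduct() accessor. The stored value
is the product of the per-round normalizers z_t of the weight distribution D,
which by the standard AdaBoost analysis (a known result, not something
introduced by this patch) upper-bounds the training Hamming loss:

    \frac{1}{N} \sum_{i=1}^{N} \mathbf{1}\!\left[ H(x_i) \neq y_i \right]
        \;\le\; \prod_{t=1}^{T} z_t

This is exactly the inequality the updated tests below check through
GetztProduct().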
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
index 5902df6..54da3bb 100644
--- a/src/mlpack/methods/adaboost/adaboost_impl.hpp
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -52,12 +52,10 @@ AdaBoost<MatType, WeakLearner>::AdaBoost(
// crt is for stopping the iterations when rt
// stops changing by less than a tolerant value.
- ztAccumulator = 1.0;
-
// crt is cumulative rt for stopping the iterations when rt
// stops changing by less than a tolerant value.
- ztAccumulator = 1.0;
+ ztProduct = 1.0;
// ztAccumulator is
// To be used for prediction by the Weak Learner for prediction.
@@ -183,8 +181,7 @@ AdaBoost<MatType, WeakLearner>::AdaBoost(
D = D / zt;
// Accumulating the value of zt for the Hamming Loss bound.
- ztAccumulator *= zt;
- z.push_back(zt);
+ ztProduct *= zt;
}
// Iterations are over, now build a strong hypothesis
@@ -213,7 +210,7 @@ void AdaBoost<MatType, WeakLearner>::Classify(
arma::Row<size_t>& predictedLabels)
{
arma::Row<size_t> tempPredictedLabels(predictedLabels.n_cols);
- arma::mat cMatrix(test.n_cols, numClasses);
+ arma::mat cMatrix(numClasses, test.n_cols);
cMatrix.zeros();
predictedLabels.zeros();
@@ -223,7 +220,7 @@ void AdaBoost<MatType, WeakLearner>::Classify(
wl[i].Classify(test, tempPredictedLabels);
for (int j = 0; j < tempPredictedLabels.n_cols; j++)
- cMatrix(j, tempPredictedLabels(j)) += (alpha[i] * tempPredictedLabels(j));
+ cMatrix(tempPredictedLabels(j), j) += (alpha[i] * tempPredictedLabels(j));
}
arma::rowvec cMRow;
@@ -261,6 +258,15 @@ void AdaBoost<MatType, WeakLearner>::BuildWeightMatrix(
}
}
+/*/**
+ * Return the value of ztProduct
+ */
+ /*
+template <typename MatType, typename WeakLearner>
+double GetztProduct()
+{
+ return ztProduct;
+}*/
} // namespace adaboost
} // namespace mlpack
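Note on the Classify() hunk above: cMatrix is now laid out as
numClasses x test.n_cols, so the vote of weak learner i for point j is
accumulated at (predicted class, j) rather than at the transposed position. A
minimal sketch of that vote-accumulation pattern follows (illustrative only:
the helper name AccumulateVotes is hypothetical, and it adds plain alpha[i]
per vote, whereas the patched code additionally scales by the predicted label
value):

    #include <armadillo>
    #include <vector>

    // Accumulate weighted votes in a (class x point) matrix and take the
    // per-column argmax, mirroring the corrected cMatrix indexing.
    arma::Row<size_t> AccumulateVotes(
        const std::vector<arma::Row<size_t>>& weakPredictions, // one row per weak learner
        const std::vector<double>& alpha,                       // weak learner weights
        const size_t numClasses)
    {
      const size_t numPoints = weakPredictions.front().n_cols;
      arma::mat votes(numClasses, numPoints, arma::fill::zeros);

      for (size_t i = 0; i < weakPredictions.size(); ++i)
        for (size_t j = 0; j < numPoints; ++j)
          votes(weakPredictions[i](j), j) += alpha[i]; // vote of learner i for point j

      arma::Row<size_t> labels(numPoints);
      for (size_t j = 0; j < numPoints; ++j)
      {
        const arma::vec column = votes.col(j);
        labels(j) = column.index_max(); // strongest class wins
      }
      return labels;
    }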
diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index de1418a..8e8eb1b 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -110,10 +110,10 @@ class DecisionStump
* candidate for the splitting attribute.
* @param isWeight Whether we need to run a weighted Decision Stump.
*/
- template <typename W>
+ template <bool isWeight>
double SetupSplitAttribute(const arma::rowvec& attribute,
const arma::Row<size_t>& labels,
- W isWeight);
+ const arma::rowvec& weightD);
/**
* After having decided the attribute on which to split, train on that
@@ -154,9 +154,9 @@ class DecisionStump
* @param labels Corresponding labels of the attribute.
* @param isWeight Whether we need to run a weighted Decision Stump.
*/
- template <typename LabelType, typename W>
+ template <typename LabelType, bool isWeight>
double CalculateEntropy(arma::subview_row<LabelType> labels, int begin,
- W isWeight);
+ const arma::rowvec& tempD);
/**
* Train the decision stump on the given data and labels.
@@ -165,14 +165,10 @@ class DecisionStump
* @param labels Labels for dataset.
* @param isWeight Whether we need to run a weighted Decision Stump.
*/
- template <typename W>
- void Train(const MatType& data, const arma::Row<size_t>& labels, W isWeight);
+ template <bool isWeight>
+ void Train(const MatType& data, const arma::Row<size_t>& labels,
+ const arma::rowvec& weightD);
- //! To store the weight vectors for boosting purposes.
- arma::rowvec weightD;
-
- //! To store reordered weight vectors for boosting purposes.
- arma::rowvec tempD;
};
}; // namespace decision_stump
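Note on the decision_stump.hpp hunk above: the stump no longer stores weightD
and tempD as members; the weight vector is passed explicitly, and the weighted
vs. unweighted path is selected at compile time through the bool isWeight
template parameter (Train<false>(data, labels, weightD) in the plain
constructor, Train<true>(data, labels, weights) in the boosting constructor,
as shown in decision_stump_impl.hpp below). A minimal sketch of that dispatch
pattern, assuming the weighted path simply reads a per-point boosting weight
while the unweighted path counts every point once (the helper WeightOf is
hypothetical, not mlpack code):

    #include <armadillo>

    // Select weighted or unweighted behaviour per template instantiation;
    // no runtime flag is passed around.
    template<bool UseWeights>
    double WeightOf(const arma::rowvec& weights, const size_t index)
    {
      if (UseWeights)
        return weights(index); // boosting weight of this point
      else
        return 1.0;            // unweighted: every point counts once
    }

    // Usage, mirroring the two constructors:
    //   WeightOf<false>(arma::rowvec(), i); // plain decision stump
    //   WeightOf<true>(weights, i);         // weighted stump driven by AdaBoost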
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index e3b5824..e43f416 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -30,9 +30,10 @@ DecisionStump<MatType>::DecisionStump(const MatType& data,
{
numClass = classes;
bucketSize = inpBucketSize;
- const bool isWeight = false;
- Train<bool>(data, labels, isWeight);
+ arma::rowvec weightD;
+
+ Train<false>(data, labels, weightD);
}
/**
@@ -43,14 +44,15 @@ DecisionStump<MatType>::DecisionStump(const MatType& data,
* @param isWeight Whether we need to run a weighted Decision Stump.
*/
template<typename MatType>
-template <typename W>
-void DecisionStump<MatType>::Train(const MatType& data, const arma::Row<size_t>& labels, W isWeight)
+template <bool isWeight>
+void DecisionStump<MatType>::Train(const MatType& data, const arma::Row<size_t>& labels,
+ const arma::rowvec& weightD)
{
// If classLabels are not all identical, proceed with training.
int bestAtt = 0;
double entropy;
- const double rootEntropy = CalculateEntropy<size_t, W>(
- labels.subvec(0, labels.n_elem - 1), 0, isWeight);
+ const double rootEntropy = CalculateEntropy<size_t, isWeight>(
+ labels.subvec(0, labels.n_elem - 1), 0, weightD);
double gain, bestGain = 0.0;
for (int i = 0; i < data.n_rows; i++)
@@ -60,7 +62,7 @@ void DecisionStump<MatType>::Train(const MatType& data, const arma::Row<size_t>&
{
// For each attribute with non-identical values, treat it as a potential
// splitting attribute and calculate entropy if split on it.
- entropy = SetupSplitAttribute<W>(data.row(i), labels, isWeight);
+ entropy = SetupSplitAttribute<isWeight>(data.row(i), labels, weightD);
gain = rootEntropy - entropy;
// Find the attribute with the best entropy so that the gain is
@@ -137,10 +139,10 @@ DecisionStump<MatType>::DecisionStump(
numClass = other.numClass;
bucketSize = other.bucketSize;
- weightD = weights;
- tempD = weightD;
- const bool isWeight = true;
- Train<bool>(data, labels, isWeight);
+ // weightD = weights;
+ // tempD = weightD;
+
+ Train<true>(data, labels, weights);
}
/**
@@ -152,11 +154,11 @@ DecisionStump<MatType>::DecisionStump(
* @param isWeight Whether we need to run a weighted Decision Stump.
*/
template <typename MatType>
-template <typename W>
+template <bool isWeight>
double DecisionStump<MatType>::SetupSplitAttribute(
const arma::rowvec& attribute,
const arma::Row<size_t>& labels,
- W isWeight)
+ const arma::rowvec& weightD)
{
int i, count, begin, end;
double entropy = 0.0;
@@ -171,7 +173,7 @@ double DecisionStump<MatType>::SetupSplitAttribute(
arma::Row<size_t> sortedLabels(attribute.n_elem);
sortedLabels.fill(0);
- tempD = arma::rowvec(weightD.n_cols);
+ arma::rowvec tempD = arma::rowvec(weightD.n_cols);
for (i = 0; i < attribute.n_elem; i++)
{
@@ -199,8 +201,8 @@ double DecisionStump<MatType>::SetupSplitAttribute(
// Use ratioEl to calculate the ratio of elements in this split.
const double ratioEl = ((double) (end - begin + 1) / sortedLabels.n_elem);
- entropy += ratioEl * CalculateEntropy<size_t, W>(
- sortedLabels.subvec(begin, end), begin, isWeight);
+ entropy += ratioEl * CalculateEntropy<size_t, isWeight>(
+ sortedLabels.subvec(begin, end), begin, tempD);
i++;
}
else if (sortedLabels(i) != sortedLabels(i + 1))
@@ -226,8 +228,8 @@ double DecisionStump<MatType>::SetupSplitAttribute(
}
const double ratioEl = ((double) (end - begin + 1) / sortedLabels.n_elem);
- entropy += ratioEl * CalculateEntropy<size_t, W>(
- sortedLabels.subvec(begin, end), begin, isWeight);
+ entropy += ratioEl * CalculateEntropy<size_t, isWeight>(
+ sortedLabels.subvec(begin, end), begin, tempD);
i = end + 1;
count = 0;
@@ -418,10 +420,10 @@ int DecisionStump<MatType>::IsDistinct(const arma::Row<rType>& featureRow)
* @param isWeight Whether we need to run a weighted Decision Stump.
*/
template<typename MatType>
-template<typename LabelType, typename W>
+template<typename LabelType, bool isWeight>
double DecisionStump<MatType>::CalculateEntropy(
arma::subview_row<LabelType> labels,
- int begin, W isWeight)
+ int begin, const arma::rowvec& tempD)
{
double entropy = 0.0;
size_t j;
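Note on the CalculateEntropy() change above: the method now receives the
reordered weight vector tempD instead of a bool flag. The quantity it computes
is the (optionally weighted) entropy of the labels falling into a split;
assuming the usual definition, with all w_i = 1 in the unweighted case:

    H(S) \;=\; -\sum_{c} p_c \log_2 p_c,
    \qquad
    p_c \;=\; \frac{\sum_{i \in S,\; y_i = c} w_i}{\sum_{i \in S} w_i}

SetupSplitAttribute() then weights each split's entropy by its fraction of the
points (ratioEl above) before comparing the resulting gain across attributes.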
diff --git a/src/mlpack/tests/adaboost_test.cpp b/src/mlpack/tests/adaboost_test.cpp
index 3abba11..8769866 100644
--- a/src/mlpack/tests/adaboost_test.cpp
+++ b/src/mlpack/tests/adaboost_test.cpp
@@ -53,7 +53,8 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundIris)
countError++;
double hammingLoss = (double) countError / labels.n_cols;
- BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+ double ztP = a.GetztProduct();
+ BOOST_REQUIRE(hammingLoss <= ztP);
}
/**
@@ -139,7 +140,8 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn)
countError++;
double hammingLoss = (double) countError / labels.n_cols;
- BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+ double ztP = a.GetztProduct();
+ BOOST_REQUIRE(hammingLoss <= ztP);
}
/**
@@ -226,7 +228,8 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData)
countError++;
double hammingLoss = (double) countError / labels.n_cols;
- BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+ double ztP = a.GetztProduct();
+ BOOST_REQUIRE(hammingLoss <= ztP);
}
/**
@@ -316,7 +319,8 @@ BOOST_AUTO_TEST_CASE(HammingLossIris_DS)
countError++;
double hammingLoss = (double) countError / labels.n_cols;
- BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+ double ztP = a.GetztProduct();
+ BOOST_REQUIRE(hammingLoss <= ztP);
}
/**
@@ -413,7 +417,8 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundVertebralColumn_DS)
countError++;
double hammingLoss = (double) countError / labels.n_cols;
- BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+ double ztP = a.GetztProduct();
+ BOOST_REQUIRE(hammingLoss <= ztP);
}
/**
@@ -508,7 +513,8 @@ BOOST_AUTO_TEST_CASE(HammingLossBoundNonLinearSepData_DS)
countError++;
double hammingLoss = (double) countError / labels.n_cols;
- BOOST_REQUIRE(hammingLoss <= a.ztAccumulator);
+ double ztP = a.GetztProduct();
+ BOOST_REQUIRE(hammingLoss <= ztP);
}
/**
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git