[mlpack] 185/324: Adaboost design issues, to be discussed, then changed later on
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:09 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 7f608d2015052c26ddef6142f34264218ac67542
Author: saxena.udit <saxena.udit at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Tue Jul 15 11:26:08 2014 +0000
Adaboost design issues, to be discussed, then changed later on
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16824 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/CMakeLists.txt | 1 +
src/mlpack/methods/adaboost/CMakeLists.txt | 26 +++++++
src/mlpack/methods/adaboost/adaboost.hpp | 31 ++++++++
src/mlpack/methods/adaboost/adaboost_impl.hpp | 87 ++++++++++++++++++++++
src/mlpack/methods/adaboost/adaboost_main.cpp | 75 +++++++++++++++++++
.../methods/decision_stump/decision_stump.hpp | 18 +++++
.../methods/decision_stump/decision_stump_impl.hpp | 32 ++++++++
src/mlpack/methods/perceptron/perceptron.hpp | 14 ++++
src/mlpack/methods/perceptron/perceptron_impl.hpp | 20 +++++
src/mlpack/tests/perceptron_test.cpp | 17 +++++
10 files changed, 321 insertions(+)
diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index 93289ea..925e74d 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -1,5 +1,6 @@
# Recurse into each method mlpack provides.
set(DIRS
+# adaboost
amf
cf
decision_stump
diff --git a/src/mlpack/methods/adaboost/CMakeLists.txt b/src/mlpack/methods/adaboost/CMakeLists.txt
new file mode 100644
index 0000000..7e29581
--- /dev/null
+++ b/src/mlpack/methods/adaboost/CMakeLists.txt
@@ -0,0 +1,26 @@
+cmake_minimum_required(VERSION 2.8)
+
+# Files belonging to this method.  Only files named here become part of the
+# MLPACK source list.
+set(SOURCES
+  adaboost.hpp
+  adaboost_impl.hpp
+)
+
+# Build the list of sources with the current directory prepended.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  list(APPEND DIR_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+
+# Hand the prefixed sources up to the parent scope, where the complete list of
+# MLPACK sources is assembled.
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+
+# Command-line executable for this method, linked against the mlpack library.
+add_executable(adaboost adaboost_main.cpp)
+target_link_libraries(adaboost mlpack)
+
+install(TARGETS adaboost RUNTIME DESTINATION bin)
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
new file mode 100644
index 0000000..8aafb07
--- /dev/null
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -0,0 +1,31 @@
+/**
+ * @file adaboost.hpp
+ * @author Udit Saxena
+ *
+ * AdaBoost header file
+ */
+
+#ifndef _MLPACK_METHODS_ADABOOST_ADABOOST_HPP
+#define _MLPACK_METHODS_ADABOOST_ADABOOST_HPP
+
+#include <mlpack/core.hpp>
+// The perceptron header sits directly in methods/perceptron/ (there is no
+// "main" subdirectory).
+#include <mlpack/methods/perceptron/perceptron.hpp>
+
+namespace mlpack {
+namespace adaboost {
+
+/**
+ * AdaBoost driver.  All of the work is currently done by the constructor,
+ * which is defined in adaboost_impl.hpp.
+ *
+ * @tparam MatType Matrix type holding the training data (arma::mat by
+ *     default).
+ * @tparam WeakLearner Weak learner type; it must be copy-constructible and
+ *     provide Classify(const MatType&, arma::Row<size_t>&).  Defaults to
+ *     mlpack::perceptron::Perceptron<>.
+ */
+template <typename MatType = arma::mat,
+          typename WeakLearner = mlpack::perceptron::Perceptron<> >
+class Adaboost
+{
+ public:
+  /**
+   * Run the boosting rounds on the given labeled data.
+   *
+   * @param data Training data.
+   * @param labels Labels of the training data.
+   * @param iterations Number of boosting rounds.
+   * @param classes Number of classes in the label set.
+   * @param other Weak learner that is copied for use in every round.
+   */
+  Adaboost(const MatType& data,
+           const arma::Row<size_t>& labels,
+           int iterations,
+           size_t classes,
+           const WeakLearner& other);
+}; // class Adaboost
+
+} // namespace adaboost
+} // namespace mlpack
+
+#include "adaboost_impl.hpp"
+
+#endif
\ No newline at end of file
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
new file mode 100644
index 0000000..da99936
--- /dev/null
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -0,0 +1,87 @@
+/*
+ * @file adaboost_impl.hpp
+ * @author Udit Saxena
+ *
+ * Implementation of the AdaBoost class
+ *
+ */
+
+#ifndef _MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP
+#define _MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP
+
+#include "adaboost.hpp"
+
+namespace mlpack {
+namespace adaboost {
+
+/**
+ * Run the AdaBoost boosting rounds.
+ *
+ * In every round each training column is scaled by its current weight, a copy
+ * of the weak learner classifies the scaled data, and the weights D are
+ * updated from the agreement between the predictions and the true labels.
+ *
+ * NOTE(review): the weak learner copy is only asked to Classify(); it is not
+ * retrained on the reweighted data, and neither alpha(t) nor the final weights
+ * are stored anywhere yet.  Both look like open design points.
+ *
+ * @param data Training data; one column per instance.
+ * @param labels Label of every training instance.
+ * @param iterations Number of boosting rounds to run.
+ * @param classes Number of classes; scales the initial weights.
+ * @param other Weak learner that is copied in every round.
+ */
+template<typename MatType, typename WeakLearner>
+Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data,
+                                         const arma::Row<size_t>& labels,
+                                         int iterations,
+                                         size_t classes,
+                                         const WeakLearner& other)
+{
+  // Without instances or classes there is nothing to weight, and the initial
+  // weight below would divide by zero.
+  if (data.n_cols == 0 || classes == 0)
+    return;
+
+  // Load the initial weights.  The division has to happen in floating point:
+  // the integer expression 1 / (n_cols * classes) truncates to zero.
+  const double initWeight = 1.0 / static_cast<double>(data.n_cols * classes);
+  arma::Row<double> D(data.n_cols);
+  D.fill(initWeight);
+
+  arma::Row<size_t> predictedLabels(labels.n_cols);
+  MatType tempData(data);
+
+  // Now start the boosting rounds.
+  for (int i = 0; i < iterations; ++i)
+  {
+    // Transform the data as per the rules for the perceptron: scale every
+    // instance by its current weight.  The scaling always starts from the
+    // original data so that weights of earlier rounds are not compounded.
+    for (size_t j = 0; j < tempData.n_cols; ++j)
+      tempData.col(j) = D(j) * data.col(j);
+
+    WeakLearner w(other);
+    w.Classify(tempData, predictedLabels);
+
+    // Helper row vector holding Yi(l) * ht(xi, l): +1 where the prediction
+    // matches the label and -1 where it does not.  (Negating the unsigned
+    // label value itself would wrap around to a huge positive number.)
+    arma::Row<double> mispredict(predictedLabels.n_cols);
+    for (size_t j = 0; j < predictedLabels.n_cols; ++j)
+      mispredict(j) = (predictedLabels(j) == labels(j)) ? 1.0 : -1.0;
+
+    // Calculation of rt, the weighted agreement of this round.
+    double rt = 0.0;
+    for (size_t j = 0; j < predictedLabels.n_cols; ++j)
+      rt += D(j) * mispredict(j);
+
+    // |rt| == 1 would make alpha(t) infinite (division by zero or log(0)) and
+    // turn the weights into NaN, so boosting stops here.
+    if (rt >= 1.0 || rt <= -1.0)
+      break;
+
+    // Calculation of alpha(t).
+    const double alphat = 0.5 * std::log((1 + rt) / (1 - rt));
+
+    // Update every weight and accumulate the normalization constant zt.
+    double zt = 0.0;
+    for (size_t j = 0; j < mispredict.n_cols; ++j)
+    {
+      D(j) = D(j) * std::exp(-1 * alphat * mispredict(j));
+      zt += D(j);
+    }
+
+    D = D / zt;
+  }
+}
+
+} // namespace adaboost
+} // namespace mlpack
+#endif
\ No newline at end of file
diff --git a/src/mlpack/methods/adaboost/adaboost_main.cpp b/src/mlpack/methods/adaboost/adaboost_main.cpp
new file mode 100644
index 0000000..1c30be0
--- /dev/null
+++ b/src/mlpack/methods/adaboost/adaboost_main.cpp
@@ -0,0 +1,75 @@
+/*
+ * @file: adaboost_main.cpp
+ * @author: Udit Saxena
+ *
+ *
+ */
+
+#include <mlpack/core.hpp>
+#include "adaboost.hpp"
+
+using namespace mlpack;
+using namespace std;
+using namespace arma;
+
+PROGRAM_INFO("","");
+
+//necessary parameters
+PARAM_STRING_REQ("train_file", "A file containing the training set.", "tr");
+PARAM_STRING_REQ("labels_file", "A file containing labels for the training set.",
+    "l");
+PARAM_STRING_REQ("test_file", "A file containing the test set.", "te");
+// The class count has no sensible default, so it is declared as a required
+// parameter (PARAM_INT would additionally need a default-value argument).
+PARAM_INT_REQ("classes","The number of classes in the input label set.","c");
+
+//optional parameters.
+PARAM_STRING("output", "The file in which the predicted labels for the test set"
+    " will be written.", "o", "output.csv");
+PARAM_INT("iterations","The maximum number of boosting iterations "
+    "to be run", "i", 1000);
+
+/**
+ * Command-line entry point: loads the training set, its labels and the test
+ * set, runs the AdaBoost boosting rounds and writes the results to the output
+ * file.
+ *
+ * @param argc Number of command-line arguments.
+ * @param argv Command-line arguments.
+ * @return 0 on success.
+ */
+int main(int argc, char *argv[])
+{
+  CLI::ParseCommandLine(argc, argv);
+
+  const string trainingDataFilename = CLI::GetParam<string>("train_file");
+  mat trainingData;
+  data::Load(trainingDataFilename, trainingData, true);
+
+  const string labelsFilename = CLI::GetParam<string>("labels_file");
+  // Load labels.
+  mat labelsIn;
+  data::Load(labelsFilename, labelsIn, true);
+
+  // helpers for normalizing the labels
+  Col<size_t> labels;
+  vec mappings;
+
+  // Do the labels need to be transposed?
+  if (labelsIn.n_rows == 1)
+    labelsIn = labelsIn.t();
+
+  // normalize the labels
+  data::NormalizeLabels(labelsIn.unsafe_col(0), labels, mappings);
+
+  const string testingDataFilename = CLI::GetParam<string>("test_file");
+  mat testingData;
+  data::Load(testingDataFilename, testingData, true);
+
+  // The message reports exactly the value the check compares against.
+  if (testingData.n_rows != trainingData.n_rows)
+    Log::Fatal << "Test data dimensionality (" << testingData.n_rows << ") "
+        << "must be the same as training data (" << trainingData.n_rows
+        << ")!" << std::endl;
+  const int iterations = CLI::GetParam<int>("iterations");
+
+  // A non-positive class count would lead to a division by zero when the
+  // initial weights are computed.
+  const int classesIn = CLI::GetParam<int>("classes");
+  if (classesIn <= 0)
+    Log::Fatal << "The number of classes (" << classesIn << ") must be "
+        << "positive!" << std::endl;
+  const size_t classes = static_cast<size_t>(classesIn);
+
+  // Both the weak learner and Adaboost take the labels as a row vector.
+  const Row<size_t> labelsRow = labels.t();
+
+  // Same number of perceptron iterations as used elsewhere for a default
+  // perceptron.
+  const int perceptronIterations = 1000;
+
+  Timer::Start("Training");
+  perceptron::Perceptron<> weakLearner(trainingData, labelsRow,
+      perceptronIterations);
+  adaboost::Adaboost<> a(trainingData, labelsRow, iterations, classes,
+      weakLearner);
+  Timer::Stop("Training");
+
+  // TODO(review): Adaboost does not expose a way to classify the test set yet,
+  // so nothing defines predictedLabels.  This output stage is kept as written
+  // until the classification interface has been decided.
+  vec results;
+  data::RevertLabels(predictedLabels, mappings, results);
+
+  const string outputFilename = CLI::GetParam<string>("output");
+  data::Save(outputFilename, results, true, true);
+
+  return 0;
+}
\ No newline at end of file
diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index fb4d6c9..3c0adcb 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -53,6 +53,24 @@ class DecisionStump
*/
void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
+ /**
+ * Build a decision stump from an existing one.  The definition in
+ * decision_stump_impl.hpp copies the number of classes, the splitting
+ * attribute, the bucket size, the split values and the bin labels.
+ *
+ * @param ds Decision stump to copy from.  Note that it is typed with the
+ * default template arguments (DecisionStump<>).
+ */
+ DecisionStump(const DecisionStump<>& ds);
+
+ /**
+ *
+ *
+ *
+ *
+ *
+ *
+ ModifyData(MatType& data, const arma::Row<double>& D);
+ */
+
//! Access the splitting attribute.
int SplitAttribute() const { return splitAttribute; }
//! Modify the splitting attribute (be careful!).
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index 7300521..80d961c 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -104,6 +104,38 @@ void DecisionStump<MatType>::Classify(const MatType& test,
}
/**
+ * Build a decision stump whose state is taken over from another stump: the
+ * number of classes, the splitting attribute, the bucket size, the split
+ * values and the bin labels are all copied.
+ *
+ * @param ds Decision stump to copy from.
+ */
+template <typename MatType>
+DecisionStump<MatType>::DecisionStump(const DecisionStump<>& ds) :
+    numClass(ds.numClass),
+    splitAttribute(ds.splitAttribute),
+    bucketSize(ds.bucketSize),
+    split(ds.split),
+    binLabels(ds.binLabels)
+{
+  // Every member is copied in the initializer list; nothing left to do.
+}
+
+/**
+ *
+ *
+ *
+ *
+ *
+ *
+template <typename MatType>
+DecisionStump<MatType>::ModifyData(MatType& data, const arma::Row<double>& D)
+ */
+
+/**
* Sets up attribute as if it were splitting on it and finds entropy when
* splitting on attribute.
*
diff --git a/src/mlpack/methods/perceptron/perceptron.hpp b/src/mlpack/methods/perceptron/perceptron.hpp
index 7842e35..2ef86d2 100644
--- a/src/mlpack/methods/perceptron/perceptron.hpp
+++ b/src/mlpack/methods/perceptron/perceptron.hpp
@@ -53,6 +53,20 @@ class Perceptron
*/
void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
+ /**
+ * Build a perceptron from an existing one.  The definition in
+ * perceptron_impl.hpp copies the class labels, the weight vectors and the
+ * training data.
+ *
+ * @param p Perceptron to copy from.  Note that it is typed with the default
+ * template arguments (Perceptron<>).
+ */
+ Perceptron(const Perceptron<>& p);
+
+ /**
+ *
+ *
+ *
+ *
+ ModifyData(MatType& data, const arma::Row<double>& D);
+ */
private:
//! Stores the class labels for the input data.
arma::Row<size_t> classLabels;
diff --git a/src/mlpack/methods/perceptron/perceptron_impl.hpp b/src/mlpack/methods/perceptron/perceptron_impl.hpp
index bcec50f..48c53d4 100644
--- a/src/mlpack/methods/perceptron/perceptron_impl.hpp
+++ b/src/mlpack/methods/perceptron/perceptron_impl.hpp
@@ -110,6 +110,26 @@ void Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Classify(
}
}
+/**
+ * Build a perceptron whose state is taken over from another perceptron: the
+ * class labels, the weight vectors and the training data are all copied.
+ *
+ * @param p Perceptron to copy from.
+ */
+template <typename LearnPolicy,
+          typename WeightInitializationPolicy,
+          typename MatType>
+Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Perceptron(
+    const Perceptron<>& p) :
+    classLabels(p.classLabels),
+    weightVectors(p.weightVectors),
+    trainData(p.trainData)
+{
+  // Every member is copied in the initializer list; nothing left to do.
+}
+
+/*
+template <typename LearnPolicy, typename WeightInitializationPolicy, typename MatType>
+Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::ModifyData(
+ MatType& data, const arma::Row<double>& D)
+{
+ for (int j = 0;j < data.n_cols;j++)
+ data.col(i) = D(i) * data.col(i);
+}
+*/
}; // namespace perceptron
}; // namespace mlpack
diff --git a/src/mlpack/tests/perceptron_test.cpp b/src/mlpack/tests/perceptron_test.cpp
index 07f99bd..aadb4f4 100644
--- a/src/mlpack/tests/perceptron_test.cpp
+++ b/src/mlpack/tests/perceptron_test.cpp
@@ -149,4 +149,21 @@ BOOST_AUTO_TEST_CASE(NonLinearlySeparableDataset)
BOOST_CHECK_EQUAL(predictedLabels(0, 3), 1);
}
+/**
+ * A perceptron built from another perceptron must classify exactly like the
+ * perceptron it was built from.
+ */
+BOOST_AUTO_TEST_CASE(SecondaryConstructor)
+{
+  mat trainData;
+  trainData << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8
+            << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << endr
+            << 1 << 1 << 1 << 1 << 1 << 1 << 1 << 1
+            << 2 << 2 << 2 << 2 << 2 << 2 << 2 << 2 << endr;
+
+  Mat<size_t> labels;
+  labels << 0 << 0 << 0 << 1 << 0 << 1 << 1 << 1
+         << 0 << 0 << 0 << 1 << 0 << 1 << 1 << 1;
+
+  Perceptron<> p1(trainData, labels.row(0), 1000);
+
+  Perceptron<> p2(p1);
+
+  // Constructing p2 alone verifies nothing, so compare what both perceptrons
+  // predict.  The output rows are sized up front, one entry per point.
+  Row<size_t> predictedLabels1(trainData.n_cols);
+  p1.Classify(trainData, predictedLabels1);
+
+  Row<size_t> predictedLabels2(trainData.n_cols);
+  p2.Classify(trainData, predictedLabels2);
+
+  for (size_t i = 0; i < predictedLabels1.n_cols; ++i)
+    BOOST_CHECK_EQUAL(predictedLabels1(i), predictedLabels2(i));
+}
+
BOOST_AUTO_TEST_SUITE_END();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list