[mlpack] 185/324: Adaboost design issues, to be discussed, then changed later on

Sun Aug 17 08:22:09 UTC 2014

This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch svn-trunk
in repository mlpack.

commit 7f608d2015052c26ddef6142f34264218ac67542
Author: saxena.udit <saxena.udit at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date:   Tue Jul 15 11:26:08 2014 +0000

    Adaboost design issues, to be discussed, then changed later on
    
    git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16824 9d5b8971-822b-0410-80eb-d18c1038ef23
---
 src/mlpack/methods/CMakeLists.txt                  |  1 +
 src/mlpack/methods/adaboost/CMakeLists.txt         | 26 +++++++
 src/mlpack/methods/adaboost/adaboost.hpp           | 31 ++++++++
 src/mlpack/methods/adaboost/adaboost_impl.hpp      | 87 ++++++++++++++++++++++
 src/mlpack/methods/adaboost/adaboost_main.cpp      | 75 +++++++++++++++++++
 .../methods/decision_stump/decision_stump.hpp      | 18 +++++
 .../methods/decision_stump/decision_stump_impl.hpp | 32 ++++++++
 src/mlpack/methods/perceptron/perceptron.hpp       | 14 ++++
 src/mlpack/methods/perceptron/perceptron_impl.hpp  | 20 +++++
 src/mlpack/tests/perceptron_test.cpp               | 17 +++++
 10 files changed, 321 insertions(+)

diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index 93289ea..925e74d 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -1,5 +1,6 @@
 # Recurse into each method mlpack provides.
 set(DIRS
+# adaboost 
   amf
   cf
   decision_stump
diff --git a/src/mlpack/methods/adaboost/CMakeLists.txt b/src/mlpack/methods/adaboost/CMakeLists.txt
new file mode 100644
index 0000000..7e29581
--- /dev/null
+++ b/src/mlpack/methods/adaboost/CMakeLists.txt
@@ -0,0 +1,26 @@
+cmake_minimum_required(VERSION 2.8)
+
+# Define the files we need to compile.
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+  adaboost.hpp
+  adaboost_impl.hpp
+)
+
+# Prefix every entry of SOURCES with this directory's path.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+# Build the adaboost executable from adaboost_main.cpp and link it to mlpack.
+add_executable(adaboost
+  adaboost_main.cpp
+)
+target_link_libraries(adaboost
+  mlpack
+)
+# Install the adaboost executable into bin.
+install(TARGETS adaboost RUNTIME DESTINATION bin)
diff --git a/src/mlpack/methods/adaboost/adaboost.hpp b/src/mlpack/methods/adaboost/adaboost.hpp
new file mode 100644
index 0000000..8aafb07
--- /dev/null
+++ b/src/mlpack/methods/adaboost/adaboost.hpp
@@ -0,0 +1,31 @@
+/**
+ * @file adaboost.hpp
+ * @author Udit Saxena
+ *
+ * AdaBoost header file
+ */
+
+#ifndef _MLPACK_METHODS_ADABOOST_ADABOOST_HPP
+#define _MLPACK_METHODS_ADABOOST_ADABOOST_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/perceptron/perceptron.hpp>
+
+namespace mlpack {
+namespace adaboost {
+//! Boosting driver; the constructor is currently its only member.
+template <typename MatType = arma::mat, typename WeakLearner =
+          mlpack::perceptron::Perceptron<> >
+class Adaboost
+{
+public:
+  //! Runs `iterations` boosting rounds on data/labels with copies of `other`.
+  Adaboost(const MatType& data, const arma::Row<size_t>& labels,
+           int iterations, size_t classes, const WeakLearner& other);
+}; // class Adaboost
+} // namespace adaboost
+} // namespace mlpack
+
+#include "adaboost_impl.hpp"
+
+#endif
\ No newline at end of file
diff --git a/src/mlpack/methods/adaboost/adaboost_impl.hpp b/src/mlpack/methods/adaboost/adaboost_impl.hpp
new file mode 100644
index 0000000..da99936
--- /dev/null
+++ b/src/mlpack/methods/adaboost/adaboost_impl.hpp
@@ -0,0 +1,87 @@
+/*
+ * @file adaboost_impl.hpp
+ * @author Udit Saxena
+ *
+ * Implementation of the AdaBoost class
+ *
+ */
+
+#ifndef _MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP
+#define _MLPACK_METHODS_ADABOOST_ADABOOST_IMPL_HPP
+
+#include "adaboost.hpp"
+
+namespace mlpack {
+namespace adaboost {
+
+/**
+ * Runs the boosting rounds: keeps one weight D(j) per training point, lets a
+ * copy of the weak learner `other` classify the weighted data, and re-weights.
+ */
+template<typename MatType, typename WeakLearner>
+Adaboost<MatType, WeakLearner>::Adaboost(const MatType& data, const arma::Row<size_t>& labels,
+         int iterations, size_t classes, const WeakLearner& other)
+{
+  // Initial weights.  The division must happen in floating point: the integer
+  // expression 1 / (n_cols * classes) truncates to zero.
+  const double initWeight = 1.0 / static_cast<double>(data.n_cols * classes);
+  arma::Row<double> D(data.n_cols);
+  D.fill(initWeight);
+
+  double rt, alphat = 0.0, zt;
+  arma::Row<size_t> predictedLabels(labels.n_cols);
+  MatType tempData(data);
+
+  // now start the boosting rounds
+  for (int i = 0; i < iterations; i++)
+  {
+    rt = 0.0;
+    zt = 0.0;
+
+    // Transform data, as per rules for perceptron: point j is scaled by its
+    // own weight D(j).  (Indexing with the round counter i was a bug.)
+    for (size_t j = 0; j < tempData.n_cols; j++)
+      tempData.col(j) = D(j) * tempData.col(j);
+
+    // for now, perceptron initialized with default parameters
+    //mlpack::perceptron::Perceptron<> p(tempData, labels, 1000);
+    WeakLearner w(other);
+    w.Classify(tempData, predictedLabels);
+
+    // Now, start calculation of alpha(t).
+    // Helper row vector: mispredict(j) stores the value of Yi(l)*ht(xi,l).
+    arma::Row<double> mispredict(predictedLabels.n_cols);
+
+    for (size_t j = 0; j < predictedLabels.n_cols; j++)
+    {
+      // Convert to double before negating: negating the unsigned label
+      // itself wraps around to a huge positive value.
+      const double predicted = static_cast<double>(predictedLabels(j));
+      if (predictedLabels(j) != labels(j))
+        mispredict(j) = -predicted;
+      else
+        mispredict(j) = predicted;
+    }
+
+    // rt is the weighted sum of the mispredict values.
+    for (size_t j = 0; j < predictedLabels.n_cols; j++)
+      rt += (D(j) * mispredict(j));
+
+    // NOTE(review): rt == 1 or rt == -1 makes this expression non-finite.
+    alphat = 0.5 * log((1 + rt) / (1 - rt));
+
+    // Re-weight every point j and accumulate the normalizer zt.
+    for (size_t j = 0; j < mispredict.n_cols; j++)
+    {
+      D(j) = D(j) * exp(-1 * alphat * mispredict(j));
+      zt += D(j);
+    }
+
+    // Divide by zt so that the updated weights sum to one.
+    D = D / zt;
+  }
+}
+
+} // namespace adaboost
+} // namespace mlpack
+#endif
\ No newline at end of file
diff --git a/src/mlpack/methods/adaboost/adaboost_main.cpp b/src/mlpack/methods/adaboost/adaboost_main.cpp
new file mode 100644
index 0000000..1c30be0
--- /dev/null
+++ b/src/mlpack/methods/adaboost/adaboost_main.cpp
@@ -0,0 +1,75 @@
+/*
+ * @file: adaboost_main.cpp
+ * @author: Udit Saxena
+ *
+ *
+ */
+
+#include <mlpack/core.hpp>
+#include "adaboost.hpp"
+
+using namespace mlpack;
+using namespace std;
+using namespace arma;
+
+PROGRAM_INFO("","");
+
+// Necessary parameters (the *_REQ macros take no default value).
+PARAM_STRING_REQ("train_file", "A file containing the training set.", "tr");
+PARAM_STRING_REQ("labels_file", "A file containing labels for the training set.",
+  "l");
+PARAM_STRING_REQ("test_file", "A file containing the test set.", "te");
+PARAM_INT_REQ("classes","The number of classes in the input label set.","c");
+
+// Optional parameters: the last macro argument is the default value.
+PARAM_STRING("output", "The file in which the predicted labels for the test set"
+    " will be written.", "o", "output.csv");
+PARAM_INT("iterations","The maximum number of boosting iterations "
+  "to be run", "i", 1000);
+
+// Trains on train_file/labels_file and writes predictions for test_file.
+int main(int argc, char *argv[])
+{
+  CLI::ParseCommandLine(argc, argv);
+
+  const string trainingDataFilename = CLI::GetParam<string>("train_file");
+  mat trainingData;
+  data::Load(trainingDataFilename, trainingData, true);
+
+  // Load labels; a single-row label matrix is transposed into a column.
+  const string labelsFilename = CLI::GetParam<string>("labels_file");
+  mat labelsIn;
+  data::Load(labelsFilename, labelsIn, true);
+  if (labelsIn.n_rows == 1)
+    labelsIn = labelsIn.t();
+
+  // Normalize the labels; mappings is needed to revert them before saving.
+  Col<size_t> labels;
+  vec mappings;
+  data::NormalizeLabels(labelsIn.unsafe_col(0), labels, mappings);
+  const string testingDataFilename = CLI::GetParam<string>("test_file");
+  mat testingData;
+  data::Load(testingDataFilename, testingData, true);
+  if (testingData.n_rows != trainingData.n_rows)
+    Log::Fatal << "Test data dimensionality (" << testingData.n_rows << ") "
+        << "must be the same as training data (" << trainingData.n_rows
+        << ")!" << std::endl;
+
+  const int iterations = CLI::GetParam<int>("iterations");
+  const size_t classes = static_cast<size_t>(CLI::GetParam<int>("classes"));
+  Timer::Start("Training");
+  // The booster needs a weak learner to copy and the labels as a row vector.
+  perceptron::Perceptron<> p(trainingData, labels.t(), 1000);
+  adaboost::Adaboost<> a(trainingData, labels.t(), iterations, classes, p);
+  Timer::Stop("Training");
+
+  // NOTE(review): Adaboost has no Classify() yet; until it does, the test
+  // set is labeled by the weak learner alone.
+  Row<size_t> predictedLabels(testingData.n_cols);
+  p.Classify(testingData, predictedLabels);
+  vec results;
+  data::RevertLabels(predictedLabels.t(), mappings, results);
+  const string outputFilename = CLI::GetParam<string>("output");
+  data::Save(outputFilename, results, true, true);
+  return 0;
+}
\ No newline at end of file
diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index fb4d6c9..3c0adcb 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -53,6 +53,24 @@ class DecisionStump
    */
   void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
 
+  /**
+   * Copy constructor: takes numClass, splitAttribute, bucketSize, split and
+   * binLabels from ds.
+   *
+   * @param ds Decision stump to copy.
+   */
+  DecisionStump(const DecisionStump<>& ds);
+
+  /**
+   * Disabled (commented-out) declaration of ModifyData, kept as a placeholder
+   * while the design is discussed.  No implementation exists for it yet.
+   *
+   * NOTE(review): presumably D holds one weight per data point -- confirm.
+   *
+   *
+  ModifyData(MatType& data, const arma::Row<double>& D);
+  */
+  
   //! Access the splitting attribute.
   int SplitAttribute() const { return splitAttribute; }
   //! Modify the splitting attribute (be careful!).
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index 7300521..80d961c 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -104,6 +104,38 @@ void DecisionStump<MatType>::Classify(const MatType& test,
 }
 
 /**
+ * Copy constructor.
+ *
+ * Initializes numClass, splitAttribute, bucketSize, split and binLabels from
+ * the corresponding members of ds.
+ * (Note that ds is always the default DecisionStump<> instantiation.)
+ *
+ * @param ds Decision stump to copy.
+ */
+template <typename MatType>
+DecisionStump<MatType>::DecisionStump(const DecisionStump<>& ds) :
+    numClass(ds.numClass),
+    splitAttribute(ds.splitAttribute),
+    bucketSize(ds.bucketSize),
+    split(ds.split),
+    binLabels(ds.binLabels)
+{
+  // The five members are copied in the initializer list above, so the body
+  // has nothing left to do.
+}
+
+/**
+ *
+ *
+ *
+ *
+ *
+ *
+template <typename MatType>
+DecisionStump<MatType>::ModifyData(MatType& data, const arma::Row<double>& D)
+ */
+
+/**
  * Sets up attribute as if it were splitting on it and finds entropy when
  * splitting on attribute.
  *
diff --git a/src/mlpack/methods/perceptron/perceptron.hpp b/src/mlpack/methods/perceptron/perceptron.hpp
index 7842e35..2ef86d2 100644
--- a/src/mlpack/methods/perceptron/perceptron.hpp
+++ b/src/mlpack/methods/perceptron/perceptron.hpp
@@ -53,6 +53,20 @@ class Perceptron
    */
   void Classify(const MatType& test, arma::Row<size_t>& predictedLabels);
 
+  /**
+   * Copy constructor: takes classLabels, weightVectors and trainData from p.
+   *
+   * @param p Perceptron to copy.
+   */
+  Perceptron(const Perceptron<>& p);
+
+  /**
+   * Disabled (commented-out) declaration of ModifyData; a commented-out body
+   * in perceptron_impl.hpp multiplies columns of data by entries of D.
+   *
+   *
+   ModifyData(MatType& data, const arma::Row<double>& D);
+   */
 private:
   //! Stores the class labels for the input data.
   arma::Row<size_t> classLabels;
diff --git a/src/mlpack/methods/perceptron/perceptron_impl.hpp b/src/mlpack/methods/perceptron/perceptron_impl.hpp
index bcec50f..48c53d4 100644
--- a/src/mlpack/methods/perceptron/perceptron_impl.hpp
+++ b/src/mlpack/methods/perceptron/perceptron_impl.hpp
@@ -110,6 +110,26 @@ void Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Classify(
   }
 }
 
+// Copy constructor: takes the class labels, weights and training data of p.
+template <typename LearnPolicy, typename WeightInitializationPolicy, typename MatType>
+Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Perceptron(
+    const Perceptron<>& p) :
+    classLabels(p.classLabels),
+    weightVectors(p.weightVectors),
+    trainData(p.trainData)
+{
+  // The three members are copied in the initializer list above.
+}
+
+/*
+template <typename LearnPolicy, typename WeightInitializationPolicy, typename MatType>
+Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::ModifyData(
+  MatType& data, const arma::Row<double>& D)
+{
+  for (int j = 0;j < data.n_cols;j++)
+      data.col(i) = D(i) * data.col(i);
+}
+*/
 }; // namespace perceptron
 }; // namespace mlpack
 
diff --git a/src/mlpack/tests/perceptron_test.cpp b/src/mlpack/tests/perceptron_test.cpp
index 07f99bd..aadb4f4 100644
--- a/src/mlpack/tests/perceptron_test.cpp
+++ b/src/mlpack/tests/perceptron_test.cpp
@@ -149,4 +149,21 @@ BOOST_AUTO_TEST_CASE(NonLinearlySeparableDataset)
   BOOST_CHECK_EQUAL(predictedLabels(0, 3), 1);
 }
 
+BOOST_AUTO_TEST_CASE(SecondaryConstructor)
+{
+  mat trainData;
+  trainData << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8
+            << 1 << 2 << 3 << 4 << 5 << 6 << 7 << 8 << endr
+            << 1 << 1 << 1 << 1 << 1 << 1 << 1 << 1
+            << 2 << 2 << 2 << 2 << 2 << 2 << 2 << 2 << endr;
+  // One label per column of trainData (16 columns, 16 labels).
+  Mat<size_t> labels;
+  labels << 0 << 0 << 0 << 1 << 0 << 1 << 1 << 1
+         << 0 << 0 << 0 << 1 << 0 << 1 << 1 << 1;
+  // Train one perceptron, then copy-construct a second one from it.
+  Perceptron<> p1(trainData, labels.row(0), 1000);
+  // NOTE(review): nothing is asserted; this only exercises the copy constructor.
+  Perceptron<> p2(p1);
+}
+
 BOOST_AUTO_TEST_SUITE_END();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git