[mlpack] 12/44: Backport fixes from r17310-r17318. Mostly test fixes.

Barak A. Pearlmutter barak+git at pearlmutter.net
Mon Feb 15 19:35:52 UTC 2016


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to tag mlpack-1.0.11
in repository mlpack.

commit 0e0f31be8781bbbb0f28b7f3f862f91447b8346c
Author: Ryan Curtin <ryan at ratml.org>
Date:   Sun Dec 7 19:31:21 2014 +0000

    Backport fixes from r17310-r17318. Mostly test fixes.
---
 CMake/FindArmadillo.cmake                          | 57 +++++++++++++++++++---
 .../methods/decision_stump/decision_stump.hpp      | 18 +++++--
 .../methods/decision_stump/decision_stump_impl.hpp | 55 +++++++++------------
 src/mlpack/tests/cli_test.cpp                      |  4 +-
 src/mlpack/tests/decision_stump_test.cpp           |  8 +--
 src/mlpack/tests/regularized_svd_test.cpp          | 11 ++++-
 src/mlpack/tests/sparse_coding_test.cpp            |  4 +-
 7 files changed, 104 insertions(+), 53 deletions(-)

diff --git a/CMake/FindArmadillo.cmake b/CMake/FindArmadillo.cmake
index 67ede3f..9c7bffb 100644
--- a/CMake/FindArmadillo.cmake
+++ b/CMake/FindArmadillo.cmake
@@ -221,7 +221,27 @@ if(EXISTS "${ARMADILLO_INCLUDE_DIR}/armadillo_bits/config.hpp")
 
     # Search for HDF5 (or replacement).
     if (NOT "${ARMA_USE_HDF5}" STREQUAL "")
-      find_package(HDF5 REQUIRED)
+      find_package(HDF5 QUIET)
+
+      if(NOT HDF5_FOUND)
+        # On Debian systems, the HDF5 package has been split into multiple
+        # packages so that it is co-installable.  But this may mean that the
+        # include files are hidden somewhere very odd that the FindHDF5.cmake
+        # script will not find.  Thus, we'll also quickly check pkgconfig to see
+        # if there is information on what to use there.
+        find_package(PkgConfig)
+        if (PKG_CONFIG_FOUND)
+          pkg_check_modules(HDF5 hdf5)
+          # But using pkgconfig is a little weird because HDF5_LIBRARIES won't
+          # be filled with exact library paths, like the other scripts.  So
+          # instead what we get is HDF5_LIBRARY_DIRS which is the equivalent of
+          # what we'd pass to -L.
+          if (HDF5_FOUND)
+            # I'm not sure what I think of doing this here...
+            link_directories("${HDF5_LIBRARY_DIRS}")
+          endif()
+        endif()
+      endif()
 
       set(SUPPORT_INCLUDE_DIRS "${SUPPORT_INCLUDE_DIRS}" "${HDF5_INCLUDE_DIRS}")
       set(SUPPORT_LIBRARIES "${SUPPORT_LIBRARIES}" "${HDF5_LIBRARIES}")
@@ -229,17 +249,38 @@ if(EXISTS "${ARMADILLO_INCLUDE_DIR}/armadillo_bits/config.hpp")
 
   else("${ARMA_USE_WRAPPER}" STREQUAL "")
     # Some older versions still require linking against HDF5 since they did not
-    # wrap libhdf5.  This was true until 4.300 (check this!).
+    # wrap libhdf5.  This was true for versions older than 4.300.
 
-    if(NOT "${ARMA_USE_HDF5}" STREQUAL "")
+    if(NOT "${ARMA_USE_HDF5}" STREQUAL "" AND
+       "${ARMADILLO_VERSION_STRING}" VERSION_LESS "4.300.0")
       message(STATUS "Armadillo HDF5 support is enabled and manual linking is "
                      "required.")
       # We have HDF5 support and need to link against HDF5.
-      find_package(HDF5 REQUIRED)
-
-      set(SUPPORT_INCLUDE_DIRS "${HDF5_INCLUDE_DIRS}")
-      set(SUPPORT_LIBRARIES "${HDF5_LIBRARIES}")
-    endif(NOT "${ARMA_USE_HDF5}" STREQUAL "")
+      find_package(HDF5)
+
+      if(NOT HDF5_FOUND)
+        # On Debian systems, the HDF5 package has been split into multiple
+        # packages so that it is co-installable.  But this may mean that the
+        # include files are hidden somewhere very odd that the FindHDF5.cmake
+        # script will not find.  Thus, we'll also quickly check pkgconfig to see
+        # if there is information on what to use there.
+        find_package(PkgConfig)
+        if (PKG_CONFIG_FOUND)
+          pkg_check_modules(HDF5 hdf5)
+          # But using pkgconfig is a little weird because HDF5_LIBRARIES won't
+          # be filled with exact library paths, like the other scripts.  So
+          # instead what we get is HDF5_LIBRARY_DIRS which is the equivalent of
+          # what we'd pass to -L.
+          if (HDF5_FOUND)
+            # I'm not sure what I think of doing this here...
+            link_directories("${HDF5_LIBRARY_DIRS}")
+          endif()
+        endif()
+
+        set(SUPPORT_INCLUDE_DIRS "${HDF5_INCLUDE_DIRS}")
+        set(SUPPORT_LIBRARIES "${HDF5_LIBRARIES}")
+      endif()
+    endif()
 
   endif("${ARMA_USE_WRAPPER}" STREQUAL "")
 
diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index 743abdd..151c5b7 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -85,7 +85,7 @@ class DecisionStump
    *
   ModifyData(MatType& data, const arma::Row<double>& D);
   */
-  
+
   //! Access the splitting attribute.
   int SplitAttribute() const { return splitAttribute; }
   //! Modify the splitting attribute (be careful!).
@@ -164,8 +164,20 @@ class DecisionStump
    * @param attribute The attribute of which we calculate the entropy.
    * @param labels Corresponding labels of the attribute.
    */
-  template <typename AttType, typename LabelType>
-  double CalculateEntropy(arma::subview_row<LabelType> labels);
+  template <typename LabelType, bool isWeight>
+  double CalculateEntropy(arma::subview_row<LabelType> labels, int begin,
+                          const arma::rowvec& tempD);
+
+  /**
+   * Train the decision stump on the given data and labels.
+   *
+   * @param data Dataset to train on.
+   * @param labels Labels for dataset.
+   * @param isWeight Whether we need to run a weighted Decision Stump.
+   */
+  template <bool isWeight>
+  void Train(const MatType& data, const arma::Row<size_t>& labels,
+             const arma::rowvec& weightD);
 };
 
 }; // namespace decision_stump
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index d7df05a..b3043e0 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -351,44 +351,35 @@ template <typename MatType>
 template <typename rType>
 rType DecisionStump<MatType>::CountMostFreq(const arma::Row<rType>& subCols)
 {
-  // Sort subCols for easier processing.
-  arma::Row<rType> sortCounts = arma::sort(subCols);
-  rType element = sortCounts[0];
-  size_t count = 0, localCount = 0;
+  // We'll create a map of elements and the number of times that each element is
+  // seen.
+  std::map<rType, size_t> countMap;
 
-  if (sortCounts.n_elem == 1)
-    return sortCounts[0];
-
-  // An O(n) loop which counts the most frequent element in sortCounts.
-  for (size_t i = 0; i < sortCounts.n_elem; ++i)
+  for (size_t i = 0; i < subCols.n_elem; ++i)
   {
-    if (i == sortCounts.n_elem - 1)
-    {
-      if (sortCounts(i - 1) == sortCounts(i))
-      {
-        // element = sortCounts(i - 1);
-        localCount++;
-      }
-      else if (localCount > count)
-        count = localCount;
-    }
-    else if (sortCounts(i) != sortCounts(i + 1))
-    {
-      localCount = 0;
-      count++;
-    }
+    if (countMap.count(subCols[i]) == 0)
+      countMap[subCols[i]] = 1;
     else
+      ++countMap[subCols[i]];
+  }
+
+  // Now find the maximum value.
+  typename std::map<rType, size_t>::iterator it = countMap.begin();
+  rType mostFreq = it->first;
+  size_t mostFreqCount = it->second;
+  while (it != countMap.end())
+>>>>>>> .merge-right.r17318
+  {
+    if (it->second >= mostFreqCount)
     {
-      localCount++;
-      if (localCount > count)
-      {
-        count = localCount;
-        if (localCount == 1)
-          element = sortCounts(i);
-      }
+      mostFreq = it->first;
+      mostFreqCount = it->second;
     }
+
+    ++it;
   }
-  return element;
+
+  return mostFreq;
 }
 
 /**
diff --git a/src/mlpack/tests/cli_test.cpp b/src/mlpack/tests/cli_test.cpp
index 5643179..b77575b 100644
--- a/src/mlpack/tests/cli_test.cpp
+++ b/src/mlpack/tests/cli_test.cpp
@@ -136,8 +136,8 @@ BOOST_AUTO_TEST_CASE(TestBooleanOption)
   // Now, if we specify this flag, it should be true.
   int argc = 2;
   char* argv[2];
-  argv[0] = strcpy(new char[strlen("programname")], "programname");
-  argv[1] = strcpy(new char[strlen("--flag_test")], "--flag_test");
+  argv[0] = strcpy(new char[strlen("programname") + 1], "programname");
+  argv[1] = strcpy(new char[strlen("--flag_test") + 1], "--flag_test");
 
   CLI::ParseCommandLine(argc, argv);
 
diff --git a/src/mlpack/tests/decision_stump_test.cpp b/src/mlpack/tests/decision_stump_test.cpp
index 2a04dfd..3cb3443 100644
--- a/src/mlpack/tests/decision_stump_test.cpp
+++ b/src/mlpack/tests/decision_stump_test.cpp
@@ -21,7 +21,7 @@
  */
 #include <mlpack/core.hpp>
 #include <mlpack/methods/decision_stump/decision_stump.hpp>
- 
+
 #include <boost/test/unit_test.hpp>
 #include "old_boost_test_definitions.hpp"
 
@@ -221,9 +221,9 @@ BOOST_AUTO_TEST_CASE(MultiClassSplit)
 
   BOOST_CHECK_EQUAL(predictedLabels(0, 0), 0);
   BOOST_CHECK_EQUAL(predictedLabels(0, 1), 0);
-  BOOST_CHECK_EQUAL(predictedLabels(0, 2), 0);
-  BOOST_CHECK_EQUAL(predictedLabels(0, 3), 0);
-  BOOST_CHECK_EQUAL(predictedLabels(0, 4), 0);
+  BOOST_CHECK_EQUAL(predictedLabels(0, 2), 1);
+  BOOST_CHECK_EQUAL(predictedLabels(0, 3), 1);
+  BOOST_CHECK_EQUAL(predictedLabels(0, 4), 1);
   BOOST_CHECK_EQUAL(predictedLabels(0, 5), 1);
   BOOST_CHECK_EQUAL(predictedLabels(0, 6), 2);
   BOOST_CHECK_EQUAL(predictedLabels(0, 7), 2);
diff --git a/src/mlpack/tests/regularized_svd_test.cpp b/src/mlpack/tests/regularized_svd_test.cpp
index 1c54770..4fd748a 100644
--- a/src/mlpack/tests/regularized_svd_test.cpp
+++ b/src/mlpack/tests/regularized_svd_test.cpp
@@ -191,8 +191,15 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionGradient)
       parameters(i, j) += epsilon;
 
       // Compare numerical and backpropagation gradient values.
-      BOOST_REQUIRE_CLOSE(numGradient1, gradient1(i, j), 1e-2);
-      BOOST_REQUIRE_CLOSE(numGradient2, gradient2(i, j), 1e-2);
+      if (gradient1(i, j) == 0.0)
+        BOOST_REQUIRE_SMALL(numGradient1, 1e-5);
+      else
+        BOOST_REQUIRE_CLOSE(numGradient1, gradient1(i, j), 1e-2);
+
+      if (gradient2(i, j) == 0.0)
+        BOOST_REQUIRE_SMALL(numGradient2, 1e-5);
+      else
+        BOOST_REQUIRE_CLOSE(numGradient2, gradient2(i, j), 1e-2);
     }
   }
 }
diff --git a/src/mlpack/tests/sparse_coding_test.cpp b/src/mlpack/tests/sparse_coding_test.cpp
index ff229ad..764a06a 100644
--- a/src/mlpack/tests/sparse_coding_test.cpp
+++ b/src/mlpack/tests/sparse_coding_test.cpp
@@ -118,7 +118,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestCodingStepElasticNet)
 
 BOOST_AUTO_TEST_CASE(SparseCodingTestDictionaryStep)
 {
-  const double tol = 2e-7;
+  const double tol = 1e-6;
 
   double lambda1 = 0.1;
   uword nAtoms = 25;
@@ -138,7 +138,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestDictionaryStep)
   mat Z = sc.Codes();
 
   uvec adjacencies = find(Z);
-  double normGradient = sc.OptimizeDictionary(adjacencies, 1e-12);
+  double normGradient = sc.OptimizeDictionary(adjacencies, 1e-15);
 
   BOOST_REQUIRE_SMALL(normGradient, tol);
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list