[mlpack] 12/44: Backport fixes from r17310-r17318. Mostly test fixes.
Barak A. Pearlmutter
barak+git at pearlmutter.net
Mon Feb 15 19:35:52 UTC 2016
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to tag mlpack-1.0.11 in repository mlpack.
commit 0e0f31be8781bbbb0f28b7f3f862f91447b8346c
Author: Ryan Curtin <ryan at ratml.org>
Date: Sun Dec 7 19:31:21 2014 +0000
Backport fixes from r17310-r17318. Mostly test fixes.
---
CMake/FindArmadillo.cmake | 57 +++++++++++++++++++---
.../methods/decision_stump/decision_stump.hpp | 18 +++++--
.../methods/decision_stump/decision_stump_impl.hpp | 55 +++++++++------------
src/mlpack/tests/cli_test.cpp | 4 +-
src/mlpack/tests/decision_stump_test.cpp | 8 +--
src/mlpack/tests/regularized_svd_test.cpp | 11 ++++-
src/mlpack/tests/sparse_coding_test.cpp | 4 +-
7 files changed, 104 insertions(+), 53 deletions(-)
diff --git a/CMake/FindArmadillo.cmake b/CMake/FindArmadillo.cmake
index 67ede3f..9c7bffb 100644
--- a/CMake/FindArmadillo.cmake
+++ b/CMake/FindArmadillo.cmake
@@ -221,7 +221,27 @@ if(EXISTS "${ARMADILLO_INCLUDE_DIR}/armadillo_bits/config.hpp")
# Search for HDF5 (or replacement).
if (NOT "${ARMA_USE_HDF5}" STREQUAL "")
- find_package(HDF5 REQUIRED)
+ find_package(HDF5 QUIET)
+
+ if(NOT HDF5_FOUND)
+ # On Debian systems, the HDF5 package has been split into multiple
+ # packages so that it is co-installable. But this may mean that the
+ # include files are hidden somewhere very odd that the FindHDF5.cmake
+ # script will not find. Thus, we'll also quickly check pkgconfig to see
+ # if there is information on what to use there.
+ find_package(PkgConfig)
+ if (PKG_CONFIG_FOUND)
+ pkg_check_modules(HDF5 hdf5)
+ # But using pkgconfig is a little weird because HDF5_LIBRARIES won't
+ # be filled with exact library paths, like the other scripts. So
+ # instead what we get is HDF5_LIBRARY_DIRS which is the equivalent of
+ # what we'd pass to -L.
+ if (HDF5_FOUND)
+ # I'm not sure what I think of doing this here...
+ link_directories("${HDF5_LIBRARY_DIRS}")
+ endif()
+ endif()
+ endif()
set(SUPPORT_INCLUDE_DIRS "${SUPPORT_INCLUDE_DIRS}" "${HDF5_INCLUDE_DIRS}")
set(SUPPORT_LIBRARIES "${SUPPORT_LIBRARIES}" "${HDF5_LIBRARIES}")
@@ -229,17 +249,38 @@ if(EXISTS "${ARMADILLO_INCLUDE_DIR}/armadillo_bits/config.hpp")
else("${ARMA_USE_WRAPPER}" STREQUAL "")
# Some older versions still require linking against HDF5 since they did not
- # wrap libhdf5. This was true until 4.300 (check this!).
+ # wrap libhdf5. This was true for versions older than 4.300.
- if(NOT "${ARMA_USE_HDF5}" STREQUAL "")
+ if(NOT "${ARMA_USE_HDF5}" STREQUAL "" AND
+ "${ARMADILLO_VERSION_STRING}" VERSION_LESS "4.300.0")
message(STATUS "Armadillo HDF5 support is enabled and manual linking is "
"required.")
# We have HDF5 support and need to link against HDF5.
- find_package(HDF5 REQUIRED)
-
- set(SUPPORT_INCLUDE_DIRS "${HDF5_INCLUDE_DIRS}")
- set(SUPPORT_LIBRARIES "${HDF5_LIBRARIES}")
- endif(NOT "${ARMA_USE_HDF5}" STREQUAL "")
+ find_package(HDF5)
+
+ if(NOT HDF5_FOUND)
+ # On Debian systems, the HDF5 package has been split into multiple
+ # packages so that it is co-installable. But this may mean that the
+ # include files are hidden somewhere very odd that the FindHDF5.cmake
+ # script will not find. Thus, we'll also quickly check pkgconfig to see
+ # if there is information on what to use there.
+ find_package(PkgConfig)
+ if (PKG_CONFIG_FOUND)
+ pkg_check_modules(HDF5 hdf5)
+ # But using pkgconfig is a little weird because HDF5_LIBRARIES won't
+ # be filled with exact library paths, like the other scripts. So
+ # instead what we get is HDF5_LIBRARY_DIRS which is the equivalent of
+ # what we'd pass to -L.
+ if (HDF5_FOUND)
+ # I'm not sure what I think of doing this here...
+ link_directories("${HDF5_LIBRARY_DIRS}")
+ endif()
+ endif()
+
+ set(SUPPORT_INCLUDE_DIRS "${HDF5_INCLUDE_DIRS}")
+ set(SUPPORT_LIBRARIES "${HDF5_LIBRARIES}")
+ endif()
+ endif()
endif("${ARMA_USE_WRAPPER}" STREQUAL "")
diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index 743abdd..151c5b7 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -85,7 +85,7 @@ class DecisionStump
*
ModifyData(MatType& data, const arma::Row<double>& D);
*/
-
+
//! Access the splitting attribute.
int SplitAttribute() const { return splitAttribute; }
//! Modify the splitting attribute (be careful!).
@@ -164,8 +164,20 @@ class DecisionStump
* @param attribute The attribute of which we calculate the entropy.
* @param labels Corresponding labels of the attribute.
*/
- template <typename AttType, typename LabelType>
- double CalculateEntropy(arma::subview_row<LabelType> labels);
+ template <typename LabelType, bool isWeight>
+ double CalculateEntropy(arma::subview_row<LabelType> labels, int begin,
+ const arma::rowvec& tempD);
+
+ /**
+ * Train the decision stump on the given data and labels.
+ *
+ * @param data Dataset to train on.
+ * @param labels Labels for dataset.
+ * @param isWeight Whether we need to run a weighted Decision Stump.
+ */
+ template <bool isWeight>
+ void Train(const MatType& data, const arma::Row<size_t>& labels,
+ const arma::rowvec& weightD);
};
}; // namespace decision_stump
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index d7df05a..b3043e0 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -351,44 +351,35 @@ template <typename MatType>
template <typename rType>
rType DecisionStump<MatType>::CountMostFreq(const arma::Row<rType>& subCols)
{
- // Sort subCols for easier processing.
- arma::Row<rType> sortCounts = arma::sort(subCols);
- rType element = sortCounts[0];
- size_t count = 0, localCount = 0;
+ // We'll create a map of elements and the number of times that each element is
+ // seen.
+ std::map<rType, size_t> countMap;
- if (sortCounts.n_elem == 1)
- return sortCounts[0];
-
- // An O(n) loop which counts the most frequent element in sortCounts.
- for (size_t i = 0; i < sortCounts.n_elem; ++i)
+ for (size_t i = 0; i < subCols.n_elem; ++i)
{
- if (i == sortCounts.n_elem - 1)
- {
- if (sortCounts(i - 1) == sortCounts(i))
- {
- // element = sortCounts(i - 1);
- localCount++;
- }
- else if (localCount > count)
- count = localCount;
- }
- else if (sortCounts(i) != sortCounts(i + 1))
- {
- localCount = 0;
- count++;
- }
+ if (countMap.count(subCols[i]) == 0)
+ countMap[subCols[i]] = 1;
else
+ ++countMap[subCols[i]];
+ }
+
+ // Now find the maximum value.
+ typename std::map<rType, size_t>::iterator it = countMap.begin();
+ rType mostFreq = it->first;
+ size_t mostFreqCount = it->second;
+ while (it != countMap.end())
+>>>>>>> .merge-right.r17318
+ {
+ if (it->second >= mostFreqCount)
{
- localCount++;
- if (localCount > count)
- {
- count = localCount;
- if (localCount == 1)
- element = sortCounts(i);
- }
+ mostFreq = it->first;
+ mostFreqCount = it->second;
}
+
+ ++it;
}
- return element;
+
+ return mostFreq;
}
/**
diff --git a/src/mlpack/tests/cli_test.cpp b/src/mlpack/tests/cli_test.cpp
index 5643179..b77575b 100644
--- a/src/mlpack/tests/cli_test.cpp
+++ b/src/mlpack/tests/cli_test.cpp
@@ -136,8 +136,8 @@ BOOST_AUTO_TEST_CASE(TestBooleanOption)
// Now, if we specify this flag, it should be true.
int argc = 2;
char* argv[2];
- argv[0] = strcpy(new char[strlen("programname")], "programname");
- argv[1] = strcpy(new char[strlen("--flag_test")], "--flag_test");
+ argv[0] = strcpy(new char[strlen("programname") + 1], "programname");
+ argv[1] = strcpy(new char[strlen("--flag_test") + 1], "--flag_test");
CLI::ParseCommandLine(argc, argv);
diff --git a/src/mlpack/tests/decision_stump_test.cpp b/src/mlpack/tests/decision_stump_test.cpp
index 2a04dfd..3cb3443 100644
--- a/src/mlpack/tests/decision_stump_test.cpp
+++ b/src/mlpack/tests/decision_stump_test.cpp
@@ -21,7 +21,7 @@
*/
#include <mlpack/core.hpp>
#include <mlpack/methods/decision_stump/decision_stump.hpp>
-
+
#include <boost/test/unit_test.hpp>
#include "old_boost_test_definitions.hpp"
@@ -221,9 +221,9 @@ BOOST_AUTO_TEST_CASE(MultiClassSplit)
BOOST_CHECK_EQUAL(predictedLabels(0, 0), 0);
BOOST_CHECK_EQUAL(predictedLabels(0, 1), 0);
- BOOST_CHECK_EQUAL(predictedLabels(0, 2), 0);
- BOOST_CHECK_EQUAL(predictedLabels(0, 3), 0);
- BOOST_CHECK_EQUAL(predictedLabels(0, 4), 0);
+ BOOST_CHECK_EQUAL(predictedLabels(0, 2), 1);
+ BOOST_CHECK_EQUAL(predictedLabels(0, 3), 1);
+ BOOST_CHECK_EQUAL(predictedLabels(0, 4), 1);
BOOST_CHECK_EQUAL(predictedLabels(0, 5), 1);
BOOST_CHECK_EQUAL(predictedLabels(0, 6), 2);
BOOST_CHECK_EQUAL(predictedLabels(0, 7), 2);
diff --git a/src/mlpack/tests/regularized_svd_test.cpp b/src/mlpack/tests/regularized_svd_test.cpp
index 1c54770..4fd748a 100644
--- a/src/mlpack/tests/regularized_svd_test.cpp
+++ b/src/mlpack/tests/regularized_svd_test.cpp
@@ -191,8 +191,15 @@ BOOST_AUTO_TEST_CASE(RegularizedSVDFunctionGradient)
parameters(i, j) += epsilon;
// Compare numerical and backpropagation gradient values.
- BOOST_REQUIRE_CLOSE(numGradient1, gradient1(i, j), 1e-2);
- BOOST_REQUIRE_CLOSE(numGradient2, gradient2(i, j), 1e-2);
+ if (gradient1(i, j) == 0.0)
+ BOOST_REQUIRE_SMALL(numGradient1, 1e-5);
+ else
+ BOOST_REQUIRE_CLOSE(numGradient1, gradient1(i, j), 1e-2);
+
+ if (gradient2(i, j) == 0.0)
+ BOOST_REQUIRE_SMALL(numGradient2, 1e-5);
+ else
+ BOOST_REQUIRE_CLOSE(numGradient2, gradient2(i, j), 1e-2);
}
}
}
diff --git a/src/mlpack/tests/sparse_coding_test.cpp b/src/mlpack/tests/sparse_coding_test.cpp
index ff229ad..764a06a 100644
--- a/src/mlpack/tests/sparse_coding_test.cpp
+++ b/src/mlpack/tests/sparse_coding_test.cpp
@@ -118,7 +118,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestCodingStepElasticNet)
BOOST_AUTO_TEST_CASE(SparseCodingTestDictionaryStep)
{
- const double tol = 2e-7;
+ const double tol = 1e-6;
double lambda1 = 0.1;
uword nAtoms = 25;
@@ -138,7 +138,7 @@ BOOST_AUTO_TEST_CASE(SparseCodingTestDictionaryStep)
mat Z = sc.Codes();
uvec adjacencies = find(Z);
- double normGradient = sc.OptimizeDictionary(adjacencies, 1e-12);
+ double normGradient = sc.OptimizeDictionary(adjacencies, 1e-15);
BOOST_REQUIRE_SMALL(normGradient, tol);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits mailing list