[mlpack] 90/149: Refactor CountMostFreq() so it is faster, simpler, and doesn't sometimes return uninitialized values.
Barak A. Pearlmutter
barak+git at pearlmutter.net
Sat May 2 09:11:12 UTC 2015
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 8ae26d8881769400f2ae228d485a5abedc10f8bb
Author: rcurtin <rcurtin at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Tue Nov 11 18:46:31 2014 +0000
Refactor CountMostFreq() so it is faster, simpler, and doesn't sometimes return
uninitialized values.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17318 9d5b8971-822b-0410-80eb-d18c1038ef23
---
.../methods/decision_stump/decision_stump.hpp | 2 +-
.../methods/decision_stump/decision_stump_impl.hpp | 54 +++++++++-------------
2 files changed, 23 insertions(+), 33 deletions(-)
diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index e3b50e5..831bbca 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -165,7 +165,7 @@ class DecisionStump
* @param isWeight Whether we need to run a weighted Decision Stump.
*/
template <bool isWeight>
- void Train(const MatType& data, const arma::Row<size_t>& labels,
+ void Train(const MatType& data, const arma::Row<size_t>& labels,
const arma::rowvec& weightD);
};
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index 5775653..d18365c 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -356,44 +356,34 @@ template <typename MatType>
template <typename rType>
rType DecisionStump<MatType>::CountMostFreq(const arma::Row<rType>& subCols)
{
- // Sort subCols for easier processing.
- arma::Row<rType> sortCounts = arma::sort(subCols);
- rType element;
- int count = 0, localCount = 0;
+ // We'll create a map of elements and the number of times that each element is
+ // seen.
+ std::map<rType, size_t> countMap;
- if (sortCounts.n_elem == 1)
- return sortCounts[0];
-
- // An O(n) loop which counts the most frequent element in sortCounts
- for (size_t i = 0; i < sortCounts.n_elem; ++i)
+ for (size_t i = 0; i < subCols.n_elem; ++i)
{
- if (i == sortCounts.n_elem - 1)
- {
- if (sortCounts(i - 1) == sortCounts(i))
- {
- // element = sortCounts(i - 1);
- localCount++;
- }
- else if (localCount > count)
- count = localCount;
- }
- else if (sortCounts(i) != sortCounts(i + 1))
- {
- localCount = 0;
- count++;
- }
+ if (countMap.count(subCols[i]) == 0)
+ countMap[subCols[i]] = 1;
else
+ ++countMap[subCols[i]];
+ }
+
+ // Now find the maximum value.
+ typename std::map<rType, size_t>::iterator it = countMap.begin();
+ rType mostFreq = it->first;
+ size_t mostFreqCount = it->second;
+ while (it != countMap.end())
+ {
+ if (it->second >= mostFreqCount)
{
- localCount++;
- if (localCount > count)
- {
- count = localCount;
- if (localCount == 1)
- element = sortCounts(i);
- }
+ mostFreq = it->first;
+ mostFreqCount = it->second;
}
+
+ ++it;
}
- return element;
+
+ return mostFreq;
}
/**
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits mailing list