[mlpack] 90/149: Refactor CountMostFreq() so it is faster, simpler, and doesn't sometimes return uninitialized values.

Barak A. Pearlmutter barak+git at pearlmutter.net
Sat May 2 09:11:12 UTC 2015


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch svn-trunk
in repository mlpack.

commit 8ae26d8881769400f2ae228d485a5abedc10f8bb
Author: rcurtin <rcurtin at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date:   Tue Nov 11 18:46:31 2014 +0000

    Refactor CountMostFreq() so it is faster, simpler, and no longer sometimes
    returns uninitialized values.
    
    
    git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17318 9d5b8971-822b-0410-80eb-d18c1038ef23
---
 .../methods/decision_stump/decision_stump.hpp      |  2 +-
 .../methods/decision_stump/decision_stump_impl.hpp | 54 +++++++++-------------
 2 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/src/mlpack/methods/decision_stump/decision_stump.hpp b/src/mlpack/methods/decision_stump/decision_stump.hpp
index e3b50e5..831bbca 100644
--- a/src/mlpack/methods/decision_stump/decision_stump.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump.hpp
@@ -165,7 +165,7 @@ class DecisionStump
    * @param isWeight Whether we need to run a weighted Decision Stump.
    */
   template <bool isWeight>
-  void Train(const MatType& data, const arma::Row<size_t>& labels, 
+  void Train(const MatType& data, const arma::Row<size_t>& labels,
              const arma::rowvec& weightD);
 
 };
diff --git a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
index 5775653..d18365c 100644
--- a/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
+++ b/src/mlpack/methods/decision_stump/decision_stump_impl.hpp
@@ -356,44 +356,34 @@ template <typename MatType>
 template <typename rType>
 rType DecisionStump<MatType>::CountMostFreq(const arma::Row<rType>& subCols)
 {
-  // Sort subCols for easier processing.
-  arma::Row<rType> sortCounts = arma::sort(subCols);
-  rType element;
-  int count = 0, localCount = 0;
+  // We'll create a map of elements and the number of times that each element is
+  // seen.
+  std::map<rType, size_t> countMap;
 
-  if (sortCounts.n_elem == 1)
-    return sortCounts[0];
-
-  // An O(n) loop which counts the most frequent element in sortCounts
-  for (size_t i = 0; i < sortCounts.n_elem; ++i)
+  for (size_t i = 0; i < subCols.n_elem; ++i)
   {
-    if (i == sortCounts.n_elem - 1)
-    {
-      if (sortCounts(i - 1) == sortCounts(i))
-      {
-        // element = sortCounts(i - 1);
-        localCount++;
-      }
-      else if (localCount > count)
-        count = localCount;
-    }
-    else if (sortCounts(i) != sortCounts(i + 1))
-    {
-      localCount = 0;
-      count++;
-    }
+    if (countMap.count(subCols[i]) == 0)
+      countMap[subCols[i]] = 1;
     else
+      ++countMap[subCols[i]];
+  }
+
+  // Now find the maximum value.
+  typename std::map<rType, size_t>::iterator it = countMap.begin();
+  rType mostFreq = it->first;
+  size_t mostFreqCount = it->second;
+  while (it != countMap.end())
+  {
+    if (it->second >= mostFreqCount)
     {
-      localCount++;
-      if (localCount > count)
-      {
-        count = localCount;
-        if (localCount == 1)
-          element = sortCounts(i);
-      }
+      mostFreq = it->first;
+      mostFreqCount = it->second;
     }
+
+    ++it;
   }
-  return element;
+
+  return mostFreq;
 }
 
 /**

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list