[mlpack] 36/149: Refactor: only track distanceCalculations, not scores and baseCases. Also remove traversalInfo because it's not used, and count distance calculations during cluster domination calculation.

Barak A. Pearlmutter barak+git at pearlmutter.net
Sat May 2 09:11:06 UTC 2015


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch svn-trunk
in repository mlpack.

commit 1bc6274ace52fd3eb721a01d135cdcd3e51cb341
Author: rcurtin <rcurtin at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date:   Sun Oct 12 20:31:01 2014 +0000

    Refactor: only track distanceCalculations, not scores and baseCases.  Also
    remove traversalInfo because it's not used, and count distance calculations
    during cluster domination calculation.
    
    
    git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17244 9d5b8971-822b-0410-80eb-d18c1038ef23
---
 .../methods/kmeans/pelleg_moore_kmeans_impl.hpp    |  2 +-
 .../methods/kmeans/pelleg_moore_kmeans_rules.hpp   | 30 +++++-----------------
 .../kmeans/pelleg_moore_kmeans_rules_impl.hpp      |  9 ++++---
 3 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
index 51dbd66..e7fd385 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_impl.hpp
@@ -63,7 +63,7 @@ double PellegMooreKMeans<MetricType, MatType>::Iterate(
   // irrelevant; we are checking each node with all clusters.
   traverser.Traverse(0, *tree);
 
-  distanceCalculations += rules.BaseCases() + rules.Scores();
+  distanceCalculations += rules.DistanceCalculations();
 
   // Now, calculate how far the clusters moved, after normalizing them.
   double residual = 0.0;
diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
index 9bb808d..874723d 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
@@ -32,23 +32,10 @@ class PellegMooreKMeansRules
                  TreeType& referenceNode,
                  const double oldScore);
 
-  //! Get the number of base cases that have been performed.
-  size_t BaseCases() const { return baseCases; }
-  //! Modify the number of base cases that have been performed.
-  size_t& BaseCases() { return baseCases; }
-
-  //! Get the number of scores that have been performed.
-  size_t Scores() const { return scores; }
-  //! Modify the number of scores that have been performed.
-  size_t& Scores() { return scores; }
-
-  //! Convenience typedef.
-  typedef neighbor::NeighborSearchTraversalInfo<TreeType> TraversalInfoType;
-
-  //! Get the traversal info.
-  const TraversalInfoType& TraversalInfo() const { return traversalInfo; }
-  //! Modify the traversal info.
-  TraversalInfoType& TraversalInfo() { return traversalInfo; }
+  //! Get the number of distance calculations that have been performed.
+  size_t DistanceCalculations() const { return distanceCalculations; }
+  //! Modify the number of distance calculations that have been performed.
+  size_t& DistanceCalculations() { return distanceCalculations; }
 
  private:
   //! The dataset.
@@ -62,13 +49,10 @@ class PellegMooreKMeansRules
   //! Instantiated metric.
   MetricType& metric;
 
-  //! The number of base cases that have been performed.
-  size_t baseCases;
-  //! The number of scores that have been performed.
-  size_t scores;
-
-  TraversalInfoType traversalInfo;
+  //! The number of O(d) distance calculations that have been performed.
+  size_t distanceCalculations;
 
+  //! Spare blacklist; I think it's only used by the root node.
   arma::uvec spareBlacklist;
 };
 
diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
index 1ad6fd4..d0cced5 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
@@ -24,8 +24,7 @@ PellegMooreKMeansRules<MetricType, TreeType>::PellegMooreKMeansRules(
     newCentroids(newCentroids),
     counts(counts),
     metric(metric),
-    baseCases(0),
-    scores(0),
+    distanceCalculations(0),
     spareBlacklist(centroids.n_cols)
 {
   // Nothing to do.
@@ -66,7 +65,7 @@ double PellegMooreKMeansRules<MetricType, TreeType>::Score(
   // or not this node is dominated by a single cluster.
   const size_t whitelisted = centroids.n_cols - arma::accu(*blacklistPtr);
 
-  scores += whitelisted;
+  distanceCalculations += whitelisted;
 
   arma::vec minDistances(whitelisted);
   minDistances.fill(DBL_MAX);
@@ -117,6 +116,8 @@ double PellegMooreKMeansRules<MetricType, TreeType>::Score(
         centroids.col(closestCluster));
     const double otherDist = metric.Evaluate(cornerPoint, centroids.col(c));
 
+    distanceCalculations += 3; // One for cornerPoint, then two distances.
+
     if (closestDist < otherDist)
     {
       // The closest cluster dominates the node with respect to the cluster c.
@@ -159,7 +160,7 @@ double PellegMooreKMeansRules<MetricType, TreeType>::Score(
       if (referenceNode.Stat().Blacklist()[c] == 1)
         continue;
 
-      ++baseCases;
+      ++distanceCalculations;
 
       // The reference index is the index of the data point.
       const double distance = metric.Evaluate(centroids.col(c),

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list