[mlpack] 33/149: Fix a bug; now this algorithm is much faster.

Barak A. Pearlmutter barak+git at pearlmutter.net
Sat May 2 09:11:06 UTC 2015


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch svn-trunk
in repository mlpack.

commit f69583f8ef83fe23a4b85fa3a7a726b73fd0754c
Author: rcurtin <rcurtin at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date:   Sat Oct 11 01:43:52 2014 +0000

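    Fix a bug; now this algorithm is much faster.
    
    Reset each point's lower bound to DBL_MAX before the full scan over
    the other centroids (it was not reset there before).  Also halve the
    inter-centroid distance once, when it is computed, instead of halving
    it in every per-point bound test, and count the scan's distance
    calculations in bulk after the loop.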
    
    
    git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17239 9d5b8971-822b-0410-80eb-d18c1038ef23
---
 src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp b/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
index 5292eab..7441acf 100644
--- a/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
+++ b/src/mlpack/methods/kmeans/hamerly_kmeans_impl.hpp
@@ -45,7 +45,8 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
   {
     for (size_t j = i + 1; j < centroids.n_cols; ++j)
     {
-      const double dist = metric.Evaluate(centroids.col(i), centroids.col(j));
+      const double dist = metric.Evaluate(centroids.col(i), centroids.col(j)) /
+          2.0;
       ++distanceCalculations;
 
       // Update bounds, if this intra-cluster distance is smaller.
@@ -58,7 +59,7 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
 
   for (size_t i = 0; i < dataset.n_cols; ++i)
   {
-    const double m = std::max(minClusterDistances(assignments[i]) / 2.0,
+    const double m = std::max(minClusterDistances(assignments[i]),
                               lowerBounds(i));
 
     // First bound test.
@@ -84,13 +85,14 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
 
     // The bounds failed.  So test against all other clusters.
     // This is Hamerly's Point-All-Ctrs() function from the paper.
+    // We have to reset the lower bound first.
+    lowerBounds(i) = DBL_MAX;
     for (size_t c = 0; c < centroids.n_cols; ++c)
     {
       if (c == assignments[i])
         continue;
 
       const double dist = metric.Evaluate(dataset.col(i), centroids.col(c));
-      ++distanceCalculations;
 
       // Is this a better cluster?  At this point, upperBounds[i] = d(i, c(i)).
       if (dist < upperBounds(i))
@@ -106,6 +108,7 @@ double HamerlyKMeans<MetricType, MatType>::Iterate(const arma::mat& centroids,
         lowerBounds(i) = dist;
       }
     }
+    distanceCalculations += centroids.n_cols - 1;
 
     // Update new centroids.
     newCentroids.col(assignments[i]) += dataset.col(i);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list