[mlpack] 37/149: Clean up an unnecessary sort, and remove spareBlacklist.
Barak A. Pearlmutter
barak+git at pearlmutter.net
Sat May 2 09:11:06 UTC 2015
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 9ab10191e5a6b2e6004a1727f273d0b231ecbf46
Author: rcurtin <rcurtin at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Sun Oct 12 20:45:13 2014 +0000
Clean up an unnecessary sort, and remove spareBlacklist.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17245 9d5b8971-822b-0410-80eb-d18c1038ef23
---
.../methods/kmeans/pelleg_moore_kmeans_rules.hpp | 3 -
.../kmeans/pelleg_moore_kmeans_rules_impl.hpp | 65 +++++++---------------
2 files changed, 20 insertions(+), 48 deletions(-)
diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
index 874723d..c4fc419 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules.hpp
@@ -51,9 +51,6 @@ class PellegMooreKMeansRules
//! The number of O(d) distance calculations that have been performed.
size_t distanceCalculations;
-
- //! Spare blacklist; I think it's only used by the root node.
- arma::uvec spareBlacklist;
};
}; // namespace kmeans
diff --git a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
index d0cced5..4ef735e 100644
--- a/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/pelleg_moore_kmeans_rules_impl.hpp
@@ -24,11 +24,9 @@ PellegMooreKMeansRules<MetricType, TreeType>::PellegMooreKMeansRules(
newCentroids(newCentroids),
counts(counts),
metric(metric),
- distanceCalculations(0),
- spareBlacklist(centroids.n_cols)
+ distanceCalculations(0)
{
// Nothing to do.
- spareBlacklist.zeros();
}
template<typename MetricType, typename TreeType>
@@ -48,47 +46,37 @@ double PellegMooreKMeansRules<MetricType, TreeType>::Score(
// Obtain the parent's blacklist. If this is the root node, we'll start with
// an empty blacklist. This means that after each iteration, we don't need to
// reset any statistics.
- arma::uvec* blacklistPtr = NULL;
if (referenceNode.Parent() == NULL ||
- referenceNode.Parent()->Stat().Blacklist().size() == 0)
- blacklistPtr = &spareBlacklist;
+ referenceNode.Parent()->Stat().Blacklist().n_elem == 0)
+ referenceNode.Stat().Blacklist().zeros(centroids.n_cols);
else
- blacklistPtr = &referenceNode.Parent()->Stat().Blacklist();
-
- // If the blacklist hasn't been initialized, fill it with zeros.
- if (blacklistPtr->n_elem == 0)
- blacklistPtr->zeros(centroids.n_cols);
- referenceNode.Stat().Blacklist() = *blacklistPtr;
+ referenceNode.Stat().Blacklist() =
+ referenceNode.Parent()->Stat().Blacklist();
// The query index is a fake index that we won't use, and the reference node
// holds all of the points in the dataset. Our goal is to determine whether
// or not this node is dominated by a single cluster.
- const size_t whitelisted = centroids.n_cols - arma::accu(*blacklistPtr);
+ const size_t whitelisted = centroids.n_cols -
+ arma::accu(referenceNode.Stat().Blacklist());
distanceCalculations += whitelisted;
- arma::vec minDistances(whitelisted);
- minDistances.fill(DBL_MAX);
- arma::Col<size_t> indexMappings(whitelisted);
- size_t index = 0;
+ // Which cluster has minimum distance to the node?
+ size_t closestCluster = centroids.n_cols;
+ double minMinDistance = DBL_MAX;
for (size_t i = 0; i < centroids.n_cols; ++i)
{
- if ((*blacklistPtr)[i] == 0)
+ if (referenceNode.Stat().Blacklist()[i] == 0)
{
- minDistances(index) = referenceNode.MinDistance(centroids.col(i));
- indexMappings(index) = i;
- ++index;
+ const double minDistance = referenceNode.MinDistance(centroids.col(i));
+ if (minDistance < minMinDistance)
+ {
+ minMinDistance = minDistance;
+ closestCluster = i;
+ }
}
}
- // Which cluster has minimum distance to the node? Sort by distance.
- // This should probably be rewritten -- we only need the minimum, not the
- // entire sorted list. That'll cost O(k) not O(k log k) (depending on sort
- // type).
- arma::uvec sortedClusterIndices = sort_index(minDistances);
- const size_t closestCluster = indexMappings(sortedClusterIndices[0]);
-
-
// Now, for every other whitelisted cluster, determine if the closest cluster
// owns the point. This calculation is specific to hyperrectangle trees (but,
// this implementation is specific to kd-trees, so that's okay). For
@@ -129,27 +117,14 @@ double PellegMooreKMeansRules<MetricType, TreeType>::Score(
if (whitelisted - newBlacklisted == 1)
{
- // This node is dominated by the first cluster.
- const size_t cluster = indexMappings(sortedClusterIndices[0]);
- counts[cluster] += referenceNode.NumDescendants();
- newCentroids.col(cluster) += referenceNode.NumDescendants() *
+ // This node is dominated by the closest cluster.
+ counts[closestCluster] += referenceNode.NumDescendants();
+ newCentroids.col(closestCluster) += referenceNode.NumDescendants() *
referenceNode.Stat().Centroid();
- if (referenceNode.Parent() == NULL ||
- referenceNode.Parent()->Stat().Blacklist().size() == 0)
- {
- spareBlacklist.zeros(centroids.n_cols);
- }
-
return DBL_MAX;
}
- if (referenceNode.Parent() == NULL ||
- referenceNode.Parent()->Stat().Blacklist().size() == 0)
- {
- spareBlacklist.zeros(centroids.n_cols);
- }
-
// Perform the base case here.
for (size_t i = 0; i < referenceNode.NumPoints(); ++i)
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list