[mlpack] 82/149: Add Pelleg-Moore type prune. This improves performance -- at least a bit.
Barak A. Pearlmutter
barak+git at pearlmutter.net
Sat May 2 09:11:11 UTC 2015
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit c254c653003ecd15484a1f7c989cb15707ee1016
Author: rcurtin <rcurtin at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Mon Nov 10 16:33:51 2014 +0000
Add Pelleg-Moore type prune. This improves performance -- at least a bit.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17310 9d5b8971-822b-0410-80eb-d18c1038ef23
---
.../methods/kmeans/dual_tree_kmeans_rules.hpp | 6 +-
.../methods/kmeans/dual_tree_kmeans_rules_impl.hpp | 95 ++++++++++++++--------
2 files changed, 64 insertions(+), 37 deletions(-)
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
index 4a54192..978c12c 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules.hpp
@@ -82,7 +82,7 @@ class DualTreeKMeansRules
* @param queryNode Query node.
* @param referenceNode Reference node.
*/
- double ElkanTypeScore(TreeType& queryNode, TreeType& referenceNode) const;
+ double ElkanTypeScore(TreeType& queryNode, TreeType& referenceNode);
/**
* See if an Elkan-type prune can be performed. If so, return DBL_MAX;
@@ -103,6 +103,10 @@ class DualTreeKMeansRules
double ElkanTypeScore(TreeType& queryNode,
TreeType& referenceNode,
const double minQueryDistance) const;
+
+ double PellegMooreScore(TreeType& /* queryNode */,
+ TreeType& referenceNode,
+ const double minDistance) const;
};
} // namespace kmeans
diff --git a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
index 1e352de..33ea4ae 100644
--- a/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
+++ b/src/mlpack/methods/kmeans/dual_tree_kmeans_rules_impl.hpp
@@ -140,9 +140,41 @@ double DualTreeKMeansRules<MetricType, TreeType>::Score(
return 0.0; // Pruning is not possible.
}
- ++distanceCalculations;
+ double score = ElkanTypeScore(queryNode, referenceNode);
+ if (score != DBL_MAX)
+ score = PellegMooreScore(queryNode, referenceNode, minDistance);
- return ElkanTypeScore(queryNode, referenceNode);
+ if (score == DBL_MAX)
+ {
+ referenceNode.Stat().ClustersPruned() += queryNode.NumDescendants();
+
+ // Have we pruned everything?
+ if (referenceNode.Stat().ClustersPruned() == centroids.n_cols - 1)
+ {
+ // Then the best query node must contain just one point.
+ const TreeType* bestQueryNode = (TreeType*)
+ referenceNode.Stat().ClosestQueryNode();
+ const size_t cluster = mappings[bestQueryNode->Descendant(0)];
+
+ referenceNode.Stat().Owner() = cluster;
+ newCentroids.col(cluster) += referenceNode.NumDescendants() *
+ referenceNode.Stat().Centroid();
+ counts(cluster) += referenceNode.NumDescendants();
+ referenceNode.Stat().ClustersPruned()++;
+ }
+ else if (referenceNode.Stat().ClustersPruned() +
+ visited[referenceNode.Descendant(0)] == centroids.n_cols)
+ {
+ for (size_t i = 0; i < referenceNode.NumPoints(); ++i)
+ {
+ const size_t cluster = assignments[referenceNode.Point(i)];
+ newCentroids.col(cluster) += dataset.col(referenceNode.Point(i));
+ counts(cluster)++;
+ }
+ }
+ }
+
+ return score;
}
template<typename MetricType, typename TreeType>
@@ -156,14 +188,16 @@ double DualTreeKMeansRules<MetricType, TreeType>::Rescore(
template<typename MetricType, typename TreeType>
double DualTreeKMeansRules<MetricType, TreeType>::Rescore(
- TreeType& queryNode,
- TreeType& referenceNode,
+ TreeType& /* queryNode */,
+ TreeType& /* referenceNode */,
const double oldScore) const
{
- if (oldScore == DBL_MAX)
- return oldScore; // We can't unprune something. This shouldn't happen.
+ return oldScore;
+
+// if (oldScore == DBL_MAX)
+// return oldScore; // We can't unprune something. This shouldn't happen.
- return ElkanTypeScore(queryNode, referenceNode, oldScore);
+// return ElkanTypeScore(queryNode, referenceNode, oldScore);
}
template<typename MetricType, typename TreeType>
@@ -234,18 +268,19 @@ bool DualTreeKMeansRules<MetricType, TreeType>::IsDescendantOf(
template<typename MetricType, typename TreeType>
double DualTreeKMeansRules<MetricType, TreeType>::ElkanTypeScore(
TreeType& queryNode,
- TreeType& referenceNode) const
+ TreeType& referenceNode)
{
// We have to calculate the minimum distance between the query node and the
// reference node's best query node.
const double minQueryDistance = queryNode.MinDistance((TreeType*)
referenceNode.Stat().ClosestQueryNode());
+ ++distanceCalculations;
return ElkanTypeScore(queryNode, referenceNode, minQueryDistance);
}
template<typename MetricType, typename TreeType>
double DualTreeKMeansRules<MetricType, TreeType>::ElkanTypeScore(
- TreeType& queryNode,
+ TreeType& /* queryNode */,
TreeType& referenceNode,
const double minQueryDistance) const
{
@@ -257,39 +292,27 @@ double DualTreeKMeansRules<MetricType, TreeType>::ElkanTypeScore(
// Then we can conclude d_max(best(N_r), N_r) <= d_min(N_q, N_r) which
// means that N_q cannot possibly hold any clusters that own any points in
// N_r.
- referenceNode.Stat().ClustersPruned() += queryNode.NumDescendants();
-
- // Have we pruned everything?
- if (referenceNode.Stat().ClustersPruned() == centroids.n_cols - 1)
- {
- // Then the best query node must contain just one point.
- const TreeType* bestQueryNode = (TreeType*)
- referenceNode.Stat().ClosestQueryNode();
- const size_t cluster = mappings[bestQueryNode->Descendant(0)];
-
- referenceNode.Stat().Owner() = cluster;
- newCentroids.col(cluster) += referenceNode.NumDescendants() *
- referenceNode.Stat().Centroid();
- counts(cluster) += referenceNode.NumDescendants();
- referenceNode.Stat().ClustersPruned()++;
- }
- else if (referenceNode.Stat().ClustersPruned() +
- visited[referenceNode.Descendant(0)] == centroids.n_cols)
- {
- for (size_t i = 0; i < referenceNode.NumPoints(); ++i)
- {
- const size_t cluster = assignments[referenceNode.Point(i)];
- newCentroids.col(cluster) += dataset.col(referenceNode.Point(i));
- counts(cluster)++;
- }
- }
-
return DBL_MAX;
}
return minQueryDistance;
}
+template<typename MetricType, typename TreeType>
+double DualTreeKMeansRules<MetricType, TreeType>::PellegMooreScore(
+ TreeType& /* queryNode */,
+ TreeType& referenceNode,
+ const double minDistance) const
+{
+ // If the minimum distance to the node is greater than the bound, then every
+ // cluster in the query node cannot possibly be the nearest neighbor of any of
+ // the points in the reference node.
+ if (minDistance > referenceNode.Stat().MaxQueryNodeDistance())
+ return DBL_MAX;
+
+ return minDistance;
+}
+
} // namespace kmeans
} // namespace mlpack
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits mailing list