[mlpack] 39/324: Fix/update some comments, almost finish the splitting algorithm. Several miscellaneous changes.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:21:54 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 14f3bfcb27636287c88c73f3a8b4905dade3776d
Author: andrewmw94 <andrewmw94 at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Mon Jun 2 22:58:12 2014 +0000
Fix/update some comments, almost finish the splitting algorithm. Several miscellaneous changes.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16608 9d5b8971-822b-0410-80eb-d18c1038ef23
---
.../core/tree/rectangle_tree/r_tree_split.hpp | 30 ++---
.../core/tree/rectangle_tree/r_tree_split_impl.hpp | 137 +++++++++++++++++++--
.../core/tree/rectangle_tree/rectangle_tree.hpp | 27 ++--
3 files changed, 157 insertions(+), 37 deletions(-)
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
index 949803e..c1ee80a 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
@@ -24,15 +24,17 @@ class RTreeSplit
public:
/**
- * Split a leaf node using the "default" algorithm. The methods for splitting non-leaf
- * nodes are private since they should only be called if a leaf node overflows.
+ * Split a leaf node using the "default" algorithm. If necessary, this split will propagate
+ * upwards through the tree. The methods for splitting non-leaf nodes are private since
+ * they should only be called if a leaf node overflows.
*/
static bool SplitLeafNode(const RectangleTree& tree);
private:
/**
- * Split a non-leaf node using the "default" algorithm.
+ * Split a non-leaf node using the "default" algorithm. If this is the root node and
+ * we need to move up the tree, a new root node is created.
*/
static bool SplitNonLeafNode(const RectangleTree& tree);
@@ -56,21 +58,15 @@ static void AssignPointDestNode(
const int intI,
const int intJ);
-static int GetBoundDestNode(
+/**
+ * Assign nodes to the two new nodes.
+ */
+static void AssignNodeDestNode(
const RectangleTree& oldTree,
- const RectangleTree& treeOne,
- const RectangleTree& treeTwo,
- const int index);
-
-};
-
-static double getPointToBoundScore(const HRectBound& bound, const arma::vec& point)
-{
- double score = 1.0;
- for(int i = 0; i < bound.dimensions; i++) {
- return score;
- }
-}
+ RectangleTree& treeOne,
+ RectangleTree& treeTwo,
+ const int intI,
+ const int intJ);
}; // namespace tree
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
index 252080f..03ced27 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
@@ -12,6 +12,12 @@
namespace mlpack {
namespace tree {
+/**
+ * We call GetPointSeeds to get the two points which will be the initial points in the new nodes
+ * We then call AssignPointDestNode to assign the remaining points to the two new nodes.
+ * Finally, we delete the old node and insert the new nodes into the tree, spliting the parent
+ * if necessary.
+ */
template<typename MatType>
void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
{
@@ -22,13 +28,15 @@ void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
int j = 0;
GetPointSeeds(tree, &i, &j);
- RectangleTree treeOne = new RectangleTree();
- treeOne.insertPoint(tree.dataset[i]);
- RectangleTree treeTwo = new RectangleTree();
- treeTwo.insertPoint(tree.dataset[j]);
-
+ // This will assign the ith and jth point appropriately.
AssignPointDestNode(tree, treeOne, treeTwo, i, j);
+ // create the parent node if necessary
+ if(par == NULL) {
+
+ }
+
+ //Remove this node and insert treeOne and treeTwo
RectangleTree* par = tree.parent();
int index = 0;
for(int i = 0; i < par.numOfChildren(); i++) {
@@ -40,6 +48,7 @@ void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
par.getChildren()[i] = treeOne;
par.getChildren()[par.end++] = treeTwo;
+ //because we copied the points to treeOne and treeTwo, we can just delete this node
delete tree;
// we only add one at a time, so should only need to test for equality
@@ -52,16 +61,50 @@ void RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
return;
}
+/**
+ * We call GetBoundSeeds to get the two new nodes that this one will be broken
+ * into. Then we call AssignNodeDestNode to move the children of this node
+ * into either of those two nodes. Finally, we delete the now unused information
+ * and recurse up the tree if necessary.
+ */
bool RTreeSplit<MatType>::SplitNonLeafNode(const RectangleTree& tree)
{
int i = 0;
int j = 0;
GetBoundSeeds(tree, &i, &j);
- RectangleTree treeOne = new RectangleTree();
- treeOne.insertRectangle()
- RectangleTree treeTwo = new RectangleTree();
+ // This will assign the ith and jth rectangles appropriately.
+ AssignNodeDestNode(tree, treeOne, treeTwo, i, j);
+
+ // create the parent node if necessary
+ if(par == NULL) {
+
+ }
+ //Remove this node and insert treeOne and treeTwo
+ RectangleTree* par = tree.parent();
+ int index = 0;
+ for(int i = 0; i < par.numOfChildren(); i++) {
+ if(par.getChildren()[i] == this) {
+ index = i;
+ break;
+ }
+ }
+ par.getChildren()[i] = treeOne;
+ par.getChildren()[par.end++] = treeTwo;
+
+ // Because we now have pointers to the information stored under this tree,
+ // we need to delete this node carefully.
+ tree.softDelete();
+
+ // we only add one at a time, so should only need to test for equality
+ // just in case, we use an assert.
+ boost::assert(numChildren <= maxNumChildren);
+
+ if(par.numOfChildren() == par.maxNumChildren) {
+ SplitNonLeafNode(par);
+ }
+ return;
}
/**
@@ -124,7 +167,6 @@ void RTreeSplit<MatType>::GetBoundSeeds(const RectangleTree& tree, int* iRet, in
return;
}
-
void RTreeSplit<MatType>::AssignPointDestNode(
const RectangleTree& oldTree,
RectangleTree& treeOne,
@@ -132,11 +174,14 @@ void RTreeSplit<MatType>::AssignPointDestNode(
const int intI,
const int intJ)
{
- Log::assert(end > 1); // If this isn't true, the tree is really weird.
int end = oldTree.count;
+ Log::assert(end > 1); // If this isn't true, the tree is really weird.
+
+ treeOne.insertPoint(oldTree.dataset.col(intI));
oldTree.dataset.col(intI) = oldTree.dataset.col(--end); // decrement end
+ treeTwo.insertPoint(oldTree.dataset.col(intJ));
oldTree.dataset.col(intJ) = oldTree.dataset.col(--end); // decrement end
-
+
int index = 0;
// In each iteration, we go through all points and find the one that causes the least
@@ -190,12 +235,80 @@ void RTreeSplit<MatType>::AssignPointDestNode(
oldTree.dataset.col(bestIndex) = oldTree.dataset.col(--end); // decrement end.
}
+}
+void RTreeSplit<MatType>::AssignNodeDestNode(
+ const RectangleTree& oldTree,
+ RectangleTree& treeOne,
+ RectangleTree& treeTwo,
+ const int intI,
+ const int intJ)
+{
+
+ int end = oldTree.getNumChildren();
+ Log::assert(end > 1); // If this isn't true, the tree is really weird.
-}
+ treeOne.getChildren()[0] = oldTree.getChildren()[intI];
+ oldTree.getChildren[intI] = oldTree.getChildren()[--end]; // decrement end
+ treeTwo.getChildren()[0] = oldTree.getChildren()[intJ];
+ oldTree.getChildren()[intJ] = oldTree.getChildren()[--end]; // decrement end
+
+ int index = 0;
+ // In each iteration, we go through all of the nodes and find the one that causes the least
+ // increase of volume when added to one of the two new rectangles. We then add it to that
+ // rectangle.
+ while() {
+ int bestIndex = 0;
+ double bestScore = 0;
+ int bestRect = 0;
+
+ // Calculate the increase in volume for assigning this point to each rectangle.
+ double volOne = 1.0;
+ double volTwo = 1.0;
+ for(int i = 0; i < bound.Dim(); i++) {
+ volOne *= treeOne.bound[i].width();
+ volTwo *= treeTwo.bound[i].width();
+ }
+
+ for(int j = 0; j < end; j++) {
+ double newVolOne = 1.0;
+ double newVolTwo = 1.0;
+ for(int i = 0; i < bound.Dim(); i++) {
+ double c = oldTree.dataset.col(index)[i];
+ newVolOne *= treeOne.bound[i].contains(c) ? treeOne.bound[i].width() :
+ (c < treeOne.bound[i].low() ? (high - c) : (c - low));
+ newVolTwo *= treeTwo.bound[i].contains(c) ? treeTwo.bound[i].width() :
+ (c < treeTwo.bound[i].low() ? (high - c) : (c - low));
+ }
+
+ if((newVolOne - volOne) < (newVolTwo - volTwo)) {
+ if(newVolOne - volOne < bestScore) {
+ bestScore = newVolOne - volOne;
+ bestIndex = index;
+ bestRect = 1;
+ }
+ } else {
+ if(newVolTwo - volTwo < bestScore) {
+ bestScore = newVolTwo - volTwo;
+ bestIndex = index;
+ bestRect = 2;
+ }
+ }
+ }
+
+ // Assign the point that causes the least increase in volume
+ // to the appropriate rectangle.
+ if(bestRect == 1)
+ treeOne.insertPoint(oldTree.dataset(bestIndex);
+ else
+ treeTwo.insertPoint(oldTree.dataset(bestIndex);
+
+ oldTree.dataset.col(bestIndex) = oldTree.dataset.col(--end); // decrement end.
+ }
+}
}; // namespace tree
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
index bdde1d4..b4c6ff2 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
@@ -45,12 +45,14 @@ class RectangleTree
//! The parent node (NULL if this is the root of the tree).
RectangleTree* parent;
//! The index of the first point in the dataset contained in this node (and
- //! its children).
+ //! its children). THIS IS ALWAYS 0 AT THE MOMENT. IT EXISTS MERELY IN CASE
+ //! I THINK OF A WAY TO CHANGE THAT. IN OTHER WORDS, IT WILL PROBABLY BE
+ //! REMOVED.
size_t begin;
//! The number of points in the dataset contained in this node (and its
- //! children).
+ //! children).
size_t count;
- //! The leaf size.
+ //! The leaf size. (Maximum allowable leaf size.)
size_t leafSize;
//! The minimum leaf size.
size_t minLeafSize;
@@ -64,7 +66,7 @@ class RectangleTree
double furthestDescendantDistance;
//! The dataset.
MatType& dataset;
-
+
public:
//! So other classes can use TreeType::Mat.
typedef MatType Mat;
@@ -92,10 +94,19 @@ class RectangleTree
* to any nodes which are children of this one.
*/
~RectangleTree();
+
+ /**
+ * Delete this node of the tree, but leave the stuff contained in it intact.
+ * This is used when splitting a node, where the data in this tree is moved to two
+ * other trees.
+ */
+ void softDelete();
/**
* Inserts a point into the tree. The point will be copied to the data matrix
- * of the leaf node where it is finally inserted.
+ * of the leaf node where it is finally inserted, but we pass by reference since
+ * it may be passed many times before it actually reaches a leaf.
+ * @param point The point (arma::vec&) to be inserted.
*/
void InsertPoint(const arma::vec& point);
@@ -127,12 +138,12 @@ class RectangleTree
//! Return the bound object for this node.
const HRectBound& Bound() const { return bound; }
- //! Return the bound object for this node.
+ //! Modify the bound object for this node.
HRectBound& Bound() { return bound; }
//! Return the statistic object for this node.
const StatisticType& Stat() const { return stat; }
- //! Return the statistic object for this node.
+ //! Modify the statistic object for this node.
StatisticType& Stat() { return stat; }
//! Return whether or not this node is a leaf (true if it has no children).
@@ -337,7 +348,7 @@ class RectangleTree
/**
* Splits the current node, recursing up the tree.
- * CURRENTLY IT DOES NOT Also returns a list of the changed indices.
+ * CURRENTLY IT DOES NOT Also returns a list of the changed indices (because there are none).
*
* @param data Dataset which we are using.
* @param oldFromNew Vector holding permuted indices NOT IMPLEMENTED.
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list