[mlpack] 36/324: R tree stuff
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:21:53 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 59bb7bcaf4687cc304ccde1b62e7f2262c27d74c
Author: andrewmw94 <andrewmw94 at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Fri May 30 20:36:27 2014 +0000
R tree stuff
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16583 9d5b8971-822b-0410-80eb-d18c1038ef23
---
.../core/tree/rectangle_tree/r_tree_split.hpp | 20 +++-
.../core/tree/rectangle_tree/r_tree_split_impl.hpp | 129 +++++++++++++++++++--
.../core/tree/rectangle_tree/rectangle_tree.hpp | 6 +-
.../tree/rectangle_tree/rectangle_tree_impl.hpp | 34 +++++-
4 files changed, 166 insertions(+), 23 deletions(-)
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
index 85a3687..949803e 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split.hpp
@@ -47,13 +47,14 @@ static void GetPointSeeds(const RectangleTree& tree, int &i, int &j);
static void GetBoundSeeds(const RectangleTree& tree, int &i, int &j);
/**
- * Get the index of the tree
+ * Assign points to the two new nodes.
*/
-static int GetPointDestNode(
+static void AssignPointDestNode(
const RectangleTree& oldTree,
- const RectangleTree& treeOne,
- const RectangleTree& treeTwo,
- const int index);
+ RectangleTree& treeOne,
+ RectangleTree& treeTwo,
+ const int intI,
+ const int intJ);
static int GetBoundDestNode(
const RectangleTree& oldTree,
@@ -63,6 +64,15 @@ static int GetBoundDestNode(
};
+static double getPointToBoundScore(const HRectBound& bound, const arma::vec& point)
+{
+ double score = 1.0;
+ for(int i = 0; i < bound.dimensions; i++) {
+ return score;
+ }
+}
+
+
}; // namespace tree
}; // namespace mlpack
diff --git a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
index d182fec..babd973 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_tree_split_impl.hpp
@@ -23,20 +23,24 @@ bool RTreeSplit<MatType>::SplitLeafNode(const RectangleTree& tree)
GetPointSeeds(tree, &i, &j);
RectangleTree treeOne = new RectangleTree();
- treeOne.insertPoint(tree.points[i]);
+ treeOne.insertPoint(tree.dataset[i]);
RectangleTree treeTwo = new RectangleTree();
- treeTwo.insertPoint(tree.points[j]);
+ treeTwo.insertPoint(tree.dataset[j]);
- int treeOneCount = 1;
- int treeTwoCount = 1;
-
-
-
+ AssignPointDestNode(tree, treeOne, treeTwo, i, j);
+ return true;
}
bool RTreeSplit<MatType>::SplitNonLeafNode(const RectangleTree& tree)
{
-
+ int i = 0;
+ int j = 0;
+ GetBoundSeeds(tree, &i, &j);
+
+ RectangleTree treeOne = new RectangleTree();
+ treeOne.insertRectangle()
+ RectangleTree treeTwo = new RectangleTree();
+
}
/**
@@ -52,10 +56,10 @@ void RTreeSplit<MatType>::GetPointSeeds(const RectangleTree& tree, int* iRet, in
int worstI = 0;
int worstJ = 0;
for(int i = 0; i < tree.count; i++) {
- for(int j = i; j < tree.count; j++) {
+ for(int j = i+1; j < tree.count; j++) {
double score = 1.0;
for(int k = 0; k < dimensions; k++) {
- score *= std::abs(tree.points[i][k] - tree.points[j][k]);
+ score *= std::abs(tree.dataset[i][k] - tree.dataset[j][k]);
}
if(score > worstPairScore) {
worstPairScore = score;
@@ -65,11 +69,112 @@ void RTreeSplit<MatType>::GetPointSeeds(const RectangleTree& tree, int* iRet, in
}
}
- *iRet = i;
- *jRet = j;
+ *iRet = worstI;
+ *jRet = worstJ;
return;
}
+/**
+ * Get the two bounds that will be used as seeds for the split of the node.
+ * The indices of the bounds will be stored in iRet and jRet.
+ */
+void RTreeSplit<MatType>::GetBoundSeeds(const RectangleTree& tree, int* iRet, int* jRet)
+{
+ double worstPairScore = 0.0;
+ int worstI = 0;
+ int worstJ = 0;
+ for(int i = 0; i < tree.numChildren; i++) {
+ for(int j = i+1; j < tree.numChildren; j++) {
+ double score = 1.0;
+ for(int k = 0; k < dimensions; k++) {
+ score *= std::max(tree.children[i].bound[k].hi(), tree.children[j].bound[k].hi) -
+ std::min(tree.children[i].bound[k].low(), tree.children[j].bound[k].low());
+ }
+ if(score > worstPairScore) {
+ worstPairScore = score;
+ worstI = i;
+ worstJ = j;
+ }
+ }
+ }
+
+ *iRet = worstI;
+ *jRet = worstJ;
+ return;
+}
+
+void RTreeSplit<MatType>::AssignPointDestNode(
+ const RectangleTree& oldTree,
+ RectangleTree& treeOne,
+ RectangleTree& treeTwo,
+ const int intI,
+ const int intJ)
+{
+ int end = oldTree.count;
+ oldTree.data
+
+
+
+
+ int index = 0;
+ while() {
+ int bestIndex = 0;
+ double bestScore = 0;
+ int bestRect = 0;
+
+ // Calculate the increase in volume for assigning this point to each rectangle.
+ double volOne = 1.0;
+ double volTwo = 1.0;
+ for(int i = 0; i < bound.Dim(); i++) {
+ volOne *= treeOne.bound[i].width();
+ volTwo *= treeTwo.bound[i].width();
+ }
+
+ for(int j = 0; j < end; j++) {
+ double newVolOne = 1.0;
+ double newVolTwo = 1.0;
+ for(int i = 0; i < bound.Dim(); i++) {
+ double c = oldTree.dataset.col(index)[i];
+ newVolOne *= treeOne.bound[i].contains(c) ? treeOne.bound[i].width() :
+ (c < treeOne.bound[i].low() ? (high - c) : (c - low));
+ newVolTwo *= treeTwo.bound[i].contains(c) ? treeTwo.bound[i].width() :
+ (c < treeTwo.bound[i].low() ? (high - c) : (c - low));
+ }
+
+ if((newVolOne - volOne) < (newVolTwo - volTwo)) {
+ if(newVolOne - volOne < bestScore) {
+ bestScore = newVolOne - volOne;
+ bestIndex = index;
+ bestRect = 1;
+ }
+ } else {
+ if(newVolTwo - volTwo < bestScore) {
+ bestScore = newVolTwo - volTwo;
+ bestIndex = index;
+ bestRect = 2;
+ }
+ }
+ }
+
+ // Assign the point that causes the least increase in volume
+ // to the appropriate rectangle.
+ if(bestRect == 1)
+ treeOne.insertPoint(oldTree.dataset(bestIndex);
+ else
+ treeTwo.insertPoint(oldTree.dataset(bestIndex);
+
+ // I have the imaginary removePoint here. I need to find out more about how fast doing
+ // various things with arma::mat is before I decide how to "remove" the points from the
+ // dataset. Make sure this is not accidentally made the function for deleting points.
+ oldTree.removePoint(index);
+ end--;
+ }
+
+
+}
+
+
+
}; // namespace tree
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
index 3bf26a0..5f89aac 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
@@ -34,8 +34,10 @@ template<typename StatisticType = EmptyStatistic,
class RectangleTree
{
private:
- //! The max number of child nodes an non-leaf node can have.
+ //! The max number of child nodes a non-leaf node can have.
size_t maxNumChildren;
+ //! The minimum number of child nodes a non-leaf node can have.
+ size_t minNumChildren;
//! The number of child nodes actually in use (0 if this is a leaf node).
size_t numOfChildren;
//! The child nodes (Starting at 0 and ending at (numOfChildren-1) ).
@@ -50,6 +52,8 @@ class RectangleTree
size_t count;
//! The leaf size.
size_t leafSize;
+ //! The minimum leaf size.
+ size_t minLeafSize;
//! The bound object for this node.
HRectBound bound;
//! Any extra data contained in the node.
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
index 1643f85..d454c46 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
@@ -22,11 +22,31 @@ template<typename StatisticType,
typename DescentType>
RectangleTree<StatisticType, MatType, SplitType, DescentType>::RectangleTree(
MatType& data,
- const size_t leafSize):
+ const size_t leafSize,
+ const size_t minLeafSize,
+ const size_t maxNumChildren,
+ const size_t minNumChildren):
{
- //Do the actual stuff here
-
+ this.maxNumChildren = maxNumChildren;
+ this.minNumChildren = minNumChildren;
+ this.numChildren = 0;
+ this.parent = NULL;
+ this.begin = 0;
+ this.count = 0;
+ this.leafSize = leafSize;
+ this.minLeafSize = minLeafSize;
+ this.bound = new HRectBound(data.n_rows);
+ this.stat = EmptyStatistic;
+ this.parentDistance = 0.0;
+ this.furthestDescendantDistance = 0.0;
+ this.dataset = new MatType(leafSize+1); // Add one to make splitting the node simpler
+
+ // For now, just insert the points in order.
+ // This won't actually work for any meaningful size of data since the root changes.
+ for(int i = 0; i < n_cols; i++) {
+ insertPoint(data.col(i));
+ }
}
/**
@@ -305,8 +325,12 @@ std::string BinarySpaceTree<StatisticType, MatType, SplitType, DescentType>::ToS
convert << " Leaf size: " << leafSize << std::endl;
convert << " Split dimension: " << splitDimension << std::endl;
- // How many levels should we print? This will print the top two tree levels.
- for(
+ // How many levels should we print? This will print the root and it's children.
+ if(parent == NULL) {
+ for(int i = 0; i < numChildren; i++) {
+ children[i].ToString();
+ }
+ }
}
}; //namespace tree
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list