[mlpack] 08/20: Potential fix for OS X RectangleTree bug.
Barak A. Pearlmutter
barak+git at pearlmutter.net
Thu May 25 20:44:08 UTC 2017
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch master
in repository mlpack.
commit b95b2f9e2f2a43d76a0f3d558e247fe49e09e275
Author: Ryan Curtin <ryan at ratml.org>
Date: Tue May 2 15:00:50 2017 -0400
Potential fix for OS X RectangleTree bug.
---
.../core/tree/rectangle_tree/r_star_tree_split.hpp | 6 +-
.../tree/rectangle_tree/r_star_tree_split_impl.hpp | 412 +++++++++++----
.../tree/rectangle_tree/rectangle_tree_impl.hpp | 19 +-
.../core/tree/rectangle_tree/x_tree_split.hpp | 6 +-
.../core/tree/rectangle_tree/x_tree_split_impl.hpp | 584 +++++++++++++--------
5 files changed, 672 insertions(+), 355 deletions(-)
diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
index 6370120..7f1c036 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split.hpp
@@ -52,9 +52,9 @@ class RStarTreeSplit
* Comparator for sorting with std::pair. This comparator works a little bit
* faster then the default comparator.
*/
- template<typename ElemType>
- static bool PairComp(const std::pair<ElemType, size_t>& p1,
- const std::pair<ElemType, size_t>& p2)
+ template<typename ElemType, typename TreeType>
+ static bool PairComp(const std::pair<ElemType, TreeType>& p1,
+ const std::pair<ElemType, TreeType>& p2)
{
return p1.first < p2.first;
}
diff --git a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
index d5669a6..51ddfea 100644
--- a/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/r_star_tree_split_impl.hpp
@@ -35,29 +35,31 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
if (tree->Count() <= tree->MaxLeafSize())
return;
+// std::cout << "split leaf node " << tree << " with parent " << tree->Parent()
+//<< "\n";
// If we are splitting the root node, we need will do things differently so
// that the constructor and other methods don't confuse the end user by giving
// an address of another node.
- if (tree->Parent() == NULL)
- {
+// if (tree->Parent() == NULL)
+// {
// We actually want to copy this way. Pointers and everything.
- TreeType* copy = new TreeType(*tree, false);
- copy->Parent() = tree;
- tree->Count() = 0;
- tree->NullifyData();
+// TreeType* copy = new TreeType(*tree, false);
+// copy->Parent() = tree;
+// tree->Count() = 0;
+// tree->NullifyData();
// Because this was a leaf node, numChildren must be 0.
- tree->children[(tree->NumChildren())++] = copy;
- assert(tree->NumChildren() == 1);
+// tree->children[(tree->NumChildren())++] = copy;
+// assert(tree->NumChildren() == 1);
- RStarTreeSplit::SplitLeafNode(copy,relevels);
- return;
- }
+// RStarTreeSplit::SplitLeafNode(copy,relevels);
+// return;
+// }
// If we haven't yet reinserted on this level, we try doing so now.
- if (relevels[tree->TreeDepth()])
+ if (relevels[tree->TreeDepth() - 1])
{
- relevels[tree->TreeDepth()] = false;
+ relevels[tree->TreeDepth() - 1] = false;
// We sort the points by decreasing distance to the centroid of the bound.
// We then remove the first p entries and reinsert them at the root.
@@ -81,8 +83,8 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
- std::vector<size_t> pointIndices(p);
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
+ std::vector<size_t> pointIndices(sorted.size());
for (size_t i = 0; i < p; i++)
{
@@ -119,7 +121,7 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxLeafSize() - 2 * tree->MinLeafSize() +
@@ -195,56 +197,134 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
for (size_t i = 0; i < sorted.size(); i++)
{
sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
- sorted[i].second = i;
+ sorted[i].second = tree->Point(i);
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
-
- TreeType* treeOne = new TreeType(tree->Parent());
- TreeType* treeTwo = new TreeType(tree->Parent());
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
- if (tiedOnOverlap)
+ if (tree->Parent())
{
- for (size_t i = 0; i < tree->Count(); i++)
+// std::cout << "first check parent " << tree->Parent() << " numDescendants "
+//<< tree->Parent()->NumDescendants() <<
+//" with " << tree->Parent()->NumChildren() << " children\n";
+// size_t manualCount = 0;
+// for (size_t i = 0; i < tree->Parent()->NumChildren(); ++i)
+// {
+// std::cout << "(" << &(tree->Parent()->Child(i)) << ") ";
+// manualCount += tree->Parent()->Child(i).NumDescendants();
+// std::cout << tree->Parent()->Child(i).NumDescendants() << " ";
+// }
+// std::cout << "\n";
+ // TreeType* treeOne = new TreeType(tree->Parent());
+ // Now clean the node, and we will re-use this.
+ const size_t oldDescendants = tree->numDescendants;
+ const size_t numPoints = tree->count;
+ tree->numChildren = 0;
+ tree->numDescendants = 0;
+ tree->bound.Clear();
+ tree->count = 0;
+ tree->begin = 0;
+
+ TreeType* treeTwo = new TreeType(tree->Parent());
+
+ if (tiedOnOverlap)
{
- if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
- treeOne->InsertPoint(tree->Point(sorted[i].second));
- else
- treeTwo->InsertPoint(tree->Point(sorted[i].second));
+ for (size_t i = 0; i < numPoints; i++)
+ {
+ if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
+ tree->InsertPoint(sorted[i].second);
+ else
+ treeTwo->InsertPoint(sorted[i].second);
+ }
}
+ else
+ {
+ for (size_t i = 0; i < numPoints; i++)
+ {
+ if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
+ tree->InsertPoint(sorted[i].second);
+ else
+ treeTwo->InsertPoint(sorted[i].second);
+ }
+ }
+
+ // Insert the new tree node.
+ TreeType* par = tree->Parent();
+ par->children[par->NumChildren()++] = treeTwo;
+
+ // We only add one at a time, so we should only need to test for equality;
+ // just in case, we use an assert.
+// std::cout << "x: " << oldDescendants << " == " << tree->NumDescendants() <<
+//" + " << treeTwo->NumDescendants() << " (" << tree->NumDescendants() +
+//treeTwo->NumDescendants() << ")\n";
+ assert(oldDescendants == tree->NumDescendants() + treeTwo->NumDescendants());
+// std::cout << "check parent " << par << " numDescendants " << par->NumDescendants() <<
+//" with " << par->NumChildren() << " children\n";
+// manualCount = 0;
+// for (size_t i = 0; i < par->NumChildren(); ++i)
+// {
+// std::cout << "(" << &(par->Child(i)) << ") ";
+// manualCount += par->Child(i).NumDescendants();
+// std::cout << par->Child(i).NumDescendants() << " ";
+// }
+// std::cout << "\n";
+// assert(par->NumDescendants() == manualCount);
+ assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+ if (par->NumChildren() == par->MaxNumChildren() + 1)
+ RStarTreeSplit::SplitNonLeafNode(par, relevels);
+
+ assert(tree->Parent()->NumChildren() <= tree->MaxNumChildren());
+ assert(tree->Parent()->NumChildren() >= tree->MinNumChildren());
+ assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
+ assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
}
else
{
- for (size_t i = 0; i < tree->Count(); i++)
+ TreeType* treeOne = new TreeType(tree);
+ TreeType* treeTwo = new TreeType(tree);
+
+ const size_t oldNumDescendants = tree->NumDescendants();
+ const size_t numPoints = tree->Count();
+ tree->numChildren = 0;
+ tree->bound.Clear();
+ tree->count = 0;
+ tree->begin = 0;
+ tree->numDescendants = 0;
+
+ if (tiedOnOverlap)
{
- if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
- treeOne->InsertPoint(tree->Point(sorted[i].second));
- else
- treeTwo->InsertPoint(tree->Point(sorted[i].second));
+ for (size_t i = 0; i < numPoints; ++i)
+ {
+ if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
+ treeOne->InsertPoint(sorted[i].second);
+ else
+ treeTwo->InsertPoint(sorted[i].second);
+ }
+ }
+ else
+ {
+ for (size_t i = 0; i < numPoints; ++i)
+ {
+ if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
+ treeOne->InsertPoint(sorted[i].second);
+ else
+ treeTwo->InsertPoint(sorted[i].second);
+ }
}
- }
-
- // Remove this node and insert treeOne and treeTwo.
- TreeType* par = tree->Parent();
- size_t index = 0;
- while (par->children[index] != tree) { index++; }
-
- assert(index != par->NumChildren());
- par->children[index] = treeOne;
- par->children[par->NumChildren()++] = treeTwo;
-
- // We only add one at a time, so we should only need to test for equality
- // just in case, we use an assert.
- assert(par->NumChildren() <= par->MaxNumChildren() + 1);
- if (par->NumChildren() == par->MaxNumChildren() + 1)
- RStarTreeSplit::SplitNonLeafNode(par,relevels);
-
- assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
- assert(treeOne->Parent()->NumChildren() >= treeOne->MinNumChildren());
- assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
- assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
- tree->SoftDelete();
+ InsertNodeIntoTree(tree, treeOne);
+ InsertNodeIntoTree(tree, treeTwo);
+
+// std::cout << "y: " << oldNumDescendants << ": " << treeOne->NumDescendants()
+//<< " + " << treeTwo->NumDescendants() << " (" << tree->NumDescendants() <<
+//")\n";
+// std::cout << "tree left " << treeOne->NumDescendants() << ", right " <<
+//treeTwo->NumDescendants() << ", parent " << tree->NumDescendants() << "\n";
+// for (size_t i = 0; i < tree->NumChildren(); ++i)
+// std::cout << tree->Child(i).NumDescendants() << " ";
+// std::cout << " (that's the children of the parent)\n";
+ assert(oldNumDescendants == tree->NumDescendants());
+ }
}
/**
@@ -257,6 +337,8 @@ void RStarTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
template<typename TreeType>
bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
{
+// std::cout << "split nonleaf node " << tree << " with parent " <<
+//tree->Parent() << "\n";
// Convenience typedef.
typedef typename TreeType::ElemType ElemType;
typedef bound::HRectBound<metric::EuclideanDistance, ElemType> BoundType;
@@ -264,19 +346,19 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
// If we are splitting the root node, we need will do things differently so
// that the constructor and other methods don't confuse the end user by giving
// an address of another node.
- if (tree->Parent() == NULL)
- {
+// if (tree->Parent() == NULL)
+// {
// We actually want to copy this way. Pointers and everything.
- TreeType* copy = new TreeType(*tree, false);
+// TreeType* copy = new TreeType(*tree, false);
- copy->Parent() = tree;
- tree->NumChildren() = 0;
- tree->NullifyData();
- tree->children[(tree->NumChildren())++] = copy;
+// copy->Parent() = tree;
+// tree->NumChildren() = 0;
+// tree->NullifyData();
+// tree->children[(tree->NumChildren())++] = copy;
- RStarTreeSplit::SplitNonLeafNode(copy,relevels);
- return true;
- }
+// RStarTreeSplit::SplitNonLeafNode(copy,relevels);
+// return true;
+// }
/*
// If we haven't yet reinserted on this level, we try doing so now.
@@ -358,7 +440,7 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -441,7 +523,7 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -514,13 +596,13 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
}
}
- std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
if (lowIsBest)
{
for (size_t i = 0; i < sorted.size(); i++)
{
sorted[i].first = tree->Child(i).Bound()[bestAxis].Lo();
- sorted[i].second = i;
+ sorted[i].second = &tree->Child(i);
}
}
else
@@ -528,67 +610,161 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
for (size_t i = 0; i < sorted.size(); i++)
{
sorted[i].first = tree->Child(i).Bound()[bestAxis].Hi();
- sorted[i].second = i;
+ sorted[i].second = &tree->Child(i);
}
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
-
- TreeType* treeOne = new TreeType(tree->Parent());
- TreeType* treeTwo = new TreeType(tree->Parent());
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
- if (tiedOnOverlap)
+ if (tree->Parent() != NULL)
{
- for (size_t i = 0; i < tree->NumChildren(); i++)
+ const size_t oldNumDescendants = tree->NumDescendants();
+// for (size_t i = 0; i < tree->NumChildren(); ++i)
+// std::cout << tree->Child(i).NumDescendants() << " ";
+// std::cout << " (total " << tree->NumDescendants() << ", count " <<
+//tree->count << "\n";
+ const size_t oldNumChildren = tree->NumChildren();
+ tree->numChildren = 0;
+ tree->bound.Clear();
+ tree->count = 0;
+ tree->begin = 0;
+ tree->numDescendants = 0;
+ TreeType* treeTwo = new TreeType(tree->Parent());
+
+ if (tiedOnOverlap)
{
- if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].second)));
- else
- InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].second)));
+ for (size_t i = 0; i < oldNumChildren; i++)
+ {
+ if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
+ {
+// std::cout << "insert " << sorted[i].second->NumDescendants() << " into tree"
+// << "\n";
+ InsertNodeIntoTree(tree, sorted[i].second);
+ }
+ else
+ {
+// std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeTwo\n";
+ InsertNodeIntoTree(treeTwo, sorted[i].second);
+ }
+ }
}
- }
- else
- {
- for (size_t i = 0; i < tree->NumChildren(); i++)
+ else
{
- if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, &(tree->Child(sorted[i].second)));
- else
- InsertNodeIntoTree(treeTwo, &(tree->Child(sorted[i].second)));
+ for (size_t i = 0; i < oldNumChildren; i++)
+ {
+ if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
+ {
+// std::cout << "insert " << sorted[i].second->NumDescendants() << " into tree\n";
+ InsertNodeIntoTree(tree, sorted[i].second);
+ }
+ else
+ {
+// std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeTwo\n";
+ InsertNodeIntoTree(treeTwo, sorted[i].second);
+ }
+ }
}
- }
- // Remove this node and insert treeOne and treeTwo
- TreeType* par = tree->Parent();
- size_t index = 0;
- while (par->children[index] != tree) { index++; }
+ // Insert the new node into the tree.
+ TreeType* par = tree->Parent();
+ par->children[par->NumChildren()++] = treeTwo;
+
+ // We only add one at a time, so we should only need to test for equality;
+ // just in case, we use an assert.
+ assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+ if (par->NumChildren() == par->MaxNumChildren() + 1)
+ RStarTreeSplit::SplitNonLeafNode(par,relevels);
+
+ // We have to update the children of each of these new nodes so that they
+ // record the correct parent.
+ for (size_t i = 0; i < tree->NumChildren(); i++)
+ tree->children[i]->Parent() = tree;
+
+ for (size_t i = 0; i < treeTwo->NumChildren(); i++)
+ treeTwo->children[i]->Parent() = treeTwo;
- par->children[index] = treeOne;
- par->children[par->NumChildren()++] = treeTwo;
+// std::cout << "tree left " << tree->NumDescendants() << ", right " <<
+//treeTwo->NumDescendants() << ", parent " << par->NumDescendants() << "\n";
+// for (size_t i = 0; i < par->NumChildren(); ++i)
+// std::cout << par->Child(i).NumDescendants() << " ";
+// std::cout << " (that's the children of the parent)\n";
+ assert(oldNumDescendants == (tree->NumDescendants() +
+treeTwo->NumDescendants()));
- // We only add one at a time, so we should only need to test for equality
- // just in case, we use an assert.
- assert(par->NumChildren() <= par->MaxNumChildren() + 1);
- if (par->NumChildren() == par->MaxNumChildren() + 1)
- RStarTreeSplit::SplitNonLeafNode(par,relevels);
- // We have to update the children of each of these new nodes so that they
- // record the correct parent.
- for (size_t i = 0; i < treeOne->NumChildren(); i++)
- treeOne->children[i]->Parent() = treeOne;
+ assert(tree->Parent()->NumChildren() <= tree->MaxNumChildren());
+ assert(tree->Parent()->NumChildren() >= tree->MinNumChildren());
+ assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
+ assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
+
+ assert(tree->MaxNumChildren() < 7);
+ assert(treeTwo->MaxNumChildren() < 7);
+ }
+ else
+ {
+ const size_t oldDescendants = tree->NumDescendants();
+// for (size_t i = 0; i < tree->NumChildren(); ++i)
+// std::cout << tree->Child(i).NumDescendants() << " ";
+// std::cout << " (total " << tree->NumDescendants() << ", count " <<
+//tree->count << "\n";
+ TreeType* treeOne = new TreeType(tree);
+ TreeType* treeTwo = new TreeType(tree);
+
+ const size_t oldNumChildren = tree->NumChildren();
+ tree->count = 0;
+ tree->numChildren = 0;
+ tree->bound.Clear();
+ tree->numDescendants = 0;
+
+ if (tiedOnOverlap)
+ {
+ for (size_t i = 0; i < oldNumChildren; i++)
+ {
+ if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
+ {
+// std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeOne\n";
+ InsertNodeIntoTree(treeOne, sorted[i].second);
+ }
+ else
+ {
+// std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeTwo\n";
+ InsertNodeIntoTree(treeTwo, sorted[i].second);
+ }
+ }
+ }
+ else
+ {
+ for (size_t i = 0; i < oldNumChildren; i++)
+ {
+ if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
+ {
+// std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeOne\n";
+ InsertNodeIntoTree(treeOne, sorted[i].second);
+ }
+ else
+ {
+// std::cout << "insert " << sorted[i].second->NumDescendants() << " into treeTwo\n";
+ InsertNodeIntoTree(treeTwo, sorted[i].second);
+ }
+ }
+ }
- for (size_t i = 0; i < treeTwo->NumChildren(); i++)
- treeTwo->children[i]->Parent() = treeTwo;
+ InsertNodeIntoTree(tree, treeOne);
+ InsertNodeIntoTree(tree, treeTwo);
- assert(treeOne->Parent()->NumChildren() <= treeOne->MaxNumChildren());
- assert(treeOne->Parent()->NumChildren() >= treeOne->MinNumChildren());
- assert(treeTwo->Parent()->NumChildren() <= treeTwo->MaxNumChildren());
- assert(treeTwo->Parent()->NumChildren() >= treeTwo->MinNumChildren());
+// std::cout << oldDescendants << "; " << treeOne->numDescendants << ", " <<
+//treeTwo->numDescendants << " --> " << tree->numDescendants << "\n";
+ assert(oldDescendants == (treeOne->numDescendants +
+ treeTwo->numDescendants));
- assert(treeOne->MaxNumChildren() < 7);
- assert(treeTwo->MaxNumChildren() < 7);
+ // We have to update the children of each of these new nodes so that they
+ // record the correct parent.
+ for (size_t i = 0; i < treeOne->NumChildren(); i++)
+ treeOne->children[i]->Parent() = treeOne;
- tree->SoftDelete();
+ for (size_t i = 0; i < treeTwo->NumChildren(); i++)
+ treeTwo->children[i]->Parent() = treeTwo;
+ }
return false;
}
@@ -600,9 +776,21 @@ bool RStarTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels
template<typename TreeType>
void RStarTreeSplit::InsertNodeIntoTree(TreeType* destTree, TreeType* srcNode)
{
+// std::cout << "insert " << srcNode << " into " << destTree << "\n";
+
destTree->Bound() |= srcNode->Bound();
destTree->numDescendants += srcNode->numDescendants;
destTree->children[destTree->NumChildren()++] = srcNode;
+
+// std::cout << "dest now has " << destTree->NumDescendants() << "\n";
+// size_t manualCount = 0;
+// for (size_t i = 0; i < destTree->NumChildren(); ++i)
+// {
+// manualCount += destTree->Child(i).NumDescendants();
+// std::cout << destTree->Child(i).NumDescendants() << " ";
+// }
+// std::cout << "\n";
+// assert(manualCount == destTree->NumDescendants());
}
} // namespace tree
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
index 5b4295b..d5f8858 100644
--- a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
@@ -275,7 +275,6 @@ RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
if (ownsDataset)
delete dataset;
-
}
/**
@@ -336,9 +335,7 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
numDescendants++;
- std::vector<bool> lvls(TreeDepth());
- for (size_t i = 0; i < lvls.size(); i++)
- lvls[i] = true;
+ std::vector<bool> lvls(TreeDepth(), true);
// If this is a leaf node, we stop here and add the point.
if (numChildren == 0)
@@ -452,9 +449,7 @@ bool RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
while (root->Parent() != NULL)
root = root->Parent();
- std::vector<bool> lvls(root->TreeDepth());
- for (size_t i = 0; i < lvls.size(); i++)
- lvls[i] = true;
+ std::vector<bool> lvls(root->TreeDepth(), true);
if (numChildren == 0)
{
@@ -991,6 +986,7 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
// This will check the minFill of the parent.
parent->CondenseTree(point, relevels, usePoint);
// Now it should be safe to delete this node.
+ std::cout << "soft delete " << this << "\n";
SoftDelete();
return;
@@ -1052,6 +1048,7 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
// This will check the minFill of the point.
parent->CondenseTree(point, relevels, usePoint);
// Now it should be safe to delete this node.
+ std::cout << "soft delete " << this << " 2\n";
SoftDelete();
return;
@@ -1067,15 +1064,17 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
if (child->NumChildren() > maxNumChildren)
{
maxNumChildren = child->MaxNumChildren();
- children.resize(maxNumChildren+1);
+ children.resize(maxNumChildren + 1);
}
for (size_t i = 0; i < child->NumChildren(); i++) {
children[i] = child->children[i];
children[i]->Parent() = this;
+ child->children[i] = NULL;
}
numChildren = child->NumChildren();
+ child->NumChildren() = 0;
for (size_t i = 0; i < child->Count(); i++)
{
@@ -1086,7 +1085,9 @@ void RectangleTree<MetricType, StatisticType, MatType, SplitType, DescentType,
auxiliaryInfo = child->AuxiliaryInfo();
count = child->Count();
- child->SoftDelete();
+ child->Count() = 0;
+
+ delete child;
return;
}
}
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
index a3d6cef..2c6ce57 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split.hpp
@@ -62,9 +62,9 @@ class XTreeSplit
* Comparator for sorting with std::pair. This comparator works a little bit
* faster then the default comparator.
*/
- template<typename ElemType>
- static bool PairComp(const std::pair<ElemType, size_t>& p1,
- const std::pair<ElemType, size_t>& p2)
+ template<typename ElemType, typename SecondType>
+ static bool PairComp(const std::pair<ElemType, SecondType>& p1,
+ const std::pair<ElemType, SecondType>& p2)
{
return p1.first < p2.first;
}
diff --git a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
index a8f6dc8..6f72673 100644
--- a/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
+++ b/src/mlpack/core/tree/rectangle_tree/x_tree_split_impl.hpp
@@ -38,24 +38,24 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
// If we are splitting the root node, we need will do things differently so
// that the constructor and other methods don't confuse the end user by giving
// an address of another node.
- if (tree->Parent() == NULL)
- {
+ //if (tree->Parent() == NULL)
+ //{
// We actually want to copy this way. Pointers and everything.
- TreeType* copy = new TreeType(*tree, false);
- copy->Parent() = tree;
- tree->Count() = 0;
- tree->NullifyData();
+ // TreeType* copy = new TreeType(*tree, false);
+ // copy->Parent() = tree;
+ // tree->Count() = 0;
+ // tree->NullifyData();
// Because this was a leaf node, numChildren must be 0.
- tree->children[(tree->NumChildren())++] = copy;
- assert(tree->NumChildren() == 1);
- XTreeSplit::SplitLeafNode(copy,relevels);
- return;
- }
+ // tree->children[(tree->NumChildren())++] = copy;
+ // assert(tree->NumChildren() == 1);
+ // XTreeSplit::SplitLeafNode(copy,relevels);
+ // return;
+ //}
// If we haven't yet reinserted on this level, we try doing so now.
- if (relevels[tree->TreeDepth()])
+ if (relevels[tree->TreeDepth() - 1])
{
- relevels[tree->TreeDepth()] = false;
+ relevels[tree->TreeDepth() - 1] = false;
// We sort the points by decreasing distance to the centroid of the bound.
// We then remove the first p entries and reinsert them at the root.
TreeType* root = tree;
@@ -77,10 +77,10 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
{
sorted[i].first = tree->Metric().Evaluate(center,
tree->Dataset().col(tree->Point(i)));
- sorted[i].second = i;
+ sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
std::vector<size_t> pointIndices(p);
for (size_t i = 0; i < p; i++)
@@ -129,7 +129,7 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
sorted[i].second = i;
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxLeafSize() -
@@ -202,77 +202,104 @@ void XTreeSplit::SplitLeafNode(TreeType *tree,std::vector<bool>& relevels)
for (size_t i = 0; i < sorted.size(); i++)
{
sorted[i].first = tree->Dataset().col(tree->Point(i))[bestAxis];
- sorted[i].second = i;
+ sorted[i].second = tree->Point(i);
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
-
- TreeType* treeOne = new TreeType(tree->Parent(),
- tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
- TreeType* treeTwo = new TreeType(tree->Parent(),
- tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, size_t>);
- // The leaf nodes should never have any overlap introduced by the above method
- // since a split axis is chosen and then points are assigned based on their
- // value along that axis.
- if (tiedOnOverlap)
+ if (tree->Parent() != NULL)
{
- for (size_t i = 0; i < tree->Count(); i++)
+ // We can reuse 'tree' as one of the two children.
+ //TreeType* treeOne = new TreeType(tree->Parent(),
+ // tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+ TreeType* treeTwo = new TreeType(tree->Parent(),
+ tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+
+ const size_t oldCount = tree->Count();
+ tree->Count() = 0;
+
+ // The leaf nodes should never have any overlap introduced by the above method
+ // since a split axis is chosen and then points are assigned based on their
+ // value along that axis.
+ if (tiedOnOverlap)
{
- if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
- treeOne->InsertPoint(tree->Point(sorted[i].second));
- else
- treeTwo->InsertPoint(tree->Point(sorted[i].second));
+ for (size_t i = 0; i < oldCount; i++)
+ {
+ if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
+ tree->InsertPoint(sorted[i].second);
+ else
+ treeTwo->InsertPoint(sorted[i].second);
+ }
+ }
+ else
+ {
+ for (size_t i = 0; i < oldCount; i++)
+ {
+ if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
+ tree->InsertPoint(tree->Point(sorted[i].second));
+ else
+ treeTwo->InsertPoint(tree->Point(sorted[i].second));
+ }
}
+
+ // This node is reused as the first half of the split; add treeTwo to the parent.
+ TreeType* par = tree->Parent();
+ par->children[par->NumChildren()++] = treeTwo;
+
+ // We now update the split history of each new node.
+ tree->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+ tree->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+ treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+ treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+
+ // We only add one at a time, so we should only need to test for equality; just
+ // in case, we use an assert.
+ assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+ if (par->NumChildren() == par->MaxNumChildren() + 1)
+ XTreeSplit::SplitNonLeafNode(par,relevels);
}
else
{
- for (size_t i = 0; i < tree->Count(); i++)
+ // We have to insert two nodes, and this node moves "up", since it is the
+ // root.
+ TreeType* treeOne = new TreeType(tree,
+ tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+ TreeType* treeTwo = new TreeType(tree,
+ tree->AuxiliaryInfo().NormalNodeMaxNumChildren());
+
+ const size_t oldCount = tree->Count();
+ tree->Count() = 0;
+
+ if (tiedOnOverlap)
{
- if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
- treeOne->InsertPoint(tree->Point(sorted[i].second));
- else
- treeTwo->InsertPoint(tree->Point(sorted[i].second));
+ for (size_t i = 0; i < oldCount; i++)
+ {
+ if (i < bestAreaIndexOnBestAxis + tree->MinLeafSize())
+ treeOne->InsertPoint(sorted[i].second);
+ else
+ treeTwo->InsertPoint(sorted[i].second);
+ }
}
- }
-
- // Remove this node and insert treeOne and treeTwo.
- TreeType* par = tree->Parent();
- size_t index = par->NumChildren();
- for (size_t i = 0; i < par->NumChildren(); i++)
- {
- if (par->children[i] == tree)
+ else
{
- index = i;
- break;
+ for (size_t i = 0; i < oldCount; i++)
+ {
+ if (i < bestOverlapIndexOnBestAxis + tree->MinLeafSize())
+ treeOne->InsertPoint(sorted[i].second);
+ else
+ treeTwo->InsertPoint(sorted[i].second);
+ }
}
+
+ tree->children[0] = treeOne;
+ tree->children[1] = treeTwo;
+ tree->numChildren = 2;
+
+ treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+ treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+ treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+ treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
}
- assert(index != par->NumChildren());
- par->children[index] = treeOne;
- par->children[par->NumChildren()++] = treeTwo;
-
- // We now update the split history of each new node.
- treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
- treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
- treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
- treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
-
- // We only add one at a time, so we should only need to test for equality just
- // in case, we use an assert.
- assert(par->NumChildren() <= par->MaxNumChildren() + 1);
- if (par->NumChildren() == par->MaxNumChildren() + 1)
- XTreeSplit::SplitNonLeafNode(par,relevels);
-
- assert(treeOne->Parent()->NumChildren() <=
- treeOne->Parent()->MaxNumChildren());
- assert(treeOne->Parent()->NumChildren() >=
- treeOne->Parent()->MinNumChildren());
- assert(treeTwo->Parent()->NumChildren() <=
- treeTwo->Parent()->MaxNumChildren());
- assert(treeTwo->Parent()->NumChildren() >=
- treeTwo->Parent()->MinNumChildren());
-
- tree->SoftDelete();
}
/**
@@ -292,18 +319,18 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
// If we are splitting the root node, we need will do things differently so
// that the constructor and other methods don't confuse the end user by giving
// an address of another node.
- if (tree->Parent() == NULL)
- {
+// if (tree->Parent() == NULL)
+// {
// We actually want to copy this way. Pointers and everything.
- TreeType* copy = new TreeType(*tree, false);
-
- copy->Parent() = tree;
- tree->NumChildren() = 0;
- tree->NullifyData();
- tree->children[(tree->NumChildren())++] = copy;
- XTreeSplit::SplitNonLeafNode(copy,relevels);
- return true;
- }
+// TreeType* copy = new TreeType(*tree, false);
+
+// copy->Parent() = tree;
+// tree->NumChildren() = 0;
+// tree->NullifyData();
+// tree->children[(tree->NumChildren())++] = copy;
+// XTreeSplit::SplitNonLeafNode(copy,relevels);
+// return true;
+// }
// The X tree paper doesn't explain how to handle the split history when
// reinserting nodes and reinserting nodes seems to hurt the performance, so
@@ -373,14 +400,14 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
ElemType axisScore = 0.0;
// We'll do Bound().Lo() now and use Bound().Hi() later.
- std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
for (size_t i = 0; i < sorted.size(); i++)
{
sorted[i].first = tree->Child(i).Bound()[j].Lo();
- sorted[i].second = i;
+ sorted[i].second = &tree->Child(i);
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -408,10 +435,10 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
BoundType bound2(tree->Bound().Dim());
for (size_t l = 0; l < cutOff; l++)
- bound1 |= tree->Child(sorted[l].second).Bound();
+ bound1 |= sorted[l].second->Bound();
for (size_t l = cutOff; l < tree->NumChildren(); l++)
- bound2 |= tree->Child(sorted[l].second).Bound();
+ bound2 |= sorted[l].second->Bound();
ElemType area1 = bound1.Volume();
ElemType area2 = bound2.Volume();
@@ -478,14 +505,14 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
{
ElemType axisScore = 0.0;
- std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
for (size_t i = 0; i < sorted.size(); i++)
{
sorted[i].first = tree->Child(i).Bound()[j].Hi();
- sorted[i].second = i;
+ sorted[i].second = &tree->Child(i);
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
// We'll store each of the three scores for each distribution.
std::vector<ElemType> areas(tree->MaxNumChildren() -
@@ -513,10 +540,10 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
BoundType bound2(tree->Bound().Dim());
for (size_t l = 0; l < cutOff; l++)
- bound1 |= tree->Child(sorted[l].second).Bound();
+ bound1 |= sorted[l].second->Bound();
for (size_t l = cutOff; l < tree->NumChildren(); l++)
- bound2 |= tree->Child(sorted[l].second).Bound();
+ bound2 |= sorted[l].second->Bound();
ElemType area1 = bound1.Volume();
ElemType area2 = bound2.Volume();
@@ -581,13 +608,13 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
}
}
- std::vector<std::pair<ElemType, size_t>> sorted(tree->NumChildren());
+ std::vector<std::pair<ElemType, TreeType*>> sorted(tree->NumChildren());
if (lowIsBest)
{
for (size_t i = 0; i < sorted.size(); i++)
{
sorted[i].first = tree->Child(i).Bound()[bestAxis].Lo();
- sorted[i].second = i;
+ sorted[i].second = &tree->Child(i);
}
}
else
@@ -595,181 +622,282 @@ bool XTreeSplit::SplitNonLeafNode(TreeType *tree,std::vector<bool>& relevels)
for (size_t i = 0; i < sorted.size(); i++)
{
sorted[i].first = tree->Child(i).Bound()[bestAxis].Hi();
- sorted[i].second = i;
+ sorted[i].second = &tree->Child(i);
}
}
- std::sort(sorted.begin(), sorted.end(), PairComp<ElemType>);
-
- TreeType* treeOne = new TreeType(tree->Parent(), tree->MaxNumChildren());
- TreeType* treeTwo = new TreeType(tree->Parent(), tree->MaxNumChildren());
+ std::sort(sorted.begin(), sorted.end(), PairComp<ElemType, TreeType*>);
- // Now as per the X-tree paper, we ensure that this split was good enough.
- bool useMinOverlapSplit = false;
- if (tiedOnOverlap)
+ if (tree->Parent() != NULL)
{
- if (overlapBestAreaAxis/areaBestAreaAxis < MAX_OVERLAP)
+ // Reuse tree as the new child.
+ TreeType* treeTwo = new TreeType(tree->Parent(), tree->MaxNumChildren());
+ const size_t numChildren = tree->NumChildren();
+ tree->numChildren = 0;
+
+ // Now as per the X-tree paper, we ensure that this split was good enough.
+ bool useMinOverlapSplit = false;
+ if (tiedOnOverlap)
{
- for (size_t i = 0; i < tree->NumChildren(); i++)
+ if (overlapBestAreaAxis/areaBestAreaAxis < MAX_OVERLAP)
{
- if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, tree->children[sorted[i].second]);
- else
- InsertNodeIntoTree(treeTwo, tree->children[sorted[i].second]);
+ for (size_t i = 0; i < numChildren; i++)
+ {
+ if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
+ InsertNodeIntoTree(tree, sorted[i].second);
+ else
+ InsertNodeIntoTree(treeTwo, sorted[i].second);
+ }
}
+ else
+ useMinOverlapSplit = true;
}
else
- useMinOverlapSplit = true;
- }
- else
- {
- if (overlapBestOverlapAxis/areaBestOverlapAxis < MAX_OVERLAP)
{
- for (size_t i = 0; i < tree->NumChildren(); i++)
+ if (overlapBestOverlapAxis/areaBestOverlapAxis < MAX_OVERLAP)
{
- if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, tree->children[sorted[i].second]);
- else
- InsertNodeIntoTree(treeTwo, tree->children[sorted[i].second]);
+ for (size_t i = 0; i < numChildren; i++)
+ {
+ if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
+ InsertNodeIntoTree(tree, sorted[i].second);
+ else
+ InsertNodeIntoTree(treeTwo, sorted[i].second);
+ }
}
+ else
+ useMinOverlapSplit = true;
}
- else
- useMinOverlapSplit = true;
- }
- // If the split was not good enough, then we try the minimal overlap split.
- // If that fails, we create a "super node" (more accurately we resize this one
- // to make it a super node).
- if (useMinOverlapSplit)
- {
- // If there is a dimension that might work, try that.
- if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
- (bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
+ // If the split was not good enough, then we try the minimal overlap split.
+ // If that fails, we create a "super node" (more accurately we resize this one
+ // to make it a super node).
+ if (useMinOverlapSplit)
{
- std::vector<std::pair<ElemType, size_t>> sorted2(tree->NumChildren());
- if (minOverlapSplitUsesHi)
+ // If there is a dimension that might work, try that.
+ if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
+ (bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
{
- for (size_t i = 0; i < sorted2.size(); i++)
+ std::vector<std::pair<ElemType, TreeType*>> sorted2(numChildren);
+ if (minOverlapSplitUsesHi)
{
- sorted2[i].first = tree->Child(i).Bound()[bestAxis].Hi();
- sorted2[i].second = i;
+ for (size_t i = 0; i < sorted2.size(); i++)
+ {
+ sorted2[i].first = sorted[i].second->Bound()[bestAxis].Hi();
+ sorted2[i].second = sorted[i].second;
+ }
+ }
+ else
+ {
+ for (size_t i = 0; i < sorted2.size(); i++)
+ {
+ sorted2[i].first = sorted[i].second->Bound()[bestAxis].Lo();
+ sorted2[i].second = sorted[i].second;
+ }
+ }
+ std::sort(sorted2.begin(), sorted2.end(), PairComp<ElemType, TreeType*>);
+
+ for (size_t i = 0; i < numChildren; i++)
+ {
+ if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
+ InsertNodeIntoTree(tree, sorted2[i].second);
+ else
+ InsertNodeIntoTree(treeTwo, sorted2[i].second);
}
}
else
{
- for (size_t i = 0; i < sorted2.size(); i++)
+ // We don't create a supernode that would be the only child of the root.
+ // (Note that if you did try to do so you would need to update the parent
+ // field on each child of this new node as creating a supernode causes the
+ // function to return before that is done.
+
+ // I thought commenting out the bellow would make the tree less efficient
+ // but would still work. It doesn't. I should look into that to see if
+ // there is another bug.
+
+ if ((tree->Parent()->Parent() == NULL) &&
+ (tree->Parent()->NumChildren() == 1))
{
- sorted2[i].first = tree->Child(i).Bound()[bestAxis].Lo();
- sorted2[i].second = i;
+ // We make the root a supernode instead.
+ tree->Parent()->MaxNumChildren() = tree->MaxNumChildren() +
+ tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
+ tree->Parent()->children.resize(tree->Parent()->MaxNumChildren() + 1);
+ tree->Parent()->NumChildren() = tree->NumChildren();
+ for (size_t i = 0; i < numChildren; ++i)
+ {
+ tree->Parent()->children[i] = sorted[i].second;
+ tree->Parent()->children[i]->Parent() = tree->Parent();
+ tree->children[i] = NULL;
+ }
+
+ delete tree;
+ delete treeTwo;
+
+ return false;
}
+
+ // If we don't have to worry about the root, we just enlarge this node.
+ tree->MaxNumChildren() +=
+ tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
+ tree->children.resize(tree->MaxNumChildren() + 1);
+ tree->numChildren = numChildren;
+ for (size_t i = 0; i < numChildren; i++)
+ tree->Child(i).Parent() = tree;
+
+ delete treeTwo;
+ return false;
}
- std::sort(sorted2.begin(), sorted2.end(), PairComp<ElemType>);
+ }
- for (size_t i = 0; i < tree->NumChildren(); i++)
+ // Update the split history of each child.
+ tree->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+ tree->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+ treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+ treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+
+ // Remove this node and insert treeOne and treeTwo
+ TreeType* par = tree->Parent();
+ par->children[par->NumChildren()++] = treeTwo;
+
+ // we only add one at a time, so we should only need to test for equality
+ // just in case, we use an assert.
+ if (!(par->NumChildren() <= par->MaxNumChildren() + 1))
+ Log::Debug << "error " << par->NumChildren() << ", "
+ << par->MaxNumChildren() + 1 << std::endl;
+ assert(par->NumChildren() <= par->MaxNumChildren() + 1);
+
+ if (par->NumChildren() == par->MaxNumChildren() + 1)
+ XTreeSplit::SplitNonLeafNode(par,relevels);
+
+ // We have to update the children of each of these new nodes so that they
+ // record the correct parent.
+ for (size_t i = 0; i < treeTwo->NumChildren(); i++)
+ treeTwo->Child(i).Parent() = treeTwo;
+
+ assert(tree->Parent()->NumChildren() <=
+ tree->Parent()->MaxNumChildren());
+ assert(tree->Parent()->NumChildren() >=
+ tree->Parent()->MinNumChildren());
+ assert(treeTwo->Parent()->NumChildren() <=
+ treeTwo->Parent()->MaxNumChildren());
+ assert(treeTwo->Parent()->NumChildren() >=
+ treeTwo->Parent()->MinNumChildren());
+
+ return false;
+ }
+ else
+ {
+ // We are the root of the tree, so we need to create two children to add.
+ TreeType* treeOne = new TreeType(tree, tree->MaxNumChildren());
+ TreeType* treeTwo = new TreeType(tree, tree->MaxNumChildren());
+ const size_t numChildren = tree->NumChildren();
+ tree->numChildren = 0;
+
+ // Now as per the X-tree paper, we ensure that this split was good enough.
+ bool useMinOverlapSplit = false;
+ if (tiedOnOverlap)
+ {
+ if (overlapBestAreaAxis/areaBestAreaAxis < MAX_OVERLAP)
{
- if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
- InsertNodeIntoTree(treeOne, tree->children[sorted2[i].second]);
- else
- InsertNodeIntoTree(treeTwo, tree->children[sorted2[i].second]);
+ for (size_t i = 0; i < numChildren; i++)
+ {
+ if (i < bestAreaIndexOnBestAxis + tree->MinNumChildren())
+ InsertNodeIntoTree(treeOne, sorted[i].second);
+ else
+ InsertNodeIntoTree(treeTwo, sorted[i].second);
+ }
}
+ else
+ useMinOverlapSplit = true;
}
else
{
- // We don't create a supernode that would be the only child of the root.
- // (Note that if you did try to do so you would need to update the parent
- // field on each child of this new node as creating a supernode causes the
- // function to return before that is done.
-
- // I thought commenting out the bellow would make the tree less efficient
- // but would still work. It doesn't. I should look into that to see if
- // there is another bug.
+ if (overlapBestOverlapAxis/areaBestOverlapAxis < MAX_OVERLAP)
+ {
+ for (size_t i = 0; i < numChildren; i++)
+ {
+ if (i < bestOverlapIndexOnBestAxis + tree->MinNumChildren())
+ InsertNodeIntoTree(treeOne, sorted[i].second);
+ else
+ InsertNodeIntoTree(treeTwo, sorted[i].second);
+ }
+ }
+ else
+ useMinOverlapSplit = true;
+ }
- if ((tree->Parent()->Parent() == NULL) &&
- (tree->Parent()->NumChildren() == 1))
+ // If the split was not good enough, then we try the minimal overlap split.
+ // If that fails, we create a "super node" (more accurately we resize this one
+ // to make it a super node).
+ if (useMinOverlapSplit)
+ {
+ // If there is a dimension that might work, try that.
+ if ((minOverlapSplitDimension != tree->Bound().Dim()) &&
+ (bestScoreMinOverlapSplit / areaOfBestMinOverlapSplit < MAX_OVERLAP))
{
- // We make the root a supernode instead.
- tree->Parent()->MaxNumChildren() = tree->MaxNumChildren() +
- tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
- tree->Parent()->children.resize(tree->Parent()->MaxNumChildren() + 1);
- tree->Parent()->NumChildren() = tree->NumChildren();
- for (size_t i = 0; i < tree->NumChildren(); i++)
+ std::vector<std::pair<ElemType, TreeType*>> sorted2(numChildren);
+ if (minOverlapSplitUsesHi)
+ {
+ for (size_t i = 0; i < sorted2.size(); i++)
+ {
+ sorted2[i].first = sorted[i].second->Bound()[bestAxis].Hi();
+ sorted2[i].second = sorted[i].second;
+ }
+ }
+ else
+ {
+ for (size_t i = 0; i < sorted2.size(); i++)
+ {
+ sorted2[i].first = sorted[i].second->Bound()[bestAxis].Lo();
+ sorted2[i].second = sorted[i].second;
+ }
+ }
+ std::sort(sorted2.begin(), sorted2.end(), PairComp<ElemType, TreeType*>);
+
+ for (size_t i = 0; i < numChildren; i++)
{
- tree->Parent()->children[i] = tree->children[i];
- tree->Child(i).Parent() = tree->Parent();
+ if (i < bestIndexMinOverlapSplit + tree->MinNumChildren())
+ InsertNodeIntoTree(treeOne, sorted2[i].second);
+ else
+ InsertNodeIntoTree(treeTwo, sorted2[i].second);
}
+ }
+ else
+ {
+ // Make this node a supernode.
+ tree->MaxNumChildren() +=
+ tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
+ tree->children.resize(tree->MaxNumChildren() + 1);
+ tree->numChildren = numChildren;
+ for (size_t i = 0; i < numChildren; i++)
+ tree->Child(i).Parent() = tree;
delete treeOne;
delete treeTwo;
- tree->NullifyData();
- tree->SoftDelete();
return false;
}
-
- // If we don't have to worry about the root, we just enlarge this node.
- tree->MaxNumChildren() +=
- tree->AuxiliaryInfo().NormalNodeMaxNumChildren();
- tree->children.resize(tree->MaxNumChildren() + 1);
- for (size_t i = 0; i < tree->NumChildren(); i++)
- tree->Child(i).Parent() = tree;
-
- delete treeOne;
- delete treeTwo;
-
- return false;
}
- }
- // Update the split history of each child.
- treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
- treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
- treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
- treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
-
- // Remove this node and insert treeOne and treeTwo
- TreeType* par = tree->Parent();
- size_t index = 0;
- for (size_t i = 0; i < par->NumChildren(); i++)
- {
- if (par->children[i] == tree)
- {
- index = i;
- break;
- }
+ // Update the split history of each child.
+ treeOne->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+ treeOne->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+ treeTwo->AuxiliaryInfo().SplitHistory().history[bestAxis] = true;
+ treeTwo->AuxiliaryInfo().SplitHistory().lastDimension = bestAxis;
+
+ // Remove this node and insert treeOne and treeTwo
+ tree->children[0] = treeOne;
+ tree->children[1] = treeTwo;
+ tree->numChildren = 2;
+
+ // We have to update the children of each of these new nodes so that they
+ // record the correct parent.
+ for (size_t i = 0; i < treeOne->NumChildren(); ++i)
+ treeOne->Child(i).Parent() = treeOne;
+ for (size_t i = 0; i < treeTwo->NumChildren(); i++)
+ treeTwo->Child(i).Parent() = treeTwo;
+
+ return false;
}
-
- par->children[index] = treeOne;
- par->children[par->NumChildren()++] = treeTwo;
-
- // we only add one at a time, so we should only need to test for equality
- // just in case, we use an assert.
-
- if (!(par->NumChildren() <= par->MaxNumChildren() + 1))
- Log::Debug << "error " << par->NumChildren() << ", "
- << par->MaxNumChildren() + 1 << std::endl;
- assert(par->NumChildren() <= par->MaxNumChildren() + 1);
-
- if (par->NumChildren() == par->MaxNumChildren() + 1)
- XTreeSplit::SplitNonLeafNode(par,relevels);
-
- // We have to update the children of each of these new nodes so that they
- // record the correct parent.
- for (size_t i = 0; i < treeOne->NumChildren(); i++)
- treeOne->Child(i).Parent() = treeOne;
- for (size_t i = 0; i < treeTwo->NumChildren(); i++)
- treeTwo->Child(i).Parent() = treeTwo;
-
- assert(treeOne->Parent()->NumChildren() <=
- treeOne->Parent()->MaxNumChildren());
- assert(treeOne->Parent()->NumChildren() >=
- treeOne->Parent()->MinNumChildren());
- assert(treeTwo->Parent()->NumChildren() <=
- treeTwo->Parent()->MaxNumChildren());
- assert(treeTwo->Parent()->NumChildren() >=
- treeTwo->Parent()->MinNumChildren());
-
- tree->SoftDelete();
-
- return false;
}
/**
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits mailing list