[mlpack] 03/324: Add files and some preliminary code for R tree
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:21:50 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit e24e9642f860046e9e798a797933904640bd48bf
Author: andrewmw94 <andrewmw94 at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Mon May 19 17:32:08 2014 +0000
Add files and some preliminary code for R tree
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16521 9d5b8971-822b-0410-80eb-d18c1038ef23
---
.../tree/binary_space_tree/binary_space_tree.hpp | 8 +-
src/mlpack/core/tree/rectangle_tree.hpp | 19 ++
.../core/tree/rectangle_tree/rectangle_tree.hpp | 337 +++++++++++++++++++++
.../tree/rectangle_tree/rectangle_tree_impl.hpp | 21 ++
.../rectangle_tree/rectangle_tree_traverser.hpp | 1 +
.../rectangle_tree_traverser_impl.hpp | 1 +
src/mlpack/core/tree/rectangle_tree/traits.hpp | 56 ++++
7 files changed, 438 insertions(+), 5 deletions(-)
diff --git a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
index 183c711..6b38b6f 100644
--- a/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
+++ b/src/mlpack/core/tree/binary_space_tree/binary_space_tree.hpp
@@ -24,8 +24,6 @@ namespace tree /** Trees and tree-building procedures. */ {
* from it. If you need to add or delete a node, the better procedure is to
* rebuild the tree entirely.
*
- * This tree does take one parameter, which is the leaf size to be used.
- *
* @tparam BoundType The bound used for each node. The valid types of bounds
* and the necessary skeleton interface for this class can be found in
* bounds/.
@@ -307,12 +305,12 @@ class BinarySpaceTree
*/
double FurthestDescendantDistance() const;
- //! Modify the distance from the center of this node to the center of the
- //! parent node.
- double& ParentDistance() { return parentDistance; }
//! Return the distance from the center of this node to the center of the
//! parent node.
double ParentDistance() const { return parentDistance; }
+ //! Modify the distance from the center of this node to the center of the
+ //! parent node.
+ double& ParentDistance() { return parentDistance; }
/**
* Return the specified child (0 will be left, 1 will be right). If the index
diff --git a/src/mlpack/core/tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree.hpp
new file mode 100644
index 0000000..12f30fd
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree.hpp
@@ -0,0 +1,19 @@
+/**
+ * @file rectangle_tree.hpp
+ * @author Andrew Wells
+ *
+ * Include all the necessary filse to use the Rectangle Type Trees (RTree, RStarTree, XTree,
+ * and HilbertRTree.)
+ */
+#ifndef __MLPACK_CORE_TREE_RECTINGLINEAR_TREE_RECTANGLINEAR_TREE_HPP
+#define __MLPACK_CORE_TREE_RECTINGLINEAR_TREE_RECTANGLINEAR_TREE_HPP
+
+/* we include bounds.hpp since it gives us the necessary files.
+ * However, we will not use the "ballbounds" option.
+ */
+#include "bounds.hpp"
+#include "rectangle_tree/rectangle_tree.hpp"
+#include "rectangle_tree/rectangle_tree_traverser.hpp"
+#include "rectangle_tree/traits.hpp"
+
+#endif
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
new file mode 100644
index 0000000..5ed3348
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree.hpp
@@ -0,0 +1,337 @@
+/**
+ * @file rectangle_tree.hpp
+ *
+ * Definition of generalized rectangle type trees (r_tree, r_star_tree, x_tree, and hilbert_r_tree).
+ */
+#ifndef __MLPACK_CORE_TREE_RECTINGLE_TREE_RECTANGLE_TREE_HPP
+#define __MLPACK_CORE_TREE_RECTINGLE_TREE_RECTANGLE_TREE_HPP
+
+#include <mlpack/core.hpp>
+
+#include "../statistic.hpp"
+
+namespace mlpack {
+namespace tree /** Trees and tree-building procedures. */ {
+
+/**
+ * A rectangle type tree tree, such as an R-tree or X-tree. Once the
+ * bound and type of dataset is defined, the tree will construct itself. Call
+ * the constructor with the dataset to build the tree on, and the entire tree
+ * will be built.
+ *
+ * This tree does allow growth, so you can add and delete nodes
+ * from it.
+ *
+ * @tparam StatisticType Extra data contained in the node. See statistic.hpp
+ * for the necessary skeleton interface.
+ * @tparam MatType The dataset class.
+ */
+
+template<typename StatisticType = EmptyStatistic,
+ typename MatType = arma::mat>
+class RectangleTree
+{
+ private:
+ //! The max number of child nodes an non-leaf node can have.
+ size_t maxNumChildren;
+ //! The number of child nodes actually in use (0 if this is a leaf node).
+ size_t numOfChildren;
+ //! The child nodes (Starting at 0 and ending at (numOfChildren-1) ).
+ std::vector<RectangleTree*> children;
+ //! The parent node (NULL if this is the root of the tree).
+ RectangleTree* parent;
+ //! The index of the first point in the dataset contained in this node (and
+ //! its children).
+ size_t begin;
+ //! The number of points in the dataset contained in this node (and its
+ //! children).
+ size_t count;
+ //! The leaf size.
+ size_t leafSize;
+ //! The bound object for this node.
+ HRectBound bound;
+ //! Any extra data contained in the node.
+ StatisticType stat;
+ //! The distance from the centroid of this node to the centroid of the parent.
+ double parentDistance;
+ //! The discance to the furthest descendant, cached to speed things up.
+ double furthestDescendantDistance;
+ //! The dataset.
+ MatType& dataset;
+
+ public:
+ //! So other classes can use TreeType::Mat.
+ typedef MatType Mat;
+
+ //! A traverser for rectangle type trees; see
+ //! rectangle_tree_traverser.hpp for implementation.
+ template<typename RuleType>
+ class RectangleTreeTraverser;
+
+ /**
+ * Construct this as the root node of a rectangle type tree using the given
+ * dataset. This will modify the ordering of the points in the dataset!
+ *
+ * @param data Dataset from which to create the tree. This will be modified!
+ * @param leafSize Size of each leaf in the tree;
+ * @param maxNumChildren The maximum number of child nodes a non-leaf node may have.
+ */
+ RectangleTree(MatType& data, const size_t leafSize = 20, const size_t maxNumChildren = 5);
+
+ //TODO implement the oldFromNew stuff if applicable.
+
+ /**
+ * Deletes this node, deallocating the memory for the children and calling
+ * their destructors in turn. This will invalidate any younters or references
+ * to any nodes which are children of this one.
+ */
+ ~RectangleTree();
+
+ /**
+ * Find a node in this tree by its begin and count (const).
+ *
+ * Every node is uniquely identified by these two numbers.
+ * This is useful for communicating position over the network,
+ * when pointers would be invalid.
+ *
+ * @param begin The begin() of the node to find.
+ * @param count The count() of the node to find.
+ * @return The found node, or NULL if not found.
+ */
+ const RectangleTree* FindByBeginCount(size_t begin, size_t count) const;
+
+ /**
+ * Find a node in this tree by its begin and count.
+ *
+ * Every node is uniquely identified by these two numbers.
+ * This is useful for communicating position over the network,
+ * when pointers would be invalid.
+ *
+ * @param begin The begin() of the node to find.
+ * @param count The count() of the node to find.
+ * @return The found node, or NULL if not found.
+ */
+ RectangleTree* FindByBeginCount(size_t begin, size_t count);
+
+ //! Return the bound object for this node.
+ const HRectBound& Bound() const { return bound; }
+ //! Return the bound object for this node.
+ HRectBound& Bound() { return bound; }
+
+ //! Return the statistic object for this node.
+ const StatisticType& Stat() const { return stat; }
+ //! Return the statistic object for this node.
+ StatisticType& Stat() { return stat; }
+
+ //! Return whether or not this node is a leaf (true if it has no children).
+ bool IsLeaf() const;
+
+ //! Return the leaf size.
+ size_t LeafSize() const { return leafSize; }
+ //! Modify the leaf size.
+ size_t& LeafSize() { return leafSize; }
+
+ //! Gets the parent of this node.
+ RectangleTree* Parent() const { return parent; }
+ //! Modify the parent of this node.
+ RectangleTree*& Parent() { return parent; }
+
+ //! Get the dataset which the tree is built on.
+ const arma::mat& Dataset() const { return dataset; }
+ //! Modify the dataset which the tree is built on. Be careful!
+ arma::mat& Dataset() { return dataset; }
+
+ //! Get the metric which the tree uses.
+ typename BoundType::MetricType Metric() const { return bound.Metric(); }
+
+ //! Get the centroid of the node and store it in the given vector.
+ void Centroid(arma::vec& centroid) { bound.Centroid(centroid); }
+
+ //! Return the number of children in this node.
+ size_t NumChildren() const;
+
+ /**
+ * Return the furthest distance to a point held in this node. If this is not
+ * a leaf node, then the distance is 0 because the node holds no points.
+ */
+ double FurthestPointDistance() const;
+
+ /**
+ * Return the furthest possible descendant distance. This returns the maximum
+ * distance from the centroid to the edge of the bound and not the empirical
+ * quantity which is the actual furthest descendant distance. So the actual
+ * furthest descendant distance may be less than what this method returns (but
+ * it will never be greater than this).
+ */
+ double FurthestDescendantDistance() const;
+
+ //! Return the distance from the center of this node to the center of the
+ //! parent node.
+ double ParentDistance() const { return parentDistance; }
+ //! Modify the distance from the center of this node to the center of the
+ //! parent node.
+ double& ParentDistance() { return parentDistance; }
+
+ /**
+ * Return the specified child.
+ *
+ * @param child Index of child to return.
+ */
+ RectangleTree& Child(const size_t child) const;
+
+ //! Return the number of points in this node (0 if not a leaf).
+ size_t NumPoints() const;
+
+ /**
+ * Return the number of descendants of this node. For a non-leaf in a binary
+ * space tree, this is the number of points at the descendant leaves. For a
+ * leaf, this is the number of points in the leaf.
+ */
+ size_t NumDescendants() const;
+
+ /**
+ * Return the index (with reference to the dataset) of a particular descendant
+ * of this node. The index should be greater than zero but less than the
+ * number of descendants.
+ *
+ * @param index Index of the descendant.
+ */
+ size_t Descendant(const size_t index) const;
+
+ /**
+ * Return the index (with reference to the dataset) of a particular point in
+ * this node. This will happily return invalid indices if the given index is
+ * greater than the number of points in this node (obtained with NumPoints())
+ * -- be careful.
+ *
+ * @param index Index of point for which a dataset index is wanted.
+ */
+ size_t Point(const size_t index) const;
+
+ //! Return the minimum distance to another node.
+ double MinDistance(const RectangleTree* other) const
+ {
+ return bound.MinDistance(other->Bound());
+ }
+
+ //! Return the maximum distance to another node.
+ double MaxDistance(const RectangleTree* other) const
+ {
+ return bound.MaxDistance(other->Bound());
+ }
+
+ //! Return the minimum and maximum distance to another node.
+ math::Range RangeDistance(const RectangleTree* other) const
+ {
+ return bound.RangeDistance(other->Bound());
+ }
+
+
+ //! Return the minimum distance to another point.
+ template<typename VecType>
+ double MinDistance(const VecType& point,
+ typename boost::enable_if<IsVector<VecType> >::type* = 0)
+ const
+ {
+ return bound.MinDistance(point);
+ }
+
+ //! Return the maximum distance to another point.
+ template<typename VecType>
+ double MaxDistance(const VecType& point,
+ typename boost::enable_if<IsVector<VecType> >::type* = 0)
+ const
+ {
+ return bound.MaxDistance(point);
+ }
+
+ //! Return the minimum and maximum distance to another point.
+ template<typename VecType>
+ math::Range
+ RangeDistance(const VecType& point,
+ typename boost::enable_if<IsVector<VecType> >::type* = 0) const
+ {
+ return bound.RangeDistance(point);
+ }
+
+ /**
+ * Obtains the number of nodes in the tree, starting with this.
+ */
+ size_t TreeSize() const;
+
+ /**
+ * Obtains the number of levels below this node in the tree, starting with
+ * this.
+ */
+ size_t TreeDepth() const;
+
+ //! Return the index of the beginning point of this subset.
+ size_t Begin() const { return begin; }
+ //! Modify the index of the beginning point of this subset.
+ size_t& Begin() { return begin; }
+
+ /**
+ * Gets the index one beyond the last index in the subset.
+ */
+ size_t End() const;
+
+ //! Return the number of points in this subset.
+ size_t Count() const { return count; }
+ //! Modify the number of points in this subset.
+ size_t& Count() { return count; }
+
+ //! Returns false: this tree type does not have self children.
+ static bool HasSelfChildren() { return false; }
+
+ private:
+ /**
+ * Private copy constructor, available only to fill (pad) the tree to a
+ * specified level.
+ */
+ RectangleTree(const size_t begin,
+ const size_t count,
+ HRectBound bound,
+ StatisticType stat,
+ const int leafSize = 20) :
+ begin(begin),
+ count(count),
+ bound(bound),
+ stat(stat),
+ leafSize(leafSize) { }
+
+ RectangleTree* CopyMe()
+ {
+ return new RectangleTree(begin, count, bound, stat, leafSize);
+ }
+
+ /**
+ * Splits the current node, assigning its left and right children recursively.
+ *
+ * @param data Dataset which we are using.
+ */
+ void SplitNode(MatType& data);
+
+ /**
+ * Splits the current node, assigning its left and right children recursively.
+ * Also returns a list of the changed indices.
+ *
+ * @param data Dataset which we are using.
+ * @param oldFromNew Vector holding permuted indices.
+ */
+ void SplitNode(MatType& data, std::vector<size_t>& oldFromNew);
+
+ public:
+ /**
+ * Returns a string representation of this object.
+ */
+ std::string ToString() const;
+
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+// Include implementation.
+#include "rectangle_tree_impl.hpp"
+
+#endif
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
new file mode 100644
index 0000000..8dea9f3
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_impl.hpp
@@ -0,0 +1,21 @@
+/**
+ * @file rectangle_tree_impl.hpp
+ *
+ * Implementation of generalized rectangle tree.
+ */
+#ifndef __MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_IMPL_HPP
+#define __MLPACK_CORE_TREE_RECTANGLE_TREE_RECTANGLE_TREE_IMPL_HPP
+
+// In case it wasn't included already for sem reason.
+#include "rectangle_tree.hpp"
+
+namespace mlpack {
+namespace tree {
+
+template<typename StatisticType,
+ typename MatType>
+RectangleTree<StatisticType, MatType>::RectangleTree()
+
+
+}; //namespace tree
+}; //namespace mlpack
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser.hpp
@@ -0,0 +1 @@
+
diff --git a/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser_impl.hpp b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser_impl.hpp
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/rectangle_tree_traverser_impl.hpp
@@ -0,0 +1 @@
+
diff --git a/src/mlpack/core/tree/rectangle_tree/traits.hpp b/src/mlpack/core/tree/rectangle_tree/traits.hpp
new file mode 100644
index 0000000..5bef337
--- /dev/null
+++ b/src/mlpack/core/tree/rectangle_tree/traits.hpp
@@ -0,0 +1,56 @@
+/**
+ * @file traits.hpp
+ * @author Andrew Wells
+ *
+ * Specialization of the TreeTraits class for the RectangleTree type of tree.
+ */
+#ifndef __MLPACK_CORE_TREE_RECTANGLE_TREE_TRAITS_HPP
+#define __MLPACK_CORE_TREE_RECTANGLE_TREE_TRAITS_HPP
+
+#include <mlpack/core/tree/tree_traits.hpp>
+
+namespace mlpack {
+namespace tree {
+
+/**
+ * This is a specialization of the TreeType class to the RectangleTree tree
+ * type. It defines characteristics of the rectangle type trees, and is used to
+ * help write tree-independent (but still optimized) tree-based algorithms. See
+ * mlpack/core/tree/tree_traits.hpp for more information.
+ */
+template<typename StatisticType,
+ typename MatType>
+class TreeTraits<RectangleTree<StatisticType, MatType> >
+{
+ public:
+ /**
+ * The R-tree cannot easily calculate the distance from a node to
+ * its parent; so RectangleTree<...>::ParentDistance() does not exist.
+ */
+ static const bool HasParentDistance = false;
+
+ /**
+ * An R-tree can have overlapping children.
+ */
+ static const bool HasOverlappingChildren = true;
+
+ /**
+ * There is no guarantee that the first point in a node is its centroid.
+ */
+ static const bool FirstPointIsCentroid = false;
+
+ /**
+ * Points are not contained at multiple levels of the R-tree.
+ */
+ static const bool HasSelfChildren = false;
+
+ /**
+ * Points are rearranged during building of the tree.
+ */
+ static const bool RearrangesDataset = true;
+};
+
+}; // namespace tree
+}; // namespace mlpack
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list