[mlpack] 92/324: Combined CosineNode and CosineTree classes.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:21:59 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit d231b191e050db84186550051c9006fabaf8fbc9
Author: siddharth.950 <siddharth.950 at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Mon Jun 30 15:25:43 2014 +0000
Combined CosineNode and CosineTree classes.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16730 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/core/tree/CMakeLists.txt | 4 +-
src/mlpack/core/tree/cosine_tree/cosine_node.hpp | 178 ----------------
.../core/tree/cosine_tree/cosine_node_impl.hpp | 230 ---------------------
src/mlpack/core/tree/cosine_tree/cosine_tree.hpp | 160 +++++++++++++-
.../core/tree/cosine_tree/cosine_tree_impl.hpp | 225 +++++++++++++++++++-
src/mlpack/tests/cosine_tree_test.cpp | 20 +-
6 files changed, 382 insertions(+), 435 deletions(-)
diff --git a/src/mlpack/core/tree/CMakeLists.txt b/src/mlpack/core/tree/CMakeLists.txt
index 5003336..c526a70 100644
--- a/src/mlpack/core/tree/CMakeLists.txt
+++ b/src/mlpack/core/tree/CMakeLists.txt
@@ -13,10 +13,8 @@ set(SOURCES
binary_space_tree/single_tree_traverser_impl.hpp
binary_space_tree/traits.hpp
bounds.hpp
- cosine_tree/cosine_node.hpp
- cosine_tree/cosine_node_impl.hpp
- cosine_tree/cosine_tree_impl.hpp
cosine_tree/cosine_tree.hpp
+ cosine_tree/cosine_tree_impl.hpp
cover_tree/cover_tree.hpp
cover_tree/cover_tree_impl.hpp
cover_tree/first_point_is_root.hpp
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_node.hpp b/src/mlpack/core/tree/cosine_tree/cosine_node.hpp
deleted file mode 100644
index 2f69dbe..0000000
--- a/src/mlpack/core/tree/cosine_tree/cosine_node.hpp
+++ /dev/null
@@ -1,178 +0,0 @@
-/**
- * @file cosine_node.hpp
- * @author Siddharth Agrawal
- *
- * Definition of Cosine Node.
- */
-
-#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_NODE_HPP
-#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_NODE_HPP
-
-#include <mlpack/core.hpp>
-
-namespace mlpack {
-namespace tree {
-
-class CosineNode
-{
- public:
-
- /**
- * CosineNode constructor for the root node of the tree. It initializes the
- * necessary variables required for splitting of the node, and building the
- * tree further. It takes a pointer to the input matrix and calculates the
- * relevant variables using it.
- *
- * @param dataset Matrix for which cosine tree is constructed.
- */
- CosineNode(const arma::mat& dataset);
-
- /**
- * CosineNode constructor for nodes other than the root node of the tree. It
- * takes in a pointer to the parent node and a list of column indices which
- * mentions the columns to be included in the node. The function calculate the
- * relevant variables just like the constructor above.
- *
- * @param parentNode Pointer to the parent CosineNode.
- * @param subIndices Pointer to vector of column indices to be included.
- */
- CosineNode(CosineNode& parentNode, const std::vector<size_t>& subIndices);
-
- /**
- * This function splits the CosineNode into two children based on the cosines
- * of the columns contained in the node, with respect to the sampled splitting
- * point. The function also calls the CosineNode constructor for the children.
- */
- void CosineNodeSplit();
-
- /**
- * Sample 'numSamples' points from the Length-Squared distribution of the
- * CosineNode. The function uses 'l2NormsSquared' to calculate the cumulative
- * probability distribution of the column vectors. The sampling is based on a
- * randomly generated values in the range [0, 1].
- */
- void ColumnSamplesLS(std::vector<size_t>& sampledIndices,
- arma::vec& probabilities, size_t numSamples);
-
- /**
- * Sample a point from the Length-Squared distribution of the CosineNode. The
- * function uses 'l2NormsSquared' to calculate the cumulative probability
- * distribution of the column vectors. The sampling is based on a randomly
- * generated value in the range [0, 1].
- */
- size_t ColumnSampleLS();
-
- /**
- * Sample a column based on the cumulative Length-Squared distribution of the
- * CosineNode, and a randomly generated value in the range [0, 1]. Binary
- * search is more efficient than searching linearly for the same. This leads
- * a significant speedup when there are large number of columns to choose from
- * and when a number of samples are to be drawn from the distribution.
- *
- * @param cDistribution Cumulative LS distibution of columns in the node.
- * @param value Randomly generated value in the range [0, 1].
- * @param start Starting index of the distribution interval to search in.
- * @param end Ending index of the distribution interval to search in.
- */
- size_t BinarySearch(arma::vec& cDistribution, double value, size_t start,
- size_t end);
-
- /**
- * Calculate cosines of the columns present in the node, with respect to the
- * sampled splitting point. The calculated cosine values are useful for
- * splitting the node into its children.
- *
- * @param cosines Vector to store the cosine values in.
- */
- void CalculateCosines(arma::vec& cosines);
-
- /**
- * Calculate centroid of the columns present in the node. The calculated
- * centroid is used as a basis vector for the cosine tree being constructed.
- */
- void CalculateCentroid();
-
- //! Get pointer to the dataset matrix.
- const arma::mat& GetDataset() const { return dataset; }
-
- //! Get the indices of columns in the node.
- std::vector<size_t>& VectorIndices() { return indices; }
-
- //! Set the Monte Carlo error.
- void L2Error(const double error) { this->l2Error = error; }
-
- //! Get the Monte Carlo error.
- double L2Error() const { return l2Error; }
-
- //! Get pointer to the centroid vector.
- arma::vec& Centroid() { return centroid; }
-
- //! Set the basis vector of the node.
- void BasisVector(arma::vec& bVector) { this->basisVector = bVector; }
-
- //! Get the basis vector of the node.
- arma::vec& BasisVector() { return basisVector; }
-
- //! Get pointer to the left child of the node.
- CosineNode* Left() { return left; }
-
- //! Get pointer to the right child of the node.
- CosineNode* Right() { return right; }
-
- //! Get number of columns of input matrix in the node.
- size_t NumColumns() const { return numColumns; }
-
- //! Get the Frobenius norm squared of columns in the node.
- double FrobNormSquared() const { return frobNormSquared; }
-
- //! Get the column index of split point of the node.
- size_t SplitPointIndex() const { return indices[splitPointIndex]; }
-
- private:
- //! Matrix for which cosine tree is constructed.
- const arma::mat& dataset;
- //! Parent of the node.
- CosineNode* parent;
- //! Right child of the node.
- CosineNode* right;
- //! Left child of the node.
- CosineNode* left;
- //! Indices of columns of input matrix in the node.
- std::vector<size_t> indices;
- //! L2-norm squared of columns in the node.
- arma::vec l2NormsSquared;
- //! Centroid of columns of input matrix in the node.
- arma::vec centroid;
- //! Orthonormalized basis vector of the node.
- arma::vec basisVector;
- //! Index of split point of cosine node.
- size_t splitPointIndex;
- //! Number of columns of input matrix in the node.
- size_t numColumns;
- //! Monte Carlo error for this node.
- double l2Error;
- //! Frobenius norm squared of columns in the node.
- double frobNormSquared;
-
- // Friend class to facilitate construction of priority queue.
- friend class CompareCosineNode;
-};
-
-class CompareCosineNode
-{
- public:
-
- // Comparison function for construction of priority queue.
- bool operator() (const CosineNode* a, const CosineNode* b) const
- {
- return a->l2Error < b->l2Error;
- }
-};
-
-}; // namespace tree
-}; // namespace mlpack
-
-// Include implementation.
-#include "cosine_node_impl.hpp"
-
-#endif
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_node_impl.hpp b/src/mlpack/core/tree/cosine_tree/cosine_node_impl.hpp
deleted file mode 100644
index fb92665..0000000
--- a/src/mlpack/core/tree/cosine_tree/cosine_node_impl.hpp
+++ /dev/null
@@ -1,230 +0,0 @@
-/**
- * @file cosine_node_impl.hpp
- * @author Siddharth Agrawal
- *
- * Implementation of cosine node.
- */
-#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_NODE_IMPL_HPP
-#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_NODE_IMPL_HPP
-
-// In case it wasn't included already for some reason.
-#include "cosine_node.hpp"
-
-namespace mlpack {
-namespace tree {
-
-CosineNode::CosineNode(const arma::mat& dataset) :
- dataset(dataset),
- parent(NULL),
- right(NULL),
- left(NULL),
- numColumns(dataset.n_cols)
-{
- // Initialize sizes of column indices and l2 norms.
- indices.resize(numColumns);
- l2NormsSquared.zeros(numColumns);
-
- // Set indices and calculate squared norms of the columns.
- for(size_t i = 0; i < numColumns; i++)
- {
- indices[i] = i;
- double l2Norm = arma::norm(dataset.col(i), 2);
- l2NormsSquared(i) = l2Norm * l2Norm;
- }
-
- // Frobenius norm of columns in the node.
- frobNormSquared = arma::accu(l2NormsSquared);
-
- // Calculate centroid of columns in the node.
- CalculateCentroid();
-
- splitPointIndex = ColumnSampleLS();
-}
-
-CosineNode::CosineNode(CosineNode& parentNode,
- const std::vector<size_t>& subIndices) :
- dataset(parentNode.GetDataset()),
- parent(&parentNode),
- right(NULL),
- left(NULL),
- numColumns(subIndices.size())
-{
- // Initialize sizes of column indices and l2 norms.
- indices.resize(numColumns);
- l2NormsSquared.zeros(numColumns);
-
- // Set indices and squared norms of the columns.
- for(size_t i = 0; i < numColumns; i++)
- {
- indices[i] = parentNode.indices[subIndices[i]];
- l2NormsSquared(i) = parentNode.l2NormsSquared(subIndices[i]);
- }
-
- // Frobenius norm of columns in the node.
- frobNormSquared = arma::accu(l2NormsSquared);
-
- // Calculate centroid of columns in the node.
- CalculateCentroid();
-
- splitPointIndex = ColumnSampleLS();
-}
-
-void CosineNode::CosineNodeSplit()
-{
- //! If less than two nodes, splitting does not make sense.
- if(numColumns < 3) return;
-
- //! Calculate cosines with respect to the splitting point.
- arma::vec cosines;
- CalculateCosines(cosines);
-
- //! Compute maximum and minimum cosine values.
- double cosineMax, cosineMin;
- cosineMax = arma::max(cosines % (cosines < 1));
- cosineMin = arma::min(cosines);
-
- std::vector<size_t> leftIndices, rightIndices;
-
- // Split columns into left and right children. The splitting condition for the
- // column to be in the left child is as follows:
- // cos_max - cos(i) <= cos(i) - cos_min
- for(size_t i = 0; i < numColumns; i++)
- {
- if(cosineMax - cosines(i) <= cosines(i) - cosineMin)
- {
- leftIndices.push_back(i);
- }
- else
- {
- rightIndices.push_back(i);
- }
- }
-
- // Split the node into left and right children.
- left = new CosineNode(*this, leftIndices);
- right = new CosineNode(*this, rightIndices);
-}
-
-void CosineNode::ColumnSamplesLS(std::vector<size_t>& sampledIndices,
- arma::vec& probabilities,
- size_t numSamples)
-{
- // Initialize the cumulative distribution vector size.
- arma::vec cDistribution;
- cDistribution.zeros(numColumns + 1);
-
- // Calculate cumulative length-squared distribution for the node.
- for(size_t i = 0; i < numColumns; i++)
- {
- cDistribution(i+1) = cDistribution(i) + l2NormsSquared(i) / frobNormSquared;
- }
-
- // Intialize sizes of the 'sampledIndices' and 'probabilities' vectors.
- sampledIndices.resize(numSamples);
- probabilities.zeros(numSamples);
-
- for(size_t i = 0; i < numSamples; i++)
- {
- // Generate a random value for sampling.
- double randValue = arma::randu();
- size_t start = 0, end = numColumns, searchIndex;
-
- // Sample from the distribution and store corresponding probability.
- searchIndex = BinarySearch(cDistribution, randValue, start, end);
- sampledIndices[i] = indices[searchIndex];
- probabilities(i) = l2NormsSquared(searchIndex) / frobNormSquared;
- }
-}
-
-size_t CosineNode::ColumnSampleLS()
-{
- // If only one element is present, there can only be one sample.
- if(numColumns < 2)
- {
- return 0;
- }
-
- // Initialize the cumulative distribution vector size.
- arma::vec cDistribution;
- cDistribution.zeros(numColumns + 1);
-
- // Calculate cumulative length-squared distribution for the node.
- for(size_t i = 0; i < numColumns; i++)
- {
- cDistribution(i+1) = cDistribution(i) + l2NormsSquared(i) / frobNormSquared;
- }
-
- // Generate a random value for sampling.
- double randValue = arma::randu();
- size_t start = 0, end = numColumns;
-
- // Sample from the distribution.
- return BinarySearch(cDistribution, randValue, start, end);
-}
-
-size_t CosineNode::BinarySearch(arma::vec& cDistribution,
- double value,
- size_t start,
- size_t end)
-{
- size_t pivot = (start + end) / 2;
-
- // If pivot is zero, first point is the sampled point.
- if(!pivot)
- {
- return pivot;
- }
-
- // Binary search recursive algorithm.
- if(value > cDistribution(pivot - 1) && value <= cDistribution(pivot))
- {
- return (pivot - 1);
- }
- else if(value < cDistribution(pivot - 1))
- {
- return BinarySearch(cDistribution, value, start, pivot - 1);
- }
- else
- {
- return BinarySearch(cDistribution, value, pivot + 1, end);
- }
-}
-
-void CosineNode::CalculateCosines(arma::vec& cosines)
-{
- // Initialize cosine vector as a vector of zeros.
- cosines.zeros(numColumns);
-
- for(size_t i = 0; i < numColumns; i++)
- {
- // If norm is zero, store cosine value as zero. Else, calculate cosine value
- // between two vectors.
- if(l2NormsSquared(i) == 0)
- {
- cosines(i) = 0;
- }
- else
- {
- cosines(i) = arma::norm_dot(dataset.col(indices[splitPointIndex]),
- dataset.col(indices[i]));
- }
- }
-}
-
-void CosineNode::CalculateCentroid()
-{
- // Initialize centroid as vector of zeros.
- centroid.zeros(dataset.n_rows);
-
- // Calculate centroid of columns in the node.
- for(size_t i = 0; i < numColumns; i++)
- {
- centroid += dataset.col(indices[i]);
- }
- centroid /= numColumns;
-}
-
-}; // namespace tree
-}; // namespace mlpack
-
-#endif
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
index e073381..4a411d9 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
@@ -11,18 +11,41 @@
#include <mlpack/core.hpp>
#include <boost/heap/priority_queue.hpp>
-#include "cosine_node.hpp"
-
namespace mlpack {
namespace tree {
+// Predeclare classes for CosineNodeQueue typedef.
+class CompareCosineNode;
+class CosineTree;
+
+// CosineNodeQueue typedef.
+typedef boost::heap::priority_queue<CosineTree*,
+ boost::heap::compare<CompareCosineNode> > CosineNodeQueue;
+
class CosineTree
{
public:
-
- // Type definition for CosineNode priority queue.
- typedef boost::heap::priority_queue<CosineNode*,
- boost::heap::compare<CompareCosineNode> > CosineNodeQueue;
+
+ /**
+ * CosineTree constructor for the root node of the tree. It initializes the
+ * necessary variables required for splitting of the node, and building the
+ * tree further. It takes a pointer to the input matrix and calculates the
+ * relevant variables using it.
+ *
+ * @param dataset Matrix for which cosine tree is constructed.
+ */
+ CosineTree(const arma::mat& dataset);
+
+ /**
+ * CosineTree constructor for nodes other than the root node of the tree. It
+ * takes in a pointer to the parent node and a list of column indices which
+ * mentions the columns to be included in the node. The function calculate the
+ * relevant variables just like the constructor above.
+ *
+ * @param parentNode Pointer to the parent cosine node.
+ * @param subIndices Pointer to vector of column indices to be included.
+ */
+ CosineTree(CosineTree& parentNode, const std::vector<size_t>& subIndices);
/**
* Construct the CosineTree and the basis for the given matrix, and passed
@@ -68,7 +91,7 @@ class CosineTree
* @param addBasisVector1 Address to first additional basis vector.
* @param addBasisVector2 Address to second additional basis vector.
*/
- double MonteCarloError(CosineNode* node,
+ double MonteCarloError(CosineTree* node,
CosineNodeQueue& treeQueue,
arma::vec* addBasisVector1 = NULL,
arma::vec* addBasisVector2 = NULL);
@@ -79,10 +102,100 @@ class CosineTree
* @param treeQueue Priority queue of cosine nodes.
*/
void ConstructBasis(CosineNodeQueue& treeQueue);
+
+ /**
+ * This function splits the cosine node into two children based on the cosines
+ * of the columns contained in the node, with respect to the sampled splitting
+ * point. The function also calls the CosineTree constructor for the children.
+ */
+ void CosineNodeSplit();
+
+ /**
+ * Sample 'numSamples' points from the Length-Squared distribution of the
+ * cosine node. The function uses 'l2NormsSquared' to calculate the cumulative
+ * probability distribution of the column vectors. The sampling is based on a
+ * randomly generated values in the range [0, 1].
+ */
+ void ColumnSamplesLS(std::vector<size_t>& sampledIndices,
+ arma::vec& probabilities, size_t numSamples);
+
+ /**
+ * Sample a point from the Length-Squared distribution of the cosine node. The
+ * function uses 'l2NormsSquared' to calculate the cumulative probability
+ * distribution of the column vectors. The sampling is based on a randomly
+ * generated value in the range [0, 1].
+ */
+ size_t ColumnSampleLS();
+
+ /**
+ * Sample a column based on the cumulative Length-Squared distribution of the
+ * cosine node, and a randomly generated value in the range [0, 1]. Binary
+ * search is more efficient than searching linearly for the same. This leads
+ * a significant speedup when there are large number of columns to choose from
+ * and when a number of samples are to be drawn from the distribution.
+ *
+ * @param cDistribution Cumulative LS distibution of columns in the node.
+ * @param value Randomly generated value in the range [0, 1].
+ * @param start Starting index of the distribution interval to search in.
+ * @param end Ending index of the distribution interval to search in.
+ */
+ size_t BinarySearch(arma::vec& cDistribution, double value, size_t start,
+ size_t end);
+
+ /**
+ * Calculate cosines of the columns present in the node, with respect to the
+ * sampled splitting point. The calculated cosine values are useful for
+ * splitting the node into its children.
+ *
+ * @param cosines Vector to store the cosine values in.
+ */
+ void CalculateCosines(arma::vec& cosines);
+
+ /**
+ * Calculate centroid of the columns present in the node. The calculated
+ * centroid is used as a basis vector for the cosine tree being constructed.
+ */
+ void CalculateCentroid();
//! Returns the basis of the constructed subspace.
void GetFinalBasis(arma::mat& finalBasis) { finalBasis = basis; }
+ //! Get pointer to the dataset matrix.
+ const arma::mat& GetDataset() const { return dataset; }
+
+ //! Get the indices of columns in the node.
+ std::vector<size_t>& VectorIndices() { return indices; }
+
+ //! Set the Monte Carlo error.
+ void L2Error(const double error) { this->l2Error = error; }
+
+ //! Get the Monte Carlo error.
+ double L2Error() const { return l2Error; }
+
+ //! Get pointer to the centroid vector.
+ arma::vec& Centroid() { return centroid; }
+
+ //! Set the basis vector of the node.
+ void BasisVector(arma::vec& bVector) { this->basisVector = bVector; }
+
+ //! Get the basis vector of the node.
+ arma::vec& BasisVector() { return basisVector; }
+
+ //! Get pointer to the left child of the node.
+ CosineTree* Left() { return left; }
+
+ //! Get pointer to the right child of the node.
+ CosineTree* Right() { return right; }
+
+ //! Get number of columns of input matrix in the node.
+ size_t NumColumns() const { return numColumns; }
+
+ //! Get the Frobenius norm squared of columns in the node.
+ double FrobNormSquared() const { return frobNormSquared; }
+
+ //! Get the column index of split point of the node.
+ size_t SplitPointIndex() const { return indices[splitPointIndex]; }
+
private:
//! Matrix for which cosine tree is constructed.
const arma::mat& dataset;
@@ -92,6 +205,39 @@ class CosineTree
double delta;
//! Subspace basis of the input dataset.
arma::mat basis;
+ //! Parent of the node.
+ CosineTree* parent;
+ //! Right child of the node.
+ CosineTree* right;
+ //! Left child of the node.
+ CosineTree* left;
+ //! Indices of columns of input matrix in the node.
+ std::vector<size_t> indices;
+ //! L2-norm squared of columns in the node.
+ arma::vec l2NormsSquared;
+ //! Centroid of columns of input matrix in the node.
+ arma::vec centroid;
+ //! Orthonormalized basis vector of the node.
+ arma::vec basisVector;
+ //! Index of split point of cosine node.
+ size_t splitPointIndex;
+ //! Number of columns of input matrix in the node.
+ size_t numColumns;
+ //! Monte Carlo error for this node.
+ double l2Error;
+ //! Frobenius norm squared of columns in the node.
+ double frobNormSquared;
+};
+
+class CompareCosineNode
+{
+ public:
+
+ // Comparison function for construction of priority queue.
+ bool operator() (const CosineTree* a, const CosineTree* b) const
+ {
+ return a->L2Error() < b->L2Error();
+ }
};
}; // namespace tree
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp b/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
index 263d0b1..46167d5 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
@@ -15,6 +15,62 @@
namespace mlpack {
namespace tree {
+CosineTree::CosineTree(const arma::mat& dataset) :
+ dataset(dataset),
+ parent(NULL),
+ right(NULL),
+ left(NULL),
+ numColumns(dataset.n_cols)
+{
+ // Initialize sizes of column indices and l2 norms.
+ indices.resize(numColumns);
+ l2NormsSquared.zeros(numColumns);
+
+ // Set indices and calculate squared norms of the columns.
+ for(size_t i = 0; i < numColumns; i++)
+ {
+ indices[i] = i;
+ double l2Norm = arma::norm(dataset.col(i), 2);
+ l2NormsSquared(i) = l2Norm * l2Norm;
+ }
+
+ // Frobenius norm of columns in the node.
+ frobNormSquared = arma::accu(l2NormsSquared);
+
+ // Calculate centroid of columns in the node.
+ CalculateCentroid();
+
+ splitPointIndex = ColumnSampleLS();
+}
+
+CosineTree::CosineTree(CosineTree& parentNode,
+ const std::vector<size_t>& subIndices) :
+ dataset(parentNode.GetDataset()),
+ parent(&parentNode),
+ right(NULL),
+ left(NULL),
+ numColumns(subIndices.size())
+{
+ // Initialize sizes of column indices and l2 norms.
+ indices.resize(numColumns);
+ l2NormsSquared.zeros(numColumns);
+
+ // Set indices and squared norms of the columns.
+ for(size_t i = 0; i < numColumns; i++)
+ {
+ indices[i] = parentNode.indices[subIndices[i]];
+ l2NormsSquared(i) = parentNode.l2NormsSquared(subIndices[i]);
+ }
+
+ // Frobenius norm of columns in the node.
+ frobNormSquared = arma::accu(l2NormsSquared);
+
+ // Calculate centroid of columns in the node.
+ CalculateCentroid();
+
+ splitPointIndex = ColumnSampleLS();
+}
+
CosineTree::CosineTree(const arma::mat& dataset,
const double epsilon,
const double delta) :
@@ -26,7 +82,7 @@ CosineTree::CosineTree(const arma::mat& dataset,
CosineNodeQueue treeQueue;
// Define root node of the tree and add it to the queue.
- CosineNode root(dataset);
+ CosineTree root(dataset);
arma::vec tempVector = arma::zeros(dataset.n_rows);
root.L2Error(0);
root.BasisVector(tempVector);
@@ -38,7 +94,7 @@ CosineTree::CosineTree(const arma::mat& dataset,
while(monteCarloError > epsilon * root.FrobNormSquared())
{
// Pop node from queue with highest projection error.
- CosineNode* currentNode;
+ CosineTree* currentNode;
currentNode = treeQueue.top();
treeQueue.pop();
@@ -46,7 +102,7 @@ CosineTree::CosineTree(const arma::mat& dataset,
currentNode->CosineNodeSplit();
// Obtain pointers to the left and right children of the current node.
- CosineNode *currentLeft, *currentRight;
+ CosineTree *currentLeft, *currentRight;
currentLeft = currentNode->Left();
currentRight = currentNode->Right();
@@ -88,7 +144,7 @@ void CosineTree::ModifiedGramSchmidt(CosineNodeQueue& treeQueue,
newBasisVector = centroid;
// Variables for iterating throught the priority queue.
- CosineNode *currentNode;
+ CosineTree *currentNode;
CosineNodeQueue::const_iterator i = treeQueue.begin();
// For every vector in the current basis, remove its projection from the
@@ -113,7 +169,7 @@ void CosineTree::ModifiedGramSchmidt(CosineNodeQueue& treeQueue,
newBasisVector /= arma::norm(newBasisVector, 2);
}
-double CosineTree::MonteCarloError(CosineNode* node,
+double CosineTree::MonteCarloError(CosineTree* node,
CosineNodeQueue& treeQueue,
arma::vec* addBasisVector1,
arma::vec* addBasisVector2)
@@ -148,7 +204,7 @@ double CosineTree::MonteCarloError(CosineNode* node,
arma::vec projection;
projection.zeros(projectionSize);
- CosineNode *currentNode;
+ CosineTree *currentNode;
CosineNodeQueue::const_iterator j = treeQueue.begin();
size_t k = 0;
@@ -204,7 +260,7 @@ void CosineTree::ConstructBasis(CosineNodeQueue& treeQueue)
basis.zeros(dataset.n_rows, treeQueue.size());
// Variables for iterating through the priority queue.
- CosineNode *currentNode;
+ CosineTree *currentNode;
CosineNodeQueue::const_iterator i = treeQueue.begin();
// Transfer basis vectors from the queue to the basis matrix.
@@ -216,6 +272,161 @@ void CosineTree::ConstructBasis(CosineNodeQueue& treeQueue)
}
}
+void CosineTree::CosineNodeSplit()
+{
+ //! If less than two nodes, splitting does not make sense.
+ if(numColumns < 3) return;
+
+ //! Calculate cosines with respect to the splitting point.
+ arma::vec cosines;
+ CalculateCosines(cosines);
+
+ //! Compute maximum and minimum cosine values.
+ double cosineMax, cosineMin;
+ cosineMax = arma::max(cosines % (cosines < 1));
+ cosineMin = arma::min(cosines);
+
+ std::vector<size_t> leftIndices, rightIndices;
+
+ // Split columns into left and right children. The splitting condition for the
+ // column to be in the left child is as follows:
+ // cos_max - cos(i) <= cos(i) - cos_min
+ for(size_t i = 0; i < numColumns; i++)
+ {
+ if(cosineMax - cosines(i) <= cosines(i) - cosineMin)
+ {
+ leftIndices.push_back(i);
+ }
+ else
+ {
+ rightIndices.push_back(i);
+ }
+ }
+
+ // Split the node into left and right children.
+ left = new CosineTree(*this, leftIndices);
+ right = new CosineTree(*this, rightIndices);
+}
+
+void CosineTree::ColumnSamplesLS(std::vector<size_t>& sampledIndices,
+ arma::vec& probabilities,
+ size_t numSamples)
+{
+ // Initialize the cumulative distribution vector size.
+ arma::vec cDistribution;
+ cDistribution.zeros(numColumns + 1);
+
+ // Calculate cumulative length-squared distribution for the node.
+ for(size_t i = 0; i < numColumns; i++)
+ {
+ cDistribution(i+1) = cDistribution(i) + l2NormsSquared(i) / frobNormSquared;
+ }
+
+ // Intialize sizes of the 'sampledIndices' and 'probabilities' vectors.
+ sampledIndices.resize(numSamples);
+ probabilities.zeros(numSamples);
+
+ for(size_t i = 0; i < numSamples; i++)
+ {
+ // Generate a random value for sampling.
+ double randValue = arma::randu();
+ size_t start = 0, end = numColumns, searchIndex;
+
+ // Sample from the distribution and store corresponding probability.
+ searchIndex = BinarySearch(cDistribution, randValue, start, end);
+ sampledIndices[i] = indices[searchIndex];
+ probabilities(i) = l2NormsSquared(searchIndex) / frobNormSquared;
+ }
+}
+
+size_t CosineTree::ColumnSampleLS()
+{
+ // If only one element is present, there can only be one sample.
+ if(numColumns < 2)
+ {
+ return 0;
+ }
+
+ // Initialize the cumulative distribution vector size.
+ arma::vec cDistribution;
+ cDistribution.zeros(numColumns + 1);
+
+ // Calculate cumulative length-squared distribution for the node.
+ for(size_t i = 0; i < numColumns; i++)
+ {
+ cDistribution(i+1) = cDistribution(i) + l2NormsSquared(i) / frobNormSquared;
+ }
+
+ // Generate a random value for sampling.
+ double randValue = arma::randu();
+ size_t start = 0, end = numColumns;
+
+ // Sample from the distribution.
+ return BinarySearch(cDistribution, randValue, start, end);
+}
+
+size_t CosineTree::BinarySearch(arma::vec& cDistribution,
+ double value,
+ size_t start,
+ size_t end)
+{
+ size_t pivot = (start + end) / 2;
+
+ // If pivot is zero, first point is the sampled point.
+ if(!pivot)
+ {
+ return pivot;
+ }
+
+ // Binary search recursive algorithm.
+ if(value > cDistribution(pivot - 1) && value <= cDistribution(pivot))
+ {
+ return (pivot - 1);
+ }
+ else if(value < cDistribution(pivot - 1))
+ {
+ return BinarySearch(cDistribution, value, start, pivot - 1);
+ }
+ else
+ {
+ return BinarySearch(cDistribution, value, pivot + 1, end);
+ }
+}
+
+void CosineTree::CalculateCosines(arma::vec& cosines)
+{
+ // Initialize cosine vector as a vector of zeros.
+ cosines.zeros(numColumns);
+
+ for(size_t i = 0; i < numColumns; i++)
+ {
+ // If norm is zero, store cosine value as zero. Else, calculate cosine value
+ // between two vectors.
+ if(l2NormsSquared(i) == 0)
+ {
+ cosines(i) = 0;
+ }
+ else
+ {
+ cosines(i) = arma::norm_dot(dataset.col(indices[splitPointIndex]),
+ dataset.col(indices[i]));
+ }
+ }
+}
+
+void CosineTree::CalculateCentroid()
+{
+ // Initialize centroid as vector of zeros.
+ centroid.zeros(dataset.n_rows);
+
+ // Calculate centroid of columns in the node.
+ for(size_t i = 0; i < numColumns; i++)
+ {
+ centroid += dataset.col(indices[i]);
+ }
+ centroid /= numColumns;
+}
+
}; // namespace tree
}; // namespace mlpack
diff --git a/src/mlpack/tests/cosine_tree_test.cpp b/src/mlpack/tests/cosine_tree_test.cpp
index d9c2f37..10447ba 100644
--- a/src/mlpack/tests/cosine_tree_test.cpp
+++ b/src/mlpack/tests/cosine_tree_test.cpp
@@ -43,7 +43,7 @@ BOOST_AUTO_TEST_CASE(CosineTreeNoSplit)
}
/**
- * Checks CosineNode::CosineNodeSplit() by doing a depth first search on a
+ * Checks CosineTree::CosineNodeSplit() by doing a depth first search on a
* random dataset and checking if it satisfies the split condition.
*/
BOOST_AUTO_TEST_CASE(CosineNodeCosineSplit)
@@ -54,17 +54,17 @@ BOOST_AUTO_TEST_CASE(CosineNodeCosineSplit)
// Make a random dataset and the root object.
arma::mat data = arma::randu(numRows, numCols);
- CosineNode root(data);
+ CosineTree root(data);
// Stack for depth first search of the tree.
- std::vector<CosineNode*> nodeStack;
+ std::vector<CosineTree*> nodeStack;
nodeStack.push_back(&root);
// While stack is not empty.
while(nodeStack.size())
{
// Pop a node from the stack and split it.
- CosineNode *currentNode, *currentLeft, *currentRight;
+ CosineTree *currentNode, *currentLeft, *currentRight;
currentNode = nodeStack.back();
currentNode->CosineNodeSplit();
nodeStack.pop_back();
@@ -139,14 +139,14 @@ BOOST_AUTO_TEST_CASE(CosineTreeModifiedGramSchmidt)
arma::mat data = arma::randu(numRows, numCols);
// Declare a queue and a dummy CosineTree object.
- CosineTree::CosineNodeQueue basisQueue;
+ CosineNodeQueue basisQueue;
CosineTree dummyTree(data, epsilon, delta);
for(size_t i = 0; i < numCols; i++)
{
// Make a new CosineNode object.
- CosineNode* basisNode;
- basisNode = new CosineNode(data);
+ CosineTree* basisNode;
+ basisNode = new CosineTree(data);
// Use the columns of the dataset as random centroids.
arma::vec centroid = data.col(i);
@@ -156,8 +156,8 @@ BOOST_AUTO_TEST_CASE(CosineTreeModifiedGramSchmidt)
dummyTree.ModifiedGramSchmidt(basisQueue, centroid, newBasisVector);
// Check if the obtained vector is orthonormal to the basis vectors.
- CosineTree::CosineNodeQueue::const_iterator j = basisQueue.begin();
- CosineNode* currentNode;
+ CosineNodeQueue::const_iterator j = basisQueue.begin();
+ CosineTree* currentNode;
for(; j != basisQueue.end(); j++)
{
@@ -175,7 +175,7 @@ BOOST_AUTO_TEST_CASE(CosineTreeModifiedGramSchmidt)
// Deallocate memory given to the objects.
for(size_t i = 0; i < numCols; i++)
{
- CosineNode* currentNode;
+ CosineTree* currentNode;
currentNode = basisQueue.top();
basisQueue.pop();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list