[mlpack] 39/44: Fix potential memory leak, and document known CosineTree bug.
Barak A. Pearlmutter
barak+git at pearlmutter.net
Mon Feb 15 19:35:55 UTC 2016
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to tag mlpack-1.0.11
in repository mlpack.
commit a9f22b58be3a82ee339040eb0251e1d7f92284be
Author: Ryan Curtin <ryan at ratml.org>
Date: Wed Dec 10 15:31:30 2014 +0000
Fix potential memory leak, and document known CosineTree bug.
---
HISTORY.txt | 3 +
src/mlpack/core/tree/cosine_tree/cosine_tree.cpp | 133 ++++++++++++-----------
src/mlpack/core/tree/cosine_tree/cosine_tree.hpp | 74 +++++++------
3 files changed, 115 insertions(+), 95 deletions(-)
diff --git a/HISTORY.txt b/HISTORY.txt
index a906829..4b1d853 100644
--- a/HISTORY.txt
+++ b/HISTORY.txt
@@ -21,6 +21,9 @@
* Handle Newton method convergence better for
SparseCoding::OptimizeDictionary() and make maximum iterations a parameter.
+ * Known bug: CosineTree construction may fail in some cases on i386 systems.
+ (#376)
+
2014-08-29 mlpack 1.0.10
* Bugfix for NeighborSearch regression which caused very slow allknn/allkfn.
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp b/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
index 9ac498a..3752017 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
@@ -29,14 +29,14 @@ namespace tree {
CosineTree::CosineTree(const arma::mat& dataset) :
dataset(dataset),
parent(NULL),
- right(NULL),
left(NULL),
+ right(NULL),
numColumns(dataset.n_cols)
-{
+{
// Initialize sizes of column indices and l2 norms.
indices.resize(numColumns);
l2NormsSquared.zeros(numColumns);
-
+
// Set indices and calculate squared norms of the columns.
for(size_t i = 0; i < numColumns; i++)
{
@@ -44,13 +44,13 @@ CosineTree::CosineTree(const arma::mat& dataset) :
double l2Norm = arma::norm(dataset.col(i), 2);
l2NormsSquared(i) = l2Norm * l2Norm;
}
-
+
// Frobenius norm of columns in the node.
frobNormSquared = arma::accu(l2NormsSquared);
-
+
// Calculate centroid of columns in the node.
CalculateCentroid();
-
+
splitPointIndex = ColumnSampleLS();
}
@@ -58,27 +58,27 @@ CosineTree::CosineTree(CosineTree& parentNode,
const std::vector<size_t>& subIndices) :
dataset(parentNode.GetDataset()),
parent(&parentNode),
- right(NULL),
left(NULL),
+ right(NULL),
numColumns(subIndices.size())
{
// Initialize sizes of column indices and l2 norms.
indices.resize(numColumns);
l2NormsSquared.zeros(numColumns);
-
+
// Set indices and squared norms of the columns.
for(size_t i = 0; i < numColumns; i++)
{
indices[i] = parentNode.indices[subIndices[i]];
l2NormsSquared(i) = parentNode.l2NormsSquared(subIndices[i]);
}
-
+
// Frobenius norm of columns in the node.
frobNormSquared = arma::accu(l2NormsSquared);
-
+
// Calculate centroid of columns in the node.
CalculateCentroid();
-
+
splitPointIndex = ColumnSampleLS();
}
@@ -87,63 +87,74 @@ CosineTree::CosineTree(const arma::mat& dataset,
const double delta) :
dataset(dataset),
epsilon(epsilon),
- delta(delta)
+ delta(delta),
+ left(NULL),
+ right(NULL)
{
// Declare the cosine tree priority queue.
CosineNodeQueue treeQueue;
-
+
// Define root node of the tree and add it to the queue.
CosineTree root(dataset);
arma::vec tempVector = arma::zeros(dataset.n_rows);
root.L2Error(0);
root.BasisVector(tempVector);
treeQueue.push(&root);
-
+
// Initialize Monte Carlo error estimate for comparison.
double monteCarloError = root.FrobNormSquared();
-
- while(monteCarloError > epsilon * root.FrobNormSquared())
+
+ while (monteCarloError > epsilon * root.FrobNormSquared())
{
// Pop node from queue with highest projection error.
CosineTree* currentNode;
currentNode = treeQueue.top();
treeQueue.pop();
-
+
// Split the node into left and right children.
currentNode->CosineNodeSplit();
-
+
// Obtain pointers to the left and right children of the current node.
CosineTree *currentLeft, *currentRight;
currentLeft = currentNode->Left();
currentRight = currentNode->Right();
-
+
// Calculate basis vectors of left and right children.
arma::vec lBasisVector, rBasisVector;
-
+
ModifiedGramSchmidt(treeQueue, currentLeft->Centroid(), lBasisVector);
ModifiedGramSchmidt(treeQueue, currentRight->Centroid(), rBasisVector,
&lBasisVector);
-
+
// Add basis vectors to their respective nodes.
currentLeft->BasisVector(lBasisVector);
currentRight->BasisVector(rBasisVector);
-
+
// Calculate Monte Carlo error estimates for child nodes.
MonteCarloError(currentLeft, treeQueue, &lBasisVector, &rBasisVector);
MonteCarloError(currentRight, treeQueue, &lBasisVector, &rBasisVector);
-
+
// Push child nodes into the priority queue.
treeQueue.push(currentLeft);
treeQueue.push(currentRight);
-
+
// Calculate Monte Carlo error estimate for the root node.
monteCarloError = MonteCarloError(&root, treeQueue);
}
-
+
// Construct the subspace basis from the current priority queue.
ConstructBasis(treeQueue);
}
+CosineTree::~CosineTree()
+{
+ // Clean the memory.
+ if (left)
+ delete left;
+ if (right)
+ delete right;
+}
+
void CosineTree::ModifiedGramSchmidt(CosineNodeQueue& treeQueue,
arma::vec& centroid,
arma::vec& newBasisVector,
@@ -161,18 +172,18 @@ void CosineTree::ModifiedGramSchmidt(CosineNodeQueue& treeQueue,
for(; i != treeQueue.end(); i++)
{
currentNode = *i;
-
+
double projection = arma::dot(currentNode->BasisVector(), centroid);
newBasisVector -= projection * currentNode->BasisVector();
}
-
+
// If additional basis vector is passed, take it into account.
if(addBasisVector)
{
double projection = arma::dot(*addBasisVector, centroid);
newBasisVector -= *addBasisVector * projection;
}
-
+
// Normalize the modified centroid vector.
if(arma::norm(newBasisVector, 2))
newBasisVector /= arma::norm(newBasisVector, 2);
@@ -185,19 +196,19 @@ double CosineTree::MonteCarloError(CosineTree* node,
{
std::vector<size_t> sampledIndices;
arma::vec probabilities;
-
+
// Sample O(log m) points from the input node's distribution.
// 'm' is the number of columns present in the node.
- size_t numSamples = log(node->NumColumns()) + 1;
+ size_t numSamples = log(node->NumColumns()) + 1;
node->ColumnSamplesLS(sampledIndices, probabilities, numSamples);
-
+
// Get pointer to the original dataset.
arma::mat dataset = node->GetDataset();
-
+
// Initialize weighted projection magnitudes as zeros.
arma::vec weightedMagnitudes;
weightedMagnitudes.zeros(numSamples);
-
+
// Set size of projection vector, depending on whether additional basis
// vectors are passed.
size_t projectionSize;
@@ -205,7 +216,7 @@ double CosineTree::MonteCarloError(CosineTree* node,
projectionSize = treeQueue.size() + 2;
else
projectionSize = treeQueue.size();
-
+
// For each sample, calculate the weighted projection onto the current basis.
for(size_t i = 0; i < numSamples; i++)
{
@@ -215,13 +226,13 @@ double CosineTree::MonteCarloError(CosineTree* node,
CosineTree *currentNode;
CosineNodeQueue::const_iterator j = treeQueue.begin();
-
+
size_t k = 0;
// Compute the projection of the sampled vector onto the existing subspace.
for(; j != treeQueue.end(); j++, k++)
{
currentNode = *j;
-
+
projection(k) = arma::dot(dataset.col(sampledIndices[i]),
currentNode->BasisVector());
}
@@ -233,33 +244,33 @@ double CosineTree::MonteCarloError(CosineTree* node,
projection(k) = arma::dot(dataset.col(sampledIndices[i]),
*addBasisVector2);
}
-
+
// Calculate the Frobenius norm squared of the projected vector.
double frobProjection = arma::norm(projection, "frob");
double frobProjectionSquared = frobProjection * frobProjection;
-
+
// Calculate the weighted projection magnitude.
weightedMagnitudes(i) = frobProjectionSquared / probabilities(i);
}
-
+
// Compute mean and standard deviation of the weighted samples.
double mu = arma::mean(weightedMagnitudes);
double sigma = arma::stddev(weightedMagnitudes);
-
+
if(!sigma)
{
node->L2Error(node->FrobNormSquared() - mu);
return (node->FrobNormSquared() - mu);
}
-
+
// Fit a normal distribution using the calculated statistics, and calculate a
// lower bound on the magnitudes for the passed 'delta' parameter.
boost::math::normal dist(mu, sigma);
double lowerBound = boost::math::quantile(dist, delta);
-
+
// Upper bound on the subspace reconstruction error.
node->L2Error(node->FrobNormSquared() - lowerBound);
-
+
return (node->FrobNormSquared() - lowerBound);
}
@@ -267,11 +278,11 @@ void CosineTree::ConstructBasis(CosineNodeQueue& treeQueue)
{
// Initialize basis as matrix of zeros.
basis.zeros(dataset.n_rows, treeQueue.size());
-
+
// Variables for iterating through the priority queue.
CosineTree *currentNode;
CosineNodeQueue::const_iterator i = treeQueue.begin();
-
+
// Transfer basis vectors from the queue to the basis matrix.
size_t j = 0;
for(; i != treeQueue.end(); i++, j++)
@@ -285,18 +296,18 @@ void CosineTree::CosineNodeSplit()
{
//! If the node holds fewer than three columns, splitting does not make sense.
if(numColumns < 3) return;
-
+
//! Calculate cosines with respect to the splitting point.
arma::vec cosines;
CalculateCosines(cosines);
-
+
//! Compute maximum and minimum cosine values.
double cosineMax, cosineMin;
cosineMax = arma::max(cosines % (cosines < 1));
cosineMin = arma::min(cosines);
-
+
std::vector<size_t> leftIndices, rightIndices;
-
+
// Split columns into left and right children. The splitting condition for the
// column to be in the left child is as follows:
// cos_max - cos(i) <= cos(i) - cos_min
@@ -311,7 +322,7 @@ void CosineTree::CosineNodeSplit()
rightIndices.push_back(i);
}
}
-
+
// Split the node into left and right children.
left = new CosineTree(*this, leftIndices);
right = new CosineTree(*this, rightIndices);
@@ -324,23 +335,23 @@ void CosineTree::ColumnSamplesLS(std::vector<size_t>& sampledIndices,
// Initialize the cumulative distribution vector size.
arma::vec cDistribution;
cDistribution.zeros(numColumns + 1);
-
+
// Calculate cumulative length-squared distribution for the node.
for(size_t i = 0; i < numColumns; i++)
{
cDistribution(i+1) = cDistribution(i) + l2NormsSquared(i) / frobNormSquared;
}
-
+
// Initialize sizes of the 'sampledIndices' and 'probabilities' vectors.
sampledIndices.resize(numSamples);
probabilities.zeros(numSamples);
-
+
for(size_t i = 0; i < numSamples; i++)
{
// Generate a random value for sampling.
double randValue = arma::randu();
size_t start = 0, end = numColumns, searchIndex;
-
+
// Sample from the distribution and store corresponding probability.
searchIndex = BinarySearch(cDistribution, randValue, start, end);
sampledIndices[i] = indices[searchIndex];
@@ -359,17 +370,17 @@ size_t CosineTree::ColumnSampleLS()
// Initialize the cumulative distribution vector size.
arma::vec cDistribution;
cDistribution.zeros(numColumns + 1);
-
+
// Calculate cumulative length-squared distribution for the node.
for(size_t i = 0; i < numColumns; i++)
{
cDistribution(i+1) = cDistribution(i) + l2NormsSquared(i) / frobNormSquared;
}
-
+
// Generate a random value for sampling.
double randValue = arma::randu();
size_t start = 0, end = numColumns;
-
+
// Sample from the distribution.
return BinarySearch(cDistribution, randValue, start, end);
}
@@ -380,13 +391,13 @@ size_t CosineTree::BinarySearch(arma::vec& cDistribution,
size_t end)
{
size_t pivot = (start + end) / 2;
-
+
// If pivot is zero, first point is the sampled point.
if(!pivot)
{
return pivot;
}
-
+
// Binary search recursive algorithm.
if(value > cDistribution(pivot - 1) && value <= cDistribution(pivot))
{
@@ -406,7 +417,7 @@ void CosineTree::CalculateCosines(arma::vec& cosines)
{
// Initialize cosine vector as a vector of zeros.
cosines.zeros(numColumns);
-
+
for(size_t i = 0; i < numColumns; i++)
{
// If norm is zero, store cosine value as zero. Else, calculate cosine value
@@ -427,7 +438,7 @@ void CosineTree::CalculateCentroid()
{
// Initialize centroid as vector of zeros.
centroid.zeros(dataset.n_rows);
-
+
// Calculate centroid of columns in the node.
for(size_t i = 0; i < numColumns; i++)
{
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
index 63a94b2..6402e73 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
@@ -19,7 +19,7 @@
* You should have received a copy of the GNU General Public License along with
* MLPACK. If not, see <http://www.gnu.org/licenses/>.
*/
-
+
#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_HPP
@@ -40,7 +40,7 @@ typedef boost::heap::priority_queue<CosineTree*,
class CosineTree
{
public:
-
+
/**
* CosineTree constructor for the root node of the tree. It initializes the
* necessary variables required for splitting of the node, and building the
@@ -50,7 +50,7 @@ class CosineTree
* @param dataset Matrix for which cosine tree is constructed.
*/
CosineTree(const arma::mat& dataset);
-
+
/**
* CosineTree constructor for nodes other than the root node of the tree. It
* takes in a pointer to the parent node and a list of column indices which
@@ -61,7 +61,7 @@ class CosineTree
* @param subIndices Pointer to vector of column indices to be included.
*/
CosineTree(CosineTree& parentNode, const std::vector<size_t>& subIndices);
-
+
/**
* Construct the CosineTree and the basis for the given matrix, and passed
* 'epsilon' and 'delta' parameters. The CosineTree is constructed by
@@ -79,7 +79,13 @@ class CosineTree
CosineTree(const arma::mat& dataset,
const double epsilon,
const double delta);
-
+
+ /**
+ * Destroy the cosine tree and all of its children (take care of the memory
+ * allocations too).
+ */
+ ~CosineTree();
+
/**
* Calculates the orthonormalization of the passed centroid, with respect to
* the current vector subspace.
@@ -88,12 +94,12 @@ class CosineTree
* @param centroid Centroid of the node being added to the basis.
* @param newBasisVector Orthonormalized centroid of the node.
* @param addBasisVector Address to additional basis vector.
- */
+ */
void ModifiedGramSchmidt(CosineNodeQueue& treeQueue,
arma::vec& centroid,
arma::vec& newBasisVector,
arma::vec* addBasisVector = NULL);
-
+
/**
* Estimates the squared error of the projection of the input node's matrix
* onto the current vector subspace. A normal distribution is fit using
@@ -105,35 +111,35 @@ class CosineTree
* @param treeQueue Priority queue of cosine nodes.
* @param addBasisVector1 Address to first additional basis vector.
* @param addBasisVector2 Address to second additional basis vector.
- */
+ */
double MonteCarloError(CosineTree* node,
CosineNodeQueue& treeQueue,
arma::vec* addBasisVector1 = NULL,
arma::vec* addBasisVector2 = NULL);
-
+
/**
* Constructs the final basis matrix, after the cosine tree construction.
*
* @param treeQueue Priority queue of cosine nodes.
- */
+ */
void ConstructBasis(CosineNodeQueue& treeQueue);
-
+
/**
* This function splits the cosine node into two children based on the cosines
* of the columns contained in the node, with respect to the sampled splitting
* point. The function also calls the CosineTree constructor for the children.
*/
void CosineNodeSplit();
-
+
/**
* Sample 'numSamples' points from the Length-Squared distribution of the
* cosine node. The function uses 'l2NormsSquared' to calculate the cumulative
* probability distribution of the column vectors. The sampling is based on
* randomly generated values in the range [0, 1].
*/
- void ColumnSamplesLS(std::vector<size_t>& sampledIndices,
+ void ColumnSamplesLS(std::vector<size_t>& sampledIndices,
arma::vec& probabilities, size_t numSamples);
-
+
/**
* Sample a point from the Length-Squared distribution of the cosine node. The
* function uses 'l2NormsSquared' to calculate the cumulative probability
@@ -141,7 +147,7 @@ class CosineTree
* generated value in the range [0, 1].
*/
size_t ColumnSampleLS();
-
+
/**
* Sample a column based on the cumulative Length-Squared distribution of the
* cosine node, and a randomly generated value in the range [0, 1]. Binary
@@ -156,7 +162,7 @@ class CosineTree
*/
size_t BinarySearch(arma::vec& cDistribution, double value, size_t start,
size_t end);
-
+
/**
* Calculate cosines of the columns present in the node, with respect to the
* sampled splitting point. The calculated cosine values are useful for
@@ -165,52 +171,52 @@ class CosineTree
* @param cosines Vector to store the cosine values in.
*/
void CalculateCosines(arma::vec& cosines);
-
+
/**
* Calculate centroid of the columns present in the node. The calculated
* centroid is used as a basis vector for the cosine tree being constructed.
*/
void CalculateCentroid();
-
+
//! Returns the basis of the constructed subspace.
void GetFinalBasis(arma::mat& finalBasis) { finalBasis = basis; }
-
+
//! Get pointer to the dataset matrix.
const arma::mat& GetDataset() const { return dataset; }
-
+
//! Get the indices of columns in the node.
std::vector<size_t>& VectorIndices() { return indices; }
-
+
//! Set the Monte Carlo error.
void L2Error(const double error) { this->l2Error = error; }
-
+
//! Get the Monte Carlo error.
double L2Error() const { return l2Error; }
-
+
//! Get pointer to the centroid vector.
arma::vec& Centroid() { return centroid; }
-
+
//! Set the basis vector of the node.
void BasisVector(arma::vec& bVector) { this->basisVector = bVector; }
-
+
//! Get the basis vector of the node.
arma::vec& BasisVector() { return basisVector; }
-
+
//! Get pointer to the left child of the node.
CosineTree* Left() { return left; }
-
+
//! Get pointer to the right child of the node.
CosineTree* Right() { return right; }
-
+
//! Get number of columns of input matrix in the node.
size_t NumColumns() const { return numColumns; }
-
+
//! Get the Frobenius norm squared of columns in the node.
double FrobNormSquared() const { return frobNormSquared; }
-
+
//! Get the column index of split point of the node.
size_t SplitPointIndex() const { return indices[splitPointIndex]; }
-
+
private:
//! Matrix for which cosine tree is constructed.
const arma::mat& dataset;
@@ -222,10 +228,10 @@ class CosineTree
arma::mat basis;
//! Parent of the node.
CosineTree* parent;
- //! Right child of the node.
- CosineTree* right;
//! Left child of the node.
CosineTree* left;
+ //! Right child of the node.
+ CosineTree* right;
//! Indices of columns of input matrix in the node.
std::vector<size_t> indices;
//! L2-norm squared of columns in the node.
@@ -247,7 +253,7 @@ class CosineTree
class CompareCosineNode
{
public:
-
+
// Comparison function for construction of priority queue.
bool operator() (const CosineTree* a, const CosineTree* b) const
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list