[mlpack] 113/324: Adding QUIC-SVD.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:01 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 204fe375d1a4ac5c506ea2bd52b826ef39988c2e
Author: siddharth.950 <siddharth.950 at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Thu Jul 3 13:08:12 2014 +0000
Adding QUIC-SVD.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16751 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/core/tree/CMakeLists.txt | 2 +-
.../{cosine_tree_impl.hpp => cosine_tree.cpp} | 8 ---
src/mlpack/core/tree/cosine_tree/cosine_tree.hpp | 3 -
src/mlpack/methods/CMakeLists.txt | 1 +
src/mlpack/methods/CMakeLists.txt~ | 3 +
src/mlpack/methods/quic_svd/CMakeLists.txt | 15 ++++
src/mlpack/methods/quic_svd/quic_svd.hpp | 70 ++++++++++++++++++
src/mlpack/methods/quic_svd/quic_svd_impl.hpp | 82 ++++++++++++++++++++++
src/mlpack/tests/CMakeLists.txt | 1 +
src/mlpack/tests/CMakeLists.txt~ | 4 ++
src/mlpack/tests/quic_svd_test.cpp | 42 +++++++++++
11 files changed, 219 insertions(+), 12 deletions(-)
diff --git a/src/mlpack/core/tree/CMakeLists.txt b/src/mlpack/core/tree/CMakeLists.txt
index 7988285..43b6117 100644
--- a/src/mlpack/core/tree/CMakeLists.txt
+++ b/src/mlpack/core/tree/CMakeLists.txt
@@ -14,7 +14,7 @@ set(SOURCES
binary_space_tree/traits.hpp
bounds.hpp
cosine_tree/cosine_tree.hpp
- cosine_tree/cosine_tree_impl.hpp
+ cosine_tree/cosine_tree.cpp
cover_tree/cover_tree.hpp
cover_tree/cover_tree_impl.hpp
cover_tree/first_point_is_root.hpp
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp b/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
similarity index 98%
rename from src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
rename to src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
index 46167d5..67e1d9a 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree.cpp
@@ -4,10 +4,6 @@
*
* Implementation of cosine tree.
*/
-#ifndef __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_IMPL_HPP
-#define __MLPACK_CORE_TREE_COSINE_TREE_COSINE_TREE_IMPL_HPP
-
-// In case it wasn't included already for some reason.
#include "cosine_tree.hpp"
#include <boost/math/distributions/normal.hpp>
@@ -127,8 +123,6 @@ CosineTree::CosineTree(const arma::mat& dataset,
// Calculate Monte Carlo error estimate for the root node.
monteCarloError = MonteCarloError(&root, treeQueue);
-
- std::cout << monteCarloError / root.FrobNormSquared() << "\n";
}
// Construct the subspace basis from the current priority queue.
@@ -429,5 +423,3 @@ void CosineTree::CalculateCentroid()
}; // namespace tree
}; // namespace mlpack
-
-#endif
diff --git a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
index 4a411d9..f4d5aac 100644
--- a/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
+++ b/src/mlpack/core/tree/cosine_tree/cosine_tree.hpp
@@ -243,7 +243,4 @@ class CompareCosineNode
}; // namespace tree
}; // namespace mlpack
-// Include implementation.
-#include "cosine_tree_impl.hpp"
-
#endif
diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index d9eea39..5115733 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -23,6 +23,7 @@ set(DIRS
# lmf
pca
perceptron
+ quic_svd
radical
range_search
rann
diff --git a/src/mlpack/methods/CMakeLists.txt~ b/src/mlpack/methods/CMakeLists.txt~
index fb6a0d4..1477e4a 100644
--- a/src/mlpack/methods/CMakeLists.txt~
+++ b/src/mlpack/methods/CMakeLists.txt~
@@ -2,6 +2,7 @@
set(DIRS
amf
cf
+ decision_stump
det
emst
fastmks
@@ -21,6 +22,8 @@ set(DIRS
nmf
# lmf
pca
+ perceptron
+# quic_svd
radical
range_search
rann
diff --git a/src/mlpack/methods/quic_svd/CMakeLists.txt b/src/mlpack/methods/quic_svd/CMakeLists.txt
new file mode 100644
index 0000000..c89b866
--- /dev/null
+++ b/src/mlpack/methods/quic_svd/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Define the files we need to compile.
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+ quic_svd.hpp
+ quic_svd_impl.hpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+ set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/quic_svd/quic_svd.hpp b/src/mlpack/methods/quic_svd/quic_svd.hpp
new file mode 100644
index 0000000..f2ef741
--- /dev/null
+++ b/src/mlpack/methods/quic_svd/quic_svd.hpp
@@ -0,0 +1,70 @@
+/**
+ * @file quic_svd.hpp
+ * @author Siddharth Agrawal
+ *
+ * An implementation of QUIC-SVD.
+ */
+#ifndef __MLPACK_METHODS_QUIC_SVD_QUIC_SVD_HPP
+#define __MLPACK_METHODS_QUIC_SVD_QUIC_SVD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/tree/cosine_tree/cosine_tree.hpp>
+
+namespace mlpack {
+namespace svd {
+
+class QUIC_SVD
+{
+ public:
+
+ /**
+ * Constructor which implements the QUIC-SVD algorithm. The function calls the
+ * CosineTree constructor to create a subspace basis, where the original
+ * matrix's projection has minimum reconstruction error. The constructor then
+ * uses the ExtractSVD() function to calculate the SVD of the original dataset
+ * in that subspace.
+ *
+ * @param dataset Matrix for which SVD is calculated.
+ * @param u First unitary matrix.
+ * @param v Second unitary matrix.
+ * @param sigma Diagonal matrix of singular values.
+ * @param epsilon Error tolerance fraction for calculated subspace.
+ * @param delta Cumulative probability for Monte Carlo error lower bound.
+ */
+ QUIC_SVD(const arma::mat& dataset,
+ arma::mat& u,
+ arma::mat& v,
+ arma::mat& sigma,
+ const double epsilon = 0.03,
+ const double delta = 0.1);
+
+ /**
+ * This function uses the vector subspace created using a cosine tree to
+ * calculate an approximate SVD of the original matrix.
+ *
+ * @param u First unitary matrix.
+ * @param v Second unitary matrix.
+ * @param sigma Diagonal matrix of singular values.
+ */
+ void ExtractSVD(arma::mat& u,
+ arma::mat& v,
+ arma::mat& sigma);
+
+ private:
+ //! Matrix for which cosine tree is constructed.
+ const arma::mat& dataset;
+ //! Error tolerance fraction for calculated subspace.
+ double epsilon;
+ //! Cumulative probability for Monte Carlo error lower bound.
+ double delta;
+ //! Subspace basis of the input dataset.
+ arma::mat basis;
+};
+
+}; // namespace svd
+}; // namespace mlpack
+
+// Include implementation.
+#include "quic_svd_impl.hpp"
+
+#endif
diff --git a/src/mlpack/methods/quic_svd/quic_svd_impl.hpp b/src/mlpack/methods/quic_svd/quic_svd_impl.hpp
new file mode 100644
index 0000000..f1fada3
--- /dev/null
+++ b/src/mlpack/methods/quic_svd/quic_svd_impl.hpp
@@ -0,0 +1,82 @@
+/**
+ * @file quic_svd_impl.hpp
+ * @author Siddharth Agrawal
+ *
+ * An implementation of QUIC-SVD.
+ */
+#ifndef __MLPACK_METHODS_QUIC_SVD_QUIC_SVD_IMPL_HPP
+#define __MLPACK_METHODS_QUIC_SVD_QUIC_SVD_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "quic_svd.hpp"
+
+using namespace mlpack::tree;
+
+namespace mlpack {
+namespace svd {
+
+QUIC_SVD::QUIC_SVD(const arma::mat& dataset,
+ arma::mat& u,
+ arma::mat& v,
+ arma::mat& sigma,
+ const double epsilon,
+ const double delta) :
+ dataset(dataset),
+ epsilon(epsilon),
+ delta(delta)
+{
+ // Since columns are sample in the implementation, the matrix is transposed if
+ // necessary for maximum speedup.
+ CosineTree* ctree;
+ if(dataset.n_cols > dataset.n_rows)
+ ctree = new CosineTree(dataset, epsilon, delta);
+ else
+ ctree = new CosineTree(dataset.t(), epsilon, delta);
+
+ // Get subspace basis by creating the cosine tree.
+ ctree->GetFinalBasis(basis);
+
+ // Use the ExtractSVD algorithm mentioned in the paper to extract the SVD of
+ // the original dataset in the obtained subspace.
+ ExtractSVD(u, v, sigma);
+}
+
+void QUIC_SVD::ExtractSVD(arma::mat& u,
+ arma::mat& v,
+ arma::mat& sigma)
+{
+ // Calculate A * V_hat, necessary for further calculations.
+ arma::mat projectedMat;
+ if(dataset.n_cols > dataset.n_rows)
+ projectedMat = dataset.t() * basis;
+ else
+ projectedMat = dataset * basis;
+
+ // Calculate the squared projected matrix.
+ arma::mat projectedMatSquared = projectedMat.t() * projectedMat;
+
+ // Calculate the SVD of the above matrix.
+ arma::mat uBar, vBar;
+ arma::vec sigmaBar;
+ arma::svd(uBar, sigmaBar, vBar, projectedMatSquared);
+
+ // Calculate the approximate SVD of the original matrix, using the SVD of the
+ // squared projected matrix.
+ v = basis * vBar;
+ sigma = arma::sqrt(diagmat(sigmaBar));
+ u = projectedMat * vBar * sigma.i();
+
+ // Since columns are sampled, the unitary matrices have to be exchanged, if
+ // the transposed matrix is not passed.
+ if(dataset.n_cols > dataset.n_rows)
+ {
+ arma::mat tempMat = u;
+ u = v;
+ v = tempMat;
+ }
+}
+
+}; // namespace svd
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/tests/CMakeLists.txt b/src/mlpack/tests/CMakeLists.txt
index 3175331..41c7867 100644
--- a/src/mlpack/tests/CMakeLists.txt
+++ b/src/mlpack/tests/CMakeLists.txt
@@ -36,6 +36,7 @@ add_executable(mlpack_test
nmf_test.cpp
pca_test.cpp
perceptron_test.cpp
+ quic_svd_test.cpp
radical_test.cpp
range_search_test.cpp
rectangle_tree_test.cpp
diff --git a/src/mlpack/tests/CMakeLists.txt~ b/src/mlpack/tests/CMakeLists.txt~
index d35779b..0c989c9 100644
--- a/src/mlpack/tests/CMakeLists.txt~
+++ b/src/mlpack/tests/CMakeLists.txt~
@@ -8,6 +8,7 @@ add_executable(mlpack_test
aug_lagrangian_test.cpp
cf_test.cpp
cli_test.cpp
+ cosine_tree_test.cpp
decision_stump_test.cpp
det_test.cpp
distribution_test.cpp
@@ -35,8 +36,11 @@ add_executable(mlpack_test
nmf_test.cpp
pca_test.cpp
perceptron_test.cpp
+# quic_svd_test.cpp
radical_test.cpp
range_search_test.cpp
+ rectangle_tree_test.cpp
+ sa_test.cpp
save_restore_utility_test.cpp
sgd_test.cpp
sort_policy_test.cpp
diff --git a/src/mlpack/tests/quic_svd_test.cpp b/src/mlpack/tests/quic_svd_test.cpp
new file mode 100644
index 0000000..c0d9c34
--- /dev/null
+++ b/src/mlpack/tests/quic_svd_test.cpp
@@ -0,0 +1,42 @@
+/**
+ * @file quic_svd_test.cpp
+ * @author Siddharth Agrawal
+ *
+ * Test file for QUIC-SVD class.
+ */
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/quic_svd/quic_svd.hpp>
+
+#include <boost/test/unit_test.hpp>
+#include "old_boost_test_definitions.hpp"
+
+BOOST_AUTO_TEST_SUITE(QUICSVDTest);
+
+using namespace mlpack;
+using namespace mlpack::svd;
+
+/**
+ * The reconstruction error of the obtained SVD should be small.
+ */
+BOOST_AUTO_TEST_CASE(QUICSVDReconstructionError)
+{
+ // Load the dataset.
+ arma::mat dataset;
+ data::Load("test_data_3_1000.csv", dataset);
+
+ // Obtain the SVD using default parameters.
+ arma::mat u, v, sigma;
+ QUIC_SVD quicsvd(dataset, u, v, sigma);
+
+ // Reconstruct the matrix using the SVD.
+ arma::mat reconstruct;
+ reconstruct = u * sigma * v.t();
+
+ // The relative reconstruction error should be small.
+ double relativeError = arma::norm(dataset - reconstruct, "frob") /
+ arma::norm(dataset, "frob");
+ BOOST_REQUIRE_SMALL(relativeError, 1e-5);
+}
+
+BOOST_AUTO_TEST_SUITE_END();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list