[mlpack] 195/324: Add nystroem method for kernel approximation.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:10 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 946d61afcda408ee8f43f7248e0cacfe275e68be
Author: marcus <marcus at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Fri Jul 18 21:56:30 2014 +0000
Add nystroem method for kernel approximation.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16837 9d5b8971-822b-0410-80eb-d18c1038ef23
---
.../methods/nystroem_method/kmeans_selection.hpp | 47 +++++++++++
.../methods/nystroem_method/nystroem_method.hpp | 81 +++++++++++++++++++
.../nystroem_method/nystroem_method_impl.hpp | 90 ++++++++++++++++++++++
.../methods/nystroem_method/ordered_selection.hpp | 38 +++++++++
.../methods/nystroem_method/random_selection.hpp | 40 ++++++++++
5 files changed, 296 insertions(+)
diff --git a/src/mlpack/methods/nystroem_method/kmeans_selection.hpp b/src/mlpack/methods/nystroem_method/kmeans_selection.hpp
new file mode 100644
index 0000000..f931d74
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/kmeans_selection.hpp
@@ -0,0 +1,47 @@
+/**
+ * @file kmeans_selection.hpp
+ * @author Marcus Edel
+ *
+ * Use the centroids of the K-Means clustering method for use in the Nystroem
+ * method of kernel matrix approximation.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_KMEANS_SELECTION_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_KMEANS_SELECTION_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/methods/kmeans/kmeans.hpp>
+
+namespace mlpack {
+namespace kernel {
+
+template<typename ClusteringType = kmeans::KMeans<> >
+class KMeansSelection
+{
+ public:
+ /**
+ * Use the K-Means clustering method to select the specified number of points
+ * in the dataset.
+ *
+ * @param data Dataset to sample from.
+ * @param m Number of points to select.
+ * @return Matrix pointer in which centroids are stored.
+ */
+ const static arma::mat* Select(const arma::mat& data,
+ const size_t m,
+ const size_t maxIterations = 5)
+ {
+ arma::Col<size_t> assignments;
+ arma::mat* centroids = new arma::mat;
+
+ // Perform the K-Means clustering method.
+ ClusteringType kmeans(maxIterations);
+ kmeans.Cluster(data, m, assignments, *centroids);
+
+ return centroids;
+ }
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/nystroem_method/nystroem_method.hpp b/src/mlpack/methods/nystroem_method/nystroem_method.hpp
new file mode 100644
index 0000000..fc91d6e
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/nystroem_method.hpp
@@ -0,0 +1,81 @@
+/**
+ * @file nystroem_method.hpp
+ * @author Ryan Curtin
+ * @author Marcus Edel
+ *
+ * Implementation of the Nystroem method for approximating a kernel matrix.
+ * There are many variations on how to do this, so template parameters allow the
+ * selection of many different techniques.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_HPP
+
+#include <mlpack/core.hpp>
+#include "kmeans_selection.hpp"
+
+namespace mlpack {
+namespace kernel {
+
+template<
+ typename KernelType,
+ typename PointSelectionPolicy = KMeansSelection<>
+>
+class NystroemMethod
+{
+ public:
+ /**
+ * Create the NystroemMethod object. The constructor here does not really do
+ * anything.
+ *
+ * @param data Data matrix.
+ * @param kernel Kernel to be used for computation.
+ * @param rank Rank to be used for matrix approximation.
+ */
+ NystroemMethod(const arma::mat& data, KernelType& kernel, const size_t rank);
+
+ /**
+ * Apply the low-rank factorization to obtain an output matrix G such that
+ * K' = G * G^T.
+ *
+ * @param output Matrix to store kernel approximation into.
+ */
+ void Apply(arma::mat& output);
+
+ /**
+ * Construct the kernel matrix with matrix that contains the selected points.
+ *
+ * @param data Data matrix pointer.
+ * @param miniKernel to store the constructed mini-kernel matrix in.
+ * @param miniKernel to store the constructed semi-kernel matrix in.
+ */
+ void GetKernelMatrix(const arma::mat* data,
+ arma::mat& miniKernel,
+ arma::mat& semiKernel);
+
+ /**
+ * Construct the kernel matrix with the selected points.
+ *
+ * @param points Indices of selected points.
+ * @param miniKernel to store the constructed mini-kernel matrix in.
+ * @param miniKernel to store the constructed semi-kernel matrix in.
+ */
+ void GetKernelMatrix(const arma::vec& points,
+ arma::mat& miniKernel,
+ arma::mat& semiKernel);
+
+ private:
+ //! The reference dataset.
+ const arma::mat& data;
+ //! The locally stored kernel, if it is necessary.
+ KernelType& kernel;
+ //! Rank used for matrix approximation.
+ const size_t rank;
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+// Include implementation.
+#include "nystroem_method_impl.hpp"
+
+#endif
diff --git a/src/mlpack/methods/nystroem_method/nystroem_method_impl.hpp b/src/mlpack/methods/nystroem_method/nystroem_method_impl.hpp
new file mode 100644
index 0000000..01ae911
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/nystroem_method_impl.hpp
@@ -0,0 +1,90 @@
+/**
+ * @file nystroem_method_impl.hpp
+ * @author Ryan Curtin
+ * @author Marcus Edel
+ *
+ * Implementation of the Nystroem method for approximating a kernel matrix.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_IMPL_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_NYSTROEM_METHOD_IMPL_HPP
+
+// In case it hasn't been included yet.
+#include "nystroem_method.hpp"
+
+namespace mlpack {
+namespace kernel {
+
+template<typename KernelType, typename PointSelectionPolicy>
+NystroemMethod<KernelType, PointSelectionPolicy>::NystroemMethod(
+ const arma::mat& data,
+ KernelType& kernel,
+ const size_t rank) :
+ data(data),
+ kernel(kernel),
+ rank(rank)
+{ }
+
+template<typename KernelType, typename PointSelectionPolicy>
+void NystroemMethod<KernelType, PointSelectionPolicy>::GetKernelMatrix(
+ const arma::mat* seletedData,
+ arma::mat& miniKernel,
+ arma::mat& semiKernel)
+{
+ // Assemble mini-kernel matrix.
+ for (size_t i = 0; i < rank; ++i)
+ for (size_t j = 0; j < rank; ++j)
+ miniKernel(i, j) = kernel.Evaluate(seletedData->col(i),
+ seletedData->col(j));
+
+ // Construct semi-kernel matrix with interactions between selected data and
+ // all points.
+ for (size_t i = 0; i < data.n_cols; ++i)
+ for (size_t j = 0; j < rank; ++j)
+ semiKernel(i, j) = kernel.Evaluate(data.col(i),
+ seletedData->col(j));
+ // Clean the memory.
+ delete seletedData;
+}
+
+template<typename KernelType, typename PointSelectionPolicy>
+void NystroemMethod<KernelType, PointSelectionPolicy>::GetKernelMatrix(
+ const arma::vec& selectedPoints,
+ arma::mat& miniKernel,
+ arma::mat& semiKernel)
+{
+ // Assemble mini-kernel matrix.
+ for (size_t i = 0; i < rank; ++i)
+ for (size_t j = 0; j < rank; ++j)
+ miniKernel(i, j) = kernel.Evaluate(data.col(selectedPoints(i)),
+ data.col(selectedPoints(j)));
+
+ // Construct semi-kernel matrix with interactions between selected points and
+ // all points.
+ for (size_t i = 0; i < data.n_cols; ++i)
+ for (size_t j = 0; j < rank; ++j)
+ semiKernel(i, j) = kernel.Evaluate(data.col(i),
+ data.col(selectedPoints(j)));
+}
+
+template<typename KernelType, typename PointSelectionPolicy>
+void NystroemMethod<KernelType, PointSelectionPolicy>::Apply(arma::mat& output)
+{
+ arma::mat miniKernel(rank, rank);
+ arma::mat semiKernel(data.n_cols, rank);
+
+ GetKernelMatrix(PointSelectionPolicy::Select(data, rank), miniKernel,
+ semiKernel);
+
+ // Singular value decomposition mini-kernel matrix.
+ arma::mat U, V;
+ arma::vec s;
+ arma::svd(U, s, V, miniKernel);
+
+ // Construct the output matrix.
+ output = semiKernel * (U * arma::diagmat(1.0 / sqrt(s))) * V;
+}
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/nystroem_method/ordered_selection.hpp b/src/mlpack/methods/nystroem_method/ordered_selection.hpp
new file mode 100644
index 0000000..154c04e
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/ordered_selection.hpp
@@ -0,0 +1,38 @@
+/**
+ * @file ordered_selection.hpp
+ * @author Ryan Curtin
+ *
+ * Select the first points of the dataset for use in the Nystroem method of
+ * kernel matrix approximation. This is mostly for testing, but might have
+ * other uses.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_ORDERED_SELECTION_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_ORDERED_SELECTION_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace kernel {
+
+class OrderedSelection
+{
+ public:
+ /**
+ * Select the specified number of points in the dataset.
+ *
+ * @param data Dataset to sample from.
+ * @param m Number of points to select.
+ * @return Indices of selected points from the dataset.
+ */
+ const static arma::vec Select(const arma::mat& /* unused */, const size_t m)
+ {
+ // This generates [0 1 2 3 ... (m - 1)].
+ arma::vec selectedPoints = arma::linspace<arma::vec>(0, m - 1, m);
+ return selectedPoints;
+ }
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/nystroem_method/random_selection.hpp b/src/mlpack/methods/nystroem_method/random_selection.hpp
new file mode 100644
index 0000000..83b09cc
--- /dev/null
+++ b/src/mlpack/methods/nystroem_method/random_selection.hpp
@@ -0,0 +1,40 @@
+/**
+ * @file random_selection.hpp
+ * @author Ryan Curtin
+ *
+ * Randomly select some points (with replacement) to use for the Nystroem
+ * method. Replacement is suboptimal, but for rank << number of points, this is
+ * unlikely.
+ */
+#ifndef __MLPACK_METHODS_NYSTROEM_METHOD_RANDOM_SELECTION_HPP
+#define __MLPACK_METHODS_NYSTROEM_METHOD_RANDOM_SELECTION_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace kernel {
+
+class RandomSelection
+{
+ public:
+ /**
+ * Randomly select the specified number of points in the dataset.
+ *
+ * @param data Dataset to sample from.
+ * @param m Number of points to select.
+ * @return Indices of selected points from the dataset.
+ */
+ const static arma::vec Select(const arma::mat& data, const size_t m)
+ {
+ arma::vec selectedPoints(m);
+ for (size_t i = 0; i < m; ++i)
+ selectedPoints(i) = math::RandInt(0, data.n_cols);
+
+ return selectedPoints;
+ }
+};
+
+}; // namespace kernel
+}; // namespace mlpack
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list