[mlpack] 207/324: Integrate nystroem method into the kernel_pca_main.cpp file.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:11 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit a35033e434d5e7881d73b83752bce05dbf695b19
Author: marcus <marcus at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Tue Jul 22 18:52:36 2014 +0000
Integrate nystroem method into the kernel_pca_main.cpp file.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16849 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/kernel_pca/kernel_pca_main.cpp | 102 ++++++++++++++++++----
1 file changed, 87 insertions(+), 15 deletions(-)
diff --git a/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp b/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp
index 1d6695c..91f6d75 100644
--- a/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp
+++ b/src/mlpack/methods/kernel_pca/kernel_pca_main.cpp
@@ -5,6 +5,11 @@
* Executable for Kernel PCA.
*/
#include <mlpack/core.hpp>
+#include <mlpack/methods/nystroem_method/ordered_selection.hpp>
+#include <mlpack/methods/nystroem_method/random_selection.hpp>
+#include <mlpack/methods/nystroem_method/kmeans_selection.hpp>
+#include <mlpack/methods/nystroem_method/nystroem_method.hpp>
+#include <mlpack/methods/kernel_pca/kernel_rules/nystroem_method.hpp>
#include "kernel_pca.hpp"
@@ -24,6 +29,14 @@ PROGRAM_INFO("Kernel Principal Components Analysis",
"For the case where a linear kernel is used, this reduces to regular "
"PCA."
"\n\n"
+
+ "For example, the following will perform KPCA on the 'input.csv' file using"
+ " the gaussian kernel and store the transformed date in the "
+ "'transformed.csv' file."
+
+ "\n\n"
+ "$ kernel_pca -i input.csv -k gaussian -o transformed.csv"
+ "\n\n"
"The kernels that are supported are listed below:"
"\n\n"
" * 'linear': the standard linear dot product (same as normal PCA):\n"
@@ -49,7 +62,15 @@ PROGRAM_INFO("Kernel Principal Components Analysis",
"\n"
"The parameters for each of the kernels should be specified with the "
"options --bandwidth, --kernel_scale, --offset, or --degree (or a "
- "combination of those options).\n");
+ "combination of those options)."
+ "\n\n"
+ "Optionally, the nystroem method (\"Using the Nystroem method to speed up"
+ " kernel machines\", 2001) can be used to calculate the kernel matrix by "
+ "specifying the --nystroem_method (-n) option. This approach works by using"
+ " a subset of the data as basis to reconstruct the kernel matrix; to specify"
+ " the sampling scheme, the --sampling parameter is used, the sampling scheme"
+ " for the nystroem method can be chosen from the following list: kmeans,"
+ " random, ordered.");
PARAM_STRING_REQ("input_file", "Input dataset to perform KPCA on.", "i");
PARAM_STRING_REQ("output_file", "File to save modified dataset to.", "o");
@@ -63,6 +84,11 @@ PARAM_INT("new_dimensionality", "If not 0, reduce the dimensionality of "
PARAM_FLAG("center", "If set, the transformed data will be centered about the "
"origin.", "c");
+PARAM_FLAG("nystroem_method", "If set, the nystroem method will be used.", "n");
+
+PARAM_STRING("sampling", "Sampling scheme to use for the nystroem method: "
+ "'kmeans', 'random', 'ordered'", "s", "kmeans");
+
PARAM_DOUBLE("kernel_scale", "Scale, for 'hyptan' kernel.", "S", 1.0);
PARAM_DOUBLE("offset", "Offset, for 'hyptan' and 'polynomial' kernels.", "O",
0.0);
@@ -71,6 +97,48 @@ PARAM_DOUBLE("bandwidth", "Bandwidth, for 'gaussian' and 'laplacian' kernels.",
PARAM_DOUBLE("degree", "Degree of polynomial, for 'polynomial' kernel.", "D",
1.0);
+/**
+ * Run kernel PCA on the given dataset for the given kernel type.
+ *
+ * Every KernelPCA object built here is constructed from `kernel` and
+ * `centerTransformedData`, so the user-supplied kernel parameters and the
+ * centering flag take effect whether or not the nystroem method is used.
+ *
+ * @param dataset Data handed to KernelPCA::Apply() (taken by non-const
+ *     reference; presumably transformed in place -- confirm in kernel_pca.hpp).
+ * @param centerTransformedData Forwarded to the KernelPCA constructor.
+ * @param nystroem If true, NystroemKernelRule is used as the kernel rule, with
+ *     the point selection policy named by `sampling`; otherwise the default
+ *     kernel rule of KernelPCA is used.
+ * @param newDim New dimensionality, passed to KernelPCA::Apply().
+ * @param sampling Sampling scheme for the nystroem method: "kmeans", "random"
+ *     or "ordered".  Any other value is reported through Log::Fatal.  Only
+ *     inspected when `nystroem` is true.
+ * @param kernel Kernel instance forwarded to the KernelPCA constructor.
+ */
+template<typename KernelType>
+void RunKPCA(arma::mat& dataset,
+             const bool centerTransformedData,
+             const bool nystroem,
+             const size_t newDim,
+             const string& sampling,
+             KernelType& kernel)
+{
+  if (nystroem)
+  {
+    // Make sure the sampling scheme is valid.
+    if (sampling == "kmeans")
+    {
+      KernelPCA<KernelType, NystroemKernelRule<KernelType,
+          KMeansSelection<> > > kpca(kernel, centerTransformedData);
+      kpca.Apply(dataset, newDim);
+    }
+    else if (sampling == "random")
+    {
+      KernelPCA<KernelType, NystroemKernelRule<KernelType,
+          RandomSelection> > kpca(kernel, centerTransformedData);
+      kpca.Apply(dataset, newDim);
+    }
+    else if (sampling == "ordered")
+    {
+      KernelPCA<KernelType, NystroemKernelRule<KernelType,
+          OrderedSelection> > kpca(kernel, centerTransformedData);
+      kpca.Apply(dataset, newDim);
+    }
+    else
+    {
+      // Invalid sampling scheme.
+      Log::Fatal << "Invalid sampling scheme ('" << sampling << "'); valid "
+          << "choices are 'kmeans', 'random' and 'ordered'" << endl;
+    }
+  }
+  else
+  {
+    KernelPCA<KernelType> kpca(kernel, centerTransformedData);
+    kpca.Apply(dataset, newDim);
+  }
+}
+
int main(int argc, char** argv)
{
// Parse command line options.
@@ -99,19 +167,22 @@ int main(int argc, char** argv)
const string kernelType = CLI::GetParam<string>("kernel");
const bool centerTransformedData = CLI::HasParam("center");
+ const bool nystroem = CLI::HasParam("nystroem_method");
+ const string sampling = CLI::GetParam<string>("sampling");
if (kernelType == "linear")
{
- KernelPCA<LinearKernel> kpca(LinearKernel(), centerTransformedData);
- kpca.Apply(dataset, newDim);
+ LinearKernel kernel;
+ RunKPCA<LinearKernel>(dataset, centerTransformedData, nystroem, newDim,
+ sampling, kernel);
}
else if (kernelType == "gaussian")
{
const double bandwidth = CLI::GetParam<double>("bandwidth");
GaussianKernel kernel(bandwidth);
- KernelPCA<GaussianKernel> kpca(kernel, centerTransformedData);
- kpca.Apply(dataset, newDim);
+ RunKPCA<GaussianKernel>(dataset, centerTransformedData, nystroem, newDim,
+ sampling, kernel);
}
else if (kernelType == "polynomial")
{
@@ -119,8 +190,8 @@ int main(int argc, char** argv)
const double offset = CLI::GetParam<double>("offset");
PolynomialKernel kernel(degree, offset);
- KernelPCA<PolynomialKernel> kpca(kernel, centerTransformedData);
- kpca.Apply(dataset, newDim);
+ RunKPCA<PolynomialKernel>(dataset, centerTransformedData, nystroem,
+ newDim, sampling, kernel);
}
else if (kernelType == "hyptan")
{
@@ -128,29 +199,30 @@ int main(int argc, char** argv)
const double offset = CLI::GetParam<double>("offset");
HyperbolicTangentKernel kernel(scale, offset);
- KernelPCA<HyperbolicTangentKernel> kpca(kernel, centerTransformedData);
- kpca.Apply(dataset, newDim);
+ RunKPCA<HyperbolicTangentKernel>(dataset, centerTransformedData, nystroem,
+ newDim, sampling, kernel);
}
else if (kernelType == "laplacian")
{
const double bandwidth = CLI::GetParam<double>("bandwidth");
LaplacianKernel kernel(bandwidth);
- KernelPCA<LaplacianKernel> kpca(kernel, centerTransformedData);
- kpca.Apply(dataset, newDim);
+ RunKPCA<LaplacianKernel>(dataset, centerTransformedData, nystroem, newDim,
+ sampling, kernel);
}
else if (kernelType == "epanechnikov")
{
const double bandwidth = CLI::GetParam<double>("bandwidth");
EpanechnikovKernel kernel(bandwidth);
- KernelPCA<EpanechnikovKernel> kpca(kernel, centerTransformedData);
- kpca.Apply(dataset, newDim);
+ RunKPCA<EpanechnikovKernel>(dataset, centerTransformedData, nystroem,
+ newDim, sampling, kernel);
}
else if (kernelType == "cosine")
{
- KernelPCA<CosineDistance> kpca(CosineDistance(), centerTransformedData);
- kpca.Apply(dataset, newDim);
+ CosineDistance kernel;
+ RunKPCA<CosineDistance>(dataset, centerTransformedData, nystroem, newDim,
+ sampling, kernel);
}
else
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list