[mlpack] 175/324: Adding Regularized SVD Code
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:08 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit b98d1cc46947ed2f456b54b719f6cd7c941e3090
Author: siddharth.950 <siddharth.950 at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Thu Jul 10 19:16:30 2014 +0000
Adding Regularized SVD Code
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16814 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/core/optimizers/sgd/sgd_impl.hpp | 1 +
src/mlpack/methods/CMakeLists.txt | 1 +
src/mlpack/methods/regularized_svd/CMakeLists.txt | 17 ++
.../methods/regularized_svd/regularized_svd.hpp | 70 ++++++++
.../regularized_svd/regularized_svd_function.cpp | 181 +++++++++++++++++++++
.../regularized_svd/regularized_svd_function.hpp | 115 +++++++++++++
.../regularized_svd/regularized_svd_impl.hpp | 47 ++++++
7 files changed, 432 insertions(+)
diff --git a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp
index 63a9690..1054d5a 100644
--- a/src/mlpack/core/optimizers/sgd/sgd_impl.hpp
+++ b/src/mlpack/core/optimizers/sgd/sgd_impl.hpp
@@ -7,6 +7,7 @@
#ifndef __MLPACK_CORE_OPTIMIZERS_SGD_SGD_IMPL_HPP
#define __MLPACK_CORE_OPTIMIZERS_SGD_SGD_IMPL_HPP
+#include <mlpack/methods/regularized_svd/regularized_svd_function.hpp>
// In case it hasn't been included yet.
#include "sgd.hpp"
diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index 5115733..93289ea 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -27,6 +27,7 @@ set(DIRS
radical
range_search
rann
+ regularized_svd
sparse_autoencoder
sparse_coding
)
diff --git a/src/mlpack/methods/regularized_svd/CMakeLists.txt b/src/mlpack/methods/regularized_svd/CMakeLists.txt
new file mode 100644
index 0000000..75cc1da
--- /dev/null
+++ b/src/mlpack/methods/regularized_svd/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Define the files we need to compile.
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+ regularized_svd.hpp
+ regularized_svd_impl.hpp
+ regularized_svd_function.hpp
+ regularized_svd_function.cpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+ set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd.hpp b/src/mlpack/methods/regularized_svd/regularized_svd.hpp
new file mode 100644
index 0000000..3af7921
--- /dev/null
+++ b/src/mlpack/methods/regularized_svd/regularized_svd.hpp
@@ -0,0 +1,70 @@
+/**
+ * @file regularized_svd.hpp
+ * @author Siddharth Agrawal
+ *
+ * An implementation of Regularized SVD.
+ */
+
+#ifndef __MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_SVD_HPP
+#define __MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_SVD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/optimizers/sgd/sgd.hpp>
+
+#include "regularized_svd_function.hpp"
+
+namespace mlpack {
+namespace svd {
+
+template<
+ template<typename> class OptimizerType = mlpack::optimization::SGD
+>
+class RegularizedSVD
+{
+ public:
+
+ /**
+ * Constructor for Regularized SVD. Obtains the user and item matrices after
+ * training on the passed data. The constructor initiates an object of class
+ * RegularizedSVDFunction for optimization. It uses the SGD optimizer by
+ * default. The optimizer uses a template specialization of Optimize().
+ *
+ * @param data Dataset for which SVD is calculated.
+ * @param u User matrix in the matrix decomposition.
+ * @param v Item matrix in the matrix decomposition.
+ * @param rank Rank used for matrix factorization.
+ * @param iterations Number of optimization iterations.
+ * @param lambda Regularization parameter for the optimization.
+ */
+ RegularizedSVD(const arma::mat& data,
+ arma::mat& u,
+ arma::mat& v,
+ const size_t rank,
+ const size_t iterations = 10,
+ const double alpha = 0.01,
+ const double lambda = 0.02);
+
+ private:
+ //! Rating data.
+ const arma::mat& data;
+ //! Rank used for matrix factorization.
+ size_t rank;
+ //! Number of optimization iterations.
+ size_t iterations;
+ //! Learning rate for the SGD optimizer.
+ double alpha;
+ //! Regularization parameter for the optimization.
+ double lambda;
+ //! Function that will be held by the optimizer.
+ RegularizedSVDFunction rSVDFunc;
+ //! Default SGD optimizer for the class.
+ mlpack::optimization::SGD<RegularizedSVDFunction> optimizer;
+};
+
+}; // namespace svd
+}; // namespace mlpack
+
+// Include implementation.
+#include "regularized_svd_impl.hpp"
+
+#endif
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd_function.cpp b/src/mlpack/methods/regularized_svd/regularized_svd_function.cpp
new file mode 100644
index 0000000..55222aa
--- /dev/null
+++ b/src/mlpack/methods/regularized_svd/regularized_svd_function.cpp
@@ -0,0 +1,181 @@
+/**
+ * @file regularized_svd_function.cpp
+ * @author Siddharth Agrawal
+ *
+ * An implementation of the RegularizedSVDFunction class.
+ */
+
+#include "regularized_svd_function.hpp"
+
+namespace mlpack {
+namespace svd {
+
+RegularizedSVDFunction::RegularizedSVDFunction(const arma::mat& data,
+ const size_t rank,
+ const double lambda) :
+ data(data),
+ rank(rank),
+ lambda(lambda)
+{
+ // Number of users and items in the data.
+ numUsers = max(data.row(0)) + 1;
+ numItems = max(data.row(1)) + 1;
+
+ // Initialize the parameters.
+ initialPoint.randu(rank, numUsers + numItems);
+}
+
+double RegularizedSVDFunction::Evaluate(const arma::mat& parameters) const
+{
+ // The cost for the optimization is as follows:
+ // f(u, v) = sum((rating(i, j) - u(i).t() * v(j))^2)
+ // The sum is over all the ratings in the rating matrix.
+ // 'i' points to the user and 'j' points to the item being considered.
+ // The regularization term is added to the above cost, where the vectors u(i)
+ // and v(j) are regularized for each rating they contribute to.
+
+ double cost = 0.0;
+
+ for(size_t i = 0; i < data.n_cols; i++)
+ {
+ // Indices for accessing the correct parameter columns.
+ const size_t user = data(0, i);
+ const size_t item = data(1, i) + numUsers;
+
+ // Calculate the squared error in the prediction.
+ const double rating = data(2, i);
+ double ratingError = rating - arma::dot(parameters.col(user),
+ parameters.col(item));
+ double ratingErrorSquared = ratingError * ratingError;
+
+ // Calculate the regularization penalty corresponding to the parameters.
+ double userVecNorm = arma::norm(parameters.col(user), 2);
+ double itemVecNorm = arma::norm(parameters.col(item), 2);
+ double regularizationError = lambda * (userVecNorm * userVecNorm +
+ itemVecNorm * itemVecNorm);
+
+ cost += (ratingErrorSquared + regularizationError);
+ }
+
+ return cost;
+}
+
+double RegularizedSVDFunction::Evaluate(const arma::mat& parameters,
+ const size_t i) const
+{
+ // Indices for accessing the correct parameter columns.
+ const size_t user = data(0, i);
+ const size_t item = data(1, i) + numUsers;
+
+ // Calculate the squared error in the prediction.
+ const double rating = data(2, i);
+ double ratingError = rating - arma::dot(parameters.col(user),
+ parameters.col(item));
+ double ratingErrorSquared = ratingError * ratingError;
+
+ // Calculate the regularization penalty corresponding to the parameters.
+ double userVecNorm = arma::norm(parameters.col(user), 2);
+ double itemVecNorm = arma::norm(parameters.col(item), 2);
+ double regularizationError = lambda * (userVecNorm * userVecNorm +
+ itemVecNorm * itemVecNorm);
+
+ return (ratingErrorSquared + regularizationError);
+}
+
+void RegularizedSVDFunction::Gradient(const arma::mat& parameters,
+ arma::mat& gradient) const
+{
+ // For an example with rating corresponding to user 'i' and item 'j', the
+ // gradients for the parameters is as follows:
+ // grad(u(i)) = lambda * u(i) - error * v(j)
+ // grad(v(j)) = lambda * v(j) - error * u(i)
+ // 'error' is the prediction error for that example, which is:
+ // rating(i, j) - u(i).t() * v(j)
+ // The full gradient is calculated by summing the contributions over all the
+ // training examples.
+
+ gradient.zeros(rank, numUsers + numItems);
+
+ for(size_t i = 0; i < data.n_cols; i++)
+ {
+ // Indices for accessing the correct parameter columns.
+ const size_t user = data(0, i);
+ const size_t item = data(1, i) + numUsers;
+
+ // Prediction error for the example.
+ const double rating = data(2, i);
+ double ratingError = rating - arma::dot(parameters.col(user),
+ parameters.col(item));
+
+ // Gradient is non-zero only for the parameter columns corresponding to the
+ // example.
+ gradient.col(user) += lambda * parameters.col(user) -
+ ratingError * parameters.col(item);
+ gradient.col(item) += lambda * parameters.col(item) -
+ ratingError * parameters.col(user);
+ }
+}
+
+}; // namespace svd
+}; // namespace mlpack
+
+// Template specialization for the SGD optimizer.
+namespace mlpack {
+namespace optimization {
+
+template<>
+double SGD<mlpack::svd::RegularizedSVDFunction>::Optimize(arma::mat& parameters)
+{
+ // Find the number of functions to use.
+ const size_t numFunctions = function.NumFunctions();
+
+ // To keep track of where we are and how things are going.
+ size_t currentFunction = 0;
+ double overallObjective = 0;
+
+ // Calculate the first objective function.
+ for(size_t i = 0; i < numFunctions; i++)
+ overallObjective += function.Evaluate(parameters, i);
+
+ const arma::mat data = function.Dataset();
+
+ // Now iterate!
+ for(size_t i = 1; i != maxIterations; i++, currentFunction++)
+ {
+ // Is this iteration the start of a sequence?
+ if((currentFunction % numFunctions) == 0)
+ {
+ // Reset the counter variables.
+ overallObjective = 0;
+ currentFunction = 0;
+ }
+
+ const size_t numUsers = function.NumUsers();
+
+ // Indices for accessing the correct parameter columns.
+ const size_t user = data(0, currentFunction);
+ const size_t item = data(1, currentFunction) + numUsers;
+
+ // Prediction error for the example.
+ const double rating = data(2, currentFunction);
+ double ratingError = rating - arma::dot(parameters.col(user),
+ parameters.col(item));
+
+ double lambda = function.Lambda();
+
+ // Gradient is non-zero only for the parameter columns corresponding to the
+ // example.
+ parameters.col(user) -= stepSize * (lambda * parameters.col(user) -
+ ratingError * parameters.col(item));
+ parameters.col(item) -= stepSize * (lambda * parameters.col(item) -
+ ratingError * parameters.col(user));
+
+ // Now add that to the overall objective function.
+ overallObjective += function.Evaluate(parameters, currentFunction);
+ }
+
+ return overallObjective;
+}
+
+}; // namespace optimization
+}; // namespace mlpack
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd_function.hpp b/src/mlpack/methods/regularized_svd/regularized_svd_function.hpp
new file mode 100644
index 0000000..d696bfb
--- /dev/null
+++ b/src/mlpack/methods/regularized_svd/regularized_svd_function.hpp
@@ -0,0 +1,115 @@
+/**
+ * @file regularized_svd_function.hpp
+ * @author Siddharth Agrawal
+ *
+ * An implementation of the RegularizedSVDFunction class.
+ */
+
+#ifndef __MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_FUNCTION_SVD_HPP
+#define __MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_FUNCTION_SVD_HPP
+
+#include <mlpack/core.hpp>
+#include <mlpack/core/optimizers/sgd/sgd.hpp>
+
+namespace mlpack {
+namespace svd {
+
+class RegularizedSVDFunction
+{
+ public:
+
+ /**
+ * Constructor for RegularizedSVDFunction class. The constructor calculates
+ * the number of users and items in the passed data. It also randomly
+ * initializes the parameter values.
+ *
+ * @param data Dataset for which SVD is calculated.
+ * @param rank Rank used for matrix factorization.
+ * @param lambda Regularization parameter used for optimization.
+ */
+ RegularizedSVDFunction(const arma::mat& data,
+ const size_t rank,
+ const double lambda);
+
+ /**
+ * Evaluates the cost function over all examples in the data.
+ *
+ * @param parameters Parameters (user/item matrices) of the decomposition.
+ */
+ double Evaluate(const arma::mat& parameters) const;
+
+ /**
+ * Evaluates the cost function for one training example. Useful for the SGD
+ * optimizer abstraction which uses one training example at a time.
+ *
+ * @param parameters Parameters (user/item matrices) of the decomposition.
+ * @param i Index of the training example to be used.
+ */
+ double Evaluate(const arma::mat& parameters,
+ const size_t i) const;
+
+ /**
+ * Evaluates the full gradient of the cost function over all the training
+ * examples.
+ *
+ * @param parameters Parameters (user/item matrices) of the decomposition.
+ * @param gradient Calculated gradient for the parameters.
+ */
+ void Gradient(const arma::mat& parameters,
+ arma::mat& gradient) const;
+
+ //! Return the initial point for the optimization.
+ const arma::mat& GetInitialPoint() const { return initialPoint; }
+
+ //! Return the dataset passed into the constructor.
+ const arma::mat& Dataset() const { return data; }
+
+ //! Return the number of training examples. Useful for SGD optimizer.
+ size_t NumFunctions() const { return data.n_cols; }
+
+ //! Return the number of users in the data.
+ size_t NumUsers() const { return numUsers; }
+
+ //! Return the number of items in the data.
+ size_t NumItems() const { return numItems; }
+
+ //! Return the regularization parameter.
+ double Lambda() const { return lambda; }
+
+ //! Return the rank used for the factorization.
+ size_t Rank() const { return rank; }
+
+ private:
+ //! Rating data.
+ const arma::mat& data;
+ //! Initial parameter point.
+ arma::mat initialPoint;
+ //! Rank used for matrix factorization.
+ size_t rank;
+ //! Regularization parameter for the optimization.
+ double lambda;
+ //! Number of users in the given dataset.
+ size_t numUsers;
+ //! Number of items in the given dataset.
+ size_t numItems;
+};
+
+}; // namespace svd
+}; // namespace mlpack
+
+namespace mlpack {
+namespace optimization {
+
+ /**
+ * Template specialization for SGD optimizer. Used because the gradient
+ * affects only a small number of parameters per example, and thus the normal
+ * abstraction does not work as fast as we might like it to.
+ */
+ template<>
+ double SGD<mlpack::svd::RegularizedSVDFunction>::Optimize(
+ arma::mat& parameters);
+
+}; // namespace optimization
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp b/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp
new file mode 100644
index 0000000..d88e678
--- /dev/null
+++ b/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp
@@ -0,0 +1,47 @@
+/**
+ * @file regularized_svd_impl.hpp
+ * @author Siddharth Agrawal
+ *
+ * An implementation of Regularized SVD.
+ */
+
+#ifndef __MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_SVD_IMPL_HPP
+#define __MLPACK_METHODS_REGULARIZED_SVD_REGULARIZED_SVD_IMPL_HPP
+
+namespace mlpack {
+namespace svd {
+
+template<template<typename> class OptimizerType>
+RegularizedSVD<OptimizerType>::RegularizedSVD(const arma::mat& data,
+ arma::mat& u,
+ arma::mat& v,
+ const size_t rank,
+ const size_t iterations,
+ const double alpha,
+ const double lambda) :
+ data(data),
+ rank(rank),
+ iterations(iterations),
+ alpha(alpha),
+ lambda(lambda),
+ rSVDFunc(data, rank, lambda),
+ optimizer(rSVDFunc, alpha, iterations * data.n_cols)
+{
+ arma::mat parameters = rSVDFunc.GetInitialPoint();
+
+ // Train the model.
+ Timer::Start("regularized_svd_optimization");
+ const double out = optimizer.Optimize(parameters);
+ Timer::Stop("regularized_svd_optimization");
+
+ const size_t numUsers = max(data.row(0)) + 1;
+ const size_t numItems = max(data.row(1)) + 1;
+
+ u = parameters.submat(0, 0, rank - 1, numUsers - 1);
+ v = parameters.submat(0, numUsers, rank - 1, numUsers + numItems - 1);
+}
+
+}; // namespace svd
+}; // namespace mlpack
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list