[mlpack] 324/324: Made Reg SVD work with CF.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:22 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 610127221eb64f8f25f317caf483e0ae081fd4e4
Author: siddharth.950 <siddharth.950 at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Sat Aug 16 18:53:31 2014 +0000
Made Reg SVD work with CF.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17044 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/cf/cf.hpp | 41 ++++++++++++
src/mlpack/methods/cf/cf_impl.hpp | 75 +++++++++++++++++++++-
.../methods/regularized_svd/regularized_svd.hpp | 48 +++++++++-----
.../regularized_svd/regularized_svd_impl.hpp | 39 ++++++-----
4 files changed, 166 insertions(+), 37 deletions(-)
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index f8321ab..edca85f 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -24,6 +24,24 @@ namespace mlpack {
namespace cf /** Collaborative filtering. */ {
/**
+ * Template class for factorizer traits. This stores the default values for the
+ * variables to be assumed for a given factorizer. If any of the factorizers
+ * needs to have a different value for the traits, a template specialization has
+ * to be written for that factorizer. An example can be found in the module for
+ * Regularized SVD.
+ */
+template<typename FactorizerType>
+class FactorizerTraits
+{
+ public:
+ /**
+ * If true, then the passed data matrix is used for factorizer.Apply().
+ * Otherwise, it is modified into a form suitable for factorization.
+ */
+ static const bool IsCleaned = false;
+};
+
+/**
* This class implements Collaborative Filtering (CF). This implementation
* presently supports Alternating Least Squares (ALS) for collaborative
* filtering.
@@ -73,6 +91,29 @@ class CF
CF(arma::mat& data,
const size_t numUsersForSimilarity = 5,
const size_t rank = 0);
+
+ /**
+ * Initialize the CF object using an instantiated factorizer. Store a
+ * reference to the data that we will be using. There are parameters that can
+ * be set; default values are provided for each of them. If the rank is left
+ * unset (or is set to 0), a simple density-based heuristic will be used to
+ * choose a rank.
+ *
+ * @param data Initial (user, item, rating) matrix.
+ * @param factorizer Instantiated factorizer object.
+ * @param numUsersForSimilarity Size of the neighborhood.
+ * @param rank Rank parameter for matrix factorization.
+ */
+ CF(arma::mat& data,
+ FactorizerType& factorizer,
+ const size_t numUsersForSimilarity = 5,
+ const size_t rank = 0);
+
+ /*void ApplyFactorizer(arma::mat& data, const typename boost::enable_if_c<
+ FactorizerTraits<FactorizerType>::IsCleaned == false, int*>::type);
+
+ void ApplyFactorizer(arma::mat& data, const typename boost::enable_if_c<
+ FactorizerTraits<FactorizerType>::IsCleaned == true, int*>::type);*/
//! Sets number of users for calculating similarity.
void NumUsersForSimilarity(const size_t num)
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index 65cb78d..18403a3 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -12,6 +12,32 @@
namespace mlpack {
namespace cf {
+template<typename FactorizerType>
+void ApplyFactorizer(arma::mat& data,
+ arma::sp_mat& cleanedData,
+ FactorizerType& factorizer,
+ const size_t rank,
+ arma::mat& w,
+ arma::mat& h,
+ const typename boost::enable_if_c<
+ FactorizerTraits<FactorizerType>::IsCleaned == false, int*>::type = 0)
+{
+ factorizer.Apply(cleanedData, rank, w, h);
+}
+
+template<typename FactorizerType>
+void ApplyFactorizer(arma::mat& data,
+ arma::sp_mat& cleanedData,
+ FactorizerType& factorizer,
+ const size_t rank,
+ arma::mat& w,
+ arma::mat& h,
+ const typename boost::enable_if_c<
+ FactorizerTraits<FactorizerType>::IsCleaned == true, int*>::type = 0)
+{
+ factorizer.Apply(data, rank, w, h);
+}
+
/**
* Construct the CF object.
*/
@@ -24,7 +50,50 @@ CF<FactorizerType>::CF(arma::mat& data,
factorizer()
{
// Validate neighbourhood size.
- if (numUsersForSimilarity < 1)
+ if(numUsersForSimilarity < 1)
+ {
+ Log::Warn << "CF::CF(): neighbourhood size should be > 0("
+ << numUsersForSimilarity << " given). Setting value to 5.\n";
+ //Setting Default Value of 5
+ this->numUsersForSimilarity = 5;
+ }
+
+ CleanData(data);
+
+ // Check if the user wanted us to choose a rank for them.
+ if(rank == 0)
+ {
+ // This is a simple heuristic that picks a rank based on the density of the
+ // dataset between 5 and 105.
+ const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
+ const size_t rankEstimate = size_t(density) + 5;
+
+ // Set to heuristic value.
+ Log::Info << "No rank given for decomposition; using rank of "
+ << rankEstimate << " calculated by density-based heuristic."
+ << std::endl;
+ this->rank = rankEstimate;
+ }
+
+ // Operations independent of the query:
+ // Decompose the sparse data matrix to user and item matrices.
+ ApplyFactorizer<FactorizerType>(data, cleanedData, factorizer, this->rank, w, h);
+}
+
+/**
+ * Construct the CF object using an instantiated factorizer.
+ */
+template<typename FactorizerType>
+CF<FactorizerType>::CF(arma::mat& data,
+ FactorizerType& factorizer,
+ const size_t numUsersForSimilarity,
+ const size_t rank) :
+ numUsersForSimilarity(numUsersForSimilarity),
+ rank(rank),
+ factorizer(factorizer)
+{
+ // Validate neighbourhood size.
+ if(numUsersForSimilarity < 1)
{
Log::Warn << "CF::CF(): neighbourhood size should be > 0("
<< numUsersForSimilarity << " given). Setting value to 5.\n";
@@ -35,7 +104,7 @@ CF<FactorizerType>::CF(arma::mat& data,
CleanData(data);
// Check if the user wanted us to choose a rank for them.
- if (rank == 0)
+ if(rank == 0)
{
// This is a simple heuristic that picks a rank based on the density of the
// dataset between 5 and 105.
@@ -51,7 +120,7 @@ CF<FactorizerType>::CF(arma::mat& data,
// Operations independent of the query:
// Decompose the sparse data matrix to user and item matrices.
- factorizer.Apply(cleanedData, this->rank, w, h);
+ ApplyFactorizer<FactorizerType>(data, cleanedData, factorizer, this->rank, w, h);
}
template<typename FactorizerType>
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd.hpp b/src/mlpack/methods/regularized_svd/regularized_svd.hpp
index 3af7921..a602540 100644
--- a/src/mlpack/methods/regularized_svd/regularized_svd.hpp
+++ b/src/mlpack/methods/regularized_svd/regularized_svd.hpp
@@ -10,6 +10,7 @@
#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/sgd/sgd.hpp>
+#include <mlpack/methods/cf/cf.hpp>
#include "regularized_svd_function.hpp"
@@ -29,41 +30,54 @@ class RegularizedSVD
* RegularizedSVDFunction for optimization. It uses the SGD optimizer by
* default. The optimizer uses a template specialization of Optimize().
*
- * @param data Dataset for which SVD is calculated.
- * @param u User matrix in the matrix decomposition.
- * @param v Item matrix in the matrix decomposition.
- * @param rank Rank used for matrix factorization.
* @param iterations Number of optimization iterations.
+ * @param alpha Learning rate for the SGD optimizer.
* @param lambda Regularization parameter for the optimization.
*/
- RegularizedSVD(const arma::mat& data,
- arma::mat& u,
- arma::mat& v,
- const size_t rank,
- const size_t iterations = 10,
+ RegularizedSVD(const size_t iterations = 10,
const double alpha = 0.01,
const double lambda = 0.02);
+
+ /**
+ * Obtains the user and item matrices using the provided data and rank.
+ *
+ * @param data Rating data matrix.
+ * @param rank Rank parameter to be used for optimization.
+ * @param u Item matrix obtained on decomposition.
+ * @param v User matrix obtained on decomposition.
+ */
+ void Apply(const arma::mat& data,
+ const size_t rank,
+ arma::mat& u,
+ arma::mat& v);
private:
- //! Rating data.
- const arma::mat& data;
- //! Rank used for matrix factorization.
- size_t rank;
//! Number of optimization iterations.
size_t iterations;
//! Learning rate for the SGD optimizer.
double alpha;
//! Regularization parameter for the optimization.
double lambda;
- //! Function that will be held by the optimizer.
- RegularizedSVDFunction rSVDFunc;
- //! Default SGD optimizer for the class.
- mlpack::optimization::SGD<RegularizedSVDFunction> optimizer;
};
}; // namespace svd
}; // namespace mlpack
+namespace mlpack {
+namespace cf {
+
+//! Factorizer traits of Regularized SVD.
+template<>
+class FactorizerTraits<mlpack::svd::RegularizedSVD<> >
+{
+ public:
+ //! Data provided to RegularizedSVD need not be cleaned.
+ static const bool IsCleaned = true;
+};
+
+}; // namespace cf
+}; // namespace mlpack
+
// Include implementation.
#include "regularized_svd_impl.hpp"
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp b/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp
index d88e678..7c7fbfc 100644
--- a/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp
+++ b/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp
@@ -12,33 +12,38 @@ namespace mlpack {
namespace svd {
template<template<typename> class OptimizerType>
-RegularizedSVD<OptimizerType>::RegularizedSVD(const arma::mat& data,
- arma::mat& u,
- arma::mat& v,
- const size_t rank,
- const size_t iterations,
+RegularizedSVD<OptimizerType>::RegularizedSVD(const size_t iterations,
const double alpha,
const double lambda) :
- data(data),
- rank(rank),
iterations(iterations),
alpha(alpha),
- lambda(lambda),
- rSVDFunc(data, rank, lambda),
- optimizer(rSVDFunc, alpha, iterations * data.n_cols)
+ lambda(lambda)
{
- arma::mat parameters = rSVDFunc.GetInitialPoint();
+ // Nothing to do.
+}
- // Train the model.
- Timer::Start("regularized_svd_optimization");
- const double out = optimizer.Optimize(parameters);
- Timer::Stop("regularized_svd_optimization");
+template<template<typename> class OptimizerType>
+void RegularizedSVD<OptimizerType>::Apply(const arma::mat& data,
+ const size_t rank,
+ arma::mat& u,
+ arma::mat& v)
+{
+ // Make the optimizer object using a RegularizedSVDFunction object.
+ RegularizedSVDFunction rSVDFunc(data, rank, lambda);
+ mlpack::optimization::SGD<RegularizedSVDFunction> optimizer(rSVDFunc, alpha,
+ iterations * data.n_cols);
+
+ // Get optimized parameters.
+ arma::mat parameters = rSVDFunc.GetInitialPoint();
+ optimizer.Optimize(parameters);
+ // Constants for extracting user and item matrices.
const size_t numUsers = max(data.row(0)) + 1;
const size_t numItems = max(data.row(1)) + 1;
- u = parameters.submat(0, 0, rank - 1, numUsers - 1);
- v = parameters.submat(0, numUsers, rank - 1, numUsers + numItems - 1);
+ // Extract user and item matrices from the optimized parameters.
+ u = parameters.submat(0, numUsers, rank - 1, numUsers + numItems - 1).t();
+ v = parameters.submat(0, 0, rank - 1, numUsers - 1);
}
}; // namespace svd
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list