[mlpack] 324/324: Made Reg SVD work with CF.

Barak A. Pearlmutter barak+git at cs.nuim.ie
Sun Aug 17 08:22:22 UTC 2014


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch svn-trunk
in repository mlpack.

commit 610127221eb64f8f25f317caf483e0ae081fd4e4
Author: siddharth.950 <siddharth.950 at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date:   Sat Aug 16 18:53:31 2014 +0000

    Made Reg SVD work with CF.
    
    git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@17044 9d5b8971-822b-0410-80eb-d18c1038ef23
---
 src/mlpack/methods/cf/cf.hpp                       | 41 ++++++++++++
 src/mlpack/methods/cf/cf_impl.hpp                  | 75 +++++++++++++++++++++-
 .../methods/regularized_svd/regularized_svd.hpp    | 48 +++++++++-----
 .../regularized_svd/regularized_svd_impl.hpp       | 39 ++++++-----
 4 files changed, 166 insertions(+), 37 deletions(-)

diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index f8321ab..edca85f 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -24,6 +24,24 @@ namespace mlpack {
 namespace cf /** Collaborative filtering. */ {
 
 /**
+ * Template class for factorizer traits. This stores the default values for the
+ * variables to be assumed for a given factorizer. If any of the factorizers
+ * needs to have a different value for the traits, a template specialization has
+ * to be written for that factorizer. An example can be found in the module for
+ * Regularized SVD.
+ */
+template<typename FactorizerType>
+class FactorizerTraits
+{
+ public:
+  /**
+   * If true, then the passed data matrix is used for factorizer.Apply().
+   * Otherwise, it is modified into a form suitable for factorization.
+   */
+  static const bool IsCleaned = false;
+};
+
+/**
  * This class implements Collaborative Filtering (CF). This implementation
  * presently supports Alternating Least Squares (ALS) for collaborative
  * filtering.
@@ -73,6 +91,29 @@ class CF
   CF(arma::mat& data,
      const size_t numUsersForSimilarity = 5,
      const size_t rank = 0);
+  
+  /**
+   * Initialize the CF object using an instantiated factorizer. Store a
+   * reference to the data that we will be using. There are parameters that can
+   * be set; default values are provided for each of them. If the rank is left
+   * unset (or is set to 0), a simple density-based heuristic will be used to
+   * choose a rank.
+   *
+   * @param data Initial (user, item, rating) matrix.
+   * @param factorizer Instantiated factorizer object.
+   * @param numUsersForSimilarity Size of the neighborhood.
+   * @param rank Rank parameter for matrix factorization.
+   */
+  CF(arma::mat& data,
+     FactorizerType& factorizer,
+     const size_t numUsersForSimilarity = 5,
+     const size_t rank = 0);
+   
+  /*void ApplyFactorizer(arma::mat& data, const typename boost::enable_if_c<
+      FactorizerTraits<FactorizerType>::IsCleaned == false, int*>::type);
+      
+  void ApplyFactorizer(arma::mat& data, const typename boost::enable_if_c<
+      FactorizerTraits<FactorizerType>::IsCleaned == true, int*>::type);*/
 
   //! Sets number of users for calculating similarity.
   void NumUsersForSimilarity(const size_t num)
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index 65cb78d..18403a3 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -12,6 +12,32 @@
 namespace mlpack {
 namespace cf {
 
+template<typename FactorizerType>
+void ApplyFactorizer(arma::mat& data,
+    arma::sp_mat& cleanedData,
+    FactorizerType& factorizer,
+    const size_t rank,
+    arma::mat& w,
+    arma::mat& h,
+    const typename boost::enable_if_c<
+    FactorizerTraits<FactorizerType>::IsCleaned == false, int*>::type = 0)
+{
+  factorizer.Apply(cleanedData, rank, w, h);
+}
+
+template<typename FactorizerType>
+void ApplyFactorizer(arma::mat& data,
+    arma::sp_mat& cleanedData,
+    FactorizerType& factorizer,
+    const size_t rank,
+    arma::mat& w,
+    arma::mat& h,
+    const typename boost::enable_if_c<
+    FactorizerTraits<FactorizerType>::IsCleaned == true, int*>::type = 0)
+{
+  factorizer.Apply(data, rank, w, h);
+}
+
 /**
  * Construct the CF object.
  */
@@ -24,7 +50,50 @@ CF<FactorizerType>::CF(arma::mat& data,
     factorizer()
 {
   // Validate neighbourhood size.
-  if (numUsersForSimilarity < 1)
+  if(numUsersForSimilarity < 1)
+  {
+    Log::Warn << "CF::CF(): neighbourhood size should be > 0("
+        << numUsersForSimilarity << " given). Setting value to 5.\n";
+    //Setting Default Value of 5
+    this->numUsersForSimilarity = 5;
+  }
+
+  CleanData(data);
+
+  // Check if the user wanted us to choose a rank for them.
+  if(rank == 0)
+  {
+    // This is a simple heuristic that picks a rank based on the density of the
+    // dataset between 5 and 105.
+    const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
+    const size_t rankEstimate = size_t(density) + 5;
+
+    // Set to heuristic value.
+    Log::Info << "No rank given for decomposition; using rank of "
+        << rankEstimate << " calculated by density-based heuristic."
+        << std::endl;
+    this->rank = rankEstimate;
+  }
+
+  // Operations independent of the query:
+  // Decompose the sparse data matrix to user and data matrices.
+  ApplyFactorizer<FactorizerType>(data, cleanedData, factorizer, this->rank, w, h);
+}
+
+/**
+ * Construct the CF object using an instantiated factorizer.
+ */
+template<typename FactorizerType>
+CF<FactorizerType>::CF(arma::mat& data,
+                       FactorizerType& factorizer,
+                       const size_t numUsersForSimilarity,
+                       const size_t rank) :
+    numUsersForSimilarity(numUsersForSimilarity),
+    rank(rank),
+    factorizer(factorizer)
+{
+  // Validate neighbourhood size.
+  if(numUsersForSimilarity < 1)
   {
     Log::Warn << "CF::CF(): neighbourhood size should be > 0("
         << numUsersForSimilarity << " given). Setting value to 5.\n";
@@ -35,7 +104,7 @@ CF<FactorizerType>::CF(arma::mat& data,
   CleanData(data);
 
   // Check if the user wanted us to choose a rank for them.
-  if (rank == 0)
+  if(rank == 0)
   {
     // This is a simple heuristic that picks a rank based on the density of the
     // dataset between 5 and 105.
@@ -51,7 +120,7 @@ CF<FactorizerType>::CF(arma::mat& data,
 
   // Operations independent of the query:
   // Decompose the sparse data matrix to user and data matrices.
-  factorizer.Apply(cleanedData, this->rank, w, h);
+  ApplyFactorizer<FactorizerType>(data, cleanedData, factorizer, this->rank, w, h);
 }
 
 template<typename FactorizerType>
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd.hpp b/src/mlpack/methods/regularized_svd/regularized_svd.hpp
index 3af7921..a602540 100644
--- a/src/mlpack/methods/regularized_svd/regularized_svd.hpp
+++ b/src/mlpack/methods/regularized_svd/regularized_svd.hpp
@@ -10,6 +10,7 @@
 
 #include <mlpack/core.hpp>
 #include <mlpack/core/optimizers/sgd/sgd.hpp>
+#include <mlpack/methods/cf/cf.hpp>
 
 #include "regularized_svd_function.hpp"
 
@@ -29,41 +30,54 @@ class RegularizedSVD
    * RegularizedSVDFunction for optimization. It uses the SGD optimizer by
    * default. The optimizer uses a template specialization of Optimize().
    *
-   * @param data Dataset for which SVD is calculated.
-   * @param u User matrix in the matrix decomposition.
-   * @param v Item matrix in the matrix decomposition.
-   * @param rank Rank used for matrix factorization.
    * @param iterations Number of optimization iterations.
+   * @param alpha Learning rate for the SGD optimizer.
    * @param lambda Regularization parameter for the optimization.
    */
-  RegularizedSVD(const arma::mat& data,
-                 arma::mat& u,
-                 arma::mat& v,
-                 const size_t rank,
-                 const size_t iterations = 10,
+  RegularizedSVD(const size_t iterations = 10,
                  const double alpha = 0.01,
                  const double lambda = 0.02);
+  
+  /**
+   * Obtains the user and item matrices using the provided data and rank.
+   *
+   * @param data Rating data matrix.
+   * @param rank Rank parameter to be used for optimization.
+   * @param u Item matrix obtained on decomposition.
+   * @param v User matrix obtained on decomposition.
+   */
+  void Apply(const arma::mat& data,
+             const size_t rank,
+             arma::mat& u,
+             arma::mat& v);
                  
  private:
-  //! Rating data.
-  const arma::mat& data;
-  //! Rank used for matrix factorization.
-  size_t rank;
   //! Number of optimization iterations.
   size_t iterations;
   //! Learning rate for the SGD optimizer.
   double alpha;
   //! Regularization parameter for the optimization.
   double lambda;
-  //! Function that will be held by the optimizer.
-  RegularizedSVDFunction rSVDFunc;
-  //! Default SGD optimizer for the class.
-  mlpack::optimization::SGD<RegularizedSVDFunction> optimizer;
 };
 
 }; // namespace svd
 }; // namespace mlpack
 
+namespace mlpack {
+namespace cf {
+
+//! Factorizer traits of Regularized SVD.
+template<>
+class FactorizerTraits<mlpack::svd::RegularizedSVD<> >
+{
+ public:
+  //! Data provided to RegularizedSVD need not be cleaned.
+  static const bool IsCleaned = true;
+};
+
+}; // namespace cf
+}; // namespace mlpack
+
 // Include implementation.
 #include "regularized_svd_impl.hpp"
 
diff --git a/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp b/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp
index d88e678..7c7fbfc 100644
--- a/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp
+++ b/src/mlpack/methods/regularized_svd/regularized_svd_impl.hpp
@@ -12,33 +12,38 @@ namespace mlpack {
 namespace svd {
 
 template<template<typename> class OptimizerType>
-RegularizedSVD<OptimizerType>::RegularizedSVD(const arma::mat& data,
-                                              arma::mat& u,
-                                              arma::mat& v,
-                                              const size_t rank,
-                                              const size_t iterations,
+RegularizedSVD<OptimizerType>::RegularizedSVD(const size_t iterations,
                                               const double alpha,
                                               const double lambda) :
-    data(data),
-    rank(rank),
     iterations(iterations),
     alpha(alpha),
-    lambda(lambda),
-    rSVDFunc(data, rank, lambda),
-    optimizer(rSVDFunc, alpha, iterations * data.n_cols)
+    lambda(lambda)
 {
-  arma::mat parameters = rSVDFunc.GetInitialPoint();
+  // Nothing to do.
+}
 
-  // Train the model.
-  Timer::Start("regularized_svd_optimization");
-  const double out = optimizer.Optimize(parameters);
-  Timer::Stop("regularized_svd_optimization");
+template<template<typename> class OptimizerType>
+void RegularizedSVD<OptimizerType>::Apply(const arma::mat& data,
+                                          const size_t rank,
+                                          arma::mat& u,
+                                          arma::mat& v)
+{
+  // Make the optimizer object using a RegularizedSVDFunction object.
+  RegularizedSVDFunction rSVDFunc(data, rank, lambda);
+  mlpack::optimization::SGD<RegularizedSVDFunction> optimizer(rSVDFunc, alpha,
+      iterations * data.n_cols);
+  
+  // Get optimized parameters.
+  arma::mat parameters = rSVDFunc.GetInitialPoint();
+  optimizer.Optimize(parameters);
   
+  // Constants for extracting user and item matrices.
   const size_t numUsers = max(data.row(0)) + 1;
   const size_t numItems = max(data.row(1)) + 1;
   
-  u = parameters.submat(0, 0, rank - 1, numUsers - 1);
-  v = parameters.submat(0, numUsers, rank - 1, numUsers + numItems - 1);
+  // Extract user and item matrices from the optimized parameters.
+  u = parameters.submat(0, numUsers, rank - 1, numUsers + numItems - 1).t();
+  v = parameters.submat(0, 0, rank - 1, numUsers - 1);
 }
 
 }; // namespace svd

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list