[mlpack] 292/324: * changed row_col_iterator::operator-- implementation * added documentation to termination policies * minor fix of PlainSVD module

Barak A. Pearlmutter barak+git at cs.nuim.ie
Sun Aug 17 08:22:19 UTC 2014


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch svn-trunk
in repository mlpack.

commit 4b20f831321560ee2a690b5d014a60c12a12d56f
Author: sumedhghaisas <sumedhghaisas at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date:   Wed Aug 6 16:52:35 2014 +0000

    * changed row_col_iterator::operator-- implementation
    * added documentation to termination policies
    * minor fix of PlainSVD module
    
    
    git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16976 9d5b8971-822b-0410-80eb-d18c1038ef23
---
 src/mlpack/core/arma_extend/Mat_extra_meat.hpp     |  44 ++++-----
 src/mlpack/methods/amf/amf.hpp                     |   2 +-
 src/mlpack/methods/amf/amf_impl.hpp                |   6 +-
 .../complete_incremental_termination.hpp           |  70 ++++++++++----
 .../incomplete_incremental_termination.hpp         |  65 +++++++++----
 .../simple_residue_termination.hpp                 |  63 +++++++++---
 .../simple_tolerance_termination.hpp               |  68 ++++++++++++-
 .../validation_RMSE_termination.hpp                | 107 ++++++++++++++++++---
 .../amf/update_rules/svd_batch_learning.hpp        |  20 ++--
 src/mlpack/methods/cf/cf.hpp                       |   6 +-
 src/mlpack/methods/cf/cf_impl.hpp                  |  46 ++++-----
 src/mlpack/methods/cf/plain_svd.cpp                |   4 +-
 src/mlpack/tests/cf_test.cpp                       |  27 ------
 13 files changed, 375 insertions(+), 153 deletions(-)

diff --git a/src/mlpack/core/arma_extend/Mat_extra_meat.hpp b/src/mlpack/core/arma_extend/Mat_extra_meat.hpp
index 6a9b2fa..ac5242d 100644
--- a/src/mlpack/core/arma_extend/Mat_extra_meat.hpp
+++ b/src/mlpack/core/arma_extend/Mat_extra_meat.hpp
@@ -28,8 +28,8 @@ Mat<eT>::const_row_col_iterator::const_row_col_iterator(const const_row_iterator
     : M(&it.M), current_pos(&it.M(it.row, it.col)), internal_col(it.col), internal_row(it.row)
   {
   // Nothing to do.
-  }
-
+  } 
+  
 
 
 template<typename eT>
@@ -87,18 +87,18 @@ Mat<eT>::const_row_col_iterator::operator++(int)
 template<typename eT>
 inline typename Mat<eT>::const_row_col_iterator&
 Mat<eT>::const_row_col_iterator::operator--()
-  {
-  current_pos--;
-  internal_row--;
-
-  // Check to see if we moved a column.
-  if(internal_row == -1)
     {
+  if(internal_row != 0)
+    {
+    current_pos--;
+    internal_row--;
+    }
+  else if(internal_col != 0)
+    {
+    current_pos--;
     internal_col--;
     internal_row = M->n_rows - 1;
     }
-
-  return *this;
   }
 
 
@@ -176,7 +176,7 @@ Mat<eT>::const_row_col_iterator::operator==(const iterator& rhs) const
   {
   return (rhs == current_pos);
   }
-
+  
 
 
 template<typename eT>
@@ -212,7 +212,7 @@ Mat<eT>::const_row_col_iterator::operator==(const row_iterator& rhs) const
   {
   return (&rhs.M(rhs.row, rhs.col) == current_pos);
   }
-
+  
 
 
 template<typename eT>
@@ -293,18 +293,18 @@ Mat<eT>::row_col_iterator::operator++(int)
 template<typename eT>
 inline typename Mat<eT>::row_col_iterator&
 Mat<eT>::row_col_iterator::operator--()
-  {
-  current_pos--;
-  internal_row--;
-
-  // Check to see if we moved a column.
-  if(internal_row == -1)
     {
+  if(internal_row != 0)
+    {
+    current_pos--;
+    internal_row--;
+    }
+  else if(internal_col != 0)
+    {
+    current_pos--;
     internal_col--;
     internal_row = M->n_rows - 1;
     }
-
-  return *this;
   }
 
 
@@ -318,7 +318,7 @@ Mat<eT>::row_col_iterator::operator--(int)
   --(*this);
 
   return temp;
-  }
+  } 
 
 
 
@@ -327,7 +327,7 @@ inline bool
 Mat<eT>::row_col_iterator::operator==(const const_row_col_iterator& rhs) const
   {
   return (rhs.current_pos == current_pos);
-  }
+  } 
 
 
 
diff --git a/src/mlpack/methods/amf/amf.hpp b/src/mlpack/methods/amf/amf.hpp
index ef20b0b..6f7e91e 100644
--- a/src/mlpack/methods/amf/amf.hpp
+++ b/src/mlpack/methods/amf/amf.hpp
@@ -54,7 +54,7 @@ namespace amf {
  * @tparam UpdateRule The update rule for calculating W and H matrix at each
  *     iteration.
  *
- * @see NMF_MultiplicativeDistanceUpdate
+ * @see NMF_MultiplicativeDistanceUpdate, SimpleResidueTermination
  */
 template<typename TerminationPolicyType = SimpleResidueTermination,
          typename InitializationRuleType = RandomInitialization,
diff --git a/src/mlpack/methods/amf/amf_impl.hpp b/src/mlpack/methods/amf/amf_impl.hpp
index d99cf57..d27b04d 100644
--- a/src/mlpack/methods/amf/amf_impl.hpp
+++ b/src/mlpack/methods/amf/amf_impl.hpp
@@ -47,16 +47,20 @@ Apply(const MatType& V,
 
   Log::Info << "Initialized W and H." << std::endl;
 
+  // initialize the update rule
   update.Initialize(V, r);
+  // initialize the termination policy
   terminationPolicy.Initialize(V);
 
+  // check if termination conditions are met
   while (!terminationPolicy.IsConverged(W, H))
   {
     // Update the values of W and H based on the update rules provided.
     update.WUpdate(V, W, H);
     update.HUpdate(V, W, H);
   }
-
+  
+  // get final residue and iteration count from termination policy
   const double residue = terminationPolicy.Index();
   const size_t iteration = terminationPolicy.Iteration();
 
diff --git a/src/mlpack/methods/amf/termination_policies/complete_incremental_termination.hpp b/src/mlpack/methods/amf/termination_policies/complete_incremental_termination.hpp
index 7e28d0a..d3d2077 100644
--- a/src/mlpack/methods/amf/termination_policies/complete_incremental_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/complete_incremental_termination.hpp
@@ -1,11 +1,8 @@
 /**
- * @file cf.hpp
+ * @file complete_incremental_termination.hpp
  * @author Sumedh Ghaisas
  *
- * Collaborative filtering.
- *
- * Defines the CF class to perform collaborative filtering on the specified data
- * set using alternating least squares (ALS).
+ * Termination policy used in AMF (Alternating Matrix Factorization).
  */
 #ifndef _MLPACK_METHODS_AMF_COMPLETE_INCREMENTAL_TERMINATION_HPP_INCLUDED
 #define _MLPACK_METHODS_AMF_COMPLETE_INCREMENTAL_TERMINATION_HPP_INCLUDED
@@ -15,58 +12,93 @@ namespace mlpack
 namespace amf
 {
 
+/**
+ * This class acts as a wrapper for basic termination policies to be used by 
+ * SVDCompleteIncrementalLearning. This class calls the wrapped class functions
+ * after every n calls to main class functions where n is the number of non-zero
+ * entries in the matrix being factorized. 
+ *
+ * @see AMF, SVDCompleteIncrementalLearning
+ */
 template <class TerminationPolicy>
 class CompleteIncrementalTermination
 {
  public:
+  //! empty constructor
   CompleteIncrementalTermination(TerminationPolicy t_policy = TerminationPolicy())
             : t_policy(t_policy) {}
 
+  /**
+   * Initializes the termination policy before starting the factorization.
+   *
+   * @param V Input matrix to be factorized.
+   */
   template <class MatType>
   void Initialize(const MatType& V)
   {
     t_policy.Initialize(V);
 
+    //! get number of non-zero entries
     incrementalIndex = accu(V != 0);
     iteration = 0;
   }
 
+  /**
+   * Initializes the termination policy before starting the factorization.
+   *
+   * @param V Input matrix to be factorized.
+   */
   void Initialize(const arma::sp_mat& V)
   {
     t_policy.Initialize(V);
 
+    // get number of non-zero entries
     incrementalIndex = V.n_nonzero;
     iteration = 0;
   }
 
+  /**
+   * Check if termination criterion is met.
+   *
+   * @param W Basis matrix of output.
+   * @param H Encoding matrix of output.
+   */
   bool IsConverged(arma::mat& W, arma::mat& H)
   {
+    // increment iteration count
     iteration++;
+    
+    // if iteration count is multiple of incremental index,
+    // return wrapped class function
     if(iteration % incrementalIndex == 0)
       return t_policy.IsConverged(W, H);
+    // else just return false
     else return false;
   }
 
-  const double& Index()
-  {
-    return t_policy.Index();
-  }
-  const size_t& Iteration()
-  {
-    return iteration;
-  }
+  //! Get current value of residue
+  const double& Index() const { return t_policy.Index(); }
+
+  //! Get current iteration count  
+  const size_t& Iteration() const { return iteration; }
   
-  const size_t& MaxIterations()
-  {
-    return t_policy.MaxIterations();
-  }
+  //! Access upper limit of iteration count
+  const size_t& MaxIterations() const { return t_policy.MaxIterations(); }
+  size_t& MaxIterations() { return t_policy.MaxIterations(); }
+  
+  //! Access the wrapped class object
+  const TerminationPolicy& TPolicy() const { return t_policy; }
+  TerminationPolicy& TPolicy() { return t_policy; }
 
  private:
+  //! wrapped class object
   TerminationPolicy t_policy;
-
+  
+  //! number of iterations after which wrapped class object will be called
   size_t incrementalIndex;
+  //! current iteration count
   size_t iteration;
-};
+}; // class CompleteIncrementalTermination
 
 } // namespace amf
 } // namespace mlpack
diff --git a/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp b/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp
index d53b8b7..a6cd415 100644
--- a/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp
@@ -1,61 +1,92 @@
 /**
  * @file incomplete_incremental_termination.hpp
  * @author Sumedh Ghaisas
+ *
+ * Termination policy used in AMF (Alternating Matrix Factorization).
  */
-#ifndef _INCOMPLETE_INCREMENTAL_TERMINATION_HPP_INCLUDED
-#define _INCOMPLETE_INCREMENTAL_TERMINATION_HPP_INCLUDED
+#ifndef _MLPACK_METHODS_AMF_INCOMPLETEINCREMENTALTERMINATION_HPP_INCLUDED
+#define _MLPACK_METHODS_AMF_INCOMPLETEINCREMENTALTERMINATION_HPP_INCLUDED
 
 #include <mlpack/core.hpp>
 
 namespace mlpack {
 namespace amf {
 
+/**
+ * This class acts as a wrapper for basic termination policies to be used by 
+ * SVDIncompleteIncrementalLearning. This class calls the wrapped class functions
+ * after every n calls to main class functions where n is the number of rows. 
+ *
+ * @see AMF, SVDIncompleteIncrementalLearning
+ */
 template <class TerminationPolicy>
 class IncompleteIncrementalTermination
 {
  public:
+  //! empty constructor
   IncompleteIncrementalTermination(TerminationPolicy t_policy = TerminationPolicy())
             : t_policy(t_policy) {}
 
+  /**
+   * Initializes the termination policy before starting the factorization.
+   *
+   * @param V Input matrix to be factorized.
+   */
   template <class MatType>
   void Initialize(const MatType& V)
   {
     t_policy.Initialize(V);
-
+    
+    // initialize incremental index to number of rows
     incrementalIndex = V.n_rows;
     iteration = 0;
   }
 
+  /**
+   * Check if termination criterion is met.
+   *
+   * @param W Basis matrix of output.
+   * @param H Encoding matrix of output.
+   */
   bool IsConverged(arma::mat& W, arma::mat& H)
   {
+    // increment iteration count
     iteration++;
+    
+    // if iteration count is multiple of incremental index,
+    // return wrapped class function
     if(iteration % incrementalIndex == 0)  
       return t_policy.IsConverged(W, H);
+    // else just return false
     else return false;
   }
 
-  const double& Index()
-  {
-    return t_policy.Index();
-  }
-  const size_t& Iteration()
-  {
-    return iteration;
-  }
-  const size_t& MaxIterations()
-  {
-    return t_policy.MaxIterations();
-  }
+  //! Get current value of residue
+  const double& Index() const { return t_policy.Index(); }
+
+  //! Get current iteration count  
+  const size_t& Iteration() const { return iteration; }
+  
+  //! Access upper limit of iteration count
+  const size_t& MaxIterations() const { return t_policy.MaxIterations(); }
+  size_t& MaxIterations() { return t_policy.MaxIterations(); }
+  
+  //! Access the wrapped class object
+  const TerminationPolicy& TPolicy() const { return t_policy; }
+  TerminationPolicy& TPolicy() { return t_policy; }
   
  private:
+  //! wrapped class object
   TerminationPolicy t_policy;
 
+  //! number of iterations after which wrapped class object will be called
   size_t incrementalIndex;
+  //! current iteration count
   size_t iteration;
-};
+}; // class IncompleteIncrementalTermination
 
 }; // namespace amf
 }; // namespace mlpack
 
-#endif
+#endif // _MLPACK_METHODS_AMF_INCOMPLETEINCREMENTALTERMINATION_HPP_INCLUDED
 
diff --git a/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp b/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
index 3e5f7b8..66d7930 100644
--- a/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
@@ -1,6 +1,8 @@
 /**
  * @file simple_residue_termination.hpp
  * @author Sumedh Ghaisas
+ *
+ * Termination policy used in AMF (Alternating Matrix Factorization).
  */
 #ifndef _MLPACK_METHODS_AMF_SIMPLERESIDUETERMINATION_HPP_INCLUDED
 #define _MLPACK_METHODS_AMF_SIMPLERESIDUETERMINATION_HPP_INCLUDED
@@ -10,58 +12,97 @@
 namespace mlpack {
 namespace amf {
 
+/**
+ * This class implements simple residue based termination policy. Termination 
+ * decision depends on two factors, value of residue and number of iteration. 
+ * If the current value of residue drops below the threshold or the number of 
+ * iterations goes above the threshold, positive termination signal is passed 
+ * to AMF.
+ *
+ * @see AMF
+ */
 class SimpleResidueTermination
 {
  public:
+  //! empty constructor
   SimpleResidueTermination(const double minResidue = 1e-10,
                            const size_t maxIterations = 10000)
         : minResidue(minResidue), maxIterations(maxIterations) { }
 
+  /**
+   * Initializes the termination policy before starting the factorization.
+   *
+   * @param V Input matrix being factorized.
+   */
   template<typename MatType>
   void Initialize(const MatType& V)
   {
+    // set residue to minimum value
     residue = minResidue;
+    // set iteration to minimum value
     iteration = 1;
+    // remove history
     normOld = 0;
 
+    // initialize required variables
     const size_t n = V.n_rows;
     const size_t m = V.n_cols;
-
     nm = n * m;
   }
 
+  /**
+   * Check if termination criterion is met.
+   *
+   * @param W Basis matrix of output.
+   * @param H Encoding matrix of output.
+   */
   bool IsConverged(arma::mat& W, arma::mat& H)
   {
     // Calculate norm of WH after each iteration.
     arma::mat WH;
 
+    // calculate the norm and compute the residue 
     WH = W * H;
     double norm = sqrt(accu(WH % WH) / nm);
+    residue = fabs(normOld - norm);
+    residue /= normOld;
 
-    if (iteration != 0)
-    {
-      residue = fabs(normOld - norm);
-      residue /= normOld;
-    }
-
+    // store the residue into history
     normOld = norm;
-
+    
+    // increment iteration count
     iteration++;
     
+    // check if termination criterion is met
     if(residue < minResidue || iteration > maxIterations) return true;
     else return false;
   }
 
-  const double& Index() { return residue; }
-  const size_t& Iteration() { return iteration; }
-  const size_t& MaxIterations() { return maxIterations; }
+  //! Get current value of residue
+  const double& Index() const { return residue; }
+
+  //! Get current iteration count  
+  const size_t& Iteration() const { return iteration; }
+  
+  //! Access max iteration count
+  const size_t& MaxIterations() const { return maxIterations; }
+  size_t& MaxIterations() { return maxIterations; }
+  
+  //! Access minimum residue value
+  const double& MinResidue() const { return minResidue; }
+  double& MinResidue() { return minResidue; }
 
 public:
+  //! residue threshold
   double minResidue;
+  //! iteration threshold
   size_t maxIterations;
 
+  //! current value of residue
   double residue;
+  //! current iteration count
   size_t iteration;
+  //! norm of previous iteration
   double normOld;
 
   size_t nm;
diff --git a/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp b/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
index 18fef4d..3c12e21 100644
--- a/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
@@ -1,6 +1,8 @@
 /**
  * @file simple_tolerance_termination.hpp
  * @author Sumedh Ghaisas
+ *
+ * Termination policy used in AMF (Alternating Matrix Factorization).
  */
 #ifndef _MLPACK_METHODS_AMF_SIMPLE_TOLERANCE_TERMINATION_HPP_INCLUDED
 #define _MLPACK_METHODS_AMF_SIMPLE_TOLERANCE_TERMINATION_HPP_INCLUDED
@@ -10,10 +12,21 @@
 namespace mlpack {
 namespace amf {
 
+/**
+ * This class implements residue tolerance termination policy. Termination 
+ * criterion is met when increase in residue value drops below the given tolerance.
+ * To accommodate spikes, a certain number of successive residue drops are accepted.
+ * This upper limit on successive drops can be adjusted with reverseStepTolerance.
+ * Secondary termination criterion terminates algorithm when iteration count 
+ * goes above the threshold. 
+ *
+ * @see AMF
+ */
 template <class MatType>
 class SimpleToleranceTermination
 {
  public:
+  //! empty constructor
   SimpleToleranceTermination(const double tolerance = 1e-5,
                              const size_t maxIterations = 10000,
                              const size_t reverseStepTolerance = 3)
@@ -21,6 +34,11 @@ class SimpleToleranceTermination
               maxIterations(maxIterations),
               reverseStepTolerance(reverseStepTolerance) {}
 
+  /**
+   * Initializes the termination policy before starting the factorization.
+   *
+   * @param V Input matrix to be factorized.
+   */
   void Initialize(const MatType& V)
   {
     residueOld = DBL_MAX;
@@ -36,13 +54,19 @@ class SimpleToleranceTermination
     reverseStepCount = 0;
   }
 
+  /**
+   * Check if termination criterion is met.
+   *
+   * @param W Basis matrix of output.
+   * @param H Encoding matrix of output.
+   */
   bool IsConverged(arma::mat& W, arma::mat& H)
   {
-    // Calculate norm of WH after each iteration.
     arma::mat WH;
 
     WH = W * H;
 
+    // compute residue
     residueOld = residue;
     size_t n = V->n_rows;
     size_t m = V->n_cols;
@@ -65,31 +89,43 @@ class SimpleToleranceTermination
     residue = sum / count;
     residue = sqrt(residue);
 
+    // increment iteration count
     iteration++;  
-  
+    
+    // if residue tolerance is not satisfied
     if((residueOld - residue) / residueOld < tolerance && iteration > 4)
     {
+      // check if this is a first of successive drops
       if(reverseStepCount == 0 && isCopy == false)
       {
+        // store a copy of W and H matrix
         isCopy = true;
         this->W = W;
         this->H = H;
+        // store residue values
         c_index = residue;
         c_indexOld = residueOld;
       }
+      // increase successive drop count
       reverseStepCount++;
     }
+    // if tolerance is satisfied
     else
     {
+      // initialize successive drop count
       reverseStepCount = 0;
+      // if residue has dropped to or below the stored value, discard the stored copy
       if(residue <= c_indexOld && isCopy == true)
       {
         isCopy = false;
       }
     }
 
+    // check if termination criterion is met
     if(reverseStepCount == reverseStepTolerance || iteration > maxIterations)
     {
+      // if stored values are present, replace the current values with them, as
+      // they represent the minimum residue point
       if(isCopy)
       {
         W = this->W;
@@ -101,25 +137,47 @@ class SimpleToleranceTermination
     else return false;
   }
 
-  const double& Index() { return residue; }
-  const size_t& Iteration() { return iteration; }
-  const size_t& MaxIterations() { return maxIterations; }
+  //! Get current value of residue
+  const double& Index() const { return residue; }
+
+  //! Get current iteration count  
+  const size_t& Iteration() const { return iteration; }
+  
+  //! Access upper limit of iteration count
+  const size_t& MaxIterations() const { return maxIterations; }
+  size_t& MaxIterations() { return maxIterations; }
+  
+  //! Access tolerance value
+  const double& Tolerance() const { return tolerance; }
+  double& Tolerance() { return tolerance; }
 
  private:
+  //! tolerance 
   double tolerance;
+  //! iteration threshold
   size_t maxIterations;
 
+  //! pointer to matrix being factorized
   const MatType* V;
 
+  //! current iteration count
   size_t iteration;
+  
+  //! residue values
   double residueOld;
   double residue;
   double normOld;
 
+  //! tolerance on successive residue drops
   size_t reverseStepTolerance;
+  //! successive residue drops 
   size_t reverseStepCount;
   
+  //! indicates whether a copy of information is available which corresponds to
+  //! minimum residue point
   bool isCopy;
+  
+  //! variables to store information of minimum residue point
   arma::mat W;
   arma::mat H;
   double c_indexOld;
diff --git a/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp b/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp
index f63be80..844c151 100644
--- a/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp
@@ -1,5 +1,11 @@
-#ifndef VALIDATION_RMSE_TERMINATION_HPP_INCLUDED
-#define VALIDATION_RMSE_TERMINATION_HPP_INCLUDED
+/**
+ * @file validation_RMSE_termination.hpp
+ * @author Sumedh Ghaisas
+ *
+ * Termination policy used in AMF (Alternating Matrix Factorization).
+ */
+#ifndef _MLPACK_METHODS_AMF_VALIDATIONRMSETERMINATION_HPP_INCLUDED
+#define _MLPACK_METHODS_AMF_VALIDATIONRMSETERMINATION_HPP_INCLUDED
 
 #include <mlpack/core.hpp>
 
@@ -7,10 +13,34 @@ namespace mlpack
 {
 namespace amf
 {
+
+/**
+ * This class implements validation termination policy based on RMSE index. 
+ * The input data matrix is divided into 2 sets, training set and validation set.
+ * Entries of validation set are nullified in the input matrix. Termination
+ * criterion is met when increase in validation set RMSE value drops below the
+ * given tolerance. To accommodate spikes, a certain number of successive validation
+ * RMSE drops are accepted. This upper limit on successive drops can be adjusted
+ * with reverseStepTolerance. Secondary termination criterion terminates algorithm
+ * when iteration count goes above the threshold. 
+ *
+ * @note The input matrix is modified by this termination policy.
+ *
+ * @see AMF
+ */
 template <class MatType>
 class ValidationRMSETermination
 {
  public:
+  /**
+   * Create a validation set according to given parameters and nullifies this 
+   * set in data matrix(training set). 
+   *
+   * @param V Input matrix to be factorized.
+   * @param num_test_points number of validation test points
+   * @param maxIterations max iteration count before termination
+   * @param reverseStepTolerance max successive RMSE drops allowed
+   */
   ValidationRMSETermination(MatType& V,
                             size_t num_test_points,
                             double tolerance = 1e-5,
@@ -24,26 +54,38 @@ class ValidationRMSETermination
     size_t n = V.n_rows;
     size_t m = V.n_cols;
 
+    // initialize validation set matrix
     test_points.zeros(num_test_points, 3);
-
+    
+    // fill validation set matrix with random chosen entries
     for(size_t i = 0; i < num_test_points; i++)
     {
       double t_val;
       size_t t_row;
       size_t t_col;
+      
+      // pick a random non-zero entry
       do
       {
         t_row = rand() % n;
         t_col = rand() % m;
       } while((t_val = V(t_row, t_col)) == 0);
 
+      // add the entry to the validation set
       test_points(i, 0) = t_row;
       test_points(i, 1) = t_col;
       test_points(i, 2) = t_val;
+      
+      // nullify the added entry from data matrix (training set)
       V(t_row, t_col) = 0;
     }
   }
-
+  
+  /**
+   * Initializes the termination policy before starting the factorization.
+   *
+   * @param V Input matrix to be factorized.
+   */
   void Initialize(const MatType& /* V */)
   {
     iteration = 1;
@@ -58,13 +100,19 @@ class ValidationRMSETermination
     isCopy = false;
   }
 
+  /**
+   * Check if termination criterion is met.
+   *
+   * @param W Basis matrix of output.
+   * @param H Encoding matrix of output.
+   */
   bool IsConverged(arma::mat& W, arma::mat& H)
   {
-    // Calculate norm of WH after each iteration.
     arma::mat WH;
 
     WH = W * H;
 
+    // compute validation RMSE
     if (iteration != 0)
     {
       rmseOld = rmse;
@@ -82,31 +130,43 @@ class ValidationRMSETermination
       rmse = sqrt(rmse);
     }
 
+    // increment iteration count
     iteration++;
   
+    // if RMSE tolerance is not satisfied
     if((rmseOld - rmse) / rmseOld < tolerance && iteration > 4)
     {
+      // check if this is a first of successive drops
       if(reverseStepCount == 0 && isCopy == false)
       {
+        // store a copy of W and H matrix
         isCopy = true;
         this->W = W;
         this->H = H;
+        // store residue values
         c_indexOld = rmseOld;
         c_index = rmse;
       }
+      // increase successive drop count
       reverseStepCount++;
     }
+    // if tolerance is satisfied
     else
     {
+      // initialize successive drop count
       reverseStepCount = 0;
+      // if RMSE has dropped to or below the stored value, discard the stored copy
       if(rmse <= c_indexOld && isCopy == true)
       {
         isCopy = false;
       }
     }
 
+    // check if termination criterion is met
     if(reverseStepCount == reverseStepTolerance || iteration > maxIterations)
     {
+      // if stored values are present, replace the current values with them, as
+      // they represent the minimum residue point
       if(isCopy)
       {
         W = this->W;
@@ -118,36 +178,59 @@ class ValidationRMSETermination
     else return false;
   }
   
-  const double& Index() { return rmse; }
+  //! Get current value of residue
+  const double& Index() const { return rmse; }
 
-  const size_t& Iteration() { return iteration; }
+  //! Get current iteration count  
+  const size_t& Iteration() const { return iteration; }
+  
+  //! Get number of validation points
+  const size_t& NumTestPoints() const { return num_test_points; }
   
-  const size_t& MaxIterations() { return maxIterations; }
+  //! Access upper limit of iteration count
+  const size_t& MaxIterations() const { return maxIterations; }
+  size_t& MaxIterations() { return maxIterations; }
+  
+  //! Access tolerance value
+  const double& Tolerance() const { return tolerance; }
+  double& Tolerance() { return tolerance; }
 
  private:
+  //! tolerance
   double tolerance;
+  //! max iteration limit
   size_t maxIterations;
+  //! number of validation test points
   size_t num_test_points;
+  
+  //! current iteration count
   size_t iteration;
 
-  arma::Mat<double> test_points;
+  //! validation point matrix
+  arma::mat test_points;
 
+  //! rmse values
   double rmseOld;
   double rmse;
 
+  //! tolerance on successive residue drops
   size_t reverseStepTolerance;
+  //! successive residue drops 
   size_t reverseStepCount;
   
+  //! indicates whether a copy of information is available which corresponds to
+  //! minimum residue point
   bool isCopy;
+  
+  //! variables to store information of minimum residue point
   arma::mat W;
   arma::mat H;
   double c_indexOld;
   double c_index;
-};
+}; // class ValidationRMSETermination
 
 } // namespace amf
 } // namespace mlpack
 
 
-#endif // VALIDATION_RMSE_TERMINATION_HPP_INCLUDED
-
+#endif // _MLPACK_METHODS_AMF_VALIDATIONRMSETERMINATION_HPP_INCLUDED
diff --git a/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp b/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp
index bb31e9e..84e41ce 100644
--- a/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp
+++ b/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp
@@ -1,6 +1,8 @@
 /**
- * @file simple_residue_termination.hpp
+ * @file svd_batch_learning.hpp
  * @author Sumedh Ghaisas
+ *
+ * SVD factorization used in AMF (Alternating Matrix Factorization).
  */
 #ifndef __MLPACK_METHODS_AMF_UPDATE_RULES_SVD_BATCHLEARNING_HPP
 #define __MLPACK_METHODS_AMF_UPDATE_RULES_SVD_BATCHLEARNING_HPP
@@ -14,13 +16,19 @@ namespace amf
 class SVDBatchLearning
 {
  public:
+  /**
+   * SVD Batch learning constructor. 
+   *
+   * @param u step value used in batch learning
+   * @param kw regularization constant for W matrix
+   * @param kh regularization constant for H matrix
+   * @param momentum momentum applied to batch learning process
+   */
   SVDBatchLearning(double u = 0.0002,
                    double kw = 0,
                    double kh = 0,
-                   double momentum = 0.9,
-                   double min = -DBL_MIN,
-                   double max = DBL_MAX)
-        : u(u), kw(kw), kh(kh), min(min), max(max), momentum(momentum)
+                   double momentum = 0.9)
+        : u(u), kw(kw), kh(kh), momentum(momentum)
     {}
 
   template<typename MatType>
@@ -117,8 +125,6 @@ class SVDBatchLearning
   double u;
   double kw;
   double kh;
-  double min;
-  double max;
   double momentum;
 
   arma::mat mW;
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index bbb8883..500ed10 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -117,8 +117,6 @@ class CF
   const arma::mat& H() const { return h; }
   //! Get the Rating Matrix.
   const arma::mat& Rating() const { return rating; }
-  //! Get the data matrix.
-  const arma::mat& Data() const { return data; }
   //! Get the cleaned data matrix.
   const arma::sp_mat& CleanedData() const { return cleanedData; }
 
@@ -148,8 +146,6 @@ class CF
   std::string ToString() const;
 
  private:
-  //! Initial data matrix.
-  arma::mat data;
   //! Number of users for similarity.
   size_t numUsersForSimilarity;
   //! Rank used for matrix factorization.
@@ -165,7 +161,7 @@ class CF
   //! Cleaned data matrix.
   arma::sp_mat cleanedData;
   //! Converts the User, Item, Value Matrix to User-Item Table
-  void CleanData();
+  void CleanData(const arma::mat& data);
 
   /**
    * Helper function to insert a point into the recommendation matrices.
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index 69419fe..80389bf 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -6,7 +6,6 @@
  *
  * Implementation of CF class to perform Collaborative Filtering on the
  * specified data set.
- *
  */
 
 namespace mlpack {
@@ -19,7 +18,6 @@ template<typename FactorizerType>
 CF<FactorizerType>::CF(arma::mat& data,
                        const size_t numUsersForSimilarity,
                        const size_t rank) :
-    data(data),
     numUsersForSimilarity(numUsersForSimilarity),
     rank(rank),
     factorizer()
@@ -33,7 +31,26 @@ CF<FactorizerType>::CF(arma::mat& data,
     this->numUsersForSimilarity = 5;
   }
 
-  CleanData();
+  CleanData(data);
+
+  // Check if the user wanted us to choose a rank for them.
+  if (rank == 0)
+  {
+    // This is a simple heuristic that picks a rank based on the density of the
+    // dataset between 5 and 105.
+    const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
+    const size_t rankEstimate = size_t(density) + 5;
+
+    // Set to heuristic value.
+    Log::Info << "No rank given for decomposition; using rank of "
+        << rankEstimate << " calculated by density-based heuristic."
+        << std::endl;
+    this->rank = rankEstimate;
+  }
+
+  // Operations independent of the query:
+  // Decompose the sparse data matrix to user and data matrices.
+  factorizer.Apply(cleanedData, this->rank, w, h);
 }
 
 template<typename FactorizerType>
@@ -56,27 +73,6 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
                                             arma::Mat<size_t>& recommendations,
                                             arma::Col<size_t>& users)
 {
-  // Base function for calculating recommendations.
-
-  // Check if the user wanted us to choose a rank for them.
-  if (rank == 0)
-  {
-    // This is a simple heuristic that picks a rank based on the density of the
-    // dataset between 5 and 105.
-    const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
-    const size_t rankEstimate = size_t(density) + 5;
-
-    // Set to heuristic value.
-    Log::Info << "No rank given for decomposition; using rank of "
-        << rankEstimate << " calculated by density-based heuristic."
-        << std::endl;
-    rank = rankEstimate;
-  }
-
-  // Operations independent of the query:
-  // Decompose the sparse data matrix to user and data matrices.
-  factorizer.Apply(cleanedData, rank, w, h);
-
   // Generate new table by multiplying approximate values.
   rating = w * h;
 
@@ -156,7 +152,7 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
 }
 
 template<typename FactorizerType>
-void CF<FactorizerType>::CleanData()
+void CF<FactorizerType>::CleanData(const arma::mat& data)
 {
   // Generate list of locations for batch insert constructor for sparse
   // matrices.
diff --git a/src/mlpack/methods/cf/plain_svd.cpp b/src/mlpack/methods/cf/plain_svd.cpp
index d52bbcd..8495fe9 100644
--- a/src/mlpack/methods/cf/plain_svd.cpp
+++ b/src/mlpack/methods/cf/plain_svd.cpp
@@ -55,8 +55,10 @@ double PlainSVD::Apply(const arma::mat& V,
   sigma = sigma.subvec(0, r - 1);
 
   W = W * arma::diagmat(sigma);
+  
+  H = arma::trans(H);
 
-  arma::mat V_rec = W * arma::trans(H);
+  arma::mat V_rec = W * H;
 
   size_t n = V.n_rows;
   size_t m = V.n_cols;
diff --git a/src/mlpack/tests/cf_test.cpp b/src/mlpack/tests/cf_test.cpp
index 5744721..6b4d41e 100644
--- a/src/mlpack/tests/cf_test.cpp
+++ b/src/mlpack/tests/cf_test.cpp
@@ -19,33 +19,6 @@ using namespace mlpack::cf;
 using namespace std;
 
 /**
- * Make sure that the constructor works okay.
- */
-BOOST_AUTO_TEST_CASE(CFConstructorTest)
-{
-  // Load GroupLens data.
-  arma::mat dataset;
-  data::Load("GroupLens100k.csv", dataset);
-
-  // Number of users for similarity (not the default).
-  const size_t numUsersForSimilarity = 8;
-
-  CF<> c(dataset, numUsersForSimilarity);
-
-  // Check parameters.
-  BOOST_REQUIRE_EQUAL(c.NumUsersForSimilarity(), numUsersForSimilarity);
-
-  // Check data.
-  BOOST_REQUIRE_EQUAL(c.Data().n_rows, dataset.n_rows);
-  BOOST_REQUIRE_EQUAL(c.Data().n_cols, dataset.n_cols);
-
-  // Check values (this should be superfluous...).
-  for (size_t i = 0; i < dataset.n_rows; i++)
-    for (size_t j = 0; j < dataset.n_cols; j++)
-      BOOST_REQUIRE_EQUAL(c.Data()(i, j), dataset(i, j));
-}
-
-/**
  * Make sure that correct number of recommendations are generated when query
  * set. Default case.
  */

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list