[mlpack] 292/324: * changed row_col_iterator::operator-- implementation * added documentation to termination policies * minor fix of PlainSVD module
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:19 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 4b20f831321560ee2a690b5d014a60c12a12d56f
Author: sumedhghaisas <sumedhghaisas at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Wed Aug 6 16:52:35 2014 +0000
* changed row_col_iterator::operator-- implementation
* added documentation to termination policies
* minor fix of PlainSVD module
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16976 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/core/arma_extend/Mat_extra_meat.hpp | 44 ++++-----
src/mlpack/methods/amf/amf.hpp | 2 +-
src/mlpack/methods/amf/amf_impl.hpp | 6 +-
.../complete_incremental_termination.hpp | 70 ++++++++++----
.../incomplete_incremental_termination.hpp | 65 +++++++++----
.../simple_residue_termination.hpp | 63 +++++++++---
.../simple_tolerance_termination.hpp | 68 ++++++++++++-
.../validation_RMSE_termination.hpp | 107 ++++++++++++++++++---
.../amf/update_rules/svd_batch_learning.hpp | 20 ++--
src/mlpack/methods/cf/cf.hpp | 6 +-
src/mlpack/methods/cf/cf_impl.hpp | 46 ++++-----
src/mlpack/methods/cf/plain_svd.cpp | 4 +-
src/mlpack/tests/cf_test.cpp | 27 ------
13 files changed, 375 insertions(+), 153 deletions(-)
diff --git a/src/mlpack/core/arma_extend/Mat_extra_meat.hpp b/src/mlpack/core/arma_extend/Mat_extra_meat.hpp
index 6a9b2fa..ac5242d 100644
--- a/src/mlpack/core/arma_extend/Mat_extra_meat.hpp
+++ b/src/mlpack/core/arma_extend/Mat_extra_meat.hpp
@@ -28,8 +28,8 @@ Mat<eT>::const_row_col_iterator::const_row_col_iterator(const const_row_iterator
: M(&it.M), current_pos(&it.M(it.row, it.col)), internal_col(it.col), internal_row(it.row)
{
// Nothing to do.
- }
-
+ }
+
template<typename eT>
@@ -87,18 +87,18 @@ Mat<eT>::const_row_col_iterator::operator++(int)
template<typename eT>
inline typename Mat<eT>::const_row_col_iterator&
Mat<eT>::const_row_col_iterator::operator--()
- {
- current_pos--;
- internal_row--;
-
- // Check to see if we moved a column.
- if(internal_row == -1)
{
+ if(internal_row != 0)
+ {
+ current_pos--;
+ internal_row--;
+ }
+ else if(internal_col != 0)
+ {
+ current_pos--;
internal_col--;
internal_row = M->n_rows - 1;
}
-
- return *this;
}
@@ -176,7 +176,7 @@ Mat<eT>::const_row_col_iterator::operator==(const iterator& rhs) const
{
return (rhs == current_pos);
}
-
+
template<typename eT>
@@ -212,7 +212,7 @@ Mat<eT>::const_row_col_iterator::operator==(const row_iterator& rhs) const
{
return (&rhs.M(rhs.row, rhs.col) == current_pos);
}
-
+
template<typename eT>
@@ -293,18 +293,18 @@ Mat<eT>::row_col_iterator::operator++(int)
template<typename eT>
inline typename Mat<eT>::row_col_iterator&
Mat<eT>::row_col_iterator::operator--()
- {
- current_pos--;
- internal_row--;
-
- // Check to see if we moved a column.
- if(internal_row == -1)
{
+ if(internal_row != 0)
+ {
+ current_pos--;
+ internal_row--;
+ }
+ else if(internal_col != 0)
+ {
+ current_pos--;
internal_col--;
internal_row = M->n_rows - 1;
}
-
- return *this;
}
@@ -318,7 +318,7 @@ Mat<eT>::row_col_iterator::operator--(int)
--(*this);
return temp;
- }
+ }
@@ -327,7 +327,7 @@ inline bool
Mat<eT>::row_col_iterator::operator==(const const_row_col_iterator& rhs) const
{
return (rhs.current_pos == current_pos);
- }
+ }
diff --git a/src/mlpack/methods/amf/amf.hpp b/src/mlpack/methods/amf/amf.hpp
index ef20b0b..6f7e91e 100644
--- a/src/mlpack/methods/amf/amf.hpp
+++ b/src/mlpack/methods/amf/amf.hpp
@@ -54,7 +54,7 @@ namespace amf {
* @tparam UpdateRule The update rule for calculating W and H matrix at each
* iteration.
*
- * @see NMF_MultiplicativeDistanceUpdate
+ * @see NMF_MultiplicativeDistanceUpdate, SimpleResidueTermination
*/
template<typename TerminationPolicyType = SimpleResidueTermination,
typename InitializationRuleType = RandomInitialization,
diff --git a/src/mlpack/methods/amf/amf_impl.hpp b/src/mlpack/methods/amf/amf_impl.hpp
index d99cf57..d27b04d 100644
--- a/src/mlpack/methods/amf/amf_impl.hpp
+++ b/src/mlpack/methods/amf/amf_impl.hpp
@@ -47,16 +47,20 @@ Apply(const MatType& V,
Log::Info << "Initialized W and H." << std::endl;
+ // initialize the update rule
update.Initialize(V, r);
+ // initialize the termination policy
terminationPolicy.Initialize(V);
+ // check if termination conditions are met
while (!terminationPolicy.IsConverged(W, H))
{
// Update the values of W and H based on the update rules provided.
update.WUpdate(V, W, H);
update.HUpdate(V, W, H);
}
-
+
+ // get final residue and iteration count from termination policy
const double residue = terminationPolicy.Index();
const size_t iteration = terminationPolicy.Iteration();
diff --git a/src/mlpack/methods/amf/termination_policies/complete_incremental_termination.hpp b/src/mlpack/methods/amf/termination_policies/complete_incremental_termination.hpp
index 7e28d0a..d3d2077 100644
--- a/src/mlpack/methods/amf/termination_policies/complete_incremental_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/complete_incremental_termination.hpp
@@ -1,11 +1,8 @@
/**
- * @file cf.hpp
+ * @file complete_incremental_termination.hpp
* @author Sumedh Ghaisas
*
- * Collaborative filtering.
- *
- * Defines the CF class to perform collaborative filtering on the specified data
- * set using alternating least squares (ALS).
+ * Termination policy used in AMF (Alternating Matrix Factorization).
*/
#ifndef _MLPACK_METHODS_AMF_COMPLETE_INCREMENTAL_TERMINATION_HPP_INCLUDED
#define _MLPACK_METHODS_AMF_COMPLETE_INCREMENTAL_TERMINATION_HPP_INCLUDED
@@ -15,58 +12,93 @@ namespace mlpack
namespace amf
{
+/**
+ * This class acts as a wrapper for basic termination policies to be used by
+ * SVDCompleteIncrementalLearning. This class calls the wrapped class functions
+ * after every n calls to main class functions where n is the number of non-zero
+ * entries in the matrix being factorized.
+ *
+ * @see AMF, SVDCompleteIncrementalLearning
+ */
template <class TerminationPolicy>
class CompleteIncrementalTermination
{
public:
+ //! empty constructor
CompleteIncrementalTermination(TerminationPolicy t_policy = TerminationPolicy())
: t_policy(t_policy) {}
+ /**
+ * Initializes the termination policy before starting the factorization.
+ *
+ * @param V Input matrix to be factorized.
+ */
template <class MatType>
void Initialize(const MatType& V)
{
t_policy.Initialize(V);
+ //! get number of non-zero entries
incrementalIndex = accu(V != 0);
iteration = 0;
}
+ /**
+ * Initializes the termination policy before starting the factorization.
+ *
+ * @param V Input matrix to be factorized.
+ */
void Initialize(const arma::sp_mat& V)
{
t_policy.Initialize(V);
+ // get number of non-zero entries
incrementalIndex = V.n_nonzero;
iteration = 0;
}
+ /**
+ * Check if termination criterion is met.
+ *
+ * @param W Basis matrix of output.
+ * @param H Encoding matrix of output.
+ */
bool IsConverged(arma::mat& W, arma::mat& H)
{
+ // increment iteration count
iteration++;
+
+ // if iteration count is multiple of incremental index,
+ // return wrapped class function
if(iteration % incrementalIndex == 0)
return t_policy.IsConverged(W, H);
+ // else just return false
else return false;
}
- const double& Index()
- {
- return t_policy.Index();
- }
- const size_t& Iteration()
- {
- return iteration;
- }
+ //! Get current value of residue
+ const double& Index() const { return t_policy.Index(); }
+
+ //! Get current iteration count
+ const size_t& Iteration() const { return iteration; }
- const size_t& MaxIterations()
- {
- return t_policy.MaxIterations();
- }
+ //! Access upper limit of iteration count
+ const size_t& MaxIterations() const { return t_policy.MaxIterations(); }
+ size_t& MaxIterations() { return t_policy.MaxIterations(); }
+
+ //! Access the wrapped class object
+ const TerminationPolicy& TPolicy() const { return t_policy; }
+ TerminationPolicy& TPolicy() { return t_policy; }
private:
+ //! wrapped class object
TerminationPolicy t_policy;
-
+
+ //! number of iterations after which wrapped class object will be called
size_t incrementalIndex;
+ //! current iteration count
size_t iteration;
-};
+}; // class CompleteIncrementalTermination
} // namespace amf
} // namespace mlpack
diff --git a/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp b/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp
index d53b8b7..a6cd415 100644
--- a/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/incomplete_incremental_termination.hpp
@@ -1,61 +1,92 @@
/**
* @file incomplete_incremental_termination.hpp
* @author Sumedh Ghaisas
+ *
+ * Termination policy used in AMF (Alternating Matrix Factorization).
*/
-#ifndef _INCOMPLETE_INCREMENTAL_TERMINATION_HPP_INCLUDED
-#define _INCOMPLETE_INCREMENTAL_TERMINATION_HPP_INCLUDED
+#ifndef _MLPACK_METHODS_AMF_INCOMPLETEINCREMENTALTERMINATION_HPP_INCLUDED
+#define _MLPACK_METHODS_AMF_INCOMPLETEINCREMENTALTERMINATION_HPP_INCLUDED
#include <mlpack/core.hpp>
namespace mlpack {
namespace amf {
+/**
+ * This class acts as a wrapper for basic termination policies to be used by
+ * SVDIncompleteIncrementalLearning. This class calls the wrapped class functions
+ * after every n calls to main class functions where n is the number of rows.
+ *
+ * @see AMF, SVDIncompleteIncrementalLearning
+ */
template <class TerminationPolicy>
class IncompleteIncrementalTermination
{
public:
+ //! empty constructor
IncompleteIncrementalTermination(TerminationPolicy t_policy = TerminationPolicy())
: t_policy(t_policy) {}
+ /**
+ * Initializes the termination policy before starting the factorization.
+ *
+ * @param V Input matrix to be factorized.
+ */
template <class MatType>
void Initialize(const MatType& V)
{
t_policy.Initialize(V);
-
+
+ // initialize incremental index to number of rows
incrementalIndex = V.n_rows;
iteration = 0;
}
+ /**
+ * Check if termination criterion is met.
+ *
+ * @param W Basis matrix of output.
+ * @param H Encoding matrix of output.
+ */
bool IsConverged(arma::mat& W, arma::mat& H)
{
+ // increment iteration count
iteration++;
+
+ // if iteration count is multiple of incremental index,
+ // return wrapped class function
if(iteration % incrementalIndex == 0)
return t_policy.IsConverged(W, H);
+ // else just return false
else return false;
}
- const double& Index()
- {
- return t_policy.Index();
- }
- const size_t& Iteration()
- {
- return iteration;
- }
- const size_t& MaxIterations()
- {
- return t_policy.MaxIterations();
- }
+ //! Get current value of residue
+ const double& Index() const { return t_policy.Index(); }
+
+ //! Get current iteration count
+ const size_t& Iteration() const { return iteration; }
+
+ //! Access upper limit of iteration count
+ const size_t& MaxIterations() const { return t_policy.MaxIterations(); }
+ size_t& MaxIterations() { return t_policy.MaxIterations(); }
+
+ //! Access the wrapped class object
+ const TerminationPolicy& TPolicy() const { return t_policy; }
+ TerminationPolicy& TPolicy() { return t_policy; }
private:
+ //! wrapped class object
TerminationPolicy t_policy;
+ //! number of iterations after which wrapped class object will be called
size_t incrementalIndex;
+ //! current iteration count
size_t iteration;
-};
+}; // class IncompleteIncrementalTermination
}; // namespace amf
}; // namespace mlpack
-#endif
+#endif // _MLPACK_METHODS_AMF_INCOMPLETEINCREMENTALTERMINATION_HPP_INCLUDED
diff --git a/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp b/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
index 3e5f7b8..66d7930 100644
--- a/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
@@ -1,6 +1,8 @@
/**
* @file simple_residue_termination.hpp
* @author Sumedh Ghaisas
+ *
+ * Termination policy used in AMF (Alternating Matrix Factorization).
*/
#ifndef _MLPACK_METHODS_AMF_SIMPLERESIDUETERMINATION_HPP_INCLUDED
#define _MLPACK_METHODS_AMF_SIMPLERESIDUETERMINATION_HPP_INCLUDED
@@ -10,58 +12,97 @@
namespace mlpack {
namespace amf {
+/**
+ * This class implements simple residue based termination policy. Termination
+ * decision depends on two factors, value of residue and number of iteration.
+ * If the current value of residue drops below the threshold or the number of
+ * iterations goes above the threshold, positive termination signal is passed
+ * to AMF.
+ *
+ * @see AMF
+ */
class SimpleResidueTermination
{
public:
+ //! empty constructor
SimpleResidueTermination(const double minResidue = 1e-10,
const size_t maxIterations = 10000)
: minResidue(minResidue), maxIterations(maxIterations) { }
+ /**
+ * Initializes the termination policy before starting the factorization.
+ *
+ * @param V Input matrix being factorized.
+ */
template<typename MatType>
void Initialize(const MatType& V)
{
+ // set residue to minimum value
residue = minResidue;
+ // set iteration to minimum value
iteration = 1;
+ // remove history
normOld = 0;
+ // initialize required variables
const size_t n = V.n_rows;
const size_t m = V.n_cols;
-
nm = n * m;
}
+ /**
+ * Check if termination criterion is met.
+ *
+ * @param W Basis matrix of output.
+ * @param H Encoding matrix of output.
+ */
bool IsConverged(arma::mat& W, arma::mat& H)
{
// Calculate norm of WH after each iteration.
arma::mat WH;
+ // calculate the norm and compute the residue
WH = W * H;
double norm = sqrt(accu(WH % WH) / nm);
+ residue = fabs(normOld - norm);
+ residue /= normOld;
- if (iteration != 0)
- {
- residue = fabs(normOld - norm);
- residue /= normOld;
- }
-
+ // store the residue into history
normOld = norm;
-
+
+ // increment iteration count
iteration++;
+ // check if termination criterion is met
if(residue < minResidue || iteration > maxIterations) return true;
else return false;
}
- const double& Index() { return residue; }
- const size_t& Iteration() { return iteration; }
- const size_t& MaxIterations() { return maxIterations; }
+ //! Get current value of residue
+ const double& Index() const { return residue; }
+
+ //! Get current iteration count
+ const size_t& Iteration() const { return iteration; }
+
+ //! Access max iteration count
+ const size_t& MaxIterations() const { return maxIterations; }
+ size_t& MaxIterations() { return maxIterations; }
+
+ //! Access minimum residue value
+ const double& MinResidue() const { return minResidue; }
+ double& MinResidue() { return minResidue; }
public:
+ //! residue threshold
double minResidue;
+ //! iteration threshold
size_t maxIterations;
+ //! current value of residue
double residue;
+ //! current iteration count
size_t iteration;
+ //! norm of previous iteration
double normOld;
size_t nm;
diff --git a/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp b/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
index 18fef4d..3c12e21 100644
--- a/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
@@ -1,6 +1,8 @@
/**
* @file simple_tolerance_termination.hpp
* @author Sumedh Ghaisas
+ *
+ * Termination policy used in AMF (Alternating Matrix Factorization).
*/
#ifndef _MLPACK_METHODS_AMF_SIMPLE_TOLERANCE_TERMINATION_HPP_INCLUDED
#define _MLPACK_METHODS_AMF_SIMPLE_TOLERANCE_TERMINATION_HPP_INCLUDED
@@ -10,10 +12,21 @@
namespace mlpack {
namespace amf {
+/**
+ * This class implements residue tolerance termination policy. Termination
+ * criterion is met when increase in residue value drops below the given tolerance.
+ * To accommodate spikes, a certain number of successive residue drops are accepted.
+ * This upper limit on successive drops can be adjusted with reverseStepTolerance.
+ * Secondary termination criterion terminates algorithm when iteration count
+ * goes above the threshold.
+ *
+ * @see AMF
+ */
template <class MatType>
class SimpleToleranceTermination
{
public:
+ //! empty constructor
SimpleToleranceTermination(const double tolerance = 1e-5,
const size_t maxIterations = 10000,
const size_t reverseStepTolerance = 3)
@@ -21,6 +34,11 @@ class SimpleToleranceTermination
maxIterations(maxIterations),
reverseStepTolerance(reverseStepTolerance) {}
+ /**
+ * Initializes the termination policy before starting the factorization.
+ *
+ * @param V Input matrix to be factorized.
+ */
void Initialize(const MatType& V)
{
residueOld = DBL_MAX;
@@ -36,13 +54,19 @@ class SimpleToleranceTermination
reverseStepCount = 0;
}
+ /**
+ * Check if termination criterion is met.
+ *
+ * @param W Basis matrix of output.
+ * @param H Encoding matrix of output.
+ */
bool IsConverged(arma::mat& W, arma::mat& H)
{
- // Calculate norm of WH after each iteration.
arma::mat WH;
WH = W * H;
+ // compute residue
residueOld = residue;
size_t n = V->n_rows;
size_t m = V->n_cols;
@@ -65,31 +89,43 @@ class SimpleToleranceTermination
residue = sum / count;
residue = sqrt(residue);
+ // increment iteration count
iteration++;
-
+
+ // if residue tolerance is not satisfied
if((residueOld - residue) / residueOld < tolerance && iteration > 4)
{
+ // check if this is a first of successive drops
if(reverseStepCount == 0 && isCopy == false)
{
+ // store a copy of W and H matrix
isCopy = true;
this->W = W;
this->H = H;
+ // store residue values
c_index = residue;
c_indexOld = residueOld;
}
+ // increase successive drop count
reverseStepCount++;
}
+ // if tolerance is satisfied
else
{
+ // initialize successive drop count
reverseStepCount = 0;
+ // if residue has dropped below the minimum, scrap the stored values
if(residue <= c_indexOld && isCopy == true)
{
isCopy = false;
}
}
+ // check if termination criterion is met
if(reverseStepCount == reverseStepTolerance || iteration > maxIterations)
{
+ // if stored values are present, replace the current values with them, as
+ // they represent the minimum residue point
if(isCopy)
{
W = this->W;
@@ -101,25 +137,47 @@ class SimpleToleranceTermination
else return false;
}
- const double& Index() { return residue; }
- const size_t& Iteration() { return iteration; }
- const size_t& MaxIterations() { return maxIterations; }
+ //! Get current value of residue
+ const double& Index() const { return residue; }
+
+ //! Get current iteration count
+ const size_t& Iteration() const { return iteration; }
+
+ //! Access upper limit of iteration count
+ const size_t& MaxIterations() const { return maxIterations; }
+ size_t& MaxIterations() { return maxIterations; }
+
+ //! Access tolerance value
+ const double& Tolerance() const { return tolerance; }
+ double& Tolerance() { return tolerance; }
private:
+ //! tolerance
double tolerance;
+ //! iteration threshold
size_t maxIterations;
+ //! pointer to matrix being factorized
const MatType* V;
+ //! current iteration count
size_t iteration;
+
+ //! residue values
double residueOld;
double residue;
double normOld;
+ //! tolerance on successive residue drops
size_t reverseStepTolerance;
+ //! successive residue drops
size_t reverseStepCount;
+ //! indicates whether a copy of information is available which corresponds to
+ //! minimum residue point
bool isCopy;
+
+ //! variables to store information of minimum residue point
arma::mat W;
arma::mat H;
double c_indexOld;
diff --git a/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp b/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp
index f63be80..844c151 100644
--- a/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp
+++ b/src/mlpack/methods/amf/termination_policies/validation_RMSE_termination.hpp
@@ -1,5 +1,11 @@
-#ifndef VALIDATION_RMSE_TERMINATION_HPP_INCLUDED
-#define VALIDATION_RMSE_TERMINATION_HPP_INCLUDED
+/**
+ * @file validation_RMSE_termination.hpp
+ * @author Sumedh Ghaisas
+ *
+ * Termination policy used in AMF (Alternating Matrix Factorization).
+ */
+#ifndef _MLPACK_METHODS_AMF_VALIDATIONRMSETERMINATION_HPP_INCLUDED
+#define _MLPACK_METHODS_AMF_VALIDATIONRMSETERMINATION_HPP_INCLUDED
#include <mlpack/core.hpp>
@@ -7,10 +13,34 @@ namespace mlpack
{
namespace amf
{
+
+/**
+ * This class implements validation termination policy based on RMSE index.
+ * The input data matrix is divided into 2 sets, training set and validation set.
+ * Entries of validation set are nullified in the input matrix. Termination
+ * criterion is met when increase in validation set RMSE value drops below the
+ * given tolerance. To accommodate spikes, a certain number of successive validation
+ * RMSE drops are accepted. This upper limit on successive drops can be adjusted
+ * with reverseStepTolerance. Secondary termination criterion terminates algorithm
+ * when iteration count goes above the threshold.
+ *
+ * @note The input matrix is modified by this termination policy.
+ *
+ * @see AMF
+ */
template <class MatType>
class ValidationRMSETermination
{
public:
+ /**
+ * Create a validation set according to given parameters and nullifies this
+ * set in data matrix(training set).
+ *
+ * @param V Input matrix to be factorized.
+ * @param num_test_points number of validation test points
+ * @param maxIterations max iteration count before termination
+ * @param reverseStepTolerance max successive RMSE drops allowed
+ */
ValidationRMSETermination(MatType& V,
size_t num_test_points,
double tolerance = 1e-5,
@@ -24,26 +54,38 @@ class ValidationRMSETermination
size_t n = V.n_rows;
size_t m = V.n_cols;
+ // initialize validation set matrix
test_points.zeros(num_test_points, 3);
-
+
+ // fill validation set matrix with random chosen entries
for(size_t i = 0; i < num_test_points; i++)
{
double t_val;
size_t t_row;
size_t t_col;
+
+ // pick a random non-zero entry
do
{
t_row = rand() % n;
t_col = rand() % m;
} while((t_val = V(t_row, t_col)) == 0);
+ // add the entry to the validation set
test_points(i, 0) = t_row;
test_points(i, 1) = t_col;
test_points(i, 2) = t_val;
+
+ // nullify the added entry from data matrix (training set)
V(t_row, t_col) = 0;
}
}
-
+
+ /**
+ * Initializes the termination policy before starting the factorization.
+ *
+ * @param V Input matrix to be factorized.
+ */
void Initialize(const MatType& /* V */)
{
iteration = 1;
@@ -58,13 +100,19 @@ class ValidationRMSETermination
isCopy = false;
}
+ /**
+ * Check if termination criterion is met.
+ *
+ * @param W Basis matrix of output.
+ * @param H Encoding matrix of output.
+ */
bool IsConverged(arma::mat& W, arma::mat& H)
{
- // Calculate norm of WH after each iteration.
arma::mat WH;
WH = W * H;
+ // compute validation RMSE
if (iteration != 0)
{
rmseOld = rmse;
@@ -82,31 +130,43 @@ class ValidationRMSETermination
rmse = sqrt(rmse);
}
+ // increment iteration count
iteration++;
+ // if RMSE tolerance is not satisfied
if((rmseOld - rmse) / rmseOld < tolerance && iteration > 4)
{
+ // check if this is a first of successive drops
if(reverseStepCount == 0 && isCopy == false)
{
+ // store a copy of W and H matrix
isCopy = true;
this->W = W;
this->H = H;
+ // store residue values
c_indexOld = rmseOld;
c_index = rmse;
}
+ // increase successive drop count
reverseStepCount++;
}
+ // if tolerance is satisfied
else
{
+ // initialize successive drop count
reverseStepCount = 0;
+ // if residue has dropped below the minimum, scrap the stored values
if(rmse <= c_indexOld && isCopy == true)
{
isCopy = false;
}
}
+ // check if termination criterion is met
if(reverseStepCount == reverseStepTolerance || iteration > maxIterations)
{
+ // if stored values are present, replace the current values with them, as
+ // they represent the minimum residue point
if(isCopy)
{
W = this->W;
@@ -118,36 +178,59 @@ class ValidationRMSETermination
else return false;
}
- const double& Index() { return rmse; }
+ //! Get current value of residue
+ const double& Index() const { return rmse; }
- const size_t& Iteration() { return iteration; }
+ //! Get current iteration count
+ const size_t& Iteration() const { return iteration; }
+
+ //! Get number of validation points
+ const size_t& NumTestPoints() const { return num_test_points; }
- const size_t& MaxIterations() { return maxIterations; }
+ //! Access upper limit of iteration count
+ const size_t& MaxIterations() const { return maxIterations; }
+ size_t& MaxIterations() { return maxIterations; }
+
+ //! Access tolerance value
+ const double& Tolerance() const { return tolerance; }
+ double& Tolerance() { return tolerance; }
private:
+ //! tolerance
double tolerance;
+ //! max iteration limit
size_t maxIterations;
+ //! number of validation test points
size_t num_test_points;
+
+ //! current iteration count
size_t iteration;
- arma::Mat<double> test_points;
+ //! validation point matrix
+ arma::mat test_points;
+ //! rmse values
double rmseOld;
double rmse;
+ //! tolerance on successive residue drops
size_t reverseStepTolerance;
+ //! successive residue drops
size_t reverseStepCount;
+ //! indicates whether a copy of information is available which corresponds to
+ //! minimum residue point
bool isCopy;
+
+ //! variables to store information of minimum residue point
arma::mat W;
arma::mat H;
double c_indexOld;
double c_index;
-};
+}; // class ValidationRMSETermination
} // namespace amf
} // namespace mlpack
-#endif // VALIDATION_RMSE_TERMINATION_HPP_INCLUDED
-
+#endif // _MLPACK_METHODS_AMF_VALIDATIONRMSETERMINATION_HPP_INCLUDED
diff --git a/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp b/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp
index bb31e9e..84e41ce 100644
--- a/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp
+++ b/src/mlpack/methods/amf/update_rules/svd_batch_learning.hpp
@@ -1,6 +1,8 @@
/**
- * @file simple_residue_termination.hpp
+ * @file svd_batch_learning.hpp
* @author Sumedh Ghaisas
+ *
+ * SVD factorization used in AMF (Alternating Matrix Factorization).
*/
#ifndef __MLPACK_METHODS_AMF_UPDATE_RULES_SVD_BATCHLEARNING_HPP
#define __MLPACK_METHODS_AMF_UPDATE_RULES_SVD_BATCHLEARNING_HPP
@@ -14,13 +16,19 @@ namespace amf
class SVDBatchLearning
{
public:
+ /**
+ * SVD Batch learning constructor.
+ *
+ * @param u step value used in batch learning
+ * @param kw regularization constant for W matrix
+ * @param kh regularization constant for H matrix
+ * @param momentum momentum applied to batch learning process
+ */
SVDBatchLearning(double u = 0.0002,
double kw = 0,
double kh = 0,
- double momentum = 0.9,
- double min = -DBL_MIN,
- double max = DBL_MAX)
- : u(u), kw(kw), kh(kh), min(min), max(max), momentum(momentum)
+ double momentum = 0.9)
+ : u(u), kw(kw), kh(kh), momentum(momentum)
{}
template<typename MatType>
@@ -117,8 +125,6 @@ class SVDBatchLearning
double u;
double kw;
double kh;
- double min;
- double max;
double momentum;
arma::mat mW;
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index bbb8883..500ed10 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -117,8 +117,6 @@ class CF
const arma::mat& H() const { return h; }
//! Get the Rating Matrix.
const arma::mat& Rating() const { return rating; }
- //! Get the data matrix.
- const arma::mat& Data() const { return data; }
//! Get the cleaned data matrix.
const arma::sp_mat& CleanedData() const { return cleanedData; }
@@ -148,8 +146,6 @@ class CF
std::string ToString() const;
private:
- //! Initial data matrix.
- arma::mat data;
//! Number of users for similarity.
size_t numUsersForSimilarity;
//! Rank used for matrix factorization.
@@ -165,7 +161,7 @@ class CF
//! Cleaned data matrix.
arma::sp_mat cleanedData;
//! Converts the User, Item, Value Matrix to User-Item Table
- void CleanData();
+ void CleanData(const arma::mat& data);
/**
* Helper function to insert a point into the recommendation matrices.
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index 69419fe..80389bf 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -6,7 +6,6 @@
*
* Implementation of CF class to perform Collaborative Filtering on the
* specified data set.
- *
*/
namespace mlpack {
@@ -19,7 +18,6 @@ template<typename FactorizerType>
CF<FactorizerType>::CF(arma::mat& data,
const size_t numUsersForSimilarity,
const size_t rank) :
- data(data),
numUsersForSimilarity(numUsersForSimilarity),
rank(rank),
factorizer()
@@ -33,7 +31,26 @@ CF<FactorizerType>::CF(arma::mat& data,
this->numUsersForSimilarity = 5;
}
- CleanData();
+ CleanData(data);
+
+ // Check if the user wanted us to choose a rank for them.
+ if (rank == 0)
+ {
+ // This is a simple heuristic that picks a rank based on the density of the
+ // dataset between 5 and 105.
+ const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
+ const size_t rankEstimate = size_t(density) + 5;
+
+ // Set to heuristic value.
+ Log::Info << "No rank given for decomposition; using rank of "
+ << rankEstimate << " calculated by density-based heuristic."
+ << std::endl;
+ this->rank = rankEstimate;
+ }
+
+ // Operations independent of the query:
+ // Decompose the sparse data matrix to user and data matrices.
+ factorizer.Apply(cleanedData, this->rank, w, h);
}
template<typename FactorizerType>
@@ -56,27 +73,6 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
arma::Mat<size_t>& recommendations,
arma::Col<size_t>& users)
{
- // Base function for calculating recommendations.
-
- // Check if the user wanted us to choose a rank for them.
- if (rank == 0)
- {
- // This is a simple heuristic that picks a rank based on the density of the
- // dataset between 5 and 105.
- const double density = (cleanedData.n_nonzero * 100.0) / cleanedData.n_elem;
- const size_t rankEstimate = size_t(density) + 5;
-
- // Set to heuristic value.
- Log::Info << "No rank given for decomposition; using rank of "
- << rankEstimate << " calculated by density-based heuristic."
- << std::endl;
- rank = rankEstimate;
- }
-
- // Operations independent of the query:
- // Decompose the sparse data matrix to user and data matrices.
- factorizer.Apply(cleanedData, rank, w, h);
-
// Generate new table by multiplying approximate values.
rating = w * h;
@@ -156,7 +152,7 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
}
template<typename FactorizerType>
-void CF<FactorizerType>::CleanData()
+void CF<FactorizerType>::CleanData(const arma::mat& data)
{
// Generate list of locations for batch insert constructor for sparse
// matrices.
diff --git a/src/mlpack/methods/cf/plain_svd.cpp b/src/mlpack/methods/cf/plain_svd.cpp
index d52bbcd..8495fe9 100644
--- a/src/mlpack/methods/cf/plain_svd.cpp
+++ b/src/mlpack/methods/cf/plain_svd.cpp
@@ -55,8 +55,10 @@ double PlainSVD::Apply(const arma::mat& V,
sigma = sigma.subvec(0, r - 1);
W = W * arma::diagmat(sigma);
+
+ H = arma::trans(H);
- arma::mat V_rec = W * arma::trans(H);
+ arma::mat V_rec = W * H;
size_t n = V.n_rows;
size_t m = V.n_cols;
diff --git a/src/mlpack/tests/cf_test.cpp b/src/mlpack/tests/cf_test.cpp
index 5744721..6b4d41e 100644
--- a/src/mlpack/tests/cf_test.cpp
+++ b/src/mlpack/tests/cf_test.cpp
@@ -19,33 +19,6 @@ using namespace mlpack::cf;
using namespace std;
/**
- * Make sure that the constructor works okay.
- */
-BOOST_AUTO_TEST_CASE(CFConstructorTest)
-{
- // Load GroupLens data.
- arma::mat dataset;
- data::Load("GroupLens100k.csv", dataset);
-
- // Number of users for similarity (not the default).
- const size_t numUsersForSimilarity = 8;
-
- CF<> c(dataset, numUsersForSimilarity);
-
- // Check parameters.
- BOOST_REQUIRE_EQUAL(c.NumUsersForSimilarity(), numUsersForSimilarity);
-
- // Check data.
- BOOST_REQUIRE_EQUAL(c.Data().n_rows, dataset.n_rows);
- BOOST_REQUIRE_EQUAL(c.Data().n_cols, dataset.n_cols);
-
- // Check values (this should be superfluous...).
- for (size_t i = 0; i < dataset.n_rows; i++)
- for (size_t j = 0; j < dataset.n_cols; j++)
- BOOST_REQUIRE_EQUAL(c.Data()(i, j), dataset(i, j));
-}
-
-/**
* Make sure that correct number of recommendations are generated when query
* set. Default case.
*/
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list