[mlpack] 11/324: added module 'lmf'(Latent Matrix Factorization) to accommodate SVD based update rules alongside NMF based update rule. CF module is updated to use LMF module.

Barak A. Pearlmutter barak+git at cs.nuim.ie
Sun Aug 17 08:21:51 UTC 2014


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch svn-trunk
in repository mlpack.

commit f98fec4ba4224fec7d27080c8914df6e4466891b
Author: sumedhghaisas <sumedhghaisas at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date:   Wed May 21 20:23:43 2014 +0000

    added module 'lmf'(Latent Matrix Factorization) to accommodate SVD based update rules alongside NMF based update rule. CF module is updated to use LMF module.
    
    git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16531 9d5b8971-822b-0410-80eb-d18c1038ef23
---
 src/mlpack/methods/CMakeLists.txt                  |   1 +
 src/mlpack/methods/cf/cf.hpp                       |  12 +-
 src/mlpack/methods/cf/cf_impl.hpp                  |  12 +-
 src/mlpack/methods/lmf/CMakeLists.txt              |  26 ++++
 src/mlpack/methods/lmf/init_rules/CMakeLists.txt   |  15 +++
 .../methods/lmf/init_rules/random_acol_init.hpp    |  87 ++++++++++++++
 src/mlpack/methods/lmf/init_rules/random_init.hpp  |  56 +++++++++
 src/mlpack/methods/lmf/lmf.hpp                     | 120 +++++++++++++++++++
 src/mlpack/methods/lmf/lmf_impl.hpp                |  86 ++++++++++++++
 src/mlpack/methods/lmf/lmf_main.cpp                | 126 ++++++++++++++++++++
 src/mlpack/methods/lmf/update_rules/CMakeLists.txt |  16 +++
 src/mlpack/methods/lmf/update_rules/nmf_als.hpp    | 109 +++++++++++++++++
 .../methods/lmf/update_rules/nmf_mult_dist.hpp     |  87 ++++++++++++++
 .../methods/lmf/update_rules/nmf_mult_div.hpp      | 131 +++++++++++++++++++++
 14 files changed, 866 insertions(+), 18 deletions(-)

diff --git a/src/mlpack/methods/CMakeLists.txt b/src/mlpack/methods/CMakeLists.txt
index 9e15448..a434eca 100644
--- a/src/mlpack/methods/CMakeLists.txt
+++ b/src/mlpack/methods/CMakeLists.txt
@@ -18,6 +18,7 @@ set(DIRS
   nca
   neighbor_search
   nmf
+  lmf
   pca
   radical
   range_search
diff --git a/src/mlpack/methods/cf/cf.hpp b/src/mlpack/methods/cf/cf.hpp
index 4eda91f..3e12902 100644
--- a/src/mlpack/methods/cf/cf.hpp
+++ b/src/mlpack/methods/cf/cf.hpp
@@ -12,14 +12,12 @@
 
 #include <mlpack/core.hpp>
 #include <mlpack/methods/neighbor_search/neighbor_search.hpp>
-#include <mlpack/methods/nmf/nmf.hpp>
-#include <mlpack/methods/nmf/als_update_rules.hpp>
+#include <mlpack/methods/lmf/lmf.hpp>
+#include <mlpack/methods/lmf/update_rules/nmf_als.hpp>
 #include <set>
 #include <map>
 #include <iostream>
 
-using namespace mlpack::nmf;
-
 namespace mlpack {
 namespace cf /** Collaborative filtering. */{
 
@@ -56,10 +54,8 @@ namespace cf /** Collaborative filtering. */{
  *     Apply(arma::sp_mat& data, size_t rank, arma::mat& W, arma::mat& H).
  */
 template<
-    typename FactorizerType = NMF<RandomInitialization,
-                                  WAlternatingLeastSquaresRule,
-                                  HAlternatingLeastSquaresRule>
->
+    typename FactorizerType = lmf::LMF<lmf::RandomInitialization, 
+                                       lmf::NMF_ALSUpdate> >
 class CF
 {
  public:
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index c2b6232..15fbe34 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -8,13 +8,6 @@
  * specified data set.
  *
  */
-#include "cf.hpp"
-#include <mlpack/methods/nmf/nmf.hpp>
-#include <mlpack/methods/nmf/als_update_rules.hpp>
-
-using namespace mlpack::nmf;
-using namespace mlpack::neighbor;
-using namespace std;
 
 namespace mlpack {
 namespace cf {
@@ -85,7 +78,6 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
 
   // Operations independent of the query:
   // Decompose the sparse data matrix to user and data matrices.
-  // Presently only ALS (via NMF) is supported as an optimizer.
   factorizer.Apply(cleanedData, rank, w, h);
 
   // Generate new table by multiplying approximate values.
@@ -106,7 +98,7 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
 
   // Calculate the neighborhood of the queried users.
   // This should be a templatized option.
-  AllkNN a(rating, query);
+  neighbor::AllkNN a(rating, query);
   arma::mat resultingDistances; // Temporary storage.
   a.Search(numUsersForSimilarity, neighborhood, resultingDistances);
 
@@ -162,7 +154,7 @@ void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
     // warning.
     if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows + 1)
       Log::Warn << "Could not provide " << values.n_rows << " recommendations "
-          << "for user " << users(i) << " (not enough un-rated items)!" << endl;
+          << "for user " << users(i) << " (not enough un-rated items)!" << std::endl;
   }
 }
 
diff --git a/src/mlpack/methods/lmf/CMakeLists.txt b/src/mlpack/methods/lmf/CMakeLists.txt
new file mode 100644
index 0000000..e9205ae
--- /dev/null
+++ b/src/mlpack/methods/lmf/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Define the files we need to compile
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+  lmf.hpp
+  lmf_impl.hpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
+
+add_subdirectory(update_rules)
+add_subdirectory(init_rules)
+
+add_executable(lmf
+  lmf_main.cpp
+)
+target_link_libraries(lmf
+  mlpack
+)
+install(TARGETS lmf RUNTIME DESTINATION bin)
diff --git a/src/mlpack/methods/lmf/init_rules/CMakeLists.txt b/src/mlpack/methods/lmf/init_rules/CMakeLists.txt
new file mode 100644
index 0000000..a31d281
--- /dev/null
+++ b/src/mlpack/methods/lmf/init_rules/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Define the files we need to compile
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+  random_init.hpp
+  random_acol_init.hpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/lmf/init_rules/random_acol_init.hpp b/src/mlpack/methods/lmf/init_rules/random_acol_init.hpp
new file mode 100644
index 0000000..b110e49
--- /dev/null
+++ b/src/mlpack/methods/lmf/init_rules/random_acol_init.hpp
@@ -0,0 +1,87 @@
+/**
+ * @file random_acol_init.hpp
+ * @author Mohan Rajendran
+ *
+ * Intialization rule for Non-Negative Matrix Factorization. This simple
+ * initialization is performed by the random Acol initialization introduced in
+ * the paper 'Algorithms, Initializations and Convergence' by Langville et al.
+ * This method sets each of the columns of W by averaging p randomly chosen
+ * columns of V.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __MLPACK_METHODS_LMF_RANDOM_ACOL_INIT_HPP
+#define __MLPACK_METHODS_LMF_RANDOM_ACOL_INIT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace lmf {
+
+/**
+ * This class initializes the W matrix of the NMF algorithm by averaging p
+ * randomly chosen columns of V.  In this case, p is a template parameter.  H is
+ * then set randomly.
+ *
+ * @tparam The number of random columns to average for each column of W.
+ */
+template<int p = 5>
+class RandomAcolInitialization
+{
+ public:
+  // Empty constructor required for the InitializeRule template
+  RandomAcolInitialization()
+  { }
+
+  template<typename MatType>
+  inline static void Initialize(const MatType& V,
+                                const size_t r,
+                                arma::mat& W,
+                                arma::mat& H)
+  {
+    const size_t n = V.n_rows;
+    const size_t m = V.n_cols;
+
+    if (p > m)
+    {
+      Log::Warn << "Number of random columns is more than the number of columns"
+          << "available in the V matrix; weird results may ensue!" << std::endl;
+    }
+
+    W.zeros(n, r);
+
+    // Initialize W matrix with random columns.
+    for (size_t col = 0; col < r; col++)
+    {
+      for (size_t randCol = 0; randCol < p; randCol++)
+      {
+        // .col() does not work in this case, as of Armadillo 3.920.
+        W.unsafe_col(col) += V.col(math::RandInt(0, m));
+      }
+    }
+
+    // Now divide by p.
+    W /= p;
+
+    // Initialize H to random values.
+    H.randu(r, m);
+  }
+}; // Class RandomAcolInitialization
+
+}; // namespace lmf
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/lmf/init_rules/random_init.hpp b/src/mlpack/methods/lmf/init_rules/random_init.hpp
new file mode 100644
index 0000000..de91724
--- /dev/null
+++ b/src/mlpack/methods/lmf/init_rules/random_init.hpp
@@ -0,0 +1,56 @@
+/**
+ * @file random_init.hpp
+ * @author Mohan Rajendran
+ *
+ * Intialization rule for Non-Negative Matrix Factorization (NMF). This simple
+ * initialization is performed by assigning a random matrix to W and H.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __MLPACK_METHODS_LMF_RANDOM_INIT_HPP
+#define __MLPACK_METHODS_LMF_RANDOM_INIT_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace lmf {
+
+class RandomInitialization
+{
+ public:
+  // Empty constructor required for the InitializeRule template
+  RandomInitialization() { }
+
+  template<typename MatType>
+  inline static void Initialize(const MatType& V,
+                                const size_t r,
+                                arma::mat& W,
+                                arma::mat& H)
+  {
+    // Simple implementation (left in the header file due to its simplicity).
+    size_t n = V.n_rows;
+    size_t m = V.n_cols;
+
+    // Intialize to random values.
+    W.randu(n, r);
+    H.randu(r, m);
+  }
+};
+
+}; // namespace lmf
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/lmf/lmf.hpp b/src/mlpack/methods/lmf/lmf.hpp
new file mode 100644
index 0000000..16d4c63
--- /dev/null
+++ b/src/mlpack/methods/lmf/lmf.hpp
@@ -0,0 +1,120 @@
+#ifndef __MLPACK_METHODS_LMF_LMF_HPP
+#define __MLPACK_METHODS_LMF_LMF_HPP
+
+#include <mlpack/core.hpp>
+#include "update_rules/nmf_mult_dist.hpp"
+#include "init_rules/random_init.hpp"
+
+namespace mlpack {
+namespace lmf {
+
+/**
+ * This class implements the LMF on the given matrix V. Latent Matrix
+ * Factorization decomposes V in the form \f$ V \approx WH \f$ where W is
+ * called the basis matrix and H is called the encoding matrix. V is taken
+ * to be of size n x m and the obtained W is n x r and H is r x m. The size r is
+ * called the rank of the factorization.
+ *
+ * The implementation requires two template types; the first contains the
+ * initialization rule for the W and H matrix and the other contains the update
+ * rule to be used during each iteration.  This templatization allows the
+ * user to try various update rules (including ones not supplied with MLPACK)
+ * for factorization.
+ *
+ * A simple example of how to run LMF is shown below.
+ *
+ * @code
+ * extern arma::mat V; // Matrix that we want to perform LMF on.
+ * size_t r = 10; // Rank of decomposition
+ * arma::mat W; // Basis matrix
+ * arma::mat H; // Encoding matrix
+ *
+ * LMF<> lmf; // Default options
+ * lmf.Apply(V, W, H, r);
+ * @endcode
+ *
+ * @tparam InitializationRule The initialization rule for initializing W and H
+ *     matrix.
+ * @tparam UpdateRule The update rule for calculating W and H matrix at each
+ *     iteration.
+ *
+ * @see NMF_MultiplicativeDistanceUpdate
+ */
+template<typename InitializationRule = RandomInitialization,
+         typename UpdateRule = NMF_MultiplicativeDistanceUpdate>
+class LMF
+{
+ public:
+  /**
+   * Create the LMF object and (optionally) set the parameters which LMF will
+   * run with.  The minimum residue refers to the root mean square of the
+   * difference between two subsequent iterations of the product W * H.  A low
+   * residue indicates that subsequent iterations are not producing much change
+   * in W and H.  Once the residue goes below the specified minimum residue, the
+   * algorithm terminates.
+   *
+   * @param maxIterations Maximum number of iterations allowed before giving up.
+   *     A value of 0 indicates no limit.
+   * @param minResidue The minimum allowed residue before the algorithm
+   *     terminates.
+   * @param Initialize Optional Initialization object for initializing the
+   *     W and H matrices
+   * @param Update Optional UpdateRule object; for when the update rule for
+   *     the W and H vector has states that it needs to store
+   */
+  LMF(const size_t maxIterations = 10000,
+      const double minResidue = 1e-10,
+      const InitializationRule initializeRule = InitializationRule(),
+      const UpdateRule update = UpdateRule());
+
+  /**
+   * Apply Latent Matrix Factorization to the provided matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be output.
+   * @param H Encoding matrix to output.
+   * @param r Rank r of the factorization.
+   */
+  template<typename MatType>
+  void Apply(const MatType& V,
+             const size_t r,
+             arma::mat& W,
+             arma::mat& H) const;
+
+ private:
+  //! The maximum number of iterations allowed before giving up.
+  size_t maxIterations;
+  //! The minimum residue, below which iteration is considered converged.
+  double minResidue;
+  //! Instantiated initialization Rule.
+  InitializationRule initializeRule;
+  //! Instantiated update rule.
+  UpdateRule update;
+
+ public:
+  //! Access the maximum number of iterations.
+  size_t MaxIterations() const { return maxIterations; }
+  //! Modify the maximum number of iterations.
+  size_t& MaxIterations() { return maxIterations; }
+  //! Access the minimum residue before termination.
+  double MinResidue() const { return minResidue; }
+  //! Modify the minimum residue before termination.
+  double& MinResidue() { return minResidue; }
+  //! Access the initialization rule.
+  const InitializationRule& InitializeRule() const { return initializeRule; }
+  //! Modify the initialization rule.
+  InitializationRule& InitializeRule() { return initializeRule; }
+  //! Access the update rule.
+  const UpdateRule& Update() const { return update; }
+  //! Modify the update rule.
+  UpdateRule& Update() { return update; }
+
+}; // class LMF
+
+}; // namespace lmf
+}; // namespace mlpack
+
+// Include implementation.
+#include "lmf_impl.hpp"
+
+#endif
diff --git a/src/mlpack/methods/lmf/lmf_impl.hpp b/src/mlpack/methods/lmf/lmf_impl.hpp
new file mode 100644
index 0000000..d4bf896
--- /dev/null
+++ b/src/mlpack/methods/lmf/lmf_impl.hpp
@@ -0,0 +1,86 @@
+namespace mlpack {
+namespace lmf {
+
+/**
+ * Construct the LMF object.
+ */
+template<typename InitializationRule,
+         typename UpdateRule>
+LMF<InitializationRule, UpdateRule>::LMF(
+    const size_t maxIterations,
+    const double minResidue,
+    const InitializationRule initializeRule,
+    const UpdateRule update) :
+    maxIterations(maxIterations),
+    minResidue(minResidue),
+    initializeRule(initializeRule),
+    update(update)
+{
+  if (minResidue < 0.0)
+  {
+    Log::Warn << "LMF::LMF(): minResidue must be a positive value ("
+        << minResidue << " given). Setting to the default value of 1e-10.\n";
+    this->minResidue = 1e-10;
+  }
+}
+
+/**
+ * Apply Latent Matrix Factorization to the provided matrix.
+ *
+ * @param V Input matrix to be factorized
+ * @param W Basis matrix to be output
+ * @param H Encoding matrix to output
+ * @param r Rank r of the factorization
+ */
+template<typename InitializationRule,
+         typename UpdateRule>
+template<typename MatType>
+void LMF<InitializationRule, UpdateRule>::Apply(
+    const MatType& V,
+    const size_t r,
+    arma::mat& W,
+    arma::mat& H) const
+{
+  const size_t n = V.n_rows;
+  const size_t m = V.n_cols;
+
+  // Initialize W and H.
+  initializeRule.Initialize(V, r, W, H);
+
+  Log::Info << "Initialized W and H." << std::endl;
+
+  size_t iteration = 1;
+  const size_t nm = n * m;
+  double residue = minResidue;
+  double normOld = 0;
+  double norm = 0;
+  arma::mat WH;
+
+  while (residue >= minResidue && iteration != maxIterations)
+  {
+    // Update step.
+    // Update the value of W and H based on the Update Rules provided
+    update.WUpdate(V, W, H);
+    update.HUpdate(V, W, H);
+
+    // Calculate norm of WH after each iteration.
+    WH = W * H;
+    norm = sqrt(accu(WH % WH) / nm);
+
+    if (iteration != 0)
+    {
+      residue = fabs(normOld - norm);
+      residue /= normOld;
+    }
+
+    normOld = norm;
+
+    iteration++;
+  }
+
+  Log::Info << "LMF converged to residue of " << sqrt(residue) << " in "
+      << iteration << " iterations." << std::endl;
+}
+
+}; // namespace nmf
+}; // namespace mlpack
diff --git a/src/mlpack/methods/lmf/lmf_main.cpp b/src/mlpack/methods/lmf/lmf_main.cpp
new file mode 100644
index 0000000..04a79dc
--- /dev/null
+++ b/src/mlpack/methods/lmf/lmf_main.cpp
@@ -0,0 +1,126 @@
+#include <mlpack/core.hpp>
+
+#include "lmf.hpp"
+
+#include "init_rules/random_init.hpp"
+#include "update_rules/nmf_mult_dist.hpp"
+#include "update_rules/nmf_mult_div.hpp"
+#include "update_rules/nmf_als.hpp"
+
+using namespace mlpack;
+using namespace mlpack::lmf;
+using namespace std;
+
+// Document program.
+PROGRAM_INFO("Latent Matrix Factorization", "This program performs "
+    "matrix factorization on the given dataset, storing the "
+    "resulting decomposed matrices in the specified files.  For an input "
+    "dataset V, LMF decomposes V into two matrices W and H such that "
+    "\n\n"
+    "V = W * H"
+    "\n\n"
+    "If V is of size (n x m),"
+    " then W will be of size (n x r) and H will be of size (r x m), where r is "
+    "the rank of the factorization (specified by --rank)."
+    "\n\n"
+    "Optionally, the desired update rules for each LMF iteration can be chosen "
+    "from the following list:"
+    "\n\n"
+    " - multdist: multiplicative distance-based update rules (Lee and Seung "
+    "1999): non-negative matrix factorization. Matrix V should contain\n"
+    "non-negative elements.\n"
+    " - multdiv: multiplicative divergence-based update rules (Lee and Seung "
+    "1999): non-negative matrix factorization. Matrix V should contain\n"
+    "non-negative elements.\n"
+    " - als: alternating least squares update rules (Paatero and Tapper 1994)\n"
+    "non-negative matrix factorization. Matrix V should contain\n"
+    "non-negative elements.\n"
+    "\n"
+    "The maximum number of iterations is specified with --max_iterations, and "
+    "the minimum residue required for algorithm termination is specified with "
+    "--min_residue.");
+
+// Parameters for program.
+PARAM_STRING_REQ("input_file", "Input dataset to perform NMF on.", "i");
+PARAM_STRING_REQ("w_file", "File to save the calculated W matrix to.", "W");
+PARAM_STRING_REQ("h_file", "File to save the calculated H matrix to.", "H");
+PARAM_INT_REQ("rank", "Rank of the factorization.", "r");
+
+PARAM_INT("max_iterations", "Number of iterations before NMF terminates (0 runs"
+    " until convergence.", "m", 10000);
+PARAM_INT("seed", "Random seed.  If 0, 'std::time(NULL)' is used.", "s", 0);
+PARAM_DOUBLE("min_residue", "The minimum root mean square residue allowed for "
+    "each iteration, below which the program terminates.", "e", 1e-5);
+
+PARAM_STRING("update_rules", "Update rules for each iteration; ( multdist | "
+    "multdiv | als ).", "u", "multdist");
+
+int main(int argc, char** argv)
+{
+  // Parse command line.
+  CLI::ParseCommandLine(argc, argv);
+
+  // Initialize random seed.
+  if (CLI::GetParam<int>("seed") != 0)
+    math::RandomSeed((size_t) CLI::GetParam<int>("seed"));
+  else
+    math::RandomSeed((size_t) std::time(NULL));
+
+  // Gather parameters.
+  const string inputFile = CLI::GetParam<string>("input_file");
+  const string hOutputFile = CLI::GetParam<string>("h_file");
+  const string wOutputFile = CLI::GetParam<string>("w_file");
+  const size_t r = CLI::GetParam<int>("rank");
+  const size_t maxIterations = CLI::GetParam<int>("max_iterations");
+  const double minResidue = CLI::GetParam<double>("min_residue");
+  const string updateRules = CLI::GetParam<string>("update_rules");
+
+  // Validate rank.
+  if (r < 1)
+  {
+    Log::Fatal << "The rank of the factorization cannot be less than 1."
+        << std::endl;
+  }
+
+  if ((updateRules != "multdist") &&
+      (updateRules != "multdiv") &&
+      (updateRules != "als"))
+  {
+    Log::Fatal << "Invalid update rules ('" << updateRules << "'); must be '"
+        << "multdist', 'multdiv', or 'als'." << std::endl;
+  }
+
+  // Load input dataset.
+  arma::mat V;
+  data::Load(inputFile, V, true);
+
+  arma::mat W;
+  arma::mat H;
+
+  // Perform NMF with the specified update rules.
+  if (updateRules == "multdist")
+  {
+    Log::Info << "Performing LMF with multiplicative distance-based update(Non-negative Matrix Factorization) "
+        << "rules." << std::endl;
+    LMF<> nmf(maxIterations, minResidue);
+    nmf.Apply(V, r, W, H);
+  }
+  else if (updateRules == "multdiv")
+  {
+    Log::Info << "Performing NMF with multiplicative divergence-based update(Non-negative Matrix Factorization) "
+        << "rules." << std::endl;
+    LMF<RandomInitialization,NMF_MultiplicativeDivergenceUpdate> lmf(maxIterations, minResidue);
+    lmf.Apply(V, r, W, H);
+  }
+  else if (updateRules == "als")
+  {
+    Log::Info << "Performing NMF with alternating least squared update rules.(Non-negative Matrix Factorization)"
+        << std::endl;
+    LMF<RandomInitialization, NMF_ALSUpdate> lmf(maxIterations, minResidue);
+    lmf.Apply(V, r, W, H);
+  }
+
+  // Save results.
+  data::Save(wOutputFile, W, false);
+  data::Save(hOutputFile, H, false);
+}
diff --git a/src/mlpack/methods/lmf/update_rules/CMakeLists.txt b/src/mlpack/methods/lmf/update_rules/CMakeLists.txt
new file mode 100644
index 0000000..011ec27
--- /dev/null
+++ b/src/mlpack/methods/lmf/update_rules/CMakeLists.txt
@@ -0,0 +1,16 @@
+# Define the files we need to compile
+# Anything not in this list will not be compiled into MLPACK.
+set(SOURCES
+  nmf_als.hpp
+  nmf_mult_dist.hpp
+  nmf_mult_div.hpp
+)
+
+# Add directory name to sources.
+set(DIR_SRCS)
+foreach(file ${SOURCES})
+  set(DIR_SRCS ${DIR_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/${file})
+endforeach()
+# Append sources (with directory name) to list of all MLPACK sources (used at
+# the parent scope).
+set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
diff --git a/src/mlpack/methods/lmf/update_rules/nmf_als.hpp b/src/mlpack/methods/lmf/update_rules/nmf_als.hpp
new file mode 100644
index 0000000..d559ea3
--- /dev/null
+++ b/src/mlpack/methods/lmf/update_rules/nmf_als.hpp
@@ -0,0 +1,109 @@
+/**
+ * @file nmf_als.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * titled 'Alternating Least Squares' described in the paper 'Positive Matrix
+ * Factorization: A Non-negative Factor Model with Optimal Utilization of
+ * Error Estimates of Data Values' by P. Paatero and U. Tapper. It uses least
+ * squares projection formula to reduce the error value of
+ * \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ by alternately calculating W and H
+ * respectively while holding the other matrix constant.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_ALS_HPP
+#define __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_ALS_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace lmf {
+
+/**
+ * The alternating least square update rules of matrices W and H.
+ */
+class NMF_ALSUpdate
+{
+ public:
+  // Empty constructor required for the UpdateRule template.
+  NMF_ALSUpdate() { }
+
+  /**
+   * The update rule for the basis matrix W. The formula used is
+   * \f[
+   * W^T = \frac{HV^T}{HH^T}
+   * \f]
+   * The function takes in all the matrices and only changes the
+   * value of the W matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be updated.
+   * @param H Encoding matrix.
+   */
+  template<typename MatType>
+  inline static void WUpdate(const MatType& V,
+                             arma::mat& W,
+                             const arma::mat& H)
+  {
+    // The call to inv() sometimes fails; so we are using the psuedoinverse.
+    // W = (inv(H * H.t()) * H * V.t()).t();
+    W = V * H.t() * pinv(H * H.t());
+
+    // Set all negative numbers to machine epsilon
+    for (size_t i = 0; i < W.n_elem; i++)
+    {
+      if (W(i) < 0.0)
+      {
+        W(i) = 0.0;
+      }
+    }
+  }
+
+  /**
+   * The update rule for the encoding matrix H. The formula used is
+   * \f[
+   * H = \frac{W^TV}{W^TW}
+   * \f]
+   * The function takes in all the matrices and only changes the
+   * value of the H matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix.
+   * @param H Encoding matrix to be updated.
+   */
+  template<typename MatType>
+  inline static void HUpdate(const MatType& V,
+                             const arma::mat& W,
+                             arma::mat& H)
+  {
+    H = pinv(W.t() * W) * W.t() * V;
+
+    // Set all negative numbers to 0.
+    for (size_t i = 0; i < H.n_elem; i++)
+    {
+      if (H(i) < 0.0)
+      {
+        H(i) = 0.0;
+      }
+    }
+  }
+};
+
+}; // namespace lmf
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/lmf/update_rules/nmf_mult_dist.hpp b/src/mlpack/methods/lmf/update_rules/nmf_mult_dist.hpp
new file mode 100644
index 0000000..72a3541
--- /dev/null
+++ b/src/mlpack/methods/lmf/update_rules/nmf_mult_dist.hpp
@@ -0,0 +1,87 @@
+/**
+ * @file nmf_mult_dist.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * described in the paper 'Algorithms for Non-negative Matrix Factorization'
+ * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
+ * that the Frobenius norm \f$ \sqrt{\sum_i \sum_j(V-WH)^2} \f$ is
+ * non-increasing between subsequent iterations. Both of the update rules
+ * for W and H are defined in this file.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIST_UPDATE_RULES_HPP
+#define __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIST_UPDATE_RULES_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace lmf {
+
+/**
+ * The multiplicative distance update rules for matrices W and H.
+ */
+class NMF_MultiplicativeDistanceUpdate
+{
+ public:
+  // Empty constructor required for the UpdateRule template.
+  NMF_MultiplicativeDistanceUpdate() { }
+
+  /**
+   * The update rule for the basis matrix W. The formula used is
+   * \f[
+   * W_{ia} \leftarrow W_{ia} \frac{(VH^T)_{ia}}{
+   * The function takes in all the matrices and only changes the
+   * value of the W matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be updated.
+   * @param H Encoding matrix.
+   */
+  template<typename MatType>
+  inline static void WUpdate(const MatType& V,
+                             arma::mat& W,
+                             const arma::mat& H)
+  {
+    W = (W % (V * H.t())) / (W * H * H.t());
+  }
+
+  /**
+   * The update rule for the encoding matrix H. The formula used is
+   * \f[
+   * H_{a\mu} \leftarrow H_{a\mu} \frac{(W^T V)_{a\mu}}{(W^T WH)_{a\mu}}
+   * \f]
+   * The function takes in all the matrices and only changes the
+   * value of the H matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix.
+   * @param H Encoding matrix to be updated.
+   */
+  template<typename MatType>
+  inline static void HUpdate(const MatType& V,
+                             const arma::mat& W,
+                             arma::mat& H)
+  {
+    H = (H % (W.t() * V)) / (W.t() * W * H);
+  }
+};
+
+}; // namespace lmf
+}; // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/lmf/update_rules/nmf_mult_div.hpp b/src/mlpack/methods/lmf/update_rules/nmf_mult_div.hpp
new file mode 100644
index 0000000..73ee0d5
--- /dev/null
+++ b/src/mlpack/methods/lmf/update_rules/nmf_mult_div.hpp
@@ -0,0 +1,131 @@
+/**
+ * @file nmf_mult_div.hpp
+ * @author Mohan Rajendran
+ *
+ * Update rules for the Non-negative Matrix Factorization. This follows a method
+ * described in the paper 'Algorithms for Non-negative Matrix Factorization'
+ * by D. D. Lee and H. S. Seung. This is a multiplicative rule that ensures
+ * that the Kullback–Leibler divergence
+ * \f$ \sum_i \sum_j (V_{ij} log\frac{V_{ij}}{(WH)_{ij}}-V_{ij}+(WH)_{ij}) \f$is
+ * non-increasing between subsequent iterations. Both of the update rules
+ * for W and H are defined in this file.
+ *
+ * This file is part of MLPACK 1.0.8.
+ *
+ * MLPACK is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * MLPACK is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+ * details (LICENSE.txt).
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * MLPACK.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIV_HPP
+#define __MLPACK_METHODS_LMF_UPDATE_RULES_NMF_MULT_DIV_HPP
+
+#include <mlpack/core.hpp>
+
+namespace mlpack {
+namespace lmf {
+
+
+class NMF_MultiplicativeDivergenceUpdate
+{
+ public:
+  // Empty constructor required for the WUpdateRule template.
+  NMF_MultiplicativeDivergenceUpdate() { }
+
+  /**
+   * The update rule for the basis matrix W. The formula used is
+   * \f[
+   * W_{ia} \leftarrow W_{ia} \frac{\sum_{\mu} H_{a\mu} V_{i\mu}/(WH)_{i\mu}}
+   * {\sum_{\nu} H_{a\nu}}
+   * \f]
+   * The function takes in all the matrices and only changes the 
+   * value of the W matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix to be updated.
+   * @param H Encoding matrix.
+   */
+  template<typename MatType>
+  inline static void WUpdate(const MatType& V,
+                            arma::mat& W,
+                            const arma::mat& H)
+  {
+    // Simple implementation left in the header file.
+    arma::mat t1;
+    arma::rowvec t2;
+
+    t1 = W * H;
+    for (size_t i = 0; i < W.n_rows; ++i)
+    {
+      for (size_t j = 0; j < W.n_cols; ++j)
+      {
+        // Writing this as a single expression does not work as of Armadillo
+        // 3.920.  This should be fixed in a future release, and then the code
+        // below can be fixed.
+        //t2 = H.row(j) % V.row(i) / t1.row(i);
+        t2.set_size(H.n_cols);
+        for (size_t k = 0; k < t2.n_elem; ++k)
+        {
+          t2(k) = H(j, k) * V(i, k) / t1(i, k);
+        }
+
+        W(i, j) = W(i, j) * sum(t2) / sum(H.row(j));
+      }
+    }
+  }
+
+  /**
+   * The update rule for the encoding matrix H. The formula used is
+   * \f[
+   * H_{a\mu} \leftarrow H_{a\mu} \frac{\sum_{i} W_{ia} V_{i\mu}/(WH)_{i\mu}}
+   * {\sum_{k} H_{ka}}
+   * \f]
+   * The function takes in all the matrices and only changes the value 
+   * of the H matrix.
+   *
+   * @param V Input matrix to be factorized.
+   * @param W Basis matrix.
+   * @param H Encoding matrix to updated.
+   */
+  template<typename MatType>
+  inline static void HUpdate(const MatType& V,
+                            const arma::mat& W,
+                            arma::mat& H)
+  {
+    // Simple implementation left in the header file.
+    arma::mat t1;
+    arma::colvec t2;
+
+    t1 = W * H;
+    for (size_t i = 0; i < H.n_rows; i++)
+    {
+      for (size_t j = 0; j < H.n_cols; j++)
+      {
+        // Writing this as a single expression does not work as of Armadillo
+        // 3.920.  This should be fixed in a future release, and then the code
+        // below can be fixed.
+        //t2 = W.col(i) % V.col(j) / t1.col(j);
+        t2.set_size(W.n_rows);
+        for (size_t k = 0; k < t2.n_elem; ++k)
+        {
+          t2(k) = W(k, i) * V(k, j) / t1(k, j);
+        }
+
+        H(i,j) = H(i,j) * sum(t2) / sum(W.col(i));
+      }
+    }
+  }
+};
+
+}; // namespace lmf
+}; // namespace mlpack
+
+#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list