[mlpack] 81/324: * Templatized termination of AMF matrix factorization * added 3 termination policies 1) SimpleResidueTermination 2) SimpleToleranceTermination 3) ValidationRMSETermination
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:21:58 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit 4bf285247abd209baf62697cb25ed133f38cf4b2
Author: sumedhghaisas <sumedhghaisas at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Wed Jun 25 22:26:51 2014 +0000
* Templatized termination of AMF matrix factorization
* added 3 termination policies
1) SimpleResidueTermination
2) SimpleToleranceTermination
3) ValidationRMSETermination
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16715 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/amf/CMakeLists.txt | 1 +
src/mlpack/methods/amf/amf.hpp | 29 ++++-----
src/mlpack/methods/amf/amf_impl.hpp | 63 ++++++------------
src/mlpack/methods/amf/amf_main.cpp | 13 +++-
.../amf/{ => termination_policies}/CMakeLists.txt | 15 +----
.../simple_residue_termination.hpp | 73 +++++++++++++++++++++
.../simple_tolerance_termination.hpp | 76 ++++++++++++++++++++++
src/mlpack/methods/cf/cf_impl.hpp | 7 +-
src/mlpack/methods/nmf/nmf_main.cpp | 13 +++-
src/mlpack/tests/nmf_test.cpp | 17 +++--
10 files changed, 218 insertions(+), 89 deletions(-)
diff --git a/src/mlpack/methods/amf/CMakeLists.txt b/src/mlpack/methods/amf/CMakeLists.txt
index 1cdec9e..2abbe85 100644
--- a/src/mlpack/methods/amf/CMakeLists.txt
+++ b/src/mlpack/methods/amf/CMakeLists.txt
@@ -16,6 +16,7 @@ set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
add_subdirectory(update_rules)
add_subdirectory(init_rules)
+add_subdirectory(termination_policies)
add_executable(amf
amf_main.cpp
diff --git a/src/mlpack/methods/amf/amf.hpp b/src/mlpack/methods/amf/amf.hpp
index d505c88..fa54fba 100644
--- a/src/mlpack/methods/amf/amf.hpp
+++ b/src/mlpack/methods/amf/amf.hpp
@@ -8,6 +8,7 @@
#include <mlpack/core.hpp>
#include <mlpack/methods/amf/update_rules/nmf_mult_dist.hpp>
#include <mlpack/methods/amf/init_rules/random_init.hpp>
+#include <mlpack/methods/amf/termination_policies/simple_residue_termination.hpp>
namespace mlpack {
namespace amf {
@@ -45,7 +46,8 @@ namespace amf {
* @see NMF_MultiplicativeDistanceUpdate
*/
template<typename InitializationRule = RandomInitialization,
- typename UpdateRule = NMFMultiplicativeDistanceUpdate>
+ typename UpdateRule = NMFMultiplicativeDistanceUpdate,
+ typename TerminationPolicy = SimpleResidueTermination>
class AMF
{
public:
@@ -66,10 +68,9 @@ class AMF
* @param Update Optional UpdateRule object; for when the update rule for
* the W and H vector has states that it needs to store
*/
- AMF(const size_t maxIterations = 10000,
- const double tolerance = 1e-5,
- const InitializationRule initializeRule = InitializationRule(),
- const UpdateRule update = UpdateRule());
+ AMF(const InitializationRule& initializeRule = InitializationRule(),
+ const UpdateRule& update = UpdateRule(),
+ const TerminationPolicy& t_policy = TerminationPolicy());
/**
* Apply Latent Matrix Factorization to the provided matrix.
@@ -86,24 +87,14 @@ class AMF
arma::mat& H);
private:
- //! The maximum number of iterations allowed before giving up.
- size_t maxIterations;
- //! The minimum residue, below which iteration is considered converged.
- double tolerance;
//! Instantiated initialization Rule.
InitializationRule initializeRule;
//! Instantiated update rule.
UpdateRule update;
+ //! termination policy
+ TerminationPolicy t_policy;
public:
- //! Access the maximum number of iterations.
- size_t MaxIterations() const { return maxIterations; }
- //! Modify the maximum number of iterations.
- size_t& MaxIterations() { return maxIterations; }
- //! Access the minimum residue before termination.
- double Tolerance() const { return tolerance; }
- //! Modify the minimum residue before termination.
- double& Tolerance() { return tolerance; }
//! Access the initialization rule.
const InitializationRule& InitializeRule() const { return initializeRule; }
//! Modify the initialization rule.
@@ -112,6 +103,10 @@ class AMF
const UpdateRule& Update() const { return update; }
//! Modify the update rule.
UpdateRule& Update() { return update; }
+ //! Access the termination policy
+ const TerminationPolicy& TPolicy() const { return t_policy; }
+ //! Modify the termination policy
+ TerminationPolicy& TPolicy() { return t_policy; }
}; // class AMF
diff --git a/src/mlpack/methods/amf/amf_impl.hpp b/src/mlpack/methods/amf/amf_impl.hpp
index 9ee69a4..f226cd1 100644
--- a/src/mlpack/methods/amf/amf_impl.hpp
+++ b/src/mlpack/methods/amf/amf_impl.hpp
@@ -9,24 +9,16 @@ namespace amf {
* Construct the LMF object.
*/
template<typename InitializationRule,
- typename UpdateRule>
-AMF<InitializationRule, UpdateRule>::AMF(
- const size_t maxIterations,
- const double tolerance,
- const InitializationRule initializeRule,
- const UpdateRule update) :
- maxIterations(maxIterations),
- tolerance(tolerance),
+ typename UpdateRule,
+ typename TerminationPolicy>
+AMF<InitializationRule, UpdateRule, TerminationPolicy>::AMF(
+ const InitializationRule& initializeRule,
+ const UpdateRule& update,
+ const TerminationPolicy& t_policy) :
initializeRule(initializeRule),
- update(update)
-{
- if (tolerance < 0.0 || tolerance > 1)
- {
- Log::Warn << "AMF::AMF(): tolerance must be a positive value in the range (0-1) but value "
- << tolerance << " is given. Setting to the default value of 1e-5.\n";
- this->tolerance = 1e-5;
- }
-}
+ update(update),
+ t_policy(t_policy)
+{ }
/**
* Apply Latent Matrix Factorization to the provided matrix.
@@ -37,56 +29,39 @@ AMF<InitializationRule, UpdateRule>::AMF(
* @param r Rank r of the factorization
*/
template<typename InitializationRule,
- typename UpdateRule>
+ typename UpdateRule,
+ typename TerminationPolicy>
template<typename MatType>
-double AMF<InitializationRule, UpdateRule>::Apply(
+double AMF<InitializationRule, UpdateRule, TerminationPolicy>::Apply(
const MatType& V,
const size_t r,
arma::mat& W,
arma::mat& H)
{
- const size_t n = V.n_rows;
- const size_t m = V.n_cols;
-
// Initialize W and H.
initializeRule.Initialize(V, r, W, H);
Log::Info << "Initialized W and H." << std::endl;
- size_t iteration = 1;
- const size_t nm = n * m;
- double residue = DBL_MIN;
- double oldResidue = DBL_MAX;
- double normOld = 0;
- double norm = 0;
arma::mat WH;
update.Initialize(V, r);
+ t_policy.Initialize(V);
- while ((std::abs(oldResidue - residue) / oldResidue >= tolerance || iteration < 4) && iteration != maxIterations)
+ while (!t_policy.IsConverged())
{
// Update step.
// Update the value of W and H based on the Update Rules provided
update.WUpdate(V, W, H);
update.HUpdate(V, W, H);
- // Calculate norm of WH after each iteration.
- WH = W * H;
- norm = sqrt(accu(WH % WH) / nm);
-
- if (iteration != 0)
- {
- oldResidue = residue;
- residue = fabs(normOld - norm);
- residue /= normOld;
- }
-
- normOld = norm;
-
- iteration++;
+ t_policy.Step(W, H);
}
- Log::Info << "AMF converged to residue of " << sqrt(residue) << " in "
+ double residue = sqrt(t_policy.Index());
+ size_t iteration = t_policy.Iteration();
+
+ Log::Info << "AMF converged to residue of " << residue << " in "
<< iteration << " iterations." << std::endl;
return residue;
diff --git a/src/mlpack/methods/amf/amf_main.cpp b/src/mlpack/methods/amf/amf_main.cpp
index a470dab..e92787d 100644
--- a/src/mlpack/methods/amf/amf_main.cpp
+++ b/src/mlpack/methods/amf/amf_main.cpp
@@ -7,6 +7,8 @@
#include "update_rules/nmf_mult_div.hpp"
#include "update_rules/nmf_als.hpp"
+#include "termination_policies/simple_residue_termination.hpp"
+
using namespace mlpack;
using namespace mlpack::amf;
using namespace std;
@@ -102,21 +104,26 @@ int main(int argc, char** argv)
{
Log::Info << "Performing AMF with multiplicative distance-based update(Non-negative Matrix Factorization) "
<< "rules." << std::endl;
- AMF<> amf(maxIterations, minResidue);
+ SimpleResidueTermination srt(minResidue, maxIterations);
+ AMF<> amf(RandomInitialization(), NMFMultiplicativeDistanceUpdate(), srt);
amf.Apply(V, r, W, H);
}
else if (updateRules == "multdiv")
{
Log::Info << "Performing NMF with multiplicative divergence-based update(Non-negative Matrix Factorization) "
<< "rules." << std::endl;
- AMF<RandomInitialization,NMFMultiplicativeDivergenceUpdate> amf(maxIterations, minResidue);
+ SimpleResidueTermination srt(minResidue, maxIterations);
+ AMF<RandomInitialization,NMFMultiplicativeDivergenceUpdate>
+ amf(RandomInitialization(), NMFMultiplicativeDivergenceUpdate(), srt);
amf.Apply(V, r, W, H);
}
else if (updateRules == "als")
{
Log::Info << "Performing NMF with alternating least squared update rules.(Non-negative Matrix Factorization)"
<< std::endl;
- AMF<RandomInitialization, NMFALSUpdate> amf(maxIterations, minResidue);
+ SimpleResidueTermination srt(minResidue, maxIterations);
+ AMF<RandomInitialization, NMFALSUpdate>
+ amf(RandomInitialization(), NMFALSUpdate(), srt);
amf.Apply(V, r, W, H);
}
diff --git a/src/mlpack/methods/amf/CMakeLists.txt b/src/mlpack/methods/amf/termination_policies/CMakeLists.txt
similarity index 67%
copy from src/mlpack/methods/amf/CMakeLists.txt
copy to src/mlpack/methods/amf/termination_policies/CMakeLists.txt
index 1cdec9e..988153a 100644
--- a/src/mlpack/methods/amf/CMakeLists.txt
+++ b/src/mlpack/methods/amf/termination_policies/CMakeLists.txt
@@ -1,8 +1,8 @@
# Define the files we need to compile
# Anything not in this list will not be compiled into MLPACK.
set(SOURCES
- amf.hpp
- amf_impl.hpp
+ simple_residue_termination.hpp
+ simple_tolerance_termination.hpp
)
# Add directory name to sources.
@@ -13,14 +13,3 @@ endforeach()
# Append sources (with directory name) to list of all MLPACK sources (used at
# the parent scope).
set(MLPACK_SRCS ${MLPACK_SRCS} ${DIR_SRCS} PARENT_SCOPE)
-
-add_subdirectory(update_rules)
-add_subdirectory(init_rules)
-
-add_executable(amf
- amf_main.cpp
-)
-target_link_libraries(amf
- mlpack
-)
-install(TARGETS amf RUNTIME DESTINATION bin)
diff --git a/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp b/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
new file mode 100644
index 0000000..1935a39
--- /dev/null
+++ b/src/mlpack/methods/amf/termination_policies/simple_residue_termination.hpp
@@ -0,0 +1,73 @@
+#ifndef _MLPACK_METHODS_AMF_SIMPLERESIDUETERMINATION_HPP_INCLUDED
+#define _MLPACK_METHODS_AMF_SIMPLERESIDUETERMINATION_HPP_INCLUDED
+
+#include <mlpack/core.hpp>
+
+namespace mlpack
+{
+namespace amf
+{
+class SimpleResidueTermination
+{
+public:
+ SimpleResidueTermination(const double minResidue = 1e-10,
+ const size_t maxIterations = 10000)
+ : minResidue(minResidue), maxIterations(maxIterations) { }
+
+ template<typename MatType>
+ void Initialize(MatType& V)
+ {
+ residue = minResidue;
+ iteration = 1;
+ normOld = 0;
+
+ const size_t n = V.n_rows;
+ const size_t m = V.n_cols;
+
+ nm = n * m;
+ }
+
+ bool IsConverged()
+ {
+ if(residue < minResidue || iteration > maxIterations) return true;
+ else return false;
+ }
+
+ template<typename MatType>
+ void Step(const MatType& W, const MatType& H)
+ {
+ // Calculate norm of WH after each iteration.
+ arma::mat WH;
+
+ WH = W * H;
+ double norm = sqrt(accu(WH % WH) / nm);
+
+ if (iteration != 0)
+ {
+ residue = fabs(normOld - norm);
+ residue /= normOld;
+ }
+
+ normOld = norm;
+
+ iteration++;
+ }
+
+ const double& Index() { return residue; }
+ const size_t& Iteration() { return iteration; }
+
+public:
+ double minResidue;
+ size_t maxIterations;
+
+ double residue;
+ size_t iteration;
+ double normOld;
+
+ size_t nm;
+};
+}
+}
+
+
+#endif // _MLPACK_METHODS_AMF_SIMPLERESIDUETERMINATION_HPP_INCLUDED
diff --git a/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp b/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
new file mode 100644
index 0000000..197a36f
--- /dev/null
+++ b/src/mlpack/methods/amf/termination_policies/simple_tolerance_termination.hpp
@@ -0,0 +1,76 @@
+#ifndef _MLPACK_METHODS_AMF_SIMPLE_TOLERANCE_TERMINATION_HPP_INCLUDED
+#define _MLPACK_METHODS_AMF_SIMPLE_TOLERANCE_TERMINATION_HPP_INCLUDED
+
+#include <mlpack/core.hpp>
+
+namespace mlpack
+{
+namespace amf
+{
+class SimpleToleranceTermination
+{
+public:
+ SimpleToleranceTermination(const double tolerance = 1e-5,
+ const size_t maxIterations = 10000)
+ : tolerance(tolerance), maxIterations(maxIterations) {}
+
+ template<typename MatType>
+ void Initialize(MatType& V)
+ {
+ residueOld = DBL_MAX;
+ iteration = 1;
+ normOld = 0;
+ residue = DBL_MIN;
+
+ const size_t n = V.n_rows;
+ const size_t m = V.n_cols;
+
+ nm = n * m;
+ }
+
+ bool IsConverged()
+ {
+ if(((residueOld - residue) / residueOld < tolerance && iteration > 4) || iteration > maxIterations) return true;
+ else return false;
+ }
+
+ template<typename MatType>
+ void Step(const MatType& W, const MatType& H)
+ {
+ // Calculate norm of WH after each iteration.
+ arma::mat WH;
+
+ WH = W * H;
+ double norm = sqrt(accu(WH % WH) / nm);
+
+ if (iteration != 0)
+ {
+ residueOld = residue;
+ residue = fabs(normOld - norm);
+ residue /= normOld;
+ }
+
+ normOld = norm;
+
+ iteration++;
+ }
+
+ const double& Index() { return residue; }
+ const size_t& Iteration() { return iteration; }
+
+private:
+ double tolerance;
+ size_t maxIterations;
+
+ size_t iteration;
+ double residueOld;
+ double residue;
+ double normOld;
+
+ size_t nm;
+};
+}
+}
+
+
+#endif // _MLPACK_METHODS_AMF_SIMPLE_TOLERANCE_TERMINATION_HPP_INCLUDED
diff --git a/src/mlpack/methods/cf/cf_impl.hpp b/src/mlpack/methods/cf/cf_impl.hpp
index 15fbe34..69419fe 100644
--- a/src/mlpack/methods/cf/cf_impl.hpp
+++ b/src/mlpack/methods/cf/cf_impl.hpp
@@ -21,7 +21,8 @@ CF<FactorizerType>::CF(arma::mat& data,
const size_t rank) :
data(data),
numUsersForSimilarity(numUsersForSimilarity),
- rank(rank)
+ rank(rank),
+ factorizer()
{
// Validate neighbourhood size.
if (numUsersForSimilarity < 1)
@@ -32,10 +33,6 @@ CF<FactorizerType>::CF(arma::mat& data,
this->numUsersForSimilarity = 5;
}
- // Set default factorizer.
- FactorizerType f(10000, 1e-5);
- Factorizer(f);
-
CleanData();
}
diff --git a/src/mlpack/methods/nmf/nmf_main.cpp b/src/mlpack/methods/nmf/nmf_main.cpp
index 10562e8..3bbe659 100644
--- a/src/mlpack/methods/nmf/nmf_main.cpp
+++ b/src/mlpack/methods/nmf/nmf_main.cpp
@@ -12,6 +12,7 @@
#include <mlpack/methods/amf/update_rules/nmf_mult_dist.hpp>
#include <mlpack/methods/amf/update_rules/nmf_mult_div.hpp>
#include <mlpack/methods/amf/update_rules/nmf_als.hpp>
+#include <mlpack/methods/amf/termination_policies/simple_residue_termination.hpp>
using namespace mlpack;
using namespace mlpack::amf;
@@ -104,21 +105,27 @@ int main(int argc, char** argv)
{
Log::Info << "Performing NMF with multiplicative distance-based update "
<< "rules." << std::endl;
- AMF<> amf(maxIterations, minResidue);
+
+ SimpleResidueTermination srt(minResidue, maxIterations);
+ AMF<> amf(RandomInitialization(), NMFMultiplicativeDistanceUpdate(), srt);
amf.Apply(V, r, W, H);
}
else if (updateRules == "multdiv")
{
Log::Info << "Performing NMF with multiplicative divergence-based update "
<< "rules." << std::endl;
- AMF<RandomInitialization, NMFMultiplicativeDivergenceUpdate> amf(maxIterations, minResidue);
+ SimpleResidueTermination srt(minResidue, maxIterations);
+ AMF<RandomInitialization, NMFMultiplicativeDivergenceUpdate>
+ amf(RandomInitialization(), NMFMultiplicativeDivergenceUpdate(), srt);
amf.Apply(V, r, W, H);
}
else if (updateRules == "als")
{
Log::Info << "Performing NMF with alternating least squared update rules."
<< std::endl;
- AMF<RandomInitialization, NMFALSUpdate> amf(maxIterations, minResidue);
+ SimpleResidueTermination srt(minResidue, maxIterations);
+ AMF<RandomInitialization, NMFALSUpdate>
+ amf(RandomInitialization(), NMFALSUpdate(), srt);
amf.Apply(V, r, W, H);
}
diff --git a/src/mlpack/tests/nmf_test.cpp b/src/mlpack/tests/nmf_test.cpp
index abc2b86..59aaa41 100644
--- a/src/mlpack/tests/nmf_test.cpp
+++ b/src/mlpack/tests/nmf_test.cpp
@@ -9,6 +9,7 @@
#include <mlpack/methods/amf/init_rules/random_acol_init.hpp>
#include <mlpack/methods/amf/update_rules/nmf_mult_div.hpp>
#include <mlpack/methods/amf/update_rules/nmf_als.hpp>
+#include <mlpack/methods/amf/update_rules/nmf_mult_dist.hpp>
#include <boost/test/unit_test.hpp>
#include "old_boost_test_definitions.hpp"
@@ -54,7 +55,9 @@ BOOST_AUTO_TEST_CASE(NMFAcolDistTest)
mat v = w * h;
const size_t r = 12;
- AMF<RandomAcolInitialization<> > nmf(10000, 1e-7);
+ SimpleResidueTermination srt(1e-7, 10000);
+ AMF<RandomAcolInitialization<> >
+ nmf(RandomAcolInitialization<>(), NMFMultiplicativeDistanceUpdate(), srt);
nmf.Apply(v, r, w, h);
mat wh = w * h;
@@ -98,7 +101,9 @@ BOOST_AUTO_TEST_CASE(NMFALSTest)
mat v = w * h;
size_t r = 12;
- AMF<RandomAcolInitialization<>, NMFALSUpdate> nmf(50000, 1e-12);
+ SimpleResidueTermination srt(1e-12, 50000);
+ AMF<RandomAcolInitialization<>, NMFALSUpdate>
+ nmf(RandomAcolInitialization<>(), NMFALSUpdate(), srt);
nmf.Apply(v, r, w, h);
const mat wh = w * h;
@@ -137,7 +142,9 @@ BOOST_AUTO_TEST_CASE(SparseNMFAcolDistTest)
mat dw, dh;
size_t r = 15;
- AMF<RandomAcolInitialization<> > nmf(10000, 1e-10);
+ SimpleResidueTermination srt(1e-10, 10000);
+ AMF<RandomAcolInitialization<> >
+ nmf(RandomAcolInitialization<>(), NMFMultiplicativeDistanceUpdate(), srt);
const size_t seed = mlpack::math::RandInt(1000000);
mlpack::math::RandomSeed(seed); // Set random seed so results are the same.
nmf.Apply(v, r, w, h);
@@ -184,7 +191,9 @@ BOOST_AUTO_TEST_CASE(SparseNMFALSTest)
mat dw, dh;
size_t r = 5;
- AMF<RandomInitialization, NMFALSUpdate> nmf(10000, 1e-10);
+ SimpleResidueTermination srt(1e-10, 10000);
+ AMF<RandomInitialization, NMFALSUpdate>
+ nmf(RandomInitialization(), NMFALSUpdate(), srt);
const size_t seed = mlpack::math::RandInt(1000000);
mlpack::math::RandomSeed(seed);
nmf.Apply(v, r, w, h);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list