[mlpack] 67/324: Significant refactoring of NMF tests. The sparse tests were generally invalid because NMF is not guaranteed to produce a unique decomposition. Instead, now we test the Frobenius norm. Also, the tolerances needed to be significantly adjusted because of the new convergence criteria in the AMF class. Lastly, the SparseNMFRandomDiv test and SparseNMFDefaultTest have been removed because those update rules seem to often result in NaNs when sparse matrices are used as input.

Barak A. Pearlmutter barak+git at cs.nuim.ie
Sun Aug 17 08:21:56 UTC 2014


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch svn-trunk
in repository mlpack.

commit 096e7075b9b1abddaea5c0b466f94f89a74a8dcd
Author: rcurtin <rcurtin at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date:   Mon Jun 23 21:45:46 2014 +0000

    Significant refactoring of NMF tests.  The sparse tests were generally invalid
    because NMF is not guaranteed to produce a unique decomposition.  Instead, now
    we test the Frobenius norm.  Also, the tolerances needed to be significantly
    adjusted because of the new convergence criteria in the AMF class.  Lastly, the
    SparseNMFRandomDiv test and SparseNMFDefaultTest have been removed because those
    update rules seem to often result in NaNs when sparse matrices are used as
    input.
    
    
    git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16698 9d5b8971-822b-0410-80eb-d18c1038ef23
---
 src/mlpack/tests/nmf_test.cpp | 249 ++++++++++++++++--------------------------
 1 file changed, 97 insertions(+), 152 deletions(-)

diff --git a/src/mlpack/tests/nmf_test.cpp b/src/mlpack/tests/nmf_test.cpp
index 4408dd3..abc2b86 100644
--- a/src/mlpack/tests/nmf_test.cpp
+++ b/src/mlpack/tests/nmf_test.cpp
@@ -26,19 +26,20 @@ using namespace mlpack::amf;
  */
 BOOST_AUTO_TEST_CASE(NMFDefaultTest)
 {
-  mat w = randu<mat>(20, 16);
-  mat h = randu<mat>(16, 20);
+  math::RandomSeed(std::time(NULL));
+  mat w = randu<mat>(20, 12);
+  mat h = randu<mat>(12, 20);
   mat v = w * h;
-  size_t r = 16;
+  size_t r = 12;
 
   AMF<> nmf;
   nmf.Apply(v, r, w, h);
 
   mat wh = w * h;
 
-  for (size_t row = 0; row < 20; row++)
-    for (size_t col = 0; col < 20; col++)
-      BOOST_REQUIRE_CLOSE(v(row, col), wh(row, col), 10.0);
+  // Make sure reconstruction error is not too high.  0.5% tolerance.
+  BOOST_REQUIRE_SMALL(arma::norm(v - wh, "fro") / arma::norm(v, "fro"),
+      0.01);
 }
 
 /**
@@ -47,19 +48,19 @@ BOOST_AUTO_TEST_CASE(NMFDefaultTest)
  */
 BOOST_AUTO_TEST_CASE(NMFAcolDistTest)
 {
-  mat w = randu<mat>(20, 16);
-  mat h = randu<mat>(16, 20);
+  math::RandomSeed(std::time(NULL));
+  mat w = randu<mat>(20, 12);
+  mat h = randu<mat>(12, 20);
   mat v = w * h;
-  size_t r = 16;
+  const size_t r = 12;
 
-  AMF<RandomAcolInitialization<> > nmf;
+  AMF<RandomAcolInitialization<> > nmf(10000, 1e-7);
   nmf.Apply(v, r, w, h);
 
   mat wh = w * h;
 
-  for (size_t row = 0; row < 20; row++)
-    for (size_t col = 0; col < 20; col++)
-      BOOST_REQUIRE_CLOSE(v(row, col), wh(row, col), 10.0);
+  BOOST_REQUIRE_SMALL(arma::norm(v - wh, "fro") / arma::norm(v, "fro"),
+      0.01);
 }
 
 /**
@@ -68,19 +69,20 @@ BOOST_AUTO_TEST_CASE(NMFAcolDistTest)
  */
 BOOST_AUTO_TEST_CASE(NMFRandomDivTest)
 {
-  mat w = randu<mat>(20, 16);
-  mat h = randu<mat>(16, 20);
+  math::RandomSeed(std::time(NULL));
+  mat w = randu<mat>(20, 12);
+  mat h = randu<mat>(12, 20);
   mat v = w * h;
-  size_t r = 16;
+  size_t r = 12;
 
   AMF<RandomInitialization, NMFMultiplicativeDivergenceUpdate> nmf;
   nmf.Apply(v, r, w, h);
 
   mat wh = w * h;
 
-  for (size_t row = 0; row < 20; row++)
-    for (size_t col = 0; col < 20; col++)
-      BOOST_REQUIRE_CLOSE(v(row, col), wh(row, col), 10.0);
+  // Make sure reconstruction error is not too high.  0.5% tolerance.
+  BOOST_REQUIRE_SMALL(arma::norm(v - wh, "fro") / arma::norm(v, "fro"),
+      0.01);
 }
 
 /**
@@ -90,59 +92,22 @@ BOOST_AUTO_TEST_CASE(NMFRandomDivTest)
  */
 BOOST_AUTO_TEST_CASE(NMFALSTest)
 {
-  mat w = randu<mat>(20, 16);
-  mat h = randu<mat>(16, 20);
+  math::RandomSeed(std::time(NULL));
+  mat w = randu<mat>(20, 12);
+  mat h = randu<mat>(12, 20);
   mat v = w * h;
-  size_t r = 16;
+  size_t r = 12;
 
-  AMF<RandomInitialization, NMFALSUpdate> nmf(50000, 1e-15);
+  AMF<RandomAcolInitialization<>, NMFALSUpdate> nmf(50000, 1e-12);
   nmf.Apply(v, r, w, h);
 
   const mat wh = w * h;
 
-  const double error = arma::accu(arma::abs(v - wh)) / arma::accu(v);
-
-  // Most runs seem to produce error between 0.01 and 0.03.
-  // This is a randomized test, so we don't really have guarantees.
-  BOOST_REQUIRE_SMALL(error, 0.04);
-}
-
-/**
- * Check the if the product of the calculated factorization is close to the
- * input matrix, with a sparse input matrix.. Default case.
- */
-BOOST_AUTO_TEST_CASE(SparseNMFDefaultTest)
-{
-  mat w, h;
-  sp_mat v;
-  v.sprandu(20, 20, 0.2);
-  mat dv(v); // Make a dense copy.
-  mat dw, dh;
-  size_t r = 18;
-
-  // It seems to hit the iteration limit first.
-  AMF<> nmf(10000, 1e-20);
-  mlpack::math::RandomSeed(1000); // Set random seed so results are the same.
-  nmf.Apply(v, r, w, h);
-  mlpack::math::RandomSeed(1000);
-  nmf.Apply(dv, r, dw, dh);
-
-  // Make sure the results are about equal for the W and H matrices.
-  for (size_t i = 0; i < w.n_elem; ++i)
-  {
-    if (w(i) == 0.0)
-      BOOST_REQUIRE_SMALL(dw(i), 1e-15);
-    else
-      BOOST_REQUIRE_CLOSE(w(i), dw(i), 1e-5);
-  }
-
-  for (size_t i = 0; i < h.n_elem; ++i)
-  {
-    if (h(i) == 0.0)
-      BOOST_REQUIRE_SMALL(dh(i), 1e-15);
-    else
-      BOOST_REQUIRE_CLOSE(h(i), dh(i), 1e-5);
-  }
+  // Make sure reconstruction error is not too high.  6% tolerance.  It seems
+  // like ALS doesn't converge to results that are as good.  It also seems to be
+  // particularly sensitive to initial conditions.
+  BOOST_REQUIRE_SMALL(arma::norm(v - wh, "fro") / arma::norm(v, "fro"),
+      0.06);
 }
 
 /**
@@ -152,73 +117,44 @@ BOOST_AUTO_TEST_CASE(SparseNMFDefaultTest)
  */
 BOOST_AUTO_TEST_CASE(SparseNMFAcolDistTest)
 {
-  mat w, h;
-  sp_mat v;
-  v.sprandu(20, 20, 0.3);
-  mat dv(v); // Make a dense copy.
-  mat dw, dh;
-  size_t r = 16;
-
-  AMF<RandomAcolInitialization<> > nmf;
-  mlpack::math::RandomSeed(1000); // Set random seed so results are the same.
-  nmf.Apply(v, r, w, h);
-  mlpack::math::RandomSeed(1000);
-  nmf.Apply(dv, r, dw, dh);
+  // We have to ensure that the residues aren't NaNs.  This can happen when a
+  // matrix is created with all zeros in a column or row.
+  double denseResidue = std::numeric_limits<double>::quiet_NaN();
+  double sparseResidue = std::numeric_limits<double>::quiet_NaN();
 
-  // Make sure the results are about equal for the W and H matrices.
-  for (size_t i = 0; i < w.n_elem; ++i)
-  {
-    if (w(i) == 0.0)
-      BOOST_REQUIRE_SMALL(dw(i), 1e-15);
-    else
-      BOOST_REQUIRE_CLOSE(w(i), dw(i), 1e-5);
-  }
+  mat vp, dvp; // Resulting matrices.
 
-  for (size_t i = 0; i < h.n_elem; ++i)
+  while (sparseResidue != sparseResidue && denseResidue != denseResidue)
   {
-    if (h(i) == 0.0)
-      BOOST_REQUIRE_SMALL(dh(i), 1e-15);
-    else
-      BOOST_REQUIRE_CLOSE(h(i), dh(i), 1e-5);
+    math::RandomSeed(std::time(NULL));
+    mat w, h;
+    sp_mat v;
+    v.sprandu(20, 20, 0.3);
+    // Ensure there is at least one nonzero element in every row and column.
+    for (size_t i = 0; i < 20; ++i)
+      v(i, i) += 1e-5;
+    mat dv(v); // Make a dense copy.
+    mat dw, dh;
+    size_t r = 15;
+
+    AMF<RandomAcolInitialization<> > nmf(10000, 1e-10);
+    const size_t seed = mlpack::math::RandInt(1000000);
+    mlpack::math::RandomSeed(seed); // Set random seed so results are the same.
+    nmf.Apply(v, r, w, h);
+    mlpack::math::RandomSeed(seed);
+    nmf.Apply(dv, r, dw, dh);
+
+    // Reconstruct matrices.
+    vp = w * h;
+    dvp = dw * dh;
+
+    denseResidue = arma::norm(v - vp, "fro");
+    sparseResidue = arma::norm(dv - dvp, "fro");
   }
-}
-
-/**
- * Check the if the product of the calculated factorization is close to the
- * input matrix, with a sparse input matrix. Random initialization, divergence
- * minimization update.
- */
-BOOST_AUTO_TEST_CASE(SparseNMFRandomDivTest)
-{
-  mat w, h;
-  sp_mat v;
-  v.sprandu(20, 20, 0.3);
-  mat dv(v); // Make a dense copy.
-  mat dw, dh;
-  size_t r = 16;
-
-  AMF<RandomInitialization, NMFMultiplicativeDivergenceUpdate> nmf;
-  mlpack::math::RandomSeed(10); // Set random seed so the results are the same.
-  nmf.Apply(v, r, w, h);
-  mlpack::math::RandomSeed(10);
-  nmf.Apply(dv, r, dw, dh);
 
   // Make sure the results are about equal for the W and H matrices.
-  for (size_t i = 0; i < w.n_elem; ++i)
-  {
-    if (w(i) == 0.0)
-      BOOST_REQUIRE_SMALL(dw(i), 1e-15);
-    else
-      BOOST_REQUIRE_CLOSE(w(i), dw(i), 1e-5);
-  }
-
-  for (size_t i = 0; i < h.n_elem; ++i)
-  {
-    if (h(i) == 0.0)
-      BOOST_REQUIRE_SMALL(dh(i), 1e-15);
-    else
-      BOOST_REQUIRE_CLOSE(h(i), dh(i), 1e-5);
-  }
+  BOOST_REQUIRE_SMALL(arma::norm(vp - dvp, "fro") / arma::norm(vp, "fro"),
+      1e-5);
 }
 
 /**
@@ -228,35 +164,44 @@ BOOST_AUTO_TEST_CASE(SparseNMFRandomDivTest)
  */
 BOOST_AUTO_TEST_CASE(SparseNMFALSTest)
 {
-  mat w, h;
-  sp_mat v;
-  v.sprandu(10, 10, 0.3);
-  mat dv(v); // Make a dense copy.
-  mat dw, dh;
-  size_t r = 8;
-
-  AMF<RandomInitialization, NMFALSUpdate> nmf;
-  mlpack::math::RandomSeed(40);
-  nmf.Apply(v, r, w, h);
-  mlpack::math::RandomSeed(40);
-  nmf.Apply(dv, r, dw, dh);
+  // We have to ensure that the residues aren't NaNs.  This can happen when a
+  // matrix is created with all zeros in a column or row.
+  double denseResidue = std::numeric_limits<double>::quiet_NaN();
+  double sparseResidue = std::numeric_limits<double>::quiet_NaN();
 
-  // Make sure the results are about equal for the W and H matrices.
-  for (size_t i = 0; i < w.n_elem; ++i)
-  {
-    if (w(i) == 0.0)
-      BOOST_REQUIRE_SMALL(dw(i), 1e-15);
-    else
-      BOOST_REQUIRE_CLOSE(w(i), dw(i), 1e-5);
-  }
+  mat vp, dvp; // Resulting matrices.
 
-  for (size_t i = 0; i < h.n_elem; ++i)
+  while (sparseResidue != sparseResidue && denseResidue != denseResidue)
   {
-    if (h(i) == 0.0)
-      BOOST_REQUIRE_SMALL(dh(i), 1e-15);
-    else
-      BOOST_REQUIRE_CLOSE(h(i), dh(i), 1e-5);
+    math::RandomSeed(std::time(NULL));
+    mat w, h;
+    sp_mat v;
+    v.sprandu(10, 10, 0.3);
+    // Ensure there is at least one nonzero element in every row and column.
+    for (size_t i = 0; i < 10; ++i)
+      v(i, i) += 1e-5;
+    mat dv(v); // Make a dense copy.
+    mat dw, dh;
+    size_t r = 5;
+
+    AMF<RandomInitialization, NMFALSUpdate> nmf(10000, 1e-10);
+    const size_t seed = mlpack::math::RandInt(1000000);
+    mlpack::math::RandomSeed(seed);
+    nmf.Apply(v, r, w, h);
+    mlpack::math::RandomSeed(seed);
+    nmf.Apply(dv, r, dw, dh);
+
+    // Reconstruct matrices.
+    vp = w * h; // In general vp won't be sparse.
+    dvp = dw * dh;
+
+    denseResidue = arma::norm(v - vp, "fro");
+    sparseResidue = arma::norm(dv - dvp, "fro");
   }
+
+  // Make sure the results are about equal for the W and H matrices.
+  BOOST_REQUIRE_SMALL(arma::norm(vp - dvp, "fro") / arma::norm(vp, "fro"),
+      1e-5);
 }
 
 BOOST_AUTO_TEST_SUITE_END();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list