[vlfeat] 21/44: [enhancement] gmm: improves the algorithm used to restart empty Gaussian components

Dima Kogan dima at secretsauce.net
Wed Sep 16 21:05:30 UTC 2015


This is an automated email from the git hooks/post-receive script.

dkogan-guest pushed a commit to branch master
in repository vlfeat.

commit a54ac89a9f4d7e9eb9ee9f0cfc6e077d209f9083
Author: Andrea Vedaldi <vedaldi at gmail.com>
Date:   Wed Jan 14 16:38:34 2015 +0000

    [enhancement] gmm: improves the algorithm used to restart empty Gaussian components
    
    This patch corrects the criterion used to decide which Gaussian components should be divided in order to restart a dead Gaussian component. The previous version was using an incorrect derivation of the math.
---
 toolbox/gmm/vl_gmm.m |  5 +++--
 vl/gmm.c             | 36 ++++++++++++++++++++++++++----------
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/toolbox/gmm/vl_gmm.m b/toolbox/gmm/vl_gmm.m
index 28dbea3..c9f6aa1 100644
--- a/toolbox/gmm/vl_gmm.m
+++ b/toolbox/gmm/vl_gmm.m
@@ -48,8 +48,9 @@
 %     may yield degenerate solutions, including NaNs.
 %
 %   Example::
-%     VL_GMM(X, 10, 'verbose', 'MaxNumIterations', 20) estimates the
-%     mixture of 10 gaussians using at mosst 20 iterations.
+%     VL_GMM(X, 10, 'verbose', 'MaxNumIterations', 20) learns a
+%     mixture of ten Gaussians using at most twenty iterations of the
+%     algorithm.
 %
 %   See also: <a href="matlab:vl_help('gmm')">GMMs</a>, VL_KMEANS(), VL_HELP().
 
diff --git a/vl/gmm.c b/vl/gmm.c
index 33ad794..19c8be7 100644
--- a/vl/gmm.c
+++ b/vl/gmm.c
@@ -896,23 +896,39 @@ VL_XCAT(_vl_gmm_restart_empty_modes_, SFX) (VlGMM * self, TYPE const * data)
     }
 
     /*
-     Search for the cluster that (approximately)
-     maximally contribute to make the log-likelihood
-     small.
+     Search for the Gaussian components that (approximately)
+     maximally contribute to make the negative log-likelihood of the data
+     large. Then split the worst offender.
+     
+     To do so, we approximate the expected negative log-likelihood of the GMM:
+     
+     E[-log(f(x))] = H(f) = - \int f(x) log f(x) dx
+    
+     where the density f(x) = sum_k pk gk(x) is a GMM. This is intractable
+     but it is easy to approximate if we suppose that supp gk is disjoint with
+     supp gq for all components k ~= q. In this case
+     
+     H(f) ~= sum_k [ - pk log(pk) + pk H(gk) ]
+     
+     where H(gk) is the entropy of component k taken alone. The entropy of
+     the latter is given by:
+     
+     H(gk) = D/2 (1 + log(2pi)) + 1/2 sum_{i=1}^D log sigma_i^2
+
      */
 
     for (j_cl = 0 ; j_cl < (signed)numClusters ; ++j_cl) {
       double size_ ;
       if (priors[j_cl] < VL_GMM_MIN_PRIOR) { continue ; }
-      size_ = - 0.5 * (1.0 + log(2*VL_PI)) ;
+      size_ = + 0.5 * dimension * (1.0 + log(2*VL_PI)) ;
       for(d = 0 ; d < (signed)dimension ; d++) {
         double sigma2 = covariances[j_cl * dimension + d] ;
-        size_ -= 0.5 * log(sigma2) ;
+        size_ += 0.5 * log(sigma2) ;
       }
-      size_ *= priors[j_cl] ;
+      size_ = priors[j_cl] * (size_ - log(priors[j_cl])) ;
 
-      if (self->verbosity > 2) {
-        VL_PRINTF("gmm: mode %d: prior %f, mass %f, score %f\n",
+      if (self->verbosity > 1) {
+        VL_PRINTF("gmm: mode %d: prior %f, mass %f, entropy contribution %f\n",
                   j_cl, priors[j_cl], mass[j_cl], size_) ;
       }
 
@@ -962,12 +978,12 @@ VL_XCAT(_vl_gmm_restart_empty_modes_, SFX) (VlGMM * self, TYPE const * data)
         TYPE q = posteriors[i_cl + self->numClusters * i_d] ; /* ~= 0 */
         if (data[best + i_d * self->dimension] < mu) {
           /* assign this point to i_cl */
-          posteriors[i_cl + self->numClusters * i_d] += p ;
+          posteriors[i_cl + self->numClusters * i_d] = p + q ;
           posteriors[j_cl + self->numClusters * i_d] = 0 ;
         } else {
           /* assign this point to j_cl */
           posteriors[i_cl + self->numClusters * i_d] = 0 ;
-          posteriors[j_cl + self->numClusters * i_d] += q ;
+          posteriors[j_cl + self->numClusters * i_d] = p + q ;
         }
       }
     }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/vlfeat.git



More information about the debian-science-commits mailing list