[mlpack] 98/207: Fix support for multidimensional discrete distributions.

Barak A. Pearlmutter barak+git at pearlmutter.net
Thu Mar 23 17:53:44 UTC 2017


This is an automated email from the git hooks/post-receive script.

bap pushed a commit to branch master
in repository mlpack.

commit aabb5cfa3be9a766ddb0debdbbfd9b7fb8b2b276
Author: Ryan Curtin <ryan at ratml.org>
Date:   Mon Feb 27 09:24:12 2017 -0500

    Fix support for multidimensional discrete distributions.
---
 src/mlpack/methods/hmm/hmm_train_main.cpp | 140 +++++++++++++++++++++++++++++-
 1 file changed, 138 insertions(+), 2 deletions(-)

diff --git a/src/mlpack/methods/hmm/hmm_train_main.cpp b/src/mlpack/methods/hmm/hmm_train_main.cpp
index 20722b0..ea9fd78 100644
--- a/src/mlpack/methods/hmm/hmm_train_main.cpp
+++ b/src/mlpack/methods/hmm/hmm_train_main.cpp
@@ -44,9 +44,9 @@ PARAM_FLAG("batch", "If true, input_file (and if passed, labels_file) are "
     "expected to contain a list of files to use as input observation sequences "
     "(and label sequences).", "b");
 PARAM_INT_IN("states", "Number of hidden states in HMM (necessary, unless "
-    "model_file is specified.", "n", 0);
+    "model_file is specified).", "n", 0);
 PARAM_INT_IN("gaussians", "Number of gaussians in each GMM (necessary when type"
-    " is 'gmm'.", "g", 0);
+    " is 'gmm').", "g", 0);
 PARAM_STRING_IN("model_file", "Pre-existing HMM model file.", "m", "");
 PARAM_STRING_IN("labels_file", "Optional file of hidden states, used for "
     "labeled training.", "l", "");
@@ -66,6 +66,142 @@ using namespace std;
 
 // Because we don't know what the type of our HMM is, we need to write a
 // function that can take arbitrary HMM types.
+struct Init
+{
+  template<typename HMMType>
+  static void Apply(HMMType& hmm, vector<mat>* trainSeq)
+  {
+    const size_t states = CLI::GetParam<int>("states");
+    const double tolerance = CLI::GetParam<double>("tolerance");
+
+    // Create the initialized-to-zero model.
+    Create(hmm, *trainSeq, states, tolerance);
+
+    // Initializing the emission distribution depends on the distribution.
+    // Therefore we have to use the helper functions.
+    RandomInitialize(hmm.Emission());
+  }
+
+  //! Helper function to create discrete HMM.
+  static void Create(HMM<DiscreteDistribution>& hmm,
+                     vector<mat>& trainSeq,
+                     size_t states,
+                     double tolerance)
+  {
+    // Maximum observation is necessary so we know how to train the discrete
+    // distribution.
+    arma::Col<size_t> maxEmissions(trainSeq[0].n_rows);
+    maxEmissions.zeros();
+    for (vector<mat>::iterator it = trainSeq.begin(); it != trainSeq.end();
+         ++it)
+    {
+      arma::Col<size_t> maxSeqs =
+          arma::conv_to<arma::Col<size_t>>::from(arma::max(*it, 1)) + 1;
+      maxEmissions = arma::max(maxEmissions, maxSeqs);
+    }
+
+    hmm = HMM<DiscreteDistribution>(size_t(states),
+        DiscreteDistribution(maxEmissions), tolerance);
+  }
+
+  //! Helper function to create Gaussian HMM.
+  static void Create(HMM<GaussianDistribution>& hmm,
+                     vector<mat>& trainSeq,
+                     size_t states,
+                     double tolerance)
+  {
+    // Find dimension of the data.
+    const size_t dimensionality = trainSeq[0].n_rows;
+
+    // Verify dimensionality of data.
+    for (size_t i = 0; i < trainSeq.size(); ++i)
+      if (trainSeq[i].n_rows != dimensionality)
+        Log::Fatal << "Observation sequence " << i << " dimensionality ("
+            << trainSeq[i].n_rows << " is incorrect (should be "
+            << dimensionality << ")!" << endl;
+
+    // Get the model and initialize it.
+    hmm = HMM<GaussianDistribution>(size_t(states),
+        GaussianDistribution(dimensionality), tolerance);
+  }
+
+  //! Helper function to create GMM HMM.
+  static void Create(HMM<GMM>& hmm,
+                     vector<mat>& trainSeq,
+                     size_t states,
+                     double tolerance)
+  {
+    // Find dimension of the data.
+    const size_t dimensionality = trainSeq[0].n_rows;
+    const int gaussians = CLI::GetParam<int>("gaussians");
+
+    if (gaussians == 0)
+      Log::Fatal << "Number of gaussians for each GMM must be specified (-g) "
+          << "when type = 'gmm'!" << endl;
+
+    if (gaussians < 0)
+      Log::Fatal << "Invalid number of gaussians (" << gaussians << "); must "
+          << "be greater than or equal to 1." << endl;
+
+    // Create HMM object.
+    hmm = HMM<GMM>(size_t(states), GMM(size_t(gaussians), dimensionality),
+        tolerance);
+
+    // Issue a warning if the user didn't give labels.
+    if (!CLI::HasParam("labels_file"))
+      Log::Warn << "Unlabeled training of GMM HMMs is almost certainly not "
+          << "going to produce good results!" << endl;
+  }
+
+  //! Helper function for discrete emission distributions.
+  static void RandomInitialize(vector<DiscreteDistribution>& e)
+  {
+    for (size_t i = 0; i < e.size(); ++i)
+    {
+      e[i].Probabilities().randu();
+      e[i].Probabilities() /= arma::accu(e[i].Probabilities());
+    }
+  }
+
+  //! Helper function for Gaussian emission distributions.
+  static void RandomInitialize(vector<GaussianDistribution>& e)
+  {
+    for (size_t i = 0; i < e.size(); ++i)
+    {
+      const size_t dimensionality = e[i].Mean().n_rows;
+      e[i].Mean().randu();
+      // Generate random covariance.
+      arma::mat r = arma::randu<arma::mat>(dimensionality, dimensionality);
+      e[i].Covariance(r * r.t());
+    }
+  }
+
+  //! Helper function for GMM emission distributions.
+  static void RandomInitialize(vector<GMM>& e)
+  {
+    for (size_t i = 0; i < e.size(); ++i)
+    {
+      // Random weights.
+      e[i].Weights().randu();
+      e[i].Weights() /= arma::accu(e[i].Weights());
+
+      // Random means and covariances.
+      for (int g = 0; g < CLI::GetParam<int>("gaussians"); ++g)
+      {
+        const size_t dimensionality = e[i].Component(g).Mean().n_rows;
+        e[i].Component(g).Mean().randu();
+
+        // Generate random covariance.
+        arma::mat r = arma::randu<arma::mat>(dimensionality,
+            dimensionality);
+        e[i].Component(g).Covariance(r * r.t());
+      }
+    }
+  }
+};
+
+// Because we don't know what the type of our HMM is, we need to write a
+// function that can take arbitrary HMM types.
 struct Train
 {
   template<typename HMMType>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git



More information about the debian-science-commits mailing list