[mlpack] 150/324: Add support for HMM initial states. Slight modification of API for creating HMMs by hand -- the initial parameter is now required. This change may affect some existing results, and the new results may not perfectly agree with MATLAB, but MATLAB does not have the flexibility to seriously support initial probabilities. It is possible to set the initial parameters such that it will emulate MATLAB behavior right, probably by setting the initial probabilities to the first column of the transition matrix.
Barak A. Pearlmutter
barak+git at cs.nuim.ie
Sun Aug 17 08:22:05 UTC 2014
This is an automated email from the git hooks/post-receive script.
bap pushed a commit to branch svn-trunk
in repository mlpack.
commit fed0425c06bb512ac4ea96743551a3831d46b3ec
Author: rcurtin <rcurtin at 9d5b8971-822b-0410-80eb-d18c1038ef23>
Date: Tue Jul 8 19:25:22 2014 +0000
Add support for HMM initial states. Slight modification of API for creating
HMMs by hand -- the initial parameter is now required. This change may affect
some existing results, and the new results may not perfectly agree with MATLAB,
but MATLAB does not have the flexibility to seriously support initial
probabilities. It is possible to set the initial parameters such that it will
emulate MATLAB behavior right, probably by setting the initial probabilities to
the first column of the transition matrix.
git-svn-id: http://svn.cc.gatech.edu/fastlab/mlpack/trunk@16789 9d5b8971-822b-0410-80eb-d18c1038ef23
---
src/mlpack/methods/hmm/hmm.hpp | 28 +++++++++++++++++++++++-----
src/mlpack/methods/hmm/hmm_impl.hpp | 33 +++++++++++++++++++++++++++------
2 files changed, 50 insertions(+), 11 deletions(-)
diff --git a/src/mlpack/methods/hmm/hmm.hpp b/src/mlpack/methods/hmm/hmm.hpp
index 4ea62f2..f416875 100644
--- a/src/mlpack/methods/hmm/hmm.hpp
+++ b/src/mlpack/methods/hmm/hmm.hpp
@@ -87,6 +87,9 @@ class HMM
* Optionally, the tolerance for convergence of the Baum-Welch algorithm can
* be set.
*
+ * By default, the transition matrix and initial probability vector are set to
+ * contain equal probability for each state.
+ *
* @param states Number of states.
* @param emissions Default distribution for emissions.
* @param tolerance Tolerance for convergence of training algorithm
@@ -97,10 +100,15 @@ class HMM
const double tolerance = 1e-5);
/**
- * Create the Hidden Markov Model with the given transition matrix and the
- * given emission distributions. The dimensionality of the observations of
- * the HMM are taken from the given emission distributions. Alternately, the
- * dimensionality can be set with Dimensionality().
+ * Create the Hidden Markov Model with the given initial probability vector,
+ * the given transition matrix, and the given emission distributions. The
+ * dimensionality of the observations of the HMM are taken from the given
+ * emission distributions. Alternately, the dimensionality can be set with
+ * Dimensionality().
+ *
+ * The initial state probability vector should have length equal to the number
+ * of states, and each entry represents the probability of being in the given
+ * state at time T = 0 (the beginning of a sequence).
*
* The transition matrix should be such that T(i, j) is the probability of
* transition to state i from state j. The columns of the matrix should sum
@@ -112,12 +120,14 @@ class HMM
* Optionally, the tolerance for convergence of the Baum-Welch algorithm can
* be set.
*
+ * @param initial Initial state probabilities.
* @param transition Transition matrix.
* @param emission Emission distributions.
* @param tolerance Tolerance for convergence of training algorithm
* (Baum-Welch).
*/
- HMM(const arma::mat& transition,
+ HMM(const arma::vec& initial,
+ const arma::mat& transition,
const std::vector<Distribution>& emission,
const double tolerance = 1e-5);
@@ -250,6 +260,11 @@ class HMM
*/
double LogLikelihood(const arma::mat& dataSeq) const;
+ //! Return the vector of initial state probabilities.
+ const arma::vec& Initial() const { return initial; }
+ //! Modify the vector of initial state probabilities.
+ arma::vec& Initial() { return initial; }
+
//! Return the transition matrix.
const arma::mat& Transition() const { return transition; }
//! Return a modifiable transition matrix reference.
@@ -307,6 +322,9 @@ class HMM
const arma::vec& scales,
arma::mat& backwardProb) const;
+ //! Initial state probability vector.
+ arma::vec initial;
+
//! Transition probability matrix.
arma::mat transition;
diff --git a/src/mlpack/methods/hmm/hmm_impl.hpp b/src/mlpack/methods/hmm/hmm_impl.hpp
index 2992470..a8644f2 100644
--- a/src/mlpack/methods/hmm/hmm_impl.hpp
+++ b/src/mlpack/methods/hmm/hmm_impl.hpp
@@ -22,6 +22,7 @@ template<typename Distribution>
HMM<Distribution>::HMM(const size_t states,
const Distribution emissions,
const double tolerance) :
+ initial(arma::ones<arma::vec>(states) / (double) states),
transition(arma::ones<arma::mat>(states, states) / (double) states),
emission(states, /* default distribution */ emissions),
dimensionality(emissions.Dimensionality()),
@@ -33,9 +34,11 @@ HMM<Distribution>::HMM(const size_t states,
* emission probability matrix.
*/
template<typename Distribution>
-HMM<Distribution>::HMM(const arma::mat& transition,
+HMM<Distribution>::HMM(const arma::vec& initial,
+ const arma::mat& transition,
const std::vector<Distribution>& emission,
const double tolerance) :
+ initial(initial),
transition(transition),
emission(emission),
tolerance(tolerance)
@@ -101,6 +104,8 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
for (size_t iter = 0; iter < iterations; iter++)
{
// Clear new transition matrix and emission probabilities.
+ arma::vec newInitial(transition.n_rows);
+ newInitial.zeros();
arma::mat newTransition(transition.n_rows, transition.n_cols);
newTransition.zeros();
@@ -122,6 +127,7 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
loglik += Estimate(dataSeq[seq], stateProb, forward, backward, scales);
// Now re-estimate the parameters. This is the M-step.
+ // pi_i = sum_d ((1 / P(seq[d])) sum_t (f(i, 0) b(i, 0))
// T_ij = sum_d ((1 / P(seq[d])) sum_t (f(i, t) T_ij E_i(seq[d][t]) b(i,
// t + 1)))
// E_ij = sum_d ((1 / P(seq[d])) sum_{t | seq[d][t] = j} f(i, t) b(i, t)
@@ -130,6 +136,9 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
{
for (size_t j = 0; j < transition.n_cols; j++)
{
+ // Add to estimate of initial probability for state j.
+ newInitial[j] = stateProb(j, 0);
+
if (t < dataSeq[seq].n_cols - 1)
{
// Estimate of T_ij (probability of transition from state j to state
@@ -148,6 +157,10 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
}
}
+ // Normalize the new initial probabilities.
+ if (dataSeq.size() == 0)
+ initial = newInitial / dataSeq.size();
+
// Assign the new transition matrix. We use %= (element-wise
// multiplication) because every element of the new transition matrix must
// still be multiplied by the old elements (this is the multiplication we
@@ -191,6 +204,7 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq,
<< ")." << std::endl;
}
+ initial.zeros();
transition.zeros();
// Estimate the transition and emission matrices directly from the
@@ -218,6 +232,7 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq,
// Loop over each observation in the sequence. For estimation of the
// transition matrix, we must ignore the last observation.
+ initial[stateSeq[seq][0]]++;
for (size_t t = 0; t < dataSeq[seq].n_cols - 1; t++)
{
transition(stateSeq[seq][t + 1], stateSeq[seq][t])++;
@@ -229,6 +244,9 @@ void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq,
std::make_pair(seq, stateSeq[seq].n_elem - 1));
}
+ // Normalize initial weights.
+ initial /= accu(initial);
+
// Normalize transition matrix.
for (size_t col = 0; col < transition.n_cols; col++)
{
@@ -370,9 +388,9 @@ double HMM<Distribution>::Predict(const arma::mat& dataSeq,
logStateProb.col(0).zeros();
for (size_t state = 0; state < transition.n_rows; state++)
{
- logStateProb[state] = log(transition.unsafe_col(state).max() *
+ logStateProb(state, 0) = log(initial[state] *
emission[state].Probability(dataSeq.unsafe_col(0)));
- stateSeqBack[state] = state;
+ stateSeqBack(state, 0) = state;
}
// Store the best first state.
@@ -429,10 +447,13 @@ void HMM<Distribution>::Forward(const arma::mat& dataSeq,
forwardProb.zeros(transition.n_rows, dataSeq.n_cols);
scales.zeros(dataSeq.n_cols);
- // Starting state (at t = -1) is assumed to be state 0. This is what MATLAB
- // does in their hmmdecode() function, so we will emulate that behavior.
+ // The first entry in the forward algorithm uses the initial state
+ // probabilities. Note that MATLAB assumes that the starting state (at
+ // t = -1) is state 0; this is not our assumption here. To force that
+ // behavior, you could append a single starting state to every single data
+ // sequence and that should produce results in line with MATLAB.
for (size_t state = 0; state < transition.n_rows; state++)
- forwardProb(state, 0) = transition(state, 0) *
+ forwardProb(state, 0) = initial(state) *
emission[state].Probability(dataSeq.unsafe_col(0));
// Then normalize the column.
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mlpack.git
More information about the debian-science-commits
mailing list