[irstlm] 118/126: some fixes
Giulio Paci
giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:46:51 UTC 2016
This is an automated email from the git hooks/post-receive script.
giuliopaci-guest pushed a commit to annotated tag adaptiveLM.v0.1
in repository irstlm.
commit 7d818ddf7474427539f5bc7d841356f969b3a057
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date: Tue Oct 20 00:34:36 2015 +0200
some fixes
---
src/context-dependent-evaluation.cpp | 15 ++++----
src/lmContainer.h | 18 ++++++++++
src/lmContextDependent.cpp | 70 ++++++++++++++++++++++++++++++------
src/lmContextDependent.h | 18 ++++++++--
src/lmInterpolation.cpp | 4 ++-
5 files changed, 102 insertions(+), 23 deletions(-)
diff --git a/src/context-dependent-evaluation.cpp b/src/context-dependent-evaluation.cpp
index e7d738c..6d575bf 100644
--- a/src/context-dependent-evaluation.cpp
+++ b/src/context-dependent-evaluation.cpp
@@ -266,7 +266,7 @@ int main(int argc, char **argv)
// reset ngram at begin of sentence
if (word_vec.at(i) == lmt->getDict()->BoS()) {
- size=0;
+ size=1;
continue;
}
first = last - size;
@@ -389,9 +389,6 @@ int main(int argc, char **argv)
string_vec_t word_vec;
split(sentence, ' ', word_vec);
- //add the BoS symbol at the beginning
- string_vec_t::iterator it = word_vec.insert ( word_vec.begin() , lmt->getDict()->BoS() );
-
//first points to the last recent term to take into account
//last points to the position after the most recent term to take into account
//last could point outside the vector of string; do NOT use word_vec.at(last)
@@ -399,21 +396,21 @@ int main(int argc, char **argv)
size_t order = lmt->maxlevel();
//start the computation from the second word because the first is the BoS symbol,but including BoS in the ngrams
- size_t size=1;
- for (size_t i=1; i< word_vec.size(); ++i){
+ size_t size=0;
+ for (size_t i=0; i< word_vec.size(); ++i){
++size;
size=(size<order)?size:order;
last=i+1;
// reset ngram at begin of sentence
if (word_vec.at(i) == lmt->getDict()->BoS()) {
- size=0;
+ size=1;
continue;
}
first = last - size;
string_vec_t tmp_word_vec(word_vec.begin() + first, word_vec.begin() +last);
-
+
if (size>=1) {
VERBOSE(2,"computing prob for first:|" << first << "| and last:|" << last << "|" << std::endl);
@@ -813,7 +810,7 @@ int main(int argc, char **argv)
// reset ngram at begin of sentence
if (word_vec.at(word_pos) == lmt->getDict()->BoS()) {
- size=0;
+ size=1;
continue;
}
first = last - size;
diff --git a/src/lmContainer.h b/src/lmContainer.h
index b8ac737..a89ccb0 100644
--- a/src/lmContainer.h
+++ b/src/lmContainer.h
@@ -180,6 +180,24 @@ public:
}
+ virtual double clprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+ UNUSED(lm_weights);
+ UNUSED(topic_weights);
+ return clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
+ };
+ virtual double clprob(int* ng, int ngsize, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+ UNUSED(lm_weights);
+ UNUSED(topic_weights);
+ return clprob(ng, ngsize, bow, bol, maxsuffptr, statesize, extendible);
+ }
+ virtual double clprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+ VERBOSE(3,"lmContainer::clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,...." << std::endl);
+ UNUSED(lm_weights);
+ UNUSED(topic_weights);
+ return clprob(text, bow, bol, maxsuffptr, statesize, extendible);
+ }
+
+
virtual const char *cmaxsuffptr(ngram ng, unsigned int* statesize=NULL)
{
UNUSED(ng);
diff --git a/src/lmContextDependent.cpp b/src/lmContextDependent.cpp
index 9fc530a..a230d23 100644
--- a/src/lmContextDependent.cpp
+++ b/src/lmContextDependent.cpp
@@ -159,6 +159,17 @@ namespace irstlm {
return lprob(ng, text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
}
+ double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ {
+ VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, " << std::endl);
+ //create the actual ngram
+ ngram ong(dict);
+ ong.pushc(codes,sz);
+ MY_ASSERT (ong.size == sz);
+
+ return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ }
+
double lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
VERBOSE(2,"lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, ...)" << std::endl);
@@ -172,6 +183,43 @@ namespace irstlm {
return lprob(ng, text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
}
+
+ double lmContextDependent::lprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ {
+ VERBOSE(2,"lmContextDependent::lprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, ...)" << std::endl);
+ string_vec_t text;
+ if (ng.size>1){
+ text.push_back(ng.dict->decode(*ng.wordp(2)));
+ }
+ text.push_back(ng.dict->decode(*ng.wordp(1)));
+
+ return lprob(ng, text, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ }
+
+ double lmContextDependent::lprob(int* codes, int sz, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ {
+ VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, lm_map_t& lm_weights, topic_map_t& topic_weights, " << std::endl);
+ //create the actual ngram
+ ngram ong(dict);
+ ong.pushc(codes,sz);
+ MY_ASSERT (ong.size == sz);
+
+ return lprob(ong, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ }
+
+ double lmContextDependent::lprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ {
+ VERBOSE(2,"lmContextDependent::lprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, ...)" << std::endl);
+
+ //create the actual ngram
+ ngram ng(dict);
+ ng.pushw(text);
+ VERBOSE(3,"ng:|" << ng << "|" << std::endl);
+
+ MY_ASSERT (ng.size == (int) text.size());
+ return lprob(ng, text, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ }
+
double lmContextDependent::lprob(ngram& ng, string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
VERBOSE(2,"lmContextDependent::lprob(ngram& ng, topic_map_t& topic_weights, ...)" << std::endl);
@@ -183,6 +231,17 @@ namespace irstlm {
return ret_logprob;
}
+ double lmContextDependent::lprob(ngram& ng, string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ {
+ VERBOSE(2,"lmContextDependent::lprob(ngram& ng, lm_map_t& lm_weights, topic_map_t& topic_weights, ...)" << std::endl);
+ double lm_logprob = m_lm->clprob(ng, lm_weights, bow, bol, maxsuffptr, statesize, extendible);
+ double similarity_score = m_similaritymodel->context_similarity(text, topic_weights);
+ double ret_logprob = lm_logprob + m_similaritymodel_weight * similarity_score;
+ VERBOSE(2, "lm_log10_pr:" << lm_logprob << " similarity_score:" << similarity_score << " m_similaritymodel_weight:" << m_similaritymodel_weight << " ret_log10_pr:" << ret_logprob << std::endl);
+
+ return ret_logprob;
+ }
+
double lmContextDependent::total_clprob(string_vec_t& text, topic_map_t& topic_weights)
{
@@ -215,17 +274,6 @@ namespace irstlm {
return log10(tot_pr);
}
- double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
- {
- VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, " << std::endl);
- //create the actual ngram
- ngram ong(dict);
- ong.pushc(codes,sz);
- MY_ASSERT (ong.size == sz);
-
- return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
- }
-
double lmContextDependent::setlogOOVpenalty(int dub)
{
MY_ASSERT(dub > dict->size());
diff --git a/src/lmContextDependent.h b/src/lmContextDependent.h
index 3e6a072..6e6bb53 100644
--- a/src/lmContextDependent.h
+++ b/src/lmContextDependent.h
@@ -115,7 +115,6 @@ namespace irstlm {
UNUSED(extendible);
assert(false);
};
-
virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
VERBOSE(0, "virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
UNUSED(ng);
@@ -126,7 +125,6 @@ namespace irstlm {
UNUSED(extendible);
assert(false);
};
-
virtual double clprob(string_vec_t& text, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
VERBOSE(0, "virtual double clprob(string_vec_t& text, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
UNUSED(text);
@@ -148,11 +146,27 @@ namespace irstlm {
return lprob(text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
};
+ virtual double clprob(int* ng, int ngsize, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ return lprob(ng, ngsize, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ };
+ virtual double clprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ return lprob(ng, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ };
+ virtual double clprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ return lprob(text, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ };
+
virtual double lprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
virtual double lprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
virtual double lprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double lprob(int* ng, int ngsize, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double lprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double lprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+
double lprob(ngram& ng, string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible);
+ double lprob(ngram& ng, string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible);
+
double total_clprob(string_vec_t& text, topic_map_t& topic_weights);
double total_clprob(ngram& ng, topic_map_t& topic_weights);
diff --git a/src/lmInterpolation.cpp b/src/lmInterpolation.cpp
index c6deb75..18cf612 100644
--- a/src/lmInterpolation.cpp
+++ b/src/lmInterpolation.cpp
@@ -109,7 +109,7 @@ namespace irstlm {
inp.getline(line,BUFSIZ,'\n');
tokenN = parseWords(line,words,idx_size);
- if(tokenN < idx_file || tokenN > idx_inverted) {
+ if(tokenN < idx_file || tokenN > idx_size) {
exit_error(IRSTLM_ERROR_DATA, "ERROR: wrong header format of configuration file\ncorrect format:\nLMINTERPOLATION number_of_models\nweight_of_LM_1 filename_of_LM_1 [inverted]\nweight_of_LM_2 filename_of_LM_2\nor\nLMINTERPOLATION number_of_models MAP\nweight_of_LM_1 name_LM_1 filename_of_LM_1\nweight_of_LM_2 name_LM_2 filename_of_LM_2");
}
@@ -179,6 +179,7 @@ namespace irstlm {
//return log10 prob of an ngram
double lmInterpolation::clprob(ngram ng, lm_map_t& lm_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
+ VERBOSE(1,"double lmInterpolation::clprob(ngram ng, lm_map_t& lm_weights,...)" << std::endl);
double pr=0.0;
double _logpr;
@@ -264,6 +265,7 @@ namespace irstlm {
//return log10 prob of an ngram
double lmInterpolation::clprob(ngram ng, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
+ VERBOSE(1,"double lmInterpolation::clprob(ngram ng, ...)" << std::endl);
double pr=0.0;
double _logpr;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git
More information about the debian-science-commits
mailing list