[irstlm] 123/126: code optimization; code cleanup
Giulio Paci
giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:46:52 UTC 2016
This is an automated email from the git hooks/post-receive script.
giuliopaci-guest pushed a commit to annotated tag adaptiveLM.v0.1
in repository irstlm.
commit f33f68a5690618d23204df78c4861b736fbea577
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date: Mon Oct 26 19:11:04 2015 +0100
code optimization; code cleanup
---
src/compile-lm.cpp | 10 +--
src/context-dependent-evaluation.cpp | 119 ++++++++++++++++++-----------------
src/context-similarity.cpp | 18 ------
src/context-similarity.h | 2 -
src/lmContainer.cpp | 64 ++++++++++++++++---
src/lmContainer.h | 48 +++-----------
src/lmContextDependent.cpp | 95 +++++++++++-----------------
src/lmContextDependent.h | 53 ++++------------
src/lmInterpolation.cpp | 34 ++++++----
src/lmInterpolation.h | 22 +++----
src/lmmacro.h | 4 --
11 files changed, 211 insertions(+), 258 deletions(-)
diff --git a/src/compile-lm.cpp b/src/compile-lm.cpp
index 39aff7d..a01a7a2 100644
--- a/src/compile-lm.cpp
+++ b/src/compile-lm.cpp
@@ -183,7 +183,7 @@ int main(int argc, char **argv)
//checking the language model type
lmContainer* lmt = lmContainer::CreateLanguageModel(infile,ngramcache_load_factor,dictionary_load_factor);
-
+
//let know that table has inverted n-grams
if (invert) lmt->is_inverted(invert);
@@ -191,6 +191,7 @@ int main(int argc, char **argv)
lmt->load(infile);
+ VERBOSE(1,"after load" << std::endl);
//CHECK this part for sfilter to make it possible only for LMTABLE
if (sfilter != NULL) {
lmContainer* filtered_lmt = NULL;
@@ -212,8 +213,9 @@ int main(int argc, char **argv)
if (dub) lmt->setlogOOVpenalty((int)dub);
//use caches to save time (only if PS_CACHE_ENABLE is defined through compilation flags)
+ ;
lmt->init_caches(lmt->maxlevel());
-
+
if (seval != NULL) {
if (randcalls>0) {
@@ -301,9 +303,8 @@ int main(int argc, char **argv)
int bol=0;
char *msp;
unsigned int statesize;
-
lmt->dictionary_incflag(1);
-
+
while(inptxt >> ng) {
if (ng.size>lmt->maxlevel()) ng.size=lmt->maxlevel();
@@ -369,6 +370,7 @@ int main(int argc, char **argv)
}
Nw++;
sent_Nw++;
+
if (sent_PP_flag && (*ng.wordp(1)==eos)) {
sent_PP=exp((-sent_logPr * M_LN10) / sent_Nw);
sent_PPwp= sent_PP * (1 - 1/exp(sent_Noov * ( lmt->getlogOOVpenalty() * M_LN10 ) / sent_Nw));
diff --git a/src/context-dependent-evaluation.cpp b/src/context-dependent-evaluation.cpp
index 5ae7cd7..5a60515 100644
--- a/src/context-dependent-evaluation.cpp
+++ b/src/context-dependent-evaluation.cpp
@@ -37,12 +37,6 @@ using namespace irstlm;
typedef std::pair<double,int> double_and_int_pair;
-void transform(topic_map_t& topic_map, lm_map_t& lm_map){
- for (topic_map_t::const_iterator it=topic_map.begin(); it!=topic_map.end(); ++it){
- lm_map[it->first] = topic_map[it->first];
- }
-}
-
struct cmp_double_and_int_pair {
//order first by the first field (double), and in case of equality by the second field (int)
bool operator()(const double_and_int_pair& a, const double_and_int_pair& b) const {
@@ -103,6 +97,7 @@ int main(int argc, char **argv)
bool add_lexicon_words = false;
bool add_lm_words = false;
bool add_sentence_words = false;
+ bool add_full_dictionary = false;
int successor_limit=100;
int debug = 0;
@@ -135,6 +130,7 @@ int main(int argc, char **argv)
"context_model_normalization", CMDBOOLTYPE|CMDMSG, &context_model_normalization, "enable/disable normalization of context-dependent model (default is false)",
"add_lm_words", CMDBOOLTYPE|CMDMSG, &add_lm_words, "enable/disable addition of the unigram/bigrmam successors into the alternatives (default is false)",
"add_sentence_words", CMDBOOLTYPE|CMDMSG, &add_sentence_words, "enable/disable addition of the words of the current sentence into the alternatives (default is false)",
+ "add_full_dictionary", CMDBOOLTYPE|CMDMSG, &add_full_dictionary, "enable/disable addition of all words of the dictionary into the alternatives (default is false)",
"successor_limit", CMDINTTYPE|CMDMSG, &successor_limit, "threshold to decide whether adding the unigram/bigram successors into the alternatives (default is 100)",
"Help", CMDBOOLTYPE|CMDMSG, &help, "print this help",
@@ -214,6 +210,9 @@ int main(int argc, char **argv)
}
if (topicscore == true) {
+ if (lmt->getLanguageModelType() != _IRSTLM_LMCONTEXTDEPENDENT) {
+ exit_error(IRSTLM_ERROR_DATA, "This type of score is not available for the LM loaded");
+ }
if (lmt->getLanguageModelType() == _IRSTLM_LMINTERPOLATION) {
debug = (debug>4)?4:debug;
std::cerr << "Maximum debug value for this LM type: " << debug << std::endl;
@@ -246,14 +245,13 @@ int main(int argc, char **argv)
std::string sentence;
std::string context;
- ((lmContextDependent*) lmt)->GetSentenceAndContext(sentence,context,line_str);
+ bool withContext = lmt->GetSentenceAndContext(sentence,context,line_str);
//getting apriori topic weights
topic_map_t apriori_topic_map;
- ((lmContextDependent*) lmt)->getContextSimilarity()->setContextMap(apriori_topic_map,context);
- lm_map_t apriori_lm_map;
- transform(apriori_topic_map,apriori_lm_map);
-
+ if (withContext){
+ lmt->setContextMap(apriori_topic_map,context);
+ }
// computation using std::string
// loop over ngrams of the sentence
string_vec_t word_vec;
@@ -299,29 +297,18 @@ int main(int argc, char **argv)
((lmContextDependent*) lmt)->getContextSimilarity()->add_topic_scores(sentence_topic_map, tmp_topic_map);
IFVERBOSE(2){
- // VERBOSE(2,"word-based topic-distribution:");
- // ((lmContextDependent*) lmt)->getContextSimilarity()->print_topic_scores(tmp_topic_map);
VERBOSE(2,"word-based topic-distribution:");
((lmContextDependent*) lmt)->getContextSimilarity()->print_topic_scores(tmp_topic_map,apriori_topic_map,1);
}
tmp_topic_map.clear();
- // IFVERBOSE(2){
- // VERBOSE(2,"sentence-based topic-distribution:");
- // ((lmContextDependent*) lmt)->getContextSimilarity()->print_topic_scores(sentence_topic_map);
- // VERBOSE(2,"sentence-based topic-distribution:");
- // ((lmContextDependent*) lmt)->getContextSimilarity()->print_topic_scores(sentence_topic_map,apriori_topic_map);
- // }
}
}
IFVERBOSE(2){
- // VERBOSE(2,"sentence-based topic-distribution:");
- // ((lmContextDependent*) lmt)->getContextSimilarity()->print_topic_scores(sentence_topic_map);
VERBOSE(2,"sentence-based topic-distribution:");
((lmContextDependent*) lmt)->getContextSimilarity()->print_topic_scores(sentence_topic_map,apriori_topic_map,last);
}
std::cout << sentence << ((lmContextDependent*) lmt)->getContextDelimiter();
- ((lmContextDependent*) lmt)->getContextSimilarity()->print_topic_scores(sentence_topic_map);
- // ((lmContextDependent*) lmt)->getContextSimilarity()->print_topic_scores(sentence_topic_map,apriori_topic_map);
+ ((lmContextDependent*) lmt)->getContextSimilarity()->print_topic_scores(sentence_topic_map);
apriori_topic_map.clear();
}
@@ -385,15 +372,13 @@ int main(int argc, char **argv)
//getting sentence string;
std::string sentence;
std::string context;
-
- ((lmContextDependent*) lmt)->GetSentenceAndContext(sentence,context,line_str);
-
+
+ bool withContext = lmt->GetSentenceAndContext(sentence,context,line_str);
//getting apriori topic weights
topic_map_t apriori_topic_map;
- ((lmContextDependent*) lmt)->getContextSimilarity()->setContextMap(apriori_topic_map,context);
- lm_map_t apriori_lm_map;
- transform(apriori_topic_map,apriori_lm_map);
-
+ if (withContext){
+ ((lmContextDependent*) lmt)->setContextMap(apriori_topic_map,context);
+ }
// computation using std::string
// loop over ngrams of the sentence
string_vec_t word_vec;
@@ -440,9 +425,12 @@ int main(int argc, char **argv)
VERBOSE(2,"tmp_word_vec.size:|" << tmp_word_vec.size() << "|" << std::endl);
VERBOSE(2,"dict.size:|" << lmt->getDict()->size() << "|" << std::endl);
-
-// current_Pr = lmt->clprob(tmp_word_vec, apriori_topic_map, &bow, &bol, &msp, &statesize);
- current_Pr = lmt->clprob(tmp_word_vec, apriori_lm_map, apriori_topic_map, &bow, &bol, &msp, &statesize);
+
+ if (withContext){
+ current_Pr = lmt->clprob(tmp_word_vec, apriori_topic_map, &bow, &bol, &msp, &statesize);
+ }else{
+ current_Pr = lmt->clprob(tmp_word_vec, &bow, &bol, &msp, &statesize);
+ }
/*
double tot_pr = 0.0;
if (context_model_normalization){
@@ -454,14 +442,16 @@ int main(int argc, char **argv)
int current_pos = tmp_word_vec.size()-1;
std::string current_word = tmp_word_vec.at(current_pos);
- /*
- //loop over all words in the LM
- dictionary* current_dict = lmt->getDict();
- */
-
//loop over a set of selected alternative words
//populate the dictionary with all words associated with the current word
- dictionary* current_dict = new dictionary((char *)NULL,1000000);
+
+ dictionary* current_dict;
+ if (add_full_dictionary){
+ //loop over all words in the LM
+ current_dict = lmt->getDict();
+ }else{
+ current_dict = new dictionary((char *)NULL,1000000);
+ }
current_dict->incflag(1);
current_dict->encode(current_word.c_str());
@@ -538,7 +528,6 @@ int main(int argc, char **argv)
}
}
-
VERBOSE(2,"after add_lm_words current_dict->size:" << current_dict->size() << std::endl);
if (add_sentence_words){
@@ -555,7 +544,7 @@ int main(int argc, char **argv)
VERBOSE(2,"current_dict->size:" << current_dict->size() << std::endl);
for (int h=0;h<current_dict->size();++h){
- VERBOSE(2,"h:" << h << " w:|" << current_dict->decode(h) << "|" << std::endl);
+ VERBOSE(3,"h:" << h << " w:|" << current_dict->decode(h) << "|" << std::endl);
}
//the first word in current_dict is always the current_word; hence we can skip it during the scan
@@ -579,8 +568,12 @@ int main(int argc, char **argv)
std::cout << std::endl;
}
-// double pr=lmt->clprob(tmp_word_vec, apriori_topic_map, &bow, &bol, &msp, &statesize);
- double pr=lmt->clprob(tmp_word_vec, apriori_lm_map, apriori_topic_map, &bow, &bol, &msp, &statesize);
+ double pr;
+ if (withContext){
+ pr=lmt->clprob(tmp_word_vec, apriori_topic_map, &bow, &bol, &msp, &statesize);
+ }else{
+ pr=lmt->clprob(tmp_word_vec, &bow, &bol, &msp, &statesize);
+ }
current_tot_pr += pow(10.0,pr);
if (best_pr < pr){
best_pr = pr;
@@ -682,8 +675,7 @@ int main(int argc, char **argv)
apriori_topic_map.clear();
}
-
-
+
model_norm_PP = exp((-model_norm_logPr * M_LN10) / Nw);
model_norm_PPwp = model_norm_PP * (1 - 1/exp(Noov * norm_oovpenalty * M_LN10 / Nw));
model_PP = exp((-model_logPr * M_LN10) / Nw);
@@ -795,13 +787,13 @@ int main(int argc, char **argv)
std::string sentence;
std::string context;
- ((lmContextDependent*) lmt)->GetSentenceAndContext(sentence,context,line_str);
+ bool withContext=lmt->GetSentenceAndContext(sentence,context,line_str);
//getting apriori topic weights
topic_map_t apriori_topic_map;
- ((lmContextDependent*) lmt)->getContextSimilarity()->setContextMap(apriori_topic_map,context);
- lm_map_t apriori_lm_map;
- transform(apriori_topic_map,apriori_lm_map);
+ if (withContext){
+ ((lmContextDependent*) lmt)->setContextMap(apriori_topic_map,context);
+ }
// computation using std::string
// loop over ngrams of the sentence
@@ -856,16 +848,23 @@ int main(int argc, char **argv)
int current_pos = tmp_word_vec.size()-1;
std::string current_word = tmp_word_vec.at(current_pos);
-// double current_Pr=lmt->clprob(tmp_word_vec, apriori_topic_map, &bow, &bol, &msp, &statesize);
- double current_Pr=lmt->clprob(tmp_word_vec, apriori_lm_map, apriori_topic_map, &bow, &bol, &msp, &statesize);
+ double current_Pr;
+ if (withContext){
+ current_Pr=lmt->clprob(tmp_word_vec, apriori_topic_map, &bow, &bol, &msp, &statesize);
+ }else{
+ current_Pr=lmt->clprob(tmp_word_vec, &bow, &bol, &msp, &statesize);
+ }
- /*
- //loop over all words in the LM
- dictionary* current_dict = lmt->getDict();
- */
//loop over a set of selected alternative words
//populate the dictionary with all words associated with the current word
- dictionary* current_dict = new dictionary((char *)NULL,1000000);
+
+ dictionary* current_dict;
+ if (add_full_dictionary){
+ //loop over all words in the LM
+ current_dict = lmt->getDict();
+ }else{
+ current_dict = new dictionary((char *)NULL,1000000);
+ }
current_dict->incflag(1);
current_dict->encode(current_word.c_str());
@@ -977,9 +976,13 @@ int main(int argc, char **argv)
}
std::cout << std::endl;
}
-// double pr=lmt->clprob(tmp_word_vec, apriori_topic_map, &bow, &bol, &msp, &statesize);
- double pr=lmt->clprob(tmp_word_vec, apriori_lm_map, apriori_topic_map, &bow, &bol, &msp, &statesize);
+ double pr;
+ if (withContext){
+ pr=lmt->clprob(tmp_word_vec, apriori_topic_map, &bow, &bol, &msp, &statesize);
+ }else{
+ pr=lmt->clprob(tmp_word_vec, &bow, &bol, &msp, &statesize);
+ }
if (pr > current_Pr){
++current_rank;
}
diff --git a/src/context-similarity.cpp b/src/context-similarity.cpp
index 06613cb..0f2af82 100644
--- a/src/context-similarity.cpp
+++ b/src/context-similarity.cpp
@@ -147,24 +147,6 @@ namespace irstlm {
std::cout << std::endl;
}
- void ContextSimilarity::setContextMap(topic_map_t& topic_map, const std::string& context){
-
- string_vec_t topic_weight_vec;
- string_vec_t topic_weight;
-
- // context is supposed in this format
- // topic-name1,topic-value1:topic-name2,topic-value2:...:topic-nameN,topic-valueN
-
- //first-level split the context in a vector of topic-name1,topic-value1, using the first separator ':'
- split(context, topic_map_delimiter1, topic_weight_vec);
- for (string_vec_t::iterator it=topic_weight_vec.begin(); it!=topic_weight_vec.end(); ++it){
- //first-level split the context in a vector of topic-name1 and ,topic-value1, using the second separator ','
- split(*it, topic_map_delimiter2, topic_weight);
- topic_map[topic_weight.at(0)] = strtod (topic_weight.at(1).c_str(), NULL);
- topic_weight.clear();
- }
- }
-
void ContextSimilarity::create_ngram(const string_vec_t& text, ngram& ng)
{
//text is a vector of strings with w in the last position and the history in the previous positions
diff --git a/src/context-similarity.h b/src/context-similarity.h
index 8a6bba2..5e67553 100644
--- a/src/context-similarity.h
+++ b/src/context-similarity.h
@@ -99,8 +99,6 @@ namespace irstlm {
ContextSimilarity(const std::string &dictfile, const std::string &num_modelfile, const std::string &den_modelfile);
~ContextSimilarity();
- void setContextMap(topic_map_t& topic_map, const std::string& context);
-
void get_topic_scores(string_vec_t& text, topic_map_t& topic_map);
void get_topic_scores(ngram& ng, ngramtable& ngt, ngramtable& ngt2, topic_map_t& topic_map);
diff --git a/src/lmContainer.cpp b/src/lmContainer.cpp
index 34895a5..ec3fc5e 100644
--- a/src/lmContainer.cpp
+++ b/src/lmContainer.cpp
@@ -62,17 +62,12 @@ namespace irstlm {
bool lmContainer::lmt_cache_enabled=false;
#endif
- inline void error(const char* message)
- {
- std::cerr << message << "\n";
- throw std::runtime_error(message);
- }
-
lmContainer::lmContainer()
{
requiredMaxlev=1000;
lmtype=_IRSTLM_LMUNKNOWN;
maxlev=0;
+ m_isadaptive=false;
}
int lmContainer::getLanguageModelType(std::string filename)
@@ -111,43 +106,53 @@ namespace irstlm {
lmContainer* lmContainer::CreateLanguageModel(const std::string infile, float nlf, float dlf)
{
int type = lmContainer::getLanguageModelType(infile);
- std::cerr << "Language Model Type of " << infile << " is " << type << std::endl;
+
+ VERBOSE(1,"lmContainer* lmContainer::CreateLanguageModel(...) Language Model Type of " << infile << " is " << type << std::endl);
return lmContainer::CreateLanguageModel(type, nlf, dlf);
}
lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf)
{
+ std::cerr << "lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf)" << std::endl;
- std::cerr << "Language Model Type is " << type << std::endl;
+ VERBOSE(1,"Language Model Type is " << type << std::endl);
lmContainer* lm=NULL;
switch (type) {
case _IRSTLM_LMTABLE:
+ VERBOSE(1,"_IRSTLM_LMTABLE" << std::endl);
lm = new lmtable(nlf, dlf);
break;
case _IRSTLM_LMMACRO:
+ VERBOSE(1,"_IRSTLM_LMMACRO" << std::endl);
lm = new lmmacro(nlf, dlf);
break;
case _IRSTLM_LMCLASS:
+ VERBOSE(1,"_IRSTLM_LMCLASS" << std::endl);
lm = new lmclass(nlf, dlf);
break;
case _IRSTLM_LMINTERPOLATION:
+ VERBOSE(1,"_IRSTLM_LMINTERPOLATION" << std::endl);
lm = new lmInterpolation(nlf, dlf);
break;
case _IRSTLM_LMCONTEXTDEPENDENT:
+ VERBOSE(1,"_IRSTLM_LMCONTEXTDEPENDENT" << std::endl);
lm = new lmContextDependent(nlf, dlf);
break;
default:
+ VERBOSE(1,"UNKNOWN" << std::endl);
exit_error(IRSTLM_ERROR_DATA, "This language model type is unknown!");
}
+ VERBOSE(1,"lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf) lm:|" << (void*) lm << "|" << std::endl);
+ VERBOSE(1,"lm->getLanguageModelType:|" << lm->getLanguageModelType() << "|" << std::endl);
lm->setLanguageModelType(type);
@@ -171,4 +176,47 @@ namespace irstlm {
return false;
}
+
+ bool lmContainer::GetSentenceAndContext(std::string& sentence, std::string& context, std::string& line)
+ {
+ VERBOSE(2,"bool lmContextDependent::GetSentenceAndContext" << std::endl);
+ VERBOSE(2,"line:|" << line << "|" << std::endl);
+ bool ret;
+ size_t pos = line.find(context_delimiter);
+ if (pos != std::string::npos){ // context_delimiter is found
+ sentence = line.substr(0, pos);
+ line.erase(0, pos + context_delimiter.length());
+
+ //getting context string;
+ context = line;
+ ret=true;
+ }else{
+ sentence = line;
+ context = "";
+ ret=false;
+ }
+ VERBOSE(2,"sentence:|" << sentence << "|" << std::endl);
+ VERBOSE(2,"context:|" << context << "|" << std::endl);
+ return ret;
+ }
+
+
+ void lmContainer::setContextMap(topic_map_t& topic_map, const std::string& context){
+
+ string_vec_t topic_weight_vec;
+ string_vec_t topic_weight;
+
+ // context is supposed in this format
+ // topic-name1,topic-value1:topic-name2,topic-value2:...:topic-nameN,topic-valueN
+
+ //first-level split the context in a vector of topic-name1,topic-value1, using the first separator ':'
+ split(context, topic_map_delimiter1, topic_weight_vec);
+ for (string_vec_t::iterator it=topic_weight_vec.begin(); it!=topic_weight_vec.end(); ++it){
+ //first-level split the context in a vector of topic-name1 and ,topic-value1, using the second separator ','
+ split(*it, topic_map_delimiter2, topic_weight);
+ topic_map[topic_weight.at(0)] = strtod (topic_weight.at(1).c_str(), NULL);
+ topic_weight.clear();
+ }
+ }
+
}//namespace irstlm
diff --git a/src/lmContainer.h b/src/lmContainer.h
index a89ccb0..5fb8b2e 100644
--- a/src/lmContainer.h
+++ b/src/lmContainer.h
@@ -48,10 +48,10 @@ typedef enum {LMT_FIND, //!< search: find an entry
} LMT_ACTION;
namespace irstlm {
-
-
+
+
typedef std::map< std::string, float > topic_map_t;
- typedef std::map< std::string, double > lm_map_t;
+// typedef std::map< std::string, double > lm_map_t;
class lmContainer
{
@@ -63,6 +63,9 @@ protected:
int lmtype; //auto reference to its own type
int maxlev; //maximun order of sub LMs;
int requiredMaxlev; //max loaded level, i.e. load up to requiredMaxlev levels
+ bool m_isadaptive; //flag is true if the LM can be adapted by means of any external context
+ void isAdaptive(bool val){ m_isadaptive = val; }
+ bool isAdaptive(){ return m_isadaptive;}
public:
@@ -131,7 +134,6 @@ public:
return 0.0;
};
virtual double clprob(string_vec_t& text, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
- VERBOSE(0,"lmContainer::clprob(string_vec_t& text, double* bow,...." << std::endl);
UNUSED(text);
UNUSED(bow);
UNUSED(bol);
@@ -160,44 +162,10 @@ public:
return clprob(ng, ngsize, bow, bol, maxsuffptr, statesize, extendible);
}
virtual double clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
- VERBOSE(3,"lmContainer::clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,...." << std::endl);
UNUSED(topic_weights);
return clprob(text, bow, bol, maxsuffptr, statesize, extendible);
}
- virtual double clprob(ngram ng, lm_map_t& lm_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
- UNUSED(lm_weights);
- return clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
- };
- virtual double clprob(int* ng, int ngsize, lm_map_t& lm_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
- UNUSED(lm_weights);
- return clprob(ng, ngsize, bow, bol, maxsuffptr, statesize, extendible);
- }
- virtual double clprob(string_vec_t& text, lm_map_t& lm_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
- VERBOSE(3,"lmContainer::clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,...." << std::endl);
- UNUSED(lm_weights);
- return clprob(text, bow, bol, maxsuffptr, statesize, extendible);
- }
-
-
- virtual double clprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
- UNUSED(lm_weights);
- UNUSED(topic_weights);
- return clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
- };
- virtual double clprob(int* ng, int ngsize, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
- UNUSED(lm_weights);
- UNUSED(topic_weights);
- return clprob(ng, ngsize, bow, bol, maxsuffptr, statesize, extendible);
- }
- virtual double clprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
- VERBOSE(3,"lmContainer::clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,...." << std::endl);
- UNUSED(lm_weights);
- UNUSED(topic_weights);
- return clprob(text, bow, bol, maxsuffptr, statesize, extendible);
- }
-
-
virtual const char *cmaxsuffptr(ngram ng, unsigned int* statesize=NULL)
{
UNUSED(ng);
@@ -285,6 +253,10 @@ public:
inline static bool is_cache_enabled(){
return is_lmt_cache_enabled() && is_ps_cache_enabled();
}
+
+ bool GetSentenceAndContext(std::string& sentence, std::string& context, std::string& line);
+
+ void setContextMap(topic_map_t& topic_map, const std::string& context);
};
diff --git a/src/lmContextDependent.cpp b/src/lmContextDependent.cpp
index 764b414..1df595a 100644
--- a/src/lmContextDependent.cpp
+++ b/src/lmContextDependent.cpp
@@ -49,7 +49,10 @@ namespace irstlm {
order=0;
memmap=0;
isInverted=false;
+ m_isadaptive=true;
+
+ VERBOSE(2,"lmContextDependent::lmContextDependent(const std::string &filename,int mmap) isadaptive:|" << m_isadaptive << "|" << std::endl);
}
lmContextDependent::~lmContextDependent()
@@ -127,52 +130,33 @@ namespace irstlm {
VERBOSE(0, "topic_threshold_on_h:|" << m_similaritymodel->get_Threshold_on_H() << "|" << std::endl);
VERBOSE(0, "shift-beta smoothing on counts:|" << m_similaritymodel->get_SmoothingValue() << "|" << std::endl);
}
-
- void lmContextDependent::GetSentenceAndContext(std::string& sentence, std::string& context, std::string& line)
- {
- VERBOSE(2,"lmContextDependent::GetSentenceAndContext" << std::endl);
- VERBOSE(2,"line:|" << line << "|" << std::endl);
- size_t pos = line.find(context_delimiter);
- if (pos != std::string::npos){ // context_delimiter is found
- sentence = line.substr(0, pos);
- line.erase(0, pos + context_delimiter.length());
-
- //getting context string;
- context = line;
- }else{
- sentence = line;
- context = "";
- }
- VERBOSE(2,"sentence:|" << sentence << "|" << std::endl);
- VERBOSE(2,"context:|" << context << "|" << std::endl);
- }
-
- double lmContextDependent::lprob(ngram ng, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+
+ double lmContextDependent::lprob(ngram ng, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- VERBOSE(2,"lmContextDependent::lprob(ngram ng, topic_map_t& topic_weights, ...)" << std::endl);
+ VERBOSE(2,"lmContextDependent::lprob(ngram ng, ...)" << std::endl);
string_vec_t text;
if (ng.size>1){
text.push_back(ng.dict->decode(*ng.wordp(2)));
}
text.push_back(ng.dict->decode(*ng.wordp(1)));
- return lprob(ng, text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ return lprob(ng, text, bow, bol, maxsuffptr, statesize, extendible);
}
-
- double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+
+ double lmContextDependent::lprob(int* codes, int sz, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, " << std::endl);
+ VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, " << std::endl);
//create the actual ngram
ngram ong(dict);
ong.pushc(codes,sz);
MY_ASSERT (ong.size == sz);
- return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ return lprob(ong, bow, bol, maxsuffptr, statesize, extendible);
}
- double lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmContextDependent::lprob(string_vec_t& text, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- VERBOSE(2,"lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, ...)" << std::endl);
+ VERBOSE(2,"lmContextDependent::lprob(string_vec_t& text, ...)" << std::endl);
//create the actual ngram
ngram ng(dict);
@@ -180,36 +164,46 @@ namespace irstlm {
VERBOSE(3,"ng:|" << ng << "|" << std::endl);
MY_ASSERT (ng.size == (int) text.size());
- return lprob(ng, text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ return lprob(ng, text, bow, bol, maxsuffptr, statesize, extendible);
}
+ double lmContextDependent::lprob(ngram& ng, string_vec_t& text, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ {
+ UNUSED(text);
+ VERBOSE(2,"lmContextDependent::lprob(ngram& ng, string_vec_t& text, ...)" << std::endl);
+ double lm_logprob = m_lm->clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
+ double ret_logprob = lm_logprob;
+ VERBOSE(2, "lm_log10_pr:" << lm_logprob << " similarity_score:_undef_ m_similaritymodel_weight:_undef_ ret_log10_pr:" << ret_logprob << std::endl);
+
+ return ret_logprob;
+ }
- double lmContextDependent::lprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmContextDependent::lprob(ngram ng, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- VERBOSE(2,"lmContextDependent::lprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, ...)" << std::endl);
+ VERBOSE(2,"lmContextDependent::lprob(ngram ng, topic_map_t& topic_weights, ...)" << std::endl);
string_vec_t text;
if (ng.size>1){
text.push_back(ng.dict->decode(*ng.wordp(2)));
}
text.push_back(ng.dict->decode(*ng.wordp(1)));
- return lprob(ng, text, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ return lprob(ng, text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
}
- double lmContextDependent::lprob(int* codes, int sz, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, lm_map_t& lm_weights, topic_map_t& topic_weights, " << std::endl);
+ VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, " << std::endl);
//create the actual ngram
ngram ong(dict);
ong.pushc(codes,sz);
MY_ASSERT (ong.size == sz);
- return lprob(ong, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
}
- double lmContextDependent::lprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- VERBOSE(2,"lmContextDependent::lprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, ...)" << std::endl);
+ VERBOSE(2,"lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, ...)" << std::endl);
//create the actual ngram
ngram ng(dict);
@@ -217,29 +211,14 @@ namespace irstlm {
VERBOSE(3,"ng:|" << ng << "|" << std::endl);
MY_ASSERT (ng.size == (int) text.size());
- return lprob(ng, text, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
- }
-
- double lmContextDependent::lprob(ngram& ng, string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
- {
- VERBOSE(2,"lmContextDependent::lprob(ngram& ng, topic_map_t& topic_weights, ...)" << std::endl);
- double lm_logprob = m_lm->clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
- double similarity_score = m_similaritymodel->context_similarity(text, topic_weights);
- double ret_logprob = lm_logprob + m_similaritymodel_weight * similarity_score;
- VERBOSE(2, "lm_log10_pr:" << lm_logprob << " similarity_score:" << similarity_score << " m_similaritymodel_weight:" << m_similaritymodel_weight << " ret_log10_pr:" << ret_logprob << std::endl);
-
- return ret_logprob;
+ return lprob(ng, text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
}
- double lmContextDependent::lprob(ngram& ng, string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmContextDependent::lprob(ngram& ng, string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- VERBOSE(2,"lmContextDependent::lprob(ngram& ng, lm_map_t& lm_weights, topic_map_t& topic_weights, ...)" << std::endl);
- double lm_logprob;
- if (lm_weights.size() == 0){
- lm_logprob = m_lm->clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
- }else{
- lm_logprob = m_lm->clprob(ng, lm_weights, bow, bol, maxsuffptr, statesize, extendible);
- }
+ VERBOSE(2,"lmContextDependent::lprob(ngram& ng, string_vec_t& text, topic_map_t& topic_weights, ...)" << std::endl);
+// double lm_logprob = m_lm->clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
+ double lm_logprob = m_lm->clprob(ng, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
double similarity_score = m_similaritymodel->context_similarity(text, topic_weights);
double ret_logprob = lm_logprob + m_similaritymodel_weight * similarity_score;
VERBOSE(2, "lm_log10_pr:" << lm_logprob << " similarity_score:" << similarity_score << " m_similaritymodel_weight:" << m_similaritymodel_weight << " ret_log10_pr:" << ret_logprob << std::endl);
diff --git a/src/lmContextDependent.h b/src/lmContextDependent.h
index 6e6bb53..c017908 100644
--- a/src/lmContextDependent.h
+++ b/src/lmContextDependent.h
@@ -102,38 +102,14 @@ namespace irstlm {
return context_delimiter;
}
- void GetSentenceAndContext(std::string& sentence, std::string& context, std::string& line);
-
virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- VERBOSE(0, "virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
- UNUSED(ng);
- UNUSED(ngsize);
- UNUSED(bow);
- UNUSED(bol);
- UNUSED(maxsuffptr);
- UNUSED(statesize);
- UNUSED(extendible);
- assert(false);
+ return lprob(ng, ngsize, bow, bol, maxsuffptr, statesize, extendible);
};
virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- VERBOSE(0, "virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
- UNUSED(ng);
- UNUSED(bow);
- UNUSED(bol);
- UNUSED(maxsuffptr);
- UNUSED(statesize);
- UNUSED(extendible);
- assert(false);
+ return lprob(ng, bow, bol, maxsuffptr, statesize, extendible);
};
virtual double clprob(string_vec_t& text, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- VERBOSE(0, "virtual double clprob(string_vec_t& text, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
- UNUSED(text);
- UNUSED(bow);
- UNUSED(bol);
- UNUSED(maxsuffptr);
- UNUSED(statesize);
- UNUSED(extendible);
- assert(false);
+ return lprob(text, bow, bol, maxsuffptr, statesize, extendible);
};
virtual double clprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
@@ -146,26 +122,18 @@ namespace irstlm {
return lprob(text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
};
- virtual double clprob(int* ng, int ngsize, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- return lprob(ng, ngsize, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
- };
- virtual double clprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- return lprob(ng, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
- };
- virtual double clprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- return lprob(text, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
- };
+ virtual double lprob(int* ng, int ngsize,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double lprob(string_vec_t& text, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+
+ double lprob(ngram& ng, string_vec_t& text, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible);
+
virtual double lprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
virtual double lprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
virtual double lprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double lprob(int* ng, int ngsize, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double lprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double lprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-
double lprob(ngram& ng, string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible);
- double lprob(ngram& ng, string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible);
double total_clprob(string_vec_t& text, topic_map_t& topic_weights);
double total_clprob(ngram& ng, topic_map_t& topic_weights);
@@ -183,10 +151,11 @@ namespace irstlm {
virtual int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev){
return m_lm->succscan(h,ng,action,lev);
}
-
+ /*
int maxlevel() const {
return maxlev;
};
+ */
virtual inline void setDict(dictionary* d) {
if (dict) delete dict;
diff --git a/src/lmInterpolation.cpp b/src/lmInterpolation.cpp
index 06db624..1f7d66a 100644
--- a/src/lmInterpolation.cpp
+++ b/src/lmInterpolation.cpp
@@ -54,7 +54,6 @@ namespace irstlm {
dictionary_upperbound=1000000;
int memmap=mmap;
-
dict=new dictionary((char *)NULL,1000000,dictionary_load_factor);
//get info from the configuration file
@@ -87,6 +86,7 @@ namespace irstlm {
idx_file=1;
idx_inverted=2;
idx_size=3;
+ m_isadaptive=false;
}else{
m_map_flag=true;
idx_weight=0;
@@ -94,6 +94,7 @@ namespace irstlm {
idx_file=2;
idx_inverted=3;
idx_size=4;
+ m_isadaptive=true;
}
m_number_lm = atoi(words[1]);
@@ -106,7 +107,7 @@ namespace irstlm {
VERBOSE(2,"lmInterpolation::load(const std::string &filename,int mmap) m_number_lm:"<< m_number_lm << std::endl;);
dict->incflag(1);
- for (int i=0; i<m_number_lm; i++) {
+ for (size_t i=0; i<m_number_lm; i++) {
inp.getline(line,BUFSIZ,'\n');
tokenN = parseWords(line,words,idx_size);
@@ -126,15 +127,17 @@ namespace irstlm {
if (m_map_flag){
m_idx[words[idx_name]] = i;
m_name[i] = words[idx_name];
+ VERBOSE(2,"i:" << i << " m_idx[words[idx_name]]:|" << m_idx[words[idx_name]] << "| m_name[i]:|" << m_name[i] << "|" << endl);
}else{
std::stringstream name;
name << i;
m_idx[name.str()] = i;
m_name[i] = name.str();
+ VERBOSE(2,"i:" << i << " name.str():|" << name.str() << "| m_name[i]:|" << m_name[i] << "|" << endl);
}
m_file[i] = words[idx_file];
- VERBOSE(2,"lmInterpolation::load(const std::string &filename,int mmap) i:" << i << " m_name:|"<< m_name[i] << "|" " m_file:|"<< m_file[i] << "|" << std::endl);
+ VERBOSE(2,"lmInterpolation::load(const std::string &filename,int mmap) i:" << i << " m_name:|"<< m_name[i] << "|" " m_file:|"<< m_file[i] << "| isadaptve:|" << m_isadaptive << "|" << std::endl);
m_lm[i] = load_lm(i,memmap,ngramcache_load_factor,dictionary_load_factor);
//set the actual value for inverted flag, which is known only after loading the lM
@@ -149,7 +152,7 @@ namespace irstlm {
inp.close();
int maxorder = 0;
- for (int i=0; i<m_number_lm; i++) {
+ for (size_t i=0; i<m_number_lm; i++) {
maxorder = (maxorder > m_lm[i]->maxlevel())?maxorder:m_lm[i]->maxlevel();
}
@@ -180,9 +183,9 @@ namespace irstlm {
}
//return log10 prob of an ngram
- double lmInterpolation::clprob(ngram ng, lm_map_t& lm_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmInterpolation::clprob(ngram ng, topic_map_t& lm_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- VERBOSE(1,"double lmInterpolation::clprob(ngram ng, lm_map_t& lm_weights,...)" << std::endl);
+ VERBOSE(1,"double lmInterpolation::clprob(ngram ng, topic_map_t& lm_weights,...)" << std::endl);
double pr=0.0;
double _logpr;
@@ -197,7 +200,8 @@ namespace irstlm {
double_vec_t weight(m_number_lm);
set_weight(lm_weights,weight);
- for (size_t i=0; i<m_lm.size(); i++) {
+// for (size_t i=0; i<m_lm.size(); i++) {
+ for (size_t i=0; i<m_number_lm; i++) {
if (weight[i]>0.0){
ngram _ng(m_lm[i]->getDict());
_ng.trans(ng);
@@ -237,6 +241,9 @@ namespace irstlm {
actualextendible=true; //set extendible flag to true if the ngram is extendible for any LM
}
}
+ else{
+ VERBOSE(3," LM " << i << " weight is zero" << std::endl);
+ }
}
if (bol) *bol=actualbol;
if (bow) *bow=log(actualbow);
@@ -253,7 +260,7 @@ namespace irstlm {
return log10(pr);
}
- double lmInterpolation::clprob(int* codes, int sz, lm_map_t& lm_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmInterpolation::clprob(int* codes, int sz, topic_map_t& lm_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
//create the actual ngram
@@ -352,7 +359,7 @@ namespace irstlm {
MY_ASSERT(dub > dict->size());
double _logpr;
double OOVpenalty=0.0;
- for (int i=0; i<m_number_lm; i++) {
+ for (size_t i=0; i<m_number_lm; i++) {
if (m_weight[i]>0.0){
m_lm[i]->setlogOOVpenalty(dub); //set OOV Penalty for each LM
_logpr=m_lm[i]->getlogOOVpenalty(); // logOOV penalty is in log10
@@ -365,11 +372,14 @@ namespace irstlm {
return logOOVpenalty;
}
- void lmInterpolation::set_weight(const lm_map_t& map, double_vec_t& weight){
+ void lmInterpolation::set_weight(const topic_map_t& map, double_vec_t& weight){
VERBOSE(4,"void lmInterpolation::set_weight" << std::endl);
VERBOSE(4,"map.size:" << map.size() << std::endl);
- for (lm_map_t::const_iterator it=map.begin(); it!=map.end();++it){
- weight[m_idx[it->first]] = it->second;
+ for (topic_map_t::const_iterator it=map.begin(); it!=map.end();++it){
+ if (m_idx.find(it->first) == m_idx.end()){
+ exit_error(IRSTLM_ERROR_DATA, "void lmInterpolation::set_weight(const topic_map_t& map, double_vec_t& weight) ERROR: you are setting the weight of a LM which is not included in the interpolated LM");
+ }
+ weight[m_idx[it->first]] = it->second;
VERBOSE(4,"it->first:|" << it->first << "| it->second:|" << it->second << "| m_idx[it->first]:|" << m_idx[it->first] << "| weight[m_idx[it->first]]:|" <<weight[m_idx[it->first]] << "|" << std::endl);
}
}
diff --git a/src/lmInterpolation.h b/src/lmInterpolation.h
index 23d70f4..e2a9c7b 100644
--- a/src/lmInterpolation.h
+++ b/src/lmInterpolation.h
@@ -45,7 +45,7 @@ interpolation of several sub LMs
class lmInterpolation: public lmContainer
{
static const bool debug=true;
- int m_number_lm;
+ size_t m_number_lm;
int order;
int dictionary_upperbound; //set by user
double logOOVpenalty; //penalty for OOV words (default 0)
@@ -53,22 +53,20 @@ class lmInterpolation: public lmContainer
bool m_map_flag; //flag for the presence of a map between name and lm
int memmap; //level from which n-grams are accessed via mmap
- std::vector<double> m_weight;
+ double_vec_t m_weight;
std::vector<std::string> m_file;
std::vector<bool> m_isinverted;
std::vector<lmContainer*> m_lm;
std::map< std::string, size_t > m_idx;
std::map< size_t, std::string > m_name;
-
- int maxlev; //maximun order of sub LMs;
float ngramcache_load_factor;
float dictionary_load_factor;
dictionary *dict; // dictionary for all interpolated LMs
- void set_weight(const lm_map_t& map, std::vector<double>& weight);
+ void set_weight(const topic_map_t& map, double_vec_t& weight);
public:
@@ -81,12 +79,8 @@ public:
virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double clprob(ngram ng, lm_map_t& lm_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double clprob(int* ng, int ngsize, lm_map_t& lm_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL);
-
- int maxlevel() const {
- return maxlev;
- };
+ virtual double clprob(ngram ng, topic_map_t& lm_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double clprob(int* ng, int ngsize, topic_map_t& lm_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL);
virtual inline void setDict(dictionary* d) {
if (dict) delete dict;
@@ -116,7 +110,7 @@ public:
//for an interpolation LM this variable does not make sense
//for compatibility, we return true if all subLM return true
inline bool is_inverted() {
- for (int i=0; i<m_number_lm; i++) {
+ for (size_t i=0; i<m_number_lm; i++) {
if (m_isinverted[i] == false) return false;
}
return true;
@@ -127,7 +121,7 @@ public:
};
inline virtual bool is_OOV(int code) { //returns true if the word is OOV for each subLM
- for (int i=0; i<m_number_lm; i++) {
+ for (size_t i=0; i<m_number_lm; i++) {
int _code=m_lm[i]->getDict()->encode(getDict()->decode(code));
if (m_lm[i]->is_OOV(_code) == false) return false;
}
@@ -135,7 +129,7 @@ public:
}
virtual int addWord(const char *w){
- for (int i=0; i<m_number_lm; i++) {
+ for (size_t i=0; i<m_number_lm; i++) {
m_lm[i]->getDict()->incflag(1);
m_lm[i]->getDict()->encode(w);
m_lm[i]->getDict()->incflag(0);
diff --git a/src/lmmacro.h b/src/lmmacro.h
index bfeab6d..b4223d6 100644
--- a/src/lmmacro.h
+++ b/src/lmmacro.h
@@ -43,7 +43,6 @@ class lmmacro: public lmtable
{
dictionary *dict;
- int maxlev; //max level of table
int selectedField;
bool collapseFlag; //flag for the presence of collapse
@@ -112,9 +111,6 @@ public:
inline dictionary* getDict() const {
return dict;
}
- inline int maxlevel() const {
- return maxlev;
- };
inline virtual void dictionary_incflag(const bool flag) {
dict->incflag(flag);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git
More information about the debian-science-commits
mailing list