[irstlm] 45/78: fixes related to computation of approximated perplexity; enabling computation of approximated perplexity for interpolated LM; code cleanup and optimization
Giulio Paci
giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:47:05 UTC 2016
This is an automated email from the git hooks/post-receive script.
giuliopaci-guest pushed a commit to tag adaptiveLM.v0.10
in repository irstlm.
commit c0aff29328f615ce1e6672952245e08ebf201fc6
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date: Fri Nov 20 00:02:13 2015 +0100
fixes related to computation of approximated perplexity; enabling computation of approximated perplexity for interpolated LM; code cleanup and optimization
---
src/compile-lm.cpp | 2 +-
src/context-dependent-evaluation.cpp | 97 ++-
src/context-similarity.cpp | 26 +-
src/context-similarity.h | 44 +-
src/cplsa.h | 4 +-
src/cswam.cpp | 2 +-
src/cswam.h | 13 +-
src/dict.cpp | 2 +-
src/dictionary.cpp | 6 +-
src/dictionary.h | 4 +-
src/doc.cpp | 2 +-
src/dtsel.cpp | 4 +-
src/htable.cpp | 2 +-
src/htable.h | 6 +-
src/interplm.cpp | 2 +-
src/interplm.h | 2 +-
src/interpolate-lm.cpp | 2 +-
src/linearlm.cpp | 5 +-
src/lmContainer.cpp | 8 +-
src/lmContainer.h | 86 +--
src/lmContextDependent.cpp | 70 ++-
src/lmContextDependent.h | 32 +-
src/lmInterpolation.cpp | 64 +-
src/lmInterpolation.h | 18 +-
src/lmclass.cpp | 2 +-
src/lmclass.h | 16 +-
src/lmmacro.cpp | 6 +-
src/lmmacro.h | 6 +-
src/lmtable.cpp | 27 +-
src/lmtable.h | 98 ++--
src/mdiadapt.cpp | 43 +-
src/mempool.cpp | 2 +-
src/mempool.h | 4 +-
src/mfstream.cpp | 2 +-
src/mixture.cpp | 1072 +++++++++++++++++-----------------
src/ngramcache.cpp | 22 +-
src/ngramtable.cpp | 15 +-
src/ngramtable.h | 7 +-
src/ngt.cpp | 2 +-
src/normcache.cpp | 2 +-
src/shiftlm.cpp | 18 +-
src/shiftlm.h | 2 +-
src/tlm.cpp | 4 +-
src/util.cpp | 4 +-
44 files changed, 1014 insertions(+), 843 deletions(-)
diff --git a/src/compile-lm.cpp b/src/compile-lm.cpp
index d876458..8911c58 100644
--- a/src/compile-lm.cpp
+++ b/src/compile-lm.cpp
@@ -419,7 +419,7 @@ int main(int argc, char **argv)
delete lmt;
return 0;
- };
+ }
}
if (sscore == true) {
diff --git a/src/context-dependent-evaluation.cpp b/src/context-dependent-evaluation.cpp
index 188b48c..24e66fb 100644
--- a/src/context-dependent-evaluation.cpp
+++ b/src/context-dependent-evaluation.cpp
@@ -221,7 +221,9 @@ int main(int argc, char **argv)
VERBOSE(1, "You did not set any lexicon, but you activated parameter \"--add_lexicon_words\". This is formally correct; maybe you want to pass the lexicon through the input; Please check whether your setting is correct." << std::endl);
}
}else{
- VERBOSE(1, "You set a lexicon, but you did not activate parameter \"--add_lexicon_words\". Hence, words in he lexicon are not used as alternatives" << std::endl);
+ if (lexiconfile != NULL) {
+ VERBOSE(1, "You set a lexicon, but you did not activate parameter \"--add_lexicon_words\". Hence, words in he lexicon are not used as alternatives" << std::endl);
+ }
}
/*
if (std::string lexiconfile!= NULL) {
@@ -501,9 +503,80 @@ int main(int argc, char **argv)
}
*/
}
+ VERBOSE(2,"after add_lexicon_words current_dict->size:" << current_dict->size() << std::endl);
+ }else{
+ VERBOSE(2,"add_lexicon_words not active" << std::endl);
}
- VERBOSE(2,"after add_lexicon_words current_dict->size:" << current_dict->size() << std::endl);
+ if(1){
+ if (add_lm_words){
+ bool succ_flag=false;
+ ngram hg(lmt->getDict());
+
+ dictionary* succ_dict;
+
+ if (size==1) {
+ hg.size=0;
+ hg.pushw(lmt->getDict()->BoS());
+
+ succ_dict = new dictionary((char *)NULL,1000000);
+ succ_dict->incflag(1);
+ lmt->getSuccDict(hg,succ_dict);
+ succ_dict->incflag(0);
+ if (succ_dict->size() >= successor_limit){
+ VERBOSE(3,"successors are not added into the alternatives because they are too many" << std::endl);
+ }else if (succ_dict->size() == 0){
+ VERBOSE(3,"there are no successors" << std::endl);
+ }else{
+ succ_flag=true;
+ }
+ }else if (size>=2) {
+ hg.size=0;
+ hg.pushw(tmp_word_vec.at(tmp_word_vec.size()-2));
+
+ succ_dict = new dictionary((char *)NULL,1000000);
+ succ_dict->incflag(1);
+ lmt->getSuccDict(hg,succ_dict);
+ succ_dict->incflag(0);
+ if (succ_dict->size() >= successor_limit){
+ VERBOSE(3,"successors are not added into the alternatives because they are too many" << std::endl);
+ }else if (succ_dict->size() == 0){
+ VERBOSE(3,"there are no successors" << std::endl);
+ }else{
+ succ_flag=true;
+ }
+ if (!succ_flag && size>=3){
+ hg.size=0;
+ hg.pushw(tmp_word_vec.at(tmp_word_vec.size()-3));
+ hg.pushw(tmp_word_vec.at(tmp_word_vec.size()-2));
+
+ delete succ_dict;
+ succ_dict = new dictionary((char *)NULL,1000000);
+ succ_dict->incflag(1);
+ lmt->getSuccDict(hg,succ_dict);
+ succ_dict->incflag(0);
+ lmt->getSuccDict(hg,succ_dict);
+
+ if (succ_dict->size() >= successor_limit){
+ VERBOSE(3,"successors are not added into the alternatives because they are too many" << std::endl);
+ }else if (succ_dict->size() == 0){
+ VERBOSE(3,"there are no successors" << std::endl);
+ }else{
+ succ_flag=true;
+ }
+ }
+ }
+
+ if (succ_flag){
+ current_dict->augment(succ_dict, false); //do not add OOV
+ }
+ VERBOSE(2,"after add_lm_words current_dict->size:" << current_dict->size() << std::endl);
+ }else{
+ VERBOSE(2,"add_lm_words not active" << std::endl);
+ }
+ }
+ /*
+ if(0){
if (add_lm_words){
bool succ_flag=false;
ngram hg(lmt->getDict());
@@ -512,8 +585,9 @@ int main(int argc, char **argv)
hg.pushw(lmt->getDict()->BoS());
hg.pushc(0);
+ VERBOSE(1,"1 before calling lmt->get(hg,hg.size,hg.size-1) add_lm_words hg:|" << hg << "| hg.size:|" << hg.size << "| hg.lev+1:|" << (hg.lev+1) << "| hg.succ:|" << hg.succ << "|" << std::endl);
lmt->get(hg,hg.size,hg.size-1);
- VERBOSE(1,"add_lm_words hg:|" << hg << "| hg.size:" << hg.size << " hg.succ:" << hg.succ << std::endl);
+ VERBOSE(1,"1 after calling lmt->get(hg,hg.size,hg.size-1) add_lm_words hg:|" << hg << "| hg.size:|" << hg.size << "| hg.lev+1:|" << (hg.lev+1) << "| hg.succ:|" << hg.succ << "|" << std::endl);
if (hg.succ < successor_limit){
succ_flag=true;
}else{
@@ -523,8 +597,9 @@ int main(int argc, char **argv)
hg.pushw(tmp_word_vec.at(tmp_word_vec.size()-2));
hg.pushc(0);
+ VERBOSE(1,"2 before calling lmt->get(hg,hg.size,hg.size-1) hg:|" << hg << "| hg.size:|" << hg.size << "| hg.lev+1:|" << (hg.lev+1) << "| hg.succ:|" << hg.succ << "|" << std::endl);
lmt->get(hg,hg.size,hg.size-1);
- VERBOSE(1,"add_lm_words hg:|" << hg << "| hg.size:" << hg.size << " hg.succ:" << hg.succ << std::endl);
+ VERBOSE(1,"2 after calling lmt->get(hg,hg.size,hg.size-1) add_lm_words hg:|" << hg << "| hg.size:|" << hg.size << "| hg.lev+1:|" << (hg.lev+1) << "| hg.succ:|" << hg.succ << "|" << std::endl);
if (hg.succ < successor_limit){
succ_flag=true;
}else{
@@ -537,8 +612,9 @@ int main(int argc, char **argv)
hg.pushw(tmp_word_vec.at(tmp_word_vec.size()-2));
hg.pushc(0);
+ VERBOSE(1,"3 before calling lmt->get(hg,hg.size,hg.size-1) add_lm_words hg:|" << hg << "| hg.size:|" << hg.size << "| hg.lev+1:|" << (hg.lev+1) << "| hg.succ:|" << hg.succ << "|" << std::endl);
lmt->get(hg,hg.size,hg.size-1);
- VERBOSE(1,"add_lm_words hg:|" << hg << "| hg.size:" << hg.size << " hg.succ:" << hg.succ << std::endl);
+ VERBOSE(1,"3 after calling lmt->get(hg,hg.size,hg.size-1) add_lm_words hg:|" << hg << "| hg.size:|" << hg.size << "| hg.lev+1:|" << (hg.lev+1) << "| hg.succ:|" << hg.succ << "|" << std::endl);
if (hg.succ < successor_limit){
succ_flag=true;
@@ -556,18 +632,23 @@ int main(int argc, char **argv)
current_dict->encode(ng.dict->decode(*ng.wordp(1)));
}
}
-
+ VERBOSE(2,"after add_lm_words current_dict->size:" << current_dict->size() << std::endl);
+ }else{
+ VERBOSE(2,"add_lm_words not active" << std::endl);
}
- VERBOSE(2,"after add_lm_words current_dict->size:" << current_dict->size() << std::endl);
+ }
+ */
if (add_sentence_words){
for (string_vec_t::const_iterator it=word_vec.begin(); it!=word_vec.end(); ++it)
{
current_dict->encode(it->c_str());
}
+ VERBOSE(2,"after add_sentence_words current_dict->size:" << current_dict->size() << std::endl);
+ }else{
+ VERBOSE(2,"add_sentence_words not active" << std::endl);
}
current_dict->incflag(0);
- VERBOSE(2,"after add_sentence_words current_dict->size:" << current_dict->size() << std::endl);
sent_current_dict_alternatives += current_dict->size();
current_dict_alternatives += current_dict->size();
diff --git a/src/context-similarity.cpp b/src/context-similarity.cpp
index a4ac74c..6e3983a 100644
--- a/src/context-similarity.cpp
+++ b/src/context-similarity.cpp
@@ -218,19 +218,21 @@ namespace irstlm {
double ContextSimilarity::topic_score(ngram& ng, ngramtable& ngt, ngramtable& ngt2){
switch (m_score_type){
- case TOPIC_SCORE_TYPE_0:
- return topic_score_option0(ng, ngt, ngt2);
- case TOPIC_SCORE_TYPE_1:
- return topic_score_option1(ng, ngt, ngt2);
- case TOPIC_SCORE_TYPE_2:
- return topic_score_option2(ng, ngt, ngt2);
- case TOPIC_SCORE_TYPE_3:
- return topic_score_option3(ng, ngt, ngt2);
- default:
- std::stringstream ss_msg;
- ss_msg << "Topic score type " << m_score_type << " is unknown.";
- exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
+ case TOPIC_SCORE_TYPE_0:
+ return topic_score_option0(ng, ngt, ngt2);
+ case TOPIC_SCORE_TYPE_1:
+ return topic_score_option1(ng, ngt, ngt2);
+ case TOPIC_SCORE_TYPE_2:
+ return topic_score_option2(ng, ngt, ngt2);
+ case TOPIC_SCORE_TYPE_3:
+ return topic_score_option3(ng, ngt, ngt2);
+ default:
+ std::stringstream ss_msg;
+ ss_msg << "Topic score type " << m_score_type << " is unknown.";
+ exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
}
+ MY_ASSERT(false); //never pass here!!!
+ return 0.0;
}
double ContextSimilarity::topic_score_option0(ngram& ng, ngramtable& ngt, ngramtable& ngt2)
diff --git a/src/context-similarity.h b/src/context-similarity.h
index b8684dd..5a67f53 100644
--- a/src/context-similarity.h
+++ b/src/context-similarity.h
@@ -36,8 +36,6 @@
#include "ngramtable.h"
#include "lmContainer.h"
-class ngram;
-
namespace irstlm {
#define topic_map_delimiter1 ':'
#define topic_map_delimiter2 ','
@@ -58,14 +56,14 @@ namespace irstlm {
int m_hk_order; //order of m_hk_ngt
int m_wk_order; //order of m_wk_ngt
int m_hwk_order; //order of m_hwk_ngt
-
+
int m_topic_size; //number of topics in the model
topic_map_t topic_map;
int m_threshold_on_h; //frequency threshold on h to allow computation of similairty scores
double m_smoothing; //smoothing value to sum to the counts to avoid zero-prob; implements a sort of shift-beta smoothing
int m_score_type; //scoreing type for computing the topic distribution, values are TOPIC_SCORE_TYPE_[0123]
-
+
//flag for enabling/disabling context_similarity scores
// if disabled, context_similarity is 0.0 and topic_scores distribution is empty
bool m_active;
@@ -74,7 +72,7 @@ namespace irstlm {
void add_topic(const std::string& topic, ngram& ng);
void modify_topic(const std::string& topic, ngram& ng);
void create_topic_ngram(const string_vec_t& text, const std::string& topic, ngram& ng);
-
+
void get_counts(ngram& ng, ngramtable& ngt, double& c_xk, double& c_x);
double topic_score(string_vec_t text, const std::string& topic, ngramtable& ngt, ngramtable& ngt2);
@@ -112,7 +110,7 @@ namespace irstlm {
void print_topic_scores(topic_map_t& map);
void print_topic_scores(topic_map_t& map, topic_map_t& refmap, double len);
double DeltaCrossEntropy(topic_map_t& topic_map, topic_map_t& tmp_map, double len);
-
+
void normalize_topic_scores(topic_map_t& map);
double context_similarity(string_vec_t& text, topic_map_t& topic_weights);
@@ -136,25 +134,25 @@ namespace irstlm {
m_active = val;
}
- void set_Topic_Score_Type(int t){
- switch (t){
- case TOPIC_SCORE_TYPE_0:
- case TOPIC_SCORE_TYPE_1:
- case TOPIC_SCORE_TYPE_2:
- case TOPIC_SCORE_TYPE_3:
- m_score_type = t;
- default:
- std::stringstream ss_msg;
- ss_msg << "Topic score type " << m_score_type << " is unknown.";
- exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
+ void set_Topic_Score_Type(int t){
+ switch (t){
+ case TOPIC_SCORE_TYPE_0:
+ case TOPIC_SCORE_TYPE_1:
+ case TOPIC_SCORE_TYPE_2:
+ case TOPIC_SCORE_TYPE_3:
+ m_score_type = t;
+ default:
+ std::stringstream ss_msg;
+ ss_msg << "Topic score type " << m_score_type << " is unknown.";
+ exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
}
- }
- int get_Topic_Score_Type(){
- return m_score_type;
- }
-
+ }
+ int get_Topic_Score_Type(){
+ return m_score_type;
+ }
+
};
-}
+};
#endif
diff --git a/src/cplsa.h b/src/cplsa.h
index a57d8c3..974d1b1 100755
--- a/src/cplsa.h
+++ b/src/cplsa.h
@@ -65,12 +65,12 @@ public:
static void *expected_counts_helper(void *argv){
task t=*(task *)argv;
((plsa *)t.ctx)->expected_counts(t.argv);return NULL;
- };
+ }
static void *single_inference_helper(void *argv){
task t=*(task *)argv;
((plsa *)t.ctx)->single_inference(t.argv);return NULL;
- };
+ }
int train(char *trainfile,char* modelfile, int maxiter, float noiseW,int spectopic=0);
int inference(char *trainfile, char* modelfile, int maxiter, char* topicfeatfile,char* wordfeatfile);
diff --git a/src/cswam.cpp b/src/cswam.cpp
index 23f7021..ed0708a 100755
--- a/src/cswam.cpp
+++ b/src/cswam.cpp
@@ -869,7 +869,7 @@ void cswam::contraction(void *argv){
//re-normalize weights
float totw=0;
for (int n=0;n<TM[e].n;n++){totw+=TM[e].W[n]; assert(TM[e].W[n] > 0.0001);}
- for (int n=0;n<TM[e].n;n++){TM[e].W[n]/=totw;};
+ for (int n=0;n<TM[e].n;n++){TM[e].W[n]/=totw;}
}
int cswam::train(char *srctrainfile, char*trgtrainfile,char *modelfile, int maxiter,int threads){
diff --git a/src/cswam.h b/src/cswam.h
index a77647f..a1eceeb 100755
--- a/src/cswam.h
+++ b/src/cswam.h
@@ -158,25 +158,25 @@ public:
static void *expected_counts_helper(void *argv){
task t=*(task *)argv;
((cswam *)t.ctx)->expected_counts(t.argv);return NULL;
- };
+ }
void maximization(void *argv);
static void *maximization_helper(void *argv){
task t=*(task *)argv;
((cswam *)t.ctx)->maximization(t.argv);return NULL;
- };
+ }
void expansion(void *argv);
static void *expansion_helper(void *argv){
task t=*(task *)argv;
((cswam *)t.ctx)->expansion(t.argv);return NULL;
- };
+ }
void contraction(void *argv);
static void *contraction_helper(void *argv){
task t=*(task *)argv;
((cswam *)t.ctx)->contraction(t.argv);return NULL;
- };
+ }
void M1_ecounts(void *argv);
@@ -201,16 +201,13 @@ public:
void findfriends(FriendList* friends);
-
-
int train(char *srctrainfile,char *trgtrainfile,char* modelfile, int maxiter,int threads=1);
void aligner(void *argv);
static void *aligner_helper(void *argv){
task t=*(task *)argv;
((cswam *)t.ctx)->aligner(t.argv);return NULL;
- };
-
+ }
int test(char *srctestfile, char* trgtestfile, char* modelfile,char* alignmentfile, int threads=1);
diff --git a/src/dict.cpp b/src/dict.cpp
index 43bb8aa..04e0984 100644
--- a/src/dict.cpp
+++ b/src/dict.cpp
@@ -101,7 +101,7 @@ int main(int argc, char **argv)
if (inp==NULL) {
usage();
exit_error(IRSTLM_NO_ERROR, "Warning: no input file specified");
- };
+ }
// options compatibility issues:
if (curveflag && !freqflag)
diff --git a/src/dictionary.cpp b/src/dictionary.cpp
index 95e727e..2a8ff18 100644
--- a/src/dictionary.cpp
+++ b/src/dictionary.cpp
@@ -140,13 +140,13 @@ void dictionary::generate(char *filename,bool header)
}
-void dictionary::augment(dictionary *d)
+void dictionary::augment(dictionary *d, bool add_oov)
{
incflag(1);
for (int i=0; i<d->n; i++)
encode(d->decode(i));
incflag(0);
- encode(OOV());
+ if (add_oov) encode(OOV());
}
@@ -427,7 +427,7 @@ void dictionary::sort()
tb[i].code=i;
//always insert without checking whether the word is already in
htb->insert((char*)&tb[i].word);
- };
+ }
}
diff --git a/src/dictionary.h b/src/dictionary.h
index 9423c8f..58ae4bd 100644
--- a/src/dictionary.h
+++ b/src/dictionary.h
@@ -213,7 +213,7 @@ public:
void load(std::istream& fd);
void save(std::ostream& fd);
- void augment(dictionary *d);
+ void augment(dictionary *d, bool add_oov=true);
int size() const {
return n;
@@ -228,7 +228,7 @@ public:
void print_curve_oov(int curvesize, const char *filename, int listflag=0);
void cleanfreq() {
- for (int i=0; i<n; ++i){ tb[i].freq=0; };
+ for (int i=0; i<n; ++i){ tb[i].freq=0; }
N=0;
}
diff --git a/src/doc.cpp b/src/doc.cpp
index ec59756..7284c72 100755
--- a/src/doc.cpp
+++ b/src/doc.cpp
@@ -73,7 +73,7 @@ doc::doc(dictionary* d,char* docfname,bool use_null_word){
}
if (m < MAXDOCLEN) tmp[m++]=w;
- if (m==MAXDOCLEN) {cerr<< "warn: clipping long document (line " << n << " )\n";exit(1);};
+ if (m==MAXDOCLEN) {cerr<< "warn: clipping long document (line " << n << " )\n";exit(1);}
}
cerr << "uploaded " << n << " documents\n";
diff --git a/src/dtsel.cpp b/src/dtsel.cpp
index b52818a..6f9737b 100644
--- a/src/dtsel.cpp
+++ b/src/dtsel.cpp
@@ -203,12 +203,12 @@ int main(int argc, char **argv)
if (!evalset && (!indom || !outdom)){
exit_error(IRSTLM_ERROR_DATA, "Must specify in-domain and out-domain data files");
- };
+ }
//score file is always required: either as output or as input
if (!scorefile){
exit_error(IRSTLM_ERROR_DATA, "Must specify score file");
- };
+ }
if (!evalset && !model){
exit_error(IRSTLM_ERROR_DATA, "Must specify data selection model");
diff --git a/src/htable.cpp b/src/htable.cpp
index 99e8388..46b5b21 100644
--- a/src/htable.cpp
+++ b/src/htable.cpp
@@ -59,7 +59,7 @@ address htable<int *>::Hash(int* key)
h ^= (h >> 6);
h += ~(h << 11);
h ^= (h >> 16);
- };
+ }
return h;
}
diff --git a/src/htable.h b/src/htable.h
index 54ca184..dc08a5c 100644
--- a/src/htable.h
+++ b/src/htable.h
@@ -210,7 +210,7 @@ T htable<T>::scan(HT_ACTION action)
T k = scan_p->key;
scan_p=(entry<T> *)scan_p->next;
return k;
- };
+ }
return NULL;
}
@@ -235,7 +235,7 @@ void htable<T>::map(ostream& co,int cols)
while(p!=NULL) {
n++;
p=(entry<T> *)p->next;
- };
+ }
if (i && (i % cols)==0) {
co << img << "\n";
@@ -262,7 +262,7 @@ void htable<T>::stat() const
<< " acc " << accesses
<< " coll " << collisions
<< " used memory " << used()/1024 << "Kb\n";
-};
+}
#endif
diff --git a/src/interplm.cpp b/src/interplm.cpp
index c671409..a3c2f8c 100644
--- a/src/interplm.cpp
+++ b/src/interplm.cpp
@@ -100,7 +100,7 @@ interplm::interplm(char *ngtfile,int depth,TABLETYPE tabtype):
<< dict->freq(BoS) << "\n";
}
-};
+}
interplm::~interplm()
{
diff --git a/src/interplm.h b/src/interplm.h
index 2093f15..8fe9003 100644
--- a/src/interplm.h
+++ b/src/interplm.h
@@ -98,7 +98,7 @@ public:
void trainunigr();
double unigrWB(ngram ng);
- virtual double unigr(ngram ng){ return unigrWB(ng); };
+ virtual double unigr(ngram ng){ return unigrWB(ng); }
double zerofreq(int lev);
diff --git a/src/interpolate-lm.cpp b/src/interpolate-lm.cpp
index fe5cefd..318764e 100644
--- a/src/interpolate-lm.cpp
+++ b/src/interpolate-lm.cpp
@@ -485,7 +485,7 @@ int main(int argc, char **argv)
<< " Noov_any=" << Noov_any
<< " OOV_any=" << (float)Noov_any/Nw * 100.0 << "%" << std::endl;
- };
+ }
if (score == true) {
diff --git a/src/linearlm.cpp b/src/linearlm.cpp
index 2e9b2f8..b39db0a 100644
--- a/src/linearlm.cpp
+++ b/src/linearlm.cpp
@@ -48,8 +48,7 @@ namespace irstlm {
prunethresh=prunefreq;
cerr << "PruneThresh: " << prunethresh << "\n";
- };
-
+ }
int linearwb::train()
{
@@ -135,7 +134,7 @@ namespace irstlm {
{
prunethresh=prunefreq;
cerr << "PruneThresh: " << prunethresh << "\n";
- };
+ }
int linearstb::train()
{
diff --git a/src/lmContainer.cpp b/src/lmContainer.cpp
index f8c8dae..a1de966 100644
--- a/src/lmContainer.cpp
+++ b/src/lmContainer.cpp
@@ -107,7 +107,7 @@ namespace irstlm {
VERBOSE(1,"type: " << type << std::endl);
return type;
- };
+ }
lmContainer* lmContainer::CreateLanguageModel(const std::string infile, float nlf, float dlf)
{
@@ -178,11 +178,11 @@ namespace irstlm {
return res;
}
return false;
- };
+ }
bool lmContainer::GetSentenceAndLexicon(std::string& sentence, std::string& lexiconfile, std::string& line)
{
- VERBOSE(2,"bool lmContextDependent::GetSentenceAndLexicon" << std::endl);
+ VERBOSE(2,"bool lmContainer::GetSentenceAndLexicon" << std::endl);
VERBOSE(2,"line:|" << line << "|" << std::endl);
bool ret;
size_t pos = line.find(lexicon_delimiter);
@@ -204,7 +204,7 @@ namespace irstlm {
}
bool lmContainer::GetSentenceAndContext(std::string& sentence, std::string& context, std::string& line)
{
- VERBOSE(2,"bool lmContextDependent::GetSentenceAndContext" << std::endl);
+ VERBOSE(2,"bool lmContainer::GetSentenceAndContext" << std::endl);
VERBOSE(2,"line:|" << line << "|" << std::endl);
bool ret;
size_t pos = line.find(context_delimiter);
diff --git a/src/lmContainer.h b/src/lmContainer.h
index fef19d4..a8c2f09 100644
--- a/src/lmContainer.h
+++ b/src/lmContainer.h
@@ -69,73 +69,80 @@ namespace irstlm {
public:
lmContainer();
- virtual ~lmContainer() {};
+ virtual ~lmContainer() {}
virtual void load(const std::string &filename, int mmap=0) {
UNUSED(filename);
UNUSED(mmap);
- };
+ }
virtual void savetxt(const char *filename) {
UNUSED(filename);
- };
+ }
virtual void savebin(const char *filename) {
UNUSED(filename);
- };
+ }
virtual double getlogOOVpenalty() const {
return 0.0;
- };
+ }
virtual double setlogOOVpenalty(int dub) {
UNUSED(dub);
return 0.0;
- };
+ }
virtual double setlogOOVpenalty(double oovp) {
UNUSED(oovp);
return 0.0;
- };
+ }
inline virtual dictionary* getDict() const {
return NULL;
- };
+ }
inline virtual void maxlevel(int lev) {
maxlev = lev;
- };
+ }
inline virtual int maxlevel() const {
return maxlev;
- };
+ }
inline virtual void stat(int lev=0) {
UNUSED(lev);
- };
+ }
inline virtual void setMaxLoadedLevel(int lev) {
requiredMaxlev=lev;
- };
+ }
inline virtual int getMaxLoadedLevel() {
return requiredMaxlev;
- };
+ }
virtual bool is_inverted(const bool flag) {
UNUSED(flag);
return false;
- };
+ }
virtual bool is_inverted() {
return false;
- };
+ }
- virtual double clprob(ngram ng) { return clprob(ng, NULL, NULL, NULL, NULL, NULL, NULL, NULL); }
+ virtual double clprob(ngram ng) {
+
+ VERBOSE(3,"double lmContainer::clprob(ngram ng) ng:|" << ng << "|\n");
+ return clprob(ng, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+ }
virtual double clprob(ngram ng, double* bow) { return clprob(ng, bow, NULL, NULL, NULL, NULL, NULL, NULL); }
virtual double clprob(ngram ng, double* bow, int* bol) { return clprob(ng, bow, bol, NULL, NULL, NULL, NULL, NULL); }
virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr) { return clprob(ng, bow, bol, NULL, maxsuffptr, NULL, NULL, NULL); }
virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize) { return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, NULL, NULL); }
- virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible) { return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, NULL); };
- virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow) { return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, lastbow); }
+ virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible) { return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, NULL); }
+ virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow) {
+ VERBOSE(3,"double lmContainer::clprob(ngram ng,double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible, double* lastbow) ng:|" << ng << "|\n");
+ return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, lastbow);
+ }
virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx) { return clprob(ng, bow, bol, maxsuffidx, NULL, NULL, NULL, NULL); }
virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr) { return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, NULL, NULL, NULL); }
virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize) { return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, NULL, NULL); }
- virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible) { return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, NULL); };
+ virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible) { return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, NULL); }
virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow)
{
@@ -164,9 +171,7 @@ namespace irstlm {
MY_ASSERT (ong.size == ngsize);
return clprob(ong, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow);
- };
-
- // virtual double clprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL, double* lastbow=NULL) { return clprob(ng, topic_weights, bow, bol, NULL, maxsuffptr, statesize, extendible, lastbow); };
+ }
virtual double clprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL, double* lastbow=NULL)
{
UNUSED(topic_weights);
@@ -192,7 +197,7 @@ namespace irstlm {
UNUSED(extendible);
UNUSED(lastbow);
return 0.0;
- };
+ }
virtual double clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL)
{
@@ -206,7 +211,7 @@ namespace irstlm {
UNUSED(extendible);
UNUSED(lastbow);
return 0.0;
- };
+ }
virtual const char *cmaxsuffptr(ngram ng, unsigned int* statesize=NULL)
{
@@ -260,27 +265,33 @@ namespace irstlm {
return 0;
}
+ virtual void getSuccDict(ngram& ng,dictionary* d){
+ VERBOSE(1,"void lmContainer::getSuccDict(ngram& ng,dictionary* d) START ng:|" << ng << "|" << std::endl);
+ UNUSED(ng);
+ UNUSED(d);
+ }
- virtual void used_caches() {};
+
+ virtual void used_caches() {}
virtual void init_caches(int uptolev) {
UNUSED(uptolev);
- };
- virtual void check_caches_levels() {};
- virtual void reset_caches() {};
+ }
+ virtual void check_caches_levels() {}
+ virtual void reset_caches() {}
- virtual void reset_mmap() {};
+ virtual void reset_mmap() {}
void inline setLanguageModelType(int type) {
lmtype=type;
- };
+ }
int getLanguageModelType() const {
return lmtype;
- };
+ }
static int getLanguageModelType(std::string filename);
inline virtual void dictionary_incflag(const bool flag) {
UNUSED(flag);
- };
+ }
virtual bool filter(const string sfilter, lmContainer*& sublmt, const string skeepunigrams);
@@ -290,8 +301,7 @@ namespace irstlm {
inline virtual bool is_OOV(int code) {
UNUSED(code);
return false;
- };
-
+ }
inline static bool is_lmt_cache_enabled(){
VERBOSE(3,"inline static bool is_lmt_cache_enabled() " << lmt_cache_enabled << std::endl);
@@ -316,7 +326,7 @@ namespace irstlm {
virtual void print_table_stat(){
VERBOSE(3,"virtual void lmContainer::print_table_stat() "<< std::endl);
- };
+ }
inline std::string getContextDelimiter() const{ return context_delimiter; }
@@ -326,11 +336,11 @@ namespace irstlm {
virtual inline void set_Active(bool val)
{
UNUSED(val);
- };
- virtual bool is_Normalized(){ return false; };
+ }
+ virtual bool is_Normalized(){ return false; }
virtual void set_Normalized(bool val) {
UNUSED(val);
- };
+ }
};
diff --git a/src/lmContextDependent.cpp b/src/lmContextDependent.cpp
index cc52dd0..d4a18d4 100644
--- a/src/lmContextDependent.cpp
+++ b/src/lmContextDependent.cpp
@@ -62,10 +62,10 @@ namespace irstlm {
VERBOSE(2,"configuration file:|" << filename << "|" << std::endl);
std::stringstream ss_format;
-
+
ss_format << "LMCONTEXTDEPENDENT\nfilename_of_LM\nweight k_model hk_model hwk_model pruning_threshold [smoothing]" << std::endl;
ss_format << "or\nLMCONTEXTDEPENDENT TYPE score_type\nfilename_of_LM \nweight k_model hk_model hwk_model pruning_threshold [smoothing]" << std::endl;
-
+
dictionary_upperbound=1000000;
int memmap=mmap;
@@ -78,26 +78,26 @@ namespace irstlm {
int tokenN;
inp.getline(line,MAX_LINE,'\n');
tokenN = parseWords(line,words,LMCONTEXTDEPENDENT_CONFIGURE_MAX_TOKEN);
-
+
bool error=false;
- if ((tokenN!=1) || (tokenN!=3)){
- error=true;
- }else if ((strcmp(words[0],"LMCONTEXTDEPENDENT") != 0) && (strcmp(words[0],"lmcontextdependent")!=0)) {
- error=true;
- }else if ((tokenN==3) && ((strcmp(words[1],"TYPE") != 0) && (strcmp(words[1],"type") != 0))){
- error=true;
- }
+ if ((tokenN!=1) || (tokenN!=3)){
+ error=true;
+ }else if ((strcmp(words[0],"LMCONTEXTDEPENDENT") != 0) && (strcmp(words[0],"lmcontextdependent")!=0)) {
+ error=true;
+ }else if ((tokenN==3) && ((strcmp(words[1],"TYPE") != 0) && (strcmp(words[1],"type") != 0))){
+ error=true;
+ }
if (error){
std::stringstream ss_msg;
ss_msg << "ERROR: wrong header format of configuration file\ncorrect format:" << ss_format;
exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
- }
-
- int _score_type;
- if (tokenN==1){
- _score_type = TOPIC_SCORE_TYPE_2;
+ }
+
+ int _score_type;
+ if (tokenN==1){
+ _score_type = TOPIC_SCORE_TYPE_2;
}else{
- _score_type = atoi(words[2]);
+ _score_type = atoi(words[2]);
}
//reading ngram-based LM
@@ -106,12 +106,12 @@ namespace irstlm {
if(tokenN < 1 || tokenN > 1) {
error=true;
}
- if (error){
- std::stringstream ss_msg;
- ss_msg << "ERROR: wrong header format of configuration file\ncorrect format:" << ss_format;
- exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
- }
-
+ if (error){
+ std::stringstream ss_msg;
+ ss_msg << "ERROR: wrong header format of configuration file\ncorrect format:" << ss_format;
+ exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
+ }
+
VERBOSE(0, "model_w:|" << words[0] << "|" << std::endl);
//checking the language model type
@@ -133,11 +133,11 @@ namespace irstlm {
if(tokenN < 5 || tokenN > LMCONTEXTDEPENDENT_CONFIGURE_MAX_TOKEN) {
error= true;
}
- if (error){
- std::stringstream ss_msg;
- ss_msg << "ERROR: wrong header format of configuration file\ncorrect format:" << ss_format;
- exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
- }
+ if (error){
+ std::stringstream ss_msg;
+ ss_msg << "ERROR: wrong header format of configuration file\ncorrect format:" << ss_format;
+ exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
+ }
//loading topic model and initialization
m_similaritymodel_weight = (float) atof(words[0]);
@@ -145,10 +145,10 @@ namespace irstlm {
std::string _hk_ngt = words[2];
std::string _hwk_ngt = words[3];
int _thr = atoi(words[4]);
-
+
double _smoothing = 0.1;
if (tokenN == 6){ _smoothing = atof(words[5]); }
-
+
m_similaritymodel = new ContextSimilarity(_k_ngt, _hk_ngt, _hwk_ngt);
m_similaritymodel->set_Threshold_on_H(_thr);
m_similaritymodel->set_SmoothingValue(_smoothing);
@@ -275,5 +275,15 @@ namespace irstlm {
logOOVpenalty=log(m_lm->getlogOOVpenalty());
return logOOVpenalty;
}
-
+
+ /* returns into the dictionary the successors of the given ngram;
+ it collects the successors from main LM
+ */
+ void lmContextDependent::getSuccDict(ngram& ng,dictionary* d){
+ ngram _ng(m_lm->getDict());
+ _ng.trans(ng);
+ m_lm->getSuccDict(_ng,d);
+ }
+
+
}//namespace irstlm
diff --git a/src/lmContextDependent.h b/src/lmContextDependent.h
index 07cd736..6b35122 100644
--- a/src/lmContextDependent.h
+++ b/src/lmContextDependent.h
@@ -36,12 +36,12 @@ namespace irstlm {
class PseudoTopicModel
{
public:
- PseudoTopicModel(){};
- ~PseudoTopicModel(){};
+ PseudoTopicModel(){}
+ ~PseudoTopicModel(){}
void load(const std::string &filename){
UNUSED(filename);
- };
+ }
double prob(string_vec_t& text, topic_map_t& topic_weights){
UNUSED(text);
@@ -99,10 +99,10 @@ namespace irstlm {
return lexicon_delimiter;
}
- virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL){ return lprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); };
- virtual double clprob(string_vec_t& text, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL){ return lprob(text, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); };
- virtual double clprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL){ return lprob(ng, topic_weights, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); };
- virtual double clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL){ return lprob(text, topic_weights, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); };
+ virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL){ return lprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); }
+ virtual double clprob(string_vec_t& text, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL){ return lprob(text, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); }
+ virtual double clprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL){ return lprob(ng, topic_weights, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); }
+ virtual double clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL){ return lprob(text, topic_weights, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); }
virtual double lprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
virtual double lprob(string_vec_t& text, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
@@ -128,28 +128,23 @@ namespace irstlm {
virtual int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev){
return m_lm->succscan(h,ng,action,lev);
}
- /*
- int maxlevel() const {
- return maxlev;
- };
- */
virtual inline void setDict(dictionary* d) {
if (dict) delete dict;
dict=d;
- };
+ }
virtual inline lmContainer* getWordLM() const {
return m_lm;
- };
+ }
virtual inline ContextSimilarity* getContextSimilarity() const {
return m_similaritymodel;
- };
+ }
virtual inline dictionary* getDict() const {
return dict;
- };
+ }
//set penalty for OOV words
virtual inline double getlogOOVpenalty() const {
@@ -175,7 +170,7 @@ namespace irstlm {
inline virtual void dictionary_incflag(const bool flag) {
dict->incflag(flag);
- };
+ }
inline virtual bool is_OOV(int code) { //returns true if the word is OOV for each subLM
return m_lm->is_OOV(code);
@@ -191,6 +186,9 @@ namespace irstlm {
void set_Normalized(bool val){
m_normalization = val;
}
+
+ /* returns into the dictionary the successors of the given ngram */
+ virtual void getSuccDict(ngram& ng,dictionary* d);
};
}//namespace irstlm
diff --git a/src/lmInterpolation.cpp b/src/lmInterpolation.cpp
index c64f843..99fb403 100644
--- a/src/lmInterpolation.cpp
+++ b/src/lmInterpolation.cpp
@@ -48,12 +48,12 @@ namespace irstlm {
{
VERBOSE(2,"lmInterpolation::load(const std::string &filename,int memmap)" << std::endl);
VERBOSE(2," filename:|" << filename << "|" << std::endl);
-
- std::stringstream ss_format;
-
- ss_format << "LMINTERPOLATION number_of_models\nweight_of_LM_1 filename_of_LM_1 [inverted]\nweight_of_LM_2 filename_of_LM_2 [inverted]\n...\n";
- ss_format << "or\nLMINTERPOLATION number_of_models MAP\nweight_of_LM_1 name_LM_1 filename_of_LM_1\nweight_of_LM_2 name_LM_2 filename_of_LM_2 [inverted]\n...\n";
-
+
+ std::stringstream ss_format;
+
+ ss_format << "LMINTERPOLATION number_of_models\nweight_of_LM_1 filename_of_LM_1 [inverted]\nweight_of_LM_2 filename_of_LM_2 [inverted]\n...\n";
+ ss_format << "or\nLMINTERPOLATION number_of_models MAP\nweight_of_LM_1 name_LM_1 filename_of_LM_1\nweight_of_LM_2 name_LM_2 filename_of_LM_2 [inverted]\n...\n";
+
dictionary_upperbound=1000000;
int memmap=mmap;
@@ -79,9 +79,9 @@ namespace irstlm {
}
if (error){
- std::stringstream ss_msg;
- ss_msg << "ERROR: wrong header format of configuration file\ncorrect format:" << ss_format;
- exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
+ std::stringstream ss_msg;
+ ss_msg << "ERROR: wrong header format of configuration file\ncorrect format:" << ss_format;
+ exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
}
size_t idx_weight, idx_file, idx_name, idx_inverted, idx_size;
@@ -119,10 +119,10 @@ namespace irstlm {
if(tokenN < idx_file || tokenN > idx_size) {
error = true;
}
- if (error){
+ if (error){
std::stringstream ss_msg;
- ss_msg << "ERROR: wrong header format of configuration file\ncorrect format:" << ss_format;
- exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
+ ss_msg << "ERROR: wrong header format of configuration file\ncorrect format:" << ss_format;
+ exit_error(IRSTLM_ERROR_DATA,ss_msg.str());
}
//check whether the (textual) LM has to be loaded as inverted
@@ -473,5 +473,45 @@ namespace irstlm {
VERBOSE(4,"it->first:|" << it->first << "| it->second:|" << it->second << "| m_idx[it->first]:|" << m_idx[it->first] << "| weight[m_idx[it->first]]:|" <<weight[m_idx[it->first]] << "|" << std::endl);
}
}
+ int lmInterpolation::get(ngram& ng,int n,int lev)
+ {
+     /*The function get for the lmInterpolation LM type is not well defined
+     The chosen implementation is the following:
+     - for each submodel with weight larger than 0.0,
+     -- an ngram is created with the submodel dictionary using the main ngram (of lmInterpolation)
+     -- the submodel-specific ngram is searched in the corresponding submodel
+     - the main ngram is considered found if any submodel-specific ngram is found
+     - the amount of successors of the main ngram is set to the maximum among the amounts of successors of the submodel-specific ngrams
+     Note: another option could be that of setting the amount of successors of the main ngram to the sum of the amounts of successors of the submodel-specific ngrams; but we did not implement it.
+     */
+ int ret = 0;
+ int succ = 0;
+ for (size_t i=0; i<m_number_lm; i++) {
+ if (m_weight[i]>0.0){
+ ngram _ng(m_lm[i]->getDict());
+ _ng.trans(ng);
+ if (m_lm[i]->get(_ng, n, lev)){
+ ret = 1;
+ succ = (_ng.succ>succ)?_ng.succ:succ;
+ }
+ }
+ }
+ ng.succ=succ;
+ return ret;
+ }
+
+ /* returns into the dictionary the successors of the given ngram;
+ it collects the successors from all submodels with weights larger than 0.0
+ */
+ void lmInterpolation::getSuccDict(ngram& ng,dictionary* d){
+ for (size_t i=0; i<m_number_lm; i++) {
+ if (m_weight[i]>0.0){
+ ngram _ng(m_lm[i]->getDict());
+ _ng.trans(ng);
+ m_lm[i]->getSuccDict(_ng,d);
+ }
+ }
+ }
+
}//namespace irstlm
diff --git a/src/lmInterpolation.h b/src/lmInterpolation.h
index 750adf6..d592a2b 100644
--- a/src/lmInterpolation.h
+++ b/src/lmInterpolation.h
@@ -73,7 +73,7 @@ namespace irstlm {
public:
lmInterpolation(float nlf=0.0, float dlfi=0.0);
- virtual ~lmInterpolation() {};
+ virtual ~lmInterpolation() {}
virtual void load(const std::string &filename,int mmap=0);
lmContainer* load_lm(int i, int memmap, float nlf, float dlf);
@@ -114,16 +114,16 @@ namespace irstlm {
int maxlevel() const {
return maxlev;
- };
+ }
virtual inline void setDict(dictionary* d) {
if (dict) delete dict;
dict=d;
- };
+ }
virtual inline dictionary* getDict() const {
return dict;
- };
+ }
//set penalty for OOV words
virtual inline double getlogOOVpenalty() const {
@@ -152,7 +152,7 @@ namespace irstlm {
inline virtual void dictionary_incflag(const bool flag) {
dict->incflag(flag);
- };
+ }
inline virtual bool is_OOV(int code) { //returns true if the word is OOV for each subLM
for (size_t i=0; i<m_number_lm; i++) {
@@ -174,6 +174,14 @@ namespace irstlm {
return c;
}
+ virtual inline int get(ngram& ng) {
+ return get(ng,ng.size,ng.size);
+ }
+ virtual int get(ngram& ng,int n,int lev);
+
+ /* returns into the dictionary the successors of the given ngram */
+ virtual void getSuccDict(ngram& ng,dictionary* d);
+
};
}//namespace irstlm
diff --git a/src/lmclass.cpp b/src/lmclass.cpp
index b578eb3..bcefc8e 100644
--- a/src/lmclass.cpp
+++ b/src/lmclass.cpp
@@ -57,7 +57,7 @@ namespace irstlm {
memset(MapScore,0,MaxMapSize*sizeof(double));
MapScoreN=0;
dict = new dictionary((char *)NULL,MaxMapSize); //word to cluster dictionary
- };
+ }
lmclass::~lmclass()
{
diff --git a/src/lmclass.h b/src/lmclass.h
index 3fcfba0..010f9c0 100644
--- a/src/lmclass.h
+++ b/src/lmclass.h
@@ -56,7 +56,7 @@ namespace irstlm {
wcode = getDict()->oovcode();
}
return MapScore[wcode];
- };
+ }
inline size_t getMap(int wcode) {
//the input word is un-known by the map, so I "transform" this word into the oov (of the words)
@@ -64,7 +64,7 @@ namespace irstlm {
wcode = getDict()->oovcode();
}
return dict->freq(wcode);
- };
+ }
void checkMap();
@@ -76,25 +76,25 @@ namespace irstlm {
virtual void load(const std::string &filename,int mmap=0);
/*
- virtual double lprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL) { return lprob(ng,bow,bol,NULL,maxsuffptr,statesize,extendible,lastbow); };
- virtual double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) { return clprob(ng,bow,bol,NULL,maxsuffptr,statesize,extendible); };
- virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) { return clprob(ng,ngsize,bow,bol,NULL,maxsuffptr,statesize,extendible); };
+ virtual double lprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL) { return lprob(ng,bow,bol,NULL,maxsuffptr,statesize,extendible,lastbow); }
+ virtual double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) { return clprob(ng,bow,bol,NULL,maxsuffptr,statesize,extendible); }
+ virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) { return clprob(ng,ngsize,bow,bol,NULL,maxsuffptr,statesize,extendible); }
*/
virtual double lprob(ngram ng, double* bow=NULL,int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
- virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL) { return lprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); };
+ virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL) { return lprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); }
inline bool is_OOV(int code) {
//a word is consisdered OOV if its mapped value is OOV
return lmtable::is_OOV(getMap(code));
- };
+ }
inline dictionary* getDict() const {
return dict;
}
inline virtual void dictionary_incflag(const bool flag) {
dict->incflag(flag);
- };
+ }
};
}//namespace irstlm
diff --git a/src/lmmacro.cpp b/src/lmmacro.cpp
index 1ca4aad..ec7ec9e 100644
--- a/src/lmmacro.cpp
+++ b/src/lmmacro.cpp
@@ -54,7 +54,7 @@ namespace irstlm {
{
dict = new dictionary((char *)NULL,1000000); // dict of micro tags
getDict()->incflag(1);
- };
+ }
lmmacro::~lmmacro()
{
@@ -157,7 +157,7 @@ namespace irstlm {
loadmap(mapfilename);
getDict()->genoovcode();
- };
+ }
void lmmacro::unloadmap()
{
@@ -327,7 +327,7 @@ namespace irstlm {
VERBOSE(3,"prob = " << prob << "\n");
return prob;
- };
+ }
double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible, double* lastbow)
{
diff --git a/src/lmmacro.h b/src/lmmacro.h
index 5ff8421..523035d 100644
--- a/src/lmmacro.h
+++ b/src/lmmacro.h
@@ -107,17 +107,17 @@ namespace irstlm {
if (field_code >= microMacroMapN) return true;
VERBOSE(2,"inline virtual bool lmmacro::is_OOV(int code)*field_code:" << field_code << " microMacroMap[field_code]:" << microMacroMap[field_code] << " lmtable::dict->oovcode():" << lmtable::dict->oovcode() << std::endl);
return (microMacroMap[field_code] == lmtable::dict->oovcode());
- };
+ }
inline dictionary* getDict() const {
return dict;
}
inline int maxlevel() const {
return maxlev;
- };
+ }
inline virtual void dictionary_incflag(const bool flag) {
dict->incflag(flag);
- };
+ }
inline virtual bool filter(const string sfilter, lmContainer* sublmt, const string skeepunigrams) {
UNUSED(sfilter);
diff --git a/src/lmtable.cpp b/src/lmtable.cpp
index 948bfd1..5f250a1 100644
--- a/src/lmtable.cpp
+++ b/src/lmtable.cpp
@@ -112,7 +112,7 @@ namespace irstlm {
// by default, it is a standard LM, i.e. queried for score
setOrderQuery(false);
- };
+ }
lmtable::~lmtable()
{
@@ -138,7 +138,7 @@ namespace irstlm {
}
if (delete_dict) delete dict;
- };
+ }
void lmtable::init_prob_and_state_cache()
{
@@ -511,7 +511,7 @@ namespace irstlm {
for (int c=0; c<NumCenters[Order]; c++) {
inp >> Pcenters[Order][c];
if (Order<maxlev) inp >> Bcenters[Order][c];
- };
+ }
//empty the last line
inp.getline((char*)line,MAX_LINE);
}
@@ -1165,7 +1165,7 @@ namespace irstlm {
}
return 1;
- };
+ }
//template<typename TA, typename TB>
@@ -1258,7 +1258,7 @@ namespace irstlm {
}
return 1;
- };
+ }
void *lmtable::search(int lev,
@@ -1303,7 +1303,7 @@ namespace irstlm {
}
default:
error((char*)"lmtable::search: this option is available");
- };
+ }
return NULL;
}
@@ -2246,6 +2246,19 @@ namespace irstlm {
}
return 0;
}
+
+ /* returns into the dictionary the successors of the given ngram */
+ void lmtable::getSuccDict(ngram& ng,dictionary* d){
+ ngram hg=ng;
+ hg.pushc(0);
+ if (get(hg,hg.size,hg.size-1)){
+ ngram _ng=hg;
+ succscan(hg,_ng,LMT_INIT,_ng.size);
+ while(succscan(hg,_ng,LMT_CONT,_ng.size)) {
+ d->encode(_ng.dict->decode(*_ng.wordp(1)));
+ }
+ }
+ }
//maxsuffptr returns the largest suffix of an n-gram that is contained
//in the LM table. This can be used as a compact representation of the
@@ -2662,7 +2675,7 @@ namespace irstlm {
#else
return lmtable::lprob(ong, bow, bol, ngramstate, state, statesize, extendible, lastbow);
#endif
- };
+ }
int lmtable::succrange(node ndp,int level,table_entry_pos_t* isucc,table_entry_pos_t* esucc)
{
diff --git a/src/lmtable.h b/src/lmtable.h
index 606a76d..4fcaf4b 100644
--- a/src/lmtable.h
+++ b/src/lmtable.h
@@ -175,8 +175,8 @@ namespace irstlm {
void init_prob_and_state_cache();
void init_probcache() {
init_prob_and_state_cache();
- }; //kept for back compatibility
- void init_statecache() {}; //kept for back compatibility
+ } //kept for back compatibility
+ void init_statecache() {} //kept for back compatibility
void init_lmtcaches();
// void init_lmtcaches(int uptolev);
void init_caches(int uptolev);
@@ -189,7 +189,7 @@ namespace irstlm {
void delete_prob_and_state_cache();
void delete_probcache() {
delete_prob_and_state_cache();
- }; //kept for back compatibility
+ } //kept for back compatibility
void delete_statecache() {}; //kept for back compatibility
void delete_lmtcaches();
void delete_caches();
@@ -201,15 +201,15 @@ namespace irstlm {
void check_prob_and_state_cache_levels() const;
void check_probcache_levels() const {
check_prob_and_state_cache_levels();
- }; //kept for back compatibility
- void check_statecache_levels() const{}; //kept for back compatibility
+ } //kept for back compatibility
+ void check_statecache_levels() const {} //kept for back compatibility
void check_lmtcaches_levels() const;
void check_caches_levels() const;
void reset_prob_and_state_cache();
void reset_probcache() {
reset_prob_and_state_cache();
- }; //kept for back compatibility
+ } //kept for back compatibility
void reset_statecache() {}; //kept for back compatibility
void reset_lmtcaches();
void reset_caches();
@@ -218,10 +218,10 @@ namespace irstlm {
bool are_prob_and_state_cache_active() const;
bool is_probcache_active() const {
return are_prob_and_state_cache_active();
- }; //kept for back compatibility
+ } //kept for back compatibility
bool is_statecache_active() const {
return are_prob_and_state_cache_active();
- }; //kept for back compatibility
+ } //kept for back compatibility
bool are_lmtcaches_active() const;
bool are_caches_active() const;
@@ -256,7 +256,7 @@ namespace irstlm {
virtual int maxlevel() const {
return maxlev;
- };
+ }
inline bool isQuantized() const {
return isQtable;
}
@@ -297,7 +297,7 @@ namespace irstlm {
void resize_level_nommap(int level);
void resize_level_mmap(int level, const char* filename);
- inline void update_offset(int level, table_entry_pos_t value) { tb_offset[level]=value; };
+ inline void update_offset(int level, table_entry_pos_t value) { tb_offset[level]=value; }
virtual void load(const std::string &filename, int mmap=0);
@@ -313,7 +313,7 @@ namespace irstlm {
int reload(std::set<string> words);
- void filter(const char* /* unused parameter: lmfile */) {};
+ void filter(const char* /* unused parameter: lmfile */) {}
/*
virtual double lprob(ngram ng){ return lprob(ng, NULL, NULL, NULL, NULL, NULL, NULL, NULL); }
virtual double lprob(ngram ng, double* bow){ return lprob(ng, bow, NULL, NULL, NULL, NULL, NULL, NULL); }
@@ -331,6 +331,32 @@ namespace irstlm {
virtual double lprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
+ virtual double clprob(string_vec_t& text, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow)
+ {
+ VERBOSE(2,"lmtable::clprob(string_vec_t& text, ...)" << std::endl);
+
+ //create the actual ngram
+ ngram ng(dict);
+ ng.pushw(text);
+ VERBOSE(3,"ng:|" << ng << "|" << std::endl);
+ MY_ASSERT (ng.size == (int) text.size());
+ return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow);
+ }
+
+ virtual double clprob(string_vec_t& text, topic_map_t& lm_weights, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow)
+ {
+ UNUSED(lm_weights);
+ VERBOSE(2,"lmtable::clprob(string_vec_t& text, topic_map_t& lm_weights, ...)" << std::endl);
+
+ //create the actual ngram
+ ngram ng(dict);
+ ng.pushw(text);
+ VERBOSE(3,"ng:|" << ng << "|" << std::endl);
+
+ MY_ASSERT (ng.size == (int) text.size());
+ return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow);
+ }
+
virtual const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
virtual const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
virtual ngram_state_t maxsuffidx(ngram ong, unsigned int* size=NULL);
@@ -360,7 +386,7 @@ namespace irstlm {
MY_ASSERT(ptr!=NULL);
for (int i=0; i<size; i++)
ptr[offs+i]=(value >> (8 * i)) & 0xff;
- };
+ }
inline void getmem(char* ptr,int* value,int offs,int size) {
MY_ASSERT(ptr!=NULL);
@@ -368,20 +394,19 @@ namespace irstlm {
for (int i=1; i<size; i++){
*value= *value | ( ( ptr[offs+i] & 0xff ) << (8 *i));
}
- };
+ }
template<typename T>
inline void putmem(char* ptr,T value,int offs) {
MY_ASSERT(ptr!=NULL);
memcpy(ptr+offs, &value, sizeof(T));
- };
+ }
template<typename T>
inline void getmem(char* ptr,T* value,int offs) {
MY_ASSERT(ptr!=NULL);
memcpy((void*)value, ptr+offs, sizeof(T));
- };
-
+ }
int nodesize(LMT_TYPE ndt) {
switch (ndt) {
@@ -408,8 +433,7 @@ namespace irstlm {
putmem(nd,value,offset,LMTCODESIZE);
return value;
- };
-
+ }
int codecmp(node a,node b) {
register int i,result;
@@ -418,12 +442,11 @@ namespace irstlm {
if(result) return result;
}
return 0;
- };
+ }
int codediff(node a,node b) {
return word(a)-word(b);
- };
-
+ }
inline float prob(node nd,LMT_TYPE ndt) {
int offs=LMTCODESIZE;
@@ -447,7 +470,7 @@ namespace irstlm {
MY_ASSERT(0);
return 0;
}
- };
+ }
template<typename T>
inline T prob(node nd, LMT_TYPE ndt, T value) {
@@ -472,7 +495,7 @@ namespace irstlm {
}
return value;
- };
+ }
inline float bow(node nd,LMT_TYPE ndt) {
int offs=LMTCODESIZE+(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
@@ -496,7 +519,7 @@ namespace irstlm {
MY_ASSERT(0);
return 0;
}
- };
+ }
template<typename T>
inline T bow(node nd,LMT_TYPE ndt, T value) {
@@ -521,7 +544,7 @@ namespace irstlm {
}
return value;
- };
+ }
inline table_entry_pos_t boundwithoffset(node nd,LMT_TYPE ndt, int level){ return bound(nd,ndt) - tb_offset[level+1]; }
@@ -540,7 +563,7 @@ namespace irstlm {
// value -= tb_offset[level+1];
return value;
- };
+ }
// table_entry_pos_t bound(node nd,LMT_TYPE ndt, table_entry_pos_t value, int level=0) {
@@ -553,7 +576,7 @@ namespace irstlm {
putmem(nd,value,offs);
return value;
- };
+ }
//template<typename T> T boundwithoffset(node nd,LMT_TYPE ndt, T value, int level);
@@ -567,7 +590,7 @@ namespace irstlm {
getmem(nd,&value,offs);
return value;
// return value-tb_offset[level+1];
- };
+ }
*/
/*
@@ -579,7 +602,7 @@ namespace irstlm {
return value;
// return value+tb_offset[level+1];
- };
+ }
*/
/*
@@ -591,7 +614,7 @@ namespace irstlm {
getmem(nd,&value,offs);
return value;
- };
+ }
template<typename T>
inline T bound(node nd,LMT_TYPE ndt, T value) {
@@ -601,7 +624,7 @@ namespace irstlm {
putmem(nd,value,offs);
return value;
- };
+ }
*/
//returns the indexes of the successors of a node
int succrange(node ndp,int level,table_entry_pos_t* isucc=NULL,table_entry_pos_t* esucc=NULL);
@@ -613,15 +636,15 @@ namespace irstlm {
if (delete_dict==true && dict) delete dict;
dict=d;
delete_dict=false;
- };
+ }
inline dictionary* getDict() const {
return dict;
- };
+ }
inline table_entry_pos_t getCurrentSize(int l) const {
return cursize[l];
- };
+ }
inline void setOrderQuery(bool v) {
orderQuery = v;
@@ -640,7 +663,7 @@ namespace irstlm {
//never allow the increment of the dictionary through this function
inline virtual void dictionary_incflag(const bool flag) {
UNUSED(flag);
- };
+ }
inline virtual bool filter(const string sfilter, lmtable* sublmt, const string skeepunigrams) {
std::cerr << "filtering... \n";
@@ -655,7 +678,10 @@ namespace irstlm {
inline virtual bool is_OOV(int code) {
return (code == dict->oovcode());
- };
+ }
+
+ /* returns into the dictionary the successors of the given ngram */
+ virtual void getSuccDict(ngram& ng,dictionary* d);
};
diff --git a/src/mdiadapt.cpp b/src/mdiadapt.cpp
index 83a6edf..64c3747 100644
--- a/src/mdiadapt.cpp
+++ b/src/mdiadapt.cpp
@@ -61,19 +61,19 @@ namespace irstlm {
forelm=NULL;
cache=NULL;
m_save_per_level=true;
- };
+ }
mdiadaptlm::~mdiadaptlm()
{
if (cache) delete cache;
delete_caches();
- };
+ }
void mdiadaptlm::delete_caches(int level)
{
if (probcache[level]) delete probcache[level];
if (backoffcache[level]) delete backoffcache[level];
- };
+ }
void mdiadaptlm::delete_caches()
{
@@ -83,7 +83,7 @@ namespace irstlm {
delete [] probcache;
delete [] backoffcache;
#endif
- };
+ }
void mdiadaptlm::caches_stat()
{
@@ -99,7 +99,7 @@ namespace irstlm {
}
}
#endif
- };
+ }
void mdiadaptlm::create_caches(int mcl)
@@ -123,41 +123,40 @@ namespace irstlm {
MY_ASSERT(backoffcache[level]==NULL);
probcache[level]=new NGRAMCACHE_t(level,sizeof(double),400000);
backoffcache[level]=new NGRAMCACHE_t(level,sizeof(double),400000);
- };
+ }
void mdiadaptlm::init_caches()
{
#ifdef MDIADAPTLM_CACHE_ENABLE
for (int i=1; i<=max_caching_level; i++) init_caches(i);
#endif
- };
+ }
void mdiadaptlm::check_cache_levels(int level)
{
if (probcache[level] && probcache[level]->isfull()) probcache[level]->reset(probcache[level]->cursize());
if (backoffcache[level] && backoffcache[level]->isfull()) backoffcache[level]->reset(backoffcache[level]->cursize());
- };
+ }
void mdiadaptlm::check_cache_levels()
{
#ifdef MDIADAPTLM_CACHE_ENABLE
for (int i=1; i<=max_caching_level; i++) check_cache_levels(i);
#endif
- };
+ }
void mdiadaptlm::reset_caches(int level)
{
if (probcache[level]) probcache[level]->reset(MAX(probcache[level]->cursize(),probcache[level]->maxsize()));
if (backoffcache[level]) backoffcache[level]->reset(MAX(backoffcache[level]->cursize(),backoffcache[level]->maxsize()));
- };
+ }
void mdiadaptlm::reset_caches()
{
#ifdef MDIADAPTLM_CACHE_ENABLE
for (int i=1; i<=max_caching_level; i++) reset_caches(i);
#endif
- };
-
+ }
inline NGRAMCACHE_t* mdiadaptlm::get_probcache(int level)
{
@@ -198,7 +197,7 @@ namespace irstlm {
oovscaling=foreunig(fng)/oovscaling;
return 1;
- };
+ }
int mdiadaptlm::savescalefactor(char* filename)
{
@@ -256,9 +255,7 @@ namespace irstlm {
discount(ng,1,fstar,lambda,0);
return fstar;
- };
-
-
+ }
int mdiadaptlm::adapt(char* ngtfile,int alev,double step)
{
@@ -307,7 +304,7 @@ namespace irstlm {
cerr << "done\n";
return 1;
- };
+ }
double mdiadaptlm::zeta(ngram ng,int size)
@@ -705,7 +702,7 @@ namespace irstlm {
*l=c;
}
return 0;
- };
+ }
void fwritex(char *p,int sz,int n,FILE* f)
{
@@ -1031,7 +1028,7 @@ namespace irstlm {
system("date");
return 1;
- };
+ }
///// Save in IRST MT format
@@ -1165,7 +1162,7 @@ namespace irstlm {
cerr << "LEVEL " << i << "DONE \n";
}
return 1;
- };
+ }
///// Save in binary format forbackoff N-gram models
@@ -1462,7 +1459,7 @@ namespace irstlm {
VERBOSE(2,"mdiadaptlm::saveBIN_per_word END\n");
return 1;
- };
+ }
///// Save in binary format forbackoff N-gram models
int mdiadaptlm::saveBIN_per_level(char *filename,int backoff,char* subdictfile,int mmap)
@@ -1927,7 +1924,7 @@ namespace irstlm {
VERBOSE(2,"mdiadaptlm::saveARPA_per_word END\n");
return 1;
- };
+ }
///// Save in format for ARPA backoff N-gram models
int mdiadaptlm::saveARPA_per_level(char *filename,int backoff,char* subdictfile )
@@ -2145,7 +2142,7 @@ namespace irstlm {
VERBOSE(2,"mdiadaptlm::saveARPA_per_level END\n");
return 1;
- };
+ }
}//namespace irstlm
diff --git a/src/mempool.cpp b/src/mempool.cpp
index 36220f8..988932a 100644
--- a/src/mempool.cpp
+++ b/src/mempool.cpp
@@ -249,7 +249,7 @@ const char *strstack::push(const char *s)
if ((len+1) >= size) {
exit_error(IRSTLM_ERROR_DATA, "strstack::push string is too long");
- };
+ }
if ((idx+len+1) >= size) {
//append a new block
diff --git a/src/mempool.h b/src/mempool.h
index ba44d21..e749a88 100644
--- a/src/mempool.h
+++ b/src/mempool.h
@@ -50,10 +50,10 @@ class memnode
memnode *next; //!< next block ptr
public:
//! Creates a memory node
- memnode():block(NULL), next(NULL){};
+ memnode():block(NULL), next(NULL) {}
//! Destroys memory node
- ~memnode(){};
+ ~memnode(){}
};
diff --git a/src/mfstream.cpp b/src/mfstream.cpp
index 4ae076c..b749247 100644
--- a/src/mfstream.cpp
+++ b/src/mfstream.cpp
@@ -86,7 +86,7 @@ int mfstream::swapbytes(char *p, int sz, int n)
}
return 0;
-};
+}
mfstream& mfstream::iwritex(streampos loc,void *ptr,int size,int n)
diff --git a/src/mixture.cpp b/src/mixture.cpp
index de24433..67a0bc6 100644
--- a/src/mixture.cpp
+++ b/src/mixture.cpp
@@ -1,22 +1,22 @@
/******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+ ******************************************************************************/
#include <cmath>
@@ -39,538 +39,538 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
namespace irstlm {
-//
-//Mixture interpolated language model
-//
+ //
+ //Mixture interpolated language model
+ //
-static Enum_T SLmTypeEnum [] = {
- { (char*)"ImprovedKneserNey", IMPROVED_KNESER_NEY },
- { (char*)"ikn", IMPROVED_KNESER_NEY },
- { (char*)"KneserNey", KNESER_NEY },
- { (char*)"kn", KNESER_NEY },
- { (char*)"ModifiedShiftBeta", MOD_SHIFT_BETA },
- { (char*)"msb", MOD_SHIFT_BETA },
- { (char*)"ImprovedShiftBeta", IMPROVED_SHIFT_BETA },
- { (char*)"isb", IMPROVED_SHIFT_BETA },
- { (char*)"InterpShiftBeta", SHIFT_BETA },
- { (char*)"ShiftBeta", SHIFT_BETA },
- { (char*)"sb", SHIFT_BETA },
- { (char*)"InterpShiftOne", SHIFT_ONE },
- { (char*)"ShiftOne", SHIFT_ONE },
- { (char*)"s1", SHIFT_ONE },
- { (char*)"InterpShiftZero", SHIFT_ZERO },
- { (char*)"s0", SHIFT_ZERO },
- { (char*)"LinearWittenBell", LINEAR_WB },
- { (char*)"wb", LINEAR_WB },
- { (char*)"Mixture", MIXTURE },
- { (char*)"mix", MIXTURE },
- END_ENUM
-};
-
-
-mixture::mixture(bool fulltable,char* sublminfo,int depth,int prunefreq,char* ipfile,char* opfile):
+ static Enum_T SLmTypeEnum [] = {
+ { (char*)"ImprovedKneserNey", IMPROVED_KNESER_NEY },
+ { (char*)"ikn", IMPROVED_KNESER_NEY },
+ { (char*)"KneserNey", KNESER_NEY },
+ { (char*)"kn", KNESER_NEY },
+ { (char*)"ModifiedShiftBeta", MOD_SHIFT_BETA },
+ { (char*)"msb", MOD_SHIFT_BETA },
+ { (char*)"ImprovedShiftBeta", IMPROVED_SHIFT_BETA },
+ { (char*)"isb", IMPROVED_SHIFT_BETA },
+ { (char*)"InterpShiftBeta", SHIFT_BETA },
+ { (char*)"ShiftBeta", SHIFT_BETA },
+ { (char*)"sb", SHIFT_BETA },
+ { (char*)"InterpShiftOne", SHIFT_ONE },
+ { (char*)"ShiftOne", SHIFT_ONE },
+ { (char*)"s1", SHIFT_ONE },
+ { (char*)"InterpShiftZero", SHIFT_ZERO },
+ { (char*)"s0", SHIFT_ZERO },
+ { (char*)"LinearWittenBell", LINEAR_WB },
+ { (char*)"wb", LINEAR_WB },
+ { (char*)"Mixture", MIXTURE },
+ { (char*)"mix", MIXTURE },
+ END_ENUM
+ };
+
+
+ mixture::mixture(bool fulltable,char* sublminfo,int depth,int prunefreq,char* ipfile,char* opfile):
mdiadaptlm((char *)NULL,depth)
- {
-
- prunethresh=prunefreq;
- ipfname=ipfile;
- opfname=opfile;
- usefulltable=fulltable;
-
- mfstream inp(sublminfo,ios::in );
- if (!inp) {
- std::stringstream ss_msg;
- ss_msg << "cannot open " << sublminfo;
- exit_error(IRSTLM_ERROR_IO, ss_msg.str());
- }
-
- char line[MAX_LINE];
- inp.getline(line,MAX_LINE);
-
- sscanf(line,"%d",&numslm);
-
- sublm=new interplm* [numslm];
-
- cerr << "WARNING: Parameters PruneSingletons (ps) and PruneTopSingletons (pts) are not taken into account for this type of LM (mixture); please specify the singleton pruning policy for each submodel using parameters \"-sps\" and \"-spts\" in the configuraton file\n";
-
- int max_npar=6;
- for (int i=0; i<numslm; i++) {
- char **par=new char*[max_npar];
- par[0]=new char[BUFSIZ];
- par[0][0]='\0';
-
- inp.getline(line,MAX_LINE);
-
- const char *const wordSeparators = " \t\r\n";
- char *word = strtok(line, wordSeparators);
- int j = 1;
-
- while (word){
- if (j>max_npar){
- std::stringstream ss_msg;
- ss_msg << "Too many parameters (expected " << max_npar << ")";
- exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
- }
- par[j] = new char[MAX_LINE];
- strcpy(par[j],word);
- // std::cerr << "par[j]:|" << par[j] << "|" << std::endl;
- word = strtok(0, wordSeparators);
- j++;
- }
-
- int actual_npar = j;
-
- char *subtrainfile;
- int slmtype;
- bool subprunesingletons;
- bool subprunetopsingletons;
- char *subprune_thr_str=NULL;
-
- int subprunefreq;
-
- DeclareParams((char*)
- "SubLanguageModelType",CMDENUMTYPE|CMDMSG, &slmtype, SLmTypeEnum, "type of the sub LM",
- "slm",CMDENUMTYPE|CMDMSG, &slmtype, SLmTypeEnum, "type of the sub LM",
- "sTrainOn",CMDSTRINGTYPE|CMDMSG, &subtrainfile, "training file of the sub LM",
- "str",CMDSTRINGTYPE|CMDMSG, &subtrainfile, "training file of the sub LM",
- "sPruneThresh",CMDSUBRANGETYPE|CMDMSG, &subprunefreq, 0, 1000, "threshold for pruning the sub LM",
- "sp",CMDSUBRANGETYPE|CMDMSG, &subprunefreq, 0, 1000, "threshold for pruning the sub LM",
- "sPruneSingletons",CMDBOOLTYPE|CMDMSG, &subprunesingletons, "boolean flag for pruning of singletons of the sub LM (default is true)",
- "sps",CMDBOOLTYPE|CMDMSG, &subprunesingletons, "boolean flag for pruning of singletons of the sub LM (default is true)",
- "sPruneTopSingletons",CMDBOOLTYPE|CMDMSG, &subprunetopsingletons, "boolean flag for pruning of singletons at the top level of the sub LM (default is false)",
- "spts",CMDBOOLTYPE|CMDMSG, &subprunetopsingletons, "boolean flag for pruning of singletons at the top level of the sub LM (default is false)",
- "sPruneFrequencyThreshold",CMDSTRINGTYPE|CMDMSG, &subprune_thr_str, "pruning frequency threshold for each level of the sub LM; comma-separated list of values; (default is \"0 0 ... 0\", for all levels)",
- "spft",CMDSTRINGTYPE|CMDMSG, &subprune_thr_str, "pruning frequency threshold for each level of the sub LM; comma-separated list of values; (default is \"0 0 ... 0\", for all levels)",
- (char *)NULL );
-
- subtrainfile=NULL;
- slmtype=0;
- subprunefreq=0;
- subprunesingletons=true;
- subprunetopsingletons=false;
-
- GetParams(&actual_npar, &par, (char*) NULL);
-
-
- if (!slmtype) {
- std::stringstream ss_msg;
- ss_msg << "The type (-slm) for sub LM number " << i+1 << " is not specified" ;
- exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
- }
-
- if (!subtrainfile) {
- std::stringstream ss_msg;
- ss_msg << "The file (-str) for sub lm number " << i+1 << " is not specified";
- exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
- }
-
- if (subprunefreq==-1) {
- std::stringstream ss_msg;
- ss_msg << "The prune threshold (-sp) for sub lm number " << i+1 << " is not specified";
- exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
- }
-
- switch (slmtype) {
-
- case LINEAR_WB:
- sublm[i]=new linearwb(subtrainfile,depth,subprunefreq,IMPROVEDSHIFTBETA_I);
- break;
-
- case SHIFT_BETA:
- sublm[i]=new shiftbeta(subtrainfile,depth,subprunefreq,-1,SHIFTBETA_I);
- break;
+ {
+
+ prunethresh=prunefreq;
+ ipfname=ipfile;
+ opfname=opfile;
+ usefulltable=fulltable;
+
+ mfstream inp(sublminfo,ios::in );
+ if (!inp) {
+ std::stringstream ss_msg;
+ ss_msg << "cannot open " << sublminfo;
+ exit_error(IRSTLM_ERROR_IO, ss_msg.str());
+ }
+
+ char line[MAX_LINE];
+ inp.getline(line,MAX_LINE);
+
+ sscanf(line,"%d",&numslm);
+
+ sublm=new interplm* [numslm];
+
+ cerr << "WARNING: Parameters PruneSingletons (ps) and PruneTopSingletons (pts) are not taken into account for this type of LM (mixture); please specify the singleton pruning policy for each submodel using parameters \"-sps\" and \"-spts\" in the configuraton file\n";
+
+ int max_npar=6;
+ for (int i=0; i<numslm; i++) {
+ char **par=new char*[max_npar];
+ par[0]=new char[BUFSIZ];
+ par[0][0]='\0';
+
+ inp.getline(line,MAX_LINE);
+
+ const char *const wordSeparators = " \t\r\n";
+ char *word = strtok(line, wordSeparators);
+ int j = 1;
+
+ while (word){
+ if (j>max_npar){
+ std::stringstream ss_msg;
+ ss_msg << "Too many parameters (expected " << max_npar << ")";
+ exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
+ }
+ par[j] = new char[MAX_LINE];
+ strcpy(par[j],word);
+ // std::cerr << "par[j]:|" << par[j] << "|" << std::endl;
+ word = strtok(0, wordSeparators);
+ j++;
+ }
+
+ int actual_npar = j;
+
+ char *subtrainfile;
+ int slmtype;
+ bool subprunesingletons;
+ bool subprunetopsingletons;
+ char *subprune_thr_str=NULL;
+
+ int subprunefreq;
+
+ DeclareParams((char*)
+ "SubLanguageModelType",CMDENUMTYPE|CMDMSG, &slmtype, SLmTypeEnum, "type of the sub LM",
+ "slm",CMDENUMTYPE|CMDMSG, &slmtype, SLmTypeEnum, "type of the sub LM",
+ "sTrainOn",CMDSTRINGTYPE|CMDMSG, &subtrainfile, "training file of the sub LM",
+ "str",CMDSTRINGTYPE|CMDMSG, &subtrainfile, "training file of the sub LM",
+ "sPruneThresh",CMDSUBRANGETYPE|CMDMSG, &subprunefreq, 0, 1000, "threshold for pruning the sub LM",
+ "sp",CMDSUBRANGETYPE|CMDMSG, &subprunefreq, 0, 1000, "threshold for pruning the sub LM",
+ "sPruneSingletons",CMDBOOLTYPE|CMDMSG, &subprunesingletons, "boolean flag for pruning of singletons of the sub LM (default is true)",
+ "sps",CMDBOOLTYPE|CMDMSG, &subprunesingletons, "boolean flag for pruning of singletons of the sub LM (default is true)",
+ "sPruneTopSingletons",CMDBOOLTYPE|CMDMSG, &subprunetopsingletons, "boolean flag for pruning of singletons at the top level of the sub LM (default is false)",
+ "spts",CMDBOOLTYPE|CMDMSG, &subprunetopsingletons, "boolean flag for pruning of singletons at the top level of the sub LM (default is false)",
+ "sPruneFrequencyThreshold",CMDSTRINGTYPE|CMDMSG, &subprune_thr_str, "pruning frequency threshold for each level of the sub LM; comma-separated list of values; (default is \"0 0 ... 0\", for all levels)",
+ "spft",CMDSTRINGTYPE|CMDMSG, &subprune_thr_str, "pruning frequency threshold for each level of the sub LM; comma-separated list of values; (default is \"0 0 ... 0\", for all levels)",
+ (char *)NULL );
+
+ subtrainfile=NULL;
+ slmtype=0;
+ subprunefreq=0;
+ subprunesingletons=true;
+ subprunetopsingletons=false;
+
+ GetParams(&actual_npar, &par, (char*) NULL);
+
+
+ if (!slmtype) {
+ std::stringstream ss_msg;
+ ss_msg << "The type (-slm) for sub LM number " << i+1 << " is not specified" ;
+ exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
+ }
+
+ if (!subtrainfile) {
+ std::stringstream ss_msg;
+ ss_msg << "The file (-str) for sub lm number " << i+1 << " is not specified";
+ exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
+ }
+
+ if (subprunefreq==-1) {
+ std::stringstream ss_msg;
+ ss_msg << "The prune threshold (-sp) for sub lm number " << i+1 << " is not specified";
+ exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
+ }
+
+ switch (slmtype) {
+
+ case LINEAR_WB:
+ sublm[i]=new linearwb(subtrainfile,depth,subprunefreq,IMPROVEDSHIFTBETA_I);
+ break;
+
+ case SHIFT_BETA:
+ sublm[i]=new shiftbeta(subtrainfile,depth,subprunefreq,-1,SHIFTBETA_I);
+ break;
+
+ case KNESER_NEY:
+ // lm=new kneserney(subtrainfile,depth,subprunefreq,-1,KNESERNEY_I);
+
+ break;
+
+ case MOD_SHIFT_BETA:
+ case IMPROVED_KNESER_NEY:
+ sublm[i]=new improvedkneserney(subtrainfile,depth,subprunefreq,IMPROVEDKNESERNEY_I);
+ break;
+
+ case IMPROVED_SHIFT_BETA:
+ sublm[i]=new improvedshiftbeta(subtrainfile,depth,subprunefreq,IMPROVEDSHIFTBETA_I);
+ break;
+
+ case SHIFT_ONE:
+ sublm[i]=new shiftone(subtrainfile,depth,subprunefreq,SIMPLE_I);
+ break;
+
+ case MIXTURE:
+ sublm[i]=new mixture(usefulltable,subtrainfile,depth,subprunefreq);
+ break;
+
+ default:
+ exit_error(IRSTLM_ERROR_DATA, "not implemented yet");
+ }
+
+ sublm[i]->prunesingletons(subprunesingletons==true);
+ sublm[i]->prunetopsingletons(subprunetopsingletons==true);
+
+ if (subprunetopsingletons==true)
+ //apply most specific pruning method
+ sublm[i]->prunesingletons(false);
+
+ if (subprune_thr_str)
+ sublm[i]->set_prune_ngram(subprune_thr_str);
+
+
+ cerr << "eventually generate OOV code of sub lm[" << i << "]\n";
+ sublm[i]->dict->genoovcode();
+
+ //create super dictionary
+ dict->augment(sublm[i]->dict);
+
+ //creates the super n-gram table
+ if(usefulltable) augment(sublm[i]);
+
+ cerr << "super table statistics\n";
+ stat(2);
+ }
+
+ cerr << "eventually generate OOV code of the mixture\n";
+ dict->genoovcode();
+ cerr << "dict size of the mixture:" << dict->size() << "\n";
+ //tying parameters
+ k1=2;
+ k2=10;
+ }
+
+ double mixture::reldist(double *l1,double *l2,int n)
+ {
+ double dist=0.0,size=0.0;
+ for (int i=0; i<n; i++) {
+ dist+=(l1[i]-l2[i])*(l1[i]-l2[i]);
+ size+=l1[i]*l1[i];
+ }
+ return sqrt(dist/size);
+ }
+
+
+ double rand01()
+ {
+ return (double)rand()/(double)RAND_MAX;
+ }
+
+ int mixture::genpmap()
+ {
+ dictionary* d=sublm[0]->dict;
+
+ cerr << "Computing parameters mapping: ..." << d->size() << " ";
+ pm=new int[d->size()];
+ //initialize
+ for (int i=0; i<d->size(); i++) pm[i]=0;
+
+ pmax=k2-k1+1; //update # of parameters
+
+ for (int w=0; w<d->size(); w++) {
+ int f=d->freq(w);
+ if ((f>k1) && (f<=k2)) pm[w]=f-k1;
+ else if (f>k2) {
+ pm[w]=pmax++;
+ }
+ }
+ cerr << "pmax " << pmax << " ";
+ return 1;
+ }
+
+ int mixture::pmap(ngram ng,int lev)
+ {
+
+ ngram h(sublm[0]->dict);
+ h.trans(ng);
+
+ if (lev<=1) return 0;
+ //get the last word of history
+ if (!sublm[0]->get(h,2,1)) return 0;
+ return (int) pm[*h.wordp(2)];
+ }
+
+
+ int mixture::savepar(char* opf)
+ {
+ mfstream out(opf,ios::out);
+
+ cerr << "saving parameters in " << opf << "\n";
+ out << lmsize() << " " << pmax << "\n";
+
+ for (int i=0; i<=lmsize(); i++)
+ for (int j=0; j<pmax; j++)
+ out.writex(l[i][j],sizeof(double),numslm);
+
+
+ return 1;
+ }
+
+
+ int mixture::loadpar(char* ipf)
+ {
+
+ mfstream inp(ipf,ios::in);
+
+ if (!inp) {
+ std::stringstream ss_msg;
+ ss_msg << "cannot open file: " << ipf;
+ exit_error(IRSTLM_ERROR_IO, ss_msg.str());
+ }
+
+ cerr << "loading parameters from " << ipf << "\n";
+
+ // check compatibility
+ char header[100];
+ inp.getline(header,100);
+ int value1,value2;
+ sscanf(header,"%d %d",&value1,&value2);
+
+ if (value1 != lmsize() || value2 != pmax) {
+ std::stringstream ss_msg;
+ ss_msg << "parameter file " << ipf << " is incompatible";
+ exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
+ }
+
+ for (int i=0; i<=lmsize(); i++)
+ for (int j=0; j<pmax; j++)
+ inp.readx(l[i][j],sizeof(double),numslm);
+
+ return 1;
+ }
+
+ int mixture::train()
+ {
+
+ double zf;
+
+ srand(1333);
+
+ genpmap();
+
+ if (dub()<dict->size()) {
+ std::stringstream ss_msg;
+ ss_msg << "\nERROR: DUB value is too small: the LM will possibly compute wrong probabilities if sub-LMs have different vocabularies!\n";
+ ss_msg << "This exception should already have been handled before!!!\n";
+ exit_error(IRSTLM_ERROR_MODEL, ss_msg.str());
+ }
+
+ cerr << "mixlm --> DUB: " << dub() << endl;
+ for (int i=0; i<numslm; i++) {
+ cerr << i << " sublm --> DUB: " << sublm[i]->dub() << endl;
+ cerr << "eventually generate OOV code ";
+ cerr << sublm[i]->dict->encode(sublm[i]->dict->OOV()) << "\n";
+ sublm[i]->train();
+ }
+
+ //initialize parameters
+
+ for (int i=0; i<=lmsize(); i++) {
+ l[i]=new double*[pmax];
+ for (int j=0; j<pmax; j++) {
+ l[i][j]=new double[numslm];
+ for (int k=0; k<numslm; k++)
+ l[i][j][k]=1.0/(double)numslm;
+ }
+ }
+
+ if (ipfname) {
+ //load parameters from file
+ loadpar(ipfname);
+ } else {
+ //start training of mixture model
+
+ double oldl[pmax][numslm];
+ char alive[pmax],used[pmax];
+ int totalive;
+
+ ngram ng(sublm[0]->dict);
+
+ for (int lev=1; lev<=lmsize(); lev++) {
+
+ zf=sublm[0]->zerofreq(lev);
+
+ cerr << "Starting training at lev:" << lev << "\n";
+
+ for (int i=0; i<pmax; i++) {
+ alive[i]=1;
+ used[i]=0;
+ }
+ totalive=1;
+ int iter=0;
+ while (totalive && (iter < 20) ) {
+
+ iter++;
+
+ for (int i=0; i<pmax; i++)
+ if (alive[i])
+ for (int j=0; j<numslm; j++) {
+ oldl[i][j]=l[lev][i][j];
+ l[lev][i][j]=1.0/(double)numslm;
+ }
+
+ sublm[0]->scan(ng,INIT,lev);
+ while(sublm[0]->scan(ng,CONT,lev)) {
+
+ //do not include oov for unigrams
+ if ((lev==1) && (*ng.wordp(1)==sublm[0]->dict->oovcode()))
+ continue;
+
+ int par=pmap(ng,lev);
+ used[par]=1;
+
+ //controllo se aggiornare il parametro
+ if (alive[par]) {
- case KNESER_NEY:
- // lm=new kneserney(subtrainfile,depth,subprunefreq,-1,KNESERNEY_I);
-
- break;
+ double backoff=(lev>1?prob(ng,lev-1):1); //backoff
+ double denom=0.0;
+ double* numer = new double[numslm];
+ double fstar,lambda;
- case MOD_SHIFT_BETA:
- case IMPROVED_KNESER_NEY:
- sublm[i]=new improvedkneserney(subtrainfile,depth,subprunefreq,IMPROVEDKNESERNEY_I);
- break;
+ //int cv=(int)floor(zf * (double)ng.freq + rand01());
+ //int cv=1; //old version of leaving-one-out
+ int cv=(int)floor(zf * (double)ng.freq)+1;
+ //int cv=1; //old version of leaving-one-out
+ //if (lev==3)q
- case IMPROVED_SHIFT_BETA:
- sublm[i]=new improvedshiftbeta(subtrainfile,depth,subprunefreq,IMPROVEDSHIFTBETA_I);
- break;
+ //if (iter>10)
+ // cout << ng
+ // << " backoff " << backoff
+ // << " level " << lev
+ // << "\n";
- case SHIFT_ONE:
- sublm[i]=new shiftone(subtrainfile,depth,subprunefreq,SIMPLE_I);
- break;
+ for (int i=0; i<numslm; i++) {
+
+ //use cv if i=0
+
+ sublm[i]->discount(ng,lev,fstar,lambda,(i==0)*(cv));
+ numer[i]=oldl[par][i]*(fstar + lambda * backoff);
+
+ ngram ngslm(sublm[i]->dict);
+ ngslm.trans(ng);
+ if ((*ngslm.wordp(1)==sublm[i]->dict->oovcode()) &&
+ (dict->dub() > sublm[i]->dict->size()))
+ numer[i]/=(double)(dict->dub() - sublm[i]->dict->size());
+
+ denom+=numer[i];
+ }
- case MIXTURE:
- sublm[i]=new mixture(usefulltable,subtrainfile,depth,subprunefreq);
- break;
+ for (int i=0; i<numslm; i++) {
+ l[lev][par][i]+=(ng.freq * (numer[i]/denom));
+ //if (iter>10)
+ //cout << ng << " l: " << l[lev][par][i] << "\n";
+ }
+ delete []numer;
+ }
+ }
+
+ //normalize all parameters
+ totalive=0;
+ for (int i=0; i<pmax; i++) {
+ double tot=0;
+ if (alive[i]) {
+ for (int j=0; j<numslm; j++) tot+=(l[lev][i][j]);
+ for (int j=0; j<numslm; j++) l[lev][i][j]/=tot;
- default:
- exit_error(IRSTLM_ERROR_DATA, "not implemented yet");
- };
-
- sublm[i]->prunesingletons(subprunesingletons==true);
- sublm[i]->prunetopsingletons(subprunetopsingletons==true);
-
- if (subprunetopsingletons==true)
- //apply most specific pruning method
- sublm[i]->prunesingletons(false);
-
- if (subprune_thr_str)
- sublm[i]->set_prune_ngram(subprune_thr_str);
-
-
- cerr << "eventually generate OOV code of sub lm[" << i << "]\n";
- sublm[i]->dict->genoovcode();
-
- //create super dictionary
- dict->augment(sublm[i]->dict);
-
- //creates the super n-gram table
- if(usefulltable) augment(sublm[i]);
-
- cerr << "super table statistics\n";
- stat(2);
- }
-
- cerr << "eventually generate OOV code of the mixture\n";
- dict->genoovcode();
- cerr << "dict size of the mixture:" << dict->size() << "\n";
- //tying parameters
- k1=2;
- k2=10;
- };
-
-double mixture::reldist(double *l1,double *l2,int n)
-{
- double dist=0.0,size=0.0;
- for (int i=0; i<n; i++) {
- dist+=(l1[i]-l2[i])*(l1[i]-l2[i]);
- size+=l1[i]*l1[i];
- }
- return sqrt(dist/size);
-}
-
-
-double rand01()
-{
- return (double)rand()/(double)RAND_MAX;
-}
-
-int mixture::genpmap()
-{
- dictionary* d=sublm[0]->dict;
-
- cerr << "Computing parameters mapping: ..." << d->size() << " ";
- pm=new int[d->size()];
- //initialize
- for (int i=0; i<d->size(); i++) pm[i]=0;
-
- pmax=k2-k1+1; //update # of parameters
-
- for (int w=0; w<d->size(); w++) {
- int f=d->freq(w);
- if ((f>k1) && (f<=k2)) pm[w]=f-k1;
- else if (f>k2) {
- pm[w]=pmax++;
- }
- }
- cerr << "pmax " << pmax << " ";
- return 1;
-}
-
-int mixture::pmap(ngram ng,int lev)
-{
-
- ngram h(sublm[0]->dict);
- h.trans(ng);
-
- if (lev<=1) return 0;
- //get the last word of history
- if (!sublm[0]->get(h,2,1)) return 0;
- return (int) pm[*h.wordp(2)];
-}
-
-
-int mixture::savepar(char* opf)
-{
- mfstream out(opf,ios::out);
-
- cerr << "saving parameters in " << opf << "\n";
- out << lmsize() << " " << pmax << "\n";
-
- for (int i=0; i<=lmsize(); i++)
- for (int j=0; j<pmax; j++)
- out.writex(l[i][j],sizeof(double),numslm);
-
-
- return 1;
-}
-
-
-int mixture::loadpar(char* ipf)
-{
-
- mfstream inp(ipf,ios::in);
-
- if (!inp) {
- std::stringstream ss_msg;
- ss_msg << "cannot open file: " << ipf;
- exit_error(IRSTLM_ERROR_IO, ss_msg.str());
- }
-
- cerr << "loading parameters from " << ipf << "\n";
-
- // check compatibility
- char header[100];
- inp.getline(header,100);
- int value1,value2;
- sscanf(header,"%d %d",&value1,&value2);
-
- if (value1 != lmsize() || value2 != pmax) {
- std::stringstream ss_msg;
- ss_msg << "parameter file " << ipf << " is incompatible";
- exit_error(IRSTLM_ERROR_DATA, ss_msg.str());
- }
-
- for (int i=0; i<=lmsize(); i++)
- for (int j=0; j<pmax; j++)
- inp.readx(l[i][j],sizeof(double),numslm);
-
- return 1;
-}
-
-int mixture::train()
-{
-
- double zf;
-
- srand(1333);
-
- genpmap();
-
- if (dub()<dict->size()) {
- std::stringstream ss_msg;
- ss_msg << "\nERROR: DUB value is too small: the LM will possibly compute wrong probabilities if sub-LMs have different vocabularies!\n";
- ss_msg << "This exception should already have been handled before!!!\n";
- exit_error(IRSTLM_ERROR_MODEL, ss_msg.str());
- }
-
- cerr << "mixlm --> DUB: " << dub() << endl;
- for (int i=0; i<numslm; i++) {
- cerr << i << " sublm --> DUB: " << sublm[i]->dub() << endl;
- cerr << "eventually generate OOV code ";
- cerr << sublm[i]->dict->encode(sublm[i]->dict->OOV()) << "\n";
- sublm[i]->train();
- }
-
- //initialize parameters
-
- for (int i=0; i<=lmsize(); i++) {
- l[i]=new double*[pmax];
- for (int j=0; j<pmax; j++) {
- l[i][j]=new double[numslm];
- for (int k=0; k<numslm; k++)
- l[i][j][k]=1.0/(double)numslm;
- }
- }
-
- if (ipfname) {
- //load parameters from file
- loadpar(ipfname);
- } else {
- //start training of mixture model
-
- double oldl[pmax][numslm];
- char alive[pmax],used[pmax];
- int totalive;
-
- ngram ng(sublm[0]->dict);
-
- for (int lev=1; lev<=lmsize(); lev++) {
-
- zf=sublm[0]->zerofreq(lev);
-
- cerr << "Starting training at lev:" << lev << "\n";
-
- for (int i=0; i<pmax; i++) {
- alive[i]=1;
- used[i]=0;
- }
- totalive=1;
- int iter=0;
- while (totalive && (iter < 20) ) {
-
- iter++;
-
- for (int i=0; i<pmax; i++)
- if (alive[i])
- for (int j=0; j<numslm; j++) {
- oldl[i][j]=l[lev][i][j];
- l[lev][i][j]=1.0/(double)numslm;
- }
-
- sublm[0]->scan(ng,INIT,lev);
- while(sublm[0]->scan(ng,CONT,lev)) {
-
- //do not include oov for unigrams
- if ((lev==1) && (*ng.wordp(1)==sublm[0]->dict->oovcode()))
- continue;
-
- int par=pmap(ng,lev);
- used[par]=1;
-
- //controllo se aggiornare il parametro
- if (alive[par]) {
-
- double backoff=(lev>1?prob(ng,lev-1):1); //backoff
- double denom=0.0;
- double* numer = new double[numslm];
- double fstar,lambda;
-
- //int cv=(int)floor(zf * (double)ng.freq + rand01());
- //int cv=1; //old version of leaving-one-out
- int cv=(int)floor(zf * (double)ng.freq)+1;
- //int cv=1; //old version of leaving-one-out
- //if (lev==3)q
-
- //if (iter>10)
- // cout << ng
- // << " backoff " << backoff
- // << " level " << lev
- // << "\n";
-
- for (int i=0; i<numslm; i++) {
-
- //use cv if i=0
-
- sublm[i]->discount(ng,lev,fstar,lambda,(i==0)*(cv));
- numer[i]=oldl[par][i]*(fstar + lambda * backoff);
-
- ngram ngslm(sublm[i]->dict);
- ngslm.trans(ng);
- if ((*ngslm.wordp(1)==sublm[i]->dict->oovcode()) &&
- (dict->dub() > sublm[i]->dict->size()))
- numer[i]/=(double)(dict->dub() - sublm[i]->dict->size());
-
- denom+=numer[i];
- }
-
- for (int i=0; i<numslm; i++) {
- l[lev][par][i]+=(ng.freq * (numer[i]/denom));
- //if (iter>10)
- //cout << ng << " l: " << l[lev][par][i] << "\n";
- }
- delete []numer;
- }
- }
-
- //normalize all parameters
- totalive=0;
- for (int i=0; i<pmax; i++) {
- double tot=0;
- if (alive[i]) {
- for (int j=0; j<numslm; j++) tot+=(l[lev][i][j]);
- for (int j=0; j<numslm; j++) l[lev][i][j]/=tot;
-
- //decide if to continue to update
- if (!used[i] || (reldist(l[lev][i],oldl[i],numslm)<=0.05))
- alive[i]=0;
- }
- totalive+=alive[i];
- }
-
- cerr << "Lev " << lev << " iter " << iter << " tot alive " << totalive << "\n";
-
- }
- }
- }
-
- if (opfname) savepar(opfname);
-
-
- return 1;
-}
-
-int mixture::discount(ngram ng_,int size,double& fstar,double& lambda,int /* unused parameter: cv */)
-{
-
- ngram ng(dict);
- ng.trans(ng_);
+ //decide if to continue to update
+ if (!used[i] || (reldist(l[lev][i],oldl[i],numslm)<=0.05))
+ alive[i]=0;
+ }
+ totalive+=alive[i];
+ }
+
+ cerr << "Lev " << lev << " iter " << iter << " tot alive " << totalive << "\n";
+
+ }
+ }
+ }
+
+ if (opfname) savepar(opfname);
+
+
+ return 1;
+ }
- double lambda2,fstar2;
- fstar=0.0;
- lambda=0.0;
- int p=pmap(ng,size);
- MY_ASSERT(p <= pmax);
- double lsum=0;
-
-
- for (int i=0; i<numslm; i++) {
- sublm[i]->discount(ng,size,fstar2,lambda2,0);
-
- ngram ngslm(sublm[i]->dict);
- ngslm.trans(ng);
-
- if (dict->dub() > sublm[i]->dict->size()){
- if (*ngslm.wordp(1) == sublm[i]->dict->oovcode()) {
- fstar2/=(double)(sublm[i]->dict->dub() - sublm[i]->dict->size()+1);
- }
+ int mixture::discount(ngram ng_,int size,double& fstar,double& lambda,int /* unused parameter: cv */)
+ {
+
+ ngram ng(dict);
+ ng.trans(ng_);
+
+ double lambda2,fstar2;
+ fstar=0.0;
+ lambda=0.0;
+ int p=pmap(ng,size);
+ MY_ASSERT(p <= pmax);
+ double lsum=0;
+
+
+ for (int i=0; i<numslm; i++) {
+ sublm[i]->discount(ng,size,fstar2,lambda2,0);
+
+ ngram ngslm(sublm[i]->dict);
+ ngslm.trans(ng);
+
+ if (dict->dub() > sublm[i]->dict->size()){
+ if (*ngslm.wordp(1) == sublm[i]->dict->oovcode()) {
+ fstar2/=(double)(sublm[i]->dict->dub() - sublm[i]->dict->size()+1);
+ }
+ }
+
+
+ fstar+=(l[size][p][i]*fstar2);
+ lambda+=(l[size][p][i]*lambda2);
+ lsum+=l[size][p][i];
}
-
-
- fstar+=(l[size][p][i]*fstar2);
- lambda+=(l[size][p][i]*lambda2);
- lsum+=l[size][p][i];
- }
-
- if (dict->dub() > dict->size())
- if (*ng.wordp(1) == dict->oovcode()) {
- fstar*=(double)(dict->dub() - dict->size()+1);
- }
+
+ if (dict->dub() > dict->size())
+ if (*ng.wordp(1) == dict->oovcode()) {
+ fstar*=(double)(dict->dub() - dict->size()+1);
+ }
+
+ MY_ASSERT((lsum>LOWER_DOUBLE_PRECISION_OF_1) && (lsum<=UPPER_DOUBLE_PRECISION_OF_1));
+ return 1;
+ }
- MY_ASSERT((lsum>LOWER_DOUBLE_PRECISION_OF_1) && (lsum<=UPPER_DOUBLE_PRECISION_OF_1));
- return 1;
-}
-
-
-//creates the ngramtable on demand from the sublm tables
-int mixture::get(ngram& ng,int n,int lev)
-{
-
- if (usefulltable)
+
+ //creates the ngramtable on demand from the sublm tables
+ int mixture::get(ngram& ng,int n,int lev)
{
+
+ if (usefulltable)
+ {
+ return ngramtable::get(ng,n,lev);
+ }
+
+ //free current tree
+ resetngramtable();
+
+ //get 1-word prefix from ng
+ ngram ug(dict,1);
+ *ug.wordp(1)=*ng.wordp(ng.size);
+
+ //local ngram to upload entries
+ ngram locng(dict,maxlevel());
+
+ //allocate subtrees from sublm
+ for (int i=0; i<numslm; i++) {
+
+ ngram subug(sublm[i]->dict,1);
+ subug.trans(ug);
+
+ if (sublm[i]->get(subug,1,1)) {
+
+ ngram subng(sublm[i]->dict,maxlevel());
+ *subng.wordp(maxlevel())=*subug.wordp(1);
+ sublm[i]->scan(subug.link,subug.info,1,subng,INIT,maxlevel());
+ while(sublm[i]->scan(subug.link,subug.info,1,subng,CONT,maxlevel())) {
+ locng.trans(subng);
+ put(locng);
+ }
+ }
+ }
+
return ngramtable::get(ng,n,lev);
- }
- //free current tree
- resetngramtable();
-
- //get 1-word prefix from ng
- ngram ug(dict,1);
- *ug.wordp(1)=*ng.wordp(ng.size);
-
- //local ngram to upload entries
- ngram locng(dict,maxlevel());
-
- //allocate subtrees from sublm
- for (int i=0; i<numslm; i++) {
-
- ngram subug(sublm[i]->dict,1);
- subug.trans(ug);
-
- if (sublm[i]->get(subug,1,1)) {
-
- ngram subng(sublm[i]->dict,maxlevel());
- *subng.wordp(maxlevel())=*subug.wordp(1);
- sublm[i]->scan(subug.link,subug.info,1,subng,INIT,maxlevel());
- while(sublm[i]->scan(subug.link,subug.info,1,subng,CONT,maxlevel())) {
- locng.trans(subng);
- put(locng);
- }
- }
- }
-
- return ngramtable::get(ng,n,lev);
-
-}
+ }
}//namespace irstlm
diff --git a/src/ngramcache.cpp b/src/ngramcache.cpp
index 5f1f6a8..d4cf41b 100644
--- a/src/ngramcache.cpp
+++ b/src/ngramcache.cpp
@@ -57,14 +57,13 @@ ngramcache::ngramcache(int n,int size,int maxentries,float lf)
mp=new mempool(ngsize * sizeof(int)+infosize,MP_BLOCK_SIZE);
accesses=0;
hits=0;
-};
+}
ngramcache::~ngramcache()
{
delete ht;
delete mp;
-};
-
+}
//resize cache to specified number of entries
void ngramcache::reset(int n)
@@ -76,7 +75,7 @@ void ngramcache::reset(int n)
ht=new htable<int*> ((size_t) (maxn/load_factor), ngsize * sizeof(int)); //decrease the lower load factor to reduce collision
mp=new mempool(ngsize * sizeof(int)+infosize,MP_BLOCK_SIZE);
entries=0;
-};
+}
char* ngramcache::get(const int* ngp,char*& info)
{
@@ -89,7 +88,7 @@ char* ngramcache::get(const int* ngp,char*& info)
}
return found;
-};
+}
char* ngramcache::get(const int* ngp,double& info)
{
@@ -102,7 +101,7 @@ char* ngramcache::get(const int* ngp,double& info)
};
return found;
-};
+}
char* ngramcache::get(const int* ngp,prob_and_state_t& info)
{
@@ -115,7 +114,7 @@ char* ngramcache::get(const int* ngp,prob_and_state_t& info)
hits++;
}
return found;
-};
+}
int ngramcache::add(const int* ngp,const char*& info)
{
@@ -126,7 +125,7 @@ int ngramcache::add(const int* ngp,const char*& info)
MY_ASSERT(found == entry); //false if key is already inside
entries++;
return 1;
-};
+}
int ngramcache::add(const int* ngp,const double& info)
{
@@ -137,7 +136,7 @@ int ngramcache::add(const int* ngp,const double& info)
MY_ASSERT(found == entry); //false if key is already inside
entries++;
return 1;
-};
+}
int ngramcache::add(const int* ngp,const prob_and_state_t& info)
{
@@ -148,12 +147,11 @@ int ngramcache::add(const int* ngp,const prob_and_state_t& info)
MY_ASSERT(found == entry); //false if key is already inside
entries++;
return 1;
-};
-
+}
void ngramcache::stat() const
{
std::cout << "ngramcache stats: entries=" << entries << " acc=" << accesses << " hits=" << hits
<< " ht.used= " << ht->used() << " mp.used= " << mp->used() << " mp.wasted= " << mp->wasted() << "\n";
-};
+}
diff --git a/src/ngramtable.cpp b/src/ngramtable.cpp
index 1779e90..c28c342 100644
--- a/src/ngramtable.cpp
+++ b/src/ngramtable.cpp
@@ -197,7 +197,7 @@ tabletype::tabletype(TABLETYPE tt,int codesize) {
}
L_FREQ_OFFS=CODESIZE;
-};
+}
ngramtable::ngramtable(char* filename,int maxl,char* /* unused parameter: is */,
dictionary* extdict /* external dictionary */,char* filterdictfile,
@@ -666,7 +666,7 @@ void ngramtable::generate(char *filename, dictionary* extdict)
for (i=1; i<maxlev; i++) {
ng.pushw(dict->BoS());
ng.freq=1;
- };
+ }
while (inp >> ng) {
@@ -702,7 +702,7 @@ void ngramtable::generate(char *filename, dictionary* extdict)
int code=filterdict->encode(dict->decode(*ng2.wordp(maxlev)));
if (code!=filterdict->oovcode()) put(ng2);
} else put(ng2);
- };
+ }
dict->incflag(0);
inp.close();
@@ -766,7 +766,7 @@ void ngramtable::generate_hmask(char *filename,char* hmask,int inplen)
if (ng2.size) dict->incfreq(*ng2.wordp(1),1);
if (!(++c % 1000000)) cerr << ".";
- };
+ }
dict->incflag(0);
inp.close();
@@ -1253,8 +1253,7 @@ char **ngramtable::grow(table *tb,NODETYPE ndt,int lev,
}
return tb;
-
-};
+}
int ngramtable::put(ngram& ng)
@@ -1551,7 +1550,7 @@ ngramtable::~ngramtable()
delete [] occupancy;
delete [] mentr;
delete dict;
-};
+}
void ngramtable::stat(int level)
{
@@ -1729,7 +1728,7 @@ int ngramtable::codecmp(char * a,char *b) {
if(result) return result;
}
return 0;
-};
+}
long long ngramtable::freq(node nd,NODETYPE ndt,long long value) {
int offs=(ndt & LNODE)?L_FREQ_OFFS:I_FREQ_OFFS;
diff --git a/src/ngramtable.h b/src/ngramtable.h
index a533856..1f2836b 100644
--- a/src/ngramtable.h
+++ b/src/ngramtable.h
@@ -186,7 +186,7 @@ public:
inline void freetree() {
freetree(tree);
- };
+ }
void freetree(node nd);
@@ -296,8 +296,7 @@ public:
inline int codediff(node a,node b) {
return word(a)-word(b);
- };
-
+ }
int update(ngram ng);
@@ -374,7 +373,7 @@ public:
inline dictionary* getDict() const {
return dict;
- };
+ }
};
#endif
diff --git a/src/ngt.cpp b/src/ngt.cpp
index 511e693..3c2f494 100644
--- a/src/ngt.cpp
+++ b/src/ngt.cpp
@@ -156,7 +156,7 @@ int main(int argc, char **argv)
if (inp==NULL) {
usage();
exit_error(IRSTLM_ERROR_DATA,"Warning: no input file specified");
- };
+ }
if (out==NULL) {
cerr << "Warning: no output file specified!\n";
diff --git a/src/normcache.cpp b/src/normcache.cpp
index f64c167..2bd3119 100644
--- a/src/normcache.cpp
+++ b/src/normcache.cpp
@@ -64,7 +64,7 @@ void normcache::expand(int n)
for (int i=0; i<step; i++)
cache[n][maxcache[n]+i]=0;
maxcache[n]+=step;
-};
+}
double normcache::get(ngram ng,int size,double& value)
diff --git a/src/shiftlm.cpp b/src/shiftlm.cpp
index 7dc4633..0533e83 100644
--- a/src/shiftlm.cpp
+++ b/src/shiftlm.cpp
@@ -50,7 +50,7 @@ shiftone::shiftone(char* ngtfile,int depth,int prunefreq,TABLETYPE tt):
beta=1.0;
-};
+}
int shiftone::train()
@@ -141,9 +141,7 @@ shiftbeta::shiftbeta(char* ngtfile,int depth,int prunefreq,double b,TABLETYPE tt
prunethresh=prunefreq;
cerr << "PruneThresh: " << prunethresh << "\n";
-};
-
-
+}
int shiftbeta::train()
{
@@ -215,7 +213,7 @@ int shiftbeta::train()
}
return 1;
-};
+}
@@ -311,7 +309,7 @@ improvedkneserney::improvedkneserney(char* ngtfile,int depth,int prunefreq,TABLE
beta[1][1]=0.0;
beta[1][2]=0.0;
-};
+}
int improvedkneserney::train()
@@ -417,7 +415,7 @@ int improvedkneserney::train()
}
return 1;
-};
+}
@@ -553,7 +551,7 @@ int improvedkneserney::discount(ngram ng_,int size,double& fstar,double& lambda,
beta[1][1]=0.0;
beta[1][2]=0.0;
- };
+ }
int improvedshiftbeta::train()
@@ -658,9 +656,7 @@ int improvedkneserney::discount(ngram ng_,int size,double& fstar,double& lambda,
}
return 1;
- };
-
-
+ }
int improvedshiftbeta::discount(ngram ng_,int size,double& fstar,double& lambda, int cv)
{
diff --git a/src/shiftlm.h b/src/shiftlm.h
index 51a51ab..a27d4d3 100644
--- a/src/shiftlm.h
+++ b/src/shiftlm.h
@@ -82,7 +82,7 @@ public:
}
double unigrIKN(ngram ng);
- inline double unigr(ngram ng){ return unigrIKN(ng); };
+ inline double unigr(ngram ng){ return unigrIKN(ng); }
};
class improvedshiftbeta: public mdiadaptlm
diff --git a/src/tlm.cpp b/src/tlm.cpp
index 0f70c6d..1b3020f 100644
--- a/src/tlm.cpp
+++ b/src/tlm.cpp
@@ -352,7 +352,7 @@ int main(int argc, char **argv)
default:
cerr << "not implemented yet\n";
return 1;
- };
+ }
if (dub < lm->dict->size()){
cerr << "dub (" << dub << ") is not set or too small. dub is re-set to the dictionary size (" << lm->dict->size() << ")" << endl;
@@ -410,7 +410,7 @@ int main(int argc, char **argv)
((mdiadaptlm *)lm)->get_zetacache()->stat();
cerr << "\n";
- };
+ }
if (compsize)
cout << "LM size " << (int)lm->netsize() << "\n";
diff --git a/src/util.cpp b/src/util.cpp
index 1763b7c..6e140fd 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -193,7 +193,7 @@ Timer g_timer;
void ResetUserTime()
{
g_timer.start();
-};
+}
void PrintUserTime(const std::string &message)
{
@@ -309,7 +309,7 @@ void exit_error(int err, const std::string &msg)
}
}
exit(err);
-};
+}
/*
#ifdef MY_ASSERT_FLAG
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git
More information about the debian-science-commits
mailing list