[irstlm] 04/78: enable another lm state based on the index of ngram inside the table instead of its address
Giulio Paci
giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:47:00 UTC 2016
This is an automated email from the git hooks/post-receive script.
giuliopaci-guest pushed a commit to tag adaptiveLM.v0.10
in repository irstlm.
commit e00816476faa4b15610545c02d09b4ff7724473a
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date: Wed Nov 4 11:35:34 2015 +0100
enable another lm state based on the index of ngram inside the table instead of its address
---
src/compile-lm.cpp | 23 ++++++++---
src/interpolate-lm.cpp | 9 +++--
src/lmContainer.h | 23 ++++++++---
src/lmInterpolation.cpp | 17 ++++++--
src/lmInterpolation.h | 7 +++-
src/lmclass.cpp | 10 +++--
src/lmclass.h | 19 +++++++--
src/lmmacro.cpp | 14 ++++---
src/lmmacro.h | 6 ++-
src/lmtable.cpp | 102 +++++++++++++++++++++++++++++++++++++-----------
src/lmtable.h | 16 +++++---
src/ngramcache.h | 5 ++-
src/util.h | 2 +
13 files changed, 190 insertions(+), 63 deletions(-)
diff --git a/src/compile-lm.cpp b/src/compile-lm.cpp
index 0ad6707..5f0cc70 100644
--- a/src/compile-lm.cpp
+++ b/src/compile-lm.cpp
@@ -191,6 +191,8 @@ int main(int argc, char **argv)
lmt->load(infile);
+ lmt->print_table_stat();
+
//CHECK this part for sfilter to make it possible only for LMTABLE
if (sfilter != NULL) {
lmContainer* filtered_lmt = NULL;
@@ -296,12 +298,14 @@ int main(int argc, char **argv)
double bow;
int bol=0;
+ ngram_state_t msidx;
char *msp;
unsigned int statesize;
lmt->dictionary_incflag(1);
while(inptxt >> ng) {
+ VERBOSE(3,"read ng:|" << ng << "|" << std::endl);
if (ng.size>lmt->maxlevel()) ng.size=lmt->maxlevel();
@@ -312,7 +316,10 @@ int main(int argc, char **argv)
}
if (ng.size>=1) {
- Pr=lmt->clprob(ng,&bow,&bol,&msp,&statesize);
+ VERBOSE(3,"computing clprob ng:|" << ng << "|" << std::endl);
+// Pr=lmt->clprob(ng,&bow,&bol,&msp,&statesize);
+ Pr=lmt->clprob(ng,&bow,&bol,&msidx,&msp,&statesize);
+ VERBOSE(3,"computing clprob ng:|" << ng << "| Pr:|" << Pr << "| ngramstate:" << msidx << " msp:|" << (void*) msp << "| statesize:|" << statesize << "|" << std::endl);
logPr+=Pr;
sent_logPr+=Pr;
@@ -331,12 +338,12 @@ int main(int argc, char **argv)
std::cout.flush();
}
else if (debug==4) {
- std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
+ std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " ngramstate:" << msidx << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
std::cout << std::endl;
std::cout.flush();
}
else if (debug>4) {
- std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
+ std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " ngramstate:" << msidx << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
double totp=0.0;
int oldw=*ng.wordp(1);
double oovp=lmt->getlogOOVpenalty();
@@ -386,8 +393,10 @@ int main(int argc, char **argv)
std::cerr << ".";
lmt->check_caches_levels();
}
-
+
+ VERBOSE(3,"computing clprob END" << std::endl);
}
+ VERBOSE(3,"read END" << std::endl);
}
PP=exp((-logPr * log(10.0)) /Nw);
@@ -471,6 +480,7 @@ int main(int argc, char **argv)
double Pr;
double bow;
int bol=0;
+ ngram_state_t msidx;
char *msp;
unsigned int statesize;
@@ -490,9 +500,10 @@ int main(int argc, char **argv)
ng.size=lmt->maxlevel();
}
- Pr=lmt->clprob(ng,&bow,&bol,&msp,&statesize);
+// Pr=lmt->clprob(ng,&bow,&bol,&msp,&statesize);
+ Pr=lmt->clprob(ng,&bow,&bol,&msidx, &msp,&statesize);
#ifndef OUTPUT_SUPPRESSED
- std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
+ std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " ngramstate:" << msidx << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
std::cout << std::endl;
std::cout.flush();
#endif
diff --git a/src/interpolate-lm.cpp b/src/interpolate-lm.cpp
index 3d0a3ec..fe5cefd 100644
--- a/src/interpolate-lm.cpp
+++ b/src/interpolate-lm.cpp
@@ -390,6 +390,7 @@ int main(int argc, char **argv)
double bow;
int bol=0;
+ ngram_state_t msidx;
char *msp;
unsigned int statesize;
@@ -415,7 +416,8 @@ int main(int argc, char **argv)
ngram ong(lmt[i]->getDict());
ong.trans(ng);
- logpr = lmt[i]->clprob(ong,&bow,&bol,&msp,&statesize); //actual prob of the interpolation
+// logpr = lmt[i]->clprob(ong,&bow,&bol,&msp,&statesize); //actual prob of the interpolation
+ logpr = lmt[i]->clprob(ong,&bow,&bol,&msidx,&msp,&statesize); //actual prob of the interpolation
//logpr = lmt[i]->clprob(ong,&bow,&bol); //LM log-prob
Pr+=w[i] * pow(10.0,logpr); //actual prob of the interpolation
@@ -519,8 +521,9 @@ int main(int argc, char **argv)
for (i=0; i<N; i++) {
ngram ong(lmt[i]->getDict());
ong.trans(ng);
- logpr = lmt[i]->clprob(ong,&bow,&bol,NULL,&statesize); //LM log-prob (using caches if available)
-
+// logpr = lmt[i]->clprob(ong,&bow,&bol,NULL,&statesize); //LM log-prob (using caches if available)
+ logpr = lmt[i]->clprob(ong,&bow,&bol,NULL,NULL,&statesize); //LM log-prob (using caches if available)
+
Pr+=w[i] * pow(10.0,logpr); //actual prob of the interpolation
std::cout << "lm " << i << ":" << " logpr: " << logpr << " weight: " << w[i] << std::endl;
if (maxbol<bol) maxbol=bol;
diff --git a/src/lmContainer.h b/src/lmContainer.h
index a492ec9..ebf7fa1 100644
--- a/src/lmContainer.h
+++ b/src/lmContainer.h
@@ -113,23 +113,31 @@ public:
};
virtual bool is_inverted() {
return false;
- };
- virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
-VERBOSE(3,"virtual double lmContainer::clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) ng:|" << ng << "|\n");
+ };
+
+//virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+ virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+// VERBOSE(3,"virtual double lmContainer::clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) ng:|" << ng << "|\n");
+ VERBOSE(3,"virtual double lmContainer::clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) ng:|" << ng << "|\n");
UNUSED(ng);
UNUSED(bow);
UNUSED(bol);
+ UNUSED(maxsuffidx);
UNUSED(maxsuffptr);
UNUSED(statesize);
UNUSED(extendible);
return 0.0;
};
- virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
-VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL)\n");
+
+// virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+ virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+// VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL)\n");
+ VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL)\n");
UNUSED(ng);
UNUSED(ngsize);
UNUSED(bow);
UNUSED(bol);
+ UNUSED(maxsuffidx);
UNUSED(maxsuffptr);
UNUSED(statesize);
UNUSED(extendible);
@@ -225,6 +233,11 @@ VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=N
getDict()->incflag(0);
return c;
}
+
+ virtual void print_table_stat(){
+ VERBOSE(3,"virtual void lmContainer::print_table_stat() "<< std::endl);
+ };
+
};
}//namespace irstlm
diff --git a/src/lmInterpolation.cpp b/src/lmInterpolation.cpp
index 356289f..33aaa21 100644
--- a/src/lmInterpolation.cpp
+++ b/src/lmInterpolation.cpp
@@ -140,25 +140,30 @@ namespace irstlm {
}
//return log10 prob of an ngram
- double lmInterpolation::clprob(ngram ng, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+// double lmInterpolation::clprob(ngram ng, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmInterpolation::clprob(ngram ng, double* bow,int* bol,ngram_state_t* maxsuffidx, char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
double pr=0.0;
double _logpr;
char* _maxsuffptr=NULL,*actualmaxsuffptr=NULL;
+ ngram_state_t _maxsuffidx=NULL,actualmaxsuffidx=NULL;
unsigned int _statesize=0,actualstatesize=0;
int _bol=0,actualbol=MAX_NGRAM;
double _bow=0.0,actualbow=0.0;
bool _extendible=false;
bool actualextendible=false;
+// ngram_state_t* maxsuffidx = new ngram_state_t;
+
for (size_t i=0; i<m_lm.size(); i++) {
if (m_weight[i]>0.0){
ngram _ng(m_lm[i]->getDict());
_ng.trans(ng);
- _logpr=m_lm[i]->clprob(_ng,&_bow,&_bol,&_maxsuffptr,&_statesize,&_extendible);
+// _logpr=m_lm[i]->clprob(_ng,&_bow,&_bol,&_maxsuffptr,&_statesize,&_extendible);
+ _logpr=m_lm[i]->clprob(_ng,&_bow,&_bol,&_maxsuffidx,&_maxsuffptr,&_statesize,&_extendible);
IFVERBOSE(3){
//cerr.precision(10);
@@ -185,6 +190,7 @@ namespace irstlm {
if(_statesize > actualstatesize || i == 0) {
actualmaxsuffptr = _maxsuffptr;
+ actualmaxsuffidx = _maxsuffidx;
actualstatesize = _statesize;
}
if (_bol < actualbol) {
@@ -198,6 +204,7 @@ namespace irstlm {
if (bol) *bol=actualbol;
if (bow) *bow=log(actualbow);
if (maxsuffptr) *maxsuffptr=actualmaxsuffptr;
+ if (maxsuffidx) *maxsuffidx=actualmaxsuffidx;
if (statesize) *statesize=actualstatesize;
if (extendible) {
*extendible=actualextendible;
@@ -211,7 +218,8 @@ namespace irstlm {
return log10(pr);
}
- double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+// double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,ngram_state_t* maxsuffidx,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
//create the actual ngram
@@ -219,7 +227,8 @@ namespace irstlm {
ong.pushc(codes,sz);
MY_ASSERT (ong.size == sz);
- return clprob(ong, bow, bol, maxsuffptr, statesize, extendible);
+// return clprob(ong, bow, bol, maxsuffptr, statesize, extendible);
+ return clprob(ong, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible);
}
double lmInterpolation::setlogOOVpenalty(int dub)
diff --git a/src/lmInterpolation.h b/src/lmInterpolation.h
index eb9edb5..7219a5c 100644
--- a/src/lmInterpolation.h
+++ b/src/lmInterpolation.h
@@ -71,9 +71,12 @@ public:
void load(const std::string &filename,int mmap=0);
lmContainer* load_lm(int i, int memmap, float nlf, float dlf);
+
+// virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+// virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
int maxlevel() const {
return maxlev;
diff --git a/src/lmclass.cpp b/src/lmclass.cpp
index 75626b3..8788a00 100644
--- a/src/lmclass.cpp
+++ b/src/lmclass.cpp
@@ -201,8 +201,9 @@ void lmclass::loadMapElement(const char* in, const char* out, double sc)
if (wcode >= MapScoreN) MapScoreN++; //increment size of the array MapScore if the element is new
}
-
-double lmclass::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,bool* extendible)
+
+//double lmclass::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,bool* extendible)
+double lmclass::lprob(ngram ong,double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
double lpr=getMapScore(*ong.wordp(1));
@@ -213,8 +214,9 @@ double lmclass::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigne
// mapped_ng.trans_freq(ong);
mapping(ong,mapped_ng);
- lpr+=lmtable::clprob(mapped_ng,bow,bol,maxsuffptr,statesize, extendible);
-
+// lpr+=lmtable::clprob(mapped_ng,bow,bol,maxsuffptr,statesize, extendible);
+ lpr+=lmtable::clprob(mapped_ng,bow,bol,maxsuffidx,maxsuffptr,statesize, extendible);
+
VERBOSE(3,"In lmclass::lprob(...) global prob = " << lpr << "\n");
return lpr;
}
diff --git a/src/lmclass.h b/src/lmclass.h
index 408291d..55d4fc1 100644
--- a/src/lmclass.h
+++ b/src/lmclass.h
@@ -74,16 +74,29 @@ public:
~lmclass();
void load(const std::string &filename,int mmap=0);
-
- double lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+
+// double lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+/*
inline double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
return lprob(ng,bow,bol,maxsuffptr,statesize,extendible);
};
- inline double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+*/
+/*
+ inline double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
ngram ong(getDict());
ong.pushc(ng,ngsize);
return lprob(ong,bow,bol,maxsuffptr,statesize,extendible);
};
+*/
+ double lprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ inline double clprob(ngram ng,double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+ return lprob(ng,bow,bol,maxsuffidx,maxsuffptr,statesize,extendible);
+ };
+ inline double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+ ngram ong(getDict());
+ ong.pushc(ng,ngsize);
+ return lprob(ong,bow,bol,maxsuffidx,maxsuffptr,statesize,extendible);
+ };
inline bool is_OOV(int code) {
//a word is consisdered OOV if its mapped value is OOV
diff --git a/src/lmmacro.cpp b/src/lmmacro.cpp
index 2d8f482..8edcf09 100644
--- a/src/lmmacro.cpp
+++ b/src/lmmacro.cpp
@@ -327,15 +327,18 @@ double lmmacro::lprob(ngram micro_ng)
return prob;
};
-
-double lmmacro::clprob(int* codes, int sz, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+
+//double lmmacro::clprob(int* codes, int sz, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+double lmmacro::clprob(int* codes, int sz, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible)
{
ngram micro_ng(getDict());
micro_ng.pushc(codes,sz);
- return clprob(micro_ng,bow,bol,state,statesize,extendible);
+// return clprob(micro_ng,bow,bol,state,statesize,extendible);
+ return clprob(micro_ng,bow,bol,ngramstate,state,statesize,extendible);
}
-double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+// double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible)
{
VERBOSE(3," lmmacro::clprob(ngram), parameter = <" << micro_ng << ">\n");
@@ -353,7 +356,8 @@ double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, char** state,unsig
logpr = 0.0;
} else {
VERBOSE(3," QUERY MACRO LM on (after transformation and size reduction) " << transformed_ng << "\n");
- logpr = lmtable::clprob(transformed_ng, bow, bol, state, statesize, extendible);
+// logpr = lmtable::clprob(transformed_ng, bow, bol, state, statesize, extendible);
+ logpr = lmtable::clprob(transformed_ng, bow, bol, ngramstate, state, statesize, extendible);
}
VERBOSE(3," GET logpr: " << logpr << "\n");
diff --git a/src/lmmacro.h b/src/lmmacro.h
index bfeab6d..fc05b5f 100644
--- a/src/lmmacro.h
+++ b/src/lmmacro.h
@@ -77,8 +77,10 @@ public:
void load(const std::string &filename,int mmap=0);
double lprob(ngram ng);
- double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+// double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+// double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ double clprob(ngram ng,double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
diff --git a/src/lmtable.cpp b/src/lmtable.cpp
index fc7852e..7ae6e03 100644
--- a/src/lmtable.cpp
+++ b/src/lmtable.cpp
@@ -1742,19 +1742,20 @@ namespace irstlm {
void lmtable::print_table_stat()
{
- VERBOSE(2,"printing statistics of tables" << endl);
+ VERBOSE(2,"printing statistics of tables" << std::endl);
for (int i=1; i<=maxlev; i++)
print_table_stat(i);
}
void lmtable::print_table_stat(int level)
{
- VERBOSE(2," level: " << level);
- VERBOSE(2," maxsize[level]:" << maxsize[level]);
- VERBOSE(2," cursize[level]:" << cursize[level]);
- VERBOSE(2," tb_offset[level]:" << tb_offset[level]);
- VERBOSE(2," table:" << (void*) table);
- VERBOSE(2," table[level]:" << (void*) table[level]);
+ VERBOSE(2," level: " << level << std::endl);
+ VERBOSE(2," maxsize[level]:" << maxsize[level] << std::endl);
+ VERBOSE(2," cursize[level]:" << cursize[level] << std::endl);
+ VERBOSE(2," tb_offset[level]:" << tb_offset[level] << std::endl);
+ VERBOSE(2," table:" << (void*) table << std::endl);
+ VERBOSE(2," table[level]:" << (void*) table[level] << std::endl);
+ VERBOSE(2," table[level]-table:" << ((char*) table[level]-(char*) table) << std::endl);
VERBOSE(2," tableGaps[level]:" << (void*) tableGaps[level] << std::endl);
}
@@ -1936,6 +1937,9 @@ namespace irstlm {
maxsize[l]=cursize[l];
}
+ //update table offsets
+ for (int l=2; l<=maxlev; l++) update_offset(l,tb_offset[l-1]+maxsize[l-1]);
+
char header2[MAX_LINE];
if (isQtable) {
inp >> header2;
@@ -2419,10 +2423,11 @@ namespace irstlm {
//lastbow: bow of the deepest found ngram
//non recursive version, also includes maxsuffptr
- double lmtable::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,
- bool* extendible, double *lastbow)
+// double lmtable::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,bool* extendible, double *lastbow)
+ double lmtable::lprob(ngram ong,double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr,unsigned int* statesize,bool* extendible, double *lastbow)
{
- VERBOSE(3," lmtable::lprob(ngram) ong " << ong << "\n");
+ VERBOSE(3," lmtable::lprob(ngram) ong |" << ong << "|\n" << std::endl);
+ VERBOSE(3," lmtable::lprob(ngram) ong.size |" << ong.size << "|\n" << std::endl);
if (ong.size==0) return 0.0; //sanity check
if (ong.size>maxlev) ong.size=maxlev; //adjust n-gram level to table size
@@ -2435,6 +2440,9 @@ namespace irstlm {
float ibow,iprob; //internal back-off weight and logprob
+
+// ngram_state_t* maxsuffidx = new ngram_state_t;
+
if (isInverted) {
ngram ing=ong; //Inverted ngram TRIE
@@ -2444,14 +2452,24 @@ namespace irstlm {
iprob=ing.prob;
lpr = (double)(isQtable?Pcenters[ing.lev][(qfloat_t)iprob]:iprob);
if (*ong.wordp(1)==dict->oovcode()) lpr-=logOOVpenalty; //add OOV penalty
- if (statesize) *statesize=MIN(ing.lev,(ing.size-1)); //find largest n-1 gram suffix
- if (maxsuffptr) *maxsuffptr=ing.path[MIN(ing.lev,(ing.size-1))];
+ size_t _level=MIN(ing.lev,(ing.size-1));
+// if (statesize) *statesize=MIN(ing.lev,(ing.size-1)); //find largest n-1 gram suffix
+// if (maxsuffptr) *maxsuffptr=ing.path[MIN(ing.lev,(ing.size-1))];
+ if (statesize) *statesize=_level; //find largest n-1 gram suffix
+ if (maxsuffptr) *maxsuffptr=ing.path[_level];
+
+ if (maxsuffidx){
+ int ndsz=nodesize(tbltype[_level]);
+ *maxsuffidx = (ngram_state_t) ( ((table_pos_t) (ing.path[_level]) - (table_pos_t) table[_level] ) / ndsz ) + tb_offset[_level] + 1; //added 1 to distinguish from zero-ngram
+ VERBOSE(3,"lmtable::lprob(ngram) ing:|" << ing << "| _level:|" << _level << "| ing.path[_level]:|" << ing.path[_level] << "| tb_offset[_level]:|" << tb_offset[_level] << "| *maxsuffidx:|" << *maxsuffidx << "|" << std::endl);
+ }
if (extendible) *extendible=succrange(ing.path[ing.lev],ing.lev)>0;
if (lastbow) *lastbow=(double) (isQtable?Bcenters[ing.lev][(qfloat_t)ing.bow]:ing.bow);
} else { // means a real unknown word!
lpr=-log(UNIGRAM_RESOLUTION)/M_LN10;
if (statesize) *statesize=0; //default statesize for zero-gram!
if (maxsuffptr) *maxsuffptr=NULL; //default stateptr for zero-gram!
+ if (maxsuffidx) *maxsuffidx=0; //default state-value for zero-gram!
}
if (ing.lev < ing.size) { //compute backoff weight
@@ -2482,23 +2500,47 @@ namespace irstlm {
MY_ASSERT((extendible == NULL) || (extendible && *extendible==false));
// MY_ASSERT(lastbow==NULL);
for (ngram ng=ong; ng.size>0; ng.size--) {
+ VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "|" << std::endl);
if (get(ng,ng.size,ng.size)) {
+ VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| inside if get" << std::endl);
iprob=ng.prob;
lpr = (double)(isQtable?Pcenters[ng.size][(qfloat_t)iprob]:iprob);
if (*ng.wordp(1)==dict->oovcode()) lpr-=logOOVpenalty; //add OOV penalty
if (maxsuffptr || statesize) { //one extra step is needed if ng.size=ong.size
+
+ VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| inside if maxsuffptr" << std::endl);
if (ong.size==ng.size) {
ng.size--;
get(ng,ng.size,ng.size);
}
- if (statesize) *statesize=ng.size;
- if (maxsuffptr) *maxsuffptr=ng.link; //we should check ng.link != NULL
+ if (statesize) *statesize=ng.size;
+ if (maxsuffptr) *maxsuffptr=ng.link; //we should check ng.link != NULL
+
+ size_t _level=ng.size;
+ VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| _level:|" << _level << "|" << std::endl);
+// VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| (void*) table:|" << (void*) table << "|" << std::endl);
+// VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| (void*) ng.link:|" << (void*) ng.link << "|" << std::endl);
+
+
+ if (maxsuffidx){
+ int ndsz=nodesize(tbltype[_level]);
+ *maxsuffidx=0;
+ if (ng.link){
+ *maxsuffidx = (ngram_state_t) ( ((table_pos_t) (ng.link) - (table_pos_t) table[_level]) / ndsz ) + tb_offset[_level] + 1; //added 1 to distinguish from zero-ngram
+ }
+
+ VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| *maxsuffidx:|" << *maxsuffidx << "|" << std::endl);
+ }
}
+ VERBOSE(3,"lmtable::lprob(ngram) returning (rbow+lpr):|" << (rbow+lpr) << "|" << std::endl);
return rbow+lpr;
} else {
+ VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| inside else get" << std::endl);
+ VERBOSE(3,"lmtable::lprob(ngram) ng.size:|" << ng.size << "|" << std::endl);
if (ng.size==1) { //means a real unknow word!
- if (maxsuffptr) *maxsuffptr=NULL; //default stateptr for zero-gram!
if (statesize) *statesize=0;
+ if (maxsuffptr) *maxsuffptr=NULL; //default stateptr for zero-gram!
+ if (maxsuffidx) *maxsuffidx=0; //default state-value for zero-gram!
return rbow -log(UNIGRAM_RESOLUTION)/M_LN10;
} else { //compute backoff
if (bol) (*bol)++; //increase backoff level
@@ -2514,6 +2556,7 @@ namespace irstlm {
}
}
+ VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| END " << std::endl);
}
}
@@ -2523,9 +2566,11 @@ namespace irstlm {
//return log10 probsL use cache memory
- double lmtable::clprob(ngram ong,double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+// double lmtable::clprob(ngram ong,double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+ double lmtable::clprob(ngram ong,double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible)
{
- VERBOSE(3,"double lmtable::clprob(ngram ong,double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible) ong:|" << ong << "|\n");
+// VERBOSE(3,"double lmtable::clprob(ngram ong,double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible) ong:|" << ong << "|\n");
+ VERBOSE(3,"double lmtable::clprob(ngram ong,double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible) ong:|" << ong << "|\n");
#ifdef TRACE_CACHELM
// if (probcache && ong.size==maxlev && sentence_id>0) {
@@ -2537,6 +2582,7 @@ namespace irstlm {
if (ong.size==0) {
if (statesize!=NULL) *statesize=0;
if (state!=NULL) *state=NULL;
+ if (ngramstate!=NULL) *ngramstate=NULL;
if (extendible!=NULL) *extendible=false;
return 0.0;
}
@@ -2554,6 +2600,7 @@ namespace irstlm {
if (bow) *bow = pst_get.bow;
if (bol) *bol = pst_get.bol;
if (state) *state = pst_get.state;
+ if (ngramstate) *ngramstate = pst_get.ngramstate;
if (statesize) *statesize = pst_get.statesize;
if (extendible) *extendible = pst_get.extendible;
@@ -2563,12 +2610,13 @@ namespace irstlm {
//cache miss
prob_and_state_t pst_add;
- logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
+ logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.ngramstate), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
if (bow) *bow = pst_add.bow;
if (bol) *bol = pst_add.bol;
if (state) *state = pst_add.state;
+ if (ngramstate) *ngramstate = pst_add.ngramstate;
if (statesize) *statesize = pst_add.statesize;
if (extendible) *extendible = pst_add.extendible;
@@ -2581,16 +2629,19 @@ namespace irstlm {
}
return logpr;
#else
- return lmtable::lprob(ong, bow, bol, state, statesize, extendible);
+// return lmtable::lprob(ong, bow, bol, state, statesize, extendible);
+ return lmtable::lprob(ong, bow, bol, ngramstate, state, statesize, extendible);
#endif
};
//return log10 probsL use cache memory
//this function simulates the clprob(ngram, ...) but it takes as input an array of codes instead of the ngram
- double lmtable::clprob(int* codes, int sz, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+// double lmtable::clprob(int* codes, int sz, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+ double lmtable::clprob(int* codes, int sz, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible)
{
- VERBOSE(3," double lmtable::clprob(int* codes, int sz, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)\n");
+// VERBOSE(3," double lmtable::clprob(int* codes, int sz, double* bow, int* bol, char** state, unsigned int* statesize, bool* extendible)\n");
+ VERBOSE(3," double lmtable::clprob(int* codes, int sz, double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible)\n");
#ifdef TRACE_CACHELM
// if (probcache && sz==maxlev && sentence_id>0) {
if (probcache && sentence_id>0) {
@@ -2602,6 +2653,7 @@ namespace irstlm {
if (sz==0) {
if (statesize!=NULL) *statesize=0;
if (state!=NULL) *state=NULL;
+ if (ngramstate!=NULL) *ngramstate=NULL;
if (extendible!=NULL) *extendible=false;
return 0.0;
}
@@ -2621,6 +2673,7 @@ namespace irstlm {
if (bow) *bow = pst_get.bow;
if (bol) *bol = pst_get.bol;
if (state) *state = pst_get.state;
+ if (ngramstate) *ngramstate = pst_get.ngramstate;
if (statesize) *statesize = pst_get.statesize;
if (extendible) *extendible = pst_get.extendible;
@@ -2635,12 +2688,14 @@ namespace irstlm {
//cache miss
prob_and_state_t pst_add;
- logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
+// logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
+ logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.ngramstate), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
if (bow) *bow = pst_add.bow;
if (bol) *bol = pst_add.bol;
if (state) *state = pst_add.state;
+ if (ngramstate) *ngramstate = pst_add.ngramstate;
if (statesize) *statesize = pst_add.statesize;
if (extendible) *extendible = pst_add.extendible;
@@ -2663,7 +2718,8 @@ namespace irstlm {
logpr = lmtable::lprob(ong, bow, bol, state, statesize, extendible);
return logpr;
*/
- return lmtable::lprob(ong, bow, bol, state, statesize, extendible);
+// return lmtable::lprob(ong, bow, bol, state, statesize, extendible);
+ return lmtable::lprob(ong, bow, bol, ngramstate, state, statesize, extendible);
#endif
};
diff --git a/src/lmtable.h b/src/lmtable.h
index d33e6f1..ee7b9b2 100644
--- a/src/lmtable.h
+++ b/src/lmtable.h
@@ -65,6 +65,7 @@
typedef enum {INTERNAL,QINTERNAL,LEAF,QLEAF} LMT_TYPE;
typedef char* node;
+typedef unsigned int ngram_state_t; //type for pointing to a full ngram in the table
typedef unsigned int table_entry_pos_t; //type for pointing to a full ngram in the table
typedef unsigned long table_pos_t; // type for pointing to a single char in the table
typedef unsigned char qfloat_t; //type for quantized probabilities
@@ -315,11 +316,17 @@ public:
void filter(const char* /* unused parameter: lmfile */) {};
- virtual double lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
- virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+// virtual double lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
+// virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+// virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+
+ virtual double lprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
+ virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+
+
void *search(int lev,table_entry_pos_t offs,table_entry_pos_t n,int sz,int *w, LMT_ACTION action,char **found=(char **)NULL);
int mybsearch(char *ar, table_entry_pos_t n, int size, char *key, table_entry_pos_t *idx);
@@ -342,7 +349,7 @@ public:
virtual const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
virtual const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
- virtual const char *cmaxsuffptr(int* codes, int sz, unsigned int* size=NULL);
+ virtual const char *cmaxsuffptr(int* codes, int sz, unsigned int* size=NULL);
inline void putmem(char* ptr,int value,int offs,int size) {
MY_ASSERT(ptr!=NULL);
@@ -462,7 +469,6 @@ public:
return value;
};
-
inline float bow(node nd,LMT_TYPE ndt) {
int offs=LMTCODESIZE+(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
diff --git a/src/ngramcache.h b/src/ngramcache.h
index dc47952..232afa9 100644
--- a/src/ngramcache.h
+++ b/src/ngramcache.h
@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "mempool.h"
#include "htable.h"
+#include "util.h"
#define NGRAMCACHE_t ngramcache
@@ -32,12 +33,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
typedef struct PROB_AND_STATE_ENTRY {
double logpr; //!< probability value of an ngram
+ ngram_state_t ngramstate; //!< index of the largest n-gram contained in the LM table.
char* state; //!< the largest suffix of an n-gram contained in the LM table.
unsigned int statesize; //!< LM statesize of an ngram
double bow; //!< backoff weight
int bol; //!< backoff level
bool extendible; //!< flag for extendibility of the ngram
- PROB_AND_STATE_ENTRY(double lp=0.0, char* st=NULL, unsigned int stsz=0, double bw=0.0, int bl=0, bool ex=false): logpr(lp), state(st), statesize(stsz), bow(bw), bol(bl), extendible(ex) {}; //initializer
+// PROB_AND_STATE_ENTRY(double lp=0.0, char* st=NULL, unsigned int stsz=0, double bw=0.0, int bl=0, bool ex=false): logpr(lp), state(st), statesize(stsz), bow(bw), bol(bl), extendible(ex) {}; //initializer
+ PROB_AND_STATE_ENTRY(double lp=0.0, ngram_state_t ngramst=0, char* st=NULL, unsigned int stsz=0, double bw=0.0, int bl=0, bool ex=false): logpr(lp), ngramstate(ngramst), state(st), statesize(stsz), bow(bw), bol(bl), extendible(ex) {}; //initializer
} prob_and_state_t;
void print(prob_and_state_t* pst, std::ostream& out=std::cout);
diff --git a/src/util.h b/src/util.h
index 34db77a..3db4416 100644
--- a/src/util.h
+++ b/src/util.h
@@ -50,6 +50,8 @@ using namespace std;
#define SSEED 50
class ngram;
+typedef unsigned int ngram_state_t; //type for pointing to a full ngram in the table
+
class mfstream;
std::string gettempfolder();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git
More information about the debian-science-commits mailing list