[irstlm] 04/78: enable another lm state based on the index of ngram inside the table instead of its address

Giulio Paci giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:47:00 UTC 2016


This is an automated email from the git hooks/post-receive script.

giuliopaci-guest pushed a commit to tag adaptiveLM.v0.10
in repository irstlm.

commit e00816476faa4b15610545c02d09b4ff7724473a
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date:   Wed Nov 4 11:35:34 2015 +0100

    enable an additional LM state (ngram_state_t) based on the index of the ngram inside the table instead of its memory address
---
 src/compile-lm.cpp      |  23 ++++++++---
 src/interpolate-lm.cpp  |   9 +++--
 src/lmContainer.h       |  23 ++++++++---
 src/lmInterpolation.cpp |  17 ++++++--
 src/lmInterpolation.h   |   7 +++-
 src/lmclass.cpp         |  10 +++--
 src/lmclass.h           |  19 +++++++--
 src/lmmacro.cpp         |  14 ++++---
 src/lmmacro.h           |   6 ++-
 src/lmtable.cpp         | 102 +++++++++++++++++++++++++++++++++++++-----------
 src/lmtable.h           |  16 +++++---
 src/ngramcache.h        |   5 ++-
 src/util.h              |   2 +
 13 files changed, 190 insertions(+), 63 deletions(-)

diff --git a/src/compile-lm.cpp b/src/compile-lm.cpp
index 0ad6707..5f0cc70 100644
--- a/src/compile-lm.cpp
+++ b/src/compile-lm.cpp
@@ -191,6 +191,8 @@ int main(int argc, char **argv)
 
   lmt->load(infile);
 
+	lmt->print_table_stat();
+	
   //CHECK this part for sfilter to make it possible only for LMTABLE
   if (sfilter != NULL) {
     lmContainer* filtered_lmt = NULL;
@@ -296,12 +298,14 @@ int main(int argc, char **argv)
 			
       double bow;
       int bol=0;
+      ngram_state_t msidx;
       char *msp;
       unsigned int statesize;
 
       lmt->dictionary_incflag(1);
 
       while(inptxt >> ng) {
+				VERBOSE(3,"read ng:|" << ng << "|" << std::endl);
 
         if (ng.size>lmt->maxlevel()) ng.size=lmt->maxlevel();
 
@@ -312,7 +316,10 @@ int main(int argc, char **argv)
         }
 
         if (ng.size>=1) {
-          Pr=lmt->clprob(ng,&bow,&bol,&msp,&statesize);
+					VERBOSE(3,"computing clprob ng:|" << ng << "|" << std::endl);
+//          Pr=lmt->clprob(ng,&bow,&bol,&msp,&statesize);
+          Pr=lmt->clprob(ng,&bow,&bol,&msidx,&msp,&statesize);
+					VERBOSE(3,"computing clprob ng:|" << ng << "| Pr:|" << Pr << "| ngramstate:" << msidx << " msp:|" << (void*) msp << "| statesize:|" << statesize << "|" << std::endl);
           logPr+=Pr;
           sent_logPr+=Pr;
 
@@ -331,12 +338,12 @@ int main(int argc, char **argv)
             std::cout.flush();
           }
           else if (debug==4) {
-            std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
+            std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " ngramstate:" << msidx << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
             std::cout << std::endl;
             std::cout.flush();
           }
           else if (debug>4) {
-            std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
+            std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " ngramstate:" << msidx << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
             double totp=0.0;
             int oldw=*ng.wordp(1);
             double oovp=lmt->getlogOOVpenalty();
@@ -386,8 +393,10 @@ int main(int argc, char **argv)
             std::cerr << ".";
             lmt->check_caches_levels();
           }
-
+					
+					VERBOSE(3,"computing clprob END" << std::endl);
         }
+				VERBOSE(3,"read END" << std::endl);
       }
 
       PP=exp((-logPr * log(10.0)) /Nw);
@@ -471,6 +480,7 @@ int main(int argc, char **argv)
 		double Pr;
 		double bow;
 		int bol=0;
+		ngram_state_t msidx;
 		char *msp;
 		unsigned int statesize;
 
@@ -490,9 +500,10 @@ int main(int argc, char **argv)
 					ng.size=lmt->maxlevel();
 				}
 				
-				Pr=lmt->clprob(ng,&bow,&bol,&msp,&statesize);
+//				Pr=lmt->clprob(ng,&bow,&bol,&msp,&statesize);
+				Pr=lmt->clprob(ng,&bow,&bol,&msidx, &msp,&statesize);
 #ifndef OUTPUT_SUPPRESSED
-				std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
+				std::cout << ng << " [" << ng.size-bol << "-gram: recombine:" << statesize << " ngramstate:" << msidx << " state:" << (void*) msp << "] [" << ng.size+1-((bol==0)?(1):bol) << "-gram: bol:" << bol << "] " << Pr << " bow:" << bow;
 				std::cout << std::endl;
     				std::cout.flush();
 #endif
diff --git a/src/interpolate-lm.cpp b/src/interpolate-lm.cpp
index 3d0a3ec..fe5cefd 100644
--- a/src/interpolate-lm.cpp
+++ b/src/interpolate-lm.cpp
@@ -390,6 +390,7 @@ int main(int argc, char **argv)
 
       double bow;
       int bol=0;
+      ngram_state_t msidx;
       char *msp;
       unsigned int statesize;
 
@@ -415,7 +416,8 @@ int main(int argc, char **argv)
 
             ngram ong(lmt[i]->getDict());
             ong.trans(ng);
-            logpr = lmt[i]->clprob(ong,&bow,&bol,&msp,&statesize); //actual prob of the interpolation
+//            logpr = lmt[i]->clprob(ong,&bow,&bol,&msp,&statesize); //actual prob of the interpolation
+            logpr = lmt[i]->clprob(ong,&bow,&bol,&msidx,&msp,&statesize); //actual prob of the interpolation
             //logpr = lmt[i]->clprob(ong,&bow,&bol); //LM log-prob
 
             Pr+=w[i] * pow(10.0,logpr); //actual prob of the interpolation
@@ -519,8 +521,9 @@ int main(int argc, char **argv)
         for (i=0; i<N; i++) {
           ngram ong(lmt[i]->getDict());
           ong.trans(ng);
-          logpr = lmt[i]->clprob(ong,&bow,&bol,NULL,&statesize); //LM log-prob (using caches if available)
-
+//          logpr = lmt[i]->clprob(ong,&bow,&bol,NULL,&statesize); //LM log-prob (using caches if available)
+          logpr = lmt[i]->clprob(ong,&bow,&bol,NULL,NULL,&statesize); //LM log-prob (using caches if available)
+					
           Pr+=w[i] * pow(10.0,logpr); //actual prob of the interpolation
           std::cout << "lm " << i << ":" << " logpr: " << logpr << " weight: " << w[i] << std::endl;
           if (maxbol<bol) maxbol=bol;
diff --git a/src/lmContainer.h b/src/lmContainer.h
index a492ec9..ebf7fa1 100644
--- a/src/lmContainer.h
+++ b/src/lmContainer.h
@@ -113,23 +113,31 @@ public:
   };
   virtual bool is_inverted() {
     return false;
-  };
-  virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
-VERBOSE(3,"virtual double  lmContainer::clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) ng:|" << ng  << "|\n");
+  };	
+	
+//virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+	virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+//		VERBOSE(3,"virtual double  lmContainer::clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) ng:|" << ng  << "|\n");
+		VERBOSE(3,"virtual double  lmContainer::clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) ng:|" << ng  << "|\n");
     UNUSED(ng);
     UNUSED(bow);
     UNUSED(bol);
+    UNUSED(maxsuffidx);
     UNUSED(maxsuffptr);
     UNUSED(statesize);
     UNUSED(extendible);
     return 0.0;
   };
-  virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
-VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL)\n");
+
+//  virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+  virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+//	VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL)\n");
+	VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL)\n");
     UNUSED(ng);
     UNUSED(ngsize);
     UNUSED(bow);
     UNUSED(bol);
+    UNUSED(maxsuffidx);
     UNUSED(maxsuffptr);
     UNUSED(statesize);
     UNUSED(extendible);
@@ -225,6 +233,11 @@ VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=N
 		getDict()->incflag(0);
 		return c;
 	}
+	
+	virtual void print_table_stat(){
+    VERBOSE(3,"virtual void lmContainer::print_table_stat() "<< std::endl);
+	};
+	
 };
 
 }//namespace irstlm
diff --git a/src/lmInterpolation.cpp b/src/lmInterpolation.cpp
index 356289f..33aaa21 100644
--- a/src/lmInterpolation.cpp
+++ b/src/lmInterpolation.cpp
@@ -140,25 +140,30 @@ namespace irstlm {
 	}
 	
 	//return log10 prob of an ngram
-	double lmInterpolation::clprob(ngram ng, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+//	double lmInterpolation::clprob(ngram ng, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	double lmInterpolation::clprob(ngram ng, double* bow,int* bol,ngram_state_t* maxsuffidx, char** maxsuffptr,unsigned int* statesize,bool* extendible)
 	{
 		
 		double pr=0.0;
 		double _logpr;
 		
 		char* _maxsuffptr=NULL,*actualmaxsuffptr=NULL;
+		ngram_state_t _maxsuffidx=NULL,actualmaxsuffidx=NULL;
 		unsigned int _statesize=0,actualstatesize=0;
 		int _bol=0,actualbol=MAX_NGRAM;
 		double _bow=0.0,actualbow=0.0; 
 		bool _extendible=false;
 		bool actualextendible=false;
 		
+//		ngram_state_t* maxsuffidx = new ngram_state_t;
+		
 		for (size_t i=0; i<m_lm.size(); i++) {
 			
 			if (m_weight[i]>0.0){
 				ngram _ng(m_lm[i]->getDict());
 				_ng.trans(ng);
-				_logpr=m_lm[i]->clprob(_ng,&_bow,&_bol,&_maxsuffptr,&_statesize,&_extendible);
+//				_logpr=m_lm[i]->clprob(_ng,&_bow,&_bol,&_maxsuffptr,&_statesize,&_extendible);				
+				_logpr=m_lm[i]->clprob(_ng,&_bow,&_bol,&_maxsuffidx,&_maxsuffptr,&_statesize,&_extendible);
 				
 				IFVERBOSE(3){
 					//cerr.precision(10);
@@ -185,6 +190,7 @@ namespace irstlm {
 				
 				if(_statesize > actualstatesize || i == 0) {
 					actualmaxsuffptr = _maxsuffptr;
+					actualmaxsuffidx = _maxsuffidx;
 					actualstatesize = _statesize;
 				}
 				if (_bol < actualbol) {
@@ -198,6 +204,7 @@ namespace irstlm {
 		if (bol) *bol=actualbol;
 		if (bow) *bow=log(actualbow);
 		if (maxsuffptr) *maxsuffptr=actualmaxsuffptr;
+		if (maxsuffidx) *maxsuffidx=actualmaxsuffidx;
 		if (statesize) *statesize=actualstatesize;
 		if (extendible) {
 			*extendible=actualextendible;
@@ -211,7 +218,8 @@ namespace irstlm {
 		return log10(pr);
 	}
 	
-	double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+//	double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,ngram_state_t* maxsuffidx,char** maxsuffptr,unsigned int* statesize,bool* extendible)
 	{
 		
 		//create the actual ngram
@@ -219,7 +227,8 @@ namespace irstlm {
 		ong.pushc(codes,sz);
 		MY_ASSERT (ong.size == sz);
 		
-		return clprob(ong, bow, bol, maxsuffptr, statesize, extendible);
+//		return clprob(ong, bow, bol, maxsuffptr, statesize, extendible);
+		return clprob(ong, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible);
 	}
 	
 	double lmInterpolation::setlogOOVpenalty(int dub)
diff --git a/src/lmInterpolation.h b/src/lmInterpolation.h
index eb9edb5..7219a5c 100644
--- a/src/lmInterpolation.h
+++ b/src/lmInterpolation.h
@@ -71,9 +71,12 @@ public:
 
   void load(const std::string &filename,int mmap=0);
   lmContainer* load_lm(int i, int memmap, float nlf, float dlf);
+	
+//  virtual double clprob(ngram ng,            double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+//  virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
 
-  virtual double clprob(ngram ng,            double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-  virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+  virtual double clprob(ngram ng,            double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+  virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
 
   int maxlevel() const {
     return maxlev;
diff --git a/src/lmclass.cpp b/src/lmclass.cpp
index 75626b3..8788a00 100644
--- a/src/lmclass.cpp
+++ b/src/lmclass.cpp
@@ -201,8 +201,9 @@ void lmclass::loadMapElement(const char* in, const char* out, double sc)
 
   if (wcode >= MapScoreN) MapScoreN++; //increment size of the array MapScore if the element is new
 }
-
-double lmclass::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	
+//double lmclass::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,bool* extendible)
+double lmclass::lprob(ngram ong,double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr,unsigned int* statesize,bool* extendible)
 {
   double lpr=getMapScore(*ong.wordp(1));
 
@@ -213,8 +214,9 @@ double lmclass::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigne
   //  mapped_ng.trans_freq(ong);
   mapping(ong,mapped_ng);
 
-  lpr+=lmtable::clprob(mapped_ng,bow,bol,maxsuffptr,statesize, extendible);
-
+//  lpr+=lmtable::clprob(mapped_ng,bow,bol,maxsuffptr,statesize, extendible);
+  lpr+=lmtable::clprob(mapped_ng,bow,bol,maxsuffidx,maxsuffptr,statesize, extendible);
+	
   VERBOSE(3,"In lmclass::lprob(...) global prob  = " <<  lpr  << "\n");
   return lpr;
 }
diff --git a/src/lmclass.h b/src/lmclass.h
index 408291d..55d4fc1 100644
--- a/src/lmclass.h
+++ b/src/lmclass.h
@@ -74,16 +74,29 @@ public:
   ~lmclass();
 
   void load(const std::string &filename,int mmap=0);
-
-  double lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+	
+//  double lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+/*
   inline double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
     return lprob(ng,bow,bol,maxsuffptr,statesize,extendible);
   };
-  inline double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+*/
+/*
+ inline double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
     ngram ong(getDict());
     ong.pushc(ng,ngsize);
     return lprob(ong,bow,bol,maxsuffptr,statesize,extendible);
   };
+*/
+  double lprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+  inline double clprob(ngram ng,double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+    return lprob(ng,bow,bol,maxsuffidx,maxsuffptr,statesize,extendible);
+  };
+  inline double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+    ngram ong(getDict());
+    ong.pushc(ng,ngsize);
+    return lprob(ong,bow,bol,maxsuffidx,maxsuffptr,statesize,extendible);
+  };
 
   inline bool is_OOV(int code) {
     //a word is consisdered OOV if its mapped value is OOV
diff --git a/src/lmmacro.cpp b/src/lmmacro.cpp
index 2d8f482..8edcf09 100644
--- a/src/lmmacro.cpp
+++ b/src/lmmacro.cpp
@@ -327,15 +327,18 @@ double lmmacro::lprob(ngram micro_ng)
 
   return prob;
 };
-
-double lmmacro::clprob(int* codes, int sz,  double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+	
+//double lmmacro::clprob(int* codes, int sz,  double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+double lmmacro::clprob(int* codes, int sz,  double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible)
 {
   ngram micro_ng(getDict());
   micro_ng.pushc(codes,sz);
-  return clprob(micro_ng,bow,bol,state,statesize,extendible);
+//  return clprob(micro_ng,bow,bol,state,statesize,extendible);
+  return clprob(micro_ng,bow,bol,ngramstate,state,statesize,extendible);
 }
 
-double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+//	double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible)
 {
 
   VERBOSE(3," lmmacro::clprob(ngram), parameter = <" <<  micro_ng << ">\n");
@@ -353,7 +356,8 @@ double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, char** state,unsig
     logpr = 0.0;
   } else {
     VERBOSE(3,"  QUERY MACRO LM on (after transformation and size reduction) " << transformed_ng << "\n");
-    logpr = lmtable::clprob(transformed_ng, bow, bol, state, statesize, extendible);
+//    logpr = lmtable::clprob(transformed_ng, bow, bol, state, statesize, extendible);
+    logpr = lmtable::clprob(transformed_ng, bow, bol, ngramstate, state, statesize, extendible);
   }
   VERBOSE(3,"  GET logpr: " << logpr << "\n");
 
diff --git a/src/lmmacro.h b/src/lmmacro.h
index bfeab6d..fc05b5f 100644
--- a/src/lmmacro.h
+++ b/src/lmmacro.h
@@ -77,8 +77,10 @@ public:
   void load(const std::string &filename,int mmap=0);
 
   double lprob(ngram ng);
-  double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+//  double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+//  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+  double clprob(ngram ng,double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
 
   const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
   const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
diff --git a/src/lmtable.cpp b/src/lmtable.cpp
index fc7852e..7ae6e03 100644
--- a/src/lmtable.cpp
+++ b/src/lmtable.cpp
@@ -1742,19 +1742,20 @@ namespace irstlm {
 	
 	void lmtable::print_table_stat()
 	{
-		VERBOSE(2,"printing statistics of tables" << endl);
+		VERBOSE(2,"printing statistics of tables" << std::endl);
 		for (int i=1; i<=maxlev; i++)
 			print_table_stat(i);
 	}
 	
 	void lmtable::print_table_stat(int level)
 	{
-		VERBOSE(2," level: " << level);
-		VERBOSE(2," maxsize[level]:" << maxsize[level]);
-		VERBOSE(2," cursize[level]:" << cursize[level]);
-		VERBOSE(2," tb_offset[level]:" << tb_offset[level]);
-		VERBOSE(2," table:" << (void*) table);
-		VERBOSE(2," table[level]:" << (void*) table[level]);
+		VERBOSE(2," level: " << level << std::endl);
+		VERBOSE(2," maxsize[level]:" << maxsize[level] << std::endl);
+		VERBOSE(2," cursize[level]:" << cursize[level] << std::endl);
+		VERBOSE(2," tb_offset[level]:" << tb_offset[level] << std::endl);
+		VERBOSE(2," table:" << (void*) table << std::endl);
+		VERBOSE(2," table[level]:" << (void*) table[level] << std::endl);
+		VERBOSE(2," table[level]-table:" << ((char*) table[level]-(char*) table) << std::endl);
 		VERBOSE(2," tableGaps[level]:" << (void*) tableGaps[level] << std::endl);
 	}
 	
@@ -1936,6 +1937,9 @@ namespace irstlm {
 			maxsize[l]=cursize[l];
 		}
 		
+		//update table offsets
+		for (int l=2; l<=maxlev; l++) update_offset(l,tb_offset[l-1]+maxsize[l-1]);
+		
 		char header2[MAX_LINE];
 		if (isQtable) {
 			inp >> header2;
@@ -2419,10 +2423,11 @@ namespace irstlm {
 	//lastbow: bow of the deepest found ngram
 	
 	//non recursive version, also includes maxsuffptr
-	double lmtable::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,
-												bool* extendible, double *lastbow)
+//	double lmtable::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,bool* extendible, double *lastbow)
+	double lmtable::lprob(ngram ong,double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr,unsigned int* statesize,bool* extendible, double *lastbow)
 	{
-		VERBOSE(3," lmtable::lprob(ngram) ong " << ong  << "\n");
+		VERBOSE(3," lmtable::lprob(ngram) ong |" << ong  << "|\n" << std::endl);
+		VERBOSE(3," lmtable::lprob(ngram) ong.size |" << ong.size  << "|\n" << std::endl);
 		
 		if (ong.size==0) return 0.0; //sanity check
 		if (ong.size>maxlev) ong.size=maxlev; //adjust n-gram level to table size
@@ -2435,6 +2440,9 @@ namespace irstlm {
 		float ibow,iprob;    //internal back-off weight and logprob
 		
 		
+		
+//		ngram_state_t* maxsuffidx = new ngram_state_t;
+		
 		if (isInverted) {
 			ngram ing=ong; //Inverted ngram TRIE
 			
@@ -2444,14 +2452,24 @@ namespace irstlm {
 				iprob=ing.prob;
 				lpr = (double)(isQtable?Pcenters[ing.lev][(qfloat_t)iprob]:iprob);
 				if (*ong.wordp(1)==dict->oovcode()) lpr-=logOOVpenalty; //add OOV penalty
-				if (statesize)  *statesize=MIN(ing.lev,(ing.size-1)); //find largest n-1 gram suffix
-				if (maxsuffptr) *maxsuffptr=ing.path[MIN(ing.lev,(ing.size-1))];
+				size_t _level=MIN(ing.lev,(ing.size-1));
+//				if (statesize)  *statesize=MIN(ing.lev,(ing.size-1)); //find largest n-1 gram suffix
+//				if (maxsuffptr) *maxsuffptr=ing.path[MIN(ing.lev,(ing.size-1))];
+				if (statesize)  *statesize=_level; //find largest n-1 gram suffix
+				if (maxsuffptr) *maxsuffptr=ing.path[_level];
+				
+				if (maxsuffidx){
+					int ndsz=nodesize(tbltype[_level]);
+					*maxsuffidx = (ngram_state_t) ( ((table_pos_t) (ing.path[_level]) - (table_pos_t) table[_level] ) / ndsz ) + tb_offset[_level] + 1; //added 1 to distinguish from zero-ngram
+					VERBOSE(3,"lmtable::lprob(ngram) ing:|" << ing << "| _level:|" << _level << "| ing.path[_level]:|" << ing.path[_level] << "| tb_offset[_level]:|" << tb_offset[_level] << "| *maxsuffidx:|" << *maxsuffidx << "|" << std::endl);
+				}
 				if (extendible) *extendible=succrange(ing.path[ing.lev],ing.lev)>0;
 				if (lastbow) *lastbow=(double) (isQtable?Bcenters[ing.lev][(qfloat_t)ing.bow]:ing.bow);
 			} else { // means a real unknown word!
 				lpr=-log(UNIGRAM_RESOLUTION)/M_LN10;
 				if (statesize)  *statesize=0;     //default statesize for zero-gram!
 				if (maxsuffptr) *maxsuffptr=NULL; //default stateptr for zero-gram!
+				if (maxsuffidx) *maxsuffidx=0; //default state-value for zero-gram!
 			}
 			
 			if (ing.lev < ing.size) { //compute backoff weight
@@ -2482,23 +2500,47 @@ namespace irstlm {
 			MY_ASSERT((extendible == NULL) || (extendible && *extendible==false));
 			//		MY_ASSERT(lastbow==NULL);
 			for (ngram ng=ong; ng.size>0; ng.size--) {
+				VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "|" << std::endl);
 				if (get(ng,ng.size,ng.size)) {
+					VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| inside if get" << std::endl);
 					iprob=ng.prob;
 					lpr = (double)(isQtable?Pcenters[ng.size][(qfloat_t)iprob]:iprob);
 					if (*ng.wordp(1)==dict->oovcode()) lpr-=logOOVpenalty; //add OOV penalty
 					if (maxsuffptr || statesize) { //one extra step is needed if ng.size=ong.size
+						
+						VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| inside if maxsuffptr" << std::endl);
 						if (ong.size==ng.size) {
 							ng.size--;
 							get(ng,ng.size,ng.size);
 						}
-						if (statesize)  *statesize=ng.size;
-						if (maxsuffptr) *maxsuffptr=ng.link; //we should check ng.link != NULL
+						if (statesize)	*statesize=ng.size;
+						if (maxsuffptr)	*maxsuffptr=ng.link; //we should check ng.link != NULL
+						
+						size_t _level=ng.size;
+						VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| _level:|" << _level << "|" << std::endl);
+//						VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| (void*) table:|" << (void*) table << "|" << std::endl);
+//						VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| (void*) ng.link:|" << (void*) ng.link << "|" << std::endl);
+						
+						
+						if (maxsuffidx){
+							int ndsz=nodesize(tbltype[_level]);
+							*maxsuffidx=0;
+							if (ng.link){
+								*maxsuffidx = (ngram_state_t) ( ((table_pos_t) (ng.link) - (table_pos_t) table[_level]) / ndsz ) + tb_offset[_level] + 1; //added 1 to distinguish from zero-ngram
+							}
+							
+							VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| *maxsuffidx:|" << *maxsuffidx << "|" << std::endl);
+						}
 					}
+					VERBOSE(3,"lmtable::lprob(ngram) returning (rbow+lpr):|" << (rbow+lpr) << "|" << std::endl);
 					return rbow+lpr;
 				} else {
+					VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| inside else get" << std::endl);
+					VERBOSE(3,"lmtable::lprob(ngram) ng.size:|" << ng.size << "|" << std::endl);
 					if (ng.size==1) { //means a real unknow word!
-						if (maxsuffptr) *maxsuffptr=NULL; //default stateptr for zero-gram!
 						if (statesize)  *statesize=0;
+						if (maxsuffptr) *maxsuffptr=NULL; //default stateptr for zero-gram!
+						if (maxsuffidx) *maxsuffidx=0; //default state-value for zero-gram!
 						return rbow -log(UNIGRAM_RESOLUTION)/M_LN10;
 					} else { //compute backoff
 						if (bol) (*bol)++; //increase backoff level
@@ -2514,6 +2556,7 @@ namespace irstlm {
 					}
 					
 				}
+				VERBOSE(3,"lmtable::lprob(ngram) ng:|" << ng << "| END " << std::endl);
 				
 			}
 		}
@@ -2523,9 +2566,11 @@ namespace irstlm {
 	
 	
 	//return log10 probsL use cache memory
-	double lmtable::clprob(ngram ong,double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+//	double lmtable::clprob(ngram ong,double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+	double lmtable::clprob(ngram ong,double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible)
 	{
-		VERBOSE(3,"double lmtable::clprob(ngram ong,double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible) ong:|" << ong  << "|\n");
+//		VERBOSE(3,"double lmtable::clprob(ngram ong,double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible) ong:|" << ong  << "|\n");
+		VERBOSE(3,"double lmtable::clprob(ngram ong,double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible) ong:|" << ong  << "|\n");
 		
 #ifdef TRACE_CACHELM
 		//		if (probcache && ong.size==maxlev && sentence_id>0) {
@@ -2537,6 +2582,7 @@ namespace irstlm {
 		if (ong.size==0) {
 			if (statesize!=NULL) *statesize=0;
 			if (state!=NULL) *state=NULL;
+			if (ngramstate!=NULL) *ngramstate=NULL;
 			if (extendible!=NULL) *extendible=false;
 			return 0.0;
 		}
@@ -2554,6 +2600,7 @@ namespace irstlm {
 			if (bow) *bow = pst_get.bow;
 			if (bol) *bol = pst_get.bol;
 			if (state) *state = pst_get.state;
+			if (ngramstate) *ngramstate = pst_get.ngramstate;
 			if (statesize) *statesize = pst_get.statesize;
 			if (extendible) *extendible = pst_get.extendible;
 			
@@ -2563,12 +2610,13 @@ namespace irstlm {
 		//cache miss
 		
 		prob_and_state_t pst_add;
-		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
+		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.ngramstate), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
 		
 		
 		if (bow) *bow = pst_add.bow;
 		if (bol) *bol = pst_add.bol;
 		if (state) *state = pst_add.state;
+		if (ngramstate) *ngramstate = pst_add.ngramstate;
 		if (statesize) *statesize = pst_add.statesize;
 		if (extendible) *extendible = pst_add.extendible;
 		
@@ -2581,16 +2629,19 @@ namespace irstlm {
 		}
 		return logpr;
 #else
-		return lmtable::lprob(ong, bow, bol, state, statesize, extendible);
+//		return lmtable::lprob(ong, bow, bol, state, statesize, extendible);
+		return lmtable::lprob(ong, bow, bol, ngramstate, state, statesize, extendible);
 #endif
 	};
 	
 	
 	//return log10 probsL use cache memory
 	//this function simulates the clprob(ngram, ...) but it takes as input an array of codes instead of the ngram
-	double lmtable::clprob(int* codes, int sz, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+//	double lmtable::clprob(int* codes, int sz, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+	double lmtable::clprob(int* codes, int sz, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible)
 	{
-		VERBOSE(3," double lmtable::clprob(int* codes, int sz, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)\n");
+//		VERBOSE(3," double lmtable::clprob(int* codes, int sz, double* bow, int* bol, char** state, unsigned int* statesize, bool* extendible)\n");
+		VERBOSE(3," double lmtable::clprob(int* codes, int sz, double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible)\n");
 #ifdef TRACE_CACHELM
 		//		if (probcache && sz==maxlev && sentence_id>0) {
 		if (probcache && sentence_id>0) {
@@ -2602,6 +2653,7 @@ namespace irstlm {
 		if (sz==0) {
 			if (statesize!=NULL) *statesize=0;
 			if (state!=NULL) *state=NULL;
+			if (ngramstate!=NULL) *ngramstate=NULL;
 			if (extendible!=NULL) *extendible=false;
 			return 0.0;
 		}
@@ -2621,6 +2673,7 @@ namespace irstlm {
 			if (bow) *bow = pst_get.bow;
 			if (bol) *bol = pst_get.bol;
 			if (state) *state = pst_get.state;
+			if (ngramstate) *ngramstate = pst_get.ngramstate;
 			if (statesize) *statesize = pst_get.statesize;
 			if (extendible) *extendible = pst_get.extendible;
 			
@@ -2635,12 +2688,14 @@ namespace irstlm {
 		
 		//cache miss
 		prob_and_state_t pst_add;
-		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
+//		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
+		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.ngramstate), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
 		
 		
 		if (bow) *bow = pst_add.bow;
 		if (bol) *bol = pst_add.bol;
 		if (state) *state = pst_add.state;
+		if (ngramstate) *ngramstate = pst_add.ngramstate;
 		if (statesize) *statesize = pst_add.statesize;
 		if (extendible) *extendible = pst_add.extendible;
 		
@@ -2663,7 +2718,8 @@ namespace irstlm {
 		 logpr = lmtable::lprob(ong, bow, bol, state, statesize, extendible);
 		 return logpr;
 		 */
-		return lmtable::lprob(ong, bow, bol, state, statesize, extendible);
+//		return lmtable::lprob(ong, bow, bol, state, statesize, extendible);
+		return lmtable::lprob(ong, bow, bol, ngramstate, state, statesize, extendible);
 #endif
 	};
 	
diff --git a/src/lmtable.h b/src/lmtable.h
index d33e6f1..ee7b9b2 100644
--- a/src/lmtable.h
+++ b/src/lmtable.h
@@ -65,6 +65,7 @@
 typedef enum {INTERNAL,QINTERNAL,LEAF,QLEAF} LMT_TYPE;
 typedef char* node;
 
+typedef unsigned int  ngram_state_t; //type for the index of a full ngram inside the table (used as LM state instead of its address)
 typedef unsigned int  table_entry_pos_t; //type for pointing to a full ngram in the table
 typedef unsigned long table_pos_t; // type for pointing to a single char in the table
 typedef unsigned char qfloat_t; //type for quantized probabilities
@@ -315,11 +316,17 @@ public:
 	void filter(const char* /* unused parameter: lmfile */) {};
 	
 	
-	virtual double  lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
-	virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-	virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+//	virtual double  lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
+//	virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+//	virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
 	
 	
+	
+	virtual double  lprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
+  virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+  virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+	
+
 	void *search(int lev,table_entry_pos_t offs,table_entry_pos_t n,int sz,int *w, LMT_ACTION action,char **found=(char **)NULL);
 	
 	int mybsearch(char *ar, table_entry_pos_t n, int size, char *key, table_entry_pos_t *idx);
@@ -342,7 +349,7 @@ public:
 	
 	virtual const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
 	virtual const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
-        virtual const char *cmaxsuffptr(int* codes, int sz, unsigned int* size=NULL);
+  virtual const char *cmaxsuffptr(int* codes, int sz, unsigned int* size=NULL);
 	
 	inline void putmem(char* ptr,int value,int offs,int size) {
 		MY_ASSERT(ptr!=NULL);
@@ -462,7 +469,6 @@ public:
 		return value;
 	};
 	
-	
 	inline float bow(node nd,LMT_TYPE ndt) {
 		int offs=LMTCODESIZE+(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
 		
diff --git a/src/ngramcache.h b/src/ngramcache.h
index dc47952..232afa9 100644
--- a/src/ngramcache.h
+++ b/src/ngramcache.h
@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 
 #include "mempool.h"
 #include "htable.h"
+#include "util.h"
 
 #define NGRAMCACHE_t ngramcache
 
@@ -32,12 +33,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 
 typedef struct PROB_AND_STATE_ENTRY {
   double logpr;   //!< probability value of an ngram
+  ngram_state_t ngramstate;  //!< index of the largest suffix of an n-gram contained in the LM table.
   char* state;  //!< the largest suffix of an n-gram contained in the LM table.
   unsigned int statesize; //!< LM statesize of an ngram
   double bow;     //!< backoff weight
   int bol;        //!< backoff level
   bool extendible;  //!< flag for extendibility of the ngram
-  PROB_AND_STATE_ENTRY(double lp=0.0, char* st=NULL, unsigned int stsz=0, double bw=0.0, int bl=0, bool ex=false): logpr(lp), state(st), statesize(stsz), bow(bw), bol(bl), extendible(ex) {}; //initializer
+//  PROB_AND_STATE_ENTRY(double lp=0.0, char* st=NULL, unsigned int stsz=0, double bw=0.0, int bl=0, bool ex=false): logpr(lp), state(st), statesize(stsz), bow(bw), bol(bl), extendible(ex) {}; //initializer
+  PROB_AND_STATE_ENTRY(double lp=0.0, ngram_state_t ngramst=0, char* st=NULL, unsigned int stsz=0, double bw=0.0, int bl=0, bool ex=false): logpr(lp), ngramstate(ngramst), state(st), statesize(stsz), bow(bw), bol(bl), extendible(ex) {}; //initializer
 } prob_and_state_t;
 
 void print(prob_and_state_t* pst,  std::ostream& out=std::cout);
diff --git a/src/util.h b/src/util.h
index 34db77a..3db4416 100644
--- a/src/util.h
+++ b/src/util.h
@@ -50,6 +50,8 @@ using namespace std;
 #define SSEED 50
 
 class ngram;
+typedef unsigned int  ngram_state_t; //type for the index of a full ngram inside the table (used as LM state instead of its address)
+
 class mfstream;
 
 std::string gettempfolder();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git



More information about the debian-science-commits mailing list