[irstlm] 40/126: minor changes and code cleanup related to usage of log_10 and ln for probabilities

Tue May 17 07:46:43 UTC 2016

This is an automated email from the git hooks/post-receive script.

giuliopaci-guest pushed a commit to annotated tag adaptiveLM.v0.1
in repository irstlm.

commit 1482cd32fc46ac1b1ce3a3647e5efb74ff6f453c
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date:   Tue Aug 4 13:52:13 2015 +0200

    minor changes and code cleanup related to usage of log_10 and ln for probabilities
---
 src/compile-lm.cpp      | 13 +++++++------
 src/interplm.cpp        |  2 +-
 src/interpolate-lm.cpp  | 21 +++++++++++----------
 src/lmInterpolation.cpp | 12 +++++++-----
 src/lmtable.h           |  5 +++--
 src/ngt.cpp             |  5 +++--
 src/quantize-lm.cpp     |  8 ++------
 7 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/src/compile-lm.cpp b/src/compile-lm.cpp
index f3c534b..e5033e5 100644
--- a/src/compile-lm.cpp
+++ b/src/compile-lm.cpp
@@ -370,8 +370,8 @@ int main(int argc, char **argv)
           Nw++;
           sent_Nw++;
           if (sent_PP_flag && (*ng.wordp(1)==eos)) {
-            sent_PP=exp((-sent_logPr * log(10.0)) /sent_Nw);
-            sent_PPwp= sent_PP * (1 - 1/exp((sent_Noov *  lmt->getlogOOVpenalty()) * log(10.0) / sent_Nw));
+            sent_PP=exp((-sent_logPr * M_LN10) /sent_Nw);
+            sent_PPwp= sent_PP * (1 - 1/exp((sent_Noov *  lmt->getlogOOVpenalty()) * M_LN10 / sent_Nw));
 
             std::cout << "%% sent_Nw=" << sent_Nw
                       << " sent_PP=" << sent_PP
@@ -393,9 +393,9 @@ int main(int argc, char **argv)
         }
       }
 
-      PP=exp((-logPr * log(10.0)) /Nw);
+      PP=exp((-logPr * M_LN10) /Nw);
 
-      PPwp= PP * (1 - 1/exp((Noov *  lmt->getlogOOVpenalty()) * log(10.0) / Nw));
+      PPwp= PP * (1 - 1/exp((Noov *  lmt->getlogOOVpenalty()) * M_LN10 / Nw));
 
       std::cout << "%% Nw=" << Nw
                 << " PP=" << PP
@@ -448,10 +448,11 @@ int main(int argc, char **argv)
           std::cerr << ".";
           lmt->check_caches_levels();
         }
-        std::cout << ng << " p= " << lmt->clprob(ng,&bow,&bol) * M_LN10;
+				//pay attention: lmt->clprob(ng,&bow,&bol) returns log10(prob(ng)); hence lmt->clprob(ng,&bow,&bol) * M_LN10 is ln(prob(ng))
+        std::cout << ng << " ln_p= " << lmt->clprob(ng,&bow,&bol) * M_LN10;
         std::cout << " bo= " << bol << std::endl;
       } else {
-        std::cout << ng << " p= NULL" << std::endl;
+        std::cout << ng << " ln_p= NULL" << std::endl;
       }
       std::cout << "> ";
     }
diff --git a/src/interplm.cpp b/src/interplm.cpp
index c671409..b313f65 100644
--- a/src/interplm.cpp
+++ b/src/interplm.cpp
@@ -399,7 +399,7 @@ void interplm::test_txt(char* filename,int size,bool /* unused parameter: backof
       pr=prob(ng,ng.size);
 
       if (outpr)
-        outp << ng << "[" << ng.size << "-gram]" << " " << pr << " " << log(pr)/log(10.0) << std::endl;
+        outp << ng << "[" << ng.size << "-gram]" << " " << pr << " " << log10(pr) << std::endl;
 
       lp-=log(pr);
 
diff --git a/src/interpolate-lm.cpp b/src/interpolate-lm.cpp
index d3071b3..9b10cae 100644
--- a/src/interpolate-lm.cpp
+++ b/src/interpolate-lm.cpp
@@ -424,8 +424,8 @@ int main(int argc, char **argv)
             if (*ong.wordp(1) != lmt[i]->getDict()->oovcode()) OOV_all_flag=false; //OOV wrt LM[i]
             if (*ong.wordp(1) == lmt[i]->getDict()->oovcode()) OOV_any_flag=true; //OOV wrt LM[i]
           }
-
-          lPr=log(Pr)/M_LN10;
+					
+          lPr=log10(Pr);
           logPr+=lPr;
           sent_logPr+=lPr;
 
@@ -433,9 +433,9 @@ int main(int argc, char **argv)
             std::cout << ng.dict->decode(*ng.wordp(1)) << " [" << ng.size-minbol << "]" << " ";
             if (*ng.wordp(1)==eos) std::cout << std::endl;
           }
-          if (debug==2)
-            std::cout << ng << " [" << ng.size-minbol << "-gram]" << " " << log(Pr) << std::endl;
-
+          if (debug==2){ //pay attention: log-prob is ln(Pr)
+            std::cout << ng << " [" << ng.size-minbol << "-gram]" << " ln_p=" << log(Pr) << std::endl;
+					}
           if (minbol) {
             Nbo++;  //all LMs have back-offed by at least one
             sent_Nbo++;
@@ -454,7 +454,7 @@ int main(int argc, char **argv)
           sent_Nw++;
 
           if (*ng.wordp(1)==eos && sent_PP_flag) {
-            sent_PP=exp((-sent_logPr * log(10.0)) /sent_Nw);
+            sent_PP=exp((-sent_logPr * M_LN10) / sent_Nw);
             std::cout << "%% sent_Nw=" << sent_Nw
                       << " sent_PP=" << sent_PP
                       << " sent_Nbo=" << sent_Nbo
@@ -519,15 +519,16 @@ int main(int argc, char **argv)
         for (i=0; i<N; i++) {
           ngram ong(lmt[i]->getDict());
           ong.trans(ng);
-          logpr = lmt[i]->clprob(ong,&bow,&bol,NULL,&statesize); //LM log-prob (using caches if available)
+          logpr = lmt[i]->clprob(ong,&bow,&bol,NULL,&statesize); //LM log10-prob (using caches if available)
 
           Pr+=w[i] * pow(10.0,logpr); //actual prob of the interpolation
-          std::cout << "lm " << i << ":" << " logpr: " << logpr << " weight: " << w[i] << std::endl;
+          std::cout << "lm " << i << ":" << " log10_pr: " << logpr << " weight: " << w[i] << std::endl;
           if (maxbol<bol) maxbol=bol;
           if (maxstatesize<statesize) maxstatesize=statesize;
         }
-
-        std::cout << ng << " p= " << log(Pr) << " bo= " << maxbol << " recombine= " << maxstatesize << std::endl;
+				
+				//pay attention: log-prob is ln(Pr)
+        std::cout << ng << " ln_p= " << log(Pr) << " log10_p= " << log10(Pr) << " bo= " << maxbol << " recombine= " << maxstatesize << std::endl;
 
         if ((n % 10000000)==0) {
           std::cerr << "." << std::endl;
diff --git a/src/lmInterpolation.cpp b/src/lmInterpolation.cpp
index de66a96..a214f78 100644
--- a/src/lmInterpolation.cpp
+++ b/src/lmInterpolation.cpp
@@ -147,7 +147,7 @@ lmContainer* lmInterpolation::load_lm(int i,int memmap, float nlf, float dlf)
   return lmt;
 }
 
-
+//return log10 prob of an ngram
 double lmInterpolation::clprob(ngram ng, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
 {
 	
@@ -213,7 +213,7 @@ double lmInterpolation::clprob(ngram ng, double* bow,int* bol,char** maxsuffptr,
 	 if (bow) std::cerr << " bow:" << *bow << std::endl;
 	 if (bol) std::cerr << " bol:" << *bol << std::endl;
 	 */
-  return log(pr)/M_LN10;
+  return log10(pr);
 }
 
 double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
@@ -234,10 +234,12 @@ double lmInterpolation::setlogOOVpenalty(int dub)
   double OOVpenalty=0.0;
   for (int i=0; i<m_number_lm; i++) {
     m_lm[i]->setlogOOVpenalty(dub);  //set OOV Penalty for each LM
-    _logpr=m_lm[i]->getlogOOVpenalty();
-    OOVpenalty+=m_weight[i]*exp(_logpr);
+    _logpr=m_lm[i]->getlogOOVpenalty(); // logOOV penalty is in log10
+//    OOVpenalty+=m_weight[i]*exp(_logpr);
+    OOVpenalty+=m_weight[i]*exp(_logpr*M_LN10);  // logOOV penalty is in log10
   }
-  logOOVpenalty=log(OOVpenalty);
+//  logOOVpenalty=log(OOVpenalty);
+  logOOVpenalty=log10(OOVpenalty);
   return logOOVpenalty;
 }
 }//namespace irstlm
diff --git a/src/lmtable.h b/src/lmtable.h
index 07e25d0..a6c8539 100644
--- a/src/lmtable.h
+++ b/src/lmtable.h
@@ -244,15 +244,16 @@ public:
 	
 	void configure(int n,bool quantized);
 	
-	//set penalty for OOV words
+	//returns log10 penalty for OOV words
 	inline double getlogOOVpenalty() const {
 		return logOOVpenalty;
 	}
 	
+	//set penalty for OOV words, as log10
 	inline double setlogOOVpenalty(int dub) {
 		MY_ASSERT(dub > dict->size());
 		dictionary_upperbound = dub;
-		return logOOVpenalty=log((double)(dictionary_upperbound - dict->size()))/M_LN10;
+		return logOOVpenalty=log10((double)(dictionary_upperbound - dict->size()));
 	}
 	
 	inline double setlogOOVpenalty(double oovp) {
diff --git a/src/ngt.cpp b/src/ngt.cpp
index 511e693..321f67c 100644
--- a/src/ngt.cpp
+++ b/src/ngt.cpp
@@ -421,12 +421,13 @@ int main(int argc, char **argv)
     }
 
     PP=exp(-logPr/Nw);
-    PPwp= PP * exp(Noov * log(10000000.0-ngt->dict->size())/Nw);
+		int dub=10000000;
+    PPwp= PP * exp(Noov * log((double) dub - ngt->dict->size())/Nw);
 
     cout << "%%% NGT TEST OF SMT LM\n";
     cout << "%% LM=" << inp << " SIZE="<< ngt->maxlevel();
     cout << "   TestFile="<< ftlm << "\n";
-    cout << "%% OOV PENALTY = 1/" << 10000000.0-ngt->dict->size() << "\n";
+    cout << "%% OOV PENALTY = 1/" << (dub - ngt->dict->size()) << "\n";
 
 
     cout << "%% Nw=" << Nw << " PP=" << PP << " PPwp=" << PPwp
diff --git a/src/quantize-lm.cpp b/src/quantize-lm.cpp
index d713349..7f18c41 100644
--- a/src/quantize-lm.cpp
+++ b/src/quantize-lm.cpp
@@ -414,11 +414,7 @@ int main(int argc, char **argv)
 
 int ComputeCluster(int centers,double* ctrs,unsigned int N,DataItem* bintable)
 {
-
-
   //cerr << "\nExecuting Clutering Algorithm:  k=" << centers<< "\n";
-  double log10=log(10.0);
-
   for (unsigned int i=0; i<N; i++) bintable[i].code=0;
 
   //cout << "start sort \n";
@@ -475,13 +471,13 @@ int ComputeCluster(int centers,double* ctrs,unsigned int N,DataItem* bintable)
 
     MY_ASSERT(bintable[i].code < centers);
 
-    ctrs[bintable[i].code]=ctrs[bintable[i].code]+exp(bintable[i].pt * log10);
+    ctrs[bintable[i].code]=ctrs[bintable[i].code]+exp(bintable[i].pt * M_LN10);
 
   }
 
   for (int i=0; i<centers; i++) {
     if (population[i]>0)
-      ctrs[i]=log(ctrs[i]/population[i])/log10;
+      ctrs[i]=log10(ctrs[i]/population[i]);
     else
       ctrs[i]=-99;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git