[irstlm] 118/126: some fixes

Tue May 17 07:46:51 UTC 2016

This is an automated email from the git hooks/post-receive script.

giuliopaci-guest pushed a commit to annotated tag adaptiveLM.v0.1
in repository irstlm.

commit 7d818ddf7474427539f5bc7d841356f969b3a057
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date:   Tue Oct 20 00:34:36 2015 +0200

    some fixes
---
 src/context-dependent-evaluation.cpp | 15 ++++----
 src/lmContainer.h                    | 18 ++++++++++
 src/lmContextDependent.cpp           | 70 ++++++++++++++++++++++++++++++------
 src/lmContextDependent.h             | 18 ++++++++--
 src/lmInterpolation.cpp              |  4 ++-
 5 files changed, 102 insertions(+), 23 deletions(-)

diff --git a/src/context-dependent-evaluation.cpp b/src/context-dependent-evaluation.cpp
index e7d738c..6d575bf 100644
--- a/src/context-dependent-evaluation.cpp
+++ b/src/context-dependent-evaluation.cpp
@@ -266,7 +266,7 @@ int main(int argc, char **argv)
 				
 				// reset ngram at begin of sentence
 				if (word_vec.at(i) == lmt->getDict()->BoS()) {
-					size=0;
+					size=1;
 					continue;
 				}
 				first = last - size;
@@ -389,9 +389,6 @@ int main(int argc, char **argv)
 			string_vec_t word_vec;
 			split(sentence, ' ', word_vec);
 			
-			//add the BoS symbol at the beginning
-			string_vec_t::iterator it = word_vec.insert ( word_vec.begin() , lmt->getDict()->BoS() );
-			
 			//first points to the last recent term to take into account
 			//last points to the position after the most recent term to take into account
 			//last could point outside the vector of string; do NOT use word_vec.at(last)
@@ -399,21 +396,21 @@ int main(int argc, char **argv)
 			size_t order = lmt->maxlevel();
 			
 			//start the computation from the second word because the first is the BoS symbol,but including BoS in the ngrams
-			size_t size=1;
-			for (size_t i=1; i< word_vec.size(); ++i){
+			size_t size=0;
+			for (size_t i=0; i< word_vec.size(); ++i){
 				++size;
 				size=(size<order)?size:order;
 				last=i+1;
 				
 				// reset ngram at begin of sentence
 				if (word_vec.at(i) == lmt->getDict()->BoS()) {
-					size=0;
+					size=1;
 					continue;
 				}
 				first = last - size;
 				
 				string_vec_t tmp_word_vec(word_vec.begin() + first, word_vec.begin() +last);
-				
+			
 				if (size>=1) {
 					VERBOSE(2,"computing prob for first:|" << first << "| and last:|" << last << "|" << std::endl);
 					
@@ -813,7 +810,7 @@ int main(int argc, char **argv)
 				
 				// reset ngram at begin of sentence
 				if (word_vec.at(word_pos) == lmt->getDict()->BoS()) {
-					size=0;
+					size=1;
 					continue;
 				}
 				first = last - size;
diff --git a/src/lmContainer.h b/src/lmContainer.h
index b8ac737..a89ccb0 100644
--- a/src/lmContainer.h
+++ b/src/lmContainer.h
@@ -180,6 +180,24 @@ public:
   }
 	
 	
+  virtual double clprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+    UNUSED(lm_weights);
+    UNUSED(topic_weights);
+    return clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
+  };
+  virtual double clprob(int* ng, int ngsize, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+    UNUSED(lm_weights);
+    UNUSED(topic_weights);
+    return clprob(ng, ngsize, bow, bol, maxsuffptr, statesize, extendible);
+  }
+	virtual double clprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+		VERBOSE(3,"lmContainer::clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,...." << std::endl);
+    UNUSED(lm_weights);
+    UNUSED(topic_weights);
+    return clprob(text, bow, bol, maxsuffptr, statesize, extendible);
+  }
+	
+	
   virtual const char *cmaxsuffptr(ngram ng, unsigned int* statesize=NULL)
   {
     UNUSED(ng);
diff --git a/src/lmContextDependent.cpp b/src/lmContextDependent.cpp
index 9fc530a..a230d23 100644
--- a/src/lmContextDependent.cpp
+++ b/src/lmContextDependent.cpp
@@ -159,6 +159,17 @@ namespace irstlm {
 		return lprob(ng, text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
 	}
 	
+	double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	{
+		VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, " << std::endl);
+		//create the actual ngram
+		ngram ong(dict);
+		ong.pushc(codes,sz);
+		MY_ASSERT (ong.size == sz);
+		
+		return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);	
+	}
+	
 	double lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
 	{
 		VERBOSE(2,"lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, ...)" << std::endl);
@@ -172,6 +183,43 @@ namespace irstlm {
 		return lprob(ng, text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
 	}
 	
+	
+	double lmContextDependent::lprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	{
+		VERBOSE(2,"lmContextDependent::lprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, ...)" << std::endl);
+		string_vec_t text;
+		if (ng.size>1){
+			text.push_back(ng.dict->decode(*ng.wordp(2)));
+		}
+		text.push_back(ng.dict->decode(*ng.wordp(1)));
+		
+		return lprob(ng, text, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+	}
+	
+	double lmContextDependent::lprob(int* codes, int sz, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	{
+		VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, lm_map_t& lm_weights, topic_map_t& topic_weights, " << std::endl);
+		//create the actual ngram
+		ngram ong(dict);
+		ong.pushc(codes,sz);
+		MY_ASSERT (ong.size == sz);
+		
+		return lprob(ong, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);	
+	}
+	
+	double lmContextDependent::lprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	{
+		VERBOSE(2,"lmContextDependent::lprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, ...)" << std::endl);
+		
+		//create the actual ngram
+		ngram ng(dict);
+		ng.pushw(text);
+		VERBOSE(3,"ng:|" << ng << "|" << std::endl);		
+		
+		MY_ASSERT (ng.size == (int) text.size());
+		return lprob(ng, text, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+	}
+	
 	double lmContextDependent::lprob(ngram& ng, string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
 	{
 		VERBOSE(2,"lmContextDependent::lprob(ngram& ng, topic_map_t& topic_weights, ...)" << std::endl);
@@ -183,6 +231,17 @@ namespace irstlm {
 		return ret_logprob;
 	}
 	
+	double lmContextDependent::lprob(ngram& ng, string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	{
+		VERBOSE(2,"lmContextDependent::lprob(ngram& ng, lm_map_t& lm_weights, topic_map_t& topic_weights, ...)" << std::endl);
+		double lm_logprob = m_lm->clprob(ng, lm_weights, bow, bol, maxsuffptr, statesize, extendible);
+		double similarity_score = m_similaritymodel->context_similarity(text, topic_weights);
+		double ret_logprob = lm_logprob + m_similaritymodel_weight * similarity_score;
+		VERBOSE(2, "lm_log10_pr:" << lm_logprob << " similarity_score:" << similarity_score << " m_similaritymodel_weight:" << m_similaritymodel_weight << " ret_log10_pr:" << ret_logprob << std::endl);
+		
+		return ret_logprob;
+	}
+	
 	
 	double lmContextDependent::total_clprob(string_vec_t& text, topic_map_t& topic_weights)
 	{		
@@ -215,17 +274,6 @@ namespace irstlm {
 		return log10(tot_pr);
 	}
 	
-	double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
-	{
-		VERBOSE(3,"lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, " << std::endl);
-		//create the actual ngram
-		ngram ong(dict);
-		ong.pushc(codes,sz);
-		MY_ASSERT (ong.size == sz);
-		
-		return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);	
-	}
-	
 	double lmContextDependent::setlogOOVpenalty(int dub)
 	{
 		MY_ASSERT(dub > dict->size());
diff --git a/src/lmContextDependent.h b/src/lmContextDependent.h
index 3e6a072..6e6bb53 100644
--- a/src/lmContextDependent.h
+++ b/src/lmContextDependent.h
@@ -115,7 +115,6 @@ namespace irstlm {
 			UNUSED(extendible);
 			assert(false);
 		};
-		
 		virtual double clprob(ngram ng,            double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
 			VERBOSE(0, "virtual double clprob(ngram ng,            double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
 			UNUSED(ng);
@@ -126,7 +125,6 @@ namespace irstlm {
 			UNUSED(extendible);
 			assert(false);
 		};
-		
 		virtual double clprob(string_vec_t& text, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
 			VERBOSE(0, "virtual double clprob(string_vec_t& text, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
 			UNUSED(text);
@@ -148,11 +146,27 @@ namespace irstlm {
 			return lprob(text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
 		};
 		
+		virtual double clprob(int* ng, int ngsize, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+			return lprob(ng, ngsize, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+		};
+		virtual double clprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+			return lprob(ng, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+		};
+		virtual double clprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+			return lprob(text, lm_weights, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+		};
+		
 		virtual double lprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
 		virtual double lprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
 		virtual double lprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
 		
+		virtual double lprob(int* ng, int ngsize, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+		virtual double lprob(ngram ng, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+		virtual double lprob(string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+		
 		double lprob(ngram& ng, string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible);
+		double lprob(ngram& ng, string_vec_t& text, lm_map_t& lm_weights, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible);
+		
 		double total_clprob(string_vec_t& text, topic_map_t& topic_weights);
 		double total_clprob(ngram& ng, topic_map_t& topic_weights);		
 		
diff --git a/src/lmInterpolation.cpp b/src/lmInterpolation.cpp
index c6deb75..18cf612 100644
--- a/src/lmInterpolation.cpp
+++ b/src/lmInterpolation.cpp
@@ -109,7 +109,7 @@ namespace irstlm {
 			inp.getline(line,BUFSIZ,'\n');
 			tokenN = parseWords(line,words,idx_size);
 			
-			if(tokenN < idx_file || tokenN > idx_inverted) {
+			if(tokenN < idx_file || tokenN > idx_size) {
 				exit_error(IRSTLM_ERROR_DATA, "ERROR: wrong header format of configuration file\ncorrect format:\nLMINTERPOLATION number_of_models\nweight_of_LM_1 filename_of_LM_1 [inverted]\nweight_of_LM_2 filename_of_LM_2\nor\nLMINTERPOLATION number_of_models MAP\nweight_of_LM_1 name_LM_1 filename_of_LM_1\nweight_of_LM_2 name_LM_2 filename_of_LM_2");
 			}
 			
@@ -179,6 +179,7 @@ namespace irstlm {
 	//return log10 prob of an ngram
 	double lmInterpolation::clprob(ngram ng, lm_map_t& lm_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
 	{
+		VERBOSE(1,"double lmInterpolation::clprob(ngram ng, lm_map_t& lm_weights,...)"  << std::endl);
 		
 		double pr=0.0;
 		double _logpr;
@@ -264,6 +265,7 @@ namespace irstlm {
 	//return log10 prob of an ngram
 	double lmInterpolation::clprob(ngram ng, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
 	{
+		VERBOSE(1,"double lmInterpolation::clprob(ngram ng, ...)"  << std::endl);
 		
 		double pr=0.0;
 		double _logpr;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git