[irstlm] 26/78: code cleanup and optimization

Tue May 17 07:47:03 UTC 2016

This is an automated email from the git hooks/post-receive script.

giuliopaci-guest pushed a commit to tag adaptiveLM.v0.10
in repository irstlm.

commit b0a4bcdbc0b67690d943f0ccdfc0c1ab36a09a4e
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date:   Mon Nov 9 09:42:34 2015 +0100

    code cleanup and optimization
---
 src/context-dependent-evaluation.cpp | 24 ++++++++----------------
 src/lmContainer.cpp                  | 22 ++++++++++++++++++++++
 src/lmContainer.h                    | 10 +++++++++-
 src/lmInterpolation.cpp              |  6 +++++-
 src/lmInterpolation.h                |  4 +++-
 5 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/src/context-dependent-evaluation.cpp b/src/context-dependent-evaluation.cpp
index 0b32f27..188b48c 100644
--- a/src/context-dependent-evaluation.cpp
+++ b/src/context-dependent-evaluation.cpp
@@ -201,8 +201,10 @@ int main(int argc, char **argv)
   lmt->setMaxLoadedLevel(requiredMaxlev);
 	
   lmt->load(infile);
-	((lmContextDependent*) lmt)->set_Active(context_model_active);
-	((lmContextDependent*) lmt)->set_Normalized(context_model_normalization);
+//	((lmContextDependent*) lmt)->set_Active(context_model_active);
+	lmt->set_Active(context_model_active);
+//	((lmContextDependent*) lmt)->set_Normalized(context_model_normalization);
+	lmt->set_Normalized(context_model_normalization);
 	
   if (dub) lmt->setlogOOVpenalty((int)dub);
 	
@@ -236,18 +238,6 @@ int main(int argc, char **argv)
 		if (lmt->getLanguageModelType() != _IRSTLM_LMCONTEXTDEPENDENT) {
 			exit_error(IRSTLM_ERROR_DATA, "This type of score is not available for the LM loaded");
 		}
-		if (lmt->getLanguageModelType() == _IRSTLM_LMINTERPOLATION) {
-			debug = (debug>4)?4:debug;
-			std::cerr << "Maximum debug value for this LM type: " << debug << std::endl;
-		}
-		if (lmt->getLanguageModelType() == _IRSTLM_LMMACRO) {
-			debug = (debug>4)?4:debug;
-			std::cerr << "Maximum debug value for this LM type: " << debug << std::endl;
-		}
-		if (lmt->getLanguageModelType() == _IRSTLM_LMCLASS) {
-			debug = (debug>4)?4:debug;
-			std::cerr << "Maximum debug value for this LM type: " << debug << std::endl;
-		}
 		
 		std::cerr << "Start Topic Score generation " << std::endl;
 		std::cerr << "OOV code: " << lmt->getDict()->oovcode() << std::endl;
@@ -403,13 +393,15 @@ int main(int argc, char **argv)
 			std::string context;
 			std::string sentence_lexiconfile;
 			
-			bool withLexicon = ((lmContextDependent*) lmt)->GetSentenceAndLexicon(tmp_sentence,sentence_lexiconfile,line_str);
+//			bool withLexicon = ((lmContextDependent*) lmt)->GetSentenceAndLexicon(tmp_sentence,sentence_lexiconfile,line_str);
+			bool withLexicon = lmt->GetSentenceAndLexicon(tmp_sentence,sentence_lexiconfile,line_str);
 			bool withContext = lmt->GetSentenceAndContext(sentence,context,tmp_sentence);
 			
 			//getting apriori topic weights
 			topic_map_t apriori_topic_map;
 			if (withContext){
-				((lmContextDependent*) lmt)->setContextMap(apriori_topic_map,context);
+//				((lmContextDependent*) lmt)->setContextMap(apriori_topic_map,context);
+				lmt->setContextMap(apriori_topic_map,context);
 			}
 			// computation using std::string
 			// loop over ngrams of the sentence
diff --git a/src/lmContainer.cpp b/src/lmContainer.cpp
index 654a064..f8c8dae 100644
--- a/src/lmContainer.cpp
+++ b/src/lmContainer.cpp
@@ -180,6 +180,28 @@ namespace irstlm {
 		return false;
 	};
 	
+	bool lmContainer::GetSentenceAndLexicon(std::string& sentence, std::string& lexiconfile, std::string& line)
+	{
+		VERBOSE(2,"bool lmContextDependent::GetSentenceAndLexicon" << std::endl);
+		VERBOSE(2,"line:|" << line << "|" << std::endl);
+		bool ret;
+		size_t pos = line.find(lexicon_delimiter);	
+		if (pos != std::string::npos){ // lexicon_delimiter is found
+			sentence = line.substr(0, pos);
+			line.erase(0, pos + lexicon_delimiter.length());
+			
+			//getting lexicon file name
+			lexiconfile = line;
+			ret=true;
+		}else{
+			sentence = line;
+			lexiconfile = "";
+			ret=false;
+		}	
+		VERBOSE(2,"sentence:|" << sentence << "|" << std::endl);	
+		VERBOSE(2,"lexicon:|" << lexiconfile << "|" << std::endl);
+		return ret;
+	}
 	bool lmContainer::GetSentenceAndContext(std::string& sentence, std::string& context, std::string& line)
 	{
 		VERBOSE(2,"bool lmContextDependent::GetSentenceAndContext" << std::endl);
diff --git a/src/lmContainer.h b/src/lmContainer.h
index 6efa9a8..cf30cea 100644
--- a/src/lmContainer.h
+++ b/src/lmContainer.h
@@ -318,9 +318,17 @@ namespace irstlm {
 		
 		inline std::string getContextDelimiter() const{ return context_delimiter; }
 		
+		bool GetSentenceAndLexicon(std::string& sentence, std::string& lexiconfile, std::string& line);
 		bool GetSentenceAndContext(std::string& sentence, std::string& context, std::string& line);
-		
 		void setContextMap(topic_map_t& topic_map, const std::string& context);
+		virtual inline void set_Active(bool val)
+		{
+			UNUSED(val);
+		};
+		virtual bool is_Normalized(){ return  false; };
+		virtual void set_Normalized(bool val)		{
+			UNUSED(val);
+		};
 		
 	};
 	
diff --git a/src/lmInterpolation.cpp b/src/lmInterpolation.cpp
index 7da53bc..c1dcc94 100644
--- a/src/lmInterpolation.cpp
+++ b/src/lmInterpolation.cpp
@@ -123,6 +123,7 @@ namespace irstlm {
 			VERBOSE(2,"i:" << i << " m_isinverted[i]:" << m_isinverted[i] << endl);
 			
 			m_weight[i] = atof(words[idx_weight]);
+			VERBOSE(2,"this:|" << (void*) this << "| i:" << i << " m_weight[i]:" << m_weight[i] << endl);
 			if (m_map_flag){
 				m_idx[words[idx_name]] = i;
 				m_name[i] = words[idx_name];
@@ -135,7 +136,7 @@ namespace irstlm {
 			}
 			m_file[i] = words[idx_file];
 			
-			VERBOSE(2,"lmInterpolation::load(const std::string &filename,int mmap) i:" << i << " m_name:|"<< m_name[i] << "|" " m_file:|"<< m_file[i] << "| isadaptve:|" << m_isadaptive << "|" << std::endl);
+			VERBOSE(2,"lmInterpolation::load(const std::string &filename,int mmap) i:" << i << " m_name:|"<< m_name[i] << "|" " m_file:|"<< m_file[i] << "| isadaptive:|" << m_isadaptive << "|" << std::endl);
 			
 			m_lm[i] = load_lm(i,memmap,ngramcache_load_factor,dictionary_load_factor);
 			//set the actual value for inverted flag, which is known only after loading the lM
@@ -283,6 +284,9 @@ namespace irstlm {
 		bool _extendible=false,actualextendible=false;
 		
 		for (size_t i=0; i<m_number_lm; i++) {
+			VERBOSE(2,"this:|" << (void*) this << "| i:" << i << " m_weight[i]:" << m_weight[i] << endl);
+		}
+		for (size_t i=0; i<m_number_lm; i++) {
 			
 			if (m_weight[i]>0.0){
 				ngram _ng(m_lm[i]->getDict());
diff --git a/src/lmInterpolation.h b/src/lmInterpolation.h
index 8e5e429..750adf6 100644
--- a/src/lmInterpolation.h
+++ b/src/lmInterpolation.h
@@ -89,7 +89,9 @@ namespace irstlm {
 			ngram ng(dict);
 			ng.pushw(text);
 			VERBOSE(3,"ng:|" << ng << "|" << std::endl);		
-			
+			for (size_t i=0; i<m_number_lm; i++) {
+				VERBOSE(2,"this:|" << (void*) this << "| i:" << i << " m_weight[i]:" << m_weight[i] << endl);
+			}
 			MY_ASSERT (ng.size == (int) text.size());
 			return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow);
 		}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git