[irstlm] 22/126: minor changes; code cleanup; re-indentation

Giulio Paci giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:46:41 UTC 2016


This is an automated email from the git hooks/post-receive script.

giuliopaci-guest pushed a commit to annotated tag adaptiveLM.v0.1
in repository irstlm.

commit bb7b80032d4032bfea8f8347841700a7fb8e0cf5
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date:   Wed Jul 22 16:38:24 2015 +0200

    minor changes; code cleanup; re-indentation
---
 src/lmContextDependent.cpp | 211 +++++++++++++++++-------------------
 src/lmContextDependent.h   | 260 +++++++++++++++++++++++----------------------
 2 files changed, 231 insertions(+), 240 deletions(-)

diff --git a/src/lmContextDependent.cpp b/src/lmContextDependent.cpp
index 59ccbc6..9beb720 100644
--- a/src/lmContextDependent.cpp
+++ b/src/lmContextDependent.cpp
@@ -30,7 +30,7 @@
 #include "util.h"
 
 using namespace std;
-	
+
 inline void error(const char* message)
 {
   std::cerr << message << "\n";
@@ -38,120 +38,107 @@ inline void error(const char* message)
 }
 
 namespace irstlm {
-lmContextDependent::lmContextDependent(float nlf, float dlf)
-{
-  ngramcache_load_factor = nlf;
-  dictionary_load_factor = dlf;
-  m_lm=NULL;
-  m_topicmodel=NULL;
+	lmContextDependent::lmContextDependent(float nlf, float dlf)
+	{
+		ngramcache_load_factor = nlf;
+		dictionary_load_factor = dlf;
+		m_lm=NULL;
+		m_topicmodel=NULL;
+		
+		order=0;
+		memmap=0;
+		isInverted=false;
+		
+	}
 	
-  order=0;
-  memmap=0;
-  isInverted=false;
-
-}
-
-lmContextDependent::~lmContextDependent()
-{
-  if (m_lm) delete m_lm;
-  if (m_topicmodel) delete m_topicmodel;
-}
-
-void lmContextDependent::load(const std::string &filename,int mmap)
-{
-  VERBOSE(2,"lmContextDependent::load(const std::string &filename,int memmap)" << std::endl);
-  VERBOSE(2," filename:|" << filename << "|" << std::endl);
+	lmContextDependent::~lmContextDependent()
+	{
+		if (m_lm) delete m_lm;
+		if (m_topicmodel) delete m_topicmodel;
+	}
 	
+	void lmContextDependent::load(const std::string &filename,int mmap)
+	{
+		VERBOSE(2,"lmContextDependent::load(const std::string &filename,int memmap)" << std::endl);
+		VERBOSE(2," filename:|" << filename << "|" << std::endl);
+		
+		dictionary_upperbound=1000000;
+		int memmap=mmap;
+		
+		//get info from the configuration file
+		fstream inp(filename.c_str(),ios::in|ios::binary);
+		
+		char line[MAX_LINE];
+		const char* words[LMCONFIGURE_MAX_TOKEN];
+		int tokenN;
+		inp.getline(line,MAX_LINE,'\n');
+		tokenN = parseWords(line,words,LMCONFIGURE_MAX_TOKEN);
+		
+		if (tokenN != 1 || ((strcmp(words[0],"LMCONTEXTDEPENDENT") != 0) && (strcmp(words[0],"lmcontextdependent")!=0)))
+			error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
+		
+		//reading ngram-based LM
+		inp.getline(line,BUFSIZ,'\n');
+		tokenN = parseWords(line,words,2);
+		if(tokenN < 2 || tokenN > 2) {
+			error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
+		}
+		
+		//loading ngram-based LM and initialization
+		m_lm_weight = (float) atof(words[0]);
+		
+		//checking the language model type
+		m_lm=lmContainer::CreateLanguageModel(words[1],ngramcache_load_factor,dictionary_load_factor);
+		
+		m_lm->setMaxLoadedLevel(requiredMaxlev);
+		
+		m_lm->load(words[1], memmap);
+		maxlev=m_lm->maxlevel();
+		dict=m_lm->getDict();
+		getDict()->genoovcode();
+		
+		m_lm->init_caches(m_lm->maxlevel());		
+		
+		//reading topic model
+		inp.getline(line,BUFSIZ,'\n');
+		tokenN = parseWords(line,words,2);
+		
+		if(tokenN < 2 || tokenN > 2) {
+			error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
+		}
+		
+		//loading topic model and initialization
+		m_topicmodel_weight = (float) atof(words[0]);
+		//m_topic_model = new  xxxxxxxxxxxxxxxx
+		
+		inp.close();
+	}
 	
-  dictionary_upperbound=1000000;
-  int memmap=mmap;
+	double lmContextDependent::lprob(ngram ng, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	{
+		double lm_prob = m_lm->clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
+		double topic_prob = 0.0;  // to_CHECK
+		double ret_prob = m_lm_weight * lm_prob + m_topicmodel_weight * topic_prob;
+		
+		return ret_prob;
+	}
 	
-  //get info from the configuration file
-  fstream inp(filename.c_str(),ios::in|ios::binary);
+	double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	{
+		//create the actual ngram
+		ngram ong(dict);
+		ong.pushc(codes,sz);
+		MY_ASSERT (ong.size == sz);
+		
+		return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);	
+	}
 	
-  char line[MAX_LINE];
-  const char* words[LMCONFIGURE_MAX_TOKEN];
-  int tokenN;
-  inp.getline(line,MAX_LINE,'\n');
-  tokenN = parseWords(line,words,LMCONFIGURE_MAX_TOKEN);
-	
-  if (tokenN != 2 || ((strcmp(words[0],"LMCONTEXTDEPENDENT") != 0) && (strcmp(words[0],"lmcontextdependent")!=0)))
-    error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
-	
-//reading ngram-based LM
-  inp.getline(line,BUFSIZ,'\n');
-  tokenN = parseWords(line,words,3);
-
-  if(tokenN < 2 || tokenN >3) {
-    error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
-  }
-
-  //check whether the (textual) LM has to be loaded as inverted
-  m_isinverted = false;
-  if(tokenN == 3) {
-    if (strcmp(words[2],"inverted") == 0)
-      m_isinverted = true;
-  }
-  VERBOSE(2,"m_isinverted:" << m_isinverted << endl);
-
-  m_lm_weight = (float) atof(words[0]);
-
-  //checking the language model type
-  m_lm=lmContainer::CreateLanguageModel(words[1],ngramcache_load_factor,dictionary_load_factor);
-
-  //let know that table has inverted n-grams
-  m_lm->is_inverted(m_isinverted);  //set inverted flag for each LM
-
-  m_lm->setMaxLoadedLevel(requiredMaxlev);
-
-  m_lm->load(words[1], memmap);
-  dict=m_lm->getDict();
-  getDict()->genoovcode();
-
-  m_lm->init_caches(m_lm->maxlevel());
-
-
-//reading bigram-base topic model
-  inp.getline(line,BUFSIZ,'\n');
-  tokenN = parseWords(line,words,3);
-
-  if(tokenN < 2 || tokenN >3) {
-    error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
-  }
-
-  //loading topic model and initialization
-  m_topicmodel_weight = (float) atof(words[0]);
-  //m_topic_model = new  xxxxxxxxxxxxxxxx
-
-
-  inp.close();
-}
-
-double lmContextDependent::lprob(ngram ng, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
-{
-  double lm_prob = m_lm->clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
-  double topic_prob = 0.0;  // to_CHECK
-  double ret_prob = m_lm_weight * lm_prob + m_topicmodel_weight * topic_prob;
-
-  return ret_prob;
-}
-
-double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
-{
-  //create the actual ngram
-  ngram ong(dict);
-  ong.pushc(codes,sz);
-  MY_ASSERT (ong.size == sz);
-
-  return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);	
-}
-
-double lmContextDependent::setlogOOVpenalty(int dub)
-{
-  MY_ASSERT(dub > dict->size());
-  m_lm->setlogOOVpenalty(dub);  //set OOV Penalty by means of DUB
-  double OOVpenalty = m_lm->getlogOOVpenalty();  //get OOV Penalty
-  logOOVpenalty=log(OOVpenalty);
-  return logOOVpenalty;
-}
+	double lmContextDependent::setlogOOVpenalty(int dub)
+	{
+		MY_ASSERT(dub > dict->size());
+		m_lm->setlogOOVpenalty(dub);  //set OOV Penalty by means of DUB
+		double OOVpenalty = m_lm->getlogOOVpenalty();  //get OOV Penalty
+		logOOVpenalty=log(OOVpenalty);
+		return logOOVpenalty;
+	}
 }//namespace irstlm
diff --git a/src/lmContextDependent.h b/src/lmContextDependent.h
index bf5a12f..2ca6b9e 100644
--- a/src/lmContextDependent.h
+++ b/src/lmContextDependent.h
@@ -1,24 +1,24 @@
 // $Id: lmContextDependent.h 3686 2010-10-15 11:55:32Z bertoldi $
 
 /******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ 
+ ******************************************************************************/
 
 #ifndef MF_LMCONTEXTDEPENDENT_H
 #define MF_LMCONTEXTDEPENDENT_H
@@ -34,118 +34,122 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 #include "lmContainer.h"
 
 namespace irstlm {
-/*
-Context-dependent LM
-Wrapper LM which combines a standard ngram-based word-based LM
-and a bigram-based topic model 
-*/
-
+	/*
+	 Context-dependent LM
+	 Wrapper LM which combines a standard ngram-based word-based LM
+	 and a bigram-based topic model 
+	 */
+	
 #define LMCONFIGURE_MAX_TOKEN 3
-
-class lmContextDependent: public lmContainer
-{
-  static const bool debug=true;
-  int order;
-  int dictionary_upperbound; //set by user
-  double  logOOVpenalty; //penalty for OOV words (default 0)
-  bool      isInverted;
-  int memmap;  //level from which n-grams are accessed via mmap
-
-  lmContainer* m_lm;
-  std::string m_lm_file;
-  bool m_isinverted;
-
-//  TopicModel* m_topicmodel;
-  lmContainer* m_topicmodel;   //to remove when TopicModel is ready
-  double m_lm_weight;
-
-  double m_topicmodel_weight;
-  std::string m_topicmodel_file;
-
-  float ngramcache_load_factor;
-  float dictionary_load_factor;
-
-  dictionary *dict; // dictionary for all interpolated LMs
-
-public:
-
-  lmContextDependent(float nlf=0.0, float dlfi=0.0);
-  virtual ~lmContextDependent();
-
-  void load(const std::string &filename,int mmap=0);
-
-  virtual double clprob(ngram ng,            double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
-        VERBOSE(0, "This LM type (lmContextDependent) does not support this function");
-        UNUSED(ng);
-        UNUSED(bow);
-        UNUSED(bol);
-        UNUSED(maxsuffptr);
-        UNUSED(statesize);
-        UNUSED(extendible);
-        assert(false);
-  };
-
-  virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
-	VERBOSE(0, "This LM type (lmContextDependent) does not support this function");
-        UNUSED(ng);
-        UNUSED(ngsize);
-        UNUSED(bow);
-        UNUSED(bol);
-        UNUSED(maxsuffptr);
-        UNUSED(statesize);
-        UNUSED(extendible);
-	assert(false);
-  };
-
-  virtual double clprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
-	return lprob(ng, ngsize, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
-  };
-  virtual double lprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-  virtual double lprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-
-  int maxlevel() const {
-    return maxlev;
-  };
-
-  virtual inline void setDict(dictionary* d) {
-    if (dict) delete dict;
-    dict=d;
-  };
 	
-  virtual inline dictionary* getDict() const {
-    return dict;
-  };
-
-  //set penalty for OOV words
-  virtual inline double getlogOOVpenalty() const {
-    return logOOVpenalty;
-  }
-
-  virtual double setlogOOVpenalty(int dub);
-
-  double inline setlogOOVpenalty(double oovp) {
-    return logOOVpenalty=oovp;
-  }
-
-//set the inverted flag
-  inline bool is_inverted(const bool flag) {
-    return isInverted = flag;
-  }
-
-//for an interpolation LM this variable does not make sense
-//for compatibility, we return true if all subLM return true
-  inline bool is_inverted() {
-    return m_isinverted;
-  }
-
-  inline virtual void dictionary_incflag(const bool flag) {
-    dict->incflag(flag);
-  };
-
-  inline virtual bool is_OOV(int code) { //returns true if the word is OOV for each subLM
-    return m_lm->is_OOV(code);
-  }
-};
+	class lmContextDependent: public lmContainer
+	{
+		static const bool debug=true;
+		int order;
+		int dictionary_upperbound; //set by user
+		double  logOOVpenalty; //penalty for OOV words (default 0)
+		bool      isInverted;
+		int memmap;  //level from which n-grams are accessed via mmap
+		
+		lmContainer* m_lm;
+		std::string m_lm_file;
+		bool m_isinverted;
+		
+		//  TopicModel* m_topicmodel;
+		lmContainer* m_topicmodel;   //to remove when TopicModel is ready
+		double m_lm_weight;
+		
+		double m_topicmodel_weight;
+		std::string m_topicmodel_file;
+		
+		float ngramcache_load_factor;
+		float dictionary_load_factor;
+		
+		dictionary *dict; // dictionary for all interpolated LMs
+		
+	public:
+		
+		lmContextDependent(float nlf=0.0, float dlfi=0.0);
+		virtual ~lmContextDependent();
+		
+		void load(const std::string &filename,int mmap=0);
+		
+		virtual double clprob(ngram ng,            double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+			VERBOSE(0, "virtual double clprob(ngram ng,            double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
+			VERBOSE(0, "This LM type (lmContextDependent) does not support this function");
+			UNUSED(ng);
+			UNUSED(bow);
+			UNUSED(bol);
+			UNUSED(maxsuffptr);
+			UNUSED(statesize);
+			UNUSED(extendible);
+			assert(false);
+		};
+		
+		virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+			VERBOSE(0, "virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
+			UNUSED(ng);
+			UNUSED(ngsize);
+			UNUSED(bow);
+			UNUSED(bol);
+			UNUSED(maxsuffptr);
+			UNUSED(statesize);
+			UNUSED(extendible);
+			assert(false);
+		};
+		
+		virtual double clprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+			return lprob(ng, ngsize, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+		};
+		virtual double clprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+			return lprob(ng, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+		};
+		virtual double lprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+		virtual double lprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+		
+		int maxlevel() const {
+			return maxlev;
+		};
+		
+		virtual inline void setDict(dictionary* d) {
+			if (dict) delete dict;
+			dict=d;
+		};
+		
+		virtual inline dictionary* getDict() const {
+			return dict;
+		};
+		
+		//get penalty for OOV words
+		virtual inline double getlogOOVpenalty() const {
+			return logOOVpenalty;
+		}
+		
+		virtual double setlogOOVpenalty(int dub);
+		
+		double inline setlogOOVpenalty(double oovp) {
+			return logOOVpenalty=oovp;
+		}
+		
+		//set the inverted flag
+		inline bool is_inverted(const bool flag) {
+			return isInverted = flag;
+		}
+		
+		//get the inverted flag
+		//NOTE(review): returns m_isinverted, whereas is_inverted(flag) above sets isInverted -- confirm which member is intended
+		inline bool is_inverted() {
+			return m_isinverted;
+		}
+		
+		inline virtual void dictionary_incflag(const bool flag) {
+			dict->incflag(flag);
+		};
+		
+		inline virtual bool is_OOV(int code) { //returns true if the word is OOV for each subLM
+			return m_lm->is_OOV(code);
+		}
+	};
 }//namespace irstlm
 
 #endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git



More information about the debian-science-commits mailing list