[irstlm] 22/126: minor changes; code cleanup; re-indentation
Giulio Paci
giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:46:41 UTC 2016
This is an automated email from the git hooks/post-receive script.
giuliopaci-guest pushed a commit to annotated tag adaptiveLM.v0.1
in repository irstlm.
commit bb7b80032d4032bfea8f8347841700a7fb8e0cf5
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date: Wed Jul 22 16:38:24 2015 +0200
minor changes; code cleanup; re-indentation
---
src/lmContextDependent.cpp | 211 +++++++++++++++++-------------------
src/lmContextDependent.h | 260 +++++++++++++++++++++++----------------------
2 files changed, 231 insertions(+), 240 deletions(-)
diff --git a/src/lmContextDependent.cpp b/src/lmContextDependent.cpp
index 59ccbc6..9beb720 100644
--- a/src/lmContextDependent.cpp
+++ b/src/lmContextDependent.cpp
@@ -30,7 +30,7 @@
#include "util.h"
using namespace std;
-
+
inline void error(const char* message)
{
std::cerr << message << "\n";
@@ -38,120 +38,107 @@ inline void error(const char* message)
}
namespace irstlm {
-lmContextDependent::lmContextDependent(float nlf, float dlf)
-{
- ngramcache_load_factor = nlf;
- dictionary_load_factor = dlf;
- m_lm=NULL;
- m_topicmodel=NULL;
+ lmContextDependent::lmContextDependent(float nlf, float dlf)
+ {
+ ngramcache_load_factor = nlf;
+ dictionary_load_factor = dlf;
+ m_lm=NULL;
+ m_topicmodel=NULL;
+
+ order=0;
+ memmap=0;
+ isInverted=false;
+
+ }
- order=0;
- memmap=0;
- isInverted=false;
-
-}
-
-lmContextDependent::~lmContextDependent()
-{
- if (m_lm) delete m_lm;
- if (m_topicmodel) delete m_topicmodel;
-}
-
-void lmContextDependent::load(const std::string &filename,int mmap)
-{
- VERBOSE(2,"lmContextDependent::load(const std::string &filename,int memmap)" << std::endl);
- VERBOSE(2," filename:|" << filename << "|" << std::endl);
+ lmContextDependent::~lmContextDependent()
+ {
+ if (m_lm) delete m_lm;
+ if (m_topicmodel) delete m_topicmodel;
+ }
+ void lmContextDependent::load(const std::string &filename,int mmap)
+ {
+ VERBOSE(2,"lmContextDependent::load(const std::string &filename,int memmap)" << std::endl);
+ VERBOSE(2," filename:|" << filename << "|" << std::endl);
+
+ dictionary_upperbound=1000000;
+ int memmap=mmap;
+
+ //get info from the configuration file
+ fstream inp(filename.c_str(),ios::in|ios::binary);
+
+ char line[MAX_LINE];
+ const char* words[LMCONFIGURE_MAX_TOKEN];
+ int tokenN;
+ inp.getline(line,MAX_LINE,'\n');
+ tokenN = parseWords(line,words,LMCONFIGURE_MAX_TOKEN);
+
+ if (tokenN != 1 || ((strcmp(words[0],"LMCONTEXTDEPENDENT") != 0) && (strcmp(words[0],"lmcontextdependent")!=0)))
+ error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
+
+ //reading ngram-based LM
+ inp.getline(line,BUFSIZ,'\n');
+ tokenN = parseWords(line,words,2);
+ if(tokenN < 2 || tokenN > 2) {
+ error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
+ }
+
+ //loading ngram-based LM and initialization
+ m_lm_weight = (float) atof(words[0]);
+
+ //checking the language model type
+ m_lm=lmContainer::CreateLanguageModel(words[1],ngramcache_load_factor,dictionary_load_factor);
+
+ m_lm->setMaxLoadedLevel(requiredMaxlev);
+
+ m_lm->load(words[1], memmap);
+ maxlev=m_lm->maxlevel();
+ dict=m_lm->getDict();
+ getDict()->genoovcode();
+
+ m_lm->init_caches(m_lm->maxlevel());
+
+ //reading topic model
+ inp.getline(line,BUFSIZ,'\n');
+ tokenN = parseWords(line,words,2);
+
+ if(tokenN < 2 || tokenN > 2) {
+ error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
+ }
+
+ //loading topic model and initialization
+ m_topicmodel_weight = (float) atof(words[0]);
+ //m_topic_model = new xxxxxxxxxxxxxxxx
+
+ inp.close();
+ }
- dictionary_upperbound=1000000;
- int memmap=mmap;
+ double lmContextDependent::lprob(ngram ng, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ {
+ double lm_prob = m_lm->clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
+ double topic_prob = 0.0; // to_CHECK
+ double ret_prob = m_lm_weight * lm_prob + m_topicmodel_weight * topic_prob;
+
+ return ret_prob;
+ }
- //get info from the configuration file
- fstream inp(filename.c_str(),ios::in|ios::binary);
+ double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ {
+ //create the actual ngram
+ ngram ong(dict);
+ ong.pushc(codes,sz);
+ MY_ASSERT (ong.size == sz);
+
+ return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ }
- char line[MAX_LINE];
- const char* words[LMCONFIGURE_MAX_TOKEN];
- int tokenN;
- inp.getline(line,MAX_LINE,'\n');
- tokenN = parseWords(line,words,LMCONFIGURE_MAX_TOKEN);
-
- if (tokenN != 2 || ((strcmp(words[0],"LMCONTEXTDEPENDENT") != 0) && (strcmp(words[0],"lmcontextdependent")!=0)))
- error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
-
-//reading ngram-based LM
- inp.getline(line,BUFSIZ,'\n');
- tokenN = parseWords(line,words,3);
-
- if(tokenN < 2 || tokenN >3) {
- error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
- }
-
- //check whether the (textual) LM has to be loaded as inverted
- m_isinverted = false;
- if(tokenN == 3) {
- if (strcmp(words[2],"inverted") == 0)
- m_isinverted = true;
- }
- VERBOSE(2,"m_isinverted:" << m_isinverted << endl);
-
- m_lm_weight = (float) atof(words[0]);
-
- //checking the language model type
- m_lm=lmContainer::CreateLanguageModel(words[1],ngramcache_load_factor,dictionary_load_factor);
-
- //let know that table has inverted n-grams
- m_lm->is_inverted(m_isinverted); //set inverted flag for each LM
-
- m_lm->setMaxLoadedLevel(requiredMaxlev);
-
- m_lm->load(words[1], memmap);
- dict=m_lm->getDict();
- getDict()->genoovcode();
-
- m_lm->init_caches(m_lm->maxlevel());
-
-
-//reading bigram-base topic model
- inp.getline(line,BUFSIZ,'\n');
- tokenN = parseWords(line,words,3);
-
- if(tokenN < 2 || tokenN >3) {
- error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCONTEXTDEPENDENT\nweight_of_ngram_LM filename_of_LM\nweight_of_topic_model filename_of_topic_model");
- }
-
- //loading topic model and initialization
- m_topicmodel_weight = (float) atof(words[0]);
- //m_topic_model = new xxxxxxxxxxxxxxxx
-
-
- inp.close();
-}
-
-double lmContextDependent::lprob(ngram ng, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
-{
- double lm_prob = m_lm->clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
- double topic_prob = 0.0; // to_CHECK
- double ret_prob = m_lm_weight * lm_prob + m_topicmodel_weight * topic_prob;
-
- return ret_prob;
-}
-
-double lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
-{
- //create the actual ngram
- ngram ong(dict);
- ong.pushc(codes,sz);
- MY_ASSERT (ong.size == sz);
-
- return lprob(ong, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
-}
-
-double lmContextDependent::setlogOOVpenalty(int dub)
-{
- MY_ASSERT(dub > dict->size());
- m_lm->setlogOOVpenalty(dub); //set OOV Penalty by means of DUB
- double OOVpenalty = m_lm->getlogOOVpenalty(); //get OOV Penalty
- logOOVpenalty=log(OOVpenalty);
- return logOOVpenalty;
-}
+ double lmContextDependent::setlogOOVpenalty(int dub)
+ {
+ MY_ASSERT(dub > dict->size());
+ m_lm->setlogOOVpenalty(dub); //set OOV Penalty by means of DUB
+ double OOVpenalty = m_lm->getlogOOVpenalty(); //get OOV Penalty
+ logOOVpenalty=log(OOVpenalty);
+ return logOOVpenalty;
+ }
}//namespace irstlm
diff --git a/src/lmContextDependent.h b/src/lmContextDependent.h
index bf5a12f..2ca6b9e 100644
--- a/src/lmContextDependent.h
+++ b/src/lmContextDependent.h
@@ -1,24 +1,24 @@
// $Id: lmContextDependent.h 3686 2010-10-15 11:55:32Z bertoldi $
/******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+ ******************************************************************************/
#ifndef MF_LMCONTEXTDEPENDENT_H
#define MF_LMCONTEXTDEPENDENT_H
@@ -34,118 +34,122 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "lmContainer.h"
namespace irstlm {
-/*
-Context-dependent LM
-Wrapper LM which combines a standard ngram-based word-based LM
-and a bigram-based topic model
-*/
-
+ /*
+ Context-dependent LM
+ Wrapper LM which combines a standard ngram-based word-based LM
+ and a bigram-based topic model
+ */
+
#define LMCONFIGURE_MAX_TOKEN 3
-
-class lmContextDependent: public lmContainer
-{
- static const bool debug=true;
- int order;
- int dictionary_upperbound; //set by user
- double logOOVpenalty; //penalty for OOV words (default 0)
- bool isInverted;
- int memmap; //level from which n-grams are accessed via mmap
-
- lmContainer* m_lm;
- std::string m_lm_file;
- bool m_isinverted;
-
-// TopicModel* m_topicmodel;
- lmContainer* m_topicmodel; //to remove when TopicModel is ready
- double m_lm_weight;
-
- double m_topicmodel_weight;
- std::string m_topicmodel_file;
-
- float ngramcache_load_factor;
- float dictionary_load_factor;
-
- dictionary *dict; // dictionary for all interpolated LMs
-
-public:
-
- lmContextDependent(float nlf=0.0, float dlfi=0.0);
- virtual ~lmContextDependent();
-
- void load(const std::string &filename,int mmap=0);
-
- virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- VERBOSE(0, "This LM type (lmContextDependent) does not support this function");
- UNUSED(ng);
- UNUSED(bow);
- UNUSED(bol);
- UNUSED(maxsuffptr);
- UNUSED(statesize);
- UNUSED(extendible);
- assert(false);
- };
-
- virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- VERBOSE(0, "This LM type (lmContextDependent) does not support this function");
- UNUSED(ng);
- UNUSED(ngsize);
- UNUSED(bow);
- UNUSED(bol);
- UNUSED(maxsuffptr);
- UNUSED(statesize);
- UNUSED(extendible);
- assert(false);
- };
-
- virtual double clprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- return lprob(ng, ngsize, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
- };
- virtual double lprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double lprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-
- int maxlevel() const {
- return maxlev;
- };
-
- virtual inline void setDict(dictionary* d) {
- if (dict) delete dict;
- dict=d;
- };
- virtual inline dictionary* getDict() const {
- return dict;
- };
-
- //set penalty for OOV words
- virtual inline double getlogOOVpenalty() const {
- return logOOVpenalty;
- }
-
- virtual double setlogOOVpenalty(int dub);
-
- double inline setlogOOVpenalty(double oovp) {
- return logOOVpenalty=oovp;
- }
-
-//set the inverted flag
- inline bool is_inverted(const bool flag) {
- return isInverted = flag;
- }
-
-//for an interpolation LM this variable does not make sense
-//for compatibility, we return true if all subLM return true
- inline bool is_inverted() {
- return m_isinverted;
- }
-
- inline virtual void dictionary_incflag(const bool flag) {
- dict->incflag(flag);
- };
-
- inline virtual bool is_OOV(int code) { //returns true if the word is OOV for each subLM
- return m_lm->is_OOV(code);
- }
-};
+ class lmContextDependent: public lmContainer
+ {
+ static const bool debug=true;
+ int order;
+ int dictionary_upperbound; //set by user
+ double logOOVpenalty; //penalty for OOV words (default 0)
+ bool isInverted;
+ int memmap; //level from which n-grams are accessed via mmap
+
+ lmContainer* m_lm;
+ std::string m_lm_file;
+ bool m_isinverted;
+
+ // TopicModel* m_topicmodel;
+ lmContainer* m_topicmodel; //to remove when TopicModel is ready
+ double m_lm_weight;
+
+ double m_topicmodel_weight;
+ std::string m_topicmodel_file;
+
+ float ngramcache_load_factor;
+ float dictionary_load_factor;
+
+ dictionary *dict; // dictionary for all interpolated LMs
+
+ public:
+
+ lmContextDependent(float nlf=0.0, float dlfi=0.0);
+ virtual ~lmContextDependent();
+
+ void load(const std::string &filename,int mmap=0);
+
+ virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ VERBOSE(0, "virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
+ VERBOSE(0, "This LM type (lmContextDependent) does not support this function");
+ UNUSED(ng);
+ UNUSED(bow);
+ UNUSED(bol);
+ UNUSED(maxsuffptr);
+ UNUSED(statesize);
+ UNUSED(extendible);
+ assert(false);
+ };
+
+ virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ VERBOSE(0, "virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
+ UNUSED(ng);
+ UNUSED(ngsize);
+ UNUSED(bow);
+ UNUSED(bol);
+ UNUSED(maxsuffptr);
+ UNUSED(statesize);
+ UNUSED(extendible);
+ assert(false);
+ };
+
+ virtual double clprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ return lprob(ng, ngsize, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ };
+ virtual double clprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ return lprob(ng, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ };
+ virtual double lprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double lprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+
+ int maxlevel() const {
+ return maxlev;
+ };
+
+ virtual inline void setDict(dictionary* d) {
+ if (dict) delete dict;
+ dict=d;
+ };
+
+ virtual inline dictionary* getDict() const {
+ return dict;
+ };
+
+ //set penalty for OOV words
+ virtual inline double getlogOOVpenalty() const {
+ return logOOVpenalty;
+ }
+
+ virtual double setlogOOVpenalty(int dub);
+
+ double inline setlogOOVpenalty(double oovp) {
+ return logOOVpenalty=oovp;
+ }
+
+ //set the inverted flag
+ inline bool is_inverted(const bool flag) {
+ return isInverted = flag;
+ }
+
+ //for an interpolation LM this variable does not make sense
+ //for compatibility, we return true if all subLM return true
+ inline bool is_inverted() {
+ return m_isinverted;
+ }
+
+ inline virtual void dictionary_incflag(const bool flag) {
+ dict->incflag(flag);
+ };
+
+ inline virtual bool is_OOV(int code) { //returns true if the word is OOV for each subLM
+ return m_lm->is_OOV(code);
+ }
+ };
}//namespace irstlm
#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git
More information about the debian-science-commits
mailing list