[irstlm] 26/126: minor changes; code cleanup; re-indentation
Giulio Paci
giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:46:41 UTC 2016
This is an automated email from the git hooks/post-receive script.
giuliopaci-guest pushed a commit to annotated tag adaptiveLM.v0.1
in repository irstlm.
commit 5995f0bdd74c67a3132e221aeec14b29ba6c0a5e
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date: Fri Jul 24 07:58:21 2015 +0200
minor changes; code cleanup; re-indentation
---
src/lmContainer.cpp | 266 ++++++++++++++++++++++-----------------------
src/lmContainer.h | 18 ++-
src/lmContextDependent.cpp | 6 +-
src/lmContextDependent.h | 33 ++++--
src/util.cpp | 2 +-
src/util.h | 4 +-
6 files changed, 181 insertions(+), 148 deletions(-)
diff --git a/src/lmContainer.cpp b/src/lmContainer.cpp
index 1a1f4f2..dc042f8 100644
--- a/src/lmContainer.cpp
+++ b/src/lmContainer.cpp
@@ -1,24 +1,24 @@
// $Id: lmContainer.cpp 3686 2010-10-15 11:55:32Z bertoldi $
/******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+ ******************************************************************************/
#include <stdio.h>
#include <cstdlib>
#include <stdlib.h>
@@ -35,7 +35,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "lmContextDependent.h"
using namespace std;
-
+
namespace irstlm {
#ifdef PS_CACHE_ENABLE
@@ -43,132 +43,132 @@ namespace irstlm {
#undef PS_CACHE_ENABLE
#endif
#endif
-
+
#ifdef LMT_CACHE_ENABLE
#if LMT_CACHE_ENABLE==0
#undef LMT_CACHE_ENABLE
#endif
#endif
-
+
#if PS_CACHE_ENABLE
-bool lmContainer::ps_cache_enabled=true;
+ bool lmContainer::ps_cache_enabled=true;
#else
-bool lmContainer::ps_cache_enabled=false;
+ bool lmContainer::ps_cache_enabled=false;
#endif
-
+
#if LMT_CACHE_ENABLE
-bool lmContainer::lmt_cache_enabled=true;
+ bool lmContainer::lmt_cache_enabled=true;
#else
-bool lmContainer::lmt_cache_enabled=false;
+ bool lmContainer::lmt_cache_enabled=false;
#endif
-
-inline void error(const char* message)
-{
- std::cerr << message << "\n";
- throw std::runtime_error(message);
-}
-
-lmContainer::lmContainer()
-{
- requiredMaxlev=1000;
- lmtype=_IRSTLM_LMUNKNOWN;
- maxlev=0;
-}
-
-int lmContainer::getLanguageModelType(std::string filename)
-{
- fstream inp(filename.c_str(),ios::in|ios::binary);
-
- if (!inp.good()) {
- std::stringstream ss_msg;
- ss_msg << "Failed to open " << filename;
- exit_error(IRSTLM_ERROR_IO, ss_msg.str());
- }
- //give a look at the header to get informed about the language model type
- std::string header;
- inp >> header;
- inp.close();
-
- VERBOSE(1,"LM header:|" << header << "|" << std::endl);
-
- int type=_IRSTLM_LMUNKNOWN;
- VERBOSE(1,"type: " << type << std::endl);
- if (header == "lminterpolation" || header == "LMINTERPOLATION") {
- type = _IRSTLM_LMINTERPOLATION;
- } else if (header == "lmcontextdependent" || header == "LMCONTEXTDEPENDENT") {
- type = _IRSTLM_LMCONTEXTDEPENDENT;
- } else if (header == "lmmacro" || header == "LMMACRO") {
- type = _IRSTLM_LMMACRO;
- } else if (header == "lmclass" || header == "LMCLASS") {
- type = _IRSTLM_LMCLASS;
- } else {
- type = _IRSTLM_LMTABLE;
- }
- VERBOSE(1,"type: " << type << std::endl);
-
- return type;
-};
-
-lmContainer* lmContainer::CreateLanguageModel(const std::string infile, float nlf, float dlf)
-{
- int type = lmContainer::getLanguageModelType(infile);
- std::cerr << "Language Model Type of " << infile << " is " << type << std::endl;
-
- return lmContainer::CreateLanguageModel(type, nlf, dlf);
-}
-
-lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf)
-{
-
- std::cerr << "Language Model Type is " << type << std::endl;
-
- lmContainer* lm=NULL;
-
- switch (type) {
-
- case _IRSTLM_LMTABLE:
- lm = new lmtable(nlf, dlf);
- break;
-
- case _IRSTLM_LMMACRO:
- lm = new lmmacro(nlf, dlf);
- break;
+
+ inline void error(const char* message)
+ {
+ std::cerr << message << "\n";
+ throw std::runtime_error(message);
+ }
+
+ lmContainer::lmContainer()
+ {
+ requiredMaxlev=1000;
+ lmtype=_IRSTLM_LMUNKNOWN;
+ maxlev=0;
+ }
+
+ int lmContainer::getLanguageModelType(std::string filename)
+ {
+ fstream inp(filename.c_str(),ios::in|ios::binary);
+
+ if (!inp.good()) {
+ std::stringstream ss_msg;
+ ss_msg << "Failed to open " << filename;
+ exit_error(IRSTLM_ERROR_IO, ss_msg.str());
+ }
+ //give a look at the header to get informed about the language model type
+ std::string header;
+ inp >> header;
+ inp.close();
+
+ VERBOSE(1,"LM header:|" << header << "|" << std::endl);
+
+ int type=_IRSTLM_LMUNKNOWN;
+ VERBOSE(1,"type: " << type << std::endl);
+ if (header == "lminterpolation" || header == "LMINTERPOLATION") {
+ type = _IRSTLM_LMINTERPOLATION;
+ } else if (header == "lmcontextdependent" || header == "LMCONTEXTDEPENDENT") {
+ type = _IRSTLM_LMCONTEXTDEPENDENT;
+ } else if (header == "lmmacro" || header == "LMMACRO") {
+ type = _IRSTLM_LMMACRO;
+ } else if (header == "lmclass" || header == "LMCLASS") {
+ type = _IRSTLM_LMCLASS;
+ } else {
+ type = _IRSTLM_LMTABLE;
+ }
+ VERBOSE(1,"type: " << type << std::endl);
+
+ return type;
+ };
+
+ lmContainer* lmContainer::CreateLanguageModel(const std::string infile, float nlf, float dlf)
+ {
+ int type = lmContainer::getLanguageModelType(infile);
+ std::cerr << "Language Model Type of " << infile << " is " << type << std::endl;
+
+ return lmContainer::CreateLanguageModel(type, nlf, dlf);
+ }
+
+ lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf)
+ {
+
+ std::cerr << "Language Model Type is " << type << std::endl;
+
+ lmContainer* lm=NULL;
+
+ switch (type) {
+
+ case _IRSTLM_LMTABLE:
+ lm = new lmtable(nlf, dlf);
+ break;
+
+ case _IRSTLM_LMMACRO:
+ lm = new lmmacro(nlf, dlf);
+ break;
+
+ case _IRSTLM_LMCLASS:
+ lm = new lmclass(nlf, dlf);
+ break;
+
+ case _IRSTLM_LMINTERPOLATION:
+ lm = new lmInterpolation(nlf, dlf);
+ break;
+
+ case _IRSTLM_LMCONTEXTDEPENDENT:
+ lm = new lmContextDependent(nlf, dlf);
+ break;
+
+ default:
+ exit_error(IRSTLM_ERROR_DATA, "This language model type is unknown!");
+ }
+
+ lm->setLanguageModelType(type);
+ return lm;
+ }
+
+ bool lmContainer::filter(const string sfilter, lmContainer*& sublmC, const string skeepunigrams)
+ {
+ if (lmtype == _IRSTLM_LMTABLE) {
+ sublmC = lmContainer::CreateLanguageModel(lmtype,((lmtable*) this)->GetNgramcacheLoadFactor(),((lmtable*) this)->GetDictionaryLoadFactor());
- case _IRSTLM_LMCLASS:
- lm = new lmclass(nlf, dlf);
- break;
+ //let know that table has inverted n-grams
+ sublmC->is_inverted(is_inverted());
+ sublmC->setMaxLoadedLevel(getMaxLoadedLevel());
+ sublmC->maxlevel(maxlevel());
- case _IRSTLM_LMINTERPOLATION:
- lm = new lmInterpolation(nlf, dlf);
- break;
-
- case _IRSTLM_LMCONTEXTDEPENDENT:
- lm = new lmContextDependent(nlf, dlf);
- break;
+ bool res=((lmtable*) this)->filter(sfilter, (lmtable*) sublmC, skeepunigrams);
- default:
- exit_error(IRSTLM_ERROR_DATA, "This language model type is unknown!");
- }
-
- lm->setLanguageModelType(type);
- return lm;
-}
-
-bool lmContainer::filter(const string sfilter, lmContainer*& sublmC, const string skeepunigrams)
-{
- if (lmtype == _IRSTLM_LMTABLE) {
- sublmC = lmContainer::CreateLanguageModel(lmtype,((lmtable*) this)->GetNgramcacheLoadFactor(),((lmtable*) this)->GetDictionaryLoadFactor());
-
- //let know that table has inverted n-grams
- sublmC->is_inverted(is_inverted());
- sublmC->setMaxLoadedLevel(getMaxLoadedLevel());
- sublmC->maxlevel(maxlevel());
-
- bool res=((lmtable*) this)->filter(sfilter, (lmtable*) sublmC, skeepunigrams);
-
- return res;
- }
- return false;
-};
-
+ return res;
+ }
+ return false;
+ };
+
}//namespace irstlm
diff --git a/src/lmContainer.h b/src/lmContainer.h
index 69de5be..eb1fca6 100644
--- a/src/lmContainer.h
+++ b/src/lmContainer.h
@@ -42,7 +42,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
typedef enum {BINARY,TEXT,YRANIB,NONE} OUTFILE_TYPE;
namespace irstlm {
-
+
typedef std::map< std::string, float > topic_map_t;
class lmContainer
@@ -122,6 +122,15 @@ public:
UNUSED(extendible);
return 0.0;
};
+ virtual double clprob(string_vec_t& text, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
+ UNUSED(text);
+ UNUSED(bow);
+ UNUSED(bol);
+ UNUSED(maxsuffptr);
+ UNUSED(statesize);
+ UNUSED(extendible);
+ return 0.0;
+ };
virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
UNUSED(ng);
UNUSED(ngsize);
@@ -142,7 +151,12 @@ public:
UNUSED(topic_weights);
return clprob(ng, ngsize, bow, bol, maxsuffptr, statesize, extendible);
}
-
+
+ virtual double clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+ UNUSED(topic_weights);
+ return clprob(text, bow, bol, maxsuffptr, statesize, extendible);
+ }
+
virtual const char *cmaxsuffptr(ngram ng, unsigned int* statesize=NULL)
{
UNUSED(ng);
diff --git a/src/lmContextDependent.cpp b/src/lmContextDependent.cpp
index 83fd1ae..2639ace 100644
--- a/src/lmContextDependent.cpp
+++ b/src/lmContextDependent.cpp
@@ -117,7 +117,7 @@ namespace irstlm {
double lmContextDependent::lprob(ngram ng, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- std::vector<std::string> text; // replace with the text passed as parameter
+ string_vec_t text; // replace with the text passed as parameter
double lm_prob = m_lm->clprob(ng, bow, bol, maxsuffptr, statesize, extendible);
double topic_prob = m_topicmodel->prob(text, topic_weights);
double ret_prob = m_lm_weight * lm_prob + m_topicmodel_weight * topic_prob;
@@ -126,9 +126,9 @@ namespace irstlm {
return ret_prob;
}
- double lmContextDependent::lprob(std::vector<std::string>& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+ double lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
{
- VERBOSE(0,"lmContextDependent::lprob(int* codes, int sz, topic_map_t& topic_weights, " << std::endl);
+ VERBOSE(0,"lmContextDependent::lprob(string_vec_t& text, topic_map_t& topic_weights, " << std::endl);
//create the actual ngram
ngram ng(dict);
ng.pushw(text);
diff --git a/src/lmContextDependent.h b/src/lmContextDependent.h
index 835338d..d3d1689 100644
--- a/src/lmContextDependent.h
+++ b/src/lmContextDependent.h
@@ -40,9 +40,11 @@ namespace irstlm {
PseudoTopicModel(){};
~PseudoTopicModel(){};
- void load(const std::string &filename){};
+ void load(const std::string &filename){
+ UNUSED(filename);
+ };
- double prob(std::vector<std::string>& text, topic_map_t& topic_weights){
+ double prob(string_vec_t& text, topic_map_t& topic_weights){
UNUSED(text);
UNUSED(topic_weights);
return 1.0;
@@ -92,9 +94,21 @@ namespace irstlm {
void load(const std::string &filename,int mmap=0);
+
+ virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ VERBOSE(0, "virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
+ UNUSED(ng);
+ UNUSED(ngsize);
+ UNUSED(bow);
+ UNUSED(bol);
+ UNUSED(maxsuffptr);
+ UNUSED(statesize);
+ UNUSED(extendible);
+ assert(false);
+ };
+
virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
VERBOSE(0, "virtual double clprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
- VERBOSE(0, "This LM type (lmContextDependent) does not support this function");
UNUSED(ng);
UNUSED(bow);
UNUSED(bol);
@@ -104,10 +118,9 @@ namespace irstlm {
assert(false);
};
- virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
- VERBOSE(0, "virtual double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
- UNUSED(ng);
- UNUSED(ngsize);
+ virtual double clprob(string_vec_t& text, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ VERBOSE(0, "virtual double clprob(string_vec_t& text, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL)" << std::endl << "This LM type (lmContextDependent) does not support this function" << std::endl);
+ UNUSED(text);
UNUSED(bow);
UNUSED(bol);
UNUSED(maxsuffptr);
@@ -122,9 +135,13 @@ namespace irstlm {
virtual double clprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
return lprob(ng, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
};
+ virtual double clprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL){
+ return lprob(text, topic_weights, bow, bol, maxsuffptr, statesize, extendible);
+ };
+
virtual double lprob(int* ng, int ngsize, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
virtual double lprob(ngram ng, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
- virtual double lprob(std::vector<std::string>& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+ virtual double lprob(string_vec_t& text, topic_map_t& topic_weights, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
int maxlevel() const {
return maxlev;
diff --git a/src/util.cpp b/src/util.cpp
index bb223f6..6db27c4 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -366,7 +366,7 @@ namespace irstlm {
}
}
-std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
+string_vec_t &split(const std::string &s, char delim, string_vec_t &elems) {
std::stringstream ss(s);
std::string item;
while (std::getline(ss, item, delim)) {
diff --git a/src/util.h b/src/util.h
index 38f318f..fbb9bbf 100644
--- a/src/util.h
+++ b/src/util.h
@@ -48,6 +48,8 @@ using namespace std;
#define BUCKET 10000
#define SSEED 50
+typedef std::vector< std::string > string_vec_t;
+
class ngram;
class mfstream;
@@ -72,7 +74,7 @@ int parseWords(char *, const char **, int);
int parseline(istream& inp, int Order,ngram& ng,float& prob,float& bow);
//split a string into a vector of string according to one specified delimiter (char)
-std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems);
+string_vec_t &split(const std::string &s, char delim, string_vec_t &elems);
void exit_error(int err, const std::string &msg="");
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git
More information about the debian-science-commits mailing list