[irstlm] 11/78: code optimization

Giulio Paci giuliopaci-guest at moszumanska.debian.org
Tue May 17 07:47:00 UTC 2016


This is an automated email from the git hooks/post-receive script.

giuliopaci-guest pushed a commit to tag adaptiveLM.v0.10
in repository irstlm.

commit ede270d107cca714d21d90e3a407d0dec9fc60e7
Author: Nicola Bertoldi <bertoldi at fbk.eu>
Date:   Sun Nov 8 15:50:16 2015 +0100

    code optimization
---
 src/lmContainer.cpp     |  270 ++++----
 src/lmContainer.h       |  465 +++++++------
 src/lmInterpolation.cpp |   53 +-
 src/lmInterpolation.h   |  248 +++----
 src/lmclass.cpp         |  404 +++++------
 src/lmclass.h           |  173 +++--
 src/lmmacro.cpp         | 1722 ++++++++++++++++++++++++-----------------------
 src/lmmacro.h           |  208 +++---
 src/lmtable.cpp         |  262 +++----
 src/lmtable.h           | 1129 +++++++++++++++----------------
 10 files changed, 2466 insertions(+), 2468 deletions(-)

diff --git a/src/lmContainer.cpp b/src/lmContainer.cpp
index 7b995d4..bde6996 100644
--- a/src/lmContainer.cpp
+++ b/src/lmContainer.cpp
@@ -1,24 +1,24 @@
 // $Id: lmContainer.cpp 3686 2010-10-15 11:55:32Z bertoldi $
 
 /******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ 
+ ******************************************************************************/
 #include <stdio.h>
 #include <cstdlib>
 #include <stdlib.h>
@@ -34,7 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 #include "lmInterpolation.h"
 
 using namespace std;
-	
+
 namespace irstlm {
 	
 #ifdef PS_CACHE_ENABLE
@@ -42,134 +42,134 @@ namespace irstlm {
 #undef PS_CACHE_ENABLE
 #endif
 #endif
-
+	
 #ifdef LMT_CACHE_ENABLE
 #if LMT_CACHE_ENABLE==0
 #undef LMT_CACHE_ENABLE
 #endif
 #endif
-
+	
 #if PS_CACHE_ENABLE
-bool lmContainer::ps_cache_enabled=true;
+	bool lmContainer::ps_cache_enabled=true;
 #else
-bool lmContainer::ps_cache_enabled=false;
+	bool lmContainer::ps_cache_enabled=false;
 #endif
-
+	
 #if LMT_CACHE_ENABLE
-bool lmContainer::lmt_cache_enabled=true;
+	bool lmContainer::lmt_cache_enabled=true;
 #else
-bool lmContainer::lmt_cache_enabled=false;
+	bool lmContainer::lmt_cache_enabled=false;
 #endif
-
-inline void error(const char* message)
-{
-  std::cerr << message << "\n";
-  throw std::runtime_error(message);
-}
-
-lmContainer::lmContainer()
-{
-  requiredMaxlev=IRSTLM_REQUIREDMAXLEV_DEFAULT;
-	lmtype=_IRSTLM_LMUNKNOWN;
-	maxlev=0;
-}
-
-int lmContainer::getLanguageModelType(std::string filename)
-{
-  fstream inp(filename.c_str(),ios::in|ios::binary);
-
-  if (!inp.good()) {
-		std::stringstream ss_msg;
-		ss_msg << "Failed to open " << filename;
-    exit_error(IRSTLM_ERROR_IO, ss_msg.str());
-  }
-  //give a look at the header to get informed about the language model type
-  std::string header;
-  inp >> header;
-  inp.close();
-
-  VERBOSE(1,"LM header:|" << header << "|" << std::endl);
-
-  int type=_IRSTLM_LMUNKNOWN;
-  VERBOSE(1,"type: " << type << std::endl);
-  if (header == "lmminterpolation" || header == "LMINTERPOLATION") {
-    type = _IRSTLM_LMINTERPOLATION;
-  } else if (header == "lmmacro" || header == "LMMACRO") {
-    type = _IRSTLM_LMMACRO;
-  } else if (header == "lmclass" || header == "LMCLASS") {
-    type = _IRSTLM_LMCLASS;
-  } else {
-    type = _IRSTLM_LMTABLE;
-  }
-  VERBOSE(1,"type: " << type << std::endl);
-
-  return type;
-};
-
-lmContainer* lmContainer::CreateLanguageModel(const std::string infile, float nlf, float dlf)
-{
-  int type = lmContainer::getLanguageModelType(infile);
-                
-  VERBOSE(1,"lmContainer* lmContainer::CreateLanguageModel(...) Language Model Type of " << infile << " is " << type << std::endl);
-
-  return lmContainer::CreateLanguageModel(type, nlf, dlf);
-}
-
-lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf)
-{
-  VERBOSE(1,"Language Model Type is " << type << std::endl);
-
-  lmContainer* lm=NULL;
-
-  switch (type) {
-			
-		case _IRSTLM_LMTABLE:
-                        VERBOSE(1,"_IRSTLM_LMTABLE" << std::endl);
-			lm = new lmtable(nlf, dlf);
-			break;
-			
-		case _IRSTLM_LMMACRO:
-                        VERBOSE(1,"_IRSTLM_LMMACRO" << std::endl);
-			lm = new lmmacro(nlf, dlf);
-			break;
+	
+	inline void error(const char* message)
+	{
+		std::cerr << message << "\n";
+		throw std::runtime_error(message);
+	}
+	
+	lmContainer::lmContainer()
+	{
+		requiredMaxlev=IRSTLM_REQUIREDMAXLEV_DEFAULT;
+		lmtype=_IRSTLM_LMUNKNOWN;
+		maxlev=0;
+	}
+	
+	int lmContainer::getLanguageModelType(std::string filename) //maps the first token of 'filename' to one of the _IRSTLM_LM* type codes
+	{
+		fstream inp(filename.c_str(),ios::in|ios::binary);
+		
+		if (!inp.good()) {
+			std::stringstream ss_msg;
+			ss_msg << "Failed to open " << filename;
+			exit_error(IRSTLM_ERROR_IO, ss_msg.str());
+		}
+		//read the first whitespace-delimited token of the file: it identifies the language model type
+		std::string header;
+		inp >> header;
+		inp.close();
+		
+		VERBOSE(1,"LM header:|" << header << "|" << std::endl);
+		
+		int type=_IRSTLM_LMUNKNOWN;
+		VERBOSE(1,"type: " << type << std::endl);
+		if (header == "lminterpolation" || header == "lmminterpolation" || header == "LMINTERPOLATION") { //"lmminterpolation" (doubled m) looks like a typo; kept so previously accepted files still match
+			type = _IRSTLM_LMINTERPOLATION;
+		} else if (header == "lmmacro" || header == "LMMACRO") {
+			type = _IRSTLM_LMMACRO;
+		} else if (header == "lmclass" || header == "LMCLASS") {
+			type = _IRSTLM_LMCLASS;
+		} else {
+			type = _IRSTLM_LMTABLE; //any other header is treated as a plain lmtable
+		}
+		VERBOSE(1,"type: " << type << std::endl);
+		
+		return type;
+	};
+	
+	lmContainer* lmContainer::CreateLanguageModel(const std::string infile, float nlf, float dlf)
+	{
+		int type = lmContainer::getLanguageModelType(infile);
+		
+		VERBOSE(1,"lmContainer* lmContainer::CreateLanguageModel(...) Language Model Type of " << infile << " is " << type << std::endl);
+		
+		return lmContainer::CreateLanguageModel(type, nlf, dlf);
+	}
+	
+	lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf)
+	{
+		VERBOSE(1,"Language Model Type is " << type << std::endl);
+		
+		lmContainer* lm=NULL;
+		
+		switch (type) {
+				
+			case _IRSTLM_LMTABLE:
+				VERBOSE(1,"_IRSTLM_LMTABLE" << std::endl);
+				lm = new lmtable(nlf, dlf);
+				break;
+				
+			case _IRSTLM_LMMACRO:
+				VERBOSE(1,"_IRSTLM_LMMACRO" << std::endl);
+				lm = new lmmacro(nlf, dlf);
+				break;
+				
+			case _IRSTLM_LMCLASS:
+				VERBOSE(1,"_IRSTLM_LMCLASS" << std::endl);
+				lm = new lmclass(nlf, dlf);
+				break;
+				
+			case _IRSTLM_LMINTERPOLATION:
+				VERBOSE(1,"_IRSTLM_LMINTERPOLATION" << std::endl);
+				lm = new lmInterpolation(nlf, dlf);
+				break;
+				
+			default:
+				VERBOSE(1,"UNKNOWN" << std::endl);
+				exit_error(IRSTLM_ERROR_DATA, "This language model type is unknown!");
+		}
+		VERBOSE(1,"lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf) lm:|" << (void*) lm << "|" << std::endl);
+		
+		lm->setLanguageModelType(type);
+		
+		VERBOSE(1,"lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf) lm->getLanguageModelType:|" << lm->getLanguageModelType() << "|" << std::endl)
+		return lm;
+	}
+	
+	bool lmContainer::filter(const string sfilter, lmContainer*& sublmC, const string skeepunigrams)
+	{
+		if (lmtype == _IRSTLM_LMTABLE) {
+			sublmC = lmContainer::CreateLanguageModel(lmtype,((lmtable*) this)->GetNgramcacheLoadFactor(),((lmtable*) this)->GetDictionaryLoadFactor());
 			
-		case _IRSTLM_LMCLASS:
-                        VERBOSE(1,"_IRSTLM_LMCLASS" << std::endl);
-			lm = new lmclass(nlf, dlf);
-			break;
+			//let know that table has inverted n-grams
+			sublmC->is_inverted(is_inverted());
+			sublmC->setMaxLoadedLevel(getMaxLoadedLevel());
+			sublmC->maxlevel(maxlevel());
 			
-		case _IRSTLM_LMINTERPOLATION:
-                        VERBOSE(1,"_IRSTLM_LMINTERPOLATION" << std::endl);
-			lm = new lmInterpolation(nlf, dlf);
-			break;
+			bool res=((lmtable*) this)->filter(sfilter, (lmtable*) sublmC, skeepunigrams);
 			
-		default:
-			VERBOSE(1,"UNKNOWN" << std::endl);
-			exit_error(IRSTLM_ERROR_DATA, "This language model type is unknown!");
-  }
-  VERBOSE(1,"lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf) lm:|" << (void*) lm << "|" << std::endl);
-
-  lm->setLanguageModelType(type);
-
-  VERBOSE(1,"lmContainer* lmContainer::CreateLanguageModel(int type, float nlf, float dlf) lm->getLanguageModelType:|" << lm->getLanguageModelType() << "|" << std::endl)
-  return lm;
-}
-
-bool lmContainer::filter(const string sfilter, lmContainer*& sublmC, const string skeepunigrams)
-{
-  if (lmtype == _IRSTLM_LMTABLE) {
-    sublmC = lmContainer::CreateLanguageModel(lmtype,((lmtable*) this)->GetNgramcacheLoadFactor(),((lmtable*) this)->GetDictionaryLoadFactor());
-
-    //let know that table has inverted n-grams
-    sublmC->is_inverted(is_inverted());
-    sublmC->setMaxLoadedLevel(getMaxLoadedLevel());
-    sublmC->maxlevel(maxlevel());
-
-    bool res=((lmtable*) this)->filter(sfilter, (lmtable*) sublmC, skeepunigrams);
-
-    return res;
-  }
-  return false;
-};
-
+			return res;
+		}
+		return false;
+	};
+	
 }//namespace irstlm
diff --git a/src/lmContainer.h b/src/lmContainer.h
index 2b4e3f0..4f18c3c 100644
--- a/src/lmContainer.h
+++ b/src/lmContainer.h
@@ -1,24 +1,24 @@
 // $Id: lmContainer.h 3686 2010-10-15 11:55:32Z bertoldi $
 
 /******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ 
+ ******************************************************************************/
 
 #ifndef MF_LMCONTAINER_H
 #define MF_LMCONTAINER_H
@@ -40,233 +40,218 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 typedef enum {BINARY,TEXT,YRANIB,NONE} OUTFILE_TYPE;
 
 typedef enum {LMT_FIND,    //!< search: find an entry
-        LMT_ENTER,   //!< search: enter an entry
-        LMT_INIT,    //!< scan: start scan
-        LMT_CONT     //!< scan: continue scan
+	LMT_ENTER,   //!< search: enter an entry
+	LMT_INIT,    //!< scan: start scan
+	LMT_CONT     //!< scan: continue scan
 } LMT_ACTION;
 
 namespace irstlm {
-class lmContainer
-{
-  static const bool debug=true;
-  static bool ps_cache_enabled;
-  static bool lmt_cache_enabled;
-
-protected:
-  int          lmtype; //auto reference to its own type
-  int          maxlev; //maximun order of sub LMs;
-  int  requiredMaxlev; //max loaded level, i.e. load up to requiredMaxlev levels
-
-public:
-
-  lmContainer();
-  virtual ~lmContainer() {};
-
-	 
-  virtual void load(const std::string &filename, int mmap=0) {
-    UNUSED(filename);
-    UNUSED(mmap);
-  };
-	
-  virtual void savetxt(const char *filename) {
-    UNUSED(filename);
-  };
-  virtual void savebin(const char *filename) {
-    UNUSED(filename);
-  };
-
-  virtual double getlogOOVpenalty() const {
-    return 0.0;
-  };
-  virtual double setlogOOVpenalty(int dub) {
-    UNUSED(dub);
-    return 0.0;
-  };
-  virtual double setlogOOVpenalty(double oovp) {
-    UNUSED(oovp);
-    return 0.0;
-  };
-
-  inline virtual dictionary* getDict() const {
-    return NULL;
-  };
-  inline virtual void maxlevel(int lev) {
-    maxlev = lev;
-  };
-  inline virtual int maxlevel() const {
-    return maxlev;
-  };
-  inline virtual void stat(int lev=0) {
-    UNUSED(lev);
-  };
-
-  inline virtual void setMaxLoadedLevel(int lev) {
-    requiredMaxlev=lev;
-  };
-  inline virtual int getMaxLoadedLevel() {
-    return requiredMaxlev;
-  };
-
-  virtual bool is_inverted(const bool flag) {
-    UNUSED(flag);
-    return false;
-  };
-  virtual bool is_inverted() {
-    return false;
-  };	
-	
-	double clprob(ngram ng) {
-		return clprob(ng, NULL, NULL, NULL, NULL, NULL, NULL);
-	}
-	double clprob(ngram ng, double* bow, int* bol) {
-		return clprob(ng, bow, bol, NULL, NULL, NULL, NULL);
-	}
-	double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize) {
-		return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, NULL);
-	}
-	double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize) {
-		return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, NULL);
-	}
-	virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize,bool* extendible) {
-		VERBOSE(3,"virtual double  lmContainer::clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) ng:|" << ng  << "|\n");
-		return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible);
-  };
-	virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize,bool* extendible) {
-		VERBOSE(3,"virtual double  lmContainer::clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) ng:|" << ng  << "|\n");
-    UNUSED(ng);
-    UNUSED(bow);
-    UNUSED(bol);
-    UNUSED(maxsuffidx);
-    UNUSED(maxsuffptr);
-    UNUSED(statesize);
-    UNUSED(extendible);
-    return 0.0;
-  };
-
-  virtual double clprob(int* ng, int ngsize, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize,bool* extendible) {
-		VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL)\n");
-		return clprob(ng, ngsize, bow, bol, NULL, maxsuffptr, statesize, extendible);
-  };
-
-  virtual double clprob(int* ng, int ngsize, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize,bool* extendible) {
-	VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL)\n");
-    UNUSED(ng);
-    UNUSED(ngsize);
-    UNUSED(bow);
-    UNUSED(bol);
-    UNUSED(maxsuffidx);
-    UNUSED(maxsuffptr);
-    UNUSED(statesize);
-    UNUSED(extendible);
-    return 0.0;
-  };
-
-	
-  virtual const char *cmaxsuffptr(ngram ng, unsigned int* statesize=NULL)
-  {
-    return cmaxsuffptr(ng, statesize);
-  }
-	
-  virtual const char *cmaxsuffptr(int* ng, int ngsize, unsigned int* statesize=NULL)
-  {
-    return cmaxsuffptr(ng, ngsize, statesize);
-  }
-	
-  virtual ngram_state_t cmaxsuffidx(ngram ng, unsigned int* statesize=NULL)
-  {
-    UNUSED(ng);
-    UNUSED(statesize);
-    return NULL;
-  }
-
-  virtual ngram_state_t cmaxsuffidx(int* ng, int ngsize, unsigned int* statesize=NULL)
-  {
-    UNUSED(ng);
-    UNUSED(ngsize);
-    UNUSED(statesize);
-    return NULL;
-  }
-
-  virtual inline int get(ngram& ng) {
-    UNUSED(ng);
-    return 0;
-  }
-        
-  virtual int get(ngram& ng,int n,int lev){
-    UNUSED(ng);
-    UNUSED(n);
-    UNUSED(lev);
-    return 0;
-  }
-        
-  virtual int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev){
-    UNUSED(ng);
-    UNUSED(h);
-    UNUSED(action);
-    UNUSED(lev);
-    return 0;     
-  }
-
-
-  virtual void used_caches() {};
-  virtual void init_caches(int uptolev) {
-    UNUSED(uptolev);
-  };
-  virtual void check_caches_levels() {};
-  virtual void reset_caches() {};
-
-  virtual void  reset_mmap() {};
-
-  void inline setLanguageModelType(int type) {
-    lmtype=type;
-  };
-  int getLanguageModelType() const {
-    return lmtype;
-  };
-  static int getLanguageModelType(std::string filename);
-
-  inline virtual void dictionary_incflag(const bool flag) {
-    UNUSED(flag);
-  };
-
-  virtual bool filter(const string sfilter, lmContainer*& sublmt, const string skeepunigrams);
-
-  static lmContainer* CreateLanguageModel(const std::string infile, float nlf=0.0, float dlf=0.0);
-  static lmContainer* CreateLanguageModel(int type, float nlf=0.0, float dlf=0.0);
-
-  inline virtual bool is_OOV(int code) {
-    UNUSED(code);
-    return false;
-  };
-
-
-  inline static bool is_lmt_cache_enabled(){
-    VERBOSE(3,"inline static bool is_lmt_cache_enabled() " << lmt_cache_enabled << std::endl);
-    return lmt_cache_enabled;
-  }
-
-  inline static bool is_ps_cache_enabled(){
-    VERBOSE(3,"inline static bool is_ps_cache_enabled() " << ps_cache_enabled << std::endl);
-    return ps_cache_enabled;
-  }
-
-  inline static bool is_cache_enabled(){
-    return is_lmt_cache_enabled() && is_ps_cache_enabled();
-  }
-	
-	virtual int addWord(const char *w){
-		getDict()->incflag(1);
-		int c=getDict()->encode(w);
-		getDict()->incflag(0);
-		return c;
-	}
-	
-	virtual void print_table_stat(){
-    VERBOSE(3,"virtual void lmContainer::print_table_stat() "<< std::endl);
+	class lmContainer
+	{
+		static const bool debug=true;
+		static bool ps_cache_enabled;
+		static bool lmt_cache_enabled;
+		
+	protected:
+		int          lmtype; //auto reference to its own type
+		int          maxlev; //maximun order of sub LMs;
+		int  requiredMaxlev; //max loaded level, i.e. load up to requiredMaxlev levels
+		
+	public:
+		
+		lmContainer();
+		virtual ~lmContainer() {};
+		
+		
+		virtual void load(const std::string &filename, int mmap=0) {
+			UNUSED(filename);
+			UNUSED(mmap);
+		};
+		
+		virtual void savetxt(const char *filename) {
+			UNUSED(filename);
+		};
+		virtual void savebin(const char *filename) {
+			UNUSED(filename);
+		};
+		
+		virtual double getlogOOVpenalty() const {
+			return 0.0;
+		};
+		virtual double setlogOOVpenalty(int dub) {
+			UNUSED(dub);
+			return 0.0;
+		};
+		virtual double setlogOOVpenalty(double oovp) {
+			UNUSED(oovp);
+			return 0.0;
+		};
+		
+		inline virtual dictionary* getDict() const {
+			return NULL;
+		};
+		inline virtual void maxlevel(int lev) {
+			maxlev = lev;
+		};
+		inline virtual int maxlevel() const {
+			return maxlev;
+		};
+		inline virtual void stat(int lev=0) {
+			UNUSED(lev);
+		};
+		
+		inline virtual void setMaxLoadedLevel(int lev) {
+			requiredMaxlev=lev;
+		};
+		inline virtual int getMaxLoadedLevel() {
+			return requiredMaxlev;
+		};
+		
+		virtual bool is_inverted(const bool flag) {
+			UNUSED(flag);
+			return false;
+		};
+		virtual bool is_inverted() {
+			return false;
+		};	
+		
+		virtual double clprob(ngram ng) { return clprob(ng, NULL, NULL, NULL, NULL, NULL, NULL, NULL); }
+		virtual double clprob(ngram ng, double* bow) { return clprob(ng, bow, NULL, NULL, NULL, NULL, NULL, NULL); }
+		virtual double clprob(ngram ng, double* bow, int* bol) { return clprob(ng, bow, bol, NULL, NULL, NULL, NULL, NULL); }
+		virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr) { return clprob(ng, bow, bol, NULL, maxsuffptr, NULL, NULL, NULL); }
+		virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize) { return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, NULL, NULL); }
+		virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible) { return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, NULL); };
+		virtual double clprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow) { return clprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, lastbow); }
+		
+		virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx) { return clprob(ng, bow, bol, maxsuffidx, NULL, NULL, NULL, NULL); }
+		virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr) { return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, NULL, NULL, NULL); }
+		virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize) { return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, NULL, NULL); }
+		virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible) { return clprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, NULL); };
+		virtual double clprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow) { UNUSED(ng); UNUSED(bow); UNUSED(bol); UNUSED(maxsuffidx); UNUSED(maxsuffptr); UNUSED(statesize); UNUSED(extendible); UNUSED(lastbow); return 0.0; } //base default (0.0, outputs untouched): all shorter ngram overloads forward here, so forwarding to itself would recurse forever unless overridden
+		
+		virtual double clprob(int* ng, int ngsize=0, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL, double* lastbow=NULL) {
+			VERBOSE(3,"virtual double lmContainer::clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL, double* lastbow=NULL)\n");
+			
+			//build an ngram over this LM's dictionary from the ngsize word codes in ng, then delegate to the ngram-based overload
+			ngram ong(getDict());
+			ong.pushc(ng,ngsize);
+			MY_ASSERT (ong.size == ngsize);
+			
+			return clprob(ong, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow);
+		};
+		
+		virtual const char *cmaxsuffptr(ngram ng, unsigned int* statesize=NULL)
+		{
+			UNUSED(ng);
+			UNUSED(statesize);
+			return NULL;
+		}
+		
+		virtual const char *cmaxsuffptr(int* ng, int ngsize, unsigned int* statesize=NULL)
+		{
+			//build an ngram over this LM's dictionary from the ngsize word codes in ng
+			ngram ong(getDict());
+			ong.pushc(ng,ngsize);
+			MY_ASSERT (ong.size == ngsize);
+			return cmaxsuffptr(ong, statesize); //delegate to the ngram overload; passing (ng, ngsize, statesize) again would call this function forever
+		}
+		
+		virtual ngram_state_t cmaxsuffidx(ngram ng, unsigned int* statesize=NULL)
+		{
+			UNUSED(ng);
+			UNUSED(statesize);
+			return 0;
+		}
+		
+		virtual ngram_state_t cmaxsuffidx(int* ng, int ngsize, unsigned int* statesize=NULL)
+		{
+			//create the actual ngram 
+			ngram ong(getDict());                
+			ong.pushc(ng,ngsize);
+			MY_ASSERT (ong.size == ngsize); 
+			return cmaxsuffidx(ong,statesize);
+		}
+		
+		virtual inline int get(ngram& ng) {
+			UNUSED(ng);
+			return 0;
+		}
+		
+		virtual int get(ngram& ng,int n,int lev){
+			UNUSED(ng);
+			UNUSED(n);
+			UNUSED(lev);
+			return 0;
+		}
+		
+		virtual int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev){
+			UNUSED(ng);
+			UNUSED(h);
+			UNUSED(action);
+			UNUSED(lev);
+			return 0;     
+		}
+		
+		
+		virtual void used_caches() {};
+		virtual void init_caches(int uptolev) {
+			UNUSED(uptolev);
+		};
+		virtual void check_caches_levels() {};
+		virtual void reset_caches() {};
+		
+		virtual void  reset_mmap() {};
+		
+		void inline setLanguageModelType(int type) {
+			lmtype=type;
+		};
+		int getLanguageModelType() const {
+			return lmtype;
+		};
+		static int getLanguageModelType(std::string filename);
+		
+		inline virtual void dictionary_incflag(const bool flag) {
+			UNUSED(flag);
+		};
+		
+		virtual bool filter(const string sfilter, lmContainer*& sublmt, const string skeepunigrams);
+		
+		static lmContainer* CreateLanguageModel(const std::string infile, float nlf=0.0, float dlf=0.0);
+		static lmContainer* CreateLanguageModel(int type, float nlf=0.0, float dlf=0.0);
+		
+		inline virtual bool is_OOV(int code) {
+			UNUSED(code);
+			return false;
+		};
+		
+		
+		inline static bool is_lmt_cache_enabled(){
+			VERBOSE(3,"inline static bool is_lmt_cache_enabled() " << lmt_cache_enabled << std::endl);
+			return lmt_cache_enabled;
+		}
+		
+		inline static bool is_ps_cache_enabled(){
+			VERBOSE(3,"inline static bool is_ps_cache_enabled() " << ps_cache_enabled << std::endl);
+			return ps_cache_enabled;
+		}
+		
+		inline static bool is_cache_enabled(){
+			return is_lmt_cache_enabled() && is_ps_cache_enabled();
+		}
+		
+		virtual int addWord(const char *w){
+			getDict()->incflag(1);
+			int c=getDict()->encode(w);
+			getDict()->incflag(0);
+			return c;
+		}
+		
+		virtual void print_table_stat(){
+			VERBOSE(3,"virtual void lmContainer::print_table_stat() "<< std::endl);
+		};
+		
 	};
 	
-};
-
 }//namespace irstlm
 
 #endif
diff --git a/src/lmInterpolation.cpp b/src/lmInterpolation.cpp
index 113c18b..dbc9c5f 100644
--- a/src/lmInterpolation.cpp
+++ b/src/lmInterpolation.cpp
@@ -141,7 +141,7 @@ namespace irstlm {
 	
 	//return log10 prob of an ngram
 	//	double lmInterpolation::clprob(ngram ng, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
-	double lmInterpolation::clprob(ngram ng, double* bow,int* bol,ngram_state_t* maxsuffidx, char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	double lmInterpolation::clprob(ngram ng, double* bow,int* bol,ngram_state_t* maxsuffidx, char** maxsuffptr,unsigned int* statesize,bool* extendible, double* lastbow)
 	{
 		
 		double pr=0.0;
@@ -152,6 +152,7 @@ namespace irstlm {
 		unsigned int _statesize=0,actualstatesize=0;
 		int _bol=0,actualbol=MAX_NGRAM;
 		double _bow=0.0,actualbow=0.0; 
+		double _lastbow=0.0,actuallastbow=0.0; 
 		bool _extendible=false;
 		bool actualextendible=false;
 		
@@ -163,7 +164,7 @@ namespace irstlm {
 				ngram _ng(m_lm[i]->getDict());
 				_ng.trans(ng);
 				//				_logpr=m_lm[i]->clprob(_ng,&_bow,&_bol,&_maxsuffptr,&_statesize,&_extendible);				
-				_logpr=m_lm[i]->clprob(_ng,&_bow,&_bol,&_maxsuffidx,&_maxsuffptr,&_statesize,&_extendible);
+				_logpr=m_lm[i]->clprob(_ng,&_bow,&_bol,&_maxsuffidx,&_maxsuffptr,&_statesize,&_extendible,&_lastbow); //collect the sub-LM lastbow locally so it can be merged into actuallastbow below
 				
 				IFVERBOSE(3){
 					//cerr.precision(10);
@@ -173,6 +174,7 @@ namespace irstlm {
 					VERBOSE(3," _statesize:" << _statesize << std::endl);
 					VERBOSE(3," _bow:" << _bow << std::endl);
 					VERBOSE(3," _bol:" << _bol << std::endl);
+					VERBOSE(3," _lastbow:" << _lastbow << std::endl);
 				}
 				
 				/*
@@ -183,6 +185,7 @@ namespace irstlm {
 				 //What is the bow of a LM interpolation? The weighted sum of the bow of the submodels
 				 //What is the prob of a LM interpolation? The weighted sum of the prob of the submodels
 				 //What is the extendible flag of a LM interpolation? true if the extendible flag is one for any LM
+				 //What is the lastbow of a LM interpolation? The weighted sum of the lastbow of the submodels
 				 */
 				
 				pr+=m_weight[i]*pow(10.0,_logpr);
@@ -199,6 +202,9 @@ namespace irstlm {
 				if (_extendible) {
 					actualextendible=true; //set extendible flag to true if the ngram is extendible for any LM
 				}
+				if (_lastbow < actuallastbow) {
+					actuallastbow=_lastbow; //backoff limit of LM[i]
+				}
 			}
 		}
 		if (bol) *bol=actualbol;
@@ -206,30 +212,31 @@ namespace irstlm {
 		if (maxsuffptr) *maxsuffptr=actualmaxsuffptr;
 		if (maxsuffidx) *maxsuffidx=actualmaxsuffidx;
 		if (statesize) *statesize=actualstatesize;
-		if (extendible) {
-			*extendible=actualextendible;
-			//    delete _extendible;
-		}
+		if (extendible) *extendible=actualextendible;
+		if (lastbow) *lastbow=actuallastbow; //store through lastbow; writing through bol here overwrote the backoff level set above and dereferenced NULL when bol was not requested
 		
 		if (statesize) VERBOSE(3, " statesize:" << *statesize << std::endl);
 		if (bow) VERBOSE(3, " bow:" << *bow << std::endl);
 		if (bol) VERBOSE(3, " bol:" << *bol << std::endl);
+		if (lastbow) VERBOSE(3, " lastbow:" << *lastbow << std::endl);
 		
 		return log10(pr);
 	}
 	
-	//	double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
-	double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,ngram_state_t* maxsuffidx,char** maxsuffptr,unsigned int* statesize,bool* extendible)
-	{
-		
-		//create the actual ngram
-		ngram ong(dict);
-		ong.pushc(codes,sz);
-		MY_ASSERT (ong.size == sz);
-		
-		//		return clprob(ong, bow, bol, maxsuffptr, statesize, extendible);
-		return clprob(ong, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible);
-	}
+	/*
+	 //	double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	 double lmInterpolation::clprob(int* codes, int sz, double* bow,int* bol,ngram_state_t* maxsuffidx,char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	 {
+	 
+	 //create the actual ngram
+	 ngram ong(dict);
+	 ong.pushc(codes,sz);
+	 MY_ASSERT (ong.size == sz);
+	 
+	 //		return clprob(ong, bow, bol, maxsuffptr, statesize, extendible);
+	 return clprob(ong, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible);
+	 }
+	 */
 	
 	const char *lmInterpolation::cmaxsuffptr(ngram ng, unsigned int* statesize){
 		
@@ -270,7 +277,8 @@ namespace irstlm {
 		
 		return maxsuffptr;
 	}
-	
+
+	/*
   const char *lmInterpolation::cmaxsuffptr(int* codes, int sz, unsigned int* statesize)
 	{
 		//create the actual ngram
@@ -279,7 +287,7 @@ namespace irstlm {
 		MY_ASSERT (ong.size == sz);
 		return cmaxsuffptr(ong, statesize);
 	}
-	
+	*/
 	ngram_state_t lmInterpolation::cmaxsuffidx(ngram ng, unsigned int* statesize)
 	{
 		ngram_state_t maxsuffidx=0;
@@ -320,7 +328,8 @@ namespace irstlm {
 		
 		return maxsuffidx;
 	}
-	
+
+	/*
   ngram_state_t lmInterpolation::cmaxsuffidx(int* codes, int sz, unsigned int* statesize)
 	{
 		//create the actual ngram
@@ -329,7 +338,7 @@ namespace irstlm {
 		MY_ASSERT (ong.size == sz);
 		return cmaxsuffidx(ong, statesize);
 	}
-	
+	*/
 	
 	double lmInterpolation::setlogOOVpenalty(int dub)
 	{
diff --git a/src/lmInterpolation.h b/src/lmInterpolation.h
index b1fe743..12a7add 100644
--- a/src/lmInterpolation.h
+++ b/src/lmInterpolation.h
@@ -1,24 +1,24 @@
 // $Id: lmInterpolation.h 3686 2010-10-15 11:55:32Z bertoldi $
 
 /******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ 
+ ******************************************************************************/
 
 #ifndef MF_LMINTERPOLATION_H
 #define MF_LMINTERPOLATION_H
@@ -34,116 +34,116 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 #include "n_gram.h"
 #include "lmContainer.h"
 
-	
-namespace irstlm {
-/*
-interpolation of several sub LMs
-*/
-
-#define LMINTERPOLATION_MAX_TOKEN 3
-
-class lmInterpolation: public lmContainer
-{
-  static const bool debug=true;
-  int m_number_lm;
-  int order;
-  int dictionary_upperbound; //set by user
-  double  logOOVpenalty; //penalty for OOV words (default 0)
-  bool      isInverted;
-  int memmap;  //level from which n-grams are accessed via mmap
 
-  std::vector<double> m_weight;
-  std::vector<std::string> m_file;
-  std::vector<bool> m_isinverted;
-  std::vector<lmContainer*> m_lm;
-
-  int               maxlev; //maximun order of sub LMs;
-
-  float ngramcache_load_factor;
-  float dictionary_load_factor;
-
-  dictionary *dict; // dictionary for all interpolated LMs
-
-public:
-
-  lmInterpolation(float nlf=0.0, float dlfi=0.0);
-  virtual ~lmInterpolation() {};
-
-  void load(const std::string &filename,int mmap=0);
-  lmContainer* load_lm(int i, int memmap, float nlf, float dlf);
-
-  double clprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-	
-	const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
-  const char *cmaxsuffptr(int* codes, int sz, unsigned int* size=NULL);
-  ngram_state_t cmaxsuffidx(ngram ong, unsigned int* size=NULL);
-  ngram_state_t cmaxsuffidx(int* codes, int sz, unsigned int* size=NULL);
-	
-
-  int maxlevel() const {
-    return maxlev;
-  };
-
-  virtual inline void setDict(dictionary* d) {
-		if (dict) delete dict;
-    dict=d;
-  };
+namespace irstlm {
+	/*
+	 interpolation of several sub LMs
+	 */
 	
-  virtual inline dictionary* getDict() const {
-    return dict;
-  };
-
-  //set penalty for OOV words
-  virtual inline double getlogOOVpenalty() const {
-    return logOOVpenalty;
-  }
-
-  virtual double setlogOOVpenalty(int dub);
-
-  double inline setlogOOVpenalty(double oovp) {
-    return logOOVpenalty=oovp;
-  }
-
-//set the inverted flag (used to set the inverted flag of each subLM, when loading)
-  inline bool is_inverted(const bool flag) {
-    return isInverted = flag;
-  }
-
-//for an interpolation LM this variable does not make sense
-//for compatibility, we return true if all subLM return true
-  inline bool is_inverted() {
-    for (int i=0; i<m_number_lm; i++) {
-      if (m_isinverted[i] == false) return false;
-    }
-    return true;
-  }
-
-  inline virtual void dictionary_incflag(const bool flag) {
-		dict->incflag(flag);
-  };
-
-  inline virtual bool is_OOV(int code) { //returns true if the word is OOV for each subLM
-    for (int i=0; i<m_number_lm; i++) {
-      int _code=m_lm[i]->getDict()->encode(getDict()->decode(code));
-      if (m_lm[i]->is_OOV(_code) == false) return false;
-    }
-    return true;
-  }
+#define LMINTERPOLATION_MAX_TOKEN 3
 	
-	virtual int addWord(const char *w){
-		for (int i=0; i<m_number_lm; i++) {
-			m_lm[i]->getDict()->incflag(1);
-			m_lm[i]->getDict()->encode(w);
-			m_lm[i]->getDict()->incflag(0);
+	class lmInterpolation: public lmContainer // lmContainer built on several sub-LMs (m_lm) plus one dictionary of its own (dict)
+	{
+		static const bool debug=true;
+		int m_number_lm; // number of sub-LMs; loop bound for m_lm and m_isinverted below
+		int order;
+		int dictionary_upperbound; //set by user
+		double  logOOVpenalty; //penalty for OOV words (default 0)
+		bool      isInverted; // value stored by is_inverted(bool)
+		int memmap;  //level from which n-grams are accessed via mmap
+		
+		std::vector<double> m_weight; // presumably one interpolation weight per sub-LM -- confirm in load()
+		std::vector<std::string> m_file; // presumably the file name of each sub-LM -- confirm in load()
+		std::vector<bool> m_isinverted; // per-sub-LM inverted flag, read by is_inverted()
+		std::vector<lmContainer*> m_lm; // the sub-LMs themselves
+		
+		int               maxlev; //maximum order of sub LMs;
+		
+		float ngramcache_load_factor;
+		float dictionary_load_factor;
+		
+		dictionary *dict; // dictionary for all interpolated LMs
+		
+	public:
+		
+		lmInterpolation(float nlf=0.0, float dlfi=0.0);
+		virtual ~lmInterpolation() {}; // NOTE(review): empty body -- nothing held by this class is released here
+		
+		virtual void load(const std::string &filename,int mmap=0);
+		lmContainer* load_lm(int i, int memmap, float nlf, float dlf);
+		
+		virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL); // every out-parameter defaults to NULL
+		//  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+		
+		const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
+		//  const char *cmaxsuffptr(int* codes, int sz, unsigned int* size=NULL);
+		ngram_state_t cmaxsuffidx(ngram ong, unsigned int* size=NULL);
+		//  ngram_state_t cmaxsuffidx(int* codes, int sz, unsigned int* size=NULL);
+		
+		
+		int maxlevel() const { // returns maxlev
+			return maxlev;
+		};
+		
+		virtual inline void setDict(dictionary* d) { // replaces dict with d after deleting the previous dictionary
+			if (dict) delete dict; // NOTE(review): the old dictionary is deleted even when d is that same pointer
+			dict=d;
+		};
+		
+		virtual inline dictionary* getDict() const {
+			return dict;
+		};
+		
+		//get penalty for OOV words
+		virtual inline double getlogOOVpenalty() const {
+			return logOOVpenalty;
 		}
-		getDict()->incflag(1);
-		int c=getDict()->encode(w);
-		getDict()->incflag(0);
-		return c;
-	}
-	
-};
+		
+		virtual double setlogOOVpenalty(int dub);
+		
+		double inline setlogOOVpenalty(double oovp) { // stores oovp as the penalty and returns it
+			return logOOVpenalty=oovp;
+		}
+		
+		//set the inverted flag (used to set the inverted flag of each subLM, when loading)
+		inline bool is_inverted(const bool flag) {
+			return isInverted = flag;
+		}
+		
+		//for an interpolation LM this variable does not make sense
+		//for compatibility, we return true if all subLM return true
+		inline bool is_inverted() {
+			for (int i=0; i<m_number_lm; i++) {
+				if (m_isinverted[i] == false) return false;
+			}
+			return true; // also reached when m_number_lm is 0
+		}
+		
+		inline virtual void dictionary_incflag(const bool flag) { // forwards flag to dict->incflag()
+			dict->incflag(flag);
+		};
+		
+		inline virtual bool is_OOV(int code) { //returns true if the word is OOV for each subLM
+			for (int i=0; i<m_number_lm; i++) {
+				int _code=m_lm[i]->getDict()->encode(getDict()->decode(code)); // decode with this dictionary, re-encode with the sub-LM's own
+				if (m_lm[i]->is_OOV(_code) == false) return false;
+			}
+			return true;
+		}
+		
+		virtual int addWord(const char *w){ // encodes w in every sub-LM dictionary and in dict; returns the code given by dict
+			for (int i=0; i<m_number_lm; i++) {
+				m_lm[i]->getDict()->incflag(1); // presumably lets encode() insert a new word -- confirm in dictionary
+				m_lm[i]->getDict()->encode(w);
+				m_lm[i]->getDict()->incflag(0);
+			}
+			getDict()->incflag(1);
+			int c=getDict()->encode(w);
+			getDict()->incflag(0);
+			return c;
+		}
+		
+	};
 }//namespace irstlm
 
 #endif
diff --git a/src/lmclass.cpp b/src/lmclass.cpp
index 8788a00..b578eb3 100644
--- a/src/lmclass.cpp
+++ b/src/lmclass.cpp
@@ -1,24 +1,24 @@
 // $Id: lmclass.cpp 3631 2010-10-07 12:04:12Z bertoldi $
 
 /******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ 
+ ******************************************************************************/
 #include <stdio.h>
 #include <stdlib.h>
 #include <fcntl.h>
@@ -37,7 +37,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 using namespace std;
 
 // local utilities: start
-	
+
 int parseWords(char *sentence, const char **words, int max);
 
 inline void error(const char* message)
@@ -49,190 +49,190 @@ inline void error(const char* message)
 // local utilities: end
 
 namespace irstlm {
-
-lmclass::lmclass(float nlf, float dlfi):lmtable(nlf,dlfi)
-{
-  MaxMapSize=1000000;
-  MapScore= (double *)malloc(MaxMapSize*sizeof(double));// //array of probabilities
-  memset(MapScore,0,MaxMapSize*sizeof(double));
-  MapScoreN=0;
-  dict = new dictionary((char *)NULL,MaxMapSize); //word to cluster dictionary
-};
-
-lmclass::~lmclass()
-{
-  free (MapScore);
-  delete dict;
-}
-
-void lmclass::load(const std::string &filename,int memmap)
-{
-  VERBOSE(2,"lmclass::load(const std::string &filename,int memmap)" << std::endl);
-
-  //get info from the configuration file
-  fstream inp(filename.c_str(),ios::in|ios::binary);
-
-  char line[MAX_LINE];
-  const char* words[LMCLASS_MAX_TOKEN];
-  int tokenN;
-  inp.getline(line,MAX_LINE,'\n');
-  tokenN = parseWords(line,words,LMCLASS_MAX_TOKEN);
-
-  if (tokenN != 2 || ((strcmp(words[0],"LMCLASS") != 0) && (strcmp(words[0],"lmclass")!=0)))
-    error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCLASS LM_order\nfilename_of_LM\nfilename_of_map");
-
-  maxlev = atoi(words[1]);
-  std::string lmfilename;
-  if (inp.getline(line,MAX_LINE,'\n')) {
-    tokenN = parseWords(line,words,LMCLASS_MAX_TOKEN);
-    lmfilename = words[0];
-  } else {
-    error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCLASS LM_order\nfilename_of_LM\nfilename_of_map");
-  }
-
-  std::string W2Cdict = "";
-  if (inp.getline(line,MAX_LINE,'\n')) {
-    tokenN = parseWords(line,words,LMCLASS_MAX_TOKEN);
-    W2Cdict = words[0];
-  } else {
-    error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCLASS LM_order\nfilename_of_LM\nfilename_of_map");
-  }
-  inp.close();
-
-  std::cerr << "lmfilename:" << lmfilename << std::endl;
-  if (W2Cdict != "") {
-    std::cerr << "mapfilename:" << W2Cdict << std::endl;
-  } else {
-    error((char*)"ERROR: you must specify a map!");
-  }
-
-
-  // Load the (possibly binary) LM
-  inputfilestream inpLM(lmfilename.c_str());
-  if (!inpLM.good()) {
-    std::cerr << "Failed to open " << lmfilename << "!" << std::endl;
-    exit(1);
-  }
-  lmtable::load(inpLM,lmfilename.c_str(),NULL,memmap);
-
-  inputfilestream inW2C(W2Cdict);
-  if (!inW2C.good()) {
-    std::cerr << "Failed to open " << W2Cdict << "!" << std::endl;
-    exit(1);
-  }
-  loadMap(inW2C);
-  getDict()->genoovcode();
-
-  VERBOSE(2,"OOV code of lmclass is " << getDict()->oovcode() << " mapped into " << getMap(getDict()->oovcode())<< "\n");
-  getDict()->incflag(1);
-}
-
-void lmclass::loadMap(istream& inW2C)
-{
-
-  double lprob=0.0;
-  int howmany=0;
-
-  const char* words[1 + LMTMAXLEV + 1 + 1];
-
-  //open input stream and prepare an input string
-  char line[MAX_LINE];
-
-  dict->incflag(1); //can add to the map dictionary
-
-  cerr<<"loadW2Cdict()...\n";
-  //save freq of EOS and BOS
-
-  loadMapElement(dict->BoS(),lmtable::dict->BoS(),0.0);
-  loadMapElement(dict->EoS(),lmtable::dict->EoS(),0.0);
-
-  //should i add <unk> to the dict or just let the trans_freq handle <unk>
-  loadMapElement(dict->OOV(),lmtable::dict->OOV(),0.0);
-
-  while (inW2C.getline(line,MAX_LINE)) {
-    if (strlen(line)==MAX_LINE-1) {
-      cerr << "lmtable::loadW2Cdict: input line exceed MAXLINE ("
-           << MAX_LINE << ") chars " << line << "\n";
-      exit(1);
-    }
-
-    howmany = parseWords(line, words, 4); //3
-
-    if(howmany == 3) {
-      MY_ASSERT(sscanf(words[2], "%lf", &lprob));
-      lprob=(double)log10(lprob);
-    } else if(howmany==2) {
-
-      VERBOSE(3,"No score for the pair (" << words[0] << "," << words[1] << "); set to default 1.0\n");
-
-      lprob=0.0;
-    } else {
-      cerr << "parseline: not enough entries" << line << "\n";
-      exit(1);
-    }
-    loadMapElement(words[0],words[1],lprob);
-
-    //check if the are available position in MapScore
-    checkMap();
-  }
-
-  VERBOSE(2,"There are " << MapScoreN << " entries in the map\n");
-
-  dict->incflag(0); //can NOT add to the dictionary of lmclass
-}
-
-void lmclass::checkMap()
-{
-  if (MapScoreN > MaxMapSize) {
-    MaxMapSize=2*MapScoreN;
-    MapScore = (double*) reallocf(MapScore, sizeof(double)*(MaxMapSize));
-    VERBOSE(2,"In lmclass::checkMap(...) MaxMapSize=" <<  MaxMapSize  << " MapScoreN=" <<  MapScoreN  << "\n");
-  }
-}
-
-void lmclass::loadMapElement(const char* in, const char* out, double sc)
-{
-  //freq of word (in) encodes the ID of the class (out)
-  //save the probability associated with the pair (in,out)
-  int wcode=dict->encode(in);
-  dict->freq(wcode,lmtable::dict->encode(out));
-  MapScore[wcode]=sc;
-  VERBOSE(3,"In lmclass::loadMapElement(...) in=" << in  << " wcode=" <<  wcode << " out=" << out << " ccode=" << lmtable::dict->encode(out) << " MapScoreN=" << MapScoreN  << "\n");
-
-  if (wcode >= MapScoreN) MapScoreN++; //increment size of the array MapScore if the element is new
-}
 	
-//double lmclass::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,bool* extendible)
-double lmclass::lprob(ngram ong,double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr,unsigned int* statesize,bool* extendible)
-{
-  double lpr=getMapScore(*ong.wordp(1));
-
-  VERBOSE(3,"In lmclass::lprob(...) Mapscore    = " <<  lpr  << "\n");
-
-  //convert ong to it's clustered encoding
-  ngram mapped_ng(lmtable::getDict());
-  //  mapped_ng.trans_freq(ong);
-  mapping(ong,mapped_ng);
-
-//  lpr+=lmtable::clprob(mapped_ng,bow,bol,maxsuffptr,statesize, extendible);
-  lpr+=lmtable::clprob(mapped_ng,bow,bol,maxsuffidx,maxsuffptr,statesize, extendible);
+	lmclass::lmclass(float nlf, float dlfi):lmtable(nlf,dlfi) // nlf and dlfi are forwarded unchanged to the lmtable base
+	{
+		MaxMapSize=1000000; // initial capacity of MapScore, in entries
+		MapScore= (double *)malloc(MaxMapSize*sizeof(double));// array of per-word scores, indexed by word code
+		memset(MapScore,0,MaxMapSize*sizeof(double)); // NOTE(review): the malloc result is not checked before this use
+		MapScoreN=0; // no entry stored yet
+		dict = new dictionary((char *)NULL,MaxMapSize); //word to cluster dictionary
+	};
 	
-  VERBOSE(3,"In lmclass::lprob(...) global prob  = " <<  lpr  << "\n");
-  return lpr;
-}
-
-void lmclass::mapping(ngram &in, ngram &out)
-{
-  int insize = in.size;
-  VERBOSE(3,"In lmclass::mapping(ngram &in, ngram &out) in    = " <<  in  << "\n");
-
-  // map the input sequence (in) into the corresponding output sequence (out), by applying the provided map
-  for (int i=insize; i>0; i--) {
-    out.pushc(getMap(*in.wordp(i)));
-  }
-
-  VERBOSE(3,"In lmclass::mapping(ngram &in, ngram &out) out    = " <<  out  << "\n");
-  return;
-}
+	lmclass::~lmclass() // releases MapScore and dict
+	{
+		free (MapScore); // MapScore comes from malloc/reallocf, so free() is the matching call
+		delete dict;
+	}
+	
+	void lmclass::load(const std::string &filename,int memmap)
+	{
+		VERBOSE(2,"lmclass::load(const std::string &filename,int memmap)" << std::endl);
+		//Reads the lmclass configuration file, then loads the LM and the word-to-class map it names.
+		//get info from the configuration file: header line, then LM filename, then map filename
+		fstream inp(filename.c_str(),ios::in|ios::binary);
+		
+		char line[MAX_LINE];
+		const char* words[LMCLASS_MAX_TOKEN];
+		int tokenN;
+		inp.getline(line,MAX_LINE,'\n');
+		tokenN = parseWords(line,words,LMCLASS_MAX_TOKEN);
+		
+		if (tokenN != 2 || ((strcmp(words[0],"LMCLASS") != 0) && (strcmp(words[0],"lmclass")!=0)))
+			error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCLASS LM_order\nfilename_of_LM\nfilename_of_map");
+		
+		maxlev = atoi(words[1]);
+		std::string lmfilename;
+		//words[0] is read only when parseWords found a token, so a blank line is rejected like a missing one
+		if (inp.getline(line,MAX_LINE,'\n') && parseWords(line,words,LMCLASS_MAX_TOKEN) > 0) {
+			lmfilename = words[0];
+		} else {
+			error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCLASS LM_order\nfilename_of_LM\nfilename_of_map");
+		}
+		
+		std::string W2Cdict = "";
+		//same guard for the map filename on the third line
+		if (inp.getline(line,MAX_LINE,'\n') && parseWords(line,words,LMCLASS_MAX_TOKEN) > 0) {
+			W2Cdict = words[0];
+		} else {
+			error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMCLASS LM_order\nfilename_of_LM\nfilename_of_map");
+		}
+		inp.close();
+		
+		std::cerr << "lmfilename:" << lmfilename << std::endl;
+		if (W2Cdict != "") {
+			std::cerr << "mapfilename:" << W2Cdict << std::endl;
+		} else {
+			error((char*)"ERROR: you must specify a map!");
+		}
+		
+		
+		// Load the (possibly binary) LM
+		inputfilestream inpLM(lmfilename.c_str());
+		if (!inpLM.good()) {
+			std::cerr << "Failed to open " << lmfilename << "!" << std::endl;
+			exit(1);
+		}
+		lmtable::load(inpLM,lmfilename.c_str(),NULL,memmap); // memmap is passed through to the base-class loader
+		
+		inputfilestream inW2C(W2Cdict);
+		if (!inW2C.good()) {
+			std::cerr << "Failed to open " << W2Cdict << "!" << std::endl;
+			exit(1);
+		}
+		loadMap(inW2C);
+		getDict()->genoovcode();
+		
+		VERBOSE(2,"OOV code of lmclass is " << getDict()->oovcode() << " mapped into " << getMap(getDict()->oovcode())<< "\n");
+		getDict()->incflag(1);
+	}
+	
+	void lmclass::loadMap(istream& inW2C)
+	{
+		//Reads "word class [prob]" lines from inW2C and registers each pair through loadMapElement().
+		double lprob=0.0;
+		int howmany=0;
+		
+		const char* words[1 + LMTMAXLEV + 1 + 1];
+		
+		//open input stream and prepare an input string
+		char line[MAX_LINE];
+		
+		dict->incflag(1); //can add to the map dictionary
+		
+		cerr<<"loadW2Cdict()...\n";
+		//save freq of EOS and BOS
+		
+		loadMapElement(dict->BoS(),lmtable::dict->BoS(),0.0);
+		loadMapElement(dict->EoS(),lmtable::dict->EoS(),0.0);
+		
+		//should i add <unk> to the dict or just let the trans_freq handle <unk>
+		loadMapElement(dict->OOV(),lmtable::dict->OOV(),0.0);
+		
+		while (inW2C.getline(line,MAX_LINE)) {
+			if (strlen(line)==MAX_LINE-1) {
+				cerr << "lmtable::loadW2Cdict: input line exceed MAXLINE ("
+				<< MAX_LINE << ") chars " << line << "\n";
+				exit(1);
+			}
+			
+			howmany = parseWords(line, words, 4); //3
+			if(howmany == 3) {
+				const int parsed = sscanf(words[2], "%lf", &lprob); //parsed outside MY_ASSERT so the read does not depend on the macro evaluating its argument
+				MY_ASSERT(parsed == 1);
+				lprob=(double)log10(lprob); //the stored score is the log10 of the value in the file
+			} else if(howmany==2) {
+				
+				VERBOSE(3,"No score for the pair (" << words[0] << "," << words[1] << "); set to default 1.0\n");
+				
+				lprob=0.0; //log10(1.0)
+			} else {
+				cerr << "parseline: not enough entries" << line << "\n";
+				exit(1);
+			}
+			loadMapElement(words[0],words[1],lprob);
+			
+			//check whether MapScore still has room for further entries
+			checkMap();
+		}
+		
+		VERBOSE(2,"There are " << MapScoreN << " entries in the map\n");
+		
+		dict->incflag(0); //can NOT add to the dictionary of lmclass
+	}
+	
+	void lmclass::checkMap() //grows MapScore once it is full, so the next new word code has a valid slot
+	{
+		if (MapScoreN >= MaxMapSize) { //>= rather than >: loadMapElement writes MapScore[wcode] with wcode up to MapScoreN (assuming consecutive codes)
+			MaxMapSize=2*MapScoreN;
+			MapScore = (double*) reallocf(MapScore, sizeof(double)*(MaxMapSize)); //NOTE(review): result is not checked; the new tail is not zeroed
+			VERBOSE(2,"In lmclass::checkMap(...) MaxMapSize=" <<  MaxMapSize  << " MapScoreN=" <<  MapScoreN  << "\n");
+		}
+	}
+	
+	void lmclass::loadMapElement(const char* in, const char* out, double sc) //records one (in -> out) pair together with its score sc
+	{
+		//freq of word (in) encodes the ID of the class (out)
+		//save the probability associated with the pair (in,out)
+		int wcode=dict->encode(in); //code of the word in the map dictionary
+		dict->freq(wcode,lmtable::dict->encode(out)); //the class code comes from the base-class (lmtable) dictionary
+		MapScore[wcode]=sc; //NOTE(review): no bounds check here; loadMap() calls checkMap() only after this write
+		VERBOSE(3,"In lmclass::loadMapElement(...) in=" << in  << " wcode=" <<  wcode << " out=" << out << " ccode=" << lmtable::dict->encode(out) << " MapScoreN=" << MapScoreN  << "\n");
+		
+		if (wcode >= MapScoreN) MapScoreN++; //increment size of the array MapScore if the element is new
+	}
+	
+	//double lmclass::lprob(ngram ong,double* bow, int* bol, char** maxsuffptr,unsigned int* statesize,bool* extendible)
+	double lmclass::lprob(ngram ong,double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow)
+	{
+		double lpr=getMapScore(*ong.wordp(1)); //start from the map score of the word at position 1 of ong
+		
+		VERBOSE(3,"In lmclass::lprob(...) Mapscore    = " <<  lpr  << "\n");
+		
+		//convert ong to its clustered encoding
+		ngram mapped_ng(lmtable::getDict());
+		//  mapped_ng.trans_freq(ong);
+		mapping(ong,mapped_ng);
+		
+		//  lpr+=lmtable::clprob(mapped_ng,bow,bol,maxsuffptr,statesize, extendible);
+		lpr+=lmtable::clprob(mapped_ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, lastbow); //add the base-class score of the mapped n-gram; all out-parameters are passed through untouched
+		
+		VERBOSE(3,"In lmclass::lprob(...) global prob  = " <<  lpr  << "\n");
+		return lpr; //map score + lmtable::clprob of the mapped n-gram
+	}
+	
+	void lmclass::mapping(ngram &in, ngram &out) //appends to out the getMap() image of every word of in
+	{
+		int insize = in.size;
+		VERBOSE(3,"In lmclass::mapping(ngram &in, ngram &out) in    = " <<  in  << "\n");
+		
+		// map the input sequence (in) into the corresponding output sequence (out), by applying the provided map
+		for (int i=insize; i>0; i--) { //positions insize down to 1, each pushed onto out in that order
+			out.pushc(getMap(*in.wordp(i)));
+		}
+		
+		VERBOSE(3,"In lmclass::mapping(ngram &in, ngram &out) out    = " <<  out  << "\n");
+		return;
+	}
 }//namespace irstlm
 
diff --git a/src/lmclass.h b/src/lmclass.h
index 3ab299f..a22164d 100644
--- a/src/lmclass.h
+++ b/src/lmclass.h
@@ -1,24 +1,24 @@
 // $Id: lmclass.h 3461 2010-08-27 10:17:34Z bertoldi $
 
 /******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ 
+ ******************************************************************************/
 
 
 #ifndef MF_LMCLASS_H
@@ -34,79 +34,76 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 #include "dictionary.h"
 #include "n_gram.h"
 #include "lmtable.h"
-	
+
 #define LMCLASS_MAX_TOKEN 2
 
 namespace irstlm {
-class lmclass: public lmtable
-{
-  dictionary     *dict; // dictionary (words - macro tags)
-  double *MapScore;
-  int MapScoreN;
-  int MaxMapSize;
-
-protected:
-  void loadMap(std::istream& inp);
-  void loadMapElement(const char* in, const char* out, double sc);
-  void mapping(ngram &in, ngram &out);
-
-  inline double getMapScore(int wcode) {
-//the input word is un-known by the map, so I "transform" this word into the oov (of the words)
-    if (wcode >= MapScoreN) {
-      wcode = getDict()->oovcode();
-    }
-    return MapScore[wcode];
-  };
-
-  inline size_t getMap(int wcode) {
-//the input word is un-known by the map, so I "transform" this word into the oov (of the words)
-    if (wcode >= MapScoreN) {
-      wcode = getDict()->oovcode();
-    }
-    return dict->freq(wcode);
-  };
-
-  void checkMap();
-
-public:
-  lmclass(float nlf=0.0, float dlfi=0.0);
-
-  ~lmclass();
-
-  void load(const std::string &filename,int mmap=0);
-
-  double lprob(ngram ng, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
-		return lprob(ng,bow,bol,NULL,maxsuffptr,statesize,extendible);
-  };
-  double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
-    return clprob(ng,bow,bol,NULL,maxsuffptr,statesize,extendible);
-  };
-  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
-    return clprob(ng,ngsize,bow,bol,NULL,maxsuffptr,statesize,extendible);
-  };
-
-  double lprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-  double clprob(ngram ng,double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
-    return lprob(ng,bow,bol,maxsuffidx,maxsuffptr,statesize,extendible);
-  };
-  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
-    ngram ong(getDict());
-    ong.pushc(ng,ngsize);
-    return lprob(ong,bow,bol,maxsuffidx,maxsuffptr,statesize,extendible);
-  };
-
-  inline bool is_OOV(int code) {
-    //a word is consisdered OOV if its mapped value is OOV
-    return lmtable::is_OOV(getMap(code));
-  };
-
-  inline dictionary* getDict() const {
-    return dict;
-  }
-  inline virtual void dictionary_incflag(const bool flag) {
-    dict->incflag(flag);
-  };
-};
+	class lmclass: public lmtable // lmtable queried through a word-to-class map (dict + MapScore)
+	{
+		dictionary     *dict; // dictionary (words - macro tags)
+		double *MapScore; // score of each word, indexed by its code in dict
+		int MapScoreN; // number of MapScore entries in use
+		int MaxMapSize; // allocated capacity of MapScore, in entries
+		
+	protected:
+		void loadMap(std::istream& inp);
+		void loadMapElement(const char* in, const char* out, double sc);
+		void mapping(ngram &in, ngram &out);
+		
+		inline double getMapScore(int wcode) { // NOTE(review): negative codes are not checked
+			//the input word is un-known by the map, so I "transform" this word into the oov (of the words)
+			if (wcode >= MapScoreN) {
+				wcode = getDict()->oovcode();
+			}
+			return MapScore[wcode];
+		};
+		
+		inline size_t getMap(int wcode) { // returns the value stored as freq of wcode in dict
+			//the input word is un-known by the map, so I "transform" this word into the oov (of the words)
+			if (wcode >= MapScoreN) {
+				wcode = getDict()->oovcode();
+			}
+			return dict->freq(wcode);
+		};
+		
+		void checkMap();
+		
+	public:
+		lmclass(float nlf=0.0, float dlfi=0.0);
+		
+		~lmclass();
+		
+		virtual void load(const std::string &filename,int mmap=0);
+		
+		
+		//  virtual double lprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL) { return lprob(ng,bow,bol,NULL,maxsuffptr,statesize,extendible,lastbow); };
+		//  virtual double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) { return clprob(ng,bow,bol,NULL,maxsuffptr,statesize,extendible); };
+		
+		//  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) { return clprob(ng,ngsize,bow,bol,NULL,maxsuffptr,statesize,extendible); };
+		
+		double lprob(ngram ng, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL, double* lastbow=NULL);
+		double clprob(ngram ng,double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL, double* lastbow=NULL) { // thin wrapper: same arguments, handed to lprob()
+			return lprob(ng,bow,bol,maxsuffidx,maxsuffptr,statesize,extendible,lastbow);
+		};
+		/*
+		 double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL) {
+		 ngram ong(getDict());
+		 ong.pushc(ng,ngsize);
+		 return lprob(ong,bow,bol,maxsuffidx,maxsuffptr,statesize,extendible);
+		 };
+		 */
+		inline bool is_OOV(int code) {
+			//a word is considered OOV if its mapped value is OOV
+			return lmtable::is_OOV(getMap(code));
+		};
+		
+		inline dictionary* getDict() const { // returns the word dictionary of this class, not the lmtable one
+			return dict;
+		}
+		inline virtual void dictionary_incflag(const bool flag) { // forwards flag to dict->incflag()
+			dict->incflag(flag);
+		};
+	};
 	
 }//namespace irstlm
 
diff --git a/src/lmmacro.cpp b/src/lmmacro.cpp
index 66c7063..0f64477 100644
--- a/src/lmmacro.cpp
+++ b/src/lmmacro.cpp
@@ -1,24 +1,24 @@
 // $Id: lmmacro.cpp 3631 2010-10-07 12:04:12Z bertoldi $
 
 /******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ 
+ ******************************************************************************/
 #include <stdio.h>
 #include <stdlib.h>
 #include <fcntl.h>
@@ -36,7 +36,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 #include "util.h"
 
 using namespace std;
-	
+
 // local utilities: start
 
 inline void error(const char* message)
@@ -50,509 +50,265 @@ inline void error(const char* message)
 
 namespace irstlm {
 	
-lmmacro::lmmacro(float nlf, float dlfi):lmtable(nlf,dlfi)
-{
-  dict = new dictionary((char *)NULL,1000000); // dict of micro tags
-  getDict()->incflag(1);
-};
-
-lmmacro::~lmmacro()
-{
-  if (mapFlag) unloadmap();
-}
-
-
-void lmmacro::load(const std::string &filename,int memmap)
-{
-  VERBOSE(2,"lmmacro::load(const std::string &filename,int memmap)" << std::endl);
-
-  //get info from the configuration file
-  fstream inp(filename.c_str(),ios::in|ios::binary);
-
-  char line[MAX_LINE];
-  const char* words[MAX_TOKEN_N_MAP];
-  int tokenN;
-  inp.getline(line,MAX_LINE,'\n');
-  tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
-
-  if (tokenN != 4 || ((strcmp(words[0],"LMMACRO") != 0) && (strcmp(words[0],"lmmacro")!=0)))
-    error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMMACRO lmsize field [true|false]\nfilename_of_LM\nfilename_of_map (optional)");
-  maxlev = atoi(words[1]);
-  selectedField = atoi(words[2]);
-
-  if ((strcmp(words[3],"TRUE") == 0) || (strcmp(words[3],"true") == 0))
-    collapseFlag = true;
-  else if ((strcmp(words[3],"FALSE") == 0) || (strcmp(words[3],"false") == 0))
-    collapseFlag = false;
-  else
-    error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMMACRO lmsize field [true|false]\nfilename_of_LM\nfilename_of_map (optional)");
-
+	lmmacro::lmmacro(float nlf, float dlfi):lmtable(nlf,dlfi) // both arguments are passed untouched to the lmtable base
+	{
+		dict = new dictionary((char *)NULL,1000000); // dict of micro tags
+		getDict()->incflag(1); // same call load() makes to "allow the dictionary to add new words"
+	}; // NOTE(review): mapFlag is not assigned here, yet ~lmmacro() tests it -- confirm it is set before any destruction path
+	
+	lmmacro::~lmmacro()
+	{
+		if (mapFlag) unloadmap(); else delete dict; // unloadmap() frees the map arrays and dict; with no map loaded, dict from the constructor must still be released
+	}
+	
+	
+	void lmmacro::load(const std::string &filename,int memmap)
+	{
+		VERBOSE(2,"lmmacro::load(const std::string &filename,int memmap)" << std::endl);
+		
+		//get info from the configuration file
+		fstream inp(filename.c_str(),ios::in|ios::binary);
+		
+		char line[MAX_LINE];
+		const char* words[MAX_TOKEN_N_MAP];
+		int tokenN;
+		inp.getline(line,MAX_LINE,'\n');
+		tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
+		
+		if (tokenN != 4 || ((strcmp(words[0],"LMMACRO") != 0) && (strcmp(words[0],"lmmacro")!=0)))
+			error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMMACRO lmsize field [true|false]\nfilename_of_LM\nfilename_of_map (optional)");
+		maxlev = atoi(words[1]);
+		selectedField = atoi(words[2]);
+		
+		if ((strcmp(words[3],"TRUE") == 0) || (strcmp(words[3],"true") == 0))
+			collapseFlag = true;
+		else if ((strcmp(words[3],"FALSE") == 0) || (strcmp(words[3],"false") == 0))
+			collapseFlag = false;
+		else
+			error((char*)"ERROR: wrong header format of configuration file\ncorrect format: LMMACRO lmsize field [true|false]\nfilename_of_LM\nfilename_of_map (optional)");
+		
 #ifdef DLEXICALLM
-  selectedFieldForLexicon = atoi(words[3]);
-  collapseFlag = atoi(words[4]);
+		selectedFieldForLexicon = atoi(words[3]);
+		collapseFlag = atoi(words[4]);
 #endif
-
-  if (selectedField == -1)
-    cerr << "no selected field: the whole string is used" << std::endl;
-  else
-    cerr << "selected field n. " << selectedField << std::endl;
-  if (collapseFlag)
-    cerr << "collapse is enabled" << std::endl;
-  else
-    cerr << "collapse is disabled" << std::endl;
-
-
-  std::string lmfilename;
-  if (inp.getline(line,MAX_LINE,'\n')) {
-    tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
-    lmfilename = words[0];
-  } else
-    error((char*)"ERROR: wrong format of configuration file\ncorrect format: LMMACRO lmsize field [true|false]\nfilename_of_LM\nfilename_of_map (optional)");
-
-  std::string mapfilename = "";
-  if (inp.getline(line,MAX_LINE,'\n')) {
-    tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
-    mapfilename = words[0];
-    mapFlag = true;
-  } else {
-    mapFlag = false;
-  }
-
-  inp.close();
-
-
-  std::cerr << "lmfilename:" << lmfilename << std::endl;
-  if (mapfilename != "") {
-    std::cerr << "mapfilename:" << mapfilename << std::endl;
-  } else {
-    std::cerr << "no mapfilename" << std::endl;
-    mapFlag = false;
-  }
-
-  //allow the dictionary to add new words
-  getDict()->incflag(1);
-
-
-  if ((!mapFlag) && (collapseFlag)) {
-    error((char*)"ERROR: you must specify a map if you want to collapse a specific field!");
-  }
+		
+		if (selectedField == -1)
+			cerr << "no selected field: the whole string is used" << std::endl;
+		else
+			cerr << "selected field n. " << selectedField << std::endl;
+		if (collapseFlag)
+			cerr << "collapse is enabled" << std::endl;
+		else
+			cerr << "collapse is disabled" << std::endl;
+		
+		
+		std::string lmfilename;
+		if (inp.getline(line,MAX_LINE,'\n')) {
+			tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
+			lmfilename = words[0];
+		} else
+			error((char*)"ERROR: wrong format of configuration file\ncorrect format: LMMACRO lmsize field [true|false]\nfilename_of_LM\nfilename_of_map (optional)");
+		
+		std::string mapfilename = "";
+		if (inp.getline(line,MAX_LINE,'\n')) {
+			tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
+			mapfilename = words[0];
+			mapFlag = true;
+		} else {
+			mapFlag = false;
+		}
+		
+		inp.close();
+		
+		
+		std::cerr << "lmfilename:" << lmfilename << std::endl;
+		if (mapfilename != "") {
+			std::cerr << "mapfilename:" << mapfilename << std::endl;
+		} else {
+			std::cerr << "no mapfilename" << std::endl;
+			mapFlag = false;
+		}
+		
+		//allow the dictionary to add new words
+		getDict()->incflag(1);
+		
+		
+		if ((!mapFlag) && (collapseFlag)) {
+			error((char*)"ERROR: you must specify a map if you want to collapse a specific field!");
+		}
 #ifdef DLEXICALLM
-
-  std::string lexicalclassesfilename = words[2];
-  if (lexicalclassesfilename != "NULL" && lexicalclassesfilename != "null") lexicalclassesfilename = "";
-
-  if (lexicalclassesfilename != "") std::cerr << "lexicalclassesfilename:" << lexicalclassesfilename << std::endl;
-  else std::cerr << "no lexicalclassesfilename" << std::endl;
-
-  // Load the classes of lexicalization tokens:
-  if (lexicalclassesfilename != "")    loadLexicalClasses(lexicalclassesfilename.c_str());
+		
+		std::string lexicalclassesfilename = words[2];
+		if (lexicalclassesfilename != "NULL" && lexicalclassesfilename != "null") lexicalclassesfilename = "";
+		
+		if (lexicalclassesfilename != "") std::cerr << "lexicalclassesfilename:" << lexicalclassesfilename << std::endl;
+		else std::cerr << "no lexicalclassesfilename" << std::endl;
+		
+		// Load the classes of lexicalization tokens:
+		if (lexicalclassesfilename != "")    loadLexicalClasses(lexicalclassesfilename.c_str());
 #endif
-
-  // Load the (possibly binary) LM
-  lmtable::load(lmfilename,memmap);
-
-  getDict()->incflag(1);
+		
+		// Load the (possibly binary) LM
+		lmtable::load(lmfilename,memmap);
+		
+		getDict()->incflag(1);
+		
+		if (mapFlag)
+			loadmap(mapfilename);
+		getDict()->genoovcode();
+		
+	};
 	
-  if (mapFlag)
-    loadmap(mapfilename);
-  getDict()->genoovcode();
-
-};
-
-void lmmacro::unloadmap()
-{
-  delete dict;
-  free(microMacroMap);
-  if (collapseFlag) {
-    free(collapsableMap);
-    free(collapsatorMap);
-  }
+	void lmmacro::unloadmap()
+	{
+		delete dict;
+		free(microMacroMap);
+		if (collapseFlag) {
+			free(collapsableMap);
+			free(collapsatorMap);
+		}
 #ifdef DLEXICALLM
-  free(lexicaltoken2classMap);
+		free(lexicaltoken2classMap);
 #endif
-}
-
-void lmmacro::loadmap(const std::string mapfilename)
-{
-  microMacroMapN = 0;
-  microMacroMap = NULL;
-  collapsableMap = NULL;
-  collapsatorMap = NULL;
-
+	}
+	
+	void lmmacro::loadmap(const std::string mapfilename)
+	{
+		microMacroMapN = 0;
+		microMacroMap = NULL;
+		collapsableMap = NULL;
+		collapsatorMap = NULL;
+		
 #ifdef DLEXICALLM
-  lexicaltoken2classMap = NULL;
-  lexicaltoken2classMapN = 0;
+		lexicaltoken2classMap = NULL;
+		lexicaltoken2classMapN = 0;
 #endif
-
-  microMacroMap = (int *)calloc(BUFSIZ, sizeof(int));
-  if (collapseFlag) {
-    collapsableMap = (bool *)calloc(BUFSIZ, sizeof(bool));
-    collapsatorMap = (bool *)calloc(BUFSIZ, sizeof(bool));
-  }
-
-	
-  getDict()->genoovcode();
-	microMacroMap[microMacroMapN] = lmtable::getDict()->oovcode();
-	MY_ASSERT(microMacroMapN == getDict()->oovcode());
-	microMacroMapN++;
-	
-	
-  if (lmtable::getDict()->getcode(BOS_)==-1) {
-    lmtable::getDict()->incflag(1);
-    lmtable::getDict()->encode(BOS_);
-    lmtable::getDict()->incflag(0);
-  }
-
-  if (lmtable::getDict()->getcode(EOS_)==-1) {
-    lmtable::getDict()->incflag(1);
-    lmtable::getDict()->encode(EOS_);
-    lmtable::getDict()->incflag(0);
-  }
-
-  char line[MAX_LINE];
-  const char* words[MAX_TOKEN_N_MAP];
-  const char *macroW;
-  const char *microW;
-  int tokenN;
-  bool bos=false,eos=false;
-
-  // Load the dictionary of micro tags (to be put in "dict" of lmmacro class):
-  inputfilestream inpMap(mapfilename.c_str());
-  std::cerr << "Reading map " << mapfilename << "..." << std::endl;
-  while (inpMap.getline(line,MAX_LINE,'\n')) {
-    tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
-    if (tokenN != 2)
-      error((char*)"ERROR: wrong format of map file\n");
-    microW = words[0];
-    macroW = words[1];
-    int microW_c=getDict()->encode(microW);
-    VERBOSE(4, "microW gets the code:" << microW_c << std::endl);
-		
-    if (microMacroMapN>0 && !(microMacroMapN % BUFSIZ)) {
-      microMacroMap = (int *)reallocf(microMacroMap, sizeof(int)*(BUFSIZ*(1+microMacroMapN/BUFSIZ)));
-      if (collapseFlag) {
-        //create supporting info for collapse
-
-        collapsableMap = (bool *)reallocf(collapsableMap, sizeof(bool)*(BUFSIZ*(1+microMacroMapN/BUFSIZ)));
-        collapsatorMap = (bool *)reallocf(collapsatorMap, sizeof(bool)*(BUFSIZ*(1+microMacroMapN/BUFSIZ)));
-      }
-    }
-    microMacroMap[microMacroMapN] = lmtable::getDict()->getcode(macroW);
 		
-    if (collapseFlag) {
-
-      int len = strlen(microW)-1;
-      if (microW[len] == '(') {
-        collapsableMap[microMacroMapN] = false;
-        collapsatorMap[microMacroMapN] = true;
-      } else if (microW[len] == ')') {
-        collapsableMap[microMacroMapN] = true;
-        collapsatorMap[microMacroMapN] = false;
-      } else if (microW[len] == '+') {
-        collapsableMap[microMacroMapN] = true;
-        collapsatorMap[microMacroMapN] = true;
-      } else {
-        collapsableMap[microMacroMapN] = false;
-        collapsatorMap[microMacroMapN] = false;
-      }
-    }
-
-    if (!bos && !strcmp(microW,BOS_)) bos=true;
-    if (!eos && !strcmp(microW,EOS_)) eos=true;
-
-    VERBOSE(2,"\nmicroW = " << microW << "\n"
-            << "macroW = " << macroW << "\n"
-            << "microMacroMapN = " << microMacroMapN << "\n"
-            << "code of micro = " <<  getDict()->getcode(microW) << "\n"
-            << "code of macro = " <<  lmtable::getDict()->getcode(macroW) << "\n");
-
-    microMacroMapN++;
-  }
-
-  if ((microMacroMapN == 0) && (selectedField == -1))
-    error((char*)"ERROR: with no field selection, a map for the whole string is mandatory\n");
-
-  if (microMacroMapN>0) {
-    // Add <s>-><s> to map if missing
-    if (!bos) {
-      getDict()->encode(BOS_);
-      if (microMacroMapN && !(microMacroMapN%BUFSIZ))
-        microMacroMap = (int *)reallocf(microMacroMap, sizeof(int)*(microMacroMapN+BUFSIZ));
-      microMacroMap[microMacroMapN++] = lmtable::getDict()->getcode(BOS_);
-    }
-
-    // Add </s>-></s> to map if missing
-    if (!eos) {
-      getDict()->encode(EOS_);
-      if (microMacroMapN && !(microMacroMapN%BUFSIZ))
-        microMacroMap = (int *)reallocf(microMacroMap, sizeof(int)*(microMacroMapN+BUFSIZ));
-      microMacroMap[microMacroMapN++] = lmtable::getDict()->getcode(EOS_);
-    }
-  }
-  //  getDict()->incflag(0);
-
-  VERBOSE(2,"oovcode(micro)=" <<  getDict()->oovcode() << "\n"
-          << "oovcode(macro)=" <<  lmtable::getDict()->oovcode() << "\n"
-          << "microMacroMapN = " << microMacroMapN << "\n"
-          << "macrodictsize  = " << getDict()->size() << "\n"
-          << "microdictsize  = " << lmtable::getDict()->size() << "\n");
-
-  IFVERBOSE(2) {
-    for (int i=0; i<microMacroMapN; i++) {
-      VERBOSE(2,"micro[" << getDict()->decode(i) << "] {"<< i << "} -> " << lmtable::getDict()->decode(microMacroMap[i]) << " {" << microMacroMap[i]<< "}" << "\n");
-    }
-  }
-  std::cerr << "...done\n";
-}
-
-
-double lmmacro::lprob(ngram micro_ng)
-{
-  VERBOSE(2,"lmmacro::lprob, parameter = <" <<  micro_ng << ">\n");
-
-  ngram macro_ng(lmtable::getDict());
-
-  if (micro_ng.dict ==  macro_ng.dict)
-    macro_ng.trans(micro_ng);  // micro to macro mapping already done
-  else
-    map(&micro_ng, &macro_ng); // mapping required
-
-  VERBOSE(3,"lmmacro::lprob: micro_ng = " << micro_ng << "\n"
-          <<  "lmmacro::lprob: macro_ng = " << macro_ng << "\n");
-
-  // ask LM with macro
-  double prob;
-  prob = lmtable::lprob(macro_ng);
-  VERBOSE(3,"prob = " << prob << "\n");
-
-  return prob;
-};
+		microMacroMap = (int *)calloc(BUFSIZ, sizeof(int));
+		if (collapseFlag) {
+			collapsableMap = (bool *)calloc(BUFSIZ, sizeof(bool));
+			collapsatorMap = (bool *)calloc(BUFSIZ, sizeof(bool));
+		}
+		
+		
+		getDict()->genoovcode();
+		microMacroMap[microMacroMapN] = lmtable::getDict()->oovcode();
+		MY_ASSERT(microMacroMapN == getDict()->oovcode());
+		microMacroMapN++;
+		
+		
+		if (lmtable::getDict()->getcode(BOS_)==-1) {
+			lmtable::getDict()->incflag(1);
+			lmtable::getDict()->encode(BOS_);
+			lmtable::getDict()->incflag(0);
+		}
+		
+		if (lmtable::getDict()->getcode(EOS_)==-1) {
+			lmtable::getDict()->incflag(1);
+			lmtable::getDict()->encode(EOS_);
+			lmtable::getDict()->incflag(0);
+		}
+		
+		char line[MAX_LINE];
+		const char* words[MAX_TOKEN_N_MAP];
+		const char *macroW;
+		const char *microW;
+		int tokenN;
+		bool bos=false,eos=false;
+		
+		// Load the dictionary of micro tags (to be put in "dict" of lmmacro class):
+		inputfilestream inpMap(mapfilename.c_str());
+		std::cerr << "Reading map " << mapfilename << "..." << std::endl;
+		while (inpMap.getline(line,MAX_LINE,'\n')) {
+			tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
+			if (tokenN != 2)
+				error((char*)"ERROR: wrong format of map file\n");
+			microW = words[0];
+			macroW = words[1];
+			int microW_c=getDict()->encode(microW);
+			VERBOSE(4, "microW gets the code:" << microW_c << std::endl);
+			
+			if (microMacroMapN>0 && !(microMacroMapN % BUFSIZ)) {
+				microMacroMap = (int *)reallocf(microMacroMap, sizeof(int)*(BUFSIZ*(1+microMacroMapN/BUFSIZ)));
+				if (collapseFlag) {
+					//create supporting info for collapse
+					
+					collapsableMap = (bool *)reallocf(collapsableMap, sizeof(bool)*(BUFSIZ*(1+microMacroMapN/BUFSIZ)));
+					collapsatorMap = (bool *)reallocf(collapsatorMap, sizeof(bool)*(BUFSIZ*(1+microMacroMapN/BUFSIZ)));
+				}
+			}
+			microMacroMap[microMacroMapN] = lmtable::getDict()->getcode(macroW);
+			
+			if (collapseFlag) {
+				
+				int len = strlen(microW)-1;
+				if (microW[len] == '(') {
+					collapsableMap[microMacroMapN] = false;
+					collapsatorMap[microMacroMapN] = true;
+				} else if (microW[len] == ')') {
+					collapsableMap[microMacroMapN] = true;
+					collapsatorMap[microMacroMapN] = false;
+				} else if (microW[len] == '+') {
+					collapsableMap[microMacroMapN] = true;
+					collapsatorMap[microMacroMapN] = true;
+				} else {
+					collapsableMap[microMacroMapN] = false;
+					collapsatorMap[microMacroMapN] = false;
+				}
+			}
+			
+			if (!bos && !strcmp(microW,BOS_)) bos=true;
+			if (!eos && !strcmp(microW,EOS_)) eos=true;
+			
+			VERBOSE(2,"\nmicroW = " << microW << "\n"
+							<< "macroW = " << macroW << "\n"
+							<< "microMacroMapN = " << microMacroMapN << "\n"
+							<< "code of micro = " <<  getDict()->getcode(microW) << "\n"
+							<< "code of macro = " <<  lmtable::getDict()->getcode(macroW) << "\n");
+			
+			microMacroMapN++;
+		}
+		
+		if ((microMacroMapN == 0) && (selectedField == -1))
+			error((char*)"ERROR: with no field selection, a map for the whole string is mandatory\n");
+		
+		if (microMacroMapN>0) {
+			// Add <s>-><s> to map if missing
+			if (!bos) {
+				getDict()->encode(BOS_);
+				if (microMacroMapN && !(microMacroMapN%BUFSIZ))
+					microMacroMap = (int *)reallocf(microMacroMap, sizeof(int)*(microMacroMapN+BUFSIZ));
+				microMacroMap[microMacroMapN++] = lmtable::getDict()->getcode(BOS_);
+			}
+			
+			// Add </s>-></s> to map if missing
+			if (!eos) {
+				getDict()->encode(EOS_);
+				if (microMacroMapN && !(microMacroMapN%BUFSIZ))
+					microMacroMap = (int *)reallocf(microMacroMap, sizeof(int)*(microMacroMapN+BUFSIZ));
+				microMacroMap[microMacroMapN++] = lmtable::getDict()->getcode(EOS_);
+			}
+		}
+		//  getDict()->incflag(0);
+		
+		VERBOSE(2,"oovcode(micro)=" <<  getDict()->oovcode() << "\n"
+						<< "oovcode(macro)=" <<  lmtable::getDict()->oovcode() << "\n"
+						<< "microMacroMapN = " << microMacroMapN << "\n"
+						<< "macrodictsize  = " << getDict()->size() << "\n"
+						<< "microdictsize  = " << lmtable::getDict()->size() << "\n");
+		
+		IFVERBOSE(2) {
+			for (int i=0; i<microMacroMapN; i++) {
+				VERBOSE(2,"micro[" << getDict()->decode(i) << "] {"<< i << "} -> " << lmtable::getDict()->decode(microMacroMap[i]) << " {" << microMacroMap[i]<< "}" << "\n");
+			}
+		}
+		std::cerr << "...done\n";
+	}
 	
-//double lmmacro::clprob(int* codes, int sz,  double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
-double lmmacro::clprob(int* codes, int sz,  double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible)
-{
-  ngram micro_ng(getDict());
-  micro_ng.pushc(codes,sz);
-//  return clprob(micro_ng,bow,bol,state,statesize,extendible);
-  return clprob(micro_ng,bow,bol,ngramstate,state,statesize,extendible);
-}
-
-//	double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
-double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible)
-{
-
-  VERBOSE(3," lmmacro::clprob(ngram), parameter = <" <<  micro_ng << ">\n");
-
-  ngram transformed_ng(lmtable::getDict());
-  bool collapsed = transform(micro_ng, transformed_ng);
-  VERBOSE(3,"lmmacro::clprob(ngram), transformed_ng = <" <<  transformed_ng << ">\n");
-
-  double logpr;
-
-  if (collapsed) {
-    // the last token of the ngram continues an already open "chunk"
-    // the probability at chunk-level is not computed because it has been already computed when the actual"chunk" opens
-    VERBOSE(3,"  SKIPPED call to lmtable::clprob because of collapse; logpr: 0.0\n");
-    logpr = 0.0;
-  } else {
-    VERBOSE(3,"  QUERY MACRO LM on (after transformation and size reduction) " << transformed_ng << "\n");
-//    logpr = lmtable::clprob(transformed_ng, bow, bol, state, statesize, extendible);
-    logpr = lmtable::clprob(transformed_ng, bow, bol, ngramstate, state, statesize, extendible);
-  }
-  VERBOSE(3,"  GET logpr: " << logpr << "\n");
-
-  return logpr;
-}
-
-bool lmmacro::transform(ngram &in, ngram &out)
-{
-  VERBOSE(3,"lmmacro::transform(ngram &in, ngram &out), in = <" <<  in << ">\n");
-
-  //step 1: selection of the correct field
-  ngram field_ng(getDict());
-  if (selectedField >= 0)
-    field_selection(in, field_ng);
-  else
-    field_ng = in;
-
-  //step 2: collapsing
-  ngram collapsed_ng(getDict());
-  bool collapsed = false;
-  if (collapseFlag)
-    collapsed = collapse(field_ng, collapsed_ng);
-  else
-    collapsed_ng = field_ng;
-
-  //step 3: mapping using the loaded map
-  if (mapFlag)
-    mapping(collapsed_ng, out);
-  else
-    out.trans(collapsed_ng);
-
-  if (out.size>lmtable::maxlevel()) out.size=lmtable::maxlevel();
-
-  VERBOSE(3,"lmmacro::transform(ngram &in, ngram &out), out = <" <<  out << ">\n");
-  return collapsed;
-}
-
-
-
-void lmmacro::field_selection(ngram &in, ngram &out)
-{
-  VERBOSE(3,"In lmmacro::field_selection(ngram &in, ngram &out) in    = " <<  in  << "\n");
-
-  int microsize = in.size;
-
-  for (int i=microsize; i>0; i--) {
-
-    char curr_token[BUFSIZ];
-    strcpy(curr_token, getDict()->decode(*in.wordp(i)));
-    char *field;
-    if (strcmp(curr_token,"<s>") &&
-        strcmp(curr_token,"</s>") &&
-        strcmp(curr_token,"_unk_")) {
-      field = strtok(curr_token, "#");
-      int j=0;
-      while (j<selectedField && field != NULL) {
-        field = strtok(0, "#");
-        j++;
-      }
-    } else {
-      field = curr_token;
-    }
-
-
-    if (field) {
-      out.pushw(field);
-    } else {
-
-      out.pushw((char*)"_unk_");
-
-      // cerr << *in << "\n";
-      // error((char*)"ERROR: Malformed input: selected field does not exist in token\n");
-
-      /**
-      We can be here in 2 cases:
-
-      a. effectively when the token is malformed, that is the selected
-      field does not exist
-
-      b. in case of verbatim translation, that is the source word is
-      not known to the phrase table and moses transfers it as it is
-      to the target side: in this case, no assumption can be made on its
-      format, which means that the selected field can not exist
-
-      The possibility of case (b) makes incorrect the error exit from
-      the code at this point: correct, on the contrary, push the _unk_ string
-      **/
-    }
-  }
-  VERBOSE(3,"In lmmacro::field_selection(ngram &in, ngram &out) out    = " <<  out  << "\n");
-  return;
-}
-
-bool lmmacro::collapse(ngram &in, ngram &out)
-{
-  VERBOSE(3,"In lmmacro::collapse(ngram &in, ngram &out) in    = " <<  in  << "\n")
-
-  // fill the ngram out with the collapsed tokens
-  //return true if collapse happens for the most recent token
-  //return false if collapse does not happen for the most recent token
-  int microsize = in.size;
-  out.size = 0;
-
-  if (microsize == 1) {
-    out.pushc(*in.wordp(1));
-    return false;
-  }
-
-  int curr_code = *in.wordp(1);
-  int prev_code = *in.wordp(2);
-
-  if (microMacroMap[curr_code] == microMacroMap[prev_code]) {
-    if (collapsableMap[curr_code] && collapsatorMap[prev_code]) {
-      return true;
-    }
-  }
-
-  //collapse does not happen for the most recent token
-  // collapse all previous tokens, but the last
-
-  prev_code = *in.wordp(microsize);
-  out.pushc(prev_code);
-
-  for (int i=microsize-1; i>1; i--) {
-
-    curr_code = *in.wordp(i);
-
-    if (microMacroMap[curr_code] != microMacroMap[prev_code]) {
-      out.pushc(curr_code);
-    } else {
-      if (!(collapsableMap[curr_code] && collapsatorMap[prev_code])) {
-        out.pushc(prev_code);
-      }
-    }
-    prev_code = curr_code;
-  }
-  // and insert the most recent token
-  out.pushc(*in.wordp(1));
-  VERBOSE(3,"In lmmacro::collapse(ngram &in, ngram &out) out    = " <<  out  << "\n");
-  return false;
-}
-
-void lmmacro::mapping(ngram &in, ngram &out)
-{
-  VERBOSE(3,"In lmmacro::mapping(ngram &in, ngram &out) in    = " <<  in  << "\n");
-
-  int microsize = in.size;
-
-  // map microtag sequence (in) into the corresponding sequence of macrotags (possibly shorter) (out)
-
-  for (int i=microsize; i>0; i--) {
-
-    int in_code = *in.wordp(i);
-    int out_code;
-    if (in_code < microMacroMapN)
-      out_code = microMacroMap[in_code];
-    else
-      out_code = lmtable::getDict()->oovcode();
-
-    out.pushc(out_code);
-  }
-  VERBOSE(3,"In lmmacro::mapping(ngram &in, ngram &out) out    = " <<  out  << "\n");
-  return;
-}
-
-
-//maxsuffptr returns the largest suffix of an n-gram that is contained
-//in the LM table. This can be used as a compact representation of the
-//(n-1)-gram state of a n-gram LM. if the input k-gram has k>=n then it
-//is trimmed to its n-1 suffix.
-
-const char *lmmacro::maxsuffptr(ngram micro_ng, unsigned int* size)
-{
-  ngram macro_ng(lmtable::getDict());
-
-  if (micro_ng.dict ==  macro_ng.dict)
-    macro_ng.trans(micro_ng);  // micro to macro mapping already done
-  else
-    map(&micro_ng, &macro_ng); // mapping required
-
-  VERBOSE(2,"lmmacro::lprob: micro_ng = " << micro_ng << "\n"
-          << "lmmacro::lprob: macro_ng = " << macro_ng << "\n");
-
-  return lmtable::maxsuffptr(macro_ng,size);
-}
 	
-const char *lmmacro::cmaxsuffptr(ngram micro_ng, unsigned int* size)
-{
-		//cerr << "lmmacro::CMAXsuffptr\n";
-		//cerr << "micro_ng: " << micro_ng
-		//	<< " -> micro_ng.size: " << micro_ng.size << "\n";
-		
-		//the LM working on the selected field = 0
-		//contributes to the LM state
-		//  if (selectedField>0)    return NULL;
+	double lmmacro::lprob(ngram micro_ng)
+	{
+		VERBOSE(2,"lmmacro::lprob, parameter = <" <<  micro_ng << ">\n");
 		
 		ngram macro_ng(lmtable::getDict());
 		
@@ -561,39 +317,261 @@ const char *lmmacro::cmaxsuffptr(ngram micro_ng, unsigned int* size)
 		else
 			map(&micro_ng, &macro_ng); // mapping required
 		
-		VERBOSE(2,"lmmacro::lprob: micro_ng = " << micro_ng << "\n"
-						<<  "lmmacro::lprob: macro_ng = " << macro_ng << "\n")
+		VERBOSE(3,"lmmacro::lprob: micro_ng = " << micro_ng << "\n"
+						<<  "lmmacro::lprob: macro_ng = " << macro_ng << "\n");
 		
-		return lmtable::cmaxsuffptr(macro_ng,size);
+		// ask LM with macro
+		double prob;
+		prob = lmtable::lprob(macro_ng);
+		VERBOSE(3,"prob = " << prob << "\n");
 		
-}
+		return prob;
+	};
+	/*
+	 //double lmmacro::clprob(int* codes, int sz,  double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+	 double lmmacro::clprob(int* codes, int sz,  double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible, double* lastbow)
+	 {
+	 ngram micro_ng(getDict());
+	 micro_ng.pushc(codes,sz);
+	 //  return clprob(micro_ng,bow,bol,state,statesize,extendible);
+	 return clprob(micro_ng,bow,bol,ngramstate,state,statesize,extendible, lastbow);
+	 }
+	 */
 	
-ngram_state_t lmmacro::maxsuffidx(ngram micro_ng, unsigned int* size)
-{
-		//cerr << "lmmacro::CMAXsuffptr\n";
-		//cerr << "micro_ng: " << micro_ng
-		//	<< " -> micro_ng.size: " << micro_ng.size << "\n";
+	//	double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, char** state,unsigned int* statesize,bool* extendible)
+	double lmmacro::clprob(ngram micro_ng, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible, double* lastbow)
+	{
 		
-		//the LM working on the selected field = 0
-		//contributes to the LM state
-		//  if (selectedField>0)    return NULL;
+		VERBOSE(3," lmmacro::clprob(ngram), parameter = <" <<  micro_ng << ">\n");
+		
+		ngram transformed_ng(lmtable::getDict());
+		bool collapsed = transform(micro_ng, transformed_ng);
+		VERBOSE(3,"lmmacro::clprob(ngram), transformed_ng = <" <<  transformed_ng << ">\n");
 		
+		double logpr;
+		
+		if (collapsed) {
+			// the last token of the ngram continues an already open "chunk"
+			// the probability at chunk-level is not computed because it has been already computed when the actual"chunk" opens
+			VERBOSE(3,"  SKIPPED call to lmtable::clprob because of collapse; logpr: 0.0\n");
+			logpr = 0.0;
+		} else {
+			VERBOSE(3,"  QUERY MACRO LM on (after transformation and size reduction) " << transformed_ng << "\n");
+			//    logpr = lmtable::clprob(transformed_ng, bow, bol, state, statesize, extendible);
+			logpr = lmtable::clprob(transformed_ng, bow, bol, ngramstate, state, statesize, extendible, lastbow);
+		}
+		VERBOSE(3,"  GET logpr: " << logpr << "\n");
+		
+		return logpr;
+	}
+	
+	bool lmmacro::transform(ngram &in, ngram &out)
+	{
+		VERBOSE(3,"lmmacro::transform(ngram &in, ngram &out), in = <" <<  in << ">\n");
+		// Stage 1 - field selection; a negative selectedField means the token is used as it is.
+		ngram selected(getDict());
+		if (selectedField < 0) {
+			selected = in;
+		} else {
+			field_selection(in, selected);
+		}
+		// Stage 2 - optional collapse; `merged` is whatever collapse() reports for the most recent token.
+		ngram reduced(getDict());
+		bool merged = false;
+		if (!collapseFlag) {
+			reduced = selected;
+		} else {
+			merged = collapse(selected, reduced);
+		}
+		// Stage 3 - convert into `out`: through the loaded map when there is one, otherwise via trans().
+		if (!mapFlag) {
+			out.trans(reduced);
+		} else {
+			mapping(reduced, out);
+		}
+		// Clamp the result so it never exceeds the maximum level of the underlying table.
+		if (lmtable::maxlevel() < out.size) out.size = lmtable::maxlevel();
+		
+		VERBOSE(3,"lmmacro::transform(ngram &in, ngram &out), out = <" <<  out << ">\n");
+		return merged;
+	}
+	
+	
+	
+	void lmmacro::field_selection(ngram &in, ngram &out)
+	{
+		VERBOSE(3,"In lmmacro::field_selection(ngram &in, ngram &out) in    = " <<  in  << "\n");
+		// For each token of `in`, from position in.size down to 1, push onto `out` its selectedField-th '#'-separated field.
+		int microsize = in.size;
+		// "<s>", "</s>" and "_unk_" are pushed whole; a token that lacks the requested field is replaced by "_unk_".
+		for (int i=microsize; i>0; i--) {
+			// Bounded copy: a token of BUFSIZ bytes or more is truncated instead of overflowing curr_token.
+			char curr_token[BUFSIZ];
+			snprintf(curr_token, sizeof(curr_token), "%s", getDict()->decode(*in.wordp(i)));
+			char *field;
+			if (strcmp(curr_token,"<s>") &&
+					strcmp(curr_token,"</s>") &&
+					strcmp(curr_token,"_unk_")) {
+				field = strtok(curr_token, "#");
+				int j=0;
+				while (j<selectedField && field != NULL) {
+					field = strtok(0, "#");
+					j++;
+				}
+			} else {
+				field = curr_token;
+			}
+			// strtok() edits curr_token in place and returns NULL once the fields run out, so a NULL
+			// field here means the token holds fewer than selectedField+1 non-empty fields.
+			if (field) {
+				out.pushw(field);
+			} else {
+				
+				out.pushw((char*)"_unk_");
+				
+				// cerr << *in << "\n";
+				// error((char*)"ERROR: Malformed input: selected field does not exist in token\n");
+				
+				/**
+				 We can be here in 2 cases:
+				 
+				 a. effectively when the token is malformed, that is the selected
+				 field does not exist
+				 
+				 b. in case of verbatim translation, that is the source word is
+				 not known to the phrase table and moses transfers it as it is
+				 to the target side: in this case, no assumption can be made on its
+				 format, which means that the selected field can not exist
+				 
+				 The possibility of case (b) makes incorrect the error exit from
+				 the code at this point: correct, on the contrary, push the _unk_ string
+				 **/
+			}
+		}
+		VERBOSE(3,"In lmmacro::field_selection(ngram &in, ngram &out) out    = " <<  out  << "\n");
+		return;
+	}
+	
+	bool lmmacro::collapse(ngram &in, ngram &out)
+	{
+		VERBOSE(3,"In lmmacro::collapse(ngram &in, ngram &out) in    = " <<  in  << "\n")
+		// NOTE(review): in.size == 0 is not handled; wordp(1) is read unconditionally below
+		// Fills the ngram out with the collapsed tokens of in.
+		// Returns true if the most recent token (position 1) collapses with position 2,
+		// returns false otherwise (including the single-token case).
+		int microsize = in.size;
+		out.size = 0;
+		// a single token has nothing to collapse with: copy it through
+		if (microsize == 1) {
+			out.pushc(*in.wordp(1));
+			return false;
+		}
+		// the two most recent positions (1 and 2) decide the return value
+		int curr_code = *in.wordp(1);
+		int prev_code = *in.wordp(2);
+		// NOTE(review): codes index the three maps without a check against microMacroMapN (mapping() does check) - confirm they are always in range
+		if (microMacroMap[curr_code] == microMacroMap[prev_code]) {
+			if (collapsableMap[curr_code] && collapsatorMap[prev_code]) {
+				return true;
+			}
+		}
+		// NOTE(review): on the early return above out has size 0 and nothing was pushed - confirm callers expect that
+		// the most recent token does not collapse:
+		// collapse all the previous tokens, but not the last one
+		// start from the token at position microsize
+		prev_code = *in.wordp(microsize);
+		out.pushc(prev_code);
+		
+		for (int i=microsize-1; i>1; i--) {
+			
+			curr_code = *in.wordp(i);
+			
+			if (microMacroMap[curr_code] != microMacroMap[prev_code]) {
+				out.pushc(curr_code);
+			} else {
+				if (!(collapsableMap[curr_code] && collapsatorMap[prev_code])) {
+					out.pushc(prev_code); // NOTE(review): pushes prev_code while the branch above pushes curr_code - confirm this is intended
+				}
+			}
+			prev_code = curr_code;
+		}
+		// and insert the most recent token
+		out.pushc(*in.wordp(1));
+		VERBOSE(3,"In lmmacro::collapse(ngram &in, ngram &out) out    = " <<  out  << "\n");
+		return false;
+	}
+	
+	void lmmacro::mapping(ngram &in, ngram &out)
+	{
+		VERBOSE(3,"In lmmacro::mapping(ngram &in, ngram &out) in    = " <<  in  << "\n");
+		
+		// Translate every micro code of in, from position in.size down to 1,
+		// into its macro code and push it onto out. A code that is not below
+		// microMacroMapN has no map entry and becomes the OOV code of the
+		// LM dictionary.
+		int pos = in.size;
+		while (pos > 0) {
+			const int micro = *in.wordp(pos);
+			const bool known = (micro < microMacroMapN);
+			
+			// oovcode() is only evaluated for codes without a map entry
+			const int macro = known ? microMacroMap[micro] : lmtable::getDict()->oovcode();
+			out.pushc(macro);
+			
+			pos--;
+		}
+		VERBOSE(3,"In lmmacro::mapping(ngram &in, ngram &out) out    = " <<  out  << "\n");
+		return;
+	}
+	
+	
+	//maxsuffptr returns the largest suffix of an n-gram that is contained
+	//in the LM table. This can be used as a compact representation of the
+	//(n-1)-gram state of a n-gram LM. if the input k-gram has k>=n then it
+	//is trimmed to its n-1 suffix.
+	
+	const char *lmmacro::maxsuffptr(ngram micro_ng, unsigned int* size)
+	{
 		ngram macro_ng(lmtable::getDict());
 		
 		if (micro_ng.dict ==  macro_ng.dict)
 			macro_ng.trans(micro_ng);  // micro to macro mapping already done
-			else
-				map(&micro_ng, &macro_ng); // mapping required
-				
-				VERBOSE(2,"lmmacro::lprob: micro_ng = " << micro_ng << "\n"
-								<<  "lmmacro::lprob: macro_ng = " << macro_ng << "\n")
-				
-				return lmtable::cmaxsuffidx(macro_ng,size);
+		else
+			map(&micro_ng, &macro_ng); // mapping required
+		
+		VERBOSE(2,"lmmacro::lprob: micro_ng = " << micro_ng << "\n"
+						<< "lmmacro::lprob: macro_ng = " << macro_ng << "\n");
 		
+		return lmtable::maxsuffptr(macro_ng,size);
 	}
-
-ngram_state_t lmmacro::cmaxsuffidx(ngram micro_ng, unsigned int* size)
-{
+	
+	/*
+	 const char *lmmacro::cmaxsuffptr(ngram micro_ng, unsigned int* size)
+	 {
+	 //cerr << "lmmacro::CMAXsuffptr\n";
+	 //cerr << "micro_ng: " << micro_ng
+	 //	<< " -> micro_ng.size: " << micro_ng.size << "\n";
+	 
+	 //the LM working on the selected field = 0
+	 //contributes to the LM state
+	 //  if (selectedField>0)    return NULL;
+	 
+	 ngram macro_ng(lmtable::getDict());
+	 
+	 if (micro_ng.dict ==  macro_ng.dict)
+	 macro_ng.trans(micro_ng);  // micro to macro mapping already done
+	 else
+	 map(&micro_ng, &macro_ng); // mapping required
+	 
+	 VERBOSE(2,"lmmacro::lprob: micro_ng = " << micro_ng << "\n"
+	 <<  "lmmacro::lprob: macro_ng = " << macro_ng << "\n")
+	 
+	 return lmtable::cmaxsuffptr(macro_ng,size);
+	 
+	 }
+	 */
+	ngram_state_t lmmacro::maxsuffidx(ngram micro_ng, unsigned int* size)
+	{
 		//cerr << "lmmacro::CMAXsuffptr\n";
 		//cerr << "micro_ng: " << micro_ng
 		//	<< " -> micro_ng.size: " << micro_ng.size << "\n";
@@ -615,340 +593,366 @@ ngram_state_t lmmacro::cmaxsuffidx(ngram micro_ng, unsigned int* size)
 		return lmtable::cmaxsuffidx(macro_ng,size);
 		
 	}
-
-void lmmacro::map(ngram *in, ngram *out)
-{
-
-  VERBOSE(2,"In lmmacro::map, in = " << *in << endl
-          << " (selectedField = " << selectedField << " )\n");
-
-  if (selectedField==-2) // the whole token is compatible with the LM words
-    One2OneMapping(in, out);
-
-  else if (selectedField==-1) // the whole token has to be mapped before querying the LM
-    Micro2MacroMapping(in, out);
-
-  else if (selectedField<10) { // select the field "selectedField" from tokens (separator is assumed to be "#")
-    ngram field_ng(((lmmacro *)this)->getDict());
-    int microsize = in->size;
-
-    for (int i=microsize; i>0; i--) {
-
-      char curr_token[BUFSIZ];
-      strcpy(curr_token, ((lmmacro *)this)->getDict()->decode(*(in->wordp(i))));
-      char *field;
-      if (strcmp(curr_token,"<s>") &&
-          strcmp(curr_token,"</s>") &&
-          strcmp(curr_token,"_unk_")) {
-        field = strtok(curr_token, "#");
-        int j=0;
-        while (j<selectedField && field != NULL) {
-          field = strtok(0, "#");
-          j++;
-        }
-      } else {
-        field = curr_token;
-      }
-
-      if (field)
-        field_ng.pushw(field);
-      else {
-
-        field_ng.pushw((char*)"_unk_");
-
-        // cerr << *in << "\n";
-        // error((char*)"ERROR: Malformed input: selected field does not exist in token\n");
-
-        /**
-            We can be here in 2 cases:
-
-            a. effectively when the token is malformed, that is the selected
-            field does not exist
-
-            b. in case of verbatim translation, that is the source word is
-            not known to the phrase table and moses transfers it as it is
-            to the target side: in this case, no assumption can be made on its
-            format, which means that the selected field can not exist
-
-            The possibility of case (b) makes incorrect the error exit from
-            the code at this point: correct, on the contrary, push the _unk_ string
-        **/
-      }
-    }
-    if (microMacroMapN>0)
-      Micro2MacroMapping(&field_ng, out);
-    else
-      out->trans(field_ng);
-  } else {
-
+	
+	/*
+	 ngram_state_t lmmacro::cmaxsuffidx(ngram micro_ng, unsigned int* size)
+	 {
+	 //cerr << "lmmacro::CMAXsuffptr\n";
+	 //cerr << "micro_ng: " << micro_ng
+	 //	<< " -> micro_ng.size: " << micro_ng.size << "\n";
+	 
+	 //the LM working on the selected field = 0
+	 //contributes to the LM state
+	 //  if (selectedField>0)    return NULL;
+	 
+	 ngram macro_ng(lmtable::getDict());
+	 
+	 if (micro_ng.dict ==  macro_ng.dict)
+	 macro_ng.trans(micro_ng);  // micro to macro mapping already done
+	 else
+	 map(&micro_ng, &macro_ng); // mapping required
+	 
+	 VERBOSE(2,"lmmacro::lprob: micro_ng = " << micro_ng << "\n"
+	 <<  "lmmacro::lprob: macro_ng = " << macro_ng << "\n")
+	 
+	 return lmtable::cmaxsuffidx(macro_ng,size);
+	 
+	 }
+	 */
+	
+	void lmmacro::map(ngram *in, ngram *out)
+	{
+		
+		VERBOSE(2,"In lmmacro::map, in = " << *in << endl
+						<< " (selectedField = " << selectedField << " )\n");
+		
+		if (selectedField==-2) // the whole token is compatible with the LM words
+			One2OneMapping(in, out);
+		
+		else if (selectedField==-1) // the whole token has to be mapped before querying the LM
+			Micro2MacroMapping(in, out);
+		
+		else if (selectedField<10) { // select the field "selectedField" from tokens (separator is assumed to be "#")
+			ngram field_ng(((lmmacro *)this)->getDict());
+			int microsize = in->size;
+			
+			for (int i=microsize; i>0; i--) {
+				
+				char curr_token[BUFSIZ];
+				strcpy(curr_token, ((lmmacro *)this)->getDict()->decode(*(in->wordp(i))));
+				char *field;
+				if (strcmp(curr_token,"<s>") &&
+						strcmp(curr_token,"</s>") &&
+						strcmp(curr_token,"_unk_")) {
+					field = strtok(curr_token, "#");
+					int j=0;
+					while (j<selectedField && field != NULL) {
+						field = strtok(0, "#");
+						j++;
+					}
+				} else {
+					field = curr_token;
+				}
+				
+				if (field)
+					field_ng.pushw(field);
+				else {
+					
+					field_ng.pushw((char*)"_unk_");
+					
+					// cerr << *in << "\n";
+					// error((char*)"ERROR: Malformed input: selected field does not exist in token\n");
+					
+					/**
+					 We can be here in 2 cases:
+					 
+					 a. effectively when the token is malformed, that is the selected
+					 field does not exist
+					 
+					 b. in case of verbatim translation, that is the source word is
+					 not known to the phrase table and moses transfers it as it is
+					 to the target side: in this case, no assumption can be made on its
+					 format, which means that the selected field can not exist
+					 
+					 The possibility of case (b) makes incorrect the error exit from
+					 the code at this point: correct, on the contrary, push the _unk_ string
+					 **/
+				}
+			}
+			if (microMacroMapN>0)
+				Micro2MacroMapping(&field_ng, out);
+			else
+				out->trans(field_ng);
+		} else {
+			
 #ifdef DLEXICALLM
-    // selectedField>=10: tens=idx of micro tag (possibly to be mapped to
-    // macro tag), unidx=idx of lemma to be concatenated by "_" to the
-    // (mapped) tag
-
-    int tagIdx = selectedField/10;
-    int lemmaIdx = selectedField%10;
-
-    // micro (or mapped to macro) sequence construction:
-    ngram tag_ng(getDict());
-    char *lemmas[BUFSIZ];
-
-    int microsize = in->size;
-    for (int i=microsize; i>0; i--) {
-      char curr_token[BUFSIZ];
-      strcpy(curr_token, getDict()->decode(*(in->wordp(i))));
-      char *tag = NULL, *lemma = NULL;
-
-      if (strcmp(curr_token,"<s>") &&
-          strcmp(curr_token,"</s>") &&
-          strcmp(curr_token,"_unk_")) {
-
-        if (tagIdx<lemmaIdx) {
-          tag = strtok(curr_token, "#");
-          for (int j=0; j<tagIdx; j++)
-            tag = strtok(0, "#");
-          for (int j=tagIdx; j<lemmaIdx; j++)
-            lemma = strtok(0, "#");
-        } else {
-          lemma = strtok(curr_token, "#");
-          for (int j=0; j<lemmaIdx; j++)
-            lemma = strtok(0, "#");
-          for (int j=lemmaIdx; j<tagIdx; j++)
-            tag = strtok(0, "#");
-        }
-
-        VERBOSE(3,"(tag,lemma) = " << tag << " " << lemma << "\n");
-      } else {
-        tag = curr_token;
-        lemma = curr_token;
-        VERBOSE(3,"(tag=lemma) = " << tag << " " << lemma << "\n");
-      }
-      if (tag) {
-        tag_ng.pushw(tag);
-        lemmas[i] = strdup(lemma);
-      } else {
-        tag_ng.pushw((char*)"_unk_");
-        lemmas[i] = strdup("_unk_");
-      }
-    }
-
-    if (microMacroMapN>0)
-      Micro2MacroMapping(&tag_ng, out, lemmas);
-    else
-      out->trans(tag_ng); // qui si dovrebbero sostituire i tag con tag_lemma, senza mappatura!
-
+			// selectedField>=10: tens=idx of micro tag (possibly to be mapped to
+			// macro tag), unidx=idx of lemma to be concatenated by "_" to the
+			// (mapped) tag
+			
+			int tagIdx = selectedField/10;
+			int lemmaIdx = selectedField%10;
+			
+			// micro (or mapped to macro) sequence construction:
+			ngram tag_ng(getDict());
+			char *lemmas[BUFSIZ];
+			
+			int microsize = in->size;
+			for (int i=microsize; i>0; i--) {
+				char curr_token[BUFSIZ];
+				strcpy(curr_token, getDict()->decode(*(in->wordp(i))));
+				char *tag = NULL, *lemma = NULL;
+				
+				if (strcmp(curr_token,"<s>") &&
+						strcmp(curr_token,"</s>") &&
+						strcmp(curr_token,"_unk_")) {
+					
+					if (tagIdx<lemmaIdx) {
+						tag = strtok(curr_token, "#");
+						for (int j=0; j<tagIdx; j++)
+							tag = strtok(0, "#");
+						for (int j=tagIdx; j<lemmaIdx; j++)
+							lemma = strtok(0, "#");
+					} else {
+						lemma = strtok(curr_token, "#");
+						for (int j=0; j<lemmaIdx; j++)
+							lemma = strtok(0, "#");
+						for (int j=lemmaIdx; j<tagIdx; j++)
+							tag = strtok(0, "#");
+					}
+					
+					VERBOSE(3,"(tag,lemma) = " << tag << " " << lemma << "\n");
+				} else {
+					tag = curr_token;
+					lemma = curr_token;
+					VERBOSE(3,"(tag=lemma) = " << tag << " " << lemma << "\n");
+				}
+				if (tag) {
+					tag_ng.pushw(tag);
+					lemmas[i] = strdup(lemma);
+				} else {
+					tag_ng.pushw((char*)"_unk_");
+					lemmas[i] = strdup("_unk_");
+				}
+			}
+			
+			if (microMacroMapN>0)
+				Micro2MacroMapping(&tag_ng, out, lemmas);
+			else
+				out->trans(tag_ng); // qui si dovrebbero sostituire i tag con tag_lemma, senza mappatura!
+			
 #endif
-
-  }
-
-  VERBOSE(2,"In lmmacro::map, FINAL out = " << *out << endl);
-}
-
-void lmmacro::One2OneMapping(ngram *in, ngram *out)
-{
-  int insize = in->size;
-
-  // map each token of the sequence "in" into the same-length sequence "out" through the map
-
-  for (int i=insize; i>0; i--) {
-
-    int curr_code = *(in->wordp(i));
-    const char *outtoken =
+			
+		}
+		
+		VERBOSE(2,"In lmmacro::map, FINAL out = " << *out << endl);
+	}
+	
+	void lmmacro::One2OneMapping(ngram *in, ngram *out)
+	{
+		int insize = in->size;
+		
+		// map each token of the sequence "in" into the same-length sequence "out" through the map
+		
+		for (int i=insize; i>0; i--) {
+			
+			int curr_code = *(in->wordp(i));
+			const char *outtoken =
       lmtable::getDict()->decode((curr_code<microMacroMapN)?microMacroMap[curr_code]:lmtable::getDict()->oovcode());
-    out->pushw(outtoken);
-  }
-  return;
-}
-
-
-void lmmacro::Micro2MacroMapping(ngram *in, ngram *out)
-{
-
-  int microsize = in->size;
-
-  VERBOSE(2,"In Micro2MacroMapping, in    = " <<  *in  << "\n");
-
-  // map microtag sequence (in) into the corresponding sequence of macrotags (possibly shorter) (out)
-
-  for (int i=microsize; i>0; i--) {
-
-    int curr_code = *(in->wordp(i));
-    const char *curr_macrotag = lmtable::getDict()->decode((curr_code<microMacroMapN)?microMacroMap[curr_code]:lmtable::getDict()->oovcode());
-
-    if (i==microsize) {
-      out->pushw(curr_macrotag);
-
-    } else {
-      int prev_code = *(in->wordp(i+1));
-
-      const char *prev_microtag = getDict()->decode(prev_code);
-      const char *curr_microtag = getDict()->decode(curr_code);
-      const char *prev_macrotag = lmtable::getDict()->decode((prev_code<microMacroMapN)?microMacroMap[prev_code]:lmtable::getDict()->oovcode());
-
-
-      int prev_len = strlen(prev_microtag)-1;
-      int curr_len = strlen(curr_microtag)-1;
-
-      if (strcmp(curr_macrotag,prev_macrotag) != 0 ||
-          !(
-            (( prev_microtag[prev_len]== '(' || ( prev_microtag[0]== '(' && prev_microtag[prev_len]!= ')' )) &&  ( curr_microtag[curr_len]==')' && curr_microtag[0]!='(')) ||
-            (( prev_microtag[prev_len]== '(' || ( prev_microtag[0]== '(' && prev_microtag[prev_len]!= ')' )) &&  curr_microtag[curr_len]=='+' ) ||
-            (prev_microtag[prev_len]== '+' &&  curr_microtag[curr_len]=='+' ) ||
-            (prev_microtag[prev_len]== '+' &&  ( curr_microtag[curr_len]==')' && curr_microtag[0]!='(' ))))
-        out->pushw(curr_macrotag);
-    }
-  }
-  return;
-}
-
+			out->pushw(outtoken);
+		}
+		return;
+	}
 	
 	
-// DISMITTED ON FEB 2011 BECAUSE TOO MUCH PROBLEMATIC FROM A THEORETICAL POINT OF VIEW
-
+	void lmmacro::Micro2MacroMapping(ngram *in, ngram *out)
+	{
+		// Pushes onto out the macro tags of the micro tags in in, skipping some of them (see the condition below).
+		int microsize = in->size;
+		
+		VERBOSE(2,"In Micro2MacroMapping, in    = " <<  *in  << "\n");
+		
+		// map microtag sequence (in) into the corresponding sequence of macrotags (possibly shorter) (out);
+		// a code that is not below microMacroMapN is mapped to the OOV code of the LM dictionary
+		for (int i=microsize; i>0; i--) {
+			
+			int curr_code = *(in->wordp(i));
+			const char *curr_macrotag = lmtable::getDict()->decode((curr_code<microMacroMapN)?microMacroMap[curr_code]:lmtable::getDict()->oovcode());
+			
+			if (i==microsize) {
+				out->pushw(curr_macrotag); // first position visited: always pushed
+				
+			} else {
+				int prev_code = *(in->wordp(i+1)); // the position visited just before this one
+				
+				const char *prev_microtag = getDict()->decode(prev_code);
+				const char *curr_microtag = getDict()->decode(curr_code);
+				const char *prev_macrotag = lmtable::getDict()->decode((prev_code<microMacroMapN)?microMacroMap[prev_code]:lmtable::getDict()->oovcode());
+				// prev_len / curr_len are the index of the last character of each micro tag
+				// NOTE(review): they are -1 for an empty tag, which would index before the string - confirm tags are never empty
+				int prev_len = strlen(prev_microtag)-1;
+				int curr_len = strlen(curr_microtag)-1;
+				// push unless the macro tags are equal AND the '(' / ')' / '+' markers of the two micro tags match one of the four patterns
+				if (strcmp(curr_macrotag,prev_macrotag) != 0 ||
+						!(
+							(( prev_microtag[prev_len]== '(' || ( prev_microtag[0]== '(' && prev_microtag[prev_len]!= ')' )) &&  ( curr_microtag[curr_len]==')' && curr_microtag[0]!='(')) ||
+							(( prev_microtag[prev_len]== '(' || ( prev_microtag[0]== '(' && prev_microtag[prev_len]!= ')' )) &&  curr_microtag[curr_len]=='+' ) ||
+							(prev_microtag[prev_len]== '+' &&  curr_microtag[curr_len]=='+' ) ||
+							(prev_microtag[prev_len]== '+' &&  ( curr_microtag[curr_len]==')' && curr_microtag[0]!='(' ))))
+					out->pushw(curr_macrotag);
+			}
+		}
+		return;
+	}
+	
+	
+	
+	// DISMISSED ON FEB 2011 BECAUSE TOO PROBLEMATIC FROM A THEORETICAL POINT OF VIEW
+	
 #ifdef DLEXICALLM
 	
-void lmmacro::Micro2MacroMapping(ngram *in, ngram *out, char **lemmas)
-{
-  VERBOSE(2,"In Micro2MacroMapping, in    = " <<  *in  << "\n")
-
-  int microsize = in->size;
-
-  IFVERBOSE(3) {
-    VERBOSE(3,"In Micro2MacroMapping, lemmas:\n");
-    if (lexicaltoken2classMap)
-      for (int i=microsize; i>0; i--)
-        VERBOSE(3,"lemmas[" << i << "]=" << lemmas[i] << " -> class -> " << lexicaltoken2classMap[lmtable::getDict()->encode(lemmas[i])] << endl);
-    else
-      for (int i=microsize; i>0; i--)
-        VERBOSE(3,"lemmas[" << i << "]=" << lemmas[i] << endl);
-  }
-
-  // map microtag sequence (in) into the corresponding sequence of macrotags (possibly shorter) (out)
-
-  char tag_lemma[BUFSIZ];
-
-  for (int i=microsize; i>0; i--) {
-
-    int curr_code = *(in->wordp(i));
-
-    const char *curr_microtag = getDict()->decode(curr_code);
-    const char *curr_lemma    = lemmas[i];
-    const char *curr_macrotag = lmtable::getDict()->decode((curr_code<microMacroMapN)?microMacroMap[curr_code]:lmtable::getDict()->oovcode());
-    int curr_len = strlen(curr_microtag)-1;
-
-    if (i==microsize) {
-      if (( curr_microtag[curr_len]=='(' ) || ( curr_microtag[0]=='(' && curr_microtag[curr_len]!=')' ) || ( curr_microtag[curr_len]=='+' ))
-        sprintf(tag_lemma, "%s", curr_macrotag); // non lessicalizzo il macrotag se sono ancora all''interno del chunk
-      else if (lexicaltoken2classMap)
-        sprintf(tag_lemma, "%s_class%d", curr_macrotag, lexicaltoken2classMap[lmtable::getDict()->encode(curr_lemma)]);
-      else
-        sprintf(tag_lemma, "%s_%s", curr_macrotag, lemmas[microsize]);
-
-      VERBOSE(2,"In Micro2MacroMapping, starting tag_lemma = >" <<  tag_lemma   << "<\n");
-
-      out->pushw(tag_lemma);
-      free(lemmas[microsize]);
-
-
-    } else {
-
-      int prev_code = *(in->wordp(i+1));
-      const char *prev_microtag = getDict()->decode(prev_code);
-      const char *prev_macrotag = lmtable::getDict()->decode((prev_code<microMacroMapN)?microMacroMap[prev_code]:lmtable::getDict()->oovcode());
-
-
-      int prev_len = strlen(prev_microtag)-1;
-
-      if (( curr_microtag[curr_len]=='(' ) || ( curr_microtag[0]=='(' && curr_microtag[curr_len]!=')' ) || ( curr_microtag[curr_len]=='+' ))
-        sprintf(tag_lemma, "%s", curr_macrotag); // non lessicalizzo il macrotag se sono ancora all''interno del chunk
-      else if (lexicaltoken2classMap)
-        sprintf(tag_lemma, "%s_class%d", curr_macrotag, lexicaltoken2classMap[lmtable::getDict()->encode(curr_lemma)]);
-      else
-        sprintf(tag_lemma, "%s_%s", curr_macrotag, curr_lemma);
-
-      VERBOSE(2,"In Micro2MacroMapping, tag_lemma = >" <<  tag_lemma   << "<\n");
-
-      if (strcmp(curr_macrotag,prev_macrotag) != 0 ||
-          !(
-            (( prev_microtag[prev_len]== '(' || ( prev_microtag[0]== '(' && prev_microtag[prev_len]!=')' )) && curr_microtag[curr_len]==')' && curr_microtag[0]!='(') ||
-            (( prev_microtag[prev_len]== '(' || ( prev_microtag[0]== '(' && prev_microtag[prev_len]!= ')')) && curr_microtag[curr_len]=='+' ) ||
-            (prev_microtag[prev_len]== '+' &&  curr_microtag[curr_len]=='+' ) ||
-            (prev_microtag[prev_len]== '+' &&  curr_microtag[curr_len]==')' && curr_microtag[0]!='(' ))) {
-
-        VERBOSE(2,"In Micro2MacroMapping, before pushw, out = " <<  *out << endl);
-        out->pushw(tag_lemma);
-        VERBOSE(2,"In Micro2MacroMapping, after pushw, out = " <<  *out << endl);
-      } else {
-        VERBOSE(2,"In Micro2MacroMapping, before shift, out = " <<  *out << endl);
-        out->shift();
-        VERBOSE(2,"In Micro2MacroMapping, after shift, out = " <<  *out << endl);
-        out->pushw(tag_lemma);
-        VERBOSE(2,"In Micro2MacroMapping, after push, out = " <<  *out << endl);
-      }
-      free(lemmas[i]);
-    }
-  }
-  return;
-}
-
-void lmmacro::loadLexicalClasses(const char *fn)
-{
-  char line[MAX_LINE];
-  const char* words[MAX_TOKEN_N_MAP];
-  int tokenN;
-
-  lexicaltoken2classMap = (int *)calloc(BUFSIZ, sizeof(int));
-  lexicaltoken2classMapN = BUFSIZ;
-
-  lmtable::getDict()->incflag(1);
-
-  inputfilestream inp(fn);
-  while (inp.getline(line,MAX_LINE,'\n')) {
-    tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
-    if (tokenN != 2)
-      error((char*)"ERROR: wrong format of lexical classes file\n");
-    else {
-      int classIdx = atoi(words[1]);
-      int wordCode = lmtable::getDict()->encode(words[0]);
-
-      if (wordCode>=lexicaltoken2classMapN) {
-        int r = (wordCode-lexicaltoken2classMapN)/BUFSIZ;
-        lexicaltoken2classMapN += (r+1)*BUFSIZ;
-        lexicaltoken2classMap = (int *)reallocf(lexicaltoken2classMap, sizeof(int)*lexicaltoken2classMapN);
-      }
-      lexicaltoken2classMap[wordCode] = classIdx;
-    }
-  }
-
-  lmtable::getDict()->incflag(0);
-
-  IFVERBOSE(3) {
-    for (int x=0; x<lmtable::getDict()->size(); x++)
-      VERBOSE(3,"class of <" << lmtable::getDict()->decode(x) << "> (code=" << x << ") = " << lexicaltoken2classMap[x] << endl);
-  }
-
-  return;
-}
-
-
-void lmmacro::cutLex(ngram *in, ngram *out)
-{
-  *out=*in;
-
-  const char *curr_macro = out->dict->decode(*(out->wordp(1)));
-  out->shift();
-  const char *p = strrchr(curr_macro, '_');
-  int lexLen;
-  if (p)
-    lexLen=strlen(p);
-  else
-    lexLen=0;
-  char curr_NoLexMacro[BUFSIZ];
-  memset(&curr_NoLexMacro,0,BUFSIZ);
-  strncpy(curr_NoLexMacro,curr_macro,strlen(curr_macro)-lexLen);
-  out->pushw(curr_NoLexMacro);
-  return;
-}
+	void lmmacro::Micro2MacroMapping(ngram *in, ngram *out, char **lemmas)
+	{
+		VERBOSE(2,"In Micro2MacroMapping, in    = " <<  *in  << "\n");
+		// Lexicalized variant of the micro-to-macro mapping (compiled only under DLEXICALLM).
+		int microsize = in->size;
+		// lemmas[i] holds the lemma of position i; each entry is free()d below once used.
+		IFVERBOSE(3) {
+			VERBOSE(3,"In Micro2MacroMapping, lemmas:\n");
+			if (lexicaltoken2classMap)
+				for (int i=microsize; i>0; i--)
+					VERBOSE(3,"lemmas[" << i << "]=" << lemmas[i] << " -> class -> " << lexicaltoken2classMap[lmtable::getDict()->encode(lemmas[i])] << endl);
+			else
+				for (int i=microsize; i>0; i--)
+					VERBOSE(3,"lemmas[" << i << "]=" << lemmas[i] << endl);
+		}
+		
+		// map the microtag sequence (in) into the corresponding sequence of macrotags (possibly shorter) (out);
+		// a macrotag is pushed bare, or joined by "_" to a lemma class or to the lemma itself
+		char tag_lemma[BUFSIZ];
+		
+		for (int i=microsize; i>0; i--) {
+			
+			int curr_code = *(in->wordp(i));
+			// NOTE(review): lexicaltoken2classMap is indexed by encode(lemma) without a check against lexicaltoken2classMapN
+			const char *curr_microtag = getDict()->decode(curr_code);
+			const char *curr_lemma    = lemmas[i];
+			const char *curr_macrotag = lmtable::getDict()->decode((curr_code<microMacroMapN)?microMacroMap[curr_code]:lmtable::getDict()->oovcode());
+			int curr_len = strlen(curr_microtag)-1;
+			// position microsize is the first one visited: there is no previous word to compare with
+			if (i==microsize) {
+				if (( curr_microtag[curr_len]=='(' ) || ( curr_microtag[0]=='(' && curr_microtag[curr_len]!=')' ) || ( curr_microtag[curr_len]=='+' ))
+					snprintf(tag_lemma, sizeof(tag_lemma), "%s", curr_macrotag); // do not lexicalize the macrotag while still inside the chunk
+				else if (lexicaltoken2classMap)
+					snprintf(tag_lemma, sizeof(tag_lemma), "%s_class%d", curr_macrotag, lexicaltoken2classMap[lmtable::getDict()->encode(curr_lemma)]);
+				else
+					snprintf(tag_lemma, sizeof(tag_lemma), "%s_%s", curr_macrotag, lemmas[microsize]);
+				
+				VERBOSE(2,"In Micro2MacroMapping, starting tag_lemma = >" <<  tag_lemma   << "<\n");
+				
+				out->pushw(tag_lemma);
+				free(lemmas[microsize]);
+				
+				
+			} else {
+				
+				int prev_code = *(in->wordp(i+1));
+				const char *prev_microtag = getDict()->decode(prev_code);
+				const char *prev_macrotag = lmtable::getDict()->decode((prev_code<microMacroMapN)?microMacroMap[prev_code]:lmtable::getDict()->oovcode());
+				
+				// NOTE(review): strlen()-1 is -1 for an empty tag - confirm tags are never empty
+				int prev_len = strlen(prev_microtag)-1;
+				// snprintf bounds every write to tag_lemma: an over-long lemma is truncated instead of overflowing the buffer
+				if (( curr_microtag[curr_len]=='(' ) || ( curr_microtag[0]=='(' && curr_microtag[curr_len]!=')' ) || ( curr_microtag[curr_len]=='+' ))
+					snprintf(tag_lemma, sizeof(tag_lemma), "%s", curr_macrotag); // do not lexicalize the macrotag while still inside the chunk
+				else if (lexicaltoken2classMap)
+					snprintf(tag_lemma, sizeof(tag_lemma), "%s_class%d", curr_macrotag, lexicaltoken2classMap[lmtable::getDict()->encode(curr_lemma)]);
+				else
+					snprintf(tag_lemma, sizeof(tag_lemma), "%s_%s", curr_macrotag, curr_lemma);
+				
+				VERBOSE(2,"In Micro2MacroMapping, tag_lemma = >" <<  tag_lemma   << "<\n");
+				
+				if (strcmp(curr_macrotag,prev_macrotag) != 0 ||
+						!(
+							(( prev_microtag[prev_len]== '(' || ( prev_microtag[0]== '(' && prev_microtag[prev_len]!=')' )) && curr_microtag[curr_len]==')' && curr_microtag[0]!='(') ||
+							(( prev_microtag[prev_len]== '(' || ( prev_microtag[0]== '(' && prev_microtag[prev_len]!= ')')) && curr_microtag[curr_len]=='+' ) ||
+							(prev_microtag[prev_len]== '+' &&  curr_microtag[curr_len]=='+' ) ||
+							(prev_microtag[prev_len]== '+' &&  curr_microtag[curr_len]==')' && curr_microtag[0]!='(' ))) {
+					// different macrotag, or none of the four marker patterns matched: append
+					VERBOSE(2,"In Micro2MacroMapping, before pushw, out = " <<  *out << endl);
+					out->pushw(tag_lemma);
+					VERBOSE(2,"In Micro2MacroMapping, after pushw, out = " <<  *out << endl);
+				} else {
+					VERBOSE(2,"In Micro2MacroMapping, before shift, out = " <<  *out << endl);
+					out->shift(); // presumably drops the entry pushed for the previous word so that this one replaces it - confirm
+					VERBOSE(2,"In Micro2MacroMapping, after shift, out = " <<  *out << endl);
+					out->pushw(tag_lemma);
+					VERBOSE(2,"In Micro2MacroMapping, after push, out = " <<  *out << endl);
+				}
+				free(lemmas[i]);
+			}
+		}
+		return;
+	}
+	
+	void lmmacro::loadLexicalClasses(const char *fn)
+	{
+		// Loads "<word> <class index>" lines from file fn into lexicaltoken2classMap, indexed by the LM dictionary code of the word.
+		char line[MAX_LINE];
+		const char* words[MAX_TOKEN_N_MAP];
+		int tokenN;
+		
+		lexicaltoken2classMap = (int *)calloc(BUFSIZ, sizeof(int)); // zero-filled: class 0 unless the file says otherwise
+		lexicaltoken2classMapN = BUFSIZ;
+		lmtable::getDict()->incflag(1); // NOTE(review): presumably lets encode() add unseen words while loading - confirm
+		
+		inputfilestream inp(fn);
+		while (inp.getline(line,MAX_LINE,'\n')) {
+			tokenN = parseWords(line,words,MAX_TOKEN_N_MAP);
+			if (tokenN != 2)
+				error((char*)"ERROR: wrong format of lexical classes file\n");
+			else {
+				int classIdx = atoi(words[1]);
+				int wordCode = lmtable::getDict()->encode(words[0]);
+				if (wordCode>=lexicaltoken2classMapN) {
+					// grow in multiples of BUFSIZ until wordCode fits, then zero the added tail (realloc leaves it uninitialised)
+					const int oldN = lexicaltoken2classMapN;
+					int r = (wordCode-lexicaltoken2classMapN)/BUFSIZ;
+					lexicaltoken2classMapN += (r+1)*BUFSIZ;
+					lexicaltoken2classMap = (int *)reallocf(lexicaltoken2classMap, sizeof(int)*lexicaltoken2classMapN);
+					memset(lexicaltoken2classMap+oldN, 0, sizeof(int)*(lexicaltoken2classMapN-oldN));
+				}
+				lexicaltoken2classMap[wordCode] = classIdx;
+			}
+		}
+		lmtable::getDict()->incflag(0);
+		
+		IFVERBOSE(3) {
+			for (int x=0; x<lmtable::getDict()->size() && x<lexicaltoken2classMapN; x++) // never read past the end of the map
+				VERBOSE(3,"class of <" << lmtable::getDict()->decode(x) << "> (code=" << x << ") = " << lexicaltoken2classMap[x] << endl);
+		}
+		return;
+	}
+	
+	
+	void lmmacro::cutLex(ngram *in, ngram *out)
+	{
+		// Copies in to out, then shift()s out and pushes back the word that was at
+		// position 1, cut at its last '_' (that '_' and what follows it are
+		// dropped); a word without '_' is pushed back unchanged.
+		*out=*in;
+		
+		const char *curr_macro = out->dict->decode(*(out->wordp(1)));
+		out->shift();
+		const char *p = strrchr(curr_macro, '_');
+		
+		// Number of leading characters to keep. A std::string replaces the former
+		// fixed BUFSIZ array, so a long word can no longer overflow the buffer.
+		const size_t keep = p ? (size_t)(p - curr_macro) : strlen(curr_macro);
+		const std::string curr_NoLexMacro(curr_macro, keep);
+		out->pushw(curr_NoLexMacro.c_str());
+		return;
+	}
 #endif
 	
 }//namespace irstlm
diff --git a/src/lmmacro.h b/src/lmmacro.h
index c67c6bf..9cdec56 100644
--- a/src/lmmacro.h
+++ b/src/lmmacro.h
@@ -1,24 +1,24 @@
 // $Id: lmmacro.h 3461 2010-08-27 10:17:34Z bertoldi $
 
 /******************************************************************************
-IrstLM: IRST Language Model Toolkit
-Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
-
-******************************************************************************/
+ IrstLM: IRST Language Model Toolkit
+ Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
+ 
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+ 
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ Lesser General Public License for more details.
+ 
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ 
+ ******************************************************************************/
 
 
 #ifndef MF_LMMACRO_H
@@ -34,101 +34,101 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 #include "dictionary.h"
 #include "n_gram.h"
 #include "lmtable.h"
-	
+
 #define MAX_TOKEN_N_MAP 5
 
 namespace irstlm {
 	
-class lmmacro: public lmtable
-{
-
-  dictionary     *dict;
-  int             maxlev; //max level of table
-  int             selectedField;
-
-  bool            collapseFlag; //flag for the presence of collapse
-  bool            mapFlag; //flag for the presence of map
-
-  int             microMacroMapN;
-  int            *microMacroMap;
-  bool           *collapsableMap;
-  bool           *collapsatorMap;
-
+	class lmmacro: public lmtable
+	{
+		
+		dictionary     *dict;
+		int             maxlev; //max level of table
+		int             selectedField;
+		
+		bool            collapseFlag; //flag for the presence of collapse
+		bool            mapFlag; //flag for the presence of map
+		
+		int             microMacroMapN;
+		int            *microMacroMap;
+		bool           *collapsableMap;
+		bool           *collapsatorMap;
+		
 #ifdef DLEXICALLM
-  int             selectedFieldForLexicon;
-  int            *lexicaltoken2classMap;
-  int             lexicaltoken2classMapN;
+		int             selectedFieldForLexicon;
+		int            *lexicaltoken2classMap;
+		int             lexicaltoken2classMapN;
 #endif
-
-
-  void loadmap(const std::string mapfilename);
-  void unloadmap();
-
-  bool transform(ngram &in, ngram &out);
-  void field_selection(ngram &in, ngram &out);
-  bool collapse(ngram &in, ngram &out);
-  void mapping(ngram &in, ngram &out);
-
-public:
-
-  lmmacro(float nlf=0.0, float dlfi=0.0);
-  ~lmmacro();
-
-  void load(const std::string &filename,int mmap=0);
-
-  double lprob(ngram ng);
-  double clprob(ngram ng,double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
-
-  const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
-  const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
-  ngram_state_t maxsuffidx(ngram ong, unsigned int* size=NULL);
-  ngram_state_t cmaxsuffidx(ngram ong, unsigned int* size=NULL);
-	
-  void map(ngram *in, ngram *out);
-  void One2OneMapping(ngram *in, ngram *out);
-  void Micro2MacroMapping(ngram *in, ngram *out);
+		
+		
+		void loadmap(const std::string mapfilename);
+		void unloadmap();
+		
+		bool transform(ngram &in, ngram &out);
+		void field_selection(ngram &in, ngram &out);
+		bool collapse(ngram &in, ngram &out);
+		void mapping(ngram &in, ngram &out);
+		
+	public:
+		
+		lmmacro(float nlf=0.0, float dlfi=0.0);
+		~lmmacro();
+		
+		virtual void load(const std::string &filename,int mmap=0);
+		
+		virtual double lprob(ngram ng);
+		virtual double clprob(ngram ng,double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL, double* lastbow=NULL);
+		//  double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,ngram_state_t* maxsuffidx=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
+		
+		virtual const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
+		//  const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
+		virtual ngram_state_t maxsuffidx(ngram ong, unsigned int* size=NULL);
+		//  ngram_state_t cmaxsuffidx(ngram ong, unsigned int* size=NULL);
+		
+		void map(ngram *in, ngram *out);
+		void One2OneMapping(ngram *in, ngram *out);
+		void Micro2MacroMapping(ngram *in, ngram *out);
 #ifdef DLEXICALLM
-  void Micro2MacroMapping(ngram *in, ngram *out, char **lemma);
-  void loadLexicalClasses(const char *fn);
-  void cutLex(ngram *in, ngram *out);
+		void Micro2MacroMapping(ngram *in, ngram *out, char **lemma);
+		void loadLexicalClasses(const char *fn);
+		void cutLex(ngram *in, ngram *out);
 #endif
-
-	inline bool is_OOV(int code) {
-		ngram word_ng(getDict());
-		ngram field_ng(getDict());
-		word_ng.pushc(code); 
-		if (selectedField >= 0)
-			field_selection(word_ng, field_ng);
-		else
-			field_ng = word_ng;
-		int field_code=*field_ng.wordp(1);
-		VERBOSE(2,"inline virtual bool lmmacro::is_OOV(int code) word_ng:" << word_ng << " field_ng:" << field_ng << std::endl);
-		//the selected field(s) of a token is considered OOV 
-		//either if unknown by the microMacroMap
-		//or if its mapped macroW is OOV
-		if (field_code >= microMacroMapN) return true;
-		VERBOSE(2,"inline virtual bool lmmacro::is_OOV(int code)*field_code:" << field_code << "  microMacroMap[field_code]:" << microMacroMap[field_code] << " lmtable::dict->oovcode():" << lmtable::dict->oovcode() << std::endl);
-		return (microMacroMap[field_code] == lmtable::dict->oovcode());
+		
+		inline bool is_OOV(int code) {
+			ngram word_ng(getDict());
+			ngram field_ng(getDict());
+			word_ng.pushc(code); 
+			if (selectedField >= 0)
+				field_selection(word_ng, field_ng);
+			else
+				field_ng = word_ng;
+			int field_code=*field_ng.wordp(1);
+			VERBOSE(2,"inline virtual bool lmmacro::is_OOV(int code) word_ng:" << word_ng << " field_ng:" << field_ng << std::endl);
+			//the selected field(s) of a token is considered OOV 
+			//either if unknown by the microMacroMap
+			//or if its mapped macroW is OOV
+			if (field_code >= microMacroMapN) return true;
+			VERBOSE(2,"inline virtual bool lmmacro::is_OOV(int code)*field_code:" << field_code << "  microMacroMap[field_code]:" << microMacroMap[field_code] << " lmtable::dict->oovcode():" << lmtable::dict->oovcode() << std::endl);
+			return (microMacroMap[field_code] == lmtable::dict->oovcode());
+		};
+		inline dictionary* getDict() const {
+			return dict;
+		}
+		inline int maxlevel() const {
+			return maxlev;
+		};
+		
+		inline virtual void dictionary_incflag(const bool flag) {
+			dict->incflag(flag);
+		};
+		
+		inline virtual bool filter(const string sfilter, lmContainer* sublmt, const string skeepunigrams) {
+			UNUSED(sfilter);
+			UNUSED(sublmt);
+			UNUSED(skeepunigrams);
+			return false;
+		}
 	};
-  inline dictionary* getDict() const {
-    return dict;
-  }
-  inline int maxlevel() const {
-    return maxlev;
-  };
-
-  inline virtual void dictionary_incflag(const bool flag) {
-    dict->incflag(flag);
-  };
-
-  inline virtual bool filter(const string sfilter, lmContainer* sublmt, const string skeepunigrams) {
-    UNUSED(sfilter);
-    UNUSED(sublmt);
-    UNUSED(skeepunigrams);
-    return false;
-  }
-};
 	
 }//namespace irstlm
 #endif
diff --git a/src/lmtable.cpp b/src/lmtable.cpp
index 9376ce5..3865230 100644
--- a/src/lmtable.cpp
+++ b/src/lmtable.cpp
@@ -212,44 +212,44 @@ namespace irstlm {
 		delete_lmtcaches();
 #endif
 	}
-
-        void lmtable::stat_prob_and_state_cache()
-        {
+	
+	void lmtable::stat_prob_and_state_cache()
+	{
 #ifdef PS_CACHE_ENABLE
-                for (int i=1; i<=max_cache_lev; i++)
-                {
+		for (int i=1; i<=max_cache_lev; i++)
+		{
 			std::cout << "void lmtable::stat_prob_and_state_cache() level:" << i << std::endl;
-                        if (prob_and_state_cache[i])
-                        {
-                                 prob_and_state_cache[i]->stat();
-                        }
-                }
+			if (prob_and_state_cache[i])
+			{
+				prob_and_state_cache[i]->stat();
+			}
+		}
 #endif
-        }
-        void lmtable::stat_lmtcaches()
-        {
+	}
+	void lmtable::stat_lmtcaches()
+	{
 #ifdef PS_CACHE_ENABLE
-                for (int i=2; i<=max_cache_lev; i++)
-                {
+		for (int i=2; i<=max_cache_lev; i++)
+		{
 			std::cout << "void lmtable::stat_lmtcaches() level:" << i << std::endl;
-                        if (lmtcache[i])
-                        {
-                                 lmtcache[i]->stat();
-                        }
-                }
+			if (lmtcache[i])
+			{
+				lmtcache[i]->stat();
+			}
+		}
 #endif
-        }
-
-        void lmtable::stat_caches()
-        {
+	}
+	
+	void lmtable::stat_caches()
+	{
 #ifdef PS_CACHE_ENABLE
-                stat_prob_and_state_cache();
+		stat_prob_and_state_cache();
 #endif
 #ifdef LMT_CACHE_ENABLE
-                stat_lmtcaches();
+		stat_lmtcaches();
 #endif
-        }
-
+	}
+	
 	
 	void lmtable::used_prob_and_state_cache() const
 	{
@@ -1767,7 +1767,7 @@ namespace irstlm {
 			concatenate_single_level(i, fromfilename, tofilename);
 		}
 	}
-
+	
 	//concatenate corresponding single level files of two different tables
 	void lmtable::concatenate_single_level(int level, const char* fromfilename, const char* tofilename){
 		//single level files should have a name derived from "fromfilename" and "tofilename"
@@ -2085,8 +2085,8 @@ namespace irstlm {
 			
 			
 			//insert both found and not found items!!!
-//			if (lmtcache[l] && hit==true) {
-
+			//			if (lmtcache[l] && hit==true) {
+			
 			//insert only not found items!!!
 			if (lmtcache[l] && hit==false) {
 				const char* found2=found;
@@ -2348,6 +2348,7 @@ namespace irstlm {
 #endif
 	}
 	
+	/*
 	//this function simulates the cmaxsuffptr(ngram, ...) but it takes as input an array of codes instead of the ngram
 	const char *lmtable::cmaxsuffptr(int* codes, int sz, unsigned int* size)
 	{
@@ -2397,17 +2398,10 @@ namespace irstlm {
 		ngram ong(dict);
 		ong.pushc(codes,sz);
 		MY_ASSERT (ong.size == sz);
-		/*
-		 unsigned int isize; //internal state size variable
-		 char* found=(char *) maxsuffptr(ong,&isize);
-		 char* found2=(char *) maxsuffptr(ong,size);
-		 if (size!=NULL) *size=isize;
-		 return found;
-		 */
 		return maxsuffptr(ong,size);
 #endif
 	}
-
+	*/
 	
 	//non recursive version
 	ngram_state_t lmtable::maxsuffidx(ngram ong, unsigned int* size)
@@ -2456,7 +2450,7 @@ namespace irstlm {
 						if (ng.succ==0) *size=isize-1;
 						else *size=isize;
 					}
-
+					
 					int ndsz=nodesize(tbltype[isize]);
 					ngram_state_t msidx = 0;
 					if (ng.link){
@@ -2471,7 +2465,7 @@ namespace irstlm {
 			return 0;
 		}
 	}
-
+	
 	ngram_state_t lmtable::cmaxsuffidx(ngram ong, unsigned int* size)
 	{
 		VERBOSE(3,"ngram_state_t lmtable::cmaxsuffidx(ngram ong, unsigned int* size) ong:|" << ong  << "|\n");
@@ -2493,7 +2487,7 @@ namespace irstlm {
 		//		if (prob_and_state_cache && ong.size==maxlev && prob_and_state_cache->get(ong.wordp(maxlev),pst)) {
 		if (prob_and_state_cache[ong.size] && prob_and_state_cache[ong.size]->get(ong.wordp(ong.size),pst)) {
 			*size=pst.statesize;
-//			return pst.state;
+			//			return pst.state;
 			return pst.ngramstate;
 		}
 		ong.size = orisize;
@@ -2520,6 +2514,7 @@ namespace irstlm {
 #endif
 	}
 	
+	/*
 	//this function simulates the cmaxsuffptr(ngram, ...) but it takes as input an array of codes instead of the ngram
 	ngram_state_t lmtable::cmaxsuffidx(int* codes, int sz, unsigned int* size)
 	{
@@ -2571,6 +2566,7 @@ namespace irstlm {
 		return maxsuffidx(ong,size);
 #endif
 	}
+	*/
 	
 	//returns log10prob of n-gram
 	//bow: backoff weight
@@ -2597,8 +2593,8 @@ namespace irstlm {
 		
 		if (bow) *bow=0; //initialize back-off weight
 		if (bol) *bol=0; //initialize bock-off level
-		
-		
+		if (lastbow) *lastbow=0; //initialize back-off weight of the deepest found ngram
+
 		double rbow=0,lpr=0; //output back-off weight and logprob
 		float ibow,iprob;    //internal back-off weight and logprob
 		
@@ -2720,9 +2716,9 @@ namespace irstlm {
 	
 	
 	//return log10 probsL use cache memory
-	double lmtable::clprob(ngram ong,double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible)
+	double lmtable::clprob(ngram ong,double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible, double* lastbow)
 	{
-		VERBOSE(3,"double lmtable::clprob(ngram ong,double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible) ong:|" << ong  << "|\n");
+		VERBOSE(3,"double lmtable::clprob(ngram ong,double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible, double* lastbow) ong:|" << ong  << "|\n");
 		
 #ifdef TRACE_CACHELM
 		//		if (probcache && ong.size==maxlev && sentence_id>0) {
@@ -2736,6 +2732,7 @@ namespace irstlm {
 			if (state!=NULL) *state=NULL;
 			if (ngramstate!=NULL) *ngramstate=NULL;
 			if (extendible!=NULL) *extendible=false;
+			if (lastbow!=NULL) *lastbow=0.0; //lastbow is a double: reset it to 0.0 rather than to a bool literal
 			return 0.0;
 		}
 		
@@ -2755,6 +2752,7 @@ namespace irstlm {
 			if (ngramstate) *ngramstate = pst_get.ngramstate;
 			if (statesize) *statesize = pst_get.statesize;
 			if (extendible) *extendible = pst_get.extendible;
+			if (lastbow) *lastbow = pst_get.lastbow;
 			
 			return logpr;
 		}
@@ -2762,7 +2760,7 @@ namespace irstlm {
 		//cache miss
 		
 		prob_and_state_t pst_add;
-		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.ngramstate), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
+		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.ngramstate), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible), &(pst_add.lastbow));
 		
 		
 		if (bow) *bow = pst_add.bow;
@@ -2771,6 +2769,7 @@ namespace irstlm {
 		if (ngramstate) *ngramstate = pst_add.ngramstate;
 		if (statesize) *statesize = pst_add.statesize;
 		if (extendible) *extendible = pst_add.extendible;
+		if (lastbow) *lastbow = pst_add.lastbow; //guard on lastbow itself (not extendible): lastbow defaults to NULL
 		
 		
 		//		if (prob_and_state_cache && ong.size==maxlev) {
@@ -2781,91 +2780,94 @@ namespace irstlm {
 		}
 		return logpr;
 #else
-		return lmtable::lprob(ong, bow, bol, ngramstate, state, statesize, extendible);
-#endif
-	};
-	
-	
-	//return log10 probsL use cache memory
-	//this function simulates the clprob(ngram, ...) but it takes as input an array of codes instead of the ngram
-	double lmtable::clprob(int* codes, int sz, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible)
-	{
-		VERBOSE(3," double lmtable::clprob(int* codes, int sz, double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible)\n");
-#ifdef TRACE_CACHELM
-		//		if (probcache && sz==maxlev && sentence_id>0) {
-		if (probcache && sentence_id>0) {
-			*cacheout << sentence_id << "\n";
-			//print the codes of the vector ng
-		}
-#endif
-		
-		if (sz==0) {
-			if (statesize!=NULL) *statesize=0;
-			if (state!=NULL) *state=NULL;
-			if (ngramstate!=NULL) *ngramstate=NULL;
-			if (extendible!=NULL) *extendible=false;
-			return 0.0;
-		}
-		
-		if (sz>maxlev) sz=maxlev; //adjust n-gram level to table size
-		
-#ifdef PS_CACHE_ENABLE
-		double logpr;
-		
-		//cache hit
-		prob_and_state_t pst_get;
-		
-		//		if (prob_and_state_cache && sz==maxlev && prob_and_state_cache->get(codes,pst_get)) {
-		if (prob_and_state_cache[sz] && prob_and_state_cache[sz]->get(codes,pst_get)) {
-			
-			logpr=pst_get.logpr;
-			if (bow) *bow = pst_get.bow;
-			if (bol) *bol = pst_get.bol;
-			if (state) *state = pst_get.state;
-			if (ngramstate) *ngramstate = pst_get.ngramstate;
-			if (statesize) *statesize = pst_get.statesize;
-			if (extendible) *extendible = pst_get.extendible;
-			
-			return logpr;
-		}
-		
-		
-		//create the actual ngram
-		ngram ong(dict);
-		ong.pushc(codes,sz);
-		MY_ASSERT (ong.size == sz);
-		
-		//cache miss
-		prob_and_state_t pst_add;
-//		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
-		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.ngramstate), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible));
-		
-		
-		if (bow) *bow = pst_add.bow;
-		if (bol) *bol = pst_add.bol;
-		if (state) *state = pst_add.state;
-		if (ngramstate) *ngramstate = pst_add.ngramstate;
-		if (statesize) *statesize = pst_add.statesize;
-		if (extendible) *extendible = pst_add.extendible;
-		
-		
-		//		if (prob_and_state_cache && ong.size==maxlev) {
-		//			prob_and_state_cache->add(ong.wordp(maxlev),pst_add);
-		//		}
-		if (prob_and_state_cache[sz]) {
-			prob_and_state_cache[sz]->add(ong.wordp(ong.size),pst_add);
-		}
-		return logpr;
-#else
-		
-		//create the actual ngram
-		ngram ong(dict);
-		ong.pushc(codes,sz);
-		MY_ASSERT (ong.size == sz);
-		return lmtable::lprob(ong, bow, bol, ngramstate, state, statesize, extendible);
+		return lmtable::lprob(ong, bow, bol, ngramstate, state, statesize, extendible, lastbow);
 #endif
 	};
 	
+	/*
+	 //return log10 probsL use cache memory
+	 //this function simulates the clprob(ngram, ...) but it takes as input an array of codes instead of the ngram
+	 double lmtable::clprob(int* codes, int sz, double* bow, int* bol, ngram_state_t* ngramstate, char** state,unsigned int* statesize,bool* extendible, double* lastbow)
+	 {
+	 VERBOSE(3," double lmtable::clprob(int* codes, int sz, double* bow, int* bol, ngram_state_t* ngramstate, char** state, unsigned int* statesize, bool* extendible, double* lastbow)\n");
+	 #ifdef TRACE_CACHELM
+	 //		if (probcache && sz==maxlev && sentence_id>0) {
+	 if (probcache && sentence_id>0) {
+	 *cacheout << sentence_id << "\n";
+	 //print the codes of the vector ng
+	 }
+	 #endif
+	 
+	 if (sz==0) {
+	 if (statesize!=NULL) *statesize=0;
+	 if (state!=NULL) *state=NULL;
+	 if (ngramstate!=NULL) *ngramstate=NULL;
+	 if (extendible!=NULL) *extendible=false;
+	 if (lastbow!=NULL) *lastbow=false;
+	 return 0.0;
+	 }
+	 
+	 if (sz>maxlev) sz=maxlev; //adjust n-gram level to table size
+	 
+	 #ifdef PS_CACHE_ENABLE
+	 double logpr;
+	 
+	 //cache hit
+	 prob_and_state_t pst_get;
+	 
+	 //		if (prob_and_state_cache && sz==maxlev && prob_and_state_cache->get(codes,pst_get)) {
+	 if (prob_and_state_cache[sz] && prob_and_state_cache[sz]->get(codes,pst_get)) {
+	 
+	 logpr=pst_get.logpr;
+	 if (bow) *bow = pst_get.bow;
+	 if (bol) *bol = pst_get.bol;
+	 if (state) *state = pst_get.state;
+	 if (ngramstate) *ngramstate = pst_get.ngramstate;
+	 if (statesize) *statesize = pst_get.statesize;
+	 if (extendible) *extendible = pst_get.extendible;
+	 if (lastbow) *lastbow = pst_get.lastbow;
+	 
+	 return logpr;
+	 }
+	 
+	 
+	 //create the actual ngram
+	 ngram ong(dict);
+	 ong.pushc(codes,sz);
+	 MY_ASSERT (ong.size == sz);
+	 
+	 //cache miss
+	 prob_and_state_t pst_add;
+	 //		logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible), &(pst_add.lastbow));
+	 logpr = pst_add.logpr = lmtable::lprob(ong, &(pst_add.bow), &(pst_add.bol), &(pst_add.ngramstate), &(pst_add.state), &(pst_add.statesize), &(pst_add.extendible), &(pst_add.lastbow));
+	 
+	 
+	 if (bow) *bow = pst_add.bow;
+	 if (bol) *bol = pst_add.bol;
+	 if (state) *state = pst_add.state;
+	 if (ngramstate) *ngramstate = pst_add.ngramstate;
+	 if (statesize) *statesize = pst_add.statesize;
+	 if (extendible) *extendible = pst_add.extendible;
+	 if (lastbow) *lastbow = pst_add.lastbow;
+	 
+	 
+	 //		if (prob_and_state_cache && ong.size==maxlev) {
+	 //			prob_and_state_cache->add(ong.wordp(maxlev),pst_add);
+	 //		}
+	 if (prob_and_state_cache[sz]) {
+	 prob_and_state_cache[sz]->add(ong.wordp(ong.size),pst_add);
+	 }
+	 return logpr;
+	 #else
+	 
+	 //create the actual ngram
+	 ngram ong(dict);
+	 ong.pushc(codes,sz);
+	 MY_ASSERT (ong.size == sz);
+	 return lmtable::lprob(ong, bow, bol, ngramstate, state, statesize, extendible,lastbow);
+	 #endif
+	 };
+	 */
 	
 	int lmtable::succrange(node ndp,int level,table_entry_pos_t* isucc,table_entry_pos_t* esucc)
 	{
@@ -2912,7 +2914,7 @@ namespace irstlm {
 		}
 		
 		if (level >1 ) lmtable::getDict()->stat();
-
+		
 		stat_caches();
 		
 	}
diff --git a/src/lmtable.h b/src/lmtable.h
index 77ed54d..6bb707f 100644
--- a/src/lmtable.h
+++ b/src/lmtable.h
@@ -76,594 +76,595 @@ typedef unsigned char qfloat_t; //type for quantized probabilities
 #define BOUND_EMPTY2 (numeric_limits<table_entry_pos_t>::max() - 1)
 
 namespace irstlm {
-class lmtable: public lmContainer
-{
-	static const bool debug=true;
-	
-	void loadtxt(std::istream& inp,const char* header,const char* filename,int mmap);
-	void loadtxt_ram(std::istream& inp,const char* header);
-	void loadtxt_mmap(std::istream& inp,const char* header,const char* outfilename);
-	void loadtxt_level(std::istream& inp,int l);
-	
-	void loadbin(std::istream& inp,const char* header,const char* filename,int mmap);
-	void loadbin_header(std::istream& inp, const char* header);
-	void loadbin_dict(std::istream& inp);
-	void loadbin_codebook(std::istream& inp,int l);
-	void loadbin_level(std::istream& inp,int l);
-	
-protected:
-	char*       table[LMTMAXLEV+1];  //storage of all levels
-	LMT_TYPE    tbltype[LMTMAXLEV+1];  //table type for each levels
-	table_entry_pos_t       cursize[LMTMAXLEV+1];  //current size of levels
-	
-	//current offset for in-memory tables (different for each level
-	//needed to manage partial tables
-	// mempos = diskpos - offset[level]
-	table_entry_pos_t       tb_offset[LMTMAXLEV+1];
-	
-	table_entry_pos_t       maxsize[LMTMAXLEV+1];  //max size of levels
-	table_entry_pos_t*     startpos[LMTMAXLEV+1];  //support vector to store start positions
-	char      info[100]; //information put in the header
-	
-	//statistics
-	int    totget[LMTMAXLEV+1];
-	int    totbsearch[LMTMAXLEV+1];
-	
-	//probability quantization
-	bool      isQtable;
-	
-	//Incomplete LM table from distributed training
-	bool      isItable;
-	
-	//Table with reverted n-grams for fast access
-	bool      isInverted;
-	
-	//Table might contain pruned n-grams
-	bool      isPruned;
-	
-	int       NumCenters[LMTMAXLEV+1];
-	float*    Pcenters[LMTMAXLEV+1];
-	float*    Bcenters[LMTMAXLEV+1];
-	
-	double  logOOVpenalty; //penalty for OOV words (default 0)
-	int     dictionary_upperbound; //set by user
-	int     backoff_state;
-	
-	//improve access speed
-	int max_cache_lev;
-	
-//	NGRAMCACHE_t* prob_and_state_cache;
-	NGRAMCACHE_t* prob_and_state_cache[LMTMAXLEV+1];
-	NGRAMCACHE_t* lmtcache[LMTMAXLEV+1];
-	float ngramcache_load_factor;
-	float dictionary_load_factor;
-	
-	//memory map on disk
-	int memmap;  //level from which n-grams are accessed via mmap
-	int diskid;
-	off_t tableOffs[LMTMAXLEV+1];
-	off_t tableGaps[LMTMAXLEV+1];
-	
-	// is this LM queried for knowing the matching order or (standard
-	// case) for score?
-	bool      orderQuery;
-	
-	//flag to enable/disable deletion of dict in the destructor
-	bool delete_dict;
-	
-public:
-	
+	class lmtable: public lmContainer
+	{
+		static const bool debug=true;
+		
+		void loadtxt(std::istream& inp,const char* header,const char* filename,int mmap);
+		void loadtxt_ram(std::istream& inp,const char* header);
+		void loadtxt_mmap(std::istream& inp,const char* header,const char* outfilename);
+		void loadtxt_level(std::istream& inp,int l);
+		
+		void loadbin(std::istream& inp,const char* header,const char* filename,int mmap);
+		void loadbin_header(std::istream& inp, const char* header);
+		void loadbin_dict(std::istream& inp);
+		void loadbin_codebook(std::istream& inp,int l);
+		void loadbin_level(std::istream& inp,int l);
+		
+	protected:
+		char*       table[LMTMAXLEV+1];  //storage of all levels
+		LMT_TYPE    tbltype[LMTMAXLEV+1];  //table type for each levels
+		table_entry_pos_t       cursize[LMTMAXLEV+1];  //current size of levels
+		
+		//current offset for in-memory tables (different for each level
+		//needed to manage partial tables
+		// mempos = diskpos - offset[level]
+		table_entry_pos_t       tb_offset[LMTMAXLEV+1];
+		
+		table_entry_pos_t       maxsize[LMTMAXLEV+1];  //max size of levels
+		table_entry_pos_t*     startpos[LMTMAXLEV+1];  //support vector to store start positions
+		char      info[100]; //information put in the header
+		
+		//statistics
+		int    totget[LMTMAXLEV+1];
+		int    totbsearch[LMTMAXLEV+1];
+		
+		//probability quantization
+		bool      isQtable;
+		
+		//Incomplete LM table from distributed training
+		bool      isItable;
+		
+		//Table with reverted n-grams for fast access
+		bool      isInverted;
+		
+		//Table might contain pruned n-grams
+		bool      isPruned;
+		
+		int       NumCenters[LMTMAXLEV+1];
+		float*    Pcenters[LMTMAXLEV+1];
+		float*    Bcenters[LMTMAXLEV+1];
+		
+		double  logOOVpenalty; //penalty for OOV words (default 0)
+		int     dictionary_upperbound; //set by user
+		int     backoff_state;
+		
+		//improve access speed
+		int max_cache_lev;
+		
+		//	NGRAMCACHE_t* prob_and_state_cache;
+		NGRAMCACHE_t* prob_and_state_cache[LMTMAXLEV+1];
+		NGRAMCACHE_t* lmtcache[LMTMAXLEV+1];
+		float ngramcache_load_factor;
+		float dictionary_load_factor;
+		
+		//memory map on disk
+		int memmap;  //level from which n-grams are accessed via mmap
+		int diskid;
+		off_t tableOffs[LMTMAXLEV+1];
+		off_t tableGaps[LMTMAXLEV+1];
+		
+		// is this LM queried for knowing the matching order or (standard
+		// case) for score?
+		bool      orderQuery;
+		
+		//flag to enable/disable deletion of dict in the destructor
+		bool delete_dict;
+		
+	public:
+		
 #ifdef TRACE_CACHELM
-	std::fstream* cacheout;
-	int sentence_id;
+		std::fstream* cacheout;
+		int sentence_id;
 #endif
-	
-	dictionary     *dict; // dictionary (words - macro tags)
-	
-	lmtable(float nlf=0.0, float dlfi=0.0);
-	
-	virtual ~lmtable();
-	
-	table_entry_pos_t wdprune(float *thr, int aflag=0);
-	table_entry_pos_t wdprune(float *thr, int aflag, ngram ng, int ilev, int elev, table_entry_pos_t ipos, table_entry_pos_t epos, double lk=0, double bo=0, double *ts=0, double *tbs=0);
-	double lprobx(ngram ong, double *lkp=0, double *bop=0, int *bol=0);
-	
-	table_entry_pos_t ngcnt(table_entry_pos_t *cnt);
-	table_entry_pos_t ngcnt(table_entry_pos_t *cnt, ngram ng, int l, table_entry_pos_t ipos, table_entry_pos_t epos);
-	int pscale(int lev, table_entry_pos_t ipos, table_entry_pos_t epos, double s);
-	
-	void init_prob_and_state_cache();
-	void init_probcache() {
-		init_prob_and_state_cache();
-	}; //kept for back compatibility
-	void init_statecache() {}; //kept for back compatibility
-	void init_lmtcaches();
-//	void init_lmtcaches(int uptolev);
-	void init_caches(int uptolev);
-	
-	void used_prob_and_state_cache() const;
-	void used_lmtcaches() const;
-	void used_caches() const;
-	
-	
-	void delete_prob_and_state_cache();
-	void delete_probcache() {
-		delete_prob_and_state_cache();
-	}; //kept for back compatibility
-	void delete_statecache() {}; //kept for back compatibility
-	void delete_lmtcaches();
-	void delete_caches();
-	
-	void stat_prob_and_state_cache();
-	void stat_lmtcaches();
-	void stat_caches();
-
-	void check_prob_and_state_cache_levels() const;
-	void check_probcache_levels() const {
-		check_prob_and_state_cache_levels();
-	}; //kept for back compatibility
-	void check_statecache_levels() const{}; //kept for back compatibility
-	void check_lmtcaches_levels() const;
-	void check_caches_levels() const;
-	
-	void reset_prob_and_state_cache();
-	void reset_probcache() {
-		reset_prob_and_state_cache();
-	}; //kept for back compatibility
-	void reset_statecache() {}; //kept for back compatibility
-	void reset_lmtcaches();
-	void reset_caches();
-	
-	
-	bool are_prob_and_state_cache_active() const;
-	bool is_probcache_active() const {
-		return are_prob_and_state_cache_active();
-	}; //kept for back compatibility
-	bool is_statecache_active() const {
-		return are_prob_and_state_cache_active();
-	}; //kept for back compatibility
-	bool are_lmtcaches_active() const;
-	bool are_caches_active() const;
-	
-	void reset_mmap();
-	
-	//set the inverted flag to load ngrams in an inverted order
-	//this choice is disregarded if a binary LM is loaded,
-	//because the info is stored into the header
-	inline bool is_inverted(const bool flag) {
-		return isInverted=flag;
-	}
-	inline bool is_inverted() const {
-		return isInverted;
-	}
-	
-	void configure(int n,bool quantized);
-	
-	//set penalty for OOV words
-	inline double getlogOOVpenalty() const {
-		return logOOVpenalty;
-	}
-	
-	inline double setlogOOVpenalty(int dub) {
-		MY_ASSERT(dub > dict->size());
-		dictionary_upperbound = dub;
-		return logOOVpenalty=log((double)(dictionary_upperbound - dict->size()))/M_LN10;
-	}
-	
-	inline double setlogOOVpenalty(double oovp) {
-		return logOOVpenalty=oovp;
-	}
-	
-	virtual int maxlevel() const {
-		return maxlev;
-	};
-	inline bool isQuantized() const {
-		return isQtable;
-	}
-	
-	
-	void savetxt(const char *filename);
-	void savebin(const char *filename);
-	
-	void appendbin_level(int level, fstream &out, int mmap);
-	void appendbin_level_nommap(int level, fstream &out);
-	void appendbin_level_mmap(int level, fstream &out);
-	
-	void savebin_level(int level, const char* filename, int mmap);
-	void savebin_level_nommap(int level, const char* filename);
-	void savebin_level_mmap(int level, const char* filename);
-	void savebin_dict(std::fstream& out);
-	
-	void compact_all_levels(const char* filename);
-	void compact_single_level(int level, const char* filename);
-	
-	void concatenate_all_levels(const char* fromfilename, const char* tofilename);
-	void concatenate_single_level(int level, const char* fromfilename, const char* tofilename);
-	
-	void remove_all_levels(const char* filename);
-	void remove_single_level(int level, const char* filename);
-	
-	void print_table_stat();
-	void print_table_stat(int level);
-	
-	void dumplm(std::fstream& out,ngram ng, int ilev, int elev, table_entry_pos_t ipos,table_entry_pos_t epos);
-	
-	
-	void delete_level(int level, const char* outfilename, int mmap);
-	void delete_level_nommap(int level);
-	void delete_level_mmap(int level, const char* filename);
-	
-	void resize_level(int level, const char* outfilename, int mmap);
-	void resize_level_nommap(int level);
-	void resize_level_mmap(int level, const char* filename);
-	
-	inline void update_offset(int level, table_entry_pos_t value) { tb_offset[level]=value; };
-	
-	
-	virtual void load(const std::string &filename, int mmap=0);
-	virtual void load(std::istream& inp,const char* filename=NULL,const char* outfilename=NULL,int mmap=0);
-	
-	void load_centers(std::istream& inp,int l);
-	
-	void expand_level(int level, table_entry_pos_t size, const char* outfilename, int mmap);
-	void expand_level_nommap(int level, table_entry_pos_t size);
-	void expand_level_mmap(int level, table_entry_pos_t size, const char* outfilename);
-	
-	void cpsublm(lmtable* sublmt, dictionary* subdict,bool keepunigr=true);
-	
-	int reload(std::set<string> words);
-	
-	void filter(const char* /* unused parameter: lmfile */) {};
-	
-	virtual double  lprob(ngram ng){
-		return lprob(ng, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
-	}
-	virtual double  lprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible){
-		return lprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, NULL);
-	}
-	virtual double  lprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible){
-	 return lprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, NULL);
-	}
-	
-	virtual double  lprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow){
-		return lprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, lastbow);
-	}
-	virtual double  lprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow);
-	
-  virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL);
-  virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL);
-	
-
-	void *search(int lev,table_entry_pos_t offs,table_entry_pos_t n,int sz,int *w, LMT_ACTION action,char **found=(char **)NULL);
-	
-	int mybsearch(char *ar, table_entry_pos_t n, int size, char *key, table_entry_pos_t *idx);
-	
-	
-	int add(ngram& ng, float prob,float bow);
-	//template<typename TA, typename TB> int add(ngram& ng, TA prob,TB bow);
-	
-	int addwithoffset(ngram& ng, float prob,float bow);
-	//  template<typename TA, typename TB> int addwithoffset(ngram& ng, TA prob,TB bow);
-	
-	void checkbounds(int level);
-	
-	virtual inline int get(ngram& ng) {
-		return get(ng,ng.size,ng.size);
-	}
-	virtual int get(ngram& ng,int n,int lev);
-	
-	int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev);
-	
-	virtual const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
-	virtual const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
-  virtual const char *cmaxsuffptr(int* codes, int sz, unsigned int* size=NULL);
-	virtual ngram_state_t maxsuffidx(ngram ong, unsigned int* size=NULL);
-  virtual ngram_state_t cmaxsuffidx(ngram ong, unsigned int* size=NULL);
-  virtual ngram_state_t cmaxsuffidx(int* codes, int sz, unsigned int* size=NULL);
-	
-	inline void putmem(char* ptr,int value,int offs,int size) {
-		MY_ASSERT(ptr!=NULL);
-		for (int i=0; i<size; i++)
-			ptr[offs+i]=(value >> (8 * i)) & 0xff;
-	};
-	
-	inline void getmem(char* ptr,int* value,int offs,int size) {
-		MY_ASSERT(ptr!=NULL);
-		*value=ptr[offs] & 0xff;
-		for (int i=1; i<size; i++){
-			*value= *value | ( ( ptr[offs+i] & 0xff ) << (8 *i));
+		
+		dictionary     *dict; // dictionary (words - macro tags)
+		
+		lmtable(float nlf=0.0, float dlfi=0.0);
+		
+		virtual ~lmtable();
+		
+		table_entry_pos_t wdprune(float *thr, int aflag=0);
+		table_entry_pos_t wdprune(float *thr, int aflag, ngram ng, int ilev, int elev, table_entry_pos_t ipos, table_entry_pos_t epos, double lk=0, double bo=0, double *ts=0, double *tbs=0);
+		double lprobx(ngram ong, double *lkp=0, double *bop=0, int *bol=0);
+		
+		table_entry_pos_t ngcnt(table_entry_pos_t *cnt);
+		table_entry_pos_t ngcnt(table_entry_pos_t *cnt, ngram ng, int l, table_entry_pos_t ipos, table_entry_pos_t epos);
+		int pscale(int lev, table_entry_pos_t ipos, table_entry_pos_t epos, double s);
+		
+		void init_prob_and_state_cache();
+		void init_probcache() {
+			init_prob_and_state_cache();
+		}; //kept for back compatibility
+		void init_statecache() {}; //kept for back compatibility
+		void init_lmtcaches();
+		//	void init_lmtcaches(int uptolev);
+		void init_caches(int uptolev);
+		
+		void used_prob_and_state_cache() const;
+		void used_lmtcaches() const;
+		void used_caches() const;
+		
+		
+		void delete_prob_and_state_cache();
+		void delete_probcache() {
+			delete_prob_and_state_cache();
+		}; //kept for back compatibility
+		void delete_statecache() {}; //kept for back compatibility
+		void delete_lmtcaches();
+		void delete_caches();
+		
+		void stat_prob_and_state_cache();
+		void stat_lmtcaches();
+		void stat_caches();
+		
+		void check_prob_and_state_cache_levels() const;
+		void check_probcache_levels() const {
+			check_prob_and_state_cache_levels();
+		}; //kept for back compatibility
+		void check_statecache_levels() const{}; //kept for back compatibility
+		void check_lmtcaches_levels() const;
+		void check_caches_levels() const;
+		
+		void reset_prob_and_state_cache();
+		void reset_probcache() {
+			reset_prob_and_state_cache();
+		}; //kept for back compatibility
+		void reset_statecache() {}; //kept for back compatibility
+		void reset_lmtcaches();
+		void reset_caches();
+		
+		
+		bool are_prob_and_state_cache_active() const;
+		bool is_probcache_active() const {
+			return are_prob_and_state_cache_active();
+		}; //kept for back compatibility
+		bool is_statecache_active() const {
+			return are_prob_and_state_cache_active();
+		}; //kept for back compatibility
+		bool are_lmtcaches_active() const;
+		bool are_caches_active() const;
+		
+		void reset_mmap();
+		
+		//set the inverted flag to load ngrams in an inverted order
+		//this choice is disregarded if a binary LM is loaded,
+		//because the info is stored into the header
+		inline bool is_inverted(const bool flag) {
+			return isInverted=flag;
 		}
-	};
-	
-	template<typename T>
-	inline void putmem(char* ptr,T value,int offs) {
-		MY_ASSERT(ptr!=NULL);
-		memcpy(ptr+offs, &value, sizeof(T));
-	};
-	
-	template<typename T>
-	inline void getmem(char* ptr,T* value,int offs) {
-		MY_ASSERT(ptr!=NULL);
-		memcpy((void*)value, ptr+offs, sizeof(T));
-	};
-	
-	
-	int nodesize(LMT_TYPE ndt) {
-		switch (ndt) {
-			case INTERNAL:
-				return LMTCODESIZE + PROBSIZE + PROBSIZE + BOUNDSIZE;
-			case QINTERNAL:
-				return LMTCODESIZE + QPROBSIZE + QPROBSIZE + BOUNDSIZE;
-			case LEAF:
-				return LMTCODESIZE + PROBSIZE;
-			case QLEAF:
-				return LMTCODESIZE + QPROBSIZE;
-			default:
-				MY_ASSERT(0);
-				return 0;
+		inline bool is_inverted() const {
+			return isInverted;
 		}
-	}
-	
-	inline int word(node nd,int value=-1) {
-		int offset=0;
 		
-		if (value==-1)
-			getmem(nd,&value,offset,LMTCODESIZE);
-		else
-			putmem(nd,value,offset,LMTCODESIZE);
+		void configure(int n,bool quantized);
 		
-		return value;
-	};
-	
-	
-	int codecmp(node a,node b) {
-		register int i,result;
-		for (i=(LMTCODESIZE-1); i>=0; i--) {
-			result=(unsigned char)a[i]-(unsigned char)b[i];
-			if(result) return result;
+		//set penalty for OOV words
+		inline double getlogOOVpenalty() const {
+			return logOOVpenalty;
 		}
-		return 0;
-	};
-	
-	int codediff(node a,node b) {
-		return word(a)-word(b);
-	};
-	
-	
-	inline float prob(node nd,LMT_TYPE ndt) {
-		int offs=LMTCODESIZE;
-		
-		float fv;
-		unsigned char cv;
-		switch (ndt) {
-			case INTERNAL:
-				getmem(nd,&fv,offs);
-				return fv;
-			case QINTERNAL:
-				getmem(nd,&cv,offs);
-				return (float) cv;
-			case LEAF:
-				getmem(nd,&fv,offs);
-				return fv;
-			case QLEAF:
-				getmem(nd,&cv,offs);
-				return (float) cv;
-			default:
-				MY_ASSERT(0);
-				return 0;
-		}
-	};
-	
-	template<typename T>
-	inline T prob(node nd, LMT_TYPE ndt, T value) {
-		int offs=LMTCODESIZE;		
-		
-		switch (ndt) {
-			case INTERNAL:
-				putmem(nd, value,offs);
-				break;
-			case QINTERNAL:
-				putmem(nd,(unsigned char) value,offs);
-				break;
-			case LEAF:
-				putmem(nd, value,offs);
-				break;
-			case QLEAF:
-				putmem(nd,(unsigned char) value,offs);
-				break;
-			default:
-				MY_ASSERT(0);
-				return (T) 0;
+		
+		inline double setlogOOVpenalty(int dub) {
+			MY_ASSERT(dub > dict->size());
+			dictionary_upperbound = dub;
+			return logOOVpenalty=log((double)(dictionary_upperbound - dict->size()))/M_LN10;
 		}
 		
-		return value;
-	};
-	
-	inline float bow(node nd,LMT_TYPE ndt) {
-		int offs=LMTCODESIZE+(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
-		
-		float fv;
-		unsigned char cv;
-		switch (ndt) {
-			case INTERNAL:
-				getmem(nd,&fv,offs);
-				return fv;
-			case QINTERNAL:
-				getmem(nd,&cv,offs);
-				return (float) cv;
-			case LEAF:
-				getmem(nd,&fv,offs);
-				return fv;
-			case QLEAF:
-				getmem(nd,&cv,offs);
-				return (float) cv;
-			default:
-				MY_ASSERT(0);
-				return 0;
+		inline double setlogOOVpenalty(double oovp) {
+			return logOOVpenalty=oovp;
 		}
-	};
-	
-	template<typename T>
-	inline T bow(node nd,LMT_TYPE ndt, T value) {
-		int offs=LMTCODESIZE+(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
-		
-		switch (ndt) {
-			case INTERNAL:
-				putmem(nd, value,offs);
-				break;
-			case QINTERNAL:
-				putmem(nd,(unsigned char) value,offs);
-				break;
-			case LEAF:
-				putmem(nd, value,offs);
-				break;
-			case QLEAF:
-				putmem(nd,(unsigned char) value,offs);
-				break;
-			default:
-				MY_ASSERT(0);
-				return 0;
+		
+		virtual int maxlevel() const {
+			return maxlev;
+		};
+		inline bool isQuantized() const {
+			return isQtable;
 		}
 		
-		return value;
-	};
-	
-	
-	inline table_entry_pos_t boundwithoffset(node nd,LMT_TYPE ndt, int level){ return bound(nd,ndt) - tb_offset[level+1]; }
-	
-	inline table_entry_pos_t boundwithoffset(node nd,LMT_TYPE ndt, table_entry_pos_t value, int level){ return bound(nd, ndt, value + tb_offset[level+1]); }
-	
-	//	table_entry_pos_t bound(node nd,LMT_TYPE ndt, int level=0) {
-	table_entry_pos_t bound(node nd,LMT_TYPE ndt) {
 		
-		int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
+		void savetxt(const char *filename);
+		void savebin(const char *filename);
 		
-		table_entry_pos_t value;
+		void appendbin_level(int level, fstream &out, int mmap);
+		void appendbin_level_nommap(int level, fstream &out);
+		void appendbin_level_mmap(int level, fstream &out);
 		
-		getmem(nd,&value,offs);
+		void savebin_level(int level, const char* filename, int mmap);
+		void savebin_level_nommap(int level, const char* filename);
+		void savebin_level_mmap(int level, const char* filename);
+		void savebin_dict(std::fstream& out);
 		
-		//		value -= tb_offset[level+1];
+		void compact_all_levels(const char* filename);
+		void compact_single_level(int level, const char* filename);
 		
-		return value;
-	};
-	
-	
-	//	table_entry_pos_t bound(node nd,LMT_TYPE ndt, table_entry_pos_t value, int level=0) {
-	table_entry_pos_t bound(node nd,LMT_TYPE ndt, table_entry_pos_t value) {
+		void concatenate_all_levels(const char* fromfilename, const char* tofilename);
+		void concatenate_single_level(int level, const char* fromfilename, const char* tofilename);
 		
-		int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
+		void remove_all_levels(const char* filename);
+		void remove_single_level(int level, const char* filename);
 		
-		//		value += tb_offset[level+1];
+		void print_table_stat();
+		void print_table_stat(int level);
 		
-		putmem(nd,value,offs);
+		void dumplm(std::fstream& out,ngram ng, int ilev, int elev, table_entry_pos_t ipos,table_entry_pos_t epos);
 		
-		return value;
-	};
-	
-	//template<typename T> T boundwithoffset(node nd,LMT_TYPE ndt, T value, int level);
-	
-	/*
-	 table_entry_pos_t  boundwithoffset(node nd,LMT_TYPE ndt, int level) {
-	 
-	 int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
-	 
-	 table_entry_pos_t value;
-	 
-	 getmem(nd,&value,offs);
-	 return value;
-	 //    return value-tb_offset[level+1];
-	 };
-	 */
-	
-	/*
-	 table_entry_pos_t boundwithoffset(node nd,LMT_TYPE ndt, table_entry_pos_t value, int level) {
-	 
-	 int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
-	 
-	 putmem(nd,value,offs);
-	 
-	 return value;
-	 //		return value+tb_offset[level+1];
-	 };	
-	 */
-	
-	/*
-	 inline table_entry_pos_t  bound(node nd,LMT_TYPE ndt) {
-	 
-	 int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
-	 
-	 table_entry_pos_t value;
-	 
-	 getmem(nd,&value,offs);
-	 return value;
-	 };
-	 
-	 template<typename T>
-	 inline T bound(node nd,LMT_TYPE ndt, T value) {
-	 
-	 int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
-	 
-	 putmem(nd,value,offs);
-	 
-	 return value;
-	 };
-	 */
-	//returns the indexes of the successors of a node
-	int succrange(node ndp,int level,table_entry_pos_t* isucc=NULL,table_entry_pos_t* esucc=NULL);
-	
-	void stat(int lev=0);
-	void printTable(int level);
-	
-	virtual inline void setDict(dictionary* d) {
-		if (delete_dict==true && dict) delete dict;
-		dict=d;
-		delete_dict=false;
-	};
-	
-	inline dictionary* getDict() const {
-		return dict;
-	};
-	
-	inline table_entry_pos_t getCurrentSize(int l) const {
-		return cursize[l];
-	};
-	
-	inline void setOrderQuery(bool v) {
-		orderQuery = v;
-	}
-	inline bool isOrderQuery() const {
-		return orderQuery;
-	}
-	
-	inline float GetNgramcacheLoadFactor() {
-		return  ngramcache_load_factor;
-	}
-	inline float GetDictionaryLoadFactor() {
-		return  ngramcache_load_factor;
-	}
-	
-	//never allow the increment of the dictionary through this function
-	inline virtual void dictionary_incflag(const bool flag) {
-		UNUSED(flag);
-	};
-	
-	inline virtual bool filter(const string sfilter, lmtable* sublmt, const string skeepunigrams) {
-		std::cerr << "filtering... \n";
-		dictionary *dict=new dictionary((char *)sfilter.c_str());
-		
-		cpsublm(sublmt, dict,(skeepunigrams=="yes"));
-		delete dict;
-		std::cerr << "...done\n";
-		return true;
-	}
-	
+		
+		void delete_level(int level, const char* outfilename, int mmap);
+		void delete_level_nommap(int level);
+		void delete_level_mmap(int level, const char* filename);
+		
+		void resize_level(int level, const char* outfilename, int mmap);
+		void resize_level_nommap(int level);
+		void resize_level_mmap(int level, const char* filename);
+		
+		inline void update_offset(int level, table_entry_pos_t value) { tb_offset[level]=value; };
+		
+		
+		virtual void load(const std::string &filename, int mmap=0);
+		virtual void load(std::istream& inp,const char* filename=NULL,const char* outfilename=NULL,int mmap=0);
+		
+		void load_centers(std::istream& inp,int l);
+		
+		void expand_level(int level, table_entry_pos_t size, const char* outfilename, int mmap);
+		void expand_level_nommap(int level, table_entry_pos_t size);
+		void expand_level_mmap(int level, table_entry_pos_t size, const char* outfilename);
+		
+		void cpsublm(lmtable* sublmt, dictionary* subdict,bool keepunigr=true);
+		
+		int reload(std::set<string> words);
+		
+		void filter(const char* /* unused parameter: lmfile */) {};
+		
+		virtual double  lprob(ngram ng){ return lprob(ng, NULL, NULL, NULL, NULL, NULL, NULL, NULL); }
+		virtual double  lprob(ngram ng, double* bow){ return lprob(ng, bow, NULL, NULL, NULL, NULL, NULL, NULL); }
+		virtual double  lprob(ngram ng, double* bow, int* bol){ return lprob(ng, bow, bol, NULL, NULL, NULL, NULL, NULL); }
+		virtual double  lprob(ngram ng, double* bow, int* bol, char** maxsuffptr){ return lprob(ng, bow, bol, NULL, maxsuffptr, NULL, NULL, NULL); }
+		virtual double  lprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize){ return lprob(ng, bow, bol, NULL, maxsuffptr, statesize, NULL, NULL); }
+		virtual double  lprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible){ return lprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, NULL); }
 
-  inline virtual bool is_OOV(int code) {
-		return (code == dict->oovcode());
+		virtual double  lprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx){ return lprob(ng, bow, bol, maxsuffidx, NULL, NULL, NULL, NULL); }
+		virtual double  lprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr){ return lprob(ng, bow, bol, maxsuffidx, maxsuffptr, NULL, NULL, NULL); }
+		virtual double  lprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize){ return lprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, NULL, NULL); }
+		virtual double  lprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible){ return lprob(ng, bow, bol, maxsuffidx, maxsuffptr, statesize, extendible, NULL); }
+		
+		
+//		virtual double  lprob(ngram ng, double* bow, int* bol, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow){ return lprob(ng, bow, bol, NULL, maxsuffptr, statesize, extendible, lastbow); }
+		virtual double  lprob(ngram ng, double* bow, int* bol, ngram_state_t* maxsuffidx, char** maxsuffptr, unsigned int* statesize, bool* extendible, double* lastbow);
+		
+		virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
+//		virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, ngram_state_t* maxsuffidx=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL, bool* extendible=NULL, double* lastbow=NULL);
+		
+		
+		void *search(int lev,table_entry_pos_t offs,table_entry_pos_t n,int sz,int *w, LMT_ACTION action,char **found=(char **)NULL);
+		
+		int mybsearch(char *ar, table_entry_pos_t n, int size, char *key, table_entry_pos_t *idx);
+		
+		
+		int add(ngram& ng, float prob,float bow);
+		//template<typename TA, typename TB> int add(ngram& ng, TA prob,TB bow);
+		
+		int addwithoffset(ngram& ng, float prob,float bow);
+		//  template<typename TA, typename TB> int addwithoffset(ngram& ng, TA prob,TB bow);
+		
+		void checkbounds(int level);
+		
+		virtual inline int get(ngram& ng) {
+			return get(ng,ng.size,ng.size);
+		}
+		virtual int get(ngram& ng,int n,int lev);
+		
+		int succscan(ngram& h,ngram& ng,LMT_ACTION action,int lev);
+		
+		virtual const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
+		virtual const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
+//		virtual const char *cmaxsuffptr(int* codes, int sz, unsigned int* size=NULL);
+		virtual ngram_state_t maxsuffidx(ngram ong, unsigned int* size=NULL);
+		virtual ngram_state_t cmaxsuffidx(ngram ong, unsigned int* size=NULL);
+//		virtual ngram_state_t cmaxsuffidx(int* codes, int sz, unsigned int* size=NULL);
+		
+		inline void putmem(char* ptr,int value,int offs,int size) { // stores the `size` low-order bytes of value at ptr+offs, least significant byte first
+			MY_ASSERT(ptr!=NULL);
+			int k=0;
+			while (k<size) { ptr[offs+k]=(value >> (8 * k)) & 0xff; ++k; }
+		};
+		
+		inline void getmem(char* ptr,int* value,int offs,int size) { // rebuilds *value from `size` bytes at ptr+offs, least significant byte first
+			MY_ASSERT(ptr!=NULL);
+			*value=ptr[offs] & 0xff; // byte 0 is read unconditionally, whatever `size` is
+			for (int k=1; k<size; ++k) {
+				*value |= ( ptr[offs+k] & 0xff ) << (8 * k);
+			}
+		};
+		
+		template<typename T>
+		inline void putmem(char* ptr,T value,int offs) { // typed overload: copies the sizeof(T) bytes of value to ptr+offs
+			MY_ASSERT(ptr!=NULL);
+			memcpy(ptr+offs, &value, sizeof(T)); // memcpy, so ptr+offs needs no particular alignment
+		};
+		
+		template<typename T>
+		inline void getmem(char* ptr,T* value,int offs) { // typed overload: copies sizeof(T) bytes from ptr+offs into *value
+			MY_ASSERT(ptr!=NULL);
+			memcpy((void*)value, ptr+offs, sizeof(T)); // memcpy, so ptr+offs needs no particular alignment
+		};
+		
+		
+		int nodesize(LMT_TYPE ndt) { // size of one table entry of the given node type
+			// INTERNAL/QINTERNAL entries: word code, two probability-sized fields and a bound;
+			// LEAF/QLEAF entries: word code and a single probability-sized field
+			if (ndt==INTERNAL)
+				return LMTCODESIZE + PROBSIZE + PROBSIZE + BOUNDSIZE;
+			if (ndt==QINTERNAL)
+				return LMTCODESIZE + QPROBSIZE + QPROBSIZE + BOUNDSIZE;
+			if (ndt==LEAF)
+				return LMTCODESIZE + PROBSIZE;
+			if (ndt==QLEAF)
+				return LMTCODESIZE + QPROBSIZE;
+			// any other value is a programming error
+			MY_ASSERT(0);
+			return 0;
+		}
+		
+		inline int word(node nd,int value=-1) { // reads (value==-1) or writes (any other value) the word code at the start of nd
+			const int codepos=0; // the code occupies the first LMTCODESIZE bytes of the entry
+			
+			if (value!=-1) {
+				putmem(nd,value,codepos,LMTCODESIZE);
+				return value; // echo back the code just stored
+			}
+			getmem(nd,&value,codepos,LMTCODESIZE);
+			return value;
+		};
+		
+		
+		int codecmp(node a,node b) { // orders two entries by word code: negative, zero or positive, bytes compared as unsigned
+			int i,result; // 'register' dropped: deprecated in C++11 and removed in C++17
+			for (i=(LMTCODESIZE-1); i>=0; i--) { // codes are stored least significant byte first, so scan from the last byte down
+				result=(unsigned char)a[i]-(unsigned char)b[i];
+				if(result) return result;
+			}
+			return 0;
+		};
+		
+		int codediff(node a,node b) { // signed difference between the word codes of a and b, each decoded with word()
+			return word(a)-word(b);
+		};
+		
+		
+		inline float prob(node nd,LMT_TYPE ndt) { // reads the field stored right after the word code of nd
+			const int fieldpos=LMTCODESIZE;
+			
+			switch (ndt) {
+				case INTERNAL:
+				case LEAF: {
+					// INTERNAL and LEAF entries hold the value as a float
+					float stored;
+					getmem(nd,&stored,fieldpos);
+					return stored;
+				}
+				case QINTERNAL:
+				case QLEAF: {
+					// QINTERNAL and QLEAF entries hold a single unsigned byte, returned as a float
+					unsigned char stored;
+					getmem(nd,&stored,fieldpos);
+					return (float) stored;
+				}
+				default:
+					MY_ASSERT(0);
+					return 0;
+			}
+		};
+		
+		template<typename T>
+		inline T prob(node nd, LMT_TYPE ndt, T value) { // stores value in the field right after the word code of nd
+			const int fieldpos=LMTCODESIZE;
+			
+			switch (ndt) {
+				case INTERNAL:
+				case LEAF:
+					// written with the full width of T
+					putmem(nd, value,fieldpos);
+					break;
+				case QINTERNAL:
+				case QLEAF:
+					// narrowed to one unsigned byte before being written
+					putmem(nd,(unsigned char) value,fieldpos);
+					break;
+				default:
+					// unknown node type: nothing is written
+					MY_ASSERT(0);
+					return (T) 0;
+			}
+			
+			// the caller gets back the value it passed in, not the narrowed byte
+			return value;
+		};
+		
+		inline float bow(node nd,LMT_TYPE ndt) { // reads the second probability-sized field of nd (the one after the field prob() reads)
+			int offs=LMTCODESIZE+(ndt==QINTERNAL?QPROBSIZE:PROBSIZE); // only QINTERNAL skips a QPROBSIZE-wide first field; every other type skips PROBSIZE
+			
+			float fv;
+			unsigned char cv;
+			switch (ndt) {
+				case INTERNAL:
+					getmem(nd,&fv,offs);
+					return fv;
+				case QINTERNAL:
+					getmem(nd,&cv,offs);
+					return (float) cv;
+				case LEAF: // NOTE(review): offs equals nodesize(LEAF) here, so this reads just past a LEAF entry — confirm callers never ask for it
+					getmem(nd,&fv,offs);
+					return fv;
+				case QLEAF: // NOTE(review): offs is LMTCODESIZE+PROBSIZE while nodesize(QLEAF) is LMTCODESIZE+QPROBSIZE — confirm this case is unused
+					getmem(nd,&cv,offs);
+					return (float) cv;
+				default:
+					MY_ASSERT(0);
+					return 0;
+			}
+		};
+		
+		template<typename T>
+		inline T bow(node nd,LMT_TYPE ndt, T value) { // stores value in the second probability-sized field of nd and returns value
+			int offs=LMTCODESIZE+(ndt==QINTERNAL?QPROBSIZE:PROBSIZE); // only QINTERNAL skips a QPROBSIZE-wide first field; every other type skips PROBSIZE
+			
+			switch (ndt) {
+				case INTERNAL:
+					putmem(nd, value,offs);
+					break;
+				case QINTERNAL:
+					putmem(nd,(unsigned char) value,offs); // narrowed to one unsigned byte before being written
+					break;
+				case LEAF: // NOTE(review): offs equals nodesize(LEAF) here, so this writes just past a LEAF entry — confirm callers never use it
+					putmem(nd, value,offs);
+					break;
+				case QLEAF: // NOTE(review): offs is LMTCODESIZE+PROBSIZE while nodesize(QLEAF) is LMTCODESIZE+QPROBSIZE — confirm this case is unused
+					putmem(nd,(unsigned char) value,offs);
+					break;
+				default:
+					MY_ASSERT(0);
+					return 0;
+			}
+			
+			return value;
+		};
+		
+		
+		inline table_entry_pos_t boundwithoffset(node nd,LMT_TYPE ndt, int level){ return bound(nd,ndt) - tb_offset[level+1]; } // stored bound minus the offset recorded for level+1
+		
+		inline table_entry_pos_t boundwithoffset(node nd,LMT_TYPE ndt, table_entry_pos_t value, int level){ return bound(nd, ndt, value + tb_offset[level+1]); } // stores value plus the offset recorded for level+1; returns the stored (offset-added) value
+		
+		//	table_entry_pos_t bound(node nd,LMT_TYPE ndt, int level=0) {
+		table_entry_pos_t bound(node nd,LMT_TYPE ndt) {
+			// Reads the bound field of nd exactly as stored; no tb_offset correction
+			// is applied here (boundwithoffset() does that).
+			// The field follows the word code and two probability-sized fields, which
+			// are QPROBSIZE wide for QINTERNAL entries and PROBSIZE wide otherwise.
+			const int probwidth=(ndt==QINTERNAL)?QPROBSIZE:PROBSIZE;
+			const int fieldpos=LMTCODESIZE+2*probwidth;
+			
+			table_entry_pos_t stored;
+			getmem(nd,&stored,fieldpos);
+			return stored;
+		};
+		
+		
+		//	table_entry_pos_t bound(node nd,LMT_TYPE ndt, table_entry_pos_t value, int level=0) {
+		table_entry_pos_t bound(node nd,LMT_TYPE ndt, table_entry_pos_t value) {
+			// Writes value unchanged into the bound field of nd and returns it; no
+			// tb_offset correction is applied here (boundwithoffset() does that).
+			// The field follows the word code and two probability-sized fields.
+			const int probwidth=(ndt==QINTERNAL)?QPROBSIZE:PROBSIZE;
+			const int fieldpos=LMTCODESIZE+2*probwidth;
+			
+			putmem(nd,value,fieldpos);
+			return value;
+		};
+		
+		//template<typename T> T boundwithoffset(node nd,LMT_TYPE ndt, T value, int level);
+		
+		/*
+		 table_entry_pos_t  boundwithoffset(node nd,LMT_TYPE ndt, int level) {
+		 
+		 int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
+		 
+		 table_entry_pos_t value;
+		 
+		 getmem(nd,&value,offs);
+		 return value;
+		 //    return value-tb_offset[level+1];
+		 };
+		 */
+		
+		/*
+		 table_entry_pos_t boundwithoffset(node nd,LMT_TYPE ndt, table_entry_pos_t value, int level) {
+		 
+		 int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
+		 
+		 putmem(nd,value,offs);
+		 
+		 return value;
+		 //		return value+tb_offset[level+1];
+		 };	
+		 */
+		
+		/*
+		 inline table_entry_pos_t  bound(node nd,LMT_TYPE ndt) {
+		 
+		 int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
+		 
+		 table_entry_pos_t value;
+		 
+		 getmem(nd,&value,offs);
+		 return value;
+		 };
+		 
+		 template<typename T>
+		 inline T bound(node nd,LMT_TYPE ndt, T value) {
+		 
+		 int offs=LMTCODESIZE+2*(ndt==QINTERNAL?QPROBSIZE:PROBSIZE);
+		 
+		 putmem(nd,value,offs);
+		 
+		 return value;
+		 };
+		 */
+		//returns the indexes of the successors of a node
+		int succrange(node ndp,int level,table_entry_pos_t* isucc=NULL,table_entry_pos_t* esucc=NULL);
+		
+		void stat(int lev=0);
+		void printTable(int level);
+		
+		virtual inline void setDict(dictionary* d) { // replaces the dictionary pointer with d
+			if (delete_dict==true && dict) delete dict; // the previous dictionary is freed only when delete_dict is set
+			dict=d;
+			delete_dict=false; // d will not be deleted by a later setDict() call
+		};
+		
+		inline dictionary* getDict() const { // returns the current dictionary pointer
+			return dict;
+		};
+		
+		inline table_entry_pos_t getCurrentSize(int l) const { // returns cursize[l]; l is not range-checked here
+			return cursize[l];
+		};
+		
+		inline void setOrderQuery(bool v) { // sets the orderQuery flag
+			orderQuery = v;
+		}
+		inline bool isOrderQuery() const { // returns the orderQuery flag
+			return orderQuery;
+		}
+		
+		inline float GetNgramcacheLoadFactor() { // returns ngramcache_load_factor
+			return  ngramcache_load_factor;
+		}
+		inline float GetDictionaryLoadFactor() { // NOTE(review): returns ngramcache_load_factor, exactly like GetNgramcacheLoadFactor() — looks like a copy-paste slip; confirm whether a separate dictionary load factor was intended
+			return  ngramcache_load_factor;
+		}
+		
+		//never allow the increment of the dictionary through this function
+		inline virtual void dictionary_incflag(const bool flag) {
+			UNUSED(flag);
+		};
+		
+		inline virtual bool filter(const string sfilter, lmtable* sublmt, const string skeepunigrams) { // builds a dictionary from sfilter and passes it with sublmt to cpsublm(); keepunigr is true only for "yes"; always returns true
+			std::cerr << "filtering... \n";
+			{ // scoped so the filter dictionary is destroyed before "...done" is printed, and also if cpsublm() throws
+				dictionary filterdict((char *)sfilter.c_str()); // own name: no longer shadows the member dict
+				cpsublm(sublmt, &filterdict,(skeepunigrams=="yes"));
+			}
+			std::cerr << "...done\n";
+			return true;
+		}
+		
+		
+		inline virtual bool is_OOV(int code) { // true when code equals the value returned by dict->oovcode()
+			return (code == dict->oovcode());
+		};
+		
 	};
 	
-};
-	
 }//namespace irstlm
 
 #endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/irstlm.git



More information about the debian-science-commits mailing list