[lttoolbox] 01/02: Imported Upstream version 3.3.2~r63423
Tino Didriksen
tinodidriksen-guest at moszumanska.debian.org
Sat Jan 2 14:23:23 UTC 2016
This is an automated email from the git hooks/post-receive script.
tinodidriksen-guest pushed a commit to branch master
in repository lttoolbox.
commit 3b289f3361e5d97ce54a13a4aeed5737d4a32ca2
Author: Tino Didriksen <mail at tinodidriksen.com>
Date: Sat Jan 2 14:23:12 2016 +0000
Imported Upstream version 3.3.2~r63423
---
COPYING | 41 +-
Makefile.am | 3 +-
configure.ac | 66 +--
lttoolbox/alphabet.cc | 4 +-
lttoolbox/alphabet.h | 4 +-
lttoolbox/att_compiler.cc | 7 +-
lttoolbox/att_compiler.h | 4 +-
lttoolbox/buffer.h | 4 +-
lttoolbox/compiler.cc | 10 +-
lttoolbox/compiler.h | 4 +-
lttoolbox/compression.cc | 4 +-
lttoolbox/compression.h | 4 +-
lttoolbox/dix.dtd | 4 +-
lttoolbox/entry_token.cc | 7 +-
lttoolbox/entry_token.h | 4 +-
lttoolbox/exception.h | 6 +-
lttoolbox/expander.cc | 11 +-
lttoolbox/expander.h | 4 +-
lttoolbox/fst_processor.cc | 563 +++++++++++-----------
lttoolbox/fst_processor.h | 10 +-
lttoolbox/lt_comp.cc | 4 +-
lttoolbox/lt_expand.cc | 4 +-
lttoolbox/lt_locale.cc | 4 +-
lttoolbox/lt_locale.h | 4 +-
lttoolbox/lt_print.cc | 4 +-
lttoolbox/lt_proc.cc | 48 +-
lttoolbox/lt_tmxcomp.cc | 4 +-
lttoolbox/lt_tmxproc.cc | 4 +-
lttoolbox/lt_trim.cc | 8 +-
lttoolbox/ltstr.h | 4 +-
lttoolbox/match_exe.cc | 7 +-
lttoolbox/match_exe.h | 4 +-
lttoolbox/match_node.cc | 4 +-
lttoolbox/match_node.h | 4 +-
lttoolbox/match_state.cc | 10 +-
lttoolbox/match_state.h | 4 +-
lttoolbox/my_stdio.h | 4 +-
lttoolbox/node.cc | 4 +-
lttoolbox/node.h | 4 +-
lttoolbox/pattern_list.cc | 7 +-
lttoolbox/pattern_list.h | 4 +-
lttoolbox/regexp_compiler.cc | 10 +-
lttoolbox/regexp_compiler.h | 4 +-
lttoolbox/sorted_vector.cc | 4 +-
lttoolbox/sorted_vector.h | 4 +-
lttoolbox/state.cc | 4 +-
lttoolbox/state.h | 4 +-
lttoolbox/tmx_compiler.cc | 7 +-
lttoolbox/tmx_compiler.h | 4 +-
lttoolbox/trans_exe.cc | 7 +-
lttoolbox/trans_exe.h | 4 +-
lttoolbox/transducer.cc | 141 +++---
lttoolbox/transducer.h | 18 +-
lttoolbox/xml_parse_util.cc | 4 +-
lttoolbox/xml_parse_util.h | 4 +-
tests/data/biproc-skips-tags-mono.dix | 14 +
tests/data/double-clitics-bi.dix | 16 +
tests/data/double-clitics-mono.dix | 24 +
tests/data/en-af.automorf.bin | Bin 217450 -> 0 bytes
tests/lt_proc/__init__.py | 24 +-
tests/lt_proc/null_flush.py | 49 --
tests/lt_proc/null_flush_invalid_stream_format.py | 42 ++
tests/lt_trim/__init__.py | 80 +--
tests/proctest.py | 59 ++-
tests/run_tests.py | 5 +-
65 files changed, 681 insertions(+), 759 deletions(-)
diff --git a/COPYING b/COPYING
index 623b625..d159169 100644
--- a/COPYING
+++ b/COPYING
@@ -1,12 +1,12 @@
- GNU GENERAL PUBLIC LICENSE
- Version 2, June 1991
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
- Preamble
+ Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
@@ -15,7 +15,7 @@ software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
+the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
@@ -55,8 +55,8 @@ patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
-
- GNU GENERAL PUBLIC LICENSE
+
+ GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
@@ -110,7 +110,7 @@ above, provided that you also meet all of these conditions:
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
-
+
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
@@ -168,7 +168,7 @@ access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
-
+
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
@@ -225,7 +225,7 @@ impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
-
+
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
@@ -255,7 +255,7 @@ make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
- NO WARRANTY
+ NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
@@ -277,9 +277,9 @@ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
@@ -303,17 +303,16 @@ the "copyright" line and a pointer to where the full notice is found.
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
- Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
@@ -336,5 +335,5 @@ necessary. Here is a sample; alter the names:
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General
+library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.
diff --git a/Makefile.am b/Makefile.am
index b67d234..68e23d3 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -6,5 +6,6 @@ pkgconfig_DATA = lttoolbox.pc
EXTRA_DIST=autogen.sh CMakeLists.txt
+# TODO: the below will use python3 if you run it on Arch Linux with no python2 installed
test: tests/run_tests.py
- $<
+ $(PYTHON) $<
diff --git a/configure.ac b/configure.ac
index a6f8f34..f017264 100644
--- a/configure.ac
+++ b/configure.ac
@@ -49,73 +49,15 @@ VERSION=$GENERIC_VERSION
AM_INIT_AUTOMAKE($PACKAGE, $VERSION, no-define)
-# Checks for programs.
-
-AC_MSG_CHECKING([Compilation architecture: PPC, i686, x86_64, Other])
-if test x$(which arch) = x
-then ARCH=$($(which uname) -m)
-else ARCH=$($(which arch))
-fi
-
-if test x$ARCH = xppc
-then
- AC_MSG_RESULT([PowerPC])
- CFLAGS="-Wall -fomit-frame-pointer $CFLAGS"
- CXXFLAGS="-Wall -fomit-frame-pointer $CXXFLAGS"
-else
- if test x$ARCH = xi686
- then
- AC_MSG_RESULT([i686])
- CFLAGS="-Wall -march=i686 -O3 -fomit-frame-pointer -funroll-loops $CFLAGS"
- CXXFLAGS="-Wall -march=i686 -O3 \
- -fomit-frame-pointer -funroll-loops $CXXFLAGS"
-
- else
- if test x$ARCH = xx86_64
- then
- AC_MSG_RESULT([x86_64])
- CFLAGS="-Wall -O3 -mtune=nocona -fomit-frame-pointer -funroll-loops $CFLAGS"
- CXXFLAGS="-Wall -O3 -mtune=nocona \
- -fomit-frame-pointer -funroll-loops $CXXFLAGS"
- else
- AC_MSG_RESULT([Other])
- CFLAGS="-Wall -O3 $CFLAGS"
- CXXFLAGS="-Wall -O3 $CXXFLAGS"
- fi
- fi
-fi
-
AC_CANONICAL_HOST
-
-USE_ANSI=yes
-
-case "${host_os}" in
- mingw*)
- echo "Found MinGW: Also considering MSYS paths"
- CFLAGS="$CFLAGS -L/lib -I/include"
- CXXFLAGS="$CXXFLAGS -L/lib -I/include"
- CPPFLAGS="$CPPFLAGS -I/include"
-
- USE_ANSI=no
- ;;
- cygwin*)
- echo "Found Cygwin: -ansi unset"
- USE_ANSI=no
- ;;
-esac
-
-if test x$USE_ANSI = xyes
-then
- CFLAGS="$CFLAGS -ansi"
- CXXFLAGS="$CXXFLAGS -ansi"
- CPPFLAGS="$CPPFLAGS -ansi"
-fi
-
AC_PROG_CXX
AM_PROG_LIBTOOL
AM_SANITY_CHECK
AC_LANG_CPLUSPLUS
+CFLAGS="-Wall -Wextra $CFLAGS"
+CXXFLAGS="-Wall -Wextra $CXXFLAGS"
+
AC_ARG_ENABLE(debug,
[ --enable-debug Enable "-g -Wall" compiler options],
[CXXFLAGS="-g -Wall";CFLAGS="-g -Wall"])
@@ -181,4 +123,6 @@ AC_CHECK_FUNCS([setlocale strdup getopt_long])
AM_CONDITIONAL([WINDOWS], [test x$version_type = xwindows])
+AM_PATH_PYTHON([2], [], [AC_MSG_WARN([Can't run 'make test' without Python installed.])])
+
AC_OUTPUT([Makefile lttoolbox.pc lttoolbox/Makefile])
diff --git a/lttoolbox/alphabet.cc b/lttoolbox/alphabet.cc
index a28e54a..807523c 100644
--- a/lttoolbox/alphabet.cc
+++ b/lttoolbox/alphabet.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/alphabet.h>
#include <lttoolbox/compression.h>
diff --git a/lttoolbox/alphabet.h b/lttoolbox/alphabet.h
index e6b3a57..67f9e89 100644
--- a/lttoolbox/alphabet.h
+++ b/lttoolbox/alphabet.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ALPHABET_
#define _ALPHABET_
diff --git a/lttoolbox/att_compiler.cc b/lttoolbox/att_compiler.cc
index ec44fcd..778796f 100644
--- a/lttoolbox/att_compiler.cc
+++ b/lttoolbox/att_compiler.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/att_compiler.h>
@@ -24,7 +22,8 @@
using namespace std;
-AttCompiler::AttCompiler()
+AttCompiler::AttCompiler() :
+starting_state(0)
{
}
diff --git a/lttoolbox/att_compiler.h b/lttoolbox/att_compiler.h
index b7c07a7..4c52d05 100644
--- a/lttoolbox/att_compiler.h
+++ b/lttoolbox/att_compiler.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _MYATT_COMPILER_
#define _MYATT_COMPILER_
diff --git a/lttoolbox/buffer.h b/lttoolbox/buffer.h
index c7c0a60..59ff1ed 100644
--- a/lttoolbox/buffer.h
+++ b/lttoolbox/buffer.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _BUFFER_
#define _BUFFER_
diff --git a/lttoolbox/compiler.cc b/lttoolbox/compiler.cc
index 451cfca..1f00662 100644
--- a/lttoolbox/compiler.cc
+++ b/lttoolbox/compiler.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/compiler.h>
#include <lttoolbox/compression.h>
@@ -61,7 +59,11 @@ wstring const Compiler::COMPILER_V_ATTR = L"v";
wstring const Compiler::COMPILER_VL_ATTR = L"vl";
wstring const Compiler::COMPILER_VR_ATTR = L"vr";
-Compiler::Compiler()
+Compiler::Compiler() :
+reader(0),
+verbose(false),
+first_element(false),
+acx_current_char(0)
{
}
diff --git a/lttoolbox/compiler.h b/lttoolbox/compiler.h
index 9cc9b06..a4e94a8 100644
--- a/lttoolbox/compiler.h
+++ b/lttoolbox/compiler.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _MYCOMPILER_
#define _MYCOMPILER_
diff --git a/lttoolbox/compression.cc b/lttoolbox/compression.cc
index 736ee49..cf20140 100644
--- a/lttoolbox/compression.cc
+++ b/lttoolbox/compression.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/compression.h>
#include <lttoolbox/my_stdio.h>
diff --git a/lttoolbox/compression.h b/lttoolbox/compression.h
index f5c3b7c..e311bea 100644
--- a/lttoolbox/compression.h
+++ b/lttoolbox/compression.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _COMPRESSION_
#define _COMPRESSION_
diff --git a/lttoolbox/dix.dtd b/lttoolbox/dix.dtd
index 583133b..437d8f2 100644
--- a/lttoolbox/dix.dtd
+++ b/lttoolbox/dix.dtd
@@ -12,9 +12,7 @@
General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA.
+ along with this program; if not <http://www.gnu.org/licenses/>.
DTD for the format of dictionaries
-->
diff --git a/lttoolbox/entry_token.cc b/lttoolbox/entry_token.cc
index bdadd90..565cd11 100644
--- a/lttoolbox/entry_token.cc
+++ b/lttoolbox/entry_token.cc
@@ -12,14 +12,13 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/entry_token.h>
-EntryToken::EntryToken()
+EntryToken::EntryToken() :
+type(paradigm)
{
}
diff --git a/lttoolbox/entry_token.h b/lttoolbox/entry_token.h
index 143e559..d5bb4c9 100644
--- a/lttoolbox/entry_token.h
+++ b/lttoolbox/entry_token.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ENTRYTOKEN_
#define _ENTRYTOKEN_
diff --git a/lttoolbox/exception.h b/lttoolbox/exception.h
index 320c245..ab18903 100644
--- a/lttoolbox/exception.h
+++ b/lttoolbox/exception.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __EXCEPTION_
#define __EXCEPTION_
@@ -35,7 +33,7 @@ public:
{
}
- const char* what()
+ const char* what() const throw ()
{
return msg.c_str();
}
diff --git a/lttoolbox/expander.cc b/lttoolbox/expander.cc
index 7f464d7..f1e492f 100644
--- a/lttoolbox/expander.cc
+++ b/lttoolbox/expander.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/compiler.h>
@@ -33,7 +31,8 @@
using namespace std;
-Expander::Expander()
+Expander::Expander() :
+reader(0)
{
LtLocale::tryToSetLocale();
}
@@ -385,8 +384,8 @@ Expander::procEntry(FILE *output)
// detección del uso de paradigmas no definidos
if(paradigm.find(p) == paradigm.end() &&
- paradigm_lr.find(p) == paradigm.end() &&
- paradigm_rl.find(p) == paradigm.end())
+ paradigm_lr.find(p) == paradigm_lr.end() &&
+ paradigm_rl.find(p) == paradigm_rl.end())
{
wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
wcerr << L"): Undefined paradigm '" << p << L"'." <<endl;
diff --git a/lttoolbox/expander.h b/lttoolbox/expander.h
index f4f94a4..431885c 100644
--- a/lttoolbox/expander.h
+++ b/lttoolbox/expander.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _EXPANDER_
#define _EXPANDER_
diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc
index 39f6370..412177e 100644
--- a/lttoolbox/fst_processor.cc
+++ b/lttoolbox/fst_processor.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/fst_processor.h>
#include <lttoolbox/compression.h>
@@ -29,8 +27,10 @@
using namespace std;
-
-FSTProcessor::FSTProcessor()
+
+FSTProcessor::FSTProcessor() :
+outOfWord(false),
+isLastBlankTM(false)
{
// escaped_chars chars
escaped_chars.insert(L'[');
@@ -44,14 +44,14 @@ FSTProcessor::FSTProcessor()
escaped_chars.insert(L'@');
escaped_chars.insert(L'<');
escaped_chars.insert(L'>');
-
+
caseSensitive = false;
dictionaryCase = false;
do_decomposition = false;
nullFlush = false;
nullFlushGeneration = false;
showControlSymbols = false;
- biltransSurfaceForms = false;
+ biltransSurfaceForms = false;
compoundOnlyLSymbol = 0;
compoundRSymbol = 0;
compound_max_elements = 4;
@@ -86,11 +86,11 @@ FSTProcessor::readEscaped(FILE *input)
{
streamError();
}
-
+
return val;
}
-wstring
+wstring
FSTProcessor::readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2)
{
wstring result = L"";
@@ -109,7 +109,7 @@ FSTProcessor::readFullBlock(FILE *input, wchar_t const delim1, wchar_t const del
{
result += static_cast<wchar_t>(readEscaped(input));
}
- }
+ }
if(c != delim2)
{
@@ -147,7 +147,7 @@ FSTProcessor::readAnalysis(FILE *input)
blankqueue.push(readFullBlock(input, L'[', L']'));
input_buffer.add(static_cast<int>(L' '));
return static_cast<int>(L' ');
-
+
case L'\\':
val = static_cast<wchar_t>(fgetwc_unlocked(input));
if(escaped_chars.find(val) == escaped_chars.end())
@@ -159,7 +159,7 @@ FSTProcessor::readAnalysis(FILE *input)
default:
streamError();
- }
+ }
}
input_buffer.add(val);
@@ -196,7 +196,7 @@ FSTProcessor::readTMAnalysis(FILE *input)
input_buffer.add(static_cast<int>(L' '));
isLastBlankTM = true;
return static_cast<int>(L' ');
-
+
case L'\\':
val = static_cast<wchar_t>(fgetwc_unlocked(input));
if(escaped_chars.find(val) == escaped_chars.end())
@@ -231,7 +231,7 @@ FSTProcessor::readTMAnalysis(FILE *input)
default:
streamError();
- }
+ }
}
input_buffer.add(val);
@@ -264,7 +264,7 @@ FSTProcessor::readPostgeneration(FILE *input)
blankqueue.push(readFullBlock(input, L'[', L']'));
input_buffer.add(static_cast<int>(L' '));
return static_cast<int>(L' ');
-
+
case L'\\':
val = static_cast<wchar_t>(fgetwc_unlocked(input));
if(escaped_chars.find(val) == escaped_chars.end())
@@ -273,7 +273,7 @@ FSTProcessor::readPostgeneration(FILE *input)
}
input_buffer.add(static_cast<int>(val));
return val;
-
+
default:
input_buffer.add(val);
return val;
@@ -302,7 +302,7 @@ FSTProcessor::skipUntil(FILE *input, FILE *output, wint_t const character)
fputwc_unlocked(L'\\', output);
fputwc_unlocked(val, output);
break;
-
+
case L'\0':
fputwc_unlocked(val, output);
if(nullFlushGeneration)
@@ -310,7 +310,7 @@ FSTProcessor::skipUntil(FILE *input, FILE *output, wint_t const character)
fflush(output);
}
break;
-
+
default:
if(val == character)
{
@@ -334,7 +334,7 @@ FSTProcessor::readGeneration(FILE *input, FILE *output)
{
return 0x7fffffff;
}
-
+
if(outOfWord)
{
if(val == L'^')
@@ -423,7 +423,7 @@ FSTProcessor::readBilingual(FILE *input, FILE *output)
{
return pair<wstring, int>(symbol, 0x7fffffff);
}
-
+
if(outOfWord)
{
if(val == L'^')
@@ -488,7 +488,7 @@ FSTProcessor::readBilingual(FILE *input, FILE *output)
cad += static_cast<wchar_t>(val);
int res = alphabet(cad);
-
+
if (res == 0) {
symbol = cad;
}
@@ -499,17 +499,13 @@ FSTProcessor::readBilingual(FILE *input, FILE *output)
fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output);
return readBilingual(input, output);
}
- else
- {
- return pair<wstring, int>(symbol, val);
- }
- return pair<wstring, int>(symbol, 0x7fffffff);
+ return pair<wstring, int>(symbol, val);
}
void
FSTProcessor::flushBlanks(FILE *output)
-{
+{
for(unsigned int i = blankqueue.size(); i > 0; i--)
{
fputws_unlocked(blankqueue.front().c_str(), output);
@@ -589,14 +585,14 @@ FSTProcessor::writeEscaped(wstring const &str, FILE *output)
fputwc_unlocked(L'\\', output);
}
fputwc_unlocked(str[i], output);
- }
+ }
}
void
FSTProcessor::writeEscapedWithTags(wstring const &str, FILE *output)
{
for(unsigned int i = 0, limit = str.size(); i < limit; i++)
- {
+ {
if(str[i] == L'<' && i >=1 && str[i-1] != L'\\')
{
fputws_unlocked(str.substr(i).c_str(), output);
@@ -608,7 +604,7 @@ FSTProcessor::writeEscapedWithTags(wstring const &str, FILE *output)
fputwc_unlocked(L'\\', output);
}
fputwc_unlocked(str[i], output);
- }
+ }
}
@@ -639,7 +635,7 @@ FSTProcessor::printUnknownWord(wstring const &sf, FILE *output)
fputwc_unlocked(L'/', output);
fputwc_unlocked(L'*', output);
writeEscaped(sf, output);
- fputwc_unlocked(L'$', output);
+ fputwc_unlocked(L'$', output);
}
unsigned int
@@ -690,9 +686,9 @@ FSTProcessor::load(FILE *input)
{
alphabetic_chars.insert(static_cast<wchar_t>(Compression::multibyte_read(input)));
len--;
- }
+ }
- // symbols
+ // symbols
alphabet.read(input);
len = Compression::multibyte_read(input);
@@ -708,7 +704,7 @@ FSTProcessor::load(FILE *input)
}
transducers[name].read(input, alphabet);
len--;
- }
+ }
}
@@ -732,7 +728,7 @@ FSTProcessor::initTMAnalysis()
limit = transducers.end();
it != limit; it++)
{
- all_finals.insert(it->second.getFinals().begin(),
+ all_finals.insert(it->second.getFinals().begin(),
it->second.getFinals().end());
}
}
@@ -740,7 +736,7 @@ FSTProcessor::initTMAnalysis()
void
FSTProcessor::initGeneration()
{
- calcInitial();
+ calcInitial();
for(map<wstring, TransExe, Ltstr>::iterator it = transducers.begin(),
limit = transducers.end();
it != limit; it++)
@@ -775,7 +771,7 @@ FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupp
//wcerr << val << L" før step " << i << L" current_state = " << current_state.getReadableString(alphabet) << endl;
current_state.step_case(val, caseSensitive);
-
+
if(current_state.size() > MAX_COMBINATIONS) {
wcerr << L"Warning: compoundAnalysis's MAX_COMBINATIONS exceeded for '" << input_word << L"'" << endl;
wcerr << L" gave up at char " << i << L" '" << val << L"'." << endl;
@@ -788,7 +784,7 @@ FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupp
if(i < input_word.size()-1)
current_state.restartFinals(all_finals, compoundOnlyLSymbol, initial_state, '+');
-
+
//wcerr << val << " eft rest " << i << " current_state = " << current_state.getReadableString(alphabet) << endl;
//wcerr << i << " result = " << current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper) << endl;
//wcerr << i << " -- size = " << current_state.size() << endl;
@@ -802,13 +798,13 @@ FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupp
current_state.pruneCompounds(compoundRSymbol, '+', compound_max_elements);
wstring result = current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper);
//wcerr << L"rrresult = " << result << endl;
-
+
return result;
}
-void
+void
FSTProcessor::initDecompositionSymbols() {
if ((compoundOnlyLSymbol=alphabet(L"<:co:only-L>")) == 0
&& (compoundOnlyLSymbol=alphabet(L"<:compound:only-L>")) == 0
@@ -825,7 +821,7 @@ FSTProcessor::initDecompositionSymbols() {
&& (compoundRSymbol=alphabet(L"<:compound:R>")) == 0
&& (compoundRSymbol=alphabet(L"<@co:R>")) == 0
&& (compoundRSymbol=alphabet(L"<@compound:R>")) == 0
- && (compoundRSymbol=alphabet(L"<compound-R>")) == 0)
+ && (compoundRSymbol=alphabet(L"<compound-R>")) == 0)
{
wcerr << L"Warning: Decomposition symbol <:compound:R> not found" << endl;
}
@@ -834,7 +830,7 @@ FSTProcessor::initDecompositionSymbols() {
}
-void
+void
FSTProcessor::initDecomposition() {
do_decomposition = true;
initAnalysis();
@@ -842,7 +838,7 @@ FSTProcessor::initDecomposition() {
}
/*wstring
-FSTProcessor::decompose(wstring w)
+FSTProcessor::decompose(wstring w)
{
State current_state = *initial_state;
@@ -856,10 +852,10 @@ FSTProcessor::decompose(wstring w)
}
//wcerr << L"+ decompose: " << w << endl;
- for (unsigned int i = 0; i < w.length(); i++)
- {
- //if(i == (w.length() - 1))
- if(i == (w.length()))
+ for (unsigned int i = 0; i < w.length(); i++)
+ {
+ //if(i == (w.length() - 1))
+ if(i == (w.length()))
{
last = true;
}
@@ -870,41 +866,41 @@ FSTProcessor::decompose(wstring w)
//wcerr << L"++ [" << last << L"][" << current_state.size() << L"] decompose: " << w.at(i) << endl;
- if(last)
+ if(last)
{
previous_state = current_state;
}
- else
+ else
{
previous_state = current_state;
- if (current_state.size() != 0)
+ if (current_state.size() != 0)
{
- if (!isAlphabetic(val) && iswupper(val) && !caseSensitive)
+ if (!isAlphabetic(val) && iswupper(val) && !caseSensitive)
{
- current_state.step(val, towlower(val));
- }
- else
+ current_state.step(val, towlower(val));
+ }
+ else
{
//wcerr << L"+++ step: " << w.at(i) << endl;
current_state.step(val);
}
}
}
- if(i == (w.length())-1)
+ if(i == (w.length())-1)
{
last = true;
}
-
- if (current_state.size() == 0 || last)
+
+ if (current_state.size() == 0 || last)
{
//wcerr << L"+++ [" << last << L"][" << current_state.size() << L"]" << endl;
- if(current_state.isFinal(all_finals))
+ if(current_state.isFinal(all_finals))
{
previous_state = current_state;
}
- if(previous_state.isFinal(all_finals))
+ if(previous_state.isFinal(all_finals))
{
firstupper = iswupper(w.at(0));
wstring result = previous_state.filterFinals(all_finals, alphabet,
@@ -915,15 +911,15 @@ FSTProcessor::decompose(wstring w)
//wcerr << L"++++ result[" << index << L"]: " << result << endl;
vector<wstring> lfs;
wstring::size_type pos;
-
+
pos = result.find(L'/');
- if(pos == wstring::npos)
+ if(pos == wstring::npos)
{
lfs.push_back(result);
- }
+ }
else
{
- while(pos != wstring::npos)
+ while(pos != wstring::npos)
{
lfs.push_back(result.substr(0, pos));
result.erase(0, pos + 1);
@@ -939,10 +935,10 @@ FSTProcessor::decompose(wstring w)
return L"";
}
- if (!last)
+ if (!last)
{
current_state = *initial_state;
- i--;
+ i--;
}
}
}
@@ -955,7 +951,7 @@ FSTProcessor::decompose(wstring w)
wstring lf = L"";
- if(index != elements.size())
+ if(index != elements.size())
{
//wcerr << L"++ index != elements.size(): " << index << L" != " << elements.size() << endl;
return L"";
@@ -964,17 +960,17 @@ FSTProcessor::decompose(wstring w)
vector<wstring> first_elements = elements.at(0);
vector<wstring> second_elements = elements.at(1);
- if(first_elements.size() == 0 || second_elements.size() == 0)
+ if(first_elements.size() == 0 || second_elements.size() == 0)
{
//wcerr << L"++ first or second empty" << endl;
return L"";
}
-
- if(index == 2)
- {
+
+ if(index == 2)
+ {
for(unsigned int j = 0; j < first_elements.size(); j++)
{
- for(unsigned int y = 0; y < second_elements.size(); y++)
+ for(unsigned int y = 0; y < second_elements.size(); y++)
{
wstring analysis = first_elements.at(j) + L"+" + second_elements.at(y);
lf = lf + L"/" + analysis;
@@ -982,7 +978,7 @@ FSTProcessor::decompose(wstring w)
}
}
}
- else if(index == 3)
+ else if(index == 3)
{
vector<wstring> third_elements = elements.at(2);
@@ -990,7 +986,7 @@ FSTProcessor::decompose(wstring w)
{
for(unsigned int k = 0; k < second_elements.size(); k++)
{
- for(unsigned int y = 0; y < third_elements.size(); y++)
+ for(unsigned int y = 0; y < third_elements.size(); y++)
{
wstring analysis = first_elements.at(j) + L"+" + second_elements.at(k) + L"+" + third_elements.at(y);
lf = lf + L"/" + analysis;
@@ -999,8 +995,8 @@ FSTProcessor::decompose(wstring w)
}
}
- }
- else
+ }
+ else
{
return L"";
}
@@ -1038,7 +1034,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
uppercase = firstupper && iswupper(sf[sf.size()-1]);
}
- if(do_decomposition && compoundOnlyLSymbol != 0)
+ if(do_decomposition && compoundOnlyLSymbol != 0)
{
current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
}
@@ -1056,7 +1052,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
uppercase = firstupper && iswupper(sf[sf.size()-1]);
}
- if(do_decomposition && compoundOnlyLSymbol != 0)
+ if(do_decomposition && compoundOnlyLSymbol != 0)
{
current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
}
@@ -1064,7 +1060,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
escaped_chars,
uppercase, firstupper);
last_postblank = true;
- last = input_buffer.getPos();
+ last = input_buffer.getPos();
}
else if(current_state.isFinal(preblank))
{
@@ -1074,7 +1070,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
uppercase = firstupper && iswupper(sf[sf.size()-1]);
}
- if(do_decomposition && compoundOnlyLSymbol != 0)
+ if(do_decomposition && compoundOnlyLSymbol != 0)
{
current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
}
@@ -1082,7 +1078,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
escaped_chars,
uppercase, firstupper);
last_preblank = true;
- last = input_buffer.getPos();
+ last = input_buffer.getPos();
}
else if(!isAlphabetic(val))
{
@@ -1092,12 +1088,12 @@ FSTProcessor::analysis(FILE *input, FILE *output)
uppercase = firstupper && iswupper(sf[sf.size()-1]);
}
- if(do_decomposition && compoundOnlyLSymbol != 0)
+ if(do_decomposition && compoundOnlyLSymbol != 0)
{
current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
}
- lf = current_state.filterFinals(all_finals, alphabet,
- escaped_chars,
+ lf = current_state.filterFinals(all_finals, alphabet,
+ escaped_chars,
uppercase, firstupper);
last_postblank = false;
last_preblank = false;
@@ -1105,7 +1101,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
last = input_buffer.getPos();
}
}
- else if(sf == L"" && iswspace(val))
+ else if(sf == L"" && iswspace(val))
{
lf = L"/*";
lf.append(sf);
@@ -1123,7 +1119,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
{
current_state.step(val, towlower(val));
}
-
+
if(current_state.size() != 0)
{
alphabet.getSymbol(sf, val);
@@ -1168,14 +1164,14 @@ FSTProcessor::analysis(FILE *input, FILE *output)
input_buffer.setPos(last);
input_buffer.back(1);
}
- else if(isAlphabetic(val) &&
- ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
+ else if(isAlphabetic(val) &&
+ ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
lf == L""))
{
do
{
alphabet.getSymbol(sf, val);
- }
+ }
while((val = readAnalysis(input)) && isAlphabetic(val));
unsigned int limit = firstNotAlpha(sf);
@@ -1184,13 +1180,13 @@ FSTProcessor::analysis(FILE *input, FILE *output)
if(limit == 0)
{
input_buffer.back(sf.size());
- writeEscaped(sf.substr(0,1), output);
+ writeEscaped(sf.substr(0,1), output);
}
else
- {
+ {
input_buffer.back(1+(size-limit));
wstring unknown_word = sf.substr(0, limit);
- if(do_decomposition)
+ if(do_decomposition)
{
if(!dictionaryCase)
{
@@ -1200,16 +1196,16 @@ FSTProcessor::analysis(FILE *input, FILE *output)
wstring compound = L"";
compound = compoundAnalysis(unknown_word, uppercase, firstupper);
- if(compound != L"")
+ if(compound != L"")
{
printWord(unknown_word, compound, output);
}
- else
+ else
{
printUnknownWord(unknown_word, output);
}
- }
- else
+ }
+ else
{
printUnknownWord(unknown_word, output);
}
@@ -1226,10 +1222,10 @@ FSTProcessor::analysis(FILE *input, FILE *output)
writeEscaped(sf.substr(0,1), output);
}
else
- {
+ {
input_buffer.back(1+(size-limit));
wstring unknown_word = sf.substr(0, limit);
- if(do_decomposition)
+ if(do_decomposition)
{
if(!dictionaryCase)
{
@@ -1239,16 +1235,16 @@ FSTProcessor::analysis(FILE *input, FILE *output)
wstring compound = L"";
compound = compoundAnalysis(unknown_word, uppercase, firstupper);
- if(compound != L"")
+ if(compound != L"")
{
printWord(unknown_word, compound, output);
}
- else
+ else
{
printUnknownWord(unknown_word, output);
}
- }
- else
+ }
+ else
{
printUnknownWord(unknown_word, output);
}
@@ -1257,12 +1253,12 @@ FSTProcessor::analysis(FILE *input, FILE *output)
}
else
{
- printWord(sf.substr(0, sf.size()-input_buffer.diffPrevPos(last)),
+ printWord(sf.substr(0, sf.size()-input_buffer.diffPrevPos(last)),
lf, output);
input_buffer.setPos(last);
input_buffer.back(1);
}
-
+
current_state = *initial_state;
lf = L"";
sf = L"";
@@ -1271,7 +1267,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
last_preblank = false;
}
}
-
+
// print remaining blanks
flushBlanks(output);
}
@@ -1280,12 +1276,12 @@ void
FSTProcessor::analysis_wrapper_null_flush(FILE *input, FILE *output)
{
setNullFlush(false);
- while(!feof(input))
+ while(!feof(input))
{
analysis(input, output);
fputwc_unlocked(L'\0', output);
int code = fflush(output);
- if(code != 0)
+ if(code != 0)
{
wcerr << L"Could not flush output " << errno << endl;
}
@@ -1293,18 +1289,18 @@ FSTProcessor::analysis_wrapper_null_flush(FILE *input, FILE *output)
}
void
-FSTProcessor::generation_wrapper_null_flush(FILE *input, FILE *output,
+FSTProcessor::generation_wrapper_null_flush(FILE *input, FILE *output,
GenerationMode mode)
{
setNullFlush(false);
nullFlushGeneration = true;
-
- while(!feof(input))
+
+ while(!feof(input))
{
generation(input, output, mode);
fputwc_unlocked(L'\0', output);
int code = fflush(output);
- if(code != 0)
+ if(code != 0)
{
wcerr << L"Could not flush output " << errno << endl;
}
@@ -1315,12 +1311,12 @@ void
FSTProcessor::postgeneration_wrapper_null_flush(FILE *input, FILE *output)
{
setNullFlush(false);
- while(!feof(input))
+ while(!feof(input))
{
postgeneration(input, output);
fputwc_unlocked(L'\0', output);
int code = fflush(output);
- if(code != 0)
+ if(code != 0)
{
wcerr << L"Could not flush output " << errno << endl;
}
@@ -1331,12 +1327,12 @@ void
FSTProcessor::transliteration_wrapper_null_flush(FILE *input, FILE *output)
{
setNullFlush(false);
- while(!feof(input))
+ while(!feof(input))
{
transliteration(input, output);
fputwc_unlocked(L'\0', output);
int code = fflush(output);
- if(code != 0)
+ if(code != 0)
{
wcerr << L"Could not flush output " << errno << endl;
}
@@ -1358,14 +1354,14 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
{
if(iswpunct(val))
{
- lf = current_state.filterFinalsTM(all_finals, alphabet,
+ lf = current_state.filterFinalsTM(all_finals, alphabet,
escaped_chars,
blankqueue, numbers).substr(1);
last = input_buffer.getPos();
numbers.clear();
}
}
- else if(sf == L"" && iswspace(val))
+ else if(sf == L"" && iswspace(val))
{
lf.append(sf);
last = input_buffer.getPos();
@@ -1379,7 +1375,7 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
{
current_state.step(val, towlower(val));
}
-
+
if(current_state.size() != 0)
{
if(val == -1)
@@ -1412,11 +1408,11 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
fputwc_unlocked(val, output);
}
}
- else if(!iswspace(val) && !iswpunct(val) &&
- ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
+ else if(!iswspace(val) && !iswpunct(val) &&
+ ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
lf == L""))
{
-
+
do
{
if(val == -1)
@@ -1431,7 +1427,7 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
{
alphabet.getSymbol(sf, val);
}
- }
+ }
while((val = readTMAnalysis(input)) && !iswspace(val) && !iswpunct(val));
if(val == 0)
@@ -1450,7 +1446,7 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
break;
}
blankqueue.pop();
- }
+ }
/*
unsigned int limit = sf.find(L' ');
@@ -1477,7 +1473,7 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
break;
}
blankqueue.pop();
- }
+ }
}
else
@@ -1488,13 +1484,13 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
input_buffer.setPos(last);
input_buffer.back(1);
}
-
+
current_state = *initial_state;
lf = L"";
sf = L"";
}
}
-
+
// print remaining blanks
flushBlanks(output);
}
@@ -1510,9 +1506,9 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
State current_state = *initial_state;
wstring sf = L"";
-
+
outOfWord = false;
-
+
skipUntil(input, output, L'^');
int val;
@@ -1538,9 +1534,9 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
}
else if(mode == gm_tagged_nm)
{
- fputwc_unlocked(L'^', output);
+ fputwc_unlocked(L'^', output);
writeEscaped(removeTags(sf.substr(1)), output);
- fputwc_unlocked(L'/', output);
+ fputwc_unlocked(L'/', output);
writeEscapedWithTags(sf, output);
fputwc_unlocked(L'$', output);
}
@@ -1564,10 +1560,10 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
writeEscaped(removeTags(sf), output);
}
else if(mode == gm_tagged_nm)
- {
- fputwc_unlocked(L'^', output);
+ {
+ fputwc_unlocked(L'^', output);
writeEscaped(removeTags(sf.substr(1)), output);
- fputwc_unlocked(L'/', output);
+ fputwc_unlocked(L'/', output);
writeEscapedWithTags(sf, output);
fputwc_unlocked(L'$', output);
}
@@ -1620,15 +1616,15 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
}
else if(mode == gm_tagged_nm)
{
- fputwc_unlocked(L'^', output);
+ fputwc_unlocked(L'^', output);
writeEscaped(removeTags(sf), output);
- fputwc_unlocked(L'/', output);
- fputwc_unlocked(L'#', output);
+ fputwc_unlocked(L'/', output);
+ fputwc_unlocked(L'#', output);
writeEscapedWithTags(sf, output);
fputwc_unlocked(L'$', output);
}
}
-
+
current_state = *initial_state;
sf = L"";
}
@@ -1695,7 +1691,7 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
fputwc_unlocked(val, output);
}
}
- else
+ else
{
// test for final states
if(current_state.isFinal(all_finals))
@@ -1707,36 +1703,36 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
uppercase, firstupper, 0);
// case of the beggining of the next word
-
- wstring mybuf = L"";
- for(unsigned int i = sf.size()-1; i >= 0; i--)
+
+ wstring mybuf = L"";
+ for(size_t i = sf.size(); i > 0; --i)
{
- if(!isalpha(sf[i]))
+ if(!isalpha(sf[i-1]))
{
break;
}
else
{
- mybuf = sf[i] + mybuf;
+ mybuf = sf[i-1] + mybuf;
}
}
-
+
if(mybuf.size() > 0)
{
bool myfirstupper = iswupper(mybuf[0]);
bool myuppercase = mybuf.size() > 1 && iswupper(mybuf[1]);
-
- for(unsigned int i = lf.size()-1; i >= 0; i--)
+
+ for(size_t i = lf.size(); i > 0; --i)
{
- if(!isalpha(lf[i]))
+ if(!isalpha(lf[i-1]))
{
- if(myfirstupper && i != lf.size()-1)
+ if(myfirstupper && i != lf.size())
{
- lf[i+1] = towupper(lf[i+1]);
+ lf[i] = towupper(lf[i]);
}
else
{
- lf[i+1] = towlower(lf[i+1]);
+ lf[i] = towlower(lf[i]);
}
break;
}
@@ -1744,16 +1740,16 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
{
if(myuppercase)
{
- lf[i] = towupper(lf[i]);
+ lf[i-1] = towupper(lf[i-1]);
}
else
{
- lf[i] = towlower(lf[i]);
+ lf[i-1] = towlower(lf[i-1]);
}
}
}
}
-
+
last = input_buffer.getPos();
}
@@ -1765,20 +1761,20 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
{
current_state.step(val, towlower(val));
}
-
+
if(current_state.size() != 0)
{
alphabet.getSymbol(sf, val);
}
else
- {
+ {
if(lf == L"")
{
unsigned int mark = sf.size();
for(unsigned int i = 1, limit = sf.size(); i < limit; i++)
{
if(sf[i] == L'~')
- {
+ {
mark = i;
break;
}
@@ -1810,7 +1806,7 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
fputwc_unlocked(L'\\', output);
}
fputwc_unlocked(val, output);
- }
+ }
}
current_state = *initial_state;
@@ -1820,9 +1816,9 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
}
}
}
-
+
// print remaining blanks
- flushBlanks(output);
+ flushBlanks(output);
}
void
@@ -1840,35 +1836,35 @@ FSTProcessor::transliteration(FILE *input, FILE *output)
while(wchar_t val = readPostgeneration(input))
{
- if(iswpunct(val) || iswspace(val))
+ if(iswpunct(val) || iswspace(val))
{
bool firstupper = iswupper(sf[1]);
bool uppercase = sf.size() > 1 && firstupper && iswupper(sf[2]);
lf = current_state.filterFinals(all_finals, alphabet, escaped_chars,
uppercase, firstupper, 0);
- if(!lf.empty())
+ if(!lf.empty())
{
fputws_unlocked(lf.substr(1).c_str(), output);
current_state = *initial_state;
lf = L"";
sf = L"";
}
- if(iswspace(val))
+ if(iswspace(val))
{
printSpace(val, output);
- }
- else
+ }
+ else
{
- if(isEscaped(val))
+ if(isEscaped(val))
{
fputwc_unlocked(L'\\', output);
}
fputwc_unlocked(val, output);
}
- }
- else
+ }
+ else
{
- if(current_state.isFinal(all_finals))
+ if(current_state.isFinal(all_finals))
{
bool firstupper = iswupper(sf[1]);
bool uppercase = sf.size() > 1 && firstupper && iswupper(sf[2]);
@@ -1878,28 +1874,28 @@ FSTProcessor::transliteration(FILE *input, FILE *output)
}
current_state.step(val);
- if(current_state.size() != 0)
+ if(current_state.size() != 0)
{
alphabet.getSymbol(sf, val);
- }
- else
+ }
+ else
{
- if(!lf.empty())
+ if(!lf.empty())
{
fputws_unlocked(lf.substr(1).c_str(), output);
input_buffer.setPos(last);
input_buffer.back(1);
val = lf[lf.size()-1];
- }
- else
+ }
+ else
{
- if(iswspace(val))
+ if(iswspace(val))
{
printSpace(val, output);
- }
- else
+ }
+ else
{
- if(isEscaped(val))
+ if(isEscaped(val))
{
fputwc_unlocked(L'\\', output);
}
@@ -1925,7 +1921,7 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
unsigned int end_point = input_word.size()-2;
wstring queue = L"";
bool mark = false;
-
+
if(with_delim == false)
{
start_point = 0;
@@ -1936,13 +1932,13 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
{
return input_word;
}
-
+
if(input_word[start_point] == L'=')
{
start_point++;
mark = true;
}
-
+
bool firstupper = iswupper(input_word[start_point]);
bool uppercase = firstupper && iswupper(input_word[start_point+1]);
@@ -1950,7 +1946,7 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
{
int val;
wstring symbol = L"";
-
+
if(input_word[i] == L'\\')
{
i++;
@@ -1985,13 +1981,13 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
current_state.step(val);
}
}
- if(current_state.isFinal(all_finals))
+ if(current_state.isFinal(all_finals))
{
result = current_state.filterFinals(all_finals, alphabet,
escaped_chars,
uppercase, firstupper, 0);
if(with_delim)
- {
+ {
if(mark)
{
result = L"^="+result.substr(1);
@@ -2013,9 +2009,9 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
}
}
}
-
+
if(current_state.size() == 0)
- {
+ {
if(symbol != L"" && result != L"")
{
queue.append(symbol);
@@ -2025,18 +2021,18 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
// word is not present
if(with_delim)
{
- result = L"^@" + input_word.substr(1);
+ result = L"^@" + input_word.substr(1);
}
else
{
result = L"@" + input_word;
}
- return result;
- }
+ return result;
+ }
}
}
- if(start_point < (end_point - 3))
+ if(start_point < (end_point - 3))
{
return L"^$";
}
@@ -2044,8 +2040,7 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
if(queue != L"")
{
- wstring result_with_queue = L"";
- bool multiple_translation = false;
+ wstring result_with_queue = L"";
for(unsigned int i = 0, limit = result.size(); i != limit; i++)
{
switch(result[i])
@@ -2054,12 +2049,11 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
result_with_queue += L'\\';
i++;
break;
-
+
case L'/':
result_with_queue.append(queue);
- multiple_translation = true;
break;
-
+
default:
break;
}
@@ -2094,7 +2088,7 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
unsigned int end_point = input_word.size()-2;
wstring queue = L"";
bool mark = false;
-
+
if(with_delim == false)
{
start_point = 0;
@@ -2105,13 +2099,13 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
{
return input_word;
}
-
+
if(input_word[start_point] == L'=')
{
start_point++;
mark = true;
}
-
+
bool firstupper = iswupper(input_word[start_point]);
bool uppercase = firstupper && iswupper(input_word[start_point+1]);
@@ -2119,7 +2113,7 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
{
int val;
wstring symbol = L"";
-
+
if(input_word[i] == L'\\')
{
i++;
@@ -2160,7 +2154,7 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
escaped_chars,
uppercase, firstupper, 0);
if(with_delim)
- {
+ {
if(mark)
{
result = L"^="+result.substr(1);
@@ -2182,9 +2176,9 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
}
}
}
-
+
if(current_state.size() == 0)
- {
+ {
if(symbol != L"" && result != L"")
{
queue.append(symbol);
@@ -2194,14 +2188,14 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
// word is not present
if(with_delim)
{
- result = L"^@" + input_word.substr(1);
+ result = L"^@" + input_word.substr(1);
}
else
{
result = L"@" + input_word;
}
- return result;
- }
+ return result;
+ }
}
}
@@ -2209,8 +2203,7 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
if(queue != L"")
{
- wstring result_with_queue = L"";
- bool multiple_translation = false;
+ wstring result_with_queue = L"";
for(unsigned int i = 0, limit = result.size(); i != limit; i++)
{
switch(result[i])
@@ -2219,12 +2212,11 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
result_with_queue += L'\\';
i++;
break;
-
+
case L'/':
result_with_queue.append(queue);
- multiple_translation = true;
break;
-
+
default:
break;
}
@@ -2253,13 +2245,13 @@ FSTProcessor::bilingual_wrapper_null_flush(FILE *input, FILE *output)
{
setNullFlush(false);
nullFlushGeneration = true;
-
- while(!feof(input))
+
+ while(!feof(input))
{
bilingual(input, output);
fputwc_unlocked(L'\0', output);
int code = fflush(output);
- if(code != 0)
+ if(code != 0)
{
wcerr << L"Could not flush output " << errno << endl;
}
@@ -2270,7 +2262,7 @@ wstring
FSTProcessor::compose(wstring const &lexforms, wstring const &queue) const
{
wstring result = L"";
-
+
for(unsigned int i = 1; i< lexforms.size(); i++)
{
if(lexforms[i] == L'\\')
@@ -2284,7 +2276,7 @@ FSTProcessor::compose(wstring const &lexforms, wstring const &queue) const
}
result += lexforms[i];
}
-
+
return L"/" + result + queue;
}
@@ -2300,9 +2292,9 @@ FSTProcessor::bilingual(FILE *input, FILE *output)
wstring sf = L""; // source language analysis
wstring queue = L""; // symbols to be added to each target
wstring result = L""; // result of looking up analysis in bidix
-
+
outOfWord = false;
-
+
skipUntil(input, output, L'^');
pair<wstring,int> tr; // readBilingual return value, containing:
int val; // the alphabet value of current symbol, and
@@ -2311,7 +2303,7 @@ FSTProcessor::bilingual(FILE *input, FILE *output)
bool seensurface = false;
wstring surface = L"";
-
+
while(true) // ie. while(val != 0x7fffffff)
{
tr = readBilingual(input, output);
@@ -2319,11 +2311,11 @@ FSTProcessor::bilingual(FILE *input, FILE *output)
val = tr.second;
//fwprintf(stderr, L"> %S : %C : %d\n", tr.first.c_str(), tr.second, tr.second);
- if(biltransSurfaceForms && !seensurface && !outOfWord)
+ if(biltransSurfaceForms && !seensurface && !outOfWord)
{
- while(val != L'/' && val != 0x7fffffff)
+ while(val != L'/' && val != 0x7fffffff)
{
- surface = surface + symbol;
+ surface = surface + symbol;
alphabet.getSymbol(surface, val);
tr = readBilingual(input, output);
symbol = tr.first;
@@ -2336,11 +2328,11 @@ FSTProcessor::bilingual(FILE *input, FILE *output)
val = tr.second;
}
- if (val == 0x7fffffff)
+ if (val == 0x7fffffff)
{
break;
}
-
+
if(val == L'$' && outOfWord)
{
if(!seentags) // if no tags: only return complete matches
@@ -2352,7 +2344,7 @@ FSTProcessor::bilingual(FILE *input, FILE *output)
escaped_chars,
uppercase, firstupper, 0);
}
-
+
if(sf[0] == L'*')
{
printWordBilingual(sf, L"/"+sf, output);
@@ -2363,17 +2355,17 @@ FSTProcessor::bilingual(FILE *input, FILE *output)
}
else
{ //xxx
- if(biltransSurfaceForms)
+ if(biltransSurfaceForms)
{
printWordBilingual(surface, L"/@"+surface, output);
}
else
- {
+ {
printWordBilingual(sf, L"/@"+sf, output);
}
}
seensurface = false;
- surface = L"";
+ surface = L"";
queue = L"";
result = L"";
current_state = *initial_state;
@@ -2393,8 +2385,8 @@ FSTProcessor::bilingual(FILE *input, FILE *output)
alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
if(val == 0) // non-alphabetic, possibly unknown tag; add to sf
{
- sf += symbol;
- }
+ sf += symbol;
+ }
}
else
{
@@ -2405,38 +2397,39 @@ FSTProcessor::bilingual(FILE *input, FILE *output)
alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
if(val == 0) // non-alphabetic, possibly unknown tag; add to sf
{
- sf += symbol;
+ sf += symbol;
}
- if(alphabet.isTag(val) || val == 0)
+ if(alphabet.isTag(val) || val == 0)
{
seentags = true;
- }
+ }
if(current_state.size() != 0)
{
- if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
- {
- current_state.step(val, towlower(val));
- }
- else
- {
- current_state.step(val);
- }
+ if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
+ {
+ current_state.step(val, towlower(val));
+ }
+ else
+ {
+ current_state.step(val);
+ }
}
if(current_state.isFinal(all_finals))
{
bool uppercase = sf.size() > 1 && iswupper(sf[1]);
bool firstupper= iswupper(sf[0]);
+ queue = L""; // the intervening tags were matched
result = current_state.filterFinals(all_finals, alphabet,
escaped_chars,
- uppercase, firstupper, 0);
+ uppercase, firstupper, 0);
}
- if(current_state.size() == 0 && result != L"")
+ else if(result != L"")
{
// We already have a result, but there is still more to read
// of the analysis; following tags are not consumed, but
// output as target language tags (added to result on
- // end-of-word)
+ // end-of-word). This queue is reset if result is changed.
if(alphabet.isTag(val)) // known tag
{
alphabet.getSymbol(queue, val);
@@ -2445,7 +2438,7 @@ FSTProcessor::bilingual(FILE *input, FILE *output)
{
queue += symbol;
}
- else
+ else if(current_state.size() == 0)
{
// There are no more alive transductions and the current symbol is not a tag -- unknown word!
result = L"";
@@ -2465,7 +2458,7 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
wstring queue = L"";
bool mark = false;
bool seentags = false; // have we seen any tags at all in the analysis?
-
+
if(with_delim == false)
{
start_point = 0;
@@ -2476,7 +2469,7 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
{
return pair<wstring, int>(input_word, 0);
}
-
+
if(input_word[start_point] == L'=')
{
start_point++;
@@ -2485,12 +2478,12 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
bool firstupper = iswupper(input_word[start_point]);
bool uppercase = firstupper && iswupper(input_word[start_point+1]);
-
+
for(unsigned int i = start_point; i <= end_point; i++)
{
int val = 0;
wstring symbol = L"";
-
+
if(input_word[i] == L'\\')
{
i++;
@@ -2538,7 +2531,7 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
result = L"^=" + result.substr(1);
}
else
- {
+ {
result[0] = L'^';
}
}
@@ -2554,7 +2547,7 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
}
}
}
-
+
if(current_state.size() == 0)
{
if(symbol != L"" && result != L"")
@@ -2566,42 +2559,41 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
// word is not present
if(with_delim)
{
- result = L"^@" + input_word.substr(1);
+ result = L"^@" + input_word.substr(1);
}
else
{
result = L"@" + input_word;
}
- return pair<wstring, int>(result, 0);
- }
+ return pair<wstring, int>(result, 0);
+ }
}
}
if (!seentags
&& L"" == current_state.filterFinals(all_finals, alphabet,
escaped_chars,
- uppercase, firstupper, 0))
+ uppercase, firstupper, 0))
{
// word is not present
if(with_delim)
{
- result = L"^@" + input_word.substr(1);
+ result = L"^@" + input_word.substr(1);
}
else
{
result = L"@" + input_word;
}
- return pair<wstring, int>(result, 0);
+ return pair<wstring, int>(result, 0);
}
-
+
// attach unmatched queue automatically
if(queue != L"")
{
- wstring result_with_queue = L"";
- bool multiple_translation = false;
+ wstring result_with_queue = L"";
for(unsigned int i = 0, limit = result.size(); i != limit; i++)
{
switch(result[i])
@@ -2610,12 +2602,11 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
result_with_queue += L'\\';
i++;
break;
-
+
case L'/':
result_with_queue.append(queue);
- multiple_translation = true;
break;
-
+
default:
break;
}
@@ -2647,7 +2638,7 @@ FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
unsigned int start_point = 1;
unsigned int end_point = input_word.size()-2;
bool mark = false;
-
+
if(with_delim == false)
{
start_point = 0;
@@ -2658,7 +2649,7 @@ FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
{
return input_word;
}
-
+
if(input_word[start_point] == L'=')
{
start_point++;
@@ -2672,7 +2663,7 @@ FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
{
int val;
wstring symbol = L"";
-
+
if(input_word[i] == L'\\')
{
i++;
@@ -2713,7 +2704,7 @@ FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
escaped_chars,
uppercase, firstupper, 0);
if(with_delim)
- {
+ {
if(mark)
{
result = L"^=" + result.substr(1);
@@ -2735,22 +2726,22 @@ FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
}
}
}
-
+
if(current_state.size() == 0)
- {
+ {
if(symbol == L"")
{
// word is not present
if(with_delim)
{
- result = L"^@" + input_word.substr(1);
+ result = L"^@" + input_word.substr(1);
}
else
{
result = L"@" + input_word;
}
- return result;
- }
+ return result;
+ }
}
}
@@ -2779,8 +2770,8 @@ FSTProcessor::valid() const
wcerr << L"Error: Invalid dictionary (hint: entry beginning with whitespace)" << endl;
return false;
}
- }
-
+ }
+
return true;
}
@@ -2831,7 +2822,7 @@ FSTProcessor::readSAO(FILE *input)
else
{
streamError();
- }
+ }
}
input_buffer.add(val);
@@ -2891,22 +2882,22 @@ FSTProcessor::SAO(FILE *input, FILE *output)
escaped_chars,
uppercase, firstupper);
last_postblank = true;
- last = input_buffer.getPos();
+ last = input_buffer.getPos();
}
else if(!isAlphabetic(val))
{
bool firstupper = iswupper(sf[0]);
bool uppercase = firstupper && iswupper(sf[sf.size()-1]);
- lf = current_state.filterFinalsSAO(all_finals, alphabet,
- escaped_chars,
+ lf = current_state.filterFinalsSAO(all_finals, alphabet,
+ escaped_chars,
uppercase, firstupper);
last_postblank = false;
last_incond = false;
last = input_buffer.getPos();
}
}
- else if(sf == L"" && iswspace(val))
+ else if(sf == L"" && iswspace(val))
{
lf = L"/*";
lf.append(sf);
@@ -2923,7 +2914,7 @@ FSTProcessor::SAO(FILE *input, FILE *output)
{
current_state.step(val, towlower(val));
}
-
+
if(current_state.size() != 0)
{
alphabet.getSymbol(sf, val);
@@ -2958,14 +2949,14 @@ FSTProcessor::SAO(FILE *input, FILE *output)
input_buffer.setPos(last);
input_buffer.back(1);
}
- else if(isAlphabetic(val) &&
- ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
+ else if(isAlphabetic(val) &&
+ ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
lf == L""))
{
do
{
alphabet.getSymbol(sf, val);
- }
+ }
while((val = readSAO(input)) && isAlphabetic(val));
unsigned int limit = firstNotAlpha(sf);
@@ -2992,7 +2983,7 @@ FSTProcessor::SAO(FILE *input, FILE *output)
input_buffer.setPos(last);
input_buffer.back(1);
}
-
+
current_state = *initial_state;
lf = L"";
sf = L"";
@@ -3000,7 +2991,7 @@ FSTProcessor::SAO(FILE *input, FILE *output)
last_postblank = false;
}
}
-
+
// print remaining blanks
flushBlanks(output);
}
@@ -3015,7 +3006,7 @@ FSTProcessor::removeTags(wstring const &str)
return str.substr(0, i);
}
}
-
+
return str;
}
@@ -3064,8 +3055,8 @@ FSTProcessor::firstNotAlpha(wstring const &sf)
if(!isAlphabetic(sf[i]))
{
return i;
- }
+ }
}
-
+
return wstring::npos;
}
diff --git a/lttoolbox/fst_processor.h b/lttoolbox/fst_processor.h
index 59fd997..dcca272 100644
--- a/lttoolbox/fst_processor.h
+++ b/lttoolbox/fst_processor.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _FSTPROCESSOR_
@@ -141,7 +139,7 @@ private:
bool caseSensitive;
/**
- * if true, uses the dictionary case, discarding surface case
+ * if true, uses the dictionary case, discarding surface case
* information
*/
bool dictionaryCase;
@@ -152,7 +150,7 @@ private:
bool nullFlush;
/**
- * nullFlush property for the skipUntil function
+ * nullFlush property for the skipUntil function
*/
bool nullFlushGeneration;
@@ -348,7 +346,7 @@ private:
GenerationMode mode);
void postgeneration_wrapper_null_flush(FILE *input, FILE *output);
void transliteration_wrapper_null_flush(FILE *input, FILE *output);
-
+
wstring compose(wstring const &lexforms, wstring const &queue) const;
bool isLastBlankTM;
diff --git a/lttoolbox/lt_comp.cc b/lttoolbox/lt_comp.cc
index 7692660..9f58975 100644
--- a/lttoolbox/lt_comp.cc
+++ b/lttoolbox/lt_comp.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/compiler.h>
#include <lttoolbox/att_compiler.h>
diff --git a/lttoolbox/lt_expand.cc b/lttoolbox/lt_expand.cc
index ed3651d..84e0afd 100644
--- a/lttoolbox/lt_expand.cc
+++ b/lttoolbox/lt_expand.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/expander.h>
diff --git a/lttoolbox/lt_locale.cc b/lttoolbox/lt_locale.cc
index a3ee533..e33e494 100644
--- a/lttoolbox/lt_locale.cc
+++ b/lttoolbox/lt_locale.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/lt_locale.h>
diff --git a/lttoolbox/lt_locale.h b/lttoolbox/lt_locale.h
index 582acc6..6612ac5 100644
--- a/lttoolbox/lt_locale.h
+++ b/lttoolbox/lt_locale.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _MYLOCALE_
diff --git a/lttoolbox/lt_print.cc b/lttoolbox/lt_print.cc
index a2ac5b9..fc06ac9 100644
--- a/lttoolbox/lt_print.cc
+++ b/lttoolbox/lt_print.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/transducer.h>
#include <lttoolbox/compression.h>
diff --git a/lttoolbox/lt_proc.cc b/lttoolbox/lt_proc.cc
index 7a0921e..e6b9101 100644
--- a/lttoolbox/lt_proc.cc
+++ b/lttoolbox/lt_proc.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/fst_processor.h>
#include <lttoolbox/lttoolbox_config.h>
@@ -50,7 +48,7 @@ void endProgram(char *name)
cout << " -e, --decompose-nouns: Try to decompound unknown words" << endl;
cout << " -g, --generation: morphological generation" << endl;
cout << " -l, --tagged-gen: morphological generation keeping lexical forms" << endl;
- cout << " -m, --tagged-nm-gen: same as -l but without unknown word marks" << endl;
+ cout << " -m, --tagged-nm-gen: same as -l but without unknown word marks" << endl;
cout << " -n, --non-marked-gen morph. generation without unknown word marks" << endl;
cout << " -o, --surf-bilingual: lexical transfer with surface forms" << endl;
cout << " -p, --post-generation: post-generation" << endl;
@@ -114,7 +112,7 @@ int main(int argc, char *argv[])
{"case-sensitive", 0, 0, 'c'},
{"help", 0, 0, 'h'}
};
-#endif
+#endif
while(true)
{
@@ -123,20 +121,20 @@ int main(int argc, char *argv[])
int c = getopt_long(argc, argv, "abceglmndopstzwvh", long_options, &option_index);
#else
int c = getopt(argc, argv, "abceglmndopstzwvh");
-#endif
+#endif
if(c == -1)
{
break;
}
-
+
switch(c)
{
case 'c':
fstp.setCaseSensitiveMode(true);
break;
- case 'e':
+ case 'e':
case 'a':
case 'b':
case 'o':
@@ -179,7 +177,7 @@ int main(int argc, char *argv[])
FILE *input = stdin, *output = stdout;
LtLocale::tryToSetLocale();
-
+
if(optind == (argc - 3))
{
FILE *in = fopen(argv[optind], "rb");
@@ -188,21 +186,21 @@ int main(int argc, char *argv[])
cerr << "Error: Cannot not open file '" << argv[optind] << "'." << endl << endl;
exit(EXIT_FAILURE);
}
-
+
input = fopen(argv[optind+1], "rb");
if(input == NULL || ferror(input))
{
cerr << "Error: Cannot not open file '" << argv[optind+1] << "'." << endl << endl;
exit(EXIT_FAILURE);
}
-
+
output= fopen(argv[optind+2], "wb");
if(output == NULL || ferror(output))
{
cerr << "Error: Cannot not open file '" << argv[optind+2] << "'." << endl << endl;
exit(EXIT_FAILURE);
}
-
+
fstp.load(in);
fclose(in);
}
@@ -214,17 +212,17 @@ int main(int argc, char *argv[])
cerr << "Error: Cannot not open file '" << argv[optind] << "'." << endl << endl;
exit(EXIT_FAILURE);
}
-
+
input = fopen(argv[optind+1], "rb");
if(input == NULL || ferror(input))
{
cerr << "Error: Cannot not open file '" << argv[optind+1] << "'." << endl << endl;
exit(EXIT_FAILURE);
}
-
+
fstp.load(in);
fclose(in);
- }
+ }
else if(optind == (argc - 1))
{
FILE *in = fopen(argv[optind], "rb");
@@ -233,7 +231,7 @@ int main(int argc, char *argv[])
cerr << "Error: Cannot not open file '" << argv[optind] << "'." << endl << endl;
exit(EXIT_FAILURE);
}
- fstp.load(in);
+ fstp.load(in);
fclose(in);
}
else
@@ -261,13 +259,13 @@ int main(int argc, char *argv[])
checkValidity(fstp);
fstp.generation(input, output);
break;
-
+
case 'd':
fstp.initGeneration();
checkValidity(fstp);
fstp.generation(input, output, gm_all);
break;
-
+
case 'l':
fstp.initGeneration();
checkValidity(fstp);
@@ -279,7 +277,7 @@ int main(int argc, char *argv[])
checkValidity(fstp);
fstp.generation(input, output, gm_tagged_nm);
break;
-
+
case 'p':
fstp.initPostgeneration();
checkValidity(fstp);
@@ -293,18 +291,18 @@ int main(int argc, char *argv[])
break;
case 't':
- fstp.initPostgeneration();
+ fstp.initPostgeneration();
checkValidity(fstp);
fstp.transliteration(input, output);
break;
-
+
case 'o':
fstp.initBiltrans();
checkValidity(fstp);
fstp.setBiltransSurfaceForms(true);
fstp.bilingual(input, output);
break;
-
+
case 'b':
fstp.initBiltrans();
checkValidity(fstp);
@@ -316,10 +314,10 @@ int main(int argc, char *argv[])
checkValidity(fstp);
fstp.analysis(input, output);
break;
-
+
case 'a':
default:
- fstp.initAnalysis();
+ fstp.initAnalysis();
checkValidity(fstp);
fstp.analysis(input, output);
break;
@@ -336,6 +334,6 @@ int main(int argc, char *argv[])
}
fclose(input);
- fclose(output);
+ fclose(output);
return EXIT_SUCCESS;
}
diff --git a/lttoolbox/lt_tmxcomp.cc b/lttoolbox/lt_tmxcomp.cc
index f5aa19e..feb2156 100644
--- a/lttoolbox/lt_tmxcomp.cc
+++ b/lttoolbox/lt_tmxcomp.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/tmx_compiler.h>
#include <lttoolbox/lttoolbox_config.h>
diff --git a/lttoolbox/lt_tmxproc.cc b/lttoolbox/lt_tmxproc.cc
index 83f3086..548c288 100644
--- a/lttoolbox/lt_tmxproc.cc
+++ b/lttoolbox/lt_tmxproc.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/fst_processor.h>
#include <lttoolbox/lttoolbox_config.h>
diff --git a/lttoolbox/lt_trim.cc b/lttoolbox/lt_trim.cc
index d65873f..3814954 100644
--- a/lttoolbox/lt_trim.cc
+++ b/lttoolbox/lt_trim.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/transducer.h>
#include <lttoolbox/compression.h>
@@ -101,11 +99,11 @@ trim(FILE *file_mono, FILE *file_bi)
for(std::map<wstring, Transducer>::iterator it = trans_bi.begin(); it != trans_bi.end(); it++)
{
Transducer union_tmp = it->second;
- if(union_transducer.isEmpty())
+ if(union_transducer.isEmpty())
{
union_transducer = union_tmp;
}
- else
+ else
{
union_transducer.unionWith(alph_bi, union_tmp);
}
diff --git a/lttoolbox/ltstr.h b/lttoolbox/ltstr.h
index 2232d1a..258bfd2 100644
--- a/lttoolbox/ltstr.h
+++ b/lttoolbox/ltstr.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _Ltstr_
#define _Ltstr_
diff --git a/lttoolbox/match_exe.cc b/lttoolbox/match_exe.cc
index 7c6d89a..94b2bda 100644
--- a/lttoolbox/match_exe.cc
+++ b/lttoolbox/match_exe.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/match_exe.h>
@@ -22,7 +20,8 @@
#include <lttoolbox/my_stdio.h>
#include <lttoolbox/compression.h>
-MatchExe::MatchExe()
+MatchExe::MatchExe() :
+initial_id(0)
{
}
diff --git a/lttoolbox/match_exe.h b/lttoolbox/match_exe.h
index b0978ec..e33112e 100644
--- a/lttoolbox/match_exe.h
+++ b/lttoolbox/match_exe.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _MATCHEXE_
diff --git a/lttoolbox/match_node.cc b/lttoolbox/match_node.cc
index 906c603..6a51e10 100644
--- a/lttoolbox/match_node.cc
+++ b/lttoolbox/match_node.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/match_node.h>
diff --git a/lttoolbox/match_node.h b/lttoolbox/match_node.h
index 4b0c687..2df4e9b 100644
--- a/lttoolbox/match_node.h
+++ b/lttoolbox/match_node.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _MATCHNODE_
diff --git a/lttoolbox/match_state.cc b/lttoolbox/match_state.cc
index 5d6107f..7f0fc64 100644
--- a/lttoolbox/match_state.cc
+++ b/lttoolbox/match_state.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/match_state.h>
#include <lttoolbox/pattern_list.h>
@@ -56,11 +54,17 @@ void
MatchState::destroy()
{
delete[] state;
+ state = 0;
}
void
MatchState::copy(MatchState const &s)
{
+ if (state == 0)
+ {
+ state = new MatchNode *[BUF_LIMIT];
+ }
+
for(int i = 0; i < BUF_LIMIT; i++)
{
state[i] = s.state[i];
diff --git a/lttoolbox/match_state.h b/lttoolbox/match_state.h
index 1ddf470..79456d7 100644
--- a/lttoolbox/match_state.h
+++ b/lttoolbox/match_state.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _MATCHSTATE_
#define _MATCHSTATE_
diff --git a/lttoolbox/my_stdio.h b/lttoolbox/my_stdio.h
index 8aea63e..2fde421 100644
--- a/lttoolbox/my_stdio.h
+++ b/lttoolbox/my_stdio.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
// cstdio wrapper for backwards compatibility
diff --git a/lttoolbox/node.cc b/lttoolbox/node.cc
index 18a18ba..4e23274 100644
--- a/lttoolbox/node.cc
+++ b/lttoolbox/node.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/node.h>
diff --git a/lttoolbox/node.h b/lttoolbox/node.h
index 0df8a0f..c7e8331 100644
--- a/lttoolbox/node.h
+++ b/lttoolbox/node.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _NODE_
diff --git a/lttoolbox/pattern_list.cc b/lttoolbox/pattern_list.cc
index 4d7341c..f7d1659 100644
--- a/lttoolbox/pattern_list.cc
+++ b/lttoolbox/pattern_list.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/pattern_list.h>
#include <lttoolbox/compression.h>
@@ -43,7 +41,8 @@ PatternList::destroy()
{
}
-PatternList::PatternList()
+PatternList::PatternList() :
+sequence_id(0)
{
sequence = false;
alphabet.includeSymbol(ANY_TAG);
diff --git a/lttoolbox/pattern_list.h b/lttoolbox/pattern_list.h
index f34901b..65225c4 100644
--- a/lttoolbox/pattern_list.h
+++ b/lttoolbox/pattern_list.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _PATTERNLIST_
#define _PATTERNLIST_
diff --git a/lttoolbox/regexp_compiler.cc b/lttoolbox/regexp_compiler.cc
index 2bea989..3fe79da 100644
--- a/lttoolbox/regexp_compiler.cc
+++ b/lttoolbox/regexp_compiler.cc
@@ -12,16 +12,18 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/regexp_compiler.h>
#include <cstdlib>
#include <iostream>
-RegexpCompiler::RegexpCompiler()
+RegexpCompiler::RegexpCompiler() :
+token(0),
+alphabet(0),
+state(0),
+letter(0)
{
}
diff --git a/lttoolbox/regexp_compiler.h b/lttoolbox/regexp_compiler.h
index b5d2d36..6e0b810 100644
--- a/lttoolbox/regexp_compiler.h
+++ b/lttoolbox/regexp_compiler.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _REGEXP_COMPILER_
#define _REGEXP_COMPILER_
diff --git a/lttoolbox/sorted_vector.cc b/lttoolbox/sorted_vector.cc
index b239ea4..e51f9a7 100644
--- a/lttoolbox/sorted_vector.cc
+++ b/lttoolbox/sorted_vector.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/sorted_vector.h>
#include <cstdlib>
diff --git a/lttoolbox/sorted_vector.h b/lttoolbox/sorted_vector.h
index ac1162d..9995472 100644
--- a/lttoolbox/sorted_vector.h
+++ b/lttoolbox/sorted_vector.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _SORTEDVECTOR_
#define _SORTEDVECTOR_
diff --git a/lttoolbox/state.cc b/lttoolbox/state.cc
index cf6c9f8..446c741 100644
--- a/lttoolbox/state.cc
+++ b/lttoolbox/state.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/state.h>
diff --git a/lttoolbox/state.h b/lttoolbox/state.h
index 7ab016e..624ab7e 100644
--- a/lttoolbox/state.h
+++ b/lttoolbox/state.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _STATE_
#define _STATE_
diff --git a/lttoolbox/tmx_compiler.cc b/lttoolbox/tmx_compiler.cc
index 461a4e6..cead6c0 100644
--- a/lttoolbox/tmx_compiler.cc
+++ b/lttoolbox/tmx_compiler.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/tmx_compiler.h>
#include <lttoolbox/compression.h>
@@ -44,7 +42,8 @@ wstring const TMXCompiler::TMX_COMPILER_LANG_ATTR = L"lang";
wstring const TMXCompiler::TMX_COMPILER_SEG_ELEM = L"seg";
wstring const TMXCompiler::TMX_COMPILER_PROP_ELEM = L"prop";
-TMXCompiler::TMXCompiler()
+TMXCompiler::TMXCompiler() :
+reader(0)
{
LtLocale::tryToSetLocale();
alphabet.includeSymbol(L"<n>"); // -1 -> numbers
diff --git a/lttoolbox/tmx_compiler.h b/lttoolbox/tmx_compiler.h
index f156381..9f2009e 100644
--- a/lttoolbox/tmx_compiler.h
+++ b/lttoolbox/tmx_compiler.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TMXCOMPILER_
#define _TMXCOMPILER_
diff --git a/lttoolbox/trans_exe.cc b/lttoolbox/trans_exe.cc
index 96d5ced..382152d 100644
--- a/lttoolbox/trans_exe.cc
+++ b/lttoolbox/trans_exe.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/trans_exe.h>
@@ -22,7 +20,8 @@
#include <lttoolbox/lttoolbox_config.h>
#include <lttoolbox/my_stdio.h>
-TransExe::TransExe()
+TransExe::TransExe() :
+initial_id(0)
{
}
diff --git a/lttoolbox/trans_exe.h b/lttoolbox/trans_exe.h
index d5f9e10..2bfe49f 100644
--- a/lttoolbox/trans_exe.h
+++ b/lttoolbox/trans_exe.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TRANSEXE_
diff --git a/lttoolbox/transducer.cc b/lttoolbox/transducer.cc
index 2cffc71..32b11dd 100644
--- a/lttoolbox/transducer.cc
+++ b/lttoolbox/transducer.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/transducer.h>
#include <lttoolbox/compression.h>
@@ -34,9 +32,9 @@ Transducer::newState()
while(transitions.find(nstate) != transitions.end())
{
nstate++;
- }
+ }
transitions[nstate].clear(); // force creating
-
+
return nstate;
}
@@ -96,8 +94,8 @@ Transducer::insertSingleTransduction(int const tag, int const source)
return range.first->second;
}
}
- return -1;
- }
+ return -1;
+ }
else
{
return -1;
@@ -118,13 +116,13 @@ Transducer::insertNewSingleTransduction(int const tag, int const source)
}
int
-Transducer::insertTransducer(int const source, Transducer &t,
+Transducer::insertTransducer(int const source, Transducer &t,
int const epsilon_tag)
{
map<int, int> relacion;
t.joinFinals(epsilon_tag);
-
+
for(map<int, multimap<int, int> >::const_iterator it = t.transitions.begin(),
limit = t.transitions.end();
it != limit; it++)
@@ -136,21 +134,21 @@ Transducer::insertTransducer(int const source, Transducer &t,
it != t.transitions.end(); it++)
{
for(multimap<int, int>::const_iterator it2 = it->second.begin(),
- limit2 = (it->second).end();
+ limit2 = (it->second).end();
it2 != limit2; it2++)
{
transitions[relacion[it->first]].insert(pair<int, int>(it2->first, relacion[it2->second]));
}
}
- transitions[source].insert(pair<int, int>(epsilon_tag,
+ transitions[source].insert(pair<int, int>(epsilon_tag,
relacion[t.initial]));
return relacion[*(t.finals.begin())];
}
void
-Transducer::linkStates(int const source, int const destino,
+Transducer::linkStates(int const source, int const destino,
int const etiqueta)
{
@@ -214,7 +212,7 @@ set<int>
Transducer::closure(int const state, int const epsilon_tag)
{
set<int> nonvisited, result;
-
+
nonvisited.insert(state);
result.insert(state);
@@ -245,14 +243,14 @@ Transducer::joinFinals(int const epsilon_tag)
{
int state = newState();
- for(set<int>::iterator it = finals.begin(), limit = finals.end();
+ for(set<int>::iterator it = finals.begin(), limit = finals.end();
it != limit; it++)
{
linkStates(*it, state, epsilon_tag);
- }
+ }
finals.clear();
- finals.insert(state);
+ finals.insert(state);
}
else if(finals.size() == 0)
{
@@ -273,7 +271,7 @@ Transducer::isEmptyIntersection(set<int> const &s1, set<int> const &s2)
{
return false;
}
- }
+ }
}
else
{
@@ -311,15 +309,15 @@ Transducer::determinize(int const epsilon_tag)
{
finals_prima.insert(0);
}
-
+
int t = 0;
while(talla_Q_prima != Q_prima.size())
{
talla_Q_prima = Q_prima.size();
R[(t+1)%2].clear();
-
- for(set<int>::iterator it = R[t].begin(), limit = R[t].end();
+
+ for(set<int>::iterator it = R[t].begin(), limit = R[t].end();
it != limit; it++)
{
if(!isEmptyIntersection(Q_prima[*it], finals))
@@ -327,9 +325,9 @@ Transducer::determinize(int const epsilon_tag)
finals_prima.insert(*it);
}
- map<int, set<int> > mymap;
+ map<int, set<int> > mymap;
- for(set<int>::iterator it2 = Q_prima[*it].begin(),
+ for(set<int>::iterator it2 = Q_prima[*it].begin(),
limit2 = Q_prima[*it].end();
it2 != limit2; it2++)
{
@@ -340,8 +338,8 @@ Transducer::determinize(int const epsilon_tag)
if(it3->first != epsilon_tag)
{
set<int> c = closure(it3->second, epsilon_tag);
-
- for(set<int>::iterator it4 = c.begin(), limit4 = c.end();
+
+ for(set<int>::iterator it4 = c.begin(), limit4 = c.end();
it4 != limit4; it4++)
{
mymap[it3->first].insert(*it4);
@@ -350,21 +348,21 @@ Transducer::determinize(int const epsilon_tag)
}
}
- // adding new states
- for(map<int, set<int> >::iterator it2 = mymap.begin(), limit2 = mymap.end();
+ // adding new states
+ for(map<int, set<int> >::iterator it2 = mymap.begin(), limit2 = mymap.end();
it2 != limit2; it2++)
- {
+ {
if(Q_prima_inv.find(it2->second) == Q_prima_inv.end())
{
int etiq = Q_prima.size();
Q_prima[etiq] = it2->second;
Q_prima_inv[it2->second] = etiq;
R[(t+1)%2].insert(Q_prima_inv[it2->second]);
- transitions_prima[etiq].clear();
+ transitions_prima[etiq].clear();
}
transitions_prima[*it].insert(pair<int, int>(it2->first, Q_prima_inv[it2->second]));
}
- }
+ }
t = (t+1)%2;
}
@@ -388,7 +386,7 @@ void
Transducer::optional(int const epsilon_tag)
{
joinFinals(epsilon_tag);
- int state = newState();
+ int state = newState();
linkStates(state, initial, epsilon_tag);
initial = state;
@@ -486,7 +484,7 @@ Transducer::write(FILE *output, int const decalage)
Compression::multibyte_write(finals.size(), output);
int base = 0;
- for(set<int>::iterator it = finals.begin(), limit = finals.end();
+ for(set<int>::iterator it = finals.begin(), limit = finals.end();
it != limit; it++)
{
Compression::multibyte_write(*it - base, output);
@@ -506,7 +504,7 @@ Transducer::write(FILE *output, int const decalage)
it2 != limit2; it2++)
{
Compression::multibyte_write(it2->first-tagbase+decalage, output);
- tagbase = it2->first;
+ tagbase = it2->first;
if(it2->second >= it->first)
{
@@ -539,7 +537,7 @@ Transducer::read(FILE *input, int const decalage)
base = Compression::multibyte_read(input);
int number_of_states = base;
- int current_state = 0;
+ int current_state = 0;
while(number_of_states > 0)
{
int number_of_local_transitions = Compression::multibyte_read(input);
@@ -554,7 +552,7 @@ Transducer::read(FILE *input, int const decalage)
new_t.transitions[state].clear(); // force create
}
new_t.transitions[current_state].insert(pair<int, int>(tagbase, state));
- }
+ }
number_of_states--;
current_state++;
}
@@ -581,12 +579,12 @@ Transducer::reverse(int const epsilon_tag)
joinFinals(epsilon_tag);
map<int, multimap<int, int> > temporal;
-
+
for(map<int, multimap<int, int> >::reverse_iterator it = transitions.rbegin(); it != transitions.rend(); it++)
{
multimap<int, int> aux = it->second;
it->second.clear();
- for(multimap<int, int>::iterator it2 = aux.begin(), limit2 = aux.end();
+ for(multimap<int, int>::iterator it2 = aux.begin(), limit2 = aux.end();
it2 != limit2; it2++)
{
if(it2->second >= it->first)
@@ -602,11 +600,11 @@ Transducer::reverse(int const epsilon_tag)
{
(it->second).insert(temporal[it->first].begin(), temporal[it->first].end());
temporal.erase(it->first);
- }
+ }
}
- for(map<int, multimap<int, int> >::reverse_iterator it = temporal.rbegin(),
- limit = temporal.rend();
+ for(map<int, multimap<int, int> >::reverse_iterator it = temporal.rbegin(),
+ limit = temporal.rend();
it != limit; it++)
{
for(multimap<int, int>::iterator it2 = it->second.begin(),
@@ -615,8 +613,8 @@ Transducer::reverse(int const epsilon_tag)
{
transitions[it->first].insert(pair<int, int>(it2->first, it2->second));
}
- }
-
+ }
+
int tmp = initial;
initial = *(finals.begin());
finals.clear();
@@ -631,8 +629,8 @@ Transducer::show(Alphabet const &alphabet, FILE *output, int const epsilon_tag)
for(map<int, multimap<int, int> >::const_iterator it = transitions.begin(); it != transitions.end(); it++)
{
multimap<int, int> aux = it->second;
-
- for(multimap<int, int>::const_iterator it2 = aux.begin(); it2 != aux.end(); it2++)
+
+ for(multimap<int, int>::const_iterator it2 = aux.begin(); it2 != aux.end(); it2++)
{
pair<int, int> t = alphabet.decode(it2->first);
fwprintf(output, L"%d\t", it->first);
@@ -643,7 +641,7 @@ Transducer::show(Alphabet const &alphabet, FILE *output, int const epsilon_tag)
{
fwprintf(output, L"ε\t", l.c_str());
}
- else
+ else
{
fwprintf(output, L"%S\t", l.c_str());
}
@@ -653,13 +651,13 @@ Transducer::show(Alphabet const &alphabet, FILE *output, int const epsilon_tag)
{
fwprintf(output, L"ε\t", r.c_str());
}
- else
+ else
{
fwprintf(output, L"%S\t", r.c_str());
}
fwprintf(output, L"\n");
- }
- }
+ }
+ }
for(set<int>::const_iterator it3 = finals.begin(); it3 != finals.end(); it3++)
{
@@ -667,7 +665,7 @@ Transducer::show(Alphabet const &alphabet, FILE *output, int const epsilon_tag)
}
}
-int
+int
Transducer::getStateSize(int const state)
{
set<int> states;
@@ -689,10 +687,10 @@ Transducer::recognise(wstring patro, Alphabet &a, FILE *err)
bool accepted = false;
set<int> states ;
- set<int> myclosure1 = closure(getInitial(), 0);
- states.insert(myclosure1.begin(), myclosure1.end());
+ set<int> myclosure1 = closure(getInitial(), 0);
+ states.insert(myclosure1.begin(), myclosure1.end());
// For each of the characters in the input string
- for(wstring::iterator it = patro.begin(); it != patro.end(); it++)
+ for(wstring::iterator it = patro.begin(); it != patro.end(); it++)
{
set<int> new_state; //Transducer::closure(int const state, int const epsilon_tag)
int sym = *it;
@@ -701,11 +699,11 @@ Transducer::recognise(wstring patro, Alphabet &a, FILE *err)
for(set<int>::iterator it2 = states.begin(); it2 != states.end(); it2++)
{
multimap<int, int> p = transitions[*it2];
- // For each of the transitions in the state
+ // For each of the transitions in the state
for(multimap<int, int>::iterator it3 = p.begin(); it3 != p.end(); it3++)
- {
-
+ {
+
pair<int, int> t = a.decode(it3->first);
wstring l = L"";
a.getSymbol(l, t.first);
@@ -717,9 +715,9 @@ Transducer::recognise(wstring patro, Alphabet &a, FILE *err)
if(l.find(*it) != wstring::npos)
{
set<int> myclosure = closure(it3->second, 0);
- //wcerr << L"Before closure alives: " <<new_state.size() << endl;
+ //wcerr << L"Before closure alives: " <<new_state.size() << endl;
new_state.insert(myclosure.begin(), myclosure.end());
- //wcerr << L"After closure alives: " <<new_state.size() << endl;
+ //wcerr << L"After closure alives: " <<new_state.size() << endl;
}
}
}
@@ -727,7 +725,7 @@ Transducer::recognise(wstring patro, Alphabet &a, FILE *err)
}
for(set<int>::iterator it4 = states.begin(); it4 != states.end(); it4++)
{
- if(isFinal(*it4))
+ if(isFinal(*it4))
{
accepted = true;
}
@@ -791,7 +789,7 @@ Transducer::copyWithTagsFirst(int start,
std::set<SearchState> finally;
SearchState current;
todo.push_front(make_pair(start,start));
-
+
while(todo.size() > 0) {
current = todo.front();
todo.pop_front();
@@ -839,7 +837,7 @@ Transducer::copyWithTagsFirst(int start,
{
finally.insert(make_pair(this_src, this_lemqlast));
}
-
+
if(seen.find(make_pair(this_trg, this_lemqlast)) == seen.end())
{
todo.push_front(make_pair(this_trg, this_lemqlast));
@@ -862,7 +860,7 @@ Transducer::copyWithTagsFirst(int start,
}
} // end for transitions
} // end while todo
-
+
for(set<SearchState>::iterator it = finally.begin(), limit = finally.end();
it != limit;
it++)
@@ -870,7 +868,7 @@ Transducer::copyWithTagsFirst(int start,
int last_tag = it->first,
this_lemqlast = it->second;
// copy lemq, letting this_lemqlast be the only final state in newlemq
- Transducer newlemq = Transducer(lemq);
+ Transducer newlemq = Transducer(lemq);
newlemq.finals.clear();
newlemq.finals.insert(states_this_lemq[this_lemqlast]);
newlemq.minimize();
@@ -964,7 +962,7 @@ Transducer::intersect(Transducer &trimmer,
joinFinals(epsilon_tag);
/**
* this ∩ trimmer = trimmed
- *
+ *
* The trimmer is typically a bidix passed through appendDotStar.
*/
@@ -982,7 +980,7 @@ Transducer::intersect(Transducer &trimmer,
trimmed.initial));
typedef std::pair<int, std::pair<int, int > > SearchState;
- // first: currently searched state in this;
+ // first: currently searched state in this;
// second.first: currently matched trimmer state;
// second.second: last matched trimmer state before a + restart (or the same second.first if no + is seen yet).
// When several trimmer-states match from one this-state, we just get several triplets.
@@ -1001,7 +999,7 @@ Transducer::intersect(Transducer &trimmer,
trimmer_src = current.second.first,
trimmer_preplus = current.second.second,
trimmer_preplus_next = trimmer_preplus;
-
+
if(states_this_trimmed.find(make_pair(this_src, trimmer_src)) == states_this_trimmed.end()) {
wcerr <<L"Error: couldn't find "<<this_src<<L","<<trimmer_src<<L" in state map"<<endl;
exit(EXIT_FAILURE);
@@ -1023,7 +1021,7 @@ Transducer::intersect(Transducer &trimmer,
trimmer_preplus_next = trimmer_trg;
}
- if(trimmer_left == L"")
+ if(trimmer_left == L"")
{
next = make_pair(this_src, make_pair(trimmer_trg, trimmer_preplus_next));
std::pair<int, int> states_trg = make_pair(this_src, trimmer_trg);
@@ -1053,9 +1051,12 @@ Transducer::intersect(Transducer &trimmer,
if(this_right == COMPILER_JOIN_ELEM)
{
- // Go to the start in trimmer, but record where we restarted
- // from in case we later see a #:
- next = make_pair(this_trg, make_pair(trimmer.initial, trimmer_src));
+ if(trimmer_preplus == trimmer_src) {
+ // Keep the old preplus state if it was set; equal to current trimmer state means unset:
+ trimmer_preplus_next = trimmer_src; // not _trg when join!
+ }
+ // Go to the start in trimmer, but record where we restarted from in case we later see a #:
+ next = make_pair(this_trg, make_pair(trimmer.initial, trimmer_preplus_next));
if(seen.find(next) == seen.end())
{
todo.push_front(next);
@@ -1127,7 +1128,7 @@ Transducer::intersect(Transducer &trimmer,
if(trimmer_left != L"" && this_right == trimmer_left) // we've already dealt with trimmer epsilons
{
next = make_pair(this_trg, make_pair(trimmer_trg, trimmer_preplus_next));
- if(seen.find(next) == seen.end())
+ if(seen.find(next) == seen.end())
{
todo.push_front(next);
}
@@ -1161,6 +1162,6 @@ Transducer::intersect(Transducer &trimmer,
}
// We do not minimize here, in order to let lt_trim print a warning
- // ()instead of exiting the whole program) if no finals.
+ // (instead of exiting the whole program) if no finals.
return trimmed;
}
diff --git a/lttoolbox/transducer.h b/lttoolbox/transducer.h
index 8b8e31c..958f02c 100644
--- a/lttoolbox/transducer.h
+++ b/lttoolbox/transducer.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TRANSDUCTOR_
#define _TRANSDUCTOR_
@@ -36,7 +34,7 @@ class Transducer
{
private:
friend class MatchExe;
-
+
/**
* Initial state
*/
@@ -103,7 +101,7 @@ public:
/**
* Insertion of a single transduction, creating a new target state
- * if needed
+ * if needed
* @param tag the tag of the transduction being inserted
* @param source the source state of the new transduction
* @return the target state
@@ -128,8 +126,8 @@ public:
* @return the new target state
*/
int insertTransducer(int const source, Transducer &t,
- int const epsilon_tag = 0);
-
+ int const epsilon_tag = 0);
+
/**
* Link two existing states by a transduction
* @param source the source state
@@ -212,7 +210,7 @@ public:
void optional(int const epsilon_tag = 0);
/**
- * Make a transducer cyclic (link final states with initial state with
+ * Make a transducer cyclic (link final states with initial state with
* empty transductions)
* @param epsilon_tag the tag to take as epsilon
*/
@@ -234,7 +232,7 @@ public:
* @return true if the transducer is empty
*/
bool isEmpty() const;
-
+
/**
* Check if the transducer has no final state(s)
* @return true if the set of final states is empty
@@ -289,7 +287,7 @@ public:
*/
void unionWith(Alphabet &my_a,
Transducer &t,
- int const epsilon_tag = 0);
+ int const epsilon_tag = 0);
/**
* Converts this class into a "prefix transducer", ie. for any final
diff --git a/lttoolbox/xml_parse_util.cc b/lttoolbox/xml_parse_util.cc
index 69c905b..3702c76 100644
--- a/lttoolbox/xml_parse_util.cc
+++ b/lttoolbox/xml_parse_util.cc
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <lttoolbox/xml_parse_util.h>
diff --git a/lttoolbox/xml_parse_util.h b/lttoolbox/xml_parse_util.h
index 0e16d3c..b3db2a5 100644
--- a/lttoolbox/xml_parse_util.h
+++ b/lttoolbox/xml_parse_util.h
@@ -12,9 +12,7 @@
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _XMLPARSEUTIL_
#define _XMLPARSEUTIL_
diff --git a/tests/data/biproc-skips-tags-mono.dix b/tests/data/biproc-skips-tags-mono.dix
new file mode 100644
index 0000000..a2e739f
--- /dev/null
+++ b/tests/data/biproc-skips-tags-mono.dix
@@ -0,0 +1,14 @@
+<dictionary>
+ <sdefs>
+ <sdef n="KEPT"/>
+ <sdef n="STILLMATCHING"/>
+ <sdef n="MATCHSOFAR"/>
+ <sdef n="NONMATCHL"/>
+ <sdef n="NONMATCHR"/>
+ </sdefs>
+
+ <section id="main" type="standard">
+<e><p><l>vihki<s n="KEPT"/><s n="MATCHSOFAR"/><s n="STILLMATCHING"/><s n="NONMATCHL"/></l><r>vihki<s n="KEPT"/><s n="MATCHSOFAR"/><s n="STILLMATCHING"/><s n="NONMATCHR"/></r></p></e>
+<e><p><l>vihki<s n="KEPT"/></l><r>vihki<s n="KEPT"/></r></p></e>
+ </section>
+</dictionary>
diff --git a/tests/data/double-clitics-bi.dix b/tests/data/double-clitics-bi.dix
new file mode 100644
index 0000000..f0364fd
--- /dev/null
+++ b/tests/data/double-clitics-bi.dix
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dictionary>
+ <alphabet/>
+ <sdefs>
+ <sdef n="prn" />
+ <sdef n="vblex" />
+ </sdefs>
+
+
+ <section id="main" type="standard">
+<e> <p><l>a<g><b/>d<s n="vblex"/></g></l><r>A<s n="vblex"/></r></p></e>
+<e> <p><l>b<s n="prn"/></l><r>B<s n="prn"/></r></p></e>
+<e> <p><l>c<s n="prn"/></l><r>C<s n="prn"/></r></p></e>
+<e> <p><l>x<s n="vblex"/></l><r>X<s n="vblex"/></r></p></e>
+ </section>
+</dictionary>
diff --git a/tests/data/double-clitics-mono.dix b/tests/data/double-clitics-mono.dix
new file mode 100644
index 0000000..39c9706
--- /dev/null
+++ b/tests/data/double-clitics-mono.dix
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dictionary>
+ <alphabet>ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅabcdefghijklmnopqrstuvwxyzæøåcqwxzCQWXZéèêóòâôÉÊÈÓÔÒÂáàÁÀäÄöÖšŠčČðđÐ</alphabet>
+ <sdefs>
+ <sdef n="vblex"/>
+ <sdef n="pp"/>
+ <sdef n="prn" />
+ <sdef n="ger" />
+ <sdef n="inf" />
+ <sdef n="enc" />
+ </sdefs>
+ <pardefs>
+<pardef n="__vblex">
+ <e> <p><l>-b-c</l> <r><s n="vblex"/><s n="ger"/><j/>b<s n="prn"/><s n="enc"/><j/>c<s n="prn"/><s n="enc"/></r></p></e>
+ <e> <p><l></l> <r><s n="vblex"/><s n="inf"/></r></p></e>
+</pardef>
+ </pardefs>
+
+<section id="main" type="standard">
+ <e><i>a</i><par n="__vblex"/> <p><l><b/>d</l><r><g><b/>d</g></r></p></e>
+ <e><i>x</i><par n="__vblex"/></e>
+ <e><i>y</i><par n="__vblex"/></e>
+</section>
+</dictionary>
diff --git a/tests/data/en-af.automorf.bin b/tests/data/en-af.automorf.bin
deleted file mode 100644
index 963cc8a..0000000
Binary files a/tests/data/en-af.automorf.bin and /dev/null differ
diff --git a/tests/lt_proc/__init__.py b/tests/lt_proc/__init__.py
index 497f0cb..805784c 100644
--- a/tests/lt_proc/__init__.py
+++ b/tests/lt_proc/__init__.py
@@ -1 +1,23 @@
-from null_flush import *
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import sys
+import unittest
+from proctest import ProcTest
+
+class ValidInput(unittest.TestCase, ProcTest):
+ inputs = ["ab",
+ "ABC jg",
+ "y n"]
+ expectedOutputs = ["^ab/ab<n><ind>$",
+ "^ABC/AB<n><def>$ ^jg/j<pr>+g<n>$",
+ "^y/y<n><ind>$ ^n/n<n><ind>$"]
+
+class BiprocSkipTags(unittest.TestCase, ProcTest):
+ procdix = "data/biproc-skips-tags-mono.dix"
+ procflags = ["-b", "-z"]
+ inputs = ["^vihki<KEPT><MATCHSOFAR><STILLMATCHING><SOMEHOWKEPT1><@SOMEHOWKEPT2>$"]
+ expectedOutputs = ["^vihki<KEPT><MATCHSOFAR><STILLMATCHING><SOMEHOWKEPT1><@SOMEHOWKEPT2>/vihki<KEPT><MATCHSOFAR><STILLMATCHING><SOMEHOWKEPT1><@SOMEHOWKEPT2>$"]
+
+# These fail on some systems:
+#from null_flush_invalid_stream_format import *
diff --git a/tests/lt_proc/null_flush.py b/tests/lt_proc/null_flush.py
deleted file mode 100644
index 0b33226..0000000
--- a/tests/lt_proc/null_flush.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
-import sys
-import unittest
-from proctest import ProcTest
-
-class ValidInput(unittest.TestCase, ProcTest):
- inputs = [s + ".[][\n]" for s in
- ["I",
- "like apples",
- "very much"]]
-
- expectedOutputs = [s + "^./.<sent>$[][\n]" for s in
- ["^I/prpers<prn><subj><p1><mf><sg>/PRPERS<prn><subj><p1><mf><sg>$",
- "^like/like<pr>/like<vblex><inf>/like<vblex><pres>$ ^apples/apple<n><pl>$",
- "^very much/very much<adv>$"]]
-
-class NoSuperblankBeforeNUL(unittest.TestCase, ProcTest):
- inputs = [u"The dog gladly eats homework.",
- u"If wé swim fast enough,",
- u"we should reach shallow waters.",
- u"before;",
- u"the sharks;",
- u"come."]
-
- expectedOutputs = [u"^The/The<det><def><sp>$ ^dog/dog<n><sg>$ ^gladly/gladly<adv>$ ^eats/eat<vblex><pri><p3><sg>$ ^homework/homework<n><unc><sg>$",
- u"^If/If<cnjadv>$ ^wé/*wé$ ^swim/swim<vblex><inf>/swim<vblex><pres>$ ^fast/fast<adj><sint>/fast<n><sg>$ ^enough/enough<adv>/enough<det><qnt><sp>$",
- u"^we/prpers<prn><subj><p1><mf><pl>$ ^should/should<vaux><inf>$ ^reach/reach<vblex><inf>/reach<vblex><pres>$ ^shallow/shallow<adj><sint>$ ^waters/water<n><pl>$",
- u"^before/before<adv>/before<cnjadv>/before<pr>$",
- u"^the/the<det><def><sp>$ ^sharks/shark<n><pl>$",
- u"^come/come<vblex><inf>/come<vblex><pres>/come<vblex><pp>$"]
-
-class WronglyEscapedLetter(unittest.TestCase, ProcTest):
- inputs = ["before you g\\o to bed.[][\n]"]
- expectedOutputs = ["^before/before<adv>/before<cnjadv>/before<pr>$ ^you/prpers<prn><subj><p2><mf><sp>/prpers<prn><obj><p2><mf><sp>$ "]
- expectedRetCode = 1
-
-
-class UnescapedAngleBracket(unittest.TestCase, ProcTest):
- inputs = ["Simon prefers dark chocolate>.[][\n]"]
- expectedOutputs = ["^Simon/Simon<np><ant><m><sg>$ ^prefers/prefer<vblex><pri><p3><sg>$ ^dark/dark<adj><sint>/dark<n><sg>$ "]
- expectedRetCode = 1
-
-class UnclosedSuperblank(unittest.TestCase, ProcTest):
- inputs = ["you should always[ eat"]
- #expectedOutputs = ["^you/prpers<prn><subj><p2><mf><sp>/prpers<prn><obj><p2><mf><sp>$ ^should/should<vaux><inf>$ "]
- expectedOutputs = [""]
- expectedRetCode = 1
diff --git a/tests/lt_proc/null_flush_invalid_stream_format.py b/tests/lt_proc/null_flush_invalid_stream_format.py
new file mode 100644
index 0000000..cd9ddd6
--- /dev/null
+++ b/tests/lt_proc/null_flush_invalid_stream_format.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import sys
+import unittest
+from proctest import ProcTest
+
+# These tests are for invalid Apertium Stream format; lt-proc's output
+# for these seems system-dependent, so we can't use them as regression
+# tests (until that's fixed, if it's worth fixing).
+
+class NoSuperblankBeforeNUL(unittest.TestCase, ProcTest):
+ inputs = ["The dog gladly eats homework.",
+ "If wé swim fast enough,",
+ "we should reach shallow waters.",
+ "before;",
+ "the sharks;",
+ "come."]
+
+ expectedOutputs = ["^The/The<det><def><sp>$ ^dog/dog<n><sg>$ ^gladly/gladly<adv>$ ^eats/eat<vblex><pri><p3><sg>$ ^homework/homework<n><unc><sg>$",
+ "^If/If<cnjadv>$ ^wé/*wé$ ^swim/swim<vblex><inf>/swim<vblex><pres>$ ^fast/fast<adj><sint>/fast<n><sg>$ ^enough/enough<adv>/enough<det><qnt><sp>$",
+ "^we/prpers<prn><subj><p1><mf><pl>$ ^should/should<vaux><inf>$ ^reach/reach<vblex><inf>/reach<vblex><pres>$ ^shallow/shallow<adj><sint>$ ^waters/water<n><pl>$",
+ "^before/before<adv>/before<cnjadv>/before<pr>$",
+ "^the/the<det><def><sp>$ ^sharks/shark<n><pl>$",
+ "^come/come<vblex><inf>/come<vblex><pres>/come<vblex><pp>$"]
+
+class WronglyEscapedLetter(unittest.TestCase, ProcTest):
+ inputs = ["before you g\\o to bed.[][\n]"]
+ expectedOutputs = ["^before/before<adv>/before<cnjadv>/before<pr>$ ^you/prpers<prn><subj><p2><mf><sp>/prpers<prn><obj><p2><mf><sp>$ "]
+ expectedRetCodeFail = True
+
+
+class UnescapedAngleBracket(unittest.TestCase, ProcTest):
+ inputs = ["Simon prefers dark chocolate>.[][\n]"]
+ expectedOutputs = ["^Simon/Simon<np><ant><m><sg>$ ^prefers/prefer<vblex><pri><p3><sg>$ ^dark/dark<adj><sint>/dark<n><sg>$ "]
+ expectedRetCodeFail = True
+
+class UnclosedSuperblank(unittest.TestCase, ProcTest):
+ inputs = ["you should always[ eat"]
+ #expectedOutputs = ["^you/prpers<prn><subj><p2><mf><sp>/prpers<prn><obj><p2><mf><sp>$ ^should/should<vaux><inf>$ "]
+ expectedOutputs = [""]
+ expectedRetCodeFail = True
diff --git a/tests/lt_trim/__init__.py b/tests/lt_trim/__init__.py
index bbf28ef..446ce34 100644
--- a/tests/lt_trim/__init__.py
+++ b/tests/lt_trim/__init__.py
@@ -8,15 +8,13 @@ from __future__ import unicode_literals
# This is similar to diffing the lt-expand of uncompiled XML dictionaries.
# See also `man hfst-fst2strings'.
+from proctest import ProcTest
import unittest
-from subprocess import call
+from subprocess import Popen, PIPE, call
from tempfile import mkdtemp
from shutil import rmtree
-from proctest import ProcTest
-from subprocess import Popen, PIPE
-
class TrimProcTest(ProcTest):
monodix = "data/minimal-mono.dix"
monodir = "lr"
@@ -24,55 +22,31 @@ class TrimProcTest(ProcTest):
bidir = "lr"
procflags = ["-z"]
- def runTest(self):
- tmpd = mkdtemp()
- try:
- self.assertEqual(0, call(["../lttoolbox/lt-comp",
- self.monodir,
- self.monodix,
- tmpd+"/mono.bin"],
- stdout=PIPE))
- self.assertEqual(0, call(["../lttoolbox/lt-comp",
- self.bidir,
- self.bidix,
- tmpd+"/bi.bin"],
- stdout=PIPE))
- self.assertEqual(0, call(["../lttoolbox/lt-trim",
- tmpd+"/mono.bin",
- tmpd+"/bi.bin",
- tmpd+"/trimmed.bin"],
- stdout=PIPE))
-
- self.proc = Popen(["../lttoolbox/lt-proc"] + self.procflags + [tmpd+"/trimmed.bin"],
- stdin=PIPE,
- stdout=PIPE,
- stderr=PIPE)
-
- for inp,exp in zip(self.inputs, self.expectedOutputs):
- self.assertEqual( self.communicateFlush(inp+"[][\n]"),
- exp+"[][\n]" )
-
- self.proc.communicate() # let it terminate
- self.proc.stdin.close()
- self.proc.stdout.close()
- self.proc.stderr.close()
- self.assertEqual( self.proc.poll(),
- self.expectedRetCode )
-
-
- finally:
- rmtree(tmpd)
+ def compileTest(self, tmpd):
+ self.assertEqual(0, call(["../lttoolbox/lt-comp",
+ self.monodir,
+ self.monodix,
+ tmpd+"/mono.bin"],
+ stdout=PIPE))
+ self.assertEqual(0, call(["../lttoolbox/lt-comp",
+ self.bidir,
+ self.bidix,
+ tmpd+"/bi.bin"],
+ stdout=PIPE))
+ self.assertEqual(0, call(["../lttoolbox/lt-trim",
+ tmpd+"/mono.bin",
+ tmpd+"/bi.bin",
+ tmpd+"/compiled.bin"],
+ stdout=PIPE))
class TrimNormalAndJoin(unittest.TestCase, TrimProcTest):
inputs = ["abc", "ab", "y", "n", "jg", "jh", "kg"]
expectedOutputs = ["^abc/ab<n><def>$", "^ab/ab<n><ind>$", "^y/y<n><ind>$", "^n/*n$", "^jg/j<pr>+g<n>$", "^jh/*jh$", "^kg/*kg$"]
- expectedRetCode = 0
class TrimCmp(unittest.TestCase, TrimProcTest):
inputs = ["a", "b", "c", "d", "aa", "ab", "ac", "ad", "ba", "bb", "bc", "bd", "ca", "cb", "cc", "cd", "da", "db", "dc", "dd", ]
expectedOutputs = ["^a/*a$", "^b/b<n>$", "^c/*c$", "^d/d<n>$", "^aa/*aa$", "^ab/a<n>+b<n>$", "^ac/*ac$", "^ad/a<n>+d<n>$", "^ba/*ba$", "^bb/*bb$", "^bc/*bc$", "^bd/*bd$", "^ca/*ca$", "^cb/d<n>+b<n>$", "^cc/*cc$", "^cd/d<n>+d<n>$", "^da/*da$", "^db/*db$", "^dc/*dc$", "^dd/*dd$"]
- expectedRetCode = 0
monodix = "data/cmp-mono.dix"
bidix = "data/cmp-bi.dix"
procflags = ["-e", "-z"]
@@ -80,21 +54,18 @@ class TrimCmp(unittest.TestCase, TrimProcTest):
class TrimLongleft(unittest.TestCase, TrimProcTest):
inputs = ["herdende"]
expectedOutputs = ["^herdende/herde<adj><pprs>$"]
- expectedRetCode = 0
monodix = "data/longleft-mono.dix"
bidix = "data/longleft-bi.dix"
class DivergingPaths(unittest.TestCase, TrimProcTest):
inputs = ["xa ya"]
expectedOutputs = ["^xa/*xa$ ^ya/ya<vblex>$"]
- expectedRetCode = 0
monodix = "data/diverging-paths-mono.dix"
bidix = "data/diverging-paths-bi.dix"
class MergingPaths(unittest.TestCase, TrimProcTest):
inputs = ["en ei"]
expectedOutputs = ["^en/en<det><qnt><m><sg>$ ^ei/en<det><qnt><f><sg>$"]
- expectedRetCode = 0
monodix = "data/merging-paths-mono.dix"
bidir = "rl"
bidix = "data/merging-paths-bi.dix"
@@ -102,7 +73,6 @@ class MergingPaths(unittest.TestCase, TrimProcTest):
class BidixPardef(unittest.TestCase, TrimProcTest):
inputs = ["c"]
expectedOutputs = ["^c/c<vblex><inf>$"]
- expectedRetCode = 0
monodix = "data/bidixpardef-mono.dix"
bidir = "rl"
bidix = "data/bidixpardef-bi.dix"
@@ -110,7 +80,6 @@ class BidixPardef(unittest.TestCase, TrimProcTest):
class UnbalancedEpsilons(unittest.TestCase, TrimProcTest):
inputs = ["re", "rer", "res", "ret"]
expectedOutputs = ["^re/re<vblex><inf>$", "^rer/re<vblex><pres>$", "^res/re<vblex><pres>$", "^ret/re<vblex><pret>$"]
- expectedRetCode = 0
monodix = "data/unbalanced-epsilons-mono.dix"
bidir = "rl"
bidix = "data/unbalanced-epsilons-bi.dix"
@@ -118,7 +87,6 @@ class UnbalancedEpsilons(unittest.TestCase, TrimProcTest):
class LeftUnbalancedEpsilons(unittest.TestCase, TrimProcTest):
inputs = ["a"]
expectedOutputs = ["^a/a<adv>$"]
- expectedRetCode = 0
monodix = "data/left-unbalanced-epsilons-mono.dix"
bidir = "rl"
bidix = "data/left-unbalanced-epsilons-bi.dix"
@@ -126,21 +94,18 @@ class LeftUnbalancedEpsilons(unittest.TestCase, TrimProcTest):
class Group(unittest.TestCase, TrimProcTest):
inputs = ["abc", "pq", "pqr", "pqs", "xyz"]
expectedOutputs = ["^abc/ab<n><ind>#c$", "^pq/pq<n><ind>$", "^pqr/pq<n><ind>#r$", "^pqs/*pqs$", "^xyz/*xyz$"]
- expectedRetCode = 0
monodix = "data/group-mono.dix"
bidix = "data/group-bi.dix"
class GroupUnbalancedEpsilons(unittest.TestCase, TrimProcTest):
inputs = ["def"]
expectedOutputs = ["^def/de<n><f><sg>#f$"]
- expectedRetCode = 0
monodix = "data/group-mono.dix"
bidix = "data/group-bi.dix"
class BothJoinAndGroup(unittest.TestCase, TrimProcTest):
inputs = ["jkl", "jkm", "jnl"]
expectedOutputs = ["^jkl/j<n><ind>+k<n><ind>#l$", "^jkm/*jkm$", "^jnl/*jnl$"]
- expectedRetCode = 0
monodix = "data/group-mono.dix"
bidix = "data/group-bi.dix"
@@ -148,18 +113,23 @@ class BothJoinAndGroup(unittest.TestCase, TrimProcTest):
class FinalEpsilons(unittest.TestCase, TrimProcTest):
inputs = ["ea"]
expectedOutputs = ["^ea/e<n>#a$"]
- expectedRetCode = 0
monodix = "data/final-epsilons-mono.dix"
bidix = "data/final-epsilons-bi.dix"
class BidixEpsilons(unittest.TestCase, TrimProcTest):
inputs = ["aa ba"]
expectedOutputs = ["^aa/aa<vblex><pp>$ ^ba/*ba$"]
- expectedRetCode = 0
monodix = "data/bidix-epsilons-mono.dix"
bidix = "data/bidix-epsilons-bi.dix"
bidir = "rl"
+class DoubleClitics(unittest.TestCase, TrimProcTest):
+ inputs = ["a-b-c d"]
+ expectedOutputs = ["^a-b-c d/a<vblex><ger>+b<prn><enc>+c<prn><enc># d$"]
+ monodix = "data/double-clitics-mono.dix"
+ bidix = "data/double-clitics-bi.dix"
+ bidir = "lr"
+
class Empty(unittest.TestCase, TrimProcTest):
def runTest(self):
diff --git a/tests/proctest.py b/tests/proctest.py
index 08ec836..7e2e490 100644
--- a/tests/proctest.py
+++ b/tests/proctest.py
@@ -1,7 +1,9 @@
# -*- coding: utf-8 -*-
import itertools
-from subprocess import Popen, PIPE
+from subprocess import Popen, PIPE, call
+from tempfile import mkdtemp
+from shutil import rmtree
import signal
class Alarm(Exception):
@@ -11,10 +13,12 @@ class ProcTest():
"""See lt_proc test for how to use this. Override runTest if you don't
want to use NUL flushing."""
- cmdLine = ["../lttoolbox/.libs/lt-proc", "-z", "data/en-af.automorf.bin"]
+ procdix = "data/minimal-mono.dix"
+ procdir = "lr"
+ procflags = ["-z"]
inputs = itertools.repeat("")
expectedOutputs = itertools.repeat("")
- expectedRetCode = 0
+ expectedRetCodeFail = False
def alarmHandler(self, signum, frame):
raise Alarm
@@ -28,7 +32,7 @@ class ProcTest():
def communicateFlush(self, string):
self.proc.stdin.write(string.encode('utf-8'))
- self.proc.stdin.write('\0')
+ self.proc.stdin.write(b'\0')
self.proc.stdin.flush()
output = []
@@ -37,25 +41,44 @@ class ProcTest():
char = self.withTimeout(2, self.proc.stdout.read, 1)
except Alarm:
pass
- while char and char != '\0':
+ while char and char != b'\0':
output.append(char)
try:
char = self.withTimeout(2, self.proc.stdout.read, 1)
except Alarm:
break # send what we got up till now
- return "".join(output).decode('utf-8')
+ return b"".join(output).decode('utf-8')
+
+ def compileTest(self, tmpd):
+ self.assertEqual(0, call(["../lttoolbox/lt-comp",
+ self.procdir,
+ self.procdix,
+ tmpd+"/compiled.bin"],
+ stdout=PIPE))
def runTest(self):
- self.proc = Popen(self.cmdLine, stdin=PIPE, stdout=PIPE, stderr=PIPE)
-
- for inp,exp in zip(self.inputs, self.expectedOutputs):
- self.assertEqual( self.communicateFlush(inp),
- exp )
-
- self.proc.communicate() # let it terminate
- self.proc.stdin.close()
- self.proc.stdout.close()
- self.proc.stderr.close()
- self.assertEqual( self.proc.poll(),
- self.expectedRetCode )
+ tmpd = mkdtemp()
+ try:
+ self.compileTest(tmpd)
+ self.proc = Popen(["../lttoolbox/lt-proc"] + self.procflags + [tmpd+"/compiled.bin"],
+ stdin=PIPE,
+ stdout=PIPE,
+ stderr=PIPE)
+
+ for inp, exp in zip(self.inputs, self.expectedOutputs):
+ self.assertEqual(self.communicateFlush(inp+"[][\n]"),
+ exp+"[][\n]")
+
+ self.proc.communicate() # let it terminate
+ self.proc.stdin.close()
+ self.proc.stdout.close()
+ self.proc.stderr.close()
+ retCode = self.proc.poll()
+ if self.expectedRetCodeFail:
+ self.assertNotEqual(retCode, 0)
+ else:
+ self.assertEqual(retCode, 0)
+
+ finally:
+ rmtree(tmpd)
diff --git a/tests/run_tests.py b/tests/run_tests.py
index 475d1fa..a52f2be 100755
--- a/tests/run_tests.py
+++ b/tests/run_tests.py
@@ -9,6 +9,9 @@ import lt_proc, lt_trim
if __name__ == "__main__":
os.chdir(os.path.dirname(__file__))
+ failures = 0
for module in [lt_trim, lt_proc]:
suite = unittest.TestLoader().loadTestsFromModule(module)
- unittest.TextTestRunner(verbosity = 2).run(suite)
+ res = unittest.TextTestRunner(verbosity = 2).run(suite)
+ failures += len(res.failures)
+ sys.exit(min(failures, 255))
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/lttoolbox.git
More information about the debian-science-commits
mailing list