[mbt] 02/03: New upstream version 3.3.1
Maarten van Gompel
proycon-guest at moszumanska.debian.org
Wed Nov 8 21:06:12 UTC 2017
This is an automated email from the git hooks/post-receive script.
proycon-guest pushed a commit to branch master
in repository mbt.
commit 4eda000a6e1d184ddf6875a75c5837a1dddb6712
Author: proycon <proycon at anaproy.nl>
Date: Wed Nov 8 22:03:34 2017 +0100
New upstream version 3.3.1
---
ChangeLog | 116 ++++++++++++++++++++++++++++++++++++---
Makefile.in | 2 +-
NEWS | 15 +++++
config.guess | 28 ++++++----
config.sub | 22 ++++++--
configure | 71 ++++++++----------------
configure.ac | 28 ++--------
include/mbt/Logging.h | 2 +-
include/mbt/MbtAPI.h | 4 +-
include/mbt/Pattern.h | 3 +-
include/mbt/Sentence.h | 2 +-
include/mbt/TagLex.h | 2 +-
include/mbt/Tagger.h | 4 +-
ltmain.sh | 6 +-
m4/libtool.m4 | 1 +
m4/ltsugar.m4 | 7 +--
m4/lt~obsolete.m4 | 7 +--
mbt.pc | 6 +-
src/GenerateTagger.cxx | 14 +++--
src/Makefile.am | 5 +-
src/Makefile.in | 5 +-
src/Mbt.cxx | 2 +-
src/MbtAPI.cxx | 2 +-
src/Mbtg.cxx | 2 +-
src/Pattern.cxx | 27 ++++-----
src/RunTagger.cxx | 145 +++++++++++++++++++++++++------------------------
src/Sentence.cxx | 129 +++++++++++++++++++++++++++----------------
src/TagLex.cxx | 17 +++---
src/Tagger.cxx | 120 ++++++++++++++++++++--------------------
src/convert.cxx | 2 +-
src/simpletest.cxx | 14 ++++-
31 files changed, 469 insertions(+), 341 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 1b41444..b63d096 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,105 @@
+2017-11-07 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * NEWS: NEWS about release
+
+2017-09-20 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/GenerateTagger.cxx, src/RunTagger.cxx: more refactoring
+
+2017-09-20 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * configure.ac, src/Sentence.cxx: simplified configuration script.
+ some refactoring too
+
+2017-09-20 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/GenerateTagger.cxx, src/Sentence.cxx: added omp safeguards
+
+2017-07-27 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * .travis.yml: updated .travis.yml
+
+2017-07-19 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * : commit c449e36094d113aaf6c60a37da51003e2c3196ab Author: Ko van
+ der Sloot <K.vanderSloot at let.ru.nl> Date: Wed Jul 19 13:19:07 2017
+ +0200
+
+2017-07-17 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/RunTagger.cxx: fixed statistics for ENRICHED format
+
+2017-05-09 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * .travis.yml: o noes
+
+2017-05-09 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * .travis.yml: added Travis notices
+
+2017-02-27 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/GenerateTagger.cxx, src/Tagger.cxx: some refactoring
+
+2017-02-21 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * .travis.yml, src/Makefile.am: modernize Travis build
+
+2017-01-25 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * include/mbt/MbtAPI.h, src/TagLex.cxx, src/Tagger.cxx: some
+ refactoring to improve code quality
+
+2017-01-25 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/RunTagger.cxx: small refactor to satisfy scan-build
+
+2017-01-23 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * .travis.yml: force Travis to use trusty
+
+2017-01-23 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * include/mbt/Pattern.h, src/Pattern.cxx, src/RunTagger.cxx,
+ src/Sentence.cxx, src/TagLex.cxx, src/Tagger.cxx: some refactoring
+
+2017-01-17 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * include/mbt/Tagger.h, src/RunTagger.cxx: small refactoring to
+ avoid 0 pointers
+
+2017-01-09 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/RunTagger.cxx: send manifest to cerr, not cout. (issue #3)
+
+2016-12-21 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/GenerateTagger.cxx, src/RunTagger.cxx: entered 2017
+
+2016-12-20 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * include/mbt/Logging.h, include/mbt/MbtAPI.h,
+ include/mbt/Pattern.h, include/mbt/Sentence.h,
+ include/mbt/TagLex.h, include/mbt/Tagger.h, src/GenerateTagger.cxx,
+ src/Mbt.cxx, src/MbtAPI.cxx, src/Mbtg.cxx, src/Pattern.cxx,
+ src/RunTagger.cxx, src/Sentence.cxx, src/TagLex.cxx,
+ src/Tagger.cxx, src/convert.cxx, src/simpletest.cxx: bump year
+
+2016-12-15 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * include/mbt/Tagger.h, src/Pattern.cxx, src/RunTagger.cxx,
+ src/Sentence.cxx, src/TagLex.cxx, src/Tagger.cxx: some refactoring
+ and polishing
+
+2016-12-15 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * src/simpletest.cxx: made sure the test also runs from the
+ command line
+
+2016-07-11 Ko van der Sloot <K.vanderSloot at let.ru.nl>
+
+ * configure.ac: bumped version after release
+
2016-07-11 Ko van der Sloot <K.vanderSloot at let.ru.nl>
* NEWS: spread the NEWS!
@@ -23,7 +125,7 @@
2016-02-17 Maarten van Gompel <proycon at anaproy.nl>
- * README, README.md, bootstrap.sh: Converted README to markdown,
+ * README => README.md, bootstrap.sh: Converted README to markdown,
added badgges
2016-01-18 Ko van der Sloot <K.vanderSloot at let.ru.nl>
@@ -314,7 +416,7 @@
2013-10-17 sloot <sloot at 12f355fe-0486-481a-ad91-c297ab22b4e3>
- * bootstrap, bootstrap.sh, configure.ac: modernized git-svn-id: https://ilk.uvt.nl/svn/trunk/sources/Mbt3@16621
+ * bootstrap => bootstrap.sh, configure.ac: modernized git-svn-id: https://ilk.uvt.nl/svn/trunk/sources/Mbt3@16621
12f355fe-0486-481a-ad91-c297ab22b4e3
2013-05-13 sloot <sloot at 12f355fe-0486-481a-ad91-c297ab22b4e3>
@@ -571,7 +673,7 @@
2011-09-13 sloot <sloot at 12f355fe-0486-481a-ad91-c297ab22b4e3>
* example/Makefile.am, example/simpletest.setting, src/Makefile.am,
- src/demo.cxx, src/simpletest.cxx: modified dem to simpletest 'make
+ src/{demo.cxx => simpletest.cxx}: modified dem to simpletest 'make
check' works 'make distcheck' fails when a recemt Timbl isn't
installed :{ git-svn-id: https://ilk.uvt.nl/svn/trunk/sources/Mbt3@13157
12f355fe-0486-481a-ad91-c297ab22b4e3
@@ -682,8 +784,8 @@
2011-03-21 sloot <sloot at 12f355fe-0486-481a-ad91-c297ab22b4e3>
- * NEWS, configure.ac, docs/Makefile.am, docs/Mbt.1, docs/Mbtg.1,
- docs/mbt.1, docs/mbtg.1, mbt.pc.in, src/Makefile.am: more decapping.
+ * NEWS, configure.ac, docs/Makefile.am, docs/{Mbt.1 => mbt.1},
+ docs/{Mbtg.1 => mbtg.1}, mbt.pc.in, src/Makefile.am: more decapping.
man pages too! git-svn-id: https://ilk.uvt.nl/svn/trunk/sources/Mbt3@8992
12f355fe-0486-481a-ad91-c297ab22b4e3
@@ -958,7 +1060,7 @@
2010-11-10 sloot <sloot at 12f355fe-0486-481a-ad91-c297ab22b4e3>
* include/mbt/Makefile.am, include/mbt/MbtAPI.h,
- include/mbt/MbtServer.h, include/mbt/MbtServerBase.h,
+ include/mbt/{MbtServer.h => MbtServerBase.h},
include/mbt/Sentence.h, include/mbt/Tagger.h, src/Makefile.am,
src/MbtAPI.cxx, src/MbtServer.cxx, src/MbtServerBase.cxx,
src/Sentence.cxx, src/Tagger.cxx: cleanup, renaming etc. git-svn-id: https://ilk.uvt.nl/svn/trunk/sources/Mbt3@6794
@@ -1110,7 +1212,7 @@
2010-06-02 antalb <antalb at 12f355fe-0486-481a-ad91-c297ab22b4e3>
- * example/test.tok, test.tok: moved test.tok to the example
+ * test.tok => example/test.tok: moved test.tok to the example
directory git-svn-id: https://ilk.uvt.nl/svn/trunk/sources/Mbt3@4857
12f355fe-0486-481a-ad91-c297ab22b4e3
diff --git a/Makefile.in b/Makefile.in
index dd9bf73..ca1aaa5 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -196,7 +196,7 @@ CSCOPE = cscope
DIST_SUBDIRS = $(SUBDIRS)
am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config.h.in \
$(srcdir)/mbt.pc.in AUTHORS COPYING ChangeLog INSTALL NEWS \
- README TODO compile config.guess config.sub install-sh \
+ README TODO compile config.guess config.sub depcomp install-sh \
ltmain.sh missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
diff --git a/NEWS b/NEWS
index 2a877b2..55cf1be 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,18 @@
+mbt version 3.3.1 2016-07-11
+[Ko van der Sloot]
+Bug fix release:
+A 3.2.17 tarball was released with the tag v3.3 which is confusing.
+So we release it again as 3.3.1 with the correct tarball.
+
+mbt version 3.2.17 2016-07-11
+[Ko van der Sloot]
+Maintenance release
+- fixed a bug due to uninitialized memory
+- code refactoring
+- added OPENMP safeguards
+- fixed statistics for Enriched format
+- fixed issue #3 (sending manifest to cerr not cout)
+
mbt version 3.2.16 2016-07-11
[Ko van der Sloot]
Maintenance release
diff --git a/config.guess b/config.guess
index 0967f2a..2e9ad7f 100755
--- a/config.guess
+++ b/config.guess
@@ -2,7 +2,7 @@
# Attempt to guess a canonical system name.
# Copyright 1992-2016 Free Software Foundation, Inc.
-timestamp='2016-04-02'
+timestamp='2016-10-02'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -186,9 +186,12 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
*) machine=${UNAME_MACHINE_ARCH}-unknown ;;
esac
# The Operating System including object format, if it has switched
- # to ELF recently, or will in the future.
+ # to ELF recently (or will in the future) and ABI.
case "${UNAME_MACHINE_ARCH}" in
- arm*|earm*|i386|m68k|ns32k|sh3*|sparc|vax)
+ earm*)
+ os=netbsdelf
+ ;;
+ arm*|i386|m68k|ns32k|sh3*|sparc|vax)
eval $set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ELF__
@@ -997,6 +1000,9 @@ EOF
eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
;;
+ mips64el:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ exit ;;
openrisc*:Linux:*:*)
echo or1k-unknown-linux-${LIBC}
exit ;;
@@ -1029,6 +1035,9 @@ EOF
ppcle:Linux:*:*)
echo powerpcle-unknown-linux-${LIBC}
exit ;;
+ riscv32:Linux:*:* | riscv64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
exit ;;
@@ -1408,18 +1417,17 @@ esac
cat >&2 <<EOF
$0: unable to guess system type
-This script, last modified $timestamp, has failed to recognize
-the operating system you are using. It is advised that you
-download the most up to date version of the config scripts from
+This script (version $timestamp), has failed to recognize the
+operating system you are using. If your script is old, overwrite
+config.guess and config.sub with the latest versions from:
http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
and
http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
-If the version you run ($0) is already up to date, please
-send the following data and any information you think might be
-pertinent to <config-patches at gnu.org> in order to provide the needed
-information to handle your system.
+If $0 has already been updated, send the following data and any
+information you think might be pertinent to config-patches at gnu.org to
+provide the necessary information to handle your system.
config.guess timestamp = $timestamp
diff --git a/config.sub b/config.sub
index 8d39c4b..dd2ca93 100755
--- a/config.sub
+++ b/config.sub
@@ -2,7 +2,7 @@
# Configuration validation subroutine script.
# Copyright 1992-2016 Free Software Foundation, Inc.
-timestamp='2016-03-30'
+timestamp='2016-11-04'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -117,7 +117,7 @@ case $maybe_os in
nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
- kopensolaris*-gnu* | \
+ kopensolaris*-gnu* | cloudabi*-eabi* | \
storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
@@ -301,6 +301,7 @@ case $basic_machine in
| open8 | or1k | or1knd | or32 \
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle \
+ | pru \
| pyramid \
| riscv32 | riscv64 \
| rl78 | rx \
@@ -428,6 +429,7 @@ case $basic_machine in
| orion-* \
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
+ | pru-* \
| pyramid-* \
| riscv32-* | riscv64-* \
| rl78-* | romp-* | rs6000-* | rx-* \
@@ -643,6 +645,14 @@ case $basic_machine in
basic_machine=m68k-bull
os=-sysv3
;;
+ e500v[12])
+ basic_machine=powerpc-unknown
+ os=$os"spe"
+ ;;
+ e500v[12]-*)
+ basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+ os=$os"spe"
+ ;;
ebmon29k)
basic_machine=a29k-amd
os=-ebmon
@@ -1022,7 +1032,7 @@ case $basic_machine in
ppc-* | ppcbe-*)
basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
- ppcle | powerpclittle | ppc-le | powerpc-little)
+ ppcle | powerpclittle)
basic_machine=powerpcle-unknown
;;
ppcle-* | powerpclittle-*)
@@ -1032,7 +1042,7 @@ case $basic_machine in
;;
ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
;;
- ppc64le | powerpc64little | ppc64-le | powerpc64-little)
+ ppc64le | powerpc64little)
basic_machine=powerpc64le-unknown
;;
ppc64le-* | powerpc64little-*)
@@ -1389,7 +1399,7 @@ case $os in
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* | -cegcc* \
| -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+ | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
| -linux-newlib* | -linux-musl* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
@@ -1399,7 +1409,7 @@ case $os in
| -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
| -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
| -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
- | -onefs* | -tirtos*)
+ | -onefs* | -tirtos* | -phoenix* | -fuchsia*)
# Remember, each alternative MUST END IN *, to match a version number.
;;
-qnx*)
diff --git a/configure b/configure
index dcf0dfa..7052961 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for mbt 3.2.16.
+# Generated by GNU Autoconf 2.69 for mbt 3.3.1.
#
# Report bugs to <lamasoftware at science.ru.nl>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='mbt'
PACKAGE_TARNAME='mbt'
-PACKAGE_VERSION='3.2.16'
-PACKAGE_STRING='mbt 3.2.16'
+PACKAGE_VERSION='3.3.1'
+PACKAGE_STRING='mbt 3.3.1'
PACKAGE_BUGREPORT='lamasoftware at science.ru.nl'
PACKAGE_URL=''
@@ -781,8 +781,6 @@ with_aix_soname
with_gnu_ld
with_sysroot
enable_libtool_lock
-with_timbl
-with_ticcutils
'
ac_precious_vars='build_alias
host_alias
@@ -1355,7 +1353,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures mbt 3.2.16 to adapt to many kinds of systems.
+\`configure' configures mbt 3.3.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1426,7 +1424,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of mbt 3.2.16:";;
+ short | recursive ) echo "Configuration of mbt 3.3.1:";;
esac
cat <<\_ACEOF
@@ -1457,12 +1455,6 @@ Optional Packages:
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
--with-sysroot[=DIR] Search for dependent libraries within DIR (or the
compiler's sysroot if not specified).
- --with-timbl=DIR use timbl installed in <DIR>;
- note that you can install timbl in a non-default directory with
- ./configure --prefix=<DIR> in the timbl installation directory
- --with-ticcutils=DIR use ticcutils installed in <DIR>;
- note that you can install ticcutils in a non-default directory with
- ./configure --prefix=<DIR> in the ticcutils installation directory
Some influential environment variables:
CXX C++ compiler command
@@ -1557,7 +1549,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-mbt configure 3.2.16
+mbt configure 3.3.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2010,7 +2002,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by mbt $as_me 3.2.16, which was
+It was created by mbt $as_me 3.3.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2873,7 +2865,7 @@ fi
# Define the identity of the package.
PACKAGE='mbt'
- VERSION='3.2.16'
+ VERSION='3.3.1'
cat >>confdefs.h <<_ACEOF
@@ -16417,24 +16409,13 @@ $as_echo "no" >&6; }
PKG_CONFIG=""
fi
fi
-# Checks for timbl library.
-# inspired by feh-1.3.4/configure.ac. Tnx Tom Gilbert and feh hackers.
-
-# ugly hack when PKG_CONFIG_PATH isn't defined.
-# couldn't get it to work otherwise
if test "x$PKG_CONFIG_PATH" = x; then
- export PKG_CONFIG_PATH=""
-fi
-
-# Check whether --with-timbl was given.
-if test "${with_timbl+set}" = set; then :
- withval=$with_timbl; PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$withval/lib/pkgconfig"
+ export PKG_CONFIG_PATH="$prefix/lib/pkgconfig"
else
- PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$prefix/lib/pkgconfig"
+ export PKG_CONFIG_PATH="$prefix/lib/pkgconfig:$PKG_CONFIG_PATH"
fi
-# AC_MSG_NOTICE( [pkg-config search path: $PKG_CONFIG_PATH] )
pkg_failed=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for timbl" >&5
@@ -16530,15 +16511,6 @@ CXXFLAGS="$CXXFLAGS $timbl_CFLAGS"
LIBS="$LIBS $timbl_LIBS"
-# Check whether --with-ticcutils was given.
-if test "${with_ticcutils+set}" = set; then :
- withval=$with_ticcutils; PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$withval/lib/pkgconfig"
-else
- PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$prefix/lib/pkgconfig"
-fi
-
-# AC_MSG_NOTICE( [pkg-config search path: $PKG_CONFIG_PATH] )
-
pkg_failed=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ticcutils" >&5
$as_echo_n "checking for ticcutils... " >&6; }
@@ -16547,12 +16519,12 @@ if test -n "$ticcutils_CFLAGS"; then
pkg_cv_ticcutils_CFLAGS="$ticcutils_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ticcutils >= 0.4 \""; } >&5
- ($PKG_CONFIG --exists --print-errors "ticcutils >= 0.4 ") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ticcutils >= 0.16 \""; } >&5
+ ($PKG_CONFIG --exists --print-errors "ticcutils >= 0.16 ") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_ticcutils_CFLAGS=`$PKG_CONFIG --cflags "ticcutils >= 0.4 " 2>/dev/null`
+ pkg_cv_ticcutils_CFLAGS=`$PKG_CONFIG --cflags "ticcutils >= 0.16 " 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -16564,12 +16536,12 @@ if test -n "$ticcutils_LIBS"; then
pkg_cv_ticcutils_LIBS="$ticcutils_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ticcutils >= 0.4 \""; } >&5
- ($PKG_CONFIG --exists --print-errors "ticcutils >= 0.4 ") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ticcutils >= 0.16 \""; } >&5
+ ($PKG_CONFIG --exists --print-errors "ticcutils >= 0.16 ") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_ticcutils_LIBS=`$PKG_CONFIG --libs "ticcutils >= 0.4 " 2>/dev/null`
+ pkg_cv_ticcutils_LIBS=`$PKG_CONFIG --libs "ticcutils >= 0.16 " 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -16590,14 +16562,14 @@ else
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
- ticcutils_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "ticcutils >= 0.4 " 2>&1`
+ ticcutils_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "ticcutils >= 0.16 " 2>&1`
else
- ticcutils_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "ticcutils >= 0.4 " 2>&1`
+ ticcutils_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "ticcutils >= 0.16 " 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$ticcutils_PKG_ERRORS" >&5
- as_fn_error $? "Package requirements (ticcutils >= 0.4 ) were not met:
+ as_fn_error $? "Package requirements (ticcutils >= 0.16 ) were not met:
$ticcutils_PKG_ERRORS
@@ -17169,7 +17141,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by mbt $as_me 3.2.16, which was
+This file was extended by mbt $as_me 3.3.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -17235,7 +17207,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-mbt config.status 3.2.16
+mbt config.status 3.3.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -18458,6 +18430,7 @@ $as_echo X"$file" |
cat <<_LT_EOF >> "$cfgfile"
#! $SHELL
# Generated automatically by $as_me ($PACKAGE) $VERSION
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
# NOTE: Changes made to this file will be lost: look at ltmain.sh.
# Provide generalized library-building support services.
diff --git a/configure.ac b/configure.ac
index dd74171..f5c93fa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.61)
-AC_INIT([mbt],[3.2.16],[lamasoftware at science.ru.nl])
+AC_INIT([mbt],[3.3.1],[lamasoftware at science.ru.nl])
AM_INIT_AUTOMAKE
AC_CONFIG_SRCDIR([.])
AC_CONFIG_MACRO_DIR([m4])
@@ -45,34 +45,18 @@ if test x"$acx_pthread_ok" = xyes; then
fi
PKG_PROG_PKG_CONFIG
-# Checks for timbl library.
-# inspired by feh-1.3.4/configure.ac. Tnx Tom Gilbert and feh hackers.
-
-# ugly hack when PKG_CONFIG_PATH isn't defined.
-# couldn't get it to work otherwise
if test "x$PKG_CONFIG_PATH" = x; then
- export PKG_CONFIG_PATH=""
+ export PKG_CONFIG_PATH="$prefix/lib/pkgconfig"
+else
+ export PKG_CONFIG_PATH="$prefix/lib/pkgconfig:$PKG_CONFIG_PATH"
fi
-AC_ARG_WITH(timbl,
- [ --with-timbl=DIR use timbl installed in <DIR>;
- note that you can install timbl in a non-default directory with
- ./configure --prefix=<DIR> in the timbl installation directory],
- [PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$withval/lib/pkgconfig"],
- [PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$prefix/lib/pkgconfig"])
-# AC_MSG_NOTICE( [pkg-config search path: $PKG_CONFIG_PATH] )
+
PKG_CHECK_MODULES([timbl], [timbl >= 6.4.4] )
CXXFLAGS="$CXXFLAGS $timbl_CFLAGS"
LIBS="$LIBS $timbl_LIBS"
-AC_ARG_WITH(ticcutils,
- [ --with-ticcutils=DIR use ticcutils installed in <DIR>;
- note that you can install ticcutils in a non-default directory with
- ./configure --prefix=<DIR> in the ticcutils installation directory],
- [PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$withval/lib/pkgconfig"],
- [PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$prefix/lib/pkgconfig"])
-# AC_MSG_NOTICE( [pkg-config search path: $PKG_CONFIG_PATH] )
-PKG_CHECK_MODULES([ticcutils], [ticcutils >= 0.4] )
+PKG_CHECK_MODULES([ticcutils], [ticcutils >= 0.16] )
CXXFLAGS="$CXXFLAGS $ticcutils_CFLAGS"
LIBS="$LIBS $ticcutils_LIBS"
diff --git a/include/mbt/Logging.h b/include/mbt/Logging.h
index 88706d6..97e1ab7 100644
--- a/include/mbt/Logging.h
+++ b/include/mbt/Logging.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
diff --git a/include/mbt/MbtAPI.h b/include/mbt/MbtAPI.h
index fd1bc7d..a338591 100644
--- a/include/mbt/MbtAPI.h
+++ b/include/mbt/MbtAPI.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -41,7 +41,7 @@ class MbtAPI {
static bool GenerateTagger( int, char** );
static bool GenerateTagger( const std::string& );
static bool RunTagger( int, char** );
- MbtAPI( const std::string& );
+ explicit MbtAPI( const std::string& );
MbtAPI( const std::string&, TiCC::LogStream& );
~MbtAPI();
bool isInit() const;
diff --git a/include/mbt/Pattern.h b/include/mbt/Pattern.h
index 3f06feb..8d50a84 100644
--- a/include/mbt/Pattern.h
+++ b/include/mbt/Pattern.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -31,7 +31,6 @@ class PatTemplate {
public:
std::string templatestring;
std::string word_templatestring;
- size_t tlen;
size_t numslots;
size_t wordslots;
int focuspos;
diff --git a/include/mbt/Sentence.h b/include/mbt/Sentence.h
index cf25565..0a33cec 100644
--- a/include/mbt/Sentence.h
+++ b/include/mbt/Sentence.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
diff --git a/include/mbt/TagLex.h b/include/mbt/TagLex.h
index f7e6060..9b2b3ac 100644
--- a/include/mbt/TagLex.h
+++ b/include/mbt/TagLex.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
diff --git a/include/mbt/Tagger.h b/include/mbt/Tagger.h
index a438435..acd389f 100644
--- a/include/mbt/Tagger.h
+++ b/include/mbt/Tagger.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -67,7 +67,7 @@ namespace Tagger {
const Timbl::ValueDistribution * );
void NextPath( StringHash&,
const Timbl::TargetValue *,
- const Timbl::ValueDistribution*,
+ const Timbl::ValueDistribution *,
int );
void ClearBest();
void Shift( int, int );
diff --git a/ltmain.sh b/ltmain.sh
index 2ad8be8..a736cf9 100644
--- a/ltmain.sh
+++ b/ltmain.sh
@@ -31,7 +31,7 @@
PROGRAM=libtool
PACKAGE=libtool
-VERSION="2.4.6 Debian-2.4.6-1"
+VERSION="2.4.6 Debian-2.4.6-2"
package_revision=2.4.6
@@ -1977,7 +1977,7 @@ func_version ()
# End:
# Set a version string.
-scriptversion='(GNU libtool) 2.4.6 Debian-2.4.6-1'
+scriptversion='(GNU libtool) 2.4.6'
# func_echo ARG...
@@ -2068,7 +2068,7 @@ include the following information:
compiler: $LTCC
compiler flags: $LTCFLAGS
linker: $LD (gnu? $with_gnu_ld)
- version: $progname $scriptversion
+ version: $progname $scriptversion Debian-2.4.6-2
automake: `($AUTOMAKE --version) 2>/dev/null |$SED 1q`
autoconf: `($AUTOCONF --version) 2>/dev/null |$SED 1q`
diff --git a/m4/libtool.m4 b/m4/libtool.m4
index ee80844..10ab284 100644
--- a/m4/libtool.m4
+++ b/m4/libtool.m4
@@ -728,6 +728,7 @@ _LT_CONFIG_SAVE_COMMANDS([
cat <<_LT_EOF >> "$cfgfile"
#! $SHELL
# Generated automatically by $as_me ($PACKAGE) $VERSION
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
# NOTE: Changes made to this file will be lost: look at ltmain.sh.
# Provide generalized library-building support services.
diff --git a/m4/ltsugar.m4 b/m4/ltsugar.m4
index 48bc934..9000a05 100644
--- a/m4/ltsugar.m4
+++ b/m4/ltsugar.m4
@@ -1,7 +1,6 @@
# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
#
-# Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software
-# Foundation, Inc.
+# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
@@ -34,7 +33,7 @@ m4_define([_lt_join],
# ------------
# Manipulate m4 lists.
# These macros are necessary as long as will still need to support
-# Autoconf-2.59, which quotes differently.
+# Autoconf-2.59 which quotes differently.
m4_define([lt_car], [[$1]])
m4_define([lt_cdr],
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
@@ -45,7 +44,7 @@ m4_define([lt_unquote], $1)
# lt_append(MACRO-NAME, STRING, [SEPARATOR])
# ------------------------------------------
-# Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'.
+# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
# Note that neither SEPARATOR nor STRING are expanded; they are appended
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
diff --git a/m4/lt~obsolete.m4 b/m4/lt~obsolete.m4
index c6b26f8..c573da9 100644
--- a/m4/lt~obsolete.m4
+++ b/m4/lt~obsolete.m4
@@ -1,7 +1,6 @@
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
#
-# Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software
-# Foundation, Inc.
+# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
# Written by Scott James Remnant, 2004.
#
# This file is free software; the Free Software Foundation gives
@@ -12,7 +11,7 @@
# These exist entirely to fool aclocal when bootstrapping libtool.
#
-# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN),
+# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
# which have later been changed to m4_define as they aren't part of the
# exported API, or moved to Autoconf or Automake where they belong.
#
@@ -26,7 +25,7 @@
# included after everything else. This provides aclocal with the
# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
# because those macros already exist, or will be overwritten later.
-# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
+# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
#
# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
# Yes, that means every name once taken will need to remain here until
diff --git a/mbt.pc b/mbt.pc
index 65c8111..6355176 100644
--- a/mbt.pc
+++ b/mbt.pc
@@ -1,12 +1,12 @@
-prefix=/home/sloot/git/../usr/local
+prefix=/home/sloot/usr/local
exec_prefix=${prefix}
libdir=${exec_prefix}/lib
includedir=${prefix}/include
Name: mbt
-Version: 3.2.16
+Version: 3.3.1
Description: mbt library.
Requires.private: timbl >= 6.4.2
Libs: -L${libdir} -lmbt
-Libs.private: -lpthread -L/home/sloot/git/../usr/local/lib -ltimbl -L/home/sloot/git/../usr/local/lib -lticcutils
+Libs.private: -lpthread -L/home/sloot/usr/local/lib -ltimbl -L/home/sloot/usr/local/lib -lticcutils
Cflags: -I${includedir}
diff --git a/src/GenerateTagger.cxx b/src/GenerateTagger.cxx
index e8f073e..e37644d 100644
--- a/src/GenerateTagger.cxx
+++ b/src/GenerateTagger.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -260,7 +260,6 @@ struct more_second {
// of the words in the dictionary and the values
// of the features are stored in the testpattern
int swcn = 0;
- int thisTagCode;
while( mySentence.nextpat( Action, TestPat,
*kwordlist, TheLex,
swcn ) ){
@@ -271,10 +270,14 @@ struct more_second {
}
}
if ( !skip )
- for( int f=0; f < nslots; f++){
+ for( int f=0; f < nslots; ++f ){
outfile << indexlex( TestPat[f], TheLex ) << " ";
}
- thisTagCode = TheLex.Hash( mySentence.gettag(swcn) );
+ int thisTagCode = -1;
+#pragma omp critical (hasher)
+ {
+ thisTagCode = TheLex.Hash( mySentence.gettag(swcn) );
+ }
if ( !skip ){
for ( auto const& it : mySentence.getEnrichments(swcn) ){
outfile << it << " ";
@@ -574,7 +577,6 @@ struct more_second {
}
int TaggerClass::CreateTagger( TiCC::CL_Options& opts ){
- string value;
if ( opts.is_present( 'h' ) ||
opts.is_present( "help" ) ){
gen_usage( "mbtg" );
@@ -583,7 +585,7 @@ struct more_second {
//
// present yourself to the user
//
- cerr << "mbtg " << VERSION << " (c) CLST, ILK and CLiPS 1998 - 2016." << endl
+ cerr << "mbtg " << VERSION << " (c) CLST, ILK and CLiPS 1998 - 2017." << endl
<< "Memory Based Tagger Generator" << endl
<< "CLST - Centre for Language and Speech Technology,"
<< "Radboud University" << endl
diff --git a/src/Makefile.am b/src/Makefile.am
index 2b85c49..9e0f3a0 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,8 +1,5 @@
-# $Id$
-# $URL$
-
AM_CPPFLAGS = -I@top_srcdir@/include
-AM_CXXFLAGS = -std=c++0x # -Weffc++
+AM_CXXFLAGS = -std=c++11
LDADD = libmbt.la
diff --git a/src/Makefile.in b/src/Makefile.in
index bbf6db3..e66d94a 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -14,9 +14,6 @@
@SET_MAKE@
-# $Id$
-# $URL$
-
VPATH = @srcdir@
am__is_gnu_make = { \
@@ -541,7 +538,7 @@ top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AM_CPPFLAGS = -I@top_srcdir@/include
-AM_CXXFLAGS = -std=c++0x # -Weffc++
+AM_CXXFLAGS = -std=c++11
LDADD = libmbt.la
TESTS = $(check_PROGRAMS)
TESTS_ENVIRONMENT = topsrcdir=$(top_srcdir)
diff --git a/src/Mbt.cxx b/src/Mbt.cxx
index 31198b6..5de2335 100644
--- a/src/Mbt.cxx
+++ b/src/Mbt.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
diff --git a/src/MbtAPI.cxx b/src/MbtAPI.cxx
index 05a883f..39517e4 100644
--- a/src/MbtAPI.cxx
+++ b/src/MbtAPI.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
diff --git a/src/Mbtg.cxx b/src/Mbtg.cxx
index 75b2bd2..c1af02e 100644
--- a/src/Mbtg.cxx
+++ b/src/Mbtg.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
diff --git a/src/Pattern.cxx b/src/Pattern.cxx
index 585d4c7..f10d9c6 100644
--- a/src/Pattern.cxx
+++ b/src/Pattern.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -52,24 +52,21 @@ PatTemplate::PatTemplate()
hyphen = 0;
capital = 0;
numeric = 0;
+ compensation = 0;
wordfocus = 0;
- templatestring = "";
- word_templatestring = "";
}
bool PatTemplate::set( const string& tempstr ){
// reads a format string and figures out a template for the patterns from
// this
- tlen = tempstr.length();
-
// Find the position of the focus.
// Seperate string is built for the word-context.
int j = 0;
int k = 0;
bool focus = false;
compensation = 0;
- for ( size_t i = 0; i < tlen; i++) {
- switch(tempstr[i]){
+ for ( const auto& c : tempstr ){
+ switch( c ){
case 'f':
if ( focus ){
cerr << "more than 1 focus position in Pattern! " << tempstr << endl;
@@ -77,8 +74,8 @@ bool PatTemplate::set( const string& tempstr ){
}
focuspos = j;
skipfocus=0;
- templatestring += tempstr[i];
- word_templatestring += tempstr[i];
+ templatestring += c;
+ word_templatestring += c;
word_focuspos = k;
++numslots;
++j;
@@ -94,8 +91,8 @@ bool PatTemplate::set( const string& tempstr ){
}
focuspos = j;
skipfocus=1;
- templatestring += tempstr[i];
- word_templatestring += tempstr[i];
+ templatestring += c;
+ word_templatestring += c;
word_focuspos = k;
++numslots;
++j;
@@ -105,12 +102,12 @@ bool PatTemplate::set( const string& tempstr ){
focus = true;
break;
case 'd':
- templatestring += tempstr[i];
+ templatestring += c;
++numslots;
++j;
break;
case 'a':
- templatestring += tempstr[i];
+ templatestring += c;
++numslots;
++j;
break;
@@ -130,7 +127,7 @@ bool PatTemplate::set( const string& tempstr ){
numeric = 1;
break;
case 'w':
- word_templatestring += tempstr[i];
+ word_templatestring += c;
++wordslots;
++k;
break;
@@ -167,7 +164,7 @@ bool PatTemplate::set( const string& tempstr ){
}
break;
default:
- cerr << "ERROR: illegal symbol '" << tempstr[i]
+ cerr << "ERROR: illegal symbol '" << c
<< "' in context string'" << endl;
return false;
}
diff --git a/src/RunTagger.cxx b/src/RunTagger.cxx
index 80e9d2b..5933388 100644
--- a/src/RunTagger.cxx
+++ b/src/RunTagger.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -71,7 +71,7 @@ namespace Tagger {
BeamData::~BeamData(){
if ( paths ){
- for ( int q=0; q < size; q++ ){
+ for ( int q=0; q < size; ++q ){
delete n_best_array[q];
delete [] paths[q];
delete [] temppaths[q];
@@ -95,7 +95,7 @@ namespace Tagger {
return false;
}
else {
- for ( int q=0; q < Size; q++ ){
+ for ( int q=0; q < Size; ++q ){
paths[q] = 0;
temppaths[q] = 0;
if ( (n_best_array[q] = new n_best_tuple) == 0 ){
@@ -111,7 +111,7 @@ namespace Tagger {
delete [] temppaths[q];
}
}
- for ( int q=0; q < Size; q++ ){
+ for ( int q=0; q < Size; ++q ){
if ( (paths[q] = new int[noWords]) == 0 ||
(temppaths[q] = new int[noWords]) == 0 ){
throw runtime_error( "Beam: not enough memory for N-best search tables" );
@@ -124,13 +124,13 @@ namespace Tagger {
void BeamData::ClearBest(){
DBG << "clearing n_best_array..." << endl;
- for ( int i=0; i < size; i++ )
+ for ( int i=0; i < size; ++i )
n_best_array[i]->clean();
}
void BeamData::Shift( int no_words, int i_word ){
- for ( int q1 = 0; q1 < no_words; q1++ ){
- for ( int jb = 0; jb < size; jb++ ){
+ for ( int q1 = 0; q1 < no_words; ++q1 ){
+ for ( int jb = 0; jb < size; ++jb ){
path_prob[jb] = n_best_array[jb]->prob;
if ( n_best_array[jb]->path != EMPTY_PATH ){
if ( q1 < i_word ){
@@ -150,8 +150,8 @@ namespace Tagger {
temppaths[jb][q1] = EMPTY_PATH;
}
}
- for ( int jb = 0; jb < size; jb++ ){
- for ( int q1=0; q1 < no_words; q1++ )
+ for ( int jb = 0; jb < size; ++jb ){
+ for ( int q1=0; q1 < no_words; ++q1 )
paths[jb][q1] = temppaths[jb][q1];
}
}
@@ -190,8 +190,8 @@ namespace Tagger {
class name_prob_pair{
public:
- name_prob_pair( const string& n, double p ){
- name = n; prob = p; next = 0;
+ name_prob_pair( const string& n, double p ): name(n),prob(p){
+ next = 0;
}
~name_prob_pair(){};
string name;
@@ -225,18 +225,20 @@ namespace Tagger {
// While we will use only the first BeamSize entries, don't forget
// the most important one...!
name_prob_pair *result = 0, *tmp, *Pref = 0;
+ if ( !Dist ){
+ return 0;
+ }
double sum_freq = 0.0;
- ValueDistribution::dist_iterator it = Dist->begin();
- while ( it != Dist->end() ){
- string name = (*it).second->Value()->Name();
- double freq = (*it).second->Weight();
+ for ( const auto& it : *Dist ){
+ string name = it.second->Value()->Name();
+ // string name = (*it).second->Value()->Name();
+ double freq = it.second->Weight();
sum_freq += freq;
tmp = new name_prob_pair( name, freq );
if ( name == PrefClass->Name() )
Pref = tmp;
else
result = add_descending( tmp, result );
- ++it;
}
if ( Pref ){
Pref->next = result;
@@ -260,7 +262,7 @@ namespace Tagger {
path_prob[0] = 1.0;
}
else {
- name_prob_pair *d_pnt, *tmp_d_pnt, *Distr;
+ name_prob_pair *d_pnt, *Distr;
Distr = break_down( distrib, answer );
d_pnt = Distr;
int jb = 0;
@@ -269,12 +271,12 @@ namespace Tagger {
paths[jb][0] = TheLex.Hash( d_pnt->name );
path_prob[jb] = d_pnt->prob;
}
- tmp_d_pnt = d_pnt;
+ name_prob_pair *tmp_d_pnt = d_pnt;
d_pnt = d_pnt->next;
delete tmp_d_pnt;
jb++;
}
- for ( ; jb < size; jb++ ){
+ for ( ; jb < size; ++jb ){
paths[jb][0] = EMPTY_PATH;
path_prob[jb] = 0.0;
}
@@ -293,7 +295,7 @@ namespace Tagger {
else {
DBG << "BeamData::NextPath[" << beam_cnt << "] ( " << answer << " , "
<< distrib << " )" << endl;
- name_prob_pair *d_pnt, *tmp_d_pnt, *Distr;
+ name_prob_pair *d_pnt, *Distr;
Distr = break_down( distrib, answer );
d_pnt = Distr;
int ab = 0;
@@ -302,7 +304,7 @@ namespace Tagger {
double thisWProb = d_pnt->prob;
double thisPProb = thisWProb * path_prob[beam_cnt];
int dtag = TheLex.Hash( d_pnt->name );
- for( int ane = size-1; ane >=0; ane-- ){
+ for( int ane = size-1; ane >=0; --ane ){
if ( thisPProb <= n_best_array[ane]->prob )
break;
if ( ane == 0 ||
@@ -315,7 +317,7 @@ namespace Tagger {
<< endl;
// shift
n_best_tuple *keep = n_best_array[size-1];
- for ( int ash = size-1; ash > ane; ash-- ){
+ for ( int ash = size-1; ash > ane; --ash ){
n_best_array[ash] = n_best_array[ash-1];
}
n_best_array[ane] = keep;
@@ -325,7 +327,7 @@ namespace Tagger {
}
}
}
- tmp_d_pnt = d_pnt;
+ name_prob_pair *tmp_d_pnt = d_pnt;
d_pnt = d_pnt->next;
delete tmp_d_pnt;
++ab;
@@ -344,7 +346,6 @@ namespace Tagger {
int no_words=0;
// loop as long as you get non empty sentences
//
- string tagged_sentence;
string line;
while ( getline(is, line ) ){
vector<TagResult> res = tagLine( line );
@@ -361,7 +362,7 @@ namespace Tagger {
void TaggerClass::ShowCats( ostream& os, const vector<int>& Pat, int slots ){
os << "Pattern : ";
- for( int slot=0; slot < slots; slot++){
+ for( int slot=0; slot < slots; ++slot ){
os << indexlex( Pat[slot], TheLex )<< " ";
}
os << endl;
@@ -377,15 +378,15 @@ namespace Tagger {
else
slots = Ktemplate.totalslots() - Ktemplate.skipfocus;
string line;
- for( int f=0; f < slots; f++ ){
+ for( int f=0; f < slots; ++f ){
line += indexlex( pat[f], TheLex );
line += " ";
}
const vector<string> enr = mySentence.getEnrichments(word);
- for ( auto er: enr ){
+ for ( const auto& er: enr ){
line += er + " ";
}
- if ( input_kind == TAGGED )
+ if ( input_kind != UNTAGGED )
line += mySentence.gettag(word);
else
line += "??";
@@ -395,7 +396,7 @@ namespace Tagger {
// dump if desired
//
if ( dumpflag ){
- for( int slot=0; slot < slots; slot++){
+ for( int slot=0; slot < slots; ++slot ){
cout << indexlex( pat[slot], TheLex );
}
cout << endl;
@@ -560,12 +561,14 @@ namespace Tagger {
int TaggerClass::Run(){
int result = -1;
if ( initialized ){
+ bool out_to_file = OutputFileName != "";
ostream *os;
- if ( OutputFileName != "" ){
+ if ( out_to_file ){
os = new ofstream( OutputFileName );
}
- else
+ else {
os = &default_cout;
+ }
ifstream infile;
if ( !piped_input ){
string inname = TestFilePath + TestFileName;
@@ -589,7 +592,7 @@ namespace Tagger {
else
result = ProcessFile( cin, *os );
}
- if ( OutputFileName != "" ){
+ if ( out_to_file ){
delete os;
}
}
@@ -621,7 +624,6 @@ namespace Tagger {
#endif
if ( !answer ){
throw runtime_error( "Tagger: A classifying problem prevented continuing. Sorry!" );
- exit(EXIT_FAILURE);
}
return answer;
}
@@ -644,10 +646,10 @@ namespace Tagger {
distribution_array[0] = distribution->DistToString();
if ( confidence_flag )
confidence_array[0] = distribution->Confidence( answer );
- }
- if ( IsActive( DBG ) ){
- LOG << "BeamData::InitPaths( " << mySentence << endl;
- LOG << " , " << answer << " , " << distribution << " )" << endl;
+ if ( IsActive( DBG ) ){
+ LOG << "BeamData::InitPaths( " << mySentence << endl;
+ LOG << " , " << answer << " , " << distribution << " )" << endl;
+ }
}
Beam->InitPaths( TheLex, answer, distribution );
if ( IsActive( DBG ) ){
@@ -735,11 +737,11 @@ namespace Tagger {
DBG << "Start: " << mySentence.getword( 0 ) << endl;
InitTest( mySentence, TestPat, Action );
- for ( unsigned int iword=1; iword < mySentence.size(); iword++ ){
+ for ( unsigned int iword=1; iword < mySentence.size(); ++iword ){
// clear best_array
DBG << endl << "Next: " << mySentence.getword( iword ) << endl;
Beam->ClearBest();
- for ( int beam_count=0; beam_count < Beam_Size; beam_count++ ){
+ for ( int beam_count=0; beam_count < Beam_Size; ++beam_count ){
if ( !NextBest( mySentence, TestPat, iword, beam_count ) )
break;
}
@@ -783,12 +785,12 @@ namespace Tagger {
return eom;
}
- string TaggerClass::TRtoString( const vector<TagResult>& tr ) const {
+ string TaggerClass::TRtoString( const vector<TagResult>& trs ) const {
string result;
- for ( unsigned int Wcnt=0; Wcnt < tr.size(); ++Wcnt ){
+ for ( const auto& tr : trs ){
// lookup the assigned category
- result += tr[Wcnt].word();
- if ( tr[Wcnt].isKnown() ){
+ result += tr.word();
+ if ( tr.isKnown() ){
if ( input_kind == UNTAGGED )
result += "/";
else
@@ -803,22 +805,22 @@ namespace Tagger {
// output the correct tag if possible
//
if ( input_kind == ENRICHED )
- result = result + tr[Wcnt].enrichment() + "\t";
+ result = result + tr.enrichment() + "\t";
if ( input_kind == TAGGED ||
input_kind == ENRICHED ){
- result += tr[Wcnt].inputTag() + "\t" + tr[Wcnt].assignedTag();
+ result += tr.inputTag() + "\t" + tr.assignedTag();
if ( confidence_flag )
- result += " [" + toString( tr[Wcnt].confidence() ) + "]";
+ result += " [" + toString( tr.confidence() ) + "]";
if ( distrib_flag )
- result += " " + tr[Wcnt].distribution();
+ result += " " + tr.distribution();
if ( distance_flag )
- result += " " + toString( tr[Wcnt].distance() );
+ result += " " + toString( tr.distance() );
result += "\n";
}
else {
- result += tr[Wcnt].assignedTag();
+ result += tr.assignedTag();
if ( confidence_flag )
- result += "/" + toString( tr[Wcnt].confidence() );
+ result += "/" + toString( tr.confidence() );
result += " ";
}
} // end of output loop through one sentence
@@ -831,10 +833,9 @@ namespace Tagger {
int& no_known, int& no_unknown,
int& no_correct_known,
int& no_correct_unknown ){
- string result;
string tagstring;
//now some output
- for ( unsigned int Wcnt=0; Wcnt < mySentence.size(); Wcnt++ ){
+ for ( unsigned int Wcnt=0; Wcnt < mySentence.size(); ++Wcnt ){
tagstring = indexlex( Beam->paths[0][Wcnt], TheLex );
if ( mySentence.known(Wcnt) ){
no_known++;
@@ -870,7 +871,6 @@ namespace Tagger {
mySentence.read(infile, input_kind, EosMark, line_cnt ) ){
if ( mySentence.size() == 0 )
continue;
- string tagged_sentence;
if ( ++HartBeat % 100 == 0 ) {
cerr << "."; cerr.flush();
}
@@ -880,7 +880,7 @@ namespace Tagger {
continue;
}
vector<TagResult> res = tagSentence( mySentence );
- tagged_sentence = TRtoString( res );
+ string tagged_sentence = TRtoString( res );
if ( !tagged_sentence.empty() ){
// show the results of 1 sentence
statistics( mySentence,
@@ -937,7 +937,7 @@ namespace Tagger {
while(setfile.getline(SetBuffer,511,'\n')){
switch (SetBuffer[0]) {
case 'B':
- if ( sscanf(SetBuffer,"B %d", &Beam_Size ) != 1 )
+ if ( sscanf(SetBuffer,"B %40d", &Beam_Size ) != 1 )
Beam_Size = 1;
break;
case 'd':
@@ -945,33 +945,33 @@ namespace Tagger {
cerr << " Dumpflag ON" << endl;
break;
case 'e': {
- sscanf( SetBuffer, "e %s", value );
+ sscanf( SetBuffer, "e %40s", value );
EosMark = value;
break;
}
case 'k':
- sscanf(SetBuffer,"k %s", value );
+ sscanf(SetBuffer,"k %300s", value );
KnownTreeBaseName = value;
KnownTreeName = prefixWithAbsolutePath( KnownTreeBaseName,
SettingsFilePath );
knowntreeflag = true; // there is a knowntreefile specified
break;
case 'l':
- sscanf(SetBuffer,"l %s", value );
+ sscanf(SetBuffer,"l %300s", value );
l_option_name = value;
l_option_name = prefixWithAbsolutePath( l_option_name,
SettingsFilePath );
lexflag = true; // there is a lexicon specified
break;
case 'L':
- sscanf(SetBuffer,"L %s", value );
+ sscanf(SetBuffer,"L %300s", value );
L_option_name = value;
L_option_name = prefixWithAbsolutePath( L_option_name,
SettingsFilePath );
klistflag = true;
break;
case 'o':
- sscanf(SetBuffer,"t %s", value );
+ sscanf(SetBuffer,"t %300s", value );
OutputFileName = value;
OutputFileName = prefixWithAbsolutePath( OutputFileName,
SettingsFilePath );
@@ -986,7 +986,7 @@ namespace Tagger {
UtmplStr = string( SetBuffer+2 );
break;
case 'r':
- sscanf(SetBuffer,"r %s", value );
+ sscanf(SetBuffer,"r %300s", value );
r_option_name = value;
r_option_name = prefixWithAbsolutePath( r_option_name,
SettingsFilePath );
@@ -999,14 +999,14 @@ namespace Tagger {
exit(EXIT_FAILURE);
break;
case 't':
- sscanf(SetBuffer,"t %s", value );
+ sscanf(SetBuffer,"t %300s", value );
TestFileName = value;
TestFileName = prefixWithAbsolutePath( TestFileName,
SettingsFilePath );
piped_input = false; // there is a test file specified
break;
case 'E':
- if ( SetBuffer[1] == ' ' && sscanf(SetBuffer,"E %s", value ) > 0 ){
+ if ( SetBuffer[1] == ' ' && sscanf(SetBuffer,"E %300s", value ) > 0 ){
TestFileName = value;
TestFileName = prefixWithAbsolutePath( TestFileName,
SettingsFilePath );
@@ -1022,7 +1022,7 @@ namespace Tagger {
}
break;
case 'T':
- sscanf(SetBuffer,"T %s", value );
+ sscanf(SetBuffer,"T %300s", value );
TestFileName = value;
TestFileName = prefixWithAbsolutePath( TestFileName,
SettingsFilePath );
@@ -1030,7 +1030,7 @@ namespace Tagger {
input_kind = TAGGED; // there is a tagged test file specified
break;
case 'u':
- sscanf(SetBuffer,"u %s", value );
+ sscanf(SetBuffer,"u %300s", value );
UnknownTreeBaseName = value;
UnknownTreeName = prefixWithAbsolutePath( UnknownTreeBaseName,
SettingsFilePath );
@@ -1144,14 +1144,17 @@ namespace Tagger {
}
if ( Opts.extract( 'v', value ) ){
vector<string> opts;
- size_t num = split_at( value, opts, "+" );
- for ( size_t i = 0; i < num; ++i ){
- if ( opts[i] == "di" )
+ split_at( value, opts, "+" );
+ for ( const auto& o : opts ){
+ if ( o == "di" ){
distance_flag = true;
- if ( opts[i] == "db" )
+ }
+ else if ( o == "db" ){
distrib_flag = true;
- if ( opts[i] == "cf" )
+ }
+ else if ( o == "cf" ){
confidence_flag = true;
+ }
}
};
if ( cloned && input_kind == ENRICHED ){
@@ -1173,7 +1176,7 @@ namespace Tagger {
void TaggerClass::manifest(){
// present yourself to the user
//
- cout << "mbt " << VERSION << " (c) CLST, ILK and CLiPS 1998 - 2016." << endl
+ cerr << "mbt " << VERSION << " (c) CLST, ILK and CLiPS 1998 - 2017." << endl
<< "Memory Based Tagger " << endl
<< "CLST - Centre for Language and Speech Technology,"
<< "Radboud University" << endl
diff --git a/src/Sentence.cxx b/src/Sentence.cxx
index 3a6e692..2719e54 100644
--- a/src/Sentence.cxx
+++ b/src/Sentence.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -44,15 +44,19 @@ namespace Tagger {
using namespace std;
const string Separators = "\t \n";
- const int MAXTCPBUF = 65536;
// New enriched word.
//
- word::word( const string& some_word, const vector<string>& extra_features, const string& some_tag){
- the_word = some_word;
- word_tag = some_tag;
+ word::word( const string& some_word,
+ const vector<string>& extra_features,
+ const string& some_tag ):
+ the_word( some_word ),
+ word_tag( some_tag ),
+ word_amb_tag( -1 ),
+ word_ass_tag( -1 ),
+ extraFeatures( extra_features )
+ {
the_word_index = -1;
- extraFeatures = extra_features;
}
// Delete a word
@@ -73,8 +77,9 @@ namespace Tagger {
}
void sentence::clear(){
- for ( unsigned int i=0; i < no_words; i++ )
- delete Words[i];
+ for ( const auto& w : Words ){
+ delete w;
+ }
Words.clear();
no_words = 0;
}
@@ -87,7 +92,6 @@ namespace Tagger {
string sentence::getenr( unsigned int index ){
string result;
if ( index < no_words ){
- const std::vector<std::string> enr = getEnrichments( index );
auto it = Words[index]->extraFeatures.cbegin();
while( it != Words[index]->extraFeatures.cend() ){
result += *it;
@@ -103,10 +107,10 @@ namespace Tagger {
//
void sentence::print( ostream &os ) const{
os << "Sentence :'";
- if ( no_words != 0 ){
- for ( unsigned int i = 0; i < no_words-1; i++ )
- os << Words[i]->the_word << ", ";
- os << Words[no_words-1]->the_word;
+ for ( const auto& w : Words ){
+ os << w->the_word;
+ if ( &w != &Words.back() )
+ os << ", ";
}
os << "'";
}
@@ -136,24 +140,31 @@ namespace Tagger {
bool sentence::init_windowing( Lexicon &lex,
StringHash& TheLex ) {
- if ( UTAG == -1 )
- UTAG = TheLex.Hash( UNKNOWN );
+ if ( UTAG == -1 ){
+#pragma omp critical (hasher)
+ {
+ UTAG = TheLex.Hash( UNKNOWN );
+ }
+ }
if ( no_words == 0 ) {
// cerr << "ERROR: empty sentence?!" << endl;
return false;
}
else {
- LexInfo * foundInfo;
- word *cur_word;
- for ( unsigned int wpos = 0; wpos < no_words; ++wpos ){
- cur_word = Words[wpos];
- cur_word->the_word_index = TheLex.Hash( cur_word->the_word );
+ for ( const auto& cur_word : Words ){
+#pragma omp critical (hasher)
+ {
+ cur_word->the_word_index = TheLex.Hash( cur_word->the_word );
+ }
// look up ambiguous tag in the dictionary
//
- foundInfo = lex.Lookup( cur_word->the_word );
- if( foundInfo != NULL ){
+ LexInfo *foundInfo = lex.Lookup( cur_word->the_word );
+ if ( foundInfo != NULL ){
// cerr << "MT Lookup(" << cur_word->the_word << ") gave " << *foundInfo << endl;
- cur_word->word_amb_tag = TheLex.Hash( foundInfo->Trans() );
+#pragma omp critical (hasher)
+ {
+ cur_word->word_amb_tag = TheLex.Hash( foundInfo->Trans() );
+ }
}
else {
// cerr << "MT Lookup(" << cur_word->the_word << ") gave NILL" << endl;
@@ -176,7 +187,12 @@ namespace Tagger {
hap += 'N';
if ( hap.length() == 6 )
hap += '0';
- return TheLex.Hash( hap );
+ int result = -1;
+#pragma omp critical (hasher)
+ {
+ result = TheLex.Hash( hap );
+ }
+ return result;
}
bool sentence::nextpat( MatchAction& Action, vector<int>& Pat,
@@ -214,21 +230,23 @@ namespace Tagger {
// Prefix?
//
if (aTemplate->numprefix > 0) {
- for ( size_t j = 0;
- j < (size_t)aTemplate->numprefix; j++) {
+ for ( size_t j = 0; j < (size_t)aTemplate->numprefix; ++j ) {
string addChars = "_";
if ( j < CurWLen )
addChars += current_word->the_word[j];
else
addChars += '='; // "_=" denotes "no value"
- Pat[i_feature] = TheLex.Hash( addChars );
+#pragma omp critical (hasher)
+ {
+ Pat[i_feature] = TheLex.Hash( addChars );
+ }
i_feature++;
}
}
for ( unsigned int i = 0, c_pos = position - aTemplate->word_focuspos;
i < aTemplate->wordslots;
- i++, c_pos++) {
+ ++i, ++c_pos ) {
// Now loop.
//
// depending on the slot type, transfer the appropriate
@@ -259,7 +277,10 @@ namespace Tagger {
}
}
else { // Out of context.
- Pat[i_feature] = TheLex.Hash( DOT );
+#pragma omp critical (hasher)
+ {
+ Pat[i_feature] = TheLex.Hash( DOT );
+ }
i_feature++;
}
} // i
@@ -268,7 +289,7 @@ namespace Tagger {
//
for ( unsigned int ii = 0, cc_pos = position - aTemplate->focuspos;
ii < aTemplate->numslots;
- ii++, cc_pos++ ) {
+ ++ii, ++cc_pos ) {
// move a pointer to the position of the word that
// should occupy the present template slot
@@ -302,7 +323,10 @@ namespace Tagger {
}
}
else{ // Out of context.
- Pat[i_feature] = TheLex.Hash( DOT );
+#pragma omp critical (hasher)
+ {
+ Pat[i_feature] = TheLex.Hash( DOT );
+ }
i_feature++;
}
} // i
@@ -310,13 +334,16 @@ namespace Tagger {
// Suffix?
//
if (aTemplate->numsuffix > 0) {
- for ( size_t j = aTemplate->numsuffix; j > 0; j--) {
+ for ( size_t j = aTemplate->numsuffix; j > 0; --j ) {
string addChars = "_";
if ( j <= CurWLen )
addChars += current_word->the_word[CurWLen - j];
else
addChars += '=';
- Pat[i_feature] = TheLex.Hash( addChars );
+#pragma omp critical (hasher)
+ {
+ Pat[i_feature] = TheLex.Hash( addChars );
+ }
i_feature++;
}
}
@@ -329,7 +356,10 @@ namespace Tagger {
addChars = "_H";
else
addChars = "_0";
- Pat[i_feature] = TheLex.Hash( addChars );
+#pragma omp critical (hasher)
+ {
+ Pat[i_feature] = TheLex.Hash( addChars );
+ }
i_feature++;
}
@@ -341,7 +371,10 @@ namespace Tagger {
addChars += 'C';
else
addChars += '0';
- Pat[i_feature] = TheLex.Hash( addChars );
+#pragma omp critical (hasher)
+ {
+ Pat[i_feature] = TheLex.Hash( addChars );
+ }
i_feature++;
}
@@ -349,13 +382,16 @@ namespace Tagger {
//
if (aTemplate->numeric) {
string addChars = "_0";
- for ( unsigned int j = 0; j < CurWLen; j++) {
+ for ( unsigned int j = 0; j < CurWLen; ++j ) {
if( isdigit(current_word->the_word[j]) ){
addChars[1] = 'N';
break;
}
}
- Pat[i_feature] = TheLex.Hash( addChars );
+#pragma omp critical (hasher)
+ {
+ Pat[i_feature] = TheLex.Hash( addChars );
+ }
i_feature++;
}
// cerr << "next_pat: i_feature = " << i_feature << endl;
@@ -412,14 +448,13 @@ namespace Tagger {
else if ( Utt_Terminator( line ) ){
return true;
}
- vector<string> parts;
- int num = TiCC::split_at_first_of( line, parts, Separators );
- if ( num != 2 ){
-#pragma omp critical
+ vector<string> parts = TiCC::split_at_first_of( line, Separators );
+ if ( parts.size() != 2 ){
+#pragma omp critical (errors)
{
cerr << endl << "error in line " << line_no << " : '"
<< line << "' (skipping it)" << endl;
- if ( num == 1 ){
+ if ( parts.size() == 1 ){
cerr << "missing a tag ? " << endl;
}
else {
@@ -451,11 +486,10 @@ namespace Tagger {
}
continue;
}
- vector<string> parts;
- TiCC::split_at_first_of( line, parts, " \t" );
+ vector<string> parts = TiCC::split_at_first_of( line, " \t" );
line = "";
bool terminated = false;
- for ( auto p : parts ){
+ for ( const auto& p : parts ){
// cerr << "bekijk " << p << endl;
if ( Utt_Terminator( p ) ){
terminated = true;
@@ -483,7 +517,6 @@ namespace Tagger {
string line;
string Word;
string Tag;
- vector<string> extras;
while( getline( infile, line ) ){
++line_no;
line = TiCC::trim( line );
@@ -496,8 +529,8 @@ namespace Tagger {
else if ( Utt_Terminator( line ) ){
return true;
}
- size_t size = TiCC::split_at_first_of( line, extras, Separators );
- if ( size >= 2 ){
+ vector<string> extras = TiCC::split_at_first_of( line, Separators );
+ if ( extras.size() >= 2 ){
Word = extras.front();
extras.erase(extras.begin()); // expensive, but allas. extras is small
Tag = extras.back();
diff --git a/src/TagLex.cxx b/src/TagLex.cxx
index 628e4a9..6325aae 100644
--- a/src/TagLex.cxx
+++ b/src/TagLex.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -40,9 +40,8 @@
namespace Tagger {
using namespace std;
- TagInfo::TagInfo( const string& name, const string& tag ){
- Word = name;
- WordFreq = 0;
+ TagInfo::TagInfo( const string& name, const string& tag ):
+ Word(name), WordFreq(0) {
Update( tag );
}
@@ -74,7 +73,7 @@ namespace Tagger {
}
struct FS {
- FS( int f, string s ):freq(f), str(s) {};
+ FS( int f, const string& s ):freq(f), str(s) {};
int freq;
string str;
};
@@ -91,9 +90,11 @@ namespace Tagger {
sort( FreqTags.begin(), FreqTags.end(), cmpFreq );
string tmpstr;
for ( auto const& it2 : FreqTags ){
- tmpstr += it2.str + ";";
+ tmpstr += it2.str;
+ if ( &it2 != &FreqTags.back() ){
+ tmpstr += ";";
+ }
}
- tmpstr.erase(tmpstr.length()-1); //remove last ';'
StringRepr = tmpstr;
}
@@ -115,7 +116,7 @@ namespace Tagger {
}
TagInfo *TagLex::Lookup( const string& name ){
- return (TagInfo *)TagTree->Retrieve( name );
+ return reinterpret_cast<TagInfo *>(TagTree->Retrieve( name ));
}
TagInfo *TagLex::Store( const string& name, const string& tag ){
diff --git a/src/Tagger.cxx b/src/Tagger.cxx
index 095a038..171a9ea 100644
--- a/src/Tagger.cxx
+++ b/src/Tagger.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -83,6 +83,7 @@ namespace Tagger {
TopNumber = 100;
DoTop = false;
DoNpax = true;
+ DoTagList = false;
KeepIntermediateFiles = false;
KtmplStr = "ddfa";
@@ -114,65 +115,64 @@ namespace Tagger {
cloned = false;
}
- TaggerClass::TaggerClass( const TaggerClass& in ){
- cur_log = in.cur_log;
- KnownTree = in.KnownTree;
- unKnownTree = in.unKnownTree;
- TimblOptStr = in.TimblOptStr;
- FilterThreshold = in.FilterThreshold;
- Npax = in.Npax;
- TopNumber = in.TopNumber;
- DoTop = in.DoTop;
- DoNpax = in.DoNpax;
- KeepIntermediateFiles = in.KeepIntermediateFiles;
-
- KtmplStr = in.KtmplStr;
- UtmplStr = in.UtmplStr;
- L_option_name = in.L_option_name;
- EosMark = in.EosMark;
-
- Ktemplate = in.Ktemplate;
- Utemplate = in.Utemplate;
-
- UnknownTreeBaseName = in.UnknownTreeBaseName;
- KnownTreeBaseName = in.KnownTreeBaseName;
- LexFileBaseName = in.LexFileBaseName;
- MTLexFileBaseName = in.MTLexFileBaseName;
- TopNFileBaseName = in.TopNFileBaseName;
- NpaxFileBaseName = in.NpaxFileBaseName;
- UnknownTreeName = in.UnknownTreeName;
- KnownTreeName = in.KnownTreeName;
- LexFileName = in.LexFileName;
- MTLexFileName = in.MTLexFileName;
- TopNFileName = in.TopNFileName;
- NpaxFileName = in.NpaxFileName;
- TestFileName = in.TestFileName;
- TestFilePath = in.TestFilePath;
- OutputFileName = in.OutputFileName;
- SettingsFileName = in.SettingsFileName;
- SettingsFilePath = in.SettingsFilePath;
-
- initialized = in.initialized;
- Beam_Size = in.Beam_Size;
- Beam = 0;
- MT_lexicon = in.MT_lexicon;
- kwordlist = in.kwordlist;
- piped_input = in.piped_input;
- input_kind = in.input_kind;
- lexflag = in.lexflag;
- knowntreeflag = in.knowntreeflag;
- unknowntreeflag = in.unknowntreeflag;
- knowntemplateflag = in.knowntemplateflag;
- unknowntemplateflag = in.unknowntemplateflag;
- knownoutfileflag = in.knownoutfileflag;
- unknownoutfileflag = in.unknownoutfileflag;
- reverseflag = in.reverseflag;
- dumpflag = in.dumpflag;
- distance_flag = in.distance_flag;
- distrib_flag = in.distrib_flag;
- confidence_flag = in.confidence_flag;
- klistflag = in.klistflag;
- cloned = true;
+ TaggerClass::TaggerClass( const TaggerClass& in ):
+ cur_log( in.cur_log ),
+ KnownTree( in.KnownTree ),
+ unKnownTree( in.unKnownTree ),
+ initialized( in.initialized ),
+ kwordlist( in.kwordlist ),
+ uwordlist( in.uwordlist ),
+ Beam( 0 ),
+ input_kind( in.input_kind ),
+ piped_input( in.piped_input ),
+ lexflag( in.lexflag ),
+ knowntreeflag( in.knowntreeflag ),
+ unknowntreeflag( in.unknowntreeflag ),
+ knowntemplateflag( in.knowntemplateflag ),
+ unknowntemplateflag( in.unknowntemplateflag ),
+ knownoutfileflag( in.knownoutfileflag ),
+ unknownoutfileflag( in.unknownoutfileflag ),
+ reverseflag( in.reverseflag ),
+ dumpflag( in.dumpflag ),
+ distance_flag( in.distance_flag ),
+ distrib_flag( in.distrib_flag ),
+ confidence_flag( in.confidence_flag ),
+ klistflag( in.klistflag ),
+ Beam_Size( in.Beam_Size ),
+ TimblOptStr( in.TimblOptStr ),
+ FilterThreshold( in.FilterThreshold ),
+ Npax( in.Npax ),
+ TopNumber( in.TopNumber ),
+ DoTop( in.DoTop ),
+ DoNpax( in.DoNpax ),
+ DoTagList( in.DoTagList ),
+ KeepIntermediateFiles( in.KeepIntermediateFiles ),
+ KtmplStr( in.KtmplStr ),
+ UtmplStr( in.UtmplStr ),
+ L_option_name( in.L_option_name ),
+ EosMark( in.EosMark ),
+ Ktemplate( in.Ktemplate ),
+ Utemplate( in.Utemplate ),
+ MT_lexicon( in.MT_lexicon ),
+ UnknownTreeBaseName( in.UnknownTreeBaseName ),
+ KnownTreeBaseName( in.KnownTreeBaseName ),
+ LexFileBaseName( in.LexFileBaseName ),
+ MTLexFileBaseName( in.MTLexFileBaseName ),
+ TopNFileBaseName( in.TopNFileBaseName ),
+ NpaxFileBaseName( in.NpaxFileBaseName),
+ UnknownTreeName( in.UnknownTreeName),
+ KnownTreeName( in.KnownTreeName),
+ LexFileName( in.LexFileName),
+ MTLexFileName( in.MTLexFileName),
+ TopNFileName( in.TopNFileName),
+ NpaxFileName( in.NpaxFileName),
+ TestFileName( in.TestFileName),
+ TestFilePath( in.TestFilePath),
+ OutputFileName( in.OutputFileName),
+ SettingsFileName( in.SettingsFileName),
+ SettingsFilePath( in.SettingsFilePath ),
+ cloned( true )
+ {
}
bool TaggerClass::setLog( LogStream& os ){
diff --git a/src/convert.cxx b/src/convert.cxx
index 1ba9bc8..82d6315 100644
--- a/src/convert.cxx
+++ b/src/convert.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
diff --git a/src/simpletest.cxx b/src/simpletest.cxx
index 487cb46..4e23693 100644
--- a/src/simpletest.cxx
+++ b/src/simpletest.cxx
@@ -1,5 +1,5 @@
/*
- Copyright (c) 1998 - 2016
+ Copyright (c) 1998 - 2017
CLST - Radboud University
ILK - Tilburg University
CLiPS - University of Antwerp
@@ -32,8 +32,16 @@ using namespace std;
using namespace Tagger;
int main(){
- string path = getenv( "topsrcdir" );
- MbtAPI::GenerateTagger( "-T " + path + "/example/eindh.data -s ./simple.setting " );
+ string path;
+ const char *ev = getenv( "topsrcdir" );
+ if ( ev ){
+ path = ev;
+ }
+ else {
+ path = ".";
+ }
+ string command = "-T " + path + "/example/eindh.data -s ./simple.setting";
+ MbtAPI::GenerateTagger( command );
MbtAPI demo( "-s ./simple.setting" );
cerr << demo.Tag( "dit is een test" ) << endl;
vector<TagResult> v = demo.TagLine( "Test regel 2 ." );
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mbt.git
More information about the debian-science-commits
mailing list