[frog] 01/01: Imported Upstream version 0.12.19
Joost van Baal
joostvb at moszumanska.debian.org
Sun Jul 5 03:33:10 UTC 2015
This is an automated email from the git hooks/post-receive script.
joostvb pushed a commit to annotated tag upstream/0.12.19
in repository frog.
commit 8531d6c79dfdf5833fafb218b9b16134d486c2ff
Author: Joost van Baal-Ilić <joostvb at nusku.mdcc.cx>
Date: Sun Jul 5 05:26:21 2015 +0200
Imported Upstream version 0.12.19
---
ChangeLog | 389 +++++++++++
NEWS | 13 +-
configure | 368 +++++-----
configure.ac | 42 +-
include/frog/Frog.h | 14 +-
include/frog/FrogAPI.h | 138 ++++
include/frog/Makefile.am | 2 +-
include/frog/Makefile.in | 2 +-
include/frog/Parser.h | 7 +-
include/frog/cgn_tagger_mod.h | 4 +-
include/frog/iob_tagger_mod.h | 4 +-
include/frog/mblem_mod.h | 13 +-
include/frog/mbma_mod.h | 18 +-
include/frog/mwu_chunker_mod.h | 4 +-
include/frog/ner_tagger_mod.h | 4 +-
include/frog/ucto_tokenizer_mod.h | 36 +-
m4/ax_lib_readline.m4 | 4 +-
m4/libtool.m4 | 314 ++++++---
m4/ltoptions.m4 | 19 +-
src/Frog-util.cxx | 30 +-
src/Frog.cxx | 1394 ++++++-------------------------------
src/FrogAPI.cxx | 1073 ++++++++++++++++++++++++++++
src/Makefile.am | 7 +-
src/Makefile.in | 11 +-
src/Parser.cxx | 14 +-
src/cgn_tagger_mod.cxx | 9 +-
src/iob_tagger_mod.cxx | 9 +-
src/mblem_mod.cxx | 42 +-
src/mblem_prog.cxx | 87 +--
src/mbma_mod.cxx | 257 ++++---
src/mbma_prog.cxx | 89 ++-
src/mwu_chunker_mod.cxx | 6 +-
src/ner_tagger_mod.cxx | 7 +-
src/ucto_tokenizer_mod.cxx | 67 +-
34 files changed, 2681 insertions(+), 1816 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 06a7687..597608f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,392 @@
+2014-11-28 13:24 mvgompel
+
+ * [r17877] configure.ac: Changing behaviour of --with-python, point
+ to the executable directly rather than the directory containing a
+ python executable (python may be called python2 on some systems)
+
+2014-11-27 13:18 sloot
+
+ * [r17875] configure.ac: reverted good ICU checking to bad checking
+ because there are too many
+ old distributions around. And OSX screws up anyway.
+
+2014-11-18 14:44 sloot
+
+ * [r17829] src/Frog.cxx: removed debug line
+
+2014-11-18 14:29 sloot
+
+ * [r17828] src/Frog.cxx: fixed a bug and an oops
+
+2014-11-13 09:56 sloot
+
+ * [r17822] include/frog/mbma_mod.h, src/mbma_mod.cxx: simplified
+ (using smarter addMorphologLayer())
+
+2014-11-11 13:44 sloot
+
+ * [r17809] configure.ac, include/frog/mblem_mod.h,
+ src/mblem_mod.cxx: use smarter addPosAnnotation
+
+2014-10-20 13:24 sloot
+
+ * [r17750] src/Frog.cxx: oeps2
+
+2014-10-20 13:16 sloot
+
+ * [r17749] src/FrogAPI.cxx: oesp
+
+2014-10-20 09:22 mvgompel
+
+ * [r17747] configure.ac: macro fix
+
+2014-10-19 11:50 mvgompel
+
+ * [r17738] configure.ac: added compiler fallback
+
+2014-09-25 16:15 sloot
+
+ * [r17715] src/FrogAPI.cxx: aiaiaiaiaia
+
+2014-09-25 16:04 sloot
+
+ * [r17714] include/frog/Parser.h, src/FrogAPI.cxx, src/Parser.cxx,
+ src/iob_tagger_mod.cxx: fixed FoLiA input for IOB and Parser too
+
+2014-09-25 15:13 sloot
+
+ * [r17713] include/frog/mblem_mod.h, include/frog/mbma_mod.h,
+ src/FrogAPI.cxx, src/mbma_mod.cxx: mblem en mbla now also work on
+ FoLiA with existing lemma's and
+ morphemes (in a different set, of course)
+
+2014-09-25 13:09 sloot
+
+ * [r17712] src/mblem_mod.cxx: catch trouble early
+
+2014-09-23 13:27 sloot
+
+ * [r17708] src/FrogAPI.cxx, src/mblem_mod.cxx, src/mbma_mod.cxx:
+ last minute fix and layout improvements
+
+2014-09-23 09:17 sloot
+
+ * [r17705] NEWS, configure.ac: bumping
+
+2014-09-18 16:02 sloot
+
+ * [r17683] src/FrogAPI.cxx: signal problems with num_threads
+
+2014-09-16 14:28 sloot
+
+ * [r17673] NEWS: updated NEWS
+
+2014-09-16 12:32 sloot
+
+ * [r17672] include/frog/Frog.h, include/frog/FrogAPI.h,
+ src/Frog.cxx, src/FrogAPI.cxx: moved the default configfile name
+ to the API
+
+2014-09-16 10:51 sloot
+
+ * [r17671] include/frog/FrogAPI.h, src/Frog.cxx, src/FrogAPI.cxx:
+ refactoring and renaming!
+
+2014-09-16 10:27 sloot
+
+ * [r17670] include/frog/FrogAPI.h, src/FrogAPI.cxx: refactoring the
+ API. step 2
+
+2014-09-16 09:04 sloot
+
+ * [r17668] src/FrogAPI.cxx: small refactoring. more to come...
+
+2014-09-15 15:58 sloot
+
+ * [r17665] include/frog/FrogAPI.h, src/FrogAPI.cxx,
+ src/mblem_prog.cxx, src/mbma_prog.cxx: svn tags
+
+2014-09-15 15:56 sloot
+
+ * [r17664] src/FrogAPI.cxx: Copyright text added
+
+2014-09-15 15:37 sloot
+
+ * [r17663] include/frog/FrogAPI.h, src/FrogAPI.cxx: attempt to help
+ Maarten
+
+2014-09-15 14:59 sloot
+
+ * [r17661] include/frog/ucto_tokenizer_mod.h, src/FrogAPI.cxx,
+ src/ucto_tokenizer_mod.cxx: cleaner?
+
+2014-09-15 14:53 sloot
+
+ * [r17660] src/FrogAPI.cxx: leak
+
+2014-09-15 14:50 sloot
+
+ * [r17659] include/frog/FrogAPI.h, src/FrogAPI.cxx: hacking along
+
+2014-09-15 13:59 sloot
+
+ * [r17658] src/Frog-util.cxx, src/mblem_prog.cxx, src/mbma_mod.cxx,
+ src/mbma_prog.cxx: refactoring and cleaning up
+
+2014-09-15 13:31 sloot
+
+ * [r17657] include/frog/Frog.h: comment changed
+
+2014-09-15 13:31 sloot
+
+ * [r17656] src/mblem_prog.cxx: modernized
+
+2014-09-15 13:03 sloot
+
+ * [r17655] include/frog/FrogAPI.h,
+ include/frog/ucto_tokenizer_mod.h, src/Frog.cxx, src/FrogAPI.cxx,
+ src/mblem_prog.cxx, src/mbma_prog.cxx,
+ src/ucto_tokenizer_mod.cxx: - refactoring the API.
+ - ucto_tokenizer has the same init() as all other modules now.
+
+2014-09-15 10:36 sloot
+
+ * [r17653] src/Frog.cxx, src/FrogAPI.cxx: better
+
+2014-09-15 10:23 sloot
+
+ * [r17652] include/frog/FrogAPI.h, src/Frog.cxx, src/FrogAPI.cxx:
+ refactoring the API. Not done yet
+
+2014-09-13 20:18 mvgompel
+
+ * [r17650] src/FrogAPI.cxx, src/cgn_tagger_mod.cxx,
+ src/ner_tagger_mod.cxx: debug value wasn't always initialised to
+ 0 prior to processing config
+
+2014-09-13 20:00 mvgompel
+
+ * [r17649] include/frog/FrogAPI.h, src/FrogAPI.cxx: moved "frogging
+ in total took" inside !hidetimer block.. (for some mysterious
+ reason something stumbles over it).. Also renamed string Test to
+ Testtostring()
+
+2014-09-13 18:13 mvgompel
+
+ * [r17647] include/frog/FrogAPI.h,
+ include/frog/ucto_tokenizer_mod.h, src/FrogAPI.cxx,
+ src/ucto_tokenizer_mod.cxx: updates needed for python-frog
+ binding
+
+2014-09-12 11:47 mvgompel
+
+ * [r17646] src/ucto_tokenizer_mod.cxx: typo
+
+2014-09-09 15:34 sloot
+
+ * [r17634] src/Frog.cxx: modernized commandline handling
+
+2014-09-09 14:46 sloot
+
+ * [r17633] src/FrogAPI.cxx: added line that was lost somewhere in
+ Maartens refactoring process
+
+2014-09-09 14:05 sloot
+
+ * [r17630] include/frog/mbma_mod.h, src/mbma_mod.cxx: fixed a
+ memory leak.
+
+2014-09-08 14:24 sloot
+
+ * [r17627] include/frog/FrogAPI.h, src/Frog.cxx, src/FrogAPI.cxx,
+ src/mblem_mod.cxx: more cleanup, refactoring and a potential
+ memory leak
+
+2014-09-08 13:48 sloot
+
+ * [r17626] include/frog/FrogAPI.h, src/Frog.cxx, src/FrogAPI.cxx:
+ some refactoring: avoid copies of configuration, pass more consts
+ (and references)
+
+2014-09-08 12:17 sloot
+
+ * [r17625] include/frog/FrogAPI.h, src/FrogAPI.cxx: moved 'real'
+ code from .h to .cxx file
+
+2014-09-08 11:14 mvgompel
+
+ * [r17623] include/frog/FrogAPI.h, src/FrogAPI.cxx: Added a Test()
+ variant that returns a string (to be used for the python binding)
+
+2014-09-08 09:30 mvgompel
+
+ * [r17619] include/frog/ucto_tokenizer_mod.h,
+ src/ucto_tokenizer_mod.cxx: added tokenizestring() method
+
+2014-09-07 19:24 mvgompel
+
+ * [r17616] include/frog/Frog.h, include/frog/FrogAPI.h,
+ include/frog/Makefile.am, include/frog/Parser.h,
+ include/frog/cgn_tagger_mod.h, include/frog/iob_tagger_mod.h,
+ include/frog/mblem_mod.h, include/frog/mbma_mod.h,
+ include/frog/mwu_chunker_mod.h, include/frog/ner_tagger_mod.h,
+ include/frog/ucto_tokenizer_mod.h, src/Frog-util.cxx,
+ src/Frog.cxx, src/FrogAPI.cxx, src/Makefile.am, src/Parser.cxx,
+ src/cgn_tagger_mod.cxx, src/iob_tagger_mod.cxx,
+ src/mblem_mod.cxx, src/mblem_prog.cxx, src/mbma_mod.cxx,
+ src/mbma_prog.cxx, src/mwu_chunker_mod.cxx,
+ src/ner_tagger_mod.cxx, src/ucto_tokenizer_mod.cxx: Major Frog
+ refactoring: Added FrogAPI, exposing a Frog API for public use.
+ Frog.cxx is sized down and uses FrogAPI. Also weeded out most of
+ the global variables (=evil) and increased modularity. Everything
+ seems to work, but more extensive testing is recommended.
+
+2014-09-07 12:49 mvgompel
+
+ * [r17614] include/frog/Frog.h, src/Frog.cxx: fix
+
+2014-09-07 11:28 mvgompel
+
+ * [r17613] include/frog/Frog.h, src/Frog.cxx: Refactoring Frog
+ interface so I can use it as an external library later: added
+ declarations to Frog.h, introduced FrogOptions class to hold the
+ various options (rather than global variables)
+
+2014-08-25 09:48 sloot
+
+ * [r17552] configure.ac, src/Frog.cxx: use ticcutils 0,6
+
+2014-08-21 15:25 sloot
+
+ * [r17545] src/Frog.cxx: mark options with optional arguments.
+
+2014-08-18 13:08 sloot
+
+ * [r17527] src/Frog.cxx: Include Python.h first!
+ see: https://docs.python.org/2/c-api/intro.html#includes
+ "Note
+
+ Since Python may define some pre-processor definitions which
+ affect the standard headers on some systems, you must include
+ Python.h before any standard headers are included"
+
+2014-08-14 13:48 sloot
+
+ * [r17519] src/Frog.cxx: next attempt :{
+
+2014-08-14 13:42 sloot
+
+ * [r17518] src/Frog.cxx: attempt to handle missing readline() stuff
+ better.
+
+2014-08-07 08:42 sloot
+
+ * [r17489] src/Parser.cxx: fixed xml:id for dependencies
+
+2014-08-06 14:55 sloot
+
+ * [r17487] src/iob_tagger_mod.cxx: added xml:id to Chunks too
+
+2014-08-06 14:47 sloot
+
+ * [r17486] src/mwu_chunker_mod.cxx, src/ner_tagger_mod.cxx: added
+ xml:id to Enteties and Entity
+
+2014-08-06 14:10 sloot
+
+ * [r17485] src/cgn_tagger_mod.cxx: map EMOTICONS ro SPEC(symb)
+
+2014-08-05 09:55 sloot
+
+ * [r17474] configure.ac: make sure libxml2 is there
+
+2014-07-16 15:29 sloot
+
+ * [r17460] src/mbma_mod.cxx: added debug lines to examine "eer" and
+ "ere" exceptions
+
+2014-07-16 15:28 sloot
+
+ * [r17459] src/Frog.cxx: better handling of -d and --debug=
+ remove last \n on interactive use
+
+2014-07-10 12:11 sloot
+
+ * [r17440] include/frog/mbma_mod.h, src/mbma_mod.cxx: don't apply
+ rules that don't apply :)
+
+2014-07-09 15:51 sloot
+
+ * [r17439] include/frog/mbma_mod.h, src/mbma_mod.cxx: some more
+ refactoring
+
+2014-07-09 15:22 sloot
+
+ * [r17438] include/frog/mbma_mod.h, src/mbma_mod.cxx: made
+ debugging easier
+
+2014-07-09 14:19 sloot
+
+ * [r17437] src/Frog.cxx: added an commandline option to switch
+ debugging per module on/off
+
+2014-07-09 13:13 sloot
+
+ * [r17436] include/frog/Frog.h, src/Makefile.am,
+ src/cgn_tagger_mod.cxx, src/iob_tagger_mod.cxx,
+ src/mblem_mod.cxx, src/mblem_prog.cxx, src/mbma_mod.cxx,
+ src/mbma_prog.cxx, src/mwu_chunker_mod.cxx,
+ src/ner_tagger_mod.cxx, src/ucto_tokenizer_mod.cxx: some
+ refactoring to get rid of some global symbols.
+ debug level set using the configurartin file now.
+ compile with -std=c++0x. Supports 'long long'.
+
+2014-07-07 10:56 sloot
+
+ * [r17425] src/mbma_mod.cxx: fixed 2 more daring problems
+
+2014-07-07 09:20 sloot
+
+ * [r17424] src/mbma_mod.cxx: fixed problem with daring bracketing.
+ still shaky
+
+2014-07-03 16:04 sloot
+
+ * [r17421] src/mbma_mod.cxx: oeps. debugging was on
+
+2014-07-03 16:02 sloot
+
+ * [r17420] include/frog/mbma_mod.h, src/mbma_mod.cxx: avoid empty
+ brackets (as creacted by 'pseudo-regiseur')
+
+2014-07-03 15:19 sloot
+
+ * [r17419] src/Frog.cxx: pass debug option to the gobal config
+
+2014-06-24 08:32 sloot
+
+ * [r17379] src/Frog.cxx, src/mblem_mod.cxx: fixed LIBREADLINE
+ include problem.
+ Python.h sets HAVE_LIBREADLINE unconditionally
+
+2014-06-19 09:59 sloot
+
+ * [r17366] src/mbma_mod.cxx: small hack to satisfy the compiler
+
+2014-06-17 15:19 sloot
+
+ * [r17358] src/Frog.cxx, src/iob_tagger_mod.cxx: produce XML even
+ on empty file
+
+2014-06-11 14:01 sloot
+
+ * [r17334] src/tst.sh: re-re-refiexed make distcheck
+
+2014-06-11 13:53 sloot
+
+ * [r17333] NEWS, src/Frog.cxx: fixed interactive mode
+ NEWS!
+
2014-06-04 08:46 sloot
* [r17286] src/mbma_mod.cxx: small fix for 'ge' prefix
diff --git a/NEWS b/NEWS
index 2eefe8d..8b2214d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,13 +1,22 @@
-0.12.18 - 2014-06
+0.12.19 - 2014-09-23
+[Ko van der Sloot]
+ * release
+
+0.12.18 - 2014-09-16
+ * A true FrogAPI is added
+ * depends on ticcutils 0.6 or above (for CommandLine mostly)
* a lot of changes in the MBMA module. It now can produce nested morphemes
using the --daring option of frog. Still experimental!
* Frog can now run interactive with readline support too.
* -t option is optional, multiple inputfiles are supported
* -o works for multiple files
+ * -d works better now (--debug even better)
+ * added xml:id to Entities and Chunks
+ * a lot off small bug fixes
0.12.17 - 2013-04-03
* the servermode now kan handle multiline input (non XML only).
- Can be switched of with the -n option.
+ Can be switched off with the -n option.
* A lot of refactoring regarding FoLiA stuff
* start using ticcutils
* the -Q option now works
diff --git a/configure b/configure
index 63591cd..5991827 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for frog 0.12.18.
+# Generated by GNU Autoconf 2.69 for frog 0.12.19.
#
# Report bugs to <timbl at uvt.nl>.
#
@@ -589,8 +589,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='frog'
PACKAGE_TARNAME='frog'
-PACKAGE_VERSION='0.12.18'
-PACKAGE_STRING='frog 0.12.18'
+PACKAGE_VERSION='0.12.19'
+PACKAGE_STRING='frog 0.12.19'
PACKAGE_BUGREPORT='timbl at uvt.nl'
PACKAGE_URL=''
@@ -1393,7 +1393,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures frog 0.12.18 to adapt to many kinds of systems.
+\`configure' configures frog 0.12.19 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1463,7 +1463,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of frog 0.12.18:";;
+ short | recursive ) echo "Configuration of frog 0.12.19:";;
esac
cat <<\_ACEOF
@@ -1486,19 +1486,19 @@ Optional Features:
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
- --with-pic try to use only PIC/non-PIC objects [default=use
+ --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use
both]
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
--with-sysroot=DIR Search for dependent libraries within DIR
(or the compiler's sysroot if not specified).
- --with-icu=DIR use ICU installed in <DIR>
+ --with-icu=DIR use ICU installed in <DIR>
--with-timbl=DIR use timbl installed in <DIR>
--with-ticcutils=DIR use ticcutils installed in <DIR>
--with-timblserver=DIR use timblserver installed in <DIR>
--with-mbt=DIR use mbt installed in <DIR>
--with-folia=DIR use libfolia installed in <DIR>
--with-ucto=DIR use ucto installed in <DIR>
- --with-python=DIR use python executable installed in <DIR>
+ --with-python=FILE use python executable <FILE>
Some influential environment variables:
CXX C++ compiler command
@@ -1609,7 +1609,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-frog configure 0.12.18
+frog configure 0.12.19
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2262,7 +2262,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by frog $as_me 0.12.18, which was
+It was created by frog $as_me 0.12.19, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -3125,7 +3125,7 @@ fi
# Define the identity of the package.
PACKAGE='frog'
- VERSION='0.12.18'
+ VERSION='0.12.19'
cat >>confdefs.h <<_ACEOF
@@ -3240,7 +3240,7 @@ if test -z "$CXX"; then
CXX=$CCC
else
if test -n "$ac_tool_prefix"; then
- for ac_prog in g++
+ for ac_prog in g++ c++
do
# Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
@@ -3284,7 +3284,7 @@ fi
fi
if test -z "$CXX"; then
ac_ct_CXX=$CXX
- for ac_prog in g++
+ for ac_prog in g++ c++
do
# Extract the first word of "$ac_prog", so it can be a program name with args.
set dummy $ac_prog; ac_word=$2
@@ -5514,6 +5514,11 @@ else
lt_cv_sys_max_cmd_len=196608
;;
+ os2*)
+ # The test takes a long time on OS/2.
+ lt_cv_sys_max_cmd_len=8192
+ ;;
+
osf*)
# Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
# due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
@@ -5540,7 +5545,8 @@ else
;;
*)
lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
- if test -n "$lt_cv_sys_max_cmd_len"; then
+ if test -n "$lt_cv_sys_max_cmd_len" && \
+ test undefined != "$lt_cv_sys_max_cmd_len"; then
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
else
@@ -5553,7 +5559,7 @@ else
# If test is not a shell built-in, we'll probably end up computing a
# maximum length that is only half of the actual maximum length, but
# we can't tell.
- while { test "X"`func_fallback_echo "$teststring$teststring" 2>/dev/null` \
+ while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
= "X$teststring$teststring"; } >/dev/null 2>&1 &&
test $i != 17 # 1/2 MB should be enough
do
@@ -5941,10 +5947,6 @@ freebsd* | dragonfly*)
fi
;;
-gnu*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
haiku*)
lt_cv_deplibs_check_method=pass_all
;;
@@ -5982,8 +5984,8 @@ irix5* | irix6* | nonstopux*)
lt_cv_deplibs_check_method=pass_all
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
lt_cv_deplibs_check_method=pass_all
;;
@@ -6623,13 +6625,13 @@ old_postuninstall_cmds=
if test -n "$RANLIB"; then
case $host_os in
openbsd*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
;;
*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
;;
esac
- old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
+ old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
fi
case $host_os in
@@ -6776,6 +6778,7 @@ for ac_symprfx in "" "_"; do
# which start with @ or ?.
lt_cv_sys_global_symbol_pipe="$AWK '"\
" {last_section=section; section=\$ 3};"\
+" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
" \$ 0!~/External *\|/{next};"\
" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
@@ -7064,7 +7067,7 @@ ia64-*-hpux*)
rm -rf conftest*
;;
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
@@ -7080,9 +7083,19 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
LD="${LD-ld} -m elf_i386_fbsd"
;;
x86_64-*linux*)
- LD="${LD-ld} -m elf_i386"
+ case `/usr/bin/file conftest.o` in
+ *x86-64*)
+ LD="${LD-ld} -m elf32_x86_64"
+ ;;
+ *)
+ LD="${LD-ld} -m elf_i386"
+ ;;
+ esac
+ ;;
+ powerpc64le-*)
+ LD="${LD-ld} -m elf32lppclinux"
;;
- ppc64-*linux*|powerpc64-*linux*)
+ powerpc64-*)
LD="${LD-ld} -m elf32ppclinux"
;;
s390x-*linux*)
@@ -7101,7 +7114,10 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
x86_64-*linux*)
LD="${LD-ld} -m elf_x86_64"
;;
- ppc*-*linux*|powerpc*-*linux*)
+ powerpcle-*)
+ LD="${LD-ld} -m elf64lppc"
+ ;;
+ powerpc-*)
LD="${LD-ld} -m elf64ppc"
;;
s390*-*linux*|s390*-*tpf*)
@@ -7164,7 +7180,7 @@ $as_echo "$lt_cv_cc_needs_belf" >&6; }
CFLAGS="$SAVE_CFLAGS"
fi
;;
-sparc*-*solaris*)
+*-*solaris*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
@@ -7175,7 +7191,20 @@ sparc*-*solaris*)
case `/usr/bin/file conftest.o` in
*64-bit*)
case $lt_cv_prog_gnu_ld in
- yes*) LD="${LD-ld} -m elf64_sparc" ;;
+ yes*)
+ case $host in
+ i?86-*-solaris*)
+ LD="${LD-ld} -m elf_x86_64"
+ ;;
+ sparc*-*-solaris*)
+ LD="${LD-ld} -m elf64_sparc"
+ ;;
+ esac
+ # GNU ld 2.21 introduced _sol2 emulations. Use them if available.
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+ LD="${LD-ld}_sol2"
+ fi
+ ;;
*)
if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
LD="${LD-ld} -64"
@@ -7815,7 +7844,13 @@ else
$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
-dynamiclib -Wl,-single_module conftest.c 2>conftest.err
_lt_result=$?
- if test -f libconftest.dylib && test ! -s conftest.err && test $_lt_result = 0; then
+ # If there is a non-empty error log, and "single_module"
+ # appears in it, assume the flag caused a linker warning
+ if test -s conftest.err && $GREP single_module conftest.err; then
+ cat conftest.err >&5
+ # Otherwise, if the output was created with a 0 exit code from
+ # the compiler, it worked.
+ elif test -f libconftest.dylib && test $_lt_result -eq 0; then
lt_cv_apple_cc_single_mod=yes
else
cat conftest.err >&5
@@ -7826,6 +7861,7 @@ else
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5
$as_echo "$lt_cv_apple_cc_single_mod" >&6; }
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5
$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; }
if ${lt_cv_ld_exported_symbols_list+:} false; then :
@@ -7858,6 +7894,7 @@ rm -f core conftest.err conftest.$ac_objext \
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5
$as_echo "$lt_cv_ld_exported_symbols_list" >&6; }
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5
$as_echo_n "checking for -force_load linker flag... " >&6; }
if ${lt_cv_ld_force_load+:} false; then :
@@ -7879,7 +7916,9 @@ _LT_EOF
echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5
$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
_lt_result=$?
- if test -f conftest && test ! -s conftest.err && test $_lt_result = 0 && $GREP forced_load conftest 2>&1 >/dev/null; then
+ if test -s conftest.err && $GREP force_load conftest.err; then
+ cat conftest.err >&5
+ elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
lt_cv_ld_force_load=yes
else
cat conftest.err >&5
@@ -8295,7 +8334,22 @@ fi
# Check whether --with-pic was given.
if test "${with_pic+set}" = set; then :
- withval=$with_pic; pic_mode="$withval"
+ withval=$with_pic; lt_p=${PACKAGE-default}
+ case $withval in
+ yes|no) pic_mode=$withval ;;
+ *)
+ pic_mode=default
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for lt_pkg in $withval; do
+ IFS="$lt_save_ifs"
+ if test "X$lt_pkg" = "X$lt_p"; then
+ pic_mode=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac
else
pic_mode=default
fi
@@ -8373,6 +8427,10 @@ LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+
+
+
+
test -z "$LN_S" && LN_S="ln -s"
@@ -8832,7 +8890,9 @@ lt_prog_compiler_static=
case $cc_basename in
nvcc*) # Cuda Compiler Driver 2.2
lt_prog_compiler_wl='-Xlinker '
- lt_prog_compiler_pic='-Xcompiler -fPIC'
+ if test -n "$lt_prog_compiler_pic"; then
+ lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic"
+ fi
;;
esac
else
@@ -8876,7 +8936,7 @@ lt_prog_compiler_static=
lt_prog_compiler_static='-non_shared'
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
# old Intel for x86_64 which still supported -KPIC.
ecc*)
@@ -8923,18 +8983,33 @@ lt_prog_compiler_static=
;;
*)
case `$CC -V 2>&1 | sed 5q` in
- *Sun\ F* | *Sun*Fortran*)
+ *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*)
# Sun Fortran 8.3 passes all unrecognized flags to the linker
lt_prog_compiler_pic='-KPIC'
lt_prog_compiler_static='-Bstatic'
lt_prog_compiler_wl=''
;;
+ *Sun\ F* | *Sun*Fortran*)
+ lt_prog_compiler_pic='-KPIC'
+ lt_prog_compiler_static='-Bstatic'
+ lt_prog_compiler_wl='-Qoption ld '
+ ;;
*Sun\ C*)
# Sun C 5.9
lt_prog_compiler_pic='-KPIC'
lt_prog_compiler_static='-Bstatic'
lt_prog_compiler_wl='-Wl,'
;;
+ *Intel*\ [CF]*Compiler*)
+ lt_prog_compiler_wl='-Wl,'
+ lt_prog_compiler_pic='-fPIC'
+ lt_prog_compiler_static='-static'
+ ;;
+ *Portland\ Group*)
+ lt_prog_compiler_wl='-Wl,'
+ lt_prog_compiler_pic='-fpic'
+ lt_prog_compiler_static='-Bstatic'
+ ;;
esac
;;
esac
@@ -9296,7 +9371,6 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
hardcode_direct=no
hardcode_direct_absolute=no
hardcode_libdir_flag_spec=
- hardcode_libdir_flag_spec_ld=
hardcode_libdir_separator=
hardcode_minus_L=no
hardcode_shlibpath_var=unsupported
@@ -9549,8 +9623,7 @@ _LT_EOF
xlf* | bgf* | bgxlf* | mpixlf*)
# IBM XL Fortran 10.1 on PPC cannot create shared libs itself
whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive'
- hardcode_libdir_flag_spec=
- hardcode_libdir_flag_spec_ld='-rpath $libdir'
+ hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
if test "x$supports_anon_versioning" = xyes; then
archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
@@ -9930,6 +10003,7 @@ fi
# The linker will not automatically build a static lib if we build a DLL.
# _LT_TAGVAR(old_archive_from_new_cmds, )='true'
enable_shared_with_static_runtimes=yes
+ exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
# Don't use ranlib
old_postinstall_cmds='chmod 644 $oldlib'
@@ -9975,6 +10049,7 @@ fi
hardcode_shlibpath_var=unsupported
if test "$lt_cv_ld_force_load" = "yes"; then
whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
else
whole_archive_flag_spec=''
fi
@@ -10003,10 +10078,6 @@ fi
hardcode_shlibpath_var=no
;;
- freebsd1*)
- ld_shlibs=no
- ;;
-
# FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
# support. Future versions do this automatically, but an explicit c++rt0.o
# does not break anything, and helps significantly (at the cost of a little
@@ -10019,7 +10090,7 @@ fi
;;
# Unfortunately, older versions of FreeBSD 2 do not have this feature.
- freebsd2*)
+ freebsd2.*)
archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
hardcode_direct=yes
hardcode_minus_L=yes
@@ -10058,7 +10129,6 @@ fi
fi
if test "$with_gnu_ld" = no; then
hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
- hardcode_libdir_flag_spec_ld='+b $libdir'
hardcode_libdir_separator=:
hardcode_direct=yes
hardcode_direct_absolute=yes
@@ -10682,11 +10752,6 @@ esac
-
-
-
-
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
$as_echo_n "checking dynamic linker characteristics... " >&6; }
@@ -10776,7 +10841,7 @@ need_version=unknown
case $host_os in
aix3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
shlibpath_var=LIBPATH
@@ -10785,7 +10850,7 @@ aix3*)
;;
aix[4-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
hardcode_into_libs=yes
@@ -10850,7 +10915,7 @@ beos*)
;;
bsdi[45]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
@@ -10989,7 +11054,7 @@ darwin* | rhapsody*)
;;
dgux*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -10997,10 +11062,6 @@ dgux*)
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
- dynamic_linker=no
- ;;
-
freebsd* | dragonfly*)
# DragonFly does not have aout. When/if they implement a new
# versioning mechanism, adjust this.
@@ -11008,7 +11069,7 @@ freebsd* | dragonfly*)
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[123]*) objformat=aout ;;
+ freebsd[23].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -11026,7 +11087,7 @@ freebsd* | dragonfly*)
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[01]* | freebsdelf3.[01]*)
@@ -11045,19 +11106,8 @@ freebsd* | dragonfly*)
esac
;;
-gnu*)
- version_type=linux
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
haiku*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
dynamic_linker="$host_os runtime_loader"
@@ -11118,7 +11168,7 @@ hpux9* | hpux10* | hpux11*)
;;
interix[3-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -11134,7 +11184,7 @@ irix5* | irix6* | nonstopux*)
nonstopux*) version_type=nonstopux ;;
*)
if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
else
version_type=irix
fi ;;
@@ -11171,9 +11221,9 @@ linux*oldld* | linux*aout* | linux*coff*)
dynamic_linker=no
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -11267,7 +11317,7 @@ netbsd*)
;;
newsos6)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
shlibpath_var=LD_LIBRARY_PATH
shlibpath_overrides_runpath=yes
@@ -11336,7 +11386,7 @@ rdos*)
;;
solaris*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -11361,7 +11411,7 @@ sunos4*)
;;
sysv4 | sysv4.3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -11385,7 +11435,7 @@ sysv4 | sysv4.3*)
sysv4*MP*)
if test -d /usr/nec ;then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
soname_spec='$libname${shared_ext}.$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -11416,7 +11466,7 @@ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
tpf*)
# TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -11426,7 +11476,7 @@ tpf*)
;;
uts4*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -12351,7 +12401,6 @@ export_dynamic_flag_spec_CXX=
hardcode_direct_CXX=no
hardcode_direct_absolute_CXX=no
hardcode_libdir_flag_spec_CXX=
-hardcode_libdir_flag_spec_ld_CXX=
hardcode_libdir_separator_CXX=
hardcode_minus_L_CXX=no
hardcode_shlibpath_var_CXX=unsupported
@@ -12935,6 +12984,7 @@ fi
hardcode_shlibpath_var_CXX=unsupported
if test "$lt_cv_ld_force_load" = "yes"; then
whole_archive_flag_spec_CXX='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+
else
whole_archive_flag_spec_CXX=''
fi
@@ -12979,7 +13029,7 @@ fi
esac
;;
- freebsd[12]*)
+ freebsd2.*)
# C++ shared libraries reported to be fairly broken before
# switch to ELF
ld_shlibs_CXX=no
@@ -12995,9 +13045,6 @@ fi
ld_shlibs_CXX=yes
;;
- gnu*)
- ;;
-
haiku*)
archive_cmds_CXX='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
link_all_deplibs_CXX=yes
@@ -13159,7 +13206,7 @@ fi
inherit_rpath_CXX=yes
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
KCC*)
# Kuck and Associates, Inc. (KAI) C++ Compiler
@@ -13655,6 +13702,7 @@ _lt_libdeps_save_CFLAGS=$CFLAGS
case "$CC $CFLAGS " in #(
*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
esac
if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
@@ -14018,7 +14066,7 @@ lt_prog_compiler_static_CXX=
;;
esac
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
KCC*)
# KAI C++ Compiler
@@ -14444,7 +14492,9 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
;;
cygwin* | mingw* | cegcc*)
case $cc_basename in
- cl*) ;;
+ cl*)
+ exclude_expsyms_CXX='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+ ;;
*)
export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols'
exclude_expsyms_CXX='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
@@ -14600,8 +14650,6 @@ esac
-
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5
$as_echo_n "checking dynamic linker characteristics... " >&6; }
@@ -14627,7 +14675,7 @@ need_version=unknown
case $host_os in
aix3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
shlibpath_var=LIBPATH
@@ -14636,7 +14684,7 @@ aix3*)
;;
aix[4-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
hardcode_into_libs=yes
@@ -14701,7 +14749,7 @@ beos*)
;;
bsdi[45]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
@@ -14838,7 +14886,7 @@ darwin* | rhapsody*)
;;
dgux*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -14846,10 +14894,6 @@ dgux*)
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
- dynamic_linker=no
- ;;
-
freebsd* | dragonfly*)
# DragonFly does not have aout. When/if they implement a new
# versioning mechanism, adjust this.
@@ -14857,7 +14901,7 @@ freebsd* | dragonfly*)
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[123]*) objformat=aout ;;
+ freebsd[23].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -14875,7 +14919,7 @@ freebsd* | dragonfly*)
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[01]* | freebsdelf3.[01]*)
@@ -14894,19 +14938,8 @@ freebsd* | dragonfly*)
esac
;;
-gnu*)
- version_type=linux
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
haiku*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
dynamic_linker="$host_os runtime_loader"
@@ -14967,7 +15000,7 @@ hpux9* | hpux10* | hpux11*)
;;
interix[3-9]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -14983,7 +15016,7 @@ irix5* | irix6* | nonstopux*)
nonstopux*) version_type=nonstopux ;;
*)
if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
else
version_type=irix
fi ;;
@@ -15020,9 +15053,9 @@ linux*oldld* | linux*aout* | linux*coff*)
dynamic_linker=no
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -15116,7 +15149,7 @@ netbsd*)
;;
newsos6)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
shlibpath_var=LD_LIBRARY_PATH
shlibpath_overrides_runpath=yes
@@ -15185,7 +15218,7 @@ rdos*)
;;
solaris*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -15210,7 +15243,7 @@ sunos4*)
;;
sysv4 | sysv4.3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -15234,7 +15267,7 @@ sysv4 | sysv4.3*)
sysv4*MP*)
if test -d /usr/nec ;then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
soname_spec='$libname${shared_ext}.$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -15265,7 +15298,7 @@ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
tpf*)
# TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -15275,7 +15308,7 @@ tpf*)
;;
uts4*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -15414,6 +15447,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
ac_config_commands="$ac_config_commands libtool"
@@ -16849,16 +16884,16 @@ done
#check for needed dependencies
useICU=1;
-# inspired by feh-1.3.4/configure.ac. Tnx Tom Gilbert and feh hackers.
+# inspired by feh-1.3.4/configure.ac. Tnx Tom Gilbert and feh hackers.
# Check whether --with-icu was given.
if test "${with_icu+set}" = set; then :
withval=$with_icu; if test "$with_icu" = "no"; then
- useICU=0
- else
- CXXFLAGS="$CXXFLAGS -I$withval/include"
- LIBS="-L$withval/lib $LIBS"
- fi
+ useICU=0
+ else
+ CXXFLAGS="$CXXFLAGS -I$withval/include"
+ LIBS="-L$withval/lib $LIBS"
+ fi
fi
@@ -16982,11 +17017,11 @@ $as_echo "$ICU_IOLIBS" >&6; }
if test $succeeded = yes; then
CXXFLAGS="$CXXFLAGS $ICU_CPPSEARCHPATH"
- LIBS="$ICU_LIBPATH $ICU_LIBS $ICU_IOLIBS $LIBS"
+ LIBS="$ICU_LIBPATH $ICU_LIBS $ICU_IOLIBS $LIBS"
else
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "\"No ICU development environment found. Please check if libicu-dev or the like is installed\"
+as_fn_error $? "\"No ICU development environment found. Please check if libicu-ev or the like is installed\"
See \`config.log' for more details" "$LINENO" 5; }
fi
@@ -16994,7 +17029,7 @@ See \`config.log' for more details" "$LINENO" 5; }
$as_echo "#define HAVE_ICU 1" >>confdefs.h
else
- as_fn_error $? "\"ICU support is required\"" "$LINENO" 5
+ as_fn_error $? "\"ICU support is required\"" "$LINENO" 5
fi
@@ -17116,12 +17151,12 @@ if test -n "$ticcutils_CFLAGS"; then
pkg_cv_ticcutils_CFLAGS="$ticcutils_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ticcutils >= 0.5 \""; } >&5
- ($PKG_CONFIG --exists --print-errors "ticcutils >= 0.5 ") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ticcutils >= 0.6 \""; } >&5
+ ($PKG_CONFIG --exists --print-errors "ticcutils >= 0.6 ") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_ticcutils_CFLAGS=`$PKG_CONFIG --cflags "ticcutils >= 0.5 " 2>/dev/null`
+ pkg_cv_ticcutils_CFLAGS=`$PKG_CONFIG --cflags "ticcutils >= 0.6 " 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -17133,12 +17168,12 @@ if test -n "$ticcutils_LIBS"; then
pkg_cv_ticcutils_LIBS="$ticcutils_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ticcutils >= 0.5 \""; } >&5
- ($PKG_CONFIG --exists --print-errors "ticcutils >= 0.5 ") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"ticcutils >= 0.6 \""; } >&5
+ ($PKG_CONFIG --exists --print-errors "ticcutils >= 0.6 ") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_ticcutils_LIBS=`$PKG_CONFIG --libs "ticcutils >= 0.5 " 2>/dev/null`
+ pkg_cv_ticcutils_LIBS=`$PKG_CONFIG --libs "ticcutils >= 0.6 " 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -17159,14 +17194,14 @@ else
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
- ticcutils_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "ticcutils >= 0.5 " 2>&1`
+ ticcutils_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "ticcutils >= 0.6 " 2>&1`
else
- ticcutils_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "ticcutils >= 0.5 " 2>&1`
+ ticcutils_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "ticcutils >= 0.6 " 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$ticcutils_PKG_ERRORS" >&5
- as_fn_error $? "Package requirements (ticcutils >= 0.5 ) were not met:
+ as_fn_error $? "Package requirements (ticcutils >= 0.6 ) were not met:
$ticcutils_PKG_ERRORS
@@ -17424,12 +17459,12 @@ if test -n "$folia_CFLAGS"; then
pkg_cv_folia_CFLAGS="$folia_CFLAGS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"folia >= 0.11\""; } >&5
- ($PKG_CONFIG --exists --print-errors "folia >= 0.11") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"folia >= 0.13\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "folia >= 0.13") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_folia_CFLAGS=`$PKG_CONFIG --cflags "folia >= 0.11" 2>/dev/null`
+ pkg_cv_folia_CFLAGS=`$PKG_CONFIG --cflags "folia >= 0.13" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -17441,12 +17476,12 @@ if test -n "$folia_LIBS"; then
pkg_cv_folia_LIBS="$folia_LIBS"
elif test -n "$PKG_CONFIG"; then
if test -n "$PKG_CONFIG" && \
- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"folia >= 0.11\""; } >&5
- ($PKG_CONFIG --exists --print-errors "folia >= 0.11") 2>&5
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"folia >= 0.13\""; } >&5
+ ($PKG_CONFIG --exists --print-errors "folia >= 0.13") 2>&5
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; then
- pkg_cv_folia_LIBS=`$PKG_CONFIG --libs "folia >= 0.11" 2>/dev/null`
+ pkg_cv_folia_LIBS=`$PKG_CONFIG --libs "folia >= 0.13" 2>/dev/null`
test "x$?" != "x0" && pkg_failed=yes
else
pkg_failed=yes
@@ -17467,14 +17502,14 @@ else
_pkg_short_errors_supported=no
fi
if test $_pkg_short_errors_supported = yes; then
- folia_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "folia >= 0.11" 2>&1`
+ folia_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "folia >= 0.13" 2>&1`
else
- folia_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "folia >= 0.11" 2>&1`
+ folia_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "folia >= 0.13" 2>&1`
fi
# Put the nasty error message in config.log where it belongs
echo "$folia_PKG_ERRORS" >&5
- as_fn_error $? "Package requirements (folia >= 0.11) were not met:
+ as_fn_error $? "Package requirements (folia >= 0.13) were not met:
$folia_PKG_ERRORS
@@ -17619,7 +17654,7 @@ save_FLAGS="$CXXFLAGS"
# Check whether --with-python was given.
if test "${with_python+set}" = set; then :
- withval=$with_python; PYTHON="$withval/python"
+ withval=$with_python; PYTHON="$withval"
else
PYTHON="python"
fi
@@ -18802,7 +18837,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by frog $as_me 0.12.18, which was
+This file was extended by frog $as_me 0.12.19, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -18868,7 +18903,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-frog config.status 0.12.18
+frog config.status 0.12.19
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -19005,6 +19040,7 @@ pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`'
SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`'
ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`'
+PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`'
host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`'
host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`'
host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`'
@@ -19087,7 +19123,6 @@ with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`'
allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`'
no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_flag_spec_ld='`$ECHO "$hardcode_libdir_flag_spec_ld" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`'
hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`'
hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`'
@@ -19159,7 +19194,6 @@ with_gnu_ld_CXX='`$ECHO "$with_gnu_ld_CXX" | $SED "$delay_single_quote_subst"`'
allow_undefined_flag_CXX='`$ECHO "$allow_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
no_undefined_flag_CXX='`$ECHO "$no_undefined_flag_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_flag_spec_CXX='`$ECHO "$hardcode_libdir_flag_spec_CXX" | $SED "$delay_single_quote_subst"`'
-hardcode_libdir_flag_spec_ld_CXX='`$ECHO "$hardcode_libdir_flag_spec_ld_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_libdir_separator_CXX='`$ECHO "$hardcode_libdir_separator_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_direct_CXX='`$ECHO "$hardcode_direct_CXX" | $SED "$delay_single_quote_subst"`'
hardcode_direct_absolute_CXX='`$ECHO "$hardcode_direct_absolute_CXX" | $SED "$delay_single_quote_subst"`'
@@ -19198,6 +19232,7 @@ _LTECHO_EOF'
# Quote evaled strings.
for var in SHELL \
ECHO \
+PATH_SEPARATOR \
SED \
GREP \
EGREP \
@@ -19248,7 +19283,6 @@ with_gnu_ld \
allow_undefined_flag \
no_undefined_flag \
hardcode_libdir_flag_spec \
-hardcode_libdir_flag_spec_ld \
hardcode_libdir_separator \
exclude_expsyms \
include_expsyms \
@@ -19282,7 +19316,6 @@ with_gnu_ld_CXX \
allow_undefined_flag_CXX \
no_undefined_flag_CXX \
hardcode_libdir_flag_spec_CXX \
-hardcode_libdir_flag_spec_ld_CXX \
hardcode_libdir_separator_CXX \
exclude_expsyms_CXX \
include_expsyms_CXX \
@@ -20095,8 +20128,8 @@ $as_echo X"$file" |
# NOTE: Changes made to this file will be lost: look at ltmain.sh.
#
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-# Inc.
+# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is part of GNU Libtool.
@@ -20150,6 +20183,9 @@ SHELL=$lt_SHELL
# An echo program that protects backslashes.
ECHO=$lt_ECHO
+# The PATH separator for the build system.
+PATH_SEPARATOR=$lt_PATH_SEPARATOR
+
# The host system.
host_alias=$host_alias
host=$host
@@ -20451,10 +20487,6 @@ no_undefined_flag=$lt_no_undefined_flag
# This must work even if \$libdir does not exist
hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
-# If ld is used when linking, flag to hardcode \$libdir into a binary
-# during linking. This must work even if \$libdir does not exist.
-hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld
-
# Whether we need a single "-rpath" flag with a separated argument.
hardcode_libdir_separator=$lt_hardcode_libdir_separator
@@ -20797,10 +20829,6 @@ no_undefined_flag=$lt_no_undefined_flag_CXX
# This must work even if \$libdir does not exist
hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX
-# If ld is used when linking, flag to hardcode \$libdir into a binary
-# during linking. This must work even if \$libdir does not exist.
-hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld_CXX
-
# Whether we need a single "-rpath" flag with a separated argument.
hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX
diff --git a/configure.ac b/configure.ac
index feaa6b8..ab77737 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,10 +1,10 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
-# $Id: configure.ac 17474 2014-08-05 09:55:58Z sloot $
+# $Id: configure.ac 17877 2014-11-28 13:24:53Z mvgompel $
# $URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/configure.ac $
AC_PREREQ(2.59)
-AC_INIT([frog], [0.12.18], [timbl at uvt.nl])
+AC_INIT([frog], [0.12.19], [timbl at uvt.nl])
AM_INIT_AUTOMAKE
AC_CONFIG_SRCDIR([configure.ac])
AC_CONFIG_MACRO_DIR([m4])
@@ -18,7 +18,7 @@ else
fi
# Checks for programs.
-AC_PROG_CXX( [g++] )
+AC_PROG_CXX( [g++ c++] )
if $cxx_flags_were_set; then
CXXFLAGS=$CXXFLAGS
@@ -80,24 +80,24 @@ AX_LIB_READLINE
#check for needed dependencies
useICU=1;
-# inspired by feh-1.3.4/configure.ac. Tnx Tom Gilbert and feh hackers.
+# inspired by feh-1.3.4/configure.ac. Tnx Tom Gilbert and feh hackers.
AC_ARG_WITH(icu,
- [ --with-icu=DIR use ICU installed in <DIR>],
- [if test "$with_icu" = "no"; then
- useICU=0
- else
- CXXFLAGS="$CXXFLAGS -I$withval/include"
- LIBS="-L$withval/lib $LIBS"
- fi] )
+ [ --with-icu=DIR use ICU installed in <DIR>],
+ [if test "$with_icu" = "no"; then
+ useICU=0
+ else
+ CXXFLAGS="$CXXFLAGS -I$withval/include"
+ LIBS="-L$withval/lib $LIBS"
+ fi] )
if test "$useICU" = "1"; then
- AX_ICU_CHECK( [3.6],
- [CXXFLAGS="$CXXFLAGS $ICU_CPPSEARCHPATH"
- LIBS="$ICU_LIBPATH $ICU_LIBS $ICU_IOLIBS $LIBS"],
- [AC_MSG_FAILURE( "No ICU development environment found. Please check if libicu-dev or the like is installed" )] )
- AC_DEFINE(HAVE_ICU, 1, we want to use ICU )
+ AX_ICU_CHECK( [3.6],
+ [CXXFLAGS="$CXXFLAGS $ICU_CPPSEARCHPATH"
+ LIBS="$ICU_LIBPATH $ICU_LIBS $ICU_IOLIBS $LIBS"],
+ [AC_MSG_FAILURE( "No ICU development environment found. Please check if libicu-ev or the like is installed" )] )
+ AC_DEFINE(HAVE_ICU, 1, we want to use ICU )
else
- AC_MSG_ERROR("ICU support is required")
+ AC_MSG_ERROR("ICU support is required")
fi
AC_ARG_WITH(timbl,
@@ -112,7 +112,7 @@ AC_ARG_WITH(ticcutils,
[ --with-ticcutils=DIR use ticcutils installed in <DIR>],
[PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$withval/lib/pkgconfig"],
[PKG_CONFIG_PATH="$PKG_CONFIG_PATH:$prefix/lib/pkgconfig"])
-PKG_CHECK_MODULES([ticcutils], [ticcutils >= 0.5] )
+PKG_CHECK_MODULES([ticcutils], [ticcutils >= 0.6] )
CXXFLAGS="$CXXFLAGS $ticcutils_CFLAGS"
LIBS="$ticcutils_LIBS $LIBS"
@@ -137,7 +137,7 @@ AC_ARG_WITH(folia,
[PKG_CONFIG_PATH="$withval/lib/pkgconfig:$PKG_CONFIG_PATH"],
[PKG_CONFIG_PATH="$prefix/lib/pkgconfig:$PKG_CONFIG_PATH"])
AC_MSG_NOTICE( [pkg-config search path: $PKG_CONFIG_PATH] )
-PKG_CHECK_MODULES([folia],[folia >= 0.11])
+PKG_CHECK_MODULES([folia],[folia >= 0.13])
CXXFLAGS="$CXXFLAGS $folia_CFLAGS"
LIBS="$folia_LIBS $LIBS"
@@ -155,8 +155,8 @@ save_LIBS="$LIBS"
save_FLAGS="$CXXFLAGS"
AC_ARG_WITH(python,
- [ --with-python=DIR use python executable installed in <DIR>],
- [PYTHON="$withval/python"],
+ [ --with-python=FILE use python executable <FILE>],
+ [PYTHON="$withval"],
[PYTHON="python"])
#we need Python > 2.5.x
diff --git a/include/frog/Frog.h b/include/frog/Frog.h
index 490bc31..680df58 100755
--- a/include/frog/Frog.h
+++ b/include/frog/Frog.h
@@ -1,5 +1,6 @@
+/* ex: set tabstop=8 expandtab: */
/*
- $Id: Frog.h 17436 2014-07-09 13:13:35Z sloot $
+ $Id: Frog.h 17672 2014-09-16 12:32:10Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/Frog.h $
Copyright (c) 2006 - 2014
@@ -36,14 +37,12 @@
#include "ticcutils/Timer.h"
#include "libfolia/document.h"
+
+//declared here and defined in Frog-util.cxx (bit messy)
std::string prefix( const std::string&, const std::string& );
-bool existsDir( const std::string& );
std::string getTime();
-
-void getFileNames( const std::string&, const std::string&,
- std::set<std::string>& );
-
-extern TiCC::LogStream *theErrLog;
+bool existsDir( const std::string& );
+void getFileNames( const std::string&, const std::string&, std::set<std::string>& );
class TimerBlock{
public:
@@ -63,4 +62,5 @@ public:
TiCC::Timer frogTimer;
};
+
#endif
diff --git a/include/frog/FrogAPI.h b/include/frog/FrogAPI.h
new file mode 100644
index 0000000..ddbdb20
--- /dev/null
+++ b/include/frog/FrogAPI.h
@@ -0,0 +1,138 @@
+/* ex: set tabstop=8 expandtab: */
+/*
+ $Id: FrogAPI.h 17672 2014-09-16 12:32:10Z sloot $
+ $URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/FrogAPI.h $
+
+ Copyright (c) 2006 - 2014
+ Tilburg University
+
+ This file is part of frog
+
+ frog is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ frog is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ For questions and suggestions, see:
+ http://ilk.uvt.nl/software.html
+ or send mail to:
+ timbl at uvt.nl
+*/
+
+
+#ifndef FROGAPI_H
+#define FROGAPI_H
+
+#include "timbl/TimblAPI.h"
+
+#include "frog/Frog.h" //internals
+#include "ticcutils/Configuration.h"
+#include "timblserver/FdStream.h"
+#include "timblserver/ServerBase.h"
+
+#include "frog/ucto_tokenizer_mod.h"
+#include "frog/mbma_mod.h"
+#include "frog/mblem_mod.h"
+#include "frog/mwu_chunker_mod.h"
+#include "frog/cgn_tagger_mod.h"
+#include "frog/iob_tagger_mod.h"
+#include "frog/ner_tagger_mod.h"
+#include "frog/Parser.h"
+
+#include <vector>
+#include <string>
+#include <iostream>
+
+class FrogOptions {
+ public:
+ bool doTok;
+ bool doLemma;
+ bool doMorph;
+ bool doDaringMorph;
+ bool doMwu;
+ bool doIOB;
+ bool doNER;
+ bool doParse;
+ bool doSentencePerLine;
+ bool doQuoteDetection;
+ bool doDirTest;
+ bool doServer;
+
+ bool doXMLin;
+ bool doXMLout;
+ bool doKanon;
+
+ int debugFlag;
+ bool interactive;
+ int numThreads;
+
+ std::string encoding;
+ std::string uttmark;
+ std::string listenport;
+ std::string docid;
+ std::string textclass;
+
+ std::string tmpDirName;
+
+ unsigned int maxParserTokens;
+
+ FrogOptions();
+ private:
+ FrogOptions(const FrogOptions & );
+};
+
+
+class FrogAPI {
+ public:
+ FrogAPI( const FrogOptions&,
+ const TiCC::Configuration&,
+ TiCC::LogStream * );
+ ~FrogAPI();
+ static std::string defaultConfigDir();
+ static std::string defaultConfigFile();
+ void FrogFile( const std::string&, std::ostream&, const std::string& );
+ void FrogDoc( folia::Document&, bool=false );
+ void FrogServer( Sockets::ServerSocket &conn );
+ void FrogInteractive();
+ std::string Frogtostring( const std::string& );
+ std::string Frogtostringfromfile( const std::string& );
+
+ private:
+ // functions
+ bool TestSentence( folia::Sentence*, TimerBlock&);
+ std::vector<folia::Word*> lookup( folia::Word *,
+ const std::vector<folia::Entity*>& ) const;
+ folia::Dependency *lookupDep( const folia::Word *,
+ const std::vector<folia::Dependency*>& ) const;
+ std::string lookupNEREntity( const std::vector<folia::Word *>&,
+ const std::vector<folia::Entity*>& ) const;
+ std::string lookupIOBChunk( const std::vector<folia::Word *>&,
+ const std::vector<folia::Chunk*>& ) const;
+ void displayMWU( std::ostream&, size_t, const std::vector<folia::Word*>& ) const;
+ std::ostream& showResults( std::ostream&, folia::Document& ) const;
+
+ // data
+ const TiCC::Configuration& configuration;
+ const FrogOptions& options;
+ TiCC::LogStream *theErrLog;
+
+ // pointers to all the modules
+ Mbma *myMbma;
+ Mblem *myMblem;
+ Mwu *myMwu;
+ Parser *myParser;
+ CGNTagger *myCGNTagger;
+ IOBTagger *myIOBTagger;
+ NERTagger *myNERTagger;
+ UctoTokenizer *tokenizer;
+};
+
+#endif
diff --git a/include/frog/Makefile.am b/include/frog/Makefile.am
index 3d4fe19..ebe9802 100644
--- a/include/frog/Makefile.am
+++ b/include/frog/Makefile.am
@@ -1,3 +1,3 @@
-pkginclude_HEADERS = Frog.h mblem_mod.h mbma_mod.h mwu_chunker_mod.h \
+pkginclude_HEADERS = FrogAPI.h Frog.h mblem_mod.h mbma_mod.h mwu_chunker_mod.h \
cgn_tagger_mod.h iob_tagger_mod.h Parser.h \
ucto_tokenizer_mod.h ner_tagger_mod.h
diff --git a/include/frog/Makefile.in b/include/frog/Makefile.in
index df5d934..4ab9cb0 100644
--- a/include/frog/Makefile.in
+++ b/include/frog/Makefile.in
@@ -329,7 +329,7 @@ top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
ucto_CFLAGS = @ucto_CFLAGS@
ucto_LIBS = @ucto_LIBS@
-pkginclude_HEADERS = Frog.h mblem_mod.h mbma_mod.h mwu_chunker_mod.h \
+pkginclude_HEADERS = FrogAPI.h Frog.h mblem_mod.h mbma_mod.h mwu_chunker_mod.h \
cgn_tagger_mod.h iob_tagger_mod.h Parser.h \
ucto_tokenizer_mod.h ner_tagger_mod.h
diff --git a/include/frog/Parser.h b/include/frog/Parser.h
index 3a6eae9..7a42306 100644
--- a/include/frog/Parser.h
+++ b/include/frog/Parser.h
@@ -1,5 +1,5 @@
/*
- $Id: Parser.h 16815 2014-01-06 10:00:29Z sloot $
+ $Id: Parser.h 17714 2014-09-25 16:04:28Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/Parser.h $
Copyright (c) 2006 - 2014
@@ -78,7 +78,9 @@ struct parseData;
class Parser {
public:
- Parser():pairs(0),dir(0),rels(0),PI(0),isInit(false),keepIntermediate(false){};
+ Parser(TiCC::LogStream* logstream):pairs(0),dir(0),rels(0),PI(0),isInit(false),keepIntermediate(false) {
+ parseLog = new TiCC::LogStream(logstream, "parser-");
+ };
~Parser();
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& doc ) const;
@@ -87,6 +89,7 @@ class Parser {
void prepareParse( const std::vector<folia::Word *>&,
const std::string&, parseData& );
void createParserFile( const parseData& );
+ std::string getTagset() const { return tagset; };
private:
void createRelDir( const parseData& );
void createPairs( const parseData& );
diff --git a/include/frog/cgn_tagger_mod.h b/include/frog/cgn_tagger_mod.h
index 31470a0..801551e 100644
--- a/include/frog/cgn_tagger_mod.h
+++ b/include/frog/cgn_tagger_mod.h
@@ -1,5 +1,5 @@
/*
- $Id: cgn_tagger_mod.h 16815 2014-01-06 10:00:29Z sloot $
+ $Id: cgn_tagger_mod.h 17616 2014-09-07 19:24:55Z mvgompel $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/cgn_tagger_mod.h $
Copyright (c) 2006 - 2014
@@ -33,7 +33,7 @@
class CGNTagger {
public:
- CGNTagger();
+ CGNTagger(TiCC::LogStream*);
~CGNTagger();
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& ) const;
diff --git a/include/frog/iob_tagger_mod.h b/include/frog/iob_tagger_mod.h
index edf16ec..7eb8b4d 100644
--- a/include/frog/iob_tagger_mod.h
+++ b/include/frog/iob_tagger_mod.h
@@ -1,5 +1,5 @@
/*
- $Id: iob_tagger_mod.h 16815 2014-01-06 10:00:29Z sloot $
+ $Id: iob_tagger_mod.h 17616 2014-09-07 19:24:55Z mvgompel $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/iob_tagger_mod.h $
Copyright (c) 2006 - 2014
@@ -31,7 +31,7 @@
class IOBTagger {
public:
- IOBTagger();
+ IOBTagger(TiCC::LogStream *);
~IOBTagger();
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& ) const;
diff --git a/include/frog/mblem_mod.h b/include/frog/mblem_mod.h
index 899ba32..d9854f3 100755
--- a/include/frog/mblem_mod.h
+++ b/include/frog/mblem_mod.h
@@ -1,5 +1,5 @@
/*
- $Id: mblem_mod.h 16815 2014-01-06 10:00:29Z sloot $
+ $Id: mblem_mod.h 17809 2014-11-11 13:44:50Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/mblem_mod.h $
Copyright (c) 2006 - 2014
@@ -14,12 +14,12 @@
frog is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
-
+
For questions and suggestions, see:
http://ilk.uvt.nl/software.html
or send mail to:
@@ -29,6 +29,7 @@
#ifndef __MBLEM_MOD__
#define __MBLEM_MOD__
+
class mblemData {
public:
mblemData( const std::string& l, const std::string& t ){
@@ -44,7 +45,7 @@ class mblemData {
class Mblem {
public:
- Mblem();
+ Mblem(TiCC::LogStream *);
~Mblem();
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& doc ) const;
@@ -53,12 +54,12 @@ class Mblem {
std::vector<std::pair<std::string,std::string> > getResult() const;
void filterTag( const std::string& );
void makeUnique();
+ std::string getTagset() const { return tagset; };
private:
void read_transtable( const std::string& );
void create_MBlem_defaults();
bool readsettings( const std::string& dir, const std::string& fname );
- void addLemma( folia::FoliaElement *, const std::string&);
- void addAltLemma( folia::Word *, const std::string&);
+ void addLemma( folia::Word *, const std::string&) ;
std::string make_instance( const UnicodeString& in );
void getFoLiAResult( folia::Word *, const UnicodeString& );
Timbl::TimblAPI *myLex;
diff --git a/include/frog/mbma_mod.h b/include/frog/mbma_mod.h
index 2d78bdf..861425f 100755
--- a/include/frog/mbma_mod.h
+++ b/include/frog/mbma_mod.h
@@ -1,5 +1,5 @@
/*
- $Id: mbma_mod.h 17440 2014-07-10 12:11:27Z sloot $
+ $Id: mbma_mod.h 17822 2014-11-13 09:56:31Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/mbma_mod.h $
Copyright (c) 2006 - 2014
@@ -68,9 +68,11 @@ public:
virtual void resolveTail(){ abort(); };
virtual void resolveMiddle(){ abort(); };
virtual folia::Morpheme *createMorpheme( folia::Document *,
+ const std::string&,
const std::string& ) const = 0;
virtual folia::Morpheme *createMorpheme( folia::Document *,
const std::string&,
+ const std::string&,
int&,
std::string& ) const = 0;
CLEX::Type tag() const { return cls; };
@@ -93,9 +95,11 @@ public:
std::string original() const { return orig; };
int infixpos() const { return ifpos; };
folia::Morpheme *createMorpheme( folia::Document *,
+ const std::string&,
const std::string& ) const;
folia::Morpheme *createMorpheme( folia::Document *,
const std::string&,
+ const std::string&,
int&,
std::string& ) const;
private:
@@ -128,9 +132,11 @@ class BracketNest: public BaseBracket {
UnicodeString deepmorphemes() const;
CLEX::Type getFinalTag();
folia::Morpheme *createMorpheme( folia::Document *,
+ const std::string&,
const std::string& ) const;
folia::Morpheme *createMorpheme( folia::Document *,
const std::string&,
+ const std::string&,
int&,
std::string& ) const;
std::list<BaseBracket *> parts;
@@ -171,7 +177,7 @@ static std::map<char,std::string> iNames;
class Mbma {
public:
- Mbma();
+ Mbma(TiCC::LogStream *);
~Mbma();
bool init( const TiCC::Configuration& );
void addDeclaration( folia::Document& doc ) const;
@@ -181,6 +187,7 @@ class Mbma {
std::vector<std::vector<std::string> > getResult() const;
void setDaring( bool b ){ doDaring = b; };
void clearAnalysis();
+ std::string getTagset() const { return mbma_tagset; };
private:
void cleanUp();
bool readsettings( const std::string&, const std::string& );
@@ -199,18 +206,16 @@ class Mbma {
void addMorph( folia::MorphologyLayer *,
const std::vector<std::string>& ) const;
void addMorph( folia::Word *, const std::vector<std::string>& ) const;
- void addAltMorph( folia::Word *, const std::vector<std::string>& ) const;
void addBracketMorph( folia::Word *,
const std::string&,
const std::string& ) const;
void addBracketMorph( folia::Word *, const BracketNest * ) const;
- void addAltBracketMorph( folia::Word *, const BracketNest * ) const;
std::string MTreeFilename;
Timbl::TimblAPI *MTree;
std::map<std::string,std::string> TAGconv;
std::vector<MBMAana*> analysis;
std::string version;
- std::string tagset;
+ std::string mbma_tagset;
std::string cgn_tagset;
std::string clex_tagset;
TiCC::LogStream *mbmaLog;
@@ -247,9 +252,12 @@ class MBMAana {
return rule.extract_morphemes();
};
+ UnicodeString getKey( bool );
+
private:
std::string tag;
std::string infl;
+ UnicodeString sortkey;
std::string description;
Rule rule;
BracketNest *brackets;
diff --git a/include/frog/mwu_chunker_mod.h b/include/frog/mwu_chunker_mod.h
index 1dd506d..b4146e9 100755
--- a/include/frog/mwu_chunker_mod.h
+++ b/include/frog/mwu_chunker_mod.h
@@ -1,5 +1,5 @@
/*
- $Id: mwu_chunker_mod.h 16815 2014-01-06 10:00:29Z sloot $
+ $Id: mwu_chunker_mod.h 17616 2014-09-07 19:24:55Z mvgompel $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/mwu_chunker_mod.h $
Copyright (c) 2006 - 2014
@@ -58,7 +58,7 @@ class mwuAna {
class Mwu {
friend std::ostream& operator<< (std::ostream&, const Mwu& );
public:
- Mwu();
+ Mwu(TiCC::LogStream*);
~Mwu();
void reset();
bool init( const TiCC::Configuration& );
diff --git a/include/frog/ner_tagger_mod.h b/include/frog/ner_tagger_mod.h
index af053e3..08291f4 100644
--- a/include/frog/ner_tagger_mod.h
+++ b/include/frog/ner_tagger_mod.h
@@ -1,5 +1,5 @@
/*
- $Id: ner_tagger_mod.h 16815 2014-01-06 10:00:29Z sloot $
+ $Id: ner_tagger_mod.h 17616 2014-09-07 19:24:55Z mvgompel $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/ner_tagger_mod.h $
Copyright (c) 2006 - 2014
@@ -31,7 +31,7 @@
class NERTagger {
public:
- NERTagger();
+ NERTagger(LogStream *);
~NERTagger();
bool init( const TiCC::Configuration& );
void Classify( const std::vector<folia::Word *>& );
diff --git a/include/frog/ucto_tokenizer_mod.h b/include/frog/ucto_tokenizer_mod.h
index c5a73c8..fd85cab 100644
--- a/include/frog/ucto_tokenizer_mod.h
+++ b/include/frog/ucto_tokenizer_mod.h
@@ -1,31 +1,31 @@
/*
- $Id: ucto_tokenizer_mod.h 16815 2014-01-06 10:00:29Z sloot $
+ $Id: ucto_tokenizer_mod.h 17661 2014-09-15 14:59:03Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/include/frog/ucto_tokenizer_mod.h $
Copyright (c) 2006 - 2014
Tilburg University
-
+
This file is part of frog.
frog is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
-
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
frog is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
For questions and suggestions, see:
http://ilk.uvt.nl/software.html
or send mail to:
timbl at uvt.nl
*/
-
+
#ifndef UCTO_TOKENIZER_MOD_H
#define UCTO_TOKENIZER_MOD_H
@@ -33,15 +33,19 @@
class UctoTokenizer {
public:
- UctoTokenizer();
+ UctoTokenizer(TiCC::LogStream *);
~UctoTokenizer() { delete tokenizer; delete uctoLog; };
- bool init( const TiCC::Configuration&, const std::string&, bool );
+ bool init( const TiCC::Configuration& );
void setUttMarker( const std::string& );
+ void setPassThru( bool );
+ bool getPassThru() const;
void setSentencePerLineInput( bool );
void setInputEncoding( const std::string& );
void setQuoteDetection( bool );
void setInputXml( bool );
void setTextClass( const std::string& );
+ void setDocID( const std::string& );
+ folia::Document tokenizestring( const std::string& );
folia::Document tokenize( std::istream& );
bool tokenize( folia::Document& );
std::vector<std::string> tokenize( const std::string& );
@@ -50,4 +54,4 @@ class UctoTokenizer {
TiCC::LogStream *uctoLog;
};
-#endif
+#endif
diff --git a/m4/ax_lib_readline.m4 b/m4/ax_lib_readline.m4
index 2347ad7..056f25c 100644
--- a/m4/ax_lib_readline.m4
+++ b/m4/ax_lib_readline.m4
@@ -92,7 +92,7 @@ AC_DEFUN([AX_LIB_READLINE], [
LIBS="$LIBS $ax_cv_lib_readline"
AC_DEFINE(HAVE_LIBREADLINE, 1,
[Define if you have a readline compatible library])
- AC_CHECK_HEADERS([readline.h readline/readline.h])
+ AC_CHECK_HEADERS(readline.h readline/readline.h)
AC_CACHE_CHECK([whether readline supports history],
ax_cv_lib_readline_history, [
ax_cv_lib_readline_history="no"
@@ -101,7 +101,7 @@ AC_DEFUN([AX_LIB_READLINE], [
if test "$ax_cv_lib_readline_history" = "yes"; then
AC_DEFINE(HAVE_READLINE_HISTORY, 1,
[Define if your readline library has \`add_history'])
- AC_CHECK_HEADERS([history.h readline/history.h])
+ AC_CHECK_HEADERS(history.h readline/history.h)
fi
fi
])dnl
diff --git a/m4/libtool.m4 b/m4/libtool.m4
index 8ff3c76..d7c043f 100644
--- a/m4/libtool.m4
+++ b/m4/libtool.m4
@@ -1,8 +1,8 @@
# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
#
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-# Inc.
+# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is free software; the Free Software Foundation gives
@@ -11,8 +11,8 @@
m4_define([_LT_COPYING], [dnl
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
-# 2006, 2007, 2008, 2009, 2010 Free Software Foundation,
-# Inc.
+# 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# Written by Gordon Matzigkeit, 1996
#
# This file is part of GNU Libtool.
@@ -146,6 +146,8 @@ AC_REQUIRE([AC_CANONICAL_BUILD])dnl
AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl
AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl
+_LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl
+dnl
_LT_DECL([], [host_alias], [0], [The host system])dnl
_LT_DECL([], [host], [0])dnl
_LT_DECL([], [host_os], [0])dnl
@@ -637,7 +639,7 @@ m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl
m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION])
configured by $[0], generated by m4_PACKAGE_STRING.
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2011 Free Software Foundation, Inc.
This config.lt script is free software; the Free Software Foundation
gives unlimited permision to copy, distribute and modify it."
@@ -801,6 +803,7 @@ AC_DEFUN([LT_LANG],
m4_case([$1],
[C], [_LT_LANG(C)],
[C++], [_LT_LANG(CXX)],
+ [Go], [_LT_LANG(GO)],
[Java], [_LT_LANG(GCJ)],
[Fortran 77], [_LT_LANG(F77)],
[Fortran], [_LT_LANG(FC)],
@@ -822,6 +825,31 @@ m4_defun([_LT_LANG],
])# _LT_LANG
+m4_ifndef([AC_PROG_GO], [
+############################################################
+# NOTE: This macro has been submitted for inclusion into #
+# GNU Autoconf as AC_PROG_GO. When it is available in #
+# a released version of Autoconf we should remove this #
+# macro and use it instead. #
+############################################################
+m4_defun([AC_PROG_GO],
+[AC_LANG_PUSH(Go)dnl
+AC_ARG_VAR([GOC], [Go compiler command])dnl
+AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl
+_AC_ARG_VAR_LDFLAGS()dnl
+AC_CHECK_TOOL(GOC, gccgo)
+if test -z "$GOC"; then
+ if test -n "$ac_tool_prefix"; then
+ AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo])
+ fi
+fi
+if test -z "$GOC"; then
+ AC_CHECK_PROG(GOC, gccgo, gccgo, false)
+fi
+])#m4_defun
+])#m4_ifndef
+
+
# _LT_LANG_DEFAULT_CONFIG
# -----------------------
m4_defun([_LT_LANG_DEFAULT_CONFIG],
@@ -852,6 +880,10 @@ AC_PROVIDE_IFELSE([AC_PROG_GCJ],
m4_ifdef([LT_PROG_GCJ],
[m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])])
+AC_PROVIDE_IFELSE([AC_PROG_GO],
+ [LT_LANG(GO)],
+ [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])])
+
AC_PROVIDE_IFELSE([LT_PROG_RC],
[LT_LANG(RC)],
[m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])])
@@ -954,7 +986,13 @@ m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
-dynamiclib -Wl,-single_module conftest.c 2>conftest.err
_lt_result=$?
- if test -f libconftest.dylib && test ! -s conftest.err && test $_lt_result = 0; then
+ # If there is a non-empty error log, and "single_module"
+ # appears in it, assume the flag caused a linker warning
+ if test -s conftest.err && $GREP single_module conftest.err; then
+ cat conftest.err >&AS_MESSAGE_LOG_FD
+ # Otherwise, if the output was created with a 0 exit code from
+ # the compiler, it worked.
+ elif test -f libconftest.dylib && test $_lt_result -eq 0; then
lt_cv_apple_cc_single_mod=yes
else
cat conftest.err >&AS_MESSAGE_LOG_FD
@@ -962,6 +1000,7 @@ m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
rm -rf libconftest.dylib*
rm -f conftest.*
fi])
+
AC_CACHE_CHECK([for -exported_symbols_list linker flag],
[lt_cv_ld_exported_symbols_list],
[lt_cv_ld_exported_symbols_list=no
@@ -973,6 +1012,7 @@ m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
[lt_cv_ld_exported_symbols_list=no])
LDFLAGS="$save_LDFLAGS"
])
+
AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load],
[lt_cv_ld_force_load=no
cat > conftest.c << _LT_EOF
@@ -990,7 +1030,9 @@ _LT_EOF
echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD
$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
_lt_result=$?
- if test -f conftest && test ! -s conftest.err && test $_lt_result = 0 && $GREP forced_load conftest 2>&1 >/dev/null; then
+ if test -s conftest.err && $GREP force_load conftest.err; then
+ cat conftest.err >&AS_MESSAGE_LOG_FD
+ elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
lt_cv_ld_force_load=yes
else
cat conftest.err >&AS_MESSAGE_LOG_FD
@@ -1035,8 +1077,8 @@ _LT_EOF
])
-# _LT_DARWIN_LINKER_FEATURES
-# --------------------------
+# _LT_DARWIN_LINKER_FEATURES([TAG])
+# ---------------------------------
# Checks for linker and compiler features on darwin
m4_defun([_LT_DARWIN_LINKER_FEATURES],
[
@@ -1047,6 +1089,8 @@ m4_defun([_LT_DARWIN_LINKER_FEATURES],
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
if test "$lt_cv_ld_force_load" = "yes"; then
_LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
+ m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes],
+ [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes])
else
_LT_TAGVAR(whole_archive_flag_spec, $1)=''
fi
@@ -1268,7 +1312,7 @@ ia64-*-hpux*)
rm -rf conftest*
;;
-x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|powerpc*-*linux*| \
s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
@@ -1280,9 +1324,19 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
LD="${LD-ld} -m elf_i386_fbsd"
;;
x86_64-*linux*)
- LD="${LD-ld} -m elf_i386"
+ case `/usr/bin/file conftest.o` in
+ *x86-64*)
+ LD="${LD-ld} -m elf32_x86_64"
+ ;;
+ *)
+ LD="${LD-ld} -m elf_i386"
+ ;;
+ esac
+ ;;
+ powerpc64le-*)
+ LD="${LD-ld} -m elf32lppclinux"
;;
- ppc64-*linux*|powerpc64-*linux*)
+ powerpc64-*)
LD="${LD-ld} -m elf32ppclinux"
;;
s390x-*linux*)
@@ -1301,7 +1355,10 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
x86_64-*linux*)
LD="${LD-ld} -m elf_x86_64"
;;
- ppc*-*linux*|powerpc*-*linux*)
+ powerpcle-*)
+ LD="${LD-ld} -m elf64lppc"
+ ;;
+ powerpc-*)
LD="${LD-ld} -m elf64ppc"
;;
s390*-*linux*|s390*-*tpf*)
@@ -1330,14 +1387,27 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
CFLAGS="$SAVE_CFLAGS"
fi
;;
-sparc*-*solaris*)
+*-*solaris*)
# Find out which ABI we are using.
echo 'int i;' > conftest.$ac_ext
if AC_TRY_EVAL(ac_compile); then
case `/usr/bin/file conftest.o` in
*64-bit*)
case $lt_cv_prog_gnu_ld in
- yes*) LD="${LD-ld} -m elf64_sparc" ;;
+ yes*)
+ case $host in
+ i?86-*-solaris*)
+ LD="${LD-ld} -m elf_x86_64"
+ ;;
+ sparc*-*-solaris*)
+ LD="${LD-ld} -m elf64_sparc"
+ ;;
+ esac
+ # GNU ld 2.21 introduced _sol2 emulations. Use them if available.
+ if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+ LD="${LD-ld}_sol2"
+ fi
+ ;;
*)
if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
LD="${LD-ld} -64"
@@ -1414,13 +1484,13 @@ old_postuninstall_cmds=
if test -n "$RANLIB"; then
case $host_os in
openbsd*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
;;
*)
- old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib"
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
;;
esac
- old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
+ old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
fi
case $host_os in
@@ -1600,6 +1670,11 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
lt_cv_sys_max_cmd_len=196608
;;
+ os2*)
+ # The test takes a long time on OS/2.
+ lt_cv_sys_max_cmd_len=8192
+ ;;
+
osf*)
# Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
# due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
@@ -1626,7 +1701,8 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
;;
*)
lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
- if test -n "$lt_cv_sys_max_cmd_len"; then
+ if test -n "$lt_cv_sys_max_cmd_len" && \
+ test undefined != "$lt_cv_sys_max_cmd_len"; then
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
else
@@ -1639,7 +1715,7 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
# If test is not a shell built-in, we'll probably end up computing a
# maximum length that is only half of the actual maximum length, but
# we can't tell.
- while { test "X"`func_fallback_echo "$teststring$teststring" 2>/dev/null` \
+ while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
= "X$teststring$teststring"; } >/dev/null 2>&1 &&
test $i != 17 # 1/2 MB should be enough
do
@@ -2185,7 +2261,7 @@ need_version=unknown
case $host_os in
aix3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
shlibpath_var=LIBPATH
@@ -2194,7 +2270,7 @@ aix3*)
;;
aix[[4-9]]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
hardcode_into_libs=yes
@@ -2259,7 +2335,7 @@ beos*)
;;
bsdi[[45]]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
@@ -2398,7 +2474,7 @@ m4_if([$1], [],[
;;
dgux*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
@@ -2406,10 +2482,6 @@ dgux*)
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
- dynamic_linker=no
- ;;
-
freebsd* | dragonfly*)
# DragonFly does not have aout. When/if they implement a new
# versioning mechanism, adjust this.
@@ -2417,7 +2489,7 @@ freebsd* | dragonfly*)
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[[123]]*) objformat=aout ;;
+ freebsd[[23]].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -2435,7 +2507,7 @@ freebsd* | dragonfly*)
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[[01]]* | freebsdelf3.[[01]]*)
@@ -2454,19 +2526,8 @@ freebsd* | dragonfly*)
esac
;;
-gnu*)
- version_type=linux
- need_lib_prefix=no
- need_version=no
- library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
- soname_spec='${libname}${release}${shared_ext}$major'
- shlibpath_var=LD_LIBRARY_PATH
- shlibpath_overrides_runpath=no
- hardcode_into_libs=yes
- ;;
-
haiku*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
dynamic_linker="$host_os runtime_loader"
@@ -2527,7 +2588,7 @@ hpux9* | hpux10* | hpux11*)
;;
interix[[3-9]]*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
@@ -2543,7 +2604,7 @@ irix5* | irix6* | nonstopux*)
nonstopux*) version_type=nonstopux ;;
*)
if test "$lt_cv_prog_gnu_ld" = yes; then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
else
version_type=irix
fi ;;
@@ -2580,9 +2641,9 @@ linux*oldld* | linux*aout* | linux*coff*)
dynamic_linker=no
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
- version_type=linux
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -2657,7 +2718,7 @@ netbsd*)
;;
newsos6)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
shlibpath_var=LD_LIBRARY_PATH
shlibpath_overrides_runpath=yes
@@ -2726,7 +2787,7 @@ rdos*)
;;
solaris*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -2751,7 +2812,7 @@ sunos4*)
;;
sysv4 | sysv4.3*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -2775,7 +2836,7 @@ sysv4 | sysv4.3*)
sysv4*MP*)
if test -d /usr/nec ;then
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
soname_spec='$libname${shared_ext}.$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -2806,7 +2867,7 @@ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
tpf*)
# TPF is a cross-target only. Preferred cross-host = GNU/Linux.
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
need_lib_prefix=no
need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
@@ -2816,7 +2877,7 @@ tpf*)
;;
uts4*)
- version_type=linux
+ version_type=linux # correct to gnu/linux during the next big refactor
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
soname_spec='${libname}${release}${shared_ext}$major'
shlibpath_var=LD_LIBRARY_PATH
@@ -3197,10 +3258,6 @@ freebsd* | dragonfly*)
fi
;;
-gnu*)
- lt_cv_deplibs_check_method=pass_all
- ;;
-
haiku*)
lt_cv_deplibs_check_method=pass_all
;;
@@ -3238,8 +3295,8 @@ irix5* | irix6* | nonstopux*)
lt_cv_deplibs_check_method=pass_all
;;
-# This must be Linux ELF.
-linux* | k*bsd*-gnu | kopensolaris*-gnu)
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
lt_cv_deplibs_check_method=pass_all
;;
@@ -3658,6 +3715,7 @@ for ac_symprfx in "" "_"; do
# which start with @ or ?.
lt_cv_sys_global_symbol_pipe="$AWK ['"\
" {last_section=section; section=\$ 3};"\
+" /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
" /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
" \$ 0!~/External *\|/{next};"\
" / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
@@ -3990,7 +4048,7 @@ m4_if([$1], [CXX], [
;;
esac
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
KCC*)
# KAI C++ Compiler
@@ -4242,7 +4300,9 @@ m4_if([$1], [CXX], [
case $cc_basename in
nvcc*) # Cuda Compiler Driver 2.2
_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker '
- _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Xcompiler -fPIC'
+ if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)"
+ fi
;;
esac
else
@@ -4287,7 +4347,7 @@ m4_if([$1], [CXX], [
_LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
# old Intel for x86_64 which still supported -KPIC.
ecc*)
@@ -4334,18 +4394,33 @@ m4_if([$1], [CXX], [
;;
*)
case `$CC -V 2>&1 | sed 5q` in
- *Sun\ F* | *Sun*Fortran*)
+ *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*)
# Sun Fortran 8.3 passes all unrecognized flags to the linker
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
_LT_TAGVAR(lt_prog_compiler_wl, $1)=''
;;
+ *Sun\ F* | *Sun*Fortran*)
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+ ;;
*Sun\ C*)
# Sun C 5.9
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
_LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
;;
+ *Intel*\ [[CF]]*Compiler*)
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ ;;
+ *Portland\ Group*)
+ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
esac
;;
esac
@@ -4505,7 +4580,9 @@ m4_if([$1], [CXX], [
;;
cygwin* | mingw* | cegcc*)
case $cc_basename in
- cl*) ;;
+ cl*)
+ _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+ ;;
*)
_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols'
_LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname']
@@ -4533,7 +4610,6 @@ m4_if([$1], [CXX], [
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
- _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
@@ -4787,8 +4863,7 @@ _LT_EOF
xlf* | bgf* | bgxlf* | mpixlf*)
# IBM XL Fortran 10.1 on PPC cannot create shared libs itself
_LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive'
- _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
- _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='-rpath $libdir'
+ _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
_LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
if test "x$supports_anon_versioning" = xyes; then
_LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
@@ -5084,6 +5159,7 @@ _LT_EOF
# The linker will not automatically build a static lib if we build a DLL.
# _LT_TAGVAR(old_archive_from_new_cmds, $1)='true'
_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+ _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
_LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols'
# Don't use ranlib
_LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib'
@@ -5130,10 +5206,6 @@ _LT_EOF
_LT_TAGVAR(hardcode_shlibpath_var, $1)=no
;;
- freebsd1*)
- _LT_TAGVAR(ld_shlibs, $1)=no
- ;;
-
# FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
# support. Future versions do this automatically, but an explicit c++rt0.o
# does not break anything, and helps significantly (at the cost of a little
@@ -5146,7 +5218,7 @@ _LT_EOF
;;
# Unfortunately, older versions of FreeBSD 2 do not have this feature.
- freebsd2*)
+ freebsd2.*)
_LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
_LT_TAGVAR(hardcode_direct, $1)=yes
_LT_TAGVAR(hardcode_minus_L, $1)=yes
@@ -5185,7 +5257,6 @@ _LT_EOF
fi
if test "$with_gnu_ld" = no; then
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
- _LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir'
_LT_TAGVAR(hardcode_libdir_separator, $1)=:
_LT_TAGVAR(hardcode_direct, $1)=yes
_LT_TAGVAR(hardcode_direct_absolute, $1)=yes
@@ -5627,9 +5698,6 @@ _LT_TAGDECL([], [no_undefined_flag], [1],
_LT_TAGDECL([], [hardcode_libdir_flag_spec], [1],
[Flag to hardcode $libdir into a binary during linking.
This must work even if $libdir does not exist])
-_LT_TAGDECL([], [hardcode_libdir_flag_spec_ld], [1],
- [[If ld is used when linking, flag to hardcode $libdir into a binary
- during linking. This must work even if $libdir does not exist]])
_LT_TAGDECL([], [hardcode_libdir_separator], [1],
[Whether we need a single "-rpath" flag with a separated argument])
_LT_TAGDECL([], [hardcode_direct], [0],
@@ -5787,7 +5855,6 @@ _LT_TAGVAR(export_dynamic_flag_spec, $1)=
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
@@ -6157,7 +6224,7 @@ if test "$_lt_caught_CXX_error" != yes; then
esac
;;
- freebsd[[12]]*)
+ freebsd2.*)
# C++ shared libraries reported to be fairly broken before
# switch to ELF
_LT_TAGVAR(ld_shlibs, $1)=no
@@ -6173,9 +6240,6 @@ if test "$_lt_caught_CXX_error" != yes; then
_LT_TAGVAR(ld_shlibs, $1)=yes
;;
- gnu*)
- ;;
-
haiku*)
_LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
_LT_TAGVAR(link_all_deplibs, $1)=yes
@@ -6337,7 +6401,7 @@ if test "$_lt_caught_CXX_error" != yes; then
_LT_TAGVAR(inherit_rpath, $1)=yes
;;
- linux* | k*bsd*-gnu | kopensolaris*-gnu)
+ linux* | k*bsd*-gnu | kopensolaris*-gnu | gnu*)
case $cc_basename in
KCC*)
# Kuck and Associates, Inc. (KAI) C++ Compiler
@@ -6918,12 +6982,18 @@ public class foo {
}
};
_LT_EOF
+], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF
+package foo
+func foo() {
+}
+_LT_EOF
])
_lt_libdeps_save_CFLAGS=$CFLAGS
case "$CC $CFLAGS " in #(
*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
+*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
esac
dnl Parse the compiler output and extract the necessary
@@ -7120,7 +7190,6 @@ _LT_TAGVAR(export_dynamic_flag_spec, $1)=
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_automatic, $1)=no
@@ -7253,7 +7322,6 @@ _LT_TAGVAR(export_dynamic_flag_spec, $1)=
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
-_LT_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_automatic, $1)=no
@@ -7440,6 +7508,77 @@ CFLAGS=$lt_save_CFLAGS
])# _LT_LANG_GCJ_CONFIG
+# _LT_LANG_GO_CONFIG([TAG])
+# --------------------------
+# Ensure that the configuration variables for the GNU Go compiler
+# are suitably defined. These variables are subsequently used by _LT_CONFIG
+# to write the compiler configuration to `libtool'.
+m4_defun([_LT_LANG_GO_CONFIG],
+[AC_REQUIRE([LT_PROG_GO])dnl
+AC_LANG_SAVE
+
+# Source file extension for Go test sources.
+ac_ext=go
+
+# Object file extension for compiled Go test sources.
+objext=o
+_LT_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="package main; func main() { }"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='package main; func main() { }'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_TAG_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC=$CC
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+GCC=yes
+CC=${GOC-"gccgo"}
+CFLAGS=$GOFLAGS
+compiler=$CC
+_LT_TAGVAR(compiler, $1)=$CC
+_LT_TAGVAR(LD, $1)="$LD"
+_LT_CC_BASENAME([$compiler])
+
+# Go did not exist at the time GCC didn't implicitly link libc in.
+_LT_TAGVAR(archive_cmds_need_lc, $1)=no
+
+_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_TAGVAR(reload_flag, $1)=$reload_flag
+_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+if test -n "$compiler"; then
+ _LT_COMPILER_NO_RTTI($1)
+ _LT_COMPILER_PIC($1)
+ _LT_COMPILER_C_O($1)
+ _LT_COMPILER_FILE_LOCKS($1)
+ _LT_LINKER_SHLIBS($1)
+ _LT_LINKER_HARDCODE_LIBPATH($1)
+
+ _LT_CONFIG($1)
+fi
+
+AC_LANG_RESTORE
+
+GCC=$lt_save_GCC
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+])# _LT_LANG_GO_CONFIG
+
+
# _LT_LANG_RC_CONFIG([TAG])
# -------------------------
# Ensure that the configuration variables for the Windows resource compiler
@@ -7509,6 +7648,13 @@ dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([LT_AC_PROG_GCJ], [])
+# LT_PROG_GO
+# ----------
+AC_DEFUN([LT_PROG_GO],
+[AC_CHECK_TOOL(GOC, gccgo,)
+])
+
+
# LT_PROG_RC
# ----------
AC_DEFUN([LT_PROG_RC],
diff --git a/m4/ltoptions.m4 b/m4/ltoptions.m4
index 17cfd51..5d9acd8 100644
--- a/m4/ltoptions.m4
+++ b/m4/ltoptions.m4
@@ -326,9 +326,24 @@ dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
# MODE is either `yes' or `no'. If omitted, it defaults to `both'.
m4_define([_LT_WITH_PIC],
[AC_ARG_WITH([pic],
- [AS_HELP_STRING([--with-pic],
+ [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
- [pic_mode="$withval"],
+ [lt_p=${PACKAGE-default}
+ case $withval in
+ yes|no) pic_mode=$withval ;;
+ *)
+ pic_mode=default
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for lt_pkg in $withval; do
+ IFS="$lt_save_ifs"
+ if test "X$lt_pkg" = "X$lt_p"; then
+ pic_mode=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac],
[pic_mode=default])
test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
diff --git a/src/Frog-util.cxx b/src/Frog-util.cxx
index ce99967..300f455 100755
--- a/src/Frog-util.cxx
+++ b/src/Frog-util.cxx
@@ -1,5 +1,5 @@
/*
- $Id: Frog-util.cxx 16815 2014-01-06 10:00:29Z sloot $
+ $Id: Frog-util.cxx 17658 2014-09-15 13:59:33Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/Frog-util.cxx $
Copyright (c) 2006 - 2014
@@ -8,31 +8,27 @@
This file is part of frog
frog is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
frog is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
For questions and suggestions, see:
http://ilk.uvt.nl/software.html
or send mail to:
timbl at uvt.nl
-*/
+*/
-#include <iostream>
-#include <fstream>
+#include <set>
#include <string>
-#include <vector>
-#include <cstdlib>
#include "config.h"
-#include "timbl/TimblAPI.h"
#include "frog/Frog.h"
#include <sys/types.h>
@@ -46,7 +42,7 @@ using namespace std;
string prefix( const string& path, const string& fn ){
if ( fn.find( "/" ) == string::npos ){
// only append prefix when NO path is specified
- return path + fn;
+ return path + "/" + fn;
}
return fn;
}
@@ -62,7 +58,7 @@ bool existsDir( const string& dirName ){
}
void getFileNames( const string& dirName,
- const string& ext,
+ const string& ext,
set<string>& fileNames ){
DIR *dir = opendir( dirName.c_str() );
if ( !dir )
diff --git a/src/Frog.cxx b/src/Frog.cxx
index 32fb9a6..a2dbef9 100755
--- a/src/Frog.cxx
+++ b/src/Frog.cxx
@@ -1,5 +1,6 @@
+/* ex: set tabstop=8 expandtab: */
/*
- $Id: Frog.cxx 17459 2014-07-16 15:28:07Z sloot $
+ $Id: Frog.cxx 17829 2014-11-18 14:44:00Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/Frog.cxx $
Copyright (c) 2006 - 2014
@@ -28,6 +29,10 @@
timbl at uvt.nl
*/
+// Python.h seems to best included first. It tramples upon defines like:
+// _XOPEN_SOURCE, _POSIX_C_SOURCE" etc.
+#include "Python.h"
+
#include <cstdlib>
#include <cstdio>
#include <sys/wait.h>
@@ -38,28 +43,10 @@
#include <sstream>
#include <fstream>
#include <vector>
-#include <omp.h>
-
#include "config.h"
-
-#ifdef HAVE_LIBREADLINE
-# if defined(HAVE_READLINE_READLINE_H)
-# include <readline/readline.h>
-# elif defined(HAVE_READLINE_H)
-# include <readline.h>
-# endif /* !defined(HAVE_READLINE_H) */
-#endif /* HAVE_LIBREADLINE */
-
-#ifdef HAVE_READLINE_HISTORY
-# if defined(HAVE_READLINE_HISTORY_H)
-# include <readline/history.h>
-# elif defined(HAVE_HISTORY_H)
-# include <history.h>
-# endif /* defined(HAVE_READLINE_HISTORY_H) */
-#endif /* HAVE_READLINE_HISTORY */
-
-// Python.h seems to set HAVE_LIBREADLINE so we include this late...
-#include "Python.h"
+#ifdef HAVE_OPENMP
+#include <omp.h>
+#endif
#include "timbl/TimblAPI.h"
#include "timblserver/FdStream.h"
@@ -67,57 +54,23 @@
// individual module headers
-#include "frog/Frog.h"
-#include "ticcutils/Configuration.h"
+#include "frog/Frog.h" //internal interface, included by all modules
+#include "frog/FrogAPI.h" //public API interface
#include "ticcutils/StringOps.h"
#include "ticcutils/CommandLine.h"
-#include "frog/ucto_tokenizer_mod.h"
-#include "frog/mbma_mod.h"
-#include "frog/mblem_mod.h"
-#include "frog/mwu_chunker_mod.h"
-#include "frog/cgn_tagger_mod.h"
-#include "frog/iob_tagger_mod.h"
-#include "frog/ner_tagger_mod.h"
-#include "frog/Parser.h"
+#include "ticcutils/FileUtils.h"
using namespace std;
using namespace folia;
using namespace TiCC;
-LogStream my_default_log( cerr, "frog-", StampMessage ); // fall-back
-LogStream *theErrLog = &my_default_log; // fill the externals
-
string testDirName;
-string tmpDirName;
string outputFileName;
-string docid = "untitled";
-string textclass;
bool wantOUT;
string XMLoutFileName;
-bool doXMLin;
-bool doXMLout;
-bool doKanon;
string outputDirName;
string xmlDirName;
set<string> fileNames;
-string ProgName;
-int debugFlag = 0; //0 for none, more for more output
-unsigned int maxParserTokens = 0; // 0 for unlimited
-bool doTok = true;
-bool doLemma = true;
-bool doMorph = true;
-bool doDaringMorph = false;
-bool doMwu = true;
-bool doIOB = true;
-bool doNER = true;
-bool doParse = true;
-bool doDirTest = false;
-bool doServer = false;
-bool doSentencePerLine = false;
-bool doQuoteDetection = false;
-string listenport = "void";
-string encoding;
-string uttmark = "";
/* assumptions:
each components gets its own configfile per cmdline options
@@ -139,8 +92,6 @@ string uttmark = "";
*/
Configuration configuration;
-static string configDir = string(SYSCONF_PATH) + "/" + PACKAGE + "/";
-static string configFileName = configDir + "frog.cfg";
void usage( ) {
cout << endl << "Options:\n";
@@ -158,7 +109,8 @@ void usage( ) {
<< "\t============= MODULE SELECTION ==========================================\n"
<< "\t --skip=[mptncla] Skip Tokenizer (t), Lemmatizer (l), Morphological Analyzer (a), Chunker (c), Multi-Word Units (m), Named Entity Recognition (n), or Parser (p) \n"
<< "\t============= CONFIGURATION OPTIONS =====================================\n"
- << "\t -c <filename> Set configuration file (default " << configFileName << ")\n"
+ << "\t -c <filename> Set configuration file (default "
+ << FrogAPI::defaultConfigFile() << ")\n"
<< "\t============= OUTPUT OPTIONS ============================================\n"
<< "\t -o <outputfile> Output columned output to file, instead of default stdout\n"
<< "\t -X <xmlfile> Output also to an XML file in FoLiA format\n"
@@ -166,7 +118,7 @@ void usage( ) {
<< "\t --outputdir=<dir> Output to dir, instead of default stdout\n"
<< "\t --xmldir=<dir> Use 'dir' to output FoliA XML to.\n"
<< "\t --tmpdir=<directory> (location to store intermediate files. Default /tmp )\n"
- << "\t --keep-parser-files=[yes|no] keep intermediate parser files. (last sentence only).\n"
+ << "\t --keep-parser-files keep intermediate parser files. (last sentence only).\n"
<< "\t============= OTHER OPTIONS ============================================\n"
<< "\t -h. give some help.\n"
<< "\t -V or --version . Show version info.\n"
@@ -180,62 +132,44 @@ void usage( ) {
<< "\t (but always 1 for server mode)\n";
}
-//**** stuff to process commandline options *********************************
-
-static Mbma myMbma;
-static Mblem myMblem;
-static Mwu myMwu;
-static Parser myParser;
-static CGNTagger myCGNTagger;
-static IOBTagger myIOBTagger;
-static NERTagger myNERTagger;
-static UctoTokenizer tokenizer;
+bool parse_args( TiCC::CL_Options& Opts, FrogOptions& options,
+ LogStream* theErrLog ) {
+ // process the command line and fill FrogOptions to initialize the API
+ // also fill some globals we use for our own main.
-
-bool parse_args( TiCC::CL_Options& Opts ) {
string value;
- bool mood;
- if ( Opts.find('V', value, mood ) ||
- Opts.find("version", value ) ){
+ if ( Opts.is_present('V' ) || Opts.is_present("version" ) ){
// we already did show what we wanted.
exit( EXIT_SUCCESS );
}
- if ( Opts.find ('h', value, mood)) {
+ if ( Opts.is_present( 'h' ) ) {
usage();
exit( EXIT_SUCCESS );
};
// is a config file specified?
- if ( Opts.find( 'c', value, mood ) ) {
- configFileName = value;
- Opts.remove( 'c' );
- };
-
+ string configFileName = FrogAPI::defaultConfigFile();
+ Opts.extract( 'c', configFileName );
if ( configuration.fill( configFileName ) ){
*Log(theErrLog) << "config read from: " << configFileName << endl;
}
else {
- cerr << "failed te read configuration from! '" << configFileName << "'" << endl;
- cerr << "did you correctly install the frogdata package?" << endl;
+ cerr << "failed to read configuration from '" << configFileName << "' !!" << endl;
+ cerr << "Did you correctly install the frogdata package?" << endl;
return false;
}
// debug opts
- if ( Opts.find ('d', value, mood)) {
- if ( !stringTo<int>( value, debugFlag ) ){
+ if ( Opts.extract ('d', value) ) {
+ if ( !stringTo<int>( value, options.debugFlag ) ){
*Log(theErrLog) << "-d value should be an integer" << endl;
return false;
}
configuration.setatt( "debug", value );
- Opts.remove('d');
}
else {
configuration.setatt( "debug", "0" );
}
- if ( Opts.find( "debug", value ) ) {
- if ( value.empty() ){
- *Log(theErrLog) << "missing a value for --debug (did you forget the '='?)" << endl;
- return false;
- }
+ if ( Opts.extract( "debug", value ) ) {
value = TiCC::lowercase( value );
vector<string> vec;
TiCC::split_at( value, vec, "," );
@@ -274,217 +208,145 @@ bool parse_args( TiCC::CL_Options& Opts ) {
return false;
}
}
- Opts.remove("debug");
}
- if ( Opts.find ('n', value, mood)) {
- doSentencePerLine = true;
- };
- if ( Opts.find ('Q', value, mood)) {
- doQuoteDetection = true;
- };
- if ( Opts.find( "skip", value )) {
- if ( value.empty() ){
- *Log(theErrLog) << "missing a value for --skip (did you forget the '='?)" << endl;
- return false;
- }
+ options.doSentencePerLine = Opts.extract( 'n' );
+ options.doQuoteDetection = Opts.extract( 'Q' );
+ if ( Opts.extract( "skip", value )) {
string skip = value;
if ( skip.find_first_of("tT") != string::npos )
- doTok = false;
+ options.doTok = false;
if ( skip.find_first_of("lL") != string::npos )
- doLemma = false;
+ options.doLemma = false;
if ( skip.find_first_of("aA") != string::npos )
- doMorph = false;
+ options.doMorph = false;
if ( skip.find_first_of("mM") != string::npos )
- doMwu = false;
+ options.doMwu = false;
if ( skip.find_first_of("cC") != string::npos )
- doIOB = false;
+ options.doIOB = false;
if ( skip.find_first_of("nN") != string::npos )
- doNER = false;
+ options.doNER = false;
if ( skip.find_first_of("pP") != string::npos )
- doParse = false;
+ options.doParse = false;
+ else if ( !options.doMwu ){
+ *Log(theErrLog) << " Parser disabled, because MWU is deselected" << endl;
+ options.doParse = false;
+ }
Opts.remove("skip");
};
- if ( Opts.find( "daring", value ) ) {
- if ( value.empty() )
- value = "1";
- doDaringMorph = stringTo<bool>( value );
- if ( doDaringMorph ){
- doMorph = true;
- }
- }
- if ( Opts.find( 'e', value, mood)) {
- encoding = value;
+ if ( Opts.extract( "daring" ) ) {
+ options.doDaringMorph = true;
+ options.doMorph = true;
}
+ Opts.extract( 'e', options.encoding );
- if ( Opts.find( "max-parser-tokens", value ) ){
- if ( value.empty() ){
- *Log(theErrLog) << "max-parser-tokens option without value " << endl;
+ if ( Opts.extract( "max-parser-tokens", value ) ){
+ if ( !stringTo<unsigned int>( value, options.maxParserTokens ) ){
+ *Log(theErrLog) << "max-parser-tokens value should be an integer" << endl;
return false;
}
- else {
- if ( !stringTo<unsigned int>( value, maxParserTokens ) ){
- *Log(theErrLog) << "max-parser-tokens value should be an integer" << endl;
- return false;
- }
- }
- Opts.remove("max-parser-tokens");
}
- if ( Opts.find ('S', value, mood)) {
- doServer = true;
- listenport= value;
- }
+ options.doServer = Opts.extract('S', options.listenport );
+
#ifdef HAVE_OPENMP
- if ( doServer ) {
+ if ( options.doServer ) {
// run in one thread in server mode, forking is too expensive for lots of small snippets
- omp_set_num_threads( 1 );
+ options.numThreads = 1;
}
- else if ( Opts.find( "threads", value ) ){
- if ( value.empty() ){
- *Log(theErrLog) << "missing a value for --threads (did you forget the '='?)" << endl;
- return false;
- }
+ else if ( Opts.extract( "threads", value ) ){
int num;
if ( !stringTo<int>( value, num ) || num < 1 ){
*Log(theErrLog) << "threads value should be a positive integer" << endl;
return false;
}
- omp_set_num_threads( num );
+ options.numThreads = num;
}
#endif
- if ( Opts.find( "keep-parser-files", value ) ){
- if ( value.empty() ||
- stringTo<bool>( value ) ){
- configuration.setatt( "keepIntermediateFiles", "true", "parser" );
- Opts.remove("keep-parser-files");
- }
+ if ( Opts.extract( "keep-parser-files" ) ){
+ configuration.setatt( "keepIntermediateFiles", "true", "parser" );
}
- tmpDirName = configuration.lookUp( "tmpdir", "global" );
- if ( Opts.find ( "tmpdir", value )) {
- if ( value.empty() ){
- *Log(theErrLog) << "missing a value for --tmpdir (did you forget the '='?)" << endl;
- return false;
- }
- tmpDirName = value;
- Opts.remove("tmpdir");
+ options.tmpDirName = configuration.lookUp( "tmpdir", "global" );
+ Opts.extract( "tmpdir", options.tmpDirName ); // so might be overridden
+ if ( options.tmpDirName.empty() ){
+ options.tmpDirName = "/tmp/";
}
- if ( tmpDirName.empty() ){
- tmpDirName = "/tmp/";
- }
- else if ( tmpDirName[tmpDirName.size()-1] != '/' ){
- tmpDirName += "/";
+ else if ( options.tmpDirName[options.tmpDirName.size()-1] != '/' ){
+ options.tmpDirName += "/";
}
#ifdef HAVE_DIRENT_H
- if ( !tmpDirName.empty() ){
- if ( !existsDir( tmpDirName ) ){
- *Log(theErrLog) << "temporary dir " << tmpDirName << " not readable" << endl;
+ if ( !options.tmpDirName.empty() ){
+ if ( !existsDir( options.tmpDirName ) ){
+ *Log(theErrLog) << "temporary dir " << options.tmpDirName << " not readable" << endl;
return false;
}
- *Log(theErrLog) << "checking tmpdir: " << tmpDirName << " OK" << endl;
+ *Log(theErrLog) << "checking tmpdir: " << options.tmpDirName << " OK" << endl;
}
#endif
string TestFileName;
- if ( Opts.find ( "testdir", value )) {
+ if ( Opts.extract( "testdir", TestFileName ) ) {
#ifdef HAVE_DIRENT_H
- doDirTest = true;
- testDirName = value;
+ options.doDirTest = true;
+ testDirName = TestFileName;
if ( testDirName[testDirName.size()-1] != '/' ){
testDirName += "/";
}
- if ( !testDirName.empty() ){
- if ( !existsDir( testDirName ) ){
- *Log(theErrLog) << "input dir " << testDirName << " not readable" << endl;
- return false;
- }
- }
- else {
- *Log(theErrLog) << "missing a value for --testdir (did you forget the '='?)" << endl;
+ if ( !existsDir( testDirName ) ){
+ *Log(theErrLog) << "input dir " << testDirName << " not readable" << endl;
return false;
}
#else
*Log(theErrLog) << "--testdir option not supported!" << endl;
#endif
- Opts.remove("testdir");
}
- else if ( Opts.find( 't', value, mood )) {
- TestFileName = value;
- ifstream is( value.c_str() );
+ else if ( Opts.extract( 't', TestFileName ) ) {
+ ifstream is( TestFileName );
if ( !is ){
- *Log(theErrLog) << "input stream " << value << " is not readable" << endl;
+ *Log(theErrLog) << "input stream " << TestFileName << " is not readable" << endl;
return false;
}
- Opts.remove('t');
};
wantOUT = false;
- if ( Opts.find( "outputdir", value )) {
- outputDirName = value;
+ if ( Opts.extract( "outputdir", outputDirName )) {
if ( outputDirName[outputDirName.size()-1] != '/' ){
outputDirName += "/";
}
#ifdef HAVE_DIRENT_H
- if ( !outputDirName.empty() ){
- if ( !existsDir( outputDirName ) ){
- *Log(theErrLog) << "output dir " << outputDirName << " not readable" << endl;
- return false;
- }
- }
- else {
- *Log(theErrLog) << "missing a value for --outputdir (did you forget the '='?)" << endl;
+ if ( !existsDir( outputDirName ) ){
+ *Log(theErrLog) << "output dir " << outputDirName << " not readable" << endl;
return false;
}
#endif
wantOUT = true;
- Opts.remove( "outputdir");
}
- else if ( Opts.find ('o', value, mood)) {
+ else if ( Opts.extract( 'o', outputFileName ) ){
wantOUT = true;
- outputFileName = value;
- Opts.remove('o');
};
- doXMLout = false;
- if ( Opts.find ( "id", value )) {
- if ( value.empty() ){
- *Log(theErrLog) << "missing a value for --id (did you forget the '='?)" << endl;
- return false;
- }
- docid = value;
- Opts.remove( "id");
- }
- if ( Opts.find( "xmldir", value )) {
- xmlDirName = value;
+ options.doXMLout = false;
+ Opts.extract( "id", options.docid );
+ if ( Opts.extract( "xmldir", xmlDirName ) ){
if ( xmlDirName[xmlDirName.size()-1] != '/' ){
xmlDirName += "/";
}
#ifdef HAVE_DIRENT_H
- if ( !xmlDirName.empty() ){
- if ( !existsDir( xmlDirName ) ){
- *Log(theErrLog) << "XML output dir " << xmlDirName << " not readable" << endl;
- return false;
- }
- }
- else {
- *Log(theErrLog) << "missing a value for --xmldir (did you forget the '='?)" << endl;
+ if ( !existsDir( xmlDirName ) ){
+ *Log(theErrLog) << "XML output dir " << xmlDirName << " not readable" << endl;
return false;
}
#endif
- doXMLout = true;
- Opts.remove( "xmldir");
- }
- else if ( Opts.find ('X', value, mood)) {
- doXMLout = true;
- XMLoutFileName = value;
- Opts.remove('X');
+ options.doXMLout = true;
}
- if ( Opts.find ("KANON", value ) ){
- doKanon = true;
- Opts.remove( "KANON" );
+ else if ( Opts.extract('X', XMLoutFileName ) ){
+ options.doXMLout = true;
}
- doXMLin = false;
- if ( Opts.find ('x', value, mood)) {
- doXMLin = true;
+
+ options.doKanon = Opts.extract("KANON");
+
+ options.doXMLin = false;
+ if ( Opts.extract ('x', value ) ){
+ options.doXMLin = true;
if ( !value.empty() ){
if ( ! (xmlDirName.empty() &&
testDirName.empty() &&
@@ -499,19 +361,12 @@ bool parse_args( TiCC::CL_Options& Opts ) {
return false;
}
}
- Opts.remove('x');
}
- if ( Opts.find ( "textclass", value )) {
- if ( !doXMLin ){
+ if ( Opts.extract( "textclass", options.textclass ) ){
+ if ( !options.doXMLin ){
*Log(theErrLog) << "--textclass is only valid when -x is also present" << endl;
return false;
}
- if ( value.empty() ){
- *Log(theErrLog) << "missing a value for --textclass (did you forget the '='?)" << endl;
- return false;
- }
- textclass = value;
- Opts.remove( "textclass");
}
if ( !XMLoutFileName.empty() && !testDirName.empty() ){
@@ -519,24 +374,18 @@ bool parse_args( TiCC::CL_Options& Opts ) {
return false;
}
- if ( Opts.find ("uttmarker", value )) {
- if ( value.empty() ){
- *Log(theErrLog) << "missing a value for --uttmarker (did you forget the '='?)" << endl;
- return false;
- }
- uttmark = value;
- }
+ Opts.extract ("uttmarker", options.uttmark );
if ( !outputDirName.empty() && testDirName.empty() ){
*Log(theErrLog) << "useless -outputdir option" << endl;
return false;
}
if ( !testDirName.empty() ){
- if ( doXMLin )
+ if ( options.doXMLin )
getFileNames( testDirName, ".xml", fileNames );
else
getFileNames( testDirName, "", fileNames );
if ( fileNames.empty() ){
- *Log(theErrLog) << "error: couln't find any files in directory: "
+ *Log(theErrLog) << "error: couldn't find any files in directory: "
<< testDirName << endl;
return false;
}
@@ -559,998 +408,147 @@ bool parse_args( TiCC::CL_Options& Opts ) {
return false;
}
}
+ if ( !Opts.empty() ){
+ *Log(theErrLog) << "unhandled commandline options: " << Opts.toString() << endl;
- return true;
-}
-
-bool froginit(){
- // for some modules init can take a long time
- // so first make sure it will not fail on some trivialities
- //
- if ( doTok && !configuration.hasSection("tokenizer") ){
- *Log(theErrLog) << "Missing [[tokenizer]] section in config file." << endl;
- return false;
- }
- if ( doIOB && !configuration.hasSection("IOB") ){
- *Log(theErrLog) << "Missing [[IOB]] section in config file." << endl;
- return false;
- }
- if ( doNER && !configuration.hasSection("NER") ){
- *Log(theErrLog) << "Missing [[NER]] section in config file." << endl;
return false;
}
- if ( doMwu ){
- if ( !configuration.hasSection("mwu") ){
- *Log(theErrLog) << "Missing [[mwu]] section in config file." << endl;
- return false;
- }
- }
- else if ( doParse ){
- *Log(theErrLog) << " Parser disabled, because MWU is deselected" << endl;
- doParse = false;
- }
-
- if ( doServer ){
- // we use fork(). omp (GCC version) doesn't do well when omp is used
- // before the fork!
- // see: http://bisqwit.iki.fi/story/howto/openmp/#OpenmpAndFork
- bool stat = tokenizer.init( configuration, docid, !doTok );
- if ( stat ){
- tokenizer.setSentencePerLineInput( doSentencePerLine );
- tokenizer.setQuoteDetection( doQuoteDetection );
- tokenizer.setInputEncoding( encoding );
- tokenizer.setInputXml( doXMLin );
- tokenizer.setUttMarker( uttmark );
- tokenizer.setTextClass( textclass );
- stat = myCGNTagger.init( configuration );
- if ( stat ){
- if ( doIOB ){
- stat = myIOBTagger.init( configuration );
- }
- if ( stat && doNER ){
- stat = myNERTagger.init( configuration );
- }
- if ( stat && doLemma ){
- stat = myMblem.init( configuration );
- }
- if ( stat && doMorph ){
- stat = myMbma.init( configuration );
- if ( stat ) {
- if ( doDaringMorph )
- myMbma.setDaring(true);
- if ( doMwu ){
- stat = myMwu.init( configuration );
- if ( stat && doParse ){
- stat = myParser.init( configuration );
- }
- }
- }
- }
- }
- }
- if ( !stat ){
- *Log(theErrLog) << "Initialization failed." << endl;
- return false;
- }
- }
- else {
- bool tokStat = true;
- bool lemStat = true;
- bool mwuStat = true;
- bool mbaStat = true;
- bool parStat = true;
- bool tagStat = true;
- bool iobStat = true;
- bool nerStat = true;
-
-#pragma omp parallel sections
- {
-#pragma omp section
- {
- tokStat = tokenizer.init( configuration, docid, !doTok );
- if ( tokStat ){
- tokenizer.setSentencePerLineInput( doSentencePerLine );
- tokenizer.setQuoteDetection( doQuoteDetection );
- tokenizer.setInputEncoding( encoding );
- tokenizer.setInputXml( doXMLin );
- tokenizer.setUttMarker( uttmark );
- tokenizer.setTextClass( textclass );
- }
- }
-#pragma omp section
- {
- if ( doLemma ){
- lemStat = myMblem.init( configuration );
- }
- }
-#pragma omp section
- {
- if ( doMorph ){
- mbaStat = myMbma.init( configuration );
- if ( doDaringMorph )
- myMbma.setDaring(true);
- }
- }
-#pragma omp section
- {
- tagStat = myCGNTagger.init( configuration );
- }
-#pragma omp section
- {
- if ( doIOB ){
- iobStat = myIOBTagger.init( configuration );
- }
- }
-#pragma omp section
- {
- if ( doNER ){
- nerStat = myNERTagger.init( configuration );
- }
- }
-#pragma omp section
- {
- if ( doMwu ){
- mwuStat = myMwu.init( configuration );
- if ( mwuStat && doParse ){
- Timer initTimer;
- initTimer.start();
- parStat = myParser.init( configuration );
- initTimer.stop();
- *Log(theErrLog) << "init Parse took: " << initTimer << endl;
- }
- }
- }
- } // end omp parallel sections
- if ( ! ( tokStat && iobStat && nerStat && tagStat && lemStat
- && mbaStat && mwuStat && parStat ) ){
- *Log(theErrLog) << "Initialization failed for: ";
- if ( ! ( tokStat ) ){
- *Log(theErrLog) << "[tokenizer] ";
- }
- if ( ! ( tagStat ) ){
- *Log(theErrLog) << "[tagger] ";
- }
- if ( ! ( iobStat ) ){
- *Log(theErrLog) << "[IOB] ";
- }
- if ( ! ( nerStat ) ){
- *Log(theErrLog) << "[NER] ";
- }
- if ( ! ( lemStat ) ){
- *Log(theErrLog) << "[lemmatizer] ";
- }
- if ( ! ( mbaStat ) ){
- *Log(theErrLog) << "[morphology] ";
- }
- if ( ! ( mwuStat ) ){
- *Log(theErrLog) << "[multiword unit] ";
- }
- if ( ! ( parStat ) ){
- *Log(theErrLog) << "[parser] ";
- }
- *Log(theErrLog) << endl;
- return false;
- }
- }
- *Log(theErrLog) << "Initialization done." << endl;
return true;
}
-vector<Word*> lookup( Word *word, const vector<Entity*>& entities ){
- vector<Word*> vec;
- for ( size_t p=0; p < entities.size(); ++p ){
- vec = entities[p]->select<Word>();
- if ( !vec.empty() ){
- if ( vec[0]->id() == word->id() ) {
- return vec;
- }
- }
- }
- vec.clear();
- vec.push_back( word ); // single unit
- return vec;
-}
-
-Dependency *lookupDep( const Word *word,
- const vector<Dependency*>&dependencies ){
- if (dependencies.size() == 0 ){
- return 0;
- }
- int dbFlag = stringTo<int>( configuration.lookUp( "debug", "parser" ) );
- if ( dbFlag ){
- using TiCC::operator<<;
- *Log( theErrLog ) << "\nDependency-lookup "<< word << " in " << dependencies << endl;
- }
- for ( size_t i=0; i < dependencies.size(); ++i ){
- if ( dbFlag ){
- *Log( theErrLog ) << "Dependency try: " << dependencies[i] << endl;
- }
- try {
- vector<DependencyDependent*> dv = dependencies[i]->select<DependencyDependent>();
- if ( !dv.empty() ){
- vector<Word*> v = dv[0]->select<Word>();
- for ( size_t j=0; j < v.size(); ++j ){
- if ( v[j] == word ){
- if ( dbFlag ){
- *Log(theErrLog) << "\nDependency found word " << v[j] << endl;
- }
- return dependencies[i];
- }
- }
- }
- }
- catch ( exception& e ){
- if (dbFlag > 0)
- *Log(theErrLog) << "get Dependency results failed: "
- << e.what() << endl;
- }
- }
- return 0;
-}
-
-string lookupNEREntity( const vector<Word *>& mwu,
- const vector<Entity*>& entities ){
- string endresult;
- int dbFlag = stringTo<int>( configuration.lookUp( "debug", "NER" ) );
- for ( size_t j=0; j < mwu.size(); ++j ){
- if ( dbFlag ){
- using TiCC::operator<<;
- *Log(theErrLog) << "\nNER: lookup "<< mwu[j] << " in " << entities << endl;
- }
- string result;
- for ( size_t i=0; i < entities.size(); ++i ){
- if ( dbFlag ){
- *Log(theErrLog) << "NER try: " << entities[i] << endl;
- }
- try {
- vector<Word*> v = entities[i]->select<Word>();
- bool first = true;
- for ( size_t k=0; k < v.size(); ++k ){
- if ( v[k] == mwu[j] ){
- if (dbFlag){
- *Log(theErrLog) << "NER found word " << v[k] << endl;
- }
- if ( first )
- result += "B-" + uppercase(entities[i]->cls());
- else
- result += "I-" + uppercase(entities[i]->cls());
- break;
- }
- else
- first = false;
- }
- }
- catch ( exception& e ){
- if (dbFlag > 0)
- *Log(theErrLog) << "get NER results failed: "
- << e.what() << endl;
- }
- }
- if ( result.empty() )
- endresult += "O";
- else
- endresult += result;
- if ( j < mwu.size()-1 )
- endresult += "_";
- }
- return endresult;
-}
-
-
-string lookupIOBChunk( const vector<Word *>& mwu,
- const vector<Chunk*>& chunks ){
- string endresult;
- int dbFlag = stringTo<int>( configuration.lookUp( "debug", "IOB" ) );
- for ( size_t j=0; j < mwu.size(); ++j ){
- if ( dbFlag ){
- using TiCC::operator<<;
- *Log(theErrLog) << "IOB lookup "<< mwu[j] << " in " << chunks << endl;
- }
- string result;
- for ( size_t i=0; i < chunks.size(); ++i ){
- if ( dbFlag ){
- *Log(theErrLog) << "IOB try: " << chunks[i] << endl;
- }
- try {
- vector<Word*> v = chunks[i]->select<Word>();
- bool first = true;
- for ( size_t k=0; k < v.size(); ++k ){
- if ( v[k] == mwu[j] ){
- if (dbFlag){
- *Log(theErrLog) << "IOB found word " << v[k] << endl;
- }
- if ( first )
- result += "B-" + chunks[i]->cls();
- else
- result += "I-" + chunks[i]->cls();
- break;
- }
- else
- first = false;
- }
- }
- catch ( exception& e ){
- if (dbFlag > 0)
- *Log(theErrLog) << "get Chunks results failed: "
- << e.what() << endl;
- }
- }
- if ( result.empty() )
- endresult += "O";
- else
- endresult += result;
- if ( j < mwu.size()-1 )
- endresult += "_";
- }
- return endresult;
-}
-
-void displayMWU( ostream& os, size_t index,
- const vector<Word*> mwu ){
- string wrd;
- string pos;
- string lemma;
- string morph;
- double conf = 1;
- for ( size_t p=0; p < mwu.size(); ++p ){
- Word *word = mwu[p];
- try {
- wrd += word->str();
- PosAnnotation *postag = word->annotation<PosAnnotation>( );
- pos += postag->cls();
- if ( p < mwu.size() -1 ){
- wrd += "_";
- pos += "_";
- }
- conf *= postag->confidence();
- }
- catch ( exception& e ){
- if (debugFlag > 0)
- *Log(theErrLog) << "get Postag results failed: "
- << e.what() << endl;
- }
- if ( doLemma ){
- try {
- lemma += word->lemma();
- if ( p < mwu.size() -1 ){
- lemma += "_";
- }
- }
- catch ( exception& e ){
- if (debugFlag > 0)
- *Log(theErrLog) << "get Lemma results failed: "
- << e.what() << endl;
- }
- }
- if ( doDaringMorph ){
- try {
- vector<MorphologyLayer*> ml = word->annotations<MorphologyLayer>();
- for ( size_t q=0; q < ml.size(); ++q ){
- vector<Morpheme*> m = ml[q]->select<Morpheme>( false );
- assert( m.size() == 1 ); // top complex layer
- string desc = m[0]->description();
- morph = desc;
- if ( q < ml.size()-1 )
- morph += "/";
- }
- if ( p < mwu.size() -1 ){
- morph += "_";
- }
- }
- catch ( exception& e ){
- if (debugFlag > 0)
- *Log(theErrLog) << "get Morph results failed: "
- << e.what() << endl;
- }
- }
- else if ( doMorph ){
- try {
- vector<MorphologyLayer*> ml = word->annotations<MorphologyLayer>();
- for ( size_t q=0; q < ml.size(); ++q ){
- vector<Morpheme*> m = ml[q]->select<Morpheme>();
- for ( size_t t=0; t < m.size(); ++t ){
- string txt = UnicodeToUTF8( m[t]->text() );
- morph += "[" + txt + "]";
- }
- if ( q < ml.size()-1 )
- morph += "/";
- }
- if ( p < mwu.size() -1 ){
- morph += "_";
- }
- }
- catch ( exception& e ){
- if (debugFlag > 0)
- *Log(theErrLog) << "get Morph results failed: "
- << e.what() << endl;
- }
- }
- }
- os << index << "\t" << wrd << "\t" << lemma << "\t" << morph << "\t" << pos << "\t" << std::fixed << conf;
-}
-
-ostream &showResults( ostream& os,
- const Sentence* sentence,
- bool showParse ){
- vector<Word*> words = sentence->words();
- vector<Entity*> mwu_entities = sentence->select<Entity>( myMwu.getTagset() );
- vector<Dependency*> dependencies = sentence->select<Dependency>();
- vector<Chunk*> iob_chunking = sentence->select<Chunk>();
- vector<Entity*> ner_entities = sentence->select<Entity>( myNERTagger.getTagset() );
- static set<ElementType> excludeSet;
- vector<Sentence*> parts = sentence->select<Sentence>( excludeSet );
- if ( !doQuoteDetection )
- assert( parts.size() == 0 );
- for ( size_t i=0; i < parts.size(); ++i ){
- vector<Entity*> ents = parts[i]->select<Entity>( myMwu.getTagset() );
- mwu_entities.insert( mwu_entities.end(), ents.begin(), ents.end() );
- vector<Dependency*> deps = parts[i]->select<Dependency>();
- dependencies.insert( dependencies.end(), deps.begin(), deps.end() );
- vector<Chunk*> chunks = parts[i]->select<Chunk>();
- iob_chunking.insert( iob_chunking.end(), chunks.begin(), chunks.end() );
- vector<Entity*> ners = parts[i]->select<Entity>( myNERTagger.getTagset() );
- ner_entities.insert( ner_entities.end(), ners.begin(), ners.end() );
- }
-
- size_t index = 1;
- map<FoliaElement*, int> enumeration;
- vector<vector<Word*> > mwus;
- for( size_t i=0; i < words.size(); ++i ){
- Word *word = words[i];
- vector<Word*> mwu = lookup( word, mwu_entities );
- for ( size_t j=0; j < mwu.size(); ++j ){
- enumeration[mwu[j]] = index;
- }
- mwus.push_back( mwu );
- i += mwu.size()-1;
- ++index;
- }
- for( size_t i=0; i < mwus.size(); ++i ){
- displayMWU( os, i+1, mwus[i] );
- if ( doNER ){
- string cls;
- string s = lookupNEREntity( mwus[i], ner_entities );
- os << "\t" << s;
- }
- else {
- os << "\t\t";
- }
- if ( doIOB ){
- string cls;
- string s = lookupIOBChunk( mwus[i], iob_chunking );
- os << "\t" << s;
- }
- else {
- os << "\t\t";
- }
- if ( showParse ){
- string cls;
- Dependency *dep = lookupDep( mwus[i][0], dependencies );
- if ( dep ){
- vector<Headwords*> w = dep->select<Headwords>();
- size_t num;
- if ( w[0]->index(0)->isinstance( PlaceHolder_t ) ){
- string indexS = w[0]->index(0)->str();
- FoliaElement *pnt = w[0]->index(0)->doc()->index(indexS);
- num = enumeration.find(pnt->index(0))->second;
- }
- else {
- num = enumeration.find(w[0]->index(0))->second;
- }
- os << "\t" << num << "\t" << dep->cls();
- }
- else {
- os << "\t"<< 0 << "\tROOT";
- }
- }
- else {
- os << "\t\t";
- }
- os << endl;
- ++index;
- }
- if ( words.size() )
- os << endl;
- return os;
-}
-
-bool TestSentence( Sentence* sent,
- TimerBlock& timers ){
- vector<Word*> swords;
- if ( doQuoteDetection )
- swords = sent->wordParts();
- else
- swords = sent->words();
- bool showParse = doParse;
- if ( !swords.empty() ) {
-#pragma omp parallel sections
- {
-#pragma omp section
- {
- timers.tagTimer.start();
- myCGNTagger.Classify( swords );
- timers.tagTimer.stop();
- }
-#pragma omp section
- {
- if ( doIOB ){
- timers.iobTimer.start();
- myIOBTagger.Classify( swords );
- timers.iobTimer.stop();
- }
- }
-#pragma omp section
- {
- if ( doNER ){
- timers.nerTimer.start();
- myNERTagger.Classify( swords );
- timers.nerTimer.stop();
- }
- }
- } // parallel sections
- for ( size_t i = 0; i < swords.size(); ++i ) {
-#pragma omp parallel sections
- {
-#pragma omp section
- {
- if ( doMorph ){
- timers.mbmaTimer.start();
- if (debugFlag)
- *Log(theErrLog) << "Calling mbma..." << endl;
- myMbma.Classify( swords[i] );
- timers.mbmaTimer.stop();
- }
- }
-#pragma omp section
- {
- if ( doLemma ){
- timers.mblemTimer.start();
- if (debugFlag)
- *Log(theErrLog) << "Calling mblem..." << endl;
- myMblem.Classify( swords[i] );
- timers.mblemTimer.stop();
- }
- }
- } // omp parallel sections
- } //for int i = 0 to num_words
-
- if ( doMwu ){
- if ( swords.size() > 0 ){
- timers.mwuTimer.start();
- myMwu.Classify( swords );
- timers.mwuTimer.stop();
- }
- }
- if ( doParse ){
- if ( maxParserTokens != 0 && swords.size() > maxParserTokens ){
- showParse = false;
- }
- else {
- myParser.Parse( swords, myMwu.getTagset(), tmpDirName, timers );
- }
- }
- }
- return showParse;
-}
-
-void Test( Document& doc,
- ostream& outStream,
- bool interactive = false,
- const string& xmlOutFile = "" ) {
- TimerBlock timers;
- timers.frogTimer.start();
- // first we make sure that the doc will accept our annotations, by
- // declaring them in the doc
- myCGNTagger.addDeclaration( doc );
- if ( doLemma )
- myMblem.addDeclaration( doc );
- if ( doMorph )
- myMbma.addDeclaration( doc );
- if (doIOB)
- myIOBTagger.addDeclaration( doc );
- if (doNER)
- myNERTagger.addDeclaration( doc );
- if (doMwu)
- myMwu.addDeclaration( doc );
- if (doParse)
- myParser.addDeclaration( doc );
-
- if ( debugFlag > 5 )
- *Log(theErrLog) << "Testing document :" << doc << endl;
-
- vector<Sentence*> topsentences = doc.sentences();
- vector<Sentence*> sentences;
- if ( doQuoteDetection )
- sentences = doc.sentenceParts();
- else
- sentences = topsentences;
- size_t numS = sentences.size();
- if ( numS > 0 ) { //process sentences
- if (debugFlag > 0)
- *Log(theErrLog) << "found " << numS << " sentence(s) in document." << endl;
- for ( size_t i = 0; i < numS; i++) {
- /* ******* Begin process sentence ********** */
- //NOTE- full sentences are passed (which may span multiple lines) (MvG)
- bool showParse = TestSentence( sentences[i], timers );
- if ( doParse && !showParse ){
- *Log(theErrLog) << "WARNING!" << endl;
- *Log(theErrLog) << "Sentence " << i+1 << " isn't parsed because it contains more tokens then set with the --max-parser-tokens=" << maxParserTokens << " option." << endl;
- }
- }
- for ( size_t i = 0; i < topsentences.size(); ++i ) {
- if ( !(doServer && doXMLout) )
- showResults( outStream, topsentences[i], doParse );
- }
- }
- else {
- if (debugFlag > 0)
- *Log(theErrLog) << "No sentences found in document. " << endl;
- }
- if ( doServer && doXMLout )
- outStream << doc << endl;
- if ( !xmlOutFile.empty() ){
- doc.save( xmlOutFile, doKanon );
- *Log(theErrLog) << "resulting FoLiA doc saved in " << xmlOutFile << endl;
- }
-
- timers.frogTimer.stop();
- if ( !interactive ){
- *Log(theErrLog) << "tokenisation took: " << timers.tokTimer << endl;
- *Log(theErrLog) << "CGN tagging took: " << timers.tagTimer << endl;
- if ( doIOB)
- *Log(theErrLog) << "IOB chunking took: " << timers.iobTimer << endl;
- if ( doNER)
- *Log(theErrLog) << "NER took: " << timers.nerTimer << endl;
- if ( doMorph )
- *Log(theErrLog) << "MBA took: " << timers.mbmaTimer << endl;
- if ( doLemma )
- *Log(theErrLog) << "Mblem took: " << timers.mblemTimer << endl;
- if ( doMwu )
- *Log(theErrLog) << "MWU resolving took: " << timers.mwuTimer << endl;
- if ( doParse ){
- *Log(theErrLog) << "Parsing (prepare) took: " << timers.prepareTimer << endl;
- *Log(theErrLog) << "Parsing (pairs) took: " << timers.pairsTimer << endl;
- *Log(theErrLog) << "Parsing (rels) took: " << timers.relsTimer << endl;
- *Log(theErrLog) << "Parsing (dir) took: " << timers.dirTimer << endl;
- *Log(theErrLog) << "Parsing (csi) took: " << timers.csiTimer << endl;
- *Log(theErrLog) << "Parsing (total) took: " << timers.parseTimer << endl;
- }
- }
- *Log(theErrLog) << "Frogging in total took: " << timers.frogTimer << endl;
- return;
-}
-
-void Test( const string& infilename,
- ostream &os,
- const string& xmlOutF ) {
- // stuff the whole input into one FoLiA document.
- // This is not a good idea on the long term, I think (agreed [proycon] )
-
- string xmlOutFile = xmlOutF;
- if ( doXMLin && !xmlOutFile.empty() ){
- if ( match_back( infilename, ".gz" ) ){
- if ( !match_back( xmlOutFile, ".gz" ) )
- xmlOutFile += ".gz";
- }
- else if ( match_back( infilename, ".bz2" ) ){
- if ( !match_back( xmlOutFile, ".bz2" ) )
- xmlOutFile += ".bz2";
- }
- }
- if ( doXMLin ){
- Document doc;
- try {
- doc.readFromFile( infilename );
- }
- catch ( exception &e ){
- *Log(theErrLog) << "retrieving FoLiA from '" << infilename << "' failed with exception:" << endl;
- cerr << e.what() << endl;
- return;
- }
- tokenizer.tokenize( doc );
- Test( doc, os, false, xmlOutFile );
- }
- else {
- ifstream IN( infilename.c_str() );
- Document doc = tokenizer.tokenize( IN );
- Test( doc, os, false, xmlOutFile );
- }
-}
-
-void TestServer( Sockets::ServerSocket &conn) {
- //by Maarten van Gompel
-
- try {
- while (true) {
- ostringstream outputstream;
- if ( doXMLin ){
- string result;
- string s;
- while ( conn.read(s) ){
- result += s + "\n";
- if ( s.empty() )
- break;
- }
- if ( result.size() < 50 ){
- // a FoLia doc must be at least a few 100 bytes
- // so this is wrong. Just bail out
- throw( runtime_error( "read garbage" ) );
- }
- if ( debugFlag )
- *Log(theErrLog) << "received data [" << result << "]" << endl;
- Document doc;
- try {
- doc.readFromString( result );
- }
- catch ( std::exception& e ){
- *Log(theErrLog) << "FoLiaParsing failed:" << endl
- << e.what() << endl;
- throw;
- }
- *Log(theErrLog) << "Processing... " << endl;
- tokenizer.tokenize( doc );
- Test( doc, outputstream );
- }
- else {
- string data = "";
- if ( doSentencePerLine ){
- if ( !conn.read( data ) ) //read data from client
- throw( runtime_error( "read failed" ) );
- }
- else {
- string line;
- while( conn.read(line) ){
- if ( line == "EOT" )
- break;
- data += line + "\n";
- }
- }
- if (debugFlag)
- *Log(theErrLog) << "Received: [" << data << "]" << endl;
- *Log(theErrLog) << "Processing... " << endl;
- istringstream inputstream(data,istringstream::in);
- Document doc = tokenizer.tokenize( inputstream );
- Test( doc, outputstream );
- }
- if (!conn.write( (outputstream.str()) ) || !(conn.write("READY\n")) ){
- if (debugFlag)
- *Log(theErrLog) << "socket " << conn.getMessage() << endl;
- throw( runtime_error( "write to client failed" ) );
- }
-
- }
- }
- catch ( std::exception& e ) {
- if (debugFlag)
- *Log(theErrLog) << "connection lost: " << e.what() << endl;
- }
- *Log(theErrLog) << "Connection closed.\n";
-}
-
-#ifdef HAVE_LIBREADLINE
-void TestInteractive(){
- const char *prompt = "frog> ";
- string line;
- bool eof = false;
- while ( !eof ){
- string data;
- char *input = readline( prompt );
- if ( !input ){
- eof = true;
- break;
- }
- line = input;
- if ( doSentencePerLine ){
- if ( line.empty() ){
- continue;
- }
- else {
- data += line + "\n";
- add_history( input );
- }
- }
- else {
- if ( !line.empty() ){
- add_history( input );
- data = line + "\n";
- }
- while ( !eof ){
- char *input = readline( prompt );
- if ( !input ){
- eof = true;
- break;
- }
- line = input;
- if ( line.empty() ){
- break;
- }
- add_history( input );
- data += line + "\n";
- }
- }
- if ( !data.empty() ){
- if ( data[data.size()-1] == '\n' ){
- data = data.substr( 0, data.size()-1 );
- }
- cout << "Processing... '" << data << "'" << endl;
- istringstream inputstream(data,istringstream::in);
- Document doc = tokenizer.tokenize( inputstream );
- Test( doc, cout, true );
- }
- }
- cout << "Done.\n";
-}
-
-#else
-void TestInteractive(){
- cout << "frog>"; cout.flush();
- string line;
- string data;
- while ( getline( cin, line ) ){
- string data = line;
- if ( doSentencePerLine ){
- if ( line.empty() ){
- cout << "frog>"; cout.flush();
- continue;
- }
- }
- else {
- if ( !line.empty() ){
- data += "\n";
- }
- cout << "frog>"; cout.flush();
- string line2;
- while( getline( cin, line2 ) ){
- if ( line2.empty() )
- break;
- data += line2 + "\n";
- cout << "frog>"; cout.flush();
- }
- }
- if ( data.empty() ){
- cout << "ignoring empty input" << endl;
- cout << "frog>"; cout.flush();
- continue;
- }
- cout << "Processing... " << endl;
- istringstream inputstream(data,istringstream::in);
- Document doc = tokenizer.tokenize( inputstream );
- Test( doc, cout, true );
- cout << "frog>"; cout.flush();
- }
- cout << "Done.\n";
-}
-#endif
int main(int argc, char *argv[]) {
cerr << "frog " << VERSION << " (c) ILK 1998 - 2014" << endl;
cerr << "Induction of Linguistic Knowledge Research Group, Tilburg University" << endl;
- ProgName = argv[0];
cerr << "based on [" << Tokenizer::VersionName() << ", "
<< folia::VersionName() << ", "
<< Timbl::VersionName() << ", "
<< TimblServer::VersionName() << ", "
<< Tagger::VersionName() << "]" << endl;
- // cout << "configdir: " << configDir << endl;
+ LogStream *theErrLog = new LogStream( cerr, "frog-", StampMessage );
std::ios_base::sync_with_stdio(false);
+ FrogOptions options;
+
try {
- TiCC::CL_Options Opts("c:e:o:t:x:X:nQhVd:S:",
+ TiCC::CL_Options Opts("c:e:o:t:x::X::nQhVd:S:",
"textclass:,testdir:,uttmarker:,max-parser-tokens:,"
- "skip:,id:,outputdir:,xmldir:,tmpdir:,daring,debug:,"
- "keep-parser-files:,version,threads:,KANON");
+ "skip:,id:,outputdir:,xmldir:,tmpdir:,daring,"
+ "debug:,keep-parser-files,version,threads:,KANON");
Opts.init(argc, argv);
- if ( parse_args(Opts) ){
- if ( !froginit() ){
- throw runtime_error( "init failed" );
- }
- if ( !fileNames.empty() ) {
- string outPath = outputDirName;
- string xmlPath = xmlDirName;
- set<string>::const_iterator it = fileNames.begin();
- ostream *outS = 0;
- if ( !outputFileName.empty() ){
- outS = new ofstream( outputFileName.c_str() );
+ bool parsed = parse_args( Opts, options, theErrLog );
+ if (!parsed) {
+ throw runtime_error( "init failed" );
+ }
+ FrogAPI frog( options, configuration, theErrLog );
+
+ if ( !fileNames.empty() ) {
+ string outPath = outputDirName;
+ string xmlPath = xmlDirName;
+ set<string>::const_iterator it = fileNames.begin();
+ ostream *outS = 0;
+ if ( !outputFileName.empty() ){
+ outS = new ofstream( outputFileName.c_str() );
+ }
+ while ( it != fileNames.end() ){
+ string testName = testDirName;
+ testName += *it;
+ if ( !TiCC::isFile( testName ) ){
+ *Log(theErrLog) << "skip " << testName << " (file not found )"
+ << endl;
+ ++it;
+ continue;
}
- while ( it != fileNames.end() ){
- string testName = testDirName;
- testName += *it;
- string outName;
- if ( outS == 0 ){
- if ( wantOUT ){
- if ( doXMLin ){
- if ( !outPath.empty() )
- outName = outPath + *it + ".out";
- }
- else
+ string outName;
+ if ( outS == 0 ){
+ if ( wantOUT ){
+ if ( options.doXMLin ){
+ if ( !outPath.empty() )
outName = outPath + *it + ".out";
- outS = new ofstream( outName.c_str() );
}
else {
- outS = &cout;
- }
- }
- string xmlName = XMLoutFileName;
- if ( xmlName.empty() ){
- if ( !xmlDirName.empty() ){
- if ( it->rfind(".xml") == string::npos )
- xmlName = xmlPath + *it + ".xml";
- else
- xmlName = xmlPath + *it;
+ outName = outPath + *it + ".out";
}
- else if ( doXMLout )
- xmlName = *it + ".xml"; // do not clobber the inputdir!
+ outS = new ofstream( outName.c_str() );
+ } else {
+ outS = &cout;
}
- *Log(theErrLog) << "Frogging " << testName << endl;
- Test( testName, *outS, xmlName );
- if ( !outName.empty() ){
- *Log(theErrLog) << "results stored in " << outName << endl;
- delete outS;
- outS = 0;
+ }
+ string xmlName = XMLoutFileName;
+ if ( xmlName.empty() ){
+ if ( !xmlDirName.empty() ){
+ if ( it->rfind(".xml") == string::npos )
+ xmlName = xmlPath + *it + ".xml";
+ else
+ xmlName = xmlPath + *it;
}
- ++it;
+ else if ( options.doXMLout )
+ xmlName = *it + ".xml"; // do not clobber the inputdir!
}
- if ( !outputFileName.empty() ){
- *Log(theErrLog) << "results stored in " << outputFileName << endl;
+ *Log(theErrLog) << "Frogging " << testName << endl;
+ frog.FrogFile( testName, *outS, xmlName );
+ if ( !outName.empty() ){
+ *Log(theErrLog) << "results stored in " << outName << endl;
delete outS;
+ outS = 0;
}
+ ++it;
}
- else if ( doServer ) {
- //first set up some things to deal with zombies
- struct sigaction action;
- action.sa_handler = SIG_IGN;
- sigemptyset(&action.sa_mask);
+ if ( !outputFileName.empty() ){
+ *Log(theErrLog) << "results stored in " << outputFileName << endl;
+ delete outS;
+ }
+ }
+ else if ( options.doServer ) {
+ //first set up some things to deal with zombies
+ struct sigaction action;
+ action.sa_handler = SIG_IGN;
+ sigemptyset(&action.sa_mask);
#ifdef SA_NOCLDWAIT
- action.sa_flags = SA_NOCLDWAIT;
+ action.sa_flags = SA_NOCLDWAIT;
#endif
- sigaction(SIGCHLD, &action, NULL);
+ sigaction(SIGCHLD, &action, NULL);
- srand((unsigned)time(0));
+ srand((unsigned)time(0));
- *Log(theErrLog) << "Listening on port " << listenport << "\n";
+ *Log(theErrLog) << "Listening on port " << options.listenport << "\n";
- try
- {
- // Create the socket
- Sockets::ServerSocket server;
- if ( !server.connect( listenport ) )
- throw( runtime_error( "starting server on port " + listenport + " failed" ) );
- if ( !server.listen( 5 ) ) {
- // maximum of 5 pending requests
- throw( runtime_error( "listen(5) failed" ) );
+ try {
+ // Create the socket
+ Sockets::ServerSocket server;
+ if ( !server.connect( options.listenport ) )
+ throw( runtime_error( "starting server on port " + options.listenport + " failed" ) );
+ if ( !server.listen( 5 ) ) {
+ // maximum of 5 pending requests
+ throw( runtime_error( "listen(5) failed" ) );
+ }
+ while ( true ) {
+ Sockets::ServerSocket conn;
+ if ( server.accept( conn ) ){
+ *Log(theErrLog) << "New connection..." << endl;
+ int pid = fork();
+ if (pid < 0) {
+ *Log(theErrLog) << "ERROR on fork" << endl;
+ throw runtime_error( "FORK failed" );
}
- while ( true ) {
-
- Sockets::ServerSocket conn;
- if ( server.accept( conn ) ){
- *Log(theErrLog) << "New connection..." << endl;
- int pid = fork();
- if (pid < 0) {
- *Log(theErrLog) << "ERROR on fork" << endl;
- throw runtime_error( "FORK failed" );
- } else if (pid == 0) {
- // server = NULL;
- TestServer(conn );
- exit(EXIT_SUCCESS);
- }
- }
- else {
- throw( runtime_error( "Accept failed" ) );
- }
+ else if (pid == 0) {
+ frog.FrogServer( conn );
+ exit(EXIT_SUCCESS);
}
- } catch ( std::exception& e )
- {
- *Log(theErrLog) << "Server error:" << e.what() << " Exiting." << endl;
- throw;
}
+ else {
+ throw( runtime_error( "Accept failed" ) );
+ }
+ }
}
- else {
- // interactive mode
- TestInteractive( );
+ catch ( std::exception& e ) {
+ *Log(theErrLog) << "Server error:" << e.what() << " Exiting." << endl;
+ throw;
}
}
else {
- throw runtime_error( "invalid arguments" );
+ // interactive mode
+ frog.FrogInteractive();
}
}
catch ( const exception& e ){
diff --git a/src/FrogAPI.cxx b/src/FrogAPI.cxx
new file mode 100644
index 0000000..c18a938
--- /dev/null
+++ b/src/FrogAPI.cxx
@@ -0,0 +1,1073 @@
+/* ex: set tabstop=8 expandtab: */
+/*
+ $Id: FrogAPI.cxx 17749 2014-10-20 13:16:50Z sloot $
+ $URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/FrogAPI.cxx $
+
+ Copyright (c) 2006 - 2014
+ Tilburg University
+
+ A Tagger-Lemmatizer-Morphological-Analyzer-Dependency-Parser for Dutch
+
+ This file is part of frog
+
+ frog is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ frog is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ For questions and suggestions, see:
+ http://ilk.uvt.nl/software.html
+ or send mail to:
+ timbl at uvt.nl
+*/
+
+// Python.h seems to best included first. It tramples upon defines like:
+// _XOPEN_SOURCE, _POSIX_C_SOURCE" etc.
+#include "Python.h"
+
+#include <cstdlib>
+#include <cstdio>
+#include <sys/wait.h>
+#include <signal.h>
+#include <string>
+#include <cstring>
+#include <iostream>
+#include <sstream>
+#include <fstream>
+#include <vector>
+#include "config.h"
+#ifdef HAVE_OPENMP
+#include <omp.h>
+#endif
+
+#ifdef HAVE_LIBREADLINE
+# if defined(HAVE_READLINE_READLINE_H)
+# include <readline/readline.h>
+# elif defined(HAVE_READLINE_H)
+# include <readline.h>
+# else
+# define NO_READLINE
+# endif /* !defined(HAVE_READLINE_H) */
+#else
+# define NO_READLINE
+#endif /* HAVE_LIBREADLINE */
+
+#ifdef HAVE_READLINE_HISTORY
+# if defined(HAVE_READLINE_HISTORY_H)
+# include <readline/history.h>
+# elif defined(HAVE_HISTORY_H)
+# include <history.h>
+# endif /* defined(HAVE_READLINE_HISTORY_H) */
+#endif /* HAVE_READLINE_HISTORY */
+
+
+// individual module headers
+
+#include "frog/FrogAPI.h" //will also include Frog.h (internals), FrogAPI.h is for public interface
+
+using namespace std;
+using namespace folia;
+using namespace TiCC;
+
+string configDir = string(SYSCONF_PATH) + "/" + PACKAGE + "/";
+string configFileName = configDir + "frog.cfg";
+
+string FrogAPI::defaultConfigDir(){ return configDir; }
+string FrogAPI::defaultConfigFile(){ return configFileName; }
+
+FrogOptions::FrogOptions() {
+ doTok = doLemma = doMorph = doMwu = doIOB = doNER = doParse = true;
+ doDaringMorph = false;
+ doSentencePerLine = false;
+ doQuoteDetection = false;
+ doDirTest = false;
+ doServer = false;
+ doXMLin = false;
+ doXMLout = false;
+ doKanon = false;
+ interactive = false;
+
+ maxParserTokens = 0; // 0 for unlimited
+#ifdef HAVE_OPENMP
+ numThreads = omp_get_max_threads();
+#else
+ numThreads = 1;
+#endif
+ listenport = "void";
+ docid = "untitled";
+ tmpDirName = "/tmp/";
+
+ debugFlag = 0;
+}
+
+FrogAPI::FrogAPI( const FrogOptions &opt,
+ const Configuration &conf,
+ LogStream *log ):
+ configuration(conf),
+ options(opt),
+ theErrLog(log),
+ myMbma(0),
+ myMblem(0),
+ myMwu(0),
+ myParser(0),
+ myCGNTagger(0),
+ myIOBTagger(0),
+ myNERTagger(0),
+ tokenizer(0)
+{
+ // for some modules init can take a long time
+ // so first make sure it will not fail on some trivialities
+ //
+ if ( options.doTok && !configuration.hasSection("tokenizer") ){
+ *Log(theErrLog) << "Missing [[tokenizer]] section in config file." << endl;
+ exit(2);
+ }
+ if ( options.doIOB && !configuration.hasSection("IOB") ){
+ *Log(theErrLog) << "Missing [[IOB]] section in config file." << endl;
+ exit(2);
+ }
+ if ( options.doNER && !configuration.hasSection("NER") ){
+ *Log(theErrLog) << "Missing [[NER]] section in config file." << endl;
+ exit(2);
+ }
+ if ( options.doMwu && !configuration.hasSection("mwu") ){
+ *Log(theErrLog) << "Missing [[mwu]] section in config file." << endl;
+ exit(2);
+ }
+ if ( options.doParse && !configuration.hasSection("parser") ){
+ *Log(theErrLog) << "Missing [[parser]] section in config file." << endl;
+ exit(2);
+ }
+
+ if ( options.doServer ){
+ // we use fork(). omp (GCC version) doesn't do well when omp is used
+ // before the fork!
+ // see: http://bisqwit.iki.fi/story/howto/openmp/#OpenmpAndFork
+ tokenizer = new UctoTokenizer(theErrLog);
+ bool stat = tokenizer->init( configuration );
+ if ( stat ){
+ tokenizer->setPassThru( !options.doTok );
+ tokenizer->setDocID( options.docid );
+ tokenizer->setSentencePerLineInput( options.doSentencePerLine );
+ tokenizer->setQuoteDetection( options.doQuoteDetection );
+ tokenizer->setInputEncoding( options.encoding );
+ tokenizer->setInputXml( options.doXMLin );
+ tokenizer->setUttMarker( options.uttmark );
+ tokenizer->setTextClass( options.textclass );
+ myCGNTagger = new CGNTagger(theErrLog);
+ stat = myCGNTagger->init( configuration );
+ if ( stat ){
+ if ( options.doIOB ){
+ myIOBTagger = new IOBTagger(theErrLog);
+ stat = myIOBTagger->init( configuration );
+ }
+ if ( stat && options.doNER ){
+ myNERTagger = new NERTagger(theErrLog);
+ stat = myNERTagger->init( configuration );
+ }
+ if ( stat && options.doLemma ){
+ myMblem = new Mblem(theErrLog);
+ stat = myMblem->init( configuration );
+ }
+ if ( stat && options.doMorph ){
+ myMbma = new Mbma(theErrLog);
+ stat = myMbma->init( configuration );
+ if ( stat ) {
+ if ( options.doDaringMorph )
+ myMbma->setDaring(true);
+ if ( options.doMwu ){
+ myMwu = new Mwu(theErrLog);
+ stat = myMwu->init( configuration );
+ if ( stat && options.doParse ){
+ myParser = new Parser(theErrLog);
+ stat = myParser->init( configuration );
+ }
+ }
+ }
+ }
+ }
+ }
+ if ( !stat ){
+ *Log(theErrLog) << "Frog initialization failed." << endl;
+ exit(2);
+ }
+ }
+ else {
+#ifdef HAVE_OPENMP
+ omp_set_num_threads( options.numThreads );
+ int curt = omp_get_max_threads();
+ if ( curt != options.numThreads ){
+ *Log(theErrLog) << "attempt to set to " << options.numThreads
+ << " threads FAILED, running on " << curt
+ << " threads instead" << endl;
+ }
+ else if ( options.debugFlag ){
+ *Log(theErrLog) << "running on " << curt
+ << " threads" << endl;
+ }
+
+#endif
+
+ bool tokStat = true;
+ bool lemStat = true;
+ bool mwuStat = true;
+ bool mbaStat = true;
+ bool parStat = true;
+ bool tagStat = true;
+ bool iobStat = true;
+ bool nerStat = true;
+
+#pragma omp parallel sections
+ {
+#pragma omp section
+ {
+ tokenizer = new UctoTokenizer(theErrLog);
+ tokStat = tokenizer->init( configuration );
+ if ( tokStat ){
+ tokenizer->setPassThru( !options.doTok );
+ tokenizer->setDocID( options.docid );
+ tokenizer->setSentencePerLineInput( options.doSentencePerLine );
+ tokenizer->setQuoteDetection( options.doQuoteDetection );
+ tokenizer->setInputEncoding( options.encoding );
+ tokenizer->setInputXml( options.doXMLin );
+ tokenizer->setUttMarker( options.uttmark );
+ tokenizer->setTextClass( options.textclass );
+ }
+ }
+#pragma omp section
+ {
+ if ( options.doLemma ){
+ myMblem = new Mblem(theErrLog);
+ lemStat = myMblem->init( configuration );
+ }
+ }
+#pragma omp section
+ {
+ if ( options.doMorph ){
+ myMbma = new Mbma(theErrLog);
+ mbaStat = myMbma->init( configuration );
+ if ( options.doDaringMorph )
+ myMbma->setDaring(true);
+ }
+ }
+#pragma omp section
+ {
+ myCGNTagger = new CGNTagger(theErrLog);
+ tagStat = myCGNTagger->init( configuration );
+ }
+#pragma omp section
+ {
+ if ( options.doIOB ){
+ myIOBTagger = new IOBTagger(theErrLog);
+ iobStat = myIOBTagger->init( configuration );
+ }
+ }
+#pragma omp section
+ {
+ if ( options.doNER ){
+ myNERTagger = new NERTagger(theErrLog);
+ nerStat = myNERTagger->init( configuration );
+ }
+ }
+#pragma omp section
+ {
+ if ( options.doMwu ){
+ myMwu = new Mwu(theErrLog);
+ mwuStat = myMwu->init( configuration );
+ if ( mwuStat && options.doParse ){
+ Timer initTimer;
+ initTimer.start();
+ myParser = new Parser(theErrLog);
+ parStat = myParser->init( configuration );
+ initTimer.stop();
+ *Log(theErrLog) << "init Parse took: " << initTimer << endl;
+ }
+ }
+ }
+ } // end omp parallel sections
+ if ( ! ( tokStat && iobStat && nerStat && tagStat && lemStat
+ && mbaStat && mwuStat && parStat ) ){
+ *Log(theErrLog) << "Initialization failed for: ";
+ if ( ! ( tokStat ) ){
+ *Log(theErrLog) << "[tokenizer] ";
+ }
+ if ( ! ( tagStat ) ){
+ *Log(theErrLog) << "[tagger] ";
+ }
+ if ( ! ( iobStat ) ){
+ *Log(theErrLog) << "[IOB] ";
+ }
+ if ( ! ( nerStat ) ){
+ *Log(theErrLog) << "[NER] ";
+ }
+ if ( ! ( lemStat ) ){
+ *Log(theErrLog) << "[lemmatizer] ";
+ }
+ if ( ! ( mbaStat ) ){
+ *Log(theErrLog) << "[morphology] ";
+ }
+ if ( ! ( mwuStat ) ){
+ *Log(theErrLog) << "[multiword unit] ";
+ }
+ if ( ! ( parStat ) ){
+ *Log(theErrLog) << "[parser] ";
+ }
+ *Log(theErrLog) << endl;
+ exit(2);
+ }
+ }
+ *Log(theErrLog) << "Initialization done." << endl;
+}
+
+FrogAPI::~FrogAPI() {
+ delete myMbma;
+ delete myMblem;
+ delete myMwu;
+ delete myCGNTagger;
+ delete myIOBTagger;
+ delete myNERTagger;
+ delete myParser;
+ delete tokenizer;
+}
+
+bool FrogAPI::TestSentence( Sentence* sent, TimerBlock& timers){
+ vector<Word*> swords;
+ if ( options.doQuoteDetection )
+ swords = sent->wordParts();
+ else
+ swords = sent->words();
+ bool showParse = options.doParse;
+ if ( !swords.empty() ) {
+#pragma omp parallel sections
+ {
+#pragma omp section
+ {
+ timers.tagTimer.start();
+ myCGNTagger->Classify( swords );
+ timers.tagTimer.stop();
+ }
+#pragma omp section
+ {
+ if ( options.doIOB ){
+ timers.iobTimer.start();
+ myIOBTagger->Classify( swords );
+ timers.iobTimer.stop();
+ }
+ }
+#pragma omp section
+ {
+ if ( options.doNER ){
+ timers.nerTimer.start();
+ myNERTagger->Classify( swords );
+ timers.nerTimer.stop();
+ }
+ }
+ } // parallel sections
+ for ( size_t i = 0; i < swords.size(); ++i ) {
+#pragma omp parallel sections
+ {
+#pragma omp section
+ {
+ if ( options.doMorph ){
+ timers.mbmaTimer.start();
+ if (options.debugFlag)
+ *Log(theErrLog) << "Calling mbma..." << endl;
+ myMbma->Classify( swords[i] );
+ timers.mbmaTimer.stop();
+ }
+ }
+#pragma omp section
+ {
+ if ( options.doLemma ){
+ timers.mblemTimer.start();
+ if (options.debugFlag)
+ *Log(theErrLog) << "Calling mblem..." << endl;
+ myMblem->Classify( swords[i] );
+ timers.mblemTimer.stop();
+ }
+ }
+ } // omp parallel sections
+ } //for int i = 0 to num_words
+
+ if ( options.doMwu ){
+ if ( swords.size() > 0 ){
+ timers.mwuTimer.start();
+ myMwu->Classify( swords );
+ timers.mwuTimer.stop();
+ }
+ }
+ if ( options.doParse ){
+ if ( options.maxParserTokens != 0 && swords.size() > options.maxParserTokens ){
+ showParse = false;
+ }
+ else {
+ myParser->Parse( swords, myMwu->getTagset(), options.tmpDirName, timers );
+ }
+ }
+ }
+ return showParse;
+}
+
+void FrogAPI::FrogServer( Sockets::ServerSocket &conn ){
+ try {
+ while (true) {
+ ostringstream outputstream;
+ if ( options.doXMLin ){
+ string result;
+ string s;
+ while ( conn.read(s) ){
+ result += s + "\n";
+ if ( s.empty() )
+ break;
+ }
+ if ( result.size() < 50 ){
+ // a FoLia doc must be at least a few 100 bytes
+ // so this is wrong. Just bail out
+ throw( runtime_error( "read garbage" ) );
+ }
+ if ( options.debugFlag )
+ *Log(theErrLog) << "received data [" << result << "]" << endl;
+ Document doc;
+ try {
+ doc.readFromString( result );
+ }
+ catch ( std::exception& e ){
+ *Log(theErrLog) << "FoLiaParsing failed:" << endl << e.what() << endl;
+ throw;
+ }
+ *Log(theErrLog) << "Processing... " << endl;
+ tokenizer->tokenize( doc );
+ FrogDoc( doc );
+ showResults( outputstream, doc );
+ }
+ else {
+ string data = "";
+ if ( options.doSentencePerLine ){
+ if ( !conn.read( data ) ) //read data from client
+ throw( runtime_error( "read failed" ) );
+ }
+ else {
+ string line;
+ while( conn.read(line) ){
+ if ( line == "EOT" )
+ break;
+ data += line + "\n";
+ }
+ }
+ if ( options.debugFlag )
+ *Log(theErrLog) << "Received: [" << data << "]" << endl;
+ *Log(theErrLog) << "Processing... " << endl;
+ istringstream inputstream(data,istringstream::in);
+ Document doc = tokenizer->tokenize( inputstream );
+ FrogDoc( doc );
+ showResults( outputstream, doc );
+ }
+ if (!conn.write( (outputstream.str()) ) || !(conn.write("READY\n")) ){
+ if (options.debugFlag)
+ *Log(theErrLog) << "socket " << conn.getMessage() << endl;
+ throw( runtime_error( "write to client failed" ) );
+ }
+ }
+ }
+ catch ( std::exception& e ) {
+ if (options.debugFlag)
+ *Log(theErrLog) << "connection lost: " << e.what() << endl;
+ }
+ *Log(theErrLog) << "Connection closed.\n";
+}
+
+#ifdef NO_READLINE
+void FrogAPI::FrogInteractive() {
+ cout << "frog>"; cout.flush();
+ string line;
+ string data;
+ while ( getline( cin, line ) ){
+ string data = line;
+ if ( options.doSentencePerLine ){
+ if ( line.empty() ){
+ cout << "frog>"; cout.flush();
+ continue;
+ }
+ }
+ else {
+ if ( !line.empty() ){
+ data += "\n";
+ }
+ cout << "frog>"; cout.flush();
+ string line2;
+ while( getline( cin, line2 ) ){
+ if ( line2.empty() )
+ break;
+ data += line2 + "\n";
+ cout << "frog>"; cout.flush();
+ }
+ }
+ if ( data.empty() ){
+ cout << "ignoring empty input" << endl;
+ cout << "frog>"; cout.flush();
+ continue;
+ }
+ cout << "Processing... " << endl;
+ istringstream inputstream(data,istringstream::in);
+ Document doc = tokenizer->tokenize( inputstream );
+ FrogDoc( doc, true );
+ showResults( cout, doc );
+ cout << "frog>"; cout.flush();
+ }
+ cout << "Done.\n";
+}
+#else
+void FrogAPI::FrogInteractive(){
+ const char *prompt = "frog> ";
+ string line;
+ bool eof = false;
+ while ( !eof ){
+ string data;
+ char *input = readline( prompt );
+ if ( !input ){
+ eof = true;
+ break;
+ }
+ line = input;
+ if ( options.doSentencePerLine ){
+ if ( line.empty() ){
+ continue;
+ }
+ else {
+ data += line + "\n";
+ add_history( input );
+ }
+ }
+ else {
+ if ( !line.empty() ){
+ add_history( input );
+ data = line + "\n";
+ }
+ while ( !eof ){
+ char *input = readline( prompt );
+ if ( !input ){
+ eof = true;
+ break;
+ }
+ line = input;
+ if ( line.empty() ){
+ break;
+ }
+ add_history( input );
+ data += line + "\n";
+ }
+ }
+ if ( !data.empty() ){
+ if ( data[data.size()-1] == '\n' ){
+ data = data.substr( 0, data.size()-1 );
+ }
+ cout << "Processing... '" << data << "'" << endl;
+ istringstream inputstream(data,istringstream::in);
+ Document doc = tokenizer->tokenize( inputstream );
+ FrogDoc( doc, true );
+ showResults( cout, doc );
+ }
+ }
+ cout << "Done.\n";
+}
+#endif
+
+vector<Word*> FrogAPI::lookup( Word *word,
+ const vector<Entity*>& entities ) const {
+ vector<Word*> vec;
+ for ( size_t p=0; p < entities.size(); ++p ){
+ vec = entities[p]->select<Word>();
+ if ( !vec.empty() ){
+ if ( vec[0]->id() == word->id() ) {
+ return vec;
+ }
+ }
+ }
+ vec.clear();
+ vec.push_back( word ); // single unit
+ return vec;
+}
+
+Dependency * FrogAPI::lookupDep( const Word *word,
+ const vector<Dependency*>&dependencies ) const{
+ if (dependencies.size() == 0 ){
+ return 0;
+ }
+ int dbFlag = 0;
+ try{
+ dbFlag = stringTo<int>( configuration.lookUp( "debug", "parser" ) );
+ } catch (exception & e) {
+ dbFlag = 0;
+ }
+ if ( dbFlag ){
+ using TiCC::operator<<;
+ *Log( theErrLog ) << "\nDependency-lookup "<< word << " in " << dependencies << endl;
+ }
+ for ( size_t i=0; i < dependencies.size(); ++i ){
+ if ( dbFlag ){
+ *Log( theErrLog ) << "Dependency try: " << dependencies[i] << endl;
+ }
+ try {
+ vector<DependencyDependent*> dv = dependencies[i]->select<DependencyDependent>();
+ if ( !dv.empty() ){
+ vector<Word*> v = dv[0]->select<Word>();
+ for ( size_t j=0; j < v.size(); ++j ){
+ if ( v[j] == word ){
+ if ( dbFlag ){
+ *Log(theErrLog) << "\nDependency found word " << v[j] << endl;
+ }
+ return dependencies[i];
+ }
+ }
+ }
+ }
+ catch ( exception& e ){
+ if (dbFlag > 0)
+ *Log(theErrLog) << "get Dependency results failed: "
+ << e.what() << endl;
+ }
+ }
+ return 0;
+}
+
+string FrogAPI::lookupNEREntity( const vector<Word *>& mwu,
+ const vector<Entity*>& entities ) const {
+ string endresult;
+ int dbFlag = 0;
+ try{
+ dbFlag = stringTo<int>( configuration.lookUp( "debug", "NER" ) );
+ } catch (exception & e) {
+ dbFlag = 0;
+ }
+
+ for ( size_t j=0; j < mwu.size(); ++j ){
+ if ( dbFlag ){
+ using TiCC::operator<<;
+ *Log(theErrLog) << "\nNER: lookup "<< mwu[j] << " in " << entities << endl;
+ }
+ string result;
+ for ( size_t i=0; i < entities.size(); ++i ){
+ if ( dbFlag ){
+ *Log(theErrLog) << "NER try: " << entities[i] << endl;
+ }
+ try {
+ vector<Word*> v = entities[i]->select<Word>();
+ bool first = true;
+ for ( size_t k=0; k < v.size(); ++k ){
+ if ( v[k] == mwu[j] ){
+ if (dbFlag){
+ *Log(theErrLog) << "NER found word " << v[k] << endl;
+ }
+ if ( first )
+ result += "B-" + uppercase(entities[i]->cls());
+ else
+ result += "I-" + uppercase(entities[i]->cls());
+ break;
+ }
+ else
+ first = false;
+ }
+ }
+ catch ( exception& e ){
+ if (dbFlag > 0)
+ *Log(theErrLog) << "get NER results failed: "
+ << e.what() << endl;
+ }
+ }
+ if ( result.empty() )
+ endresult += "O";
+ else
+ endresult += result;
+ if ( j < mwu.size()-1 )
+ endresult += "_";
+ }
+ return endresult;
+}
+
+
+string FrogAPI::lookupIOBChunk( const vector<Word *>& mwu,
+ const vector<Chunk*>& chunks ) const{
+ string endresult;
+ int dbFlag = 0;
+ try {
+ dbFlag = stringTo<int>( configuration.lookUp( "debug", "IOB" ) );
+ } catch (exception & e) {
+ dbFlag = 0;
+ }
+ for ( size_t j=0; j < mwu.size(); ++j ){
+ if ( dbFlag ){
+ using TiCC::operator<<;
+ *Log(theErrLog) << "IOB lookup "<< mwu[j] << " in " << chunks << endl;
+ }
+ string result;
+ for ( size_t i=0; i < chunks.size(); ++i ){
+ if ( dbFlag ){
+ *Log(theErrLog) << "IOB try: " << chunks[i] << endl;
+ }
+ try {
+ vector<Word*> v = chunks[i]->select<Word>();
+ bool first = true;
+ for ( size_t k=0; k < v.size(); ++k ){
+ if ( v[k] == mwu[j] ){
+ if (dbFlag){
+ *Log(theErrLog) << "IOB found word " << v[k] << endl;
+ }
+ if ( first )
+ result += "B-" + chunks[i]->cls();
+ else
+ result += "I-" + chunks[i]->cls();
+ break;
+ }
+ else
+ first = false;
+ }
+ }
+ catch ( exception& e ){
+ if (dbFlag > 0)
+ *Log(theErrLog) << "get Chunks results failed: "
+ << e.what() << endl;
+ }
+ }
+ if ( result.empty() )
+ endresult += "O";
+ else
+ endresult += result;
+ if ( j < mwu.size()-1 )
+ endresult += "_";
+ }
+ return endresult;
+}
+
+void FrogAPI::displayMWU( ostream& os,
+ size_t index,
+ const vector<Word*>& mwu ) const {
+ string wrd;
+ string pos;
+ string lemma;
+ string morph;
+ double conf = 1;
+ for ( size_t p=0; p < mwu.size(); ++p ){
+ Word *word = mwu[p];
+ try {
+ wrd += word->str();
+ PosAnnotation *postag = word->annotation<PosAnnotation>( );
+ pos += postag->cls();
+ if ( p < mwu.size() -1 ){
+ wrd += "_";
+ pos += "_";
+ }
+ conf *= postag->confidence();
+ }
+ catch ( exception& e ){
+ if (options.debugFlag > 0)
+ *Log(theErrLog) << "get Postag results failed: "
+ << e.what() << endl;
+ }
+ if ( options.doLemma ){
+ try {
+ lemma += word->lemma(myMblem->getTagset());
+ if ( p < mwu.size() -1 ){
+ lemma += "_";
+ }
+ }
+ catch ( exception& e ){
+ if (options.debugFlag > 0)
+ *Log(theErrLog) << "get Lemma results failed: "
+ << e.what() << endl;
+ }
+ }
+ if ( options.doDaringMorph ){
+ try {
+ vector<MorphologyLayer*> ml
+ = word->annotations<MorphologyLayer>( myMbma->getTagset() );
+ for ( size_t q=0; q < ml.size(); ++q ){
+ vector<Morpheme*> m =
+ ml[q]->select<Morpheme>( myMbma->getTagset(), false );
+ assert( m.size() == 1 ); // top complex layer
+ string desc = m[0]->description();
+ morph += desc;
+ if ( q < ml.size()-1 )
+ morph += "/";
+ }
+ if ( p < mwu.size() -1 ){
+ morph += "_";
+ }
+ }
+ catch ( exception& e ){
+ if (options.debugFlag > 0)
+ *Log(theErrLog) << "get Morph results failed: "
+ << e.what() << endl;
+ }
+ }
+ else if ( options.doMorph ){
+ try {
+ vector<MorphologyLayer*> ml =
+ word->annotations<MorphologyLayer>(myMbma->getTagset());
+ for ( size_t q=0; q < ml.size(); ++q ){
+ vector<Morpheme*> m = ml[q]->select<Morpheme>(myMbma->getTagset());
+ for ( size_t t=0; t < m.size(); ++t ){
+ string txt = UnicodeToUTF8( m[t]->text() );
+ morph += "[" + txt + "]";
+ }
+ if ( q < ml.size()-1 )
+ morph += "/";
+ }
+ if ( p < mwu.size() -1 ){
+ morph += "_";
+ }
+ }
+ catch ( exception& e ){
+ if (options.debugFlag > 0)
+ *Log(theErrLog) << "get Morph results failed: "
+ << e.what() << endl;
+ }
+ }
+ }
+ os << index << "\t" << wrd << "\t" << lemma << "\t" << morph << "\t" << pos << "\t" << std::fixed << conf;
+}
+
+ostream& FrogAPI::showResults( ostream& os,
+ Document& doc ) const {
+ if ( options.doServer && options.doXMLout )
+ doc.save( os, options.doKanon );
+ else {
+ vector<Sentence*> sentences = doc.sentences();
+ for ( size_t i=0; i < sentences.size(); ++i ){
+ Sentence *sentence = sentences[i];
+ vector<Word*> words = sentence->words();
+ vector<Entity*> mwu_entities;
+ if (myMwu)
+ mwu_entities = sentence->select<Entity>( myMwu->getTagset() );
+ vector<Dependency*> dependencies;
+ if (myParser)
+ dependencies = sentence->select<Dependency>( myParser->getTagset() );
+ vector<Chunk*> iob_chunking;
+ if ( myIOBTagger )
+ iob_chunking = sentence->select<Chunk>( myIOBTagger->getTagset() );
+ vector<Entity*> ner_entities;
+ if (myNERTagger)
+ ner_entities = sentence->select<Entity>( myNERTagger->getTagset() );
+ static set<ElementType> excludeSet;
+ vector<Sentence*> parts = sentence->select<Sentence>( excludeSet );
+ if ( !options.doQuoteDetection )
+ assert( parts.size() == 0 );
+ for ( size_t i=0; i < parts.size(); ++i ){
+ vector<Entity*> ents;
+ if (myMwu)
+ ents = parts[i]->select<Entity>( myMwu->getTagset() );
+ mwu_entities.insert( mwu_entities.end(), ents.begin(), ents.end() );
+ vector<Dependency*> deps = parts[i]->select<Dependency>();
+ dependencies.insert( dependencies.end(), deps.begin(), deps.end() );
+ vector<Chunk*> chunks = parts[i]->select<Chunk>();
+ iob_chunking.insert( iob_chunking.end(), chunks.begin(), chunks.end() );
+ vector<Entity*> ners ;
+ if (myNERTagger) ners = parts[i]->select<Entity>( myNERTagger->getTagset() );
+ ner_entities.insert( ner_entities.end(), ners.begin(), ners.end() );
+ }
+
+ size_t index = 1;
+ map<FoliaElement*, int> enumeration;
+ vector<vector<Word*> > mwus;
+ for( size_t i=0; i < words.size(); ++i ){
+ Word *word = words[i];
+ vector<Word*> mwu = lookup( word, mwu_entities );
+ for ( size_t j=0; j < mwu.size(); ++j ){
+ enumeration[mwu[j]] = index;
+ }
+ mwus.push_back( mwu );
+ i += mwu.size()-1;
+ ++index;
+ }
+ for( size_t i=0; i < mwus.size(); ++i ){
+ displayMWU( os, i+1, mwus[i] );
+ if ( options.doNER ){
+ string cls;
+ string s = lookupNEREntity( mwus[i], ner_entities );
+ os << "\t" << s;
+ }
+ else {
+ os << "\t\t";
+ }
+ if ( options.doIOB ){
+ string cls;
+ string s = lookupIOBChunk( mwus[i], iob_chunking);
+ os << "\t" << s;
+ }
+ else {
+ os << "\t\t";
+ }
+ if ( options.doParse ){
+ string cls;
+ Dependency *dep = lookupDep( mwus[i][0], dependencies);
+ if ( dep ){
+ vector<Headwords*> w = dep->select<Headwords>();
+ size_t num;
+ if ( w[0]->index(0)->isinstance( PlaceHolder_t ) ){
+ string indexS = w[0]->index(0)->str();
+ FoliaElement *pnt = w[0]->index(0)->doc()->index(indexS);
+ num = enumeration.find(pnt->index(0))->second;
+ }
+ else {
+ num = enumeration.find(w[0]->index(0))->second;
+ }
+ os << "\t" << num << "\t" << dep->cls();
+ }
+ else {
+ os << "\t"<< 0 << "\tROOT";
+ }
+ }
+ else {
+ os << "\t\t";
+ }
+ os << endl;
+ ++index;
+ }
+ if ( words.size() )
+ os << endl;
+ }
+ }
+ return os;
+}
+
+string FrogAPI::Frogtostring( const string& s ){
+ Document doc = tokenizer->tokenizestring( s );
+ stringstream ss;
+ FrogDoc( doc, true );
+ showResults( ss, doc );
+ return ss.str();
+}
+
+string FrogAPI::Frogtostringfromfile( const string& name ){
+ stringstream ss;
+ FrogFile( name, ss, "" );
+ return ss.str();
+}
+
+void FrogAPI::FrogDoc( Document& doc,
+ bool hidetimers ){
+ TimerBlock timers;
+ timers.frogTimer.start();
+ // first we make sure that the doc will accept our annotations, by
+ // declaring them in the doc
+ if (myCGNTagger)
+ myCGNTagger->addDeclaration( doc );
+ if (( options.doLemma ) && (myMblem))
+ myMblem->addDeclaration( doc );
+ if (( options.doMorph ) && (myMbma))
+ myMbma->addDeclaration( doc );
+ if ((options.doIOB) && (myIOBTagger))
+ myIOBTagger->addDeclaration( doc );
+ if ((options.doNER) && (myNERTagger))
+ myNERTagger->addDeclaration( doc );
+ if ((options.doMwu) && (myMwu))
+ myMwu->addDeclaration( doc );
+ if ((options.doParse) && (myParser))
+ myParser->addDeclaration( doc );
+
+ if ( options.debugFlag > 5 )
+ *Log(theErrLog) << "Testing document :" << doc << endl;
+
+ vector<Sentence*> sentences;
+ if ( options.doQuoteDetection )
+ sentences = doc.sentenceParts();
+ else
+ sentences = doc.sentences();
+ size_t numS = sentences.size();
+ if ( numS > 0 ) { //process sentences
+ if (options.debugFlag > 0)
+ *Log(theErrLog) << "found " << numS << " sentence(s) in document." << endl;
+ for ( size_t i = 0; i < numS; ++i ) {
+ //NOTE- full sentences are passed (which may span multiple lines) (MvG)
+ bool showParse = TestSentence( sentences[i], timers );
+ if ( options.doParse && !showParse ){
+ *Log(theErrLog) << "WARNING!" << endl;
+ *Log(theErrLog) << "Sentence " << i+1 << " isn't parsed because it contains more tokens then set with the --max-parser-tokens=" << options.maxParserTokens << " option." << endl;
+ }
+ }
+ }
+ else {
+ if (options.debugFlag > 0)
+ *Log(theErrLog) << "No sentences found in document. " << endl;
+ }
+
+ timers.frogTimer.stop();
+ if ( !hidetimers ){
+ *Log(theErrLog) << "tokenisation took: " << timers.tokTimer << endl;
+ *Log(theErrLog) << "CGN tagging took: " << timers.tagTimer << endl;
+ if ( options.doIOB)
+ *Log(theErrLog) << "IOB chunking took: " << timers.iobTimer << endl;
+ if ( options.doNER)
+ *Log(theErrLog) << "NER took: " << timers.nerTimer << endl;
+ if ( options.doMorph )
+ *Log(theErrLog) << "MBA took: " << timers.mbmaTimer << endl;
+ if ( options.doLemma )
+ *Log(theErrLog) << "Mblem took: " << timers.mblemTimer << endl;
+ if ( options.doMwu )
+ *Log(theErrLog) << "MWU resolving took: " << timers.mwuTimer << endl;
+ if ( options.doParse ){
+ *Log(theErrLog) << "Parsing (prepare) took: " << timers.prepareTimer << endl;
+ *Log(theErrLog) << "Parsing (pairs) took: " << timers.pairsTimer << endl;
+ *Log(theErrLog) << "Parsing (rels) took: " << timers.relsTimer << endl;
+ *Log(theErrLog) << "Parsing (dir) took: " << timers.dirTimer << endl;
+ *Log(theErrLog) << "Parsing (csi) took: " << timers.csiTimer << endl;
+ *Log(theErrLog) << "Parsing (total) took: " << timers.parseTimer << endl;
+ }
+ *Log(theErrLog) << "Frogging in total took: " << timers.frogTimer << endl;
+ }
+ return;
+}
+
+void FrogAPI::FrogFile( const string& infilename,
+ ostream &os,
+ const string& xmlOutF ) {
+ // stuff the whole input into one FoLiA document.
+ // This is not a good idea in the long term, I think (agreed [proycon] )
+
+ string xmlOutFile = xmlOutF;
+ if ( options.doXMLin && !xmlOutFile.empty() ){
+ if ( match_back( infilename, ".gz" ) ){
+ if ( !match_back( xmlOutFile, ".gz" ) )
+ xmlOutFile += ".gz";
+ }
+ else if ( match_back( infilename, ".bz2" ) ){
+ if ( !match_back( xmlOutFile, ".bz2" ) )
+ xmlOutFile += ".bz2";
+ }
+ }
+ if ( options.doXMLin ){
+ Document doc;
+ try {
+ doc.readFromFile( infilename );
+ }
+ catch ( exception &e ){
+ *Log(theErrLog) << "retrieving FoLiA from '" << infilename << "' failed with exception:" << endl;
+ cerr << e.what() << endl;
+ return;
+ }
+ tokenizer->tokenize( doc );
+ FrogDoc( doc, false );
+ if ( !xmlOutFile.empty() ){
+ doc.save( xmlOutFile, options.doKanon );
+ *Log(theErrLog) << "resulting FoLiA doc saved in " << xmlOutFile << endl;
+ }
+ showResults( os, doc );
+ }
+ else {
+ ifstream IN( infilename.c_str() );
+ Document doc = tokenizer->tokenize( IN );
+ FrogDoc( doc, false );
+ if ( !xmlOutFile.empty() ){
+ doc.save( xmlOutFile, options.doKanon );
+ *Log(theErrLog) << "resulting FoLiA doc saved in " << xmlOutFile << endl;
+ }
+ showResults( os, doc );
+ }
+}
diff --git a/src/Makefile.am b/src/Makefile.am
index 8ce3e31..7da1a1e 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,4 +1,4 @@
-# $Id: Makefile.am 17436 2014-07-09 13:13:35Z sloot $
+# $Id: Makefile.am 17616 2014-09-07 19:24:55Z mvgompel $
# $URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/Makefile.am $
AM_CPPFLAGS = -I at top_srcdir@/include
@@ -10,15 +10,16 @@ mbma_SOURCES = mbma_prog.cxx
mblem_SOURCES = mblem_prog.cxx
LDADD = libfrog.la
-lib_LTLIBRARIES = libfrog.la
+lib_LTLIBRARIES = libfrog.la
libfrog_la_LDFLAGS = -version-info 1:0:0
-libfrog_la_SOURCES = mbma_mod.cxx mblem_mod.cxx \
+libfrog_la_SOURCES = FrogAPI.cxx mbma_mod.cxx mblem_mod.cxx \
Frog-util.cxx mwu_chunker_mod.cxx Parser.cxx \
cgn_tagger_mod.cxx iob_tagger_mod.cxx ner_tagger_mod.cxx \
ucto_tokenizer_mod.cxx
+
TESTS = tst.sh
EXTRA_DIST = tst.sh
diff --git a/src/Makefile.in b/src/Makefile.in
index 6f8a0fd..28b7732 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -14,7 +14,7 @@
@SET_MAKE@
-# $Id: Makefile.am 17436 2014-07-09 13:13:35Z sloot $
+# $Id: Makefile.am 17616 2014-09-07 19:24:55Z mvgompel $
# $URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/Makefile.am $
@@ -131,8 +131,8 @@ am__uninstall_files_from_dir = { \
am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
libfrog_la_LIBADD =
-am_libfrog_la_OBJECTS = mbma_mod.lo mblem_mod.lo Frog-util.lo \
- mwu_chunker_mod.lo Parser.lo cgn_tagger_mod.lo \
+am_libfrog_la_OBJECTS = FrogAPI.lo mbma_mod.lo mblem_mod.lo \
+ Frog-util.lo mwu_chunker_mod.lo Parser.lo cgn_tagger_mod.lo \
iob_tagger_mod.lo ner_tagger_mod.lo ucto_tokenizer_mod.lo
libfrog_la_OBJECTS = $(am_libfrog_la_OBJECTS)
AM_V_lt = $(am__v_lt_ at AM_V@)
@@ -566,9 +566,9 @@ frog_SOURCES = Frog.cxx
mbma_SOURCES = mbma_prog.cxx
mblem_SOURCES = mblem_prog.cxx
LDADD = libfrog.la
-lib_LTLIBRARIES = libfrog.la
+lib_LTLIBRARIES = libfrog.la
libfrog_la_LDFLAGS = -version-info 1:0:0
-libfrog_la_SOURCES = mbma_mod.cxx mblem_mod.cxx \
+libfrog_la_SOURCES = FrogAPI.cxx mbma_mod.cxx mblem_mod.cxx \
Frog-util.cxx mwu_chunker_mod.cxx Parser.cxx \
cgn_tagger_mod.cxx iob_tagger_mod.cxx ner_tagger_mod.cxx \
ucto_tokenizer_mod.cxx
@@ -718,6 +718,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/Frog-util.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/Frog.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/FrogAPI.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/Parser.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/cgn_tagger_mod.Plo at am__quote@
@AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iob_tagger_mod.Plo at am__quote@
diff --git a/src/Parser.cxx b/src/Parser.cxx
index b247344..d45c2eb 100644
--- a/src/Parser.cxx
+++ b/src/Parser.cxx
@@ -1,5 +1,5 @@
/*
- $Id: Parser.cxx 17489 2014-08-07 08:42:59Z sloot $
+ $Id: Parser.cxx 17714 2014-09-25 16:04:28Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/Parser.cxx $
Copyright (c) 2006 - 2014
@@ -169,8 +169,6 @@ bool Parser::init( const Configuration& configuration ){
string relsFileName;
string relsOptions = "-a1 +D +vdb+di";
PI = new PythonInterface();
- parseLog = new LogStream( theErrLog );
- parseLog->addmessage("parser-");
maxDepSpanS = "20";
maxDepSpan = 20;
bool problem = false;
@@ -849,6 +847,7 @@ void Parser::prepareParse( const vector<Word *>& fwords,
void appendParseResult( const vector<Word *>& words,
parseData& pd,
+ const string& tagset,
istream& is ){
string line;
int cnt=0;
@@ -859,8 +858,9 @@ void appendParseResult( const vector<Word *>& words,
int num = TiCC::split_at( line, parts, " " );
if ( num > 7 ){
if ( TiCC::stringTo<int>( parts[0] ) != cnt+1 ){
- *Log(theErrLog) << "confused! " << endl;
- *Log(theErrLog) << "got line '" << line << "'" << endl;
+ //WARNING: commented out because theErrLog is no longer publicly available
+ //*Log(theErrLog) << "confused! " << endl;
+ //*Log(theErrLog) << "got line '" << line << "'" << endl;
}
nums.push_back( TiCC::stringTo<int>(parts[6]) );
roles.push_back( parts[7] );
@@ -870,6 +870,7 @@ void appendParseResult( const vector<Word *>& words,
Sentence *sent = words[0]->sentence();
KWargs args;
args["generate_id"] = sent->id();
+ args["set"] = tagset;
DependenciesLayer *dl = new DependenciesLayer(sent->doc(),args);
#pragma omp critical(foliaupdate)
{
@@ -880,6 +881,7 @@ void appendParseResult( const vector<Word *>& words,
KWargs args;
args["generate_id"] = dl->id();
args["class"] = roles[i];
+ args["set"] = tagset;
#pragma omp critical(foliaupdate)
{
Dependency *d = new Dependency( sent->doc(), args );
@@ -967,7 +969,7 @@ void Parser::Parse( const vector<Word*>& words, const string& mwuSet,
timers.csiTimer.stop();
ifstream resFile( resFileName.c_str() );
if ( resFile ){
- appendParseResult( words, pd, resFile );
+ appendParseResult( words, pd, tagset, resFile );
}
else
*Log(parseLog) << "couldn't open results file: " << resFileName << endl;
diff --git a/src/cgn_tagger_mod.cxx b/src/cgn_tagger_mod.cxx
index 8449ba8..f23f799 100644
--- a/src/cgn_tagger_mod.cxx
+++ b/src/cgn_tagger_mod.cxx
@@ -1,5 +1,5 @@
/*
- $Id: cgn_tagger_mod.cxx 17485 2014-08-06 14:10:15Z sloot $
+ $Id: cgn_tagger_mod.cxx 17650 2014-09-13 20:18:58Z mvgompel $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/cgn_tagger_mod.cxx $
Copyright (c) 2006 - 2014
@@ -36,10 +36,10 @@
using namespace std;
using namespace folia;
-CGNTagger::CGNTagger(){
+CGNTagger::CGNTagger(TiCC::LogStream * logstream){
tagger = 0;
filter = 0;
- cgnLog = new LogStream( theErrLog, "cgn-tagger-" );
+ cgnLog = new LogStream( logstream, "cgn-tagger-" );
}
CGNTagger::~CGNTagger(){
@@ -178,6 +178,7 @@ void fillSubSetTable(){
bool CGNTagger::init( const Configuration& config ){
+ debug = 0;
string val = config.lookUp( "debug", "tagger" );
if ( val.empty() ){
val = config.lookUp( "debug" );
@@ -306,7 +307,7 @@ void CGNTagger::addTag( Word *word, const string& inputTag, double confidence ){
string::size_type openH = cgnTag.find( '(' );
string::size_type closeH = cgnTag.find( ')' );
if ( openH == string::npos || closeH == string::npos ){
- *Log(theErrLog) << "tagger_mod: main tag without subparts: impossible: " << cgnTag << endl;
+ *Log(cgnLog) << "tagger_mod: main tag without subparts: impossible: " << cgnTag << endl;
exit(-1);
}
mainTag = cgnTag.substr( 0, openH );
diff --git a/src/iob_tagger_mod.cxx b/src/iob_tagger_mod.cxx
index 43b5a44..83f81cf 100644
--- a/src/iob_tagger_mod.cxx
+++ b/src/iob_tagger_mod.cxx
@@ -1,5 +1,5 @@
/*
- $Id: iob_tagger_mod.cxx 17487 2014-08-06 14:55:31Z sloot $
+ $Id: iob_tagger_mod.cxx 17714 2014-09-25 16:04:28Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/iob_tagger_mod.cxx $
Copyright (c) 2006 - 2014
@@ -35,9 +35,9 @@
using namespace std;
using namespace folia;
-IOBTagger::IOBTagger(){
+IOBTagger::IOBTagger(TiCC::LogStream * logstream){
tagger = 0;
- iobLog = new LogStream( theErrLog, "iob-" );
+ iobLog = new LogStream( logstream, "iob-" );
}
IOBTagger::~IOBTagger(){
@@ -151,11 +151,12 @@ void IOBTagger::addIOBTags( const vector<Word*>& words,
{
Sentence *sent = words[0]->sentence();
try {
- el = sent->annotation<ChunkingLayer>();
+ el = sent->annotation<ChunkingLayer>(tagset);
}
catch(...){
KWargs args;
args["generate_id"] = sent->id();
+ args["set"] = tagset;
el = new ChunkingLayer(sent->doc(),args);
sent->append( el );
}
diff --git a/src/mblem_mod.cxx b/src/mblem_mod.cxx
index a605f63..9009ae8 100755
--- a/src/mblem_mod.cxx
+++ b/src/mblem_mod.cxx
@@ -1,5 +1,5 @@
/*
- $Id: mblem_mod.cxx 17436 2014-07-09 13:13:35Z sloot $
+ $Id: mblem_mod.cxx 17809 2014-11-11 13:44:50Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/mblem_mod.cxx $
Copyright (c) 2006 - 2014
@@ -41,9 +41,13 @@ using namespace std;
using namespace TiCC;
using namespace folia;
-Mblem::Mblem(): myLex(0),punctuation( "?...,:;\\'`(){}[]%#+-_=/!" ),
- history(20), debug(0) {
- mblemLog = new LogStream( theErrLog, "mblem" );
+Mblem::Mblem( LogStream *logstream ):
+ myLex(0),
+ punctuation( "?...,:;\\'`(){}[]%#+-_=/!" ),
+ history(20),
+ debug(0)
+{
+ mblemLog = new LogStream( logstream, "mblem" );
}
void Mblem::read_transtable( const string& tableName ) {
@@ -133,6 +137,7 @@ bool Mblem::init( const Configuration& config ) {
Mblem::~Mblem(){
// *Log(mblemLog) << "cleaning up MBLEM stuff" << endl;
+ delete filter;
delete myLex;
myLex = 0;
delete mblemLog;
@@ -194,23 +199,20 @@ bool isSimilar( const string& tag, const string& cgnTag ){
similar( tag, cgnTag, "ovt,1,ev" );
}
-void Mblem::addLemma( FoliaElement *word, const string& cls ){
+void Mblem::addLemma( Word *word, const string& cls ){
KWargs args;
args["set"]=tagset;
args["cls"]=cls;
#pragma omp critical(foliaupdate)
{
- word->addLemmaAnnotation( args );
- }
-}
-
-void Mblem::addAltLemma( Word *word, const string& cls ){
- Alternative *alt = new Alternative();
-#pragma omp critical(foliaupdate)
- {
- word->append( alt );
+ try {
+ word->addLemmaAnnotation( args );
+ }
+ catch( const exception& e ){
+ *Log(mblemLog) << e.what() << " addLemma failed." << endl;
+ exit(EXIT_FAILURE);
+ }
}
- addLemma( alt, cls );
}
void Mblem::filterTag( const string& postag ){
@@ -270,18 +272,10 @@ void Mblem::getFoLiAResult( Word *word, const UnicodeString& uWord ){
addLemma( word, result );
}
else {
- bool first = true;
vector<mblemData>::iterator it = mblemResult.begin();
while( it != mblemResult.end() ){
string result = it->getLemma();
- if ( first ){
- addLemma( word, result );
- first = false;
- }
- else {
- // there are more matching lemmas. add them as alternatives
- addAltLemma( word, result );
- }
+ addLemma( word, result );
++it;
}
}
diff --git a/src/mblem_prog.cxx b/src/mblem_prog.cxx
index 4cf525c..f2ee1de 100644
--- a/src/mblem_prog.cxx
+++ b/src/mblem_prog.cxx
@@ -1,6 +1,6 @@
/*
- $Id:$
- $URL:$
+ $Id: mblem_prog.cxx 17665 2014-09-15 15:58:01Z sloot $
+ $URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/mblem_prog.cxx $
Copyright (c) 2006 - 2014
Tilburg University
@@ -37,10 +37,9 @@
#include <map>
#include "config.h"
-#include "timbl/TimblAPI.h"
#include "ticcutils/LogStream.h"
#include "ticcutils/Configuration.h"
-#include "libfolia/folia.h"
+#include "ticcutils/CommandLine.h"
#include "frog/ucto_tokenizer_mod.h"
#include "frog/cgn_tagger_mod.h"
#include "frog/mblem_mod.h"
@@ -51,16 +50,15 @@ using namespace Timbl;
LogStream my_default_log( cerr, "", StampMessage ); // fall-back
LogStream *theErrLog = &my_default_log; // fill the externals
-string TestFileName;
-string ProgName;
+vector<string> fileNames;
bool doAll = false;
Configuration configuration;
static string configDir = string(SYSCONF_PATH) + "/" + PACKAGE + "/";
static string configFileName = configDir + "frog.cfg";
-static UctoTokenizer tokenizer;
-static CGNTagger tagger;
+static UctoTokenizer tokenizer(theErrLog);
+static CGNTagger tagger(theErrLog);
void usage( ) {
cout << endl << "mblem [options] testfile" << endl
@@ -75,26 +73,20 @@ void usage( ) {
<< "\t -d <debug level> (for more verbosity)\n";
}
-static Mblem myMblem;
+static Mblem myMblem(theErrLog);
-bool parse_args( TimblOpts& Opts ) {
+bool parse_args( TiCC::CL_Options& Opts ) {
cerr << "start " << Opts << endl;
- string value;
- bool mood;
- if ( Opts.Find('V', value, mood ) ||
- Opts.Find("version", value, mood ) ){
+ if ( Opts.is_present('V') || Opts.is_present("version" ) ){
// we already did show what we wanted.
exit( EXIT_SUCCESS );
}
- if ( Opts.Find ('h', value, mood)) {
+ if ( Opts.is_present ('h') ) {
usage();
exit( EXIT_SUCCESS );
};
// is a config file specified?
- if ( Opts.Find( 'c', value, mood ) ) {
- configFileName = value;
- Opts.Delete( 'c' );
- };
+ Opts.extract( 'c', configFileName );
if ( configuration.fill( configFileName ) ){
cerr << "config read from: " << configFileName << endl;
@@ -106,38 +98,28 @@ bool parse_args( TimblOpts& Opts ) {
}
// debug opts
- if ( Opts.Find ('d', value, mood)) {
+ string value;
+ if ( Opts.extract( 'd', value ) ) {
int debug = 0;
if ( !TiCC::stringTo<int>( value, debug ) ){
cerr << "-d value should be an integer" << endl;
return false;
}
configuration.setatt( "debug", value, "mblem" );
- Opts.Delete('d');
};
- if ( Opts.Find( 't', value, mood )) {
- TestFileName = value;
+ if ( Opts.extract( 't', value ) ){
ifstream is( value.c_str() );
if ( !is ){
cerr << "input stream " << value << " is not readable" << endl;
return false;
}
- Opts.Delete('t');
+ fileNames.push_back( value );
}
- else if ( Opts.Find( '?', value, mood )) {
- TestFileName = value;
- ifstream is( value.c_str() );
- if ( !is ){
- cerr << "input stream " << value << " is not readable" << endl;
- return false;
- }
- Opts.Delete('?');
- };
- if ( Opts.Find( 'a', value, mood )) {
- doAll = true;
- Opts.Delete('t');
+ else {
+ fileNames = Opts.getMassOpts();
};
+ doAll = Opts.is_present( 'a' );
return true;
}
@@ -149,7 +131,7 @@ bool init(){
cerr << "MBLEM Initialization failed." << endl;
return false;
}
- if ( !tokenizer.init( configuration, "", false ) ){
+ if ( !tokenizer.init( configuration ) ){
cerr << "UCTO Initialization failed." << endl;
return false;
}
@@ -193,31 +175,34 @@ int main(int argc, char *argv[]) {
std::ios_base::sync_with_stdio(false);
cerr << "mblem " << VERSION << " (c) ILK 1998 - 2014" << endl;
cerr << "Induction of Linguistic Knowledge Research Group, Tilburg University" << endl;
- ProgName = argv[0];
+ TiCC::CL_Options Opts("c:t:hVd:a", "version");
+ try {
+ Opts.init(argc, argv);
+ }
+ catch ( const exception& e ){
+ cerr << "fatal error: " << e.what() << endl;
+ return EXIT_FAILURE;
+ }
cerr << "based on [" << Timbl::VersionName() << "]" << endl;
cerr << "configdir: " << configDir << endl;
- try {
- TimblOpts Opts(argc, argv);
- if ( parse_args(Opts) ){
- if ( !init() ){
- cerr << "terminated." << endl;
- return EXIT_FAILURE;
- }
+ if ( parse_args(Opts) ){
+ if ( !init() ){
+ cerr << "terminated." << endl;
+ return EXIT_FAILURE;
+ }
+ for ( size_t i=0; i < fileNames.size(); ++i ){
+ string TestFileName = fileNames[i];
ifstream in(TestFileName.c_str() );
if ( in.good() ){
Test( in );
}
else {
cerr << "unable to open: " << TestFileName << endl;
- return EXIT_FAILURE;
+ continue;
}
}
- else {
- return EXIT_FAILURE;
- }
}
- catch ( const exception& e ){
- cerr << "fatal error: " << e.what() << endl;
+ else {
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
diff --git a/src/mbma_mod.cxx b/src/mbma_mod.cxx
index 22b77cf..0750702 100755
--- a/src/mbma_mod.cxx
+++ b/src/mbma_mod.cxx
@@ -1,5 +1,5 @@
/*
- $Id: mbma_mod.cxx 17460 2014-07-16 15:29:20Z sloot $
+ $Id: mbma_mod.cxx 17822 2014-11-13 09:56:31Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/mbma_mod.cxx $
Copyright (c) 2006 - 2014
@@ -30,9 +30,10 @@
#include <string>
#include <set>
#include <iostream>
+#include <algorithm>
#include <fstream>
#include <sstream>
-#include <timbl/TimblAPI.h>
+#include "timbl/TimblAPI.h"
#include "ucto/unicode.h"
#include "ticcutils/Configuration.h"
@@ -46,9 +47,14 @@ using namespace TiCC;
const long int LEFT = 6; // left context
const long int RIGHT = 6; // right context
-Mbma::Mbma(): MTreeFilename( "dm.igtree" ), MTree(0),
- transliterator(0), filter(0), doDaring(false) {
- mbmaLog = new LogStream( theErrLog, "mbma-" );
+Mbma::Mbma(LogStream * logstream):
+ MTreeFilename( "dm.igtree" ),
+ MTree(0),
+ transliterator(0),
+ filter(0),
+ doDaring(false)
+{
+ mbmaLog = new LogStream( logstream, "mbma-" );
}
Mbma::~Mbma() {
@@ -318,10 +324,10 @@ bool Mbma::init( const Configuration& config ) {
version = val;
val = config.lookUp( "set", "mbma" );
if ( val.empty() ){
- tagset = "http://ilk.uvt.nl/folia/sets/frog-mbma-nl";
+ mbma_tagset = "http://ilk.uvt.nl/folia/sets/frog-mbma-nl";
}
else
- tagset = val;
+ mbma_tagset = val;
val = config.lookUp( "set", "tagger" );
if ( val.empty() ){
@@ -1271,6 +1277,26 @@ MBMAana::MBMAana( const Rule& r, bool daring ): rule(r) {
}
}
+UnicodeString MBMAana::getKey( bool daring ){
+ if ( sortkey.isEmpty() ){
+ UnicodeString tmp;
+ if ( daring ){
+ stringstream ss;
+ ss << getBrackets() << endl;
+ tmp = UTF8ToUnicode(ss.str());
+ }
+ else {
+ vector<string> v = getMorph();
+ // create a unique string
+ for ( size_t p=0; p < v.size(); ++p ) {
+ tmp += UTF8ToUnicode(v[p]) + "+";
+ }
+ }
+ sortkey = tmp;
+ }
+ return sortkey;
+}
+
string Rule::getCleanInflect() const {
// get the FIRST inflection and clean it up by extracting only
// known inflection names
@@ -1454,24 +1480,20 @@ void Mbma::execute( const UnicodeString& word,
}
}
-void Mbma::addAltMorph( Word *word,
- const vector<string>& morphs ) const {
- Alternative *alt = new Alternative();
- MorphologyLayer *ml = new MorphologyLayer();
-#pragma omp critical(foliaupdate)
- {
- alt->append( ml );
- word->append( alt );
- }
- addMorph( ml, morphs );
-}
-
void Mbma::addMorph( Word *word,
const vector<string>& morphs ) const {
- MorphologyLayer *ml = new MorphologyLayer();
+ KWargs args;
+ args["set"] = mbma_tagset;
+ MorphologyLayer *ml;
#pragma omp critical(foliaupdate)
{
- word->append( ml );
+ try {
+ ml = word->addMorphologyLayer( args );
+ }
+ catch( const exception& e ){
+ *Log(mbmaLog) << e.what() << " addMorph failed." << endl;
+ exit(EXIT_FAILURE);
+ }
}
addMorph( ml, morphs );
}
@@ -1480,12 +1502,19 @@ void Mbma::addBracketMorph( Word *word,
const string& wrd,
const string& tag ) const {
// *Log(mbmaLog) << "addBracketMorph(" << wrd << "," << tag << ")" << endl;
- MorphologyLayer *ml = new MorphologyLayer();
+ KWargs args;
+ args["set"] = mbma_tagset;
+ MorphologyLayer *ml;
#pragma omp critical(foliaupdate)
{
- word->append( ml );
+ try {
+ ml = word->addMorphologyLayer( args );
+ }
+ catch( const exception& e ){
+ *Log(mbmaLog) << e.what() << " addBracketMorph failed." << endl;
+ exit(EXIT_FAILURE);
+ }
}
- KWargs args;
args["class"] = "stem";
Morpheme *result = new Morpheme( word->doc(), args );
args.clear();
@@ -1518,30 +1547,22 @@ void Mbma::addBracketMorph( Word *word,
void Mbma::addBracketMorph( Word *word,
const BracketNest *brackets ) const {
- MorphologyLayer *ml = new MorphologyLayer();
+ KWargs args;
+ args["set"] = mbma_tagset;
+ MorphologyLayer *ml;
#pragma omp critical(foliaupdate)
{
- word->append( ml );
- }
- Morpheme *m = brackets->createMorpheme( word->doc(), clex_tagset );
- if ( m ){
-#pragma omp critical(foliaupdate)
- {
- ml->append( m );
+ try {
+ ml = word->addMorphologyLayer( args );
+ }
+ catch( const exception& e ){
+ *Log(mbmaLog) << e.what() << " addBracketMorph failed." << endl;
+ exit(EXIT_FAILURE);
}
}
-}
-
-void Mbma::addAltBracketMorph( Word *word,
- const BracketNest *brackets ) const {
- Alternative *alt = new Alternative();
- MorphologyLayer *ml = new MorphologyLayer();
-#pragma omp critical(foliaupdate)
- {
- alt->append( ml );
- word->append( alt );
- }
- Morpheme *m = brackets->createMorpheme( word->doc(), clex_tagset );
+ Morpheme *m = brackets->createMorpheme( word->doc(),
+ mbma_tagset,
+ clex_tagset );
if ( m ){
#pragma omp critical(foliaupdate)
{
@@ -1551,20 +1572,23 @@ void Mbma::addAltBracketMorph( Word *word,
}
Morpheme *BracketLeaf::createMorpheme( Document *doc,
- const string& tagset ) const {
+ const string& mbma_tagset,
+ const string& clex_tagset ) const {
string desc;
int offset = 0;
- return createMorpheme( doc, tagset, offset, desc );
+ return createMorpheme( doc, mbma_tagset, clex_tagset, offset, desc );
}
Morpheme *BracketLeaf::createMorpheme( Document *doc,
- const string& tagset,
+ const string& mbma_tagset,
+ const string& clex_tagset,
int& offset,
string& desc ) const {
Morpheme *result = 0;
desc.clear();
if ( status == STEM ){
KWargs args;
+ args["set"] = mbma_tagset;
args["class"] = "stem";
result = new Morpheme( doc, args );
args.clear();
@@ -1586,7 +1610,7 @@ Morpheme *BracketLeaf::createMorpheme( Document *doc,
result->append( d );
}
args.clear();
- args["set"] = tagset;
+ args["set"] = clex_tagset;
args["cls"] = toString( tag() );
#pragma omp critical(foliaupdate)
{
@@ -1596,6 +1620,7 @@ Morpheme *BracketLeaf::createMorpheme( Document *doc,
else if ( status == INFLECTION ){
KWargs args;
args["class"] = "inflection";
+ args["set"] = mbma_tagset;
result = new Morpheme( doc, args );
args.clear();
string out = UnicodeToUTF8(morph);
@@ -1630,6 +1655,7 @@ Morpheme *BracketLeaf::createMorpheme( Document *doc,
else if ( status == DERIVATIONAL ){
KWargs args;
args["class"] = "derivational";
+ args["set"] = mbma_tagset;
result = new Morpheme( doc, args );
args.clear();
string out = UnicodeToUTF8(morph);
@@ -1650,8 +1676,8 @@ Morpheme *BracketLeaf::createMorpheme( Document *doc,
result->append( d );
}
args.clear();
- args["set"] = tagset;
- args["cls"] = orig;
+ args["set"] = clex_tagset;
+ args["cls"] = orig;
#pragma omp critical(foliaupdate)
{
result->addPosAnnotation( args );
@@ -1660,7 +1686,9 @@ Morpheme *BracketLeaf::createMorpheme( Document *doc,
else {
KWargs args;
args["class"] = "inflection";
+ args["set"] = mbma_tagset;
result = new Morpheme( doc, args );
+ args.clear();
string inf_desc;
for ( size_t i=0; i < inflect.size(); ++i ){
string d = iNames[inflect[i]];
@@ -1668,7 +1696,6 @@ Morpheme *BracketLeaf::createMorpheme( Document *doc,
inf_desc += ", ";
inf_desc += d;
}
- args.clear();
args["value"] = inf_desc;
Description *d = new Description( args );
#pragma omp critical(foliaupdate)
@@ -1680,18 +1707,21 @@ Morpheme *BracketLeaf::createMorpheme( Document *doc,
}
Morpheme *BracketNest::createMorpheme( Document *doc,
- const string& tagset ) const {
+ const string& mbma_tagset,
+ const string& clex_tagset ) const {
string desc;
int offset = 0;
- return createMorpheme( doc, tagset, offset, desc );
+ return createMorpheme( doc, mbma_tagset, clex_tagset, offset, desc );
}
Morpheme *BracketNest::createMorpheme( Document *doc,
- const string& tagset,
+ const string& mbma_tagset,
+ const string& clex_tagset,
int& of,
string& desc ) const {
KWargs args;
args["class"] = "complex";
+ args["set"] = mbma_tagset;
Morpheme *result = new Morpheme( doc, args );
list<BaseBracket*>::const_iterator it = parts.begin();
string mor;
@@ -1701,7 +1731,11 @@ Morpheme *BracketNest::createMorpheme( Document *doc,
int offset = 0;
while ( it != parts.end() ){
string deeper_desc;
- Morpheme *m = (*it)->createMorpheme( doc, tagset, offset, deeper_desc );
+ Morpheme *m = (*it)->createMorpheme( doc,
+ mbma_tagset,
+ clex_tagset,
+ offset,
+ deeper_desc );
if ( m ){
string tmp;
try {
@@ -1737,8 +1771,8 @@ Morpheme *BracketNest::createMorpheme( Document *doc,
result->append( d );
}
args.clear();
- args["set"] = tagset;
- args["cls"] = toString( tag() );
+ args["set"] = clex_tagset;
+ args["cls"] = toString( tag() );
#pragma omp critical(foliaupdate)
{
result->addPosAnnotation( args );
@@ -1754,12 +1788,14 @@ void Mbma::addMorph( MorphologyLayer *ml,
const vector<string>& morphs ) const {
int offset = 0;
for ( size_t p=0; p < morphs.size(); ++p ){
- Morpheme *m = new Morpheme();
+ KWargs args;
+ args["set"] = mbma_tagset;
+ Morpheme *m = new Morpheme( ml->doc(), args );
#pragma omp critical(foliaupdate)
{
ml->append( m );
}
- KWargs args;
+ args.clear();
args["value"] = morphs[p];
args["offset"] = toString(offset);
TextContent *t = new TextContent( args );
@@ -1771,6 +1807,10 @@ void Mbma::addMorph( MorphologyLayer *ml,
}
}
+bool mbmacmp( MBMAana *m1, MBMAana *m2 ){
+ return m1->getKey(false).length() > m2->getKey(false).length();
+}
+
void Mbma::filterTag( const string& head, const vector<string>& feats ){
// first we select only the matching heads
if (debugFlag){
@@ -1815,8 +1855,7 @@ void Mbma::filterTag( const string& head, const vector<string>& feats ){
if ( analysis.size() < 1 ){
if (debugFlag ){
- *Log(mbmaLog) << "analysis has size: " << analysis.size()
- << " so skip next filter" << endl;
+ *Log(mbmaLog) << "analysis is empty so skip next filter" << endl;
}
return;
}
@@ -1861,52 +1900,68 @@ void Mbma::filterTag( const string& head, const vector<string>& feats ){
}
// so now we have "the" best matches.
// Weed the rest
- vector<MBMAana*> res;
- set<MBMAana*>::const_iterator bit = bestMatches.begin();
- while ( bit != bestMatches.end() ){
- res.push_back( *bit );
- ++bit;
+ ait = analysis.begin();
+ while ( ait != analysis.end() ){
+ if ( bestMatches.find( *ait ) != bestMatches.end() ){
+ ++ait;
+ }
+ else {
+ delete *ait;
+ ait = analysis.erase( ait );
+ }
}
if (debugFlag){
*Log(mbmaLog) << "filter: analysis after second step" << endl;
int i=1;
- for(vector<MBMAana*>::const_iterator it=res.begin(); it != res.end(); it++)
- *Log(mbmaLog) << i++ << " - " << *it << endl;
- *Log(mbmaLog) << "start looking for doubles" << endl;
+ for( vector<MBMAana*>::const_iterator it=analysis.begin();
+ it != analysis.end();
+ ++it )
+ *Log(mbmaLog) << i++ << " - " << *it << endl;
}
//
// but now we still might have doubles
//
- map<string, MBMAana*> unique;
- vector<MBMAana*>::iterator it=res.begin();
- while ( it != res.end() ){
- string tmp;
- if ( doDaring ){
- stringstream ss;
- ss << (*it)->getBrackets() << endl;
- tmp = ss.str();
+ map<UnicodeString, MBMAana*> unique;
+ ait=analysis.begin();
+ while ( ait != analysis.end() ){
+ UnicodeString tmp = (*ait)->getKey( doDaring );
+ unique[tmp] = *ait;
+ ++ait;
+ }
+  // so we have a map of 'equal' analyses
+ set<MBMAana*> uniqueAna;
+ map<UnicodeString, MBMAana*>::const_iterator uit=unique.begin();
+ while ( uit != unique.end() ){
+ uniqueAna.insert( uit->second );
+ ++uit;
+ }
+ // and now a set of all MBMAana's that are really different.
+ // remove all analysis that aren't in that set.
+ ait=analysis.begin();
+ while ( ait != analysis.end() ){
+ if ( uniqueAna.find( *ait ) != uniqueAna.end() ){
+ ++ait;
}
else {
- vector<string> v = (*it)->getMorph();
- // create an unique key
- for ( size_t p=0; p < v.size(); ++p ) {
- tmp += v[p] + "+";
- }
+ delete *ait;
+ ait = analysis.erase( ait );
}
- unique[tmp] = *it;
- ++it;
}
- vector<MBMAana*> result;
- map<string, MBMAana*>::const_iterator uit=unique.begin();
- while ( uit != unique.end() ){
- result.push_back( uit->second );
- if (debugFlag){
- *Log(mbmaLog) << "Final Bracketing: " << uit->second->getBrackets() << endl;
- }
- ++uit;
+ if ( debugFlag ){
+ *Log(mbmaLog) << "filter: analysis before sort on length:" << endl;
+ int i=1;
+ for(vector<MBMAana*>::const_iterator it=analysis.begin(); it != analysis.end(); it++)
+ *Log(mbmaLog) << i++ << " - " << *it << " " << (*it)->getKey(false)
+ << " (" << (*it)->getKey(false).length() << ")" << endl;
+ *Log(mbmaLog) << "" << endl;
}
- analysis = result;
- if (debugFlag){
+  // Now we have a small list of unique and different analyses.
+ // We assume the 'longest' analysis to be the best.
+ // So we prefer '[ge][maak][t]' over '[gemaak][t]'
+  // Therefore we sort on (unicode) string length
+ sort( analysis.begin(), analysis.end(), mbmacmp );
+
+ if ( debugFlag){
*Log(mbmaLog) << "filter: definitive analysis:" << endl;
int i=1;
for(vector<MBMAana*>::const_iterator it=analysis.begin(); it != analysis.end(); it++)
@@ -1935,17 +1990,11 @@ void Mbma::getFoLiAResult( Word *fword, const UnicodeString& uword ) const {
else {
vector<MBMAana*>::const_iterator sit = analysis.begin();
while( sit != analysis.end() ){
- if ( sit == analysis.begin() ){
- if ( doDaring )
- addBracketMorph( fword, (*sit)->getBrackets() );
- else
- addMorph( fword, (*sit)->getMorph() );
+ if ( doDaring ){
+ addBracketMorph( fword, (*sit)->getBrackets() );
}
else {
- if ( doDaring )
- addAltBracketMorph( fword, (*sit)->getBrackets() );
- else
- addAltMorph( fword, (*sit)->getMorph() );
+ addMorph( fword, (*sit)->getMorph() );
}
++sit;
}
@@ -1953,7 +2002,7 @@ void Mbma::getFoLiAResult( Word *fword, const UnicodeString& uword ) const {
}
void Mbma::addDeclaration( Document& doc ) const {
- doc.declare( AnnotationType::MORPHOLOGICAL, tagset,
+ doc.declare( AnnotationType::MORPHOLOGICAL, mbma_tagset,
"annotator='frog-mbma-" + version +
+ "', annotatortype='auto', datetime='" + getTime() + "'");
if ( doDaring ){
diff --git a/src/mbma_prog.cxx b/src/mbma_prog.cxx
index da62381..f449e8c 100644
--- a/src/mbma_prog.cxx
+++ b/src/mbma_prog.cxx
@@ -1,6 +1,6 @@
/*
- $Id:$
- $URL:$
+ $Id: mbma_prog.cxx 17665 2014-09-15 15:58:01Z sloot $
+ $URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/mbma_prog.cxx $
Copyright (c) 2006 - 2014
Tilburg University
@@ -37,10 +37,9 @@
#include <map>
#include "config.h"
-#include "timbl/TimblAPI.h"
#include "ticcutils/LogStream.h"
#include "ticcutils/Configuration.h"
-#include "libfolia/folia.h"
+#include "ticcutils/CommandLine.h"
#include "frog/ucto_tokenizer_mod.h"
#include "frog/cgn_tagger_mod.h"
#include "frog/mbma_mod.h"
@@ -51,16 +50,15 @@ using namespace Timbl;
LogStream my_default_log( cerr, "", StampMessage ); // fall-back
LogStream *theErrLog = &my_default_log; // fill the externals
-string TestFileName;
-string ProgName;
+vector<string> fileNames;
bool doAll = false;
Configuration configuration;
static string configDir = string(SYSCONF_PATH) + "/" + PACKAGE + "/";
static string configFileName = configDir + "frog.cfg";
-static UctoTokenizer tokenizer;
-static CGNTagger tagger;
+static UctoTokenizer tokenizer(theErrLog);
+static CGNTagger tagger(theErrLog);
void usage( ) {
cout << endl << "Options:\n";
@@ -74,26 +72,19 @@ void usage( ) {
<< "\t -d <debug level> (for more verbosity)\n";
}
-static Mbma myMbma;
+static Mbma myMbma(theErrLog);
-bool parse_args( TimblOpts& Opts ) {
- string value;
- bool mood;
- if ( Opts.Find('V', value, mood ) ||
- Opts.Find("version", value, mood ) ){
+bool parse_args( TiCC::CL_Options& Opts ) {
+ if ( Opts.is_present( 'V' ) || Opts.is_present("version") ){
// we already did show what we wanted.
exit( EXIT_SUCCESS );
}
- if ( Opts.Find ('h', value, mood)) {
+ if ( Opts.is_present('h') ){
usage();
exit( EXIT_SUCCESS );
};
// is a config file specified?
- if ( Opts.Find( 'c', value, mood ) ) {
- configFileName = value;
- Opts.Delete( 'c' );
- };
-
+ Opts.extract( 'c', configFileName );
if ( configuration.fill( configFileName ) ){
cerr << "config read from: " << configFileName << endl;
}
@@ -102,36 +93,31 @@ bool parse_args( TimblOpts& Opts ) {
cerr << "did you correctly install the frogdata package?" << endl;
return false;
}
-
+ string value;
// debug opts
- if ( Opts.Find ('d', value, mood)) {
+ if ( Opts.extract('d', value ) ){
int debug = 0;
if ( !TiCC::stringTo<int>( value, debug ) ){
cerr << "-d value should be an integer" << endl;
return false;
}
configuration.setatt( "debug", value, "mbma" );
- Opts.Delete('d');
};
- if ( Opts.Find( 't', value, mood )) {
- TestFileName = value;
+ if ( Opts.extract( 't', value ) ){
ifstream is( value.c_str() );
if ( !is ){
cerr << "input stream " << value << " is not readable" << endl;
return false;
}
- Opts.Delete('t');
- };
- if ( Opts.Find( 'a', value, mood )) {
- doAll = true;
- Opts.Delete('a');
- };
- if ( Opts.Find( "daring", value, mood )) {
- if ( value.empty() )
- value = "1";
- configuration.setatt( "daring", value, "mbma" );
- Opts.Delete("daring");
+ fileNames.push_back( value );
+ }
+ else {
+ fileNames = Opts.getMassOpts();
+ }
+ doAll = Opts.extract( 'a' );
+ if ( Opts.extract( "daring" ) ){
+ configuration.setatt( "daring", "1", "mbma" );
};
return true;
}
@@ -144,7 +130,7 @@ bool init(){
cerr << "MBMA Initialization failed." << endl;
return false;
}
- if ( !tokenizer.init( configuration, "", false ) ){
+ if ( !tokenizer.init( configuration ) ){
cerr << "UCTO Initialization failed." << endl;
return false;
}
@@ -208,16 +194,23 @@ int main(int argc, char *argv[]) {
std::ios_base::sync_with_stdio(false);
cerr << "mbma " << VERSION << " (c) ILK 1998 - 2014" << endl;
cerr << "Induction of Linguistic Knowledge Research Group, Tilburg University" << endl;
- ProgName = argv[0];
+ TiCC::CL_Options Opts("aVt:d:hc:","daring,version");
+ try {
+ Opts.init(argc, argv);
+ }
+ catch ( const exception& e ){
+ cerr << "fatal error: " << e.what() << endl;
+ return EXIT_FAILURE;
+ }
cerr << "based on [" << Timbl::VersionName() << "]" << endl;
cerr << "configdir: " << configDir << endl;
- try {
- TimblOpts Opts(argc, argv);
- if ( parse_args(Opts) ){
- if ( !init() ){
- cerr << "terminated." << endl;
- return EXIT_FAILURE;
- }
+ if ( parse_args(Opts) ){
+ if ( !init() ){
+ cerr << "terminated." << endl;
+ return EXIT_FAILURE;
+ }
+ for ( size_t i=0; i < fileNames.size(); ++i ){
+ string TestFileName = fileNames[i];
ifstream in(TestFileName.c_str() );
if ( in.good() ){
cerr << "Processing: " << TestFileName << endl;
@@ -228,12 +221,8 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
}
- else {
- return EXIT_FAILURE;
- }
}
- catch ( const exception& e ){
- cerr << "fatal error: " << e.what() << endl;
+ else {
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
diff --git a/src/mwu_chunker_mod.cxx b/src/mwu_chunker_mod.cxx
index 0dc5089..a7ca357 100755
--- a/src/mwu_chunker_mod.cxx
+++ b/src/mwu_chunker_mod.cxx
@@ -1,5 +1,5 @@
/*
- $Id: mwu_chunker_mod.cxx 17486 2014-08-06 14:47:38Z sloot $
+ $Id: mwu_chunker_mod.cxx 17616 2014-09-07 19:24:55Z mvgompel $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/mwu_chunker_mod.cxx $
Copyright (c) 2006 - 2014
@@ -97,8 +97,8 @@ EntitiesLayer *mwuAna::addEntity( const std::string& tagset,
return el;
}
-Mwu::Mwu(){
- mwuLog = new LogStream( theErrLog, "mwu-" );
+Mwu::Mwu(LogStream * logstream){
+ mwuLog = new LogStream( logstream, "mwu-" );
}
Mwu::~Mwu(){
diff --git a/src/ner_tagger_mod.cxx b/src/ner_tagger_mod.cxx
index 56dc9b9..ede670f 100644
--- a/src/ner_tagger_mod.cxx
+++ b/src/ner_tagger_mod.cxx
@@ -1,5 +1,5 @@
/*
- $Id: ner_tagger_mod.cxx 17486 2014-08-06 14:47:38Z sloot $
+ $Id: ner_tagger_mod.cxx 17650 2014-09-13 20:18:58Z mvgompel $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/ner_tagger_mod.cxx $
Copyright (c) 2006 - 2014
@@ -35,9 +35,9 @@
using namespace std;
using namespace folia;
-NERTagger::NERTagger(){
+NERTagger::NERTagger(TiCC::LogStream * logstream){
tagger = 0;
- nerLog = new LogStream( theErrLog, "ner-" );
+ nerLog = new LogStream( logstream, "ner-" );
}
NERTagger::~NERTagger(){
@@ -46,6 +46,7 @@ NERTagger::~NERTagger(){
}
bool NERTagger::init( const Configuration& config ){
+ debug = 0;
string val = config.lookUp( "debug", "NER" );
if ( val.empty() ){
val = config.lookUp( "debug" );
diff --git a/src/ucto_tokenizer_mod.cxx b/src/ucto_tokenizer_mod.cxx
index 1ab003c..c8307ac 100644
--- a/src/ucto_tokenizer_mod.cxx
+++ b/src/ucto_tokenizer_mod.cxx
@@ -1,5 +1,5 @@
/*
- $Id: ucto_tokenizer_mod.cxx 17436 2014-07-09 13:13:35Z sloot $
+ $Id: ucto_tokenizer_mod.cxx 17661 2014-09-15 14:59:03Z sloot $
$URL: https://ilk.uvt.nl/svn/sources/Frog/trunk/src/ucto_tokenizer_mod.cxx $
Copyright (c) 2006 - 2014
@@ -38,14 +38,14 @@
using namespace std;
using namespace TiCC;
-UctoTokenizer::UctoTokenizer() {
+UctoTokenizer::UctoTokenizer(LogStream * logstream) {
tokenizer = 0;
- uctoLog = new LogStream( theErrLog, "tok-" );
+ uctoLog = new LogStream( logstream, "tok-" );
}
-bool UctoTokenizer::init( const Configuration& config, const string & docid, bool pass ){
+bool UctoTokenizer::init( const Configuration& config ){
if ( tokenizer )
- throw runtime_error( "ucto tokenizer is already initalized" );
+ throw runtime_error( "ucto tokenizer is already initialized" );
tokenizer = new Tokenizer::TokenizerClass();
tokenizer->setErrorLog( uctoLog );
int debug = 0;
@@ -56,10 +56,9 @@ bool UctoTokenizer::init( const Configuration& config, const string & docid, boo
if ( !val.empty() )
debug = TiCC::stringTo<int>( val );
tokenizer->setDebug( debug );
- if ( pass ){
+ if ( tokenizer->getPassThru() ){
// when passthru, we don't further initialize the tokenizer
// it wil run in minimal mode then.
- tokenizer->setPassThru( true );
}
else {
string rulesName = config.lookUp( "rulesFile", "tokenizer" );
@@ -76,7 +75,7 @@ bool UctoTokenizer::init( const Configuration& config, const string & docid, boo
tokenizer->setVerbose( false );
tokenizer->setSentenceDetection( true ); //detection of sentences
tokenizer->setParagraphDetection( false ); //detection of paragraphs
- tokenizer->setXMLOutput( true, docid );
+ tokenizer->setXMLOutput( true );
return true;
}
@@ -86,21 +85,21 @@ void UctoTokenizer::setUttMarker( const string& u ) {
tokenizer->setEosMarker( u );
}
else
- throw runtime_error( "ucto tokenizer not initalized" );
+ throw runtime_error( "ucto tokenizer not initialized" );
}
void UctoTokenizer::setSentencePerLineInput( bool b ) {
if ( tokenizer )
tokenizer->setSentencePerLineInput( b );
else
- throw runtime_error( "ucto tokenizer not initalized" );
+ throw runtime_error( "ucto tokenizer not initialized" );
}
void UctoTokenizer::setQuoteDetection( bool b ) {
if ( tokenizer )
tokenizer->setQuoteDetection( b );
else
- throw runtime_error( "ucto tokenizer not initalized" );
+ throw runtime_error( "ucto tokenizer not initialized" );
}
void UctoTokenizer::setInputEncoding( const std::string & enc ){
@@ -109,7 +108,7 @@ void UctoTokenizer::setInputEncoding( const std::string & enc ){
tokenizer->setInputEncoding( enc );
}
else
- throw runtime_error( "ucto tokenizer not initalized" );
+ throw runtime_error( "ucto tokenizer not initialized" );
}
void UctoTokenizer::setTextClass( const std::string& cls ){
@@ -118,7 +117,16 @@ void UctoTokenizer::setTextClass( const std::string& cls ){
tokenizer->setTextClass( cls );
}
else
- throw runtime_error( "ucto tokenizer not initalized" );
+ throw runtime_error( "ucto tokenizer not initialized" );
+}
+
+void UctoTokenizer::setDocID( const std::string& id ){
+ if ( tokenizer ){
+ if ( !id.empty() )
+ tokenizer->setDocID( id );
+ }
+ else
+ throw runtime_error( "ucto tokenizer not initialized" );
}
void UctoTokenizer::setInputXml( bool b ){
@@ -126,7 +134,23 @@ void UctoTokenizer::setInputXml( bool b ){
tokenizer->setXMLInput( b );
}
else
- throw runtime_error( "ucto tokenizer not initalized" );
+ throw runtime_error( "ucto tokenizer not initialized" );
+}
+
+void UctoTokenizer::setPassThru( const bool b ) {
+ if ( tokenizer ){
+ tokenizer->setPassThru( b );
+ }
+ else
+ throw runtime_error( "ucto tokenizer not initialized" );
+}
+
+bool UctoTokenizer::getPassThru() const {
+ if ( tokenizer ){
+ return tokenizer->getPassThru();
+ }
+ else
+ throw runtime_error( "ucto tokenizer not initialized" );
}
vector<string> UctoTokenizer::tokenize( const string& line ){
@@ -142,20 +166,29 @@ vector<string> UctoTokenizer::tokenize( const string& line ){
return tokenizer->getSentences();
}
else
- throw runtime_error( "ucto tokenizer not initalized" );
+ throw runtime_error( "ucto tokenizer not initialized" );
}
folia::Document UctoTokenizer::tokenize( istream& is ){
if ( tokenizer )
return tokenizer->tokenize( is );
else
- throw runtime_error( "ucto tokenizer not initalized" );
+ throw runtime_error( "ucto tokenizer not initialized" );
+}
+
+folia::Document UctoTokenizer::tokenizestring( const string& s){
+ if ( tokenizer) {
+ istringstream is(s);
+ return tokenizer->tokenize( is);
+ }
+ else
+ throw runtime_error( "ucto tokenizer not initialized" );
}
bool UctoTokenizer::tokenize( folia::Document& doc ){
if ( tokenizer )
return tokenizer->tokenize( doc );
else
- throw runtime_error( "ucto tokenizer not initalized" );
+ throw runtime_error( "ucto tokenizer not initialized" );
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/frog.git
More information about the debian-science-commits
mailing list