[cg3] 01/02: Imported Upstream version 0.9.9~r10784
Tino Didriksen
tinodidriksen-guest at moszumanska.debian.org
Sun Jun 21 17:25:36 UTC 2015
This is an automated email from the git hooks/post-receive script.
tinodidriksen-guest pushed a commit to branch master
in repository cg3.
commit 9b32cea76ccc380cee0958c939a3df6045579efb
Author: Tino Didriksen <mail at tinodidriksen.com>
Date: Sun Jun 21 17:25:29 2015 +0000
Imported Upstream version 0.9.9~r10784
---
CMakeLists.txt | 23 +-
ChangeLog | 947 +++++++++-------------
TODO | 8 +-
cg3.g | 7 +-
emacs/cg.el | 44 +-
get-boost.sh | 2 +-
include/exec-stream/exec-stream.cpp | 466 -----------
include/exec-stream/exec-stream.h | 163 ----
include/exec-stream/posix/exec-stream-helpers.cpp | 842 -------------------
include/exec-stream/posix/exec-stream-helpers.h | 239 ------
include/exec-stream/posix/exec-stream-impl.cpp | 386 ---------
include/exec-stream/win/exec-stream-helpers.cpp | 727 -----------------
include/exec-stream/win/exec-stream-helpers.h | 183 -----
include/exec-stream/win/exec-stream-impl.cpp | 315 -------
include/posix/popen_plus.c | 183 +++++
include/posix/popen_plus.h | 56 ++
{win32 => include/win32}/getopt.c | 0
{win32 => include/win32}/getopt.h | 0
manual/cgkeywords.xml | 26 +
manual/cmdreference.xml | 116 +--
manual/contexts.xml | 17 +
manual/grammar.xml | 74 ++
manual/sets.xml | 35 +-
manual/tags.xml | 40 +
scripts/cg3-autobin.pl.in | 6 +-
src/ApertiumApplicator.cpp | 6 +-
src/BinaryGrammar_read.cpp | 204 +++--
src/BinaryGrammar_read_10043.cpp | 196 ++---
src/BinaryGrammar_write.cpp | 92 ++-
src/CMakeLists.txt | 15 +-
src/ContextualTest.cpp | 12 +-
src/ContextualTest.hpp | 18 +-
src/FSTApplicator.cpp | 58 +-
src/FSTApplicator.hpp | 4 +
src/Grammar.cpp | 228 ++++--
src/Grammar.hpp | 32 +-
src/GrammarApplicator.cpp | 104 ++-
src/GrammarApplicator.hpp | 110 +--
src/GrammarApplicator_matchSet.cpp | 228 +++---
src/GrammarApplicator_reflow.cpp | 36 +-
src/GrammarApplicator_runContextualTest.cpp | 83 +-
src/GrammarApplicator_runGrammar.cpp | 6 +-
src/GrammarApplicator_runRules.cpp | 119 ++-
src/GrammarWriter.cpp | 9 +-
src/IGrammarParser.hpp | 3 +-
src/NicelineApplicator.cpp | 6 +-
src/PlaintextApplicator.cpp | 2 +-
src/Reading.cpp | 19 +
src/Reading.hpp | 1 +
src/Set.cpp | 4 +
src/Strings.cpp | 12 +-
src/Strings.hpp | 10 +
src/Tag.cpp | 282 +------
src/Tag.hpp | 24 +-
src/TagTrie.hpp | 22 +
src/TextualParser.cpp | 762 +++++++++++------
src/TextualParser.hpp | 18 +
src/all_cg_comp.cpp | 1 +
src/all_cg_conv.cpp | 1 +
src/all_cg_proc.cpp | 1 +
src/all_vislcg3.cpp | 1 +
src/cg_comp.cpp | 2 +
src/cg_conv.cpp | 21 +
src/cg_proc.cpp | 2 +
src/inlines.hpp | 57 +-
src/main.cpp | 20 +-
src/options.hpp | 116 +--
src/options_conv.hpp | 6 +
src/parser_helpers.hpp | 324 ++++++++
src/process.hpp | 199 +++++
src/stdafx.hpp | 5 +-
src/uextras.cpp | 4 +-
src/uextras.hpp | 35 +
src/version.hpp | 4 +-
test/T_Dependency_Loops/grammar.cg3 | 9 +-
test/T_MapAdd_Different/args.txt | 2 +-
test/T_MapAdd_Different/expected.txt | 3 +-
test/T_RegExp/expected.txt | 3 +-
test/T_RegExp/grammar.cg3 | 2 +
test/T_RegExp/grammar.cg3b.10043 | Bin 3827 -> 4052 bytes
test/T_Templates/expected.txt | 7 +-
test/T_Templates/grammar.cg3 | 3 +
test/T_Templates/grammar.cg3b.10043 | Bin 5270 -> 0 bytes
test/T_Templates/input.txt | 5 +-
vapply.sh | 4 +-
win32/libgen.c | 25 -
win32/libgen.h | 14 -
87 files changed, 3238 insertions(+), 5268 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d880567..b53588d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,20 +33,18 @@ endif()
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /EHsc /MP")
- set(CMAKE_CXX_FLAGS_RELEASE "/MT /Ox /Ot /GL /GS- /DNDEBUG")
- set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/LTCG")
+ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT /Ox /Ot /GL /GS-")
+ set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG")
set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS})
set(CMAKE_C_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
- add_definitions(-DUNICODE -D_UNICODE -D_SECURE_SCL=0 -D_ITERATOR_DEBUG_LEVEL=0 -D_CRT_SECURE_NO_DEPRECATE -DWIN32_LEAN_AND_MEAN -DVC_EXTRALEAN -DNOMINMAX)
- include_directories("${CMAKE_CURRENT_SOURCE_DIR}/win32")
else()
- set(_FLAGS_COMMON "-Wall -Wextra -Wno-deprecated -Wno-unused-parameter -fPIC")
+ set(_FLAGS_COMMON "-Wall -Wextra -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_FLAGS_COMMON} -fvisibility-inlines-hidden")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3")
- set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -DNDEBUG")
+ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${_FLAGS_COMMON}")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3")
- set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3 -DNDEBUG")
+ set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3")
# Enable C++11 if possible
if((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.6) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.1))
@@ -70,8 +68,15 @@ else()
PATTERN ".svn" EXCLUDE)
endif()
+if(WIN32)
+ add_definitions(-DUNICODE -D_UNICODE -D_SECURE_SCL=0 -D_ITERATOR_DEBUG_LEVEL=0 -D_CRT_SECURE_NO_DEPRECATE -DWIN32_LEAN_AND_MEAN -DVC_EXTRALEAN -DNOMINMAX)
+ include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include/win32")
+else()
+ set(POPEN_PLUS_C "${CMAKE_CURRENT_SOURCE_DIR}/include/posix/popen_plus.c" "${CMAKE_CURRENT_SOURCE_DIR}/include/posix/popen_plus.h")
+ include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include/posix")
+endif()
+
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")
-include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include/exec-stream")
enable_testing()
@@ -80,4 +85,4 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/scripts/cg3-autobin.pl.in ${CMAKE_CURRENT_BINARY_DIR}/scripts/cg3-autobin.pl @ONLY)
install(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/scripts/cg3-autobin.pl" DESTINATION bin)
-install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/emacs/cg.el" DESTINATION share/emacs/site-lisp RENAME cg3-mode.el)
+install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/emacs/cg.el" DESTINATION share/emacs/site-lisp)
diff --git a/ChangeLog b/ChangeLog
index 7b5359c..f57829a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,375 @@
+2015-06-04 tino
+
+ * [r10749] include/posix/popen_plus.c, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_runRules.cpp: Fix weirdness
+ * [r10748] src/process.hpp: CentOS 6, a plague upon thy house
+ * [r10746] CMakeLists.txt, ChangeLog, include/exec-stream,
+ include/posix, include/win32, src/CMakeLists.txt,
+ src/GrammarApplicator.cpp, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_runRules.cpp, src/inlines.hpp,
+ src/process.hpp, src/version.hpp, win32: Switched from
+ exec-stream to popen_plus/win32 for a 20x speedup in External
+ processing
+ * [r10745] src/GrammarApplicator.hpp: Undo 2 lines
+ * [r10744] src/TextualParser.cpp: Throw on invalid options, rather
+ than endless loop
+ * [r10742] ChangeLog, TODO, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_runRules.cpp, src/Reading.cpp,
+ src/Reading.hpp, src/TextualParser.cpp, src/version.hpp: Add
+ SUB:* to make rule targets look at any sub-reading
+
+2015-05-30 tino
+
+ * [r10738] src/TextualParser.cpp: Don't parse SUB as flag
+
+2015-05-28 tino
+
+ * [r10734] src/GrammarApplicator_runRules.cpp: Fix sub-reading
+ still being active bug; Minor optimization
+
+2015-05-26 tino
+
+ * [r10730] ChangeLog, src/version.hpp: STRICT-TAGS done, so far
+ * [r10729] manual/grammar.xml, manual/tags.xml: Document
+ STRICT-TAGS and new options
+ * [r10728] src/TextualParser.hpp, src/parser_helpers.hpp,
+ test/T_Dependency_Loops/grammar.cg3: Tags listed in Strict-Tags
+ may now be used as implicit inline sets, where they are the only
+ member
+ * [r10727] src/Grammar.cpp, src/Grammar.hpp, src/Set.cpp,
+ src/TextualParser.cpp, src/TextualParser.hpp, src/main.cpp: Move
+ --show-tags code to after grammar indexing, to only show used
+ tags
+ * [r10725] src/TextualParser.cpp: Test the plain tag; Always allow
+ >>> and <<<
+ * [r10724] src/TextualParser.cpp: Always allow many more special
+ tag types
+ * [r10723] TODO, src/Strings.cpp, src/Strings.hpp,
+ src/TextualParser.cpp, src/TextualParser.hpp,
+ test/T_Dependency_Loops/grammar.cg3: Strict-Tags default allow
+ wordforms, baseforms, secondary, and * tag
+
+2015-04-23 unhammer
+
+ * [r10651] emacs/cg.el: wordform-lines can have tags (lex.sel)
+
+2015-04-20 tino
+
+ * [r10649] CMakeLists.txt: Append to flags
+
+2015-04-17 unhammer
+
+ * [r10647] emacs/cg.el: todo
+ * [r10646] emacs/cg.el: "handle all the string
+ modifiers"[irv]\{0,3\}
+ * [r10645] emacs/cg.el: "foo"i also possible, not just "foo"ri
+
+2015-04-13 tino
+
+ * [r10643] ChangeLog, src/Grammar.cpp, src/version.hpp: f branch
+ empty sets turned into * instead of error
+
+2015-04-10 tino
+
+ * [r10640] ChangeLog, src/Grammar.cpp, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_runRules.cpp, src/Strings.cpp,
+ src/Strings.hpp, src/version.hpp: Non-capturing unification is
+ now done per reading
+ * [r10639] src/GrammarApplicator_runRules.cpp: Remove debug
+ * [r10638] vapply.sh: Keep old output
+ * [r10637] ChangeLog, TODO, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runRules.cpp, src/version.hpp: WIP to make
+ unification truly per-reading
+
+2015-03-25 unhammer
+
+ * [r10629] emacs/cg.el: call it 0.1.7, since C-M-h now works, more
+ keywords are matched, and we highlight symbol-at-point in output.
+
+ +some nitpicking
+
+2015-03-25 tino
+
+ * [r10628] CMakeLists.txt: Don't rename cg.el to cg3-mode.el as it
+ seems no current distro ships any package with a conflicting
+ cg.el
+
+2015-03-24 tino
+
+ * [r10617] src/Grammar.cpp, src/Grammar.hpp, src/TextualParser.cpp:
+ Don't report potentially used sets as unused. They might actually
+ be unused, but that would take a lot more work to determine.
+
+2015-03-19 tino
+
+ * [r10601] CMakeLists.txt,
+ include/exec-stream/posix/exec-stream-impl.cpp,
+ src/BinaryGrammar_read.cpp, src/BinaryGrammar_read_10043.cpp,
+ src/BinaryGrammar_write.cpp, src/Grammar.hpp, src/inlines.hpp:
+ Remove -Wno-unused-result to help poor feeble GCC 4.2.1
+
+2015-03-09 tino
+
+ * [r10589] src/GrammarApplicator_reflow.cpp: Satisfy older
+ compilers
+ * [r10588] src/Grammar.cpp: Respect empty sections
+ * [r10587] ChangeLog, src/GrammarApplicator.hpp, src/main.cpp,
+ src/version.hpp: Enforce matching --prefix for binary grammars
+
+2015-03-06 tino
+
+ * [r10578] src/GrammarApplicator_runGrammar.cpp: Wordforms now
+ search for >" instead of just "
+ * [r10577] ChangeLog, manual/sets.xml, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_runRules.cpp, src/Strings.cpp,
+ src/Strings.hpp, src/Tag.hpp, src/TextualParser.cpp,
+ src/parser_helpers.hpp, src/version.hpp: Added magic set/tag
+ _SAME_BASIC_
+ * [r10576] ChangeLog, manual/grammar.xml,
+ src/BinaryGrammar_read.cpp, src/BinaryGrammar_write.cpp,
+ src/Grammar.hpp, src/GrammarApplicator_reflow.cpp,
+ src/Strings.cpp, src/Strings.hpp, src/TextualParser.cpp,
+ src/version.hpp: Added REOPEN-MAPPINGS
+ * [r10574] src/GrammarApplicator.hpp,
+ src/GrammarApplicator_runRules.cpp: Regex captures are now
+ per-reading
+ * [r10570] src/GrammarApplicator.cpp,
+ src/GrammarApplicator_reflow.cpp, src/GrammarWriter.cpp: Readings
+ that would be considered identical without --trace are now marked
+ deleted under --trace. This is incorrect behavior, but being
+ blessed because it makes things easier for grammarians.
+
+2015-03-05 tino
+
+ * [r10568] CMakeLists.txt, ChangeLog, manual/contexts.xml,
+ src/BinaryGrammar_write.cpp, src/ContextualTest.cpp,
+ src/ContextualTest.hpp, src/Grammar.cpp, src/Grammar.hpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarWriter.cpp, src/Tag.hpp, src/TagTrie.hpp,
+ src/TextualParser.cpp, src/inlines.hpp, src/stdafx.hpp,
+ src/version.hpp, test/T_Templates/expected.txt,
+ test/T_Templates/grammar.cg3,
+ test/T_Templates/grammar.cg3b.10043, test/T_Templates/input.txt:
+ New context modifier 'f'
+
+2015-02-24 tino
+
+ * [r10550] src/CMakeLists.txt: Actually install DLL
+ * [r10549] ChangeLog, manual/cmdreference.xml,
+ src/FSTApplicator.cpp, src/FSTApplicator.hpp, src/cg_conv.cpp,
+ src/options_conv.hpp, src/version.hpp: Added cg-conv --wfactor,-W
+ --wtag --sub-delim,-S
+
+2015-02-23 tino
+
+ * [r10548] src/CMakeLists.txt: More MSVC vs WIN32
+ * [r10547] CMakeLists.txt, get-boost.sh, src/CMakeLists.txt,
+ src/cg_comp.cpp, src/cg_proc.cpp, src/stdafx.hpp,
+ src/uextras.cpp, src/uextras.hpp: Differentiate between MSVC and
+ WIN32 to allow for cross-compiling
+
+2015-02-22 tino
+
+ * [r10543] ChangeLog, manual/grammar.xml,
+ src/GrammarApplicator_runRules.cpp, src/version.hpp,
+ test/T_RegExp/expected.txt, test/T_RegExp/grammar.cg3,
+ test/T_RegExp/grammar.cg3b.10043: Fix regex capture groups
+ remaining captured after a target match fail
+
+2015-02-20 tino
+
+ * [r10535] ChangeLog, src/Grammar.cpp, src/GrammarApplicator.cpp,
+ src/version.hpp: Undo not checking tag[0] for regex/icase tags
+ * [r10534] src/TextualParser.cpp: no-isets allow (*)
+ * [r10533] src/TextualParser.cpp: Moved no-isets check to start of
+ (
+ * [r10532] src/BinaryGrammar_read.cpp,
+ src/BinaryGrammar_read_10043.cpp, src/Grammar.hpp: Deduplicate
+ code
+ * [r10531] src/all_cg_comp.cpp, src/all_cg_conv.cpp,
+ src/all_cg_proc.cpp, src/all_vislcg3.cpp: Add #include
+ BinaryGrammar_read_10043.cpp
+ * [r10530] manual/cgkeywords.xml, manual/grammar.xml: Documented
+ OPTIONS += no-inline-sets no-inline-templates
+ * [r10529] src/parser_helpers.hpp: Include guard
+ * [r10528] manual/cgkeywords.xml, manual/cmdreference.xml,
+ manual/tags.xml: Document STRICT-TAGS
+
+2015-02-19 tino
+
+ * [r10525] src/TextualParser.cpp: You don't know context
+ * [r10524] ChangeLog, src/Grammar.cpp, src/GrammarApplicator.cpp,
+ src/Strings.cpp, src/Strings.hpp, src/Tag.cpp,
+ src/TextualParser.cpp, src/TextualParser.hpp, src/inlines.hpp,
+ src/main.cpp, src/options.hpp, src/parser_helpers.hpp,
+ src/version.hpp: Implemented STRICT-TAGS += ... ; Implemented
+ --show-tags ; Implemented OPTIONS += no-inline-templates
+ no-inline-sets ;
+
+2015-02-18 tino
+
+ * [r10515] cg3.g: That comment is no longer true
+
+2015-02-17 tino
+
+ * [r10506] src/ApertiumApplicator.cpp, src/FSTApplicator.cpp,
+ src/GrammarApplicator.cpp, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_runGrammar.cpp, src/NicelineApplicator.cpp,
+ src/PlaintextApplicator.cpp, src/cg_conv.cpp: cg-conv less noisy
+ * [r10505] src/TextualParser.cpp: Missing -> Expected
+
+2015-02-16 tino
+
+ * [r10504] src/CMakeLists.txt, src/Grammar.cpp, src/Grammar.hpp,
+ src/GrammarApplicator.cpp, src/GrammarApplicator.hpp,
+ src/IGrammarParser.hpp, src/Tag.cpp, src/Tag.hpp,
+ src/TextualParser.cpp, src/TextualParser.hpp,
+ src/parser_helpers.hpp, src/uextras.hpp: Spread basename and
+ context to more diagnostics
+ * [r10502] src/TextualParser.cpp: Posix basename() takes by
+ non-const char* - crazy people
+ * [r10501] TODO, src/CMakeLists.txt, src/TextualParser.cpp,
+ src/TextualParser.hpp, src/cg_comp.cpp, src/cg_proc.cpp,
+ src/uextras.hpp, win32/libgen.c, win32/libgen.h: Print basename
+ of grammar before most parsing diagnostics
+ * [r10499] src/TextualParser.cpp, src/TextualParser.hpp,
+ src/uextras.hpp: Show "near `...`" for many parsing diagnostics
+ * [r10497] src/main.cpp, src/options.hpp: Minor fix
+ * [r10496] scripts/cg3-autobin.pl.in: Synchronize with reality
+ * [r10495] src/main.cpp, src/options.hpp: Add option --quiet and
+ allow -v0
+
+2015-02-08 tino
+
+ * [r10439] src/inlines.hpp: Forgot to change hash to avoid MAX and
+ MAX-1 back in r10033
+
+2015-01-28 tino
+
+ * [r10391] test/runall.pl: ...and now with all the code that got
+ deleted.
+ * [r10390] test/helper.pl, test/runall.pl: Whoops, wrong svn rm.
+ Also, now tests old grammars.
+ * [r10389] test/T_AnyMinusSome/args.txt,
+ test/T_AnyMinusSome/grammar.cg3b.10043,
+ test/T_AnyMinusSome/run.pl, test/T_Barrier/grammar.cg3b.10043,
+ test/T_Barrier/run.pl, test/T_BasicAppend/grammar.cg3b.10043,
+ test/T_BasicAppend/run.pl,
+ test/T_BasicContextTest/grammar.cg3b.10043,
+ test/T_BasicContextTest/run.pl, test/T_BasicDelimit/args.txt,
+ test/T_BasicDelimit/expected.txt,
+ test/T_BasicDelimit/grammar.cg3b.10043,
+ test/T_BasicDelimit/run.pl, test/T_BasicDependency/args.txt,
+ test/T_BasicDependency/grammar.cg3b.10043,
+ test/T_BasicDependency/run.pl,
+ test/T_BasicIff/grammar.cg3b.10043, test/T_BasicIff/run.pl,
+ test/T_BasicSelect/args.txt,
+ test/T_BasicSelect/grammar.cg3b.10043, test/T_BasicSelect/run.pl,
+ test/T_BasicSubstitute/args.txt,
+ test/T_BasicSubstitute/grammar.cg3b.10043,
+ test/T_BasicSubstitute/run.pl, test/T_CG2Compat/args.txt,
+ test/T_CG2Compat/grammar.cg3b.10043, test/T_CG2Compat/run.pl,
+ test/T_CarefulBarrier/grammar.cg3b.10043,
+ test/T_CarefulBarrier/run.pl,
+ test/T_DelayAndDelete/grammar.cg3b.10043,
+ test/T_DelayAndDelete/run.pl, test/T_Dependency_Loops/args.txt,
+ test/T_Dependency_Loops/grammar.cg3b.10043,
+ test/T_Dependency_Loops/run.pl,
+ test/T_Dependency_OutOfRange/expected.txt,
+ test/T_Dependency_OutOfRange/grammar.cg3,
+ test/T_Dependency_OutOfRange/grammar.cg3b.10043,
+ test/T_Dependency_OutOfRange/run.pl,
+ test/T_DontMatchEmptySet/grammar.cg3b.10043,
+ test/T_DontMatchEmptySet/run.pl,
+ test/T_EndlessSelect/grammar.cg3b.10043,
+ test/T_EndlessSelect/run.pl, test/T_External/grammar.cg3b.10043,
+ test/T_Include/grammar.cg3b.10043, test/T_Include/run.pl,
+ test/T_InputCommands/grammar.cg3b.10043,
+ test/T_InputCommands/run.pl,
+ test/T_InputMarkup/grammar.cg3b.10043, test/T_InputMarkup/run.pl,
+ test/T_JumpExecute/args.txt,
+ test/T_JumpExecute/grammar.cg3b.10043, test/T_JumpExecute/run.pl,
+ test/T_MapAdd_Different/args.txt,
+ test/T_MapAdd_Different/expected.txt,
+ test/T_MapAdd_Different/grammar.cg3,
+ test/T_MapAdd_Different/grammar.cg3b.10043,
+ test/T_MapAdd_Different/run.pl,
+ test/T_MapThenRemove/grammar.cg3b.10043,
+ test/T_MapThenRemove/run.pl,
+ test/T_MapThenSelect/grammar.cg3b.10043,
+ test/T_MapThenSelect/run.pl, test/T_MappingPrefix/args.txt,
+ test/T_MappingPrefix/grammar.cg3b.10043,
+ test/T_MappingPrefix/run.pl, test/T_Movement/grammar.cg3b.10043,
+ test/T_Movement/run.pl,
+ test/T_MultipleSections/grammar.cg3b.10043,
+ test/T_MultipleSections/run.pl,
+ test/T_NegatedContextTest/grammar.cg3b.10043,
+ test/T_NegatedContextTest/run.pl,
+ test/T_NotContextTest/grammar.cg3b.10043,
+ test/T_NotContextTest/run.pl, test/T_NumericalTags/args.txt,
+ test/T_NumericalTags/grammar.cg3b.10043,
+ test/T_NumericalTags/run.pl,
+ test/T_OmniWithBarrier/grammar.cg3b.10043,
+ test/T_OmniWithBarrier/run.pl,
+ test/T_Omniscan/grammar.cg3b.10043, test/T_Omniscan/run.pl,
+ test/T_OriginPassing/args.txt,
+ test/T_OriginPassing/grammar.cg3b.10043,
+ test/T_OriginPassing/run.pl, test/T_Parentheses/args.txt,
+ test/T_Parentheses/grammar.cg3b.10043, test/T_Parentheses/run.pl,
+ test/T_RegExp/grammar.cg3b.10043, test/T_RegExp/run.pl,
+ test/T_Relations/args.txt, test/T_Relations/grammar.cg3b.10043,
+ test/T_Relations/run.pl, test/T_RemCohort/args.txt,
+ test/T_RemCohort/grammar.cg3b.10043, test/T_RemCohort/run.pl,
+ test/T_RemoveSingleTag/grammar.cg3b.10043,
+ test/T_RemoveSingleTag/run.pl,
+ test/T_ScanningTests/grammar.cg3b.10043,
+ test/T_ScanningTests/run.pl, test/T_SectionRanges/args.txt,
+ test/T_SectionRanges/grammar.cg3b.10043,
+ test/T_SectionRanges/run.pl, test/T_Sections/args.txt,
+ test/T_Sections/grammar.cg3b.10043, test/T_Sections/run.pl,
+ test/T_SetOp_FailFast/args.txt,
+ test/T_SetOp_FailFast/grammar.cg3b.10043,
+ test/T_SetOp_FailFast/run.pl, test/T_SetOps/grammar.cg3b.10043,
+ test/T_SetOps/run.pl, test/T_SetParentChild/grammar.cg3b.10043,
+ test/T_SetParentChild/run.pl, test/T_SoftDelimiters/args.txt,
+ test/T_SoftDelimiters/grammar.cg3b.10043,
+ test/T_SoftDelimiters/run.pl, test/T_SpaceInForms/args.txt,
+ test/T_SpaceInForms/grammar.cg3b.10043,
+ test/T_SpaceInForms/run.pl,
+ test/T_SubReadings_Apertium/grammar.cg3b.10043,
+ test/T_SubReadings_CG/args.txt,
+ test/T_SubReadings_CG/grammar.cg3b.10043,
+ test/T_SubReadings_CG/run.pl,
+ test/T_SubstituteNil/grammar.cg3b.10043,
+ test/T_SubstituteNil/run.pl, test/T_Templates/grammar.cg3b.10043,
+ test/T_Templates/run.pl, test/T_Trace/args.txt,
+ test/T_Trace/grammar.cg3b.10043, test/T_Trace/run.pl,
+ test/T_Unification/args.txt,
+ test/T_Unification/grammar.cg3b.10043, test/T_Unification/run.pl,
+ test/T_Variables/grammar.cg3b.10043, test/T_Variables/run.pl,
+ test/helper.pl, test/runall.pl: Tests now share run code where
+ possible
+ * [r10388] src/Grammar.cpp: Let's not overwrite a random set when
+ loading old grammars
+ * [r10387] src/Grammar.cpp: Remember old static sets
+ * [r10386] src/BinaryGrammar_read_10043.cpp, src/Grammar.cpp,
+ src/Grammar.hpp, src/TextualParser.cpp, src/cg_conv.cpp: cg-conv
+ also needs to create the dummy set
+
+2015-01-27 tino
+
+ * [r10384] src/BinaryGrammar.cpp, src/BinaryGrammar_read.cpp,
+ src/IGrammarParser.hpp: Only warn about old grammars when -v is
+ on
+
2015-01-24 tino
+ * [r10379] ChangeLog, src/BinaryGrammar_read_10043.cpp,
+ src/version.hpp: Old loader seems to work now
* [r10378] ChangeLog, src/BinaryGrammar.hpp,
src/BinaryGrammar_read.cpp, src/BinaryGrammar_read_10043.cpp,
src/CMakeLists.txt, src/version.hpp: Add support for loading
@@ -1964,580 +2334,3 @@
will be visible only to non-C contexts (so they are not ambiguous
or valid rule targets).
-2012-09-04 tino
-
- * [r8506] src/TextualParser.cpp: Also look for includes in the CWD
-
-2012-09-02 tino
-
- * [r8499] src/TextualParser.cpp, src/inlines.h,
- test/Apertium/T_AnyMinusSome/grammar.cg3,
- test/Apertium/T_AnyMinusSome/grammar.txt,
- test/Apertium/T_AnyMinusSome/run.pl,
- test/Apertium/T_Barrier/grammar.cg3,
- test/Apertium/T_Barrier/grammar.txt,
- test/Apertium/T_Barrier/run.pl,
- test/Apertium/T_BasicAppend/grammar.cg3,
- test/Apertium/T_BasicAppend/grammar.txt,
- test/Apertium/T_BasicAppend/run.pl,
- test/Apertium/T_BasicContextTest/grammar.cg3,
- test/Apertium/T_BasicContextTest/grammar.txt,
- test/Apertium/T_BasicContextTest/run.pl,
- test/Apertium/T_BasicDelimit/grammar.cg3,
- test/Apertium/T_BasicDelimit/grammar.txt,
- test/Apertium/T_BasicDelimit/run.pl,
- test/Apertium/T_BasicIff/grammar.cg3,
- test/Apertium/T_BasicIff/grammar.txt,
- test/Apertium/T_BasicIff/run.pl,
- test/Apertium/T_BasicRemove/grammar.cg3,
- test/Apertium/T_BasicRemove/grammar.txt,
- test/Apertium/T_BasicRemove/run.pl,
- test/Apertium/T_BasicSelect/grammar.cg3,
- test/Apertium/T_BasicSelect/grammar.txt,
- test/Apertium/T_BasicSelect/run.pl,
- test/Apertium/T_BasicSubstitute/grammar.cg3,
- test/Apertium/T_BasicSubstitute/grammar.txt,
- test/Apertium/T_BasicSubstitute/run.pl,
- test/Apertium/T_CarefulBarrier/grammar.cg3,
- test/Apertium/T_CarefulBarrier/grammar.txt,
- test/Apertium/T_CarefulBarrier/run.pl,
- test/Apertium/T_CharsInWhiteSpace/grammar.cg3,
- test/Apertium/T_CharsInWhiteSpace/grammar.txt,
- test/Apertium/T_CharsInWhiteSpace/run.pl,
- test/Apertium/T_CompositeSelect/grammar.cg3,
- test/Apertium/T_CompositeSelect/grammar.txt,
- test/Apertium/T_CompositeSelect/run.pl,
- test/Apertium/T_DontMatchEmptySet/grammar.cg3,
- test/Apertium/T_DontMatchEmptySet/grammar.txt,
- test/Apertium/T_DontMatchEmptySet/run.pl,
- test/Apertium/T_EndlessSelect/grammar.cg3,
- test/Apertium/T_EndlessSelect/grammar.txt,
- test/Apertium/T_EndlessSelect/run.pl,
- test/Apertium/T_Joiner/grammar.cg3,
- test/Apertium/T_Joiner/grammar.txt,
- test/Apertium/T_Joiner/run.pl,
- test/Apertium/T_MapAdd_Different/grammar.cg3,
- test/Apertium/T_MapAdd_Different/grammar.txt,
- test/Apertium/T_MapAdd_Different/run.pl,
- test/Apertium/T_MatchBaseform/grammar.cg3,
- test/Apertium/T_MatchBaseform/grammar.txt,
- test/Apertium/T_MatchBaseform/run.pl,
- test/Apertium/T_MatchWordform/grammar.cg3,
- test/Apertium/T_MatchWordform/grammar.txt,
- test/Apertium/T_MatchWordform/run.pl,
- test/Apertium/T_MultiWords/grammar.cg3,
- test/Apertium/T_MultiWords/grammar.txt,
- test/Apertium/T_MultiWords/run.pl,
- test/Apertium/T_MultipleSections/grammar.cg3,
- test/Apertium/T_MultipleSections/grammar.txt,
- test/Apertium/T_MultipleSections/run.pl,
- test/Apertium/T_MultiwordTagStaying/grammar.cg3,
- test/Apertium/T_MultiwordTagStaying/grammar.txt,
- test/Apertium/T_MultiwordTagStaying/run.pl,
- test/Apertium/T_NegatedContextTest/grammar.cg3,
- test/Apertium/T_NegatedContextTest/grammar.txt,
- test/Apertium/T_NegatedContextTest/run.pl,
- test/Apertium/T_RegExp_Map/grammar.cg3,
- test/Apertium/T_RegExp_Map/grammar.txt,
- test/Apertium/T_RegExp_Map/run.pl,
- test/Apertium/T_RegExp_Select/grammar.cg3,
- test/Apertium/T_RegExp_Select/grammar.txt,
- test/Apertium/T_RegExp_Select/run.pl,
- test/Apertium/T_RegExp_Substitute/grammar.cg3,
- test/Apertium/T_RegExp_Substitute/grammar.txt,
- test/Apertium/T_RegExp_Substitute/run.pl,
- test/Apertium/T_RemoveSingleTag/grammar.cg3,
- test/Apertium/T_RemoveSingleTag/grammar.txt,
- test/Apertium/T_RemoveSingleTag/run.pl,
- test/Apertium/T_ScanningTests/grammar.cg3,
- test/Apertium/T_ScanningTests/grammar.txt,
- test/Apertium/T_ScanningTests/run.pl,
- test/Apertium/T_Sections/grammar.cg3,
- test/Apertium/T_Sections/grammar.txt,
- test/Apertium/T_Sections/run.pl,
- test/Apertium/T_SetOp_FailFast/grammar.cg3,
- test/Apertium/T_SetOp_FailFast/grammar.txt,
- test/Apertium/T_SetOp_FailFast/run.pl,
- test/Apertium/T_SetOp_OR/grammar.cg3,
- test/Apertium/T_SetOp_OR/grammar.txt,
- test/Apertium/T_SetOp_OR/run.pl,
- test/Apertium/T_SpaceInWord/grammar.cg3,
- test/Apertium/T_SpaceInWord/grammar.txt,
- test/Apertium/T_SpaceInWord/run.pl,
- test/Apertium/T_SuperBlanks/grammar.cg3,
- test/Apertium/T_SuperBlanks/grammar.txt,
- test/Apertium/T_SuperBlanks/run.pl,
- test/Apertium/T_SuperBlanksNewline/grammar.cg3,
- test/Apertium/T_SuperBlanksNewline/grammar.txt,
- test/Apertium/T_SuperBlanksNewline/run.pl,
- test/Apertium/T_Unification/grammar.cg3,
- test/Apertium/T_Unification/grammar.txt,
- test/Apertium/T_Unification/run.pl,
- test/Apertium/T_UnknownWord/grammar.cg3,
- test/Apertium/T_UnknownWord/grammar.txt,
- test/Apertium/T_UnknownWord/run.pl,
- test/T_AnyMinusSome/grammar.cg3, test/T_AnyMinusSome/grammar.txt,
- test/T_AnyMinusSome/run.pl, test/T_Barrier/grammar.cg3,
- test/T_Barrier/grammar.txt, test/T_Barrier/run.pl,
- test/T_BasicAppend/grammar.cg3, test/T_BasicAppend/grammar.txt,
- test/T_BasicAppend/run.pl, test/T_BasicContextTest/grammar.cg3,
- test/T_BasicContextTest/grammar.txt,
- test/T_BasicContextTest/run.pl, test/T_BasicDelimit/grammar.cg3,
- test/T_BasicDelimit/grammar.txt, test/T_BasicDelimit/run.pl,
- test/T_BasicDependency/grammar.cg3,
- test/T_BasicDependency/grammar.txt,
- test/T_BasicDependency/run.pl, test/T_BasicIff/grammar.cg3,
- test/T_BasicIff/grammar.txt, test/T_BasicIff/run.pl,
- test/T_BasicRemove/grammar.cg3, test/T_BasicRemove/grammar.txt,
- test/T_BasicRemove/run.pl, test/T_BasicSelect/grammar.cg3,
- test/T_BasicSelect/grammar.txt, test/T_BasicSelect/run.pl,
- test/T_BasicSubstitute/grammar.cg3,
- test/T_BasicSubstitute/grammar.txt,
- test/T_BasicSubstitute/run.pl, test/T_CG2Compat/grammar.cg3,
- test/T_CG2Compat/grammar.txt, test/T_CG2Compat/run.pl,
- test/T_CarefulBarrier/grammar.cg3,
- test/T_CarefulBarrier/grammar.txt, test/T_CarefulBarrier/run.pl,
- test/T_CompositeSelect/grammar.cg3,
- test/T_CompositeSelect/grammar.txt,
- test/T_CompositeSelect/run.pl, test/T_DelayAndDelete/grammar.cg3,
- test/T_DelayAndDelete/grammar.txt, test/T_DelayAndDelete/run.pl,
- test/T_Dependency_Loops/grammar.cg3,
- test/T_Dependency_Loops/grammar.txt,
- test/T_Dependency_Loops/run.pl,
- test/T_Dependency_OutOfRange/grammar.cg3,
- test/T_Dependency_OutOfRange/grammar.txt,
- test/T_Dependency_OutOfRange/run.pl,
- test/T_DontMatchEmptySet/grammar.cg3,
- test/T_DontMatchEmptySet/grammar.txt,
- test/T_DontMatchEmptySet/run.pl,
- test/T_EndlessSelect/grammar.cg3,
- test/T_EndlessSelect/grammar.txt, test/T_EndlessSelect/run.pl,
- test/T_External/grammar.cg3, test/T_External/grammar.txt,
- test/T_External/run.pl, test/T_Include/grammar.cg3,
- test/T_Include/grammar.txt, test/T_Include/run.pl,
- test/T_InputCommands/grammar.cg3,
- test/T_InputCommands/grammar.txt, test/T_InputCommands/run.pl,
- test/T_InputMarkup/grammar.cg3, test/T_InputMarkup/grammar.txt,
- test/T_InputMarkup/run.pl, test/T_JumpExecute/grammar.cg3,
- test/T_JumpExecute/grammar.txt, test/T_JumpExecute/run.pl,
- test/T_MapAdd_Different/grammar.cg3,
- test/T_MapAdd_Different/grammar.txt,
- test/T_MapAdd_Different/run.pl, test/T_MapThenRemove/grammar.cg3,
- test/T_MapThenRemove/grammar.txt, test/T_MapThenRemove/run.pl,
- test/T_MapThenSelect/grammar.cg3,
- test/T_MapThenSelect/grammar.txt, test/T_MapThenSelect/run.pl,
- test/T_MappingPrefix/grammar.cg3,
- test/T_MappingPrefix/grammar.txt, test/T_MappingPrefix/run.pl,
- test/T_Movement/grammar.cg3, test/T_Movement/grammar.txt,
- test/T_Movement/run.pl, test/T_MultipleSections/grammar.cg3,
- test/T_MultipleSections/grammar.txt,
- test/T_MultipleSections/run.pl,
- test/T_NegatedContextTest/grammar.cg3,
- test/T_NegatedContextTest/grammar.txt,
- test/T_NegatedContextTest/run.pl,
- test/T_NotContextTest/grammar.cg3,
- test/T_NotContextTest/grammar.txt, test/T_NotContextTest/run.pl,
- test/T_NumericalTags/grammar.cg3,
- test/T_NumericalTags/grammar.txt, test/T_NumericalTags/run.pl,
- test/T_OmniWithBarrier/grammar.cg3,
- test/T_OmniWithBarrier/grammar.txt,
- test/T_OmniWithBarrier/run.pl, test/T_Omniscan/grammar.cg3,
- test/T_Omniscan/grammar.txt, test/T_Omniscan/run.pl,
- test/T_OriginPassing/grammar.cg3,
- test/T_OriginPassing/grammar.txt, test/T_OriginPassing/run.pl,
- test/T_Parentheses/grammar.cg3, test/T_Parentheses/grammar.txt,
- test/T_Parentheses/run.pl, test/T_RegExp/grammar.cg3,
- test/T_RegExp/grammar.txt, test/T_RegExp/run.pl,
- test/T_Relations/grammar.cg3, test/T_Relations/grammar.txt,
- test/T_Relations/run.pl, test/T_RemCohort/grammar.cg3,
- test/T_RemCohort/grammar.txt, test/T_RemCohort/run.pl,
- test/T_RemoveSingleTag/grammar.cg3,
- test/T_RemoveSingleTag/grammar.txt,
- test/T_RemoveSingleTag/run.pl, test/T_ScanningTests/grammar.cg3,
- test/T_ScanningTests/grammar.txt, test/T_ScanningTests/run.pl,
- test/T_SectionRanges/grammar.cg3,
- test/T_SectionRanges/grammar.txt, test/T_SectionRanges/run.pl,
- test/T_Sections/grammar.cg3, test/T_Sections/grammar.txt,
- test/T_Sections/run.pl, test/T_SetOp_FailFast/grammar.cg3,
- test/T_SetOp_FailFast/grammar.txt, test/T_SetOp_FailFast/run.pl,
- test/T_SetOps/grammar.cg3, test/T_SetOps/grammar.txt,
- test/T_SetOps/run.pl, test/T_SetParentChild/grammar.cg3,
- test/T_SetParentChild/grammar.txt, test/T_SetParentChild/run.pl,
- test/T_SoftDelimiters/grammar.cg3,
- test/T_SoftDelimiters/grammar.txt, test/T_SoftDelimiters/run.pl,
- test/T_SpaceInForms/grammar.cg3, test/T_SpaceInForms/grammar.txt,
- test/T_SpaceInForms/run.pl,
- test/T_SubReadings_Apertium/grammar.cg3,
- test/T_SubReadings_Apertium/grammar.txt,
- test/T_SubReadings_Apertium/run.pl,
- test/T_SubReadings_CG/grammar.cg3,
- test/T_SubReadings_CG/grammar.txt, test/T_SubReadings_CG/run.pl,
- test/T_SubstituteNil/grammar.cg3,
- test/T_SubstituteNil/grammar.txt, test/T_SubstituteNil/run.pl,
- test/T_Templates/grammar.cg3, test/T_Templates/grammar.txt,
- test/T_Templates/run.pl, test/T_Trace/grammar.cg3,
- test/T_Trace/grammar.txt, test/T_Trace/run.pl,
- test/T_Unification/grammar.cg3, test/T_Unification/grammar.txt,
- test/T_Unification/run.pl, test/T_Variables/grammar.cg3,
- test/T_Variables/grammar.txt, test/T_Variables/run.pl: Rename
- grammars to .cg3 extension; Fix a missing set parse error.
-
-2012-08-25 tino
-
- * [r8487] get-boost.sh: Fix get-boost
- * [r8485] src/TextualParser.cpp, src/TextualParser.h: Continue
- parsing up to 10 errors.
-
-2012-08-23 tino
-
- * [r8482] test/T_MappingPrefix/expected.txt,
- test/T_Unification/expected.txt: Also merge trace lines.
- * [r8480] scripts/cg3-autobin.pl, src/GrammarApplicator.h,
- src/GrammarApplicator_reflow.cpp, src/version.h: Also merge trace
- lines.
-
-2012-08-22 tino
-
- * [r8467] get-boost.sh, src/ApertiumApplicator.cpp: Boost 1.51.0
-
-2012-08-13 unhammer
-
- * [r8450] src/ApertiumApplicator.cpp: When outputting
- wordforms/surface forms, escape apertium stream format reserved
- characters. As -r8429.
-
-2012-08-02 unhammer
-
- * [r8429] src/ApertiumApplicator.cpp: When outputting
- baseforms/lemmas, escape apertium stream format reserved
- characters. Big if, but seems to have no impact on speed.
- * [r8428] emacs/cg.el: highlight iff, remcohort, addcohort
-
-2012-07-29 tino
-
- * [r8407] newsletters/2012-07-02.txt, newsletters/2012-07-29.txt:
- cg3 news
- * [r8406] scripts/cg3-autobin.pl, src/TextualParser.cpp,
- src/version.h: Actualize the new behavior of ALL/NONE NOT C
-
-2012-07-25 tino
-
- * [r8400] src/GrammarApplicator_reflow.cpp: cg3
- * [r8399] dist/dist-osx.pl, scripts/cg3-autobin.pl,
- src/BinaryGrammar_read.cpp, src/BinaryGrammar_write.cpp,
- src/Cohort.h, src/GrammarApplicator_reflow.cpp, src/version.h:
- Relations are now pipable. Bumped minor version to 8, since
- --dep-delimit and relation piping are major steps towards 1.0.
-
-2012-07-24 tino
-
- * [r8398] src/GrammarApplicator.cpp: cg3
- * [r8397] TODO, manual/relations.xml, scripts/cg3-autobin.pl,
- src/Cohort.h, src/Grammar.cpp, src/Grammar.h,
- src/GrammarApplicator.cpp, src/GrammarApplicator.h,
- src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runRules.cpp, src/SingleWindow.cpp,
- src/Tag.cpp, src/Tag.h, src/TextualParser.cpp, src/Window.h,
- src/main.cpp, src/version.h: WIP: Allow piping relations to later
- CG-3 processes. Still needs some polishing, but basically works.
-
-2012-07-21 tino
-
- * [r8395] manual/cmdreference.xml, manual/dependencies.xml: cg3
- * [r8394] scripts/cg3-autobin.pl, src/GrammarApplicator.cpp,
- src/GrammarApplicator.h, src/GrammarApplicator_runGrammar.cpp,
- src/SingleWindow.cpp, src/main.cpp, src/options.h, src/version.h:
- Made --dep-delimit take an opt arg
-
-2012-07-19 tino
-
- * [r8393] src/Window.cpp: cg3
- * [r8392] scripts/cg3-autobin.pl, src/CohortIterator.cpp,
- src/CohortIterator.h, src/GrammarApplicator.cpp,
- src/GrammarApplicator.h, src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/SingleWindow.cpp,
- src/Window.cpp, src/main.cpp, src/options.h, src/version.h,
- test/T_BasicDependency/run.pl: Added -D, --dep-delimit
-
-2012-07-18 tino
-
- * [r8391] get-boost.sh: Boost 1.50.0
-
-2012-07-02 tino
-
- * [r8382] newsletters/2012-07-02.txt: cg3
- * [r8381] CMakeLists.txt, scripts/cg3-autobin.pl,
- src/ContextualTest.h, src/stdafx.h, src/version.h: Older compiler
- fixes
-
-2012-06-29 tino
-
- * [r8373] scripts/cg3-autobin.pl, src/TextualParser.cpp,
- src/version.h: Actualize the deprecations
- * [r8372] src/ContextualTest.h,
- src/GrammarApplicator_runContextualTest.cpp,
- src/TextualParser.cpp: Don't warn for complex positions.
- * [r8371] src/CohortIterator.cpp, src/CohortIterator.h,
- src/GrammarApplicator.h,
- src/GrammarApplicator_runContextualTest.cpp,
- src/sorted_vector.hpp: Right/Leftmost now scans
- * [r8370] src/GrammarApplicator_runContextualTest.cpp: Check Self
- when I say so...
- * [r8369] manual/dependencies.xml,
- src/GrammarApplicator_runRules.cpp: Descendents, not Ancestors.
- Also allow relations rules to be asymmetric.
-
-2012-06-22 tino
-
- * [r8368] CMakeLists.txt, manual/contexts.xml,
- manual/dependencies.xml, manual/relations.xml,
- scripts/cg3-autobin.pl, src/BinaryGrammar_read.cpp,
- src/BinaryGrammar_write.cpp, src/Cohort.h,
- src/CohortIterator.cpp, src/CohortIterator.h,
- src/ContextualTest.cpp, src/ContextualTest.h,
- src/GrammarApplicator.cpp, src/GrammarApplicator.h,
- src/GrammarApplicator_matchSet.cpp,
- src/GrammarApplicator_runContextualTest.cpp,
- src/MatxinApplicator.cpp, src/TextualParser.cpp, src/inlines.h,
- src/sorted_vector.hpp, src/stdafx.h, src/version.h,
- test/T_BasicDependency/expected.txt,
- test/T_BasicDependency/grammar.txt,
- test/T_BasicDependency/input.txt: Added context cc for ancestors,
- and (l,r,ll,rr,lll,rrr,llr,rrl) for left/right limits. Doesn't
- work for parents yet.
-
-2012-06-11 tino
-
- * [r8357] scripts/cg3-autobin.pl, src/Grammar.cpp, src/Grammar.h,
- src/GrammarWriter.cpp, src/version.h: Fix set name hash collision
-
-2012-06-09 tino
-
- * [r8354] scripts/cg3-autobin.pl, src/version.h: Fix RemCohort
- segfault
- * [r8353] src/Cohort.cpp: cg3
- * [r8352] src/GrammarApplicator_runRules.cpp: cg3
- * [r8351] scripts/cg3-autobin.pl, src/Cohort.cpp,
- src/GrammarApplicator_runRules.cpp, src/version.h: cg3
-
-2012-05-21 tino
-
- * [r8308] src/Grammar.cpp, src/GrammarApplicator.cpp: cg3
- * [r8307] src/Grammar.cpp, src/Tag.cpp: cg3
- * [r8306] scripts/cg3-autobin.pl, src/Grammar.cpp, src/version.h:
- cg3
- * [r8305] manual/tags.xml, scripts/cg3-autobin.pl, src/Grammar.cpp,
- src/Grammar.h, src/GrammarApplicator.cpp,
- src/GrammarApplicator.h, src/GrammarApplicator_matchSet.cpp,
- src/GrammarApplicator_reflow.cpp, src/Tag.cpp, src/Tag.h,
- src/version.h, test/T_RegExp/expected.txt,
- test/T_RegExp/grammar.txt: Added regular expressions in the usual
- // notation, though requiring suffix r or i or ri.
- * [r8304] scripts/cg3-autobin.pl, src/CohortIterator.cpp,
- src/version.h: Fixed parent iterator's reset to clear seen before
- incrementing.
-
-2012-05-14 tino
-
- * [r8288] manual/installation.xml, scripts/cg3-autobin.pl,
- src/GrammarApplicator_runRules.cpp, src/version.h: Made
- getTagList() eliminate consecutive duplicates to fix some
- unification issues.
-
-2012-04-16 unhammer
-
- * [r8225] src/ApertiumApplicator.cpp: keep first seen
- blank/superblank in lSWindow->text, useful for cg-conv
- * [r8224] src/GrammarApplicator.cpp: ensure newlines after
- window->text
- * [r8223] emacs/cg.el: allow spaces in "<word>"
- * [r8222] src/ApertiumApplicator.cpp: handle escaped chars in
- tags/lemmas
-
-2012-04-12 tino
-
- * [r8216] scripts/cg3-autobin.pl,
- src/GrammarApplicator_runContextualTest.cpp, src/version.h: Fixed
- NONE p Tag to be true if there is a parent but the parent does
- not match Tag.
-
-2012-03-22 tino
-
- * [r8190] src/cg3.h, src/cg_conv.cpp, src/icu_uoptions.h: ICU 49.1
- redefinition warning fix
-
-2012-03-08 tino
-
- * [r8114] manual: Unlinked externals
-
-2012-03-07 ftyers
-
- * [r8105] src/cg_proc.cpp: document null flush option
-
-2012-03-04 tino
-
- * [r8096] src/ApertiumApplicator.cpp, src/ApertiumApplicator.h,
- src/BinaryGrammar.cpp, src/BinaryGrammar.h,
- src/BinaryGrammar_read.cpp, src/BinaryGrammar_write.cpp,
- src/Cohort.cpp, src/Cohort.h, src/CohortIterator.cpp,
- src/CohortIterator.h, src/CompositeTag.cpp, src/CompositeTag.h,
- src/ContextualTest.cpp, src/ContextualTest.h,
- src/FormatConverter.cpp, src/FormatConverter.h, src/Grammar.cpp,
- src/Grammar.h, src/GrammarApplicator.cpp,
- src/GrammarApplicator.h, src/GrammarApplicator_matchSet.cpp,
- src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runContextualTest.cpp,
- src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
- src/GrammarWriter.h, src/IGrammarParser.h,
- src/MatxinApplicator.cpp, src/MatxinApplicator.h,
- src/Reading.cpp, src/Reading.h, src/Rule.cpp, src/Rule.h,
- src/Set.cpp, src/Set.h, src/SingleWindow.cpp, src/SingleWindow.h,
- src/Strings.cpp, src/Strings.h, src/Tag.cpp, src/Tag.h,
- src/TextualParser.cpp, src/TextualParser.h, src/Window.cpp,
- src/Window.h, src/bloomish.hpp, src/cg3.h, src/cg_comp.cpp,
- src/cg_conv.cpp, src/cg_proc.cpp, src/inlines.h,
- src/interval_vector.hpp, src/libcg3.cpp, src/macros.h,
- src/main.cpp, src/options.h, src/sorted_vector.hpp, src/stdafx.h,
- src/test_libcg3.c, src/uextras.cpp, src/uextras.h, src/version.h:
- Now in 2012
- * [r8095] src/libcg3.cpp: cg3
-
-2012-02-25 tino
-
- * [r8080] get-boost.sh, src/CMakeLists.txt: Boost 1.49.0
-
-2012-02-17 tino
-
- * [r8001] compile-profile.sh, scripts/profile-revisions.php,
- src/all_vislcg3.cpp, vapply.sh, vparse.sh: cg3
-
-2012-02-09 tino
-
- * [r7992] src/GrammarApplicator_runRules.cpp: Endless rule loop
- combo breaker
- * [r7991] src/GrammarApplicator_runRules.cpp: Debug info; Delimit
- fix maybe
- * [r7990] manual/faq.xml, scripts/cg3-autobin.pl: Autobin args and
- FAQ entry
- * [r7989] scripts/cg3-autobin.pl,
- src/GrammarApplicator_runRules.cpp, src/version.h: Made AddCohort
- add magic reading if none were listed.
-
-2012-01-31 ftyers
-
- * [r7933] src/ApertiumApplicator.cpp: the sound of code
- vanishing...
- * [r7932] src/ApertiumApplicator.cpp: update so we don't get
- spurious '/'
-
-2012-01-29 ftyers
-
- * [r7917] src/ApertiumApplicator.cpp, src/ApertiumApplicator.h,
- src/cg_proc.cpp: -1 option to only print out first analysis
-
-2012-01-18 tino
-
- * [r7873] scripts/cg3-autobin.pl, src/GrammarApplicator.cpp,
- src/GrammarApplicator.h, src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runRules.cpp, src/SingleWindow.cpp,
- src/SingleWindow.h, src/version.h: CG3 fix Delimit eating
- enclosures
-
-2012-01-06 tino
-
- * [r7824] newsletters/2012-01-06.txt: cg3 news
- * [r7823] manual/drafts.xml, manual/rules.xml,
- scripts/cg3-autobin.pl, src/version.h: CG-3 Release 0.9.7.7823
-
-2012-01-05 tino
-
- * [r7821] manual/bibliography.xml, manual/binarygrammar.xml,
- manual/cgglossary.xml, manual/cgkeywords.xml,
- manual/cmdreference.xml, manual/compatibility.xml,
- manual/contexts.xml, manual/contributing.xml,
- manual/dependencies.xml, manual/drafts.xml, manual/externals.xml,
- manual/faq.xml, manual/grammar.xml, manual/installation.xml,
- manual/intro.xml, manual/license.xml, manual/manual.xml,
- manual/parentheses.xml, manual/probabilistic.xml,
- manual/relations.xml, manual/rules.xml, manual/sets.xml,
- manual/streamcmds.xml, manual/streamformats.xml,
- manual/subreadings.xml, manual/tags.xml, manual/templates.xml,
- test/T_SubReadings_CG/input.txt: CG-3 docs
-
-2011-12-22 tino
-
- * [r7814] scripts/cg3-autobin.pl, src/GrammarApplicator.cpp,
- src/GrammarApplicator.h, src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/version.h,
- test/T_SubReadings, test/T_SubReadings_Apertium,
- test/T_SubReadings_CG, test/T_SubReadings_CG/expected.txt,
- test/T_SubReadings_CG/grammar.txt,
- test/T_SubReadings_CG/input.txt, test/T_SubReadings_CG/run.pl:
- Added sub-reading support to vislcg3 and cg-conv.
-
-2011-11-17 tino
-
- * [r7697] get-boost.sh: Boost 1.48.0
-
-2011-11-07 tino
-
- * [r7655] src/TextualParser.cpp: Add warning for unnamed named
- rules
-
-2011-11-05 tino
-
- * [r7650] src/Grammar.cpp, test/T_JumpExecute/grammar.txt:
- Implemented JUMP; still need Execute and docs
- * [r7649] test/T_JumpExecute/expected.txt: Implemented JUMP; still
- need Execute and docs
- * [r7648] src/CMakeLists.txt: Implemented JUMP; still need Execute
- and docs
- * [r7647] scripts/cg3-autobin.pl, src/Anchor.cpp, src/Anchor.h,
- src/BinaryGrammar_read.cpp, src/BinaryGrammar_write.cpp,
- src/Cohort.cpp, src/Grammar.cpp, src/Grammar.h,
- src/GrammarApplicator_runRules.cpp, src/TextualParser.cpp,
- src/TextualParser.h, src/interval_vector.hpp, src/version.h,
- test/T_JumpExecute, test/T_JumpExecute/grammar.txt,
- test/T_JumpExecute/input.txt, test/runall.pl: Implemented JUMP;
- still need Execute and docs
-
-2011-11-04 tino
-
- * [r7641] compile-profile.sh, newsletters/2008-09-11.txt,
- profile-apply.sh, profile-parse.sh, scripts/cg3-autobin.pl,
- test/T_AnyMinusSome/run.pl, test/T_Barrier/run.pl,
- test/T_BasicAppend/run.pl, test/T_BasicContextTest/run.pl,
- test/T_BasicDelimit/run.pl, test/T_BasicDependency/run.pl,
- test/T_BasicIff/run.pl, test/T_BasicRemove/run.pl,
- test/T_BasicSelect/run.pl, test/T_BasicSubstitute/run.pl,
- test/T_CG2Compat/run.pl, test/T_CarefulBarrier/run.pl,
- test/T_CompositeSelect/run.pl, test/T_DelayAndDelete/run.pl,
- test/T_Dependency_Loops/run.pl,
- test/T_Dependency_OutOfRange/run.pl,
- test/T_DontMatchEmptySet/run.pl, test/T_EndlessSelect/run.pl,
- test/T_External/run.pl, test/T_Include/run.pl,
- test/T_InputCommands/run.pl, test/T_InputMarkup/run.pl,
- test/T_MapAdd_Different/run.pl, test/T_MapThenRemove/run.pl,
- test/T_MapThenSelect/run.pl, test/T_MappingPrefix/run.pl,
- test/T_Movement/run.pl, test/T_MultipleSections/run.pl,
- test/T_NegatedContextTest/run.pl, test/T_NotContextTest/run.pl,
- test/T_NumericalTags/run.pl, test/T_OmniWithBarrier/run.pl,
- test/T_Omniscan/run.pl, test/T_OriginPassing/run.pl,
- test/T_Parentheses/run.pl, test/T_RegExp/run.pl,
- test/T_Relations/run.pl, test/T_RemCohort/run.pl,
- test/T_RemoveSingleTag/run.pl, test/T_ScanningTests/run.pl,
- test/T_SectionRanges/run.pl, test/T_Sections/run.pl,
- test/T_SetOp_FailFast/run.pl, test/T_SetOps/run.pl,
- test/T_SetParentChild/run.pl, test/T_SoftDelimiters/run.pl,
- test/T_SpaceInForms/run.pl, test/T_SubstituteNil/run.pl,
- test/T_Templates/run.pl, test/T_Trace/run.pl,
- test/T_Unification/run.pl, test/T_Variables/run.pl,
- test/clean.sh, test/runall.pl, vapply.sh, vparse.sh: Changed
- .bin3 to .cg3b everywhere.
-
-2011-11-01 tino
-
- * [r7630] CMakeLists.txt: cg3
-
diff --git a/TODO b/TODO
index 9f84daf..13981de 100644
--- a/TODO
+++ b/TODO
@@ -1,7 +1,6 @@
ToDo: Rules can be dynamically reordered in "optimize correct" mode by moving the offending rule to the end of the section
ToDo: Refactor statistics to be gathered in their own objects rather than inline with Grammar, Rules, ContextualTest, Set
ToDo: Enable a daemonized version. Easily doable via Perl's Net::Server::PreFork.
-ToDo: Make old cohort -> rules order available via switch
ToDo: Make use of Preferred-Targets
ToDo: Move DoesTagMatchSet to Grammar
ToDo: Line number on all applicable warnings
@@ -24,7 +23,6 @@ ToDo: Use Cohort iterator instead of subfunctions
ToDo: Make things less magic (explain what magic means in the various contexts)
ToDo: Handle duplicate different sets and delimiters from INCLUDE files. Possibly make new keyword or option for INCLUDE. Handle line numbering how?
ToDo: Add an insert position for MAP/ADD
-ToDo: Stabilize the binary grammar format or get to a point where old versions are easy to load
ToDo: Don't output a warning if it is identical to last output warning.
ToDo: Allow rules to be run on contextual test targets, if all tests are true.
ToDo: Rules don't currently require ; termination...should they?
@@ -56,3 +54,9 @@ ToDo: \u and \U escapes in textual tags; possibly only varstrings or "tags"
ToDo: cg-conv should just convert cohorts directly - no need to build whole sentences.
ToDo: When going from section 1 to section 1+2 the first time, just skip right to the section 2 rules
ToDo: Rule type MATH, ARITH or similar to manipulate numeric tag values.
+ToDo: Explain that SAFE vs. UNSAFE is about preserving the most information - SAFE is always least destructive
+ToDo: Document static tags / static reading
+ToDo: Section number in trace
+ToDo: Basque correct parse
+ToDo: Dep on readings
+ToDo: CLINK to require that all paths satisfy the linked tests
diff --git a/cg3.g b/cg3.g
index ec2ccf3..7a02168 100644
--- a/cg3.g
+++ b/cg3.g
@@ -31,7 +31,7 @@ TEMPLATE name = [x,y,z] ;
cg : stat+ 'END'? ;
-stat
+stat
: delimiters
| soft_delimiters
| preferred_targets
@@ -50,7 +50,6 @@ stat
| after_sections
| null_section
-// Fixme: It is not valid for any rules to appear before some section type has been seen
| rule
| rule_substitute_etc
| rule_map_etc
@@ -166,7 +165,7 @@ rule_external
template
: 'TEMPLATE' ntag '=' contexttest ';'
;
-
+
contexttest
// Fixme: Not perfect. Parses, but goes in the wrong category...
// Fixme: It really should be the contextpos that's optional, and that it is only optional if inlineset starts with T:, but dunno if that should be expressed in BNF
@@ -176,7 +175,7 @@ contexttest
| '[' inlineset (',' inlineset)* ']' ('LINK' contexttest)?
| '(' contexttest ')' ('OR' '(' contexttest ')')*
;
-
+
inlineset
: inlineset_single (set_op inlineset_single)*
;
diff --git a/emacs/cg.el b/emacs/cg.el
index 1f04256..54435a7 100644
--- a/emacs/cg.el
+++ b/emacs/cg.el
@@ -3,7 +3,7 @@
;; Copyright (C) 2010-2013 Kevin Brubeck Unhammer
;; Author: Kevin Brubeck Unhammer <unhammer at fsfe.org>
-;; Version: 0.1.6
+;; Version: 0.1.7
;; Url: http://beta.visl.sdu.dk/constraint_grammar.html
;; Keywords: languages
@@ -32,10 +32,14 @@
;; ; Or if you use a non-standard file suffix, e.g. .rlx:
;; (add-to-list 'auto-mode-alist '("\\.rlx\\'" . cg-mode))
-;; I recommend using autocomplete-mode for tab-completion, and
+;; I recommend using auto-complete-mode for tab-completion, and
;; smartparens-mode if you're used to it (paredit-mode does not work
;; well if you have set names with the # character in them). Both are
-;; available from MELPA (see http://melpa.milkbox.net/).
+;; available from MELPA (see http://melpa.milkbox.net/). You can
+;; lazy-load auto-complete for cg-mode like this:
+;;
+;; (eval-after-load 'auto-complete '(add-to-list 'ac-modes 'cg-mode))
+
;; TODO:
;; - optionally highlight any LIST/SET without ; at the end
@@ -44,6 +48,7 @@
;; - run vislcg3 --show-unused-sets and buttonise with line numbers (like Occur does)
;; - indentation function (based on prolog again?)
;; - the rest of the keywords
+;; - http://beta.visl.sdu.dk/cg3/single/#regex-icase
;; - keyword tab-completion
;; - the quotes-within-quotes thing plays merry hell with
;; paredit-doublequote, write a new doublequote function?
@@ -56,7 +61,7 @@
;;; Code:
-(defconst cg-version "0.1.6" "Version of cg-mode")
+(defconst cg-version "0.1.7" "Version of cg-mode")
(eval-when-compile (require 'cl))
@@ -78,7 +83,7 @@
"The vislcg3 command, e.g. \"/usr/local/bin/vislcg3\".
Buffer-local, so use `setq-default' if you want to change the
-global default value.
+global default value.
See also `cg-extra-args' and `cg-pre-pipe'."
:type 'string)
@@ -89,7 +94,7 @@ See also `cg-extra-args' and `cg-pre-pipe'."
"Extra arguments sent to vislcg3 when running `cg-check'.
Buffer-local, so use `setq-default' if you want to change the
-global default value.
+global default value.
See also `cg-command'."
:type 'string)
@@ -99,7 +104,7 @@ See also `cg-command'."
;;;###autoload
(defcustom cg-pre-pipe "cg-conv"
"Pipeline to run before the vislcg3 command when testing a file
-with `cg-check'.
+with `cg-check'.
Buffer-local, so use `setq-default' if you want to change the
global default value. If you want to set it on a per-file basis,
@@ -116,7 +121,7 @@ See also `cg-command' and `cg-post-pipe'."
;;;###autoload
(defcustom cg-post-pipe ""
"Pipeline to run after the vislcg3 command when testing a file
-with `cg-check'.
+with `cg-check'.
Buffer-local, so use `setq-default' if you want to change the
global default value. If you want to set it on a per-file basis,
@@ -165,7 +170,7 @@ re-evaluating `cg-kw-re' (or all of cg.el)." )
"IMMEDIATE"
"LOOKDELETED"
"LOOKDELAYED"
- "UNSAFE" ;
+ "UNSAFE" ;
"SAFE"
"REMEMBERX"
"RESETX"
@@ -311,7 +316,6 @@ CG-mode provides the following specific keyboard key bindings:
(set (make-local-variable 'beginning-of-defun-function) #'cg-beginning-of-defun)
(set (make-local-variable 'end-of-defun-function) #'cg-end-of-defun)
(setq indent-line-function #'cg-indent-line)
- (easy-mmode-pretty-mode-name 'cg-mode " cg")
(when font-lock-mode
(setq font-lock-set-defaults nil)
(font-lock-set-defaults)
@@ -319,14 +323,14 @@ CG-mode provides the following specific keyboard key bindings:
(add-hook 'after-change-functions #'cg-after-change nil 'buffer-local)
(let ((buf (current-buffer)))
(run-with-idle-timer 1 'repeat 'cg-output-hl buf))
- (run-mode-hooks #'cg-mode-hook))
+ (run-mode-hooks 'cg-mode-hook))
(defconst cg-font-lock-syntactic-keywords
;; We can have ("words"with"quotes"inside"")! Quote rule: is it a ",
;; if yes then jump to next unescaped ". Then regardless, jump to
;; next whitespace, but don't cross an unescaped )
- '(("\\(\"\\)[^\"\n]*\\(?:\"\\(?:\\\\)\\|[^) \n\t]\\)*\\)?\\(\"\\)\\(r\\(i\\)?\\)?[); \n\t]"
+ '(("\\(\"\\)[^\"\n]*\\(?:\"\\(?:\\\\)\\|[^) \n\t]\\)*\\)?\\(\"\\)[irv]\\{0,3\\}[); \n\t]"
(1 "\"")
(2 "\""))
;; A `#' begins a comment when it is unquoted and at the beginning
@@ -351,7 +355,7 @@ seems this function only runs on comments and strings..."
(if
(save-excursion
(goto-char (nth 8 state))
- (re-search-forward "\"[^\"\n]*\\(\"\\(\\\\)\\|[^) \n\t]\\)*\\)?\"\\(r\\(i\\)?\\)?[); \n\t]")
+ (re-search-forward "\"[^\"\n]*\\(\"\\(\\\\)\\|[^) \n\t]\\)*\\)?\"[irv]\\{0,3\\}[); \n\t]")
(and (match-string 1)
(not (equal ?\\ (char-before (match-beginning 1))))
;; TODO: make next-error hit these too
@@ -621,7 +625,7 @@ from, otherwise all CG buffers share one input buffer."
"Runs `cg-output-setup-hook' for `cg-check'. That hook is
useful for doing things like
(setenv \"PATH\" (concat \"~/local/stuff\" (getenv \"PATH\")))"
- (run-hooks #'cg-output-setup-hook))
+ (run-hooks 'cg-output-setup-hook))
(defvar cg-output-comment-face font-lock-comment-face ;compilation-info-face
"Face name to use for comments in cg-output.")
@@ -635,7 +639,7 @@ useful for doing things like
(defvar cg-output-mapping-face 'bold
"Face name to use for mapping tags in cg-output")
-(defvar cg-output-mode-font-lock-keywords
+(defvar cg-output-mode-font-lock-keywords
'(("^;\\(?:[^:]* \\)"
;; hack alert! a colon in a tag will mess this up
;; (hardly matters much though)
@@ -738,7 +742,7 @@ from hiding. Call `cg-output-show-all' to turn off all hiding."
(lexical-let (last)
(save-excursion
(goto-char (point-min))
- (while (re-search-forward "^\"<.*>\"$" nil 'noerror)
+ (while (re-search-forward "^\"<.*>\"" nil 'noerror)
(let ((line-beg (match-beginning 0))
(line-end (match-end 0)))
(cg-output-hide-region line-beg (+ line-beg 2)) ; "<
@@ -751,7 +755,7 @@ from hiding. Call `cg-output-show-all' to turn off all hiding."
(goto-char last)
(when (re-search-forward "^[^\t\"]" nil 'noerror)
(cg-output-hide-region last (match-beginning 0)))))
-
+
(when cg-output-unhide-regex
(cg-output-unhide-some cg-output-unhide-regex)))
@@ -778,7 +782,7 @@ and reused whenever `cg-output-hide-analyses' is called."
(setq cg--output-unhide-history (cons needle cg--output-unhide-history)))
(cg-output-hide-analyses))
-;;; TODO:
+;;; TODO:
(defun cg-output-toggle-analyses ()
"Hide or show analyses from output. See
`cg-output-hide-analyses'."
@@ -943,7 +947,7 @@ Similarly, `cg-post-pipe' is run on output."
(let ((cg-buffer (find-buffer-visiting cg--file)))
(bury-buffer)
(let ((cg-window (get-buffer-window cg-buffer)))
-
+
(if cg-window
(select-window cg-window)
(pop-to-buffer cg-buffer)))))
@@ -992,7 +996,7 @@ Similarly, `cg-post-pipe' is run on output."
;; Tino Didriksen recommends this file suffix.
;;; Run hooks -----------------------------------------------------------------
-(run-hooks #'cg-load-hook)
+(run-hooks 'cg-load-hook)
(provide 'cg)
diff --git a/get-boost.sh b/get-boost.sh
index 4328842..c928d56 100755
--- a/get-boost.sh
+++ b/get-boost.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-export BOOSTVER=56
+export BOOSTVER=57
export BDOT="1.$BOOSTVER.0"
export BUC="boost_1_${BOOSTVER}_0"
diff --git a/include/exec-stream/exec-stream.cpp b/include/exec-stream/exec-stream.cpp
deleted file mode 100644
index 36ec4ca..0000000
--- a/include/exec-stream/exec-stream.cpp
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
-Copyright (C) 2004 Artem Khodush
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-3. The name of the author may not be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include "exec-stream.h"
-
-#include <list>
-#include <vector>
-#include <algorithm>
-#include <exception>
-
-#ifdef _WIN32
-
-#define NOMINMAX
-#include <windows.h>
-
-#define HELPERS_H "win/exec-stream-helpers.h"
-#define HELPERS_CPP "win/exec-stream-helpers.cpp"
-#define IMPL_CPP "win/exec-stream-impl.cpp"
-
-#else
-
-#include <errno.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <pthread.h>
-
-#define HELPERS_H "posix/exec-stream-helpers.h"
-#define HELPERS_CPP "posix/exec-stream-helpers.cpp"
-#define IMPL_CPP "posix/exec-stream-impl.cpp"
-
-#endif
-
-namespace exec_stream_ns {
-}
-using namespace exec_stream_ns;
-
-// helper classes
-namespace exec_stream_ns {
-
-class buffer_list_t {
-public:
- struct buffer_t {
- std::size_t size;
- char * data;
- };
-
- buffer_list_t();
- ~buffer_list_t();
-
- void get( char * dst, std::size_t & size );
- void get_translate_crlf( char * dst, std::size_t & size );
- void put( char * const src, std::size_t size );
- void put_translate_crlf( char * const src, std::size_t size );
- buffer_t detach();
-
- bool empty();
- bool full( std::size_t limit ); // limit==0 -> no limit
-
- void clear();
-
-private:
- typedef std::list< buffer_t > buffers_t;
- buffers_t m_buffers;
- std::size_t m_read_offset; // offset into the first buffer
- std::size_t m_total_size;
-};
-
-buffer_list_t::buffer_list_t()
-{
- m_total_size=0;
- m_read_offset=0;
-}
-
-buffer_list_t::~buffer_list_t()
-{
- clear();
-}
-
-void buffer_list_t::get( char * dst, std::size_t & size )
-{
- std::size_t written_size=0;
- while( size>0 && m_total_size>0 ) {
- std::size_t portion_size=std::min( size, m_buffers.front().size-m_read_offset );
- std::char_traits< char >::copy( dst, m_buffers.front().data+m_read_offset, portion_size );
- dst+=portion_size;
- size-=portion_size;
- m_total_size-=portion_size;
- m_read_offset+=portion_size;
- written_size+=portion_size;
- if( m_read_offset==m_buffers.front().size ) {
- delete[] m_buffers.front().data;
- m_buffers.pop_front();
- m_read_offset=0;
- }
- }
- size=written_size;
-}
-
-void buffer_list_t::get_translate_crlf( char * dst, std::size_t & size )
-{
- std::size_t written_size=0;
- while( written_size!=size && m_total_size>0 ) {
- while( written_size!=size && m_read_offset!=m_buffers.front().size ) {
- char c=m_buffers.front().data[m_read_offset];
- if( c!='\r' ) { // MISFEATURE: single \r in the buffer will cause end of file
- *dst++=c;
- ++written_size;
- }
- --m_total_size;
- ++m_read_offset;
- }
- if( m_read_offset==m_buffers.front().size ) {
- delete[] m_buffers.front().data;
- m_buffers.pop_front();
- m_read_offset=0;
- }
- }
- size=written_size;
-}
-
-void buffer_list_t::put( char * const src, std::size_t size )
-{
- buffer_t buffer;
- buffer.data=new char[size];
- buffer.size=size;
- std::char_traits< char >::copy( buffer.data, src, size );
- m_buffers.push_back( buffer );
- m_total_size+=buffer.size;
-}
-
-void buffer_list_t::put_translate_crlf( char * const src, std::size_t size )
-{
- char const * p=src;
- std::size_t lf_count=0;
- while( p!=src+size ) {
- if( *p=='\n' ) {
- ++lf_count;
- }
- ++p;
- }
- buffer_t buffer;
- buffer.data=new char[size+lf_count];
- buffer.size=size+lf_count;
- p=src;
- char * dst=buffer.data;
- while( p!=src+size ) {
- if( *p=='\n' ) {
- *dst++='\r';
- }
- *dst++=*p;
- ++p;
- }
- m_buffers.push_back( buffer );
- m_total_size+=buffer.size;
-}
-
-buffer_list_t::buffer_t buffer_list_t::detach()
-{
- buffer_t buffer=m_buffers.front();
- m_buffers.pop_front();
- m_total_size-=buffer.size;
- return buffer;
-}
-
-bool buffer_list_t::empty()
-{
- return m_total_size==0;
-}
-
-bool buffer_list_t::full( std::size_t limit )
-{
- return limit!=0 && m_total_size>=limit;
-}
-
-void buffer_list_t::clear()
-{
- for( buffers_t::iterator i=m_buffers.begin(); i!=m_buffers.end(); ++i ) {
- delete[] i->data;
- }
- m_buffers.clear();
- m_read_offset=0;
- m_total_size=0;
-}
-
-}
-
-// platform-dependent helpers
-
-namespace exec_stream_ns {
-
-#include HELPERS_H
-#include HELPERS_CPP
-
-}
-
-// stream buffer class
-namespace exec_stream_ns {
-
-class exec_stream_buffer_t : public std::streambuf {
-public:
- exec_stream_buffer_t( exec_stream_t::stream_kind_t kind, thread_buffer_t & thread_buffer );
- virtual ~exec_stream_buffer_t();
-
- void clear();
-
-protected:
- virtual int_type underflow();
- virtual int_type overflow( int_type c );
- virtual int sync();
-
-private:
- bool send_buffer();
- bool send_char( char c );
-
- exec_stream_t::stream_kind_t m_kind;
- thread_buffer_t & m_thread_buffer;
- char * m_stream_buffer;
-};
-
-const std::size_t STREAM_BUFFER_SIZE=4096;
-
-exec_stream_buffer_t::exec_stream_buffer_t( exec_stream_t::stream_kind_t kind, thread_buffer_t & thread_buffer )
-: m_kind( kind ), m_thread_buffer( thread_buffer )
-{
- m_stream_buffer=new char[STREAM_BUFFER_SIZE];
- clear();
-}
-
-exec_stream_buffer_t::~exec_stream_buffer_t()
-{
- delete[] m_stream_buffer;
-}
-
-void exec_stream_buffer_t::clear()
-{
- if( m_kind==exec_stream_t::s_in ) {
- setp( m_stream_buffer, m_stream_buffer+STREAM_BUFFER_SIZE );
- }else {
- setg( m_stream_buffer, m_stream_buffer+STREAM_BUFFER_SIZE, m_stream_buffer+STREAM_BUFFER_SIZE );
- }
-}
-
-exec_stream_buffer_t::int_type exec_stream_buffer_t::underflow()
-{
- if( gptr()==egptr() ) {
- std::size_t read_size=STREAM_BUFFER_SIZE;
- bool no_more = false;
- m_thread_buffer.get( m_kind, m_stream_buffer, read_size, no_more );
- if( no_more || read_size==0 ) { // there is no way for underflow to return something other than eof when 0 bytes are read
- return traits_type::eof();
- }else {
- setg( m_stream_buffer, m_stream_buffer, m_stream_buffer+read_size );
- }
- }
- return traits_type::to_int_type( *eback() );
-}
-
-bool exec_stream_buffer_t::send_buffer()
-{
- if( pbase()!=pptr() ) {
- std::size_t write_size=pptr()-pbase();
- std::size_t n=write_size;
- bool no_more = false;
- m_thread_buffer.put( pbase(), n, no_more );
- if( no_more || n!=write_size ) {
- return false;
- }else {
- setp( m_stream_buffer, m_stream_buffer+STREAM_BUFFER_SIZE );
- }
- }
- return true;
-}
-
-bool exec_stream_buffer_t::send_char( char c )
-{
- std::size_t write_size=1;
- bool no_more = false;
- m_thread_buffer.put( &c, write_size, no_more );
- return write_size==1 && !no_more;
-}
-
-exec_stream_buffer_t::int_type exec_stream_buffer_t::overflow( exec_stream_buffer_t::int_type c )
-{
- if( !send_buffer() ) {
- return traits_type::eof();
- }
- if( c!=traits_type::eof() ) {
- if( pbase()==epptr() ) {
- if( !send_char( static_cast<char>(c) ) ) {
- return traits_type::eof();
- }
- }else {
- sputc( static_cast<char>(c) );
- }
- }
- return traits_type::not_eof( c );
-}
-
-int exec_stream_buffer_t::sync()
-{
- if( !send_buffer() ) {
- return -1;
- }
- return 0;
-}
-
-// stream classes
-
-class exec_istream_t : public std::istream {
-public:
- exec_istream_t( exec_stream_buffer_t & buf )
- : std::istream( &buf ) {
- }
-};
-
-
-class exec_ostream_t : public std::ostream {
-public:
- exec_ostream_t( exec_stream_buffer_t & buf )
- : std::ostream( &buf ){
- }
-};
-
-}
-
-// platform-dependent implementation
-#include IMPL_CPP
-
-
-//platform-independent exec_stream_t member functions
-exec_stream_t::exec_stream_t()
-{
- m_impl=new impl_t;
- exceptions( true );
-}
-
-exec_stream_t::exec_stream_t( std::string const & program, std::string const & arguments )
-{
- m_impl=new impl_t;
- exceptions( true );
- start( program, arguments );
-}
-
-void exec_stream_t::new_impl()
-{
- m_impl=new impl_t;
-}
-
-exec_stream_t::~exec_stream_t()
-{
- try {
- close();
- }catch( ... ) {
- }
- delete m_impl;
-}
-
-std::ostream & exec_stream_t::in()
-{
- return m_impl->m_in;
-}
-
-std::istream & exec_stream_t::out()
-{
- return m_impl->m_out;
-}
-
-std::istream & exec_stream_t::err()
-{
- return m_impl->m_err;
-}
-
-void exec_stream_t::exceptions( bool enable )
-{
- if( enable ) {
- // getline sets failbit on eof, so we should enable badbit and badbit _only_ to propagate our exceptions through iostream code.
- m_impl->m_in.exceptions( std::ios_base::badbit );
- m_impl->m_out.exceptions( std::ios_base::badbit );
- m_impl->m_err.exceptions( std::ios_base::badbit );
- }else {
- m_impl->m_in.exceptions( std::ios_base::goodbit );
- m_impl->m_out.exceptions( std::ios_base::goodbit );
- m_impl->m_err.exceptions( std::ios_base::goodbit );
- }
-}
-
-// exec_stream_t::error_t
-namespace exec_stream_ns {
-
-std::string int2str( unsigned long i, int base, std::size_t width )
-{
- std::string s;
- s.reserve(4);
- while( i!=0 ) {
- s="0123456789abcdef"[i%base]+s;
- i/=base;
- }
- if( width!=0 ) {
- while( s.size()<width ) {
- s="0"+s;
- }
- }
- return s;
-}
-
-}
-
-exec_stream_t::error_t::error_t()
-{
-}
-
-exec_stream_t::error_t::error_t( std::string const & msg )
-{
- m_msg=msg;
-}
-
-exec_stream_t::error_t::error_t( std::string const & msg, error_code_t code )
-{
- compose( msg, code );
-}
-
-exec_stream_t::error_t::~error_t() throw()
-{
-}
-
-char const * exec_stream_t::error_t::what() const throw()
-{
- return m_msg.c_str();
-}
-
-
-void exec_stream_t::error_t::compose( std::string const & msg, error_code_t code )
-{
- m_msg=msg;
- m_msg+="\n[code 0x"+int2str( code, 16, 4 )+" ("+int2str( code, 10, 0 )+")]";
-}
diff --git a/include/exec-stream/exec-stream.h b/include/exec-stream/exec-stream.h
deleted file mode 100644
index ffbce84..0000000
--- a/include/exec-stream/exec-stream.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
-Copyright (C) 2004 Artem Khodush
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-3. The name of the author may not be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef exec_stream_h
-#define exec_stream_h
-
-#include <string>
-#include <exception>
-#include <istream>
-#include <ostream>
-#include <vector>
-
-class exec_stream_t {
-public:
- exec_stream_t();
- exec_stream_t( std::string const & program, std::string const & arguments );
- template< class iterator > exec_stream_t( std::string const & program, iterator args_begin, iterator args_end );
-
- ~exec_stream_t();
-
- enum stream_kind_t { s_in=1, s_out=2, s_err=4, s_all=s_in|s_out|s_err, s_child=8 };
-
- void set_buffer_limit( int stream_kind, std::size_t size );
-
- typedef unsigned long timeout_t;
- void set_wait_timeout( int stream_kind, timeout_t milliseconds );
-
- void set_binary_mode( int stream_kind );
- void set_text_mode( int stream_kind );
-
- void start( std::string const & program, std::string const & arguments );
- template< class iterator > void start( std::string const & program, iterator args_begin, iterator args_end );
- void start( std::string const & program, char const * arg1, char const * arg2 ); // to compensate for damage from the previous one
- void start( std::string const & program, char * arg1, char * arg2 );
-
- bool close_in();
- bool close();
- void kill();
- int exit_code();
-
- std::ostream & in();
- std::istream & out();
- std::istream & err();
-
- typedef unsigned long error_code_t;
-
- class error_t : public std::exception {
- public:
- error_t( std::string const & msg );
- error_t( std::string const & msg, error_code_t code );
- ~error_t() throw();
- virtual char const * what() const throw();
- protected:
- error_t();
- void compose( std::string const & msg, error_code_t code );
-
- std::string m_msg;
- };
-
-private:
- exec_stream_t( exec_stream_t const & );
- exec_stream_t & operator=( exec_stream_t const & );
-
- struct impl_t;
- friend struct impl_t;
- impl_t * m_impl;
-
- void exceptions( bool enable );
-
-// helpers for template member functions
- void new_impl();
-
- class next_arg_t {
- public:
- virtual ~next_arg_t()
- {
- }
-
- virtual std::string const * next()=0;
- };
-
- template< class iterator > class next_arg_impl_t : public next_arg_t {
- public:
- next_arg_impl_t( iterator args_begin, iterator args_end )
- : m_args_i( args_begin ), m_args_end( args_end )
- {
- }
-
- virtual std::string const * next()
- {
- if( m_args_i==m_args_end ) {
- return 0;
- }else {
- m_arg=*m_args_i;
- ++m_args_i;
- return &m_arg;
- }
- }
-
- private:
- iterator m_args_i;
- iterator m_args_end;
- std::string m_arg;
- };
-
- void start( std::string const & program, next_arg_t & next_arg );
-};
-
-template< class iterator > inline exec_stream_t::exec_stream_t( std::string const & program, iterator args_begin, iterator args_end )
-{
- new_impl();
- exceptions( true );
- start( program, args_begin, args_end );
-}
-
-template< class iterator > inline void exec_stream_t::start( std::string const & program, iterator args_begin, iterator args_end )
-{
- exec_stream_t::next_arg_impl_t< iterator > next_arg( args_begin, args_end );
- start( program, next_arg );
-}
-
-inline void exec_stream_t::start( std::string const & program, char const * arg1, char const * arg2 )
-{
- std::vector< std::string > args;
- args.push_back( std::string( arg1 ) );
- args.push_back( std::string( arg2 ) );
- start( program, args.begin(), args.end() );
-}
-
-inline void exec_stream_t::start( std::string const & program, char * arg1, char * arg2 )
-{
- std::vector< std::string > args;
- args.push_back( std::string( arg1 ) );
- args.push_back( std::string( arg2 ) );
- start( program, args.begin(), args.end() );
-}
-
-#endif
diff --git a/include/exec-stream/posix/exec-stream-helpers.cpp b/include/exec-stream/posix/exec-stream-helpers.cpp
deleted file mode 100644
index a1c07b7..0000000
--- a/include/exec-stream/posix/exec-stream-helpers.cpp
+++ /dev/null
@@ -1,842 +0,0 @@
-/*
-Copyright (C) 2004 Artem Khodush
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-3. The name of the author may not be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-// os_error_t
-os_error_t::os_error_t( std::string const & msg )
-{
- compose( msg, errno );
-}
-
-os_error_t::os_error_t( std::string const & msg, exec_stream_t::error_code_t code )
-{
- compose( msg, code );
-}
-
-void os_error_t::compose( std::string const & msg, exec_stream_t::error_code_t code )
-{
- std::string s( msg );
- s+='\n';
- errno=0;
- char const * x=strerror( code );
- if( errno!=0 ) {
- s+="[unable to retrieve error description]";
- }else {
- s+=x;
- }
- exec_stream_t::error_t::compose( s, code );
-}
-
-// pipe_t
-pipe_t::pipe_t()
-: m_direction( closed )
-{
- m_fds[0]=-1;
- m_fds[1]=-1;
-}
-
-pipe_t::~pipe_t()
-{
- try {
- close();
- }catch(...) {
- }
-}
-
-int pipe_t::r() const
-{
- return m_fds[0];
-}
-
-int pipe_t::w() const
-{
- return m_fds[1];
-}
-
-void pipe_t::close_r()
-{
- if( m_direction==both || m_direction==read ) {
- if( ::close( m_fds[0] )==-1 ) {
- throw os_error_t( "pipe_t::close_r: close failed" );
- }
- m_direction= m_direction==both ? write : closed;
- }
-}
-
-void pipe_t::close_w()
-{
- if( m_direction==both || m_direction==write ) {
- if( ::close( m_fds[1] )==-1 ) {
- throw os_error_t( "pipe_t::close_w: close failed" );
- }
- m_direction= m_direction==both ? read : closed;
- }
-}
-
-void pipe_t::close()
-{
- close_r();
- close_w();
-}
-
-void pipe_t::open()
-{
- close();
- if( pipe( m_fds )==-1 ) {
- throw os_error_t( "pipe_t::open(): pipe() failed" );
- }
- m_direction=both;
-}
-
-
-// mutex_t
-mutex_t::mutex_t()
-{
- if( int code=pthread_mutex_init( &m_mutex, 0 ) ) {
- throw os_error_t( "mutex_t::mutex_t: pthread_mutex_init failed", code );
- }
-}
-
-mutex_t::~mutex_t()
-{
- pthread_mutex_destroy( &m_mutex );
-}
-
-
-// grab_mutex_t
-grab_mutex_t::grab_mutex_t( mutex_t & mutex, mutex_registrator_t * mutex_registrator )
-{
- m_mutex=&mutex.m_mutex;
- m_error_code=pthread_mutex_lock( m_mutex );
- m_grabbed=ok();
- m_mutex_registrator=mutex_registrator;
- if( m_mutex_registrator ) {
- m_mutex_registrator->add( this );
- }
-}
-
-grab_mutex_t::~grab_mutex_t()
-{
- release();
- if( m_mutex_registrator ) {
- m_mutex_registrator->remove( this );
- }
-}
-
-int grab_mutex_t::release()
-{
- int code=0;
- if( m_grabbed ) {
- code=pthread_mutex_unlock( m_mutex );
- m_grabbed=false;
- }
- return code;
-}
-
-bool grab_mutex_t::ok()
-{
- return m_error_code==0;
-}
-
-int grab_mutex_t::error_code()
-{
- return m_error_code;
-}
-
-// mutex_registrator_t
-mutex_registrator_t::~mutex_registrator_t()
-{
- for( mutexes_t::iterator i=m_mutexes.begin(); i!=m_mutexes.end(); ++i ) {
- (*i)->m_mutex_registrator=0;
- }
-}
-
-void mutex_registrator_t::add( grab_mutex_t * g )
-{
- m_mutexes.insert( m_mutexes.end(), g );
-}
-
-void mutex_registrator_t::remove( grab_mutex_t * g )
-{
- m_mutexes.erase( std::find( m_mutexes.begin(), m_mutexes.end(), g ) );
-}
-
-void mutex_registrator_t::release_all()
-{
- for( mutexes_t::iterator i=m_mutexes.begin(); i!=m_mutexes.end(); ++i ) {
- (*i)->release();
- }
-}
-
-// wait_result_t
-wait_result_t::wait_result_t( unsigned signaled_state, int error_code, bool timed_out )
-{
- m_timed_out=timed_out;
- m_error_code=error_code;
- m_signaled_state= error_code==0 ? signaled_state : 0;
-}
-
-bool wait_result_t::ok()
-{
- return m_error_code==0;
-}
-
-bool wait_result_t::is_signaled( int state )
-{
- return m_signaled_state&state;
-}
-
-int wait_result_t::error_code()
-{
- return m_error_code;
-}
-
-bool wait_result_t::timed_out()
-{
- return m_timed_out;
-}
-
-
-// event_t
-event_t::event_t()
-{
- if( int code=pthread_cond_init( &m_cond, 0 ) ) {
- throw os_error_t( "event_t::event_t: pthread_cond_init failed", code );
- }
- m_state=0;
-}
-
-event_t::~event_t()
-{
- pthread_cond_destroy( &m_cond );
-}
-
-int event_t::set( unsigned bits, mutex_registrator_t * mutex_registrator )
-{
- grab_mutex_t grab_mutex( m_mutex, mutex_registrator );
- if( !grab_mutex.ok() ) {
- return grab_mutex.error_code();
- }
-
- int code=0;
- if( bits&~m_state ) {
- m_state|=bits;
- code=pthread_cond_broadcast( &m_cond );
- }
-
- int release_code=grab_mutex.release();
- if( code==0 ) {
- code=release_code;
- }
- return code;
-}
-
-int event_t::reset( unsigned bits, mutex_registrator_t * mutex_registrator )
-{
- grab_mutex_t grab_mutex( m_mutex, mutex_registrator );
- if( !grab_mutex.ok() ) {
- return grab_mutex.error_code();
- }
- m_state&=~bits;
- return grab_mutex.release();
-}
-
-wait_result_t event_t::wait( unsigned any_bits, unsigned long timeout, mutex_registrator_t * mutex_registrator )
-{
- if( any_bits==0 ) {
- // we ain't waiting for anything
- return wait_result_t( 0, 0, false );
- }
-
- grab_mutex_t grab_mutex( m_mutex, mutex_registrator );
- if( !grab_mutex.ok() ) {
- return wait_result_t( 0, grab_mutex.error_code(), false );
- }
-
- struct timeval time_val_limit;
- gettimeofday( &time_val_limit, 0 );
- struct timespec time_limit;
- time_limit.tv_sec=time_val_limit.tv_sec+timeout/1000;
- time_limit.tv_nsec=1000*(time_val_limit.tv_usec+1000*(timeout%1000));
- int code=0;
- while( code==0 && (m_state&any_bits)==0 ) {
- code=pthread_cond_timedwait( &m_cond, &m_mutex.m_mutex, &time_limit );
- }
-
- unsigned state=m_state;
- int release_code=grab_mutex.release();
- if( code==0 ) {
- code=release_code;
- }
- return wait_result_t( state, code, code==ETIMEDOUT );
-}
-
-// thread_buffer_t
-thread_buffer_t::thread_buffer_t( pipe_t & in_pipe, pipe_t & out_pipe, pipe_t & err_pipe, std::ostream & in )
-: m_in_pipe( in_pipe ), m_out_pipe( out_pipe ), m_err_pipe( err_pipe ), m_in( in )
-{
- m_in_bad=false;
- m_error_prefix="";
- m_error_code=0;
- m_in_wait_timeout=2000;
- m_out_wait_timeout=2000;
- m_err_wait_timeout=2000;
- m_thread_termination_timeout=1000;
- m_in_buffer_limit=0;
- m_out_buffer_limit=0;
- m_err_buffer_limit=0;
- m_out_read_buffer_size=4096;
- m_err_read_buffer_size=4096;
- m_thread_started=false;
- m_in_closed=false;
-}
-
-thread_buffer_t::~thread_buffer_t()
-{
- bool stopped=false;
- try {
- stopped=stop_thread();
- }catch( ... ) {
- }
- if( !stopped ) {
- try {
- stopped=abort_thread();
- }catch( ... ) {
- }
- }
- if( !stopped ) {
- std::terminate();
- }
-}
-
-void thread_buffer_t::set_wait_timeout( int stream_kind, unsigned long milliseconds )
-{
- if( m_thread_started ) {
- throw exec_stream_t::error_t( "thread_buffer_t::set_wait_timeout: thread already started" );
- }
- if( stream_kind&exec_stream_t::s_in ) {
- m_in_wait_timeout=milliseconds;
- }
- if( stream_kind&exec_stream_t::s_out ) {
- m_out_wait_timeout=milliseconds;
- }
- if( stream_kind&exec_stream_t::s_err ) {
- m_err_wait_timeout=milliseconds;
- }
- if( stream_kind&exec_stream_t::s_child ) {
- m_thread_termination_timeout=milliseconds;
- }
-}
-
-void thread_buffer_t::set_buffer_limit( int stream_kind, std::size_t limit )
-{
- if( m_thread_started ) {
- throw exec_stream_t::error_t( "thread_buffer_t::set_buffer_limit: thread already started" );
- }
- if( stream_kind&exec_stream_t::s_in ) {
- m_in_buffer_limit=limit;
- }
- if( stream_kind&exec_stream_t::s_out ) {
- m_out_buffer_limit=limit;
- }
- if( stream_kind&exec_stream_t::s_err ) {
- m_err_buffer_limit=limit;
- }
-}
-
-void thread_buffer_t::set_read_buffer_size( int stream_kind, std::size_t size )
-{
- if( m_thread_started ) {
- throw exec_stream_t::error_t( "thread_buffer_t::set_read_buffer_size: thread already started" );
- }
- if( stream_kind&exec_stream_t::s_out ) {
- m_out_read_buffer_size=size;
- }
- if( stream_kind&exec_stream_t::s_err ) {
- m_err_read_buffer_size=size;
- }
-}
-
-void thread_buffer_t::start()
-{
- if( m_thread_started ) {
- throw exec_stream_t::error_t( "thread_buffer_t::start: thread already started" );
- }
- m_in_buffer.clear();
- m_out_buffer.clear();
- m_err_buffer.clear();
-
- int code;
- if( (code=m_thread_control.reset( ~0u, 0 )) || (code=m_thread_control.set( exec_stream_t::s_out|exec_stream_t::s_err, 0 ) ) ) {
- throw os_error_t( "thread_buffer_t::start: unable to initialize m_thread_control event", code );
- }
- if( (code=m_thread_responce.reset( ~0u, 0 )) || (code=m_thread_responce.set( exec_stream_t::s_in, 0 )) ) {
- throw os_error_t( "thread_buffer_t::start: unable to initialize m_thread_responce event", code );
- }
-
- m_error_prefix="";
- m_error_code=0;
-
- if( int code=pthread_create( &m_thread, 0, &thread_func, this ) ) {
- throw os_error_t( "exec_stream_therad_t::start: pthread_create failed", code );
- }
- m_thread_started=true;
- m_in_closed=false;
- m_in_bad=false;
-}
-
-bool thread_buffer_t::stop_thread()
-{
- if( m_thread_started ) {
- if( int code=m_thread_control.set( exec_stream_t::s_child, 0 ) ) {
- throw os_error_t( "thread_buffer_t::stop_thread: unable to set thread termination event", code );
- }
- wait_result_t wait_result=m_thread_responce.wait( exec_stream_t::s_child, m_thread_termination_timeout, 0 );
- if( !wait_result.ok() && !wait_result.timed_out() ) {
- throw os_error_t( "thread_buffer_t::stop_thread: wait for m_thread_stopped failed", wait_result.error_code() );
- }
- if( wait_result.ok() ) {
- void * thread_result;
- if( int code=pthread_join( m_thread, &thread_result ) ) {
- throw os_error_t( "thread_buffer_t::stop_thread: pthread_join failed", code );
- }
- m_thread_started=false;
- // check for any errors encountered in the thread
- if( m_error_code!=0 ) {
- throw os_error_t( m_error_prefix, m_error_code );
- }
- return true;
- }else {
- return false;
- }
- }
- return true;
-}
-
-bool thread_buffer_t::abort_thread()
-{
- if( m_thread_started ) {
- if( int code=pthread_cancel( m_thread ) ) {
- throw os_error_t( "thread_buffer_t::abort_thread: pthread_cancel failed", code );
- }
- void * thread_result;
- if( int code=pthread_join( m_thread, &thread_result ) ) {
- throw os_error_t( "thread_buffer_t::stop_thread: pthread_join failed", code );
- }
- m_thread_started=false;
- }
- return true;
-}
-
-const int s_in_eof=16;
-const int s_out_eof=32;
-const int s_err_eof=64;
-
-void thread_buffer_t::get( exec_stream_t::stream_kind_t kind, char * dst, std::size_t & size, bool & no_more )
-{
- if( !m_thread_started ) {
- throw exec_stream_t::error_t( "thread_buffer_t::get: thread was not started" );
- }
- unsigned long timeout= kind==exec_stream_t::s_out ? m_out_wait_timeout : m_err_wait_timeout;
- int eof_kind= kind==exec_stream_t::s_out ? s_out_eof : s_err_eof;
- buffer_list_t & buffer= kind==exec_stream_t::s_out ? m_out_buffer : m_err_buffer;
-
- wait_result_t wait_result=m_thread_responce.wait( kind|exec_stream_t::s_child|eof_kind, timeout, 0 );
- if( !wait_result.ok() ) {
- throw os_error_t( "thread_buffer_t::get: wait for got_data failed", wait_result.error_code() );
- }
-
- if( wait_result.is_signaled( exec_stream_t::s_child ) ) {
- // thread stopped - no need to synchronize
- if( !buffer.empty() ) {
- // we have data - deliver it first
- // when thread terminated, there is no need to synchronize
- buffer.get( dst, size );
- no_more=false;
- }else {
- // thread terminated and we have no more data to return - report errors, if any
- if( m_error_code!=0 ) {
- throw os_error_t( m_error_prefix, m_error_code );
- }
- // if terminated without error - signal eof
- size=0;
- no_more=true;
- }
- }else if( wait_result.is_signaled( kind|eof_kind ) ) {
- // thread got some data for us - grab them
- grab_mutex_t grab_mutex( m_mutex, 0 );
- if( !grab_mutex.ok() ) {
- throw os_error_t( "thread_buffer_t::get: wait for mutex failed", grab_mutex.error_code() );
- }
-
- if( !buffer.empty() ) {
- buffer.get( dst, size );
- no_more=false;
- }else {
- size=0;
- no_more=wait_result.is_signaled( eof_kind );
- }
- // if no data left - make the next get() wait until it arrives
- if( buffer.empty() ) {
- if( int code=m_thread_responce.reset( kind, 0 ) ) {
- throw os_error_t( "thread_buffer_t::get: unable to reset got_data event", code );
- }
- }
- // if buffer is not too long tell the thread we want more data
- std::size_t buffer_limit= kind==exec_stream_t::s_out ? m_out_buffer_limit : m_err_buffer_limit;
- if( !buffer.full( buffer_limit ) ) {
- if( int code=m_thread_control.set( kind, 0 ) ) {
- throw os_error_t( "thread_buffer_t::get: unable to set want_data event", code );
- }
- }
- }
-}
-
-void thread_buffer_t::put( char * src, std::size_t & size, bool & no_more )
-{
- if( !m_thread_started ) {
- throw exec_stream_t::error_t( "thread_buffer_t::put: thread was not started" );
- }
- if( m_in_closed || m_in_bad ) {
- size=0;
- no_more=true;
- return;
- }
- // wait for both m_want_data and m_mutex
- wait_result_t wait_result=m_thread_responce.wait( exec_stream_t::s_in|exec_stream_t::s_child, m_in_wait_timeout, 0 );
- if( !wait_result.ok() ) {
- // workaround for versions of libstdc++ (at least in gcc 3.1 pre) that do not intercept exceptions in operator<<( std::ostream, std::string )
- m_in_bad=true;
- if( m_in.exceptions()&std::ios_base::badbit ) {
- throw os_error_t( "thread_buffer_t::put: wait for want_data failed", wait_result.error_code() );
- }else {
- m_in.setstate( std::ios_base::badbit );
- size=0;
- no_more=true;
- return;
- }
- }
- if( wait_result.is_signaled( exec_stream_t::s_child ) ) {
- // thread stopped - check for errors
- if( m_error_code!=0 ) {
- throw os_error_t( m_error_prefix, m_error_code );
- }
- // if terminated without error - signal eof, since no one will ever write our data
- size=0;
- no_more=true;
- }else if( wait_result.is_signaled( exec_stream_t::s_in ) ) {
- // thread wants some data from us - stuff them
- grab_mutex_t grab_mutex( m_mutex, 0 );
- if( !grab_mutex.ok() ) {
- throw os_error_t( "thread_buffer_t::put: wait for mutex failed", grab_mutex.error_code() );
- }
-
- no_more=false;
- m_in_buffer.put( src, size );
-
- // if the buffer is too long - make the next put() wait until it shrinks
- if( m_in_buffer.full( m_in_buffer_limit ) ) {
- if( int code=m_thread_responce.reset( exec_stream_t::s_in, 0 ) ) {
- throw os_error_t( "thread_buffer_t::put: unable to reset want_data event", code );
- }
- }
- // tell the thread we got data
- if( !m_in_buffer.empty() ) {
- if( int code=m_thread_control.set( exec_stream_t::s_in, 0 ) ) {
- throw os_error_t( "thread_buffer_t::put: unable to set got_data event", code );
- }
- }
- }
-}
-
-void thread_buffer_t::close_in()
-{
- if( !m_in_bad ) {
- m_in.flush();
- }
- if( m_thread_started ) {
- if( int code=m_thread_control.set( s_in_eof, 0 ) ) {
- throw os_error_t( "thread_buffer_t::close_in: unable to set in_got_data event", code );
- }
- m_in_closed=true;
- }
-}
-
-void mutex_cleanup( void * p )
-{
- static_cast< mutex_registrator_t * >( p )->release_all();
-}
-
-void * thread_buffer_t::thread_func( void * param )
-{
- thread_buffer_t * p=static_cast< thread_buffer_t * >( param );
- // accessing p anywhere here is safe because thread_buffer_t destructor
- // ensures the thread is terminated before p get destroyed
- char * out_read_buffer=0;
- char * err_read_buffer=0;
- bool in_eof=false;
- bool in_closed=false;
- bool out_eof=false;
- bool err_eof=false;
-
- mutex_registrator_t mutex_registrator;
- pthread_cleanup_push( mutex_cleanup, &mutex_registrator );
-
- try {
- out_read_buffer=new char[p->m_out_read_buffer_size];
- err_read_buffer=new char[p->m_err_read_buffer_size];
-
- buffer_list_t::buffer_t write_buffer;
- write_buffer.data=0;
- write_buffer.size=0;
- std::size_t write_buffer_offset=0;
-
- unsigned long timeout=std::max( p->m_in_wait_timeout, std::max( p->m_out_wait_timeout, p->m_err_wait_timeout ) );
-
- fd_set read_fds;
- FD_ZERO( &read_fds );
- fd_set write_fds;
- FD_ZERO( &write_fds );
-
- while( true ) {
- unsigned wait_for=exec_stream_t::s_child;
- if( !in_eof && write_buffer.data==0 ) {
- wait_for|=exec_stream_t::s_in|s_in_eof;
- }
- if( !out_eof ) {
- wait_for|=exec_stream_t::s_out;
- }
- if( !err_eof ) {
- wait_for|=exec_stream_t::s_err;
- }
-
- wait_result_t wait_result=p->m_thread_control.wait( wait_for, timeout, &mutex_registrator );
- if( !wait_result.ok() && !wait_result.timed_out() ) {
- p->m_error_code=wait_result.error_code();
- p->m_error_prefix="thread_buffer_t::thread_func: wait for thread_event failed";
- break;
- }
-
- // we need more data - get from p->m_buffers
- if( write_buffer.data==0 && wait_result.is_signaled( exec_stream_t::s_in|s_in_eof ) ) {
- grab_mutex_t grab_mutex( p->m_mutex, &mutex_registrator );
- if( !grab_mutex.ok() ) {
- p->m_error_code=grab_mutex.error_code();
- p->m_error_prefix="thread_buffer_t::thread_func: wait for mutex failed";
- break;
- }
-
- if( p->m_in_buffer.empty() ) {
- // we have empty write_buffer, empty p->m_in_buffer and we are told it will stay so - time to close child's stdin
- if( wait_result.is_signaled( s_in_eof ) ) {
- in_eof=true;
- }
- }
- if( !p->m_in_buffer.empty() ) {
- // we've got buffer - detach it
- write_buffer=p->m_in_buffer.detach();
- write_buffer_offset=0;
- }
- // if no data left in p->m_in_buffer - wait until it arrives
- if( p->m_in_buffer.empty() ) {
- // if no data for us - stop trying to get it until we are told it arrived
- if( int code=p->m_thread_control.reset( exec_stream_t::s_in, &mutex_registrator ) ) {
- p->m_error_code=code;
- p->m_error_prefix="thread_buffer_t::thread_func: unable to reset thread_event (s_in)";
- break;
- }
- }
-
- // if buffer is not too long - tell put() it can proceed
- if( !p->m_in_buffer.full( p->m_in_buffer_limit ) ) {
- if( int code=p->m_thread_responce.set( exec_stream_t::s_in, &mutex_registrator ) ) {
- p->m_error_code=code;
- p->m_error_prefix="thread_buffer_t::thread_func: unable to set in_want_data event";
- break;
- }
- }
- }
-
- if( in_eof && write_buffer.data==0 ) {
- p->m_in_pipe.close();
- in_closed=true;
- }
-
- // see if they want us to stop, but only when there is nothing more to write
- if( write_buffer.data==0 && wait_result.is_signaled( exec_stream_t::s_child ) ) {
- break;
- }
-
- // determine whether we want something
- if( write_buffer.data!=0 ) {
- FD_SET( p->m_in_pipe.w(), &write_fds );
- }else {
- FD_CLR( p->m_in_pipe.w(), &write_fds );
- }
- if( !out_eof && wait_result.is_signaled( exec_stream_t::s_out ) ) {
- FD_SET( p->m_out_pipe.r(), &read_fds );
- }else {
- FD_CLR( p->m_out_pipe.r(), &read_fds );
- }
- if( !err_eof && wait_result.is_signaled( exec_stream_t::s_err ) ) {
- FD_SET( p->m_err_pipe.r(), &read_fds );
- }else {
- FD_CLR( p->m_err_pipe.r(), &read_fds );
- }
-
- if( FD_ISSET( p->m_in_pipe.w(), &write_fds ) || FD_ISSET( p->m_out_pipe.r(), &read_fds ) || FD_ISSET( p->m_err_pipe.r(), &read_fds ) ) {
- // we want something - get it
- struct timeval select_timeout;
- select_timeout.tv_sec=0;
- select_timeout.tv_usec=100000;
- int nfds=std::max( p->m_in_pipe.w(), std::max( p->m_out_pipe.r(), p->m_err_pipe.r() ) )+1;
- if( select( nfds, &read_fds, &write_fds, 0, &select_timeout )==-1 ) {
- p->m_error_code=errno;
- p->m_error_prefix="thread_buffer_t::thread_func: select failed";
- break;
- }
- }
-
- // determine what we got
-
- if( FD_ISSET( p->m_in_pipe.w(), &write_fds ) ) {
- // it seems we may write to child's stdin
- int n_written=write( p->m_in_pipe.w(), write_buffer.data+write_buffer_offset, write_buffer.size-write_buffer_offset );
- if( n_written==-1 ) {
- if( errno!=EAGAIN ) {
- p->m_error_code=errno;
- p->m_error_prefix="thread_buffer_t::thread_func: write to child stdin failed";
- break;
- }
- }else {
- write_buffer_offset+=n_written;
- if( write_buffer_offset==write_buffer.size ) {
- delete[] write_buffer.data;
- write_buffer.data=0;
- write_buffer.size=0;
- }
- }
- }
-
- if( FD_ISSET( p->m_out_pipe.r(), &read_fds ) ) {
- // it seems we may read child's stdout
- int n_out_read=read( p->m_out_pipe.r(), out_read_buffer, p->m_out_read_buffer_size );
- if( n_out_read==-1 ) {
- if( errno!=EAGAIN ) {
- p->m_error_code=errno;
- p->m_error_prefix="exec_stream_t::thread_func: read from child stdout failed";
- break;
- }
- }else {
- grab_mutex_t grab_mutex( p->m_mutex, &mutex_registrator );
- if( n_out_read!=0 ) {
- p->m_out_buffer.put( out_read_buffer, n_out_read );
- // if buffer is full - stop reading
- if( p->m_out_buffer.full( p->m_out_buffer_limit ) ) {
- if( int code=p->m_thread_control.reset( exec_stream_t::s_out, &mutex_registrator ) ) {
- p->m_error_code=code;
- p->m_error_prefix="exec_stream_t::thread_func: unable to reset m_out_want_data event";
- break;
- }
- }
- }
- unsigned responce=exec_stream_t::s_out;
- if( n_out_read==0 ) { // EOF when read 0 bytes while select told that it's ready
- out_eof=true;
- responce|=s_out_eof;
- }
- // we got either data or eof - tell always
- if( int code=p->m_thread_responce.set( responce, &mutex_registrator ) ) {
- p->m_error_code=code;
- p->m_error_prefix="exec_stream_t::thread_func: unable to set out_got_data event";
- break;
- }
- }
- }
-
- if( FD_ISSET( p->m_err_pipe.r(), &read_fds ) ) {
- // it seemds we may read child's stderr
- int n_err_read=read( p->m_err_pipe.r(), err_read_buffer, p->m_err_read_buffer_size );
- if( n_err_read==-1 ) {
- if( errno!=EAGAIN ) {
- p->m_error_code=errno;
- p->m_error_prefix="exec_stream_t::thread_func: read from child stderr failed";
- break;
- }
- }else {
- grab_mutex_t grab_mutex( p->m_mutex, &mutex_registrator );
- if( n_err_read!=0 ) {
- p->m_err_buffer.put( err_read_buffer, n_err_read );
- // if buffer is full - stop reading
- if( p->m_err_buffer.full( p->m_err_buffer_limit ) ) {
- if( int code=p->m_thread_control.reset( exec_stream_t::s_err, &mutex_registrator ) ) {
- p->m_error_code=code;
- p->m_error_prefix="exec_stream_t::thread_func: unable to reset m_err_want_data event";
- break;
- }
- }
- }
- unsigned responce=exec_stream_t::s_err;
- if( n_err_read==0 ) {
- err_eof=true;
- responce|=s_err_eof;
- }
- // we got either data or eof - tell always
- if( int code=p->m_thread_responce.set( responce, &mutex_registrator ) ) {
- p->m_error_code=code;
- p->m_error_prefix="exec_stream_t::thread_func: unable to set err_got_data event";
- break;
- }
- }
- }
-
- if( in_closed && out_eof && err_eof ) {
- // have nothing more to do
- break;
- }
- }
-
- delete[] write_buffer.data;
-
- }catch( ... ) {
- // might only be std::bad_alloc
- p->m_error_code=0;
- p->m_error_prefix="thread_buffer_t::writer_thread: exception caught";
- }
-
- delete[] out_read_buffer;
- delete[] err_read_buffer;
-
- // tell everyone that we've stopped, so that get() and put() will be unblocked
- if( int code=p->m_thread_responce.set( exec_stream_t::s_child, &mutex_registrator ) ) {
- p->m_error_code=code;
- p->m_error_prefix="exec_stream_t::thread_func: unable to set thread_stopped event";
- }
-
- pthread_cleanup_pop( 0 );
- return 0;
-}
diff --git a/include/exec-stream/posix/exec-stream-helpers.h b/include/exec-stream/posix/exec-stream-helpers.h
deleted file mode 100644
index 13f1eaa..0000000
--- a/include/exec-stream/posix/exec-stream-helpers.h
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
-Copyright (C) 2004 Artem Khodush
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-3. The name of the author may not be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-class os_error_t : public exec_stream_t::error_t {
-public:
- os_error_t( std::string const & msg );
- os_error_t( std::string const & msg, exec_stream_t::error_code_t code );
-private:
- void compose( std::string const & msg, exec_stream_t::error_code_t code );
-};
-
-
-template< class T > class buf_t {
-public:
- typedef T data_t;
-
- buf_t()
- {
- m_buf=0;
- m_size=0;
- }
-
- ~buf_t()
- {
- delete [] m_buf;
- }
-
- data_t * new_data( std::size_t size )
- {
- m_buf=new T[size];
- m_size=size;
- return m_buf;
- }
-
- void append_data( data_t const * data, std::size_t size )
- {
- buf_t new_buf;
- new_buf.new_data( m_size+size );
- std::char_traits< data_t >::copy( new_buf.m_buf, m_buf, m_size );
- std::char_traits< data_t >::copy( new_buf.m_buf+m_size, data, size );
- std::swap( this->m_buf, new_buf.m_buf );
- std::swap( this->m_size, new_buf.m_size );
- }
-
- data_t * data()
- {
- return m_buf;
- }
-
- std::size_t size()
- {
- return m_size;
- }
-
-private:
- buf_t( buf_t const & );
- buf_t & operator=( buf_t const & );
-
- data_t * m_buf;
- std::size_t m_size;
-};
-
-
-class pipe_t {
-public:
- pipe_t();
- ~pipe_t();
- int r() const;
- int w() const;
- void close_r();
- void close_w();
- void close();
- void open();
-private:
- enum direction_t{ closed, read, write, both };
- direction_t m_direction;
- int m_fds[2];
-};
-
-
-class mutex_t {
-public:
- mutex_t();
- ~mutex_t();
-
-private:
- pthread_mutex_t m_mutex;
-
- friend class event_t;
- friend class grab_mutex_t;
-};
-
-
-class grab_mutex_t {
-public:
- grab_mutex_t( mutex_t & mutex, class mutex_registrator_t * mutex_registrator );
- ~grab_mutex_t();
-
- int release();
- bool ok();
- int error_code();
-
-private:
- pthread_mutex_t * m_mutex;
- int m_error_code;
- bool m_grabbed;
- class mutex_registrator_t * m_mutex_registrator;
-
- friend class mutex_registrator_t;
-};
-
-class mutex_registrator_t {
-public:
- ~mutex_registrator_t();
- void add( grab_mutex_t * g );
- void remove( grab_mutex_t * g );
- void release_all();
-private:
- typedef std::list< grab_mutex_t * > mutexes_t;
- mutexes_t m_mutexes;
-};
-
-
-class wait_result_t {
-public:
- wait_result_t( unsigned signaled_state, int error_code, bool timed_out );
-
- bool ok();
- bool is_signaled( int state );
- int error_code();
- bool timed_out();
-
-private:
- unsigned m_signaled_state;
- int m_error_code;
- bool m_timed_out;
-};
-
-
-class event_t {
-public:
- event_t();
- ~event_t();
-
- int set( unsigned bits, mutex_registrator_t * mutex_registrator );
- int reset( unsigned bits, mutex_registrator_t * mutex_registrator );
-
- wait_result_t wait( unsigned any_bits, unsigned long timeout, mutex_registrator_t * mutex_registrator );
-
-private:
- mutex_t m_mutex;
- pthread_cond_t m_cond;
- unsigned volatile m_state;
-};
-
-
-class thread_buffer_t {
-public:
- thread_buffer_t( pipe_t & in_pipe, pipe_t & out_pipe, pipe_t & err_pipe, std::ostream & in );
- ~thread_buffer_t();
-
- void set_wait_timeout( int stream_kind, unsigned long milliseconds );
- void set_buffer_limit( int stream_kind, std::size_t limit );
- void set_read_buffer_size( int stream_kind, std::size_t size );
-
- void start();
-
- void get( exec_stream_t::stream_kind_t kind, char * dst, std::size_t & size, bool & no_more );
- void put( char * src, std::size_t & size, bool & no_more );
-
- void close_in();
- bool stop_thread();
- bool abort_thread();
-
-private:
- static void * thread_func( void * param );
-
- pthread_t m_thread;
- mutex_t m_mutex; // protecting m_in_buffer, m_out_buffer, m_err_buffer
-
- buffer_list_t m_in_buffer;
- buffer_list_t m_out_buffer;
- buffer_list_t m_err_buffer;
-
- event_t m_thread_control; // s_in : in got_data; s_out: out want data; s_err: err want data; s_child: stop thread
- event_t m_thread_responce; // s_in : in want data; s_out: out got data; s_err: err got data; s_child: thread stopped
-
- char const * m_error_prefix;
- int m_error_code;
-
- bool m_thread_started; // set in start(), checked in set_xxx(), get() and put()
- bool m_in_closed; // set in close_in(), checked in put()
-
- pipe_t & m_in_pipe;
- pipe_t & m_out_pipe;
- pipe_t & m_err_pipe;
-
- unsigned long m_in_wait_timeout;
- unsigned long m_out_wait_timeout;
- unsigned long m_err_wait_timeout;
-
- unsigned long m_thread_termination_timeout;
-
- std::size_t m_in_buffer_limit;
- std::size_t m_out_buffer_limit;
- std::size_t m_err_buffer_limit;
-
- std::size_t m_out_read_buffer_size;
- std::size_t m_err_read_buffer_size;
-
- // workaround for not-quite-conformant libstdc++ (see put())
- std::ostream & m_in;
- bool m_in_bad;
-};
diff --git a/include/exec-stream/posix/exec-stream-impl.cpp b/include/exec-stream/posix/exec-stream-impl.cpp
deleted file mode 100644
index c2afa4b..0000000
--- a/include/exec-stream/posix/exec-stream-impl.cpp
+++ /dev/null
@@ -1,386 +0,0 @@
-/*
-Copyright (C) 2004 Artem Khodush
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-3. The name of the author may not be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-// exec_stream_t::impl_t
-struct exec_stream_t::impl_t {
- impl_t();
- ~impl_t();
-
- void split_args( std::string const & program, std::string const & arguments );
- void split_args( std::string const & program, exec_stream_t::next_arg_t & next_arg );
- void start( std::string const & program );
-
- pid_t m_child_pid;
- int m_exit_code;
- unsigned long m_child_timeout;
-
- buf_t< char > m_child_args;
- buf_t< char * > m_child_argp;
-
- pipe_t m_in_pipe;
- pipe_t m_out_pipe;
- pipe_t m_err_pipe;
-
- thread_buffer_t m_thread;
-
- exec_stream_buffer_t m_in_buffer;
- exec_stream_buffer_t m_out_buffer;
- exec_stream_buffer_t m_err_buffer;
-
- exec_ostream_t m_in;
- exec_istream_t m_out;
- exec_istream_t m_err;
-
- void (*m_old_sigpipe_handler)(int);
-};
-
-exec_stream_t::impl_t::impl_t()
-: m_thread( m_in_pipe, m_out_pipe, m_err_pipe, m_in ), /* m_in here is not initialized, but its ok */
- m_in_buffer( exec_stream_t::s_in, m_thread ), m_out_buffer( exec_stream_t::s_out, m_thread ), m_err_buffer( exec_stream_t::s_err, m_thread ),
- m_in( m_in_buffer ), m_out( m_out_buffer ), m_err( m_err_buffer )
-{
- m_out.tie( &m_in );
- m_err.tie( &m_in );
- m_child_timeout=1000;
- m_child_pid=-1;
- m_old_sigpipe_handler=signal( SIGPIPE, SIG_IGN );
-}
-
-exec_stream_t::impl_t::~impl_t()
-{
- signal( SIGPIPE, m_old_sigpipe_handler );
-}
-
-void exec_stream_t::impl_t::split_args( std::string const & program, std::string const & arguments )
-{
- char * args_end=m_child_args.new_data( program.size()+1+arguments.size()+1 );
- int argc=1;
-
- std::string::traits_type::copy( args_end, program.data(), program.size() );
- args_end+=program.size();
- *args_end++=0;
-
- std::string whitespace=" \t\r\n\v";
-
- std::string::size_type arg_start=arguments.find_first_not_of( whitespace );
- while( arg_start!=std::string::npos ) {
- ++argc;
- std::string::size_type arg_stop;
- if( arguments[arg_start]!='"' ) {
- arg_stop=arguments.find_first_of( whitespace, arg_start );
- if( arg_stop==std::string::npos ) {
- arg_stop=arguments.size();
- }
- std::string::traits_type::copy( args_end, arguments.data()+arg_start, arg_stop-arg_start );
- args_end+=arg_stop-arg_start;
- }else {
- std::string::size_type cur=arg_start+1;
- while( true ) {
- std::string::size_type next=arguments.find( '"', cur );
- if( next==std::string::npos || arguments[next-1]!='\\' ) {
- if( next==std::string::npos ) {
- next=arguments.size();
- arg_stop=next;
- }else {
- arg_stop=next+1;
- }
- std::string::traits_type::copy( args_end, arguments.data()+cur, next-cur );
- args_end+=next-cur;
- break;
- }else {
- std::string::traits_type::copy( args_end, arguments.data()+cur, next-1-cur );
- args_end+=next-1-cur;
- *args_end++='"';
- cur=next+1;
- }
- }
- }
- *args_end++=0;
- arg_start=arguments.find_first_not_of( whitespace, arg_stop );
- }
-
- char ** argp_end=m_child_argp.new_data( argc+1 );
- char * args=m_child_args.data();
- while( args!=args_end ) {
- *argp_end=args;
- args+=std::string::traits_type::length( args )+1;
- ++argp_end;
- }
- *argp_end=0;
-}
-
-void exec_stream_t::impl_t::split_args( std::string const & program, exec_stream_t::next_arg_t & next_arg )
-{
- typedef std::vector< std::size_t > arg_sizes_t;
- arg_sizes_t arg_sizes;
-
- m_child_args.new_data( program.size()+1 );
- std::string::traits_type::copy( m_child_args.data(), program.c_str(), program.size()+1 );
- arg_sizes.push_back( program.size()+1 );
-
- while( std::string const * s=next_arg.next() ) {
- m_child_args.append_data( s->c_str(), s->size()+1 );
- arg_sizes.push_back( s->size()+1 );
- }
-
- char ** argp_end=m_child_argp.new_data( arg_sizes.size()+1 );
- char * argp=m_child_args.data();
- for( arg_sizes_t::iterator i=arg_sizes.begin(); i!=arg_sizes.end(); ++i ) {
- *argp_end=argp;
- argp+=*i;
- ++argp_end;
- }
- *argp_end=0;
-}
-
-void exec_stream_t::set_buffer_limit( int stream_kind, std::size_t size )
-{
- m_impl->m_thread.set_buffer_limit( stream_kind, size );
-}
-
-void exec_stream_t::set_wait_timeout( int stream_kind, timeout_t milliseconds )
-{
- m_impl->m_thread.set_wait_timeout( stream_kind, milliseconds );
- if( stream_kind&exec_stream_t::s_child ) {
- m_impl->m_child_timeout=milliseconds;
- }
-}
-
-void exec_stream_t::start( std::string const & program, std::string const & arguments )
-{
- if( !close() ) {
- throw exec_stream_t::error_t( "exec_stream_t::start: previous child process has not yet terminated" );
- }
-
- m_impl->split_args( program, arguments );
- m_impl->start( program );
-}
-
-void exec_stream_t::start( std::string const & program, exec_stream_t::next_arg_t & next_arg )
-{
- if( !close() ) {
- throw exec_stream_t::error_t( "exec_stream_t::start: previous child process has not yet terminated" );
- }
-
- m_impl->split_args( program, next_arg );
- m_impl->start( program );
-}
-
-void exec_stream_t::impl_t::start( std::string const & program )
-{
- m_in_pipe.open();
- m_out_pipe.open();
- m_err_pipe.open();
-
- pipe_t status_pipe;
- status_pipe.open();
-
- pid_t pid=fork();
- if( pid==-1 ) {
- throw os_error_t( "exec_stream_t::start: fork failed" );
- }else if( pid==0 ) {
- try {
- status_pipe.close_r();
- if( fcntl( status_pipe.w(), F_SETFD, FD_CLOEXEC )==-1 ) {
- throw os_error_t( "exec_stream_t::start: unable to fcnth( status_pipe, F_SETFD, FD_CLOEXEC ) in child process" );
- }
- m_in_pipe.close_w();
- m_out_pipe.close_r();
- m_err_pipe.close_r();
- if( ::close( 0 )==-1 ) {
- throw os_error_t( "exec_stream_t::start: unable to close( 0 ) in child process" );
- }
- if( fcntl( m_in_pipe.r(), F_DUPFD, 0 )==-1 ) {
- throw os_error_t( "exec_stream_t::start: unable to fcntl( .., F_DUPFD, 0 ) in child process" );
- }
- if( ::close( 1 )==-1 ) {
- throw os_error_t( "exec_stream_t::start: unable to close( 1 ) in child process" );
- }
- if( fcntl( m_out_pipe.w(), F_DUPFD, 1 )==-1 ) {
- throw os_error_t( "exec_stream_t::start: unable to fcntl( .., F_DUPFD, 1 ) in child process" );
- }
- if( ::close( 2 )==-1 ) {
- throw os_error_t( "exec_stream_t::start: unable to close( 2 ) in child process" );
- }
- if( fcntl( m_err_pipe.w(), F_DUPFD, 2 )==-1 ) {
- throw os_error_t( "exec_stream_t::start: unable to fcntl( .., F_DUPFD, 2 ) in child process" );
- }
- m_in_pipe.close_r();
- m_out_pipe.close_w();
- m_err_pipe.close_w();
- if( execvp( m_child_args.data(), m_child_argp.data() )==-1 ) {
- throw os_error_t( "exec_stream_t::start: exec in child process failed. "+program );
- }
- throw exec_stream_t::error_t( "exec_stream_t::start: exec in child process returned" );
- }catch( std::exception const & e ) {
- const char * msg=e.what();
- std::size_t len=strlen( msg );
- write( status_pipe.w(), &len, sizeof( len ) );
- write( status_pipe.w(), msg, len );
- _exit( -1 );
- }catch( ... ) {
- const char * msg="exec_stream_t::start: unknown exception in child process";
- std::size_t len=strlen( msg );
- write( status_pipe.w(), &len, sizeof( len ) );
- write( status_pipe.w(), msg, len );
- _exit( 1 );
- }
- }else {
- m_child_pid=pid;
- status_pipe.close_w();
- fd_set status_fds;
- FD_ZERO( &status_fds );
- FD_SET( status_pipe.r(), &status_fds );
- struct timeval timeout;
- timeout.tv_sec=3;
- timeout.tv_usec=0;
- if( select( status_pipe.r()+1, &status_fds, 0, 0, &timeout )==-1 ) {
- throw os_error_t( "exec_stream_t::start: select on status_pipe failed" );
- }
- if( !FD_ISSET( status_pipe.r(), &status_fds ) ) {
- throw os_error_t( "exec_stream_t::start: timeout while waiting for child to report via status_pipe" );
- }
- std::size_t status_len;
- int status_nread=read( status_pipe.r(), &status_len, sizeof( status_len ) );
- // when all ok, status_pipe is closed on child's exec, and nothing is written to it
- if( status_nread!=0 ) {
- // otherwize, check what went wrong.
- if( status_nread==-1 ) {
- throw os_error_t( "exec_stream_t::start: read from status pipe failed" );
- }else if( status_nread!=sizeof( status_len ) ) {
- throw os_error_t( "exec_stream_t::start: unable to read length of status message from status_pipe" );
- }
- std::string status_msg;
- if( status_len!=0 ) {
- buf_t< char > status_buf;
- status_buf.new_data( status_len );
- status_nread=read( status_pipe.r(), status_buf.data(), status_len );
- if( status_nread==-1 ) {
- throw os_error_t( "exec_stream_t::start: readof status message from status pipe failed" );
- }
- status_msg.assign( status_buf.data(), status_len );
- }
- throw exec_stream_t::error_t( "exec_stream_t::start: error in child process."+status_msg );
- }
- status_pipe.close_r();
-
- m_in_pipe.close_r();
- m_out_pipe.close_w();
- m_err_pipe.close_w();
-
- if( fcntl( m_in_pipe.w(), F_SETFL, O_NONBLOCK )==-1 ) {
- throw os_error_t( "exec_stream_t::start: fcntl( in_pipe, F_SETFL, O_NONBLOCK ) failed" );
- }
-
- m_in_buffer.clear();
- m_out_buffer.clear();
- m_err_buffer.clear();
-
- m_in.clear();
- m_out.clear();
- m_err.clear();
-
- m_thread.set_read_buffer_size( exec_stream_t::s_out, STREAM_BUFFER_SIZE );
- m_thread.set_read_buffer_size( exec_stream_t::s_err, STREAM_BUFFER_SIZE );
- m_thread.start();
- }
-}
-
-bool exec_stream_t::close_in()
-{
- m_impl->m_thread.close_in();
- return true;
-}
-
-bool exec_stream_t::close()
-{
- close_in();
- if( !m_impl->m_thread.stop_thread() ) {
- m_impl->m_thread.abort_thread();
- }
- m_impl->m_in_pipe.close();
- m_impl->m_out_pipe.close();
- m_impl->m_err_pipe.close();
-
- if( m_impl->m_child_pid!=-1 ) {
- pid_t code=waitpid( m_impl->m_child_pid, &m_impl->m_exit_code, WNOHANG );
- if( code==-1 ) {
- throw os_error_t( "exec_stream_t::close: first waitpid failed" );
- }else if( code==0 ) {
-
- struct timeval select_timeout;
- select_timeout.tv_sec=m_impl->m_child_timeout/1000;
- select_timeout.tv_usec=(m_impl->m_child_timeout%1000)*1000;
- if( (code=select( 0, 0, 0, 0, &select_timeout ))==-1 ) {
- throw os_error_t( "exec_stream_t::close: select failed" );
- }
-
- code=waitpid( m_impl->m_child_pid, &m_impl->m_exit_code, WNOHANG );
- if( code==-1 ) {
- throw os_error_t( "exec_stream_t::close: second waitpid failed" );
- }else if( code==0 ) {
- return false;
- }else {
- m_impl->m_child_pid=-1;
- return true;
- }
-
- }else {
- m_impl->m_child_pid=-1;
- return true;
- }
- }
- return true;
-}
-
-void exec_stream_t::kill()
-{
- if( m_impl->m_child_pid!=-1 ) {
- if( ::kill( m_impl->m_child_pid, SIGKILL )==-1 ) {
- throw os_error_t( "exec_stream_t::kill: kill failed" );
- }
- m_impl->m_child_pid=-1;
- m_impl->m_exit_code=0;
- }
-}
-
-int exec_stream_t::exit_code()
-{
- if( m_impl->m_child_pid!=-1 ) {
- throw exec_stream_t::error_t( "exec_stream_t::exit_code: child process still running" );
- }
- return WEXITSTATUS( m_impl->m_exit_code );
-}
-
-void exec_stream_t::set_binary_mode( int )
-{
-}
-
-void exec_stream_t::set_text_mode( int )
-{
-}
diff --git a/include/exec-stream/win/exec-stream-helpers.cpp b/include/exec-stream/win/exec-stream-helpers.cpp
deleted file mode 100644
index 17f0927..0000000
--- a/include/exec-stream/win/exec-stream-helpers.cpp
+++ /dev/null
@@ -1,727 +0,0 @@
-/*
-Copyright (C) 2004 Artem Khodush
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-3. The name of the author may not be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-// os_error_t
-os_error_t::os_error_t( std::string const & msg )
-{
- compose( msg, GetLastError() );
-}
-
-os_error_t::os_error_t( std::string const & msg, exec_stream_t::error_code_t code )
-{
- compose( msg, code );
-}
-
-void os_error_t::compose( std::string const & msg, exec_stream_t::error_code_t code )
-{
- std::string s( msg );
- s+='\n';
- LPVOID buf;
- if( FormatMessageA( FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM,
- 0,
- code,
- MAKELANGID( LANG_NEUTRAL, SUBLANG_DEFAULT ),
- (LPSTR) &buf,
- 0,
- 0
- )==0 ) {
- s+="[unable to retrieve error description]";
- }else {
- // FormatMessage may return \n-terminated string
- LPSTR str_buf=(LPSTR)buf;
- std::size_t buf_len=strlen( str_buf );
- while( buf_len>0 && str_buf[buf_len-1]=='\n' ) {
- --buf_len;
- str_buf[buf_len]=0;
- }
- s+=(LPSTR)buf;
- LocalFree( buf );
- }
- exec_stream_t::error_t::compose( s, code );
-}
-
-// pipe_t
-pipe_t::pipe_t()
-: m_direction( closed ), m_r( INVALID_HANDLE_VALUE ), m_w( INVALID_HANDLE_VALUE )
-{
- open();
-}
-
-pipe_t::~pipe_t()
-{
- close();
-}
-
-void pipe_t::close_r()
-{
- if( m_direction==both || m_direction==read ) {
- if( !CloseHandle( m_r ) ) {
- throw os_error_t( "pipe_t::close_r: CloseHandle failed" );
- }
- m_direction= m_direction==both ? write : closed;
- }
-}
-
-void pipe_t::close_w()
-{
- if( m_direction==both || m_direction==write ) {
- if( !CloseHandle( m_w ) ) {
- throw os_error_t( "pipe_t::close_w: CloseHandle failed" );
- }
- m_direction= m_direction==both ? read : closed;
- }
-}
-
-void pipe_t::close()
-{
- close_r();
- close_w();
-}
-
-void pipe_t::open()
-{
- close();
- SECURITY_ATTRIBUTES sa;
- sa.nLength=sizeof( sa );
- sa.bInheritHandle=true;
- sa.lpSecurityDescriptor=0;
- if( !CreatePipe( &m_r, &m_w, &sa, 0 ) )
- throw os_error_t( "pipe_t::pipe_t: CreatePipe failed" );
- m_direction=both;
-}
-
-HANDLE pipe_t::r() const
-{
- return m_r;
-}
-
-HANDLE pipe_t::w() const
-{
- return m_w;
-}
-
-// set_stdhandle_t
-set_stdhandle_t::set_stdhandle_t( DWORD kind, HANDLE handle )
-: m_kind( kind ), m_save_handle( GetStdHandle( kind ) )
-{
- if( m_save_handle==INVALID_HANDLE_VALUE )
- throw os_error_t( "set_stdhandle_t::set_stdhandle_t: GetStdHandle() failed" );
- if( !SetStdHandle( kind, handle ) )
- throw os_error_t( "set_stdhandle_t::set_stdhandle_t: SetStdHandle() failed" );
-}
-
-set_stdhandle_t::~set_stdhandle_t()
-{
- SetStdHandle( m_kind, m_save_handle );
-}
-
-//wait_result_t
-wait_result_t::wait_result_t()
-{
- m_signaled_object=INVALID_HANDLE_VALUE;
- m_timed_out=false;
- m_error_code=ERROR_SUCCESS;
- m_error_message="";
-}
-
-wait_result_t::wait_result_t( DWORD wait_result, int objects_count, HANDLE const * objects )
-{
- m_signaled_object=INVALID_HANDLE_VALUE;
- m_timed_out=false;
- m_error_code=ERROR_SUCCESS;
- m_error_message="";
- if( wait_result>=WAIT_OBJECT_0 && wait_result<WAIT_OBJECT_0+objects_count ) {
- m_signaled_object=objects[wait_result-WAIT_OBJECT_0];
- }else if( wait_result>=WAIT_ABANDONED_0 && wait_result<WAIT_ABANDONED_0+objects_count ) {
- m_error_message="wait_result_t: one of the wait objects was abandoned";
- }else if( wait_result==WAIT_TIMEOUT ) {
- m_timed_out=true;
- m_error_message="wait_result_t: timeout elapsed";
- }else if( wait_result==WAIT_FAILED ) {
- m_error_code=GetLastError();
- }else {
- m_error_message="wait_result_t: weird error: unrecognised WaitForMultipleObjects return value";
- m_error_code=wait_result;
- }
-}
-
-bool wait_result_t::ok()
-{
- return m_error_code==ERROR_SUCCESS && m_error_message[0]==0;
-}
-
-bool wait_result_t::is_signaled( event_t & event )
-{
- return m_signaled_object==event.m_handle;
-}
-
-bool wait_result_t::timed_out()
-{
- return m_timed_out;
-}
-
-DWORD wait_result_t::error_code()
-{
- return m_error_code;
-}
-
-char const * wait_result_t::error_message()
-{
- return m_error_message;
-}
-
-// event_t
-event_t::event_t()
-{
- m_handle=CreateEvent( 0, TRUE, FALSE, 0 );
- if( m_handle==0 ) {
- throw os_error_t( "event_t::event_t: create event failed" );
- }
-}
-
-event_t::~event_t()
-{
- CloseHandle( m_handle );
-}
-
-bool event_t::set()
-{
- return SetEvent( m_handle )!=0;
-}
-
-bool event_t::reset()
-{
- return ResetEvent( m_handle )!=0;
-}
-
-// wait functions
-wait_result_t wait( HANDLE h, DWORD timeout )
-{
- return wait_result_t( WaitForSingleObject( h, timeout ), 1, &h );
-}
-
-wait_result_t wait( event_t & e, DWORD timeout )
-{
- return wait_result_t( WaitForSingleObject( e.m_handle, timeout ), 1, &e.m_handle );
-}
-
-wait_result_t wait( event_t & e1, event_t & e2, DWORD timeout )
-{
- HANDLE h[2];
- h[0]=e1.m_handle;
- h[1]=e2.m_handle;
- return wait_result_t( WaitForMultipleObjects( 2, h, FALSE, timeout ), 2, h );
-}
-
-// mutex_t
-mutex_t::mutex_t()
-{
- m_handle=CreateMutex( 0, FALSE, 0 );
- if( m_handle==0 ) {
- throw os_error_t( "mutex_t::mutex_t: CreateMutex failed" );
- }
-}
-
-mutex_t::~mutex_t()
-{
- CloseHandle( m_handle );
-}
-
-// grab_mutex_t
-grab_mutex_t::grab_mutex_t( mutex_t & mutex, DWORD timeout )
-{
- m_mutex=mutex.m_handle;
- m_wait_result=wait( m_mutex, timeout );
-}
-
-grab_mutex_t::~grab_mutex_t()
-{
- if( m_wait_result.ok() ) {
- ReleaseMutex( m_mutex );
- }
-}
-
-bool grab_mutex_t::ok()
-{
- return m_wait_result.ok();
-}
-
-DWORD grab_mutex_t::error_code()
-{
- return m_wait_result.error_code();
-}
-
-char const * grab_mutex_t::error_message()
-{
- return m_wait_result.error_message();
-}
-
-// thread_buffer_t
-thread_buffer_t::thread_buffer_t()
-{
- m_direction=dir_none;
-
- m_message_prefix="";
- m_error_code=ERROR_SUCCESS;
- m_error_message="";
-
- m_wait_timeout=2000;
- m_buffer_limit=0;
- m_read_buffer_size=4096;
-
- m_thread=0;
-
- m_thread_termination_timeout=500;
- m_translate_crlf=true;
-}
-
-thread_buffer_t::~thread_buffer_t()
-{
- bool stopped=false;
- try {
- stopped=stop_thread();
- }catch(...){
- }
- if( !stopped ) {
- // one more time, with attitude
- try {
- stopped=abort_thread();
- }catch(...){
- }
- if( !stopped ) {
- GetLastError();
- // otherwize, the thread will be left running loose stomping on freed memory.
- std::terminate();
- }
- }
-}
-
-void thread_buffer_t::set_wait_timeout( DWORD milliseconds )
-{
- if( m_direction!=dir_none ) {
- throw exec_stream_t::error_t( "thread_buffer_t::set_wait_timeout: thread already started" );
- }
- m_wait_timeout=milliseconds;
-}
-
-// next three set values that are accessed in the same thread only, so they may be called anytime
-void thread_buffer_t::set_thread_termination_timeout( DWORD milliseconds ) {
- m_thread_termination_timeout=milliseconds;
-}
-
-void thread_buffer_t::set_binary_mode()
-{
- m_translate_crlf=false;
-}
-
-void thread_buffer_t::set_text_mode()
-{
- m_translate_crlf=true;
-}
-
-void thread_buffer_t::set_buffer_limit( std::size_t limit )
-{
- if( m_direction!=dir_none ) {
- throw exec_stream_t::error_t( "thread_buffer_t::set_buffer_limit: thread already started" );
- }
- m_buffer_limit=limit;
-}
-
-void thread_buffer_t::set_read_buffer_size( std::size_t size )
-{
- if( m_direction!=dir_none ) {
- throw exec_stream_t::error_t( "thread_buffer_t::set_read_buffer_size: thread already started" );
- }
- m_read_buffer_size=size;
-}
-
-void thread_buffer_t::start_reader_thread( HANDLE pipe )
-{
- start_thread( pipe, dir_read );
-}
-
-void thread_buffer_t::start_writer_thread( HANDLE pipe )
-{
- start_thread( pipe, dir_write );
-}
-
-void thread_buffer_t::start_thread( HANDLE pipe, direction_t direction )
-{
- if( m_direction!=dir_none ) {
- throw exec_stream_t::error_t( "thread_buffer_t::start_thread: thread already started" );
- }
- m_buffer_list.clear();
- m_pipe=pipe;
- if( !m_stop_thread.reset() ) {
- throw os_error_t( "thread_buffer_t::start_thread: unable to initialize m_stop_thread event" );
- }
- if( !m_got_data.reset() ) {
- throw os_error_t( "thread_buffer_t::start_thread: unable to initialize m_got_data event" );
- }
- if( !m_want_data.set() ) {
- throw os_error_t( "thread_buffer_t::start_thread: unable to initialize m_want_data event" );
- }
- DWORD thread_id;
- m_thread=CreateThread( 0, 0, direction==dir_read ? reader_thread : writer_thread, this, 0, &thread_id );
- if( m_thread==0 ) {
- throw os_error_t( "thread_buffer_t::start_thread: unable to start thread" );
- }
- m_direction= direction==dir_read ? dir_read : dir_write;
-}
-
-bool thread_buffer_t::check_thread_stopped()
-{
- wait_result_t wait_result=wait( m_thread, m_thread_termination_timeout );
- if( !wait_result.ok() && !wait_result.timed_out() ) {
- check_error( "thread_buffer_t::check_thread_stopped: wait for thread to finish failed", wait_result.error_code(), wait_result.error_message() );
- }
- if( wait_result.ok() ) {
- CloseHandle( m_thread );
- m_direction=dir_none;
- return true;
- }else {
- return false;
- }
-}
-
-bool thread_buffer_t::stop_thread()
-{
- if( m_direction!=dir_none ) {
- if( !m_stop_thread.set() ) {
- throw os_error_t( "thread_buffer_t::stop_thread: m_stop_thread.set() failed" );
- }
- bool res=check_thread_stopped();
- if( res ) {
- check_error( m_message_prefix, m_error_code, m_error_message );
- }
- return res;
- }
- return true;
-}
-
-bool thread_buffer_t::abort_thread()
-{
- if( m_direction!=dir_none ) {
- if( !TerminateThread( m_thread, 0 ) ) {
- throw os_error_t( "exec_steam_t::abort_thread: TerminateThread failed" );
- }
- return check_thread_stopped();
- }
- return true;
-}
-
-void thread_buffer_t::get( exec_stream_t::stream_kind_t, char * dst, std::size_t & size, bool & no_more )
-{
- if( m_direction!=dir_read ) {
- throw exec_stream_t::error_t( "thread_buffer_t::get: thread was not started or was started for writing" );
- }
- // check thread status
- DWORD thread_exit_code;
- if( !GetExitCodeThread( m_thread, &thread_exit_code ) ) {
- throw os_error_t( "thread_buffer_t::get: GetExitCodeThread failed" );
- }
-
- if( thread_exit_code!=STILL_ACTIVE ) {
- if( !m_buffer_list.empty() ) {
- // we have data - deliver it first
- // when thread terminated, there is no need to synchronize
- if( m_translate_crlf ) {
- m_buffer_list.get_translate_crlf( dst, size );
- }else {
- m_buffer_list.get( dst, size );
- }
- no_more=false;
- }else {
- // thread terminated and we have no more data to return - report errors, if any
- check_error( m_message_prefix, m_error_code, m_error_message );
- // if terminated without error - signal eof
- no_more=true;
- size=0;
- }
- }else {
- no_more=false;
- // thread still running - synchronize
- // wait for both m_got_data, m_mutex
- wait_result_t wait_result=wait( m_got_data, m_wait_timeout );
- if( !wait_result.ok() ) {
- check_error( "thread_buffer_t::get: wait for got_data failed", wait_result.error_code(), wait_result.error_message() );
- }
- grab_mutex_t grab_mutex( m_mutex, m_wait_timeout );
- if( !grab_mutex.ok() ) {
- check_error( "thread_buffer_t::get: wait for mutex failed", grab_mutex.error_code(), grab_mutex.error_message() );
- }
-
- if( m_translate_crlf ) {
- m_buffer_list.get_translate_crlf( dst, size );
- }else {
- m_buffer_list.get( dst, size );
- }
-
- // if buffer is not too long tell the thread we want more data
- if( !m_buffer_list.full( m_buffer_limit ) ) {
- if( !m_want_data.set() ) {
- throw os_error_t( "thread_buffer_t::get: unable to set m_want_data event" );
- }
- }
- // if no data left - make the next get() wait until it arrives
- if( m_buffer_list.empty() ) {
- if( !m_got_data.reset() ) {
- throw os_error_t( "thread_buffer_t::get: unable to reset m_got_data event" );
- }
- }
- }
-}
-
-DWORD WINAPI thread_buffer_t::reader_thread( LPVOID param )
-{
- thread_buffer_t * p=static_cast< thread_buffer_t * >( param );
- // accessing p anywhere here is safe because thread_buffer_t destructor
- // ensures the thread is terminated before p get destroyed
- char * read_buffer=0;
- try {
- read_buffer=new char[p->m_read_buffer_size];
-
- for (;;) {
- // see if get() wants more data, or if someone wants to stop the thread
- wait_result_t wait_result=wait( p->m_stop_thread, p->m_want_data, p->m_wait_timeout );
- if( !wait_result.ok() && !wait_result.timed_out() ) {
- p->note_thread_error( "thread_buffer_t::reader_thread: wait for want_data, destruction failed", wait_result.error_code(), wait_result.error_message() );
- break;
- }
-
- if( wait_result.is_signaled( p->m_stop_thread ) ) {
- // they want us to stop
- break;
- }
-
- if( wait_result.is_signaled( p->m_want_data ) ) {
- // they want more data - read the file
- DWORD read_size=0;
- DWORD read_status=ERROR_SUCCESS;
- if( !ReadFile( p->m_pipe, read_buffer, p->m_read_buffer_size, &read_size, 0 ) ) {
- read_status=GetLastError();
- if( read_status!=ERROR_BROKEN_PIPE ) {
- p->note_thread_error( "thread_buffer_t::reader_thread: ReadFile failed", read_status, "" );
- break;
- }
- }
-
- // read something - append to p->m_buffers
- if( read_size!=0 ) {
- grab_mutex_t grab_mutex( p->m_mutex, p->m_wait_timeout );
- if( !grab_mutex.ok() ) {
- p->note_thread_error( "thread_buffer_t::reader_thread: wait for mutex failed", grab_mutex.error_code(), grab_mutex.error_message() );
- break;
- }
-
- p->m_buffer_list.put( read_buffer, read_size );
-
- // if buffer is too long - do not read any more until it shrinks
- if( p->m_buffer_list.full( p->m_buffer_limit ) ) {
- if( !p->m_want_data.reset() ) {
- p->note_thread_error( "thread_buffer_t::reader_thread: unable to reset m_want_data event", GetLastError(), "" );
- break;
- }
- }
- // tell get() we got some data
- if( !p->m_got_data.set() ) {
- p->note_thread_error( "thread_buffer_t::reader_thread: unable to set m_got_data event", GetLastError(), "" );
- break;
- }
- }
- // pipe broken - quit thread, which will be seen by get() as eof.
- if( read_status==ERROR_BROKEN_PIPE ) {
- break;
- }
- }
- }
- }catch( ... ) {
- // might only be std::bad_alloc
- p->note_thread_error( "", ERROR_SUCCESS, "thread_buffer_t::reader_thread: unknown exception caught" );
- }
-
- delete[] read_buffer;
-
- // ensure that get() is not left waiting on got_data
- p->m_got_data.set();
- return 0;
-}
-
-void thread_buffer_t::put( char * const src, std::size_t & size, bool & no_more )
-{
- if( m_direction!=dir_write ) {
- throw exec_stream_t::error_t( "thread_buffer_t::put: thread not started or started for reading" );
- }
- // check thread status
- DWORD thread_exit_code;
- if( !GetExitCodeThread( m_thread, &thread_exit_code ) ) {
- throw os_error_t( "thread_buffer_t::get: GetExitCodeThread failed" );
- }
-
- if( thread_exit_code!=STILL_ACTIVE ) {
- // thread terminated - check for errors
- check_error( m_message_prefix, m_error_code, m_error_message );
- // if terminated without error - signal eof, since no one will ever write our data
- size=0;
- no_more=true;
- }else {
- // wait for both m_want_data and m_mutex
- wait_result_t wait_result=wait( m_want_data, m_wait_timeout );
- if( !wait_result.ok() ) {
- check_error( "thread_buffer_t::put: wait for want_data failed", wait_result.error_code(), wait_result.error_message() );
- }
- grab_mutex_t grab_mutex( m_mutex, m_wait_timeout );
- if( !grab_mutex.ok() ) {
- check_error( "thread_buffer_t::put: wait for mutex failed", grab_mutex.error_code(), grab_mutex.error_message() );
- }
-
- // got them - put data
- no_more=false;
- if( m_translate_crlf ) {
- m_buffer_list.put_translate_crlf( src, size );
- }else {
- m_buffer_list.put( src, size );
- }
-
- // if the buffer is too long - make the next put() wait until it shrinks
- if( m_buffer_list.full( m_buffer_limit ) ) {
- if( !m_want_data.reset() ) {
- throw os_error_t( "thread_buffer_t::put: unable to reset m_want_data event" );
- }
- }
- // tell the thread we got data
- if( !m_buffer_list.empty() ) {
- if( !m_got_data.set() ) {
- throw os_error_t( "thread_buffer_t::put: unable to set m_got_data event" );
- }
- }
- }
-}
-
-DWORD WINAPI thread_buffer_t::writer_thread( LPVOID param )
-{
- thread_buffer_t * p=static_cast< thread_buffer_t * >( param );
- // accessing p anywhere here is safe because thread_buffer_t destructor
- // ensures the thread is terminated before p get destroyed
- try {
- buffer_list_t::buffer_t buffer;
- buffer.data=0;
- buffer.size=0;
- std::size_t buffer_offset=0;
-
- for (;;) {
- // wait for got_data or destruction, ignore timeout errors
- // for destruction the timeout is normally expected,
- // for got data the timeout is not normally expected but tolerable (no one wants to write)
- wait_result_t wait_result=wait( p->m_got_data, p->m_stop_thread, p->m_wait_timeout );
-
- if( !wait_result.ok() && !wait_result.timed_out() ) {
- p->note_thread_error( "thread_buffer_t::writer_thread: wait for got_data, destruction failed", wait_result.error_code(), wait_result.error_message() );
- break;
- }
-
- // if no data in local buffer to write - get from p->m_buffers
- if( buffer.data==0 && wait_result.is_signaled( p->m_got_data ) ) {
- grab_mutex_t grab_mutex( p->m_mutex, p->m_wait_timeout );
- if( !grab_mutex.ok() ) {
- p->note_thread_error( "thread_buffer_t::writer_thread: wait for mutex failed", grab_mutex.error_code(), grab_mutex.error_message() );
- break;
- }
- if( !p->m_buffer_list.empty() ) {
- // we've got buffer - detach it
- buffer=p->m_buffer_list.detach();
- buffer_offset=0;
- }
- // if no data left in p->m_buffers - wait until it arrives
- if( p->m_buffer_list.empty() ) {
- if( !p->m_got_data.reset() ) {
- p->note_thread_error( "thread_buffer_t::writer_thread: unable to reset m_got_data event", GetLastError(), "" );
- break;
- }
- }
- // if buffer is not too long - tell put() it can proceed
- if( !p->m_buffer_list.full( p->m_buffer_limit ) ) {
- if( !p->m_want_data.set() ) {
- p->note_thread_error( "thread_buffer_t::writer_thread: unable to set m_want_data event", GetLastError(), "" );
- break;
- }
- }
- }
-
- // see if they want us to stop, but only when all is written
- if( buffer.data==0 && wait_result.is_signaled( p->m_stop_thread ) ) {
- break;
- }
-
- if( buffer.data!=0 ) {
- // we have buffer - write it
- DWORD written_size;
- if( !WriteFile( p->m_pipe, buffer.data+buffer_offset, buffer.size-buffer_offset, &written_size, 0 ) ) {
- p->note_thread_error( "thread_buffer_t::writer_thread: WriteFile failed", GetLastError(), "" );
- break;
- }
- buffer_offset+=written_size;
- if( buffer_offset==buffer.size ) {
- delete[] buffer.data;
- buffer.data=0;
- }
- }
-
- }
-
- // we won't be writing any more - close child's stdin
- CloseHandle( p->m_pipe );
-
- // buffer may be left astray - clean up
- delete[] buffer.data;
-
- }catch( ... ) {
- // unreachable code. really.
- p->note_thread_error( "", ERROR_SUCCESS, "thread_buffer_t::writer_thread: unknown exception caught" );
- }
- // ensure that put() is not left waiting on m_want_data
- p->m_want_data.set();
- return 0;
-}
-
-void thread_buffer_t::check_error( std::string const & message_prefix, DWORD error_code, std::string const & error_message )
-{
- if( !error_message.empty() ) {
- throw exec_stream_t::error_t( message_prefix+"\n"+error_message, error_code );
- }else if( error_code!=ERROR_SUCCESS ) {
- throw os_error_t( message_prefix, error_code );
- }
-}
-
-void thread_buffer_t::note_thread_error( char const * message_prefix, DWORD error_code, char const * error_message )
-{
- m_message_prefix=message_prefix;
- m_error_code=error_code;
- m_error_message=error_message;
-}
-
diff --git a/include/exec-stream/win/exec-stream-helpers.h b/include/exec-stream/win/exec-stream-helpers.h
deleted file mode 100644
index 3cd1759..0000000
--- a/include/exec-stream/win/exec-stream-helpers.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
-Copyright (C) 2004 Artem Khodush
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-3. The name of the author may not be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-class os_error_t : public exec_stream_t::error_t {
-public:
- os_error_t( std::string const & msg );
- os_error_t( std::string const & msg, exec_stream_t::error_code_t code );
-private:
- void compose( std::string const & msg, exec_stream_t::error_code_t code );
-};
-
-
-class pipe_t {
-public:
- pipe_t();
- ~pipe_t();
- HANDLE r() const;
- HANDLE w() const;
- void close_r();
- void close_w();
- void close();
- void open();
-private:
- enum direction_t{ closed, read, write, both };
- direction_t m_direction;
- HANDLE m_r;
- HANDLE m_w;
-};
-
-
-class set_stdhandle_t {
-public:
- set_stdhandle_t( DWORD kind, HANDLE handle );
- ~set_stdhandle_t();
-private:
- DWORD m_kind;
- HANDLE m_save_handle;
-};
-
-
-class wait_result_t {
-public:
- wait_result_t();
- wait_result_t( DWORD wait_result, int objects_count, HANDLE const * objects );
-
- bool ok();
- bool is_signaled( class event_t & event );
- bool timed_out();
- DWORD error_code();
- char const * error_message();
-
-private:
-
- HANDLE m_signaled_object;
- bool m_timed_out;
- DWORD m_error_code;
- char const * m_error_message;
-};
-
-
-class event_t {
-public:
- event_t();
- ~event_t();
- bool set();
- bool reset();
-
-private:
- HANDLE m_handle;
-
- friend wait_result_t wait( event_t & e, DWORD timeout );
- friend wait_result_t wait( event_t & e1, event_t & e2, DWORD timeout );
- friend class wait_result_t;
-};
-
-wait_result_t wait( HANDLE e, DWORD timeout );
-wait_result_t wait( event_t & e, DWORD timeout );
-wait_result_t wait( event_t & e1, event_t & e2, DWORD timeout ); // waits for any one of e1, e2
-
-class mutex_t {
-public:
- mutex_t();
- ~mutex_t();
-
-private:
- HANDLE m_handle;
- friend class grab_mutex_t;
-};
-
-class grab_mutex_t {
-public:
- grab_mutex_t( mutex_t & mutex, DWORD timeout );
- ~grab_mutex_t();
-
- bool ok();
- DWORD error_code();
- char const * error_message();
-
-private:
- HANDLE m_mutex;
- wait_result_t m_wait_result;
-};
-
-
-class thread_buffer_t {
-public:
- thread_buffer_t();
- ~thread_buffer_t();
-
- // those three may be called only before the thread is started
- void set_wait_timeout( DWORD milliseconds );
- void set_thread_termination_timeout( DWORD milliseconds );
- void set_buffer_limit( std::size_t limit );
- void set_read_buffer_size( std::size_t size );
- void set_binary_mode();
- void set_text_mode();
-
- void start_reader_thread( HANDLE pipe );
- void start_writer_thread( HANDLE pipe);
-
- void get( exec_stream_t::stream_kind_t kind, char * dst, std::size_t & size, bool & no_more ); // may be called only after start_reader_thread
- void put( char * const src, std::size_t & size, bool & no_more );// may be called only after start_writer_thread
-
- bool stop_thread();
- bool abort_thread();
-
-private:
- enum direction_t { dir_none, dir_read, dir_write };
- direction_t m_direction; // set by start_thread
-
- buffer_list_t m_buffer_list;
- mutex_t m_mutex; // protecting m_buffer_list
-
- char const * m_message_prefix; // error occured in the thread, if any
- DWORD m_error_code; // they are examined only after the thread has terminated
- char const * m_error_message; // so setting them anywhere in the thread is safe
-
- DWORD m_wait_timeout; // parameters used in thread
- std::size_t m_buffer_limit; // they are set before the thread is started,
- std::size_t m_read_buffer_size; // so accessing them anywhere in the thread is safe
-
- HANDLE m_thread;
- event_t m_want_data; // for synchronisation between get and reader_thread
- event_t m_got_data; // or between put and writer_thread
- event_t m_stop_thread;
- HANDLE m_pipe;
- DWORD m_thread_termination_timeout;
- bool m_translate_crlf;
-
- void start_thread( HANDLE pipe, direction_t direction );
- static DWORD WINAPI reader_thread( LPVOID param );
- static DWORD WINAPI writer_thread( LPVOID param );
-
- void check_error( std::string const & message_prefix, DWORD error_code, std::string const & error_message );
- void note_thread_error( char const * message_prefix, DWORD error_code, char const * error_message );
- bool check_thread_stopped();
-};
-
diff --git a/include/exec-stream/win/exec-stream-impl.cpp b/include/exec-stream/win/exec-stream-impl.cpp
deleted file mode 100644
index 3d7801f..0000000
--- a/include/exec-stream/win/exec-stream-impl.cpp
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
-Copyright (C) 2004 Artem Khodush
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-3. The name of the author may not be used to endorse or promote products
-derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
-EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-// exec_stream_t::impl_t
-struct exec_stream_t::impl_t {
- impl_t();
-
- HANDLE m_child_process;
-
- HANDLE m_in_pipe;
- HANDLE m_out_pipe;
- HANDLE m_err_pipe;
-
- thread_buffer_t m_in_thread;
- thread_buffer_t m_out_thread;
- thread_buffer_t m_err_thread;
-
- exec_stream_buffer_t m_in_buffer;
- exec_stream_buffer_t m_out_buffer;
- exec_stream_buffer_t m_err_buffer;
-
- exec_ostream_t m_in;
- exec_istream_t m_out;
- exec_istream_t m_err;
-
- DWORD m_child_timeout;
- int m_exit_code;
-};
-
-exec_stream_t::impl_t::impl_t()
-: m_in_buffer( exec_stream_t::s_in, m_in_thread ), m_out_buffer( exec_stream_t::s_out, m_out_thread ), m_err_buffer( exec_stream_t::s_err, m_err_thread ),
- m_in( m_in_buffer ), m_out( m_out_buffer ), m_err( m_err_buffer )
-{
- m_out.tie( &m_in );
- m_err.tie( &m_in );
- m_child_process=0;
- m_in_pipe=0;
- m_out_pipe=0;
- m_err_pipe=0;
- m_child_timeout=500;
- m_exit_code=0;
-}
-
-
-void exec_stream_t::set_buffer_limit( int stream_kind, std::size_t size )
-{
- if( stream_kind&s_in ) {
- m_impl->m_in_thread.set_buffer_limit( size );
- }
- if( stream_kind&s_out ) {
- m_impl->m_out_thread.set_buffer_limit( size );
- }
- if( stream_kind&s_err ) {
- m_impl->m_err_thread.set_buffer_limit( size );
- }
-}
-
-void exec_stream_t::set_wait_timeout( int stream_kind, exec_stream_t::timeout_t milliseconds )
-{
- if( stream_kind&s_in ) {
- m_impl->m_in_thread.set_wait_timeout( milliseconds );
- }
- if( stream_kind&s_out ) {
- m_impl->m_out_thread.set_wait_timeout( milliseconds );
- }
- if( stream_kind&s_err ) {
- m_impl->m_err_thread.set_wait_timeout( milliseconds );
- }
- if( stream_kind&s_child ) {
- m_impl->m_child_timeout=milliseconds;
- m_impl->m_in_thread.set_thread_termination_timeout( milliseconds );
- m_impl->m_out_thread.set_thread_termination_timeout( milliseconds );
- m_impl->m_err_thread.set_thread_termination_timeout( milliseconds );
- }
-}
-
-void exec_stream_t::set_binary_mode( int stream_kind )
-{
- if( stream_kind&s_in ) {
- m_impl->m_in_thread.set_binary_mode();
- }
- if( stream_kind&s_out ) {
- m_impl->m_out_thread.set_binary_mode();
- }
- if( stream_kind&s_err ) {
- m_impl->m_err_thread.set_binary_mode();
- }
-}
-
-void exec_stream_t::set_text_mode( int stream_kind )
-{
- if( stream_kind&s_in ) {
- m_impl->m_in_thread.set_text_mode();
- }
- if( stream_kind&s_out ) {
- m_impl->m_out_thread.set_text_mode();
- }
- if( stream_kind&s_err ) {
- m_impl->m_err_thread.set_text_mode();
- }
-}
-
-void exec_stream_t::start( std::string const & program, std::string const & arguments )
-{
- if( !close() ) {
- throw exec_stream_t::error_t( "exec_stream_t::start: previous child process has not yet terminated" );
- }
-
- pipe_t in;
- pipe_t out;
- pipe_t err;
- set_stdhandle_t set_in( STD_INPUT_HANDLE, in.r() );
- set_stdhandle_t set_out( STD_OUTPUT_HANDLE, out.w() );
- set_stdhandle_t set_err( STD_ERROR_HANDLE, err.w() );
- HANDLE cp=GetCurrentProcess();
- if( !DuplicateHandle( cp, in.w(), cp, &m_impl->m_in_pipe, 0, FALSE, DUPLICATE_SAME_ACCESS ) ) {
- throw os_error_t( "exec_stream_t::start: unable to duplicate in handle" );
- }
- in.close_w();
- if( !DuplicateHandle( cp, out.r(), cp, &m_impl->m_out_pipe, 0, FALSE, DUPLICATE_SAME_ACCESS ) ) {
- throw os_error_t( "exec_stream_t::start: unable to duplicate out handle" );
- }
- out.close_r();
- if( !DuplicateHandle( cp, err.r(), cp, &m_impl->m_err_pipe, 0, FALSE, DUPLICATE_SAME_ACCESS ) ) {
- throw os_error_t( "exec_stream_t::start: unable to duplicate err handle" );
- }
- err.close_r();
-
- std::string command;
- command.reserve( program.size()+arguments.size()+3 );
- if( program.find_first_of( " \t" )!=std::string::npos ) {
- command+='"';
- command+=program;
- command+='"';
- }else
- command=program;
- if( arguments.size()!=0 ) {
- command+=' ';
- command+=arguments;
- }
- STARTUPINFOA si;
- ZeroMemory( &si, sizeof( si ) );
- si.cb=sizeof( si );
- PROCESS_INFORMATION pi;
- ZeroMemory( &pi, sizeof( pi ) );
- if( !CreateProcessA( 0, const_cast< char * >( command.c_str() ), 0, 0, TRUE, 0, 0, 0, &si, &pi ) ) {
- throw os_error_t( "exec_stream_t::start: CreateProcess failed.\n command line was: "+command );
- }
-
- m_impl->m_child_process=pi.hProcess;
-
- m_impl->m_in_buffer.clear();
- m_impl->m_out_buffer.clear();
- m_impl->m_err_buffer.clear();
-
- m_impl->m_in.clear();
- m_impl->m_out.clear();
- m_impl->m_err.clear();
-
- m_impl->m_out_thread.set_read_buffer_size( STREAM_BUFFER_SIZE );
- m_impl->m_out_thread.start_reader_thread( m_impl->m_out_pipe );
-
- m_impl->m_err_thread.set_read_buffer_size( STREAM_BUFFER_SIZE );
- m_impl->m_err_thread.start_reader_thread( m_impl->m_err_pipe );
-
- m_impl->m_in_thread.start_writer_thread( m_impl->m_in_pipe );
-}
-
-void exec_stream_t::start( std::string const & program, exec_stream_t::next_arg_t & next_arg )
-{
- std::string arguments;
- while( std::string const * arg=next_arg.next() ) {
- if( arg->find_first_of( " \t\"" )!=std::string::npos ) {
- arguments+=" \"";
- std::string::size_type cur=0;
- while( cur<arg->size() ) {
- std::string::size_type next=arg->find( '"', cur );
- if( next==std::string::npos ) {
- next=arg->size();
- arguments.append( *arg, cur, next-cur );
- cur=next;
- }else {
- arguments.append( *arg, cur, next-cur );
- arguments+="\\\"";
- cur=next+1;
- }
- }
- arguments+="\"";
- }else {
- arguments+=" "+*arg;
- }
- }
- start( program, arguments );
-}
-
-bool exec_stream_t::close_in()
-{
- if( m_impl->m_in_pipe!=0 ) {
- m_impl->m_in.flush();
- // stop writer thread before closing the handle it writes to,
- // the thread will attempt to write anything it can and close child's stdin
- // before thread_termination_timeout elapses
- if( m_impl->m_in_thread.stop_thread() ) {
- m_impl->m_in_pipe=0;
- return true;
- }else {
- return false;
- }
- }else {
- return true;
- }
-}
-
-bool exec_stream_t::close()
-{
- if( !close_in() ) {
- // need to close child's stdin no matter what, because otherwise "usual" child will run forever
- // And before closing child's stdin the writer thread should be stopped no matter what,
- // because it may be blocked on Write to m_in_pipe, and in that case closing m_in_pipe may block.
- if( !m_impl->m_in_thread.abort_thread() ) {
- throw exec_stream_t::error_t( "exec_stream_t::close: waiting till in_thread stops exceeded timeout" );
- }
- // when thread is terminated abnormally, it may left child's stdin open
- // try to close it here
- CloseHandle( m_impl->m_in_pipe );
- m_impl->m_in_pipe=0;
- }
- if( !m_impl->m_out_thread.stop_thread() ) {
- if( !m_impl->m_out_thread.abort_thread() ) {
- throw exec_stream_t::error_t( "exec_stream_t::close: waiting till out_thread stops exceeded timeout" );
- }
- }
- if( !m_impl->m_err_thread.stop_thread() ) {
- if( !m_impl->m_err_thread.abort_thread() ) {
- throw exec_stream_t::error_t( "exec_stream_t::close: waiting till err_thread stops exceeded timeout" );
- }
- }
- if( m_impl->m_out_pipe!=0 ) {
- if( !CloseHandle( m_impl->m_out_pipe ) ) {
- throw os_error_t( "exec_stream_t::close: unable to close out_pipe handle" );
- }
- m_impl->m_out_pipe=0;
- }
- if( m_impl->m_err_pipe!=0 ) {
- if( !CloseHandle( m_impl->m_err_pipe ) ) {
- throw os_error_t( "exec_stream_t::close: unable to close err_pipe handle" );
- }
- m_impl->m_err_pipe=0;
- }
- if( m_impl->m_child_process!=0 ) {
- wait_result_t wait_result=wait( m_impl->m_child_process, m_impl->m_child_timeout );
- if( !wait_result.ok() & !wait_result.timed_out() ) {
- throw os_error_t( std::string( "exec_stream_t::close: wait for child process failed. " )+wait_result.error_message() );
- }
- if( wait_result.ok() ) {
- DWORD exit_code;
- if( !GetExitCodeProcess( m_impl->m_child_process, &exit_code ) ) {
- throw os_error_t( "exec_stream_t::close: unable to get process exit code" );
- }
- m_impl->m_exit_code=exit_code;
- if( !CloseHandle( m_impl->m_child_process ) ) {
- throw os_error_t( "exec_stream_t::close: unable to close child process handle" );
- }
- m_impl->m_child_process=0;
- }
- }
- return m_impl->m_child_process==0;
-}
-
-void exec_stream_t::kill()
-{
- if( m_impl->m_child_process!=0 ) {
- if( !TerminateProcess( m_impl->m_child_process, 0 ) ) {
- throw os_error_t( "exec_stream_t::kill: unable to terminate child process" );
- }
- m_impl->m_exit_code=0;
- if( !CloseHandle( m_impl->m_child_process ) ) {
- throw os_error_t( "exec_stream_t::close: unable to close child process handle" );
- }
- m_impl->m_child_process=0;
- }
-}
-
-int exec_stream_t::exit_code()
-{
- if( m_impl->m_child_process!=0 ) {
- throw exec_stream_t::error_t( "exec_stream_t:exit_code: child process still running" );
- }
- return m_impl->m_exit_code;
-}
diff --git a/include/posix/popen_plus.c b/include/posix/popen_plus.c
new file mode 100644
index 0000000..f02fba9
--- /dev/null
+++ b/include/posix/popen_plus.c
@@ -0,0 +1,183 @@
+/*
+ ** Author: Hamid Alipour http://codingrecipes.com http://twitter.com/code_head
+ ** SQLite style license:
+ **
+ ** 2001 September 15
+ **
+ ** The author disclaims copyright to this source code. In place of
+ ** a legal notice, here is a blessing:
+ **
+ ** May you do good and not evil.
+ ** May you find forgiveness for yourself and forgive others.
+ ** May you share freely, never taking more than you give.
+ **/
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <paths.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <string.h>
+#include "popen_plus.h"
+
+popen_plus_process *popen_plus(const char *command)
+{
+ int inpipe[2];
+ int outpipe[2];
+ char *argv[4];
+ popen_plus_process *process = (popen_plus_process*)malloc(sizeof(popen_plus_process));
+
+ if (!process)
+ goto error_out;
+
+ if (pipe(inpipe) != 0)
+ goto clean_process_out;
+
+ if (pipe(outpipe) != 0)
+ goto clean_inpipe_out;
+
+ process->read_fp = fdopen(outpipe[READ], "r");
+ if (!process->read_fp)
+ goto clean_outpipe_out;
+
+ process->write_fp = fdopen(inpipe[WRITE], "w");
+ if (!process->write_fp)
+ goto clean_read_fp_out;
+
+ if (pthread_mutex_init(&process->mutex, NULL) != 0)
+ goto clean_write_fp_out;
+
+ process->pid = fork();
+ if (process->pid == -1)
+ goto clean_mutex_out;
+
+ if (process->pid == 0) {
+ close(outpipe[READ]);
+ close(inpipe[WRITE]);
+
+ if (inpipe[READ] != STDIN_FILENO) {
+ dup2(inpipe[READ], STDIN_FILENO);
+ close(inpipe[READ]);
+ }
+
+ if (outpipe[WRITE] != STDOUT_FILENO) {
+ dup2(outpipe[WRITE], STDOUT_FILENO);
+ close(outpipe[WRITE]);
+ }
+
+ argv[0] = "sh";
+ argv[1] = "-c";
+ argv[2] = (char *) command;
+ argv[3] = NULL;
+
+ execv(_PATH_BSHELL, argv);
+ exit(127);
+ }
+
+ close(outpipe[WRITE]);
+ close(inpipe[READ]);
+
+ return process;
+
+clean_mutex_out:
+ pthread_mutex_destroy(&process->mutex);
+
+clean_write_fp_out:
+ fclose(process->write_fp);
+
+clean_read_fp_out:
+ fclose(process->read_fp);
+
+clean_outpipe_out:
+ close(outpipe[READ]);
+ close(outpipe[WRITE]);
+
+clean_inpipe_out:
+ close(inpipe[READ]);
+ close(inpipe[WRITE]);
+
+clean_process_out:
+ free(process);
+
+error_out:
+ return NULL;
+}
+
+int popen_plus_close(popen_plus_process *process)
+{
+ int pstat = 0;
+ pid_t pid = 0;
+
+ /**
+ * If someone else destroys this mutex, then this call will fail and we know
+ * that another thread already cleaned up the process so we can safely return
+ * and since we are destroying this mutex below then we don't need to unlock
+ * it...
+ */
+ if (pthread_mutex_lock(&process->mutex) != 0)
+ return 0;
+
+ if (process->pid != -1) {
+ do {
+ pid = waitpid(process->pid, &pstat, 0);
+ } while (pid == -1 && errno == EINTR);
+ }
+
+ if (process->read_fp)
+ fclose(process->read_fp);
+
+ if (process->write_fp)
+ fclose(process->write_fp);
+
+ pthread_mutex_destroy(&process->mutex);
+
+ free(process);
+
+ return (pid == -1 ? -1 : pstat);
+}
+
+int popen_plus_kill(popen_plus_process *process)
+{
+ char command[64];
+
+ sprintf(command, "kill -9 %d", process->pid);
+ system(command);
+
+ return 0;
+}
+
+int popen_plus_kill_by_id(int process_id)
+{
+ char command[64];
+
+ sprintf(command, "kill -9 %d", process_id);
+ system(command);
+
+ return 0;
+}
+
+int popen_plus_terminate(popen_plus_process *process)
+{
+ char command[64];
+
+ sprintf(command, "kill -TERM %d", process->pid);
+ system(command);
+
+ return 0;
+}
+
+int popen_plus_terminate_with_id(int process_id)
+{
+ char command[64];
+
+ sprintf(command, "kill -TERM %d", process_id);
+ system(command);
+
+ return 0;
+}
diff --git a/include/posix/popen_plus.h b/include/posix/popen_plus.h
new file mode 100644
index 0000000..ee4b3c8
--- /dev/null
+++ b/include/posix/popen_plus.h
@@ -0,0 +1,56 @@
+/*
+ ** Author: Hamid Alipour http://codingrecipes.com http://twitter.com/code_head
+ ** SQLite style license:
+ **
+ ** 2001 September 15
+ **
+ ** The author disclaims copyright to this source code. In place of
+ ** a legal notice, here is a blessing:
+ **
+ ** May you do good and not evil.
+ ** May you find forgiveness for yourself and forgive others.
+ ** May you share freely, never taking more than you give.
+ **/
+
+#pragma once
+#ifndef POPEN_PLUS_H_f28c53c53a48d38efafee7fb7004a01faaac9e22
+#define POPEN_PLUS_H_f28c53c53a48d38efafee7fb7004a01faaac9e22
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <paths.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+
+#define READ 0
+#define WRITE 1
+
+typedef struct {
+ pthread_mutex_t mutex;
+ pid_t pid;
+ FILE *read_fp;
+ FILE *write_fp;
+} popen_plus_process;
+
+popen_plus_process *popen_plus(const char *command);
+int popen_plus_close(popen_plus_process *process);
+int popen_plus_kill(popen_plus_process *process);
+int popen_plus_kill_by_id(int process_id);
+int popen_plus_terminate(popen_plus_process *process);
+int popen_plus_terminate_with_id(int process_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/win32/getopt.c b/include/win32/getopt.c
similarity index 100%
rename from win32/getopt.c
rename to include/win32/getopt.c
diff --git a/win32/getopt.h b/include/win32/getopt.h
similarity index 100%
rename from win32/getopt.h
rename to include/win32/getopt.h
diff --git a/manual/cgkeywords.xml b/manual/cgkeywords.xml
index 1bc7bce..9de725e 100644
--- a/manual/cgkeywords.xml
+++ b/manual/cgkeywords.xml
@@ -443,6 +443,19 @@
</para>
</section>
+ <section id="keyword-options">
+ <title>OPTIONS</title>
+ <indexterm>
+ <primary>OPTIONS</primary>
+ </indexterm>
+ <para>
+ Global options that affect the grammar parsing.
+ </para>
+ <screen>
+ OPTIONS += no-inline-sets ;
+ </screen>
+ </section>
+
<section id="keyword-preferred-targets">
<title>PREFERRED-TARGETS</title>
<indexterm>
@@ -679,6 +692,19 @@
</screen>
</section>
+ <section id="keyword-strict-tags">
+ <title>STRICT-TAGS</title>
+ <indexterm>
+ <primary>STRICT-TAGS</primary>
+ </indexterm>
+ <para>
+ A whitelist of allowed tags.
+ </para>
+ <screen>
+ STRICT-TAGS += N V ADJ ;
+ </screen>
+ </section>
+
<section id="keyword-substitute">
<title>SUBSTITUTE</title>
<indexterm>
diff --git a/manual/cmdreference.xml b/manual/cmdreference.xml
index 45d65b3..90485be 100644
--- a/manual/cmdreference.xml
+++ b/manual/cmdreference.xml
@@ -17,62 +17,63 @@
Usage: vislcg3 [OPTIONS]
Options:
- -h, --help shows this help
- -?, --? shows this help
- -V, --version prints copyright and version information
- -g, --grammar specifies the grammar file to use for disambiguation
- --grammar-out writes the compiled grammar in textual form to a file
- --grammar-bin writes the compiled grammar in binary form to a file
- --grammar-only only compiles the grammar; implies --verbose
- --ordered allows multiple identical tags (will in future allow full ordered matching)
- -u, --unsafe allows the removal of all readings in a cohort, even the last one
- -s, --sections number or ranges of sections to run; defaults to all sections
- --rules number or ranges of rules to run; defaults to all rules
- --rule a name or number of a single rule to run
- -d, --debug enables debug output (very noisy)
- -v, --verbose increases verbosity
- -2, --vislcg-compat enables compatibility mode for older CG-2 and vislcg grammars
- -I, --stdin file to print output to instead of stdout
- -O, --stdout file to read input from instead of stdin
- -E, --stderr file to print errors to instead of stderr
- -C, --codepage-all codepage to use for grammar, input, and output streams; defaults to environment settings
- --codepage-grammar codepage to use for grammar; overrides --codepage-all
- --codepage-input codepage to use for input; overrides --codepage-all
- --codepage-output codepage to use for output and errors; overrides --codepage-all
- -L, --locale-all locale to use for grammar, input, and output streams; defaults to en_US_POSIX
- --locale-grammar locale to use for grammar; overrides --locale-all
- --locale-input locale to use for input; overrides --locale-all
- --locale-output locale to use for output and errors; overrides --locale-all
- --no-mappings disables all MAP, ADD, and REPLACE rules
- --no-corrections disables all SUBSTITUTE and APPEND rules
- --no-before-sections disables all rules in BEFORE-SECTIONS parts
- --no-sections disables all rules in SECTION parts
- --no-after-sections disables all rules in AFTER-SECTIONS parts
- -t, --trace prints debug output alongside with normal output
- --trace-name-only if a rule is named, omit the line number; implies --trace
- --trace-no-removed does not print removed readings; implies --trace
- --trace-encl traces which enclosure pass is currently happening; implies --trace
- --single-run runs each section only once; same as --max-runs 1
- --max-runs runs each section max N times; defaults to unlimited (0)
- -S, --statistics gathers profiling statistics while applying grammar
- -Z, --optimize-unsafe destructively optimize the profiled grammar to be faster
- -z, --optimize-safe conservatively optimize the profiled grammar to be faster
- -p, --prefix sets the mapping prefix; defaults to @
- --unicode-tags outputs Unicode code points for things like ->
- --num-windows number of windows to keep in before/ahead buffers; defaults to 2
- --always-span forces scanning tests to always span across window boundaries
- --soft-limit number of cohorts after which the SOFT-DELIMITERS kick in; defaults to 300
- --hard-limit number of cohorts after which the window is forcefully cut; defaults to 500
- -D, --dep-delimit delimit windows based on dependency instead of DELIMITERS; defaults to 10
- --dep-original outputs the original input dependency tag even if it is no longer valid
- --dep-allow-loops allows the creation of circular dependencies
- --dep-no-crossing prevents the creation of dependencies that would result in crossing branches
- --no-magic-readings prevents running rules on magic readings
- -o, --no-pass-origin prevents scanning tests from passing the point of origin
- -e, --show-end-tags allows the <<< tags to appear in output
- --show-unused-sets prints a list of unused sets and their line numbers; implies --grammar-only
- --show-tag-hashes prints a list of tags and their hashes as they are parsed during the run
- --show-set-hashes prints a list of sets and their hashes; implies --grammar-only
+ -h, --help shows this help
+ -?, --? shows this help
+ -V, --version prints copyright and version information
+ --min-binary-revision prints the minimum usable binary grammar revision
+ -g, --grammar specifies the grammar file to use for disambiguation
+ --grammar-out writes the compiled grammar in textual form to a file
+ --grammar-bin writes the compiled grammar in binary form to a file
+ --grammar-only only compiles the grammar; implies --verbose
+ --ordered (will in future allow full ordered matching)
+ -u, --unsafe allows the removal of all readings in a cohort, even the last one
+ -s, --sections number or ranges of sections to run; defaults to all sections
+ --rules number or ranges of rules to run; defaults to all rules
+ --rule a name or number of a single rule to run
+ -d, --debug enables debug output (very noisy)
+ -v, --verbose increases verbosity
+ --quiet squelches warnings (same as -v 0)
+ -2, --vislcg-compat enables compatibility mode for older CG-2 and vislcg grammars
+ -I, --stdin file to read input from instead of stdin
+ -O, --stdout file to print output to instead of stdout
+ -E, --stderr file to print errors to instead of stderr
+ -C, --codepage-all codepage to use for grammar, input, and output streams; defaults to UTF-8
+ --codepage-grammar codepage to use for grammar; overrides --codepage-all
+ --codepage-input codepage to use for input; overrides --codepage-all
+ --codepage-output codepage to use for output and errors; overrides --codepage-all
+ --no-mappings disables all MAP, ADD, and REPLACE rules
+ --no-corrections disables all SUBSTITUTE and APPEND rules
+ --no-before-sections disables all rules in BEFORE-SECTIONS parts
+ --no-sections disables all rules in SECTION parts
+ --no-after-sections disables all rules in AFTER-SECTIONS parts
+ -t, --trace prints debug output alongside with normal output
+ --trace-name-only if a rule is named, omit the line number; implies --trace
+ --trace-no-removed does not print removed readings; implies --trace
+ --trace-encl traces which enclosure pass is currently happening; implies --trace
+ --dry-run make no actual changes to the input
+ --single-run runs each section only once; same as --max-runs 1
+ --max-runs runs each section max N times; defaults to unlimited (0)
+ -S, --statistics gathers profiling statistics while applying grammar
+ -Z, --optimize-unsafe destructively optimize the profiled grammar to be faster
+ -z, --optimize-safe conservatively optimize the profiled grammar to be faster
+ -p, --prefix sets the mapping prefix; defaults to @
+ --unicode-tags outputs Unicode code points for things like ->
+ --unique-tags outputs unique tags only once per reading
+ --num-windows number of windows to keep in before/ahead buffers; defaults to 2
+ --always-span forces scanning tests to always span across window boundaries
+ --soft-limit number of cohorts after which the SOFT-DELIMITERS kick in; defaults to 300
+ --hard-limit number of cohorts after which the window is forcefully cut; defaults to 500
+ -D, --dep-delimit delimit windows based on dependency instead of DELIMITERS; defaults to 10
+ --dep-original outputs the original input dependency tag even if it is no longer valid
+ --dep-allow-loops allows the creation of circular dependencies
+ --dep-no-crossing prevents the creation of dependencies that would result in crossing branches
+ --no-magic-readings prevents running rules on magic readings
+ -o, --no-pass-origin prevents scanning tests from passing the point of origin
+ -e, --show-end-tags allows the <<< tags to appear in output
+ --show-unused-sets prints a list of unused sets and their line numbers; implies --grammar-only
+ --show-tags prints a list of unique tags; implies --grammar-only
+ --show-tag-hashes prints a list of tags and their hashes as they are parsed during the run
+ --show-set-hashes prints a list of sets and their hashes; implies --grammar-only
</screen>
</section>
@@ -102,6 +103,9 @@ Options:
-A, --out-apertium sets output format to Apertium
-N, --out-niceline sets output format to Niceline CG
-P, --out-plain sets output format to plain text
+ -W, --wfactor FST weight factor (defaults to 100.0)
+ --wtag FST weight tag prefix (defaults to W)
+ -S, --sub-delim FST sub-reading delimiters (defaults to #)
-r, --rtl sets sub-reading direction to RTL (default)
-l, --ltr sets sub-reading direction to LTR
</screen>
diff --git a/manual/contexts.xml b/manual/contexts.xml
index d630d25..b4f1e78 100644
--- a/manual/contexts.xml
+++ b/manual/contexts.xml
@@ -271,6 +271,23 @@
</screen>
</section>
+ <section id="test-branch">
+ <title>Optional Frequencies</title>
+ <indexterm>
+ <primary>f</primary>
+ </indexterm>
+ <para>
+ Position modifier 'f' creates two branches based on the current test. In the first, the test remains exactly as
+ is written. In the second, all <link linkend="numerical-matches">numeric tags</link> are removed and
+ modifier 'C' is added. This is equivalent to making an inline template with OR'ed tests.
+ </para>
+ <para>
+ E.g., test <code>(-1*f (N <W>50>))</code> is equivalent to <code>(-1* (N <W>50>)) OR (-1*C (N))</code>.
+ This is all done at compile time.
+ The numeric tag removal will dig down through the whole target set and create new sets along the way as needed.
+ </para>
+ </section>
+
<section id="test-dependency">
<title>Dependencies</title>
<para>
diff --git a/manual/grammar.xml b/manual/grammar.xml
index 3807d81..65f0020 100644
--- a/manual/grammar.xml
+++ b/manual/grammar.xml
@@ -4,6 +4,80 @@
<chapter id="grammar">
<title>Grammar</title>
+ <section id="reopen-mappings">
+ <title>REOPEN-MAPPINGS</title>
+ <para>
+ A list of mapping tags that ADD/MAP/REPLACE should be able to operate on even though they were present on readings
+ in the input stream.
+ </para>
+ <screen>
+ REOPEN-MAPPINGS = @a @b @c ;
+ </screen>
+ </section>
+
+ <section id="grammar-options">
+ <title>OPTIONS</title>
+ <para>
+ You can affect how the grammar should be parsed with <code>OPTIONS += ... ;</code>.
+ Currently options can only be added, hence <code>+=</code>, but removing and assignment can be implemented if needed.
+ </para>
+ <screen>
+ OPTIONS += no-inline-templates ;
+ </screen>
+
+ <section id="grammar-options-noisets">
+ <title>no-inline-sets</title>
+ <indexterm>
+ <primary>no-inline-sets</primary>
+ </indexterm>
+ <para>
+ Disallows the use of inline sets in most places. They're still allowed in places that CG-2 did not consider sets,
+ such as MAP, ADD, REPLACE, and ADDCOHORT tag lists, and in the context of a SET definition.
+ Also, the special set (*) remains valid.
+ </para>
+ </section>
+
+ <section id="grammar-options-noitmpls">
+ <title>no-inline-templates</title>
+ <indexterm>
+ <primary>no-inline-templates</primary>
+ </indexterm>
+ <para>
+ Disallows the use of inline templates in most places. They're still allowed in the context of a TEMPLATE definition.
+ </para>
+ </section>
+
+ <section id="grammar-options-strict-wforms">
+ <title>strict-wordforms</title>
+ <indexterm>
+ <primary>strict-wordforms</primary>
+ </indexterm>
+ <para>
+ Instructs STRICT-TAGS to forbid all wordform tags (<code>"<…>"</code>) by default.
+ </para>
+ </section>
+
+ <section id="grammar-options-strict-bforms">
+ <title>strict-baseforms</title>
+ <indexterm>
+ <primary>strict-baseforms</primary>
+ </indexterm>
+ <para>
+ Instructs STRICT-TAGS to forbid all baseform tags (<code>"…"</code>) by default.
+ </para>
+ </section>
+
+ <section id="grammar-options-strict-second">
+ <title>strict-secondary</title>
+ <indexterm>
+ <primary>strict-secondary</primary>
+ </indexterm>
+ <para>
+ Instructs STRICT-TAGS to forbid all secondary tags (<code><…></code>) by default.
+ </para>
+ </section>
+ </section>
+
<section id="grammar-include">
<title>INCLUDE</title>
<para>
diff --git a/manual/sets.xml b/manual/sets.xml
index 4323071..ead7603 100644
--- a/manual/sets.xml
+++ b/manual/sets.xml
@@ -15,7 +15,7 @@
<screen>
LIST a = a b c d ;
LIST b = c d e f ;
-
+
# Logically yields a set containing tags: a b c d e f
# Practically a reading must match either set
SET r = a OR b ;
@@ -31,7 +31,7 @@
<screen>
LIST a = a b c d ;
LIST b = c d e f ;
-
+
# Logically yields a set containing tags: a b !c !d !e !f
# Practically a reading must match the first set and must not match the second set
SET r = a - b ;
@@ -47,7 +47,7 @@
<screen>
LIST a = a b c d ;
LIST b = c d e f ;
-
+
# Logically yields a set containing tags: a b e f
SET r = a ∆ b ;
</screen>
@@ -62,7 +62,7 @@
<screen>
LIST a = a b c d ;
LIST b = c d e f ;
-
+
# Logically yields a set containing tags: c d
SET r = a ∩ b ;
</screen>
@@ -76,7 +76,7 @@
<screen>
LIST a = a b c d ;
LIST b = c d e f ;
-
+
# Logically yields a set containing tags: (a c) (b c) c (d c) (a d) (b d) d (a e)
# (b e) (c e) (d e) (a f) (b f) (c f) (d f)
# Practically a reading must match both sets
@@ -111,7 +111,7 @@
SELECT (*) - NotTheseTags ;
</screen>
</section>
-
+
<section id="set-delimiters">
<title>_S_DELIMITERS_</title>
<para>
@@ -123,7 +123,7 @@
SET SomeSet = OtherSet OR _S_DELIMITERS_ ;
</screen>
</section>
-
+
<section id="set-soft-delimiters">
<title>_S_SOFT_DELIMITERS_</title>
<para>
@@ -134,7 +134,7 @@
(**1 _S_SOFT_DELIMITERS_ BARRIER BoogieSet)
</screen>
</section>
-
+
<section id="set-target">
<title>Magic Set _TARGET_</title>
<para>
@@ -142,7 +142,7 @@
This set and tag will only match when the currently active cohort is the target of the rule.
</para>
</section>
-
+
<section id="set-mark">
<title>Magic Set _MARK_</title>
<para>
@@ -151,7 +151,7 @@
or if no such mark is set it will only match the target of the rule.
</para>
</section>
-
+
<section id="set-attachto">
<title>Magic Set _ATTACHTO_</title>
<para>
@@ -159,6 +159,15 @@
This set and tag will only match when the currently active cohort is the mark set with A.
</para>
</section>
+
+ <section id="set-same-basic">
+ <title>Magic Set _SAME_BASIC_</title>
+ <para>
+ A magic set containing the single tag (_SAME_BASIC_).
+ This set and tag will only match when the currently active reading has the same basic tags (non-mapping tags)
+ as the target reading.
+ </para>
+ </section>
</section>
<section id="set-unification">
@@ -208,17 +217,17 @@
<screen>
# Put $$UNISET in the target
SELECT (tag) + $$UNISET IF (-2* $$UNISET) (1** $$UNISET) ;
-
+
# Only refer to $$UNISET in a single linked chain of tests
SELECT (tag) IF (0 $$UNISET LINK -2* $$UNISET LINK 1** $$UNISET) ;
-
+
# Use rule option KEEPORDER
SELECT KEEPORDER (tag) IF (0 $$UNISET) (-2* $$UNISET) (1** $$UNISET) ;
</screen>
Having the unifier in the target is usually the best way to enforce behavior.
</para>
</section>
-
+
<section id="set-unification-set">
<title>Top-Level Set Unification</title>
<para>
diff --git a/manual/tags.xml b/manual/tags.xml
index 4223d6a..5c4be7b 100644
--- a/manual/tags.xml
+++ b/manual/tags.xml
@@ -352,4 +352,44 @@
This set will never match a reading with a <dem> tag, even if the reading matches (V TR).
</para>
</section>
+
+ <section id="strict-tags">
+ <title>STRICT-TAGS</title>
+ <para>
+ If you are worried about typos or need to otherwise enforce a strict tagset, <code>STRICT-TAGS</code> is your friend.
+ You can add tags to the list of allowed tags with <code>STRICT-TAGS += ... ;</code> where <code>...</code> is a list
+ of tags to allow. Any tag parsed while the STRICT-TAGS list is non-empty will be checked against the list, and an
+ error will be thrown if the tag is not on the list.
+ </para>
+ <para>
+ It is currently only possible to add to the list, hence <code>+=</code>.
+ Removing and assigning can be added if anyone needs those.
+ </para>
+ <screen>
+ STRICT-TAGS += N V ADJ etc ... ;
+ </screen>
+ <para>
+ By default, STRICT-TAGS always allows wordforms, baseforms, and VISL-style secondary tags
+ (<code>"<…>"</code>, <code>"…"</code>, <code><…></code>), since those are too prolific to list
+ individually. If you are extra paranoid, you can change that with <link linkend="grammar-options">OPTIONS</link>.
+ </para>
+ <para>
+ To get a list of unique used tags, pass --show-tags to CG-3. To filter this list to the default set of interesting tags,
+ something like this can be used:
+ <screen>
+ vislcg3 --show-tags -g grammar-goes-here | LC_ALL=C sort | egrep -v '^"' | egrep -v '^(/)?<.*>(/r|v)?$' | \
+ egrep -v '^\^' | egrep -v '^VSTR:' | egrep -v '^VAR:' | egrep -v '^_.*_$' | grep -v 'dummy string' | \
+ grep -v '^\*$' | grep -v '^<<<$' | grep -v '^>>>$' > tags.txt
+ </screen>
+ <emphasis>For comparison, this yields 285 tags for VISL's 10000-rule Danish grammar.</emphasis>
+ Edit the resulting list to remove any tags you can see are typos or should otherwise not be allowed,
+ collapse the list to a line, stuff it at the top of the grammar with STRICT-TAGS, and recompile the grammar.
+ Any errors you get will be lines where forbidden tags are used, which can be whole sets if those sets aren't used in any rules.
+ </para>
+ <para>
+ Once you have a suitable STRICT-TAGS list, you can further trim the grammar by taking advantage of the fact that any
+ tag listed in STRICT-TAGS may be used as an implicit set that contains only the tag itself.
+ No more need for <code>LIST N = N ;</code> constructs.
+ </para>
+ </section>
</chapter>
diff --git a/scripts/cg3-autobin.pl.in b/scripts/cg3-autobin.pl.in
index 0047efe..927caa7 100755
--- a/scripts/cg3-autobin.pl.in
+++ b/scripts/cg3-autobin.pl.in
@@ -16,6 +16,7 @@ my %h = ();
GetOptions (\%h,
"help|h",
"version|V",
+"min-binary-revision",
"grammar|g=s",
"grammar-out=s",
"grammar-bin=s",
@@ -27,6 +28,7 @@ GetOptions (\%h,
"rule=s",
"debug|d:s",
"verbose|v:s",
+"quiet",
"vislcg-compat|2",
"stdin|I=s",
"stdout|O=s",
@@ -35,10 +37,6 @@ GetOptions (\%h,
"codepage-grammar=s",
"codepage-input=s",
"codepage-output=s",
-"locale-all|L=s",
-"locale-grammar=s",
-"locale-input=s",
-"locale-output=s",
"no-mappings",
"no-corrections",
"no-before-sections",
diff --git a/src/ApertiumApplicator.cpp b/src/ApertiumApplicator.cpp
index 47470ce..7d11ee4 100644
--- a/src/ApertiumApplicator.cpp
+++ b/src/ApertiumApplicator.cpp
@@ -227,7 +227,7 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
numCohorts++;
} // end >= soft_limit
if (cCohort && (cSWindow->cohorts.size() >= hard_limit || (grammar->delimiters && doesSetMatchCohortNormal(*cCohort, grammar->delimiters->number)))) {
- if (cSWindow->cohorts.size() >= hard_limit) {
+ if (!is_conv && cSWindow->cohorts.size() >= hard_limit) {
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
@@ -805,7 +805,9 @@ void ApertiumApplicator::printSingleWindow(SingleWindow *window, UFILE *output)
Cohort *cohort = window->cohorts[c];
- mergeMappings(*cohort);
+ if (!split_mappings) {
+ mergeMappings(*cohort);
+ }
// Start of cohort
u_fprintf(output, "^");
diff --git a/src/BinaryGrammar_read.cpp b/src/BinaryGrammar_read.cpp
index 7e4ad71..061a5a2 100644
--- a/src/BinaryGrammar_read.cpp
+++ b/src/BinaryGrammar_read.cpp
@@ -27,28 +27,6 @@
namespace CG3 {
-inline void trie_unserialize(trie_t& trie, FILE *input, Grammar& grammar, uint32_t num_tags) {
- for (uint32_t i = 0; i < num_tags; ++i) {
- uint32_t u32tmp = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
- u32tmp = (uint32_t)ntohl(u32tmp);
- trie_node_t& node = trie[grammar.single_tags_list[u32tmp]];
-
- uint8_t u8tmp = 0;
- fread(&u8tmp, sizeof(uint8_t), 1, input);
- node.terminal = (u8tmp != 0);
-
- fread(&u32tmp, sizeof(uint32_t), 1, input);
- u32tmp = (uint32_t)ntohl(u32tmp);
- if (u32tmp) {
- if (!node.trie) {
- node.trie = new trie_t;
- }
- trie_unserialize(*node.trie, input, grammar, u32tmp);
- }
- }
-}
-
int BinaryGrammar::readBinaryGrammar(FILE *input) {
if (!input) {
u_fprintf(ux_stderr, "Error: Input is null - cannot read from nothing!\n");
@@ -65,7 +43,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
UErrorCode err = U_ZERO_ERROR;
UConverter *conv = ucnv_open("UTF-8", &err);
- if (fread(&cbuffers[0][0], 1, 4, input) != 4) {
+ if (fread_throw(&cbuffers[0][0], 1, 4, input) != 4) {
std::cerr << "Error: Error reading first 4 bytes from grammar!" << std::endl;
CG3Quit(1);
}
@@ -74,7 +52,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
CG3Quit(1);
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp <= 10297) {
if (verbosity >= 1) {
@@ -95,7 +73,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
grammar->is_binary = true;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
grammar->has_dep = (fields & (1 << 0)) != 0;
@@ -104,9 +82,9 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
if (fields & (1 << 1)) {
ucnv_reset(conv);
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &grammar->mapping_prefix, 1, &cbuffers[0][0], u32tmp, &err);
}
@@ -116,7 +94,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
u32tmp = 0;
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_single_tags = u32tmp;
@@ -126,60 +104,60 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
t->type |= T_GRAMMAR;
uint32_t fields = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
if (fields & (1 << 0)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->number = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 1)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->hash = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 2)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->plain_hash = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->seed = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 4)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->type = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->comparison_hash = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->comparison_op = (C_OPS)ntohl(u32tmp);
}
if (fields & (1 << 7)) {
- fread(&i32tmp, sizeof(int32_t), 1, input);
+ fread_throw(&i32tmp, sizeof(int32_t), 1, input);
t->comparison_val = (int32_t)ntohl(i32tmp);
}
if (fields & (1 << 8)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
t->tag = &gbuffers[0][0];
}
}
if (fields & (1 << 9)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
UParseError pe;
@@ -199,28 +177,28 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
}
if (fields & (1 << 10)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t num = (uint32_t)ntohl(u32tmp);
t->allocateVsSets();
t->vs_sets->reserve(num);
tag_varsets[t->number].reserve(num);
for (size_t i=0 ; i<num ; ++i) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
tag_varsets[t->number].push_back(u32tmp);
}
}
if (fields & (1 << 11)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t num = (uint32_t)ntohl(u32tmp);
t->allocateVsNames();
t->vs_names->reserve(num);
for (size_t i=0 ; i<num ; ++i) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
t->vs_names->push_back(&gbuffers[0][0]);
}
@@ -235,27 +213,39 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
}
u32tmp = 0;
+ if (fields & (1 << 4)) {
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
+ u32tmp = (uint32_t)ntohl(u32tmp);
+ }
+ uint32_t num_remaps = u32tmp;
+ for (uint32_t i = 0; i<num_remaps; ++i) {
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
+ u32tmp = (uint32_t)ntohl(u32tmp);
+ grammar->reopen_mappings.insert(u32tmp);
+ }
+
+ u32tmp = 0;
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_pref_targets = u32tmp;
for (uint32_t i=0 ; i<num_pref_targets ; i++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
grammar->preferred_targets.push_back(u32tmp);
}
u32tmp = 0;
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_par_pairs = u32tmp;
for (uint32_t i=0 ; i<num_par_pairs ; i++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t left = (uint32_t)ntohl(u32tmp);
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t right = (uint32_t)ntohl(u32tmp);
grammar->parentheses[left] = right;
grammar->parentheses_reverse[right] = left;
@@ -263,21 +253,21 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
u32tmp = 0;
if (fields & (1 << 7)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_par_anchors = u32tmp;
for (uint32_t i=0 ; i<num_par_anchors ; i++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t left = (uint32_t)ntohl(u32tmp);
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t right = (uint32_t)ntohl(u32tmp);
grammar->anchors[left] = right;
}
u32tmp = 0;
if (fields & (1 << 8)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_sets = u32tmp;
@@ -286,57 +276,57 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
Set *s = grammar->allocateSet();
uint32_t fields = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
if (fields & (1 << 0)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
s->number = (uint32_t)ntohl(u32tmp);
}
// Field 1 is unused
if (fields & (1 << 2)) {
- fread(&u8tmp, sizeof(uint8_t), 1, input);
+ fread_throw(&u8tmp, sizeof(uint8_t), 1, input);
s->type = u8tmp;
}
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
trie_unserialize(s->trie, input, *grammar, u32tmp);
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
trie_unserialize(s->trie_special, input, *grammar, u32tmp);
}
}
if (fields & (1 << 4)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_set_ops = u32tmp;
for (uint32_t j=0 ; j<num_set_ops ; j++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
s->set_ops.push_back(u32tmp);
}
}
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_sets = u32tmp;
for (uint32_t j=0 ; j<num_sets ; j++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
s->sets.push_back(u32tmp);
}
}
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
s->setName(&gbuffers[0][0]);
}
@@ -354,20 +344,20 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
}
if (fields & (1 << 9)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
grammar->delimiters = grammar->sets_list[u32tmp];
}
if (fields & (1 << 10)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
grammar->soft_delimiters = grammar->sets_list[u32tmp];
}
u32tmp = 0;
if (fields & (1 << 11)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_contexts = u32tmp;
@@ -378,7 +368,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
u32tmp = 0;
if (fields & (1 << 12)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_rules = u32tmp;
@@ -387,53 +377,53 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
Rule *r = grammar->allocateRule();
uint32_t fields = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
if (fields & (1 << 0)) {
- fread(&i32tmp, sizeof(int32_t), 1, input);
+ fread_throw(&i32tmp, sizeof(int32_t), 1, input);
r->section = (int32_t)ntohl(i32tmp);
}
if (fields & (1 << 1)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->type = (KEYWORDS)ntohl(u32tmp);
}
if (fields & (1 << 2)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->line = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->flags = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 4)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
r->setName(&gbuffers[0][0]);
}
}
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->target = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->wordform = grammar->single_tags_list[(uint32_t)ntohl(u32tmp)];
}
if (fields & (1 << 7)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->varname = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 8)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->varvalue = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 9)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
int32_t v = u32tmp;
if (u32tmp & (1 << 31)) {
@@ -444,47 +434,47 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
r->sub_reading = v;
}
if (fields & (1 << 10)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->childset1 = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 11)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->childset2 = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 12)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->maplist = grammar->sets_list[(uint32_t)ntohl(u32tmp)];
}
if (fields & (1 << 13)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->sublist = grammar->sets_list[(uint32_t)ntohl(u32tmp)];
}
if (fields & (1 << 14)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->number = (uint32_t)ntohl(u32tmp);
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
r->dep_target = grammar->contexts[u32tmp];
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_dep_tests = u32tmp;
for (uint32_t j=0 ; j<num_dep_tests ; j++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *t = grammar->contexts[u32tmp];
r->addContextualTest(t, r->dep_tests);
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_tests = u32tmp;
for (uint32_t j=0 ; j<num_tests ; j++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *t = grammar->contexts[u32tmp];
r->addContextualTest(t, r->tests);
@@ -508,66 +498,66 @@ ContextualTest *BinaryGrammar::readContextualTest(FILE *input) {
int32_t i32tmp = 0;
uint32_t tmpl = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
if (fields & (1 << 0)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->hash = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 1)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->pos = (uint32_t)ntohl(u32tmp);
if (t->pos & POS_64BIT) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->pos |= ((uint64_t)ntohl(u32tmp)) << 32;
}
}
if (fields & (1 << 2)) {
- fread(&i32tmp, sizeof(int32_t), 1, input);
+ fread_throw(&i32tmp, sizeof(int32_t), 1, input);
t->offset = (int32_t)ntohl(i32tmp);
}
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
tmpl = (uint32_t)ntohl(u32tmp);
deferred_tmpls[t] = tmpl;
}
if (fields & (1 << 4)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->target = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->line = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->relation = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 7)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->barrier = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 8)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->cbarrier = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 9)) {
- fread(&i32tmp, sizeof(int32_t), 1, input);
+ fread_throw(&i32tmp, sizeof(int32_t), 1, input);
t->offset_sub = (int32_t)ntohl(i32tmp);
}
if (fields & (1 << 10)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t num_ors = (uint32_t)ntohl(u32tmp);
for (uint32_t i=0 ; i<num_ors ; ++i) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *to = grammar->contexts[u32tmp];
t->ors.push_back(to);
}
}
if (fields & (1 << 11)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
t->linked = grammar->contexts[u32tmp];
}
diff --git a/src/BinaryGrammar_read_10043.cpp b/src/BinaryGrammar_read_10043.cpp
index 8279771..a6b9663 100644
--- a/src/BinaryGrammar_read_10043.cpp
+++ b/src/BinaryGrammar_read_10043.cpp
@@ -27,28 +27,6 @@
namespace CG3 {
-inline void trie_unserialize(trie_t& trie, FILE *input, Grammar& grammar, uint32_t num_tags) {
- for (uint32_t i = 0; i < num_tags; ++i) {
- uint32_t u32tmp = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
- u32tmp = (uint32_t)ntohl(u32tmp);
- trie_node_t& node = trie[grammar.single_tags_list[u32tmp]];
-
- uint8_t u8tmp = 0;
- fread(&u8tmp, sizeof(uint8_t), 1, input);
- node.terminal = (u8tmp != 0);
-
- fread(&u32tmp, sizeof(uint32_t), 1, input);
- u32tmp = (uint32_t)ntohl(u32tmp);
- if (u32tmp) {
- if (!node.trie) {
- node.trie = new trie_t;
- }
- trie_unserialize(*node.trie, input, grammar, u32tmp);
- }
- }
-}
-
static std::vector<ContextualTest*> contexts_list;
static Grammar::contexts_t templates;
@@ -68,7 +46,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
UErrorCode err = U_ZERO_ERROR;
UConverter *conv = ucnv_open("UTF-8", &err);
- if (fread(&cbuffers[0][0], 1, 4, input) != 4) {
+ if (fread_throw(&cbuffers[0][0], 1, 4, input) != 4) {
std::cerr << "Error: Error reading first 4 bytes from grammar!" << std::endl;
CG3Quit(1);
}
@@ -77,7 +55,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
CG3Quit(1);
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp < 10043) {
u_fprintf(ux_stderr, "Error: Grammar revision is %u, but this loader requires %u or later!\n", u32tmp, 10043);
@@ -86,7 +64,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
grammar->is_binary = true;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
grammar->has_dep = (fields & (1 << 0)) != 0;
@@ -95,9 +73,9 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
if (fields & (1 << 1)) {
ucnv_reset(conv);
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &grammar->mapping_prefix, 1, &cbuffers[0][0], u32tmp, &err);
}
@@ -107,7 +85,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
u32tmp = 0;
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_single_tags = u32tmp;
@@ -117,60 +95,60 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
t->type |= T_GRAMMAR;
uint32_t fields = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
if (fields & (1 << 0)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->number = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 1)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->hash = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 2)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->plain_hash = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->seed = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 4)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->type = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->comparison_hash = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->comparison_op = (C_OPS)ntohl(u32tmp);
}
if (fields & (1 << 7)) {
- fread(&i32tmp, sizeof(int32_t), 1, input);
+ fread_throw(&i32tmp, sizeof(int32_t), 1, input);
t->comparison_val = (int32_t)ntohl(i32tmp);
}
if (fields & (1 << 8)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
t->tag = &gbuffers[0][0];
}
}
if (fields & (1 << 9)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
UParseError pe;
@@ -190,28 +168,28 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
}
if (fields & (1 << 10)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t num = (uint32_t)ntohl(u32tmp);
t->allocateVsSets();
t->vs_sets->reserve(num);
tag_varsets[t->number].reserve(num);
for (size_t i=0 ; i<num ; ++i) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
tag_varsets[t->number].push_back(u32tmp);
}
}
if (fields & (1 << 11)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t num = (uint32_t)ntohl(u32tmp);
t->allocateVsNames();
t->vs_names->reserve(num);
for (size_t i=0 ; i<num ; ++i) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
t->vs_names->push_back(&gbuffers[0][0]);
}
@@ -227,26 +205,26 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
u32tmp = 0;
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_pref_targets = u32tmp;
for (uint32_t i=0 ; i<num_pref_targets ; i++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
grammar->preferred_targets.push_back(u32tmp);
}
u32tmp = 0;
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_par_pairs = u32tmp;
for (uint32_t i=0 ; i<num_par_pairs ; i++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t left = (uint32_t)ntohl(u32tmp);
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t right = (uint32_t)ntohl(u32tmp);
grammar->parentheses[left] = right;
grammar->parentheses_reverse[right] = left;
@@ -254,21 +232,21 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
u32tmp = 0;
if (fields & (1 << 7)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_par_anchors = u32tmp;
for (uint32_t i=0 ; i<num_par_anchors ; i++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t left = (uint32_t)ntohl(u32tmp);
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t right = (uint32_t)ntohl(u32tmp);
grammar->anchors[left] = right;
}
u32tmp = 0;
if (fields & (1 << 8)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_sets = u32tmp;
@@ -277,60 +255,60 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
Set *s = grammar->allocateSet();
uint32_t fields = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
if (fields & (1 << 0)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
s->number = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 1)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
s->hash = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 2)) {
- fread(&u8tmp, sizeof(uint8_t), 1, input);
+ fread_throw(&u8tmp, sizeof(uint8_t), 1, input);
s->type = u8tmp;
}
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
trie_unserialize(s->trie, input, *grammar, u32tmp);
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
trie_unserialize(s->trie_special, input, *grammar, u32tmp);
}
}
if (fields & (1 << 4)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_set_ops = u32tmp;
for (uint32_t j=0 ; j<num_set_ops ; j++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
s->set_ops.push_back(u32tmp);
}
}
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_sets = u32tmp;
for (uint32_t j=0 ; j<num_sets ; j++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
s->sets.push_back(u32tmp);
}
}
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
s->setName(&gbuffers[0][0]);
}
@@ -349,20 +327,20 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
}
if (fields & (1 << 9)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
grammar->delimiters = grammar->sets_by_contents.find(u32tmp)->second;
}
if (fields & (1 << 10)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
grammar->soft_delimiters = grammar->sets_by_contents.find(u32tmp)->second;
}
u32tmp = 0;
if (fields & (1 << 11)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_contexts = u32tmp;
@@ -375,7 +353,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
u32tmp = 0;
if (fields & (1 << 12)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_rules = u32tmp;
@@ -384,53 +362,53 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
Rule *r = grammar->allocateRule();
uint32_t fields = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
if (fields & (1 << 0)) {
- fread(&i32tmp, sizeof(int32_t), 1, input);
+ fread_throw(&i32tmp, sizeof(int32_t), 1, input);
r->section = (int32_t)ntohl(i32tmp);
}
if (fields & (1 << 1)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->type = (KEYWORDS)ntohl(u32tmp);
}
if (fields & (1 << 2)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->line = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->flags = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 4)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
- fread(&cbuffers[0][0], 1, u32tmp, input);
+ fread_throw(&cbuffers[0][0], 1, u32tmp, input);
i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
r->setName(&gbuffers[0][0]);
}
}
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->target = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->wordform = grammar->single_tags_list[(uint32_t)ntohl(u32tmp)];
}
if (fields & (1 << 7)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->varname = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 8)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->varvalue = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 9)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
int32_t v = u32tmp;
if (u32tmp & (1 << 31)) {
@@ -441,47 +419,47 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
r->sub_reading = v;
}
if (fields & (1 << 10)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->childset1 = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 11)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->childset2 = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 12)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->maplist = grammar->sets_list[(uint32_t)ntohl(u32tmp)];
}
if (fields & (1 << 13)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->sublist = grammar->sets_list[(uint32_t)ntohl(u32tmp)];
}
if (fields & (1 << 14)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
r->number = (uint32_t)ntohl(u32tmp);
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
r->dep_target = contexts_list[u32tmp-1];
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_dep_tests = u32tmp;
for (uint32_t j=0 ; j<num_dep_tests ; j++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *t = contexts_list[u32tmp - 1];
r->addContextualTest(t, r->dep_tests);
}
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_tests = u32tmp;
for (uint32_t j=0 ; j<num_tests ; j++) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *t = contexts_list[u32tmp - 1];
r->addContextualTest(t, r->tests);
@@ -509,70 +487,70 @@ ContextualTest *BinaryGrammar::readContextualTest_10043(FILE *input) {
int32_t i32tmp = 0;
uint32_t tmpl = 0;
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
fields = (uint32_t)ntohl(u32tmp);
if (fields & (1 << 0)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->hash = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 1)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->pos = (uint32_t)ntohl(u32tmp);
if (t->pos & POS_64BIT) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->pos |= ((uint64_t)ntohl(u32tmp)) << 32;
}
}
if (fields & (1 << 2)) {
- fread(&i32tmp, sizeof(int32_t), 1, input);
+ fread_throw(&i32tmp, sizeof(int32_t), 1, input);
t->offset = (int32_t)ntohl(i32tmp);
}
if (fields & (1 << 3)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
tmpl = (uint32_t)ntohl(u32tmp);
t->tmpl = reinterpret_cast<ContextualTest*>(u32tmp);
}
if (fields & (1 << 4)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->target = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 5)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->line = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 6)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->relation = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 7)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->barrier = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 8)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
t->cbarrier = (uint32_t)ntohl(u32tmp);
}
if (fields & (1 << 9)) {
- fread(&i32tmp, sizeof(int32_t), 1, input);
+ fread_throw(&i32tmp, sizeof(int32_t), 1, input);
t->offset_sub = (int32_t)ntohl(i32tmp);
}
if (fields & (1 << 12)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
templates[(uint32_t)ntohl(u32tmp)] = t;
}
if (fields & (1 << 10)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t num_ors = (uint32_t)ntohl(u32tmp);
for (uint32_t i=0 ; i<num_ors ; ++i) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *to = contexts_list[u32tmp-1];
t->ors.push_back(to);
}
}
if (fields & (1 << 11)) {
- fread(&u32tmp, sizeof(uint32_t), 1, input);
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
t->linked = contexts_list[u32tmp - 1];
}
diff --git a/src/BinaryGrammar_write.cpp b/src/BinaryGrammar_write.cpp
index d2dfcdf..d346735 100644
--- a/src/BinaryGrammar_write.cpp
+++ b/src/BinaryGrammar_write.cpp
@@ -47,7 +47,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
// Write out the revision of the binary format
u32tmp = (uint32_t)htonl((uint32_t)CG3_FEATURE_REV);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
if (grammar->has_dep) {
fields |= (1 << 0);
@@ -61,11 +61,9 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
if (!grammar->single_tags_list.empty()) {
fields |= (1 << 3);
}
- /*
- if (!grammar->tags_list.empty()) {
+ if (!grammar->reopen_mappings.empty()) {
fields |= (1 << 4);
}
- //*/
if (!grammar->preferred_targets.empty()) {
fields |= (1 << 5);
}
@@ -95,19 +93,19 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
}
u32tmp = (uint32_t)htonl((uint32_t)fields);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
if (grammar->mapping_prefix) {
ucnv_reset(conv);
i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE-1, &grammar->mapping_prefix, 1, &err);
u32tmp = (uint32_t)htonl((uint32_t)i32tmp);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
- fwrite(&cbuffers[0][0], i32tmp, 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&cbuffers[0][0], i32tmp, 1, output);
}
if (!grammar->single_tags_list.empty()) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->single_tags_list.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
std::vector<Tag*>::const_iterator tags_iter;
for (tags_iter = grammar->single_tags_list.begin() ; tags_iter != grammar->single_tags_list.end() ; tags_iter++) {
@@ -188,45 +186,53 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
}
u32tmp = (uint32_t)htonl(fields);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
- fwrite(buffer.str().c_str(), buffer.str().length(), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(buffer.str().c_str(), buffer.str().length(), 1, output);
+ }
+
+ if (!grammar->reopen_mappings.empty()) {
+ u32tmp = (uint32_t)htonl((uint32_t)grammar->reopen_mappings.size());
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
+ }
+ for (BOOST_AUTO(iter, grammar->reopen_mappings.begin()); iter != grammar->reopen_mappings.end(); ++iter) {
+ u32tmp = (uint32_t)htonl((uint32_t)*iter);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
if (!grammar->preferred_targets.empty()) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->preferred_targets.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
- uint32Vector::const_iterator iter;
- for (iter = grammar->preferred_targets.begin() ; iter != grammar->preferred_targets.end() ; iter++ ) {
+ for (BOOST_AUTO(iter, grammar->preferred_targets.begin()) ; iter != grammar->preferred_targets.end() ; ++iter) {
u32tmp = (uint32_t)htonl((uint32_t)*iter);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
if (!grammar->parentheses.empty()) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->parentheses.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
boost_foreach (const Grammar::parentheses_t::value_type& iter_par, grammar->parentheses) {
u32tmp = (uint32_t)htonl(iter_par.first);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
u32tmp = (uint32_t)htonl(iter_par.second);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
if (!grammar->anchors.empty()) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->anchors.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
const_foreach (uint32FlatHashMap, grammar->anchors, iter_anchor, iter_anchor_end) {
u32tmp = (uint32_t)htonl((uint32_t)iter_anchor->first);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
u32tmp = (uint32_t)htonl((uint32_t)iter_anchor->second);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
if (!grammar->sets_list.empty()) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->sets_list.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
std::vector<Set*>::const_iterator set_iter;
for (set_iter = grammar->sets_list.begin() ; set_iter != grammar->sets_list.end() ; set_iter++) {
@@ -275,24 +281,24 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
}
u32tmp = (uint32_t)htonl(fields);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
- fwrite(buffer.str().c_str(), buffer.str().length(), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(buffer.str().c_str(), buffer.str().length(), 1, output);
}
if (grammar->delimiters) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->delimiters->number);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
if (grammar->soft_delimiters) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->soft_delimiters->number);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
seen_uint32.clear();
if (!grammar->contexts.empty()) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->contexts.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
for (BOOST_AUTO(cntx, grammar->contexts.begin()); cntx != grammar->contexts.end(); ++cntx) {
writeContextualTest(cntx->second, output);
@@ -300,7 +306,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
if (!grammar->rule_by_number.empty()) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->rule_by_number.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
const_foreach (RuleVector, grammar->rule_by_number, rule_iter, rule_iter_end) {
Rule *r = *rule_iter;
@@ -378,28 +384,28 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
}
u32tmp = (uint32_t)htonl(fields);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
- fwrite(buffer.str().c_str(), buffer.str().length(), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(buffer.str().c_str(), buffer.str().length(), 1, output);
u32tmp = 0;
if (r->dep_target) {
u32tmp = (uint32_t)htonl(r->dep_target->hash);
}
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
r->reverseContextualTests();
u32tmp = (uint32_t)htonl(r->dep_tests.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
const_foreach (ContextList, r->dep_tests, it, it_end) {
u32tmp = (uint32_t)htonl((*it)->hash);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
u32tmp = (uint32_t)htonl(r->tests.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
const_foreach (ContextList, r->tests, it, it_end) {
u32tmp = (uint32_t)htonl((*it)->hash);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
}
@@ -416,8 +422,8 @@ void BinaryGrammar::writeContextualTest(ContextualTest *t, FILE *output) {
if (t->tmpl) {
writeContextualTest(t->tmpl, output);
}
- const_foreach(ContextList, t->ors, iter, iter_end) {
- writeContextualTest(*iter, output);
+ boost_foreach (ContextualTest *iter, t->ors) {
+ writeContextualTest(iter, output);
}
if (t->linked) {
writeContextualTest(t->linked, output);
@@ -482,22 +488,22 @@ void BinaryGrammar::writeContextualTest(ContextualTest *t, FILE *output) {
}
u32tmp = (uint32_t)htonl(fields);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
- fwrite(buffer.str().c_str(), buffer.str().length(), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(buffer.str().c_str(), buffer.str().length(), 1, output);
if (!t->ors.empty()) {
u32tmp = (uint32_t)htonl((uint32_t)t->ors.size());
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
- const_foreach (ContextList, t->ors, iter, iter_end) {
- u32tmp = (uint32_t)htonl((*iter)->hash);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ boost_foreach (ContextualTest *iter, t->ors) {
+ u32tmp = (uint32_t)htonl(iter->hash);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
}
if (t->linked) {
u32tmp = (uint32_t)htonl(t->linked->hash);
- fwrite(&u32tmp, sizeof(uint32_t), 1, output);
+ fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1c36624..4973469 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -44,7 +44,7 @@ macro(cg3_link target)
target_link_libraries(${target} ${GOOGLE_TCMALLOC_LIB})
endif()
- if(MSVC)
+ if(WIN32)
target_link_libraries(${target} "wsock32.lib")
endif()
@@ -79,11 +79,12 @@ set(LIBCG3_HEADERS
interval_vector.hpp
istream.hpp
macros.hpp
+ parser_helpers.hpp
+ process.hpp
sorted_vector.hpp
stdafx.hpp
uextras.hpp
version.hpp
- ../include/exec-stream/exec-stream.h
)
set(LIBCG3_SOURCES
BinaryGrammar.cpp
@@ -110,15 +111,13 @@ set(LIBCG3_SOURCES
TextualParser.cpp
Window.cpp
uextras.cpp
- ../include/exec-stream/exec-stream.cpp
+ ${POPEN_PLUS_C}
${LIBCG3_HEADERS}
)
if(MSVC)
set(LIBCG3_SOURCES
- ../win32/getopt.c
- ../win32/getopt.h
- ../win32/libgen.c
- ../win32/libgen.h
+ "${CMAKE_SOURCE_DIR}/include/win32/getopt.c"
+ "${CMAKE_SOURCE_DIR}/include/win32/getopt.h"
${LIBCG3_SOURCES}
)
endif()
@@ -199,7 +198,7 @@ add_test(t_libcg3 test_libcg3 "${CMAKE_CURRENT_SOURCE_DIR}/../test/T_BasicSelect
install(TARGETS libcg3 ARCHIVE DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}")
if(NOT MSVC)
- install(TARGETS libcg3-shared ARCHIVE DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}" LIBRARY DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}")
+ install(TARGETS libcg3-shared ARCHIVE DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}" LIBRARY DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}" RUNTIME DESTINATION bin)
install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cg3.h" DESTINATION include)
endif()
install(TARGETS cg-comp cg-proc cg-conv vislcg3 RUNTIME DESTINATION bin)
diff --git a/src/ContextualTest.cpp b/src/ContextualTest.cpp
index 55972ef..6ac50e9 100644
--- a/src/ContextualTest.cpp
+++ b/src/ContextualTest.cpp
@@ -109,8 +109,8 @@ uint32_t ContextualTest::rehash() {
if (tmpl) {
hash = hash_value(hash, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(tmpl)));
}
- foreach (ContextList, ors, iter, iter_end) {
- hash = hash_value(hash, (*iter)->rehash());
+ boost_foreach (ContextualTest *iter, ors) {
+ hash = hash_value(hash, iter->rehash());
}
hash += seed;
@@ -125,8 +125,8 @@ void ContextualTest::resetStatistics() {
if (tmpl) {
tmpl->resetStatistics();
}
- foreach (ContextList, ors, idts, idts_end) {
- (*idts)->resetStatistics();
+ boost_foreach (ContextualTest *idts, ors) {
+ idts->resetStatistics();
}
if (linked) {
linked->resetStatistics();
@@ -155,8 +155,8 @@ void ContextualTest::markUsed(Grammar& grammar) {
if (tmpl) {
tmpl->markUsed(grammar);
}
- foreach (ContextList, ors, idts, idts_end) {
- (*idts)->markUsed(grammar);
+ boost_foreach (ContextualTest *idts, ors) {
+ idts->markUsed(grammar);
}
if (linked) {
linked->markUsed(grammar);
diff --git a/src/ContextualTest.hpp b/src/ContextualTest.hpp
index 046a66a..c131e2c 100644
--- a/src/ContextualTest.hpp
+++ b/src/ContextualTest.hpp
@@ -67,7 +67,7 @@ namespace CG3 {
POS_UNKNOWN = (1 << 25),
POS_RELATION = (1 << 26),
POS_ATTACH_TO = (1 << 27),
- // 28 unused
+ POS_NUMERIC_BRANCH = (1 << 28),
// 29 unused
POS_DEP_GLOB = (1 << 30),
POS_64BIT = (1ull << 31),
@@ -105,7 +105,7 @@ namespace CG3 {
ContextualTest *tmpl;
ContextualTest *linked;
- ContextList ors;
+ ContextVector ors;
ContextualTest();
@@ -116,6 +116,20 @@ namespace CG3 {
void markUsed(Grammar& grammar);
};
+ inline void copy_cntx(const ContextualTest *src, ContextualTest *trg) {
+ trg->offset = src->offset;
+ trg->offset_sub = src->offset_sub;
+ trg->line = src->line;
+ trg->hash = src->hash;
+ trg->seed = src->seed;
+ trg->pos = src->pos;
+ trg->target = src->target;
+ trg->relation = src->relation;
+ trg->barrier = src->barrier;
+ trg->cbarrier = src->cbarrier;
+ trg->tmpl = src->tmpl;
+ trg->linked = src->linked;
+ }
}
#endif
diff --git a/src/FSTApplicator.cpp b/src/FSTApplicator.cpp
index 8ee522f..67c145c 100644
--- a/src/FSTApplicator.cpp
+++ b/src/FSTApplicator.cpp
@@ -30,8 +30,12 @@
namespace CG3 {
FSTApplicator::FSTApplicator(UFILE *ux_err)
- : GrammarApplicator(ux_err)
+ : GrammarApplicator(ux_err),
+ wfactor(100.0)
{
+ wtag += 'W';
+ sub_delims += '#';
+ sub_delims += '+';
}
void FSTApplicator::runGrammarOnText(istream& input, UFILE *output) {
@@ -65,6 +69,8 @@ void FSTApplicator::runGrammarOnText(istream& input, UFILE *output) {
std::vector<UChar> cleaned(line.size(), 0);
bool ignoreinput = false;
bool did_soft_lookback = false;
+ UString wtag_buf;
+ Tag *wtag_tag;
index();
@@ -168,15 +174,46 @@ gotaline:
const UChar *base = space;
TagList mappings;
+ wtag_tag = 0;
+ double weight = 0.0;
UChar *tab = u_strchr(space, '\t');
if (tab) {
tab[0] = 0;
+ ++tab;
+ UChar *comma = u_strchr(tab, ',');
+ if (comma) {
+ comma[0] = '.';
+ }
+ char buf[32];
+ size_t i = 0;
+ for (; i < 31 && tab[i]; ++i) {
+ buf[i] = static_cast<char>(tab[i]);
+ }
+ buf[i] = 0;
+ if (strcmp(buf, "inf") == 0) {
+ i = sprintf(buf, "%d", std::numeric_limits<int32_t>::max());
+ }
+ else {
+ weight = strtof(buf, 0);
+ weight *= wfactor;
+ i = sprintf(buf, "%.0f", weight);
+ }
+ wtag_buf.clear();
+ wtag_buf.reserve(wtag.size() + i + 3);
+ wtag_buf += '<';
+ wtag_buf += wtag;
+ wtag_buf += ':';
+ std::copy(buf, buf + i, std::back_inserter(wtag_buf));
+ wtag_buf += '>';
+ wtag_tag = addTag(wtag_buf);
}
while (space && *space && (space = u_strchr(space, '+')) != 0) {
if (base && base[0]) {
+ int32_t f = u_strcspn(base, sub_delims.c_str());
UChar *hash = 0;
- if ((hash = u_strchr(base, '#')) != 0 && hash != base && hash < space) {
+ if (f && base+f < space) {
+ hash = const_cast<UChar*>(base)+f;
size_t oh = hash - &cleaned[0];
size_t ob = base - &cleaned[0];
cleaned.resize(cleaned.size()+1, 0);
@@ -201,6 +238,15 @@ gotaline:
else {
addTagToReading(*cReading, tag);
}
+ if (hash && hash[0] == 0) {
+ if (wtag_tag) {
+ addTagToReading(*cReading, wtag_tag);
+ }
+ Reading *nr = cReading->allocateReading(cReading->parent);
+ nr->next = cReading;
+ cReading = nr;
+ ++space;
+ }
}
base = ++space;
}
@@ -220,6 +266,9 @@ gotaline:
addTagToReading(*cReading, tag);
}
}
+ if (wtag_tag) {
+ addTagToReading(*cReading, wtag_tag);
+ }
if (!cReading->baseform) {
cReading->baseform = cCohort->wordform->hash;
u_fprintf(ux_stderr, "Warning: Line %u had no valid baseform.\n", numLines);
@@ -232,6 +281,9 @@ gotaline:
if (!mappings.empty()) {
splitMappings(mappings, *cCohort, *cReading, true);
}
+ if (grammar->sub_readings_ltr && cReading->next) {
+ cReading = reverse(cReading);
+ }
cCohort->appendReading(cReading);
++numReadings;
}
@@ -275,7 +327,7 @@ istext:
did_soft_lookback = false;
}
if (cCohort && (cSWindow->cohorts.size() >= hard_limit || (!dep_delimit && grammar->delimiters && doesSetMatchCohortNormal(*cCohort, grammar->delimiters->number)))) {
- if (cSWindow->cohorts.size() >= hard_limit) {
+ if (!is_conv && cSWindow->cohorts.size() >= hard_limit) {
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
diff --git a/src/FSTApplicator.hpp b/src/FSTApplicator.hpp
index b997f8d..14aed10 100644
--- a/src/FSTApplicator.hpp
+++ b/src/FSTApplicator.hpp
@@ -31,6 +31,10 @@ class FSTApplicator : public virtual GrammarApplicator {
public:
FSTApplicator(UFILE *ux_err);
void runGrammarOnText(istream& input, UFILE *output);
+
+ double wfactor;
+ UString wtag;
+ UString sub_delims;
};
}
diff --git a/src/Grammar.cpp b/src/Grammar.cpp
index 908b7d7..d3d01ee 100644
--- a/src/Grammar.cpp
+++ b/src/Grammar.cpp
@@ -72,11 +72,6 @@ Grammar::~Grammar() {
}
}
-void Grammar::addPreferredTarget(UChar *to) {
- Tag *tag = allocateTag(to);
- preferred_targets.push_back(tag->hash);
-}
-
void Grammar::addSet(Set *& to) {
if (!delimiters && u_strcmp(to->name.c_str(), stringbits[S_DELIMITSET].getTerminatedBuffer()) == 0) {
delimiters = to;
@@ -113,6 +108,7 @@ void Grammar::addSet(Set *& to) {
if (all_tags) {
for (size_t i=0 ; i<to->sets.size() ; ++i) {
Set *s = getSet(to->sets[i]);
+ maybe_used_sets.insert(s);
TagVector tv = trie_getTagList(s->getNonEmpty());
if (tv.size() == 1) {
addTagToSet(tv[0], to);
@@ -174,9 +170,9 @@ void Grammar::addSet(Set *& to) {
positive->trie_special.erase(pit);
}
}
- UString str = iter->toUString(true);
- str.erase(str.find('^'), 1);
- Tag *tag = allocateTag(str.c_str());
+ Tag *tag = new Tag(*iter);
+ tag->type &= ~T_FAILFAST;
+ tag = addTag(tag);
addTagToSet(tag, negative);
}
@@ -304,51 +300,6 @@ void Grammar::addSetToList(Set *s) {
}
}
-Set *Grammar::parseSet(const UChar *name) {
- uint32_t sh = hash_value(name);
-
- if (ux_isSetOp(name) != S_IGNORE) {
- u_fprintf(ux_stderr, "Error: Found set operator '%S' where set name expected on line %u!\n", name, lines);
- CG3Quit(1);
- }
-
- if ((
- (name[0] == '$' && name[1] == '$')
- || (name[0] == '&' && name[1] == '&')
- ) && name[2]) {
- const UChar *wname = &(name[2]);
- uint32_t wrap = hash_value(wname);
- Set *wtmp = getSet(wrap);
- if (!wtmp) {
- u_fprintf(ux_stderr, "Error: Attempted to reference undefined set '%S' on line %u!\n", wname, lines);
- CG3Quit(1);
- }
- Set *tmp = getSet(sh);
- if (!tmp) {
- Set *ns = allocateSet();
- ns->line = lines;
- ns->setName(name);
- ns->sets.push_back(wtmp->hash);
- if (name[0] == '$' && name[1] == '$') {
- ns->type |= ST_TAG_UNIFY;
- }
- else if (name[0] == '&' && name[1] == '&') {
- ns->type |= ST_SET_UNIFY;
- }
- addSet(ns);
- }
- }
- if (set_alias.find(sh) != set_alias.end()) {
- sh = set_alias[sh];
- }
- Set *tmp = getSet(sh);
- if (!tmp) {
- u_fprintf(ux_stderr, "Error: Attempted to reference undefined set '%S' on line %u!\n", name, lines);
- CG3Quit(1);
- }
- return tmp;
-}
-
void Grammar::allocateDummySet() {
Set *set_c = allocateSet();
set_c->line = 0;
@@ -360,6 +311,106 @@ void Grammar::allocateDummySet() {
sets_list.insert(sets_list.begin(), set_c);
}
+uint32_t Grammar::removeNumericTags(uint32_t s) {
+ Set *set = getSet(s);
+ if (!set->sets.empty()) {
+ bool did = false;
+ BOOST_AUTO(sets, set->sets);
+ for (size_t i = 0; i < sets.size(); ++i) {
+ uint32_t ns = removeNumericTags(sets[i]);
+ if (ns == 0) {
+ set = getSet(sets[i]);
+ u_fprintf(ux_stderr, "Error: Removing numeric tags for branch resulted in set %S on line %u being empty!\n", set->name.c_str(), set->line);
+ CG3Quit(1);
+ }
+ if (ns != sets[i]) {
+ sets[i] = ns;
+ did = true;
+ }
+ }
+ if (did) {
+ Set *ns = allocateSet();
+ ns->type = set->type;
+ ns->line = set->line;
+ ns->name = stringbits[S_GPREFIX].getTerminatedBuffer();
+ ns->name += set->name;
+ ns->name += '_';
+ ns->name += 'B';
+ ns->name += '_';
+ ns->sets = sets;
+ ns->set_ops = set->set_ops;
+ addSet(ns);
+ set = ns;
+ }
+ }
+ else {
+ bool did = false;
+ std::map<TagVector, bool> ntags;
+ TagVector tags;
+ const trie_t* tries[2] = { &set->trie, &set->trie_special };
+ for (size_t i = 0; i < 2; ++i) {
+ if (tries[i]->empty()) {
+ continue;
+ }
+ BOOST_AUTO(ctags, trie_getTags(*tries[i]));
+ for (BOOST_AUTO(it, ctags.begin()); it != ctags.end(); ++it) {
+ bool special = false;
+ tags.clear();
+ fill_tagvector(*it, tags, did, special);
+ if (!tags.empty()) {
+ ntags[tags] = special;
+ }
+ }
+ }
+
+ if (!set->ff_tags.empty()) {
+ bool special = false;
+ tags.clear();
+ fill_tagvector(set->ff_tags, tags, did, special);
+ if (!tags.empty()) {
+ ntags[tags] = special;
+ }
+ }
+
+ if (did) {
+ if (ntags.empty()) {
+ tags.clear();
+ tags.push_back(single_tags[tag_any]);
+ ntags[tags] = true;
+ if (verbosity_level > 0) {
+ u_fprintf(ux_stderr, "Warning: Set %S was empty and replaced with the * set in the C branch on line %u.\n", set->name.c_str(), set->line);
+ u_fflush(ux_stderr);
+ }
+ }
+ Set *ns = allocateSet();
+ ns->type = set->type;
+ ns->line = set->line;
+ ns->name = stringbits[S_GPREFIX].getTerminatedBuffer();
+ ns->name += set->name;
+ ns->name += '_';
+ ns->name += 'B';
+ ns->name += '_';
+
+ for (BOOST_AUTO(it, ntags.begin()); it != ntags.end(); ++it) {
+ if (it->second) {
+ if (it->first.size() == 1 && (it->first[0]->type & T_FAILFAST)) {
+ ns->ff_tags.insert(it->first[0]);
+ }
+ else {
+ trie_insert(ns->trie_special, it->first);
+ }
+ }
+ else {
+ trie_insert(ns->trie, it->first);
+ }
+ }
+ addSet(ns);
+ set = ns;
+ }
+ }
+ return set->hash;
+}
+
Rule *Grammar::allocateRule() {
return new Rule;
}
@@ -377,7 +428,7 @@ Tag *Grammar::allocateTag() {
return new Tag;
}
-Tag *Grammar::allocateTag(const UChar *txt, bool raw) {
+Tag *Grammar::allocateTag(const UChar *txt) {
if (txt[0] == 0) {
u_fprintf(ux_stderr, "Error: Empty tag on line %u! Forgot to fill in a ()?\n", lines);
CG3Quit(1);
@@ -386,10 +437,6 @@ Tag *Grammar::allocateTag(const UChar *txt, bool raw) {
u_fprintf(ux_stderr, "Error: Tag '%S' cannot start with ( on line %u! Possible extra opening ( or missing closing ) to the left. If you really meant it, escape it as \\(.\n", txt, lines);
CG3Quit(1);
}
- if (!raw && ux_isSetOp(txt) != S_IGNORE) {
- u_fprintf(ux_stderr, "Warning: Tag '%S' on line %u looks like a set operator. Maybe you meant to do SET instead of LIST?\n", txt, lines);
- u_fflush(ux_stderr);
- }
Taguint32HashMap::iterator it;
uint32_t thash = hash_value(txt);
if ((it = single_tags.find(thash)) != single_tags.end() && !it->second->tag.empty() && u_strcmp(it->second->tag.c_str(), txt) == 0) {
@@ -397,18 +444,21 @@ Tag *Grammar::allocateTag(const UChar *txt, bool raw) {
}
Tag *tag = new Tag();
- if (raw) {
- tag->parseTagRaw(txt, this);
- }
- else {
- tag->parseTag(txt, ux_stderr, this);
- }
+ tag->parseTagRaw(txt, this);
+ return addTag(tag);
+}
+
+Tag *Grammar::addTag(Tag *tag) {
tag->type |= T_GRAMMAR;
uint32_t hash = tag->rehash();
for (uint32_t seed = 0; seed < 10000; seed++) {
uint32_t ih = hash + seed;
+ Taguint32HashMap::iterator it;
if ((it = single_tags.find(ih)) != single_tags.end()) {
Tag *t = it->second;
+ if (t == tag) {
+ return tag;
+ }
if (t->tag == tag->tag) {
hash += seed;
delete tag;
@@ -417,13 +467,13 @@ Tag *Grammar::allocateTag(const UChar *txt, bool raw) {
}
else {
if (verbosity_level > 0 && seed) {
- u_fprintf(ux_stderr, "Warning: Tag %S got hash seed %u.\n", txt, seed);
+ u_fprintf(ux_stderr, "Warning: Tag %S got hash seed %u.\n", tag->tag.c_str(), seed);
u_fflush(ux_stderr);
}
tag->seed = seed;
hash = tag->rehash();
single_tags_list.push_back(tag);
- tag->number = (uint32_t)single_tags_list.size()-1;
+ tag->number = (uint32_t)single_tags_list.size() - 1;
single_tags[hash] = tag;
break;
}
@@ -462,8 +512,8 @@ ContextualTest *Grammar::addContextualTest(ContextualTest *t) {
t->rehash();
t->linked = addContextualTest(t->linked);
- foreach (ContextList, t->ors, it, it_end) {
- *it = addContextualTest(*it);
+ boost_foreach (ContextualTest *& it, t->ors) {
+ it = addContextualTest(it);
}
for (uint32_t seed = 0; seed < 1000; ++seed) {
@@ -500,7 +550,7 @@ void Grammar::addTemplate(ContextualTest *test, const UChar *name) {
}
void Grammar::addAnchor(const UChar *to, uint32_t at, bool primary) {
- uint32_t ah = allocateTag(to, true)->hash;
+ uint32_t ah = allocateTag(to)->hash;
uint32FlatHashMap::iterator it = anchors.find(ah);
if (primary && it != anchors.end()) {
u_fprintf(ux_stderr, "Error: Redefinition attempt for anchor '%S' on line %u!\n", to, lines);
@@ -531,7 +581,7 @@ void Grammar::renameAllRules() {
}
};
-void Grammar::reindex(bool unused_sets) {
+void Grammar::reindex(bool unused_sets, bool used_tags) {
foreach (Setuint32HashMap, sets_by_contents, dset, dset_end) {
if (dset->second->number == std::numeric_limits<uint32_t>::max()) {
dset->second->type |= ST_USED;
@@ -579,10 +629,10 @@ void Grammar::reindex(bool unused_sets) {
rules_any = 0;
foreach (TagVector, single_tags_list, iter, iter_end) {
- if ((*iter)->regexp && (*iter)->tag[0] == '/') {
+ if ((*iter)->regexp && (*iter)->tag[0] != '"' && (*iter)->tag[0] != '<') {
regex_tags.insert((*iter)->regexp);
}
- if (((*iter)->type & T_CASE_INSENSITIVE) && (*iter)->tag[0] == '/') {
+ if (((*iter)->type & T_CASE_INSENSITIVE) && (*iter)->tag[0] != '"' && (*iter)->tag[0] != '<') {
icase_tags.insert((*iter));
}
if (is_binary) {
@@ -621,6 +671,15 @@ void Grammar::reindex(bool unused_sets) {
}
}
+ for (BOOST_AUTO(it, parentheses.begin()); it != parentheses.end(); ++it) {
+ single_tags[it->first]->markUsed();
+ single_tags[it->second]->markUsed();
+ }
+
+ for (BOOST_AUTO(it, preferred_targets.begin()); it != preferred_targets.end(); ++it) {
+ single_tags[*it]->markUsed();
+ }
+
foreach (RuleVector, rule_by_number, iter_rule, iter_rule_end) {
if ((*iter_rule)->wordform) {
wf_rules.push_back(*iter_rule);
@@ -678,7 +737,7 @@ void Grammar::reindex(bool unused_sets) {
if (unused_sets) {
u_fprintf(ux_stdout, "Unused sets:\n");
foreach (Setuint32HashMap, sets_by_contents, rset, rset_end) {
- if (!(rset->second->type & ST_USED) && !rset->second->name.empty()) {
+ if (!(rset->second->type & ST_USED) && !rset->second->name.empty() && maybe_used_sets.count(rset->second) == 0) {
if (rset->second->name[0] != '_' || rset->second->name[1] != 'G' || rset->second->name[2] != '_') {
u_fprintf(ux_stdout, "Line %u set %S\n", rset->second->line, rset->second->name.c_str());
}
@@ -696,8 +755,7 @@ void Grammar::reindex(bool unused_sets) {
}
}
- Taguint32HashMap::iterator iter_tags;
- for (iter_tags = single_tags.begin() ; iter_tags != single_tags.end() ; ++iter_tags) {
+ for (BOOST_AUTO(iter_tags, single_tags.begin()) ; iter_tags != single_tags.end() ; ++iter_tags) {
Tag *tag = iter_tags->second;
if (tag->tag[0] == mapping_prefix) {
tag->type |= T_MAPPING;
@@ -751,6 +809,9 @@ void Grammar::reindex(bool unused_sets) {
u_fprintf(ux_stderr, "Warning: Rule on line %u had no target.\n", (*iter_rule)->line);
u_fflush(ux_stderr);
}
+ if (((*iter_rule)->maplist && ((*iter_rule)->maplist->type & ST_CHILD_UNIFY)) || ((*iter_rule)->sublist && ((*iter_rule)->sublist->type & ST_CHILD_UNIFY))) {
+ (*iter_rule)->flags |= FL_CAPTURE_UNIF;
+ }
if (is_binary) {
continue;
}
@@ -773,7 +834,11 @@ void Grammar::reindex(bool unused_sets) {
}
}
- sections.insert(sections.end(), sects.begin(), sects.end());
+ if (!sects.empty()) {
+ for (uint32_t i = 0; i <= sects.back(); ++i) {
+ sections.push_back(i);
+ }
+ }
if (sets_by_tag.find(tag_any) != sets_by_tag.end()) {
sets_any = &sets_by_tag[tag_any];
@@ -814,6 +879,17 @@ void Grammar::reindex(bool unused_sets) {
}
}
}
+
+ if (used_tags) {
+ for (BOOST_AUTO(iter_tags, single_tags.begin()); iter_tags != single_tags.end(); ++iter_tags) {
+ Tag *tag = iter_tags->second;
+ if (tag->type & T_USED) {
+ UString tmp(tag->toUString(true));
+ u_fprintf(ux_stdout, "%S\n", tmp.c_str());
+ }
+ }
+ exit(0);
+ }
}
inline void trie_indexToRule(const trie_t& trie, Grammar& grammar, uint32_t r) {
diff --git a/src/Grammar.hpp b/src/Grammar.hpp
index ca8e0a1..45ced04 100644
--- a/src/Grammar.hpp
+++ b/src/Grammar.hpp
@@ -59,6 +59,7 @@ namespace CG3 {
set_name_seeds_t set_name_seeds;
Setuint32HashMap sets_by_contents;
uint32FlatHashMap set_alias;
+ SetSet maybe_used_sets;
typedef std::vector<UString> static_sets_t;
static_sets_t static_sets;
@@ -86,6 +87,7 @@ namespace CG3 {
Set *soft_delimiters;
uint32_t tag_any;
uint32Vector preferred_targets;
+ uint32SortedVector reopen_mappings;
typedef bc::flat_map<uint32_t,uint32_t> parentheses_t;
parentheses_t parentheses;
parentheses_t parentheses_reverse;
@@ -103,20 +105,19 @@ namespace CG3 {
Grammar();
~Grammar();
- void addPreferredTarget(UChar *to);
-
void addSet(Set *& to);
Set *getSet(uint32_t which) const;
Set *allocateSet();
void destroySet(Set *set);
void addSetToList(Set *s);
- Set *parseSet(const UChar *name);
void allocateDummySet();
+ uint32_t removeNumericTags(uint32_t s);
void addAnchor(const UChar *to, uint32_t at, bool primary = false);
Tag *allocateTag();
- Tag *allocateTag(const UChar *tag, bool raw = false);
+ Tag *allocateTag(const UChar *tag);
+ Tag *addTag(Tag *tag);
void destroyTag(Tag *tag);
void addTagToSet(Tag *rtag, Set *set);
@@ -129,7 +130,7 @@ namespace CG3 {
void addTemplate(ContextualTest *test, const UChar *name);
void resetStatistics();
- void reindex(bool unused_sets=false);
+ void reindex(bool unused_sets = false, bool used_tags = false);
void renameAllRules();
void indexSetToRule(uint32_t, Set*);
@@ -140,6 +141,27 @@ namespace CG3 {
void contextAdjustTarget(ContextualTest*);
};
+ inline void trie_unserialize(trie_t& trie, FILE *input, Grammar& grammar, uint32_t num_tags) {
+ for (uint32_t i = 0; i < num_tags; ++i) {
+ uint32_t u32tmp = 0;
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
+ u32tmp = (uint32_t)ntohl(u32tmp);
+ trie_node_t& node = trie[grammar.single_tags_list[u32tmp]];
+
+ uint8_t u8tmp = 0;
+ fread_throw(&u8tmp, sizeof(uint8_t), 1, input);
+ node.terminal = (u8tmp != 0);
+
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
+ u32tmp = (uint32_t)ntohl(u32tmp);
+ if (u32tmp) {
+ if (!node.trie) {
+ node.trie = new trie_t;
+ }
+ trie_unserialize(*node.trie, input, grammar, u32tmp);
+ }
+ }
+ }
}
#endif
diff --git a/src/GrammarApplicator.cpp b/src/GrammarApplicator.cpp
index 3103bf9..00238a5 100644
--- a/src/GrammarApplicator.cpp
+++ b/src/GrammarApplicator.cpp
@@ -26,6 +26,8 @@
#include "Window.hpp"
#include "SingleWindow.hpp"
#include "Reading.hpp"
+#include "parser_helpers.hpp"
+#include "process.hpp"
namespace CG3 {
@@ -51,6 +53,8 @@ dry_run(false),
owns_grammar(false),
input_eof(false),
seen_barrier(false),
+is_conv(false),
+split_mappings(false),
dep_has_spanned(false),
dep_delimit(0),
dep_original(false),
@@ -68,6 +72,7 @@ gWindow(0),
has_relations(false),
grammar(0),
ux_stderr(ux_err),
+filebase(0),
numLines(0),
numWindows(0),
numCohorts(0),
@@ -107,16 +112,6 @@ GrammarApplicator::~GrammarApplicator() {
}
}
- foreach (externals_t, externals, ei, ei_end) {
- try {
- writeRaw(ei->second->in(), static_cast<uint32_t>(0));
- delete ei->second;
- }
- catch (...) {
- // We don't really care about errors since we're shutting down anyway.
- }
- }
-
delete gWindow;
if (owns_grammar) {
@@ -232,26 +227,17 @@ void GrammarApplicator::disableStatistics() {
statistics = false;
}
-Tag *GrammarApplicator::addTag(const UChar *txt, bool vstr) {
- Taguint32HashMap::iterator it;
- uint32_t thash = hash_value(txt);
- if ((it = single_tags.find(thash)) != single_tags.end() && !it->second->tag.empty() && u_strcmp(it->second->tag.c_str(), txt) == 0) {
- return it->second;
- }
-
- Tag *tag = new Tag();
- if (vstr) {
- tag->parseTag(txt, ux_stderr, grammar);
- }
- else {
- tag->parseTagRaw(txt, grammar);
- }
+Tag *GrammarApplicator::addTag(Tag *tag) {
uint32_t hash = tag->rehash();
uint32_t seed = 0;
- for ( ; seed < 10000 ; seed++) {
+ for (; seed < 10000; seed++) {
uint32_t ih = hash + seed;
+ Taguint32HashMap::iterator it;
if ((it = single_tags.find(ih)) != single_tags.end()) {
Tag *t = it->second;
+ if (t == tag) {
+ return tag;
+ }
if (t->tag == tag->tag) {
hash += seed;
delete tag;
@@ -260,7 +246,7 @@ Tag *GrammarApplicator::addTag(const UChar *txt, bool vstr) {
}
else {
if (seed && verbosity_level > 0) {
- u_fprintf(ux_stderr, "Warning: Tag %S got hash seed %u.\n", txt, seed);
+ u_fprintf(ux_stderr, "Warning: Tag %S got hash seed %u.\n", tag->tag.c_str(), seed);
u_fflush(ux_stderr);
}
tag->seed = seed;
@@ -269,9 +255,28 @@ Tag *GrammarApplicator::addTag(const UChar *txt, bool vstr) {
break;
}
}
- tag = single_tags[hash];
+ return single_tags[hash];
+}
+
+Tag *GrammarApplicator::addTag(const UChar *txt, bool vstr) {
+ Taguint32HashMap::iterator it;
+ uint32_t thash = hash_value(txt);
+ if ((it = single_tags.find(thash)) != single_tags.end() && !it->second->tag.empty() && u_strcmp(it->second->tag.c_str(), txt) == 0) {
+ return it->second;
+ }
+
+ Tag *tag = 0;
+ if (vstr) {
+ tag = ::CG3::parseTag(txt, 0, *this);
+ }
+ else {
+ tag = new Tag();
+ tag->parseTagRaw(txt, grammar);
+ tag = addTag(tag);
+ }
+
bool reflow = false;
- if ((tag->type & T_REGEXP) && tag->tag[0] == '/') {
+ if ((tag->type & T_REGEXP) && tag->tag[0] != '"' && tag->tag[0] != '<') {
if (grammar->regex_tags.insert(tag->regexp).second) {
foreach (Taguint32HashMap, single_tags, titer, titer_end) {
if (titer->second->type & T_TEXTUAL) {
@@ -290,7 +295,7 @@ Tag *GrammarApplicator::addTag(const UChar *txt, bool vstr) {
}
}
}
- if ((tag->type & T_CASE_INSENSITIVE) && tag->tag[0] == '/') {
+ if ((tag->type & T_CASE_INSENSITIVE) && tag->tag[0] != '"' && tag->tag[0] != '<') {
if (grammar->icase_tags.insert(tag).second) {
foreach (Taguint32HashMap, single_tags, titer, titer_end) {
if (titer->second->type & T_TEXTUAL) {
@@ -355,6 +360,9 @@ void GrammarApplicator::printReading(const Reading *reading, UFILE *output, size
}
if (reading->deleted) {
+ if (!trace) {
+ return;
+ }
u_fputc(';', output);
}
@@ -482,7 +490,9 @@ void GrammarApplicator::printCohort(Cohort *cohort, UFILE *output) {
}
u_fputc('\n', output);
- mergeMappings(*cohort);
+ if (!split_mappings) {
+ mergeMappings(*cohort);
+ }
foreach (ReadingList, cohort->readings, rter1, rter1_end) {
printReading(*rter1, output);
@@ -629,7 +639,7 @@ void GrammarApplicator::pipeOutCohort(const Cohort *cohort, std::ostream& output
output.write(str.c_str(), str.length());
}
-void GrammarApplicator::pipeOutSingleWindow(const SingleWindow& window, std::ostream& output) {
+void GrammarApplicator::pipeOutSingleWindow(const SingleWindow& window, Process& output) {
std::ostringstream ss;
writeRaw(ss, window.number);
@@ -646,10 +656,10 @@ void GrammarApplicator::pipeOutSingleWindow(const SingleWindow& window, std::ost
writeRaw(output, cs);
output.write(str.c_str(), str.length());
- output << std::flush;
+ output.flush();
}
-void GrammarApplicator::pipeInReading(Reading *reading, std::istream& input, bool force) {
+void GrammarApplicator::pipeInReading(Reading *reading, Process& input, bool force) {
uint32_t cs = 0;
readRaw(input, cs);
if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: reading packet length %u\n", cs);
@@ -701,7 +711,7 @@ void GrammarApplicator::pipeInReading(Reading *reading, std::istream& input, boo
reflowReading(*reading);
}
-void GrammarApplicator::pipeInCohort(Cohort *cohort, std::istream& input) {
+void GrammarApplicator::pipeInCohort(Cohort *cohort, Process& input) {
uint32_t cs = 0;
readRaw(input, cs);
if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: cohort packet length %u\n", cs);
@@ -743,7 +753,7 @@ void GrammarApplicator::pipeInCohort(Cohort *cohort, std::istream& input) {
}
}
-void GrammarApplicator::pipeInSingleWindow(SingleWindow& window, std::istream& input) {
+void GrammarApplicator::pipeInSingleWindow(SingleWindow& window, Process& input) {
uint32_t cs = 0;
readRaw(input, cs);
if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: window packet length %u\n", cs);
@@ -765,4 +775,28 @@ void GrammarApplicator::pipeInSingleWindow(SingleWindow& window, std::istream& i
}
}
+void GrammarApplicator::error(const char *str, const UChar *p) {
+ (void)p;
+ UChar buf[] = { L'R', L'U', L'N', L'T', L'I', L'M', L'E', 0 };
+ u_fprintf(ux_stderr, str, buf, 0, buf);
+}
+
+void GrammarApplicator::error(const char *str, const char *s, const UChar *p) {
+ (void)p;
+ UChar buf[] = { L'R', L'U', L'N', L'T', L'I', L'M', L'E', 0 };
+ u_fprintf(ux_stderr, str, buf, s, 0, buf);
+}
+
+void GrammarApplicator::error(const char *str, const UChar *s, const UChar *p) {
+ (void)p;
+ UChar buf[] = { L'R', L'U', L'N', L'T', L'I', L'M', L'E', 0 };
+ u_fprintf(ux_stderr, str, buf, s, 0, buf);
+}
+
+void GrammarApplicator::error(const char *str, const char *s, const UChar *S, const UChar *p) {
+ (void)p;
+ UChar buf[] = { L'R', L'U', L'N', L'T', L'I', L'M', L'E', 0 };
+ u_fprintf(ux_stderr, str, buf, s, S, 0, buf);
+}
+
}
diff --git a/src/GrammarApplicator.hpp b/src/GrammarApplicator.hpp
index 1fee4b9..32fe4c6 100644
--- a/src/GrammarApplicator.hpp
+++ b/src/GrammarApplicator.hpp
@@ -31,7 +31,9 @@
#include "interval_vector.hpp"
#include "flat_unordered_set.hpp"
#include "istream.hpp"
-#include <exec-stream.h>
+#include <deque>
+
+class Process;
namespace CG3 {
class Window;
@@ -43,6 +45,16 @@ namespace CG3 {
class Set;
class Rule;
+ struct dSMC_Context {
+ const ContextualTest *test;
+ Cohort **deep;
+ Cohort *origin;
+ uint64_t options;
+ bool did_test;
+ bool matched_target;
+ bool matched_tests;
+ };
+
class GrammarApplicator {
public:
bool always_span;
@@ -66,6 +78,8 @@ namespace CG3 {
bool owns_grammar;
bool input_eof;
bool seen_barrier;
+ bool is_conv;
+ bool split_mappings;
bool dep_has_spanned;
uint32_t dep_delimit;
@@ -105,6 +119,7 @@ namespace CG3 {
Grammar *grammar;
// Moved these public to help the library API
+ Tag *addTag(Tag *tag);
Tag *addTag(const UChar *tag, bool vstr = false);
Tag *addTag(const UString& txt, bool vstr = false);
void initEmptySingleWindow(SingleWindow *cSWindow);
@@ -117,6 +132,14 @@ namespace CG3 {
void splitAllMappings(all_mappings_t& all_mappings, Cohort& cohort, bool mapped = false);
Taguint32HashMap single_tags;
+ UFILE *ux_stderr;
+ UChar *filebase;
+ void error(const char *str, const UChar *p);
+ void error(const char *str, const char *s, const UChar *p);
+ void error(const char *str, const UChar *s, const UChar *p);
+ void error(const char *str, const char *s, const UChar *S, const UChar *p);
+ Grammar *get_grammar() { return grammar; }
+
protected:
void printTrace(UFILE *output, uint32_t hit_by);
void printReading(const Reading *reading, UFILE *output, size_t sub=1);
@@ -125,13 +148,11 @@ namespace CG3 {
void pipeOutReading(const Reading *reading, std::ostream& output);
void pipeOutCohort(const Cohort *cohort, std::ostream& output);
- void pipeOutSingleWindow(const SingleWindow& window, std::ostream& output);
+ void pipeOutSingleWindow(const SingleWindow& window, Process& output);
- void pipeInReading(Reading *reading, std::istream& input, bool force = false);
- void pipeInCohort(Cohort *cohort, std::istream& input);
- void pipeInSingleWindow(SingleWindow& window, std::istream& input);
-
- UFILE *ux_stderr;
+ void pipeInReading(Reading *reading, Process& input, bool force = false);
+ void pipeInCohort(Cohort *cohort, Process& input);
+ void pipeInSingleWindow(SingleWindow& window, Process& input);
uint32_t numLines;
uint32_t numWindows;
@@ -145,7 +166,7 @@ namespace CG3 {
typedef std::map<int32_t,uint32IntervalVector> RSType;
RSType runsections;
- typedef std::map<uint32_t,exec_stream_t*> externals_t;
+ typedef std::map<uint32_t,Process> externals_t;
externals_t externals;
uint32Vector ci_depths;
@@ -164,9 +185,12 @@ namespace CG3 {
bool did_final_enclosure;
std::vector<UnicodeString> regexgrps;
+ bc::flat_map<uint32_t, std::vector<UnicodeString> > regexgrps_r;
+ uint32_t same_basic;
Cohort *target;
Cohort *mark;
Cohort *attach_to;
+ Rule *current_rule;
typedef bc::flat_map<uint32_t,Reading*> readings_plain_t;
readings_plain_t readings_plain;
@@ -220,12 +244,11 @@ namespace CG3 {
bool doesSetMatchReading_trie(const Reading& reading, const Set& theset, const trie_t& trie, bool unif_mode = false);
bool doesSetMatchReading_tags(const Reading& reading, const Set& theset, bool unif_mode = false);
bool doesSetMatchReading(const Reading& reading, const uint32_t set, bool bypass_index = false, bool unif_mode = false);
- inline void doesSetMatchCohortHelper(std::vector<Reading*>& rv, const ReadingList& readings, const Set *theset, const ContextualTest *test = 0, uint32_t options = 0);
- std::vector<Reading*> doesSetMatchCohort(Cohort& cohort, const uint32_t set, const ContextualTest *test = 0, uint32_t options = 0);
- bool doesSetMatchCohortNormal_helper(ReadingList& readings, const Set *theset, const ContextualTest *test);
- bool doesSetMatchCohortNormal(Cohort& cohort, const uint32_t set, const ContextualTest *test = 0, uint64_t options = 0);
- bool doesSetMatchCohortCareful_helper(ReadingList& readings, const Set *theset, const ContextualTest *test);
- bool doesSetMatchCohortCareful(Cohort& cohort, const uint32_t set, const ContextualTest *test = 0, uint64_t options = 0);
+
+ inline bool doesSetMatchCohort_testLinked(Cohort& cohort, const Set& theset, dSMC_Context *context = 0);
+ inline bool doesSetMatchCohort_helper(Cohort& cohort, const Reading& reading, const Set& theset, dSMC_Context *context = 0);
+ bool doesSetMatchCohortNormal(Cohort& cohort, const uint32_t set, dSMC_Context *context = 0);
+ bool doesSetMatchCohortCareful(Cohort& cohort, const uint32_t set, dSMC_Context *context = 0);
bool statistics;
ticks gtimer;
@@ -250,63 +273,10 @@ namespace CG3 {
void reflowTextuals();
Reading *initEmptyCohort(Cohort& cohort);
- };
- inline Reading *get_sub_reading(Reading *tr, int sub_reading) {
- if (sub_reading == 0) {
- return tr;
- }
- if (sub_reading == GSR_ANY) {
- static Reading reading(tr->parent);
- reading = *tr;
- reading.next = 0;
- while (tr->next) {
- tr = tr->next;
- reading.tags_list.push_back(0);
- reading.tags_list.insert(reading.tags_list.end(), tr->tags_list.begin(), tr->tags_list.end());
- boost_foreach(uint32_t tag, tr->tags) {
- reading.tags.insert(tag);
- reading.tags_bloom.insert(tag);
- }
- boost_foreach(uint32_t tag, tr->tags_plain) {
- reading.tags_plain.insert(tag);
- reading.tags_plain_bloom.insert(tag);
- }
- boost_foreach(uint32_t tag, tr->tags_textual) {
- reading.tags_textual.insert(tag);
- reading.tags_textual_bloom.insert(tag);
- }
- reading.tags_numerical.insert(tr->tags_numerical.begin(), tr->tags_numerical.end());
- if (tr->mapped) {
- reading.mapped = true;
- }
- if (tr->mapping) {
- reading.mapping = tr->mapping;
- }
- }
- reading.rehash();
- return &reading;
- }
- if (sub_reading > 0) {
- for (int i=0 ; i<sub_reading && tr ; ++i) {
- tr = tr->next;
- }
- return tr;
- }
- if (sub_reading < 0) {
- int ntr = 0;
- Reading *ttr = tr;
- while (ttr) {
- ttr = ttr->next;
- --ntr;
- }
- for (int i=ntr ; i<sub_reading && tr ; ++i) {
- tr = tr->next;
- }
- return tr;
- }
- return tr;
- }
+ std::deque<Reading> subs_any;
+ Reading *get_sub_reading(Reading *tr, int sub_reading);
+ };
}
#endif
diff --git a/src/GrammarApplicator_matchSet.cpp b/src/GrammarApplicator_matchSet.cpp
index 7b82fcb..431b8be 100644
--- a/src/GrammarApplicator_matchSet.cpp
+++ b/src/GrammarApplicator_matchSet.cpp
@@ -422,6 +422,11 @@ uint32_t GrammarApplicator::doesTagMatchReading(const Reading& reading, const Ta
match = grammar->tag_any;
}
}
+ else if (tag.type & T_SAME_BASIC) {
+ if (reading.hash_plain == same_basic) {
+ match = grammar->tag_any;
+ }
+ }
if (match) {
++match_single;
@@ -681,25 +686,6 @@ bool GrammarApplicator::doesSetMatchReading(const Reading& reading, const uint32
return retval;
}
-inline void GrammarApplicator::doesSetMatchCohortHelper(std::vector<Reading*>& rv, const ReadingList& readings, const Set *theset, const ContextualTest *test, uint32_t options) {
- const_foreach (ReadingList, readings, iter, iter_end) {
- Reading *reading = get_sub_reading(*iter, test->offset_sub);
- if (!reading) {
- continue;
- }
- if (doesSetMatchReading(*reading, theset->number, (theset->type & (ST_CHILD_UNIFY|ST_SPECIAL)) != 0)) {
- rv.push_back(reading);
- if (!(options & MASK_POS_CDEPREL)) {
- break;
- }
- }
- else if (options & POS_CAREFUL) {
- rv.clear();
- break;
- }
- }
-}
-
inline bool _check_options(std::vector<Reading*>& rv, uint32_t options, size_t nr) {
if ((options & POS_CAREFUL) && rv.size() != nr) {
return false;
@@ -710,112 +696,160 @@ inline bool _check_options(std::vector<Reading*>& rv, uint32_t options, size_t n
return !rv.empty();
}
-std::vector<Reading*> GrammarApplicator::doesSetMatchCohort(Cohort& cohort, const uint32_t set, const ContextualTest *test, uint32_t options) {
- std::vector<Reading*> rv;
- if (cohort.possible_sets.find(set) == cohort.possible_sets.end()) {
- return rv;
- }
-
- const Set *theset = grammar->sets_list[set];
- doesSetMatchCohortHelper(rv, cohort.readings, theset, test, options);
- if ((options & POS_LOOK_DELETED) && _check_options(rv, options, cohort.readings.size())) {
- doesSetMatchCohortHelper(rv, cohort.deleted, theset, test, options);
- }
- if ((options & POS_LOOK_DELAYED)
- && (!(options & POS_LOOK_DELETED) || _check_options(rv, options, cohort.readings.size()+cohort.deleted.size()))) {
- doesSetMatchCohortHelper(rv, cohort.delayed, theset, test, options);
- }
- if (rv.empty()) {
- if (!grammar->sets_any || grammar->sets_any->find(set) == grammar->sets_any->end()) {
- cohort.possible_sets.erase(set);
+inline bool GrammarApplicator::doesSetMatchCohort_testLinked(Cohort& cohort, const Set& theset, dSMC_Context *context) {
+ bool retval = true;
+ if (context->test && context->test->linked) {
+ if (!context->did_test) {
+ if (context->test->linked->pos & POS_NO_PASS_ORIGIN) {
+ context->matched_tests = (runContextualTest(cohort.parent, cohort.local_number, context->test->linked, context->deep, &cohort) != 0);
+ }
+ else {
+ context->matched_tests = (runContextualTest(cohort.parent, cohort.local_number, context->test->linked, context->deep, context->origin) != 0);
+ }
+ if (!(theset.type & ST_CHILD_UNIFY)) {
+ context->did_test = true;
+ }
}
+ retval = context->matched_tests;
}
- return rv;
+ return retval;
}
-bool GrammarApplicator::doesSetMatchCohortNormal_helper(ReadingList& readings, const Set *theset, const ContextualTest *test) {
- const_foreach (ReadingList, readings, iter, iter_end) {
- Reading *reading = *iter;
- if (test) {
- // ToDo: Barriers need some way to escape sub-readings
- reading = get_sub_reading(reading, test->offset_sub);
- if (!reading) {
- continue;
- }
- }
- if (doesSetMatchReading(*reading, theset->number, (theset->type & (ST_CHILD_UNIFY|ST_SPECIAL)) != 0)) {
- return true;
+inline bool GrammarApplicator::doesSetMatchCohort_helper(Cohort& cohort, const Reading& reading, const Set& theset, dSMC_Context *context) {
+ bool retval = false;
+ unif_tags_t utags;
+ uint32SortedVector usets;
+ if (context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY)) {
+ utags = *unif_tags;
+ usets = *unif_sets;
+ }
+ if (doesSetMatchReading(reading, theset.number, (theset.type & (ST_CHILD_UNIFY | ST_SPECIAL)) != 0)) {
+ retval = true;
+ if (context) {
+ context->matched_target = true;
}
}
- return false;
+ if (retval && context && (context->options & POS_NOT)) {
+ retval = !retval;
+ }
+ if (retval && context) {
+ retval = doesSetMatchCohort_testLinked(cohort, theset, context);
+ }
+ if (context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY) && (utags.size() != unif_tags->size() || utags != *unif_tags)) {
+ unif_tags->swap(utags);
+ }
+ if (context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY) && usets.size() != unif_sets->size()) {
+ unif_sets->swap(usets);
+ }
+ return retval;
}
-bool GrammarApplicator::doesSetMatchCohortNormal(Cohort& cohort, const uint32_t set, const ContextualTest *test, uint64_t options) {
- /*
- return !doesSetMatchCohort(cohort, set, options).empty();
- /*/
- if (!(options & (POS_LOOK_DELETED|POS_LOOK_DELAYED)) && cohort.possible_sets.find(set) == cohort.possible_sets.end()) {
- return false;
- }
+
+bool GrammarApplicator::doesSetMatchCohortNormal(Cohort& cohort, const uint32_t set, dSMC_Context *context) {
bool retval = false;
+
+ if (!(!context || (context->options & (POS_LOOK_DELETED | POS_LOOK_DELAYED | POS_NOT))) && cohort.possible_sets.find(set) == cohort.possible_sets.end()) {
+ return retval;
+ }
+
const Set *theset = grammar->sets_list[set];
- if (cohort.wread && doesSetMatchReading(*cohort.wread, theset->number, (theset->type & (ST_CHILD_UNIFY|ST_SPECIAL)) != 0)) {
- retval = true;
+
+ if (cohort.wread) {
+ retval = doesSetMatchCohort_helper(cohort, *cohort.wread, *theset, context);
}
- if (doesSetMatchCohortNormal_helper(cohort.readings, theset, test)) {
- retval = true;
+
+ if (retval && (!context || context->did_test)) {
+ return retval;
}
- if (!retval && (options & POS_LOOK_DELETED) && doesSetMatchCohortNormal_helper(cohort.deleted, theset, test)) {
- retval = true;
+
+ ReadingList *lists[3] = { &cohort.readings };
+ if (context && (context->options & POS_LOOK_DELETED)) {
+ lists[1] = &cohort.deleted;
}
- if (!retval && (options & POS_LOOK_DELAYED) && doesSetMatchCohortNormal_helper(cohort.delayed, theset, test)) {
- retval = true;
+ if (context && (context->options & POS_LOOK_DELAYED)) {
+ lists[2] = &cohort.delayed;
}
- if (!retval) {
+
+ for (size_t i = 0; i < 3; ++i) {
+ if (lists[i] == 0) {
+ continue;
+ }
+ const_foreach (ReadingList, *lists[i], iter, iter_end) {
+ Reading *reading = *iter;
+ if (context && context->test) {
+ // ToDo: Barriers need some way to escape sub-readings
+ reading = get_sub_reading(reading, context->test->offset_sub);
+ if (!reading) {
+ continue;
+ }
+ }
+ if (doesSetMatchCohort_helper(cohort, *reading, *theset, context)) {
+ retval = true;
+ }
+ if (retval && (!context || !(context->test && context->test->linked) || context->did_test)) {
+ return retval;
+ }
+ }
+ }
+
+ if (context && !context->matched_target && (context->options & POS_NOT)) {
+ retval = doesSetMatchCohort_testLinked(cohort, *theset, context);
+ }
+
+ if (context && !context->matched_target) {
if (!grammar->sets_any || grammar->sets_any->find(set) == grammar->sets_any->end()) {
cohort.possible_sets.erase(set);
}
}
+
return retval;
- //*/
}
-bool GrammarApplicator::doesSetMatchCohortCareful_helper(ReadingList& readings, const Set *theset, const ContextualTest *test) {
- const_foreach (ReadingList, readings, iter, iter_end) {
- Reading *reading = *iter;
- if (test) {
- reading = get_sub_reading(reading, test->offset_sub);
- if (!reading) {
- return false;
- }
- }
- if (!doesSetMatchReading(*reading, theset->number, (theset->type & (ST_CHILD_UNIFY|ST_SPECIAL)) != 0)) {
- return false;
- }
- }
- return !readings.empty();
-}
+bool GrammarApplicator::doesSetMatchCohortCareful(Cohort& cohort, const uint32_t set, dSMC_Context *context) {
+ bool retval = false;
-bool GrammarApplicator::doesSetMatchCohortCareful(Cohort& cohort, const uint32_t set, const ContextualTest *test, uint64_t options) {
- /*
- return !doesSetMatchCohort(cohort, set, options).empty();
- /*/
- if (!(options & (POS_LOOK_DELETED|POS_LOOK_DELAYED)) && cohort.possible_sets.find(set) == cohort.possible_sets.end()) {
- return false;
+ if (!(!context || (context->options & (POS_LOOK_DELETED | POS_LOOK_DELAYED | POS_NOT))) && cohort.possible_sets.find(set) == cohort.possible_sets.end()) {
+ return retval;
}
- bool retval = true;
+
const Set *theset = grammar->sets_list[set];
- if (!doesSetMatchCohortCareful_helper(cohort.readings, theset, test)) {
- retval = false;
+
+ ReadingList *lists[3] = { &cohort.readings };
+ if (context && (context->options & POS_LOOK_DELETED)) {
+ lists[1] = &cohort.deleted;
}
- if (retval && (options & POS_LOOK_DELETED) && !doesSetMatchCohortCareful_helper(cohort.deleted, theset, test)) {
- retval = false;
+ if (context && (context->options & POS_LOOK_DELAYED)) {
+ lists[2] = &cohort.delayed;
}
- if (retval && (options & POS_LOOK_DELAYED) && !doesSetMatchCohortCareful_helper(cohort.delayed, theset, test)) {
- retval = false;
+
+ for (size_t i = 0; i < 3; ++i) {
+ if (lists[i] == 0) {
+ continue;
+ }
+ const_foreach(ReadingList, *lists[i], iter, iter_end) {
+ Reading *reading = *iter;
+ if (context && context->test) {
+ // ToDo: Barriers need some way to escape sub-readings
+ reading = get_sub_reading(reading, context->test->offset_sub);
+ if (!reading) {
+ continue;
+ }
+ }
+ retval = doesSetMatchCohort_helper(cohort, *reading, *theset, context);
+ if (!retval) {
+ break;
+ }
+ }
+ if (!retval) {
+ break;
+ }
+ }
+
+ if (context && !context->matched_target && (context->options & POS_NOT)) {
+ retval = doesSetMatchCohort_testLinked(cohort, *theset, context);
}
+
return retval;
- //*/
}
}
diff --git a/src/GrammarApplicator_reflow.cpp b/src/GrammarApplicator_reflow.cpp
index a7fc861..15c3c24 100644
--- a/src/GrammarApplicator_reflow.cpp
+++ b/src/GrammarApplicator_reflow.cpp
@@ -628,30 +628,57 @@ void GrammarApplicator::splitAllMappings(all_mappings_t& all_mappings, Cohort& c
splitMappings(iter->second, cohort, *reading, mapped);
}
std::sort(cohort.readings.begin(), cohort.readings.end(), CG3::Reading::cmp_number);
+ if (!grammar->reopen_mappings.empty()) {
+ boost_foreach (Reading *reading, cohort.readings) {
+ if (reading->mapping && grammar->reopen_mappings.count(reading->mapping->hash)) {
+ reading->mapped = false;
+ }
+ }
+ }
all_mappings.clear();
}
void GrammarApplicator::mergeReadings(ReadingList& readings) {
- std::map<uint32_t, ReadingList> mlist;
+ bc::flat_map<uint32_t, std::pair<uint32_t,Reading*> > mapped;
+ mapped.reserve(readings.size());
+ bc::flat_map<uint32_t, ReadingList> mlist;
+ mlist.reserve(readings.size());
foreach (ReadingList, readings, iter, iter_end) {
Reading *r = *iter;
- uint32_t hp = r->hash_plain;
+ uint32_t hp = r->hash_plain, hplain = r->hash_plain;
+ uint32_t nm = 0;
if (trace) {
foreach (uint32Vector, r->hit_by, iter_hb, iter_hb_end) {
hp = hash_value(*iter_hb, hp);
}
}
+ if (r->mapping) {
+ ++nm;
+ }
Reading *sub = r->next;
while (sub) {
hp = hash_value(sub->hash_plain, hp);
+ hplain = hash_value(sub->hash_plain, hplain);
if (trace) {
foreach (uint32Vector, sub->hit_by, iter_hb, iter_hb_end) {
hp = hash_value(*iter_hb, hp);
}
}
+ if (sub->mapping) {
+ ++nm;
+ }
sub = sub->next;
}
- mlist[hp].push_back(r);
+ if (mapped.count(hplain)) {
+ if (mapped[hplain].first != 0 && nm == 0) {
+ r->deleted = true;
+ }
+ else if (mapped[hplain].first != nm && mapped[hplain].first == 0) {
+ mapped[hplain].second->deleted = true;
+ }
+ }
+ mapped[hplain] = std::make_pair(nm, r);
+ mlist[hp+nm].push_back(r);
}
if (mlist.size() == readings.size()) {
@@ -661,8 +688,7 @@ void GrammarApplicator::mergeReadings(ReadingList& readings) {
readings.clear();
std::vector<Reading*> order;
- std::map<uint32_t, ReadingList>::iterator miter;
- for (miter = mlist.begin() ; miter != mlist.end() ; miter++) {
+ for (BOOST_AUTO(miter, mlist.begin()) ; miter != mlist.end() ; miter++) {
ReadingList clist = miter->second;
Reading *nr = new Reading(*(clist.front()));
if (nr->mapping) {
diff --git a/src/GrammarApplicator_runContextualTest.cpp b/src/GrammarApplicator_runContextualTest.cpp
index 5f1ecf1..a5cc420 100644
--- a/src/GrammarApplicator_runContextualTest.cpp
+++ b/src/GrammarApplicator_runContextualTest.cpp
@@ -41,52 +41,54 @@ Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *t
if (deep) {
*deep = cohort;
}
+
+ dSMC_Context context = { test, deep, origin, test->pos, false, false, false };
+
if (test->pos & POS_CAREFUL) {
- *retval = doesSetMatchCohortCareful(*cohort, test->target, test, test->pos);
- }
- bool foundfirst = *retval;
- if (!foundfirst || !(test->pos & POS_CAREFUL)) {
- foundfirst = doesSetMatchCohortNormal(*cohort, test->target, test, test->pos);
- if (!(test->pos & POS_CAREFUL)) {
- *retval = foundfirst;
+ *retval = doesSetMatchCohortCareful(*cohort, test->target, &context);
+ if (!context.matched_target && (test->pos & POS_SCANFIRST)) {
+ context.did_test = true;
+ doesSetMatchCohortNormal(*cohort, test->target, &context);
}
}
- if (origin && (test->offset != 0 || (test->pos & (POS_SCANALL|POS_SCANFIRST))) && origin == cohort && origin->local_number != 0) {
- *retval = false;
- rvs |= TRV_BREAK;
- }
- if (test->pos & POS_NOT) {
- *retval = !*retval;
+ else {
+ *retval = doesSetMatchCohortNormal(*cohort, test->target, &context);
}
- if (*retval && test->linked) {
- if (test->linked->pos & POS_NO_PASS_ORIGIN) {
- *retval = (runContextualTest(cohort->parent, cohort->local_number, test->linked, deep, cohort) != 0);
- }
- else {
- *retval = (runContextualTest(cohort->parent, cohort->local_number, test->linked, deep, origin) != 0);
+
+ if (origin && (test->offset != 0 || (test->pos & (POS_SCANALL|POS_SCANFIRST))) && origin == cohort && origin->local_number != 0) {
+ if (!(test->pos & POS_NOT)) {
+ *retval = false;
}
+ rvs |= TRV_BREAK;
}
- if (foundfirst && (test->pos & POS_SCANFIRST)) {
+ if (context.matched_target && (test->pos & POS_SCANFIRST)) {
rvs |= TRV_BREAK;
}
else if (!(test->pos & (POS_SCANALL|POS_SCANFIRST|POS_SELF))) {
rvs |= TRV_BREAK;
}
+
+ context.test = 0;
+ context.deep = 0;
+ context.origin = 0;
+ context.did_test = true;
if (test->barrier) {
- bool barrier = doesSetMatchCohortNormal(*cohort, test->barrier, test, test->pos & ~POS_CAREFUL);
+ context.options = test->pos & ~POS_CAREFUL;
+ bool barrier = doesSetMatchCohortNormal(*cohort, test->barrier, &context);
if (barrier) {
seen_barrier = true;
rvs |= TRV_BREAK | TRV_BARRIER;
}
}
if (test->cbarrier) {
- bool cbarrier = doesSetMatchCohortCareful(*cohort, test->cbarrier, test, test->pos | POS_CAREFUL);
+ context.options = test->pos | POS_CAREFUL;
+ bool cbarrier = doesSetMatchCohortCareful(*cohort, test->cbarrier, &context);
if (cbarrier) {
seen_barrier = true;
rvs |= TRV_BREAK | TRV_BARRIER;
}
}
- if (foundfirst && *retval) {
+ if (context.matched_target && *retval) {
rvs |= TRV_BREAK;
}
if (!*retval) {
@@ -208,33 +210,32 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
}
else if (!test->ors.empty()) {
Cohort *cdeep = 0;
- std::list<ContextualTest*>::const_iterator iter;
- for (iter = test->ors.begin() ; iter != test->ors.end() ; iter++) {
- uint64_t orgpos = (*iter)->pos;
- int32_t orgoffset = (*iter)->offset;
- uint32_t orgcbar = (*iter)->cbarrier;
- uint32_t orgbar = (*iter)->barrier;
+ boost_foreach (ContextualTest *iter, test->ors) {
+ uint64_t orgpos = iter->pos;
+ int32_t orgoffset = iter->offset;
+ uint32_t orgcbar = iter->cbarrier;
+ uint32_t orgbar = iter->barrier;
if (test->pos & POS_TMPL_OVERRIDE) {
- (*iter)->pos = test->pos;
- (*iter)->pos &= ~(POS_TMPL_OVERRIDE|POS_NEGATE|POS_NOT|POS_MARK_JUMP);
- (*iter)->offset = test->offset;
+ iter->pos = test->pos;
+ iter->pos &= ~(POS_TMPL_OVERRIDE|POS_NEGATE|POS_NOT|POS_MARK_JUMP);
+ iter->offset = test->offset;
if (test->offset != 0 && !(test->pos & (POS_SCANFIRST|POS_SCANALL|POS_ABSOLUTE))) {
- (*iter)->pos |= POS_SCANALL;
+ iter->pos |= POS_SCANALL;
}
if (test->cbarrier) {
- (*iter)->cbarrier = test->cbarrier;
+ iter->cbarrier = test->cbarrier;
}
if (test->barrier) {
- (*iter)->barrier = test->barrier;
+ iter->barrier = test->barrier;
}
}
dep_deep_seen.clear();
- cohort = runContextualTest(sWindow, position, *iter, &cdeep, origin);
+ cohort = runContextualTest(sWindow, position, iter, &cdeep, origin);
if (test->pos & POS_TMPL_OVERRIDE) {
- (*iter)->pos = orgpos;
- (*iter)->offset = orgoffset;
- (*iter)->cbarrier = orgcbar;
- (*iter)->barrier = orgbar;
+ iter->pos = orgpos;
+ iter->offset = orgoffset;
+ iter->cbarrier = orgcbar;
+ iter->barrier = orgbar;
if (cdeep && test->offset != 0) {
int32_t reloff = int32_t(cdeep->local_number) - int32_t(position);
if (!(test->pos & (POS_SCANFIRST|POS_SCANALL|POS_ABSOLUTE))) {
@@ -635,7 +636,7 @@ Cohort *GrammarApplicator::runParenthesisTest(SingleWindow *sWindow, const Cohor
if (test->pos & POS_LEFT_PAR) {
cohort = sWindow->cohorts[par_left_pos];
}
- else if (test->pos & POS_RIGHT_PAR) {
+ else {
cohort = sWindow->cohorts[par_right_pos];
}
runSingleTest(cohort, test, rvs, &retval, deep, origin);
diff --git a/src/GrammarApplicator_runGrammar.cpp b/src/GrammarApplicator_runGrammar.cpp
index 3cef8db..033011e 100644
--- a/src/GrammarApplicator_runGrammar.cpp
+++ b/src/GrammarApplicator_runGrammar.cpp
@@ -158,6 +158,10 @@ gotaline:
if (space[0] == '"' && space[1] == '<') {
++space;
SKIPTO_NOSPAN(space, '"');
+ while (*space && space[-1] != '>') {
+ ++space;
+ SKIPTO_NOSPAN(space, '"');
+ }
SKIPTOWS(space, 0, true, true);
--space;
}
@@ -207,7 +211,7 @@ gotaline:
did_soft_lookback = false;
}
if (cCohort && (cSWindow->cohorts.size() >= hard_limit || (!dep_delimit && grammar->delimiters && doesSetMatchCohortNormal(*cCohort, grammar->delimiters->number)))) {
- if (cSWindow->cohorts.size() >= hard_limit) {
+ if (!is_conv && cSWindow->cohorts.size() >= hard_limit) {
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
diff --git a/src/GrammarApplicator_runRules.cpp b/src/GrammarApplicator_runRules.cpp
index ffae447..2413374 100644
--- a/src/GrammarApplicator_runRules.cpp
+++ b/src/GrammarApplicator_runRules.cpp
@@ -28,6 +28,7 @@
#include "Reading.hpp"
#include "ContextualTest.hpp"
#include "version.hpp"
+#include "process.hpp"
namespace CG3 {
@@ -153,6 +154,69 @@ TagList GrammarApplicator::getTagList(const Set& theSet, bool unif_mode) const {
return theTags;
}
+Reading *GrammarApplicator::get_sub_reading(Reading *tr, int sub_reading) {
+ if (sub_reading == 0) {
+ return tr;
+ }
+ if (sub_reading == GSR_ANY) {
+ subs_any.push_back(Reading());
+ Reading *reading = &subs_any.back();
+ *reading = *tr;
+ reading->next = 0;
+ while (tr->next) {
+ tr = tr->next;
+ reading->tags_list.push_back(0);
+ reading->tags_list.insert(reading->tags_list.end(), tr->tags_list.begin(), tr->tags_list.end());
+ boost_foreach(uint32_t tag, tr->tags) {
+ reading->tags.insert(tag);
+ reading->tags_bloom.insert(tag);
+ }
+ boost_foreach(uint32_t tag, tr->tags_plain) {
+ reading->tags_plain.insert(tag);
+ reading->tags_plain_bloom.insert(tag);
+ }
+ boost_foreach(uint32_t tag, tr->tags_textual) {
+ reading->tags_textual.insert(tag);
+ reading->tags_textual_bloom.insert(tag);
+ }
+ reading->tags_numerical.insert(tr->tags_numerical.begin(), tr->tags_numerical.end());
+ if (tr->mapped) {
+ reading->mapped = true;
+ }
+ if (tr->mapping) {
+ reading->mapping = tr->mapping;
+ }
+ if (tr->matched_target) {
+ reading->matched_target = true;
+ }
+ if (tr->matched_tests) {
+ reading->matched_tests = true;
+ }
+ }
+ reading->rehash();
+ return reading;
+ }
+ if (sub_reading > 0) {
+ for (int i = 0; i<sub_reading && tr; ++i) {
+ tr = tr->next;
+ }
+ return tr;
+ }
+ if (sub_reading < 0) {
+ int ntr = 0;
+ Reading *ttr = tr;
+ while (ttr) {
+ ttr = ttr->next;
+ --ntr;
+ }
+ for (int i = ntr; i<sub_reading && tr; ++i) {
+ tr = tr->next;
+ }
+ return tr;
+ }
+ return tr;
+}
+
/**
* Applies the passed rules to the passed SingleWindow.
*
@@ -178,6 +242,8 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
// ToDo: Now that numbering is used, can't this be made a normal max? Hm, maybe not since --sections can still force another order...but if we're smart, then we re-enumerate rules based on --sections
uint32IntervalVector intersects = current.valid_rules.intersect(rules);
+ ReadingList removed;
+ ReadingList selected;
if (debug_level > 1) {
std::cerr << "DEBUG: Trying window " << current.number << std::endl;
@@ -193,6 +259,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
continue;
}
+ current_rule = grammar->rule_by_number[j];
const Rule& rule = *(grammar->rule_by_number[j]);
if (debug_level > 1) {
std::cerr << "DEBUG: Trying rule " << rule.line << std::endl;
@@ -329,10 +396,16 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
if (!readings_plain.empty()) {
readings_plain.clear();
}
+ if (!subs_any.empty()) {
+ subs_any.clear();
+ }
// Varstring capture groups exist on a per-cohort basis, since we may need them for mapping later.
if (!regexgrps.empty()) {
regexgrps.clear();
}
+ if (!regexgrps_r.empty()) {
+ regexgrps_r.clear();
+ }
if (!unif_tags_rs.empty()) {
unif_tags_rs.clear();
}
@@ -349,6 +422,8 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
for (size_t i = 0; i < cohort->readings.size(); ++i) {
Reading *reading = get_sub_reading(cohort->readings[i], rule.sub_reading);
if (!reading) {
+ cohort->readings[i]->matched_target = false;
+ cohort->readings[i]->matched_tests = false;
continue;
}
@@ -397,16 +472,22 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
unif_sets->clear();
}
+ same_basic = reading->hash_plain;
target = 0;
mark = cohort;
+ size_t orz = regexgrps.size();
// Actually check if the reading is a valid target. First check if rule target matches...
if (rule.target && doesSetMatchReading(*reading, rule.target, (set.type & (ST_CHILD_UNIFY|ST_SPECIAL)) != 0)) {
+ bool captured = false;
+ if (orz != regexgrps.size()) {
+ did_test = false;
+ captured = true;
+ }
target = cohort;
reading->matched_target = true;
matched_target = true;
bool good = true;
// If we didn't already run the contextual tests, run them now.
- // This only needs to be done once per cohort as no current functionality exists to refer back to the exact reading.
if (!did_test) {
foreach (ContextList, rule.tests, it, it_end) {
ContextualTest *test = *it;
@@ -449,13 +530,25 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
reading->matched_tests = true;
++num_active;
++rule.num_match;
+ if (captured) {
+ regexgrps_r[reading->hash].swap(regexgrps);
+ }
+ }
+ else {
+ regexgrps.resize(orz);
}
++num_iff;
}
else {
+ regexgrps.resize(orz);
++rule.num_fail;
}
readings_plain.insert(std::make_pair(reading->hash_plain,reading));
+
+ if (reading != cohort->readings[i]) {
+ cohort->readings[i]->matched_target = reading->matched_target;
+ cohort->readings[i]->matched_tests = reading->matched_tests;
+ }
}
// If none of the readings were valid targets, remove this cohort from the rule's possible cohorts.
@@ -478,8 +571,8 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
}
// Keep track of which readings got removed and selected
- ReadingList removed;
- ReadingList selected;
+ removed.resize(0);
+ selected.resize(0);
// Remember the current state so we can compare later to see if anything has changed
const size_t state_num_readings = cohort->readings.size();
@@ -500,6 +593,10 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
bool good = reading.matched_tests;
const uint32_t state_hash = reading.hash;
+ if (regexgrps_r.count(reading.hash)) {
+ regexgrps_r[reading.hash].swap(regexgrps);
+ }
+
// Iff needs extra special care; if it is a Remove type and we matched the target, go ahead.
// If it had matched the tests it would have been Select type.
if (rule.type == K_IFF && type == K_REMOVE && reading.matched_target) {
@@ -616,26 +713,20 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
UErrorCode err = U_ZERO_ERROR;
u_strToUTF8(&cbuffers[0][0], CG3_BUFFER_SIZE-1, 0, ext->tag.c_str(), ext->tag.length(), &err);
- exec_stream_t *es = 0;
+ Process& es = externals[rule.varname];
try {
- es = new exec_stream_t;
- es->set_binary_mode(exec_stream_t::s_in);
- es->set_binary_mode(exec_stream_t::s_out);
- es->set_wait_timeout(exec_stream_t::s_in, 10000);
- es->set_wait_timeout(exec_stream_t::s_out, 10000);
- es->start(&cbuffers[0][0], "");
- writeRaw(es->in(), CG3_EXTERNAL_PROTOCOL);
+ es.start(&cbuffers[0][0]);
+ writeRaw(es, CG3_EXTERNAL_PROTOCOL);
}
catch (std::exception& e) {
u_fprintf(ux_stderr, "Error: External on line %u resulted in error: %s\n", rule.line, e.what());
CG3Quit(1);
}
- externals[rule.varname] = es;
ei = externals.find(rule.varname);
}
- pipeOutSingleWindow(current, ei->second->in());
- pipeInSingleWindow(current, ei->second->out());
+ pipeOutSingleWindow(current, ei->second);
+ pipeInSingleWindow(current, ei->second);
indexSingleWindow(current);
readings_changed = true;
diff --git a/src/GrammarWriter.cpp b/src/GrammarWriter.cpp
index ae9b7ff..389afcc 100644
--- a/src/GrammarWriter.cpp
+++ b/src/GrammarWriter.cpp
@@ -157,18 +157,18 @@ int GrammarWriter::writeGrammar(UFILE *output) {
}
}
boost_foreach (Set *s, grammar->sets_list) {
- if (s->type & ST_USED) {
- printSet(output, *s);
- }
+ printSet(output, *s);
}
u_fprintf(output, "\n");
+ /*
for (BOOST_AUTO(cntx, grammar->templates.begin()); cntx != grammar->templates.end(); ++cntx) {
u_fprintf(output, "TEMPLATE %u = ", cntx->second->hash);
printContextualTest(output, *cntx->second);
u_fprintf(output, " ;\n");
}
u_fprintf(output, "\n");
+ //*/
bool found = false;
const_foreach (RuleVector, grammar->rule_by_number, rule_iter, rule_iter_end) {
@@ -295,8 +295,7 @@ void GrammarWriter::printContextualTest(UFILE *to, const ContextualTest& test) {
u_fprintf(to, "T:%u ", test.tmpl->hash);
}
else if (!test.ors.empty()) {
- std::list<ContextualTest*>::const_iterator iter;
- for (iter = test.ors.begin() ; iter != test.ors.end() ; ) {
+ for (BOOST_AUTO(iter, test.ors.begin()) ; iter != test.ors.end() ; ) {
u_fprintf(to, "(");
printContextualTest(to, **iter);
u_fprintf(to, ")");
diff --git a/src/IGrammarParser.hpp b/src/IGrammarParser.hpp
index e8e8402..8e7e5db 100644
--- a/src/IGrammarParser.hpp
+++ b/src/IGrammarParser.hpp
@@ -34,8 +34,9 @@ namespace CG3 {
virtual void setCompatible(bool compat) = 0;
virtual void setVerbosity(uint32_t level) = 0;
virtual int parse_grammar_from_file(const char *filename, const char *locale, const char *codepage) = 0;
- protected:
+
UFILE *ux_stderr;
+ protected:
Grammar *result;
uint32_t verbosity;
};
diff --git a/src/NicelineApplicator.cpp b/src/NicelineApplicator.cpp
index e4420b0..faf24d3 100644
--- a/src/NicelineApplicator.cpp
+++ b/src/NicelineApplicator.cpp
@@ -171,7 +171,7 @@ gotaline:
did_soft_lookback = false;
}
if (cCohort && (cSWindow->cohorts.size() >= hard_limit || (!dep_delimit && grammar->delimiters && doesSetMatchCohortNormal(*cCohort, grammar->delimiters->number)))) {
- if (cSWindow->cohorts.size() >= hard_limit) {
+ if (!is_conv && cSWindow->cohorts.size() >= hard_limit) {
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
@@ -475,7 +475,9 @@ void NicelineApplicator::printCohort(Cohort *cohort, UFILE *output) {
did_warn_statictags = true;
}
- mergeMappings(*cohort);
+ if (!split_mappings) {
+ mergeMappings(*cohort);
+ }
if (cohort->readings.empty()) {
u_fputc('\t', output);
diff --git a/src/PlaintextApplicator.cpp b/src/PlaintextApplicator.cpp
index 5e21bfb..6db0d8a 100644
--- a/src/PlaintextApplicator.cpp
+++ b/src/PlaintextApplicator.cpp
@@ -158,7 +158,7 @@ gotaline:
did_soft_lookback = false;
}
if (cCohort && (cSWindow->cohorts.size() >= hard_limit || (!dep_delimit && grammar->delimiters && doesSetMatchCohortNormal(*cCohort, grammar->delimiters->number)))) {
- if (cSWindow->cohorts.size() >= hard_limit) {
+ if (!is_conv && cSWindow->cohorts.size() >= hard_limit) {
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
diff --git a/src/Reading.cpp b/src/Reading.cpp
index b265eab..3fdc143 100644
--- a/src/Reading.cpp
+++ b/src/Reading.cpp
@@ -24,6 +24,25 @@
namespace CG3 {
+Reading::Reading() :
+mapped(false),
+deleted(false),
+noprint(false),
+matched_target(false),
+matched_tests(false),
+baseform(0),
+hash(0),
+hash_plain(0),
+number(0),
+mapping(0),
+parent(0),
+next(0)
+{
+ #ifdef CG_TRACE_OBJECTS
+ std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << std::endl;
+ #endif
+}
+
Reading::Reading(Cohort *p) :
mapped(false),
deleted(false),
diff --git a/src/Reading.hpp b/src/Reading.hpp
index 4f4abfd..0d5cc99 100644
--- a/src/Reading.hpp
+++ b/src/Reading.hpp
@@ -60,6 +60,7 @@ namespace CG3 {
typedef bc::flat_map<uint32_t,Tag*> tags_numerical_t;
tags_numerical_t tags_numerical;
+ Reading();
Reading(Cohort *p);
Reading(const Reading& r);
~Reading();
diff --git a/src/Set.cpp b/src/Set.cpp
index dc41672..7e9b280 100644
--- a/src/Set.cpp
+++ b/src/Set.cpp
@@ -153,6 +153,10 @@ void Set::markUsed(Grammar& grammar) {
trie_markused(trie);
trie_markused(trie_special);
+ boost_foreach(Tag *tag, ff_tags) {
+ tag->markUsed();
+ }
+
for (uint32_t i=0 ; i<sets.size() ; ++i) {
Set *set = grammar.sets_by_contents.find(sets[i])->second;
set->markUsed(grammar);
diff --git a/src/Strings.cpp b/src/Strings.cpp
index 6c4e7d9..a800db6 100644
--- a/src/Strings.cpp
+++ b/src/Strings.cpp
@@ -48,7 +48,8 @@ UnicodeString flags[FLAGS_COUNT] = {
UNICODE_STRING_SIMPLE("UNMAPLAST"),
UNICODE_STRING_SIMPLE("REVERSE"),
UNICODE_STRING_SIMPLE("SUB"),
- UNICODE_STRING_SIMPLE("OUTPUT")
+ UNICODE_STRING_SIMPLE("OUTPUT"),
+ UNICODE_STRING_SIMPLE("CAPTURE_UNIF")
};
UnicodeString keywords[KEYWORD_COUNT] = {
@@ -107,6 +108,9 @@ UnicodeString keywords[KEYWORD_COUNT] = {
UNICODE_STRING_SIMPLE("EXTERNAL"),
UNICODE_STRING_SIMPLE("EXTERNAL-ONCE"),
UNICODE_STRING_SIMPLE("EXTERNAL-ALWAYS"),
+ UNICODE_STRING_SIMPLE("OPTIONS"),
+ UNICODE_STRING_SIMPLE("STRICT-TAGS"),
+ UNICODE_STRING_SIMPLE("REOPEN-MAPPINGS"),
UNICODE_STRING_SIMPLE("SUBREADINGS")
};
@@ -182,6 +186,12 @@ UnicodeString stringbits[STRINGS_COUNT] = {
UNICODE_STRING_SIMPLE("FROM"),
UNICODE_STRING_SIMPLE("EXCEPT"),
UNICODE_STRING_SIMPLE("_ENCL_"),
+ UNICODE_STRING_SIMPLE("_SAME_BASIC_"),
+ UNICODE_STRING_SIMPLE("no-inline-sets"),
+ UNICODE_STRING_SIMPLE("no-inline-templates"),
+ UNICODE_STRING_SIMPLE("strict-wordforms"),
+ UNICODE_STRING_SIMPLE("strict-baseforms"),
+ UNICODE_STRING_SIMPLE("strict-secondary"),
UNICODE_STRING_SIMPLE("<STREAMCMD:SETVAR:"),
UNICODE_STRING_SIMPLE("<STREAMCMD:REMVAR:")
};
diff --git a/src/Strings.hpp b/src/Strings.hpp
index 6a1115b..1923df6 100644
--- a/src/Strings.hpp
+++ b/src/Strings.hpp
@@ -81,6 +81,9 @@ namespace CG3 {
K_EXTERNAL,
K_EXTERNAL_ONCE,
K_EXTERNAL_ALWAYS,
+ K_OPTIONS,
+ K_STRICT_TAGS,
+ K_REOPEN_MAPPINGS,
K_SUBREADINGS,
KEYWORD_COUNT
};
@@ -155,6 +158,12 @@ namespace CG3 {
S_FROM,
S_EXCEPT,
S_UU_ENCL,
+ S_UU_SAME_BASIC,
+ S_NO_ISETS,
+ S_NO_ITMPLS,
+ S_STRICT_WFORMS,
+ S_STRICT_BFORMS,
+ S_STRICT_SECOND,
S_CMD_SETVAR,
S_CMD_REMVAR,
STRINGS_COUNT
@@ -187,6 +196,7 @@ namespace CG3 {
FL_REVERSE,
FL_SUB,
FL_OUTPUT,
+ FL_CAPTURE_UNIF,
FLAGS_COUNT
};
}
diff --git a/src/Tag.cpp b/src/Tag.cpp
index 4444346..dd8878d 100644
--- a/src/Tag.cpp
+++ b/src/Tag.cpp
@@ -46,255 +46,46 @@ regexp(0)
#endif
}
-Tag::~Tag() {
+Tag::Tag(const Tag& o) :
+comparison_op(o.comparison_op),
+comparison_val(o.comparison_val),
+type(o.type),
+comparison_hash(o.comparison_hash),
+dep_self(o.dep_self),
+dep_parent(o.dep_parent),
+hash(o.hash),
+plain_hash(o.plain_hash),
+number(o.number),
+seed(o.seed),
+tag(o.tag),
+regexp(0)
+{
#ifdef CG_TRACE_OBJECTS
std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << std::endl;
#endif
- if (regexp) {
- uregex_close(regexp);
- regexp = 0;
+ if (o.vs_names) {
+ allocateVsNames();
+ *vs_names.get() = *o.vs_names.get();
}
-}
-
-void Tag::parseTag(const UChar *to, UFILE *ux_stderr, Grammar *grammar) {
- type = 0;
-
- if (to && to[0]) {
- const UChar *tmp = to;
- while (tmp[0] && (tmp[0] == '!' || tmp[0] == '^')) {
- if (tmp[0] == '!' || tmp[0] == '^') {
- type |= T_FAILFAST;
- tmp++;
- }
- }
-
- size_t length = u_strlen(tmp);
- assert(length && "parseTag() will not work with empty strings.");
-
- if (tmp[0] == 'T' && tmp[1] == ':') {
- u_fprintf(ux_stderr, "Warning: Tag %S looks like a misattempt of template usage on line %u.\n", tmp, grammar->lines);
- }
-
- // ToDo: Implement META and VAR
- if (tmp[0] == 'M' && tmp[1] == 'E' && tmp[2] == 'T' && tmp[3] == 'A' && tmp[4] == ':') {
- type |= T_META;
- tmp += 5;
- length -= 5;
- }
- if (tmp[0] == 'V' && tmp[1] == 'A' && tmp[2] == 'R' && tmp[3] == ':') {
- type |= T_VARIABLE;
- tmp += 4;
- length -= 4;
- }
- if (tmp[0] == 'S' && tmp[1] == 'E' && tmp[2] == 'T' && tmp[3] == ':') {
- type |= T_SET;
- tmp += 4;
- length -= 4;
- }
- if (tmp[0] == 'V' && tmp[1] == 'S' && tmp[2] == 'T' && tmp[3] == 'R' && tmp[4] == ':') {
- type |= T_VARSTRING;
- type |= T_VSTR;
- tmp += 5;
-
- tag.assign(tmp);
- if (tag.empty()) {
- u_fprintf(ux_stderr, "Error: Parsing tag %S resulted in an empty tag on line %u - cannot continue!\n", tag.c_str(), grammar->lines);
- CG3Quit(1);
- }
-
- goto label_isVarstring;
- }
-
- if (tmp[0] && (tmp[0] == '"' || tmp[0] == '<' || tmp[0] == '/')) {
- size_t oldlength = length;
-
- // Parse the suffixes r, i, v but max only one of each.
- while (tmp[length-1] == 'i' || tmp[length-1] == 'r' || tmp[length-1] == 'v') {
- if (!(type & T_VARSTRING) && tmp[length-1] == 'v') {
- type |= T_VARSTRING;
- length--;
- continue;
- }
- if (!(type & T_REGEXP) && tmp[length-1] == 'r') {
- type |= T_REGEXP;
- length--;
- continue;
- }
- if (!(type & T_CASE_INSENSITIVE) && tmp[length-1] == 'i') {
- type |= T_CASE_INSENSITIVE;
- length--;
- continue;
- }
- break;
- }
-
- if (tmp[0] == '"' && tmp[length-1] == '"') {
- if (tmp[1] == '<' && tmp[length-2] == '>') {
- type |= T_WORDFORM;
- }
- else {
- type |= T_BASEFORM;
- }
- }
-
- if ((tmp[0] == '"' && tmp[length-1] == '"') || (tmp[0] == '<' && tmp[length-1] == '>') || (tmp[0] == '/' && tmp[length-1] == '/')) {
- type |= T_TEXTUAL;
- }
- else {
- type &= ~T_VARSTRING;
- type &= ~T_REGEXP;
- type &= ~T_CASE_INSENSITIVE;
- type &= ~T_WORDFORM;
- type &= ~T_BASEFORM;
- length = oldlength;
- }
- }
-
- for (size_t i=0, oldlength = length ; tmp[i] != 0 && i < oldlength ; ++i) {
- if (tmp[i] == '\\') {
- ++i;
- --length;
- }
- if (tmp[i] == 0) {
- break;
- }
- tag += tmp[i];
- }
- if (tag.empty()) {
- u_fprintf(ux_stderr, "Error: Parsing tag %S resulted in an empty tag on line %u - cannot continue!\n", tag.c_str(), grammar->lines);
- CG3Quit(1);
- }
-
- foreach (Grammar::regex_tags_t, grammar->regex_tags, iter, iter_end) {
- UErrorCode status = U_ZERO_ERROR;
- uregex_setText(*iter, tag.c_str(), tag.length(), &status);
- if (status != U_ZERO_ERROR) {
- u_fprintf(ux_stderr, "Error: uregex_setText(parseTag) returned %s - cannot continue!\n", u_errorName(status));
- CG3Quit(1);
- }
- status = U_ZERO_ERROR;
- if (uregex_matches(*iter, 0, &status)) {
- type |= T_TEXTUAL;
- }
- }
- foreach (Grammar::icase_tags_t, grammar->icase_tags, iter, iter_end) {
- UErrorCode status = U_ZERO_ERROR;
- if (u_strCaseCompare(tag.c_str(), tag.length(), (*iter)->tag.c_str(), (*iter)->tag.length(), U_FOLD_CASE_DEFAULT, &status) == 0) {
- type |= T_TEXTUAL;
- }
- if (status != U_ZERO_ERROR) {
- u_fprintf(ux_stderr, "Error: u_strCaseCompare(parseTag) returned %s - cannot continue!\n", u_errorName(status));
- CG3Quit(1);
- }
- }
-
- comparison_hash = hash_value(tag);
-
- if (tag[0] == '<' && tag[length-1] == '>') {
- parseNumeric();
- }
-
- if (u_strcmp(tag.c_str(), stringbits[S_ASTERIK].getTerminatedBuffer()) == 0) {
- type |= T_ANY;
- }
- else if (u_strcmp(tag.c_str(), stringbits[S_UU_LEFT].getTerminatedBuffer()) == 0) {
- type |= T_PAR_LEFT;
- }
- else if (u_strcmp(tag.c_str(), stringbits[S_UU_RIGHT].getTerminatedBuffer()) == 0) {
- type |= T_PAR_RIGHT;
- }
- else if (u_strcmp(tag.c_str(), stringbits[S_UU_ENCL].getTerminatedBuffer()) == 0) {
- type |= T_ENCL;
- }
- else if (u_strcmp(tag.c_str(), stringbits[S_UU_TARGET].getTerminatedBuffer()) == 0) {
- type |= T_TARGET;
- }
- else if (u_strcmp(tag.c_str(), stringbits[S_UU_MARK].getTerminatedBuffer()) == 0) {
- type |= T_MARK;
- }
- else if (u_strcmp(tag.c_str(), stringbits[S_UU_ATTACHTO].getTerminatedBuffer()) == 0) {
- type |= T_ATTACHTO;
- }
-
- if (type & T_REGEXP) {
- if (u_strcmp(tag.c_str(), stringbits[S_RXTEXT_ANY].getTerminatedBuffer()) == 0
- || u_strcmp(tag.c_str(), stringbits[S_RXBASE_ANY].getTerminatedBuffer()) == 0
- || u_strcmp(tag.c_str(), stringbits[S_RXWORD_ANY].getTerminatedBuffer()) == 0) {
- // ToDo: Add a case-insensitive version of T_REGEXP_ANY for unification
- type |= T_REGEXP_ANY;
- type &= ~T_REGEXP;
- }
- else {
- UParseError pe;
- UErrorCode status = U_ZERO_ERROR;
-
- UString rt;
- if (tag[0] == '/' && tag[length-1] == '/') {
- rt = tag.substr(1, length-2);
- }
- else {
- rt += '^';
- rt += tag;
- rt += '$';
- }
-
- if (type & T_CASE_INSENSITIVE) {
- regexp = uregex_open(rt.c_str(), rt.length(), UREGEX_CASE_INSENSITIVE, &pe, &status);
- }
- else {
- regexp = uregex_open(rt.c_str(), rt.length(), 0, &pe, &status);
- }
- if (status != U_ZERO_ERROR) {
- u_fprintf(ux_stderr, "Error: uregex_open returned %s trying to parse tag %S on line %u - cannot continue!\n", u_errorName(status), tag.c_str(), grammar->lines);
- CG3Quit(1);
- }
- }
- }
- else if (type & T_CASE_INSENSITIVE) {
- if (tag[0] == '/' && tag[length-1] == '/') {
- tag.resize(tag.size()-1);
- tag.erase(tag.begin());
- }
- }
-
-label_isVarstring:
- if (type & T_VARSTRING) {
- UChar *p = &tag[0];
- UChar *n = 0;
- do {
- SKIPTO(p, '{');
- if (*p) {
- n = p;
- SKIPTO(n, '}');
- if (*n) {
- allocateVsSets();
- allocateVsNames();
- ++p;
- UString theSet(p, n);
- Set *tmp = grammar->parseSet(theSet.c_str());
- vs_sets->push_back(tmp);
- UString old;
- old += '{';
- old += tmp->name;
- old += '}';
- vs_names->push_back(old);
- p = n;
- ++p;
- }
- }
- } while(*p);
- }
+ if (o.vs_sets) {
+ allocateVsSets();
+ *vs_sets.get() = *o.vs_sets.get();
}
-
- type &= ~T_SPECIAL;
- if (type & MASK_TAG_SPECIAL) {
- type |= T_SPECIAL;
+ if (o.regexp) {
+ UErrorCode status = U_ZERO_ERROR;
+ regexp = uregex_clone(o.regexp, &status);
}
+}
+
+Tag::~Tag() {
+ #ifdef CG_TRACE_OBJECTS
+ std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << std::endl;
+ #endif
- if (type & T_VARSTRING && type & (T_REGEXP|T_REGEXP_ANY|T_VARIABLE|T_META)) {
- u_fprintf(ux_stderr, "Error: Tag %S cannot mix varstring with any other special feature on line %u!\n", to, grammar->lines);
- CG3Quit(1);
+ if (regexp) {
+ uregex_close(regexp);
+ regexp = 0;
}
}
@@ -358,7 +149,7 @@ void Tag::parseTagRaw(const UChar *to, Grammar *grammar) {
UChar relname[256];
if (u_sscanf(tag.c_str(), "R:%[^:]:%i", &relname, &dep_parent) == 2 && dep_parent != 0) {
type |= T_RELATION;
- Tag *reltag = grammar->allocateTag(relname, true);
+ Tag *reltag = grammar->allocateTag(relname);
comparison_hash = reltag->hash;
}
}
@@ -536,6 +327,10 @@ UString Tag::toUString(bool escape) const {
str += ':';
}
+ if (type & (T_CASE_INSENSITIVE|T_REGEXP) && tag[0] != '"') {
+ str += '/';
+ }
+
if (escape) {
for (size_t i=0 ; i<tag.length() ; ++i) {
if (tag[i] == '\\' || tag[i] == '(' || tag[i] == ')' || tag[i] == ';' || tag[i] == '#') {
@@ -548,6 +343,9 @@ UString Tag::toUString(bool escape) const {
str + tag;
}
+ if (type & (T_CASE_INSENSITIVE|T_REGEXP) && tag[0] != '"') {
+ str += '/';
+ }
if (type & T_CASE_INSENSITIVE) {
str += 'i';
}
diff --git a/src/Tag.hpp b/src/Tag.hpp
index a42308f..ffed51c 100644
--- a/src/Tag.hpp
+++ b/src/Tag.hpp
@@ -54,7 +54,7 @@ namespace CG3 {
T_BASEFORM = (1 << 6),
T_TEXTUAL = (1 << 7),
T_DEPENDENCY = (1 << 8),
- // 9 unused
+ T_SAME_BASIC = (1 << 9),
T_FAILFAST = (1 << 10),
T_CASE_INSENSITIVE = (1 << 11),
T_REGEXP = (1 << 12),
@@ -73,7 +73,7 @@ namespace CG3 {
T_ENCL = (1 << 25),
T_RELATION = (1 << 26),
- MASK_TAG_SPECIAL = T_ANY|T_TARGET|T_MARK|T_ATTACHTO|T_PAR_LEFT|T_PAR_RIGHT|T_NUMERICAL|T_VARIABLE|T_META|T_FAILFAST|T_CASE_INSENSITIVE|T_REGEXP|T_REGEXP_ANY|T_VARSTRING|T_SET|T_ENCL,
+ MASK_TAG_SPECIAL = T_ANY|T_TARGET|T_MARK|T_ATTACHTO|T_PAR_LEFT|T_PAR_RIGHT|T_NUMERICAL|T_VARIABLE|T_META|T_FAILFAST|T_CASE_INSENSITIVE|T_REGEXP|T_REGEXP_ANY|T_VARSTRING|T_SET|T_ENCL|T_SAME_BASIC,
};
class Tag {
@@ -95,8 +95,8 @@ namespace CG3 {
mutable URegularExpression *regexp;
Tag();
+ Tag(const Tag& o);
~Tag();
- void parseTag(const UChar *to, UFILE *ux_stderr, Grammar *grammar);
void parseTagRaw(const UChar *to, Grammar *grammar);
UString toUString(bool escape = false) const;
@@ -104,8 +104,6 @@ namespace CG3 {
void markUsed();
void allocateVsSets();
void allocateVsNames();
-
- private:
void parseNumeric();
};
@@ -122,11 +120,25 @@ namespace CG3 {
}
};
-
typedef std::list<Tag*> TagList;
typedef std::vector<Tag*> TagVector;
typedef flat_unordered_map<uint32_t,Tag*> Taguint32HashMap;
typedef sorted_vector<Tag*, compare_Tag> TagSortedVector;
+
+	// Walks every Tag* in `in` and sorts it into one of two outcomes:
+	//  - tags carrying T_NUMERICAL are not stored; they only set `did` to true;
+	//  - all other tags are appended to `tags`, and `special` is set to true
+	//    if any of them carries T_SPECIAL.
+	// `did` and `special` are only ever raised here, never reset to false.
+	template<typename T>
+	inline void fill_tagvector(const T& in, TagVector& tags, bool& did, bool& special) {
+		boost_foreach (Tag *entry, in) {
+			const bool numeric = (entry->type & T_NUMERICAL) != 0;
+			if (!numeric) {
+				if (entry->type & T_SPECIAL) {
+					special = true;
+				}
+				tags.push_back(entry);
+			}
+			else {
+				did = true;
+			}
+		}
+	}
}
#endif
diff --git a/src/TagTrie.hpp b/src/TagTrie.hpp
index 1b1950b..0ac7c21 100644
--- a/src/TagTrie.hpp
+++ b/src/TagTrie.hpp
@@ -67,6 +67,28 @@ namespace CG3 {
return true;
}
+	// Recursively duplicates `trie` into a trie_t allocated with `new` and
+	// returns that pointer. Each entry's `terminal` value is copied and each
+	// non-null child trie is duplicated in turn, so the result shares no child
+	// tries with the source. The keys themselves are copied as-is.
+	inline trie_t *_trie_copy_helper(const trie_t& trie) {
+		trie_t *copy = new trie_t;
+		boost_foreach (const trie_t::value_type& entry, trie) {
+			// Look the destination node up once; nothing is inserted into
+			// `copy` between here and the last use of the reference.
+			trie_t::mapped_type& node = (*copy)[entry.first];
+			node.terminal = entry.second.terminal;
+			if (entry.second.trie) {
+				node.trie = _trie_copy_helper(*entry.second.trie);
+			}
+		}
+		return copy;
+	}
+
+	// Returns a by-value duplicate of `trie`. The top level is rebuilt entry by
+	// entry; every non-null child trie is duplicated through
+	// _trie_copy_helper(), so the copy holds its own heap-allocated children.
+	inline trie_t trie_copy(const trie_t& trie) {
+		trie_t copy;
+		boost_foreach (const trie_t::value_type& entry, trie) {
+			// Single lookup per entry; `copy` is not modified again until the
+			// next iteration.
+			trie_t::mapped_type& node = copy[entry.first];
+			node.terminal = entry.second.terminal;
+			if (entry.second.trie) {
+				node.trie = _trie_copy_helper(*entry.second.trie);
+			}
+		}
+		return copy;
+	}
+
inline void trie_delete(trie_t& trie) {
boost_foreach (trie_t::value_type& p, trie) {
if (p.second.trie) {
diff --git a/src/TextualParser.cpp b/src/TextualParser.cpp
index f35e224..e09b90a 100644
--- a/src/TextualParser.cpp
+++ b/src/TextualParser.cpp
@@ -23,32 +23,39 @@
#include "Strings.hpp"
#include "Grammar.hpp"
#include "ContextualTest.hpp"
+#include "parser_helpers.hpp"
#include <bitset>
namespace CG3 {
-TextualParser::TextualParser(Grammar& res, UFILE *ux_err) {
+TextualParser::TextualParser(Grammar& res, UFILE *ux_err) :
+verbosity_level(0),
+sets_counter(100),
+seen_mapping_prefix(0),
+option_vislcg_compat(false),
+in_section(false),
+in_before_sections(true),
+in_after_sections(false),
+in_null_section(false),
+no_isets(false),
+no_itmpls(false),
+strict_wforms(false),
+strict_bforms(false),
+strict_second(false),
+filename(0),
+locale(0),
+codepage(0),
+error_counter(0)
+{
ux_stderr = ux_err;
result = &res;
- filename = 0;
- locale = 0;
- codepage = 0;
- option_vislcg_compat = false;
- in_before_sections = true;
- in_after_sections = false;
- in_null_section = false;
- in_section = false;
- verbosity_level = 0;
- seen_mapping_prefix = 0;
- error_counter = 0;
- sets_counter = 100;
}
void TextualParser::incErrorCount() {
u_fflush(ux_stderr);
++error_counter;
if (error_counter >= 10) {
- u_fprintf(ux_stderr, "Too many errors - giving up...\n");
+ u_fprintf(ux_stderr, "%s: Too many errors - giving up...\n", filebase);
CG3Quit(1);
}
throw error_counter;
@@ -66,6 +73,85 @@ struct freq_sorter {
}
};
+// Error-reporting overloads. Each one prints `str` as a u_fprintf() format
+// string to ux_stderr and then calls incErrorCount(). The arguments handed to
+// the format are fixed per overload, so `str` must consume them in exactly the
+// order listed above each overload. Overloads taking `p` first copy an excerpt
+// of it into nearbuf via ux_bufcpy(nearbuf, p, 20) and print nearbuf last.
+
+// Format arguments: filebase, result->lines.
+void TextualParser::error(const char *str) {
+ u_fprintf(ux_stderr, str, filebase, result->lines);
+ incErrorCount();
+}
+
+// Format arguments: filebase, c, result->lines.
+void TextualParser::error(const char *str, UChar c) {
+ u_fprintf(ux_stderr, str, filebase, c, result->lines);
+ incErrorCount();
+}
+
+// Format arguments: filebase, result->lines, nearbuf.
+void TextualParser::error(const char *str, const UChar *p) {
+ ux_bufcpy(nearbuf, p, 20);
+ u_fprintf(ux_stderr, str, filebase, result->lines, nearbuf);
+ incErrorCount();
+}
+
+// Format arguments: filebase, c, result->lines, nearbuf.
+void TextualParser::error(const char *str, UChar c, const UChar *p) {
+ ux_bufcpy(nearbuf, p, 20);
+ u_fprintf(ux_stderr, str, filebase, c, result->lines, nearbuf);
+ incErrorCount();
+}
+
+// Format arguments: filebase, s (narrow string), result->lines, nearbuf.
+void TextualParser::error(const char *str, const char *s, const UChar *p) {
+ ux_bufcpy(nearbuf, p, 20);
+ u_fprintf(ux_stderr, str, filebase, s, result->lines, nearbuf);
+ incErrorCount();
+}
+
+// Format arguments: filebase, s (UChar string), result->lines, nearbuf.
+void TextualParser::error(const char *str, const UChar *s, const UChar *p) {
+ ux_bufcpy(nearbuf, p, 20);
+ u_fprintf(ux_stderr, str, filebase, s, result->lines, nearbuf);
+ incErrorCount();
+}
+
+// Format arguments: filebase, s (narrow string), S (UChar string), result->lines, nearbuf.
+void TextualParser::error(const char *str, const char *s, const UChar *S, const UChar *p) {
+ ux_bufcpy(nearbuf, p, 20);
+ u_fprintf(ux_stderr, str, filebase, s, S, result->lines, nearbuf);
+ incErrorCount();
+}
+
+// Parses one tag through the free function ::CG3::parseTag() and, when
+// strict_tags is non-empty, reports tags whose plain_hash is not in it.
+//
+// to: the tag text to parse.
+// p:  passed on to ::CG3::parseTag() and used for the "near" excerpt in the
+//     error messages produced here.
+//
+// Returns the parsed Tag. When a violation is reported the function does not
+// return normally: error() ends in incErrorCount(), which either quits or
+// throws. For that reason no extra incErrorCount() call follows error() -
+// it could never be reached.
+Tag *TextualParser::parseTag(const UChar *to, const UChar *p) {
+	Tag *tag = ::CG3::parseTag(to, p, *this);
+	if (!strict_tags.empty() && !strict_tags.count(tag->plain_hash)) {
+		if (tag->type & (T_ANY | T_VARSTRING | T_VSTR | T_META | T_VARIABLE | T_SET | T_PAR_LEFT | T_PAR_RIGHT | T_ENCL | T_TARGET | T_MARK | T_ATTACHTO | T_SAME_BASIC)) {
+			// Always allow...
+		}
+		else if (u_strcmp(tag->tag.c_str(), stringbits[S_BEGINTAG].getTerminatedBuffer()) == 0 || u_strcmp(tag->tag.c_str(), stringbits[S_ENDTAG].getTerminatedBuffer()) == 0) {
+			// Always allow >>> and <<<
+		}
+		else if (tag->type & T_WORDFORM) {
+			// Wordforms are only checked when the strict_wforms flag is set.
+			if (strict_wforms) {
+				error("%s: Error: Wordform tag %S not on the strict-tags list, on line %u near `%S`!\n", tag->tag.c_str(), p);
+			}
+		}
+		else if (tag->type & T_BASEFORM) {
+			// Baseforms are only checked when the strict_bforms flag is set.
+			if (strict_bforms) {
+				error("%s: Error: Baseform tag %S not on the strict-tags list, on line %u near `%S`!\n", tag->tag.c_str(), p);
+			}
+		}
+		else if (tag->tag[0] == '<' && tag->tag[tag->tag.size()-1] == '>') {
+			// <...> tags are only checked when the strict_second flag is set.
+			if (strict_second) {
+				error("%s: Error: Secondary tag %S not on the strict-tags list, on line %u near `%S`!\n", tag->tag.c_str(), p);
+			}
+		}
+		else {
+			// Every other tag must be on the list.
+			error("%s: Error: Tag %S not on the strict-tags list, on line %u near `%S`!\n", tag->tag.c_str(), p);
+		}
+	}
+	return tag;
+}
+
+// Forwards `tag` to addTag() on the grammar being built (`result`) and
+// returns whatever that call returns.
+Tag *TextualParser::addTag(Tag *tag) {
+ return result->addTag(tag);
+}
+
void TextualParser::parseTagList(UChar *& p, Set *s) {
std::set<TagVector> taglists;
bc::flat_map<Tag*, size_t> tag_freq;
@@ -84,22 +170,20 @@ void TextualParser::parseTagList(UChar *& p, Set *s) {
n++;
SKIPTO_NOSPAN(n, '"');
if (*n != '"') {
- u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
}
}
result->lines += SKIPTOWS(n, ')', true);
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- Tag *t = result->allocateTag(&gbuffers[0][0]);
+ Tag *t = parseTag(&gbuffers[0][0], p);
tags.push_back(t);
p = n;
result->lines += SKIPWS(p, ';', ')');
}
if (*p != ')') {
- u_fprintf(ux_stderr, "Error: Missing closing ) on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ) on line %u near `%S`!\n", p);
}
++p;
}
@@ -109,15 +193,14 @@ void TextualParser::parseTagList(UChar *& p, Set *s) {
n++;
SKIPTO_NOSPAN(n, '"');
if (*n != '"') {
- u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
}
}
result->lines += SKIPTOWS(n, 0, true);
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- Tag *t = result->allocateTag(&gbuffers[0][0]);
+ Tag *t = parseTag(&gbuffers[0][0], p);
tags.push_back(t);
p = n;
}
@@ -160,6 +243,10 @@ void TextualParser::parseTagList(UChar *& p, Set *s) {
}
}
+// Thin forwarder to the free function ::CG3::parseSet(), handing it this
+// parser as the third argument; returns that function's result unchanged.
+Set *TextualParser::parseSet(const UChar *name, const UChar *p) {
+ return ::CG3::parseSet(name, p, *this);
+}
+
Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
uint32Vector set_ops;
uint32Vector sets;
@@ -170,9 +257,13 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
if (*p && *p != ';' && *p != ')') {
if (!wantop) {
if (*p == '(') {
+ if (no_isets && p[1] != '*') {
+ error("%s: Error: Inline set spotted on line %u near `%S`!\n", p);
+ }
// No, this can't just reuse parseTagList() because this will only ever parse a single CompositeTag,
// whereas parseTagList() will handle mixed Tag and CompositeTag
// Doubly so now that parseTagList() will sort+uniq the tags, which we don't want for MAP/ADD/SUBSTITUTE/etc
+ UChar *n = p;
++p;
Set *set_c = result->allocateSet();
set_c->line = result->lines;
@@ -186,35 +277,32 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
n++;
SKIPTO_NOSPAN(n, '"');
if (*n != '"') {
- u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
}
}
result->lines += SKIPTOWS(n, ')', true);
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- Tag *t = result->allocateTag(&gbuffers[0][0]);
+ Tag *t = parseTag(&gbuffers[0][0], p);
tags.push_back(t);
p = n;
result->lines += SKIPWS(p, ';', ')');
}
if (*p != ')') {
- u_fprintf(ux_stderr, "Error: Missing closing ) on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ) on line %u near `%S`!\n", p);
}
++p;
if (tags.size() == 0) {
- u_fprintf(ux_stderr, "Error: Empty inline set on line %u! Use (*) if you want to replace with nothing.\n", result->lines);
- incErrorCount();
+ error("%s: Error: Empty inline set on line %u near `%S`! Use (*) if you want to replace with nothing.\n", n);
}
else if (tags.size() == 1) {
result->addTagToSet(tags[0], set_c);
}
else {
bool special = false;
- boost_foreach(Tag *tag, tags) {
+ boost_foreach (Tag *tag, tags) {
if (tag->type & T_SPECIAL) {
special = true;
break;
@@ -240,7 +328,7 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- Set *tmp = result->parseSet(&gbuffers[0][0]);
+ Set *tmp = parseSet(&gbuffers[0][0], p);
uint32_t sh = tmp->hash;
sets.push_back(sh);
p = n;
@@ -290,7 +378,7 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
// Doing this yields a very cheap imperfect form of trie compression, but it's good enough
std::sort(tv.begin(), tv.end(), fs);
bool special = false;
- boost_foreach(Tag *tag, tv) {
+ boost_foreach (Tag *tag, tv) {
if (tag->type & T_SPECIAL) {
special = true;
break;
@@ -329,8 +417,7 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
}
}
else if (!wantop) {
- u_fprintf(ux_stderr, "Error: Missing set on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected set on line %u near `%S`!\n", p);
}
}
@@ -364,6 +451,8 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
bool negative = false;
bool had_digits = false;
+ UChar *n = p;
+
size_t tries;
for (tries=0 ; *p != ' ' && *p != '(' && *p != '/' && tries < 100 ; ++tries) {
if (*p == '*' && *(p+1) == '*') {
@@ -459,6 +548,10 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
t.pos |= POS_UNKNOWN;
++p;
}
+ if (*p == 'f') {
+ t.pos |= POS_NUMERIC_BRANCH;
+ ++p;
+ }
if (*p == '-') {
negative = true;
++p;
@@ -478,7 +571,7 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- Tag *tag = result->allocateTag(&gbuffers[0][0], true);
+ Tag *tag = result->allocateTag(&gbuffers[0][0]);
t.relation = tag->hash;
p = n;
}
@@ -543,64 +636,55 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
t.pos |= POS_DEP_DEEP;
}
- if (tries >= 20) {
- u_fprintf(ux_stderr, "Warning: Position on line %u took many loops.\n", result->lines);
- }
if (tries >= 100) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - caused endless loop!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Invalid position on line %u near `%S` - caused endless loop!\n", n);
}
- if (!ISSPACE(*p)) {
- UChar op = 0;
- swapper<UChar> swp(true, op, p[16]);
- u_fprintf(ux_stderr, "Error: Garbage data '%S' encountered while parsing contextual position on line %u!\n", p, result->lines);
+ else if (tries >= 20) {
+ ux_bufcpy(nearbuf, n, 20);
+ u_fprintf(ux_stderr, "%s: Warning: Position on line %u near `%S` took many loops.\n", filebase, result->lines, nearbuf);
u_fflush(ux_stderr);
- incErrorCount();
+ }
+ if (!ISSPACE(*p)) {
+ error("%s: Error: Invalid position on line %u near `%S` - garbage data!\n", n);
}
if (had_digits) {
if (t.pos & (POS_DEP_CHILD|POS_DEP_SIBLING|POS_DEP_PARENT)) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - cannot combine offsets with dependency!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Invalid position on line %u near `%S` - cannot combine offsets with dependency!\n", n);
}
if (t.pos & (POS_LEFT_PAR|POS_RIGHT_PAR)) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - cannot combine offsets with enclosures!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Invalid position on line %u near `%S` - cannot combine offsets with enclosures!\n", n);
}
if (t.pos & POS_RELATION) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - cannot combine offsets with relations!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Invalid position on line %u near `%S` - cannot combine offsets with relations!\n", n);
}
}
if ((t.pos & POS_DEP_PARENT) && !(t.pos & POS_DEP_GLOB)) {
if (t.pos & (POS_LEFTMOST|POS_RIGHTMOST)) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - leftmost/rightmost requires ancestor, not parent!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Invalid position on line %u near `%S` - leftmost/rightmost requires ancestor, not parent!\n", n);
}
}
/*
if ((t.pos & (POS_LEFT_PAR|POS_RIGHT_PAR)) && (t.pos & (POS_SCANFIRST|POS_SCANALL))) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - cannot have both enclosure and scan!\n", result->lines);
- incErrorCount();
+ ux_bufcpy(nearbuf, n, 20);
+ error("%s: Error: Invalid position on line %u near `%S` - cannot have both enclosure and scan!\n", filebase);
}
//*/
if ((t.pos & POS_PASS_ORIGIN) && (t.pos & POS_NO_PASS_ORIGIN)) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - cannot have both O and o!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Invalid position on line %u near `%S` - cannot have both O and o!\n", n);
}
if ((t.pos & POS_LEFT_PAR) && (t.pos & POS_RIGHT_PAR)) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - cannot have both L and R!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Invalid position on line %u near `%S` - cannot have both L and R!\n", n);
}
if ((t.pos & POS_ALL) && (t.pos & POS_NONE)) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - cannot have both NONE and ALL!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Invalid position on line %u near `%S` - cannot have both NONE and ALL!\n", n);
}
if ((t.pos & POS_UNKNOWN) && (t.pos != POS_UNKNOWN || had_digits)) {
- u_fprintf(ux_stderr, "Error: Invalid position on line %u - '?' cannot be combined with anything else!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Invalid position on line %u near `%S` - '?' cannot be combined with anything else!\n", n);
}
if ((t.pos & POS_SCANALL) && (t.pos & POS_NOT)) {
- u_fprintf(ux_stderr, "Warning: Line %u: We don't think mixing NOT and ** makes sense...\n", result->lines);
+ ux_bufcpy(nearbuf, n, 20);
+ u_fprintf(ux_stderr, "%s: Warning: Line %u near `%S`: We don't think mixing NOT and ** makes sense...\n", filebase, result->lines, nearbuf);
+ u_fflush(ux_stderr);
}
if (t.pos > POS_64BIT) {
@@ -644,12 +728,14 @@ ContextualTest *TextualParser::parseContextualTestList(UChar *& p, Rule *rule) {
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
if (ux_isEmpty(&gbuffers[0][0])) {
+ if (no_itmpls) {
+ error("%s: Error: Inline template spotted on line %u near `%S`!\n", p);
+ }
p = n;
pos_p = p;
for (;;) {
if (*p != '(') {
- u_fprintf(ux_stderr, "Error: Expected '(' but found '%C' on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Expected '(' but found '%C' on line %u near `%S`!\n", *p, p);
}
++p;
ContextualTest *ored = parseContextualTestList(p, rule);
@@ -665,16 +751,14 @@ ContextualTest *TextualParser::parseContextualTestList(UChar *& p, Rule *rule) {
result->lines += SKIPWS(p);
}
if (t->ors.size() == 1 && verbosity_level > 0) {
- UChar oldp = *p;
- *p = 0;
+ uncond_swap<UChar> swp(*p, 0);
if (t->ors.front()->ors.size() < 2) {
- u_fprintf(ux_stderr, "Warning: Inline templates only make sense if you OR them on line %u at %S.\n", result->lines, pos_p);
+ u_fprintf(ux_stderr, "%s: Warning: Inline templates only make sense if you OR them on line %u at %S.\n", filebase, result->lines, pos_p);
}
else {
- u_fprintf(ux_stderr, "Warning: Inline templates do not need () around the whole expression on line %u at %S.\n", result->lines, pos_p);
+ u_fprintf(ux_stderr, "%s: Warning: Inline templates do not need () around the whole expression on line %u at %S.\n", filebase, result->lines, pos_p);
}
u_fflush(ux_stderr);
- *p = oldp;
}
}
else if (gbuffers[0][0] == '[') {
@@ -696,8 +780,7 @@ ContextualTest *TextualParser::parseContextualTestList(UChar *& p, Rule *rule) {
result->lines += SKIPWS(p);
}
if (*p != ']') {
- u_fprintf(ux_stderr, "Error: Expected ']' but found '%C' on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Expected ']' but found '%C' on line %u near `%S`!\n", *p, p);
}
++p;
}
@@ -753,11 +836,9 @@ label_parseTemplateRef:
result->lines += SKIPWS(p);
if ((t->barrier || t->cbarrier) && !(t->pos & MASK_POS_SCAN)) {
- UChar oldp = *p;
- *p = 0;
- u_fprintf(ux_stderr, "Warning: Barriers only make sense for scanning tests on line %u at %S.\n", result->lines, pos_p);
+ uncond_swap<UChar> swp(*p, 0);
+ u_fprintf(ux_stderr, "%s: Warning: Barriers only make sense for scanning tests on line %u at %S.\n", filebase, result->lines, pos_p);
u_fflush(ux_stderr);
- *p = oldp;
t->barrier = 0;
t->cbarrier = 0;
}
@@ -766,8 +847,7 @@ label_parseTemplateRef:
bool linked = false;
result->lines += SKIPWS(p);
if (ux_simplecasecmp(p, stringbits[S_AND].getTerminatedBuffer(), stringbits[S_AND].length())) {
- u_fprintf(ux_stderr, "Error: 'AND' is deprecated; use 'LINK 0' or operator '+' instead. Found on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: 'AND' is deprecated; use 'LINK 0' or operator '+' instead. Found on line %u near `%S`!\n", p);
}
if (ux_simplecasecmp(p, stringbits[S_LINK].getTerminatedBuffer(), stringbits[S_LINK].length())) {
p += stringbits[S_LINK].length();
@@ -777,8 +857,7 @@ label_parseTemplateRef:
if (linked) {
if (t->pos & POS_NONE) {
- u_fprintf(ux_stderr, "Error: It does not make sense to LINK from a NONE test; perhaps you meant NOT or NEGATE on line %u?\n", result->lines);
- incErrorCount();
+ error("%s: Error: It does not make sense to LINK from a NONE test; perhaps you meant NOT or NEGATE on line %u near `%S`?\n", p);
}
t->linked = parseContextualTestList(p, rule);
}
@@ -832,15 +911,14 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
n++;
SKIPTO_NOSPAN(n, '"');
if (*n != '"') {
- u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", lp);
}
}
result->lines += SKIPTOWS(n, 0, true);
ptrdiff_t c = n - lp;
u_strncpy(&gbuffers[0][0], lp, c);
gbuffers[0][c] = 0;
- Tag *wform = result->allocateTag(&gbuffers[0][0]);
+ Tag *wform = parseTag(&gbuffers[0][0], lp);
rule->wordform = wform;
}
@@ -855,7 +933,9 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
if (!gbuffers[0][0]) {
- u_fprintf(ux_stderr, "Warning: Rule on line %u had : but no name.\n", result->lines);
+ ux_bufcpy(nearbuf, p, 20);
+ u_fprintf(ux_stderr, "%s: Warning: Rule on line %u near `%S` had : but no name.\n", filebase, result->lines, nearbuf);
+ u_fflush(ux_stderr);
}
else {
rule->setName(&gbuffers[0][0]);
@@ -874,8 +954,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
rule->type = K_EXTERNAL_ALWAYS;
}
else {
- u_fprintf(ux_stderr, "Error: Missing keyword ONCE or ALWAYS on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected keyword ONCE or ALWAYS on line %u near `%S`!\n", p);
}
result->lines += SKIPWS(p);
@@ -885,8 +964,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
++n;
SKIPTO_NOSPAN(n, '"');
if (*n != '"') {
- u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
}
}
result->lines += SKIPTOWS(n, 0, true);
@@ -900,11 +978,12 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
gbuffers[0][c] = 0;
}
- Tag *ext = result->allocateTag(&gbuffers[0][0], true);
+ Tag *ext = result->allocateTag(&gbuffers[0][0]);
rule->varname = ext->hash;
p = n;
}
+ lp = p;
bool setflag = true;
while (setflag) {
setflag = false;
@@ -915,7 +994,10 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
rule->flags |= (1 << i);
setflag = true;
- if (*p == ':') {
+ if (i == FL_SUB) {
+ if (*p != ':') {
+ goto undo_flag;
+ }
++p;
UChar *n = p;
result->lines += SKIPTOWS(n, 0, true);
@@ -923,13 +1005,17 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
p = n;
- if (i == FL_SUB) {
+ if (gbuffers[0][0] == '*') {
+ rule->sub_reading = GSR_ANY;
+ }
+ else {
u_sscanf(&gbuffers[0][0], "%d", &rule->sub_reading);
}
}
// Rule flags followed by letters or valid set characters should not be flags.
if (*p != '(' && !ISSPACE(*p)) {
+ undo_flag:
rule->flags &= ~(1 << i);
p = op;
setflag = false;
@@ -947,41 +1033,32 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
if (rule->flags & MASK_ENCL) {
std::bitset<sizeof(rule->flags)*CHAR_BIT> bits(static_cast<uint64_t>(rule->flags & MASK_ENCL));
if (bits.count() > 1) {
- u_fprintf(ux_stderr, "Error: Line %u: ENCL_* are all mutually exclusive!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Line %u near `%S`: ENCL_* are all mutually exclusive!\n", lp);
}
}
if (rule->flags & RF_KEEPORDER && rule->flags & RF_VARYORDER) {
- u_fprintf(ux_stderr, "Error: Line %u: KEEPORDER and VARYORDER are mutually exclusive!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Line %u near `%S`: KEEPORDER and VARYORDER are mutually exclusive!\n", lp);
}
if (rule->flags & RF_REMEMBERX && rule->flags & RF_RESETX) {
- u_fprintf(ux_stderr, "Error: Line %u: REMEMBERX and RESETX are mutually exclusive!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Line %u near `%S`: REMEMBERX and RESETX are mutually exclusive!\n", lp);
}
if (rule->flags & RF_NEAREST && rule->flags & RF_ALLOWLOOP) {
- u_fprintf(ux_stderr, "Error: Line %u: NEAREST and ALLOWLOOP are mutually exclusive!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Line %u near `%S`: NEAREST and ALLOWLOOP are mutually exclusive!\n", lp);
}
if (rule->flags & RF_UNSAFE && rule->flags & RF_SAFE) {
- u_fprintf(ux_stderr, "Error: Line %u: SAFE and UNSAFE are mutually exclusive!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Line %u near `%S`: SAFE and UNSAFE are mutually exclusive!\n", lp);
}
if (rule->flags & RF_UNMAPLAST && rule->flags & RF_SAFE) {
- u_fprintf(ux_stderr, "Error: Line %u: SAFE and UNMAPLAST are mutually exclusive!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Line %u near `%S`: SAFE and UNMAPLAST are mutually exclusive!\n", lp);
}
if (rule->flags & RF_DELAYED && rule->flags & RF_IMMEDIATE) {
- u_fprintf(ux_stderr, "Error: Line %u: IMMEDIATE and DELAYED are mutually exclusive!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Line %u near `%S`: IMMEDIATE and DELAYED are mutually exclusive!\n", lp);
}
if (rule->flags & RF_WITHCHILD && rule->flags & RF_NOCHILD) {
- u_fprintf(ux_stderr, "Error: Line %u: WITHCHILD and NOCHILD are mutually exclusive!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Line %u near `%S`: WITHCHILD and NOCHILD are mutually exclusive!\n", lp);
}
if (rule->flags & RF_ITERATE && rule->flags & RF_NOITERATE) {
- u_fprintf(ux_stderr, "Error: Line %u: ITERATE and NOITERATE are mutually exclusive!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Line %u near `%S`: ITERATE and NOITERATE are mutually exclusive!\n", lp);
}
if (!(rule->flags & (RF_ITERATE|RF_NOITERATE))) {
@@ -1012,48 +1089,46 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
rule->childset1 = 0;
}
+ lp = p;
if (key == K_SUBSTITUTE || key == K_EXECUTE) {
+ swapper_false swp(no_isets, no_isets);
Set *s = parseSetInlineWrapper(p);
s->reindex(*result);
rule->sublist = s;
if (s->empty()) {
- u_fprintf(ux_stderr, "Error: Empty substitute set on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Empty substitute set on line %u near `%S`!\n", lp);
}
if (s->trie.empty() && s->trie_special.empty() && !(s->type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY))) {
- u_fprintf(ux_stderr, "Error: Substitute set on line %u was neither unified nor of LIST type!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Substitute set on line %u near `%S` was neither unified nor of LIST type!\n", lp);
}
}
result->lines += SKIPWS(p);
+ lp = p;
if (key == K_MAP || key == K_ADD || key == K_REPLACE || key == K_APPEND || key == K_SUBSTITUTE || key == K_COPY
|| key == K_ADDRELATIONS || key == K_ADDRELATION
|| key == K_SETRELATIONS || key == K_SETRELATION
|| key == K_REMRELATIONS || key == K_REMRELATION
|| key == K_SETVARIABLE || key == K_REMVARIABLE
|| key == K_ADDCOHORT || key == K_JUMP) {
+ swapper_false swp(no_isets, no_isets);
Set *s = parseSetInlineWrapper(p);
s->reindex(*result);
rule->maplist = s;
if (s->empty()) {
- u_fprintf(ux_stderr, "Error: Empty mapping set on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Empty mapping set on line %u near `%S`!\n", lp);
}
if (s->trie.empty() && s->trie_special.empty() && !(s->type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY))) {
- u_fprintf(ux_stderr, "Error: Mapping set on line %u was neither unified nor of LIST type!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Mapping set on line %u near `%S` was neither unified nor of LIST type!\n", lp);
}
if (key == K_APPEND && !s->getNonEmpty().empty()) {
if (!(s->getNonEmpty().begin()->first->type & T_BASEFORM)) {
- u_fprintf(ux_stderr, "Error: There must be a baseform before any other tags in APPEND on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: There must be a baseform before any other tags in APPEND on line %u near `%S`!\n", lp);
}
}
if (key == K_ADDCOHORT && !s->getNonEmpty().empty()) {
if (!(s->getNonEmpty().begin()->first->type & T_WORDFORM)) {
- u_fprintf(ux_stderr, "Error: There must be a wordform before any other tags in ADDCOHORT on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: There must be a wordform before any other tags in ADDCOHORT on line %u near `%S`!\n", lp);
}
}
}
@@ -1065,17 +1140,17 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
}
result->lines += SKIPWS(p);
+ lp = p;
if (key == K_ADDRELATIONS || key == K_SETRELATIONS || key == K_REMRELATIONS || key == K_SETVARIABLE || copy_except) {
+ swapper_false swp(no_isets, no_isets);
Set *s = parseSetInlineWrapper(p);
s->reindex(*result);
rule->sublist = s;
if (s->empty()) {
- u_fprintf(ux_stderr, "Error: Empty relation set on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Empty relation set on line %u near `%S`!\n", lp);
}
if (s->trie.empty() && s->trie_special.empty() && !(s->type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY))) {
- u_fprintf(ux_stderr, "Error: Relation/Value set on line %u was neither unified nor of LIST type!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Relation/Value set on line %u near `%S` was neither unified nor of LIST type!\n", lp);
}
}
@@ -1089,8 +1164,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
rule->type = K_ADDCOHORT_BEFORE;
}
else {
- u_fprintf(ux_stderr, "Error: Missing position keyword AFTER or BEFORE on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected position keyword AFTER or BEFORE on line %u near `%S`!\n", p);
}
}
@@ -1115,8 +1189,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
parseContextualTests(p, rule);
result->lines += SKIPWS(p);
if (*p != ')') {
- u_fprintf(ux_stderr, "Error: Missing closing ) on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ) on line %u near `%S`! Probably caused by missing set operator.\n", p);
}
++p;
result->lines += SKIPWS(p);
@@ -1138,8 +1211,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
rule->type = K_MOVE_BEFORE;
}
else {
- u_fprintf(ux_stderr, "Error: Missing movement keyword AFTER or BEFORE on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected movement keyword AFTER or BEFORE on line %u near `%S`!\n", p);
}
}
else if (key == K_SWITCH) {
@@ -1147,8 +1219,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
p += stringbits[S_WITH].length();
}
else {
- u_fprintf(ux_stderr, "Error: Missing movement keyword WITH on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected movement keyword WITH on line %u near `%S`!\n", p);
}
}
else {
@@ -1160,8 +1231,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
rule->flags |= RF_REVERSE;
}
else {
- u_fprintf(ux_stderr, "Error: Missing dependency keyword TO or FROM on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected dependency keyword TO or FROM on line %u near `%S`!\n", p);
}
}
result->lines += SKIPWS(p);
@@ -1181,21 +1251,20 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
}
}
+ lp = p;
while (*p && *p == '(') {
++p;
result->lines += SKIPWS(p);
parseContextualDependencyTests(p, rule);
result->lines += SKIPWS(p);
if (*p != ')') {
- u_fprintf(ux_stderr, "Error: Missing closing ) on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ) on line %u near `%S`! Probably caused by missing set operator.\n", p);
}
++p;
result->lines += SKIPWS(p);
}
if (rule->dep_tests.empty()) {
- u_fprintf(ux_stderr, "Error: Missing dependency target on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected dependency target on line %u near `%S`!\n", lp);
}
rule->dep_target = rule->dep_tests.back();
rule->dep_tests.pop_back();
@@ -1227,7 +1296,8 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
}
}
if (found) {
- u_fprintf(ux_stderr, "Warning: Rule on line %u had 'x' in the first part of a contextual test, but no REMEMBERX flag.\n", result->lines);
+ u_fprintf(ux_stderr, "%s: Warning: Rule on line %u had 'x' in the first part of a contextual test, but no REMEMBERX flag.\n", filebase, result->lines);
+ u_fflush(ux_stderr);
}
}
@@ -1245,19 +1315,19 @@ void TextualParser::parseAnchorish(UChar *& p) {
p = n;
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; on line %u after anchor/section name!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; on line %u near `%S` after anchor/section name!\n", p);
}
}
int TextualParser::parseFromUChar(UChar *input, const char *fname) {
if (!input || !input[0]) {
- u_fprintf(ux_stderr, "Error: Input is empty - cannot continue!\n");
+ u_fprintf(ux_stderr, "%s: Error: Input is empty - cannot continue!\n", fname);
CG3Quit(1);
}
UChar *p = input;
result->lines = 1;
+ filebase = basename(const_cast<char*>(fname));
while (*p) {
try {
@@ -1271,8 +1341,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
&& ISCHR(*(p+7),'E','e') && ISCHR(*(p+8),'R','r')
&& !ISSTRING(p, 9)) {
if (result->delimiters) {
- u_fprintf(ux_stderr, "Error: Cannot redefine DELIMITERS on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Cannot redefine DELIMITERS on line %u near `%S`!\n", p);
}
result->delimiters = result->allocateSet();
result->delimiters->line = result->lines;
@@ -1280,20 +1349,17 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p += 10;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
parseTagList(p, result->delimiters);
result->addSet(result->delimiters);
if (result->delimiters->trie.empty() && result->delimiters->trie_special.empty()) {
- u_fprintf(ux_stderr, "Error: DELIMITERS declared, but no definitions given, on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: DELIMITERS declared, but no definitions given, on line %u near `%S`!\n", p);
}
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
}
// SOFT-DELIMITERS
@@ -1304,8 +1370,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
&& ISCHR(*(p+12),'E','e') && ISCHR(*(p+13),'R','r')
&& !ISSTRING(p, 14)) {
if (result->soft_delimiters) {
- u_fprintf(ux_stderr, "Error: Cannot redefine SOFT-DELIMITERS on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Cannot redefine SOFT-DELIMITERS on line %u near `%S`!\n", p);
}
result->soft_delimiters = result->allocateSet();
result->soft_delimiters->line = result->lines;
@@ -1313,20 +1378,17 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p += 15;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
parseTagList(p, result->soft_delimiters);
result->addSet(result->soft_delimiters);
if (result->soft_delimiters->trie.empty() && result->soft_delimiters->trie_special.empty()) {
- u_fprintf(ux_stderr, "Error: SOFT-DELIMITERS declared, but no definitions given, on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: SOFT-DELIMITERS declared, but no definitions given, on line %u near `%S`!\n", p);
}
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
}
// MAPPING-PREFIX
@@ -1338,7 +1400,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
&& !ISSTRING(p, 13)) {
if (seen_mapping_prefix) {
- u_fprintf(ux_stderr, "Error: MAPPING-PREFIX on line %u cannot change previous prefix set on line %u!\n", result->lines, seen_mapping_prefix);
+ u_fprintf(ux_stderr, "%s: Error: MAPPING-PREFIX on line %u cannot change previous prefix set on line %u!\n", filebase, result->lines, seen_mapping_prefix);
incErrorCount();
}
seen_mapping_prefix = result->lines;
@@ -1346,8 +1408,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p += 14;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
result->lines += SKIPWS(p);
@@ -1362,13 +1423,11 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
result->mapping_prefix = gbuffers[0][0];
if (!result->mapping_prefix) {
- u_fprintf(ux_stderr, "Error: MAPPING-PREFIX declared, but no definitions given, on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: MAPPING-PREFIX declared, but no definitions given, on line %u near `%S`!\n", p);
}
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
}
// PREFERRED-TARGETS
@@ -1381,8 +1440,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p += 17;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
result->lines += SKIPWS(p);
@@ -1393,28 +1451,66 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
n++;
SKIPTO_NOSPAN(n, '"');
if (*n != '"') {
- u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
}
}
result->lines += SKIPTOWS(n, ';', true);
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- Tag *t = result->allocateTag(&gbuffers[0][0]);
+ Tag *t = parseTag(&gbuffers[0][0], p);
result->preferred_targets.push_back(t->hash);
p = n;
result->lines += SKIPWS(p);
}
if (result->preferred_targets.empty()) {
- u_fprintf(ux_stderr, "Error: PREFERRED-TARGETS declared, but no definitions given, on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: PREFERRED-TARGETS declared, but no definitions given, on line %u near `%S`!\n", p);
}
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ }
+ // REOPEN-MAPPINGS
+ else if (ISCHR(*p, 'R', 'r') && ISCHR(*(p + 14), 'S', 's') && ISCHR(*(p + 1), 'E', 'e') && ISCHR(*(p + 2), 'O', 'o')
+ && ISCHR(*(p + 3), 'P', 'p') && ISCHR(*(p + 4), 'E', 'e') && ISCHR(*(p + 5), 'N', 'n') && ISCHR(*(p + 6), '-', '_')
+ && ISCHR(*(p + 7), 'M', 'm') && ISCHR(*(p + 8), 'A', 'a') && ISCHR(*(p + 9), 'P', 'p') && ISCHR(*(p + 10), 'P', 'p')
+ && ISCHR(*(p + 11), 'I', 'i') && ISCHR(*(p + 12), 'N', 'n') && ISCHR(*(p + 13), 'G', 'g')
+ && !ISSTRING(p, 14)) {
+ p += 15;
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ result->lines += SKIPWS(p);
+
+ while (*p && *p != ';') {
+ UChar *n = p;
+ if (*n == '"') {
+ n++;
+ SKIPTO_NOSPAN(n, '"');
+ if (*n != '"') {
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ }
+ }
+ result->lines += SKIPTOWS(n, ';', true);
+ ptrdiff_t c = n - p;
+ u_strncpy(&gbuffers[0][0], p, c);
+ gbuffers[0][c] = 0;
+ Tag *t = parseTag(&gbuffers[0][0], p);
+ result->reopen_mappings.insert(t->hash);
+ p = n;
+ result->lines += SKIPWS(p);
+ }
+
+ if (result->reopen_mappings.empty()) {
+ error("%s: Error: REOPEN-MAPPINGS declared, but no definitions given, on line %u near `%S`!\n", p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
}
// STATIC-SETS
@@ -1425,8 +1521,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p += 11;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
result->lines += SKIPWS(p);
@@ -1434,20 +1529,17 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
while (*p && *p != ';') {
UChar *n = p;
result->lines += SKIPTOWS(n, ';', true);
- const UString s(p, n);
- result->static_sets.push_back(s);
+ result->static_sets.push_back(UString(p, n));
p = n;
result->lines += SKIPWS(p);
}
if (result->static_sets.empty()) {
- u_fprintf(ux_stderr, "Error: STATIC-SETS declared, but no definitions given, on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: STATIC-SETS declared, but no definitions given, on line %u near `%S`!\n", p);
}
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
}
// ADDRELATIONS
@@ -1563,8 +1655,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p = n;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
parseTagList(p, s);
@@ -1572,19 +1663,17 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
Set *tmp = result->getSet(s->hash);
if (tmp) {
if (verbosity_level > 0 && tmp->name[0] != '_' && tmp->name[1] != 'G' && tmp->name[2] != '_') {
- u_fprintf(ux_stderr, "Warning: LIST %S was defined twice with the same contents: Lines %u and %u.\n", s->name.c_str(), tmp->line, s->line);
+ u_fprintf(ux_stderr, "%s: Warning: LIST %S was defined twice with the same contents: Lines %u and %u.\n", filebase, s->name.c_str(), tmp->line, s->line);
u_fflush(ux_stderr);
}
}
result->addSet(s);
if (s->empty()) {
- u_fprintf(ux_stderr, "Error: LIST %S declared, but no definitions given, on line %u!\n", s->name.c_str(), result->lines);
- incErrorCount();
+ error("%s: Error: LIST %S declared, but no definitions given, on line %u near `%S`!\n", s->name.c_str(), p);
}
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
}
// SET
@@ -1607,38 +1696,39 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p = n;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
+
+ swapper_false swp(no_isets, no_isets);
+
parseSetInline(p, s);
s->rehash();
Set *tmp = result->getSet(s->hash);
if (tmp) {
if (verbosity_level > 0 && tmp->name[0] != '_' && tmp->name[1] != 'G' && tmp->name[2] != '_') {
- u_fprintf(ux_stderr, "Warning: SET %S was defined twice with the same contents: Lines %u and %u.\n", s->name.c_str(), tmp->line, s->line);
+ u_fprintf(ux_stderr, "%s: Warning: SET %S was defined twice with the same contents: Lines %u and %u.\n", filebase, s->name.c_str(), tmp->line, s->line);
u_fflush(ux_stderr);
}
}
else if (s->sets.size() == 1 && !(s->type & ST_TAG_UNIFY)) {
tmp = result->getSet(s->sets.back());
if (verbosity_level > 0) {
- u_fprintf(ux_stderr, "Warning: Set %S (L:%u) has been aliased to %S (L:%u).\n", s->name.c_str(), s->line, tmp->name.c_str(), tmp->line);
+ u_fprintf(ux_stderr, "%s: Warning: Set %S on line %u aliased to %S on line %u.\n", filebase, s->name.c_str(), s->line, tmp->name.c_str(), tmp->line);
u_fflush(ux_stderr);
}
+ result->maybe_used_sets.insert(tmp);
result->set_alias[sh] = tmp->hash;
result->destroySet(s);
s = tmp;
}
result->addSet(s);
if (s->empty()) {
- u_fprintf(ux_stderr, "Error: SET %S declared, but no definitions given, on line %u!\n", s->name.c_str(), result->lines);
- incErrorCount();
+ error("%s: Error: SET %S declared, but no definitions given, on line %u near `%S`!\n", s->name.c_str(), p);
}
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u! Probably caused by missing set operator.\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`! Probably caused by missing set operator.\n", p);
}
}
// MAPPINGS
@@ -1778,8 +1868,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p += 11;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
result->lines += SKIPWS(p);
@@ -1790,18 +1879,100 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
result->sub_readings_ltr = false;
}
else {
- u_fprintf(ux_stderr, "Error: Missing RTL or LTR on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Expected RTL or LTR on line %u near `%S`!\n", *p, p);
}
UChar *n = p;
result->lines += SKIPTOWS(n, 0, true);
p = n;
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ }
+ // OPTIONS
+ else if (ISCHR(*p, 'O', 'o') && ISCHR(*(p + 6), 'S', 's') && ISCHR(*(p + 1), 'P', 'p') && ISCHR(*(p + 2), 'T', 't')
+ && ISCHR(*(p + 3), 'I', 'i') && ISCHR(*(p + 4), 'O', 'o') && ISCHR(*(p + 5), 'N', 'n')
+ && !ISSTRING(p, 6)) {
+ p += 7;
+ result->lines += SKIPWS(p, '+');
+ if (p[0] != '+' || p[1] != '=') {
+ error("%s: Error: Encountered a %C before the expected += on line %u near `%S`!\n", *p, p);
+ }
+ p += 2;
+ result->lines += SKIPWS(p);
+
+ typedef std::pair<size_t, bool&> pairs_t;
+ pairs_t pairs[] = {
+ { S_NO_ISETS, no_isets },
+ { S_NO_ITMPLS, no_itmpls },
+ { S_STRICT_WFORMS, strict_wforms },
+ { S_STRICT_BFORMS, strict_bforms },
+ { S_STRICT_SECOND, strict_second },
+ };
+
+ while (*p != ';') {
+ bool found = false;
+ boost_foreach(pairs_t& pair, pairs) {
+ if (ux_simplecasecmp(p, stringbits[pair.first].getTerminatedBuffer(), stringbits[pair.first].length())) {
+ p += stringbits[pair.first].length();
+ pair.second = true;
+ result->lines += SKIPWS(p);
+ found = true;
+ }
+ }
+ if (!found) {
+ error("%s: Error: Invalid option found on line %u near `%S`!\n", p);
+ }
+ }
+
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
}
+ // STRICT-TAGS
+ else if (ISCHR(*p, 'S', 's') && ISCHR(*(p + 10), 'S', 's') && ISCHR(*(p + 1), 'T', 't') && ISCHR(*(p + 2), 'R', 'r')
+ && ISCHR(*(p + 3), 'I', 'i') && ISCHR(*(p + 4), 'C', 'c') && ISCHR(*(p + 5), 'T', 't')
+ && ISCHR(*(p + 6), '-', '-') && ISCHR(*(p + 7), 'T', 't') && ISCHR(*(p + 8), 'A', 'a') && ISCHR(*(p + 9), 'G', 'g')
+ && !ISSTRING(p, 10)) {
+ p += 11;
+ result->lines += SKIPWS(p, '+');
+ if (p[0] != '+' || p[1] != '=') {
+ error("%s: Error: Encountered a %C before the expected += on line %u near `%S`!\n", *p, p);
+ }
+ p += 2;
+ result->lines += SKIPWS(p);
+
+ uint32SortedVector tmp;
+ strict_tags.swap(tmp);
+ while (*p && *p != ';') {
+ UChar *n = p;
+ if (*n == '"') {
+ n++;
+ SKIPTO_NOSPAN(n, '"');
+ if (*n != '"') {
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ }
+ }
+ result->lines += SKIPTOWS(n, ';', true);
+ ptrdiff_t c = n - p;
+ u_strncpy(&gbuffers[0][0], p, c);
+ gbuffers[0][c] = 0;
+ Tag *t = parseTag(&gbuffers[0][0], p);
+ tmp.insert(t->hash);
+ p = n;
+ result->lines += SKIPWS(p);
+ }
+
+ if (tmp.empty()) {
+ error("%s: Error: STRICT-TAGS declared, but no definitions given, on line %u near `%S`!\n", p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ strict_tags.swap(tmp);
+ }
// ANCHOR
else if (ISCHR(*p,'A','a') && ISCHR(*(p+5),'R','r') && ISCHR(*(p+1),'N','n') && ISCHR(*(p+2),'C','c')
&& ISCHR(*(p+3),'H','h') && ISCHR(*(p+4),'O','o')
@@ -1821,12 +1992,10 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- uint32_t olines = result->lines;
p = n;
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
UErrorCode err = U_ZERO_ERROR;
@@ -1851,7 +2020,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
}
if (error != 0) {
- u_fprintf(ux_stderr, "Error: Cannot stat %s due to error %d - bailing out!\n", abspath.c_str(), error);
+ u_fprintf(ux_stderr, "%s: Error: Cannot stat %s due to error %d - bailing out!\n", filebase, abspath.c_str(), error);
CG3Quit(1);
}
else {
@@ -1860,7 +2029,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
UFILE *grammar = u_fopen(abspath.c_str(), "rb", locale, codepage);
if (!grammar) {
- u_fprintf(ux_stderr, "Error: Error opening %s for reading!\n", abspath.c_str());
+ u_fprintf(ux_stderr, "%s: Error: Error opening %s for reading!\n", filebase, abspath.c_str());
CG3Quit(1);
}
UChar32 bom = u_fgetcx(grammar);
@@ -1872,14 +2041,17 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
uint32_t read = u_file_read(&data[4], grammar_size*2, grammar);
u_fclose(grammar);
if (read >= grammar_size*2-1) {
- u_fprintf(ux_stderr, "Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n");
+ u_fprintf(ux_stderr, "%s: Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n", filebase);
CG3Quit(1);
}
data.resize(read+4+1);
- parseFromUChar(&data[4], abspath.c_str());
+ uint32_t olines = 0;
+ swapper<uint32_t> oswap(true, olines, result->lines);
+ const char *obase = 0;
+ swapper<const char*> bswap(true, obase, filebase);
- result->lines = olines;
+ parseFromUChar(&data[4], abspath.c_str());
}
// IFF
else if (ISCHR(*p,'I','i') && ISCHR(*(p+2),'F','f') && ISCHR(*(p+1),'F','f')
@@ -1982,19 +2154,19 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p = n;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
+ swapper_false swp(no_itmpls, no_itmpls);
+
ContextualTest *t = parseContextualTestList(p);
t->line = line;
result->addTemplate(t, name.c_str());
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u! Probably caused by missing set operator.\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`! Probably caused by missing set operator.\n", p);
}
}
// PARENTHESES
@@ -2005,8 +2177,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
p += 11;
result->lines += SKIPWS(p, '=');
if (*p != '=') {
- u_fprintf(ux_stderr, "Error: Encountered a %C before the expected = on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
}
++p;
result->lines += SKIPWS(p);
@@ -2018,8 +2189,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
UChar *n = p;
result->lines += SKIPTOWS(n, '(', true);
if (*n != '(') {
- u_fprintf(ux_stderr, "Error: Encountered %C before the expected ( on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered %C before the expected ( on line %u near `%S`!\n", *p, p);
}
n++;
result->lines += SKIPWS(n);
@@ -2028,42 +2198,38 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
n++;
SKIPTO_NOSPAN(n, '"');
if (*n != '"') {
- u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
}
}
result->lines += SKIPTOWS(n, ')', true);
c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- left = result->allocateTag(&gbuffers[0][0]);
+ left = parseTag(&gbuffers[0][0], p);
result->lines += SKIPWS(n);
p = n;
if (*p == ')') {
- u_fprintf(ux_stderr, "Error: Encountered ) before the expected Right tag on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Encountered ) before the expected Right tag on line %u near `%S`!\n", p);
}
if (*n == '"') {
n++;
SKIPTO_NOSPAN(n, '"');
if (*n != '"') {
- u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
}
}
result->lines += SKIPTOWS(n, ')', true);
c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- right = result->allocateTag(&gbuffers[0][0]);
+ right = parseTag(&gbuffers[0][0], p);
result->lines += SKIPWS(n);
p = n;
if (*p != ')') {
- u_fprintf(ux_stderr, "Error: Encountered %C before the expected ) on line %u!\n", *p, result->lines);
- incErrorCount();
+ error("%s: Error: Encountered %C before the expected ) on line %u near `%S`!\n", *p, p);
}
++p;
result->lines += SKIPWS(p);
@@ -2075,13 +2241,11 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
}
if (result->parentheses.empty()) {
- u_fprintf(ux_stderr, "Error: PARENTHESES declared, but no definitions given, on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: PARENTHESES declared, but no definitions given, on line %u near `%S`!\n", p);
}
result->lines += SKIPWS(p, ';');
if (*p != ';') {
- u_fprintf(ux_stderr, "Error: Missing closing ; before line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
}
// END
@@ -2095,24 +2259,19 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
}
// No keyword found at this position, skip a character.
else {
- // For some strange reason, '<' was explicitly allowed to exist without a purpose...
- // I cannot recall why, so removed that since it caused line counting errors.
+ UChar *n = p;
if (*p == ';' || *p == '"') {
if (*p == '"') {
++p;
SKIPTO_NOSPAN(p, '"');
if (*p != '"') {
- u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
- incErrorCount();
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", n);
}
}
result->lines += SKIPTOWS(p);
}
if (*p && *p != ';' && *p != '"' && !ISNL(*p) && !ISSPACE(*p)) {
- UChar op = 0;
- swapper<UChar> swp(true, op, p[16]);
- u_fprintf(ux_stderr, "Error: Garbage data '%S...' encountered on line %u!\n", p, result->lines);
- incErrorCount();
+ error("%s: Error: Garbage data encountered on line %u near `%S`!\n", p);
}
if (ISNL(*p)) {
result->lines += 1;
@@ -2124,17 +2283,18 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
result->lines += SKIPLN(p);
}
}
-
+
return 0;
}
int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, const char *cpage) {
filename = fname;
+ filebase = basename(const_cast<char*>(fname));
locale = loc;
codepage = cpage;
if (!result) {
- u_fprintf(ux_stderr, "Error: Cannot parse into nothing - hint: call setResult() before trying.\n");
+ u_fprintf(ux_stderr, "%s: Error: Cannot parse into nothing - hint: call setResult() before trying.\n", filebase);
CG3Quit(1);
}
@@ -2142,7 +2302,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
int error = stat(filename, &_stat);
if (error != 0) {
- u_fprintf(ux_stderr, "Error: Cannot stat %s due to error %d - bailing out!\n", filename, error);
+ u_fprintf(ux_stderr, "%s: Error: Cannot stat %s due to error %d - bailing out!\n", filebase, filename, error);
CG3Quit(1);
}
else {
@@ -2151,7 +2311,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
UFILE *grammar = u_fopen(filename, "rb", locale, codepage);
if (!grammar) {
- u_fprintf(ux_stderr, "Error: Error opening %s for reading!\n", filename);
+ u_fprintf(ux_stderr, "%s: Error: Error opening %s for reading!\n", filebase, filename);
CG3Quit(1);
}
UChar32 bom = u_fgetcx(grammar);
@@ -2164,7 +2324,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
uint32_t read = u_file_read(&data[4], result->grammar_size*2, grammar);
u_fclose(grammar);
if (read >= result->grammar_size*2-1) {
- u_fprintf(ux_stderr, "Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n");
+ u_fprintf(ux_stderr, "%s: Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n", filebase);
CG3Quit(1);
}
data.resize(read+4+1);
@@ -2173,7 +2333,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
// Allocate the magic * tag
{
- Tag *tany = result->allocateTag(stringbits[S_ASTERIK].getTerminatedBuffer());
+ Tag *tany = parseTag(stringbits[S_ASTERIK].getTerminatedBuffer());
result->tag_any = tany->hash;
}
// Create the dummy set
@@ -2183,7 +2343,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
Set *set_c = result->allocateSet();
set_c->line = 0;
set_c->setName(stringbits[S_UU_TARGET].getTerminatedBuffer());
- Tag *t = result->allocateTag(stringbits[S_UU_TARGET].getTerminatedBuffer());
+ Tag *t = parseTag(stringbits[S_UU_TARGET].getTerminatedBuffer());
result->addTagToSet(t, set_c);
result->addSet(set_c);
}
@@ -2192,7 +2352,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
Set *set_c = result->allocateSet();
set_c->line = 0;
set_c->setName(stringbits[S_UU_MARK].getTerminatedBuffer());
- Tag *t = result->allocateTag(stringbits[S_UU_MARK].getTerminatedBuffer());
+ Tag *t = parseTag(stringbits[S_UU_MARK].getTerminatedBuffer());
result->addTagToSet(t, set_c);
result->addSet(set_c);
}
@@ -2201,7 +2361,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
Set *set_c = result->allocateSet();
set_c->line = 0;
set_c->setName(stringbits[S_UU_ATTACHTO].getTerminatedBuffer());
- Tag *t = result->allocateTag(stringbits[S_UU_ATTACHTO].getTerminatedBuffer());
+ Tag *t = parseTag(stringbits[S_UU_ATTACHTO].getTerminatedBuffer());
result->addTagToSet(t, set_c);
result->addSet(set_c);
}
@@ -2211,7 +2371,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
Set *set_c = s_left = result->allocateSet();
set_c->line = 0;
set_c->setName(stringbits[S_UU_LEFT].getTerminatedBuffer());
- Tag *t = result->allocateTag(stringbits[S_UU_LEFT].getTerminatedBuffer());
+ Tag *t = parseTag(stringbits[S_UU_LEFT].getTerminatedBuffer());
result->addTagToSet(t, set_c);
result->addSet(set_c);
}
@@ -2221,7 +2381,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
Set *set_c = s_right = result->allocateSet();
set_c->line = 0;
set_c->setName(stringbits[S_UU_RIGHT].getTerminatedBuffer());
- Tag *t = result->allocateTag(stringbits[S_UU_RIGHT].getTerminatedBuffer());
+ Tag *t = parseTag(stringbits[S_UU_RIGHT].getTerminatedBuffer());
result->addTagToSet(t, set_c);
result->addSet(set_c);
}
@@ -2230,7 +2390,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
Set *set_c = result->allocateSet();
set_c->line = 0;
set_c->setName(stringbits[S_UU_ENCL].getTerminatedBuffer());
- Tag *t = result->allocateTag(stringbits[S_UU_ENCL].getTerminatedBuffer());
+ Tag *t = parseTag(stringbits[S_UU_ENCL].getTerminatedBuffer());
result->addTagToSet(t, set_c);
result->addSet(set_c);
}
@@ -2244,6 +2404,15 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
set_c->sets.push_back(s_right->hash);
result->addSet(set_c);
}
+ // Create the magic set _SAME_BASIC_ containing the tag _SAME_BASIC_
+ {
+ Set *set_c = result->allocateSet();
+ set_c->line = 0;
+ set_c->setName(stringbits[S_UU_SAME_BASIC].getTerminatedBuffer());
+ Tag *t = parseTag(stringbits[S_UU_SAME_BASIC].getTerminatedBuffer());
+ result->addTagToSet(t, set_c);
+ result->addSet(set_c);
+ }
error = parseFromUChar(&data[4], filename);
if (error) {
@@ -2258,16 +2427,97 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
}
}
+ boost_foreach (Tag *tag, result->single_tags_list) {
+ if (!(tag->type & T_VARSTRING)) {
+ continue;
+ }
+ UChar *p = &tag->tag[0];
+ UChar *n = 0;
+ do {
+ SKIPTO(p, '{');
+ if (*p) {
+ n = p;
+ SKIPTO(n, '}');
+ if (*n) {
+ tag->allocateVsSets();
+ tag->allocateVsNames();
+ ++p;
+ UString theSet(p, n);
+ Set *tmp = parseSet(theSet.c_str(), p);
+ tag->vs_sets->push_back(tmp);
+ UString old;
+ old += '{';
+ old += tmp->name;
+ old += '}';
+ tag->vs_names->push_back(old);
+ p = n;
+ ++p;
+ }
+ }
+ } while (*p);
+ }
+
const_foreach (deferred_t, deferred_tmpls, it, it_end) {
uint32_t cn = hash_value(it->second.second);
if (result->templates.find(cn) == result->templates.end()) {
- u_fprintf(ux_stderr, "Error: Unknown template '%S' referenced on line %u!\n", it->second.second.c_str(), it->second.first);
+ u_fprintf(ux_stderr, "%s: Error: Unknown template '%S' referenced on line %u!\n", filebase, it->second.second.c_str(), it->second.first);
++error_counter;
continue;
}
it->first->tmpl = result->templates.find(cn)->second;
}
+ bc::flat_map<uint32_t,uint32_t> sets;
+ for (BOOST_AUTO(cntx, result->contexts.begin()); cntx != result->contexts.end(); ) {
+ if (cntx->second->pos & POS_NUMERIC_BRANCH) {
+ ContextualTest *unsafec = cntx->second;
+ result->contexts.erase(cntx);
+
+ if (sets.find(unsafec->target) == sets.end()) {
+ sets[unsafec->target] = result->removeNumericTags(unsafec->target);
+ }
+ unsafec->pos &= ~POS_NUMERIC_BRANCH;
+
+ ContextualTest *safec = result->allocateContextualTest();
+ copy_cntx(unsafec, safec);
+
+ safec->pos |= POS_CAREFUL;
+ safec->target = sets[unsafec->target];
+
+ ContextualTest *tmp = unsafec;
+ unsafec = result->addContextualTest(unsafec);
+ safec = result->addContextualTest(safec);
+
+ ContextualTest *orc = result->allocateContextualTest();
+ orc->ors.push_back(safec);
+ orc->ors.push_back(unsafec);
+ orc = result->addContextualTest(orc);
+
+ for (BOOST_AUTO(cntx, result->contexts.begin()); cntx != result->contexts.end(); ++cntx) {
+ if (cntx->second->linked == tmp) {
+ cntx->second->linked = orc;
+ }
+ }
+ for (BOOST_AUTO(it, result->rule_by_number.begin()); it != result->rule_by_number.end(); ++it) {
+ if ((*it)->dep_target == tmp) {
+ (*it)->dep_target = orc;
+ }
+ ContextList *cntxs[2] = { &(*it)->tests, &(*it)->dep_tests };
+ for (size_t i = 0; i < 2; ++i) {
+ boost_foreach (ContextualTest *& test, *cntxs[i]) {
+ if (test == tmp) {
+ test = orc;
+ }
+ }
+ }
+ }
+ cntx = result->contexts.begin();
+ }
+ else {
+ ++cntx;
+ }
+ }
+
return error_counter;
}
diff --git a/src/TextualParser.hpp b/src/TextualParser.hpp
index 4ba6c07..dcb65c5 100644
--- a/src/TextualParser.hpp
+++ b/src/TextualParser.hpp
@@ -25,10 +25,12 @@
#include "IGrammarParser.hpp"
#include "Strings.hpp"
+#include "sorted_vector.hpp"
namespace CG3 {
class Rule;
class Set;
+ class Tag;
class ContextualTest;
class TextualParser : public IGrammarParser {
@@ -40,12 +42,26 @@ namespace CG3 {
int parse_grammar_from_file(const char *filename, const char *locale, const char *codepage);
+ void error(const char *str);
+ void error(const char *str, UChar c);
+ void error(const char *str, const UChar *p);
+ void error(const char *str, UChar c, const UChar *p);
+ void error(const char *str, const char *s, const UChar *p);
+ void error(const char *str, const UChar *s, const UChar *p);
+ void error(const char *str, const char *s, const UChar *S, const UChar *p);
+ Tag *addTag(Tag *tag);
+ Grammar *get_grammar() { return result; }
+ const char *filebase;
+ uint32SortedVector strict_tags;
+
private:
+ UChar nearbuf[32];
uint32_t verbosity_level;
uint32_t sets_counter;
uint32_t seen_mapping_prefix;
bool option_vislcg_compat;
bool in_section, in_before_sections, in_after_sections, in_null_section;
+ bool no_isets, no_itmpls, strict_wforms, strict_bforms, strict_second;
const char *filename;
const char *locale;
const char *codepage;
@@ -56,7 +72,9 @@ namespace CG3 {
int parseFromUChar(UChar *input, const char *fname = 0);
void addRuleToGrammar(Rule *rule);
+ Tag *parseTag(const UChar *to, const UChar *p = 0);
void parseTagList(UChar *& p, Set *s);
+ Set *parseSet(const UChar *name, const UChar *p = 0);
Set *parseSetInline(UChar *& p, Set *s = 0);
Set *parseSetInlineWrapper(UChar *& p);
void parseContextualTestPosition(UChar *& p, ContextualTest& t);
diff --git a/src/all_cg_comp.cpp b/src/all_cg_comp.cpp
index f003882..16d3940 100644
--- a/src/all_cg_comp.cpp
+++ b/src/all_cg_comp.cpp
@@ -9,6 +9,7 @@
#include "Tag.cpp"
#include "TextualParser.cpp"
#include "uextras.cpp"
+#include "BinaryGrammar_read_10043.cpp"
#include "BinaryGrammar_read.cpp"
#include "BinaryGrammar_write.cpp"
#include "cg_comp.cpp"
diff --git a/src/all_cg_conv.cpp b/src/all_cg_conv.cpp
index 393f2e9..18ea5b9 100644
--- a/src/all_cg_conv.cpp
+++ b/src/all_cg_conv.cpp
@@ -9,6 +9,7 @@
#include "Tag.cpp"
#include "TextualParser.cpp"
#include "uextras.cpp"
+#include "BinaryGrammar_read_10043.cpp"
#include "BinaryGrammar_read.cpp"
#include "BinaryGrammar_write.cpp"
#include "GrammarApplicator_runGrammar.cpp"
diff --git a/src/all_cg_proc.cpp b/src/all_cg_proc.cpp
index f838c3f..2741a44 100644
--- a/src/all_cg_proc.cpp
+++ b/src/all_cg_proc.cpp
@@ -9,6 +9,7 @@
#include "Tag.cpp"
#include "TextualParser.cpp"
#include "uextras.cpp"
+#include "BinaryGrammar_read_10043.cpp"
#include "BinaryGrammar_read.cpp"
#include "BinaryGrammar_write.cpp"
#include "GrammarApplicator_runGrammar.cpp"
diff --git a/src/all_vislcg3.cpp b/src/all_vislcg3.cpp
index 174d815..1975496 100644
--- a/src/all_vislcg3.cpp
+++ b/src/all_vislcg3.cpp
@@ -9,6 +9,7 @@
#include "Tag.cpp"
#include "TextualParser.cpp"
#include "uextras.cpp"
+#include "BinaryGrammar_read_10043.cpp"
#include "BinaryGrammar_read.cpp"
#include "BinaryGrammar_write.cpp"
#include "GrammarApplicator_runGrammar.cpp"
diff --git a/src/cg_comp.cpp b/src/cg_comp.cpp
index e1c28b1..a19cc32 100644
--- a/src/cg_comp.cpp
+++ b/src/cg_comp.cpp
@@ -26,7 +26,9 @@
#include "BinaryGrammar.hpp"
#include "GrammarApplicator.hpp"
+#ifndef _WIN32
#include <libgen.h>
+#endif
#include "version.hpp"
diff --git a/src/cg_conv.cpp b/src/cg_conv.cpp
index 0114cb6..0513aa9 100644
--- a/src/cg_conv.cpp
+++ b/src/cg_conv.cpp
@@ -199,6 +199,26 @@ int main(int argc, char *argv[]) {
ucnv_close(conv);
grammar.mapping_prefix = buf[0];
}
+ if (options[SUB_DELIMITER].doesOccur) {
+ size_t sn = strlen(options[SUB_DELIMITER].value);
+ applicator.sub_delims.resize(sn*2);
+ UConverter *conv = ucnv_open(codepage_default, &status);
+ sn = ucnv_toUChars(conv, &applicator.sub_delims[0], applicator.sub_delims.size(), options[SUB_DELIMITER].value, sn, &status);
+ applicator.sub_delims.resize(sn);
+ applicator.sub_delims += '+';
+ ucnv_close(conv);
+ }
+ if (options[FST_WTAG].doesOccur) {
+ size_t sn = strlen(options[FST_WTAG].value);
+ applicator.wtag.resize(sn * 2);
+ UConverter *conv = ucnv_open(codepage_default, &status);
+ sn = ucnv_toUChars(conv, &applicator.wtag[0], applicator.wtag.size(), options[FST_WTAG].value, sn, &status);
+ applicator.wtag.resize(sn);
+ ucnv_close(conv);
+ }
+ if (options[FST_WFACTOR].doesOccur) {
+ applicator.wfactor = strtof(options[FST_WFACTOR].value, 0);
+ }
applicator.setOutputFormat(CG3::FMT_CG);
@@ -212,6 +232,7 @@ int main(int argc, char *argv[]) {
applicator.setOutputFormat(CG3::FMT_PLAIN);
}
+ applicator.is_conv = true;
applicator.verbosity_level = 0;
applicator.runGrammarOnText(*instream.get(), ux_stdout);
diff --git a/src/cg_proc.cpp b/src/cg_proc.cpp
index 1c8ec57..195dcc1 100644
--- a/src/cg_proc.cpp
+++ b/src/cg_proc.cpp
@@ -26,7 +26,9 @@
#include "GrammarApplicator.hpp"
#include <getopt.h>
+#ifndef _WIN32
#include <libgen.h>
+#endif
#include "version.hpp"
diff --git a/src/inlines.hpp b/src/inlines.hpp
index 3d09171..51d52fb 100644
--- a/src/inlines.hpp
+++ b/src/inlines.hpp
@@ -339,13 +339,13 @@ inline void insert_if_exists(IT& cont, const OT* other) {
}
}
-template<typename T>
-inline void writeRaw(std::ostream& stream, const T& value) {
+template<typename S, typename T>
+inline void writeRaw(S& stream, const T& value) {
stream.write(reinterpret_cast<const char*>(&value), sizeof(T));
}
-template<typename T>
-inline void readRaw(std::istream& stream, T& value) {
+template<typename S, typename T>
+inline void readRaw(S& stream, T& value) {
stream.read(reinterpret_cast<char*>(&value), sizeof(T));
}
@@ -368,7 +368,8 @@ inline void writeUTF8String(std::ostream& output, const UString& str) {
writeUTF8String(output, str.c_str(), str.length());
}
-inline UString readUTF8String(std::istream& input) {
+template<typename S>
+inline UString readUTF8String(S& input) {
uint16_t len = 0;
readRaw(input, len);
@@ -508,6 +509,36 @@ private:
T& b;
};
+class swapper_false {
+public:
+ swapper_false(bool cond, bool& b) :
+ val(false),
+ swp(cond, val, b)
+ {}
+
+private:
+ bool val;
+ swapper<bool> swp;
+};
+
+template<typename T>
+class uncond_swap {
+public:
+ uncond_swap(T& a, T b) :
+ a_(a),
+ b_(b)
+ {
+ std::swap(a_, b_);
+ }
+
+ ~uncond_swap() {
+ std::swap(a_, b_);
+ }
+private:
+ T& a_;
+ T b_;
+};
+
template<typename T>
inline T* reverse(T *head) {
T *nr = 0;
@@ -525,6 +556,22 @@ inline void erase(Cont& cont, const T& val) {
cont.erase(std::remove(cont.begin(), cont.end(), val), cont.end());
}
+inline size_t fread_throw(void *buffer, size_t size, size_t count, FILE *stream) {
+ size_t rv = ::fread(buffer, size, count, stream);
+ if (rv != count) {
+ throw std::runtime_error("fread() did not read all requested objects");
+ }
+ return rv;
+}
+
+inline size_t fwrite_throw(const void *buffer, size_t size, size_t count, FILE *stream) {
+ size_t rv = ::fwrite(buffer, size, count, stream);
+ if (rv != count) {
+ throw std::runtime_error("fwrite() did not write all requested objects");
+ }
+ return rv;
+}
+
}
#endif
diff --git a/src/main.cpp b/src/main.cpp
index 2b9359c..bfa2428 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -53,9 +53,6 @@ int main(int argc, char* argv[]) {
if (options[VERSION].doesOccur || options[HELP1].doesOccur || options[HELP2].doesOccur) {
out = stdout;
- }
-
- if (options[VERBOSE].doesOccur || options[VERSION].doesOccur || options[HELP1].doesOccur || options[HELP2].doesOccur) {
// Keep the invocation vislcg3 --version | grep -Eo '[0-9]+$' holy so that it only outputs the revision regardless of other flags.
fprintf(out, "VISL CG-3 Disambiguator version %u.%u.%u.%u\n", CG3_VERSION_MAJOR, CG3_VERSION_MINOR, CG3_VERSION_PATCH, CG3_REVISION);
}
@@ -109,6 +106,7 @@ int main(int argc, char* argv[]) {
return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
+ fflush(out);
fflush(stderr);
if (options[SHOW_UNUSED_SETS].doesOccur || options[SHOW_SET_HASHES].doesOccur) {
@@ -121,6 +119,13 @@ int main(int argc, char* argv[]) {
}
}
+ if (options[QUIET].doesOccur) {
+ options[VERBOSE].doesOccur = false;
+ }
+ if (options[VERBOSE].doesOccur && options[VERBOSE].value && strcmp(options[VERBOSE].value, "0") == 0) {
+ options[VERBOSE].doesOccur = false;
+ }
+
/* Initialize ICU */
u_init(&status);
if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
@@ -265,13 +270,17 @@ int main(int argc, char* argv[]) {
size_t sn = strlen(options[MAPPING_PREFIX].value);
CG3::UString buf(sn*3, 0);
ucnv_toUChars(conv, &buf[0], buf.size(), options[MAPPING_PREFIX].value, sn, &status);
+ if (grammar.is_binary && grammar.mapping_prefix != buf[0]) {
+ std::cerr << "Error: Mapping prefix must match the one used for compiling the binary grammar!" << std::endl;
+ CG3Quit(1);
+ }
grammar.mapping_prefix = buf[0];
ucnv_close(conv);
}
if (options[VERBOSE].doesOccur) {
std::cerr << "Reindexing grammar..." << std::endl;
}
- grammar.reindex(options[SHOW_UNUSED_SETS].doesOccur == 1);
+ grammar.reindex(options[SHOW_UNUSED_SETS].doesOccur == 1, options[SHOW_TAGS].doesOccur == 1);
delete parser;
parser = 0;
@@ -561,6 +570,9 @@ void GAppSetOpts(CG3::GrammarApplicator& applicator, UConverter *conv) {
if (options[NO_PASS_ORIGIN].doesOccur) {
applicator.no_pass_origin = true;
}
+ if (options[SPLIT_MAPPINGS].doesOccur) {
+ applicator.split_mappings = true;
+ }
if (options[SHOW_END_TAGS].doesOccur) {
applicator.show_end_tags = true;
}
diff --git a/src/options.hpp b/src/options.hpp
index ca31c5b..c44332d 100644
--- a/src/options.hpp
+++ b/src/options.hpp
@@ -42,6 +42,7 @@ namespace Options {
RULE,
DODEBUG,
VERBOSE,
+ QUIET,
VISLCGCOMPAT,
STDIN,
STDOUT,
@@ -78,76 +79,81 @@ namespace Options {
DEP_BLOCK_CROSSING,
MAGIC_READINGS,
NO_PASS_ORIGIN,
+ SPLIT_MAPPINGS,
SHOW_END_TAGS,
SHOW_UNUSED_SETS,
+ SHOW_TAGS,
SHOW_TAG_HASHES,
SHOW_SET_HASHES,
NUM_OPTIONS
};
UOption options[]= {
- UOPTION_DEF_D("help", 'h', UOPT_NO_ARG, "shows this help"),
- UOPTION_DEF_D("?", '?', UOPT_NO_ARG, "shows this help"),
- UOPTION_DEF_D("version", 'V', UOPT_NO_ARG, "prints copyright and version information"),
- UOPTION_DEF_D("min-binary-revision", 0, UOPT_NO_ARG, "prints the minimum usable binary grammar revision"),
- UOPTION_DEF_D("grammar", 'g', UOPT_REQUIRES_ARG, "specifies the grammar file to use for disambiguation"),
- UOPTION_DEF_D("grammar-out", 0, UOPT_REQUIRES_ARG, "writes the compiled grammar in textual form to a file"),
- UOPTION_DEF_D("grammar-bin", 0, UOPT_REQUIRES_ARG, "writes the compiled grammar in binary form to a file"),
- UOPTION_DEF_D("grammar-only", 0, UOPT_NO_ARG, "only compiles the grammar; implies --verbose"),
- UOPTION_DEF_D("ordered", 0, UOPT_NO_ARG, "(will in future allow full ordered matching)"),
- UOPTION_DEF_D("unsafe", 'u', UOPT_NO_ARG, "allows the removal of all readings in a cohort, even the last one"),
- UOPTION_DEF_D("sections", 's', UOPT_REQUIRES_ARG, "number or ranges of sections to run; defaults to all sections"),
- UOPTION_DEF_D("rules", 0, UOPT_REQUIRES_ARG, "number or ranges of rules to run; defaults to all rules"),
- UOPTION_DEF_D("rule", 0, UOPT_REQUIRES_ARG, "a name or number of a single rule to run"),
- UOPTION_DEF_D("debug", 'd', UOPT_OPTIONAL_ARG, "enables debug output (very noisy)"),
- UOPTION_DEF_D("verbose", 'v', UOPT_OPTIONAL_ARG, "increases verbosity"),
- UOPTION_DEF_D("vislcg-compat", '2', UOPT_NO_ARG, "enables compatibility mode for older CG-2 and vislcg grammars"),
+ UOPTION_DEF_D("help", 'h', UOPT_NO_ARG, "shows this help"),
+ UOPTION_DEF_D("?", '?', UOPT_NO_ARG, "shows this help"),
+ UOPTION_DEF_D("version", 'V', UOPT_NO_ARG, "prints copyright and version information"),
+ UOPTION_DEF_D("min-binary-revision", 0, UOPT_NO_ARG, "prints the minimum usable binary grammar revision"),
+ UOPTION_DEF_D("grammar", 'g', UOPT_REQUIRES_ARG, "specifies the grammar file to use for disambiguation"),
+ UOPTION_DEF_D("grammar-out", 0, UOPT_REQUIRES_ARG, "writes the compiled grammar in textual form to a file"),
+ UOPTION_DEF_D("grammar-bin", 0, UOPT_REQUIRES_ARG, "writes the compiled grammar in binary form to a file"),
+ UOPTION_DEF_D("grammar-only", 0, UOPT_NO_ARG, "only compiles the grammar; implies --verbose"),
+ UOPTION_DEF_D("ordered", 0, UOPT_NO_ARG, "(will in future allow full ordered matching)"),
+ UOPTION_DEF_D("unsafe", 'u', UOPT_NO_ARG, "allows the removal of all readings in a cohort, even the last one"),
+ UOPTION_DEF_D("sections", 's', UOPT_REQUIRES_ARG, "number or ranges of sections to run; defaults to all sections"),
+ UOPTION_DEF_D("rules", 0, UOPT_REQUIRES_ARG, "number or ranges of rules to run; defaults to all rules"),
+ UOPTION_DEF_D("rule", 0, UOPT_REQUIRES_ARG, "a name or number of a single rule to run"),
+ UOPTION_DEF_D("debug", 'd', UOPT_OPTIONAL_ARG, "enables debug output (very noisy)"),
+ UOPTION_DEF_D("verbose", 'v', UOPT_OPTIONAL_ARG, "increases verbosity"),
+ UOPTION_DEF_D("quiet", 0, UOPT_NO_ARG, "squelches warnings (same as -v 0)"),
+ UOPTION_DEF_D("vislcg-compat", '2', UOPT_NO_ARG, "enables compatibility mode for older CG-2 and vislcg grammars"),
- UOPTION_DEF_D("stdin", 'I', UOPT_REQUIRES_ARG, "file to read input from instead of stdin"),
- UOPTION_DEF_D("stdout", 'O', UOPT_REQUIRES_ARG, "file to print output to instead of stdout"),
- UOPTION_DEF_D("stderr", 'E', UOPT_REQUIRES_ARG, "file to print errors to instead of stderr"),
+ UOPTION_DEF_D("stdin", 'I', UOPT_REQUIRES_ARG, "file to read input from instead of stdin"),
+ UOPTION_DEF_D("stdout", 'O', UOPT_REQUIRES_ARG, "file to print output to instead of stdout"),
+ UOPTION_DEF_D("stderr", 'E', UOPT_REQUIRES_ARG, "file to print errors to instead of stderr"),
- UOPTION_DEF_D("codepage-all", 'C', UOPT_REQUIRES_ARG, "codepage to use for grammar, input, and output streams; defaults to UTF-8"),
- UOPTION_DEF_D("codepage-grammar", 0, UOPT_REQUIRES_ARG, "codepage to use for grammar; overrides --codepage-all"),
- UOPTION_DEF_D("codepage-input", 0, UOPT_REQUIRES_ARG, "codepage to use for input; overrides --codepage-all"),
- UOPTION_DEF_D("codepage-output", 0, UOPT_REQUIRES_ARG, "codepage to use for output and errors; overrides --codepage-all"),
+ UOPTION_DEF_D("codepage-all", 'C', UOPT_REQUIRES_ARG, "codepage to use for grammar, input, and output streams; defaults to UTF-8"),
+ UOPTION_DEF_D("codepage-grammar", 0, UOPT_REQUIRES_ARG, "codepage to use for grammar; overrides --codepage-all"),
+ UOPTION_DEF_D("codepage-input", 0, UOPT_REQUIRES_ARG, "codepage to use for input; overrides --codepage-all"),
+ UOPTION_DEF_D("codepage-output", 0, UOPT_REQUIRES_ARG, "codepage to use for output and errors; overrides --codepage-all"),
- UOPTION_DEF_D("no-mappings", 0, UOPT_NO_ARG, "disables all MAP, ADD, and REPLACE rules"),
- UOPTION_DEF_D("no-corrections", 0, UOPT_NO_ARG, "disables all SUBSTITUTE and APPEND rules"),
- UOPTION_DEF_D("no-before-sections", 0, UOPT_NO_ARG, "disables all rules in BEFORE-SECTIONS parts"),
- UOPTION_DEF_D("no-sections", 0, UOPT_NO_ARG, "disables all rules in SECTION parts"),
- UOPTION_DEF_D("no-after-sections", 0, UOPT_NO_ARG, "disables all rules in AFTER-SECTIONS parts"),
+ UOPTION_DEF_D("no-mappings", 0, UOPT_NO_ARG, "disables all MAP, ADD, and REPLACE rules"),
+ UOPTION_DEF_D("no-corrections", 0, UOPT_NO_ARG, "disables all SUBSTITUTE and APPEND rules"),
+ UOPTION_DEF_D("no-before-sections", 0, UOPT_NO_ARG, "disables all rules in BEFORE-SECTIONS parts"),
+ UOPTION_DEF_D("no-sections", 0, UOPT_NO_ARG, "disables all rules in SECTION parts"),
+ UOPTION_DEF_D("no-after-sections", 0, UOPT_NO_ARG, "disables all rules in AFTER-SECTIONS parts"),
- UOPTION_DEF_D("trace", 't', UOPT_NO_ARG, "prints debug output alongside with normal output"),
- UOPTION_DEF_D("trace-name-only", 0, UOPT_NO_ARG, "if a rule is named, omit the line number; implies --trace"),
- UOPTION_DEF_D("trace-no-removed", 0, UOPT_NO_ARG, "does not print removed readings; implies --trace"),
- UOPTION_DEF_D("trace-encl", 0, UOPT_NO_ARG, "traces which enclosure pass is currently happening; implies --trace"),
+ UOPTION_DEF_D("trace", 't', UOPT_NO_ARG, "prints debug output alongside with normal output"),
+ UOPTION_DEF_D("trace-name-only", 0, UOPT_NO_ARG, "if a rule is named, omit the line number; implies --trace"),
+ UOPTION_DEF_D("trace-no-removed", 0, UOPT_NO_ARG, "does not print removed readings; implies --trace"),
+ UOPTION_DEF_D("trace-encl", 0, UOPT_NO_ARG, "traces which enclosure pass is currently happening; implies --trace"),
- UOPTION_DEF_D("dry-run", 0, UOPT_NO_ARG, "make no actual changes to the input"),
- UOPTION_DEF_D("single-run", 0, UOPT_NO_ARG, "runs each section only once; same as --max-runs 1"),
- UOPTION_DEF_D("max-runs", 0, UOPT_REQUIRES_ARG, "runs each section max N times; defaults to unlimited (0)"),
- UOPTION_DEF_D("statistics", 'S', UOPT_NO_ARG, "gathers profiling statistics while applying grammar"),
- UOPTION_DEF_D("optimize-unsafe", 'Z', UOPT_NO_ARG, "destructively optimize the profiled grammar to be faster"),
- UOPTION_DEF_D("optimize-safe", 'z', UOPT_NO_ARG, "conservatively optimize the profiled grammar to be faster"),
- UOPTION_DEF_D("prefix", 'p', UOPT_REQUIRES_ARG, "sets the mapping prefix; defaults to @"),
- UOPTION_DEF_D("unicode-tags", 0, UOPT_NO_ARG, "outputs Unicode code points for things like ->"),
- UOPTION_DEF_D("unique-tags", 0, UOPT_NO_ARG, "outputs unique tags only once per reading"),
+ UOPTION_DEF_D("dry-run", 0, UOPT_NO_ARG, "make no actual changes to the input"),
+ UOPTION_DEF_D("single-run", 0, UOPT_NO_ARG, "runs each section only once; same as --max-runs 1"),
+ UOPTION_DEF_D("max-runs", 0, UOPT_REQUIRES_ARG, "runs each section max N times; defaults to unlimited (0)"),
+ UOPTION_DEF_D("statistics", 'S', UOPT_NO_ARG, "gathers profiling statistics while applying grammar"),
+ UOPTION_DEF_D("optimize-unsafe", 'Z', UOPT_NO_ARG, "destructively optimize the profiled grammar to be faster"),
+ UOPTION_DEF_D("optimize-safe", 'z', UOPT_NO_ARG, "conservatively optimize the profiled grammar to be faster"),
+ UOPTION_DEF_D("prefix", 'p', UOPT_REQUIRES_ARG, "sets the mapping prefix; defaults to @"),
+ UOPTION_DEF_D("unicode-tags", 0, UOPT_NO_ARG, "outputs Unicode code points for things like ->"),
+ UOPTION_DEF_D("unique-tags", 0, UOPT_NO_ARG, "outputs unique tags only once per reading"),
- UOPTION_DEF_D("num-windows", 0, UOPT_REQUIRES_ARG, "number of windows to keep in before/ahead buffers; defaults to 2"),
- UOPTION_DEF_D("always-span", 0, UOPT_NO_ARG, "forces scanning tests to always span across window boundaries"),
- UOPTION_DEF_D("soft-limit", 0, UOPT_REQUIRES_ARG, "number of cohorts after which the SOFT-DELIMITERS kick in; defaults to 300"),
- UOPTION_DEF_D("hard-limit", 0, UOPT_REQUIRES_ARG, "number of cohorts after which the window is forcefully cut; defaults to 500"),
- UOPTION_DEF_D("dep-delimit", 'D', UOPT_OPTIONAL_ARG, "delimit windows based on dependency instead of DELIMITERS; defaults to 10"),
- UOPTION_DEF_D("dep-original", 0, UOPT_NO_ARG, "outputs the original input dependency tag even if it is no longer valid"),
- UOPTION_DEF_D("dep-allow-loops", 0, UOPT_NO_ARG, "allows the creation of circular dependencies"),
- UOPTION_DEF_D("dep-no-crossing", 0, UOPT_NO_ARG, "prevents the creation of dependencies that would result in crossing branches"),
+ UOPTION_DEF_D("num-windows", 0, UOPT_REQUIRES_ARG, "number of windows to keep in before/ahead buffers; defaults to 2"),
+ UOPTION_DEF_D("always-span", 0, UOPT_NO_ARG, "forces scanning tests to always span across window boundaries"),
+ UOPTION_DEF_D("soft-limit", 0, UOPT_REQUIRES_ARG, "number of cohorts after which the SOFT-DELIMITERS kick in; defaults to 300"),
+ UOPTION_DEF_D("hard-limit", 0, UOPT_REQUIRES_ARG, "number of cohorts after which the window is forcefully cut; defaults to 500"),
+ UOPTION_DEF_D("dep-delimit", 'D', UOPT_OPTIONAL_ARG, "delimit windows based on dependency instead of DELIMITERS; defaults to 10"),
+ UOPTION_DEF_D("dep-original", 0, UOPT_NO_ARG, "outputs the original input dependency tag even if it is no longer valid"),
+ UOPTION_DEF_D("dep-allow-loops", 0, UOPT_NO_ARG, "allows the creation of circular dependencies"),
+ UOPTION_DEF_D("dep-no-crossing", 0, UOPT_NO_ARG, "prevents the creation of dependencies that would result in crossing branches"),
- UOPTION_DEF_D("no-magic-readings", 0, UOPT_NO_ARG, "prevents running rules on magic readings"),
- UOPTION_DEF_D("no-pass-origin", 'o', UOPT_NO_ARG, "prevents scanning tests from passing the point of origin"),
- UOPTION_DEF_D("show-end-tags", 'e', UOPT_NO_ARG, "allows the <<< tags to appear in output"),
- UOPTION_DEF_D("show-unused-sets", 0, UOPT_NO_ARG, "prints a list of unused sets and their line numbers; implies --grammar-only"),
- UOPTION_DEF_D("show-tag-hashes", 0, UOPT_NO_ARG, "prints a list of tags and their hashes as they are parsed during the run"),
- UOPTION_DEF_D("show-set-hashes", 0, UOPT_NO_ARG, "prints a list of sets and their hashes; implies --grammar-only")
+ UOPTION_DEF_D("no-magic-readings", 0, UOPT_NO_ARG, "prevents running rules on magic readings"),
+ UOPTION_DEF_D("no-pass-origin", 'o', UOPT_NO_ARG, "prevents scanning tests from passing the point of origin"),
+ UOPTION_DEF_D("split-mappings", 0, UOPT_NO_ARG, "keep mapped readings separate in output"),
+ UOPTION_DEF_D("show-end-tags", 'e', UOPT_NO_ARG, "allows the <<< tags to appear in output"),
+ UOPTION_DEF_D("show-unused-sets", 0, UOPT_NO_ARG, "prints a list of unused sets and their line numbers; implies --grammar-only"),
+ UOPTION_DEF_D("show-tags", 0, UOPT_NO_ARG, "prints a list of unique tags; implies --grammar-only"),
+ UOPTION_DEF_D("show-tag-hashes", 0, UOPT_NO_ARG, "prints a list of tags and their hashes as they are parsed during the run"),
+ UOPTION_DEF_D("show-set-hashes", 0, UOPT_NO_ARG, "prints a list of sets and their hashes; implies --grammar-only")
};
}
diff --git a/src/options_conv.hpp b/src/options_conv.hpp
index 1e74213..4d9d61f 100644
--- a/src/options_conv.hpp
+++ b/src/options_conv.hpp
@@ -42,6 +42,9 @@ namespace Options {
OUT_APERTIUM,
OUT_NICELINE,
OUT_PLAIN,
+ FST_WFACTOR,
+ FST_WTAG,
+ SUB_DELIMITER,
SUB_RTL,
SUB_LTR,
NUM_OPTIONS
@@ -63,6 +66,9 @@ namespace Options {
UOPTION_DEF_D("out-apertium", 'A', UOPT_NO_ARG, "sets output format to Apertium"),
UOPTION_DEF_D("out-niceline", 'N', UOPT_NO_ARG, "sets output format to Niceline CG"),
UOPTION_DEF_D("out-plain", 'P', UOPT_NO_ARG, "sets output format to plain text"),
+ UOPTION_DEF_D("wfactor", 'W', UOPT_REQUIRES_ARG, "FST weight factor (defaults to 100.0)"),
+ UOPTION_DEF_D("wtag", 0, UOPT_REQUIRES_ARG, "FST weight tag prefix (defaults to W)"),
+ UOPTION_DEF_D("sub-delim", 'S', UOPT_REQUIRES_ARG, "FST sub-reading delimiters (defaults to #)"),
UOPTION_DEF_D("rtl", 'r', UOPT_NO_ARG, "sets sub-reading direction to RTL (default)"),
UOPTION_DEF_D("ltr", 'l', UOPT_NO_ARG, "sets sub-reading direction to LTR")
};
diff --git a/src/parser_helpers.hpp b/src/parser_helpers.hpp
new file mode 100644
index 0000000..8d7e620
--- /dev/null
+++ b/src/parser_helpers.hpp
@@ -0,0 +1,324 @@
+/*
+* Copyright (C) 2007-2015, GrammarSoft ApS
+* Developed by Tino Didriksen <mail at tinodidriksen.com>
+* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
+*
+* This file is part of VISL CG-3
+*
+* VISL CG-3 is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* VISL CG-3 is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+#ifndef c6d28b7452ec699b_PARSER_HELPERS_H
+#define c6d28b7452ec699b_PARSER_HELPERS_H
+
+#include "Tag.hpp"
+#include "Set.hpp"
+#include "Grammar.hpp"
+#include "Strings.hpp"
+
+namespace CG3 {
+
+// Parses the textual tag `to` into a Tag registered with the grammar held by `state`.
+//
+// to    - zero-terminated tag text as written in the grammar.
+// p     - only forwarded to state.error(); it fills the `%S` after "near" in the messages.
+// state - parser state; this function uses its error(), get_grammar(), addTag(),
+//         ux_stderr and filebase members.
+//
+// Returns the already known tag from single_tags when both hash and text match,
+// otherwise the result of state.addTag() for the newly allocated tag.
+//
+// NOTE(review): the state.error() format strings start with `%s` and contain a `%u`
+// that have no matching argument at these call sites; presumably State::error()
+// supplies the file name and line number itself - confirm against its definition.
+// NOTE(review): the code after each state.error() call is only safe if error() does
+// not return normally - confirm.
+template<typename State>
+Tag *parseTag(const UChar *to, const UChar *p, State& state) {
+ if (to[0] == 0) {
+ state.error("%s: Error: Empty tag on line %u near `%S`! Forgot to fill in a ()?\n", p);
+ }
+ if (to[0] == '(') {
+ state.error("%s: Error: Tag '%S' cannot start with ( on line %u near `%S`! Possible extra opening ( or missing closing ) to the left. If you really meant it, escape it as \\(.\n", to, p);
+ }
+ // A tag that reads like a set operator is only warned about, not rejected
+ if (ux_isSetOp(to) != S_IGNORE) {
+ u_fprintf(state.ux_stderr, "%s: Warning: Tag '%S' on line %u looks like a set operator. Maybe you meant to do SET instead of LIST?\n", state.filebase, to, state.get_grammar()->lines);
+ u_fflush(state.ux_stderr);
+ }
+
+ // Reuse an existing tag if one with the same hash and identical text is already known
+ Taguint32HashMap::iterator it;
+ uint32_t thash = hash_value(to);
+ if ((it = state.get_grammar()->single_tags.find(thash)) != state.get_grammar()->single_tags.end() && !it->second->tag.empty() && u_strcmp(it->second->tag.c_str(), to) == 0) {
+ return it->second;
+ }
+
+ Tag *tag = state.get_grammar()->allocateTag();
+ tag->type = 0;
+
+ // `to` was already dereferenced above, so only the empty-string half of this test can matter here
+ if (to && to[0]) {
+ const UChar *tmp = to;
+ // Any number of leading ! or ^ characters mark the tag as fail-fast and are skipped
+ while (tmp[0] && (tmp[0] == '!' || tmp[0] == '^')) {
+ if (tmp[0] == '!' || tmp[0] == '^') {
+ tag->type |= T_FAILFAST;
+ tmp++;
+ }
+ }
+
+ size_t length = u_strlen(tmp);
+ assert(length && "parseTag() will not work with empty strings.");
+
+ if (tmp[0] == 'T' && tmp[1] == ':') {
+ u_fprintf(state.ux_stderr, "%s: Warning: Tag %S looks like a misattempt of template usage on line %u.\n", state.filebase, tmp, state.get_grammar()->lines);
+ u_fflush(state.ux_stderr);
+ }
+
+ // Recognized prefixes set a type flag and are stripped from the tag text
+ // ToDo: Implement META and VAR
+ if (tmp[0] == 'M' && tmp[1] == 'E' && tmp[2] == 'T' && tmp[3] == 'A' && tmp[4] == ':') {
+ tag->type |= T_META;
+ tmp += 5;
+ length -= 5;
+ }
+ if (tmp[0] == 'V' && tmp[1] == 'A' && tmp[2] == 'R' && tmp[3] == ':') {
+ tag->type |= T_VARIABLE;
+ tmp += 4;
+ length -= 4;
+ }
+ if (tmp[0] == 'S' && tmp[1] == 'E' && tmp[2] == 'T' && tmp[3] == ':') {
+ tag->type |= T_SET;
+ tmp += 4;
+ length -= 4;
+ }
+ // VSTR: tags keep the remaining text verbatim and skip all further parsing below
+ if (tmp[0] == 'V' && tmp[1] == 'S' && tmp[2] == 'T' && tmp[3] == 'R' && tmp[4] == ':') {
+ tag->type |= T_VARSTRING;
+ tag->type |= T_VSTR;
+ tmp += 5;
+
+ tag->tag.assign(tmp);
+ if (tag->tag.empty()) {
+ state.error("%s: Error: Parsing tag %S resulted in an empty tag on line %u near `%S` - cannot continue!\n", tag->tag.c_str(), p);
+ }
+
+ goto label_isVarstring;
+ }
+
+ // Only tags opening with ", < or / can carry the r/i/v suffix flags
+ if (tmp[0] && (tmp[0] == '"' || tmp[0] == '<' || tmp[0] == '/')) {
+ size_t oldlength = length;
+
+ // Parse the suffixes r, i, v but max only one of each.
+ while (tmp[length - 1] == 'i' || tmp[length - 1] == 'r' || tmp[length - 1] == 'v') {
+ if (!(tag->type & T_VARSTRING) && tmp[length - 1] == 'v') {
+ tag->type |= T_VARSTRING;
+ length--;
+ continue;
+ }
+ if (!(tag->type & T_REGEXP) && tmp[length - 1] == 'r') {
+ tag->type |= T_REGEXP;
+ length--;
+ continue;
+ }
+ if (!(tag->type & T_CASE_INSENSITIVE) && tmp[length - 1] == 'i') {
+ tag->type |= T_CASE_INSENSITIVE;
+ length--;
+ continue;
+ }
+ break;
+ }
+
+ // "<...>" is a wordform, any other "..." is a baseform
+ if (tmp[0] == '"' && tmp[length - 1] == '"') {
+ if (tmp[1] == '<' && tmp[length - 2] == '>') {
+ tag->type |= T_WORDFORM;
+ }
+ else {
+ tag->type |= T_BASEFORM;
+ }
+ }
+
+ if ((tmp[0] == '"' && tmp[length - 1] == '"') || (tmp[0] == '<' && tmp[length - 1] == '>') || (tmp[0] == '/' && tmp[length - 1] == '/')) {
+ tag->type |= T_TEXTUAL;
+ }
+ else {
+ // The text is not closed by its matching delimiter, so the trailing
+ // letters were not flags after all: undo them and restore the length
+ tag->type &= ~T_VARSTRING;
+ tag->type &= ~T_REGEXP;
+ tag->type &= ~T_CASE_INSENSITIVE;
+ tag->type &= ~T_WORDFORM;
+ tag->type &= ~T_BASEFORM;
+ length = oldlength;
+ }
+ }
+
+ // Copy the tag text, dropping each backslash and keeping the character after it;
+ // length is decremented once per dropped backslash
+ for (size_t i = 0, oldlength = length; tmp[i] != 0 && i < oldlength; ++i) {
+ if (tmp[i] == '\\') {
+ ++i;
+ --length;
+ }
+ if (tmp[i] == 0) {
+ break;
+ }
+ tag->tag += tmp[i];
+ }
+ if (tag->tag.empty()) {
+ state.error("%s: Error: Parsing tag %S resulted in an empty tag on line %u near `%S` - cannot continue!\n", tag->tag.c_str(), p);
+ }
+
+ // A tag fully matched by any of the grammar's regex_tags is flagged textual
+ foreach(Grammar::regex_tags_t, state.get_grammar()->regex_tags, iter, iter_end) {
+ UErrorCode status = U_ZERO_ERROR;
+ uregex_setText(*iter, tag->tag.c_str(), tag->tag.length(), &status);
+ if (status != U_ZERO_ERROR) {
+ state.error("%s: Error: uregex_setText(parseTag) returned %s on line %u near `%S` - cannot continue!\n", u_errorName(status), p);
+ }
+ status = U_ZERO_ERROR;
+ if (uregex_matches(*iter, 0, &status)) {
+ tag->type |= T_TEXTUAL;
+ }
+ }
+ // Likewise for a case-folded match against any of the grammar's icase_tags
+ foreach(Grammar::icase_tags_t, state.get_grammar()->icase_tags, iter, iter_end) {
+ UErrorCode status = U_ZERO_ERROR;
+ if (u_strCaseCompare(tag->tag.c_str(), tag->tag.length(), (*iter)->tag.c_str(), (*iter)->tag.length(), U_FOLD_CASE_DEFAULT, &status) == 0) {
+ tag->type |= T_TEXTUAL;
+ }
+ if (status != U_ZERO_ERROR) {
+ state.error("%s: Error: u_strCaseCompare(parseTag) returned %s on line %u near `%S` - cannot continue!\n", u_errorName(status), p);
+ }
+ }
+
+ tag->comparison_hash = hash_value(tag->tag);
+
+ // <...> tags are handed to Tag::parseNumeric()
+ if (tag->tag[0] == '<' && tag->tag[length - 1] == '>') {
+ tag->parseNumeric();
+ }
+
+ // Tags whose text equals one of the reserved strings get the matching type flag
+ if (u_strcmp(tag->tag.c_str(), stringbits[S_ASTERIK].getTerminatedBuffer()) == 0) {
+ tag->type |= T_ANY;
+ }
+ else if (u_strcmp(tag->tag.c_str(), stringbits[S_UU_LEFT].getTerminatedBuffer()) == 0) {
+ tag->type |= T_PAR_LEFT;
+ }
+ else if (u_strcmp(tag->tag.c_str(), stringbits[S_UU_RIGHT].getTerminatedBuffer()) == 0) {
+ tag->type |= T_PAR_RIGHT;
+ }
+ else if (u_strcmp(tag->tag.c_str(), stringbits[S_UU_ENCL].getTerminatedBuffer()) == 0) {
+ tag->type |= T_ENCL;
+ }
+ else if (u_strcmp(tag->tag.c_str(), stringbits[S_UU_TARGET].getTerminatedBuffer()) == 0) {
+ tag->type |= T_TARGET;
+ }
+ else if (u_strcmp(tag->tag.c_str(), stringbits[S_UU_MARK].getTerminatedBuffer()) == 0) {
+ tag->type |= T_MARK;
+ }
+ else if (u_strcmp(tag->tag.c_str(), stringbits[S_UU_ATTACHTO].getTerminatedBuffer()) == 0) {
+ tag->type |= T_ATTACHTO;
+ }
+ else if (u_strcmp(tag->tag.c_str(), stringbits[S_UU_SAME_BASIC].getTerminatedBuffer()) == 0) {
+ tag->type |= T_SAME_BASIC;
+ }
+
+ if (tag->type & T_REGEXP) {
+ // The three predefined match-anything patterns are flagged T_REGEXP_ANY instead of being compiled
+ if (u_strcmp(tag->tag.c_str(), stringbits[S_RXTEXT_ANY].getTerminatedBuffer()) == 0
+ || u_strcmp(tag->tag.c_str(), stringbits[S_RXBASE_ANY].getTerminatedBuffer()) == 0
+ || u_strcmp(tag->tag.c_str(), stringbits[S_RXWORD_ANY].getTerminatedBuffer()) == 0) {
+ // ToDo: Add a case-insensitive version of T_REGEXP_ANY for unification
+ tag->type |= T_REGEXP_ANY;
+ tag->type &= ~T_REGEXP;
+ }
+ else {
+ UParseError pe;
+ UErrorCode status = U_ZERO_ERROR;
+
+ // /.../ patterns are compiled as written between the slashes; every other
+ // pattern is wrapped in ^...$ before compiling
+ UString rt;
+ if (tag->tag[0] == '/' && tag->tag[length - 1] == '/') {
+ rt = tag->tag.substr(1, length - 2);
+ }
+ else {
+ rt += '^';
+ rt += tag->tag;
+ rt += '$';
+ }
+
+ if (tag->type & T_CASE_INSENSITIVE) {
+ tag->regexp = uregex_open(rt.c_str(), rt.length(), UREGEX_CASE_INSENSITIVE, &pe, &status);
+ }
+ else {
+ tag->regexp = uregex_open(rt.c_str(), rt.length(), 0, &pe, &status);
+ }
+ if (status != U_ZERO_ERROR) {
+ state.error("%s: Error: uregex_open returned %s trying to parse tag %S on line %u near `%S` - cannot continue!\n", u_errorName(status), tag->tag.c_str(), p);
+ }
+ }
+ }
+ // For regex or case-insensitive /.../ tags the stored text loses its enclosing slashes
+ if (tag->type & (T_CASE_INSENSITIVE|T_REGEXP)) {
+ if (tag->tag[0] == '/' && tag->tag[length - 1] == '/') {
+ tag->tag.resize(tag->tag.size() - 1);
+ tag->tag.erase(tag->tag.begin());
+ }
+ }
+
+ label_isVarstring:
+ ;
+ }
+
+ // T_SPECIAL is recomputed from scratch: set only if any MASK_TAG_SPECIAL bit is present
+ tag->type &= ~T_SPECIAL;
+ if (tag->type & MASK_TAG_SPECIAL) {
+ tag->type |= T_SPECIAL;
+ }
+
+ if (tag->type & T_VARSTRING && tag->type & (T_REGEXP | T_REGEXP_ANY | T_VARIABLE | T_META)) {
+ state.error("%s: Error: Tag %S cannot mix varstring with any other special feature on line %u near `%S`!\n", to, p);
+ }
+
+ return state.addTag(tag);
+}
+
+// Resolves the set called `name` in the grammar held by `state`.
+//
+// name  - zero-terminated set name; a `$$` or `&&` prefix followed by more text
+//         requests a unification wrapper around the set named by the remainder.
+// p     - only forwarded to state.error() for the "near" part of its messages.
+// state - parser state; uses its error(), get_grammar() and strict_tags members.
+//
+// Returns the set found after alias resolution. When no such set exists and
+// strict_tags is non-empty, `name` is parsed as a tag and, if that tag is listed
+// in strict_tags, a new single-tag set is created and returned. In every other
+// case state.error() is called and the (null) lookup result is returned.
+template<typename State>
+Set *parseSet(const UChar *name, const UChar *p, State& state) {
+	uint32_t sh = hash_value(name);
+
+	if (ux_isSetOp(name) != S_IGNORE) {
+		state.error("%s: Error: Found set operator '%S' where set name expected on line %u near `%S`!\n", name, p);
+	}
+
+	const bool wants_tag_unify = (name[0] == '$' && name[1] == '$');
+	const bool wants_set_unify = (name[0] == '&' && name[1] == '&');
+
+	if ((wants_tag_unify || wants_set_unify) && name[2]) {
+		// The wrapped set is named by everything after the two prefix characters
+		const UChar *wname = name + 2;
+		Set *wrapped = state.get_grammar()->getSet(hash_value(wname));
+		if (!wrapped) {
+			state.error("%s: Error: Attempted to reference undefined set '%S' on line %u near `%S`!\n", wname, p);
+		}
+
+		// Create the unification wrapper only once per distinct name
+		if (!state.get_grammar()->getSet(sh)) {
+			Set *ns = state.get_grammar()->allocateSet();
+			ns->line = state.get_grammar()->lines;
+			ns->setName(name);
+			ns->sets.push_back(wrapped->hash);
+			if (wants_tag_unify) {
+				ns->type |= ST_TAG_UNIFY;
+			}
+			else {
+				ns->type |= ST_SET_UNIFY;
+			}
+			state.get_grammar()->addSet(ns);
+		}
+	}
+
+	// Follow a set alias, if one is registered for this hash
+	if (state.get_grammar()->set_alias.find(sh) != state.get_grammar()->set_alias.end()) {
+		sh = state.get_grammar()->set_alias[sh];
+	}
+
+	Set *found = state.get_grammar()->getSet(sh);
+	if (found) {
+		return found;
+	}
+
+	// Unknown set: with strict tags in effect, a listed tag may stand in as a one-tag set
+	if (!state.strict_tags.empty()) {
+		Tag *tag = parseTag(name, p, state);
+		if (state.strict_tags.count(tag->plain_hash)) {
+			Set *ns = state.get_grammar()->allocateSet();
+			ns->line = state.get_grammar()->lines;
+			ns->setName(name);
+			state.get_grammar()->addTagToSet(tag, ns);
+			state.get_grammar()->addSet(ns);
+			return ns;
+		}
+	}
+
+	state.error("%s: Error: Attempted to reference undefined set '%S' on line %u near `%S`!\n", name, p);
+	return found;
+}
+
+}
+
+#endif
diff --git a/src/process.hpp b/src/process.hpp
new file mode 100644
index 0000000..a25c40d
--- /dev/null
+++ b/src/process.hpp
@@ -0,0 +1,199 @@
+/*
+* Copyright (C) 2007-2015, GrammarSoft ApS
+* Developed by Tino Didriksen <mail at tinodidriksen.com>
+* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
+*
+* This file is part of VISL CG-3
+*
+* VISL CG-3 is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* VISL CG-3 is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+#ifndef c6d28b7452ec699b_PROCESS_HPP
+#define c6d28b7452ec699b_PROCESS_HPP
+#include <string>
+#include <stdexcept>
+
+#ifdef _WIN32
+#include <windows.h>
+
+// Win32 implementation: runs a child process whose stdin, stdout and stderr are
+// redirected through anonymous pipes owned by this object.
+//
+// All failures are reported by throwing std::runtime_error with a message that
+// includes the system error text.
+class Process {
+public:
+	Process() :
+	  g_hChildStd_IN_Rd(0),
+	  g_hChildStd_IN_Wr(0),
+	  g_hChildStd_OUT_Rd(0),
+	  g_hChildStd_OUT_Wr(0) {
+	}
+
+	// Closes whichever pipe ends are still open; safe if start() was never called.
+	~Process() {
+		closeHandle(g_hChildStd_IN_Rd);
+		closeHandle(g_hChildStd_IN_Wr);
+		closeHandle(g_hChildStd_OUT_Rd);
+		closeHandle(g_hChildStd_OUT_Wr);
+	}
+
+	// Creates the pipes and launches `cmdline` with its standard handles attached to them.
+	// Throws std::runtime_error if a pipe or the process cannot be created.
+	void start(const std::string& cmdline) {
+		SECURITY_ATTRIBUTES saAttr = { sizeof(saAttr), 0, true };
+
+		if (!CreatePipe(&g_hChildStd_OUT_Rd, &g_hChildStd_OUT_Wr, &saAttr, 0)) {
+			std::string msg = formatLastError("Process CreatePipe 1");
+			throw std::runtime_error(msg);
+		}
+		// The parent's ends of the pipes must not be inherited by the child
+		if (!SetHandleInformation(g_hChildStd_OUT_Rd, HANDLE_FLAG_INHERIT, 0)) {
+			std::string msg = formatLastError("Process SetHandleInformation 1");
+			throw std::runtime_error(msg);
+		}
+		if (!CreatePipe(&g_hChildStd_IN_Rd, &g_hChildStd_IN_Wr, &saAttr, 0)) {
+			std::string msg = formatLastError("Process CreatePipe 2");
+			throw std::runtime_error(msg);
+		}
+		if (!SetHandleInformation(g_hChildStd_IN_Wr, HANDLE_FLAG_INHERIT, 0)) {
+			std::string msg = formatLastError("Process SetHandleInformation 2");
+			throw std::runtime_error(msg);
+		}
+
+		PROCESS_INFORMATION piProcInfo = { 0 };
+		STARTUPINFOA siStartInfo = { sizeof(siStartInfo) };
+
+		siStartInfo.hStdError = g_hChildStd_OUT_Wr;
+		siStartInfo.hStdOutput = g_hChildStd_OUT_Wr;
+		siStartInfo.hStdInput = g_hChildStd_IN_Rd;
+		siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
+
+		BOOL bSuccess = CreateProcessA(0,
+			const_cast<char*>(cmdline.c_str()),
+			0,
+			0,
+			TRUE,
+			CREATE_NO_WINDOW | BELOW_NORMAL_PRIORITY_CLASS,
+			0,
+			0,
+			&siStartInfo,
+			&piProcInfo);
+
+		if (!bSuccess) {
+			// Fetch the error code first; building the message could overwrite it
+			const DWORD err = GetLastError();
+			std::string msg("Process could not start!\nCmdline: ");
+			msg += cmdline.c_str();
+			msg += '\n';
+			msg = formatError(err, msg);
+			throw std::runtime_error(msg);
+		}
+
+		CloseHandle(piProcInfo.hProcess);
+		CloseHandle(piProcInfo.hThread);
+
+		// The child holds its own inherited copies of these pipe ends. Keeping ours
+		// open would prevent read() from ever noticing that the child closed its output.
+		closeHandle(g_hChildStd_OUT_Wr);
+		closeHandle(g_hChildStd_IN_Rd);
+	}
+
+	// Reads exactly `count` bytes of the child's output into `buffer`.
+	// A single ReadFile() on a pipe may deliver fewer bytes than requested, so keep
+	// reading until the request is satisfied. Throws std::runtime_error on failure
+	// or if the stream ends early.
+	void read(char *buffer, size_t count) {
+		while (count > 0) {
+			DWORD bytes_read = 0;
+			if (!ReadFile(g_hChildStd_OUT_Rd, buffer, clampToDword(count), &bytes_read, 0) || bytes_read == 0) {
+				std::string msg = formatLastError("Process.read(char*,size_t)");
+				throw std::runtime_error(msg);
+			}
+			buffer += bytes_read;
+			count -= bytes_read;
+		}
+	}
+
+	// Writes exactly `length` bytes from `buffer` to the child's input.
+	// Throws std::runtime_error on failure.
+	void write(const char *buffer, size_t length) {
+		while (length > 0) {
+			DWORD bytes = 0;
+			if (!WriteFile(g_hChildStd_IN_Wr, buffer, clampToDword(length), &bytes, 0) || bytes == 0) {
+				std::string msg = formatLastError("Process.write(char*,size_t)");
+				throw std::runtime_error(msg);
+			}
+			buffer += bytes;
+			length -= bytes;
+		}
+	}
+
+	// Nothing to do: write() hands the data straight to WriteFile().
+	void flush() {
+	}
+
+private:
+	HANDLE g_hChildStd_IN_Rd;
+	HANDLE g_hChildStd_IN_Wr;
+	HANDLE g_hChildStd_OUT_Rd;
+	HANDLE g_hChildStd_OUT_Wr;
+
+	// Closes `h` if it is open and resets it so it is never closed twice.
+	static void closeHandle(HANDLE& h) {
+		if (h != 0 && h != INVALID_HANDLE_VALUE) {
+			CloseHandle(h);
+		}
+		h = 0;
+	}
+
+	// ReadFile()/WriteFile() take a DWORD length; larger requests are served in chunks.
+	static DWORD clampToDword(size_t n) {
+		return (n > MAXDWORD) ? MAXDWORD : static_cast<DWORD>(n);
+	}
+
+	// Appends the system description of error code `err` to `msg`.
+	static std::string formatError(DWORD err, std::string msg) {
+		if (!msg.empty()) {
+			msg += ' ';
+		}
+		char *fmt = 0;
+		// IGNORE_INSERTS: no argument array is supplied, so inserts must not be expanded
+		DWORD len = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_IGNORE_INSERTS, 0, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), (LPSTR)&fmt, 0, 0);
+		msg += "GetLastError: ";
+		if (len != 0 && fmt != 0) {
+			msg += fmt;
+		}
+		else {
+			// No text available for this code; fall back to the number itself
+			char digits[11];
+			int i = 10;
+			digits[i] = 0;
+			DWORD v = err;
+			do {
+				digits[--i] = static_cast<char>('0' + (v % 10));
+				v /= 10;
+			} while (v != 0);
+			msg += "error code ";
+			msg += &digits[i];
+		}
+		msg += '\n';
+		if (fmt != 0) {
+			LocalFree(fmt);
+		}
+		return msg;
+	}
+
+	// Appends the description of the calling thread's last error to `msg`.
+	std::string formatLastError(std::string msg = "") {
+		return formatError(GetLastError(), msg);
+	}
+};
+
+#else
+#include <popen_plus.h>
+#include <cerrno>
+#include <cstdio>
+#include <cstring>
+
+// POSIX implementation: runs a child process through popen_plus() and talks to it
+// over the read_fp / write_fp streams of the returned handle.
+//
+// All failures are reported by throwing std::runtime_error.
+class Process {
+private:
+	popen_plus_process *child;
+
+	// Appends the strerror() text for the current errno to `msg`.
+	std::string formatLastError(std::string msg = "") {
+		if (!msg.empty()) {
+			msg += ' ';
+		}
+		msg += "strerror: ";
+		msg += strerror(errno);
+		return msg;
+	}
+
+	// Throws if no child is running, so callers never dereference a null handle.
+	void requireStarted(const char *where) const {
+		if (child == 0) {
+			std::string msg(where);
+			msg += " called without a running child process";
+			throw std::runtime_error(msg);
+		}
+	}
+
+public:
+
+	Process() :
+	  child(0) {
+	}
+
+	// Kills and reaps the child, if one was started.
+	~Process() {
+		// start() may never have been called, or may have thrown; the popen_plus
+		// functions are only handed a handle that popen_plus() actually returned.
+		if (child != 0) {
+			popen_plus_kill(child);
+			popen_plus_close(child);
+			child = 0;
+		}
+	}
+
+	// Launches `cmdline`. Throws std::runtime_error if the process cannot be started.
+	void start(const std::string& cmdline) {
+		child = popen_plus(cmdline.c_str());
+		if (child == 0) {
+			std::string msg = "Process could not start!\nCmdline: ";
+			msg += cmdline.c_str();
+			msg += '\n';
+			msg = formatLastError(msg);
+			throw std::runtime_error(msg);
+		}
+	}
+
+	// Reads exactly `count` bytes of the child's output into `buffer`.
+	// Throws std::runtime_error on a short read or if no child is running.
+	void read(char *buffer, size_t count) {
+		requireStarted("Process.read(char*,size_t)");
+		if (fread(buffer, 1, count, child->read_fp) != count) {
+			std::string msg = formatLastError("Process.read(char*,size_t)");
+			throw std::runtime_error(msg);
+		}
+	}
+
+	// Writes exactly `length` bytes from `buffer` to the child's input.
+	// Throws std::runtime_error on a short write or if no child is running.
+	void write(const char *buffer, size_t length) {
+		requireStarted("Process.write(char*,size_t)");
+		if (fwrite(buffer, 1, length, child->write_fp) != length) {
+			std::string msg = formatLastError("Process.write(char*,size_t)");
+			throw std::runtime_error(msg);
+		}
+	}
+
+	// Pushes buffered writes to the child; does nothing if no child is running.
+	void flush() {
+		if (child != 0) {
+			fflush(child->write_fp);
+		}
+	}
+};
+
+#endif
+
+#endif
diff --git a/src/stdafx.hpp b/src/stdafx.hpp
index 9ffab55..018a2bf 100644
--- a/src/stdafx.hpp
+++ b/src/stdafx.hpp
@@ -24,6 +24,9 @@
#define c6d28b7452ec699b_STDAFX_H
#ifdef _MSC_VER
+ // warning C4258: definition from the for loop is ignored; the definition from the enclosing scope is used
+ #pragma warning (disable: 4258)
+ #pragma conform(forScope, on)
// warning C4428: universal-character-name encountered in source
#pragma warning (disable: 4428)
// warning C4512: assignment operator could not be generated
@@ -86,7 +89,7 @@
#define stdext boost
#define hash_map unordered_map
-#ifdef _MSC_VER
+#ifdef _WIN32
#include <winsock.h> // for hton() and family.
#else
#include <unistd.h>
diff --git a/src/uextras.cpp b/src/uextras.cpp
index 4ba9172..768320c 100644
--- a/src/uextras.cpp
+++ b/src/uextras.cpp
@@ -19,7 +19,7 @@
* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifdef _MSC_VER
+#ifdef _WIN32
#include <windows.h>
#endif
@@ -30,7 +30,7 @@ namespace CG3 {
std::string ux_dirname(const char *in) {
char tmp[32768] = {0};
-#ifdef _MSC_VER
+#ifdef _WIN32
char *fname = 0;
GetFullPathNameA(in, 32767, tmp, &fname);
if (fname) {
diff --git a/src/uextras.hpp b/src/uextras.hpp
index 617378c..1b7fcef 100644
--- a/src/uextras.hpp
+++ b/src/uextras.hpp
@@ -26,6 +26,29 @@
#include "stdafx.hpp"
#include "Strings.hpp"
+#ifdef _WIN32
+// Minimal basename() replacement for Windows builds.
+//
+// Returns a pointer into `path` just past its last '\' or '/' separator, `path`
+// itself when it contains no separator, or "." when `path` is NULL. A path that
+// ends in a separator therefore yields an empty string. This is not fully
+// conformant to the POSIX basename() contract.
+inline const char *basename(const char *path) {
+	if (path == NULL) {
+		return ".";
+	}
+
+	const char *backslash = strrchr(path, '\\');
+	const char *slash = strrchr(path, '/');
+
+	// Pick whichever separator occurs last. The pointers are only compared with
+	// each other when both are non-null, i.e. when both point into `path`.
+	const char *pos = backslash;
+	if (slash != NULL && (backslash == NULL || slash > backslash)) {
+		pos = slash;
+	}
+
+	if (pos == NULL) {
+		// No separator at all: the whole path is the base name
+		return path;
+	}
+	return pos + 1;
+}
+#endif
+
namespace CG3 {
inline int ux_isSetOp(const UChar *it) {
@@ -127,6 +150,18 @@ inline substr_t<Str> substr(const Str& str, size_t offset=0, size_t count=0) {
return substr_t<Str>(str, offset, count);
}
+// Copies at most `n` code units from `src` to `dst`, stopping at the first zero in
+// `src`, and always zero-terminates `dst` (so `dst` must have room for n + 1 units).
+// Line feed (0x0A) and carriage return (0x0D) are stored with 0x2400 added.
+// A null `src` produces an empty string. Returns `dst`.
+inline UChar *ux_bufcpy(UChar *dst, const UChar *src, size_t n) {
+	size_t len = 0;
+	if (src) {
+		while (len < n && src[len]) {
+			UChar c = src[len];
+			if (c == 0x0A || c == 0x0D) {
+				c += 0x2400;
+			}
+			dst[len] = c;
+			++len;
+		}
+	}
+	dst[len] = 0;
+	return dst;
+}
+
std::string ux_dirname(const char *in);
}
diff --git a/src/version.hpp b/src/version.hpp
index 2abdb47..08ab54d 100644
--- a/src/version.hpp
+++ b/src/version.hpp
@@ -30,8 +30,8 @@ const char* const CG3_COPYRIGHT_STRING = "Copyright (C) 2007-2015 GrammarSoft Ap
const uint32_t CG3_VERSION_MAJOR = 0;
const uint32_t CG3_VERSION_MINOR = 9;
const uint32_t CG3_VERSION_PATCH = 9;
-const uint32_t CG3_REVISION = 10379;
-const uint32_t CG3_FEATURE_REV = 10373;
+const uint32_t CG3_REVISION = 10754;
+const uint32_t CG3_FEATURE_REV = 10575;
const uint32_t CG3_TOO_OLD = 10373;
const uint32_t CG3_EXTERNAL_PROTOCOL = 7226;
diff --git a/test/T_Dependency_Loops/grammar.cg3 b/test/T_Dependency_Loops/grammar.cg3
index b731f12..ab20672 100644
--- a/test/T_Dependency_Loops/grammar.cg3
+++ b/test/T_Dependency_Loops/grammar.cg3
@@ -1,15 +1,16 @@
DELIMITERS = "<$.>" ;
+STRICT-TAGS += vis N @>N PRP @P< @ADVL ADV ;
MAP (vis) _S_DELIMITERS_ ;
SETPARENT _S_DELIMITERS_ TO (@0 (*)) ;
# These two are equivalent
-SETCHILD (N) TO (-1* (@>N)) ;
-SETPARENT (@>N) TO (1* (N)) ;
+SETCHILD N TO (-1* @>N) ;
+SETPARENT @>N TO (1* N) ;
-SETPARENT (@P<) TO (-1* (PRP)) ;
+SETPARENT @P< TO (-1* PRP) ;
-SETPARENT (@ADVL) (0 (PRP)) TO (-1* (ADV)) (0 (@ADVL)) ;
+SETPARENT @ADVL (0 PRP) TO (-1* ADV) (0 @ADVL) ;
SETPARENT ("down") TO (-1* ("once")) ;
SETPARENT ("once") TO (1* ("down")) ; # Immediate parent->child = child->parent block
diff --git a/test/T_MapAdd_Different/args.txt b/test/T_MapAdd_Different/args.txt
index 1db23ad..6cfee35 100755
--- a/test/T_MapAdd_Different/args.txt
+++ b/test/T_MapAdd_Different/args.txt
@@ -1 +1 @@
--t
\ No newline at end of file
+-t --split-mappings
\ No newline at end of file
diff --git a/test/T_MapAdd_Different/expected.txt b/test/T_MapAdd_Different/expected.txt
index ddbdc17..6cdc91c 100644
--- a/test/T_MapAdd_Different/expected.txt
+++ b/test/T_MapAdd_Different/expected.txt
@@ -1,5 +1,6 @@
"<word>"
"word" notwanted
- "matchme" wanted $tag £tag @tag @mapped ADD:8 ADD:9 ADD:10 MAP:12
+ "matchme" wanted $tag £tag @tag ADD:8 ADD:9 ADD:10 MAP:12
"word" notmeeither
+ "matchme" wanted $tag £tag @mapped ADD:8 ADD:9 ADD:10 MAP:12
diff --git a/test/T_RegExp/expected.txt b/test/T_RegExp/expected.txt
index 268604f..c1638a6 100644
--- a/test/T_RegExp/expected.txt
+++ b/test/T_RegExp/expected.txt
@@ -9,5 +9,4 @@
"BaseForm" d @baseform-diff
"<word>"
"baseform" e @baseform-diff
- "form" f @baseform-diff
-
+ "form" f @baseform-diff @add-f
diff --git a/test/T_RegExp/grammar.cg3 b/test/T_RegExp/grammar.cg3
index 1d37a9b..e20f7f0 100644
--- a/test/T_RegExp/grammar.cg3
+++ b/test/T_RegExp/grammar.cg3
@@ -32,3 +32,5 @@ ADD (@baseform-icsame) $$ibform (NEGATE 0 (*) - $$ibform) ;
ADD (@slashes) (/^@.*-SAM/ri) ;
ADD (@icase) (/@BASEFORM-SAME/i) ;
+
+ADD (VSTR:@add-$1) ("(.*)orm"r f) ;
diff --git a/test/T_RegExp/grammar.cg3b.10043 b/test/T_RegExp/grammar.cg3b.10043
index 0dc8b37..e8221b8 100644
Binary files a/test/T_RegExp/grammar.cg3b.10043 and b/test/T_RegExp/grammar.cg3b.10043 differ
diff --git a/test/T_Templates/expected.txt b/test/T_Templates/expected.txt
index 264a182..b315810 100644
--- a/test/T_Templates/expected.txt
+++ b/test/T_Templates/expected.txt
@@ -3,15 +3,16 @@
"<ate>"
"eat" <SVO> <SV> V PAST VFIN
"<a>"
- "a" <Indef> DET CENTRAL ART SG @preGood
+ "a" <Indef> DET CENTRAL ART SG <W:60> @preGood
+ "a" <Indef> NDET CENTRAL ART SG <W:60> @preGood
"<cow>"
"cow" N NOM SG @bothGoodGood
"<with>"
- "with" PREP @beforeNccN @artnGood @artnNegateGood @artnNegateGoodOffset
+ "with" PREP @beforeNccN @artnGood @artnNegateGood @artnNegateGoodOffset @branch-nonC @branch-C
"<biscuits>"
"biscuit" N NOM PL @startNccN
"<and>"
- "and" CC
+ "and" CC <W:20>
"<lemonade>"
"lemonade" <-Indef> N NOM SG
"<$.>"
diff --git a/test/T_Templates/grammar.cg3 b/test/T_Templates/grammar.cg3
index 22f859a..417361b 100644
--- a/test/T_Templates/grammar.cg3
+++ b/test/T_Templates/grammar.cg3
@@ -35,6 +35,9 @@ ADD (@artnNegateGood) (PREP) IF (NEGATE T:ArtN) ;
ADD (@artnNegateGoodOffset) (PREP) IF (NEGATE 1 T:ArtN) ;
ADD (@artnNegateBad) (PREP) IF (NEGATE -1 T:ArtN) ;
+ADD (@branch-nonC) (PREP) IF (-1*f (DET <W>30>)) ;
+ADD (@branch-C) (PREP) IF (1*f (CC <W>30>)) ;
+
## --- Following is an internal compiler stress test.
LIST x = x;
diff --git a/test/T_Templates/grammar.cg3b.10043 b/test/T_Templates/grammar.cg3b.10043
deleted file mode 100644
index 674d062..0000000
Binary files a/test/T_Templates/grammar.cg3b.10043 and /dev/null differ
diff --git a/test/T_Templates/input.txt b/test/T_Templates/input.txt
index ae7fbc3..874d9bf 100644
--- a/test/T_Templates/input.txt
+++ b/test/T_Templates/input.txt
@@ -3,7 +3,8 @@
"<ate>"
"eat" <SVO> <SV> V PAST VFIN
"<a>"
- "a" <Indef> DET CENTRAL ART SG
+ "a" <Indef> DET CENTRAL ART SG <W:60>
+ "a" <Indef> NDET CENTRAL ART SG <W:60>
"<cow>"
"cow" N NOM SG
"<with>"
@@ -11,7 +12,7 @@
"<biscuits>"
"biscuit" N NOM PL
"<and>"
- "and" CC
+ "and" CC <W:20>
"<lemonade>"
"lemonade" <-Indef> N NOM SG
"<$.>"
diff --git a/vapply.sh b/vapply.sh
index 5569894..2590f4e 100755
--- a/vapply.sh
+++ b/vapply.sh
@@ -8,7 +8,7 @@ mkdir -p vapply
cd vapply
rm -fv callgrind.*
mv -vf annotated annotated.old
+mv -vf output.txt output.txt.old
g++ -std=c++$CXXV -DNDEBUG -pthread -pipe -Wall -Wextra -Wno-deprecated -O3 -g3 -I../include -I../include/exec-stream ../src/all_vislcg3.cpp -o vislcg3-c++$CXXV.debug -L/usr/lib/x86_64-linux-gnu -licui18n -licudata -licuio -licuuc
-head -n 2000 ../comparison/arboretum_stripped.txt | valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3-c++$CXXV.debug -v -C UTF-8 -g ../dancg.cg3b > output.txt
-#head -n 2000 ../comparison/arboretum_stripped.txt | valgrind ./vislcg3-c++$CXXV.debug -v -C UTF-8 -g ../dancg.cg3b > output.txt
+head -n 2000 ../comparison/arboretum_stripped.txt | valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3-c++$CXXV.debug -v -t -C UTF-8 -g ../dancg.cg3b > output.txt
callgrind_annotate --tree=both --auto=yes > annotated
diff --git a/win32/libgen.c b/win32/libgen.c
deleted file mode 100644
index f4c6f74..0000000
--- a/win32/libgen.c
+++ /dev/null
@@ -1,25 +0,0 @@
-#include "libgen.h"
-#include <string.h>
-
-// http://www.opengroup.org/onlinepubs/007908775/xsh/basename.html
-
-const char *basename(const char *path) {
- if (path != NULL) {
- // Find the last position of the \ in the path name
- const char *pos = strrchr(path, '\\');
-
- if (pos != NULL) { // If a \ char was found...
- if (pos + 1 != NULL) // If it is not the last character in the string...
- return pos + 1; // then return a pointer to the first character after \.
- else
- return pos; // else return a pointer to \.
- }
- else { // If a \ char was NOT found
- return path; // return the pointer passed to basename (this is probably non-conformant)
- }
-
- }
- else { // If path == NULL, return "."
- return ".";
- }
-}
diff --git a/win32/libgen.h b/win32/libgen.h
deleted file mode 100644
index bb7f542..0000000
--- a/win32/libgen.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef LIBGEN_H
-#define LIBGEN_H
-
-#ifdef __cplusplus
- extern "C" {
-#endif
-
-const char *basename(const char*);
-
-#ifdef __cplusplus
- }
-#endif
-
-#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/cg3.git
More information about the debian-science-commits
mailing list