[cg3] 01/02: Imported Upstream version 0.9.9~r11624
Tino Didriksen
tinodidriksen-guest at moszumanska.debian.org
Sat Jun 4 20:04:29 UTC 2016
This is an automated email from the git hooks/post-receive script.
tinodidriksen-guest pushed a commit to branch master
in repository cg3.
commit 94cb0afac03eeda7f131ce8f1ea99f80ba4f17e9
Author: Tino Didriksen <tino at didriksen.cc>
Date: Sat Jun 4 20:03:23 2016 +0000
Imported Upstream version 0.9.9~r11624
---
.clang-format | 60 +
.editorconfig | 14 +
.travis.yml | 18 +
CMakeLists.txt | 27 +-
ChangeLog | 1801 +++++++-----------
README | 4 -
README.md | 15 +
TODO | 1 +
clang-format.pl | 108 ++
emacs/cg.el | 304 +--
get-boost.sh | 2 +-
manual/contexts.xml | 23 +-
manual/grammar.xml | 1 +
manual/manual.xml | 2 +-
manual/rules.xml | 56 +-
manual/templates.xml | 41 +-
src/AST.hpp | 230 +++
src/ApertiumApplicator.cpp | 135 +-
src/ApertiumApplicator.hpp | 60 +-
src/BinaryGrammar.cpp | 5 +-
src/BinaryGrammar.hpp | 43 +-
src/BinaryGrammar_read.cpp | 61 +-
src/BinaryGrammar_read_10043.cpp | 49 +-
src/BinaryGrammar_write.cpp | 40 +-
src/CMakeLists.txt | 25 +-
src/Cohort.cpp | 53 +-
src/Cohort.hpp | 124 +-
src/CohortIterator.cpp | 96 +-
src/CohortIterator.hpp | 169 +-
src/ContextualTest.cpp | 37 +-
src/ContextualTest.hpp | 184 +-
src/FSTApplicator.cpp | 59 +-
src/FSTApplicator.hpp | 3 +-
src/FormatConverter.cpp | 100 +-
src/FormatConverter.hpp | 42 +-
src/Grammar.cpp | 246 ++-
src/Grammar.hpp | 257 +--
src/GrammarApplicator.cpp | 374 ++--
src/GrammarApplicator.hpp | 514 ++---
src/GrammarApplicator_matchSet.cpp | 140 +-
src/GrammarApplicator_reflow.cpp | 217 ++-
src/GrammarApplicator_runContextualTest.cpp | 327 ++--
src/GrammarApplicator_runGrammar.cpp | 70 +-
src/GrammarApplicator_runRules.cpp | 707 +++++--
src/GrammarWriter.cpp | 33 +-
src/GrammarWriter.hpp | 54 +-
src/IGrammarParser.hpp | 27 +-
...ApertiumApplicator.cpp => MatxinApplicator.cpp} | 420 +++--
src/MatxinApplicator.hpp | 80 +
src/MweSplitApplicator.cpp | 174 ++
...celineApplicator.hpp => MweSplitApplicator.hpp} | 25 +-
src/NicelineApplicator.cpp | 69 +-
src/NicelineApplicator.hpp | 3 +-
src/PlaintextApplicator.cpp | 61 +-
src/PlaintextApplicator.hpp | 3 +-
src/Reading.cpp | 75 +-
src/Reading.hpp | 82 +-
src/Relabeller.cpp | 382 ++++
src/Relabeller.hpp | 89 +
src/Rule.cpp | 53 +-
src/Rule.hpp | 167 +-
src/Set.cpp | 53 +-
src/Set.hpp | 143 +-
src/SingleWindow.cpp | 22 +-
src/SingleWindow.hpp | 68 +-
src/Strings.cpp | 22 +-
src/Strings.hpp | 365 ++--
src/Tag.cpp | 91 +-
src/Tag.hpp | 209 +-
src/TagTrie.hpp | 345 ++--
src/TextualParser.cpp | 1990 ++++++++++----------
src/TextualParser.hpp | 104 +-
src/Window.cpp | 25 +-
src/Window.hpp | 67 +-
src/all_cg_conv.cpp | 1 +
src/all_cg_proc.cpp | 1 +
src/bloomish.hpp | 7 +-
src/cg-mwesplit.cpp | 127 ++
src/cg-relabel.cpp | 140 ++
src/cg3.h | 2 +-
src/cg_comp.cpp | 4 +-
src/cg_conv.cpp | 18 +-
src/cg_proc.cpp | 170 +-
src/flat_unordered_map.hpp | 33 +-
src/flat_unordered_set.hpp | 30 +-
src/icu_uoptions.cpp | 190 +-
src/inlines.hpp | 176 +-
src/interval_vector.hpp | 65 +-
src/istream.hpp | 37 +-
src/libcg3.cpp | 60 +-
src/macros.hpp | 42 -
src/main.cpp | 56 +-
src/options.hpp | 246 +--
src/options_conv.hpp | 94 +-
src/parser_helpers.hpp | 23 +-
src/process.hpp | 8 +-
src/{GrammarWriter.hpp => scoped_stack.hpp} | 72 +-
src/sorted_vector.hpp | 33 +-
src/stdafx.hpp | 33 +-
src/test_libcg3.c | 10 +-
src/uextras.cpp | 9 +-
src/uextras.hpp | 24 +-
src/version.hpp | 6 +-
.../{T_BasicAppend => T_Append}/expected.txt | 0
.../{T_BasicAppend => T_Append}/grammar.cg3 | 0
.../Apertium/{T_BasicAppend => T_Append}/input.txt | 0
test/Apertium/{T_BasicAppend => T_Append}/run.pl | 0
.../expected.txt | 0
.../grammar.cg3 | 0
.../input.txt | 0
.../{T_BasicContextTest => T_ContextTest}/run.pl | 0
.../{T_BasicDelimit => T_Delimit}/expected.txt | 0
.../{T_BasicDelimit => T_Delimit}/grammar.cg3 | 0
.../{T_BasicDelimit => T_Delimit}/input.txt | 0
test/Apertium/{T_BasicDelimit => T_Delimit}/run.pl | 0
test/Apertium/{T_BasicIff => T_Iff}/expected.txt | 0
test/Apertium/{T_BasicIff => T_Iff}/grammar.cg3 | 0
test/Apertium/{T_BasicIff => T_Iff}/input.txt | 0
test/Apertium/{T_BasicIff => T_Iff}/run.pl | 0
.../{T_BasicRemove => T_Remove}/expected.txt | 0
.../{T_BasicRemove => T_Remove}/grammar.cg3 | 0
.../Apertium/{T_BasicRemove => T_Remove}/input.txt | 0
test/Apertium/{T_BasicRemove => T_Remove}/run.pl | 0
.../{T_BasicSelect => T_Select}/expected.txt | 0
.../{T_BasicSelect => T_Select}/grammar.cg3 | 0
.../Apertium/{T_BasicSelect => T_Select}/input.txt | 0
test/Apertium/{T_BasicSelect => T_Select}/run.pl | 0
.../expected.txt | 0
.../grammar.cg3 | 0
.../{T_BasicSubstitute => T_Substitute}/input.txt | 0
.../{T_BasicSubstitute => T_Substitute}/run.pl | 0
test/{T_BasicAppend => T_Append}/expected.txt | 3 +
test/{T_BasicAppend => T_Append}/grammar.cg3 | 2 +
test/{T_BasicAppend => T_Append}/input.txt | 0
test/T_BasicAppend/grammar.cg3b.10043 | Bin 2071 -> 0 bytes
test/T_BasicDependency/grammar.cg3b.10043 | Bin 8729 -> 0 bytes
test/T_BasicIff/grammar.cg3 | 4 -
test/T_BasicSelect/grammar.cg3b.10043 | Bin 837 -> 0 bytes
.../expected.txt | 0
.../grammar.cg3 | 0
.../grammar.cg3b.10043 | Bin
.../input.txt | 0
test/{T_BasicDelimit => T_Delimit}/args.txt | 0
test/{T_BasicDelimit => T_Delimit}/expected.txt | 0
test/{T_BasicDelimit => T_Delimit}/grammar.cg3 | 0
.../grammar.cg3b.10043 | Bin
test/{T_BasicDelimit => T_Delimit}/input.txt | 0
test/{T_BasicDependency => T_Dependency}/args.txt | 0
.../expected.txt | 7 +-
.../grammar.cg3 | 6 +
test/{T_BasicDependency => T_Dependency}/input.txt | 0
test/{T_BasicIff => T_Iff}/expected.txt | 0
test/T_Iff/grammar.cg3 | 4 +
test/{T_BasicIff => T_Iff}/grammar.cg3b.10043 | Bin
test/{T_BasicIff => T_Iff}/input.txt | 0
test/T_MultipleSections/grammar.cg3 | 4 +-
test/T_MweSplit/expected.txt | 76 +
test/T_MweSplit/input.txt | 66 +
test/T_MweSplit/run.pl | 23 +
test/T_NumericalTags/expected.txt | 2 +-
test/T_Omniscan/expected.txt | 40 +-
test/T_RegExp/expected.txt | 3 +
test/T_RegExp/grammar.cg3 | 2 +
test/T_RegExp/grammar.cg3b.10043 | Bin 4052 -> 0 bytes
test/T_RegExp/input.txt | 3 +
test/T_RelabelList/expected.txt | 21 +
test/T_RelabelList/grammar.cg3 | 42 +
test/T_RelabelList/input.txt | 23 +
test/T_RelabelList/relabel.cg3r | 20 +
test/T_RelabelList/run.pl | 53 +
test/T_RelabelList_Apertium/expected.txt | 1 +
test/T_RelabelList_Apertium/grammar.cg3 | 3 +
test/T_RelabelList_Apertium/input.txt | 1 +
test/T_RelabelList_Apertium/relabel.cg3r | 1 +
test/T_RelabelList_Apertium/run.pl | 54 +
test/T_RelabelSet/expected.txt | 27 +
test/T_RelabelSet/grammar.cg3 | 22 +
test/T_RelabelSet/input.txt | 33 +
test/T_RelabelSet/relabel.cg3r | 6 +
test/T_RelabelSet/run.pl | 53 +
test/{T_BasicSelect => T_Select}/args.txt | 0
test/{T_BasicSelect => T_Select}/expected.txt | 2 +-
test/{T_BasicSelect => T_Select}/grammar.cg3 | 2 +
test/{T_BasicSelect => T_Select}/input.txt | 0
test/{T_BasicSubstitute => T_SplitCohort}/args.txt | 0
test/T_SplitCohort/expected.txt | 13 +
test/T_SplitCohort/grammar.cg3 | 12 +
test/T_SplitCohort/input.txt | 6 +
test/{T_BasicSubstitute => T_Substitute}/args.txt | 0
.../expected.txt | 2 +-
.../grammar.cg3 | 0
test/{T_BasicSubstitute => T_Substitute}/input.txt | 0
test/T_Templates/expected.txt | 5 +-
test/T_Templates/grammar.cg3 | 9 +-
test/T_Trace/grammar.cg3 | 6 +-
test/runall.pl | 8 +-
todo.sh | 2 +-
197 files changed, 8519 insertions(+), 6015 deletions(-)
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..5915af0
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,60 @@
+AccessModifierOffset: -4
+AlignAfterOpenBracket: DontAlign
+AlignConsecutiveAssignments: false
+AlignEscapedNewlinesLeft: true
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: true
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: false
+ AfterNamespace: false
+ AfterStruct: false
+ AfterUnion: false
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+ColumnLimit: 0
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 2
+ContinuationIndentWidth: 2
+Cpp11BracedListStyle: false
+ForEachMacros: [ foreach, reverse_foreach, BOOST_FOREACH, boost_foreach ]
+IndentCaseLabels: false
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+Language: Cpp
+MaxEmptyLinesToKeep: 2
+NamespaceIndentation: Inner
+PointerAlignment: Right
+ReflowComments: false
+SortIncludes: false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+TabWidth: 4
+UseTab: ForIndentation
diff --git a/.editorconfig b/.editorconfig
new file mode 100755
index 0000000..16917d2
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,14 @@
+# http://editorconfig.org/
+root = yes
+
+[*]
+charset = utf-8
+end_of_line = lf
+indent_size = 4
+indent_style = tab
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[**.xml]
+indent_size = 2
+indent_style = space
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..27f8a40
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,18 @@
+sudo: required
+dist: trusty
+language: cpp
+
+addons:
+ apt:
+ packages:
+ - cmake
+ - libboost-dev
+ - libicu-dev
+
+before_script:
+ - cmake .
+
+script:
+ - $CXX --version
+ - make -j2
+ - ./test/runall.pl
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 857a2e8..1830150 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,7 +39,10 @@ if(MSVC)
set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS})
set(CMAKE_C_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
else()
- set(_FLAGS_COMMON "-Wall -Wextra -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -Wno-unused-result -fPIC")
+ set(_FLAGS_COMMON "-Wall -Wextra -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -fPIC")
+ if((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.4) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
+ set(_FLAGS_COMMON "${_FLAGS_COMMON} -Wno-unused-result")
+ endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_FLAGS_COMMON} -fvisibility-inlines-hidden")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
@@ -47,14 +50,17 @@ else()
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3")
- # Enable C++14 or C++11 if possible
- if((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.9) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3))
- message(STATUS "Enabling C++14 for ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y")
- elseif((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.6) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.1))
- message(STATUS "Enabling C++11 for ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
- endif()
+ # Enable latest possible C++ standard
+ include(CheckCXXCompilerFlag)
+ foreach(flag "-std=c++17" "-std=c++1z" "-std=c++14" "-std=c++1y" "-std=c++11" "-std=c++0x")
+ string(REGEX REPLACE "[^a-z0-9]" "-" _flag ${flag})
+ CHECK_CXX_COMPILER_FLAG(${flag} COMPILER_SUPPORTS_${_flag})
+ if(COMPILER_SUPPORTS_${_flag})
+ message(STATUS "Enabling ${flag}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}")
+ break()
+ endif()
+ endforeach()
# Generate pkg-config file
set(prefix ${CMAKE_INSTALL_PREFIX})
@@ -89,4 +95,5 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/scripts/cg3-autobin.pl.in ${CMAKE_CURRENT_BINARY_DIR}/scripts/cg3-autobin.pl @ONLY)
install(PROGRAMS "${CMAKE_CURRENT_BINARY_DIR}/scripts/cg3-autobin.pl" DESTINATION bin)
-install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/emacs/cg.el" DESTINATION share/emacs/site-lisp)
+set(ELISPDIR "share/emacs/site-lisp" CACHE PATH "Where to install Emacs Lisp files")
+install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/emacs/cg.el" DESTINATION ${ELISPDIR})
diff --git a/ChangeLog b/ChangeLog
index d028f05..218f6c3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,677 @@
+2016-05-24 tino
+
+ * [r11620] src/Cohort.cpp, src/Cohort.hpp, src/CohortIterator.cpp,
+ src/GrammarApplicator.cpp, src/GrammarApplicator_reflow.cpp,
+ src/GrammarApplicator_runRules.cpp, src/MatxinApplicator.cpp,
+ src/NicelineApplicator.cpp: Magic constant -> DEP_NO_PARENT
+ * [r11619] .travis.yml: Travis changed?
+ * [r11618] src/TextualParser.cpp: Dead code
+
+2016-05-23 tino
+
+ * [r11613] src/MatxinApplicator.cpp, src/MatxinApplicator.hpp:
+ Format code
+
+2016-05-23 ftyers
+
+ * [r11603] src/MatxinApplicator.cpp: update so it gives full trees
+
+2016-05-19 tino
+
+ * [r11583] src/Grammar.cpp, src/TagTrie.hpp: Add KeepOrder if any
+ context uses varstrings or unification
+
+2016-05-18 tino
+
+ * [r11577] ChangeLog, src/GrammarApplicator_runGrammar.cpp,
+ src/version.hpp: Also add magic tag <<< in --dep-delimit mode
+
+2016-05-13 ftyers
+
+ * [r11551] src/MatxinApplicator.cpp, src/MatxinApplicator.hpp: fail
+ fast (well reasonably) if input contains subreadings
+
+2016-05-13 tino
+
+ * [r11550] src/MatxinApplicator.cpp: Substr that doesn't throw
+
+2016-05-13 ftyers
+
+ * [r11549] src/MatxinApplicator.cpp: fix whitespace handling
+ * [r11548] src/MatxinApplicator.cpp: go back to for loop as substr
+ doesn't work
+
+2016-05-13 tino
+
+ * [r11547] get-boost.sh: Boost 1.61.0
+ * [r11545] src/MatxinApplicator.cpp: Use substr to fix warning
+
+2016-05-13 ftyers
+
+ * [r11544] src/MatxinApplicator.cpp: rename field
+ * [r11543] src/MatxinApplicator.cpp: lop off @
+
+2016-05-13 tino
+
+ * [r11541] src/MatxinApplicator.cpp, src/MatxinApplicator.hpp:
+ Printing to stdout is really not allowed
+
+2016-05-12 ftyers
+
+ * [r11539] src/MatxinApplicator.cpp, src/MatxinApplicator.hpp: some
+ formatting fixes
+ * [r11538] src/MatxinApplicator.cpp: there is no pos field, and
+ also number sentences
+
+2016-05-12 tino
+
+ * [r11537] src/MatxinApplicator.cpp, src/MatxinApplicator.hpp:
+ Printing to stdout is not allowed
+ * [r11536] src/FormatConverter.cpp: Reorder init
+ * [r11535] src/MatxinApplicator.cpp, src/MatxinApplicator.hpp,
+ src/cg_proc.cpp: Attempt to inject history; Reformat
+
+2016-05-12 ftyers
+
+ * [r11534] src/ApertiumApplicator.cpp, src/CMakeLists.txt,
+ src/FormatConverter.cpp, src/FormatConverter.hpp,
+ src/MatxinApplicator.cpp, src/MatxinApplicator.hpp,
+ src/all_cg_conv.cpp, src/all_cg_proc.cpp, src/cg_proc.cpp,
+ src/options_conv.hpp: first version of the MatxinApplicator,
+ still quite a bit of work to do
+
+2016-05-12 tino
+
+ * [r11533] ChangeLog, src/ApertiumApplicator.cpp, src/cg_proc.cpp,
+ src/version.hpp: Fix cg-proc leak; Allow cg-proc to use textual
+ grammars, but with a warning
+
+2016-04-14 unhammer
+
+ * [r11459] emacs/cg.el: cg-calculate-indent was returning wrong val
+
+2016-04-13 tino
+
+ * [r11458] src/CMakeLists.txt: Test failed due to renamed tests
+
+2016-04-11 tino
+
+ * [r11456] src/GrammarApplicator.cpp: Print rule number where
+ possible
+ * [r11455] test/Apertium/T_Append, test/Apertium/T_BasicAppend,
+ test/Apertium/T_BasicContextTest, test/Apertium/T_BasicDelimit,
+ test/Apertium/T_BasicIff, test/Apertium/T_BasicRemove,
+ test/Apertium/T_BasicSelect, test/Apertium/T_BasicSubstitute,
+ test/Apertium/T_ContextTest, test/Apertium/T_Delimit,
+ test/Apertium/T_Iff, test/Apertium/T_Remove,
+ test/Apertium/T_Select, test/Apertium/T_Substitute,
+ test/T_Append, test/T_BasicAppend, test/T_BasicContextTest,
+ test/T_BasicDelimit, test/T_BasicDependency, test/T_BasicIff,
+ test/T_BasicSelect, test/T_BasicSubstitute, test/T_ContextTest,
+ test/T_Delimit, test/T_Dependency, test/T_Iff, test/T_Select,
+ test/T_Substitute: Rename tests
+ * [r11454] ChangeLog, src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_runRules.cpp, src/TextualParser.cpp,
+ src/version.hpp, test/T_BasicAppend/expected.txt,
+ test/T_BasicAppend/grammar.cg3,
+ test/T_BasicAppend/grammar.cg3b.10043: Append, AddCohort,
+ SplitCohort now performs their tag checks only at runtime
+
+2016-04-07 unhammer
+
+ * [r11440] emacs/cg.el: fix indentation where wordform before rule
+ keyword
+
+2016-04-06 tino
+
+ * [r11439] src/GrammarApplicator.cpp: At runtime, print numLines
+
+2016-04-04 tino
+
+ * [r11433] .clang-format, clang-format.pl,
+ src/GrammarApplicator_runRules.cpp, src/Set.hpp, src/inlines.hpp:
+ clang-format-3.9
+
+2016-03-30 tino
+
+ * [r11431] README.md: Gotta catch them all
+
+2016-03-29 unhammer
+
+ * [r11430] emacs/cg.el: (require 'cl-lib) ;; assume users have
+ emacs>=24.3
+
+ + lots of checkdoc-fixes
+ * [r11429] emacs/cg.el: compilation warnings now have filename
+ prepended
+
+2016-03-23 tino
+
+ * [r11423] manual/rules.xml: Fix Substitute documentation
+ * [r11421] src/GrammarApplicator_runRules.cpp: No, that wasn't a
+ problem
+ * [r11420] src/GrammarApplicator_runRules.cpp: Substitute should
+ not remove tags if it can't later replace them
+
+2016-03-22 tino
+
+ * [r11416] ChangeLog, src/GrammarApplicator.cpp,
+ src/GrammarApplicator.hpp, src/GrammarApplicator_runRules.cpp,
+ src/Strings.cpp, src/version.hpp,
+ test/T_BasicSubstitute/expected.txt: Substitute will now insert
+ in all found locations, if the tags to be replaced were found
+ contiguously in those locations
+
+2016-03-03 tino
+
+ * [r11383] manual/grammar.xml: Section name example
+
+2016-03-01 tino
+
+ * [r11374] src/libcg3.cpp, src/stdafx.hpp: Things no longer
+ relevant in VS 2015
+
+2016-02-23 tino
+
+ * [r11351] ChangeLog, src/GrammarApplicator_runRules.cpp,
+ src/version.hpp: Remove Substitute unique test
+
+2016-02-11 tino
+
+ * [r11341] src/GrammarApplicator_runGrammar.cpp: Fix B with -D
+
+2016-02-09 tino
+
+ * [r11340] src/Tag.cpp: Don't even try to parse very long tags as
+ numeric
+
+2016-02-08 tino
+
+ * [r11338] ChangeLog, src/GrammarApplicator_matchSet.cpp,
+ src/version.hpp: Even more undo regex captures on match fail
+
+2016-02-05 tino
+
+ * [r11329] manual/contexts.xml: Greedy and lazy bag
+ * [r11327] ChangeLog, manual/contexts.xml,
+ src/BinaryGrammar_read.cpp, src/BinaryGrammar_write.cpp,
+ src/ContextualTest.hpp, src/Grammar.cpp, src/Grammar.hpp,
+ src/GrammarApplicator_reflow.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/SingleWindow.cpp, src/SingleWindow.hpp,
+ src/TextualParser.cpp, src/version.hpp,
+ test/T_BasicDependency/expected.txt,
+ test/T_BasicDependency/grammar.cg3,
+ test/T_BasicDependency/grammar.cg3b.10043: Implement Bag of Tags
+ * [r11325] src/GrammarApplicator_reflow.cpp: Silly if
+
+2016-01-26 tino
+
+ * [r11265] manual/rules.xml: KeepOrder doc
+ * [r11264] ChangeLog, manual/manual.xml, src/AST.hpp,
+ src/ApertiumApplicator.cpp, src/ApertiumApplicator.hpp,
+ src/BinaryGrammar.cpp, src/BinaryGrammar.hpp,
+ src/BinaryGrammar_read.cpp, src/BinaryGrammar_write.cpp,
+ src/Cohort.cpp, src/Cohort.hpp, src/CohortIterator.cpp,
+ src/CohortIterator.hpp, src/ContextualTest.cpp,
+ src/ContextualTest.hpp, src/FSTApplicator.cpp,
+ src/FSTApplicator.hpp, src/FormatConverter.cpp,
+ src/FormatConverter.hpp, src/Grammar.cpp, src/Grammar.hpp,
+ src/GrammarApplicator.cpp, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_reflow.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runGrammar.cpp,
+ src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
+ src/GrammarWriter.hpp, src/IGrammarParser.hpp,
+ src/NicelineApplicator.cpp, src/NicelineApplicator.hpp,
+ src/PlaintextApplicator.cpp, src/PlaintextApplicator.hpp,
+ src/Reading.cpp, src/Reading.hpp, src/Relabeller.cpp,
+ src/Relabeller.hpp, src/Rule.cpp, src/Rule.hpp, src/Set.cpp,
+ src/Set.hpp, src/SingleWindow.cpp, src/SingleWindow.hpp,
+ src/Strings.cpp, src/Strings.hpp, src/Tag.cpp, src/Tag.hpp,
+ src/TagTrie.hpp, src/TextualParser.cpp, src/TextualParser.hpp,
+ src/Window.cpp, src/Window.hpp, src/bloomish.hpp,
+ src/cg-relabel.cpp, src/cg3.h, src/cg_comp.cpp, src/cg_conv.cpp,
+ src/cg_proc.cpp, src/flat_unordered_map.hpp,
+ src/flat_unordered_set.hpp, src/inlines.hpp,
+ src/interval_vector.hpp, src/istream.hpp, src/libcg3.cpp,
+ src/main.cpp, src/options.hpp, src/options_conv.hpp,
+ src/parser_helpers.hpp, src/process.hpp, src/scoped_stack.hpp,
+ src/sorted_vector.hpp, src/stdafx.hpp, src/test_libcg3.c,
+ src/uextras.cpp, src/uextras.hpp, src/version.hpp: Copyright and
+ rev bump
+
+2016-01-25 tino
+
+ * [r11254] src/Grammar.cpp, src/Set.hpp: Dirty way to add KEEPORDER
+ where it is likely missing
+
+2016-01-11 tino
+
+ * [r11203] src/FSTApplicator.cpp: Handle trailing + in baseforms
+
+2015-12-17 tino
+
+ * [r11144] ChangeLog, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_runContextualTest.cpp, src/version.hpp: In
+ barrier context, don't test links
+
+2015-12-03 tino
+
+ * [r11104] ChangeLog, src/Cohort.cpp, src/version.hpp: Fix segfault
+
+2015-11-27 tino
+
+ * [r11079] CMakeLists.txt: Apply ELISPDIR patch from Alex Dunn
+ <dunn.alex at gmail.com> to aid Homebrew packaging
+
+2015-11-16 tino
+
+ * [r11069] ChangeLog, TODO, src/GrammarApplicator_runRules.cpp,
+ src/version.hpp, test/T_RegExp/expected.txt,
+ test/T_RegExp/grammar.cg3, test/T_RegExp/grammar.cg3b.10043,
+ test/T_RegExp/input.txt: Propagate regexp captures to other
+ readings
+
+2015-11-12 tino
+
+ * [r11056] README, README.md: Add badge
+ * [r11055] .travis.yml: But then clang doesn't work, of course
+ * [r11054] .travis.yml: Le sigh...
+ * [r11053] .travis.yml: Try Travic CI's container setup
+ * [r11052] .travis.yml: Try a different CMake PPA
+ * [r11051] .travis.yml: Wasn't prepared for Ubuntu 12.04
+ * [r11050] .travis.yml: Let's see if Travis CI works...
+
+2015-11-10 tino
+
+ * [r11048] .clang-format, clang-format.pl,
+ src/GrammarApplicator_runRules.cpp, src/cg_proc.cpp: clang-format
+ now gets Egyptian braces right
+
+2015-11-05 tino
+
+ * [r11038] src/cg_proc.cpp: Flush flush
+
+2015-11-03 tino
+
+ * [r11034] src/GrammarApplicator_runContextualTest.cpp: Don't clear
+ readings again if we're at the current target
+ * [r11032] ChangeLog, manual/contexts.xml,
+ src/GrammarApplicator.hpp, src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runRules.cpp, src/version.hpp,
+ test/T_BasicSelect/expected.txt, test/T_BasicSelect/grammar.cg3,
+ test/T_BasicSelect/grammar.cg3b.10043: Context modifier 'A' now
+ works almost anywhere
+
+2015-10-31 tino
+
+ * [r11024] src/ApertiumApplicator.cpp, src/GrammarApplicator.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runRules.cpp, src/PlaintextApplicator.cpp,
+ src/TextualParser.cpp, src/cg_proc.cpp, src/parser_helpers.hpp,
+ src/process.hpp: Fixed based on Coverity Scan report - nothing
+ that would ever affect real world execution
+
+2015-10-14 tino
+
+ * [r10993] ChangeLog, src/GrammarApplicator_runContextualTest.cpp,
+ src/sorted_vector.hpp, src/version.hpp: Failure to find r,l
+ should set result to false, and thus let NONE/NOT work as
+ expected
+
+2015-09-29 tino
+
+ * [r10984] CMakeLists.txt: Better C++17/14/11 detection that also
+ works
+ * [r10983] CMakeLists.txt: Better C++17/14/11 detection
+
+2015-09-23 tino
+
+ * [r10982] manual/rules.xml, todo.sh: Documented SplitCohort
+
+2015-09-22 tino
+
+ * [r10980] src/GrammarApplicator_runRules.cpp: RemCohort segfault
+ * [r10979] src/GrammarApplicator_runGrammar.cpp,
+ test/T_SplitCohort/expected.txt, test/T_SplitCohort/grammar.cg3:
+ Special case initial setvars so that delimiters can see them;
+ SplitCohort test
+ * [r10978] src/GrammarApplicator_runRules.cpp: SplitCohort handle
+ named relations
+ * [r10977] ChangeLog, clang-format.pl,
+ src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_runRules.cpp, src/scoped_stack.hpp,
+ src/version.hpp: Fix unification across contexts when there is no
+ unification in target or capture
+ * [r10976] src/CMakeLists.txt, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_reflow.cpp,
+ src/GrammarApplicator_runRules.cpp, src/Rule.hpp, src/Set.hpp,
+ src/Tag.hpp, src/cg_proc.cpp, src/scoped_stack.hpp: Code cleanup,
+ a bit less actual static but more reuse
+
+2015-09-21 tino
+
+ * [r10975] .editorconfig, src/Cohort.hpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/SingleWindow.hpp: WIP kill global_number
+
+2015-09-19 tino
+
+ * [r10967] src/GrammarApplicator_runRules.cpp: Erase all knowledge
+ of a removed cohort from the dep tree
+ * [r10966] src/GrammarApplicator_runRules.cpp: Looping over a
+ container while modifying it is generally a horrible idea
+ * [r10964] ChangeLog, src/GrammarApplicator.cpp,
+ src/GrammarApplicator.hpp, src/version.hpp,
+ test/T_Omniscan/expected.txt: When dep has spanned, output in the
+ form of window number + padded local number, rather than global
+ number
+ * [r10963] src/GrammarApplicator_runRules.cpp: Properly detach
+ removed cohorts from the dependency tree
+
+2015-09-18 tino
+
+ * [r10961] src/GrammarApplicator_reflow.cpp: Fix segfault
+ * [r10958] ChangeLog, manual/rules.xml, src/Cohort.hpp,
+ src/CohortIterator.cpp, src/CohortIterator.hpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runRules.cpp, src/SingleWindow.hpp,
+ src/version.hpp: Fix cohort ordering to be reliable across
+ movement, insertion, removal, etc
+
+2015-09-16 tino
+
+ * [r10955] ChangeLog, src/Tag.cpp, src/version.hpp,
+ test/T_NumericalTags/expected.txt: Fix bug where e.g. <C:NN> was
+ considered numerical with value 0
+
+2015-09-14 tino
+
+ * [r10952] ChangeLog, src/GrammarApplicator_runRules.cpp,
+ src/Strings.cpp, src/Strings.hpp, src/TextualParser.cpp,
+ src/version.hpp, test/T_SplitCohort,
+ test/T_SplitCohort/expected.txt, test/T_SplitCohort/grammar.cg3,
+ test/T_SplitCohort/input.txt: Implement SplitCohort
+
+2015-09-08 tino
+
+ * [r10939] src/Cohort.hpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/SingleWindow.hpp: CentOS 6 fix
+
+2015-09-07 tino
+
+ * [r10938] TODO: ToDone
+ * [r10937] ChangeLog, src/GrammarApplicator.hpp,
+ src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runRules.cpp, src/inlines.hpp,
+ src/version.hpp: Make template ORs fully commutative
+
+2015-08-30 tino
+
+ * [r10925] src/TextualParser.cpp, src/inlines.hpp: Easier to read
+ parsing
+
+2015-08-26 unhammer
+
+ * [r10923] test/T_RelabelList_Apertium/expected.txt: doh, flip the
+ expected bit
+
+2015-08-26 tino
+
+ * [r10922] test/T_RelabelList/run.pl, test/T_RelabelSet/run.pl: Use
+ -I and -O instead of < and >, to avoid Windows pipe conversion
+
+2015-08-26 unhammer
+
+ * [r10921] src/Relabeller.cpp, src/Relabeller.hpp: allow replacing
+ tags with baseforms
+
+ remove the special case for singletag->singletag relabelling, it
+ wasn't transferring flags (T_BASEFORM) and it's handled by the
+ relabel_as_list case anyway
+ * [r10920] test/T_RelabelList_Apertium,
+ test/T_RelabelList_Apertium/expected.txt,
+ test/T_RelabelList_Apertium/grammar.cg3,
+ test/T_RelabelList_Apertium/input.txt,
+ test/T_RelabelList_Apertium/relabel.cg3r,
+ test/T_RelabelList_Apertium/run.pl: test that fails with cg-proc
+
+ (loops forever looking for a baseform relabelled by a
+ non-T_BASEFORM tag)
+
+2015-08-26 tino
+
+ * [r10919] src/AST.hpp, src/CMakeLists.txt, src/TextualParser.cpp,
+ src/TextualParser.hpp, src/stdafx.hpp: Print contents of textual
+ AST nodes
+
+2015-08-25 tino
+
+ * [r10917] ChangeLog, TODO, src/BinaryGrammar.hpp,
+ src/BinaryGrammar_read.cpp, src/version.hpp: OR'ed contexts also
+ need deferred loading
+
+2015-08-23 tino
+
+ * [r10910] clang-format.pl, src/BinaryGrammar.cpp, src/Grammar.hpp,
+ src/GrammarApplicator_runContextualTest.cpp, src/inlines.hpp:
+ Statement-final * format
+
+2015-08-23 unhammer
+
+ * [r10909] test/T_Relabel: rm empty dir
+
+2015-08-23 tino
+
+ * [r10908] ChangeLog, clang-format.pl, src/BinaryGrammar.cpp,
+ src/BinaryGrammar.hpp, src/Cohort.hpp, src/GrammarApplicator.hpp,
+ src/Relabeller.cpp, src/Relabeller.hpp, src/Rule.hpp,
+ src/Set.hpp, src/Tag.hpp, src/TagTrie.hpp, src/TextualParser.cpp,
+ src/TextualParser.hpp, src/cg-relabel.cpp, src/libcg3.cpp,
+ src/version.hpp: Fix formatting, auto_ptr
+
+2015-08-23 unhammer
+
+ * [r10907] src/Relabeller.cpp, src/Relabeller.hpp, src/Set.cpp,
+ src/Set.hpp: more trie_reindex to Set.hpp; cleanup
+ * [r10906] src/Relabeller.cpp: more warnings if weird relabel
+ grammar
+ * [r10905] src/Relabeller.hpp: rm unused debug function
+ * [r10904] src/Relabeller.cpp, src/Relabeller.hpp,
+ test/T_RelabelSet/expected.txt, test/T_RelabelSet/grammar.cg3,
+ test/T_RelabelSet/input.txt, test/T_RelabelSet/relabel.cg3r:
+ support for relabelling into a SET (with e.g. lista - listb)
+ * [r10903] src/Relabeller.cpp, src/Relabeller.hpp: some refactoring
+ * [r10902] src/Relabeller.cpp, src/Relabeller.hpp: some cleanup and
+ notes
+ * [r10901] src/Relabeller.cpp: s/for(auto)/boost_foreach(boring)/g
+ to please our CentOS users
+ * [r10900] src/Relabeller.cpp, src/cg-relabel.cpp: exit(1) unless
+ first arg is a cg3b; less verbose
+ * [r10899] test/T_Relabel/expected.txt, test/T_Relabel/grammar.cg3,
+ test/T_Relabel/input.txt, test/T_Relabel/relabel.cg3r,
+ test/T_Relabel/run.pl, test/T_RelabelList,
+ test/T_RelabelList/expected.txt, test/T_RelabelList/grammar.cg3,
+ test/T_RelabelList/input.txt, test/T_RelabelList/relabel.cg3r,
+ test/T_RelabelList/run.pl, test/T_RelabelSet,
+ test/T_RelabelSet/expected.txt, test/T_RelabelSet/grammar.cg3,
+ test/T_RelabelSet/input.txt, test/T_RelabelSet/relabel.cg3r,
+ test/T_RelabelSet/run.pl: update tests for bin cg-relabel
+ * [r10898] src/cg-relabel.cpp: Warn on textual grammars handed
+ to cg-relabel
+ * [r10897] src/CMakeLists.txt, src/Relabeller.cpp,
+ src/Relabeller.hpp, src/cg-relabel.cpp, test/T_Relabel,
+ test/T_Relabel/expected.txt, test/T_Relabel/grammar.cg3,
+ test/T_Relabel/input.txt, test/T_Relabel/relabel.cg3r,
+ test/T_Relabel/run.pl: cg-relabel command using binary grammars
+
+ New relabel format (ab)using vislcg3 format: MAP (N) (n) OR (np)
+ ; means relabel the tag N into n or np, alternatively: LIST N = n
+ np; MAP (N) N;
+
+ The from-part is only ever one tag, but the to-part can be
+ several. Full-on SET's as to-parts still TODO, but should be
+ doable.
+
+ No longer uses GrammarWriter at all.
+
+ Some cleanup TODO.
+
+2015-08-21 tino
+
+ * [r10893] src/TextualParser.cpp: Move AST Contexts and
+ ContextsTarget
+ * [r10892] ChangeLog, src/TextualParser.cpp, src/TextualParser.hpp,
+ src/main.cpp, src/options.hpp, src/options_conv.hpp,
+ src/version.hpp: Added cmdline --dump-ast to output grammar parse
+ tree
+
+2015-08-19 tino
+
+ * [r10891] ChangeLog, src/GrammarApplicator_runRules.cpp,
+ src/TextualParser.cpp, src/version.hpp,
+ test/T_BasicIff/grammar.cg3, test/T_MultipleSections/grammar.cg3,
+ test/T_Trace/grammar.cg3: Warn if a rule doesn't end in ;
+
+2015-08-19 unhammer
+
+ * [r10890] emacs/cg.el: new cg-comment-or-uncomment function, bound
+ to C-; and M-#
+
+ demo at https://asciinema.org/a/25236
+
+ also, fix M-a/M-e so they don't fail if there's a set named
+ "setn" etc.
+
+2015-08-12 tino
+
+ * [r10880] .clang-format, clang-format.pl, src/TextualParser.cpp:
+ Update to clang-format 3.8
+
+2015-08-11 tino
+
+ * [r10877] ChangeLog, manual/templates.xml,
+ src/GrammarApplicator.hpp,
+ src/GrammarApplicator_runContextualTest.cpp, src/version.hpp:
+ More template override fixing, and documentation
+ * [r10876] ChangeLog, manual/templates.xml,
+ src/GrammarApplicator_runContextualTest.cpp, src/version.hpp,
+ test/T_Templates/expected.txt, test/T_Templates/grammar.cg3: Fix
+ template position override so that pos/neg overrides check the
+ min/max exit edges, rather than just the final exit; Fix linking
+ from pos-overridden templates
+
+2015-08-05 tino
+
+ * [r10863] src/TextualParser.cpp: More g++ 4.2 fixes, the sequel
+ * [r10862] src/TextualParser.cpp: More g++ 4.2 fixes
+ * [r10861] CMakeLists.txt: Ware when changing negations and
+ forgetting some
+ * [r10860] CMakeLists.txt, src/TextualParser.cpp: Appease the
+ ancient OS X gods
+
+2015-08-04 tino
+
+ * [r10855] ChangeLog, clang-format.pl,
+ src/GrammarApplicator_runContextualTest.cpp, src/version.hpp:
+ Allow POS_TMPL_OVERRIDE to propagate into OR'ed tests, because I
+ can't figure out why I disallowed it in r4766
+
+2015-07-24 tino
+
+ * [r10846] clang-format.pl, src/options.hpp, src/options_conv.hpp:
+ Forgot to rescue other enum-like lists from clang-format
+
+2015-07-22 tino
+
+ * [r10845] .clang-format, src/ContextualTest.hpp: Style
+ * [r10844] .clang-format, clang-format.pl,
+ src/ApertiumApplicator.cpp, src/ApertiumApplicator.hpp,
+ src/BinaryGrammar.cpp, src/BinaryGrammar.hpp,
+ src/BinaryGrammar_read.cpp, src/BinaryGrammar_read_10043.cpp,
+ src/BinaryGrammar_write.cpp, src/Cohort.cpp, src/Cohort.hpp,
+ src/CohortIterator.cpp, src/CohortIterator.hpp,
+ src/ContextualTest.cpp, src/ContextualTest.hpp,
+ src/FSTApplicator.cpp, src/FSTApplicator.hpp,
+ src/FormatConverter.cpp, src/FormatConverter.hpp,
+ src/Grammar.cpp, src/Grammar.hpp, src/GrammarApplicator.cpp,
+ src/GrammarApplicator.hpp, src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_reflow.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runGrammar.cpp,
+ src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
+ src/GrammarWriter.hpp, src/IGrammarParser.hpp,
+ src/NicelineApplicator.cpp, src/NicelineApplicator.hpp,
+ src/PlaintextApplicator.cpp, src/PlaintextApplicator.hpp,
+ src/Reading.cpp, src/Reading.hpp, src/Rule.cpp, src/Rule.hpp,
+ src/Set.cpp, src/Set.hpp, src/SingleWindow.cpp,
+ src/SingleWindow.hpp, src/Strings.cpp, src/Strings.hpp,
+ src/Tag.cpp, src/Tag.hpp, src/TagTrie.hpp, src/TextualParser.cpp,
+ src/TextualParser.hpp, src/Window.cpp, src/Window.hpp,
+ src/bloomish.hpp, src/cg_comp.cpp, src/cg_conv.cpp,
+ src/cg_proc.cpp, src/flat_unordered_map.hpp,
+ src/flat_unordered_set.hpp, src/icu_uoptions.cpp,
+ src/inlines.hpp, src/interval_vector.hpp, src/istream.hpp,
+ src/libcg3.cpp, src/main.cpp, src/options.hpp,
+ src/options_conv.hpp, src/parser_helpers.hpp,
+ src/sorted_vector.hpp, src/stdafx.hpp, src/test_libcg3.c,
+ src/uextras.cpp, src/uextras.hpp, src/version.hpp: Major style
+ fixup based on clang-format, with wrapper to correct
+ clang-format's bad ideas
+ * [r10843] src/Strings.hpp: +,
+ * [r10842] src/ContextualTest.hpp, src/FormatConverter.hpp,
+ src/GrammarApplicator.hpp, src/Tag.hpp,
+ src/flat_unordered_map.hpp, src/flat_unordered_set.hpp,
+ src/options.hpp, src/options_conv.hpp: +,
+ * [r10841] src/Grammar.cpp: -;
+ * [r10840] src/ApertiumApplicator.cpp, src/PlaintextApplicator.cpp:
+ Minor style fix
+ * [r10839] src/ApertiumApplicator.cpp, src/BinaryGrammar_read.cpp,
+ src/BinaryGrammar_read_10043.cpp, src/BinaryGrammar_write.cpp,
+ src/CMakeLists.txt, src/Cohort.cpp, src/CohortIterator.cpp,
+ src/FSTApplicator.cpp, src/Grammar.cpp,
+ src/GrammarApplicator.cpp, src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_reflow.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runGrammar.cpp,
+ src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
+ src/NicelineApplicator.cpp, src/PlaintextApplicator.cpp,
+ src/Reading.cpp, src/Rule.cpp, src/Set.cpp, src/SingleWindow.cpp,
+ src/Tag.cpp, src/Tag.hpp, src/TextualParser.cpp, src/Window.cpp,
+ src/cg_proc.cpp, src/libcg3.cpp, src/macros.hpp, src/main.cpp,
+ src/parser_helpers.hpp, src/stdafx.hpp, test/runall.pl: Vastly
+ nicer foreach macros
+
+2015-07-21 tino
+
+ * [r10838] src/GrammarWriter.cpp, src/Strings.cpp, src/Strings.hpp,
+ src/TextualParser.cpp, src/stdafx.hpp: Disable some shadowing
+ warnings for VS2015; Keep global shadowing and fix that instance
+
+2015-07-02 tino
+
+ * [r10827] CMakeLists.txt, src/CMakeLists.txt: Disable static
+ library
+
+2015-07-01 tino
+
+ * [r10825] Doxyfile, all2unix.pl, make-naive.sh,
+ scripts/auto-linux-wget.sh: Unused
+
2015-06-30 tino
+ * [r10824] ChangeLog, scripts/profile-revisions.php,
+ src/GrammarApplicator.hpp, src/GrammarApplicator_reflow.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runRules.cpp, src/version.hpp: More static
+ and less static
* [r10822] ChangeLog, src/GrammarApplicator.hpp,
src/GrammarApplicator_matchSet.cpp,
src/GrammarApplicator_reflow.cpp,
@@ -1194,1132 +1866,3 @@
src/options_conv.hpp, src/version.hpp: Removed Matxin; cg-conv
can now output to Niceline and Plain Text formats (-N, -P)
-2014-01-07 tino
-
- * [r9588] ChangeLog, scripts/cg3-autobin.pl, src/main.cpp,
- src/version.hpp: Output info to stdout if --version or --help,
- and stderr otherwise
-
-2014-01-05 tino
-
- * [r9570] ChangeLog, TODO, scripts/cg3-autobin.pl,
- src/GrammarApplicator_reflow.cpp, src/stdafx.hpp,
- src/version.hpp: All dependency tests now pass, with and without
- --dep-delimit
-
-2014-01-04 tino
-
- * [r9568] ChangeLog, scripts/cg3-autobin.pl,
- src/NicelineApplicator.cpp, src/cg_conv.cpp, src/version.hpp,
- update-revision.pl: cg-conv now handles less clean Niceline input
- * [r9567] manual/manual.xml, src/ApertiumApplicator.cpp,
- src/ApertiumApplicator.hpp, src/BinaryGrammar.cpp,
- src/BinaryGrammar.hpp, src/BinaryGrammar_read.cpp,
- src/BinaryGrammar_write.cpp, src/Cohort.cpp, src/Cohort.hpp,
- src/CohortIterator.cpp, src/CohortIterator.hpp,
- src/CompositeTag.cpp, src/CompositeTag.hpp,
- src/ContextualTest.cpp, src/ContextualTest.hpp,
- src/FSTApplicator.cpp, src/FSTApplicator.hpp,
- src/FormatConverter.cpp, src/FormatConverter.hpp,
- src/Grammar.cpp, src/Grammar.hpp, src/GrammarApplicator.cpp,
- src/GrammarApplicator.hpp, src/GrammarApplicator_matchSet.cpp,
- src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runContextualTest.cpp,
- src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
- src/GrammarWriter.hpp, src/IGrammarParser.hpp,
- src/MatxinApplicator.cpp, src/MatxinApplicator.hpp,
- src/NicelineApplicator.cpp, src/NicelineApplicator.hpp,
- src/PlaintextApplicator.cpp, src/PlaintextApplicator.hpp,
- src/Reading.cpp, src/Reading.hpp, src/Rule.cpp, src/Rule.hpp,
- src/Set.cpp, src/Set.hpp, src/SingleWindow.cpp,
- src/SingleWindow.hpp, src/Strings.cpp, src/Strings.hpp,
- src/Tag.cpp, src/Tag.hpp, src/TextualParser.cpp,
- src/TextualParser.hpp, src/Window.cpp, src/Window.hpp,
- src/bloomish.hpp, src/cg3.h, src/cg_comp.cpp, src/cg_conv.cpp,
- src/cg_proc.cpp, src/inlines.hpp, src/interval_vector.hpp,
- src/istream.hpp, src/libcg3.cpp, src/macros.hpp, src/main.cpp,
- src/options.hpp, src/options_conv.hpp, src/sorted_vector.hpp,
- src/stdafx.hpp, src/test_libcg3.c, src/uextras.cpp,
- src/uextras.hpp, src/version.hpp: Yearly copyright refresh
-
-2013-12-30 tino
-
- * [r9528] ChangeLog, scripts/cg3-autobin.pl,
- src/GrammarApplicator_runGrammar.cpp, src/SingleWindow.cpp,
- src/version.hpp: Fix --dep-delimit
- * [r9524] ChangeLog, scripts/cg3-autobin.pl,
- src/GrammarApplicator_runGrammar.cpp, src/version.hpp: Fix
- --dep-delimit, again
- * [r9522] ChangeLog, scripts/cg3-autobin.pl,
- src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/SingleWindow.cpp,
- src/version.hpp: Fix --dep-delimit to react at a reading level
-
-2013-12-24 tino
-
- * [r9516] ChangeLog, scripts/cg3-autobin.pl, src/cg_proc.cpp,
- src/istream.hpp, src/version.hpp: Dirty fix: don't eat BOMs in
- the stream if null-flush is in effect.
-
-2013-12-10 tino
-
- * [r9492] AUTHORS, CMake/DebSourcePPA.cmake, CMake/FindBoost.cmake,
- CMake/FindICU.cmake, CMakeLists.txt, COPYING, ChangeLog,
- Doxyfile, INSTALL, LICENSE, NEWS, README, TODO, all2unix.pl,
- cg3.g, cg3.pc.in, cmake.sh, compile-profile.sh, dist/Portfile,
- dist/dist-osx.pl, dist/osx/vislcg3, emacs/cg.el, get-boost.sh,
- include/cycle.h, include/exec-stream/exec-stream.cpp,
- include/exec-stream/exec-stream.h,
- include/exec-stream/posix/exec-stream-helpers.cpp,
- include/exec-stream/posix/exec-stream-helpers.h,
- include/exec-stream/posix/exec-stream-impl.cpp,
- include/exec-stream/win/exec-stream-helpers.cpp,
- include/exec-stream/win/exec-stream-helpers.h,
- include/exec-stream/win/exec-stream-impl.cpp, include/uoptions.h,
- license.icu.txt, make-naive.sh, manual/bibliography.xml,
- manual/binarygrammar.xml, manual/cgglossary.xml,
- manual/cgkeywords.xml, manual/chapterize.sh,
- manual/cmdreference.xml, manual/combine.sh,
- manual/compatibility.xml, manual/contexts.xml,
- manual/contributing.xml, manual/dependencies.xml,
- manual/drafts.xml, manual/externals.xml, manual/faq.xml,
- manual/generate.sh, manual/grammar.xml, manual/installation.xml,
- manual/intro.xml, manual/make-dist.sh, manual/manual.xml,
- manual/parentheses.xml, manual/pdf.sh, manual/probabilistic.xml,
- manual/relations.xml, manual/rules.xml, manual/sets.xml,
- manual/singlefile.sh, manual/streamcmds.xml,
- manual/streamformats.xml, manual/subreadings.xml,
- manual/tags.xml, manual/templates.xml, manual/validate.sh,
- manual/xsl-fo.sh, newsletters/2007-03-29.txt,
- newsletters/2007-12-04.txt, newsletters/2008-01-24.txt,
- newsletters/2008-03-06.txt, newsletters/2008-05-20.txt,
- newsletters/2008-07-15.txt, newsletters/2008-08-25.txt,
- newsletters/2008-09-11.txt, newsletters/2008-09-20.txt,
- newsletters/2008-11-10.txt, newsletters/2009-01-08.txt,
- newsletters/2009-02-21.txt, newsletters/2009-05-13.txt,
- newsletters/2009-06-30.txt, newsletters/2009-09-27.txt,
- newsletters/2010-03-18.txt, newsletters/2010-06-06.txt,
- newsletters/2010-09-22.txt, newsletters/2010-09-23.txt,
- newsletters/2010-11-11.txt, newsletters/2011-04-30.txt,
- newsletters/2011-08-01.txt, newsletters/2012-01-06.txt,
- newsletters/2012-07-02.txt, newsletters/2012-07-29.txt,
- newsletters/2013-06-19.txt, profile-apply.sh, profile-parse.sh,
- scripts/CG3_External.pm, scripts/auto-linux-wget.sh,
- scripts/cg3-autobin.pl, scripts/external.pl,
- scripts/external_text.pl, scripts/profile-revisions-tally.php,
- scripts/profile-revisions.php, scripts/wrapper.valgrind,
- src/ApertiumApplicator.cpp, src/ApertiumApplicator.hpp,
- src/BinaryGrammar.cpp, src/BinaryGrammar.hpp,
- src/BinaryGrammar_read.cpp, src/BinaryGrammar_write.cpp,
- src/CMakeLists.txt, src/Cohort.cpp, src/Cohort.hpp,
- src/CohortIterator.cpp, src/CohortIterator.hpp,
- src/CompositeTag.cpp, src/CompositeTag.hpp,
- src/ContextualTest.cpp, src/ContextualTest.hpp,
- src/FSTApplicator.cpp, src/FSTApplicator.hpp,
- src/FormatConverter.cpp, src/FormatConverter.hpp,
- src/Grammar.cpp, src/Grammar.hpp, src/GrammarApplicator.cpp,
- src/GrammarApplicator.hpp, src/GrammarApplicator_matchSet.cpp,
- src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runContextualTest.cpp,
- src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
- src/GrammarWriter.hpp, src/IGrammarParser.hpp,
- src/MatxinApplicator.cpp, src/MatxinApplicator.hpp,
- src/NicelineApplicator.cpp, src/NicelineApplicator.hpp,
- src/PlaintextApplicator.cpp, src/PlaintextApplicator.hpp,
- src/Reading.cpp, src/Reading.hpp, src/Rule.cpp, src/Rule.hpp,
- src/Set.cpp, src/Set.hpp, src/SingleWindow.cpp,
- src/SingleWindow.hpp, src/Strings.cpp, src/Strings.hpp,
- src/Tag.cpp, src/Tag.hpp, src/TextualParser.cpp,
- src/TextualParser.hpp, src/Window.cpp, src/Window.hpp,
- src/all_cg_comp.cpp, src/all_cg_conv.cpp, src/all_cg_proc.cpp,
- src/all_vislcg3.cpp, src/bloomish.hpp, src/cg-comp.1,
- src/cg-proc.1, src/cg3.h, src/cg_comp.cpp, src/cg_conv.cpp,
- src/cg_proc.cpp, src/icu_uoptions.cpp, src/inlines.hpp,
- src/interval_vector.hpp, src/istream.hpp, src/libcg3.cpp,
- src/macros.hpp, src/main.cpp, src/options.hpp,
- src/options_conv.hpp, src/sorted_vector.hpp, src/stdafx.hpp,
- src/test_libcg3.c, src/uextras.cpp, src/uextras.hpp,
- src/version.hpp, src/vislcg3.1, test/Apertium/README,
- test/Apertium/T_AnyMinusSome/expected.txt,
- test/Apertium/T_AnyMinusSome/grammar.cg3,
- test/Apertium/T_AnyMinusSome/input.txt,
- test/Apertium/T_AnyMinusSome/run.pl,
- test/Apertium/T_Barrier/expected.txt,
- test/Apertium/T_Barrier/grammar.cg3,
- test/Apertium/T_Barrier/input.txt,
- test/Apertium/T_Barrier/run.pl,
- test/Apertium/T_BasicAppend/expected.txt,
- test/Apertium/T_BasicAppend/grammar.cg3,
- test/Apertium/T_BasicAppend/input.txt,
- test/Apertium/T_BasicAppend/run.pl,
- test/Apertium/T_BasicContextTest/expected.txt,
- test/Apertium/T_BasicContextTest/grammar.cg3,
- test/Apertium/T_BasicContextTest/input.txt,
- test/Apertium/T_BasicContextTest/run.pl,
- test/Apertium/T_BasicDelimit/expected.txt,
- test/Apertium/T_BasicDelimit/grammar.cg3,
- test/Apertium/T_BasicDelimit/input.txt,
- test/Apertium/T_BasicDelimit/run.pl,
- test/Apertium/T_BasicIff/expected.txt,
- test/Apertium/T_BasicIff/grammar.cg3,
- test/Apertium/T_BasicIff/input.txt,
- test/Apertium/T_BasicIff/run.pl,
- test/Apertium/T_BasicRemove/expected.txt,
- test/Apertium/T_BasicRemove/grammar.cg3,
- test/Apertium/T_BasicRemove/input.txt,
- test/Apertium/T_BasicRemove/run.pl,
- test/Apertium/T_BasicSelect/expected.txt,
- test/Apertium/T_BasicSelect/grammar.cg3,
- test/Apertium/T_BasicSelect/input.txt,
- test/Apertium/T_BasicSelect/run.pl,
- test/Apertium/T_BasicSubstitute/expected.txt,
- test/Apertium/T_BasicSubstitute/grammar.cg3,
- test/Apertium/T_BasicSubstitute/input.txt,
- test/Apertium/T_BasicSubstitute/run.pl,
- test/Apertium/T_CarefulBarrier/expected.txt,
- test/Apertium/T_CarefulBarrier/grammar.cg3,
- test/Apertium/T_CarefulBarrier/input.txt,
- test/Apertium/T_CarefulBarrier/run.pl,
- test/Apertium/T_CharsInWhiteSpace/expected.txt,
- test/Apertium/T_CharsInWhiteSpace/grammar.cg3,
- test/Apertium/T_CharsInWhiteSpace/input.txt,
- test/Apertium/T_CharsInWhiteSpace/run.pl,
- test/Apertium/T_CompositeSelect/expected.txt,
- test/Apertium/T_CompositeSelect/grammar.cg3,
- test/Apertium/T_CompositeSelect/input.txt,
- test/Apertium/T_CompositeSelect/run.pl,
- test/Apertium/T_DontMatchEmptySet/expected.txt,
- test/Apertium/T_DontMatchEmptySet/grammar.cg3,
- test/Apertium/T_DontMatchEmptySet/input.txt,
- test/Apertium/T_DontMatchEmptySet/run.pl,
- test/Apertium/T_EndlessSelect/expected.txt,
- test/Apertium/T_EndlessSelect/grammar.cg3,
- test/Apertium/T_EndlessSelect/input.txt,
- test/Apertium/T_EndlessSelect/run.pl,
- test/Apertium/T_Joiner/expected.txt,
- test/Apertium/T_Joiner/grammar.cg3,
- test/Apertium/T_Joiner/input.txt, test/Apertium/T_Joiner/run.pl,
- test/Apertium/T_MapAdd_Different/expected.txt,
- test/Apertium/T_MapAdd_Different/grammar.cg3,
- test/Apertium/T_MapAdd_Different/input.txt,
- test/Apertium/T_MapAdd_Different/run.pl,
- test/Apertium/T_MatchBaseform/expected.txt,
- test/Apertium/T_MatchBaseform/grammar.cg3,
- test/Apertium/T_MatchBaseform/input.txt,
- test/Apertium/T_MatchBaseform/run.pl,
- test/Apertium/T_MatchWordform/expected.txt,
- test/Apertium/T_MatchWordform/grammar.cg3,
- test/Apertium/T_MatchWordform/input.txt,
- test/Apertium/T_MatchWordform/run.pl,
- test/Apertium/T_MultiWords/expected.txt,
- test/Apertium/T_MultiWords/grammar.cg3,
- test/Apertium/T_MultiWords/input.txt,
- test/Apertium/T_MultiWords/run.pl,
- test/Apertium/T_MultipleSections/expected.txt,
- test/Apertium/T_MultipleSections/grammar.cg3,
- test/Apertium/T_MultipleSections/input.txt,
- test/Apertium/T_MultipleSections/run.pl,
- test/Apertium/T_MultiwordTagStaying/expected.txt,
- test/Apertium/T_MultiwordTagStaying/grammar.cg3,
- test/Apertium/T_MultiwordTagStaying/input.txt,
- test/Apertium/T_MultiwordTagStaying/run.pl,
- test/Apertium/T_NegatedContextTest/expected.txt,
- test/Apertium/T_NegatedContextTest/grammar.cg3,
- test/Apertium/T_NegatedContextTest/input.txt,
- test/Apertium/T_NegatedContextTest/run.pl,
- test/Apertium/T_RegExp_Map/expected.txt,
- test/Apertium/T_RegExp_Map/grammar.cg3,
- test/Apertium/T_RegExp_Map/input.txt,
- test/Apertium/T_RegExp_Map/run.pl,
- test/Apertium/T_RegExp_Select/expected.txt,
- test/Apertium/T_RegExp_Select/grammar.cg3,
- test/Apertium/T_RegExp_Select/input.txt,
- test/Apertium/T_RegExp_Select/run.pl,
- test/Apertium/T_RegExp_Substitute/expected.txt,
- test/Apertium/T_RegExp_Substitute/grammar.cg3,
- test/Apertium/T_RegExp_Substitute/input.txt,
- test/Apertium/T_RegExp_Substitute/run.pl,
- test/Apertium/T_RemoveSingleTag/expected.txt,
- test/Apertium/T_RemoveSingleTag/grammar.cg3,
- test/Apertium/T_RemoveSingleTag/input.txt,
- test/Apertium/T_RemoveSingleTag/run.pl,
- test/Apertium/T_ScanningTests/expected.txt,
- test/Apertium/T_ScanningTests/grammar.cg3,
- test/Apertium/T_ScanningTests/input.txt,
- test/Apertium/T_ScanningTests/run.pl,
- test/Apertium/T_Sections/expected.txt,
- test/Apertium/T_Sections/grammar.cg3,
- test/Apertium/T_Sections/input.txt,
- test/Apertium/T_Sections/run.pl,
- test/Apertium/T_SetOp_FailFast/expected.txt,
- test/Apertium/T_SetOp_FailFast/grammar.cg3,
- test/Apertium/T_SetOp_FailFast/input.txt,
- test/Apertium/T_SetOp_FailFast/run.pl,
- test/Apertium/T_SetOp_OR/expected.txt,
- test/Apertium/T_SetOp_OR/grammar.cg3,
- test/Apertium/T_SetOp_OR/input.txt,
- test/Apertium/T_SetOp_OR/run.pl,
- test/Apertium/T_SpaceInWord/expected.txt,
- test/Apertium/T_SpaceInWord/grammar.cg3,
- test/Apertium/T_SpaceInWord/input.txt,
- test/Apertium/T_SpaceInWord/run.pl,
- test/Apertium/T_SuperBlanks/expected.txt,
- test/Apertium/T_SuperBlanks/grammar.cg3,
- test/Apertium/T_SuperBlanks/input.txt,
- test/Apertium/T_SuperBlanks/run.pl,
- test/Apertium/T_SuperBlanksNewline/expected.txt,
- test/Apertium/T_SuperBlanksNewline/grammar.cg3,
- test/Apertium/T_SuperBlanksNewline/input.txt,
- test/Apertium/T_SuperBlanksNewline/run.pl,
- test/Apertium/T_SuperBlanksWithEscape/expected.txt,
- test/Apertium/T_SuperBlanksWithEscape/grammar.cg3,
- test/Apertium/T_SuperBlanksWithEscape/input.txt,
- test/Apertium/T_SuperBlanksWithEscape/run.pl,
- test/Apertium/T_Unification/expected.txt,
- test/Apertium/T_Unification/grammar.cg3,
- test/Apertium/T_Unification/input.txt,
- test/Apertium/T_Unification/run.pl,
- test/Apertium/T_UnknownWord/expected.txt,
- test/Apertium/T_UnknownWord/grammar.cg3,
- test/Apertium/T_UnknownWord/input.txt,
- test/Apertium/T_UnknownWord/run.pl, test/Apertium/clean.sh,
- test/Apertium/runall.pl, test/T_AnyMinusSome/expected.txt,
- test/T_AnyMinusSome/grammar.cg3, test/T_AnyMinusSome/input.txt,
- test/T_AnyMinusSome/run.pl, test/T_Barrier/expected.txt,
- test/T_Barrier/grammar.cg3, test/T_Barrier/input.txt,
- test/T_Barrier/run.pl, test/T_BasicAppend/expected.txt,
- test/T_BasicAppend/grammar.cg3, test/T_BasicAppend/input.txt,
- test/T_BasicAppend/run.pl, test/T_BasicContextTest/expected.txt,
- test/T_BasicContextTest/grammar.cg3,
- test/T_BasicContextTest/input.txt,
- test/T_BasicContextTest/run.pl, test/T_BasicDelimit/expected.txt,
- test/T_BasicDelimit/grammar.cg3, test/T_BasicDelimit/input.txt,
- test/T_BasicDelimit/run.pl, test/T_BasicDependency/expected.txt,
- test/T_BasicDependency/grammar.cg3,
- test/T_BasicDependency/input.txt, test/T_BasicDependency/run.pl,
- test/T_BasicIff/expected.txt, test/T_BasicIff/grammar.cg3,
- test/T_BasicIff/input.txt, test/T_BasicIff/run.pl,
- test/T_BasicSelect/expected.txt, test/T_BasicSelect/grammar.cg3,
- test/T_BasicSelect/input.txt, test/T_BasicSelect/run.pl,
- test/T_BasicSubstitute/expected.txt,
- test/T_BasicSubstitute/grammar.cg3,
- test/T_BasicSubstitute/input.txt, test/T_BasicSubstitute/run.pl,
- test/T_CG2Compat/expected.txt, test/T_CG2Compat/grammar.cg3,
- test/T_CG2Compat/input.txt, test/T_CG2Compat/run.pl,
- test/T_CarefulBarrier/expected.txt,
- test/T_CarefulBarrier/grammar.cg3,
- test/T_CarefulBarrier/input.txt, test/T_CarefulBarrier/run.pl,
- test/T_DelayAndDelete/expected.txt,
- test/T_DelayAndDelete/grammar.cg3,
- test/T_DelayAndDelete/input.txt, test/T_DelayAndDelete/run.pl,
- test/T_Dependency_Loops/expected.txt,
- test/T_Dependency_Loops/grammar.cg3,
- test/T_Dependency_Loops/input.txt,
- test/T_Dependency_Loops/run.pl,
- test/T_Dependency_OutOfRange/expected.txt,
- test/T_Dependency_OutOfRange/grammar.cg3,
- test/T_Dependency_OutOfRange/input.txt,
- test/T_Dependency_OutOfRange/run.pl,
- test/T_DontMatchEmptySet/expected.txt,
- test/T_DontMatchEmptySet/grammar.cg3,
- test/T_DontMatchEmptySet/input.txt,
- test/T_DontMatchEmptySet/run.pl,
- test/T_EndlessSelect/expected.txt,
- test/T_EndlessSelect/grammar.cg3, test/T_EndlessSelect/input.txt,
- test/T_EndlessSelect/run.pl, test/T_External/expected.txt,
- test/T_External/grammar.cg3, test/T_External/input.txt,
- test/T_External/run.pl, test/T_External/sets.inc,
- test/T_Include/expected.txt, test/T_Include/grammar.cg3,
- test/T_Include/input.txt, test/T_Include/rules.inc,
- test/T_Include/run.pl, test/T_Include/sets.inc,
- test/T_InputCommands/expected.txt,
- test/T_InputCommands/grammar.cg3, test/T_InputCommands/input.txt,
- test/T_InputCommands/run.pl, test/T_InputMarkup/expected.txt,
- test/T_InputMarkup/grammar.cg3, test/T_InputMarkup/input.txt,
- test/T_InputMarkup/run.pl, test/T_JumpExecute/expected.txt,
- test/T_JumpExecute/grammar.cg3, test/T_JumpExecute/input.txt,
- test/T_JumpExecute/run.pl, test/T_MapAdd_Different/expected.txt,
- test/T_MapAdd_Different/grammar.cg3,
- test/T_MapAdd_Different/input.txt,
- test/T_MapAdd_Different/run.pl,
- test/T_MapThenRemove/expected.txt,
- test/T_MapThenRemove/grammar.cg3, test/T_MapThenRemove/input.txt,
- test/T_MapThenRemove/run.pl, test/T_MapThenSelect/expected.txt,
- test/T_MapThenSelect/grammar.cg3, test/T_MapThenSelect/input.txt,
- test/T_MapThenSelect/run.pl, test/T_MappingPrefix/expected.txt,
- test/T_MappingPrefix/grammar.cg3, test/T_MappingPrefix/input.txt,
- test/T_MappingPrefix/run.pl, test/T_Movement/expected.txt,
- test/T_Movement/grammar.cg3, test/T_Movement/input.txt,
- test/T_Movement/run.pl, test/T_MultipleSections/expected.txt,
- test/T_MultipleSections/grammar.cg3,
- test/T_MultipleSections/input.txt,
- test/T_MultipleSections/run.pl,
- test/T_NegatedContextTest/expected.txt,
- test/T_NegatedContextTest/grammar.cg3,
- test/T_NegatedContextTest/input.txt,
- test/T_NegatedContextTest/run.pl,
- test/T_NotContextTest/expected.txt,
- test/T_NotContextTest/grammar.cg3,
- test/T_NotContextTest/input.txt, test/T_NotContextTest/run.pl,
- test/T_NumericalTags/expected.txt,
- test/T_NumericalTags/grammar.cg3, test/T_NumericalTags/input.txt,
- test/T_NumericalTags/run.pl, test/T_OmniWithBarrier/expected.txt,
- test/T_OmniWithBarrier/grammar.cg3,
- test/T_OmniWithBarrier/input.txt, test/T_OmniWithBarrier/run.pl,
- test/T_Omniscan/expected.txt, test/T_Omniscan/grammar.cg3,
- test/T_Omniscan/input.txt, test/T_Omniscan/run.pl,
- test/T_OriginPassing/expected.txt,
- test/T_OriginPassing/grammar.cg3, test/T_OriginPassing/input.txt,
- test/T_OriginPassing/run.pl, test/T_Parentheses/expected.txt,
- test/T_Parentheses/grammar.cg3, test/T_Parentheses/input.txt,
- test/T_Parentheses/run.pl, test/T_RegExp/expected.txt,
- test/T_RegExp/grammar.cg3, test/T_RegExp/input.txt,
- test/T_RegExp/run.pl, test/T_Relations/expected.txt,
- test/T_Relations/grammar.cg3, test/T_Relations/input.txt,
- test/T_Relations/run.pl, test/T_RemCohort/expected.txt,
- test/T_RemCohort/grammar.cg3, test/T_RemCohort/input.txt,
- test/T_RemCohort/run.pl, test/T_RemoveSingleTag/expected.txt,
- test/T_RemoveSingleTag/grammar.cg3,
- test/T_RemoveSingleTag/input.txt, test/T_RemoveSingleTag/run.pl,
- test/T_ScanningTests/expected.txt,
- test/T_ScanningTests/grammar.cg3, test/T_ScanningTests/input.txt,
- test/T_ScanningTests/run.pl, test/T_SectionRanges/expected.txt,
- test/T_SectionRanges/grammar.cg3, test/T_SectionRanges/input.txt,
- test/T_SectionRanges/run.pl, test/T_Sections/expected.txt,
- test/T_Sections/grammar.cg3, test/T_Sections/input.txt,
- test/T_Sections/run.pl, test/T_SetOp_FailFast/expected.txt,
- test/T_SetOp_FailFast/grammar.cg3,
- test/T_SetOp_FailFast/input.txt, test/T_SetOp_FailFast/run.pl,
- test/T_SetOps/expected.txt, test/T_SetOps/grammar.cg3,
- test/T_SetOps/input.txt, test/T_SetOps/run.pl,
- test/T_SetParentChild/expected.txt,
- test/T_SetParentChild/grammar.cg3,
- test/T_SetParentChild/input.txt, test/T_SetParentChild/run.pl,
- test/T_SoftDelimiters/expected.txt,
- test/T_SoftDelimiters/grammar.cg3,
- test/T_SoftDelimiters/input.txt, test/T_SoftDelimiters/run.pl,
- test/T_SpaceInForms/expected.txt,
- test/T_SpaceInForms/grammar.cg3, test/T_SpaceInForms/input.txt,
- test/T_SpaceInForms/run.pl,
- test/T_SubReadings_Apertium/expected.txt,
- test/T_SubReadings_Apertium/grammar.cg3,
- test/T_SubReadings_Apertium/input.txt,
- test/T_SubReadings_Apertium/run.pl,
- test/T_SubReadings_CG/expected.txt,
- test/T_SubReadings_CG/grammar.cg3,
- test/T_SubReadings_CG/input.txt, test/T_SubReadings_CG/run.pl,
- test/T_SubstituteNil/expected.txt,
- test/T_SubstituteNil/grammar.cg3, test/T_SubstituteNil/input.txt,
- test/T_SubstituteNil/run.pl, test/T_Templates/expected.txt,
- test/T_Templates/grammar.cg3, test/T_Templates/input.txt,
- test/T_Templates/run.pl, test/T_Trace/expected.txt,
- test/T_Trace/grammar.cg3, test/T_Trace/input.txt,
- test/T_Trace/run.pl, test/T_Unification/expected.txt,
- test/T_Unification/grammar.cg3, test/T_Unification/input.txt,
- test/T_Unification/run.pl, test/T_Variables/expected.txt,
- test/T_Variables/grammar.cg3, test/T_Variables/input.txt,
- test/T_Variables/run.pl, test/clean.sh, test/runall.pl, todo.sh,
- update-revision.pl, vapply.sh, vparse.sh, win32/getopt.c,
- win32/getopt.h, win32/libgen.c, win32/libgen.h: svn:eol-style=LF
- * [r9491] scripts/profile-revisions.php, src/stdafx.hpp, vapply.sh:
- \r\n -> \n
-
-2013-12-09 tino
-
- * [r9483] src/Grammar.cpp: Target:Rule code saved for later,
- #ifdef'd off
-
-2013-12-04 tino
-
- * [r9467] ChangeLog, scripts/cg3-autobin.pl,
- src/GrammarApplicator_runRules.cpp, src/version.hpp,
- test/T_RemCohort/expected.txt, test/T_RemCohort/grammar.cg3,
- test/T_RemCohort/input.txt: RemCohort now hands off enclosed
- cohorts
-
-2013-12-01 tino
-
- * [r9454] CMakeLists.txt: cg3
- * [r9453] ChangeLog, src/version.hpp, update-revision.pl: Use
- svn2cl to generate the ChangeLog
-
-2013-11-21 tino
-
- * [r9406] manual/rules.xml, scripts/cg3-autobin.pl,
- src/GrammarApplicator.hpp, src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runRules.cpp, src/Strings.cpp,
- src/Strings.hpp, src/TextualParser.cpp, src/version.hpp,
- test/T_BasicAppend/expected.txt, test/T_BasicAppend/grammar.cg3,
- test/T_BasicAppend/run.pl: COPY now has an EXCEPT clause (WIP,
- only takes raw tags)
-
-2013-11-19 tino
-
- * [r9401] src/ApertiumApplicator.cpp, src/FSTApplicator.cpp,
- src/GrammarApplicator_runContextualTest.cpp,
- src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/MatxinApplicator.cpp,
- src/NicelineApplicator.cpp, src/PlaintextApplicator.cpp,
- src/Tag.cpp: Fix everything clang-analyzer complained about
-
-2013-11-18 tino
-
- * [r9398] CMakeLists.txt, src/CMakeLists.txt, src/CompositeTag.hpp,
- src/Rule.hpp, src/Tag.hpp: CMake script cleanup
-
-2013-11-17 tino
-
- * [r9397] test/T_RemCohort/run.pl, test/T_Unification/run.pl: diff
- -Z is not always available, so use -b
- * [r9396] scripts/cg3-autobin.pl,
- src/GrammarApplicator_runRules.cpp, src/sorted_vector.hpp,
- src/version.hpp, test/T_RemCohort/expected.txt,
- test/T_RemCohort/grammar.cg3, test/T_RemCohort/input.txt,
- test/T_RemCohort/run.pl: Rem- and AddCohort now properly update
- the endtag marker <<<.
-
-2013-11-15 tino
-
- * [r9378] scripts/cg3-autobin.pl, src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runContextualTest.cpp, src/version.hpp,
- test/T_Unification/expected.txt, test/T_Unification/grammar.cg3,
- test/T_Unification/run.pl: If a test does not succeed, discard
- any captures performed by that test.
-
-2013-11-13 tino
-
- * [r9374] scripts/cg3-autobin.pl, src/GrammarApplicator.cpp,
- src/GrammarApplicator.hpp, src/main.cpp, src/options.hpp,
- src/version.hpp, test/T_AnyMinusSome/input.txt,
- test/T_AnyMinusSome/run.pl: Added --unique-tags
-
-2013-11-12 tino
-
- * [r9366] get-boost.sh: Boost 1.55.0
-
-2013-11-08 tino
-
- * [r9354] src/options.hpp: --ordered description
-
-2013-10-31 tino
-
- * [r9318] src/CMakeLists.txt: Don't make symlink
-
-2013-10-29 tino
-
- * [r9313] src/TextualParser.cpp, src/istream.hpp: Sign compare fix
- * [r9312] scripts/cg3-autobin.pl, src/TextualParser.cpp,
- src/istream.hpp, src/version.hpp, test/T_Include/grammar.cg3,
- test/T_Include/input.txt, test/T_Include/rules.inc,
- test/T_Include/run.pl, test/T_Include/sets.inc: Discard initial
- UTF BOMs from stdin and in grammars and in included grammars
-
-2013-10-23 tino
-
- * [r9306] scripts/cg3-autobin.pl,
- src/GrammarApplicator_runRules.cpp, src/version.hpp,
- test/T_BasicSubstitute/expected.txt,
- test/T_BasicSubstitute/grammar.cg3: Substitute can now replace
- wordforms with varstrings
-
-2013-10-21 tino
-
- * [r9299] src/ApertiumApplicator.cpp, src/FSTApplicator.cpp,
- src/GrammarApplicator.hpp, src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runGrammar.cpp, src/NicelineApplicator.cpp,
- src/PlaintextApplicator.cpp, src/libcg3.cpp,
- test/T_BasicSubstitute/expected.txt,
- test/T_BasicSubstitute/grammar.cg3: WIP Substitute wordform
- replace with varstring
- * [r9298] CMakeLists.txt, scripts/cg3-autobin.pl,
- src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runRules.cpp, src/version.hpp,
- test/T_BasicSubstitute/expected.txt,
- test/T_BasicSubstitute/grammar.cg3,
- test/T_BasicSubstitute/run.pl: Substitute can now replace
- wordforms
-
-2013-10-17 tino
-
- * [r9292] CMakeLists.txt: Add Saucy to PPA
-
-2013-10-11 unhammer
-
- * [r9285] emacs/cg.el: byte-compile on old emacsen
- * [r9284] emacs/cg.el: fix some compiler warnings (still requiring
- 'cl until 24.3 available everywhere)
- * [r9283] emacs/cg.el: fix header (package.el format)
-
-2013-10-09 tino
-
- * [r9279] scripts/cg3-autobin.pl, src/GrammarApplicator.hpp,
- src/cg3.h, src/libcg3.cpp, src/stdafx.hpp, src/version.hpp: Added
- cg3_sentence_copy() to libcg3 to aid in passing a sentence from
- one applicator to another
-
-2013-10-03 tino
-
- * [r9274] src/ApertiumApplicator.cpp, src/BinaryGrammar_read.cpp,
- src/BinaryGrammar_write.cpp, src/CompositeTag.cpp,
- src/CompositeTag.hpp, src/Grammar.cpp, src/GrammarApplicator.cpp,
- src/GrammarApplicator_matchSet.cpp,
- src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
- src/Set.cpp, src/TextualParser.cpp: Coding style cleanup, and
- more std::list to std::vector
- * [r9273] scripts/cg3-autobin.pl, src/ApertiumApplicator.cpp,
- src/BinaryGrammar_read.cpp, src/Grammar.hpp,
- src/GrammarApplicator.cpp, src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runRules.cpp, src/MatxinApplicator.cpp,
- src/Reading.hpp, src/Tag.cpp, src/Tag.hpp, src/inlines.hpp,
- src/libcg3.cpp, src/stdafx.hpp, src/version.hpp: Change the
- ordered tags_list in Reading to std::vector
- * [r9272] scripts/cg3-autobin.pl, src/inlines.hpp, src/version.hpp:
- Fixed --section 2-2 regression
-
-2013-10-01 tino
-
- * [r9265] scripts/cg3-autobin.pl, src/GrammarApplicator.cpp,
- src/GrammarApplicator.hpp, src/GrammarApplicator_runRules.cpp,
- src/main.cpp, src/options.hpp, src/version.hpp: Add --dry-run
-
-2013-10-01 unhammer
-
- * [r9264] emacs/cg.el: minor bug in `cg-output-toggle-analyses',
- some doc
- * [r9261] emacs/cg.el: `h' (cg-output-hide-analyses) in output
- buffer makes CG format look like plain sentences.
-
- Tags/analyses are still searchable, but hidden until searched
- for. `u' (cg-output-set-unhide) lets you set some exceptions to
- the hiding, or put (setq cg-output-unhide-regex "someregex") in
- ~/.emacs to save some default exception.
-
-2013-09-30 tino
-
- * [r9258] scripts/cg3-autobin.pl,
- src/GrammarApplicator_runRules.cpp, src/main.cpp,
- src/version.hpp, test/T_DelayAndDelete/expected.txt,
- test/T_DelayAndDelete/grammar.cg3,
- test/T_DelayAndDelete/input.txt, test/T_DelayAndDelete/run.pl:
- Fixed unsafe removal of magic readings. Could not produce a
- distilled test case that showed the problem.
- * [r9254] scripts/cg3-autobin.pl, src/cg3.h, src/libcg3.cpp,
- src/version.hpp: libcg3 now knows about sub-readings
-
-2013-09-25 tino
-
- * [r9249] CMakeLists.txt, scripts/cg3-autobin.pl, src/version.hpp:
- CMake Source and Binary dirs are separate
- * [r9248] scripts/cg3-autobin.pl, src/BinaryGrammar.hpp,
- src/BinaryGrammar_read.cpp, src/BinaryGrammar_write.cpp,
- src/ContextualTest.cpp, src/ContextualTest.hpp, src/Grammar.cpp,
- src/Grammar.hpp, src/Rule.cpp, src/Tag.cpp,
- src/TextualParser.cpp, src/inlines.hpp, src/version.hpp,
- test/T_Templates/grammar.cg3: Eliminate duplicate contextual
- tests. This saves 5.18% allocations and 6.07% memory usage.
- Binary format changed to accomodate. This change also allows one
- to refer to later templates.
- * [r9247] compile-profile.sh, profile-apply.sh, profile-parse.sh,
- scripts/profile-revisions.php, vapply.sh, vparse.sh: Scripts
-
-2013-09-25 unhammer
-
- * [r9246] emacs/cg.el: remove outdated comment
-
-2013-09-25 tino
-
- * [r9245] CMakeLists.txt, cmake.sh, compile-profile.sh,
- profile-apply.sh, vapply.sh, vparse.sh: Enabling C++11 makes
- grammar parsing 2.08% faster and rule applying 0.22% slower.
-
-2013-09-24 tino
-
- * [r9244] scripts/cg3-autobin.pl, src/version.hpp,
- update-revision.pl: ...and make update-revision know about the
- rename.
- * [r9243] CMakeLists.txt, src/ApertiumApplicator.cpp,
- src/ApertiumApplicator.h, src/ApertiumApplicator.hpp,
- src/BinaryGrammar.cpp, src/BinaryGrammar.h,
- src/BinaryGrammar.hpp, src/BinaryGrammar_read.cpp,
- src/BinaryGrammar_write.cpp, src/CMakeLists.txt, src/Cohort.cpp,
- src/Cohort.h, src/Cohort.hpp, src/CohortIterator.cpp,
- src/CohortIterator.h, src/CohortIterator.hpp,
- src/CompositeTag.cpp, src/CompositeTag.h, src/CompositeTag.hpp,
- src/ContextualTest.cpp, src/ContextualTest.h,
- src/ContextualTest.hpp, src/FSTApplicator.cpp,
- src/FSTApplicator.hpp, src/FormatConverter.cpp,
- src/FormatConverter.h, src/FormatConverter.hpp, src/Grammar.cpp,
- src/Grammar.h, src/Grammar.hpp, src/GrammarApplicator.cpp,
- src/GrammarApplicator.h, src/GrammarApplicator.hpp,
- src/GrammarApplicator_matchSet.cpp,
- src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runContextualTest.cpp,
- src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
- src/GrammarWriter.h, src/GrammarWriter.hpp, src/IGrammarParser.h,
- src/IGrammarParser.hpp, src/MatxinApplicator.cpp,
- src/MatxinApplicator.h, src/MatxinApplicator.hpp,
- src/NicelineApplicator.cpp, src/NicelineApplicator.hpp,
- src/PlaintextApplicator.cpp, src/PlaintextApplicator.hpp,
- src/Reading.cpp, src/Reading.h, src/Reading.hpp, src/Rule.cpp,
- src/Rule.h, src/Rule.hpp, src/Set.cpp, src/Set.h, src/Set.hpp,
- src/SingleWindow.cpp, src/SingleWindow.h, src/SingleWindow.hpp,
- src/Strings.cpp, src/Strings.h, src/Strings.hpp, src/Tag.cpp,
- src/Tag.h, src/Tag.hpp, src/TextualParser.cpp,
- src/TextualParser.h, src/TextualParser.hpp, src/Window.cpp,
- src/Window.h, src/Window.hpp, src/all_cg_comp.cpp,
- src/all_cg_conv.cpp, src/all_cg_proc.cpp, src/all_vislcg3.cpp,
- src/cg_comp.cpp, src/cg_conv.cpp, src/cg_proc.cpp,
- src/icu_uoptions.cpp, src/inlines.h, src/inlines.hpp,
- src/istream.hpp, src/libcg3.cpp, src/macros.h, src/macros.hpp,
- src/main.cpp, src/options.h, src/options.hpp, src/stdafx.h,
- src/stdafx.hpp, src/uextras.cpp, src/uextras.h, src/uextras.hpp,
- src/version.h, src/version.hpp: C++ headers are .hpp, and show
- headers in IDEs
- * [r9242] src/version.h: Missed a const
- * [r9241] include/uoptions.h, src/CMakeLists.txt,
- src/icu_uoptions.h, src/libcg3.cpp, src/main.cpp, src/options.h,
- src/options_conv.hpp, src/version.h: Move uoptions.h to external,
- and inline it
- * [r9240] src/cg_comp.cpp, src/cg_proc.cpp: basename() is char* on
- Posix...by the gods, C is so awful
- * [r9239] src/cg_comp.cpp, src/cg_proc.cpp, win32/getopt.h,
- win32/libgen.c, win32/libgen.h: Various cleanup
- * [r9238] CMakeLists.txt, include/exec-stream,
- include/exec-stream/exec-stream.cpp,
- include/exec-stream/exec-stream.h, include/exec-stream/posix,
- include/exec-stream/win, src/CMakeLists.txt,
- src/GrammarApplicator.h, src/exec-stream.cpp, src/exec-stream.h,
- src/posix, src/stdafx.h, src/uextras.cpp, src/win: Moved
- exec-stream to external
- * [r9237] get-boost.sh, include/cycle.h, src/cycle.h, src/stdafx.h:
- Move cycle.h to signify external copyright
- * [r9236] CMakeLists.txt, scripts/cg3-autobin.pl,
- src/TextualParser.h, src/version.h: CMake 2.8.0 (even Lucid has
- that); Use C++11 if available
- * [r9235] scripts/cg3-autobin.pl, src/BinaryGrammar_read.cpp,
- src/BinaryGrammar_write.cpp, src/ContextualTest.cpp,
- src/ContextualTest.h, src/Grammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
- src/Rule.cpp, src/Rule.h, src/TextualParser.cpp, src/version.h:
- WIP towards duplicate context elimination
-
-2013-09-23 tino
-
- * [r9233] TODO, scripts/cg3-autobin.pl, src/BinaryGrammar_read.cpp,
- src/ContextualTest.cpp, src/ContextualTest.h, src/Grammar.cpp,
- src/Grammar.h, src/Rule.cpp, src/Rule.h, src/TextualParser.cpp,
- src/TextualParser.h, src/version.h, test/runall.pl: Stable but
- WIP cleanup of ContextualTest allocations and duplicate
- elimination
-
-2013-09-18 tino
-
- * [r9230] src/cg3.h: Appease the -pedantic overlords
-
-2013-09-18 ftyers
-
- * [r9229] cg3.pc.in: remove versioning guff in cg3.pc.in
-
-2013-09-18 tino
-
- * [r9228] src/CMakeLists.txt, src/libcg3.cpp: WIP library
-
-2013-09-17 tino
-
- * [r9226] CMakeLists.txt, cg3.pc.in, src/CMakeLists.txt,
- vislcg3-0.9.pc.in: WIP pkg-config
-
-2013-09-17 ftyers
-
- * [r9225] CMakeLists.txt: first pass
- * [r9224] vislcg3-0.9.pc.in: add .pc.in file
-
-2013-09-06 tino
-
- * [r9215] src/inlines.h: Line endings...
- * [r9212] src/GrammarApplicator_runContextualTest.cpp,
- src/inlines.h: Cleaner range parser code
-
-2013-09-05 unhammer
-
- * [r9209] emacs/cg.el: comments
- * [r9208] emacs/cg.el: clear cache if pre-pipe variable changed
- (also s/tabs/spaces)
-
-2013-09-04 tino
-
- * [r9205] manual/templates.xml, scripts/cg3-autobin.pl,
- src/TextualParser.cpp, src/version.h: More precise inline
- template abuse warnings; Only show them if compiling
-
-2013-09-02 tino
-
- * [r9201] scripts/cg3-autobin.pl, src/ApertiumApplicator.cpp,
- src/MatxinApplicator.cpp, src/Tag.cpp, src/uextras.h,
- src/version.h: Minor string manipulation optimizations
- * [r9200] scripts/cg3-autobin.pl, src/ApertiumApplicator.cpp,
- src/MatxinApplicator.cpp, src/uextras.h, src/version.h: Eliminate
- use of ux_substr() in favour of std::string's own .substr()
- * [r9199] scripts/cg3-autobin.pl,
- src/GrammarApplicator_runRules.cpp, src/TextualParser.cpp,
- src/uextras.cpp, src/uextras.h, src/version.h,
- test/T_Templates/grammar.cg3: Sub-readings now bypass some
- caching; Warn on pointless inline templates; Inline some ux
- functions
-
-2013-08-06 tino
-
- * [r9159] scripts/cg3-autobin.pl, src/TextualParser.cpp,
- src/version.h: Fix parsing composite tags with space after
- opening (
-
-2013-07-18 unhammer
-
- * [r9124] emacs/cg.el: better check for cache-emptying (cg or input
- variables changed) + after-check-secs
-
- Cache works when cg-per-buffer-input now. Check-after-change now
- waits 1 sec (configurable) before checking. Avoid error message
- if output window closed before check done.
-
-2013-07-17 unhammer
-
- * [r9122] emacs/cg.el: C-c c toggles running cg-check after each
- change, pre-pipe is cached so this works fast
-
-2013-07-15 unhammer
-
- * [r9119] emacs/cg.el: comment-face on all regular tags of ;-lines
- (was supposed to be in earlier commit)
- * [r9118] emacs/cg.el: C-c C-n / C-c C-p to navigate errors (or
- trace hits) from the CG file
- * [r9115] emacs/cg.el: common input buffer for all CG buffers (as
- default at least); some more fontification on cg-output
-
-2013-07-12 unhammer
-
- * [r9113] emacs/cg.el: start mode on .cg3 files as that's the
- official suffix; ELPA-style commenting
-
-2013-07-04 unhammer
-
- * [r9103] src/ApertiumApplicator.cpp,
- test/Apertium/T_SuperBlanksWithEscape,
- test/Apertium/T_SuperBlanksWithEscape/expected.txt,
- test/Apertium/T_SuperBlanksWithEscape/grammar.cg3,
- test/Apertium/T_SuperBlanksWithEscape/input.txt,
- test/Apertium/T_SuperBlanksWithEscape/run.pl: escaped chars in
- superblanks should stay put, not be printed before superblank
-
-2013-07-03 tino
-
- * [r9102] scripts/cg3-autobin.pl, src/ApertiumApplicator.cpp,
- src/cg_conv.cpp, src/main.cpp, src/options_conv.hpp,
- src/version.h: Allow multiple mapping tags in a single Apertium
- reading; Remove Apertium duplicate tag workaround; Add cg-conv
- --prefix
-
-2013-07-02 unhammer
-
- * [r9100] emacs/cg.el: file-name-base is new in emacs 24.3, shim;
- new keyword COPY
-
-2013-07-01 tino
-
- * [r9097] TODO, scripts/cg3-autobin.pl, src/GrammarApplicator.cpp,
- src/TextualParser.cpp, src/version.h: Fix segfault when tracing
- removed cohorts that had dependency
-
-2013-06-26 tino
-
- * [r9092] TODO, scripts/cg3-autobin.pl, src/GrammarApplicator.cpp,
- src/GrammarApplicator_reflow.cpp, src/version.h,
- test/T_BasicSubstitute/run.pl: Allow duplicate tags again now
- that Substitute works with them
- * [r9091] test/T_BasicSubstitute/expected.txt,
- test/T_BasicSubstitute/grammar.cg3,
- test/T_BasicSubstitute/run.pl:
- * [r9090] test/T_BasicSubstitute/run.pl: Forgot test...
- * [r9089] test/T_BasicSubstitute/expected.txt: Forgot test
- * [r9088] CMakeLists.txt, src/GrammarApplicator_runRules.cpp,
- src/main.cpp, src/win/exec-stream-helpers.cpp,
- src/win/exec-stream-impl.cpp, test/T_BasicSubstitute/grammar.cg3,
- test/T_BasicSubstitute/input.txt, test/T_BasicSubstitute/run.pl:
- Substitute now uses the exact insert position even when --ordered
- - this fixes the regression of tag order issues
-
-2013-06-25 tino
-
- * [r9087] make-naive.sh, src/all_cg_comp.cpp, src/all_cg_conv.cpp:
- Naive fix
- * [r9086] make-naive.sh: Naive builder
- * [r9085] src/all_cg_proc.cpp: Wipe Anchor.cpp
-
-2013-06-25 unhammer
-
- * [r9084] emacs/cg.el: cg-check: don't overwrite regular
- compilation sentinel (led to modeline saying "Compiling" even
- when done) cg-output-mode: rebind `g' since `recompile' expects
- old args to work
- * [r9083] emacs/cg.el: cg-post-pipe: like cg-pre-pipe
- cg-output-mode: n/p go next/prev error, i (or C-c C-i) opens edit
- input buffer
-
-2013-06-23 unhammer
-
- * [r9073] emacs/cg.el: cg-check: let user specify cg-command and
- cg-extra-args cg-pre-pipe: no longer a safe-local-variable (it's
- executed …), typing ! once on opening a new cg file should not be
- too much hassle.
-
-2013-06-23 tino
-
- * [r9072] scripts/cg3-autobin.pl, src/GrammarApplicator.cpp,
- src/GrammarApplicator_reflow.cpp, src/TextualParser.cpp,
- src/version.h: Re-enable duplicate tag elimination (which was
- causing the difference) and useless barrier elimination (which
- was innocent after all).
-
-2013-06-22 unhammer
-
- * [r9071] emacs/cg.el: cg-check, bound to C-c C-c, inspired by
- cg3ide. Clickable errors and traces. Rebound cg-goto-rule to C-c
- g. Use C-c C-i to edit the input sent to cg-check (or send it
- using cg-pre-pipe). Use a line like
-
- # -*- cg-pre-pipe: "lt-proc foo.bin | cg-conv"; coding: utf-8 -*-
-
- in your cg file to set a per-file pipeline to run before the
- vislcg3 step.
-
-2013-06-21 tino
-
- * [r9070] src/TextualParser.cpp: Undo change that caused
- differences where it should not have
- * [r9069] TODO, scripts/cg3-autobin.pl, src/CMakeLists.txt,
- src/version.h: Use Install_Prefix, not Prefix_Path
-
-2013-06-20 tino
-
- * [r9065] scripts/cg3-autobin.pl, src/TextualParser.cpp,
- src/version.h: Lower barrier abuse to a friendlier warning.
-
-2013-06-19 tino
-
- * [r9063] manual/cmdreference.xml, manual/streamformats.xml,
- newsletters/2013-06-19.txt, scripts/cg3-autobin.pl,
- src/version.h: CG-3 Release 0.9.8.9063
- * [r9062] README, scripts/cg3-autobin.pl,
- src/GrammarApplicator_reflow.cpp, src/PlaintextApplicator.cpp,
- src/exec-stream.cpp, src/version.h: cg-conv from plain text is
- slightly smarter
- * [r9059] CMakeLists.txt:
-
-2013-06-18 tino
-
- * [r9053] TODO, src/TextualParser.cpp: Fix potential segfault
-
-2013-06-17 tino
-
- * [r9051] scripts/cg3-autobin.pl, src/ContextualTest.h,
- src/TextualParser.cpp, src/version.h: Error out if
- Barrier/CBarrier is used for a non-scanning test
- * [r9046] dist/dist-osx.pl: ICU rewrite version bump
-
-2013-06-14 unhammer
-
- * [r9043] emacs/cg.el: wops, don't want to underline "foo\"bar"
-
-2013-06-12 unhammer
-
- * [r9041] emacs/cg.el: underline troublesome strings like "this
- "mess"right"here", highlight possible [xXlr] after [psc]
-
-2013-06-11 tino
-
- * [r9040] src/GrammarApplicator_reflow.cpp: No longer eliminates
- duplicate tags.
- * [r9039] scripts/cg3-autobin.pl, src/ApertiumApplicator.cpp,
- src/CMakeLists.txt, src/FSTApplicator.cpp, src/FSTApplicator.hpp,
- src/FormatConverter.cpp, src/FormatConverter.h,
- src/GrammarApplicator.cpp, src/MatxinApplicator.cpp,
- src/NicelineApplicator.cpp, src/TextualParser.cpp, src/version.h:
- Basic HFST/XFST handling. No longer eliminates duplicate tags in
- output.
-
-2013-06-07 tino
-
- * [r9036] scripts/cg3-autobin.pl, src/ApertiumApplicator.h,
- src/CMakeLists.txt, src/FormatConverter.cpp,
- src/FormatConverter.h, src/NicelineApplicator.hpp,
- src/PlaintextApplicator.cpp, src/PlaintextApplicator.hpp,
- src/cg_conv.cpp, src/version.h: cg-conv handles plain-text in the
- dumbest possible way, which may be a good thing...
- * [r9035] scripts/cg3-autobin.pl, src/NicelineApplicator.cpp,
- src/version.h: cg-conv now handles Niceline format with spaces in
- word/baseforms and either [] or "" baseforms.
- * [r9034] scripts/cg3-autobin.pl, src/CMakeLists.txt,
- src/FormatConverter.cpp, src/FormatConverter.h,
- src/NicelineApplicator.cpp, src/NicelineApplicator.hpp,
- src/cg_conv.cpp, src/version.h: cg-conv now handles partial
- Niceline format (WIP)
-
-2013-06-06 tino
-
- * [r9032] TODO, scripts/cg3-autobin.pl, src/ApertiumApplicator.cpp,
- src/ApertiumApplicator.h, src/FormatConverter.cpp,
- src/FormatConverter.h, src/GrammarApplicator.h,
- src/GrammarApplicator_runGrammar.cpp, src/MatxinApplicator.cpp,
- src/MatxinApplicator.h, src/cg_comp.cpp, src/cg_conv.cpp,
- src/cg_proc.cpp, src/istream.hpp, src/main.cpp, src/options.h,
- src/options_conv.hpp, src/version.h, test/runall.pl: WIP on
- making cg-conv much more useful
-
-2013-06-04 unhammer
-
- * [r9029] emacs/cg.el: highlight pO (parent + (no-)pass-origin)
-
-2013-05-24 tino
-
- * [r9009] APERTIUM_FORMAT, ChangeLog, Doxyfile, INSTALL,
- autogen.sh, compile-linux.sh, compile-mac.sh: Cleanup
- * [r9008] scripts/cg3-autobin.pl,
- src/GrammarApplicator_runRules.cpp, src/TextualParser.cpp,
- src/version.h: Check APPEND and ADDCOHORT first tag during
- compilation
-
-2013-05-21 tino
-
- * [r9005] TODO, src/Grammar.cpp, src/main.cpp: Help to stdout if
- asked for; 'on line' instead of just 'line'; Less ToDo
-
-2013-05-17 tino
-
- * [r8994] scripts/cg3-autobin.pl,
- src/GrammarApplicator_matchSet.cpp,
- src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/Reading.cpp,
- src/version.h: Using Select with mixed sub-reading levels is now
- safe.
-
-2013-05-16 tino
-
- * [r8993] scripts/cg3-autobin.pl, src/ApertiumApplicator.cpp,
- src/GrammarApplicator_reflow.cpp, src/version.h: Apertium format
- should also preserve sub-readings.
- * [r8991] scripts/cg3-autobin.pl, src/GrammarApplicator_reflow.cpp,
- src/version.h, test/T_SubReadings_CG/expected.txt,
- test/T_SubReadings_CG/input.txt: Now with less wanton
- destruction.
- * [r8984] manual/cmdreference.xml, manual/installation.xml,
- scripts/cg3-autobin.pl, src/cg_conv.cpp, src/version.h: cg-conv
- -a -l to use LTR instead of RTL
-
-2013-05-14 tino
-
- * [r8982] compile-profile.sh, scripts/cg3-autobin.pl,
- scripts/profile-revisions.php, src/CMakeLists.txt, src/version.h,
- vapply.sh, vparse.sh: Binary cg3 as symlink to vislcg3
- * [r8981] CMakeLists.txt: PPA fixes (drop Lucid support)
- * [r8980] CMake/DebSourcePPA.cmake, CMakeLists.txt,
- scripts/cg3-autobin.pl, src/version.h: PPA fixes
- * [r8979] CMake/DebSourcePPA.cmake, CMakeLists.txt: cg3
- * [r8978] CMakeLists.txt, scripts/cg3-autobin.pl, src/Grammar.cpp,
- src/version.h: Now with Ubuntu PPA at ppa:tinodidriksen/cg3
-
-2013-05-08 tino
-
- * [r8974] scripts/cg3-autobin.pl, src/Grammar.cpp, src/version.h,
- test/T_Unification/run.pl: Optimize SET with only single-tag ORs
- to LISTs
-
-2013-04-29 tino
-
- * [r8957] scripts/cg3-autobin.pl: cg3
- * [r8956] TODO, scripts/cg3-autobin.pl: cg3
-
-2013-04-14 tino
-
- * [r8897] TODO, scripts/cg3-autobin.pl, src/TextualParser.cpp,
- src/version.h, test/Apertium/T_Barrier/grammar.cg3,
- test/Apertium/T_BasicSubstitute/grammar.cg3,
- test/Apertium/T_CarefulBarrier/grammar.cg3,
- test/Apertium/T_ScanningTests/grammar.cg3,
- test/T_Barrier/grammar.cg3, test/T_BasicAppend/grammar.cg3,
- test/T_BasicContextTest/grammar.cg3,
- test/T_BasicDependency/grammar.cg3,
- test/T_BasicSubstitute/grammar.cg3, test/T_CG2Compat/grammar.cg3,
- test/T_CarefulBarrier/grammar.cg3,
- test/T_Dependency_Loops/grammar.cg3,
- test/T_Dependency_OutOfRange/grammar.cg3,
- test/T_JumpExecute/grammar.cg3, test/T_MapThenRemove/grammar.cg3,
- test/T_MapThenSelect/grammar.cg3,
- test/T_MappingPrefix/expected.txt,
- test/T_MappingPrefix/grammar.cg3, test/T_Movement/grammar.cg3,
- test/T_NumericalTags/expected.txt,
- test/T_NumericalTags/grammar.cg3, test/T_Omniscan/grammar.cg3,
- test/T_OriginPassing/grammar.cg3,
- test/T_Parentheses/expected.txt, test/T_Parentheses/grammar.cg3,
- test/T_RegExp/grammar.cg3, test/T_Relations/expected.txt,
- test/T_Relations/grammar.cg3, test/T_ScanningTests/grammar.cg3,
- test/T_SetOps/grammar.cg3, test/T_SetParentChild/grammar.cg3,
- test/T_SoftDelimiters/grammar.cg3,
- test/T_SpaceInForms/expected.txt,
- test/T_SpaceInForms/grammar.cg3,
- test/T_SubReadings_Apertium/grammar.cg3,
- test/T_SubReadings_CG/expected.txt,
- test/T_SubReadings_CG/grammar.cg3,
- test/T_SubstituteNil/grammar.cg3, test/T_Templates/grammar.cg3,
- test/T_Trace/expected.txt, test/T_Trace/grammar.cg3,
- test/T_Unification/expected.txt, test/T_Unification/grammar.cg3,
- test/T_Variables/grammar.cg3: Eliminated need for BEFORE-SECTIONS
- so rules before any declared section are assumed to be, well,
- before them...
-
-2013-04-04 tino
-
- * [r8889] scripts/cg3-autobin.pl, src/Cohort.cpp, src/Cohort.h,
- src/GrammarApplicator_runRules.cpp, src/version.h,
- test/T_Relations/expected.txt, test/T_Relations/grammar.cg3,
- test/T_Relations/run.pl: Only trace if relation rule actually did
- anything.
-
-2013-04-02 tino
-
- * [r8885] scripts/cg3-autobin.pl,
- src/GrammarApplicator_runContextualTest.cpp, src/version.h: Clear
- dep_deep cache for OR'ed tests
-
-2013-03-14 tino
-
- * [r8877] AUTHORS, manual/manual.xml, src/ApertiumApplicator.cpp,
- src/ApertiumApplicator.h, src/BinaryGrammar.cpp,
- src/BinaryGrammar.h, src/BinaryGrammar_read.cpp,
- src/BinaryGrammar_write.cpp, src/Cohort.cpp, src/Cohort.h,
- src/CohortIterator.cpp, src/CohortIterator.h,
- src/CompositeTag.cpp, src/CompositeTag.h, src/ContextualTest.cpp,
- src/ContextualTest.h, src/FormatConverter.cpp,
- src/FormatConverter.h, src/Grammar.cpp, src/Grammar.h,
- src/GrammarApplicator.cpp, src/GrammarApplicator.h,
- src/GrammarApplicator_matchSet.cpp,
- src/GrammarApplicator_reflow.cpp,
- src/GrammarApplicator_runContextualTest.cpp,
- src/GrammarApplicator_runGrammar.cpp,
- src/GrammarApplicator_runRules.cpp, src/GrammarWriter.cpp,
- src/GrammarWriter.h, src/IGrammarParser.h,
- src/MatxinApplicator.cpp, src/MatxinApplicator.h,
- src/Reading.cpp, src/Reading.h, src/Rule.cpp, src/Rule.h,
- src/Set.cpp, src/Set.h, src/SingleWindow.cpp, src/SingleWindow.h,
- src/Strings.cpp, src/Strings.h, src/Tag.cpp, src/Tag.h,
- src/TextualParser.cpp, src/TextualParser.h, src/Window.cpp,
- src/Window.h, src/bloomish.hpp, src/cg3.h, src/cg_comp.cpp,
- src/cg_conv.cpp, src/cg_proc.cpp, src/icu_uoptions.h,
- src/inlines.h, src/interval_vector.hpp, src/libcg3.cpp,
- src/macros.h, src/main.cpp, src/options.h, src/sorted_vector.hpp,
- src/stdafx.h, src/test_libcg3.c, src/uextras.cpp, src/uextras.h,
- src/version.h: Yearly copyright refresh.
-
diff --git a/README b/README
deleted file mode 100644
index 19f3039..0000000
--- a/README
+++ /dev/null
@@ -1,4 +0,0 @@
-See instead:
-- http://visl.sdu.dk/cg3.html
-- http://visl.sdu.dk/cg3/chunked/
-- manual/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1dbfc9a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+[![Build Status](https://travis-ci.org/TinoDidriksen/cg3.svg?branch=master)](https://travis-ci.org/TinoDidriksen/cg3)
+
+See instead:
+- http://visl.sdu.dk/constraint_grammar.html
+- http://visl.sdu.dk/cg3.html
+- http://visl.sdu.dk/cg3/chunked/
+- manual/
+- http://groups.google.com/group/constraint-grammar
+
+Other links:
+- http://visl.sdu.dk/svn/visl/tools/vislcg3/trunk/
+- https://en.wikipedia.org/wiki/Constraint_Grammar
+- http://wiki.apertium.org/wiki/Constraint_Grammar
+- http://kevindonnelly.org.uk/2010/05/constraint-grammar-tutorial/
+- http://openhub.net/p/cg3
diff --git a/TODO b/TODO
index 13981de..fd30def 100644
--- a/TODO
+++ b/TODO
@@ -60,3 +60,4 @@ ToDo: Section number in trace
ToDo: Basque correct parse
ToDo: Dep on readings
ToDo: CLINK to require that all paths satisfy the linked tests
+ToDo: Include only certain sections.
diff --git a/clang-format.pl b/clang-format.pl
new file mode 100755
index 0000000..40d1014
--- /dev/null
+++ b/clang-format.pl
@@ -0,0 +1,108 @@
+#!/usr/bin/env perl
+# -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*-
+use warnings;
+use strict;
+use utf8;
+
+sub file_read {
+ my ($name) = @_;
+ local $/ = undef;
+ open FILE, '<'.$name or die "Could not open $name: $!\n";
+ my $data = <FILE>;
+ close FILE;
+ return $data;
+}
+
+sub file_write {
+ my ($name,$data) = @_;
+ open FILE, '>'.$name or die "Could not open $name: $!\n";
+ print FILE $data;
+ close FILE;
+}
+
+chdir('src/');
+my @files = glob('*.c *.h *.cpp *.hpp');
+
+foreach my $file (@files) {
+ # Protect preprocessor directives due to https://llvm.org/bugs/show_bug.cgi?id=17362
+ my $data = file_read($file);
+ $data =~ s@#pragma once\n#ifndef@PRAGMA_ONCE_IFNDEF@g;
+ $data =~ s@([ \t]*#if.+?#endif\n)@// clang-format off\n$1// clang-format on\n@sg;
+ $data =~ s@\n// clang-format on\n// clang-format off\n@\n@g;
+ $data =~ s@PRAGMA_ONCE_IFNDEF@#pragma once\n#ifndef@g;
+ file_write($file, $data);
+
+ `clang-format-3.9 -style=file -i '$file'`;
+
+ my $data = file_read($file);
+ $data =~ s@\n[^\n]*//[^\n]+clang-format (off|on)\n@\n@g; # Remove preprocessor protection
+ # Things clang-format gets wrong:
+ $data =~ s@([ \t]+)(BOOST_FOREACH|boost_foreach|reverse_foreach|foreach)([^{\n]*) \{[ \t]+([^}\n]*)[ \t]+\}@$1$2$3 {\n$1\t$4\n$1}@g; # Don't allow single-line foreach blocks
+ $data =~ s@\s*([*&])>@$1>@g; # vector<T *>, really? Just no.
+ $data =~ s@([\w>]) &([\w(])@$1& $2@g; # I like T *t, but I also like T& t ...
+ $data =~ s@([\w>]) \*&(\w)@$1 *& $2@g; # ... and T *& t
+ $data =~ s@return& @return &@g; # ... except for return &t
+ $data =~ s@(operator .+?) \(@$1(@g; # No space before ( in operators
+ $data =~ s@ \*([,>)])@*$1@g; # No space before statement-final *
+ $data =~ s@^([ \t]*)( [:,] [^\n]+) \{@$1$2\n$1\{@mg; # { after ctor-init should go on its own line
+ $data =~ s@template <@template<@g; # No space in template<
+
+ # clang-format horribly mangles enums if AlignConsecutiveAssignments is off, so fix that
+ my @enums = ($data =~ m@enum [^{]*\{(.+?)\n\s*\}[^;\n]*;@sg);
+ foreach my $enum (@enums) {
+ my @lines = split /\n/, $enum;
+ my $len = 0;
+ foreach my $line (@lines) {
+ if ($line =~ m@^(\s*\S+) = @) {
+ if (length($1) > $len) {
+ $len = length($1);
+ }
+ }
+ }
+
+ my @comb = ();
+ foreach my $line (@lines) {
+ if ($line =~ m@^(\s*\S+) = @) {
+ my $sps = ' ' x (1 + $len - length($1));
+ $line =~ s@ = @$sps= @;
+ }
+ $line =~ s@\(1 << (\d)\)@(1 << $1)@;
+ push @comb, $line;
+ }
+ my $comb = join "\n", @comb;
+ $data =~ s@\Q$enum\E@$comb@g;
+ }
+
+ # clang-format has no idea what I want with UOptions[], so fix that as well
+ my @enums = ($data =~ m@UOption options\[\] = \{(.+?)\n\s*\}[^;\n]*;@sg);
+ foreach my $enum (@enums) {
+ my @lines = split /\n/, $enum;
+ my $len = 0;
+ foreach my $line (@lines) {
+ if ($line =~ m@^([^"]+"[^"]+",)@) {
+ if (length($1) > $len) {
+ $len = length($1);
+ }
+ }
+ }
+
+ my @comb = ();
+ foreach my $line (@lines) {
+ if ($line =~ m@^([^"]+"[^"]+",)@) {
+ my $txt = $1;
+ my $sps = ' ' x (1 + $len - length($txt));
+ my ($is) = ($txt =~ m@^(\s+)@);
+ $line =~ s@\s+@ @g;
+ $line =~ s@, 0, @, 0, @g;
+ $line =~ s@UOPT_NO_ARG, @UOPT_NO_ARG, @g;
+ $line =~ s@^\s+@$is@;
+ $line =~ s@(\Q$txt\E) @$1$sps@;
+ }
+ push @comb, $line;
+ }
+ my $comb = join "\n", @comb;
+ $data =~ s@\Q$enum\E@$comb@g;
+ }
+
+ file_write($file, $data);
+}
diff --git a/emacs/cg.el b/emacs/cg.el
index 54435a7..cef1076 100644
--- a/emacs/cg.el
+++ b/emacs/cg.el
@@ -1,9 +1,9 @@
;;; cg.el --- major mode for editing Constraint Grammar files
-;; Copyright (C) 2010-2013 Kevin Brubeck Unhammer
+;; Copyright (C) 2010-2016 Kevin Brubeck Unhammer
;; Author: Kevin Brubeck Unhammer <unhammer at fsfe.org>
-;; Version: 0.1.7
+;; Version: 0.2.0
;; Url: http://beta.visl.sdu.dk/constraint_grammar.html
;; Keywords: languages
@@ -32,17 +32,18 @@
;; ; Or if you use a non-standard file suffix, e.g. .rlx:
;; (add-to-list 'auto-mode-alist '("\\.rlx\\'" . cg-mode))
-;; I recommend using auto-complete-mode for tab-completion, and
+;; I recommend using company-mode for tab-completion, and
;; smartparens-mode if you're used to it (paredit-mode does not work
;; well if you have set names with the # character in them). Both are
-;; available from MELPA (see http://melpa.milkbox.net/). You can
-;; lazy-load auto-complete for cg-mode like this:
+;; available from MELPA (see http://melpa.milkbox.net/).
;;
-;; (eval-after-load 'auto-complete '(add-to-list 'ac-modes 'cg-mode))
+;; You can lazy-load company-mode for cg-mode like this:
+;;
+;; (eval-after-load 'company-autoloads
+;; (add-hook 'cg-mode-hook #'company-mode))
;; TODO:
-;; - optionally highlight any LIST/SET without ; at the end
;; - different syntax highlighting for sets and tags (difficult)
;; - use something like prolog-clause-start to define M-a/e etc.
;; - run vislcg3 --show-unused-sets and buttonise with line numbers (like Occur does)
@@ -56,14 +57,14 @@
;; - derive cg-mode from prog-mode?
;; - goto-set/list
;; - show definition of set/list-at-point in modeline
-;; - send dictionary to auto-complete
;; - show section name/number in modeline
;;; Code:
-(defconst cg-version "0.1.7" "Version of cg-mode")
+(defconst cg-version "0.2.0" "Version of cg-mode.")
(eval-when-compile (require 'cl))
+(require 'cl-lib)
;;;============================================================================
;;;
@@ -103,11 +104,10 @@ See also `cg-command'."
;;;###autoload
(defcustom cg-pre-pipe "cg-conv"
- "Pipeline to run before the vislcg3 command when testing a file
-with `cg-check'.
+ "Pipeline to run before vislcg3 when testing a file with `cg-check'.
Buffer-local, so use `setq-default' if you want to change the
-global default value. If you want to set it on a per-file basis,
+global default value. If you want to set it on a per-file basis,
put a line like
# -*- cg-pre-pipe: \"lt-proc foo.bin | cg-conv\"; othervar: value; -*-
@@ -120,11 +120,10 @@ See also `cg-command' and `cg-post-pipe'."
;;;###autoload
(defcustom cg-post-pipe ""
- "Pipeline to run after the vislcg3 command when testing a file
-with `cg-check'.
+ "Pipeline to run after vislcg3 when testing a file with `cg-check'.
Buffer-local, so use `setq-default' if you want to change the
-global default value. If you want to set it on a per-file basis,
+global default value. If you want to set it on a per-file basis,
put a line like
# -*- cg-post-pipe: \"cg-conv --out-apertium | lt-proc -b foo.bin\"; -*-
@@ -140,9 +139,10 @@ See also `cg-command' and `cg-pre-pipe'."
;; These are not sets (and don't have names after the kw) but we
;; have them here to make beginning-of-defun work:
"MAPPING-PREFIX" "SOFT-DELIMITERS" "DELIMITERS")
- "Used for indentation, highlighting etc.; don't change without
-re-evaluating `cg-kw-re' (or all of cg.el).")
-(defconst cg-kw-set-re (regexp-opt cg-kw-set-list))
+ "List-like keywords used for indentation, highlighting etc.
+Don't change without re-evaluating `cg-kw-re' (or all of cg.el).")
+(defconst cg-kw-set-re (regexp-opt cg-kw-set-list)
+ "Regexp version of `cg-kw-set-list'.")
(defconst cg-kw-rule-list
'("SUBSTITUTE"
@@ -159,10 +159,12 @@ re-evaluating `cg-kw-re' (or all of cg.el).")
"ADDRELATIONS" "REMRELATIONS" "SETRELATIONS"
"SETVARIABLE" "REMVARIABLE"
"APPEND")
- "Used for indentation, highlighting etc.; don't change without
-re-evaluating `cg-kw-re' (or all of cg.el)." )
-(defconst cg-kw-rule-re (regexp-opt cg-kw-rule-list))
-(defconst cg-kw-re (regexp-opt (append cg-kw-set-list cg-kw-rule-list)))
+ "Rule-starter keywords for indentation, highlighting etc.
+Don't change without re-evaluating `cg-kw-re' (or all of cg.el)." )
+(defconst cg-kw-rule-re (regexp-opt cg-kw-rule-list)
+ "Regexp version of `cg-kw-rule-list'.")
+(defconst cg-kw-re (regexp-opt (append cg-kw-set-list cg-kw-rule-list))
+ "Regexp combination of `cg-kw-rule-list' and `cg-kw-set-list'.")
(defconst cg-kw-rule-flags '("NEAREST"
"ALLOWLOOP"
@@ -189,9 +191,9 @@ re-evaluating `cg-kw-re' (or all of cg.el)." )
"REVERSE"
"SUB"
"OUTPUT")
- "Used for highlighting, from
- http://visl.sdu.dk/svn/visl/tools/vislcg3/trunk/src/Strings.cpp
- Don't change without re-evaluating the file.")
+ "Rule flags used for highlighting.
+from http://visl.sdu.dk/svn/visl/tools/vislcg3/trunk/src/Strings.cpp
+Don't change without re-evaluating the file.")
(defconst cg-kw-context-flags '("NOT"
"NEGATE"
"NONE"
@@ -205,8 +207,8 @@ re-evaluating `cg-kw-re' (or all of cg.el)." )
"BEFORE"
"WITH"
"TO")
- "Used for highlighting; Don't change without re-evaluating the
- file.")
+ "Context flags used for highlighting.
+Don't change without re-evaluating the file.")
(defconst cg-kw-flags-re (regexp-opt (append cg-kw-rule-flags cg-kw-context-flags)))
@@ -259,15 +261,9 @@ re-evaluating `cg-kw-re' (or all of cg.el)." )
(modify-syntax-entry ?« "." table)
table))
-(defun cg-syntax-at-pos ()
- (let ((ppss (syntax-ppss)))
- (cond
- ((nth 8 ppss) (if (nth 4 ppss) 'comment 'string))
- ((nth 1 ppss) 'paren))))
-
(defun cg-beginning-of-defun ()
(re-search-backward defun-prompt-regexp nil 'noerror)
- (while (cg-syntax-at-pos)
+ (while (nth 4 (syntax-ppss))
(re-search-backward defun-prompt-regexp nil 'noerror))
(re-search-backward "\"<[^\"]>\"" (line-beginning-position) 'noerror))
@@ -275,12 +271,86 @@ re-evaluating `cg-kw-re' (or all of cg.el)." )
(and (search-forward ";")
(re-search-forward defun-prompt-regexp nil 'noerror)
(goto-char (match-beginning 0)))
- (while (cg-syntax-at-pos)
+ (while (nth 4 (syntax-ppss))
(and (search-forward ";")
- (re-search-forward defun-prompt-regexp nil 'noerror)
- (goto-char (match-beginning 0))))
+ (re-search-forward defun-prompt-regexp nil 'noerror)
+ (goto-char (match-beginning 0))))
(re-search-backward "\"<[^\"]>\"" (line-beginning-position) 'noerror))
+(defun cg--line-commented-p ()
+ (save-excursion
+ (back-to-indentation)
+ (looking-at "#")))
+
+(defun cg--region-commented-p (beg end)
+ (catch 'ret
+ (save-excursion
+ (goto-char beg)
+ (while (and (< (point) end)
+ (< (point) (point-max)))
+ (if (cg--line-commented-p)
+ (forward-line)
+ (throw 'ret nil)))
+ (throw 'ret t))))
+
+(defun cg--comment/uncomment-rule (comment &optional n)
+ "Comment/uncomment a rule around point."
+ (let ((i 0)
+ (n (if (numberp n) n 1))
+ (initial-point (point-marker)))
+ (while (< i n)
+ (incf i)
+ (let* ((r (save-excursion
+ (if (search-forward ";" nil 'noerror)
+ (1+ (point-marker))
+ (point-max))))
+ (l (save-excursion
+ (goto-char r)
+ (if (re-search-backward defun-prompt-regexp nil 'noerror)
+ (goto-char (line-beginning-position))
+ (point-min)))))
+ ;; Only uncomment rules if they're completely commented (but
+ ;; always uncomment the first one)
+ (when (or comment
+ (= i 1)
+ (cg--region-commented-p l r))
+ (goto-char r)
+ (skip-chars-forward "\r\n[:blank:]")
+ (if comment
+ (comment-region l r)
+ (uncomment-region l r))
+ (skip-chars-forward "\r\n[:blank:]")))
+ (when (= n 1)
+ (goto-char initial-point)))))
+
+(defun cg-comment-rule (&optional n)
+ "Comment a rule around point.
+With a prefix argument N, comment that many rules."
+ (interactive "p")
+ (cg--comment/uncomment-rule 'comment n))
+
+(defun cg-uncomment-rule (&optional n)
+ "Uncomment a rule around point.
+With a prefix argument N, uncomment that many rules."
+ (interactive "p")
+ (cg--comment/uncomment-rule nil n))
+
+(defun cg-comment-or-uncomment-rule (&optional n)
+ "Comment the rule at point.
+If already inside (or before) a comment, uncomment instead.
+With a prefix argument N, (un)comment that many rules."
+ (interactive "p")
+ (if (or (elt (syntax-ppss) 4)
+ (< (save-excursion
+ (skip-chars-forward "\r\n[:blank:]")
+ (point))
+ (save-excursion
+ (comment-forward 1)
+ (point))))
+ (cg-uncomment-rule n)
+ (cg-comment-rule n)))
+
+
;;;###autoload
(defun cg-mode ()
"Major mode for editing Constraint Grammar files.
@@ -312,13 +382,14 @@ CG-mode provides the following specific keyboard key bindings:
(set-syntax-table cg-mode-syntax-table)
(set (make-local-variable 'parse-sexp-ignore-comments) t)
(set (make-local-variable 'parse-sexp-lookup-properties) t)
- (set (make-local-variable 'defun-prompt-regexp) cg-kw-re)
+ (set (make-local-variable 'defun-prompt-regexp) (concat cg-kw-re "\\(?::[^\n\t ]+\\)[\t ]"))
(set (make-local-variable 'beginning-of-defun-function) #'cg-beginning-of-defun)
(set (make-local-variable 'end-of-defun-function) #'cg-end-of-defun)
(setq indent-line-function #'cg-indent-line)
(when font-lock-mode
(setq font-lock-set-defaults nil)
(font-lock-set-defaults)
+ ;; TODO: emacs 25 prefers `font-lock-ensure' and `font-lock-flush' over fontify
(font-lock-fontify-buffer))
(add-hook 'after-change-functions #'cg-after-change nil 'buffer-local)
(let ((buf (current-buffer)))
@@ -344,13 +415,15 @@ CG-mode provides the following specific keyboard key bindings:
("[( \t\n]\\(\\^\\)" 1 "'")))
(defun cg-font-lock-syntactic-face-function (state)
- "Determine which face to use when fontifying syntactically. See
-`font-lock-syntactic-face-function'.
-
-TODO: something like
- ((= 0 (nth 0 state)) font-lock-variable-name-face)
-would be great to differentiate SETs from their members, but it
-seems this function only runs on comments and strings..."
+ "Determine which face to use when fontifying syntactically.
+
+Argument STATE is assumed to be from `parse-partial-sexp' at the
+beginning of the region to highlight; see
+`font-lock-syntactic-face-function'."
+ ;; TODO: something like
+ ;; ((= 0 (nth 0 state)) font-lock-variable-name-face)
+ ;; would be great to differentiate SETs from their members, but it
+ ;; seems this function only runs on comments and strings...
(cond ((nth 3 state)
(if
(save-excursion
@@ -390,23 +463,25 @@ seems this function only runs on comments and strings..."
(let ((origin (point))
(old-case-fold-search case-fold-search))
(setq case-fold-search nil) ; for re-search-backward
- (save-excursion
- (let ((kw-pos (progn
- (goto-char (1- (or (search-forward ";" (line-end-position) t)
- (line-end-position))))
- (re-search-backward cg-kw-re nil 'noerror))))
- (setq case-fold-search old-case-fold-search)
- (when kw-pos
- (let* ((kw (match-string-no-properties 0)))
- (if (and (not (equal kw ";"))
- (> origin (line-end-position)))
- cg-indentation
- 0)))))))
+ (prog1
+ (save-excursion
+ (let ((kw-pos (progn
+ (goto-char (1- (or (search-forward ";" (line-end-position) t)
+ (line-end-position))))
+ (re-search-backward (concat ";\\|" cg-kw-re) nil 'noerror))))
+ (when kw-pos
+ (let* ((kw (match-string-no-properties 0)))
+ (if (and (not (equal kw ";"))
+ (> origin (line-end-position)))
+ cg-indentation
+ 0)))))
+ (setq case-fold-search old-case-fold-search))))
(defun cg-indent-line ()
- "Indent the current line. Very simple indentation: lines with
-keywords from `cg-kw-list' get zero indentation, others get one
-indentation."
+ "Indent the current line.
+
+Very simple indentation: lines with keywords from `cg-kw-list'
+get zero indentation, others get one indentation."
(interactive)
(let ((indent (cg-calculate-indent))
(pos (- (point-max) (point))))
@@ -427,15 +502,16 @@ indentation."
(defvar cg--goto-history nil)
(defun cg-permute (input)
- "From http://www.emacswiki.org/emacs/StringPermutations"
- (require 'cl) ; TODO: (require 'cl-lib) for whole file when 24.3 in distros
+ "Permute INPUT list.
+
+From http://www.emacswiki.org/emacs/StringPermutations"
(if (null input)
(list input)
- (mapcan (lambda (elt)
- (mapcan (lambda (p)
- (list (cons elt p)))
- (cg-permute (remove* elt input :count 1))))
- input)))
+ (cl-mapcan (lambda (elt)
+ (cl-mapcan (lambda (p)
+ (list (cons elt p)))
+ (cg-permute (cl-remove elt input :count 1))))
+ input)))
(defun cg-read-arg (prompt history &optional default)
(let* ((default (or default (car history)))
@@ -484,16 +560,17 @@ etc."
(setq regexp-history tmp))))
(defun cg-goto-rule (&optional input)
- "Go to the line number of the rule described by `input', where
-`input' is the rule info from vislcg3 --trace. E.g. if `input'
-is \"SELECT:1022:rulename\", go to the rule on line number
-1022. Interactively, use a prefix argument to paste `input'
-manually, otherwise this function uses the most recently copied
-line in the X clipboard.
+ "Go to the line number of the rule described by INPUT.
+
+INPUT is the rule info from vislcg3 --trace; e.g. if INPUT is
+\"SELECT:1022:rulename\", go to the rule on line number 1022.
+Interactively, use a prefix argument to paste INPUT manually,
+otherwise this function uses the most recently copied line in the
+X clipboard.
This makes switching between the terminal and the file slightly
-faster (since double-clicking the rule info -- in Konsole at
-least -- selects the whole string \"SELECT:1022:rulename\")."
+faster (since double-clicking the rule info in most terminals will
+select the whole string \"SELECT:1022:rulename\")."
(interactive (list (when current-prefix-arg
(cg-read-arg "Paste rule info from --trace here: "
cg--goto-history))))
@@ -601,29 +678,30 @@ from, otherwise all CG buffers share one input buffer."
(defconst cg-output-regexp-alist
`((,(format "%s:\\([^ \n\t:]+\\)\\(?::[^ \n\t]+\\)?" cg-kw-rule-re)
,#'cg-get-file 1 nil 1)
- ("^Warning: .*?line \\([0-9]+\\)"
- ,#'cg-get-file 1 nil 1)
- ("^Warning: .*"
+ ("^\\([^:]*: \\)?Warning: .*?line \\([0-9]+\\).*"
+ ,#'cg-get-file 2 nil 1)
+ ("^\\([^:]*: \\)?Warning: .*"
,#'cg-get-file nil nil 1)
- ("^Error: .*?line \\([0-9]+\\)"
- ,#'cg-get-file 1 nil 2)
- ("^Error: .*"
+ ("^\\([^:]*: \\)?Error: .*?line \\([0-9]+\\).*"
+ ,#'cg-get-file 2 nil 2)
+ ("^\\([^:]*: \\)?Error: .*"
,#'cg-get-file nil nil 2)
- (".*?line \\([0-9]+\\)" ; some error messages span several lines
+ (".*?line \\([0-9]+\\).*" ; some error messages span several lines
,#'cg-get-file 1 nil 2))
- "Regexp used to match vislcg3 --trace hits. See
-`compilation-error-regexp-alist'.")
+ "Regexp used to match vislcg3 --trace hits.
+See `compilation-error-regexp-alist'.")
;; TODO: highlight strings and @'s and #1->0's in cg-output-mode ?
;;;###autoload
(defcustom cg-output-setup-hook nil
- "List of hook functions run by `cg-output-process-setup' (see
-`run-hooks')."
+ "List of hook functions run by `cg-output-process-setup'.
+See `run-hooks'."
:type 'hook)
(defun cg-output-process-setup ()
- "Runs `cg-output-setup-hook' for `cg-check'. That hook is
-useful for doing things like
+ "Run `cg-output-setup-hook' for `cg-check'.
+
+That hook is useful for doing things like
(setenv \"PATH\" (concat \"~/local/stuff\" (getenv \"PATH\")))"
(run-hooks 'cg-output-setup-hook))
@@ -637,7 +715,7 @@ useful for doing things like
"Face name to use for lemmas in cg-output.")
(defvar cg-output-mapping-face 'bold
- "Face name to use for mapping tags in cg-output")
+ "Face name to use for mapping tags in cg-output.")
(defvar cg-output-mode-font-lock-keywords
'(("^;\\(?:[^:]* \\)"
@@ -725,21 +803,23 @@ runs."
(overlay-put o 'isearch-open-invisible 'cg-output-remove-overlay)))
(defun cg-output-show-all ()
- "Undoes the effect of `cg-output-hide-analyses'."
+ "Undo the effect of `cg-output-hide-analyses'."
(interactive)
(setq cg--output-hiding-analyses nil)
(remove-overlays (point-min) (point-max) 'invisible 'cg-output))
(defun cg-output-hide-analyses ()
- "Hides all analyses, turning the CG format back into input
-text (more or less). You can still isearch through the text for
-tags, REMOVE/SELECT keywords etc.
+ "Hide all analyses.
+
+This turns the CG format back into input text (more or less).
+You can still isearch through the text for tags, REMOVE/SELECT
+keywords etc.
Call `cg-output-set-unhide' to set a regex which will be exempt
-from hiding. Call `cg-output-show-all' to turn off all hiding."
+from hiding. Call `cg-output-show-all' to turn off all hiding."
(interactive)
(setq cg--output-hiding-analyses t)
- (lexical-let (last)
+ (lexical-let (prev)
(save-excursion
(goto-char (point-min))
(while (re-search-forward "^\"<.*>\"" nil 'noerror)
@@ -747,14 +827,14 @@ from hiding. Call `cg-output-show-all' to turn off all hiding."
(line-end (match-end 0)))
(cg-output-hide-region line-beg (+ line-beg 2)) ; "<
(cg-output-hide-region (- line-end 2) line-end) ; >"
- (when last
- (if (save-excursion (re-search-backward cg-sent-tag last 'noerror))
- (cg-output-hide-region last (- line-beg 1)) ; show newline
- (cg-output-hide-region last line-beg))) ; hide newline too
- (setq last line-end)))
- (goto-char last)
+ (when prev
+ (if (save-excursion (re-search-backward cg-sent-tag prev 'noerror))
+ (cg-output-hide-region prev (- line-beg 1)) ; show newline
+ (cg-output-hide-region prev line-beg))) ; hide newline too
+ (setq prev line-end)))
+ (goto-char prev)
(when (re-search-forward "^[^\t\"]" nil 'noerror)
- (cg-output-hide-region last (match-beginning 0)))))
+ (cg-output-hide-region prev (match-beginning 0)))))
(when cg-output-unhide-regex
(cg-output-unhide-some cg-output-unhide-regex)))
@@ -770,8 +850,11 @@ from hiding. Call `cg-output-show-all' to turn off all hiding."
(overlays-at (match-beginning 0))))))
(defun cg-output-set-unhide (needle)
- "Set some exeption to `cg-output-hide-analyses'. This is saved
-and reused whenever `cg-output-hide-analyses' is called."
+ "Set some exception to `cg-output-hide-analyses'.
+
+If NEEDLE is the empty string, hide all analyses.
+This is saved and reused whenever `cg-output-hide-analyses' is
+called."
(interactive (list (cg-read-arg
"Regex to unhide, or empty to hide all"
cg--output-unhide-history
@@ -784,8 +867,8 @@ and reused whenever `cg-output-hide-analyses' is called."
;;; TODO:
(defun cg-output-toggle-analyses ()
- "Hide or show analyses from output. See
-`cg-output-hide-analyses'."
+ "Hide or show analyses from output.
+See `cg-output-hide-analyses'."
(interactive)
(if cg--output-hiding-analyses
(cg-output-show-all)
@@ -854,7 +937,7 @@ buffer (so 0 is after each change)."
'cg-output-buffer-name))))
(defun cg-end-process (proc &optional string)
- "End `proc', optionally first sending in `string'."
+ "End PROC, optionally first sending in STRING."
(when string
(process-send-string proc string))
(process-send-string proc "\n")
@@ -865,8 +948,8 @@ buffer (so 0 is after each change)."
in case you haven't saved yet).
If you've set `cg-pre-pipe', input will first be sent through
-that. Set your test input sentence(s) with `cg-edit-input'. If
-you want to send a whole file instead, just set `cg-pre-pipe' to
+that. Set your test input sentence(s) with `cg-edit-input'.
+If you want to send a whole file instead, just set `cg-pre-pipe' to
something like
\"zcat corpus.gz | lt-proc analyser.bin | cg-conv\".
@@ -973,6 +1056,9 @@ Similarly, `cg-post-pipe' is run on output."
(define-key cg-mode-map (kbd "C-c C-c") #'cg-check)
(define-key cg-mode-map (kbd "C-c C-i") #'cg-edit-input)
(define-key cg-mode-map (kbd "C-c c") #'cg-toggle-check-after-change)
+(define-key cg-mode-map (kbd "C-;") #'cg-comment-or-uncomment-rule)
+(define-key cg-mode-map (kbd "M-#") #'cg-comment-or-uncomment-rule)
+
(define-key cg-output-mode-map (kbd "C-c C-i") #'cg-back-to-file-and-edit-input)
(define-key cg-output-mode-map (kbd "i") #'cg-back-to-file-and-edit-input)
(define-key cg-output-mode-map (kbd "g") #'cg-back-to-file-and-check)
diff --git a/get-boost.sh b/get-boost.sh
index c928d56..922e1b7 100755
--- a/get-boost.sh
+++ b/get-boost.sh
@@ -1,5 +1,5 @@
#!/bin/sh
-export BOOSTVER=57
+export BOOSTVER=61
export BDOT="1.$BOOSTVER.0"
export BUC="boost_1_${BOOSTVER}_0"
diff --git a/manual/contexts.xml b/manual/contexts.xml
index b4f1e78..e71b58f 100644
--- a/manual/contexts.xml
+++ b/manual/contexts.xml
@@ -147,11 +147,15 @@
</section>
<section id="test-mark-attach-to">
- <title>Attach To</title>
+ <title>Attach To / Affect Instead</title>
<para>
'A' sets the cohort to be attached or related against to the currently active cohort of the test's target.
See also <link linkend="set-attachto">magic set _ATTACHTO_</link>.
</para>
+ <para>
+ As of version 0.9.9.11032, 'A' can be used for almost all rules to change the cohort to be affected,
+ instead of the target of the rule.
+ </para>
</section>
</section>
@@ -271,6 +275,23 @@
</screen>
</section>
+ <section id="test-bag-of-tags">
+ <title>Bag of Tags</title>
+ <indexterm>
+ <primary>B</primary>
+ </indexterm>
+ <para>
+ Position modifier 'B' causes the test to look in a bag of tags, which is like a bag of words but with all
+ tags. 'B' alone looks for tags in the current window, but can be combined with the
+ <link linkend="test-spanning">window spanning modifiers</link> to also test the windows before and/or after.
+ Linking from a 'B' test behaves as if you linked from offset 0.
+ </para>
+ <para>
+ The bag is greedy and lazy. It will hold all tags added to the window at any time, but will not forget
+ tags as they are removed from the window through various means.
+ </para>
+ </section>
+
<section id="test-branch">
<title>Optional Frequencies</title>
<indexterm>
diff --git a/manual/grammar.xml b/manual/grammar.xml
index 65f0020..6b7d13c 100644
--- a/manual/grammar.xml
+++ b/manual/grammar.xml
@@ -145,6 +145,7 @@
SET Hubba = ThisIsASet - (ctag) ;
SELECT ThisIsASet IF (-1 (dtag)) ;
+ SECTION with-name;
LIST AnotherSet = "<youknowthedrill>" ;
MAP (@bingo) TARGET AnotherSet ;
</screen>
diff --git a/manual/manual.xml b/manual/manual.xml
index ab16b4e..c170495 100644
--- a/manual/manual.xml
+++ b/manual/manual.xml
@@ -26,7 +26,7 @@
</authorgroup>
<copyright>
- <year>2007-2014</year>
+ <year>2007-2016</year>
<holder>GrammarSoft ApS</holder>
</copyright>
diff --git a/manual/rules.xml b/manual/rules.xml
index a34873d..9014772 100644
--- a/manual/rules.xml
+++ b/manual/rules.xml
@@ -56,6 +56,7 @@
Cohort manipulation:
ADDCOHORT <cohort tags> BEFORE|AFTER <target> [contextual_tests] ;
REMCOHORT <target> [contextual_tests] ;
+ SPLITCOHORT <cohort recipe> <target> [contextual_tests] ;
MOVE [WITHCHILD <child_set>|NOCHILD] <target> [contextual_tests]
BEFORE|AFTER [WITHCHILD <child_set>|NOCHILD] <contextual_target> [contextual_tests] ;
@@ -165,10 +166,6 @@
<para>
Inserts a new cohort before or after the target.
</para>
- <para>
- Caveat: This does NOT affect the rule application order as that is tied to the input order of the cohorts,
- so inserted cohorts are always last.
- </para>
<screen>
ADDCOHORT ("<wordform>" "baseform" tags) BEFORE (@waffles) ;
ADDCOHORT ("<wordform>" "baseform" tags) AFTER (@waffles) ;
@@ -185,7 +182,47 @@
</screen>
<para>
This will entirely remove a cohort with all its readings from the window.
+ Dependency will be forwarded so that the tree remains intact.
+ Named relations will be deleted.
+ </para>
+ </section>
+
+ <section id="splitcohort">
+ <title>SPLITCOHORT</title>
+ <indexterm>
+ <primary>SPLITCOHORT</primary>
+ </indexterm>
+ <screen>
+ [wordform] SPLITCOHORT <cohort recipe> <target> [contextual_tests] ;
+ </screen>
+ <para>
+ Splits a cohort into multiple new cohorts, with a recipe for which of the new cohorts shall inherit tags,
+ dependency, or named relations. The cohorts are listed in any order you want, and you may use regex captures to
+ fill in wordforms and baseforms. You can list as many cohorts as you want.
</para>
+ <para>
+ You can also designate their relative place in the dependency tree with <code>x->y</code> tags,
+ where <code>x</code> must be the sequential number of the new cohorts starting from 1, and <code>y</code>
+ is which new cohort it should link to.
+ Special value <code>c</code> for <code>x</code> may be used to designate that the cohort should inherit all children,
+ and value <code>p</code> for <code>y</code> designates it is the head of the local tree.
+ You may use <code>c->p</code> to give the same cohort both roles.
+ The default is that first cohort is the head and last cohort is the tail, and the new cohorts form a simple chain.
+ </para>
+ <para>
+ Similarly, you can use tag <code>R:*</code> to designate which cohort should inherit the named relations.
+ If <code>R:*</code> is not listed, the cohort marked <code>c-></code> will be used.
+ Thus if neither is listed, default is that the last cohort inherits them.
+ </para>
+ <screen>
+ # Split hyphenated tokens with exactly two parts
+ SPLITCOHORT (
+ # inherit tags with *, and inherit dependency children with c->2
+ "<$1>"v "$1"v tags * tags c->2
+ # inherit named relations with R:*, and inherit dependency parents with 2->p
+ "<$2>"v "$2"v tags go here R:* 2->p
+ ) ("<([^-]+)-([^-]+)>" other tags) (1* (context)) ;
+ </screen>
</section>
<section id="move-switch">
@@ -217,10 +254,6 @@
The second WITHCHILD uses the children of the cohort you're moving to as edges so you can avoid moving into
another dependency group.
</para>
- <para>
- Caveat: This does NOT affect the rule application order as that is tied to the input order of the cohorts,
- so after movement you may see some rules touching later cohorts than you'd expect.
- </para>
</section>
<section id="replace">
@@ -266,7 +299,8 @@
</screen>
<para>
Replaces the tags in the first list with the tags in the second list.
- If none of the tags in the first list are found, the insertion tags are simply appended.
+ If none of the tags in the first list are found, no insertion is done.
+ If only some of the tags are found, the insertion happens at the last removed tag, which may cause tags to be out of your desired order.
To prevent this, also have important tags as part of the target.
This works as in VISLCG, but the replacement tags may be the * tag to signify a nil replacement,
allowing for clean removal of tags in a reading. For example, to remove TAG do:
@@ -646,6 +680,10 @@
Prevents the re-ordering of contextual tests.
Useful in cases where a unifying set is not in the target of the rule.
</para>
+ <para>
+ You almost certainly need KEEPORDER if you use regex capture and varstring in separate contextual tests,
+ or if you use the special baseform or wordform regexes in unification in separate contextual tests.
+ </para>
</section>
<section id="rule-options-varyorder">
diff --git a/manual/templates.xml b/manual/templates.xml
index 21b2415..2c562f9 100644
--- a/manual/templates.xml
+++ b/manual/templates.xml
@@ -80,21 +80,30 @@
</screen>
</para>
- <para>
- It is also possible to override the position of a template, which changes their behavior. E.g:
- <screen>
- TEMPLATE tmpl = [N, CC, ADJ] ;
- # ... or ...
- TEMPLATE tmpl = 1 N LINK 1 CC LINK 1 ADJ ;
- SELECT (tag) IF (-1 T:tmpl) ;
- </screen>
- is equivalent to
- <screen>
- SELECT (tag) IF (-1** N LINK 1 CC LINK 1 ADJ) ;
- </screen>
- but with a post-condition that the final cohort matched from the exit of the template must be at the position given relative to the origin.
- In this case, a match of the template is only succesful if ADJ is in position -1 to the origin (tag).
- This behavior is equivalent to how templates worked in Fred Karlsson's CG-1, but with more flexibility.
- </para>
+ <section id="tmpl-override">
+ <title>Position Override</title>
+ <para>
+ It is also possible to override the position of a template, which changes their behavior. E.g:
+ <screen>
+ TEMPLATE tmpl = [N, CC, ADJ] ;
+ # ... or ...
+ TEMPLATE tmpl = 1 N LINK 1 CC LINK 1 ADJ ;
+ SELECT (tag) IF (-1 T:tmpl) ;
+ </screen>
+ is equivalent to
+ <screen>
+ SELECT (tag) IF (-1** N LINK 1 CC LINK 1 ADJ) ;
+ </screen>
+ but with a post-condition that the cohorts at the edges of the instantiated template must be at the position given relative to the origin.
+ In this case, a match of the template is only successful if ADJ is in position -1 to the origin (tag).
+ This behavior is equivalent to how templates worked in Fred Karlsson's CG-1, but with more flexibility.
+ </para>
+ <para>
+ The post-condition check cannot currently inspect the actual edges of the space that the template touched to instantiate,
+ so it will perform the edge checks on the entry and exit cohorts only. A positive override will require that the leftmost
+ edge matches the position, while negative override will require rightmost edge matches. When linking from overridden
+ tests, a positive link will try to match from the rightmost edge, and negative link from the leftmost.
+ </para>
+ </section>
</chapter>
diff --git a/src/AST.hpp b/src/AST.hpp
new file mode 100644
index 0000000..9c61986
--- /dev/null
+++ b/src/AST.hpp
@@ -0,0 +1,230 @@
+/*
+* Copyright (C) 2007-2016, GrammarSoft ApS
+* Developed by Tino Didriksen <mail at tinodidriksen.com>
+* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
+*
+* This file is part of VISL CG-3
+*
+* VISL CG-3 is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* VISL CG-3 is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+#ifndef c6d28b7452ec699b_AST_HPP
+#define c6d28b7452ec699b_AST_HPP
+
+#include "stdafx.hpp"
+
+enum ASTType {
+ AST_Unknown,
+ AST_AfterSections,
+ AST_Anchor,
+ AST_AnchorName,
+ AST_Barrier,
+ AST_BarrierSafe,
+ AST_BeforeSections,
+ AST_CompositeTag,
+ AST_Context,
+ AST_ContextMod,
+ AST_ContextPos,
+ AST_Contexts,
+ AST_ContextsTarget,
+ AST_Delimiters,
+ AST_Grammar,
+ AST_Include,
+ AST_IncludeFilename,
+ AST_List,
+ AST_MappingPrefix,
+ AST_NullSection,
+ AST_Option,
+ AST_Options,
+ AST_Parentheses,
+ AST_PreferredTargets,
+ AST_ReopenMappings,
+ AST_Rule,
+ AST_RuleAddcohortWhere,
+ AST_RuleDirection,
+ AST_RuleExcept,
+ AST_RuleExternalCmd,
+ AST_RuleExternalType,
+ AST_RuleFlag,
+ AST_RuleMaplist,
+ AST_RuleMoveType,
+ AST_RuleName,
+ AST_RuleSublist,
+ AST_RuleTarget,
+ AST_RuleType,
+ AST_RuleWithChildDepTarget,
+ AST_RuleWithChildTarget,
+ AST_RuleWordform,
+ AST_Section,
+ AST_Set,
+ AST_SetInline,
+ AST_SetName,
+ AST_SetOp,
+ AST_SoftDelimiters,
+ AST_StaticSets,
+ AST_StrictTags,
+ AST_SubReadings,
+ AST_SubReadingsDirection,
+ AST_Tag,
+ AST_TagList,
+ AST_Template,
+ AST_TemplateInline,
+ AST_TemplateName,
+ AST_TemplateRef,
+ AST_TemplateShorthand,
+ NUM_ASTTypes
+};
+const char *ASTType_str[NUM_ASTTypes] = {};
+
+struct ASTHelper;
+struct ASTNode {
+ ASTType type;
+ uint32_t line;
+ const UChar *b, *e;
+ std::vector<ASTNode> cs;
+
+ ASTNode(ASTType type = AST_Unknown, size_t line = 0, const UChar *b = 0, const UChar *e = 0)
+ : type(type)
+ , line(line)
+ , b(b)
+ , e(e)
+ {}
+};
+
+bool dump_ast = false;
+ASTNode ast;
+ASTNode *cur_ast = &ast;
+ASTHelper *cur_ast_help = 0;
+
+const UChar *xml_encode(const UChar *b, const UChar *e) {
+ static CG3::UString buf;
+ buf.clear();
+ buf.reserve(e - b);
+ for (; b != e; ++b) {
+ if (*b == '&') {
+ buf += '&';
+ buf += 'a';
+ buf += 'm';
+ buf += 'p';
+ buf += ';';
+ }
+ else if (*b == '"') {
+ buf += '&';
+ buf += 'q';
+ buf += 'u';
+ buf += 'o';
+ buf += 't';
+ buf += ';';
+ }
+ else if (*b == '\'') {
+ buf += '&';
+ buf += 'a';
+ buf += 'p';
+ buf += 'o';
+ buf += 's';
+ buf += ';';
+ }
+ else if (*b == '<') {
+ buf += '&';
+ buf += 'l';
+ buf += 't';
+ buf += ';';
+ }
+ else if (*b == '>') {
+ buf += '&';
+ buf += 'g';
+ buf += 't';
+ buf += ';';
+ }
+ else {
+ buf += *b;
+ }
+ }
+ return buf.c_str();
+}
+
+void print_ast(UFILE *out, const UChar *b, size_t n, const ASTNode& node) {
+ std::string indent(n, ' ');
+ u_fprintf(out, "%s<%s l=\"%u\" b=\"%u\" e=\"%u\"", indent.c_str(), ASTType_str[node.type], node.line, static_cast<uint32_t>(node.b - b), static_cast<uint32_t>(node.e - b));
+ if (node.type == AST_AnchorName || node.type == AST_ContextMod || node.type == AST_ContextPos || node.type == AST_IncludeFilename || node.type == AST_MappingPrefix || node.type == AST_Option || node.type == AST_RuleAddcohortWhere || node.type == AST_RuleDirection || node.type == AST_RuleExternalCmd || node.type == AST_RuleExternalType || node.type == AST_RuleFlag || node.type == AST_RuleMoveType || node.type == AST_RuleName || node.type == AST_RuleType || node.type == AST_RuleWordform | [...]
+ u_fprintf(out, " t=\"%S\"", xml_encode(node.b, node.e));
+ }
+ if (node.cs.empty()) {
+ u_fprintf(out, "/>\n");
+ return;
+ }
+ u_fprintf(out, ">\n");
+ foreach (it, node.cs) {
+ if (it->type == AST_Grammar) {
+ print_ast(out, it->b, n + 1, *it);
+ }
+ else {
+ print_ast(out, b, n + 1, *it);
+ }
+ }
+ u_fprintf(out, "%s</%s>\n", indent.c_str(), ASTType_str[node.type]);
+}
+
+struct ASTHelper {
+ ASTNode *c;
+ ASTHelper *h;
+
+ ASTHelper(ASTType type = AST_Unknown, size_t line = 0, const UChar *b = 0, const UChar *e = 0)
+ : c(cur_ast)
+ , h(cur_ast_help)
+ {
+ cur_ast_help = this;
+ if (!dump_ast) {
+ c = 0;
+ h = 0;
+ return;
+ }
+ c->cs.push_back(ASTNode(type, line, b, e));
+ cur_ast = &c->cs.back();
+ }
+
+ ~ASTHelper() {
+ if (c || h) {
+ destroy();
+ }
+ }
+
+ void destroy() {
+ if (!dump_ast) {
+ return;
+ }
+ cur_ast = c;
+ cur_ast_help = h;
+ c = 0;
+ h = 0;
+ }
+};
+
+#ifdef _MSC_VER
+ // warning C4127: conditional expression is constant
+ #pragma warning (disable: 4127)
+#endif
+#define _AST_CONCAT(x, y) x##y
+#define _AST_CONCAT2(x, y) _AST_CONCAT(x, y)
+#define AST_OPEN(type) \
+ ASTType_str[AST_##type] = #type; \
+ ASTHelper _AST_CONCAT2(_ast_, __LINE__)(AST_##type, result->lines, p)
+#define AST_CLOSE(p) \
+ do { \
+ cur_ast->e = (p); \
+ cur_ast_help->destroy(); \
+ } while (false)
+
+#endif
diff --git a/src/ApertiumApplicator.cpp b/src/ApertiumApplicator.cpp
index e594dcb..06f1d61 100644
--- a/src/ApertiumApplicator.cpp
+++ b/src/ApertiumApplicator.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -30,14 +30,16 @@
namespace CG3 {
ApertiumApplicator::ApertiumApplicator(UFILE *ux_err)
- : GrammarApplicator(ux_err)
+ : GrammarApplicator(ux_err)
{
- nullFlush=false;
+ nullFlush = false;
wordform_case = false;
+ unique_tags = false;
print_word_forms = true;
print_only_first = false;
- runningWithNullFlush=false;
- fgetc_converter=0;
+ runningWithNullFlush = false;
+ fgetc_converter = 0;
+ fgetc_error = U_ZERO_ERROR;
}
@@ -46,40 +48,39 @@ bool ApertiumApplicator::getNullFlush() {
}
void ApertiumApplicator::setNullFlush(bool pNullFlush) {
- nullFlush=pNullFlush;
+ nullFlush = pNullFlush;
}
UChar ApertiumApplicator::u_fgetc_wrapper(istream& input) {
if (runningWithNullFlush) {
if (!fgetc_converter) {
- fgetc_error=U_ZERO_ERROR;
+ fgetc_error = U_ZERO_ERROR;
fgetc_converter = ucnv_open(ucnv_getDefaultName(), &fgetc_error);
if (U_FAILURE(fgetc_error)) {
- u_fprintf(ux_stderr, "Error in ucnv_open: %d\n", fgetc_error);
- }
+ u_fprintf(ux_stderr, "Error in ucnv_open: %d\n", fgetc_error);
+ }
}
int ch;
int result;
- int inputsize=0;
+ int inputsize = 0;
do {
ch = input.getc_raw();
- if (ch==0) {
+ if (ch == 0) {
return 0;
}
else {
- fgetc_inputbuf[inputsize]=static_cast<char>(ch);
+ fgetc_inputbuf[inputsize] = static_cast<char>(ch);
inputsize++;
- fgetc_error=U_ZERO_ERROR;
+ fgetc_error = U_ZERO_ERROR;
result = ucnv_toUChars(fgetc_converter, fgetc_outputbuf, 5, fgetc_inputbuf, inputsize, &fgetc_error);
if (U_FAILURE(fgetc_error)) {
u_fprintf(ux_stderr, "Error conversion: %d\n", fgetc_error);
}
}
- }
- while (( ((result>=1 && fgetc_outputbuf[0]==0xFFFD)) || result<1 || U_FAILURE(fgetc_error) ) && !input.eof() && inputsize<5);
+ } while ((((result >= 1 && fgetc_outputbuf[0] == 0xFFFD)) || result < 1 || U_FAILURE(fgetc_error)) && !input.eof() && inputsize < 5);
- if (fgetc_outputbuf[0]==0xFFFD && input.eof()) {
+ if (fgetc_outputbuf[0] == 0xFFFD && input.eof()) {
return U_EOF;
}
return fgetc_outputbuf[0];
@@ -92,13 +93,13 @@ UChar ApertiumApplicator::u_fgetc_wrapper(istream& input) {
void ApertiumApplicator::runGrammarOnTextWrapperNullFlush(istream& input, UFILE *output) {
setNullFlush(false);
- runningWithNullFlush=true;
+ runningWithNullFlush = true;
while (!input.eof()) {
runGrammarOnText(input, output);
u_fputc('\0', output);
u_fflush(output);
}
- runningWithNullFlush=false;
+ runningWithNullFlush = false;
}
/*
@@ -138,23 +139,23 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
}
}
- UChar inchar = 0; // Current character
- bool superblank = false; // Are we in a superblank ?
- bool incohort = false; // Are we in a cohort ?
- UString firstblank; // Blanks before the first window
+ UChar inchar = 0; // Current character
+ bool superblank = false; // Are we in a superblank ?
+ bool incohort = false; // Are we in a cohort ?
+ UString firstblank; // Blanks before the first window
index();
- uint32_t resetAfter = ((num_windows+4)*2+1);
+ uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
begintag = addTag(stringbits[S_BEGINTAG].getTerminatedBuffer())->hash; // Beginning of sentence tag
- endtag = addTag(stringbits[S_ENDTAG].getTerminatedBuffer())->hash; // End of sentence tag
+ endtag = addTag(stringbits[S_ENDTAG].getTerminatedBuffer())->hash; // End of sentence tag
- SingleWindow *cSWindow = 0; // Current single window (Cohort frame)
- Cohort *cCohort = 0; // Current cohort
- Reading *cReading = 0; // Current reading
+ SingleWindow *cSWindow = 0; // Current single window (Cohort frame)
+ Cohort *cCohort = 0; // Current cohort
+ Reading *cReading = 0; // Current reading
- SingleWindow *lSWindow = 0; // Left hand single window
+ SingleWindow *lSWindow = 0; // Left hand single window
gWindow->window_span = num_windows;
gtimer = getticks();
@@ -215,8 +216,8 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
initEmptyCohort(*cCohort);
}
if (cCohort && cSWindow->cohorts.size() >= soft_limit && grammar->soft_delimiters && doesSetMatchCohortNormal(*cCohort, grammar->soft_delimiters->number)) {
- // ie. we've read some cohorts
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ // ie. we've read some cohorts
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -231,7 +232,7 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -422,7 +423,7 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
if (cCohort->readings.empty()) {
initEmptyCohort(*cCohort);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
cReading = 0;
@@ -514,7 +515,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
base += '"';
while (*c != '\0') {
if (*c == '*') { // Initial asterisk means word is unknown, and
- // should just be copied in the output.
+ // should just be copied in the output.
unknown = true;
}
if (*c == '<' || *c == '\0') {
@@ -525,12 +526,12 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
}
if (!suf.empty()) { // Append the multiword suffix to the baseform
- // (this is normally done in pretransfer)
+ // (this is normally done in pretransfer)
base += suf;
}
base += '"';
-// u_fprintf(ux_stderr, ">> b: %S s: %S\n", base.c_str(), suf.c_str());
+ // u_fprintf(ux_stderr, ">> b: %S s: %S\n", base.c_str(), suf.c_str());
TagVector taglist;
@@ -575,7 +576,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
UString bf;
bf += '"';
if (tmptag[0] == '+') {
- bf.append(tmptag.begin()+1, tmptag.end());
+ bf.append(tmptag.begin() + 1, tmptag.end());
}
else {
bf += tmptag;
@@ -619,7 +620,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
// Search from the back until we find a baseform, then add all tags from there until the end onto the reading
while (!taglist.empty()) {
Reading *reading = cReading;
- reverse_foreach (TagVector, taglist, riter, riter_end) {
+ reverse_foreach (riter, taglist) {
if ((*riter)->type & T_BASEFORM) {
// If current reading already has a baseform, instead create a sub-reading as target
if (reading->baseform) {
@@ -630,7 +631,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
// Add tags
TagList mappings;
TagVector::iterator iter = riter.base();
- for (--iter ; iter != taglist.end() ; ++iter) {
+ for (--iter; iter != taglist.end(); ++iter) {
if ((*iter)->type & T_MAPPING || (*iter)->tag[0] == grammar->mapping_prefix) {
mappings.push_back(*iter);
}
@@ -666,7 +667,7 @@ void ApertiumApplicator::testPR(UFILE *output) {
"be# happy<vblex><inf>",
"aux3<tag>+aux2<tag>+aux1<tag>+main<tag>",
};
- for (size_t i = 0 ; i<6 ; ++i) {
+ for (size_t i = 0; i < 6; ++i) {
UString text(texts[i].begin(), texts[i].end());
Reading *reading = alloc_reading(0);
processReading(reading, text);
@@ -675,6 +676,7 @@ void ApertiumApplicator::testPR(UFILE *output) {
}
printReading(reading, output);
u_fprintf(output, "\n");
+ delete reading;
}
}
@@ -690,7 +692,7 @@ void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
if (reading->baseform) {
// Lop off the initial and final '"' characters
- UnicodeString bf(single_tags[reading->baseform]->tag.c_str()+1, single_tags[reading->baseform]->tag.length()-2);
+ UnicodeString bf(single_tags[reading->baseform]->tag.c_str() + 1, single_tags[reading->baseform]->tag.length() - 2);
if (wordform_case && !reading->next) {
// Use surface/wordform case, eg. if lt-proc
@@ -698,10 +700,10 @@ void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
// dictionary case on lemma/basefrom)
// Lop off the initial and final '"<>"' characters
// ToDo: A copy does not need to be made here - use pointer offsets
- UnicodeString wf(reading->parent->wordform->tag.c_str()+2, reading->parent->wordform->tag.length()-4);
+ UnicodeString wf(reading->parent->wordform->tag.c_str() + 2, reading->parent->wordform->tag.length() - 4);
int first = 0; // first occurrence of a lowercase character in baseform
- for (; first<bf.length() ; ++first) {
+ for (; first < bf.length(); ++first) {
if (u_islower(bf[first]) != 0) {
break;
}
@@ -709,7 +711,7 @@ void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
// this corresponds to fst_processor.cc in lttoolbox:
bool firstupper = first < wf.length() && (u_isupper(wf[first]) != 0);
- bool uppercase = firstupper && u_isupper(wf[wf.length()-1]);
+ bool uppercase = firstupper && u_isupper(wf[wf.length() - 1]);
if (uppercase) {
bf.toUpper(); // Perform a Unicode case folding to upper case -- Tino Didriksen
@@ -723,7 +725,7 @@ void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
} // if (wordform_case)
UString bf_escaped;
- for (int i=0 ; i<bf.length() ; ++i) {
+ for (int i = 0; i < bf.length(); ++i) {
if (bf[i] == '^' || bf[i] == '\\' || bf[i] == '/' || bf[i] == '$' || bf[i] == '[' || bf[i] == ']' || bf[i] == '{' || bf[i] == '}' || bf[i] == '<' || bf[i] == '>') {
bf_escaped += '\\';
}
@@ -741,24 +743,26 @@ void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
Reading::tags_list_t multitags_list; // everything after a +, until the first MAPPING tag
Reading::tags_list_t::iterator tter;
bool multi = false;
- for (tter = reading->tags_list.begin() ; tter != reading->tags_list.end() ; tter++) {
+ for (tter = reading->tags_list.begin(); tter != reading->tags_list.end(); tter++) {
const Tag *tag = single_tags[*tter];
if (tag->tag[0] == '+') {
multi = true;
- } else if (tag->type & T_MAPPING) {
+ }
+ else if (tag->type & T_MAPPING) {
multi = false;
}
if (multi) {
multitags_list.push_back(*tter);
- } else {
+ }
+ else {
tags_list.push_back(*tter);
}
}
- tags_list.insert(tags_list.end(),multitags_list.begin(),multitags_list.end());
+ tags_list.insert(tags_list.end(), multitags_list.begin(), multitags_list.end());
uint32SortedVector used_tags;
- for (tter = tags_list.begin() ; tter != tags_list.end() ; tter++) {
+ for (tter = tags_list.begin(); tter != tags_list.end(); tter++) {
if (unique_tags) {
if (used_tags.find(*tter) != used_tags.end()) {
continue;
@@ -783,7 +787,7 @@ void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
}
if (trace) {
- const_foreach (uint32Vector, reading->hit_by, iter_hb, iter_hb_end) {
+ foreach (iter_hb, reading->hit_by) {
u_fputc('<', output);
printTrace(output, *iter_hb);
u_fputc('>', output);
@@ -792,13 +796,12 @@ void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
}
void ApertiumApplicator::printSingleWindow(SingleWindow *window, UFILE *output) {
-
// Window text comes at the left
if (!window->text.empty()) {
u_fprintf(output, "%S", window->text.c_str());
}
- for (uint32_t c=0 ; c < window->cohorts.size() ; c++) {
+ for (uint32_t c = 0; c < window->cohorts.size(); c++) {
if (c == 0) { // Skip magic cohort
continue;
}
@@ -815,9 +818,9 @@ void ApertiumApplicator::printSingleWindow(SingleWindow *window, UFILE *output)
if (print_word_forms == true) {
// Lop off the initial and final '"' characters
// ToDo: A copy does not need to be made here - use pointer offsets
- UnicodeString wf(cohort->wordform->tag.c_str()+2, cohort->wordform->tag.length()-4);
+ UnicodeString wf(cohort->wordform->tag.c_str() + 2, cohort->wordform->tag.length() - 4);
UString wf_escaped;
- for (int i=0 ; i<wf.length() ; ++i) {
+ for (int i = 0; i < wf.length(); ++i) {
if (wf[i] == '^' || wf[i] == '\\' || wf[i] == '/' || wf[i] == '$' || wf[i] == '[' || wf[i] == ']' || wf[i] == '{' || wf[i] == '}' || wf[i] == '<' || wf[i] == '>') {
wf_escaped += '\\';
}
@@ -827,7 +830,7 @@ void ApertiumApplicator::printSingleWindow(SingleWindow *window, UFILE *output)
// Print the static reading tags
if (cohort->wread) {
- const_foreach (Reading::tags_list_t, cohort->wread->tags_list, tter, tter_end) {
+ foreach (tter, cohort->wread->tags_list) {
if (*tter == cohort->wordform->hash) {
continue;
}
@@ -849,8 +852,7 @@ void ApertiumApplicator::printSingleWindow(SingleWindow *window, UFILE *output)
reading = reverse(reading);
}
printReading(reading, output);
- if(print_only_first == true)
- {
+ if (print_only_first == true) {
break;
}
}
@@ -897,11 +899,11 @@ void ApertiumApplicator::mergeMappings(Cohort& cohort) {
// foo<N><Sg><Acc><@←SUBJ>/foo<N><Sg><Acc><@←OBJ>
// => foo<N><Sg><Acc><@←SUBJ>/foo<N><Sg><Acc><@←OBJ>
std::map<uint32_t, ReadingList> mlist;
- foreach (ReadingList, cohort.readings, iter, iter_end) {
+ foreach (iter, cohort.readings) {
Reading *r = *iter;
uint32_t hp = r->hash; // instead of hash_plain, which doesn't include mapping tags
if (trace) {
- foreach (uint32Vector, r->hit_by, iter_hb, iter_hb_end) {
+ foreach (iter_hb, r->hit_by) {
hp = hash_value(*iter_hb, hp);
}
}
@@ -909,7 +911,7 @@ void ApertiumApplicator::mergeMappings(Cohort& cohort) {
while (sub) {
hp = hash_value(sub->hash, hp);
if (trace) {
- foreach (uint32Vector, sub->hit_by, iter_hb, iter_hb_end) {
+ foreach (iter_hb, sub->hit_by) {
hp = hash_value(*iter_hb, hp);
}
}
@@ -926,15 +928,18 @@ void ApertiumApplicator::mergeMappings(Cohort& cohort) {
std::vector<Reading*> order;
std::map<uint32_t, ReadingList>::iterator miter;
- for (miter = mlist.begin() ; miter != mlist.end() ; miter++) {
+ for (miter = mlist.begin(); miter != mlist.end(); miter++) {
ReadingList clist = miter->second;
- Reading *nr = alloc_reading(*(clist.front()));
- // no merging of mapping tags
- order.push_back(nr);
+ // no merging of mapping tags, so just take first reading of the group
+ order.push_back(clist.front());
+
+ clist.erase(clist.begin());
+ foreach (cit, clist) {
+ free_reading(*cit);
+ }
}
std::sort(order.begin(), order.end(), CG3::Reading::cmp_number);
cohort.readings.insert(cohort.readings.begin(), order.begin(), order.end());
}
-
}
diff --git a/src/ApertiumApplicator.hpp b/src/ApertiumApplicator.hpp
index 536da8c..cb0b6d1 100644
--- a/src/ApertiumApplicator.hpp
+++ b/src/ApertiumApplicator.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,42 +26,40 @@
#include "GrammarApplicator.hpp"
namespace CG3 {
- class ApertiumApplicator : public virtual GrammarApplicator {
- public:
- ApertiumApplicator(UFILE *ux_err);
+class ApertiumApplicator : public virtual GrammarApplicator {
+public:
+ ApertiumApplicator(UFILE *ux_err);
- void runGrammarOnText(istream& input, UFILE *output);
+ void runGrammarOnText(istream& input, UFILE *output);
- bool getNullFlush();
- bool wordform_case;
- bool print_word_forms;
- bool print_only_first;
- void setNullFlush(bool pNullFlush);
+ bool getNullFlush();
+ bool wordform_case;
+ bool print_word_forms;
+ bool print_only_first;
+ void setNullFlush(bool pNullFlush);
- void testPR(UFILE *output);
-
- protected:
- bool nullFlush;
- bool runningWithNullFlush;
-
- void printReading(Reading *reading, UFILE *output);
- void printSingleWindow(SingleWindow *window, UFILE *output);
-
- void runGrammarOnTextWrapperNullFlush(istream& input, UFILE *output);
+ void testPR(UFILE *output);
- UChar u_fgetc_wrapper(istream& input);
- UConverter* fgetc_converter;
- char fgetc_inputbuf[5];
- UChar fgetc_outputbuf[5];
- UErrorCode fgetc_error;
- void mergeMappings(Cohort& cohort);
-
- private:
+protected:
+ bool nullFlush;
+ bool runningWithNullFlush;
- void processReading(Reading *cReading, const UChar *reading_string);
- void processReading(Reading *cReading, const UString& reading_string);
+ void printReading(Reading *reading, UFILE *output);
+ void printSingleWindow(SingleWindow *window, UFILE *output);
- };
+ void runGrammarOnTextWrapperNullFlush(istream& input, UFILE *output);
+
+ UChar u_fgetc_wrapper(istream& input);
+ UConverter *fgetc_converter;
+ char fgetc_inputbuf[5];
+ UChar fgetc_outputbuf[5];
+ UErrorCode fgetc_error;
+ void mergeMappings(Cohort& cohort);
+
+private:
+ void processReading(Reading *cReading, const UChar *reading_string);
+ void processReading(Reading *cReading, const UString& reading_string);
+};
}
#endif
diff --git a/src/BinaryGrammar.cpp b/src/BinaryGrammar.cpp
index 2675fd4..5a9e03c 100644
--- a/src/BinaryGrammar.cpp
+++ b/src/BinaryGrammar.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -39,7 +39,7 @@ void BinaryGrammar::setVerbosity(uint32_t v) {
verbosity = v;
}
-int BinaryGrammar::parse_grammar_from_file(const char *filename, const char *, const char *) {
+int BinaryGrammar::parse_grammar_from_file(const char *filename, const char*, const char*) {
if (!grammar) {
u_fprintf(ux_stderr, "Error: Cannot parse into nothing - hint: call setResult() before trying.\n");
CG3Quit(1);
@@ -63,5 +63,4 @@ int BinaryGrammar::parse_grammar_from_file(const char *filename, const char *, c
}
return readBinaryGrammar(input);
}
-
}
diff --git a/src/BinaryGrammar.hpp b/src/BinaryGrammar.hpp
index 7a55bbb..19f6949 100644
--- a/src/BinaryGrammar.hpp
+++ b/src/BinaryGrammar.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,31 +26,34 @@
#include "IGrammarParser.hpp"
namespace CG3 {
- class ContextualTest;
+class ContextualTest;
- class BinaryGrammar : public IGrammarParser {
- public:
- BinaryGrammar(Grammar& result, UFILE *ux_err);
+class BinaryGrammar : public IGrammarParser {
+public:
+ BinaryGrammar(Grammar& result, UFILE *ux_err);
- int writeBinaryGrammar(FILE *output);
- int readBinaryGrammar(FILE *input);
+ int writeBinaryGrammar(FILE *output);
+ int readBinaryGrammar(FILE *input);
- void setCompatible(bool compat);
- void setVerbosity(uint32_t level);
- int parse_grammar_from_file(const char *filename, const char *locale, const char *codepage);
- private:
- Grammar *grammar;
- void writeContextualTest(ContextualTest *t, FILE *output);
- ContextualTest *readContextualTest(FILE *input);
+ void setCompatible(bool compat);
+ void setVerbosity(uint32_t level);
+ int parse_grammar_from_file(const char *filename, const char *locale, const char *codepage);
- typedef stdext::hash_map<ContextualTest*,uint32_t> deferred_t;
- deferred_t deferred_tmpls;
+private:
+ Grammar *grammar;
+ void writeContextualTest(ContextualTest *t, FILE *output);
+ ContextualTest *readContextualTest(FILE *input);
- uint32FlatHashSet seen_uint32;
+ typedef stdext::hash_map<ContextualTest*, uint32_t> deferred_t;
+ deferred_t deferred_tmpls;
+ typedef stdext::hash_map<ContextualTest*, std::vector<uint32_t> > deferred_ors_t;
+ deferred_ors_t deferred_ors;
- int readBinaryGrammar_10043(FILE *input);
- ContextualTest *readContextualTest_10043(FILE *input);
- };
+ uint32FlatHashSet seen_uint32;
+
+ int readBinaryGrammar_10043(FILE *input);
+ ContextualTest *readContextualTest_10043(FILE *input);
+};
}
#endif
diff --git a/src/BinaryGrammar_read.cpp b/src/BinaryGrammar_read.cpp
index 061a5a2..fdde347 100644
--- a/src/BinaryGrammar_read.cpp
+++ b/src/BinaryGrammar_read.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -79,6 +79,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
grammar->has_dep = (fields & (1 << 0)) != 0;
grammar->sub_readings_ltr = (fields & (1 << 2)) != 0;
grammar->has_relations = (fields & (1 << 13)) != 0;
+ grammar->has_bag_of_tags = (fields & (1 << 14)) != 0;
if (fields & (1 << 1)) {
ucnv_reset(conv);
@@ -89,7 +90,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
}
// Keep track of which sets that the varstring tags used; we can't just assign them as sets are not loaded yet
- typedef std::map<uint32_t,uint32Vector> tag_varsets_t;
+ typedef std::map<uint32_t, uint32Vector> tag_varsets_t;
tag_varsets_t tag_varsets;
u32tmp = 0;
@@ -99,7 +100,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
}
uint32_t num_single_tags = u32tmp;
grammar->single_tags_list.resize(num_single_tags);
- for (uint32_t i=0 ; i<num_single_tags ; i++) {
+ for (uint32_t i = 0; i < num_single_tags; i++) {
Tag *t = grammar->allocateTag();
t->type |= T_GRAMMAR;
@@ -147,7 +148,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
t->tag = &gbuffers[0][0];
}
}
@@ -158,7 +159,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
UParseError pe;
UErrorCode status = U_ZERO_ERROR;
@@ -182,7 +183,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
t->allocateVsSets();
t->vs_sets->reserve(num);
tag_varsets[t->number].reserve(num);
- for (size_t i=0 ; i<num ; ++i) {
+ for (size_t i = 0; i < num; ++i) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
tag_varsets[t->number].push_back(u32tmp);
@@ -193,13 +194,13 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
uint32_t num = (uint32_t)ntohl(u32tmp);
t->allocateVsNames();
t->vs_names->reserve(num);
- for (size_t i=0 ; i<num ; ++i) {
+ for (size_t i = 0; i < num; ++i) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
t->vs_names->push_back(&gbuffers[0][0]);
}
}
@@ -218,7 +219,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_remaps = u32tmp;
- for (uint32_t i = 0; i<num_remaps; ++i) {
+ for (uint32_t i = 0; i < num_remaps; ++i) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
grammar->reopen_mappings.insert(u32tmp);
@@ -230,7 +231,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_pref_targets = u32tmp;
- for (uint32_t i=0 ; i<num_pref_targets ; i++) {
+ for (uint32_t i = 0; i < num_pref_targets; i++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
grammar->preferred_targets.push_back(u32tmp);
@@ -242,7 +243,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_par_pairs = u32tmp;
- for (uint32_t i=0 ; i<num_par_pairs ; i++) {
+ for (uint32_t i = 0; i < num_par_pairs; i++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t left = (uint32_t)ntohl(u32tmp);
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
@@ -257,7 +258,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_par_anchors = u32tmp;
- for (uint32_t i=0 ; i<num_par_anchors ; i++) {
+ for (uint32_t i = 0; i < num_par_anchors; i++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t left = (uint32_t)ntohl(u32tmp);
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
@@ -272,7 +273,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
}
uint32_t num_sets = u32tmp;
grammar->sets_list.resize(num_sets);
- for (uint32_t i=0 ; i<num_sets ; i++) {
+ for (uint32_t i = 0; i < num_sets; i++) {
Set *s = grammar->allocateSet();
uint32_t fields = 0;
@@ -305,7 +306,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_set_ops = u32tmp;
- for (uint32_t j=0 ; j<num_set_ops ; j++) {
+ for (uint32_t j = 0; j < num_set_ops; j++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
s->set_ops.push_back(u32tmp);
@@ -315,7 +316,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_sets = u32tmp;
- for (uint32_t j=0 ; j<num_sets ; j++) {
+ for (uint32_t j = 0; j < num_sets; j++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
s->sets.push_back(u32tmp);
@@ -327,7 +328,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
s->setName(&gbuffers[0][0]);
}
}
@@ -335,9 +336,9 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
}
// Actually assign sets to the varstring tags now that sets are loaded
- foreach (tag_varsets_t, tag_varsets, iter, iter_end) {
+ foreach (iter, tag_varsets) {
Tag *t = grammar->single_tags_list[iter->first];
- foreach (uint32Vector, iter->second, uit, uit_end) {
+ foreach (uit, iter->second) {
Set *s = grammar->sets_list[*uit];
t->vs_sets->push_back(s);
}
@@ -373,7 +374,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
}
uint32_t num_rules = u32tmp;
grammar->rule_by_number.resize(num_rules);
- for (uint32_t i=0 ; i<num_rules ; i++) {
+ for (uint32_t i = 0; i < num_rules; i++) {
Rule *r = grammar->allocateRule();
uint32_t fields = 0;
@@ -402,7 +403,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
r->setName(&gbuffers[0][0]);
}
}
@@ -463,7 +464,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_dep_tests = u32tmp;
- for (uint32_t j=0 ; j<num_dep_tests ; j++) {
+ for (uint32_t j = 0; j < num_dep_tests; j++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *t = grammar->contexts[u32tmp];
@@ -473,7 +474,7 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_tests = u32tmp;
- for (uint32_t j=0 ; j<num_tests ; j++) {
+ for (uint32_t j = 0; j < num_tests; j++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *t = grammar->contexts[u32tmp];
@@ -483,10 +484,18 @@ int BinaryGrammar::readBinaryGrammar(FILE *input) {
}
// Bind the templates to where they are used
- foreach (deferred_t, deferred_tmpls, it, it_end) {
+ foreach (it, deferred_tmpls) {
it->first->tmpl = grammar->contexts.find(it->second)->second;
}
+ // Bind the OR'ed contexts to where they are used
+ foreach (it, deferred_ors) {
+ it->first->ors.reserve(it->second.size());
+ foreach (orit, it->second) {
+ it->first->ors.push_back(grammar->contexts.find(*orit)->second);
+ }
+ }
+
ucnv_close(conv);
return 0;
}
@@ -549,11 +558,10 @@ ContextualTest *BinaryGrammar::readContextualTest(FILE *input) {
if (fields & (1 << 10)) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t num_ors = (uint32_t)ntohl(u32tmp);
- for (uint32_t i=0 ; i<num_ors ; ++i) {
+ for (uint32_t i = 0; i < num_ors; ++i) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
- ContextualTest *to = grammar->contexts[u32tmp];
- t->ors.push_back(to);
+ deferred_ors[t].push_back(u32tmp);
}
}
if (fields & (1 << 11)) {
@@ -564,5 +572,4 @@ ContextualTest *BinaryGrammar::readContextualTest(FILE *input) {
return t;
}
-
}
diff --git a/src/BinaryGrammar_read_10043.cpp b/src/BinaryGrammar_read_10043.cpp
index a6b9663..652437d 100644
--- a/src/BinaryGrammar_read_10043.cpp
+++ b/src/BinaryGrammar_read_10043.cpp
@@ -80,7 +80,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
}
// Keep track of which sets that the varstring tags used; we can't just assign them as sets are not loaded yet
- typedef std::map<uint32_t,uint32Vector> tag_varsets_t;
+ typedef std::map<uint32_t, uint32Vector> tag_varsets_t;
tag_varsets_t tag_varsets;
u32tmp = 0;
@@ -90,7 +90,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
}
uint32_t num_single_tags = u32tmp;
grammar->single_tags_list.resize(num_single_tags);
- for (uint32_t i=0 ; i<num_single_tags ; i++) {
+ for (uint32_t i = 0; i < num_single_tags; i++) {
Tag *t = grammar->allocateTag();
t->type |= T_GRAMMAR;
@@ -138,7 +138,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
t->tag = &gbuffers[0][0];
}
}
@@ -149,7 +149,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
UParseError pe;
UErrorCode status = U_ZERO_ERROR;
@@ -173,7 +173,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
t->allocateVsSets();
t->vs_sets->reserve(num);
tag_varsets[t->number].reserve(num);
- for (size_t i=0 ; i<num ; ++i) {
+ for (size_t i = 0; i < num; ++i) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
tag_varsets[t->number].push_back(u32tmp);
@@ -184,13 +184,13 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
uint32_t num = (uint32_t)ntohl(u32tmp);
t->allocateVsNames();
t->vs_names->reserve(num);
- for (size_t i=0 ; i<num ; ++i) {
+ for (size_t i = 0; i < num; ++i) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
t->vs_names->push_back(&gbuffers[0][0]);
}
}
@@ -209,7 +209,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_pref_targets = u32tmp;
- for (uint32_t i=0 ; i<num_pref_targets ; i++) {
+ for (uint32_t i = 0; i < num_pref_targets; i++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
grammar->preferred_targets.push_back(u32tmp);
@@ -221,7 +221,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_par_pairs = u32tmp;
- for (uint32_t i=0 ; i<num_par_pairs ; i++) {
+ for (uint32_t i = 0; i < num_par_pairs; i++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t left = (uint32_t)ntohl(u32tmp);
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
@@ -236,7 +236,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
u32tmp = (uint32_t)ntohl(u32tmp);
}
uint32_t num_par_anchors = u32tmp;
- for (uint32_t i=0 ; i<num_par_anchors ; i++) {
+ for (uint32_t i = 0; i < num_par_anchors; i++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t left = (uint32_t)ntohl(u32tmp);
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
@@ -251,7 +251,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
}
uint32_t num_sets = u32tmp;
grammar->sets_list.resize(num_sets);
- for (uint32_t i=0 ; i<num_sets ; i++) {
+ for (uint32_t i = 0; i < num_sets; i++) {
Set *s = grammar->allocateSet();
uint32_t fields = 0;
@@ -287,7 +287,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_set_ops = u32tmp;
- for (uint32_t j=0 ; j<num_set_ops ; j++) {
+ for (uint32_t j = 0; j < num_set_ops; j++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
s->set_ops.push_back(u32tmp);
@@ -297,7 +297,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_sets = u32tmp;
- for (uint32_t j=0 ; j<num_sets ; j++) {
+ for (uint32_t j = 0; j < num_sets; j++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
s->sets.push_back(u32tmp);
@@ -309,7 +309,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
s->setName(&gbuffers[0][0]);
}
}
@@ -318,9 +318,9 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
}
// Actually assign sets to the varstring tags now that sets are loaded
- foreach (tag_varsets_t, tag_varsets, iter, iter_end) {
+ foreach (iter, tag_varsets) {
Tag *t = grammar->single_tags_list[iter->first];
- foreach (uint32Vector, iter->second, uit, uit_end) {
+ foreach (uit, iter->second) {
Set *s = grammar->sets_list[*uit];
t->vs_sets->push_back(s);
}
@@ -358,7 +358,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
}
uint32_t num_rules = u32tmp;
grammar->rule_by_number.resize(num_rules);
- for (uint32_t i=0 ; i<num_rules ; i++) {
+ for (uint32_t i = 0; i < num_rules; i++) {
Rule *r = grammar->allocateRule();
uint32_t fields = 0;
@@ -387,7 +387,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
if (u32tmp) {
ucnv_reset(conv);
fread_throw(&cbuffers[0][0], 1, u32tmp, input);
- i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE-1, &cbuffers[0][0], u32tmp, &err);
+ i32tmp = ucnv_toUChars(conv, &gbuffers[0][0], CG3_BUFFER_SIZE - 1, &cbuffers[0][0], u32tmp, &err);
r->setName(&gbuffers[0][0]);
}
}
@@ -442,13 +442,13 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
if (u32tmp) {
- r->dep_target = contexts_list[u32tmp-1];
+ r->dep_target = contexts_list[u32tmp - 1];
}
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_dep_tests = u32tmp;
- for (uint32_t j=0 ; j<num_dep_tests ; j++) {
+ for (uint32_t j = 0; j < num_dep_tests; j++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *t = contexts_list[u32tmp - 1];
@@ -458,7 +458,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
uint32_t num_tests = u32tmp;
- for (uint32_t j=0 ; j<num_tests ; j++) {
+ for (uint32_t j = 0; j < num_tests; j++) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
ContextualTest *t = contexts_list[u32tmp - 1];
@@ -468,7 +468,7 @@ int BinaryGrammar::readBinaryGrammar_10043(FILE *input) {
}
// Bind the named templates to where they are used
- foreach (deferred_t, deferred_tmpls, it, it_end) {
+ foreach (it, deferred_tmpls) {
BOOST_AUTO(tmt, templates.find(it->second));
it->first->tmpl = tmt->second;
}
@@ -542,10 +542,10 @@ ContextualTest *BinaryGrammar::readContextualTest_10043(FILE *input) {
if (fields & (1 << 10)) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
uint32_t num_ors = (uint32_t)ntohl(u32tmp);
- for (uint32_t i=0 ; i<num_ors ; ++i) {
+ for (uint32_t i = 0; i < num_ors; ++i) {
fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
u32tmp = (uint32_t)ntohl(u32tmp);
- ContextualTest *to = contexts_list[u32tmp-1];
+ ContextualTest *to = contexts_list[u32tmp - 1];
t->ors.push_back(to);
}
}
@@ -560,5 +560,4 @@ ContextualTest *BinaryGrammar::readContextualTest_10043(FILE *input) {
}
return t;
}
-
}
diff --git a/src/BinaryGrammar_write.cpp b/src/BinaryGrammar_write.cpp
index d346735..0272dab 100644
--- a/src/BinaryGrammar_write.cpp
+++ b/src/BinaryGrammar_write.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -91,13 +91,16 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
if (grammar->has_relations) {
fields |= (1 << 13);
}
+ if (grammar->has_bag_of_tags) {
+ fields |= (1 << 14);
+ }
u32tmp = (uint32_t)htonl((uint32_t)fields);
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
if (grammar->mapping_prefix) {
ucnv_reset(conv);
- i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE-1, &grammar->mapping_prefix, 1, &err);
+ i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE - 1, &grammar->mapping_prefix, 1, &err);
u32tmp = (uint32_t)htonl((uint32_t)i32tmp);
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
fwrite_throw(&cbuffers[0][0], i32tmp, 1, output);
@@ -108,7 +111,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
std::vector<Tag*>::const_iterator tags_iter;
- for (tags_iter = grammar->single_tags_list.begin() ; tags_iter != grammar->single_tags_list.end() ; tags_iter++) {
+ for (tags_iter = grammar->single_tags_list.begin(); tags_iter != grammar->single_tags_list.end(); tags_iter++) {
const Tag *t = *tags_iter;
uint32_t fields = 0;
@@ -152,7 +155,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
if (!t->tag.empty()) {
fields |= (1 << 8);
ucnv_reset(conv);
- i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE-1, t->tag.c_str(), t->tag.length(), &err);
+ i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE - 1, t->tag.c_str(), t->tag.length(), &err);
writeSwapped(buffer, i32tmp);
buffer.write(&cbuffers[0][0], i32tmp);
}
@@ -162,7 +165,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
int32_t len = 0;
const UChar *p = uregex_pattern(t->regexp, &len, &err);
ucnv_reset(conv);
- i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE-1, p, len, &err);
+ i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE - 1, p, len, &err);
writeSwapped(buffer, i32tmp);
buffer.write(&cbuffers[0][0], i32tmp);
}
@@ -170,16 +173,16 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
if (t->vs_sets) {
fields |= (1 << 10);
writeSwapped<uint32_t>(buffer, t->vs_sets->size());
- const_foreach (SetVector, *t->vs_sets, iter, iter_end) {
+ foreach (iter, *t->vs_sets) {
writeSwapped(buffer, (*iter)->number);
}
}
if (t->vs_names) {
fields |= (1 << 11);
writeSwapped<uint32_t>(buffer, t->vs_names->size());
- const_foreach (std::vector<UString>, *t->vs_names, iter, iter_end) {
+ foreach (iter, *t->vs_names) {
ucnv_reset(conv);
- i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE-1, (*iter).c_str(), (*iter).length(), &err);
+ i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE - 1, (*iter).c_str(), (*iter).length(), &err);
writeSwapped(buffer, i32tmp);
buffer.write(&cbuffers[0][0], i32tmp);
}
@@ -203,7 +206,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->preferred_targets.size());
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
- for (BOOST_AUTO(iter, grammar->preferred_targets.begin()) ; iter != grammar->preferred_targets.end() ; ++iter) {
+ for (BOOST_AUTO(iter, grammar->preferred_targets.begin()); iter != grammar->preferred_targets.end(); ++iter) {
u32tmp = (uint32_t)htonl((uint32_t)*iter);
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
@@ -223,7 +226,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->anchors.size());
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
- const_foreach (uint32FlatHashMap, grammar->anchors, iter_anchor, iter_anchor_end) {
+ foreach (iter_anchor, grammar->anchors) {
u32tmp = (uint32_t)htonl((uint32_t)iter_anchor->first);
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
u32tmp = (uint32_t)htonl((uint32_t)iter_anchor->second);
@@ -235,7 +238,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
std::vector<Set*>::const_iterator set_iter;
- for (set_iter = grammar->sets_list.begin() ; set_iter != grammar->sets_list.end() ; set_iter++) {
+ for (set_iter = grammar->sets_list.begin(); set_iter != grammar->sets_list.end(); set_iter++) {
Set *s = *set_iter;
uint32_t fields = 0;
@@ -261,21 +264,21 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
if (!s->set_ops.empty()) {
fields |= (1 << 4);
writeSwapped<uint32_t>(buffer, s->set_ops.size());
- const_foreach (uint32Vector, s->set_ops, iter, iter_end) {
+ foreach (iter, s->set_ops) {
writeSwapped(buffer, *iter);
}
}
if (!s->sets.empty()) {
fields |= (1 << 5);
writeSwapped<uint32_t>(buffer, s->sets.size());
- const_foreach (uint32Vector, s->sets, iter, iter_end) {
+ foreach (iter, s->sets) {
writeSwapped(buffer, *iter);
}
}
if (s->type & ST_STATIC) {
fields |= (1 << 6);
ucnv_reset(conv);
- i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE-1, s->name.c_str(), s->name.length(), &err);
+ i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE - 1, s->name.c_str(), s->name.length(), &err);
writeSwapped(buffer, i32tmp);
buffer.write(&cbuffers[0][0], i32tmp);
}
@@ -308,7 +311,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
u32tmp = (uint32_t)htonl((uint32_t)grammar->rule_by_number.size());
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
- const_foreach (RuleVector, grammar->rule_by_number, rule_iter, rule_iter_end) {
+ foreach (rule_iter, grammar->rule_by_number) {
Rule *r = *rule_iter;
uint32_t fields = 0;
@@ -334,7 +337,7 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
if (r->name) {
fields |= (1 << 4);
ucnv_reset(conv);
- i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE-1, r->name, u_strlen(r->name), &err);
+ i32tmp = ucnv_fromUChars(conv, &cbuffers[0][0], CG3_BUFFER_SIZE - 1, r->name, u_strlen(r->name), &err);
writeSwapped(buffer, i32tmp);
buffer.write(&cbuffers[0][0], i32tmp);
}
@@ -396,14 +399,14 @@ int BinaryGrammar::writeBinaryGrammar(FILE *output) {
r->reverseContextualTests();
u32tmp = (uint32_t)htonl(r->dep_tests.size());
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
- const_foreach (ContextList, r->dep_tests, it, it_end) {
+ foreach (it, r->dep_tests) {
u32tmp = (uint32_t)htonl((*it)->hash);
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
u32tmp = (uint32_t)htonl(r->tests.size());
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
- const_foreach (ContextList, r->tests, it, it_end) {
+ foreach (it, r->tests) {
u32tmp = (uint32_t)htonl((*it)->hash);
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
@@ -506,5 +509,4 @@ void BinaryGrammar::writeContextualTest(ContextualTest *t, FILE *output) {
fwrite_throw(&u32tmp, sizeof(uint32_t), 1, output);
}
}
-
}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index db5cbc8..72819ab 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -54,6 +54,7 @@ macro(cg3_link target)
endmacro()
set(LIBCG3_HEADERS
+ AST.hpp
BinaryGrammar.hpp
Cohort.hpp
CohortIterator.hpp
@@ -61,6 +62,8 @@ set(LIBCG3_HEADERS
Grammar.hpp
GrammarApplicator.hpp
GrammarWriter.hpp
+ Relabeller.hpp
+ MweSplitApplicator.hpp
IGrammarParser.hpp
Reading.hpp
Rule.hpp
@@ -78,9 +81,9 @@ set(LIBCG3_HEADERS
inlines.hpp
interval_vector.hpp
istream.hpp
- macros.hpp
parser_helpers.hpp
process.hpp
+ scoped_stack.hpp
sorted_vector.hpp
stdafx.hpp
uextras.hpp
@@ -102,6 +105,8 @@ set(LIBCG3_SOURCES
GrammarApplicator_runGrammar.cpp
GrammarApplicator_runRules.cpp
GrammarWriter.cpp
+ Relabeller.cpp
+ MweSplitApplicator.cpp
Reading.cpp
Rule.cpp
Set.cpp
@@ -150,14 +155,22 @@ add_executable(cg-proc
cg_proc.cpp
ApertiumApplicator.cpp
ApertiumApplicator.hpp
+ MatxinApplicator.cpp
+ MatxinApplicator.hpp
)
cg3_link(cg-proc)
target_link_libraries(cg-proc ${LINKLIB})
+add_executable(cg-relabel cg-relabel.cpp)
+cg3_link(cg-relabel)
+target_link_libraries(cg-relabel ${LINKLIB})
+
add_executable(cg-conv
cg_conv.cpp
ApertiumApplicator.cpp
ApertiumApplicator.hpp
+ MatxinApplicator.cpp
+ MatxinApplicator.hpp
FormatConverter.cpp
FormatConverter.hpp
FSTApplicator.cpp
@@ -171,6 +184,10 @@ add_executable(cg-conv
cg3_link(cg-conv)
target_link_libraries(cg-conv ${LINKLIB})
+add_executable(cg-mwesplit cg-mwesplit.cpp MweSplitApplicator.cpp MweSplitApplicator.hpp)
+cg3_link(cg-mwesplit)
+target_link_libraries(cg-mwesplit ${LINKLIB})
+
add_executable(vislcg3 main.cpp options.hpp)
cg3_link(vislcg3)
target_link_libraries(vislcg3 ${LINKLIB})
@@ -179,7 +196,7 @@ add_executable(test_libcg3 test_libcg3.c)
target_link_libraries(test_libcg3 ${LINKLIB})
if(APPLE)
- foreach(t cg-conv cg-comp cg-proc vislcg3)
+ foreach(t cg-conv cg-comp cg-proc cg-relabel cg-mwesplit vislcg3)
set("_file_${t}" $<TARGET_FILE:${t}>)
add_custom_command(
TARGET ${t}
@@ -194,7 +211,7 @@ if(APPLE)
endforeach()
endif()
-add_test(t_libcg3 test_libcg3 "${CMAKE_CURRENT_SOURCE_DIR}/../test/T_BasicSelect/grammar.cg3")
+add_test(t_libcg3 test_libcg3 "${CMAKE_CURRENT_SOURCE_DIR}/../test/T_Select/grammar.cg3")
if(INSTALL_STATIC)
install(TARGETS libcg3 ARCHIVE DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}")
@@ -204,4 +221,4 @@ if(NOT MSVC)
install(TARGETS libcg3-shared ARCHIVE DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}" LIBRARY DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}" RUNTIME DESTINATION bin)
install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cg3.h" DESTINATION include)
endif()
-install(TARGETS cg-comp cg-proc cg-conv vislcg3 RUNTIME DESTINATION bin)
+install(TARGETS cg-comp cg-proc cg-relabel cg-mwesplit cg-conv vislcg3 RUNTIME DESTINATION bin)
diff --git a/src/Cohort.cpp b/src/Cohort.cpp
index 95e0a50..6c4f509 100644
--- a/src/Cohort.cpp
+++ b/src/Cohort.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -51,19 +51,19 @@ void free_cohort(Cohort *c) {
pool_put(pool_cohorts, c);
}
-Cohort::Cohort(SingleWindow *p) :
-type(0),
-global_number(0),
-local_number(0),
-wordform(0),
-dep_self(0),
-dep_parent(std::numeric_limits<uint32_t>::max()),
-is_pleft(0),
-is_pright(0),
-parent(p),
-prev(0),
-next(0),
-wread(0)
+Cohort::Cohort(SingleWindow *p)
+ : type(0)
+ , global_number(0)
+ , local_number(0)
+ , wordform(0)
+ , dep_self(0)
+ , dep_parent(DEP_NO_PARENT)
+ , is_pleft(0)
+ , is_pright(0)
+ , parent(p)
+ , prev(0)
+ , next(0)
+ , wread(0)
{
#ifdef CG_TRACE_OBJECTS
std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << std::endl;
@@ -75,18 +75,18 @@ Cohort::~Cohort() {
std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << ": " << readings.size() << ", " << deleted.size() << ", " << delayed.size() << std::endl;
#endif
- foreach (ReadingList, readings, iter1, iter1_end) {
+ foreach (iter1, readings) {
delete (*iter1);
}
- foreach (ReadingList, deleted, iter2, iter2_end) {
+ foreach (iter2, deleted) {
delete (*iter2);
}
- foreach (ReadingList, delayed, iter3, iter3_end) {
+ foreach (iter3, delayed) {
delete (*iter3);
}
delete wread;
- foreach(CohortVector, removed, iter, iter_end) {
+ foreach (iter, removed) {
delete (*iter);
}
if (parent) {
@@ -97,7 +97,7 @@ Cohort::~Cohort() {
}
void Cohort::clear() {
- if (parent) {
+ if (parent && parent->parent) {
parent->parent->cohort_map.erase(global_number);
parent->parent->dep_window.erase(global_number);
}
@@ -108,7 +108,7 @@ void Cohort::clear() {
local_number = 0;
wordform = 0;
dep_self = 0;
- dep_parent = std::numeric_limits<uint32_t>::max();
+ dep_parent = DEP_NO_PARENT;
is_pleft = 0;
is_pright = 0;
parent = 0;
@@ -121,13 +121,13 @@ void Cohort::clear() {
relations.clear();
relations_input.clear();
- foreach(ReadingList, readings, iter1, iter1_end) {
+ foreach (iter1, readings) {
free_reading(*iter1);
}
- foreach(ReadingList, deleted, iter2, iter2_end) {
+ foreach (iter2, deleted) {
free_reading(*iter2);
}
- foreach(ReadingList, delayed, iter3, iter3_end) {
+ foreach (iter3, delayed) {
free_reading(*iter3);
}
free_reading(wread);
@@ -137,7 +137,7 @@ void Cohort::clear() {
delayed.clear();
wread = 0;
- foreach(CohortVector, removed, iter, iter_end) {
+ foreach (iter, removed) {
free_cohort(*iter);
}
removed.clear();
@@ -170,7 +170,7 @@ void Cohort::appendReading(Reading *read) {
type &= ~CT_NUM_CURRENT;
}
-Reading* Cohort::allocateAppendReading() {
+Reading *Cohort::allocateAppendReading() {
Reading *read = alloc_reading(this);
readings.push_back(read);
if (read->number == 0) {
@@ -186,7 +186,7 @@ void Cohort::updateMinMax() {
}
num_min.clear();
num_max.clear();
- const_foreach (ReadingList, readings, rter, rter_end) {
+ foreach (rter, readings) {
boost_foreach (Reading::tags_numerical_t::value_type& nter, (*rter)->tags_numerical) {
const Tag *tag = nter.second;
if (num_min.find(tag->comparison_hash) == num_min.end() || tag->comparison_val < num_min[tag->comparison_hash]) {
@@ -241,5 +241,4 @@ bool Cohort::remRelation(uint32_t rel, uint32_t cohort) {
}
return false;
}
-
}
diff --git a/src/Cohort.hpp b/src/Cohort.hpp
index 6bb520c..8a3e3ff 100644
--- a/src/Cohort.hpp
+++ b/src/Cohort.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -29,77 +29,77 @@
#include "flat_unordered_set.hpp"
namespace CG3 {
- class SingleWindow;
- class Reading;
- class Cohort;
- typedef bc::flat_map<uint32_t,uint32SortedVector> RelationCtn;
- typedef std::vector<Cohort*> CohortVector;
+class SingleWindow;
+class Reading;
+class Cohort;
+typedef bc::flat_map<uint32_t, uint32SortedVector> RelationCtn;
+typedef std::vector<Cohort*> CohortVector;
- enum {
- CT_ENCLOSED = (1 << 0),
- CT_RELATED = (1 << 1),
- CT_REMOVED = (1 << 2),
- CT_NUM_CURRENT = (1 << 3),
- CT_DEP_DONE = (1 << 4),
- };
+enum {
+ CT_ENCLOSED = (1 << 0),
+ CT_RELATED = (1 << 1),
+ CT_REMOVED = (1 << 2),
+ CT_NUM_CURRENT = (1 << 3),
+ CT_DEP_DONE = (1 << 4),
+};
- class Cohort {
- public:
- uint8_t type;
- uint32_t global_number;
- uint32_t local_number;
- Tag *wordform;
- uint32_t dep_self;
- uint32_t dep_parent;
- uint32_t is_pleft, is_pright;
- SingleWindow *parent;
- UString text;
- Cohort *prev, *next;
- Reading *wread;
- ReadingList readings;
- ReadingList deleted;
- ReadingList delayed;
- typedef bc::flat_map<uint32_t,int32_t> num_t;
- num_t num_max, num_min;
- uint32SortedVector dep_children;
- boost::dynamic_bitset<> possible_sets;
- CohortVector enclosed;
- CohortVector removed;
- RelationCtn relations;
- RelationCtn relations_input;
+// ToDo: Would love to make this a constexpr global, but that's C++11
+#define DEP_NO_PARENT std::numeric_limits<uint32_t>::max()
- int32_t getMin(uint32_t key);
- int32_t getMax(uint32_t key);
+class Cohort {
+public:
+ uint8_t type;
+ // ToDo: Get rid of global_number in favour of Cohort* relations
+ uint32_t global_number;
+ uint32_t local_number;
+ Tag *wordform;
+ uint32_t dep_self;
+ uint32_t dep_parent;
+ uint32_t is_pleft, is_pright;
+ SingleWindow *parent;
+ UString text;
+ Cohort *prev, *next;
+ Reading *wread;
+ ReadingList readings;
+ ReadingList deleted;
+ ReadingList delayed;
+ typedef bc::flat_map<uint32_t, int32_t> num_t;
+ num_t num_max, num_min;
+ uint32SortedVector dep_children;
+ boost::dynamic_bitset<> possible_sets;
+ CohortVector enclosed;
+ CohortVector removed;
+ RelationCtn relations;
+ RelationCtn relations_input;
- void detach();
+ int32_t getMin(uint32_t key);
+ int32_t getMax(uint32_t key);
- Cohort(SingleWindow *p);
- ~Cohort();
- void clear();
+ void detach();
- void addChild(uint32_t child);
- void remChild(uint32_t child);
- void appendReading(Reading *read);
- Reading *allocateAppendReading();
- bool addRelation(uint32_t rel, uint32_t cohort);
- bool setRelation(uint32_t rel, uint32_t cohort);
- bool remRelation(uint32_t rel, uint32_t cohort);
+ Cohort(SingleWindow *p);
+ ~Cohort();
+ void clear();
- private:
- void updateMinMax();
- };
+ void addChild(uint32_t child);
+ void remChild(uint32_t child);
+ void appendReading(Reading *read);
+ Reading *allocateAppendReading();
+ bool addRelation(uint32_t rel, uint32_t cohort);
+ bool setRelation(uint32_t rel, uint32_t cohort);
+ bool remRelation(uint32_t rel, uint32_t cohort);
- struct compare_Cohort {
- inline bool operator() (const Cohort* a, const Cohort* b) const {
- return a->global_number < b->global_number;
- }
- };
+private:
+ void updateMinMax();
+};
- typedef sorted_vector<Cohort*, compare_Cohort> CohortSet;
- typedef stdext::hash_map<uint32_t, CohortSet> uint32ToCohortsMap;
+struct compare_Cohort;
- Cohort *alloc_cohort(SingleWindow *p);
- void free_cohort(Cohort *c);
+typedef sorted_vector<Cohort*, compare_Cohort> CohortSet;
+typedef stdext::hash_map<uint32_t, CohortSet> uint32ToCohortsMap;
+
+Cohort *alloc_cohort(SingleWindow *p);
+void free_cohort(Cohort *c);
}
#endif
diff --git a/src/CohortIterator.cpp b/src/CohortIterator.cpp
index f1b005e..8d81900 100644
--- a/src/CohortIterator.cpp
+++ b/src/CohortIterator.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -22,14 +22,13 @@
#include "CohortIterator.hpp"
#include "ContextualTest.hpp"
#include "Window.hpp"
-#include "SingleWindow.hpp"
namespace CG3 {
-CohortIterator::CohortIterator(Cohort *cohort, const ContextualTest *test, bool span) :
-m_span(span),
-m_cohort(cohort),
-m_test(test)
+CohortIterator::CohortIterator(Cohort *cohort, const ContextualTest *test, bool span)
+ : m_span(span)
+ , m_cohort(cohort)
+ , m_test(test)
{
}
@@ -48,7 +47,7 @@ CohortIterator& CohortIterator::operator++() {
return *this;
}
-Cohort* CohortIterator::operator*() {
+Cohort *CohortIterator::operator*() {
return m_cohort;
}
@@ -58,8 +57,8 @@ void CohortIterator::reset(Cohort *cohort, const ContextualTest *test, bool span
m_test = test;
}
-TopologyLeftIter::TopologyLeftIter(Cohort *cohort, const ContextualTest *test, bool span) :
-CohortIterator(cohort, test, span)
+TopologyLeftIter::TopologyLeftIter(Cohort *cohort, const ContextualTest *test, bool span)
+ : CohortIterator(cohort, test, span)
{
}
@@ -67,7 +66,7 @@ TopologyLeftIter& TopologyLeftIter::operator++() {
if (!m_cohort || !m_test) {
return *this;
}
- if (m_cohort->prev && m_cohort->prev->parent != m_cohort->parent && !(m_test->pos & (POS_SPAN_BOTH|POS_SPAN_LEFT) || m_span)) {
+ if (m_cohort->prev && m_cohort->prev->parent != m_cohort->parent && !(m_test->pos & (POS_SPAN_BOTH | POS_SPAN_LEFT) || m_span)) {
m_cohort = 0;
}
else {
@@ -78,8 +77,8 @@ TopologyLeftIter& TopologyLeftIter::operator++() {
return *this;
}
-TopologyRightIter::TopologyRightIter(Cohort *cohort, const ContextualTest *test, bool span) :
-CohortIterator(cohort, test, span)
+TopologyRightIter::TopologyRightIter(Cohort *cohort, const ContextualTest *test, bool span)
+ : CohortIterator(cohort, test, span)
{
}
@@ -87,7 +86,7 @@ TopologyRightIter& TopologyRightIter::operator++() {
if (!m_cohort || !m_test) {
return *this;
}
- if (m_cohort->next && m_cohort->next->parent != m_cohort->parent && !(m_test->pos & (POS_SPAN_BOTH|POS_SPAN_RIGHT) || m_span)) {
+ if (m_cohort->next && m_cohort->next->parent != m_cohort->parent && !(m_test->pos & (POS_SPAN_BOTH | POS_SPAN_RIGHT) || m_span)) {
m_cohort = 0;
}
else {
@@ -98,8 +97,8 @@ TopologyRightIter& TopologyRightIter::operator++() {
return *this;
}
-DepParentIter::DepParentIter(Cohort *cohort, const ContextualTest *test, bool span) :
-CohortIterator(cohort, test, span)
+DepParentIter::DepParentIter(Cohort *cohort, const ContextualTest *test, bool span)
+ : CohortIterator(cohort, test, span)
{
++(*this);
}
@@ -108,8 +107,8 @@ DepParentIter& DepParentIter::operator++() {
if (!m_cohort || !m_test) {
return *this;
}
- if (m_cohort->dep_parent != std::numeric_limits<uint32_t>::max()) {
- std::map<uint32_t,Cohort*>::iterator it = m_cohort->parent->parent->cohort_map.find(m_cohort->dep_parent);
+ if (m_cohort->dep_parent != DEP_NO_PARENT) {
+ std::map<uint32_t, Cohort*>::iterator it = m_cohort->parent->parent->cohort_map.find(m_cohort->dep_parent);
if (it != m_cohort->parent->parent->cohort_map.end()) {
Cohort *cohort = it->second;
if (cohort->type & CT_REMOVED) {
@@ -144,8 +143,8 @@ void DepParentIter::reset(Cohort *cohort, const ContextualTest *test, bool span)
++(*this);
}
-DepDescendentIter::DepDescendentIter(Cohort *cohort, const ContextualTest *test, bool span) :
-CohortIterator(cohort, test, span)
+DepDescendentIter::DepDescendentIter(Cohort *cohort, const ContextualTest *test, bool span)
+ : CohortIterator(cohort, test, span)
{
reset(cohort, test, span);
}
@@ -165,20 +164,19 @@ void DepDescendentIter::reset(Cohort *cohort, const ContextualTest *test, bool s
m_cohort = 0;
if (cohort && test) {
- const_foreach (uint32SortedVector, cohort->dep_children, dter, dter_end) {
+ foreach (dter, cohort->dep_children) {
if (cohort->parent->parent->cohort_map.find(*dter) == cohort->parent->parent->cohort_map.end()) {
continue;
}
Cohort *current = cohort->parent->parent->cohort_map.find(*dter)->second;
bool good = true;
if (current->parent != cohort->parent) {
- if ((!(test->pos & (POS_SPAN_BOTH|POS_SPAN_LEFT))) && current->parent->number < cohort->parent->number) {
+ if ((!(test->pos & (POS_SPAN_BOTH | POS_SPAN_LEFT))) && current->parent->number < cohort->parent->number) {
good = false;
}
- else if ((!(test->pos & (POS_SPAN_BOTH|POS_SPAN_RIGHT))) && current->parent->number > cohort->parent->number) {
+ else if ((!(test->pos & (POS_SPAN_BOTH | POS_SPAN_RIGHT))) && current->parent->number > cohort->parent->number) {
good = false;
}
-
}
if (good) {
m_descendents.insert(current);
@@ -193,27 +191,26 @@ void DepDescendentIter::reset(Cohort *cohort, const ContextualTest *test, bool s
added = false;
CohortSet to_add;
- const_foreach (CohortSet, m_descendents, iter, iter_end) {
+ foreach (iter, m_descendents) {
Cohort *cohort_inner = *iter;
if (m_seen.find(cohort_inner) != m_seen.end()) {
continue;
}
m_seen.insert(cohort_inner);
- const_foreach (uint32SortedVector, cohort_inner->dep_children, dter, dter_end) {
+ foreach (dter, cohort_inner->dep_children) {
if (cohort_inner->parent->parent->cohort_map.find(*dter) == cohort_inner->parent->parent->cohort_map.end()) {
continue;
}
Cohort *current = cohort_inner->parent->parent->cohort_map.find(*dter)->second;
bool good = true;
if (current->parent != cohort->parent) {
- if ((!(test->pos & (POS_SPAN_BOTH|POS_SPAN_LEFT))) && current->parent->number < cohort->parent->number) {
+ if ((!(test->pos & (POS_SPAN_BOTH | POS_SPAN_LEFT))) && current->parent->number < cohort->parent->number) {
good = false;
}
- else if ((!(test->pos & (POS_SPAN_BOTH|POS_SPAN_RIGHT))) && current->parent->number > cohort->parent->number) {
+ else if ((!(test->pos & (POS_SPAN_BOTH | POS_SPAN_RIGHT))) && current->parent->number > cohort->parent->number) {
good = false;
}
-
}
if (good) {
to_add.insert(current);
@@ -222,10 +219,10 @@ void DepDescendentIter::reset(Cohort *cohort, const ContextualTest *test, bool s
}
}
- const_foreach (CohortSet, to_add, iter, iter_end) {
+ foreach (iter, to_add) {
m_descendents.insert(*iter);
}
- } while(added);
+ } while (added);
if (test->pos & POS_LEFT) {
m_seen.assign(m_descendents.begin(), m_descendents.lower_bound(cohort));
@@ -250,8 +247,9 @@ void DepDescendentIter::reset(Cohort *cohort, const ContextualTest *test, bool s
}
}
-DepAncestorIter::DepAncestorIter(Cohort *cohort, const ContextualTest *test, bool span) :
-CohortIterator(cohort, test, span) {
+DepAncestorIter::DepAncestorIter(Cohort *cohort, const ContextualTest *test, bool span)
+ : CohortIterator(cohort, test, span)
+{
reset(cohort, test, span);
}
@@ -270,7 +268,7 @@ void DepAncestorIter::reset(Cohort *cohort, const ContextualTest *test, bool spa
m_cohort = 0;
if (cohort && test) {
- for (Cohort *current = cohort; current; ) {
+ for (Cohort *current = cohort; current;) {
if (cohort->parent->parent->cohort_map.find(current->dep_parent) == cohort->parent->parent->cohort_map.end()) {
break;
}
@@ -283,7 +281,6 @@ void DepAncestorIter::reset(Cohort *cohort, const ContextualTest *test, bool spa
else if ((!(test->pos & (POS_SPAN_BOTH | POS_SPAN_RIGHT))) && current->parent->number > cohort->parent->number) {
good = false;
}
-
}
if (good) {
// If insertion fails, we've come around in a loop, so don't continue looping
@@ -318,10 +315,10 @@ void DepAncestorIter::reset(Cohort *cohort, const ContextualTest *test, bool spa
}
}
-CohortSetIter::CohortSetIter(Cohort *cohort, const ContextualTest *test, bool span) :
-CohortIterator(cohort, test, span),
-m_origcohort(cohort),
-m_cohortsetiter(m_cohortset.end())
+CohortSetIter::CohortSetIter(Cohort *cohort, const ContextualTest *test, bool span)
+ : CohortIterator(cohort, test, span)
+ , m_origcohort(cohort)
+ , m_cohortsetiter(m_cohortset.end())
{
}
@@ -332,7 +329,7 @@ void CohortSetIter::addCohort(Cohort *cohort) {
CohortSetIter& CohortSetIter::operator++() {
m_cohort = 0;
- for (; m_cohortsetiter != m_cohortset.end() ; ++m_cohortsetiter) {
+ for (; m_cohortsetiter != m_cohortset.end(); ++m_cohortsetiter) {
Cohort *cohort = *m_cohortsetiter;
if (cohort->parent == m_origcohort->parent || (m_test->pos & POS_SPAN_BOTH) || m_span) {
m_cohort = cohort;
@@ -350,16 +347,16 @@ CohortSetIter& CohortSetIter::operator++() {
return *this;
}
-MultiCohortIterator::MultiCohortIterator(Cohort *cohort, const ContextualTest *test, bool span) :
-m_span(span),
-m_cohort(cohort),
-m_test(test),
-m_cohortiter(0)
+MultiCohortIterator::MultiCohortIterator(Cohort *cohort, const ContextualTest *test, bool span)
+ : m_span(span)
+ , m_cohort(cohort)
+ , m_test(test)
+ , m_cohortiter(0)
{
}
MultiCohortIterator::~MultiCohortIterator() {
- delete m_cohortiter;
+ delete m_cohortiter;
}
bool MultiCohortIterator::operator==(const MultiCohortIterator& other) {
@@ -374,14 +371,14 @@ MultiCohortIterator& MultiCohortIterator::operator++() {
return *this;
}
-CohortIterator* MultiCohortIterator::operator*() {
+CohortIterator *MultiCohortIterator::operator*() {
return m_cohortiter;
}
// ToDo: Iterative deepening depth-first search
-ChildrenIterator::ChildrenIterator(Cohort *cohort, const ContextualTest *test, bool span) :
-MultiCohortIterator(cohort, test, span),
-m_depth(0)
+ChildrenIterator::ChildrenIterator(Cohort *cohort, const ContextualTest *test, bool span)
+ : MultiCohortIterator(cohort, test, span)
+ , m_depth(0)
{
}
@@ -394,5 +391,4 @@ ChildrenIterator& ChildrenIterator::operator++() {
}
return *this;
}
-
}
diff --git a/src/CohortIterator.hpp b/src/CohortIterator.hpp
index 3272ac4..ac366b8 100644
--- a/src/CohortIterator.hpp
+++ b/src/CohortIterator.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -25,127 +25,128 @@
#include "stdafx.hpp"
#include "Cohort.hpp"
+#include "SingleWindow.hpp"
namespace CG3 {
- class ContextualTest;
+class ContextualTest;
- class CohortIterator : public std::iterator<std::input_iterator_tag, Cohort*> {
- public:
- CohortIterator(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+class CohortIterator : public std::iterator<std::input_iterator_tag, Cohort*> {
+public:
+ CohortIterator(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- virtual ~CohortIterator();
+ virtual ~CohortIterator();
- bool operator==(const CohortIterator& other);
- bool operator!=(const CohortIterator& other);
+ bool operator==(const CohortIterator& other);
+ bool operator!=(const CohortIterator& other);
- virtual CohortIterator& operator++();
+ virtual CohortIterator& operator++();
- Cohort* operator*();
+ Cohort *operator*();
- virtual void reset(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+ virtual void reset(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- protected:
- bool m_span;
- Cohort *m_cohort;
- const ContextualTest *m_test;
- };
+protected:
+ bool m_span;
+ Cohort *m_cohort;
+ const ContextualTest *m_test;
+};
- class TopologyLeftIter : public CohortIterator {
- public:
- TopologyLeftIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+class TopologyLeftIter : public CohortIterator {
+public:
+ TopologyLeftIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- TopologyLeftIter& operator++();
- };
+ TopologyLeftIter& operator++();
+};
- class TopologyRightIter : public CohortIterator {
- public:
- TopologyRightIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+class TopologyRightIter : public CohortIterator {
+public:
+ TopologyRightIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- TopologyRightIter& operator++();
- };
+ TopologyRightIter& operator++();
+};
- class DepParentIter : public CohortIterator {
- public:
- DepParentIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+class DepParentIter : public CohortIterator {
+public:
+ DepParentIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- DepParentIter& operator++();
+ DepParentIter& operator++();
- void reset(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+ void reset(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- protected:
- CohortSet m_seen;
- };
+protected:
+ CohortSet m_seen;
+};
- class DepDescendentIter : public CohortIterator {
- public:
- DepDescendentIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+class DepDescendentIter : public CohortIterator {
+public:
+ DepDescendentIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- DepDescendentIter& operator++();
+ DepDescendentIter& operator++();
- void reset(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+ void reset(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- protected:
- CohortSet m_descendents;
- CohortSet::const_iterator m_ai;
- };
+protected:
+ CohortSet m_descendents;
+ CohortSet::const_iterator m_ai;
+};
- class DepAncestorIter : public CohortIterator {
- public:
- DepAncestorIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+class DepAncestorIter : public CohortIterator {
+public:
+ DepAncestorIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- DepAncestorIter& operator++();
+ DepAncestorIter& operator++();
- void reset(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+ void reset(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- protected:
- CohortSet m_ancestors;
- CohortSet::const_iterator m_ai;
- };
+protected:
+ CohortSet m_ancestors;
+ CohortSet::const_iterator m_ai;
+};
- class CohortSetIter : public CohortIterator {
- public:
- CohortSetIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+class CohortSetIter : public CohortIterator {
+public:
+ CohortSetIter(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- void addCohort(Cohort *cohort);
+ void addCohort(Cohort *cohort);
- CohortSetIter& operator++();
+ CohortSetIter& operator++();
- protected:
- Cohort *m_origcohort;
- CohortSet m_cohortset;
- CohortSet::const_iterator m_cohortsetiter;
- };
+protected:
+ Cohort *m_origcohort;
+ CohortSet m_cohortset;
+ CohortSet::const_iterator m_cohortsetiter;
+};
- class MultiCohortIterator : public std::iterator<std::input_iterator_tag, Cohort*> {
- public:
- MultiCohortIterator(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+class MultiCohortIterator : public std::iterator<std::input_iterator_tag, Cohort*> {
+public:
+ MultiCohortIterator(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- virtual ~MultiCohortIterator();
+ virtual ~MultiCohortIterator();
- bool operator==(const MultiCohortIterator& other);
- bool operator!=(const MultiCohortIterator& other);
+ bool operator==(const MultiCohortIterator& other);
+ bool operator!=(const MultiCohortIterator& other);
- virtual MultiCohortIterator& operator++();
+ virtual MultiCohortIterator& operator++();
- CohortIterator* operator*();
+ CohortIterator *operator*();
- protected:
- bool m_span;
- Cohort *m_cohort;
- const ContextualTest *m_test;
- CohortSet m_seen;
- CohortSetIter *m_cohortiter;
- };
+protected:
+ bool m_span;
+ Cohort *m_cohort;
+ const ContextualTest *m_test;
+ CohortSet m_seen;
+ CohortSetIter *m_cohortiter;
+};
- class ChildrenIterator : public MultiCohortIterator {
- public:
- ChildrenIterator(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
+class ChildrenIterator : public MultiCohortIterator {
+public:
+ ChildrenIterator(Cohort *cohort = 0, const ContextualTest *test = 0, bool span = false);
- ChildrenIterator& operator++();
+ ChildrenIterator& operator++();
- protected:
- uint32_t m_depth;
- };
+protected:
+ uint32_t m_depth;
+};
}
#endif
diff --git a/src/ContextualTest.cpp b/src/ContextualTest.cpp
index 6ac50e9..a9aebc6 100644
--- a/src/ContextualTest.cpp
+++ b/src/ContextualTest.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -25,23 +25,23 @@
namespace CG3 {
-ContextualTest::ContextualTest() :
-is_used(false),
-offset(0),
-offset_sub(0),
-line(0),
-hash(0),
-seed(0),
-pos(0),
-target(0),
-relation(0),
-barrier(0),
-cbarrier(0),
-num_fail(0),
-num_match(0),
-total_time(0),
-tmpl(0),
-linked(0)
+ContextualTest::ContextualTest()
+ : is_used(false)
+ , offset(0)
+ , offset_sub(0)
+ , line(0)
+ , hash(0)
+ , seed(0)
+ , pos(0)
+ , target(0)
+ , relation(0)
+ , barrier(0)
+ , cbarrier(0)
+ , num_fail(0)
+ , num_match(0)
+ , total_time(0)
+ , tmpl(0)
+ , linked(0)
{
// Nothing in the actual body...
}
@@ -162,5 +162,4 @@ void ContextualTest::markUsed(Grammar& grammar) {
linked->markUsed(grammar);
}
}
-
}
diff --git a/src/ContextualTest.hpp b/src/ContextualTest.hpp
index c131e2c..9d93d9e 100644
--- a/src/ContextualTest.hpp
+++ b/src/ContextualTest.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -29,107 +29,107 @@
#include <stdint.h>
namespace CG3 {
- class Grammar;
- class ContextualTest;
- typedef std::vector<ContextualTest*> ContextVector;
- typedef std::list<ContextualTest*> ContextList;
+class Grammar;
+class ContextualTest;
+typedef std::vector<ContextualTest*> ContextVector;
+typedef std::list<ContextualTest*> ContextList;
#ifdef _MSC_VER
- enum : uint64_t {
+enum : uint64_t {
#else
- enum {
+enum {
#endif
- POS_CAREFUL = (1 << 0),
- POS_NEGATE = (1 << 1),
- POS_NOT = (1 << 2),
- POS_SCANFIRST = (1 << 3),
- POS_SCANALL = (1 << 4),
- POS_ABSOLUTE = (1 << 5),
- POS_SPAN_RIGHT = (1 << 6),
- POS_SPAN_LEFT = (1 << 7),
- POS_SPAN_BOTH = (1 << 8),
- POS_DEP_PARENT = (1 << 9),
- POS_DEP_SIBLING = (1 << 10),
- POS_DEP_CHILD = (1 << 11),
- POS_PASS_ORIGIN = (1 << 12),
- POS_NO_PASS_ORIGIN = (1 << 13),
- POS_LEFT_PAR = (1 << 14),
- POS_RIGHT_PAR = (1 << 15),
- POS_SELF = (1 << 16),
- POS_NONE = (1 << 17),
- POS_ALL = (1 << 18),
- POS_DEP_DEEP = (1 << 19),
- POS_MARK_SET = (1 << 20),
- POS_MARK_JUMP = (1 << 21),
- POS_LOOK_DELETED = (1 << 22),
- POS_LOOK_DELAYED = (1 << 23),
- POS_TMPL_OVERRIDE = (1 << 24),
- POS_UNKNOWN = (1 << 25),
- POS_RELATION = (1 << 26),
- POS_ATTACH_TO = (1 << 27),
- POS_NUMERIC_BRANCH = (1 << 28),
- // 29 unused
- POS_DEP_GLOB = (1 << 30),
- POS_64BIT = (1ull << 31),
- POS_LEFT = (1ull << 32),
- POS_RIGHT = (1ull << 33),
- POS_LEFTMOST = (1ull << 34),
- POS_RIGHTMOST = (1ull << 35),
+ POS_CAREFUL = (1 << 0),
+ POS_NEGATE = (1 << 1),
+ POS_NOT = (1 << 2),
+ POS_SCANFIRST = (1 << 3),
+ POS_SCANALL = (1 << 4),
+ POS_ABSOLUTE = (1 << 5),
+ POS_SPAN_RIGHT = (1 << 6),
+ POS_SPAN_LEFT = (1 << 7),
+ POS_SPAN_BOTH = (1 << 8),
+ POS_DEP_PARENT = (1 << 9),
+ POS_DEP_SIBLING = (1 << 10),
+ POS_DEP_CHILD = (1 << 11),
+ POS_PASS_ORIGIN = (1 << 12),
+ POS_NO_PASS_ORIGIN = (1 << 13),
+ POS_LEFT_PAR = (1 << 14),
+ POS_RIGHT_PAR = (1 << 15),
+ POS_SELF = (1 << 16),
+ POS_NONE = (1 << 17),
+ POS_ALL = (1 << 18),
+ POS_DEP_DEEP = (1 << 19),
+ POS_MARK_SET = (1 << 20),
+ POS_MARK_JUMP = (1 << 21),
+ POS_LOOK_DELETED = (1 << 22),
+ POS_LOOK_DELAYED = (1 << 23),
+ POS_TMPL_OVERRIDE = (1 << 24),
+ POS_UNKNOWN = (1 << 25),
+ POS_RELATION = (1 << 26),
+ POS_ATTACH_TO = (1 << 27),
+ POS_NUMERIC_BRANCH = (1 << 28),
+ POS_BAG_OF_TAGS = (1 << 29),
+ POS_DEP_GLOB = (1 << 30),
+ POS_64BIT = (1ull << 31),
+ POS_LEFT = (1ull << 32),
+ POS_RIGHT = (1ull << 33),
+ POS_LEFTMOST = (1ull << 34),
+ POS_RIGHTMOST = (1ull << 35),
- MASK_POS_DEP = POS_DEP_PARENT|POS_DEP_SIBLING|POS_DEP_CHILD|POS_DEP_GLOB,
- MASK_POS_DEPREL = MASK_POS_DEP|POS_RELATION,
- MASK_POS_CDEPREL = MASK_POS_DEPREL|POS_CAREFUL,
- MASK_POS_LORR = POS_LEFT|POS_RIGHT|POS_LEFTMOST|POS_RIGHTMOST,
- MASK_POS_SCAN = POS_SCANFIRST|POS_SCANALL|POS_DEP_DEEP|POS_DEP_GLOB,
- };
+ MASK_POS_DEP = POS_DEP_PARENT | POS_DEP_SIBLING | POS_DEP_CHILD | POS_DEP_GLOB,
+ MASK_POS_DEPREL = MASK_POS_DEP | POS_RELATION,
+ MASK_POS_CDEPREL = MASK_POS_DEPREL | POS_CAREFUL,
+ MASK_POS_LORR = POS_LEFT | POS_RIGHT | POS_LEFTMOST | POS_RIGHTMOST,
+ MASK_POS_SCAN = POS_SCANFIRST | POS_SCANALL | POS_DEP_DEEP | POS_DEP_GLOB,
+};
- enum GSR_SPECIALS {
- GSR_ANY = 32767
- };
+enum GSR_SPECIALS {
+ GSR_ANY = 32767,
+};
- class ContextualTest {
- public:
- bool is_used;
- int32_t offset;
- int32_t offset_sub;
- uint32_t line;
- uint32_t hash;
- uint32_t seed;
- uint64_t pos;
- uint32_t target;
- uint32_t relation;
- uint32_t barrier;
- uint32_t cbarrier;
- mutable uint32_t num_fail, num_match;
- mutable double total_time;
- ContextualTest *tmpl;
- ContextualTest *linked;
+class ContextualTest {
+public:
+ bool is_used;
+ int32_t offset;
+ int32_t offset_sub;
+ uint32_t line;
+ uint32_t hash;
+ uint32_t seed;
+ uint64_t pos;
+ uint32_t target;
+ uint32_t relation;
+ uint32_t barrier;
+ uint32_t cbarrier;
+ mutable uint32_t num_fail, num_match;
+ mutable double total_time;
+ ContextualTest *tmpl;
+ ContextualTest *linked;
- ContextVector ors;
+ ContextVector ors;
- ContextualTest();
-
- bool operator==(const ContextualTest&) const;
- bool operator!=(const ContextualTest& o) const { return !(*this == o); }
- uint32_t rehash();
- void resetStatistics();
- void markUsed(Grammar& grammar);
- };
+ ContextualTest();
- inline void copy_cntx(const ContextualTest *src, ContextualTest *trg) {
- trg->offset = src->offset;
- trg->offset_sub = src->offset_sub;
- trg->line = src->line;
- trg->hash = src->hash;
- trg->seed = src->seed;
- trg->pos = src->pos;
- trg->target = src->target;
- trg->relation = src->relation;
- trg->barrier = src->barrier;
- trg->cbarrier = src->cbarrier;
- trg->tmpl = src->tmpl;
- trg->linked = src->linked;
- }
+ bool operator==(const ContextualTest &) const;
+ bool operator!=(const ContextualTest& o) const { return !(*this == o); }
+ uint32_t rehash();
+ void resetStatistics();
+ void markUsed(Grammar& grammar);
+};
+
+inline void copy_cntx(const ContextualTest *src, ContextualTest *trg) {
+ trg->offset = src->offset;
+ trg->offset_sub = src->offset_sub;
+ trg->line = src->line;
+ trg->hash = src->hash;
+ trg->seed = src->seed;
+ trg->pos = src->pos;
+ trg->target = src->target;
+ trg->relation = src->relation;
+ trg->barrier = src->barrier;
+ trg->cbarrier = src->cbarrier;
+ trg->tmpl = src->tmpl;
+ trg->linked = src->linked;
+}
}
#endif
diff --git a/src/FSTApplicator.cpp b/src/FSTApplicator.cpp
index ee3574c..2b59b34 100644
--- a/src/FSTApplicator.cpp
+++ b/src/FSTApplicator.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -30,12 +30,12 @@
namespace CG3 {
FSTApplicator::FSTApplicator(UFILE *ux_err)
- : GrammarApplicator(ux_err),
- wfactor(100.0)
+ : GrammarApplicator(ux_err)
+ , wfactor(100.0)
{
wtag += 'W';
sub_delims += '#';
- sub_delims += '+';
+ //sub_delims += '+';
}
void FSTApplicator::runGrammarOnText(istream& input, UFILE *output) {
@@ -74,7 +74,7 @@ void FSTApplicator::runGrammarOnText(istream& input, UFILE *output) {
index();
- uint32_t resetAfter = ((num_windows+4)*2+1);
+ uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
uint32_t lines = 0;
SingleWindow *cSWindow = 0;
@@ -90,9 +90,9 @@ void FSTApplicator::runGrammarOnText(istream& input, UFILE *output) {
++lines;
size_t offset = 0, packoff = 0;
// Read as much of the next line as will fit in the current buffer
- while (input.gets(&line[offset], line.size()-offset-1)) {
+ while (input.gets(&line[offset], line.size() - offset - 1)) {
// Copy the segment just read to cleaned
- for (size_t i=offset ; i<line.size() ; ++i) {
+ for (size_t i = offset; i < line.size(); ++i) {
// Only copy one space character, regardless of how many are in input
if (ISSPACE(line[i]) && !ISNL(line[i])) {
UChar space = (line[i] == '\t' ? '\t' : ' ');
@@ -106,25 +106,25 @@ void FSTApplicator::runGrammarOnText(istream& input, UFILE *output) {
}
// Break if there is a newline
if (ISNL(line[i])) {
- cleaned[packoff+1] = cleaned[packoff] = 0;
+ cleaned[packoff + 1] = cleaned[packoff] = 0;
goto gotaline; // Oh how I wish C++ had break 2;
}
if (line[i] == 0) {
- cleaned[packoff+1] = cleaned[packoff] = 0;
+ cleaned[packoff + 1] = cleaned[packoff] = 0;
break;
}
cleaned[packoff++] = line[i];
}
// If we reached this, buffer wasn't big enough. Double the size of the buffer and try again.
- offset = line.size()-2;
- line.resize(line.size()*2, 0);
- cleaned.resize(line.size()+1, 0);
+ offset = line.size() - 2;
+ line.resize(line.size() * 2, 0);
+ cleaned.resize(line.size() + 1, 0);
}
-gotaline:
+ gotaline:
// Trim trailing whitespace
- while (cleaned[0] && ISSPACE(cleaned[packoff-1])) {
- cleaned[packoff-1] = 0;
+ while (cleaned[0] && ISSPACE(cleaned[packoff - 1])) {
+ cleaned[packoff - 1] = 0;
--packoff;
}
if (!ignoreinput && cleaned[0]) {
@@ -208,18 +208,28 @@ gotaline:
wtag_tag = addTag(wtag_buf);
}
+ // Initial baseform, because it may end on +
+ UChar *plus = u_strchr(space, '+');
+ if (plus) {
+ ++plus;
+ const UChar cplus[] = { '+', 0 };
+ int32_t p = u_strspn(plus, cplus);
+ space = plus + p;
+ --space;
+ }
+
while (space && *space && (space = u_strchr(space, '+')) != 0) {
if (base && base[0]) {
int32_t f = u_strcspn(base, sub_delims.c_str());
UChar *hash = 0;
- if (f && base+f < space) {
- hash = const_cast<UChar*>(base)+f;
+ if (f && base + f < space) {
+ hash = const_cast<UChar*>(base) + f;
size_t oh = hash - &cleaned[0];
size_t ob = base - &cleaned[0];
- cleaned.resize(cleaned.size()+1, 0);
+ cleaned.resize(cleaned.size() + 1, 0);
hash = &cleaned[oh];
base = &cleaned[ob];
- std::copy_backward(hash, &cleaned[cleaned.size()-2], &cleaned[cleaned.size()-1]);
+ std::copy_backward(hash, &cleaned[cleaned.size() - 2], &cleaned[cleaned.size() - 1]);
hash[0] = 0;
space = hash;
}
@@ -289,13 +299,13 @@ gotaline:
}
}
else {
-istext:
+ istext:
if (cCohort && cCohort->readings.empty()) {
initEmptyCohort(*cCohort);
}
if (cSWindow && cSWindow->cohorts.size() >= soft_limit && grammar->soft_delimiters && !did_soft_lookback) {
did_soft_lookback = true;
- reverse_foreach (CohortVector, cSWindow->cohorts, iter, iter_end) {
+ reverse_foreach (iter, cSWindow->cohorts) {
if (doesSetMatchCohortNormal(**iter, grammar->soft_delimiters->number)) {
did_soft_lookback = false;
Cohort *cohort = delimitAt(*cSWindow, *iter);
@@ -316,7 +326,7 @@ istext:
u_fprintf(ux_stderr, "Warning: Soft limit of %u cohorts reached at line %u but found suitable soft delimiter.\n", soft_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -331,7 +341,7 @@ istext:
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -397,7 +407,7 @@ istext:
if (cCohort->readings.empty()) {
initEmptyCohort(*cCohort);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
cReading = 0;
@@ -425,5 +435,4 @@ istext:
u_fflush(output);
}
-
}
diff --git a/src/FSTApplicator.hpp b/src/FSTApplicator.hpp
index 14aed10..e966e6e 100644
--- a/src/FSTApplicator.hpp
+++ b/src/FSTApplicator.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -36,7 +36,6 @@ public:
UString wtag;
UString sub_delims;
};
-
}
#endif
diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp
index 5017bda..8c491b7 100644
--- a/src/FormatConverter.cpp
+++ b/src/FormatConverter.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -23,14 +23,15 @@
namespace CG3 {
-FormatConverter::FormatConverter(UFILE *ux_err) :
-GrammarApplicator(ux_err),
-ApertiumApplicator(ux_err),
-NicelineApplicator(ux_err),
-PlaintextApplicator(ux_err),
-FSTApplicator(ux_err),
-informat(FMT_CG),
-outformat(FMT_CG)
+FormatConverter::FormatConverter(UFILE *ux_err)
+ : GrammarApplicator(ux_err)
+ , ApertiumApplicator(ux_err)
+ , NicelineApplicator(ux_err)
+ , PlaintextApplicator(ux_err)
+ , FSTApplicator(ux_err)
+ , MatxinApplicator(ux_err)
+ , informat(FMT_CG)
+ , outformat(FMT_CG)
{
}
@@ -44,52 +45,51 @@ void FormatConverter::setOutputFormat(CG_FORMATS format) {
void FormatConverter::runGrammarOnText(istream& input, UFILE *output) {
switch (informat) {
- case FMT_CG: {
- GrammarApplicator::runGrammarOnText(input, output);
- break;
- }
- case FMT_APERTIUM: {
- ApertiumApplicator::runGrammarOnText(input, output);
- break;
- }
- case FMT_NICELINE: {
- NicelineApplicator::runGrammarOnText(input, output);
- break;
- }
- case FMT_PLAIN: {
- PlaintextApplicator::runGrammarOnText(input, output);
- break;
- }
- case FMT_FST: {
- FSTApplicator::runGrammarOnText(input, output);
- break;
- }
- default:
- CG3Quit();
+ case FMT_CG: {
+ GrammarApplicator::runGrammarOnText(input, output);
+ break;
+ }
+ case FMT_APERTIUM: {
+ ApertiumApplicator::runGrammarOnText(input, output);
+ break;
+ }
+ case FMT_NICELINE: {
+ NicelineApplicator::runGrammarOnText(input, output);
+ break;
+ }
+ case FMT_PLAIN: {
+ PlaintextApplicator::runGrammarOnText(input, output);
+ break;
+ }
+ case FMT_FST: {
+ FSTApplicator::runGrammarOnText(input, output);
+ break;
+ }
+ default:
+ CG3Quit();
}
}
void FormatConverter::printSingleWindow(SingleWindow *window, UFILE *output) {
switch (outformat) {
- case FMT_CG: {
- GrammarApplicator::printSingleWindow(window, output);
- break;
- }
- case FMT_APERTIUM: {
- ApertiumApplicator::printSingleWindow(window, output);
- break;
- }
- case FMT_NICELINE: {
- NicelineApplicator::printSingleWindow(window, output);
- break;
- }
- case FMT_PLAIN: {
- PlaintextApplicator::printSingleWindow(window, output);
- break;
- }
- default:
- CG3Quit();
+ case FMT_CG: {
+ GrammarApplicator::printSingleWindow(window, output);
+ break;
+ }
+ case FMT_APERTIUM: {
+ ApertiumApplicator::printSingleWindow(window, output);
+ break;
+ }
+ case FMT_NICELINE: {
+ NicelineApplicator::printSingleWindow(window, output);
+ break;
+ }
+ case FMT_PLAIN: {
+ PlaintextApplicator::printSingleWindow(window, output);
+ break;
+ }
+ default:
+ CG3Quit();
}
}
-
}
diff --git a/src/FormatConverter.hpp b/src/FormatConverter.hpp
index 85d637e..0a2dfe2 100644
--- a/src/FormatConverter.hpp
+++ b/src/FormatConverter.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -24,33 +24,35 @@
#define c6d28b7452ec699b_FORMATCONVERTER_H
#include "ApertiumApplicator.hpp"
+#include "MatxinApplicator.hpp"
#include "NicelineApplicator.hpp"
#include "PlaintextApplicator.hpp"
#include "FSTApplicator.hpp"
namespace CG3 {
- enum CG_FORMATS {
- FMT_INVALID,
- FMT_CG,
- FMT_NICELINE,
- FMT_APERTIUM,
- FMT_FST,
- FMT_PLAIN,
- NUM_FORMATS
- };
+enum CG_FORMATS {
+ FMT_INVALID,
+ FMT_CG,
+ FMT_NICELINE,
+ FMT_APERTIUM,
+ FMT_MATXIN,
+ FMT_FST,
+ FMT_PLAIN,
+ NUM_FORMATS,
+};
- class FormatConverter : public ApertiumApplicator, public NicelineApplicator, public PlaintextApplicator, public FSTApplicator {
- public:
- FormatConverter(UFILE *ux_err);
+class FormatConverter : public ApertiumApplicator, public NicelineApplicator, public PlaintextApplicator, public FSTApplicator, public MatxinApplicator {
+public:
+ FormatConverter(UFILE *ux_err);
- void runGrammarOnText(istream& input, UFILE *output);
- void setInputFormat(CG_FORMATS format);
- void setOutputFormat(CG_FORMATS format);
+ void runGrammarOnText(istream& input, UFILE *output);
+ void setInputFormat(CG_FORMATS format);
+ void setOutputFormat(CG_FORMATS format);
- protected:
- CG_FORMATS informat, outformat;
- void printSingleWindow(SingleWindow *window, UFILE *output);
- };
+protected:
+ CG_FORMATS informat, outformat;
+ void printSingleWindow(SingleWindow *window, UFILE *output);
+};
}
#endif
diff --git a/src/Grammar.cpp b/src/Grammar.cpp
index 8de11b4..5be5fa6 100644
--- a/src/Grammar.cpp
+++ b/src/Grammar.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -25,45 +25,46 @@
namespace CG3 {
-Grammar::Grammar() :
-ux_stderr(0),
-ux_stdout(0),
-has_dep(false),
-has_relations(false),
-has_encl_final(false),
-is_binary(false),
-sub_readings_ltr(false),
-grammar_size(0),
-mapping_prefix('@'),
-lines(0),
-verbosity_level(0),
-total_time(0),
-rules_any(0),
-sets_any(0),
-delimiters(0),
-soft_delimiters(0),
-tag_any(0)
+Grammar::Grammar()
+ : ux_stderr(0)
+ , ux_stdout(0)
+ , has_dep(false)
+ , has_bag_of_tags(false)
+ , has_relations(false)
+ , has_encl_final(false)
+ , is_binary(false)
+ , sub_readings_ltr(false)
+ , grammar_size(0)
+ , mapping_prefix('@')
+ , lines(0)
+ , verbosity_level(0)
+ , total_time(0)
+ , rules_any(0)
+ , sets_any(0)
+ , delimiters(0)
+ , soft_delimiters(0)
+ , tag_any(0)
{
// Nothing in the actual body...
}
Grammar::~Grammar() {
- foreach (std::vector<Set*>, sets_list, iter_set, iter_set_end) {
+ foreach (iter_set, sets_list) {
destroySet(*iter_set);
}
- foreach (SetSet, sets_all, rsets, rsets_end) {
+ foreach (rsets, sets_all) {
delete *rsets;
}
-
+
Taguint32HashMap::iterator iter_stag;
- for (iter_stag = single_tags.begin() ; iter_stag != single_tags.end() ; ++iter_stag) {
+ for (iter_stag = single_tags.begin(); iter_stag != single_tags.end(); ++iter_stag) {
if (iter_stag->second) {
delete iter_stag->second;
}
}
- foreach (RuleVector, rule_by_number, iter_rules, iter_rules_end) {
+ foreach (iter_rules, rule_by_number) {
delete *iter_rules;
}
@@ -83,10 +84,10 @@ void Grammar::addSet(Set *& to) {
u_fprintf(ux_stderr, "Warning: Set name %S looks like a misattempt of template usage on line %u.\n", to->name.c_str(), to->line);
}
- if (!to->sets.empty() && !(to->type & (ST_TAG_UNIFY|ST_CHILD_UNIFY|ST_SET_UNIFY))) {
+ if (!to->sets.empty() && !(to->type & (ST_TAG_UNIFY | ST_CHILD_UNIFY | ST_SET_UNIFY))) {
bool all_tags = true;
- for (size_t i=0 ; i<to->sets.size() ; ++i) {
- if (i > 0 && to->set_ops[i-1] != S_OR) {
+ for (size_t i = 0; i < to->sets.size(); ++i) {
+ if (i > 0 && to->set_ops[i - 1] != S_OR) {
all_tags = false;
break;
}
@@ -106,7 +107,7 @@ void Grammar::addSet(Set *& to) {
}
if (all_tags) {
- for (size_t i=0 ; i<to->sets.size() ; ++i) {
+ for (size_t i = 0; i < to->sets.size(); ++i) {
Set *s = getSet(to->sets[i]);
maybe_used_sets.insert(s);
TagVector tv = trie_getTagList(s->getNonEmpty());
@@ -195,7 +196,7 @@ void Grammar::addSet(Set *& to) {
}
uint32_t chash = to->rehash();
- for (; to->name[0] != '_' || to->name[1] != 'G' || to->name[2] != '_' ;) {
+ for (; to->name[0] != '_' || to->name[1] != 'G' || to->name[2] != '_';) {
uint32_t nhash = hash_value(to->name.c_str());
if (sets_by_name.find(nhash) != sets_by_name.end()) {
Set *a = sets_by_contents.find(sets_by_name.find(nhash)->second)->second;
@@ -217,14 +218,14 @@ void Grammar::addSet(Set *& to) {
CG3Quit(1);
}
else {
- for (uint32_t seed=0 ; seed<1000 ; ++seed) {
- if (sets_by_name.find(nhash+seed) == sets_by_name.end()) {
+ for (uint32_t seed = 0; seed < 1000; ++seed) {
+ if (sets_by_name.find(nhash + seed) == sets_by_name.end()) {
if (verbosity_level > 0 && (to->name[0] != '_' || to->name[1] != 'G' || to->name[2] != '_')) {
u_fprintf(ux_stderr, "Warning: Set %S got hash seed %u.\n", to->name.c_str(), seed);
u_fflush(ux_stderr);
}
set_name_seeds[to->name] = seed;
- sets_by_name[nhash+seed] = chash;
+ sets_by_name[nhash + seed] = chash;
break;
}
}
@@ -240,9 +241,7 @@ void Grammar::addSet(Set *& to) {
if (a != to) {
a->reindex(*this);
to->reindex(*this);
- if ((a->type & (ST_SPECIAL|ST_TAG_UNIFY|ST_CHILD_UNIFY|ST_SET_UNIFY)) != (to->type & (ST_SPECIAL|ST_TAG_UNIFY|ST_CHILD_UNIFY|ST_SET_UNIFY))
- || a->set_ops.size() != to->set_ops.size() || a->sets.size() != to->sets.size()
- || a->trie.size() != to->trie.size() || a->trie_special.size() != to->trie_special.size()) {
+ if ((a->type & (ST_SPECIAL | ST_TAG_UNIFY | ST_CHILD_UNIFY | ST_SET_UNIFY)) != (to->type & (ST_SPECIAL | ST_TAG_UNIFY | ST_CHILD_UNIFY | ST_SET_UNIFY)) || a->set_ops.size() != to->set_ops.size() || a->sets.size() != to->sets.size() || a->trie.size() != to->trie.size() || a->trie_special.size() != to->trie_special.size()) {
u_fprintf(ux_stderr, "Error: Content hash collision between set %S on line %u and %S on line %u!\n", a->name.c_str(), a->line, to->name.c_str(), to->line);
CG3Quit(1);
}
@@ -290,12 +289,12 @@ void Grammar::addSetToList(Set *s) {
if (s->number == 0) {
if (sets_list.empty() || sets_list[0] != s) {
if (!s->sets.empty()) {
- foreach (uint32Vector, s->sets, sit, sit_end) {
+ foreach (sit, s->sets) {
addSetToList(getSet(*sit));
}
}
sets_list.push_back(s);
- s->number = (uint32_t)sets_list.size()-1;
+ s->number = (uint32_t)sets_list.size() - 1;
}
}
}
@@ -347,7 +346,7 @@ uint32_t Grammar::removeNumericTags(uint32_t s) {
bool did = false;
std::map<TagVector, bool> ntags;
TagVector tags;
- const trie_t* tries[2] = { &set->trie, &set->trie_special };
+ const trie_t *tries[2] = { &set->trie, &set->trie_special };
for (size_t i = 0; i < 2; ++i) {
if (tries[i]->empty()) {
continue;
@@ -517,9 +516,9 @@ ContextualTest *Grammar::addContextualTest(ContextualTest *t) {
}
for (uint32_t seed = 0; seed < 1000; ++seed) {
- contexts_t::iterator cit = contexts.find(t->hash+seed);
+ contexts_t::iterator cit = contexts.find(t->hash + seed);
if (cit == contexts.end()) {
- contexts[t->hash+seed] = t;
+ contexts[t->hash + seed] = t;
t->hash += seed;
t->seed = seed;
if (verbosity_level > 1 && seed) {
@@ -567,22 +566,22 @@ void Grammar::addAnchor(const UChar *to, uint32_t at, bool primary) {
void Grammar::resetStatistics() {
total_time = 0;
- for (uint32_t j=0;j<rules.size();j++) {
+ for (uint32_t j = 0; j < rules.size(); j++) {
rules[j]->resetStatistics();
}
}
void Grammar::renameAllRules() {
- foreach (RuleVector, rule_by_number, iter_rule, iter_rule_end) {
+ foreach (iter_rule, rule_by_number) {
Rule *r = *iter_rule;
gbuffers[0][0] = 0;
u_sprintf(&gbuffers[0][0], "L%u", r->line);
r->setName(&gbuffers[0][0]);
}
-};
+}
void Grammar::reindex(bool unused_sets, bool used_tags) {
- foreach (Setuint32HashMap, sets_by_contents, dset, dset_end) {
+ foreach (dset, sets_by_contents) {
if (dset->second->number == std::numeric_limits<uint32_t>::max()) {
dset->second->type |= ST_USED;
continue;
@@ -593,7 +592,7 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
dset->second->number = 0;
}
- foreach (static_sets_t, static_sets, sset, sset_end) {
+ foreach (sset, static_sets) {
uint32_t sh = hash_value(*sset);
if (set_alias.find(sh) != set_alias.end()) {
u_fprintf(ux_stderr, "Error: Static set %S is an alias; only real sets may be made static!\n", (*sset).c_str());
@@ -628,7 +627,7 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
sets_any = 0;
rules_any = 0;
- foreach (TagVector, single_tags_list, iter, iter_end) {
+ foreach (iter, single_tags_list) {
if ((*iter)->regexp && (*iter)->tag[0] != '"' && (*iter)->tag[0] != '<') {
regex_tags.insert((*iter)->regexp);
}
@@ -641,16 +640,16 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
if (!(*iter)->vs_sets) {
continue;
}
- foreach (SetVector, *(*iter)->vs_sets, sit, sit_end) {
+ foreach (sit, *(*iter)->vs_sets) {
(*sit)->markUsed(*this);
}
}
- foreach (TagVector, single_tags_list, titer, titer_end) {
+ foreach (titer, single_tags_list) {
if ((*titer)->type & T_TEXTUAL) {
continue;
}
- foreach (Grammar::regex_tags_t, regex_tags, iter, iter_end) {
+ foreach (iter, regex_tags) {
UErrorCode status = U_ZERO_ERROR;
uregex_setText(*iter, (*titer)->tag.c_str(), (*titer)->tag.length(), &status);
if (status == U_ZERO_ERROR) {
@@ -659,7 +658,7 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
}
}
}
- foreach (Grammar::icase_tags_t, icase_tags, iter, iter_end) {
+ foreach (iter, icase_tags) {
UErrorCode status = U_ZERO_ERROR;
if (u_strCaseCompare((*titer)->tag.c_str(), (*titer)->tag.length(), (*iter)->tag.c_str(), (*iter)->tag.length(), U_FOLD_CASE_DEFAULT, &status) == 0) {
(*titer)->type |= T_TEXTUAL;
@@ -680,7 +679,7 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
single_tags[*it]->markUsed();
}
- foreach (RuleVector, rule_by_number, iter_rule, iter_rule_end) {
+ foreach (iter_rule, rule_by_number) {
if ((*iter_rule)->wordform) {
wf_rules.push_back(*iter_rule);
}
@@ -707,10 +706,10 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
if ((*iter_rule)->dep_target) {
(*iter_rule)->dep_target->markUsed(*this);
}
- foreach (ContextList, (*iter_rule)->tests, it, it_end) {
+ foreach (it, (*iter_rule)->tests) {
(*it)->markUsed(*this);
}
- foreach (ContextList, (*iter_rule)->dep_tests, it, it_end) {
+ foreach (it, (*iter_rule)->dep_tests) {
(*it)->markUsed(*this);
}
}
@@ -736,7 +735,7 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
if (unused_sets) {
u_fprintf(ux_stdout, "Unused sets:\n");
- foreach (Setuint32HashMap, sets_by_contents, rset, rset_end) {
+ foreach (rset, sets_by_contents) {
if (!(rset->second->type & ST_USED) && !rset->second->name.empty() && maybe_used_sets.count(rset->second) == 0) {
if (rset->second->name[0] != '_' || rset->second->name[1] != 'G' || rset->second->name[2] != '_') {
u_fprintf(ux_stdout, "Line %u set %S\n", rset->second->line, rset->second->name.c_str());
@@ -749,13 +748,13 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
// Stuff below this line is not optional...
- foreach (Setuint32HashMap, sets_by_contents, tset, tset_end) {
+ foreach (tset, sets_by_contents) {
if (tset->second->type & ST_USED) {
addSetToList(tset->second);
}
}
- for (BOOST_AUTO(iter_tags, single_tags.begin()) ; iter_tags != single_tags.end() ; ++iter_tags) {
+ for (BOOST_AUTO(iter_tags, single_tags.begin()); iter_tags != single_tags.end(); ++iter_tags) {
Tag *tag = iter_tags->second;
if (tag->tag[0] == mapping_prefix) {
tag->type |= T_MAPPING;
@@ -779,7 +778,7 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
uint32SortedVector sects;
- foreach (RuleVector, rule_by_number, iter_rule, iter_rule_end) {
+ foreach (iter_rule, rule_by_number) {
if ((*iter_rule)->section == -1) {
before_sections.push_back(*iter_rule);
}
@@ -864,14 +863,14 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
CG3Quit(1);
}
else {
- for (uint32_t seed=0 ; seed<1000 ; ++seed) {
- if (sets_by_name.find(nhash+seed) == sets_by_name.end()) {
+ for (uint32_t seed = 0; seed < 1000; ++seed) {
+ if (sets_by_name.find(nhash + seed) == sets_by_name.end()) {
if (verbosity_level > 0) {
u_fprintf(ux_stderr, "Warning: Static set %S got hash seed %u.\n", to->name.c_str(), seed);
u_fflush(ux_stderr);
}
set_name_seeds[to->name] = seed;
- sets_by_name[nhash+seed] = cnum;
+ sets_by_name[nhash + seed] = cnum;
break;
}
}
@@ -880,6 +879,122 @@ void Grammar::reindex(bool unused_sets, bool used_tags) {
}
}
+ // Gather knowledge of which sets have varstrings
+ boost::dynamic_bitset<> sets_vstr(sets_list.size());
+ bool did = true;
+ while (did) {
+ did = false;
+ foreach (set, sets_list) {
+ if (sets_vstr.test((*set)->number)) {
+ continue;
+ }
+ foreach (iset, (*set)->sets) {
+ if (sets_vstr.test(*iset)) {
+ sets_vstr.set((*set)->number);
+ did = true;
+ break;
+ }
+ }
+ if (trie_hasType((*set)->trie, T_VARSTRING) || trie_hasType((*set)->trie_special, T_VARSTRING)) {
+ sets_vstr.set((*set)->number);
+ did = true;
+ }
+ }
+ }
+
+ // Gather knowledge of which contexts use unification or varstrings
+ bc::flat_set<ContextualTest*> nk;
+ did = true;
+ while (did) {
+ did = false;
+
+ foreach (cntx, contexts) {
+ ContextualTest *t = cntx->second;
+
+ if (nk.count(t)) {
+ continue;
+ }
+
+ if (t->tmpl && nk.count(t->tmpl)) {
+ //u_fprintf(ux_stderr, "added tmpl %u\n", t->line);
+ did |= nk.insert(t).second;
+ continue;
+ }
+ if (t->linked && nk.count(t->linked)) {
+ //u_fprintf(ux_stderr, "added linked %u\n", t->line);
+ did |= nk.insert(t).second;
+ continue;
+ }
+ if (t->target && (sets_list[t->target]->type & MASK_ST_UNIFY)) {
+ //u_fprintf(ux_stderr, "added target %u\n", t->line);
+ did |= nk.insert(t).second;
+ continue;
+ }
+ if (t->target && sets_vstr.test(t->target)) {
+ //u_fprintf(ux_stderr, "added target %u\n", t->line);
+ did |= nk.insert(t).second;
+ continue;
+ }
+ if (t->barrier && (sets_list[t->barrier]->type & MASK_ST_UNIFY)) {
+ //u_fprintf(ux_stderr, "added barrier %u\n", t->line);
+ did |= nk.insert(t).second;
+ continue;
+ }
+ if (t->barrier && sets_vstr.test(t->barrier)) {
+ //u_fprintf(ux_stderr, "added barrier %u\n", t->line);
+ did |= nk.insert(t).second;
+ continue;
+ }
+ if (t->cbarrier && (sets_list[t->cbarrier]->type & MASK_ST_UNIFY)) {
+ //u_fprintf(ux_stderr, "added cbarrier %u\n", t->line);
+ did |= nk.insert(t).second;
+ continue;
+ }
+ if (t->cbarrier && sets_vstr.test(t->cbarrier)) {
+ //u_fprintf(ux_stderr, "added cbarrier %u\n", t->line);
+ did |= nk.insert(t).second;
+ continue;
+ }
+ }
+ }
+
+ foreach (it, rule_by_number) {
+ Rule *r = *it;
+
+ // Determine whether this rule probably needs KEEPORDER
+ if (r->flags & RF_KEEPORDER) {
+ continue;
+ }
+ /* While this is a good indication that the unified set is used in the target, it is not 100%
+ if (r->target && (sets_list[r->target]->type & MASK_ST_UNIFY)) {
+ continue;
+ }
+ //*/
+ bool needs = false;
+ if (r->dep_target && nk.count(r->dep_target)) {
+ needs = true;
+ }
+ foreach (cntx, r->tests) {
+ if (nk.count(*cntx)) {
+ needs = true;
+ }
+ }
+ foreach (cntx, r->dep_tests) {
+ if (nk.count(*cntx)) {
+ needs = true;
+ }
+ }
+ if (needs) {
+ r->flags |= RF_KEEPORDER;
+ /* We can do the whole thing fully automatically, so explicit KEEPORDER will eventually be deprecated
+ if (verbosity_level) {
+ u_fprintf(ux_stderr, "Warning: Rule on line %u probably needs KEEPORDER.\n", r->line);
+ u_fflush(ux_stderr);
+ }
+ //*/
+ }
+ }
+
if (used_tags) {
for (BOOST_AUTO(iter_tags, single_tags.begin()); iter_tags != single_tags.end(); ++iter_tags) {
Tag *tag = iter_tags->second;
@@ -902,7 +1017,7 @@ inline void trie_indexToRule(const trie_t& trie, Grammar& grammar, uint32_t r) {
}
void Grammar::indexSetToRule(uint32_t r, Set *s) {
- if (s->type & (ST_SPECIAL|ST_TAG_UNIFY)) {
+ if (s->type & (ST_SPECIAL | ST_TAG_UNIFY)) {
indexTagToRule(tag_any, r);
return;
}
@@ -910,7 +1025,7 @@ void Grammar::indexSetToRule(uint32_t r, Set *s) {
trie_indexToRule(s->trie, *this, r);
trie_indexToRule(s->trie_special, *this, r);
- for (uint32_t i=0 ; i<s->sets.size() ; ++i) {
+ for (uint32_t i = 0; i < s->sets.size(); ++i) {
Set *set = sets_list[s->sets[i]];
indexSetToRule(r, set);
}
@@ -930,7 +1045,7 @@ inline void trie_indexToSet(const trie_t& trie, Grammar& grammar, uint32_t r) {
}
void Grammar::indexSets(uint32_t r, Set *s) {
- if (s->type & (ST_SPECIAL|ST_TAG_UNIFY)) {
+ if (s->type & (ST_SPECIAL | ST_TAG_UNIFY)) {
indexTagToSet(tag_any, r);
return;
}
@@ -938,7 +1053,7 @@ void Grammar::indexSets(uint32_t r, Set *s) {
trie_indexToSet(s->trie, *this, r);
trie_indexToSet(s->trie_special, *this, r);
- for (uint32_t i=0 ; i<s->sets.size() ; ++i) {
+ for (uint32_t i = 0; i < s->sets.size(); ++i) {
Set *set = sets_list[s->sets[i]];
indexSets(r, set);
}
@@ -957,7 +1072,7 @@ void Grammar::setAdjustSets(Set *s) {
}
s->type &= ~ST_USED;
- for (uint32_t i = 0; i<s->sets.size(); ++i) {
+ for (uint32_t i = 0; i < s->sets.size(); ++i) {
Set *set = sets_by_contents.find(s->sets[i])->second;
s->sets[i] = set->number;
setAdjustSets(set);
@@ -993,5 +1108,4 @@ void Grammar::contextAdjustTarget(ContextualTest *test) {
contextAdjustTarget(test->linked);
}
}
-
}
diff --git a/src/Grammar.hpp b/src/Grammar.hpp
index 8c0961a..6769ef0 100644
--- a/src/Grammar.hpp
+++ b/src/Grammar.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -32,136 +32,137 @@
#include "flat_unordered_set.hpp"
namespace CG3 {
- class Anchor;
-
- class Grammar {
- public:
- UFILE *ux_stderr, *ux_stdout;
-
- bool has_dep;
- bool has_relations;
- bool has_encl_final;
- bool is_binary;
- bool sub_readings_ltr;
- size_t grammar_size;
- UChar mapping_prefix;
- uint32_t lines;
- uint32_t verbosity_level;
- mutable double total_time;
-
- std::vector<Tag*> single_tags_list;
- Taguint32HashMap single_tags;
-
- std::vector<Set*> sets_list;
- SetSet sets_all;
- uint32FlatHashMap sets_by_name;
- typedef stdext::hash_map<UString,uint32_t> set_name_seeds_t;
- set_name_seeds_t set_name_seeds;
- Setuint32HashMap sets_by_contents;
- uint32FlatHashMap set_alias;
- SetSet maybe_used_sets;
-
- typedef std::vector<UString> static_sets_t;
- static_sets_t static_sets;
-
- typedef std::set<URegularExpression*> regex_tags_t;
- regex_tags_t regex_tags;
- typedef TagSortedVector icase_tags_t;
- icase_tags_t icase_tags;
-
- typedef stdext::hash_map<uint32_t, ContextualTest*> contexts_t;
- contexts_t templates;
- contexts_t contexts;
-
- typedef stdext::hash_map<uint32_t, uint32IntervalVector> rules_by_set_t;
- rules_by_set_t rules_by_set;
- typedef stdext::hash_map<uint32_t, uint32IntervalVector> rules_by_tag_t;
- rules_by_tag_t rules_by_tag;
- typedef stdext::hash_map<uint32_t, boost::dynamic_bitset<> > sets_by_tag_t;
- sets_by_tag_t sets_by_tag;
-
- uint32IntervalVector *rules_any;
- boost::dynamic_bitset<> *sets_any;
-
- Set *delimiters;
- Set *soft_delimiters;
- uint32_t tag_any;
- uint32Vector preferred_targets;
- uint32SortedVector reopen_mappings;
- typedef bc::flat_map<uint32_t,uint32_t> parentheses_t;
- parentheses_t parentheses;
- parentheses_t parentheses_reverse;
-
- uint32Vector sections;
- uint32FlatHashMap anchors;
-
- RuleVector rule_by_number;
- RuleVector before_sections;
- RuleVector rules;
- RuleVector after_sections;
- RuleVector null_section;
- RuleVector wf_rules;
-
- Grammar();
- ~Grammar();
-
- void addSet(Set *& to);
- Set *getSet(uint32_t which) const;
- Set *allocateSet();
- void destroySet(Set *set);
- void addSetToList(Set *s);
- void allocateDummySet();
- uint32_t removeNumericTags(uint32_t s);
-
- void addAnchor(const UChar *to, uint32_t at, bool primary = false);
-
- Tag *allocateTag();
- Tag *allocateTag(const UChar *tag);
- Tag *addTag(Tag *tag);
- void destroyTag(Tag *tag);
- void addTagToSet(Tag *rtag, Set *set);
-
- Rule *allocateRule();
- void addRule(Rule *rule);
- void destroyRule(Rule *rule);
-
- ContextualTest *allocateContextualTest();
- ContextualTest *addContextualTest(ContextualTest *t);
- void addTemplate(ContextualTest *test, const UChar *name);
-
- void resetStatistics();
- void reindex(bool unused_sets = false, bool used_tags = false);
- void renameAllRules();
-
- void indexSetToRule(uint32_t, Set*);
- void indexTagToRule(uint32_t, uint32_t);
- void indexSets(uint32_t, Set*);
- void indexTagToSet(uint32_t, uint32_t);
- void setAdjustSets(Set*);
- void contextAdjustTarget(ContextualTest*);
- };
-
- inline void trie_unserialize(trie_t& trie, FILE *input, Grammar& grammar, uint32_t num_tags) {
- for (uint32_t i = 0; i < num_tags; ++i) {
- uint32_t u32tmp = 0;
- fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
- u32tmp = (uint32_t)ntohl(u32tmp);
- trie_node_t& node = trie[grammar.single_tags_list[u32tmp]];
-
- uint8_t u8tmp = 0;
- fread_throw(&u8tmp, sizeof(uint8_t), 1, input);
- node.terminal = (u8tmp != 0);
-
- fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
- u32tmp = (uint32_t)ntohl(u32tmp);
- if (u32tmp) {
- if (!node.trie) {
- node.trie = new trie_t;
- }
- trie_unserialize(*node.trie, input, grammar, u32tmp);
+class Anchor;
+
+class Grammar {
+public:
+ UFILE *ux_stderr, *ux_stdout;
+
+ bool has_dep;
+ bool has_bag_of_tags;
+ bool has_relations;
+ bool has_encl_final;
+ bool is_binary;
+ bool sub_readings_ltr;
+ size_t grammar_size;
+ UChar mapping_prefix;
+ uint32_t lines;
+ uint32_t verbosity_level;
+ mutable double total_time;
+
+ std::vector<Tag*> single_tags_list;
+ Taguint32HashMap single_tags;
+
+ std::vector<Set*> sets_list;
+ SetSet sets_all;
+ uint32FlatHashMap sets_by_name;
+ typedef stdext::hash_map<UString, uint32_t> set_name_seeds_t;
+ set_name_seeds_t set_name_seeds;
+ Setuint32HashMap sets_by_contents;
+ uint32FlatHashMap set_alias;
+ SetSet maybe_used_sets;
+
+ typedef std::vector<UString> static_sets_t;
+ static_sets_t static_sets;
+
+ typedef std::set<URegularExpression*> regex_tags_t;
+ regex_tags_t regex_tags;
+ typedef TagSortedVector icase_tags_t;
+ icase_tags_t icase_tags;
+
+ typedef stdext::hash_map<uint32_t, ContextualTest*> contexts_t;
+ contexts_t templates;
+ contexts_t contexts;
+
+ typedef stdext::hash_map<uint32_t, uint32IntervalVector> rules_by_set_t;
+ rules_by_set_t rules_by_set;
+ typedef stdext::hash_map<uint32_t, uint32IntervalVector> rules_by_tag_t;
+ rules_by_tag_t rules_by_tag;
+ typedef stdext::hash_map<uint32_t, boost::dynamic_bitset<> > sets_by_tag_t;
+ sets_by_tag_t sets_by_tag;
+
+ uint32IntervalVector *rules_any;
+ boost::dynamic_bitset<> *sets_any;
+
+ Set *delimiters;
+ Set *soft_delimiters;
+ uint32_t tag_any;
+ uint32Vector preferred_targets;
+ uint32SortedVector reopen_mappings;
+ typedef bc::flat_map<uint32_t, uint32_t> parentheses_t;
+ parentheses_t parentheses;
+ parentheses_t parentheses_reverse;
+
+ uint32Vector sections;
+ uint32FlatHashMap anchors;
+
+ RuleVector rule_by_number;
+ RuleVector before_sections;
+ RuleVector rules;
+ RuleVector after_sections;
+ RuleVector null_section;
+ RuleVector wf_rules;
+
+ Grammar();
+ ~Grammar();
+
+ void addSet(Set *& to);
+ Set *getSet(uint32_t which) const;
+ Set *allocateSet();
+ void destroySet(Set *set);
+ void addSetToList(Set *s);
+ void allocateDummySet();
+ uint32_t removeNumericTags(uint32_t s);
+
+ void addAnchor(const UChar *to, uint32_t at, bool primary = false);
+
+ Tag *allocateTag();
+ Tag *allocateTag(const UChar *tag);
+ Tag *addTag(Tag *tag);
+ void destroyTag(Tag *tag);
+ void addTagToSet(Tag *rtag, Set *set);
+
+ Rule *allocateRule();
+ void addRule(Rule *rule);
+ void destroyRule(Rule *rule);
+
+ ContextualTest *allocateContextualTest();
+ ContextualTest *addContextualTest(ContextualTest *t);
+ void addTemplate(ContextualTest *test, const UChar *name);
+
+ void resetStatistics();
+ void reindex(bool unused_sets = false, bool used_tags = false);
+ void renameAllRules();
+
+ void indexSetToRule(uint32_t, Set*);
+ void indexTagToRule(uint32_t, uint32_t);
+ void indexSets(uint32_t, Set*);
+ void indexTagToSet(uint32_t, uint32_t);
+ void setAdjustSets(Set*);
+ void contextAdjustTarget(ContextualTest*);
+};
+
+inline void trie_unserialize(trie_t& trie, FILE *input, Grammar& grammar, uint32_t num_tags) {
+ for (uint32_t i = 0; i < num_tags; ++i) {
+ uint32_t u32tmp = 0;
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
+ u32tmp = (uint32_t)ntohl(u32tmp);
+ trie_node_t& node = trie[grammar.single_tags_list[u32tmp]];
+
+ uint8_t u8tmp = 0;
+ fread_throw(&u8tmp, sizeof(uint8_t), 1, input);
+ node.terminal = (u8tmp != 0);
+
+ fread_throw(&u32tmp, sizeof(uint32_t), 1, input);
+ u32tmp = (uint32_t)ntohl(u32tmp);
+ if (u32tmp) {
+ if (!node.trie) {
+ node.trie = new trie_t;
}
+ trie_unserialize(*node.trie, input, grammar, u32tmp);
}
}
}
+}
#endif
diff --git a/src/GrammarApplicator.cpp b/src/GrammarApplicator.cpp
index 00238a5..9efcf4b 100644
--- a/src/GrammarApplicator.cpp
+++ b/src/GrammarApplicator.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -31,82 +31,89 @@
namespace CG3 {
-GrammarApplicator::GrammarApplicator(UFILE *ux_err) :
-always_span(false),
-apply_mappings(true),
-apply_corrections(true),
-no_before_sections(false),
-no_sections(false),
-no_after_sections(false),
-trace(false),
-trace_name_only(false),
-trace_no_removed(false),
-trace_encl(false),
-allow_magic_readings(true),
-no_pass_origin(false),
-unsafe(false),
-ordered(false),
-show_end_tags(false),
-unicode_tags(false),
-unique_tags(false),
-dry_run(false),
-owns_grammar(false),
-input_eof(false),
-seen_barrier(false),
-is_conv(false),
-split_mappings(false),
-dep_has_spanned(false),
-dep_delimit(0),
-dep_original(false),
-dep_block_loops(true),
-dep_block_crossing(false),
-num_windows(2),
-soft_limit(300),
-hard_limit(500),
-verbosity_level(0),
-debug_level(0),
-section_max_count(0),
-has_dep(false),
-dep_highest_seen(0),
-gWindow(0),
-has_relations(false),
-grammar(0),
-ux_stderr(ux_err),
-filebase(0),
-numLines(0),
-numWindows(0),
-numCohorts(0),
-numReadings(0),
-did_index(false),
-numsections(0),
-ci_depths(6, 0),
-match_single(0),
-match_comp(0),
-match_sub(0),
-begintag(0),
-endtag(0),
-par_left_tag(0),
-par_right_tag(0),
-par_left_pos(0),
-par_right_pos(0),
-did_final_enclosure(false),
-target(0),
-mark(0),
-attach_to(0),
-unif_tags(0),
-unif_last_wordform(0),
-unif_last_baseform(0),
-unif_last_textual(0),
-unif_sets(0),
-unif_sets_firstrun(false),
-statistics(false)
+GrammarApplicator::GrammarApplicator(UFILE *ux_err)
+ : always_span(false)
+ , apply_mappings(true)
+ , apply_corrections(true)
+ , no_before_sections(false)
+ , no_sections(false)
+ , no_after_sections(false)
+ , trace(false)
+ , trace_name_only(false)
+ , trace_no_removed(false)
+ , trace_encl(false)
+ , allow_magic_readings(true)
+ , no_pass_origin(false)
+ , unsafe(false)
+ , ordered(false)
+ , show_end_tags(false)
+ , unicode_tags(false)
+ , unique_tags(false)
+ , dry_run(false)
+ , owns_grammar(false)
+ , input_eof(false)
+ , seen_barrier(false)
+ , is_conv(false)
+ , split_mappings(false)
+ , dep_has_spanned(false)
+ , dep_delimit(0)
+ , dep_original(false)
+ , dep_block_loops(true)
+ , dep_block_crossing(false)
+ , num_windows(2)
+ , soft_limit(300)
+ , hard_limit(500)
+ , verbosity_level(0)
+ , debug_level(0)
+ , section_max_count(0)
+ , has_dep(false)
+ , dep_highest_seen(0)
+ , gWindow(0)
+ , has_relations(false)
+ , grammar(0)
+ , ux_stderr(ux_err)
+ , filebase(0)
+ , numLines(0)
+ , numWindows(0)
+ , numCohorts(0)
+ , numReadings(0)
+ , did_index(false)
+ , numsections(0)
+ , ci_depths(6, 0)
+ , match_single(0)
+ , match_comp(0)
+ , match_sub(0)
+ , begintag(0)
+ , endtag(0)
+ , substtag(0)
+ , tag_begin(0)
+ , tag_end(0)
+ , tag_subst(0)
+ , par_left_tag(0)
+ , par_right_tag(0)
+ , par_left_pos(0)
+ , par_right_pos(0)
+ , did_final_enclosure(false)
+ , tmpl_cntx_pos(0)
+ , same_basic(0)
+ , target(0)
+ , mark(0)
+ , attach_to(0)
+ , current_rule(0)
+ , unif_tags(0)
+ , unif_last_wordform(0)
+ , unif_last_baseform(0)
+ , unif_last_textual(0)
+ , unif_sets(0)
+ , unif_sets_firstrun(false)
+ , statistics(false)
{
gWindow = new Window(this);
}
GrammarApplicator::~GrammarApplicator() {
Taguint32HashMap::iterator iter_stag;
- for (iter_stag = single_tags.begin() ; iter_stag != single_tags.end() ; ++iter_stag) {
+ for (iter_stag = single_tags.begin(); iter_stag != single_tags.end(); ++iter_stag) {
if (iter_stag->second && !(iter_stag->second->type & T_GRAMMAR)) {
delete iter_stag->second;
}
@@ -139,8 +146,10 @@ void GrammarApplicator::setGrammar(Grammar *res) {
single_tags = grammar->single_tags;
tag_begin = addTag(stringbits[S_BEGINTAG].getTerminatedBuffer());
tag_end = addTag(stringbits[S_ENDTAG].getTerminatedBuffer());
+ tag_subst = addTag(stringbits[S_IGNORE].getTerminatedBuffer());
begintag = tag_begin->hash;
endtag = tag_end->hash;
+ substtag = tag_subst->hash;
index_readingSet_yes.clear();
index_readingSet_yes.resize(grammar->sets_list.size());
@@ -155,7 +164,7 @@ void GrammarApplicator::index() {
if (!grammar->before_sections.empty()) {
uint32IntervalVector& m = runsections[-1];
- const_foreach (RuleVector, grammar->before_sections, iter_rules, iter_rules_end) {
+ foreach (iter_rules, grammar->before_sections) {
const Rule *r = *iter_rules;
m.insert(r->number);
}
@@ -163,7 +172,7 @@ void GrammarApplicator::index() {
if (!grammar->after_sections.empty()) {
uint32IntervalVector& m = runsections[-2];
- const_foreach (RuleVector, grammar->after_sections, iter_rules, iter_rules_end) {
+ foreach (iter_rules, grammar->after_sections) {
const Rule *r = *iter_rules;
m.insert(r->number);
}
@@ -171,7 +180,7 @@ void GrammarApplicator::index() {
if (!grammar->null_section.empty()) {
uint32IntervalVector& m = runsections[-3];
- const_foreach (RuleVector, grammar->null_section, iter_rules, iter_rules_end) {
+ foreach (iter_rules, grammar->null_section) {
const Rule *r = *iter_rules;
m.insert(r->number);
}
@@ -179,8 +188,8 @@ void GrammarApplicator::index() {
if (sections.empty()) {
int32_t smax = (int32_t)grammar->sections.size();
- for (int32_t i=0 ; i < smax ; i++) {
- const_foreach (RuleVector, grammar->rules, iter_rules, iter_rules_end) {
+ for (int32_t i = 0; i < smax; i++) {
+ foreach (iter_rules, grammar->rules) {
const Rule *r = *iter_rules;
if (r->section < 0 || r->section > i) {
continue;
@@ -192,11 +201,11 @@ void GrammarApplicator::index() {
}
else {
numsections = sections.size();
- for (uint32_t n=0 ; n<numsections ; n++) {
- for (uint32_t e=0 ; e<=n ; e++) {
- const_foreach (RuleVector, grammar->rules, iter_rules, iter_rules_end) {
+ for (uint32_t n = 0; n < numsections; n++) {
+ for (uint32_t e = 0; e <= n; e++) {
+ foreach (iter_rules, grammar->rules) {
const Rule *r = *iter_rules;
- if (r->section != (int32_t)sections[e]-1) {
+ if (r->section != (int32_t)sections[e] - 1) {
continue;
}
uint32IntervalVector& m = runsections[n];
@@ -208,7 +217,7 @@ void GrammarApplicator::index() {
if (!valid_rules.empty()) {
uint32IntervalVector vr;
- const_foreach (RuleVector, grammar->rule_by_number, iter, iter_end) {
+ foreach (iter, grammar->rule_by_number) {
if (valid_rules.contains((*iter)->line)) {
vr.push_back((*iter)->number);
}
@@ -216,6 +225,16 @@ void GrammarApplicator::index() {
valid_rules = vr;
}
+ const UChar local_utf_pattern[] = { ' ', '#', '%', 'u', '%', '0', '?', 'u', L'\u2192', '%', 'u', '%', '0', '?', 'u', 0 };
+ const UChar local_latin_pattern[] = { ' ', '#', '%', 'u', '%', '0', '?', 'u', '-', '>', '%', 'u', '%', '0', '?', 'u', 0 };
+
+ span_pattern_utf = local_utf_pattern;
+ span_pattern_latin = local_latin_pattern;
+
+ uint8_t w = static_cast<uint8_t>(floor(log10(hard_limit)) + 1);
+ span_pattern_utf[6] = span_pattern_utf[13] = '0' + w;
+ span_pattern_latin[6] = span_pattern_latin[14] = '0' + w;
+
did_index = true;
}
@@ -278,11 +297,11 @@ Tag *GrammarApplicator::addTag(const UChar *txt, bool vstr) {
bool reflow = false;
if ((tag->type & T_REGEXP) && tag->tag[0] != '"' && tag->tag[0] != '<') {
if (grammar->regex_tags.insert(tag->regexp).second) {
- foreach (Taguint32HashMap, single_tags, titer, titer_end) {
+ foreach (titer, single_tags) {
if (titer->second->type & T_TEXTUAL) {
continue;
}
- foreach (Grammar::regex_tags_t, grammar->regex_tags, iter, iter_end) {
+ foreach (iter, grammar->regex_tags) {
UErrorCode status = U_ZERO_ERROR;
uregex_setText(*iter, titer->second->tag.c_str(), titer->second->tag.length(), &status);
if (status == U_ZERO_ERROR) {
@@ -297,11 +316,11 @@ Tag *GrammarApplicator::addTag(const UChar *txt, bool vstr) {
}
if ((tag->type & T_CASE_INSENSITIVE) && tag->tag[0] != '"' && tag->tag[0] != '<') {
if (grammar->icase_tags.insert(tag).second) {
- foreach (Taguint32HashMap, single_tags, titer, titer_end) {
+ foreach (titer, single_tags) {
if (titer->second->type & T_TEXTUAL) {
continue;
}
- foreach (Grammar::icase_tags_t, grammar->icase_tags, iter, iter_end) {
+ foreach (iter, grammar->icase_tags) {
UErrorCode status = U_ZERO_ERROR;
if (u_strCaseCompare(titer->second->tag.c_str(), titer->second->tag.length(), (*iter)->tag.c_str(), (*iter)->tag.length(), U_FOLD_CASE_DEFAULT, &status) == 0) {
titer->second->type |= T_TEXTUAL;
@@ -331,14 +350,12 @@ void GrammarApplicator::printTrace(UFILE *output, uint32_t hit_by) {
if (hit_by < grammar->rule_by_number.size()) {
const Rule *r = grammar->rule_by_number[hit_by];
u_fprintf(output, "%S", keywords[r->type].getTerminatedBuffer());
- if (r->type == K_ADDRELATION || r->type == K_SETRELATION || r->type == K_REMRELATION
- || r->type == K_ADDRELATIONS || r->type == K_SETRELATIONS || r->type == K_REMRELATIONS
- ) {
- u_fprintf(output, "(%S", r->maplist->getNonEmpty().begin()->first->tag.c_str());
- if (r->type == K_ADDRELATIONS || r->type == K_SETRELATIONS || r->type == K_REMRELATIONS) {
- u_fprintf(output, ",%S", r->sublist->getNonEmpty().begin()->first->tag.c_str());
- }
- u_fprintf(output, ")");
+ if (r->type == K_ADDRELATION || r->type == K_SETRELATION || r->type == K_REMRELATION || r->type == K_ADDRELATIONS || r->type == K_SETRELATIONS || r->type == K_REMRELATIONS) {
+ u_fprintf(output, "(%S", r->maplist->getNonEmpty().begin()->first->tag.c_str());
+ if (r->type == K_ADDRELATIONS || r->type == K_SETRELATIONS || r->type == K_REMRELATIONS) {
+ u_fprintf(output, ",%S", r->sublist->getNonEmpty().begin()->first->tag.c_str());
+ }
+ u_fprintf(output, ")");
}
if (!trace_name_only || !r->name) {
u_fprintf(output, ":%u", r->line);
@@ -366,7 +383,7 @@ void GrammarApplicator::printReading(const Reading *reading, UFILE *output, size
u_fputc(';', output);
}
- for (size_t i=0 ; i<sub ; ++i) {
+ for (size_t i = 0; i < sub; ++i) {
u_fputc('\t', output);
}
@@ -375,7 +392,7 @@ void GrammarApplicator::printReading(const Reading *reading, UFILE *output, size
}
uint32SortedVector unique;
- const_foreach (Reading::tags_list_t, reading->tags_list, tter, tter_end) {
+ foreach (tter, reading->tags_list) {
if ((!show_end_tags && *tter == endtag) || *tter == begintag) {
continue;
}
@@ -404,7 +421,7 @@ void GrammarApplicator::printReading(const Reading *reading, UFILE *output, size
}
const Cohort *pr = 0;
pr = reading->parent;
- if (reading->parent->dep_parent != std::numeric_limits<uint32_t>::max()) {
+ if (reading->parent->dep_parent != DEP_NO_PARENT) {
if (reading->parent->dep_parent == 0) {
pr = reading->parent->parent->cohorts[0];
}
@@ -421,19 +438,27 @@ void GrammarApplicator::printReading(const Reading *reading, UFILE *output, size
}
if (!dep_has_spanned) {
u_fprintf_u(output, pattern,
- reading->parent->local_number,
- pr->local_number);
+ reading->parent->local_number,
+ pr->local_number);
}
else {
- if (reading->parent->dep_parent == std::numeric_limits<uint32_t>::max()) {
+ pattern = span_pattern_latin.c_str();
+ if (unicode_tags) {
+ pattern = span_pattern_utf.c_str();
+ }
+ if (reading->parent->dep_parent == DEP_NO_PARENT) {
u_fprintf_u(output, pattern,
- reading->parent->dep_self,
- reading->parent->dep_self);
+ reading->parent->parent->number,
+ reading->parent->local_number,
+ reading->parent->parent->number,
+ reading->parent->local_number);
}
else {
u_fprintf_u(output, pattern,
- reading->parent->dep_self,
- reading->parent->dep_parent);
+ reading->parent->parent->number,
+ reading->parent->local_number,
+ pr->parent->number,
+ pr->local_number);
}
}
}
@@ -441,7 +466,7 @@ void GrammarApplicator::printReading(const Reading *reading, UFILE *output, size
if (reading->parent->type & CT_RELATED) {
u_fprintf(output, " ID:%u", reading->parent->global_number);
if (!reading->parent->relations.empty()) {
- foreach (RelationCtn, reading->parent->relations, miter, miter_end) {
+ foreach (miter, reading->parent->relations) {
boost_foreach (uint32_t siter, miter->second) {
u_fprintf(output, " R:%S:%u", grammar->single_tags.find(miter->first)->second->tag.c_str(), siter);
}
@@ -450,7 +475,7 @@ void GrammarApplicator::printReading(const Reading *reading, UFILE *output, size
}
if (trace) {
- const_foreach (uint32Vector, reading->hit_by, iter_hb, iter_hb_end) {
+ foreach (iter_hb, reading->hit_by) {
u_fputc(' ', output);
printTrace(output, *iter_hb);
}
@@ -460,12 +485,12 @@ void GrammarApplicator::printReading(const Reading *reading, UFILE *output, size
if (reading->next) {
reading->next->deleted = reading->deleted;
- printReading(reading->next, output, sub+1);
+ printReading(reading->next, output, sub + 1);
}
}
void GrammarApplicator::printCohort(Cohort *cohort, UFILE *output) {
- const UChar ws[] = {' ', '\t', 0};
+ const UChar ws[] = { ' ', '\t', 0 };
if (cohort->local_number == 0) {
goto removed;
@@ -480,7 +505,7 @@ void GrammarApplicator::printCohort(Cohort *cohort, UFILE *output) {
}
u_fprintf(output, "%S", cohort->wordform->tag.c_str());
if (cohort->wread) {
- const_foreach (Reading::tags_list_t, cohort->wread->tags_list, tter, tter_end) {
+ foreach (tter, cohort->wread->tags_list) {
if (*tter == cohort->wordform->hash) {
continue;
}
@@ -494,14 +519,14 @@ void GrammarApplicator::printCohort(Cohort *cohort, UFILE *output) {
mergeMappings(*cohort);
}
- foreach (ReadingList, cohort->readings, rter1, rter1_end) {
+ foreach (rter1, cohort->readings) {
printReading(*rter1, output);
}
if (trace && !trace_no_removed) {
- foreach (ReadingList, cohort->delayed, rter3, rter3_end) {
+ foreach (rter3, cohort->delayed) {
printReading(*rter3, output);
}
- foreach (ReadingList, cohort->deleted, rter2, rter2_end) {
+ foreach (rter2, cohort->deleted) {
printReading(*rter2, output);
}
}
@@ -509,12 +534,12 @@ void GrammarApplicator::printCohort(Cohort *cohort, UFILE *output) {
removed:
if (!cohort->text.empty() && cohort->text.find_first_not_of(ws) != UString::npos) {
u_fprintf(output, "%S", cohort->text.c_str());
- if (!ISNL(cohort->text[cohort->text.length()-1])) {
+ if (!ISNL(cohort->text[cohort->text.length() - 1])) {
u_fputc('\n', output);
}
}
- foreach (CohortVector, cohort->removed, iter, iter_end) {
+ foreach (iter, cohort->removed) {
printCohort(*iter, output);
}
}
@@ -539,13 +564,13 @@ void GrammarApplicator::printSingleWindow(SingleWindow *window, UFILE *output) {
if (!window->text.empty()) {
u_fprintf(output, "%S", window->text.c_str());
- if (!ISNL(window->text[window->text.length()-1])) {
+ if (!ISNL(window->text[window->text.length() - 1])) {
u_fputc('\n', output);
}
}
uint32_t cs = (uint32_t)window->cohorts.size();
- for (uint32_t c=0 ; c < cs ; c++) {
+ for (uint32_t c = 0; c < cs; c++) {
Cohort *cohort = window->cohorts[c];
printCohort(cohort, output);
}
@@ -575,7 +600,7 @@ void GrammarApplicator::pipeOutReading(const Reading *reading, std::ostream& out
}
uint32_t cs = 0;
- const_foreach (Reading::tags_list_t, reading->tags_list, tter, tter_end) {
+ foreach (tter, reading->tags_list) {
if (*tter == reading->baseform || *tter == reading->parent->wordform->hash) {
continue;
}
@@ -587,7 +612,7 @@ void GrammarApplicator::pipeOutReading(const Reading *reading, std::ostream& out
}
writeRaw(ss, cs);
- const_foreach (Reading::tags_list_t, reading->tags_list, tter, tter_end) {
+ foreach (tter, reading->tags_list) {
if (*tter == reading->baseform || *tter == reading->parent->wordform->hash) {
continue;
}
@@ -613,12 +638,12 @@ void GrammarApplicator::pipeOutCohort(const Cohort *cohort, std::ostream& output
if (!cohort->text.empty()) {
flags |= (1 << 0);
}
- if (has_dep && cohort->dep_parent != std::numeric_limits<uint32_t>::max()) {
+ if (has_dep && cohort->dep_parent != DEP_NO_PARENT) {
flags |= (1 << 1);
}
writeRaw(ss, flags);
- if (has_dep && cohort->dep_parent != std::numeric_limits<uint32_t>::max()) {
+ if (has_dep && cohort->dep_parent != DEP_NO_PARENT) {
writeRaw(ss, cohort->dep_parent);
}
@@ -626,7 +651,7 @@ void GrammarApplicator::pipeOutCohort(const Cohort *cohort, std::ostream& output
uint32_t cs = cohort->readings.size();
writeRaw(ss, cs);
- const_foreach (ReadingList, cohort->readings, rter1, rter1_end) {
+ foreach (rter1, cohort->readings) {
pipeOutReading(*rter1, ss);
}
if (!cohort->text.empty()) {
@@ -644,10 +669,10 @@ void GrammarApplicator::pipeOutSingleWindow(const SingleWindow& window, Process&
writeRaw(ss, window.number);
- uint32_t cs = (uint32_t)window.cohorts.size()-1;
+ uint32_t cs = (uint32_t)window.cohorts.size() - 1;
writeRaw(ss, cs);
- for (uint32_t c=1 ; c < cs+1 ; c++) {
+ for (uint32_t c = 1; c < cs + 1; c++) {
pipeOutCohort(window.cohorts[c], ss);
}
@@ -662,7 +687,9 @@ void GrammarApplicator::pipeOutSingleWindow(const SingleWindow& window, Process&
void GrammarApplicator::pipeInReading(Reading *reading, Process& input, bool force) {
uint32_t cs = 0;
readRaw(input, cs);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: reading packet length %u\n", cs);
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: reading packet length %u\n", cs);
+ }
std::string buf(cs, 0);
input.read(&buf[0], cs);
@@ -670,7 +697,9 @@ void GrammarApplicator::pipeInReading(Reading *reading, Process& input, bool for
uint32_t flags = 0;
readRaw(ss, flags);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: reading flags %u\n", flags);
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: reading flags %u\n", flags);
+ }
// Not marked modified, so don't bother with the heavy lifting...
if (!force && !(flags & (1 << 0))) {
@@ -686,7 +715,9 @@ void GrammarApplicator::pipeInReading(Reading *reading, Process& input, bool for
Tag *tag = addTag(str);
reading->baseform = tag->hash;
}
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: reading baseform %S\n", str.c_str());
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: reading baseform %S\n", str.c_str());
+ }
}
else {
reading->baseform = 0;
@@ -699,13 +730,17 @@ void GrammarApplicator::pipeInReading(Reading *reading, Process& input, bool for
}
readRaw(ss, cs);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: num tags %u\n", cs);
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: num tags %u\n", cs);
+ }
- for (size_t i=0 ; i<cs ; ++i) {
+ for (size_t i = 0; i < cs; ++i) {
UString str = readUTF8String(ss);
Tag *tag = addTag(str);
reading->tags_list.push_back(tag->hash);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: tag %S\n", tag->tag.c_str());
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: tag %S\n", tag->tag.c_str());
+ }
}
reflowReading(*reading);
@@ -714,22 +749,30 @@ void GrammarApplicator::pipeInReading(Reading *reading, Process& input, bool for
void GrammarApplicator::pipeInCohort(Cohort *cohort, Process& input) {
uint32_t cs = 0;
readRaw(input, cs);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: cohort packet length %u\n", cs);
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: cohort packet length %u\n", cs);
+ }
readRaw(input, cs);
if (cs != cohort->global_number) {
u_fprintf(ux_stderr, "Error: External returned data for cohort %u but we expected cohort %u!\n", cs, cohort->global_number);
CG3Quit(1);
}
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: cohort number %u\n", cohort->global_number);
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: cohort number %u\n", cohort->global_number);
+ }
uint32_t flags = 0;
readRaw(input, flags);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: cohort flags %u\n", flags);
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: cohort flags %u\n", flags);
+ }
if (flags & (1 << 1)) {
readRaw(input, cohort->dep_parent);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: cohort parent %u\n", cohort->dep_parent);
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: cohort parent %u\n", cohort->dep_parent);
+ }
}
bool force_readings = false;
@@ -738,25 +781,33 @@ void GrammarApplicator::pipeInCohort(Cohort *cohort, Process& input) {
Tag *tag = addTag(str);
cohort->wordform = tag;
force_readings = true;
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: cohort wordform %S\n", tag->tag.c_str());
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: cohort wordform %S\n", tag->tag.c_str());
+ }
}
readRaw(input, cs);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: num readings %u\n", cs);
- for (size_t i=0 ; i<cs ; ++i) {
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: num readings %u\n", cs);
+ }
+ for (size_t i = 0; i < cs; ++i) {
pipeInReading(cohort->readings[i], input, force_readings);
}
if (flags & (1 << 0)) {
cohort->text = readUTF8String(input);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: cohort text %S\n", cohort->text.c_str());
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: cohort text %S\n", cohort->text.c_str());
+ }
}
}
void GrammarApplicator::pipeInSingleWindow(SingleWindow& window, Process& input) {
uint32_t cs = 0;
readRaw(input, cs);
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: window packet length %u\n", cs);
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: window packet length %u\n", cs);
+ }
if (cs == 0) {
return;
@@ -767,36 +818,61 @@ void GrammarApplicator::pipeInSingleWindow(SingleWindow& window, Process& input)
u_fprintf(ux_stderr, "Error: External returned data for window %u but we expected window %u!\n", cs, window.number);
CG3Quit(1);
}
- if (debug_level > 1) u_fprintf(ux_stderr, "DEBUG: window number %u\n", window.number);
+ if (debug_level > 1) {
+ u_fprintf(ux_stderr, "DEBUG: window number %u\n", window.number);
+ }
readRaw(input, cs);
- for (size_t i=0 ; i<cs ; ++i) {
- pipeInCohort(window.cohorts[i+1], input);
+ for (size_t i = 0; i < cs; ++i) {
+ pipeInCohort(window.cohorts[i + 1], input);
}
}
void GrammarApplicator::error(const char *str, const UChar *p) {
(void)p;
- UChar buf[] = { L'R', L'U', L'N', L'T', L'I', L'M', L'E', 0 };
- u_fprintf(ux_stderr, str, buf, 0, buf);
+ if (current_rule && current_rule->line) {
+ const UChar buf[] = { 'R', 'T', ' ', 'R', 'U', 'L', 'E', 0 };
+ u_fprintf(ux_stderr, str, buf, current_rule->line, buf);
+ }
+ else {
+ const UChar buf[] = { 'R', 'T', ' ', 'I', 'N', 'P', 'U', 'T', 0 };
+ u_fprintf(ux_stderr, str, buf, numLines, buf);
+ }
}
void GrammarApplicator::error(const char *str, const char *s, const UChar *p) {
(void)p;
- UChar buf[] = { L'R', L'U', L'N', L'T', L'I', L'M', L'E', 0 };
- u_fprintf(ux_stderr, str, buf, s, 0, buf);
+ if (current_rule && current_rule->line) {
+ const UChar buf[] = { 'R', 'T', ' ', 'R', 'U', 'L', 'E', 0 };
+ u_fprintf(ux_stderr, str, buf, s, current_rule->line, buf);
+ }
+ else {
+ const UChar buf[] = { 'R', 'T', ' ', 'I', 'N', 'P', 'U', 'T', 0 };
+ u_fprintf(ux_stderr, str, buf, s, numLines, buf);
+ }
}
void GrammarApplicator::error(const char *str, const UChar *s, const UChar *p) {
(void)p;
- UChar buf[] = { L'R', L'U', L'N', L'T', L'I', L'M', L'E', 0 };
- u_fprintf(ux_stderr, str, buf, s, 0, buf);
+ if (current_rule && current_rule->line) {
+ const UChar buf[] = { 'R', 'T', ' ', 'R', 'U', 'L', 'E', 0 };
+ u_fprintf(ux_stderr, str, buf, s, current_rule->line, buf);
+ }
+ else {
+ const UChar buf[] = { 'R', 'T', ' ', 'I', 'N', 'P', 'U', 'T', 0 };
+ u_fprintf(ux_stderr, str, buf, s, numLines, buf);
+ }
}
void GrammarApplicator::error(const char *str, const char *s, const UChar *S, const UChar *p) {
(void)p;
- UChar buf[] = { L'R', L'U', L'N', L'T', L'I', L'M', L'E', 0 };
- u_fprintf(ux_stderr, str, buf, s, S, 0, buf);
+ if (current_rule && current_rule->line) {
+ const UChar buf[] = { 'R', 'T', ' ', 'R', 'U', 'L', 'E', 0 };
+ u_fprintf(ux_stderr, str, buf, s, S, current_rule->line, buf);
+ }
+ else {
+ const UChar buf[] = { 'R', 'T', ' ', 'I', 'N', 'P', 'U', 'T', 0 };
+ u_fprintf(ux_stderr, str, buf, s, S, numLines, buf);
+ }
}
-
}
diff --git a/src/GrammarApplicator.hpp b/src/GrammarApplicator.hpp
index c4f51b3..faf4b5f 100644
--- a/src/GrammarApplicator.hpp
+++ b/src/GrammarApplicator.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -31,257 +31,283 @@
#include "interval_vector.hpp"
#include "flat_unordered_set.hpp"
#include "istream.hpp"
+#include "scoped_stack.hpp"
#include <deque>
class Process;
namespace CG3 {
- class Window;
- class Grammar;
- class Reading;
- class SingleWindow;
- class Cohort;
- class ContextualTest;
- class Set;
- class Rule;
-
- typedef std::vector<UnicodeString> regexgrps_t;
-
- struct dSMC_Context {
+class Window;
+class Grammar;
+class Reading;
+class SingleWindow;
+class Cohort;
+class ContextualTest;
+class Set;
+class Rule;
+
+typedef std::vector<UnicodeString> regexgrps_t;
+
+struct dSMC_Context {
+ const ContextualTest *test;
+ Cohort **deep;
+ Cohort *origin;
+ uint64_t options;
+ bool did_test;
+ bool matched_target;
+ bool matched_tests;
+ bool in_barrier;
+};
+
+class GrammarApplicator {
+public:
+ bool always_span;
+ bool apply_mappings;
+ bool apply_corrections;
+ bool no_before_sections;
+ bool no_sections;
+ bool no_after_sections;
+ bool trace;
+ bool trace_name_only;
+ bool trace_no_removed;
+ bool trace_encl;
+ bool allow_magic_readings;
+ bool no_pass_origin;
+ bool unsafe;
+ bool ordered;
+ bool show_end_tags;
+ bool unicode_tags;
+ bool unique_tags;
+ bool dry_run;
+ bool owns_grammar;
+ bool input_eof;
+ bool seen_barrier;
+ bool is_conv;
+ bool split_mappings;
+
+ bool dep_has_spanned;
+ uint32_t dep_delimit;
+ bool dep_original;
+ bool dep_block_loops;
+ bool dep_block_crossing;
+
+ uint32_t num_windows;
+ uint32_t soft_limit;
+ uint32_t hard_limit;
+ uint32Vector sections;
+ uint32IntervalVector valid_rules;
+ uint32FlatHashMap variables;
+ uint32_t verbosity_level;
+ uint32_t debug_level;
+ uint32_t section_max_count;
+
+ GrammarApplicator(UFILE *ux_err);
+ virtual ~GrammarApplicator();
+
+ void enableStatistics();
+ void disableStatistics();
+
+ void setGrammar(Grammar *res);
+ void index();
+
+ virtual void runGrammarOnText(istream& input, UFILE *output);
+
+ bool has_dep;
+ uint32_t dep_highest_seen;
+ Window *gWindow;
+ void reflowDependencyWindow(uint32_t max = 0);
+
+ bool has_relations;
+ void reflowRelationWindow(uint32_t max = 0);
+
+ Grammar *grammar;
+
+ // Moved these public to help the library API
+ Tag *addTag(Tag *tag);
+ Tag *addTag(const UChar *tag, bool vstr = false);
+ Tag *addTag(const UString& txt, bool vstr = false);
+ void initEmptySingleWindow(SingleWindow *cSWindow);
+ uint32_t addTagToReading(Reading& reading, uint32_t tag, bool rehash = true);
+ uint32_t addTagToReading(Reading& reading, Tag *tag, bool rehash = true);
+ void runGrammarOnWindow();
+
+ typedef std::map<Reading*, TagList> all_mappings_t;
+ void splitMappings(TagList& mappings, Cohort& cohort, Reading& reading, bool mapped = false);
+ void splitAllMappings(all_mappings_t& all_mappings, Cohort& cohort, bool mapped = false);
+ Taguint32HashMap single_tags;
+
+ UFILE *ux_stderr;
+ UChar *filebase;
+ void error(const char *str, const UChar *p);
+ void error(const char *str, const char *s, const UChar *p);
+ void error(const char *str, const UChar *s, const UChar *p);
+ void error(const char *str, const char *s, const UChar *S, const UChar *p);
+ Grammar *get_grammar() { return grammar; }
+
+protected:
+ void printTrace(UFILE *output, uint32_t hit_by);
+ void printReading(const Reading *reading, UFILE *output, size_t sub = 1);
+ void printCohort(Cohort *cohort, UFILE *output);
+ virtual void printSingleWindow(SingleWindow *window, UFILE *output);
+
+ void pipeOutReading(const Reading *reading, std::ostream& output);
+ void pipeOutCohort(const Cohort *cohort, std::ostream& output);
+ void pipeOutSingleWindow(const SingleWindow& window, Process& output);
+
+ void pipeInReading(Reading *reading, Process& input, bool force = false);
+ void pipeInCohort(Cohort *cohort, Process& input);
+ void pipeInSingleWindow(SingleWindow& window, Process& input);
+
+ UString span_pattern_latin;
+ UString span_pattern_utf;
+
+ uint32_t numLines;
+ uint32_t numWindows;
+ uint32_t numCohorts;
+ uint32_t numReadings;
+
+ bool did_index;
+ uint32SortedVector dep_deep_seen;
+
+ uint32_t numsections;
+ typedef std::map<int32_t, uint32IntervalVector> RSType;
+ RSType runsections;
+
+ typedef std::map<uint32_t, Process> externals_t;
+ externals_t externals;
+
+ uint32Vector ci_depths;
+ std::map<uint32_t, CohortIterator> cohortIterators;
+ std::map<uint32_t, TopologyLeftIter> topologyLeftIters;
+ std::map<uint32_t, TopologyRightIter> topologyRightIters;
+ std::map<uint32_t, DepParentIter> depParentIters;
+ std::map<uint32_t, DepDescendentIter> depDescendentIters;
+ std::map<uint32_t, DepAncestorIter> depAncestorIters;
+
+ uint32_t match_single, match_comp, match_sub;
+ uint32_t begintag, endtag, substtag;
+ Tag *tag_begin, *tag_end, *tag_subst;
+ uint32_t par_left_tag, par_right_tag;
+ uint32_t par_left_pos, par_right_pos;
+ bool did_final_enclosure;
+
+ struct tmpl_context_t {
+ Cohort *min;
+ Cohort *max;
const ContextualTest *test;
- Cohort **deep;
- Cohort *origin;
- uint64_t options;
- bool did_test;
- bool matched_target;
- bool matched_tests;
- };
- class GrammarApplicator {
- public:
- bool always_span;
- bool apply_mappings;
- bool apply_corrections;
- bool no_before_sections;
- bool no_sections;
- bool no_after_sections;
- bool trace;
- bool trace_name_only;
- bool trace_no_removed;
- bool trace_encl;
- bool allow_magic_readings;
- bool no_pass_origin;
- bool unsafe;
- bool ordered;
- bool show_end_tags;
- bool unicode_tags;
- bool unique_tags;
- bool dry_run;
- bool owns_grammar;
- bool input_eof;
- bool seen_barrier;
- bool is_conv;
- bool split_mappings;
-
- bool dep_has_spanned;
- uint32_t dep_delimit;
- bool dep_original;
- bool dep_block_loops;
- bool dep_block_crossing;
-
- uint32_t num_windows;
- uint32_t soft_limit;
- uint32_t hard_limit;
- uint32Vector sections;
- uint32IntervalVector valid_rules;
- uint32FlatHashMap variables;
- uint32_t verbosity_level;
- uint32_t debug_level;
- uint32_t section_max_count;
-
- GrammarApplicator(UFILE *ux_err);
- virtual ~GrammarApplicator();
-
- void enableStatistics();
- void disableStatistics();
-
- void setGrammar(Grammar *res);
- void index();
-
- virtual void runGrammarOnText(istream& input, UFILE *output);
-
- bool has_dep;
- uint32_t dep_highest_seen;
- Window *gWindow;
- void reflowDependencyWindow(uint32_t max = 0);
-
- bool has_relations;
- void reflowRelationWindow(uint32_t max = 0);
-
- Grammar *grammar;
-
- // Moved these public to help the library API
- Tag *addTag(Tag *tag);
- Tag *addTag(const UChar *tag, bool vstr = false);
- Tag *addTag(const UString& txt, bool vstr = false);
- void initEmptySingleWindow(SingleWindow *cSWindow);
- uint32_t addTagToReading(Reading& reading, uint32_t tag, bool rehash = true);
- uint32_t addTagToReading(Reading& reading, Tag *tag, bool rehash = true);
- void runGrammarOnWindow();
-
- typedef std::map<Reading*, TagList> all_mappings_t;
- void splitMappings(TagList& mappings, Cohort& cohort, Reading& reading, bool mapped = false);
- void splitAllMappings(all_mappings_t& all_mappings, Cohort& cohort, bool mapped = false);
- Taguint32HashMap single_tags;
-
- UFILE *ux_stderr;
- UChar *filebase;
- void error(const char *str, const UChar *p);
- void error(const char *str, const char *s, const UChar *p);
- void error(const char *str, const UChar *s, const UChar *p);
- void error(const char *str, const char *s, const UChar *S, const UChar *p);
- Grammar *get_grammar() { return grammar; }
-
- protected:
- void printTrace(UFILE *output, uint32_t hit_by);
- void printReading(const Reading *reading, UFILE *output, size_t sub=1);
- void printCohort(Cohort *cohort, UFILE *output);
- virtual void printSingleWindow(SingleWindow *window, UFILE *output);
-
- void pipeOutReading(const Reading *reading, std::ostream& output);
- void pipeOutCohort(const Cohort *cohort, std::ostream& output);
- void pipeOutSingleWindow(const SingleWindow& window, Process& output);
-
- void pipeInReading(Reading *reading, Process& input, bool force = false);
- void pipeInCohort(Cohort *cohort, Process& input);
- void pipeInSingleWindow(SingleWindow& window, Process& input);
-
- uint32_t numLines;
- uint32_t numWindows;
- uint32_t numCohorts;
- uint32_t numReadings;
-
- bool did_index;
- uint32SortedVector dep_deep_seen;
-
- uint32_t numsections;
- typedef std::map<int32_t,uint32IntervalVector> RSType;
- RSType runsections;
-
- typedef std::map<uint32_t,Process> externals_t;
- externals_t externals;
-
- uint32Vector ci_depths;
- std::map<uint32_t,CohortIterator> cohortIterators;
- std::map<uint32_t,TopologyLeftIter> topologyLeftIters;
- std::map<uint32_t,TopologyRightIter> topologyRightIters;
- std::map<uint32_t,DepParentIter> depParentIters;
- std::map<uint32_t,DepDescendentIter> depDescendentIters;
- std::map<uint32_t,DepAncestorIter> depAncestorIters;
-
- uint32_t match_single, match_comp, match_sub;
- uint32_t begintag, endtag;
- Tag *tag_begin, *tag_end;
- uint32_t par_left_tag, par_right_tag;
- uint32_t par_left_pos, par_right_pos;
- bool did_final_enclosure;
-
- std::vector<regexgrps_t> regexgrps_store;
- std::pair<uint8_t, regexgrps_t*> regexgrps;
- bc::flat_map<uint32_t, uint8_t> regexgrps_z;
- bc::flat_map<uint32_t, regexgrps_t*> regexgrps_c;
- uint32_t same_basic;
- Cohort *target;
- Cohort *mark;
- Cohort *attach_to;
- Rule *current_rule;
-
- typedef bc::flat_map<uint32_t,Reading*> readings_plain_t;
- readings_plain_t readings_plain;
-
- typedef bc::flat_map<uint32_t, const void*> unif_tags_t;
- bc::flat_map<uint32_t,unif_tags_t*> unif_tags_rs;
- std::vector<unif_tags_t> unif_tags_store;
- bc::flat_map<uint32_t, uint32SortedVector*> unif_sets_rs;
- std::vector<uint32SortedVector> unif_sets_store;
- unif_tags_t *unif_tags;
- uint32_t unif_last_wordform;
- uint32_t unif_last_baseform;
- uint32_t unif_last_textual;
- uint32SortedVector *unif_sets;
- bool unif_sets_firstrun;
-
- uint32FlatHashSet index_regexp_yes;
- uint32FlatHashSet index_regexp_no;
- uint32FlatHashSet index_icase_yes;
- uint32FlatHashSet index_icase_no;
- std::vector<uint32FlatHashSet> index_readingSet_yes;
- std::vector<uint32FlatHashSet> index_readingSet_no;
- uint32FlatHashSet index_ruleCohort_no;
- void resetIndexes();
-
- Tag *makeBaseFromWord(uint32_t tag);
- Tag *makeBaseFromWord(Tag *tag);
-
- bool updateRuleToCohorts(Cohort& c, const uint32_t& rsit);
- void indexSingleWindow(SingleWindow& current);
- uint32_t runGrammarOnSingleWindow(SingleWindow& current);
- bool updateValidRules(const uint32IntervalVector& rules, uint32IntervalVector& intersects, const uint32_t& hash, Reading& reading);
- uint32_t runRulesOnSingleWindow(SingleWindow& current, const uint32IntervalVector& rules);
-
- enum ST_RETVALS {
- TRV_BREAK = (1 << 0),
- TRV_BARRIER = (1 << 1)
- };
- Cohort *runSingleTest(Cohort *cohort, const ContextualTest *test, uint8_t& rvs, bool *retval, Cohort **deep = 0, Cohort *origin = 0);
- Cohort *runSingleTest(SingleWindow *sWindow, size_t i, const ContextualTest *test, uint8_t& rvs, bool *retval, Cohort **deep = 0, Cohort *origin = 0);
- Cohort *runContextualTest(SingleWindow *sWindow, size_t position, const ContextualTest *test, Cohort **deep = 0, Cohort *origin = 0);
- Cohort *runDependencyTest(SingleWindow *sWindow, Cohort *current, const ContextualTest *test, Cohort **deep = 0, Cohort *origin = 0, const Cohort *self = 0);
- Cohort *runParenthesisTest(SingleWindow *sWindow, const Cohort *current, const ContextualTest *test, Cohort **deep = 0, Cohort *origin = 0);
- Cohort *runRelationTest(SingleWindow *sWindow, Cohort *current, const ContextualTest *test, Cohort **deep = 0, Cohort *origin = 0);
-
- bool doesWordformsMatch(const Tag *cword, const Tag *rword);
- uint32_t doesTagMatchRegexp(uint32_t test, const Tag& tag, bool bypass_index = false);
- uint32_t doesTagMatchIcase(uint32_t test, const Tag& tag, bool bypass_index = false);
- uint32_t doesRegexpMatchReading(const Reading& reading, const Tag& tag, bool bypass_index = false);
- uint32_t doesTagMatchReading(const Reading& reading, const Tag& tag, bool unif_mode = false, bool bypass_index = false);
- bool doesSetMatchReading_trie(const Reading& reading, const Set& theset, const trie_t& trie, bool unif_mode = false);
- bool doesSetMatchReading_tags(const Reading& reading, const Set& theset, bool unif_mode = false);
- bool doesSetMatchReading(const Reading& reading, const uint32_t set, bool bypass_index = false, bool unif_mode = false);
-
- inline bool doesSetMatchCohort_testLinked(Cohort& cohort, const Set& theset, dSMC_Context *context = 0);
- inline bool doesSetMatchCohort_helper(Cohort& cohort, const Reading& reading, const Set& theset, dSMC_Context *context = 0);
- bool doesSetMatchCohortNormal(Cohort& cohort, const uint32_t set, dSMC_Context *context = 0);
- bool doesSetMatchCohortCareful(Cohort& cohort, const uint32_t set, dSMC_Context *context = 0);
-
- bool statistics;
- ticks gtimer;
-
- Cohort *delimitAt(SingleWindow& current, Cohort *cohort);
- void reflowReading(Reading& reading);
- Tag *generateVarstringTag(const Tag *tag);
- void delTagFromReading(Reading& reading, uint32_t tag);
- void delTagFromReading(Reading& reading, Tag *tag);
- bool unmapReading(Reading& reading, const uint32_t rule);
- TagList getTagList(const Set& theSet, bool unif_mode = false) const;
- void getTagList(const Set& theSet, TagList& theTags, bool unif_mode = false) const;
- void mergeReadings(ReadingList& readings);
- void mergeMappings(Cohort& cohort);
- bool isChildOf(const Cohort *child, const Cohort *parent);
- bool wouldParentChildLoop(const Cohort *parent, const Cohort *child);
- bool wouldParentChildCross(const Cohort *parent, const Cohort *child);
- bool attachParentChild(Cohort& parent, Cohort& child, bool allowloop = false, bool allowcrossing = false);
-
- void reflowTextuals_Reading(Reading& r);
- void reflowTextuals_Cohort(Cohort& c);
- void reflowTextuals_SingleWindow(SingleWindow& sw);
- void reflowTextuals();
-
- Reading *initEmptyCohort(Cohort& cohort);
-
- std::deque<Reading> subs_any;
- Reading *get_sub_reading(Reading *tr, int sub_reading);
+ tmpl_context_t(const ContextualTest *test)
+ : min(0)
+ , max(0)
+ , test(test)
+ {
+ }
+ };
+ std::vector<tmpl_context_t> tmpl_cntxs;
+ size_t tmpl_cntx_pos;
+
+ std::vector<regexgrps_t> regexgrps_store;
+ std::pair<uint8_t, regexgrps_t*> regexgrps;
+ bc::flat_map<uint32_t, uint8_t> regexgrps_z;
+ bc::flat_map<uint32_t, regexgrps_t*> regexgrps_c;
+ uint32_t same_basic;
+ Cohort *target;
+ Cohort *mark;
+ Cohort *attach_to;
+ Rule *current_rule;
+
+ typedef bc::flat_map<uint32_t, Reading*> readings_plain_t;
+ readings_plain_t readings_plain;
+
+ typedef bc::flat_map<uint32_t, const void*> unif_tags_t;
+ bc::flat_map<uint32_t, unif_tags_t*> unif_tags_rs;
+ std::vector<unif_tags_t> unif_tags_store;
+ bc::flat_map<uint32_t, uint32SortedVector*> unif_sets_rs;
+ std::vector<uint32SortedVector> unif_sets_store;
+ unif_tags_t *unif_tags;
+ uint32_t unif_last_wordform;
+ uint32_t unif_last_baseform;
+ uint32_t unif_last_textual;
+ uint32SortedVector *unif_sets;
+ bool unif_sets_firstrun;
+
+ scoped_stack<TagList> ss_taglist;
+ scoped_stack<unif_tags_t> ss_utags;
+ scoped_stack<uint32SortedVector> ss_u32sv;
+
+ uint32FlatHashSet index_regexp_yes;
+ uint32FlatHashSet index_regexp_no;
+ uint32FlatHashSet index_icase_yes;
+ uint32FlatHashSet index_icase_no;
+ std::vector<uint32FlatHashSet> index_readingSet_yes;
+ std::vector<uint32FlatHashSet> index_readingSet_no;
+ uint32FlatHashSet index_ruleCohort_no;
+ void resetIndexes();
+
+ Tag *makeBaseFromWord(uint32_t tag);
+ Tag *makeBaseFromWord(Tag *tag);
+
+ bool updateRuleToCohorts(Cohort& c, const uint32_t& rsit);
+ void indexSingleWindow(SingleWindow& current);
+ uint32_t runGrammarOnSingleWindow(SingleWindow& current);
+ bool updateValidRules(const uint32IntervalVector& rules, uint32IntervalVector& intersects, const uint32_t& hash, Reading& reading);
+ uint32_t runRulesOnSingleWindow(SingleWindow& current, const uint32IntervalVector& rules);
+
+ enum ST_RETVALS {
+ TRV_BREAK = (1 << 0),
+ TRV_BARRIER = (1 << 1),
};
+ Cohort *runSingleTest(Cohort *cohort, const ContextualTest *test, uint8_t& rvs, bool *retval, Cohort **deep = 0, Cohort *origin = 0);
+ Cohort *runSingleTest(SingleWindow *sWindow, size_t i, const ContextualTest *test, uint8_t& rvs, bool *retval, Cohort **deep = 0, Cohort *origin = 0);
+ bool posOutputHelper(const SingleWindow *sWindow, uint32_t position, const ContextualTest *test, const Cohort *cohort, const Cohort *cdeep);
+ Cohort *runContextualTest_tmpl(SingleWindow *sWindow, size_t position, const ContextualTest *test, ContextualTest *tmpl, Cohort *& cdeep, Cohort *origin);
+ Cohort *runContextualTest(SingleWindow *sWindow, size_t position, const ContextualTest *test, Cohort **deep = 0, Cohort *origin = 0);
+ Cohort *runDependencyTest(SingleWindow *sWindow, Cohort *current, const ContextualTest *test, Cohort **deep = 0, Cohort *origin = 0, const Cohort *self = 0);
+ Cohort *runParenthesisTest(SingleWindow *sWindow, const Cohort *current, const ContextualTest *test, Cohort **deep = 0, Cohort *origin = 0);
+ Cohort *runRelationTest(SingleWindow *sWindow, Cohort *current, const ContextualTest *test, Cohort **deep = 0, Cohort *origin = 0);
+
+ bool doesWordformsMatch(const Tag *cword, const Tag *rword);
+ uint32_t doesTagMatchRegexp(uint32_t test, const Tag& tag, bool bypass_index = false);
+ uint32_t doesTagMatchIcase(uint32_t test, const Tag& tag, bool bypass_index = false);
+ uint32_t doesRegexpMatchReading(const Reading& reading, const Tag& tag, bool bypass_index = false);
+ uint32_t doesTagMatchReading(const Reading& reading, const Tag& tag, bool unif_mode = false, bool bypass_index = false);
+ bool doesSetMatchReading_trie(const Reading& reading, const Set& theset, const trie_t& trie, bool unif_mode = false);
+ bool doesSetMatchReading_tags(const Reading& reading, const Set& theset, bool unif_mode = false);
+ bool doesSetMatchReading(const Reading& reading, const uint32_t set, bool bypass_index = false, bool unif_mode = false);
+
+ inline bool doesSetMatchCohort_testLinked(Cohort& cohort, const Set& theset, dSMC_Context *context = 0);
+ inline bool doesSetMatchCohort_helper(Cohort& cohort, Reading& reading, const Set& theset, dSMC_Context *context = 0);
+ bool doesSetMatchCohortNormal(Cohort& cohort, const uint32_t set, dSMC_Context *context = 0);
+ bool doesSetMatchCohortCareful(Cohort& cohort, const uint32_t set, dSMC_Context *context = 0);
+
+ bool statistics;
+ ticks gtimer;
+
+ Cohort *delimitAt(SingleWindow& current, Cohort *cohort);
+ void reflowReading(Reading& reading);
+ Tag *generateVarstringTag(const Tag *tag);
+ void delTagFromReading(Reading& reading, uint32_t tag);
+ void delTagFromReading(Reading& reading, Tag *tag);
+ bool unmapReading(Reading& reading, const uint32_t rule);
+ TagList getTagList(const Set& theSet, bool unif_mode = false) const;
+ void getTagList(const Set& theSet, TagList& theTags, bool unif_mode = false) const;
+ void mergeReadings(ReadingList& readings);
+ void mergeMappings(Cohort& cohort);
+ bool isChildOf(const Cohort *child, const Cohort *parent);
+ bool wouldParentChildLoop(const Cohort *parent, const Cohort *child);
+ bool wouldParentChildCross(const Cohort *parent, const Cohort *child);
+ bool attachParentChild(Cohort& parent, Cohort& child, bool allowloop = false, bool allowcrossing = false);
+
+ void reflowTextuals_Reading(Reading& r);
+ void reflowTextuals_Cohort(Cohort& c);
+ void reflowTextuals_SingleWindow(SingleWindow& sw);
+ void reflowTextuals();
+
+ Reading *initEmptyCohort(Cohort& cohort);
+
+ std::deque<Reading> subs_any;
+ Reading *get_sub_reading(Reading *tr, int sub_reading);
+};
}
#endif
diff --git a/src/GrammarApplicator_matchSet.cpp b/src/GrammarApplicator_matchSet.cpp
index 0ef0572..2905fe9 100644
--- a/src/GrammarApplicator_matchSet.cpp
+++ b/src/GrammarApplicator_matchSet.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -78,7 +78,7 @@ uint32_t GrammarApplicator::doesTagMatchRegexp(uint32_t test, const Tag& tag, bo
UErrorCode status = U_ZERO_ERROR;
uregex_setText(tag.regexp, itag.tag.c_str(), itag.tag.length(), &status);
if (status != U_ZERO_ERROR) {
- u_fprintf(ux_stderr, "Error: uregex_setText(MatchSet) returned %s - cannot continue!\n", u_errorName(status));
+ u_fprintf(ux_stderr, "Error: uregex_setText(MatchSet) returned %s for tag %S before input line %u - cannot continue!\n", u_errorName(status), tag.tag.c_str(), numLines);
CG3Quit(1);
}
status = U_ZERO_ERROR;
@@ -86,7 +86,7 @@ uint32_t GrammarApplicator::doesTagMatchRegexp(uint32_t test, const Tag& tag, bo
match = itag.hash;
}
if (status != U_ZERO_ERROR) {
- u_fprintf(ux_stderr, "Error: uregex_find(MatchSet) returned %s - cannot continue!\n", u_errorName(status));
+ u_fprintf(ux_stderr, "Error: uregex_find(MatchSet) returned %s for tag %S before input line %u - cannot continue!\n", u_errorName(status), tag.tag.c_str(), numLines);
CG3Quit(1);
}
if (match) {
@@ -96,7 +96,7 @@ uint32_t GrammarApplicator::doesTagMatchRegexp(uint32_t test, const Tag& tag, bo
for (int i = 1; i <= gc; ++i) {
tmp[0] = 0;
int32_t len = uregex_group(tag.regexp, i, tmp, 1024, &status);
- regexgrps.second->resize(std::max(static_cast<size_t>(regexgrps.first)+1, regexgrps.second->size()));
+ regexgrps.second->resize(std::max(static_cast<size_t>(regexgrps.first) + 1, regexgrps.second->size()));
UnicodeString& ucstr = (*regexgrps.second)[regexgrps.first];
ucstr.remove();
ucstr.append(tmp, len);
@@ -153,7 +153,7 @@ uint32_t GrammarApplicator::doesRegexpMatchReading(const Reading& reading, const
uint32_t match = 0;
// Grammar::reindex() will do a one-time pass to mark any potential matching tag as T_TEXTUAL
- const_foreach (uint32SortedVector, reading.tags_textual, mter, mter_end) {
+ foreach (mter, reading.tags_textual) {
match = doesTagMatchRegexp(*mter, tag, bypass_index);
if (match) {
break;
@@ -205,7 +205,7 @@ uint32_t GrammarApplicator::doesTagMatchReading(const Reading& reading, const Ta
match = doesRegexpMatchReading(reading, tag, bypass_index);
}
else if (tag.type & T_CASE_INSENSITIVE) {
- const_foreach (uint32SortedVector, reading.tags_textual, mter, mter_end) {
+ foreach (mter, reading.tags_textual) {
match = doesTagMatchIcase(*mter, tag, bypass_index);
if (match) {
break;
@@ -240,9 +240,9 @@ uint32_t GrammarApplicator::doesTagMatchReading(const Reading& reading, const Ta
}
}
else {
- const_foreach (uint32SortedVector, reading.tags_textual, mter, mter_end) {
+ foreach (mter, reading.tags_textual) {
const Tag& itag = *(single_tags.find(*mter)->second);
- if (!(itag.type & (T_BASEFORM|T_WORDFORM))) {
+ if (!(itag.type & (T_BASEFORM | T_WORDFORM))) {
match = itag.hash;
if (unif_mode) {
if (unif_last_textual) {
@@ -567,7 +567,7 @@ bool GrammarApplicator::doesSetMatchReading(const Reading& reading, const uint32
}
// If there are no sub-sets, it must be a LIST set.
else if (theset.sets.empty()) {
- retval = doesSetMatchReading_tags(reading, theset, ((theset.type & ST_TAG_UNIFY)!=0)|unif_mode);
+ retval = doesSetMatchReading_tags(reading, theset, ((theset.type & ST_TAG_UNIFY) != 0) | unif_mode);
}
// &&unified sets
else if (theset.type & ST_SET_UNIFY) {
@@ -576,9 +576,9 @@ bool GrammarApplicator::doesSetMatchReading(const Reading& reading, const uint32
if (unif_sets_firstrun) {
const Set& uset = *grammar->sets_list[theset.sets[0]];
const size_t size = uset.sets.size();
- for (size_t i=0;i<size;++i) {
+ for (size_t i = 0; i < size; ++i) {
const Set& tset = *grammar->sets_list[uset.sets[i]];
- if (doesSetMatchReading(reading, tset.number, bypass_index, ((theset.type & ST_TAG_UNIFY)!=0)|unif_mode)) {
+ if (doesSetMatchReading(reading, tset.number, bypass_index, ((theset.type & ST_TAG_UNIFY) != 0) | unif_mode)) {
unif_sets->insert(tset.number);
}
}
@@ -587,55 +587,54 @@ bool GrammarApplicator::doesSetMatchReading(const Reading& reading, const uint32
}
// Subsequent times, test whether any of the previously stored sets match the reading
else {
- static uint32SortedVector sets;
- sets.clear();
- foreach(uint32SortedVector, *unif_sets, usi, usi_end) {
+ BOOST_AUTO(sets, ss_u32sv.get());
+ foreach (usi, *unif_sets) {
if (doesSetMatchReading(reading, *usi, bypass_index, unif_mode)) {
- sets.insert(*usi);
+ sets->insert(*usi);
}
}
- retval = !sets.empty();
+ retval = !sets->empty();
}
}
else {
// If all else fails, it must be a SET set.
// Loop through the sub-sets and apply the set operators
const size_t size = theset.sets.size();
- for (size_t i=0;i<size;++i) {
- bool match = doesSetMatchReading(reading, theset.sets[i], bypass_index, ((theset.type & ST_TAG_UNIFY)!=0)|unif_mode);
+ for (size_t i = 0; i < size; ++i) {
+ bool match = doesSetMatchReading(reading, theset.sets[i], bypass_index, ((theset.type & ST_TAG_UNIFY) != 0) | unif_mode);
bool failfast = false;
// Operator OR does not modify match, so simply skip it.
// The result of doing so means that the other operators gain precedence.
- while (i < size-1 && theset.set_ops[i] != S_OR) {
+ while (i < size - 1 && theset.set_ops[i] != S_OR) {
switch (theset.set_ops[i]) {
- case S_PLUS:
- if (match) {
- match = doesSetMatchReading(reading, theset.sets[i+1], bypass_index, ((theset.type & ST_TAG_UNIFY)!=0)|unif_mode);
- }
- break;
- // Failfast makes a difference in A OR B ^ C OR D, where - does not.
- case S_FAILFAST:
- if (doesSetMatchReading(reading, theset.sets[i+1], bypass_index, ((theset.type & ST_TAG_UNIFY)!=0)|unif_mode)) {
+ case S_PLUS:
+ if (match) {
+ match = doesSetMatchReading(reading, theset.sets[i + 1], bypass_index, ((theset.type & ST_TAG_UNIFY) != 0) | unif_mode);
+ }
+ break;
+ // Failfast makes a difference in A OR B ^ C OR D, where - does not.
+ case S_FAILFAST:
+ if (doesSetMatchReading(reading, theset.sets[i + 1], bypass_index, ((theset.type & ST_TAG_UNIFY) != 0) | unif_mode)) {
+ match = false;
+ failfast = true;
+ }
+ break;
+ case S_MINUS:
+ if (match) {
+ if (doesSetMatchReading(reading, theset.sets[i + 1], bypass_index, ((theset.type & ST_TAG_UNIFY) != 0) | unif_mode)) {
match = false;
- failfast = true;
}
- break;
- case S_MINUS:
- if (match) {
- if (doesSetMatchReading(reading, theset.sets[i+1], bypass_index, ((theset.type & ST_TAG_UNIFY)!=0)|unif_mode)) {
- match = false;
- }
- }
- break;
- case S_NOT:
- if (!match) {
- if (!doesSetMatchReading(reading, theset.sets[i+1], bypass_index, ((theset.type & ST_TAG_UNIFY)!=0)|unif_mode)) {
- match = true;
- }
+ }
+ break;
+ case S_NOT:
+ if (!match) {
+ if (!doesSetMatchReading(reading, theset.sets[i + 1], bypass_index, ((theset.type & ST_TAG_UNIFY) != 0) | unif_mode)) {
+ match = true;
}
- break;
- default:
- break;
+ }
+ break;
+ default:
+ break;
}
++i;
}
@@ -653,7 +652,7 @@ bool GrammarApplicator::doesSetMatchReading(const Reading& reading, const uint32
// Propagate unified tag to other sets of this set, if applicable
if (unif_mode || (theset.type & ST_TAG_UNIFY)) {
const void *tag = 0;
- for (size_t i=0 ; i<size ; ++i) {
+ for (size_t i = 0; i < size; ++i) {
BOOST_AUTO(it, unif_tags->find(theset.sets[i]));
if (it != unif_tags->end()) {
tag = it->second;
@@ -661,7 +660,7 @@ bool GrammarApplicator::doesSetMatchReading(const Reading& reading, const uint32
}
}
if (tag) {
- for (size_t i=0 ; i<size ; ++i) {
+ for (size_t i = 0; i < size; ++i) {
(*unif_tags)[theset.sets[i]] = tag;
}
}
@@ -703,13 +702,23 @@ inline bool _check_options(std::vector<Reading*>& rv, uint32_t options, size_t n
inline bool GrammarApplicator::doesSetMatchCohort_testLinked(Cohort& cohort, const Set& theset, dSMC_Context *context) {
bool retval = true;
+ const ContextualTest *linked = 0;
+ inc_dec<size_t> ic;
+
if (context->test && context->test->linked) {
+ linked = context->test->linked;
+ }
+ else if (!tmpl_cntxs.empty() && tmpl_cntx_pos < tmpl_cntxs.size()) {
+ ic.inc(tmpl_cntx_pos);
+ linked = tmpl_cntxs[tmpl_cntxs.size() - tmpl_cntx_pos].test;
+ }
+ if (linked) {
if (!context->did_test) {
- if (context->test->linked->pos & POS_NO_PASS_ORIGIN) {
- context->matched_tests = (runContextualTest(cohort.parent, cohort.local_number, context->test->linked, context->deep, &cohort) != 0);
+ if (linked->pos & POS_NO_PASS_ORIGIN) {
+ context->matched_tests = (runContextualTest(cohort.parent, cohort.local_number, linked, context->deep, &cohort) != 0);
}
else {
- context->matched_tests = (runContextualTest(cohort.parent, cohort.local_number, context->test->linked, context->deep, context->origin) != 0);
+ context->matched_tests = (runContextualTest(cohort.parent, cohort.local_number, linked, context->deep, context->origin) != 0);
}
if (!(theset.type & ST_CHILD_UNIFY)) {
context->did_test = true;
@@ -720,35 +729,43 @@ inline bool GrammarApplicator::doesSetMatchCohort_testLinked(Cohort& cohort, con
return retval;
}
-inline bool GrammarApplicator::doesSetMatchCohort_helper(Cohort& cohort, const Reading& reading, const Set& theset, dSMC_Context *context) {
+inline bool GrammarApplicator::doesSetMatchCohort_helper(Cohort& cohort, Reading& reading, const Set& theset, dSMC_Context *context) {
bool retval = false;
- static unif_tags_t utags;
- utags.clear();
- static uint32SortedVector usets;
- usets.clear();
+ BOOST_AUTO(utags, ss_utags.get());
+ BOOST_AUTO(usets, ss_u32sv.get());
+ uint8_t orz = regexgrps.first;
if (context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY)) {
- utags = *unif_tags;
- usets = *unif_sets;
+ *utags = *unif_tags;
+ *usets = *unif_sets;
}
if (doesSetMatchReading(reading, theset.number, (theset.type & (ST_CHILD_UNIFY | ST_SPECIAL)) != 0)) {
retval = true;
if (context) {
+ if (context->options & POS_ATTACH_TO) {
+ reading.matched_target = true;
+ }
context->matched_target = true;
}
}
if (retval && context && (context->options & POS_NOT)) {
retval = !retval;
}
- if (retval && context) {
+ if (retval && context && !context->in_barrier) {
retval = doesSetMatchCohort_testLinked(cohort, theset, context);
+ if (context->options & POS_ATTACH_TO) {
+ reading.matched_tests = retval;
+ }
}
- if (context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY) && (utags.size() != unif_tags->size() || utags != *unif_tags)) {
+ if (!retval && context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY) && (utags->size() != unif_tags->size() || *utags != *unif_tags)) {
unif_tags->swap(utags);
}
- if (context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY) && usets.size() != unif_sets->size()) {
+ if (!retval && context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY) && usets->size() != unif_sets->size()) {
unif_sets->swap(usets);
}
+ if (!retval) {
+ regexgrps.first = orz;
+ }
return retval;
}
@@ -782,7 +799,7 @@ bool GrammarApplicator::doesSetMatchCohortNormal(Cohort& cohort, const uint32_t
if (lists[i] == 0) {
continue;
}
- const_foreach (ReadingList, *lists[i], iter, iter_end) {
+ foreach (iter, *lists[i]) {
Reading *reading = *iter;
if (context && context->test) {
// ToDo: Barriers need some way to escape sub-readings
@@ -836,7 +853,7 @@ bool GrammarApplicator::doesSetMatchCohortCareful(Cohort& cohort, const uint32_t
if (lists[i] == 0) {
continue;
}
- const_foreach(ReadingList, *lists[i], iter, iter_end) {
+ foreach (iter, *lists[i]) {
Reading *reading = *iter;
if (context && context->test) {
// ToDo: Barriers need some way to escape sub-readings
@@ -861,5 +878,4 @@ bool GrammarApplicator::doesSetMatchCohortCareful(Cohort& cohort, const uint32_t
return retval;
}
-
}
diff --git a/src/GrammarApplicator_reflow.cpp b/src/GrammarApplicator_reflow.cpp
index 259cac1..0ffcf90 100644
--- a/src/GrammarApplicator_reflow.cpp
+++ b/src/GrammarApplicator_reflow.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -40,9 +40,9 @@ Tag *GrammarApplicator::makeBaseFromWord(Tag *tag) {
}
static UString n;
n.clear();
- n.resize(len-2);
- n[0] = n[len-3] = '"';
- u_strncpy(&n[1], tag->tag.c_str()+2, len-4);
+ n.resize(len - 2);
+ n[0] = n[len - 3] = '"';
+ u_strncpy(&n[1], tag->tag.c_str() + 2, len - 4);
Tag *nt = addTag(n);
return nt;
}
@@ -58,12 +58,12 @@ bool GrammarApplicator::isChildOf(const Cohort *child, const Cohort *parent) {
}
else {
size_t i = 0;
- for (const Cohort *inner = child ; i<1000 ; ++i) {
- if (inner->dep_parent == 0 || inner->dep_parent == std::numeric_limits<uint32_t>::max()) {
+ for (const Cohort *inner = child; i < 1000; ++i) {
+ if (inner->dep_parent == 0 || inner->dep_parent == DEP_NO_PARENT) {
retval = false;
break;
}
- std::map<uint32_t,Cohort*>::iterator it = gWindow->cohort_map.find(inner->dep_parent);
+ std::map<uint32_t, Cohort*>::iterator it = gWindow->cohort_map.find(inner->dep_parent);
if (it != gWindow->cohort_map.end()) {
inner = it->second;
}
@@ -78,10 +78,9 @@ bool GrammarApplicator::isChildOf(const Cohort *child, const Cohort *parent) {
if (i == 1000) {
if (verbosity_level > 0) {
u_fprintf(
- ux_stderr,
- "Warning: While testing whether %u is a child of %u the counter exceeded 1000 indicating a loop higher up in the tree.\n",
- child->global_number, parent->global_number
- );
+ ux_stderr,
+ "Warning: While testing whether %u is a child of %u the counter exceeded 1000 indicating a loop higher up in the tree.\n",
+ child->global_number, parent->global_number);
}
}
}
@@ -105,12 +104,12 @@ bool GrammarApplicator::wouldParentChildLoop(const Cohort *parent, const Cohort
}
else {
size_t i = 0;
- for (const Cohort *inner = parent ; i<1000 ; ++i) {
- if (inner->dep_parent == 0 || inner->dep_parent == std::numeric_limits<uint32_t>::max()) {
+ for (const Cohort *inner = parent; i < 1000; ++i) {
+ if (inner->dep_parent == 0 || inner->dep_parent == DEP_NO_PARENT) {
retval = false;
break;
}
- std::map<uint32_t,Cohort*>::iterator it = gWindow->cohort_map.find(inner->dep_parent);
+ std::map<uint32_t, Cohort*>::iterator it = gWindow->cohort_map.find(inner->dep_parent);
if (it != gWindow->cohort_map.end()) {
inner = it->second;
}
@@ -125,10 +124,9 @@ bool GrammarApplicator::wouldParentChildLoop(const Cohort *parent, const Cohort
if (i == 1000) {
if (verbosity_level > 0) {
u_fprintf(
- ux_stderr,
- "Warning: While testing whether %u and %u would loop the counter exceeded 1000 indicating a loop higher up in the tree.\n",
- child->global_number, parent->global_number
- );
+ ux_stderr,
+ "Warning: While testing whether %u and %u would loop the counter exceeded 1000 indicating a loop higher up in the tree.\n",
+ child->global_number, parent->global_number);
}
}
}
@@ -139,9 +137,9 @@ bool GrammarApplicator::wouldParentChildCross(const Cohort *parent, const Cohort
uint32_t mn = std::min(parent->global_number, child->global_number);
uint32_t mx = std::max(parent->global_number, child->global_number);
- for (uint32_t i = mn+1 ; i<mx ; ++i) {
- std::map<uint32_t,Cohort*>::iterator it = gWindow->cohort_map.find(parent->dep_parent);
- if (it != gWindow->cohort_map.end() && it->second->dep_parent != std::numeric_limits<uint32_t>::max()) {
+ for (uint32_t i = mn + 1; i < mx; ++i) {
+ std::map<uint32_t, Cohort*>::iterator it = gWindow->cohort_map.find(parent->dep_parent);
+ if (it != gWindow->cohort_map.end() && it->second->dep_parent != DEP_NO_PARENT) {
if (it->second->dep_parent < mn || it->second->dep_parent > mx) {
return true;
}
@@ -158,10 +156,9 @@ bool GrammarApplicator::attachParentChild(Cohort& parent, Cohort& child, bool al
if (!allowloop && dep_block_loops && wouldParentChildLoop(&parent, &child)) {
if (verbosity_level > 0) {
u_fprintf(
- ux_stderr,
- "Warning: Dependency between %u and %u would cause a loop. Will not attach them.\n",
- child.global_number, parent.global_number
- );
+ ux_stderr,
+ "Warning: Dependency between %u and %u would cause a loop. Will not attach them.\n",
+ child.global_number, parent.global_number);
}
return false;
}
@@ -169,18 +166,17 @@ bool GrammarApplicator::attachParentChild(Cohort& parent, Cohort& child, bool al
if (!allowcrossing && dep_block_crossing && wouldParentChildCross(&parent, &child)) {
if (verbosity_level > 0) {
u_fprintf(
- ux_stderr,
- "Warning: Dependency between %u and %u would cause crossing branches. Will not attach them.\n",
- child.global_number, parent.global_number
- );
+ ux_stderr,
+ "Warning: Dependency between %u and %u would cause crossing branches. Will not attach them.\n",
+ child.global_number, parent.global_number);
}
return false;
}
- if (child.dep_parent == std::numeric_limits<uint32_t>::max()) {
+ if (child.dep_parent == DEP_NO_PARENT) {
child.dep_parent = child.dep_self;
}
- std::map<uint32_t,Cohort*>::iterator it = gWindow->cohort_map.find(child.dep_parent);
+ std::map<uint32_t, Cohort*>::iterator it = gWindow->cohort_map.find(child.dep_parent);
if (it != gWindow->cohort_map.end()) {
it->second->remChild(child.dep_self);
}
@@ -193,10 +189,9 @@ bool GrammarApplicator::attachParentChild(Cohort& parent, Cohort& child, bool al
if (!dep_has_spanned && child.parent != parent.parent) {
u_fprintf(
- ux_stderr,
- "Info: Dependency between %u and %u spans the window boundaries. Enumeration will be global from here on.\n",
- child.global_number, parent.global_number
- );
+ ux_stderr,
+ "Info: Dependency between %u and %u spans the window boundaries. Enumeration will be global from here on.\n",
+ child.global_number, parent.global_number);
dep_has_spanned = true;
}
return true;
@@ -207,7 +202,7 @@ void GrammarApplicator::reflowDependencyWindow(uint32_t max) {
max = gWindow->next.back()->cohorts[1]->global_number;
}
- if (gWindow->dep_window.empty()) {
+ if (gWindow->dep_window.empty() || gWindow->dep_window.begin()->second->parent == 0) {
gWindow->dep_window[0] = gWindow->current->cohorts[0];
}
else if (gWindow->dep_window.find(0) == gWindow->dep_window.end()) {
@@ -263,20 +258,19 @@ void GrammarApplicator::reflowDependencyWindow(uint32_t max) {
if (max && cohort->global_number >= max) {
break;
}
- if (cohort->dep_parent == std::numeric_limits<uint32_t>::max()) {
+ if (cohort->dep_parent == DEP_NO_PARENT) {
continue;
}
if (cohort->dep_self == cohort->global_number) {
if (!(cohort->type & CT_DEP_DONE) && gWindow->dep_map.find(cohort->dep_parent) == gWindow->dep_map.end()) {
if (verbosity_level > 0) {
u_fprintf(
- ux_stderr,
- "Warning: Parent %u of dep %u in cohort %u of window %u does not exist - ignoring.\n",
- cohort->dep_parent, cohort->dep_self, cohort->local_number, cohort->parent->number
- );
+ ux_stderr,
+ "Warning: Parent %u of dep %u in cohort %u of window %u does not exist - ignoring.\n",
+ cohort->dep_parent, cohort->dep_self, cohort->local_number, cohort->parent->number);
u_fflush(ux_stderr);
}
- cohort->dep_parent = std::numeric_limits<uint32_t>::max();
+ cohort->dep_parent = DEP_NO_PARENT;
}
else {
if (!(cohort->type & CT_DEP_DONE)) {
@@ -304,34 +298,32 @@ void GrammarApplicator::reflowRelationWindow(uint32_t max) {
cohort = cohort->prev;
}
- for ( ; cohort ; cohort = cohort->next) {
+ for (; cohort; cohort = cohort->next) {
if (max && cohort->global_number >= max) {
break;
}
- if (!cohort->relations_input.empty()) {
- for (RelationCtn::iterator rel = cohort->relations_input.begin() ; rel != cohort->relations_input.end() ; ) {
- static uint32SortedVector newrel;
- newrel.clear();
+ for (RelationCtn::iterator rel = cohort->relations_input.begin(); rel != cohort->relations_input.end();) {
+ BOOST_AUTO(newrel, ss_u32sv.get());
- boost_foreach (uint32_t target, rel->second) {
- uint32FlatHashMap::iterator it = gWindow->relation_map.find(target);
- if (it != gWindow->relation_map.end()) {
- cohort->relations[rel->first].insert(it->second);
- }
- else {
- newrel.insert(target);
- }
- }
-
- if (newrel.empty()) {
- cohort->relations_input.erase(rel++);
+ boost_foreach (uint32_t target, rel->second) {
+ uint32FlatHashMap::iterator it = gWindow->relation_map.find(target);
+ if (it != gWindow->relation_map.end()) {
+ cohort->relations[rel->first].insert(it->second);
}
else {
- rel->second = newrel;
- ++rel;
+ newrel->insert(target);
}
}
+
+ // Defer missing relations for later
+ if (newrel->empty()) {
+ cohort->relations_input.erase(rel++);
+ }
+ else {
+ rel->second = newrel;
+ ++rel;
+ }
}
}
}
@@ -351,7 +343,7 @@ void GrammarApplicator::reflowReading(Reading& reading) {
Reading::tags_list_t tlist;
tlist.swap(reading.tags_list);
- const_foreach (Reading::tags_list_t, tlist, tter, tter_end) {
+ foreach (tter, tlist) {
addTagToReading(reading, *tter, false);
}
@@ -366,14 +358,13 @@ Tag *GrammarApplicator::generateVarstringTag(const Tag *tag) {
// Replace unified sets with their matching tags
if (tag->vs_sets) {
- for (size_t i=0 ; i<tag->vs_sets->size() ; ++i) {
- static TagList tags;
- tags.clear();
+ for (size_t i = 0; i < tag->vs_sets->size(); ++i) {
+ BOOST_AUTO(tags, ss_taglist.get());
getTagList(*(*tag->vs_sets)[i], tags);
static UString rpl;
rpl.clear();
// If there are multiple tags, such as from CompositeTags, put _ between them
- const_foreach (TagList, tags, iter, iter_end) {
+ foreach (iter, *tags) {
rpl += (*iter)->tag;
if (std::distance(iter, iter_end) > 1) {
rpl += '_';
@@ -385,8 +376,8 @@ Tag *GrammarApplicator::generateVarstringTag(const Tag *tag) {
}
// Replace $1-$9 with their respective match groups
- for (size_t i = 0; i<regexgrps.first && i<9; ++i) {
- tmp.findAndReplace(stringbits[S_VS1+i], (*regexgrps.second)[i]);
+ for (size_t i = 0; i < regexgrps.first && i < 9; ++i) {
+ tmp.findAndReplace(stringbits[S_VS1 + i], (*regexgrps.second)[i]);
did_something = true;
}
@@ -413,7 +404,7 @@ Tag *GrammarApplicator::generateVarstringTag(const Tag *tag) {
mpos = std::max(mpos, pos);
}
if (found && mpos != -1) {
- UChar mode = tmp[mpos+1];
+ UChar mode = tmp[mpos + 1];
tmp.remove(mpos, 2);
if (mode == 'u') {
UnicodeString range(tmp, mpos, 1);
@@ -488,7 +479,7 @@ uint32_t GrammarApplicator::addTagToReading(Reading& reading, Tag *tag, bool reh
}
reading.mapping = tag;
}
- if (tag->type & (T_TEXTUAL|T_WORDFORM|T_BASEFORM)) {
+ if (tag->type & (T_TEXTUAL | T_WORDFORM | T_BASEFORM)) {
reading.tags_textual.insert(tag->hash);
reading.tags_textual_bloom.insert(tag->hash);
}
@@ -503,7 +494,7 @@ uint32_t GrammarApplicator::addTagToReading(Reading& reading, Tag *tag, bool reh
reading.parent->dep_self = tag->dep_self;
reading.parent->dep_parent = tag->dep_parent;
if (tag->dep_parent == tag->dep_self) {
- reading.parent->dep_parent = std::numeric_limits<uint32_t>::max();
+ reading.parent->dep_parent = DEP_NO_PARENT;
}
has_dep = true;
}
@@ -524,6 +515,32 @@ uint32_t GrammarApplicator::addTagToReading(Reading& reading, Tag *tag, bool reh
if (rehash) {
reading.rehash();
}
+
+ if (grammar->has_bag_of_tags) {
+ Reading& bot = reading.parent->parent->bag_of_tags;
+ bot.tags.insert(tag->hash);
+ bot.tags_list.push_back(tag->hash);
+ bot.tags_bloom.insert(tag->hash);
+
+ if (tag->type & (T_TEXTUAL | T_WORDFORM | T_BASEFORM)) {
+ bot.tags_textual.insert(tag->hash);
+ bot.tags_textual_bloom.insert(tag->hash);
+ }
+ if (tag->type & T_NUMERICAL) {
+ bot.tags_numerical[tag->hash] = tag;
+ }
+ if (!reading.baseform && (tag->type & T_BASEFORM)) {
+ bot.baseform = tag->hash;
+ }
+ if (!(tag->type & T_SPECIAL)) {
+ bot.tags_plain.insert(tag->hash);
+ bot.tags_plain_bloom.insert(tag->hash);
+ }
+ if (rehash) {
+ bot.rehash();
+ }
+ }
+
return tag->hash;
}
@@ -565,7 +582,7 @@ bool GrammarApplicator::unmapReading(Reading& reading, const uint32_t rule) {
}
void GrammarApplicator::splitMappings(TagList& mappings, Cohort& cohort, Reading& reading, bool mapped) {
- for (TagList::iterator it = mappings.begin() ; it != mappings.end() ;) {
+ for (TagList::iterator it = mappings.begin(); it != mappings.end();) {
Tag *& tag = *it;
while (tag->type & T_VARSTRING) {
tag = generateVarstringTag(tag);
@@ -587,14 +604,11 @@ void GrammarApplicator::splitMappings(TagList& mappings, Cohort& cohort, Reading
Tag *tag = mappings.back();
mappings.pop_back();
size_t i = mappings.size();
- foreach (TagList, mappings, ttag, ttag_end) {
+ foreach (ttag, mappings) {
// To avoid duplicating needlessly many times, check for a similar reading in the cohort that's already got this mapping
bool found = false;
- foreach (ReadingList, cohort.readings, itr, itr_end) {
- if ((*itr)->hash_plain == reading.hash_plain
- && (*itr)->mapping
- && (*itr)->mapping->hash == (*ttag)->hash
- ) {
+ foreach (itr, cohort.readings) {
+ if ((*itr)->hash_plain == reading.hash_plain && (*itr)->mapping && (*itr)->mapping->hash == (*ttag)->hash) {
found = true;
break;
}
@@ -651,19 +665,19 @@ void GrammarApplicator::splitAllMappings(all_mappings_t& all_mappings, Cohort& c
}
void GrammarApplicator::mergeReadings(ReadingList& readings) {
- static bc::flat_map<uint32_t, std::pair<uint32_t,Reading*> > mapped;
+ static bc::flat_map<uint32_t, std::pair<uint32_t, Reading*> > mapped;
mapped.clear();
mapped.reserve(readings.size());
static bc::flat_map<uint32_t, ReadingList> mlist;
mlist.clear();
mlist.reserve(readings.size());
- foreach (ReadingList, readings, iter, iter_end) {
+ foreach (iter, readings) {
Reading *r = *iter;
uint32_t hp = r->hash_plain, hplain = r->hash_plain;
uint32_t nm = 0;
if (trace) {
- foreach (uint32Vector, r->hit_by, iter_hb, iter_hb_end) {
+ foreach (iter_hb, r->hit_by) {
hp = hash_value(*iter_hb, hp);
}
}
@@ -675,7 +689,7 @@ void GrammarApplicator::mergeReadings(ReadingList& readings) {
hp = hash_value(sub->hash_plain, hp);
hplain = hash_value(sub->hash_plain, hplain);
if (trace) {
- foreach (uint32Vector, sub->hit_by, iter_hb, iter_hb_end) {
+ foreach (iter_hb, sub->hit_by) {
hp = hash_value(*iter_hb, hp);
}
}
@@ -693,7 +707,7 @@ void GrammarApplicator::mergeReadings(ReadingList& readings) {
}
}
mapped[hplain] = std::make_pair(nm, r);
- mlist[hp+nm].push_back(r);
+ mlist[hp + nm].push_back(r);
}
if (mlist.size() == readings.size()) {
@@ -704,13 +718,13 @@ void GrammarApplicator::mergeReadings(ReadingList& readings) {
static std::vector<Reading*> order;
order.clear();
- for (BOOST_AUTO(miter, mlist.begin()) ; miter != mlist.end() ; miter++) {
+ for (BOOST_AUTO(miter, mlist.begin()); miter != mlist.end(); miter++) {
const ReadingList& clist = miter->second;
Reading *nr = alloc_reading(*(clist.front()));
if (nr->mapping) {
erase(nr->tags_list, nr->mapping->hash);
}
- const_foreach (ReadingList, clist, iter1, iter1_end) {
+ foreach (iter1, clist) {
if ((*iter1)->mapping && std::find(nr->tags_list.begin(), nr->tags_list.end(), (*iter1)->mapping->hash) == nr->tags_list.end()) {
nr->tags_list.push_back((*iter1)->mapping->hash);
}
@@ -737,7 +751,7 @@ Cohort *GrammarApplicator::delimitAt(SingleWindow& current, Cohort *cohort) {
nwin = current.parent->allocPushSingleWindow();
}
else {
- foreach (SingleWindowCont, current.parent->next, iter, iter_end) {
+ foreach (iter, current.parent->next) {
if (*iter == &current) {
nwin = current.parent->allocSingleWindow();
current.parent->next.insert(++iter, nwin);
@@ -745,7 +759,7 @@ Cohort *GrammarApplicator::delimitAt(SingleWindow& current, Cohort *cohort) {
}
}
if (!nwin) {
- foreach (SingleWindowCont, current.parent->previous, iter, iter_end) {
+ foreach (iter, current.parent->previous) {
if (*iter == &current) {
nwin = current.parent->allocSingleWindow();
current.parent->previous.insert(iter, nwin);
@@ -775,18 +789,18 @@ Cohort *GrammarApplicator::delimitAt(SingleWindow& current, Cohort *cohort) {
nwin->appendCohort(cCohort);
uint32_t c = cohort->local_number;
- size_t nc = c+1;
- for ( ; nc < current.cohorts.size() ; nc++) {
+ size_t nc = c + 1;
+ for (; nc < current.cohorts.size(); nc++) {
current.cohorts[nc]->parent = nwin;
nwin->appendCohort(current.cohorts[nc]);
}
- c = current.cohorts.size()-c;
- for (nc = 0 ; nc < c-1 ; nc++) {
+ c = current.cohorts.size() - c;
+ for (nc = 0; nc < c - 1; nc++) {
current.cohorts.pop_back();
}
cohort = current.cohorts.back();
- foreach (ReadingList, cohort->readings, rter3, rter3_end) {
+ foreach (rter3, cohort->readings) {
Reading *reading = *rter3;
addTagToReading(*reading, endtag);
}
@@ -799,7 +813,7 @@ void GrammarApplicator::reflowTextuals_Reading(Reading& r) {
if (r.next) {
reflowTextuals_Reading(*r.next);
}
- const_foreach (uint32SortedVector, r.tags, it, it_end) {
+ foreach (it, r.tags) {
Tag *tag = single_tags.find(*it)->second;
if (tag->type & T_TEXTUAL) {
r.tags_textual.insert(*it);
@@ -809,37 +823,36 @@ void GrammarApplicator::reflowTextuals_Reading(Reading& r) {
}
void GrammarApplicator::reflowTextuals_Cohort(Cohort& c) {
- foreach (CohortVector, c.enclosed, it, it_end) {
+ foreach (it, c.enclosed) {
reflowTextuals_Cohort(**it);
}
- foreach (CohortVector, c.removed, it, it_end) {
+ foreach (it, c.removed) {
reflowTextuals_Cohort(**it);
}
- foreach (ReadingList, c.readings, it, it_end) {
+ foreach (it, c.readings) {
reflowTextuals_Reading(**it);
}
- foreach (ReadingList, c.deleted, it, it_end) {
+ foreach (it, c.deleted) {
reflowTextuals_Reading(**it);
}
- foreach (ReadingList, c.delayed, it, it_end) {
+ foreach (it, c.delayed) {
reflowTextuals_Reading(**it);
}
}
void GrammarApplicator::reflowTextuals_SingleWindow(SingleWindow& sw) {
- foreach (CohortVector, sw.cohorts, it, it_end) {
+ foreach (it, sw.cohorts) {
reflowTextuals_Cohort(**it);
}
}
void GrammarApplicator::reflowTextuals() {
- foreach (SingleWindowCont, gWindow->previous, swit, swit_end) {
+ foreach (swit, gWindow->previous) {
reflowTextuals_SingleWindow(**swit);
}
reflowTextuals_SingleWindow(*gWindow->current);
- foreach (SingleWindowCont, gWindow->next, swit, swit_end) {
+ foreach (swit, gWindow->next) {
reflowTextuals_SingleWindow(**swit);
}
}
-
}
diff --git a/src/GrammarApplicator_runContextualTest.cpp b/src/GrammarApplicator_runContextualTest.cpp
index 650036b..7e30e2a 100644
--- a/src/GrammarApplicator_runContextualTest.cpp
+++ b/src/GrammarApplicator_runContextualTest.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -36,6 +36,28 @@ Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *t
mark = cohort;
}
if (test->pos & POS_ATTACH_TO) {
+ if (attach_to != cohort) {
+ // Clear readings for rules that care about readings
+ ReadingList *lists[3] = { &cohort->readings };
+ if (test->pos & POS_LOOK_DELETED) {
+ lists[1] = &cohort->deleted;
+ }
+ if (test->pos & POS_LOOK_DELAYED) {
+ lists[2] = &cohort->delayed;
+ }
+
+ for (size_t i = 0; i < 3; ++i) {
+ if (lists[i] == 0) {
+ continue;
+ }
+ foreach (iter, *lists[i]) {
+ Reading *reading = *iter;
+ reading->matched_target = false;
+ reading->matched_tests = false;
+ }
+ }
+ }
+
attach_to = cohort;
}
if (deep) {
@@ -48,6 +70,7 @@ Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *t
*retval = doesSetMatchCohortCareful(*cohort, test->target, &context);
if (!context.matched_target && (test->pos & POS_SCANFIRST)) {
context.did_test = true;
+ // Intentionally ignoring return value to set up context.matched_target
doesSetMatchCohortNormal(*cohort, test->target, &context);
}
}
@@ -55,7 +78,7 @@ Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *t
*retval = doesSetMatchCohortNormal(*cohort, test->target, &context);
}
- if (origin && (test->offset != 0 || (test->pos & (POS_SCANALL|POS_SCANFIRST))) && origin == cohort && origin->local_number != 0) {
+ if (origin && (test->offset != 0 || (test->pos & (POS_SCANALL | POS_SCANFIRST))) && origin == cohort && origin->local_number != 0) {
if (!(test->pos & POS_NOT)) {
*retval = false;
}
@@ -64,7 +87,7 @@ Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *t
if (context.matched_target && (test->pos & POS_SCANFIRST)) {
rvs |= TRV_BREAK;
}
- else if (!(test->pos & (POS_SCANALL|POS_SCANFIRST|POS_SELF))) {
+ else if (!(test->pos & (POS_SCANALL | POS_SCANFIRST | POS_SELF))) {
rvs |= TRV_BREAK;
}
@@ -73,7 +96,7 @@ Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *t
context.origin = 0;
context.did_test = true;
if (test->barrier) {
- context.options = test->pos & ~POS_CAREFUL;
+ dSMC_Context context = { 0, 0, 0, test->pos & ~POS_CAREFUL, false, false, false, true };
bool barrier = doesSetMatchCohortNormal(*cohort, test->barrier, &context);
if (barrier) {
seen_barrier = true;
@@ -81,7 +104,7 @@ Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *t
}
}
if (test->cbarrier) {
- context.options = test->pos | POS_CAREFUL;
+ dSMC_Context context = { 0, 0, 0, test->pos | POS_CAREFUL, false, false, false, true };
bool cbarrier = doesSetMatchCohortCareful(*cohort, test->cbarrier, &context);
if (cbarrier) {
seen_barrier = true;
@@ -110,7 +133,7 @@ Cohort *GrammarApplicator::runSingleTest(SingleWindow *sWindow, size_t i, const
Cohort *getCohortInWindow(SingleWindow *& sWindow, size_t position, const ContextualTest *test, int32_t& pos) {
Cohort *cohort = 0;
pos = static_cast<int32_t>(position) + test->offset;
- // ToDo: (NOT *) and (*C) tests can be cached
+ // ToDo: (NOT*) and (*C) tests can be cached
if (test->pos & POS_ABSOLUTE) {
if (test->offset < 0) {
pos = static_cast<int32_t>(sWindow->cohorts.size()) + test->offset;
@@ -120,15 +143,15 @@ Cohort *getCohortInWindow(SingleWindow *& sWindow, size_t position, const Contex
}
}
if (pos >= 0) {
- if (pos >= static_cast<int32_t>(sWindow->cohorts.size()) && (test->pos & (POS_SPAN_RIGHT|POS_SPAN_BOTH)) && sWindow->next) {
+ if (pos >= static_cast<int32_t>(sWindow->cohorts.size()) && (test->pos & (POS_SPAN_RIGHT | POS_SPAN_BOTH)) && sWindow->next) {
sWindow = sWindow->next;
pos = 0;
}
}
else {
- if ((test->pos & (POS_SPAN_LEFT|POS_SPAN_BOTH)) && sWindow->previous) {
+ if ((test->pos & (POS_SPAN_LEFT | POS_SPAN_BOTH)) && sWindow->previous) {
sWindow = sWindow->previous;
- pos = static_cast<int32_t>(sWindow->cohorts.size())-1;
+ pos = static_cast<int32_t>(sWindow->cohorts.size()) - 1;
}
}
if (pos >= 0 && pos < static_cast<int32_t>(sWindow->cohorts.size())) {
@@ -137,6 +160,91 @@ Cohort *getCohortInWindow(SingleWindow *& sWindow, size_t position, const Contex
return cohort;
}
+bool GrammarApplicator::posOutputHelper(const SingleWindow *sWindow, uint32_t position, const ContextualTest *test, const Cohort *cohort, const Cohort *cdeep) {
+ bool good = false;
+
+ const Cohort *cs[4] = {
+ cohort,
+ cdeep,
+ cohort,
+ cdeep,
+ };
+ if (!tmpl_cntxs.empty()) {
+ cs[2] = tmpl_cntxs.back().min;
+ cs[3] = tmpl_cntxs.back().max;
+ }
+
+ std::sort(cs, cs + 4, compare_Cohort());
+
+ // If the override included * or @, don't care about offsets
+ if (test->pos & (POS_SCANFIRST | POS_SCANALL | POS_ABSOLUTE)) {
+ good = true;
+ }
+ else {
+ // ...otherwise, positive offsets need to match the leftmost of entry/exit
+ if (test->offset > 0 && static_cast<int32_t>(cs[0]->local_number) - static_cast<int32_t>(position) == test->offset) {
+ good = true;
+ }
+ // ...and, negative offsets need to match the rightmost of entry/exit
+ else if (test->offset < 0 && static_cast<int32_t>(cs[3]->local_number) - static_cast<int32_t>(position) == test->offset) {
+ good = true;
+ }
+ }
+ if (!(test->pos & (POS_SPAN_BOTH | POS_SPAN_LEFT | POS_SPAN_RIGHT)) && cdeep->parent != sWindow) {
+ good = false;
+ }
+ if (!(test->pos & POS_PASS_ORIGIN)) {
+ if (test->offset < 0 && cs[3]->local_number > position) {
+ good = false;
+ }
+ else if (test->offset > 0 && cs[0]->local_number < position) {
+ good = false;
+ }
+ }
+ return good;
+}
+
+Cohort *GrammarApplicator::runContextualTest_tmpl(SingleWindow *sWindow, size_t position, const ContextualTest *test, ContextualTest *tmpl, Cohort *& cdeep, Cohort *origin) {
+ if (test->linked) {
+ tmpl_cntxs.push_back(test->linked);
+ }
+
+ uint64_t orgpos = tmpl->pos;
+ int32_t orgoffset = tmpl->offset;
+ uint32_t orgcbar = tmpl->cbarrier;
+ uint32_t orgbar = tmpl->barrier;
+ if (test->pos & POS_TMPL_OVERRIDE) {
+ tmpl->pos = test->pos;
+ tmpl->pos &= ~(POS_NEGATE | POS_NOT | POS_MARK_JUMP);
+ tmpl->offset = test->offset;
+ if (test->offset != 0 && !(test->pos & (POS_SCANFIRST | POS_SCANALL | POS_ABSOLUTE))) {
+ tmpl->pos |= POS_SCANALL;
+ }
+ if (test->cbarrier) {
+ tmpl->cbarrier = test->cbarrier;
+ }
+ if (test->barrier) {
+ tmpl->barrier = test->barrier;
+ }
+ }
+ Cohort *cohort = runContextualTest(sWindow, position, tmpl, &cdeep, origin);
+ if (test->pos & POS_TMPL_OVERRIDE) {
+ tmpl->pos = orgpos;
+ tmpl->offset = orgoffset;
+ tmpl->cbarrier = orgcbar;
+ tmpl->barrier = orgbar;
+ if (cohort && cdeep && test->offset != 0 && !posOutputHelper(sWindow, position, test, cohort, cdeep)) {
+ cohort = 0;
+ }
+ }
+
+ if (test->linked) {
+ tmpl_cntxs.pop_back();
+ }
+
+ return cohort;
+}
+
Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t position, const ContextualTest *test, Cohort **deep, Cohort *origin) {
if (test->pos & POS_UNKNOWN) {
u_fprintf(ux_stderr, "Error: Contextual tests with position '?' cannot be used directly. Provide an override position.\n");
@@ -158,52 +266,8 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
int32_t pos = 0;
if (test->tmpl) {
- uint64_t orgpos = test->tmpl->pos;
- int32_t orgoffset = test->tmpl->offset;
- uint32_t orgcbar = test->tmpl->cbarrier;
- uint32_t orgbar = test->tmpl->barrier;
- if (test->pos & POS_TMPL_OVERRIDE) {
- test->tmpl->pos = test->pos;
- test->tmpl->pos &= ~(POS_NEGATE|POS_NOT|POS_MARK_JUMP);
- test->tmpl->offset = test->offset;
- if (test->offset != 0 && !(test->pos & (POS_SCANFIRST|POS_SCANALL|POS_ABSOLUTE))) {
- test->tmpl->pos |= POS_SCANALL;
- }
- if (test->cbarrier) {
- test->tmpl->cbarrier = test->cbarrier;
- }
- if (test->barrier) {
- test->tmpl->barrier = test->barrier;
- }
- }
Cohort *cdeep = 0;
- cohort = runContextualTest(sWindow, position, test->tmpl, &cdeep, origin);
- if (test->pos & POS_TMPL_OVERRIDE) {
- test->tmpl->pos = orgpos;
- test->tmpl->offset = orgoffset;
- test->tmpl->cbarrier = orgcbar;
- test->tmpl->barrier = orgbar;
- // ToDo: Being in a different window is not strictly a problem, so fix this assumption...
- if (cdeep && test->offset != 0) {
- int32_t reloff = int32_t(cdeep->local_number) - int32_t(position);
- if (!(test->pos & (POS_SCANFIRST|POS_SCANALL|POS_ABSOLUTE))) {
- if (cdeep->parent != sWindow || reloff != test->offset) {
- cohort = 0;
- }
- }
- if (!(test->pos & POS_PASS_ORIGIN)) {
- if (test->offset < 0 && reloff >= 0) {
- cohort = 0;
- }
- else if (test->offset > 0 && reloff <= 0) {
- cohort = 0;
- }
- }
- }
- }
- if (cohort && cdeep && test->linked) {
- cohort = runContextualTest(cdeep->parent, cdeep->local_number, test->linked, &cdeep, origin);
- }
+ cohort = runContextualTest_tmpl(sWindow, position, test, test->tmpl, cdeep, origin);
if (deep) {
*deep = cdeep;
}
@@ -211,55 +275,12 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
else if (!test->ors.empty()) {
Cohort *cdeep = 0;
boost_foreach (ContextualTest *iter, test->ors) {
- uint64_t orgpos = iter->pos;
- int32_t orgoffset = iter->offset;
- uint32_t orgcbar = iter->cbarrier;
- uint32_t orgbar = iter->barrier;
- if (test->pos & POS_TMPL_OVERRIDE) {
- iter->pos = test->pos;
- iter->pos &= ~(POS_TMPL_OVERRIDE|POS_NEGATE|POS_NOT|POS_MARK_JUMP);
- iter->offset = test->offset;
- if (test->offset != 0 && !(test->pos & (POS_SCANFIRST|POS_SCANALL|POS_ABSOLUTE))) {
- iter->pos |= POS_SCANALL;
- }
- if (test->cbarrier) {
- iter->cbarrier = test->cbarrier;
- }
- if (test->barrier) {
- iter->barrier = test->barrier;
- }
- }
dep_deep_seen.clear();
- cohort = runContextualTest(sWindow, position, iter, &cdeep, origin);
- if (test->pos & POS_TMPL_OVERRIDE) {
- iter->pos = orgpos;
- iter->offset = orgoffset;
- iter->cbarrier = orgcbar;
- iter->barrier = orgbar;
- if (cdeep && test->offset != 0) {
- int32_t reloff = int32_t(cdeep->local_number) - int32_t(position);
- if (!(test->pos & (POS_SCANFIRST|POS_SCANALL|POS_ABSOLUTE))) {
- if (cdeep->parent != sWindow || reloff != test->offset) {
- cohort = 0;
- }
- }
- if (!(test->pos & POS_PASS_ORIGIN)) {
- if (test->offset < 0 && reloff >= 0) {
- cohort = 0;
- }
- else if (test->offset > 0 && reloff <= 0) {
- cohort = 0;
- }
- }
- }
- }
+ cohort = runContextualTest_tmpl(sWindow, position, test, iter, cdeep, origin);
if (cohort) {
break;
}
}
- if (cohort && cdeep && test->linked) {
- cohort = runContextualTest(cdeep->parent, cdeep->local_number, test->linked, &cdeep, origin);
- }
if (deep) {
*deep = cdeep;
}
@@ -281,6 +302,26 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
if (deep) {
*deep = cohort;
}
+ if (!tmpl_cntxs.empty()) {
+ tmpl_context_t& tmpl_cntx = tmpl_cntxs.back();
+ uint64_t gpos = (static_cast<uint64_t>(cohort->parent->number) << 32) | cohort->local_number;
+ if (tmpl_cntx.min == 0 || gpos < (static_cast<uint64_t>(tmpl_cntx.min->parent->number) << 32 | tmpl_cntx.min->local_number)) {
+ tmpl_cntx.min = cohort;
+ }
+ if (tmpl_cntx.max == 0 || gpos > (static_cast<uint64_t>(tmpl_cntx.max->parent->number) << 32 | tmpl_cntx.max->local_number)) {
+ tmpl_cntx.max = cohort;
+ }
+ if (deep) {
+ tmpl_context_t& tmpl_cntx = tmpl_cntxs.back();
+ uint64_t gpos = (static_cast<uint64_t>((*deep)->parent->number) << 32) | (*deep)->local_number;
+ if (tmpl_cntx.min == 0 || gpos < (static_cast<uint64_t>(tmpl_cntx.min->parent->number) << 32 | tmpl_cntx.min->local_number)) {
+ tmpl_cntx.min = *deep;
+ }
+ if (tmpl_cntx.max == 0 || gpos > (static_cast<uint64_t>(tmpl_cntx.max->parent->number) << 32 | tmpl_cntx.max->local_number)) {
+ tmpl_cntx.max = *deep;
+ }
+ }
+ }
CohortIterator *it = 0;
if ((test->pos & POS_DEP_PARENT) && (test->pos & POS_DEP_GLOB)) {
@@ -292,7 +333,7 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
else if (test->pos & POS_DEP_GLOB) {
it = &depDescendentIters[ci_depths[4]++];
}
- else if (test->pos & (POS_DEP_CHILD|POS_DEP_SIBLING)) {
+ else if (test->pos & (POS_DEP_CHILD | POS_DEP_SIBLING)) {
Cohort *nc = runDependencyTest(sWindow, cohort, test, deep, origin);
if (nc) {
cohort = nc;
@@ -306,7 +347,7 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
retval = !retval;
}
}
- else if (test->pos & (POS_LEFT_PAR|POS_RIGHT_PAR)) {
+ else if (test->pos & (POS_LEFT_PAR | POS_RIGHT_PAR)) {
Cohort *nc = runParenthesisTest(sWindow, cohort, test, deep, origin);
if (nc) {
cohort = nc;
@@ -329,7 +370,43 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
retval = !retval;
}
}
- else if (test->offset == 0 && (test->pos & (POS_SCANFIRST|POS_SCANALL))) {
+ else if (test->pos & POS_BAG_OF_TAGS) {
+ bool match = doesSetMatchReading(sWindow->bag_of_tags, test->target, true);
+ if (!match && (test->pos & (POS_SPAN_BOTH | POS_SPAN_LEFT | POS_SPAN_RIGHT))) {
+ SingleWindow *left = sWindow->previous, *right = sWindow->next;
+ while (left || right) {
+ if (left && (test->pos & (POS_SPAN_BOTH | POS_SPAN_LEFT))) {
+ match = doesSetMatchReading(left->bag_of_tags, test->target, true);
+ left = left->previous;
+ }
+ else {
+ left = 0;
+ }
+ if (right && (test->pos & (POS_SPAN_BOTH | POS_SPAN_RIGHT))) {
+ match = doesSetMatchReading(right->bag_of_tags, test->target, true);
+ right = right->next;
+ }
+ else {
+ right = 0;
+ }
+ if (match) {
+ break;
+ }
+ }
+ }
+ if (test->pos & POS_NOT) {
+ match = !match;
+ }
+ if (match) {
+ if (test->linked) {
+ cohort = runContextualTest(sWindow, position, test->linked, deep, origin);
+ }
+ }
+ else {
+ retval = false;
+ }
+ }
+ else if (test->offset == 0 && (test->pos & (POS_SCANFIRST | POS_SCANALL))) {
SingleWindow *right, *left;
int32_t rpos, lpos;
@@ -344,10 +421,10 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
goto label_gotACohort;
}
- for (uint32_t i=1 ; left || right ; i++) {
+ for (uint32_t i = 1; left || right; i++) {
if (left) {
rvs = 0;
- cohort = runSingleTest(left, lpos-i, test, rvs, &retval, deep, origin);
+ cohort = runSingleTest(left, lpos - i, test, rvs, &retval, deep, origin);
if ((rvs & TRV_BREAK) && retval) {
goto label_gotACohort;
}
@@ -357,11 +434,11 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
right = 0;
}
}
- else if (lpos-i == 0) {
- if ((test->pos & (POS_SPAN_BOTH|POS_SPAN_LEFT) || always_span)) {
+ else if (lpos - i == 0) {
+ if ((test->pos & (POS_SPAN_BOTH | POS_SPAN_LEFT) || always_span)) {
left = left->previous;
if (left) {
- lpos = i+left->cohorts.size();
+ lpos = i + left->cohorts.size();
}
}
else {
@@ -371,7 +448,7 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
}
if (right) {
rvs = 0;
- cohort = runSingleTest(right, rpos+i, test, rvs, &retval, deep, origin);
+ cohort = runSingleTest(right, rpos + i, test, rvs, &retval, deep, origin);
if ((rvs & TRV_BREAK) && retval) {
goto label_gotACohort;
}
@@ -381,10 +458,10 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
left = 0;
}
}
- else if (rpos+i == right->cohorts.size()-1) {
- if ((test->pos & (POS_SPAN_BOTH|POS_SPAN_RIGHT) || always_span)) {
+ else if (rpos + i == right->cohorts.size() - 1) {
+ if ((test->pos & (POS_SPAN_BOTH | POS_SPAN_RIGHT) || always_span)) {
right = right->next;
- rpos = (0-i)-1;
+ rpos = (0 - i) - 1;
}
else {
right = 0;
@@ -416,14 +493,16 @@ Cohort *GrammarApplicator::runContextualTest(SingleWindow *sWindow, size_t posit
}
if (!(rvs & TRV_BREAK)) {
Cohort *current = cohort;
- for (; *it != CohortIterator(0) ; ++(*it)) {
+ for (; *it != CohortIterator(0); ++(*it)) {
++seen;
- if ((test->pos & POS_LEFT) && (**it)->global_number >= current->global_number) {
+ if ((test->pos & POS_LEFT) && less_Cohort(current, **it)) {
nc = 0;
+ retval = false;
break;
}
- if ((test->pos & POS_RIGHT) && (**it)->global_number <= current->global_number) {
+ if ((test->pos & POS_RIGHT) && !less_Cohort(current, **it)) {
nc = 0;
+ retval = false;
break;
}
nc = runSingleTest(**it, test, rvs, &retval, deep, origin);
@@ -527,12 +606,8 @@ Cohort *GrammarApplicator::runDependencyTest(SingleWindow *sWindow, Cohort *curr
deps = &(current->parent->cohorts[0]->dep_children);
}
else {
- std::map<uint32_t,Cohort*>::iterator it = current->parent->parent->cohort_map.find(current->dep_parent);
- if (current->parent && current->parent->parent
- && it != current->parent->parent->cohort_map.end()
- && it->second
- && !it->second->dep_children.empty()
- ) {
+ std::map<uint32_t, Cohort*>::iterator it = current->parent->parent->cohort_map.find(current->dep_parent);
+ if (it != current->parent->parent->cohort_map.end() && it->second && !it->second->dep_children.empty()) {
deps = &(it->second->dep_children);
}
else {
@@ -565,7 +640,7 @@ Cohort *GrammarApplicator::runDependencyTest(SingleWindow *sWindow, Cohort *curr
deps = &tmp_deps;
}
- const_foreach (uint32SortedVector, *deps, dter, dter_end) {
+ foreach (dter, *deps) {
if (*dter == current->global_number && !(test->pos & POS_SELF)) {
continue;
}
@@ -587,13 +662,12 @@ Cohort *GrammarApplicator::runDependencyTest(SingleWindow *sWindow, Cohort *curr
}
bool good = true;
if (current->parent != cohort->parent) {
- if ((!(test->pos & (POS_SPAN_BOTH|POS_SPAN_LEFT))) && cohort->parent->number < current->parent->number) {
+ if ((!(test->pos & (POS_SPAN_BOTH | POS_SPAN_LEFT))) && cohort->parent->number < current->parent->number) {
good = false;
}
- else if ((!(test->pos & (POS_SPAN_BOTH|POS_SPAN_RIGHT))) && cohort->parent->number > current->parent->number) {
+ else if ((!(test->pos & (POS_SPAN_BOTH | POS_SPAN_RIGHT))) && cohort->parent->number > current->parent->number) {
good = false;
}
-
}
bool retval = false;
uint8_t rvs = 0;
@@ -660,9 +734,9 @@ Cohort *GrammarApplicator::runRelationTest(SingleWindow *sWindow, Cohort *curren
CohortSet rels;
if (test->relation == grammar->tag_any) {
- const_foreach (RelationCtn, current->relations, riter, riter_end) {
+ foreach (riter, current->relations) {
boost_foreach (uint32_t citer, riter->second) {
- std::map<uint32_t,Cohort*>::iterator it = sWindow->parent->cohort_map.find(citer);
+ std::map<uint32_t, Cohort*>::iterator it = sWindow->parent->cohort_map.find(citer);
if (it != sWindow->parent->cohort_map.end()) {
rels.insert(it->second);
}
@@ -706,7 +780,7 @@ Cohort *GrammarApplicator::runRelationTest(SingleWindow *sWindow, Cohort *curren
}
Cohort *rv = 0;
- const_foreach (CohortSet, rels, iter, iter_end) {
+ foreach (iter, rels) {
uint8_t rvs = 0;
bool retval = false;
@@ -728,5 +802,4 @@ Cohort *GrammarApplicator::runRelationTest(SingleWindow *sWindow, Cohort *curren
return rv;
}
-
}
diff --git a/src/GrammarApplicator_runGrammar.cpp b/src/GrammarApplicator_runGrammar.cpp
index d523ca5..ee07da3 100644
--- a/src/GrammarApplicator_runGrammar.cpp
+++ b/src/GrammarApplicator_runGrammar.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -94,7 +94,7 @@ void GrammarApplicator::runGrammarOnText(istream& input, UFILE *output) {
index();
- uint32_t resetAfter = ((num_windows+4)*2+1);
+ uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
uint32_t lines = 0;
SingleWindow *cSWindow = 0;
@@ -113,16 +113,16 @@ void GrammarApplicator::runGrammarOnText(istream& input, UFILE *output) {
uint32FlatHashSet variables_rem;
uint32SortedVector variables_output;
- std::vector<std::pair<size_t,Reading*> > indents;
+ std::vector<std::pair<size_t, Reading*> > indents;
all_mappings_t all_mappings;
while (!input.eof()) {
++lines;
size_t offset = 0, packoff = 0;
// Read as much of the next line as will fit in the current buffer
- while (input.gets(&line[offset], line.size()-offset-1)) {
+ while (input.gets(&line[offset], line.size() - offset - 1)) {
// Copy the segment just read to cleaned
- for (size_t i=offset ; i<line.size() ; ++i) {
+ for (size_t i = offset; i < line.size(); ++i) {
// Only copy one space character, regardless of how many are in input
if (ISSPACE(line[i]) && !ISNL(line[i])) {
cleaned[packoff++] = ' ';
@@ -132,25 +132,25 @@ void GrammarApplicator::runGrammarOnText(istream& input, UFILE *output) {
}
// Break if there is a newline
if (ISNL(line[i])) {
- cleaned[packoff+1] = cleaned[packoff] = 0;
+ cleaned[packoff + 1] = cleaned[packoff] = 0;
goto gotaline; // Oh how I wish C++ had break 2;
}
if (line[i] == 0) {
- cleaned[packoff+1] = cleaned[packoff] = 0;
+ cleaned[packoff + 1] = cleaned[packoff] = 0;
break;
}
cleaned[packoff++] = line[i];
}
// If we reached this, buffer wasn't big enough. Double the size of the buffer and try again.
- offset = line.size()-2;
- line.resize(line.size()*2, 0);
- cleaned.resize(line.size()+1, 0);
+ offset = line.size() - 2;
+ line.resize(line.size() * 2, 0);
+ cleaned.resize(line.size() + 1, 0);
}
-gotaline:
+ gotaline:
// Trim trailing whitespace
- while (cleaned[0] && ISSPACE(cleaned[packoff-1])) {
- cleaned[packoff-1] = 0;
+ while (cleaned[0] && ISSPACE(cleaned[packoff - 1])) {
+ cleaned[packoff - 1] = 0;
--packoff;
}
if (!ignoreinput && cleaned[0] == '"' && cleaned[1] == '<') {
@@ -177,7 +177,7 @@ gotaline:
}
if (cSWindow && cSWindow->cohorts.size() >= soft_limit && grammar->soft_delimiters && !did_soft_lookback) {
did_soft_lookback = true;
- reverse_foreach (CohortVector, cSWindow->cohorts, iter, iter_end) {
+ reverse_foreach (iter, cSWindow->cohorts) {
if (doesSetMatchCohortNormal(**iter, grammar->soft_delimiters->number)) {
did_soft_lookback = false;
Cohort *cohort = delimitAt(*cSWindow, *iter);
@@ -198,7 +198,7 @@ gotaline:
u_fprintf(ux_stderr, "Warning: Soft limit of %u cohorts reached at line %u but found suitable soft delimiter.\n", soft_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -215,7 +215,7 @@ gotaline:
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -385,7 +385,7 @@ gotaline:
}
cCohort->readings.back()->rehash();
}
- indents.push_back(std::make_pair(indent,cReading));
+ indents.push_back(std::make_pair(indent, cReading));
numReadings++;
// Check whether the cohort still belongs to the window, as per --dep-delimit
@@ -394,6 +394,10 @@ gotaline:
gWindow->dep_map.clear();
gWindow->dep_window.clear();
+ foreach (iter, cSWindow->cohorts.back()->readings) {
+ addTagToReading(**iter, endtag);
+ }
+
cSWindow = gWindow->allocAppendSingleWindow();
initEmptySingleWindow(cSWindow);
@@ -407,6 +411,14 @@ gotaline:
lSWindow = cSWindow;
++numWindows;
did_soft_lookback = false;
+
+ if (grammar->has_bag_of_tags) {
+ // This is slow and not 100% correct as it doesn't remove the tags from the previous window
+ cCohort->parent = cSWindow;
+ foreach (rit, cCohort->readings) {
+ reflowReading(**rit);
+ }
+ }
}
}
else {
@@ -416,7 +428,7 @@ gotaline:
u_fflush(ux_stderr);
}
}
-istext:
+ istext:
if (cleaned[0]) {
if (u_strcmp(&cleaned[0], stringbits[S_CMD_FLUSH].getTerminatedBuffer()) == 0) {
u_fprintf(ux_stderr, "Info: FLUSH encountered on line %u. Flushing...\n", numLines);
@@ -426,7 +438,7 @@ istext:
if (cCohort->readings.empty()) {
initEmptyCohort(*cCohort);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
cReading = lReading = 0;
@@ -480,7 +492,7 @@ istext:
}
else if (u_strncmp(&cleaned[0], stringbits[S_CMD_SETVAR].getTerminatedBuffer(), stringbits[S_CMD_SETVAR].length()) == 0) {
//u_fprintf(ux_stderr, "Info: SETVAR encountered on line %u.\n", numLines);
- cleaned[packoff-1] = 0;
+ cleaned[packoff - 1] = 0;
line[0] = 0;
UChar *s = &cleaned[stringbits[S_CMD_SETVAR].length()];
@@ -491,6 +503,9 @@ istext:
variables_set[tag->hash] = grammar->tag_any;
variables_rem.erase(tag->hash);
variables_output.insert(tag->hash);
+ if (cSWindow == 0) {
+ variables[tag->hash] = grammar->tag_any;
+ }
}
else {
uint32_t a = 0, b = 0;
@@ -506,14 +521,14 @@ istext:
}
if (c) {
c[0] = 0;
- s = c+1;
+ s = c + 1;
}
if (!d[1]) {
u_fprintf(ux_stderr, "Warning: SETVAR on line %u had no value after the =! Defaulting to value *.\n", numLines);
b = grammar->tag_any;
}
else {
- b = addTag(d+1)->hash;
+ b = addTag(d + 1)->hash;
}
if (!c) {
d = 0;
@@ -532,7 +547,7 @@ istext:
else {
a = addTag(s)->hash;
}
- s = c+1;
+ s = c + 1;
variables_set[a] = grammar->tag_any;
variables_rem.erase(a);
variables_output.insert(a);
@@ -553,7 +568,7 @@ istext:
}
else if (u_strncmp(&cleaned[0], stringbits[S_CMD_REMVAR].getTerminatedBuffer(), stringbits[S_CMD_REMVAR].length()) == 0) {
//u_fprintf(ux_stderr, "Info: REMVAR encountered on line %u.\n", numLines);
- cleaned[packoff-1] = 0;
+ cleaned[packoff - 1] = 0;
line[0] = 0;
UChar *s = &cleaned[stringbits[S_CMD_REMVAR].length()];
@@ -567,7 +582,7 @@ istext:
variables_rem.insert(a);
variables_output.insert(a);
}
- s = c+1;
+ s = c + 1;
c = u_strchr(s, ',');
}
if (s && s[0]) {
@@ -577,7 +592,7 @@ istext:
variables_output.insert(a);
}
}
-
+
if (line[0]) {
if (lCohort) {
lCohort->text += &line[0];
@@ -603,7 +618,7 @@ istext:
if (cCohort->readings.empty()) {
initEmptyCohort(*cCohort);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
cReading = 0;
@@ -643,5 +658,4 @@ CGCMD_EXIT:
u_fflush(ux_stderr);
}
}
-
}
diff --git a/src/GrammarApplicator_runRules.cpp b/src/GrammarApplicator_runRules.cpp
index 2406df2..bba0d5f 100644
--- a/src/GrammarApplicator_runRules.cpp
+++ b/src/GrammarApplicator_runRules.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -33,7 +33,7 @@
namespace CG3 {
enum {
- RV_NOTHING = 1,
+ RV_NOTHING = 1,
RV_SOMETHING = 2,
RV_DELIMITED = 4,
};
@@ -77,7 +77,7 @@ bool GrammarApplicator::updateValidRules(const uint32IntervalVector& rules, uint
Grammar::rules_by_tag_t::const_iterator it = grammar->rules_by_tag.find(hash);
if (it != grammar->rules_by_tag.end()) {
Cohort& c = *(reading.parent);
- const_foreach (uint32IntervalVector, (it->second), rsit, rsit_end) {
+ foreach (rsit, (it->second)) {
if (updateRuleToCohorts(c, *rsit) && rules.contains(*rsit)) {
intersects.insert(*rsit);
}
@@ -89,11 +89,11 @@ bool GrammarApplicator::updateValidRules(const uint32IntervalVector& rules, uint
void GrammarApplicator::indexSingleWindow(SingleWindow& current) {
current.valid_rules.clear();
current.rule_to_cohorts.resize(grammar->rule_by_number.size());
- boost_foreach(CohortSet& cs, current.rule_to_cohorts) {
+ boost_foreach (CohortSet& cs, current.rule_to_cohorts) {
cs.clear();
}
- foreach (CohortVector, current.cohorts, iter, iter_end) {
+ foreach (iter, current.cohorts) {
Cohort *c = *iter;
for (uint32_t psit = 0; psit < c->possible_sets.size(); ++psit) {
if (c->possible_sets.test(psit) == false) {
@@ -119,19 +119,19 @@ TagList GrammarApplicator::getTagList(const Set& theSet, bool unif_mode) const {
void GrammarApplicator::getTagList(const Set& theSet, TagList& theTags, bool unif_mode) const {
if (theSet.type & ST_SET_UNIFY) {
const Set& pSet = *(grammar->sets_list[theSet.sets[0]]);
- const_foreach (uint32Vector, pSet.sets, iter, iter_end) {
+ foreach (iter, pSet.sets) {
if (unif_sets->count(*iter)) {
getTagList(*(grammar->sets_list[*iter]), theTags);
}
}
}
else if (theSet.type & ST_TAG_UNIFY) {
- const_foreach (uint32Vector, theSet.sets, iter, iter_end) {
+ foreach (iter, theSet.sets) {
getTagList(*(grammar->sets_list[*iter]), theTags, true);
}
}
else if (!theSet.sets.empty()) {
- const_foreach (uint32Vector, theSet.sets, iter, iter_end) {
+ foreach (iter, theSet.sets) {
getTagList(*(grammar->sets_list[*iter]), theTags, unif_mode);
}
}
@@ -147,10 +147,10 @@ void GrammarApplicator::getTagList(const Set& theSet, TagList& theTags, bool uni
trie_getTagList(theSet.trie_special, theTags);
}
// Eliminate consecutive duplicates. Not all duplicates, since AddCohort and Append may have multiple readings with repeated tags
- for (TagList::iterator ot = theTags.begin() ; theTags.size() > 1 && ot != theTags.end() ; ++ot) {
+ for (TagList::iterator ot = theTags.begin(); theTags.size() > 1 && ot != theTags.end(); ++ot) {
TagList::iterator it = ot;
++it;
- for ( ; it != theTags.end() && std::distance(ot, it) == 1 ; ) {
+ for (; it != theTags.end() && std::distance(ot, it) == 1;) {
if (*ot == *it) {
it = theTags.erase(it);
}
@@ -165,7 +165,7 @@ Reading *GrammarApplicator::get_sub_reading(Reading *tr, int sub_reading) {
if (sub_reading == 0) {
return tr;
}
- if (sub_reading == GSR_ANY) {
+ else if (sub_reading == GSR_ANY) {
subs_any.push_back(Reading());
Reading *reading = &subs_any.back();
*reading = *tr;
@@ -174,15 +174,15 @@ Reading *GrammarApplicator::get_sub_reading(Reading *tr, int sub_reading) {
tr = tr->next;
reading->tags_list.push_back(0);
reading->tags_list.insert(reading->tags_list.end(), tr->tags_list.begin(), tr->tags_list.end());
- boost_foreach(uint32_t tag, tr->tags) {
+ boost_foreach (uint32_t tag, tr->tags) {
reading->tags.insert(tag);
reading->tags_bloom.insert(tag);
}
- boost_foreach(uint32_t tag, tr->tags_plain) {
+ boost_foreach (uint32_t tag, tr->tags_plain) {
reading->tags_plain.insert(tag);
reading->tags_plain_bloom.insert(tag);
}
- boost_foreach(uint32_t tag, tr->tags_textual) {
+ boost_foreach (uint32_t tag, tr->tags_textual) {
reading->tags_textual.insert(tag);
reading->tags_textual_bloom.insert(tag);
}
@@ -203,23 +203,21 @@ Reading *GrammarApplicator::get_sub_reading(Reading *tr, int sub_reading) {
reading->rehash();
return reading;
}
- if (sub_reading > 0) {
- for (int i = 0; i<sub_reading && tr; ++i) {
+ else if (sub_reading > 0) {
+ for (int i = 0; i < sub_reading && tr; ++i) {
tr = tr->next;
}
- return tr;
}
- if (sub_reading < 0) {
+ else if (sub_reading < 0) {
int ntr = 0;
Reading *ttr = tr;
while (ttr) {
ttr = ttr->next;
--ntr;
}
- for (int i = ntr; i<sub_reading && tr; ++i) {
+ for (int i = ntr; i < sub_reading && tr; ++i) {
tr = tr->next;
}
- return tr;
}
return tr;
}
@@ -258,7 +256,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
current.parent->cohort_map[0] = current.cohorts.front();
- const_foreach (uint32IntervalVector, intersects, iter_rules, iter_rules_end) {
+ foreach (iter_rules, intersects) {
uint32_t j = (*iter_rules);
// Check whether this rule is in the allowed rule list from cmdline flag --rule(s)
@@ -299,14 +297,14 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
CohortSet *cohortset = ¤t.rule_to_cohorts[rule.number];
if (debug_level > 1) {
- std::cerr << "DEBUG: " << cohortset->size() << "/" << current.cohorts.size() << " = " << double(cohortset->size())/double(current.cohorts.size()) << std::endl;
+ std::cerr << "DEBUG: " << cohortset->size() << "/" << current.cohorts.size() << " = " << double(cohortset->size()) / double(current.cohorts.size()) << std::endl;
}
- for (CohortSet::const_iterator rocit = cohortset->begin() ; rocit != cohortset->end() ; ) {
+ for (CohortSet::const_iterator rocit = cohortset->begin(); rocit != cohortset->end();) {
Cohort *cohort = *rocit;
++rocit;
if (debug_level > 1) {
- std::cerr << "DEBUG: Trying cohort " << cohort->local_number << std::endl;
+ std::cerr << "DEBUG: Trying cohort " << cohort->global_number << ":" << cohort->local_number << std::endl;
}
// If the current cohort is the initial >>> one, skip it.
@@ -355,7 +353,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
continue;
}
// If it's a Delimit rule and we're at the final cohort, skip it.
- if (type == K_DELIMIT && c == current.cohorts.size()-1) {
+ if (type == K_DELIMIT && c == current.cohorts.size() - 1) {
continue;
}
@@ -385,6 +383,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
size_t num_active = 0;
size_t num_iff = 0;
+ attach_to = cohort;
// Assume that Iff rules are really Remove rules, until proven otherwise.
if (rule.type == K_IFF) {
@@ -424,6 +423,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
// This loop figures out which readings, if any, that are valid targets for the current rule
// Criteria for valid is that the reading must match both target and all contextual tests
for (size_t i = 0; i < cohort->readings.size(); ++i) {
+ // ToDo: Switch sub-readings so that they build up a passed in vector<Reading*>
Reading *reading = get_sub_reading(cohort->readings[i], rule.sub_reading);
if (!reading) {
cohort->readings[i]->matched_target = false;
@@ -444,7 +444,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
// Check if any previous reading of this cohort had the same plain signature, and if so just copy their results
// This cache is cleared on a per-cohort basis
- if (!(set.type & (ST_SPECIAL|ST_MAPPING|ST_CHILD_UNIFY)) && !readings_plain.empty()) {
+ if (!(set.type & (ST_SPECIAL | ST_MAPPING | ST_CHILD_UNIFY)) && !readings_plain.empty()) {
readings_plain_t::const_iterator rpit = readings_plain.find(reading->hash_plain);
if (rpit != readings_plain.end()) {
reading->matched_target = rpit->second->matched_target;
@@ -491,9 +491,11 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
mark = cohort;
uint8_t orz = regexgrps.first;
// Actually check if the reading is a valid target. First check if rule target matches...
- if (rule.target && doesSetMatchReading(*reading, rule.target, (set.type & (ST_CHILD_UNIFY|ST_SPECIAL)) != 0)) {
+ if (rule.target && doesSetMatchReading(*reading, rule.target, (set.type & (ST_CHILD_UNIFY | ST_SPECIAL)) != 0)) {
+ bool regex_prop = true;
if (orz != regexgrps.first) {
did_test = false;
+ regex_prop = false;
}
target = cohort;
reading->matched_target = true;
@@ -501,7 +503,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
bool good = true;
// If we didn't already run the contextual tests, run them now.
if (!did_test) {
- foreach (ContextList, rule.tests, it, it_end) {
+ foreach (it, rule.tests) {
ContextualTest *test = *it;
if (rule.flags & RF_RESETX || !(rule.flags & RF_REMEMBERX)) {
mark = cohort;
@@ -511,6 +513,8 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
dep_deep_seen.clear();
// Reset the counters for which types of CohortIterator we have in play
std::fill(ci_depths.begin(), ci_depths.end(), 0);
+ tmpl_cntxs.clear();
+ tmpl_cntx_pos = 0;
// Run the contextual test...
if (!(test->pos & POS_PASS_ORIGIN) && (no_pass_origin || (test->pos & POS_NO_PASS_ORIGIN))) {
test_good = (runContextualTest(¤t, c, test, 0, cohort) != 0);
@@ -521,7 +525,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
if (!test_good) {
good = test_good;
if (!statistics) {
- if (it != rule.tests.begin() && !(rule.flags & (RF_REMEMBERX|RF_KEEPORDER))) {
+ if (it != rule.tests.begin() && !(rule.flags & (RF_REMEMBERX | RF_KEEPORDER))) {
rule.tests.erase(it);
rule.tests.push_front(test);
}
@@ -542,6 +546,14 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
reading->matched_tests = true;
++num_active;
++rule.num_match;
+
+ if (regex_prop && i && regexgrps_c.size()) {
+ BOOST_AUTO(it, regexgrps_c.find(cohort->readings[i - 1]->number));
+ if (it != regexgrps_c.end()) {
+ regexgrps_c.insert(std::make_pair(reading->number, it->second));
+ regexgrps_z.insert(std::make_pair(reading->number, regexgrps_z.find(cohort->readings[i - 1]->number)->second));
+ }
+ }
}
else {
regexgrps.first = orz;
@@ -552,7 +564,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
regexgrps.first = orz;
++rule.num_fail;
}
- readings_plain.insert(std::make_pair(reading->hash_plain,reading));
+ readings_plain.insert(std::make_pair(reading->hash_plain, reading));
if (reading != cohort->readings[i]) {
cohort->readings[i]->matched_target = reading->matched_target;
@@ -568,7 +580,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
// If none of the readings were valid targets, remove this cohort from the rule's possible cohorts.
if (num_active == 0 && (num_iff == 0 || rule.type != K_IFF)) {
if (!matched_target) {
- --rocit; // We have already incremented rocit earlier, so take one step back...
+ --rocit; // We have already incremented rocit earlier, so take one step back...
rocit = cohortset->erase(rocit); // ...and one step forward again
}
continue;
@@ -588,6 +600,12 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
removed.resize(0);
selected.resize(0);
+ bool swap_ac = (attach_to != cohort);
+ Cohort *ac_c = cohort;
+ if (swap_ac) {
+ std::swap(attach_to, cohort);
+ }
+
// Remember the current state so we can compare later to see if anything has changed
const size_t state_num_readings = cohort->readings.size();
const size_t state_num_removed = cohort->deleted.size();
@@ -595,7 +613,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
bool readings_changed = false;
// This loop acts on the result of the previous loop; letting the rules do their thing on the valid readings.
- for (size_t i=0 ; i<cohort->readings.size() ; ++i) {
+ for (size_t i = 0; i < cohort->readings.size(); ++i) {
Reading *tr = get_sub_reading(cohort->readings[i], rule.sub_reading);
if (!tr) {
tr = cohort->readings[i];
@@ -662,7 +680,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
// Handle all other rule types normally, except that some will break out of the loop as they only make sense to do once per cohort.
else if (good) {
if (type == K_REMOVE) {
- if ((rule.flags & RF_UNMAPLAST) && removed.size() == cohort->readings.size()-1) {
+ if ((rule.flags & RF_UNMAPLAST) && removed.size() == cohort->readings.size() - 1) {
if (unmapReading(reading, rule.number)) {
readings_changed = true;
}
@@ -692,7 +710,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
else if (type == K_REMVARIABLE) {
reading.hit_by.push_back(rule.number);
const TagList names = getTagList(*rule.maplist);
- const_foreach (TagList, names, tter, tter_end) {
+ foreach (tter, names) {
const Tag *tag = *tter;
variables.erase(tag->hash);
if (rule.flags & RF_OUTPUT) {
@@ -728,7 +746,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
if (ei == externals.end()) {
Tag *ext = single_tags.find(rule.varname)->second;
UErrorCode err = U_ZERO_ERROR;
- u_strToUTF8(&cbuffers[0][0], CG3_BUFFER_SIZE-1, 0, ext->tag.c_str(), ext->tag.length(), &err);
+ u_strToUTF8(&cbuffers[0][0], CG3_BUFFER_SIZE - 1, 0, ext->tag.c_str(), ext->tag.length(), &err);
Process& es = externals[rule.varname];
try {
@@ -757,7 +775,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
break;
}
else if (type == K_REMCOHORT) {
- foreach (ReadingList, cohort->readings, iter, iter_end) {
+ foreach (iter, cohort->readings) {
(*iter)->hit_by.push_back(rule.number);
(*iter)->deleted = true;
}
@@ -766,12 +784,26 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
cohort->prev->enclosed.insert(cohort->prev->enclosed.end(), cohort->enclosed.begin(), cohort->enclosed.end());
cohort->enclosed.clear();
}
+ // Remove the cohort from all rules
+ foreach (cs, current.rule_to_cohorts) {
+ cs->erase(cohort);
+ }
+ // Forward all children of this cohort to the parent of this cohort
+ // ToDo: Named relations must be erased
+ while (!cohort->dep_children.empty()) {
+ uint32_t ch = cohort->dep_children.back();
+ attachParentChild(*current.parent->cohort_map[cohort->dep_parent], *current.parent->cohort_map[ch], true, true);
+ cohort->dep_children.erase(ch);
+ }
cohort->type |= CT_REMOVED;
cohort->prev->removed.push_back(cohort);
cohort->detach();
- cohort->parent = 0;
- current.cohorts.erase(current.cohorts.begin()+cohort->local_number);
- foreach (CohortVector, current.cohorts, iter, iter_end) {
+ foreach (cm, current.parent->cohort_map) {
+ cm->second->dep_children.erase(cohort->dep_self);
+ }
+ current.parent->cohort_map.erase(cohort->global_number);
+ current.cohorts.erase(current.cohorts.begin() + cohort->local_number);
+ foreach (iter, current.cohorts) {
(*iter)->local_number = std::distance(current.cohorts.begin(), iter);
}
gWindow->rebuildCohortLinks();
@@ -786,8 +818,16 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
}
index_ruleCohort_no.clear();
cohortset = &current.rule_to_cohorts[rule.number];
- rocit = cohortset->find(cohort);
- ++rocit;
+ rocit = cohortset->end();
+ }
+ else if (cohortset->empty()) {
+ rocit = cohortset->end();
+ }
+ else {
+ rocit = cohortset->find(current.cohorts[cohort->local_number]);
+ if (rocit != cohortset->end()) {
+ ++rocit;
+ }
}
readings_changed = true;
break;
@@ -801,32 +841,41 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
Tag *wf = 0;
std::vector<TagList> readings;
- static TagList theTags;
- theTags.clear();
+ BOOST_AUTO(theTags, ss_taglist.get());
getTagList(*rule.maplist, theTags);
- const_foreach (TagList, theTags, tter, tter_end) {
+ foreach (tter, *theTags) {
+ while ((*tter)->type & T_VARSTRING) {
+ *tter = generateVarstringTag(*tter);
+ }
if ((*tter)->type & T_WORDFORM) {
cCohort->wordform = *tter;
wf = *tter;
continue;
}
- assert(wf && "There must be a wordform before any other tags in ADDCOHORT.");
+ if (!wf) {
+ u_fprintf(ux_stderr, "Error: There must be a wordform before any other tags in ADDCOHORT on line %u before input line %u.\n", rule.line, numLines);
+ CG3Quit(1);
+ }
if ((*tter)->type & T_BASEFORM) {
- readings.resize(readings.size()+1);
+ readings.resize(readings.size() + 1);
readings.back().push_back(wf);
}
+ if (readings.empty()) {
+ u_fprintf(ux_stderr, "Error: There must be a baseform after the wordform in ADDCOHORT on line %u before input line %u.\n", rule.line, numLines);
+ CG3Quit(1);
+ }
readings.back().push_back(*tter);
}
- foreach (std::vector<TagList>, readings, rit, rit_end) {
+ foreach (rit, readings) {
Reading *cReading = alloc_reading(cCohort);
++numReadings;
insert_if_exists(cReading->parent->possible_sets, grammar->sets_any);
cReading->hit_by.push_back(rule.number);
cReading->noprint = false;
TagList mappings;
- foreach (TagList, *rit, tter, tter_end) {
+ foreach (tter, *rit) {
uint32_t hash = (*tter)->hash;
while ((*tter)->type & T_VARSTRING) {
*tter = generateVarstringTag(*tter);
@@ -862,12 +911,12 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
current.cohorts.insert(current.cohorts.begin() + cohort->local_number + 1, cCohort);
}
- foreach (CohortVector, current.cohorts, iter, iter_end) {
+ foreach (iter, current.cohorts) {
(*iter)->local_number = std::distance(current.cohorts.begin(), iter);
}
// If the new cohort is now the last cohort, add <<< to it and remove <<< from previous last cohort
if (current.cohorts.back() == cCohort) {
- boost_foreach (Reading *r, current.cohorts[current.cohorts.size()-2]->readings) {
+ boost_foreach (Reading *r, current.cohorts[current.cohorts.size() - 2]->readings) {
delTagFromReading(*r, endtag);
}
boost_foreach (Reading *r, current.cohorts.back()->readings) {
@@ -887,23 +936,248 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
++rocit;
break;
}
+ else if (rule.type == K_SPLITCOHORT) {
+ index_ruleCohort_no.clear();
+
+ std::vector<std::pair<Cohort*, std::vector<TagList> > > cohorts;
+
+ BOOST_AUTO(theTags, ss_taglist.get());
+ getTagList(*rule.maplist, theTags);
+
+ Tag *wf = 0;
+ foreach (tter, *theTags) {
+ if ((*tter)->type & T_WORDFORM) {
+ cohorts.resize(cohorts.size() + 1);
+ cohorts.back().first = alloc_cohort(&current);
+ cohorts.back().first->global_number = gWindow->cohort_counter++;
+ wf = *tter;
+ while (wf->type & T_VARSTRING) {
+ wf = generateVarstringTag(wf);
+ }
+ cohorts.back().first->wordform = wf;
+ continue;
+ }
+ if (!wf) {
+ u_fprintf(ux_stderr, "Error: There must be a wordform before any other tags in SPLITCOHORT on line %u before input line %u.\n", rule.line, numLines);
+ CG3Quit(1);
+ }
+ }
+
+ uint32_t rel_trg = DEP_NO_PARENT;
+ std::vector<std::pair<uint32_t, uint32_t> > cohort_dep(cohorts.size());
+ cohort_dep.front().second = DEP_NO_PARENT;
+ cohort_dep.back().first = DEP_NO_PARENT;
+ cohort_dep.back().second = cohort_dep.size() - 1;
+ for (size_t i = 1; i < cohort_dep.size() - 1; ++i) {
+ cohort_dep[i].second = i;
+ }
+
+ size_t i = 0;
+ std::vector<TagList> *readings = &cohorts.front().second;
+ Tag *bf = 0;
+ foreach (tter, *theTags) {
+ if ((*tter)->type & T_WORDFORM) {
+ ++i;
+ bf = 0;
+ continue;
+ }
+ if ((*tter)->type & T_BASEFORM) {
+ readings = &cohorts[i - 1].second;
+ readings->resize(readings->size() + 1);
+ readings->back().push_back(cohorts[i - 1].first->wordform);
+ bf = *tter;
+ }
+ if (!bf) {
+ u_fprintf(ux_stderr, "Error: There must be a baseform after the wordform in SPLITCOHORT on line %u before input line %u.\n", rule.line, numLines);
+ CG3Quit(1);
+ }
+
+ UChar dep_self[12] = {};
+ UChar dep_parent[12] = {};
+ if (u_sscanf((*tter)->tag.c_str(), "%[0-9cd]->%[0-9pm]", &dep_self, &dep_parent) == 2) {
+ if (dep_self[0] == 'c' || dep_self[0] == 'd') {
+ cohort_dep[i - 1].first = DEP_NO_PARENT;
+ if (rel_trg == DEP_NO_PARENT) {
+ rel_trg = i - 1;
+ }
+ }
+ else if (u_sscanf(dep_self, "%i", &cohort_dep[i - 1].first) != 1) {
+ u_fprintf(ux_stderr, "Error: SPLITCOHORT dependency mapping dep_self was not valid on line %u before input line %u.\n", rule.line, numLines);
+ CG3Quit(1);
+ }
+ if (dep_parent[0] == 'p' || dep_parent[0] == 'm') {
+ cohort_dep[i - 1].second = DEP_NO_PARENT;
+ }
+ else if (u_sscanf(dep_parent, "%i", &cohort_dep[i - 1].second) != 1) {
+ u_fprintf(ux_stderr, "Error: SPLITCOHORT dependency mapping dep_parent was not valid on line %u before input line %u.\n", rule.line, numLines);
+ CG3Quit(1);
+ }
+ continue;
+ }
+ if ((*tter)->tag.size() == 3 && (*tter)->tag[0] == 'R' && (*tter)->tag[1] == ':' && (*tter)->tag[2] == '*') {
+ rel_trg = i - 1;
+ continue;
+ }
+ readings->back().push_back(*tter);
+ }
+
+ if (rel_trg == DEP_NO_PARENT) {
+ rel_trg = cohorts.size() - 1;
+ }
+
+ for (size_t i = 0; i < cohorts.size(); ++i) {
+ Cohort *cCohort = cohorts[i].first;
+ readings = &cohorts[i].second;
+
+ foreach (rit, *readings) {
+ TagList& tags = *rit;
+ Reading *cReading = alloc_reading(cCohort);
+ ++numReadings;
+ insert_if_exists(cReading->parent->possible_sets, grammar->sets_any);
+ cReading->hit_by.push_back(rule.number);
+ cReading->noprint = false;
+ TagList mappings;
+
+ for (size_t i = 0; i < tags.size(); ++i) {
+ if (tags[i]->hash == grammar->tag_any) {
+ uint32Vector& nt = cohort->readings.front()->tags_list;
+ if (nt.size() <= 2) {
+ continue;
+ }
+ tags.reserve(tags.size() + nt.size() - 2);
+ tags[i] = single_tags[nt[2]];
+ for (size_t j = 3, k = 1; j < nt.size(); ++j) {
+ if (single_tags[nt[j]]->type & T_DEPENDENCY) {
+ continue;
+ }
+ tags.insert(tags.begin() + i + k, single_tags[nt[j]]);
+ ++k;
+ }
+ }
+ }
+
+ foreach (tter, tags) {
+ uint32_t hash = (*tter)->hash;
+ while ((*tter)->type & T_VARSTRING) {
+ *tter = generateVarstringTag(*tter);
+ }
+ if ((*tter)->type & T_MAPPING || (*tter)->tag[0] == grammar->mapping_prefix) {
+ mappings.push_back(*tter);
+ }
+ else {
+ hash = addTagToReading(*cReading, hash);
+ }
+ if (updateValidRules(rules, intersects, hash, *cReading)) {
+ iter_rules = intersects.find(rule.number);
+ iter_rules_end = intersects.end();
+ }
+ }
+ if (!mappings.empty()) {
+ splitMappings(mappings, *cCohort, *cReading);
+ }
+ cCohort->appendReading(cReading);
+ }
+
+ if (cCohort->readings.empty()) {
+ initEmptyCohort(*cCohort);
+ }
+
+ current.parent->dep_window[cCohort->global_number] = cCohort;
+ current.parent->cohort_map[cCohort->global_number] = cCohort;
+
+ current.cohorts.insert(current.cohorts.begin() + cohort->local_number + i + 1, cCohort);
+ }
+
+ for (size_t i = 0; i < cohorts.size(); ++i) {
+ Cohort *cCohort = cohorts[i].first;
+
+ if (cohort_dep[i].first == DEP_NO_PARENT) {
+ while (!cohort->dep_children.empty()) {
+ uint32_t ch = cohort->dep_children.back();
+ attachParentChild(*cCohort, *current.parent->cohort_map[ch], true, true);
+ cohort->dep_children.erase(ch); // Just in case the attachment can't be made for some reason
+ }
+ }
+
+ if (cohort_dep[i].second == DEP_NO_PARENT) {
+ attachParentChild(*current.parent->cohort_map[cohort->dep_parent], *cCohort, true, true);
+ }
+ else {
+ attachParentChild(*current.parent->cohort_map[cohorts.front().first->global_number + cohort_dep[i].second - 1], *cCohort, true, true);
+ }
+
+ // Re-attach all named relations to the dependency tail or R:* cohort
+ if (rel_trg == i && (cohort->type & CT_RELATED)) {
+ cCohort->type |= CT_RELATED;
+ cCohort->relations.swap(cohort->relations);
+
+ std::pair<SingleWindow **, size_t> swss[3] = {
+ std::make_pair(&gWindow->previous[0], gWindow->previous.size()),
+ std::make_pair(&gWindow->current, static_cast<size_t>(1)),
+ std::make_pair(&gWindow->next[0], gWindow->next.size()),
+ };
+ for (size_t w = 0; w < 3; ++w) {
+ for (size_t sw = 0; sw < swss[w].second; ++sw) {
+ foreach (ch, swss[w].first[sw]->cohorts) {
+ foreach (rel, (*ch)->relations) {
+ if (rel->second.count(cohort->global_number)) {
+ rel->second.erase(cohort->global_number);
+ rel->second.insert(cCohort->global_number);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Remove the source cohort
+ foreach (iter, cohort->readings) {
+ (*iter)->hit_by.push_back(rule.number);
+ (*iter)->deleted = true;
+ }
+ // Move any enclosed parentheses to the previous cohort
+ if (!cohort->enclosed.empty()) {
+ cohort->prev->enclosed.insert(cohort->prev->enclosed.end(), cohort->enclosed.begin(), cohort->enclosed.end());
+ cohort->enclosed.clear();
+ }
+ cohort->type |= CT_REMOVED;
+ cohort->prev->removed.push_back(cohort);
+ cohort->detach();
+ foreach (cm, current.parent->cohort_map) {
+ cm->second->dep_children.erase(cohort->dep_self);
+ }
+ current.parent->cohort_map.erase(cohort->global_number);
+ current.cohorts.erase(current.cohorts.begin() + cohort->local_number);
+
+ // Reindex and rebuild the window
+ foreach (iter, current.cohorts) {
+ (*iter)->local_number = std::distance(current.cohorts.begin(), iter);
+ }
+ gWindow->rebuildCohortLinks();
+ indexSingleWindow(current);
+ readings_changed = true;
+
+ cohortset = &current.rule_to_cohorts[rule.number];
+ rocit = cohortset->find(current.cohorts[cohort->local_number]);
+ ++rocit;
+ break;
+ }
else if (rule.type == K_ADD || rule.type == K_MAP) {
index_ruleCohort_no.clear();
reading.hit_by.push_back(rule.number);
reading.noprint = false;
- static TagList mappings;
- mappings.clear();
- static TagList theTags;
- theTags.clear();
+ BOOST_AUTO(mappings, ss_taglist.get());
+ BOOST_AUTO(theTags, ss_taglist.get());
getTagList(*rule.maplist, theTags);
- foreach(TagList, theTags, tter, tter_end) {
+ foreach (tter, *theTags) {
uint32_t hash = (*tter)->hash;
while ((*tter)->type & T_VARSTRING) {
*tter = generateVarstringTag(*tter);
}
if ((*tter)->type & T_MAPPING || (*tter)->tag[0] == grammar->mapping_prefix) {
- mappings.push_back(*tter);
+ mappings->push_back(*tter);
}
else {
hash = addTagToReading(reading, *tter);
@@ -913,7 +1187,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
iter_rules_end = intersects.end();
}
}
- if (!mappings.empty()) {
+ if (!mappings->empty()) {
splitMappings(mappings, *cohort, reading, rule.type == K_MAP);
}
if (rule.type == K_MAP) {
@@ -937,19 +1211,17 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
reading.tags_list.push_back(cohort->wordform->hash);
reading.tags_list.push_back(reading.baseform);
reflowReading(reading);
- static TagList mappings;
- mappings.clear();
- static TagList theTags;
- theTags.clear();
+ BOOST_AUTO(mappings, ss_taglist.get());
+ BOOST_AUTO(theTags, ss_taglist.get());
getTagList(*rule.maplist, theTags);
- foreach (TagList, theTags, tter, tter_end) {
+ foreach (tter, *theTags) {
uint32_t hash = (*tter)->hash;
while ((*tter)->type & T_VARSTRING) {
*tter = generateVarstringTag(*tter);
}
if ((*tter)->type & T_MAPPING || (*tter)->tag[0] == grammar->mapping_prefix) {
- mappings.push_back(*tter);
+ mappings->push_back(*tter);
}
else {
hash = addTagToReading(reading, *tter);
@@ -959,7 +1231,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
iter_rules_end = intersects.end();
}
}
- if (!mappings.empty()) {
+ if (!mappings->empty()) {
splitMappings(mappings, *cohort, reading, true);
}
if (reading.hash != state_hash) {
@@ -970,24 +1242,21 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
// ToDo: Check whether this substitution will do nothing at all to the end result
// ToDo: Not actually...instead, test whether any reading in the cohort already is the end result
- size_t tpos = std::numeric_limits<size_t>::max();
- size_t tagb = reading.tags_list.size();
- static TagList theTags;
- theTags.clear();
+ BOOST_AUTO(theTags, ss_taglist.get());
getTagList(*rule.sublist, theTags);
// Modify the list of tags to remove to be the actual list of tags present, including matching regex and icase tags
- for (TagList::iterator it = theTags.begin() ; it != theTags.end() ; ) {
+ for (TagList::iterator it = theTags->begin(); it != theTags->end();) {
if (reading.tags.find((*it)->hash) == reading.tags.end()) {
- const Tag* tt = *it;
- it = theTags.erase(it);
+ const Tag *tt = *it;
+ it = theTags->erase(it);
if (tt->type & T_SPECIAL) {
if (regexgrps.second == 0) {
regexgrps.second = &regexgrps_store[used_regex];
}
uint32_t stag = doesTagMatchReading(reading, *tt, false, true);
if (stag) {
- theTags.insert(it, single_tags.find(stag)->second);
+ theTags->insert(it, single_tags.find(stag)->second);
}
}
continue;
@@ -996,25 +1265,62 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
}
// Perform the tag removal, remembering the position of the final removed tag for use as insertion spot
- const_foreach (TagList, theTags, tter, tter_end) {
- if (tpos >= reading.tags_list.size()) {
- foreach (Reading::tags_list_t, reading.tags_list, tfind, tfind_end) {
- if (*tfind == (*tter)->hash) {
- tpos = std::distance(reading.tags_list.begin(), tfind);
- --tpos;
+ size_t tpos = std::numeric_limits<size_t>::max();
+ bool plain = true;
+ for (size_t i = 0; i < reading.tags_list.size();) {
+ BOOST_AUTO(&remter, reading.tags_list[i]);
+
+ if (plain && remter == (*theTags->begin())->hash) {
+ if (reading.baseform == remter) {
+ reading.baseform = 0;
+ }
+ remter = substtag;
+ tpos = i;
+ for (size_t j = 1; j < theTags->size() && i < reading.tags_list.size(); ++j, ++i) {
+ BOOST_AUTO(&remter, reading.tags_list[i]);
+ BOOST_AUTO(tter, (*theTags)[j]->hash);
+ if (remter != tter) {
+ plain = false;
break;
}
+ reading.tags_list.erase(reading.tags_list.begin() + i);
+ reading.tags.erase(tter);
+ if (reading.baseform == tter) {
+ reading.baseform = 0;
+ }
}
+ continue;
}
- erase(reading.tags_list, (*tter)->hash);
- reading.tags.erase((*tter)->hash);
- if (reading.baseform == (*tter)->hash) {
- reading.baseform = 0;
+
+ foreach (tter, *theTags) {
+ if (remter != (*tter)->hash) {
+ continue;
+ }
+ tpos = i;
+ remter = substtag;
+ reading.tags.erase((*tter)->hash);
+ if (reading.baseform == (*tter)->hash) {
+ reading.baseform = 0;
+ }
}
+
+ ++i;
}
// Should Substitute really do nothing if no tags were removed? 2013-10-21, Eckhard says this is expected behavior.
- if (tagb != reading.tags_list.size()) {
+ if (tpos != std::numeric_limits<size_t>::max()) {
+ if (!plain) {
+ for (size_t i = 0; i < reading.tags_list.size() && i < tpos;) {
+ if (reading.tags_list[i] == substtag) {
+ reading.tags_list.erase(reading.tags_list.begin() + i);
+ --tpos;
+ }
+ else {
+ ++i;
+ }
+ }
+ }
+
Tag *wf = 0;
index_ruleCohort_no.clear();
reading.hit_by.push_back(rule.number);
@@ -1023,40 +1329,46 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
tpos = reading.tags_list.size() - 1;
}
++tpos;
- static TagList mappings;
- mappings.clear();
- static TagList theTags;
- theTags.clear();
+ BOOST_AUTO(mappings, ss_taglist.get());
+ BOOST_AUTO(theTags, ss_taglist.get());
getTagList(*rule.maplist, theTags);
- const_foreach (TagList, theTags, tter, tter_end) {
- Tag *tag = *tter;
- if (tag->type & T_VARSTRING) {
- tag = generateVarstringTag(tag);
- }
- if (tag->hash == grammar->tag_any) {
- break;
- }
- if (reading.tags.find(tag->hash) != reading.tags.end()) {
- continue;
- }
- if (tag->type & T_MAPPING || tag->tag[0] == grammar->mapping_prefix) {
- mappings.push_back(tag);
- }
- else {
- if (tag->type & T_WORDFORM) {
- wf = tag;
+ for (size_t i = 0; i < reading.tags_list.size();) {
+ if (reading.tags_list[i] == substtag) {
+ reading.tags_list.erase(reading.tags_list.begin() + i);
+ tpos = i;
+
+ foreach (tter, *theTags) {
+ Tag *tag = *tter;
+ if (tag->type & T_VARSTRING) {
+ tag = generateVarstringTag(tag);
+ }
+ if (tag->hash == grammar->tag_any) {
+ break;
+ }
+ if (tag->type & T_MAPPING || tag->tag[0] == grammar->mapping_prefix) {
+ mappings->push_back(tag);
+ }
+ else {
+ if (tag->type & T_WORDFORM) {
+ wf = tag;
+ }
+ reading.tags_list.insert(reading.tags_list.begin() + tpos, tag->hash);
+ ++tpos;
+ }
+ if (updateValidRules(rules, intersects, tag->hash, reading)) {
+ iter_rules = intersects.find(rule.number);
+ iter_rules_end = intersects.end();
+ }
}
- reading.tags_list.insert(reading.tags_list.begin()+tpos, tag->hash);
- ++tpos;
}
- if (updateValidRules(rules, intersects, tag->hash, reading)) {
- iter_rules = intersects.find(rule.number);
- iter_rules_end = intersects.end();
+ else {
+ ++i;
}
}
reflowReading(reading);
- if (!mappings.empty()) {
+
+ if (!mappings->empty()) {
splitMappings(mappings, *cohort, reading, true);
}
if (wf && wf != reading.parent->wordform) {
@@ -1096,14 +1408,16 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
Tag *bf = 0;
std::vector<TagList> readings;
- static TagList theTags;
- theTags.clear();
+ BOOST_AUTO(theTags, ss_taglist.get());
getTagList(*rule.maplist, theTags);
- const_foreach (TagList, theTags, tter, tter_end) {
+ foreach (tter, *theTags) {
+ while ((*tter)->type & T_VARSTRING) {
+ *tter = generateVarstringTag(*tter);
+ }
if ((*tter)->type & T_BASEFORM) {
bf = *tter;
- readings.resize(readings.size()+1);
+ readings.resize(readings.size() + 1);
}
if (bf == 0) {
u_fprintf(ux_stderr, "Error: There must be a baseform before any other tags in APPEND on line %u.\n", rule.line);
@@ -1112,7 +1426,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
readings.back().push_back(*tter);
}
- foreach (std::vector<TagList>, readings, rit, rit_end) {
+ foreach (rit, readings) {
Reading *cReading = alloc_reading(cohort);
++numReadings;
insert_if_exists(cReading->parent->possible_sets, grammar->sets_any);
@@ -1120,7 +1434,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
cReading->hit_by.push_back(rule.number);
cReading->noprint = false;
TagList mappings;
- foreach (TagList, *rit, tter, tter_end) {
+ foreach (tter, *rit) {
uint32_t hash = (*tter)->hash;
while ((*tter)->type & T_VARSTRING) {
*tter = generateVarstringTag(*tter);
@@ -1143,7 +1457,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
}
if (cohort->readings.size() > 1) {
- foreach (ReadingList, cohort->readings, rit, rit_end) {
+ foreach (rit, cohort->readings) {
if ((*rit)->noprint) {
delete *rit;
rit = cohort->readings.erase(rit);
@@ -1161,33 +1475,30 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
index_ruleCohort_no.clear();
cReading->hit_by.push_back(rule.number);
cReading->noprint = false;
- const_foreach (Reading::tags_list_t, reading.tags_list, iter, iter_end) {
+ foreach (iter, reading.tags_list) {
addTagToReading(*cReading, *iter);
}
if (rule.sublist) {
// ToDo: Use the code from Substitute to make this match and remove special tags
- static TagList excepts;
- excepts.clear();
+ BOOST_AUTO(excepts, ss_taglist.get());
getTagList(*rule.sublist, excepts);
- const_foreach (TagList, excepts, tter, tter_end) {
+ foreach (tter, *excepts) {
delTagFromReading(*cReading, *tter);
}
}
- static TagList mappings;
- mappings.clear();
- static TagList theTags;
- theTags.clear();
+ BOOST_AUTO(mappings, ss_taglist.get());
+ BOOST_AUTO(theTags, ss_taglist.get());
getTagList(*rule.maplist, theTags);
- foreach (TagList, theTags, tter, tter_end) {
+ foreach (tter, *theTags) {
uint32_t hash = (*tter)->hash;
while ((*tter)->type & T_VARSTRING) {
*tter = generateVarstringTag(*tter);
}
if ((*tter)->type & T_MAPPING || (*tter)->tag[0] == grammar->mapping_prefix) {
- mappings.push_back(*tter);
+ mappings->push_back(*tter);
}
else {
hash = addTagToReading(*cReading, *tter);
@@ -1197,31 +1508,40 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
iter_rules_end = intersects.end();
}
}
- if (!mappings.empty()) {
+ if (!mappings->empty()) {
splitMappings(mappings, *cohort, *cReading, true);
}
readings_changed = true;
}
else if (type == K_SETPARENT || type == K_SETCHILD) {
int32_t orgoffset = rule.dep_target->offset;
- uint32SortedVector seen_targets;
+ BOOST_AUTO(seen_targets, ss_u32sv.get());
seen_barrier = false;
bool attached = false;
Cohort *target = cohort;
while (!attached) {
+ BOOST_AUTO(utags, ss_utags.get());
+ BOOST_AUTO(usets, ss_u32sv.get());
+ *utags = *unif_tags;
+ *usets = *unif_sets;
+
Cohort *attach = 0;
- seen_targets.insert(target->global_number);
+ seen_targets->insert(target->global_number);
dep_deep_seen.clear();
+ tmpl_cntxs.clear();
+ tmpl_cntx_pos = 0;
attach_to = 0;
if (runContextualTest(target->parent, target->local_number, rule.dep_target, &attach) && attach) {
if (attach_to) {
attach = attach_to;
}
bool good = true;
- foreach (ContextList, rule.dep_tests, it, it_end) {
+ foreach (it, rule.dep_tests) {
mark = attach;
dep_deep_seen.clear();
+ tmpl_cntxs.clear();
+ tmpl_cntx_pos = 0;
test_good = (runContextualTest(attach->parent, attach->local_number, *it) != 0);
if (!test_good) {
good = test_good;
@@ -1248,7 +1568,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
if (seen_barrier || (rule.flags & RF_NEAREST)) {
break;
}
- if (seen_targets.find(attach->global_number) != seen_targets.end()) {
+ if (seen_targets->count(attach->global_number)) {
// We've found a cohort we have seen before...
// We assume running the test again would result in the same, so don't bother.
break;
@@ -1256,6 +1576,8 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
if (!attached) {
// Did not successfully attach due to loop restrictions; look onwards from here
target = attach;
+ unif_tags->swap(utags);
+ unif_sets->swap(usets);
if (rule.dep_target->offset != 0) {
// Temporarily set offset to +/- 1
rule.dep_target->offset = ((rule.dep_target->offset < 0) ? -1 : 1);
@@ -1273,15 +1595,19 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
// ToDo: ** tests will not correctly work for MOVE/SWITCH; cannot move cohorts between windows
Cohort *attach = 0;
dep_deep_seen.clear();
+ tmpl_cntxs.clear();
+ tmpl_cntx_pos = 0;
attach_to = 0;
if (runContextualTest(&current, c, rule.dep_target, &attach) && attach && cohort->parent == attach->parent) {
if (attach_to) {
attach = attach_to;
}
bool good = true;
- foreach (ContextList, rule.dep_tests, it, it_end) {
+ foreach (it, rule.dep_tests) {
mark = attach;
dep_deep_seen.clear();
+ tmpl_cntxs.clear();
+ tmpl_cntx_pos = 0;
test_good = (runContextualTest(attach->parent, attach->local_number, *it) != 0);
if (!test_good) {
good = test_good;
@@ -1301,17 +1627,17 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
}
current.cohorts[cohort->local_number] = attach;
current.cohorts[attach->local_number] = cohort;
- foreach (ReadingList, cohort->readings, iter, iter_end) {
+ foreach (iter, cohort->readings) {
(*iter)->hit_by.push_back(rule.number);
}
- foreach (ReadingList, attach->readings, iter, iter_end) {
+ foreach (iter, attach->readings) {
(*iter)->hit_by.push_back(rule.number);
}
}
else {
CohortVector cohorts;
if (rule.childset1) {
- for (CohortVector::iterator iter = current.cohorts.begin() ; iter != current.cohorts.end() ; ) {
+ for (CohortVector::iterator iter = current.cohorts.begin(); iter != current.cohorts.end();) {
if (isChildOf(*iter, cohort) && doesSetMatchCohortNormal(**iter, rule.childset1)) {
cohorts.push_back(*iter);
iter = current.cohorts.erase(iter);
@@ -1323,16 +1649,16 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
}
else {
cohorts.push_back(cohort);
- current.cohorts.erase(current.cohorts.begin()+cohort->local_number);
+ current.cohorts.erase(current.cohorts.begin() + cohort->local_number);
}
- foreach (CohortVector, current.cohorts, iter, iter_end) {
+ foreach (iter, current.cohorts) {
(*iter)->local_number = std::distance(current.cohorts.begin(), iter);
}
CohortVector edges;
if (rule.childset2) {
- foreach (CohortVector, current.cohorts, iter, iter_end) {
+ foreach (iter, current.cohorts) {
if (isChildOf(*iter, attach) && doesSetMatchCohortNormal(**iter, rule.childset2)) {
edges.push_back(*iter);
}
@@ -1349,18 +1675,18 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
}
}
else if (type == K_MOVE_AFTER) {
- spot = edges.back()->local_number+1;
+ spot = edges.back()->local_number + 1;
}
while (!cohorts.empty()) {
- foreach (ReadingList, cohorts.back()->readings, iter, iter_end) {
+ foreach (iter, cohorts.back()->readings) {
(*iter)->hit_by.push_back(rule.number);
}
- current.cohorts.insert(current.cohorts.begin()+spot, cohorts.back());
+ current.cohorts.insert(current.cohorts.begin() + spot, cohorts.back());
cohorts.pop_back();
}
}
- foreach (CohortVector, current.cohorts, iter, iter_end) {
+ foreach (iter, current.cohorts) {
(*iter)->local_number = std::distance(current.cohorts.begin(), iter);
}
gWindow->rebuildCohortLinks();
@@ -1378,7 +1704,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
attach = attach_to;
}
bool good = true;
- foreach (ContextList, rule.dep_tests, it, it_end) {
+ foreach (it, rule.dep_tests) {
mark = attach;
dep_deep_seen.clear();
test_good = (runContextualTest(attach->parent, attach->local_number, *it) != 0);
@@ -1390,10 +1716,10 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
if (good) {
swapper<Cohort*> sw((rule.flags & RF_REVERSE) != 0, attach, cohort);
bool rel_did_anything = false;
- static TagList theTags;
- theTags.clear();
+ BOOST_AUTO(theTags, ss_taglist.get());
getTagList(*rule.maplist, theTags);
- const_foreach (TagList, theTags, tter, tter_end) {
+
+ foreach (tter, *theTags) {
if (type == K_ADDRELATION) {
attach->type |= CT_RELATED;
cohort->type |= CT_RELATED;
@@ -1421,15 +1747,19 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
else if (type == K_ADDRELATIONS || type == K_SETRELATIONS || type == K_REMRELATIONS) {
Cohort *attach = 0;
dep_deep_seen.clear();
+ tmpl_cntxs.clear();
+ tmpl_cntx_pos = 0;
attach_to = 0;
if (runContextualTest(&current, c, rule.dep_target, &attach) && attach) {
if (attach_to) {
attach = attach_to;
}
bool good = true;
- foreach (ContextList, rule.dep_tests, it, it_end) {
+ foreach (it, rule.dep_tests) {
mark = attach;
dep_deep_seen.clear();
+ tmpl_cntxs.clear();
+ tmpl_cntx_pos = 0;
test_good = (runContextualTest(attach->parent, attach->local_number, *it) != 0);
if (!test_good) {
good = test_good;
@@ -1440,15 +1770,13 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
swapper<Cohort*> sw((rule.flags & RF_REVERSE) != 0, attach, cohort);
bool rel_did_anything = false;
- static TagList sublist;
- sublist.clear();
+ BOOST_AUTO(sublist, ss_taglist.get());
getTagList(*rule.sublist, sublist);
- static TagList maplist;
- maplist.clear();
+ BOOST_AUTO(maplist, ss_taglist.get());
getTagList(*rule.maplist, maplist);
- const_foreach (TagList, maplist, tter, tter_end) {
+ foreach (tter, *maplist) {
if (type == K_ADDRELATIONS) {
cohort->type |= CT_RELATED;
rel_did_anything |= cohort->addRelation((*tter)->hash, attach->global_number);
@@ -1461,7 +1789,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
rel_did_anything |= cohort->remRelation((*tter)->hash, attach->global_number);
}
}
- const_foreach (TagList, sublist, tter, tter_end) {
+ foreach (tter, *sublist) {
if (type == K_ADDRELATIONS) {
attach->type |= CT_RELATED;
rel_did_anything |= attach->addRelation((*tter)->hash, cohort->global_number);
@@ -1503,7 +1831,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
size_t oz = cohort->readings.size();
while (!removed.empty()) {
removed.back()->deleted = true;
- for (size_t i=0 ; i<oz ; ++i) {
+ for (size_t i = 0; i < oz; ++i) {
if (cohort->readings[i] == removed.back()) {
--oz;
std::swap(cohort->readings[i], cohort->readings[oz]);
@@ -1526,10 +1854,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
}
// Cohort state has changed, so mark that the section did something
- if (state_num_readings != cohort->readings.size()
- || state_num_removed != cohort->deleted.size()
- || state_num_delayed != cohort->delayed.size()
- || readings_changed) {
+ if (state_num_readings != cohort->readings.size() || state_num_removed != cohort->deleted.size() || state_num_delayed != cohort->delayed.size() || readings_changed) {
if (!(rule.flags & RF_NOITERATE) && section_max_count != 1) {
section_did_something = true;
}
@@ -1539,6 +1864,19 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
if (delimited) {
break;
}
+
+ if (swap_ac) {
+ cohort = ac_c;
+ if (cohortset->empty()) {
+ rocit = cohortset->end();
+ }
+ else {
+ rocit = cohortset->find(current.cohorts[cohort->local_number]);
+ if (rocit != cohortset->end()) {
+ ++rocit;
+ }
+ }
+ }
}
if (statistics) {
@@ -1569,11 +1907,11 @@ uint32_t GrammarApplicator::runGrammarOnSingleWindow(SingleWindow& current) {
}
if (!grammar->rules.empty() && !no_sections) {
- std::map<uint32_t,uint32_t> counter;
+ std::map<uint32_t, uint32_t> counter;
// Caveat: This may look as if it is not recursing previous sections, but those rules are preprocessed into the successive sections so they are actually run.
RSType::iterator iter = runsections.begin();
RSType::iterator iter_end = runsections.end();
- for (; iter != iter_end ;) {
+ for (; iter != iter_end;) {
if (iter->first < 0 || (section_max_count && counter[iter->first] >= section_max_count)) {
++iter;
continue;
@@ -1607,10 +1945,10 @@ void GrammarApplicator::runGrammarOnWindow() {
SingleWindow *current = gWindow->current;
did_final_enclosure = false;
- const_foreach (uint32FlatHashMap, current->variables_set, vit, vit_end) {
+ foreach (vit, current->variables_set) {
variables[vit->first] = vit->second;
}
- const_foreach (uint32FlatHashSet, current->variables_rem, vit, vit_end) {
+ foreach (vit, current->variables_rem) {
variables.erase(*vit);
}
@@ -1619,7 +1957,7 @@ void GrammarApplicator::runGrammarOnWindow() {
gWindow->dep_map.clear();
gWindow->dep_window.clear();
if (!input_eof && !gWindow->next.empty() && gWindow->next.back()->cohorts.size() > 1) {
- foreach (CohortVector, gWindow->next.back()->cohorts, iter, iter_end) {
+ foreach (iter, gWindow->next.back()->cohorts) {
Cohort *cohort = *iter;
gWindow->dep_window[cohort->global_number] = cohort;
}
@@ -1630,8 +1968,8 @@ void GrammarApplicator::runGrammarOnWindow() {
}
if (!grammar->parentheses.empty()) {
- label_scanParentheses:
- reverse_foreach (CohortVector, current->cohorts, iter, iter_end) {
+ label_scanParentheses:
+ reverse_foreach (iter, current->cohorts) {
Cohort *c = *iter;
if (c->is_pleft == 0) {
continue;
@@ -1645,7 +1983,7 @@ void GrammarApplicator::runGrammarOnWindow() {
++right;
bool found = false;
CohortVector encs;
- for (; right != current->cohorts.end() ; ++right) {
+ for (; right != current->cohorts.end(); ++right) {
Cohort *s = *right;
encs.push_back(s);
if (s->is_pright == p->second) {
@@ -1661,17 +1999,17 @@ void GrammarApplicator::runGrammarOnWindow() {
--left;
uint32_t lc = (*left)->local_number;
++right;
- for (; right != current->cohorts.end() ; ++right) {
+ for (; right != current->cohorts.end(); ++right) {
*left = *right;
(*left)->local_number = lc;
++lc;
++left;
}
current->cohorts.resize(current->cohorts.size() - encs.size());
- foreach (CohortVector, encs, eiter, eiter_end) {
+ foreach (eiter, encs) {
(*eiter)->type |= CT_ENCLOSED;
}
- foreach (CohortVector, c->enclosed, eiter2, eiter2_end) {
+ foreach (eiter2, c->enclosed) {
encs.push_back(*eiter2);
}
c->enclosed = encs;
@@ -1704,9 +2042,9 @@ label_runGrammarOnWindow_begin:
if (trace_encl) {
uint32_t hitpass = std::numeric_limits<uint32_t>::max() - pass;
size_t nc = current->cohorts.size();
- for (size_t i=0 ; i<nc ; ++i) {
+ for (size_t i = 0; i < nc; ++i) {
Cohort *c = current->cohorts[i];
- foreach (ReadingList, c->readings, rit, rit_end) {
+ foreach (rit, c->readings) {
(*rit)->hit_by.push_back(hitpass);
}
}
@@ -1719,25 +2057,25 @@ label_runGrammarOnWindow_begin:
if (!grammar->parentheses.empty() && current->has_enclosures) {
size_t nc = current->cohorts.size();
- for (size_t i=0 ; i<nc ; ++i) {
+ for (size_t i = 0; i < nc; ++i) {
Cohort *c = current->cohorts[i];
if (!c->enclosed.empty()) {
current->cohorts.resize(current->cohorts.size() + c->enclosed.size(), 0);
size_t ne = c->enclosed.size();
- for (size_t j=nc-1 ; j>i ; --j) {
- current->cohorts[j+ne] = current->cohorts[j];
- current->cohorts[j+ne]->local_number = j+ne;
+ for (size_t j = nc - 1; j > i; --j) {
+ current->cohorts[j + ne] = current->cohorts[j];
+ current->cohorts[j + ne]->local_number = j + ne;
}
- for (size_t j=0 ; j<ne ; ++j) {
- current->cohorts[i+j+1] = c->enclosed[j];
- current->cohorts[i+j+1]->local_number = i+j+1;
- current->cohorts[i+j+1]->parent = current;
- current->cohorts[i+j+1]->type &= ~CT_ENCLOSED;
+ for (size_t j = 0; j < ne; ++j) {
+ current->cohorts[i + j + 1] = c->enclosed[j];
+ current->cohorts[i + j + 1]->local_number = i + j + 1;
+ current->cohorts[i + j + 1]->parent = current;
+ current->cohorts[i + j + 1]->type &= ~CT_ENCLOSED;
}
par_left_tag = c->enclosed[0]->is_pleft;
- par_right_tag = c->enclosed[ne-1]->is_pright;
- par_left_pos = i+1;
- par_right_pos = i+ne;
+ par_right_tag = c->enclosed[ne - 1]->is_pright;
+ par_left_pos = i + 1;
+ par_right_pos = i + ne;
c->enclosed.clear();
goto label_runGrammarOnWindow_begin;
}
@@ -1752,5 +2090,4 @@ label_runGrammarOnWindow_begin:
}
}
}
-
}
diff --git a/src/GrammarWriter.cpp b/src/GrammarWriter.cpp
index 389afcc..e7dcdc2 100644
--- a/src/GrammarWriter.cpp
+++ b/src/GrammarWriter.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -58,7 +58,7 @@ void GrammarWriter::printSet(UFILE *output, const Set& curset) {
if (tags.size() > 1) {
u_fprintf(output, "(");
}
- boost_foreach (const Tag* tag, tags) {
+ boost_foreach (const Tag *tag, tags) {
printTag(output, *tag);
u_fprintf(output, " ");
}
@@ -71,7 +71,7 @@ void GrammarWriter::printSet(UFILE *output, const Set& curset) {
}
else {
used_sets.insert(curset.number);
- for (uint32_t i=0;i<curset.sets.size();i++) {
+ for (uint32_t i = 0; i < curset.sets.size(); i++) {
printSet(output, *(grammar->sets_list[curset.sets[i]]));
}
if (statistics) {
@@ -88,8 +88,8 @@ void GrammarWriter::printSet(UFILE *output, const Set& curset) {
}
u_fprintf(output, "SET %S = ", n);
u_fprintf(output, "%S ", grammar->sets_list[curset.sets[0]]->name.c_str());
- for (uint32_t i=0;i<curset.sets.size()-1;i++) {
- u_fprintf(output, "%S %S ", stringbits[curset.set_ops[i]].getTerminatedBuffer(), grammar->sets_list[curset.sets[i+1]]->name.c_str());
+ for (uint32_t i = 0; i < curset.sets.size() - 1; i++) {
+ u_fprintf(output, "%S %S ", stringbits[curset.set_ops[i]].getTerminatedBuffer(), grammar->sets_list[curset.sets[i + 1]]->name.c_str());
}
u_fprintf(output, " ;\n\n");
}
@@ -139,7 +139,7 @@ int GrammarWriter::writeGrammar(UFILE *output) {
if (!grammar->preferred_targets.empty()) {
u_fprintf(output, "PREFERRED-TARGETS = ");
uint32Vector::const_iterator iter;
- for (iter = grammar->preferred_targets.begin() ; iter != grammar->preferred_targets.end() ; iter++ ) {
+ for (iter = grammar->preferred_targets.begin(); iter != grammar->preferred_targets.end(); iter++) {
printTag(output, *(grammar->single_tags.find(*iter)->second));
u_fprintf(output, " ");
}
@@ -171,7 +171,7 @@ int GrammarWriter::writeGrammar(UFILE *output) {
//*/
bool found = false;
- const_foreach (RuleVector, grammar->rule_by_number, rule_iter, rule_iter_end) {
+ foreach (rule_iter, grammar->rule_by_number) {
const Rule& r = **rule_iter;
if (r.section == -1) {
if (!found) {
@@ -182,9 +182,9 @@ int GrammarWriter::writeGrammar(UFILE *output) {
u_fprintf(output, " ;\n");
}
}
- const_foreach (uint32Vector, grammar->sections, isec, isec_end) {
+ foreach (isec, grammar->sections) {
found = false;
- const_foreach (RuleVector, grammar->rule_by_number, rule_iter, rule_iter_end) {
+ foreach (rule_iter, grammar->rule_by_number) {
const Rule& r = **rule_iter;
if (r.section == (int32_t)*isec) {
if (!found) {
@@ -197,7 +197,7 @@ int GrammarWriter::writeGrammar(UFILE *output) {
}
}
found = false;
- const_foreach (RuleVector, grammar->rule_by_number, rule_iter, rule_iter_end) {
+ foreach (rule_iter, grammar->rule_by_number) {
const Rule& r = **rule_iter;
if (r.section == -2) {
if (!found) {
@@ -209,7 +209,7 @@ int GrammarWriter::writeGrammar(UFILE *output) {
}
}
found = false;
- const_foreach (RuleVector, grammar->rule_by_number, rule_iter, rule_iter_end) {
+ foreach (rule_iter, grammar->rule_by_number) {
const Rule& r = **rule_iter;
if (r.section == -3) {
if (!found) {
@@ -246,9 +246,9 @@ void GrammarWriter::printRule(UFILE *to, const Rule& rule) {
}
u_fprintf(to, " ");
- for (uint32_t i=0 ; i<FLAGS_COUNT ; i++) {
+ for (uint32_t i = 0; i < FLAGS_COUNT; i++) {
if (rule.flags & (1 << i)) {
- u_fprintf(to, "%S ", flags[i].getTerminatedBuffer());
+ u_fprintf(to, "%S ", g_flags[i].getTerminatedBuffer());
}
}
@@ -264,7 +264,7 @@ void GrammarWriter::printRule(UFILE *to, const Rule& rule) {
u_fprintf(to, "%S ", grammar->sets_list[rule.target]->name.c_str());
}
- const_foreach (ContextList, rule.tests, it, it_end) {
+ foreach (it, rule.tests) {
u_fprintf(to, "(");
printContextualTest(to, **it);
u_fprintf(to, ") ");
@@ -274,7 +274,7 @@ void GrammarWriter::printRule(UFILE *to, const Rule& rule) {
u_fprintf(to, "TO (");
printContextualTest(to, *(rule.dep_target));
u_fprintf(to, ") ");
- const_foreach (ContextList, rule.dep_tests, it, it_end) {
+ foreach (it, rule.dep_tests) {
u_fprintf(to, "(");
printContextualTest(to, **it);
u_fprintf(to, ") ");
@@ -295,7 +295,7 @@ void GrammarWriter::printContextualTest(UFILE *to, const ContextualTest& test) {
u_fprintf(to, "T:%u ", test.tmpl->hash);
}
else if (!test.ors.empty()) {
- for (BOOST_AUTO(iter, test.ors.begin()) ; iter != test.ors.end() ; ) {
+ for (BOOST_AUTO(iter, test.ors.begin()); iter != test.ors.end();) {
u_fprintf(to, "(");
printContextualTest(to, **iter);
u_fprintf(to, ")");
@@ -411,5 +411,4 @@ void GrammarWriter::printTag(UFILE *to, const Tag& tag) {
UString str = tag.toUString(true);
u_file_write(str.c_str(), str.length(), to);
}
-
}
diff --git a/src/GrammarWriter.hpp b/src/GrammarWriter.hpp
index 982052e..30942f2 100644
--- a/src/GrammarWriter.hpp
+++ b/src/GrammarWriter.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -24,33 +24,33 @@
#define c6d28b7452ec699b_GRAMMARWRITER_H
#include "stdafx.hpp"
-
+
namespace CG3 {
- class Grammar;
- class Tag;
- class Set;
- class Rule;
- class ContextualTest;
-
- class GrammarWriter {
- public:
- bool statistics;
-
- GrammarWriter(Grammar& res, UFILE *ux_err);
- ~GrammarWriter();
-
- int writeGrammar(UFILE *output);
-
- private:
- UFILE *ux_stderr;
- uint32FlatHashSet used_sets;
- const Grammar *grammar;
-
- void printTag(UFILE *out, const Tag& tag);
- void printSet(UFILE *output, const Set& curset);
- void printRule(UFILE *to, const Rule& rule);
- void printContextualTest(UFILE *to, const ContextualTest& test);
- };
+class Grammar;
+class Tag;
+class Set;
+class Rule;
+class ContextualTest;
+
+class GrammarWriter {
+public:
+ bool statistics;
+
+ GrammarWriter(Grammar& res, UFILE *ux_err);
+ ~GrammarWriter();
+
+ int writeGrammar(UFILE *output);
+
+private:
+ UFILE *ux_stderr;
+ uint32FlatHashSet used_sets;
+ const Grammar *grammar;
+
+ void printTag(UFILE *out, const Tag& tag);
+ void printSet(UFILE *output, const Set& curset);
+ void printRule(UFILE *to, const Rule& rule);
+ void printContextualTest(UFILE *to, const ContextualTest& test);
+};
}
#endif
diff --git a/src/IGrammarParser.hpp b/src/IGrammarParser.hpp
index 8e7e5db..6e4d935 100644
--- a/src/IGrammarParser.hpp
+++ b/src/IGrammarParser.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,20 +26,21 @@
#include "stdafx.hpp"
namespace CG3 {
- class Grammar;
+class Grammar;
- class IGrammarParser {
- public:
- virtual ~IGrammarParser() {};
- virtual void setCompatible(bool compat) = 0;
- virtual void setVerbosity(uint32_t level) = 0;
- virtual int parse_grammar_from_file(const char *filename, const char *locale, const char *codepage) = 0;
+class IGrammarParser {
+public:
+ virtual ~IGrammarParser(){};
+ virtual void setCompatible(bool compat) = 0;
+ virtual void setVerbosity(uint32_t level) = 0;
+ virtual int parse_grammar_from_file(const char *filename, const char *locale, const char *codepage) = 0;
- UFILE *ux_stderr;
- protected:
- Grammar *result;
- uint32_t verbosity;
- };
+ UFILE *ux_stderr;
+
+protected:
+ Grammar *result;
+ uint32_t verbosity;
+};
}
#endif
diff --git a/src/ApertiumApplicator.cpp b/src/MatxinApplicator.cpp
similarity index 69%
copy from src/ApertiumApplicator.cpp
copy to src/MatxinApplicator.cpp
index e594dcb..170f093 100644
--- a/src/ApertiumApplicator.cpp
+++ b/src/MatxinApplicator.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -19,7 +19,7 @@
* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "ApertiumApplicator.hpp"
+#include "MatxinApplicator.hpp"
#include "Strings.hpp"
#include "Tag.hpp"
#include "Grammar.hpp"
@@ -29,57 +29,58 @@
namespace CG3 {
-ApertiumApplicator::ApertiumApplicator(UFILE *ux_err)
- : GrammarApplicator(ux_err)
+MatxinApplicator::MatxinApplicator(UFILE *ux_err)
+ : GrammarApplicator(ux_err)
{
- nullFlush=false;
+ nullFlush = false;
wordform_case = false;
+ unique_tags = false;
print_word_forms = true;
print_only_first = false;
- runningWithNullFlush=false;
- fgetc_converter=0;
+ runningWithNullFlush = false;
+ fgetc_converter = 0;
+ fgetc_error = U_ZERO_ERROR;
}
-bool ApertiumApplicator::getNullFlush() {
+bool MatxinApplicator::getNullFlush() {
return nullFlush;
}
-void ApertiumApplicator::setNullFlush(bool pNullFlush) {
- nullFlush=pNullFlush;
+void MatxinApplicator::setNullFlush(bool pNullFlush) {
+ nullFlush = pNullFlush;
}
-UChar ApertiumApplicator::u_fgetc_wrapper(istream& input) {
+UChar MatxinApplicator::u_fgetc_wrapper(istream& input) {
if (runningWithNullFlush) {
if (!fgetc_converter) {
- fgetc_error=U_ZERO_ERROR;
+ fgetc_error = U_ZERO_ERROR;
fgetc_converter = ucnv_open(ucnv_getDefaultName(), &fgetc_error);
if (U_FAILURE(fgetc_error)) {
- u_fprintf(ux_stderr, "Error in ucnv_open: %d\n", fgetc_error);
- }
+ u_fprintf(ux_stderr, "Error in ucnv_open: %d\n", fgetc_error);
+ }
}
int ch;
int result;
- int inputsize=0;
+ int inputsize = 0;
do {
ch = input.getc_raw();
- if (ch==0) {
+ if (ch == 0) {
return 0;
}
else {
- fgetc_inputbuf[inputsize]=static_cast<char>(ch);
+ fgetc_inputbuf[inputsize] = static_cast<char>(ch);
inputsize++;
- fgetc_error=U_ZERO_ERROR;
+ fgetc_error = U_ZERO_ERROR;
result = ucnv_toUChars(fgetc_converter, fgetc_outputbuf, 5, fgetc_inputbuf, inputsize, &fgetc_error);
if (U_FAILURE(fgetc_error)) {
u_fprintf(ux_stderr, "Error conversion: %d\n", fgetc_error);
}
}
- }
- while (( ((result>=1 && fgetc_outputbuf[0]==0xFFFD)) || result<1 || U_FAILURE(fgetc_error) ) && !input.eof() && inputsize<5);
+ } while ((((result >= 1 && fgetc_outputbuf[0] == 0xFFFD)) || result < 1 || U_FAILURE(fgetc_error)) && !input.eof() && inputsize < 5);
- if (fgetc_outputbuf[0]==0xFFFD && input.eof()) {
+ if (fgetc_outputbuf[0] == 0xFFFD && input.eof()) {
return U_EOF;
}
return fgetc_outputbuf[0];
@@ -90,22 +91,22 @@ UChar ApertiumApplicator::u_fgetc_wrapper(istream& input) {
}
-void ApertiumApplicator::runGrammarOnTextWrapperNullFlush(istream& input, UFILE *output) {
+void MatxinApplicator::runGrammarOnTextWrapperNullFlush(istream& input, UFILE *output) {
setNullFlush(false);
- runningWithNullFlush=true;
+ runningWithNullFlush = true;
while (!input.eof()) {
runGrammarOnText(input, output);
u_fputc('\0', output);
u_fflush(output);
}
- runningWithNullFlush=false;
+ runningWithNullFlush = false;
}
/*
- * Run a constraint grammar on an Apertium input stream
+ * Run a constraint grammar on an Matxin input stream
*/
-void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
+void MatxinApplicator::runGrammarOnText(istream& input, UFILE *output) {
if (getNullFlush()) {
runGrammarOnTextWrapperNullFlush(input, output);
return;
@@ -138,23 +139,23 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
}
}
- UChar inchar = 0; // Current character
- bool superblank = false; // Are we in a superblank ?
- bool incohort = false; // Are we in a cohort ?
- UString firstblank; // Blanks before the first window
+ UChar inchar = 0; // Current character
+ bool superblank = false; // Are we in a superblank ?
+ bool incohort = false; // Are we in a cohort ?
+ UString firstblank; // Blanks before the first window
index();
- uint32_t resetAfter = ((num_windows+4)*2+1);
+ uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
begintag = addTag(stringbits[S_BEGINTAG].getTerminatedBuffer())->hash; // Beginning of sentence tag
- endtag = addTag(stringbits[S_ENDTAG].getTerminatedBuffer())->hash; // End of sentence tag
+ endtag = addTag(stringbits[S_ENDTAG].getTerminatedBuffer())->hash; // End of sentence tag
- SingleWindow *cSWindow = 0; // Current single window (Cohort frame)
- Cohort *cCohort = 0; // Current cohort
- Reading *cReading = 0; // Current reading
+ SingleWindow *cSWindow = 0; // Current single window (Cohort frame)
+ Cohort *cCohort = 0; // Current cohort
+ Reading *cReading = 0; // Current reading
- SingleWindow *lSWindow = 0; // Left hand single window
+ SingleWindow *lSWindow = 0; // Left hand single window
gWindow->window_span = num_windows;
gtimer = getticks();
@@ -215,8 +216,8 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
initEmptyCohort(*cCohort);
}
if (cCohort && cSWindow->cohorts.size() >= soft_limit && grammar->soft_delimiters && doesSetMatchCohortNormal(*cCohort, grammar->soft_delimiters->number)) {
- // ie. we've read some cohorts
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ // ie. we've read some cohorts
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -231,7 +232,7 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -422,7 +423,7 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
if (cCohort->readings.empty()) {
initEmptyCohort(*cCohort);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
cReading = 0;
@@ -431,6 +432,7 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
}
// Run the grammar & print results
+ u_fprintf(output, "<corpus>\n");
while (!gWindow->next.empty()) {
while (!gWindow->previous.empty() && gWindow->previous.size() > num_windows) {
SingleWindow *tmp = gWindow->previous.front();
@@ -453,6 +455,7 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
if ((inchar) && inchar != 0xffff) {
u_fprintf(output, "%C", inchar); // eg. final newline
}
+ u_fprintf(output, "</corpus>\n");
u_fflush(output);
@@ -471,7 +474,7 @@ void ApertiumApplicator::runGrammarOnText(istream& input, UFILE *output) {
* sellout<vblex><imp><p2><sg># ouzh+indirect<prn><obj><p3><m><sg>
* be# happy<vblex><inf> (for chaining cg-proc)
*/
-void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_string) {
+void MatxinApplicator::processReading(Reading *cReading, const UChar *reading_string) {
const UChar *m = reading_string;
const UChar *c = reading_string;
UString tmptag;
@@ -514,7 +517,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
base += '"';
while (*c != '\0') {
if (*c == '*') { // Initial asterisk means word is unknown, and
- // should just be copied in the output.
+ // should just be copied in the output.
unknown = true;
}
if (*c == '<' || *c == '\0') {
@@ -525,12 +528,12 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
}
if (!suf.empty()) { // Append the multiword suffix to the baseform
- // (this is normally done in pretransfer)
+ // (this is normally done in pretransfer)
base += suf;
}
base += '"';
-// u_fprintf(ux_stderr, ">> b: %S s: %S\n", base.c_str(), suf.c_str());
+ // u_fprintf(ux_stderr, ">> b: %S s: %S\n", base.c_str(), suf.c_str());
TagVector taglist;
@@ -565,7 +568,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
if (*c == '<') {
multi = false;
if (intag == true) {
- u_fprintf(ux_stderr, "Error: The Apertium stream format does not allow '<' in tag names.\n");
+ u_fprintf(ux_stderr, "Error: The Matxin stream format does not allow '<' in tag names.\n");
++c;
continue;
}
@@ -575,7 +578,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
UString bf;
bf += '"';
if (tmptag[0] == '+') {
- bf.append(tmptag.begin()+1, tmptag.end());
+ bf.append(tmptag.begin() + 1, tmptag.end());
}
else {
bf += tmptag;
@@ -592,7 +595,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
else if (*c == '>') {
multi = false;
if (intag == false) {
- u_fprintf(ux_stderr, "Error: The Apertium stream format does not allow '>' outside tag names.\n");
+ u_fprintf(ux_stderr, "Error: The Matxin stream format does not allow '>' outside tag names.\n");
++c;
continue;
}
@@ -619,7 +622,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
// Search from the back until we find a baseform, then add all tags from there until the end onto the reading
while (!taglist.empty()) {
Reading *reading = cReading;
- reverse_foreach (TagVector, taglist, riter, riter_end) {
+ reverse_foreach (riter, taglist) {
if ((*riter)->type & T_BASEFORM) {
// If current reading already has a baseform, instead create a sub-reading as target
if (reading->baseform) {
@@ -630,7 +633,7 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
// Add tags
TagList mappings;
TagVector::iterator iter = riter.base();
- for (--iter ; iter != taglist.end() ; ++iter) {
+ for (--iter; iter != taglist.end(); ++iter) {
if ((*iter)->type & T_MAPPING || (*iter)->tag[0] == grammar->mapping_prefix) {
mappings.push_back(*iter);
}
@@ -650,89 +653,40 @@ void ApertiumApplicator::processReading(Reading *cReading, const UChar *reading_
}
}
- assert(taglist.empty() && "ApertiumApplicator::processReading() did not handle all tags.");
+ assert(taglist.empty() && "MatxinApplicator::processReading() did not handle all tags.");
}
-void ApertiumApplicator::processReading(Reading *cReading, const UString& reading_string) {
+void MatxinApplicator::processReading(Reading *cReading, const UString& reading_string) {
return processReading(cReading, reading_string.c_str());
}
-void ApertiumApplicator::testPR(UFILE *output) {
- std::string texts[] = {
- "venir<vblex><imp><p2><sg>",
- "venir<vblex><inf>+lo<prn><enc><p3><nt><sg>",
- "be<vblex><inf># happy",
- "sellout<vblex><imp><p2><sg># ouzh+indirect<prn><obj><p3><m><sg>",
- "be# happy<vblex><inf>",
- "aux3<tag>+aux2<tag>+aux1<tag>+main<tag>",
- };
- for (size_t i = 0 ; i<6 ; ++i) {
- UString text(texts[i].begin(), texts[i].end());
- Reading *reading = alloc_reading(0);
- processReading(reading, text);
- if (grammar->sub_readings_ltr && reading->next) {
- reading = reverse(reading);
- }
- printReading(reading, output);
- u_fprintf(output, "\n");
- }
-}
-
-void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
+void MatxinApplicator::printReading(Reading *reading, Node& node, UFILE *output) {
if (reading->noprint) {
return;
}
if (reading->next) {
+ u_fprintf(ux_stderr, "Error: input contains sub-readings!\n");
+ u_fprintf(output, " </SENTENCE>\n");
+ u_fprintf(output, "</corpus>\n");
+ exit(-1);
+ }
+
+ /*
+ if (reading->next) {
printReading(reading->next, output);
u_fputc('+', output);
}
+//*/
- if (reading->baseform) {
- // Lop off the initial and final '"' characters
- UnicodeString bf(single_tags[reading->baseform]->tag.c_str()+1, single_tags[reading->baseform]->tag.length()-2);
-
- if (wordform_case && !reading->next) {
- // Use surface/wordform case, eg. if lt-proc
- // was called with "-w" option (which puts
- // dictionary case on lemma/basefrom)
- // Lop off the initial and final '"<>"' characters
- // ToDo: A copy does not need to be made here - use pointer offsets
- UnicodeString wf(reading->parent->wordform->tag.c_str()+2, reading->parent->wordform->tag.length()-4);
-
- int first = 0; // first occurrence of a lowercase character in baseform
- for (; first<bf.length() ; ++first) {
- if (u_islower(bf[first]) != 0) {
- break;
- }
- }
-
- // this corresponds to fst_processor.cc in lttoolbox:
- bool firstupper = first < wf.length() && (u_isupper(wf[first]) != 0);
- bool uppercase = firstupper && u_isupper(wf[wf.length()-1]);
-
- if (uppercase) {
- bf.toUpper(); // Perform a Unicode case folding to upper case -- Tino Didriksen
- }
- else if (firstupper && first < bf.length()) {
- // static_cast<UChar>(u_toupper(bf[first])) gives strange output
- UnicodeString range(bf, first, 1);
- range.toUpper();
- bf.setCharAt(first, range[0]);
- }
- } // if (wordform_case)
+ if (!reading->baseform) {
+ return;
+ }
- UString bf_escaped;
- for (int i=0 ; i<bf.length() ; ++i) {
- if (bf[i] == '^' || bf[i] == '\\' || bf[i] == '/' || bf[i] == '$' || bf[i] == '[' || bf[i] == ']' || bf[i] == '{' || bf[i] == '}' || bf[i] == '<' || bf[i] == '>') {
- bf_escaped += '\\';
- }
- bf_escaped += bf[i];
- }
- u_fprintf(output, "%S", bf_escaped.c_str());
+ // Lop off the initial and final '"' characters
+ UnicodeString bf(single_tags[reading->baseform]->tag.c_str() + 1, single_tags[reading->baseform]->tag.length() - 2);
- // Tag::printTagRaw(output, single_tags[reading->baseform]);
- }
+ node.lemma = bf.getTerminatedBuffer();
// Reorder: MAPPING tags should appear before the join of multiword tags,
// turn <vblex><actv><pri><p3><pl>+í<pr><@FMAINV><@FOO>$
@@ -741,24 +695,29 @@ void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
Reading::tags_list_t multitags_list; // everything after a +, until the first MAPPING tag
Reading::tags_list_t::iterator tter;
bool multi = false;
- for (tter = reading->tags_list.begin() ; tter != reading->tags_list.end() ; tter++) {
+ bool first = true;
+ for (tter = reading->tags_list.begin(); tter != reading->tags_list.end(); tter++) {
const Tag *tag = single_tags[*tter];
if (tag->tag[0] == '+') {
multi = true;
- } else if (tag->type & T_MAPPING) {
+ }
+ else if (tag->type & T_MAPPING) {
multi = false;
}
if (multi) {
multitags_list.push_back(*tter);
- } else {
+ }
+ else {
tags_list.push_back(*tter);
}
}
- tags_list.insert(tags_list.end(),multitags_list.begin(),multitags_list.end());
+ tags_list.insert(tags_list.end(), multitags_list.begin(), multitags_list.end());
uint32SortedVector used_tags;
- for (tter = tags_list.begin() ; tter != tags_list.end() ; tter++) {
+ UString mi;
+ first = true;
+ for (tter = tags_list.begin(); tter != tags_list.end(); tter++) {
if (unique_tags) {
if (used_tags.find(*tter) != used_tags.end()) {
continue;
@@ -773,32 +732,37 @@ void ApertiumApplicator::printReading(Reading *reading, UFILE *output) {
if (tag->tag[0] == '+') {
u_fprintf(output, "%S", tag->tag.c_str());
}
- else if (tag->tag[0] == '&') {
- u_fprintf(output, "<%S>", substr(tag->tag, 2).c_str());
+ else if (tag->tag[0] == '@') {
+ //u_fprintf(output, "<%S>", tag->tag.c_str());
+ node.si = tag->tag;
}
else {
- u_fprintf(output, "<%S>", tag->tag.c_str());
+ //u_fprintf(output, "<%S>", tag->tag.c_str());
+ if (first) {
+ mi += tag->tag;
+ first = false;
+ }
+ else {
+ mi += '|';
+ mi += tag->tag;
+ }
}
}
}
-
- if (trace) {
- const_foreach (uint32Vector, reading->hit_by, iter_hb, iter_hb_end) {
- u_fputc('<', output);
- printTrace(output, *iter_hb);
- u_fputc('>', output);
- }
- }
+ node.mi = mi;
}
-void ApertiumApplicator::printSingleWindow(SingleWindow *window, UFILE *output) {
-
+void MatxinApplicator::printSingleWindow(SingleWindow *window, UFILE *output) {
+ /*
// Window text comes at the left
if (!window->text.empty()) {
u_fprintf(output, "%S", window->text.c_str());
}
+//*/
- for (uint32_t c=0 ; c < window->cohorts.size() ; c++) {
+ u_fprintf(output, " <SENTENCE ord=\"%d\" alloc=\"0\">\n", window->number);
+
+ for (uint32_t c = 0; c < window->cohorts.size(); c++) {
if (c == 0) { // Skip magic cohort
continue;
}
@@ -810,98 +774,155 @@ void ApertiumApplicator::printSingleWindow(SingleWindow *window, UFILE *output)
}
// Start of cohort
- u_fprintf(output, "^");
-
- if (print_word_forms == true) {
- // Lop off the initial and final '"' characters
- // ToDo: A copy does not need to be made here - use pointer offsets
- UnicodeString wf(cohort->wordform->tag.c_str()+2, cohort->wordform->tag.length()-4);
- UString wf_escaped;
- for (int i=0 ; i<wf.length() ; ++i) {
- if (wf[i] == '^' || wf[i] == '\\' || wf[i] == '/' || wf[i] == '$' || wf[i] == '[' || wf[i] == ']' || wf[i] == '{' || wf[i] == '}' || wf[i] == '<' || wf[i] == '>') {
- wf_escaped += '\\';
- }
- wf_escaped += wf[i];
- }
- u_fprintf(output, "%S", wf_escaped.c_str());
- // Print the static reading tags
- if (cohort->wread) {
- const_foreach (Reading::tags_list_t, cohort->wread->tags_list, tter, tter_end) {
- if (*tter == cohort->wordform->hash) {
- continue;
- }
- const Tag *tag = single_tags[*tter];
- u_fprintf(output, "<%S>", tag->tag.c_str());
+ Node n;
+
+ // Lop off the initial and final '"' characters
+ // ToDo: A copy does not need to be made here - use pointer offsets
+ UnicodeString wf(cohort->wordform->tag.c_str() + 2, cohort->wordform->tag.length() - 4);
+ UString wf_escaped;
+ for (int i = 0; i < wf.length(); ++i) {
+ if (wf[i] == '&') {
+ wf_escaped += '&';
+ wf_escaped += 'a';
+ wf_escaped += 'm';
+ wf_escaped += 'p';
+ wf_escaped += ';';
+ }
+ else if (wf[i] == '"') {
+ wf_escaped += '&';
+ wf_escaped += 'q';
+ wf_escaped += 'u';
+ wf_escaped += 'o';
+ wf_escaped += 't';
+ wf_escaped += ';';
+ }
+ wf_escaped += wf[i];
+ }
+
+ n.self = cohort->global_number;
+ n.form = wf_escaped;
+
+ /*
+ // Print the static reading tags
+ if (cohort->wread) {
+ foreach (tter, cohort->wread->tags_list) {
+ if (*tter == cohort->wordform->hash) {
+ continue;
}
+ const Tag *tag = single_tags[*tter];
+ u_fprintf(output, "<%S>", tag->tag.c_str());
}
}
-
- bool need_slash = print_word_forms;
+//*/
//Tag::printTagRaw(output, single_tags[cohort->wordform]);
- boost_foreach (Reading *reading, cohort->readings) {
- if (need_slash) {
- u_fprintf(output, "/");
- }
- need_slash = true;
- if (grammar->sub_readings_ltr && reading->next) {
- reading = reverse(reading);
- }
- printReading(reading, output);
- if(print_only_first == true)
- {
- break;
- }
+ Reading *reading = cohort->readings[0];
+
+ printReading(reading, n, output);
+
+ // if we can't find the root by this point then
+ // set the parent to the last word in the sent,
+ // for want of a better option
+ int r = nodes.size(); // last word
+ if (deps[0].size() > 0) {
+ r = deps[0][0];
}
- if (trace) {
- const UChar not_sign = L'\u00AC';
- boost_foreach (Reading *reading, cohort->delayed) {
- if (need_slash) {
- u_fprintf(output, "/%C", not_sign);
- }
- need_slash = true;
- if (grammar->sub_readings_ltr && reading->next) {
- reading = reverse(reading);
- }
- printReading(reading, output);
- }
- boost_foreach (Reading *reading, cohort->deleted) {
- if (need_slash) {
- u_fprintf(output, "/%C", not_sign);
- }
- need_slash = true;
- if (grammar->sub_readings_ltr && reading->next) {
- reading = reverse(reading);
- }
- printReading(reading, output);
- }
+ nodes[cohort->global_number] = n;
+
+ if (cohort->dep_parent == DEP_NO_PARENT) {
+ deps[r].push_back(cohort->global_number);
+ //u_fprintf(output, "+[%d] %d -> %d || %d || %S\n", c, cohort->global_number, cohort->dep_parent, r, cohort->text.c_str());
+ }
+ else {
+ deps[cohort->dep_parent].push_back(cohort->global_number);
+ //u_fprintf(output, "#[%d] %d -> %d || %d || %S\n", c, cohort->global_number, cohort->dep_parent, r, cohort->text.c_str());
}
+
+ /*
+ u_fprintf(output, "[%d] %d -> %d || %S\n", c, cohort->global_number, cohort->dep_parent, cohort->text.c_str());
u_fprintf(output, "$");
// End of cohort
if (!cohort->text.empty()) {
u_fprintf(output, "%S", cohort->text.c_str());
}
+//*/
u_fflush(output);
}
+
+ int depth = 0;
+ procNode(depth, nodes, deps, 0, output);
+
+ u_fprintf(output, " </SENTENCE>\n");
}
-void ApertiumApplicator::mergeMappings(Cohort& cohort) {
+/**
+ * Recursively prints node n and everything that depends on it as nested
+ * <NODE> elements.
+ *
+ * @param depth  Current nesting level; incremented on entry and decremented
+ *               again once the element for n has been closed. Each line is
+ *               indented by depth * 2 single-space writes.
+ * @param nodes  Node data keyed by node number.
+ * @param deps   Children of each node, keyed by the parent's node number.
+ * @param n      Node to print. Node 0 is the root: it emits no element of
+ *               its own, only its children.
+ * @param output Stream the XML is written to.
+ *
+ * A node without children is written as a self-closing element.
+ * Note: for n == 0 with no children, depth is left incremented on return
+ * (kept as in the original implementation).
+ */
+void MatxinApplicator::procNode(int& depth, std::map<int, Node>& nodes, std::map<int, std::vector<int> >& deps, int n, UFILE *output) {
+	// operator[] creates missing entries, exactly as before. References into a
+	// std::map stay valid when the recursion below inserts further keys, so
+	// there is no need to deep-copy the node or its child list.
+	const Node& node = nodes[n];
+	const std::vector<int>& children = deps[n];
+	// deps[n] exists at this point, so "has a non-empty entry in deps" is
+	// simply "children is not empty"; no scan over the whole map is needed.
+	const bool has_children = !children.empty();
+	depth = depth + 1;
+
+	if (n != 0) {
+		// Cut off first character, if not empty
+		const UChar *si = node.si.c_str() + !node.si.empty();
+
+		for (int i = 0; i < depth * 2; i++) {
+			u_fprintf(output, " ");
+		}
+
+		if (has_children) {
+			u_fprintf(output, "<NODE ord=\"%d\" alloc=\"0\" form=\"%S\" lem=\"%S\" mi=\"%S\" si=\"%S\">\n", node.self, node.form.c_str(), node.lemma.c_str(), node.mi.c_str(), si);
+		}
+		else {
+			u_fprintf(output, "<NODE ord=\"%d\" alloc=\"0\" form=\"%S\" lem=\"%S\" mi=\"%S\" si=\"%S\"/>\n", node.self, node.form.c_str(), node.lemma.c_str(), node.mi.c_str(), si);
+			// Self-closing element: this level is already finished
+			depth = depth - 1;
+		}
+	}
+
+	if (!has_children) {
+		return;
+	}
+	for (std::vector<int>::const_iterator it = children.begin(); it != children.end(); ++it) {
+		procNode(depth, nodes, deps, *it, output);
+	}
+
+	if (n != 0) {
+		for (int i = 0; i < depth * 2; i++) {
+			u_fprintf(output, " ");
+		}
+
+		u_fprintf(output, "</NODE>\n");
+	}
+
+	depth = depth - 1;
+}
+
+void MatxinApplicator::mergeMappings(Cohort& cohort) {
// Only merge readings which are completely equal (including mapping tags)
// foo<N><Sg><Acc><@←OBJ>/foo<N><Sg><Acc><@←OBJ>
// => guovvamánnu<N><Sg><Acc><@←OBJ>
// foo<N><Sg><Acc><@←SUBJ>/foo<N><Sg><Acc><@←OBJ>
// => foo<N><Sg><Acc><@←SUBJ>/foo<N><Sg><Acc><@←OBJ>
std::map<uint32_t, ReadingList> mlist;
- foreach (ReadingList, cohort.readings, iter, iter_end) {
+ foreach (iter, cohort.readings) {
Reading *r = *iter;
uint32_t hp = r->hash; // instead of hash_plain, which doesn't include mapping tags
if (trace) {
- foreach (uint32Vector, r->hit_by, iter_hb, iter_hb_end) {
+ foreach (iter_hb, r->hit_by) {
hp = hash_value(*iter_hb, hp);
}
}
@@ -909,7 +930,7 @@ void ApertiumApplicator::mergeMappings(Cohort& cohort) {
while (sub) {
hp = hash_value(sub->hash, hp);
if (trace) {
- foreach (uint32Vector, sub->hit_by, iter_hb, iter_hb_end) {
+ foreach (iter_hb, sub->hit_by) {
hp = hash_value(*iter_hb, hp);
}
}
@@ -926,7 +947,7 @@ void ApertiumApplicator::mergeMappings(Cohort& cohort) {
std::vector<Reading*> order;
std::map<uint32_t, ReadingList>::iterator miter;
- for (miter = mlist.begin() ; miter != mlist.end() ; miter++) {
+ for (miter = mlist.begin(); miter != mlist.end(); miter++) {
ReadingList clist = miter->second;
Reading *nr = alloc_reading(*(clist.front()));
// no merging of mapping tags
@@ -936,5 +957,4 @@ void ApertiumApplicator::mergeMappings(Cohort& cohort) {
std::sort(order.begin(), order.end(), CG3::Reading::cmp_number);
cohort.readings.insert(cohort.readings.begin(), order.begin(), order.end());
}
-
}
diff --git a/src/MatxinApplicator.hpp b/src/MatxinApplicator.hpp
new file mode 100644
index 0000000..6cb8015
--- /dev/null
+++ b/src/MatxinApplicator.hpp
@@ -0,0 +1,80 @@
+/*
+* Copyright (C) 2007-2016, GrammarSoft ApS
+* Developed by Tino Didriksen <mail at tinodidriksen.com>
+* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
+*
+* This file is part of VISL CG-3
+*
+* VISL CG-3 is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* VISL CG-3 is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+#ifndef c6d28b7452ec699b_GRAMMARAPPLICATORMATXIN_H
+#define c6d28b7452ec699b_GRAMMARAPPLICATORMATXIN_H
+
+#include "GrammarApplicator.hpp"
+
+namespace CG3 {
+class MatxinApplicator : public virtual GrammarApplicator { // Applicator variant whose procNode() writes cohorts as nested <NODE> elements
+public:
+ MatxinApplicator(UFILE *ux_err);
+
+ void runGrammarOnText(istream& input, UFILE *output);
+
+ bool getNullFlush();
+ bool wordform_case;
+ bool print_word_forms;
+ bool print_only_first;
+ void setNullFlush(bool pNullFlush);
+
+ void testPR(UFILE *output);
+
+protected:
+ struct Node { // Data for one <NODE> element printed by procNode()
+ int self; // written as the ord="..." attribute
+ UString lemma; // written as the lem="..." attribute
+ UString form; // written as the form="..." attribute
+ UString pos; // NOTE(review): not read by procNode(); usage not visible here
+ UString mi; // written as the mi="..." attribute
+ UString si; // written as the si="..." attribute with its first character dropped
+ };
+
+ std::map<int, Node> nodes; // presumably node number -> node data, as consumed by procNode() - TODO confirm
+ std::map<int, std::vector<int> > deps; // presumably parent node number -> child node numbers, key 0 being the root - TODO confirm
+
+ bool nullFlush;
+ bool runningWithNullFlush;
+
+ void printReading(Reading *reading, Node& n, UFILE *output);
+ void printSingleWindow(SingleWindow *window, UFILE *output);
+
+ void procNode(int& depth, std::map<int, Node>& nodes, std::map<int, std::vector<int> >& deps, int node, UFILE *output); // recursive; the nodes/deps parameters shadow the members of the same name
+
+
+ void runGrammarOnTextWrapperNullFlush(istream& input, UFILE *output);
+
+ UChar u_fgetc_wrapper(istream& input);
+ UConverter *fgetc_converter;
+ char fgetc_inputbuf[5];
+ UChar fgetc_outputbuf[5];
+ UErrorCode fgetc_error;
+ void mergeMappings(Cohort& cohort); // merges readings that are completely equal, mapping tags included
+
+private:
+ void processReading(Reading *cReading, const UChar *reading_string);
+ void processReading(Reading *cReading, const UString& reading_string);
+};
+}
+
+#endif
diff --git a/src/MweSplitApplicator.cpp b/src/MweSplitApplicator.cpp
new file mode 100644
index 0000000..58d9433
--- /dev/null
+++ b/src/MweSplitApplicator.cpp
@@ -0,0 +1,174 @@
+/*
+* Copyright (C) 2007-2016, GrammarSoft ApS
+* Developed by Tino Didriksen <mail at tinodidriksen.com>
+* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
+*
+* This file is part of VISL CG-3
+*
+* VISL CG-3 is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* VISL CG-3 is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "MweSplitApplicator.hpp"
+
+namespace CG3 {
+
+// Constructs the applicator; ux_err is forwarded to the GrammarApplicator
+// base and no further state is initialised here.
+MweSplitApplicator::MweSplitApplicator(UFILE *ux_err) : GrammarApplicator(ux_err) {}
+
+
+// Input handling is not specialised for MWE splitting: the call is passed
+// straight through to the GrammarApplicator implementation.
+void MweSplitApplicator::runGrammarOnText(istream& input, UFILE *output) {
+	this->GrammarApplicator::runGrammarOnText(input, output);
+}
+
+
+// Returns the first tag of reading r whose type has T_WORDFORM set, or NULL
+// when r carries no such tag. The begin tag, the end tag (unless
+// show_end_tags is on), the baseform and the owning cohort's own wordform
+// are never considered.
+const Tag *MweSplitApplicator::maybeWfTag(const Reading *r) {
+	foreach (hash_it, r->tags_list) {
+		// Same tests in the same short-circuit order as a chain of early skips
+		const bool ignored = (!show_end_tags && *hash_it == endtag)
+		  || *hash_it == begintag
+		  || *hash_it == r->baseform
+		  || *hash_it == r->parent->wordform->hash;
+		if (!ignored) {
+			const Tag *candidate = single_tags[*hash_it];
+			// If we are to split, there has to be at least one wordform on a head (not-sub) reading
+			if (candidate->type & T_WORDFORM) {
+				return candidate;
+			}
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Splits a multi-word cohort into one cohort per wordform-tagged
+ * (sub-)reading.
+ *
+ * Splitting only happens when every main reading of the cohort carries a
+ * wordform tag (see maybeWfTag()); otherwise the cohort is returned
+ * unchanged as the only element, with a warning if some but not all main
+ * readings had one. For each wordform tag, trailing blanks inside the tag
+ * are trimmed to form the new cohort's wordform, and the trimmed blanks are
+ * stored as that cohort's text, prefixed with ':'.
+ *
+ * @param cohort Cohort to split; new cohorts are allocated in, and appended
+ *               to, cohort->parent.
+ * @return The resulting cohorts in reverse order of creation, or a vector
+ *         holding only the original cohort when no split was performed.
+ */
+std::vector<Cohort*> MweSplitApplicator::splitMwe(Cohort *cohort) {
+	const UChar rtrimblank[] = { ' ', '\n', '\r', '\t', 0 };
+	const UChar textprefix[] = { ':', 0 };
+	std::vector<Cohort*> cos;
+	size_t n_wftags = 0;
+	size_t n_goodreadings = 0;
+	foreach (rter1, cohort->readings) {
+		if (maybeWfTag(*rter1) != NULL) {
+			++n_wftags;
+		}
+		++n_goodreadings;
+	}
+
+	if (n_wftags < n_goodreadings) {
+		if (n_wftags > 0) {
+			u_fprintf(ux_stderr, "WARNING: Line %u: Some but not all main-readings of %S had wordform-tags (not completely mwe-disambiguated?), not splitting.\n", numLines, cohort->wordform->tag.c_str());
+			// We also don't split if wordform-tags were only on sub-readings, but should we warn on such faulty input?
+		}
+		cos.push_back(cohort);
+		return cos;
+	}
+	foreach (r, cohort->readings) {
+		// Starts at SIZE_MAX so that the first ++pos wraps around to 0
+		size_t pos = -1;
+		Reading *prev = NULL; // prev == NULL || prev->next == rNew (or a ->next of rNew)
+		for (Reading *sub = (*r); sub; sub = sub->next) {
+			const Tag *wfTag = maybeWfTag(sub);
+			if (wfTag == NULL) {
+				// The check above guarantees that every head reading has a
+				// wordform tag, so prev should already be set here; guard
+				// anyway instead of dereferencing NULL if that ever breaks.
+				if (prev != NULL) {
+					prev = prev->next;
+				}
+			}
+			else {
+				++pos;
+				Cohort *c;
+				while (cos.size() < pos + 1) {
+					c = alloc_cohort(cohort->parent);
+					c->global_number = gWindow->cohort_counter++;
+					cohort->parent->appendCohort(c);
+					cos.push_back(c);
+				}
+				c = cos[pos];
+
+				const size_t wfEnd = wfTag->tag.size() - 3; // index before the final '>"'
+				const size_t i = 1 + wfTag->tag.find_last_not_of(rtrimblank, wfEnd);
+				const UString& wf = wfTag->tag.substr(0, i) + wfTag->tag.substr(wfEnd + 1);
+				if (c->wordform != 0 && wf != c->wordform->tag) {
+					u_fprintf(ux_stderr, "WARNING: Line %u: Ambiguous wordform-tags for same cohort, '%S' vs '%S', not splitting.\n", numLines, wf.c_str(), c->wordform->tag.c_str());
+					cos.clear();
+					cos.push_back(cohort);
+					return cos;
+				}
+				c->wordform = addTag(wf);
+				if (i < wfEnd + 1) {
+					c->text = textprefix + wfTag->tag.substr(i, wfEnd + 1 - i);
+				}
+
+				Reading *rNew = alloc_reading(*sub);
+				// Drop the wordform tags from the copy. The hash is copied by
+				// value before erasing: a reference into tags_list would refer
+				// to the following element (or past the end) once the vector
+				// has shifted. The index only advances when nothing was
+				// removed, so the element that moved into slot t is checked too.
+				for (size_t t = 0; t < rNew->tags_list.size();) {
+					const uint32_t hash = rNew->tags_list[t];
+					if (hash == wfTag->hash || hash == rNew->parent->wordform->hash) {
+						rNew->tags.erase(hash);
+						rNew->tags_list.erase(rNew->tags_list.begin() + t);
+						continue;
+					}
+					++t;
+				}
+				cos[pos]->appendReading(rNew);
+				rNew->parent = cos[pos];
+
+				if (prev != NULL) {
+					// NOTE(review): prev->next looks like it can be the reading
+					// currently iterated as sub - confirm that free_reading()
+					// leaves it usable for the loop's sub->next.
+					free_reading(prev->next);
+					prev->next = 0;
+				}
+				prev = rNew;
+			}
+		}
+	}
+	if (cos.size() == 0) {
+		u_fprintf(ux_stderr, "WARNING: Line %u: Tried splitting %S, but got no new cohorts; shouldn't happen.\n", numLines, cohort->wordform->tag.c_str());
+		cos.push_back(cohort);
+	}
+	// The last word forms are the top readings:
+	cos[0]->text = cohort->text;
+	std::reverse(cos.begin(), cos.end());
+	return cos;
+}
+
+
+// Writes one window to output: a SETVAR/REMVAR command line for every entry
+// in variables_output, then the window's own text (newline-terminated), then
+// each cohort after it has been run through splitMwe(), and finally a blank
+// line before flushing.
+void MweSplitApplicator::printSingleWindow(SingleWindow *window, UFILE *output) {
+	boost_foreach (uint32_t var, window->variables_output) {
+		Tag *key = single_tags[var];
+		BOOST_AUTO(found, window->variables_set.find(var));
+		if (found == window->variables_set.end()) {
+			// Listed for output but not set: the variable is being removed
+			u_fprintf(output, "%S%S>\n", stringbits[S_CMD_REMVAR].getTerminatedBuffer(), key->tag.c_str());
+		}
+		else if (found->second == grammar->tag_any) {
+			// Set to tag_any: print the name alone
+			u_fprintf(output, "%S%S>\n", stringbits[S_CMD_SETVAR].getTerminatedBuffer(), key->tag.c_str());
+		}
+		else {
+			Tag *value = single_tags[found->second];
+			u_fprintf(output, "%S%S=%S>\n", stringbits[S_CMD_SETVAR].getTerminatedBuffer(), key->tag.c_str(), value->tag.c_str());
+		}
+	}
+
+	if (!window->text.empty()) {
+		u_fprintf(output, "%S", window->text.c_str());
+		const bool ends_with_newline = ISNL(window->text[window->text.length() - 1]);
+		if (!ends_with_newline) {
+			u_fputc('\n', output);
+		}
+	}
+
+	// The count is taken once, up front: splitMwe() calls appendCohort() on
+	// the cohort's parent, and cohorts added that way must not be visited by
+	// this loop.
+	const uint32_t num_cohorts = (uint32_t)window->cohorts.size();
+	for (uint32_t idx = 0; idx != num_cohorts; ++idx) {
+		std::vector<Cohort*> parts = splitMwe(window->cohorts[idx]);
+		foreach (part, parts) {
+			printCohort(*part, output);
+		}
+	}
+	u_fputc('\n', output);
+	u_fflush(output);
+}
+}
diff --git a/src/NicelineApplicator.hpp b/src/MweSplitApplicator.hpp
similarity index 66%
copy from src/NicelineApplicator.hpp
copy to src/MweSplitApplicator.hpp
index dcc028e..55469c5 100644
--- a/src/NicelineApplicator.hpp
+++ b/src/MweSplitApplicator.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -20,27 +20,32 @@
*/
#pragma once
-#ifndef c6d28b7452ec699b_NICELINEAPPLICATOR_HPP
-#define c6d28b7452ec699b_NICELINEAPPLICATOR_HPP
+#ifndef d01e916b65a71dd3_MWESPLITAPPLICATOR_HPP
+#define d01e916b65a71dd3_MWESPLITAPPLICATOR_HPP
#include "GrammarApplicator.hpp"
+#include "Strings.hpp"
+#include "Tag.hpp"
+#include "Grammar.hpp"
+#include "Window.hpp"
+#include "SingleWindow.hpp"
+#include "Reading.hpp"
+#include "Cohort.hpp"
namespace CG3 {
-class NicelineApplicator : public virtual GrammarApplicator {
+class MweSplitApplicator : public virtual GrammarApplicator {
private:
- bool did_warn_statictags;
- bool did_warn_subreadings;
+ // bool did_warn_unhandled_situation;
+ const Tag *maybeWfTag(const Reading *r);
+ std::vector<Cohort*> splitMwe(Cohort *cohort);
public:
- NicelineApplicator(UFILE *ux_err);
+ MweSplitApplicator(UFILE *ux_err);
void runGrammarOnText(istream& input, UFILE *output);
- void printReading(const Reading *reading, UFILE *output);
- void printCohort(Cohort *cohort, UFILE *output);
void printSingleWindow(SingleWindow *window, UFILE *output);
};
-
}
#endif
diff --git a/src/NicelineApplicator.cpp b/src/NicelineApplicator.cpp
index fc81bb6..aa63c94 100644
--- a/src/NicelineApplicator.cpp
+++ b/src/NicelineApplicator.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -30,9 +30,9 @@
namespace CG3 {
NicelineApplicator::NicelineApplicator(UFILE *ux_err)
- : GrammarApplicator(ux_err),
- did_warn_statictags(false),
- did_warn_subreadings(false)
+ : GrammarApplicator(ux_err)
+ , did_warn_statictags(false)
+ , did_warn_subreadings(false)
{
}
@@ -70,7 +70,7 @@ void NicelineApplicator::runGrammarOnText(istream& input, UFILE *output) {
index();
- uint32_t resetAfter = ((num_windows+4)*2+1);
+ uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
uint32_t lines = 0;
SingleWindow *cSWindow = 0;
@@ -86,40 +86,40 @@ void NicelineApplicator::runGrammarOnText(istream& input, UFILE *output) {
++lines;
size_t offset = 0, packoff = 0;
// Read as much of the next line as will fit in the current buffer
- while (input.gets(&line[offset], line.size()-offset-1)) {
+ while (input.gets(&line[offset], line.size() - offset - 1)) {
// Copy the segment just read to cleaned
- for (size_t i=offset ; i<line.size() ; ++i) {
+ for (size_t i = offset; i < line.size(); ++i) {
// Only copy one space character, regardless of how many are in input
if (ISSPACE(line[i]) && !ISNL(line[i])) {
cleaned[packoff++] = (line[i] == '\t' ? '\t' : ' ');
while (ISSPACE(line[i]) && !ISNL(line[i])) {
if (line[i] == '\t') {
- cleaned[packoff-1] = line[i];
+ cleaned[packoff - 1] = line[i];
}
++i;
}
}
// Break if there is a newline
if (ISNL(line[i])) {
- cleaned[packoff+1] = cleaned[packoff] = 0;
+ cleaned[packoff + 1] = cleaned[packoff] = 0;
goto gotaline; // Oh how I wish C++ had break 2;
}
if (line[i] == 0) {
- cleaned[packoff+1] = cleaned[packoff] = 0;
+ cleaned[packoff + 1] = cleaned[packoff] = 0;
break;
}
cleaned[packoff++] = line[i];
}
// If we reached this, buffer wasn't big enough. Double the size of the buffer and try again.
- offset = line.size()-2;
- line.resize(line.size()*2, 0);
- cleaned.resize(line.size()+2, 0);
+ offset = line.size() - 2;
+ line.resize(line.size() * 2, 0);
+ cleaned.resize(line.size() + 2, 0);
}
-gotaline:
+ gotaline:
// Trim trailing whitespace
- while (cleaned[0] && ISSPACE(cleaned[packoff-1])) {
- cleaned[packoff-1] = 0;
+ while (cleaned[0] && ISSPACE(cleaned[packoff - 1])) {
+ cleaned[packoff - 1] = 0;
--packoff;
}
if (!ignoreinput && cleaned[0] && cleaned[0] != '<') {
@@ -138,7 +138,7 @@ gotaline:
if (cSWindow && cSWindow->cohorts.size() >= soft_limit && grammar->soft_delimiters && !did_soft_lookback) {
did_soft_lookback = true;
- reverse_foreach (CohortVector, cSWindow->cohorts, iter, iter_end) {
+ reverse_foreach (iter, cSWindow->cohorts) {
if (doesSetMatchCohortNormal(**iter, grammar->soft_delimiters->number)) {
did_soft_lookback = false;
Cohort *cohort = delimitAt(*cSWindow, *iter);
@@ -159,7 +159,7 @@ gotaline:
u_fprintf(ux_stderr, "Warning: Soft limit of %u cohorts reached at line %u but found suitable soft delimiter.\n", soft_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -175,7 +175,7 @@ gotaline:
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -306,7 +306,7 @@ gotaline:
}
}
else {
-istext:
+ istext:
if (cleaned[0] && line[0]) {
if (lCohort) {
lCohort->text += &line[0];
@@ -328,7 +328,7 @@ istext:
if (cCohort->readings.empty()) {
initEmptyCohort(*cCohort);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
cReading = 0;
@@ -366,11 +366,11 @@ void NicelineApplicator::printReading(const Reading *reading, UFILE *output) {
}
u_fputc('\t', output);
if (reading->baseform) {
- u_fprintf(output, "[%.*S]", single_tags.find(reading->baseform)->second->tag.size()-2, single_tags.find(reading->baseform)->second->tag.c_str() + 1);
+ u_fprintf(output, "[%.*S]", single_tags.find(reading->baseform)->second->tag.size() - 2, single_tags.find(reading->baseform)->second->tag.c_str() + 1);
}
uint32SortedVector unique;
- const_foreach (Reading::tags_list_t, reading->tags_list, tter, tter_end) {
+ foreach (tter, reading->tags_list) {
if ((!show_end_tags && *tter == endtag) || *tter == begintag) {
continue;
}
@@ -399,7 +399,7 @@ void NicelineApplicator::printReading(const Reading *reading, UFILE *output) {
}
const Cohort *pr = 0;
pr = reading->parent;
- if (reading->parent->dep_parent != std::numeric_limits<uint32_t>::max()) {
+ if (reading->parent->dep_parent != DEP_NO_PARENT) {
if (reading->parent->dep_parent == 0) {
pr = reading->parent->parent->cohorts[0];
}
@@ -416,19 +416,19 @@ void NicelineApplicator::printReading(const Reading *reading, UFILE *output) {
}
if (!dep_has_spanned) {
u_fprintf_u(output, pattern,
- reading->parent->local_number,
- pr->local_number);
+ reading->parent->local_number,
+ pr->local_number);
}
else {
- if (reading->parent->dep_parent == std::numeric_limits<uint32_t>::max()) {
+ if (reading->parent->dep_parent == DEP_NO_PARENT) {
u_fprintf_u(output, pattern,
- reading->parent->dep_self,
- reading->parent->dep_self);
+ reading->parent->dep_self,
+ reading->parent->dep_self);
}
else {
u_fprintf_u(output, pattern,
- reading->parent->dep_self,
- reading->parent->dep_parent);
+ reading->parent->dep_self,
+ reading->parent->dep_parent);
}
}
}
@@ -436,7 +436,7 @@ void NicelineApplicator::printReading(const Reading *reading, UFILE *output) {
if (reading->parent->type & CT_RELATED) {
u_fprintf(output, " ID:%u", reading->parent->global_number);
if (!reading->parent->relations.empty()) {
- foreach (RelationCtn, reading->parent->relations, miter, miter_end) {
+ foreach (miter, reading->parent->relations) {
boost_foreach (uint32_t siter, miter->second) {
u_fprintf(output, " R:%S:%u", grammar->single_tags.find(miter->first)->second->tag.c_str(), siter);
}
@@ -445,7 +445,7 @@ void NicelineApplicator::printReading(const Reading *reading, UFILE *output) {
}
if (trace) {
- const_foreach (uint32Vector, reading->hit_by, iter_hb, iter_hb_end) {
+ foreach (iter_hb, reading->hit_by) {
u_fputc(' ', output);
printTrace(output, *iter_hb);
}
@@ -468,7 +468,7 @@ void NicelineApplicator::printCohort(Cohort *cohort, UFILE *output) {
goto removed;
}
- u_fprintf(output, "%.*S", cohort->wordform->tag.size()-4, cohort->wordform->tag.c_str()+2);
+ u_fprintf(output, "%.*S", cohort->wordform->tag.size() - 4, cohort->wordform->tag.c_str() + 2);
if (cohort->wread && !did_warn_statictags) {
u_fprintf(ux_stderr, "Warning: Niceline CG format cannot output static tags! You are losing information!\n");
u_fflush(ux_stderr);
@@ -512,5 +512,4 @@ void NicelineApplicator::printSingleWindow(SingleWindow *window, UFILE *output)
u_fputc('\n', output);
u_fflush(output);
}
-
}
diff --git a/src/NicelineApplicator.hpp b/src/NicelineApplicator.hpp
index dcc028e..199acba 100644
--- a/src/NicelineApplicator.hpp
+++ b/src/NicelineApplicator.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -40,7 +40,6 @@ public:
void printCohort(Cohort *cohort, UFILE *output);
void printSingleWindow(SingleWindow *window, UFILE *output);
};
-
}
#endif
diff --git a/src/PlaintextApplicator.cpp b/src/PlaintextApplicator.cpp
index 70f65db..b2f40c2 100644
--- a/src/PlaintextApplicator.cpp
+++ b/src/PlaintextApplicator.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -30,7 +30,7 @@
namespace CG3 {
PlaintextApplicator::PlaintextApplicator(UFILE *ux_err)
- : GrammarApplicator(ux_err)
+ : GrammarApplicator(ux_err)
{
allow_magic_readings = true;
}
@@ -69,7 +69,7 @@ void PlaintextApplicator::runGrammarOnText(istream& input, UFILE *output) {
index();
- uint32_t resetAfter = ((num_windows+4)*2+1);
+ uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
uint32_t lines = 0;
SingleWindow *cSWindow = 0;
@@ -85,9 +85,9 @@ void PlaintextApplicator::runGrammarOnText(istream& input, UFILE *output) {
++lines;
size_t offset = 0, packoff = 0;
// Read as much of the next line as will fit in the current buffer
- while (input.gets(&line[offset], line.size()-offset-1)) {
+ while (input.gets(&line[offset], line.size() - offset - 1)) {
// Copy the segment just read to cleaned
- for (size_t i=offset ; i<line.size() ; ++i) {
+ for (size_t i = offset; i < line.size(); ++i) {
// Only copy one space character, regardless of how many are in input
if (ISSPACE(line[i]) && !ISNL(line[i])) {
cleaned[packoff++] = ' ';
@@ -97,25 +97,25 @@ void PlaintextApplicator::runGrammarOnText(istream& input, UFILE *output) {
}
// Break if there is a newline
if (ISNL(line[i])) {
- cleaned[packoff+1] = cleaned[packoff] = 0;
+ cleaned[packoff + 1] = cleaned[packoff] = 0;
goto gotaline; // Oh how I wish C++ had break 2;
}
if (line[i] == 0) {
- cleaned[packoff+1] = cleaned[packoff] = 0;
+ cleaned[packoff + 1] = cleaned[packoff] = 0;
break;
}
cleaned[packoff++] = line[i];
}
// If we reached this, buffer wasn't big enough. Double the size of the buffer and try again.
- offset = line.size()-2;
- line.resize(line.size()*2, 0);
- cleaned.resize(line.size()+1, 0);
+ offset = line.size() - 2;
+ line.resize(line.size() * 2, 0);
+ cleaned.resize(line.size() + 1, 0);
}
-gotaline:
+ gotaline:
// Trim trailing whitespace
- while (cleaned[0] && ISSPACE(cleaned[packoff-1])) {
- cleaned[packoff-1] = 0;
+ while (cleaned[0] && ISSPACE(cleaned[packoff - 1])) {
+ cleaned[packoff - 1] = 0;
--packoff;
}
if (!ignoreinput && cleaned[0] && cleaned[0] != '<') {
@@ -124,7 +124,7 @@ gotaline:
}
if (cSWindow && cSWindow->cohorts.size() >= soft_limit && grammar->soft_delimiters && !did_soft_lookback) {
did_soft_lookback = true;
- reverse_foreach (CohortVector, cSWindow->cohorts, iter, iter_end) {
+ reverse_foreach (iter, cSWindow->cohorts) {
if (doesSetMatchCohortNormal(**iter, grammar->soft_delimiters->number)) {
did_soft_lookback = false;
Cohort *cohort = delimitAt(*cSWindow, *iter);
@@ -145,7 +145,7 @@ gotaline:
u_fprintf(ux_stderr, "Warning: Soft limit of %u cohorts reached at line %u but found suitable soft delimiter.\n", soft_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -162,7 +162,7 @@ gotaline:
u_fprintf(ux_stderr, "Warning: Hard limit of %u cohorts reached at line %u - forcing break.\n", hard_limit, numLines);
u_fflush(ux_stderr);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
@@ -185,10 +185,6 @@ gotaline:
numWindows++;
did_soft_lookback = false;
}
- if (cCohort && cSWindow) {
- cSWindow->appendCohort(cCohort);
- lCohort = cCohort;
- }
if (gWindow->next.size() > num_windows) {
while (!gWindow->previous.empty() && gWindow->previous.size() > num_windows) {
SingleWindow *tmp = gWindow->previous.front();
@@ -222,7 +218,7 @@ gotaline:
}
std::vector<UnicodeString> tokens;
- for (size_t i=0 ; i<tokens_raw.size() ; ++i) {
+ for (size_t i = 0; i < tokens_raw.size(); ++i) {
UChar *p = tokens_raw[i];
size_t len = u_strlen(p);
while (*p && u_ispunct(p[0])) {
@@ -231,23 +227,23 @@ gotaline:
--len;
}
size_t tkz = tokens.size();
- while (*p && u_ispunct(p[len-1])) {
- tokens.push_back(UnicodeString(p[len-1]));
- p[len-1] = 0;
+ while (*p && u_ispunct(p[len - 1])) {
+ tokens.push_back(UnicodeString(p[len - 1]));
+ p[len - 1] = 0;
--len;
}
if (*p) {
- tokens.insert(tokens.begin()+tkz, p);
+ tokens.insert(tokens.begin() + tkz, p);
}
}
UString tag;
- for (size_t i=0 ; i<tokens.size() ; ++i) {
- UnicodeString &token = tokens[i];
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ UnicodeString& token = tokens[i];
bool first_upper = (u_isupper(token[0]) != 0);
bool all_upper = first_upper;
bool mixed_upper = false;
- for (int32_t i=1 ; i<token.length() ; ++i) {
+ for (int32_t i = 1; i < token.length(); ++i) {
if (u_isupper(token[i])) {
mixed_upper = true;
}
@@ -279,17 +275,17 @@ gotaline:
addTagToReading(*cReading, addTag(tag));
if (all_upper) {
static const char _tag[] = "<all-upper>";
- tag.assign(_tag, _tag+sizeof(_tag)-1);
+ tag.assign(_tag, _tag + sizeof(_tag) - 1);
addTagToReading(*cReading, addTag(tag));
}
if (first_upper) {
static const char _tag[] = "<first-upper>";
- tag.assign(_tag, _tag+sizeof(_tag)-1);
+ tag.assign(_tag, _tag + sizeof(_tag) - 1);
addTagToReading(*cReading, addTag(tag));
}
if (mixed_upper && !all_upper) {
static const char _tag[] = "<mixed-upper>";
- tag.assign(_tag, _tag+sizeof(_tag)-1);
+ tag.assign(_tag, _tag + sizeof(_tag) - 1);
addTagToReading(*cReading, addTag(tag));
}
}
@@ -319,7 +315,7 @@ gotaline:
if (cCohort->readings.empty()) {
initEmptyCohort(*cCohort);
}
- foreach (ReadingList, cCohort->readings, iter, iter_end) {
+ foreach (iter, cCohort->readings) {
addTagToReading(**iter, endtag);
}
cReading = 0;
@@ -368,5 +364,4 @@ void PlaintextApplicator::printSingleWindow(SingleWindow *window, UFILE *output)
u_fputc('\n', output);
u_fflush(output);
}
-
}
diff --git a/src/PlaintextApplicator.hpp b/src/PlaintextApplicator.hpp
index d67935a..abc5cb5 100644
--- a/src/PlaintextApplicator.hpp
+++ b/src/PlaintextApplicator.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -35,7 +35,6 @@ public:
void printCohort(Cohort *cohort, UFILE *output);
void printSingleWindow(SingleWindow *window, UFILE *output);
};
-
}
#endif
diff --git a/src/Reading.cpp b/src/Reading.cpp
index f3876cf..1a043df 100644
--- a/src/Reading.cpp
+++ b/src/Reading.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -80,47 +80,47 @@ void free_reading(Reading *r) {
pool_put(pool_readings, r);
}
-Reading::Reading(Cohort *p) :
-mapped(false),
-deleted(false),
-noprint(false),
-matched_target(false),
-matched_tests(false),
-baseform(0),
-hash(0),
-hash_plain(0),
-number(p ? (p->readings.size() * 1000 + 1000) : 0),
-mapping(0),
-parent(p),
-next(0)
+Reading::Reading(Cohort *p)
+ : mapped(false)
+ , deleted(false)
+ , noprint(false)
+ , matched_target(false)
+ , matched_tests(false)
+ , baseform(0)
+ , hash(0)
+ , hash_plain(0)
+ , number(p ? (p->readings.size() * 1000 + 1000) : 0)
+ , mapping(0)
+ , parent(p)
+ , next(0)
{
#ifdef CG_TRACE_OBJECTS
std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << std::endl;
#endif
}
-Reading::Reading(const Reading& r) :
-mapped(r.mapped),
-deleted(r.deleted),
-noprint(r.noprint),
-matched_target(false),
-matched_tests(false),
-baseform(r.baseform),
-hash(r.hash),
-hash_plain(r.hash_plain),
-number(r.number + 100),
-tags_bloom(r.tags_bloom),
-tags_plain_bloom(r.tags_plain_bloom),
-tags_textual_bloom(r.tags_textual_bloom),
-mapping(r.mapping),
-parent(r.parent),
-next(r.next),
-hit_by(r.hit_by),
-tags_list(r.tags_list),
-tags(r.tags),
-tags_plain(r.tags_plain),
-tags_textual(r.tags_textual),
-tags_numerical(r.tags_numerical)
+Reading::Reading(const Reading& r)
+ : mapped(r.mapped)
+ , deleted(r.deleted)
+ , noprint(r.noprint)
+ , matched_target(false)
+ , matched_tests(false)
+ , baseform(r.baseform)
+ , hash(r.hash)
+ , hash_plain(r.hash_plain)
+ , number(r.number + 100)
+ , tags_bloom(r.tags_bloom)
+ , tags_plain_bloom(r.tags_plain_bloom)
+ , tags_textual_bloom(r.tags_textual_bloom)
+ , mapping(r.mapping)
+ , parent(r.parent)
+ , next(r.next)
+ , hit_by(r.hit_by)
+ , tags_list(r.tags_list)
+ , tags(r.tags)
+ , tags_plain(r.tags_plain)
+ , tags_textual(r.tags_textual)
+ , tags_numerical(r.tags_numerical)
{
#ifdef CG_TRACE_OBJECTS
std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << std::endl;
@@ -176,7 +176,7 @@ Reading *Reading::allocateReading(const Reading& r) {
uint32_t Reading::rehash() {
hash = 0;
hash_plain = 0;
- const_foreach (uint32SortedVector, tags, iter, iter_end) {
+ foreach (iter, tags) {
if (!mapping || mapping->hash != *iter) {
hash = hash_value(*iter, hash);
}
@@ -195,5 +195,4 @@ uint32_t Reading::rehash() {
bool Reading::cmp_number(Reading *a, Reading *b) {
return a->number < b->number;
}
-
}
diff --git a/src/Reading.hpp b/src/Reading.hpp
index 5a27221..157f98e 100644
--- a/src/Reading.hpp
+++ b/src/Reading.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -29,52 +29,52 @@
#include "bloomish.hpp"
namespace CG3 {
- class Cohort;
- class Reading;
+class Cohort;
+class Reading;
- typedef std::vector<Reading*> ReadingList;
+typedef std::vector<Reading*> ReadingList;
- class Reading {
- public:
- bool mapped;
- bool deleted;
- bool noprint;
- bool matched_target;
- bool matched_tests;
- uint32_t baseform;
- uint32_t hash;
- uint32_t hash_plain;
- uint32_t number;
- uint32Bloomish tags_bloom;
- uint32Bloomish tags_plain_bloom;
- uint32Bloomish tags_textual_bloom;
- Tag *mapping;
- Cohort *parent;
- Reading *next;
- uint32Vector hit_by;
- typedef uint32Vector tags_list_t;
- tags_list_t tags_list;
- uint32SortedVector tags;
- uint32SortedVector tags_plain;
- uint32SortedVector tags_textual;
- typedef bc::flat_map<uint32_t,Tag*> tags_numerical_t;
- tags_numerical_t tags_numerical;
+class Reading {
+public:
+ bool mapped;
+ bool deleted;
+ bool noprint;
+ bool matched_target;
+ bool matched_tests;
+ uint32_t baseform;
+ uint32_t hash;
+ uint32_t hash_plain;
+ uint32_t number;
+ uint32Bloomish tags_bloom;
+ uint32Bloomish tags_plain_bloom;
+ uint32Bloomish tags_textual_bloom;
+ Tag *mapping;
+ Cohort *parent;
+ Reading *next;
+ uint32Vector hit_by;
+ typedef uint32Vector tags_list_t;
+ tags_list_t tags_list;
+ uint32SortedVector tags;
+ uint32SortedVector tags_plain;
+ uint32SortedVector tags_textual;
+ typedef bc::flat_map<uint32_t, Tag*> tags_numerical_t;
+ tags_numerical_t tags_numerical;
- Reading(Cohort *p = 0);
- Reading(const Reading& r);
- ~Reading();
- void clear();
+ Reading(Cohort *p = 0);
+ Reading(const Reading& r);
+ ~Reading();
+ void clear();
- Reading *allocateReading(Cohort *p);
- Reading *allocateReading(const Reading& r);
+ Reading *allocateReading(Cohort *p);
+ Reading *allocateReading(const Reading& r);
- uint32_t rehash();
- static bool cmp_number(Reading *a, Reading *b);
- };
+ uint32_t rehash();
+ static bool cmp_number(Reading *a, Reading *b);
+};
- Reading *alloc_reading(Cohort *p = 0);
- Reading *alloc_reading(const Reading& r);
- void free_reading(Reading *r);
+Reading *alloc_reading(Cohort *p = 0);
+Reading *alloc_reading(const Reading& r);
+void free_reading(Reading *r);
}
#endif
diff --git a/src/Relabeller.cpp b/src/Relabeller.cpp
new file mode 100644
index 0000000..68f43e3
--- /dev/null
+++ b/src/Relabeller.cpp
@@ -0,0 +1,382 @@
+/*
+* Copyright (C) 2007-2016, GrammarSoft ApS
+* Developed by Tino Didriksen <mail at tinodidriksen.com>
+* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
+*
+* This file is part of VISL CG-3
+*
+* VISL CG-3 is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* VISL CG-3 is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "Relabeller.hpp"
+#include "Strings.hpp"
+#include "Grammar.hpp"
+#include "TagTrie.hpp"
+
+namespace CG3 {
+
+Relabeller::Relabeller(Grammar& res, const Grammar& relabels, UFILE *ux_err)
+ : ux_stderr(ux_err)
+ , grammar(&res)
+ , relabels(&relabels)
+{
+ UStringSetMap *as_list = new UStringSetMap;
+ UStringSetMap *as_set = new UStringSetMap;
+ // Classify every rule of the relabel grammar: record maplist tag -> target set, or skip the rule with a warning
+ boost_foreach (const RuleVector::value_type rule, relabels.rule_by_number) {
+ const TagVector& fromTags = trie_getTagList(rule->maplist->trie);
+ Set *target = relabels.sets_list[rule->target];
+ const TagVector& toTags = trie_getTagList(target->trie);
+ if (!(rule->maplist->trie_special.empty() && target->trie_special.empty())) {
+ u_fprintf(ux_stderr, "Warning: Relabel rule '%S' on line %d has special tags, skipping!\n", rule->name, rule->line); // format must match the two arguments supplied
+ continue;
+ }
+ if (!rule->tests.empty()) {
+ u_fprintf(ux_stderr, "Warning: Relabel rule '%S' on line %d had context tests, skipping!\n", rule->name, rule->line);
+ continue;
+ }
+ if (rule->wordform) {
+ u_fprintf(ux_stderr, "Warning: Relabel rule '%S' on line %d had a wordform, skipping!\n", rule->name, rule->line);
+ continue;
+ }
+ if (rule->type != K_MAP) {
+ u_fprintf(ux_stderr, "Warning: Relabel rule '%S' on line %d has unexpected keyword (expected MAP), skipping!\n", rule->name, rule->line);
+ continue;
+ }
+ if (fromTags.size() != 1) {
+ u_fprintf(ux_stderr, "Warning: Relabel rule '%S' on line %d has %d tags in the maplist (expected 1), skipping!\n", rule->name, rule->line, static_cast<int>(fromTags.size())); // %d needs an int, not a size_t
+ continue;
+ }
+ Tag *fromTag = fromTags[0];
+ boost_foreach (const TagVector::value_type toit, toTags) {
+ if (toit->type & T_SPECIAL) {
+ u_fprintf(ux_stderr, "Warning: Special tags (%S) not supported yet.\n", toit->tag.c_str());
+ }
+ }
+ if (!toTags.empty()) { // the target's own trie yields tags: relabel as a list
+ as_list->emplace(fromTag->tag.c_str(), target);
+ }
+ else { // no tags in the target's own trie: relabel as a set
+ as_set->emplace(fromTag->tag.c_str(), target);
+ }
+ }
+ // Publish the finished maps through the const members; the destructor deletes them
+ relabel_as_list = as_list;
+ relabel_as_set = as_set;
+}
+
+Relabeller::~Relabeller() { // Frees the two lookup maps allocated by the constructor
+ delete relabel_as_list; // deletes the map only, not the Set objects its values point to
+ relabel_as_list = 0;
+ delete relabel_as_set; // likewise: the pointed-to Sets are not deleted here
+ relabel_as_set = 0;
+}
+
+TagVector Relabeller::transferTags(const TagVector tv_r) { // Returns, in the same order, the target grammar's Tag for each tag in tv_r
+ TagVector tv_g;
+ boost_foreach (Tag *tag_r, tv_r) {
+ Tag *tag_g = new Tag(*tag_r); // copy-construct a candidate tag from the incoming one
+ tag_g = grammar->addTag(tag_g); // new is deleted if it exists
+ tv_g.push_back(tag_g); // keep whatever pointer addTag handed back
+ }
+ return tv_g;
+}
+
+// From TextualParser::parseTagList: comparator that orders tags by descending count in tag_freq
+struct freq_sorter {
+ const bc::flat_map<Tag*, size_t>& tag_freq; // held by reference; the map must outlive the sorter
+
+ freq_sorter(const bc::flat_map<Tag*, size_t>& tag_freq)
+ : tag_freq(tag_freq)
+ {
+ }
+
+ bool operator()(Tag *a, Tag *b) const {
+ // Sort highest frequency first
+ return tag_freq.find(a)->second > tag_freq.find(b)->second; // find() result is dereferenced unchecked: both tags must be in tag_freq
+ }
+};
+void Relabeller::addTaglistsToSet(const std::set<TagVector> tvs, Set *s) {
+ // Extracted from TextualParser::parseTagList
+ // Adds every tag list in tvs to s: single tags via grammar->addTagToSet, longer lists into s->trie or s->trie_special
+ // Might be slightly faster to do this in relabelAsList after
+ // transferTags, but seems clearer this way and compile speed
+ // is fast enough
+
+ if (tvs.empty()) {
+ return;
+ }
+
+ bc::flat_map<Tag*, size_t> tag_freq;
+ std::set<TagVector> tvs_sort_uniq;
+
+ boost_foreach (const TagVector& tvc, tvs) {
+ TagVector tags(tvc); // work on a copy: std::set elements are const keys and must not be modified in place
+ // From TextualParser::parseTagList
+ std::sort(tags.begin(), tags.end());
+ tags.erase(std::unique(tags.begin(), tags.end()), tags.end());
+ if (tvs_sort_uniq.insert(tags).second) {
+ boost_foreach (Tag *t, tags) {
+ ++tag_freq[t];
+ }
+ }
+ }
+ freq_sorter fs(tag_freq);
+ boost_foreach (const TagVector& tvc, tvs_sort_uniq) {
+ if (tvc.empty()) {
+ continue;
+ }
+ if (tvc.size() == 1) {
+ grammar->addTagToSet(tvc[0], s);
+ continue;
+ }
+ TagVector tv(tvc); // copy again: re-sorting the stored key in place would corrupt the set's ordering
+ // Sort tags by frequency, high-to-low
+ // Doing this yields a very cheap imperfect form of trie compression, but it's good enough
+ std::sort(tv.begin(), tv.end(), fs);
+ bool special = false;
+ boost_foreach (Tag *tag, tv) {
+ if (tag->type & T_SPECIAL) {
+ special = true;
+ break;
+ }
+ }
+ if (special) {
+ trie_insert(s->trie_special, tv);
+ }
+ else {
+ trie_insert(s->trie, tv);
+ }
+ }
+}
+
+void Relabeller::relabelAsList(Set *set_g, const Set *set_r, const Tag *fromTag) {
+ std::set<TagVector> old_tvs = trie_getTagsOrdered(set_g->trie); // snapshot the current tag lists before the trie is emptied
+ trie_delete(set_g->trie);
+ set_g->trie.clear();
+ // Rebuild each old tag list: drop fromTag and, where it occurred, append every tag list of set_r in its place
+ std::set<TagVector> taglists;
+ boost_foreach (const TagVector& old_tags, old_tvs) {
+ TagVector tags_except_from;
+
+ bool seen = false; // becomes true when this tag list contains fromTag (compared by hash)
+ boost_foreach (Tag *old_tag, old_tags) {
+ if (old_tag->hash == fromTag->hash) {
+ seen = true;
+ }
+ else {
+ tags_except_from.push_back(old_tag);
+ }
+ }
+ std::set<TagVector> suffixes;
+ if (seen) {
+ suffixes = trie_getTagsOrdered(set_r->trie);
+ }
+ else {
+ TagVector dummy; // a single empty suffix, so the tag list is re-added once, unchanged
+ suffixes.insert(dummy);
+ }
+ boost_foreach (const TagVector& suf, suffixes) {
+ TagVector tags = TagVector(tags_except_from);
+ tags.insert(tags.end(), suf.begin(), suf.end());
+ tags = transferTags(tags); // swap in the target grammar's Tag objects
+ taglists.insert(tags);
+ }
+ }
+ addTaglistsToSet(taglists, set_g);
+}
+
+void Relabeller::reindexSet(Set& s) { // Recomputes s.type from its tries and, recursively, from its child sets
+ s.type &= ~ST_SPECIAL;
+ s.type &= ~ST_CHILD_UNIFY;
+ // Flags contributed by the tags stored in this set's own tries
+ s.type |= trie_reindex(s.trie);
+ s.type |= trie_reindex(s.trie_special);
+ // Child sets are looked up by their index in grammar->sets_list
+ for (uint32_t i = 0; i < s.sets.size(); ++i) {
+ Set *set = grammar->sets_list[s.sets[i]];
+ reindexSet(*set);
+ if (set->type & ST_SPECIAL) {
+ s.type |= ST_SPECIAL;
+ }
+ if (set->type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY)) {
+ s.type |= ST_CHILD_UNIFY;
+ }
+ if (set->type & ST_MAPPING) {
+ s.type |= ST_MAPPING;
+ }
+ }
+ // Any unify flag on this set also marks it special and child-unify
+ if (s.type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY)) {
+ s.type |= ST_SPECIAL;
+ s.type |= ST_CHILD_UNIFY;
+ }
+}
+
+void Relabeller::addSetToGrammar(Set *s) { // Names s, appends it to grammar->sets_list, numbers it and reindexes it
+ s->setName(grammar->sets_list.size() + 100); // name is generated from the current list size plus 100
+ grammar->sets_list.push_back(s);
+ s->number = (uint32_t)grammar->sets_list.size() - 1; // number is the index s now occupies in sets_list
+ reindexSet(*s);
+}
+
+uint32_t Relabeller::copyRelabelSetToGrammar(const Set *s_r) {
+ Set *s_g = grammar->allocateSet();
+ // Copies s_r from the relabel grammar into the target grammar and returns the copy's set number
+ uint32_t nsets = s_r->sets.size();
+ s_g->sets.resize(nsets);
+ for (uint32_t i = 0; i < nsets; ++i) {
+ // First ensure all referred-to sets exist:
+ uint32_t child_num_r = s_r->sets[i];
+ uint32_t child_num_g = copyRelabelSetToGrammar(relabels->sets_list[child_num_r]);
+ s_g->sets[i] = child_num_g;
+ }
+ // Set operators are copied verbatim
+ uint32_t nset_ops = s_r->set_ops.size();
+ s_g->set_ops.resize(nset_ops);
+ for (uint32_t i = 0; i < nset_ops; ++i) {
+ s_g->set_ops[i] = s_r->set_ops[i]; // enum from Strings.cpp, same across grammars
+ }
+ // Tries are copied via trie_copy, which is handed the target grammar
+ s_g->trie = trie_copy(s_r->trie, *grammar);
+ s_g->trie_special = trie_copy(s_r->trie_special, *grammar);
+ s_g->ff_tags = s_r->ff_tags; // TODO: does this get copied correctly?
+ addSetToGrammar(s_g);
+ return s_g->number;
+}
+
+void Relabeller::relabelAsSet(Set *set_g, const Set *set_r, const Tag *fromTag) {
+ if (set_g->trie.empty()) {
+ // If the grammar's set is only an +/OR/- of other
+ // sets, then we only need to change those other sets
+ return;
+ }
+ if (!set_g->sets.empty()) {
+ u_fprintf(ux_stderr, "Warning: SET %d has both trie and sets, this was unexpected.\n", set_g->number);
+ }
+ std::set<TagVector> old_tvs = trie_getTagsOrdered(set_g->trie);
+ trie_delete(set_g->trie);
+ set_g->trie.clear();
+ // First we split old_tvs into those that contain fromTag, tvs_with_from, and those that don't, tvs_no_from
+ // set_gW->trie = to_trie(tvs_with_from, but first removing fromTag)
+ // set_gN->trie = to_trie(tvs_no_from)
+ // Then we copy set_r to the relabelled grammar as set_gR
+ // set_gI->sets = set_gR + set_gW
+ // set_g->sets = set_gN OR set_gI
+ // We also put the special and ff tags from set_g into set_gN
+
+ std::set<TagVector> tvs_with_from;
+ std::set<TagVector> tvs_no_from;
+ boost_foreach (const TagVector& old_tags, old_tvs) {
+ TagVector tags_except_from;
+
+ bool seen = false;
+ boost_foreach (Tag *old_tag, old_tags) {
+ if (old_tag->hash == fromTag->hash) {
+ seen = true;
+ }
+ else {
+ tags_except_from.push_back(old_tag);
+ }
+ }
+ if (tags_except_from.empty()) {
+ continue;
+ }
+ if (seen) {
+ tvs_with_from.insert(transferTags(tags_except_from));
+ }
+ else {
+ tvs_no_from.insert(transferTags(tags_except_from));
+ }
+ }
+ Set *s_gN = grammar->allocateSet();
+ addTaglistsToSet(tvs_no_from, s_gN);
+ s_gN->trie_special = trie_copy(set_g->trie_special);
+ s_gN->ff_tags = set_g->ff_tags;
+ s_gN->sets = set_g->sets; // should be empty but who knows
+ s_gN->set_ops = set_g->set_ops;
+ addSetToGrammar(s_gN);
+
+ uint32_t s_gR_num = copyRelabelSetToGrammar(set_r);
+ uint32_t s_gI_num;
+ if (tvs_with_from.empty()) {
+ // We don't want to intersect with ∅, that would never match
+ s_gI_num = s_gR_num;
+ }
+ else {
+ Set *s_gW = grammar->allocateSet();
+ addTaglistsToSet(tvs_with_from, s_gW);
+ addSetToGrammar(s_gW);
+ if (s_gW->getNonEmpty().empty()) {
+ u_fprintf(ux_stderr, "Warning: unexpected empty tries when relabelling set %d!\n", set_g->number);
+ }
+
+ Set *s_gI = grammar->allocateSet(); // relabelling_of_fromTag + taglists_that_had_fromTag
+ s_gI->sets.resize(2);
+ s_gI->sets[0] = s_gR_num;
+ s_gI->sets[1] = s_gW->number;
+ s_gI->set_ops.resize(1);
+ s_gI->set_ops[0] = S_PLUS;
+ addSetToGrammar(s_gI);
+ s_gI_num = s_gI->number;
+ }
+
+ set_g->sets.resize(2); // taglists_that_had_no_fromTag OR (relabelling_of_fromTag + taglists_that_had_fromTag)
+ set_g->sets[0] = s_gN->number;
+ set_g->sets[1] = s_gI_num;
+ set_g->set_ops.resize(1);
+ set_g->set_ops[0] = S_OR; // TODO: can avoid this if tvs_no_from.empty()
+ reindexSet(*set_g); // This one was already added to grammar
+}
+
+void Relabeller::relabel() { // Applies the list-style, then the set-style relabellings to the target grammar and reindexes it
+ stdext::hash_map<UString, Tag*> tag_by_str; // tag text -> Tag of the target grammar
+ boost_foreach (const std::vector<Tag*>::value_type tag_g, grammar->single_tags_list) {
+ tag_by_str[tag_g->tag] = tag_g;
+ }
+ stdext::hash_map<UString, std::set<Set*> > sets_by_tag; // tag text -> grammar sets whose trie lists that tag; built once, not refreshed below
+ boost_foreach (const std::vector<Set*>::value_type it, grammar->sets_list) {
+ const TagVector& toTags = trie_getTagList(it->trie);
+ boost_foreach (const TagVector::value_type toit, toTags) {
+ sets_by_tag[toit->tag].insert(it);
+ }
+ }
+ // RELABEL AS LIST:
+ boost_foreach (const UStringSetMap::value_type& it, *relabel_as_list) {
+ const Set *set_r = relabels->sets_list[it.second->number];
+ const Tag *fromTag = tag_by_str[it.first]; // may be null if the grammar lacks this tag; only used when sets_by_tag has an entry
+
+ BOOST_AUTO(const sets_g, sets_by_tag.find(it.first));
+ if (sets_g != sets_by_tag.end()) {
+ boost_foreach (Set *set_g, sets_g->second) {
+ relabelAsList(set_g, set_r, fromTag);
+ }
+ }
+ }
+ // RELABEL AS SET:
+ boost_foreach (const UStringSetMap::value_type& it, *relabel_as_set) {
+ const Set *set_r = relabels->sets_list[it.second->number];
+ const Tag *fromTag = tag_by_str[it.first]; // same lookup as in the list pass above
+
+ BOOST_AUTO(const sets_g, sets_by_tag.find(it.first));
+ if (sets_g != sets_by_tag.end()) {
+ boost_foreach (Set *set_g, sets_g->second) {
+ relabelAsSet(set_g, set_r, fromTag);
+ }
+ }
+ }
+ grammar->sets_by_tag.clear(); // need to re-add these, with the new sets_list.sizes
+ grammar->reindex();
+}
+}
diff --git a/src/Relabeller.hpp b/src/Relabeller.hpp
new file mode 100644
index 0000000..562e45d
--- /dev/null
+++ b/src/Relabeller.hpp
@@ -0,0 +1,89 @@
+/*
+* Copyright (C) 2007-2016, GrammarSoft ApS
+* Developed by Tino Didriksen <mail at tinodidriksen.com>
+* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
+*
+* This file is part of VISL CG-3
+*
+* VISL CG-3 is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* VISL CG-3 is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// TODO: how's the uuid generated?
+#pragma once
+#ifndef b6d28b7452ec699b_RELABELLER_H
+#define b6d28b7452ec699b_RELABELLER_H
+
+#include "stdafx.hpp"
+#include "TagTrie.hpp"
+#include "Grammar.hpp"
+
+namespace CG3 {
+class Tag;
+class Set;
+
+class Relabeller { // Rewrites the sets of one grammar according to the MAP rules of a second, "relabel" grammar
+public:
+ Relabeller(Grammar& res, const Grammar& relabels, UFILE *ux_err); // res is modified in place; relabels is only read
+ ~Relabeller();
+
+ void relabel(); // performs the relabelling on the grammar passed to the constructor
+
+private:
+ UFILE *ux_stderr; // destination for warnings
+ Grammar *grammar; // grammar being relabelled
+ const Grammar *relabels; // grammar holding the relabel rules
+
+ typedef stdext::hash_map<UString, UString> UStringMap;
+ typedef stdext::hash_map<UString, Set*> UStringSetMap;
+ const UStringSetMap *relabel_as_list; // source tag text -> target set, for targets whose trie has tags
+ const UStringSetMap *relabel_as_set; // source tag text -> target set, for all other targets
+
+ typedef std::vector<Tag*> TagVector;
+ uint32_t copyRelabelSetToGrammar(const Set *set);
+ TagVector transferTags(const TagVector tv_r);
+ void addTaglistsToSet(const std::set<TagVector> tvs, Set *set);
+ void reindexSet(Set& s);
+ void addSetToGrammar(Set *s);
+ void relabelAsList(Set *set_g, const Set *set_r, const Tag *fromTag);
+ void relabelAsSet(Set *set_g, const Set *set_r, const Tag *fromTag);
+};
+
+inline trie_t *_trie_copy_helper(const trie_t& trie, Grammar& grammar) { // Deep-copies trie onto the heap, registering every tag with grammar
+ trie_t *nt = new trie_t; // the caller receives this pointer and stores it as a node's child trie
+ boost_foreach (const trie_t::value_type& p, trie) {
+ Tag *t = new Tag(*p.first);
+ t = grammar.addTag(t); // new is deleted if it exists
+ (*nt)[t].terminal = p.second.terminal;
+ if (p.second.trie) {
+ (*nt)[t].trie = _trie_copy_helper(*p.second.trie, grammar); // pass grammar on, so nested levels are copied into it as well
+ }
+ }
+ return nt;
+}
+
+inline trie_t trie_copy(const trie_t& trie, Grammar& grammar) { // Returns a deep copy of trie whose tags are all registered with grammar
+ trie_t nt;
+ boost_foreach (const trie_t::value_type& p, trie) {
+ Tag *t = new Tag(*p.first);
+ t = grammar.addTag(t); // new is deleted if it exists
+ nt[t].terminal = p.second.terminal;
+ if (p.second.trie) {
+ nt[t].trie = _trie_copy_helper(*p.second.trie, grammar); // pass grammar on, so child tries are copied into it as well
+ }
+ }
+ return nt;
+}
+}
+
+#endif
diff --git a/src/Rule.cpp b/src/Rule.cpp
index 1ea3d30..adcdb83 100644
--- a/src/Rule.cpp
+++ b/src/Rule.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,28 +26,28 @@
namespace CG3 {
-Rule::Rule() :
-name(0),
-wordform(0),
-target(0),
-childset1(0),
-childset2(0),
-line(0),
-number(0),
-varname(0),
-varvalue(0),
-flags(0),
-section(0),
-sub_reading(0),
-weight(0.0),
-quality(0.0),
-type(K_IGNORE),
-maplist(0),
-sublist(0),
-num_fail(0),
-num_match(0),
-total_time(0),
-dep_target(0)
+Rule::Rule()
+ : name(0)
+ , wordform(0)
+ , target(0)
+ , childset1(0)
+ , childset2(0)
+ , line(0)
+ , number(0)
+ , varname(0)
+ , varvalue(0)
+ , flags(0)
+ , section(0)
+ , sub_reading(0)
+ , weight(0.0)
+ , quality(0.0)
+ , type(K_IGNORE)
+ , maplist(0)
+ , sublist(0)
+ , num_fail(0)
+ , num_match(0)
+ , total_time(0)
+ , dep_target(0)
{
// Nothing in the actual body...
}
@@ -60,7 +60,7 @@ void Rule::setName(const UChar *to) {
delete[] name;
name = 0;
if (to) {
- name = new UChar[u_strlen(to)+1];
+ name = new UChar[u_strlen(to) + 1];
u_strcpy(name, to);
}
}
@@ -75,10 +75,10 @@ void Rule::reverseContextualTests() {
}
void Rule::resetStatistics() {
- foreach (ContextList, tests, it, it_end) {
+ foreach (it, tests) {
(*it)->resetStatistics();
}
- foreach (ContextList, dep_tests, it, it_end) {
+ foreach (it, dep_tests) {
(*it)->resetStatistics();
}
num_fail = 0;
@@ -89,5 +89,4 @@ void Rule::resetStatistics() {
bool Rule::cmp_quality(const Rule *a, const Rule *b) {
return a->total_time > b->total_time;
}
-
}
diff --git a/src/Rule.hpp b/src/Rule.hpp
index 34a1e5c..3287eb3 100644
--- a/src/Rule.hpp
+++ b/src/Rule.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -31,99 +31,78 @@
namespace CG3 {
- class Grammar;
- class Set;
-
- // This must be kept in lock-step with Strings.hpp's FLAGS
- enum {
- RF_NEAREST = (1 << 0),
- RF_ALLOWLOOP = (1 << 1),
- RF_DELAYED = (1 << 2),
- RF_IMMEDIATE = (1 << 3),
- RF_LOOKDELETED = (1 << 4),
- RF_LOOKDELAYED = (1 << 5),
- RF_UNSAFE = (1 << 6),
- RF_SAFE = (1 << 7),
- RF_REMEMBERX = (1 << 8),
- RF_RESETX = (1 << 9),
- RF_KEEPORDER = (1 << 10),
- RF_VARYORDER = (1 << 11),
- RF_ENCL_INNER = (1 << 12),
- RF_ENCL_OUTER = (1 << 13),
- RF_ENCL_FINAL = (1 << 14),
- RF_ENCL_ANY = (1 << 15),
- RF_ALLOWCROSS = (1 << 16),
- RF_WITHCHILD = (1 << 17),
- RF_NOCHILD = (1 << 18),
- RF_ITERATE = (1 << 19),
- RF_NOITERATE = (1 << 20),
- RF_UNMAPLAST = (1 << 21),
- RF_REVERSE = (1 << 22),
- RF_SUB = (1 << 23),
- RF_OUTPUT = (1 << 24),
-
- MASK_ENCL = RF_ENCL_INNER | RF_ENCL_OUTER | RF_ENCL_FINAL | RF_ENCL_ANY,
- };
-
- class Rule {
- public:
- UChar *name;
- Tag *wordform;
- uint32_t target;
- uint32_t childset1, childset2;
- uint32_t line, number;
- uint32_t varname, varvalue; // ToDo: varvalue is unused
- uint32_t flags;
- int32_t section;
- int32_t sub_reading;
- // ToDo: Add proper "quality" quantifier based on num_fail, num_match, total_time
- double weight, quality;
- KEYWORDS type;
- Set *maplist;
- Set *sublist;
-
- mutable ContextList tests;
- mutable ContextList dep_tests;
- mutable uint32_t num_fail, num_match;
- mutable double total_time;
- mutable ContextualTest *dep_target;
-
- Rule();
- ~Rule();
- void setName(const UChar *to);
-
- void resetStatistics();
-
- void addContextualTest(ContextualTest *to, ContextList& head);
- void reverseContextualTests();
-
- static bool cmp_quality(const Rule *a, const Rule *b);
-
- static inline size_t cmp_hash(const Rule* r) {
- return hash_value(r->number);
- }
- static inline bool cmp_compare(const Rule* a, const Rule* b) {
- return a->number < b->number;
- }
- };
-
- struct compare_Rule {
- static const size_t bucket_size = 4;
- static const size_t min_buckets = 8;
-
- inline size_t operator() (const Rule* r) const {
- return Rule::cmp_hash(r);
- }
-
- inline bool operator() (const Rule* a, const Rule* b) const {
- return Rule::cmp_compare(a, b);
- }
- };
-
- typedef std::vector<Rule*> RuleVector;
- typedef std::map<uint32_t,Rule*> RuleByLineMap;
- typedef stdext::hash_map<uint32_t,Rule*> RuleByLineHashMap;
- typedef stdext::hash_map<const Rule*, CohortSet, compare_Rule> RuleToCohortsMap;
+class Grammar;
+class Set;
+
+// This must be kept in lock-step with Strings.hpp's FLAGS
+enum {
+ RF_NEAREST = (1 << 0),
+ RF_ALLOWLOOP = (1 << 1),
+ RF_DELAYED = (1 << 2),
+ RF_IMMEDIATE = (1 << 3),
+ RF_LOOKDELETED = (1 << 4),
+ RF_LOOKDELAYED = (1 << 5),
+ RF_UNSAFE = (1 << 6),
+ RF_SAFE = (1 << 7),
+ RF_REMEMBERX = (1 << 8),
+ RF_RESETX = (1 << 9),
+ RF_KEEPORDER = (1 << 10),
+ RF_VARYORDER = (1 << 11),
+ RF_ENCL_INNER = (1 << 12),
+ RF_ENCL_OUTER = (1 << 13),
+ RF_ENCL_FINAL = (1 << 14),
+ RF_ENCL_ANY = (1 << 15),
+ RF_ALLOWCROSS = (1 << 16),
+ RF_WITHCHILD = (1 << 17),
+ RF_NOCHILD = (1 << 18),
+ RF_ITERATE = (1 << 19),
+ RF_NOITERATE = (1 << 20),
+ RF_UNMAPLAST = (1 << 21),
+ RF_REVERSE = (1 << 22),
+ RF_SUB = (1 << 23),
+ RF_OUTPUT = (1 << 24),
+
+ MASK_ENCL = RF_ENCL_INNER | RF_ENCL_OUTER | RF_ENCL_FINAL | RF_ENCL_ANY,
+};
+
+class Rule {
+public:
+ UChar *name;
+ Tag *wordform;
+ uint32_t target;
+ uint32_t childset1, childset2;
+ uint32_t line, number;
+ uint32_t varname, varvalue; // ToDo: varvalue is unused
+ uint32_t flags;
+ int32_t section;
+ int32_t sub_reading;
+ // ToDo: Add proper "quality" quantifier based on num_fail, num_match, total_time
+ double weight, quality;
+ KEYWORDS type;
+ Set *maplist;
+ Set *sublist;
+
+ mutable ContextList tests;
+ mutable ContextList dep_tests;
+ mutable uint32_t num_fail, num_match;
+ mutable double total_time;
+ mutable ContextualTest *dep_target;
+
+ Rule();
+ ~Rule();
+ void setName(const UChar *to);
+
+ void resetStatistics();
+
+ void addContextualTest(ContextualTest *to, ContextList& head);
+ void reverseContextualTests();
+
+ static bool cmp_quality(const Rule *a, const Rule *b);
+};
+
+typedef std::vector<Rule*> RuleVector;
+typedef std::map<uint32_t, Rule*> RuleByLineMap;
+typedef stdext::hash_map<uint32_t, Rule*> RuleByLineHashMap;
}
#endif
diff --git a/src/Set.cpp b/src/Set.cpp
index 7e9b280..4b00153 100644
--- a/src/Set.cpp
+++ b/src/Set.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -25,16 +25,16 @@
namespace CG3 {
-UFILE* Set::dump_hashes_out = 0;
+UFILE *Set::dump_hashes_out = 0;
-Set::Set() :
-type(0),
-line(0),
-hash(0),
-number(0),
-num_fail(0),
-num_match(0),
-total_time(0)
+Set::Set()
+ : type(0)
+ , line(0)
+ , hash(0)
+ , number(0)
+ , num_fail(0)
+ , num_match(0)
+ , total_time(0)
{
// Nothing in the actual body...
}
@@ -49,7 +49,7 @@ void Set::setName(uint32_t to) {
}
size_t n = sprintf(&cbuffers[0][0], "_G_%u_%u_", line, to);
name.reserve(n);
- name.assign(&cbuffers[0][0], &cbuffers[0][0]+n);
+ name.assign(&cbuffers[0][0], &cbuffers[0][0] + n);
}
void Set::setName(const UChar *to) {
@@ -83,10 +83,10 @@ uint32_t Set::rehash() {
}
else {
retval = hash_value(2683, retval); // Combat hash-collisions
- for (uint32_t i=0 ; i<sets.size() ; ++i) {
+ for (uint32_t i = 0; i < sets.size(); ++i) {
retval = hash_value(sets[i], retval);
}
- for (uint32_t i=0 ; i<set_ops.size() ; ++i) {
+ for (uint32_t i = 0; i < set_ops.size(); ++i) {
retval = hash_value(set_ops[i], retval);
}
}
@@ -104,22 +104,6 @@ uint32_t Set::rehash() {
return retval;
}
-uint8_t trie_reindex(const trie_t& trie) {
- uint8_t type = 0;
- boost_foreach (const trie_t::value_type& kv, trie) {
- if (kv.first->type & T_SPECIAL) {
- type |= ST_SPECIAL;
- }
- if (kv.first->type & T_MAPPING) {
- type |= ST_MAPPING;
- }
- if (kv.second.trie) {
- type |= trie_reindex(*kv.second.trie);
- }
- }
- return type;
-}
-
void Set::reindex(Grammar& grammar) {
type &= ~ST_SPECIAL;
type &= ~ST_CHILD_UNIFY;
@@ -127,13 +111,13 @@ void Set::reindex(Grammar& grammar) {
type |= trie_reindex(trie);
type |= trie_reindex(trie_special);
- for (uint32_t i=0 ; i<sets.size() ; ++i) {
+ for (uint32_t i = 0; i < sets.size(); ++i) {
Set *set = grammar.sets_by_contents.find(sets[i])->second;
set->reindex(grammar);
if (set->type & ST_SPECIAL) {
type |= ST_SPECIAL;
}
- if (set->type & (ST_TAG_UNIFY|ST_SET_UNIFY|ST_CHILD_UNIFY)) {
+ if (set->type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY)) {
type |= ST_CHILD_UNIFY;
}
if (set->type & ST_MAPPING) {
@@ -141,7 +125,7 @@ void Set::reindex(Grammar& grammar) {
}
}
- if (type & (ST_TAG_UNIFY|ST_SET_UNIFY|ST_CHILD_UNIFY)) {
+ if (type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY)) {
type |= ST_SPECIAL;
type |= ST_CHILD_UNIFY;
}
@@ -153,11 +137,11 @@ void Set::markUsed(Grammar& grammar) {
trie_markused(trie);
trie_markused(trie_special);
- boost_foreach(Tag *tag, ff_tags) {
+ boost_foreach (Tag *tag, ff_tags) {
tag->markUsed();
}
- for (uint32_t i=0 ; i<sets.size() ; ++i) {
+ for (uint32_t i = 0; i < sets.size(); ++i) {
Set *set = grammar.sets_by_contents.find(sets[i])->second;
set->markUsed(grammar);
}
@@ -168,5 +152,4 @@ void Set::resetStatistics() {
num_match = 0;
total_time = 0;
}
-
}
diff --git a/src/Set.hpp b/src/Set.hpp
index 1cf20bd..0707d4f 100644
--- a/src/Set.hpp
+++ b/src/Set.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -28,78 +28,83 @@
#include "sorted_vector.hpp"
namespace CG3 {
- class Grammar;
-
- enum {
- ST_ANY = (1 << 0),
- ST_SPECIAL = (1 << 1),
- ST_TAG_UNIFY = (1 << 2),
- ST_SET_UNIFY = (1 << 3),
- ST_CHILD_UNIFY = (1 << 4),
- ST_MAPPING = (1 << 5),
- ST_USED = (1 << 6),
- ST_STATIC = (1 << 7),
- };
-
- class Set {
- public:
- static UFILE* dump_hashes_out;
-
- uint8_t type;
- uint32_t line;
- uint32_t hash;
- uint32_t number;
- mutable uint32_t num_fail, num_match;
- mutable double total_time;
- UString name;
-
- trie_t trie;
- trie_t trie_special;
- TagSortedVector ff_tags;
-
- uint32Vector set_ops;
- uint32Vector sets;
-
- Set();
- ~Set() {
- trie_delete(trie);
- trie_delete(trie_special);
+class Grammar;
+
+enum {
+ ST_ANY = (1 << 0),
+ ST_SPECIAL = (1 << 1),
+ ST_TAG_UNIFY = (1 << 2),
+ ST_SET_UNIFY = (1 << 3),
+ ST_CHILD_UNIFY = (1 << 4),
+ ST_MAPPING = (1 << 5),
+ ST_USED = (1 << 6),
+ ST_STATIC = (1 << 7),
+
+ MASK_ST_UNIFY = ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY,
+};
+
+class Set {
+public:
+ static UFILE *dump_hashes_out;
+
+ uint8_t type;
+ uint32_t line;
+ uint32_t hash;
+ uint32_t number;
+ mutable uint32_t num_fail, num_match;
+ mutable double total_time;
+ UString name;
+
+ trie_t trie;
+ trie_t trie_special;
+ TagSortedVector ff_tags;
+
+ uint32Vector set_ops;
+ uint32Vector sets;
+
+ Set();
+ ~Set() {
+ trie_delete(trie);
+ trie_delete(trie_special);
+ }
+
+ void setName(uint32_t to = 0);
+ void setName(const UChar *to);
+ void setName(const UString& to);
+
+ bool empty() const;
+ uint32_t rehash();
+ void resetStatistics();
+ void reindex(Grammar& grammar);
+ void markUsed(Grammar& grammar);
+
+ trie_t& getNonEmpty() {
+ if (!trie.empty()) {
+ return trie;
}
-
- void setName(uint32_t to = 0);
- void setName(const UChar *to);
- void setName(const UString& to);
-
- bool empty() const;
- uint32_t rehash();
- void resetStatistics();
- void reindex(Grammar& grammar);
- void markUsed(Grammar& grammar);
-
- trie_t& getNonEmpty() {
- if (!trie.empty()) {
- return trie;
- }
- return trie_special;
+ return trie_special;
+ }
+};
+
+typedef sorted_vector<Set*> SetSet;
+typedef std::vector<Set*> SetVector;
+typedef stdext::hash_map<uint32_t, Set*> Setuint32HashMap;
+
+inline uint8_t trie_reindex(const trie_t& trie) {
+ uint8_t type = 0;
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ if (kv.first->type & T_SPECIAL) {
+ type |= ST_SPECIAL;
}
- };
-
- struct compare_Set {
- static const size_t bucket_size = 4;
- static const size_t min_buckets = 8;
-
- inline size_t operator() (const Set* x) const {
- return x->hash;
+ if (kv.first->type & T_MAPPING) {
+ type |= ST_MAPPING;
}
-
- inline bool operator() (const Set* a, const Set* b) const {
- return a->hash < b->hash;
+ if (kv.second.trie) {
+ type |= trie_reindex(*kv.second.trie);
}
- };
-
- typedef sorted_vector<Set*> SetSet;
- typedef std::vector<Set*> SetVector;
- typedef stdext::hash_map<uint32_t,Set*> Setuint32HashMap;
+ }
+ return type;
+}
}
#endif
diff --git a/src/SingleWindow.cpp b/src/SingleWindow.cpp
index dc5ffb5..a46e2d5 100644
--- a/src/SingleWindow.cpp
+++ b/src/SingleWindow.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,7 +26,7 @@
namespace CG3 {
std::vector<SingleWindow*> pool_swindows;
-pool_cleaner< std::vector<SingleWindow*> > cleaner_swindows(pool_swindows);
+pool_cleaner<std::vector<SingleWindow*> > cleaner_swindows(pool_swindows);
SingleWindow *alloc_swindow(Window *p) {
SingleWindow *s = pool_get(pool_swindows);
@@ -46,12 +46,12 @@ void free_swindow(SingleWindow *s) {
pool_put(pool_swindows, s);
}
-SingleWindow::SingleWindow(Window *p) :
-number(0),
-has_enclosures(false),
-next(0),
-previous(0),
-parent(p)
+SingleWindow::SingleWindow(Window *p)
+ : number(0)
+ , has_enclosures(false)
+ , next(0)
+ , previous(0)
+ , parent(p)
{
#ifdef CG_TRACE_OBJECTS
std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << std::endl;
@@ -74,7 +74,7 @@ SingleWindow::~SingleWindow() {
}
}
- foreach (CohortVector, cohorts, iter, iter_end) {
+ foreach (iter, cohorts) {
delete *iter;
}
if (next && previous) {
@@ -103,7 +103,7 @@ void SingleWindow::clear() {
}
}
- foreach(CohortVector, cohorts, iter, iter_end) {
+ foreach (iter, cohorts) {
free_cohort(*iter);
}
if (next && previous) {
@@ -134,6 +134,7 @@ void SingleWindow::clear() {
variables_set.clear();
variables_rem.clear();
variables_output.clear();
+ bag_of_tags.clear();
}
void SingleWindow::appendCohort(Cohort *cohort) {
@@ -165,5 +166,4 @@ void SingleWindow::appendCohort(Cohort *cohort) {
parent->cohort_map[0] = cohort;
}
}
-
}
diff --git a/src/SingleWindow.hpp b/src/SingleWindow.hpp
index e391072..27f7501 100644
--- a/src/SingleWindow.hpp
+++ b/src/SingleWindow.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -30,32 +30,46 @@
#include "sorted_vector.hpp"
namespace CG3 {
- class Window;
-
- class SingleWindow {
- public:
- uint32_t number;
- bool has_enclosures;
- SingleWindow *next, *previous;
- Window *parent;
- UString text;
- CohortVector cohorts;
- uint32IntervalVector valid_rules;
- uint32SortedVector hit_external;
- std::vector<CohortSet> rule_to_cohorts;
- uint32FlatHashMap variables_set;
- uint32FlatHashSet variables_rem;
- uint32SortedVector variables_output;
-
- SingleWindow(Window *p);
- ~SingleWindow();
- void clear();
-
- void appendCohort(Cohort *cohort);
- };
-
- SingleWindow *alloc_swindow(Window *p);
- void free_swindow(SingleWindow *s);
+class Window;
+
+class SingleWindow {
+public:
+ uint32_t number;
+ bool has_enclosures;
+ SingleWindow *next, *previous;
+ Window *parent;
+ UString text;
+ CohortVector cohorts;
+ uint32IntervalVector valid_rules;
+ uint32SortedVector hit_external;
+ std::vector<CohortSet> rule_to_cohorts;
+ uint32FlatHashMap variables_set;
+ uint32FlatHashSet variables_rem;
+ uint32SortedVector variables_output;
+ Reading bag_of_tags;
+
+ SingleWindow(Window *p);
+ ~SingleWindow();
+ void clear();
+
+ void appendCohort(Cohort *cohort);
+};
+
+SingleWindow *alloc_swindow(Window *p);
+void free_swindow(SingleWindow *s);
+
+inline bool less_Cohort(const Cohort *a, const Cohort *b) {
+ if (a->local_number == b->local_number) {
+ return a->parent->number < b->parent->number;
+ }
+ return a->local_number < b->local_number;
+}
+
+struct compare_Cohort {
+ bool operator()(const Cohort *a, const Cohort *b) const {
+ return less_Cohort(a, b);
+ }
+};
}
#endif
diff --git a/src/Strings.cpp b/src/Strings.cpp
index a800db6..311a6d9 100644
--- a/src/Strings.cpp
+++ b/src/Strings.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -23,7 +23,7 @@
namespace CG3 {
-UnicodeString flags[FLAGS_COUNT] = {
+UnicodeString g_flags[FLAGS_COUNT] = {
UNICODE_STRING_SIMPLE("NEAREST"),
UNICODE_STRING_SIMPLE("ALLOWLOOP"),
UNICODE_STRING_SIMPLE("DELAYED"),
@@ -49,11 +49,11 @@ UnicodeString flags[FLAGS_COUNT] = {
UNICODE_STRING_SIMPLE("REVERSE"),
UNICODE_STRING_SIMPLE("SUB"),
UNICODE_STRING_SIMPLE("OUTPUT"),
- UNICODE_STRING_SIMPLE("CAPTURE_UNIF")
+ UNICODE_STRING_SIMPLE("CAPTURE_UNIF"),
};
UnicodeString keywords[KEYWORD_COUNT] = {
- UNICODE_STRING_SIMPLE("1f283fc29adb937a892e09bbc124b85c this is a dummy keyword to hold position 0"),
+ UNICODE_STRING_SIMPLE("__CG3_DUMMY_KEYWORD__"),
UNICODE_STRING_SIMPLE("SETS"),
UNICODE_STRING_SIMPLE("LIST"),
UNICODE_STRING_SIMPLE("SET"),
@@ -111,14 +111,15 @@ UnicodeString keywords[KEYWORD_COUNT] = {
UNICODE_STRING_SIMPLE("OPTIONS"),
UNICODE_STRING_SIMPLE("STRICT-TAGS"),
UNICODE_STRING_SIMPLE("REOPEN-MAPPINGS"),
- UNICODE_STRING_SIMPLE("SUBREADINGS")
+ UNICODE_STRING_SIMPLE("SUBREADINGS"),
+ UNICODE_STRING_SIMPLE("SPLITCOHORT"),
};
-const UChar _S_SET_ISECT_U[] = {L'\u2229', 0};
-const UChar _S_SET_SYMDIFF_U[] = {L'\u2206', 0};
+const UChar _S_SET_ISECT_U[] = { L'\u2229', 0 };
+const UChar _S_SET_SYMDIFF_U[] = { L'\u2206', 0 };
UnicodeString stringbits[STRINGS_COUNT] = {
- UNICODE_STRING_SIMPLE("1f283fc29adb937a892e09bbc124b85c this is a dummy string to hold position 0"),
+ UNICODE_STRING_SIMPLE("__CG3_DUMMY_STRINGBIT__"),
UNICODE_STRING_SIMPLE("|"),
UNICODE_STRING_SIMPLE("TO"),
UNICODE_STRING_SIMPLE("OR"),
@@ -193,10 +194,9 @@ UnicodeString stringbits[STRINGS_COUNT] = {
UNICODE_STRING_SIMPLE("strict-baseforms"),
UNICODE_STRING_SIMPLE("strict-secondary"),
UNICODE_STRING_SIMPLE("<STREAMCMD:SETVAR:"),
- UNICODE_STRING_SIMPLE("<STREAMCMD:REMVAR:")
+ UNICODE_STRING_SIMPLE("<STREAMCMD:REMVAR:"),
};
-std::vector< std::vector<UChar> > gbuffers(NUM_GBUFFERS, std::vector<UChar>(CG3_BUFFER_SIZE, 0));
+std::vector<std::vector<UChar> > gbuffers(NUM_GBUFFERS, std::vector<UChar>(CG3_BUFFER_SIZE, 0));
std::vector<std::string> cbuffers(NUM_CBUFFERS, std::string(CG3_BUFFER_SIZE, 0));
-
}
diff --git a/src/Strings.hpp b/src/Strings.hpp
index 1923df6..05a7257 100644
--- a/src/Strings.hpp
+++ b/src/Strings.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -24,195 +24,196 @@
#define c6d28b7452ec699b_STRINGS_H
namespace CG3 {
- // ToDo: Add ABORT
- enum KEYWORDS {
- K_IGNORE,
- K_SETS,
- K_LIST,
- K_SET,
- K_DELIMITERS,
- K_SOFT_DELIMITERS,
- K_PREFERRED_TARGETS,
- K_MAPPING_PREFIX,
- K_MAPPINGS,
- K_CONSTRAINTS,
- K_CORRECTIONS,
- K_SECTION,
- K_BEFORE_SECTIONS,
- K_AFTER_SECTIONS,
- K_NULL_SECTION,
- K_ADD,
- K_MAP,
- K_REPLACE,
- K_SELECT,
- K_REMOVE,
- K_IFF,
- K_APPEND,
- K_SUBSTITUTE,
- K_START,
- K_END,
- K_ANCHOR,
- K_EXECUTE,
- K_JUMP,
- K_REMVARIABLE,
- K_SETVARIABLE,
- K_DELIMIT,
- K_MATCH,
- K_SETPARENT,
- K_SETCHILD,
- K_ADDRELATION,
- K_SETRELATION,
- K_REMRELATION,
- K_ADDRELATIONS,
- K_SETRELATIONS,
- K_REMRELATIONS,
- K_TEMPLATE,
- K_MOVE,
- K_MOVE_AFTER,
- K_MOVE_BEFORE,
- K_SWITCH,
- K_REMCOHORT,
- K_STATIC_SETS,
- K_UNMAP,
- K_COPY,
- K_ADDCOHORT,
- K_ADDCOHORT_AFTER,
- K_ADDCOHORT_BEFORE,
- K_EXTERNAL,
- K_EXTERNAL_ONCE,
- K_EXTERNAL_ALWAYS,
- K_OPTIONS,
- K_STRICT_TAGS,
- K_REOPEN_MAPPINGS,
- K_SUBREADINGS,
- KEYWORD_COUNT
- };
+// ToDo: Add ABORT
+enum KEYWORDS {
+ K_IGNORE,
+ K_SETS,
+ K_LIST,
+ K_SET,
+ K_DELIMITERS,
+ K_SOFT_DELIMITERS,
+ K_PREFERRED_TARGETS,
+ K_MAPPING_PREFIX,
+ K_MAPPINGS,
+ K_CONSTRAINTS,
+ K_CORRECTIONS,
+ K_SECTION,
+ K_BEFORE_SECTIONS,
+ K_AFTER_SECTIONS,
+ K_NULL_SECTION,
+ K_ADD,
+ K_MAP,
+ K_REPLACE,
+ K_SELECT,
+ K_REMOVE,
+ K_IFF,
+ K_APPEND,
+ K_SUBSTITUTE,
+ K_START,
+ K_END,
+ K_ANCHOR,
+ K_EXECUTE,
+ K_JUMP,
+ K_REMVARIABLE,
+ K_SETVARIABLE,
+ K_DELIMIT,
+ K_MATCH,
+ K_SETPARENT,
+ K_SETCHILD,
+ K_ADDRELATION,
+ K_SETRELATION,
+ K_REMRELATION,
+ K_ADDRELATIONS,
+ K_SETRELATIONS,
+ K_REMRELATIONS,
+ K_TEMPLATE,
+ K_MOVE,
+ K_MOVE_AFTER,
+ K_MOVE_BEFORE,
+ K_SWITCH,
+ K_REMCOHORT,
+ K_STATIC_SETS,
+ K_UNMAP,
+ K_COPY,
+ K_ADDCOHORT,
+ K_ADDCOHORT_AFTER,
+ K_ADDCOHORT_BEFORE,
+ K_EXTERNAL,
+ K_EXTERNAL_ONCE,
+ K_EXTERNAL_ALWAYS,
+ K_OPTIONS,
+ K_STRICT_TAGS,
+ K_REOPEN_MAPPINGS,
+ K_SUBREADINGS,
+ K_SPLITCOHORT,
+ KEYWORD_COUNT,
+};
- enum {
- S_IGNORE,
- S_PIPE,
- S_TO,
- S_OR,
- S_PLUS,
- S_MINUS,
- S_MULTIPLY,
- S_ASTERIK = S_MULTIPLY,
- S_ASTERIKTWO,
- S_FAILFAST,
- S_BACKSLASH,
- S_HASH,
- S_NOT,
- S_TEXTNOT,
- S_TEXTNEGATE,
- S_ALL,
- S_NONE,
- S_LINK,
- S_BARRIER,
- S_CBARRIER,
- S_CMD_FLUSH,
- S_CMD_EXIT,
- S_CMD_IGNORE,
- S_CMD_RESUME,
- S_TARGET,
- S_AND,
- S_IF,
- S_DELIMITSET,
- S_SOFTDELIMITSET,
- S_BEGINTAG,
- S_ENDTAG,
- S_LINKZ,
- S_SPACE,
- S_UU_LEFT,
- S_UU_RIGHT,
- S_UU_PAREN,
- S_UU_TARGET,
- S_UU_MARK,
- S_UU_ATTACHTO,
- S_RXTEXT_ANY,
- S_RXBASE_ANY,
- S_RXWORD_ANY,
- S_AFTER,
- S_BEFORE,
- S_WITH,
- S_QUESTION,
- S_VS1,
- S_VS2,
- S_VS3,
- S_VS4,
- S_VS5,
- S_VS6,
- S_VS7,
- S_VS8,
- S_VS9,
- S_VSu,
- S_VSU,
- S_VSl,
- S_VSL,
- S_GPREFIX,
- S_POSITIVE,
- S_NEGATIVE,
- S_ONCE,
- S_ALWAYS,
- S_SET_ISECT_U,
- S_SET_SYMDIFF_U,
- S_FROM,
- S_EXCEPT,
- S_UU_ENCL,
- S_UU_SAME_BASIC,
- S_NO_ISETS,
- S_NO_ITMPLS,
- S_STRICT_WFORMS,
- S_STRICT_BFORMS,
- S_STRICT_SECOND,
- S_CMD_SETVAR,
- S_CMD_REMVAR,
- STRINGS_COUNT
- };
+enum {
+ S_IGNORE,
+ S_PIPE,
+ S_TO,
+ S_OR,
+ S_PLUS,
+ S_MINUS,
+ S_MULTIPLY,
+ S_ASTERIK = S_MULTIPLY,
+ S_ASTERIKTWO,
+ S_FAILFAST,
+ S_BACKSLASH,
+ S_HASH,
+ S_NOT,
+ S_TEXTNOT,
+ S_TEXTNEGATE,
+ S_ALL,
+ S_NONE,
+ S_LINK,
+ S_BARRIER,
+ S_CBARRIER,
+ S_CMD_FLUSH,
+ S_CMD_EXIT,
+ S_CMD_IGNORE,
+ S_CMD_RESUME,
+ S_TARGET,
+ S_AND,
+ S_IF,
+ S_DELIMITSET,
+ S_SOFTDELIMITSET,
+ S_BEGINTAG,
+ S_ENDTAG,
+ S_LINKZ,
+ S_SPACE,
+ S_UU_LEFT,
+ S_UU_RIGHT,
+ S_UU_PAREN,
+ S_UU_TARGET,
+ S_UU_MARK,
+ S_UU_ATTACHTO,
+ S_RXTEXT_ANY,
+ S_RXBASE_ANY,
+ S_RXWORD_ANY,
+ S_AFTER,
+ S_BEFORE,
+ S_WITH,
+ S_QUESTION,
+ S_VS1,
+ S_VS2,
+ S_VS3,
+ S_VS4,
+ S_VS5,
+ S_VS6,
+ S_VS7,
+ S_VS8,
+ S_VS9,
+ S_VSu,
+ S_VSU,
+ S_VSl,
+ S_VSL,
+ S_GPREFIX,
+ S_POSITIVE,
+ S_NEGATIVE,
+ S_ONCE,
+ S_ALWAYS,
+ S_SET_ISECT_U,
+ S_SET_SYMDIFF_U,
+ S_FROM,
+ S_EXCEPT,
+ S_UU_ENCL,
+ S_UU_SAME_BASIC,
+ S_NO_ISETS,
+ S_NO_ITMPLS,
+ S_STRICT_WFORMS,
+ S_STRICT_BFORMS,
+ S_STRICT_SECOND,
+ S_CMD_SETVAR,
+ S_CMD_REMVAR,
+ STRINGS_COUNT,
+};
- // This must be kept in lock-step with Rule.hpp's RULE_FLAGS
- enum {
- FL_NEAREST,
- FL_ALLOWLOOP,
- FL_DELAYED,
- FL_IMMEDIATE,
- FL_LOOKDELETED,
- FL_LOOKDELAYED,
- FL_UNSAFE,
- FL_SAFE,
- FL_REMEMBERX,
- FL_RESETX,
- FL_KEEPORDER,
- FL_VARYORDER,
- FL_ENCL_INNER,
- FL_ENCL_OUTER,
- FL_ENCL_FINAL,
- FL_ENCL_ANY,
- FL_ALLOWCROSS,
- FL_WITHCHILD,
- FL_NOCHILD,
- FL_ITERATE,
- FL_NOITERATE,
- FL_UNMAPLAST,
- FL_REVERSE,
- FL_SUB,
- FL_OUTPUT,
- FL_CAPTURE_UNIF,
- FLAGS_COUNT
- };
+// This must be kept in lock-step with Rule.hpp's RULE_FLAGS
+enum {
+ FL_NEAREST,
+ FL_ALLOWLOOP,
+ FL_DELAYED,
+ FL_IMMEDIATE,
+ FL_LOOKDELETED,
+ FL_LOOKDELAYED,
+ FL_UNSAFE,
+ FL_SAFE,
+ FL_REMEMBERX,
+ FL_RESETX,
+ FL_KEEPORDER,
+ FL_VARYORDER,
+ FL_ENCL_INNER,
+ FL_ENCL_OUTER,
+ FL_ENCL_FINAL,
+ FL_ENCL_ANY,
+ FL_ALLOWCROSS,
+ FL_WITHCHILD,
+ FL_NOCHILD,
+ FL_ITERATE,
+ FL_NOITERATE,
+ FL_UNMAPLAST,
+ FL_REVERSE,
+ FL_SUB,
+ FL_OUTPUT,
+ FL_CAPTURE_UNIF,
+ FLAGS_COUNT,
+};
}
#include "stdafx.hpp"
namespace CG3 {
- extern UnicodeString keywords[KEYWORD_COUNT];
- extern UnicodeString stringbits[STRINGS_COUNT];
- extern UnicodeString flags[FLAGS_COUNT];
+extern UnicodeString keywords[KEYWORD_COUNT];
+extern UnicodeString stringbits[STRINGS_COUNT];
+extern UnicodeString g_flags[FLAGS_COUNT];
- const size_t CG3_BUFFER_SIZE = 8192;
- const size_t NUM_GBUFFERS = 1;
- extern std::vector< std::vector<UChar> > gbuffers;
- const size_t NUM_CBUFFERS = 1;
- extern std::vector<std::string> cbuffers;
+const size_t CG3_BUFFER_SIZE = 8192;
+const size_t NUM_GBUFFERS = 1;
+extern std::vector<std::vector<UChar> > gbuffers;
+const size_t NUM_CBUFFERS = 1;
+extern std::vector<std::string> cbuffers;
}
#endif
diff --git a/src/Tag.cpp b/src/Tag.cpp
index dd8878d..12c2833 100644
--- a/src/Tag.cpp
+++ b/src/Tag.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,39 +26,39 @@
namespace CG3 {
-UFILE* Tag::dump_hashes_out = 0;
-
-Tag::Tag() :
-comparison_op(OP_NOP),
-comparison_val(0),
-type(0),
-comparison_hash(0),
-dep_self(0),
-dep_parent(0),
-hash(0),
-plain_hash(0),
-number(0),
-seed(0),
-regexp(0)
+UFILE *Tag::dump_hashes_out = 0;
+
+Tag::Tag()
+ : comparison_op(OP_NOP)
+ , comparison_val(0)
+ , type(0)
+ , comparison_hash(0)
+ , dep_self(0)
+ , dep_parent(0)
+ , hash(0)
+ , plain_hash(0)
+ , number(0)
+ , seed(0)
+ , regexp(0)
{
#ifdef CG_TRACE_OBJECTS
std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << std::endl;
#endif
}
-Tag::Tag(const Tag& o) :
-comparison_op(o.comparison_op),
-comparison_val(o.comparison_val),
-type(o.type),
-comparison_hash(o.comparison_hash),
-dep_self(o.dep_self),
-dep_parent(o.dep_parent),
-hash(o.hash),
-plain_hash(o.plain_hash),
-number(o.number),
-seed(o.seed),
-tag(o.tag),
-regexp(0)
+Tag::Tag(const Tag& o)
+ : comparison_op(o.comparison_op)
+ , comparison_val(o.comparison_val)
+ , type(o.type)
+ , comparison_hash(o.comparison_hash)
+ , dep_self(o.dep_self)
+ , dep_parent(o.dep_parent)
+ , hash(o.hash)
+ , plain_hash(o.plain_hash)
+ , number(o.number)
+ , seed(o.seed)
+ , tag(o.tag)
+ , regexp(0)
{
#ifdef CG_TRACE_OBJECTS
std::cerr << "OBJECT: " << __PRETTY_FUNCTION__ << std::endl;
@@ -97,10 +97,10 @@ void Tag::parseTagRaw(const UChar *to, Grammar *grammar) {
const UChar *tmp = to;
if (tmp[0] && (tmp[0] == '"' || tmp[0] == '<')) {
- if ((tmp[0] == '"' && tmp[length-1] == '"') || (tmp[0] == '<' && tmp[length-1] == '>')) {
+ if ((tmp[0] == '"' && tmp[length - 1] == '"') || (tmp[0] == '<' && tmp[length - 1] == '>')) {
type |= T_TEXTUAL;
- if (tmp[0] == '"' && tmp[length-1] == '"') {
- if (tmp[1] == '<' && tmp[length-2] == '>') {
+ if (tmp[0] == '"' && tmp[length - 1] == '"') {
+ if (tmp[1] == '<' && tmp[length - 2] == '>') {
type |= T_WORDFORM;
}
else {
@@ -112,7 +112,7 @@ void Tag::parseTagRaw(const UChar *to, Grammar *grammar) {
tag.assign(tmp, length);
- foreach (Grammar::regex_tags_t, grammar->regex_tags, iter, iter_end) {
+ foreach (iter, grammar->regex_tags) {
UErrorCode status = U_ZERO_ERROR;
uregex_setText(*iter, tag.c_str(), tag.length(), &status);
if (status == U_ZERO_ERROR) {
@@ -121,21 +121,21 @@ void Tag::parseTagRaw(const UChar *to, Grammar *grammar) {
}
}
}
- foreach (Grammar::icase_tags_t, grammar->icase_tags, iter, iter_end) {
+ foreach (iter, grammar->icase_tags) {
UErrorCode status = U_ZERO_ERROR;
if (u_strCaseCompare(tag.c_str(), tag.length(), (*iter)->tag.c_str(), (*iter)->tag.length(), U_FOLD_CASE_DEFAULT, &status) == 0) {
type |= T_TEXTUAL;
}
}
- if (tag[0] == '<' && tag[length-1] == '>') {
+ if (tag[0] == '<' && tag[length - 1] == '>') {
parseNumeric();
}
if (tag[0] == '#') {
if (u_sscanf(tag.c_str(), "#%i->%i", &dep_self, &dep_parent) == 2 && dep_self != 0) {
type |= T_DEPENDENCY;
}
- const UChar local_dep_unicode[] = {'#', '%', 'i', L'\u2192', '%', 'i', 0};
+ const UChar local_dep_unicode[] = { '#', '%', 'i', L'\u2192', '%', 'i', 0 };
if (u_sscanf_u(tag.c_str(), local_dep_unicode, &dep_self, &dep_parent) == 2 && dep_self != 0) {
type |= T_DEPENDENCY;
}
@@ -161,22 +161,26 @@ void Tag::parseTagRaw(const UChar *to, Grammar *grammar) {
}
void Tag::parseNumeric() {
+ if (tag.size() >= 256) {
+ return;
+ }
UChar tkey[256];
UChar top[256];
UChar txval[256];
+ UChar spn[] = { '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0 };
tkey[0] = 0;
top[0] = 0;
txval[0] = 0;
- if (u_sscanf(tag.c_str(), "<%[^<>=:!]%[<>=:!]%[-MAXIN0-9]>", &tkey, &top, &txval) == 3 && top[0]) {
+ if (u_sscanf(tag.c_str(), "%*[<]%[^<>=:!]%[<>=:!]%[-MAXIN0-9]%*[>]", &tkey, &top, &txval) == 3 && top[0] && txval[0]) {
int32_t tval = 0;
- int32_t rv = u_sscanf(txval, "%d", &tval);
- if (txval[0] == 'M' && txval[1] == 'A' && txval[2] == 'X') {
+ int32_t r = u_strspn(txval, spn);
+ if (txval[0] == 'M' && txval[1] == 'A' && txval[2] == 'X' && txval[3] == 0) {
tval = std::numeric_limits<int32_t>::max();
}
- else if (txval[0] == 'M' && txval[1] == 'I' && txval[2] == 'N') {
+ else if (txval[0] == 'M' && txval[1] == 'I' && txval[2] == 'N' && txval[3] == 0) {
tval = std::numeric_limits<int32_t>::min();
}
- else if (rv != 1) {
+ else if (txval[r] || u_sscanf(txval, "%d", &tval) != 1) {
return;
}
if (top[0] == '<') {
@@ -327,12 +331,12 @@ UString Tag::toUString(bool escape) const {
str += ':';
}
- if (type & (T_CASE_INSENSITIVE|T_REGEXP) && tag[0] != '"') {
+ if (type & (T_CASE_INSENSITIVE | T_REGEXP) && tag[0] != '"') {
str += '/';
}
if (escape) {
- for (size_t i=0 ; i<tag.length() ; ++i) {
+ for (size_t i = 0; i < tag.length(); ++i) {
if (tag[i] == '\\' || tag[i] == '(' || tag[i] == ')' || tag[i] == ';' || tag[i] == '#') {
str += '\\';
}
@@ -343,7 +347,7 @@ UString Tag::toUString(bool escape) const {
str + tag;
}
- if (type & (T_CASE_INSENSITIVE|T_REGEXP) && tag[0] != '"') {
+ if (type & (T_CASE_INSENSITIVE | T_REGEXP) && tag[0] != '"') {
str += '/';
}
if (type & T_CASE_INSENSITIVE) {
@@ -357,5 +361,4 @@ UString Tag::toUString(bool escape) const {
}
return str;
}
-
}
diff --git a/src/Tag.hpp b/src/Tag.hpp
index 6594638..049ebbe 100644
--- a/src/Tag.hpp
+++ b/src/Tag.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -28,117 +28,110 @@
#include "flat_unordered_map.hpp"
namespace CG3 {
- class Grammar;
- class Set;
-
- typedef std::vector<Set*> SetVector;
-
- enum C_OPS {
- OP_NOP,
- OP_EQUALS,
- OP_LESSTHAN,
- OP_GREATERTHAN,
- OP_LESSEQUALS,
- OP_GREATEREQUALS,
- OP_NOTEQUALS,
- NUM_OPS
- };
-
- enum {
- T_ANY = (1 << 0),
- T_NUMERICAL = (1 << 1),
- T_MAPPING = (1 << 2),
- T_VARIABLE = (1 << 3),
- T_META = (1 << 4),
- T_WORDFORM = (1 << 5),
- T_BASEFORM = (1 << 6),
- T_TEXTUAL = (1 << 7),
- T_DEPENDENCY = (1 << 8),
- T_SAME_BASIC = (1 << 9),
- T_FAILFAST = (1 << 10),
- T_CASE_INSENSITIVE = (1 << 11),
- T_REGEXP = (1 << 12),
- T_PAR_LEFT = (1 << 13),
- T_PAR_RIGHT = (1 << 14),
- T_REGEXP_ANY = (1 << 15),
- T_VARSTRING = (1 << 16),
- T_TARGET = (1 << 17),
- T_MARK = (1 << 18),
- T_ATTACHTO = (1 << 19),
- T_SPECIAL = (1 << 20),
- T_USED = (1 << 21),
- T_GRAMMAR = (1 << 22),
- T_SET = (1 << 23),
- T_VSTR = (1 << 24),
- T_ENCL = (1 << 25),
- T_RELATION = (1 << 26),
-
- MASK_TAG_SPECIAL = T_ANY|T_TARGET|T_MARK|T_ATTACHTO|T_PAR_LEFT|T_PAR_RIGHT|T_NUMERICAL|T_VARIABLE|T_META|T_FAILFAST|T_CASE_INSENSITIVE|T_REGEXP|T_REGEXP_ANY|T_VARSTRING|T_SET|T_ENCL|T_SAME_BASIC,
- };
-
- class Tag {
- public:
- static UFILE* dump_hashes_out;
-
- C_OPS comparison_op;
- int32_t comparison_val;
- uint32_t type;
- uint32_t comparison_hash;
- uint32_t dep_self, dep_parent;
- uint32_t hash;
- uint32_t plain_hash;
- uint32_t number;
- uint32_t seed;
- UString tag;
- boost::scoped_ptr<SetVector> vs_sets;
- boost::scoped_ptr<UStringVector> vs_names;
- mutable URegularExpression *regexp;
-
- Tag();
- Tag(const Tag& o);
- ~Tag();
- void parseTagRaw(const UChar *to, Grammar *grammar);
- UString toUString(bool escape = false) const;
-
- uint32_t rehash();
- void markUsed();
- void allocateVsSets();
- void allocateVsNames();
- void parseNumeric();
- };
-
- struct compare_Tag {
- static const size_t bucket_size = 4;
- static const size_t min_buckets = 8;
-
- inline size_t operator() (const Tag* x) const {
- return x->hash;
- }
-
- inline bool operator() (const Tag* a, const Tag* b) const {
- return a->hash < b->hash;
+class Grammar;
+class Set;
+
+typedef std::vector<Set*> SetVector;
+
+enum C_OPS {
+ OP_NOP,
+ OP_EQUALS,
+ OP_LESSTHAN,
+ OP_GREATERTHAN,
+ OP_LESSEQUALS,
+ OP_GREATEREQUALS,
+ OP_NOTEQUALS,
+ NUM_OPS,
+};
+
+enum {
+ T_ANY = (1 << 0),
+ T_NUMERICAL = (1 << 1),
+ T_MAPPING = (1 << 2),
+ T_VARIABLE = (1 << 3),
+ T_META = (1 << 4),
+ T_WORDFORM = (1 << 5),
+ T_BASEFORM = (1 << 6),
+ T_TEXTUAL = (1 << 7),
+ T_DEPENDENCY = (1 << 8),
+ T_SAME_BASIC = (1 << 9),
+ T_FAILFAST = (1 << 10),
+ T_CASE_INSENSITIVE = (1 << 11),
+ T_REGEXP = (1 << 12),
+ T_PAR_LEFT = (1 << 13),
+ T_PAR_RIGHT = (1 << 14),
+ T_REGEXP_ANY = (1 << 15),
+ T_VARSTRING = (1 << 16),
+ T_TARGET = (1 << 17),
+ T_MARK = (1 << 18),
+ T_ATTACHTO = (1 << 19),
+ T_SPECIAL = (1 << 20),
+ T_USED = (1 << 21),
+ T_GRAMMAR = (1 << 22),
+ T_SET = (1 << 23),
+ T_VSTR = (1 << 24),
+ T_ENCL = (1 << 25),
+ T_RELATION = (1 << 26),
+
+ MASK_TAG_SPECIAL = T_ANY | T_TARGET | T_MARK | T_ATTACHTO | T_PAR_LEFT | T_PAR_RIGHT | T_NUMERICAL | T_VARIABLE | T_META | T_FAILFAST | T_CASE_INSENSITIVE | T_REGEXP | T_REGEXP_ANY | T_VARSTRING | T_SET | T_ENCL | T_SAME_BASIC,
+};
+
+class Tag {
+public:
+ static UFILE *dump_hashes_out;
+
+ C_OPS comparison_op;
+ int32_t comparison_val;
+ uint32_t type;
+ uint32_t comparison_hash;
+ uint32_t dep_self, dep_parent;
+ uint32_t hash;
+ uint32_t plain_hash;
+ uint32_t number;
+ uint32_t seed;
+ UString tag;
+ boost::scoped_ptr<SetVector> vs_sets;
+ boost::scoped_ptr<UStringVector> vs_names;
+ mutable URegularExpression *regexp;
+
+ Tag();
+ Tag(const Tag& o);
+ ~Tag();
+ void parseTagRaw(const UChar *to, Grammar *grammar);
+ UString toUString(bool escape = false) const;
+
+ uint32_t rehash();
+ void markUsed();
+ void allocateVsSets();
+ void allocateVsNames();
+ void parseNumeric();
+};
+
+struct compare_Tag {
+ inline bool operator()(const Tag *a, const Tag *b) const {
+ return a->hash < b->hash;
+ }
+};
+
+typedef std::vector<Tag*> TagVector;
+typedef TagVector TagList;
+typedef flat_unordered_map<uint32_t, Tag*> Taguint32HashMap;
+typedef sorted_vector<Tag*, compare_Tag> TagSortedVector;
+
+template<typename T>
+inline void fill_tagvector(const T& in, TagVector& tags, bool& did, bool& special) {
+ boost_foreach (Tag *tag, in) {
+ if (tag->type & T_NUMERICAL) {
+ did = true;
}
- };
-
- typedef std::vector<Tag*> TagVector;
- typedef TagVector TagList;
- typedef flat_unordered_map<uint32_t,Tag*> Taguint32HashMap;
- typedef sorted_vector<Tag*, compare_Tag> TagSortedVector;
-
- template<typename T>
- inline void fill_tagvector(const T& in, TagVector& tags, bool& did, bool& special) {
- boost_foreach(Tag *tag, in) {
- if (tag->type & T_NUMERICAL) {
- did = true;
- }
- else {
- if (tag->type & T_SPECIAL) {
- special = true;
- }
- tags.push_back(tag);
+ else {
+ if (tag->type & T_SPECIAL) {
+ special = true;
}
+ tags.push_back(tag);
}
}
}
+}
#endif
diff --git a/src/TagTrie.hpp b/src/TagTrie.hpp
index dd2357e..43c5f05 100644
--- a/src/TagTrie.hpp
+++ b/src/TagTrie.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -27,136 +27,149 @@
#include "Tag.hpp"
namespace CG3 {
- struct trie_node_t;
- typedef bc::flat_map<Tag*, trie_node_t, compare_Tag> trie_t;
+struct trie_node_t;
+typedef bc::flat_map<Tag*, trie_node_t, compare_Tag> trie_t;
- struct trie_node_t {
- bool terminal;
- trie_t* trie;
+struct trie_node_t {
+ bool terminal;
+ trie_t *trie;
- trie_node_t() :
- terminal(false),
- trie(0) {
- }
+ trie_node_t()
+ : terminal(false)
+ , trie(0)
+ {
+ }
- /*
+ /*
// Due to how flat_map works with copying elements around, let's not do cleanup the usual way
~trie_node_t() {
delete trie;
}
//*/
- };
+};
- inline bool trie_insert(trie_t& trie, const TagVector& tv, size_t w = 0) {
- trie_node_t& node = trie[tv[w]];
- if (node.terminal) {
- return false;
- }
- if (w < tv.size() - 1) {
- if (!node.trie) {
- node.trie = new trie_t;
- //std::cerr << "new Trie" << std::endl;
- }
- return trie_insert(*node.trie, tv, w + 1);
- }
- node.terminal = true;
- if (node.trie) {
- delete node.trie;
- node.trie = 0;
+inline bool trie_insert(trie_t& trie, const TagVector& tv, size_t w = 0) {
+ trie_node_t& node = trie[tv[w]];
+ if (node.terminal) {
+ return false;
+ }
+ if (w < tv.size() - 1) {
+ if (!node.trie) {
+ node.trie = new trie_t;
+ //std::cerr << "new Trie" << std::endl;
}
- return true;
+ return trie_insert(*node.trie, tv, w + 1);
}
+ node.terminal = true;
+ if (node.trie) {
+ delete node.trie;
+ node.trie = 0;
+ }
+ return true;
+}
- inline trie_t *_trie_copy_helper(const trie_t& trie) {
- trie_t *nt = new trie_t;
- boost_foreach (const trie_t::value_type& p, trie) {
- (*nt)[p.first].terminal = p.second.terminal;
- if (p.second.trie) {
- (*nt)[p.first].trie = _trie_copy_helper(*p.second.trie);
- }
+inline trie_t *_trie_copy_helper(const trie_t& trie) {
+ trie_t *nt = new trie_t;
+ boost_foreach (const trie_t::value_type& p, trie) {
+ (*nt)[p.first].terminal = p.second.terminal;
+ if (p.second.trie) {
+ (*nt)[p.first].trie = _trie_copy_helper(*p.second.trie);
}
- return nt;
}
+ return nt;
+}
- inline trie_t trie_copy(const trie_t& trie) {
- trie_t nt;
- boost_foreach (const trie_t::value_type& p, trie) {
- nt[p.first].terminal = p.second.terminal;
- if (p.second.trie) {
- nt[p.first].trie = _trie_copy_helper(*p.second.trie);
- }
+inline trie_t trie_copy(const trie_t& trie) {
+ trie_t nt;
+ boost_foreach (const trie_t::value_type& p, trie) {
+ nt[p.first].terminal = p.second.terminal;
+ if (p.second.trie) {
+ nt[p.first].trie = _trie_copy_helper(*p.second.trie);
}
- return nt;
}
+ return nt;
+}
- inline void trie_delete(trie_t& trie) {
- boost_foreach (trie_t::value_type& p, trie) {
- if (p.second.trie) {
- trie_delete(*p.second.trie);
- delete p.second.trie;
- p.second.trie = 0;
- }
+inline void trie_delete(trie_t& trie) {
+ boost_foreach (trie_t::value_type& p, trie) {
+ if (p.second.trie) {
+ trie_delete(*p.second.trie);
+ delete p.second.trie;
+ p.second.trie = 0;
}
}
+}
- inline bool trie_singular(const trie_t& trie) {
- if (trie.size() != 1) {
- return false;
- }
- const trie_node_t& node = trie.begin()->second;
- if (node.terminal) {
- return true;
- }
- if (node.trie) {
- return trie_singular(*node.trie);
- }
+inline bool trie_singular(const trie_t& trie) {
+ if (trie.size() != 1) {
return false;
}
+ const trie_node_t& node = trie.begin()->second;
+ if (node.terminal) {
+ return true;
+ }
+ if (node.trie) {
+ return trie_singular(*node.trie);
+ }
+ return false;
+}
- inline uint32_t trie_rehash(const trie_t& trie) {
- uint32_t retval = 0;
- boost_foreach (const trie_t::value_type& kv, trie) {
- retval = hash_value(kv.first->hash, retval);
- if (kv.second.trie) {
- retval = hash_value(trie_rehash(*kv.second.trie), retval);
- }
+inline uint32_t trie_rehash(const trie_t& trie) {
+ uint32_t retval = 0;
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ retval = hash_value(kv.first->hash, retval);
+ if (kv.second.trie) {
+ retval = hash_value(trie_rehash(*kv.second.trie), retval);
}
- return retval;
}
+ return retval;
+}
- inline void trie_markused(trie_t& trie) {
- boost_foreach (trie_t::value_type& kv, trie) {
- kv.first->markUsed();
- if (kv.second.trie) {
- trie_markused(*kv.second.trie);
- }
+inline void trie_markused(trie_t& trie) {
+ boost_foreach (trie_t::value_type& kv, trie) {
+ kv.first->markUsed();
+ if (kv.second.trie) {
+ trie_markused(*kv.second.trie);
}
}
+}
- inline void trie_getTagList(const trie_t& trie, TagList& theTags) {
- boost_foreach (const trie_t::value_type& kv, trie) {
- theTags.push_back(kv.first);
- if (kv.second.trie) {
- trie_getTagList(*kv.second.trie, theTags);
- }
+inline bool trie_hasType(trie_t& trie, uint32_t type) {
+ boost_foreach (trie_t::value_type& kv, trie) {
+ if (kv.first->type & type) {
+ return true;
+ }
+ if (kv.second.trie && trie_hasType(*kv.second.trie, type)) {
+ return true;
}
}
+ return false;
+}
- inline bool trie_getTagList(const trie_t& trie, TagList& theTags, const void *node) {
- boost_foreach (const trie_t::value_type& kv, trie) {
- theTags.push_back(kv.first);
- if (node == &kv) {
- return true;
- }
- if (kv.second.trie && trie_getTagList(*kv.second.trie, theTags, node)) {
- return true;
- }
- theTags.pop_back();
+inline void trie_getTagList(const trie_t& trie, TagList& theTags) {
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ theTags.push_back(kv.first);
+ if (kv.second.trie) {
+ trie_getTagList(*kv.second.trie, theTags);
}
- return false;
}
+}
- /*
+inline bool trie_getTagList(const trie_t& trie, TagList& theTags, const void *node) {
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ theTags.push_back(kv.first);
+ if (node == &kv) {
+ return true;
+ }
+ if (kv.second.trie && trie_getTagList(*kv.second.trie, theTags, node)) {
+ return true;
+ }
+ theTags.pop_back();
+ }
+ return false;
+}
+
+/*
inline void trie_getTagList(const trie_t& trie, TagVector& theTags) {
boost_foreach (const trie_t::value_type& kv, trie) {
theTags.push_back(kv.first);
@@ -167,94 +180,94 @@ namespace CG3 {
}
//*/
- inline TagVector trie_getTagList(const trie_t& trie) {
- TagVector theTags;
- boost_foreach (const trie_t::value_type& kv, trie) {
- theTags.push_back(kv.first);
- if (kv.second.trie) {
- trie_getTagList(*kv.second.trie, theTags);
- }
+inline TagVector trie_getTagList(const trie_t& trie) {
+ TagVector theTags;
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ theTags.push_back(kv.first);
+ if (kv.second.trie) {
+ trie_getTagList(*kv.second.trie, theTags);
}
- return theTags;
}
+ return theTags;
+}
- inline void trie_getTags(const trie_t& trie, std::set<TagVector>& rv, TagVector& tv) {
- boost_foreach (const trie_t::value_type& kv, trie) {
- tv.push_back(kv.first);
- if (kv.second.terminal) {
- std::sort(tv.begin(), tv.end());
- rv.insert(tv);
- tv.pop_back();
- continue;
- }
- if (kv.second.trie) {
- trie_getTags(*kv.second.trie, rv, tv);
- }
+inline void trie_getTags(const trie_t& trie, std::set<TagVector>& rv, TagVector& tv) {
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ tv.push_back(kv.first);
+ if (kv.second.terminal) {
+ std::sort(tv.begin(), tv.end());
+ rv.insert(tv);
+ tv.pop_back();
+ continue;
+ }
+ if (kv.second.trie) {
+ trie_getTags(*kv.second.trie, rv, tv);
}
}
+}
- inline std::set<TagVector> trie_getTags(const trie_t& trie) {
- std::set<TagVector> rv;
- boost_foreach (const trie_t::value_type& kv, trie) {
- TagVector tv;
- tv.push_back(kv.first);
- if (kv.second.terminal) {
- std::sort(tv.begin(), tv.end());
- rv.insert(tv);
- tv.pop_back();
- continue;
- }
- if (kv.second.trie) {
- trie_getTags(*kv.second.trie, rv, tv);
- }
+inline std::set<TagVector> trie_getTags(const trie_t& trie) {
+ std::set<TagVector> rv;
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ TagVector tv;
+ tv.push_back(kv.first);
+ if (kv.second.terminal) {
+ std::sort(tv.begin(), tv.end());
+ rv.insert(tv);
+ tv.pop_back();
+ continue;
+ }
+ if (kv.second.trie) {
+ trie_getTags(*kv.second.trie, rv, tv);
}
- return rv;
}
+ return rv;
+}
- inline void trie_getTagsOrdered(const trie_t& trie, std::set<TagVector>& rv, TagVector& tv) {
- boost_foreach (const trie_t::value_type& kv, trie) {
- tv.push_back(kv.first);
- if (kv.second.terminal) {
- rv.insert(tv);
- tv.pop_back();
- continue;
- }
- if (kv.second.trie) {
- trie_getTagsOrdered(*kv.second.trie, rv, tv);
- }
+inline void trie_getTagsOrdered(const trie_t& trie, std::set<TagVector>& rv, TagVector& tv) {
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ tv.push_back(kv.first);
+ if (kv.second.terminal) {
+ rv.insert(tv);
+ tv.pop_back();
+ continue;
+ }
+ if (kv.second.trie) {
+ trie_getTagsOrdered(*kv.second.trie, rv, tv);
}
}
+}
- inline std::set<TagVector> trie_getTagsOrdered(const trie_t& trie) {
- std::set<TagVector> rv;
- boost_foreach (const trie_t::value_type& kv, trie) {
- TagVector tv;
- tv.push_back(kv.first);
- if (kv.second.terminal) {
- rv.insert(tv);
- tv.pop_back();
- continue;
- }
- if (kv.second.trie) {
- trie_getTagsOrdered(*kv.second.trie, rv, tv);
- }
+inline std::set<TagVector> trie_getTagsOrdered(const trie_t& trie) {
+ std::set<TagVector> rv;
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ TagVector tv;
+ tv.push_back(kv.first);
+ if (kv.second.terminal) {
+ rv.insert(tv);
+ tv.pop_back();
+ continue;
+ }
+ if (kv.second.trie) {
+ trie_getTagsOrdered(*kv.second.trie, rv, tv);
}
- return rv;
}
+ return rv;
+}
- inline void trie_serialize(const trie_t& trie, std::ostream& out) {
- boost_foreach (const trie_t::value_type& kv, trie) {
- writeSwapped<uint32_t>(out, kv.first->number);
- writeSwapped<uint8_t>(out, kv.second.terminal);
- if (kv.second.trie) {
- writeSwapped<uint32_t>(out, kv.second.trie->size());
- trie_serialize(*kv.second.trie, out);
- }
- else {
- writeSwapped<uint32_t>(out, 0);
- }
+inline void trie_serialize(const trie_t& trie, std::ostream& out) {
+ boost_foreach (const trie_t::value_type& kv, trie) {
+ writeSwapped<uint32_t>(out, kv.first->number);
+ writeSwapped<uint8_t>(out, kv.second.terminal);
+ if (kv.second.trie) {
+ writeSwapped<uint32_t>(out, kv.second.trie->size());
+ trie_serialize(*kv.second.trie, out);
+ }
+ else {
+ writeSwapped<uint32_t>(out, 0);
}
}
}
+}
#endif
diff --git a/src/TextualParser.cpp b/src/TextualParser.cpp
index e09b90a..ad304b6 100644
--- a/src/TextualParser.cpp
+++ b/src/TextualParser.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -24,31 +24,43 @@
#include "Grammar.hpp"
#include "ContextualTest.hpp"
#include "parser_helpers.hpp"
+#include "AST.hpp"
#include <bitset>
namespace CG3 {
-TextualParser::TextualParser(Grammar& res, UFILE *ux_err) :
-verbosity_level(0),
-sets_counter(100),
-seen_mapping_prefix(0),
-option_vislcg_compat(false),
-in_section(false),
-in_before_sections(true),
-in_after_sections(false),
-in_null_section(false),
-no_isets(false),
-no_itmpls(false),
-strict_wforms(false),
-strict_bforms(false),
-strict_second(false),
-filename(0),
-locale(0),
-codepage(0),
-error_counter(0)
+TextualParser::TextualParser(Grammar& res, UFILE *ux_err, bool _dump_ast)
+ : filebase(0)
+ , verbosity_level(0)
+ , sets_counter(100)
+ , seen_mapping_prefix(0)
+ , option_vislcg_compat(false)
+ , in_section(false)
+ , in_before_sections(true)
+ , in_after_sections(false)
+ , in_null_section(false)
+ , no_isets(false)
+ , no_itmpls(false)
+ , strict_wforms(false)
+ , strict_bforms(false)
+ , strict_second(false)
+ , filename(0)
+ , locale(0)
+ , codepage(0)
+ , error_counter(0)
{
ux_stderr = ux_err;
result = &res;
+ dump_ast = _dump_ast;
+}
+
+void TextualParser::print_ast(UFILE *out) {
+ if (ast.cs.empty()) {
+ return;
+ }
+ u_fprintf(out, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", u_fgetcodepage(out));
+ u_fprintf(out, "<!-- l is line; b is begin, e is end, both are absolute UTF-16 code unit offsets (not code point) in the file -->\n");
+ ::print_ast(out, ast.cs.front().b, 0, ast.cs.front());
}
void TextualParser::incErrorCount() {
@@ -64,7 +76,9 @@ void TextualParser::incErrorCount() {
struct freq_sorter {
const bc::flat_map<Tag*, size_t>& tag_freq;
- freq_sorter(const bc::flat_map<Tag*, size_t>& tag_freq) : tag_freq(tag_freq) {
+ freq_sorter(const bc::flat_map<Tag*, size_t>& tag_freq)
+ : tag_freq(tag_freq)
+ {
}
bool operator()(Tag *a, Tag *b) const {
@@ -125,24 +139,20 @@ Tag *TextualParser::parseTag(const UChar *to, const UChar *p) {
else if (tag->type & T_WORDFORM) {
if (strict_wforms) {
error("%s: Error: Wordform tag %S not on the strict-tags list, on line %u near `%S`!\n", tag->tag.c_str(), p);
- incErrorCount();
}
}
else if (tag->type & T_BASEFORM) {
if (strict_bforms) {
error("%s: Error: Baseform tag %S not on the strict-tags list, on line %u near `%S`!\n", tag->tag.c_str(), p);
- incErrorCount();
}
}
- else if (tag->tag[0] == '<' && tag->tag[tag->tag.size()-1] == '>') {
+ else if (tag->tag[0] == '<' && tag->tag[tag->tag.size() - 1] == '>') {
if (strict_second) {
error("%s: Error: Secondary tag %S not on the strict-tags list, on line %u near `%S`!\n", tag->tag.c_str(), p);
- incErrorCount();
}
}
else {
error("%s: Error: Tag %S not on the strict-tags list, on line %u near `%S`!\n", tag->tag.c_str(), p);
- incErrorCount();
}
}
return tag;
@@ -153,6 +163,7 @@ Tag *TextualParser::addTag(Tag *tag) {
}
void TextualParser::parseTagList(UChar *& p, Set *s) {
+ AST_OPEN(TagList);
std::set<TagVector> taglists;
bc::flat_map<Tag*, size_t> tag_freq;
@@ -161,10 +172,12 @@ void TextualParser::parseTagList(UChar *& p, Set *s) {
if (*p && *p != ';' && *p != ')') {
TagVector tags;
if (*p == '(') {
+ AST_OPEN(CompositeTag);
++p;
result->lines += SKIPWS(p, ';', ')');
while (*p && *p != ';' && *p != ')') {
+ AST_OPEN(Tag);
UChar *n = p;
if (*n == '"') {
n++;
@@ -180,14 +193,17 @@ void TextualParser::parseTagList(UChar *& p, Set *s) {
Tag *t = parseTag(&gbuffers[0][0], p);
tags.push_back(t);
p = n;
+ AST_CLOSE(p);
result->lines += SKIPWS(p, ';', ')');
}
if (*p != ')') {
error("%s: Error: Expected closing ) on line %u near `%S`!\n", p);
}
++p;
+ AST_CLOSE(p);
}
else {
+ AST_OPEN(Tag);
UChar *n = p;
if (*n == '"') {
n++;
@@ -203,6 +219,7 @@ void TextualParser::parseTagList(UChar *& p, Set *s) {
Tag *t = parseTag(&gbuffers[0][0], p);
tags.push_back(t);
p = n;
+ AST_CLOSE(p);
}
// sort + uniq the tags
@@ -216,6 +233,7 @@ void TextualParser::parseTagList(UChar *& p, Set *s) {
}
}
}
+ AST_CLOSE(p);
freq_sorter fs(tag_freq);
boost_foreach (const TagVector& tvc, taglists) {
@@ -248,6 +266,7 @@ Set *TextualParser::parseSet(const UChar *name, const UChar *p) {
}
Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
+ AST_OPEN(SetInline);
uint32Vector set_ops;
uint32Vector sets;
@@ -257,6 +276,7 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
if (*p && *p != ';' && *p != ')') {
if (!wantop) {
if (*p == '(') {
+ AST_OPEN(CompositeTag);
if (no_isets && p[1] != '*') {
error("%s: Error: Inline set spotted on line %u near `%S`!\n", p);
}
@@ -272,6 +292,7 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
while (*p && *p != ';' && *p != ')') {
result->lines += SKIPWS(p, ';', ')');
+ AST_OPEN(Tag);
UChar *n = p;
if (*n == '"') {
n++;
@@ -287,12 +308,14 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
Tag *t = parseTag(&gbuffers[0][0], p);
tags.push_back(t);
p = n;
+ AST_CLOSE(p);
result->lines += SKIPWS(p, ';', ')');
}
if (*p != ')') {
error("%s: Error: Expected closing ) on line %u near `%S`!\n", p);
}
++p;
+ AST_CLOSE(p);
if (tags.size() == 0) {
error("%s: Error: Empty inline set on line %u near `%S`! Use (*) if you want to replace with nothing.\n", n);
@@ -320,6 +343,7 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
sets.push_back(set_c->hash);
}
else {
+ AST_OPEN(SetName);
UChar *n = p;
result->lines += SKIPTOWS(n, ')', true);
while (n[-1] == ',' || n[-1] == ']') {
@@ -332,6 +356,7 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
uint32_t sh = tmp->hash;
sets.push_back(sh);
p = n;
+ AST_CLOSE(p);
}
if (!set_ops.empty() && (set_ops.back() == S_SET_ISECT_U || set_ops.back() == S_SET_SYMDIFF_U)) {
@@ -407,9 +432,11 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
//dieIfKeyword(&gbuffers[0][0]);
int sop = ux_isSetOp(&gbuffers[0][0]);
if (sop != S_IGNORE) {
+ AST_OPEN(SetOp);
set_ops.push_back(sop);
wantop = false;
p = n;
+ AST_CLOSE(p);
}
else {
break;
@@ -420,6 +447,7 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
error("%s: Error: Expected set on line %u near `%S`!\n", p);
}
}
+ AST_CLOSE(p);
if (!s && sets.size() == 1) {
s = result->getSet(sets.back());
@@ -452,10 +480,11 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
bool had_digits = false;
UChar *n = p;
+ AST_OPEN(ContextPos);
size_t tries;
- for (tries=0 ; *p != ' ' && *p != '(' && *p != '/' && tries < 100 ; ++tries) {
- if (*p == '*' && *(p+1) == '*') {
+ for (tries = 0; *p != ' ' && *p != '(' && *p != '/' && tries < 100; ++tries) {
+ if (*p == '*' && *(p + 1) == '*') {
t.pos |= POS_SCANALL;
p += 2;
}
@@ -552,6 +581,11 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
t.pos |= POS_NUMERIC_BRANCH;
++p;
}
+ if (*p == 'B') {
+ result->has_bag_of_tags = true;
+ t.pos |= POS_BAG_OF_TAGS;
+ ++p;
+ }
if (*p == '-') {
negative = true;
++p;
@@ -559,11 +593,11 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
if (u_isdigit(*p)) {
had_digits = true;
while (*p >= '0' && *p <= '9') {
- t.offset = (t.offset*10) + (*p - '0');
+ t.offset = (t.offset * 10) + (*p - '0');
++p;
}
}
- if (*p == 'r' && *(p+1) == ':') {
+ if (*p == 'r' && *(p + 1) == ':') {
t.pos |= POS_RELATION;
p += 2;
UChar *n = p;
@@ -604,8 +638,8 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
bool negative = false;
size_t tries;
- for (tries=0 ; *p != ' ' && *p != '(' && tries < 100 ; ++tries) {
- if (*p == '*' && *(p+1) == '*') {
+ for (tries = 0; *p != ' ' && *p != '(' && tries < 100; ++tries) {
+ if (*p == '*' && *(p + 1) == '*') {
t.offset_sub = GSR_ANY;
p += 2;
}
@@ -619,7 +653,7 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
}
if (u_isdigit(*p)) {
while (*p >= '0' && *p <= '9') {
- t.offset_sub = (t.offset_sub*10) + (*p - '0');
+ t.offset_sub = (t.offset_sub * 10) + (*p - '0');
++p;
}
}
@@ -630,7 +664,9 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
}
}
- if ((t.pos & (POS_DEP_CHILD|POS_DEP_SIBLING)) && (t.pos & (POS_SCANFIRST|POS_SCANALL))) {
+ AST_CLOSE(p);
+
+ if ((t.pos & (POS_DEP_CHILD | POS_DEP_SIBLING)) && (t.pos & (POS_SCANFIRST | POS_SCANALL))) {
t.pos &= ~POS_SCANFIRST;
t.pos &= ~POS_SCANALL;
t.pos |= POS_DEP_DEEP;
@@ -648,18 +684,21 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
error("%s: Error: Invalid position on line %u near `%S` - garbage data!\n", n);
}
if (had_digits) {
- if (t.pos & (POS_DEP_CHILD|POS_DEP_SIBLING|POS_DEP_PARENT)) {
+ if (t.pos & (POS_DEP_CHILD | POS_DEP_SIBLING | POS_DEP_PARENT)) {
error("%s: Error: Invalid position on line %u near `%S` - cannot combine offsets with dependency!\n", n);
}
- if (t.pos & (POS_LEFT_PAR|POS_RIGHT_PAR)) {
+ if (t.pos & (POS_LEFT_PAR | POS_RIGHT_PAR)) {
error("%s: Error: Invalid position on line %u near `%S` - cannot combine offsets with enclosures!\n", n);
}
if (t.pos & POS_RELATION) {
error("%s: Error: Invalid position on line %u near `%S` - cannot combine offsets with relations!\n", n);
}
}
+ if ((t.pos & POS_BAG_OF_TAGS) && ((t.pos & ~(POS_BAG_OF_TAGS | POS_NOT | POS_NEGATE | POS_SPAN_BOTH | POS_SPAN_LEFT | POS_SPAN_RIGHT)) || had_digits)) {
+ error("%s: Error: Invalid position on line %u near `%S` - bag of tags may only be combined with window spanning!\n", n);
+ }
if ((t.pos & POS_DEP_PARENT) && !(t.pos & POS_DEP_GLOB)) {
- if (t.pos & (POS_LEFTMOST|POS_RIGHTMOST)) {
+ if (t.pos & (POS_LEFTMOST | POS_RIGHTMOST)) {
error("%s: Error: Invalid position on line %u near `%S` - leftmost/rightmost requires ancestor, not parent!\n", n);
}
}
@@ -693,33 +732,42 @@ void TextualParser::parseContextualTestPosition(UChar *& p, ContextualTest& t) {
}
ContextualTest *TextualParser::parseContextualTestList(UChar *& p, Rule *rule) {
+ AST_OPEN(Context);
ContextualTest *t = result->allocateContextualTest();
ContextualTest *ot = t;
t->line = result->lines;
result->lines += SKIPWS(p);
if (ux_simplecasecmp(p, stringbits[S_TEXTNEGATE].getTerminatedBuffer(), stringbits[S_TEXTNEGATE].length())) {
+ AST_OPEN(ContextMod);
p += stringbits[S_TEXTNEGATE].length();
+ AST_CLOSE(p);
t->pos |= POS_NEGATE;
}
result->lines += SKIPWS(p);
if (ux_simplecasecmp(p, stringbits[S_ALL].getTerminatedBuffer(), stringbits[S_ALL].length())) {
+ AST_OPEN(ContextMod);
p += stringbits[S_ALL].length();
+ AST_CLOSE(p);
t->pos |= POS_ALL;
}
result->lines += SKIPWS(p);
if (ux_simplecasecmp(p, stringbits[S_NONE].getTerminatedBuffer(), stringbits[S_NONE].length())) {
+ AST_OPEN(ContextMod);
p += stringbits[S_NONE].length();
+ AST_CLOSE(p);
t->pos |= POS_NONE;
}
result->lines += SKIPWS(p);
if (ux_simplecasecmp(p, stringbits[S_TEXTNOT].getTerminatedBuffer(), stringbits[S_TEXTNOT].length())) {
+ AST_OPEN(ContextMod);
p += stringbits[S_TEXTNOT].length();
+ AST_CLOSE(p);
t->pos |= POS_NOT;
}
result->lines += SKIPWS(p);
- std::pair<size_t,UString> tmpl_data;
+ std::pair<size_t, UString> tmpl_data;
UChar *pos_p = p;
UChar *n = p;
@@ -728,6 +776,7 @@ ContextualTest *TextualParser::parseContextualTestList(UChar *& p, Rule *rule) {
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
if (ux_isEmpty(&gbuffers[0][0])) {
+ AST_OPEN(TemplateInline);
if (no_itmpls) {
error("%s: Error: Inline template spotted on line %u near `%S`!\n", p);
}
@@ -760,8 +809,10 @@ ContextualTest *TextualParser::parseContextualTestList(UChar *& p, Rule *rule) {
}
u_fflush(ux_stderr);
}
+ AST_CLOSE(p);
}
else if (gbuffers[0][0] == '[') {
+ AST_OPEN(TemplateShorthand);
++p;
result->lines += SKIPWS(p);
Set *s = parseSetInlineWrapper(p);
@@ -782,6 +833,7 @@ ContextualTest *TextualParser::parseContextualTestList(UChar *& p, Rule *rule) {
if (*p != ']') {
error("%s: Error: Expected ']' but found '%C' on line %u near `%S`!\n", *p, p);
}
+ AST_CLOSE(p);
++p;
}
else if (gbuffers[0][0] == 'T' && gbuffers[0][1] == ':') {
@@ -791,7 +843,7 @@ ContextualTest *TextualParser::parseContextualTestList(UChar *& p, Rule *rule) {
pos_p = p;
parseContextualTestPosition(p, *t);
p = n;
- if (t->pos & (POS_DEP_CHILD|POS_DEP_PARENT|POS_DEP_SIBLING)) {
+ if (t->pos & (POS_DEP_CHILD | POS_DEP_PARENT | POS_DEP_SIBLING)) {
result->has_dep = true;
}
if (t->pos & POS_RELATION) {
@@ -801,7 +853,8 @@ ContextualTest *TextualParser::parseContextualTestList(UChar *& p, Rule *rule) {
if (p[0] == 'T' && p[1] == ':') {
t->pos |= POS_TMPL_OVERRIDE;
-label_parseTemplateRef:
+ label_parseTemplateRef:
+ AST_OPEN(TemplateRef);
p += 2;
n = p;
result->lines += SKIPTOWS(n, ')');
@@ -812,6 +865,7 @@ label_parseTemplateRef:
t->tmpl = reinterpret_cast<ContextualTest*>(cn);
tmpl_data = std::make_pair(result->lines, &gbuffers[0][0]);
p = n;
+ AST_CLOSE(p);
result->lines += SKIPWS(p);
}
else {
@@ -821,17 +875,21 @@ label_parseTemplateRef:
result->lines += SKIPWS(p);
if (ux_simplecasecmp(p, stringbits[S_CBARRIER].getTerminatedBuffer(), stringbits[S_CBARRIER].length())) {
+ AST_OPEN(BarrierSafe);
p += stringbits[S_CBARRIER].length();
result->lines += SKIPWS(p);
Set *s = parseSetInlineWrapper(p);
t->cbarrier = s->hash;
+ AST_CLOSE(p);
}
result->lines += SKIPWS(p);
if (ux_simplecasecmp(p, stringbits[S_BARRIER].getTerminatedBuffer(), stringbits[S_BARRIER].length())) {
+ AST_OPEN(Barrier);
p += stringbits[S_BARRIER].length();
result->lines += SKIPWS(p);
Set *s = parseSetInlineWrapper(p);
t->barrier = s->hash;
+ AST_CLOSE(p);
}
result->lines += SKIPWS(p);
@@ -861,6 +919,7 @@ label_parseTemplateRef:
}
t->linked = parseContextualTestList(p, rule);
}
+ AST_CLOSE(p);
if (rule) {
if (rule->flags & RF_LOOKDELETED) {
@@ -897,6 +956,7 @@ void TextualParser::parseContextualDependencyTests(UChar *& p, Rule *rule) {
}
void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
+ AST_OPEN(Rule);
Rule *rule = result->allocateRule();
rule->line = result->lines;
rule->type = key;
@@ -906,6 +966,9 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
result->lines += SKIPWS(lp);
if (lp != p && lp < p) {
+ cur_ast->b = lp;
+ AST_OPEN(RuleWordform);
+ cur_ast->b = lp;
UChar *n = lp;
if (*n == '"') {
n++;
@@ -920,13 +983,17 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
gbuffers[0][c] = 0;
Tag *wform = parseTag(&gbuffers[0][0], lp);
rule->wordform = wform;
+ AST_CLOSE(n);
}
+ AST_OPEN(RuleType);
p += keywords[key].length();
+ AST_CLOSE(p);
result->lines += SKIPWS(p);
if (*p == ':') {
++p;
+ AST_OPEN(RuleName);
UChar *n = p;
result->lines += SKIPTOWS(n, '(');
ptrdiff_t c = n - p;
@@ -941,10 +1008,12 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
rule->setName(&gbuffers[0][0]);
}
p = n;
+ AST_CLOSE(p);
}
result->lines += SKIPWS(p);
if (key == K_EXTERNAL) {
+ AST_OPEN(RuleExternalType);
if (ux_simplecasecmp(p, stringbits[S_ONCE].getTerminatedBuffer(), stringbits[S_ONCE].length())) {
p += stringbits[S_ONCE].length();
rule->type = K_EXTERNAL_ONCE;
@@ -956,9 +1025,11 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
else {
error("%s: Error: Expected keyword ONCE or ALWAYS on line %u near `%S`!\n", p);
}
+ AST_CLOSE(p);
result->lines += SKIPWS(p);
+ AST_OPEN(RuleExternalCmd);
UChar *n = p;
if (*n == '"') {
++n;
@@ -970,8 +1041,8 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
result->lines += SKIPTOWS(n, 0, true);
ptrdiff_t c = n - p;
if (*p == '"') {
- u_strncpy(&gbuffers[0][0], p+1, c-1);
- gbuffers[0][c-2] = 0;
+ u_strncpy(&gbuffers[0][0], p + 1, c - 1);
+ gbuffers[0][c - 2] = 0;
}
else {
u_strncpy(&gbuffers[0][0], p, c);
@@ -981,16 +1052,17 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
Tag *ext = result->allocateTag(&gbuffers[0][0]);
rule->varname = ext->hash;
p = n;
+ AST_CLOSE(p);
}
lp = p;
bool setflag = true;
while (setflag) {
setflag = false;
- for (uint32_t i=0 ; i<FLAGS_COUNT ; i++) {
+ for (uint32_t i = 0; i < FLAGS_COUNT; i++) {
UChar *op = p;
- if (ux_simplecasecmp(p, flags[i].getTerminatedBuffer(), flags[i].length())) {
- p += flags[i].length();
+ if (ux_simplecasecmp(p, g_flags[i].getTerminatedBuffer(), g_flags[i].length())) {
+ p += g_flags[i].length();
rule->flags |= (1 << i);
setflag = true;
@@ -1015,12 +1087,16 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
// Rule flags followed by letters or valid set characters should not be flags.
if (*p != '(' && !ISSPACE(*p)) {
- undo_flag:
+ undo_flag:
rule->flags &= ~(1 << i);
p = op;
setflag = false;
break;
}
+
+ AST_OPEN(RuleFlag);
+ cur_ast->b = op;
+ AST_CLOSE(p);
}
result->lines += SKIPWS(p);
// If any of these is the next char, there cannot possibly be more rule options...
@@ -1031,7 +1107,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
}
}
if (rule->flags & MASK_ENCL) {
- std::bitset<sizeof(rule->flags)*CHAR_BIT> bits(static_cast<uint64_t>(rule->flags & MASK_ENCL));
+ std::bitset<sizeof(rule->flags) * CHAR_BIT> bits(static_cast<uint64_t>(rule->flags & MASK_ENCL));
if (bits.count() > 1) {
error("%s: Error: Line %u near `%S`: ENCL_* are all mutually exclusive!\n", lp);
}
@@ -1061,14 +1137,12 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
error("%s: Error: Line %u near `%S`: ITERATE and NOITERATE are mutually exclusive!\n", lp);
}
- if (!(rule->flags & (RF_ITERATE|RF_NOITERATE))) {
- if (key != K_SELECT && key != K_REMOVE && key != K_IFF
- && key != K_DELIMIT && key != K_REMCOHORT
- && key != K_MOVE && key != K_SWITCH) {
+ if (!(rule->flags & (RF_ITERATE | RF_NOITERATE))) {
+ if (key != K_SELECT && key != K_REMOVE && key != K_IFF && key != K_DELIMIT && key != K_REMCOHORT && key != K_MOVE && key != K_SWITCH) {
rule->flags |= RF_NOITERATE;
}
}
- if (key == K_UNMAP && !(rule->flags & (RF_SAFE|RF_UNSAFE))) {
+ if (key == K_UNMAP && !(rule->flags & (RF_SAFE | RF_UNSAFE))) {
rule->flags |= RF_SAFE;
}
if (rule->flags & RF_UNMAPLAST) {
@@ -1080,9 +1154,11 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
result->lines += SKIPWS(p);
if (rule->flags & RF_WITHCHILD) {
+ AST_OPEN(RuleWithChildTarget);
result->has_dep = true;
Set *s = parseSetInlineWrapper(p);
rule->childset1 = s->hash;
+ AST_CLOSE(p);
result->lines += SKIPWS(p);
}
else if (rule->flags & RF_NOCHILD) {
@@ -1091,6 +1167,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
lp = p;
if (key == K_SUBSTITUTE || key == K_EXECUTE) {
+ AST_OPEN(RuleSublist);
swapper_false swp(no_isets, no_isets);
Set *s = parseSetInlineWrapper(p);
s->reindex(*result);
@@ -1101,16 +1178,13 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
if (s->trie.empty() && s->trie_special.empty() && !(s->type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY))) {
error("%s: Error: Substitute set on line %u near `%S` was neither unified nor of LIST type!\n", lp);
}
+ AST_CLOSE(p);
}
result->lines += SKIPWS(p);
lp = p;
- if (key == K_MAP || key == K_ADD || key == K_REPLACE || key == K_APPEND || key == K_SUBSTITUTE || key == K_COPY
- || key == K_ADDRELATIONS || key == K_ADDRELATION
- || key == K_SETRELATIONS || key == K_SETRELATION
- || key == K_REMRELATIONS || key == K_REMRELATION
- || key == K_SETVARIABLE || key == K_REMVARIABLE
- || key == K_ADDCOHORT || key == K_JUMP) {
+ if (key == K_MAP || key == K_ADD || key == K_REPLACE || key == K_APPEND || key == K_SUBSTITUTE || key == K_COPY || key == K_ADDRELATIONS || key == K_ADDRELATION || key == K_SETRELATIONS || key == K_SETRELATION || key == K_REMRELATIONS || key == K_REMRELATION || key == K_SETVARIABLE || key == K_REMVARIABLE || key == K_ADDCOHORT || key == K_JUMP || key == K_SPLITCOHORT) {
+ AST_OPEN(RuleMaplist);
swapper_false swp(no_isets, no_isets);
Set *s = parseSetInlineWrapper(p);
s->reindex(*result);
@@ -1121,27 +1195,21 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
if (s->trie.empty() && s->trie_special.empty() && !(s->type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY))) {
error("%s: Error: Mapping set on line %u near `%S` was neither unified nor of LIST type!\n", lp);
}
- if (key == K_APPEND && !s->getNonEmpty().empty()) {
- if (!(s->getNonEmpty().begin()->first->type & T_BASEFORM)) {
- error("%s: Error: There must be a baseform before any other tags in APPEND on line %u near `%S`!\n", lp);
- }
- }
- if (key == K_ADDCOHORT && !s->getNonEmpty().empty()) {
- if (!(s->getNonEmpty().begin()->first->type & T_WORDFORM)) {
- error("%s: Error: There must be a wordform before any other tags in ADDCOHORT on line %u near `%S`!\n", lp);
- }
- }
+ AST_CLOSE(p);
}
bool copy_except = false;
if (key == K_COPY && ux_simplecasecmp(p, stringbits[S_EXCEPT].getTerminatedBuffer(), stringbits[S_EXCEPT].length())) {
+ AST_OPEN(RuleExcept);
p += stringbits[S_EXCEPT].length();
copy_except = true;
+ AST_CLOSE(p);
}
result->lines += SKIPWS(p);
lp = p;
if (key == K_ADDRELATIONS || key == K_SETRELATIONS || key == K_REMRELATIONS || key == K_SETVARIABLE || copy_except) {
+ AST_OPEN(RuleSublist);
swapper_false swp(no_isets, no_isets);
Set *s = parseSetInlineWrapper(p);
s->reindex(*result);
@@ -1152,9 +1220,11 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
if (s->trie.empty() && s->trie_special.empty() && !(s->type & (ST_TAG_UNIFY | ST_SET_UNIFY | ST_CHILD_UNIFY))) {
error("%s: Error: Relation/Value set on line %u near `%S` was neither unified nor of LIST type!\n", lp);
}
+ AST_CLOSE(p);
}
if (key == K_ADDCOHORT) {
+ AST_OPEN(RuleAddcohortWhere);
if (ux_simplecasecmp(p, stringbits[S_AFTER].getTerminatedBuffer(), stringbits[S_AFTER].length())) {
p += stringbits[S_AFTER].length();
rule->type = K_ADDCOHORT_AFTER;
@@ -1166,6 +1236,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
else {
error("%s: Error: Expected position keyword AFTER or BEFORE on line %u near `%S`!\n", p);
}
+ AST_CLOSE(p);
}
result->lines += SKIPWS(p);
@@ -1174,8 +1245,10 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
}
result->lines += SKIPWS(p);
+ AST_OPEN(RuleTarget);
Set *s = parseSetInlineWrapper(p);
rule->target = s->hash;
+ AST_CLOSE(p);
result->lines += SKIPWS(p);
if (ux_simplecasecmp(p, stringbits[S_IF].getTerminatedBuffer(), stringbits[S_IF].length())) {
@@ -1183,6 +1256,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
}
result->lines += SKIPWS(p);
+ AST_OPEN(Contexts);
while (*p && *p == '(') {
++p;
result->lines += SKIPWS(p);
@@ -1194,14 +1268,12 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
++p;
result->lines += SKIPWS(p);
}
+ AST_CLOSE(p);
- if (key == K_SETPARENT || key == K_SETCHILD
- || key == K_ADDRELATIONS || key == K_ADDRELATION
- || key == K_SETRELATIONS || key == K_SETRELATION
- || key == K_REMRELATIONS || key == K_REMRELATION
- || key == K_MOVE || key == K_SWITCH) {
+ if (key == K_SETPARENT || key == K_SETCHILD || key == K_ADDRELATIONS || key == K_ADDRELATION || key == K_SETRELATIONS || key == K_SETRELATION || key == K_REMRELATIONS || key == K_REMRELATION || key == K_MOVE || key == K_SWITCH) {
result->lines += SKIPWS(p);
if (key == K_MOVE) {
+ AST_OPEN(RuleMoveType);
if (ux_simplecasecmp(p, stringbits[S_AFTER].getTerminatedBuffer(), stringbits[S_AFTER].length())) {
p += stringbits[S_AFTER].length();
rule->type = K_MOVE_AFTER;
@@ -1213,6 +1285,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
else {
error("%s: Error: Expected movement keyword AFTER or BEFORE on line %u near `%S`!\n", p);
}
+ AST_CLOSE(p);
}
else if (key == K_SWITCH) {
if (ux_simplecasecmp(p, stringbits[S_WITH].getTerminatedBuffer(), stringbits[S_WITH].length())) {
@@ -1223,6 +1296,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
}
}
else {
+ AST_OPEN(RuleDirection);
if (ux_simplecasecmp(p, stringbits[S_TO].getTerminatedBuffer(), stringbits[S_TO].length())) {
p += stringbits[S_TO].length();
}
@@ -1233,25 +1307,29 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
else {
error("%s: Error: Expected dependency keyword TO or FROM on line %u near `%S`!\n", p);
}
+ AST_CLOSE(p);
}
result->lines += SKIPWS(p);
if (key == K_MOVE) {
- if (ux_simplecasecmp(p, flags[FL_WITHCHILD].getTerminatedBuffer(), flags[FL_WITHCHILD].length())) {
- p += flags[FL_WITHCHILD].length();
+ AST_OPEN(RuleWithChildDepTarget);
+ if (ux_simplecasecmp(p, g_flags[FL_WITHCHILD].getTerminatedBuffer(), g_flags[FL_WITHCHILD].length())) {
+ p += g_flags[FL_WITHCHILD].length();
result->has_dep = true;
Set *s = parseSetInlineWrapper(p);
rule->childset2 = s->hash;
result->lines += SKIPWS(p);
}
- else if (ux_simplecasecmp(p, flags[FL_NOCHILD].getTerminatedBuffer(), flags[FL_NOCHILD].length())) {
- p += flags[FL_NOCHILD].length();
+ else if (ux_simplecasecmp(p, g_flags[FL_NOCHILD].getTerminatedBuffer(), g_flags[FL_NOCHILD].length())) {
+ p += g_flags[FL_NOCHILD].length();
rule->childset2 = 0;
result->lines += SKIPWS(p);
}
+ AST_CLOSE(p);
}
lp = p;
+ AST_OPEN(ContextsTarget);
while (*p && *p == '(') {
++p;
result->lines += SKIPWS(p);
@@ -1263,13 +1341,14 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
++p;
result->lines += SKIPWS(p);
}
+ AST_CLOSE(p);
if (rule->dep_tests.empty()) {
error("%s: Error: Expected dependency target on line %u near `%S`!\n", lp);
}
rule->dep_target = rule->dep_tests.back();
rule->dep_tests.pop_back();
}
- if (key == K_SETPARENT || key == K_SETCHILD) {
+ if (key == K_SETPARENT || key == K_SETCHILD || key == K_SPLITCOHORT) {
result->has_dep = true;
}
if (key == K_SETRELATION || key == K_SETRELATIONS || key == K_ADDRELATION || key == K_ADDRELATIONS || key == K_REMRELATION || key == K_REMRELATIONS) {
@@ -1282,13 +1361,13 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
found = true;
}
else {
- foreach (ContextList, rule->tests, it, it_end) {
+ foreach (it, rule->tests) {
if ((*it)->pos & POS_MARK_JUMP) {
found = true;
break;
}
}
- foreach (ContextList, rule->dep_tests, it, it_end) {
+ foreach (it, rule->dep_tests) {
if ((*it)->pos & POS_MARK_JUMP) {
found = true;
break;
@@ -1303,9 +1382,17 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
rule->reverseContextualTests();
addRuleToGrammar(rule);
+
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ u_fprintf(ux_stderr, "%s: Warning: Expected closing ; on line %u after previous rule!\n", filebase, result->lines);
+ u_fflush(ux_stderr);
+ }
+ AST_CLOSE(p);
}
void TextualParser::parseAnchorish(UChar *& p) {
+ AST_OPEN(AnchorName);
UChar *n = p;
result->lines += SKIPTOWS(n, 0, true);
ptrdiff_t c = n - p;
@@ -1313,13 +1400,14 @@ void TextualParser::parseAnchorish(UChar *& p) {
gbuffers[0][c] = 0;
result->addAnchor(&gbuffers[0][0], result->rule_by_number.size(), true);
p = n;
+ AST_CLOSE(p);
result->lines += SKIPWS(p, ';');
if (*p != ';') {
error("%s: Error: Expected closing ; on line %u near `%S` after anchor/section name!\n", p);
}
}
-int TextualParser::parseFromUChar(UChar *input, const char *fname) {
+void TextualParser::parseFromUChar(UChar *input, const char *fname) {
if (!input || !input[0]) {
u_fprintf(ux_stderr, "%s: Error: Input is empty - cannot continue!\n", fname);
CG3Quit(1);
@@ -1327,964 +1415,916 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
UChar *p = input;
result->lines = 1;
+ AST_OPEN(Grammar);
filebase = basename(const_cast<char*>(fname));
while (*p) {
- try {
- if (verbosity_level > 0 && result->lines % 500 == 0) {
- std::cerr << "Parsing line " << result->lines << " \r" << std::flush;
- }
- result->lines += SKIPWS(p);
- // DELIMITERS
- if (ISCHR(*p,'D','d') && ISCHR(*(p+9),'S','s') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'L','l')
- && ISCHR(*(p+3),'I','i') && ISCHR(*(p+4),'M','m') && ISCHR(*(p+5),'I','i') && ISCHR(*(p+6),'T','t')
- && ISCHR(*(p+7),'E','e') && ISCHR(*(p+8),'R','r')
- && !ISSTRING(p, 9)) {
- if (result->delimiters) {
- error("%s: Error: Cannot redefine DELIMITERS on line %u near `%S`!\n", p);
- }
- result->delimiters = result->allocateSet();
- result->delimiters->line = result->lines;
- result->delimiters->setName(stringbits[S_DELIMITSET].getTerminatedBuffer());
- p += 10;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ try {
+ if (verbosity_level > 0 && result->lines % 500 == 0) {
+ std::cerr << "Parsing line " << result->lines << " \r" << std::flush;
}
- ++p;
- parseTagList(p, result->delimiters);
- result->addSet(result->delimiters);
- if (result->delimiters->trie.empty() && result->delimiters->trie_special.empty()) {
- error("%s: Error: DELIMITERS declared, but no definitions given, on line %u near `%S`!\n", p);
- }
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
- }
- }
- // SOFT-DELIMITERS
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+14),'S','s') && ISCHR(*(p+1),'O','o') && ISCHR(*(p+2),'F','f')
- && ISCHR(*(p+3),'T','t') && ISCHR(*(p+4),'-','_')
- && ISCHR(*(p+5),'D','d') && ISCHR(*(p+6),'E','e') && ISCHR(*(p+7),'L','l')
- && ISCHR(*(p+8),'I','i') && ISCHR(*(p+9),'M','m') && ISCHR(*(p+10),'I','i') && ISCHR(*(p+11),'T','t')
- && ISCHR(*(p+12),'E','e') && ISCHR(*(p+13),'R','r')
- && !ISSTRING(p, 14)) {
- if (result->soft_delimiters) {
- error("%s: Error: Cannot redefine SOFT-DELIMITERS on line %u near `%S`!\n", p);
- }
- result->soft_delimiters = result->allocateSet();
- result->soft_delimiters->line = result->lines;
- result->soft_delimiters->setName(stringbits[S_SOFTDELIMITSET].getTerminatedBuffer());
- p += 15;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
- }
- ++p;
- parseTagList(p, result->soft_delimiters);
- result->addSet(result->soft_delimiters);
- if (result->soft_delimiters->trie.empty() && result->soft_delimiters->trie_special.empty()) {
- error("%s: Error: SOFT-DELIMITERS declared, but no definitions given, on line %u near `%S`!\n", p);
+ result->lines += SKIPWS(p);
+ // DELIMITERS
+ if (IS_ICASE(p, "DELIMITERS", "delimiters")) {
+ if (result->delimiters) {
+ error("%s: Error: Cannot redefine DELIMITERS on line %u near `%S`!\n", p);
+ }
+ result->delimiters = result->allocateSet();
+ result->delimiters->line = result->lines;
+ result->delimiters->setName(stringbits[S_DELIMITSET].getTerminatedBuffer());
+ AST_OPEN(Delimiters);
+ p += 10;
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ parseTagList(p, result->delimiters);
+ result->addSet(result->delimiters);
+ if (result->delimiters->trie.empty() && result->delimiters->trie_special.empty()) {
+ error("%s: Error: DELIMITERS declared, but no definitions given, on line %u near `%S`!\n", p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
}
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ // SOFT-DELIMITERS
+ else if (IS_ICASE(p, "SOFT-DELIMITERS", "soft-delimiters")) {
+ if (result->soft_delimiters) {
+ error("%s: Error: Cannot redefine SOFT-DELIMITERS on line %u near `%S`!\n", p);
+ }
+ result->soft_delimiters = result->allocateSet();
+ result->soft_delimiters->line = result->lines;
+ result->soft_delimiters->setName(stringbits[S_SOFTDELIMITSET].getTerminatedBuffer());
+ AST_OPEN(SoftDelimiters);
+ p += 15;
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ parseTagList(p, result->soft_delimiters);
+ result->addSet(result->soft_delimiters);
+ if (result->soft_delimiters->trie.empty() && result->soft_delimiters->trie_special.empty()) {
+ error("%s: Error: SOFT-DELIMITERS declared, but no definitions given, on line %u near `%S`!\n", p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
}
- }
- // MAPPING-PREFIX
- else if (ISCHR(*p,'M','m') && ISCHR(*(p+13),'X','x') && ISCHR(*(p+1),'A','a') && ISCHR(*(p+2),'P','p')
- && ISCHR(*(p+3),'P','p') && ISCHR(*(p+4),'I','i')
- && ISCHR(*(p+5),'N','n') && ISCHR(*(p+6),'G','g') && ISCHR(*(p+7),'-','_')
- && ISCHR(*(p+8),'P','p') && ISCHR(*(p+9),'R','r') && ISCHR(*(p+10),'E','e') && ISCHR(*(p+11),'F','f')
- && ISCHR(*(p+12),'I','i')
- && !ISSTRING(p, 13)) {
+ // MAPPING-PREFIX
+ else if (IS_ICASE(p, "MAPPING-PREFIX", "mapping-prefix")) {
+ if (seen_mapping_prefix) {
+ u_fprintf(ux_stderr, "%s: Error: MAPPING-PREFIX on line %u cannot change previous prefix set on line %u!\n", filebase, result->lines, seen_mapping_prefix);
+ incErrorCount();
+ }
+ seen_mapping_prefix = result->lines;
- if (seen_mapping_prefix) {
- u_fprintf(ux_stderr, "%s: Error: MAPPING-PREFIX on line %u cannot change previous prefix set on line %u!\n", filebase, result->lines, seen_mapping_prefix);
- incErrorCount();
- }
- seen_mapping_prefix = result->lines;
+ AST_OPEN(MappingPrefix);
+ p += 14;
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ result->lines += SKIPWS(p);
- p += 14;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
- }
- ++p;
- result->lines += SKIPWS(p);
+ AST_OPEN(Tag);
+ UChar *n = p;
+ result->lines += SKIPTOWS(n, ';');
+ ptrdiff_t c = n - p;
+ u_strncpy(&gbuffers[0][0], p, c);
+ gbuffers[0][c] = 0;
+ p = n;
+ AST_CLOSE(p);
- UChar *n = p;
- result->lines += SKIPTOWS(n, ';');
- ptrdiff_t c = n - p;
- u_strncpy(&gbuffers[0][0], p, c);
- gbuffers[0][c] = 0;
- p = n;
+ result->mapping_prefix = gbuffers[0][0];
+
+ if (!result->mapping_prefix) {
+ error("%s: Error: MAPPING-PREFIX declared, but no definitions given, on line %u near `%S`!\n", p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
+ }
+ // PREFERRED-TARGETS
+ else if (IS_ICASE(p, "PREFERRED-TARGETS", "preferred-targets")) {
+ AST_OPEN(PreferredTargets);
+ p += 17;
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ result->lines += SKIPWS(p);
- result->mapping_prefix = gbuffers[0][0];
+ while (*p && *p != ';') {
+ AST_OPEN(Tag);
+ UChar *n = p;
+ if (*n == '"') {
+ n++;
+ SKIPTO_NOSPAN(n, '"');
+ if (*n != '"') {
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ }
+ }
+ result->lines += SKIPTOWS(n, ';', true);
+ ptrdiff_t c = n - p;
+ u_strncpy(&gbuffers[0][0], p, c);
+ gbuffers[0][c] = 0;
+ Tag *t = parseTag(&gbuffers[0][0], p);
+ result->preferred_targets.push_back(t->hash);
+ p = n;
+ AST_CLOSE(p);
+ result->lines += SKIPWS(p);
+ }
- if (!result->mapping_prefix) {
- error("%s: Error: MAPPING-PREFIX declared, but no definitions given, on line %u near `%S`!\n", p);
- }
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
- }
- }
- // PREFERRED-TARGETS
- else if (ISCHR(*p,'P','p') && ISCHR(*(p+16),'S','s') && ISCHR(*(p+1),'R','r') && ISCHR(*(p+2),'E','e')
- && ISCHR(*(p+3),'F','f') && ISCHR(*(p+4),'E','e')
- && ISCHR(*(p+5),'R','r') && ISCHR(*(p+6),'R','r') && ISCHR(*(p+7),'E','e')
- && ISCHR(*(p+8),'D','d') && ISCHR(*(p+9),'-','_') && ISCHR(*(p+10),'T','t') && ISCHR(*(p+11),'A','a')
- && ISCHR(*(p+12),'R','r') && ISCHR(*(p+13),'G','g') && ISCHR(*(p+14),'E','e') && ISCHR(*(p+15),'T','t')
- && !ISSTRING(p, 16)) {
- p += 17;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
- }
- ++p;
- result->lines += SKIPWS(p);
+ if (result->preferred_targets.empty()) {
+ error("%s: Error: PREFERRED-TARGETS declared, but no definitions given, on line %u near `%S`!\n", p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
+ }
+ // REOPEN-MAPPINGS
+ else if (IS_ICASE(p, "REOPEN-MAPPINGS", "reopen-mappings")) {
+ AST_OPEN(ReopenMappings);
+ p += 15;
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ result->lines += SKIPWS(p);
- while (*p && *p != ';') {
- UChar *n = p;
- if (*n == '"') {
- n++;
- SKIPTO_NOSPAN(n, '"');
- if (*n != '"') {
- error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ while (*p && *p != ';') {
+ AST_OPEN(Tag);
+ UChar *n = p;
+ if (*n == '"') {
+ n++;
+ SKIPTO_NOSPAN(n, '"');
+ if (*n != '"') {
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ }
}
+ result->lines += SKIPTOWS(n, ';', true);
+ ptrdiff_t c = n - p;
+ u_strncpy(&gbuffers[0][0], p, c);
+ gbuffers[0][c] = 0;
+ Tag *t = parseTag(&gbuffers[0][0], p);
+ result->reopen_mappings.insert(t->hash);
+ p = n;
+ AST_CLOSE(p);
+ result->lines += SKIPWS(p);
+ }
+
+ if (result->reopen_mappings.empty()) {
+ error("%s: Error: REOPEN-MAPPINGS declared, but no definitions given, on line %u near `%S`!\n", p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
+ }
+ // STATIC-SETS
+ else if (IS_ICASE(p, "STATIC-SETS", "static-sets")) {
+ AST_OPEN(StaticSets);
+ p += 11;
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ result->lines += SKIPWS(p);
+
+ while (*p && *p != ';') {
+ AST_OPEN(SetName);
+ UChar *n = p;
+ result->lines += SKIPTOWS(n, ';', true);
+ result->static_sets.push_back(UString(p, n));
+ p = n;
+ AST_CLOSE(p);
+ result->lines += SKIPWS(p);
+ }
+
+ if (result->static_sets.empty()) {
+ error("%s: Error: STATIC-SETS declared, but no definitions given, on line %u near `%S`!\n", p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
+ }
+ // ADDRELATIONS
+ else if (IS_ICASE(p, "ADDRELATIONS", "addrelations")) {
+ parseRule(p, K_ADDRELATIONS);
+ }
+ // SETRELATIONS
+ else if (IS_ICASE(p, "SETRELATIONS", "setrelations")) {
+ parseRule(p, K_SETRELATIONS);
+ }
+ // REMRELATIONS
+ else if (IS_ICASE(p, "REMRELATIONS", "remrelations")) {
+ parseRule(p, K_REMRELATIONS);
+ }
+ // ADDRELATION
+ else if (IS_ICASE(p, "ADDRELATION", "addrelation")) {
+ parseRule(p, K_ADDRELATION);
+ }
+ // SETRELATION
+ else if (IS_ICASE(p, "SETRELATION", "setrelation")) {
+ parseRule(p, K_SETRELATION);
+ }
+ // REMRELATION
+ else if (IS_ICASE(p, "REMRELATION", "remrelation")) {
+ parseRule(p, K_REMRELATION);
+ }
+ // SETVARIABLE
+ else if (IS_ICASE(p, "SETVARIABLE", "setvariable")) {
+ parseRule(p, K_SETVARIABLE);
+ }
+ // REMVARIABLE
+ else if (IS_ICASE(p, "REMVARIABLE", "remvariable")) {
+ parseRule(p, K_REMVARIABLE);
+ }
+ // SETPARENT
+ else if (IS_ICASE(p, "SETPARENT", "setparent")) {
+ parseRule(p, K_SETPARENT);
+ }
+ // SETCHILD
+ else if (IS_ICASE(p, "SETCHILD", "setchild")) {
+ parseRule(p, K_SETCHILD);
+ }
+ // EXTERNAL
+ else if (IS_ICASE(p, "EXTERNAL", "external")) {
+ parseRule(p, K_EXTERNAL);
+ }
+ // REMCOHORT
+ else if (IS_ICASE(p, "REMCOHORT", "remcohort")) {
+ parseRule(p, K_REMCOHORT);
+ }
+ // ADDCOHORT
+ else if (IS_ICASE(p, "ADDCOHORT", "addcohort")) {
+ parseRule(p, K_ADDCOHORT);
+ }
+ // SPLITCOHORT
+ else if (IS_ICASE(p, "SPLITCOHORT", "splitcohort")) {
+ parseRule(p, K_SPLITCOHORT);
+ }
+ // SETS
+ else if (IS_ICASE(p, "SETS", "sets")) {
+ p += 4;
+ }
+ // LIST
+ else if (IS_ICASE(p, "LIST", "list")) {
+ Set *s = result->allocateSet();
+ s->line = result->lines;
+ AST_OPEN(List);
+ p += 4;
+ result->lines += SKIPWS(p);
+ AST_OPEN(SetName);
+ UChar *n = p;
+ result->lines += SKIPTOWS(n, 0, true);
+ while (n[-1] == ',' || n[-1] == ']') {
+ --n;
}
- result->lines += SKIPTOWS(n, ';', true);
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- Tag *t = parseTag(&gbuffers[0][0], p);
- result->preferred_targets.push_back(t->hash);
+ s->setName(&gbuffers[0][0]);
p = n;
+ AST_CLOSE(p);
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ parseTagList(p, s);
+ s->rehash();
+ Set *tmp = result->getSet(s->hash);
+ if (tmp) {
+ if (verbosity_level > 0 && tmp->name[0] != '_' && tmp->name[1] != 'G' && tmp->name[2] != '_') {
+ u_fprintf(ux_stderr, "%s: Warning: LIST %S was defined twice with the same contents: Lines %u and %u.\n", filebase, s->name.c_str(), tmp->line, s->line);
+ u_fflush(ux_stderr);
+ }
+ }
+ result->addSet(s);
+ if (s->empty()) {
+ error("%s: Error: LIST %S declared, but no definitions given, on line %u near `%S`!\n", s->name.c_str(), p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
+ }
+ // SET
+ else if (IS_ICASE(p, "SET", "set")) {
+ Set *s = result->allocateSet();
+ s->line = result->lines;
+ AST_OPEN(Set);
+ p += 3;
result->lines += SKIPWS(p);
- }
-
- if (result->preferred_targets.empty()) {
- error("%s: Error: PREFERRED-TARGETS declared, but no definitions given, on line %u near `%S`!\n", p);
- }
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
- }
- }
- // REOPEN-MAPPINGS
- else if (ISCHR(*p, 'R', 'r') && ISCHR(*(p + 14), 'S', 's') && ISCHR(*(p + 1), 'E', 'e') && ISCHR(*(p + 2), 'O', 'o')
- && ISCHR(*(p + 3), 'P', 'p') && ISCHR(*(p + 4), 'E', 'e') && ISCHR(*(p + 5), 'N', 'n') && ISCHR(*(p + 6), '-', '_')
- && ISCHR(*(p + 7), 'M', 'm') && ISCHR(*(p + 8), 'A', 'a') && ISCHR(*(p + 9), 'P', 'p') && ISCHR(*(p + 10), 'P', 'p')
- && ISCHR(*(p + 11), 'I', 'i') && ISCHR(*(p + 12), 'N', 'n') && ISCHR(*(p + 13), 'G', 'g')
- && !ISSTRING(p, 14)) {
- p += 15;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
- }
- ++p;
- result->lines += SKIPWS(p);
-
- while (*p && *p != ';') {
+ AST_OPEN(SetName);
UChar *n = p;
- if (*n == '"') {
- n++;
- SKIPTO_NOSPAN(n, '"');
- if (*n != '"') {
- error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
- }
+ result->lines += SKIPTOWS(n, 0, true);
+ while (n[-1] == ',' || n[-1] == ']') {
+ --n;
}
- result->lines += SKIPTOWS(n, ';', true);
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- Tag *t = parseTag(&gbuffers[0][0], p);
- result->reopen_mappings.insert(t->hash);
+ s->setName(&gbuffers[0][0]);
+ uint32_t sh = hash_value(&gbuffers[0][0]);
p = n;
- result->lines += SKIPWS(p);
- }
+ AST_CLOSE(p);
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
- if (result->reopen_mappings.empty()) {
- error("%s: Error: REOPEN-MAPPINGS declared, but no definitions given, on line %u near `%S`!\n", p);
- }
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
- }
- }
- // STATIC-SETS
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+10),'S','s') && ISCHR(*(p+1),'T','t') && ISCHR(*(p+2),'A','a')
- && ISCHR(*(p+3),'T','t') && ISCHR(*(p+4),'I','i') && ISCHR(*(p+5),'C','c') && ISCHR(*(p+6),'-','-')
- && ISCHR(*(p+7),'S','s') && ISCHR(*(p+8),'E','e') && ISCHR(*(p+9),'T','t')
- && !ISSTRING(p, 10)) {
- p += 11;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
- }
- ++p;
- result->lines += SKIPWS(p);
+ swapper_false swp(no_isets, no_isets);
- while (*p && *p != ';') {
+ parseSetInline(p, s);
+ s->rehash();
+ Set *tmp = result->getSet(s->hash);
+ if (tmp) {
+ if (verbosity_level > 0 && tmp->name[0] != '_' && tmp->name[1] != 'G' && tmp->name[2] != '_') {
+ u_fprintf(ux_stderr, "%s: Warning: SET %S was defined twice with the same contents: Lines %u and %u.\n", filebase, s->name.c_str(), tmp->line, s->line);
+ u_fflush(ux_stderr);
+ }
+ }
+ else if (s->sets.size() == 1 && !(s->type & ST_TAG_UNIFY)) {
+ tmp = result->getSet(s->sets.back());
+ if (verbosity_level > 0) {
+ u_fprintf(ux_stderr, "%s: Warning: Set %S on line %u aliased to %S on line %u.\n", filebase, s->name.c_str(), s->line, tmp->name.c_str(), tmp->line);
+ u_fflush(ux_stderr);
+ }
+ result->maybe_used_sets.insert(tmp);
+ result->set_alias[sh] = tmp->hash;
+ result->destroySet(s);
+ s = tmp;
+ }
+ result->addSet(s);
+ if (s->empty()) {
+ error("%s: Error: SET %S declared, but no definitions given, on line %u near `%S`!\n", s->name.c_str(), p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`! Probably caused by missing set operator.\n", p);
+ }
+ AST_CLOSE(p + 1);
+ }
+ // MAPPINGS
+ else if (IS_ICASE(p, "MAPPINGS", "mappings")) {
+ AST_OPEN(BeforeSections);
+ p += 8;
+ in_before_sections = true;
+ in_section = false;
+ in_after_sections = false;
+ in_null_section = false;
+ UChar *s = p;
+ SKIPLN(s);
+ SKIPWS(s);
+ result->lines += SKIPWS(p);
+ if (p != s) {
+ parseAnchorish(p);
+ }
+ AST_CLOSE(p);
+ }
+ // CORRECTIONS
+ else if (IS_ICASE(p, "CORRECTIONS", "corrections")) {
+ AST_OPEN(BeforeSections);
+ p += 11;
+ in_before_sections = true;
+ in_section = false;
+ in_after_sections = false;
+ in_null_section = false;
+ UChar *s = p;
+ SKIPLN(s);
+ SKIPWS(s);
+ result->lines += SKIPWS(p);
+ if (p != s) {
+ parseAnchorish(p);
+ }
+ AST_CLOSE(p);
+ }
+ // BEFORE-SECTIONS
+ else if (IS_ICASE(p, "BEFORE-SECTIONS", "before-sections")) {
+ AST_OPEN(BeforeSections);
+ p += 15;
+ in_before_sections = true;
+ in_section = false;
+ in_after_sections = false;
+ in_null_section = false;
+ UChar *s = p;
+ SKIPLN(s);
+ SKIPWS(s);
+ result->lines += SKIPWS(p);
+ if (p != s) {
+ parseAnchorish(p);
+ }
+ AST_CLOSE(p);
+ }
+ // SECTION
+ else if (IS_ICASE(p, "SECTION", "section")) {
+ AST_OPEN(Section);
+ p += 7;
+ result->sections.push_back(result->lines);
+ in_before_sections = false;
+ in_section = true;
+ in_after_sections = false;
+ in_null_section = false;
+ UChar *s = p;
+ SKIPLN(s);
+ SKIPWS(s);
+ result->lines += SKIPWS(p);
+ if (p != s) {
+ parseAnchorish(p);
+ }
+ AST_CLOSE(p);
+ }
+ // CONSTRAINTS
+ else if (IS_ICASE(p, "CONSTRAINTS", "constraints")) {
+ AST_OPEN(Section);
+ p += 11;
+ result->sections.push_back(result->lines);
+ in_before_sections = false;
+ in_section = true;
+ in_after_sections = false;
+ in_null_section = false;
+ UChar *s = p;
+ SKIPLN(s);
+ SKIPWS(s);
+ result->lines += SKIPWS(p);
+ if (p != s) {
+ parseAnchorish(p);
+ }
+ AST_CLOSE(p);
+ }
+ // AFTER-SECTIONS
+ else if (IS_ICASE(p, "AFTER-SECTIONS", "after-sections")) {
+ AST_OPEN(AfterSections);
+ p += 14;
+ in_before_sections = false;
+ in_section = false;
+ in_after_sections = true;
+ in_null_section = false;
+ UChar *s = p;
+ SKIPLN(s);
+ SKIPWS(s);
+ result->lines += SKIPWS(p);
+ if (p != s) {
+ parseAnchorish(p);
+ }
+ AST_CLOSE(p);
+ }
+ // NULL-SECTION
+ else if (IS_ICASE(p, "NULL-SECTION", "null-section")) {
+ AST_OPEN(NullSection);
+ p += 12;
+ in_before_sections = false;
+ in_section = false;
+ in_after_sections = false;
+ in_null_section = true;
+ UChar *s = p;
+ SKIPLN(s);
+ SKIPWS(s);
+ result->lines += SKIPWS(p);
+ if (p != s) {
+ parseAnchorish(p);
+ }
+ AST_CLOSE(p);
+ }
+ // SUBREADINGS
+ else if (IS_ICASE(p, "SUBREADINGS", "subreadings")) {
+ AST_OPEN(SubReadings);
+ p += 11;
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ result->lines += SKIPWS(p);
+
+ AST_OPEN(SubReadingsDirection);
+ if (p[0] == 'L' || p[0] == 'l') {
+ result->sub_readings_ltr = true;
+ }
+ else if (p[0] == 'R' || p[0] == 'r') {
+ result->sub_readings_ltr = false;
+ }
+ else {
+ error("%s: Error: Expected RTL or LTR on line %u near `%S`!\n", *p, p);
+ }
UChar *n = p;
- result->lines += SKIPTOWS(n, ';', true);
- result->static_sets.push_back(UString(p, n));
+ result->lines += SKIPTOWS(n, 0, true);
p = n;
+ AST_CLOSE(p);
+
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
+ }
+ // OPTIONS
+ else if (IS_ICASE(p, "OPTIONS", "options")) {
+ AST_OPEN(Options);
+ p += 7;
+ result->lines += SKIPWS(p, '+');
+ if (p[0] != '+' || p[1] != '=') {
+ error("%s: Error: Encountered a %C before the expected += on line %u near `%S`!\n", *p, p);
+ }
+ p += 2;
result->lines += SKIPWS(p);
- }
- if (result->static_sets.empty()) {
- error("%s: Error: STATIC-SETS declared, but no definitions given, on line %u near `%S`!\n", p);
- }
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
- }
- }
- // ADDRELATIONS
- else if (ISCHR(*p,'A','a') && ISCHR(*(p+11),'S','s') && ISCHR(*(p+1),'D','d') && ISCHR(*(p+2),'D','d')
- && ISCHR(*(p+3),'R','r') && ISCHR(*(p+4),'E','e') && ISCHR(*(p+5),'L','l') && ISCHR(*(p+6),'A','a')
- && ISCHR(*(p+7),'T','t') && ISCHR(*(p+8),'I','i') && ISCHR(*(p+9),'O','o') && ISCHR(*(p+10),'N','n')
- && !ISSTRING(p, 11)) {
- parseRule(p, K_ADDRELATIONS);
- }
- // SETRELATIONS
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+11),'S','s') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'T','t')
- && ISCHR(*(p+3),'R','r') && ISCHR(*(p+4),'E','e') && ISCHR(*(p+5),'L','l') && ISCHR(*(p+6),'A','a')
- && ISCHR(*(p+7),'T','t') && ISCHR(*(p+8),'I','i') && ISCHR(*(p+9),'O','o') && ISCHR(*(p+10),'N','n')
- && !ISSTRING(p, 11)) {
- parseRule(p, K_SETRELATIONS);
- }
- // REMRELATIONS
- else if (ISCHR(*p,'R','r') && ISCHR(*(p+11),'S','s') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'M','m')
- && ISCHR(*(p+3),'R','r') && ISCHR(*(p+4),'E','e') && ISCHR(*(p+5),'L','l') && ISCHR(*(p+6),'A','a')
- && ISCHR(*(p+7),'T','t') && ISCHR(*(p+8),'I','i') && ISCHR(*(p+9),'O','o') && ISCHR(*(p+10),'N','n')
- && !ISSTRING(p, 11)) {
- parseRule(p, K_REMRELATIONS);
- }
- // ADDRELATION
- else if (ISCHR(*p,'A','a') && ISCHR(*(p+10),'N','n') && ISCHR(*(p+1),'D','d') && ISCHR(*(p+2),'D','d')
- && ISCHR(*(p+3),'R','r') && ISCHR(*(p+4),'E','e') && ISCHR(*(p+5),'L','l') && ISCHR(*(p+6),'A','a')
- && ISCHR(*(p+7),'T','t') && ISCHR(*(p+8),'I','i') && ISCHR(*(p+9),'O','o')
- && !ISSTRING(p, 10)) {
- parseRule(p, K_ADDRELATION);
- }
- // SETRELATION
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+10),'N','n') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'T','t')
- && ISCHR(*(p+3),'R','r') && ISCHR(*(p+4),'E','e') && ISCHR(*(p+5),'L','l') && ISCHR(*(p+6),'A','a')
- && ISCHR(*(p+7),'T','t') && ISCHR(*(p+8),'I','i') && ISCHR(*(p+9),'O','o')
- && !ISSTRING(p, 10)) {
- parseRule(p, K_SETRELATION);
- }
- // REMRELATION
- else if (ISCHR(*p,'R','r') && ISCHR(*(p+10),'N','n') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'M','m')
- && ISCHR(*(p+3),'R','r') && ISCHR(*(p+4),'E','e') && ISCHR(*(p+5),'L','l') && ISCHR(*(p+6),'A','a')
- && ISCHR(*(p+7),'T','t') && ISCHR(*(p+8),'I','i') && ISCHR(*(p+9),'O','o')
- && !ISSTRING(p, 10)) {
- parseRule(p, K_REMRELATION);
- }
- // SETVARIABLE
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+10),'E','e') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'T','t')
- && ISCHR(*(p+3),'V','v') && ISCHR(*(p+4),'A','a') && ISCHR(*(p+5),'R','r') && ISCHR(*(p+6),'I','i')
- && ISCHR(*(p+7),'A','a') && ISCHR(*(p+8),'B','b') && ISCHR(*(p+9),'L','l')
- && !ISSTRING(p, 10)) {
- parseRule(p, K_SETVARIABLE);
- }
- // REMVARIABLE
- else if (ISCHR(*p,'R','r') && ISCHR(*(p+10),'E','e') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'M','m')
- && ISCHR(*(p+3),'V','v') && ISCHR(*(p+4),'A','a') && ISCHR(*(p+5),'R','r') && ISCHR(*(p+6),'I','i')
- && ISCHR(*(p+7),'A','a') && ISCHR(*(p+8),'B','b') && ISCHR(*(p+9),'L','l')
- && !ISSTRING(p, 10)) {
- parseRule(p, K_REMVARIABLE);
- }
- // SETPARENT
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+8),'T','t') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'T','t')
- && ISCHR(*(p+3),'P','p') && ISCHR(*(p+4),'A','a') && ISCHR(*(p+5),'R','r') && ISCHR(*(p+6),'E','e')
- && ISCHR(*(p+7),'N','n')
- && !ISSTRING(p, 8)) {
- parseRule(p, K_SETPARENT);
- }
- // SETCHILD
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+7),'D','d') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'T','t')
- && ISCHR(*(p+3),'C','c') && ISCHR(*(p+4),'H','h') && ISCHR(*(p+5),'I','i') && ISCHR(*(p+6),'L','l')
- && !ISSTRING(p, 7)) {
- parseRule(p, K_SETCHILD);
- }
- // EXTERNAL
- else if (ISCHR(*p,'E','e') && ISCHR(*(p+7),'L','l') && ISCHR(*(p+1),'X','x') && ISCHR(*(p+2),'T','t')
- && ISCHR(*(p+3),'E','e') && ISCHR(*(p+4),'R','r') && ISCHR(*(p+5),'N','n') && ISCHR(*(p+6),'A','a')
- && !ISSTRING(p, 7)) {
- parseRule(p, K_EXTERNAL);
- }
- // REMCOHORT
- else if (ISCHR(*p,'R','r') && ISCHR(*(p+8),'T','t') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'M','m')
- && ISCHR(*(p+3),'C','c') && ISCHR(*(p+4),'O','o') && ISCHR(*(p+5),'H','h') && ISCHR(*(p+6),'O','o')
- && ISCHR(*(p+7),'R','r')
- && !ISSTRING(p, 8)) {
- parseRule(p, K_REMCOHORT);
- }
- // ADDCOHORT
- else if (ISCHR(*p,'A','a') && ISCHR(*(p+8),'T','t') && ISCHR(*(p+1),'D','d') && ISCHR(*(p+2),'D','d')
- && ISCHR(*(p+3),'C','c') && ISCHR(*(p+4),'O','o') && ISCHR(*(p+5),'H','h') && ISCHR(*(p+6),'O','o')
- && ISCHR(*(p+7),'R','r')
- && !ISSTRING(p, 8)) {
- parseRule(p, K_ADDCOHORT);
- }
- // SETS
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+3),'S','s') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'T','t')
- && !ISSTRING(p, 3)) {
- p += 4;
- }
- // LIST
- else if (ISCHR(*p,'L','l') && ISCHR(*(p+3),'T','t') && ISCHR(*(p+1),'I','i') && ISCHR(*(p+2),'S','s')
- && !ISSTRING(p, 3)) {
- Set *s = result->allocateSet();
- s->line = result->lines;
- p += 4;
- result->lines += SKIPWS(p);
- UChar *n = p;
- result->lines += SKIPTOWS(n, 0, true);
- while (n[-1] == ',' || n[-1] == ']') {
- --n;
- }
- ptrdiff_t c = n - p;
- u_strncpy(&gbuffers[0][0], p, c);
- gbuffers[0][c] = 0;
- s->setName(&gbuffers[0][0]);
- p = n;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
- }
- ++p;
- parseTagList(p, s);
- s->rehash();
- Set *tmp = result->getSet(s->hash);
- if (tmp) {
- if (verbosity_level > 0 && tmp->name[0] != '_' && tmp->name[1] != 'G' && tmp->name[2] != '_') {
- u_fprintf(ux_stderr, "%s: Warning: LIST %S was defined twice with the same contents: Lines %u and %u.\n", filebase, s->name.c_str(), tmp->line, s->line);
- u_fflush(ux_stderr);
- }
- }
- result->addSet(s);
- if (s->empty()) {
- error("%s: Error: LIST %S declared, but no definitions given, on line %u near `%S`!\n", s->name.c_str(), p);
- }
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
- }
- }
- // SET
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+2),'T','t') && ISCHR(*(p+1),'E','e')
- && !ISSTRING(p, 2)) {
- Set *s = result->allocateSet();
- s->line = result->lines;
- p += 3;
- result->lines += SKIPWS(p);
- UChar *n = p;
- result->lines += SKIPTOWS(n, 0, true);
- while (n[-1] == ',' || n[-1] == ']') {
- --n;
+ typedef std::pair<size_t, bool*> pairs_t;
+ pairs_t pairs[] = {
+ std::pair<size_t, bool*>(S_NO_ISETS, &no_isets),
+ std::pair<size_t, bool*>(S_NO_ITMPLS, &no_itmpls),
+ std::pair<size_t, bool*>(S_STRICT_WFORMS, &strict_wforms),
+ std::pair<size_t, bool*>(S_STRICT_BFORMS, &strict_bforms),
+ std::pair<size_t, bool*>(S_STRICT_SECOND, &strict_second),
+ };
+
+ while (*p != ';') {
+ bool found = false;
+ boost_foreach (pairs_t& pair, pairs) {
+ if (ux_simplecasecmp(p, stringbits[pair.first].getTerminatedBuffer(), stringbits[pair.first].length())) {
+ AST_OPEN(Option);
+ p += stringbits[pair.first].length();
+ AST_CLOSE(p);
+ *pair.second = true;
+ result->lines += SKIPWS(p);
+ found = true;
+ }
+ }
+ if (!found) {
+ error("%s: Error: Invalid option found on line %u near `%S`!\n", p);
+ }
+ }
+
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
+ }
+ // STRICT-TAGS
+ else if (IS_ICASE(p, "STRICT-TAGS", "strict-tags")) {
+ AST_OPEN(StrictTags);
+ p += 11;
+ result->lines += SKIPWS(p, '+');
+ if (p[0] != '+' || p[1] != '=') {
+ error("%s: Error: Encountered a %C before the expected += on line %u near `%S`!\n", *p, p);
+ }
+ p += 2;
+ result->lines += SKIPWS(p);
+
+ uint32SortedVector tmp;
+ strict_tags.swap(tmp);
+ while (*p && *p != ';') {
+ AST_OPEN(Tag);
+ UChar *n = p;
+ if (*n == '"') {
+ n++;
+ SKIPTO_NOSPAN(n, '"');
+ if (*n != '"') {
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ }
+ }
+ result->lines += SKIPTOWS(n, ';', true);
+ ptrdiff_t c = n - p;
+ u_strncpy(&gbuffers[0][0], p, c);
+ gbuffers[0][c] = 0;
+ Tag *t = parseTag(&gbuffers[0][0], p);
+ tmp.insert(t->hash);
+ p = n;
+ AST_CLOSE(p);
+ result->lines += SKIPWS(p);
+ }
+
+ if (tmp.empty()) {
+ error("%s: Error: STRICT-TAGS declared, but no definitions given, on line %u near `%S`!\n", p);
+ }
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ strict_tags.swap(tmp);
+ AST_CLOSE(p + 1);
}
- ptrdiff_t c = n - p;
- u_strncpy(&gbuffers[0][0], p, c);
- gbuffers[0][c] = 0;
- s->setName(&gbuffers[0][0]);
- uint32_t sh = hash_value(&gbuffers[0][0]);
- p = n;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ // ANCHOR
+ else if (IS_ICASE(p, "ANCHOR", "anchor")) {
+ AST_OPEN(Anchor);
+ p += 6;
+ result->lines += SKIPWS(p);
+ parseAnchorish(p);
+ AST_CLOSE(p);
}
- ++p;
+ // INCLUDE
+ else if (IS_ICASE(p, "INCLUDE", "include")) {
+ AST_OPEN(Include);
+ p += 7;
+ result->lines += SKIPWS(p);
+ AST_OPEN(IncludeFilename);
+ UChar *n = p;
+ result->lines += SKIPTOWS(n, 0, true);
+ ptrdiff_t c = n - p;
+ u_strncpy(&gbuffers[0][0], p, c);
+ gbuffers[0][c] = 0;
+ p = n;
+ AST_CLOSE(p);
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ }
+ AST_CLOSE(p + 1);
- swapper_false swp(no_isets, no_isets);
-
- parseSetInline(p, s);
- s->rehash();
- Set *tmp = result->getSet(s->hash);
- if (tmp) {
- if (verbosity_level > 0 && tmp->name[0] != '_' && tmp->name[1] != 'G' && tmp->name[2] != '_') {
- u_fprintf(ux_stderr, "%s: Warning: SET %S was defined twice with the same contents: Lines %u and %u.\n", filebase, s->name.c_str(), tmp->line, s->line);
- u_fflush(ux_stderr);
- }
- }
- else if (s->sets.size() == 1 && !(s->type & ST_TAG_UNIFY)) {
- tmp = result->getSet(s->sets.back());
- if (verbosity_level > 0) {
- u_fprintf(ux_stderr, "%s: Warning: Set %S on line %u aliased to %S on line %u.\n", filebase, s->name.c_str(), s->line, tmp->name.c_str(), tmp->line);
- u_fflush(ux_stderr);
- }
- result->maybe_used_sets.insert(tmp);
- result->set_alias[sh] = tmp->hash;
- result->destroySet(s);
- s = tmp;
- }
- result->addSet(s);
- if (s->empty()) {
- error("%s: Error: SET %S declared, but no definitions given, on line %u near `%S`!\n", s->name.c_str(), p);
- }
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`! Probably caused by missing set operator.\n", p);
- }
- }
- // MAPPINGS
- else if (ISCHR(*p,'M','m') && ISCHR(*(p+7),'S','s') && ISCHR(*(p+1),'A','a') && ISCHR(*(p+2),'P','p')
- && ISCHR(*(p+3),'P','p') && ISCHR(*(p+4),'I','i') && ISCHR(*(p+5),'N','n') && ISCHR(*(p+6),'G','g')
- && !ISSTRING(p, 7)) {
- p += 8;
- in_before_sections = true;
- in_section = false;
- in_after_sections = false;
- in_null_section = false;
- UChar *s = p;
- SKIPLN(s);
- SKIPWS(s);
- result->lines += SKIPWS(p);
- if (p != s) {
- parseAnchorish(p);
+ UErrorCode err = U_ZERO_ERROR;
+ u_strToUTF8(&cbuffers[0][0], CG3_BUFFER_SIZE - 1, 0, &gbuffers[0][0], u_strlen(&gbuffers[0][0]), &err);
+
+ std::string abspath;
+ if (cbuffers[0][0] == '/') {
+ abspath = &cbuffers[0][0];
+ }
+ else {
+ abspath = ux_dirname(fname);
+ abspath += &cbuffers[0][0];
+ }
+
+ size_t grammar_size = 0;
+ struct stat _stat;
+ int error = stat(abspath.c_str(), &_stat);
+
+ if (error != 0) {
+ abspath = &cbuffers[0][0];
+ error = stat(abspath.c_str(), &_stat);
+ }
+
+ if (error != 0) {
+ u_fprintf(ux_stderr, "%s: Error: Cannot stat %s due to error %d - bailing out!\n", filebase, abspath.c_str(), error);
+ CG3Quit(1);
+ }
+ else {
+ grammar_size = static_cast<size_t>(_stat.st_size);
+ }
+
+ UFILE *grammar = u_fopen(abspath.c_str(), "rb", locale, codepage);
+ if (!grammar) {
+ u_fprintf(ux_stderr, "%s: Error: Error opening %s for reading!\n", filebase, abspath.c_str());
+ CG3Quit(1);
+ }
+ UChar32 bom = u_fgetcx(grammar);
+ if (bom != 0xfeff && bom != static_cast<UChar32>(0xffffffff)) {
+ u_fungetc(bom, grammar);
+ }
+
+ boost::shared_ptr<std::vector<UChar> > gbuf(new std::vector<UChar>(grammar_size * 2, 0));
+ grammarbufs.push_back(gbuf);
+ std::vector<UChar>& data = *gbuf.get();
+ uint32_t read = u_file_read(&data[4], grammar_size * 2, grammar);
+ u_fclose(grammar);
+ if (read >= grammar_size * 2 - 1) {
+ u_fprintf(ux_stderr, "%s: Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n", filebase);
+ CG3Quit(1);
+ }
+ data.resize(read + 4 + 1);
+
+ uint32_t olines = 0;
+ swapper<uint32_t> oswap(true, olines, result->lines);
+ const char *obase = 0;
+ swapper<const char*> bswap(true, obase, filebase);
+
+ parseFromUChar(&data[4], abspath.c_str());
}
- }
- // CORRECTIONS
- else if (ISCHR(*p,'C','c') && ISCHR(*(p+10),'S','s') && ISCHR(*(p+1),'O','o') && ISCHR(*(p+2),'R','r')
- && ISCHR(*(p+3),'R','r') && ISCHR(*(p+4),'E','e') && ISCHR(*(p+5),'C','c') && ISCHR(*(p+6),'T','t')
- && ISCHR(*(p+7),'I','i') && ISCHR(*(p+8),'O','o') && ISCHR(*(p+9),'N','n')
- && !ISSTRING(p, 10)) {
- p += 11;
- in_before_sections = true;
- in_section = false;
- in_after_sections = false;
- in_null_section = false;
- UChar *s = p;
- SKIPLN(s);
- SKIPWS(s);
- result->lines += SKIPWS(p);
- if (p != s) {
- parseAnchorish(p);
+ // IFF
+ else if (IS_ICASE(p, "IFF", "iff")) {
+ parseRule(p, K_IFF);
}
- }
- // BEFORE-SECTIONS
- else if (ISCHR(*p,'B','b') && ISCHR(*(p+14),'S','s') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'F','f')
- && ISCHR(*(p+3),'O','o') && ISCHR(*(p+4),'R','r') && ISCHR(*(p+5),'E','e') && ISCHR(*(p+6),'-','_')
- && ISCHR(*(p+7),'S','s') && ISCHR(*(p+8),'E','e') && ISCHR(*(p+9),'C','c') && ISCHR(*(p+10),'T','t')
- && ISCHR(*(p+11),'I','i') && ISCHR(*(p+12),'O','o') && ISCHR(*(p+13),'N','n')
- && !ISSTRING(p, 14)) {
- p += 15;
- in_before_sections = true;
- in_section = false;
- in_after_sections = false;
- in_null_section = false;
- UChar *s = p;
- SKIPLN(s);
- SKIPWS(s);
- result->lines += SKIPWS(p);
- if (p != s) {
- parseAnchorish(p);
+ // MAP
+ else if (IS_ICASE(p, "MAP", "map")) {
+ parseRule(p, K_MAP);
}
- }
- // SECTION
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+6),'N','n') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'C','c')
- && ISCHR(*(p+3),'T','t') && ISCHR(*(p+4),'I','i') && ISCHR(*(p+5),'O','o')
- && !ISSTRING(p, 6)) {
- p += 7;
- result->sections.push_back(result->lines);
- in_before_sections = false;
- in_section = true;
- in_after_sections = false;
- in_null_section = false;
- UChar *s = p;
- SKIPLN(s);
- SKIPWS(s);
- result->lines += SKIPWS(p);
- if (p != s) {
- parseAnchorish(p);
+ // ADD
+ else if (IS_ICASE(p, "ADD", "add")) {
+ parseRule(p, K_ADD);
}
- }
- // CONSTRAINTS
- else if (ISCHR(*p,'C','c') && ISCHR(*(p+10),'S','s') && ISCHR(*(p+1),'O','o') && ISCHR(*(p+2),'N','n')
- && ISCHR(*(p+3),'S','s') && ISCHR(*(p+4),'T','t') && ISCHR(*(p+5),'R','r') && ISCHR(*(p+6),'A','a')
- && ISCHR(*(p+7),'I','i') && ISCHR(*(p+8),'N','n') && ISCHR(*(p+9),'T','t')
- && !ISSTRING(p, 10)) {
- p += 11;
- result->sections.push_back(result->lines);
- in_before_sections = false;
- in_section = true;
- in_after_sections = false;
- in_null_section = false;
- UChar *s = p;
- SKIPLN(s);
- SKIPWS(s);
- result->lines += SKIPWS(p);
- if (p != s) {
- parseAnchorish(p);
+ // APPEND
+ else if (IS_ICASE(p, "APPEND", "append")) {
+ parseRule(p, K_APPEND);
}
- }
- // AFTER-SECTIONS
- else if (ISCHR(*p,'A','a') && ISCHR(*(p+13),'S','s') && ISCHR(*(p+1),'F','f') && ISCHR(*(p+2),'T','t')
- && ISCHR(*(p+3),'E','e') && ISCHR(*(p+4),'R','r') && ISCHR(*(p+5),'-','_')
- && ISCHR(*(p+6),'S','s') && ISCHR(*(p+7),'E','e') && ISCHR(*(p+8),'C','c') && ISCHR(*(p+9),'T','t')
- && ISCHR(*(p+10),'I','i') && ISCHR(*(p+11),'O','o') && ISCHR(*(p+12),'N','n')
- && !ISSTRING(p, 13)) {
- p += 14;
- in_before_sections = false;
- in_section = false;
- in_after_sections = true;
- in_null_section = false;
- UChar *s = p;
- SKIPLN(s);
- SKIPWS(s);
- result->lines += SKIPWS(p);
- if (p != s) {
- parseAnchorish(p);
+ // SELECT
+ else if (IS_ICASE(p, "SELECT", "select")) {
+ parseRule(p, K_SELECT);
}
- }
- // NULL-SECTION
- else if (ISCHR(*p,'N','n') && ISCHR(*(p+11),'N','n') && ISCHR(*(p+1),'U','u') && ISCHR(*(p+2),'L','l')
- && ISCHR(*(p+3),'L','l') && ISCHR(*(p+4),'-','-') && ISCHR(*(p+5),'S','s')
- && ISCHR(*(p+6),'E','e') && ISCHR(*(p+7),'C','c') && ISCHR(*(p+8),'T','t') && ISCHR(*(p+9),'I','i')
- && ISCHR(*(p+10),'O','o')
- && !ISSTRING(p, 11)) {
- p += 12;
- in_before_sections = false;
- in_section = false;
- in_after_sections = false;
- in_null_section = true;
- UChar *s = p;
- SKIPLN(s);
- SKIPWS(s);
- result->lines += SKIPWS(p);
- if (p != s) {
- parseAnchorish(p);
+ // REMOVE
+ else if (IS_ICASE(p, "REMOVE", "remove")) {
+ parseRule(p, K_REMOVE);
}
- }
- // SUBREADINGS
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+10),'S','s') && ISCHR(*(p+1),'U','u') && ISCHR(*(p+2),'B','b')
- && ISCHR(*(p+3),'R','r') && ISCHR(*(p+4),'E','e') && ISCHR(*(p+5),'A','a')
- && ISCHR(*(p+6),'D','d') && ISCHR(*(p+7),'I','i') && ISCHR(*(p+8),'N','n') && ISCHR(*(p+9),'G','g')
- && !ISSTRING(p, 10)) {
- p += 11;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ // REPLACE
+ else if (IS_ICASE(p, "REPLACE", "replace")) {
+ parseRule(p, K_REPLACE);
}
- ++p;
- result->lines += SKIPWS(p);
- if (p[0] == 'L' || p[0] == 'l') {
- result->sub_readings_ltr = true;
+ // DELIMIT
+ else if (IS_ICASE(p, "DELIMIT", "delimit")) {
+ parseRule(p, K_DELIMIT);
}
- else if (p[0] == 'R' || p[0] == 'r') {
- result->sub_readings_ltr = false;
+ // SUBSTITUTE
+ else if (IS_ICASE(p, "SUBSTITUTE", "substitute")) {
+ parseRule(p, K_SUBSTITUTE);
}
- else {
- error("%s: Error: Expected RTL or LTR on line %u near `%S`!\n", *p, p);
+ // COPY
+ else if (IS_ICASE(p, "COPY", "copy")) {
+ parseRule(p, K_COPY);
}
- UChar *n = p;
- result->lines += SKIPTOWS(n, 0, true);
- p = n;
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ // JUMP
+ else if (IS_ICASE(p, "JUMP", "jump")) {
+ parseRule(p, K_JUMP);
}
- }
- // OPTIONS
- else if (ISCHR(*p, 'O', 'o') && ISCHR(*(p + 6), 'S', 's') && ISCHR(*(p + 1), 'P', 'p') && ISCHR(*(p + 2), 'T', 't')
- && ISCHR(*(p + 3), 'I', 'i') && ISCHR(*(p + 4), 'O', 'o') && ISCHR(*(p + 5), 'N', 'n')
- && !ISSTRING(p, 6)) {
- p += 7;
- result->lines += SKIPWS(p, '+');
- if (p[0] != '+' || p[1] != '=') {
- error("%s: Error: Encountered a %C before the expected += on line %u near `%S`!\n", *p, p);
+ // MOVE
+ else if (IS_ICASE(p, "MOVE", "move")) {
+ parseRule(p, K_MOVE);
}
- p += 2;
- result->lines += SKIPWS(p);
-
- typedef std::pair<size_t, bool&> pairs_t;
- pairs_t pairs[] = {
- { S_NO_ISETS, no_isets },
- { S_NO_ITMPLS, no_itmpls },
- { S_STRICT_WFORMS, strict_wforms },
- { S_STRICT_BFORMS, strict_bforms },
- { S_STRICT_SECOND, strict_second },
- };
-
- while (*p != ';') {
- bool found = false;
- boost_foreach(pairs_t& pair, pairs) {
- if (ux_simplecasecmp(p, stringbits[pair.first].getTerminatedBuffer(), stringbits[pair.first].length())) {
- p += stringbits[pair.first].length();
- pair.second = true;
- result->lines += SKIPWS(p);
- found = true;
- }
- }
- if (!found) {
- error("%s: Error: Invalid option found on line %u near `%S`!\n", p);
- }
+ // SWITCH
+ else if (IS_ICASE(p, "SWITCH", "switch")) {
+ parseRule(p, K_SWITCH);
}
-
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
+ // EXECUTE
+ else if (IS_ICASE(p, "EXECUTE", "execute")) {
+ parseRule(p, K_EXECUTE);
}
- }
- // STRICT-TAGS
- else if (ISCHR(*p, 'S', 's') && ISCHR(*(p + 10), 'S', 's') && ISCHR(*(p + 1), 'T', 't') && ISCHR(*(p + 2), 'R', 'r')
- && ISCHR(*(p + 3), 'I', 'i') && ISCHR(*(p + 4), 'C', 'c') && ISCHR(*(p + 5), 'T', 't')
- && ISCHR(*(p + 6), '-', '-') && ISCHR(*(p + 7), 'T', 't') && ISCHR(*(p + 8), 'A', 'a') && ISCHR(*(p + 9), 'G', 'g')
- && !ISSTRING(p, 10)) {
- p += 11;
- result->lines += SKIPWS(p, '+');
- if (p[0] != '+' || p[1] != '=') {
- error("%s: Error: Encountered a %C before the expected += on line %u near `%S`!\n", *p, p);
+ // UNMAP
+ else if (IS_ICASE(p, "UNMAP", "unmap")) {
+ parseRule(p, K_UNMAP);
}
- p += 2;
- result->lines += SKIPWS(p);
-
- uint32SortedVector tmp;
- strict_tags.swap(tmp);
- while (*p && *p != ';') {
+ // TEMPLATE
+ else if (IS_ICASE(p, "TEMPLATE", "template")) {
+ AST_OPEN(Template);
+ size_t line = result->lines;
+ p += 8;
+ result->lines += SKIPWS(p);
+ AST_OPEN(TemplateName);
UChar *n = p;
- if (*n == '"') {
- n++;
- SKIPTO_NOSPAN(n, '"');
- if (*n != '"') {
- error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
- }
- }
- result->lines += SKIPTOWS(n, ';', true);
+ result->lines += SKIPTOWS(n, 0, true);
ptrdiff_t c = n - p;
u_strncpy(&gbuffers[0][0], p, c);
gbuffers[0][c] = 0;
- Tag *t = parseTag(&gbuffers[0][0], p);
- tmp.insert(t->hash);
+ UString name(&gbuffers[0][0]);
p = n;
- result->lines += SKIPWS(p);
- }
-
- if (tmp.empty()) {
- error("%s: Error: STRICT-TAGS declared, but no definitions given, on line %u near `%S`!\n", p);
- }
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
- }
- strict_tags.swap(tmp);
- }
- // ANCHOR
- else if (ISCHR(*p,'A','a') && ISCHR(*(p+5),'R','r') && ISCHR(*(p+1),'N','n') && ISCHR(*(p+2),'C','c')
- && ISCHR(*(p+3),'H','h') && ISCHR(*(p+4),'O','o')
- && !ISSTRING(p, 5)) {
- p += 6;
- result->lines += SKIPWS(p);
- parseAnchorish(p);
- }
- // INCLUDE
- else if (ISCHR(*p,'I','i') && ISCHR(*(p+6),'E','e') && ISCHR(*(p+1),'N','n') && ISCHR(*(p+2),'C','c')
- && ISCHR(*(p+3),'L','l') && ISCHR(*(p+4),'U','u') && ISCHR(*(p+5),'D','d')
- && !ISSTRING(p, 6)) {
- p += 7;
- result->lines += SKIPWS(p);
- UChar *n = p;
- result->lines += SKIPTOWS(n, 0, true);
- ptrdiff_t c = n - p;
- u_strncpy(&gbuffers[0][0], p, c);
- gbuffers[0][c] = 0;
- p = n;
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
- }
-
- UErrorCode err = U_ZERO_ERROR;
- u_strToUTF8(&cbuffers[0][0], CG3_BUFFER_SIZE-1, 0, &gbuffers[0][0], u_strlen(&gbuffers[0][0]), &err);
-
- std::string abspath;
- if (cbuffers[0][0] == '/') {
- abspath = &cbuffers[0][0];
- }
- else {
- abspath = ux_dirname(fname);
- abspath += &cbuffers[0][0];
- }
+ AST_CLOSE(p);
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
- size_t grammar_size = 0;
- struct stat _stat;
- int error = stat(abspath.c_str(), &_stat);
+ swapper_false swp(no_itmpls, no_itmpls);
- if (error != 0) {
- abspath = &cbuffers[0][0];
- error = stat(abspath.c_str(), &_stat);
- }
+ ContextualTest *t = parseContextualTestList(p);
+ t->line = line;
+ result->addTemplate(t, name.c_str());
- if (error != 0) {
- u_fprintf(ux_stderr, "%s: Error: Cannot stat %s due to error %d - bailing out!\n", filebase, abspath.c_str(), error);
- CG3Quit(1);
- }
- else {
- grammar_size = static_cast<size_t>(_stat.st_size);
- }
-
- UFILE *grammar = u_fopen(abspath.c_str(), "rb", locale, codepage);
- if (!grammar) {
- u_fprintf(ux_stderr, "%s: Error: Error opening %s for reading!\n", filebase, abspath.c_str());
- CG3Quit(1);
- }
- UChar32 bom = u_fgetcx(grammar);
- if (bom != 0xfeff && bom != static_cast<UChar32>(0xffffffff)) {
- u_fungetc(bom, grammar);
- }
-
- std::vector<UChar> data(grammar_size*2, 0);
- uint32_t read = u_file_read(&data[4], grammar_size*2, grammar);
- u_fclose(grammar);
- if (read >= grammar_size*2-1) {
- u_fprintf(ux_stderr, "%s: Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n", filebase);
- CG3Quit(1);
- }
- data.resize(read+4+1);
-
- uint32_t olines = 0;
- swapper<uint32_t> oswap(true, olines, result->lines);
- const char *obase = 0;
- swapper<const char*> bswap(true, obase, filebase);
-
- parseFromUChar(&data[4], abspath.c_str());
- }
- // IFF
- else if (ISCHR(*p,'I','i') && ISCHR(*(p+2),'F','f') && ISCHR(*(p+1),'F','f')
- && !ISSTRING(p, 2)) {
- parseRule(p, K_IFF);
- }
- // MAP
- else if (ISCHR(*p,'M','m') && ISCHR(*(p+2),'P','p') && ISCHR(*(p+1),'A','a')
- && !ISSTRING(p, 2)) {
- parseRule(p, K_MAP);
- }
- // ADD
- else if (ISCHR(*p,'A','a') && ISCHR(*(p+2),'D','d') && ISCHR(*(p+1),'D','d')
- && !ISSTRING(p, 2)) {
- parseRule(p, K_ADD);
- }
- // APPEND
- else if (ISCHR(*p,'A','a') && ISCHR(*(p+5),'D','d') && ISCHR(*(p+1),'P','p') && ISCHR(*(p+2),'P','p')
- && ISCHR(*(p+3),'E','e') && ISCHR(*(p+4),'N','n')
- && !ISSTRING(p, 5)) {
- parseRule(p, K_APPEND);
- }
- // SELECT
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+5),'T','t') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'L','l')
- && ISCHR(*(p+3),'E','e') && ISCHR(*(p+4),'C','c')
- && !ISSTRING(p, 5)) {
- parseRule(p, K_SELECT);
- }
- // REMOVE
- else if (ISCHR(*p,'R','r') && ISCHR(*(p+5),'E','e') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'M','m')
- && ISCHR(*(p+3),'O','o') && ISCHR(*(p+4),'V','v')
- && !ISSTRING(p, 5)) {
- parseRule(p, K_REMOVE);
- }
- // REPLACE
- else if (ISCHR(*p,'R','r') && ISCHR(*(p+6),'E','e') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'P','p')
- && ISCHR(*(p+3),'L','l') && ISCHR(*(p+4),'A','a') && ISCHR(*(p+5),'C','c')
- && !ISSTRING(p, 6)) {
- parseRule(p, K_REPLACE);
- }
- // DELIMIT
- else if (ISCHR(*p,'D','d') && ISCHR(*(p+6),'T','t') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'L','l')
- && ISCHR(*(p+3),'I','i') && ISCHR(*(p+4),'M','m') && ISCHR(*(p+5),'I','i')
- && !ISSTRING(p, 6)) {
- parseRule(p, K_DELIMIT);
- }
- // SUBSTITUTE
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+9),'E','e') && ISCHR(*(p+1),'U','u') && ISCHR(*(p+2),'B','b')
- && ISCHR(*(p+3),'S','s') && ISCHR(*(p+4),'T','t') && ISCHR(*(p+5),'I','i') && ISCHR(*(p+6),'T','t')
- && ISCHR(*(p+7),'U','u') && ISCHR(*(p+8),'T','t')
- && !ISSTRING(p, 9)) {
- parseRule(p, K_SUBSTITUTE);
- }
- // COPY
- else if (ISCHR(*p,'C','c') && ISCHR(*(p+3),'Y','y') && ISCHR(*(p+1),'O','o') && ISCHR(*(p+2),'P','p')
- && !ISSTRING(p, 3)) {
- parseRule(p, K_COPY);
- }
- // JUMP
- else if (ISCHR(*p,'J','j') && ISCHR(*(p+3),'P','p') && ISCHR(*(p+1),'U','u') && ISCHR(*(p+2),'M','m')
- && !ISSTRING(p, 3)) {
- parseRule(p, K_JUMP);
- }
- // MOVE
- else if (ISCHR(*p,'M','m') && ISCHR(*(p+3),'E','e') && ISCHR(*(p+1),'O','o') && ISCHR(*(p+2),'V','v')
- && !ISSTRING(p, 3)) {
- parseRule(p, K_MOVE);
- }
- // SWITCH
- else if (ISCHR(*p,'S','s') && ISCHR(*(p+5),'H','h') && ISCHR(*(p+1),'W','w') && ISCHR(*(p+2),'I','i')
- && ISCHR(*(p+3),'T','t') && ISCHR(*(p+4),'C','c')
- && !ISSTRING(p, 5)) {
- parseRule(p, K_SWITCH);
- }
- // EXECUTE
- else if (ISCHR(*p,'E','e') && ISCHR(*(p+6),'E','e') && ISCHR(*(p+1),'X','x') && ISCHR(*(p+2),'E','e')
- && ISCHR(*(p+3),'C','c') && ISCHR(*(p+4),'U','u') && ISCHR(*(p+5),'T','t')
- && !ISSTRING(p, 6)) {
- parseRule(p, K_EXECUTE);
- }
- // UNMAP
- else if (ISCHR(*p,'U','u') && ISCHR(*(p+4),'P','p') && ISCHR(*(p+1),'N','n') && ISCHR(*(p+2),'M','m')
- && ISCHR(*(p+3),'A','a')
- && !ISSTRING(p, 4)) {
- parseRule(p, K_UNMAP);
- }
- // TEMPLATE
- else if (ISCHR(*p,'T','t') && ISCHR(*(p+7),'E','e') && ISCHR(*(p+1),'E','e') && ISCHR(*(p+2),'M','m')
- && ISCHR(*(p+3),'P','p') && ISCHR(*(p+4),'L','l') && ISCHR(*(p+5),'A','a') && ISCHR(*(p+6),'T','t')
- && !ISSTRING(p, 7)) {
- size_t line = result->lines;
- p += 8;
- result->lines += SKIPWS(p);
- UChar *n = p;
- result->lines += SKIPTOWS(n, 0, true);
- ptrdiff_t c = n - p;
- u_strncpy(&gbuffers[0][0], p, c);
- gbuffers[0][c] = 0;
- UString name(&gbuffers[0][0]);
- p = n;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
- }
- ++p;
-
- swapper_false swp(no_itmpls, no_itmpls);
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`! Probably caused by missing set operator.\n", p);
+ }
+ AST_CLOSE(p + 1);
+ }
+ // PARENTHESES
+ else if (IS_ICASE(p, "PARENTHESES", "parentheses")) {
+ AST_OPEN(Parentheses);
+ p += 11;
+ result->lines += SKIPWS(p, '=');
+ if (*p != '=') {
+ error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ result->lines += SKIPWS(p);
- ContextualTest *t = parseContextualTestList(p);
- t->line = line;
- result->addTemplate(t, name.c_str());
+ while (*p && *p != ';') {
+ ptrdiff_t c = 0;
+ Tag *left = 0;
+ Tag *right = 0;
+ UChar *n = p;
+ result->lines += SKIPTOWS(n, '(', true);
+ if (*n != '(') {
+ error("%s: Error: Encountered %C before the expected ( on line %u near `%S`!\n", *p, p);
+ }
+ AST_OPEN(CompositeTag);
+ cur_ast->b = n;
+ n++;
+ result->lines += SKIPWS(n);
+ p = n;
+ AST_OPEN(Tag);
+ if (*n == '"') {
+ n++;
+ SKIPTO_NOSPAN(n, '"');
+ if (*n != '"') {
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ }
+ }
+ result->lines += SKIPTOWS(n, ')', true);
+ AST_CLOSE(n);
+ c = n - p;
+ u_strncpy(&gbuffers[0][0], p, c);
+ gbuffers[0][c] = 0;
+ left = parseTag(&gbuffers[0][0], p);
+ result->lines += SKIPWS(n);
+ p = n;
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`! Probably caused by missing set operator.\n", p);
- }
- }
- // PARENTHESES
- else if (ISCHR(*p,'P','p') && ISCHR(*(p+10),'S','s') && ISCHR(*(p+1),'A','a') && ISCHR(*(p+2),'R','r')
- && ISCHR(*(p+3),'E','e') && ISCHR(*(p+4),'N','n') && ISCHR(*(p+5),'T','t') && ISCHR(*(p+6),'H','h')
- && ISCHR(*(p+7),'E','e') && ISCHR(*(p+8),'S','s') && ISCHR(*(p+9),'E','e')
- && !ISSTRING(p, 10)) {
- p += 11;
- result->lines += SKIPWS(p, '=');
- if (*p != '=') {
- error("%s: Error: Encountered a %C before the expected = on line %u near `%S`!\n", *p, p);
- }
- ++p;
- result->lines += SKIPWS(p);
+ if (*p == ')') {
+ error("%s: Error: Encountered ) before the expected Right tag on line %u near `%S`!\n", p);
+ }
- while (*p && *p != ';') {
- ptrdiff_t c = 0;
- Tag *left = 0;
- Tag *right = 0;
- UChar *n = p;
- result->lines += SKIPTOWS(n, '(', true);
- if (*n != '(') {
- error("%s: Error: Encountered %C before the expected ( on line %u near `%S`!\n", *p, p);
- }
- n++;
- result->lines += SKIPWS(n);
- p = n;
- if (*n == '"') {
- n++;
- SKIPTO_NOSPAN(n, '"');
- if (*n != '"') {
- error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ AST_OPEN(Tag);
+ if (*n == '"') {
+ n++;
+ SKIPTO_NOSPAN(n, '"');
+ if (*n != '"') {
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ }
}
- }
- result->lines += SKIPTOWS(n, ')', true);
- c = n - p;
- u_strncpy(&gbuffers[0][0], p, c);
- gbuffers[0][c] = 0;
- left = parseTag(&gbuffers[0][0], p);
- result->lines += SKIPWS(n);
- p = n;
+ result->lines += SKIPTOWS(n, ')', true);
+ AST_CLOSE(n);
+ c = n - p;
+ u_strncpy(&gbuffers[0][0], p, c);
+ gbuffers[0][c] = 0;
+ right = parseTag(&gbuffers[0][0], p);
+ result->lines += SKIPWS(n);
+ p = n;
- if (*p == ')') {
- error("%s: Error: Encountered ) before the expected Right tag on line %u near `%S`!\n", p);
- }
+ if (*p != ')') {
+ error("%s: Error: Encountered %C before the expected ) on line %u near `%S`!\n", *p, p);
+ }
+ ++p;
+ AST_CLOSE(p);
+ result->lines += SKIPWS(p);
- if (*n == '"') {
- n++;
- SKIPTO_NOSPAN(n, '"');
- if (*n != '"') {
- error("%s: Error: Expected closing \" on line %u near `%S`!\n", p);
+ if (left && right) {
+ result->parentheses[left->hash] = right->hash;
+ result->parentheses_reverse[right->hash] = left->hash;
}
}
- result->lines += SKIPTOWS(n, ')', true);
- c = n - p;
- u_strncpy(&gbuffers[0][0], p, c);
- gbuffers[0][c] = 0;
- right = parseTag(&gbuffers[0][0], p);
- result->lines += SKIPWS(n);
- p = n;
- if (*p != ')') {
- error("%s: Error: Encountered %C before the expected ) on line %u near `%S`!\n", *p, p);
+ if (result->parentheses.empty()) {
+ error("%s: Error: PARENTHESES declared, but no definitions given, on line %u near `%S`!\n", p);
}
- ++p;
- result->lines += SKIPWS(p);
-
- if (left && right) {
- result->parentheses[left->hash] = right->hash;
- result->parentheses_reverse[right->hash] = left->hash;
+ result->lines += SKIPWS(p, ';');
+ if (*p != ';') {
+ error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
}
+ AST_CLOSE(p + 1);
}
-
- if (result->parentheses.empty()) {
- error("%s: Error: PARENTHESES declared, but no definitions given, on line %u near `%S`!\n", p);
- }
- result->lines += SKIPWS(p, ';');
- if (*p != ';') {
- error("%s: Error: Expected closing ; before line %u near `%S`!\n", p);
- }
- }
- // END
- else if (ISCHR(*p,'E','e') && ISCHR(*(p+2),'D','d') && ISCHR(*(p+1),'N','n')) {
- if (ISNL(*(p-1)) || ISSPACE(*(p-1))) {
- if (*(p+3) == 0 || ISNL(*(p+3)) || ISSPACE(*(p+3))) {
- break;
+ // END
+ else if (IS_ICASE(p, "END", "end")) {
+ if (ISNL(*(p - 1)) || ISSPACE(*(p - 1))) {
+ if (*(p + 3) == 0 || ISNL(*(p + 3)) || ISSPACE(*(p + 3))) {
+ break;
+ }
}
+ ++p;
}
- ++p;
- }
- // No keyword found at this position, skip a character.
- else {
- UChar *n = p;
- if (*p == ';' || *p == '"') {
- if (*p == '"') {
- ++p;
- SKIPTO_NOSPAN(p, '"');
- if (*p != '"') {
- error("%s: Error: Expected closing \" on line %u near `%S`!\n", n);
+ // No keyword found at this position, skip a character.
+ else {
+ UChar *n = p;
+ if (*p == ';' || *p == '"') {
+ if (*p == '"') {
+ ++p;
+ SKIPTO_NOSPAN(p, '"');
+ if (*p != '"') {
+ error("%s: Error: Expected closing \" on line %u near `%S`!\n", n);
+ }
}
+ result->lines += SKIPTOWS(p);
}
- result->lines += SKIPTOWS(p);
- }
- if (*p && *p != ';' && *p != '"' && !ISNL(*p) && !ISSPACE(*p)) {
- error("%s: Error: Garbage data encountered on line %u near `%S`!\n", p);
- }
- if (ISNL(*p)) {
- result->lines += 1;
+ if (*p && *p != ';' && *p != '"' && !ISNL(*p) && !ISSPACE(*p)) {
+ error("%s: Error: Garbage data encountered on line %u near `%S`!\n", p);
+ }
+ if (ISNL(*p)) {
+ result->lines += 1;
+ }
+ ++p;
}
- ++p;
}
- }
- catch (int) {
- result->lines += SKIPLN(p);
- }
+ catch (int) {
+ result->lines += SKIPLN(p);
+ }
}
- return 0;
+ AST_CLOSE(p);
}
int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, const char *cpage) {
@@ -2320,14 +2360,16 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
}
// It reads into the buffer at offset 4 because certain functions may look back, so we need some nulls in front.
- std::vector<UChar> data(result->grammar_size*2, 0);
- uint32_t read = u_file_read(&data[4], result->grammar_size*2, grammar);
+ boost::shared_ptr<std::vector<UChar> > gbuf(new std::vector<UChar>(result->grammar_size * 2, 0));
+ grammarbufs.push_back(gbuf);
+ std::vector<UChar>& data = *gbuf.get();
+ uint32_t read = u_file_read(&data[4], result->grammar_size * 2, grammar);
u_fclose(grammar);
- if (read >= result->grammar_size*2-1) {
+ if (read >= result->grammar_size * 2 - 1) {
u_fprintf(ux_stderr, "%s: Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n", filebase);
CG3Quit(1);
}
- data.resize(read+4+1);
+ data.resize(read + 4 + 1);
result->addAnchor(keywords[K_START].getTerminatedBuffer(), 0, true);
@@ -2414,14 +2456,11 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
result->addSet(set_c);
}
- error = parseFromUChar(&data[4], filename);
- if (error) {
- return error;
- }
+ parseFromUChar(&data[4], filename);
- result->addAnchor(keywords[K_END].getTerminatedBuffer(), result->rule_by_number.size()-1, true);
+ result->addAnchor(keywords[K_END].getTerminatedBuffer(), result->rule_by_number.size() - 1, true);
- const_foreach (RuleVector, result->rule_by_number, it, it_end) {
+ foreach (it, result->rule_by_number) {
if ((*it)->name) {
result->addAnchor((*it)->name, (*it)->number, false);
}
@@ -2457,7 +2496,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
} while (*p);
}
- const_foreach (deferred_t, deferred_tmpls, it, it_end) {
+ foreach (it, deferred_tmpls) {
uint32_t cn = hash_value(it->second.second);
if (result->templates.find(cn) == result->templates.end()) {
u_fprintf(ux_stderr, "%s: Error: Unknown template '%S' referenced on line %u!\n", filebase, it->second.second.c_str(), it->second.first);
@@ -2467,8 +2506,8 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
it->first->tmpl = result->templates.find(cn)->second;
}
- bc::flat_map<uint32_t,uint32_t> sets;
- for (BOOST_AUTO(cntx, result->contexts.begin()); cntx != result->contexts.end(); ) {
+ bc::flat_map<uint32_t, uint32_t> sets;
+ for (BOOST_AUTO(cntx, result->contexts.begin()); cntx != result->contexts.end();) {
if (cntx->second->pos & POS_NUMERIC_BRANCH) {
ContextualTest *unsafec = cntx->second;
result->contexts.erase(cntx);
@@ -2480,7 +2519,7 @@ int TextualParser::parse_grammar_from_file(const char *fname, const char *loc, c
ContextualTest *safec = result->allocateContextualTest();
copy_cntx(unsafec, safec);
-
+
safec->pos |= POS_CAREFUL;
safec->target = sets[unsafec->target];
@@ -2531,7 +2570,7 @@ void TextualParser::setVerbosity(uint32_t level) {
void TextualParser::addRuleToGrammar(Rule *rule) {
if (in_section) {
- rule->section = (int32_t)(result->sections.size()-1);
+ rule->section = (int32_t)(result->sections.size() - 1);
result->addRule(rule);
}
else if (in_after_sections) {
@@ -2547,5 +2586,4 @@ void TextualParser::addRuleToGrammar(Rule *rule) {
result->addRule(rule);
}
}
-
}
diff --git a/src/TextualParser.hpp b/src/TextualParser.hpp
index dcb65c5..2785bfa 100644
--- a/src/TextualParser.hpp
+++ b/src/TextualParser.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -28,65 +28,67 @@
#include "sorted_vector.hpp"
namespace CG3 {
- class Rule;
- class Set;
- class Tag;
- class ContextualTest;
+class Rule;
+class Set;
+class Tag;
+class ContextualTest;
- class TextualParser : public IGrammarParser {
- public:
- TextualParser(Grammar& result, UFILE *ux_err);
+class TextualParser : public IGrammarParser {
+public:
+ TextualParser(Grammar& result, UFILE *ux_err, bool dump_ast = false);
- void setCompatible(bool compat);
- void setVerbosity(uint32_t level);
+ void setCompatible(bool compat);
+ void setVerbosity(uint32_t level);
+ void print_ast(UFILE *out);
- int parse_grammar_from_file(const char *filename, const char *locale, const char *codepage);
+ int parse_grammar_from_file(const char *filename, const char *locale, const char *codepage);
- void error(const char *str);
- void error(const char *str, UChar c);
- void error(const char *str, const UChar *p);
- void error(const char *str, UChar c, const UChar *p);
- void error(const char *str, const char *s, const UChar *p);
- void error(const char *str, const UChar *s, const UChar *p);
- void error(const char *str, const char *s, const UChar *S, const UChar *p);
- Tag *addTag(Tag *tag);
- Grammar *get_grammar() { return result; }
- const char *filebase;
- uint32SortedVector strict_tags;
+ void error(const char *str);
+ void error(const char *str, UChar c);
+ void error(const char *str, const UChar *p);
+ void error(const char *str, UChar c, const UChar *p);
+ void error(const char *str, const char *s, const UChar *p);
+ void error(const char *str, const UChar *s, const UChar *p);
+ void error(const char *str, const char *s, const UChar *S, const UChar *p);
+ Tag *addTag(Tag *tag);
+ Grammar *get_grammar() { return result; }
+ const char *filebase;
+ uint32SortedVector strict_tags;
- private:
- UChar nearbuf[32];
- uint32_t verbosity_level;
- uint32_t sets_counter;
- uint32_t seen_mapping_prefix;
- bool option_vislcg_compat;
- bool in_section, in_before_sections, in_after_sections, in_null_section;
- bool no_isets, no_itmpls, strict_wforms, strict_bforms, strict_second;
- const char *filename;
- const char *locale;
- const char *codepage;
+private:
+ UChar nearbuf[32];
+ uint32_t verbosity_level;
+ uint32_t sets_counter;
+ uint32_t seen_mapping_prefix;
+ bool option_vislcg_compat;
+ bool in_section, in_before_sections, in_after_sections, in_null_section;
+ bool no_isets, no_itmpls, strict_wforms, strict_bforms, strict_second;
+ const char *filename;
+ const char *locale;
+ const char *codepage;
- typedef stdext::hash_map<ContextualTest*,std::pair<size_t,UString> > deferred_t;
- deferred_t deferred_tmpls;
+ typedef stdext::hash_map<ContextualTest*, std::pair<size_t, UString> > deferred_t;
+ deferred_t deferred_tmpls;
+ std::vector<boost::shared_ptr<std::vector<UChar> > > grammarbufs;
- int parseFromUChar(UChar *input, const char *fname = 0);
- void addRuleToGrammar(Rule *rule);
+ void parseFromUChar(UChar *input, const char *fname = 0);
+ void addRuleToGrammar(Rule *rule);
- Tag *parseTag(const UChar *to, const UChar *p = 0);
- void parseTagList(UChar *& p, Set *s);
- Set *parseSet(const UChar *name, const UChar *p = 0);
- Set *parseSetInline(UChar *& p, Set *s = 0);
- Set *parseSetInlineWrapper(UChar *& p);
- void parseContextualTestPosition(UChar *& p, ContextualTest& t);
- ContextualTest *parseContextualTestList(UChar *& p, Rule *rule = 0);
- void parseContextualTests(UChar *& p, Rule *rule);
- void parseContextualDependencyTests(UChar *& p, Rule *rule);
- void parseRule(UChar *& p, KEYWORDS key);
- void parseAnchorish(UChar *& p);
+ Tag *parseTag(const UChar *to, const UChar *p = 0);
+ void parseTagList(UChar *& p, Set *s);
+ Set *parseSet(const UChar *name, const UChar *p = 0);
+ Set *parseSetInline(UChar *& p, Set *s = 0);
+ Set *parseSetInlineWrapper(UChar *& p);
+ void parseContextualTestPosition(UChar *& p, ContextualTest& t);
+ ContextualTest *parseContextualTestList(UChar *& p, Rule *rule = 0);
+ void parseContextualTests(UChar *& p, Rule *rule);
+ void parseContextualDependencyTests(UChar *& p, Rule *rule);
+ void parseRule(UChar *& p, KEYWORDS key);
+ void parseAnchorish(UChar *& p);
- int error_counter;
- void incErrorCount();
- };
+ int error_counter;
+ void incErrorCount();
+};
}
#endif
diff --git a/src/Window.cpp b/src/Window.cpp
index 625a783..396ee65 100644
--- a/src/Window.cpp
+++ b/src/Window.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,25 +26,25 @@
namespace CG3 {
-Window::Window(GrammarApplicator *p) :
-parent(p),
-cohort_counter(1),
-window_counter(0),
-window_span(0),
-current(0)
+Window::Window(GrammarApplicator *p)
+ : parent(p)
+ , cohort_counter(1)
+ , window_counter(0)
+ , window_span(0)
+ , current(0)
{
}
Window::~Window() {
SingleWindowCont::iterator iter;
- for (iter = previous.begin() ; iter != previous.end() ; iter++) {
+ for (iter = previous.begin(); iter != previous.end(); iter++) {
delete *iter;
}
delete current;
current = 0;
- for (iter = next.begin() ; iter != next.end() ; iter++) {
+ for (iter = next.begin(); iter != next.end(); iter++) {
delete *iter;
}
}
@@ -101,7 +101,7 @@ void Window::shuffleWindowsDown() {
void Window::rebuildSingleWindowLinks() {
SingleWindow *sWindow = 0;
- foreach (SingleWindowCont, previous, iter, iter_end) {
+ foreach (iter, previous) {
(*iter)->previous = sWindow;
if (sWindow) {
sWindow->next = *iter;
@@ -117,7 +117,7 @@ void Window::rebuildSingleWindowLinks() {
sWindow = current;
}
- foreach (SingleWindowCont, next, iter, iter_end) {
+ foreach (iter, next) {
(*iter)->previous = sWindow;
if (sWindow) {
sWindow->next = *iter;
@@ -144,7 +144,7 @@ void Window::rebuildCohortLinks() {
Cohort *prev = 0;
while (sWindow) {
- foreach (CohortVector, sWindow->cohorts, citer, citer_end) {
+ foreach (citer, sWindow->cohorts) {
(*citer)->prev = prev;
(*citer)->next = 0;
if (prev) {
@@ -155,5 +155,4 @@ void Window::rebuildCohortLinks() {
sWindow = sWindow->next;
}
}
-
}
diff --git a/src/Window.hpp b/src/Window.hpp
index a31333e..fec65af 100644
--- a/src/Window.hpp
+++ b/src/Window.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,39 +26,38 @@
#include "stdafx.hpp"
namespace CG3 {
- class GrammarApplicator;
- class Cohort;
- class SingleWindow;
-
- typedef std::vector<SingleWindow*> SingleWindowCont;
-
- class Window {
- public:
- GrammarApplicator *parent;
- uint32_t cohort_counter;
- uint32_t window_counter;
- uint32_t window_span;
-
- std::map<uint32_t, Cohort*> cohort_map;
- uint32FlatHashMap dep_map;
- std::map<uint32_t, Cohort*> dep_window;
- uint32FlatHashMap relation_map;
-
- SingleWindowCont previous;
- SingleWindow *current;
- SingleWindowCont next;
-
- Window(GrammarApplicator *p);
- ~Window();
-
- SingleWindow *allocSingleWindow();
- SingleWindow *allocPushSingleWindow();
- SingleWindow *allocAppendSingleWindow();
- void shuffleWindowsDown();
- void rebuildSingleWindowLinks();
- void rebuildCohortLinks();
- };
-
+class GrammarApplicator;
+class Cohort;
+class SingleWindow;
+
+typedef std::vector<SingleWindow*> SingleWindowCont;
+
+class Window {
+public:
+ GrammarApplicator *parent;
+ uint32_t cohort_counter;
+ uint32_t window_counter;
+ uint32_t window_span;
+
+ std::map<uint32_t, Cohort*> cohort_map;
+ uint32FlatHashMap dep_map;
+ std::map<uint32_t, Cohort*> dep_window;
+ uint32FlatHashMap relation_map;
+
+ SingleWindowCont previous;
+ SingleWindow *current;
+ SingleWindowCont next;
+
+ Window(GrammarApplicator *p);
+ ~Window();
+
+ SingleWindow *allocSingleWindow();
+ SingleWindow *allocPushSingleWindow();
+ SingleWindow *allocAppendSingleWindow();
+ void shuffleWindowsDown();
+ void rebuildSingleWindowLinks();
+ void rebuildCohortLinks();
+};
}
#endif
diff --git a/src/all_cg_conv.cpp b/src/all_cg_conv.cpp
index 3ac923f..5dcadff 100644
--- a/src/all_cg_conv.cpp
+++ b/src/all_cg_conv.cpp
@@ -26,6 +26,7 @@
#include "Reading.cpp"
#include "FormatConverter.cpp"
#include "ApertiumApplicator.cpp"
+#include "MatxinApplicator.cpp"
#include "NicelineApplicator.cpp"
#include "PlaintextApplicator.cpp"
#include "FSTApplicator.cpp"
diff --git a/src/all_cg_proc.cpp b/src/all_cg_proc.cpp
index 5276c2e..f8eec22 100644
--- a/src/all_cg_proc.cpp
+++ b/src/all_cg_proc.cpp
@@ -25,4 +25,5 @@
#include "Cohort.cpp"
#include "Reading.cpp"
#include "ApertiumApplicator.cpp"
+#include "MatxinApplicator.cpp"
#include "cg_proc.cpp"
diff --git a/src/bloomish.hpp b/src/bloomish.hpp
index 9c8d0e7..c109fae 100644
--- a/src/bloomish.hpp
+++ b/src/bloomish.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -38,11 +38,11 @@ public:
}
bloomish(const bloomish<Cont>& other) {
- std::copy(other.value, other.value+4, &value[0]);
+ std::copy(other.value, other.value + 4, &value[0]);
}
void clear() {
- std::fill(value, value+4, 0);
+ std::fill(value, value + 4, 0);
}
void insert(const Cont& v) {
@@ -75,7 +75,6 @@ public:
};
typedef bloomish<uint32_t> uint32Bloomish;
-
}
#endif
diff --git a/src/cg-mwesplit.cpp b/src/cg-mwesplit.cpp
new file mode 100644
index 0000000..ee52cb6
--- /dev/null
+++ b/src/cg-mwesplit.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2007-2016, GrammarSoft ApS
+ * Developed by Tino Didriksen <mail at tinodidriksen.com>
+ * Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
+ *
+ * This file is part of VISL CG-3
+ *
+ * VISL CG-3 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * VISL CG-3 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "stdafx.hpp"
+#include "Grammar.hpp"
+#include "MweSplitApplicator.hpp"
+
+#include "version.hpp"
+
+using CG3::CG3Quit;
+
+#include <uoptions.h>
+namespace Options { // Command-line option table for cg-mwesplit; pulled into scope by the using-directive that follows.
+enum OPTIONS { // Symbolic indices into options[] below; the enumerator order must match the array order.
+ HELP1, // index of the "help" / -h entry
+ HELP2, // index of the "?" / -? entry
+ NUM_OPTIONS, // number of entries; passed to u_parseArgs() and used as the loop bound when printing usage
+};
+UOption options[] = { // one entry per enumerator above (except NUM_OPTIONS)
+ UOPTION_DEF_D("help", 'h', UOPT_NO_ARG, "shows this help"),
+ UOPTION_DEF_D("?", '?', UOPT_NO_ARG, "shows this help"),
+};
+}
+using namespace Options;
+
+int main(int argc, char **argv) { // Entry point: prints usage on -h/-?/bad arguments, otherwise runs MweSplitApplicator from stdin to stdout.
+ UErrorCode status = U_ZERO_ERROR;
+ UFILE *ux_stdin = 0;
+ UFILE *ux_stdout = 0;
+ UFILE *ux_stderr = 0;
+
+ /* Initialize ICU */
+ u_init(&status);
+ if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { // U_FILE_ACCESS_ERROR is deliberately tolerated; any other failure aborts
+ std::cerr << "Error: Cannot initialize ICU. Status = " << u_errorName(status) << std::endl;
+ CG3Quit(1);
+ }
+
+ U_MAIN_INIT_ARGS(argc, argv);
+ argc = u_parseArgs(argc, argv, NUM_OPTIONS, options); // a negative result is treated below as an argument error
+
+ if (argc < 0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
+ FILE *out = (argc < 0) ? stderr : stdout; // usage goes to stderr on bad arguments, to stdout when help was requested
+ fprintf(out, "Usage: cg-mwesplit [OPTIONS]\n");
+ fprintf(out, "\n");
+ fprintf(out, "Options:\n");
+
+ size_t longest = 0; // length of the longest long-option name among options that have a description
+ for (uint32_t i = 0; i < NUM_OPTIONS; i++) {
+ if (options[i].description) {
+ size_t len = strlen(options[i].longName);
+ longest = std::max(longest, len);
+ }
+ }
+ for (uint32_t i = 0; i < NUM_OPTIONS; i++) {
+ if (options[i].description && options[i].description[0] != '!') { // options whose description starts with '!' are left out of the listing
+ fprintf(out, " ");
+ if (options[i].shortName) {
+ fprintf(out, "-%c,", options[i].shortName);
+ }
+ else {
+ fprintf(out, " ");
+ }
+ fprintf(out, " --%s", options[i].longName);
+ size_t ldiff = longest - strlen(options[i].longName); // padding needed so the descriptions line up
+ while (ldiff--) {
+ fprintf(out, " ");
+ }
+ fprintf(out, " %s", options[i].description);
+ fprintf(out, "\n");
+ }
+ }
+
+ return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; // non-zero exit code only when argument parsing failed
+ }
+
+ ucnv_setDefaultName("UTF-8"); // make UTF-8 the default ICU converter, then read the name back for u_finit()
+ const char *codepage_default = ucnv_getDefaultName();
+ uloc_setDefault("en_US_POSIX", &status); // NOTE(review): status is not checked after this call
+ const char *locale_default = uloc_getDefault();
+
+ ux_stdin = u_finit(stdin, locale_default, codepage_default); // wrap the C standard streams as ICU UFILEs
+ ux_stdout = u_finit(stdout, locale_default, codepage_default);
+ ux_stderr = u_finit(stderr, locale_default, codepage_default);
+
+ CG3::Grammar grammar; // built in code here; no grammar file is loaded
+
+ grammar.ux_stderr = ux_stderr;
+ grammar.allocateDummySet();
+ grammar.delimiters = grammar.allocateSet();
+ grammar.addTagToSet(grammar.allocateTag(CG3::stringbits[0].getTerminatedBuffer()), grammar.delimiters); // the delimiter set's only tag is stringbits[0]; its text is defined elsewhere
+ grammar.reindex();
+
+ CG3::MweSplitApplicator applicator(ux_stderr);
+ applicator.setGrammar(&grammar);
+
+ boost::scoped_ptr<CG3::istream> instream;
+
+ instream.reset(new CG3::istream(ux_stdin)); // input is always stdin
+
+ applicator.is_conv = true;
+ applicator.verbosity_level = 0;
+ applicator.runGrammarOnText(*instream.get(), ux_stdout); // output is always stdout
+
+ u_fclose(ux_stdout); // NOTE(review): ux_stdin is not closed here; presumably CG3::istream owns it - confirm
+ u_fclose(ux_stderr);
+
+ u_cleanup();
+}
diff --git a/src/cg-relabel.cpp b/src/cg-relabel.cpp
new file mode 100644
index 0000000..841e3d5
--- /dev/null
+++ b/src/cg-relabel.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2007-2016, GrammarSoft ApS
+ * Developed by Tino Didriksen <mail at tinodidriksen.com>
+ * Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
+ *
+ * This file is part of VISL CG-3
+ *
+ * VISL CG-3 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * VISL CG-3 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "stdafx.hpp"
+
+#include "Grammar.hpp"
+#include "GrammarWriter.hpp"
+#include "BinaryGrammar.hpp"
+#include "TextualParser.hpp"
+#include "Relabeller.hpp"
+
+#ifndef _WIN32
+#include <libgen.h>
+#endif
+
+#include "version.hpp"
+
+using CG3::CG3Quit;
+
+// Terminates the program with EXIT_FAILURE. When a program name is supplied,
+// the version banner and a short usage summary are written to stdout first.
+void endProgram(char *name) {
+	if (name == NULL) {
+		// Nothing to print without a program name; just bail out.
+		exit(EXIT_FAILURE);
+	}
+
+	fprintf(stdout, "VISL CG-3 Relabeller version %u.%u.%u.%u\n",
+		CG3_VERSION_MAJOR, CG3_VERSION_MINOR, CG3_VERSION_PATCH, CG3_REVISION);
+
+	std::cout << basename(name);
+	std::cout << ": relabel a binary grammar using a relabelling file" << std::endl;
+
+	std::cout << "USAGE: ";
+	std::cout << basename(name);
+	std::cout << " input_grammar_file relabel_rule_file output_grammar_file" << std::endl;
+
+	exit(EXIT_FAILURE);
+}
+
+
+// Like libcg3's loader, but returns a typed CG3::Grammar* instead of an opaque handle.
+//
+// The first 4 bytes of the file pick the parser: the magic "CG3B" selects the
+// binary grammar reader, anything else is handed to the textual parser.
+//
+// filename       - path of the grammar file to load
+// ux_stdout      - stored on the new grammar as its ux_stdout
+// ux_stderr      - receives error messages; also stored on the grammar and passed to the parser
+// require_binary - when true, a non-binary file is reported and the program quits via CG3Quit(1)
+//
+// Returns a newly allocated grammar that the caller must delete, or 0 if the
+// file could not be opened, its first 4 bytes could not be read, or parsing failed.
+CG3::Grammar *cg3_grammar_load(const char *filename, UFILE *ux_stdout, UFILE *ux_stderr, bool require_binary = false) {
+	using namespace CG3;
+	std::ifstream input(filename, std::ios::binary);
+	if (!input) {
+		u_fprintf(ux_stderr, "Error: Error opening %s for reading!\n", filename);
+		return 0;
+	}
+	// Only the 4-byte magic is needed here; the parser re-opens the file itself.
+	if (!input.read(&cbuffers[0][0], 4)) {
+		u_fprintf(ux_stderr, "Error: Error reading first 4 bytes from grammar!\n");
+		return 0;
+	}
+	input.close();
+
+	Grammar *grammar = new Grammar;
+	grammar->ux_stderr = ux_stderr;
+	grammar->ux_stdout = ux_stdout;
+
+	boost::scoped_ptr<IGrammarParser> parser;
+
+	if (cbuffers[0][0] == 'C' && cbuffers[0][1] == 'G' && cbuffers[0][2] == '3' && cbuffers[0][3] == 'B') {
+		parser.reset(new BinaryGrammar(*grammar, ux_stderr));
+	}
+	else {
+		if (require_binary) {
+			u_fprintf(ux_stderr, "Error: Text grammar detected -- to compile this grammar, use `cg-comp'\n");
+			CG3Quit(1);
+		}
+		parser.reset(new TextualParser(*grammar, ux_stderr));
+	}
+	if (parser->parse_grammar_from_file(filename, uloc_getDefault(), ucnv_getDefaultName())) {
+		u_fprintf(ux_stderr, "Error: Grammar could not be parsed!\n");
+		// The parser holds a reference to the grammar, so tear it down first,
+		// then free the grammar instead of leaking it on this failure path.
+		parser.reset();
+		delete grammar;
+		return 0;
+	}
+
+	grammar->reindex();
+
+	return grammar;
+}
+
+// Entry point of cg-relabel.
+//
+// Expects exactly three arguments: the input grammar (argv[1], must be binary),
+// the relabelling rule file (argv[2]) and the output path (argv[3]). The input
+// grammar is relabelled and written to the output path as a binary grammar.
+//
+// Returns the ICU status left behind by uloc_setDefault() on success, or
+// EXIT_FAILURE if either grammar could not be loaded or the output file could
+// not be written.
+int main(int argc, char *argv[]) {
+	UFILE *ux_stdout = 0;
+	UFILE *ux_stderr = 0;
+	UErrorCode status = U_ZERO_ERROR;
+
+	if (argc != 4) {
+		// Prints usage (when a program name is available) and exits.
+		endProgram(argv[0]);
+	}
+
+	/* Initialize ICU */
+	u_init(&status);
+	if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
+		std::cerr << "Error: Cannot initialize ICU. Status = " << u_errorName(status) << std::endl;
+		CG3Quit(1);
+	}
+	status = U_ZERO_ERROR;
+
+	ucnv_setDefaultName("UTF-8");
+	const char *codepage_default = ucnv_getDefaultName();
+	uloc_setDefault("en_US_POSIX", &status);
+	const char *locale_default = uloc_getDefault();
+
+	ux_stdout = u_finit(stdout, locale_default, codepage_default);
+	ux_stderr = u_finit(stderr, locale_default, codepage_default);
+
+	bool failed = false;
+
+	// cg3_grammar_load() reports the problem itself and returns 0 on failure,
+	// so both results must be checked before they are dereferenced.
+	CG3::Grammar *grammar = cg3_grammar_load(argv[1], ux_stdout, ux_stderr, true);
+	CG3::Grammar *relabel_grammar = 0;
+	if (grammar) {
+		relabel_grammar = cg3_grammar_load(argv[2], ux_stdout, ux_stderr);
+	}
+
+	if (grammar && relabel_grammar) {
+		CG3::Relabeller relabeller(*grammar, *relabel_grammar, ux_stderr);
+		relabeller.relabel();
+
+		FILE *gout = fopen(argv[3], "wb");
+		if (gout) {
+			CG3::BinaryGrammar writer(*grammar, ux_stderr);
+			writer.writeBinaryGrammar(gout);
+			// NOTE(review): assumes writeBinaryGrammar() leaves the caller's
+			// stream open - confirm. Closing here flushes buffered data and
+			// surfaces write errors that would otherwise go unnoticed.
+			if (fclose(gout) != 0) {
+				std::cerr << "Could not write grammar to " << argv[3] << std::endl;
+				failed = true;
+			}
+		}
+		else {
+			std::cerr << "Could not write grammar to " << argv[3] << std::endl;
+			failed = true;
+		}
+	}
+	else {
+		failed = true;
+	}
+
+	// delete on a null pointer is a no-op, so this also covers the failure paths.
+	delete relabel_grammar;
+	relabel_grammar = 0;
+	delete grammar;
+	grammar = 0;
+
+	u_fclose(ux_stderr);
+	u_fclose(ux_stdout);
+
+	u_cleanup();
+
+	return failed ? EXIT_FAILURE : status;
+}
diff --git a/src/cg3.h b/src/cg3.h
index 9329ca6..bec2cd5 100644
--- a/src/cg3.h
+++ b/src/cg3.h
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
diff --git a/src/cg_comp.cpp b/src/cg_comp.cpp
index a19cc32..fa99141 100644
--- a/src/cg_comp.cpp
+++ b/src/cg_comp.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2015, GrammarSoft ApS
+ * Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -37,7 +37,7 @@ using CG3::CG3Quit;
void endProgram(char *name) {
if (name != NULL) {
fprintf(stdout, "VISL CG-3 Compiler version %u.%u.%u.%u\n",
- CG3_VERSION_MAJOR, CG3_VERSION_MINOR, CG3_VERSION_PATCH, CG3_REVISION);
+ CG3_VERSION_MAJOR, CG3_VERSION_MINOR, CG3_VERSION_PATCH, CG3_REVISION);
std::cout << basename(name) << ": compile a binary grammar from a text file" << std::endl;
std::cout << "USAGE: " << basename(name) << " grammar_file output_file" << std::endl;
}
diff --git a/src/cg_conv.cpp b/src/cg_conv.cpp
index 0513aa9..d2b6b00 100644
--- a/src/cg_conv.cpp
+++ b/src/cg_conv.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -53,13 +53,13 @@ int main(int argc, char *argv[]) {
fprintf(out, "Options:\n");
size_t longest = 0;
- for (uint32_t i=0 ; i<NUM_OPTIONS ; i++) {
+ for (uint32_t i = 0; i < NUM_OPTIONS; i++) {
if (options[i].description) {
size_t len = strlen(options[i].longName);
longest = std::max(longest, len);
}
}
- for (uint32_t i=0 ; i<NUM_OPTIONS ; i++) {
+ for (uint32_t i = 0; i < NUM_OPTIONS; i++) {
if (options[i].description && options[i].description[0] != '!') {
fprintf(out, " ");
if (options[i].shortName) {
@@ -136,7 +136,7 @@ int main(int argc, char *argv[]) {
URegularExpression *rx = 0;
for (;;) {
- rx = uregex_openC("^\"<[^>]+>\".*?^\\s+\"[^\"]+\"", UREGEX_DOTALL|UREGEX_MULTILINE, 0, &status);
+ rx = uregex_openC("^\"<[^>]+>\".*?^\\s+\"[^\"]+\"", UREGEX_DOTALL | UREGEX_MULTILINE, 0, &status);
uregex_setText(rx, buffer.c_str(), buffer.size(), &status);
if (uregex_find(rx, -1, &status)) {
fmt = CG3::FMT_CG;
@@ -144,7 +144,7 @@ int main(int argc, char *argv[]) {
}
uregex_close(rx);
- rx = uregex_openC("^\\S+ *\t *\\[\\S+\\]", UREGEX_DOTALL|UREGEX_MULTILINE, 0, &status);
+ rx = uregex_openC("^\\S+ *\t *\\[\\S+\\]", UREGEX_DOTALL | UREGEX_MULTILINE, 0, &status);
uregex_setText(rx, buffer.c_str(), buffer.size(), &status);
if (uregex_find(rx, -1, &status)) {
fmt = CG3::FMT_NICELINE;
@@ -160,7 +160,7 @@ int main(int argc, char *argv[]) {
}
uregex_close(rx);
- rx = uregex_openC("\\^[^/]+(/[^<]+(<[^>]+>)+)+\\$", UREGEX_DOTALL|UREGEX_MULTILINE, 0, &status);
+ rx = uregex_openC("\\^[^/]+(/[^<]+(<[^>]+>)+)+\\$", UREGEX_DOTALL | UREGEX_MULTILINE, 0, &status);
uregex_setText(rx, buffer.c_str(), buffer.size(), &status);
if (uregex_find(rx, -1, &status)) {
fmt = CG3::FMT_APERTIUM;
@@ -168,7 +168,7 @@ int main(int argc, char *argv[]) {
}
uregex_close(rx);
- rx = uregex_openC("^\\S+\t\\S+(\\+\\S+)+$", UREGEX_DOTALL|UREGEX_MULTILINE, 0, &status);
+ rx = uregex_openC("^\\S+\t\\S+(\\+\\S+)+$", UREGEX_DOTALL | UREGEX_MULTILINE, 0, &status);
uregex_setText(rx, buffer.c_str(), buffer.size(), &status);
if (uregex_find(rx, -1, &status)) {
fmt = CG3::FMT_FST;
@@ -193,7 +193,7 @@ int main(int argc, char *argv[]) {
}
if (options[MAPPING_PREFIX].doesOccur) {
size_t sn = strlen(options[MAPPING_PREFIX].value);
- CG3::UString buf(sn*3, 0);
+ CG3::UString buf(sn * 3, 0);
UConverter *conv = ucnv_open(codepage_default, &status);
ucnv_toUChars(conv, &buf[0], buf.size(), options[MAPPING_PREFIX].value, sn, &status);
ucnv_close(conv);
@@ -201,7 +201,7 @@ int main(int argc, char *argv[]) {
}
if (options[SUB_DELIMITER].doesOccur) {
size_t sn = strlen(options[SUB_DELIMITER].value);
- applicator.sub_delims.resize(sn*2);
+ applicator.sub_delims.resize(sn * 2);
UConverter *conv = ucnv_open(codepage_default, &status);
sn = ucnv_toUChars(conv, &applicator.sub_delims[0], applicator.sub_delims.size(), options[SUB_DELIMITER].value, sn, &status);
applicator.sub_delims.resize(sn);
diff --git a/src/cg_proc.cpp b/src/cg_proc.cpp
index 195dcc1..e2917eb 100644
--- a/src/cg_proc.cpp
+++ b/src/cg_proc.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -21,8 +21,10 @@
#include "stdafx.hpp"
#include "Grammar.hpp"
+#include "TextualParser.hpp"
#include "BinaryGrammar.hpp"
#include "ApertiumApplicator.hpp"
+#include "MatxinApplicator.hpp"
#include "GrammarApplicator.hpp"
#include <getopt.h>
@@ -37,8 +39,8 @@ using CG3::CG3Quit;
void endProgram(char *name) {
using namespace std;
fprintf(stdout, "VISL CG-3 Disambiguator version %u.%u.%u.%u\n",
- CG3_VERSION_MAJOR, CG3_VERSION_MINOR, CG3_VERSION_PATCH, CG3_REVISION);
- cout << basename(name) <<": process a stream with a constraint grammar" << endl;
+ CG3_VERSION_MAJOR, CG3_VERSION_MINOR, CG3_VERSION_PATCH, CG3_REVISION);
+ cout << basename(name) << ": process a stream with a constraint grammar" << endl;
cout << "USAGE: " << basename(name) << " [-t] [-s] [-d] [-r rule] grammar_file [input_file [output_file]]" << endl;
cout << "Options:" << endl;
#if HAVE_GETOPT_LONG
@@ -61,15 +63,15 @@ void endProgram(char *name) {
cout << " -d: morphological disambiguation (default behaviour)" << endl;
cout << " -s: specify number of sections to process" << endl;
cout << " -f: set the format of the I/O stream to NUM," << endl;
- cout << " where `0' is VISL format and `1' is " << endl;
- cout << " Apertium format (default: 1)" << endl;
+ cout << " where `0' is VISL format, `1' is " << endl;
+ cout << " Apertium format and `2' is Matxin (default: 1)" << endl;
cout << " -r: run only the named rule" << endl;
cout << " -t: print debug output on stderr" << endl;
cout << " -w: enforce surface case on lemma/baseform " << endl;
cout << " (to work with -w option of lt-proc)" << endl;
cout << " -n: do not print out the word form of each cohort" << endl;
cout << " -1: only output the first analysis if ambiguity remains" << endl;
- cout << " -z: flush output on the null character" << endl;
+ cout << " -z: flush output on the null character" << endl;
cout << " -v: version" << endl;
cout << " -h: show this help" << endl;
@@ -86,7 +88,7 @@ int main(int argc, char *argv[]) {
int sections = 0;
int stream_format = 1;
bool null_flush = false;
- char* single_rule = 0;
+ char *single_rule = 0;
UErrorCode status = U_ZERO_ERROR;
UFILE *ux_stdin = 0;
@@ -94,7 +96,7 @@ int main(int argc, char *argv[]) {
UFILE *ux_stderr = 0;
#if HAVE_GETOPT_LONG
- static struct option long_options[] = {
+ struct option long_options[] = {
{"disambiguation", 0, 0, 'd'},
{"sections", 0, 0, 's'},
{"stream-format", required_argument, 0, 'f'},
@@ -105,7 +107,7 @@ int main(int argc, char *argv[]) {
{"version", 0, 0, 'v'},
{"first", 0, 0, '1'},
{"help", 0, 0, 'h'},
- {"null-flush", 0, 0, 'z'}
+ {"null-flush", 0, 0, 'z'},
};
#endif
@@ -122,62 +124,60 @@ int main(int argc, char *argv[]) {
break;
}
- switch(c) {
-
- case 'd':
- if (cmd == 0) {
- cmd = c;
- }
- else {
- endProgram(argv[0]);
- }
- break;
-
- case 'f':
- stream_format = atoi(optarg);
- break;
-
- case 't':
- trace = true;
- break;
-
- case 'r':
- {
- // strdup() is Posix
- size_t len = strlen(optarg);
- single_rule = new char[len];
- std::copy(optarg, optarg+len, single_rule);
- break;
- }
- case 's':
- sections = atoi(optarg);
- break;
-
- case 'n':
- print_word_forms = false;
- break;
-
- case '1':
- only_first = true;
- break;
-
- case 'w':
- wordform_case = true;
- break;
-
- case 'v':
- fprintf(stdout, "VISL CG-3 Disambiguator version %u.%u.%u.%u\n",
- CG3_VERSION_MAJOR, CG3_VERSION_MINOR, CG3_VERSION_PATCH, CG3_REVISION);
-
- exit(EXIT_SUCCESS);
- break;
- case 'z':
- null_flush = true;
- break;
- case 'h':
- default:
+ switch (c) {
+ case 'd':
+ if (cmd == 0) {
+ cmd = c;
+ }
+ else {
endProgram(argv[0]);
- break;
+ }
+ break;
+
+ case 'f':
+ stream_format = atoi(optarg);
+ break;
+
+ case 't':
+ trace = true;
+ break;
+
+ case 'r': {
+ // strdup() is Posix
+ size_t len = strlen(optarg) + 1;
+ single_rule = new char[len];
+ std::copy(optarg, optarg + len, single_rule);
+ break;
+ }
+ case 's':
+ sections = atoi(optarg);
+ break;
+
+ case 'n':
+ print_word_forms = false;
+ break;
+
+ case '1':
+ only_first = true;
+ break;
+
+ case 'w':
+ wordform_case = true;
+ break;
+
+ case 'v':
+ fprintf(stdout, "VISL CG-3 Disambiguator version %u.%u.%u.%u\n",
+ CG3_VERSION_MAJOR, CG3_VERSION_MINOR, CG3_VERSION_PATCH, CG3_REVISION);
+
+ exit(EXIT_SUCCESS);
+ break;
+ case 'z':
+ null_flush = true;
+ break;
+ case 'h':
+ default:
+ endProgram(argv[0]);
+ break;
}
}
@@ -228,14 +228,14 @@ int main(int argc, char *argv[]) {
}
if (optind <= (argc - 2)) {
u_fclose(ux_stdin);
- ux_stdin = u_fopen(argv[optind+1], "rb", locale_default, codepage_default);
+ ux_stdin = u_fopen(argv[optind + 1], "rb", locale_default, codepage_default);
if (ux_stdin == NULL) {
endProgram(argv[0]);
}
}
if (optind <= (argc - 3)) {
u_fclose(ux_stdout);
- ux_stdout = u_fopen(argv[optind+2], "wb", locale_default, codepage_default);
+ ux_stdout = u_fopen(argv[optind + 2], "wb", locale_default, codepage_default);
if (ux_stdout == NULL) {
endProgram(argv[0]);
}
@@ -245,9 +245,10 @@ int main(int argc, char *argv[]) {
parser = new CG3::BinaryGrammar(grammar, ux_stderr);
}
else {
- std::cerr << "Info: Text grammar detected -- to process textual " << std::endl;
- std::cerr << "grammars, use `vislcg3', to compile this grammar, use `cg-comp'" << std::endl;
- CG3Quit(1);
+ // Forbidding text grammars makes debugging very annoying
+ std::cerr << "Warning: Text grammar detected - to better process textual" << std::endl;
+ std::cerr << "grammars, use `vislcg3'; to compile this grammar, use `cg-comp'" << std::endl;
+ parser = new CG3::TextualParser(grammar, ux_stderr);
}
grammar.ux_stderr = ux_stderr;
@@ -266,8 +267,16 @@ int main(int argc, char *argv[]) {
if (stream_format == 0) {
applicator = new CG3::GrammarApplicator(ux_stderr);
}
+ else if (stream_format == 2) {
+ CG3::MatxinApplicator *matxinApplicator = new CG3::MatxinApplicator(ux_stderr);
+ matxinApplicator->setNullFlush(null_flush);
+ matxinApplicator->wordform_case = wordform_case;
+ matxinApplicator->print_word_forms = print_word_forms;
+ matxinApplicator->print_only_first = only_first;
+ applicator = matxinApplicator;
+ }
else {
- CG3::ApertiumApplicator* apertiumApplicator= new CG3::ApertiumApplicator(ux_stderr);
+ CG3::ApertiumApplicator *apertiumApplicator = new CG3::ApertiumApplicator(ux_stderr);
apertiumApplicator->setNullFlush(null_flush);
apertiumApplicator->wordform_case = wordform_case;
apertiumApplicator->print_word_forms = print_word_forms;
@@ -276,21 +285,22 @@ int main(int argc, char *argv[]) {
}
applicator->setGrammar(&grammar);
- for (int32_t i=1 ; i<=sections ; i++) {
+ for (int32_t i = 1; i <= sections; i++) {
applicator->sections.push_back(i);
}
applicator->trace = trace;
applicator->unicode_tags = true;
+ applicator->unique_tags = false;
// This is if we want to run a single rule (-r option)
if (single_rule) {
size_t sn = strlen(single_rule);
- UChar *buf = new UChar[sn*3];
+ UChar *buf = new UChar[sn * 3];
buf[0] = 0;
buf[sn] = 0;
u_charsToUChars(single_rule, buf, sn);
- const_foreach (CG3::RuleVector, applicator->grammar->rule_by_number, riter, riter_end) {
+ foreach (riter, applicator->grammar->rule_by_number) {
const CG3::Rule *rule = *riter;
if (rule->name && u_strcmp(rule->name, buf) == 0) {
applicator->valid_rules.push_back(rule->number);
@@ -301,15 +311,13 @@ int main(int argc, char *argv[]) {
delete[] single_rule;
try {
- switch(cmd) {
-
- case 'd':
- default:
- CG3::istream instream(ux_stdin, !null_flush);
- applicator->runGrammarOnText(instream, ux_stdout);
- break;
+ switch (cmd) {
+ case 'd':
+ default:
+ CG3::istream instream(ux_stdin, !null_flush);
+ applicator->runGrammarOnText(instream, ux_stdout);
+ break;
}
-
}
catch (std::exception& e) {
std::cerr << e.what();
diff --git a/src/flat_unordered_map.hpp b/src/flat_unordered_map.hpp
index fa59798..f3fd042 100644
--- a/src/flat_unordered_map.hpp
+++ b/src/flat_unordered_map.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -51,15 +51,15 @@ public:
public:
typedef value_type reference;
- const_iterator() :
- fus(0),
- i(0)
+ const_iterator()
+ : fus(0)
+ , i(0)
{
}
- const_iterator(const flat_unordered_map& fus, size_t i = 0) :
- fus(&fus),
- i(i)
+ const_iterator(const flat_unordered_map& fus, size_t i = 0)
+ : fus(&fus)
+ , i(i)
{
}
@@ -109,7 +109,7 @@ public:
return fus->elements[i];
}
- const value_type_real* operator->() const {
+ const value_type_real *operator->() const {
return &fus->elements[i];
}
};
@@ -117,18 +117,18 @@ public:
typedef const_iterator iterator;
enum {
- DEFAULT_CAP = static_cast<size_type>(16u)
+ DEFAULT_CAP = static_cast<size_type>(16u),
};
- flat_unordered_map() :
- size_(0)
+ flat_unordered_map()
+ : size_(0)
{
}
size_t insert(const value_type& t) {
assert(t.first != res_empty && t.first != res_del && "Key cannot be res_empty or res_del!");
- if ((size_ + 1)*3/2 >= capacity() / 2) {
+ if ((size_ + 1) * 3 / 2 >= capacity() / 2) {
reserve(std::max(static_cast<size_type>(DEFAULT_CAP), capacity() * 2));
}
size_t max = capacity() - 1;
@@ -147,14 +147,14 @@ public:
void insert(It b, It e) {
size_t d = std::distance(b, e);
size_t c = capacity();
- while ((size_ + d)*3/2 >= c / 2) {
- c = std::max(static_cast<size_type>(DEFAULT_CAP), c*2);
+ while ((size_ + d) * 3 / 2 >= c / 2) {
+ c = std::max(static_cast<size_type>(DEFAULT_CAP), c * 2);
}
if (c != capacity()) {
reserve(c);
}
- for (; b != e ; ++b) {
+ for (; b != e; ++b) {
insert(*b);
}
}
@@ -318,8 +318,7 @@ private:
friend class const_iterator;
};
-typedef flat_unordered_map<uint32_t,uint32_t> uint32FlatHashMap;
-
+typedef flat_unordered_map<uint32_t, uint32_t> uint32FlatHashMap;
}
#endif
diff --git a/src/flat_unordered_set.hpp b/src/flat_unordered_set.hpp
index 5130393..2e62681 100644
--- a/src/flat_unordered_set.hpp
+++ b/src/flat_unordered_set.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -34,7 +34,6 @@ namespace CG3 {
template<typename T, T res_empty = T(-1), T res_del = T(-1) - 1>
class flat_unordered_set {
public:
-
class const_iterator : public std::iterator<std::bidirectional_iterator_tag, T> {
private:
friend class flat_unordered_set;
@@ -44,15 +43,15 @@ public:
public:
typedef T reference;
- const_iterator() :
- fus(0),
- i(0)
+ const_iterator()
+ : fus(0)
+ , i(0)
{
}
- const_iterator(const flat_unordered_set& fus, size_t i = 0) :
- fus(&fus),
- i(i)
+ const_iterator(const flat_unordered_set& fus, size_t i = 0)
+ : fus(&fus)
+ , i(i)
{
}
@@ -115,18 +114,18 @@ public:
typedef T value_type;
typedef T key_type;
enum {
- DEFAULT_CAP = static_cast<size_type>(16u)
+ DEFAULT_CAP = static_cast<size_type>(16u),
};
- flat_unordered_set() :
- size_(0)
+ flat_unordered_set()
+ : size_(0)
{
}
void insert(T t) {
assert(t != res_empty && t != res_del && "Value cannot be res_empty or res_del!");
- if ((size_ + 1)*3/2 >= capacity() / 2) {
+ if ((size_ + 1) * 3 / 2 >= capacity() / 2) {
reserve(std::max(static_cast<size_type>(DEFAULT_CAP), capacity() * 2));
}
size_t max = capacity() - 1;
@@ -144,14 +143,14 @@ public:
void insert(It b, It e) {
size_t d = std::distance(b, e);
size_t c = capacity();
- while ((size_ + d)*3/2 >= c / 2) {
- c = std::max(static_cast<size_type>(DEFAULT_CAP), c*2);
+ while ((size_ + d) * 3 / 2 >= c / 2) {
+ c = std::max(static_cast<size_type>(DEFAULT_CAP), c * 2);
}
if (c != capacity()) {
reserve(c);
}
- for (; b != e ; ++b) {
+ for (; b != e; ++b) {
insert(*b);
}
}
@@ -293,7 +292,6 @@ private:
};
typedef flat_unordered_set<uint32_t> uint32FlatHashSet;
-
}
#endif
diff --git a/src/icu_uoptions.cpp b/src/icu_uoptions.cpp
index aada116..3c41a3c 100644
--- a/src/icu_uoptions.cpp
+++ b/src/icu_uoptions.cpp
@@ -18,108 +18,108 @@
#include "icu_uoptions.hpp"
-int u_parseArgs(int argc, char* argv[],
- int optionCount, UOption options[]) {
- char *arg;
- int i=1, remaining=1;
- char c, stopOptions=0;
+int u_parseArgs(int argc, char *argv[],
+ int optionCount, UOption options[]) {
+ char *arg;
+ int i = 1, remaining = 1;
+ char c, stopOptions = 0;
- while (i<argc) {
- arg=argv[i];
- if (!stopOptions && *arg=='-' && (c=arg[1])!=0) {
- /* process an option */
- UOption *option=NULL;
- arg+=2;
- if (c=='-') {
- /* process a long option */
- if (*arg==0) {
- /* stop processing options after "--" */
- stopOptions=1;
- }
+ while (i < argc) {
+ arg = argv[i];
+ if (!stopOptions && *arg == '-' && (c = arg[1]) != 0) {
+ /* process an option */
+ UOption *option = NULL;
+ arg += 2;
+ if (c == '-') {
+ /* process a long option */
+ if (*arg == 0) {
+ /* stop processing options after "--" */
+ stopOptions = 1;
+ }
else {
- /* search for the option string */
- int j;
- for (j=0; j<optionCount; ++j) {
- if (options[j].longName && uprv_strcmp(arg, options[j].longName)==0) {
- option=options+j;
- break;
- }
- }
- if (option==NULL) {
- /* no option matches */
- return -i;
- }
- option->doesOccur=1;
+ /* search for the option string */
+ int j;
+ for (j = 0; j < optionCount; ++j) {
+ if (options[j].longName && uprv_strcmp(arg, options[j].longName) == 0) {
+ option = options + j;
+ break;
+ }
+ }
+ if (option == NULL) {
+ /* no option matches */
+ return -i;
+ }
+ option->doesOccur = 1;
- if (option->hasArg!=UOPT_NO_ARG) {
- /* parse the argument for the option, if any */
- if (i+1<argc && !(argv[i+1][0]=='-' && argv[i+1][1]!=0)) {
- /* argument in the next argv[], and there is not an option in there */
- option->value=argv[++i];
- }
- else if (option->hasArg==UOPT_REQUIRES_ARG) {
- /* there is no argument, but one is required: return with error */
- return -i;
- }
- }
- }
- }
+ if (option->hasArg != UOPT_NO_ARG) {
+ /* parse the argument for the option, if any */
+ if (i + 1 < argc && !(argv[i + 1][0] == '-' && argv[i + 1][1] != 0)) {
+ /* argument in the next argv[], and there is not an option in there */
+ option->value = argv[++i];
+ }
+ else if (option->hasArg == UOPT_REQUIRES_ARG) {
+ /* there is no argument, but one is required: return with error */
+ return -i;
+ }
+ }
+ }
+ }
else {
- /* process one or more short options */
- do {
- /* search for the option letter */
- int j;
- for (j=0; j<optionCount; ++j) {
- if (c==options[j].shortName) {
- option=options+j;
- break;
- }
- }
- if (option==NULL) {
- /* no option matches */
- return -i;
- }
- option->doesOccur=1;
+ /* process one or more short options */
+ do {
+ /* search for the option letter */
+ int j;
+ for (j = 0; j < optionCount; ++j) {
+ if (c == options[j].shortName) {
+ option = options + j;
+ break;
+ }
+ }
+ if (option == NULL) {
+ /* no option matches */
+ return -i;
+ }
+ option->doesOccur = 1;
- if (option->hasArg!=UOPT_NO_ARG) {
- /* parse the argument for the option, if any */
- if (*arg!=0) {
- /* argument following in the same argv[] */
- option->value=arg;
- /* do not process the rest of this arg as option letters */
- break;
- }
- else if (i+1<argc && !(argv[i+1][0]=='-' && argv[i+1][1]!=0)) {
- /* argument in the next argv[], and there is not an option in there */
- option->value=argv[++i];
- /* this break is redundant because we know that *arg==0 */
- break;
- }
- else if (option->hasArg==UOPT_REQUIRES_ARG) {
- /* there is no argument, but one is required: return with error */
- return -i;
- }
- }
+ if (option->hasArg != UOPT_NO_ARG) {
+ /* parse the argument for the option, if any */
+ if (*arg != 0) {
+ /* argument following in the same argv[] */
+ option->value = arg;
+ /* do not process the rest of this arg as option letters */
+ break;
+ }
+ else if (i + 1 < argc && !(argv[i + 1][0] == '-' && argv[i + 1][1] != 0)) {
+ /* argument in the next argv[], and there is not an option in there */
+ option->value = argv[++i];
+ /* this break is redundant because we know that *arg==0 */
+ break;
+ }
+ else if (option->hasArg == UOPT_REQUIRES_ARG) {
+ /* there is no argument, but one is required: return with error */
+ return -i;
+ }
+ }
- /* get the next option letter */
- option=NULL;
- c=*arg++;
- } while (c!=0);
- }
+ /* get the next option letter */
+ option = NULL;
+ c = *arg++;
+ } while (c != 0);
+ }
- if (option!=0 && option->optionFn!=0 && option->optionFn(option->context, option)<0) {
- /* the option function was called and returned an error */
- return -i;
- }
+ if (option != 0 && option->optionFn != 0 && option->optionFn(option->context, option) < 0) {
+ /* the option function was called and returned an error */
+ return -i;
+ }
- /* go to next argv[] */
- ++i;
- }
+ /* go to next argv[] */
+ ++i;
+ }
else {
- /* move a non-option up in argv[] */
- argv[remaining++]=arg;
- ++i;
- }
- }
- return remaining;
+ /* move a non-option up in argv[] */
+ argv[remaining++] = arg;
+ ++i;
+ }
+ }
+ return remaining;
}
diff --git a/src/inlines.hpp b/src/inlines.hpp
index ae675e2..1a174d7 100644
--- a/src/inlines.hpp
+++ b/src/inlines.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -34,12 +34,12 @@ const uint32_t CG3_HASH_SEED = 705577479u;
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
|| defined(_MSC_VER) || defined(__BORLANDC__) || defined(__TURBOC__)
-#define get16bits(d) (*((const uint16_t *) (d)))
+#define get16bits(d) (*((const uint16_t*) (d)))
#endif
#if !defined (get16bits)
-#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
- +(uint32_t)(((const uint8_t *)(d))[0]) )
+#define get16bits(d) ((((uint32_t)(((const uint8_t*)(d))[1])) << 8) \
+ +(uint32_t)(((const uint8_t*)(d))[0]) )
#endif
inline uint32_t SuperFastHash(const char *data, size_t len = 0, uint32_t hash = CG3_HASH_SEED) {
@@ -57,28 +57,31 @@ inline uint32_t SuperFastHash(const char *data, size_t len = 0, uint32_t hash =
len >>= 2;
/* Main loop */
- for (;len > 0; len--) {
- hash += get16bits (data);
- tmp = (get16bits (data+2) << 11) ^ hash;
- hash = (hash << 16) ^ tmp;
- data += 2*sizeof (uint16_t);
- hash += hash >> 11;
+ for (; len > 0; len--) {
+ hash += get16bits(data);
+ tmp = (get16bits(data + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2 * sizeof(uint16_t);
+ hash += hash >> 11;
}
/* Handle end cases */
switch (rem) {
- case 3: hash += get16bits (data);
- hash ^= hash << 16;
- hash ^= data[sizeof (uint16_t)] << 18;
- hash += hash >> 11;
- break;
- case 2: hash += get16bits (data);
- hash ^= hash << 11;
- hash += hash >> 17;
- break;
- case 1: hash += *data;
- hash ^= hash << 10;
- hash += hash >> 1;
+ case 3:
+ hash += get16bits(data);
+ hash ^= hash << 16;
+ hash ^= data[sizeof(uint16_t)] << 18;
+ hash += hash >> 11;
+ break;
+ case 2:
+ hash += get16bits(data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1:
+ hash += *data;
+ hash ^= hash << 10;
+ hash += hash >> 1;
}
/* Force "avalanching" of final 127 bits */
@@ -89,7 +92,7 @@ inline uint32_t SuperFastHash(const char *data, size_t len = 0, uint32_t hash =
hash ^= hash << 25;
hash += hash >> 6;
- if (hash == 0 || hash == std::numeric_limits<uint32_t>::max() || hash == std::numeric_limits<uint32_t>::max()-1) {
+ if (hash == 0 || hash == std::numeric_limits<uint32_t>::max() || hash == std::numeric_limits<uint32_t>::max() - 1) {
hash = CG3_HASH_SEED;
}
@@ -111,20 +114,21 @@ inline uint32_t SuperFastHash(const UChar *data, size_t len = 0, uint32_t hash =
len >>= 1;
/* Main loop */
- for (;len > 0; len--) {
- hash += data[0];
- tmp = (data[1] << 11) ^ hash;
- hash = (hash << 16) ^ tmp;
- data += 2;
- hash += hash >> 11;
+ for (; len > 0; len--) {
+ hash += data[0];
+ tmp = (data[1] << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2;
+ hash += hash >> 11;
}
/* Handle end cases */
switch (rem) {
- case 1: hash += data[0];
- hash ^= hash << 11;
- hash += hash >> 17;
- break;
+ case 1:
+ hash += data[0];
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
}
/* Force "avalanching" of final 127 bits */
@@ -135,7 +139,7 @@ inline uint32_t SuperFastHash(const UChar *data, size_t len = 0, uint32_t hash =
hash ^= hash << 25;
hash += hash >> 6;
- if (hash == 0 || hash == std::numeric_limits<uint32_t>::max() || hash == std::numeric_limits<uint32_t>::max()-1) {
+ if (hash == 0 || hash == std::numeric_limits<uint32_t>::max() || hash == std::numeric_limits<uint32_t>::max() - 1) {
hash = CG3_HASH_SEED;
}
@@ -172,7 +176,7 @@ inline uint32_t hash_value(uint32_t c, uint32_t h = CG3_HASH_SEED) {
}
//*
h = c + (h << 6U) + (h << 16U) - h;
- if (h == 0 || h == std::numeric_limits<uint32_t>::max() || h == std::numeric_limits<uint32_t>::max()-1) {
+ if (h == 0 || h == std::numeric_limits<uint32_t>::max() || h == std::numeric_limits<uint32_t>::max() - 1) {
h = CG3_HASH_SEED;
}
return h;
@@ -201,10 +205,10 @@ inline bool ISSPACE(const UChar c) {
}
inline bool ISSTRING(const UChar *p, const uint32_t c) {
- if (*(p-1) == '"' && *(p+c+1) == '"') {
+ if (*(p - 1) == '"' && *(p + c + 1) == '"') {
return true;
}
- if (*(p-1) == '<' && *(p+c+1) == '>') {
+ if (*(p - 1) == '<' && *(p + c + 1) == '>') {
return true;
}
return false;
@@ -212,24 +216,34 @@ inline bool ISSTRING(const UChar *p, const uint32_t c) {
inline bool ISNL(const UChar c) {
return (
- c == 0x2028L // Unicode Line Seperator
- || c == 0x2029L // Unicode Paragraph Seperator
- || c == 0x000CL // Form Feed
- || c == 0x000BL // Vertical Tab
- || c == 0x000AL // ASCII \n
- );
+ c == 0x2028L // Unicode Line Seperator
+ || c == 0x2029L // Unicode Paragraph Seperator
+ || c == 0x000CL // Form Feed
+ || c == 0x000BL // Vertical Tab
+ || c == 0x000AL // ASCII \n
+ );
}
inline bool ISESC(const UChar *p) {
- uint32_t a=1;
- while (*(p-a) && *(p-a) == '\\') {
+ uint32_t a = 1;
+ while (*(p - a) && *(p - a) == '\\') {
a++;
}
- return (a%2==0);
+ return (a % 2 == 0);
}
-inline bool ISCHR(const UChar p, const UChar a, const UChar b) {
- return ((p) && ((p) == (a) || (p) == (b)));
+// Checks whether the N - 1 characters starting at p match, position by
+// position, either uc[i] or lc[i] (presumably the upper- and lower-case
+// spellings of one keyword, going by the parameter names).
+// Returns false without comparing anything when ISSTRING(p, N - 1) is true.
+template<typename C, size_t N>
+inline bool IS_ICASE(const UChar *p, const C (&uc)[N], const C (&lc)[N]) {
+	// The arrays are string constants, so the last element is the null terminator.
+	const size_t len = N - 1;
+	if (ISSTRING(p, len)) {
+		return false;
+	}
+	// Advance while each character agrees with one of the two spellings.
+	size_t matched = 0;
+	while (matched < len && (p[matched] == uc[matched] || p[matched] == lc[matched])) {
+		++matched;
+	}
+	return matched == len;
+}
inline void BACKTONL(UChar *& p) {
@@ -316,7 +330,7 @@ inline void SKIPTO_NOSPAN_RAW(UChar *& p, const UChar a) {
}
}
-inline void CG3Quit(const int32_t c = 0, const char* file = 0, const uint32_t line = 0) {
+inline void CG3Quit(const int32_t c = 0, const char *file = 0, const uint32_t line = 0) {
if (file && line) {
std::cerr << std::flush;
std::cerr << "CG3Quit triggered from " << file << " line " << line << "." << std::endl;
@@ -329,7 +343,7 @@ inline bool index_matches(const Cont& index, const VT& entry) {
return (index.find(entry) != index.end());
}
-inline void insert_if_exists(boost::dynamic_bitset<>& cont, const boost::dynamic_bitset<>* other) {
+inline void insert_if_exists(boost::dynamic_bitset<>& cont, const boost::dynamic_bitset<> *other) {
if (other && !other->empty()) {
cont.resize(std::max(cont.size(), other->size()));
cont |= *other;
@@ -351,10 +365,10 @@ inline void writeUTF8String(std::ostream& output, const UChar *str, size_t len =
len = u_strlen(str);
}
- std::vector<char> buffer(len*4);
+ std::vector<char> buffer(len * 4);
int32_t olen = 0;
UErrorCode status = U_ZERO_ERROR;
- u_strToUTF8(&buffer[0], len*4-1, &olen, str, len, &status);
+ u_strToUTF8(&buffer[0], len * 4 - 1, &olen, str, len, &status);
uint16_t cs = static_cast<uint16_t>(olen);
writeRaw(output, cs);
@@ -465,9 +479,9 @@ inline void GAppSetOpts_ranged(const char *value, Cont& cont) {
const char *nextc = strchr(comma, ',');
if (delim && (nextc == 0 || nextc > delim)) {
had_range = true;
- high = abs(atoi(delim+1));
+ high = abs(atoi(delim + 1));
}
- for (; low <= high ; ++low) {
+ for (; low <= high; ++low) {
cont.push_back(low);
}
} while ((comma = strchr(comma, ',')) != 0 && ++comma && *comma != 0);
@@ -475,7 +489,7 @@ inline void GAppSetOpts_ranged(const char *value, Cont& cont) {
if (cont.size() == 1 && !had_range) {
uint32_t val = cont.front();
cont.clear();
- for (uint32_t i=1 ; i<=val ; ++i) {
+ for (uint32_t i = 1; i <= val; ++i) {
cont.push_back(i);
}
}
@@ -484,10 +498,10 @@ inline void GAppSetOpts_ranged(const char *value, Cont& cont) {
template<typename T>
class swapper {
public:
- swapper(bool cond, T& a, T& b) :
- cond(cond),
- a(a),
- b(b)
+ swapper(bool cond, T& a, T& b)
+ : cond(cond)
+ , a(a)
+ , b(b)
{
if (cond) {
std::swap(a, b);
@@ -508,9 +522,9 @@ private:
class swapper_false {
public:
- swapper_false(bool cond, bool& b) :
- val(false),
- swp(cond, val, b)
+ swapper_false(bool cond, bool& b)
+ : val(false)
+ , swp(cond, val, b)
{}
private:
@@ -521,9 +535,9 @@ private:
template<typename T>
class uncond_swap {
public:
- uncond_swap(T& a, T b) :
- a_(a),
- b_(b)
+ uncond_swap(T& a, T b)
+ : a_(a)
+ , b_(b)
{
std::swap(a_, b_);
}
@@ -531,13 +545,36 @@ public:
~uncond_swap() {
std::swap(a_, b_);
}
+
private:
T& a_;
T b_;
};
template<typename T>
-inline T* reverse(T *head) {
+class inc_dec {
+public:
+ inc_dec()
+ : p(0)
+ {}
+
+ ~inc_dec() {
+ if (p) {
+ --(*p);
+ }
+ }
+
+ void inc(T& pt) {
+ p = &pt;
+ ++(*p);
+ }
+
+private:
+ T *p;
+};
+
+template<typename T>
+inline T *reverse(T *head) {
T *nr = 0;
while (head) {
T *next = head->next;
@@ -580,7 +617,7 @@ void pool_get(Pool& pool, Var& var) {
template<typename Pool, typename Var>
void pool_put(Pool& pool, Var& var) {
- pool.resize(pool.size()+1);
+ pool.resize(pool.size() + 1);
var.swap(pool.back());
}
@@ -613,8 +650,8 @@ template<typename Pool>
struct pool_cleaner {
Pool& pool;
- pool_cleaner(Pool& pool) :
- pool(pool)
+ pool_cleaner(Pool& pool)
+ : pool(pool)
{
}
@@ -624,7 +661,6 @@ struct pool_cleaner {
}
}
};
-
}
#endif
diff --git a/src/interval_vector.hpp b/src/interval_vector.hpp
index bb53c8b..2ec603e 100644
--- a/src/interval_vector.hpp
+++ b/src/interval_vector.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -36,12 +36,16 @@ private:
T lb;
T ub;
- explicit interval(T lb = T()) :
- lb(lb), ub(lb) {
+ explicit interval(T lb = T())
+ : lb(lb)
+ , ub(lb)
+ {
}
- explicit interval(T lb, T ub) :
- lb(lb), ub(ub) {
+ explicit interval(T lb, T ub)
+ : lb(lb)
+ , ub(ub)
+ {
}
bool operator<(const interval& o) const {
@@ -59,7 +63,7 @@ private:
size_t _size;
public:
- class const_iterator : public std::iterator<std::bidirectional_iterator_tag,T> {
+ class const_iterator : public std::iterator<std::bidirectional_iterator_tag, T> {
private:
const Cont *elements;
ContConstIter it;
@@ -68,26 +72,26 @@ public:
public:
typedef T reference;
- const_iterator() :
- elements(0),
- t(T())
+ const_iterator()
+ : elements(0)
+ , t(T())
{
}
- const_iterator(const Cont& elements, ContConstIter it) :
- elements(&elements),
- it(it),
- t(T())
+ const_iterator(const Cont& elements, ContConstIter it)
+ : elements(&elements)
+ , it(it)
+ , t(T())
{
if (it != elements.end()) {
t = it->lb;
}
}
- const_iterator(const Cont& elements, ContConstIter it, T t) :
- elements(&elements),
- it(it),
- t(t)
+ const_iterator(const Cont& elements, ContConstIter it, T t)
+ : elements(&elements)
+ , it(it)
+ , t(t)
{
}
@@ -152,16 +156,16 @@ public:
typedef T value_type;
typedef T key_type;
- interval_vector() :
- _size(0)
+ interval_vector()
+ : _size(0)
{
}
template<typename Iter>
- interval_vector(Iter b, const Iter& e) :
- _size(0)
+ interval_vector(Iter b, const Iter& e)
+ : _size(0)
{
- for (; b != e ; ++b) {
+ for (; b != e; ++b) {
insert(*b);
}
}
@@ -171,17 +175,17 @@ public:
if (it != elements.end() && t >= it->lb && t <= it->ub) {
return false;
}
- ContIter pr = it-1;
- if (it != elements.begin() && pr->ub+1 == t) {
+ ContIter pr = it - 1;
+ if (it != elements.begin() && pr->ub + 1 == t) {
++pr->ub;
- if (it != elements.end() && pr->ub+1 == it->lb) {
+ if (it != elements.end() && pr->ub + 1 == it->lb) {
pr->ub = it->ub;
elements.erase(it);
}
}
- else if (it != elements.end() && it->lb == t+1) {
+ else if (it != elements.end() && it->lb == t + 1) {
--it->lb;
- if (it != elements.begin() && pr->ub+1 == it->lb) {
+ if (it != elements.begin() && pr->ub + 1 == it->lb) {
pr->ub = it->ub;
elements.erase(it);
}
@@ -228,8 +232,8 @@ public:
return true;
}
if (it->lb < t && it->ub > t) {
- elements.insert(it+1, interval(t+1, it->ub));
- it->ub = t-1;
+ elements.insert(it + 1, interval(t + 1, it->ub));
+ it->ub = t - 1;
--_size;
return true;
}
@@ -319,7 +323,7 @@ public:
while (a != elements.end() && b != o.elements.end() && a->ub >= b->lb && b->ub >= a->lb) {
const T lb = std::max(a->lb, b->lb);
const T ub = std::min(a->ub, b->ub);
- if (!rv.elements.empty() && rv.elements.back().ub+1 == lb) {
+ if (!rv.elements.empty() && rv.elements.back().ub + 1 == lb) {
rv.elements.back().ub = ub;
}
else {
@@ -340,7 +344,6 @@ public:
};
typedef interval_vector<uint32_t> uint32IntervalVector;
-
}
#endif
diff --git a/src/istream.hpp b/src/istream.hpp
index 1f6eecc..ef7f6cd 100644
--- a/src/istream.hpp
+++ b/src/istream.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -29,9 +29,9 @@ namespace CG3 {
class istream {
public:
- istream(UFILE *s, bool strip_bom=true) :
- stream(s),
- raw(u_fgetfile(stream))
+ istream(UFILE *s, bool strip_bom = true)
+ : stream(s)
+ , raw(u_fgetfile(stream))
{
if (strip_bom) {
UChar32 bom = u_fgetcx(stream);
@@ -72,18 +72,18 @@ private:
class istream_buffer : public istream {
public:
- istream_buffer(UFILE *s, const UString& b) :
- istream(s),
- offset(0),
- raw_offset(0),
- buffer(b)
+ istream_buffer(UFILE *s, const UString& b)
+ : istream(s)
+ , offset(0)
+ , raw_offset(0)
+ , buffer(b)
{
- buffer.resize(buffer.size()+1);
- buffer.resize(buffer.size()-1);
+ buffer.resize(buffer.size() + 1);
+ buffer.resize(buffer.size() - 1);
}
UBool eof() {
- if (offset >= buffer.size() || raw_offset >= buffer.size()*sizeof(buffer[0])) {
+ if (offset >= buffer.size() || raw_offset >= buffer.size() * sizeof(buffer[0])) {
return istream::eof();
}
return false;
@@ -91,18 +91,18 @@ public:
UChar *gets(UChar *s, int32_t m) {
if (offset < buffer.size()) {
- std::fill(s, s+m, 0);
+ std::fill(s, s + m, 0);
UChar *p = &buffer[offset];
UChar *n = p;
SKIPLN(n);
- if (n-p > m) {
- n = p+m;
+ if (n - p > m) {
+ n = p + m;
}
std::copy(p, n, s);
- size_t len = n-p;
+ size_t len = n - p;
offset += len;
if (!ISNL(n[-1])) {
- istream::gets(s + (len-1), m - len);
+ istream::gets(s + (len - 1), m - len);
}
return s;
}
@@ -117,7 +117,7 @@ public:
}
int getc_raw() {
- if (raw_offset < buffer.size()*sizeof(buffer[0])) {
+ if (raw_offset < buffer.size() * sizeof(buffer[0])) {
return reinterpret_cast<char*>(&buffer[0])[raw_offset++];
}
return istream::getc_raw();
@@ -127,7 +127,6 @@ private:
size_t offset, raw_offset;
UString buffer;
};
-
}
#endif
diff --git a/src/libcg3.cpp b/src/libcg3.cpp
index f714508..a7b4954 100644
--- a/src/libcg3.cpp
+++ b/src/libcg3.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -32,9 +32,9 @@ using namespace CG3;
#include "cg3.h"
namespace {
- UFILE *ux_stdin = 0;
- UFILE *ux_stdout = 0;
- UFILE *ux_stderr = 0;
+UFILE *ux_stdin = 0;
+UFILE *ux_stdout = 0;
+UFILE *ux_stderr = 0;
}
cg3_status cg3_init(FILE *in, FILE *out, FILE *err) {
@@ -102,7 +102,7 @@ cg3_grammar *cg3_grammar_load(const char *filename) {
grammar->ux_stderr = ux_stderr;
grammar->ux_stdout = ux_stdout;
- std::auto_ptr<IGrammarParser> parser;
+ boost::scoped_ptr<IGrammarParser> parser;
if (cbuffers[0][0] == 'C' && cbuffers[0][1] == 'G' && cbuffers[0][2] == '3' && cbuffers[0][3] == 'B') {
u_fprintf(ux_stderr, "CG3 Info: Binary grammar detected.\n");
@@ -136,19 +136,19 @@ cg3_applicator *cg3_applicator_create(cg3_grammar *grammar_) {
void cg3_applicator_setflags(cg3_applicator *applicator_, uint32_t flags) {
GrammarApplicator *applicator = static_cast<GrammarApplicator*>(applicator_);
- applicator->ordered = (flags & CG3F_ORDERED) != 0;
- applicator->unsafe = (flags & CG3F_UNSAFE) != 0;
- applicator->apply_mappings = (flags & CG3F_NO_MAPPINGS) == 0;
- applicator->apply_corrections = (flags & CG3F_NO_CORRECTIONS) == 0;
+ applicator->ordered = (flags & CG3F_ORDERED) != 0;
+ applicator->unsafe = (flags & CG3F_UNSAFE) != 0;
+ applicator->apply_mappings = (flags & CG3F_NO_MAPPINGS) == 0;
+ applicator->apply_corrections = (flags & CG3F_NO_CORRECTIONS) == 0;
applicator->no_before_sections = (flags & CG3F_NO_BEFORE_SECTIONS) != 0;
- applicator->no_sections = (flags & CG3F_NO_SECTIONS) != 0;
- applicator->no_after_sections = (flags & CG3F_NO_AFTER_SECTIONS) != 0;
- applicator->trace = (flags & CG3F_TRACE) != 0;
- applicator->section_max_count = (flags & CG3F_SINGLE_RUN) != 0;
- applicator->always_span = (flags & CG3F_ALWAYS_SPAN) != 0;
- applicator->dep_block_loops = (flags & CG3F_DEP_ALLOW_LOOPS) == 0;
- applicator->dep_block_crossing = (flags & CG3F_DEP_NO_CROSSING) != 0;
- applicator->no_pass_origin = (flags & CG3F_NO_PASS_ORIGIN) != 0;
+ applicator->no_sections = (flags & CG3F_NO_SECTIONS) != 0;
+ applicator->no_after_sections = (flags & CG3F_NO_AFTER_SECTIONS) != 0;
+ applicator->trace = (flags & CG3F_TRACE) != 0;
+ applicator->section_max_count = (flags & CG3F_SINGLE_RUN) != 0;
+ applicator->always_span = (flags & CG3F_ALWAYS_SPAN) != 0;
+ applicator->dep_block_loops = (flags & CG3F_DEP_ALLOW_LOOPS) == 0;
+ applicator->dep_block_crossing = (flags & CG3F_DEP_NO_CROSSING) != 0;
+ applicator->no_pass_origin = (flags & CG3F_NO_PASS_ORIGIN) != 0;
}
void cg3_applicator_setoption(cg3_applicator *applicator_, cg3_option option, void *value_) {
@@ -156,7 +156,7 @@ void cg3_applicator_setoption(cg3_applicator *applicator_, cg3_option option, vo
switch (option) {
case CG3O_SECTIONS: {
uint32_t *value = static_cast<uint32_t*>(value_);
- for (uint32_t i=1 ; i<=*value ; ++i) {
+ for (uint32_t i = 1; i <= *value; ++i) {
applicator->sections.push_back(i);
}
break;
@@ -183,7 +183,9 @@ cg3_sentence *cg3_sentence_new(cg3_applicator *applicator_) {
return current;
}
-#pragma GCC visibility push(hidden)
+#ifndef _MSC_VER
+ #pragma GCC visibility push(hidden)
+#endif
inline Tag *_tag_copy(GrammarApplicator *to, Tag *t) {
Tag *nt = to->addTag(t->tag);
return nt;
@@ -227,7 +229,9 @@ inline Cohort *_cohort_copy(SingleWindow *ns, Cohort *oc) {
}
return nc;
}
-#pragma GCC visibility pop
+#ifndef _MSC_VER
+ #pragma GCC visibility pop
+#endif
cg3_sentence *cg3_sentence_copy(cg3_sentence *sentence_, cg3_applicator *applicator_) {
GrammarApplicator *applicator = static_cast<GrammarApplicator*>(applicator_);
@@ -319,8 +323,8 @@ void cg3_cohort_getrelation_u(cg3_cohort *cohort_, const UChar *rel, uint32_t *r
GrammarApplicator *ga = cohort->parent->parent->parent;
if ((cohort->type & CT_RELATED) && !cohort->relations.empty()) {
- foreach (RelationCtn, cohort->relations, miter, miter_end) {
- foreach (uint32Set, miter->second, siter, siter_end) {
+ foreach (miter, cohort->relations) {
+ foreach (siter, miter->second) {
if (u_strcmp(ga->single_tags.find(miter->first)->second->tag.c_str(), rel) == 0) {
*rel_parent = *siter;
}
@@ -454,7 +458,7 @@ cg3_tag *cg3_tag_create_u(cg3_applicator *applicator_, const UChar *text) {
cg3_tag *cg3_tag_create_u8(cg3_applicator *applicator, const char *text) {
UErrorCode status = U_ZERO_ERROR;
- u_strFromUTF8(&gbuffers[0][0], CG3_BUFFER_SIZE-1, 0, text, strlen(text), &status);
+ u_strFromUTF8(&gbuffers[0][0], CG3_BUFFER_SIZE - 1, 0, text, strlen(text), &status);
if (U_FAILURE(status)) {
u_fprintf(ux_stderr, "CG3 Error: Failed to convert text from UTF-8 to UTF-16. Status = %s\n", u_errorName(status));
return 0;
@@ -475,7 +479,7 @@ cg3_tag *cg3_tag_create_u32(cg3_applicator *applicator, const uint32_t *text) {
++length;
}
- u_strFromUTF32(&gbuffers[0][0], CG3_BUFFER_SIZE-1, 0, reinterpret_cast<const UChar32*>(text), length, &status);
+ u_strFromUTF32(&gbuffers[0][0], CG3_BUFFER_SIZE - 1, 0, reinterpret_cast<const UChar32*>(text), length, &status);
if (U_FAILURE(status)) {
u_fprintf(ux_stderr, "CG3 Error: Failed to convert text from UTF-32 to UTF-16. Status = %s\n", u_errorName(status));
return 0;
@@ -487,7 +491,7 @@ cg3_tag *cg3_tag_create_u32(cg3_applicator *applicator, const uint32_t *text) {
cg3_tag *cg3_tag_create_w(cg3_applicator *applicator, const wchar_t *text) {
UErrorCode status = U_ZERO_ERROR;
- u_strFromWCS(&gbuffers[0][0], CG3_BUFFER_SIZE-1, 0, text, wcslen(text), &status);
+ u_strFromWCS(&gbuffers[0][0], CG3_BUFFER_SIZE - 1, 0, text, wcslen(text), &status);
if (U_FAILURE(status)) {
u_fprintf(ux_stderr, "CG3 Error: Failed to convert text from wchar_t to UTF-16. Status = %s\n", u_errorName(status));
return 0;
@@ -505,7 +509,7 @@ const char *cg3_tag_gettext_u8(cg3_tag *tag_) {
Tag *tag = static_cast<Tag*>(tag_);
UErrorCode status = U_ZERO_ERROR;
- u_strToUTF8(&cbuffers[0][0], CG3_BUFFER_SIZE-1, 0, tag->tag.c_str(), tag->tag.length(), &status);
+ u_strToUTF8(&cbuffers[0][0], CG3_BUFFER_SIZE - 1, 0, tag->tag.c_str(), tag->tag.length(), &status);
if (U_FAILURE(status)) {
u_fprintf(ux_stderr, "CG3 Error: Failed to convert text from UChar to UTF-8. Status = %s\n", u_errorName(status));
return 0;
@@ -525,7 +529,7 @@ const uint32_t *cg3_tag_gettext_u32(cg3_tag *tag_) {
UChar32 *tmp = reinterpret_cast<UChar32*>(&cbuffers[0][0]);
- u_strToUTF32(tmp, (CG3_BUFFER_SIZE/sizeof(UChar32))-1, 0, tag->tag.c_str(), tag->tag.length(), &status);
+ u_strToUTF32(tmp, (CG3_BUFFER_SIZE / sizeof(UChar32)) - 1, 0, tag->tag.c_str(), tag->tag.length(), &status);
if (U_FAILURE(status)) {
u_fprintf(ux_stderr, "CG3 Error: Failed to convert text from UChar to UTF-32. Status = %s\n", u_errorName(status));
return 0;
@@ -540,7 +544,7 @@ const wchar_t *cg3_tag_gettext_w(cg3_tag *tag_) {
wchar_t *tmp = reinterpret_cast<wchar_t*>(&cbuffers[0][0]);
- u_strToWCS(tmp, (CG3_BUFFER_SIZE/sizeof(wchar_t))-1, 0, tag->tag.c_str(), tag->tag.length(), &status);
+ u_strToWCS(tmp, (CG3_BUFFER_SIZE / sizeof(wchar_t)) - 1, 0, tag->tag.c_str(), tag->tag.length(), &status);
if (U_FAILURE(status)) {
u_fprintf(ux_stderr, "CG3 Error: Failed to convert text from UChar to UTF-32. Status = %s\n", u_errorName(status));
return 0;
diff --git a/src/macros.hpp b/src/macros.hpp
deleted file mode 100644
index bfca73c..0000000
--- a/src/macros.hpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
-* Developed by Tino Didriksen <mail at tinodidriksen.com>
-* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
-*
-* This file is part of VISL CG-3
-*
-* VISL CG-3 is free software: you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation, either version 3 of the License, or
-* (at your option) any later version.
-*
-* VISL CG-3 is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with VISL CG-3. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#pragma once
-#ifndef c6d28b7452ec699b_MACROS_H
-#define c6d28b7452ec699b_MACROS_H
-
-#define foreach(type, container, iter, iter_end) \
- if (!(container).empty()) \
- for (type::iterator iter = (container).begin(), iter_end = (container).end() ; iter != iter_end ; ++iter)
-
-#define const_foreach(type, container, iter, iter_end) \
- if (!(container).empty()) \
- for (type::const_iterator iter = (container).begin(), iter_end = (container).end() ; iter != iter_end ; ++iter)
-
-#define reverse_foreach(type, container, iter, iter_end) \
- if (!(container).empty()) \
- for (type::reverse_iterator iter = (container).rbegin(), iter_end = (container).rend() ; iter != iter_end ; ++iter)
-
-#define reverse_const_foreach(type, container, iter, iter_end) \
- if (!(container).empty()) \
- for (type::reverse_const_iterator iter = (container).rbegin(), iter_end = (container).rend() ; iter != iter_end ; ++iter)
-
-#endif
diff --git a/src/main.cpp b/src/main.cpp
index bfa2428..a819d74 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -32,7 +32,7 @@ using namespace Options;
using CG3::CG3Quit;
void GAppSetOpts(CG3::GrammarApplicator& applicator, UConverter *conv);
-int main(int argc, char* argv[]) {
+int main(int argc, char *argv[]) {
UFILE *ux_stdin = 0;
UFILE *ux_stdout = 0;
UFILE *ux_stderr = 0;
@@ -78,13 +78,13 @@ int main(int argc, char* argv[]) {
fprintf(out, "Options:\n");
size_t longest = 0;
- for (uint32_t i=0 ; i<NUM_OPTIONS ; i++) {
+ for (uint32_t i = 0; i < NUM_OPTIONS; i++) {
if (options[i].description) {
size_t len = strlen(options[i].longName);
longest = std::max(longest, len);
}
}
- for (uint32_t i=0 ; i<NUM_OPTIONS ; i++) {
+ for (uint32_t i = 0; i < NUM_OPTIONS; i++) {
if (options[i].description) {
fprintf(out, " ");
if (options[i].shortName) {
@@ -109,7 +109,7 @@ int main(int argc, char* argv[]) {
fflush(out);
fflush(stderr);
- if (options[SHOW_UNUSED_SETS].doesOccur || options[SHOW_SET_HASHES].doesOccur) {
+ if (options[SHOW_UNUSED_SETS].doesOccur || options[SHOW_SET_HASHES].doesOccur || options[DUMP_AST].doesOccur) {
options[GRAMMAR_ONLY].doesOccur = true;
}
@@ -138,8 +138,8 @@ int main(int argc, char* argv[]) {
ucnv_setDefaultName("UTF-8");
const char *codepage_default = ucnv_getDefaultName();
const char *codepage_grammar = codepage_default;
- const char *codepage_input = codepage_grammar;
- const char *codepage_output = codepage_grammar;
+ const char *codepage_input = codepage_grammar;
+ const char *codepage_output = codepage_grammar;
if (options[CODEPAGE_GRAMMAR].doesOccur) {
codepage_grammar = options[CODEPAGE_GRAMMAR].value;
@@ -235,10 +235,14 @@ int main(int argc, char* argv[]) {
if (options[VERBOSE].doesOccur) {
std::cerr << "Info: Binary grammar detected." << std::endl;
}
+ if (options[DUMP_AST].doesOccur) {
+ std::cerr << "Error: --dump-ast is for textual grammars only!" << std::endl;
+ CG3Quit(1);
+ }
parser = new CG3::BinaryGrammar(grammar, ux_stderr);
}
else {
- parser = new CG3::TextualParser(grammar, ux_stderr);
+ parser = new CG3::TextualParser(grammar, ux_stderr, options[DUMP_AST].doesOccur != 0);
}
if (options[VERBOSE].doesOccur) {
if (options[VERBOSE].value) {
@@ -256,7 +260,7 @@ int main(int argc, char* argv[]) {
parser->setCompatible(options[VISLCGCOMPAT].doesOccur != 0);
if (options[VERBOSE].doesOccur) {
- std::cerr << "Initialization took " << (clock()-main_timer)/(double)CLOCKS_PER_SEC << " seconds." << std::endl;
+ std::cerr << "Initialization took " << (clock() - main_timer) / (double)CLOCKS_PER_SEC << " seconds." << std::endl;
}
main_timer = clock();
@@ -265,10 +269,14 @@ int main(int argc, char* argv[]) {
CG3Quit(1);
}
+ if (options[DUMP_AST].doesOccur) {
+ dynamic_cast<CG3::TextualParser*>(parser)->print_ast(ux_stdout);
+ }
+
if (options[MAPPING_PREFIX].doesOccur) {
UConverter *conv = ucnv_open(codepage_cli, &status);
size_t sn = strlen(options[MAPPING_PREFIX].value);
- CG3::UString buf(sn*3, 0);
+ CG3::UString buf(sn * 3, 0);
ucnv_toUChars(conv, &buf[0], buf.size(), options[MAPPING_PREFIX].value, sn, &status);
if (grammar.is_binary && grammar.mapping_prefix != buf[0]) {
std::cerr << "Error: Mapping prefix must match the one used for compiling the binary grammar!" << std::endl;
@@ -286,7 +294,7 @@ int main(int argc, char* argv[]) {
parser = 0;
if (options[VERBOSE].doesOccur) {
- std::cerr << "Parsing grammar took " << (clock()-main_timer)/(double)CLOCKS_PER_SEC << " seconds." << std::endl;
+ std::cerr << "Parsing grammar took " << (clock() - main_timer) / (double)CLOCKS_PER_SEC << " seconds." << std::endl;
}
main_timer = clock();
@@ -343,21 +351,21 @@ int main(int argc, char* argv[]) {
applicator.runGrammarOnText(instream, ux_stdout);
if (options[VERBOSE].doesOccur) {
- std::cerr << "Applying grammar on input took " << (clock()-main_timer)/(double)CLOCKS_PER_SEC << " seconds." << std::endl;
+ std::cerr << "Applying grammar on input took " << (clock() - main_timer) / (double)CLOCKS_PER_SEC << " seconds." << std::endl;
}
main_timer = clock();
}
if (options[OPTIMIZE_UNSAFE].doesOccur) {
std::vector<uint32_t> bad;
- foreach (CG3::RuleVector, grammar.rule_by_number, ir, ir_end) {
+ foreach (ir, grammar.rule_by_number) {
if ((*ir)->num_match == 0) {
bad.push_back((*ir)->number);
}
}
- reverse_foreach (std::vector<uint32_t>, bad, br, br_end) {
+ reverse_foreach (br, bad) {
CG3::Rule *r = grammar.rule_by_number[*br];
- grammar.rule_by_number.erase(grammar.rule_by_number.begin()+*br);
+ grammar.rule_by_number.erase(grammar.rule_by_number.begin() + *br);
grammar.destroyRule(r);
}
std::cerr << "Optimizer removed " << bad.size() << " rules." << std::endl;
@@ -366,15 +374,15 @@ int main(int argc, char* argv[]) {
}
if (options[OPTIMIZE_SAFE].doesOccur) {
CG3::RuleVector bad;
- foreach (CG3::RuleVector, grammar.rule_by_number, ir, ir_end) {
+ foreach (ir, grammar.rule_by_number) {
if ((*ir)->num_match == 0) {
bad.push_back(*ir);
}
}
- reverse_foreach (CG3::RuleVector, bad, br, br_end) {
+ reverse_foreach (br, bad) {
grammar.rule_by_number.erase(grammar.rule_by_number.begin() + (*br)->number);
}
- foreach (CG3::RuleVector, bad, br, br_end) {
+ foreach (br, bad) {
(*br)->number = grammar.rule_by_number.size();
grammar.rule_by_number.push_back(*br);
}
@@ -393,7 +401,7 @@ int main(int argc, char* argv[]) {
writer.writeGrammar(gout);
if (options[VERBOSE].doesOccur) {
- std::cerr << "Writing textual grammar took " << (clock()-main_timer)/(double)CLOCKS_PER_SEC << " seconds." << std::endl;
+ std::cerr << "Writing textual grammar took " << (clock() - main_timer) / (double)CLOCKS_PER_SEC << " seconds." << std::endl;
}
main_timer = clock();
}
@@ -409,7 +417,7 @@ int main(int argc, char* argv[]) {
writer.writeBinaryGrammar(gout);
if (options[VERBOSE].doesOccur) {
- std::cerr << "Writing binary grammar took " << (clock()-main_timer)/(double)CLOCKS_PER_SEC << " seconds." << std::endl;
+ std::cerr << "Writing binary grammar took " << (clock() - main_timer) / (double)CLOCKS_PER_SEC << " seconds." << std::endl;
}
main_timer = clock();
}
@@ -425,7 +433,7 @@ int main(int argc, char* argv[]) {
u_cleanup();
if (options[VERBOSE].doesOccur) {
- std::cerr << "Cleanup took " << (clock()-main_timer)/(double)CLOCKS_PER_SEC << " seconds." << std::endl;
+ std::cerr << "Cleanup took " << (clock() - main_timer) / (double)CLOCKS_PER_SEC << " seconds." << std::endl;
}
return status;
@@ -507,11 +515,11 @@ void GAppSetOpts(CG3::GrammarApplicator& applicator, UConverter *conv) {
else {
UErrorCode status = U_ZERO_ERROR;
size_t sn = strlen(options[RULE].value);
- UChar *buf = new UChar[sn*3];
+ UChar *buf = new UChar[sn * 3];
buf[0] = 0;
- ucnv_toUChars(conv, buf, sn*3, options[RULE].value, sn, &status);
+ ucnv_toUChars(conv, buf, sn * 3, options[RULE].value, sn, &status);
- const_foreach (CG3::RuleVector, applicator.grammar->rule_by_number, riter, riter_end) {
+ foreach (riter, applicator.grammar->rule_by_number) {
const CG3::Rule *rule = *riter;
if (rule->name && u_strcmp(rule->name, buf) == 0) {
applicator.valid_rules.push_back(rule->number);
diff --git a/src/options.hpp b/src/options.hpp
index c44332d..2f90ba5 100644
--- a/src/options.hpp
+++ b/src/options.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,135 +26,137 @@
#include <uoptions.h>
namespace Options {
- enum OPTIONS {
- HELP1,
- HELP2,
- VERSION,
- VERSION_TOO_OLD,
- GRAMMAR,
- GRAMMAR_OUT,
- GRAMMAR_BIN,
- GRAMMAR_ONLY,
- ORDERED,
- UNSAFE,
- SECTIONS,
- RULES,
- RULE,
- DODEBUG,
- VERBOSE,
- QUIET,
- VISLCGCOMPAT,
- STDIN,
- STDOUT,
- STDERR,
- CODEPAGE_GLOBAL,
- CODEPAGE_GRAMMAR,
- CODEPAGE_INPUT,
- CODEPAGE_OUTPUT,
- NOMAPPINGS,
- NOCORRECTIONS,
- NOBEFORESECTIONS,
- NOSECTIONS,
- NOAFTERSECTIONS,
- TRACE,
- TRACE_NAME_ONLY,
- TRACE_NO_REMOVED,
- TRACE_ENCL,
- DRYRUN,
- SINGLERUN,
- MAXRUNS,
- STATISTICS,
- OPTIMIZE_UNSAFE,
- OPTIMIZE_SAFE,
- MAPPING_PREFIX,
- UNICODE_TAGS,
- UNIQUE_TAGS,
- NUM_WINDOWS,
- ALWAYS_SPAN,
- SOFT_LIMIT,
- HARD_LIMIT,
- DEP_DELIMIT,
- DEP_ORIGINAL,
- DEP_ALLOW_LOOPS,
- DEP_BLOCK_CROSSING,
- MAGIC_READINGS,
- NO_PASS_ORIGIN,
- SPLIT_MAPPINGS,
- SHOW_END_TAGS,
- SHOW_UNUSED_SETS,
- SHOW_TAGS,
- SHOW_TAG_HASHES,
- SHOW_SET_HASHES,
- NUM_OPTIONS
- };
+enum OPTIONS {
+ HELP1,
+ HELP2,
+ VERSION,
+ VERSION_TOO_OLD,
+ GRAMMAR,
+ GRAMMAR_OUT,
+ GRAMMAR_BIN,
+ GRAMMAR_ONLY,
+ ORDERED,
+ UNSAFE,
+ SECTIONS,
+ RULES,
+ RULE,
+ DODEBUG,
+ VERBOSE,
+ QUIET,
+ VISLCGCOMPAT,
+ STDIN,
+ STDOUT,
+ STDERR,
+ CODEPAGE_GLOBAL,
+ CODEPAGE_GRAMMAR,
+ CODEPAGE_INPUT,
+ CODEPAGE_OUTPUT,
+ NOMAPPINGS,
+ NOCORRECTIONS,
+ NOBEFORESECTIONS,
+ NOSECTIONS,
+ NOAFTERSECTIONS,
+ TRACE,
+ TRACE_NAME_ONLY,
+ TRACE_NO_REMOVED,
+ TRACE_ENCL,
+ DRYRUN,
+ SINGLERUN,
+ MAXRUNS,
+ STATISTICS,
+ OPTIMIZE_UNSAFE,
+ OPTIMIZE_SAFE,
+ MAPPING_PREFIX,
+ UNICODE_TAGS,
+ UNIQUE_TAGS,
+ NUM_WINDOWS,
+ ALWAYS_SPAN,
+ SOFT_LIMIT,
+ HARD_LIMIT,
+ DEP_DELIMIT,
+ DEP_ORIGINAL,
+ DEP_ALLOW_LOOPS,
+ DEP_BLOCK_CROSSING,
+ MAGIC_READINGS,
+ NO_PASS_ORIGIN,
+ SPLIT_MAPPINGS,
+ SHOW_END_TAGS,
+ SHOW_UNUSED_SETS,
+ SHOW_TAGS,
+ SHOW_TAG_HASHES,
+ SHOW_SET_HASHES,
+ DUMP_AST,
+ NUM_OPTIONS,
+};
- UOption options[]= {
- UOPTION_DEF_D("help", 'h', UOPT_NO_ARG, "shows this help"),
- UOPTION_DEF_D("?", '?', UOPT_NO_ARG, "shows this help"),
- UOPTION_DEF_D("version", 'V', UOPT_NO_ARG, "prints copyright and version information"),
- UOPTION_DEF_D("min-binary-revision", 0, UOPT_NO_ARG, "prints the minimum usable binary grammar revision"),
- UOPTION_DEF_D("grammar", 'g', UOPT_REQUIRES_ARG, "specifies the grammar file to use for disambiguation"),
- UOPTION_DEF_D("grammar-out", 0, UOPT_REQUIRES_ARG, "writes the compiled grammar in textual form to a file"),
- UOPTION_DEF_D("grammar-bin", 0, UOPT_REQUIRES_ARG, "writes the compiled grammar in binary form to a file"),
- UOPTION_DEF_D("grammar-only", 0, UOPT_NO_ARG, "only compiles the grammar; implies --verbose"),
- UOPTION_DEF_D("ordered", 0, UOPT_NO_ARG, "(will in future allow full ordered matching)"),
- UOPTION_DEF_D("unsafe", 'u', UOPT_NO_ARG, "allows the removal of all readings in a cohort, even the last one"),
- UOPTION_DEF_D("sections", 's', UOPT_REQUIRES_ARG, "number or ranges of sections to run; defaults to all sections"),
- UOPTION_DEF_D("rules", 0, UOPT_REQUIRES_ARG, "number or ranges of rules to run; defaults to all rules"),
- UOPTION_DEF_D("rule", 0, UOPT_REQUIRES_ARG, "a name or number of a single rule to run"),
- UOPTION_DEF_D("debug", 'd', UOPT_OPTIONAL_ARG, "enables debug output (very noisy)"),
- UOPTION_DEF_D("verbose", 'v', UOPT_OPTIONAL_ARG, "increases verbosity"),
- UOPTION_DEF_D("quiet", 0, UOPT_NO_ARG, "squelches warnings (same as -v 0)"),
- UOPTION_DEF_D("vislcg-compat", '2', UOPT_NO_ARG, "enables compatibility mode for older CG-2 and vislcg grammars"),
+UOption options[] = {
+ UOPTION_DEF_D("help", 'h', UOPT_NO_ARG, "shows this help"),
+ UOPTION_DEF_D("?", '?', UOPT_NO_ARG, "shows this help"),
+ UOPTION_DEF_D("version", 'V', UOPT_NO_ARG, "prints copyright and version information"),
+ UOPTION_DEF_D("min-binary-revision", 0, UOPT_NO_ARG, "prints the minimum usable binary grammar revision"),
+ UOPTION_DEF_D("grammar", 'g', UOPT_REQUIRES_ARG, "specifies the grammar file to use for disambiguation"),
+ UOPTION_DEF_D("grammar-out", 0, UOPT_REQUIRES_ARG, "writes the compiled grammar in textual form to a file"),
+ UOPTION_DEF_D("grammar-bin", 0, UOPT_REQUIRES_ARG, "writes the compiled grammar in binary form to a file"),
+ UOPTION_DEF_D("grammar-only", 0, UOPT_NO_ARG, "only compiles the grammar; implies --verbose"),
+ UOPTION_DEF_D("ordered", 0, UOPT_NO_ARG, "(will in future allow full ordered matching)"),
+ UOPTION_DEF_D("unsafe", 'u', UOPT_NO_ARG, "allows the removal of all readings in a cohort, even the last one"),
+ UOPTION_DEF_D("sections", 's', UOPT_REQUIRES_ARG, "number or ranges of sections to run; defaults to all sections"),
+ UOPTION_DEF_D("rules", 0, UOPT_REQUIRES_ARG, "number or ranges of rules to run; defaults to all rules"),
+ UOPTION_DEF_D("rule", 0, UOPT_REQUIRES_ARG, "a name or number of a single rule to run"),
+ UOPTION_DEF_D("debug", 'd', UOPT_OPTIONAL_ARG, "enables debug output (very noisy)"),
+ UOPTION_DEF_D("verbose", 'v', UOPT_OPTIONAL_ARG, "increases verbosity"),
+ UOPTION_DEF_D("quiet", 0, UOPT_NO_ARG, "squelches warnings (same as -v 0)"),
+ UOPTION_DEF_D("vislcg-compat", '2', UOPT_NO_ARG, "enables compatibility mode for older CG-2 and vislcg grammars"),
- UOPTION_DEF_D("stdin", 'I', UOPT_REQUIRES_ARG, "file to read input from instead of stdin"),
- UOPTION_DEF_D("stdout", 'O', UOPT_REQUIRES_ARG, "file to print output to instead of stdout"),
- UOPTION_DEF_D("stderr", 'E', UOPT_REQUIRES_ARG, "file to print errors to instead of stderr"),
+ UOPTION_DEF_D("stdin", 'I', UOPT_REQUIRES_ARG, "file to read input from instead of stdin"),
+ UOPTION_DEF_D("stdout", 'O', UOPT_REQUIRES_ARG, "file to print output to instead of stdout"),
+ UOPTION_DEF_D("stderr", 'E', UOPT_REQUIRES_ARG, "file to print errors to instead of stderr"),
- UOPTION_DEF_D("codepage-all", 'C', UOPT_REQUIRES_ARG, "codepage to use for grammar, input, and output streams; defaults to UTF-8"),
- UOPTION_DEF_D("codepage-grammar", 0, UOPT_REQUIRES_ARG, "codepage to use for grammar; overrides --codepage-all"),
- UOPTION_DEF_D("codepage-input", 0, UOPT_REQUIRES_ARG, "codepage to use for input; overrides --codepage-all"),
- UOPTION_DEF_D("codepage-output", 0, UOPT_REQUIRES_ARG, "codepage to use for output and errors; overrides --codepage-all"),
+ UOPTION_DEF_D("codepage-all", 'C', UOPT_REQUIRES_ARG, "codepage to use for grammar, input, and output streams; defaults to UTF-8"),
+ UOPTION_DEF_D("codepage-grammar", 0, UOPT_REQUIRES_ARG, "codepage to use for grammar; overrides --codepage-all"),
+ UOPTION_DEF_D("codepage-input", 0, UOPT_REQUIRES_ARG, "codepage to use for input; overrides --codepage-all"),
+ UOPTION_DEF_D("codepage-output", 0, UOPT_REQUIRES_ARG, "codepage to use for output and errors; overrides --codepage-all"),
- UOPTION_DEF_D("no-mappings", 0, UOPT_NO_ARG, "disables all MAP, ADD, and REPLACE rules"),
- UOPTION_DEF_D("no-corrections", 0, UOPT_NO_ARG, "disables all SUBSTITUTE and APPEND rules"),
- UOPTION_DEF_D("no-before-sections", 0, UOPT_NO_ARG, "disables all rules in BEFORE-SECTIONS parts"),
- UOPTION_DEF_D("no-sections", 0, UOPT_NO_ARG, "disables all rules in SECTION parts"),
- UOPTION_DEF_D("no-after-sections", 0, UOPT_NO_ARG, "disables all rules in AFTER-SECTIONS parts"),
+ UOPTION_DEF_D("no-mappings", 0, UOPT_NO_ARG, "disables all MAP, ADD, and REPLACE rules"),
+ UOPTION_DEF_D("no-corrections", 0, UOPT_NO_ARG, "disables all SUBSTITUTE and APPEND rules"),
+ UOPTION_DEF_D("no-before-sections", 0, UOPT_NO_ARG, "disables all rules in BEFORE-SECTIONS parts"),
+ UOPTION_DEF_D("no-sections", 0, UOPT_NO_ARG, "disables all rules in SECTION parts"),
+ UOPTION_DEF_D("no-after-sections", 0, UOPT_NO_ARG, "disables all rules in AFTER-SECTIONS parts"),
- UOPTION_DEF_D("trace", 't', UOPT_NO_ARG, "prints debug output alongside with normal output"),
- UOPTION_DEF_D("trace-name-only", 0, UOPT_NO_ARG, "if a rule is named, omit the line number; implies --trace"),
- UOPTION_DEF_D("trace-no-removed", 0, UOPT_NO_ARG, "does not print removed readings; implies --trace"),
- UOPTION_DEF_D("trace-encl", 0, UOPT_NO_ARG, "traces which enclosure pass is currently happening; implies --trace"),
+ UOPTION_DEF_D("trace", 't', UOPT_NO_ARG, "prints debug output alongside with normal output"),
+ UOPTION_DEF_D("trace-name-only", 0, UOPT_NO_ARG, "if a rule is named, omit the line number; implies --trace"),
+ UOPTION_DEF_D("trace-no-removed", 0, UOPT_NO_ARG, "does not print removed readings; implies --trace"),
+ UOPTION_DEF_D("trace-encl", 0, UOPT_NO_ARG, "traces which enclosure pass is currently happening; implies --trace"),
- UOPTION_DEF_D("dry-run", 0, UOPT_NO_ARG, "make no actual changes to the input"),
- UOPTION_DEF_D("single-run", 0, UOPT_NO_ARG, "runs each section only once; same as --max-runs 1"),
- UOPTION_DEF_D("max-runs", 0, UOPT_REQUIRES_ARG, "runs each section max N times; defaults to unlimited (0)"),
- UOPTION_DEF_D("statistics", 'S', UOPT_NO_ARG, "gathers profiling statistics while applying grammar"),
- UOPTION_DEF_D("optimize-unsafe", 'Z', UOPT_NO_ARG, "destructively optimize the profiled grammar to be faster"),
- UOPTION_DEF_D("optimize-safe", 'z', UOPT_NO_ARG, "conservatively optimize the profiled grammar to be faster"),
- UOPTION_DEF_D("prefix", 'p', UOPT_REQUIRES_ARG, "sets the mapping prefix; defaults to @"),
- UOPTION_DEF_D("unicode-tags", 0, UOPT_NO_ARG, "outputs Unicode code points for things like ->"),
- UOPTION_DEF_D("unique-tags", 0, UOPT_NO_ARG, "outputs unique tags only once per reading"),
+ UOPTION_DEF_D("dry-run", 0, UOPT_NO_ARG, "make no actual changes to the input"),
+ UOPTION_DEF_D("single-run", 0, UOPT_NO_ARG, "runs each section only once; same as --max-runs 1"),
+ UOPTION_DEF_D("max-runs", 0, UOPT_REQUIRES_ARG, "runs each section max N times; defaults to unlimited (0)"),
+ UOPTION_DEF_D("statistics", 'S', UOPT_NO_ARG, "gathers profiling statistics while applying grammar"),
+ UOPTION_DEF_D("optimize-unsafe", 'Z', UOPT_NO_ARG, "destructively optimize the profiled grammar to be faster"),
+ UOPTION_DEF_D("optimize-safe", 'z', UOPT_NO_ARG, "conservatively optimize the profiled grammar to be faster"),
+ UOPTION_DEF_D("prefix", 'p', UOPT_REQUIRES_ARG, "sets the mapping prefix; defaults to @"),
+ UOPTION_DEF_D("unicode-tags", 0, UOPT_NO_ARG, "outputs Unicode code points for things like ->"),
+ UOPTION_DEF_D("unique-tags", 0, UOPT_NO_ARG, "outputs unique tags only once per reading"),
- UOPTION_DEF_D("num-windows", 0, UOPT_REQUIRES_ARG, "number of windows to keep in before/ahead buffers; defaults to 2"),
- UOPTION_DEF_D("always-span", 0, UOPT_NO_ARG, "forces scanning tests to always span across window boundaries"),
- UOPTION_DEF_D("soft-limit", 0, UOPT_REQUIRES_ARG, "number of cohorts after which the SOFT-DELIMITERS kick in; defaults to 300"),
- UOPTION_DEF_D("hard-limit", 0, UOPT_REQUIRES_ARG, "number of cohorts after which the window is forcefully cut; defaults to 500"),
- UOPTION_DEF_D("dep-delimit", 'D', UOPT_OPTIONAL_ARG, "delimit windows based on dependency instead of DELIMITERS; defaults to 10"),
- UOPTION_DEF_D("dep-original", 0, UOPT_NO_ARG, "outputs the original input dependency tag even if it is no longer valid"),
- UOPTION_DEF_D("dep-allow-loops", 0, UOPT_NO_ARG, "allows the creation of circular dependencies"),
- UOPTION_DEF_D("dep-no-crossing", 0, UOPT_NO_ARG, "prevents the creation of dependencies that would result in crossing branches"),
+ UOPTION_DEF_D("num-windows", 0, UOPT_REQUIRES_ARG, "number of windows to keep in before/ahead buffers; defaults to 2"),
+ UOPTION_DEF_D("always-span", 0, UOPT_NO_ARG, "forces scanning tests to always span across window boundaries"),
+ UOPTION_DEF_D("soft-limit", 0, UOPT_REQUIRES_ARG, "number of cohorts after which the SOFT-DELIMITERS kick in; defaults to 300"),
+ UOPTION_DEF_D("hard-limit", 0, UOPT_REQUIRES_ARG, "number of cohorts after which the window is forcefully cut; defaults to 500"),
+ UOPTION_DEF_D("dep-delimit", 'D', UOPT_OPTIONAL_ARG, "delimit windows based on dependency instead of DELIMITERS; defaults to 10"),
+ UOPTION_DEF_D("dep-original", 0, UOPT_NO_ARG, "outputs the original input dependency tag even if it is no longer valid"),
+ UOPTION_DEF_D("dep-allow-loops", 0, UOPT_NO_ARG, "allows the creation of circular dependencies"),
+ UOPTION_DEF_D("dep-no-crossing", 0, UOPT_NO_ARG, "prevents the creation of dependencies that would result in crossing branches"),
- UOPTION_DEF_D("no-magic-readings", 0, UOPT_NO_ARG, "prevents running rules on magic readings"),
- UOPTION_DEF_D("no-pass-origin", 'o', UOPT_NO_ARG, "prevents scanning tests from passing the point of origin"),
- UOPTION_DEF_D("split-mappings", 0, UOPT_NO_ARG, "keep mapped readings separate in output"),
- UOPTION_DEF_D("show-end-tags", 'e', UOPT_NO_ARG, "allows the <<< tags to appear in output"),
- UOPTION_DEF_D("show-unused-sets", 0, UOPT_NO_ARG, "prints a list of unused sets and their line numbers; implies --grammar-only"),
- UOPTION_DEF_D("show-tags", 0, UOPT_NO_ARG, "prints a list of unique tags; implies --grammar-only"),
- UOPTION_DEF_D("show-tag-hashes", 0, UOPT_NO_ARG, "prints a list of tags and their hashes as they are parsed during the run"),
- UOPTION_DEF_D("show-set-hashes", 0, UOPT_NO_ARG, "prints a list of sets and their hashes; implies --grammar-only")
- };
+ UOPTION_DEF_D("no-magic-readings", 0, UOPT_NO_ARG, "prevents running rules on magic readings"),
+ UOPTION_DEF_D("no-pass-origin", 'o', UOPT_NO_ARG, "prevents scanning tests from passing the point of origin"),
+ UOPTION_DEF_D("split-mappings", 0, UOPT_NO_ARG, "keep mapped readings separate in output"),
+ UOPTION_DEF_D("show-end-tags", 'e', UOPT_NO_ARG, "allows the <<< tags to appear in output"),
+ UOPTION_DEF_D("show-unused-sets", 0, UOPT_NO_ARG, "prints a list of unused sets and their line numbers; implies --grammar-only"),
+ UOPTION_DEF_D("show-tags", 0, UOPT_NO_ARG, "prints a list of unique tags; implies --grammar-only"),
+ UOPTION_DEF_D("show-tag-hashes", 0, UOPT_NO_ARG, "prints a list of tags and their hashes as they are parsed during the run"),
+ UOPTION_DEF_D("show-set-hashes", 0, UOPT_NO_ARG, "prints a list of sets and their hashes; implies --grammar-only"),
+ UOPTION_DEF_D("dump-ast", 0, UOPT_NO_ARG, "prints the grammar parse tree; implies --grammar-only"),
+};
}
#endif
diff --git a/src/options_conv.hpp b/src/options_conv.hpp
index 4d9d61f..6267835 100644
--- a/src/options_conv.hpp
+++ b/src/options_conv.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -26,52 +26,54 @@
#include <uoptions.h>
namespace Options {
- enum OPTIONS {
- HELP1,
- HELP2,
- MAPPING_PREFIX,
- IN_AUTO,
- IN_CG,
- IN_CG2,
- IN_NICELINE,
- IN_APERTIUM,
- IN_FST,
- IN_PLAIN,
- OUT_CG,
- OUT_CG2,
- OUT_APERTIUM,
- OUT_NICELINE,
- OUT_PLAIN,
- FST_WFACTOR,
- FST_WTAG,
- SUB_DELIMITER,
- SUB_RTL,
- SUB_LTR,
- NUM_OPTIONS
- };
+enum OPTIONS {
+ HELP1,
+ HELP2,
+ MAPPING_PREFIX,
+ IN_AUTO,
+ IN_CG,
+ IN_CG2,
+ IN_NICELINE,
+ IN_APERTIUM,
+ IN_FST,
+ IN_PLAIN,
+ OUT_CG,
+ OUT_CG2,
+ OUT_APERTIUM,
+ OUT_MATXIN,
+ OUT_NICELINE,
+ OUT_PLAIN,
+ FST_WFACTOR,
+ FST_WTAG,
+ SUB_DELIMITER,
+ SUB_RTL,
+ SUB_LTR,
+ NUM_OPTIONS,
+};
- UOption options[]= {
- UOPTION_DEF_D("help", 'h', UOPT_NO_ARG, "shows this help"),
- UOPTION_DEF_D("?", '?', UOPT_NO_ARG, "shows this help"),
- UOPTION_DEF_D("prefix", 'p', UOPT_REQUIRES_ARG, "sets the mapping prefix; defaults to @"),
- UOPTION_DEF_D("in-auto", 'u', UOPT_NO_ARG, "auto-detect input format (default)"),
- UOPTION_DEF_D("in-cg", 'c', UOPT_NO_ARG, "sets input format to CG"),
- UOPTION_DEF_D("v", 'v', UOPT_NO_ARG, "!"),
- UOPTION_DEF_D("in-niceline", 'n', UOPT_NO_ARG, "sets input format to Niceline CG"),
- UOPTION_DEF_D("in-apertium", 'a', UOPT_NO_ARG, "sets input format to Apertium"),
- UOPTION_DEF_D("in-fst", 'f', UOPT_NO_ARG, "sets input format to HFST/XFST"),
- UOPTION_DEF_D("in-plain", 'p', UOPT_NO_ARG, "sets input format to plain text"),
- UOPTION_DEF_D("out-cg", 'C', UOPT_NO_ARG, "sets output format to CG (default)"),
- UOPTION_DEF_D("V", 'V', UOPT_NO_ARG, "!"),
- UOPTION_DEF_D("out-apertium", 'A', UOPT_NO_ARG, "sets output format to Apertium"),
- UOPTION_DEF_D("out-niceline", 'N', UOPT_NO_ARG, "sets output format to Niceline CG"),
- UOPTION_DEF_D("out-plain", 'P', UOPT_NO_ARG, "sets output format to plain text"),
- UOPTION_DEF_D("wfactor", 'W', UOPT_REQUIRES_ARG, "FST weight factor (defaults to 100.0)"),
- UOPTION_DEF_D("wtag", 0, UOPT_REQUIRES_ARG, "FST weight tag prefix (defaults to W)"),
- UOPTION_DEF_D("sub-delim", 'S', UOPT_REQUIRES_ARG, "FST sub-reading delimiters (defaults to #)"),
- UOPTION_DEF_D("rtl", 'r', UOPT_NO_ARG, "sets sub-reading direction to RTL (default)"),
- UOPTION_DEF_D("ltr", 'l', UOPT_NO_ARG, "sets sub-reading direction to LTR")
- };
+UOption options[] = {
+ UOPTION_DEF_D("help", 'h', UOPT_NO_ARG, "shows this help"),
+ UOPTION_DEF_D("?", '?', UOPT_NO_ARG, "shows this help"),
+ UOPTION_DEF_D("prefix", 'p', UOPT_REQUIRES_ARG, "sets the mapping prefix; defaults to @"),
+ UOPTION_DEF_D("in-auto", 'u', UOPT_NO_ARG, "auto-detect input format (default)"),
+ UOPTION_DEF_D("in-cg", 'c', UOPT_NO_ARG, "sets input format to CG"),
+ UOPTION_DEF_D("v", 'v', UOPT_NO_ARG, "!"),
+ UOPTION_DEF_D("in-niceline", 'n', UOPT_NO_ARG, "sets input format to Niceline CG"),
+ UOPTION_DEF_D("in-apertium", 'a', UOPT_NO_ARG, "sets input format to Apertium"),
+ UOPTION_DEF_D("in-fst", 'f', UOPT_NO_ARG, "sets input format to HFST/XFST"),
+ UOPTION_DEF_D("in-plain", 'p', UOPT_NO_ARG, "sets input format to plain text"),
+ UOPTION_DEF_D("out-cg", 'C', UOPT_NO_ARG, "sets output format to CG (default)"),
+ UOPTION_DEF_D("V", 'V', UOPT_NO_ARG, "!"),
+ UOPTION_DEF_D("out-apertium", 'A', UOPT_NO_ARG, "sets output format to Apertium"),
+ UOPTION_DEF_D("out-matxin", 'M', UOPT_NO_ARG, "sets output format to Matxin"),
+ UOPTION_DEF_D("out-niceline", 'N', UOPT_NO_ARG, "sets output format to Niceline CG"),
+ UOPTION_DEF_D("out-plain", 'P', UOPT_NO_ARG, "sets output format to plain text"),
+ UOPTION_DEF_D("wfactor", 'W', UOPT_REQUIRES_ARG, "FST weight factor (defaults to 100.0)"),
+ UOPTION_DEF_D("wtag", 0, UOPT_REQUIRES_ARG, "FST weight tag prefix (defaults to W)"),
+ UOPTION_DEF_D("sub-delim", 'S', UOPT_REQUIRES_ARG, "FST sub-reading delimiters (defaults to #)"),
+ UOPTION_DEF_D("rtl", 'r', UOPT_NO_ARG, "sets sub-reading direction to RTL (default)"),
+ UOPTION_DEF_D("ltr", 'l', UOPT_NO_ARG, "sets sub-reading direction to LTR"),
+};
}
#endif
diff --git a/src/parser_helpers.hpp b/src/parser_helpers.hpp
index 8d7e620..318d5a9 100644
--- a/src/parser_helpers.hpp
+++ b/src/parser_helpers.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -52,7 +52,7 @@ Tag *parseTag(const UChar *to, const UChar *p, State& state) {
Tag *tag = state.get_grammar()->allocateTag();
tag->type = 0;
- if (to && to[0]) {
+ if (to[0]) {
const UChar *tmp = to;
while (tmp[0] && (tmp[0] == '!' || tmp[0] == '^')) {
if (tmp[0] == '!' || tmp[0] == '^') {
@@ -157,7 +157,7 @@ Tag *parseTag(const UChar *to, const UChar *p, State& state) {
state.error("%s: Error: Parsing tag %S resulted in an empty tag on line %u near `%S` - cannot continue!\n", tag->tag.c_str(), p);
}
- foreach(Grammar::regex_tags_t, state.get_grammar()->regex_tags, iter, iter_end) {
+ foreach (iter, state.get_grammar()->regex_tags) {
UErrorCode status = U_ZERO_ERROR;
uregex_setText(*iter, tag->tag.c_str(), tag->tag.length(), &status);
if (status != U_ZERO_ERROR) {
@@ -168,7 +168,7 @@ Tag *parseTag(const UChar *to, const UChar *p, State& state) {
tag->type |= T_TEXTUAL;
}
}
- foreach(Grammar::icase_tags_t, state.get_grammar()->icase_tags, iter, iter_end) {
+ foreach (iter, state.get_grammar()->icase_tags) {
UErrorCode status = U_ZERO_ERROR;
if (u_strCaseCompare(tag->tag.c_str(), tag->tag.length(), (*iter)->tag.c_str(), (*iter)->tag.length(), U_FOLD_CASE_DEFAULT, &status) == 0) {
tag->type |= T_TEXTUAL;
@@ -210,9 +210,7 @@ Tag *parseTag(const UChar *to, const UChar *p, State& state) {
}
if (tag->type & T_REGEXP) {
- if (u_strcmp(tag->tag.c_str(), stringbits[S_RXTEXT_ANY].getTerminatedBuffer()) == 0
- || u_strcmp(tag->tag.c_str(), stringbits[S_RXBASE_ANY].getTerminatedBuffer()) == 0
- || u_strcmp(tag->tag.c_str(), stringbits[S_RXWORD_ANY].getTerminatedBuffer()) == 0) {
+ if (u_strcmp(tag->tag.c_str(), stringbits[S_RXTEXT_ANY].getTerminatedBuffer()) == 0 || u_strcmp(tag->tag.c_str(), stringbits[S_RXBASE_ANY].getTerminatedBuffer()) == 0 || u_strcmp(tag->tag.c_str(), stringbits[S_RXWORD_ANY].getTerminatedBuffer()) == 0) {
// ToDo: Add a case-insensitive version of T_REGEXP_ANY for unification
tag->type |= T_REGEXP_ANY;
tag->type &= ~T_REGEXP;
@@ -242,15 +240,14 @@ Tag *parseTag(const UChar *to, const UChar *p, State& state) {
}
}
}
- if (tag->type & (T_CASE_INSENSITIVE|T_REGEXP)) {
+ if (tag->type & (T_CASE_INSENSITIVE | T_REGEXP)) {
if (tag->tag[0] == '/' && tag->tag[length - 1] == '/') {
tag->tag.resize(tag->tag.size() - 1);
tag->tag.erase(tag->tag.begin());
}
}
- label_isVarstring:
- ;
+ label_isVarstring:;
}
tag->type &= ~T_SPECIAL;
@@ -274,9 +271,8 @@ Set *parseSet(const UChar *name, const UChar *p, State& state) {
}
if ((
- (name[0] == '$' && name[1] == '$')
- || (name[0] == '&' && name[1] == '&')
- ) && name[2]) {
+ (name[0] == '$' && name[1] == '$') || (name[0] == '&' && name[1] == '&')) &&
+ name[2]) {
const UChar *wname = &(name[2]);
uint32_t wrap = hash_value(wname);
Set *wtmp = state.get_grammar()->getSet(wrap);
@@ -318,7 +314,6 @@ Set *parseSet(const UChar *name, const UChar *p, State& state) {
}
return tmp;
}
-
}
#endif
diff --git a/src/process.hpp b/src/process.hpp
index a25c40d..0747637 100644
--- a/src/process.hpp
+++ b/src/process.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -160,8 +160,10 @@ public:
}
~Process() {
- popen_plus_kill(child);
- popen_plus_close(child);
+ if (child) {
+ popen_plus_kill(child);
+ popen_plus_close(child);
+ }
}
void start(const std::string& cmdline) {
diff --git a/src/GrammarWriter.hpp b/src/scoped_stack.hpp
similarity index 55%
copy from src/GrammarWriter.hpp
copy to src/scoped_stack.hpp
index 982052e..13a2b8c 100644
--- a/src/GrammarWriter.hpp
+++ b/src/scoped_stack.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -20,37 +20,59 @@
*/
#pragma once
-#ifndef c6d28b7452ec699b_GRAMMARWRITER_H
-#define c6d28b7452ec699b_GRAMMARWRITER_H
+#ifndef c6d28b7452ec699b_SCOPED_STACK_HPP
+#define c6d28b7452ec699b_SCOPED_STACK_HPP
+#include <vector>
-#include "stdafx.hpp"
-
namespace CG3 {
- class Grammar;
- class Tag;
- class Set;
- class Rule;
- class ContextualTest;
- class GrammarWriter {
- public:
- bool statistics;
-
- GrammarWriter(Grammar& res, UFILE *ux_err);
- ~GrammarWriter();
+template<typename C>
+struct scoped_stack {
+ struct proxy {
+ proxy(scoped_stack *ss)
+ : z(ss->z++)
+ , ss(ss)
+ {
+ if (ss->cs.size() < ss->z) {
+ ss->cs.resize(ss->z);
+ }
+ }
- int writeGrammar(UFILE *output);
+ ~proxy() {
+ ss->cs[z].clear();
+ --ss->z;
+ }
+
+ C *operator->() {
+ return &ss->cs[z];
+ }
+
+ C& operator*() {
+ return ss->cs[z];
+ }
+
+ operator C&() {
+ return ss->cs[z];
+ }
private:
- UFILE *ux_stderr;
- uint32FlatHashSet used_sets;
- const Grammar *grammar;
-
- void printTag(UFILE *out, const Tag& tag);
- void printSet(UFILE *output, const Set& curset);
- void printRule(UFILE *to, const Rule& rule);
- void printContextualTest(UFILE *to, const ContextualTest& test);
+ size_t z;
+ scoped_stack *ss;
};
+
+ scoped_stack()
+ : z(0)
+ {}
+
+ proxy get() {
+ return proxy(this);
+ }
+
+private:
+ friend struct proxy;
+ size_t z;
+ std::vector<C> cs;
+};
}
#endif
diff --git a/src/sorted_vector.hpp b/src/sorted_vector.hpp
index 4094384..9623ba6 100644
--- a/src/sorted_vector.hpp
+++ b/src/sorted_vector.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -29,20 +29,20 @@
#include <stdint.h> // C99 or C++0x or C++ TR1 will have this header. ToDo: Change to <cstdint> when C++0x broader support gets under way.
namespace CG3 {
- namespace detail {
- template<typename ForwardIt, typename Comp>
- bool is_sorted(ForwardIt first, ForwardIt last, Comp comp) {
- if (first != last) {
- ForwardIt next = first;
- while (++next != last) {
- if (comp(*next, *first))
- return false;
- first = next;
- }
+namespace detail {
+ template<typename ForwardIt, typename Comp>
+ bool is_sorted(ForwardIt first, ForwardIt last, Comp comp) {
+ if (first != last) {
+ ForwardIt next = first;
+ while (++next != last) {
+ if (comp(*next, *first))
+ return false;
+ first = next;
}
- return true;
}
+ return true;
}
+}
template<typename T, typename Comp = std::less<T> >
class sorted_vector {
@@ -65,7 +65,7 @@ public:
}
#endif
- std::pair<iterator,bool> insert(T t) {
+ std::pair<iterator, bool> insert(T t) {
if (elements.empty()) {
elements.push_back(t);
return std::make_pair(elements.begin(), true);
@@ -84,7 +84,7 @@ public:
size_t at = std::distance(elements.begin(), it);
if (it == elements.end() || comp(*it, t) || comp(t, *it)) {
elements.insert(it, t);
- return std::make_pair(elements.begin()+at, true);
+ return std::make_pair(elements.begin() + at, true);
}
return std::make_pair(elements.begin() + at, false);
}
@@ -238,6 +238,10 @@ public:
elements.clear();
}
+ void sort() {
+ std::sort(elements.begin(), elements.end(), Comp());
+ }
+
container& get() {
return elements;
}
@@ -248,7 +252,6 @@ private:
};
typedef sorted_vector<uint32_t> uint32SortedVector;
-
}
#endif
diff --git a/src/stdafx.hpp b/src/stdafx.hpp
index 5f300a5..650f276 100644
--- a/src/stdafx.hpp
+++ b/src/stdafx.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -24,16 +24,12 @@
#define c6d28b7452ec699b_STDAFX_H
#ifdef _MSC_VER
- // warning C4258: definition from the for loop is ignored; the definition from the enclosing scope is used
- #pragma warning (disable: 4258)
- #pragma conform(forScope, on)
- // warning C4428: universal-character-name encountered in source
- #pragma warning (disable: 4428)
// warning C4512: assignment operator could not be generated
#pragma warning (disable: 4512)
- // warning C4480: nonstandard extension used: specifying underlying type for enum
- // 'cause that is actually standard in C++11
- #pragma warning (disable: 4480)
+ // warning C4456: declaration hides previous local declaration
+ #pragma warning (disable: 4456)
+ // warning C4458: declaration hides class member
+ #pragma warning (disable: 4458)
#endif
#include <exception>
@@ -83,13 +79,21 @@
#include <boost/container/flat_map.hpp>
#include <boost/dynamic_bitset.hpp>
#include <boost/scoped_ptr.hpp>
+#include <boost/shared_ptr.hpp>
#include <boost/typeof/typeof.hpp>
#include <boost/foreach.hpp>
#define boost_foreach BOOST_FOREACH
-#define boost_reverse_foreach BOOST_REVERSE_FOREACH
#define stdext boost
#define hash_map unordered_map
+#define foreach(iter, container) \
+ if (!(container).empty()) \
+ for (BOOST_AUTO(iter, (container).begin()), iter##_end = (container).end(); iter != iter##_end; ++iter)
+
+#define reverse_foreach(iter, container) \
+ if (!(container).empty()) \
+ for (BOOST_AUTO(iter, (container).rbegin()), iter##_end = (container).rend(); iter != iter##_end; ++iter)
+
#ifdef _WIN32
#include <winsock.h> // for hton() and family.
#else
@@ -113,13 +117,12 @@
#include <unicode/ubrk.h>
namespace CG3 {
- typedef std::basic_string<UChar> UString;
- typedef std::vector<UString> UStringVector;
- typedef std::vector<uint32_t> uint32Vector;
- namespace bc = ::boost::container;
+typedef std::basic_string<UChar> UString;
+typedef std::vector<UString> UStringVector;
+typedef std::vector<uint32_t> uint32Vector;
+namespace bc = ::boost::container;
}
-#include "macros.hpp"
#include "inlines.hpp"
#include "uextras.hpp"
#include "flat_unordered_map.hpp"
diff --git a/src/test_libcg3.c b/src/test_libcg3.c
index 2b9b569..66c69ee 100644
--- a/src/test_libcg3.c
+++ b/src/test_libcg3.c
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -83,21 +83,21 @@ int main(int argc, char *argv[]) {
cg3_sentence_runrules(applicator, sentence);
- for (ci = 0, ce = cg3_sentence_numcohorts(sentence) ; ci != ce ; ++ci) {
+ for (ci = 0, ce = cg3_sentence_numcohorts(sentence); ci != ce; ++ci) {
cohort = cg3_sentence_getcohort(sentence, ci);
tag = cg3_cohort_getwordform(cohort);
tmp = cg3_tag_gettext_u8(tag);
fprintf(stdout, "%s\n", tmp);
- for (ri = 0, re = cg3_cohort_numreadings(cohort) ; ri != re ; ++ri) {
+ for (ri = 0, re = cg3_cohort_numreadings(cohort); ri != re; ++ri) {
reading = cg3_cohort_getreading(cohort, ri);
fprintf(stdout, "\t");
- for (ti = 0, te = cg3_reading_numtags(reading) ; ti != te ; ++ti) {
+ for (ti = 0, te = cg3_reading_numtags(reading); ti != te; ++ti) {
tag = cg3_reading_gettag(reading, ti);
tmp = cg3_tag_gettext_u8(tag);
fprintf(stdout, "%s ", tmp);
}
- for (ti = 0, te = cg3_reading_numtraces(reading) ; ti != te ; ++ti) {
+ for (ti = 0, te = cg3_reading_numtraces(reading); ti != te; ++ti) {
uint32_t rule_line = cg3_reading_gettrace(reading, ti);
fprintf(stdout, "TRACE:%u ", rule_line);
}
diff --git a/src/uextras.cpp b/src/uextras.cpp
index 768320c..b3978c8 100644
--- a/src/uextras.cpp
+++ b/src/uextras.cpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -29,7 +29,7 @@
namespace CG3 {
std::string ux_dirname(const char *in) {
- char tmp[32768] = {0};
+ char tmp[32768] = { 0 };
#ifdef _WIN32
char *fname = 0;
GetFullPathNameA(in, 32767, tmp, &fname);
@@ -44,11 +44,10 @@ std::string ux_dirname(const char *in) {
}
#endif
size_t tlen = strlen(tmp);
- if (tmp[tlen-1] != '/' && tmp[tlen-1] != '\\') {
- tmp[tlen+1] = 0;
+ if (tmp[tlen - 1] != '/' && tmp[tlen - 1] != '\\') {
+ tmp[tlen + 1] = 0;
tmp[tlen] = '/';
}
return tmp;
}
-
}
diff --git a/src/uextras.hpp b/src/uextras.hpp
index 1b7fcef..b9f6e01 100644
--- a/src/uextras.hpp
+++ b/src/uextras.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -96,7 +96,7 @@ inline int ux_isSetOp(const UChar *it) {
inline bool ux_isEmpty(const UChar *text) {
size_t length = u_strlen(text);
if (length > 0) {
- for (size_t i=0 ; i<length ; i++) {
+ for (size_t i = 0; i < length; i++) {
if (!ISSPACE(text[i])) {
return false;
}
@@ -106,8 +106,8 @@ inline bool ux_isEmpty(const UChar *text) {
}
inline bool ux_simplecasecmp(const UChar *a, const UChar *b, const size_t n) {
- for (size_t i = 0 ; i < n ; ++i) {
- if (a[i] != b[i] && a[i] != b[i]+32) {
+ for (size_t i = 0; i < n; ++i) {
+ if (a[i] != b[i] && a[i] != b[i] + 32) {
return false;
}
}
@@ -123,30 +123,33 @@ struct substr_t {
size_t offset, count;
value_type old_value;
- substr_t(const Str& str, size_t offset=0, size_t count=Str::npos) :
- str(str), offset(offset), count(count), old_value(0)
+ substr_t(const Str& str, size_t offset = 0, size_t count = Str::npos)
+ : str(str)
+ , offset(offset)
+ , count(count)
+ , old_value(0)
{
if (count != Str::npos) {
- old_value = str[offset+count];
+ old_value = str[offset + count];
}
}
~substr_t() {
if (count != Str::npos) {
- value_type *buf = const_cast<value_type*>(str.c_str()+offset);
+ value_type *buf = const_cast<value_type*>(str.c_str() + offset);
buf[count] = old_value;
}
}
const value_type *c_str() const {
- value_type *buf = const_cast<value_type*>(str.c_str()+offset);
+ value_type *buf = const_cast<value_type*>(str.c_str() + offset);
buf[count] = 0;
return buf;
}
};
template<typename Str>
-inline substr_t<Str> substr(const Str& str, size_t offset=0, size_t count=0) {
+inline substr_t<Str> substr(const Str& str, size_t offset = 0, size_t count = 0) {
return substr_t<Str>(str, offset, count);
}
@@ -163,7 +166,6 @@ inline UChar *ux_bufcpy(UChar *dst, const UChar *src, size_t n) {
}
std::string ux_dirname(const char *in);
-
}
#endif
diff --git a/src/version.hpp b/src/version.hpp
index 25ec59f..650d88b 100644
--- a/src/version.hpp
+++ b/src/version.hpp
@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2007-2015, GrammarSoft ApS
+* Copyright (C) 2007-2016, GrammarSoft ApS
* Developed by Tino Didriksen <mail at tinodidriksen.com>
* Design by Eckhard Bick <eckhard.bick at mail.dk>, Tino Didriksen <mail at tinodidriksen.com>
*
@@ -25,12 +25,12 @@
#include <stdint.h>
-const char* const CG3_COPYRIGHT_STRING = "Copyright (C) 2007-2015 GrammarSoft ApS. Licensed under GPLv3+";
+const char *const CG3_COPYRIGHT_STRING = "Copyright (C) 2007-2016 GrammarSoft ApS. Licensed under GPLv3+";
const uint32_t CG3_VERSION_MAJOR = 0;
const uint32_t CG3_VERSION_MINOR = 9;
const uint32_t CG3_VERSION_PATCH = 9;
-const uint32_t CG3_REVISION = 10824;
+const uint32_t CG3_REVISION = 11621;
const uint32_t CG3_FEATURE_REV = 10575;
const uint32_t CG3_TOO_OLD = 10373;
const uint32_t CG3_EXTERNAL_PROTOCOL = 7226;
diff --git a/test/Apertium/T_BasicAppend/expected.txt b/test/Apertium/T_Append/expected.txt
similarity index 100%
rename from test/Apertium/T_BasicAppend/expected.txt
rename to test/Apertium/T_Append/expected.txt
diff --git a/test/Apertium/T_BasicAppend/grammar.cg3 b/test/Apertium/T_Append/grammar.cg3
similarity index 100%
rename from test/Apertium/T_BasicAppend/grammar.cg3
rename to test/Apertium/T_Append/grammar.cg3
diff --git a/test/Apertium/T_BasicAppend/input.txt b/test/Apertium/T_Append/input.txt
similarity index 100%
rename from test/Apertium/T_BasicAppend/input.txt
rename to test/Apertium/T_Append/input.txt
diff --git a/test/Apertium/T_BasicAppend/run.pl b/test/Apertium/T_Append/run.pl
similarity index 100%
rename from test/Apertium/T_BasicAppend/run.pl
rename to test/Apertium/T_Append/run.pl
diff --git a/test/Apertium/T_BasicContextTest/expected.txt b/test/Apertium/T_ContextTest/expected.txt
similarity index 100%
rename from test/Apertium/T_BasicContextTest/expected.txt
rename to test/Apertium/T_ContextTest/expected.txt
diff --git a/test/Apertium/T_BasicContextTest/grammar.cg3 b/test/Apertium/T_ContextTest/grammar.cg3
similarity index 100%
rename from test/Apertium/T_BasicContextTest/grammar.cg3
rename to test/Apertium/T_ContextTest/grammar.cg3
diff --git a/test/Apertium/T_BasicContextTest/input.txt b/test/Apertium/T_ContextTest/input.txt
similarity index 100%
rename from test/Apertium/T_BasicContextTest/input.txt
rename to test/Apertium/T_ContextTest/input.txt
diff --git a/test/Apertium/T_BasicContextTest/run.pl b/test/Apertium/T_ContextTest/run.pl
similarity index 100%
rename from test/Apertium/T_BasicContextTest/run.pl
rename to test/Apertium/T_ContextTest/run.pl
diff --git a/test/Apertium/T_BasicDelimit/expected.txt b/test/Apertium/T_Delimit/expected.txt
similarity index 100%
rename from test/Apertium/T_BasicDelimit/expected.txt
rename to test/Apertium/T_Delimit/expected.txt
diff --git a/test/Apertium/T_BasicDelimit/grammar.cg3 b/test/Apertium/T_Delimit/grammar.cg3
similarity index 100%
rename from test/Apertium/T_BasicDelimit/grammar.cg3
rename to test/Apertium/T_Delimit/grammar.cg3
diff --git a/test/Apertium/T_BasicDelimit/input.txt b/test/Apertium/T_Delimit/input.txt
similarity index 100%
rename from test/Apertium/T_BasicDelimit/input.txt
rename to test/Apertium/T_Delimit/input.txt
diff --git a/test/Apertium/T_BasicDelimit/run.pl b/test/Apertium/T_Delimit/run.pl
similarity index 100%
rename from test/Apertium/T_BasicDelimit/run.pl
rename to test/Apertium/T_Delimit/run.pl
diff --git a/test/Apertium/T_BasicIff/expected.txt b/test/Apertium/T_Iff/expected.txt
similarity index 100%
rename from test/Apertium/T_BasicIff/expected.txt
rename to test/Apertium/T_Iff/expected.txt
diff --git a/test/Apertium/T_BasicIff/grammar.cg3 b/test/Apertium/T_Iff/grammar.cg3
similarity index 100%
rename from test/Apertium/T_BasicIff/grammar.cg3
rename to test/Apertium/T_Iff/grammar.cg3
diff --git a/test/Apertium/T_BasicIff/input.txt b/test/Apertium/T_Iff/input.txt
similarity index 100%
rename from test/Apertium/T_BasicIff/input.txt
rename to test/Apertium/T_Iff/input.txt
diff --git a/test/Apertium/T_BasicIff/run.pl b/test/Apertium/T_Iff/run.pl
similarity index 100%
rename from test/Apertium/T_BasicIff/run.pl
rename to test/Apertium/T_Iff/run.pl
diff --git a/test/Apertium/T_BasicRemove/expected.txt b/test/Apertium/T_Remove/expected.txt
similarity index 100%
rename from test/Apertium/T_BasicRemove/expected.txt
rename to test/Apertium/T_Remove/expected.txt
diff --git a/test/Apertium/T_BasicRemove/grammar.cg3 b/test/Apertium/T_Remove/grammar.cg3
similarity index 100%
rename from test/Apertium/T_BasicRemove/grammar.cg3
rename to test/Apertium/T_Remove/grammar.cg3
diff --git a/test/Apertium/T_BasicRemove/input.txt b/test/Apertium/T_Remove/input.txt
similarity index 100%
rename from test/Apertium/T_BasicRemove/input.txt
rename to test/Apertium/T_Remove/input.txt
diff --git a/test/Apertium/T_BasicRemove/run.pl b/test/Apertium/T_Remove/run.pl
similarity index 100%
rename from test/Apertium/T_BasicRemove/run.pl
rename to test/Apertium/T_Remove/run.pl
diff --git a/test/Apertium/T_BasicSelect/expected.txt b/test/Apertium/T_Select/expected.txt
similarity index 100%
rename from test/Apertium/T_BasicSelect/expected.txt
rename to test/Apertium/T_Select/expected.txt
diff --git a/test/Apertium/T_BasicSelect/grammar.cg3 b/test/Apertium/T_Select/grammar.cg3
similarity index 100%
rename from test/Apertium/T_BasicSelect/grammar.cg3
rename to test/Apertium/T_Select/grammar.cg3
diff --git a/test/Apertium/T_BasicSelect/input.txt b/test/Apertium/T_Select/input.txt
similarity index 100%
rename from test/Apertium/T_BasicSelect/input.txt
rename to test/Apertium/T_Select/input.txt
diff --git a/test/Apertium/T_BasicSelect/run.pl b/test/Apertium/T_Select/run.pl
similarity index 100%
rename from test/Apertium/T_BasicSelect/run.pl
rename to test/Apertium/T_Select/run.pl
diff --git a/test/Apertium/T_BasicSubstitute/expected.txt b/test/Apertium/T_Substitute/expected.txt
similarity index 100%
rename from test/Apertium/T_BasicSubstitute/expected.txt
rename to test/Apertium/T_Substitute/expected.txt
diff --git a/test/Apertium/T_BasicSubstitute/grammar.cg3 b/test/Apertium/T_Substitute/grammar.cg3
similarity index 100%
rename from test/Apertium/T_BasicSubstitute/grammar.cg3
rename to test/Apertium/T_Substitute/grammar.cg3
diff --git a/test/Apertium/T_BasicSubstitute/input.txt b/test/Apertium/T_Substitute/input.txt
similarity index 100%
rename from test/Apertium/T_BasicSubstitute/input.txt
rename to test/Apertium/T_Substitute/input.txt
diff --git a/test/Apertium/T_BasicSubstitute/run.pl b/test/Apertium/T_Substitute/run.pl
similarity index 100%
rename from test/Apertium/T_BasicSubstitute/run.pl
rename to test/Apertium/T_Substitute/run.pl
diff --git a/test/T_BasicAppend/expected.txt b/test/T_Append/expected.txt
similarity index 93%
rename from test/T_BasicAppend/expected.txt
rename to test/T_Append/expected.txt
index 2c9e178..1f91bf6 100644
--- a/test/T_BasicAppend/expected.txt
+++ b/test/T_Append/expected.txt
@@ -12,5 +12,8 @@
"word" wanted copy also-copied noguard-copied
"word" wanted copied-except
"word" wanted copied copied-except
+"<word>"
+ "word" notwanted
"<waffle>"
"waffle" and cream @show
+
diff --git a/test/T_BasicAppend/grammar.cg3 b/test/T_Append/grammar.cg3
similarity index 86%
rename from test/T_BasicAppend/grammar.cg3
rename to test/T_Append/grammar.cg3
index e13b4d4..7c7c57e 100644
--- a/test/T_BasicAppend/grammar.cg3
+++ b/test/T_Append/grammar.cg3
@@ -10,3 +10,5 @@ COPY (copied-except) EXCEPT (also-copied copy noguard-copied) (wanted) ;
"<waffle>" APPEND ("waffle" and cream) (*) ;
"<waffle>" ADD (@show) (*) ;
+
+ADDCOHORT (VSTR:"<$1>" VSTR:"$1" notwanted) AFTER ("<(.+)>"r wanted) ;
diff --git a/test/T_BasicAppend/input.txt b/test/T_Append/input.txt
similarity index 100%
rename from test/T_BasicAppend/input.txt
rename to test/T_Append/input.txt
diff --git a/test/T_BasicAppend/grammar.cg3b.10043 b/test/T_BasicAppend/grammar.cg3b.10043
deleted file mode 100644
index 85acd67..0000000
Binary files a/test/T_BasicAppend/grammar.cg3b.10043 and /dev/null differ
diff --git a/test/T_BasicDependency/grammar.cg3b.10043 b/test/T_BasicDependency/grammar.cg3b.10043
deleted file mode 100644
index 99b4fa6..0000000
Binary files a/test/T_BasicDependency/grammar.cg3b.10043 and /dev/null differ
diff --git a/test/T_BasicIff/grammar.cg3 b/test/T_BasicIff/grammar.cg3
deleted file mode 100644
index a46b4b2..0000000
--- a/test/T_BasicIff/grammar.cg3
+++ /dev/null
@@ -1,4 +0,0 @@
-DELIMITERS = "<$.>" ;
-
-SECTION
-IFF (wanted) (NEGATE 1* ("blocker"))
diff --git a/test/T_BasicSelect/grammar.cg3b.10043 b/test/T_BasicSelect/grammar.cg3b.10043
deleted file mode 100644
index 503ae42..0000000
Binary files a/test/T_BasicSelect/grammar.cg3b.10043 and /dev/null differ
diff --git a/test/T_BasicContextTest/expected.txt b/test/T_ContextTest/expected.txt
similarity index 100%
rename from test/T_BasicContextTest/expected.txt
rename to test/T_ContextTest/expected.txt
diff --git a/test/T_BasicContextTest/grammar.cg3 b/test/T_ContextTest/grammar.cg3
similarity index 100%
rename from test/T_BasicContextTest/grammar.cg3
rename to test/T_ContextTest/grammar.cg3
diff --git a/test/T_BasicContextTest/grammar.cg3b.10043 b/test/T_ContextTest/grammar.cg3b.10043
similarity index 100%
rename from test/T_BasicContextTest/grammar.cg3b.10043
rename to test/T_ContextTest/grammar.cg3b.10043
diff --git a/test/T_BasicContextTest/input.txt b/test/T_ContextTest/input.txt
similarity index 100%
rename from test/T_BasicContextTest/input.txt
rename to test/T_ContextTest/input.txt
diff --git a/test/T_BasicDelimit/args.txt b/test/T_Delimit/args.txt
similarity index 100%
rename from test/T_BasicDelimit/args.txt
rename to test/T_Delimit/args.txt
diff --git a/test/T_BasicDelimit/expected.txt b/test/T_Delimit/expected.txt
similarity index 100%
rename from test/T_BasicDelimit/expected.txt
rename to test/T_Delimit/expected.txt
diff --git a/test/T_BasicDelimit/grammar.cg3 b/test/T_Delimit/grammar.cg3
similarity index 100%
rename from test/T_BasicDelimit/grammar.cg3
rename to test/T_Delimit/grammar.cg3
diff --git a/test/T_BasicDelimit/grammar.cg3b.10043 b/test/T_Delimit/grammar.cg3b.10043
similarity index 100%
rename from test/T_BasicDelimit/grammar.cg3b.10043
rename to test/T_Delimit/grammar.cg3b.10043
diff --git a/test/T_BasicDelimit/input.txt b/test/T_Delimit/input.txt
similarity index 100%
rename from test/T_BasicDelimit/input.txt
rename to test/T_Delimit/input.txt
diff --git a/test/T_BasicDependency/args.txt b/test/T_Dependency/args.txt
similarity index 100%
rename from test/T_BasicDependency/args.txt
rename to test/T_Dependency/args.txt
diff --git a/test/T_BasicDependency/expected.txt b/test/T_Dependency/expected.txt
similarity index 87%
rename from test/T_BasicDependency/expected.txt
rename to test/T_Dependency/expected.txt
index 56edb68..dd68b11 100644
--- a/test/T_BasicDependency/expected.txt
+++ b/test/T_Dependency/expected.txt
@@ -5,7 +5,7 @@
"<em>"
"em" <sam-> PRP @<PIV @self-or-child-S @self-or-child-c @self-or-parent-S @self-or-parent-p #3→2
"<a>"
- "o" <artd> <-sam> DET F S @>N @parent-deep #4→5
+ "o" <artd> <-sam> DET F S @>N @parent-deep @bag-self-or-prev @bag-self-or-next @bag-any-not #4→5
"<mulher>"
"mulher" <Hattr> N F S @P< §PAT #5→3
"<$.>"
@@ -13,7 +13,7 @@
</s>
"<o>"
- "o" <artd> DET M S @>N @parent-deep #1→2
+ "o" <artd> DET M S @>N @parent-deep @bag-self @bag-self-or-prev @bag-self-or-next @bag-any-not #1→2
"<carro>"
"carro" <V> N M S @SUBJ> #2→3
"<bateu>"
@@ -37,7 +37,7 @@
"<bateu>"
"bater" <fmc> <mv> V PS 3S IND VFIN @FS-STA @child-deep #4→0
"<o>"
- "o" <artd> DET M S @>N @parent-deep #5→6
+ "o" <artd> DET M S @>N @parent-deep @bag-self @bag-self-or-prev @bag-any #5→6
"<recorde>"
"recorde" <ac> N M S @<ACC §EXP §PAT #6→4
"<$.>"
@@ -59,4 +59,3 @@
"six" #7→5 ID:27 R:rightmost-descendent:23 R:rightmost-deepest-child:23 R:rightmost-self-or-descendent:23 R:rightmost-right-descendent:23 R:leftmost-deepest-child:23
"<seven>"
"seven" #8→1 ID:28 R:rightmost-sibling:23
-
diff --git a/test/T_BasicDependency/grammar.cg3 b/test/T_Dependency/grammar.cg3
similarity index 93%
rename from test/T_BasicDependency/grammar.cg3
rename to test/T_Dependency/grammar.cg3
index ab8b5f2..d77cef9 100644
--- a/test/T_BasicDependency/grammar.cg3
+++ b/test/T_Dependency/grammar.cg3
@@ -63,3 +63,9 @@ ADDRELATION (leftmost-left-ancestor) ("four") TO (lllpp (*)) ;
ADDRELATION (rightmost-right-ancestor) ("four") TO (rrrpp (*)) ;
ADDRELATION (leftmost-self-or-ancestor) ("four") TO (llppS (*)) ;
ADDRELATION (rightmost-self-or-ancestor) ("four") TO (rrppS (*)) ;
+
+ADD (@bag-self) ("o") (B ("<bateu>")) ;
+ADD (@bag-self-or-prev) ("o") (B< ("<mulher>")) ;
+ADD (@bag-self-or-next) ("o") (B> ("<árvore>")) ;
+ADD (@bag-any) ("o") (BW ("<seven>")) ;
+ADD (@bag-any-not) ("o") (NOT BW ("<seven>")) ;
diff --git a/test/T_BasicDependency/input.txt b/test/T_Dependency/input.txt
similarity index 100%
rename from test/T_BasicDependency/input.txt
rename to test/T_Dependency/input.txt
diff --git a/test/T_BasicIff/expected.txt b/test/T_Iff/expected.txt
similarity index 100%
rename from test/T_BasicIff/expected.txt
rename to test/T_Iff/expected.txt
diff --git a/test/T_Iff/grammar.cg3 b/test/T_Iff/grammar.cg3
new file mode 100644
index 0000000..74cc326
--- /dev/null
+++ b/test/T_Iff/grammar.cg3
@@ -0,0 +1,4 @@
+DELIMITERS = "<$.>" ;
+
+SECTION
+IFF (wanted) (NEGATE 1* ("blocker")) ;
diff --git a/test/T_BasicIff/grammar.cg3b.10043 b/test/T_Iff/grammar.cg3b.10043
similarity index 100%
rename from test/T_BasicIff/grammar.cg3b.10043
rename to test/T_Iff/grammar.cg3b.10043
diff --git a/test/T_BasicIff/input.txt b/test/T_Iff/input.txt
similarity index 100%
rename from test/T_BasicIff/input.txt
rename to test/T_Iff/input.txt
diff --git a/test/T_MultipleSections/grammar.cg3 b/test/T_MultipleSections/grammar.cg3
index 44ec5f3..31a17b3 100644
--- a/test/T_MultipleSections/grammar.cg3
+++ b/test/T_MultipleSections/grammar.cg3
@@ -1,7 +1,7 @@
DELIMITERS = "<$.>" ;
SECTION
-SELECT (wanted) (NOT 1* ("blocker"))
+SELECT (wanted) (NOT 1* ("blocker")) ;
SECTION
-REMOVE ("blocker")
+REMOVE ("blocker") ;
diff --git a/test/T_MweSplit/expected.txt b/test/T_MweSplit/expected.txt
new file mode 100644
index 0000000..e179d1a
--- /dev/null
+++ b/test/T_MweSplit/expected.txt
@@ -0,0 +1,76 @@
+"<dalle>"
+ "dalle" Adv Sem/Time <W:0> REMOVE:foo
+:
+"<go>"
+ "go" CS <W:0> REMOVE:foo
+; "go" Pcle Qst <W:0> "<go>" REMOVE:foo
+; "dalle" Adv Sem/Time <W:0> "<dalle >" REMOVE:foo
+"<da>"
+ "da" Adv Sem/Time <W:0>
+ "da" Adv Sem/Time <W:1>
+"<lle>"
+ "lle" Adv Sem/Time <W:0>
+ "lle" Adv Sem/Time <W:1>
+:
+"<go>"
+ "go" Pcle Qst <W:0>
+ "go" Pcle Qst <W:1>
+:\n
+"<dalle>"
+ "dalle" Adv Sem/Time <W:0>
+ "dalle" Adv Sem/Time <W:1>
+:
+"<go>"
+ "go" CS <W:0>
+ "go" CS <W:1>
+:\n
+"<dalle>"
+ "lle" Adv Sem/Time <W:0>
+ "da" Adv Sem/Time <W:0>
+ "lle" Adv Sem/Time <W:1>
+ "da" Adv Sem/Time <W:1>
+:
+"<go>"
+ "go" Pcle Qst <W:0>
+ "go" Pcle Qst <W:1>
+"<,>"
+ "," CLB <W:0>
+:\n
+"<3>"
+ "3" Num Nom
+ "3" Num Acc
+"<.>"
+ "." PUNCT
+:
+"<3>"
+ "3" Num Nom
+"<.>"
+ "." PUNCT
+"<dalle go>"
+ "go" Pcle Qst <W:0> "<go>"
+ "dalle" Adv Sem/Time <W:0> "<dalle >"
+ "go" CS <W:0> "<go>"
+ "dalle" Adv Sem/Time <W:0> "<dalle >"
+ "dalle go" CS <W:0>
+"<,>"
+ "," CLB <W:0>
+:
+"<dalle>"
+ "dalle" Adv Sem/Time <W:0>
+:
+"<go>"
+ "go" CS <W:0>
+ "go" Pcle Qst <W:0>
+"<Supmii>"
+ "suopma" N Sem/Lang Sg Ill <W:0>
+ "Suopma" N Prop Sem/Plc Sg Ill <W:0>
+ "Suopma" N Prop Sem/Sur Sg Ill <W:0>
+:
+"<sotnabeaivvi iđida go>"
+ "go" Pcle Qst <W:10> "<go>"
+ "iđit" N Sem/Time Sg Gen <W:10>
+ "sotnabeaivi" N Sem/Time Cmp/SgGen Cmp <W:10> "<sotnabeaivvi iđida >"
+ "go" Pcle Qst <W:0> "<go>"
+ "iđit" N Sem/Time Sg Gen <W:0> "<iđida >"
+ "sotnabeaivi" N Sem/Time Cmp/SgGen Tok/EndOfWord <W:0> "<sotnabeaivvi >"
+
diff --git a/test/T_MweSplit/input.txt b/test/T_MweSplit/input.txt
new file mode 100644
index 0000000..d54bcfc
--- /dev/null
+++ b/test/T_MweSplit/input.txt
@@ -0,0 +1,66 @@
+"<dalle go>"
+ "go" CS <W:0> "<go>" REMOVE:foo
+ "dalle" Adv Sem/Time <W:0> "<dalle >" REMOVE:foo
+; "go" Pcle Qst <W:0> "<go>" REMOVE:foo
+; "dalle" Adv Sem/Time <W:0> "<dalle >" REMOVE:foo
+"<dalle go>"
+ "go" Pcle Qst <W:0> "<go>"
+ "lle" Adv Sem/Time <W:0> "<lle >"
+ "da" Adv Sem/Time <W:0> "<da>"
+ "go" Pcle Qst <W:1> "<go>"
+ "lle" Adv Sem/Time <W:1> "<lle >"
+ "da" Adv Sem/Time <W:1> "<da>"
+:\n
+"<dalle go>"
+ "go" CS <W:0> "<go>"
+ "dalle" Adv Sem/Time <W:0> "<dalle >"
+ "go" CS <W:1> "<go>"
+ "dalle" Adv Sem/Time <W:1> "<dalle >"
+:\n
+"<dalle go>"
+ "go" Pcle Qst <W:0> "<go>"
+ "lle" Adv Sem/Time <W:0> "<dalle >"
+ "da" Adv Sem/Time <W:0>
+ "go" Pcle Qst <W:1> "<go>"
+ "lle" Adv Sem/Time <W:1> "<dalle >"
+ "da" Adv Sem/Time <W:1>
+"<,>"
+ "," CLB <W:0>
+:\n
+"<3.>"
+ "." PUNCT "<.>"
+ "3" Num Nom "<3>"
+ "." PUNCT "<.>"
+ "3" Num Acc "<3>"
+:
+"<3.>"
+ "." PUNCT "<.>"
+ "3" Num Nom "<3>"
+ "." PUNCT "<.>"
+ "3" Num Nom "<3>"
+"<dalle go>"
+ "go" Pcle Qst <W:0> "<go>"
+ "dalle" Adv Sem/Time <W:0> "<dalle >"
+ "go" CS <W:0> "<go>"
+ "dalle" Adv Sem/Time <W:0> "<dalle >"
+ "dalle go" CS <W:0>
+"<,>"
+ "," CLB <W:0>
+:
+"<dalle go>"
+ "go" CS <W:0> "<go>"
+ "dalle" Adv Sem/Time <W:0> "<dalle >"
+ "go" Pcle Qst <W:0> "<go>"
+ "dalle" Adv Sem/Time <W:0> "<dalle >"
+"<Supmii>"
+ "suopma" N Sem/Lang Sg Ill <W:0>
+ "Suopma" N Prop Sem/Plc Sg Ill <W:0>
+ "Suopma" N Prop Sem/Sur Sg Ill <W:0>
+:
+"<sotnabeaivvi iđida go>"
+ "go" Pcle Qst <W:10> "<go>"
+ "iđit" N Sem/Time Sg Gen <W:10>
+ "sotnabeaivi" N Sem/Time Cmp/SgGen Cmp <W:10> "<sotnabeaivvi iđida >"
+ "go" Pcle Qst <W:0> "<go>"
+ "iđit" N Sem/Time Sg Gen <W:0> "<iđida >"
+ "sotnabeaivi" N Sem/Time Cmp/SgGen Tok/EndOfWord <W:0> "<sotnabeaivvi >"
diff --git a/test/T_MweSplit/run.pl b/test/T_MweSplit/run.pl
new file mode 100755
index 0000000..9d31e1f
--- /dev/null
+++ b/test/T_MweSplit/run.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Cwd qw(realpath);
+
+my ($bindir, $sep) = $0 =~ /^(.*)(\\|\/).*/;
+$bindir = realpath $bindir;
+chdir $bindir or die("Error: Could not change directory to $bindir !");
+
+my $binary_mwesplit = $ARGV[0];
+$binary_mwesplit =~ s@/vislcg3([^/]*)$@/cg-mwesplit$1@;
+if (!$binary_mwesplit || $binary_mwesplit eq '' || !(-x $binary_mwesplit)) {
+ die("Error: $binary_mwesplit is not executable!");
+}
+
+`"$binary_mwesplit" < input.txt > output.txt 2>>stderr.txt`;
+`diff -B expected.txt output.txt >diff.txt`;
+
+if (-s "diff.txt") {
+ print STDERR "Fail.\n";
+} else {
+ print STDERR "Success Success.\n";
+}
diff --git a/test/T_NumericalTags/expected.txt b/test/T_NumericalTags/expected.txt
index 2fca68b..ad70cc2 100644
--- a/test/T_NumericalTags/expected.txt
+++ b/test/T_NumericalTags/expected.txt
@@ -8,7 +8,7 @@
"word" <VALUE=-50> @lessThanFifty @lessThanForty @-fifty @min ADD:5 ADD:7 ADD:9 ADD:13
"word" <VALUE>-50> @fifty @moreThanFifty @lessThanFifty @moreThanSixty @lessThanForty @zero @max ADD:3 ADD:4 ADD:5 ADD:6 ADD:7 ADD:10 ADD:12
"word" <VALUE=0> @lessThanFifty @lessThanForty @zero ADD:5 ADD:7 ADD:10
- "word" <VALUE:NotNumeric> @lessThanFifty @lessThanForty @zero ADD:5 ADD:7 ADD:10
+ "word" <VALUE:NotNumeric>
"word" <VALUE:x10>
"<max>"
"max" <X=1000> SELECT:18
diff --git a/test/T_Omniscan/expected.txt b/test/T_Omniscan/expected.txt
index 36944e3..b66a649 100644
--- a/test/T_Omniscan/expected.txt
+++ b/test/T_Omniscan/expected.txt
@@ -1,46 +1,46 @@
"<word>"
- "word" none #1->1
+ "word" none #1001->1001
"<word>"
- "word" third #2->2
+ "word" third #1002->1002
"<word>"
- "word" none #3->3
+ "word" none #1003->1003
"<word>"
- "word" none #4->4
+ "word" none #1004->1004
"<$.>"
"<word>"
- "word" none #6->6
+ "word" none #2001->2001
"<word>"
- "word" second #7->7
+ "word" second #2002->2002
"<word>"
- "word" none #8->8
+ "word" none #2003->2003
"<word>"
- "word" first #9->9
+ "word" first #2004->2004
"<word>"
- "word" startfourth @neighbour-right #10->20
+ "word" startfourth @neighbour-right #2005->3002
"<word>"
- "word" startfirst @self #11->9
+ "word" startfirst @self #2006->2004
"<word>"
- "word" startsecond @neighbour-left #12->14
+ "word" startsecond @neighbour-left #2007->2009
"<word>"
- "word" startthird #13->2
+ "word" startthird #2008->2008
"<word>"
- "word" second #14->14
+ "word" second #2009->2009
"<word>"
- "word" first #15->15
+ "word" first #2010->2010
"<word>"
- "word" none #16->16
+ "word" none #2011->2011
"<word>"
- "word" none #17->17
+ "word" none #2012->2012
"<$.>"
"<word>"
- "word" none #19->19
+ "word" none #3001->3001
"<word>"
- "word" fourth #20->20
+ "word" fourth #3002->3002
"<word>"
- "word" none #21->21
+ "word" none #3003->3003
"<word>"
- "word" fourth #22->22
+ "word" fourth #3004->3004
"<$.>"
diff --git a/test/T_RegExp/expected.txt b/test/T_RegExp/expected.txt
index c1638a6..343de47 100644
--- a/test/T_RegExp/expected.txt
+++ b/test/T_RegExp/expected.txt
@@ -10,3 +10,6 @@
"<word>"
"baseform" e @baseform-diff
"form" f @baseform-diff @add-f
+"<match>"
+ "match" g <AtcHAT> @gen-regex @baseform-same @slashes @icase @me
+ "match" h <AtcHAT> @gen-regex @baseform-same @slashes @icase @me
diff --git a/test/T_RegExp/grammar.cg3 b/test/T_RegExp/grammar.cg3
index e20f7f0..b4fd5dc 100644
--- a/test/T_RegExp/grammar.cg3
+++ b/test/T_RegExp/grammar.cg3
@@ -34,3 +34,5 @@ ADD (@slashes) (/^@.*-SAM/ri) ;
ADD (@icase) (/@BASEFORM-SAME/i) ;
ADD (VSTR:@add-$1) ("(.*)orm"r f) ;
+
+ADD (VSTR:@$2) ("<match>") (0 ("([^<].+)"r) LINK -1* ("<wor.*"r) LINK -1* (VSTR:"$1\(.+\)"r)) ;
diff --git a/test/T_RegExp/grammar.cg3b.10043 b/test/T_RegExp/grammar.cg3b.10043
deleted file mode 100644
index e8221b8..0000000
Binary files a/test/T_RegExp/grammar.cg3b.10043 and /dev/null differ
diff --git a/test/T_RegExp/input.txt b/test/T_RegExp/input.txt
index a417e06..b8d703a 100644
--- a/test/T_RegExp/input.txt
+++ b/test/T_RegExp/input.txt
@@ -10,3 +10,6 @@
"<word>"
"baseform" e
"form" f
+"<match>"
+ "match" g
+ "match" h
diff --git a/test/T_RelabelList/expected.txt b/test/T_RelabelList/expected.txt
new file mode 100644
index 0000000..1c44d5b
--- /dev/null
+++ b/test/T_RelabelList/expected.txt
@@ -0,0 +1,21 @@
+"<x>"
+ "x" det
+"<Y>"
+ "y" n @gold
+"<z>"
+ "z" det
+"<q>"
+ "q" n @gold
+"<abc>"
+ "a"
+ "aa" aa
+ "aaa"
+ "aaaa" aaaa
+ "b" bb bbbb
+ "bb" bb
+ "bbb" bb bbbb
+ "bbbb" bbbb
+ "c" cc cccc
+ "cc" cc
+ "ccc" cc cccc
+ "cccc" cccc
diff --git a/test/T_RelabelList/grammar.cg3 b/test/T_RelabelList/grammar.cg3
new file mode 100644
index 0000000..37a6eb3
--- /dev/null
+++ b/test/T_RelabelList/grammar.cg3
@@ -0,0 +1,42 @@
+DELIMITERS = "<$.>" ;
+
+LIST N = N;
+LIST Det = Det;
+LIST DetInd = (Det Ind);
+LIST NP = (N Prop) ;
+LIST NPAdv = (N Prop) Adv ;
+LIST NPAdvV = (N Prop) Adv V ;
+LIST NPAdvVX = (N Prop) Adv V X ;
+LIST NPGenAdv = (N Prop Gen) Adv;
+LIST NGen = (N Gen);
+LIST NDet = N Det; # as_set OR as_list !
+
+LIST CASE = Nom Acc;
+
+LIST BOLORN = (*) Uff;
+LIST EOLORN = N (*);
+
+
+LIST nochangehere = X Y Z ;
+LIST norhere = X Y ;
+
+SET N_NO_NP = N - (Prop);
+
+REMOVE NP IF (-1 (Det));
+REMOVE DetInd IF (-1 Det);
+REMOVE NPAdv IF (-1 (Det));
+REMOVE NPAdvV IF (-1 (Det));
+REMOVE NPAdvVX IF (-1 (Det));
+REMOVE NPGenAdv IF (-1 (Det));
+REMOVE NGen IF (-1 (Det));
+
+REMOVE (X) IF (-1 nochangehere LINK -1 norhere);
+
+SELECT $$CASE IF (-1 $$CASE);
+SELECT ("meh") IF (-1 BOLORN) (2 EOLORN);
+
+SUBSTITUTE (A) (*) (*);
+SUBSTITUTE (B) (BB) (*);
+SUBSTITUTE (C) (CC) (C);
+
+SUBSTITUTE:Date1 (Num Sg) (Num Sg Sem/Date) TARGET ("<[0-3][0-9][.][0-2][0-9][.][1-2][0-9][0-9][0-9]>"r Num) ;
diff --git a/test/T_RelabelList/input.txt b/test/T_RelabelList/input.txt
new file mode 100644
index 0000000..7a0efd0
--- /dev/null
+++ b/test/T_RelabelList/input.txt
@@ -0,0 +1,23 @@
+"<x>"
+ "x" det
+"<Y>"
+ "Y" np @bad
+ "y" n @gold
+"<z>"
+ "z" det
+"<q>"
+ "q" det ind @bad
+ "q" n @gold
+"<abc>"
+ "a"
+ "aa" aa
+ "aaa"
+ "aaaa" aaaa
+ "b" bb bbbb
+ "bb" bb
+ "bbb" bb bbbb
+ "bbbb" bbbb
+ "c" c
+ "cc" cc
+ "ccc" ccc
+ "cccc" cccc
diff --git a/test/T_RelabelList/relabel.cg3r b/test/T_RelabelList/relabel.cg3r
new file mode 100644
index 0000000..1f219a9
--- /dev/null
+++ b/test/T_RelabelList/relabel.cg3r
@@ -0,0 +1,20 @@
+MAP (Prop) (np);
+
+MAP (Det) (det);
+MAP (Ind) (ind);
+
+
+MAP (Nom) (nom);
+MAP (Acc) (acc);
+
+# A comment
+MAP (V) (vblex) ;
+MAP (N) (n) OR (np) ;
+MAP (Uff) (uff) - (d) ;
+
+MAP (A) (a) OR (aaa) ;
+MAP (B) (b) OR (bbb) ;
+MAP (C) (c) OR (ccc) ;
+MAP (AA) (aa) OR (aaaa) ;
+MAP (BB) (bb) OR (bbbb) ;
+MAP (CC) (cc) OR (cccc) ;
diff --git a/test/T_RelabelList/run.pl b/test/T_RelabelList/run.pl
new file mode 100755
index 0000000..96094bc
--- /dev/null
+++ b/test/T_RelabelList/run.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Cwd qw(realpath);
+
+my ($bindir, $sep) = $0 =~ /^(.*)(\\|\/).*/;
+$bindir = realpath $bindir;
+chdir $bindir or die("Error: Could not change directory to $bindir !");
+
+my $binary_comp = $ARGV[0];
+$binary_comp =~ s@/vislcg3([^/]*)$@/cg-comp$1@;
+if (!$binary_comp || $binary_comp eq '' || !(-x $binary_comp)) {
+ die("Error: $binary_comp is not executable!");
+}
+
+my $binary_relabel = $ARGV[0];
+$binary_relabel =~ s@/vislcg3([^/]*)$@/cg-relabel$1@;
+if (!$binary_relabel || $binary_relabel eq '' || !(-x $binary_relabel)) {
+ die("Error: $binary_relabel is not executable!");
+}
+
+my $binary_proc = $ARGV[0];
+if (!$binary_proc || $binary_proc eq '' || !(-x $binary_proc)) {
+ die("Error: $binary_proc is not executable!");
+}
+
+my @unlinks = (
+ 'grammar.cg3b',
+ 'grammar-out.cg3b',
+);
+for my $u (@unlinks) {
+ if (-e $u) {
+ unlink $u;
+ }
+}
+
+`"$binary_comp" grammar.cg3 grammar.cg3b >stdout.txt 2>stderr.txt`;
+`"$binary_relabel" grammar.cg3b relabel.cg3r grammar-out.cg3b >>stdout.txt 2>>stderr.txt`;
+
+if (-s "grammar.cg3b" && -s "grammar-out.cg3b") {
+ print STDERR "Success ";
+} else {
+ print STDERR "Fail ";
+}
+
+`"$binary_proc" -g grammar-out.cg3b -I input.txt -O output.txt 2>>stderr.txt`;
+`diff -B expected.txt output.txt >diff.txt`;
+
+if (-s "diff.txt") {
+ print STDERR "Fail.\n";
+} else {
+ print STDERR "Success.\n";
+}
diff --git a/test/T_RelabelList_Apertium/expected.txt b/test/T_RelabelList_Apertium/expected.txt
new file mode 100644
index 0000000..9ccde13
--- /dev/null
+++ b/test/T_RelabelList_Apertium/expected.txt
@@ -0,0 +1 @@
+^fooey/foo<n>$
diff --git a/test/T_RelabelList_Apertium/grammar.cg3 b/test/T_RelabelList_Apertium/grammar.cg3
new file mode 100644
index 0000000..5e1ad54
--- /dev/null
+++ b/test/T_RelabelList_Apertium/grammar.cg3
@@ -0,0 +1,3 @@
+DELIMITERS = "<$.>" ;
+
+SELECT (Der_foo) IF (0 (n));
diff --git a/test/T_RelabelList_Apertium/input.txt b/test/T_RelabelList_Apertium/input.txt
new file mode 100644
index 0000000..f696b94
--- /dev/null
+++ b/test/T_RelabelList_Apertium/input.txt
@@ -0,0 +1 @@
+^fooey/foo<n>/bar<n>$
diff --git a/test/T_RelabelList_Apertium/relabel.cg3r b/test/T_RelabelList_Apertium/relabel.cg3r
new file mode 100644
index 0000000..5e476e2
--- /dev/null
+++ b/test/T_RelabelList_Apertium/relabel.cg3r
@@ -0,0 +1 @@
+MAP (Der_foo) ("foo");
diff --git a/test/T_RelabelList_Apertium/run.pl b/test/T_RelabelList_Apertium/run.pl
new file mode 100755
index 0000000..8a2485c
--- /dev/null
+++ b/test/T_RelabelList_Apertium/run.pl
@@ -0,0 +1,54 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Cwd qw(realpath);
+
+my ($bindir, $sep) = $0 =~ /^(.*)(\\|\/).*/;
+$bindir = realpath $bindir;
+chdir $bindir or die("Error: Could not change directory to $bindir !");
+
+my $binary_comp = $ARGV[0];
+$binary_comp =~ s@/vislcg3([^/]*)$@/cg-comp$1@;
+if (!$binary_comp || $binary_comp eq '' || !(-x $binary_comp)) {
+ die("Error: $binary_comp is not executable!");
+}
+
+my $binary_relabel = $ARGV[0];
+$binary_relabel =~ s@/vislcg3([^/]*)$@/cg-relabel$1@;
+if (!$binary_relabel || $binary_relabel eq '' || !(-x $binary_relabel)) {
+ die("Error: $binary_relabel is not executable!");
+}
+
+my $binary_proc = $ARGV[0];
+$binary_proc =~ s@/vislcg3([^/]*)$@/cg-proc$1@;
+if (!$binary_proc || $binary_proc eq '' || !(-x $binary_proc)) {
+ die("Error: $binary_proc is not executable!");
+}
+
+my @unlinks = (
+ 'grammar.cg3b',
+ 'grammar-out.cg3b',
+);
+for my $u (@unlinks) {
+ if (-e $u) {
+ unlink $u;
+ }
+}
+
+`"$binary_comp" grammar.cg3 grammar.cg3b >stdout.txt 2>stderr.txt`;
+`"$binary_relabel" grammar.cg3b relabel.cg3r grammar-out.cg3b >>stdout.txt 2>>stderr.txt`;
+
+if (-s "grammar.cg3b" && -s "grammar-out.cg3b") {
+ print STDERR "Success ";
+} else {
+ print STDERR "Fail ";
+}
+
+`"$binary_proc" grammar-out.cg3b input.txt output.txt 2>>stderr.txt`;
+`diff -B expected.txt output.txt >diff.txt`;
+
+if (-s "diff.txt") {
+ print STDERR "Fail.\n";
+} else {
+ print STDERR "Success.\n";
+}
diff --git a/test/T_RelabelSet/expected.txt b/test/T_RelabelSet/expected.txt
new file mode 100644
index 0000000..ca184c6
--- /dev/null
+++ b/test/T_RelabelSet/expected.txt
@@ -0,0 +1,27 @@
+"<w>"
+ "w" Det @gold
+"<x>"
+ "x" Det
+"<Y>"
+ "y" N @gold
+"<Ys>"
+ "y" N Gen @gold
+"<Y>"
+ "Y" N
+"<Ys>"
+ "Ys" N Prop @gold
+"<X>"
+ "X" N Prop
+"<Z>"
+ "z" Det
+"<W>"
+ "w" N @gold
+"<Z>"
+ "z" A
+"<W>"
+ "w" N @gold
+"<Z>"
+ "z" N Gen
+"<W>"
+ "w" N @gold
+
diff --git a/test/T_RelabelSet/grammar.cg3 b/test/T_RelabelSet/grammar.cg3
new file mode 100644
index 0000000..462dbd6
--- /dev/null
+++ b/test/T_RelabelSet/grammar.cg3
@@ -0,0 +1,22 @@
+DELIMITERS = "<$.>" ;
+
+LIST adj = adj;
+LIST n = n;
+LIST np = np;
+LIST det = det;
+LIST ngen = (n gen);
+
+SELECT det;
+
+SELECT n IF (-1 det);
+# should turn into
+#SELECT (N) - (Prop) IF (-1 (Det));
+
+SELECT ngen IF (NOT -1 det) (1 n) ;
+#SELECT ((N)-(Prop))+(Gen) IF (NOT -1 (Det)) (1 (N)-(Prop));
+REMOVE ngen IF (NOT 1 n) ;
+#REMOVE ((N)-(Prop))+(Gen) IF (NOT 1 (N)-(Prop));
+
+
+SET pre-NP = det OR ngen OR adj;
+SELECT n IF (-1C pre-NP);
diff --git a/test/T_RelabelSet/input.txt b/test/T_RelabelSet/input.txt
new file mode 100644
index 0000000..360b85a
--- /dev/null
+++ b/test/T_RelabelSet/input.txt
@@ -0,0 +1,33 @@
+"<w>"
+ "w" W @bad
+ "w" Det @gold
+"<x>"
+ "x" Det
+"<Y>"
+ "Y" N Prop @bad
+ "y" N @gold
+"<Ys>"
+ "Ys" N Prop @bad
+ "y" N Gen @gold
+"<Y>"
+ "Y" N
+"<Ys>"
+ "Ys" N Prop @gold
+ "y" N Gen @bad
+"<X>"
+ "X" N Prop
+"<Z>"
+ "z" Det
+"<W>"
+ "W" N Prop @bad
+ "w" N @gold
+"<Z>"
+ "z" A
+"<W>"
+ "W" N Prop @bad
+ "w" N @gold
+"<Z>"
+ "z" N Gen
+"<W>"
+ "W" N Prop @bad
+ "w" N @gold
diff --git a/test/T_RelabelSet/relabel.cg3r b/test/T_RelabelSet/relabel.cg3r
new file mode 100644
index 0000000..7ccece2
--- /dev/null
+++ b/test/T_RelabelSet/relabel.cg3r
@@ -0,0 +1,6 @@
+MAP (det) (Det) ;
+MAP (gen) (Gen) ;
+MAP (adj) (A) ;
+
+MAP (n) (N) - (Prop) ;
+MAP (np) (N Prop) ;
diff --git a/test/T_RelabelSet/run.pl b/test/T_RelabelSet/run.pl
new file mode 100755
index 0000000..ec674b3
--- /dev/null
+++ b/test/T_RelabelSet/run.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Cwd qw(realpath);
+
+my ($bindir, $sep) = $0 =~ /^(.*)(\\|\/).*/;
+$bindir = realpath $bindir;
+chdir $bindir or die("Error: Could not change directory to $bindir !");
+
+my $binary_comp = $ARGV[0];
+$binary_comp =~ s@/vislcg3([^/]*)$@/cg-comp$1@;
+if (!$binary_comp || $binary_comp eq '' || !(-x $binary_comp)) {
+ die("Error: $binary_comp is not executable!");
+}
+
+my $binary_relabel = $ARGV[0];
+$binary_relabel =~ s@/vislcg3([^/]*)$@/cg-relabel$1@;
+if (!$binary_relabel || $binary_relabel eq '' || !(-x $binary_relabel)) {
+ die("Error: $binary_relabel is not executable!");
+}
+
+my $binary_proc = $ARGV[0];
+if (!$binary_proc || $binary_proc eq '' || !(-x $binary_proc)) {
+ die("Error: $binary_proc is not executable!");
+}
+
+my @unlinks = (
+ 'grammar.cg3b',
+ 'grammar-out.cg3b',
+);
+for my $u (@unlinks) {
+ if (-e $u) {
+ unlink $u;
+ }
+}
+
+`"$binary_comp" grammar.cg3 grammar.cg3b >stdout.txt 2>stderr.txt`;
+`"$binary_relabel" grammar.cg3b relabel.cg3r grammar-out.cg3b >>stdout.txt 2>>stderr.txt`;
+
+if (-s "grammar.cg3b" && -s "grammar-out.cg3b") {
+ print STDERR "Success ";
+} else {
+ print STDERR "Fail ";
+}
+
+`"$binary_proc" -g grammar-out.cg3b -I input.txt -O output.txt 2>>stderr.txt`;
+`diff -B expected.txt output.txt >diff.txt`;
+
+if (-s "diff.txt") {
+ print STDERR "Fail (expected).\n";
+} else {
+ print STDERR "Success.\n";
+}
diff --git a/test/T_BasicSelect/args.txt b/test/T_Select/args.txt
similarity index 100%
rename from test/T_BasicSelect/args.txt
rename to test/T_Select/args.txt
diff --git a/test/T_BasicSelect/expected.txt b/test/T_Select/expected.txt
similarity index 82%
rename from test/T_BasicSelect/expected.txt
rename to test/T_Select/expected.txt
index 7ba730e..eb51999 100644
--- a/test/T_BasicSelect/expected.txt
+++ b/test/T_Select/expected.txt
@@ -1,5 +1,5 @@
"<word>"
- "word" wanted
+ "word" wanted £affected
"<word>"
"word" mapped @A @B
"<word>"
diff --git a/test/T_BasicSelect/grammar.cg3 b/test/T_Select/grammar.cg3
similarity index 75%
rename from test/T_BasicSelect/grammar.cg3
rename to test/T_Select/grammar.cg3
index cbf5f95..b8a5b11 100644
--- a/test/T_BasicSelect/grammar.cg3
+++ b/test/T_Select/grammar.cg3
@@ -4,6 +4,8 @@ LIST ASet = wanted ;
LIST BSet = @A @B @C ;
LIST CSet = (@D @E) @F ;
+ADD (£affected) (notwanted) (-1A (wanted)) ;
+
SECTION
SELECT ASet ;
diff --git a/test/T_BasicSelect/input.txt b/test/T_Select/input.txt
similarity index 100%
rename from test/T_BasicSelect/input.txt
rename to test/T_Select/input.txt
diff --git a/test/T_BasicSubstitute/args.txt b/test/T_SplitCohort/args.txt
similarity index 100%
copy from test/T_BasicSubstitute/args.txt
copy to test/T_SplitCohort/args.txt
diff --git a/test/T_SplitCohort/expected.txt b/test/T_SplitCohort/expected.txt
new file mode 100644
index 0000000..33c0c89
--- /dev/null
+++ b/test/T_SplitCohort/expected.txt
@@ -0,0 +1,13 @@
+"<child>"
+ "child" E @p-captain #1->2 ADD:11
+; "<Captains=Samuel=Vimes>"
+; "Captains=Samuel=Vimes" PROP S ID:2 SPLITCOHORT:5
+"<Captains>"
+ "captain" A #2->3 SPLITCOHORT:5
+"<Samuel>"
+ "Samuel" B PROP S A #3->5 ID:5 R:rel_trg:3 SPLITCOHORT:5
+"<Vimes>"
+ "Vimes" C S #4->3 SPLITCOHORT:5
+"<parent>"
+ "parent" F @c-samuel #5->0 ID:3 R:rel_src:5 ADDRELATIONS(rel_src,rel_trg):3 ADD:12
+
diff --git a/test/T_SplitCohort/grammar.cg3 b/test/T_SplitCohort/grammar.cg3
new file mode 100644
index 0000000..0bc1479
--- /dev/null
+++ b/test/T_SplitCohort/grammar.cg3
@@ -0,0 +1,12 @@
+DELIMITERS = "<$.>" ;
+
+ADDRELATIONS (rel_src) (rel_trg) ("<parent>") TO (-1 (*)) ;
+
+SPLITCOHORT (
+ "<$1>"v "%L$2"v A c->2
+ "<$3>"v "$3"v B * A R:* 2->p
+ "<$4>"v "$4"v C VSTR:$5 3->2
+ ) TARGET ("<((.+?)s)=(.+?)=(.+?)>"r) (0 (/\(S|P\)/r)) ;
+
+ADD (@p-captain) (*) (p ("captain")) ;
+ADD (@c-samuel) (*) (c ("Samuel")) ;
diff --git a/test/T_SplitCohort/input.txt b/test/T_SplitCohort/input.txt
new file mode 100644
index 0000000..56c3dca
--- /dev/null
+++ b/test/T_SplitCohort/input.txt
@@ -0,0 +1,6 @@
+"<child>"
+ "child" E #1→2
+"<Captains=Samuel=Vimes>"
+ "Captains=Samuel=Vimes" PROP S #2→3
+"<parent>"
+ "parent" F #3→0
diff --git a/test/T_BasicSubstitute/args.txt b/test/T_Substitute/args.txt
similarity index 100%
rename from test/T_BasicSubstitute/args.txt
rename to test/T_Substitute/args.txt
diff --git a/test/T_BasicSubstitute/expected.txt b/test/T_Substitute/expected.txt
similarity index 53%
rename from test/T_BasicSubstitute/expected.txt
rename to test/T_Substitute/expected.txt
index 391c5d0..a3e659b 100644
--- a/test/T_BasicSubstitute/expected.txt
+++ b/test/T_Substitute/expected.txt
@@ -1,5 +1,5 @@
"<worded>"
"word" notwanted @add-good @add-mixcase-1 @add-mixcase-2 @add-regex ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13
- "word-sub" before 1 1 4 2 3 3 substituted 3 3 after @add-good @add-mixcase-1 @add-mixcase-2 @add-regex @map-word SUBSTITUTE:4 SUBSTITUTE:5 ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13 SUBSTITUTE:15 MAP:16
+ "word-sub" before 1 1 4 2 4 2 3 3 substituted 3 3 after @add-good @add-mixcase-1 @add-mixcase-2 @add-regex @map-word SUBSTITUTE:4 SUBSTITUTE:5 ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13 SUBSTITUTE:15 MAP:16
"word" @add-good @add-mixcase-1 @add-mixcase-2 @add-regex SUBSTITUTE:6 ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13
diff --git a/test/T_BasicSubstitute/grammar.cg3 b/test/T_Substitute/grammar.cg3
similarity index 100%
rename from test/T_BasicSubstitute/grammar.cg3
rename to test/T_Substitute/grammar.cg3
diff --git a/test/T_BasicSubstitute/input.txt b/test/T_Substitute/input.txt
similarity index 100%
rename from test/T_BasicSubstitute/input.txt
rename to test/T_Substitute/input.txt
diff --git a/test/T_Templates/expected.txt b/test/T_Templates/expected.txt
index b315810..259cfc2 100644
--- a/test/T_Templates/expected.txt
+++ b/test/T_Templates/expected.txt
@@ -1,14 +1,14 @@
"<He>"
"he" <*> <NonMod> PRON PERS MASC NOM SG3 SUBJ @postGood @linkedGood
"<ate>"
- "eat" <SVO> <SV> V PAST VFIN
+ "eat" <SVO> <SV> V PAST VFIN @artnGood2
"<a>"
"a" <Indef> DET CENTRAL ART SG <W:60> @preGood
"a" <Indef> NDET CENTRAL ART SG <W:60> @preGood
"<cow>"
"cow" N NOM SG @bothGoodGood
"<with>"
- "with" PREP @beforeNccN @artnGood @artnNegateGood @artnNegateGoodOffset @branch-nonC @branch-C
+ "with" PREP @beforeNccN @artnGood1 @artnNegateGood @artnNegateGoodOffset @branch-nonC @branch-C
"<biscuits>"
"biscuit" N NOM PL @startNccN
"<and>"
@@ -17,4 +17,3 @@
"lemonade" <-Indef> N NOM SG
"<$.>"
"$." @afterNccN
-
diff --git a/test/T_Templates/grammar.cg3 b/test/T_Templates/grammar.cg3
index 417361b..bc81040 100644
--- a/test/T_Templates/grammar.cg3
+++ b/test/T_Templates/grammar.cg3
@@ -27,10 +27,11 @@ ADD (@linkedBad) (*) IF (0 (SUBJ) LINK T:A-link-B LINK 1 (>>>)) ;
TEMPLATE ArtN = 1* (ART) LINK 1* (N) ;
-ADD (@artnGood) (PREP) IF (-1 T:ArtN) ;
-ADD (@artnBad) (PREP) IF (-2 T:ArtN) ;
-ADD (@artnBad) (PREP) IF (1 T:ArtN) ;
-ADD (@artnBad) (PREP) IF (2 T:ArtN) ;
+ADD (@artnGood1) (PREP) IF (-1 T:ArtN) ;
+ADD (@artnGood2) (V) IF (1 T:ArtN) ;
+ADD (@artnBad1) (PREP) IF (-2 T:ArtN) ;
+ADD (@artnBad2) (PREP) IF (1 T:ArtN) ;
+ADD (@artnBad3) (PREP) IF (2 T:ArtN) ;
ADD (@artnNegateGood) (PREP) IF (NEGATE T:ArtN) ;
ADD (@artnNegateGoodOffset) (PREP) IF (NEGATE 1 T:ArtN) ;
ADD (@artnNegateBad) (PREP) IF (NEGATE -1 T:ArtN) ;
diff --git a/test/T_Trace/grammar.cg3 b/test/T_Trace/grammar.cg3
index b60d9fa..6cfba94 100644
--- a/test/T_Trace/grammar.cg3
+++ b/test/T_Trace/grammar.cg3
@@ -1,9 +1,9 @@
DELIMITERS = "<$.>" ;
-MAP (@tag) (*) (NEGATE 1* ("blocker"))
+MAP (@tag) (*) (NEGATE 1* ("blocker")) ;
SECTION
-SELECT (wanted) (NEGATE 1* ("blocker"))
+SELECT (wanted) (NEGATE 1* ("blocker")) ;
SECTION
-REMOVE:named ("blocker")
+REMOVE:named ("blocker") ;
diff --git a/test/runall.pl b/test/runall.pl
index 1ab0912..20db8cc 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -9,12 +9,8 @@ chdir $bindir or die("Error: Could not change directory to $bindir !");
# Search paths for the binary
my @binlist = (
- "../../build/VS12/src/Debug/vislcg3",
- "../../build/VS12/src/Release/vislcg3",
- "../../build/VS11/src/Debug/vislcg3",
- "../../build/VS11/src/Release/vislcg3",
- "../../build/VS10/src/Debug/vislcg3",
- "../../build/VS10/src/Release/vislcg3",
+ "../../build/VS14/src/Debug/vislcg3",
+ "../../build/VS14/src/Release/vislcg3",
"../src/Debug/vislcg3",
"../src/Release/vislcg3",
"../Debug/vislcg3",
diff --git a/todo.sh b/todo.sh
index b211b78..19102f7 100755
--- a/todo.sh
+++ b/todo.sh
@@ -2,4 +2,4 @@
rm -f TODO.list
cat TODO > TODO.list
echo "----------" >> TODO.list
-grep -i todo src/* >> TODO.list
+grep -i todo src/* | perl -wpne 's/^([^:]+):\s*(.+)$/$2\t: $1/;' | perl -wpne 's@^//\s*@@g;' | LC_ALL=C sort >> TODO.list
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/cg3.git
More information about the debian-science-commits
mailing list