[cg3] 01/02: Imported Upstream version 0.9.8.10157

Tino Didriksen tinodidriksen-guest at moszumanska.debian.org
Fri Oct 3 10:18:46 UTC 2014


This is an automated email from the git hooks/post-receive script.

tinodidriksen-guest pushed a commit to branch master
in repository cg3.

commit 22ea26ff0951893898ece8058d83880b8528b096
Author: Tino Didriksen <mail at tinodidriksen.com>
Date:   Fri Oct 3 10:17:48 2014 +0000

    Imported Upstream version 0.9.8.10157
---
 CMakeLists.txt        | 20 ++++++++++++++------
 TODO                  |  5 +++++
 emacs/cg.el           |  1 +
 get-boost.sh          |  2 +-
 manual/tags.xml       | 10 ----------
 src/CMakeLists.txt    | 25 ++++++++-----------------
 src/FSTApplicator.cpp | 23 +++++++++++++++++------
 src/TextualParser.cpp | 18 +++++++++---------
 src/cg_conv.cpp       |  2 ++
 src/inlines.hpp       | 13 ++++++++++---
 10 files changed, 67 insertions(+), 52 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0f77b55..1cb6f3a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,12 +11,20 @@ endif()
 set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
 
 file(READ "${CMAKE_CURRENT_SOURCE_DIR}/src/version.hpp" _cg3_VERSION_FILE)
-string(REGEX REPLACE ".*CG3_VERSION_MAJOR = ([0-9]+).*" "\\1" _cg3_VERSION_MAJOR "${_cg3_VERSION_FILE}")
-string(REGEX REPLACE ".*CG3_VERSION_MINOR = ([0-9]+).*" "\\1" _cg3_VERSION_MINOR "${_cg3_VERSION_FILE}")
-string(REGEX REPLACE ".*CG3_VERSION_PATCH = ([0-9]+).*" "\\1" _cg3_VERSION_PATCH "${_cg3_VERSION_FILE}")
-string(REGEX REPLACE ".*CG3_REVISION = ([0-9]+).*" "\\1" REVISION "${_cg3_VERSION_FILE}")
+string(REGEX REPLACE ".*CG3_VERSION_MAJOR = ([0-9]+).*" "\\1" _cg3_VERSION_MAJOR ${_cg3_VERSION_FILE})
+string(REGEX REPLACE ".*CG3_VERSION_MINOR = ([0-9]+).*" "\\1" _cg3_VERSION_MINOR ${_cg3_VERSION_FILE})
+string(REGEX REPLACE ".*CG3_VERSION_PATCH = ([0-9]+).*" "\\1" _cg3_VERSION_PATCH ${_cg3_VERSION_FILE})
+string(REGEX REPLACE ".*CG3_REVISION = ([0-9]+).*" "\\1" REVISION ${_cg3_VERSION_FILE})
 set(VERSION "${_cg3_VERSION_MAJOR}.${_cg3_VERSION_MINOR}.${_cg3_VERSION_PATCH}.${REVISION}")
 
+# Because Fedora / CentOS / RHEL ...
+set(CG_LIBDIR "lib")
+if(LIB_INSTALL_DIR)
+	string(REGEX REPLACE "^${CMAKE_INSTALL_PREFIX}/" "" CG_LIBDIR ${LIB_INSTALL_DIR})
+elseif(LIB_SUFFIX)
+	set(CG_LIBDIR "${CG_LIBDIR}${LIB_SUFFIX}")
+endif()
+
 option(OPT_TCMALLOC "Set to OFF to disable linking against TCMalloc" ON)
 if(APPLE)
 	message(STATUS "Disabling TCMalloc for OS X")
@@ -50,11 +58,11 @@ else()
 	# Generate pkg-config file
 	set(prefix      ${CMAKE_INSTALL_PREFIX})
 	set(exec_prefix ${CMAKE_INSTALL_PREFIX})
-	set(libdir      ${CMAKE_INSTALL_PREFIX}/lib/${CMAKE_LIBRARY_ARCHITECTURE})
+	set(libdir      ${CMAKE_INSTALL_PREFIX}/${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE})
 	set(includedir  ${CMAKE_INSTALL_PREFIX}/include)
 	configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cg3.pc.in
 		${CMAKE_CURRENT_BINARY_DIR}/cg3.pc @ONLY)
-	install(FILES "${CMAKE_CURRENT_BINARY_DIR}/cg3.pc" DESTINATION "lib/${CMAKE_LIBRARY_ARCHITECTURE}/pkgconfig")
+	install(FILES "${CMAKE_CURRENT_BINARY_DIR}/cg3.pc" DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}/pkgconfig")
 
 	install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/manual/man/"
 		DESTINATION share/man/man1
diff --git a/TODO b/TODO
index 1095a2a..2585083 100644
--- a/TODO
+++ b/TODO
@@ -50,3 +50,8 @@ ToDo: MOVE/SWITCH should leave stored cohorts where they were; optionally bring
 ToDo: Check whether boost::dynamic_bitset makes more sense to store valid_rules in
 ToDo: Optimize (0 x LINK 0 x) to just be separate contexts
 ToDo: Update docs for regex on tag unification
+ToDo: APPEND $$baseform + X test cases (see email 2014-08-20)
+ToDo: Better error for APPEND (non-baseform)
+ToDo: \u and \U escapes in textual tags; possibly only varstrings or "tags"
+ToDo: cg-conv should just convert cohorts directly - no need to build whole sentences.
+ToDo: When going from section 1 to section 1+2 the first time, just skip right to the section 2 rules
diff --git a/emacs/cg.el b/emacs/cg.el
index d7786a5..bef1151 100644
--- a/emacs/cg.el
+++ b/emacs/cg.el
@@ -148,6 +148,7 @@ re-evaluating `cg-kw-re' (or all of cg.el).")
     "SETPARENT"    "SETCHILD"
     "ADDRELATION"  "REMRELATION"  "SETRELATION"
     "ADDRELATIONS" "REMRELATIONS" "SETRELATIONS"
+    "SETVARIABLE"  "REMVARIABLE"
     "APPEND")
   "Used for indentation, highlighting etc.; don't change without
 re-evaluating `cg-kw-re' (or all of cg.el)." )
diff --git a/get-boost.sh b/get-boost.sh
index 141d347..4328842 100755
--- a/get-boost.sh
+++ b/get-boost.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-export BOOSTVER=55
+export BOOSTVER=56
 export BDOT="1.$BOOSTVER.0"
 export BUC="boost_1_${BOOSTVER}_0"
 
diff --git a/manual/tags.xml b/manual/tags.xml
index 3a02091..4223d6a 100644
--- a/manual/tags.xml
+++ b/manual/tags.xml
@@ -320,16 +320,6 @@
     </para>
   </section>
 
-  <section id="tag-inversion">
-    <title>Tag Inversion</title>
-    <para>
-      You can negate a tag by prepending a !, as in !ADV. This has the effect
-      of matching if some tag other than ADV exists in the reading, but won't
-      match if ADV is actually present. This is mostly useful in tags such as
-      (V TR !ADV) as !ADV alone would match quite a lot.
-    </para>
-  </section>
-
   <section id="global-variables">
     <title>Global Variables</title>
     <para>
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 04ad18b..9374ebb 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -129,21 +129,14 @@ if(MSVC AND NOT CMAKE_BUILD_TYPE STREQUAL Debug)
 endif()
 cg3_link(libcg3)
 set(LINKLIB libcg3)
-set(LINKLIB_TEST libcg3)
 
 if(NOT MSVC)
-	add_library(cg3-private SHARED $<TARGET_OBJECTS:libcg3-objs>)
-	set_target_properties(cg3-private PROPERTIES NO_SONAME ON)
-	cg3_link(cg3-private)
-	set(LINKLIB cg3-private)
-
-	add_library(libcg3-shared SHARED libcg3.cpp)
+	add_library(libcg3-shared SHARED libcg3.cpp $<TARGET_OBJECTS:libcg3-objs>)
 	set_target_properties(libcg3-shared PROPERTIES SOVERSION ${_cg3_VERSION_MAJOR})
 	set_target_properties(libcg3-shared PROPERTIES OUTPUT_NAME "cg3")
 	set_target_properties(libcg3-shared PROPERTIES PREFIX "lib")
 	cg3_link(libcg3-shared)
-	target_link_libraries(libcg3-shared cg3-private)
-	set(LINKLIB_TEST libcg3-shared)
+	set(LINKLIB libcg3-shared)
 endif()
 
 add_executable(cg-comp cg_comp.cpp)
@@ -180,7 +173,7 @@ cg3_link(vislcg3)
 target_link_libraries(vislcg3 ${LINKLIB})
 
 add_executable(test_libcg3 test_libcg3.c)
-target_link_libraries(test_libcg3 ${LINKLIB_TEST})
+target_link_libraries(test_libcg3 ${LINKLIB})
 
 if(APPLE)
 	foreach(t libcg3-shared cg-conv cg-comp cg-proc vislcg3)
@@ -189,9 +182,8 @@ if(APPLE)
 			TARGET ${t}
 			POST_BUILD
 			COMMAND install_name_tool -add_rpath @executable_path/ ${_file_${t}}
-			COMMAND install_name_tool -add_rpath @executable_path/../lib/cg3 ${_file_${t}}
-			COMMAND install_name_tool -add_rpath @loader_path/../lib/cg3 ${_file_${t}}
-			COMMAND install_name_tool -change libcg3-private.dylib @rpath/libcg3-private.dylib ${_file_${t}}
+			COMMAND install_name_tool -add_rpath @executable_path/../${CG_LIBDIR} ${_file_${t}}
+			COMMAND install_name_tool -add_rpath @loader_path/../${CG_LIBDIR} ${_file_${t}}
 			COMMENT "Adding @rpath to ${t}"
 			VERBATIM
 		)
@@ -200,11 +192,10 @@ endif()
 
 add_test(t_libcg3 test_libcg3 "${CMAKE_CURRENT_SOURCE_DIR}/../test/T_BasicSelect/grammar.cg3")
 
-install(TARGETS libcg3 ARCHIVE DESTINATION "lib/${CMAKE_LIBRARY_ARCHITECTURE}")
+install(TARGETS libcg3 ARCHIVE DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}")
 if(NOT MSVC)
-	set_target_properties(libcg3-shared cg-comp cg-proc cg-conv vislcg3 PROPERTIES INSTALL_RPATH "$ORIGIN/../lib/${CMAKE_LIBRARY_ARCHITECTURE}/cg3;${CMAKE_INSTALL_PREFIX}/lib/${CMAKE_LIBRARY_ARCHITECTURE}/cg3")
-	install(TARGETS cg3-private ARCHIVE DESTINATION "lib/${CMAKE_LIBRARY_ARCHITECTURE}/cg3" LIBRARY DESTINATION "lib/${CMAKE_LIBRARY_ARCHITECTURE}/cg3")
-	install(TARGETS libcg3-shared ARCHIVE DESTINATION "lib/${CMAKE_LIBRARY_ARCHITECTURE}" LIBRARY DESTINATION "lib/${CMAKE_LIBRARY_ARCHITECTURE}")
+	set_target_properties(libcg3-shared cg-comp cg-proc cg-conv vislcg3 PROPERTIES INSTALL_RPATH "$ORIGIN/../${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE};${CMAKE_INSTALL_PREFIX}/${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}")
+	install(TARGETS libcg3-shared ARCHIVE DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}" LIBRARY DESTINATION "${CG_LIBDIR}/${CMAKE_LIBRARY_ARCHITECTURE}")
 	install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cg3.h" DESTINATION include)
 endif()
 install(TARGETS cg-comp cg-proc cg-conv vislcg3 RUNTIME DESTINATION bin)
diff --git a/src/FSTApplicator.cpp b/src/FSTApplicator.cpp
index dbda04d..4482408 100644
--- a/src/FSTApplicator.cpp
+++ b/src/FSTApplicator.cpp
@@ -89,10 +89,14 @@ void FSTApplicator::runGrammarOnText(istream& input, UFILE *output) {
 			for (size_t i=offset ; i<line.size() ; ++i) {
 				// Only copy one space character, regardless of how many are in input
 				if (ISSPACE(line[i]) && !ISNL(line[i])) {
-					cleaned[packoff++] = (line[i] == '\t' ? '\t' : ' ');
+					UChar space = (line[i] == '\t' ? '\t' : ' ');
 					while (ISSPACE(line[i]) && !ISNL(line[i])) {
+						if (line[i] == '\t') {
+							space = line[i];
+						}
 						++i;
 					}
+					cleaned[packoff++] = space;
 				}
 				// Break if there is a newline
 				if (ISNL(line[i])) {
@@ -117,13 +121,16 @@ gotaline:
 			cleaned[packoff-1] = 0;
 			--packoff;
 		}
-		if (!ignoreinput && cleaned[0] && cleaned[0] != '<') {
+		if (!ignoreinput && cleaned[0]) {
 			UChar *space = &cleaned[0];
-			SKIPTO_NOSPAN(space, '\t');
+			SKIPTO_NOSPAN_RAW(space, '\t');
 
 			if (space[0] != '\t') {
-				u_fprintf(ux_stderr, "Warning: %S on line %u looked like a cohort but wasn't - treated as text.\n", &cleaned[0], numLines);
-				u_fflush(ux_stderr);
+				// If this line looks like markup, don't warn about it
+				if (cleaned[0] != '<') {
+					u_fprintf(ux_stderr, "Warning: %S on line %u looked like a cohort but wasn't - treated as text.\n", &cleaned[0], numLines);
+					u_fflush(ux_stderr);
+				}
 				goto istext;
 			}
 			space[0] = 0;
@@ -153,7 +160,7 @@ gotaline:
 			}
 
 			++space;
-			while (space) {
+			while (space && (space[0] != '+' || space[1] != '?' || space[2] != 0)) {
 				cReading = new Reading(cCohort);
 				insert_if_exists(cReading->parent->possible_sets, grammar->sets_any);
 				addTagToReading(*cReading, cCohort->wordform);
@@ -218,6 +225,10 @@ gotaline:
 					u_fprintf(ux_stderr, "Warning: Line %u had no valid baseform.\n", numLines);
 					u_fflush(ux_stderr);
 				}
+				if (single_tags[cReading->baseform]->tag.size() == 2) {
+					delTagFromReading(*cReading, cReading->baseform);
+					cReading->baseform = makeBaseFromWord(cCohort->wordform->hash)->hash;
+				}
 				if (!mappings.empty()) {
 					splitMappings(mappings, *cCohort, *cReading, true);
 				}
diff --git a/src/TextualParser.cpp b/src/TextualParser.cpp
index db8bac2..1e9ace4 100644
--- a/src/TextualParser.cpp
+++ b/src/TextualParser.cpp
@@ -82,7 +82,7 @@ void TextualParser::parseTagList(UChar *& p, Set *s) {
 					UChar *n = p;
 					if (*n == '"') {
 						n++;
-						result->lines += SKIPTO_NOSPAN(n, '"');
+						SKIPTO_NOSPAN(n, '"');
 						if (*n != '"') {
 							u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
 							incErrorCount();
@@ -107,7 +107,7 @@ void TextualParser::parseTagList(UChar *& p, Set *s) {
 				UChar *n = p;
 				if (*n == '"') {
 					n++;
-					result->lines += SKIPTO_NOSPAN(n, '"');
+					SKIPTO_NOSPAN(n, '"');
 					if (*n != '"') {
 						u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
 						incErrorCount();
@@ -184,7 +184,7 @@ Set *TextualParser::parseSetInline(UChar *& p, Set *s) {
 						UChar *n = p;
 						if (*n == '"') {
 							n++;
-							result->lines += SKIPTO_NOSPAN(n, '"');
+							SKIPTO_NOSPAN(n, '"');
 							if (*n != '"') {
 								u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
 								incErrorCount();
@@ -826,7 +826,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
 		UChar *n = lp;
 		if (*n == '"') {
 			n++;
-			result->lines += SKIPTO_NOSPAN(n, '"');
+			SKIPTO_NOSPAN(n, '"');
 			if (*n != '"') {
 				u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
 				incErrorCount();
@@ -879,7 +879,7 @@ void TextualParser::parseRule(UChar *& p, KEYWORDS key) {
 		UChar *n = p;
 		if (*n == '"') {
 			++n;
-			result->lines += SKIPTO_NOSPAN(n, '"');
+			SKIPTO_NOSPAN(n, '"');
 			if (*n != '"') {
 				u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
 				incErrorCount();
@@ -1387,7 +1387,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
 				UChar *n = p;
 				if (*n == '"') {
 					n++;
-					result->lines += SKIPTO_NOSPAN(n, '"');
+					SKIPTO_NOSPAN(n, '"');
 					if (*n != '"') {
 						u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
 						incErrorCount();
@@ -2024,7 +2024,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
 				p = n;
 				if (*n == '"') {
 					n++;
-					result->lines += SKIPTO_NOSPAN(n, '"');
+					SKIPTO_NOSPAN(n, '"');
 					if (*n != '"') {
 						u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
 						incErrorCount();
@@ -2045,7 +2045,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
 
 				if (*n == '"') {
 					n++;
-					result->lines += SKIPTO_NOSPAN(n, '"');
+					SKIPTO_NOSPAN(n, '"');
 					if (*n != '"') {
 						u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
 						incErrorCount();
@@ -2098,7 +2098,7 @@ int TextualParser::parseFromUChar(UChar *input, const char *fname) {
 			if (*p == ';' || *p == '"') {
 				if (*p == '"') {
 					++p;
-					result->lines += SKIPTO_NOSPAN(p, '"');
+					SKIPTO_NOSPAN(p, '"');
 					if (*p != '"') {
 						u_fprintf(ux_stderr, "Error: Missing closing \" on line %u!\n", result->lines);
 						incErrorCount();
diff --git a/src/cg_conv.cpp b/src/cg_conv.cpp
index 0d58dce..64b0340 100644
--- a/src/cg_conv.cpp
+++ b/src/cg_conv.cpp
@@ -100,6 +100,8 @@ int main(int argc, char *argv[]) {
 	CG3::Grammar grammar;
 
 	grammar.ux_stderr = ux_stderr;
+	grammar.delimiters = grammar.allocateSet();
+	grammar.addTagToSet(grammar.allocateTag(CG3::stringbits[0].getTerminatedBuffer()), grammar.delimiters);
 	grammar.reindex();
 
 	CG3::FormatConverter applicator(ux_stderr);
diff --git a/src/inlines.hpp b/src/inlines.hpp
index c09d3bf..81208cd 100644
--- a/src/inlines.hpp
+++ b/src/inlines.hpp
@@ -301,15 +301,22 @@ inline uint32_t SKIPTO(UChar *& p, const UChar a) {
 	return s;
 }
 
-inline uint32_t SKIPTO_NOSPAN(UChar *& p, const UChar a) {
-	uint32_t s = 0;
+inline void SKIPTO_NOSPAN(UChar *& p, const UChar a) {
 	while (*p && (*p != a || ISESC(p))) {
 		if (ISNL(*p)) {
 			break;
 		}
 		++p;
 	}
-	return s;
+}
+
+inline void SKIPTO_NOSPAN_RAW(UChar *& p, const UChar a) {
+	while (*p && *p != a) {
+		if (ISNL(*p)) {
+			break;
+		}
+		++p;
+	}
 }
 
 inline void CG3Quit(const int32_t c = 0, const char* file = 0, const uint32_t line = 0) {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/cg3.git



More information about the debian-science-commits mailing list