[cg3] 01/03: Imported Upstream version 0.9.9~r10822

Tino Didriksen tinodidriksen-guest at moszumanska.debian.org
Tue Jun 30 12:34:26 UTC 2015


This is an automated email from the git hooks/post-receive script.

tinodidriksen-guest pushed a commit to branch master
in repository cg3.

commit cf226454d875f4b5d8c257482f0e7337ebe88805
Author: Tino Didriksen <mail at tinodidriksen.com>
Date:   Tue Jun 30 12:34:07 2015 +0000

    Imported Upstream version 0.9.9~r10822
---
 CMakeLists.txt                              |   9 ++++--
 ChangeLog                                   |  34 ++++++++++----------
 scripts/profile-revisions.php               |  22 ++++++-------
 src/GrammarApplicator.hpp                   |   5 +--
 src/GrammarApplicator_matchSet.cpp          |  16 ++++++----
 src/GrammarApplicator_reflow.cpp            |  47 +++++++++++++++++-----------
 src/GrammarApplicator_runContextualTest.cpp |  27 ++++++++--------
 src/GrammarApplicator_runRules.cpp          |  44 +++++++++++++++++---------
 src/version.hpp                             |   2 +-
 test/T_BasicSubstitute/expected.txt         |   3 +-
 test/T_BasicSubstitute/grammar.cg3          |   3 ++
 test/T_BasicSubstitute/grammar.cg3b.10043   | Bin 2155 -> 0 bytes
 vapply.sh                                   |   4 +--
 vparse.sh                                   |   4 +--
 14 files changed, 128 insertions(+), 92 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b53588d..e077e6e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,7 +38,7 @@ if(MSVC)
 	set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS})
 	set(CMAKE_C_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
 else()
-	set(_FLAGS_COMMON "-Wall -Wextra -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -fPIC")
+	set(_FLAGS_COMMON "-Wall -Wextra -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -Wno-unused-result -fPIC")
 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_FLAGS_COMMON} -fvisibility-inlines-hidden")
 	set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3")
 	set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
@@ -46,8 +46,11 @@ else()
 	set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3")
 	set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3")
 
-	# Enable C++11 if possible
-	if((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.6) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.1))
+	# Enable C++14 or C++11 if possible
+	if((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.9) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3))
+		message(STATUS "Enabling C++14 for ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
+		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y")
+	elseif((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.6) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.1))
 		message(STATUS "Enabling C++11 for ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
 		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
 	endif()
diff --git a/ChangeLog b/ChangeLog
index 84f27a7..7e69422 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2015-06-30  tino
+
+	* [r10821] CMakeLists.txt, vapply.sh, vparse.sh: Use C++14 where
+	  available
+	* [r10819] ChangeLog, scripts/profile-revisions.php,
+	  src/GrammarApplicator_matchSet.cpp,
+	  src/GrammarApplicator_reflow.cpp,
+	  src/GrammarApplicator_runContextualTest.cpp,
+	  src/GrammarApplicator_runRules.cpp, src/version.hpp: Fix
+	  segfault; Further reduce reallocations
+
+2015-06-28  tino
+
+	* [r10817] src/GrammarApplicator_runRules.cpp: Fix segfault
+	* [r10815] ChangeLog, src/GrammarApplicator_runRules.cpp,
+	  src/version.hpp: Fix removing enclosure owner
+
 2015-06-26  tino
 
 	* [r10811] src/ApertiumApplicator.cpp, src/FSTApplicator.cpp,
@@ -2299,20 +2316,3 @@
 	  src/GrammarApplicator_runGrammar.cpp, src/version.h: Fixed
 	  sub-readings breaking everything when they weren't real readings.
 
-2013-03-13  unhammer
-
-	* [r8875] emacs/cg.el: safer way to get x clipboard contents
-
-2013-03-12  tino
-
-	* [r8874] get-boost.sh, manual/compatibility.xml,
-	  manual/dependencies.xml,
-	  src/GrammarApplicator_runContextualTest.cpp, src/Reading.cpp,
-	  test/T_BasicDependency/expected.txt,
-	  test/T_BasicDependency/grammar.cg3: Document cc vs c*
-
-2013-01-24  tino
-
-	* [r8813] dist/Portfile: MacPorts
-	* [r8812] dist/Portfile: MacPorts
-
diff --git a/scripts/profile-revisions.php b/scripts/profile-revisions.php
index 089b400..a426918 100755
--- a/scripts/profile-revisions.php
+++ b/scripts/profile-revisions.php
@@ -9,13 +9,13 @@ function profile_revision($rev) {
 	echo "Exporting revision $rev...\n";
 	shell_exec('svn export -r '.$rev.' --ignore-externals svn+ssh://beta.visl.sdu.dk/usr/local/svn/repos/visl/tools/vislcg3/trunk '.$dir.' >/dev/null 2>&1');
 	chdir($dir);
-	shell_exec('svn export -r 3617 --ignore-externals svn+ssh://beta.visl.sdu.dk/usr/local/svn/repos/visl/trunk/parsers/dansk/etc/dancg dancg >/dev/null 2>&1');
+	shell_exec('svn export -r 10017 --ignore-externals svn+ssh://beta.visl.sdu.dk/usr/local/svn/repos/visl/trunk/parsers/dansk/etc/dancg.cg dancg >/dev/null 2>&1');
 	echo "Compiling...\n";
 
 	if (file_exists('./src/all_vislcg3.cpp')) {
 		echo "Using all_vislcg3.cpp and Boost...\n";
-		echo shell_exec('g++ -std=c++11 -DHAVE_BOOST -DNDEBUG -pthread -pipe -Wall -Wextra -Wno-deprecated -fPIC -O3 -Iinclude -Iinclude/posix ./src/all_vislcg3.cpp -o vislcg3 -L/usr/lib/x86_64-linux-gnu -licui18n -licudata -licuio -licuuc 2>&1');
-		echo shell_exec('g++ -std=c++11 -DHAVE_BOOST -DNDEBUG -pthread -pipe -Wall -Wextra -Wno-deprecated -fPIC -O3 -Iinclude -Iinclude/posix ./src/all_vislcg3.cpp -o vislcg3-tc -L/usr/lib/x86_64-linux-gnu -licui18n -licudata -licuio -licuuc -ltcmalloc 2>&1');
+		echo shell_exec('g++ -std=c++11 -DHAVE_BOOST -DNDEBUG -pthread -pipe -Wall -Wextra -Wno-deprecated -fPIC -O3 -Iinclude -Iinclude/exec-stream -Iinclude/posix ./src/all_vislcg3.cpp -o vislcg3 -L/usr/lib/x86_64-linux-gnu -licui18n -licudata -licuio -licuuc 2>&1');
+		echo shell_exec('g++ -std=c++11 -DHAVE_BOOST -DNDEBUG -pthread -pipe -Wall -Wextra -Wno-deprecated -fPIC -O3 -Iinclude -Iinclude/exec-stream -Iinclude/posix ./src/all_vislcg3.cpp -o vislcg3-tc -L/usr/lib/x86_64-linux-gnu -licui18n -licudata -licuio -licuuc -ltcmalloc 2>&1');
 	}
 	else {
 		echo "Using old-style without Boost...\n";
@@ -34,7 +34,7 @@ function profile_revision($rev) {
 	for ($i=0 ; $i<3 ; $i++) {
 		echo "Parsing...\n";
 		$start = microtime(true);
-		$time = shell_exec('/usr/bin/time ./vislcg3-tc -C ISO-8859-1 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep user | grep system');
+		$time = shell_exec('/usr/bin/time ./vislcg3-tc -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep user | grep system');
 		$times['parse'][$i]['microtime'] = microtime(true) - $start;
 		$times['parse'][$i]['time'] = trim($time);
 
@@ -45,25 +45,25 @@ function profile_revision($rev) {
 
 		echo "Applying...\n";
 		$start = microtime(true);
-		$time = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | u2i | /usr/bin/time ./vislcg3-tc -C ISO-8859-1 -g dancg.cg3b 2>&1 | grep user | grep system');
+		$time = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | /usr/bin/time ./vislcg3-tc -g dancg.cg3b 2>&1 | grep user | grep system');
 		$times['apply'][$i]['microtime'] = microtime(true) - $start;
 		$times['apply'][$i]['time'] = trim($time);
 	}
 
 	echo "Parsing via valgrind...\n";
-	$ticks = shell_exec('valgrind ./vislcg3 -C ISO-8859-1 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep "total heap usage"');
+	$ticks = shell_exec('valgrind ./vislcg3 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep "total heap usage"');
 	$times['parse']['memory'] = trim($ticks);
 
 	echo "Applying via valgrind...\n";
-	$ticks = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | u2i | valgrind ./vislcg3 -C ISO-8859-1 -g dancg.cg3b 2>&1 | grep "total heap usage"');
+	$ticks = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | valgrind ./vislcg3 -g dancg.cg3b 2>&1 | grep "total heap usage"');
 	$times['apply']['memory'] = trim($ticks);
 
 	echo "Parsing via callgrind...\n";
-	$ticks = shell_exec('valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3 -C ISO-8859-1 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep Collected');
+	$ticks = shell_exec('valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep Collected');
 	$times['parse']['ticks'] = trim($ticks);
 
 	echo "Applying via callgrind...\n";
-	$ticks = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | u2i | valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3 -C ISO-8859-1 -g dancg.cg3b 2>&1 | grep Collected');
+	$ticks = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3 -g dancg.cg3b 2>&1 | grep Collected');
 	$times['apply']['ticks'] = trim($ticks);
 
 	file_put_contents('/tmp/cg3-times-'.$rev.'.txt', var_export($times, true));
@@ -73,8 +73,8 @@ function profile_revision($rev) {
 	shell_exec('rm -rf '.$dir.' 2>&1 >/dev/null');
 }
 
-$revs = array(10373, 10297, 10034, 10016, 9645, 9274, 9249, 8923, 8001, 7397, 7134, 7000, 6987, 6898, 6885, 6781, 6692, 6500, 6328, 6268, 6242, 6170, 5932, 5930, 5926, 5918, 5839, 5810, 5773, 5729, 5431, 5129, 5042, 4879, 4779, 4545, 4513, 4493, 4474, 4410, 4292, 4031, 3991, 3896, 3852, 3800, 3689, 3682, 3617);
-$revs = array(10791);
+$revs = array(10809, 10800, 10373, 10044);
+$revs = array(10811);
 foreach ($revs as $rev) {
 	profile_revision($rev);
 }
diff --git a/src/GrammarApplicator.hpp b/src/GrammarApplicator.hpp
index 519a7de..e07fd2c 100644
--- a/src/GrammarApplicator.hpp
+++ b/src/GrammarApplicator.hpp
@@ -187,8 +187,9 @@ namespace CG3 {
 		bool did_final_enclosure;
 
 		std::vector<regexgrps_t> regexgrps_store;
-		std::pair<size_t, regexgrps_t*>* regexgrps;
-		bc::flat_map<uint32_t, std::pair<size_t,regexgrps_t*> > regexgrps_r;
+		std::pair<uint8_t, regexgrps_t*> regexgrps;
+		bc::flat_map<uint32_t, uint8_t> regexgrps_z;
+		bc::flat_map<uint32_t, regexgrps_t*> regexgrps_c;
 		uint32_t same_basic;
 		Cohort *target;
 		Cohort *mark;
diff --git a/src/GrammarApplicator_matchSet.cpp b/src/GrammarApplicator_matchSet.cpp
index 32947ef..0ef0572 100644
--- a/src/GrammarApplicator_matchSet.cpp
+++ b/src/GrammarApplicator_matchSet.cpp
@@ -96,11 +96,11 @@ uint32_t GrammarApplicator::doesTagMatchRegexp(uint32_t test, const Tag& tag, bo
 				for (int i = 1; i <= gc; ++i) {
 					tmp[0] = 0;
 					int32_t len = uregex_group(tag.regexp, i, tmp, 1024, &status);
-					regexgrps->second->resize(std::max(regexgrps->first+1, regexgrps->second->size()));
-					UnicodeString& ucstr = (*regexgrps->second)[regexgrps->first];
+					regexgrps.second->resize(std::max(static_cast<size_t>(regexgrps.first)+1, regexgrps.second->size()));
+					UnicodeString& ucstr = (*regexgrps.second)[regexgrps.first];
 					ucstr.remove();
 					ucstr.append(tmp, len);
-					++regexgrps->first;
+					++regexgrps.first;
 				}
 			}
 			else {
@@ -587,7 +587,8 @@ bool GrammarApplicator::doesSetMatchReading(const Reading& reading, const uint32
 		}
 		// Subsequent times, test whether any of the previously stored sets match the reading
 		else {
-			uint32SortedVector sets;
+			static uint32SortedVector sets;
+			sets.clear();
 			foreach(uint32SortedVector, *unif_sets, usi, usi_end) {
 				if (doesSetMatchReading(reading, *usi, bypass_index, unif_mode)) {
 					sets.insert(*usi);
@@ -721,8 +722,11 @@ inline bool GrammarApplicator::doesSetMatchCohort_testLinked(Cohort& cohort, con
 
 inline bool GrammarApplicator::doesSetMatchCohort_helper(Cohort& cohort, const Reading& reading, const Set& theset, dSMC_Context *context) {
 	bool retval = false;
-	unif_tags_t utags;
-	uint32SortedVector usets;
+	static unif_tags_t utags;
+	utags.clear();
+	static uint32SortedVector usets;
+	usets.clear();
+
 	if (context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY)) {
 		utags = *unif_tags;
 		usets = *unif_sets;
diff --git a/src/GrammarApplicator_reflow.cpp b/src/GrammarApplicator_reflow.cpp
index 5e39d71..2f21d32 100644
--- a/src/GrammarApplicator_reflow.cpp
+++ b/src/GrammarApplicator_reflow.cpp
@@ -38,12 +38,12 @@ Tag *GrammarApplicator::makeBaseFromWord(Tag *tag) {
 	if (len < 5) {
 		return tag;
 	}
-	UChar *n = new UChar[len-1];
+	static UString n;
+	n.clear();
+	n.resize(len-2);
 	n[0] = n[len-3] = '"';
-	n[len-2] = 0;
-	u_strncpy(n+1, tag->tag.c_str()+2, len-4);
+	u_strncpy(&n[1], tag->tag.c_str()+2, len-4);
 	Tag *nt = addTag(n);
-	delete[] n;
 	return nt;
 }
 
@@ -57,8 +57,8 @@ bool GrammarApplicator::isChildOf(const Cohort *child, const Cohort *parent) {
 		retval = true;
 	}
 	else {
-		int i = 0;
-		for (const Cohort *inner = child ; i<1000;i++) {
+		size_t i = 0;
+		for (const Cohort *inner = child ; i<1000 ; ++i) {
 			if (inner->dep_parent == 0 || inner->dep_parent == std::numeric_limits<uint32_t>::max()) {
 				retval = false;
 				break;
@@ -104,8 +104,8 @@ bool GrammarApplicator::wouldParentChildLoop(const Cohort *parent, const Cohort
 		retval = true;
 	}
 	else {
-		int i = 0;
-		for (const Cohort *inner = parent ;i<1000;i++) {
+		size_t i = 0;
+		for (const Cohort *inner = parent ; i<1000 ; ++i) {
 			if (inner->dep_parent == 0 || inner->dep_parent == std::numeric_limits<uint32_t>::max()) {
 				retval = false;
 				break;
@@ -311,7 +311,8 @@ void GrammarApplicator::reflowRelationWindow(uint32_t max) {
 
 		if (!cohort->relations_input.empty()) {
 			for (RelationCtn::iterator rel = cohort->relations_input.begin() ; rel != cohort->relations_input.end() ; ) {
-				uint32SortedVector newrel;
+				static uint32SortedVector newrel;
+				newrel.clear();
 
 				boost_foreach (uint32_t target, rel->second) {
 					uint32FlatHashMap::iterator it = gWindow->relation_map.find(target);
@@ -358,14 +359,17 @@ void GrammarApplicator::reflowReading(Reading& reading) {
 }
 
 Tag *GrammarApplicator::generateVarstringTag(const Tag *tag) {
-	UnicodeString tmp(tag->tag.c_str(), tag->tag.length());
+	static UnicodeString tmp;
+	tmp.remove();
+	tmp.append(tag->tag.c_str(), tag->tag.length());
 	bool did_something = false;
 
 	// Replace unified sets with their matching tags
 	if (tag->vs_sets) {
 		for (size_t i=0 ; i<tag->vs_sets->size() ; ++i) {
 			TagList tags = getTagList(*(*tag->vs_sets)[i]);
-			UString rpl;
+			static UString rpl;
+			rpl.clear();
 			// If there are multiple tags, such as from CompositeTags, put _ between them
 			const_foreach (TagList, tags, iter, iter_end) {
 				rpl += (*iter)->tag;
@@ -379,8 +383,8 @@ Tag *GrammarApplicator::generateVarstringTag(const Tag *tag) {
 	}
 
 	// Replace $1-$9 with their respective match groups
-	for (size_t i = 0; i<regexgrps->first && i<9; ++i) {
-		tmp.findAndReplace(stringbits[S_VS1+i], (*regexgrps->second)[i]);
+	for (size_t i = 0; i<regexgrps.first && i<9; ++i) {
+		tmp.findAndReplace(stringbits[S_VS1+i], (*regexgrps.second)[i]);
 		did_something = true;
 	}
 
@@ -624,7 +628,8 @@ void GrammarApplicator::splitAllMappings(all_mappings_t& all_mappings, Cohort& c
 	if (all_mappings.empty()) {
 		return;
 	}
-	ReadingList readings = cohort.readings;
+	static ReadingList readings;
+	readings = cohort.readings;
 	boost_foreach (Reading *reading, readings) {
 		BOOST_AUTO(iter, all_mappings.find(reading));
 		if (iter == all_mappings.end()) {
@@ -644,10 +649,13 @@ void GrammarApplicator::splitAllMappings(all_mappings_t& all_mappings, Cohort& c
 }
 
 void GrammarApplicator::mergeReadings(ReadingList& readings) {
-	bc::flat_map<uint32_t, std::pair<uint32_t,Reading*> > mapped;
+	static bc::flat_map<uint32_t, std::pair<uint32_t,Reading*> > mapped;
+	mapped.clear();
 	mapped.reserve(readings.size());
-	bc::flat_map<uint32_t, ReadingList> mlist;
+	static bc::flat_map<uint32_t, ReadingList> mlist;
+	mlist.clear();
 	mlist.reserve(readings.size());
+
 	foreach (ReadingList, readings, iter, iter_end) {
 		Reading *r = *iter;
 		uint32_t hp = r->hash_plain, hplain = r->hash_plain;
@@ -691,15 +699,16 @@ void GrammarApplicator::mergeReadings(ReadingList& readings) {
 	}
 
 	readings.clear();
-	std::vector<Reading*> order;
+	static std::vector<Reading*> order;
+	order.clear();
 
 	for (BOOST_AUTO(miter, mlist.begin()) ; miter != mlist.end() ; miter++) {
-		ReadingList clist = miter->second;
+		const ReadingList& clist = miter->second;
 		Reading *nr = alloc_reading(*(clist.front()));
 		if (nr->mapping) {
 			erase(nr->tags_list, nr->mapping->hash);
 		}
-		foreach (ReadingList, clist, iter1, iter1_end) {
+		const_foreach (ReadingList, clist, iter1, iter1_end) {
 			if ((*iter1)->mapping && std::find(nr->tags_list.begin(), nr->tags_list.end(), (*iter1)->mapping->hash) == nr->tags_list.end()) {
 				nr->tags_list.push_back((*iter1)->mapping->hash);
 			}
diff --git a/src/GrammarApplicator_runContextualTest.cpp b/src/GrammarApplicator_runContextualTest.cpp
index dd7850f..c437b3f 100644
--- a/src/GrammarApplicator_runContextualTest.cpp
+++ b/src/GrammarApplicator_runContextualTest.cpp
@@ -31,7 +31,7 @@
 namespace CG3 {
 
 Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *test, uint8_t& rvs, bool *retval, Cohort **deep, Cohort *origin) {
-	size_t regexgrpz = regexgrps->first;
+	uint8_t regexgrpz = regexgrps.first;
 	if (test->pos & POS_MARK_SET) {
 		mark = cohort;
 	}
@@ -92,7 +92,7 @@ Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *t
 		rvs |= TRV_BREAK;
 	}
 	if (!*retval) {
-		regexgrps->first = regexgrpz;
+		regexgrps.first = regexgrpz;
 	}
 	return cohort;
 }
@@ -516,7 +516,7 @@ Cohort *GrammarApplicator::runDependencyTest(SingleWindow *sWindow, Cohort *curr
 		}
 	}
 
-	boost::scoped_ptr<uint32SortedVector> tmp_deps;
+	static uint32SortedVector tmp_deps;
 	uint32SortedVector *deps = 0;
 	if (test->pos & POS_DEP_CHILD) {
 		deps = &current->dep_children;
@@ -545,23 +545,23 @@ Cohort *GrammarApplicator::runDependencyTest(SingleWindow *sWindow, Cohort *curr
 	}
 
 	if (test->pos & MASK_POS_LORR) {
-		tmp_deps.reset(new uint32SortedVector(*deps));
+		tmp_deps = *deps;
 
 		if (test->pos & POS_LEFT) {
-			tmp_deps->assign((*deps).begin(), (*deps).lower_bound(current->global_number));
+			tmp_deps.assign(deps->begin(), deps->lower_bound(current->global_number));
 		}
 		if (test->pos & POS_RIGHT) {
-			tmp_deps->assign((*deps).lower_bound(current->global_number), (*deps).end());
+			tmp_deps.assign(deps->lower_bound(current->global_number), deps->end());
 		}
 		if (test->pos & POS_SELF) {
-			tmp_deps->insert(current->global_number);
+			tmp_deps.insert(current->global_number);
 		}
-		if ((test->pos & POS_RIGHTMOST) && !tmp_deps->empty()) {
-			uint32SortedVector::container& cont = tmp_deps->get();
+		if ((test->pos & POS_RIGHTMOST) && !tmp_deps.empty()) {
+			uint32SortedVector::container& cont = tmp_deps.get();
 			std::reverse(cont.begin(), cont.end());
 		}
 
-		deps = tmp_deps.get();
+		deps = &tmp_deps;
 	}
 
 	const_foreach (uint32SortedVector, *deps, dter, dter_end) {
@@ -655,7 +655,8 @@ Cohort *GrammarApplicator::runRelationTest(SingleWindow *sWindow, Cohort *curren
 		return 0;
 	}
 
-	CohortSet rels;
+	static CohortSet rels;
+	rels.clear();
 
 	if (test->relation == grammar->tag_any) {
 		const_foreach (RelationCtn, current->relations, riter, riter_end) {
@@ -680,12 +681,12 @@ Cohort *GrammarApplicator::runRelationTest(SingleWindow *sWindow, Cohort *curren
 	}
 
 	if (test->pos & POS_LEFT) {
-		CohortSet tmp;
+		static CohortSet tmp;
 		tmp.assign(rels.begin(), rels.lower_bound(current));
 		rels.swap(tmp);
 	}
 	if (test->pos & POS_RIGHT) {
-		CohortSet tmp;
+		static CohortSet tmp;
 		tmp.assign(rels.lower_bound(current), rels.end());
 		rels.swap(tmp);
 	}
diff --git a/src/GrammarApplicator_runRules.cpp b/src/GrammarApplicator_runRules.cpp
index 9bbd0c9..bbe2306 100644
--- a/src/GrammarApplicator_runRules.cpp
+++ b/src/GrammarApplicator_runRules.cpp
@@ -402,9 +402,8 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 				subs_any.clear();
 			}
 			// Varstring capture groups exist on a per-cohort basis, since we may need them for mapping later.
-			if (!regexgrps_r.empty()) {
-				regexgrps_r.clear();
-			}
+			regexgrps_z.clear();
+			regexgrps_c.clear();
 			if (!unif_tags_rs.empty()) {
 				unif_tags_rs.clear();
 			}
@@ -414,6 +413,8 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 
 			size_t used_regex = 0;
 			regexgrps_store.resize(std::max(regexgrps_store.size(), cohort->readings.size()));
+			regexgrps_z.reserve(std::max(regexgrps_z.size(), cohort->readings.size()));
+			regexgrps_c.reserve(std::max(regexgrps_c.size(), cohort->readings.size()));
 
 			size_t used_unif = 0;
 			unif_tags_store.resize(std::max(unif_tags_store.size(), cohort->readings.size()));
@@ -450,16 +451,19 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 						if (reading->matched_tests) {
 							++num_active;
 						}
-						regexgrps_r[reading->number] = regexgrps_r[rpit->second->number];
+						if (regexgrps_c.count(rpit->second->number)) {
+							regexgrps_c[reading->number];
+							regexgrps_c[reading->number] = regexgrps_c[rpit->second->number];
+							regexgrps_z[reading->number];
+							regexgrps_z[reading->number] = regexgrps_z[rpit->second->number];
+						}
 						continue;
 					}
 				}
 
 				// Regex capture is done on a per-reading basis, so clear all captured state.
-				regexgrps = &regexgrps_r[reading->number];
-				regexgrps->first = 0;
-				regexgrps->second = &regexgrps_store[used_regex];
-				++used_regex;
+				regexgrps.first = 0;
+				regexgrps.second = &regexgrps_store[used_regex];
 
 				// Unification is done on a per-reading basis, so clear all unification state.
 				unif_tags = &unif_tags_store[used_unif];
@@ -484,10 +488,10 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 				same_basic = reading->hash_plain;
 				target = 0;
 				mark = cohort;
-				size_t orz = regexgrps->first;
+				uint8_t orz = regexgrps.first;
 				// Actually check if the reading is a valid target. First check if rule target matches...
 				if (rule.target && doesSetMatchReading(*reading, rule.target, (set.type & (ST_CHILD_UNIFY|ST_SPECIAL)) != 0)) {
-					if (orz != regexgrps->first) {
+					if (orz != regexgrps.first) {
 						did_test = false;
 					}
 					target = cohort;
@@ -539,12 +543,12 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 						++rule.num_match;
 					}
 					else {
-						regexgrps->first = orz;
+						regexgrps.first = orz;
 					}
 					++num_iff;
 				}
 				else {
-					regexgrps->first = orz;
+					regexgrps.first = orz;
 					++rule.num_fail;
 				}
 				readings_plain.insert(std::make_pair(reading->hash_plain,reading));
@@ -553,6 +557,11 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 					cohort->readings[i]->matched_target = reading->matched_target;
 					cohort->readings[i]->matched_tests = reading->matched_tests;
 				}
+				if (regexgrps.first) {
+					regexgrps_c[reading->number] = regexgrps.second;
+					regexgrps_z[reading->number] = regexgrps.first;
+					++used_regex;
+				}
 			}
 
 			// If none of the readings were valid targets, remove this cohort from the rule's possible cohorts.
@@ -597,9 +606,11 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 				bool good = reading.matched_tests;
 				const uint32_t state_hash = reading.hash;
 
-				regexgrps = 0;
-				if (regexgrps_r.count(reading.number)) {
-					regexgrps = &regexgrps_r[reading.number];
+				regexgrps.first = 0;
+				regexgrps.second = 0;
+				if (regexgrps_c.count(reading.number)) {
+					regexgrps.second = regexgrps_c[reading.number];
+					regexgrps.first = regexgrps_z[reading.number];
 				}
 
 				// Iff needs extra special care; if it is a Remove type and we matched the target, go ahead.
@@ -957,6 +968,9 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 								const Tag* tt = *it;
 								it = theTags.erase(it);
 								if (tt->type & T_SPECIAL) {
+									if (regexgrps.second == 0) {
+										regexgrps.second = &regexgrps_store[used_regex];
+									}
 									uint32_t stag = doesTagMatchReading(reading, *tt, false, true);
 									if (stag) {
 										theTags.insert(it, single_tags.find(stag)->second);
diff --git a/src/version.hpp b/src/version.hpp
index 5194255..77d07b1 100644
--- a/src/version.hpp
+++ b/src/version.hpp
@@ -30,7 +30,7 @@ const char* const CG3_COPYRIGHT_STRING = "Copyright (C) 2007-2015 GrammarSoft Ap
 const uint32_t CG3_VERSION_MAJOR = 0;
 const uint32_t CG3_VERSION_MINOR = 9;
 const uint32_t CG3_VERSION_PATCH = 9;
-const uint32_t CG3_REVISION = 10815;
+const uint32_t CG3_REVISION = 10822;
 const uint32_t CG3_FEATURE_REV = 10575;
 const uint32_t CG3_TOO_OLD = 10373;
 const uint32_t CG3_EXTERNAL_PROTOCOL = 7226;
diff --git a/test/T_BasicSubstitute/expected.txt b/test/T_BasicSubstitute/expected.txt
index ac54dff..391c5d0 100644
--- a/test/T_BasicSubstitute/expected.txt
+++ b/test/T_BasicSubstitute/expected.txt
@@ -1,4 +1,5 @@
 "<worded>"
 	"word" notwanted @add-good @add-mixcase-1 @add-mixcase-2 @add-regex ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13
-	"word" before 1 1 4 2 3 3 substituted 3 3 after @add-good @add-mixcase-1 @add-mixcase-2 @add-regex SUBSTITUTE:4 SUBSTITUTE:5 ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13
+	"word-sub" before 1 1 4 2 3 3 substituted 3 3 after @add-good @add-mixcase-1 @add-mixcase-2 @add-regex @map-word SUBSTITUTE:4 SUBSTITUTE:5 ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13 SUBSTITUTE:15 MAP:16
 	"word" @add-good @add-mixcase-1 @add-mixcase-2 @add-regex SUBSTITUTE:6 ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13
+
diff --git a/test/T_BasicSubstitute/grammar.cg3 b/test/T_BasicSubstitute/grammar.cg3
index ebe7f9f..ac735d6 100644
--- a/test/T_BasicSubstitute/grammar.cg3
+++ b/test/T_BasicSubstitute/grammar.cg3
@@ -11,3 +11,6 @@ SUBSTITUTE ("<.*>"r) (VSTR:"<$1ed>") ("<(.+)>"r) ;
 "<word>" ADD (@add-bad) (*) ;
 "<WoRdeD>"i ADD (@add-mixcase-2) (*) ;
 "<word.*>"r ADD (@add-regex) (*) ;
+
+SUBSTITUTE ("(.*)"r) ("$1-sub"v) (after) ;
+MAP (VSTR:@map-$1) ("word-sub") (0 ("(word)-sub"r)) ;
diff --git a/test/T_BasicSubstitute/grammar.cg3b.10043 b/test/T_BasicSubstitute/grammar.cg3b.10043
deleted file mode 100644
index ae64209..0000000
Binary files a/test/T_BasicSubstitute/grammar.cg3b.10043 and /dev/null differ
diff --git a/vapply.sh b/vapply.sh
index 7b57547..9761324 100755
--- a/vapply.sh
+++ b/vapply.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# 98 or 11
-CXXV=11
+# 98 or 11 or 1y
+CXXV=1y
 #svn up
 make -j5
 ./src/vislcg3 -C UTF-8 -g ~/parsers/dansk/etc/dancg --grammar-only --grammar-bin dancg.cg3b
diff --git a/vparse.sh b/vparse.sh
index f5700bc..00e0058 100755
--- a/vparse.sh
+++ b/vparse.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
-# 98 or 11
-CXXV=11
+# 98 or 11 or 1y
+CXXV=1y
 #svn up
 mkdir -p vparse
 cd vparse

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/cg3.git



More information about the debian-science-commits mailing list