[cg3] 01/03: Imported Upstream version 0.9.9~r10822
Tino Didriksen
tinodidriksen-guest at moszumanska.debian.org
Tue Jun 30 12:34:26 UTC 2015
This is an automated email from the git hooks/post-receive script.
tinodidriksen-guest pushed a commit to branch master
in repository cg3.
commit cf226454d875f4b5d8c257482f0e7337ebe88805
Author: Tino Didriksen <mail at tinodidriksen.com>
Date: Tue Jun 30 12:34:07 2015 +0000
Imported Upstream version 0.9.9~r10822
---
CMakeLists.txt | 9 ++++--
ChangeLog | 34 ++++++++++----------
scripts/profile-revisions.php | 22 ++++++-------
src/GrammarApplicator.hpp | 5 +--
src/GrammarApplicator_matchSet.cpp | 16 ++++++----
src/GrammarApplicator_reflow.cpp | 47 +++++++++++++++++-----------
src/GrammarApplicator_runContextualTest.cpp | 27 ++++++++--------
src/GrammarApplicator_runRules.cpp | 44 +++++++++++++++++---------
src/version.hpp | 2 +-
test/T_BasicSubstitute/expected.txt | 3 +-
test/T_BasicSubstitute/grammar.cg3 | 3 ++
test/T_BasicSubstitute/grammar.cg3b.10043 | Bin 2155 -> 0 bytes
vapply.sh | 4 +--
vparse.sh | 4 +--
14 files changed, 128 insertions(+), 92 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b53588d..e077e6e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,7 +38,7 @@ if(MSVC)
set(CMAKE_C_FLAGS ${CMAKE_CXX_FLAGS})
set(CMAKE_C_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
else()
- set(_FLAGS_COMMON "-Wall -Wextra -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -fPIC")
+ set(_FLAGS_COMMON "-Wall -Wextra -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -Wno-unused-result -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_FLAGS_COMMON} -fvisibility-inlines-hidden")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
@@ -46,8 +46,11 @@ else()
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O3")
- # Enable C++11 if possible
- if((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.6) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.1))
+ # Enable C++14 or C++11 if possible
+ if((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.9) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.3))
+ message(STATUS "Enabling C++14 for ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y")
+ elseif((CMAKE_COMPILER_IS_GNUCXX AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.6) OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND NOT ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.1))
message(STATUS "Enabling C++11 for ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
endif()
diff --git a/ChangeLog b/ChangeLog
index 84f27a7..7e69422 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2015-06-30 tino
+
+ * [r10821] CMakeLists.txt, vapply.sh, vparse.sh: Use C++14 where
+ available
+ * [r10819] ChangeLog, scripts/profile-revisions.php,
+ src/GrammarApplicator_matchSet.cpp,
+ src/GrammarApplicator_reflow.cpp,
+ src/GrammarApplicator_runContextualTest.cpp,
+ src/GrammarApplicator_runRules.cpp, src/version.hpp: Fix
+ segfault; Further reduce reallocations
+
+2015-06-28 tino
+
+ * [r10817] src/GrammarApplicator_runRules.cpp: Fix segfault
+ * [r10815] ChangeLog, src/GrammarApplicator_runRules.cpp,
+ src/version.hpp: Fix removing enclosure owner
+
2015-06-26 tino
* [r10811] src/ApertiumApplicator.cpp, src/FSTApplicator.cpp,
@@ -2299,20 +2316,3 @@
src/GrammarApplicator_runGrammar.cpp, src/version.h: Fixed
sub-readings breaking everything when they weren't real readings.
-2013-03-13 unhammer
-
- * [r8875] emacs/cg.el: safer way to get x clipboard contents
-
-2013-03-12 tino
-
- * [r8874] get-boost.sh, manual/compatibility.xml,
- manual/dependencies.xml,
- src/GrammarApplicator_runContextualTest.cpp, src/Reading.cpp,
- test/T_BasicDependency/expected.txt,
- test/T_BasicDependency/grammar.cg3: Document cc vs c*
-
-2013-01-24 tino
-
- * [r8813] dist/Portfile: MacPorts
- * [r8812] dist/Portfile: MacPorts
-
diff --git a/scripts/profile-revisions.php b/scripts/profile-revisions.php
index 089b400..a426918 100755
--- a/scripts/profile-revisions.php
+++ b/scripts/profile-revisions.php
@@ -9,13 +9,13 @@ function profile_revision($rev) {
echo "Exporting revision $rev...\n";
shell_exec('svn export -r '.$rev.' --ignore-externals svn+ssh://beta.visl.sdu.dk/usr/local/svn/repos/visl/tools/vislcg3/trunk '.$dir.' >/dev/null 2>&1');
chdir($dir);
- shell_exec('svn export -r 3617 --ignore-externals svn+ssh://beta.visl.sdu.dk/usr/local/svn/repos/visl/trunk/parsers/dansk/etc/dancg dancg >/dev/null 2>&1');
+ shell_exec('svn export -r 10017 --ignore-externals svn+ssh://beta.visl.sdu.dk/usr/local/svn/repos/visl/trunk/parsers/dansk/etc/dancg.cg dancg >/dev/null 2>&1');
echo "Compiling...\n";
if (file_exists('./src/all_vislcg3.cpp')) {
echo "Using all_vislcg3.cpp and Boost...\n";
- echo shell_exec('g++ -std=c++11 -DHAVE_BOOST -DNDEBUG -pthread -pipe -Wall -Wextra -Wno-deprecated -fPIC -O3 -Iinclude -Iinclude/posix ./src/all_vislcg3.cpp -o vislcg3 -L/usr/lib/x86_64-linux-gnu -licui18n -licudata -licuio -licuuc 2>&1');
- echo shell_exec('g++ -std=c++11 -DHAVE_BOOST -DNDEBUG -pthread -pipe -Wall -Wextra -Wno-deprecated -fPIC -O3 -Iinclude -Iinclude/posix ./src/all_vislcg3.cpp -o vislcg3-tc -L/usr/lib/x86_64-linux-gnu -licui18n -licudata -licuio -licuuc -ltcmalloc 2>&1');
+ echo shell_exec('g++ -std=c++11 -DHAVE_BOOST -DNDEBUG -pthread -pipe -Wall -Wextra -Wno-deprecated -fPIC -O3 -Iinclude -Iinclude/exec-stream -Iinclude/posix ./src/all_vislcg3.cpp -o vislcg3 -L/usr/lib/x86_64-linux-gnu -licui18n -licudata -licuio -licuuc 2>&1');
+ echo shell_exec('g++ -std=c++11 -DHAVE_BOOST -DNDEBUG -pthread -pipe -Wall -Wextra -Wno-deprecated -fPIC -O3 -Iinclude -Iinclude/exec-stream -Iinclude/posix ./src/all_vislcg3.cpp -o vislcg3-tc -L/usr/lib/x86_64-linux-gnu -licui18n -licudata -licuio -licuuc -ltcmalloc 2>&1');
}
else {
echo "Using old-style without Boost...\n";
@@ -34,7 +34,7 @@ function profile_revision($rev) {
for ($i=0 ; $i<3 ; $i++) {
echo "Parsing...\n";
$start = microtime(true);
- $time = shell_exec('/usr/bin/time ./vislcg3-tc -C ISO-8859-1 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep user | grep system');
+ $time = shell_exec('/usr/bin/time ./vislcg3-tc -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep user | grep system');
$times['parse'][$i]['microtime'] = microtime(true) - $start;
$times['parse'][$i]['time'] = trim($time);
@@ -45,25 +45,25 @@ function profile_revision($rev) {
echo "Applying...\n";
$start = microtime(true);
- $time = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | u2i | /usr/bin/time ./vislcg3-tc -C ISO-8859-1 -g dancg.cg3b 2>&1 | grep user | grep system');
+ $time = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | /usr/bin/time ./vislcg3-tc -g dancg.cg3b 2>&1 | grep user | grep system');
$times['apply'][$i]['microtime'] = microtime(true) - $start;
$times['apply'][$i]['time'] = trim($time);
}
echo "Parsing via valgrind...\n";
- $ticks = shell_exec('valgrind ./vislcg3 -C ISO-8859-1 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep "total heap usage"');
+ $ticks = shell_exec('valgrind ./vislcg3 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep "total heap usage"');
$times['parse']['memory'] = trim($ticks);
echo "Applying via valgrind...\n";
- $ticks = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | u2i | valgrind ./vislcg3 -C ISO-8859-1 -g dancg.cg3b 2>&1 | grep "total heap usage"');
+ $ticks = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | valgrind ./vislcg3 -g dancg.cg3b 2>&1 | grep "total heap usage"');
$times['apply']['memory'] = trim($ticks);
echo "Parsing via callgrind...\n";
- $ticks = shell_exec('valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3 -C ISO-8859-1 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep Collected');
+ $ticks = shell_exec('valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3 -g dancg --grammar-only --grammar-bin dancg.cg3b 2>&1 | grep Collected');
$times['parse']['ticks'] = trim($ticks);
echo "Applying via callgrind...\n";
- $ticks = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | u2i | valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3 -C ISO-8859-1 -g dancg.cg3b 2>&1 | grep Collected');
+ $ticks = shell_exec('head -n 2000 /home/tino/vislcg3/trunk/comparison/arboretum_stripped.txt | valgrind --tool=callgrind --compress-strings=no --compress-pos=no --collect-jumps=yes --collect-systime=yes ./vislcg3 -g dancg.cg3b 2>&1 | grep Collected');
$times['apply']['ticks'] = trim($ticks);
file_put_contents('/tmp/cg3-times-'.$rev.'.txt', var_export($times, true));
@@ -73,8 +73,8 @@ function profile_revision($rev) {
shell_exec('rm -rf '.$dir.' 2>&1 >/dev/null');
}
-$revs = array(10373, 10297, 10034, 10016, 9645, 9274, 9249, 8923, 8001, 7397, 7134, 7000, 6987, 6898, 6885, 6781, 6692, 6500, 6328, 6268, 6242, 6170, 5932, 5930, 5926, 5918, 5839, 5810, 5773, 5729, 5431, 5129, 5042, 4879, 4779, 4545, 4513, 4493, 4474, 4410, 4292, 4031, 3991, 3896, 3852, 3800, 3689, 3682, 3617);
-$revs = array(10791);
+$revs = array(10809, 10800, 10373, 10044);
+$revs = array(10811);
foreach ($revs as $rev) {
profile_revision($rev);
}
diff --git a/src/GrammarApplicator.hpp b/src/GrammarApplicator.hpp
index 519a7de..e07fd2c 100644
--- a/src/GrammarApplicator.hpp
+++ b/src/GrammarApplicator.hpp
@@ -187,8 +187,9 @@ namespace CG3 {
bool did_final_enclosure;
std::vector<regexgrps_t> regexgrps_store;
- std::pair<size_t, regexgrps_t*>* regexgrps;
- bc::flat_map<uint32_t, std::pair<size_t,regexgrps_t*> > regexgrps_r;
+ std::pair<uint8_t, regexgrps_t*> regexgrps;
+ bc::flat_map<uint32_t, uint8_t> regexgrps_z;
+ bc::flat_map<uint32_t, regexgrps_t*> regexgrps_c;
uint32_t same_basic;
Cohort *target;
Cohort *mark;
diff --git a/src/GrammarApplicator_matchSet.cpp b/src/GrammarApplicator_matchSet.cpp
index 32947ef..0ef0572 100644
--- a/src/GrammarApplicator_matchSet.cpp
+++ b/src/GrammarApplicator_matchSet.cpp
@@ -96,11 +96,11 @@ uint32_t GrammarApplicator::doesTagMatchRegexp(uint32_t test, const Tag& tag, bo
for (int i = 1; i <= gc; ++i) {
tmp[0] = 0;
int32_t len = uregex_group(tag.regexp, i, tmp, 1024, &status);
- regexgrps->second->resize(std::max(regexgrps->first+1, regexgrps->second->size()));
- UnicodeString& ucstr = (*regexgrps->second)[regexgrps->first];
+ regexgrps.second->resize(std::max(static_cast<size_t>(regexgrps.first)+1, regexgrps.second->size()));
+ UnicodeString& ucstr = (*regexgrps.second)[regexgrps.first];
ucstr.remove();
ucstr.append(tmp, len);
- ++regexgrps->first;
+ ++regexgrps.first;
}
}
else {
@@ -587,7 +587,8 @@ bool GrammarApplicator::doesSetMatchReading(const Reading& reading, const uint32
}
// Subsequent times, test whether any of the previously stored sets match the reading
else {
- uint32SortedVector sets;
+ static uint32SortedVector sets;
+ sets.clear();
foreach(uint32SortedVector, *unif_sets, usi, usi_end) {
if (doesSetMatchReading(reading, *usi, bypass_index, unif_mode)) {
sets.insert(*usi);
@@ -721,8 +722,11 @@ inline bool GrammarApplicator::doesSetMatchCohort_testLinked(Cohort& cohort, con
inline bool GrammarApplicator::doesSetMatchCohort_helper(Cohort& cohort, const Reading& reading, const Set& theset, dSMC_Context *context) {
bool retval = false;
- unif_tags_t utags;
- uint32SortedVector usets;
+ static unif_tags_t utags;
+ utags.clear();
+ static uint32SortedVector usets;
+ usets.clear();
+
if (context && !(current_rule->flags & FL_CAPTURE_UNIF) && (theset.type & ST_CHILD_UNIFY)) {
utags = *unif_tags;
usets = *unif_sets;
diff --git a/src/GrammarApplicator_reflow.cpp b/src/GrammarApplicator_reflow.cpp
index 5e39d71..2f21d32 100644
--- a/src/GrammarApplicator_reflow.cpp
+++ b/src/GrammarApplicator_reflow.cpp
@@ -38,12 +38,12 @@ Tag *GrammarApplicator::makeBaseFromWord(Tag *tag) {
if (len < 5) {
return tag;
}
- UChar *n = new UChar[len-1];
+ static UString n;
+ n.clear();
+ n.resize(len-2);
n[0] = n[len-3] = '"';
- n[len-2] = 0;
- u_strncpy(n+1, tag->tag.c_str()+2, len-4);
+ u_strncpy(&n[1], tag->tag.c_str()+2, len-4);
Tag *nt = addTag(n);
- delete[] n;
return nt;
}
@@ -57,8 +57,8 @@ bool GrammarApplicator::isChildOf(const Cohort *child, const Cohort *parent) {
retval = true;
}
else {
- int i = 0;
- for (const Cohort *inner = child ; i<1000;i++) {
+ size_t i = 0;
+ for (const Cohort *inner = child ; i<1000 ; ++i) {
if (inner->dep_parent == 0 || inner->dep_parent == std::numeric_limits<uint32_t>::max()) {
retval = false;
break;
@@ -104,8 +104,8 @@ bool GrammarApplicator::wouldParentChildLoop(const Cohort *parent, const Cohort
retval = true;
}
else {
- int i = 0;
- for (const Cohort *inner = parent ;i<1000;i++) {
+ size_t i = 0;
+ for (const Cohort *inner = parent ; i<1000 ; ++i) {
if (inner->dep_parent == 0 || inner->dep_parent == std::numeric_limits<uint32_t>::max()) {
retval = false;
break;
@@ -311,7 +311,8 @@ void GrammarApplicator::reflowRelationWindow(uint32_t max) {
if (!cohort->relations_input.empty()) {
for (RelationCtn::iterator rel = cohort->relations_input.begin() ; rel != cohort->relations_input.end() ; ) {
- uint32SortedVector newrel;
+ static uint32SortedVector newrel;
+ newrel.clear();
boost_foreach (uint32_t target, rel->second) {
uint32FlatHashMap::iterator it = gWindow->relation_map.find(target);
@@ -358,14 +359,17 @@ void GrammarApplicator::reflowReading(Reading& reading) {
}
Tag *GrammarApplicator::generateVarstringTag(const Tag *tag) {
- UnicodeString tmp(tag->tag.c_str(), tag->tag.length());
+ static UnicodeString tmp;
+ tmp.remove();
+ tmp.append(tag->tag.c_str(), tag->tag.length());
bool did_something = false;
// Replace unified sets with their matching tags
if (tag->vs_sets) {
for (size_t i=0 ; i<tag->vs_sets->size() ; ++i) {
TagList tags = getTagList(*(*tag->vs_sets)[i]);
- UString rpl;
+ static UString rpl;
+ rpl.clear();
// If there are multiple tags, such as from CompositeTags, put _ between them
const_foreach (TagList, tags, iter, iter_end) {
rpl += (*iter)->tag;
@@ -379,8 +383,8 @@ Tag *GrammarApplicator::generateVarstringTag(const Tag *tag) {
}
// Replace $1-$9 with their respective match groups
- for (size_t i = 0; i<regexgrps->first && i<9; ++i) {
- tmp.findAndReplace(stringbits[S_VS1+i], (*regexgrps->second)[i]);
+ for (size_t i = 0; i<regexgrps.first && i<9; ++i) {
+ tmp.findAndReplace(stringbits[S_VS1+i], (*regexgrps.second)[i]);
did_something = true;
}
@@ -624,7 +628,8 @@ void GrammarApplicator::splitAllMappings(all_mappings_t& all_mappings, Cohort& c
if (all_mappings.empty()) {
return;
}
- ReadingList readings = cohort.readings;
+ static ReadingList readings;
+ readings = cohort.readings;
boost_foreach (Reading *reading, readings) {
BOOST_AUTO(iter, all_mappings.find(reading));
if (iter == all_mappings.end()) {
@@ -644,10 +649,13 @@ void GrammarApplicator::splitAllMappings(all_mappings_t& all_mappings, Cohort& c
}
void GrammarApplicator::mergeReadings(ReadingList& readings) {
- bc::flat_map<uint32_t, std::pair<uint32_t,Reading*> > mapped;
+ static bc::flat_map<uint32_t, std::pair<uint32_t,Reading*> > mapped;
+ mapped.clear();
mapped.reserve(readings.size());
- bc::flat_map<uint32_t, ReadingList> mlist;
+ static bc::flat_map<uint32_t, ReadingList> mlist;
+ mlist.clear();
mlist.reserve(readings.size());
+
foreach (ReadingList, readings, iter, iter_end) {
Reading *r = *iter;
uint32_t hp = r->hash_plain, hplain = r->hash_plain;
@@ -691,15 +699,16 @@ void GrammarApplicator::mergeReadings(ReadingList& readings) {
}
readings.clear();
- std::vector<Reading*> order;
+ static std::vector<Reading*> order;
+ order.clear();
for (BOOST_AUTO(miter, mlist.begin()) ; miter != mlist.end() ; miter++) {
- ReadingList clist = miter->second;
+ const ReadingList& clist = miter->second;
Reading *nr = alloc_reading(*(clist.front()));
if (nr->mapping) {
erase(nr->tags_list, nr->mapping->hash);
}
- foreach (ReadingList, clist, iter1, iter1_end) {
+ const_foreach (ReadingList, clist, iter1, iter1_end) {
if ((*iter1)->mapping && std::find(nr->tags_list.begin(), nr->tags_list.end(), (*iter1)->mapping->hash) == nr->tags_list.end()) {
nr->tags_list.push_back((*iter1)->mapping->hash);
}
diff --git a/src/GrammarApplicator_runContextualTest.cpp b/src/GrammarApplicator_runContextualTest.cpp
index dd7850f..c437b3f 100644
--- a/src/GrammarApplicator_runContextualTest.cpp
+++ b/src/GrammarApplicator_runContextualTest.cpp
@@ -31,7 +31,7 @@
namespace CG3 {
Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *test, uint8_t& rvs, bool *retval, Cohort **deep, Cohort *origin) {
- size_t regexgrpz = regexgrps->first;
+ uint8_t regexgrpz = regexgrps.first;
if (test->pos & POS_MARK_SET) {
mark = cohort;
}
@@ -92,7 +92,7 @@ Cohort *GrammarApplicator::runSingleTest(Cohort *cohort, const ContextualTest *t
rvs |= TRV_BREAK;
}
if (!*retval) {
- regexgrps->first = regexgrpz;
+ regexgrps.first = regexgrpz;
}
return cohort;
}
@@ -516,7 +516,7 @@ Cohort *GrammarApplicator::runDependencyTest(SingleWindow *sWindow, Cohort *curr
}
}
- boost::scoped_ptr<uint32SortedVector> tmp_deps;
+ static uint32SortedVector tmp_deps;
uint32SortedVector *deps = 0;
if (test->pos & POS_DEP_CHILD) {
deps = &current->dep_children;
@@ -545,23 +545,23 @@ Cohort *GrammarApplicator::runDependencyTest(SingleWindow *sWindow, Cohort *curr
}
if (test->pos & MASK_POS_LORR) {
- tmp_deps.reset(new uint32SortedVector(*deps));
+ tmp_deps = *deps;
if (test->pos & POS_LEFT) {
- tmp_deps->assign((*deps).begin(), (*deps).lower_bound(current->global_number));
+ tmp_deps.assign(deps->begin(), deps->lower_bound(current->global_number));
}
if (test->pos & POS_RIGHT) {
- tmp_deps->assign((*deps).lower_bound(current->global_number), (*deps).end());
+ tmp_deps.assign(deps->lower_bound(current->global_number), deps->end());
}
if (test->pos & POS_SELF) {
- tmp_deps->insert(current->global_number);
+ tmp_deps.insert(current->global_number);
}
- if ((test->pos & POS_RIGHTMOST) && !tmp_deps->empty()) {
- uint32SortedVector::container& cont = tmp_deps->get();
+ if ((test->pos & POS_RIGHTMOST) && !tmp_deps.empty()) {
+ uint32SortedVector::container& cont = tmp_deps.get();
std::reverse(cont.begin(), cont.end());
}
- deps = tmp_deps.get();
+ deps = &tmp_deps;
}
const_foreach (uint32SortedVector, *deps, dter, dter_end) {
@@ -655,7 +655,8 @@ Cohort *GrammarApplicator::runRelationTest(SingleWindow *sWindow, Cohort *curren
return 0;
}
- CohortSet rels;
+ static CohortSet rels;
+ rels.clear();
if (test->relation == grammar->tag_any) {
const_foreach (RelationCtn, current->relations, riter, riter_end) {
@@ -680,12 +681,12 @@ Cohort *GrammarApplicator::runRelationTest(SingleWindow *sWindow, Cohort *curren
}
if (test->pos & POS_LEFT) {
- CohortSet tmp;
+ static CohortSet tmp;
tmp.assign(rels.begin(), rels.lower_bound(current));
rels.swap(tmp);
}
if (test->pos & POS_RIGHT) {
- CohortSet tmp;
+ static CohortSet tmp;
tmp.assign(rels.lower_bound(current), rels.end());
rels.swap(tmp);
}
diff --git a/src/GrammarApplicator_runRules.cpp b/src/GrammarApplicator_runRules.cpp
index 9bbd0c9..bbe2306 100644
--- a/src/GrammarApplicator_runRules.cpp
+++ b/src/GrammarApplicator_runRules.cpp
@@ -402,9 +402,8 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
subs_any.clear();
}
// Varstring capture groups exist on a per-cohort basis, since we may need them for mapping later.
- if (!regexgrps_r.empty()) {
- regexgrps_r.clear();
- }
+ regexgrps_z.clear();
+ regexgrps_c.clear();
if (!unif_tags_rs.empty()) {
unif_tags_rs.clear();
}
@@ -414,6 +413,8 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
size_t used_regex = 0;
regexgrps_store.resize(std::max(regexgrps_store.size(), cohort->readings.size()));
+ regexgrps_z.reserve(std::max(regexgrps_z.size(), cohort->readings.size()));
+ regexgrps_c.reserve(std::max(regexgrps_c.size(), cohort->readings.size()));
size_t used_unif = 0;
unif_tags_store.resize(std::max(unif_tags_store.size(), cohort->readings.size()));
@@ -450,16 +451,19 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
if (reading->matched_tests) {
++num_active;
}
- regexgrps_r[reading->number] = regexgrps_r[rpit->second->number];
+ if (regexgrps_c.count(rpit->second->number)) {
+ regexgrps_c[reading->number];
+ regexgrps_c[reading->number] = regexgrps_c[rpit->second->number];
+ regexgrps_z[reading->number];
+ regexgrps_z[reading->number] = regexgrps_z[rpit->second->number];
+ }
continue;
}
}
// Regex capture is done on a per-reading basis, so clear all captured state.
- regexgrps = &regexgrps_r[reading->number];
- regexgrps->first = 0;
- regexgrps->second = &regexgrps_store[used_regex];
- ++used_regex;
+ regexgrps.first = 0;
+ regexgrps.second = &regexgrps_store[used_regex];
// Unification is done on a per-reading basis, so clear all unification state.
unif_tags = &unif_tags_store[used_unif];
@@ -484,10 +488,10 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
same_basic = reading->hash_plain;
target = 0;
mark = cohort;
- size_t orz = regexgrps->first;
+ uint8_t orz = regexgrps.first;
// Actually check if the reading is a valid target. First check if rule target matches...
if (rule.target && doesSetMatchReading(*reading, rule.target, (set.type & (ST_CHILD_UNIFY|ST_SPECIAL)) != 0)) {
- if (orz != regexgrps->first) {
+ if (orz != regexgrps.first) {
did_test = false;
}
target = cohort;
@@ -539,12 +543,12 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
++rule.num_match;
}
else {
- regexgrps->first = orz;
+ regexgrps.first = orz;
}
++num_iff;
}
else {
- regexgrps->first = orz;
+ regexgrps.first = orz;
++rule.num_fail;
}
readings_plain.insert(std::make_pair(reading->hash_plain,reading));
@@ -553,6 +557,11 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
cohort->readings[i]->matched_target = reading->matched_target;
cohort->readings[i]->matched_tests = reading->matched_tests;
}
+ if (regexgrps.first) {
+ regexgrps_c[reading->number] = regexgrps.second;
+ regexgrps_z[reading->number] = regexgrps.first;
+ ++used_regex;
+ }
}
// If none of the readings were valid targets, remove this cohort from the rule's possible cohorts.
@@ -597,9 +606,11 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
bool good = reading.matched_tests;
const uint32_t state_hash = reading.hash;
- regexgrps = 0;
- if (regexgrps_r.count(reading.number)) {
- regexgrps = &regexgrps_r[reading.number];
+ regexgrps.first = 0;
+ regexgrps.second = 0;
+ if (regexgrps_c.count(reading.number)) {
+ regexgrps.second = regexgrps_c[reading.number];
+ regexgrps.first = regexgrps_z[reading.number];
}
// Iff needs extra special care; if it is a Remove type and we matched the target, go ahead.
@@ -957,6 +968,9 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
const Tag* tt = *it;
it = theTags.erase(it);
if (tt->type & T_SPECIAL) {
+ if (regexgrps.second == 0) {
+ regexgrps.second = &regexgrps_store[used_regex];
+ }
uint32_t stag = doesTagMatchReading(reading, *tt, false, true);
if (stag) {
theTags.insert(it, single_tags.find(stag)->second);
diff --git a/src/version.hpp b/src/version.hpp
index 5194255..77d07b1 100644
--- a/src/version.hpp
+++ b/src/version.hpp
@@ -30,7 +30,7 @@ const char* const CG3_COPYRIGHT_STRING = "Copyright (C) 2007-2015 GrammarSoft Ap
const uint32_t CG3_VERSION_MAJOR = 0;
const uint32_t CG3_VERSION_MINOR = 9;
const uint32_t CG3_VERSION_PATCH = 9;
-const uint32_t CG3_REVISION = 10815;
+const uint32_t CG3_REVISION = 10822;
const uint32_t CG3_FEATURE_REV = 10575;
const uint32_t CG3_TOO_OLD = 10373;
const uint32_t CG3_EXTERNAL_PROTOCOL = 7226;
diff --git a/test/T_BasicSubstitute/expected.txt b/test/T_BasicSubstitute/expected.txt
index ac54dff..391c5d0 100644
--- a/test/T_BasicSubstitute/expected.txt
+++ b/test/T_BasicSubstitute/expected.txt
@@ -1,4 +1,5 @@
"<worded>"
"word" notwanted @add-good @add-mixcase-1 @add-mixcase-2 @add-regex ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13
- "word" before 1 1 4 2 3 3 substituted 3 3 after @add-good @add-mixcase-1 @add-mixcase-2 @add-regex SUBSTITUTE:4 SUBSTITUTE:5 ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13
+ "word-sub" before 1 1 4 2 3 3 substituted 3 3 after @add-good @add-mixcase-1 @add-mixcase-2 @add-regex @map-word SUBSTITUTE:4 SUBSTITUTE:5 ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13 SUBSTITUTE:15 MAP:16
"word" @add-good @add-mixcase-1 @add-mixcase-2 @add-regex SUBSTITUTE:6 ADD:8 ADD:9 SUBSTITUTE:10 ADD:12 ADD:13
+
diff --git a/test/T_BasicSubstitute/grammar.cg3 b/test/T_BasicSubstitute/grammar.cg3
index ebe7f9f..ac735d6 100644
--- a/test/T_BasicSubstitute/grammar.cg3
+++ b/test/T_BasicSubstitute/grammar.cg3
@@ -11,3 +11,6 @@ SUBSTITUTE ("<.*>"r) (VSTR:"<$1ed>") ("<(.+)>"r) ;
"<word>" ADD (@add-bad) (*) ;
"<WoRdeD>"i ADD (@add-mixcase-2) (*) ;
"<word.*>"r ADD (@add-regex) (*) ;
+
+SUBSTITUTE ("(.*)"r) ("$1-sub"v) (after) ;
+MAP (VSTR:@map-$1) ("word-sub") (0 ("(word)-sub"r)) ;
diff --git a/test/T_BasicSubstitute/grammar.cg3b.10043 b/test/T_BasicSubstitute/grammar.cg3b.10043
deleted file mode 100644
index ae64209..0000000
Binary files a/test/T_BasicSubstitute/grammar.cg3b.10043 and /dev/null differ
diff --git a/vapply.sh b/vapply.sh
index 7b57547..9761324 100755
--- a/vapply.sh
+++ b/vapply.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# 98 or 11
-CXXV=11
+# 98 or 11 or 1y
+CXXV=1y
#svn up
make -j5
./src/vislcg3 -C UTF-8 -g ~/parsers/dansk/etc/dancg --grammar-only --grammar-bin dancg.cg3b
diff --git a/vparse.sh b/vparse.sh
index f5700bc..00e0058 100755
--- a/vparse.sh
+++ b/vparse.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# 98 or 11
-CXXV=11
+# 98 or 11 or 1y
+CXXV=1y
#svn up
mkdir -p vparse
cd vparse
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/cg3.git
More information about the debian-science-commits
mailing list