[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-9427-gc2be6fc
ossy at webkit.org
ossy at webkit.org
Wed Dec 22 15:58:12 UTC 2010
The following commit has been merged in the debian/experimental branch:
commit 44ae23292c301f28cff5b967d0ae97c931651bde
Author: ossy at webkit.org <ossy at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date: Wed Nov 17 11:03:40 2010 +0000
Extend YARR Interpreter with beginning character look-up optimization
https://bugs.webkit.org/show_bug.cgi?id=45751
Patch by Peter Varga <pvarga at inf.u-szeged.hu> on 2010-11-17
Reviewed by Gavin Barraclough.
Add beginning character look-up optimization which sets the start
index to the first possible successful pattern match.
Extend YARR Interpreter with lookupForBeginChars function which
implements the beginning character look-up optimization.
* yarr/RegexInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::readPair):
(JSC::Yarr::Interpreter::InputStream::isNotAvailableInput):
(JSC::Yarr::Interpreter::lookupForBeginChars):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::interpret):
* yarr/RegexInterpreter.h:
(JSC::Yarr::BytecodePattern::BytecodePattern):
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@72186 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index cb6c07c..1be329a 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,24 @@
+2010-11-17 Peter Varga <pvarga at inf.u-szeged.hu>
+
+ Reviewed by Gavin Barraclough.
+
+ Extend YARR Interpreter with beginning character look-up optimization
+ https://bugs.webkit.org/show_bug.cgi?id=45751
+
+ Add beginning character look-up optimization which sets the start
+ index to the first possible successful pattern match.
+ Extend YARR Interpreter with lookupForBeginChars function which
+ implements the beginning character look-up optimization.
+
+ * yarr/RegexInterpreter.cpp:
+ (JSC::Yarr::Interpreter::InputStream::readPair):
+ (JSC::Yarr::Interpreter::InputStream::isNotAvailableInput):
+ (JSC::Yarr::Interpreter::lookupForBeginChars):
+ (JSC::Yarr::Interpreter::matchDisjunction):
+ (JSC::Yarr::Interpreter::interpret):
+ * yarr/RegexInterpreter.h:
+ (JSC::Yarr::BytecodePattern::BytecodePattern):
+
2010-11-17 Alexis Menard <alexis.menard at nokia.com>, Simon Hausmann <simon.hausmann at nokia.com>
Reviewed by Kenneth Christiansen, Tor Arne Vestbø.
diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp
index 80440d9..dc3024a 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.cpp
+++ b/JavaScriptCore/yarr/RegexInterpreter.cpp
@@ -196,6 +196,12 @@ public:
return -1;
}
+ int readPair() // Packs the two input units at pos and pos + 1 into a single int; pos itself is not modified.
+ {
+ ASSERT(pos + 1 < length); // Both units read below must lie inside the input.
+ return input[pos] | input[pos + 1] << 16; // input[pos] in the low bits, input[pos + 1] shifted up by 16 (shift binds tighter than |).
+ }
+
int readChecked(int position)
{
ASSERT(position < 0);
@@ -263,6 +269,11 @@ public:
return (pos + position) == length;
}
+ bool isNotAvailableInput(int position) // Returns true when pos + position exceeds length, i.e. fewer than 'position' units remain from pos.
+ {
+ return (pos + position) > length;
+ }
+
private:
const UChar* input;
unsigned pos;
@@ -993,10 +1004,39 @@ public:
return JSRegExpErrorNoMatch;
}
+ void lookupForBeginChars() // Steps input forward with next() until the pair at the current position matches an entry of pattern->m_beginChars, or until two units are no longer available.
+ {
+ int character;
+ bool firstSingleCharFound;
+
+ while (true) {
+ if (input.isNotAvailableInput(2)) // readPair() needs two units; give up scanning and leave input where it is.
+ return;
+
+ firstSingleCharFound = false; // Reset per position because 'character' is re-read as a full pair below.
+
+ character = input.readPair();
+
+ for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
+ BeginChar bc = pattern->m_beginChars[i];
+
+ if (!firstSingleCharFound && bc.value <= 0xFFFF) { // First entry whose value fits in 16 bits.
+ firstSingleCharFound = true;
+ character &= 0xFFFF; // Drops the second unit for this and every later entry. NOTE(review): presumably m_beginChars lists two-unit entries before single-unit ones - confirm.
+ }
+
+ if ((character | bc.mask) == bc.value) // Bits set in bc.mask are forced on before the comparison; on a match input stays at this position.
+ return;
+ }
+
+ input.next(); // No entry matched at this position; move on to the following one.
+ }
+ }
+
#define MATCH_NEXT() { ++context->term; goto matchAgain; }
#define BACKTRACK() { --context->term; goto backtrack; }
#define currentTerm() (disjunction->terms[context->term])
- JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+ JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
{
if (!--remainingMatchCount)
return JSRegExpErrorHitLimit;
@@ -1004,6 +1044,9 @@ public:
if (btrack)
BACKTRACK();
+ if (pattern->m_containsBeginChars && isBody)
+ lookupForBeginChars();
+
context->matchBegin = input.getPos();
context->term = 0;
@@ -1168,6 +1211,10 @@ public:
return JSRegExpNoMatch;
input.next();
+
+ if (pattern->m_containsBeginChars && isBody)
+ lookupForBeginChars();
+
context->matchBegin = input.getPos();
if (currentTerm().alternative.onceThrough)
@@ -1284,7 +1331,7 @@ public:
DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
- JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context);
+ JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
if (result == JSRegExpMatch) {
output[0] = context->matchBegin;
output[1] = context->matchEnd;
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h
index fe775a4..dae8f9d 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.h
+++ b/JavaScriptCore/yarr/RegexInterpreter.h
@@ -324,6 +324,7 @@ struct BytecodePattern : FastAllocBase {
: m_body(body)
, m_ignoreCase(pattern.m_ignoreCase)
, m_multiline(pattern.m_multiline)
+ , m_containsBeginChars(pattern.m_containsBeginChars)
, m_allocator(allocator)
{
newlineCharacterClass = pattern.newlineCharacterClass();
@@ -335,6 +336,8 @@ struct BytecodePattern : FastAllocBase {
// array, so that it won't delete them on destruction. We'll
// take responsibility for that.
pattern.m_userCharacterClasses.clear();
+
+ m_beginChars.append(pattern.m_beginChars);
}
~BytecodePattern()
@@ -346,12 +349,16 @@ struct BytecodePattern : FastAllocBase {
OwnPtr<ByteDisjunction> m_body;
bool m_ignoreCase;
bool m_multiline;
+ bool m_containsBeginChars;
// Each BytecodePattern is associated with a RegExp, each RegExp is associated
// with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regexAllocator.
BumpPointerAllocator* m_allocator;
CharacterClass* newlineCharacterClass;
CharacterClass* wordcharCharacterClass;
+
+ Vector<BeginChar> m_beginChars;
+
private:
Vector<ByteDisjunction*> m_allParenthesesInfo;
Vector<CharacterClass*> m_userCharacterClasses;
--
WebKit Debian packaging
More information about the Pkg-webkit-commits
mailing list