[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-9427-gc2be6fc

ossy at webkit.org ossy at webkit.org
Wed Dec 22 15:58:12 UTC 2010


The following commit has been merged in the debian/experimental branch:
commit 44ae23292c301f28cff5b967d0ae97c931651bde
Author: ossy at webkit.org <ossy at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Wed Nov 17 11:03:40 2010 +0000

    Extend YARR Interpreter with beginning character look-up optimization
    https://bugs.webkit.org/show_bug.cgi?id=45751
    
    Patch by Peter Varga <pvarga at inf.u-szeged.hu> on 2010-11-17
    Reviewed by Gavin Barraclough.
    
    Add beginning character look-up optimization which sets the start
    index to the first possible successful pattern match.
    Extend YARR Interpreter with lookupForBeginChars function which
    implements the beginning character look-up optimization.
    
    * yarr/RegexInterpreter.cpp:
    (JSC::Yarr::Interpreter::InputStream::readPair):
    (JSC::Yarr::Interpreter::InputStream::isNotAvailableInput):
    (JSC::Yarr::Interpreter::lookupForBeginChars):
    (JSC::Yarr::Interpreter::matchDisjunction):
    (JSC::Yarr::Interpreter::interpret):
    * yarr/RegexInterpreter.h:
    (JSC::Yarr::BytecodePattern::BytecodePattern):
    
    
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@72186 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index cb6c07c..1be329a 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,24 @@
+2010-11-17  Peter Varga  <pvarga at inf.u-szeged.hu>
+
+        Reviewed by Gavin Barraclough.
+
+        Extend YARR Interpreter with beginning character look-up optimization
+        https://bugs.webkit.org/show_bug.cgi?id=45751
+
+        Add beginning character look-up optimization which sets the start
+        index to the first possible successful pattern match.
+        Extend YARR Interpreter with lookupForBeginChars function which
+        implements the beginning character look-up optimization.
+
+        * yarr/RegexInterpreter.cpp:
+        (JSC::Yarr::Interpreter::InputStream::readPair):
+        (JSC::Yarr::Interpreter::InputStream::isNotAvailableInput):
+        (JSC::Yarr::Interpreter::lookupForBeginChars):
+        (JSC::Yarr::Interpreter::matchDisjunction):
+        (JSC::Yarr::Interpreter::interpret):
+        * yarr/RegexInterpreter.h:
+        (JSC::Yarr::BytecodePattern::BytecodePattern):
+
 2010-11-17  Alexis Menard  <alexis.menard at nokia.com>, Simon Hausmann  <simon.hausmann at nokia.com>
 
         Reviewed by Kenneth Christiansen, Tor Arne Vestbø.
diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp
index 80440d9..dc3024a 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.cpp
+++ b/JavaScriptCore/yarr/RegexInterpreter.cpp
@@ -196,6 +196,12 @@ public:
             return -1;
         }
 
+        int readPair()
+        {
+            ASSERT(pos + 1 < length);
+            return input[pos] | input[pos + 1] << 16;
+        }
+
         int readChecked(int position)
         {
             ASSERT(position < 0);
@@ -263,6 +269,11 @@ public:
             return (pos + position) == length;
         }
 
+        bool isNotAvailableInput(int position)
+        {
+            return (pos + position) > length;
+        }
+
     private:
         const UChar* input;
         unsigned pos;
@@ -993,10 +1004,39 @@ public:
         return JSRegExpErrorNoMatch;
     }
 
+    void lookupForBeginChars()
+    {
+        int character;
+        bool firstSingleCharFound;
+
+        while (true) {
+            if (input.isNotAvailableInput(2))
+                return;
+
+            firstSingleCharFound = false;
+
+            character = input.readPair();
+
+            for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
+                BeginChar bc = pattern->m_beginChars[i];
+
+                if (!firstSingleCharFound && bc.value <= 0xFFFF) {
+                    firstSingleCharFound = true;
+                    character &= 0xFFFF;
+                }
+
+                if ((character | bc.mask) == bc.value)
+                    return;
+            }
+
+            input.next();
+        }
+    }
+
 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
 #define BACKTRACK() { --context->term; goto backtrack; }
 #define currentTerm() (disjunction->terms[context->term])
-    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
     {
         if (!--remainingMatchCount)
             return JSRegExpErrorHitLimit;
@@ -1004,6 +1044,9 @@ public:
         if (btrack)
             BACKTRACK();
 
+        if (pattern->m_containsBeginChars && isBody)
+            lookupForBeginChars();
+
         context->matchBegin = input.getPos();
         context->term = 0;
 
@@ -1168,6 +1211,10 @@ public:
                 return JSRegExpNoMatch;
 
             input.next();
+
+            if (pattern->m_containsBeginChars && isBody)
+                lookupForBeginChars();
+
             context->matchBegin = input.getPos();
 
             if (currentTerm().alternative.onceThrough)
@@ -1284,7 +1331,7 @@ public:
 
         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
 
-        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context);
+        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
         if (result == JSRegExpMatch) {
             output[0] = context->matchBegin;
             output[1] = context->matchEnd;
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h
index fe775a4..dae8f9d 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.h
+++ b/JavaScriptCore/yarr/RegexInterpreter.h
@@ -324,6 +324,7 @@ struct BytecodePattern : FastAllocBase {
         : m_body(body)
         , m_ignoreCase(pattern.m_ignoreCase)
         , m_multiline(pattern.m_multiline)
+        , m_containsBeginChars(pattern.m_containsBeginChars)
         , m_allocator(allocator)
     {
         newlineCharacterClass = pattern.newlineCharacterClass();
@@ -335,6 +336,8 @@ struct BytecodePattern : FastAllocBase {
         // array, so that it won't delete them on destruction.  We'll
         // take responsibility for that.
         pattern.m_userCharacterClasses.clear();
+
+        m_beginChars.append(pattern.m_beginChars);
     }
 
     ~BytecodePattern()
@@ -346,12 +349,16 @@ struct BytecodePattern : FastAllocBase {
     OwnPtr<ByteDisjunction> m_body;
     bool m_ignoreCase;
     bool m_multiline;
+    bool m_containsBeginChars;
     // Each BytecodePattern is associated with a RegExp, each RegExp is associated
     // with a JSGlobalData.  Cache a pointer to out JSGlobalData's m_regexAllocator.
     BumpPointerAllocator* m_allocator;
 
     CharacterClass* newlineCharacterClass;
     CharacterClass* wordcharCharacterClass;
+
+    Vector<BeginChar> m_beginChars;
+
 private:
     Vector<ByteDisjunction*> m_allParenthesesInfo;
     Vector<CharacterClass*> m_userCharacterClasses;

-- 
WebKit Debian packaging



More information about the Pkg-webkit-commits mailing list