[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-9427-gc2be6fc

commit-queue at webkit.org commit-queue at webkit.org
Wed Dec 22 15:56:12 UTC 2010


The following commit has been merged in the debian/experimental branch:
commit 50457a8f040404a3cd39e7198983033ef2b7a86a
Author: commit-queue at webkit.org <commit-queue at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Tue Nov 16 23:27:13 2010 +0000

    2010-11-16  Peter Varga  <pvarga at inf.u-szeged.hu>
    
            Reviewed by Gavin Barraclough.
    
            The number of recursive match calls isn't limited in YARR Interpreter
            https://bugs.webkit.org/show_bug.cgi?id=47906
    
            Check the number of the matchDisjunction recursive calls to avoid unbounded
            recursion.
            Now the matchDisjunction function returns JSRegExpResult instead of bool.
            The JSRegExpResult enum contains the result of matching or the error code
            of the failure (like HitLimit) which terminates the matching.
            The error codes are based on pcre's jsRegExpExecute error codes.
    
            * yarr/RegexInterpreter.cpp:
            (JSC::Yarr::Interpreter::parenthesesDoBacktrack):
            (JSC::Yarr::Interpreter::matchParentheses):
            (JSC::Yarr::Interpreter::backtrackParentheses):
            (JSC::Yarr::Interpreter::matchDisjunction):
            (JSC::Yarr::Interpreter::matchNonZeroDisjunction):
            (JSC::Yarr::Interpreter::interpret):
            (JSC::Yarr::Interpreter::Interpreter):
            * yarr/RegexInterpreter.h:
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@72140 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index 3f2f37e..f7aadfd 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,27 @@
+2010-11-16  Peter Varga  <pvarga at inf.u-szeged.hu>
+
+        Reviewed by Gavin Barraclough.
+
+        The number of recursive match calls isn't limited in YARR Interpreter
+        https://bugs.webkit.org/show_bug.cgi?id=47906
+
+        Check the number of the matchDisjunction recursive calls to avoid unbounded
+        recursion.
+        Now the matchDisjunction function returns JSRegExpResult instead of bool.
+        The JSRegExpResult enum contains the result of matching or the error code
+        of the failure (like HitLimit) which terminates the matching.
+        The error codes are based on pcre's jsRegExpExecute error codes.
+
+        * yarr/RegexInterpreter.cpp:
+        (JSC::Yarr::Interpreter::parenthesesDoBacktrack):
+        (JSC::Yarr::Interpreter::matchParentheses):
+        (JSC::Yarr::Interpreter::backtrackParentheses):
+        (JSC::Yarr::Interpreter::matchDisjunction):
+        (JSC::Yarr::Interpreter::matchNonZeroDisjunction):
+        (JSC::Yarr::Interpreter::interpret):
+        (JSC::Yarr::Interpreter::Interpreter):
+        * yarr/RegexInterpreter.h:
+
 2010-11-16  Brian Weinstein  <bweinstein at apple.com>
 
         Rest of the Windows build fix.
diff --git a/JavaScriptCore/yarr/RegexInterpreter.cpp b/JavaScriptCore/yarr/RegexInterpreter.cpp
index ec96636..80440d9 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.cpp
+++ b/JavaScriptCore/yarr/RegexInterpreter.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga at inf.u-szeged.hu), University of Szeged
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -591,20 +592,24 @@ public:
         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
         context->restoreOutput(output, firstSubpatternId, count);
     }
-    bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
+    JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
     {
         while (backTrack->matchAmount) {
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
 
-            if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true))
-                return true;
+            JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
+            if (result == JSRegExpMatch)
+                return JSRegExpMatch;
 
             resetMatches(term, context);
             popParenthesesDisjunctionContext(backTrack);
             freeParenthesesDisjunctionContext(context);
+
+            if (result != JSRegExpNoMatch)
+                return result;
         }
 
-        return false;
+        return JSRegExpNoMatch;
     }
 
     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
@@ -765,7 +770,7 @@ public:
         return false;
     }
 
-    bool matchParentheses(ByteTerm& term, DisjunctionContext* context)
+    JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 
@@ -786,31 +791,42 @@ public:
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 // Try to do a match, and it it succeeds, add it to the list.
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                if (result == JSRegExpMatch)
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     // The match failed; try to find an alternate point to carry on from.
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
-                    if (!parenthesesDoBacktrack(term, backTrack))
-                        return false;
+
+                    if (result == JSRegExpNoMatch) {
+                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+                        if (backtrackResult != JSRegExpMatch)
+                            return backtrackResult;
+                    } else
+                        return result;
                 }
             }
 
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
             recordParenthesesMatch(term, context);
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierGreedy: {
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                if (result == JSRegExpMatch)
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
+
+                    if (result != JSRegExpNoMatch)
+                        return result;
+
                     break;
                 }
             }
@@ -819,15 +835,15 @@ public:
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
                 recordParenthesesMatch(term, context);
             }
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierNonGreedy:
-            return true;
+            return JSRegExpMatch;
         }
 
         ASSERT_NOT_REACHED();
-        return false;
+        return JSRegExpErrorNoMatch;
     }
 
     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
@@ -840,7 +856,7 @@ public:
     // Non-greedy, we've already done the one less case, so don't match on popping.
     // We haven't done the one more case, so always try to add that.
     //
-    bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
+    JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
     {
         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
 
@@ -859,44 +875,58 @@ public:
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
 
             ParenthesesDisjunctionContext* context = 0;
+            JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
 
-            if (!parenthesesDoBacktrack(term, backTrack))
-                return false;
+            if (result != JSRegExpMatch)
+                return result;
 
             // While we haven't yet reached our fixed limit,
             while (backTrack->matchAmount < term.atom.quantityCount) {
                 // Try to do a match, and it it succeeds, add it to the list.
                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+
+                if (result == JSRegExpMatch)
                     appendParenthesesDisjunctionContext(backTrack, context);
                 else {
                     // The match failed; try to find an alternate point to carry on from.
                     resetMatches(term, context);
                     freeParenthesesDisjunctionContext(context);
-                    if (!parenthesesDoBacktrack(term, backTrack))
-                        return false;
+
+                    if (result == JSRegExpNoMatch) {
+                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
+                        if (backtrackResult != JSRegExpMatch)
+                            return backtrackResult;
+                    } else
+                        return result;
                 }
             }
 
             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
             context = backTrack->lastContext;
             recordParenthesesMatch(term, context);
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierGreedy: {
             if (!backTrack->matchAmount)
-                return false;
+                return JSRegExpNoMatch;
 
             ParenthesesDisjunctionContext* context = backTrack->lastContext;
-            if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+            JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
+            if (result == JSRegExpMatch) {
                 while (backTrack->matchAmount < term.atom.quantityCount) {
                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                    if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+                    JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                    if (parenthesesResult == JSRegExpMatch)
                         appendParenthesesDisjunctionContext(backTrack, context);
                     else {
                         resetMatches(term, context);
                         freeParenthesesDisjunctionContext(context);
+
+                        if (parenthesesResult != JSRegExpNoMatch)
+                            return parenthesesResult;
+
                         break;
                     }
                 }
@@ -904,60 +934,73 @@ public:
                 resetMatches(term, context);
                 popParenthesesDisjunctionContext(backTrack);
                 freeParenthesesDisjunctionContext(context);
+
+                if (result != JSRegExpNoMatch)
+                    return result;
             }
 
             if (backTrack->matchAmount) {
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
                 recordParenthesesMatch(term, context);
             }
-            return true;
+            return JSRegExpMatch;
         }
 
         case QuantifierNonGreedy: {
             // If we've not reached the limit, try to add one more match.
             if (backTrack->matchAmount < term.atom.quantityCount) {
                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
-                if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) {
+                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
+                if (result == JSRegExpMatch) {
                     appendParenthesesDisjunctionContext(backTrack, context);
                     recordParenthesesMatch(term, context);
-                    return true;
-                } else {
-                    resetMatches(term, context);
-                    freeParenthesesDisjunctionContext(context);
+                    return JSRegExpMatch;
                 }
+
+                resetMatches(term, context);
+                freeParenthesesDisjunctionContext(context);
+
+                if (result != JSRegExpNoMatch)
+                    return result;
             }
 
             // Nope - okay backtrack looking for an alternative.
             while (backTrack->matchAmount) {
                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
-                if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
+                if (result == JSRegExpMatch) {
                     // successful backtrack! we're back in the game!
                     if (backTrack->matchAmount) {
                         context = backTrack->lastContext;
                         recordParenthesesMatch(term, context);
                     }
-                    return true;
+                    return JSRegExpMatch;
                 }
 
                 // pop a match off the stack
                 resetMatches(term, context);
                 popParenthesesDisjunctionContext(backTrack);
                 freeParenthesesDisjunctionContext(context);
+
+                return result;
             }
 
-            return false;
+            return JSRegExpNoMatch;
         }
         }
 
         ASSERT_NOT_REACHED();
-        return false;
+        return JSRegExpErrorNoMatch;
     }
 
 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
 #define BACKTRACK() { --context->term; goto backtrack; }
 #define currentTerm() (disjunction->terms[context->term])
-    bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     {
+        if (!--remainingMatchCount)
+            return JSRegExpErrorHitLimit;
+
         if (btrack)
             BACKTRACK();
 
@@ -972,14 +1015,14 @@ public:
             MATCH_NEXT();
         case ByteTerm::TypeSubpatternEnd:
             context->matchEnd = input.getPos();
-            return true;
+            return JSRegExpMatch;
 
         case ByteTerm::TypeBodyAlternativeBegin:
             MATCH_NEXT();
         case ByteTerm::TypeBodyAlternativeDisjunction:
         case ByteTerm::TypeBodyAlternativeEnd:
             context->matchEnd = input.getPos();
-            return true;
+            return JSRegExpMatch;
 
         case ByteTerm::TypeAlternativeBegin:
             MATCH_NEXT();
@@ -1069,10 +1112,16 @@ public:
             if (matchBackReference(currentTerm(), context))
                 MATCH_NEXT();
             BACKTRACK();
-        case ByteTerm::TypeParenthesesSubpattern:
-            if (matchParentheses(currentTerm(), context))
+        case ByteTerm::TypeParenthesesSubpattern: {
+            JSRegExpResult result = matchParentheses(currentTerm(), context);
+
+            if (result == JSRegExpMatch) {
                 MATCH_NEXT();
+            }  else if (result != JSRegExpNoMatch)
+                return result;
+
             BACKTRACK();
+        }
         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
             if (matchParenthesesOnceBegin(currentTerm(), context))
                 MATCH_NEXT();
@@ -1104,7 +1153,7 @@ public:
 
         switch (currentTerm().type) {
         case ByteTerm::TypeSubpatternBegin:
-            return false;
+            return JSRegExpNoMatch;
         case ByteTerm::TypeSubpatternEnd:
             ASSERT_NOT_REACHED();
 
@@ -1116,7 +1165,7 @@ public:
                 MATCH_NEXT();
 
             if (input.atEnd())
-                return false;
+                return JSRegExpNoMatch;
 
             input.next();
             context->matchBegin = input.getPos();
@@ -1172,10 +1221,16 @@ public:
                 if (backtrackBackReference(currentTerm(), context))
                     MATCH_NEXT();
                 BACKTRACK();
-            case ByteTerm::TypeParenthesesSubpattern:
-                if (backtrackParentheses(currentTerm(), context))
+            case ByteTerm::TypeParenthesesSubpattern: {
+                JSRegExpResult result = backtrackParentheses(currentTerm(), context);
+
+                if (result == JSRegExpMatch) {
                     MATCH_NEXT();
+                } else if (result != JSRegExpNoMatch)
+                    return result;
+
                 BACKTRACK();
+            }
             case ByteTerm::TypeParenthesesSubpatternOnceBegin:
                 if (backtrackParenthesesOnceBegin(currentTerm(), context))
                     MATCH_NEXT();
@@ -1199,20 +1254,23 @@ public:
         }
 
         ASSERT_NOT_REACHED();
-        return false;
+        return JSRegExpErrorNoMatch;
     }
 
-    bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+    JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
     {
-        if (matchDisjunction(disjunction, context, btrack)) {
+        JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
+
+        if (result == JSRegExpMatch) {
             while (context->matchBegin == context->matchEnd) {
-                if (!matchDisjunction(disjunction, context, true))
-                    return false;
+                result = matchDisjunction(disjunction, context, true);
+                if (result != JSRegExpMatch)
+                    return result;
             }
-            return true;
+            return JSRegExpMatch;
         }
 
-        return false;
+        return result;
     }
 
     int interpret()
@@ -1226,7 +1284,8 @@ public:
 
         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
 
-        if (matchDisjunction(pattern->m_body.get(), context)) {
+        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context);
+        if (result == JSRegExpMatch) {
             output[0] = context->matchBegin;
             output[1] = context->matchEnd;
         }
@@ -1235,6 +1294,9 @@ public:
 
         pattern->m_allocator->stopAllocator();
 
+        if (output[0] == -1 && result != JSRegExpNoMatch)
+            return result;
+
         return output[0];
     }
 
@@ -1243,6 +1305,7 @@ public:
         , output(output)
         , input(inputChar, start, length)
         , allocatorPool(0)
+        , remainingMatchCount(matchLimit)
     {
     }
 
@@ -1251,6 +1314,7 @@ private:
     int* output;
     InputStream input;
     BumpPointerPool* allocatorPool;
+    unsigned remainingMatchCount;
 };
 
 
diff --git a/JavaScriptCore/yarr/RegexInterpreter.h b/JavaScriptCore/yarr/RegexInterpreter.h
index 37f17fe..fe775a4 100644
--- a/JavaScriptCore/yarr/RegexInterpreter.h
+++ b/JavaScriptCore/yarr/RegexInterpreter.h
@@ -40,6 +40,21 @@ using WTF::BumpPointerAllocator;
 
 namespace JSC { namespace Yarr {
 
+// TODO move the matchLimit constant and the JSRegExpResult enum to the JSRegExp.h when pcre is removed.
+
+// The below limit restricts the number of "recursive" match calls in order to
+// avoid spending exponential time on complex regular expressions.
+static const unsigned matchLimit = 1000000;
+
+enum JSRegExpResult {
+    JSRegExpMatch = 1,
+    JSRegExpNoMatch = 0,
+    JSRegExpErrorNoMatch = -1,
+    JSRegExpErrorHitLimit = -2,
+    JSRegExpErrorNoMemory = -3,
+    JSRegExpErrorInternal = -4
+};
+
 class ByteDisjunction;
 
 struct ByteTerm {

-- 
WebKit Debian packaging



More information about the Pkg-webkit-commits mailing list