[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-10851-g50815da

barraclough at apple.com barraclough at apple.com
Wed Dec 22 18:16:19 UTC 2010


The following commit has been merged in the debian/experimental branch:
commit 210f453bbe5afaa4cacf51085f0287e5c4089c54
Author: barraclough at apple.com <barraclough at apple.com@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Thu Dec 9 05:40:29 2010 +0000

    Permit Character Class Escape in CharacterRange in Character Class.
    https://bugs.webkit.org/show_bug.cgi?id=50483
    https://bugs.webkit.org/show_bug.cgi?id=50538
    https://bugs.webkit.org/show_bug.cgi?id=50654
    https://bugs.webkit.org/show_bug.cgi?id=50646
    
    Reviewed by Sam Weinig.
    
    We recently tightened up our spec conformance in generating syntax
    error in these cases, however testing in the wild has shown this
    to be problematic. This reverts the previous change in allowing
    class escapes (e.g. \d) in ranges in character classes ([]), but
    does retain some closer conformance to the spec in only allowing
    ranges that would be permitted per the grammar rules in the spec
    (e.g. in /[\d-a-z]/ "a-z" cannot be considered as a range).
    
    JavaScriptCore:
    
    * yarr/RegexParser.h:
    (JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
    (JSC::Yarr::Parser::CharacterClassParserDelegate::atomBuiltInCharacterClass):
    (JSC::Yarr::Parser::parse):
    
    LayoutTests:
    
    * fast/js/regexp-ranges-and-escaped-hyphens-expected.txt:
    * fast/js/script-tests/regexp-ranges-and-escaped-hyphens.js:
    * fast/regex/invalid-range-in-class-expected.txt:
    * fast/regex/pcre-test-1-expected.txt:
    * fast/regex/script-tests/invalid-range-in-class.js:
    * fast/regex/script-tests/pcre-test-1.js:
    
    
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@73594 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index 1cdc780..b887d09 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,26 @@
+2010-12-08  Gavin Barraclough  <barraclough at apple.com>
+
+        Reviewed by Sam Weinig.
+
+        Permit Character Class Escape in CharacterRange in Character Class.
+        https://bugs.webkit.org/show_bug.cgi?id=50483
+        https://bugs.webkit.org/show_bug.cgi?id=50538
+        https://bugs.webkit.org/show_bug.cgi?id=50654
+        https://bugs.webkit.org/show_bug.cgi?id=50646
+
+        We recently tightened up our spec conformance in generating syntax
+        error in these cases, however testing in the wild has shown this
+        to be problematic. This reverts the previous change in allowing
+        class escapes (e.g. \d) in ranges in character classes ([]), but
+        does retain some closer conformance to the spec in only allowing
+        ranges that would be permitted per the grammar rules in the spec
+        (e.g. in /[\d-a-z]/ "a-z" cannot be considered as a range).
+
+        * yarr/RegexParser.h:
+        (JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
+        (JSC::Yarr::Parser::CharacterClassParserDelegate::atomBuiltInCharacterClass):
+        (JSC::Yarr::Parser::parse):
+
 2010-12-08  Geoffrey Garen  <ggaren at apple.com>
 
         Reviewed by Sam Weinig.
diff --git a/JavaScriptCore/yarr/RegexParser.h b/JavaScriptCore/yarr/RegexParser.h
index 8392cdf..ec5f589 100644
--- a/JavaScriptCore/yarr/RegexParser.h
+++ b/JavaScriptCore/yarr/RegexParser.h
@@ -58,7 +58,6 @@ private:
         ParenthesesUnmatched,
         ParenthesesTypeInvalid,
         CharacterClassUnmatched,
-        CharacterClassInvalidRange,
         CharacterClassOutOfOrder,
         EscapeUnterminated,
         NumberOfErrorCodes
@@ -142,9 +141,16 @@ private:
                 m_state = Empty;
                 return;
 
+                // See comment in atomBuiltInCharacterClass below.
+                // This too is technically an error, per ECMA-262, and again
+                // we chose to allow this.  Note a subtlety here: while we
+                // diverge from the spec's definition of CharacterRange we do
+                // remain in compliance with the grammar.  For example, consider
+                // the expression /[\d-a-z]/.  We comply with the grammar in
+                // this case by not allowing a-z to be matched as a range.
             case AfterCharacterClassHyphen:
-                // Error! We have something like /[\d-x]/.
-                m_err = CharacterClassInvalidRange;
+                m_delegate.atomCharacterClassAtom(ch);
+                m_state = Empty;
                 return;
             }
         }
@@ -167,12 +173,21 @@ private:
                 m_delegate.atomCharacterClassBuiltIn(classID, invert);
                 return;
 
+                // If we hit either of these cases, we have an invalid range that
+                // looks something like /[x-\d]/ or /[\d-\d]/.
+                // According to ECMA-262 this should be a syntax error, but
+                // empirical testing shows this to break the web.  Instead we
+                // comply with the ECMA-262 grammar, and assume the grammar to
+                // have matched the range correctly, but tweak our interpretation
+                // of CharacterRange.  Effectively we implicitly handle the hyphen
+                // as if it were escaped, e.g. /[\w-_]/ is treated as /[\w\-_]/.
             case CachedCharacterHyphen:
+                m_delegate.atomCharacterClassAtom(m_character);
+                m_delegate.atomCharacterClassAtom('-');
+                // fall through
             case AfterCharacterClassHyphen:
-                // Error! If we hit either of these cases, we have an
-                // invalid range that looks something like /[x-\d]/
-                // or /[\d-\d]/.
-                m_err = CharacterClassInvalidRange;
+                m_delegate.atomCharacterClassBuiltIn(classID, invert);
+                m_state = Empty;
                 return;
             }
         }
@@ -681,7 +696,6 @@ private:
             "unmatched parentheses",
             "unrecognized character after (?",
             "missing terminating ] for character class",
-            "invalid range in character class",
             "range out of order in character class",
             "\\ at end of pattern"
         };
diff --git a/LayoutTests/ChangeLog b/LayoutTests/ChangeLog
index cb9bccc..c9ae48a 100644
--- a/LayoutTests/ChangeLog
+++ b/LayoutTests/ChangeLog
@@ -1,3 +1,28 @@
+2010-12-08  Gavin Barraclough  <barraclough at apple.com>
+
+        Reviewed by Sam Weinig.
+
+        Permit Character Class Escape in CharacterRange in Character Class.
+        https://bugs.webkit.org/show_bug.cgi?id=50483
+        https://bugs.webkit.org/show_bug.cgi?id=50538
+        https://bugs.webkit.org/show_bug.cgi?id=50654
+        https://bugs.webkit.org/show_bug.cgi?id=50646
+
+        We recently tightened up our spec conformance in generating syntax
+        error in these cases, however testing in the wild has shown this
+        to be problematic. This reverts the previous change in allowing
+        class escapes (e.g. \d) in ranges in character classes ([]), but
+        does retain some closer conformance to the spec in only allowing
+        ranges that would be permitted per the grammar rules in the spec
+        (e.g. in /[\d-a-z]/ "a-z" cannot be considered as a range).
+
+        * fast/js/regexp-ranges-and-escaped-hyphens-expected.txt:
+        * fast/js/script-tests/regexp-ranges-and-escaped-hyphens.js:
+        * fast/regex/invalid-range-in-class-expected.txt:
+        * fast/regex/pcre-test-1-expected.txt:
+        * fast/regex/script-tests/invalid-range-in-class.js:
+        * fast/regex/script-tests/pcre-test-1.js:
+
 2010-12-08  Yuta Kitamura  <yutak at chromium.org>
 
         Unreviewed.
diff --git a/LayoutTests/fast/js/regexp-ranges-and-escaped-hyphens-expected.txt b/LayoutTests/fast/js/regexp-ranges-and-escaped-hyphens-expected.txt
index 795b5fb..3aa8084 100644
--- a/LayoutTests/fast/js/regexp-ranges-and-escaped-hyphens-expected.txt
+++ b/LayoutTests/fast/js/regexp-ranges-and-escaped-hyphens-expected.txt
@@ -5,8 +5,8 @@ On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE
 
 PASS regexp01.toString() is "1235"
 PASS regexp01a.toString() is "123 5"
-PASS /[1\s-35]+/.exec("21-3 54"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
-PASS /[1-\s35]+/.exec("21-3 54"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
+PASS regexp01b.toString() is "1-3 5"
+PASS regexp01c.toString() is "1-3 5"
 PASS regexp01d.toString() is "123 5"
 PASS regexp01e.toString() is "123 5"
 PASS regexp01f.toString() is "-3"
diff --git a/LayoutTests/fast/js/script-tests/regexp-ranges-and-escaped-hyphens.js b/LayoutTests/fast/js/script-tests/regexp-ranges-and-escaped-hyphens.js
index 3a3e35d..582c54e 100644
--- a/LayoutTests/fast/js/script-tests/regexp-ranges-and-escaped-hyphens.js
+++ b/LayoutTests/fast/js/script-tests/regexp-ranges-and-escaped-hyphens.js
@@ -10,9 +10,11 @@ shouldBe('regexp01.toString()', '"1235"');
 var regexp01a = /[\s1-35]+/.exec("-123 54");
 shouldBe('regexp01a.toString()', '"123 5"');
 
-// These are invalid ranges.
-shouldThrow('/[1\\s-35]+/.exec("21-3 54");');
-shouldThrow('/[1-\\s35]+/.exec("21-3 54");');
+// These are invalid ranges, according to ECMA-262, but we allow them.
+var regexp01b = /[1\s-35]+/.exec("21-3 54");
+shouldBe('regexp01b.toString()', '"1-3 5"');
+var regexp01c = /[1-\s35]+/.exec("21-3 54");
+shouldBe('regexp01c.toString()', '"1-3 5"');
 
 var regexp01d = /[1-3\s5]+/.exec("-123 54");
 shouldBe('regexp01d.toString()', '"123 5"');
diff --git a/LayoutTests/fast/regex/invalid-range-in-class-expected.txt b/LayoutTests/fast/regex/invalid-range-in-class-expected.txt
index 99c036b..bb84b6e 100644
--- a/LayoutTests/fast/regex/invalid-range-in-class-expected.txt
+++ b/LayoutTests/fast/regex/invalid-range-in-class-expected.txt
@@ -6,9 +6,10 @@ On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE
 PASS /[a-c]+/.exec("-acbd"); is ["acb"]
 PASS /[a\-c]+/.exec("-acbd") is ["-ac"]
 PASS /[c-a]+/.exec("-acbd"); threw exception SyntaxError: Invalid regular expression: range out of order in character class.
-PASS /[\d-x]+/.exec("1-3xy"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
-PASS /[x-\d]+/.exec("1-3xy"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
-PASS /[\d-\d]+/.exec("1-3xy"); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
+PASS /[\d-x]+/.exec("1-3xy"); is ["1-3x"]
+PASS /[x-\d]+/.exec("1-3xy"); is ["1-3x"]
+PASS /[\d-\d]+/.exec("1-3xy"); is ["1-3"]
+PASS /[\d-a-z]+/.exec("az1-3y"); is ["az1-3"]
 PASS /[\d\-x]+/.exec("1-3xy"); is ["1-3x"]
 PASS /[x\-\d]+/.exec("1-3xy"); is ["1-3x"]
 PASS /[\d\-\d]+/.exec("1-3xy"); is ["1-3"]
diff --git a/LayoutTests/fast/regex/pcre-test-1-expected.txt b/LayoutTests/fast/regex/pcre-test-1-expected.txt
index c105f41..ccf6ce8 100644
--- a/LayoutTests/fast/regex/pcre-test-1-expected.txt
+++ b/LayoutTests/fast/regex/pcre-test-1-expected.txt
@@ -367,7 +367,8 @@ PASS regex91.exec(input1); is results
 PASS regex92.exec(input0); is results
 PASS regex93.exec(input0); is results
 PASS regex93.exec(input1); is results
-PASS eval(regex94); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
+PASS regex94.exec(input0); is results
+PASS regex94.exec(input1); is results
 PASS regex95.exec(input0); is results
 PASS regex96.exec(input0); is results
 PASS regex96.exec(input1); is results
@@ -1224,8 +1225,14 @@ PASS regex601.exec(input2); is results
 PASS regex603.exec(input0); is results
 PASS regex604.exec(input0); is results
 PASS regex605.exec(input0); is results
-PASS eval(regex608); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
-PASS eval(regex609); threw exception SyntaxError: Invalid regular expression: invalid range in character class.
+PASS regex608.exec(input0); is results
+PASS regex608.exec(input1); is results
+PASS regex608.exec(input2); is results
+PASS regex608.exec(input3); is results
+PASS regex609.exec(input0); is results
+PASS regex609.exec(input1); is results
+PASS regex609.exec(input2); is results
+PASS regex609.exec(input3); is results
 PASS regex610.exec(input0); is results
 PASS regex611.exec(input0); is results
 PASS regex612.exec(input0); is results
diff --git a/LayoutTests/fast/regex/script-tests/invalid-range-in-class.js b/LayoutTests/fast/regex/script-tests/invalid-range-in-class.js
index e440f35..e10af1e 100644
--- a/LayoutTests/fast/regex/script-tests/invalid-range-in-class.js
+++ b/LayoutTests/fast/regex/script-tests/invalid-range-in-class.js
@@ -9,10 +9,14 @@ shouldBe('/[a\\-c]+/.exec("-acbd")', '["-ac"]');
 // A reverse-range is invalid.
 shouldThrow('/[c-a]+/.exec("-acbd");');
 
-// A character-class in a range is invalid.
-shouldThrow('/[\\d-x]+/.exec("1-3xy");');
-shouldThrow('/[x-\\d]+/.exec("1-3xy");');
-shouldThrow('/[\\d-\\d]+/.exec("1-3xy");');
+// A character-class in a range is invalid, according to ECMA-262, but we allow it.
+shouldBe('/[\\d-x]+/.exec("1-3xy");', '["1-3x"]');
+shouldBe('/[x-\\d]+/.exec("1-3xy");', '["1-3x"]');
+shouldBe('/[\\d-\\d]+/.exec("1-3xy");', '["1-3"]');
+
+// Whilst we break with ECMA-262's definition of CharacterRange, we do comply with
+// the grammar, and as such in the following regex a-z cannot be matched as a range.
+shouldBe('/[\\d-a-z]+/.exec("az1-3y");', '["az1-3"]');
 
 // An escaped hypen should not be confused for an invalid range.
 shouldBe('/[\\d\\-x]+/.exec("1-3xy");', '["1-3x"]');
diff --git a/LayoutTests/fast/regex/script-tests/pcre-test-1.js b/LayoutTests/fast/regex/script-tests/pcre-test-1.js
index abd13dd..1c6504a 100644
--- a/LayoutTests/fast/regex/script-tests/pcre-test-1.js
+++ b/LayoutTests/fast/regex/script-tests/pcre-test-1.js
@@ -2,11 +2,6 @@ description(
 "A chunk of our port of PCRE's test suite, adapted to be more applicable to JavaScript."
 );
 
-function shouldNotCompile(patternName)
-{
-    shouldThrow("eval(" + patternName + ");");
-}
-
 var regex0 = /the quick brown fox/;
 var input0 = "the quick brown fox";
 var results = ["the quick brown fox"];
@@ -1330,8 +1325,14 @@ var input1 = "aaa";
 var results = null;
 shouldBe('regex93.exec(input1);', 'results');
 
-var regex94 = "/[\\d-z]+/";
-shouldNotCompile("regex94");
+var regex94 = /[\d-z]+/;
+var input0 = "12-34z";
+var results = ["12-34z"];
+shouldBe('regex94.exec(input0);', 'results');
+// Failers
+var input1 = "aaa";
+var results = null;
+shouldBe('regex94.exec(input1);', 'results');
 
 var regex95 = /\x5c/;
 var input0 = "\\";
@@ -4923,11 +4924,35 @@ var input0 = "ZABCDEFG";
 var results = ["ZA", "A", undefined, undefined];
 shouldBe('regex605.exec(input0);', 'results');
 
-var regex608 = "/^[a-\\d]/";
-shouldNotCompile("regex608");
+var regex608 = /^[a-\d]/;
+var input0 = "abcde";
+var results = ["a"];
+shouldBe('regex608.exec(input0);', 'results');
+var input1 = "-things";
+var results = ["-"];
+shouldBe('regex608.exec(input1);', 'results');
+var input2 = "0digit";
+var results = ["0"];
+shouldBe('regex608.exec(input2);', 'results');
+// Failers
+var input3 = "bcdef";
+var results = null;
+shouldBe('regex608.exec(input3);', 'results');
 
-var regex609 = "/^[\\d-a]/";
-shouldNotCompile("regex609");
+var regex609 = /^[\d-a]/;
+var input0 = "abcde";
+var results = ["a"];
+shouldBe('regex609.exec(input0);', 'results');
+var input1 = "-things";
+var results = ["-"];
+shouldBe('regex609.exec(input1);', 'results');
+var input2 = "0digit";
+var results = ["0"];
+shouldBe('regex609.exec(input2);', 'results');
+// Failers
+var input3 = "bcdef";
+var results = null;
+shouldBe('regex609.exec(input3);', 'results');
 
 var regex610 = /[\s]+/;
 var input0 = "> \x09\x0a\x0c\x0d\x0b<";

-- 
WebKit Debian packaging



More information about the Pkg-webkit-commits mailing list