[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-9427-gc2be6fc

Wed Dec 22 11:13:40 UTC 2010

The following commit has been merged in the debian/experimental branch:
commit d832c5633259b66f01546cfade188076a94fad2b
Author: zherczeg at webkit.org <zherczeg at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Thu Jul 15 14:10:43 2010 +0000

    Refactoring some parts of the lexer
    https://bugs.webkit.org/show_bug.cgi?id=41845
    
    Reviewed by Darin Adler.
    
    This patch is a precursor to refactoring the identifier
    parsing; that refactoring currently slows down the lexer and
    is not ready for landing. This patch contains only those changes
    that do not slow down the lexer (mainly style changes).
    
    SunSpider: no change (529.4ms to 528.7ms)
    --parse-only: no change (31.0ms to 31.2ms)
    
    * parser/Lexer.cpp:
    (JSC::isIdentStart): using typesOfASCIICharacters to determine
         whether the current character is in identifier start
    (JSC::isIdentPart): using typesOfASCIICharacters to determine
         whether the current character is in identifier part
    (JSC::Lexer::parseString): style fix
    (JSC::Lexer::lex): removing the else after the main switch,
         which reduces code duplication
    
    
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@63423 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index 16defc0..5eb5803 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,27 @@
+2010-07-15  Zoltan Herczeg  <zherczeg at webkit.org>
+
+        Reviewed by Darin Adler.
+
+        Refactoring some parts of the lexer
+        https://bugs.webkit.org/show_bug.cgi?id=41845
+
+        This patch is a precursor to refactoring the identifier
+        parsing; that refactoring currently slows down the lexer and
+        is not ready for landing. This patch contains only those changes
+        that do not slow down the lexer (mainly style changes).
+
+        SunSpider: no change (529.4ms to 528.7ms)
+        --parse-only: no change (31.0ms to 31.2ms)
+
+        * parser/Lexer.cpp:
+        (JSC::isIdentStart): using typesOfASCIICharacters to determine
+             whether the current character is in identifier start
+        (JSC::isIdentPart): using typesOfASCIICharacters to determine
+             whether the current character is in identifier part
+        (JSC::Lexer::parseString): style fix
+        (JSC::Lexer::lex): removing the else after the main switch,
+             which reduces code duplication
+
 2010-07-15  Mark Rowe  <mrowe at apple.com>
 
         Update the sorting in the Xcode project files.
diff --git a/JavaScriptCore/parser/Lexer.cpp b/JavaScriptCore/parser/Lexer.cpp
index 8a0a3f4..b6387e7 100644
--- a/JavaScriptCore/parser/Lexer.cpp
+++ b/JavaScriptCore/parser/Lexer.cpp
@@ -46,14 +46,16 @@ using namespace Unicode;
 namespace JSC {
 
 
-enum CharacterTypes {
+enum CharacterType {
     // Types for the main switch
-    CharacterInvalid,
 
-    CharacterAlpha,
+    // The first three types are fixed, and also used for identifying
+    // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
+    CharacterIdentifierStart,
     CharacterZero,
     CharacterNumber,
 
+    CharacterInvalid,
     CharacterLineTerminator,
     CharacterExclamationMark,
     CharacterOpenParen,
@@ -87,8 +89,8 @@ enum CharacterTypes {
     CharacterWhiteSpace,
 };
 
-// 128 ascii codes
-static unsigned short AsciiCharacters[128] = {
+// 128 ASCII codes
+static const unsigned short typesOfASCIICharacters[128] = {
 /*   0 - Null               */ CharacterInvalid,
 /*   1 - Start of Heading   */ CharacterInvalid,
 /*   2 - Start of Text      */ CharacterInvalid,
@@ -125,7 +127,7 @@ static unsigned short AsciiCharacters[128] = {
 /*  33 - !                  */ CharacterExclamationMark,
 /*  34 - "                  */ CharacterQuote,
 /*  35 - #                  */ CharacterInvalid,
-/*  36 - $                  */ CharacterAlpha,
+/*  36 - $                  */ CharacterIdentifierStart,
 /*  37 - %                  */ CharacterModulo,
 /*  38 - &                  */ CharacterAnd,
 /*  39 - '                  */ CharacterQuote,
@@ -154,64 +156,64 @@ static unsigned short AsciiCharacters[128] = {
 /*  62 - >                  */ CharacterGreater,
 /*  63 - ?                  */ CharacterQuestion,
 /*  64 - @                  */ CharacterInvalid,
-/*  65 - A                  */ CharacterAlpha,
-/*  66 - B                  */ CharacterAlpha,
-/*  67 - C                  */ CharacterAlpha,
-/*  68 - D                  */ CharacterAlpha,
-/*  69 - E                  */ CharacterAlpha,
-/*  70 - F                  */ CharacterAlpha,
-/*  71 - G                  */ CharacterAlpha,
-/*  72 - H                  */ CharacterAlpha,
-/*  73 - I                  */ CharacterAlpha,
-/*  74 - J                  */ CharacterAlpha,
-/*  75 - K                  */ CharacterAlpha,
-/*  76 - L                  */ CharacterAlpha,
-/*  77 - M                  */ CharacterAlpha,
-/*  78 - N                  */ CharacterAlpha,
-/*  79 - O                  */ CharacterAlpha,
-/*  80 - P                  */ CharacterAlpha,
-/*  81 - Q                  */ CharacterAlpha,
-/*  82 - R                  */ CharacterAlpha,
-/*  83 - S                  */ CharacterAlpha,
-/*  84 - T                  */ CharacterAlpha,
-/*  85 - U                  */ CharacterAlpha,
-/*  86 - V                  */ CharacterAlpha,
-/*  87 - W                  */ CharacterAlpha,
-/*  88 - X                  */ CharacterAlpha,
-/*  89 - Y                  */ CharacterAlpha,
-/*  90 - Z                  */ CharacterAlpha,
+/*  65 - A                  */ CharacterIdentifierStart,
+/*  66 - B                  */ CharacterIdentifierStart,
+/*  67 - C                  */ CharacterIdentifierStart,
+/*  68 - D                  */ CharacterIdentifierStart,
+/*  69 - E                  */ CharacterIdentifierStart,
+/*  70 - F                  */ CharacterIdentifierStart,
+/*  71 - G                  */ CharacterIdentifierStart,
+/*  72 - H                  */ CharacterIdentifierStart,
+/*  73 - I                  */ CharacterIdentifierStart,
+/*  74 - J                  */ CharacterIdentifierStart,
+/*  75 - K                  */ CharacterIdentifierStart,
+/*  76 - L                  */ CharacterIdentifierStart,
+/*  77 - M                  */ CharacterIdentifierStart,
+/*  78 - N                  */ CharacterIdentifierStart,
+/*  79 - O                  */ CharacterIdentifierStart,
+/*  80 - P                  */ CharacterIdentifierStart,
+/*  81 - Q                  */ CharacterIdentifierStart,
+/*  82 - R                  */ CharacterIdentifierStart,
+/*  83 - S                  */ CharacterIdentifierStart,
+/*  84 - T                  */ CharacterIdentifierStart,
+/*  85 - U                  */ CharacterIdentifierStart,
+/*  86 - V                  */ CharacterIdentifierStart,
+/*  87 - W                  */ CharacterIdentifierStart,
+/*  88 - X                  */ CharacterIdentifierStart,
+/*  89 - Y                  */ CharacterIdentifierStart,
+/*  90 - Z                  */ CharacterIdentifierStart,
 /*  91 - [                  */ CharacterOpenBracket,
 /*  92 - \                  */ CharacterBackSlash,
 /*  93 - ]                  */ CharacterCloseBracket,
 /*  94 - ^                  */ CharacterXor,
-/*  95 - _                  */ CharacterAlpha,
+/*  95 - _                  */ CharacterIdentifierStart,
 /*  96 - `                  */ CharacterInvalid,
-/*  97 - a                  */ CharacterAlpha,
-/*  98 - b                  */ CharacterAlpha,
-/*  99 - c                  */ CharacterAlpha,
-/* 100 - d                  */ CharacterAlpha,
-/* 101 - e                  */ CharacterAlpha,
-/* 102 - f                  */ CharacterAlpha,
-/* 103 - g                  */ CharacterAlpha,
-/* 104 - h                  */ CharacterAlpha,
-/* 105 - i                  */ CharacterAlpha,
-/* 106 - j                  */ CharacterAlpha,
-/* 107 - k                  */ CharacterAlpha,
-/* 108 - l                  */ CharacterAlpha,
-/* 109 - m                  */ CharacterAlpha,
-/* 110 - n                  */ CharacterAlpha,
-/* 111 - o                  */ CharacterAlpha,
-/* 112 - p                  */ CharacterAlpha,
-/* 113 - q                  */ CharacterAlpha,
-/* 114 - r                  */ CharacterAlpha,
-/* 115 - s                  */ CharacterAlpha,
-/* 116 - t                  */ CharacterAlpha,
-/* 117 - u                  */ CharacterAlpha,
-/* 118 - v                  */ CharacterAlpha,
-/* 119 - w                  */ CharacterAlpha,
-/* 120 - x                  */ CharacterAlpha,
-/* 121 - y                  */ CharacterAlpha,
-/* 122 - z                  */ CharacterAlpha,
+/*  97 - a                  */ CharacterIdentifierStart,
+/*  98 - b                  */ CharacterIdentifierStart,
+/*  99 - c                  */ CharacterIdentifierStart,
+/* 100 - d                  */ CharacterIdentifierStart,
+/* 101 - e                  */ CharacterIdentifierStart,
+/* 102 - f                  */ CharacterIdentifierStart,
+/* 103 - g                  */ CharacterIdentifierStart,
+/* 104 - h                  */ CharacterIdentifierStart,
+/* 105 - i                  */ CharacterIdentifierStart,
+/* 106 - j                  */ CharacterIdentifierStart,
+/* 107 - k                  */ CharacterIdentifierStart,
+/* 108 - l                  */ CharacterIdentifierStart,
+/* 109 - m                  */ CharacterIdentifierStart,
+/* 110 - n                  */ CharacterIdentifierStart,
+/* 111 - o                  */ CharacterIdentifierStart,
+/* 112 - p                  */ CharacterIdentifierStart,
+/* 113 - q                  */ CharacterIdentifierStart,
+/* 114 - r                  */ CharacterIdentifierStart,
+/* 115 - s                  */ CharacterIdentifierStart,
+/* 116 - t                  */ CharacterIdentifierStart,
+/* 117 - u                  */ CharacterIdentifierStart,
+/* 118 - v                  */ CharacterIdentifierStart,
+/* 119 - w                  */ CharacterIdentifierStart,
+/* 120 - x                  */ CharacterIdentifierStart,
+/* 121 - y                  */ CharacterIdentifierStart,
+/* 122 - z                  */ CharacterIdentifierStart,
 /* 123 - {                  */ CharacterOpenBrace,
 /* 124 - |                  */ CharacterOr,
 /* 125 - }                  */ CharacterCloseBrace,
@@ -335,7 +337,7 @@ static NEVER_INLINE bool isNonASCIIIdentStart(int c)
 
 static inline bool isIdentStart(int c)
 {
-    return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
+    return isASCII(c) ? typesOfASCIICharacters[c] == CharacterIdentifierStart : isNonASCIIIdentStart(c);
 }
 
 static NEVER_INLINE bool isNonASCIIIdentPart(int c)
@@ -346,7 +348,10 @@ static NEVER_INLINE bool isNonASCIIIdentPart(int c)
 
 static inline bool isIdentPart(int c)
 {
-    return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
+    // Character types are divided into two groups depending on whether they can be part of an
+    // identifier or not. Those whose type value is less or equal than CharacterNumber can be
+    // part of an identifier. (See the CharacterType definition for more details.)
+    return isASCII(c) ? typesOfASCIICharacters[c] <= CharacterNumber : isNonASCIIIdentPart(c);
 }
 
 static inline int singleEscape(int c)
@@ -456,7 +461,11 @@ ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp)
 
             stringStart = currentCharacter();
             continue;
-        } else if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
+        }
+        // Fast check for characters that require special handling.
+        // Catches -1, \n, \r, 0x2028, and 0x2029 as efficiently
+        // as possible, and lets through all common ASCII characters.
+        if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
             // New-line or end of input is not allowed
             if (UNLIKELY(isLineTerminator(m_current)) || UNLIKELY(m_current == -1))
                 return false;
@@ -493,10 +502,17 @@ start:
 
     m_delimited = false;
 
-    if (isASCII(m_current)) {
-        ASSERT(m_current >= 0 && m_current < 128);
+    CharacterType type;
+    if (LIKELY(isASCII(m_current)))
+        type = static_cast<CharacterType>(typesOfASCIICharacters[m_current]);
+    else if (isNonASCIIIdentStart(m_current))
+        type = CharacterIdentifierStart;
+    else if (isLineTerminator(m_current))
+        type = CharacterLineTerminator;
+    else
+        type = CharacterInvalid;
 
-    switch (AsciiCharacters[m_current]) {
+    switch (type) {
     case CharacterGreater:
         shift();
         if (m_current == '>') {
@@ -750,7 +766,7 @@ start:
         m_delimited = false;
         token = STRING;
         break;
-    case CharacterAlpha:
+    case CharacterIdentifierStart:
         ASSERT(isIdentStart(m_current));
         goto startIdentifierOrKeyword;
     case CharacterLineTerminator:
@@ -769,21 +785,6 @@ start:
         ASSERT_NOT_REACHED();
         goto returnError;
     }
-    } else {
-        // Rare characters
-
-        if (isNonASCIIIdentStart(m_current))
-            goto startIdentifierOrKeyword;
-        if (isLineTerminator(m_current)) {
-            shiftLineTerminator();
-            m_atLineStart = true;
-            m_terminator = true;
-            if (lastTokenWasRestrKeyword())
-                goto doneSemicolon;
-            goto start;
-        }
-        goto returnError;
-    }
 
     m_atLineStart = false;
     goto returnToken;

-- 
WebKit Debian packaging