[SCM] WebKit Debian packaging branch, webkit-1.2, updated. upstream/1.1.90-6072-g9a69373

Thu Apr 8 01:59:14 UTC 2010

The following commit has been merged in the webkit-1.2 branch:
commit 2fef2fe01de057bbf8b069025d650f07bc8e492b
Author: eric at webkit.org <eric at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Thu Feb 25 18:13:46 2010 +0000

    2010-02-25  Andreas Kling  <andreas.kling at nokia.com>
    
            Reviewed by Darin Adler.
    
            Optimize decoding of Latin-1 text by exploiting the fact that most of it will
            be ASCII-only data.
    
            https://bugs.webkit.org/show_bug.cgi?id=35233
    
            * platform/text/TextCodecLatin1.cpp:
            (WebCore::TextCodecLatin1::decode):
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@55242 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index bf290c0..fe10aee 100644
--- a/WebCore/ChangeLog
+++ b/WebCore/ChangeLog
@@ -1,3 +1,15 @@
+2010-02-25  Andreas Kling  <andreas.kling at nokia.com>
+
+        Reviewed by Darin Adler.
+
+        Optimize decoding of Latin-1 text by exploiting the fact that most of it will
+        be ASCII-only data.
+
+        https://bugs.webkit.org/show_bug.cgi?id=35233
+
+        * platform/text/TextCodecLatin1.cpp:
+        (WebCore::TextCodecLatin1::decode):
+
 2010-02-25  Pavel Feldman  <pfeldman at chromium.org>
 
         Reviewed by Dimitri Glazkov.
diff --git a/WebCore/platform/text/TextCodecLatin1.cpp b/WebCore/platform/text/TextCodecLatin1.cpp
index cfdc5b9..0005753 100644
--- a/WebCore/platform/text/TextCodecLatin1.cpp
+++ b/WebCore/platform/text/TextCodecLatin1.cpp
@@ -117,26 +117,76 @@ void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar)
     registrar("US-ASCII", newStreamingTextDecoderWindowsLatin1, 0);
 }
 
+template<size_t size> struct NonASCIIMask;
+template<> struct NonASCIIMask<4> {
+    static unsigned value() { return 0x80808080U; }
+};
+template<> struct NonASCIIMask<8> {
+    static unsigned long long value() { return 0x8080808080808080ULL; }
+};
+
+template<size_t size> struct UCharByteFiller;
+template<> struct UCharByteFiller<4> {
+    static void copy(UChar* dest, const unsigned char* src)
+    {
+        dest[0] = src[0];
+        dest[1] = src[1];
+        dest[2] = src[2];
+        dest[3] = src[3];
+    }
+};
+template<> struct UCharByteFiller<8> {
+    static void copy(UChar* dest, const unsigned char* src)
+    {
+        dest[0] = src[0];
+        dest[1] = src[1];
+        dest[2] = src[2];
+        dest[3] = src[3];
+        dest[4] = src[4];
+        dest[5] = src[5];
+        dest[6] = src[6];
+        dest[7] = src[7];
+    }
+};
+
 String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
 {
     UChar* characters;
     String result = String::createUninitialized(length, characters);
 
-    // Convert the string a fast way and simultaneously do an efficient check to see if it's all ASCII.
-    unsigned char ored = 0;
-    for (size_t i = 0; i < length; ++i) {
-        unsigned char c = bytes[i];
-        characters[i] = c;
-        ored |= c;
-    }
-
-    if (!(ored & 0x80))
-        return result;
+    const unsigned char* src = reinterpret_cast<const unsigned char*>(bytes);
+    const unsigned char* end = reinterpret_cast<const unsigned char*>(bytes + length);
+    const unsigned char* alignedEnd = reinterpret_cast<const unsigned char*>(reinterpret_cast<ptrdiff_t>(end) & ~(sizeof(uintptr_t) - 1));
+    UChar* dest = characters;
+
+    while (src < end) {
+        if (*src < 0x80) {
+            // Fast path for values < 0x80 (most Latin-1 text will be ASCII)
+            // Wait until we're at a properly aligned address, then read full CPU words.
+            if (!(reinterpret_cast<ptrdiff_t>(src) & (sizeof(uintptr_t) - 1))) {
+                while (src < alignedEnd) {
+                    uintptr_t chunk = *reinterpret_cast<const uintptr_t*>(src);
+
+                    if (chunk & NonASCIIMask<sizeof(uintptr_t)>::value())
+                        goto useLookupTable;
+
+                    UCharByteFiller<sizeof(uintptr_t)>::copy(dest, src);
+
+                    src += sizeof(uintptr_t);
+                    dest += sizeof(uintptr_t);
+                }
+
+                if (src == end)
+                    break;
+            }
+            *dest = *src;
+        } else {
+useLookupTable:
+            *dest = table[*src];
+        }
 
-    // Convert the slightly slower way when there are non-ASCII characters.
-    for (size_t i = 0; i < length; ++i) {
-        unsigned char c = bytes[i];
-        characters[i] = table[c];
+        ++src;
+        ++dest;
     }
 
     return result;

-- 
WebKit Debian packaging