[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-9427-gc2be6fc

Wed Dec 22 16:28:32 UTC 2010

The following commit has been merged in the debian/experimental branch:
commit 053a06696a8d2e40388361c948e131bc7e1e9760
Author: commit-queue at webkit.org <commit-queue at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Wed Nov 24 12:51:02 2010 +0000

    2010-11-24  Carlos Garcia Campos  <cgarcia at igalia.com>
    
            Reviewed by Xan Lopez.
    
            [GTK] Optimize foldCase, toLower and toUpper methods in glib unicode backend
            https://bugs.webkit.org/show_bug.cgi?id=48625
    
            GLib methods use UTF-8 strings, so we have to convert from UTF-16 to
            UTF-8 to perform the case operations and then convert the result back to
            UTF-16. GLib conversion methods return a newly allocated string, so we
            have to memcpy the result into the destination buffer too. Using our
            own methods to convert between UTF-8 and UTF-16 from wtf/unicode/UTF8.h
            we don't need such a memcpy, since they take an already allocated buffer
            rather than returning a new one. There's another optimization for the
            case when the destination buffer is not large enough. In that case,
            the methods should return the expected destination buffer size and are
            called again with a new buffer. We can avoid the conversion to UTF-16 by
            pre-calculating the required size for the destination buffer.
    
            * wtf/unicode/glib/UnicodeGLib.cpp:
            (WTF::Unicode::getUTF16LengthFromUTF8):
            (WTF::Unicode::convertCase):
            (WTF::Unicode::foldCase):
            (WTF::Unicode::toLower):
            (WTF::Unicode::toUpper):
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@72662 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index e2c87b9..0ff903e 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,29 @@
+2010-11-24  Carlos Garcia Campos  <cgarcia at igalia.com>
+
+        Reviewed by Xan Lopez.
+
+        [GTK] Optimize foldCase, toLower and toUpper methods in glib unicode backend
+        https://bugs.webkit.org/show_bug.cgi?id=48625
+
+        GLib methods use UTF-8 strings, so we have to convert from UTF-16 to
+        UTF-8 to perform the case operations and then convert the result back to
+        UTF-16. GLib conversion methods return a newly allocated string, so we
+        have to memcpy the result into the destination buffer too. Using our
+        own methods to convert between UTF-8 and UTF-16 from wtf/unicode/UTF8.h
+        we don't need such a memcpy, since they take an already allocated buffer
+        rather than returning a new one. There's another optimization for the
+        case when the destination buffer is not large enough. In that case,
+        the methods should return the expected destination buffer size and are
+        called again with a new buffer. We can avoid the conversion to UTF-16 by
+        pre-calculating the required size for the destination buffer.
+
+        * wtf/unicode/glib/UnicodeGLib.cpp:
+        (WTF::Unicode::getUTF16LengthFromUTF8):
+        (WTF::Unicode::convertCase):
+        (WTF::Unicode::foldCase):
+        (WTF::Unicode::toLower):
+        (WTF::Unicode::toUpper):
+
 2010-11-23  Patrick Gansterer  <paroga at webkit.org>
 
         Reviewed by Sam Weinig.
diff --git a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp
index e20c376..a01c3ee 100644
--- a/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp
+++ b/JavaScriptCore/wtf/unicode/glib/UnicodeGLib.cpp
@@ -1,6 +1,7 @@
 /*
  *  Copyright (C) 2008 Jürg Billeter <j at bitron.ch>
  *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches at access-company.com>
+ *  Copyright (C) 2010 Igalia S.L.
  *
  *  This library is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU Library General Public
@@ -22,6 +23,11 @@
 #include "config.h"
 #include "UnicodeGLib.h"
 
+#include <wtf/Vector.h>
+#include <wtf/unicode/UTF8.h>
+
+#define UTF8_IS_SURROGATE(character) (character >= 0x10000 && character <= 0x10FFFF)
+
 namespace WTF {
 namespace Unicode {
 
@@ -43,100 +49,71 @@ UChar32 foldCase(UChar32 ch)
     return *ucs4Result;
 }
 
-int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+static int getUTF16LengthFromUTF8(const gchar* utf8String, int length)
 {
-    *error = false;
-    GOwnPtr<GError> gerror;
+    int utf16Length = 0;
+    const gchar* inputString = utf8String;
 
-    GOwnPtr<char> utf8src;
-    utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
-    if (gerror) {
-        *error = true;
-        return -1;
-    }
-
-    GOwnPtr<char> utf8result;
-    utf8result.set(g_utf8_casefold(utf8src.get(), -1));
+    while ((utf8String + length - inputString > 0) && *inputString) {
+        gunichar character = g_utf8_get_char(inputString);
 
-    long utf16resultLength = -1;
-    GOwnPtr<UChar> utf16result;
-    utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
-    if (gerror) {
-        *error = true;
-        return -1;
+        utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1;
+        inputString = g_utf8_next_char(inputString);
     }
 
-    if (utf16resultLength > resultLength) {
-        *error = true;
-        return utf16resultLength;
-    }
-    memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
-
-    return utf16resultLength;
+    return utf16Length;
 }
 
-int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length);
+
+static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction)
 {
     *error = false;
-    GOwnPtr<GError> gerror;
 
-    GOwnPtr<char> utf8src;
-    utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
-    if (gerror) {
+    // Allocate a buffer big enough to hold all the characters.
+    Vector<char> buffer(srcLength * 3);
+    char* utf8Target = buffer.data();
+    const UChar* utf16Source = src;
+    ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true);
+    if (conversionResult != conversionOK) {
         *error = true;
         return -1;
     }
+    buffer.shrink(utf8Target - buffer.data());
 
-    GOwnPtr<char> utf8result;
-    utf8result.set(g_utf8_strdown(utf8src.get(), -1));
+    GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size()));
+    long utf8ResultLength = strlen(utf8Result.get());
 
-    long utf16resultLength = -1;
-    GOwnPtr<UChar> utf16result;
-    utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
-    if (gerror) {
+    // Calculate the destination buffer size.
+    int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength);
+    if (realLength > resultLength) {
         *error = true;
-        return -1;
+        return realLength;
     }
 
-    if (utf16resultLength > resultLength) {
+    // Convert the result to UTF-16.
+    UChar* utf16Target = result;
+    const char* utf8Source = utf8Result.get();
+    conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true);
+    long utf16ResultLength = utf16Target - result;
+    if (conversionResult != conversionOK)
         *error = true;
-        return utf16resultLength;
-    }
-    memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
 
-    return utf16resultLength;
+    return utf16ResultLength <= 0 ? -1 : utf16ResultLength;
 }
-
-int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
 {
-    *error = false;
-    GOwnPtr<GError> gerror;
-
-    GOwnPtr<char> utf8src;
-    utf8src.set(g_utf16_to_utf8(src, srcLength, 0, 0, &gerror.outPtr()));
-    if (gerror) {
-        *error = true;
-        return -1;
-    }
-
-    GOwnPtr<char> utf8result;
-    utf8result.set(g_utf8_strup(utf8src.get(), -1));
-
-    long utf16resultLength = -1;
-    GOwnPtr<UChar> utf16result;
-    utf16result.set(g_utf8_to_utf16(utf8result.get(), -1, 0, &utf16resultLength, &gerror.outPtr()));
-    if (gerror) {
-        *error = true;
-        return -1;
-    }
+    return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold);
+}
 
-    if (utf16resultLength > resultLength) {
-        *error = true;
-        return utf16resultLength;
-    }
-    memcpy(result, utf16result.get(), utf16resultLength * sizeof(UChar));
+int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown);
+}
 
-    return utf16resultLength;
+int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
+{
+    return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup);
 }
 
 Direction direction(UChar32 c)

-- 
WebKit Debian packaging