[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-10851-g50815da

paroga at webkit.org paroga at webkit.org
Wed Dec 22 17:56:11 UTC 2010


The following commit has been merged in the debian/experimental branch:
commit 484661c038006a903da2112b83bef06899c6e512
Author: paroga at webkit.org <paroga at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Thu Dec 2 22:45:36 2010 +0000

    2010-12-02  Patrick Gansterer  <paroga at webkit.org>
    
            Reviewed by Darin Adler.
    
            Add AtomicString::fromUTF8
            https://bugs.webkit.org/show_bug.cgi?id=45594
    
            Unicode::calculateStringHashFromUTF8 creates a StringHash out of UTF8 input data and
            calculates the required length for the UTF16 conversion in one step.
            This is then used in a specialized translator for the string table of AtomicString.
    
            * JavaScriptCore.exp:
            * JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def:
            * wtf/text/AtomicString.cpp:
            (WTF::CStringTranslator::equal):
            (WTF::HashAndUTF8CharactersTranslator::hash):
            (WTF::HashAndUTF8CharactersTranslator::equal):
            (WTF::HashAndUTF8CharactersTranslator::translate):
            (WTF::AtomicString::add):
            (WTF::AtomicString::addSlowCase):
            (WTF::AtomicString::find):
            (WTF::AtomicString::fromUTF8):
            * wtf/text/AtomicString.h:
            * wtf/text/StringImpl.h:
            * wtf/text/WTFString.h:
            * wtf/unicode/UTF8.cpp:
            (WTF::Unicode::readUTF8Sequence):
            (WTF::Unicode::convertUTF8ToUTF16):
            (WTF::Unicode::calculateStringHashFromUTF8):
            (WTF::Unicode::equalUTF16WithUTF8):
            * wtf/unicode/UTF8.h:
    2010-12-02  Patrick Gansterer  <paroga at webkit.org>
    
            Reviewed by Darin Adler.
    
            Add AtomicString::fromUTF8
            https://bugs.webkit.org/show_bug.cgi?id=45594
    
            Use AtomicString::fromUTF8 directly in the libxml2 parser.
    
            * dom/XMLDocumentParserLibxml2.cpp:
            (WebCore::toAtomicString):
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@73201 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index 3c4cb1a..68c4b51 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,35 @@
+2010-12-02  Patrick Gansterer  <paroga at webkit.org>
+
+        Reviewed by Darin Adler.
+
+        Add AtomicString::fromUTF8
+        https://bugs.webkit.org/show_bug.cgi?id=45594
+
+        Unicode::calculateStringHashFromUTF8 creates a StringHash out of UTF8 input data and
+        calculates the required length for the UTF16 conversion in one step.
+        This is then used in a specialized translator for the string table of AtomicString.
+
+        * JavaScriptCore.exp:
+        * JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def:
+        * wtf/text/AtomicString.cpp:
+        (WTF::CStringTranslator::equal):
+        (WTF::HashAndUTF8CharactersTranslator::hash):
+        (WTF::HashAndUTF8CharactersTranslator::equal):
+        (WTF::HashAndUTF8CharactersTranslator::translate):
+        (WTF::AtomicString::add):
+        (WTF::AtomicString::addSlowCase):
+        (WTF::AtomicString::find):
+        (WTF::AtomicString::fromUTF8):
+        * wtf/text/AtomicString.h:
+        * wtf/text/StringImpl.h:
+        * wtf/text/WTFString.h:
+        * wtf/unicode/UTF8.cpp:
+        (WTF::Unicode::readUTF8Sequence):
+        (WTF::Unicode::convertUTF8ToUTF16):
+        (WTF::Unicode::calculateStringHashFromUTF8):
+        (WTF::Unicode::equalUTF16WithUTF8):
+        * wtf/unicode/UTF8.h:
+
 2010-12-02  Geoffrey Garen  <ggaren at apple.com>
 
         Reviewed by Sam Weinig.
diff --git a/JavaScriptCore/JavaScriptCore.exp b/JavaScriptCore/JavaScriptCore.exp
index 193fa08..e4a92c5 100644
--- a/JavaScriptCore/JavaScriptCore.exp
+++ b/JavaScriptCore/JavaScriptCore.exp
@@ -373,6 +373,8 @@ __ZN3WTF12AtomicString3addEPKtj
 __ZN3WTF12AtomicString3addEPKtjj
 __ZN3WTF12AtomicString4findEPKtjj
 __ZN3WTF12AtomicString4initEv
+__ZN3WTF12AtomicString8fromUTF8EPKc
+__ZN3WTF12AtomicString8fromUTF8EPKcm
 __ZN3WTF12createThreadEPFPvS0_ES0_
 __ZN3WTF12createThreadEPFPvS0_ES0_PKc
 __ZN3WTF12detachThreadEj
diff --git a/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def b/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def
index 1bc488f..e8105f0 100644
--- a/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def
+++ b/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def
@@ -59,6 +59,7 @@ EXPORTS
     ?broadcast at ThreadCondition@WTF@@QAEXXZ
     ?bufferLengthForStringDecimal at DecimalNumber@WTF@@QBEIXZ
     ?calculateDSTOffset at WTF@@YANNN at Z
+    ?calculateStringHashFromUTF8 at Unicode@WTF@@YAIPBD0AAI at Z
     ?calculateUTCOffset at WTF@@YAHXZ
     ?calculatedFunctionName at DebuggerCallFrame@JSC@@QBE?AVUString at 2@XZ
     ?call at JSC@@YA?AVJSValue at 1@PAVExecState at 1@V21 at W4CallType@1 at ABTCallData@1 at 1ABVArgList@1@@Z
@@ -139,6 +140,7 @@ EXPORTS
     ?empty at StringImpl@WTF@@SAPAV12 at XZ
     ?enumerable at PropertyDescriptor@JSC@@QBE_NXZ
     ?equal at Identifier@JSC@@SA_NPBVStringImpl at WTF@@PBD at Z
+    ?equalUTF16WithUTF8 at Unicode@WTF@@YA_NPB_W0PBD1 at Z
     ?evaluate at DebuggerCallFrame@JSC@@QBE?AVJSValue at 2@ABVUString at 2@AAV32@@Z
     ?evaluate at JSC@@YA?AVCompletion at 1@PAVExecState at 1@AAVScopeChain at 1@ABVSourceCode at 1@VJSValue at 1@@Z
     ?exclude at Profile@JSC@@QAEXPBVProfileNode at 2@@Z
diff --git a/JavaScriptCore/wtf/text/AtomicString.cpp b/JavaScriptCore/wtf/text/AtomicString.cpp
index c49a837..acbcd34 100644
--- a/JavaScriptCore/wtf/text/AtomicString.cpp
+++ b/JavaScriptCore/wtf/text/AtomicString.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Patrick Gansterer <paroga at paroga.com>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -26,9 +27,12 @@
 #include <wtf/HashSet.h>
 #include <wtf/Threading.h>
 #include <wtf/WTFThreadData.h>
+#include <wtf/unicode/UTF8.h>
 
 namespace WTF {
 
+using namespace Unicode;
+
 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
 
 class AtomicStringTable {
@@ -85,7 +89,7 @@ struct CStringTranslator {
             if (d[i] != c)
                 return false;
         }
-        return s[length] == 0;
+        return !s[length];
     }
 
     static void translate(StringImpl*& location, const char* const& c, unsigned hash)
@@ -206,12 +210,44 @@ struct HashAndCharactersTranslator {
     }
 };
 
+struct HashAndUTF8Characters { // Key used to look up or insert UTF-8 input in the atomic string table.
+    unsigned hash; // Precomputed hash; returned as-is by HashAndUTF8CharactersTranslator::hash().
+    const char* characters; // First byte of the UTF-8 input.
+    unsigned length; // Number of UTF-8 bytes starting at characters.
+    unsigned utf16Length; // Number of UTF-16 code units allocated and filled when the input is translated.
+};
+
+struct HashAndUTF8CharactersTranslator { // Translator that lets the atomic string table be probed with UTF-8 input.
+    static unsigned hash(const HashAndUTF8Characters& buffer)
+    {
+        return buffer.hash;
+    }
+
+    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
+    {
+        // Different lengths can never be equal; checking first also keeps the comparison within the bounds of |string|.
+        return buffer.utf16Length == string->length() && equalUTF16WithUTF8(string->characters(), string->characters() + string->length(), buffer.characters, buffer.characters + buffer.length);
+    }
+
+    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
+    {
+        UChar* target;
+        location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef();
+        // AtomicString::fromUTF8 only adds input that calculateStringHashFromUTF8 accepted, so conversion should not fail.
+        const char* source = buffer.characters;
+        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
+            ASSERT_NOT_REACHED();
+        location->setHash(hash);
+        location->setIsAtomic(true);
+    }
+};
+
 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
 {
     if (!s)
         return 0;
 
-    if (length == 0)
+    if (!length)
         return StringImpl::empty();
     
     UCharBuffer buf = { s, length }; 
@@ -227,7 +263,7 @@ PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsign
     ASSERT(s);
     ASSERT(existingHash);
 
-    if (length == 0)
+    if (!length)
         return StringImpl::empty();
     
     HashAndCharacters buffer = { existingHash, s, length }; 
@@ -246,7 +282,7 @@ PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
     while (s[length] != UChar(0))
         length++;
 
-    if (length == 0)
+    if (!length)
         return StringImpl::empty();
 
     UCharBuffer buf = {s, length}; 
@@ -262,7 +298,7 @@ PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
     if (!r || r->isAtomic())
         return r;
 
-    if (r->length() == 0)
+    if (!r->length())
         return StringImpl::empty();
 
     StringImpl* result = *stringTable().add(r).first;
@@ -276,7 +312,7 @@ AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned e
     ASSERT(s);
     ASSERT(existingHash);
 
-    if (length == 0)
+    if (!length)
         return static_cast<AtomicStringImpl*>(StringImpl::empty());
 
     HashAndCharacters buffer = { existingHash, s, length }; 
@@ -290,7 +326,7 @@ void AtomicString::remove(StringImpl* r)
 {
     stringTable().remove(r);
 }
-    
+
 AtomicString AtomicString::lower() const
 {
     // Note: This is a hot function in the Dromaeo benchmark.
@@ -303,4 +339,36 @@ AtomicString AtomicString::lower() const
     return AtomicString(newImpl);
 }
 
+AtomicString AtomicString::fromUTF8(const char* characters, size_t length)
+{
+    if (!characters) // Null input yields a null AtomicString.
+        return AtomicString();
+
+    if (!length)
+        return emptyAtom;
+
+    HashAndUTF8Characters buffer;
+    buffer.characters = characters;
+    buffer.length = length;
+    buffer.hash = calculateStringHashFromUTF8(characters, characters + length, buffer.utf16Length); // Also fills in buffer.utf16Length.
+
+    if (!buffer.hash) // calculateStringHashFromUTF8 returns 0 when it rejects the input.
+        return AtomicString();
+
+    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
+
+    // If the string is newly-translated, then we need to adopt it.
+    // The boolean in the pair tells us if that is so.
+    AtomicString atomicString;
+    atomicString.m_string = addResult.second ? adoptRef(*addResult.first) : *addResult.first;
+    return atomicString;
 }
+
+AtomicString AtomicString::fromUTF8(const char* characters)
+{
+    if (!characters) // Null input yields a null AtomicString; it must not reach strlen().
+        return AtomicString();
+    return fromUTF8(characters, strlen(characters)); // Input is treated as null-terminated.
+}
+
+} // namespace WTF
diff --git a/JavaScriptCore/wtf/text/AtomicString.h b/JavaScriptCore/wtf/text/AtomicString.h
index 06e63f4..ab5b366 100644
--- a/JavaScriptCore/wtf/text/AtomicString.h
+++ b/JavaScriptCore/wtf/text/AtomicString.h
@@ -108,6 +108,11 @@ public:
     operator QString() const { return m_string; }
 #endif
 
+    // AtomicString::fromUTF8 will return a null string if
+    // the input data contains invalid UTF-8 sequences.
+    static AtomicString fromUTF8(const char*, size_t);
+    static AtomicString fromUTF8(const char*);
+
 private:
     String m_string;
     
diff --git a/JavaScriptCore/wtf/text/StringImpl.h b/JavaScriptCore/wtf/text/StringImpl.h
index 99d0e9d..dc1dbb2 100644
--- a/JavaScriptCore/wtf/text/StringImpl.h
+++ b/JavaScriptCore/wtf/text/StringImpl.h
@@ -53,6 +53,7 @@ namespace WTF {
 
 struct CStringTranslator;
 struct HashAndCharactersTranslator;
+struct HashAndUTF8CharactersTranslator;
 struct UCharBufferTranslator;
 
 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
@@ -66,6 +67,7 @@ class StringImpl : public StringImplBase {
     friend struct JSC::IdentifierUCharBufferTranslator;
     friend struct WTF::CStringTranslator;
     friend struct WTF::HashAndCharactersTranslator;
+    friend struct WTF::HashAndUTF8CharactersTranslator;
     friend struct WTF::UCharBufferTranslator;
     friend class AtomicStringImpl;
 private:
diff --git a/JavaScriptCore/wtf/text/WTFString.h b/JavaScriptCore/wtf/text/WTFString.h
index e9d6ae4..eb95b41 100644
--- a/JavaScriptCore/wtf/text/WTFString.h
+++ b/JavaScriptCore/wtf/text/WTFString.h
@@ -309,6 +309,8 @@ public:
     String(const AECHAR*);
 #endif
 
+    // String::fromUTF8 will return a null string if
+    // the input data contains invalid UTF-8 sequences.
     static String fromUTF8(const char*, size_t);
     static String fromUTF8(const char*);
 
diff --git a/JavaScriptCore/wtf/unicode/UTF8.cpp b/JavaScriptCore/wtf/unicode/UTF8.cpp
index ca4fc1c..dc24ed5 100644
--- a/JavaScriptCore/wtf/unicode/UTF8.cpp
+++ b/JavaScriptCore/wtf/unicode/UTF8.cpp
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2007 Apple Inc.  All rights reserved.
+ * Copyright (C) 2010 Patrick Gansterer <paroga at paroga.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -25,6 +26,7 @@
 
 #include "config.h"
 #include "UTF8.h"
+#include <wtf/StringHasher.h>
 
 #include "ASCIICType.h"
 
@@ -32,7 +34,7 @@ namespace WTF {
 namespace Unicode {
 
 // FIXME: Use definition from CharacterNames.h.
-const UChar replacementCharacter = 0xFFFD;
+static const UChar replacementCharacter = 0xFFFD;
 
 inline int inlineUTF8SequenceLengthNonASCII(char b0)
 {
@@ -314,5 +316,86 @@ ConversionResult convertUTF8ToUTF16(
     return result;
 }
 
+unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length)
+{
+    // Hashes the UTF-16 form of [data, dataEnd) and counts its code units; returns 0 if the input is rejected.
+    utf16Length = 0; // Set before any early return so the out-parameter is never left uninitialized.
+    if (!data)
+        return 0;
+
+    WTF::StringHasher stringHasher;
+    while (data < dataEnd) {
+        if (isASCII(*data)) {
+            stringHasher.addCharacter(*data++);
+            utf16Length++;
+            continue;
+        }
+
+        int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data);
+        // Reject a multi-byte sequence that is cut off by the end of the input.
+        if (dataEnd - data < utf8SequenceLength)
+            return 0;
+
+        if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength))
+            return 0;
+
+        UChar32 character = readUTF8Sequence(data, utf8SequenceLength);
+        ASSERT(!isASCII(character));
+
+        if (U_IS_BMP(character)) {
+            // UTF-16 surrogate values are illegal in UTF-32
+            if (U_IS_SURROGATE(character))
+                return 0;
+            stringHasher.addCharacter(static_cast<UChar>(character)); // normal case
+            utf16Length++;
+        } else if (U_IS_SUPPLEMENTARY(character)) {
+            // Supplementary characters are hashed as a lead/trail surrogate pair, i.e. two UTF-16 code units.
+            stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)), static_cast<UChar>(U16_TRAIL(character)));
+            utf16Length += 2;
+        } else
+            return 0;
+    }
+
+    return stringHasher.hash();
+}
+
+bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd)
+{
+    // Returns true if the UTF-8 bytes [b, bEnd) decode to exactly the UTF-16 code units [a, aEnd).
+    // Every read of |a| is bounds-checked because the UTF-16 side may be shorter than the decoded input.
+    while (b < bEnd) {
+        if (isASCII(*b)) {
+            if (a == aEnd || *a++ != *b++)
+                return false;
+            continue;
+        }
+
+        int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*b);
+        if (bEnd - b < utf8SequenceLength)
+            return false;
+        if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(b), utf8SequenceLength))
+            return false;
+
+        UChar32 character = readUTF8Sequence(b, utf8SequenceLength);
+        ASSERT(!isASCII(character));
+
+        if (U_IS_BMP(character)) {
+            // UTF-16 surrogate values are illegal in UTF-32
+            if (U_IS_SURROGATE(character))
+                return false;
+            if (a == aEnd || *a++ != character)
+                return false;
+        } else if (U_IS_SUPPLEMENTARY(character)) {
+            if (aEnd - a < 2 || *a++ != U16_LEAD(character))
+                return false;
+            if (*a++ != U16_TRAIL(character))
+                return false;
+        } else
+            return false;
+    }
+
+    return a == aEnd;
+}
+
 } // namespace Unicode
 } // namespace WTF
diff --git a/JavaScriptCore/wtf/unicode/UTF8.h b/JavaScriptCore/wtf/unicode/UTF8.h
index a5ed93e..1f4baca 100644
--- a/JavaScriptCore/wtf/unicode/UTF8.h
+++ b/JavaScriptCore/wtf/unicode/UTF8.h
@@ -29,7 +29,7 @@
 #include "Unicode.h"
 
 namespace WTF {
-  namespace Unicode {
+namespace Unicode {
 
     // Given a first byte, gives the length of the UTF-8 sequence it begins.
     // Returns 0 for bytes that are not legal starts of UTF-8 sequences.
@@ -69,7 +69,12 @@ namespace WTF {
     ConversionResult convertUTF16ToUTF8(
                     const UChar** sourceStart, const UChar* sourceEnd, 
                     char** targetStart, char* targetEnd, bool strict = true);
-  }
-}
+
+    unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length);
+
+    bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd);
+
+} // namespace Unicode
+} // namespace WTF
 
 #endif // WTF_UTF8_h
diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index ab38c3d..17d9dff 100644
--- a/WebCore/ChangeLog
+++ b/WebCore/ChangeLog
@@ -1,3 +1,15 @@
+2010-12-02  Patrick Gansterer  <paroga at webkit.org>
+
+        Reviewed by Darin Adler.
+
+        Add AtomicString::fromUTF8
+        https://bugs.webkit.org/show_bug.cgi?id=45594
+
+        Use AtomicString::fromUTF8 directly in the libxml2 parser.
+
+        * dom/XMLDocumentParserLibxml2.cpp:
+        (WebCore::toAtomicString):
+
 2010-12-02  Andy Estes  <aestes at apple.com>
 
         Fix the Qt Linux Release minimal build.
diff --git a/WebCore/dom/XMLDocumentParserLibxml2.cpp b/WebCore/dom/XMLDocumentParserLibxml2.cpp
index 7a3285f..23f9883 100644
--- a/WebCore/dom/XMLDocumentParserLibxml2.cpp
+++ b/WebCore/dom/XMLDocumentParserLibxml2.cpp
@@ -686,14 +686,12 @@ static inline String toString(const xmlChar* string)
 
 static inline AtomicString toAtomicString(const xmlChar* string, size_t size)
 {
-    // FIXME: Use AtomicString::fromUTF8.
-    return AtomicString(toString(string, size));
+    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size);
 }
 
 static inline AtomicString toAtomicString(const xmlChar* string)
 {
-    // FIXME: Use AtomicString::fromUTF8.
-    return AtomicString(toString(string));
+    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
 }
 
 struct _xmlSAX2Namespace {

-- 
WebKit Debian packaging



More information about the Pkg-webkit-commits mailing list