[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-10851-g50815da
paroga at webkit.org
paroga at webkit.org
Wed Dec 22 17:56:11 UTC 2010
The following commit has been merged in the debian/experimental branch:
commit 484661c038006a903da2112b83bef06899c6e512
Author: paroga at webkit.org <paroga at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date: Thu Dec 2 22:45:36 2010 +0000
2010-12-02 Patrick Gansterer <paroga at webkit.org>
Reviewed by Darin Adler.
Add AtomicString::fromUTF8
https://bugs.webkit.org/show_bug.cgi?id=45594
Unicode::calculateStringHashFromUTF8 creates a StringHash out of UTF8 input data and
calculates the required length for the UTF16 conversion in one step.
This is then used in a specialized translator for the string table of AtomicString.
* JavaScriptCore.exp:
* JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def:
* wtf/text/AtomicString.cpp:
(WTF::CStringTranslator::equal):
(WTF::HashAndUTF8CharactersTranslator::hash):
(WTF::HashAndUTF8CharactersTranslator::equal):
(WTF::HashAndUTF8CharactersTranslator::translate):
(WTF::AtomicString::add):
(WTF::AtomicString::addSlowCase):
(WTF::AtomicString::find):
(WTF::AtomicString::fromUTF8):
* wtf/text/AtomicString.h:
* wtf/text/StringImpl.h:
* wtf/text/WTFString.h:
* wtf/unicode/UTF8.cpp:
(WTF::Unicode::readUTF8Sequence):
(WTF::Unicode::convertUTF8ToUTF16):
(WTF::Unicode::calculateStringHashFromUTF8):
(WTF::Unicode::equalUTF16WithUTF8):
* wtf/unicode/UTF8.h:
2010-12-02 Patrick Gansterer <paroga at webkit.org>
Reviewed by Darin Adler.
Add AtomicString::fromUTF8
https://bugs.webkit.org/show_bug.cgi?id=45594
Use AtomicString::fromUTF8 directly in the libxml2 parser.
* dom/XMLDocumentParserLibxml2.cpp:
(WebCore::toAtomicString):
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@73201 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/JavaScriptCore/ChangeLog b/JavaScriptCore/ChangeLog
index 3c4cb1a..68c4b51 100644
--- a/JavaScriptCore/ChangeLog
+++ b/JavaScriptCore/ChangeLog
@@ -1,3 +1,35 @@
+2010-12-02 Patrick Gansterer <paroga at webkit.org>
+
+ Reviewed by Darin Adler.
+
+ Add AtomicString::fromUTF8
+ https://bugs.webkit.org/show_bug.cgi?id=45594
+
+ Unicode::calculateStringHashFromUTF8 creates a StringHash out of UTF8 input data and
+ calculates the required length for the UTF16 conversion in one step.
+ This is then used in a specialized translator for the string table of AtomicString.
+
+ * JavaScriptCore.exp:
+ * JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def:
+ * wtf/text/AtomicString.cpp:
+ (WTF::CStringTranslator::equal):
+ (WTF::HashAndUTF8CharactersTranslator::hash):
+ (WTF::HashAndUTF8CharactersTranslator::equal):
+ (WTF::HashAndUTF8CharactersTranslator::translate):
+ (WTF::AtomicString::add):
+ (WTF::AtomicString::addSlowCase):
+ (WTF::AtomicString::find):
+ (WTF::AtomicString::fromUTF8):
+ * wtf/text/AtomicString.h:
+ * wtf/text/StringImpl.h:
+ * wtf/text/WTFString.h:
+ * wtf/unicode/UTF8.cpp:
+ (WTF::Unicode::readUTF8Sequence):
+ (WTF::Unicode::convertUTF8ToUTF16):
+ (WTF::Unicode::calculateStringHashFromUTF8):
+ (WTF::Unicode::equalUTF16WithUTF8):
+ * wtf/unicode/UTF8.h:
+
2010-12-02 Geoffrey Garen <ggaren at apple.com>
Reviewed by Sam Weinig.
diff --git a/JavaScriptCore/JavaScriptCore.exp b/JavaScriptCore/JavaScriptCore.exp
index 193fa08..e4a92c5 100644
--- a/JavaScriptCore/JavaScriptCore.exp
+++ b/JavaScriptCore/JavaScriptCore.exp
@@ -373,6 +373,8 @@ __ZN3WTF12AtomicString3addEPKtj
__ZN3WTF12AtomicString3addEPKtjj
__ZN3WTF12AtomicString4findEPKtjj
__ZN3WTF12AtomicString4initEv
+__ZN3WTF12AtomicString8fromUTF8EPKc
+__ZN3WTF12AtomicString8fromUTF8EPKcm
__ZN3WTF12createThreadEPFPvS0_ES0_
__ZN3WTF12createThreadEPFPvS0_ES0_PKc
__ZN3WTF12detachThreadEj
diff --git a/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def b/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def
index 1bc488f..e8105f0 100644
--- a/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def
+++ b/JavaScriptCore/JavaScriptCore.vcproj/JavaScriptCore/JavaScriptCore.def
@@ -59,6 +59,7 @@ EXPORTS
?broadcast at ThreadCondition@WTF@@QAEXXZ
?bufferLengthForStringDecimal at DecimalNumber@WTF@@QBEIXZ
?calculateDSTOffset at WTF@@YANNN at Z
+ ?calculateStringHashFromUTF8 at Unicode@WTF@@YAIPBD0AAI at Z
?calculateUTCOffset at WTF@@YAHXZ
?calculatedFunctionName at DebuggerCallFrame@JSC@@QBE?AVUString at 2@XZ
?call at JSC@@YA?AVJSValue at 1@PAVExecState at 1@V21 at W4CallType@1 at ABTCallData@1 at 1ABVArgList@1@@Z
@@ -139,6 +140,7 @@ EXPORTS
?empty at StringImpl@WTF@@SAPAV12 at XZ
?enumerable at PropertyDescriptor@JSC@@QBE_NXZ
?equal at Identifier@JSC@@SA_NPBVStringImpl at WTF@@PBD at Z
+ ?equalUTF16WithUTF8 at Unicode@WTF@@YA_NPB_W0PBD1 at Z
?evaluate at DebuggerCallFrame@JSC@@QBE?AVJSValue at 2@ABVUString at 2@AAV32@@Z
?evaluate at JSC@@YA?AVCompletion at 1@PAVExecState at 1@AAVScopeChain at 1@ABVSourceCode at 1@VJSValue at 1@@Z
?exclude at Profile@JSC@@QAEXPBVProfileNode at 2@@Z
diff --git a/JavaScriptCore/wtf/text/AtomicString.cpp b/JavaScriptCore/wtf/text/AtomicString.cpp
index c49a837..acbcd34 100644
--- a/JavaScriptCore/wtf/text/AtomicString.cpp
+++ b/JavaScriptCore/wtf/text/AtomicString.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Patrick Gansterer <paroga at paroga.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -26,9 +27,12 @@
#include <wtf/HashSet.h>
#include <wtf/Threading.h>
#include <wtf/WTFThreadData.h>
+#include <wtf/unicode/UTF8.h>
namespace WTF {
+using namespace Unicode;
+
COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
class AtomicStringTable {
@@ -85,7 +89,7 @@ struct CStringTranslator {
if (d[i] != c)
return false;
}
- return s[length] == 0;
+ return !s[length];
}
static void translate(StringImpl*& location, const char* const& c, unsigned hash)
@@ -206,12 +210,44 @@ struct HashAndCharactersTranslator {
}
};
+// Lookup key used with HashAndUTF8CharactersTranslator: a UTF-8 buffer
+// together with the values precomputed for it by AtomicString::fromUTF8.
+struct HashAndUTF8Characters {
+ // Hash returned by calculateStringHashFromUTF8 for this buffer.
+ unsigned hash;
+ // Start of the UTF-8 input.
+ const char* characters;
+ // Length of the UTF-8 input in bytes (characters + length is the end pointer).
+ unsigned length;
+ // Number of UTF-16 code units the input converts to; sizes the new StringImpl.
+ unsigned utf16Length;
+};
+
+// Translator that lets the AtomicString table be probed directly with a
+// UTF-8 buffer (HashAndUTF8Characters) instead of an already converted
+// UTF-16 string.
+struct HashAndUTF8CharactersTranslator {
+    // The hash was computed up front by the caller; just hand it back.
+    static unsigned hash(const HashAndUTF8Characters& buffer)
+    {
+        return buffer.hash;
+    }
+
+    // Returns true when the table entry holds exactly the characters that the
+    // UTF-8 buffer decodes to.
+    static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
+    {
+        // Entries of a different UTF-16 length can never match. Rejecting them
+        // up front skips the decode and guarantees the comparison below never
+        // has to look beyond the end of a shorter table entry.
+        if (buffer.utf16Length != string->length())
+            return false;
+
+        const UChar* stringCharacters = string->characters();
+        return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length);
+    }
+
+    // Called when the buffer is not in the table yet: allocates a StringImpl of
+    // utf16Length code units, fills it from the UTF-8 input and marks it atomic.
+    // The reference released here is adopted by AtomicString::fromUTF8.
+    static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
+    {
+        UChar* target;
+        location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef();
+
+        // The input was validated while hashing, so the conversion is expected to succeed.
+        const char* source = buffer.characters;
+        if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
+            ASSERT_NOT_REACHED();
+
+        location->setHash(hash);
+        location->setIsAtomic(true);
+    }
+};
+
PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
{
if (!s)
return 0;
- if (length == 0)
+ if (!length)
return StringImpl::empty();
UCharBuffer buf = { s, length };
@@ -227,7 +263,7 @@ PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsign
ASSERT(s);
ASSERT(existingHash);
- if (length == 0)
+ if (!length)
return StringImpl::empty();
HashAndCharacters buffer = { existingHash, s, length };
@@ -246,7 +282,7 @@ PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
while (s[length] != UChar(0))
length++;
- if (length == 0)
+ if (!length)
return StringImpl::empty();
UCharBuffer buf = {s, length};
@@ -262,7 +298,7 @@ PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
if (!r || r->isAtomic())
return r;
- if (r->length() == 0)
+ if (!r->length())
return StringImpl::empty();
StringImpl* result = *stringTable().add(r).first;
@@ -276,7 +312,7 @@ AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned e
ASSERT(s);
ASSERT(existingHash);
- if (length == 0)
+ if (!length)
return static_cast<AtomicStringImpl*>(StringImpl::empty());
HashAndCharacters buffer = { existingHash, s, length };
@@ -290,7 +326,7 @@ void AtomicString::remove(StringImpl* r)
{
stringTable().remove(r);
}
-
+
AtomicString AtomicString::lower() const
{
// Note: This is a hot function in the Dromaeo benchmark.
@@ -303,4 +339,36 @@ AtomicString AtomicString::lower() const
return AtomicString(newImpl);
}
+// Returns the AtomicString for the UTF-8 range [characters, characters + length).
+// A null pointer, input that is not valid UTF-8, or a length that does not fit
+// in an unsigned yields a null AtomicString; a zero length yields emptyAtom.
+AtomicString AtomicString::fromUTF8(const char* characters, size_t length)
+{
+    if (!characters)
+        return AtomicString();
+
+    if (!length)
+        return emptyAtom;
+
+    // The table key stores the byte length as unsigned. Refuse input whose
+    // length does not fit instead of silently truncating it, which would leave
+    // the hash (computed over the full range) out of step with the stored length.
+    const unsigned byteLength = static_cast<unsigned>(length);
+    if (byteLength != length)
+        return AtomicString();
+
+    HashAndUTF8Characters buffer;
+    buffer.characters = characters;
+    buffer.length = byteLength;
+    buffer.hash = calculateStringHashFromUTF8(characters, characters + byteLength, buffer.utf16Length);
+
+    // calculateStringHashFromUTF8 reports invalid input with a zero hash.
+    if (!buffer.hash)
+        return AtomicString();
+
+    pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
+
+    // If the string is newly-translated, then we need to adopt it.
+    // The boolean in the pair tells us if that is so.
+    AtomicString atomicString;
+    atomicString.m_string = addResult.second ? adoptRef(*addResult.first) : *addResult.first;
+    return atomicString;
}
+
+// Overload for NUL-terminated UTF-8: measures the input with strlen and
+// forwards to the (pointer, length) overload. A null pointer gives a null
+// AtomicString.
+AtomicString AtomicString::fromUTF8(const char* characters)
+{
+    return characters ? fromUTF8(characters, strlen(characters)) : AtomicString();
+}
+
+} // namespace WTF
diff --git a/JavaScriptCore/wtf/text/AtomicString.h b/JavaScriptCore/wtf/text/AtomicString.h
index 06e63f4..ab5b366 100644
--- a/JavaScriptCore/wtf/text/AtomicString.h
+++ b/JavaScriptCore/wtf/text/AtomicString.h
@@ -108,6 +108,11 @@ public:
operator QString() const { return m_string; }
#endif
+ // AtomicString::fromUTF8 will return a null string if
+ // the input data contains invalid UTF-8 sequences.
+ static AtomicString fromUTF8(const char*, size_t);
+ static AtomicString fromUTF8(const char*);
+
private:
String m_string;
diff --git a/JavaScriptCore/wtf/text/StringImpl.h b/JavaScriptCore/wtf/text/StringImpl.h
index 99d0e9d..dc1dbb2 100644
--- a/JavaScriptCore/wtf/text/StringImpl.h
+++ b/JavaScriptCore/wtf/text/StringImpl.h
@@ -53,6 +53,7 @@ namespace WTF {
struct CStringTranslator;
struct HashAndCharactersTranslator;
+struct HashAndUTF8CharactersTranslator;
struct UCharBufferTranslator;
enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
@@ -66,6 +67,7 @@ class StringImpl : public StringImplBase {
friend struct JSC::IdentifierUCharBufferTranslator;
friend struct WTF::CStringTranslator;
friend struct WTF::HashAndCharactersTranslator;
+ friend struct WTF::HashAndUTF8CharactersTranslator;
friend struct WTF::UCharBufferTranslator;
friend class AtomicStringImpl;
private:
diff --git a/JavaScriptCore/wtf/text/WTFString.h b/JavaScriptCore/wtf/text/WTFString.h
index e9d6ae4..eb95b41 100644
--- a/JavaScriptCore/wtf/text/WTFString.h
+++ b/JavaScriptCore/wtf/text/WTFString.h
@@ -309,6 +309,8 @@ public:
String(const AECHAR*);
#endif
+ // String::fromUTF8 will return a null string if
+ // the input data contains invalid UTF-8 sequences.
static String fromUTF8(const char*, size_t);
static String fromUTF8(const char*);
diff --git a/JavaScriptCore/wtf/unicode/UTF8.cpp b/JavaScriptCore/wtf/unicode/UTF8.cpp
index ca4fc1c..dc24ed5 100644
--- a/JavaScriptCore/wtf/unicode/UTF8.cpp
+++ b/JavaScriptCore/wtf/unicode/UTF8.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Patrick Gansterer <paroga at paroga.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -25,6 +26,7 @@
#include "config.h"
#include "UTF8.h"
+#include <wtf/StringHasher.h>
#include "ASCIICType.h"
@@ -32,7 +34,7 @@ namespace WTF {
namespace Unicode {
// FIXME: Use definition from CharacterNames.h.
-const UChar replacementCharacter = 0xFFFD;
+static const UChar replacementCharacter = 0xFFFD;
inline int inlineUTF8SequenceLengthNonASCII(char b0)
{
@@ -314,5 +316,86 @@ ConversionResult convertUTF8ToUTF16(
return result;
}
+// Computes the StringHasher hash that the UTF-16 form of the UTF-8 range
+// [data, dataEnd) would have, without building the UTF-16 string.
+//
+// data, dataEnd  UTF-8 input range; a null data pointer is treated as invalid.
+// utf16Length    out: number of UTF-16 code units counted so far; on success
+//                this is the length of the converted string.
+//
+// Returns 0 when the input is null, truncated or not legal UTF-8, otherwise
+// the value of StringHasher::hash().
+unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length)
+{
+    // Give the out-parameter a defined value on every path, early exits included.
+    utf16Length = 0;
+
+    if (!data)
+        return 0;
+
+    WTF::StringHasher stringHasher;
+
+    while (data < dataEnd) {
+        // Fast path: an ASCII byte maps to exactly one UTF-16 code unit.
+        if (isASCII(*data)) {
+            stringHasher.addCharacter(*data++);
+            utf16Length++;
+            continue;
+        }
+
+        int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data);
+
+        // The multi-byte sequence must fit inside the remaining input.
+        if (dataEnd - data < utf8SequenceLength)
+            return 0;
+
+        if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength))
+            return 0;
+
+        UChar32 character = readUTF8Sequence(data, utf8SequenceLength);
+        ASSERT(!isASCII(character));
+
+        if (U_IS_BMP(character)) {
+            // UTF-16 surrogate values are illegal in UTF-32
+            if (U_IS_SURROGATE(character))
+                return 0;
+            stringHasher.addCharacter(static_cast<UChar>(character)); // normal case
+            utf16Length++;
+        } else if (U_IS_SUPPLEMENTARY(character)) {
+            // Supplementary characters are hashed as their surrogate pair.
+            stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)),
+                                       static_cast<UChar>(U16_TRAIL(character)));
+            utf16Length += 2;
+        } else
+            return 0;
+    }
+
+    return stringHasher.hash();
+}
+
+// Compares the UTF-16 range [a, aEnd) with the UTF-8 range [b, bEnd),
+// decoding the UTF-8 side one character at a time.
+//
+// Returns true only if the UTF-8 input is legal, every decoded character
+// matches the UTF-16 side, and both ranges end together. The UTF-16 side is
+// never read at or beyond aEnd.
+bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd)
+{
+    while (b < bEnd) {
+        if (isASCII(*b)) {
+            // The UTF-16 side may be shorter than the UTF-8 side; check before reading.
+            if (a == aEnd || *a++ != *b++)
+                return false;
+            continue;
+        }
+
+        int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*b);
+
+        // The multi-byte sequence must fit inside the remaining UTF-8 input.
+        if (bEnd - b < utf8SequenceLength)
+            return false;
+
+        if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(b), utf8SequenceLength))
+            return false;
+
+        UChar32 character = readUTF8Sequence(b, utf8SequenceLength);
+        ASSERT(!isASCII(character));
+
+        if (U_IS_BMP(character)) {
+            // UTF-16 surrogate values are illegal in UTF-32
+            if (U_IS_SURROGATE(character))
+                return false;
+            if (a == aEnd || *a++ != character)
+                return false;
+        } else if (U_IS_SUPPLEMENTARY(character)) {
+            // A supplementary character needs a complete surrogate pair on the UTF-16 side.
+            if (aEnd - a < 2)
+                return false;
+            if (*a++ != U16_LEAD(character))
+                return false;
+            if (*a++ != U16_TRAIL(character))
+                return false;
+        } else
+            return false;
+    }
+
+    return a == aEnd;
+}
+
} // namespace Unicode
} // namespace WTF
diff --git a/JavaScriptCore/wtf/unicode/UTF8.h b/JavaScriptCore/wtf/unicode/UTF8.h
index a5ed93e..1f4baca 100644
--- a/JavaScriptCore/wtf/unicode/UTF8.h
+++ b/JavaScriptCore/wtf/unicode/UTF8.h
@@ -29,7 +29,7 @@
#include "Unicode.h"
namespace WTF {
- namespace Unicode {
+namespace Unicode {
// Given a first byte, gives the length of the UTF-8 sequence it begins.
// Returns 0 for bytes that are not legal starts of UTF-8 sequences.
@@ -69,7 +69,12 @@ namespace WTF {
ConversionResult convertUTF16ToUTF8(
const UChar** sourceStart, const UChar* sourceEnd,
char** targetStart, char* targetEnd, bool strict = true);
- }
-}
+
+ // Hashes the UTF-8 range [data, dataEnd) as if it had been converted to
+ // UTF-16 and reports the UTF-16 length through utf16Length.
+ // Returns 0 if data is null or the input is not legal UTF-8.
+ unsigned calculateStringHashFromUTF8(const char* data, const char* dataEnd, unsigned& utf16Length);
+
+ // Returns true if the UTF-16 range [a, aEnd) holds the same characters as
+ // the UTF-8 range [b, bEnd); illegal UTF-8 compares unequal.
+ bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd);
+
+} // namespace Unicode
+} // namespace WTF
#endif // WTF_UTF8_h
diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index ab38c3d..17d9dff 100644
--- a/WebCore/ChangeLog
+++ b/WebCore/ChangeLog
@@ -1,3 +1,15 @@
+2010-12-02 Patrick Gansterer <paroga at webkit.org>
+
+ Reviewed by Darin Adler.
+
+ Add AtomicString::fromUTF8
+ https://bugs.webkit.org/show_bug.cgi?id=45594
+
+ Use AtomicString::fromUTF8 directly in the libxml2 parser.
+
+ * dom/XMLDocumentParserLibxml2.cpp:
+ (WebCore::toAtomicString):
+
2010-12-02 Andy Estes <aestes at apple.com>
Fix the Qt Linux Release minimal build.
diff --git a/WebCore/dom/XMLDocumentParserLibxml2.cpp b/WebCore/dom/XMLDocumentParserLibxml2.cpp
index 7a3285f..23f9883 100644
--- a/WebCore/dom/XMLDocumentParserLibxml2.cpp
+++ b/WebCore/dom/XMLDocumentParserLibxml2.cpp
@@ -686,14 +686,12 @@ static inline String toString(const xmlChar* string)
+// Builds an AtomicString from `size` bytes of xmlChar data by passing them
+// to AtomicString::fromUTF8 as UTF-8.
static inline AtomicString toAtomicString(const xmlChar* string, size_t size)
{
- // FIXME: Use AtomicString::fromUTF8.
- return AtomicString(toString(string, size));
+ return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size);
}
+// Builds an AtomicString from an xmlChar string by passing it to the
+// single-argument AtomicString::fromUTF8 as UTF-8.
static inline AtomicString toAtomicString(const xmlChar* string)
{
- // FIXME: Use AtomicString::fromUTF8.
- return AtomicString(toString(string));
+ return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
}
struct _xmlSAX2Namespace {
--
WebKit Debian packaging
More information about the Pkg-webkit-commits
mailing list