[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-9427-gc2be6fc
abarth at webkit.org
abarth at webkit.org
Wed Dec 22 12:47:01 UTC 2010
The following commit has been merged in the debian/experimental branch:
commit 64ee2fbe9b3158b4ebeb5a0fc0c0f3436998a834
Author: abarth at webkit.org <abarth at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date: Mon Aug 30 06:31:08 2010 +0000
2010-08-29 Adam Barth <abarth at webkit.org>
Reviewed by Darin Adler.
Move UTF16 LEAD/TRAIL logic into the HTMLEntityParser
https://bugs.webkit.org/show_bug.cgi?id=44790
We now block this attack.
* http/tests/security/xssAuditor/javascript-link-HTML-entities-null-char-expected.txt:
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@66359 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/LayoutTests/ChangeLog b/LayoutTests/ChangeLog
index 1856766..543f83a 100644
--- a/LayoutTests/ChangeLog
+++ b/LayoutTests/ChangeLog
@@ -1,3 +1,25 @@
+2010-08-29 Adam Barth <abarth at webkit.org>
+
+ Reviewed by Darin Adler.
+
+ Move UTF16 LEAD/TRAIL logic into the HTMLEntityParser
+ https://bugs.webkit.org/show_bug.cgi?id=44790
+
+ We now block this attack.
+
+ * http/tests/security/xssAuditor/javascript-link-HTML-entities-null-char-expected.txt:
+
+2010-08-29 Adam Barth <abarth at webkit.org>
+
+ Reviewed by Darin Adler.
+
+ Move UTF16 LEAD/TRAIL logic into the HTMLEntityParser
+ https://bugs.webkit.org/show_bug.cgi?id=44790
+
+ We now block this attack.
+
+ * http/tests/security/xssAuditor/javascript-link-HTML-entities-null-char-expected.txt:
+
2010-08-29 Yuzo Fujishima <yuzo at google.com>
Unreviewed Chromium test expectation change for r66282.
diff --git a/LayoutTests/http/tests/security/xssAuditor/javascript-link-HTML-entities-null-char-expected.txt b/LayoutTests/http/tests/security/xssAuditor/javascript-link-HTML-entities-null-char-expected.txt
index d2349c4..513e2f8 100644
--- a/LayoutTests/http/tests/security/xssAuditor/javascript-link-HTML-entities-null-char-expected.txt
+++ b/LayoutTests/http/tests/security/xssAuditor/javascript-link-HTML-entities-null-char-expected.txt
@@ -1,2 +1,3 @@
-CONSOLE MESSAGE: line 1: SyntaxError: Parse error
+CONSOLE MESSAGE: line 1: Refused to execute a JavaScript script. Source code of script found within request.
+
diff --git a/WebCore/html/parser/HTMLEntityParser.cpp b/WebCore/html/parser/HTMLEntityParser.cpp
index f675844..6a422b8 100644
--- a/WebCore/html/parser/HTMLEntityParser.cpp
+++ b/WebCore/html/parser/HTMLEntityParser.cpp
@@ -45,23 +45,36 @@ static const UChar windowsLatin1ExtensionArray[32] = {
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F
};
-inline UChar adjustEntity(unsigned value)
+inline UChar adjustEntity(UChar32 value)
{
if ((value & ~0x1F) != 0x0080)
return value;
return windowsLatin1ExtensionArray[value - 0x80];
}
-inline unsigned legalEntityFor(unsigned value)
+inline UChar32 legalEntityFor(UChar32 value)
{
// FIXME: A number of specific entity values generate parse errors.
if (value == 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
return 0xFFFD;
- if (value < 0xFFFF)
+ if (U_IS_BMP(value))
return adjustEntity(value);
return value;
}
+inline bool convertToUTF16(UChar32 value, Vector<UChar, 16>& decodedEntity)
+{
+ if (U_IS_BMP(value)) {
+ UChar character = static_cast<UChar>(value);
+ ASSERT(character == value);
+ decodedEntity.append(character);
+ return true;
+ }
+ decodedEntity.append(U16_LEAD(value));
+ decodedEntity.append(U16_TRAIL(value));
+ return true;
+}
+
inline bool isHexDigit(UChar cc)
{
return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F');
@@ -85,14 +98,15 @@ void unconsumeCharacters(SegmentedString& source, const Vector<UChar, 10>& consu
}
-unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
+bool consumeHTMLEntity(SegmentedString& source, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
{
ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
ASSERT(!notEnoughCharacters);
+ ASSERT(decodedEntity.isEmpty());
enum EntityState {
Initial,
- NumberType,
+ Number,
MaybeHexLowerCaseX,
MaybeHexUpperCaseX,
Hex,
@@ -100,7 +114,7 @@ unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, U
Named
};
EntityState entityState = Initial;
- unsigned result = 0;
+ UChar32 result = 0;
Vector<UChar, 10> consumedCharacters;
while (!source.isEmpty()) {
@@ -108,20 +122,20 @@ unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, U
switch (entityState) {
case Initial: {
if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&')
- return 0;
+ return false;
if (additionalAllowedCharacter && cc == additionalAllowedCharacter)
- return 0;
+ return false;
if (cc == '#') {
- entityState = NumberType;
+ entityState = Number;
break;
}
if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
entityState = Named;
continue;
}
- return 0;
+ return false;
}
- case NumberType: {
+ case Number: {
if (cc == 'x') {
entityState = MaybeHexLowerCaseX;
break;
@@ -135,7 +149,7 @@ unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, U
continue;
}
source.push('#');
- return 0;
+ return false;
}
case MaybeHexLowerCaseX: {
if (isHexDigit(cc)) {
@@ -144,7 +158,7 @@ unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, U
}
source.push('#');
source.push('x');
- return 0;
+ return false;
}
case MaybeHexUpperCaseX: {
if (isHexDigit(cc)) {
@@ -153,7 +167,7 @@ unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, U
}
source.push('#');
source.push('X');
- return 0;
+ return false;
}
case Hex: {
if (cc >= '0' && cc <= '9')
@@ -162,21 +176,21 @@ unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, U
result = result * 16 + 10 + cc - 'a';
else if (cc >= 'A' && cc <= 'F')
result = result * 16 + 10 + cc - 'A';
- else if (cc == ';') {
- source.advancePastNonNewline();
- return legalEntityFor(result);
- } else
- return legalEntityFor(result);
+ else {
+ if (cc == ';')
+ source.advanceAndASSERT(cc);
+ return convertToUTF16(legalEntityFor(result), decodedEntity);
+ }
break;
}
case Decimal: {
if (cc >= '0' && cc <= '9')
result = result * 10 + cc - '0';
- else if (cc == ';') {
- source.advancePastNonNewline();
- return legalEntityFor(result);
- } else
- return legalEntityFor(result);
+ else {
+ if (cc == ';')
+ source.advanceAndASSERT(cc);
+ return convertToUTF16(legalEntityFor(result), decodedEntity);
+ }
break;
}
case Named: {
@@ -194,12 +208,12 @@ unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, U
// We can't an entity because there might be a longer entity
// that we could match if we had more data.
unconsumeCharacters(source, consumedCharacters);
- return 0;
+ return false;
}
if (!entitySearch.mostRecentMatch()) {
ASSERT(!entitySearch.currentValue());
unconsumeCharacters(source, consumedCharacters);
- return 0;
+ return false;
}
if (entitySearch.mostRecentMatch()->length != entitySearch.currentLength()) {
// We've consumed too many characters. We need to walk the
@@ -218,12 +232,13 @@ unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, U
}
cc = *source;
}
- if (entitySearch.mostRecentMatch()->lastCharacter() == ';')
- return entitySearch.mostRecentMatch()->value;
- if (!additionalAllowedCharacter || !(isAlphaNumeric(cc) || cc == '='))
- return entitySearch.mostRecentMatch()->value;
+ if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
+ || !additionalAllowedCharacter
+ || !(isAlphaNumeric(cc) || cc == '=')) {
+ return convertToUTF16(entitySearch.mostRecentMatch()->value, decodedEntity);
+ }
unconsumeCharacters(source, consumedCharacters);
- return 0;
+ return false;
}
}
consumedCharacters.append(cc);
@@ -232,7 +247,7 @@ unsigned consumeHTMLEntity(SegmentedString& source, bool& notEnoughCharacters, U
ASSERT(source.isEmpty());
notEnoughCharacters = true;
unconsumeCharacters(source, consumedCharacters);
- return 0;
+ return false;
}
UChar decodeNamedEntity(const char* name)
diff --git a/WebCore/html/parser/HTMLEntityParser.h b/WebCore/html/parser/HTMLEntityParser.h
index 1059b24..f02e849 100644
--- a/WebCore/html/parser/HTMLEntityParser.h
+++ b/WebCore/html/parser/HTMLEntityParser.h
@@ -31,7 +31,7 @@
namespace WebCore {
-unsigned consumeHTMLEntity(SegmentedString&, bool& notEnoughCharacters, UChar additionalAllowedCharacter = '\0');
+bool consumeHTMLEntity(SegmentedString&, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter = '\0');
// Used by the XML parser. Not suitable for use in HTML parsing. Use consumeHTMLEntity instead.
UChar decodeNamedEntity(const char*);
diff --git a/WebCore/html/parser/HTMLEntitySearch.h b/WebCore/html/parser/HTMLEntitySearch.h
index 11a23ae..0c66318 100644
--- a/WebCore/html/parser/HTMLEntitySearch.h
+++ b/WebCore/html/parser/HTMLEntitySearch.h
@@ -39,7 +39,7 @@ public:
void advance(UChar);
bool isEntityPrefix() const { return !!m_first; }
- int currentValue() const { return m_currentValue; }
+ UChar32 currentValue() const { return m_currentValue; }
int currentLength() const { return m_currentLength; }
const HTMLEntityTableEntry* mostRecentMatch() const { return m_mostRecentMatch; }
@@ -63,7 +63,7 @@ private:
}
int m_currentLength;
- int m_currentValue;
+ UChar32 m_currentValue;
const HTMLEntityTableEntry* m_mostRecentMatch;
const HTMLEntityTableEntry* m_first;
diff --git a/WebCore/html/parser/HTMLEntityTable.h b/WebCore/html/parser/HTMLEntityTable.h
index 3734c34..3b9ab4e 100644
--- a/WebCore/html/parser/HTMLEntityTable.h
+++ b/WebCore/html/parser/HTMLEntityTable.h
@@ -35,7 +35,7 @@ struct HTMLEntityTableEntry {
const UChar* entity;
int length;
- int value;
+ UChar32 value;
};
class HTMLEntityTable {
diff --git a/WebCore/html/parser/HTMLTokenizer.cpp b/WebCore/html/parser/HTMLTokenizer.cpp
index a18701a..4a8000c 100644
--- a/WebCore/html/parser/HTMLTokenizer.cpp
+++ b/WebCore/html/parser/HTMLTokenizer.cpp
@@ -119,13 +119,18 @@ void HTMLTokenizer::reset()
inline bool HTMLTokenizer::processEntity(SegmentedString& source)
{
bool notEnoughCharacters = false;
- unsigned value = consumeHTMLEntity(source, notEnoughCharacters);
+ Vector<UChar, 16> decodedEntity;
+ bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
if (notEnoughCharacters)
return false;
- if (!value)
+ if (!success) {
+ ASSERT(decodedEntity.isEmpty());
bufferCharacter('&');
- else
- bufferCodePoint(value);
+ } else {
+ Vector<UChar>::const_iterator iter = decodedEntity.begin();
+ for (; iter != decodedEntity.end(); ++iter)
+ bufferCharacter(*iter);
+ }
return true;
}
@@ -1027,16 +1032,17 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
BEGIN_STATE(CharacterReferenceInAttributeValueState) {
bool notEnoughCharacters = false;
- unsigned value = consumeHTMLEntity(source, notEnoughCharacters, m_additionalAllowedCharacter);
+ Vector<UChar, 16> decodedEntity;
+ bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
if (notEnoughCharacters)
return haveBufferedCharacterToken();
- if (!value)
+ if (!success) {
+ ASSERT(decodedEntity.isEmpty());
m_token->appendToAttributeValue('&');
- else if (value < 0xFFFF)
- m_token->appendToAttributeValue(value);
- else {
- m_token->appendToAttributeValue(U16_LEAD(value));
- m_token->appendToAttributeValue(U16_TRAIL(value));
+ } else {
+ Vector<UChar>::const_iterator iter = decodedEntity.begin();
+ for (; iter != decodedEntity.end(); ++iter)
+ m_token->appendToAttributeValue(*iter);
}
// We're supposed to switch back to the attribute value state that
// we were in when we were switched into this state. Rather than
@@ -1634,16 +1640,6 @@ inline void HTMLTokenizer::bufferCharacter(UChar character)
m_token->appendToCharacter(character);
}
-inline void HTMLTokenizer::bufferCodePoint(unsigned value)
-{
- if (value < 0xFFFF) {
- bufferCharacter(value);
- return;
- }
- bufferCharacter(U16_LEAD(value));
- bufferCharacter(U16_TRAIL(value));
-}
-
inline void HTMLTokenizer::parseError()
{
notImplemented();
diff --git a/WebCore/page/XSSAuditor.cpp b/WebCore/page/XSSAuditor.cpp
index fb0e1c0..0e6cc65 100644
--- a/WebCore/page/XSSAuditor.cpp
+++ b/WebCore/page/XSSAuditor.cpp
@@ -277,19 +277,18 @@ String XSSAuditor::decodeHTMLEntities(const String& string, bool leaveUndecodabl
if (leaveUndecodableEntitiesUntouched)
sourceShadow = source;
bool notEnoughCharacters = false;
- unsigned entity = consumeHTMLEntity(source, notEnoughCharacters);
+ Vector<UChar, 16> decodedEntity;
+ bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
// We ignore notEnoughCharacters because we might as well use this loop
// to copy the remaining characters into |result|.
-
- if (entity > 0xFFFF) {
- result.append(U16_LEAD(entity));
- result.append(U16_TRAIL(entity));
- } else if (entity && (!leaveUndecodableEntitiesUntouched || entity != 0xFFFD)){
- result.append(entity);
- } else {
+ if (!success || (!leaveUndecodableEntitiesUntouched && decodedEntity.size() == 1 && decodedEntity[0] == 0xFFFD)) {
result.append('&');
if (leaveUndecodableEntitiesUntouched)
source = sourceShadow;
+ } else {
+ Vector<UChar>::const_iterator iter = decodedEntity.begin();
+ for (; iter != decodedEntity.end(); ++iter)
+ result.append(*iter);
}
}
--
WebKit Debian packaging
More information about the Pkg-webkit-commits mailing list