[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-9427-gc2be6fc

Wed Dec 22 11:45:39 UTC 2010

The following commit has been merged in the debian/experimental branch:
commit a42f34f7d0db2ccda042e9bada4029ee97f7476f
Author: abarth at webkit.org <abarth at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Fri Aug 6 00:12:15 2010 +0000

    2010-08-05  Adam Barth  <abarth at webkit.org>
    
            Reviewed by Eric Seidel.
    
            U+0000 is turned to U+FFFD (replacement character)
            https://bugs.webkit.org/show_bug.cgi?id=42112
    
            Update test results to show null stripping.  These changes are mostly
            going back to the old results we had before we added the FFFD
            replacement.
    
            * fast/dom/stripNullFromTextNodes-expected.txt:
            * fast/tokenizer/null-in-text-expected.txt: Added.
            * fast/tokenizer/null-in-text.html: Added.
            * fast/tokenizer/null-xss-expected.txt: Added.
            * fast/tokenizer/null-xss.html: Added.
                - The main risk with stripping null characters is that they'll be
                  used in XSS attacks.  This test shows that we don't strip null
                  characters from tag names.
            * platform/mac/fast/text/stripNullFromText-expected.txt:
            * svg/dom/fuzz-path-parser-expected.txt:
            * svg/dom/rgb-color-parser-expected.txt:
    
    2010-08-05  Adam Barth  <abarth at webkit.org>
    
            Reviewed by Eric Seidel.
    
            U+0000 is turned to U+FFFD (replacement character)
            https://bugs.webkit.org/show_bug.cgi?id=42112
    
            This patch introduces an intentional parsing difference from the HTML5
            parsing specification.  The spec requires us to convert NULL
            characters to U+FFFD, but doing so causes compatibility issues with a
            number of sites, including US Bank.
    
            In this patch, we strip the null characters instead in certain cases.
            Firefox has made a corresponding change.  After gathering compatibility
            data, we hope to convince the HTML WG to adopt this change.
    
            Tests: fast/tokenizer/null-in-text.html
                   fast/tokenizer/null-xss.html
    
            * html/HTMLTokenizer.cpp:
            (WebCore::HTMLTokenizer::HTMLTokenizer):
            (WebCore::HTMLTokenizer::reset):
            * html/HTMLTokenizer.h:
            (WebCore::HTMLTokenizer::setSkipLeadingNewLineForListing):
            (WebCore::HTMLTokenizer::forceNullCharacterReplacement):
            (WebCore::HTMLTokenizer::setForceNullCharacterReplacement):
            (WebCore::HTMLTokenizer::shouldSkipNullCharacters):
            (WebCore::HTMLTokenizer::InputStreamPreprocessor::InputStreamPreprocessor):
            (WebCore::HTMLTokenizer::InputStreamPreprocessor::peek):
            * html/HTMLTreeBuilder.cpp:
            (WebCore::HTMLTreeBuilder::passTokenToLegacyParser):
            (WebCore::HTMLTreeBuilder::constructTreeFromToken):
            (WebCore::HTMLTreeBuilder::processStartTagForInBody):
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@64799 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/LayoutTests/ChangeLog b/LayoutTests/ChangeLog
index f835baa..97d4928 100644
--- a/LayoutTests/ChangeLog
+++ b/LayoutTests/ChangeLog
@@ -1,5 +1,28 @@
 2010-08-05  Adam Barth  <abarth at webkit.org>
 
+        Reviewed by Eric Seidel.
+
+        U+0000 is turned to U+FFFD (replacement character)
+        https://bugs.webkit.org/show_bug.cgi?id=42112
+
+        Update test results to show null stripping.  These changes are mostly
+        going back to the old results we had before we added the FFFD
+        replacement.
+
+        * fast/dom/stripNullFromTextNodes-expected.txt:
+        * fast/tokenizer/null-in-text-expected.txt: Added.
+        * fast/tokenizer/null-in-text.html: Added.
+        * fast/tokenizer/null-xss-expected.txt: Added.
+        * fast/tokenizer/null-xss.html: Added.
+            - The main risk with stripping null characters is that they'll be
+              used in XSS attacks.  This test shows that we don't strip null
+              characters from tag names.
+        * platform/mac/fast/text/stripNullFromText-expected.txt:
+        * svg/dom/fuzz-path-parser-expected.txt:
+        * svg/dom/rgb-color-parser-expected.txt:
+
+2010-08-05  Adam Barth  <abarth at webkit.org>
+
         Reviewed by Darin Adler.
 
         js-test-pre.js's escapeHTML should escape null characters so we can see them
diff --git a/LayoutTests/fast/dom/stripNullFromTextNodes-expected.txt b/LayoutTests/fast/dom/stripNullFromTextNodes-expected.txt
index 37094a1..0908b24 100644
--- a/LayoutTests/fast/dom/stripNullFromTextNodes-expected.txt
+++ b/LayoutTests/fast/dom/stripNullFromTextNodes-expected.txt
@@ -1,2 +1,2 @@
-���������hell�����o
-The null characters should be stripped out of the string above and it should have a length of 5. And the DOM thinks the length is...19 :-(
+hello
+The null characters should be stripped out of the string above and it should have a length of 5. And the DOM thinks the length is...5!
diff --git a/LayoutTests/editing/selection/5136696-expected.txt b/LayoutTests/fast/tokenizer/null-in-text-expected.txt
similarity index 100%
copy from LayoutTests/editing/selection/5136696-expected.txt
copy to LayoutTests/fast/tokenizer/null-in-text-expected.txt
diff --git a/LayoutTests/fast/tokenizer/null-in-text.html b/LayoutTests/fast/tokenizer/null-in-text.html
new file mode 100644
index 0000000..626200d
Binary files /dev/null and b/LayoutTests/fast/tokenizer/null-in-text.html differ
diff --git a/LayoutTests/fast/tokenizer/null-xss-expected.txt b/LayoutTests/fast/tokenizer/null-xss-expected.txt
new file mode 100644
index 0000000..2462b4b
--- /dev/null
+++ b/LayoutTests/fast/tokenizer/null-xss-expected.txt
@@ -0,0 +1 @@
+alert('FAIL');
diff --git a/LayoutTests/fast/tokenizer/null-xss.html b/LayoutTests/fast/tokenizer/null-xss.html
new file mode 100644
index 0000000..a9000ef
Binary files /dev/null and b/LayoutTests/fast/tokenizer/null-xss.html differ
diff --git a/LayoutTests/platform/mac/fast/text/stripNullFromText-expected.txt b/LayoutTests/platform/mac/fast/text/stripNullFromText-expected.txt
index 6f9bbc2..51694f6 100644
--- a/LayoutTests/platform/mac/fast/text/stripNullFromText-expected.txt
+++ b/LayoutTests/platform/mac/fast/text/stripNullFromText-expected.txt
@@ -3,6 +3,4 @@ layer at (0,0) size 800x600
 layer at (0,0) size 800x600
   RenderBlock {HTML} at (0,0) size 800x600
     RenderBody {BODY} at (8,8) size 784x584
-      RenderBlock {DIV} at (0,0) size 784x21 [border: (1px solid #FF0000)]
-        RenderText {#text} at (1,2) size 16x18
-          text run at (1,2) width 16: "\x{FFFD}"
+      RenderBlock {DIV} at (0,0) size 784x2 [border: (1px solid #FF0000)]
diff --git a/LayoutTests/svg/dom/fuzz-path-parser-expected.txt b/LayoutTests/svg/dom/fuzz-path-parser-expected.txt
index d925dec..3e81df0 100644
--- a/LayoutTests/svg/dom/fuzz-path-parser-expected.txt
+++ b/LayoutTests/svg/dom/fuzz-path-parser-expected.txt
@@ -459,7 +459,7 @@ Parsed as 4 command(s) [MCCC]: M1,1Q2 9,4 1,s6,3 6,9,s6 0,0,6,1 zc2,1 7 0,1 4,Z2
 Parsed as 1 command(s) [M]: M1,1a9,2 6 1 2 2 2,za9 0 2 8,2,3,3 s5,8,8 1,HC5 8 5,8,4 2 c8 4 5 2 5 9,l3 9,H7 s6 2,0 0 a4 9,1 6,3,7,0,M6,
 Could not parse: 
 Could not parse: M
-Could not parse: M�
+Could not parse: M
 Parsed as 2 command(s) [MZ]: M1,1Z0
 PASS successfullyParsed is true
 
diff --git a/LayoutTests/svg/dom/rgb-color-parser-expected.txt b/LayoutTests/svg/dom/rgb-color-parser-expected.txt
index 2f577ec..404085f 100644
--- a/LayoutTests/svg/dom/rgb-color-parser-expected.txt
+++ b/LayoutTests/svg/dom/rgb-color-parser-expected.txt
@@ -254,8 +254,8 @@ Threw exception Error: SVG_INVALID_VALUE_ERR: DOM SVG Exception 1: rgb(71+1+()33
 Threw exception Error: SVG_INVALID_VALUE_ERR: DOM SVG Exception 1: rgb(.27+
 Threw exception Error: SVG_INVALID_VALUE_ERR: DOM SVG Exception 1: rgb(
 Threw exception Error: SVG_INVALID_VALUE_ERR: DOM SVG Exception 1: 
-Threw exception Error: SVG_INVALID_VALUE_ERR: DOM SVG Exception 1: �
-Threw exception Error: SVG_INVALID_VALUE_ERR: DOM SVG Exception 1: rgb(�)
+Threw exception Error: SVG_INVALID_VALUE_ERR: DOM SVG Exception 1: 
+Threw exception Error: SVG_INVALID_VALUE_ERR: DOM SVG Exception 1: rgb()
 PASS successfullyParsed is true
 
 TEST COMPLETE
diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index dfcee21..525a53a 100644
--- a/WebCore/ChangeLog
+++ b/WebCore/ChangeLog
@@ -1,3 +1,37 @@
+2010-08-05  Adam Barth  <abarth at webkit.org>
+
+        Reviewed by Eric Seidel.
+
+        U+0000 is turned to U+FFFD (replacement character)
+        https://bugs.webkit.org/show_bug.cgi?id=42112
+
+        This patch introduces an intentional parsing difference from the HTML5
+        parsing specificiation.  The spec requires us to convert NULL
+        characters to U+FFFD, but doing so causes compatibility issues with a
+        number of sites, including US Bank.
+
+        In this patch, we strip the null characters instead in certain cases.
+        Firefox has made a corresponding change.  After gathering compatability
+        data, we hope to convince the HTML WG to adopt this change.
+
+        Tests: fast/tokenizer/null-in-text.html
+               fast/tokenizer/null-xss.html
+
+        * html/HTMLTokenizer.cpp:
+        (WebCore::HTMLTokenizer::HTMLTokenizer):
+        (WebCore::HTMLTokenizer::reset):
+        * html/HTMLTokenizer.h:
+        (WebCore::HTMLTokenizer::setSkipLeadingNewLineForListing):
+        (WebCore::HTMLTokenizer::forceNullCharacterReplacement):
+        (WebCore::HTMLTokenizer::setForceNullCharacterReplacement):
+        (WebCore::HTMLTokenizer::shouldSkipNullCharacters):
+        (WebCore::HTMLTokenizer::InputStreamPreprocessor::InputStreamPreprocessor):
+        (WebCore::HTMLTokenizer::InputStreamPreprocessor::peek):
+        * html/HTMLTreeBuilder.cpp:
+        (WebCore::HTMLTreeBuilder::passTokenToLegacyParser):
+        (WebCore::HTMLTreeBuilder::constructTreeFromToken):
+        (WebCore::HTMLTreeBuilder::processStartTagForInBody):
+
 2010-08-05  Andy Estes  <aestes at apple.com>
 
         Reviewed by David Kilzer.
diff --git a/WebCore/html/HTMLTokenizer.cpp b/WebCore/html/HTMLTokenizer.cpp
index c8234e4..a52aba6 100644
--- a/WebCore/html/HTMLTokenizer.cpp
+++ b/WebCore/html/HTMLTokenizer.cpp
@@ -97,6 +97,7 @@ inline bool isEndTagBufferingState(HTMLTokenizer::State state)
 }
 
 HTMLTokenizer::HTMLTokenizer()
+    : m_inputStreamPreprocessor(this)
 {
     reset();
 }
@@ -111,6 +112,7 @@ void HTMLTokenizer::reset()
     m_token = 0;
     m_lineNumber = 0;
     m_skipLeadingNewLineForListing = false;
+    m_forceNullCharacterReplacement = false;
     m_additionalAllowedCharacter = '\0';
 }
 
diff --git a/WebCore/html/HTMLTokenizer.h b/WebCore/html/HTMLTokenizer.h
index 7ee9d41..e4ca16a 100644
--- a/WebCore/html/HTMLTokenizer.h
+++ b/WebCore/html/HTMLTokenizer.h
@@ -132,14 +132,27 @@ public:
 
     // Hack to skip leading newline in <pre>/<listing> for authoring ease.
     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
-    void skipLeadingNewLineForListing() { m_skipLeadingNewLineForListing = true; }
+    void setSkipLeadingNewLineForListing(bool value) { m_skipLeadingNewLineForListing = value; }
+
+    bool forceNullCharacterReplacement() const { return m_forceNullCharacterReplacement; }
+    void setForceNullCharacterReplacement(bool value) { m_forceNullCharacterReplacement = value; }
+
+    bool shouldSkipNullCharacters() const
+    {
+        return !m_forceNullCharacterReplacement
+            && (m_state == DataState
+                || m_state == RCDATAState
+                || m_state == RAWTEXTState
+                || m_state == PLAINTEXTState);
+    }
 
 private:
     // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
     class InputStreamPreprocessor : public Noncopyable {
     public:
-        InputStreamPreprocessor()
-            : m_nextInputCharacter('\0')
+        InputStreamPreprocessor(HTMLTokenizer* tokenizer)
+            : m_tokenizer(tokenizer)
+            , m_nextInputCharacter('\0')
             , m_skipNextNewLine(false)
         {
         }
@@ -151,6 +164,7 @@ private:
         // characters in |source| (after collapsing \r\n, etc).
         ALWAYS_INLINE bool peek(SegmentedString& source, int& lineNumber)
         {
+        PeekAgain:
             m_nextInputCharacter = *source;
 
             // Every branch in this function is expensive, so we have a
@@ -179,8 +193,15 @@ private:
                 // a number of specific character values are parse errors and should be replaced
                 // by the replacement character. We suspect this is a problem with the spec as doing
                 // that filtering breaks surrogate pair handling and causes us not to match Minefield.
-                if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source))
+                if (m_nextInputCharacter == '\0' && !shouldTreatNullAsEndOfFileMarker(source)) {
+                    if (m_tokenizer->shouldSkipNullCharacters()) {
+                        source.advancePastNonNewline();
+                        if (source.isEmpty())
+                            return false;
+                        goto PeekAgain;
+                    }
                     m_nextInputCharacter = 0xFFFD;
+                }
             }
             return true;
         }
@@ -202,6 +223,8 @@ private:
             return source.isClosed() && source.length() == 1;
         }
 
+        HTMLTokenizer* m_tokenizer;
+
         // http://www.whatwg.org/specs/web-apps/current-work/#next-input-character
         UChar m_nextInputCharacter;
         bool m_skipNextNewLine;
@@ -242,6 +265,7 @@ private:
     int m_lineNumber;
 
     bool m_skipLeadingNewLineForListing;
+    bool m_forceNullCharacterReplacement;
 
     // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer
     Vector<UChar, 32> m_temporaryBuffer;
diff --git a/WebCore/html/HTMLTreeBuilder.cpp b/WebCore/html/HTMLTreeBuilder.cpp
index 0e0d220..6235c88 100644
--- a/WebCore/html/HTMLTreeBuilder.cpp
+++ b/WebCore/html/HTMLTreeBuilder.cpp
@@ -469,7 +469,7 @@ void HTMLTreeBuilder::passTokenToLegacyParser(HTMLToken& token)
             m_lastScriptElement = static_pointer_cast<Element>(result);
             m_lastScriptElementStartLine = m_tokenizer->lineNumber();
         } else if (oldStyleToken.tagName == preTag || oldStyleToken.tagName == listingTag)
-            m_tokenizer->skipLeadingNewLineForListing();
+            m_tokenizer->setSkipLeadingNewLineForListing(true);
         else
             m_tokenizer->setState(adjustedLexerState(m_tokenizer->state(), oldStyleToken.tagName, m_document->frame()));
     } else if (token.type() == HTMLToken::EndTag) {
@@ -509,6 +509,11 @@ void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
 
     AtomicHTMLToken token(rawToken);
     processToken(token);
+
+    // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
+    // the U+0000 characters into replacement characters has compatibility
+    // problems.
+    m_tokenizer->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
 }
 
 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
@@ -848,7 +853,7 @@ void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
     if (token.name() == preTag || token.name() == listingTag) {
         processFakePEndTagIfPInButtonScope();
         m_tree.insertHTMLElement(token);
-        m_tokenizer->skipLeadingNewLineForListing();
+        m_tokenizer->setSkipLeadingNewLineForListing(true);
         m_framesetOk = false;
         return;
     }
@@ -968,7 +973,7 @@ void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
     }
     if (token.name() == textareaTag) {
         m_tree.insertHTMLElement(token);
-        m_tokenizer->skipLeadingNewLineForListing();
+        m_tokenizer->setSkipLeadingNewLineForListing(true);
         m_tokenizer->setState(HTMLTokenizer::RCDATAState);
         m_originalInsertionMode = m_insertionMode;
         m_framesetOk = false;

-- 
WebKit Debian packaging