[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-9427-gc2be6fc

abarth at webkit.org abarth at webkit.org
Wed Dec 22 11:54:47 UTC 2010


The following commit has been merged in the debian/experimental branch:
commit 5713b37984d0539f83b3997cfc93b7fe10b3b71d
Author: abarth at webkit.org <abarth at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date:   Wed Aug 11 07:30:57 2010 +0000

    2010-08-11  Adam Barth  <abarth at webkit.org>
    
            Reviewed by Eric Seidel.
    
            Port view-source to new parser
            https://bugs.webkit.org/show_bug.cgi?id=43746
    
            This patch switches the view-source mode for frames over to using the
            new HTML parsing infrastructure.  This patch is an architectural change
            to how we parse view source documents.
    
            Previously, the LegacyHTMLDocumentParser would output a "guide string"
            that consisted of the inter-attribute whitespace and various "control"
            characters.  The HTMLViewSourceDocument would then interpret this guide
            string to approximately reconstruct the source of the original document
            and colorize various syntactic constructs.
    
            Unfortunately, that approach is inherently low-fidelity.  It's not
            really feasible to reconstruct the input document from the token
            stream.  The old view source mode also had a number of hacks in the old
            parser (e.g., to turn off decoding of HTML entities).
    
            Instead of trying to reconstruct the original document from the token
            stream, we use the segmentation information given by the tokens to
            colorize the input document itself.  Each token now carries information
            about where in the input stream it came from and where various
            subcomponents (e.g., attribute names and values) are located.  This
            approach is higher fidelity because we use this segmentation
            information to colorize the original input instead of attempting to
            reconstruct the original input.
    
            * Android.mk:
            * CMakeLists.txt:
            * GNUmakefile.am:
            * WebCore.gypi:
            * WebCore.pro:
            * WebCore.vcproj/WebCore.vcproj:
            * WebCore.xcodeproj/project.pbxproj:
            * html/HTMLDocumentParser.cpp:
            * html/HTMLToken.h:
            (WebCore::HTMLToken::clear):
            (WebCore::HTMLToken::startIndex):
            (WebCore::HTMLToken::endIndex):
            (WebCore::HTMLToken::end):
            * html/HTMLViewSourceDocument.cpp:
            (WebCore::HTMLViewSourceDocument::createParser):
            (WebCore::HTMLViewSourceDocument::addSource):
            (WebCore::HTMLViewSourceDocument::processDoctypeToken):
            (WebCore::HTMLViewSourceDocument::processTagToken):
            (WebCore::HTMLViewSourceDocument::processCommentToken):
            (WebCore::HTMLViewSourceDocument::processCharacterToken):
            (WebCore::HTMLViewSourceDocument::addRange):
            * html/HTMLViewSourceDocument.h:
            * html/HTMLViewSourceParser.cpp: Added.
            (WebCore::HTMLViewSourceParser::~HTMLViewSourceParser):
            (WebCore::HTMLViewSourceParser::insert):
            (WebCore::HTMLViewSourceParser::pumpTokenizer):
            (WebCore::HTMLViewSourceParser::append):
            (WebCore::HTMLViewSourceParser::sourceForToken):
            (WebCore::HTMLViewSourceParser::updateTokenizerState):
            (WebCore::HTMLViewSourceParser::finish):
            (WebCore::HTMLViewSourceParser::finishWasCalled):
            * html/HTMLViewSourceParser.h: Added.
            (WebCore::HTMLViewSourceParser::HTMLViewSourceParser):
            (WebCore::HTMLViewSourceParser::document):
            * html/LegacyHTMLDocumentParser.cpp:
            (WebCore::LegacyHTMLDocumentParser::processToken):
            (WebCore::LegacyHTMLDocumentParser::processDoctypeToken):
    
    git-svn-id: http://svn.webkit.org/repository/webkit/trunk@65132 268f45cc-cd09-0410-ab3c-d52691b4dbfc

diff --git a/WebCore/Android.mk b/WebCore/Android.mk
index 9c2658e..ac894f1 100644
--- a/WebCore/Android.mk
+++ b/WebCore/Android.mk
@@ -285,6 +285,7 @@ LOCAL_SRC_FILES := $(LOCAL_SRC_FILES) \
 	html/HTMLTableRowsCollection.cpp \
 	html/LegacyHTMLDocumentParser.cpp \
 	html/HTMLViewSourceDocument.cpp \
+	html/HTMLViewSourceParser.cpp \
 	html/ImageData.cpp \
 	html/ImageResizerThread.cpp \
 	html/LegacyPreloadScanner.cpp \
diff --git a/WebCore/CMakeLists.txt b/WebCore/CMakeLists.txt
index 4e4b181..75445c2 100644
--- a/WebCore/CMakeLists.txt
+++ b/WebCore/CMakeLists.txt
@@ -1036,6 +1036,7 @@ SET(WebCore_SOURCES
     html/LegacyHTMLDocumentParser.cpp
     html/HTMLUListElement.cpp
     html/HTMLViewSourceDocument.cpp
+    html/HTMLViewSourceParser.cpp
     html/ImageData.cpp
     html/ImageResizerThread.cpp
     html/LabelsNodeList.cpp
diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index cb8f8d8..efc27d6 100644
--- a/WebCore/ChangeLog
+++ b/WebCore/ChangeLog
@@ -1,3 +1,72 @@
+2010-08-11  Adam Barth  <abarth at webkit.org>
+
+        Reviewed by Eric Seidel.
+
+        Port view-source to new parser
+        https://bugs.webkit.org/show_bug.cgi?id=43746
+
+        This patch switches the view-source mode for frames over to using the
+        new HTML parsing infrastructure.  This patch is an architectural change
+        to how we parse view source documents.
+
+        Previously, the LegacyHTMLDocumentParser would output a "guide string"
+        that consisted of the inter-attribute whitespace and various "control"
+        characters.  The HTMLViewSourceDocument would then interpret this guide
+        string to approximately reconstruct the source of the original document
+        and colorize various syntactic constructs.
+
+        Unfortunately, that approach is inherently low-fidelity.  It's not
+        really feasible to reconstruct the input document from the token
+        stream.  The old view source mode also had a number of hacks in the old
+        parser (e.g., to turn off decoding of HTML entities).
+
+        Instead of trying to reconstruct the original document from the token
+        stream, we use the segmentation information given by the tokens to
+        colorize the input document itself.  Each token now carries information
+        about where in the input stream it came from and where various
+        subcomponents (e.g., attribute names and values) are located.  This
+        approach is higher fidelity because we use this segmentation
+        information to colorize the original input instead of attempting to
+        reconstruct the original input.
+
+        * Android.mk:
+        * CMakeLists.txt:
+        * GNUmakefile.am:
+        * WebCore.gypi:
+        * WebCore.pro:
+        * WebCore.vcproj/WebCore.vcproj:
+        * WebCore.xcodeproj/project.pbxproj:
+        * html/HTMLDocumentParser.cpp:
+        * html/HTMLToken.h:
+        (WebCore::HTMLToken::clear):
+        (WebCore::HTMLToken::startIndex):
+        (WebCore::HTMLToken::endIndex):
+        (WebCore::HTMLToken::end):
+        * html/HTMLViewSourceDocument.cpp:
+        (WebCore::HTMLViewSourceDocument::createParser):
+        (WebCore::HTMLViewSourceDocument::addSource):
+        (WebCore::HTMLViewSourceDocument::processDoctypeToken):
+        (WebCore::HTMLViewSourceDocument::processTagToken):
+        (WebCore::HTMLViewSourceDocument::processCommentToken):
+        (WebCore::HTMLViewSourceDocument::processCharacterToken):
+        (WebCore::HTMLViewSourceDocument::addRange):
+        * html/HTMLViewSourceDocument.h:
+        * html/HTMLViewSourceParser.cpp: Added.
+        (WebCore::HTMLViewSourceParser::~HTMLViewSourceParser):
+        (WebCore::HTMLViewSourceParser::insert):
+        (WebCore::HTMLViewSourceParser::pumpTokenizer):
+        (WebCore::HTMLViewSourceParser::append):
+        (WebCore::HTMLViewSourceParser::sourceForToken):
+        (WebCore::HTMLViewSourceParser::updateTokenizerState):
+        (WebCore::HTMLViewSourceParser::finish):
+        (WebCore::HTMLViewSourceParser::finishWasCalled):
+        * html/HTMLViewSourceParser.h: Added.
+        (WebCore::HTMLViewSourceParser::HTMLViewSourceParser):
+        (WebCore::HTMLViewSourceParser::document):
+        * html/LegacyHTMLDocumentParser.cpp:
+        (WebCore::LegacyHTMLDocumentParser::processToken):
+        (WebCore::LegacyHTMLDocumentParser::processDoctypeToken):
+
 2010-08-11  Yoshiki Hayashi  <yhayashi at google.com>
 
         Reviewed by Shinichiro Hamaji.
diff --git a/WebCore/GNUmakefile.am b/WebCore/GNUmakefile.am
index 676eb85..1a61475 100644
--- a/WebCore/GNUmakefile.am
+++ b/WebCore/GNUmakefile.am
@@ -1555,6 +1555,8 @@ webcore_sources += \
 	WebCore/html/HTMLUListElement.h \
 	WebCore/html/HTMLViewSourceDocument.cpp \
 	WebCore/html/HTMLViewSourceDocument.h \
+	WebCore/html/HTMLViewSourceParser.cpp \
+	WebCore/html/HTMLViewSourceParser.h \
 	WebCore/html/ImageData.cpp \
 	WebCore/html/ImageData.h \
 	WebCore/html/ImageResizerThread.cpp \
diff --git a/WebCore/WebCore.gypi b/WebCore/WebCore.gypi
index 108f165..a0c2d14 100644
--- a/WebCore/WebCore.gypi
+++ b/WebCore/WebCore.gypi
@@ -1721,6 +1721,8 @@
             'html/HTMLVideoElement.h',
             'html/HTMLViewSourceDocument.cpp',
             'html/HTMLViewSourceDocument.h',
+            'html/HTMLViewSourceParser.cpp',
+            'html/HTMLViewSourceParser.h',
             'html/ImageData.cpp',
             'html/ImageData.h',
             'html/ImageResizerThread.cpp',
diff --git a/WebCore/WebCore.pro b/WebCore/WebCore.pro
index 142af5f..0e63af8 100644
--- a/WebCore/WebCore.pro
+++ b/WebCore/WebCore.pro
@@ -740,6 +740,7 @@ SOURCES += \
     html/LegacyHTMLDocumentParser.cpp \
     html/HTMLUListElement.cpp \
     html/HTMLViewSourceDocument.cpp \
+    html/HTMLViewSourceParser.cpp \
     html/ImageData.cpp \
     html/ImageResizerThread.cpp \
     html/LabelsNodeList.cpp \
@@ -1504,6 +1505,7 @@ HEADERS += \
     html/HTMLUListElement.h \
     html/HTMLVideoElement.h \
     html/HTMLViewSourceDocument.h \
+    html/HTMLViewSourceParser.h \
     html/ImageData.h \
     html/ImageResizerThread.h \
     html/LabelsNodeList.h \
diff --git a/WebCore/WebCore.vcproj/WebCore.vcproj b/WebCore/WebCore.vcproj/WebCore.vcproj
index 155a816..cf0988a 100644
--- a/WebCore/WebCore.vcproj/WebCore.vcproj
+++ b/WebCore/WebCore.vcproj/WebCore.vcproj
@@ -40897,6 +40897,14 @@
 				>
 			</File>
 			<File
+				RelativePath="..\html\HTMLViewSourceParser.cpp"
+				>
+			</File>
+			<File
+				RelativePath="..\html\HTMLViewSourceParser.h"
+				>
+			</File>
+			<File
 				RelativePath="..\html\ImageData.cpp"
 				>
 			</File>
diff --git a/WebCore/WebCore.xcodeproj/project.pbxproj b/WebCore/WebCore.xcodeproj/project.pbxproj
index 29514e3..81fa73f 100644
--- a/WebCore/WebCore.xcodeproj/project.pbxproj
+++ b/WebCore/WebCore.xcodeproj/project.pbxproj
@@ -2674,6 +2674,8 @@
 		976E2BA811CAE4DE006C56A0 /* CSSPreloadScanner.h in Headers */ = {isa = PBXBuildFile; fileRef = 976E2BA611CAE4DE006C56A0 /* CSSPreloadScanner.h */; };
 		976E896011C0CA3A00EA9CA9 /* HTMLEntityParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 976E895E11C0CA3A00EA9CA9 /* HTMLEntityParser.cpp */; };
 		976E896111C0CA3A00EA9CA9 /* HTMLEntityParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 976E895F11C0CA3A00EA9CA9 /* HTMLEntityParser.h */; };
+		978B6FC912128821001595EF /* HTMLViewSourceParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 978B6FC712128821001595EF /* HTMLViewSourceParser.cpp */; };
+		978B6FCA12128821001595EF /* HTMLViewSourceParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 978B6FC812128821001595EF /* HTMLViewSourceParser.h */; };
 		979F43D31075E44A0000F83B /* RedirectScheduler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 979F43D11075E44A0000F83B /* RedirectScheduler.cpp */; };
 		979F43D41075E44A0000F83B /* RedirectScheduler.h in Headers */ = {isa = PBXBuildFile; fileRef = 979F43D21075E44A0000F83B /* RedirectScheduler.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		97C078501165D5BE003A32EF /* SuffixTree.h in Headers */ = {isa = PBXBuildFile; fileRef = 97C0784F1165D5BE003A32EF /* SuffixTree.h */; };
@@ -8479,6 +8481,8 @@
 		976E2BA611CAE4DE006C56A0 /* CSSPreloadScanner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CSSPreloadScanner.h; sourceTree = "<group>"; };
 		976E895E11C0CA3A00EA9CA9 /* HTMLEntityParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HTMLEntityParser.cpp; sourceTree = "<group>"; };
 		976E895F11C0CA3A00EA9CA9 /* HTMLEntityParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HTMLEntityParser.h; sourceTree = "<group>"; };
+		978B6FC712128821001595EF /* HTMLViewSourceParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HTMLViewSourceParser.cpp; sourceTree = "<group>"; };
+		978B6FC812128821001595EF /* HTMLViewSourceParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HTMLViewSourceParser.h; sourceTree = "<group>"; };
 		979F43D11075E44A0000F83B /* RedirectScheduler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = RedirectScheduler.cpp; sourceTree = "<group>"; };
 		979F43D21075E44A0000F83B /* RedirectScheduler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RedirectScheduler.h; sourceTree = "<group>"; };
 		97C0784F1165D5BE003A32EF /* SuffixTree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SuffixTree.h; sourceTree = "<group>"; };
@@ -14171,6 +14175,8 @@
 				E446139A0CD6331000FADA75 /* HTMLVideoElement.idl */,
 				BCCD74E40A4C8DDF005FDA6D /* HTMLViewSourceDocument.cpp */,
 				BCCD74DB0A4C8D35005FDA6D /* HTMLViewSourceDocument.h */,
+				978B6FC712128821001595EF /* HTMLViewSourceParser.cpp */,
+				978B6FC812128821001595EF /* HTMLViewSourceParser.h */,
 				A77979130D6B9D0C003851B9 /* ImageData.cpp */,
 				A77979140D6B9D0C003851B9 /* ImageData.h */,
 				A77979150D6B9D0C003851B9 /* ImageData.idl */,
@@ -20149,6 +20155,7 @@
 				2EED575612109ED0007656BB /* BlobURL.h in Headers */,
 				2EED575812109EE4007656BB /* BlobRegistry.h in Headers */,
 				2EED575C12109EF3007656BB /* BlobData.h in Headers */,
+				978B6FCA12128821001595EF /* HTMLViewSourceParser.h in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -22575,6 +22582,7 @@
 				2EED575212109ED0007656BB /* BlobRegistryImpl.cpp in Sources */,
 				2EED575512109ED0007656BB /* BlobURL.cpp in Sources */,
 				2EED575B12109EF3007656BB /* BlobData.cpp in Sources */,
+				978B6FC912128821001595EF /* HTMLViewSourceParser.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/WebCore/html/HTMLToken.h b/WebCore/html/HTMLToken.h
index 2922727..e42a829 100644
--- a/WebCore/html/HTMLToken.h
+++ b/WebCore/html/HTMLToken.h
@@ -64,12 +64,22 @@ public:
 
     HTMLToken() { clear(); }
 
-    void clear()
+    void clear(int startIndex = 0)
     {
         m_type = Uninitialized;
+        m_range.m_start = startIndex;
+        m_range.m_end = startIndex;
         m_data.clear();
     }
 
+    int startIndex() const { return m_range.m_start; }
+    int endIndex() const { return m_range.m_end; }
+
+    void end(int endIndex)
+    {
+        m_range.m_end = endIndex;
+    }
+
     void makeEndOfFile()
     {
         ASSERT(m_type == Uninitialized);
@@ -321,6 +331,9 @@ private:
 
     Type m_type;
 
+    // Which characters from the input stream are represented by this token.
+    Range m_range;
+
     // "name" for DOCTYPE, StartTag, and EndTag
     // "characters" for Character
     // "data" for Comment
diff --git a/WebCore/html/HTMLViewSourceDocument.cpp b/WebCore/html/HTMLViewSourceDocument.cpp
index e1b959b..c330c4d 100644
--- a/WebCore/html/HTMLViewSourceDocument.cpp
+++ b/WebCore/html/HTMLViewSourceDocument.cpp
@@ -36,7 +36,9 @@
 #include "HTMLTableElement.h"
 #include "HTMLTableRowElement.h"
 #include "HTMLTableSectionElement.h"
-#include "LegacyHTMLDocumentParser.h"
+#include "HTMLToken.h"
+#include "HTMLViewSourceParser.h"
+#include "SegmentedString.h"
 #include "Text.h"
 #include "TextDocument.h"
 
@@ -58,10 +60,8 @@ DocumentParser* HTMLViewSourceDocument::createParser()
 #if ENABLE(XHTMLMP)
         || m_type == "application/vnd.wap.xhtml+xml"
 #endif
-        ) {
-        // FIXME: Should respect Settings::html5ParserEnabled()
-        return new LegacyHTMLDocumentParser(this);
-    }
+        )
+        return new HTMLViewSourceParser(this);
 
     return createTextDocumentParser(this);
 }
@@ -100,107 +100,91 @@ void HTMLViewSourceDocument::addViewSourceText(const String& text)
     addText(text, "");
 }
 
-void HTMLViewSourceDocument::addViewSourceToken(Token* token)
+void HTMLViewSourceDocument::addSource(const String& source, HTMLToken& token)
 {
     if (!m_current)
         createContainingTable();
 
-    if (token->tagName == textAtom)
-        addText(token->text.get(), "");
-    else if (token->tagName == commentAtom) {
-        if (token->beginTag) {
-            m_current = addSpanWithClassName("webkit-html-comment");
-            addText(String("<!--") + token->text.get() + "-->", "webkit-html-comment");
-        }
-    } else {
-        // Handle the tag.
-        String classNameStr = "webkit-html-tag";
-        m_current = addSpanWithClassName(classNameStr);
-
-        String text = "<";
-        if (!token->beginTag)
-            text += "/";
-        text += token->tagName;
-        Vector<UChar>* guide = token->m_sourceInfo.get();
-        if (!guide || !guide->size())
-            text += ">";
-
-        addText(text, classNameStr);
-
-        // Walk our guide string that tells us where attribute names/values should go.
-        if (guide && guide->size()) {
-            unsigned size = guide->size();
-            unsigned begin = 0;
-            unsigned currAttr = 0;
-            RefPtr<Attribute> attr = 0;
-            for (unsigned i = 0; i < size; i++) {
-                if (guide->at(i) == 'a' || guide->at(i) == 'x' || guide->at(i) == 'v') {
-                    // Add in the string.
-                    addText(String(static_cast<UChar*>(guide->data()) + begin, i - begin), classNameStr);
-
-                    begin = i + 1;
-
-                    if (guide->at(i) == 'a') {
-                        if (token->attrs && currAttr < token->attrs->length())
-                            attr = token->attrs->attributeItem(currAttr++);
-                        else
-                            attr = 0;
-                    }
-                    if (attr) {
-                        if (guide->at(i) == 'a') {
-                            String name = attr->name().toString();
-
-                            m_current = addSpanWithClassName("webkit-html-attribute-name");
-                            addText(name, "webkit-html-attribute-name");
-                            if (m_current != m_tbody)
-                                m_current = static_cast<Element*>(m_current->parent());
-                        } else {
-                            const String& value = attr->value().string();
-
-                            // Compare ignoring case since LegacyHTMLDocumentParser doesn't
-                            // lower names when passing in tokens to
-                            // HTMLViewSourceDocument.
-                            if (equalIgnoringCase(token->tagName, "base") && equalIgnoringCase(attr->name().localName(), "href")) {
-                                // Catch the href attribute in the base element.
-                                // It will be used for rendering anchors created
-                                // by addLink() below.
-                                setBaseElementURL(KURL(url(), value));
-                            }
-
-                            // FIXME: XML could use namespace prefixes and confuse us.
-                            if (equalIgnoringCase(attr->name().localName(), "src") || equalIgnoringCase(attr->name().localName(), "href"))
-                                m_current = addLink(value, equalIgnoringCase(token->tagName, "a"));
-                            else
-                                m_current = addSpanWithClassName("webkit-html-attribute-value");
-                            addText(value, "webkit-html-attribute-value");
-                            if (m_current != m_tbody)
-                                m_current = static_cast<Element*>(m_current->parent());
-                        }
-                    }
-                }
-            }
-
-            // Add in any string that might be left.
-            if (begin < size)
-                addText(String(static_cast<UChar*>(guide->data()) + begin, size - begin), classNameStr);
-
-            // Add in the end tag.
-            addText(">", classNameStr);
-        }
-
-        m_current = m_td;
+    switch (token.type()) {
+    case HTMLToken::Uninitialized:
+        ASSERT_NOT_REACHED();
+        break;
+    case HTMLToken::DOCTYPE:
+        processDoctypeToken(source, token);
+        break;
+    case HTMLToken::EndOfFile:
+        break;
+    case HTMLToken::StartTag:
+    case HTMLToken::EndTag:
+        processTagToken(source, token);
+        break;
+    case HTMLToken::Comment:
+        processCommentToken(source, token);
+        break;
+    case HTMLToken::Character:
+        processCharacterToken(source, token);
+        break;
     }
 }
 
-void HTMLViewSourceDocument::addViewSourceDoctypeToken(DoctypeToken* doctypeToken)
+void HTMLViewSourceDocument::processDoctypeToken(const String& source, HTMLToken&)
 {
     if (!m_current)
         createContainingTable();
     m_current = addSpanWithClassName("webkit-html-doctype");
-    String text = "<";
-    text += String::adopt(doctypeToken->m_source);
-    text += ">";
-    addText(text, "webkit-html-doctype");
+    addText(source, "webkit-html-doctype");
+    m_current = m_td;
+}
+
+void HTMLViewSourceDocument::processTagToken(const String& source, HTMLToken& token)
+{
+    String classNameStr = "webkit-html-tag";
+    m_current = addSpanWithClassName(classNameStr);
+
+    AtomicString tagName(token.name().data(), token.name().size());
+
+    unsigned index = 0;
+    HTMLToken::AttributeList::const_iterator iter = token.attributes().begin();
+    while (index < source.length()) {
+        if (iter == token.attributes().end()) {
+            // We want to show the remaining characters in the token.
+            index = addRange(source, index, source.length(), "");
+            ASSERT(index == source.length());
+            break;
+        }
+
+        AtomicString name(iter->m_name.data(), iter->m_name.size());
+        String value(iter->m_value.data(), iter->m_value.size()); 
+
+        index = addRange(source, index, iter->m_nameRange.m_start - token.startIndex(), "");
+        index = addRange(source, index, iter->m_nameRange.m_end - token.startIndex(), "webkit-html-attribute-name");
+
+        if (tagName == baseTag && name == hrefAttr) {
+            // Catch the href attribute in the base element. It will be used
+            // for rendering anchors created by addLink() below.
+            setBaseElementURL(KURL(url(), value));
+        }
+
+        index = addRange(source, index, iter->m_valueRange.m_start - token.startIndex(), "");
+
+        bool isLink = name == srcAttr || name == hrefAttr;
+        index = addRange(source, index, iter->m_valueRange.m_end - token.startIndex(), "webkit-html-attribute-value", isLink, tagName == aTag);
+
+        ++iter;
+    }
+    m_current = m_td;
+}
+
+void HTMLViewSourceDocument::processCommentToken(const String& source, HTMLToken&)
+{
+    m_current = addSpanWithClassName("webkit-html-comment");
+    addText(source, "webkit-html-comment");
+    m_current = m_td;
+}
+
+void HTMLViewSourceDocument::processCharacterToken(const String& source, HTMLToken&)
+{
+    addText(source, "");
 }
 
 PassRefPtr<Element> HTMLViewSourceDocument::addSpanWithClassName(const String& className)
@@ -287,6 +271,25 @@ void HTMLViewSourceDocument::addText(const String& text, const String& className
         m_current = m_tbody;
 }
 
+int HTMLViewSourceDocument::addRange(const String& source, int start, int end, const String& className, bool isLink, bool isAnchor)
+{
+    ASSERT(start <= end);
+    if (start == end)
+        return start;
+
+    String text = source.substring(start, end - start);
+    if (!className.isEmpty()) {
+        if (isLink)
+            m_current = addLink(text, isAnchor);
+        else
+            m_current = addSpanWithClassName(className);
+    }
+    addText(text, className);
+    if (!className.isEmpty() && m_current != m_tbody)
+        m_current = static_cast<Element*>(m_current->parent());
+    return end;
+}
+
 PassRefPtr<Element> HTMLViewSourceDocument::addLink(const String& url, bool isAnchor)
 {
     if (m_current == m_tbody)
diff --git a/WebCore/html/HTMLViewSourceDocument.h b/WebCore/html/HTMLViewSourceDocument.h
index cf6cfc1..8805848 100644
--- a/WebCore/html/HTMLViewSourceDocument.h
+++ b/WebCore/html/HTMLViewSourceDocument.h
@@ -29,11 +29,9 @@
 
 namespace WebCore {
 
-class DoctypeToken;
 class HTMLTableCellElement;
 class HTMLTableSectionElement;
-
-struct Token;
+class HTMLToken;
 
 class HTMLViewSourceDocument : public HTMLDocument {
 public:
@@ -42,20 +40,27 @@ public:
         return adoptRef(new HTMLViewSourceDocument(frame, url, mimeType));
     }
 
-    void addViewSourceToken(Token*); // Used by the LegacyHTMLDocumentParser.
+    void addSource(const String&, HTMLToken&);
+
+    void addViewSourceToken(HTMLToken&); // Used by the HTMLDocumentParser.
     void addViewSourceText(const String&); // Used by the TextDocumentParser.
-    void addViewSourceDoctypeToken(DoctypeToken*);
 
 private:
     HTMLViewSourceDocument(Frame*, const KURL&, const String& mimeType);
 
-    // Returns LegacyHTMLDocumentParser or TextDocumentParser based on m_type.
+    // Returns HTMLViewSourceParser or TextDocumentParser based on m_type.
     virtual DocumentParser* createParser();
 
+    void processDoctypeToken(const String& source, HTMLToken&);
+    void processTagToken(const String& source, HTMLToken&);
+    void processCommentToken(const String& source, HTMLToken&);
+    void processCharacterToken(const String& source, HTMLToken&);
+
     void createContainingTable();
     PassRefPtr<Element> addSpanWithClassName(const String&);
     void addLine(const String& className);
     void addText(const String& text, const String& className);
+    int addRange(const String& source, int start, int end, const String& className, bool isLink = false, bool isAnchor = false);
     PassRefPtr<Element> addLink(const String& url, bool isAnchor);
 
     String m_type;
diff --git a/WebCore/html/HTMLViewSourceParser.cpp b/WebCore/html/HTMLViewSourceParser.cpp
new file mode 100644
index 0000000..3da4c23
--- /dev/null
+++ b/WebCore/html/HTMLViewSourceParser.cpp
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "HTMLViewSourceParser.h"
+
+#include "HTMLNames.h"
+#include "HTMLTreeBuilder.h"
+#include "HTMLViewSourceDocument.h"
+
+namespace WebCore {
+
+HTMLViewSourceParser::~HTMLViewSourceParser()
+{
+}
+
+void HTMLViewSourceParser::insert(const SegmentedString&)
+{
+    ASSERT_NOT_REACHED();
+}
+
+void HTMLViewSourceParser::pumpTokenizer()
+{
+    while (m_tokenizer.nextToken(m_input.current(), m_token)) {
+        m_token.end(m_input.current().numberOfCharactersConsumed());
+        document()->addSource(sourceForToken(), m_token);
+        updateTokenizerState();
+        m_token.clear(m_input.current().numberOfCharactersConsumed());
+    }
+}
+
+void HTMLViewSourceParser::append(const SegmentedString& input)
+{
+    m_input.appendToEnd(input);
+    m_source.append(input);
+    pumpTokenizer();
+}
+
+String HTMLViewSourceParser::sourceForToken()
+{
+    if (m_token.type() == HTMLToken::EndOfFile)
+        return String();
+
+    ASSERT(m_source.numberOfCharactersConsumed() == m_token.startIndex());
+    UChar* data = 0;
+    int length = m_token.endIndex() - m_token.startIndex();
+    String source = String::createUninitialized(length, data);
+    for (int i = 0; i < length; ++i) {
+        data[i] = *m_source;
+        m_source.advance();
+    }
+    return source;
+}
+
+void HTMLViewSourceParser::updateTokenizerState()
+{
+    // FIXME: The tokenizer should do this work for us.
+    if (m_token.type() != HTMLToken::StartTag)
+        return;
+
+    AtomicString tagName(m_token.name().data(), m_token.name().size());
+    m_tokenizer.setState(HTMLTreeBuilder::adjustedLexerState(m_tokenizer.state(), tagName, m_document->frame()));
+    if (tagName == HTMLNames::scriptTag) {
+        // The tree builder handles scriptTag separately from the other tokenizer
+        // state adjustments, so we need to handle it separately too.
+        ASSERT(m_tokenizer.state() == HTMLTokenizer::DataState);
+        m_tokenizer.setState(HTMLTokenizer::ScriptDataState);
+    }
+}
+
+void HTMLViewSourceParser::finish()
+{
+    if (!m_input.haveSeenEndOfFile())
+        m_input.markEndOfFile();
+    pumpTokenizer();
+    document()->finishedParsing();
+}
+
+bool HTMLViewSourceParser::finishWasCalled()
+{
+    return m_input.haveSeenEndOfFile();
+}
+
+}
diff --git a/WebCore/html/HTMLViewSourceParser.h b/WebCore/html/HTMLViewSourceParser.h
new file mode 100644
index 0000000..2571301
--- /dev/null
+++ b/WebCore/html/HTMLViewSourceParser.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLViewSourceParser_h
+#define HTMLViewSourceParser_h
+
+#include "DecodedDataDocumentParser.h"
+#include "HTMLInputStream.h"
+#include "HTMLToken.h"
+#include "HTMLTokenizer.h"
+#include "HTMLViewSourceDocument.h"
+#include <wtf/PassOwnPtr.h>
+
+namespace WebCore {
+
+class HTMLTokenizer;
+class HTMLScriptRunner;
+class HTMLTreeBuilder;
+class HTMLPreloadScanner;
+class LegacyHTMLTreeBuilder;
+class ScriptController;
+class ScriptSourceCode;
+
+class HTMLViewSourceParser :  public DecodedDataDocumentParser {
+public:
+    // FIXME: Make private with a create method.
+    HTMLViewSourceParser(HTMLViewSourceDocument* document)
+        : DecodedDataDocumentParser(document)
+    {
+    }
+
+    virtual ~HTMLViewSourceParser();
+
+private:
+    // DocumentParser
+    virtual void insert(const SegmentedString&);
+    virtual void append(const SegmentedString&);
+    virtual void finish();
+    virtual bool finishWasCalled();
+
+    HTMLViewSourceDocument* document() const { return static_cast<HTMLViewSourceDocument*>(m_document); }
+
+    void pumpTokenizer();
+    String sourceForToken();
+    void updateTokenizerState();
+
+    HTMLInputStream m_input;
+    SegmentedString m_source;
+    HTMLToken m_token;
+    HTMLTokenizer m_tokenizer;
+};
+
+}
+
+#endif
diff --git a/WebCore/html/LegacyHTMLDocumentParser.cpp b/WebCore/html/LegacyHTMLDocumentParser.cpp
index 980d6ed..0d4d615 100644
--- a/WebCore/html/LegacyHTMLDocumentParser.cpp
+++ b/WebCore/html/LegacyHTMLDocumentParser.cpp
@@ -1926,11 +1926,8 @@ PassRefPtr<Node> LegacyHTMLDocumentParser::processToken()
     if (!m_parserStopped) {
         if (NamedNodeMap* map = m_currentToken.attrs.get())
             map->shrinkToLength();
-        if (inViewSourceMode())
-            static_cast<HTMLViewSourceDocument*>(document())->addViewSourceToken(&m_currentToken);
-        else
-            // pass the token over to the parser, the parser DOES NOT delete the token
-            n = m_treeBuilder->parseToken(&m_currentToken);
+        // pass the token over to the parser, the parser DOES NOT delete the token
+        n = m_treeBuilder->parseToken(&m_currentToken);
     }
     m_currentToken.reset();
 
@@ -1939,10 +1936,7 @@ PassRefPtr<Node> LegacyHTMLDocumentParser::processToken()
 
 void LegacyHTMLDocumentParser::processDoctypeToken()
 {
-    if (inViewSourceMode())
-        static_cast<HTMLViewSourceDocument*>(document())->addViewSourceDoctypeToken(&m_doctypeToken);
-    else
-        m_treeBuilder->parseDoctypeToken(&m_doctypeToken);
+    m_treeBuilder->parseDoctypeToken(&m_doctypeToken);
 }
 
 LegacyHTMLDocumentParser::~LegacyHTMLDocumentParser()

-- 
WebKit Debian packaging



More information about the Pkg-webkit-commits mailing list