[SCM] WebKit Debian packaging branch, debian/experimental, updated. upstream/1.3.3-9427-gc2be6fc
abarth at webkit.org
abarth at webkit.org
Wed Dec 22 11:54:06 UTC 2010
The following commit has been merged in the debian/experimental branch:
commit bdbaf374e8d0a5c9809d8193b430a9fcd881eb3f
Author: abarth at webkit.org <abarth at webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date: Wed Aug 11 01:55:53 2010 +0000
2010-08-10 Adam Barth <abarth at webkit.org>
Reviewed by Eric Seidel.
Clients of HTMLTokenizer should be able to see where characters went in the token
https://bugs.webkit.org/show_bug.cgi?id=43766
When viewing the source of a document, we want to colorize different
parts of the input depending on how they were tokenized. In this
patch, we expose the internal segmentation of a token by recording the
start and end offsets for each attribute name and each attribute value.
* html/HTMLToken.h:
(WebCore::HTMLToken::addNewAttribute):
(WebCore::HTMLToken::beginAttributeName):
(WebCore::HTMLToken::endAttributeName):
(WebCore::HTMLToken::beginAttributeValue):
(WebCore::HTMLToken::endAttributeValue):
(WebCore::HTMLToken::appendToAttributeName):
(WebCore::HTMLToken::appendToAttributeValue):
(WebCore::AtomicHTMLToken::AtomicHTMLToken):
* html/HTMLTokenizer.cpp:
(WebCore::HTMLTokenizer::nextToken):
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@65110 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/ChangeLog b/WebCore/ChangeLog
index 011c897..e26bb8e 100644
--- a/WebCore/ChangeLog
+++ b/WebCore/ChangeLog
@@ -1,3 +1,27 @@
+2010-08-10 Adam Barth <abarth at webkit.org>
+
+ Reviewed by Eric Seidel.
+
+ Clients of HTMLTokenizer should be able to see where characters went in the token
+ https://bugs.webkit.org/show_bug.cgi?id=43766
+
+ When viewing the source of a document, we want to colorize different
+ parts of the input depending on how they were tokenized. In this
+ patch, we expose the internal segmentation of a token by recording the
+ start and end offsets for each attribute name and each attribute value.
+
+ * html/HTMLToken.h:
+ (WebCore::HTMLToken::addNewAttribute):
+ (WebCore::HTMLToken::beginAttributeName):
+ (WebCore::HTMLToken::endAttributeName):
+ (WebCore::HTMLToken::beginAttributeValue):
+ (WebCore::HTMLToken::endAttributeValue):
+ (WebCore::HTMLToken::appendToAttributeName):
+ (WebCore::HTMLToken::appendToAttributeValue):
+ (WebCore::AtomicHTMLToken::AtomicHTMLToken):
+ * html/HTMLTokenizer.cpp:
+ (WebCore::HTMLTokenizer::nextToken):
+
2010-08-10 Dumitru Daniliuc <dumi at chromium.org>
Reviewed by David Levin.
diff --git a/WebCore/html/HTMLToken.h b/WebCore/html/HTMLToken.h
index 5f2869b..2922727 100644
--- a/WebCore/html/HTMLToken.h
+++ b/WebCore/html/HTMLToken.h
@@ -45,8 +45,16 @@ public:
EndOfFile,
};
+ class Range {
+ public:
+ int m_start;
+ int m_end;
+ };
+
class Attribute {
public:
+ Range m_nameRange;
+ Range m_valueRange;
WTF::Vector<UChar, 32> m_name;
WTF::Vector<UChar, 32> m_value;
};
@@ -146,12 +154,44 @@ public:
ASSERT(m_type == StartTag || m_type == EndTag);
m_attributes.grow(m_attributes.size() + 1);
m_currentAttribute = &m_attributes.last();
+#ifndef NDEBUG
+ m_currentAttribute->m_nameRange.m_start = 0;
+ m_currentAttribute->m_nameRange.m_end = 0;
+ m_currentAttribute->m_valueRange.m_start = 0;
+ m_currentAttribute->m_valueRange.m_end = 0;
+#endif
+ }
+
+ void beginAttributeName(int index)
+ {
+ m_currentAttribute->m_nameRange.m_start = index;
+ }
+
+ void endAttributeName(int index)
+ {
+ m_currentAttribute->m_nameRange.m_end = index;
+ m_currentAttribute->m_valueRange.m_start = index;
+ m_currentAttribute->m_valueRange.m_end = index;
+ }
+
+ void beginAttributeValue(int index)
+ {
+ m_currentAttribute->m_valueRange.m_start = index;
+#ifndef NDEBUG
+ m_currentAttribute->m_valueRange.m_end = 0;
+#endif
+ }
+
+ void endAttributeValue(int index)
+ {
+ m_currentAttribute->m_valueRange.m_end = index;
}
void appendToAttributeName(UChar character)
{
ASSERT(character);
ASSERT(m_type == StartTag || m_type == EndTag);
+ ASSERT(m_currentAttribute->m_nameRange.m_start);
m_currentAttribute->m_name.append(character);
}
@@ -159,6 +199,7 @@ public:
{
ASSERT(character);
ASSERT(m_type == StartTag || m_type == EndTag);
+ ASSERT(m_currentAttribute->m_valueRange.m_start);
m_currentAttribute->m_value.append(character);
}
@@ -323,6 +364,10 @@ public:
if (!iter->m_name.isEmpty()) {
String name(iter->m_name.data(), iter->m_name.size());
String value(iter->m_value.data(), iter->m_value.size());
+ ASSERT(iter->m_nameRange.m_start);
+ ASSERT(iter->m_nameRange.m_end);
+ ASSERT(iter->m_valueRange.m_start);
+ ASSERT(iter->m_valueRange.m_end);
RefPtr<Attribute> mappedAttribute = Attribute::createMapped(name, value);
if (!m_attributes) {
m_attributes = NamedNodeMap::create();
diff --git a/WebCore/html/HTMLTokenizer.cpp b/WebCore/html/HTMLTokenizer.cpp
index 409d462..2e5d192 100644
--- a/WebCore/html/HTMLTokenizer.cpp
+++ b/WebCore/html/HTMLTokenizer.cpp
@@ -864,6 +864,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
return emitAndResumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
@@ -873,6 +874,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
parseError();
m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(cc);
ADVANCE_TO(AttributeNameState);
}
@@ -880,19 +882,24 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
END_STATE()
BEGIN_STATE(AttributeNameState) {
- if (isTokenizerWhitespace(cc))
+ if (isTokenizerWhitespace(cc)) {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
ADVANCE_TO(AfterAttributeNameState);
- else if (cc == '/')
+ } else if (cc == '/') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
ADVANCE_TO(SelfClosingStartTagState);
- else if (cc == '=')
+ } else if (cc == '=') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
ADVANCE_TO(BeforeAttributeValueState);
- else if (cc == '>')
+ } else if (cc == '>') {
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
return emitAndResumeIn(source, DataState);
- else if (isASCIIUpper(cc)) {
+ } else if (isASCIIUpper(cc)) {
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
+ m_token->endAttributeName(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
@@ -914,6 +921,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
return emitAndResumeIn(source, DataState);
else if (isASCIIUpper(cc)) {
m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(toLowerCase(cc));
ADVANCE_TO(AttributeNameState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
@@ -923,6 +931,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
if (cc == '"' || cc == '\'' || cc == '<')
parseError();
m_token->addNewAttribute();
+ m_token->beginAttributeName(source.numberOfCharactersConsumed());
m_token->appendToAttributeName(cc);
ADVANCE_TO(AttributeNameState);
}
@@ -932,13 +941,16 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
BEGIN_STATE(BeforeAttributeValueState) {
if (isTokenizerWhitespace(cc))
ADVANCE_TO(BeforeAttributeValueState);
- else if (cc == '"')
+ else if (cc == '"') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
ADVANCE_TO(AttributeValueDoubleQuotedState);
- else if (cc == '&')
+ } else if (cc == '&') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(AttributeValueUnquotedState);
- else if (cc == '\'')
+ } else if (cc == '\'') {
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
ADVANCE_TO(AttributeValueSingleQuotedState);
- else if (cc == '>') {
+ } else if (cc == '>') {
parseError();
return emitAndResumeIn(source, DataState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
@@ -947,6 +959,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
} else {
if (cc == '<' || cc == '=' || cc == '`')
parseError();
+ m_token->beginAttributeValue(source.numberOfCharactersConsumed());
m_token->appendToAttributeValue(cc);
ADVANCE_TO(AttributeValueUnquotedState);
}
@@ -954,13 +967,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
END_STATE()
BEGIN_STATE(AttributeValueDoubleQuotedState) {
- if (cc == '"')
+ if (cc == '"') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
ADVANCE_TO(AfterAttributeValueQuotedState);
- else if (cc == '&') {
+ } else if (cc == '&') {
m_additionalAllowedCharacter = '"';
ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
m_token->appendToAttributeValue(cc);
@@ -970,13 +985,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
END_STATE()
BEGIN_STATE(AttributeValueSingleQuotedState) {
- if (cc == '\'')
+ if (cc == '\'') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
ADVANCE_TO(AfterAttributeValueQuotedState);
- else if (cc == '&') {
+ } else if (cc == '&') {
m_additionalAllowedCharacter = '\'';
ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
m_token->appendToAttributeValue(cc);
@@ -986,15 +1003,18 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
END_STATE()
BEGIN_STATE(AttributeValueUnquotedState) {
- if (isTokenizerWhitespace(cc))
+ if (isTokenizerWhitespace(cc)) {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
ADVANCE_TO(BeforeAttributeNameState);
- else if (cc == '&') {
+ } else if (cc == '&') {
m_additionalAllowedCharacter = '>';
ADVANCE_TO(CharacterReferenceInAttributeValueState);
- } else if (cc == '>')
+ } else if (cc == '>') {
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
return emitAndResumeIn(source, DataState);
- else if (cc == InputStreamPreprocessor::endOfFileMarker) {
+ } else if (cc == InputStreamPreprocessor::endOfFileMarker) {
parseError();
+ m_token->endAttributeValue(source.numberOfCharactersConsumed());
RECONSUME_IN(DataState);
} else {
if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
--
WebKit Debian packaging
More information about the Pkg-webkit-commits
mailing list