[SCM] WebKit Debian packaging branch, debian/unstable, updated. debian/1.1.15-1-40151-g37bb677
darin
darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Sat Sep 26 07:41:23 UTC 2009
The following commit has been merged in the debian/unstable branch:
commit 3ec8571a7700b24ef8154719b186d9065655c176
Author: darin <darin at 268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Date: Fri May 9 17:17:14 2003 +0000
Reviewed by John.
- fixed 3191943 -- XML parsing error occurs when loading shift-JIS Japanese XML file
* khtml/misc/decoder.h: Add a new type of encoding, EncodingFromXMLHeader.
* khtml/misc/decoder.cpp:
(findXMLEncoding): Added.
(Decoder::decode): Call findXMLEncoding to extract the encoding from the XML header.
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@4331 268f45cc-cd09-0410-ab3c-d52691b4dbfc
diff --git a/WebCore/ChangeLog-2003-10-25 b/WebCore/ChangeLog-2003-10-25
index b6ac01f..9282c14 100644
--- a/WebCore/ChangeLog-2003-10-25
+++ b/WebCore/ChangeLog-2003-10-25
@@ -1,3 +1,14 @@
+2003-05-09 Darin Adler <darin at apple.com>
+
+ Reviewed by John.
+
+ - fixed 3191943 -- XML parsing error occurs when loading shift-JIS Japanese XML file
+
+ * khtml/misc/decoder.h: Add a new type of encoding, EncodingFromXMLHeader.
+ * khtml/misc/decoder.cpp:
+ (findXMLEncoding): Added.
+ (Decoder::decode): Call findXMLEncoding to extract the encoding from the XML header.
+
2003-05-08 Darin Adler <darin at apple.com>
Reviewed by John and Ken.
diff --git a/WebCore/ChangeLog-2005-08-23 b/WebCore/ChangeLog-2005-08-23
index b6ac01f..9282c14 100644
--- a/WebCore/ChangeLog-2005-08-23
+++ b/WebCore/ChangeLog-2005-08-23
@@ -1,3 +1,14 @@
+2003-05-09 Darin Adler <darin at apple.com>
+
+ Reviewed by John.
+
+ - fixed 3191943 -- XML parsing error occurs when loading shift-JIS Japanese XML file
+
+ * khtml/misc/decoder.h: Add a new type of encoding, EncodingFromXMLHeader.
+ * khtml/misc/decoder.cpp:
+ (findXMLEncoding): Added.
+ (Decoder::decode): Call findXMLEncoding to extract the encoding from the XML header.
+
2003-05-08 Darin Adler <darin at apple.com>
Reviewed by John and Ken.
diff --git a/WebCore/khtml/misc/decoder.cpp b/WebCore/khtml/misc/decoder.cpp
index 100e587..9c89817 100644
--- a/WebCore/khtml/misc/decoder.cpp
+++ b/WebCore/khtml/misc/decoder.cpp
@@ -364,6 +364,47 @@ static void skipComment(const char *&ptr, const char *pEnd)
ptr = p;
}
+// Finds the value of the "encoding" pseudo-attribute in an XML declaration. Returns the index
+// of the value's first character and stores its length in encodingLength, or returns -1 (leaving
+// encodingLength unset) if the attribute, its '=', or either quotation mark is missing.
+static int findXMLEncoding(const QCString &str, int &encodingLength)
+{
+    int len = str.length();
+    int pos = str.find("encoding");
+    if (pos == -1)
+        return -1;
+    pos += 8;
+
+    // Skip spaces and stray control characters; test the bound before indexing.
+    while (pos != len && str[pos] <= ' ')
+        ++pos;
+
+    // Skip equals sign.
+    if (pos == len || str[pos] != '=')
+        return -1;
+    ++pos;
+
+    // Skip spaces and stray control characters; test the bound before indexing.
+    while (pos != len && str[pos] <= ' ')
+        ++pos;
+
+    // Skip quotation mark.
+    char quoteMark = pos != len ? str[pos] : '\0';
+    if (quoteMark != '"' && quoteMark != '\'')
+        return -1;
+    ++pos;
+
+    // Find the trailing quotation mark, stopping at the end of the string if it is missing.
+    int end = pos;
+    while (end != len && str[end] != quoteMark)
+        ++end;
+    if (end == len)
+        return -1;
+
+    encodingLength = end - pos;
+    return pos;
+}
+
QString Decoder::decode(const char *data, int len)
{
// Check for UTF-16 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding.
@@ -445,11 +486,30 @@ QString Decoder::decode(const char *data, int len)
if(*ptr == '<') {
bool end = false;
ptr++;
+
+ // Handle comments.
if (ptr[0] == '!' && ptr[1] == '-' && ptr[2] == '-') {
ptr += 3;
skipComment(ptr, pEnd);
continue;
}
+
+ // Handle XML header, which can have encoding in it.
+ if (ptr[0] == '?' && ptr[1] == 'x' && ptr[2] == 'm' && ptr[3] == 'l') {
+ const char *end = ptr;
+ while (*end != '>' && *end != '\0') end++;
+ if (*end == '\0')
+ break;
+ QCString str(ptr, end - ptr);
+ int len;
+ int pos = findXMLEncoding(str, len);
+ if (pos != -1) {
+ setEncoding(str.mid(pos, len), EncodingFromXMLHeader);
+ if (m_type == EncodingFromXMLHeader)
+ goto found;
+ }
+ }
+
if(*ptr == '/') ptr++, end=true;
char tmp[20];
int len = 0;
@@ -498,11 +558,10 @@ QString Decoder::decode(const char *data, int len)
(str[endpos] != ' ' && str[endpos] != '"' && str[endpos] != '\''
&& str[endpos] != ';' && str[endpos] != '>') )
endpos++;
- enc = str.mid(pos, endpos-pos);
#ifdef DECODE_DEBUG
- kdDebug( 6005 ) << "Decoder: found charset: " << enc.data() << endl;
+ kdDebug( 6005 ) << "Decoder: found charset: " << str.mid(pos, endpos-pos) << endl;
#endif
- setEncoding(enc, EncodingFromMetaTag);
+ setEncoding(str.mid(pos, endpos-pos), EncodingFromMetaTag);
if( m_type == EncodingFromMetaTag ) goto found;
if ( endpos >= str.length() || str[endpos] == '/' || str[endpos] == '>' ) break;
diff --git a/WebCore/khtml/misc/decoder.h b/WebCore/khtml/misc/decoder.h
index 96f8833..4672544 100644
--- a/WebCore/khtml/misc/decoder.h
+++ b/WebCore/khtml/misc/decoder.h
@@ -36,6 +36,7 @@ public:
enum EncodingType {
DefaultEncoding,
AutoDetectedEncoding,
+ EncodingFromXMLHeader,
EncodingFromMetaTag,
EncodingFromHTTPHeader,
UserChosenEncoding
@@ -58,11 +59,13 @@ public:
protected:
// codec used for decoding. default is Latin1.
QTextCodec *m_codec;
- QTextDecoder *m_decoder; // only used for utf16
+ QTextDecoder *m_decoder;
QCString enc;
EncodingType m_type;
#if APPLE_CHANGES
+ // Our version of QString works well for all-8-bit characters, and allows null characters.
+ // This works better than QCString when there are null characters involved.
QString buffer;
#else
QCString buffer;
@@ -73,5 +76,6 @@ protected:
bool visualRTL;
};
-};
+}
+
#endif
--
WebKit Debian packaging
More information about the Pkg-webkit-commits
mailing list