[Pkg-owncloud-commits] [php-sabre-vobject] 29/46: Automatically decode from ISO-8859-1

David Prévot taffit at moszumanska.debian.org
Thu Dec 10 02:12:40 UTC 2015


This is an automated email from the git hooks/post-receive script.

taffit pushed a commit to branch master
in repository php-sabre-vobject.

commit 9d4558bf28e6abb36fe15cba36d378ccde19b311
Author: Evert Pot <me at evertpot.com>
Date:   Thu Nov 26 00:34:05 2015 -0500

    Automatically decode from ISO-8859-1
---
 lib/Parser/MimeDir.php               | 51 ++++++++++++++++++++++++++++++++++++
 tests/VObject/Parser/MimeDirTest.php | 49 ++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/lib/Parser/MimeDir.php b/lib/Parser/MimeDir.php
index ceb96bd..e2cd57d 100644
--- a/lib/Parser/MimeDir.php
+++ b/lib/Parser/MimeDir.php
@@ -38,6 +38,20 @@ class MimeDir extends Parser {
     protected $root;
 
     /**
+     * By default all input will be assumed to be UTF-8.
+     *
+     * However, both iCalendar and vCard might be encoded using different
+     * character sets. The character set is usually set in the mime-type.
+     *
+     * If this is the case, use setCharset() to specify that a different
+     * character set is in use. When set, the parser will automatically
+     * convert all incoming data to UTF-8.
+     *
+     * @var string
+     */
+    protected $charset = 'UTF-8';
+
+    /**
      * Parses an iCalendar or vCard file.
      *
      * Pass a stream or a string. If null is parsed, the existing buffer is
@@ -67,6 +81,29 @@ class MimeDir extends Parser {
     }
 
     /**
+     * By default all input will be assumed to be UTF-8.
+     *
+     * However, both iCalendar and vCard might be encoded using different
+     * character sets. The character set is usually set in the mime-type.
+     *
+     * If this is the case, call this method to specify that a different
+     * character set is in use. When set, the parser will automatically
+     * convert all incoming data to UTF-8.
+     *
+     * @param string $charset
+     */
+    function setCharset($charset) {
+
+        $supportedEncodings = ['UTF-8', 'ISO-8859-1'];
+
+        if (!in_array($charset, $supportedEncodings)) {
+            throw new \InvalidArgumentException('Unsupported encoding. (Supported encodings: ' . implode(', ', $supportedEncodings) . ')');
+        }
+        $this->charset = $charset;
+
+    }
+
+    /**
      * Sets the input buffer. Must be a string or stream.
      *
      * @param resource|string $input
@@ -412,6 +449,20 @@ class MimeDir extends Parser {
         if (strtoupper($propObj['ENCODING']) === 'QUOTED-PRINTABLE') {
             $propObj->setQuotedPrintableValue($this->extractQuotedPrintableValue());
         } else {
+            $charset = $this->charset;
+            if (isset($propObj['CHARSET'])) {
+                // vCard 2.1 allows the character set to be specified per property.
+                $charset = (string)$propObj['CHARSET'];
+            }
+            switch($charset) {
+                case 'UTF-8' :
+                    break;
+                case 'ISO-8859-1' :
+                    $property['value'] = utf8_encode($property['value']);
+                    break;
+                default :
+                    throw new ParseException('Unsupported CHARSET: ' . $propObj['CHARSET']);
+            }
             $propObj->setRawMimeDirValue($property['value']);
         }
 
diff --git a/tests/VObject/Parser/MimeDirTest.php b/tests/VObject/Parser/MimeDirTest.php
index 60bd86a..bf7f405 100644
--- a/tests/VObject/Parser/MimeDirTest.php
+++ b/tests/VObject/Parser/MimeDirTest.php
@@ -18,4 +18,53 @@ class MimeDirTest extends \PHPUnit_Framework_TestCase {
 
     }
 
+    function testDecodeLatin1() {
+
+        $vcard = <<<VCF
+BEGIN:VCARD
+VERSION:3.0
+FN:umlaut u - \xFC
+END:VCARD\n
+VCF;
+
+        $mimeDir = new Mimedir();
+        $mimeDir->setCharSet('ISO-8859-1');
+        $vcard = $mimeDir->parse($vcard);
+        $this->assertEquals("umlaut u - \xC3\xBC", $vcard->FN->getValue());
+
+    }
+
+    function testDecodeInlineLatin1() {
+
+        $vcard = <<<VCF
+BEGIN:VCARD
+VERSION:2.1
+FN;CHARSET=ISO-8859-1:umlaut u - \xFC
+END:VCARD\n
+VCF;
+
+        $mimeDir = new Mimedir();
+        $vcard = $mimeDir->parse($vcard);
+        $this->assertEquals("umlaut u - \xC3\xBC", $vcard->FN->getValue());
+
+    }
+
+    function testDontDecodeLatin1() {
+
+        $vcard = <<<VCF
+BEGIN:VCARD
+VERSION:4.0
+FN:umlaut u - \xFC
+END:VCARD\n
+VCF;
+
+        $mimeDir = new Mimedir();
+        $vcard = $mimeDir->parse($vcard);
+        // This basically tests that we don't touch the input string if
+        // the encoding was set to UTF-8. The result is actually invalid
+        // and the validator should report this, but it tests effectively
+        // that we pass through the string byte-by-byte.
+        $this->assertEquals("umlaut u - \xFC", $vcard->FN->getValue());
+
+    }
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-owncloud/php-sabre-vobject.git



More information about the Pkg-owncloud-commits mailing list