[SCM] morituri/master: * HACKING: More unicode notes. * morituri/common/program.py: * morituri/image/table.py: * morituri/image/toc.py: * morituri/result/result.py: * morituri/rip/cd.py: * morituri/test/test_image_toc.py: Further unicode fixes, for options, CD-Text, paths, ...

js at users.alioth.debian.org
Sun Oct 19 20:09:12 UTC 2014


The following commit has been merged in the master branch:
commit dfa3efe662874969bc07bc395729e4c0af9bbc5e
Author: Thomas Vander Stichele <thomas (at) apestaart (dot) org>
Date:   Fri Sep 11 22:16:46 2009 +0000

    	* HACKING:
    	  More unicode notes.
    	* morituri/common/program.py:
    	* morituri/image/table.py:
    	* morituri/image/toc.py:
    	* morituri/result/result.py:
    	* morituri/rip/cd.py:
    	* morituri/test/test_image_toc.py:
    	  Further unicode fixes, for options, CD-Text, paths, ...

diff --git a/ChangeLog b/ChangeLog
index 8dc94bb..c257737 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2009-09-12  Thomas Vander Stichele  <thomas at apestaart dot org>
+
+	* HACKING:
+	  More unicode notes.
+	* morituri/common/program.py:
+	* morituri/image/table.py:
+	* morituri/image/toc.py:
+	* morituri/result/result.py:
+	* morituri/rip/cd.py:
+	* morituri/test/test_image_toc.py:
+	  Further unicode fixes, for options, CD-Text, paths, ...
+
 2009-09-11  Thomas Vander Stichele  <thomas at apestaart dot org>
 
 	* morituri/program/cdrdao.py:
diff --git a/HACKING b/HACKING
index 4def318..123a08d 100644
--- a/HACKING
+++ b/HACKING
@@ -29,3 +29,9 @@ unicode
   for example, always use %r to log paths
 - run with RIP_DEBUG=5 once in a while to catch unicode/logging errors.
 - Also use unicode prefix/suffix in tempfile.* methods; to force unicode.
+- filesystems on Unix do not have an encoding.  file names are bytes.
+  However, most distros default to a utf-8 interpretation
+- You can either treat paths as byte strings all the way without interpreting
+  (even when writing them to other files), or assume utf-8 on in and out.
+- also direct output to a file; redirection sets codec to ASCII and brings out
+  unicode bugs
diff --git a/morituri/common/program.py b/morituri/common/program.py
index ff5b12c..638c506 100644
--- a/morituri/common/program.py
+++ b/morituri/common/program.py
@@ -213,12 +213,17 @@ class Program(log.Loggable):
         Also works for the disc name, using disc variables for the template.
 
         @param outdir:   the directory where to write the files
-        @type  outdir:   str
+        @type  outdir:   unicode
         @param template: the template for writing the file
-        @type  template: str
+        @type  template: unicode
         @param i:        track number (0 for HTOA)
         @type  i:        int
+
+        @rtype: unicode
         """
+        assert type(outdir) is unicode, "%r is not unicode" % outdir
+        assert type(template) is unicode, "%r is not unicode" % template
+
         # returns without extension
 
         v = {}
diff --git a/morituri/image/table.py b/morituri/image/table.py
index 655fb75..1fc9dfd 100644
--- a/morituri/image/table.py
+++ b/morituri/image/table.py
@@ -60,6 +60,8 @@ class Track:
     @type indexes: dict of number -> L{Index}
     @ivar isrc:    ISRC code (12 alphanumeric characters)
     @type isrc:    str
+    @ivar cdtext:  dictionary of CD Text information; see L{CDTEXT_KEYS}.
+    @type cdtext:  str -> unicode
     """
 
     number = None
diff --git a/morituri/image/toc.py b/morituri/image/toc.py
index c86fdcf..0fff6c1 100644
--- a/morituri/image/toc.py
+++ b/morituri/image/toc.py
@@ -128,16 +128,21 @@ class TocFile(object, log.Loggable):
             if m:
                 key = m.group('key')
                 value = m.group('value')
+                # usually, value is encoded with octal escapes and in latin-1
+                # FIXME: other encodings are possible, does cdrdao handle
+                # them ?
+                value = value.decode('string-escape').decode('latin-1')
                 if key in table.CDTEXT_FIELDS:
                     # FIXME: consider ISRC separate for now, but this
                     # is a limitation of our parser approach
                     if state == 'HEADER':
                         self.table.cdtext[key] = value
-                        self.debug('Found disc CD-Text %s: %s', key, value)
+                        self.debug('Found disc CD-Text %s: %r', key, value)
                     elif state == 'TRACK':
                         if key != 'ISRC' or not currentTrack \
                             or currentTrack.isrc is not None:
-                            self.debug('Found track CD-Text %s: %s', key, value)
+                            self.debug('Found track CD-Text %s: %r',
+                                key, value)
                             currentTrack.cdtext[key] = value
 
             # look for header elements
diff --git a/morituri/result/result.py b/morituri/result/result.py
index db0dd02..b5d3b7a 100644
--- a/morituri/result/result.py
+++ b/morituri/result/result.py
@@ -26,6 +26,7 @@ from morituri.result import logger
 
 class TrackResult:
     """
+    @type filename:          unicode
     @ivar testcrc:           4-byte CRC for the test read
     @type testcrc:           int
     @ivar copycrc:           4-byte CRC for the copy read
diff --git a/morituri/rip/cd.py b/morituri/rip/cd.py
index 238ad5e..71cac1c 100644
--- a/morituri/rip/cd.py
+++ b/morituri/rip/cd.py
@@ -69,6 +69,9 @@ class Rip(logcommand.LogCommand):
                 default, "', '".join(encode.PROFILES.keys())),
             default=default)
 
+    def handleOptions(self, options):
+        options.track_template = options.track_template.decode('utf-8')
+        options.disc_template = options.disc_template.decode('utf-8')
 
     def do(self, args):
         prog = program.Program()
@@ -117,8 +120,8 @@ See  http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1
         if metadatas:
             print 'Matching releases:'
             for metadata in metadatas:
-                print 'Artist  :', metadata.artist
-                print 'Title   :', metadata.title
+                print 'Artist  : %s' % metadata.artist.encode('utf-8')
+                print 'Title   : %s' % metadata.title.encode('utf-8')
 
             # Select one of the returned releases. We just pick the first one.
             prog.metadata = metadatas[0]
@@ -140,7 +143,8 @@ See  http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1
             "full table's AR URL %s differs from toc AR URL %s" % (
             itable.getAccurateRipURL(), ittoc.getAccurateRipURL())
 
-        prog.outdir = self.options.output_directory or os.getcwd()
+        prog.outdir = (self.options.output_directory or os.getcwd())
+        prog.outdir = prog.outdir.decode('utf-8')
         profile = encode.PROFILES[self.options.profile]()
 
         # result
@@ -168,6 +172,8 @@ See  http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1
             path = prog.getPath(prog.outdir, self.options.track_template, 
                 mbdiscid, number) + '.' + profile.extension
             trackResult.number = number
+            
+            assert type(path) is unicode, "%r is not unicode" % path
             trackResult.filename = path
             if number > 0:
                 trackResult.pregap = itable.tracks[number - 1].getPregap()
@@ -175,14 +181,16 @@ See  http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1
             # FIXME: optionally allow overriding reripping
             if os.path.exists(path):
                 print 'Verifying track %d of %d: %s' % (
-                    number, len(itable.tracks), os.path.basename(path))
+                    number, len(itable.tracks),
+                    os.path.basename(path).encode('utf-8'))
                 if not prog.verifyTrack(runner, trackResult):
                     print 'Verification failed, reripping...'
                     os.unlink(path)
 
             if not os.path.exists(path):
                 print 'Ripping track %d of %d: %s' % (
-                    number, len(itable.tracks), os.path.basename(path))
+                    number, len(itable.tracks),
+                    os.path.basename(path).encode('utf-8'))
                 prog.ripTrack(runner, trackResult, 
                     offset=int(self.options.offset),
                     device=self.parentCommand.options.device,
@@ -239,18 +247,18 @@ See  http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1
         if not os.path.exists(dirname):
             os.makedirs(dirname)
 
-        self.debug('writing cue file for %s', discName)
+        self.debug('writing cue file for %r', discName)
         prog.writeCue(discName)
 
         # write .m3u file
-        m3uPath = '%s.m3u' % discName
+        m3uPath = u'%s.m3u' % discName
         handle = open(m3uPath, 'w')
-        handle.write('#EXTM3U\n')
+        handle.write(u'#EXTM3U\n')
         if htoapath:
-            handle.write('#EXTINF:%d,%s\n' % (
+            handle.write(u'#EXTINF:%d,%s\n' % (
                 itable.getTrackStart(1) / common.FRAMES_PER_SECOND,
                     os.path.basename(htoapath[:-4])))
-            handle.write('%s\n' % os.path.basename(htoapath))
+            handle.write(u'%s\n' % os.path.basename(htoapath))
 
         for i, track in enumerate(itable.tracks):
             if not track.audio:
diff --git a/morituri/test/test_image_toc.py b/morituri/test/test_image_toc.py
index 5efadf5..d740089 100644
--- a/morituri/test/test_image_toc.py
+++ b/morituri/test/test_image_toc.py
@@ -208,3 +208,22 @@ class CapitalMergeTestCase(unittest.TestCase):
         self.assertEquals(self.table.getMusicBrainzDiscId(),
             "MAj3xXf6QMy7G.BIFOyHyq4MySE-")
 
+class UnicodeTestCase(unittest.TestCase):
+    def setUp(self):
+        self._performer = u'Jos\xe9 Gonz\xe1lez'
+        self.toc = toc.TocFile(os.path.join(os.path.dirname(__file__),
+            '%s.toc' % self._performer))
+        self.toc.parse()
+        self.assertEquals(len(self.toc.table.tracks), 10)
+
+    def testGetTrackLength(self):
+        t = self.toc.table.tracks[0]
+        # first track has known length because the .toc is a single file
+        self.assertEquals(self.toc.getTrackLength(t), 12001)
+        # last track has unknown length
+        t = self.toc.table.tracks[-1]
+        self.assertEquals(self.toc.getTrackLength(t), -1)
+
+    def testGetTrackPerformer(self):
+        t = self.toc.table.tracks[0]
+        self.assertEquals(t.cdtext['PERFORMER'], self._performer)

-- 
morituri packaging



More information about the pkg-multimedia-commits mailing list