[Pkg-bazaar-commits] ./bzr/unstable r548: - Write statcache using \u style encoding to avoid problems with quopri encoding causing line-wrapping etc.
Martin Pool
mbp at sourcefrog.net
Fri Apr 10 08:19:00 UTC 2009
------------------------------------------------------------
revno: 548
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Wed 2005-05-25 09:14:28 +1200
message:
- Write statcache using \u style encoding to avoid
problems with quopri encoding causing line-wrapping etc.
modified:
TODO
bzrlib/statcache.py
-------------- next part --------------
=== modified file 'TODO'
--- a/TODO 2005-05-17 07:14:46 +0000
+++ b/TODO 2005-05-24 21:14:28 +0000
@@ -262,6 +262,9 @@
* Function that finds all changes to files under a given directory;
perhaps log should use this if a directory is given.
+* XML attributes might have trouble with filenames containing \n and
+ \r. Do we really want to support this? I think perhaps not.
+
Large things
------------
=== modified file 'bzrlib/statcache.py'
--- a/bzrlib/statcache.py 2005-05-19 06:37:21 +0000
+++ b/bzrlib/statcache.py 2005-05-24 21:14:28 +0000
@@ -15,7 +15,6 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import stat, os, sha, time
-from binascii import b2a_qp, a2b_qp
from trace import mutter
from errors import BzrError, BzrCheckError
@@ -66,9 +65,9 @@
The SHA-1 is stored in memory as a hexdigest.
-File names and file-ids are written out as the quoted-printable
-encoding of their UTF-8 representation. (file-ids shouldn't contain
-wierd characters, but it might happen.)
+File names and file-ids are written out with non-ascii or whitespace
+characters given as python-style unicode escapes. (file-ids shouldn't
+contain wierd characters, but it might happen.)
"""
# order of fields returned by fingerprint()
@@ -90,7 +89,7 @@
-CACHE_HEADER = "### bzr statcache v2"
+CACHE_HEADER = "### bzr statcache v3"
def fingerprint(abspath):
@@ -107,6 +106,14 @@
fs.st_ctime, fs.st_ino, fs.st_dev)
+
+def safe_quote(s):
+ return s.encode('unicode_escape') \
+ .replace('\n', '\\u000a') \
+ .replace(' ', '\\u0020') \
+ .replace('\r', '\\u000d')
+
+
def _write_cache(basedir, entry_iter, dangerfiles):
from atomicfile import AtomicFile
@@ -120,11 +127,11 @@
if entry[SC_FILE_ID] in dangerfiles:
continue # changed too recently
- outf.write(b2a_qp(entry[0].encode('utf-8'))) # file id
- outf.write(' ')
- outf.write(entry[1]) # hex sha1
- outf.write(' ')
- outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name
+ outf.write(safe_quote(entry[0])) # file id
+ outf.write(' ')
+ outf.write(entry[1]) # hex sha1
+ outf.write(' ')
+ outf.write(safe_quote(entry[2])) # name
for nf in entry[3:]:
outf.write(' %d' % nf)
outf.write('\n')
@@ -156,7 +163,7 @@
for l in cachefile:
f = l.split(' ')
- file_id = a2b_qp(f[0]).decode('utf-8')
+ file_id = f[0].decode('unicode_escape')
if file_id in cache:
raise BzrCheckError("duplicated file_id in cache: {%s}" % file_id)
@@ -164,7 +171,7 @@
if len(text_sha) != 40 or not sha_re.match(text_sha):
raise BzrCheckError("invalid file SHA-1 in cache: %r" % text_sha)
- path = a2b_qp(f[2]).decode('utf-8')
+ path = f[2].decode('unicode_escape')
if path in seen_paths:
raise BzrCheckError("duplicated path in cache: %r" % path)
seen_paths[path] = True
More information about the Pkg-bazaar-commits
mailing list