[Pkg-bazaar-commits] ./bzr/unstable r509: clean up stat cache code:

Fri Apr 10 08:19:33 UTC 2009

------------------------------------------------------------
revno: 509
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Mon 2005-05-16 14:32:21 +1000
message:
  clean up stat cache code:
  - smarter UTF-8 and quopri encoding of file names
  - check paths are not duplicated in cache
  - check lines are well-formed
  - more docs
modified:
  bzrlib/statcache.py
-------------- next part --------------
=== modified file 'bzrlib/statcache.py'

--- a/bzrlib/statcache.py	2005-05-12 00:54:18 +0000
+++ b/bzrlib/statcache.py	2005-05-16 04:32:21 +0000
@@ -18,7 +18,7 @@
 from binascii import b2a_qp, a2b_qp
 
 from trace import mutter
-from errors import BzrError
+from errors import BzrError, BzrCheckError
 
 
 """File stat cache to speed up tree comparisons.
@@ -27,6 +27,15 @@
 information of a file in the working directory, without actually
 reading and hashing the whole file.
 
+
+
+Implementation
+==============
+
+Users of this module should not need to know about how this is
+implemented, and in particular should not depend on the particular
+data which is stored or its format.
+
 This is done by maintaining a cache indexed by a file fingerprint of
 (path, size, mtime, ctime, ino, dev) pointing to the SHA-1.  If the
 fingerprint has not changed, we assume the file content has not changed
@@ -54,18 +63,29 @@
 
 The cache is represented as a map from file_id to a tuple of (file_id,
 sha1, path, size, mtime, ctime, ino, dev).
+
+The SHA-1 is stored in memory as a hexdigest.
+
+File names are written out as the quoted-printable encoding of their
+UTF-8 representation.
 """
 
-
+# order of fields returned by fingerprint()
 FP_SIZE  = 0
 FP_MTIME = 1
 FP_CTIME = 2
 FP_INO   = 3
 FP_DEV   = 4
 
-
+# order of fields in the statcache file and in the in-memory map
 SC_FILE_ID = 0
-SC_SHA1    = 1 
+SC_SHA1    = 1
+SC_PATH    = 2
+SC_SIZE    = 3
+SC_MTIME   = 4
+SC_CTIME   = 5
+SC_INO     = 6
+SC_DEV     = 7
 
 
 def fingerprint(abspath):
@@ -86,14 +106,22 @@
     from atomicfile import AtomicFile
 
     cachefn = os.path.join(basedir, '.bzr', 'stat-cache')
-    outf = AtomicFile(cachefn, 'wb', 'utf-8')
+    outf = AtomicFile(cachefn, 'wb')
     try:
         for entry in entry_iter:
-            if entry[0] in dangerfiles:
-                continue
-            outf.write(entry[0] + ' ' + entry[1] + ' ')
-            outf.write(b2a_qp(entry[2], True))
-            outf.write(' %d %d %d %d %d\n' % entry[3:])
+            if len(entry) != 8:
+                raise ValueError("invalid statcache entry tuple %r" % entry)
+            
+            if entry[SC_FILE_ID] in dangerfiles:
+                continue                # changed too recently
+            outf.write(entry[0])        # file id
+            outf.write(' ')
+            outf.write(entry[1])        # hex sha1
+            outf.write(' ')
+            outf.write(b2a_qp(entry[2].encode('utf-8'), True)) # name
+            for nf in entry[3:]:
+                outf.write(' %d' % nf)
+            outf.write('\n')
 
         outf.commit()
     finally:
@@ -102,22 +130,33 @@
         
         
 def load_cache(basedir):
-    import codecs
-    
+    from sets import Set
     cache = {}
+    seen_paths = Set()
 
     try:
         cachefn = os.path.join(basedir, '.bzr', 'stat-cache')
-        cachefile = codecs.open(cachefn, 'r', 'utf-8')
+        cachefile = open(cachefn, 'r')
     except IOError:
         return cache
     
     for l in cachefile:
         f = l.split(' ')
+
         file_id = f[0]
         if file_id in cache:
             raise BzrError("duplicated file_id in cache: {%s}" % file_id)
-        cache[file_id] = (f[0], f[1], a2b_qp(f[2])) + tuple([long(x) for x in f[3:]])
+        
+        path = a2b_qp(f[2]).decode('utf-8')
+        if path in seen_paths:
+            raise BzrCheckError("duplicated path in cache: %r" % path)
+        seen_paths.add(path)
+        
+        entry = (file_id, f[1], path) + tuple([long(x) for x in f[3:]])
+        if len(entry) != 8:
+            raise ValueError("invalid statcache entry tuple %r" % entry)
+
+        cache[file_id] = entry
     return cache