[Pkg-bazaar-commits] ./bzr/unstable r436: - Avoid dangerous files when writing out stat cache

Martin Pool mbp at sourcefrog.net
Fri Apr 10 08:19:58 UTC 2009


------------------------------------------------------------
revno: 436
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Tue 2005-05-10 16:34:41 +1000
message:
  - Avoid dangerous files when writing out stat cache
  - remove build_cache in favour of just update_cache with parameter
    to flush
modified:
  bzrlib/commands.py
  bzrlib/statcache.py
-------------- next part --------------
=== modified file 'bzrlib/commands.py'
--- a/bzrlib/commands.py	2005-05-10 06:07:16 +0000
+++ b/bzrlib/commands.py	2005-05-10 06:34:41 +0000
@@ -806,8 +806,7 @@
     def run(self):
         import statcache
         b = Branch('.')
-        inv = b.read_working_inventory()
-        statcache.update_cache(b, inv)
+        statcache.update_cache(b)
 
 
 ######################################################################

=== modified file 'bzrlib/statcache.py'
--- a/bzrlib/statcache.py	2005-05-10 06:21:52 +0000
+++ b/bzrlib/statcache.py	2005-05-10 06:34:41 +0000
@@ -57,6 +57,12 @@
 """
 
 
+FP_SIZE  = 0
+FP_MTIME = 1
+FP_CTIME = 2
+FP_INO   = 3
+FP_DEV   = 4
+
 
 def fingerprint(path, abspath):
     try:
@@ -72,12 +78,14 @@
             fs.st_ctime, fs.st_ino, fs.st_dev)
 
 
-def _write_cache(branch, entry_iter):
+def _write_cache(branch, entry_iter, dangerfiles):
     from atomicfile import AtomicFile
     
     outf = AtomicFile(branch.controlfilename('stat-cache'), 'wb', 'utf-8')
     try:
         for entry in entry_iter:
+            if entry[0] in dangerfiles:
+                continue
             outf.write(entry[0] + ' ' + entry[1] + ' ')
             outf.write(b2a_qp(entry[2], True))
             outf.write(' %d %d %d %d %d\n' % entry[3:])
@@ -114,30 +122,48 @@
         yield ie.file_id, path
     
 
-def build_cache(branch):
-    inv = branch.read_working_inventory()
-
-    cache = {}
-    _update_cache_from_list(branch, cache, _files_from_inventory(inv))
+
+def update_cache(branch, flush=False):
+    """Update and return the cache for the branch.
+
+    The returned cache may contain entries that have not been written
+    to disk for files recently touched.
+
+    flush -- discard any previous cache and recalculate from scratch.
+    """
+
     
-
-
-def update_cache(branch, inv):
     # TODO: It's supposed to be faster to stat the files in order by inum.
     # We don't directly know the inum of the files of course but we do
     # know where they were last sighted, so we can sort by that.
 
-    cache = load_cache(branch)
+    if flush:
+        cache = {}
+    else:
+        cache = load_cache(branch)
+    inv = branch.read_working_inventory()
     return _update_cache_from_list(branch, cache, _files_from_inventory(inv))
 
 
 
 def _update_cache_from_list(branch, cache, to_update):
-    """Update the cache to have info on the named files.
-
-    to_update is a sequence of (file_id, path) pairs.
+    """Update and return the cache for given files.
+
+    cache -- Previously cached values to be validated.
+
+    to_update -- Sequence of (file_id, path) pairs to check.
     """
+
+    from sets import Set
+
     hardcheck = dirty = 0
+
+    # files that have been recently touched and can't be
+    # committed to a persistent cache yet.
+    
+    dangerfiles = Set()
+    now = int(time.time())
+    
     for file_id, path in to_update:
         fap = branch.abspath(path)
         fp = fingerprint(fap, path)
@@ -149,6 +175,9 @@
                 dirty += 1
             continue
 
+        if (fp[FP_MTIME] >= now) or (fp[FP_CTIME] >= now):
+            dangerfiles.add(file_id)
+
         if cacheentry and (cacheentry[3:] == fp):
             continue                    # all stat fields unchanged
 
@@ -163,10 +192,12 @@
             cache[file_id] = cacheentry
             dirty += 1
 
-    mutter('statcache: read %d files, %d changed, %d in cache'
-           % (hardcheck, dirty, len(cache)))
+    mutter('statcache: read %d files, %d changed, %d dangerous, '
+           '%d in cache'
+           % (hardcheck, dirty, len(dangerfiles), len(cache)))
         
     if dirty:
-        _write_cache(branch, cache.itervalues())
+        mutter('updating on-disk statcache')
+        _write_cache(branch, cache.itervalues(), dangerfiles)
 
     return cache



More information about the Pkg-bazaar-commits mailing list