[Pkg-bazaar-commits] ./bzr/unstable r866: - use new path-based hashcache for WorkingTree - squash mtime/ctime to whole seconds - update and if necessary write out hashcache when WorkingTree object is created.

Martin Pool mbp at sourcefrog.net
Fri Apr 10 08:21:17 UTC 2009


------------------------------------------------------------
revno: 866
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Fri 2005-07-08 16:54:58 +1000
message:
  - use new path-based hashcache for WorkingTree - squash mtime/ctime to whole seconds - update and if necessary write out hashcache when WorkingTree object is created.
modified:
  bzrlib/commands.py
  bzrlib/hashcache.py
  bzrlib/selftest/testhashcache.py
  bzrlib/workingtree.py
-------------- next part --------------
=== modified file 'bzrlib/commands.py'
--- a/bzrlib/commands.py	2005-07-08 02:53:13 +0000
+++ b/bzrlib/commands.py	2005-07-08 06:54:58 +0000
@@ -770,7 +770,6 @@
     """List files modified in working tree."""
     hidden = True
     def run(self):
-        from bzrlib.statcache import update_cache, SC_SHA1
         from bzrlib.diff import compare_trees
 
         b = find_branch('.')
@@ -1313,16 +1312,6 @@
         help.help(topic)
 
 
-class cmd_update_stat_cache(Command):
-    """Update stat-cache mapping inodes to SHA-1 hashes.
-
-    For testing only."""
-    hidden = True
-    def run(self):
-        from bzrlib.statcache import update_cache
-        b = find_branch('.')
-        update_cache(b.base, b.read_working_inventory())
-
 
 
 class cmd_plugins(Command):

=== modified file 'bzrlib/hashcache.py'
--- a/bzrlib/hashcache.py	2005-07-08 02:50:46 +0000
+++ b/bzrlib/hashcache.py	2005-07-08 06:54:58 +0000
@@ -28,7 +28,7 @@
 
 
 
-CACHE_HEADER = "### bzr statcache v5\n"
+CACHE_HEADER = "### bzr hashcache v5\n"
 
 
 def _fingerprint(abspath):
@@ -43,8 +43,10 @@
     if stat.S_ISDIR(fs.st_mode):
         return None
 
-    return (fs.st_size, fs.st_mtime,
-            fs.st_ctime, fs.st_ino, fs.st_dev)
+    # we discard any high precision because it's not reliable; perhaps we
+    # could do better on some systems?
+    return (fs.st_size, long(fs.st_mtime),
+            long(fs.st_ctime), fs.st_ino, fs.st_dev)
 
 
 class HashCache(object):
@@ -81,21 +83,32 @@
     miss_count
         number of misses (times files have been completely re-read)
     """
+    needs_write = False
+
     def __init__(self, basedir):
         self.basedir = basedir
         self.hit_count = 0
         self.miss_count = 0
         self.stat_count = 0
         self.danger_count = 0
-
         self._cache = {}
 
 
+
+    def cache_file_name(self):
+        import os.path
+        return os.path.join(self.basedir, '.bzr', 'stat-cache')
+
+
+
+
     def clear(self):
         """Discard all cached information.
 
         This does not reset the counters."""
-        self._cache_sha1 = {}
+        if self._cache:
+            self.needs_write = True
+            self._cache = {}
 
 
     def get_sha1(self, path):
@@ -106,6 +119,7 @@
 
         import os, time
         from bzrlib.osutils import sha_file
+        from bzrlib.trace import mutter
         
         abspath = os.path.join(self.basedir, path)
         fp = _fingerprint(abspath)
@@ -134,19 +148,25 @@
                 # next time.
                 self.danger_count += 1 
                 if cache_fp:
+                    mutter("remove outdated entry for %s" % path)
+                    self.needs_write = True
                     del self._cache[path]
-            else:
+            elif (fp != cache_fp) or (digest != cache_sha1):
+                mutter("update entry for %s" % path)
+                mutter("  %r" % (fp,))
+                mutter("  %r" % (cache_fp,))
+                self.needs_write = True
                 self._cache[path] = (digest, fp)
 
             return digest
 
 
 
-    def write(self, cachefn):
+    def write(self):
         """Write contents of cache to file."""
         from atomicfile import AtomicFile
 
-        outf = AtomicFile(cachefn, 'wb')
+        outf = AtomicFile(self.cache_file_name(), 'wb')
         try:
             print >>outf, CACHE_HEADER,
 
@@ -160,13 +180,14 @@
                 print >>outf
 
             outf.commit()
+            self.needs_write = False
         finally:
             if not outf.closed:
                 outf.abort()
         
 
 
-    def read(self, cachefn):
+    def read(self):
         """Reinstate cache from file.
 
         Overwrites existing cache.
@@ -175,9 +196,16 @@
         the cache."""
         from bzrlib.trace import mutter, warning
 
-        inf = file(cachefn, 'rb')
         self._cache = {}
 
+        fn = self.cache_file_name()
+        try:
+            inf = file(fn, 'rb')
+        except IOError, e:
+            mutter("failed to open %s: %s" % (fn, e))
+            return
+
+
         hdr = inf.readline()
         if hdr != CACHE_HEADER:
             mutter('cache header marker not found at top of %s; discarding cache'
@@ -206,5 +234,8 @@
 
             self._cache[path] = (sha1, fp)
 
+        self.needs_write = False
+           
+
 
         

=== modified file 'bzrlib/selftest/testhashcache.py'
--- a/bzrlib/selftest/testhashcache.py	2005-07-08 02:53:13 +0000
+++ b/bzrlib/selftest/testhashcache.py	2005-07-08 06:54:58 +0000
@@ -39,6 +39,8 @@
         import os
         import time
 
+        # make a dummy bzr directory just to hold the cache
+        os.mkdir('.bzr')
         hc = HashCache('.')
 
         file('foo', 'wb').write('hello')
@@ -99,11 +101,11 @@
 
         # write out, read back in and check that we don't need to
         # re-read any files
-        hc.write('stat-cache')
+        hc.write()
         del hc
 
         hc = HashCache('.')
-        hc.read('stat-cache')
+        hc.read()
 
         self.assertEquals(len(hc._cache), 2)
         self.assertEquals(hc.get_sha1('foo'), sha1('g00dbye'))

=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py	2005-07-06 05:32:32 +0000
+++ b/bzrlib/workingtree.py	2005-07-08 06:54:58 +0000
@@ -14,6 +14,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+# TODO: Don't allow WorkingTrees to be constructed for remote branches.
 
 import os
     
@@ -30,13 +31,26 @@
     It is possible for a `WorkingTree` to have a filename which is
     not listed in the Inventory and vice versa.
     """
-    _statcache = None
-    
     def __init__(self, basedir, inv):
+        from bzrlib.hashcache import HashCache
+        from bzrlib.trace import note, mutter
+
         self._inventory = inv
         self.basedir = basedir
         self.path2id = inv.path2id
-        self._update_statcache()
+
+        # update the whole cache up front and write to disk if anything changed;
+        # in the future we might want to do this more selectively
+        hc = self._hashcache = HashCache(basedir)
+        hc.read()
+        for path, ie in inv.iter_entries():
+            hc.get_sha1(path)
+
+        if hc.needs_write:
+            mutter("write hc")
+            hc.write()
+
+
 
     def __iter__(self):
         """Iterate through file_ids for this tree.
@@ -45,22 +59,17 @@
         and the working file exists.
         """
         inv = self._inventory
-        for file_id in self._inventory:
-            # TODO: This is slightly redundant; we should be able to just
-            # check the statcache but it only includes regular files.
-            # only include files which still exist on disk
-            ie = inv[file_id]
-            if ie.kind == 'file':
-                if ((file_id in self._statcache)
-                    or (os.path.exists(self.abspath(inv.id2path(file_id))))):
-                    yield file_id
-
+        for path, ie in inv.iter_entries():
+            if os.path.exists(self.abspath(path)):
+                yield ie.file_id
 
 
     def __repr__(self):
         return "<%s of %s>" % (self.__class__.__name__,
                                self.basedir)
 
+
+
     def abspath(self, filename):
         return os.path.join(self.basedir, filename)
 
@@ -80,29 +89,24 @@
                 
     def has_id(self, file_id):
         # files that have been deleted are excluded
-        if not self.inventory.has_id(file_id):
+        inv = self._inventory
+        if not inv.has_id(file_id):
             return False
-        if file_id in self._statcache:
-            return True
-        return os.path.exists(self.abspath(self.id2path(file_id)))
+        path = inv.id2path(file_id)
+        return os.path.exists(self.abspath(path))
 
 
     __contains__ = has_id
     
 
-    def _update_statcache(self):
-        if not self._statcache:
-            from bzrlib.statcache import update_cache
-            self._statcache = update_cache(self.basedir, self.inventory)
-
     def get_file_size(self, file_id):
-        import os, stat
-        return os.stat(self._get_store_filename(file_id))[stat.ST_SIZE]
+        # is this still called?
+        raise NotImplementedError()
 
 
     def get_file_sha1(self, file_id):
-        from bzrlib.statcache import SC_SHA1
-        return self._statcache[file_id][SC_SHA1]
+        path = self._inventory.id2path(file_id)
+        return self._hashcache.get_sha1(path)
 
 
     def file_class(self, filename):
@@ -127,7 +131,7 @@
         from osutils import appendpath, file_kind
         import os
 
-        inv = self.inventory
+        inv = self._inventory
 
         def descend(from_dir_relpath, from_dir_id, dp):
             ls = os.listdir(dp)



More information about the Pkg-bazaar-commits mailing list