[Pkg-bazaar-commits] ./bzr/unstable r954: - separate out code that just scans the hash cache to find files that are possibly changed

Martin Pool mbp at sourcefrog.net
Fri Apr 10 08:21:34 UTC 2009


------------------------------------------------------------
revno: 954
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Fri 2005-07-22 15:49:46 -0300
message:
  - separate out code that just scans the hash cache to find files that are possibly
    changed; don't actually re-read them unless the content has changed
modified:
  bzrlib/commands.py
  bzrlib/hashcache.py
  bzrlib/workingtree.py
-------------- next part --------------
=== modified file 'bzrlib/commands.py'
--- a/bzrlib/commands.py	2005-07-21 21:32:13 +0000
+++ b/bzrlib/commands.py	2005-07-22 18:49:46 +0000
@@ -1254,7 +1254,7 @@
 
 
 
-class cmd_update_hashes(Command):
+class cmd_scan_cache(Command):
     hidden = True
     def run(self):
         from bzrlib.hashcache import HashCache
@@ -1262,12 +1262,12 @@
 
         c = HashCache('.')
         c.read()
-        c.refresh_all()
+        c.scan()
             
         print '%6d stats' % c.stat_count
         print '%6d in hashcache' % len(c._cache)
-        print '%6d files gone' % c.gone_count
-        print '%6d hashes updated' % c.miss_count
+        print '%6d files removed from cache' % c.removed_count
+        print '%6d hashes updated' % c.update_count
         print '%6d files changed too recently to cache' % c.danger_count
 
         if c.needs_write:

=== modified file 'bzrlib/hashcache.py'
--- a/bzrlib/hashcache.py	2005-07-22 18:05:47 +0000
+++ b/bzrlib/hashcache.py	2005-07-22 18:49:46 +0000
@@ -92,8 +92,8 @@
         self.miss_count = 0
         self.stat_count = 0
         self.danger_count = 0
-        self.gone_count = 0
         self.removed_count = 0
+        self.update_count = 0
         self._cache = {}
 
 
@@ -112,66 +112,74 @@
             self._cache = {}
 
 
-    def refresh_all(self):
-        prep = [(ce[1][3], path) for (path, ce) in self._cache.iteritems()]
+    def scan(self):
+        """Scan all files and remove entries where the cache entry is obsolete.
+        
+        Obsolete entries are those where the file has been modified or deleted
+        since the entry was inserted.        
+        """
+        prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]
         prep.sort()
         
-        for inum, path in prep:
-            # we don't really need to re-hash them; we just need to check 
-            # if they're up to date
-            self.get_sha1(path)
+        for inum, path, cache_entry in prep:
+            abspath = os.sep.join([self.basedir, path])
+            fp = _fingerprint(abspath)
+            self.stat_count += 1
+            
+            cache_fp = cache_entry[1]
+    
+            if (not fp) or (cache_fp != fp):
+                # not here or not a regular file anymore
+                self.removed_count += 1
+                self.needs_write = True
+                del self._cache[path]
+
 
 
     def get_sha1(self, path):
         """Return the sha1 of a file.
         """
         abspath = os.sep.join([self.basedir, path])
-        fp = _fingerprint(abspath)
+        self.stat_count += 1
+        file_fp = _fingerprint(abspath)
+        
+        if not file_fp:
+            # not a regular file or not existing
+            if path in self._cache:
+                self.removed_count += 1
+                self.needs_write = True
+                del self._cache[path]
+            return None        
 
-        c = self._cache.get(path)
-        if c:
-            cache_sha1, cache_fp = c
+        if path in self._cache:
+            cache_sha1, cache_fp = self._cache[path]
         else:
             cache_sha1, cache_fp = None, None
 
-        self.stat_count += 1
-
-        if not fp:
-            # not a regular file
-            if path in self._cache:
-                self.removed_count += 1
-                self.needs_write = True
-                del self._cache[path]
-            return None
-        elif cache_fp and (cache_fp == fp):
+        if cache_fp == file_fp:
             self.hit_count += 1
             return cache_sha1
+        
+        self.miss_count += 1
+        digest = sha_file(file(abspath, 'rb', buffering=65000))
+
+        now = int(time.time())
+        if file_fp[1] >= now or file_fp[2] >= now:
+            # changed too recently; can't be cached.  we can
+            # return the result and it could possibly be cached
+            # next time.
+            self.danger_count += 1 
+            if cache_fp:
+                self.removed_count += 1
+                self.needs_write = True
+                del self._cache[path]
         else:
-            self.miss_count += 1
-            digest = sha_file(file(abspath, 'rb', buffering=65000))
-
-            now = int(time.time())
-            if fp[1] >= now or fp[2] >= now:
-                # changed too recently; can't be cached.  we can
-                # return the result and it could possibly be cached
-                # next time.
-                self.danger_count += 1 
-                if cache_fp:
-                    self.removed_count += 1
-                    self.needs_write = True
-                    del self._cache[path]
-            elif (fp != cache_fp) or (digest != cache_sha1):
-#                 mutter("update entry for %s" % path)
-#                 mutter("  %r" % (fp,))
-#                 mutter("  %r" % (cache_fp,))
-                self.needs_write = True
-                self._cache[path] = (digest, fp)
-            else:
-                # huh?
-                assert 0
-            
-            return digest
-            
+            self.update_count += 1
+            self.needs_write = True
+            self._cache[path] = (digest, file_fp)
+        
+        return digest
+        
 
 
 

=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py	2005-07-08 06:54:58 +0000
+++ b/bzrlib/workingtree.py	2005-07-22 18:49:46 +0000
@@ -43,13 +43,16 @@
         # in the future we might want to do this more selectively
         hc = self._hashcache = HashCache(basedir)
         hc.read()
-        for path, ie in inv.iter_entries():
-            hc.get_sha1(path)
+        hc.scan()
 
         if hc.needs_write:
             mutter("write hc")
             hc.write()
-
+            
+            
+    def __del__(self):
+        if self._hashcache.needs_write:
+            self._hashcache.write()
 
 
     def __iter__(self):
@@ -66,7 +69,7 @@
 
     def __repr__(self):
         return "<%s of %s>" % (self.__class__.__name__,
-                               self.basedir)
+                               getattr(self, 'basedir', None))
 
 
 



More information about the Pkg-bazaar-commits mailing list