[Pkg-bazaar-commits] ./bzr/unstable r954: - separate out code that just scans the hash cache to find files that are possibly changed
Martin Pool
mbp at sourcefrog.net
Fri Apr 10 08:21:34 UTC 2009
------------------------------------------------------------
revno: 954
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Fri 2005-07-22 15:49:46 -0300
message:
- separate out code that just scans the hash cache to find files that are possibly
changed; don't actually re-read them unless the content has changed
modified:
bzrlib/commands.py
bzrlib/hashcache.py
bzrlib/workingtree.py
-------------- next part --------------
=== modified file 'bzrlib/commands.py'
--- a/bzrlib/commands.py 2005-07-21 21:32:13 +0000
+++ b/bzrlib/commands.py 2005-07-22 18:49:46 +0000
@@ -1254,7 +1254,7 @@
-class cmd_update_hashes(Command):
+class cmd_scan_cache(Command):
hidden = True
def run(self):
from bzrlib.hashcache import HashCache
@@ -1262,12 +1262,12 @@
c = HashCache('.')
c.read()
- c.refresh_all()
+ c.scan()
print '%6d stats' % c.stat_count
print '%6d in hashcache' % len(c._cache)
- print '%6d files gone' % c.gone_count
- print '%6d hashes updated' % c.miss_count
+ print '%6d files removed from cache' % c.removed_count
+ print '%6d hashes updated' % c.update_count
print '%6d files changed too recently to cache' % c.danger_count
if c.needs_write:
=== modified file 'bzrlib/hashcache.py'
--- a/bzrlib/hashcache.py 2005-07-22 18:05:47 +0000
+++ b/bzrlib/hashcache.py 2005-07-22 18:49:46 +0000
@@ -92,8 +92,8 @@
self.miss_count = 0
self.stat_count = 0
self.danger_count = 0
- self.gone_count = 0
self.removed_count = 0
+ self.update_count = 0
self._cache = {}
@@ -112,66 +112,74 @@
self._cache = {}
- def refresh_all(self):
- prep = [(ce[1][3], path) for (path, ce) in self._cache.iteritems()]
+ def scan(self):
+ """Scan all files and remove entries where the cache entry is obsolete.
+
+ Obsolete entries are those where the file has been modified or deleted
+ since the entry was inserted.
+ """
+ prep = [(ce[1][3], path, ce) for (path, ce) in self._cache.iteritems()]
prep.sort()
- for inum, path in prep:
- # we don't really need to re-hash them; we just need to check
- # if they're up to date
- self.get_sha1(path)
+ for inum, path, cache_entry in prep:
+ abspath = os.sep.join([self.basedir, path])
+ fp = _fingerprint(abspath)
+ self.stat_count += 1
+
+ cache_fp = cache_entry[1]
+
+ if (not fp) or (cache_fp != fp):
+ # not here or not a regular file anymore
+ self.removed_count += 1
+ self.needs_write = True
+ del self._cache[path]
+
def get_sha1(self, path):
"""Return the sha1 of a file.
"""
abspath = os.sep.join([self.basedir, path])
- fp = _fingerprint(abspath)
+ self.stat_count += 1
+ file_fp = _fingerprint(abspath)
+
+ if not file_fp:
+ # not a regular file or not existing
+ if path in self._cache:
+ self.removed_count += 1
+ self.needs_write = True
+ del self._cache[path]
+ return None
- c = self._cache.get(path)
- if c:
- cache_sha1, cache_fp = c
+ if path in self._cache:
+ cache_sha1, cache_fp = self._cache[path]
else:
cache_sha1, cache_fp = None, None
- self.stat_count += 1
-
- if not fp:
- # not a regular file
- if path in self._cache:
- self.removed_count += 1
- self.needs_write = True
- del self._cache[path]
- return None
- elif cache_fp and (cache_fp == fp):
+ if cache_fp == file_fp:
self.hit_count += 1
return cache_sha1
+
+ self.miss_count += 1
+ digest = sha_file(file(abspath, 'rb', buffering=65000))
+
+ now = int(time.time())
+ if file_fp[1] >= now or file_fp[2] >= now:
+ # changed too recently; can't be cached. we can
+ # return the result and it could possibly be cached
+ # next time.
+ self.danger_count += 1
+ if cache_fp:
+ self.removed_count += 1
+ self.needs_write = True
+ del self._cache[path]
else:
- self.miss_count += 1
- digest = sha_file(file(abspath, 'rb', buffering=65000))
-
- now = int(time.time())
- if fp[1] >= now or fp[2] >= now:
- # changed too recently; can't be cached. we can
- # return the result and it could possibly be cached
- # next time.
- self.danger_count += 1
- if cache_fp:
- self.removed_count += 1
- self.needs_write = True
- del self._cache[path]
- elif (fp != cache_fp) or (digest != cache_sha1):
-# mutter("update entry for %s" % path)
-# mutter(" %r" % (fp,))
-# mutter(" %r" % (cache_fp,))
- self.needs_write = True
- self._cache[path] = (digest, fp)
- else:
- # huh?
- assert 0
-
- return digest
-
+ self.update_count += 1
+ self.needs_write = True
+ self._cache[path] = (digest, file_fp)
+
+ return digest
+
=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py 2005-07-08 06:54:58 +0000
+++ b/bzrlib/workingtree.py 2005-07-22 18:49:46 +0000
@@ -43,13 +43,16 @@
# in the future we might want to do this more selectively
hc = self._hashcache = HashCache(basedir)
hc.read()
- for path, ie in inv.iter_entries():
- hc.get_sha1(path)
+ hc.scan()
if hc.needs_write:
mutter("write hc")
hc.write()
-
+
+
+ def __del__(self):
+ if self._hashcache.needs_write:
+ self._hashcache.write()
def __iter__(self):
@@ -66,7 +69,7 @@
def __repr__(self):
return "<%s of %s>" % (self.__class__.__name__,
- self.basedir)
+ getattr(self, 'basedir', None))
More information about the Pkg-bazaar-commits
mailing list