[Pkg-bazaar-commits] ./bzr/unstable r866: - use new path-based hashcache for WorkingTree; - squash mtime/ctime to whole seconds; - update and if necessary write out hashcache when WorkingTree object is created.
Martin Pool
mbp at sourcefrog.net
Fri Apr 10 08:21:17 UTC 2009
------------------------------------------------------------
revno: 866
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Fri 2005-07-08 16:54:58 +1000
message:
- use new path-based hashcache for WorkingTree; - squash mtime/ctime to whole seconds; - update and if necessary write out hashcache when WorkingTree object is created.
modified:
bzrlib/commands.py
bzrlib/hashcache.py
bzrlib/selftest/testhashcache.py
bzrlib/workingtree.py
-------------- next part --------------
=== modified file 'bzrlib/commands.py'
--- a/bzrlib/commands.py 2005-07-08 02:53:13 +0000
+++ b/bzrlib/commands.py 2005-07-08 06:54:58 +0000
@@ -770,7 +770,6 @@
"""List files modified in working tree."""
hidden = True
def run(self):
- from bzrlib.statcache import update_cache, SC_SHA1
from bzrlib.diff import compare_trees
b = find_branch('.')
@@ -1313,16 +1312,6 @@
help.help(topic)
-class cmd_update_stat_cache(Command):
- """Update stat-cache mapping inodes to SHA-1 hashes.
-
- For testing only."""
- hidden = True
- def run(self):
- from bzrlib.statcache import update_cache
- b = find_branch('.')
- update_cache(b.base, b.read_working_inventory())
-
class cmd_plugins(Command):
=== modified file 'bzrlib/hashcache.py'
--- a/bzrlib/hashcache.py 2005-07-08 02:50:46 +0000
+++ b/bzrlib/hashcache.py 2005-07-08 06:54:58 +0000
@@ -28,7 +28,7 @@
-CACHE_HEADER = "### bzr statcache v5\n"
+CACHE_HEADER = "### bzr hashcache v5\n"
def _fingerprint(abspath):
@@ -43,8 +43,10 @@
if stat.S_ISDIR(fs.st_mode):
return None
- return (fs.st_size, fs.st_mtime,
- fs.st_ctime, fs.st_ino, fs.st_dev)
+ # we discard any high precision because it's not reliable; perhaps we
+ # could do better on some systems?
+ return (fs.st_size, long(fs.st_mtime),
+ long(fs.st_ctime), fs.st_ino, fs.st_dev)
class HashCache(object):
@@ -81,21 +83,32 @@
miss_count
number of misses (times files have been completely re-read)
"""
+ needs_write = False
+
def __init__(self, basedir):
self.basedir = basedir
self.hit_count = 0
self.miss_count = 0
self.stat_count = 0
self.danger_count = 0
-
self._cache = {}
+
+ def cache_file_name(self):
+ import os.path
+ return os.path.join(self.basedir, '.bzr', 'stat-cache')
+
+
+
+
def clear(self):
"""Discard all cached information.
This does not reset the counters."""
- self._cache_sha1 = {}
+ if self._cache:
+ self.needs_write = True
+ self._cache = {}
def get_sha1(self, path):
@@ -106,6 +119,7 @@
import os, time
from bzrlib.osutils import sha_file
+ from bzrlib.trace import mutter
abspath = os.path.join(self.basedir, path)
fp = _fingerprint(abspath)
@@ -134,19 +148,25 @@
# next time.
self.danger_count += 1
if cache_fp:
+ mutter("remove outdated entry for %s" % path)
+ self.needs_write = True
del self._cache[path]
- else:
+ elif (fp != cache_fp) or (digest != cache_sha1):
+ mutter("update entry for %s" % path)
+ mutter(" %r" % (fp,))
+ mutter(" %r" % (cache_fp,))
+ self.needs_write = True
self._cache[path] = (digest, fp)
return digest
- def write(self, cachefn):
+ def write(self):
"""Write contents of cache to file."""
from atomicfile import AtomicFile
- outf = AtomicFile(cachefn, 'wb')
+ outf = AtomicFile(self.cache_file_name(), 'wb')
try:
print >>outf, CACHE_HEADER,
@@ -160,13 +180,14 @@
print >>outf
outf.commit()
+ self.needs_write = False
finally:
if not outf.closed:
outf.abort()
- def read(self, cachefn):
+ def read(self):
"""Reinstate cache from file.
Overwrites existing cache.
@@ -175,9 +196,16 @@
the cache."""
from bzrlib.trace import mutter, warning
- inf = file(cachefn, 'rb')
self._cache = {}
+ fn = self.cache_file_name()
+ try:
+ inf = file(fn, 'rb')
+ except IOError, e:
+ mutter("failed to open %s: %s" % (fn, e))
+ return
+
+
hdr = inf.readline()
if hdr != CACHE_HEADER:
mutter('cache header marker not found at top of %s; discarding cache'
@@ -206,5 +234,8 @@
self._cache[path] = (sha1, fp)
+ self.needs_write = False
+
+
=== modified file 'bzrlib/selftest/testhashcache.py'
--- a/bzrlib/selftest/testhashcache.py 2005-07-08 02:53:13 +0000
+++ b/bzrlib/selftest/testhashcache.py 2005-07-08 06:54:58 +0000
@@ -39,6 +39,8 @@
import os
import time
+ # make a dummy bzr directory just to hold the cache
+ os.mkdir('.bzr')
hc = HashCache('.')
file('foo', 'wb').write('hello')
@@ -99,11 +101,11 @@
# write out, read back in and check that we don't need to
# re-read any files
- hc.write('stat-cache')
+ hc.write()
del hc
hc = HashCache('.')
- hc.read('stat-cache')
+ hc.read()
self.assertEquals(len(hc._cache), 2)
self.assertEquals(hc.get_sha1('foo'), sha1('g00dbye'))
=== modified file 'bzrlib/workingtree.py'
--- a/bzrlib/workingtree.py 2005-07-06 05:32:32 +0000
+++ b/bzrlib/workingtree.py 2005-07-08 06:54:58 +0000
@@ -14,6 +14,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+# TODO: Don't allow WorkingTrees to be constructed for remote branches.
import os
@@ -30,13 +31,26 @@
It is possible for a `WorkingTree` to have a filename which is
not listed in the Inventory and vice versa.
"""
- _statcache = None
-
def __init__(self, basedir, inv):
+ from bzrlib.hashcache import HashCache
+ from bzrlib.trace import note, mutter
+
self._inventory = inv
self.basedir = basedir
self.path2id = inv.path2id
- self._update_statcache()
+
+ # update the whole cache up front and write to disk if anything changed;
+ # in the future we might want to do this more selectively
+ hc = self._hashcache = HashCache(basedir)
+ hc.read()
+ for path, ie in inv.iter_entries():
+ hc.get_sha1(path)
+
+ if hc.needs_write:
+ mutter("write hc")
+ hc.write()
+
+
def __iter__(self):
"""Iterate through file_ids for this tree.
@@ -45,22 +59,17 @@
and the working file exists.
"""
inv = self._inventory
- for file_id in self._inventory:
- # TODO: This is slightly redundant; we should be able to just
- # check the statcache but it only includes regular files.
- # only include files which still exist on disk
- ie = inv[file_id]
- if ie.kind == 'file':
- if ((file_id in self._statcache)
- or (os.path.exists(self.abspath(inv.id2path(file_id))))):
- yield file_id
-
+ for path, ie in inv.iter_entries():
+ if os.path.exists(self.abspath(path)):
+ yield ie.file_id
def __repr__(self):
return "<%s of %s>" % (self.__class__.__name__,
self.basedir)
+
+
def abspath(self, filename):
return os.path.join(self.basedir, filename)
@@ -80,29 +89,24 @@
def has_id(self, file_id):
# files that have been deleted are excluded
- if not self.inventory.has_id(file_id):
+ inv = self._inventory
+ if not inv.has_id(file_id):
return False
- if file_id in self._statcache:
- return True
- return os.path.exists(self.abspath(self.id2path(file_id)))
+ path = inv.id2path(file_id)
+ return os.path.exists(self.abspath(path))
__contains__ = has_id
- def _update_statcache(self):
- if not self._statcache:
- from bzrlib.statcache import update_cache
- self._statcache = update_cache(self.basedir, self.inventory)
-
def get_file_size(self, file_id):
- import os, stat
- return os.stat(self._get_store_filename(file_id))[stat.ST_SIZE]
+ # is this still called?
+ raise NotImplementedError()
def get_file_sha1(self, file_id):
- from bzrlib.statcache import SC_SHA1
- return self._statcache[file_id][SC_SHA1]
+ path = self._inventory.id2path(file_id)
+ return self._hashcache.get_sha1(path)
def file_class(self, filename):
@@ -127,7 +131,7 @@
from osutils import appendpath, file_kind
import os
- inv = self.inventory
+ inv = self._inventory
def descend(from_dir_relpath, from_dir_id, dp):
ls = os.listdir(dp)
More information about the Pkg-bazaar-commits
mailing list