[Pkg-bazaar-commits] ./bzr/unstable r867: - remove bzrlib.statcache module, no longer needed.
Martin Pool
mbp at sourcefrog.net
Fri Apr 10 08:21:17 UTC 2009
------------------------------------------------------------
revno: 867
committer: Martin Pool <mbp at sourcefrog.net>
timestamp: Fri 2005-07-08 17:04:29 +1000
message:
- remove bzrlib.statcache module, no longer needed.
removed:
bzrlib/statcache.py
-------------- next part --------------
=== removed file 'bzrlib/statcache.py'
--- a/bzrlib/statcache.py 2005-07-06 10:07:31 +0000
+++ b/bzrlib/statcache.py 1970-01-01 00:00:00 +0000
@@ -1,305 +0,0 @@
-# (C) 2005 Canonical Ltd
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-import stat, os, sha, time
-
-from trace import mutter
-from errors import BzrError, BzrCheckError
-
-
-"""File stat cache to speed up tree comparisons.
-
-This module basically gives a quick way to find the SHA-1 and related
-information of a file in the working directory, without actually
-reading and hashing the whole file. The information is validated by
-checking the size, mtime, ctime, etc of the file as returned by the
-stat() system call.
-
-This has no relation to the deprecated standard Python module called
-statcache (vs bzrlib.statcache).
-
-
-
-Implementation
-==============
-
-Users of this module should not need to know about how this is
-implemented, and in particular should not depend on the particular
-data which is stored or its format.
-
-The cache maintains a mapping from filename to the SHA-1 of the
-content of the file.
-
-The cache also stores a fingerprint of (size, mtime, ctime, ino, dev)
-which is used to validate that the entry is up-to-date.
-
-
-
-This is done by maintaining a cache indexed by a file fingerprint of
-(path, size, mtime, ctime, ino, dev) pointing to the SHA-1. If the
-fingerprint has not changed, we assume the file content has not changed
-either and the SHA-1 is therefore the same.
-
-If any of the fingerprint fields have changed then the file content
-*may* have changed, or it may not have. We need to reread the file
-contents to make sure, but this is not visible to the user or
-higher-level code (except as a delay of course).
-
-The mtime and ctime are stored with nanosecond fields, but not all
-filesystems give this level of precision. There is therefore a
-possible race: the file might be modified twice within a second
-without changing the size or mtime, and a SHA-1 cached from the first
-version would be wrong. We handle this by not recording a cached hash
-for any files which were modified in the current second and that
-therefore have the chance to change again before the second is up.
-
-The only known hole in this design is if the system clock jumps
-backwards crossing invocations of bzr. Please don't do that; use ntp
-to gradually adjust your clock or don't use bzr over the step.
-
-At the moment this is stored in a simple textfile; it might be nice
-to use a tdb instead to allow faster lookup by file-id.
-
-The cache is represented as a map from file_id to a tuple of (file_id,
-sha1, path, size, mtime, ctime, ino, dev).
-
-The SHA-1 is stored in memory as a hexdigest.
-
-This version of the file on disk has one line per record, with fields
-separated by \0 characters.
-"""
-
-# order of fields returned by fingerprint()
-FP_SIZE = 0
-FP_MTIME = 1
-FP_CTIME = 2
-FP_INO = 3
-FP_DEV = 4
-
-# order of fields in the statcache file and in the in-memory map
-SC_FILE_ID = 0
-SC_SHA1 = 1
-SC_PATH = 2
-SC_SIZE = 3
-SC_MTIME = 4
-SC_CTIME = 5
-SC_INO = 6
-SC_DEV = 7
-
-
-
-CACHE_HEADER = "### bzr statcache v4"
-
-
-def fingerprint(abspath):
- try:
- fs = os.lstat(abspath)
- except OSError:
- # might be missing, etc
- return None
-
- if stat.S_ISDIR(fs.st_mode):
- return None
-
- return (fs.st_size, fs.st_mtime,
- fs.st_ctime, fs.st_ino, fs.st_dev)
-
-
-
-def _write_cache(basedir, entries):
- from atomicfile import AtomicFile
-
- cachefn = os.path.join(basedir, '.bzr', 'stat-cache')
- outf = AtomicFile(cachefn, 'wb')
- try:
- outf.write(CACHE_HEADER + '\n')
-
- for entry in entries:
- if len(entry) != 8:
- raise ValueError("invalid statcache entry tuple %r" % entry)
- outf.write(entry[0].encode('utf-8')) # file id
- outf.write('\0')
- outf.write(entry[1]) # hex sha1
- outf.write('\0')
- outf.write(entry[2].encode('utf-8')) # name
- for nf in entry[3:]:
- outf.write('\0%d' % nf)
- outf.write('\n')
-
- outf.commit()
- finally:
- if not outf.closed:
- outf.abort()
-
-
-def _try_write_cache(basedir, entries):
- try:
- return _write_cache(basedir, entries)
- except IOError, e:
- mutter("cannot update statcache in %s: %s" % (basedir, e))
- except OSError, e:
- mutter("cannot update statcache in %s: %s" % (basedir, e))
-
-
-
-def load_cache(basedir):
- import re
- cache = {}
- seen_paths = {}
- from bzrlib.trace import warning
-
- assert isinstance(basedir, basestring)
-
- sha_re = re.compile(r'[a-f0-9]{40}')
-
- try:
- cachefn = os.path.join(basedir, '.bzr', 'stat-cache')
- cachefile = open(cachefn, 'rb')
- except IOError:
- return cache
-
- line1 = cachefile.readline().rstrip('\r\n')
- if line1 != CACHE_HEADER:
- mutter('cache header marker not found at top of %s; discarding cache'
- % cachefn)
- return cache
-
- for l in cachefile:
- f = l.split('\0')
-
- file_id = f[0].decode('utf-8')
- if file_id in cache:
- warning("duplicated file_id in cache: {%s}" % file_id)
-
- text_sha = f[1]
- if len(text_sha) != 40 or not sha_re.match(text_sha):
- raise BzrCheckError("invalid file SHA-1 in cache: %r" % text_sha)
-
- path = f[2].decode('utf-8')
- if path in seen_paths:
- warning("duplicated path in cache: %r" % path)
- seen_paths[path] = True
-
- entry = (file_id, text_sha, path) + tuple([long(x) for x in f[3:]])
- if len(entry) != 8:
- raise ValueError("invalid statcache entry tuple %r" % entry)
-
- cache[file_id] = entry
- return cache
-
-
-
-def _files_from_inventory(inv):
- for path, ie in inv.iter_entries():
- if ie.kind != 'file':
- continue
- yield ie.file_id, path
-
-
-
-def update_cache(basedir, inv, flush=False):
- """Update and return the cache for the branch.
-
- The returned cache may contain entries that have not been written
- to disk for files recently touched.
-
- flush -- discard any previous cache and recalculate from scratch.
- """
-
- # load the existing cache; use information there to find a list of
- # files ordered by inode, which is alleged to be the fastest order
- # to stat the files.
-
- to_update = _files_from_inventory(inv)
-
- assert isinstance(flush, bool)
- if flush:
- cache = {}
- else:
- cache = load_cache(basedir)
-
- by_inode = []
- without_inode = []
- for file_id, path in to_update:
- if file_id in cache:
- by_inode.append((cache[file_id][SC_INO], file_id, path))
- else:
- without_inode.append((file_id, path))
- by_inode.sort()
-
- to_update = [a[1:] for a in by_inode] + without_inode
-
- stat_cnt = missing_cnt = new_cnt = hardcheck = change_cnt = 0
-
- # dangerfiles have been recently touched and can't be committed to
- # a persistent cache yet, but they are returned to the caller.
- dangerfiles = []
-
- now = int(time.time())
-
- ## mutter('update statcache under %r' % basedir)
- for file_id, path in to_update:
- abspath = os.path.join(basedir, path)
- fp = fingerprint(abspath)
- stat_cnt += 1
-
- cacheentry = cache.get(file_id)
-
- if fp == None: # not here
- if cacheentry:
- del cache[file_id]
- change_cnt += 1
- missing_cnt += 1
- continue
- elif not cacheentry:
- new_cnt += 1
-
- if (fp[FP_MTIME] >= now) or (fp[FP_CTIME] >= now):
- dangerfiles.append(file_id)
-
- if cacheentry and (cacheentry[3:] == fp):
- continue # all stat fields unchanged
-
- hardcheck += 1
-
- dig = sha.new(file(abspath, 'rb').read()).hexdigest()
-
- # We update the cache even if the digest has not changed from
- # last time we looked, so that the fingerprint fields will
- # match in future.
- cacheentry = (file_id, dig, path) + fp
- cache[file_id] = cacheentry
- change_cnt += 1
-
- mutter('statcache: statted %d files, read %d files, %d changed, %d dangerous, '
- '%d deleted, %d new, '
- '%d in cache'
- % (stat_cnt, hardcheck, change_cnt, len(dangerfiles),
- missing_cnt, new_cnt, len(cache)))
-
- if change_cnt:
- mutter('updating on-disk statcache')
-
- if dangerfiles:
- safe_cache = cache.copy()
- for file_id in dangerfiles:
- del safe_cache[file_id]
- else:
- safe_cache = cache
-
- _try_write_cache(basedir, safe_cache.itervalues())
-
- return cache
More information about the Pkg-bazaar-commits
mailing list