[game-data-packager] 10/16: use_file: try to match against an iterable of candidates, not a single file
Simon McVittie
smcv at debian.org
Fri Jan 8 09:14:03 UTC 2016
This is an automated email from the git hooks/post-receive script.
smcv pushed a commit to branch master
in repository game-data-packager.
commit 06432529eee7516d0652f8c7d85c031015c69c6b
Author: Simon McVittie <smcv at debian.org>
Date: Thu Jan 7 12:23:35 2016 +0000
use_file: try to match against an iterable of candidates, not a single file
This avoids needing separate logic to decide whether mismatches should
be logged: use_file already knows the full set of files we're trying to match.
---
game_data_packager/build.py | 195 +++++++++++++++++++-------------------------
1 file changed, 83 insertions(+), 112 deletions(-)
diff --git a/game_data_packager/build.py b/game_data_packager/build.py
index be0b9dd..d35c81b 100644
--- a/game_data_packager/build.py
+++ b/game_data_packager/build.py
@@ -424,20 +424,28 @@ class PackagingTask(object):
self._cleanup_dirs.add(self.__workdir)
return self.__workdir
- def use_file(self, wanted, path, hashes=None, log=True):
- logger.debug('found possible %s at %s', wanted.name, path)
+ def use_file(self, found, candidates, path, hashes=None):
+ logger.debug('found %s at %s', found, path)
size = os.stat(path).st_size
- if wanted.size is not None and wanted.size != size:
- if log:
- logger.warning('found possible %s\n' +
- 'but its size does not match:\n' +
- ' file: %s\n' +
- ' expected: %d bytes\n' +
- ' found : %d bytes',
- wanted.name,
- path,
- wanted.size,
- size)
+
+ assert candidates
+
+ remaining = set()
+
+ for wanted in candidates:
+ if wanted.size is None or wanted.size == size:
+ remaining.add(wanted)
+ else:
+ logger.debug('... not the right size to be %s', wanted.name)
+
+ if not remaining:
+ for candidate in candidates:
+ if not candidate.distinctive_name:
+ # silently ignore dissimilar file
+ logger.debug('... not a distinctive name, ignoring')
+ return False
+
+ self._log_not_any_of(path, size, hashes, found, candidates)
return False
if hashes is None:
@@ -446,70 +454,43 @@ class PackagingTask(object):
hashes = HashedFile.from_file(path, open(path, 'rb'), size=size,
progress=(size > QUITE_LARGE))
- if not wanted.skip_hash_matching and not hashes.matches(wanted):
- # always silence warning if several files have same
- # look_for & same size: e.g. wolf3d, spear, dune2, ...
- if log:
- sizes = []
- for lf in wanted.look_for:
- for filename in self.game.known_filenames[lf]:
- size = self.game.files[filename].size
- if size in sizes:
- log = False
- break
- sizes.append(size)
-
- if log:
- logger.warning('found possible %s\n' +
- 'but its checksums do not match:\n' +
- ' file: %s\n' +
- ' expected:\n' +
- ' md5: %s\n' +
- ' sha1: %s\n' +
- ' sha256: %s\n' +
- ' got:\n' +
- ' md5: %s\n' +
- ' sha1: %s\n' +
- ' sha256: %s',
- wanted.name,
- path,
- wanted.md5,
- wanted.sha1,
- wanted.sha256,
- hashes.md5,
- hashes.sha1,
- hashes.sha256)
- return False
+ for wanted in remaining:
+ if not wanted.skip_hash_matching and not hashes.matches(wanted):
+ logger.debug('... not the right hashes to be %s', wanted.name)
+ continue
- if wanted.unsuitable:
- logger.warning('"%s" matches known file "%s" but cannot '
- 'be used:\n%s', path, wanted.name, wanted.unsuitable)
- # ... but do not continue processing
- return True
+ if wanted.unsuitable:
+ logger.warning('"%s" matches known file "%s" but cannot '
+ 'be used:\n%s', path, wanted.name, wanted.unsuitable)
+ # ... but do not continue processing
+ return True
- logger.debug('... yes, looks good')
- self.found[wanted.name] = path
- self.file_status[wanted.name] = FillResult.COMPLETE
+ logger.debug('... matches %s', wanted.name)
+ self.found[wanted.name] = path
+ self.file_status[wanted.name] = FillResult.COMPLETE
- # opportunistically use this same file to provide anything else that
- # has the same hashes (a duplicate file with a different name)
- for other_name in (self.game.known_md5s.get(hashes.md5, set()) |
- self.game.known_sha1s.get(hashes.sha1, set()) |
- self.game.known_sha256s.get(hashes.sha256, set())):
- other = self.game.files[other_name]
- if other.matches(hashes):
- self.found[other_name] = path
- self.file_status[other_name] = FillResult.COMPLETE
+ # opportunistically use this same file to provide anything else that
+ # has the same hashes (a duplicate file with a different name)
+ for other_name in (self.game.known_md5s.get(hashes.md5, set()) |
+ self.game.known_sha1s.get(hashes.sha1, set()) |
+ self.game.known_sha256s.get(hashes.sha256, set())):
+ other = self.game.files[other_name]
+ if other is not wanted and other.matches(hashes):
+ logger.debug('... also matches %s', other_name)
+ self.found[other_name] = path
+ self.file_status[other_name] = FillResult.COMPLETE
+
+ # no point in continuing, we've identified everything that matches
+ # the hashes
+ return True
- return True
+ self._log_not_any_of(path, size, hashes, found, candidates)
def consider_file(self, path, really_should_match_something, trusted=False):
if not os.path.exists(path):
# dangling symlink
return
- tried = set()
-
match_path = '/' + path.lower()
size = os.stat(path).st_size
@@ -548,49 +529,36 @@ class PackagingTask(object):
for look_for, candidates in self.game.known_filenames.items():
if match_path.endswith('/' + look_for):
- hashes = _ensure_hashes(hashes, path, size)
- for wanted_name in candidates:
- if wanted_name in tried:
- continue
- tried.add(wanted_name)
- if self.use_file(self.game.files[wanted_name], path, hashes,
- log=(self.game.files[wanted_name].distinctive_name
- and len(candidates) == 1)):
+ candidates = [self.game.files[c] for c in candidates]
+ if candidates:
+ hashes = _ensure_hashes(hashes, path, size)
+ if self.use_file('possible "%s"' % look_for, candidates,
+ path, hashes):
return
- else:
- if len(candidates) > 1:
- candidates = [self.game.files[c] for c in candidates]
- for candidate in candidates:
- if not candidate.distinctive_name:
- break
- else:
- self._log_not_any_of(path, size, hashes,
- 'possible "%s"' % look_for, candidates)
if size in self.game.known_sizes:
- hashes = _ensure_hashes(hashes, path, size)
candidates = self.game.known_sizes[size]
- for wanted_name in candidates:
- if wanted_name in tried:
- continue
- tried.add(wanted_name)
- if self.use_file(self.game.files[wanted_name], path, hashes,
- log=(len(candidates) == 1)):
+ if candidates:
+ hashes = _ensure_hashes(hashes, path, size)
+ candidates = [self.game.files[c] for c in candidates]
+ if self.use_file('file of size %d' % size,
+ candidates, path, hashes):
return
- else:
- if len(candidates) > 1:
- self._log_not_any_of(path, size, hashes,
- 'file of size %d' % size,
- [self.game.files[c] for c in candidates])
if hashes is not None:
- for wanted_name in (self.game.known_md5s.get(hashes.md5, set()) |
+ look_for = None
+ candidates = set()
+
+ for c in (self.game.known_md5s.get(hashes.md5, set()) |
self.game.known_sha1s.get(hashes.sha1, set()) |
self.game.known_sha256s.get(hashes.sha256, set())):
- if wanted_name is not None and wanted_name not in tried:
- tried.add(wanted_name)
- if self.use_file(self.game.files[wanted_name], path, hashes):
- return
+ look_for = c
+ candidates.add(self.game.files[c])
+
+ if candidates and self.use_file('possible "%s"' % c,
+ candidates, path, hashes):
+ return
+
if not trusted:
trusted = GOG.verify_checksum(path, size, hashes.md5)
@@ -645,14 +613,17 @@ class PackagingTask(object):
' size: %d bytes\n' +
' md5: %s\n' +
' sha1: %s\n' +
- ' sha256: %s\n' +
- 'expected one of:\n')
+ ' sha256: %s\n')
args = (why, path, size, hashes.md5, hashes.sha1, hashes.sha256)
- for candidate in candidates:
- if candidate.unsuitable:
- continue
+ candidates = [c for c in candidates if not c.unsuitable]
+ if len(candidates) == 1:
+ message += 'expected:\n'
+ elif len(candidates) > 1:
+ message += 'expected one of:\n'
+
+ for candidate in candidates:
message = message + (' %s:\n' +
' size: ' + (
'%s' if candidate.size is None else '%d bytes') +
@@ -816,7 +787,7 @@ class PackagingTask(object):
if orig_time is not None:
os.utime(tmp, (orig_time, orig_time))
- if not self.use_file(wanted, tmp, hf):
+ if not self.use_file(wanted.name, (wanted,), tmp, hf):
os.remove(tmp)
if should_provide:
@@ -846,7 +817,7 @@ class PackagingTask(object):
progress=(wanted.size > QUITE_LARGE))
orig_time = os.stat(self.found[provider.name]).st_mtime
os.utime(path, (orig_time, orig_time))
- self.use_file(wanted, path, hasher)
+ self.use_file(wanted.name, (wanted,), path, hasher)
def fill_gap(self, package, wanted, download=False, log=True, recheck=False):
"""Try to unpack, download or otherwise obtain wanted.
@@ -948,7 +919,7 @@ class PackagingTask(object):
size=wanted.size, progress=True)
wf.close()
- if self.use_file(wanted, tmp, hf):
+ if self.use_file(wanted.name, (wanted,), tmp, hf):
assert self.found[wanted.name] == tmp
assert (self.file_status[wanted.name] ==
FillResult.COMPLETE)
@@ -1056,7 +1027,7 @@ class PackagingTask(object):
orig_time = os.stat(found_name).st_mtime
os.utime(tmp, (orig_time, orig_time))
- self.use_file(wanted, tmp, None)
+ self.use_file(wanted.name, (wanted,), tmp, None)
elif fmt in ('tar.*', 'tar.gz', 'tar.bz2', 'tar.xz'):
reader = open(found_name, 'rb')
with TarUnpacker(found_name, reader, compression=fmt[4:],
@@ -1268,7 +1239,7 @@ class PackagingTask(object):
self.found[basis.name], out_path])
orig_time = os.stat(found_name).st_mtime
os.utime(out_path, (orig_time, orig_time))
- self.use_file(wanted, out_path)
+ self.use_file(wanted.name, (wanted,), out_path)
elif fmt == 'umod':
with Umod(found_name) as unpacker:
@@ -1279,7 +1250,7 @@ class PackagingTask(object):
FillResult.COMPLETE)
return FillResult.COMPLETE
elif wanted.size == 0:
- self.use_file(wanted, '/dev/null')
+ self.use_file(wanted.name, (wanted,), '/dev/null')
elif self.file_status[provider_name] is FillResult.DOWNLOAD_NEEDED:
# we don't have it, but we can get it
self.file_status[wanted.name] |= FillResult.DOWNLOAD_NEEDED
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/game-data-packager.git
More information about the Pkg-games-commits mailing list