[game-data-packager] 07/09: Unify code to stream members from a TarFile or ZipFile
Simon McVittie
smcv at debian.org
Thu Dec 24 16:28:22 UTC 2015
This is an automated email from the git hooks/post-receive script.
smcv pushed a commit to branch master
in repository game-data-packager.
commit f7915ca5c14feeab88bfcb759a98738afa83760c
Author: Simon McVittie <smcv at debian.org>
Date: Tue Dec 22 16:22:14 2015 +0000
Unify code to stream members from a TarFile or ZipFile
---
game_data_packager/build.py | 121 ++++-----------
game_data_packager/unpack/__init__.py | 282 ++++++++++++++++++++++++++++++++++
game_data_packager/unpack/__main__.py | 41 +++++
3 files changed, 357 insertions(+), 87 deletions(-)
diff --git a/game_data_packager/build.py b/game_data_packager/build.py
index 535584a..548d209 100644
--- a/game_data_packager/build.py
+++ b/game_data_packager/build.py
@@ -41,6 +41,7 @@ except ImportError:
from .gog import GOG
from .paths import (DATADIR, ETCDIR)
+from .unpack import (TarUnpacker, ZipUnpacker)
from .util import (AGENT,
MEBIBYTE,
PACKAGE_CACHE,
@@ -616,9 +617,9 @@ class PackagingTask(object):
elif extension.lower() == '.deb' and which('dpkg-deb'):
with subprocess.Popen(['dpkg-deb', '--fsys-tarfile', path],
stdout=subprocess.PIPE) as fsys_process:
- with tarfile.open(path + '//data.tar.*', mode='r|',
- fileobj=fsys_process.stdout) as tar:
- self.consider_tar_stream(path, tar)
+ with TarUnpacker(path + '//data.tar.*',
+ reader=fsys_process.stdout, compression='') as tar:
+ self.consider_stream(path, tar)
def _log_not_any_of(self, path, size, hashes, why, candidates):
message = ('found %s but it is not one of the expected ' +
@@ -723,7 +724,7 @@ class PackagingTask(object):
logger.debug('%s: %s', package.name, result)
return result
- def consider_zip(self, name, zf, provider=None):
+ def consider_stream(self, name, unpacker, provider=None):
if provider is None:
try_to_unpack = self.game.files
should_provide = set()
@@ -733,8 +734,8 @@ class PackagingTask(object):
should_provide = set(try_to_unpack)
distinctive_dirs = provider.unpack.get('distinctive_dirs', True)
- for entry in zf.infolist():
- if not entry.file_size and entry.filename.endswith('/'):
+ for entry in unpacker:
+ if not entry.is_extractable or not entry.is_regular_file:
continue
for filename in try_to_unpack:
@@ -746,10 +747,10 @@ class PackagingTask(object):
if wanted.alternatives:
continue
- if wanted.size is not None and wanted.size != entry.file_size:
+ if wanted.size not in (None, entry.size):
continue
- match_path = '/' + entry.filename.lower()
+ match_path = '/' + entry.name.lower()
for lf in wanted.look_for:
if not distinctive_dirs:
@@ -760,7 +761,7 @@ class PackagingTask(object):
if filename in self.found:
continue
- entryfile = zf.open(entry)
+ entryfile = unpacker.open(entry)
tmp = os.path.join(self.get_workdir(),
'tmp', wanted.name)
@@ -768,77 +769,28 @@ class PackagingTask(object):
mkdir_p(tmpdir)
wf = open(tmp, 'wb')
- if entry.file_size > QUITE_LARGE:
- logger.info('extracting %s from %s', entry.filename, name)
- else:
- logger.debug('extracting %s from %s', entry.filename, name)
- hf = HashedFile.from_file(
- name + '//' + entry.filename, entryfile, wf,
- size=entry.file_size,
- progress=(entry.file_size > QUITE_LARGE))
- wf.close()
- orig_time = time.mktime(entry.date_time + (0, 0, -1))
- os.utime(tmp, (orig_time, orig_time))
-
- if not self.use_file(wanted, tmp, hf):
- os.remove(tmp)
-
- if should_provide:
- for missing in sorted(should_provide):
- logger.error('%s should have provided %s but did not',
- name, missing)
-
- def consider_tar_stream(self, name, tar, provider=None):
- if provider is None:
- try_to_unpack = self.game.files
- should_provide = set()
- else:
- try_to_unpack = set(f.name for f in provider.provides_files)
- should_provide = set(try_to_unpack)
-
- for entry in tar:
- if not entry.isfile():
- continue
-
- for filename in try_to_unpack:
- wanted = self.game.files.get(filename)
-
- if wanted is None:
- continue
- if wanted.alternatives:
- continue
-
- if wanted.size is not None and wanted.size != entry.size:
- continue
-
- match_path = '/' + entry.name.lower()
-
- for lf in wanted.look_for:
- if match_path.endswith('/' + lf):
- should_provide.discard(filename)
-
- if filename in self.found:
- continue
-
- entryfile = tar.extractfile(entry)
-
- tmp = os.path.join(self.get_workdir(),
- 'tmp', wanted.name)
- tmpdir = os.path.dirname(tmp)
- mkdir_p(tmpdir)
-
- wf = open(tmp, 'wb')
- if entry.size > QUITE_LARGE:
+ if entry.size is not None and entry.size > QUITE_LARGE:
+ large = True
logger.info('extracting %s from %s', entry.name, name)
else:
+ large = False
logger.debug('extracting %s from %s', entry.name, name)
hf = HashedFile.from_file(
name + '//' + entry.name, entryfile, wf,
- size=entry.size,
- progress=(entry.size > QUITE_LARGE))
+ size=entry.size, progress=large)
wf.close()
- os.utime(tmp, (entry.mtime, entry.mtime))
+
+ if entry.mtime is not None:
+ orig_time = entry.mtime
+ elif provider is not None:
+ orig_name = self.found[provider.name]
+ orig_time = os.stat(orig_name).st_mtime
+ else:
+ orig_time = None
+
+ if orig_time is not None:
+ os.utime(tmp, (orig_time, orig_time))
if not self.use_file(wanted, tmp, hf):
os.remove(tmp)
@@ -1071,26 +1023,21 @@ class PackagingTask(object):
os.utime(tmp, (orig_time, orig_time))
self.use_file(wanted, tmp, None)
elif fmt in ('tar.gz', 'tar.bz2', 'tar.xz'):
- rf = open(found_name, 'rb')
- if 'skip' in provider.unpack:
- skipped = rf.read(provider.unpack['skip'])
- assert len(skipped) == provider.unpack['skip']
- with tarfile.open(
- found_name,
- mode='r|' + fmt[4:],
- fileobj=rf) as tar:
- self.consider_tar_stream(found_name, tar, provider)
+ reader = open(found_name, 'rb')
+ with TarUnpacker(found_name, reader, compression=fmt[4:],
+ skip=provider.unpack.get('skip', 0)) as tar:
+ self.consider_stream(found_name, tar, provider)
elif fmt == 'deb':
with subprocess.Popen(['dpkg-deb', '--fsys-tarfile', found_name],
stdout=subprocess.PIPE) as fsys_process:
- with tarfile.open(found_name + '//data.tar.*', mode='r|',
- fileobj=fsys_process.stdout) as tar:
- self.consider_tar_stream(found_name, tar, provider)
+ with TarUnpacker(found_name + '//data.tar.*',
+ fsys_process.stdout, compression='') as tar:
+ self.consider_stream(found_name, tar, provider)
elif fmt == 'zip':
if provider.name.startswith('gog_'):
package.used_sources.add(provider.name)
- with zipfile.ZipFile(found_name, 'r') as zf:
- self.consider_zip(found_name, zf, provider)
+ with ZipUnpacker(found_name) as unpacker:
+ self.consider_stream(found_name, unpacker, provider)
elif fmt == 'lha':
to_unpack = provider.unpack.get('unpack',
[f.name for f in provider.provides_files])
diff --git a/game_data_packager/unpack/__init__.py b/game_data_packager/unpack/__init__.py
new file mode 100644
index 0000000..59373a6
--- /dev/null
+++ b/game_data_packager/unpack/__init__.py
@@ -0,0 +1,282 @@
+#!/usr/bin/python3
+# encoding=utf-8
+#
+# Copyright © 2014-2015 Simon McVittie <smcv at debian.org>
+# Copyright © 2015 Alexandre Detiste <alexandre at detiste.be>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You can find the GPL license text on a Debian system under
+# /usr/share/common-licenses/GPL-2.
+
+from abc import (ABCMeta, abstractmethod)
+import errno
+import os
+import shlex
+import shutil
+import tarfile
+import time
+import zipfile
+
+class UnpackableEntry(metaclass=ABCMeta):
+    """One member of a StreamUnpackable archive.
+
+    Subclasses must supply is_directory, is_regular_file, name and
+    size; the remaining properties have default implementations.
+    """
+
+    @property
+    @abstractmethod
+    def is_directory(self):
+        """True if the entry is a directory."""
+        raise NotImplementedError
+
+    @property
+    @abstractmethod
+    def is_regular_file(self):
+        """True for a regular file; False for a directory, a symlink,
+        or a special entry such as an instruction to patch some other
+        file.
+        """
+        raise NotImplementedError
+
+    @property
+    def is_extractable(self):
+        """True if this entry can be extracted.
+
+        Unless overridden, exactly the regular files are extractable.
+        """
+        return self.is_regular_file
+
+    @property
+    def mtime(self):
+        """The last-modification time, or None if it is not recorded."""
+        return None
+
+    @property
+    @abstractmethod
+    def name(self):
+        """The absolute or relative filename, using '/' as separator."""
+        raise NotImplementedError
+
+    @property
+    @abstractmethod
+    def size(self):
+        """The size in bytes."""
+        raise NotImplementedError
+
+    @property
+    def type_indicator(self):
+        """ASCII symbols for the file type followed by extractability."""
+        if self.is_directory:
+            kind = 'd'
+        else:
+            kind = '-' if self.is_regular_file else '?'
+
+        return kind + ('r' if self.is_extractable else '-')
+
+class StreamUnpackable(metaclass=ABCMeta):
+    """An archive in which entries can be inspected and extracted
+    by iteration.
+    """
+
+    @abstractmethod
+    def __iter__(self):
+        """Iterate through UnpackableEntry objects."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def open(self, member):
+        """Open a binary file-like entry for the name or entry.
+        """
+        raise NotImplementedError
+
+    def extract(self, member, path=None):
+        """Extract the given member from the archive into the given
+        directory (the current directory if path is None).
+
+        Empty and '..' components are dropped from the member's name,
+        so the file is always created below path. Anything already
+        present at the destination is replaced.
+
+        Raises ValueError if no usable filename is left.
+        """
+        with self.open(member) as reader:
+            # A reader that knows which entry it came from takes
+            # precedence. Plain file objects, such as the ones tarfile
+            # and zipfile hand out, have no such attribute: fall back to
+            # the member we were given, which is an entry or a bare name.
+            entry = getattr(reader, 'entry', member)
+            filename = getattr(entry, 'name', entry)
+
+            # Filter per component: a single str.replace('/../', '/')
+            # misses overlapping runs such as 'a/../../x'.
+            parts = [part for part in filename.split('/')
+                     if part not in ('', '..')]
+
+            if not parts:
+                raise ValueError('%r does not name a file' % (filename,))
+
+            if path is None:
+                path = '.'
+
+            dest = os.path.join(path, *parts)
+            parent = os.path.dirname(dest)
+
+            if parent:
+                os.makedirs(parent, exist_ok=True)
+
+            # Mode 'x' below refuses to overwrite, so clear the way first.
+            try:
+                os.remove(dest)
+            except FileNotFoundError:
+                pass
+
+            with open(dest, 'xb') as writer:
+                shutil.copyfileobj(reader, writer)
+
+    def extractall(self, path, members=None):
+        """Extract every extractable entry into the directory path.
+
+        If members is not None, only entries whose name is in members
+        are extracted.
+        """
+        for entry in self:
+            if entry.is_extractable:
+                if members is None or entry.name in members:
+                    self.extract(entry, path)
+
+    def printdir(self):
+        """Print one line per entry: type, size, mtime (UTC) and the
+        shell-quoted name. Unknown sizes and times are shown as '?'.
+        """
+        for entry in self:
+            if entry.size is None:
+                size = '?' * 9
+            else:
+                size = '%9s' % entry.size
+
+            if entry.mtime is not None:
+                mtime = time.strftime('%Y-%m-%d %H:%M:%S',
+                                      time.gmtime(entry.mtime))
+            else:
+                mtime = '????-??-?? ??:??:??'
+
+            print('%s %s %s %s' % (entry.type_indicator, size, mtime,
+                                   shlex.quote(entry.name)))
+
+class WrapperUnpacker(StreamUnpackable):
+    """Shared machinery for a StreamUnpackable that delegates to a
+    TarFile-like object kept in self._impl.
+    """
+
+    def __init__(self):
+        # Subclasses assign the wrapped archive object here.
+        self._impl = None
+
+    @abstractmethod
+    def _wrap_entry(self, entry):
+        """Wrap one member of the underlying archive."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _is_entry(self, entry):
+        """Tell whether entry is acceptable to this unpacker's open()."""
+        raise NotImplementedError
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, ex_type, ex_value, ex_traceback):
+        impl = self._impl
+
+        if impl is None:
+            return
+
+        impl.close()
+        self._impl = None
+
+    def __iter__(self):
+        yield from map(self._wrap_entry, self._impl)
+
+    def open(self, entry):
+        """Open the given wrapped entry via the underlying archive."""
+        assert self._is_entry(entry)
+        wrapped = entry.impl
+        return self._impl.open(wrapped)
+
+class TarEntry(UnpackableEntry):
+    """UnpackableEntry backed by a tarfile.TarInfo, stored as impl."""
+
+    __slots__ = 'impl'
+
+    def __init__(self, impl):
+        self.impl = impl
+
+    @property
+    def is_extractable(self):
+        # TarFile.extractfile() returns None for directories and other
+        # special members, so only regular files can be streamed out.
+        return self.impl.isfile()
+
+    @property
+    def is_directory(self):
+        return self.impl.isdir()
+
+    @property
+    def is_regular_file(self):
+        return self.impl.isfile()
+
+    @property
+    def mtime(self):
+        return self.impl.mtime
+
+    @property
+    def name(self):
+        return self.impl.name
+
+    @property
+    def size(self):
+        return self.impl.size
+
+class TarUnpacker(WrapperUnpacker):
+    """Stream the members of a (possibly compressed) tar archive."""
+
+    def __init__(self, name, reader=None, compression='*', skip=0):
+        """Open the tar archive called name for streaming.
+
+        reader is a binary file object to read from; if it is None,
+        the file called name is opened, and closed again on exit.
+        compression is appended to tarfile's 'r|' stream mode.
+        skip is a number of leading bytes to discard from reader
+        before the tar data starts.
+        """
+        super(TarUnpacker, self).__init__()
+
+        # tarfile does not close a fileobj that was passed in to it, so
+        # keep track of a file that we opened ourselves.
+        self._owned_reader = None
+
+        if reader is None:
+            reader = self._owned_reader = open(name, 'rb')
+
+        try:
+            if skip:
+                discard = reader.read(skip)
+                assert len(discard) == skip
+
+            self._impl = tarfile.open(name, mode='r|' + compression,
+                                      fileobj=reader)
+        except BaseException:
+            self._close_owned_reader()
+            raise
+
+    def _close_owned_reader(self):
+        """Close the file opened by __init__, if there is one."""
+        if self._owned_reader is not None:
+            self._owned_reader.close()
+            self._owned_reader = None
+
+    def __exit__(self, ex_type, ex_value, ex_traceback):
+        try:
+            super(TarUnpacker, self).__exit__(
+                ex_type, ex_value, ex_traceback)
+        finally:
+            self._close_owned_reader()
+
+    def open(self, entry):
+        """Return a binary reader for the given TarEntry."""
+        assert isinstance(entry, TarEntry)
+        return self._impl.extractfile(entry.impl)
+
+    def _is_entry(self, entry):
+        return isinstance(entry, TarEntry)
+
+    def _wrap_entry(self, entry):
+        return TarEntry(entry)
+
+class ZipEntry(UnpackableEntry):
+    """UnpackableEntry backed by a zipfile.ZipInfo, stored as impl."""
+
+    __slots__ = 'impl'
+
+    def __init__(self, impl):
+        self.impl = impl
+
+    @property
+    def is_extractable(self):
+        # Extracting a directory entry would create an empty regular
+        # file in its place, which then blocks the files beneath it.
+        return self.is_regular_file
+
+    @property
+    def is_directory(self):
+        # Directory entries are recognised by a trailing slash.
+        return self.name.endswith('/')
+
+    @property
+    def is_regular_file(self):
+        return not self.name.endswith('/')
+
+    @property
+    def mtime(self):
+        # date_time is a 6-tuple; pad it to a struct_time-style 9-tuple
+        # with an unknown DST flag (-1) for mktime().
+        return time.mktime(self.impl.date_time + (0, 0, -1))
+
+    @property
+    def name(self):
+        return self.impl.filename
+
+    @property
+    def size(self):
+        return self.impl.file_size
+
+class ZipUnpacker(WrapperUnpacker):
+    """Stream the members of a zip archive."""
+
+    def __init__(self, name):
+        """Open the zip archive called name for reading."""
+        super(ZipUnpacker, self).__init__()
+        self._impl = zipfile.ZipFile(name, 'r')
+
+    def __iter__(self):
+        # A ZipFile is not itself iterable; walk its ZipInfo list.
+        for entry in self._impl.infolist():
+            yield self._wrap_entry(entry)
+
+    def _is_entry(self, entry):
+        return isinstance(entry, ZipEntry)
+
+    def _wrap_entry(self, entry):
+        # Wrap the ZipInfo that was passed in, not the unpacker itself.
+        return ZipEntry(entry)
diff --git a/game_data_packager/unpack/__main__.py b/game_data_packager/unpack/__main__.py
new file mode 100644
index 0000000..3ef4640
--- /dev/null
+++ b/game_data_packager/unpack/__main__.py
@@ -0,0 +1,41 @@
+#!/usr/bin/python3
+# encoding=utf-8
+#
+# Copyright © 2015 Simon McVittie <smcv at debian.org>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You can find the GPL license text on a Debian system under
+# /usr/share/common-licenses/GPL-2.
+
+import argparse
+import tarfile
+import zipfile
+
+from . import (TarUnpacker, ZipUnpacker)
+
+if __name__ == '__main__':
+    # List the contents of an archive, or with --output, extract it.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--output', '-o', help='extract to OUTPUT',
+                        default=None)
+    parser.add_argument('archive')
+    args = parser.parse_args()
+
+    # Zip detection is tried first, then tar.
+    if zipfile.is_zipfile(args.archive):
+        unpacker = ZipUnpacker(args.archive)
+    elif tarfile.is_tarfile(args.archive):
+        unpacker = TarUnpacker(args.archive)
+    else:
+        raise SystemExit('Cannot work out how to unpack %r' % args.archive)
+
+    # Both unpackers are context managers: make sure the archive is
+    # closed again whether or not unpacking succeeds.
+    with unpacker:
+        if args.output:
+            unpacker.extractall(args.output)
+        else:
+            unpacker.printdir()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/game-data-packager.git
More information about the Pkg-games-commits
mailing list