[game-data-packager] 07/09: Unify code to stream members from a TarFile or ZipFile

Simon McVittie smcv at debian.org
Thu Dec 24 16:28:22 UTC 2015


This is an automated email from the git hooks/post-receive script.

smcv pushed a commit to branch master
in repository game-data-packager.

commit f7915ca5c14feeab88bfcb759a98738afa83760c
Author: Simon McVittie <smcv at debian.org>
Date:   Tue Dec 22 16:22:14 2015 +0000

    Unify code to stream members from a TarFile or ZipFile
---
 game_data_packager/build.py           | 121 ++++-----------
 game_data_packager/unpack/__init__.py | 282 ++++++++++++++++++++++++++++++++++
 game_data_packager/unpack/__main__.py |  41 +++++
 3 files changed, 357 insertions(+), 87 deletions(-)

diff --git a/game_data_packager/build.py b/game_data_packager/build.py
index 535584a..548d209 100644
--- a/game_data_packager/build.py
+++ b/game_data_packager/build.py
@@ -41,6 +41,7 @@ except ImportError:
 
 from .gog import GOG
 from .paths import (DATADIR, ETCDIR)
+from .unpack import (TarUnpacker, ZipUnpacker)
 from .util import (AGENT,
         MEBIBYTE,
         PACKAGE_CACHE,
@@ -616,9 +617,9 @@ class PackagingTask(object):
             elif extension.lower() == '.deb' and which('dpkg-deb'):
                 with subprocess.Popen(['dpkg-deb', '--fsys-tarfile', path],
                             stdout=subprocess.PIPE) as fsys_process:
-                    with tarfile.open(path + '//data.tar.*', mode='r|',
-                           fileobj=fsys_process.stdout) as tar:
-                        self.consider_tar_stream(path, tar)
+                    with TarUnpacker(path + '//data.tar.*',
+                           reader=fsys_process.stdout, compression='') as tar:
+                        self.consider_stream(path, tar)
 
     def _log_not_any_of(self, path, size, hashes, why, candidates):
         message = ('found %s but it is not one of the expected ' +
@@ -723,7 +724,7 @@ class PackagingTask(object):
         logger.debug('%s: %s', package.name, result)
         return result
 
-    def consider_zip(self, name, zf, provider=None):
+    def consider_stream(self, name, unpacker, provider=None):
         if provider is None:
             try_to_unpack = self.game.files
             should_provide = set()
@@ -733,8 +734,8 @@ class PackagingTask(object):
             should_provide = set(try_to_unpack)
             distinctive_dirs = provider.unpack.get('distinctive_dirs', True)
 
-        for entry in zf.infolist():
-            if not entry.file_size and entry.filename.endswith('/'):
+        for entry in unpacker:
+            if not entry.is_extractable or not entry.is_regular_file:
                 continue
 
             for filename in try_to_unpack:
@@ -746,10 +747,10 @@ class PackagingTask(object):
                 if wanted.alternatives:
                     continue
 
-                if wanted.size is not None and wanted.size != entry.file_size:
+                if wanted.size not in (None, entry.size):
                     continue
 
-                match_path = '/' + entry.filename.lower()
+                match_path = '/' + entry.name.lower()
 
                 for lf in wanted.look_for:
                     if not distinctive_dirs:
@@ -760,7 +761,7 @@ class PackagingTask(object):
                         if filename in self.found:
                             continue
 
-                        entryfile = zf.open(entry)
+                        entryfile = unpacker.open(entry)
 
                         tmp = os.path.join(self.get_workdir(),
                                 'tmp', wanted.name)
@@ -768,77 +769,28 @@ class PackagingTask(object):
                         mkdir_p(tmpdir)
 
                         wf = open(tmp, 'wb')
-                        if entry.file_size > QUITE_LARGE:
-                            logger.info('extracting %s from %s', entry.filename, name)
-                        else:
-                            logger.debug('extracting %s from %s', entry.filename, name)
-                        hf = HashedFile.from_file(
-                                name + '//' + entry.filename, entryfile, wf,
-                                size=entry.file_size,
-                                progress=(entry.file_size > QUITE_LARGE))
-                        wf.close()
-                        orig_time = time.mktime(entry.date_time + (0, 0, -1))
-                        os.utime(tmp, (orig_time, orig_time))
-
-                        if not self.use_file(wanted, tmp, hf):
-                            os.remove(tmp)
-
-        if should_provide:
-            for missing in sorted(should_provide):
-                logger.error('%s should have provided %s but did not',
-                        name, missing)
-
-    def consider_tar_stream(self, name, tar, provider=None):
-        if provider is None:
-            try_to_unpack = self.game.files
-            should_provide = set()
-        else:
-            try_to_unpack = set(f.name for f in provider.provides_files)
-            should_provide = set(try_to_unpack)
-
-        for entry in tar:
-            if not entry.isfile():
-                continue
-
-            for filename in try_to_unpack:
-                wanted = self.game.files.get(filename)
-
-                if wanted is None:
-                    continue
 
-                if wanted.alternatives:
-                    continue
-
-                if wanted.size is not None and wanted.size != entry.size:
-                    continue
-
-                match_path = '/' + entry.name.lower()
-
-                for lf in wanted.look_for:
-                    if match_path.endswith('/' + lf):
-                        should_provide.discard(filename)
-
-                        if filename in self.found:
-                            continue
-
-                        entryfile = tar.extractfile(entry)
-
-                        tmp = os.path.join(self.get_workdir(),
-                                'tmp', wanted.name)
-                        tmpdir = os.path.dirname(tmp)
-                        mkdir_p(tmpdir)
-
-                        wf = open(tmp, 'wb')
-                        if entry.size > QUITE_LARGE:
+                        if entry.size is not None and entry.size > QUITE_LARGE:
+                            large = True
                             logger.info('extracting %s from %s', entry.name, name)
                         else:
+                            large = False
                             logger.debug('extracting %s from %s', entry.name, name)
                         hf = HashedFile.from_file(
                                 name + '//' + entry.name, entryfile, wf,
-                                size=entry.size,
-                                progress=(entry.size > QUITE_LARGE))
+                                size=entry.size, progress=large)
                         wf.close()
-                        os.utime(tmp, (entry.mtime, entry.mtime))
+
+                        if entry.mtime is not None:
+                            orig_time = entry.mtime
+                        elif provider is not None:
+                            orig_name = self.found[provider.name]
+                            orig_time = os.stat(orig_name).st_mtime
+                        else:
+                            orig_time = None
+
+                        if orig_time is not None:
+                            os.utime(tmp, (orig_time, orig_time))
 
                         if not self.use_file(wanted, tmp, hf):
                             os.remove(tmp)
@@ -1071,26 +1023,21 @@ class PackagingTask(object):
                     os.utime(tmp, (orig_time, orig_time))
                     self.use_file(wanted, tmp, None)
                 elif fmt in ('tar.gz', 'tar.bz2', 'tar.xz'):
-                    rf = open(found_name, 'rb')
-                    if 'skip' in provider.unpack:
-                        skipped = rf.read(provider.unpack['skip'])
-                        assert len(skipped) == provider.unpack['skip']
-                    with tarfile.open(
-                            found_name,
-                            mode='r|' + fmt[4:],
-                            fileobj=rf) as tar:
-                        self.consider_tar_stream(found_name, tar, provider)
+                    reader = open(found_name, 'rb')
+                    with TarUnpacker(found_name, reader, compression=fmt[4:],
+                            skip=provider.unpack.get('skip', 0)) as tar:
+                        self.consider_stream(found_name, tar, provider)
                 elif fmt == 'deb':
                     with subprocess.Popen(['dpkg-deb', '--fsys-tarfile', found_name],
                                 stdout=subprocess.PIPE) as fsys_process:
-                        with tarfile.open(found_name + '//data.tar.*', mode='r|',
-                               fileobj=fsys_process.stdout) as tar:
-                            self.consider_tar_stream(found_name, tar, provider)
+                        with TarUnpacker(found_name + '//data.tar.*',
+                                fsys_process.stdout, compression='') as tar:
+                            self.consider_stream(found_name, tar, provider)
                 elif fmt == 'zip':
                     if provider.name.startswith('gog_'):
                         package.used_sources.add(provider.name)
-                    with zipfile.ZipFile(found_name, 'r') as zf:
-                        self.consider_zip(found_name, zf, provider)
+                    with ZipUnpacker(found_name) as unpacker:
+                        self.consider_stream(found_name, unpacker, provider)
                 elif fmt == 'lha':
                     to_unpack = provider.unpack.get('unpack',
                             [f.name for f in provider.provides_files])
diff --git a/game_data_packager/unpack/__init__.py b/game_data_packager/unpack/__init__.py
new file mode 100644
index 0000000..59373a6
--- /dev/null
+++ b/game_data_packager/unpack/__init__.py
@@ -0,0 +1,282 @@
+#!/usr/bin/python3
+# encoding=utf-8
+#
+# Copyright © 2014-2015 Simon McVittie <smcv at debian.org>
+# Copyright © 2015 Alexandre Detiste <alexandre at detiste.be>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You can find the GPL license text on a Debian system under
+# /usr/share/common-licenses/GPL-2.
+
+from abc import (ABCMeta, abstractmethod)
+import errno
+import os
+import shlex
+import shutil
+import tarfile
+import time
+import zipfile
+
+class UnpackableEntry(metaclass=ABCMeta):
+    """Abstract description of one member of a StreamUnpackable.
+
+    Concrete subclasses must provide is_directory, is_regular_file,
+    name and size; the other properties have default implementations.
+    """
+
+    @property
+    @abstractmethod
+    def is_directory(self):
+        """True if the entry is a directory."""
+        raise NotImplementedError
+
+    @property
+    @abstractmethod
+    def is_regular_file(self):
+        """True only for a regular file.
+
+        Directories, symlinks and special entries (for example an
+        instruction to patch some other file) are not regular files.
+        """
+        raise NotImplementedError
+
+    @property
+    def is_extractable(self):
+        """True if we are able to extract this entry.
+
+        Unless a subclass overrides this, exactly the regular files
+        are considered extractable.
+        """
+        return self.is_regular_file
+
+    @property
+    def mtime(self):
+        """The last-modification time; None (the default) if unknown."""
+        return None
+
+    @property
+    @abstractmethod
+    def name(self):
+        """The absolute or relative filename, with Unix path separators."""
+        raise NotImplementedError
+
+    @property
+    @abstractmethod
+    def size(self):
+        """The size in bytes."""
+        raise NotImplementedError
+
+    @property
+    def type_indicator(self):
+        """ASCII symbols summarising the file type.
+
+        This implementation returns two characters: 'd' for a
+        directory, '-' for a regular file or '?' for anything else,
+        followed by 'r' if the entry is extractable and '-' if not.
+        """
+        if self.is_directory:
+            kind = 'd'
+        else:
+            kind = '-' if self.is_regular_file else '?'
+
+        return kind + ('r' if self.is_extractable else '-')
+
+class StreamUnpackable(metaclass=ABCMeta):
+    """An archive in which entries can be inspected and extracted
+    by iteration.
+    """
+
+    @abstractmethod
+    def __iter__(self):
+        """Iterate through UnpackableEntry objects."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def open(self, member):
+        """Open a binary file-like object for the name or entry.
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def _safe_relative_path(filename):
+        """Return filename reduced to a relative path that cannot
+        climb out of the directory it is later joined to.
+
+        Empty, '.' and '..' components are dropped. Filtering per
+        component is deliberate: a single textual replacement of
+        '/../' misses adjacent occurrences, so a name such as
+        'a/../../../../x' would still escape the destination.
+        """
+        return '/'.join(part for part in filename.split('/')
+                if part not in ('', '.', '..'))
+
+    def extract(self, member, path=None):
+        """Extract the given member from the archive into the given
+        directory.
+
+        member is an entry yielded by iterating this archive, or a
+        member name if open() accepts names. path is the destination
+        directory, defaulting to the current directory. An existing
+        file at the destination is replaced.
+
+        Raises ValueError if no usable filename is left once the
+        member's name has been sanitised.
+        """
+        with self.open(member) as reader:
+            # The objects returned by TarFile.extractfile() and
+            # ZipFile.open() do not say which entry they belong to,
+            # so only use reader.entry if an implementation provides it.
+            entry = getattr(reader, 'entry', None)
+
+            if entry is not None:
+                filename = entry.name
+            elif isinstance(member, str):
+                filename = member
+            else:
+                filename = member.name
+
+            filename = self._safe_relative_path(filename)
+
+            if not filename:
+                raise ValueError(
+                        'cannot extract %r: no usable filename' % (member,))
+
+            if path is None:
+                path = '.'
+
+            dest = os.path.join(path, filename)
+            parent = os.path.dirname(dest)
+
+            # dirname is empty when path is '' and the member has no
+            # directory part; os.makedirs('') would fail.
+            if parent:
+                os.makedirs(parent, exist_ok=True)
+
+            # Remove any previous file so that mode 'x' below succeeds;
+            # a missing file is the expected case.
+            try:
+                os.remove(dest)
+            except OSError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+
+            with open(dest, 'xb') as writer:
+                shutil.copyfileobj(reader, writer)
+
+    def extractall(self, path, members=None):
+        """Extract every regular, extractable entry into path.
+
+        If members is not None, only entries whose name is in members
+        are extracted. Directories and other non-regular entries are
+        skipped: there is no file content to copy for them.
+        """
+        for entry in self:
+            if not entry.is_extractable or not entry.is_regular_file:
+                continue
+
+            if members is None or entry.name in members:
+                self.extract(entry, path)
+
+    def printdir(self):
+        """Print one line per entry to standard output: type indicator,
+        size, modification time (UTC) and shell-quoted name.
+
+        An unknown size or time is shown as question marks.
+        """
+        for entry in self:
+            if entry.size is None:
+                size = '?' * 9
+            else:
+                size = '%9s' % entry.size
+
+            if entry.mtime is not None:
+                mtime = time.strftime('%Y-%m-%d %H:%M:%S',
+                        time.gmtime(entry.mtime))
+            else:
+                mtime = '????-??-?? ??:??:??'
+
+            print('%s %s %s %s' % (entry.type_indicator, size, mtime,
+                shlex.quote(entry.name)))
+
+class WrapperUnpacker(StreamUnpackable):
+    """Common machinery for a StreamUnpackable that delegates to an
+    archive object from another library, kept in self._impl.
+
+    Instances are context managers: leaving the with-block closes
+    the wrapped archive.
+    """
+
+    def __init__(self):
+        # No archive is wrapped until something assigns self._impl.
+        self._impl = None
+
+    @abstractmethod
+    def _wrap_entry(self, entry):
+        """Return our own entry object for one of the wrapped
+        archive's entries.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def _is_entry(self, entry):
+        """Return True if entry is acceptable to open()."""
+        raise NotImplementedError
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, ex_type, ex_value, ex_traceback):
+        impl = self._impl
+
+        if impl is None:
+            return
+
+        impl.close()
+        self._impl = None
+
+    def __iter__(self):
+        for raw in self._impl:
+            wrapped = self._wrap_entry(raw)
+            yield wrapped
+
+    def open(self, entry):
+        assert self._is_entry(entry)
+        archive = self._impl
+        return archive.open(entry.impl)
+
+class TarEntry(UnpackableEntry):
+    """UnpackableEntry view of a tar member, which is kept in impl."""
+    __slots__ = 'impl'
+
+    def __init__(self, impl):
+        # impl must offer isdir(), isfile(), mtime, name and size.
+        self.impl = impl
+
+    @property
+    def name(self):
+        """The member's name, as recorded by impl."""
+        return self.impl.name
+
+    @property
+    def size(self):
+        """The member's size, as recorded by impl."""
+        return self.impl.size
+
+    @property
+    def mtime(self):
+        """The member's modification time, as recorded by impl."""
+        return self.impl.mtime
+
+    @property
+    def is_directory(self):
+        """Whatever impl.isdir() reports."""
+        return self.impl.isdir()
+
+    @property
+    def is_regular_file(self):
+        """Whatever impl.isfile() reports."""
+        return self.impl.isfile()
+
+    @property
+    def is_extractable(self):
+        """Always True: every tar member is reported as extractable."""
+        return True
+
+class TarUnpacker(WrapperUnpacker):
+    """StreamUnpackable that streams the members of a tar archive
+    using tarfile in non-seekable ('r|') mode.
+    """
+
+    def __init__(self, name, reader=None, compression='*', skip=0):
+        """Open a tar archive for streaming.
+
+        name: the archive's filename. It is opened for reading if
+            reader is None, and is always passed on to tarfile.open().
+        reader: an already-open binary file object to read from.
+            The caller remains responsible for closing it.
+        compression: appended to the 'r|' tarfile mode: '' for an
+            uncompressed stream, 'gz', 'bz2' or 'xz', or '*' to
+            let tarfile detect the compression.
+        skip: number of leading bytes to read and discard before the
+            tar data starts.
+
+        Raises tarfile.ReadError if fewer than skip bytes are available.
+        """
+        super(TarUnpacker, self).__init__()
+
+        # tarfile never closes a fileobj that was passed in, so remember
+        # a reader that we opened ourselves and close it in __exit__.
+        self._owned_reader = None
+
+        if reader is None:
+            reader = open(name, 'rb')
+            self._owned_reader = reader
+
+        try:
+            if skip:
+                discard = reader.read(skip)
+
+                # Not an assert: the check must survive python -O.
+                if len(discard) != skip:
+                    raise tarfile.ReadError(
+                            '%s: expected to skip %d bytes but only '
+                            'read %d' % (name, skip, len(discard)))
+
+            self._impl = tarfile.open(name, mode='r|' + compression,
+                    fileobj=reader)
+        except BaseException:
+            # Do not leak our own file handle if setup fails.
+            self._close_owned_reader()
+            raise
+
+    def _close_owned_reader(self):
+        """Close the reader opened by __init__, if there is one."""
+        if self._owned_reader is not None:
+            self._owned_reader.close()
+            self._owned_reader = None
+
+    def __exit__(self, ex_type, ex_value, ex_traceback):
+        try:
+            super(TarUnpacker, self).__exit__(ex_type, ex_value,
+                    ex_traceback)
+        finally:
+            self._close_owned_reader()
+
+    def open(self, entry):
+        """Return a binary file object for the given TarEntry.
+
+        The result is whatever TarFile.extractfile() returns, which
+        is None for members that have no file content.
+        """
+        assert self._is_entry(entry)
+        return self._impl.extractfile(entry.impl)
+
+    def _is_entry(self, entry):
+        return isinstance(entry, TarEntry)
+
+    def _wrap_entry(self, entry):
+        return TarEntry(entry)
+
+class ZipEntry(UnpackableEntry):
+    """UnpackableEntry view of a zip member, which is kept in impl."""
+    __slots__ = 'impl'
+
+    def __init__(self, impl):
+        # impl must offer filename, file_size and date_time.
+        self.impl = impl
+
+    @property
+    def name(self):
+        """The member's filename, as recorded by impl."""
+        return self.impl.filename
+
+    @property
+    def size(self):
+        """The member's file_size, as recorded by impl."""
+        return self.impl.file_size
+
+    @property
+    def mtime(self):
+        """impl.date_time converted by time.mktime().
+
+        The three values appended to date_time fill in the rest of
+        the time tuple; the final -1 leaves the daylight-saving
+        decision to mktime().
+        """
+        time_tuple = self.impl.date_time + (0, 0, -1)
+        return time.mktime(time_tuple)
+
+    @property
+    def is_directory(self):
+        """True if the name ends with a slash."""
+        return self.name.endswith('/')
+
+    @property
+    def is_regular_file(self):
+        """True if the name does not end with a slash."""
+        return not self.name.endswith('/')
+
+    @property
+    def is_extractable(self):
+        """Always True: every zip member is reported as extractable."""
+        return True
+
+class ZipUnpacker(WrapperUnpacker):
+    """StreamUnpackable that reads the members of a zip archive
+    through zipfile.ZipFile.
+    """
+
+    def __init__(self, name):
+        """Open the zip archive with the given filename for reading."""
+        super(ZipUnpacker, self).__init__()
+        self._impl = zipfile.ZipFile(name, 'r')
+
+    def __iter__(self):
+        """Yield a ZipEntry for each member listed by infolist()."""
+        # A ZipFile cannot be iterated directly, so the inherited
+        # __iter__ is not usable here.
+        for entry in self._impl.infolist():
+            yield self._wrap_entry(entry)
+
+    def _is_entry(self, entry):
+        return isinstance(entry, ZipEntry)
+
+    def _wrap_entry(self, entry):
+        # Wrap the member that was passed in, not the unpacker itself:
+        # ZipEntry reads filename, file_size and date_time from it.
+        return ZipEntry(entry)
diff --git a/game_data_packager/unpack/__main__.py b/game_data_packager/unpack/__main__.py
new file mode 100644
index 0000000..3ef4640
--- /dev/null
+++ b/game_data_packager/unpack/__main__.py
@@ -0,0 +1,41 @@
+#!/usr/bin/python3
+# encoding=utf-8
+#
+# Copyright © 2015 Simon McVittie <smcv at debian.org>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# You can find the GPL license text on a Debian system under
+# /usr/share/common-licenses/GPL-2.
+
+import argparse
+import tarfile
+import zipfile
+
+from . import (TarUnpacker, ZipUnpacker)
+
+if __name__ == '__main__':
+    # Command-line helper: list the contents of an archive, or
+    # extract it into a directory if --output is given.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--output', '-o', help='extract to OUTPUT',
+            default=None)
+    parser.add_argument('archive')
+    args = parser.parse_args()
+
+    # Check for zip first, then tar; anything else is unsupported.
+    if zipfile.is_zipfile(args.archive):
+        unpacker = ZipUnpacker(args.archive)
+    elif tarfile.is_tarfile(args.archive):
+        unpacker = TarUnpacker(args.archive)
+    else:
+        raise SystemExit('Cannot work out how to unpack %r' % args.archive)
+
+    # Both unpackers are context managers; use that so the archive
+    # is closed even if listing or extraction fails.
+    with unpacker:
+        if args.output:
+            unpacker.extractall(args.output)
+        else:
+            unpacker.printdir()

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/game-data-packager.git



More information about the Pkg-games-commits mailing list