[reprotest] 02/04: main: Add a --env-build option for testing different env vars

Ximin Luo infinity0 at debian.org
Fri Oct 13 14:33:39 UTC 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch master
in repository reprotest.

commit a36fd053c1d7bb1a82658907e501f01d2dd78774
Author: Ximin Luo <infinity0 at debian.org>
Date:   Fri Oct 13 16:10:44 2017 +0200

    main: Add a --env-build option for testing different env vars
---
 README.rst            |  14 ++++++
 debian/changelog      |   2 +
 debian/control        |   2 +
 reprotest/__init__.py |  63 +++++++++++++++++++++++--
 reprotest/build.py    |  33 +++++++++++--
 reprotest/environ.py  | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++
 setup.py              |   7 +--
 7 files changed, 236 insertions(+), 11 deletions(-)

diff --git a/README.rst b/README.rst
index 18a7e58..eddc0c0 100644
--- a/README.rst
+++ b/README.rst
@@ -219,6 +219,20 @@ of names is given in the --help text for --variations.
 Most variations do not have parameters, and for them only the + and - operators
 are relevant. The variations that accept parameters are:
 
+environment.variables
+    A semicolon-separated ordered set, specifying environment variables that
+    reprotest should try to vary. Default is "REPROTEST_CAPTURE_ENVIRONMENT".
+    Supports regex-based syntax e.g.
+
+    - PID=\d{1,6}
+    - HOME=(/\w{3,12}){1,4}
+    - (GO|PYTHON|)PATH=(/\w{3,12}){1,4}(:(/\w{3,12}){1,4}){0,4}
+
+    Special cases:
+
+    - $VARNAME= (empty RHS) to tell reprotest to delete the variable
+    - $VARNAME=.{0} to tell reprotest to actually set an empty value
+    - \\x2c and \\x3b to match or generate , and ; respectively.
 user_group.available
     A semicolon-separated ordered set, specifying the available user+group
     combinations that reprotest can ``sudo(1)`` to. Default is empty, in which
diff --git a/debian/changelog b/debian/changelog
index dd9eb29..1b6650a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,6 +1,8 @@
 reprotest (0.7.3) UNRELEASED; urgency=medium
 
   * Fix --no-clean-on-error, it should work again.
+  * Add a --env-build option to try to determine which (known and unknown)
+    environment variables cause unreproducibility.
 
  -- Ximin Luo <infinity0 at debian.org>  Fri, 13 Oct 2017 15:42:51 +0200
 
diff --git a/debian/control b/debian/control
index 2936f2d..4c5fb33 100644
--- a/debian/control
+++ b/debian/control
@@ -19,6 +19,7 @@ Build-Depends: debhelper (>= 10),
  locales-all <!nocheck>,
  python3-coverage <!nocheck>,
  python3-pytest <!nocheck>,
+ python3-rstr <!nocheck>,
  tox <!nocheck>,
 # these below helps diffoscope produce nicer output in tests
  python3-tlsh <!nocheck>,
@@ -36,6 +37,7 @@ Depends: ${python3:Depends},
  libdpkg-perl,
  procps,
  python3-pkg-resources,
+ python3-rstr,
  ${misc:Depends}
 Recommends: diffutils | diffoscope (>= 84),
  disorderfs (>= 0.5.2),
diff --git a/reprotest/__init__.py b/reprotest/__init__.py
index 19c43a6..5f5e793 100644
--- a/reprotest/__init__.py
+++ b/reprotest/__init__.py
@@ -21,7 +21,7 @@ import pkg_resources
 from reprotest.lib import adtlog
 from reprotest.lib import adt_testbed
 from reprotest.build import Build, VariationSpec, Variations, tool_missing
-from reprotest import presets, shell_syn
+from reprotest import environ, presets, shell_syn
 
 
 VIRT_PREFIX = "autopkgtest-virt-"
@@ -181,7 +181,7 @@ class BuildContext(collections.namedtuple('_BuildContext',
         logging.info("copying %s back from virtual server's %s", self.testbed_dist, self.local_dist)
         testbed.command('copyup', (self.testbed_dist, os.path.join(self.local_dist, '')))
 
-    def run_build(self, testbed, build, artifact_pattern, testbed_build_pre, no_clean_on_error):
+    def run_build(self, testbed, build, old_env, artifact_pattern, testbed_build_pre, no_clean_on_error):
         logging.info("starting build with source directory: %s, artifact pattern: %s",
             self.testbed_src, artifact_pattern)
         # we remove existing artifacts in case the build doesn't overwrite it
@@ -192,10 +192,12 @@ class BuildContext(collections.namedtuple('_BuildContext',
         # this dance is necessary because the cwd can't be cd'd into during the
         # setup phase under some variations like user_group
         new_script = build.to_script(no_clean_on_error)
-        logging.info("executing build...")
+        logging.info("executing build in %s ...", build.tree)
+        logging.debug("#### REPROTEST BUILD ENVVARS ##################################################")
+        logging.debug("\n".join(environ.env_diff(old_env, build.env)))
         logging.debug(new_script)
         testbed.check_exec2(['sh', '-ec', new_script],
-            xenv=['%s=%s' % (k, v) for k, v in build.env.items()],
+            xenv=['-i'] + ['%s=%s' % (k, v) for k, v in build.env.items()],
             kind='build')
         dist_base = os.path.join(self.testbed_dist, VSRC_DIR)
         testbed.check_exec2(shell_copy_pattern(dist_base, self.testbed_src, artifact_pattern))
@@ -301,7 +303,7 @@ class TestArgs(collections.namedtuple('_Test',
 
                     build = bctx.make_build_commands(build_command, os.environ)
                     bctx.copydown(testbed)
-                    bctx.run_build(testbed, build, artifact_pattern, testbed_build_pre, no_clean_on_error)
+                    bctx.run_build(testbed, build, os.environ, artifact_pattern, testbed_build_pre, no_clean_on_error)
                     bctx.copyup(testbed)
 
                     name_variation = yield bctx.local_dist
@@ -398,6 +400,49 @@ def check_auto(test_args, testbed_args, build_variations=Variations.of(Variation
         return False
 
 
+def check_env(test_args, testbed_args, build_variations=Variations.of(VariationSpec.default())):
+    # sharing the default build_variations object is safe here because we never mutate it.
+    _, _, artifact_pattern, store_dir, _, _, diffoscope_args = test_args
+    with empty_or_temp_dir(store_dir, "store_dir") as result_dir:
+        assert store_dir == result_dir or store_dir is None
+        proc = test_args._replace(result_dir=result_dir).corun_builds(testbed_args)
+
+        var_x0, var_x1 = build_variations
+        dist_x0 = proc.send(("control", var_x0))
+        is_reproducible = lambda name, var: test_args.check_reproducible(proc, dist_x0, name, var)
+
+        orig_variations = var_x1.spec.variations()
+        only_varying_env = (len(orig_variations) == 0 or
+            len(orig_variations) == 1 and "environment" in orig_variations)
+
+        blacklist, blacklist_names, non_whitelist, non_whitelist_names = environ.generate_dummy_environ()
+
+        # Test blacklist
+        var_x1 = var_x1.replace.spec.extend("environment")
+        var_x1 = var_x1.replace.spec.environment.extend_variables(*blacklist)
+        if not is_reproducible("blacklist", var_x1):
+            print("Unreproducible even when varying blacklisted envvars: ", ", ".join(sorted(blacklist_names)))
+            if not only_varying_env:
+                print("This may or may not be caused by other factors; try re-running this again with --vary=-all")
+            else:
+                print("You are highly recommended to make your program reproducible when varying these.")
+            return False
+
+        # Test non-whitelist
+        var_x2 = var_x1.replace.spec.environment.extend_variables(*non_whitelist)
+        if not is_reproducible("non-whitelist", var_x2):
+            print("Unreproducible when varying unknown envvars: ", ", ".join(sorted(non_whitelist_names)))
+            print("Please file a bug to reprotest to add these to the whitelist or blacklist, to be decided.")
+            print("If blacklist, then you should also make your program reproducible when varying them.")
+            return False
+
+        print("Reproducible, even when varying known blacklisted and unknown non-whitelisted envvars! :)")
+        test_args.output_reproducible_hashes(dist_x0)
+        if orig_variations != VariationSpec.all_names():
+            print("However, other factors may still make the build unreproducible; try re-running with --vary=+all.")
+        return True
+
+
 def config_to_args(parser, filename):
     if not filename:
         return []
@@ -514,6 +559,12 @@ def cli_parser():
         'variations cause unreproducibility, potentially up to and including '
         'the ones specified by --variations and --vary. Conflicts with '
         '--extra-build.')
+    group1_0.add_argument('--env-build', default=False, action='store_true',
+        help='Automatically perform builds to try to determine which specific '
+        'environment variables cause unreproducibility, based on a hard-coded '
+        'whitelist and blacklist. You probably want to set --vary=-all as well '
+        'when setting this flag; see the man page for details. Conflicts with '
+        '--extra-build and --auto-build.')
     # TODO: remove after reprotest 0.8
     group1.add_argument('--dont-vary', default=[], action='append', help=argparse.SUPPRESS)
 
@@ -684,6 +735,8 @@ def run(argv, dry_run=None):
     specs = [spec]
     if parsed_args.auto_build:
         check_func = check_auto
+    elif parsed_args.env_build:
+        check_func = check_env
     else:
         for extra_build in parsed_args.extra_build:
             specs.append(spec.extend(extra_build))
diff --git a/reprotest/build.py b/reprotest/build.py
index cb0dcfd..ca99a12 100644
--- a/reprotest/build.py
+++ b/reprotest/build.py
@@ -13,6 +13,7 @@ import random
 import time
 import types
 
+from reprotest import environ
 from reprotest import mdiffconf
 from reprotest import shell_syn
 from reprotest.utils import AttributeReplacer
@@ -82,6 +83,15 @@ class Build(collections.namedtuple('_Build', 'build_command setup cleanup env tr
         new_mapping[key] = value
         return self._replace(env=types.MappingProxyType(new_mapping))
 
+    def modify_env(self, add, rem):
+        '''Return a copy of this build whose env has the (key, value) pairs in add set, then the keys in rem deleted.'''
+        new_mapping = self.env.copy()
+        for k, v in add:
+            new_mapping[k] = v
+        for k in rem:
+            new_mapping.pop(k, None)  # deleting an already-absent variable is a no-op, not a KeyError
+        return self._replace(env=types.MappingProxyType(new_mapping))
+
     def prepend_to_build_command(self, *prefix):
         '''Prepend a wrapper command onto the build_command.'''
         new_command = shell_syn.Command(
@@ -149,7 +159,7 @@ else
     exit $__x
 fi
 """
-            return """
+            return """\
 #### BEGIN REPROTEST BUILD SCRIPT ##############################################
 run_build() {{
     {0}
@@ -179,7 +189,13 @@ cleanup() {{
 def environment(ctx, build, vary):
     if not vary:
         return build
-    return build.add_env('CAPTURE_ENVIRONMENT', 'i_capture_the_environment')
+    added, removed = [], []
+    for k, v in environ.parse_environ_templates(ctx.spec.environment.variables):
+        if v is None:
+            removed += [k]
+        else:
+            added += [(k, v)]
+    return build.modify_env(added, removed)
 
 # FIXME: this requires superuser privileges.
 # Probably need to couple with "namespace" UTS unshare when not running in a
@@ -338,7 +354,8 @@ def user_group(ctx, build, vary):
         sudo_command = ('sudo', '-E', '-u', user)
     binpath = os.path.join(dirname(build.tree), 'bin')
 
-    _ = build.prepend_to_build_command(*sudo_command)
+    _ = build.prepend_to_build_command(*sudo_command,
+        *["env", "-u", "SUDO_COMMAND", "-u", "SUDO_GID", "-u", "SUDO_UID", "-u", "SUDO_USER"])
     # disorderfs needs to run as a different user.
     # we prefer that to running it as root, principle of least-privilege.
     _ = _.append_setup_exec('sh', '-ec', r'''
@@ -405,6 +422,15 @@ class TimeVariation(collections.namedtuple('_TimeVariation', 'faketimes auto_fak
         return self.empty()._replace(faketimes=self.faketimes + new_faketimes)
 
 
+class EnvironmentVariation(collections.namedtuple("_EnvironmentVariation", "variables")):
+    @classmethod
+    def default(cls):
+        return cls(mdiffconf.strlist_set(";", ["REPROTEST_CAPTURE_ENVIRONMENT"]))
+
+    def extend_variables(self, *ks):
+        return self._replace(variables=self.variables + list(ks))
+
+
 class UserGroupVariation(collections.namedtuple('_UserGroupVariation', 'available')):
     @classmethod
     def default(cls):
@@ -415,6 +441,7 @@ class VariationSpec(mdiffconf.ImmutableNamespace):
     @classmethod
     def default(cls, variations=VARIATIONS):
         default_overrides = {
+            "environment": EnvironmentVariation.default(),
             "user_group": UserGroupVariation.default(),
             "time": TimeVariation.default(),
         }
diff --git a/reprotest/environ.py b/reprotest/environ.py
new file mode 100644
index 0000000..2f53e40
--- /dev/null
+++ b/reprotest/environ.py
@@ -0,0 +1,126 @@
+# Licensed under the GPL: https://www.gnu.org/licenses/gpl-3.0.en.html
+# For details: reprotest/debian/copyright
+
+import re
+import rstr
+import os
+
+
+xe_small = {  # regex building blocks for the templates below (raw strings: "\w" is not a valid str escape)
+    "path": r"(/\w{1,12}){1,4}",
+    "port": r"([1-9]\d{0,3}|[1-5]\d{4})",
+    "domain": r"\w{1,10}(\.\w{1,10}){0,3}",
+    "password": r"\w{1,40}",
+    "username": r"\w{2,20}",
+}
+
+xe_medium = dict(**{  # composites built from the xe_small fragments, merged with xe_small itself
+    "proxy_url" : "%(username)s:%(password)s@%(domain)s:%(port)s" % xe_small,
+    "pathlist": "%(path)s(:%(path)s){0,4}" % xe_small,
+}, **xe_small)
+
+
+"""
+Variables intended to control the behaviour of general run-time programs that
+include non-build and non-developer programs.
+
+See also:
+- http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
+"""
+# TODO: TMPDIR needs special treatment, make it a proper variation
+BLACKLIST = (r"""
+HOME LOGNAME USER USERNAME
+_ LANG LANGUAGE LC_\w+ MSGVERB OLDPWD PWD SHELL SHLVL TZ
+TMPDIR=(/tmp|/var/tmp|/dev/shm)
+COLORTERM COLUMNS DATEMSK EDITOR LINES LS_COLORS TERM VISUAL VTE_VERSION
+PAGER MAIL BROWSER
+ftp_proxy=ftp://%(proxy_url)s http_proxy=http://%(proxy_url)s https_proxy=https://%(proxy_url)s
+MANPATH=%(pathlist)s INFODIR=%(pathlist)s
+DISPLAY WINDOWID XAUTHORITY XMODIFIERS
+DBUS_SESSION_\w+ DESKTOP_SESSION GDMSESSION ICEAUTHORITY SESSION_MANAGER XDG_\w+ \w+_SOCKET
+QT_\w+ GTK_\w+ \w+_IM_MODULE
+SSH_\w+ GNUPG\w+ GPG_\w+
+DEBEMAIL DEBFULLNAME
+SUDO_COMMAND SUDO_GID SUDO_UID SUDO_USER
+""" % xe_medium).split()
+
+
+"""
+Variables intended to control the output of build processes, or interpreter
+settings that "normal users" aren't expected to customise in most situations.
+
+Notes:
+
+- Path variables are subtle, we keep many of them here to avoid false-positives
+  and breaking builds, but ideally they would be "in the blacklist if contents
+  are the same, else in the whitelist if contents differ".
+"""
+WHITELIST = r"""
+CC CPP CXX FC F GCJ LD OBJC OBJCXX RUSTC LEX YACC
+CFLAGS CPPFLAGS CXXFLAGS FCFLAGS FFLAGS GCJFLAGS LDFLAGS OBJCFLAGS OBJCXXFLAGS RUSTFLAGS
+DEB_\w+ DPKG_\w+
+PATH JAVA_HOME GOPATH LD_PRELOAD LD_LIBRARY_PATH PERL5LIB PYTHONPATH
+SOURCE_DATE_EPOCH BUILD_PATH_PREFIX_MAP
+""".split()
+
+
+"""
+Some stuff breaks when you unset certain vars, e.g. diffoscope breaks if PATH
+is unset. technically these are bugs, but they are so prevalent and we'd like
+to focus on more important things first.
+
+TODO: make it possible to clear this list on the command line.
+"""
+NEVER_UNSET = "HOME PATH USER LOGNAME PWD".split()
+
+
+def parse_environ_templates(variables):
+    """Yield (name, value) per template; value None means delete the variable (template "NAME=")."""
+    for k, sep, v in (tmpl.partition("=") for tmpl in variables):
+        if sep and not v:
+            yield (k, None)
+        else:  # only a bare "NAME" gets the marker value; "NAME=.{0}" really generates ""
+            yield (rstr.xeger(k), rstr.xeger(v) if sep else "i_capture_the_environment")
+
+
+def generate_dummy_environ(env=None, blacklist=BLACKLIST, whitelist=WHITELIST, never_unset=NEVER_UNSET):
+    """Return (blacklist templates, their names, non-whitelist templates, their names) built from env."""
+    env = os.environ if env is None else env
+    env = set(env.keys()) - set(never_unset)
+
+    def generate(name, variables):
+        for tmpl in variables:
+            k, sep, v = tmpl.partition("=")
+            if re.fullmatch(k, name):  # whole name must match, else e.g. "F" would claim "FOO"
+                # unset (if v empty), or generate random value matching v
+                yield (name, "%s=%s" % (name, v))
+
+    blacklist_matches = [m for n in env for m in generate(n, blacklist)]
+    # generate overrides for existing vars, and possibly generate new vars
+    b = [m[1] for m in blacklist_matches] + blacklist
+    bn = sorted(set([m[0] for m in blacklist_matches] + blacklist))
+
+    def matches(name, pp):
+        return any(re.fullmatch(p, name) for p in pp)
+    blacklist_names = [t.partition("=")[0] for t in blacklist]
+    whitelist_names = [t.partition("=")[0] for t in whitelist]
+
+    unrecognized = sorted(n for n in env
+        if (not matches(n, blacklist_names)
+        and not matches(n, whitelist_names)))
+    extra_unknown = ["[A-Z]{2,5}(_[A-Z]{2,5}){1,3}",
+                     "[A-Z]{2,5}(_[A-Z]{2,5}){1,3}",
+                     r"REPROTEST_CAPTURE_ENVIRONMENT_UNKNOWN_\w+"]
+
+    # unset unrecognized stuff in the current env that doesn't match the
+    # whitelist or blacklist, which we set earlier
+    nw = ["%s=" % k for k in unrecognized] + extra_unknown
+    nwn = unrecognized + extra_unknown
+
+    return b, bn, nw, nwn
+
+
+def env_diff(old, new):
+    """Return sorted "-NAME" / "+NAME=value" lines: vars dropped from, or added/changed in, new."""
+    changed = ["+%s=%s" % (k, v) for k, v in new.items() if k not in old or v != old[k]]
+    return sorted(["-%s" % k for k in old.keys() - new.keys()] + changed, key=lambda x: x[1:])
diff --git a/setup.py b/setup.py
index 51ef252..ace7a42 100644
--- a/setup.py
+++ b/setup.py
@@ -6,11 +6,11 @@
 from setuptools import setup, find_packages
 
 setup(name='reprotest',
-      version='0.7.2',
+      version='0.7.3',
       description='Build packages and check them for reproducibility.',
       long_description=open('README.rst', encoding='utf-8').read(),
-      author='Ceridwen',
-      author_email='ceridwenv at gmail.com',
+      author='Ximin Luo, Ceridwen',
+      author_email='infinity0 at debian.org, ceridwenv at gmail.com',
       license='GPL-3+',
       url='https://anonscm.debian.org/cgit/reproducible/reprotest.git/',
       packages=find_packages(),
@@ -21,6 +21,7 @@ setup(name='reprotest',
           },
       install_requires=[
           'diffoscope',
+          'rstr',
           ],
       classifiers=[
           'Development Status :: 3 - Alpha',

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/reprotest.git



More information about the Reproducible-commits mailing list