[reprotest] 01/01: main: Add a --env-build option for testing different env vars

Ximin Luo infinity0 at debian.org
Mon Sep 25 19:10:03 UTC 2017


This is an automated email from the git hooks/post-receive script.

infinity0 pushed a commit to branch env-build
in repository reprotest.

commit 51835682f078a1db301cc3911a38e04e1f1ec63d
Author: Ximin Luo <infinity0 at debian.org>
Date:   Mon Sep 25 21:09:26 2017 +0200

    main: Add a --env-build option for testing different env vars
---
 README.rst            |  14 +++++
 debian/changelog      |   2 +
 reprotest/__init__.py | 138 +++++++++++++++++++++++++++++++++++++++++++++-----
 reprotest/build.py    |  31 +++++++++++-
 4 files changed, 171 insertions(+), 14 deletions(-)

diff --git a/README.rst b/README.rst
index 8e28b3e..0c0ed7a 100644
--- a/README.rst
+++ b/README.rst
@@ -218,6 +218,20 @@ of names is given in the --help text for --variations.
 Most variations do not have parameters, and for them only the + and - operators
 are relevant. The variations that accept parameters are:
 
+environment.variables
+    A semicolon-separated ordered set, specifying environment variables that
+    reprotest should try to vary. Default is "REPROTEST_CAPTURE_ENVIRONMENT".
+    Supports regex-based syntax e.g.
+
+    - PID=\d{1,6}
+    - HOME=(/\w{3,12}){1,4}
+    - (GO|PYTHON|)PATH=(/\w{3,12}){1,4}(:(/\w{3,12}){1,4}){3,12}
+
+    Special cases:
+
+    - XDG_RUNTIME_DIR= (empty RHS) to tell reprotest to delete the variable
+    - $VARNAME=.{0} to tell reprotest to actually set an empty value
+    - \\x2c and \\x3b to match or generate , and ; respectively.
 user_group.available
     A semicolon-separated ordered set, specifying the available user+group
     combinations that reprotest can ``sudo(1)`` to. Default is empty, in which
diff --git a/debian/changelog b/debian/changelog
index 7735988..0075e7a 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,8 @@ reprotest (0.7.1) UNRELEASED; urgency=medium
   * Add a --auto-build option to try to determine which specific variations
     cause unreproducibility.
   * Fix varying both umask and user_group at the same time.
+  * Add a --env-build option to try to determine which (known and unknown)
+    environment variables cause unreproducibility.
 
  -- Ximin Luo <infinity0 at debian.org>  Fri, 22 Sep 2017 17:57:31 +0200
 
diff --git a/reprotest/__init__.py b/reprotest/__init__.py
index 88e721f..6ec4a16 100644
--- a/reprotest/__init__.py
+++ b/reprotest/__init__.py
@@ -21,7 +21,7 @@ import pkg_resources
 
 from reprotest.lib import adtlog
 from reprotest.lib import adt_testbed
-from reprotest.build import Build, VariationSpec, Variations
+from reprotest.build import Build, VariationSpec, Variations, environment_template_varname
 from reprotest import presets, shell_syn
 
 
@@ -302,6 +302,18 @@ def check(build_command, source_root, artifact_pattern, store_dir=None, no_clean
         return not retcode
 
 
+def check_reproducible(proc, name, var, dist_control, diffoscope_args, store_dir):
+    dist_test = proc.send(("experiment-%s" % name, var))
+    # TODO: handle exit codes > 1 correctly, raise a CalledProcessError
+    retcode = run_diff(dist_control, dist_test, diffoscope_args, store_dir)
+    if retcode == 0:
+        return True
+    elif retcode == 1:
+        return False
+    else:
+        raise RuntimeError("diffoscope exited non-boolean %s, can't continue" % retcode)
+
+
 def check_auto(build_command, source_root, artifact_pattern, store_dir=None, no_clean_on_error=False,
           virtual_server_args=[], testbed_pre=None, testbed_init=None, host_distro='debian',
           build_variations=Variations.of(VariationSpec.default()), diffoscope_args=[]):
@@ -314,17 +326,7 @@ def check_auto(build_command, source_root, artifact_pattern, store_dir=None, no_
 
         var_x0, var_x1 = build_variations
         dist_x0 = proc.send(("control", var_x0))
-
-        def is_reproducible(name, var):
-            dist_test = proc.send(("experiment-%s" % name, var))
-            # TODO: handle exit codes > 1 correctly, raise a CalledProcessError
-            retcode = run_diff(dist_x0, dist_test, diffoscope_args, store_dir)
-            if retcode == 0:
-                return True
-            elif retcode == 1:
-                return False
-            else:
-                raise RuntimeError("diffoscope exited non-boolean %s, can't continue" % retcode)
+        is_reproducible = lambda name, var: check_reproducible(proc, name, var, dist_x0, diffoscope_args, store_dir)
 
         if not is_reproducible("0", var_x0):
             print("Not reproducible, even when fixing as much as reprotest knows how to. :(")
@@ -340,7 +342,7 @@ def check_auto(build_command, source_root, artifact_pattern, store_dir=None, no_
         varnames = VariationSpec.all_names()
         random.shuffle(varnames)
         for v in varnames:
-            var_test = var_cur._replace(spec=var_cur.spec._replace(**{v: var_x1.spec[v]}))
+            var_test = var_cur.replace_spec(**{v: var_x1.spec[v]})
             if is_reproducible(v, var_test):
                 # vary it for the next test as well, it's OK to vary it
                 var_cur = var_test
@@ -354,6 +356,108 @@ def check_auto(build_command, source_root, artifact_pattern, store_dir=None, no_
         return False
 
 
+def check_env(build_command, source_root, artifact_pattern, store_dir=None, no_clean_on_error=False,
+          virtual_server_args=[], testbed_pre=None, testbed_init=None, host_distro='debian',
+          build_variations=Variations.of(VariationSpec.default()), diffoscope_args=[]):
+    # default argument [] is safe here because we never mutate it.
+    with empty_or_temp_dir(store_dir, "store_dir") as result_dir:
+        assert store_dir == result_dir or store_dir is None
+        proc = corun_builds(
+            build_command, source_root, artifact_pattern, result_dir, no_clean_on_error,
+            virtual_server_args, testbed_pre, testbed_init, host_distro)
+
+        # Variables intended to control the behaviour of general run-time
+        # programs that include non-build and non-developer programs.
+        # TODO: might have to rm USER etc if it makes builds buggy...
+        # (or add a whitelist to EnvironmentVariation for the user to override)
+        blacklist = r"""
+            HOME LOGNAME USER USERNAME
+            _ COLORTERM EDITOR LS_COLORS OLDPWD PWD SHELL SHLVL TERM VISUAL VTE_VERSION
+            LANG LANGUAGE LC_\w+
+            DISPLAY WINDOWID XAUTHORITY XMODIFIERS
+            DBUS_SESSION_\w+ DESKTOP_SESSION GDMSESSION SESSION_MANAGER XDG_\w+ \w+_SOCKET
+            QT_\w+ GTK_\w+ \w+_IM_MODULE
+            SSH_\w+ GNUPG\w+ GPG_\w+
+            DEBEMAIL DEBFULLNAME
+        """.split()
+        blacklist_names = [environment_template_varname(t) for t in blacklist]
+        # some stuff breaks when you unset certain vars, e.g. diffoscope
+        # breaks if PATH is unset. this specific example doesn't affect us here
+        # because diffoscope runs outside of the testbed, but others might.
+        never_unset = "HOME PATH".split()
+
+        # Variables intended to control the output of build processes, or (e.g.)
+        # interpreter settings that "normal users" aren't expected to need to
+        # customise in normal situations. Notes:
+        # - "path" vars are subtle, we keep them here to avoid false-positives
+        #   and breaking builds but ideally they would be "in the blacklist if
+        #   contents differ, else in the whitelist"
+        whitelist = r"""
+            CC CPP CXX FC F GCJ LD OBJC OBJCXX RUSTC
+            CFLAGS CPPFLAGS CXXFLAGS FCFLAGS FFLAGS GCJFLAGS LDFLAGS OBJCFLAGS OBJCXXFLAGS RUSTFLAGS
+            DEB_\w+ DPKG_\w+
+            (|GO|PYTHON|MAN)PATH
+            PERL5LIB
+        """.split()
+
+        def matches(name, pp):
+            return any(re.match(p, name) for p in pp)
+
+        var_x0, var_x1 = build_variations
+        dist_x0 = proc.send(("control", var_x0))
+        is_reproducible = lambda name, var: check_reproducible(proc, name, var, dist_x0, diffoscope_args, store_dir)
+
+        orig_variations = var_x1.spec.variations()
+        only_varying_env = (len(orig_variations) == 0 or
+            len(orig_variations) == 1 and "environment" in orig_variations)
+        env = set(os.environ.keys()) - set(never_unset)
+
+        # Test blacklist
+        blacklist_matches = sorted(n for n in env
+            if matches(n, blacklist_names) and n not in never_unset)
+        spec_x1 = var_x1.spec.extend("environment")
+        spec_x1 = spec_x1._replace(environment=spec_x1.environment.extend_variables(
+            # unset stuff in the current env that matches the blacklist
+            *("%s=" % k for k in blacklist_matches),
+            # generate new stuff according to the blacklist templates
+            *blacklist))
+        var_x1 = var_x1._replace(spec=spec_x1)
+        if not is_reproducible("blacklist", var_x1):
+            print("Unreproducible even when varying blacklisted envvars: ", ", ".join(sorted(blacklist_matches + blacklist)))
+            if not only_varying_env:
+                print("This may be caused by other factors; try re-running this again with --vary=-all")
+            else:
+                print("You should (probably) make your program reproducible when varying these.")
+            return False
+
+        # Test non-whitelist
+        unrecognized = sorted(n for n in env
+            if not matches(n, blacklist_names) and not matches(n, whitelist))
+        extra_blacklist = ["REPROTEST_CAPTURE_ENVIRONMENT_UNKNOWN_\w+"]
+        spec_x2 = spec_x1._replace(environment=spec_x1.environment.extend_variables(
+            # unset stuff in the current env that doesn't match the whitelist (or blacklist, which we set earlier)
+            *("%s=" % k for k in unrecognized),
+            # TODO: could also generate some totally random vars
+            *extra_blacklist))
+        var_x2 = var_x1._replace(spec=spec_x2)
+        if not is_reproducible("non-whitelist", var_x2):
+            print("Unreproducible when varying unknown envvars: ", ", ".join(sorted(extra_blacklist + unrecognized)))
+            print("Please file a bug to reprotest to add these to the whitelist or blacklist, to be decided.")
+            print("If blacklist is chosen, then you should also make your program reproducible when varying them.")
+            return False
+
+        print("=======================")
+        print("Reproduction successful")
+        print("=======================")
+        print("No differences in %s" % artifact_pattern, flush=True)
+        run_or_tee(['sh', '-ec', 'find %s -type f -exec sha256sum "{}" \;' % artifact_pattern],
+            'SHA256SUMS', store_dir,
+            cwd=os.path.join(dist_x0, VSRC_DIR))
+        if orig_variations != VariationSpec.all_names():
+            print("However, other factors may still make the build unreproducible; try re-running with --vary=+all.")
+        return True
+
+
 def config_to_args(parser, filename):
     if not filename:
         return []
@@ -466,6 +570,12 @@ def cli_parser():
         'variations cause unreproducibility, potentially up to and including '
         'the ones specified by --variations and --vary. Conflicts with '
         '--extra-build.')
+    group1_0.add_argument('--env-build', default=False, action='store_true',
+        help='Automatically perform builds to try to determine which specific '
+        'environment variables cause unreproducibility, based on a hard-coded '
+        'whitelist and blacklist. You probably want to set --vary=-all as well '
+        'when setting this flag; see the man page for details. Conflicts with '
+        '--extra-build and --auto-build.')
     # TODO: remove after reprotest 0.8
     group1.add_argument('--dont-vary', default=[], action='append', help=argparse.SUPPRESS)
 
@@ -616,6 +726,8 @@ def run(argv, dry_run=None):
     specs = [spec]
     if parsed_args.auto_build:
         check_func = check_auto
+    elif parsed_args.env_build:
+        check_func = check_env
     else:
         for extra_build in parsed_args.extra_build:
             specs.append(spec.extend(extra_build))
diff --git a/reprotest/build.py b/reprotest/build.py
index 03c1d03..0a6fd5d 100644
--- a/reprotest/build.py
+++ b/reprotest/build.py
@@ -4,10 +4,12 @@
 import collections
 import getpass
 import grp
+import itertools
 import logging
 import os
 import shlex
 import random
+import rstr
 import time
 import types
 
@@ -156,10 +158,24 @@ fi
 # def cpu(script, env, tree):
 #     return script, env, tree
 
+def environment_template_varname(tmpl):
+    return tmpl.partition("=")[0]
+
 def environment(ctx, build, vary):
     if not vary:
         return build
-    return build.add_env('CAPTURE_ENVIRONMENT', 'i_capture_the_environment')
+    removed = []
+    for tmpl in ctx.spec.environment.variables:
+        k, sep, v = tmpl.partition("=")
+        # TODO: support rstr xeger generation of the key and value
+        if not v and sep:
+            removed += [k]
+        else:
+            build = build.add_env(rstr.xeger(k), rstr.xeger(v) or "i_capture_the_environment")
+    if removed:
+        command = ["env"] + list(itertools.chain.from_iterable(zip(itertools.repeat("-u"), removed)))
+        build = build.append_to_build_command(_shell_ast.SimpleCommand.make(*command))
+    return build
 
 # TODO: this requires superuser privileges.
 # def domain_host(ctx, script, env, tree):
@@ -380,6 +396,15 @@ class TimeVariation(collections.namedtuple('_TimeVariation', 'faketimes auto_fak
         return self.empty()._replace(faketimes=self.faketimes + new_faketimes)
 
 
+class EnvironmentVariation(collections.namedtuple("_EnvironmentVariation", "variables")):
+    @classmethod
+    def default(cls):
+        return cls(mdiffconf.strlist_set(";", ["REPROTEST_CAPTURE_ENVIRONMENT"]))
+
+    def extend_variables(self, *ks):
+        return self._replace(variables=self.variables + list(ks))
+
+
 class UserGroupVariation(collections.namedtuple('_UserGroupVariation', 'available')):
     @classmethod
     def default(cls):
@@ -390,6 +415,7 @@ class VariationSpec(mdiffconf.ImmutableNamespace):
     @classmethod
     def default(cls, variations=VARIATIONS):
         default_overrides = {
+            "environment": EnvironmentVariation.default(),
             "user_group": UserGroupVariation.default(),
             "time": TimeVariation.default(),
         }
@@ -434,6 +460,9 @@ class Variations(collections.namedtuple('_Variations', 'spec verbosity')):
     def of(cls, *specs, zero=VariationSpec.empty(), verbosity=0):
         return [cls(spec, verbosity) for spec in [zero] + list(specs)]
 
+    def replace_spec(self, *args, **kwargs):
+        return self._replace(spec=self.spec._replace(*args, **kwargs))
+
 
 if __name__ == "__main__":
     import sys

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/reprotest.git



More information about the Reproducible-commits mailing list