[reprotest] 01/01: main: Add a --env-build option for testing different env vars
Ximin Luo
infinity0 at debian.org
Mon Oct 2 16:50:04 UTC 2017
This is an automated email from the git hooks/post-receive script.
infinity0 pushed a commit to branch env-build
in repository reprotest.
commit 1511a25db9fc257c1771bcfc53d343aeaa2a0b83
Author: Ximin Luo <infinity0 at debian.org>
Date: Mon Oct 2 18:49:21 2017 +0200
main: Add a --env-build option for testing different env vars
---
README.rst | 14 +++++++
debian/changelog | 2 +
debian/control | 2 +
reprotest/__init__.py | 53 +++++++++++++++++++++++++-
reprotest/build.py | 23 ++++++++++-
reprotest/environ.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
setup.py | 5 ++-
7 files changed, 198 insertions(+), 4 deletions(-)
diff --git a/README.rst b/README.rst
index aebef9a..0b901f9 100644
--- a/README.rst
+++ b/README.rst
@@ -216,6 +216,20 @@ of names is given in the --help text for --variations.
Most variations do not have parameters, and for them only the + and - operators
are relevant. The variations that accept parameters are:
+environment.variables
+ A semicolon-separated ordered set, specifying environment variables that
+ reprotest should try to vary. Default is "REPROTEST_CAPTURE_ENVIRONMENT".
+ Supports regex-based syntax e.g.
+
+ - PID=\d{1,6}
+ - HOME=(/\w{3,12}){1,4}
+ - (GO|PYTHON|)PATH=(/\w{3,12}){1,4}(:(/\w{3,12}){1,4}){3,12}
+
+ Special cases:
+
+ - $VARNAME= (empty RHS) to tell reprotest to delete the variable
+ - $VARNAME=.{0} to tell reprotest to actually set an empty value
+ - \\x2c and \\x3b to match or generate , and ; respectively.
user_group.available
A semicolon-separated ordered set, specifying the available user+group
combinations that reprotest can ``sudo(1)`` to. Default is empty, in which
diff --git a/debian/changelog b/debian/changelog
index e2943da..81e4a18 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -10,6 +10,8 @@ reprotest (0.7.1) UNRELEASED; urgency=medium
this automatically in our presets.
* Pass --exclude-directory-metadata to diffoscope(1) by default as this is
the majority use-case. Document the other cases in README and the man page.
+ * Add a --env-build option to try to determine which (known and unknown)
+ environment variables cause unreproducibility.
-- Ximin Luo <infinity0 at debian.org> Fri, 22 Sep 2017 17:57:31 +0200
diff --git a/debian/control b/debian/control
index adbd892..0a931a5 100644
--- a/debian/control
+++ b/debian/control
@@ -19,6 +19,7 @@ Build-Depends: debhelper (>= 10),
locales-all <!nocheck>,
python3-coverage <!nocheck>,
python3-pytest <!nocheck>,
+ python3-rstr <!nocheck>,
tox <!nocheck>,
# these below helps diffoscope produce nicer output in tests
python3-tlsh <!nocheck>,
@@ -36,6 +37,7 @@ Depends: ${python3:Depends},
libdpkg-perl,
procps,
python3-pkg-resources,
+ python3-rstr,
${misc:Depends}
Recommends: diffutils | diffoscope (>= 84),
disorderfs (>= 0.5.2),
diff --git a/reprotest/__init__.py b/reprotest/__init__.py
index 535ca98..e88e5c2 100644
--- a/reprotest/__init__.py
+++ b/reprotest/__init__.py
@@ -21,7 +21,7 @@ import pkg_resources
from reprotest.lib import adtlog
from reprotest.lib import adt_testbed
from reprotest.build import Build, VariationSpec, Variations, tool_missing
-from reprotest import presets, shell_syn
+from reprotest import environ, presets, shell_syn
VIRT_PREFIX = "autopkgtest-virt-"
@@ -285,6 +285,7 @@ class TestArgs(collections.namedtuple('_Test',
build = bctx.make_build_commands(
'cd "$REPROTEST_BUILD_PATH"; unset REPROTEST_BUILD_PATH; ' +
'umask "$REPROTEST_UMASK"; unset REPROTEST_UMASK; ' +
+ 'unset SUDO_COMMAND; ' +
build_command, os.environ)
logging.log(5, "build %s: %r", name, build)
build = bctx.plan_variations(build)
@@ -391,6 +392,48 @@ def check_auto(test_args, testbed_args, build_variations=Variations.of(Variation
return False
+def check_env(test_args, testbed_args, build_variations=Variations.of(VariationSpec.default())):
+ # the shared default argument value is safe here because we never mutate it.
+ _, _, artifact_pattern, store_dir, _, _, diffoscope_args = test_args
+ with empty_or_temp_dir(store_dir, "store_dir") as result_dir:
+ assert store_dir == result_dir or store_dir is None
+ proc = test_args._replace(result_dir=result_dir).corun_builds(testbed_args)
+
+ var_x0, var_x1 = build_variations
+ dist_x0 = proc.send(("control", var_x0))
+ is_reproducible = lambda name, var: test_args.check_reproducible(proc, dist_x0, name, var)
+
+ orig_variations = var_x1.spec.variations()
+ only_varying_env = (len(orig_variations) == 0 or
+ len(orig_variations) == 1 and "environment" in orig_variations)
+
+ blacklist, blacklist_names, non_whitelist, non_whitelist_names = environ.generate_dummy_environ()
+
+ # Test blacklist
+ var_x1 = var_x1.replace.spec.extend("environment")
+ var_x1 = var_x1.replace.spec.environment.extend_variables(*blacklist)
+ if not is_reproducible("blacklist", var_x1):
+ print("Unreproducible even when varying blacklisted envvars: ", ", ".join(sorted(blacklist_names)))
+ if not only_varying_env:
+ print("This may or may not be caused by other factors; try re-running this again with --vary=-all")
+ else:
+ print("You are highly recommended to make your program reproducible when varying these.")
+ return False
+
+ # Test non-whitelist
+ var_x2 = var_x1.replace.spec.environment.extend_variables(*non_whitelist)
+ if not is_reproducible("non-whitelist", var_x2):
+ print("Unreproducible when varying unknown envvars: ", ", ".join(sorted(non_whitelist_names)))
+ print("Please file a bug to reprotest to add these to the whitelist or blacklist, to be decided.")
+ print("If blacklist, then you should also make your program reproducible when varying them.")
+ return False
+
+ test_args.output_reproducible_hashes(dist_x0)
+ if orig_variations != VariationSpec.all_names():
+ print("However, other factors may still make the build unreproducible; try re-running with --vary=+all.")
+ return True
+
+
def config_to_args(parser, filename):
if not filename:
return []
@@ -507,6 +550,12 @@ def cli_parser():
'variations cause unreproducibility, potentially up to and including '
'the ones specified by --variations and --vary. Conflicts with '
'--extra-build.')
+ group1_0.add_argument('--env-build', default=False, action='store_true',
+ help='Automatically perform builds to try to determine which specific '
+ 'environment variables cause unreproducibility, based on a hard-coded '
+ 'whitelist and blacklist. You probably want to set --vary=-all as well '
+ 'when setting this flag; see the man page for details. Conflicts with '
+ '--extra-build and --auto-build.')
# TODO: remove after reprotest 0.8
group1.add_argument('--dont-vary', default=[], action='append', help=argparse.SUPPRESS)
@@ -670,6 +719,8 @@ def run(argv, dry_run=None):
specs = [spec]
if parsed_args.auto_build:
check_func = check_auto
+ elif parsed_args.env_build:
+ check_func = check_env
else:
for extra_build in parsed_args.extra_build:
specs.append(spec.extend(extra_build))
diff --git a/reprotest/build.py b/reprotest/build.py
index be22ff6..195146b 100644
--- a/reprotest/build.py
+++ b/reprotest/build.py
@@ -5,6 +5,7 @@ import collections
import functools
import getpass
import grp
+import itertools
import logging
import os
import shlex
@@ -14,6 +15,7 @@ import time
import types
from reprotest import _shell_ast
+from reprotest import environ
from reprotest import mdiffconf
from reprotest.utils import AttributeReplacer
@@ -178,7 +180,16 @@ fi
def environment(ctx, build, vary):
if not vary:
return build
- return build.add_env('CAPTURE_ENVIRONMENT', 'i_capture_the_environment')
+ removed = []
+ for k, v in environ.parse_environ_templates(ctx.spec.environment.variables):
+ if v is None:
+ removed += [k]
+ else:
+ build = build.add_env(k, v)
+ if removed:
+ command = ["env"] + list(itertools.chain.from_iterable(zip(itertools.repeat("-u"), removed)))
+ build = build.append_to_build_command(_shell_ast.SimpleCommand.make(*command))
+ return build
# TODO: this requires superuser privileges.
# def domain_host(ctx, script, env, tree):
@@ -402,6 +413,15 @@ class TimeVariation(collections.namedtuple('_TimeVariation', 'faketimes auto_fak
return self.empty()._replace(faketimes=self.faketimes + new_faketimes)
+class EnvironmentVariation(collections.namedtuple("_EnvironmentVariation", "variables")):
+ @classmethod
+ def default(cls):
+ return cls(mdiffconf.strlist_set(";", ["REPROTEST_CAPTURE_ENVIRONMENT"]))
+
+ def extend_variables(self, *ks):
+ return self._replace(variables=self.variables + list(ks))
+
+
class UserGroupVariation(collections.namedtuple('_UserGroupVariation', 'available')):
@classmethod
def default(cls):
@@ -412,6 +432,7 @@ class VariationSpec(mdiffconf.ImmutableNamespace):
@classmethod
def default(cls, variations=VARIATIONS):
default_overrides = {
+ "environment": EnvironmentVariation.default(),
"user_group": UserGroupVariation.default(),
"time": TimeVariation.default(),
}
diff --git a/reprotest/environ.py b/reprotest/environ.py
new file mode 100644
index 0000000..1c89cad
--- /dev/null
+++ b/reprotest/environ.py
@@ -0,0 +1,103 @@
+# Licensed under the GPL: https://www.gnu.org/licenses/gpl-3.0.en.html
+# For details: reprotest/debian/copyright
+
+import re
+import rstr
+import os
+
+
+"""
+Variables intended to control the behaviour of general run-time programs that
+include non-build and non-developer programs.
+
+See also:
+- http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
+"""
+BLACKLIST = r"""
+HOME LOGNAME USER USERNAME
+_ LANG LANGUAGE LC_\w+ MSGVERB OLDPWD PWD SHELL SHLVL TMPDIR TZ
+COLORTERM COLUMNS DATEMSK EDITOR LINES LS_COLORS TERM VISUAL VTE_VERSION
+PAGER MAIL BROWSER
+ftp_proxy http_proxy https_proxy
+MANPATH INFODIR
+DISPLAY WINDOWID XAUTHORITY XMODIFIERS
+DBUS_SESSION_\w+ DESKTOP_SESSION GDMSESSION ICEAUTHORITY SESSION_MANAGER XDG_\w+ \w+_SOCKET
+QT_\w+ GTK_\w+ \w+_IM_MODULE
+SSH_\w+ GNUPG\w+ GPG_\w+
+DEBEMAIL DEBFULLNAME
+""".split()
+
+
+"""
+Variables intended to control the output of build processes, or interpreter
+settings that "normal users" aren't expected to customise in most situations.
+
+Notes:
+
+- Path variables are subtle; we keep many of them here to avoid false-positives
+ and breaking builds, but ideally they would be "in the blacklist if contents
+ differ, else in the whitelist".
+"""
+WHITELIST = r"""
+CC CPP CXX FC F GCJ LD OBJC OBJCXX RUSTC LEX YACC
+CFLAGS CPPFLAGS CXXFLAGS FCFLAGS FFLAGS GCJFLAGS LDFLAGS OBJCFLAGS OBJCXXFLAGS RUSTFLAGS
+DEB_\w+ DPKG_\w+
+PATH JAVA_HOME GOPATH LD_PRELOAD LD_LIBRARY_PATH PERL5LIB PYTHONPATH
+SOURCE_DATE_EPOCH BUILD_PATH_PREFIX_MAP
+""".split()
+
+
+"""
+Some stuff breaks when you unset certain vars, e.g. diffoscope breaks if PATH
+is unset. Technically these are bugs, but they are so prevalent and we'd like
+to focus on more important things first.
+
+TODO: make it possible to clear this list on the command line.
+"""
+NEVER_UNSET = "HOME PATH USER LOGNAME PWD".split()
+
+
+def parse_environ_templates(variables):
+ for tmpl in variables:
+ k, sep, v = tmpl.partition("=")
+ if not v and sep:
+ yield (k, None)
+ else:
+ yield (rstr.xeger(k), rstr.xeger(v) or "i_capture_the_environment")
+
+
+def generate_dummy_environ(env=None, blacklist=BLACKLIST, whitelist=WHITELIST, never_unset=NEVER_UNSET):
+ if env is None:
+ env = os.environ
+ env = set(env.keys()) - set(never_unset)
+
+ def generate(name, variables):
+ for tmpl in variables:
+ k, sep, v = tmpl.partition("=")
+ if re.match(k, name):
+ # unset (if v empty), or generate random value matching v
+ yield (name, "%s=%s" % (name, v))
+
+ blacklist_matches = [m for n in env for m in generate(n, blacklist)]
+ # generate overrides for existing vars, and possibly generate new vars
+ b = [m[1] for m in blacklist_matches] + blacklist
+ bn = sorted(set([m[0] for m in blacklist_matches] + blacklist))
+
+ def matches(name, pp):
+ return any(re.match(p, name) for p in pp)
+ blacklist_names = [t.partition("=")[0] for t in blacklist]
+ whitelist_names = [t.partition("=")[0] for t in whitelist]
+
+ unrecognized = sorted(n for n in env
+ if (not matches(n, blacklist_names)
+ and not matches(n, whitelist_names)))
+ extra_unknown = ["[A-Z]{2,5}(_[A-Z]{2,5}){1,3}",
+ "[A-Z]{2,5}(_[A-Z]{2,5}){1,3}",
+ "REPROTEST_CAPTURE_ENVIRONMENT_UNKNOWN_\w+"]
+
+ # unset unrecognized stuff in the current env that doesn't match the
+ # whitelist or blacklist, which we set earlier
+ nw = ["%s=" % k for k in unrecognized] + extra_unknown
+ nwn = unrecognized + extra_unknown
+
+ return b, bn, nw, nwn
diff --git a/setup.py b/setup.py
index 9dece1e..571cb8d 100644
--- a/setup.py
+++ b/setup.py
@@ -9,8 +9,8 @@ setup(name='reprotest',
version='0.7.1',
description='Build packages and check them for reproducibility.',
long_description=open('README.rst', encoding='utf-8').read(),
- author='Ceridwen',
- author_email='ceridwenv at gmail.com',
+ author='Ximin Luo, Ceridwen',
+ author_email='infinity0 at debian.org, ceridwenv at gmail.com',
license='GPL-3+',
url='https://anonscm.debian.org/cgit/reproducible/reprotest.git/',
packages=find_packages(),
@@ -21,6 +21,7 @@ setup(name='reprotest',
},
install_requires=[
'diffoscope',
+ 'rstr',
],
classifiers=[
'Development Status :: 3 - Alpha',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/reproducible/reprotest.git
More information about the Reproducible-commits
mailing list