[pkg-d-commits] [ldc] 57/211: ir2obj cache: Add all cmdline args to the hash, unless we are certain they can safely be ignored for ir2obj hashing.
Matthias Klumpp
mak at moszumanska.debian.org
Sun Apr 23 22:36:09 UTC 2017
This is an automated email from the git hooks/post-receive script.
mak pushed a commit to annotated tag v1.1.0
in repository ldc.
commit f6919e3fb642d1ffd97ce6721298de8e90bc3e99
Author: Johan Engelen <jbc.engelen at gmail.com>
Date: Mon Sep 19 20:37:22 2016 +0200
ir2obj cache: Add all cmdline args to the hash, unless we are certain they can safely be ignored for ir2obj hashing.
---
driver/cl_options.cpp | 5 +-
driver/cl_options.h | 5 ++
driver/ir2obj_cache.cpp | 114 ++++++++++++++++++++++++++++++----
driver/main.cpp | 15 +++--
gen/optimizer.cpp | 19 ++++++
gen/optimizer.h | 6 ++
tests/linking/ir2obj_caching_flags1.d | 37 +++++++++++
7 files changed, 181 insertions(+), 20 deletions(-)
diff --git a/driver/cl_options.cpp b/driver/cl_options.cpp
index f032708..4c0f502 100644
--- a/driver/cl_options.cpp
+++ b/driver/cl_options.cpp
@@ -17,6 +17,9 @@
namespace opts {
+// This vector is filled by parseCommandLine in main.cpp.
+llvm::SmallVector<const char *, 32> allArguments;
+
/* Option parser that defaults to zero when no explicit number is given.
* i.e.: -cov --> value = 0
* -cov=9 --> value = 9
@@ -198,7 +201,7 @@ static cl::opt<bool, true> unittest("unittest",
cl::opt<std::string>
ir2objCacheDir("ir2obj-cache", cl::desc("Use <cache dir> to cache object files for whole IR modules (experimental)"),
- cl::value_desc("cache dir"), cl::Prefix);
+ cl::value_desc("cache dir"));
static StringsAdapter strImpPathStore("J", global.params.fileImppath);
static cl::list<std::string, StringsAdapter>
diff --git a/driver/cl_options.h b/driver/cl_options.h
index 817a6b2..ba46f75 100644
--- a/driver/cl_options.h
+++ b/driver/cl_options.h
@@ -17,6 +17,7 @@
#include "driver/targetmachine.h"
#include "gen/cl_helpers.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include <deque>
@@ -28,6 +29,10 @@
namespace opts {
namespace cl = llvm::cl;
+/// Stores the commandline arguments list, including the ones specified by the
+/// config and response files.
+extern llvm::SmallVector<const char *, 32> allArguments;
+
/* Mostly generated with the following command:
egrep -e '^(cl::|#if|#e)' gen/cl_options.cpp \
| sed -re 's/^(cl::.*)\(.*$/ extern \1;/'
diff --git a/driver/ir2obj_cache.cpp b/driver/ir2obj_cache.cpp
index 0e2cd7a..3949bba 100644
--- a/driver/ir2obj_cache.cpp
+++ b/driver/ir2obj_cache.cpp
@@ -127,20 +127,87 @@ void storeCacheFileName(llvm::StringRef cacheObjectHash,
llvm::sys::path::append(filePath, llvm::Twine("ircache_") + cacheObjectHash +
"." + global.obj_ext);
}
-}
-
-namespace ir2obj {
-void calculateModuleHash(llvm::Module *m, llvm::SmallString<32> &str) {
- raw_hash_ostream hash_os;
+// Output to `hash_os` all commandline flags, and try to skip the ones that have
+// no influence on the object code output. The cmdline flags need to be added
+// to the ir2obj cache hash to uniquely identify the object file output.
+// Because the compiler version is part of the hash, differences in the
+// default settings between compiler versions are already taken care of.
+// (Note: config and response files may also add compiler flags.)
+void outputIR2ObjRelevantCmdlineArgs(llvm::raw_ostream &hash_os)
+{
+ // Use a "whitelist" of cmdline args that do not need to be added to the hash,
+ // and add all others. There is no harm (other than missed cache
+ // opportunities) in adding commandline arguments that also change the hashed
+ // IR, which simplifies the code here.
+ // The code does not deal well with options specified without an equals sign, and
+ // will add those to the hash, resulting in missed cache opportunities.
+ for (const char *arg : opts::allArguments) {
+ if (!arg || !arg[0])
+ continue;
+
+ // Out of precaution, all arguments that are not prefixed with '-' are
+ // added to the hash. Such an argument could be a source file "foo.d", but
+ // also a value for the previous argument when the equals sign is omitted,
+ // for example: "-code-model default" becomes "-code-model" "default".
+ // It results in missed cache opportunities. :(
+ if (arg[0] == '-') {
+ if (arg[1] == 'O') {
+ // We deal with -O later ("-O" and "-O3" should hash equally, "" and
+ // "-O0" too)
+ continue;
+ }
+ if (arg[1] == 'c' && !arg[2])
+ continue;
+ // All options starting with these characters can be ignored (LLVM does
+ // not have options starting with capitals)
+ if (arg[1] == 'D' || arg[1] == 'H' || arg[1] == 'I' || arg[1] == 'J' ||
+ arg[1] == 'L' || arg[1] == 'X')
+ continue;
+ if (arg[1] == 'd' || arg[1] == 'v' || arg[1] == 'w') {
+ // LLVM options are long, so short options starting with 'd', 'v' or 'w' can
+ // be ignored.
+ unsigned len = 2;
+ for (; len < 11; ++len)
+ if (!arg[len])
+ break;
+ if (len < 11)
+ continue;
+ }
+ // "-of..." can be ignored
+ if (arg[1] == 'o' && arg[2] == 'f')
+ continue;
+ // "-od..." can be ignored
+ if (arg[1] == 'o' && arg[2] == 'd')
+ continue;
+ // All "-ir2..." options can be ignored
+ if (arg[1] == 'i' && arg[2] == 'r' && arg[3] == '2')
+ continue;
+ // All effects of -d-version... are already included in the IR hash.
+ if (strncmp(arg+1, "d-version", 9) == 0)
+ continue;
+ // All effects of -unittest are already included in the IR hash.
+ if (strcmp(arg + 1, "unittest") == 0) {
+ continue;
+ }
+
+ // All arguments following -run can safely be ignored
+ if (strcmp(arg + 1, "run") == 0) {
+ break;
+ }
+ }
- // Let hash depend on the compiler version:
- hash_os << global.ldc_version << global.version << global.llvm_version
- << ldc::built_with_Dcompiler_version;
+ // If we reach here, add the argument to the hash.
+ hash_os << arg;
+ }
- // Let hash depend on a few compile flags that change the outputted obj file,
- // but whose changes are not always observable in the IR:
- hash_os << codeGenOptLevel();
+ // Adding these options to the hash should not be needed after adding all
+ // cmdline args. We keep this code here however, in case we find a different
+ // solution for dealing with LLVM commandline flags. See GH #1773.
+ // Also, having these options explicitly added to the hash protects against
+ // the possibility of different default settings on different platforms (while
+ // sharing the cache).
+ outputOptimizationSettings(hash_os);
hash_os << opts::mCPU;
for (auto &attr : opts::mAttrs) {
hash_os << attr;
@@ -149,6 +216,31 @@ void calculateModuleHash(llvm::Module *m, llvm::SmallString<32> &str) {
hash_os << opts::mRelocModel;
hash_os << opts::mCodeModel;
hash_os << opts::disableFpElim;
+}
+
+// Output to `hash_os` all environment flags that influence object code output
+// in ways that are not observable in the pre-LLVM passes IR used for hashing.
+void outputIR2ObjRelevantEnvironmentOpts(llvm::raw_ostream &hash_os)
+{
+ // There are no relevant environment options at the moment.
+}
+
+}
+
+namespace ir2obj {
+
+void calculateModuleHash(llvm::Module *m, llvm::SmallString<32> &str) {
+ raw_hash_ostream hash_os;
+
+ // Let hash depend on the compiler version:
+ hash_os << global.ldc_version << global.version << global.llvm_version
+ << ldc::built_with_Dcompiler_version;
+
+ // Let hash depend on compile flags that change the outputted obj file,
+ // but whose changes are not always observable in the pre-optimized IR used
+ // for hashing:
+ outputIR2ObjRelevantCmdlineArgs(hash_os);
+ outputIR2ObjRelevantEnvironmentOpts(hash_os);
llvm::WriteBitcodeToFile(m, hash_os);
hash_os.resultAsString(str);
diff --git a/driver/main.cpp b/driver/main.cpp
index ca945e5..c1cdf5b 100644
--- a/driver/main.cpp
+++ b/driver/main.cpp
@@ -401,18 +401,17 @@ void parseCommandLine(int argc, char **argv, Strings &sourceFiles,
global.params.moduleDepsFile = nullptr;
// Build combined list of command line arguments.
- llvm::SmallVector<const char *, 32> final_args;
- final_args.push_back(argv[0]);
+ opts::allArguments.push_back(argv[0]);
ConfigFile cfg_file;
const char *explicitConfFile = tryGetExplicitConfFile(argc, argv);
std::string cfg_triple = tryGetExplicitTriple(argc, argv).getTriple();
// just ignore errors for now, they are still printed
cfg_file.read(explicitConfFile, cfg_triple.c_str());
- final_args.insert(final_args.end(), cfg_file.switches_begin(),
- cfg_file.switches_end());
+ opts::allArguments.insert(opts::allArguments.end(), cfg_file.switches_begin(),
+ cfg_file.switches_end());
- final_args.insert(final_args.end(), &argv[1], &argv[argc]);
+ opts::allArguments.insert(opts::allArguments.end(), &argv[1], &argv[argc]);
cl::SetVersionPrinter(&printVersion);
hideLLVMOptions();
@@ -429,11 +428,11 @@ void parseCommandLine(int argc, char **argv, Strings &sourceFiles,
cl::TokenizeGNUCommandLine
#endif
,
- final_args);
+ opts::allArguments);
#endif
- cl::ParseCommandLineOptions(final_args.size(),
- const_cast<char **>(final_args.data()),
+ cl::ParseCommandLineOptions(opts::allArguments.size(),
+ const_cast<char **>(opts::allArguments.data()),
"LDC - the LLVM D compiler\n");
helpOnly = mCPU == "help" ||
diff --git a/gen/optimizer.cpp b/gen/optimizer.cpp
index cb1b791..7ec1ba3 100644
--- a/gen/optimizer.cpp
+++ b/gen/optimizer.cpp
@@ -455,3 +455,22 @@ void verifyModule(llvm::Module *m) {
}
Logger::println("Verification passed!");
}
+
+// Output to `hash_os` all optimization settings that influence object code output
+// and that are not observable in the IR.
+// This is used to calculate the hash used for caching that uniquely identifies
+// the object file output.
+void outputOptimizationSettings(llvm::raw_ostream &hash_os) {
+ hash_os << optimizeLevel;
+ hash_os << willInline();
+ hash_os << disableLangSpecificPasses;
+ hash_os << disableSimplifyDruntimeCalls;
+ hash_os << disableSimplifyLibCalls;
+ hash_os << disableGCToStack;
+ hash_os << unitAtATime;
+ hash_os << stripDebug;
+ hash_os << opts::sanitize;
+ hash_os << disableLoopUnrolling;
+ hash_os << disableLoopVectorization;
+ hash_os << disableSLPVectorization;
+}
diff --git a/gen/optimizer.h b/gen/optimizer.h
index 84414e6..8a2b6c0 100644
--- a/gen/optimizer.h
+++ b/gen/optimizer.h
@@ -20,6 +20,10 @@
#include "llvm/Support/CommandLine.h"
+namespace llvm {
+class raw_ostream;
+}
+
namespace opts {
enum SanitizerCheck {
@@ -49,4 +53,6 @@ llvm::CodeGenOpt::Level codeGenOptLevel();
void verifyModule(llvm::Module *m);
+void outputOptimizationSettings(llvm::raw_ostream &hash_os);
+
#endif
diff --git a/tests/linking/ir2obj_caching_flags1.d b/tests/linking/ir2obj_caching_flags1.d
new file mode 100644
index 0000000..1baceb0
--- /dev/null
+++ b/tests/linking/ir2obj_caching_flags1.d
@@ -0,0 +1,37 @@
+// Test that certain cmdline flags result in different ir2obj cache objects, even though the LLVM IR may be the same.
+
+// Note that the NO_HIT tests should change the default setting of the tested flag.
+
+// Create and then empty the cache for correct testing when running the test multiple times.
+// RUN: %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache \
+// RUN: && %prunecache -f %T/flag1cache --max-bytes=1 \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O3 -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O2 -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O4 -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O5 -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -Os -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -Oz -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-d-passes -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-simplify-drtcalls -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-simplify-libcalls -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-gc2stack -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -enable-inlining -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -unit-at-a-time=false -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -strip-debug -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-loop-unrolling -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-loop-vectorization -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-slp-vectorization -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vectorize-loops -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -v -wi -d -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -D -H -I. -J. -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -d-version=Irrelevant -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -unittest -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN: && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vv | FileCheck --check-prefix=MUST_HIT %s
+// The last test is a MUST_HIT test, to make sure that the cache wasn't pruned somehow, which could effectively disable some NO_HIT tests.
+
+// MUST_HIT: Cache object found!
+// NO_HIT-NOT: Cache object found!
+
+void foo() {}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-d/ldc.git
More information about the pkg-d-commits
mailing list