[pkg-d-commits] [ldc] 57/211: ir2obj cache: Add all cmdline args to the hash, unless we are certain they can safely be ignored for ir2obj hashing.

Matthias Klumpp mak at moszumanska.debian.org
Sun Apr 23 22:36:09 UTC 2017


This is an automated email from the git hooks/post-receive script.

mak pushed a commit to annotated tag v1.1.0
in repository ldc.

commit f6919e3fb642d1ffd97ce6721298de8e90bc3e99
Author: Johan Engelen <jbc.engelen at gmail.com>
Date:   Mon Sep 19 20:37:22 2016 +0200

    ir2obj cache: Add all cmdline args to the hash, unless we are certain they can safely be ignored for ir2obj hashing.
---
 driver/cl_options.cpp                 |   5 +-
 driver/cl_options.h                   |   5 ++
 driver/ir2obj_cache.cpp               | 114 ++++++++++++++++++++++++++++++----
 driver/main.cpp                       |  15 +++--
 gen/optimizer.cpp                     |  19 ++++++
 gen/optimizer.h                       |   6 ++
 tests/linking/ir2obj_caching_flags1.d |  37 +++++++++++
 7 files changed, 181 insertions(+), 20 deletions(-)

diff --git a/driver/cl_options.cpp b/driver/cl_options.cpp
index f032708..4c0f502 100644
--- a/driver/cl_options.cpp
+++ b/driver/cl_options.cpp
@@ -17,6 +17,9 @@
 
 namespace opts {
 
+// This vector is filled by parseCommandLine in main.cpp.
+llvm::SmallVector<const char *, 32> allArguments;
+
 /* Option parser that defaults to zero when no explicit number is given.
  * i.e.:  -cov    --> value = 0
  *        -cov=9  --> value = 9
@@ -198,7 +201,7 @@ static cl::opt<bool, true> unittest("unittest",
 
 cl::opt<std::string>
     ir2objCacheDir("ir2obj-cache", cl::desc("Use <cache dir> to cache object files for whole IR modules (experimental)"),
-            cl::value_desc("cache dir"), cl::Prefix);
+            cl::value_desc("cache dir"));
 
 static StringsAdapter strImpPathStore("J", global.params.fileImppath);
 static cl::list<std::string, StringsAdapter>
diff --git a/driver/cl_options.h b/driver/cl_options.h
index 817a6b2..ba46f75 100644
--- a/driver/cl_options.h
+++ b/driver/cl_options.h
@@ -17,6 +17,7 @@
 
 #include "driver/targetmachine.h"
 #include "gen/cl_helpers.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include <deque>
@@ -28,6 +29,10 @@
 namespace opts {
 namespace cl = llvm::cl;
 
+/// Stores the commandline arguments list, including the ones specified by the
+/// config and response files.
+extern llvm::SmallVector<const char *, 32> allArguments;
+
 /* Mostly generated with the following command:
    egrep -e '^(cl::|#if|#e)' gen/cl_options.cpp \
     | sed -re 's/^(cl::.*)\(.*$/    extern \1;/'
diff --git a/driver/ir2obj_cache.cpp b/driver/ir2obj_cache.cpp
index 0e2cd7a..3949bba 100644
--- a/driver/ir2obj_cache.cpp
+++ b/driver/ir2obj_cache.cpp
@@ -127,20 +127,87 @@ void storeCacheFileName(llvm::StringRef cacheObjectHash,
   llvm::sys::path::append(filePath, llvm::Twine("ircache_") + cacheObjectHash +
                                         "." + global.obj_ext);
 }
-}
-
-namespace ir2obj {
 
-void calculateModuleHash(llvm::Module *m, llvm::SmallString<32> &str) {
-  raw_hash_ostream hash_os;
+// Output to `hash_os` all commandline flags, and try to skip the ones that have
+// no influence on the object code output. The cmdline flags need to be added
+// to the ir2obj cache hash to uniquely identify the object file output.
+// Because the compiler version is part of the hash, differences in the
+// default settings between compiler versions are already taken care of.
+// (Note: config and response files may also add compiler flags.)
+void outputIR2ObjRelevantCmdlineArgs(llvm::raw_ostream &hash_os)
+{
+  // Use a "whitelist" of cmdline args that do not need to be added to the hash,
+  // and add all others. There is no harm (other than missed cache
+  // opportunities) in adding commandline arguments that also change the hashed
+  // IR, which simplifies the code here.
+  // The code does not deal well with options specified without equals sign, and
+  // will add those to the hash, resulting in missed cache opportunities.
+  for (const char *arg : opts::allArguments) {
+    if (!arg || !arg[0])
+      continue;
+
+    // Out of precaution, all arguments that are not prefixed with '-' are
+    // added to the hash. Such an argument could be a source file "foo.d", but
+    // also a value for the previous argument when the equals sign is omitted,
+    // for example: "-code-model default" becomes "-code-model" "default".
+    // It results in missed cache opportunities. :(
+    if (arg[0] == '-') {
+      if (arg[1] == 'O') {
+        // We deal with -O later ("-O" and "-O3" should hash equally, "" and
+        // "-O0" too)
+        continue;
+      }
+      if (arg[1] == 'c' && !arg[2])
+        continue;
+      // All options starting with these characters can be ignored (LLVM does
+      // not have options starting with capitals)
+      if (arg[1] == 'D' || arg[1] == 'H' || arg[1] == 'I' || arg[1] == 'J' ||
+          arg[1] == 'L' || arg[1] == 'X')
+        continue;
+      if (arg[1] == 'd' || arg[1] == 'v' || arg[1] == 'w') {
+        // LLVM options are long, so short options (under 11 characters)
+        // starting with 'd', 'v' or 'w' can be ignored.
+        unsigned len = 2;
+        for (; len < 11; ++len)
+          if (!arg[len])
+            break;
+        if (len < 11)
+          continue;
+      }
+      // "-of..." can be ignored
+      if (arg[1] == 'o' && arg[2] == 'f')
+        continue;
+      // "-od..." can be ignored
+      if (arg[1] == 'o' && arg[2] == 'd')
+        continue;
+      // All "-ir2..." options can be ignored
+      if (arg[1] == 'i' && arg[2] == 'r' && arg[3] == '2')
+        continue;
+      // All effects of -d-version... are already included in the IR hash.
+      if (strncmp(arg+1, "d-version", 9) == 0)
+        continue;
+      // All effects of -unittest are already included in the IR hash.
+      if (strcmp(arg + 1, "unittest") == 0) {
+        continue;
+      }
+
+      // All arguments following -run can safely be ignored
+      if (strcmp(arg + 1, "run") == 0) {
+        break;
+      }
+    }
 
-  // Let hash depend on the compiler version:
-  hash_os << global.ldc_version << global.version << global.llvm_version
-          << ldc::built_with_Dcompiler_version;
+    // If we reach here, add the argument to the hash.
+    hash_os << arg;
+  }
 
-  // Let hash depend on a few compile flags that change the outputted obj file,
-  // but whose changes are not always observable in the IR:
-  hash_os << codeGenOptLevel();
+  // Adding these options to the hash should not be needed after adding all
+  // cmdline args. We keep this code here however, in case we find a different
+  // solution for dealing with LLVM commandline flags. See GH #1773.
+  // Also, having these options explicitly added to the hash protects against
+  // the possibility of different default settings on different platforms (while
+  // sharing the cache).
+  outputOptimizationSettings(hash_os);
   hash_os << opts::mCPU;
   for (auto &attr : opts::mAttrs) {
     hash_os << attr;
@@ -149,6 +216,31 @@ void calculateModuleHash(llvm::Module *m, llvm::SmallString<32> &str) {
   hash_os << opts::mRelocModel;
   hash_os << opts::mCodeModel;
   hash_os << opts::disableFpElim;
+}
+
+// Output to `hash_os` all environment flags that influence object code output
+// in ways that are not observable in the pre-LLVM passes IR used for hashing.
+void outputIR2ObjRelevantEnvironmentOpts(llvm::raw_ostream &hash_os)
+{
+  // There are no relevant environment options at the moment.
+}
+
+}
+
+namespace ir2obj {
+
+void calculateModuleHash(llvm::Module *m, llvm::SmallString<32> &str) {
+  raw_hash_ostream hash_os;
+
+  // Let hash depend on the compiler version:
+  hash_os << global.ldc_version << global.version << global.llvm_version
+          << ldc::built_with_Dcompiler_version;
+
+  // Let hash depend on compile flags that change the outputted obj file,
+  // but whose changes are not always observable in the pre-optimized IR used
+  // for hashing:
+  outputIR2ObjRelevantCmdlineArgs(hash_os);
+  outputIR2ObjRelevantEnvironmentOpts(hash_os);
 
   llvm::WriteBitcodeToFile(m, hash_os);
   hash_os.resultAsString(str);
diff --git a/driver/main.cpp b/driver/main.cpp
index ca945e5..c1cdf5b 100644
--- a/driver/main.cpp
+++ b/driver/main.cpp
@@ -401,18 +401,17 @@ void parseCommandLine(int argc, char **argv, Strings &sourceFiles,
   global.params.moduleDepsFile = nullptr;
 
   // Build combined list of command line arguments.
-  llvm::SmallVector<const char *, 32> final_args;
-  final_args.push_back(argv[0]);
+  opts::allArguments.push_back(argv[0]);
 
   ConfigFile cfg_file;
   const char *explicitConfFile = tryGetExplicitConfFile(argc, argv);
   std::string cfg_triple = tryGetExplicitTriple(argc, argv).getTriple();
   // just ignore errors for now, they are still printed
   cfg_file.read(explicitConfFile, cfg_triple.c_str());
-  final_args.insert(final_args.end(), cfg_file.switches_begin(),
-                    cfg_file.switches_end());
+  opts::allArguments.insert(opts::allArguments.end(), cfg_file.switches_begin(),
+                            cfg_file.switches_end());
 
-  final_args.insert(final_args.end(), &argv[1], &argv[argc]);
+  opts::allArguments.insert(opts::allArguments.end(), &argv[1], &argv[argc]);
 
   cl::SetVersionPrinter(&printVersion);
   hideLLVMOptions();
@@ -429,11 +428,11 @@ void parseCommandLine(int argc, char **argv, Strings &sourceFiles,
                           cl::TokenizeGNUCommandLine
 #endif
                           ,
-                          final_args);
+                          opts::allArguments);
 #endif
 
-  cl::ParseCommandLineOptions(final_args.size(),
-                              const_cast<char **>(final_args.data()),
+  cl::ParseCommandLineOptions(opts::allArguments.size(),
+                              const_cast<char **>(opts::allArguments.data()),
                               "LDC - the LLVM D compiler\n");
 
   helpOnly = mCPU == "help" ||
diff --git a/gen/optimizer.cpp b/gen/optimizer.cpp
index cb1b791..7ec1ba3 100644
--- a/gen/optimizer.cpp
+++ b/gen/optimizer.cpp
@@ -455,3 +455,22 @@ void verifyModule(llvm::Module *m) {
   }
   Logger::println("Verification passed!");
 }
+
+// Output to `hash_os` all optimization settings that influence object code output
+// and that are not observable in the IR.
+// This is used to calculate the hash used for caching that uniquely identifies
+// the object file output.
+void outputOptimizationSettings(llvm::raw_ostream &hash_os) {
+  hash_os << optimizeLevel;
+  hash_os << willInline();
+  hash_os << disableLangSpecificPasses;
+  hash_os << disableSimplifyDruntimeCalls;
+  hash_os << disableSimplifyLibCalls;
+  hash_os << disableGCToStack;
+  hash_os << unitAtATime;
+  hash_os << stripDebug;
+  hash_os << opts::sanitize;
+  hash_os << disableLoopUnrolling;
+  hash_os << disableLoopVectorization;
+  hash_os << disableSLPVectorization;
+}
diff --git a/gen/optimizer.h b/gen/optimizer.h
index 84414e6..8a2b6c0 100644
--- a/gen/optimizer.h
+++ b/gen/optimizer.h
@@ -20,6 +20,10 @@
 
 #include "llvm/Support/CommandLine.h"
 
+namespace llvm {
+class raw_ostream;
+}
+
 namespace opts {
 
 enum SanitizerCheck {
@@ -49,4 +53,6 @@ llvm::CodeGenOpt::Level codeGenOptLevel();
 
 void verifyModule(llvm::Module *m);
 
+void outputOptimizationSettings(llvm::raw_ostream &hash_os);
+
 #endif
diff --git a/tests/linking/ir2obj_caching_flags1.d b/tests/linking/ir2obj_caching_flags1.d
new file mode 100644
index 0000000..1baceb0
--- /dev/null
+++ b/tests/linking/ir2obj_caching_flags1.d
@@ -0,0 +1,37 @@
+// Test that certain cmdline flags result in different ir2obj cache objects, even though the LLVM IR may be the same.
+
+// Note that the NO_HIT tests should change the default setting of the tested flag.
+
+// Create and then empty the cache for correct testing when running the test multiple times.
+// RUN: %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache \
+// RUN:   && %prunecache -f %T/flag1cache --max-bytes=1 \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache                                  -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O                               -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O3                              -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O2                              -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O4                              -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -O5                              -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -Os                              -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -Oz                              -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-d-passes                -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-simplify-drtcalls       -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-simplify-libcalls       -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-gc2stack                -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -enable-inlining                 -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -unit-at-a-time=false            -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -strip-debug                     -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-loop-unrolling          -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-loop-vectorization      -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -disable-slp-vectorization       -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -vectorize-loops                 -vv | FileCheck --check-prefix=NO_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -v -wi -d                        -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -D -H -I. -J.                    -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -d-version=Irrelevant            -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache -unittest                        -vv | FileCheck --check-prefix=MUST_HIT %s \
+// RUN:   && %ldc %s -c -of=%t%obj -ir2obj-cache=%T/flag1cache                                  -vv | FileCheck --check-prefix=MUST_HIT %s
+// The last test is a MUST_HIT test, to make sure that the cache wasn't pruned somehow, which could effectively disable some NO_HIT tests.
+
+// MUST_HIT: Cache object found!
+// NO_HIT-NOT: Cache object found!
+
+void foo() {}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-d/ldc.git



More information about the pkg-d-commits mailing list