[pkg-d-commits] [ldc] 112/149: Optimize static array comparisons to a memcmp call for types for which this is valid.

Matthias Klumpp mak at moszumanska.debian.org
Sun Apr 23 22:37:03 UTC 2017


This is an automated email from the git hooks/post-receive script.

mak pushed a commit to annotated tag v1.2.0
in repository ldc.

commit bd09cdc6597536db3f902ee30ef486bf7c7bf946
Author: Johan Engelen <jbc.engelen at gmail.com>
Date:   Wed Aug 17 22:53:37 2016 +0200

    Optimize static array comparisons to a memcmp call for types for which this is valid.
    
    Resolves #1632
---
 gen/arrays.cpp                      | 108 +++++++++++++++++++++++++-
 gen/runtime.cpp                     |  10 +++
 tests/codegen/array_equals_memcmp.d | 146 ++++++++++++++++++++++++++++++++++++
 3 files changed, 262 insertions(+), 2 deletions(-)

diff --git a/gen/arrays.cpp b/gen/arrays.cpp
index e778f33..ed7ee1e 100644
--- a/gen/arrays.cpp
+++ b/gen/arrays.cpp
@@ -962,8 +962,9 @@ DSliceValue *DtoAppendDCharToUnicodeString(Loc &loc, DValue *arr,
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+namespace {
 // helper for eq and cmp
-static LLValue *DtoArrayEqCmp_impl(Loc &loc, const char *func, DValue *l,
+LLValue *DtoArrayEqCmp_impl(Loc &loc, const char *func, DValue *l,
                                    DValue *r, bool useti) {
   IF_LOG Logger::println("comparing arrays");
   LLFunction *fn = getRuntimeFunction(loc, gIR->module, func);
@@ -994,14 +995,117 @@ static LLValue *DtoArrayEqCmp_impl(Loc &loc, const char *func, DValue *l,
   return gIR->funcGen().callOrInvoke(fn, args).getInstruction();
 }
 
+/// When `true` is returned, the type can be compared using `memcmp`.
+/// See `validCompareWithMemcmp`.
+bool validCompareWithMemcmpType(Type *t) {
+  switch (t->ty) {
+  case Tsarray: {
+    auto *elemType = t->nextOf()->toBasetype();
+    return validCompareWithMemcmpType(elemType);
+  }
+
+  case Tstruct:
+    // TODO: Implement for structs that can be compared with memcmp. Remember
+    // that structs can have a user-defined opEquals, internal padding bytes,
+    // and alignment padding bytes between array elements.
+    return false;
+
+  case Tvoid:
+  case Tint8:
+  case Tuns8:
+  case Tint16:
+  case Tuns16:
+  case Tint32:
+  case Tuns32:
+  case Tint64:
+  case Tuns64:
+  case Tint128:
+  case Tuns128:
+  case Tbool:
+  case Tchar:
+  case Twchar:
+  case Tdchar:
+  case Tpointer:
+    return true;
+
+    // TODO: Determine whether this can be "return true" too:
+    // case Tvector:
+  }
+
+  return false;
+}
+
+/// When `true` is returned, `l` and `r` can be compared using `memcmp`.
+///
+/// This check is conservative: it may return `false` even when `memcmp` would
+/// be valid, but it must only return `true` when `memcmp` is certainly valid.
+///
+/// Comparing with memcmp is often not valid, for example due to
+/// - Floating point types (NaN != NaN, and +0.0 == -0.0)
+/// - Padding bytes
+/// - User-defined opEquals
+bool validCompareWithMemcmp(DValue *l, DValue *r) {
+  auto *ltype = l->type->toBasetype();
+
+  // TODO: Remove this check once `DtoArrayEqCmp_memcmp` can handle dynamic
+  // array comparisons.
+  if (ltype->ty != Tsarray)
+    return false;
+
+  auto *rtype = r->type->toBasetype();
+  // Only use memcmp for equivalent types (e.g. for `const int[3] == int[3]`,
+  // but not for `int[3] == short[3]`).
+  if (!ltype->equivalent(rtype))
+    return false;
+
+  auto *elemType = ltype->nextOf()->toBasetype();
+  return validCompareWithMemcmpType(elemType);
+}
+
+/// Compare `l` and `r` using memcmp. No checks are done for validity.
+///
+/// This function can currently only deal with comparisons of static arrays
+/// with memcmp.
+/// TODO: Implement dynamic array comparison: length equality check
+/// (and perhaps pointer equality check) before memcmp.
+LLValue *DtoArrayEqCmp_memcmp(Loc &loc, DValue *l, DValue *r, IRState &irs) {
+  IF_LOG Logger::println("Comparing arrays using memcmp");
+  LLFunction *fn = getRuntimeFunction(loc, gIR->module, "memcmp");
+  assert(fn);
+
+  // TODO: Remove this check once dynamic arrays are correctly dealt with.
+  assert(l->type->toBasetype()->ty == Tsarray);
+
+  auto *l_ptr = DtoArrayPtr(l);
+  auto *r_ptr = DtoArrayPtr(r);
+
+  auto *size = DtoArrayLen(l);
+  size_t elementSize = getTypeAllocSize(l_ptr->getType()->getContainedType(0));
+  if (elementSize != 1) {
+    size = irs.ir->CreateMul(size, DtoConstSize_t(elementSize));
+  }
+
+  LLValue *args[] = {DtoBitCast(l_ptr, getVoidPtrType()),
+                     DtoBitCast(r_ptr, getVoidPtrType()), size};
+  return irs.funcGen().callOrInvoke(fn, args).getInstruction();
+}
+} // end anonymous namespace
+
 ////////////////////////////////////////////////////////////////////////////////
 LLValue *DtoArrayEquals(Loc &loc, TOK op, DValue *l, DValue *r) {
   LLValue *res = nullptr;
 
-  // optimize comparisons against null by rewriting to `l.length op 0`
   if (r->isNull()) {
+    // optimize comparisons against null by rewriting to `l.length op 0`
     const auto predicate = eqTokToICmpPred(op);
     res = gIR->ir->CreateICmp(predicate, DtoArrayLen(l), DtoConstSize_t(0));
+  } else if (validCompareWithMemcmp(l, r)) {
+    // Use memcmp directly if possible. This avoids typeinfo lookup, and enables
+    // further optimization because LLVM understands the semantics of C's
+    // `memcmp`.
+    const auto predicate = eqTokToICmpPred(op);
+    const auto memcmp_result = DtoArrayEqCmp_memcmp(loc, l, r, *gIR);
+    res = gIR->ir->CreateICmp(predicate, memcmp_result, DtoConstInt(0));
   } else {
     res = DtoArrayEqCmp_impl(loc, "_adEq2", l, r, true);
     const auto predicate = eqTokToICmpPred(op, /* invert = */ true);
diff --git a/gen/runtime.cpp b/gen/runtime.cpp
index 042ad3c..726eea3 100644
--- a/gen/runtime.cpp
+++ b/gen/runtime.cpp
@@ -323,6 +323,8 @@ static void buildRuntimeModule() {
                                   llvm::Attribute::NoCapture),
       Attr_ReadOnly_NoUnwind_1_NoCapture(Attr_ReadOnly_1_NoCapture, ~0U,
                                          llvm::Attribute::NoUnwind),
+      Attr_ReadOnly_NoUnwind_1_2_NoCapture(Attr_ReadOnly_NoUnwind_1_NoCapture,
+                                           2, llvm::Attribute::NoCapture),
       Attr_ReadNone(NoAttrs, ~0U, llvm::Attribute::ReadNone),
       Attr_1_NoCapture(NoAttrs, 1, llvm::Attribute::NoCapture),
       Attr_NoAlias_1_NoCapture(Attr_1_NoCapture, 0, llvm::Attribute::NoAlias),
@@ -753,6 +755,14 @@ static void buildRuntimeModule() {
       break;
     }
   }
+
+  //////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////
+  ////// C standard library functions (a druntime link dependency)
+
+  // int memcmp(const void *s1, const void *s2, size_t n);
+  createFwdDecl(LINKc, intTy, {"memcmp"}, {voidPtrTy, voidPtrTy, sizeTy}, {},
+                Attr_ReadOnly_NoUnwind_1_2_NoCapture);
 }
 
 static void emitInstrumentationFn(const char *name) {
diff --git a/tests/codegen/array_equals_memcmp.d b/tests/codegen/array_equals_memcmp.d
new file mode 100644
index 0000000..b043485
--- /dev/null
+++ b/tests/codegen/array_equals_memcmp.d
@@ -0,0 +1,146 @@
+// Tests that static array (in)equality is optimized to a memcmp call when valid.
+// More importantly: test that memcmp is _not_ used when it is not valid.
+
+// RUN: %ldc -c -output-ll -of=%t.ll %s && FileCheck %s --check-prefix=LLVM < %t.ll
+// RUN: %ldc -c -output-s  -of=%t.s  %s && FileCheck %s --check-prefix=ASM  < %t.s
+// RUN: %ldc -O3 -run %s
+
+module mod;
+
+struct ThreeBytes
+{
+    byte a;
+    byte b;
+    byte c;
+}
+
+align(4) struct ThreeBytesAligned
+{
+    byte a;
+    byte b;
+    byte c;
+}
+
+struct Packed
+{
+    byte a;
+    byte b;
+    byte c;
+    byte d;
+}
+
+struct PackedPacked
+{
+    Packed a;
+    Packed b;
+}
+
+struct WithPadding
+{
+    int b;
+    byte a;
+}
+
+// LLVM-LABEL: define{{.*}} @{{.*}}two_uints
+bool two_uints(ref uint[2] a, const ref uint[2] b)
+{
+    // LLVM: call i32 @memcmp({{.*}}, {{.*}}, i{{32|64}} 8)
+    return a == b;
+}
+
+// LLVM-LABEL: define{{.*}} @{{.*}}unequal_two_uints
+bool unequal_two_uints(ref uint[2] a, uint[2] b)
+{
+    // LLVM: call i32 @memcmp({{.*}}, {{.*}}, i{{32|64}} 8)
+    return a != b;
+}
+
+// LLVM-LABEL: define{{.*}} @{{.*}}two_floats
+bool two_floats(float[2] a, float[2] b)
+{
+    // LLVM-NOT: memcmp
+    return a == b;
+}
+
+// LLVM-LABEL: define{{.*}} @{{.*}}four_bools
+// ASM-LABEL: four_bools{{.*}}:
+bool four_bools(bool[4] a, bool[4] b)
+{
+    // LLVM: call i32 @memcmp({{.*}}, {{.*}}, i{{32|64}} 4)
+
+    // Make sure that LLVM recognizes and optimizes out the call to memcmp for 4-byte arrays:
+    // ASM-NOT: memcmp
+    return a == b;
+}
+
+// LLVM-LABEL: define{{.*}} @{{.*}}array_of_array
+// ASM-LABEL: array_of_array{{.*}}:
+bool array_of_array(byte[3][3] a, const byte[3][3] b)
+{
+    // LLVM: call i32 @memcmp({{.*}}, {{.*}}, i{{32|64}} 9)
+    return a == b;
+}
+
+// LLVM-LABEL: define{{.*}} @{{.*}}int3_short3
+bool int3_short3(int[3] a, short[3] b)
+{
+    // LLVM-NOT: memcmp
+    return a == b;
+    // LLVM-LABEL: ret i1
+}
+
+// LLVM-LABEL: define{{.*}} @{{.*}}pointer3
+bool pointer3(int*[3] a, int*[3] b)
+{
+    // LLVM: call i32 @memcmp({{.*}}, {{.*}}, i{{32|64}} {{12|24}})
+    return a == b;
+}
+
+// LLVM-LABEL: define{{.*}} @{{.*}}enum3
+enum E : char { a, b, c, d, e, f };
+bool enum3(E[3] a, E[3] b)
+{
+    // LLVM: call i32 @memcmp({{.*}}, {{.*}}, i{{32|64}} 3)
+    return a == b;
+}
+
+class K {}
+// LLVM-LABEL: define{{.*}} @{{.*}}klass2
+bool klass2(K[2] a, K[2] b)
+{
+    // LLVM-NOT: memcmp
+    return a == b;
+    // LLVM-LABEL: ret i1
+}
+
+void main()
+{
+    uint[2] a = [1, 2];
+    uint[2] b = [1, 2];
+    uint[2] c = [2, 1];
+    assert(two_uints(a, a));
+    assert(two_uints(a, b));
+    assert(!two_uints(a, c));
+    assert(!unequal_two_uints(a, b));
+    assert(unequal_two_uints(a, c));
+
+    assert( two_floats([1.0f, 2.0f], [1.0f, 2.0f]));
+    assert(!two_floats([1.0f, 2.0f], [2.0f, 1.0f]));
+
+    assert( four_bools([true, false, true, false], [true, false, true, false]));
+    assert(!four_bools([true, false, true, false], [true, false, true, true]));
+
+    assert( array_of_array([[1,2,3],[4,5,6],[7,8,9]],[[1,2,3],[4,5,6],[7,8,9]]));
+    assert(!array_of_array([[1,2,3],[4,5,6],[7,8,9]],[[6,6,6],[4,5,6],[7,8,9]]));
+
+    assert( int3_short3([1, 2, 3], [1, 2, 3]));
+    assert(!int3_short3([1, 2, 3], [3, 2, 3]));
+
+    int aaa = 666;
+    int bbb = 333;
+    assert( pointer3([&aaa, &bbb, &aaa], [&aaa, &bbb, &aaa]));
+    assert(!pointer3([&aaa, &bbb, &aaa], [&bbb, &bbb, &aaa]));
+
+    assert( enum3([E.a, E.e, E.b], [E.a, E.e, E.b]));
+    assert(!enum3([E.a, E.e, E.b], [E.a, E.e, E.f]));
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-d/ldc.git



More information about the pkg-d-commits mailing list