[mathicgb] 105/393: Added a custom Atomic implementation for GCC. The GCC 4.6.3 built-in std::atomic makes the program slow to a crawl while the custom Atomic that is now added gets the same speed as running with no atomic constraints at all (single-core). There is now a macro MATHICGB_USE_FAKE_ATOMIC that makes Atomic not actually impose any ordering constraints, though it is still atomic since that is derived merely from the size of the values and from being aligned, at least on x86 and x64. This can be used single-core to determine the overhead of the ordering constraints which in this case so far appear to be so small that they do not appear in my measurements - IF using the custom Atomic and not std::atomic.

Doug Torrance dtorrance-guest at moszumanska.debian.org
Fri Apr 3 15:58:41 UTC 2015


This is an automated email from the git hooks/post-receive script.

dtorrance-guest pushed a commit to branch upstream
in repository mathicgb.

commit c1105ecc92263f97fcca31431a9bcb140a94747d
Author: Bjarke Hammersholt Roune <bjarkehr.code at gmail.com>
Date:   Tue Nov 6 16:42:36 2012 +0100

    Added a custom Atomic implementation for GCC. The GCC 4.6.3 built-in std::atomic makes the program slow to a crawl while the custom Atomic that is now added gets the same speed as running with no atomic constraints at all (single-core). There is now a macro MATHICGB_USE_FAKE_ATOMIC that makes Atomic not actually impose any ordering constraints, though it is still atomic since that is derived merely from the size of the values and from being aligned, at least on x86 and x64. This can be used single-core to determine the overhead of the ordering constraints which in this case so far appear to be so small that they do not appear in my measurements - IF using the custom Atomic and not std::atomic.
---
 src/mathicgb/Atomic.hpp | 112 +++++++++++++++++++++++++++++++++++++-----------
 src/mathicgb/stdinc.h   |  13 ++++++
 2 files changed, 99 insertions(+), 26 deletions(-)

diff --git a/src/mathicgb/Atomic.hpp b/src/mathicgb/Atomic.hpp
index c0e53ca..0aad203 100755
--- a/src/mathicgb/Atomic.hpp
+++ b/src/mathicgb/Atomic.hpp
@@ -1,12 +1,12 @@
-#ifndef MATHICGB_ATOMIC_GUARD
-#define MATHICGB_ATOMIC_GUARD
-
+#ifndef MATHICGB_ATOMIC_GUARD
+#define MATHICGB_ATOMIC_GUARD
+
 // We need this include for std::memory_order even if we are not
-// using std::atomic.
-#include <atomic>
-
+// using std::atomic.
+#include <atomic>
+
 #if defined(_MSC_VER) && defined(MATHICGB_USE_CUSTOM_ATOMIC_X86_X64)
-
+
 /// Tells the compiler (not the CPU) to not reorder reads across this line.
 #define MATHICGB_COMPILER_READ_MEMORY_BARRIER _ReadBarrier()
 
@@ -62,10 +62,72 @@ namespace AtomicInternalMsvc {
 #endif
   template<class T> struct SeqCstSelect : public SeqCst<T, sizeof(T)> {};
 }
-
-#endif
-
-namespace AtomicInternal {
+#endif
+
+#if defined(__GNUC__) && defined(MATHICGB_USE_CUSTOM_ATOMIC_X86_X64)
+
+// As far as I can tell this is not documented to work, but it is the
+// only way to do this on GCC and it is what the Linux kernel does, so
+// that will have to be good enough for me.
+#define MATHICGB_COMPILER_READ_WRITE_MEMORY_BARRIER \
+  __asm__ __volatile__ ("" ::: "memory");
+
+// As far as I can tell there is no way to do a partial optimization
+// barrier on GCC, so we have to do the full barrier every time.
+#define MATHICGB_COMPILER_READ_MEMORY_BARRIER \
+  MATHICGB_COMPILER_READ_WRITE_MEMORY_BARRIER
+
+#define MATHICGB_COMPILER_WRITE_MEMORY_BARRIER \
+  MATHICGB_COMPILER_READ_WRITE_MEMORY_BARRIER
+
+#define MATHICGB_CPU_READ_WRITE_MEMORY_BARRIER __sync_synchronize()
+
+#define MATHICGB_SEQ_CST_LOAD(REF) \
+  AtomicInternalGCC::SeqCst<decltype(REF)>::load(REF)
+#define MATHICGB_SEQ_CST_STORE(VALUE, REF) \
+  AtomicInternalGCC::SeqCst<decltype(REF)>::store(VALUE, REF)
+
+namespace AtomicInternalGCC {
+  template<class T> struct SeqCst {
+    static T load(const T& ref) {
+      const auto ptr = static_cast<volatile T*>(const_cast<T*>(&ref));
+      return __sync_fetch_and_or((volatile T*)&ref, 0);
+    }
+    static void store(const T value, T& ref) {
+      const auto ptr = static_cast<volatile T*>(&ref);
+      while (!__sync_bool_compare_and_swap(ptr, *ptr, value)) {}
+    }
+  };
+  template<class T> struct SeqCst<const T> : public SeqCst<T> {};
+}
+
+#endif
+
+namespace AtomicInternal {
+#ifdef MATHICGB_USE_FAKE_ATOMIC
+  // This class has the same interface as the actual custom atomic
+  // class but it does absolutely no synchronization and it does not
+  // constrain compiler optimizations in any way. The purpose of this class
+  // is to enable it while running single core to compare the single core
+  // overhead of the atomic ordering constraints.
+  template<class T>
+  class FakeAtomic {
+  public:
+    FakeAtomic(): mValue() {}
+    FakeAtomic(T value): mValue(value) {}
+    T load(const std::memory_order) const {return mValue;}
+    void store(const T value, const std::memory_order order) {mValue = value;}
+
+  private:
+    T mValue;
+  };
+
+  template<class T, size_t size>
+  struct ChooseAtomic {
+    typedef FakeAtomic<T> type;
+  };
+
+#else
   /// Class for deciding which implementation of atomic to use. The default is
   /// to use std::atomic which is a fine choice if std::atomic is implemented
   /// in a reasonable way by the standard library implementation you are using.
@@ -73,6 +135,7 @@ namespace AtomicInternal {
   struct ChooseAtomic {
     typedef std::atomic<T> type;
   };
+#endif
 }
 
 #ifdef MATHICGB_USE_CUSTOM_ATOMIC_X86_X64
@@ -127,14 +190,11 @@ namespace AtomicInternal {
   template<class T>
   class CustomAtomicX86X64 {
   public:
-    MATHICGB_INLINE
     CustomAtomicX86X64(): mValue() {}
-    
-    MATHICGB_INLINE
     CustomAtomicX86X64(T value): mValue(value) {}
-
+
     MATHICGB_INLINE
-    T load(std::memory_order order) const {
+    T load(const std::memory_order order) const {
       switch (order) {
       case std::memory_order_relaxed:
         // The only constraint here is that if you read *p, then you will never
@@ -181,7 +241,7 @@ namespace AtomicInternal {
     }
 
     MATHICGB_INLINE
-    void store(const T value, std::memory_order order) {
+    void store(const T value, const std::memory_order order) {
       switch (order) {
       case std::memory_order_relaxed:
         // No ordering constraints here other than atomicity and as noted
@@ -261,20 +321,20 @@ namespace AtomicInternal {
 template<class T>
 class Atomic {
 public:
-  MATHICGB_INLINE Atomic(): mValue() {}
-  MATHICGB_INLINE Atomic(T value): mValue(value) {}
+  Atomic(): mValue() {}
+  Atomic(T value): mValue(value) {}
 
-  MATHICGB_INLINE T load(
-    std::memory_order order = std::memory_order_seq_cst
-  ) const {
+  MATHICGB_INLINE
+  T load(const std::memory_order order = std::memory_order_seq_cst) const {
     MATHICGB_ASSERT(debugAligned());
     return mValue.load(order);
   }
 
-  MATHICGB_INLINE void store(
-    T value,
-    std::memory_order order = std::memory_order_seq_cst
-  ) {
+  MATHICGB_INLINE
+  void store(
+    const T value,
+    const std::memory_order order = std::memory_order_seq_cst
+  ) {
     MATHICGB_ASSERT(debugAligned());
     mValue.store(value, order);
   }
diff --git a/src/mathicgb/stdinc.h b/src/mathicgb/stdinc.h
index b3221bd..a9860d6 100755
--- a/src/mathicgb/stdinc.h
+++ b/src/mathicgb/stdinc.h
@@ -60,6 +60,7 @@
 // so the warning is turned off.
 #pragma warning (disable: 4355)
 
+#ifndef MATHICGB_USE_FAKE_ATOMIC
 #if defined (_M_IX86) || defined(_M_X64) // if on x86 (32 bit) or x64 (64 bit)
 #define MATHICGB_USE_CUSTOM_ATOMIC_X86_X64
 #define MATHICGB_USE_CUSTOM_ATOMIC_4BYTE
@@ -67,6 +68,7 @@
 #define MATHICGB_USE_CUSTOM_ATOMIC_8BYTE
 #endif
 #endif
+#endif
 
 #elif defined (__GNUC__) // GCC compiler
 
@@ -81,6 +83,17 @@
 #define MATHICGB_UNREACHABLE __builtin_unreachable()
 #define MATHICGB_RESTRICT __restrict
 
+// if on x86 (32 bit) or x64 (64 bit)
+#ifndef MATHICGB_USE_FAKE_ATOMIC
+#if defined (_X86_) || defined(__x86_64__)
+#define MATHICGB_USE_CUSTOM_ATOMIC_X86_X64
+#define MATHICGB_USE_CUSTOM_ATOMIC_4BYTE
+#ifdef __x86_64__ // if on x64 (64 bit)
+#define MATHICGB_USE_CUSTOM_ATOMIC_8BYTE
+#endif
+#endif
+#endif
+
 #else
 
 #define MATHICGB_NO_INLINE

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mathicgb.git



More information about the debian-science-commits mailing list