Fri Apr 3 15:58:39 UTC 2015

This is an automated email from the git hooks/post-receive script.

dtorrance-guest pushed a commit to branch upstream
in repository mathicgb.

commit 0f0202fd03aef39f37c1e183db189d8bc73b0f94
Author: Bjarke Hammersholt Roune <bjarkehr.code at gmail.com>
Date:   Mon Nov 5 20:53:20 2012 +0100

    FixedSizeMonomialMap is not exposed instead of being an implementation detail of MonomialMap. MonomialMap is now concurrent and also performant. Horribly, the std::atomic of MSVC 2012 has abysmal performance, so there was no other option than to write my own :( On other compilers the new Atomic class is just implemented in terms of std::atomic, but on x86 and x64 for MSVC it uses compiler intrinsics to implement std::atomic using memory barriers. The implementation offered by MSVC 201 [...]
---
 Makefile.am                                        |   3 +-
 build/vs12/mathicgb-lib/mathicgb-lib.vcxproj       |   2 +
 .../vs12/mathicgb-lib/mathicgb-lib.vcxproj.filters |   6 +
 src/mathicgb/Atomic.hpp                            | 193 +++++++
 src/mathicgb/F4MatrixBuilder.cpp                   |  10 +-
 src/mathicgb/F4MatrixBuilder.hpp                   |   6 +-
 src/mathicgb/F4MatrixReducer.cpp                   |   1 -
 src/mathicgb/FixedSizeMonomialMap.h                | 259 +++++++++
 src/mathicgb/MonomialMap.hpp                       | 626 +++++----------------
 src/mathicgb/QuadMatrixBuilder.cpp                 |   6 +-
 src/mathicgb/QuadMatrixBuilder.hpp                 |   9 +-
 src/mathicgb/stdinc.h                              |   7 +
 12 files changed, 620 insertions(+), 508 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 587249e..4a9af4f 100755
--- a/Makefile.am
+++ b/Makefile.am
@@ -60,7 +60,8 @@ libmathicgb_la_SOURCES = src/mathicgb/BjarkeGeobucket2.cpp				\
   src/mathicgb/F4MatrixBuilder.cpp src/mathicgb/QuadMatrix.hpp			\
   src/mathicgb/QuadMatrix.cpp src/mathicgb/F4MatrixReducer.cpp			\
   src/mathicgb/F4MatrixReducer.hpp src/mathicgb/MonomialMap.hpp			\
-  src/mathicgb/RawVector.hpp
+  src/mathicgb/RawVector.hpp src/mathicgb/Atomic.hpp					\
+  src/mathicgb/FixedSizeMonomialMap.hpp	
 
 # When making a distribution file, Automake knows to include all files
 # that are necessary to build the project. EXTRA_DIST specifies files
diff --git a/build/vs12/mathicgb-lib/mathicgb-lib.vcxproj b/build/vs12/mathicgb-lib/mathicgb-lib.vcxproj
index 6ab5d00..9d19c5d 100755
--- a/build/vs12/mathicgb-lib/mathicgb-lib.vcxproj
+++ b/build/vs12/mathicgb-lib/mathicgb-lib.vcxproj
@@ -433,6 +433,7 @@
     <ClCompile Include="..\..\..\src\mathicgb\TypicalReducer.cpp" />
   </ItemGroup>
   <ItemGroup>
+    <ClInclude Include="..\..\..\src\mathicgb\Atomic.hpp" />
     <ClInclude Include="..\..\..\src\mathicgb\BjarkeGeobucket.hpp" />
     <ClInclude Include="..\..\..\src\mathicgb\BjarkeGeobucket2.hpp" />
     <ClInclude Include="..\..\..\src\mathicgb\BuchbergerAlg.hpp" />
@@ -442,6 +443,7 @@
     <ClInclude Include="..\..\..\src\mathicgb\F4MatrixBuilder.hpp" />
     <ClInclude Include="..\..\..\src\mathicgb\F4MatrixReducer.hpp" />
     <ClInclude Include="..\..\..\src\mathicgb\F4Reducer.hpp" />
+    <ClInclude Include="..\..\..\src\mathicgb\FixedSizeMonomialMap.h" />
     <ClInclude Include="..\..\..\src\mathicgb\FreeModuleOrder.hpp" />
     <ClInclude Include="..\..\..\src\mathicgb\GroebnerBasis.hpp" />
     <ClInclude Include="..\..\..\src\mathicgb\HashTourReducer.hpp" />
diff --git a/build/vs12/mathicgb-lib/mathicgb-lib.vcxproj.filters b/build/vs12/mathicgb-lib/mathicgb-lib.vcxproj.filters
index 138ba25..4c2001b 100755
--- a/build/vs12/mathicgb-lib/mathicgb-lib.vcxproj.filters
+++ b/build/vs12/mathicgb-lib/mathicgb-lib.vcxproj.filters
@@ -269,5 +269,11 @@
     <ClInclude Include="..\..\..\src\mathicgb\RawVector.hpp">
       <Filter>Source Files</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\..\src\mathicgb\Atomic.hpp">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\..\src\mathicgb\FixedSizeMonomialMap.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/src/mathicgb/Atomic.hpp b/src/mathicgb/Atomic.hpp
new file mode 100755
index 0000000..c19a041
--- /dev/null
+++ b/src/mathicgb/Atomic.hpp
@@ -0,0 +1,193 @@
+#ifndef MATHICGB_ATOMIC_GUARD
+#define MATHICGB_ATOMIC_GUARD
+
+// We need this include for std::memory_order even if we are not
+// using std::atomic.
+#include <atomic>
+
+namespace AtomicInternal {
+  /// Class for deciding which implementation of atomic to use. The default is
+  /// to use std::atomic which is a fine choice if std::atomic is implemented
+  /// in a reasonable way by the standard library implementation you are using.
+  template<class T, size_t size>
+  struct ChooseAtomic {
+    typedef std::atomic<T> type;
+  };
+}
+
+#if defined(MATHICGB_USE_CUSTOM_ATOMIC_X86_X64_MSVC_4BYTE) || \
+  defined(MATHICGB_USE_CUSTOM_ATOMIC_X86_X64_MSVC_8BYTE)
+#include <Windows.h>
+#undef max
+#undef min
+namespace AtomicInternal {
+  /// Custom Atomic class. Use for sizes that are automatically atomic on
+  /// the current platform.
+  template<class T>
+  class CustomAtomic {
+  public:
+    MATHICGB_INLINE CustomAtomic(): mValue() {}
+    MATHICGB_INLINE CustomAtomic(T value): mValue(value) {}
+
+    MATHICGB_INLINE T load(std::memory_order order) const {
+      switch (order) {
+      case std::memory_order_relaxed:
+        // In this case there are no constraints on ordering, so all we need
+        // to ensure is to perform an atomic read. That is already automatic
+        // on aligned variables of this type.
+        return mValue;
+
+      case std::memory_order_consume: {
+        // This order specifies to not move dependent reads above this load,
+        // that is to we must not load *p before loading p.
+        // The only mainstream CPU that will break this guarantee is DEC Alpha,
+        // in which case this code should not be used. The compiler can also
+        // speculate the value of p and load *p and then later check that p is
+        // what it thought it was. That will break the guarantee that we need
+        // to give, so we need a compiler optimization barrier that will
+        // disable such speculative dependent read optimizations.
+        //
+        // Unfortunately on e.g. MSVC there is not a specific barrier for this
+        // purpose so we end up blocking all read-movement, including
+        // non-dependent reads. 
+        const auto value = mValue;
+        _ReadBarrier(); // MSVC
+        return value;
+      }
+
+      case std::memory_order_acquire: {
+        // This order specifies to not move any read above this load. Some CPUs
+        // and all compilers can break this guarantee due to optimizations.
+        // x86, ARM, SPARC and x64 has this guarantee automatically, though see
+       //http://preshing.com/20120913/acquire-and-release-semantics#comment-20810
+        // for an exception on x64 that I am going to ignore - if you use these
+        // techniques it is up to you to ensure proper fencing. On other
+        // platforms we need a hardware fence in addition to the compiler
+        // optimization fence.
+        const auto value = mValue;
+        _ReadBarrier(); // MSVC
+        return mValue;
+      }
+
+      case std::memory_order_seq_cst:
+        // There must be some global order in which all sequentially consistent
+        // atomic operations are considered to have happened in. This is automatic
+        // on x64, ARM, SPARC and x64 too for reads (but not writes) - see:
+        //   http://www.stdthread.co.uk/forum/index.php?topic=72.0
+        _ReadBarrier(); // MSVC
+        return mValue;
+
+      case std::memory_order_release: // not available for load
+      case std::memory_order_acq_rel: // not available for load
+      default:
+        MATHICGB_UNREACHABLE;
+      }
+    }
+
+    MATHICGB_INLINE void store(const T value, std::memory_order order) {
+      switch (order) {
+      case std::memory_order_relaxed:
+        // No ordering constraints and we are already assuming that loads
+        // and stores to mValue are automatic so we can just store directly.
+        mValue = value;
+        break;
+
+      case std::memory_order_release:
+        // This ordering specifies that no prior writes are moved after
+        // this write.
+        _WriteBarrier();
+        mValue = value;
+        break;
+
+      case std::memory_order_acq_rel:
+        // This ordering specifies combined acquire and release guarantees:
+        //  - no prior writes are moved after this operation
+        //  - no later reads are moved before this operation
+        // This requires CPU fencing on x86/x64 because normally reads can
+        // be moved before writes to a different memory location by the CPU.
+        _WriteBarrier();
+        mValue = value;
+        MemoryBarrier();
+        break;
+
+      case std::memory_order_seq_cst:
+        // All operations happen in a globally consistent linear order.
+        _WriteBarrier();
+        mValue = value;
+        MemoryBarrier();
+        break;
+
+      case std::memory_order_consume: // not available for store
+      case std::memory_order_acquire: // not available for store
+      default:
+        MATHICGB_UNREACHABLE;
+      }
+    }
+
+  private:
+    T mValue;
+  };
+
+#ifdef MATHICGB_USE_CUSTOM_ATOMIC_X86_X64_MSVC_4BYTE
+  template<class T>
+  struct ChooseAtomic<T, 4> {
+    typedef CustomAtomic<T> type;
+  };
+#endif
+
+#ifdef MATHICGB_USE_CUSTOM_ATOMIC_X86_X64_MSVC_8BYTE
+  template<class T>
+  struct ChooseAtomic<T, 8> {
+    typedef CustomAtomic<T> type;
+  };
+#endif
+}
+#endif
+
+/// This class is equivalent to std::atomic<T*>. Some functions from the
+/// interface of std::atomic are missing - add them as necessary. Do not add
+/// operator= and operator T() --- it is better to make the code explicit
+/// about when and how loading and storing of atomic variables occurs.
+///
+/// The purpose of the class is that it performs far better than
+/// std::atomic for some implementations. For example the std::atomic in MSVC
+/// 2012 performs a compare-and-swap operation on a load even with the
+/// paramter std::memory_order_relaxed.
+///
+/// We force all the functions to be inline because they can contain switches
+/// on the value of std::memory_order. This will usually be a compile-time
+/// constant parameter so that after inlining the switch will disappear. Yet
+/// the code size of the switch may make some compilers avoid the inline.
+template<class T>
+class Atomic {
+public:
+  MATHICGB_INLINE Atomic(): mValue() {}
+  MATHICGB_INLINE Atomic(T value): mValue(value) {}
+
+  MATHICGB_INLINE T load(
+    std::memory_order order = std::memory_order_seq_cst
+  ) const {
+    MATHICGB_ASSERT(debugAligned());
+    return mValue.load(order);
+  }
+
+  MATHICGB_INLINE void store(
+    T value,
+    std::memory_order order = std::memory_order_seq_cst
+  ) {
+    MATHICGB_ASSERT(debugAligned());
+    mValue.store(value, order);
+  }
+
+private:
+  Atomic(const Atomic<T>&); // not available
+  void operator=(const Atomic<T>&); // not available
+
+  bool debugAligned() const {
+    return reinterpret_cast<size_t>(&mValue) % sizeof(void*) == 0;
+  }
+
+  typename AtomicInternal::ChooseAtomic<T, sizeof(T)>::type mValue;
+};
+
+#endif
diff --git a/src/mathicgb/F4MatrixBuilder.cpp b/src/mathicgb/F4MatrixBuilder.cpp
index 4a23b19..905281a 100755
--- a/src/mathicgb/F4MatrixBuilder.cpp
+++ b/src/mathicgb/F4MatrixBuilder.cpp
@@ -25,7 +25,7 @@ MATHICGB_INLINE QuadMatrixBuilder::LeftRightColIndex
 (
   const const_monomial monoA,
   const const_monomial monoB,
-  const ColSubsetReader& colMap,
+  const ColReader& colMap,
   QuadMatrixBuilder& builder
 ) {
   MATHICGB_ASSERT(!monoA.isNull());
@@ -73,7 +73,7 @@ MATHICGB_INLINE const std::pair<
   const const_monomial monoA1,
   const const_monomial monoA2,
   const const_monomial monoB,
-  const ColSubsetReader& colMap,
+  const ColReader& colMap,
   QuadMatrixBuilder& builder
 ) {
   MATHICGB_ASSERT(!monoA1.isNull());
@@ -371,7 +371,7 @@ void F4MatrixBuilder::appendRowBottom(
   // todo: eliminate the code-duplication between here and appendRowTop.
   MATHICGB_ASSERT(!multiple.isNull());
   MATHICGB_ASSERT(&builder != 0);
-  const ColSubsetReader colMap(builder.columnToIndexMap());
+  const ColReader colMap(builder.columnToIndexMap());
 
   for (auto it  = begin; it != end; ++it) {
     const auto col = findOrCreateColumn(it.getMonomial(), multiple, colMap, builder);
@@ -394,7 +394,7 @@ void F4MatrixBuilder::appendRowTop(
   MATHICGB_ASSERT(&poly != 0);
   MATHICGB_ASSERT(&builder != 0);
 
-  ColSubsetReader colMap(builder.columnToIndexMap());
+  ColReader colMap(builder.columnToIndexMap());
 
   auto it = poly.begin();
   const auto end = poly.end();
@@ -454,7 +454,7 @@ void F4MatrixBuilder::appendRowBottom(
   ++itA;
   ++itB;
 
-  const ColSubsetReader colMap(builder.columnToIndexMap());
+  const ColReader colMap(builder.columnToIndexMap());
 
   const const_monomial mulA = task.multiply;
   const const_monomial mulB = task.sPairMultiply;
diff --git a/src/mathicgb/F4MatrixBuilder.hpp b/src/mathicgb/F4MatrixBuilder.hpp
index dee87c1..cbc197c 100755
--- a/src/mathicgb/F4MatrixBuilder.hpp
+++ b/src/mathicgb/F4MatrixBuilder.hpp
@@ -66,7 +66,7 @@ public:
   const PolyRing& ring() const {return mBuilder.ring();}
 
 private:
-  typedef const QuadMatrixBuilder::ColSubsetReader ColSubsetReader;
+  typedef const QuadMatrixBuilder::ColReader ColReader;
 
   /** Creates a column with monomial label x and schedules a new row to
     reduce that column if possible. Here x is monoA if monoB is
@@ -104,7 +104,7 @@ private:
   MATHICGB_INLINE LeftRightColIndex findOrCreateColumn(
     const_monomial monoA,
     const_monomial monoB,
-    const ColSubsetReader& colMap,
+    const ColReader& colMap,
     QuadMatrixBuilder& builder);
 
   MATHICGB_NO_INLINE const std::pair<LeftRightColIndex, LeftRightColIndex>
@@ -119,7 +119,7 @@ private:
     const const_monomial monoA1,
     const const_monomial monoA2,
     const const_monomial monoB,
-    const ColSubsetReader& colMap,
+    const ColReader& colMap,
     QuadMatrixBuilder& builder
   );
 
diff --git a/src/mathicgb/F4MatrixReducer.cpp b/src/mathicgb/F4MatrixReducer.cpp
index 1621b99..aac24b4 100755
--- a/src/mathicgb/F4MatrixReducer.cpp
+++ b/src/mathicgb/F4MatrixReducer.cpp
@@ -120,7 +120,6 @@ namespace {
       // bit debug builds. This was on Visual Studio version
       // "11.0.50727.1 RTMREL" - Bjarke Hammersholt Roune
 
-
       // MATHICGB_RESTRICT on entries is important. It fixed a performance
       // regression on MSVC 2012 which otherwise was not able to determine that
       // entries is not an alias for anything else in this loop. I suspect that
diff --git a/src/mathicgb/FixedSizeMonomialMap.h b/src/mathicgb/FixedSizeMonomialMap.h
new file mode 100755
index 0000000..93645cb
--- /dev/null
+++ b/src/mathicgb/FixedSizeMonomialMap.h
@@ -0,0 +1,259 @@
+#ifndef MATHICGB_FIXED_SIZE_MONOMIAL_MAP_GUARD
+#define MATHICGB_FIXED_SIZE_MONOMIAL_MAP_GUARD
+
+#include "Atomic.hpp"
+#include "PolyRing.hpp"
+#include <memtailor.h>
+#include <limits>
+#include <vector>
+#include <algorithm>
+#include <mutex>
+
+/// Concurrent hashtable mapping from monomials to T with a fixed number of
+/// buckets. Lookups are lockless while insertions grab a lock.
+///
+/// There is no limitation on the number of entries that can be inserted,
+/// but performance will suffer if the ratio of elements to buckets gets
+/// high.
+///
+/// You can insert new values but you cannot change the value that an
+/// already-inserted value maps to. It is possible to clear the table
+/// but this operation is not safe for concurrency.
+template<class T>
+class FixedSizeMonomialMap {
+public:
+  typedef T mapped_type;
+  typedef std::pair<const_monomial, mapped_type> value_type;
+
+  // Construct a hash table with at least requestedBucketCount buckets. There
+  // may be more buckets. Currently the number is rounded up to the next power
+  // of two.
+  FixedSizeMonomialMap(
+    const size_t requestedBucketCount,
+    const PolyRing& ring
+  ):
+    mHashToIndexMask(computeHashMask(requestedBucketCount)),
+    mBuckets(
+      make_unique_array<Atomic<Node*>>(hashMaskToBucketCount(mHashToIndexMask))
+    ),
+    mRing(ring),
+    mNodeAlloc(sizeofNode(ring)) {}
+
+  /// Construct a hash table with at least requestedBucketCount buckets and
+  /// insert the elements from the parameter map.
+  ///
+  /// The parameter map remains a valid object that can satisfy queries.
+  /// However, it is an error to call non-const methods on the map other
+  /// than the destructor. Also, insertions into *this may or may not
+  /// be reflected for queries into map and some of the entries currently
+  /// in map will disappear.
+  FixedSizeMonomialMap(
+    const size_t requestedBucketCount,
+    FixedSizeMonomialMap<T>&& map
+  ):
+    mHashToIndexMask(computeHashMask(requestedBucketCount)),
+    mBuckets(
+      make_unique_array<Atomic<Node*>>(hashMaskToBucketCount(mHashToIndexMask))
+    ),
+    mRing(map.ring()),
+    mNodeAlloc(std::move(map.mNodeAlloc))
+  {
+    const auto tableEnd = map.mBuckets.get() + map.bucketCount();
+    for (auto tableIt = map.mBuckets.get(); tableIt != tableEnd; ++tableIt) {
+      for (Node* node = tableIt->load(); node != 0;) {
+        const size_t index = hashToIndex(mRing.monomialHashValue(node->mono));
+        Node* const next = node->next.load();
+        node->next.store(mBuckets[index].load());
+        mBuckets[index].store(node);
+        node = next;
+      }
+    }
+  }
+
+  /// Return how many elements there are in the hash table.
+  size_t elementCount() const {return mEntryCount;}
+
+  /// Return how many buckets the hash table has.
+  size_t bucketCount() const {
+    return hashMaskToBucketCount(mHashToIndexMask);
+  }
+
+  const PolyRing& ring() const {return mRing;}
+
+  // Returns the value associated to mono or null if there is no such value.
+  const mapped_type* find(const const_monomial mono) const {
+    const HashValue monoHash = mRing.monomialHashValue(mono);
+    const Node* node = bucketAtIndex(hashToIndex(monoHash));
+    for (; node != 0; node = node->next.load(std::memory_order_consume)) {
+      // To my surprise, it seems to be faster to comment out this branch.
+      // I guess the hash table has too few collisions to make it worth it.
+      //if (monoHash != mRing.monomialHashValue(node->mono))
+      //  continue;
+      if (mRing.monomialEqualHintTrue(mono, node->mono))
+        return &node->value;
+    }
+    return 0;
+  }
+
+  // As find on the product a*b.
+  MATHICGB_INLINE const mapped_type* findProduct(
+    const const_monomial a,
+    const const_monomial b
+  ) const {
+    const HashValue abHash = mRing.monomialHashOfProduct(a, b);
+    const Node* node = bucketAtIndex(hashToIndex(abHash));
+    for (; node != 0; node = node->next.load(std::memory_order_consume)) {
+      // To my surprise, it seems to be faster to comment out this branch.
+      // I guess the hash table has too few collisions to make it worth it.
+      //if (abHash != mRing.monomialHashValue(node->mono))
+      //  continue;
+      if (mRing.monomialIsProductOfHintTrue(a, b, node->mono))
+        return &node->value;
+    }
+    return 0;
+  }
+
+  /// As findProduct but looks for a1*b and a2*b at one time.
+  MATHICGB_INLINE
+  std::pair<const mapped_type*, const mapped_type*> findTwoProducts(
+    const const_monomial a1,
+    const const_monomial a2,
+    const const_monomial b
+  ) const {
+    const HashValue a1bHash = mRing.monomialHashOfProduct(a1, b);
+    const HashValue a2bHash = mRing.monomialHashOfProduct(a2, b);
+    const Node* const node1 = bucketAtIndex(hashToIndex(a1bHash));
+    const Node* const node2 = bucketAtIndex(hashToIndex(a2bHash));
+
+    if (node1 != 0 && node2 != 0 && mRing.monomialIsTwoProductsOfHintTrue
+      (a1, a2, b, node1->mono, node2->mono)
+    )
+      return std::make_pair(&node1->value, &node2->value);
+    else
+      return std::make_pair(findProduct(a1, b), findProduct(a2, b));
+  }
+
+  /// Makes value.first map to value.second unless value.first is already
+  /// present in the map - in that case nothing is done. If p is the returned
+  /// pair then *p.first is the value that value.first maps to after the insert
+  /// and p.second is true if an insertion was performed. *p.first will not
+  /// equal value.second if an insertion was not performed - unless the
+  /// inserted value equals the already present value.
+  std::pair<const mapped_type*, bool> insert(const value_type& value) {
+    const std::lock_guard<std::mutex> lockGuard(mInsertionMutex);
+    // find() loads buckets with memory_order_consume, so it may seem like
+    // we need some extra synchronization to make sure that we have the
+    // most up to date view of the bucket that value.first goes in -
+    // what if a pending insert is hiding in a cache of some other processor
+    // somewhere? We in fact have an up to date view of every bucket in the
+    // the table already because inserts only happen while holding the
+    // insertion mutex and by locking that mutex we have synchronized with
+    // all threads that previously did insertions.
+    {
+      const mapped_type* const found = find(value.first);
+      if (found != 0)
+        return std::make_pair(found, false); // key already present
+    }
+
+    const auto node = static_cast<Node*>(mNodeAlloc.alloc());
+    const size_t index = hashToIndex(mRing.monomialHashValue(value.first));
+    // the constructor initializes the first field of node->mono, so
+    // it has to be called before copying the monomial.
+    new (node) Node(bucketAtIndex(index), value.second);
+    {
+      Monomial nodeTmp(node->mono);
+      ring().monomialCopy(value.first, nodeTmp);
+    }
+    // we cannot store with memory_order_relaxed here because unlocking the
+    // lock only synchronizes with threads who later grab the lock - it does
+    // not synchronize with reading threads since they do not grab the lock.
+    mBuckets[index].store(node, std::memory_order_release);
+    return std::make_pair(&node->value, true); // successful insertion
+  }
+
+  /// This operation removes all entries from the table. This operation
+  /// requires synchronization with and mutual exclusion from all other
+  /// clients of *this - you need to supply this synchronization manually.
+  void clearNonConcurrent() {
+#ifdef MATHICGB_DEBUG
+    // requires mutual exclusion from both readers and writers, but we can
+    // only assert on this for the writers.
+    if (mInsertionMutex.try_lock())
+      mInsertionMutex.unlock();
+    else {
+      MATHICGB_ASSERT(false);
+    }
+#endif
+    const auto tableEnd = mBuckets.get() + bucketCount();
+    for (auto tableIt = mBuckets.get(); tableIt != tableEnd; ++tableIt) {
+      // we can store as std::memory_order_relaxed because the client is
+      // already required to ensure synchronization.
+      tableIt->store(0, std::memory_order_relaxed);
+    }
+
+    // This is the reason that we cannot support this operation concurrently -
+    // we have no way to know when it is safe to deallocate the monomials
+    // since readers do no synchronization.
+    mNodeAlloc.freeAllBuffers();
+  }
+
+private:
+  struct Node {
+    Node(Node* next, const mapped_type value): next(next), value(value) {}
+
+    Atomic<Node*> next;
+    const mapped_type value;
+    exponent mono[1];
+  };
+
+  static HashValue computeHashMask(const size_t requestedBucketCount) {
+    // round request up to nearest power of 2.
+    size_t pow2 = 1;
+    while (pow2 < requestedBucketCount && 2 * pow2 != 0)
+      pow2 *= 2;
+    MATHICGB_ASSERT(pow2 > 0 && (pow2 & (pow2 - 1)) == 0); // power of two
+
+    // If casting to a hash value overflows, then we get the maximum
+    // possible number of buckets based on the range of the hash
+    // value type. Only unsigned overflow is defined, so we need
+    // to assert that the hash type is unsigned.
+    static_assert(!std::numeric_limits<HashValue>::is_signed, "");
+    const auto hashToIndexMask = static_cast<HashValue>(pow2 - 1);
+    MATHICGB_ASSERT(pow2 == hashMaskToBucketCount(hashToIndexMask));
+    return hashToIndexMask;
+  }
+
+  static size_t hashMaskToBucketCount(const HashValue mask) {
+    const auto count = static_cast<size_t>(mask) + 1u; // should be power of 2
+    MATHICGB_ASSERT(count > 0 && (count & (count - 1)) == 0); 
+    return count;
+  }
+
+  static size_t sizeofNode(const PolyRing& ring) {
+    return sizeof(Node) - sizeof(exponent) + ring.maxMonomialByteSize();
+  }
+
+  size_t hashToIndex(HashValue hash) const {
+    const auto index = hash & mHashToIndexMask;
+    MATHICGB_ASSERT(index == hash % bucketCount());
+    return index;
+  }
+
+  Node* bucketAtIndex(size_t index) {
+    MATHICGB_ASSERT(index < bucketCount());
+    return mBuckets[index].load(std::memory_order_consume);
+  }
+
+  const Node* bucketAtIndex(size_t index) const {
+    MATHICGB_ASSERT(index < bucketCount());
+    return mBuckets[index].load(std::memory_order_consume);
+  }
+
+  const HashValue mHashToIndexMask;
+  std::unique_ptr<Atomic<Node*>[]> const mBuckets;
+  const PolyRing& mRing;
+  memt::BufferPool mNodeAlloc; // nodes are allocated from here.
+  std::mutex mInsertionMutex;
+};
+
+#endif
diff --git a/src/mathicgb/MonomialMap.hpp b/src/mathicgb/MonomialMap.hpp
index d56933d..24d9d3f 100755
--- a/src/mathicgb/MonomialMap.hpp
+++ b/src/mathicgb/MonomialMap.hpp
@@ -1,552 +1,200 @@
 #ifndef MATHICGB_MONOMIAL_MAP_GUARD
 #define MATHICGB_MONOMIAL_MAP_GUARD
 
+#include "FixedSizeMonomialMap.h"
+#include "Atomic.hpp"
 #include "PolyRing.hpp"
 #include <memtailor.h>
 #include <limits>
 #include <vector>
 #include <algorithm>
-
-/// A mapping from monomials to MapToType. The interface is the same as
-/// for STL maps. The interface is not complete. Add methods you need when
-/// you need it.
-template<class MapToType>
-class MonomialMap;
-
-template<class MTT>
-class FixedSizeMonomialMap {
-public:
-  typedef MTT mapped_type;
-  typedef std::pair<const_monomial, mapped_type> value_type;
-
-private:
-  struct Node {
-    Node* next;
-    mapped_type value;
-    exponent mono[1];
-  };
-
-  static HashValue computeHashMask(const size_t requestedBucketCount) {
-    // round request up to nearest power of 2.
-    size_t pow2 = 1;
-    while (pow2 < requestedBucketCount && 2 * pow2 != 0)
-      pow2 *= 2;
-    MATHICGB_ASSERT(pow2 > 0 && (pow2 & (pow2 - 1)) == 0); // power of two
-
-    // If casting to a hash value overflows, then we get the maximum
-    // possible number of buckets based on the range of the hash
-    // value type. Only unsigned overflow is defined, so we need
-    // to assert that the hash type is unsigned.
-    static_assert(!std::numeric_limits<HashValue>::is_signed, "");
-    const auto hashToIndexMask = static_cast<HashValue>(pow2 - 1);
-    MATHICGB_ASSERT(pow2 == hashMaskToBucketCount(hashToIndexMask));
-    return hashToIndexMask;
-  }
-
-  static size_t hashMaskToBucketCount(const HashValue mask) {
-    const auto count = static_cast<size_t>(mask) + 1u; // should be power of 2
-    MATHICGB_ASSERT(count > 0 && (count & (count - 1)) == 0); 
-    return count;
-  }
-
-  static size_t sizeofNode(const PolyRing& ring) {
-    return sizeof(Node) - sizeof(exponent) + ring.maxMonomialByteSize();
-  }
-
+#include <mutex>
+
+/// A concurrent hash map from monomials to T. This map can resize itself
+/// if there are too few buckets compared to entries.
+///
+/// Queries are supported through a MonomialMap::Reader object. On resize all
+/// previous readers are subject to permanent spurious misses -
+/// querying clients need to grab a fresh reader to confirm misses. Grabbing
+/// a reader incurs synchronization so do not do it for every query.
+///
+/// External synchronization with writers is required if spurious misses are
+/// not acceptable. There are no spurious hits. If misses are very rare then
+/// reads can be done with minimal overhead by following this pattern:
+///
+///  1) grab a reader X
+///  2) perform queries on X until done or there is a miss
+///  3) replace X with a fresh reader Y
+///  4) go to 2 if the miss is now a hit
+///  5) grab a lock shared with all writers
+///  6) perform the query again - a miss is now guaranteed to be accurate
+///  7) release the lock after the processing of the miss is done and go to 2
+///
+/// There is no way to avoid locking if spurious misses are not acceptable
+/// as otherwise a writer could make an insertion of the requested key at any
+/// time while processing the miss - which then makes the miss spurious
+/// after-the-fact.
+template<class T>
+class MonomialMap {
 public:
-  size_t bucketCount() const {
-    return hashMaskToBucketCount(mHashToIndexMask);
-  }
+  typedef T mapped_type;
+  typedef FixedSizeMonomialMap<mapped_type> FixedSizeMap;
+  typedef typename FixedSizeMap::value_type value_type;
 
-  FixedSizeMonomialMap(
-    const size_t requestedBucketCount,
-    const PolyRing& ring
-  ):
-    mHashToIndexMask(computeHashMask(requestedBucketCount)),
-    mBuckets(new Node*[hashMaskToBucketCount(mHashToIndexMask)]),
-    mRing(ring),
-    mNodeAlloc(sizeofNode(ring)),
-    mEntryCount(0)
+  MonomialMap(const PolyRing& ring):
+    mMap(new FixedSizeMap(InitialBucketCount, ring)),
+    mCapacityUntilGrowth
+    (maxEntries(mMap.load(std::memory_order_relaxed)->bucketCount())),
+    mRing(ring)
   {
-    std::fill_n(mBuckets, bucketCount(), static_cast<Node*>(0));
+    // We can load mMap as std::memory_order_relaxed because we just stored it
+    // and the constructor cannot run concurrently.
   }
 
-  ~FixedSizeMonomialMap() {
-    delete[] mBuckets;
-  }
-
-  /// map must not be mutated except to destruct it after this operation.
-  /// Queries into map will continue to give either a correct result or
-  /// a spurious miss. There will not be spurious hits.
-  FixedSizeMonomialMap(
-    const size_t requestedBucketCount,
-    FixedSizeMonomialMap<MTT>&& map
-  ):
-    mHashToIndexMask(computeHashMask(requestedBucketCount)),
-    mBuckets(new Node*[hashMaskToBucketCount(mHashToIndexMask)]),
-    mRing(map.ring()),
-    mNodeAlloc(std::move(map.mNodeAlloc)),
-    mEntryCount(0)
-  {
-    std::fill_n(mBuckets, bucketCount(), static_cast<Node*>(0));
-    const auto tableEnd = map.mBuckets + map.bucketCount();
-    for (auto tableIt = map.mBuckets; tableIt != tableEnd; ++tableIt) {
-      for (Node* node = *tableIt; node != 0;) {
-        const size_t index = hashToIndex(mRing.monomialHashValue(node->mono));
-        Node* const next = node->next;
-        node->next = mBuckets[index];
-        mBuckets[index] = node;
-        node = next;
-      }
-    }
+  ~MonomialMap() {
+    delete mMap.load();
   }
 
   const PolyRing& ring() const {return mRing;}
-  size_t size() const {return mEntryCount;}
 
-  const mapped_type* find(const const_monomial mono) const {
-    const HashValue monoHash = mRing.monomialHashValue(mono);
-    const Node* node = bucketAtIndex(hashToIndex(monoHash));
-    for (; node != 0; node = node->next) {
-      // To my surprise, it seems to be faster to comment out this branch.
-      // I guess the hash table has too few collisions to make it worth it.
-      //if (monoHash != mRing.monomialHashValue(node->mono))
-      //  continue;
-      if (mRing.monomialEqualHintTrue(mono, node->mono))
-        return &node->value;
-    }
-    return 0;
-  }
-
-  MATHICGB_INLINE const mapped_type* findProduct(
-    const const_monomial a,
-    const const_monomial b
-  ) const {
-    const HashValue abHash = mRing.monomialHashOfProduct(a, b);
-    const Node* node = bucketAtIndex(hashToIndex(abHash));
-    for (; node != 0; node = node->next) {
-      // To my surprise, it seems to be faster to comment out this branch.
-      // I guess the hash table has too few collisions to make it worth it.
-      //if (abHash != mRing.monomialHashValue(node->mono))
-      //  continue;
-      if (mRing.monomialIsProductOfHintTrue(a, b, node->mono))
-        return &node->value;
-    }
-    return 0;
-  }
-
-  /// As findProduct but looks for a1*b and a2*b at one time.
-  MATHICGB_INLINE
-  std::pair<const mapped_type*, const mapped_type*> findTwoProducts(
-    const const_monomial a1,
-    const const_monomial a2,
-    const const_monomial b
-  ) const {
-    const HashValue a1bHash = mRing.monomialHashOfProduct(a1, b);
-    const HashValue a2bHash = mRing.monomialHashOfProduct(a2, b);
-    const Node* const node1 = bucketAtIndex(hashToIndex(a1bHash));
-    const Node* const node2 = bucketAtIndex(hashToIndex(a2bHash));
-
-    if (node1 != 0 && node2 != 0 && mRing.monomialIsTwoProductsOfHintTrue
-      (a1, a2, b, node1->mono, node2->mono)
-    )
-      return std::make_pair(&node1->value, &node2->value);
-    else
-      return std::make_pair(findProduct(a1, b), findProduct(a2, b));
-  }
-
-  void insert(const value_type& value) {
-    Node* node = static_cast<Node*>(mNodeAlloc.alloc());
-    const size_t index = hashToIndex(mRing.monomialHashValue(value.first));
-    {
-      Monomial nodeTmp(node->mono);
-      ring().monomialCopy(value.first, nodeTmp);
-    }
-    new (&node->value) mapped_type(value.second);
-    node->next = bucketAtIndex(index);
-    mBuckets[index] = node;
-    ++mEntryCount;
-  }
-
-  void clear() {
-    std::fill_n(mBuckets, bucketCount(), static_cast<Node*>(0));
-    mNodeAlloc.freeAllBuffers();
-    mEntryCount = 0;
-  }
-
-private:
-  size_t hashToIndex(HashValue hash) const {
-    const auto index = hash & mHashToIndexMask;
-    MATHICGB_ASSERT(index == hash % bucketCount());
-    return index;
-  }
-
-  Node* bucketAtIndex(size_t index) {
-    MATHICGB_ASSERT(index < bucketCount());
-    return mBuckets[index];
-  }
-
-  const Node* bucketAtIndex(size_t index) const {
-    MATHICGB_ASSERT(index < bucketCount());
-    return mBuckets[index];
-  }
-
-  const HashValue mHashToIndexMask;
-  Node** const mBuckets;
-  const PolyRing& mRing;
-  memt::BufferPool mNodeAlloc; // nodes are allocated from here.
-  size_t mEntryCount;
-};
-
-
-
-namespace MonomialMapInternal {
-  // The map type MapClass is defined here. This is here in
-  // this namespace to avoid cluttering the class definition with
-  // a lot of ifdef's.
-
-  template<class MTT>
-  class MapClass {
+  /// All queries are performed through a Reader. Readers are subject to
+  /// permanent spurious misses on hash map resize. Grab a fresh reader
+  /// on misses to confirm them. Making a Reader imposes synchronization
+  /// overhead. External locking is required to guarantee that a miss is
+  /// genuine since there is no mutual exclusion between queries and
+  /// insertions.
+  ///
+  /// It is intentional that a reader does not have an update() method. The
+  /// purpose of this is to make the internal hash table pointer const inside
+  /// the class which guarantees to the compiler that it will never change.
+  /// This in turn allows the compiler to store the fields of the internal
+  /// hash table pointer such as the hash map mask. If there were an update()
+  /// method this would not be possible and the compiler might have to load
+  /// that mask for every query. I made it this way because I was seeming
+  /// a performance regression of several percent which then went away with
+  /// this solution. (well actually it's not a reference, which is the same
+  /// thing as a const pointer).
+  class Reader {
   public:
-    typedef MapClass Map;
-    typedef std::pair<const_monomial, MTT> value_type;
-    typedef MTT mapped_type;
-
-  private:
-    struct Node {
-      Node* next;
-      mapped_type value;
-      exponent mono[1];
-    };
-
-  public:
-    MapClass(const PolyRing& ring, size_t bucketCount = 400000):
-      mRing(ring),
-      mTable(),
-      mNodeAlloc(sizeof(Node) - sizeof(exponent) + ring.maxMonomialByteSize())
+    Reader(const MonomialMap<T>& map):
+      mMap(*map.mMap.load(std::memory_order_seq_cst))
     {
-      mGrowWhenThisManyEntries = 0;
-      mTableSize = mTable.size();
-      mEntryCount = 0;
-      growTable();
-    }
-
-    const PolyRing& ring() const {return mRing;}
-
-    MATHICGB_INLINE const mapped_type* findProduct(
-      const const_monomial a,
-      const const_monomial b
-    ) const {
-      return reader().findProduct(a, b);
+      // We grab the hash table pointer with std::memory_order_seq_cst in order
+      // to force a CPU cache flush - in this way we are more likely to get an
+      // up to date value.
     }
 
-    /// As findProduct but looks for a1*b and a2*b at one time.
-    MATHICGB_INLINE
-    std::pair<const mapped_type*, const mapped_type*> findTwoProducts(
-      const const_monomial a1,
-      const const_monomial a2,
-      const const_monomial b
-    ) const {
-      return reader().findTwoProducts(a1, a2, b);
-    }
-
-    void insert(const value_type& value) {
-      Node* node = static_cast<Node*>(mNodeAlloc.alloc());
-      const size_t index = hashToIndex(mRing.monomialHashValue(value.first));
-      {
-        Monomial nodeTmp(node->mono);
-        ring().monomialCopy(value.first, nodeTmp);
-      }
-      new (&node->value) mapped_type(value.second);
-      node->next = entry(index);
-      mTable[index] = node;
-      ++mEntryCount;
-      MATHICGB_ASSERT(mEntryCount <= mGrowWhenThisManyEntries);
-      if (mEntryCount == mGrowWhenThisManyEntries)
-        growTable();
-    }
-
-    void clear() {
-      std::fill(mTable.begin(), mTable.end(), static_cast<Node*>(0));
-      mNodeAlloc.freeAllBuffers();
-      mEntryCount = 0;
-    }
-
-    size_t size() const {return mEntryCount;}
-
-    class MapReader {
-    public:
-      const mapped_type* find(const const_monomial mono) const {
-        const HashValue monoHash = mRing.monomialHashValue(mono);
-        const Node* node = entry(hashToIndex(monoHash));
-        for (; node != 0; node = node->next) {
-          // To my surprise, it seems to be faster to comment out this branch.
-          // I guess the hash table has too few collisions to make it worth it.
-          //if (monoHash != mRing.monomialHashValue(node->mono))
-          //  continue;
-          if (mRing.monomialEqualHintTrue(mono, node->mono))
-            return &node->value;
-        }
-        return 0;
-      }
-
-      const PolyRing& ring() {return mRing;}
-
-      size_t bucketCount() const {
-        MATHICGB_ASSERT(mHashToIndexMask <
-          std::numeric_limits<decltype(mHashToIndexMask)>::max());
-        return mHashToIndexMask + 1;
-      }
-
-      MATHICGB_INLINE const mapped_type* findProduct(
-        const const_monomial a,
-        const const_monomial b
-      ) const {
-        const HashValue abHash = mRing.monomialHashOfProduct(a, b);
-        const Node* node = entry(hashToIndex(abHash));
-        for (; node != 0; node = node->next) {
-          // To my surprise, it seems to be faster to comment out this branch.
-          // I guess the hash table has too few collisions to make it worth it.
-          //if (abHash != mRing.monomialHashValue(node->mono))
-          //  continue;
-          if (mRing.monomialIsProductOfHintTrue(a, b, node->mono))
-            return &node->value;
-        }
-        return 0;
-      }
-
-      /// As findProduct but looks for a1*b and a2*b at one time.
-      MATHICGB_INLINE
-      std::pair<const mapped_type*, const mapped_type*> findTwoProducts(
-        const const_monomial a1,
-        const const_monomial a2,
-        const const_monomial b
-      ) const {
-        const HashValue a1bHash = mRing.monomialHashOfProduct(a1, b);
-        const HashValue a2bHash = mRing.monomialHashOfProduct(a2, b);
-        const Node* const node1 = entry(hashToIndex(a1bHash));
-        const Node* const node2 = entry(hashToIndex(a2bHash));
-
-        if (node1 != 0 && node2 != 0 && mRing.monomialIsTwoProductsOfHintTrue
-          (a1, a2, b, node1->mono, node2->mono)
-        )
-          return std::make_pair(&node1->value, &node2->value);
-        else
-          return std::make_pair(findProduct(a1, b), findProduct(a2, b));
-      }
-
-    private:
-      friend class MapClass<MTT>;
-      size_t hashToIndex(const HashValue hash) const {
-        const auto index = hash & mHashToIndexMask;
-        MATHICGB_ASSERT(index == hash % bucketCount());
-        return index;
-      }
-
-      const Node* entry(const size_t index) const {
-        MATHICGB_ASSERT(index < bucketCount());
-        return mTable[index];
-      }
-
-      MapReader(
-        const PolyRing& ring,
-        const Node* const* const table,
-        const HashValue mHashToIndexMask
-      ):
-        mRing(ring),
-        mTable(table),
-        mHashToIndexMask(mHashToIndexMask) {}
-
-      const PolyRing& mRing;
-      const Node* const* const mTable;
-      const HashValue mHashToIndexMask;
-    };
-
-    const mapped_type* find(const const_monomial mono) const {
-      return reader().find(mono);
-    }
-
-    const MapReader reader() const {
-      return MapReader(ring(), mTable.data(), mHashToIndexMask);
-    }
-
-  private:
-    size_t hashToIndex(HashValue hash) const {
-      const auto index = hash & mHashToIndexMask;
-      MATHICGB_ASSERT(index == hash % mTable.size());
-      return index;
-    }
-
-    Node* entry(size_t index) {
-      MATHICGB_ASSERT(index < mTable.size());
-      return mTable[index];
-    }
-
-    const Node* entry(size_t index) const {
-      MATHICGB_ASSERT(index < mTable.size());
-      return mTable[index];
-    }
-
-    void growTable() {
-      // Determine parameters for larger hash table
-      const size_t initialTableSize = 1 << 16; // must be a power of two!!!
-      const float maxLoadFactor = 0.33f; // max value of size() / mTable.size()
-      const float growthFactor = 2.0f; // multiply table size by this on growth
-
-      const size_t newTableSize = mTable.empty() ?
-        initialTableSize :
-        static_cast<size_t>(mTable.size() * growthFactor + 0.5f); // round up
-      const auto newGrowWhenThisManyEntries =
-        static_cast<size_t>(newTableSize / maxLoadFactor); // round down
-
-      MATHICGB_ASSERT((newTableSize & (newTableSize - 1)) == 0); // power of two
-
-      // Move nodes from current table into new table
-      decltype(mTable) newTable(newTableSize);
-      HashValue newHashToIndexMask = static_cast<HashValue>(newTableSize - 1);
-      const auto tableEnd = mTable.end();
-      for (auto tableIt = mTable.begin(); tableIt != tableEnd; ++tableIt) {
-        Node* node = *tableIt;
-        while (node != 0) {
-          const size_t index =
-            mRing.monomialHashValue(node->mono) & newHashToIndexMask;
-          MATHICGB_ASSERT(index < newTable.size());
-          Node* const next = node->next;
-          node->next = newTable[index];
-          newTable[index] = node;
-          node = next;
-        }
-      }
-
-      // Move newly calculated table into place
-      mTableSize = newTableSize;      
-      mTable = std::move(newTable);
-      mHashToIndexMask = newHashToIndexMask;
-      mGrowWhenThisManyEntries = newGrowWhenThisManyEntries;
-
-      MATHICGB_ASSERT(mTableSize < mGrowWhenThisManyEntries);
-    }
-
-    size_t mGrowWhenThisManyEntries;
-    size_t mTableSize;
-    HashValue mHashToIndexMask;
-    size_t mEntryCount;
-    const PolyRing& mRing;
-    std::vector<Node*> mTable;
-    memt::BufferPool mNodeAlloc; // nodes are allocated from here.
-  };
-}
-
-
-template<class MTT>
-class MonomialMap {
-public:
-  typedef MTT mapped_type;
-  //typedef MonomialMapInternal::MapClass<MTT> MapType;
-  typedef FixedSizeMonomialMap<MTT> MapType;
-
-  //typedef typename MapType::Map::iterator iterator;
-  //typedef typename MapType::Map::const_iterator const_iterator;
-  typedef typename MapType::value_type value_type;
-
-  /// Can be used for non-mutating accesses to the monomial map. This reader
-  /// can miss queries that should hit if the map is mutated after the
-  /// reader was constructed - it may only give access to a subset of the
-  /// entries in this case. The only guarantee is that if the reader
-  /// reports a hit then it really is a hit and spurious misses only happen
-  /// after the map has been mutated.
-  class SubsetReader {
-  public:
-    SubsetReader(const MonomialMap<MTT>& map): mMap(map.mMap.get()) {}
-
-    const mapped_type* find(const_monomial m) const {
-      return mMap->find(m);
+    /// Returns the value that mono maps to or null if no such key has been
+    /// inserted. Misses can be spurious! Read the comments on the parent
+    /// class.
+    const mapped_type* find(const_monomial mono) const {
+      return mMap.find(mono);
     }
 
+    // As find but looks for the product of a and b.
     const mapped_type* findProduct(
       const const_monomial a,
       const const_monomial b
     ) const {
-      return mMap->findProduct(a, b);
+      return mMap.findProduct(a, b);
     }
 
+    /// As findProduct() but looks for the two products a1*b and a2*b
+    /// simultaneously. The purpose of this is similar to that of unrolling a
+    /// loop.
     MATHICGB_INLINE
     std::pair<const mapped_type*, const mapped_type*> findTwoProducts(
       const const_monomial a1,
       const const_monomial a2,
       const const_monomial b
     ) const {
-      return mMap->findTwoProducts(a1, a2, b);
+      return mMap.findTwoProducts(a1, a2, b);
     }
 
   private:
-    const FixedSizeMonomialMap<MTT>* const mMap;
+    const FixedSizeMonomialMap<T>& mMap;
   };
 
-  MonomialMap(const PolyRing& ring):
-    mMap(make_unique<MapType>(InitialBucketCount, ring)),
-    mCapacityUntilGrowth(maxEntries(mMap->bucketCount())) {}
-
-  const mapped_type* find(const_monomial m) const {
-    return mMap->find(m);
-  }
-
-  const mapped_type* findProduct(
-    const const_monomial a,
-    const const_monomial b
-  ) const {
-    return mMap->findProduct(a, b);
-  }
-
-  MATHICGB_INLINE
-  std::pair<const mapped_type*, const mapped_type*> findTwoProducts(
-    const const_monomial a1,
-    const const_monomial a2,
-    const const_monomial b
-  ) const {
-    return mMap->findTwoProducts(a1, a2, b);
-  }
-
-  //size_t size() const {return mMap->size();}
-  void clear() {mMap->clear();}
-
-  void insert(const value_type& val) {
+  /// Removes all entries from the hash table. This requires mutual exclusion
+  /// from and synchronization with all readers and writers.
+  void clearNonConcurrent() {mMap.load()->clearNonConcurrent();}
+
+  /// Makes value.first map to value.second unless value.first is already
+  /// present in the map - in that case nothing is done. If p is the returned
+  /// pair then *p.first is the value that value.first maps to after the insert
+  /// and p.second is true if an insertion was performed. *p.first will not
+  /// equal value.second if an insertion was not performed - unless the
+  /// inserted value equals the already present value.
+  std::pair<const mapped_type*, bool> insert(const value_type& value) {
+    const std::lock_guard<std::mutex> lockGuard(mInsertionMutex);
+
+    // We can load mMap as std::memory_order_relaxed because we have already
+    // synchronized with all other mutators by locking mInsertionMutex;
+    auto map = mMap.load(std::memory_order_relaxed);
+
+    // this is a loop since it is possible to set the growth factor and
+    // the initial size so low that several rounds are required. This should
+    // only happen when debugging as otherwise such low parameters are
+    // not a good idea.
     while (mCapacityUntilGrowth == 0) {
-      const size_t currentSize = size();
-      if (mMap->bucketCount() >
+      // Resize the table by making a bigger one and using that instead.
+      if (map->bucketCount() > // check overflow
         std::numeric_limits<size_t>::max() / GrowthFactor)
       {
         throw std::bad_alloc();
       }
-      const size_t newBucketCount = mMap->bucketCount() * GrowthFactor;
-      auto nextMap = make_unique<MapType>(newBucketCount, std::move(*mMap));
-      mOldMaps.push_back(std::move(mMap));
-      mMap = std::move(nextMap);
-      mCapacityUntilGrowth = maxEntries(mMap->bucketCount()) - currentSize;
+      const size_t newBucketCount = map->bucketCount() * GrowthFactor;
+      auto nextMap =
+        make_unique<FixedSizeMap>(newBucketCount, std::move(*map));
+      mOldMaps.emplace_back(map);
+      mCapacityUntilGrowth =
+        maxEntries(nextMap->bucketCount()) - maxEntries(map->bucketCount());
+
+      // Store with std::memory_order_seq_cst to force a memory flush so that
+      // readers see the new table as soon as possible.
+      map = nextMap.release();
+      mMap.store(map, std::memory_order_seq_cst);
     }
-    mMap->insert(val);
     MATHICGB_ASSERT(mCapacityUntilGrowth > 0);
-    --mCapacityUntilGrowth;
-  }
 
-  size_t size() const {
-    return maxEntries(mMap->bucketCount()) - mCapacityUntilGrowth;
+    auto p = map->insert(value);
+    if (p.second)
+      --mCapacityUntilGrowth;
+    return p;
   }
 
-  const PolyRing& ring() const {return mMap->ring();}
+  /// Return the number of entries. This method requires locking so do not
+  /// call too much. The count may have 
+  size_t entryCount() const {
+    const std::lock_guard<std::mutex> lockGuard(mInsertMutex);
+    // We can load with std::memory_order_relaxed because we are holding the
+    // lock.
+    auto& map = *mMap.load(std::memory_order_relaxed);
+    return maxEntries(map.bucketCount()) - mCapacityUntilGrowth;
+  }
 
 private:
   static const size_t MinBucketsPerEntry = 3; // inverse of max load factor
   static const size_t GrowthFactor = 2;
-  static const size_t InitialBucketCount = 1 << 16;
+  static const size_t InitialBucketCount = 1 << 1;
 
   static size_t maxEntries(const size_t bucketCount) {
     return (bucketCount + (MinBucketsPerEntry - 1)) / MinBucketsPerEntry;
   }
 
-  std::unique_ptr<MapType> mMap;
+  Atomic<FixedSizeMap*> mMap;
+  const PolyRing& mRing;
+  std::mutex mInsertionMutex;
+
+  /// Only access this field while holding the mInsertionMutex lock.
   size_t mCapacityUntilGrowth;
-  std::vector<std::unique_ptr<MapType>> mOldMaps;
+
+  /// Only access this field while holding the mInsertionMutex lock.
+  /// Contains the old hash tables that we discarded on resize. We have to
+  /// keep these around as we have no way to determine if there are still
+  /// readers looking at them. This could be changed at the cost of
+  /// more overhead in the Reader constructor and destructor.
+  std::vector<std::unique_ptr<FixedSizeMap>> mOldMaps;
 };
 
 #endif
diff --git a/src/mathicgb/QuadMatrixBuilder.cpp b/src/mathicgb/QuadMatrixBuilder.cpp
index dc3d776..a1ae117 100755
--- a/src/mathicgb/QuadMatrixBuilder.cpp
+++ b/src/mathicgb/QuadMatrixBuilder.cpp
@@ -69,7 +69,7 @@ namespace {
   {
     MATHICGB_ASSERT(top.colCount() == bottom.colCount());
     MATHICGB_ASSERT(toMonomial.size() == bottom.colCount());
-    MATHICGB_ASSERT(toCol.find(mono) == 0);
+    MATHICGB_ASSERT(typename ToCol::Reader(toCol).find(mono) == 0);
 
     const QuadMatrixBuilder::ColIndex colCount = top.colCount();
     if (colCount == std::numeric_limits<QuadMatrixBuilder::ColIndex>::max())
@@ -107,7 +107,6 @@ QuadMatrixBuilder::LeftRightColIndex QuadMatrixBuilder::createColumnLeft
      mMonomialToCol,
      ring(),
      true);
-  MATHICGB_ASSERT(mMonomialToCol.size() == leftColCount() + rightColCount());
   MATHICGB_ASSERT
     (findColumn(monomialToBeCopied).leftIndex() == leftColCount() - 1);
 }
@@ -122,7 +121,6 @@ QuadMatrixBuilder::LeftRightColIndex QuadMatrixBuilder::createColumnRight
      mMonomialToCol,
      ring(),
      false);
-  MATHICGB_ASSERT(mMonomialToCol.size() == leftColCount() + rightColCount());
   MATHICGB_ASSERT
     (findColumn(monomialToBeCopied).rightIndex() == rightColCount() - 1);
 }
@@ -321,7 +319,7 @@ QuadMatrix QuadMatrixBuilder::buildMatrixAndClear() {
   mMonomialsLeft.clear();
   mMonomialsRight.clear();
 
-  mMonomialToCol.clear();
+  mMonomialToCol.clearNonConcurrent();
 
   MATHICGB_ASSERT(out.debugAssertValid());
   return std::move(out);
diff --git a/src/mathicgb/QuadMatrixBuilder.hpp b/src/mathicgb/QuadMatrixBuilder.hpp
index 02bf9a4..964da39 100755
--- a/src/mathicgb/QuadMatrixBuilder.hpp
+++ b/src/mathicgb/QuadMatrixBuilder.hpp
@@ -183,8 +183,7 @@ class QuadMatrixBuilder {
 
   // *** Querying columns
 
-  typedef const MonomialMap<LeftRightColIndex>::SubsetReader
-    ColSubsetReader;
+  typedef const MonomialMap<LeftRightColIndex>::Reader ColReader;
   const MonomialMap<LeftRightColIndex>& columnToIndexMap() const {
     return mMonomialToCol;
   }
@@ -193,7 +192,7 @@ class QuadMatrixBuilder {
     left and right side. Returns an invalid index if no such column
     exists. */
   LeftRightColIndex findColumn(const_monomial findThis) const {
-    auto it = mMonomialToCol.find(findThis);
+    auto it = ColReader(mMonomialToCol).find(findThis);
     if (it != 0)
       return *it;
     else
@@ -206,7 +205,7 @@ class QuadMatrixBuilder {
     const const_monomial a,
     const const_monomial b
   ) const {
-    const auto it = mMonomialToCol.findProduct(a, b);
+    const auto it = ColReader(mMonomialToCol).findProduct(a, b);
     if (it != 0)
       return *it;
     else
@@ -220,7 +219,7 @@ class QuadMatrixBuilder {
     const const_monomial a2,
     const const_monomial b
   ) const {
-    const auto it = mMonomialToCol.findTwoProducts(a1, a2, b);
+    const auto it = ColReader(mMonomialToCol).findTwoProducts(a1, a2, b);
     return std::make_pair(
       it.first != 0 ? *it.first : LeftRightColIndex(),
       it.second != 0 ? *it.second : LeftRightColIndex());
diff --git a/src/mathicgb/stdinc.h b/src/mathicgb/stdinc.h
index b0926c9..b91cada 100755
--- a/src/mathicgb/stdinc.h
+++ b/src/mathicgb/stdinc.h
@@ -61,6 +61,13 @@
 // so the warning is turned off.
 #pragma warning (disable: 4355)
 
+#if defined (_M_IX86) || defined(_M_X64) // if on x86 (32 bit) or x64 (64 bit)
+#define MATHICGB_USE_CUSTOM_ATOMIC_X86_X64_MSVC_4BYTE
+#endif
+#ifdef _M_X64 // if on x64 (64 bit)
+#define MATHICGB_USE_CUSTOM_ATOMIC_X86_X64_MSVC_8BYTE
+#endif
+
 #elif defined (__GNUC__) // GCC compiler
 
 #define MATHICGB_NO_INLINE __attribute__((noinline))

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mathicgb.git