[mathic] 23/62: Hashtable code now compiles, but is not debugged yet.

Doug Torrance dtorrance-guest at moszumanska.debian.org
Wed Apr 1 11:36:20 UTC 2015


This is an automated email from the git hooks/post-receive script.

dtorrance-guest pushed a commit to branch master
in repository mathic.

commit 8e5acc8c536367b07efc2fdc5c2755b4f9ffd59b
Author: Mike Stillman <mikestillman1 at gmail.com>
Date:   Wed Aug 1 15:36:45 2012 -0400

    Hashtable code now compiles, but is not debugged yet.
---
 src/mathic/HashTable.h | 227 +++++++++++++++++++++++++++++++++++++++++++++----
 src/test/HashTable.cpp |  17 +++-
 2 files changed, 225 insertions(+), 19 deletions(-)

diff --git a/src/mathic/HashTable.h b/src/mathic/HashTable.h
index 399dfbc..08525a9 100644
--- a/src/mathic/HashTable.h
+++ b/src/mathic/HashTable.h
@@ -11,10 +11,13 @@
 #ifndef MATHIC_HASHTABLE_GUARD
 #define MATHIC_HASHTABLE_GUARD
 
-#include <memtailor.h>
 #include "stdinc.h"
+
+#include <memtailor.h>
+#include <cmath>
 #include <utility>
 #include <string>
+
 namespace mathic {
 
   template<class Configuration> 
@@ -25,8 +28,8 @@ namespace mathic {
     typedef int Value;
 
     size_t hash(Key k);
-    bool equals(Key k1, Key k2);
-    bool combine(Value &a, const Value &b);
+    bool keysEqual(Key k1, Key k2);
+    void combine(Value &a, const Value &b);
   };
 
   template<class Configuration> 
@@ -36,9 +39,15 @@ namespace mathic {
   class HashTable {
   public:
     typedef C Configuration;
-    typedef void * node;
     typedef typename C::Key Key;
     typedef typename C::Value Value;
+
+    struct Node {
+      Node *next;
+      Key key;
+      Value value;
+    };
+
     // Create a hash table
     HashTable(const Configuration &conf, unsigned int nbits = 10);
 
@@ -55,14 +64,14 @@ namespace mathic {
     // If the key is already present, combine 'v' into the existing node's
     // value and return std::pair(false, existing node),
     // else return std::pair(true, newly inserted node).
-    std::pair<bool, node *> insert(const Key &k, const Value &v);
+    std::pair<bool, Node *> insert(const Key &k, const Value &v);
 
     // Unlink 'p' from the hash table.  The node 'p' itself is not freed.
-    void remove(node *p);
+    void remove(Node *p);
 
-    const Key &key(node *p) const { return static_cast<Node *>(p)->k; }
+    const Key &key(Node *p) const {return p->key;}
 
-    const Value &value(node *p) const { return static_cast<Node *>(p)->v; }
+    const Value &value(Node *p) const {return p->value;}
 
     void reset(); // Major assumption: all nodes have been removed from the table already
 
@@ -77,25 +86,33 @@ namespace mathic {
     std::string name() const;
 
   private:
-    struct Node {
-      Node *next;
-      Key k;
-      Value v;
-    };
+    Node * makeNode(const Key &k, const Value &v);
+    void grow(unsigned int nbits);
 
+    // Used for consistency checking.  Returns the number of nodes in the table.
+    // Should match mNodeCount.
+    size_t computeNodeCount() const;
 
+    // Used for consistency checking.  Returns the number of nonempty bins in the hash table.
+    // Should match mBinCount.
+    size_t computeBinCount() const;
+  
     size_t mHashMask; // this is the number, in binary:  00001111...1, where
                       // the number of 1's is mLogTableSize
-
     size_t mTableSize;
     size_t mLogTableSize; // number of bits in the table: mTableSize should be 2^mLogTableSize
 
     size_t mNodeCount;  // current number of nodes in the hash table
     size_t mBinCount; // number of nonzero bins
+
     size_t mMaxCountBeforeRebuild;
 
+    // tweakable parameters
+    double mRebuildThreshold;
+    bool mAlwaysInsertAtEnd;
+
     memt::BufferPool mNodePool;
-    std::vector<node *> mHashTable;
+    std::vector<Node *> mHashTable;
     Configuration mConf;
   };
 
@@ -104,16 +121,189 @@ namespace mathic {
     mLogTableSize(nbits),
     mTableSize(1 << nbits),
     mHashMask((1 << nbits) - 1),
+    mNodeCount(0),
+    mBinCount(0),
+    mRebuildThreshold(0.1),
+    mAlwaysInsertAtEnd(true),
     mNodePool(sizeof(Node)),
-    mConf(conf)
+    mConf(conf) 
   {
     mHashTable.resize(mTableSize);
-    mMaxCountBeforeRebuild = mConf.rebuildThreshold * mTableSize;
-
+    mMaxCountBeforeRebuild = mRebuildThreshold * mTableSize;
+    
     MATHIC_ASSERT(tableIsZero(mHashTable));
   }
+  
+  template<class C>
+  void HashTable<C>::reset() {
+    mNodePool.freeAllBuffers();
+  }
+
+  template<class C>
+  typename HashTable<C>::Node *HashTable<C>::makeNode(const Key &k, const Value &v)
+  {
+    mNodeCount++;
+    Node *result = static_cast<Node *>(mNodePool.alloc());
+    result->next = 0;
+    result->key = k;
+    result->value = v;
+    return result;
+  }
+
+  template<class C>
+  std::pair<bool, typename HashTable<C>::Node *> HashTable<C>::insert(const Key &k, const Value &v) 
+  {
+    size_t fullHashVal = mConf.hashValue(k);
+    size_t hashval = fullHashVal & mHashMask;
+    
+    MATHIC_ASSERT(hashval < mHashTable.size());
+    Node *tmpNode = mHashTable[hashval];
+    Node *result = 0;
+    if (tmpNode == 0)
+      {
+	result = makeNode(k,v);
+	mHashTable[hashval] = result;
+      }
+    else
+      {
+	while (true)
+	  {
+	    if (mConf.keysEqual(tmpNode->key, k))
+	      {
+		mConf.combine(tmpNode->value, v);
+		result = tmpNode;
+		return std::pair<bool,Node *>(false,result);
+	      }
+	    if (tmpNode->next == 0)
+	      {
+		// time to insert the monomial
+		result = makeNode(k, v);
+		if (mAlwaysInsertAtEnd)
+		  {
+		    tmpNode->next = result;
+		  }
+		else
+		  {
+		    result->next = mHashTable[hashval];
+		    mHashTable[hashval] = result;
+		  }
+		break;
+	      }
+	    tmpNode = tmpNode->next;
+	  }
+      }
     
+    if (mNodeCount > mMaxCountBeforeRebuild)
+      grow(mLogTableSize + 2);  // increase by a factor of 4??
+    
+    MATHIC_ASSERT(computeNodeCount() == mNodeCount);
+    return std::pair<bool, Node *>(true,result);
+  }
+    
+  template<class C>
+  void HashTable<C>::remove(Node *p) 
+  {
+    mNodeCount--;
+    size_t const hashval = mConf.hashvalue(p->key) & mHashMask;
+    Node head;
+    Node* tmpNode = mHashTable[hashval];
+    head.next = tmpNode;
+    for (Node* q = &head; q->next != 0; q = q->next) 
+      {
+	if (q->next == p) 
+	  {
+	    q->next = p->next;
+	    mHashTable[hashval] = head.next;
+	    if (head.next == 0) mBinCount--;
+	    return;
+	  }
+      }
+    // If we get here, then the node is not at its supposed hash value.
+    // That probably means either that the node has been deleted twice
+    // or that the key in the node changed so that its hash value
+    // changed. That is not allowed.
+    MATHIC_ASSERT(false);
+  }
+  
+  template<class C>
+  void HashTable<C>::grow(unsigned int new_nbits) 
+  {
+    MATHIC_ASSERT(computeNodeCount() == mNodeCount);
+    size_t const old_table_size = mTableSize;
+    mTableSize = static_cast<size_t>(1) << new_nbits;
+    mLogTableSize = new_nbits;
+    mHashMask = mTableSize-1;
+    std::vector<Node *> old_table(mTableSize);
+    std::swap(old_table, mHashTable);
+
+    mBinCount = 0;
+    for (size_t i = 0; i < old_table_size; ++i)
+      {
+	Node *p = old_table[i];
+	while (p != 0)
+	  {
+	    Node *q = p;
+	    p = p->next;
+	    q->next = 0;
+	    // Reinsert node.  We know that it is unique
+	    size_t hashval = mConf.hashvalue(q->key) & mHashMask;
+	    Node *r = mHashTable[hashval];
+	    if (r == 0) mBinCount++;
+	    if (r == 0 || !mAlwaysInsertAtEnd) 
+	      {
+		q->next = r;
+		mHashTable[hashval] = q;
+	      }
+	    else
+	      {
+		// put it at the end
+		for ( ; r->next != 0; r = r->next) { }
+		r->next = q;
+	      }
+	  }
+      }
+    
+    mMaxCountBeforeRebuild =
+      static_cast<size_t>(std::floor(mTableSize * mRebuildThreshold));
+    
+    MATHIC_ASSERT(computeNodeCount() == mNodeCount);
+  }
+
+  template<class C>
+  size_t HashTable<C>::memoryUse() const
+  {
+    size_t result = mHashTable.capacity() * sizeof(Node *);
+    result += mNodePool.getMemoryUse();
+    return result;
+  }
+  
+  template<class C>
+  size_t HashTable<C>::computeNodeCount() const
+  {
+    size_t result = 0;
+    for (size_t i=0; i<mTableSize; i++)
+      {
+	for (Node *p = mHashTable[i]; p != 0; p = p->next) result++;
+      }
+    return result;
+  }
 
+  template<class C>
+  size_t HashTable<C>::computeBinCount() const
+  {
+    size_t result = 0;
+    for (size_t i=0; i<mTableSize; i++)
+      {
+	if (mHashTable[i] != 0) result++;
+      }
+    return result;
+  }
+
+  template<class C>
+  std::string HashTable<C>::name() const
+  {
+    return std::string("HashTable");
+  }
   
 } // namespace mathic
 
@@ -121,6 +311,7 @@ namespace mathic {
 
 // Local Variables:
 // indent-tabs-mode: nil
+// mode: c++
 // compile-command: "make -C $MATHIC/mathic "
 // End:
 
diff --git a/src/test/HashTable.cpp b/src/test/HashTable.cpp
index c0c723f..f3a9202 100755
--- a/src/test/HashTable.cpp
+++ b/src/test/HashTable.cpp
@@ -1,7 +1,22 @@
-//#include "mathic/HashTable.h"
+#include "mathic/HashTable.h"
 #include <gtest/gtest.h>
 
+namespace {
+  class HashTableConf 
+  {
+  public:
+    typedef int Key;
+    typedef int Value;
+
+    size_t hash(Key k) {return k;}
+    bool keysEqual(Key k1, Key k2) {return k1==k2;}
+    void combine(Value &a, const Value &b){a+=b;}
+  };
+}
+
 TEST(HashTable, NoOp) {
+  HashTableConf C;
+  mathic::HashTable<HashTableConf> H(C);
   ASSERT_TRUE(true);
 };
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/mathic.git



More information about the debian-science-commits mailing list