[arrayfire] 194/284: Adding a unified memory manager for all backends

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:32 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit c8cd29b1267580a851da82ac3fcb2c2762119f3a
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Mon Jan 11 12:47:11 2016 -0500

    Adding a unified memory manager for all backends
---
 src/backend/MemoryManager.cpp | 250 +++++++++++++++++++++
 src/backend/MemoryManager.hpp |  99 +++++++++
 src/backend/cpu/memory.cpp    | 247 +++++----------------
 src/backend/cpu/memory.hpp    |   1 +
 src/backend/cuda/memory.cpp   | 506 ++++++++++++------------------------------
 src/backend/cuda/memory.hpp   |   1 +
 src/backend/opencl/memory.cpp | 499 ++++++++++++++---------------------------
 src/backend/opencl/memory.hpp |   5 -
 8 files changed, 710 insertions(+), 898 deletions(-)

diff --git a/src/backend/MemoryManager.cpp b/src/backend/MemoryManager.cpp
new file mode 100644
index 0000000..621ce62
--- /dev/null
+++ b/src/backend/MemoryManager.cpp
@@ -0,0 +1,250 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include "MemoryManager.hpp"
+#include "dispatch.hpp"
+#include "err_common.hpp"
+#include "util.hpp"
+
+namespace common
+{
+
+MemoryManager::MemoryManager(int num_devices, unsigned MAX_BUFFERS, unsigned MAX_BYTES, bool debug):
+    mem_step_size(1024),
+    max_buffers(MAX_BUFFERS),
+    max_bytes(MAX_BYTES),
+    memory(num_devices),
+    debug_mode(debug)
+{
+    std::string env_var = getEnvVar("AF_MEM_DEBUG");
+    if (!env_var.empty()) {
+        this->debug_mode = env_var[0] != '0';
+    }
+    if (this->debug_mode) mem_step_size = 1;
+}
+
+void MemoryManager::garbageCollect()
+{
+    if (this->debug_mode) return;
+
+    memory_info& current = this->getCurrentMemoryInfo();
+
+    for(buffer_iter iter = current.map.begin();
+        iter != current.map.end(); ++iter) {
+
+        if (!(iter->second).manager_lock) {
+
+            if (!(iter->second).user_lock) {
+                if ((iter->second).bytes > 0) {
+                    this->nativeFree(iter->first);
+                }
+                current.total_bytes -= iter->second.bytes;
+            }
+        }
+    }
+
+    buffer_iter memory_curr = current.map.begin();
+    buffer_iter memory_end  = current.map.end();
+
+    while(memory_curr != memory_end) {
+        if (memory_curr->second.manager_lock || memory_curr->second.user_lock) {
+            ++memory_curr;
+        } else {
+            current.map.erase(memory_curr++);
+        }
+    }
+}
+
+void MemoryManager::unlock(void *ptr, bool user_unlock)
+{
+    memory_info& current = this->getCurrentMemoryInfo();
+    lock_guard_t lock(this->memory_mutex);
+
+    buffer_iter iter = current.map.find((void *)ptr);
+
+    if (iter != current.map.end()) {
+
+        iter->second.manager_lock = false;
+        if ((iter->second).user_lock && !user_unlock) return;
+
+        iter->second.user_lock = false;
+        current.lock_bytes -= iter->second.bytes;
+        current.lock_buffers--;
+
+        if (this->debug_mode) {
+            if ((iter->second).bytes > 0) {
+                this->nativeFree(iter->first);
+            }
+        }
+
+    } else {
+        this->nativeFree(ptr); // Free it because we are not sure what the size is
+    }
+}
+
+void *MemoryManager::alloc(const size_t bytes)
+{
+    memory_info& current = this->getCurrentMemoryInfo();
+
+    void *ptr = NULL;
+    size_t alloc_bytes = this->debug_mode ? bytes : (divup(bytes, mem_step_size) * mem_step_size);
+
+    if (bytes > 0) {
+
+        lock_guard_t lock(this->memory_mutex);
+
+        // There is no memory cache in debug mode
+        if (!this->debug_mode) {
+
+            // FIXME: Add better checks for garbage collection
+            // Perhaps look at total memory available as a metric
+            if (current.map.size() > this->max_buffers ||
+                current.lock_bytes >= this->max_bytes) {
+
+                this->garbageCollect();
+            }
+
+            for(buffer_iter iter = current.map.begin();
+                iter != current.map.end(); ++iter) {
+
+                buffer_info info = iter->second;
+
+                if (!info.manager_lock &&
+                    !info.user_lock &&
+                    info.bytes == alloc_bytes) {
+
+                    iter->second.manager_lock = true;
+                    current.lock_bytes += alloc_bytes;
+                    current.lock_buffers++;
+                    return iter->first;
+                }
+            }
+        }
+
+        // Perform garbage collection if memory can not be allocated
+        ptr = this->nativeAlloc(alloc_bytes);
+
+        if (!ptr) {
+            this->garbageCollect();
+            ptr = this->nativeAlloc(alloc_bytes);
+            if (!ptr) AF_ERROR("Can not allocate memory", AF_ERR_NO_MEM);
+        }
+
+        buffer_info info = {true, false, alloc_bytes};
+        current.map[ptr] = info;
+
+        current.lock_bytes += alloc_bytes;
+        current.lock_buffers++;
+        current.total_bytes += alloc_bytes;
+    }
+    return ptr;
+}
+
+void MemoryManager::userLock(const void *ptr)
+{
+    memory_info& current = this->getCurrentMemoryInfo();
+
+    lock_guard_t lock(this->memory_mutex);
+
+    buffer_iter iter = current.map.find(const_cast<void *>(ptr));
+
+    if (iter != current.map.end()) {
+        iter->second.user_lock = true;
+    } else {
+        buffer_info info = { true,
+                          true,
+                          100 }; //This number is not relevant
+
+        current.map[(void *)ptr] = info;
+    }
+}
+
+void MemoryManager::userUnlock(const void *ptr)
+{
+    memory_info& current = this->getCurrentMemoryInfo();
+
+    lock_guard_t lock(this->memory_mutex);
+
+    buffer_iter iter = current.map.find((void *)ptr);
+    if (iter != current.map.end()) {
+        iter->second.user_lock = false;
+        if (this->debug_mode) {
+            if ((iter->second).bytes > 0) {
+                this->nativeFree(iter->first);
+            }
+        }
+    }
+}
+
+size_t MemoryManager::getMemStepSize()
+{
+    lock_guard_t lock(this->memory_mutex);
+    return this->mem_step_size;
+}
+
+void MemoryManager::setMemStepSize(size_t new_step_size)
+{
+    lock_guard_t lock(this->memory_mutex);
+    this->mem_step_size = new_step_size;
+}
+
+void MemoryManager::printInfo(const char *msg, const int device)
+{
+    lock_guard_t lock(this->memory_mutex);
+    memory_info& current = this->getCurrentMemoryInfo();
+
+    std::cout << msg << std::endl;
+
+    static const std::string head("|     POINTER      |    SIZE    |  AF LOCK  | USER LOCK |");
+    static const std::string line(head.size(), '-');
+    std::cout << line << std::endl << head << std::endl << line << std::endl;
+
+    for(buffer_iter iter = current.map.begin();
+        iter != current.map.end(); ++iter) {
+
+        std::string status_mngr("Unknown");
+        std::string status_user("Unknown");
+
+        if(iter->second.manager_lock)  status_mngr = "Yes";
+        else                           status_mngr = " No";
+
+        if(iter->second.user_lock)     status_user = "Yes";
+        else                           status_user = " No";
+
+        std::string unit = "KB";
+        double size = (double)(iter->second.bytes) / 1024;
+        if(size >= 1024) {
+            size = size / 1024;
+            unit = "MB";
+        }
+
+        std::cout << "|  " << std::right << std::setw(14) << iter->first << " "
+                  << " | " << std::setw(7) << std::setprecision(4) << size << " " << unit
+                  << " | " << std::setw(9) << status_mngr
+                  << " | " << std::setw(9) << status_user
+                  << " |"  << std::endl;
+    }
+
+    std::cout << line << std::endl;
+}
+
+void MemoryManager::bufferInfo(size_t *alloc_bytes, size_t *alloc_buffers,
+                               size_t *lock_bytes,  size_t *lock_buffers)
+{
+    memory_info current = this->getCurrentMemoryInfo();
+    lock_guard_t lock(this->memory_mutex);
+    if (alloc_bytes   ) *alloc_bytes   = current.total_bytes;
+    if (alloc_buffers ) *alloc_buffers = current.map.size();
+    if (lock_bytes    ) *lock_bytes    = current.lock_bytes;
+    if (lock_buffers  ) *lock_buffers  = current.lock_buffers;
+}
+}
diff --git a/src/backend/MemoryManager.hpp b/src/backend/MemoryManager.hpp
new file mode 100644
index 0000000..1f87ea2
--- /dev/null
+++ b/src/backend/MemoryManager.hpp
@@ -0,0 +1,99 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+
+#include <vector>
+#include <map>
+#include <mutex>
+
+namespace common
+{
+
+typedef std::mutex mutex_t;
+typedef std::lock_guard<std::mutex> lock_guard_t;
+
+class MemoryManager
+{
+    typedef struct
+    {
+        bool manager_lock;
+        bool user_lock;
+        size_t bytes;
+    } buffer_info;
+
+    typedef std::map<void *, buffer_info> buffer_t;
+    typedef buffer_t::iterator buffer_iter;
+
+    typedef struct
+    {
+        buffer_t map;
+        size_t lock_bytes;
+        size_t lock_buffers;
+        size_t total_bytes;
+    } memory_info;
+
+    size_t mem_step_size;
+    unsigned max_buffers;
+    unsigned max_bytes;
+    std::vector<memory_info> memory;
+    bool debug_mode;
+
+    memory_info& getCurrentMemoryInfo()
+    {
+        return memory[this->getActiveDeviceId()];
+    }
+
+    virtual int getActiveDeviceId()
+    {
+        return 0;
+    }
+
+public:
+    MemoryManager(int num_devices, unsigned MAX_BUFFERS, unsigned MAX_BYTES, bool debug);
+
+    void *alloc(const size_t bytes);
+
+    void unlock(void *ptr, bool user_unlock);
+
+    void garbageCollect();
+
+    void printInfo(const char *msg, const int device);
+
+    void bufferInfo(size_t *alloc_bytes, size_t *alloc_buffers,
+                    size_t *lock_bytes,  size_t *lock_buffers);
+
+    void userLock(const void *ptr);
+
+    void userUnlock(const void *ptr);
+
+    size_t getMemStepSize();
+
+    void setMemStepSize(size_t new_step_size);
+
+    virtual void *nativeAlloc(const size_t bytes)
+    {
+        return malloc(bytes);
+    }
+
+    virtual void nativeFree(void *ptr)
+    {
+        return free((void *)ptr);
+    }
+
+    virtual ~MemoryManager()
+    {
+    }
+
+protected:
+    mutex_t memory_mutex;
+
+};
+
+}
diff --git a/src/backend/cpu/memory.cpp b/src/backend/cpu/memory.cpp
index 5eebf18..2687b30 100644
--- a/src/backend/cpu/memory.cpp
+++ b/src/backend/cpu/memory.cpp
@@ -10,244 +10,111 @@
 #include <memory.hpp>
 #include <err_cpu.hpp>
 #include <types.hpp>
-#include <map>
-#include <dispatch.hpp>
-#include <cstdlib>
-#include <mutex>
-#include <iostream>
-#include <iomanip>
-#include <string>
 #include <platform.hpp>
 #include <queue.hpp>
+#include <memory>
+#include <MemoryManager.hpp>
 
-namespace cpu
-{
+#ifndef AF_MEM_DEBUG
+#define AF_MEM_DEBUG 0
+#endif
 
-static size_t memory_resolution = 1024; //1KB
+#ifndef AF_CPU_MEM_DEBUG
+#define AF_CPU_MEM_DEBUG 0
+#endif
 
-void setMemStepSize(size_t step_bytes)
-{
-    memory_resolution = step_bytes;
-}
-
-size_t getMemStepSize(void)
+namespace cpu
 {
-    return memory_resolution;
-}
 
-class Manager
+class MemoryManager  : public common::MemoryManager
 {
-    public:
-    static bool initialized;
-    Manager()
+    int getActiveDeviceId();
+public:
+    MemoryManager();
+    void *nativeAlloc(const size_t bytes);
+    void nativeFree(void *ptr);
+    ~MemoryManager()
     {
-        initialized = true;
-    }
-
-    ~Manager()
-    {
-        garbageCollect();
+        common::lock_guard_t lock(this->memory_mutex);
+        this->garbageCollect();
     }
 };
 
-bool Manager::initialized = false;
-
-static void managerInit()
+int MemoryManager::getActiveDeviceId()
 {
-    if(Manager::initialized == false)
-        static Manager pm = Manager();
+    return cpu::getActiveDeviceId();
 }
 
-typedef struct
-{
-    bool mngr_lock; // True if locked by memory manager, false if free
-    bool user_lock; // True if locked by user, false if free
-    size_t bytes;
-} mem_info;
+MemoryManager::MemoryManager() :
+    common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_CPU_MEM_DEBUG)
+{}
 
-static size_t used_bytes = 0;
-static size_t used_buffers = 0;
-static size_t total_bytes = 0;
-typedef std::map<void *, mem_info> mem_t;
-typedef mem_t::iterator mem_iter;
 
-mem_t memory_map;
-std::mutex memory_map_mutex;
+void *MemoryManager::nativeAlloc(const size_t bytes)
+{
+    return malloc(bytes);
+}
 
-template<typename T>
-void freeWrapper(T *ptr)
+void MemoryManager::nativeFree(void *ptr)
 {
-    free((void *)ptr);
+    return free((void *)ptr);
 }
 
-void garbageCollect()
+static MemoryManager &getMemoryManager()
 {
-    for(mem_iter iter = memory_map.begin();
-        iter != memory_map.end(); ++iter) {
+    static MemoryManager instance;
+    return instance;
+}
 
-        if (!(iter->second).mngr_lock) {
+void setMemStepSize(size_t step_bytes)
+{
+    getMemoryManager().setMemStepSize(step_bytes);
+}
 
-            if (!(iter->second).user_lock) {
-                freeWrapper(iter->first);
-                total_bytes -= iter->second.bytes;
-            }
-        }
-    }
+size_t getMemStepSize(void)
+{
+    return getMemoryManager().getMemStepSize();
+}
 
-    mem_iter memory_curr = memory_map.begin();
-    mem_iter memory_end  = memory_map.end();
 
-    while(memory_curr != memory_end) {
-        if (memory_curr->second.mngr_lock || memory_curr->second.user_lock) {
-            ++memory_curr;
-        } else {
-            memory_map.erase(memory_curr++);
-        }
-    }
+void garbageCollect()
+{
+    getMemoryManager().garbageCollect();
 }
 
 void printMemInfo(const char *msg, const int device)
 {
-    std::cout << msg << std::endl;
-
-    static const std::string head("|     POINTER      |    SIZE    |  AF LOCK  | USER LOCK |");
-    static const std::string line(head.size(), '-');
-    std::cout << line << std::endl << head << std::endl << line << std::endl;
-
-    for(mem_iter iter = memory_map.begin();
-        iter != memory_map.end(); ++iter) {
-
-        std::string status_mngr("Unknown");
-        std::string status_user("Unknown");
-
-        if(iter->second.mngr_lock)  status_mngr = "Yes";
-        else                        status_mngr = " No";
-
-        if(iter->second.user_lock)  status_user = "Yes";
-        else                        status_user = " No";
-
-        std::string unit = "KB";
-        double size = (double)(iter->second.bytes) / 1024;
-        if(size >= 1024) {
-            size = size / 1024;
-            unit = "MB";
-        }
-
-        std::cout << "|  " << std::right << std::setw(14) << iter->first << " "
-                  << " | " << std::setw(7) << std::setprecision(4) << size << " " << unit
-                  << " | " << std::setw(9) << status_mngr
-                  << " | " << std::setw(9) << status_user
-                  << " |"  << std::endl;
-    }
-
-    std::cout << line << std::endl;
+    getMemoryManager().printInfo(msg, device);
 }
 
 template<typename T>
 T* memAlloc(const size_t &elements)
 {
-    managerInit();
-
-    T* ptr = NULL;
-    size_t alloc_bytes = divup(sizeof(T) * elements, memory_resolution) * memory_resolution;
-
-    if (elements > 0) {
-        std::lock_guard<std::mutex> lock(memory_map_mutex);
-
-        // FIXME: Add better checks for garbage collection
-        // Perhaps look at total memory available as a metric
-        if (memory_map.size() > MAX_BUFFERS ||
-            used_bytes >= MAX_BYTES) {
-
-            garbageCollect();
-        }
-
-        for(mem_iter iter = memory_map.begin();
-            iter != memory_map.end(); ++iter) {
-
-            mem_info info = iter->second;
-
-            if (!info.mngr_lock &&
-                !info.user_lock &&
-                 info.bytes == alloc_bytes) {
-
-                iter->second.mngr_lock = true;
-                used_bytes += alloc_bytes;
-                used_buffers++;
-                return (T *)iter->first;
-            }
-        }
-
-        // Perform garbage collection if memory can not be allocated
-        ptr = (T *)malloc(alloc_bytes);
-
-        if (ptr == NULL) {
-            AF_ERROR("Can not allocate memory", AF_ERR_NO_MEM);
-        }
-
-        mem_info info = {true, false, alloc_bytes};
-        memory_map[ptr] = info;
-
-        used_bytes += alloc_bytes;
-        used_buffers++;
-        total_bytes += alloc_bytes;
-    }
-    return ptr;
+    return (T *)getMemoryManager().alloc(elements * sizeof(T));
 }
 
 template<typename T>
-void memFreeLocked(T *ptr, bool user_unlock)
+void memFree(T *ptr)
 {
-    std::lock_guard<std::mutex> lock(memory_map_mutex);
-
-    mem_iter iter = memory_map.find((void *)ptr);
-
-    if (iter != memory_map.end()) {
-
-        iter->second.mngr_lock = false;
-        if ((iter->second).user_lock && !user_unlock) return;
-
-        iter->second.user_lock = false;
-        used_bytes -= iter->second.bytes;
-        used_buffers--;
-
-    } else {
-        freeWrapper(ptr); // Free it because we are not sure what the size is
-    }
+    return getMemoryManager().unlock((void *)ptr, false);
 }
 
 template<typename T>
-void memFree(T *ptr)
+void memFreeLocked(T *ptr, bool user_unlock)
 {
-    memFreeLocked(ptr, false);
+    return getMemoryManager().unlock((void *)ptr, user_unlock);
 }
 
 template<typename T>
 void memLock(const T *ptr)
 {
-    std::lock_guard<std::mutex> lock(memory_map_mutex);
-
-    mem_iter iter = memory_map.find((void *)ptr);
-
-    if (iter != memory_map.end()) {
-        iter->second.user_lock = true;
-    } else {
-        mem_info info = { true,
-                          true,
-                          100 }; //This number is not relevant
-
-        memory_map[(void *)ptr] = info;
-    }
+    getMemoryManager().userLock((void *)ptr);
 }
 
 template<typename T>
 void memUnlock(const T *ptr)
 {
-    std::lock_guard<std::mutex> lock(memory_map_mutex);
-    mem_iter iter = memory_map.find((void *)ptr);
-    if (iter != memory_map.end()) {
-        iter->second.user_lock = false;
-    }
+    getMemoryManager().userUnlock((void *)ptr);
 }
 
 
@@ -255,22 +122,20 @@ void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
                       size_t *lock_bytes,  size_t *lock_buffers)
 {
     getQueue().sync();
-    if (alloc_bytes   ) *alloc_bytes   = total_bytes;
-    if (alloc_buffers ) *alloc_buffers = memory_map.size();
-    if (lock_bytes    ) *lock_bytes    = used_bytes;
-    if (lock_buffers  ) *lock_buffers  = used_buffers;
+    getMemoryManager().bufferInfo(alloc_bytes, alloc_buffers,
+                                  lock_bytes,  lock_buffers);
 }
 
 template<typename T>
 T* pinnedAlloc(const size_t &elements)
 {
-    return memAlloc<T>(elements);
+    return (T *)getMemoryManager().alloc(elements * sizeof(T));
 }
 
 template<typename T>
 void pinnedFree(T* ptr)
 {
-    memFree<T>(ptr);
+    return getMemoryManager().unlock((void *)ptr, false);
 }
 
 #define INSTANTIATE(T)                                      \
diff --git a/src/backend/cpu/memory.hpp b/src/backend/cpu/memory.hpp
index 6524fe6..279b3db 100644
--- a/src/backend/cpu/memory.hpp
+++ b/src/backend/cpu/memory.hpp
@@ -9,6 +9,7 @@
 #pragma once
 
 #include <af/defines.h>
+
 namespace cpu
 {
     template<typename T> T* memAlloc(const size_t &elements);
diff --git a/src/backend/cuda/memory.cpp b/src/backend/cuda/memory.cpp
index f37a0fe..43c37e0 100644
--- a/src/backend/cuda/memory.cpp
+++ b/src/backend/cuda/memory.cpp
@@ -20,412 +20,178 @@
 #include <map>
 #include <dispatch.hpp>
 #include <platform.hpp>
+#include <MemoryManager.hpp>
 
-namespace cuda
-{
-    static size_t memory_resolution = 1024; //1KB
-
-    void setMemStepSize(size_t step_bytes)
-    {
-        memory_resolution = step_bytes;
-    }
-
-    size_t getMemStepSize(void)
-    {
-        return memory_resolution;
-    }
-
-    template<typename T>
-    static void cudaFreeWrapper(T *ptr)
-    {
-        cudaError_t err = cudaFree(ptr);
-        if (err != cudaErrorCudartUnloading) // see issue #167
-            CUDA_CHECK(err);
-    }
-
-    template<typename T>
-    static void pinnedFreeWrapper(T *ptr)
-    {
-        cudaError_t err = cudaFreeHost(ptr);
-        if (err != cudaErrorCudartUnloading) // see issue #167
-            CUDA_CHECK(err);
-    }
-
-#ifdef AF_CUDA_MEM_DEBUG
-
-    template<typename T>
-    T* memAlloc(const size_t &elements)
-    {
-        T* ptr = NULL;
-        CUDA_CHECK(cudaMalloc(&ptr, elements * sizeof(T)));
-        return ptr;
-    }
-
-    template<typename T>
-    void memFree(T *ptr)
-    {
-        cudaFreeWrapper(ptr); // Free it because we are not sure what the size is
-    }
-
-    template<typename T>
-    void memFreeLocked(T *ptr, bool user_unlock)
-    {
-        cudaFreeWrapper(ptr); // Free it because we are not sure what the size is
-    }
-
-    template<typename T>
-    void memLock(const T *ptr)
-    {
-        return;
-    }
-
-    template<typename T>
-    void memUnlock(const T *ptr)
-    {
-        return;
-    }
-
-    template<typename T>
-    T* pinnedAlloc(const size_t &elements)
-    {
-        T* ptr = NULL;
-        CUDA_CHECK(cudaMallocHost((void **)(&ptr), elements * sizeof(T)));
-        return (T*)ptr;
-    }
 
-    template<typename T>
-    void pinnedFree(T *ptr)
-    {
-        pinnedFreeWrapper(ptr); // Free it because we are not sure what the size is
-    }
+#ifndef AF_MEM_DEBUG
+#define AF_MEM_DEBUG 0
+#endif
 
-    void garbageCollect()
-    {
-    }
+#ifndef AF_CUDA_MEM_DEBUG
+#define AF_CUDA_MEM_DEBUG 0
+#endif
 
-    void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
-                          size_t *lock_bytes,  size_t *lock_buffers)
-    {
-    }
+namespace cuda
+{
 
-    void printMemInfo(const char *msg, const int device)
+class MemoryManager  : public common::MemoryManager
+{
+    int getActiveDeviceId();
+public:
+    MemoryManager();
+    void *nativeAlloc(const size_t bytes);
+    void nativeFree(void *ptr);
+    ~MemoryManager()
     {
-        std::cout << "printMemInfo() disabled in AF_CUDA_MEM_DEBUG Mode" << std::endl;
+        common::lock_guard_t lock(this->memory_mutex);
+        this->garbageCollect();
     }
-#else
+};
 
-    // Manager Class
-    // Dummy used to call garbage collection at the end of the program
-    class Manager
-    {
-        public:
-        static bool initialized;
-        Manager()
-        {
-            initialized = true;
-        }
-
-        ~Manager()
-        {
-            // Destructors should not through exceptions
-            try {
-                for(int i = 0; i < getDeviceCount(); i++) {
-                    setDevice(i);
-                    garbageCollect();
-                }
-                pinnedGarbageCollect();
-
-            } catch (AfError &ex) {
-
-                std::string perr = getEnvVar("AF_PRINT_ERRORS");
-                if(!perr.empty()) {
-                    if(perr != "0")
-                        fprintf(stderr, "%s\n", ex.what());
-                }
-            }
-        }
-    };
-
-    bool Manager::initialized = false;
-
-    static void managerInit()
+class MemoryManagerPinned  : public common::MemoryManager
+{
+    int getActiveDeviceId();
+public:
+    MemoryManagerPinned();
+    void *nativeAlloc(const size_t bytes);
+    void nativeFree(void *ptr);
+    ~MemoryManagerPinned()
     {
-        if(Manager::initialized == false)
-            static Manager pm = Manager();
+        common::lock_guard_t lock(this->memory_mutex);
+        this->garbageCollect();
     }
+};
 
-    typedef struct
-    {
-        bool mngr_lock;
-        bool user_lock;
-        size_t bytes;
-    } mem_info;
+int MemoryManager::getActiveDeviceId()
+{
+    return cuda::getActiveDeviceId();
+}
 
-    static size_t used_bytes[DeviceManager::MAX_DEVICES] = {0};
-    static size_t used_buffers[DeviceManager::MAX_DEVICES] = {0};
-    static size_t total_bytes[DeviceManager::MAX_DEVICES] = {0};
-    typedef std::map<void *, mem_info> mem_t;
-    typedef mem_t::iterator mem_iter;
+MemoryManager::MemoryManager() :
+    common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_CUDA_MEM_DEBUG)
+{}
 
-    mem_t memory_maps[DeviceManager::MAX_DEVICES];
+void *MemoryManager::nativeAlloc(const size_t bytes)
+{
+    void *ptr = NULL;
+    CUDA_CHECK(cudaMalloc(&ptr, bytes));
+    return ptr;
+}
 
-    void garbageCollect()
-    {
-        int n = getActiveDeviceId();
-
-        for(mem_iter iter = memory_maps[n].begin();
-            iter != memory_maps[n].end(); ++iter) {
-
-            if (!(iter->second.mngr_lock)) {
-
-                if (!(iter->second.user_lock)) {
-                    cudaFreeWrapper(iter->first);
-                    total_bytes[n] -= iter->second.bytes;
-                }
-            }
-        }
-
-        mem_iter memory_curr = memory_maps[n].begin();
-        mem_iter memory_end  = memory_maps[n].end();
-
-        while(memory_curr != memory_end) {
-            if (memory_curr->second.mngr_lock || memory_curr->second.user_lock) {
-                ++memory_curr;
-            } else {
-                memory_maps[n].erase(memory_curr++);
-            }
-        }
+void MemoryManager::nativeFree(void *ptr)
+{
+    cudaError_t err = cudaFree(ptr);
+    if (err != cudaErrorCudartUnloading) {
+        CUDA_CHECK(err);
     }
+}
 
-    void printMemInfo(const char *msg, const int device)
-    {
-        std::cout << msg << std::endl;
-        std::cout << "Memory Map for Device: " << device << std::endl;
-
-        static const std::string head("|     POINTER      |    SIZE    |  AF LOCK  | USER LOCK |");
-        static const std::string line(head.size(), '-');
-        std::cout << line << std::endl << head << std::endl << line << std::endl;
-
-        for(mem_iter iter = memory_maps[device].begin();
-            iter != memory_maps[device].end(); ++iter) {
-
-            std::string status_mngr("Unknown");
-            std::string status_user("Unknown");
-
-            if(iter->second.mngr_lock)  status_mngr = "Yes";
-            else                        status_mngr = " No";
-
-            if(iter->second.user_lock)  status_user = "Yes";
-            else                        status_user = " No";
+static MemoryManager &getMemoryManager()
+{
+    static MemoryManager instance;
+    return instance;
+}
 
-            std::string unit = "KB";
-            double size = (double)(iter->second.bytes) / 1024;
-            if(size >= 1024) {
-                size = size / 1024;
-                unit = "MB";
-            }
+int MemoryManagerPinned::getActiveDeviceId()
+{
+    return cuda::getActiveDeviceId();
+}
 
-            std::cout << "|  " << std::right << std::setw(14) << iter->first << " "
-                      << " | " << std::setw(7) << std::setprecision(4) << size << " " << unit
-                      << " | " << std::setw(9) << status_mngr
-                      << " | " << std::setw(9) << status_user
-                      << " |"  << std::endl;
-        }
+MemoryManagerPinned::MemoryManagerPinned() :
+    common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_CUDA_MEM_DEBUG)
+{}
 
-        std::cout << line << std::endl;
-    }
+void *MemoryManagerPinned::nativeAlloc(const size_t bytes)
+{
+    void *ptr;
+    CUDA_CHECK(cudaMallocHost(&ptr, bytes));
+    return ptr;
+}
 
-    template<typename T>
-    T* memAlloc(const size_t &elements)
-    {
-        managerInit();
-        int n = getActiveDeviceId();
-        T* ptr = NULL;
-        size_t alloc_bytes = divup(sizeof(T) * elements, memory_resolution) * memory_resolution;
-
-        if (elements > 0) {
-
-            // FIXME: Add better checks for garbage collection
-            // Perhaps look at total memory available as a metric
-            if (memory_maps[n].size() >= MAX_BUFFERS || used_bytes[n] >= MAX_BYTES) {
-                garbageCollect();
-            }
-
-            for(mem_iter iter = memory_maps[n].begin();
-                iter != memory_maps[n].end(); ++iter) {
-
-                mem_info info = iter->second;
-
-                if (!info.mngr_lock &&
-                    !info.user_lock &&
-                     info.bytes == alloc_bytes) {
-
-                    iter->second.mngr_lock = true;
-                    used_bytes[n] += alloc_bytes;
-                    used_buffers[n]++;
-                    return (T *)iter->first;
-                }
-            }
-
-            // Perform garbage collection if memory can not be allocated
-            if (cudaMalloc((void **)&ptr, alloc_bytes) != cudaSuccess) {
-                garbageCollect();
-                CUDA_CHECK(cudaMalloc((void **)(&ptr), alloc_bytes));
-            }
-
-            mem_info info = {true, false, alloc_bytes};
-            memory_maps[n][ptr] = info;
-            used_bytes[n] += alloc_bytes;
-            used_buffers[n]++;
-            total_bytes[n] += alloc_bytes;
-        }
-        return ptr;
+void MemoryManagerPinned::nativeFree(void *ptr)
+{
+    cudaError_t err = cudaFreeHost(ptr);
+    if (err != cudaErrorCudartUnloading) {
+        CUDA_CHECK(err);
     }
+}
 
-    template<typename T>
-    void memFreeLocked(T *ptr, bool user_unlock)
-    {
-        int n = getActiveDeviceId();
-        mem_iter iter = memory_maps[n].find((void *)ptr);
-
-        if (iter != memory_maps[n].end()) {
-
-            iter->second.mngr_lock = false;
-            if ((iter->second.user_lock) && !user_unlock) return;
-
-            iter->second.user_lock = false;
+static MemoryManagerPinned &getMemoryManagerPinned()
+{
+    static MemoryManagerPinned instance;
+    return instance;
+}
 
-            used_bytes[n] -= iter->second.bytes;
-            used_buffers[n]--;
+void setMemStepSize(size_t step_bytes)
+{
+    getMemoryManager().setMemStepSize(step_bytes);
+}
 
-        } else {
-            cudaFreeWrapper(ptr); // Free it because we are not sure what the size is
-        }
-    }
+size_t getMemStepSize(void)
+{
+    return getMemoryManager().getMemStepSize();
+}
 
-    template<typename T>
-    void memFree(T *ptr)
-    {
-        memFreeLocked(ptr, false);
-    }
 
-    template<typename T>
-    void memLock(const T *ptr)
-    {
-        int n = getActiveDeviceId();
-        mem_iter iter = memory_maps[n].find((void *)ptr);
+void garbageCollect()
+{
+    getMemoryManager().garbageCollect();
+}
 
-        if (iter != memory_maps[n].end()) {
-            iter->second.user_lock = true;
-        } else {
+void printMemInfo(const char *msg, const int device)
+{
+    getMemoryManager().printInfo(msg, device);
+}
 
-            mem_info info = { true,
-                              true,
-                              100 }; //This number is not relevant
+template<typename T>
+T* memAlloc(const size_t &elements)
+{
+    return (T *)getMemoryManager().alloc(elements * sizeof(T));
+}
 
-            memory_maps[n][(void *)ptr] = info;
-        }
-    }
+template<typename T>
+void memFree(T *ptr)
+{
+    return getMemoryManager().unlock((void *)ptr, false);
+}
 
-    template<typename T>
-    void memUnlock(const T *ptr)
-    {
-        int n = getActiveDeviceId();
-        mem_iter iter = memory_maps[n].find((void *)ptr);
-        if (iter != memory_maps[n].end()) {
-            iter->second.user_lock = false;
-        }
-    }
+template<typename T>
+void memFreeLocked(T *ptr, bool user_unlock)
+{
+    return getMemoryManager().unlock((void *)ptr, user_unlock);
+}
 
-    void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
-                          size_t *lock_bytes,  size_t *lock_buffers)
-    {
-        int n = getActiveDeviceId();
-        if (alloc_bytes   ) *alloc_bytes   = total_bytes[n];
-        if (alloc_buffers ) *alloc_buffers = memory_maps[n].size();
-        if (lock_bytes    ) *lock_bytes    = used_bytes[n];
-        if (lock_buffers  ) *lock_buffers  = used_buffers[n];
-    }
+template<typename T>
+void memLock(const T *ptr)
+{
+    getMemoryManager().userLock((void *)ptr);
+}
 
-    //////////////////////////////////////////////////////////////////////////////
-    mem_t pinned_maps;
-    static size_t pinned_used_bytes = 0;
+template<typename T>
+void memUnlock(const T *ptr)
+{
+    getMemoryManager().userUnlock((void *)ptr);
+}
 
-    void pinnedGarbageCollect()
-    {
-        for(mem_iter iter = pinned_maps.begin(); iter != pinned_maps.end(); ++iter) {
-            if (!(iter->second.mngr_lock)) {
-                pinnedFreeWrapper(iter->first);
-            }
-        }
-
-        mem_iter memory_curr = pinned_maps.begin();
-        mem_iter memory_end  = pinned_maps.end();
-
-        while(memory_curr != memory_end) {
-            if (memory_curr->second.mngr_lock) {
-                ++memory_curr;
-            } else {
-                pinned_maps.erase(memory_curr++);
-            }
-        }
-    }
 
-    template<typename T>
-    T* pinnedAlloc(const size_t &elements)
-    {
-        managerInit();
-        T* ptr = NULL;
-        // Allocate the higher megabyte. Overhead of creating pinned memory is
-        // more so we want more resuable memory.
-        size_t alloc_bytes = divup(sizeof(T) * elements, 1048576) * 1048576;
-
-        if (elements > 0) {
-
-            // FIXME: Add better checks for garbage collection
-            // Perhaps look at total memory available as a metric
-            if (pinned_maps.size() >= MAX_BUFFERS || pinned_used_bytes >= MAX_BYTES) {
-                pinnedGarbageCollect();
-            }
-
-            for(mem_iter iter = pinned_maps.begin();
-                iter != pinned_maps.end(); ++iter) {
-
-                mem_info info = iter->second;
-                if (!info.mngr_lock && info.bytes == alloc_bytes) {
-                    iter->second.mngr_lock = true;
-                    pinned_used_bytes += alloc_bytes;
-                    return (T *)iter->first;
-                }
-            }
-
-            // Perform garbage collection if memory can not be allocated
-            if (cudaMallocHost((void **)&ptr, alloc_bytes) != cudaSuccess) {
-                pinnedGarbageCollect();
-                CUDA_CHECK(cudaMallocHost((void **)(&ptr), alloc_bytes));
-            }
-
-            mem_info info = {true, false, alloc_bytes};
-            pinned_maps[ptr] = info;
-            pinned_used_bytes += alloc_bytes;
-        }
-        return (T*)ptr;
-    }
+void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
+                      size_t *lock_bytes,  size_t *lock_buffers)
+{
+    getMemoryManager().bufferInfo(alloc_bytes, alloc_buffers,
+                                  lock_bytes,  lock_buffers);
+}
 
-    template<typename T>
-    void pinnedFree(T *ptr)
-    {
-        mem_iter iter = pinned_maps.find((void *)ptr);
-
-        if (iter != pinned_maps.end()) {
-            iter->second.mngr_lock = false;
-            pinned_used_bytes -= iter->second.bytes;
-        } else {
-            pinnedFreeWrapper(ptr); // Free it because we are not sure what the size is
-        }
-    }
+template<typename T>
+T* pinnedAlloc(const size_t &elements)
+{
+    return (T *)getMemoryManagerPinned().alloc(elements * sizeof(T));
+}
 
-#endif
+template<typename T>
+void pinnedFree(T* ptr)
+{
+    return getMemoryManagerPinned().unlock((void *)ptr, false);
+}
 
 #define INSTANTIATE(T)                                      \
     template T* memAlloc(const size_t &elements);           \
diff --git a/src/backend/cuda/memory.hpp b/src/backend/cuda/memory.hpp
index 29e4e76..5b362cd 100644
--- a/src/backend/cuda/memory.hpp
+++ b/src/backend/cuda/memory.hpp
@@ -9,6 +9,7 @@
 #pragma once
 
 #include <af/defines.h>
+
 namespace cuda
 {
     template<typename T> T* memAlloc(const size_t &elements);
diff --git a/src/backend/opencl/memory.cpp b/src/backend/opencl/memory.cpp
index b75955e..45b8e96 100644
--- a/src/backend/opencl/memory.cpp
+++ b/src/backend/opencl/memory.cpp
@@ -14,386 +14,221 @@
 #include <iomanip>
 #include <string>
 #include <types.hpp>
+#include "err_opencl.hpp"
 
-namespace opencl
-{
-    static size_t memory_resolution = 1024; //1KB
-
-    void setMemStepSize(size_t step_bytes)
-    {
-        memory_resolution = step_bytes;
-    }
+#include <MemoryManager.hpp>
 
-    size_t getMemStepSize(void)
-    {
-        return memory_resolution;
-    }
-
-    // Manager Class
-    // Dummy used to call garbage collection at the end of the program
-    class Manager
-    {
-        public:
-        static bool initialized;
-        Manager()
-        {
-            initialized = true;
-        }
-
-        ~Manager()
-        {
-            for(int i = 0; i < (int)getDeviceCount(); i++) {
-                setDevice(i);
-                garbageCollect();
-                pinnedGarbageCollect();
-            }
-        }
-    };
-
-    bool Manager::initialized = false;
-
-    static void managerInit()
-    {
-        if(Manager::initialized == false)
-            static Manager pm = Manager();
-    }
-
-    typedef struct
-    {
-        bool mngr_lock;
-        bool user_lock;
-        size_t bytes;
-    } mem_info;
+#ifndef AF_MEM_DEBUG
+#define AF_MEM_DEBUG 0
+#endif
 
-    static size_t used_bytes[DeviceManager::MAX_DEVICES] = {0};
-    static size_t used_buffers[DeviceManager::MAX_DEVICES] = {0};
-    static size_t total_bytes[DeviceManager::MAX_DEVICES] = {0};
+#ifndef AF_OPENCL_MEM_DEBUG
+#define AF_OPENCL_MEM_DEBUG 0
+#endif
 
-    typedef std::map<cl::Buffer *, mem_info> mem_t;
-    typedef mem_t::iterator mem_iter;
-    mem_t memory_maps[DeviceManager::MAX_DEVICES];
+namespace opencl
+{
 
-    static void destroy(cl::Buffer *ptr)
+class MemoryManager  : public common::MemoryManager
+{
+    int getActiveDeviceId();
+public:
+    MemoryManager();
+    void *nativeAlloc(const size_t bytes);
+    void nativeFree(void *ptr);
+    ~MemoryManager()
     {
-        delete ptr;
+        common::lock_guard_t lock(this->memory_mutex);
+        this->garbageCollect();
     }
+};
 
-    void garbageCollect()
-    {
-        int n = getActiveDeviceId();
-        for(mem_iter iter = memory_maps[n].begin();
-            iter != memory_maps[n].end(); ++iter) {
-
-            if (!(iter->second).mngr_lock) {
+class MemoryManagerPinned  : public common::MemoryManager
+{
+    std::vector<
+        std::map<void *, cl::Buffer>
+        > pinned_maps;
+    int getActiveDeviceId();
 
-                if (!(iter->second).user_lock) {
-                    destroy(iter->first);
-                    total_bytes[n] -= iter->second.bytes;
-                }
-            }
-        }
+public:
 
-        mem_iter memory_curr = memory_maps[n].begin();
-        mem_iter memory_end  = memory_maps[n].end();
+    MemoryManagerPinned();
 
-        while(memory_curr != memory_end) {
-            if (memory_curr->second.mngr_lock || memory_curr->second.user_lock) {
-                ++memory_curr;
-            } else {
-                memory_maps[n].erase(memory_curr++);
-            }
-        }
-    }
+    void *nativeAlloc(const size_t bytes);
+    void nativeFree(void *ptr);
 
-    void printMemInfo(const char *msg, const int device)
+    ~MemoryManagerPinned()
     {
-        std::cout << msg << std::endl;
-        std::cout << "Memory Map for Device: " << device << std::endl;
-
-        static const std::string head("|     POINTER      |    SIZE    |  AF LOCK  | USER LOCK |");
-        static const std::string line(head.size(), '-');
-        std::cout << line << std::endl << head << std::endl << line << std::endl;
-
-        for(mem_iter iter = memory_maps[device].begin();
-            iter != memory_maps[device].end(); ++iter) {
-
-            std::string status_mngr("Unknown");
-            std::string status_user("Unknown");
-
-            if(iter->second.mngr_lock)  status_mngr = "Yes";
-            else                        status_mngr = " No";
-
-            if(iter->second.user_lock)  status_user = "Yes";
-            else                        status_user = " No";
-
-            std::string unit = "KB";
-            double size = (double)(iter->second.bytes) / 1024;
-            if(size >= 1024) {
-                size = size / 1024;
-                unit = "MB";
+        common::lock_guard_t lock(this->memory_mutex);
+        this->garbageCollect();
+        for (int n = 0; n < (int)pinned_maps.size(); n++) {
+            auto pinned_curr_iter = pinned_maps[n].begin();
+            auto pinned_end_iter  = pinned_maps[n].end();
+            while (pinned_curr_iter != pinned_end_iter) {
+                pinned_maps[n].erase(pinned_curr_iter++);
             }
-
-            std::cout << "|  " << std::right << std::setw(14) << iter->first << " "
-                      << " | " << std::setw(7) << std::setprecision(4) << size << " " << unit
-                      << " | " << std::setw(9) << status_mngr
-                      << " | " << std::setw(9) << status_user
-                      << " |"  << std::endl;
         }
-
-        std::cout << line << std::endl;
     }
+};
 
-    cl::Buffer *bufferAlloc(const size_t &bytes)
-    {
-        int n = getActiveDeviceId();
-        cl::Buffer *ptr = NULL;
-        size_t alloc_bytes = divup(bytes, memory_resolution) * memory_resolution;
-
-        if (bytes > 0) {
-
-            // FIXME: Add better checks for garbage collection
-            // Perhaps look at total memory available as a metric
-            if (memory_maps[n].size() >= MAX_BUFFERS || used_bytes[n] >= MAX_BYTES) {
-                garbageCollect();
-            }
-
-            for(mem_iter iter = memory_maps[n].begin();
-                iter != memory_maps[n].end(); ++iter) {
-
-                mem_info info = iter->second;
-
-                if (!info.mngr_lock &&
-                    !info.user_lock &&
-                     info.bytes == alloc_bytes) {
-
-                    iter->second.mngr_lock = true;
-                    used_bytes[n] += alloc_bytes;
-                    used_buffers[n]++;
-                    return iter->first;
-                }
-            }
+int MemoryManager::getActiveDeviceId()
+{
+    return opencl::getActiveDeviceId();
+}
 
-            try {
-                ptr = new cl::Buffer(getContext(), CL_MEM_READ_WRITE, alloc_bytes);
-            } catch(...) {
-                garbageCollect();
-                ptr = new cl::Buffer(getContext(), CL_MEM_READ_WRITE, alloc_bytes);
-            }
+MemoryManager::MemoryManager() :
+    common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_OPENCL_MEM_DEBUG)
+{}
 
-            mem_info info = {true, false, alloc_bytes};
-            memory_maps[n][ptr] = info;
-            used_bytes[n] += alloc_bytes;
-            used_buffers[n]++;
-            total_bytes[n] += alloc_bytes;
-        }
-        return ptr;
-    }
-
-    void bufferFree(cl::Buffer *ptr)
-    {
-        bufferFreeLocked(ptr, false);
+void *MemoryManager::nativeAlloc(const size_t bytes)
+{
+    try {
+        return (void *)(new cl::Buffer(getContext(), CL_MEM_READ_WRITE, bytes));
+    } catch(cl::Error err) {
+        CL_TO_AF_ERROR(err);
     }
+}
 
-    void bufferFreeLocked(cl::Buffer *ptr, bool user_unlock)
-    {
-        int n = getActiveDeviceId();
-        mem_iter iter = memory_maps[n].find(ptr);
-
-        if (iter != memory_maps[n].end()) {
-
-            iter->second.mngr_lock = false;
-            if ((iter->second).user_lock && !user_unlock) return;
-
-            iter->second.user_lock = false;
-
-            used_bytes[n] -= iter->second.bytes;
-            used_buffers[n]--;
-        } else {
-            destroy(ptr); // Free it because we are not sure what the size is
-        }
+void MemoryManager::nativeFree(void *ptr)
+{
+    try {
+        delete (cl::Buffer *)ptr;
+    } catch(cl::Error err) {
+        CL_TO_AF_ERROR(err);
     }
+}
 
-    void bufferPop(cl::Buffer *ptr)
-    {
-        int n = getActiveDeviceId();
-        mem_iter iter = memory_maps[n].find(ptr);
+static MemoryManager &getMemoryManager()
+{
+    static MemoryManager instance;
+    return instance;
+}
 
-        if (iter != memory_maps[n].end()) {
-            iter->second.user_lock = true;
-        } else {
+int MemoryManagerPinned::getActiveDeviceId()
+{
+    return opencl::getActiveDeviceId();
+}
 
-            mem_info info = { true,
-                              true,
-                              100 }; //This number is not relevant
+MemoryManagerPinned::MemoryManagerPinned() :
+    common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_OPENCL_MEM_DEBUG),
+    pinned_maps(getDeviceCount())
+{}
 
-            memory_maps[n][ptr] = info;
-        }
-    }
+void *MemoryManagerPinned::nativeAlloc(const size_t bytes)
+{
+    void *ptr = NULL;
+    try {
+        cl::Buffer buf= cl::Buffer(getContext(), CL_MEM_ALLOC_HOST_PTR, bytes);
+        ptr = getQueue().enqueueMapBuffer(buf, true, CL_MAP_READ | CL_MAP_WRITE, 0, bytes);
+        pinned_maps[opencl::getActiveDeviceId()][ptr] = buf;
+    } catch(cl::Error err) {
+        CL_TO_AF_ERROR(err);
+    }
+    return ptr;
+}
 
-    void bufferPush(cl::Buffer *ptr)
-    {
-        int n = getActiveDeviceId();
-        mem_iter iter = memory_maps[n].find(ptr);
+void MemoryManagerPinned::nativeFree(void *ptr)
+{
+    try {
+        int n = opencl::getActiveDeviceId();
+        auto iter = pinned_maps[n].find(ptr);
 
-        if (iter != memory_maps[n].end()) {
-            iter->second.user_lock = false;
+        if (iter != pinned_maps[n].end()) {
+            getQueue().enqueueUnmapMemObject(pinned_maps[n][ptr], ptr);
+            pinned_maps[n].erase(iter);
         }
-    }
-
-    void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
-                          size_t *lock_bytes,  size_t *lock_buffers)
-    {
-        int n = getActiveDeviceId();
-        if (alloc_bytes   ) *alloc_bytes   = total_bytes[n];
-        if (alloc_buffers ) *alloc_buffers = memory_maps[n].size();
-        if (lock_bytes    ) *lock_bytes    = used_bytes[n];
-        if (lock_buffers  ) *lock_buffers  = used_buffers[n];
-    }
 
-    template<typename T>
-    T *memAlloc(const size_t &elements)
-    {
-        managerInit();
-        return (T *)bufferAlloc(elements * sizeof(T));
-    }
-
-    template<typename T>
-    void memFree(T *ptr)
-    {
-        return bufferFreeLocked((cl::Buffer *)ptr, false);
-    }
-
-    template<typename T>
-    void memFreeLocked(T *ptr, bool user_unlock)
-    {
-        return bufferFreeLocked((cl::Buffer *)ptr, user_unlock);
-    }
-
-    template<typename T>
-    void memLock(const T *ptr)
-    {
-        return bufferPop((cl::Buffer *)ptr);
-    }
-
-    template<typename T>
-    void memUnlock(const T *ptr)
-    {
-        return bufferPush((cl::Buffer *)ptr);
+    } catch(cl::Error err) {
+        CL_TO_AF_ERROR(err);
     }
+}
 
-    // pinned memory manager
-    typedef struct {
-        cl::Buffer *buf;
-        mem_info info;
-    } pinned_info;
+static MemoryManagerPinned &getMemoryManagerPinned()
+{
+    static MemoryManagerPinned instance;
+    return instance;
+}
 
-    typedef std::map<void*, pinned_info> pinned_t;
-    typedef pinned_t::iterator pinned_iter;
-    pinned_t pinned_maps[DeviceManager::MAX_DEVICES];
-    static size_t pinned_used_bytes = 0;
+void setMemStepSize(size_t step_bytes)
+{
+    getMemoryManager().setMemStepSize(step_bytes);
+}
 
-    static void pinnedDestroy(cl::Buffer *buf, void *ptr)
-    {
-        getQueue().enqueueUnmapMemObject(*buf, (void *)ptr);
-        destroy(buf);
-    }
+size_t getMemStepSize(void)
+{
+    return getMemoryManager().getMemStepSize();
+}
 
-    void pinnedGarbageCollect()
-    {
-        int n = getActiveDeviceId();
-        for(auto &iter : pinned_maps[n]) {
-            if (!(iter.second).info.mngr_lock) {
-                pinnedDestroy(iter.second.buf, iter.first);
-            }
-        }
 
-        pinned_iter memory_curr = pinned_maps[n].begin();
-        pinned_iter memory_end  = pinned_maps[n].end();
+void garbageCollect()
+{
+    getMemoryManager().garbageCollect();
+}
 
-        while(memory_curr != memory_end) {
-            if (memory_curr->second.info.mngr_lock) {
-                ++memory_curr;
-            } else {
-                memory_curr = pinned_maps[n].erase(memory_curr);
-            }
-        }
+void printMemInfo(const char *msg, const int device)
+{
+    getMemoryManager().printInfo(msg, device);
+}
 
-    }
+template<typename T>
+T* memAlloc(const size_t &elements)
+{
+    return (T *)getMemoryManager().alloc(elements * sizeof(T));
+}
 
-    void *pinnedBufferAlloc(const size_t &bytes)
-    {
-        void *ptr = NULL;
-        int n = getActiveDeviceId();
-        // Allocate the higher megabyte. Overhead of creating pinned memory is
-        // more so we want more resuable memory.
-        size_t alloc_bytes = divup(bytes, 1048576) * 1048576;
-
-        if (bytes > 0) {
-            cl::Buffer *buf = NULL;
-
-            // FIXME: Add better checks for garbage collection
-            // Perhaps look at total memory available as a metric
-            if (pinned_maps[n].size() >= MAX_BUFFERS || pinned_used_bytes >= MAX_BYTES) {
-                pinnedGarbageCollect();
-            }
+cl::Buffer *bufferAlloc(const size_t &bytes)
+{
+    return (cl::Buffer *)getMemoryManager().alloc(bytes);
+}
 
-            for(pinned_iter iter = pinned_maps[n].begin();
-                iter != pinned_maps[n].end(); ++iter) {
+template<typename T>
+void memFree(T *ptr)
+{
+    return getMemoryManager().unlock((void *)ptr, false);
+}
 
-                mem_info info = iter->second.info;
-                if (!info.mngr_lock && info.bytes == alloc_bytes) {
-                    iter->second.info.mngr_lock = true;
-                    pinned_used_bytes += alloc_bytes;
-                    return iter->first;
-                }
-            }
+void bufferFree(cl::Buffer *buf)
+{
+    return getMemoryManager().unlock((void *)buf, false);
+}
 
-            try {
-                buf = new cl::Buffer(getContext(), CL_MEM_ALLOC_HOST_PTR, alloc_bytes);
+template<typename T>
+void memFreeLocked(T *ptr, bool user_unlock)
+{
+    return getMemoryManager().unlock((void *)ptr, user_unlock);
+}
 
-                ptr = getQueue().enqueueMapBuffer(*buf, true, CL_MAP_READ|CL_MAP_WRITE,
-                                                  0, alloc_bytes);
-            } catch(...) {
-                pinnedGarbageCollect();
-                buf = new cl::Buffer(getContext(), CL_MEM_ALLOC_HOST_PTR, alloc_bytes);
+template<typename T>
+void memLock(const T *ptr)
+{
+    getMemoryManager().userLock((void *)ptr);
+}
 
-                ptr = getQueue().enqueueMapBuffer(*buf, true, CL_MAP_READ|CL_MAP_WRITE,
-                                                  0, alloc_bytes);
-            }
-            mem_info info = {true, false, alloc_bytes};
-            pinned_info pt = {buf, info};
-            pinned_maps[n][ptr] = pt;
-            pinned_used_bytes += alloc_bytes;
-        }
-        return ptr;
-    }
+template<typename T>
+void memUnlock(const T *ptr)
+{
+    getMemoryManager().userUnlock((void *)ptr);
+}
 
-    void pinnedBufferFree(void *ptr)
-    {
-        int n = getActiveDeviceId();
-        pinned_iter iter = pinned_maps[n].find(ptr);
 
-        if (iter != pinned_maps[n].end()) {
-            iter->second.info.mngr_lock = false;
-            pinned_used_bytes -= iter->second.info.bytes;
-        } else {
-            pinnedDestroy(iter->second.buf, ptr); // Free it because we are not sure what the size is
-            pinned_maps[n].erase(iter);
-        }
-    }
+void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
+                      size_t *lock_bytes,  size_t *lock_buffers)
+{
+    getMemoryManager().bufferInfo(alloc_bytes, alloc_buffers,
+                                  lock_bytes,  lock_buffers);
+}
 
-    template<typename T>
-    T* pinnedAlloc(const size_t &elements)
-    {
-        managerInit();
-        return (T *)pinnedBufferAlloc(elements * sizeof(T));
-    }
+template<typename T>
+T* pinnedAlloc(const size_t &elements)
+{
+    return (T *)getMemoryManagerPinned().alloc(elements * sizeof(T));
+}
 
-    template<typename T>
-    void pinnedFree(T* ptr)
-    {
-        return pinnedBufferFree((void *) ptr);
-    }
+template<typename T>
+void pinnedFree(T* ptr)
+{
+    return getMemoryManagerPinned().unlock((void *)ptr, false);
+}
 
 #define INSTANTIATE(T)                                      \
     template T* memAlloc(const size_t &elements);           \
diff --git a/src/backend/opencl/memory.hpp b/src/backend/opencl/memory.hpp
index dce1428..da27e0d 100644
--- a/src/backend/opencl/memory.hpp
+++ b/src/backend/opencl/memory.hpp
@@ -15,12 +15,7 @@ namespace opencl
 {
 
     cl::Buffer *bufferAlloc(const size_t &bytes);
-
-    // Need these as 2 separate function and not a default argument
-    // This is because it is used as the deleter in shared pointer
-    // which cannot support default arguments
     void bufferFree(cl::Buffer *buf);
-    void bufferFreeLocked(cl::Buffer *buf, bool user_unlock);
 
     template<typename T> T *memAlloc(const size_t &elements);
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list