[arrayfire] 194/284: Adding a unified memory manager for all backends
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:32 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit c8cd29b1267580a851da82ac3fcb2c2762119f3a
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Mon Jan 11 12:47:11 2016 -0500
Adding a unified memory manager for all backends
---
src/backend/MemoryManager.cpp | 250 +++++++++++++++++++++
src/backend/MemoryManager.hpp | 99 +++++++++
src/backend/cpu/memory.cpp | 247 +++++----------------
src/backend/cpu/memory.hpp | 1 +
src/backend/cuda/memory.cpp | 506 ++++++++++++------------------------------
src/backend/cuda/memory.hpp | 1 +
src/backend/opencl/memory.cpp | 499 ++++++++++++++---------------------------
src/backend/opencl/memory.hpp | 5 -
8 files changed, 710 insertions(+), 898 deletions(-)
diff --git a/src/backend/MemoryManager.cpp b/src/backend/MemoryManager.cpp
new file mode 100644
index 0000000..621ce62
--- /dev/null
+++ b/src/backend/MemoryManager.cpp
@@ -0,0 +1,250 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <iostream>
+#include <iomanip>
+#include <string>
+#include "MemoryManager.hpp"
+#include "dispatch.hpp"
+#include "err_common.hpp"
+#include "util.hpp"
+
+namespace common
+{
+
+MemoryManager::MemoryManager(int num_devices, unsigned MAX_BUFFERS, unsigned MAX_BYTES, bool debug):
+ mem_step_size(1024),
+ max_buffers(MAX_BUFFERS),
+ max_bytes(MAX_BYTES),
+ memory(num_devices),
+ debug_mode(debug)
+{
+ std::string env_var = getEnvVar("AF_MEM_DEBUG");
+ if (!env_var.empty()) {
+ this->debug_mode = env_var[0] != '0';
+ }
+ if (this->debug_mode) mem_step_size = 1;
+}
+
+void MemoryManager::garbageCollect()
+{
+ if (this->debug_mode) return;
+
+ memory_info& current = this->getCurrentMemoryInfo();
+
+ for(buffer_iter iter = current.map.begin();
+ iter != current.map.end(); ++iter) {
+
+ if (!(iter->second).manager_lock) {
+
+ if (!(iter->second).user_lock) {
+ if ((iter->second).bytes > 0) {
+ this->nativeFree(iter->first);
+ }
+ current.total_bytes -= iter->second.bytes;
+ }
+ }
+ }
+
+ buffer_iter memory_curr = current.map.begin();
+ buffer_iter memory_end = current.map.end();
+
+ while(memory_curr != memory_end) {
+ if (memory_curr->second.manager_lock || memory_curr->second.user_lock) {
+ ++memory_curr;
+ } else {
+ current.map.erase(memory_curr++);
+ }
+ }
+}
+
+void MemoryManager::unlock(void *ptr, bool user_unlock)
+{
+ memory_info& current = this->getCurrentMemoryInfo();
+ lock_guard_t lock(this->memory_mutex);
+
+ buffer_iter iter = current.map.find((void *)ptr);
+
+ if (iter != current.map.end()) {
+
+ iter->second.manager_lock = false;
+ if ((iter->second).user_lock && !user_unlock) return;
+
+ iter->second.user_lock = false;
+ current.lock_bytes -= iter->second.bytes;
+ current.lock_buffers--;
+
+ if (this->debug_mode) {
+ if ((iter->second).bytes > 0) {
+ this->nativeFree(iter->first);
+ }
+ }
+
+ } else {
+ this->nativeFree(ptr); // Free it because we are not sure what the size is
+ }
+}
+
+void *MemoryManager::alloc(const size_t bytes)
+{
+ memory_info& current = this->getCurrentMemoryInfo();
+
+ void *ptr = NULL;
+ size_t alloc_bytes = this->debug_mode ? bytes : (divup(bytes, mem_step_size) * mem_step_size);
+
+ if (bytes > 0) {
+
+ lock_guard_t lock(this->memory_mutex);
+
+ // There is no memory cache in debug mode
+ if (!this->debug_mode) {
+
+ // FIXME: Add better checks for garbage collection
+ // Perhaps look at total memory available as a metric
+ if (current.map.size() > this->max_buffers ||
+ current.lock_bytes >= this->max_bytes) {
+
+ this->garbageCollect();
+ }
+
+ for(buffer_iter iter = current.map.begin();
+ iter != current.map.end(); ++iter) {
+
+ buffer_info info = iter->second;
+
+ if (!info.manager_lock &&
+ !info.user_lock &&
+ info.bytes == alloc_bytes) {
+
+ iter->second.manager_lock = true;
+ current.lock_bytes += alloc_bytes;
+ current.lock_buffers++;
+ return iter->first;
+ }
+ }
+ }
+
+ // Perform garbage collection if memory can not be allocated
+ ptr = this->nativeAlloc(alloc_bytes);
+
+ if (!ptr) {
+ this->garbageCollect();
+ ptr = this->nativeAlloc(alloc_bytes);
+ if (!ptr) AF_ERROR("Can not allocate memory", AF_ERR_NO_MEM);
+ }
+
+ buffer_info info = {true, false, alloc_bytes};
+ current.map[ptr] = info;
+
+ current.lock_bytes += alloc_bytes;
+ current.lock_buffers++;
+ current.total_bytes += alloc_bytes;
+ }
+ return ptr;
+}
+
+void MemoryManager::userLock(const void *ptr)
+{
+ memory_info& current = this->getCurrentMemoryInfo();
+
+ lock_guard_t lock(this->memory_mutex);
+
+ buffer_iter iter = current.map.find(const_cast<void *>(ptr));
+
+ if (iter != current.map.end()) {
+ iter->second.user_lock = true;
+ } else {
+ buffer_info info = { true,
+ true,
+ 100 }; //This number is not relevant
+
+ current.map[(void *)ptr] = info;
+ }
+}
+
+void MemoryManager::userUnlock(const void *ptr)
+{
+ memory_info& current = this->getCurrentMemoryInfo();
+
+ lock_guard_t lock(this->memory_mutex);
+
+ buffer_iter iter = current.map.find((void *)ptr);
+ if (iter != current.map.end()) {
+ iter->second.user_lock = false;
+ if (this->debug_mode) {
+ if ((iter->second).bytes > 0) {
+ this->nativeFree(iter->first);
+ }
+ }
+ }
+}
+
+size_t MemoryManager::getMemStepSize()
+{
+ lock_guard_t lock(this->memory_mutex);
+ return this->mem_step_size;
+}
+
+void MemoryManager::setMemStepSize(size_t new_step_size)
+{
+ lock_guard_t lock(this->memory_mutex);
+ this->mem_step_size = new_step_size;
+}
+
+void MemoryManager::printInfo(const char *msg, const int device)
+{
+ lock_guard_t lock(this->memory_mutex);
+ memory_info& current = this->getCurrentMemoryInfo();
+
+ std::cout << msg << std::endl;
+
+ static const std::string head("| POINTER | SIZE | AF LOCK | USER LOCK |");
+ static const std::string line(head.size(), '-');
+ std::cout << line << std::endl << head << std::endl << line << std::endl;
+
+ for(buffer_iter iter = current.map.begin();
+ iter != current.map.end(); ++iter) {
+
+ std::string status_mngr("Unknown");
+ std::string status_user("Unknown");
+
+ if(iter->second.manager_lock) status_mngr = "Yes";
+ else status_mngr = " No";
+
+ if(iter->second.user_lock) status_user = "Yes";
+ else status_user = " No";
+
+ std::string unit = "KB";
+ double size = (double)(iter->second.bytes) / 1024;
+ if(size >= 1024) {
+ size = size / 1024;
+ unit = "MB";
+ }
+
+ std::cout << "| " << std::right << std::setw(14) << iter->first << " "
+ << " | " << std::setw(7) << std::setprecision(4) << size << " " << unit
+ << " | " << std::setw(9) << status_mngr
+ << " | " << std::setw(9) << status_user
+ << " |" << std::endl;
+ }
+
+ std::cout << line << std::endl;
+}
+
+void MemoryManager::bufferInfo(size_t *alloc_bytes, size_t *alloc_buffers,
+ size_t *lock_bytes, size_t *lock_buffers)
+{
+ memory_info current = this->getCurrentMemoryInfo();
+ lock_guard_t lock(this->memory_mutex);
+ if (alloc_bytes ) *alloc_bytes = current.total_bytes;
+ if (alloc_buffers ) *alloc_buffers = current.map.size();
+ if (lock_bytes ) *lock_bytes = current.lock_bytes;
+ if (lock_buffers ) *lock_buffers = current.lock_buffers;
+}
+}
diff --git a/src/backend/MemoryManager.hpp b/src/backend/MemoryManager.hpp
new file mode 100644
index 0000000..1f87ea2
--- /dev/null
+++ b/src/backend/MemoryManager.hpp
@@ -0,0 +1,99 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+
+#include <vector>
+#include <map>
+#include <mutex>
+
+namespace common
+{
+
+typedef std::mutex mutex_t;
+typedef std::lock_guard<std::mutex> lock_guard_t;
+
+class MemoryManager
+{
+ typedef struct
+ {
+ bool manager_lock;
+ bool user_lock;
+ size_t bytes;
+ } buffer_info;
+
+ typedef std::map<void *, buffer_info> buffer_t;
+ typedef buffer_t::iterator buffer_iter;
+
+ typedef struct
+ {
+ buffer_t map;
+ size_t lock_bytes;
+ size_t lock_buffers;
+ size_t total_bytes;
+ } memory_info;
+
+ size_t mem_step_size;
+ unsigned max_buffers;
+ unsigned max_bytes;
+ std::vector<memory_info> memory;
+ bool debug_mode;
+
+ memory_info& getCurrentMemoryInfo()
+ {
+ return memory[this->getActiveDeviceId()];
+ }
+
+ virtual int getActiveDeviceId()
+ {
+ return 0;
+ }
+
+public:
+ MemoryManager(int num_devices, unsigned MAX_BUFFERS, unsigned MAX_BYTES, bool debug);
+
+ void *alloc(const size_t bytes);
+
+ void unlock(void *ptr, bool user_unlock);
+
+ void garbageCollect();
+
+ void printInfo(const char *msg, const int device);
+
+ void bufferInfo(size_t *alloc_bytes, size_t *alloc_buffers,
+ size_t *lock_bytes, size_t *lock_buffers);
+
+ void userLock(const void *ptr);
+
+ void userUnlock(const void *ptr);
+
+ size_t getMemStepSize();
+
+ void setMemStepSize(size_t new_step_size);
+
+ virtual void *nativeAlloc(const size_t bytes)
+ {
+ return malloc(bytes);
+ }
+
+ virtual void nativeFree(void *ptr)
+ {
+ return free((void *)ptr);
+ }
+
+ virtual ~MemoryManager()
+ {
+ }
+
+protected:
+ mutex_t memory_mutex;
+
+};
+
+}
diff --git a/src/backend/cpu/memory.cpp b/src/backend/cpu/memory.cpp
index 5eebf18..2687b30 100644
--- a/src/backend/cpu/memory.cpp
+++ b/src/backend/cpu/memory.cpp
@@ -10,244 +10,111 @@
#include <memory.hpp>
#include <err_cpu.hpp>
#include <types.hpp>
-#include <map>
-#include <dispatch.hpp>
-#include <cstdlib>
-#include <mutex>
-#include <iostream>
-#include <iomanip>
-#include <string>
#include <platform.hpp>
#include <queue.hpp>
+#include <memory>
+#include <MemoryManager.hpp>
-namespace cpu
-{
+#ifndef AF_MEM_DEBUG
+#define AF_MEM_DEBUG 0
+#endif
-static size_t memory_resolution = 1024; //1KB
+#ifndef AF_CPU_MEM_DEBUG
+#define AF_CPU_MEM_DEBUG 0
+#endif
-void setMemStepSize(size_t step_bytes)
-{
- memory_resolution = step_bytes;
-}
-
-size_t getMemStepSize(void)
+namespace cpu
{
- return memory_resolution;
-}
-class Manager
+class MemoryManager : public common::MemoryManager
{
- public:
- static bool initialized;
- Manager()
+ int getActiveDeviceId();
+public:
+ MemoryManager();
+ void *nativeAlloc(const size_t bytes);
+ void nativeFree(void *ptr);
+ ~MemoryManager()
{
- initialized = true;
- }
-
- ~Manager()
- {
- garbageCollect();
+ common::lock_guard_t lock(this->memory_mutex);
+ this->garbageCollect();
}
};
-bool Manager::initialized = false;
-
-static void managerInit()
+int MemoryManager::getActiveDeviceId()
{
- if(Manager::initialized == false)
- static Manager pm = Manager();
+ return cpu::getActiveDeviceId();
}
-typedef struct
-{
- bool mngr_lock; // True if locked by memory manager, false if free
- bool user_lock; // True if locked by user, false if free
- size_t bytes;
-} mem_info;
+MemoryManager::MemoryManager() :
+ common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_CPU_MEM_DEBUG)
+{}
-static size_t used_bytes = 0;
-static size_t used_buffers = 0;
-static size_t total_bytes = 0;
-typedef std::map<void *, mem_info> mem_t;
-typedef mem_t::iterator mem_iter;
-mem_t memory_map;
-std::mutex memory_map_mutex;
+void *MemoryManager::nativeAlloc(const size_t bytes)
+{
+ return malloc(bytes);
+}
-template<typename T>
-void freeWrapper(T *ptr)
+void MemoryManager::nativeFree(void *ptr)
{
- free((void *)ptr);
+ return free((void *)ptr);
}
-void garbageCollect()
+static MemoryManager &getMemoryManager()
{
- for(mem_iter iter = memory_map.begin();
- iter != memory_map.end(); ++iter) {
+ static MemoryManager instance;
+ return instance;
+}
- if (!(iter->second).mngr_lock) {
+void setMemStepSize(size_t step_bytes)
+{
+ getMemoryManager().setMemStepSize(step_bytes);
+}
- if (!(iter->second).user_lock) {
- freeWrapper(iter->first);
- total_bytes -= iter->second.bytes;
- }
- }
- }
+size_t getMemStepSize(void)
+{
+ return getMemoryManager().getMemStepSize();
+}
- mem_iter memory_curr = memory_map.begin();
- mem_iter memory_end = memory_map.end();
- while(memory_curr != memory_end) {
- if (memory_curr->second.mngr_lock || memory_curr->second.user_lock) {
- ++memory_curr;
- } else {
- memory_map.erase(memory_curr++);
- }
- }
+void garbageCollect()
+{
+ getMemoryManager().garbageCollect();
}
void printMemInfo(const char *msg, const int device)
{
- std::cout << msg << std::endl;
-
- static const std::string head("| POINTER | SIZE | AF LOCK | USER LOCK |");
- static const std::string line(head.size(), '-');
- std::cout << line << std::endl << head << std::endl << line << std::endl;
-
- for(mem_iter iter = memory_map.begin();
- iter != memory_map.end(); ++iter) {
-
- std::string status_mngr("Unknown");
- std::string status_user("Unknown");
-
- if(iter->second.mngr_lock) status_mngr = "Yes";
- else status_mngr = " No";
-
- if(iter->second.user_lock) status_user = "Yes";
- else status_user = " No";
-
- std::string unit = "KB";
- double size = (double)(iter->second.bytes) / 1024;
- if(size >= 1024) {
- size = size / 1024;
- unit = "MB";
- }
-
- std::cout << "| " << std::right << std::setw(14) << iter->first << " "
- << " | " << std::setw(7) << std::setprecision(4) << size << " " << unit
- << " | " << std::setw(9) << status_mngr
- << " | " << std::setw(9) << status_user
- << " |" << std::endl;
- }
-
- std::cout << line << std::endl;
+ getMemoryManager().printInfo(msg, device);
}
template<typename T>
T* memAlloc(const size_t &elements)
{
- managerInit();
-
- T* ptr = NULL;
- size_t alloc_bytes = divup(sizeof(T) * elements, memory_resolution) * memory_resolution;
-
- if (elements > 0) {
- std::lock_guard<std::mutex> lock(memory_map_mutex);
-
- // FIXME: Add better checks for garbage collection
- // Perhaps look at total memory available as a metric
- if (memory_map.size() > MAX_BUFFERS ||
- used_bytes >= MAX_BYTES) {
-
- garbageCollect();
- }
-
- for(mem_iter iter = memory_map.begin();
- iter != memory_map.end(); ++iter) {
-
- mem_info info = iter->second;
-
- if (!info.mngr_lock &&
- !info.user_lock &&
- info.bytes == alloc_bytes) {
-
- iter->second.mngr_lock = true;
- used_bytes += alloc_bytes;
- used_buffers++;
- return (T *)iter->first;
- }
- }
-
- // Perform garbage collection if memory can not be allocated
- ptr = (T *)malloc(alloc_bytes);
-
- if (ptr == NULL) {
- AF_ERROR("Can not allocate memory", AF_ERR_NO_MEM);
- }
-
- mem_info info = {true, false, alloc_bytes};
- memory_map[ptr] = info;
-
- used_bytes += alloc_bytes;
- used_buffers++;
- total_bytes += alloc_bytes;
- }
- return ptr;
+ return (T *)getMemoryManager().alloc(elements * sizeof(T));
}
template<typename T>
-void memFreeLocked(T *ptr, bool user_unlock)
+void memFree(T *ptr)
{
- std::lock_guard<std::mutex> lock(memory_map_mutex);
-
- mem_iter iter = memory_map.find((void *)ptr);
-
- if (iter != memory_map.end()) {
-
- iter->second.mngr_lock = false;
- if ((iter->second).user_lock && !user_unlock) return;
-
- iter->second.user_lock = false;
- used_bytes -= iter->second.bytes;
- used_buffers--;
-
- } else {
- freeWrapper(ptr); // Free it because we are not sure what the size is
- }
+ return getMemoryManager().unlock((void *)ptr, false);
}
template<typename T>
-void memFree(T *ptr)
+void memFreeLocked(T *ptr, bool user_unlock)
{
- memFreeLocked(ptr, false);
+ return getMemoryManager().unlock((void *)ptr, user_unlock);
}
template<typename T>
void memLock(const T *ptr)
{
- std::lock_guard<std::mutex> lock(memory_map_mutex);
-
- mem_iter iter = memory_map.find((void *)ptr);
-
- if (iter != memory_map.end()) {
- iter->second.user_lock = true;
- } else {
- mem_info info = { true,
- true,
- 100 }; //This number is not relevant
-
- memory_map[(void *)ptr] = info;
- }
+ getMemoryManager().userLock((void *)ptr);
}
template<typename T>
void memUnlock(const T *ptr)
{
- std::lock_guard<std::mutex> lock(memory_map_mutex);
- mem_iter iter = memory_map.find((void *)ptr);
- if (iter != memory_map.end()) {
- iter->second.user_lock = false;
- }
+ getMemoryManager().userUnlock((void *)ptr);
}
@@ -255,22 +122,20 @@ void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
size_t *lock_bytes, size_t *lock_buffers)
{
getQueue().sync();
- if (alloc_bytes ) *alloc_bytes = total_bytes;
- if (alloc_buffers ) *alloc_buffers = memory_map.size();
- if (lock_bytes ) *lock_bytes = used_bytes;
- if (lock_buffers ) *lock_buffers = used_buffers;
+ getMemoryManager().bufferInfo(alloc_bytes, alloc_buffers,
+ lock_bytes, lock_buffers);
}
template<typename T>
T* pinnedAlloc(const size_t &elements)
{
- return memAlloc<T>(elements);
+ return (T *)getMemoryManager().alloc(elements * sizeof(T));
}
template<typename T>
void pinnedFree(T* ptr)
{
- memFree<T>(ptr);
+ return getMemoryManager().unlock((void *)ptr, false);
}
#define INSTANTIATE(T) \
diff --git a/src/backend/cpu/memory.hpp b/src/backend/cpu/memory.hpp
index 6524fe6..279b3db 100644
--- a/src/backend/cpu/memory.hpp
+++ b/src/backend/cpu/memory.hpp
@@ -9,6 +9,7 @@
#pragma once
#include <af/defines.h>
+
namespace cpu
{
template<typename T> T* memAlloc(const size_t &elements);
diff --git a/src/backend/cuda/memory.cpp b/src/backend/cuda/memory.cpp
index f37a0fe..43c37e0 100644
--- a/src/backend/cuda/memory.cpp
+++ b/src/backend/cuda/memory.cpp
@@ -20,412 +20,178 @@
#include <map>
#include <dispatch.hpp>
#include <platform.hpp>
+#include <MemoryManager.hpp>
-namespace cuda
-{
- static size_t memory_resolution = 1024; //1KB
-
- void setMemStepSize(size_t step_bytes)
- {
- memory_resolution = step_bytes;
- }
-
- size_t getMemStepSize(void)
- {
- return memory_resolution;
- }
-
- template<typename T>
- static void cudaFreeWrapper(T *ptr)
- {
- cudaError_t err = cudaFree(ptr);
- if (err != cudaErrorCudartUnloading) // see issue #167
- CUDA_CHECK(err);
- }
-
- template<typename T>
- static void pinnedFreeWrapper(T *ptr)
- {
- cudaError_t err = cudaFreeHost(ptr);
- if (err != cudaErrorCudartUnloading) // see issue #167
- CUDA_CHECK(err);
- }
-
-#ifdef AF_CUDA_MEM_DEBUG
-
- template<typename T>
- T* memAlloc(const size_t &elements)
- {
- T* ptr = NULL;
- CUDA_CHECK(cudaMalloc(&ptr, elements * sizeof(T)));
- return ptr;
- }
-
- template<typename T>
- void memFree(T *ptr)
- {
- cudaFreeWrapper(ptr); // Free it because we are not sure what the size is
- }
-
- template<typename T>
- void memFreeLocked(T *ptr, bool user_unlock)
- {
- cudaFreeWrapper(ptr); // Free it because we are not sure what the size is
- }
-
- template<typename T>
- void memLock(const T *ptr)
- {
- return;
- }
-
- template<typename T>
- void memUnlock(const T *ptr)
- {
- return;
- }
-
- template<typename T>
- T* pinnedAlloc(const size_t &elements)
- {
- T* ptr = NULL;
- CUDA_CHECK(cudaMallocHost((void **)(&ptr), elements * sizeof(T)));
- return (T*)ptr;
- }
- template<typename T>
- void pinnedFree(T *ptr)
- {
- pinnedFreeWrapper(ptr); // Free it because we are not sure what the size is
- }
+#ifndef AF_MEM_DEBUG
+#define AF_MEM_DEBUG 0
+#endif
- void garbageCollect()
- {
- }
+#ifndef AF_CUDA_MEM_DEBUG
+#define AF_CUDA_MEM_DEBUG 0
+#endif
- void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
- size_t *lock_bytes, size_t *lock_buffers)
- {
- }
+namespace cuda
+{
- void printMemInfo(const char *msg, const int device)
+class MemoryManager : public common::MemoryManager
+{
+ int getActiveDeviceId();
+public:
+ MemoryManager();
+ void *nativeAlloc(const size_t bytes);
+ void nativeFree(void *ptr);
+ ~MemoryManager()
{
- std::cout << "printMemInfo() disabled in AF_CUDA_MEM_DEBUG Mode" << std::endl;
+ common::lock_guard_t lock(this->memory_mutex);
+ this->garbageCollect();
}
-#else
+};
- // Manager Class
- // Dummy used to call garbage collection at the end of the program
- class Manager
- {
- public:
- static bool initialized;
- Manager()
- {
- initialized = true;
- }
-
- ~Manager()
- {
- // Destructors should not through exceptions
- try {
- for(int i = 0; i < getDeviceCount(); i++) {
- setDevice(i);
- garbageCollect();
- }
- pinnedGarbageCollect();
-
- } catch (AfError &ex) {
-
- std::string perr = getEnvVar("AF_PRINT_ERRORS");
- if(!perr.empty()) {
- if(perr != "0")
- fprintf(stderr, "%s\n", ex.what());
- }
- }
- }
- };
-
- bool Manager::initialized = false;
-
- static void managerInit()
+class MemoryManagerPinned : public common::MemoryManager
+{
+ int getActiveDeviceId();
+public:
+ MemoryManagerPinned();
+ void *nativeAlloc(const size_t bytes);
+ void nativeFree(void *ptr);
+ ~MemoryManagerPinned()
{
- if(Manager::initialized == false)
- static Manager pm = Manager();
+ common::lock_guard_t lock(this->memory_mutex);
+ this->garbageCollect();
}
+};
- typedef struct
- {
- bool mngr_lock;
- bool user_lock;
- size_t bytes;
- } mem_info;
+int MemoryManager::getActiveDeviceId()
+{
+ return cuda::getActiveDeviceId();
+}
- static size_t used_bytes[DeviceManager::MAX_DEVICES] = {0};
- static size_t used_buffers[DeviceManager::MAX_DEVICES] = {0};
- static size_t total_bytes[DeviceManager::MAX_DEVICES] = {0};
- typedef std::map<void *, mem_info> mem_t;
- typedef mem_t::iterator mem_iter;
+MemoryManager::MemoryManager() :
+ common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_CUDA_MEM_DEBUG)
+{}
- mem_t memory_maps[DeviceManager::MAX_DEVICES];
+void *MemoryManager::nativeAlloc(const size_t bytes)
+{
+ void *ptr = NULL;
+ CUDA_CHECK(cudaMalloc(&ptr, bytes));
+ return ptr;
+}
- void garbageCollect()
- {
- int n = getActiveDeviceId();
-
- for(mem_iter iter = memory_maps[n].begin();
- iter != memory_maps[n].end(); ++iter) {
-
- if (!(iter->second.mngr_lock)) {
-
- if (!(iter->second.user_lock)) {
- cudaFreeWrapper(iter->first);
- total_bytes[n] -= iter->second.bytes;
- }
- }
- }
-
- mem_iter memory_curr = memory_maps[n].begin();
- mem_iter memory_end = memory_maps[n].end();
-
- while(memory_curr != memory_end) {
- if (memory_curr->second.mngr_lock || memory_curr->second.user_lock) {
- ++memory_curr;
- } else {
- memory_maps[n].erase(memory_curr++);
- }
- }
+void MemoryManager::nativeFree(void *ptr)
+{
+ cudaError_t err = cudaFree(ptr);
+ if (err != cudaErrorCudartUnloading) {
+ CUDA_CHECK(err);
}
+}
- void printMemInfo(const char *msg, const int device)
- {
- std::cout << msg << std::endl;
- std::cout << "Memory Map for Device: " << device << std::endl;
-
- static const std::string head("| POINTER | SIZE | AF LOCK | USER LOCK |");
- static const std::string line(head.size(), '-');
- std::cout << line << std::endl << head << std::endl << line << std::endl;
-
- for(mem_iter iter = memory_maps[device].begin();
- iter != memory_maps[device].end(); ++iter) {
-
- std::string status_mngr("Unknown");
- std::string status_user("Unknown");
-
- if(iter->second.mngr_lock) status_mngr = "Yes";
- else status_mngr = " No";
-
- if(iter->second.user_lock) status_user = "Yes";
- else status_user = " No";
+static MemoryManager &getMemoryManager()
+{
+ static MemoryManager instance;
+ return instance;
+}
- std::string unit = "KB";
- double size = (double)(iter->second.bytes) / 1024;
- if(size >= 1024) {
- size = size / 1024;
- unit = "MB";
- }
+int MemoryManagerPinned::getActiveDeviceId()
+{
+ return cuda::getActiveDeviceId();
+}
- std::cout << "| " << std::right << std::setw(14) << iter->first << " "
- << " | " << std::setw(7) << std::setprecision(4) << size << " " << unit
- << " | " << std::setw(9) << status_mngr
- << " | " << std::setw(9) << status_user
- << " |" << std::endl;
- }
+MemoryManagerPinned::MemoryManagerPinned() :
+ common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_CUDA_MEM_DEBUG)
+{}
- std::cout << line << std::endl;
- }
+void *MemoryManagerPinned::nativeAlloc(const size_t bytes)
+{
+ void *ptr;
+ CUDA_CHECK(cudaMallocHost(&ptr, bytes));
+ return ptr;
+}
- template<typename T>
- T* memAlloc(const size_t &elements)
- {
- managerInit();
- int n = getActiveDeviceId();
- T* ptr = NULL;
- size_t alloc_bytes = divup(sizeof(T) * elements, memory_resolution) * memory_resolution;
-
- if (elements > 0) {
-
- // FIXME: Add better checks for garbage collection
- // Perhaps look at total memory available as a metric
- if (memory_maps[n].size() >= MAX_BUFFERS || used_bytes[n] >= MAX_BYTES) {
- garbageCollect();
- }
-
- for(mem_iter iter = memory_maps[n].begin();
- iter != memory_maps[n].end(); ++iter) {
-
- mem_info info = iter->second;
-
- if (!info.mngr_lock &&
- !info.user_lock &&
- info.bytes == alloc_bytes) {
-
- iter->second.mngr_lock = true;
- used_bytes[n] += alloc_bytes;
- used_buffers[n]++;
- return (T *)iter->first;
- }
- }
-
- // Perform garbage collection if memory can not be allocated
- if (cudaMalloc((void **)&ptr, alloc_bytes) != cudaSuccess) {
- garbageCollect();
- CUDA_CHECK(cudaMalloc((void **)(&ptr), alloc_bytes));
- }
-
- mem_info info = {true, false, alloc_bytes};
- memory_maps[n][ptr] = info;
- used_bytes[n] += alloc_bytes;
- used_buffers[n]++;
- total_bytes[n] += alloc_bytes;
- }
- return ptr;
+void MemoryManagerPinned::nativeFree(void *ptr)
+{
+ cudaError_t err = cudaFreeHost(ptr);
+ if (err != cudaErrorCudartUnloading) {
+ CUDA_CHECK(err);
}
+}
- template<typename T>
- void memFreeLocked(T *ptr, bool user_unlock)
- {
- int n = getActiveDeviceId();
- mem_iter iter = memory_maps[n].find((void *)ptr);
-
- if (iter != memory_maps[n].end()) {
-
- iter->second.mngr_lock = false;
- if ((iter->second.user_lock) && !user_unlock) return;
-
- iter->second.user_lock = false;
+static MemoryManagerPinned &getMemoryManagerPinned()
+{
+ static MemoryManagerPinned instance;
+ return instance;
+}
- used_bytes[n] -= iter->second.bytes;
- used_buffers[n]--;
+void setMemStepSize(size_t step_bytes)
+{
+ getMemoryManager().setMemStepSize(step_bytes);
+}
- } else {
- cudaFreeWrapper(ptr); // Free it because we are not sure what the size is
- }
- }
+size_t getMemStepSize(void)
+{
+ return getMemoryManager().getMemStepSize();
+}
- template<typename T>
- void memFree(T *ptr)
- {
- memFreeLocked(ptr, false);
- }
- template<typename T>
- void memLock(const T *ptr)
- {
- int n = getActiveDeviceId();
- mem_iter iter = memory_maps[n].find((void *)ptr);
+void garbageCollect()
+{
+ getMemoryManager().garbageCollect();
+}
- if (iter != memory_maps[n].end()) {
- iter->second.user_lock = true;
- } else {
+void printMemInfo(const char *msg, const int device)
+{
+ getMemoryManager().printInfo(msg, device);
+}
- mem_info info = { true,
- true,
- 100 }; //This number is not relevant
+template<typename T>
+T* memAlloc(const size_t &elements)
+{
+ return (T *)getMemoryManager().alloc(elements * sizeof(T));
+}
- memory_maps[n][(void *)ptr] = info;
- }
- }
+template<typename T>
+void memFree(T *ptr)
+{
+ return getMemoryManager().unlock((void *)ptr, false);
+}
- template<typename T>
- void memUnlock(const T *ptr)
- {
- int n = getActiveDeviceId();
- mem_iter iter = memory_maps[n].find((void *)ptr);
- if (iter != memory_maps[n].end()) {
- iter->second.user_lock = false;
- }
- }
+template<typename T>
+void memFreeLocked(T *ptr, bool user_unlock)
+{
+ return getMemoryManager().unlock((void *)ptr, user_unlock);
+}
- void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
- size_t *lock_bytes, size_t *lock_buffers)
- {
- int n = getActiveDeviceId();
- if (alloc_bytes ) *alloc_bytes = total_bytes[n];
- if (alloc_buffers ) *alloc_buffers = memory_maps[n].size();
- if (lock_bytes ) *lock_bytes = used_bytes[n];
- if (lock_buffers ) *lock_buffers = used_buffers[n];
- }
+template<typename T>
+void memLock(const T *ptr)
+{
+ getMemoryManager().userLock((void *)ptr);
+}
- //////////////////////////////////////////////////////////////////////////////
- mem_t pinned_maps;
- static size_t pinned_used_bytes = 0;
+template<typename T>
+void memUnlock(const T *ptr)
+{
+ getMemoryManager().userUnlock((void *)ptr);
+}
- void pinnedGarbageCollect()
- {
- for(mem_iter iter = pinned_maps.begin(); iter != pinned_maps.end(); ++iter) {
- if (!(iter->second.mngr_lock)) {
- pinnedFreeWrapper(iter->first);
- }
- }
-
- mem_iter memory_curr = pinned_maps.begin();
- mem_iter memory_end = pinned_maps.end();
-
- while(memory_curr != memory_end) {
- if (memory_curr->second.mngr_lock) {
- ++memory_curr;
- } else {
- pinned_maps.erase(memory_curr++);
- }
- }
- }
- template<typename T>
- T* pinnedAlloc(const size_t &elements)
- {
- managerInit();
- T* ptr = NULL;
- // Allocate the higher megabyte. Overhead of creating pinned memory is
- // more so we want more resuable memory.
- size_t alloc_bytes = divup(sizeof(T) * elements, 1048576) * 1048576;
-
- if (elements > 0) {
-
- // FIXME: Add better checks for garbage collection
- // Perhaps look at total memory available as a metric
- if (pinned_maps.size() >= MAX_BUFFERS || pinned_used_bytes >= MAX_BYTES) {
- pinnedGarbageCollect();
- }
-
- for(mem_iter iter = pinned_maps.begin();
- iter != pinned_maps.end(); ++iter) {
-
- mem_info info = iter->second;
- if (!info.mngr_lock && info.bytes == alloc_bytes) {
- iter->second.mngr_lock = true;
- pinned_used_bytes += alloc_bytes;
- return (T *)iter->first;
- }
- }
-
- // Perform garbage collection if memory can not be allocated
- if (cudaMallocHost((void **)&ptr, alloc_bytes) != cudaSuccess) {
- pinnedGarbageCollect();
- CUDA_CHECK(cudaMallocHost((void **)(&ptr), alloc_bytes));
- }
-
- mem_info info = {true, false, alloc_bytes};
- pinned_maps[ptr] = info;
- pinned_used_bytes += alloc_bytes;
- }
- return (T*)ptr;
- }
+void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
+ size_t *lock_bytes, size_t *lock_buffers)
+{
+ getMemoryManager().bufferInfo(alloc_bytes, alloc_buffers,
+ lock_bytes, lock_buffers);
+}
- template<typename T>
- void pinnedFree(T *ptr)
- {
- mem_iter iter = pinned_maps.find((void *)ptr);
-
- if (iter != pinned_maps.end()) {
- iter->second.mngr_lock = false;
- pinned_used_bytes -= iter->second.bytes;
- } else {
- pinnedFreeWrapper(ptr); // Free it because we are not sure what the size is
- }
- }
+template<typename T>
+T* pinnedAlloc(const size_t &elements)
+{
+ return (T *)getMemoryManagerPinned().alloc(elements * sizeof(T));
+}
-#endif
+template<typename T>
+void pinnedFree(T* ptr)
+{
+ return getMemoryManagerPinned().unlock((void *)ptr, false);
+}
#define INSTANTIATE(T) \
template T* memAlloc(const size_t &elements); \
diff --git a/src/backend/cuda/memory.hpp b/src/backend/cuda/memory.hpp
index 29e4e76..5b362cd 100644
--- a/src/backend/cuda/memory.hpp
+++ b/src/backend/cuda/memory.hpp
@@ -9,6 +9,7 @@
#pragma once
#include <af/defines.h>
+
namespace cuda
{
template<typename T> T* memAlloc(const size_t &elements);
diff --git a/src/backend/opencl/memory.cpp b/src/backend/opencl/memory.cpp
index b75955e..45b8e96 100644
--- a/src/backend/opencl/memory.cpp
+++ b/src/backend/opencl/memory.cpp
@@ -14,386 +14,221 @@
#include <iomanip>
#include <string>
#include <types.hpp>
+#include "err_opencl.hpp"
-namespace opencl
-{
- static size_t memory_resolution = 1024; //1KB
-
- void setMemStepSize(size_t step_bytes)
- {
- memory_resolution = step_bytes;
- }
+#include <MemoryManager.hpp>
- size_t getMemStepSize(void)
- {
- return memory_resolution;
- }
-
- // Manager Class
- // Dummy used to call garbage collection at the end of the program
- class Manager
- {
- public:
- static bool initialized;
- Manager()
- {
- initialized = true;
- }
-
- ~Manager()
- {
- for(int i = 0; i < (int)getDeviceCount(); i++) {
- setDevice(i);
- garbageCollect();
- pinnedGarbageCollect();
- }
- }
- };
-
- bool Manager::initialized = false;
-
- static void managerInit()
- {
- if(Manager::initialized == false)
- static Manager pm = Manager();
- }
-
- typedef struct
- {
- bool mngr_lock;
- bool user_lock;
- size_t bytes;
- } mem_info;
+#ifndef AF_MEM_DEBUG
+#define AF_MEM_DEBUG 0
+#endif
- static size_t used_bytes[DeviceManager::MAX_DEVICES] = {0};
- static size_t used_buffers[DeviceManager::MAX_DEVICES] = {0};
- static size_t total_bytes[DeviceManager::MAX_DEVICES] = {0};
+#ifndef AF_OPENCL_MEM_DEBUG
+#define AF_OPENCL_MEM_DEBUG 0
+#endif
- typedef std::map<cl::Buffer *, mem_info> mem_t;
- typedef mem_t::iterator mem_iter;
- mem_t memory_maps[DeviceManager::MAX_DEVICES];
+namespace opencl
+{
- static void destroy(cl::Buffer *ptr)
+class MemoryManager : public common::MemoryManager
+{
+ int getActiveDeviceId();
+public:
+ MemoryManager();
+ void *nativeAlloc(const size_t bytes);
+ void nativeFree(void *ptr);
+ ~MemoryManager()
{
- delete ptr;
+ common::lock_guard_t lock(this->memory_mutex);
+ this->garbageCollect();
}
+};
- void garbageCollect()
- {
- int n = getActiveDeviceId();
- for(mem_iter iter = memory_maps[n].begin();
- iter != memory_maps[n].end(); ++iter) {
-
- if (!(iter->second).mngr_lock) {
+class MemoryManagerPinned : public common::MemoryManager
+{
+ std::vector<
+ std::map<void *, cl::Buffer>
+ > pinned_maps;
+ int getActiveDeviceId();
- if (!(iter->second).user_lock) {
- destroy(iter->first);
- total_bytes[n] -= iter->second.bytes;
- }
- }
- }
+public:
- mem_iter memory_curr = memory_maps[n].begin();
- mem_iter memory_end = memory_maps[n].end();
+ MemoryManagerPinned();
- while(memory_curr != memory_end) {
- if (memory_curr->second.mngr_lock || memory_curr->second.user_lock) {
- ++memory_curr;
- } else {
- memory_maps[n].erase(memory_curr++);
- }
- }
- }
+ void *nativeAlloc(const size_t bytes);
+ void nativeFree(void *ptr);
- void printMemInfo(const char *msg, const int device)
+ ~MemoryManagerPinned()
{
- std::cout << msg << std::endl;
- std::cout << "Memory Map for Device: " << device << std::endl;
-
- static const std::string head("| POINTER | SIZE | AF LOCK | USER LOCK |");
- static const std::string line(head.size(), '-');
- std::cout << line << std::endl << head << std::endl << line << std::endl;
-
- for(mem_iter iter = memory_maps[device].begin();
- iter != memory_maps[device].end(); ++iter) {
-
- std::string status_mngr("Unknown");
- std::string status_user("Unknown");
-
- if(iter->second.mngr_lock) status_mngr = "Yes";
- else status_mngr = " No";
-
- if(iter->second.user_lock) status_user = "Yes";
- else status_user = " No";
-
- std::string unit = "KB";
- double size = (double)(iter->second.bytes) / 1024;
- if(size >= 1024) {
- size = size / 1024;
- unit = "MB";
+ common::lock_guard_t lock(this->memory_mutex);
+ this->garbageCollect();
+ for (int n = 0; n < (int)pinned_maps.size(); n++) {
+ auto pinned_curr_iter = pinned_maps[n].begin();
+ auto pinned_end_iter = pinned_maps[n].end();
+ while (pinned_curr_iter != pinned_end_iter) {
+ pinned_maps[n].erase(pinned_curr_iter++);
}
-
- std::cout << "| " << std::right << std::setw(14) << iter->first << " "
- << " | " << std::setw(7) << std::setprecision(4) << size << " " << unit
- << " | " << std::setw(9) << status_mngr
- << " | " << std::setw(9) << status_user
- << " |" << std::endl;
}
-
- std::cout << line << std::endl;
}
+};
- cl::Buffer *bufferAlloc(const size_t &bytes)
- {
- int n = getActiveDeviceId();
- cl::Buffer *ptr = NULL;
- size_t alloc_bytes = divup(bytes, memory_resolution) * memory_resolution;
-
- if (bytes > 0) {
-
- // FIXME: Add better checks for garbage collection
- // Perhaps look at total memory available as a metric
- if (memory_maps[n].size() >= MAX_BUFFERS || used_bytes[n] >= MAX_BYTES) {
- garbageCollect();
- }
-
- for(mem_iter iter = memory_maps[n].begin();
- iter != memory_maps[n].end(); ++iter) {
-
- mem_info info = iter->second;
-
- if (!info.mngr_lock &&
- !info.user_lock &&
- info.bytes == alloc_bytes) {
-
- iter->second.mngr_lock = true;
- used_bytes[n] += alloc_bytes;
- used_buffers[n]++;
- return iter->first;
- }
- }
+int MemoryManager::getActiveDeviceId()
+{
+ return opencl::getActiveDeviceId();
+}
- try {
- ptr = new cl::Buffer(getContext(), CL_MEM_READ_WRITE, alloc_bytes);
- } catch(...) {
- garbageCollect();
- ptr = new cl::Buffer(getContext(), CL_MEM_READ_WRITE, alloc_bytes);
- }
+MemoryManager::MemoryManager() :
+ common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_OPENCL_MEM_DEBUG)
+{}
- mem_info info = {true, false, alloc_bytes};
- memory_maps[n][ptr] = info;
- used_bytes[n] += alloc_bytes;
- used_buffers[n]++;
- total_bytes[n] += alloc_bytes;
- }
- return ptr;
- }
-
- void bufferFree(cl::Buffer *ptr)
- {
- bufferFreeLocked(ptr, false);
+void *MemoryManager::nativeAlloc(const size_t bytes)
+{
+ try {
+ return (void *)(new cl::Buffer(getContext(), CL_MEM_READ_WRITE, bytes));
+ } catch(cl::Error err) {
+ CL_TO_AF_ERROR(err);
}
+}
- void bufferFreeLocked(cl::Buffer *ptr, bool user_unlock)
- {
- int n = getActiveDeviceId();
- mem_iter iter = memory_maps[n].find(ptr);
-
- if (iter != memory_maps[n].end()) {
-
- iter->second.mngr_lock = false;
- if ((iter->second).user_lock && !user_unlock) return;
-
- iter->second.user_lock = false;
-
- used_bytes[n] -= iter->second.bytes;
- used_buffers[n]--;
- } else {
- destroy(ptr); // Free it because we are not sure what the size is
- }
+void MemoryManager::nativeFree(void *ptr)
+{
+ try {
+ delete (cl::Buffer *)ptr;
+ } catch(cl::Error err) {
+ CL_TO_AF_ERROR(err);
}
+}
- void bufferPop(cl::Buffer *ptr)
- {
- int n = getActiveDeviceId();
- mem_iter iter = memory_maps[n].find(ptr);
+static MemoryManager &getMemoryManager()
+{
+ static MemoryManager instance;
+ return instance;
+}
- if (iter != memory_maps[n].end()) {
- iter->second.user_lock = true;
- } else {
+int MemoryManagerPinned::getActiveDeviceId()
+{
+ return opencl::getActiveDeviceId();
+}
- mem_info info = { true,
- true,
- 100 }; //This number is not relevant
+MemoryManagerPinned::MemoryManagerPinned() :
+ common::MemoryManager(getDeviceCount(), MAX_BUFFERS, MAX_BYTES, AF_MEM_DEBUG || AF_OPENCL_MEM_DEBUG),
+ pinned_maps(getDeviceCount())
+{}
- memory_maps[n][ptr] = info;
- }
- }
+void *MemoryManagerPinned::nativeAlloc(const size_t bytes)
+{
+ void *ptr = NULL;
+ try {
+ cl::Buffer buf= cl::Buffer(getContext(), CL_MEM_ALLOC_HOST_PTR, bytes);
+ ptr = getQueue().enqueueMapBuffer(buf, true, CL_MAP_READ | CL_MAP_WRITE, 0, bytes);
+ pinned_maps[opencl::getActiveDeviceId()][ptr] = buf;
+ } catch(cl::Error err) {
+ CL_TO_AF_ERROR(err);
+ }
+ return ptr;
+}
- void bufferPush(cl::Buffer *ptr)
- {
- int n = getActiveDeviceId();
- mem_iter iter = memory_maps[n].find(ptr);
+void MemoryManagerPinned::nativeFree(void *ptr)
+{
+ try {
+ int n = opencl::getActiveDeviceId();
+ auto iter = pinned_maps[n].find(ptr);
- if (iter != memory_maps[n].end()) {
- iter->second.user_lock = false;
+ if (iter != pinned_maps[n].end()) {
+ getQueue().enqueueUnmapMemObject(pinned_maps[n][ptr], ptr);
+ pinned_maps[n].erase(iter);
}
- }
-
- void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
- size_t *lock_bytes, size_t *lock_buffers)
- {
- int n = getActiveDeviceId();
- if (alloc_bytes ) *alloc_bytes = total_bytes[n];
- if (alloc_buffers ) *alloc_buffers = memory_maps[n].size();
- if (lock_bytes ) *lock_bytes = used_bytes[n];
- if (lock_buffers ) *lock_buffers = used_buffers[n];
- }
- template<typename T>
- T *memAlloc(const size_t &elements)
- {
- managerInit();
- return (T *)bufferAlloc(elements * sizeof(T));
- }
-
- template<typename T>
- void memFree(T *ptr)
- {
- return bufferFreeLocked((cl::Buffer *)ptr, false);
- }
-
- template<typename T>
- void memFreeLocked(T *ptr, bool user_unlock)
- {
- return bufferFreeLocked((cl::Buffer *)ptr, user_unlock);
- }
-
- template<typename T>
- void memLock(const T *ptr)
- {
- return bufferPop((cl::Buffer *)ptr);
- }
-
- template<typename T>
- void memUnlock(const T *ptr)
- {
- return bufferPush((cl::Buffer *)ptr);
+ } catch(cl::Error err) {
+ CL_TO_AF_ERROR(err);
}
+}
- // pinned memory manager
- typedef struct {
- cl::Buffer *buf;
- mem_info info;
- } pinned_info;
+static MemoryManagerPinned &getMemoryManagerPinned()
+{
+ static MemoryManagerPinned instance;
+ return instance;
+}
- typedef std::map<void*, pinned_info> pinned_t;
- typedef pinned_t::iterator pinned_iter;
- pinned_t pinned_maps[DeviceManager::MAX_DEVICES];
- static size_t pinned_used_bytes = 0;
+void setMemStepSize(size_t step_bytes)
+{
+ getMemoryManager().setMemStepSize(step_bytes);
+}
- static void pinnedDestroy(cl::Buffer *buf, void *ptr)
- {
- getQueue().enqueueUnmapMemObject(*buf, (void *)ptr);
- destroy(buf);
- }
+size_t getMemStepSize(void)
+{
+ return getMemoryManager().getMemStepSize();
+}
- void pinnedGarbageCollect()
- {
- int n = getActiveDeviceId();
- for(auto &iter : pinned_maps[n]) {
- if (!(iter.second).info.mngr_lock) {
- pinnedDestroy(iter.second.buf, iter.first);
- }
- }
- pinned_iter memory_curr = pinned_maps[n].begin();
- pinned_iter memory_end = pinned_maps[n].end();
+void garbageCollect()
+{
+ getMemoryManager().garbageCollect();
+}
- while(memory_curr != memory_end) {
- if (memory_curr->second.info.mngr_lock) {
- ++memory_curr;
- } else {
- memory_curr = pinned_maps[n].erase(memory_curr);
- }
- }
+void printMemInfo(const char *msg, const int device)
+{
+ getMemoryManager().printInfo(msg, device);
+}
- }
+template<typename T>
+T* memAlloc(const size_t &elements)
+{
+ return (T *)getMemoryManager().alloc(elements * sizeof(T));
+}
- void *pinnedBufferAlloc(const size_t &bytes)
- {
- void *ptr = NULL;
- int n = getActiveDeviceId();
- // Allocate the higher megabyte. Overhead of creating pinned memory is
- // more so we want more resuable memory.
- size_t alloc_bytes = divup(bytes, 1048576) * 1048576;
-
- if (bytes > 0) {
- cl::Buffer *buf = NULL;
-
- // FIXME: Add better checks for garbage collection
- // Perhaps look at total memory available as a metric
- if (pinned_maps[n].size() >= MAX_BUFFERS || pinned_used_bytes >= MAX_BYTES) {
- pinnedGarbageCollect();
- }
+cl::Buffer *bufferAlloc(const size_t &bytes)
+{
+ return (cl::Buffer *)getMemoryManager().alloc(bytes);
+}
- for(pinned_iter iter = pinned_maps[n].begin();
- iter != pinned_maps[n].end(); ++iter) {
+template<typename T>
+void memFree(T *ptr)
+{
+ return getMemoryManager().unlock((void *)ptr, false);
+}
- mem_info info = iter->second.info;
- if (!info.mngr_lock && info.bytes == alloc_bytes) {
- iter->second.info.mngr_lock = true;
- pinned_used_bytes += alloc_bytes;
- return iter->first;
- }
- }
+void bufferFree(cl::Buffer *buf)
+{
+ return getMemoryManager().unlock((void *)buf, false);
+}
- try {
- buf = new cl::Buffer(getContext(), CL_MEM_ALLOC_HOST_PTR, alloc_bytes);
+template<typename T>
+void memFreeLocked(T *ptr, bool user_unlock)
+{
+ return getMemoryManager().unlock((void *)ptr, user_unlock);
+}
- ptr = getQueue().enqueueMapBuffer(*buf, true, CL_MAP_READ|CL_MAP_WRITE,
- 0, alloc_bytes);
- } catch(...) {
- pinnedGarbageCollect();
- buf = new cl::Buffer(getContext(), CL_MEM_ALLOC_HOST_PTR, alloc_bytes);
+template<typename T>
+void memLock(const T *ptr)
+{
+ getMemoryManager().userLock((void *)ptr);
+}
- ptr = getQueue().enqueueMapBuffer(*buf, true, CL_MAP_READ|CL_MAP_WRITE,
- 0, alloc_bytes);
- }
- mem_info info = {true, false, alloc_bytes};
- pinned_info pt = {buf, info};
- pinned_maps[n][ptr] = pt;
- pinned_used_bytes += alloc_bytes;
- }
- return ptr;
- }
+template<typename T>
+void memUnlock(const T *ptr)
+{
+ getMemoryManager().userUnlock((void *)ptr);
+}
- void pinnedBufferFree(void *ptr)
- {
- int n = getActiveDeviceId();
- pinned_iter iter = pinned_maps[n].find(ptr);
- if (iter != pinned_maps[n].end()) {
- iter->second.info.mngr_lock = false;
- pinned_used_bytes -= iter->second.info.bytes;
- } else {
- pinnedDestroy(iter->second.buf, ptr); // Free it because we are not sure what the size is
- pinned_maps[n].erase(iter);
- }
- }
+void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
+ size_t *lock_bytes, size_t *lock_buffers)
+{
+ getMemoryManager().bufferInfo(alloc_bytes, alloc_buffers,
+ lock_bytes, lock_buffers);
+}
- template<typename T>
- T* pinnedAlloc(const size_t &elements)
- {
- managerInit();
- return (T *)pinnedBufferAlloc(elements * sizeof(T));
- }
+template<typename T>
+T* pinnedAlloc(const size_t &elements)
+{
+ return (T *)getMemoryManagerPinned().alloc(elements * sizeof(T));
+}
- template<typename T>
- void pinnedFree(T* ptr)
- {
- return pinnedBufferFree((void *) ptr);
- }
+template<typename T>
+void pinnedFree(T* ptr)
+{
+ return getMemoryManagerPinned().unlock((void *)ptr, false);
+}
#define INSTANTIATE(T) \
template T* memAlloc(const size_t &elements); \
diff --git a/src/backend/opencl/memory.hpp b/src/backend/opencl/memory.hpp
index dce1428..da27e0d 100644
--- a/src/backend/opencl/memory.hpp
+++ b/src/backend/opencl/memory.hpp
@@ -15,12 +15,7 @@ namespace opencl
{
cl::Buffer *bufferAlloc(const size_t &bytes);
-
- // Need these as 2 separate function and not a default argument
- // This is because it is used as the deleter in shared pointer
- // which cannot support default arguments
void bufferFree(cl::Buffer *buf);
- void bufferFreeLocked(cl::Buffer *buf, bool user_unlock);
template<typename T> T *memAlloc(const size_t &elements);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list