[arrayfire] 04/84: Fix memory leak in cuda random. Additionally allow seeds per device
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Jan 4 23:22:12 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository arrayfire.
commit ba19743bb641d90326326846603a866a9edaa144
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Mon Dec 7 18:02:19 2015 -0500
Fix memory leak in cuda random. Additionally allow seeds per device
---
src/backend/cuda/kernel/random.hpp | 106 +++++++++++++++++++++++++++----------
src/backend/cuda/random.cu | 10 ++--
2 files changed, 84 insertions(+), 32 deletions(-)
diff --git a/src/backend/cuda/kernel/random.hpp b/src/backend/cuda/kernel/random.hpp
index a79a781..01c4d61 100644
--- a/src/backend/cuda/kernel/random.hpp
+++ b/src/backend/cuda/kernel/random.hpp
@@ -20,9 +20,77 @@ namespace kernel
static const int THREADS = 256;
static const int BLOCKS = 64;
- static unsigned long long seed = 0;
- static curandState_t *states[DeviceManager::MAX_DEVICES];
- static bool is_init[DeviceManager::MAX_DEVICES] = {0};
+ static unsigned long long seeds[DeviceManager::MAX_DEVICES] = {0};
+
+ __global__ static void // Kernel: every launched thread initializes its own curandState_t entry from `seed`.
+ setup_kernel(curandState_t *states, unsigned long long seed)
+ {
+ unsigned tid = blockDim.x * blockIdx.x + threadIdx.x; // flat global thread index (1D launch)
+ curand_init(seed, tid, 0, &states[tid]); // same seed for all threads, subsequence = tid, offset = 0; no bounds check, so `states` must hold one entry per launched thread
+ }
+
+ class curandStateManager // Holds one lazily allocated curandState_t buffer plus the seed it was last initialized with; frees the buffer in its destructor.
+ {
+ curandState_t *_state; // buffer obtained from memAlloc; NULL until the first getState() call
+ unsigned long long _seed; // seed handed to setup_kernel by resetSeed()
+
+ void resetSeed() // Re-runs setup_kernel over all BLOCKS * THREADS states using _seed.
+ {
+ CUDA_LAUNCH(setup_kernel, BLOCKS, THREADS, _state, _seed); // NOTE(review): no NULL check on _state here; see setSeed()
+
+ POST_LAUNCH_CHECK();
+ }
+
+ public:
+ curandStateManager() // Starts with no buffer and seed 0; nothing is allocated here.
+ : _state(NULL), _seed(0)
+ {
+ }
+
+ ~curandStateManager() // Releases the buffer through memFree if one was allocated.
+ {
+ if(_state != NULL) memFree((char*)_state); // NOTE(review): instances are function-local statics, so this runs at program exit; presumably memFree is still usable then -- confirm
+ }
+
+ unsigned long long getSeed() const // Returns the seed most recently stored by the constructor or setSeed().
+ {
+ return _seed;
+ }
+
+ void setSeed(const unsigned long long in_seed) // Stores in_seed and immediately re-initializes every state with it.
+ {
+ _seed = in_seed;
+ this->resetSeed(); // NOTE(review): if called before getState(), this launches setup_kernel on a NULL _state
+ }
+
+ curandState_t* getState() // Returns the state buffer, allocating and seeding it on first use.
+ {
+ if(_state)
+ return _state; // already allocated: return as-is without re-seeding
+
+ _state = (curandState_t*)memAlloc<char>(BLOCKS * THREADS * sizeof(curandState_t)); // one state per thread of a BLOCKS x THREADS launch
+ this->resetSeed(); // seed the fresh buffer with the current _seed
+ return _state;
+ }
+ };
+
+ curandState_t* getcurandState() // Returns the curand state buffer for the active device, allocating and/or re-seeding it as needed.
+ {
+ static curandStateManager states[cuda::DeviceManager::MAX_DEVICES]; // one manager per device slot, created on first call
+
+ int id = cuda::getActiveDeviceId(); // used unchecked as an index into states[] and seeds[]
+
+ if(!(states[id].getState())) { // getState() allocates on first use, so this branch means allocation yielded NULL
+ // states[id] was not initialized. Very bad.
+ // Throw an error here
+ } // NOTE(review): the branch body is empty -- no error is actually raised yet
+
+ if(states[id].getSeed() != seeds[id]) { // re-seed only when the requested seed differs from the one in use
+ states[id].setSeed(seeds[id]); // NOTE(review): requesting the same seed value again therefore does not restart the sequence
+ }
+
+ return states[id].getState();
+ }
template<typename T>
__device__
@@ -93,13 +161,6 @@ namespace kernel
cval->y = curand_normal_double(state);
}
- __global__ static void
- setup_kernel(curandState_t *states, unsigned long long seed)
- {
- unsigned tid = blockDim.x * blockIdx.x + threadIdx.x;
- curand_init(seed, tid, 0, &states[tid]);
- }
-
template<typename T>
__global__ static void
uniform_kernel(T *out, curandState_t *states, size_t elements)
@@ -130,15 +191,7 @@ namespace kernel
void setup_states()
{
- int device = getActiveDeviceId();
-
- if (!is_init[device]) {
- CUDA_CHECK(cudaMalloc(&states[device], BLOCKS * THREADS * sizeof(curandState_t)));
- }
-
- CUDA_LAUNCH((setup_kernel), BLOCKS, THREADS, states[device], seed);
- POST_LAUNCH_CHECK();
- is_init[device] = true;
+ curandState_t *state = getcurandState(); // called only for its side effects (allocate / re-seed the active device's states); the local `state` is unused
}
template<typename T>
@@ -149,7 +202,10 @@ namespace kernel
int threads = THREADS;
int blocks = divup(elements, THREADS);
if (blocks > BLOCKS) blocks = BLOCKS;
- CUDA_LAUNCH(uniform_kernel, blocks, threads, out, states[device], elements);
+
+ curandState_t *state = getcurandState();
+
+ CUDA_LAUNCH(uniform_kernel, blocks, threads, out, state, elements);
POST_LAUNCH_CHECK();
}
@@ -162,15 +218,9 @@ namespace kernel
int blocks = divup(elements, THREADS);
if (blocks > BLOCKS) blocks = BLOCKS;
- if (!states[device]) {
- CUDA_CHECK(cudaMalloc(&states[device], BLOCKS * THREADS * sizeof(curandState_t)));
-
- CUDA_LAUNCH(setup_kernel, BLOCKS, THREADS, states[device], seed);
-
- POST_LAUNCH_CHECK();
- }
+ curandState_t *state = getcurandState();
- CUDA_LAUNCH(normal_kernel, blocks, threads, out, states[device], elements);
+ CUDA_LAUNCH(normal_kernel, blocks, threads, out, state, elements);
POST_LAUNCH_CHECK();
}
diff --git a/src/backend/cuda/random.cu b/src/backend/cuda/random.cu
index 07cbdc4..e19a48c 100644
--- a/src/backend/cuda/random.cu
+++ b/src/backend/cuda/random.cu
@@ -19,7 +19,7 @@ namespace cuda
template<typename T>
Array<T> randu(const af::dim4 &dims)
{
- if (!kernel::is_init[getActiveDeviceId()]) kernel::setup_states();
+ kernel::setup_states();
Array<T> out = createEmptyArray<T>(dims);
kernel::randu(out.get(), out.elements());
return out;
@@ -28,7 +28,7 @@ namespace cuda
template<typename T>
Array<T> randn(const af::dim4 &dims)
{
- if (!kernel::is_init[getActiveDeviceId()]) kernel::setup_states();
+ kernel::setup_states();
Array<T> out = createEmptyArray<T>(dims);
kernel::randn(out.get(), out.elements());
return out;
@@ -55,13 +55,15 @@ namespace cuda
void setSeed(const uintl seed)
{
- kernel::seed = seed;
+ int id = getActiveDeviceId(); // the seed is now tracked per device rather than globally
+ kernel::seeds[id] = seed; // only records the request; kernel::getcurandState() re-seeds when this differs from the seed currently in use
kernel::setup_states();
}
uintl getSeed()
{
- return kernel::seed;
+ int id = getActiveDeviceId(); // look up the seed slot of the active device
+ return kernel::seeds[id]; // last seed requested for this device; the array is zero-initialized, so 0 if never set
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list