[arrayfire] 04/84: Fix memory leak in cuda random. Additionally allow seeds per device

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Jan 4 23:22:12 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository arrayfire.

commit ba19743bb641d90326326846603a866a9edaa144
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Mon Dec 7 18:02:19 2015 -0500

    Fix memory leak in cuda random. Additionally allow seeds per device
---
 src/backend/cuda/kernel/random.hpp | 106 +++++++++++++++++++++++++++----------
 src/backend/cuda/random.cu         |  10 ++--
 2 files changed, 84 insertions(+), 32 deletions(-)

diff --git a/src/backend/cuda/kernel/random.hpp b/src/backend/cuda/kernel/random.hpp
index a79a781..01c4d61 100644
--- a/src/backend/cuda/kernel/random.hpp
+++ b/src/backend/cuda/kernel/random.hpp
@@ -20,9 +20,77 @@ namespace kernel
 
     static const int THREADS = 256;
     static const int BLOCKS  = 64;
-    static unsigned long long seed = 0;
-    static curandState_t *states[DeviceManager::MAX_DEVICES];
-    static bool is_init[DeviceManager::MAX_DEVICES] = {0};
+    static unsigned long long seeds[DeviceManager::MAX_DEVICES] = {0};
+
+    __global__ static void
+    setup_kernel(curandState_t *states, unsigned long long seed)
+    {
+        unsigned tid = blockDim.x * blockIdx.x + threadIdx.x;
+        curand_init(seed, tid, 0, &states[tid]);
+    }
+
+    class curandStateManager
+    {
+        curandState_t *_state;
+        unsigned long long _seed;
+
+        void resetSeed()
+        {
+            CUDA_LAUNCH(setup_kernel, BLOCKS, THREADS, _state, _seed);
+
+            POST_LAUNCH_CHECK();
+        }
+
+        public:
+        curandStateManager()
+            : _state(NULL), _seed(0)
+        {
+        }
+
+        ~curandStateManager()
+        {
+            if(_state != NULL) memFree((char*)_state);
+        }
+
+        unsigned long long getSeed() const
+        {
+            return _seed;
+        }
+
+        void setSeed(const unsigned long long in_seed)
+        {
+            _seed = in_seed;
+            this->resetSeed();
+        }
+
+        curandState_t* getState()
+        {
+            if(_state)
+                return _state;
+
+            _state = (curandState_t*)memAlloc<char>(BLOCKS * THREADS * sizeof(curandState_t));
+            this->resetSeed();
+            return _state;
+        }
+    };
+
+    curandState_t* getcurandState()
+    {
+        static curandStateManager states[cuda::DeviceManager::MAX_DEVICES];
+
+        int id = cuda::getActiveDeviceId();
+
+        if(!(states[id].getState())) {
+            // states[id] was not initialized. Very bad.
+            // Throw an error here
+        }
+
+        if(states[id].getSeed() != seeds[id]) {
+            states[id].setSeed(seeds[id]);
+        }
+
+        return states[id].getState();
+    }
 
     template<typename T>
     __device__
@@ -93,13 +161,6 @@ namespace kernel
         cval->y = curand_normal_double(state);
     }
 
-    __global__ static void
-    setup_kernel(curandState_t *states, unsigned long long seed)
-    {
-        unsigned tid = blockDim.x * blockIdx.x + threadIdx.x;
-        curand_init(seed, tid, 0, &states[tid]);
-    }
-
     template<typename T>
     __global__ static void
     uniform_kernel(T *out, curandState_t *states, size_t elements)
@@ -130,15 +191,7 @@ namespace kernel
 
     void setup_states()
     {
-        int device = getActiveDeviceId();
-
-        if (!is_init[device]) {
-            CUDA_CHECK(cudaMalloc(&states[device], BLOCKS * THREADS * sizeof(curandState_t)));
-        }
-
-        CUDA_LAUNCH((setup_kernel), BLOCKS, THREADS, states[device], seed);
-        POST_LAUNCH_CHECK();
-        is_init[device] = true;
+        curandState_t *state = getcurandState();
     }
 
     template<typename T>
@@ -149,7 +202,10 @@ namespace kernel
         int threads = THREADS;
         int blocks  = divup(elements, THREADS);
         if (blocks > BLOCKS) blocks = BLOCKS;
-        CUDA_LAUNCH(uniform_kernel, blocks, threads, out, states[device], elements);
+
+        curandState_t *state = getcurandState();
+
+        CUDA_LAUNCH(uniform_kernel, blocks, threads, out, state, elements);
         POST_LAUNCH_CHECK();
     }
 
@@ -162,15 +218,9 @@ namespace kernel
         int blocks  = divup(elements, THREADS);
         if (blocks > BLOCKS) blocks = BLOCKS;
 
-        if (!states[device]) {
-            CUDA_CHECK(cudaMalloc(&states[device], BLOCKS * THREADS * sizeof(curandState_t)));
-
-            CUDA_LAUNCH(setup_kernel, BLOCKS, THREADS, states[device], seed);
-
-            POST_LAUNCH_CHECK();
-        }
+        curandState_t *state = getcurandState();
 
-        CUDA_LAUNCH(normal_kernel, blocks, threads, out, states[device], elements);
+        CUDA_LAUNCH(normal_kernel, blocks, threads, out, state, elements);
 
         POST_LAUNCH_CHECK();
     }
diff --git a/src/backend/cuda/random.cu b/src/backend/cuda/random.cu
index 07cbdc4..e19a48c 100644
--- a/src/backend/cuda/random.cu
+++ b/src/backend/cuda/random.cu
@@ -19,7 +19,7 @@ namespace cuda
     template<typename T>
     Array<T> randu(const af::dim4 &dims)
     {
-        if (!kernel::is_init[getActiveDeviceId()]) kernel::setup_states();
+        kernel::setup_states();
         Array<T> out = createEmptyArray<T>(dims);
         kernel::randu(out.get(), out.elements());
         return out;
@@ -28,7 +28,7 @@ namespace cuda
     template<typename T>
     Array<T> randn(const af::dim4 &dims)
     {
-        if (!kernel::is_init[getActiveDeviceId()]) kernel::setup_states();
+        kernel::setup_states();
         Array<T> out  = createEmptyArray<T>(dims);
         kernel::randn(out.get(), out.elements());
         return out;
@@ -55,13 +55,15 @@ namespace cuda
 
     void setSeed(const uintl seed)
     {
-        kernel::seed = seed;
+        int id = getActiveDeviceId();
+        kernel::seeds[id] = seed;
         kernel::setup_states();
     }
 
     uintl getSeed()
     {
-        return kernel::seed;
+        int id = getActiveDeviceId();
+        return kernel::seeds[id];
     }
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list