[arrayfire] 178/408: Replaced static shared memory with dynamic in SUSAN CUDA kernel

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:51 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit a9fd10c690a88cde21f30fdccb698799cfefdbec
Author: pradeep <pradeep at arrayfire.com>
Date:   Fri Jul 31 15:44:58 2015 -0400

    Replaced static shared memory with dynamic in SUSAN CUDA kernel
---
 src/backend/cuda/kernel/susan.hpp | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/src/backend/cuda/kernel/susan.hpp b/src/backend/cuda/kernel/susan.hpp
index 22d9b97..b744edc 100644
--- a/src/backend/cuda/kernel/susan.hpp
+++ b/src/backend/cuda/kernel/susan.hpp
@@ -14,6 +14,7 @@
 #include <debug_cuda.hpp>
 #include <math.hpp>
 #include "config.hpp"
+#include "shared.hpp"
 
 namespace cuda
 {
@@ -41,18 +42,19 @@ inline __device__ double max_val(const double x, const double y)
     return fmax(x, y);
 }
 
-template<typename T, unsigned radius>
+template<typename T>
 __global__
 void susanKernel(T* out, const T* in,
                  const unsigned idim0, const unsigned idim1,
-                 const float t, const float g,
+                 const unsigned radius, const float t, const float g,
                  const unsigned edge)
 {
     const int rSqrd   = radius*radius;
     const int windLen = 2*radius+1;
     const int shrdLen = BLOCK_X + windLen-1;
-    const size_t SHRD_MEM_SIZE = (BLOCK_X+2*radius)*(BLOCK_Y+2*radius);
-    __shared__ T shrdMem[SHRD_MEM_SIZE];
+
+    SharedMemory<T> shared;
+    T* shrdMem = shared.getPointer();
 
     const unsigned lx = threadIdx.x;
     const unsigned ly = threadIdx.y;
@@ -106,18 +108,9 @@ void susan_responses(T* out, const T* in,
 {
     dim3 threads(BLOCK_X, BLOCK_Y);
     dim3 blocks(divup(idim0-edge*2, BLOCK_X), divup(idim1-edge*2, BLOCK_Y));
+    const size_t SMEM_SIZE = (BLOCK_X+2*radius)*(BLOCK_Y+2*radius)*sizeof(T);
 
-    switch (radius) {
-        case 1: susanKernel<T, 1><<<blocks, threads>>>(out, in, idim0, idim1, t, g, edge); break;
-        case 2: susanKernel<T, 2><<<blocks, threads>>>(out, in, idim0, idim1, t, g, edge); break;
-        case 3: susanKernel<T, 3><<<blocks, threads>>>(out, in, idim0, idim1, t, g, edge); break;
-        case 4: susanKernel<T, 4><<<blocks, threads>>>(out, in, idim0, idim1, t, g, edge); break;
-        case 5: susanKernel<T, 5><<<blocks, threads>>>(out, in, idim0, idim1, t, g, edge); break;
-        case 6: susanKernel<T, 6><<<blocks, threads>>>(out, in, idim0, idim1, t, g, edge); break;
-        case 7: susanKernel<T, 7><<<blocks, threads>>>(out, in, idim0, idim1, t, g, edge); break;
-        case 8: susanKernel<T, 8><<<blocks, threads>>>(out, in, idim0, idim1, t, g, edge); break;
-        case 9: susanKernel<T, 9><<<blocks, threads>>>(out, in, idim0, idim1, t, g, edge); break;
-    }
+    susanKernel<T><<<blocks, threads, SMEM_SIZE>>>(out, in, idim0, idim1, radius, t, g, edge);
 
     POST_LAUNCH_CHECK();
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list