[arrayfire] 27/248: Changing int to dim_t in approx kernels

Tue Nov 17 15:53:51 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.

commit c9547296b05e151fda80deca18e3e7523b444809
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Mon Aug 31 15:53:31 2015 -0400

    Changing int to dim_t in approx kernels
---
 src/backend/cpu/approx.cpp           |  4 +-
 src/backend/cuda/kernel/approx.hpp   | 94 ++++++++++++++++++------------------
 src/backend/opencl/kernel/approx.hpp | 10 ++--
 src/backend/opencl/kernel/approx1.cl | 36 +++++++-------
 src/backend/opencl/kernel/approx2.cl | 52 ++++++++++----------
 5 files changed, 98 insertions(+), 98 deletions(-)

diff --git a/src/backend/cpu/approx.cpp b/src/backend/cpu/approx.cpp
index 1522341..78d8cf3 100644
--- a/src/backend/cpu/approx.cpp
+++ b/src/backend/cpu/approx.cpp
@@ -83,7 +83,7 @@ namespace cpu
                 gFlag = true;
             }
 
-            const int grid_x = floor(x);  // nearest grid
+            const dim_t grid_x = floor(x);  // nearest grid
             const Tp off_x = x - grid_x; // fractional offset
 
             for(dim_t idw = 0; idw < odims[3]; idw++) {
@@ -227,7 +227,7 @@ namespace cpu
                 gFlag = true;
             }
 
-            const int grid_x = floor(x),   grid_y = floor(y);   // nearest grid
+            const dim_t grid_x = floor(x),   grid_y = floor(y);   // nearest grid
             const Tp off_x  = x - grid_x, off_y  = y - grid_y; // fractional offset
 
             // Check if pVal and pVal + 1 are both valid indices
diff --git a/src/backend/cuda/kernel/approx.hpp b/src/backend/cuda/kernel/approx.hpp
index ced6c4f..fae137a 100644
--- a/src/backend/cuda/kernel/approx.hpp
+++ b/src/backend/cuda/kernel/approx.hpp
@@ -27,13 +27,13 @@ namespace cuda
         ///////////////////////////////////////////////////////////////////////////
         template<typename Ty, typename Tp>
         __device__ inline static
-        void core_nearest1(const int idx, const int idy, const int idz, const int idw,
+        void core_nearest1(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
                            Param<Ty> out, CParam<Ty> in, CParam<Tp> pos,
                            const float offGrid)
         {
-            const int omId = idw * out.strides[3] + idz * out.strides[2]
-                           + idy * out.strides[1] + idx;
-            const int pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
+            const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+                             + idy * out.strides[1] + idx;
+            const dim_t pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
 
             const Tp x = pos.ptr[pmId];
             if (x < 0 || in.dims[0] < x+1) {
@@ -41,8 +41,8 @@ namespace cuda
                 return;
             }
 
-            int ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1];
-            const int iMem = round(x) + ioff;
+            dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1];
+            const dim_t iMem = round(x) + ioff;
 
             Ty yt = in.ptr[iMem];
             out.ptr[omId] = yt;
@@ -50,16 +50,16 @@ namespace cuda
 
         template<typename Ty, typename Tp>
         __device__ inline static
-        void core_nearest2(const int idx, const int idy, const int idz, const int idw,
+        void core_nearest2(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
                            Param<Ty> out, CParam<Ty> in,
                            CParam<Tp> pos, CParam<Tp> qos, const float offGrid)
         {
-            const int omId = idw * out.strides[3] + idz * out.strides[2]
-                           + idy * out.strides[1] + idx;
-            const int pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
-                            + idy * pos.strides[1] + idx;
-            const int qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
-                            + idy * qos.strides[1] + idx;
+            const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+                             + idy * out.strides[1] + idx;
+            const dim_t pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
+                             + idy * pos.strides[1] + idx;
+            const dim_t qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
+                             + idy * qos.strides[1] + idx;
 
             const Tp x = pos.ptr[pmId], y = qos.ptr[qmId];
             if (x < 0 || y < 0 || in.dims[0] < x+1 || in.dims[1] < y+1) {
@@ -67,9 +67,9 @@ namespace cuda
                 return;
             }
 
-            const int grid_x = round(x), grid_y = round(y); // nearest grid
-            const int imId = idw * in.strides[3] + idz * in.strides[2]
-                        + grid_y * in.strides[1] + grid_x;
+            const dim_t grid_x = round(x), grid_y = round(y); // nearest grid
+            const dim_t imId = idw * in.strides[3] + idz * in.strides[2]
+                          + grid_y * in.strides[1] + grid_x;
 
             Ty val = in.ptr[imId];
             out.ptr[omId] = val;
@@ -80,13 +80,13 @@ namespace cuda
         ///////////////////////////////////////////////////////////////////////////
         template<typename Ty, typename Tp>
         __device__ inline static
-        void core_linear1(const int idx, const int idy, const int idz, const int idw,
+        void core_linear1(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
                           Param<Ty> out, CParam<Ty> in, CParam<Tp> pos,
                           const float offGrid)
         {
-            const int omId = idw * out.strides[3] + idz * out.strides[2]
-                           + idy * out.strides[1] + idx;
-            const int pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
+            const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+                             + idy * out.strides[1] + idx;
+            const dim_t pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
 
             const Tp pVal = pos.ptr[pmId];
             if (pVal < 0 || in.dims[0] < pVal+1) {
@@ -94,10 +94,10 @@ namespace cuda
                 return;
             }
 
-            const int grid_x = floor(pVal);  // nearest grid
+            const dim_t grid_x = floor(pVal);  // nearest grid
             const Tp off_x = pVal - grid_x; // fractional offset
 
-            int ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1] + grid_x;
+            dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1] + grid_x;
 
             // Check if pVal and pVal + 1 are both valid indices
             bool cond = (pVal < in.dims[0] - 1);
@@ -113,16 +113,16 @@ namespace cuda
 
         template<typename Ty, typename Tp>
         __device__ inline static
-        void core_linear2(const int idx, const int idy, const int idz, const int idw,
+        void core_linear2(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
                            Param<Ty> out, CParam<Ty> in,
                            CParam<Tp> pos, CParam<Tp> qos, const float offGrid)
         {
-            const int omId = idw * out.strides[3] + idz * out.strides[2]
-                           + idy * out.strides[1] + idx;
-            const int pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
-                           + idy * pos.strides[1] + idx;
-            const int qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
-                           + idy * qos.strides[1] + idx;
+            const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+                             + idy * out.strides[1] + idx;
+            const dim_t pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
+                             + idy * pos.strides[1] + idx;
+            const dim_t qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
+                             + idy * qos.strides[1] + idx;
 
             const Tp x = pos.ptr[pmId], y = qos.ptr[qmId];
             if (x < 0 || y < 0 || in.dims[0] < x+1 || in.dims[1] < y+1) {
@@ -130,10 +130,10 @@ namespace cuda
                 return;
             }
 
-            const int grid_x = floor(x),   grid_y = floor(y);   // nearest grid
+            const dim_t grid_x = floor(x),   grid_y = floor(y);   // nearest grid
             const Tp off_x  = x - grid_x, off_y  = y - grid_y; // fractional offset
 
-            int ioff = idw * in.strides[3] + idz * in.strides[2] + grid_y * in.strides[1] + grid_x;
+            dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + grid_y * in.strides[1] + grid_x;
 
             // Check if pVal and pVal + 1 are both valid indices
             bool condY = (y < in.dims[1] - 1);
@@ -165,14 +165,14 @@ namespace cuda
         template<typename Ty, typename Tp, af_interp_type method>
         __global__
         void approx1_kernel(Param<Ty> out, CParam<Ty> in, CParam<Tp> pos,
-                            const float offGrid, const int blocksMatX)
+                            const float offGrid, const dim_t blocksMatX)
         {
-            const int idw = blockIdx.y / out.dims[2];
-            const int idz = blockIdx.y - idw * out.dims[2];
+            const dim_t idw = blockIdx.y / out.dims[2];
+            const dim_t idz = blockIdx.y - idw * out.dims[2];
 
-            const int idy = blockIdx.x / blocksMatX;
-            const int blockIdx_x = blockIdx.x - idy * blocksMatX;
-            const int idx = blockIdx_x * blockDim.x + threadIdx.x;
+            const dim_t idy = blockIdx.x / blocksMatX;
+            const dim_t blockIdx_x = blockIdx.x - idy * blocksMatX;
+            const dim_t idx = blockIdx_x * blockDim.x + threadIdx.x;
 
             if (idx >= out.dims[0] || idy >= out.dims[1] ||
                 idz >= out.dims[2] || idw >= out.dims[3])
@@ -194,16 +194,16 @@ namespace cuda
         __global__
         void approx2_kernel(Param<Ty> out, CParam<Ty> in,
                       CParam<Tp> pos, CParam<Tp> qos, const float offGrid,
-                      const int blocksMatX, const int blocksMatY)
+                      const dim_t blocksMatX, const dim_t blocksMatY)
         {
-            const int idz = blockIdx.x / blocksMatX;
-            const int idw = blockIdx.y / blocksMatY;
+            const dim_t idz = blockIdx.x / blocksMatX;
+            const dim_t idw = blockIdx.y / blocksMatY;
 
-            int blockIdx_x = blockIdx.x - idz * blocksMatX;
-            int blockIdx_y = blockIdx.y - idw * blocksMatY;
+            dim_t blockIdx_x = blockIdx.x - idz * blocksMatX;
+            dim_t blockIdx_y = blockIdx.y - idw * blocksMatY;
 
-            int idx = threadIdx.x + blockIdx_x * blockDim.x;
-            int idy = threadIdx.y + blockIdx_y * blockDim.y;
+            dim_t idx = threadIdx.x + blockIdx_x * blockDim.x;
+            dim_t idy = threadIdx.y + blockIdx_y * blockDim.y;
 
             if (idx >= out.dims[0] || idy >= out.dims[1] ||
                 idz >= out.dims[2] || idw >= out.dims[3])
@@ -229,7 +229,7 @@ namespace cuda
                CParam<Tp> pos, const float offGrid)
         {
             dim3 threads(THREADS, 1, 1);
-            int blocksPerMat = divup(out.dims[0], threads.x);
+            dim_t blocksPerMat = divup(out.dims[0], threads.x);
             dim3 blocks(blocksPerMat * out.dims[1], out.dims[2] * out.dims[3]);
 
             CUDA_LAUNCH((approx1_kernel<Ty, Tp, method>), blocks, threads,
@@ -242,8 +242,8 @@ namespace cuda
                     CParam<Tp> pos, CParam<Tp> qos, const float offGrid)
         {
             dim3 threads(TX, TY, 1);
-            int blocksPerMatX = divup(out.dims[0], threads.x);
-            int blocksPerMatY = divup(out.dims[1], threads.y);
+            dim_t blocksPerMatX = divup(out.dims[0], threads.x);
+            dim_t blocksPerMatY = divup(out.dims[1], threads.y);
             dim3 blocks(blocksPerMatX * out.dims[2], blocksPerMatY * out.dims[3]);
 
             CUDA_LAUNCH((approx2_kernel<Ty, Tp, method>), blocks, threads,
diff --git a/src/backend/opencl/kernel/approx.hpp b/src/backend/opencl/kernel/approx.hpp
index 6ec637a..f893097 100644
--- a/src/backend/opencl/kernel/approx.hpp
+++ b/src/backend/opencl/kernel/approx.hpp
@@ -87,11 +87,11 @@ namespace opencl
 
 
                 auto approx1Op = make_kernel<Buffer, const KParam, const Buffer, const KParam,
-                                       const Buffer, const KParam, const float, const int>
+                                       const Buffer, const KParam, const float, const dim_t>
                                       (*approxKernels[device]);
 
                 NDRange local(THREADS, 1, 1);
-                int blocksPerMat = divup(out.info.dims[0], local[0]);
+                dim_t blocksPerMat = divup(out.info.dims[0], local[0]);
                 NDRange global(blocksPerMat * local[0] * out.info.dims[1],
                                out.info.dims[2] * out.info.dims[3] * local[0],
                                1);
@@ -152,12 +152,12 @@ namespace opencl
 
                 auto approx2Op = make_kernel<Buffer, const KParam, const Buffer, const KParam,
                                        const Buffer, const KParam, const Buffer, const KParam,
-                                       const float, const int, const int>
+                                       const float, const dim_t, const dim_t>
                                        (*approxKernels[device]);
 
                 NDRange local(TX, TY, 1);
-                int blocksPerMatX = divup(out.info.dims[0], local[0]);
-                int blocksPerMatY = divup(out.info.dims[1], local[1]);
+                dim_t blocksPerMatX = divup(out.info.dims[0], local[0]);
+                dim_t blocksPerMatY = divup(out.info.dims[1], local[1]);
                 NDRange global(blocksPerMatX * local[0] * out.info.dims[2],
                                blocksPerMatY * local[1] * out.info.dims[3],
                                1);
diff --git a/src/backend/opencl/kernel/approx1.cl b/src/backend/opencl/kernel/approx1.cl
index 08a6677..41acb04 100644
--- a/src/backend/opencl/kernel/approx1.cl
+++ b/src/backend/opencl/kernel/approx1.cl
@@ -32,15 +32,15 @@ Ty div(Ty a, Tp b) { a.x = a.x / b; a.y = a.y / b; return a; }
 ///////////////////////////////////////////////////////////////////////////
 // nearest-neighbor resampling
 ///////////////////////////////////////////////////////////////////////////
-void core_nearest1(const int idx, const int idy, const int idz, const int idw,
+void core_nearest1(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
                    __global       Ty *d_out, const KParam out,
                    __global const Ty *d_in,  const KParam in,
                    __global const Tp *d_pos, const KParam pos,
                    const float offGrid)
 {
-    const int omId = idw * out.strides[3] + idz * out.strides[2]
-                   + idy * out.strides[1] + idx;
-    const int pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
+    const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+                     + idy * out.strides[1] + idx;
+    const dim_t pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
 
     const Tp pVal = d_pos[pmId];
     if (pVal < 0 || in.dims[0] < pVal+1) {
@@ -48,8 +48,8 @@ void core_nearest1(const int idx, const int idy, const int idz, const int idw,
         return;
     }
 
-    int ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1];
-    const int imId = round(pVal) + ioff;
+    dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1];
+    const dim_t imId = round(pVal) + ioff;
 
     Ty y;
     set(y, d_in[imId]);
@@ -59,15 +59,15 @@ void core_nearest1(const int idx, const int idy, const int idz, const int idw,
 ///////////////////////////////////////////////////////////////////////////
 // linear resampling
 ///////////////////////////////////////////////////////////////////////////
-void core_linear1(const int idx, const int idy, const int idz, const int idw,
+void core_linear1(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
                    __global       Ty *d_out, const KParam out,
                    __global const Ty *d_in,  const KParam in,
                    __global const Tp *d_pos, const KParam pos,
                    const float offGrid)
 {
-    const int omId = idw * out.strides[3] + idz * out.strides[2]
-                   + idy * out.strides[1] + idx;
-    const int pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
+    const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+                     + idy * out.strides[1] + idx;
+    const dim_t pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
 
     const Tp pVal = d_pos[pmId];
     if (pVal < 0 || in.dims[0] < pVal+1) {
@@ -75,10 +75,10 @@ void core_linear1(const int idx, const int idy, const int idz, const int idw,
         return;
     }
 
-    const int grid_x = floor(pVal);  // nearest grid
+    const dim_t grid_x = floor(pVal);  // nearest grid
     const Tp off_x = pVal - grid_x; // fractional offset
 
-    int ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1] + grid_x;
+    dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1] + grid_x;
 
     // Check if pVal and pVal + 1 are both valid indices
     bool cond = (pVal < in.dims[0] - 1);
@@ -104,14 +104,14 @@ __kernel
 void approx1_kernel(__global       Ty *d_out, const KParam out,
                     __global const Ty *d_in,  const KParam in,
                     __global const Tp *d_pos, const KParam pos,
-                    const float offGrid, const int blocksMatX)
+                    const float offGrid, const dim_t blocksMatX)
 {
-    const int idw = get_group_id(1) / out.dims[2];
-    const int idz = get_group_id(1)  - idw * out.dims[2];
+    const dim_t idw = get_group_id(1) / out.dims[2];
+    const dim_t idz = get_group_id(1)  - idw * out.dims[2];
 
-    const int idy = get_group_id(0) / blocksMatX;
-    const int blockIdx_x = get_group_id(0) - idy * blocksMatX;
-    const int idx = get_local_id(0) + blockIdx_x * get_local_size(0);
+    const dim_t idy = get_group_id(0) / blocksMatX;
+    const dim_t blockIdx_x = get_group_id(0) - idy * blocksMatX;
+    const dim_t idx = get_local_id(0) + blockIdx_x * get_local_size(0);
 
     if(idx >= out.dims[0] ||
        idy >= out.dims[1] ||
diff --git a/src/backend/opencl/kernel/approx2.cl b/src/backend/opencl/kernel/approx2.cl
index b6ba02a..4db2508 100644
--- a/src/backend/opencl/kernel/approx2.cl
+++ b/src/backend/opencl/kernel/approx2.cl
@@ -32,19 +32,19 @@ Ty div(Ty a, Tp b) { a.x = a.x / b; a.y = a.y / b; return a; }
 ///////////////////////////////////////////////////////////////////////////
 // nearest-neighbor resampling
 ///////////////////////////////////////////////////////////////////////////
-void core_nearest2(const int idx, const int idy, const int idz, const int idw,
+void core_nearest2(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
                    __global       Ty *d_out, const KParam out,
                    __global const Ty *d_in,  const KParam in,
                    __global const Tp *d_pos, const KParam pos,
                    __global const Tp *d_qos, const KParam qos,
                    const float offGrid)
 {
-    const int omId = idw * out.strides[3] + idz * out.strides[2]
-                   + idy * out.strides[1] + idx;
-    const int pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
-                    + idy * pos.strides[1] + idx;
-    const int qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
-                    + idy * qos.strides[1] + idx;
+    const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+                     + idy * out.strides[1] + idx;
+    const dim_t pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
+                     + idy * pos.strides[1] + idx;
+    const dim_t qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
+                     + idy * qos.strides[1] + idx;
 
     const Tp x = d_pos[pmId], y = d_qos[qmId];
     if (x < 0 || y < 0 || in.dims[0] < x+1 || in.dims[1] < y+1) {
@@ -52,9 +52,9 @@ void core_nearest2(const int idx, const int idy, const int idz, const int idw,
         return;
     }
 
-    const int grid_x = round(x), grid_y = round(y); // nearest grid
-    const int imId = idw * in.strides[3] + idz * in.strides[2]
-                     + grid_y * in.strides[1] + grid_x;
+    const dim_t grid_x = round(x), grid_y = round(y); // nearest grid
+    const dim_t imId = idw * in.strides[3] + idz * in.strides[2]
+                  + grid_y * in.strides[1] + grid_x;
 
     Ty z;
     set(z, d_in[imId]);
@@ -64,19 +64,19 @@ void core_nearest2(const int idx, const int idy, const int idz, const int idw,
 ///////////////////////////////////////////////////////////////////////////
 // linear resampling
 ///////////////////////////////////////////////////////////////////////////
-void core_linear2(const int idx, const int idy, const int idz, const int idw,
+void core_linear2(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
                   __global       Ty *d_out, const KParam out,
                   __global const Ty *d_in,  const KParam in,
                   __global const Tp *d_pos, const KParam pos,
                   __global const Tp *d_qos, const KParam qos,
                   const float offGrid)
 {
-    const int omId = idw * out.strides[3] + idz * out.strides[2]
-                        + idy * out.strides[1] + idx;
-    const int pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
-                    + idy * pos.strides[1] + idx;
-    const int qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
-                    + idy * qos.strides[1] + idx;
+    const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+                     + idy * out.strides[1] + idx;
+    const dim_t pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
+                     + idy * pos.strides[1] + idx;
+    const dim_t qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
+                     + idy * qos.strides[1] + idx;
 
     const Tp x = d_pos[pmId], y = d_qos[qmId];
     if (x < 0 || y < 0 || in.dims[0] < x+1 || in.dims[1] < y+1) {
@@ -84,10 +84,10 @@ void core_linear2(const int idx, const int idy, const int idz, const int idw,
         return;
     }
 
-    const int grid_x = floor(x),   grid_y = floor(y);   // nearest grid
+    const dim_t grid_x = floor(x),   grid_y = floor(y);   // nearest grid
     const Tp off_x  = x - grid_x, off_y  = y - grid_y; // fractional offset
 
-    int ioff = idw * in.strides[3] + idz * in.strides[2] + grid_y * in.strides[1] + grid_x;
+    dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + grid_y * in.strides[1] + grid_x;
 
     // Check if pVal and pVal + 1 are both valid indices
     bool condY = (y < in.dims[1] - 1);
@@ -122,16 +122,16 @@ void approx2_kernel(__global       Ty *d_out, const KParam out,
                     __global const Ty *d_in,  const KParam in,
                     __global const Tp *d_pos, const KParam pos,
                     __global const Tp *d_qos, const KParam qos,
-                    const float offGrid, const int blocksMatX, const int blocksMatY)
+                    const float offGrid, const dim_t blocksMatX, const dim_t blocksMatY)
 {
-    const int idz = get_group_id(0) / blocksMatX;
-    const int idw = get_group_id(1) / blocksMatY;
+    const dim_t idz = get_group_id(0) / blocksMatX;
+    const dim_t idw = get_group_id(1) / blocksMatY;
 
-    const int blockIdx_x = get_group_id(0) - idz * blocksMatX;
-    const int blockIdx_y = get_group_id(1) - idw * blocksMatY;
+    const dim_t blockIdx_x = get_group_id(0) - idz * blocksMatX;
+    const dim_t blockIdx_y = get_group_id(1) - idw * blocksMatY;
 
-    const int idx = get_local_id(0) + blockIdx_x * get_local_size(0);
-    const int idy = get_local_id(1) + blockIdx_y * get_local_size(1);
+    const dim_t idx = get_local_id(0) + blockIdx_x * get_local_size(0);
+    const dim_t idy = get_local_id(1) + blockIdx_y * get_local_size(1);
 
     if(idx >= out.dims[0] ||
        idy >= out.dims[1] ||

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git