[arrayfire] 27/248: Changing int to dim_t in approx kernels
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Nov 17 15:53:51 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.
commit c9547296b05e151fda80deca18e3e7523b444809
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Mon Aug 31 15:53:31 2015 -0400
Changing int to dim_t in approx kernels
---
src/backend/cpu/approx.cpp | 4 +-
src/backend/cuda/kernel/approx.hpp | 94 ++++++++++++++++++------------------
src/backend/opencl/kernel/approx.hpp | 10 ++--
src/backend/opencl/kernel/approx1.cl | 36 +++++++-------
src/backend/opencl/kernel/approx2.cl | 52 ++++++++++----------
5 files changed, 98 insertions(+), 98 deletions(-)
diff --git a/src/backend/cpu/approx.cpp b/src/backend/cpu/approx.cpp
index 1522341..78d8cf3 100644
--- a/src/backend/cpu/approx.cpp
+++ b/src/backend/cpu/approx.cpp
@@ -83,7 +83,7 @@ namespace cpu
gFlag = true;
}
- const int grid_x = floor(x); // nearest grid
+ const dim_t grid_x = floor(x); // nearest grid
const Tp off_x = x - grid_x; // fractional offset
for(dim_t idw = 0; idw < odims[3]; idw++) {
@@ -227,7 +227,7 @@ namespace cpu
gFlag = true;
}
- const int grid_x = floor(x), grid_y = floor(y); // nearest grid
+ const dim_t grid_x = floor(x), grid_y = floor(y); // nearest grid
const Tp off_x = x - grid_x, off_y = y - grid_y; // fractional offset
// Check if pVal and pVal + 1 are both valid indices
diff --git a/src/backend/cuda/kernel/approx.hpp b/src/backend/cuda/kernel/approx.hpp
index ced6c4f..fae137a 100644
--- a/src/backend/cuda/kernel/approx.hpp
+++ b/src/backend/cuda/kernel/approx.hpp
@@ -27,13 +27,13 @@ namespace cuda
///////////////////////////////////////////////////////////////////////////
template<typename Ty, typename Tp>
__device__ inline static
- void core_nearest1(const int idx, const int idy, const int idz, const int idw,
+ void core_nearest1(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
Param<Ty> out, CParam<Ty> in, CParam<Tp> pos,
const float offGrid)
{
- const int omId = idw * out.strides[3] + idz * out.strides[2]
- + idy * out.strides[1] + idx;
- const int pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
+ const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+ + idy * out.strides[1] + idx;
+ const dim_t pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
const Tp x = pos.ptr[pmId];
if (x < 0 || in.dims[0] < x+1) {
@@ -41,8 +41,8 @@ namespace cuda
return;
}
- int ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1];
- const int iMem = round(x) + ioff;
+ dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1];
+ const dim_t iMem = round(x) + ioff;
Ty yt = in.ptr[iMem];
out.ptr[omId] = yt;
@@ -50,16 +50,16 @@ namespace cuda
template<typename Ty, typename Tp>
__device__ inline static
- void core_nearest2(const int idx, const int idy, const int idz, const int idw,
+ void core_nearest2(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
Param<Ty> out, CParam<Ty> in,
CParam<Tp> pos, CParam<Tp> qos, const float offGrid)
{
- const int omId = idw * out.strides[3] + idz * out.strides[2]
- + idy * out.strides[1] + idx;
- const int pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
- + idy * pos.strides[1] + idx;
- const int qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
- + idy * qos.strides[1] + idx;
+ const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+ + idy * out.strides[1] + idx;
+ const dim_t pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
+ + idy * pos.strides[1] + idx;
+ const dim_t qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
+ + idy * qos.strides[1] + idx;
const Tp x = pos.ptr[pmId], y = qos.ptr[qmId];
if (x < 0 || y < 0 || in.dims[0] < x+1 || in.dims[1] < y+1) {
@@ -67,9 +67,9 @@ namespace cuda
return;
}
- const int grid_x = round(x), grid_y = round(y); // nearest grid
- const int imId = idw * in.strides[3] + idz * in.strides[2]
- + grid_y * in.strides[1] + grid_x;
+ const dim_t grid_x = round(x), grid_y = round(y); // nearest grid
+ const dim_t imId = idw * in.strides[3] + idz * in.strides[2]
+ + grid_y * in.strides[1] + grid_x;
Ty val = in.ptr[imId];
out.ptr[omId] = val;
@@ -80,13 +80,13 @@ namespace cuda
///////////////////////////////////////////////////////////////////////////
template<typename Ty, typename Tp>
__device__ inline static
- void core_linear1(const int idx, const int idy, const int idz, const int idw,
+ void core_linear1(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
Param<Ty> out, CParam<Ty> in, CParam<Tp> pos,
const float offGrid)
{
- const int omId = idw * out.strides[3] + idz * out.strides[2]
- + idy * out.strides[1] + idx;
- const int pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
+ const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+ + idy * out.strides[1] + idx;
+ const dim_t pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
const Tp pVal = pos.ptr[pmId];
if (pVal < 0 || in.dims[0] < pVal+1) {
@@ -94,10 +94,10 @@ namespace cuda
return;
}
- const int grid_x = floor(pVal); // nearest grid
+ const dim_t grid_x = floor(pVal); // nearest grid
const Tp off_x = pVal - grid_x; // fractional offset
- int ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1] + grid_x;
+ dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1] + grid_x;
// Check if pVal and pVal + 1 are both valid indices
bool cond = (pVal < in.dims[0] - 1);
@@ -113,16 +113,16 @@ namespace cuda
template<typename Ty, typename Tp>
__device__ inline static
- void core_linear2(const int idx, const int idy, const int idz, const int idw,
+ void core_linear2(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
Param<Ty> out, CParam<Ty> in,
CParam<Tp> pos, CParam<Tp> qos, const float offGrid)
{
- const int omId = idw * out.strides[3] + idz * out.strides[2]
- + idy * out.strides[1] + idx;
- const int pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
- + idy * pos.strides[1] + idx;
- const int qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
- + idy * qos.strides[1] + idx;
+ const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+ + idy * out.strides[1] + idx;
+ const dim_t pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
+ + idy * pos.strides[1] + idx;
+ const dim_t qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
+ + idy * qos.strides[1] + idx;
const Tp x = pos.ptr[pmId], y = qos.ptr[qmId];
if (x < 0 || y < 0 || in.dims[0] < x+1 || in.dims[1] < y+1) {
@@ -130,10 +130,10 @@ namespace cuda
return;
}
- const int grid_x = floor(x), grid_y = floor(y); // nearest grid
+ const dim_t grid_x = floor(x), grid_y = floor(y); // nearest grid
const Tp off_x = x - grid_x, off_y = y - grid_y; // fractional offset
- int ioff = idw * in.strides[3] + idz * in.strides[2] + grid_y * in.strides[1] + grid_x;
+ dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + grid_y * in.strides[1] + grid_x;
// Check if pVal and pVal + 1 are both valid indices
bool condY = (y < in.dims[1] - 1);
@@ -165,14 +165,14 @@ namespace cuda
template<typename Ty, typename Tp, af_interp_type method>
__global__
void approx1_kernel(Param<Ty> out, CParam<Ty> in, CParam<Tp> pos,
- const float offGrid, const int blocksMatX)
+ const float offGrid, const dim_t blocksMatX)
{
- const int idw = blockIdx.y / out.dims[2];
- const int idz = blockIdx.y - idw * out.dims[2];
+ const dim_t idw = blockIdx.y / out.dims[2];
+ const dim_t idz = blockIdx.y - idw * out.dims[2];
- const int idy = blockIdx.x / blocksMatX;
- const int blockIdx_x = blockIdx.x - idy * blocksMatX;
- const int idx = blockIdx_x * blockDim.x + threadIdx.x;
+ const dim_t idy = blockIdx.x / blocksMatX;
+ const dim_t blockIdx_x = blockIdx.x - idy * blocksMatX;
+ const dim_t idx = blockIdx_x * blockDim.x + threadIdx.x;
if (idx >= out.dims[0] || idy >= out.dims[1] ||
idz >= out.dims[2] || idw >= out.dims[3])
@@ -194,16 +194,16 @@ namespace cuda
__global__
void approx2_kernel(Param<Ty> out, CParam<Ty> in,
CParam<Tp> pos, CParam<Tp> qos, const float offGrid,
- const int blocksMatX, const int blocksMatY)
+ const dim_t blocksMatX, const dim_t blocksMatY)
{
- const int idz = blockIdx.x / blocksMatX;
- const int idw = blockIdx.y / blocksMatY;
+ const dim_t idz = blockIdx.x / blocksMatX;
+ const dim_t idw = blockIdx.y / blocksMatY;
- int blockIdx_x = blockIdx.x - idz * blocksMatX;
- int blockIdx_y = blockIdx.y - idw * blocksMatY;
+ dim_t blockIdx_x = blockIdx.x - idz * blocksMatX;
+ dim_t blockIdx_y = blockIdx.y - idw * blocksMatY;
- int idx = threadIdx.x + blockIdx_x * blockDim.x;
- int idy = threadIdx.y + blockIdx_y * blockDim.y;
+ dim_t idx = threadIdx.x + blockIdx_x * blockDim.x;
+ dim_t idy = threadIdx.y + blockIdx_y * blockDim.y;
if (idx >= out.dims[0] || idy >= out.dims[1] ||
idz >= out.dims[2] || idw >= out.dims[3])
@@ -229,7 +229,7 @@ namespace cuda
CParam<Tp> pos, const float offGrid)
{
dim3 threads(THREADS, 1, 1);
- int blocksPerMat = divup(out.dims[0], threads.x);
+ dim_t blocksPerMat = divup(out.dims[0], threads.x);
dim3 blocks(blocksPerMat * out.dims[1], out.dims[2] * out.dims[3]);
CUDA_LAUNCH((approx1_kernel<Ty, Tp, method>), blocks, threads,
@@ -242,8 +242,8 @@ namespace cuda
CParam<Tp> pos, CParam<Tp> qos, const float offGrid)
{
dim3 threads(TX, TY, 1);
- int blocksPerMatX = divup(out.dims[0], threads.x);
- int blocksPerMatY = divup(out.dims[1], threads.y);
+ dim_t blocksPerMatX = divup(out.dims[0], threads.x);
+ dim_t blocksPerMatY = divup(out.dims[1], threads.y);
dim3 blocks(blocksPerMatX * out.dims[2], blocksPerMatY * out.dims[3]);
CUDA_LAUNCH((approx2_kernel<Ty, Tp, method>), blocks, threads,
diff --git a/src/backend/opencl/kernel/approx.hpp b/src/backend/opencl/kernel/approx.hpp
index 6ec637a..f893097 100644
--- a/src/backend/opencl/kernel/approx.hpp
+++ b/src/backend/opencl/kernel/approx.hpp
@@ -87,11 +87,11 @@ namespace opencl
auto approx1Op = make_kernel<Buffer, const KParam, const Buffer, const KParam,
- const Buffer, const KParam, const float, const int>
+ const Buffer, const KParam, const float, const dim_t>
(*approxKernels[device]);
NDRange local(THREADS, 1, 1);
- int blocksPerMat = divup(out.info.dims[0], local[0]);
+ dim_t blocksPerMat = divup(out.info.dims[0], local[0]);
NDRange global(blocksPerMat * local[0] * out.info.dims[1],
out.info.dims[2] * out.info.dims[3] * local[0],
1);
@@ -152,12 +152,12 @@ namespace opencl
auto approx2Op = make_kernel<Buffer, const KParam, const Buffer, const KParam,
const Buffer, const KParam, const Buffer, const KParam,
- const float, const int, const int>
+ const float, const dim_t, const dim_t>
(*approxKernels[device]);
NDRange local(TX, TY, 1);
- int blocksPerMatX = divup(out.info.dims[0], local[0]);
- int blocksPerMatY = divup(out.info.dims[1], local[1]);
+ dim_t blocksPerMatX = divup(out.info.dims[0], local[0]);
+ dim_t blocksPerMatY = divup(out.info.dims[1], local[1]);
NDRange global(blocksPerMatX * local[0] * out.info.dims[2],
blocksPerMatY * local[1] * out.info.dims[3],
1);
diff --git a/src/backend/opencl/kernel/approx1.cl b/src/backend/opencl/kernel/approx1.cl
index 08a6677..41acb04 100644
--- a/src/backend/opencl/kernel/approx1.cl
+++ b/src/backend/opencl/kernel/approx1.cl
@@ -32,15 +32,15 @@ Ty div(Ty a, Tp b) { a.x = a.x / b; a.y = a.y / b; return a; }
///////////////////////////////////////////////////////////////////////////
// nearest-neighbor resampling
///////////////////////////////////////////////////////////////////////////
-void core_nearest1(const int idx, const int idy, const int idz, const int idw,
+void core_nearest1(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
__global Ty *d_out, const KParam out,
__global const Ty *d_in, const KParam in,
__global const Tp *d_pos, const KParam pos,
const float offGrid)
{
- const int omId = idw * out.strides[3] + idz * out.strides[2]
- + idy * out.strides[1] + idx;
- const int pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
+ const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+ + idy * out.strides[1] + idx;
+ const dim_t pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
const Tp pVal = d_pos[pmId];
if (pVal < 0 || in.dims[0] < pVal+1) {
@@ -48,8 +48,8 @@ void core_nearest1(const int idx, const int idy, const int idz, const int idw,
return;
}
- int ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1];
- const int imId = round(pVal) + ioff;
+ dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1];
+ const dim_t imId = round(pVal) + ioff;
Ty y;
set(y, d_in[imId]);
@@ -59,15 +59,15 @@ void core_nearest1(const int idx, const int idy, const int idz, const int idw,
///////////////////////////////////////////////////////////////////////////
// linear resampling
///////////////////////////////////////////////////////////////////////////
-void core_linear1(const int idx, const int idy, const int idz, const int idw,
+void core_linear1(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
__global Ty *d_out, const KParam out,
__global const Ty *d_in, const KParam in,
__global const Tp *d_pos, const KParam pos,
const float offGrid)
{
- const int omId = idw * out.strides[3] + idz * out.strides[2]
- + idy * out.strides[1] + idx;
- const int pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
+ const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+ + idy * out.strides[1] + idx;
+ const dim_t pmId = idx + (pos.dims[1] == 1 ? 0 : idy * pos.strides[1]);
const Tp pVal = d_pos[pmId];
if (pVal < 0 || in.dims[0] < pVal+1) {
@@ -75,10 +75,10 @@ void core_linear1(const int idx, const int idy, const int idz, const int idw,
return;
}
- const int grid_x = floor(pVal); // nearest grid
+ const dim_t grid_x = floor(pVal); // nearest grid
const Tp off_x = pVal - grid_x; // fractional offset
- int ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1] + grid_x;
+ dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + idy * in.strides[1] + grid_x;
// Check if pVal and pVal + 1 are both valid indices
bool cond = (pVal < in.dims[0] - 1);
@@ -104,14 +104,14 @@ __kernel
void approx1_kernel(__global Ty *d_out, const KParam out,
__global const Ty *d_in, const KParam in,
__global const Tp *d_pos, const KParam pos,
- const float offGrid, const int blocksMatX)
+ const float offGrid, const dim_t blocksMatX)
{
- const int idw = get_group_id(1) / out.dims[2];
- const int idz = get_group_id(1) - idw * out.dims[2];
+ const dim_t idw = get_group_id(1) / out.dims[2];
+ const dim_t idz = get_group_id(1) - idw * out.dims[2];
- const int idy = get_group_id(0) / blocksMatX;
- const int blockIdx_x = get_group_id(0) - idy * blocksMatX;
- const int idx = get_local_id(0) + blockIdx_x * get_local_size(0);
+ const dim_t idy = get_group_id(0) / blocksMatX;
+ const dim_t blockIdx_x = get_group_id(0) - idy * blocksMatX;
+ const dim_t idx = get_local_id(0) + blockIdx_x * get_local_size(0);
if(idx >= out.dims[0] ||
idy >= out.dims[1] ||
diff --git a/src/backend/opencl/kernel/approx2.cl b/src/backend/opencl/kernel/approx2.cl
index b6ba02a..4db2508 100644
--- a/src/backend/opencl/kernel/approx2.cl
+++ b/src/backend/opencl/kernel/approx2.cl
@@ -32,19 +32,19 @@ Ty div(Ty a, Tp b) { a.x = a.x / b; a.y = a.y / b; return a; }
///////////////////////////////////////////////////////////////////////////
// nearest-neighbor resampling
///////////////////////////////////////////////////////////////////////////
-void core_nearest2(const int idx, const int idy, const int idz, const int idw,
+void core_nearest2(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
__global Ty *d_out, const KParam out,
__global const Ty *d_in, const KParam in,
__global const Tp *d_pos, const KParam pos,
__global const Tp *d_qos, const KParam qos,
const float offGrid)
{
- const int omId = idw * out.strides[3] + idz * out.strides[2]
- + idy * out.strides[1] + idx;
- const int pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
- + idy * pos.strides[1] + idx;
- const int qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
- + idy * qos.strides[1] + idx;
+ const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+ + idy * out.strides[1] + idx;
+ const dim_t pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
+ + idy * pos.strides[1] + idx;
+ const dim_t qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
+ + idy * qos.strides[1] + idx;
const Tp x = d_pos[pmId], y = d_qos[qmId];
if (x < 0 || y < 0 || in.dims[0] < x+1 || in.dims[1] < y+1) {
@@ -52,9 +52,9 @@ void core_nearest2(const int idx, const int idy, const int idz, const int idw,
return;
}
- const int grid_x = round(x), grid_y = round(y); // nearest grid
- const int imId = idw * in.strides[3] + idz * in.strides[2]
- + grid_y * in.strides[1] + grid_x;
+ const dim_t grid_x = round(x), grid_y = round(y); // nearest grid
+ const dim_t imId = idw * in.strides[3] + idz * in.strides[2]
+ + grid_y * in.strides[1] + grid_x;
Ty z;
set(z, d_in[imId]);
@@ -64,19 +64,19 @@ void core_nearest2(const int idx, const int idy, const int idz, const int idw,
///////////////////////////////////////////////////////////////////////////
// linear resampling
///////////////////////////////////////////////////////////////////////////
-void core_linear2(const int idx, const int idy, const int idz, const int idw,
+void core_linear2(const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw,
__global Ty *d_out, const KParam out,
__global const Ty *d_in, const KParam in,
__global const Tp *d_pos, const KParam pos,
__global const Tp *d_qos, const KParam qos,
const float offGrid)
{
- const int omId = idw * out.strides[3] + idz * out.strides[2]
- + idy * out.strides[1] + idx;
- const int pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
- + idy * pos.strides[1] + idx;
- const int qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
- + idy * qos.strides[1] + idx;
+ const dim_t omId = idw * out.strides[3] + idz * out.strides[2]
+ + idy * out.strides[1] + idx;
+ const dim_t pmId = (pos.dims[2] == 1 ? 0 : idz * pos.strides[2])
+ + idy * pos.strides[1] + idx;
+ const dim_t qmId = (qos.dims[2] == 1 ? 0 : idz * qos.strides[2])
+ + idy * qos.strides[1] + idx;
const Tp x = d_pos[pmId], y = d_qos[qmId];
if (x < 0 || y < 0 || in.dims[0] < x+1 || in.dims[1] < y+1) {
@@ -84,10 +84,10 @@ void core_linear2(const int idx, const int idy, const int idz, const int idw,
return;
}
- const int grid_x = floor(x), grid_y = floor(y); // nearest grid
+ const dim_t grid_x = floor(x), grid_y = floor(y); // nearest grid
const Tp off_x = x - grid_x, off_y = y - grid_y; // fractional offset
- int ioff = idw * in.strides[3] + idz * in.strides[2] + grid_y * in.strides[1] + grid_x;
+ dim_t ioff = idw * in.strides[3] + idz * in.strides[2] + grid_y * in.strides[1] + grid_x;
// Check if pVal and pVal + 1 are both valid indices
bool condY = (y < in.dims[1] - 1);
@@ -122,16 +122,16 @@ void approx2_kernel(__global Ty *d_out, const KParam out,
__global const Ty *d_in, const KParam in,
__global const Tp *d_pos, const KParam pos,
__global const Tp *d_qos, const KParam qos,
- const float offGrid, const int blocksMatX, const int blocksMatY)
+ const float offGrid, const dim_t blocksMatX, const dim_t blocksMatY)
{
- const int idz = get_group_id(0) / blocksMatX;
- const int idw = get_group_id(1) / blocksMatY;
+ const dim_t idz = get_group_id(0) / blocksMatX;
+ const dim_t idw = get_group_id(1) / blocksMatY;
- const int blockIdx_x = get_group_id(0) - idz * blocksMatX;
- const int blockIdx_y = get_group_id(1) - idw * blocksMatY;
+ const dim_t blockIdx_x = get_group_id(0) - idz * blocksMatX;
+ const dim_t blockIdx_y = get_group_id(1) - idw * blocksMatY;
- const int idx = get_local_id(0) + blockIdx_x * get_local_size(0);
- const int idy = get_local_id(1) + blockIdx_y * get_local_size(1);
+ const dim_t idx = get_local_id(0) + blockIdx_x * get_local_size(0);
+ const dim_t idy = get_local_id(1) + blockIdx_y * get_local_size(1);
if(idx >= out.dims[0] ||
idy >= out.dims[1] ||
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list