[arrayfire] 12/34: Fixed histogram cuda/opencl kernels for indexed arrays
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Sep 27 14:46:02 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.
commit 40f8cd1a84b2a3f821de9ceb5f57a0db1c7b3308
Author: pradeep <pradeep at arrayfire.com>
Date: Mon Sep 21 16:27:48 2015 -0400
Fixed histogram cuda/opencl kernels for indexed arrays
Added a unit test for indexed arrays
---
src/api/c/histogram.cpp | 20 ++++++++++++--------
src/backend/cpu/histogram.cpp | 8 +++++---
src/backend/cpu/histogram.hpp | 2 +-
src/backend/cuda/histogram.cu | 7 ++++---
src/backend/cuda/histogram.hpp | 2 +-
src/backend/cuda/kernel/histogram.hpp | 9 +++++----
src/backend/opencl/histogram.cpp | 7 ++++---
src/backend/opencl/histogram.hpp | 2 +-
src/backend/opencl/kernel/histogram.cl | 9 ++++++++-
src/backend/opencl/kernel/histogram.hpp | 5 +++--
test/histogram.cpp | 19 +++++++++++++++++++
11 files changed, 63 insertions(+), 27 deletions(-)
diff --git a/src/api/c/histogram.cpp b/src/api/c/histogram.cpp
index 2d5477e..a7c4be7 100644
--- a/src/api/c/histogram.cpp
+++ b/src/api/c/histogram.cpp
@@ -19,9 +19,13 @@ using namespace detail;
template<typename inType,typename outType>
static inline af_array histogram(const af_array in, const unsigned &nbins,
- const double &minval, const double &maxval)
+ const double &minval, const double &maxval,
+ const bool islinear)
{
- return getHandle(histogram<inType,outType>(getArray<inType>(in),nbins,minval,maxval));
+ if (islinear)
+ return getHandle(histogram<inType,outType, true>(getArray<inType>(in),nbins,minval,maxval));
+ else
+ return getHandle(histogram<inType,outType, false>(getArray<inType>(in),nbins,minval,maxval));
}
af_err af_histogram(af_array *out, const af_array in,
@@ -33,12 +37,12 @@ af_err af_histogram(af_array *out, const af_array in,
af_array output;
switch(type) {
- case f32: output = histogram<float , uint>(in, nbins, minval, maxval); break;
- case f64: output = histogram<double, uint>(in, nbins, minval, maxval); break;
- case b8 : output = histogram<char , uint>(in, nbins, minval, maxval); break;
- case s32: output = histogram<int , uint>(in, nbins, minval, maxval); break;
- case u32: output = histogram<uint , uint>(in, nbins, minval, maxval); break;
- case u8 : output = histogram<uchar , uint>(in, nbins, minval, maxval); break;
+ case f32: output = histogram<float , uint>(in, nbins, minval, maxval, info.isLinear()); break;
+ case f64: output = histogram<double, uint>(in, nbins, minval, maxval, info.isLinear()); break;
+ case b8 : output = histogram<char , uint>(in, nbins, minval, maxval, info.isLinear()); break;
+ case s32: output = histogram<int , uint>(in, nbins, minval, maxval, info.isLinear()); break;
+ case u32: output = histogram<uint , uint>(in, nbins, minval, maxval, info.isLinear()); break;
+ case u8 : output = histogram<uchar , uint>(in, nbins, minval, maxval, info.isLinear()); break;
default : TYPE_ERROR(1, type);
}
std::swap(*out,output);
diff --git a/src/backend/cpu/histogram.cpp b/src/backend/cpu/histogram.cpp
index de38f37..371a8d0 100644
--- a/src/backend/cpu/histogram.cpp
+++ b/src/backend/cpu/histogram.cpp
@@ -18,7 +18,7 @@ using af::dim4;
namespace cpu
{
-template<typename inType, typename outType>
+template<typename inType, typename outType, bool isLinear>
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval)
{
float step = (maxval - minval)/(float)nbins;
@@ -36,7 +36,8 @@ Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const d
for(dim_t b3 = 0; b3 < outDims[3]; b3++) {
for(dim_t b2 = 0; b2 < outDims[2]; b2++) {
for(dim_t i=0; i<nElems; i++) {
- int bin = (int)((inData[i] - minval) / step);
+ int idx = isLinear ? i : ((i % inDims[0]) + (i / inDims[0])*iStrides[1]);
+ int bin = (int)((inData[idx] - minval) / step);
bin = std::max(bin, 0);
bin = std::min(bin, (int)(nbins - 1));
outData[bin]++;
@@ -50,7 +51,8 @@ Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const d
}
#define INSTANTIATE(in_t,out_t)\
-template Array<out_t> histogram(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
+template Array<out_t> histogram<in_t, out_t, true>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval); \
+template Array<out_t> histogram<in_t, out_t, false>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
INSTANTIATE(float , uint)
INSTANTIATE(double, uint)
diff --git a/src/backend/cpu/histogram.hpp b/src/backend/cpu/histogram.hpp
index 458438f..9a73523 100644
--- a/src/backend/cpu/histogram.hpp
+++ b/src/backend/cpu/histogram.hpp
@@ -12,7 +12,7 @@
namespace cpu
{
-template<typename inType, typename outType>
+template<typename inType, typename outType, bool isLinear>
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval);
}
diff --git a/src/backend/cuda/histogram.cu b/src/backend/cuda/histogram.cu
index e9a980f..b5e470d 100644
--- a/src/backend/cuda/histogram.cu
+++ b/src/backend/cuda/histogram.cu
@@ -22,7 +22,7 @@ using std::vector;
namespace cuda
{
-template<typename inType, typename outType>
+template<typename inType, typename outType, bool isLinear>
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval)
{
@@ -44,13 +44,14 @@ Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const d
dim4 minmax_dims(mmNElems*2);
Array<cfloat> minmax = createHostDataArray<cfloat>(minmax_dims, &h_minmax.front());
- kernel::histogram<inType, outType>(out, in, minmax.get(), nbins);
+ kernel::histogram<inType, outType, isLinear>(out, in, minmax.get(), nbins);
return out;
}
#define INSTANTIATE(in_t,out_t)\
-template Array<out_t> histogram(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
+template Array<out_t> histogram<in_t, out_t, true>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval); \
+template Array<out_t> histogram<in_t, out_t, false>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
INSTANTIATE(float , uint)
INSTANTIATE(double, uint)
diff --git a/src/backend/cuda/histogram.hpp b/src/backend/cuda/histogram.hpp
index 8fa4592..0ef5f2e 100644
--- a/src/backend/cuda/histogram.hpp
+++ b/src/backend/cuda/histogram.hpp
@@ -12,7 +12,7 @@
namespace cuda
{
-template<typename inType, typename outType>
+template<typename inType, typename outType, bool isLinear>
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval);
}
diff --git a/src/backend/cuda/kernel/histogram.hpp b/src/backend/cuda/kernel/histogram.hpp
index 32bee36..10c8b3f 100644
--- a/src/backend/cuda/kernel/histogram.hpp
+++ b/src/backend/cuda/kernel/histogram.hpp
@@ -28,7 +28,7 @@ __forceinline__ __device__ int minimum(int a, int b)
return (a < b ? a : b);
}
-template<typename inType, typename outType>
+template<typename inType, typename outType, bool isLinear>
static __global__
void histogramKernel(Param<outType> out, CParam<inType> in,
const cfloat *d_minmax, int len,
@@ -62,7 +62,8 @@ void histogramKernel(Param<outType> out, CParam<inType> in,
__syncthreads();
for (int row = start; row < end; row += blockDim.x) {
- int bin = (int)((iptr[row] - min) / step);
+ int idx = isLinear ? row : ((row % in.dims[0]) + (row / in.dims[0])*in.strides[1]);
+ int bin = (int)((iptr[idx] - min) / step);
bin = (bin < 0) ? 0 : bin;
bin = (bin >= nbins) ? (nbins-1) : bin;
atomicAdd((shrdMem + bin), 1);
@@ -74,7 +75,7 @@ void histogramKernel(Param<outType> out, CParam<inType> in,
}
}
-template<typename inType, typename outType>
+template<typename inType, typename outType, bool isLinear>
void histogram(Param<outType> out, CParam<inType> in, cfloat *d_minmax, int nbins)
{
dim3 threads(kernel::THREADS_X, 1);
@@ -86,7 +87,7 @@ void histogram(Param<outType> out, CParam<inType> in, cfloat *d_minmax, int nbin
int smem_size = nbins * sizeof(outType);
- CUDA_LAUNCH_SMEM((histogramKernel<inType, outType>), blocks, threads, smem_size,
+ CUDA_LAUNCH_SMEM((histogramKernel<inType, outType, isLinear>), blocks, threads, smem_size,
out, in, d_minmax, nElems, nbins, blk_x);
POST_LAUNCH_CHECK();
diff --git a/src/backend/opencl/histogram.cpp b/src/backend/opencl/histogram.cpp
index fbae44f..03f095a 100644
--- a/src/backend/opencl/histogram.cpp
+++ b/src/backend/opencl/histogram.cpp
@@ -22,7 +22,7 @@ using std::vector;
namespace opencl
{
-template<typename inType, typename outType>
+template<typename inType, typename outType, bool isLinear>
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval)
{
ARG_ASSERT(1, (nbins<=kernel::MAX_BINS));
@@ -43,13 +43,14 @@ Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const d
dim4 minmax_dims(mmNElems*2);
Array<cfloat> minmax = createHostDataArray<cfloat>(minmax_dims, h_minmax.data());
- kernel::histogram<inType, outType>(out, in, minmax, nbins);
+ kernel::histogram<inType, outType, isLinear>(out, in, minmax, nbins);
return out;
}
#define INSTANTIATE(in_t,out_t)\
- template Array<out_t> histogram(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
+template Array<out_t> histogram<in_t, out_t, true>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval); \
+template Array<out_t> histogram<in_t, out_t, false>(const Array<in_t> &in, const unsigned &nbins, const double &minval, const double &maxval);
INSTANTIATE(float , uint)
INSTANTIATE(double, uint)
diff --git a/src/backend/opencl/histogram.hpp b/src/backend/opencl/histogram.hpp
index 94701f4..17b46f2 100644
--- a/src/backend/opencl/histogram.hpp
+++ b/src/backend/opencl/histogram.hpp
@@ -12,7 +12,7 @@
namespace opencl
{
-template<typename inType, typename outType>
+template<typename inType, typename outType, bool isLinear>
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval);
}
diff --git a/src/backend/opencl/kernel/histogram.cl b/src/backend/opencl/kernel/histogram.cl
index e6b7eba..756cae6 100644
--- a/src/backend/opencl/kernel/histogram.cl
+++ b/src/backend/opencl/kernel/histogram.cl
@@ -41,7 +41,14 @@ void histogram(__global outType * d_dst,
barrier(CLK_LOCAL_MEM_FENCE);
for (int row = start; row < end; row += get_local_size(0)) {
- int bin = (int)(((float)in[row] - minval) / dx);
+#if defined(IS_LINEAR)
+ int idx = row;
+#else
+ int i0 = row % iInfo.dims[0];
+ int i1 = row / iInfo.dims[0];
+ int idx= i0+i1*iInfo.strides[1];
+#endif
+ int bin = (int)(((float)in[idx] - minval) / dx);
bin = max(bin, 0);
bin = min(bin, (int)nbins-1);
atomic_inc((localMem + bin));
diff --git a/src/backend/opencl/kernel/histogram.hpp b/src/backend/opencl/kernel/histogram.hpp
index 51b8462..88b53b6 100644
--- a/src/backend/opencl/kernel/histogram.hpp
+++ b/src/backend/opencl/kernel/histogram.hpp
@@ -30,7 +30,7 @@ static const unsigned MAX_BINS = 4000;
static const int THREADS_X = 256;
static const int THRD_LOAD = 16;
-template<typename inType, typename outType>
+template<typename inType, typename outType, bool isLinear>
void histogram(Param out, const Param in, const Param minmax, int nbins)
{
try {
@@ -45,7 +45,8 @@ void histogram(Param out, const Param in, const Param minmax, int nbins)
options << " -D inType=" << dtype_traits<inType>::getName()
<< " -D outType=" << dtype_traits<outType>::getName()
<< " -D THRD_LOAD=" << THRD_LOAD;
-
+ if (isLinear)
+ options << " -D IS_LINEAR";
if (std::is_same<inType, double>::value ||
std::is_same<inType, cdouble>::value) {
options << " -D USE_DOUBLE";
diff --git a/test/histogram.cpp b/test/histogram.cpp
index dfae986..446b7da 100644
--- a/test/histogram.cpp
+++ b/test/histogram.cpp
@@ -255,3 +255,22 @@ TEST(histogram, GFOR)
ASSERT_EQ(max<double>(abs(c_ii - b_ii)) < 1E-5, true);
}
}
+
+TEST(histogram, IndexedArray)
+{
+ using namespace af;
+
+ const long int LEN = 32;
+ array A = range(LEN, 2);
+ for (int i=16; i<28; ++i) {
+ A(seq(i, i+3), span) = i/4 - 1;
+ }
+ array B = A(seq(20), span);
+ array C = histogram(B, 4);
+ unsigned out[4];
+ C.host((void*)out);
+ ASSERT_EQ(true, out[0] == 16);
+ ASSERT_EQ(true, out[1] == 8);
+ ASSERT_EQ(true, out[2] == 8);
+ ASSERT_EQ(true, out[3] == 8);
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list