[arrayfire] 36/284: Converted resize & shift cpu fns to async calls
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:16 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 14e9d3180ecc4dde44bf66dba743524408030ba7
Author: pradeep <pradeep at arrayfire.com>
Date: Tue Nov 24 13:29:57 2015 -0500
Converted resize & shift cpu fns to async calls
---
src/backend/cpu/resize.cpp | 348 ++++++++++++++++++++++-----------------------
src/backend/cpu/shift.cpp | 60 ++++----
2 files changed, 205 insertions(+), 203 deletions(-)
diff --git a/src/backend/cpu/resize.cpp b/src/backend/cpu/resize.cpp
index 8c4da58..160ed46 100644
--- a/src/backend/cpu/resize.cpp
+++ b/src/backend/cpu/resize.cpp
@@ -14,209 +14,205 @@
#include <math.hpp>
#include <types.hpp>
#include <af/traits.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
- /**
- * noop function for round to avoid compilation
- * issues due to lack of this function in C90 based
- * compilers, it is only present in C99 and C++11
- *
- * This is not a full fledged implementation, this function
- * is to be used only for positive numbers, i m using it here
- * for calculating dimensions of arrays
- */
- dim_t round2int(float value)
- {
- return (dim_t)(value+0.5f);
- }
-
- using std::conditional;
- using std::is_same;
+/**
+ * Stand-in for round(), provided to avoid compilation
+ * issues on C90-based compilers, which lack that function;
+ * round() is only available in C99 and C++11.
+ *
+ * This is not a full-fledged implementation: it is only
+ * valid for non-negative values, and it is used here solely
+ * for calculating the dimensions of arrays.
+ */
+dim_t round2int(float value)
+{
+ return (dim_t)(value+0.5f);
+}
- template<typename T>
- using wtype_t = typename conditional<is_same<T, double>::value, double, float>::type;
+using std::conditional;
+using std::is_same;
- template<typename T>
- using vtype_t = typename conditional<is_complex<T>::value,
- T, wtype_t<T>
- >::type;
+template<typename T>
+using wtype_t = typename conditional<is_same<T, double>::value, double, float>::type;
- template<typename T, af_interp_type method>
- struct resize_op
- {
- void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
- const af::dim4 &ostrides, const af::dim4 &istrides,
- const dim_t x, const dim_t y)
- {
- return;
- }
- };
+template<typename T>
+using vtype_t = typename conditional<is_complex<T>::value,
+ T, wtype_t<T>
+ >::type;
- template<typename T>
- struct resize_op<T, AF_INTERP_NEAREST>
+template<typename T, af_interp_type method>
+struct resize_op
+{
+ void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
+ const af::dim4 &ostrides, const af::dim4 &istrides,
+ const dim_t x, const dim_t y)
{
- void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
- const af::dim4 &ostrides, const af::dim4 &istrides,
- const dim_t x, const dim_t y)
- {
- // Compute Indices
- dim_t i_x = round2int((float)x / (odims[0] / (float)idims[0]));
- dim_t i_y = round2int((float)y / (odims[1] / (float)idims[1]));
-
- if (i_x >= idims[0]) i_x = idims[0] - 1;
- if (i_y >= idims[1]) i_y = idims[1] - 1;
-
- dim_t i_off = i_y * istrides[1] + i_x;
- dim_t o_off = y * ostrides[1] + x;
- // Copy values from all channels
- for(dim_t w = 0; w < odims[3]; w++) {
- dim_t wost = w * ostrides[3];
- dim_t wist = w * istrides[3];
- for(dim_t z = 0; z < odims[2]; z++) {
- outPtr[o_off + z * ostrides[2] + wost] = inPtr[i_off + z * istrides[2] + wist];
- }
- }
- }
- };
+ return;
+ }
+};
- template<typename T>
- struct resize_op<T, AF_INTERP_BILINEAR>
+template<typename T>
+struct resize_op<T, AF_INTERP_NEAREST>
+{
+ void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
+ const af::dim4 &ostrides, const af::dim4 &istrides,
+ const dim_t x, const dim_t y)
{
- void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
- const af::dim4 &ostrides, const af::dim4 &istrides,
- const dim_t x, const dim_t y)
- {
- // Compute Indices
- float f_x = (float)x / (odims[0] / (float)idims[0]);
- float f_y = (float)y / (odims[1] / (float)idims[1]);
-
- dim_t i1_x = floor(f_x);
- dim_t i1_y = floor(f_y);
-
- if (i1_x >= idims[0]) i1_x = idims[0] - 1;
- if (i1_y >= idims[1]) i1_y = idims[1] - 1;
-
- float b = f_x - i1_x;
- float a = f_y - i1_y;
-
- dim_t i2_x = (i1_x + 1 >= idims[0] ? idims[0] - 1 : i1_x + 1);
- dim_t i2_y = (i1_y + 1 >= idims[1] ? idims[1] - 1 : i1_y + 1);
-
- typedef typename dtype_traits<T>::base_type BT;
- typedef wtype_t<BT> WT;
- typedef vtype_t<T> VT;
-
- dim_t o_off = y * ostrides[1] + x;
- // Copy values from all channels
- for(dim_t w = 0; w < odims[3]; w++) {
- dim_t wst = w * istrides[3];
- for(dim_t z = 0; z < odims[2]; z++) {
- dim_t zst = z * istrides[2];
- dim_t channel_off = zst + wst;
- VT p1 = inPtr[i1_y * istrides[1] + i1_x + channel_off];
- VT p2 = inPtr[i2_y * istrides[1] + i1_x + channel_off];
- VT p3 = inPtr[i1_y * istrides[1] + i2_x + channel_off];
- VT p4 = inPtr[i2_y * istrides[1] + i2_x + channel_off];
-
- outPtr[o_off + z * ostrides[2] + w * ostrides[3]] =
- scalar<WT>((1.0f - a) * (1.0f - b)) * p1 +
- scalar<WT>(( a ) * (1.0f - b)) * p2 +
- scalar<WT>((1.0f - a) * ( b )) * p3 +
- scalar<WT>(( a ) * ( b )) * p4;
- }
+ // Compute Indices
+ dim_t i_x = round2int((float)x / (odims[0] / (float)idims[0]));
+ dim_t i_y = round2int((float)y / (odims[1] / (float)idims[1]));
+
+ if (i_x >= idims[0]) i_x = idims[0] - 1;
+ if (i_y >= idims[1]) i_y = idims[1] - 1;
+
+ dim_t i_off = i_y * istrides[1] + i_x;
+ dim_t o_off = y * ostrides[1] + x;
+ // Copy values from all channels
+ for(dim_t w = 0; w < odims[3]; w++) {
+ dim_t wost = w * ostrides[3];
+ dim_t wist = w * istrides[3];
+ for(dim_t z = 0; z < odims[2]; z++) {
+ outPtr[o_off + z * ostrides[2] + wost] = inPtr[i_off + z * istrides[2] + wist];
}
}
- };
+ }
+};
- template<typename T>
- struct resize_op<T, AF_INTERP_LOWER>
+template<typename T>
+struct resize_op<T, AF_INTERP_BILINEAR>
+{
+ void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
+ const af::dim4 &ostrides, const af::dim4 &istrides,
+ const dim_t x, const dim_t y)
{
- void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
- const af::dim4 &ostrides, const af::dim4 &istrides,
- const dim_t x, const dim_t y)
- {
- // Compute Indices
- dim_t i_x = floor((float)x / (odims[0] / (float)idims[0]));
- dim_t i_y = floor((float)y / (odims[1] / (float)idims[1]));
-
- if (i_x >= idims[0]) i_x = idims[0] - 1;
- if (i_y >= idims[1]) i_y = idims[1] - 1;
-
- dim_t i_off = i_y * istrides[1] + i_x;
- dim_t o_off = y * ostrides[1] + x;
- // Copy values from all channels
- for(dim_t w = 0; w < odims[3]; w++) {
- dim_t wost = w * ostrides[3];
- dim_t wist = w * istrides[3];
- for(dim_t z = 0; z < odims[2]; z++) {
- outPtr[o_off + z * ostrides[2] + wost] = inPtr[i_off + z * istrides[2] + wist];
- }
+ // Compute Indices
+ float f_x = (float)x / (odims[0] / (float)idims[0]);
+ float f_y = (float)y / (odims[1] / (float)idims[1]);
+
+ dim_t i1_x = floor(f_x);
+ dim_t i1_y = floor(f_y);
+
+ if (i1_x >= idims[0]) i1_x = idims[0] - 1;
+ if (i1_y >= idims[1]) i1_y = idims[1] - 1;
+
+ float b = f_x - i1_x;
+ float a = f_y - i1_y;
+
+ dim_t i2_x = (i1_x + 1 >= idims[0] ? idims[0] - 1 : i1_x + 1);
+ dim_t i2_y = (i1_y + 1 >= idims[1] ? idims[1] - 1 : i1_y + 1);
+
+ typedef typename dtype_traits<T>::base_type BT;
+ typedef wtype_t<BT> WT;
+ typedef vtype_t<T> VT;
+
+ dim_t o_off = y * ostrides[1] + x;
+ // Copy values from all channels
+ for(dim_t w = 0; w < odims[3]; w++) {
+ dim_t wst = w * istrides[3];
+ for(dim_t z = 0; z < odims[2]; z++) {
+ dim_t zst = z * istrides[2];
+ dim_t channel_off = zst + wst;
+ VT p1 = inPtr[i1_y * istrides[1] + i1_x + channel_off];
+ VT p2 = inPtr[i2_y * istrides[1] + i1_x + channel_off];
+ VT p3 = inPtr[i1_y * istrides[1] + i2_x + channel_off];
+ VT p4 = inPtr[i2_y * istrides[1] + i2_x + channel_off];
+
+ outPtr[o_off + z * ostrides[2] + w * ostrides[3]] =
+ scalar<WT>((1.0f - a) * (1.0f - b)) * p1 +
+ scalar<WT>(( a ) * (1.0f - b)) * p2 +
+ scalar<WT>((1.0f - a) * ( b )) * p3 +
+ scalar<WT>(( a ) * ( b )) * p4;
}
}
- };
+ }
+};
- template<typename T, af_interp_type method>
- void resize_(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
- const af::dim4 &ostrides, const af::dim4 &istrides)
+template<typename T>
+struct resize_op<T, AF_INTERP_LOWER>
+{
+ void operator()(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
+ const af::dim4 &ostrides, const af::dim4 &istrides,
+ const dim_t x, const dim_t y)
{
- resize_op<T, method> op;
- for(dim_t y = 0; y < odims[1]; y++) {
- for(dim_t x = 0; x < odims[0]; x++) {
- op(outPtr, inPtr, odims, idims, ostrides, istrides, x, y);
+ // Compute Indices
+ dim_t i_x = floor((float)x / (odims[0] / (float)idims[0]));
+ dim_t i_y = floor((float)y / (odims[1] / (float)idims[1]));
+
+ if (i_x >= idims[0]) i_x = idims[0] - 1;
+ if (i_y >= idims[1]) i_y = idims[1] - 1;
+
+ dim_t i_off = i_y * istrides[1] + i_x;
+ dim_t o_off = y * ostrides[1] + x;
+ // Copy values from all channels
+ for(dim_t w = 0; w < odims[3]; w++) {
+ dim_t wost = w * ostrides[3];
+ dim_t wist = w * istrides[3];
+ for(dim_t z = 0; z < odims[2]; z++) {
+ outPtr[o_off + z * ostrides[2] + wost] = inPtr[i_off + z * istrides[2] + wist];
}
}
}
+};
- template<typename T>
- Array<T> resize(const Array<T> &in, const dim_t odim0, const dim_t odim1,
- const af_interp_type method)
- {
- af::dim4 idims = in.dims();
- af::dim4 odims(odim0, odim1, idims[2], idims[3]);
-
- // Create output placeholder
- Array<T> outArray = createValueArray(odims, (T)0);
-
- // Get pointers to raw data
- const T *inPtr = in.get();
- T *outPtr = outArray.get();
-
- af::dim4 ostrides = outArray.strides();
- af::dim4 istrides = in.strides();
-
- switch(method) {
- case AF_INTERP_NEAREST:
- resize_<T, AF_INTERP_NEAREST>(outPtr, inPtr, odims, idims, ostrides, istrides);
- break;
- case AF_INTERP_BILINEAR:
- resize_<T, AF_INTERP_BILINEAR>(outPtr, inPtr, odims, idims, ostrides, istrides);
- break;
- case AF_INTERP_LOWER:
- resize_<T, AF_INTERP_LOWER>(outPtr, inPtr, odims, idims, ostrides, istrides);
- break;
- default:
- break;
+template<typename T, af_interp_type method>
+void resize_(Array<T> out, const Array<T> in)
+{
+ af::dim4 idims = in.dims();
+ af::dim4 odims = out.dims();
+ const T *inPtr = in.get();
+ T *outPtr = out.get();
+ af::dim4 ostrides = out.strides();
+ af::dim4 istrides = in.strides();
+
+ resize_op<T, method> op;
+ for(dim_t y = 0; y < odims[1]; y++) {
+ for(dim_t x = 0; x < odims[0]; x++) {
+ op(outPtr, inPtr, odims, idims, ostrides, istrides, x, y);
}
- return outArray;
}
+}
+template<typename T>
+Array<T> resize(const Array<T> &in, const dim_t odim0, const dim_t odim1,
+ const af_interp_type method)
+{
+ af::dim4 idims = in.dims();
+ af::dim4 odims(odim0, odim1, idims[2], idims[3]);
+ // Create output placeholder
+ Array<T> out = createValueArray(odims, (T)0);
+ out.eval();
+ in.eval();
+
+ switch(method) {
+ case AF_INTERP_NEAREST:
+ getQueue().enqueue(resize_<T, AF_INTERP_NEAREST>, out, in); break;
+ case AF_INTERP_BILINEAR:
+ getQueue().enqueue(resize_<T, AF_INTERP_BILINEAR>, out, in); break;
+ case AF_INTERP_LOWER:
+ getQueue().enqueue(resize_<T, AF_INTERP_LOWER>, out, in); break;
+ default: break;
+ }
+ return out;
+}
-#define INSTANTIATE(T) \
+#define INSTANTIATE(T) \
template Array<T> resize<T> (const Array<T> &in, const dim_t odim0, const dim_t odim1, \
const af_interp_type method);
-
- INSTANTIATE(float)
- INSTANTIATE(double)
- INSTANTIATE(cfloat)
- INSTANTIATE(cdouble)
- INSTANTIATE(int)
- INSTANTIATE(uint)
- INSTANTIATE(intl)
- INSTANTIATE(uintl)
- INSTANTIATE(uchar)
- INSTANTIATE(char)
- INSTANTIATE(short)
- INSTANTIATE(ushort)
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
}
diff --git a/src/backend/cpu/shift.cpp b/src/backend/cpu/shift.cpp
index 05cac4c..6a2b939 100644
--- a/src/backend/cpu/shift.cpp
+++ b/src/backend/cpu/shift.cpp
@@ -12,27 +12,32 @@
#include <stdexcept>
#include <err_cpu.hpp>
#include <cassert>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
- static inline dim_t simple_mod(const dim_t i, const dim_t dim)
- {
- return (i < dim) ? i : (i - dim);
- }
+static inline dim_t simple_mod(const dim_t i, const dim_t dim)
+{
+ return (i < dim) ? i : (i - dim);
+}
- template<typename T>
- Array<T> shift(const Array<T> &in, const int sdims[4])
- {
- const af::dim4 iDims = in.dims();
- af::dim4 oDims = iDims;
+template<typename T>
+Array<T> shift(const Array<T> &in, const int sdims[4])
+{
+ Array<T> out = createEmptyArray<T>(in.dims());
+ out.eval();
+ in.eval();
+ const af::dim4 temp(sdims[0], sdims[1], sdims[2], sdims[3]);
- Array<T> out = createEmptyArray<T>(oDims);
+ auto func = [=] (Array<T> out, const Array<T> in, const af::dim4 sdims) {
T* outPtr = out.get();
const T* inPtr = in.get();
- const af::dim4 ist = in.strides();
- const af::dim4 ost = out.strides();
+ const af::dim4 oDims = out.dims();
+ const af::dim4 ist = in.strides();
+ const af::dim4 ost = out.strides();
int sdims_[4];
// Need to do this because we are mapping output to input in the kernel
@@ -65,24 +70,25 @@ namespace cpu
}
}
}
+ };
+ getQueue().enqueue(func, out, in, temp);
- return out;
- }
+ return out;
+}
#define INSTANTIATE(T) \
template Array<T> shift<T>(const Array<T> &in, const int sdims[4]); \
- INSTANTIATE(float)
- INSTANTIATE(double)
- INSTANTIATE(cfloat)
- INSTANTIATE(cdouble)
- INSTANTIATE(int)
- INSTANTIATE(uint)
- INSTANTIATE(intl)
- INSTANTIATE(uintl)
- INSTANTIATE(uchar)
- INSTANTIATE(char)
- INSTANTIATE(short)
- INSTANTIATE(ushort)
-
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list