[arrayfire] 29/284: Converted wrap & unwrap cpu fns to async calls
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:16 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit d0223f980047dfee315569eaf359105377e978b7
Author: pradeep <pradeep at arrayfire.com>
Date: Fri Nov 20 15:48:10 2015 -0500
Converted wrap & unwrap cpu fns to async calls
---
src/backend/cpu/unwrap.cpp | 173 +++++++++++++++++++++++----------------------
src/backend/cpu/wrap.cpp | 171 ++++++++++++++++++++++----------------------
2 files changed, 175 insertions(+), 169 deletions(-)
diff --git a/src/backend/cpu/unwrap.cpp b/src/backend/cpu/unwrap.cpp
index f9c25f9..efb46be 100644
--- a/src/backend/cpu/unwrap.cpp
+++ b/src/backend/cpu/unwrap.cpp
@@ -13,112 +13,115 @@
#include <err_cpu.hpp>
#include <dispatch.hpp>
#include <math.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
- template<typename T, int d>
- void unwrap_dim(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
- const af::dim4 &ostrides, const af::dim4 &istrides,
- const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy,
- const dim_t px, const dim_t py)
- {
- dim_t nx = (idims[0] + 2 * px - wx) / sx + 1;
-
- for(dim_t w = 0; w < odims[3]; w++) {
- for(dim_t z = 0; z < odims[2]; z++) {
-
- dim_t cOut = w * ostrides[3] + z * ostrides[2];
- dim_t cIn = w * istrides[3] + z * istrides[2];
- const T* iptr = inPtr + cIn;
- T* optr_= outPtr + cOut;
-
- for(dim_t col = 0; col < odims[d]; col++) {
- // Offset output ptr
- T* optr = optr_ + col * ostrides[d];
-
- // Calculate input window index
- dim_t winy = (col / nx);
- dim_t winx = (col % nx);
-
- dim_t startx = winx * sx;
- dim_t starty = winy * sy;
-
- dim_t spx = startx - px;
- dim_t spy = starty - py;
-
- // Short cut condition ensuring all values within input dimensions
- bool cond = (spx >= 0 && spx + wx < idims[0] && spy >= 0 && spy + wy < idims[1]);
-
- for(dim_t y = 0; y < wy; y++) {
- for(dim_t x = 0; x < wx; x++) {
- dim_t xpad = spx + x;
- dim_t ypad = spy + y;
-
- dim_t oloc = (y * wx + x);
- if (d == 0) oloc *= ostrides[1];
-
- if(cond || (xpad >= 0 && xpad < idims[0] && ypad >= 0 && ypad < idims[1])) {
- dim_t iloc = (ypad * istrides[1] + xpad * istrides[0]);
- optr[oloc] = iptr[iloc];
- } else {
- optr[oloc] = scalar<T>(0.0);
- }
+
+template<typename T, int d>
+void unwrap_dim(Array<T> out, const Array<T> in, const dim_t wx, const dim_t wy,
+ const dim_t sx, const dim_t sy, const dim_t px, const dim_t py)
+{
+ const T *inPtr = in.get();
+ T *outPtr = out.get();
+
+ af::dim4 idims = in.dims();
+ af::dim4 odims = out.dims();
+ af::dim4 istrides = in.strides();
+ af::dim4 ostrides = out.strides();
+
+ dim_t nx = (idims[0] + 2 * px - wx) / sx + 1;
+
+ for(dim_t w = 0; w < odims[3]; w++) {
+ for(dim_t z = 0; z < odims[2]; z++) {
+
+ dim_t cOut = w * ostrides[3] + z * ostrides[2];
+ dim_t cIn = w * istrides[3] + z * istrides[2];
+ const T* iptr = inPtr + cIn;
+ T* optr_= outPtr + cOut;
+
+ for(dim_t col = 0; col < odims[d]; col++) {
+ // Offset output ptr
+ T* optr = optr_ + col * ostrides[d];
+
+ // Calculate input window index
+ dim_t winy = (col / nx);
+ dim_t winx = (col % nx);
+
+ dim_t startx = winx * sx;
+ dim_t starty = winy * sy;
+
+ dim_t spx = startx - px;
+ dim_t spy = starty - py;
+
+ // Short cut condition ensuring all values within input dimensions
+ bool cond = (spx >= 0 && spx + wx < idims[0] && spy >= 0 && spy + wy < idims[1]);
+
+ for(dim_t y = 0; y < wy; y++) {
+ for(dim_t x = 0; x < wx; x++) {
+ dim_t xpad = spx + x;
+ dim_t ypad = spy + y;
+
+ dim_t oloc = (y * wx + x);
+ if (d == 0) oloc *= ostrides[1];
+
+ if(cond || (xpad >= 0 && xpad < idims[0] && ypad >= 0 && ypad < idims[1])) {
+ dim_t iloc = (ypad * istrides[1] + xpad * istrides[0]);
+ optr[oloc] = iptr[iloc];
+ } else {
+ optr[oloc] = scalar<T>(0.0);
}
}
}
}
}
}
+}
- template<typename T>
- Array<T> unwrap(const Array<T> &in, const dim_t wx, const dim_t wy,
- const dim_t sx, const dim_t sy, const dim_t px, const dim_t py, const bool is_column)
- {
- af::dim4 idims = in.dims();
-
- dim_t nx = (idims[0] + 2 * px - wx) / sx + 1;
- dim_t ny = (idims[1] + 2 * py - wy) / sy + 1;
-
- af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
+template<typename T>
+Array<T> unwrap(const Array<T> &in, const dim_t wx, const dim_t wy,
+ const dim_t sx, const dim_t sy, const dim_t px, const dim_t py, const bool is_column)
+{
+ af::dim4 idims = in.dims();
- if (!is_column) {
- std::swap(odims[0], odims[1]);
- }
+ dim_t nx = (idims[0] + 2 * px - wx) / sx + 1;
+ dim_t ny = (idims[1] + 2 * py - wy) / sy + 1;
- // Create output placeholder
- Array<T> outArray = createEmptyArray<T>(odims);
+ af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
- // Get pointers to raw data
- const T *inPtr = in.get();
- T *outPtr = outArray.get();
+ if (!is_column) {
+ std::swap(odims[0], odims[1]);
+ }
- af::dim4 ostrides = outArray.strides();
- af::dim4 istrides = in.strides();
+ Array<T> outArray = createEmptyArray<T>(odims);
- if (is_column) {
- unwrap_dim<T, 1>(outPtr, inPtr, odims, idims, ostrides, istrides, wx, wy, sx, sy, px, py);
- } else {
- unwrap_dim<T, 0>(outPtr, inPtr, odims, idims, ostrides, istrides, wx, wy, sx, sy, px, py);
- }
- return outArray;
+ if (is_column) {
+ getQueue().enqueue(unwrap_dim<T, 1>, outArray, in, wx, wy, sx, sy, px, py);
+ } else {
+ getQueue().enqueue(unwrap_dim<T, 0>, outArray, in, wx, wy, sx, sy, px, py);
}
+ return outArray;
+}
+
#define INSTANTIATE(T) \
template Array<T> unwrap<T> (const Array<T> &in, const dim_t wx, const dim_t wy, \
const dim_t sx, const dim_t sy, const dim_t px, const dim_t py, const bool is_column);
- INSTANTIATE(float)
- INSTANTIATE(double)
- INSTANTIATE(cfloat)
- INSTANTIATE(cdouble)
- INSTANTIATE(int)
- INSTANTIATE(uint)
- INSTANTIATE(intl)
- INSTANTIATE(uintl)
- INSTANTIATE(uchar)
- INSTANTIATE(char)
- INSTANTIATE(short)
- INSTANTIATE(ushort)
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
+
}
diff --git a/src/backend/cpu/wrap.cpp b/src/backend/cpu/wrap.cpp
index a04a6f5..3ff54de 100644
--- a/src/backend/cpu/wrap.cpp
+++ b/src/backend/cpu/wrap.cpp
@@ -13,92 +13,95 @@
#include <err_cpu.hpp>
#include <dispatch.hpp>
#include <math.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
- template<typename T, int d>
- void wrap_dim(T *outPtr, const T *inPtr,
- const af::dim4 &odims, const af::dim4 &idims,
- const af::dim4 &ostrides, const af::dim4 &istrides,
- const dim_t wx, const dim_t wy,
- const dim_t sx, const dim_t sy,
- const dim_t px, const dim_t py)
- {
- dim_t nx = (odims[0] + 2 * px - wx) / sx + 1;
-
- for(dim_t w = 0; w < idims[3]; w++) {
- for(dim_t z = 0; z < idims[2]; z++) {
-
- dim_t cIn = w * istrides[3] + z * istrides[2];
- dim_t cOut = w * ostrides[3] + z * ostrides[2];
- const T* iptr_ = inPtr + cIn;
- T* optr= outPtr + cOut;
-
- for(dim_t col = 0; col < idims[d]; col++) {
- // Offset output ptr
- const T* iptr = iptr_ + col * istrides[d];
-
- // Calculate input window index
- dim_t winy = (col / nx);
- dim_t winx = (col % nx);
-
- dim_t startx = winx * sx;
- dim_t starty = winy * sy;
-
- dim_t spx = startx - px;
- dim_t spy = starty - py;
-
- // Short cut condition ensuring all values within input dimensions
- bool cond = (spx >= 0 && spx + wx < odims[0] && spy >= 0 && spy + wy < odims[1]);
-
- for(dim_t y = 0; y < wy; y++) {
- for(dim_t x = 0; x < wx; x++) {
- dim_t xpad = spx + x;
- dim_t ypad = spy + y;
-
- dim_t iloc = (y * wx + x);
- if (d == 0) iloc *= istrides[1];
-
- if(cond || (xpad >= 0 && xpad < odims[0] && ypad >= 0 && ypad < odims[1])) {
- dim_t oloc = (ypad * ostrides[1] + xpad * ostrides[0]);
- // FIXME: When using threads, atomize this
- optr[oloc] += iptr[iloc];
- }
+template<typename T, int d>
+void wrap_dim(Array<T> out, const Array<T> in, const dim_t wx, const dim_t wy,
+ const dim_t sx, const dim_t sy, const dim_t px, const dim_t py)
+{
+ const T *inPtr = in.get();
+ T *outPtr = out.get();
+
+ af::dim4 idims = in.dims();
+ af::dim4 odims = out.dims();
+ af::dim4 istrides = in.strides();
+ af::dim4 ostrides = out.strides();
+
+ dim_t nx = (odims[0] + 2 * px - wx) / sx + 1;
+
+ for(dim_t w = 0; w < idims[3]; w++) {
+ for(dim_t z = 0; z < idims[2]; z++) {
+
+ dim_t cIn = w * istrides[3] + z * istrides[2];
+ dim_t cOut = w * ostrides[3] + z * ostrides[2];
+ const T* iptr_ = inPtr + cIn;
+ T* optr= outPtr + cOut;
+
+ for(dim_t col = 0; col < idims[d]; col++) {
+ // Offset input ptr
+ const T* iptr = iptr_ + col * istrides[d];
+
+ // Calculate output window index
+ dim_t winy = (col / nx);
+ dim_t winx = (col % nx);
+
+ dim_t startx = winx * sx;
+ dim_t starty = winy * sy;
+
+ dim_t spx = startx - px;
+ dim_t spy = starty - py;
+
+ // Short cut condition ensuring all values within output dimensions
+ bool cond = (spx >= 0 && spx + wx < odims[0] && spy >= 0 && spy + wy < odims[1]);
+
+ for(dim_t y = 0; y < wy; y++) {
+ for(dim_t x = 0; x < wx; x++) {
+ dim_t xpad = spx + x;
+ dim_t ypad = spy + y;
+
+ dim_t iloc = (y * wx + x);
+ if (d == 0) iloc *= istrides[1];
+
+ if(cond || (xpad >= 0 && xpad < odims[0] && ypad >= 0 && ypad < odims[1])) {
+ dim_t oloc = (ypad * ostrides[1] + xpad * ostrides[0]);
+ // FIXME: When using threads, atomize this
+ optr[oloc] += iptr[iloc];
}
}
}
}
}
}
+}
- template<typename T>
- Array<T> wrap(const Array<T> &in,
- const dim_t ox, const dim_t oy,
- const dim_t wx, const dim_t wy,
- const dim_t sx, const dim_t sy,
- const dim_t px, const dim_t py,
- const bool is_column)
- {
- af::dim4 idims = in.dims();
- af::dim4 odims(ox, oy, idims[2], idims[3]);
- Array<T> out = createValueArray<T>(odims, scalar<T>(0));
-
- const T *inPtr = in.get();
- T *outPtr = out.get();
-
- af::dim4 istrides = in.strides();
- af::dim4 ostrides = out.strides();
-
- if (is_column) {
- wrap_dim<T, true >(outPtr, inPtr, odims, idims, ostrides, istrides, wx, wy, sx, sy, px, py);
- } else {
- wrap_dim<T, false>(outPtr, inPtr, odims, idims, ostrides, istrides, wx, wy, sx, sy, px, py);
- }
+template<typename T>
+Array<T> wrap(const Array<T> &in,
+ const dim_t ox, const dim_t oy,
+ const dim_t wx, const dim_t wy,
+ const dim_t sx, const dim_t sy,
+ const dim_t px, const dim_t py,
+ const bool is_column)
+{
+ af::dim4 idims = in.dims();
+ af::dim4 odims(ox, oy, idims[2], idims[3]);
+
+ Array<T> out = createValueArray<T>(odims, scalar<T>(0));
+ out.eval();
+ in.eval();
- return out;
+ if (is_column) {
+ getQueue().enqueue(wrap_dim<T, 1>, out, in, wx, wy, sx, sy, px, py);
+ } else {
+ getQueue().enqueue(wrap_dim<T, 0>, out, in, wx, wy, sx, sy, px, py);
}
+ return out;
+}
+
#define INSTANTIATE(T) \
template Array<T> wrap<T> (const Array<T> &in, \
@@ -108,17 +111,17 @@ namespace cpu
const dim_t px, const dim_t py, \
const bool is_column);
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
- INSTANTIATE(float)
- INSTANTIATE(double)
- INSTANTIATE(cfloat)
- INSTANTIATE(cdouble)
- INSTANTIATE(int)
- INSTANTIATE(uint)
- INSTANTIATE(intl)
- INSTANTIATE(uintl)
- INSTANTIATE(uchar)
- INSTANTIATE(char)
- INSTANTIATE(short)
- INSTANTIATE(ushort)
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list