[arrayfire] 38/284: Convert morph & range cpu fns to async calls
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:17 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 258d57364178a49ce8b60add412b2efd99a1633a
Author: pradeep <pradeep at arrayfire.com>
Date: Tue Nov 24 14:52:12 2015 -0500
Convert morph & range cpu fns to async calls
---
src/backend/cpu/morph.cpp | 216 ++++++++++++++++++++++++----------------------
src/backend/cpu/range.cpp | 116 +++++++++++++------------
2 files changed, 172 insertions(+), 160 deletions(-)
diff --git a/src/backend/cpu/morph.cpp b/src/backend/cpu/morph.cpp
index eb2e1de..c64d09b 100644
--- a/src/backend/cpu/morph.cpp
+++ b/src/backend/cpu/morph.cpp
@@ -13,6 +13,8 @@
#include <Array.hpp>
#include <morph.hpp>
#include <algorithm>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
@@ -31,108 +33,41 @@ static inline unsigned getIdx(const dim4 &strides,
template<typename T, bool isDilation>
Array<T> morph(const Array<T> &in, const Array<T> &mask)
{
- const dim4 dims = in.dims();
- const dim4 window = mask.dims();
- const dim_t R0 = window[0]/2;
- const dim_t R1 = window[1]/2;
- const dim4 istrides = in.strides();
- const dim4 fstrides = mask.strides();
-
- Array<T> out = createEmptyArray<T>(dims);
- const dim4 ostrides = out.strides();
-
- T* outData = out.get();
- const T* inData = in.get();
- const T* filter = mask.get();
-
- for(dim_t b3=0; b3<dims[3]; ++b3) {
- for(dim_t b2=0; b2<dims[2]; ++b2) {
- // either channels or batch is handled by outer most loop
- for(dim_t j=0; j<dims[1]; ++j) {
- // j steps along 2nd dimension
- for(dim_t i=0; i<dims[0]; ++i) {
- // i steps along 1st dimension
- T filterResult = inData[ getIdx(istrides, i, j) ];
-
- // wj,wi steps along 2nd & 1st dimensions of filter window respectively
- for(dim_t wj=0; wj<window[1]; wj++) {
- for(dim_t wi=0; wi<window[0]; wi++) {
-
- dim_t offj = j+wj-R1;
- dim_t offi = i+wi-R0;
-
- T maskValue = filter[ getIdx(fstrides, wi, wj) ];
-
- if ((maskValue > (T)0) && offi>=0 && offj>=0 && offi<dims[0] && offj<dims[1]) {
-
- T inValue = inData[ getIdx(istrides, offi, offj) ];
-
- if (isDilation)
- filterResult = std::max(filterResult, inValue);
- else
- filterResult = std::min(filterResult, inValue);
- }
-
- } // window 1st dimension loop ends here
- } // filter window loop ends here
-
- outData[ getIdx(ostrides, i, j) ] = filterResult;
- } //1st dimension loop ends here
- } // 2nd dimension loop ends here
-
- // next iteration will be next batch if any
- outData += ostrides[2];
- inData += istrides[2];
- }
- }
-
- return out;
-}
-
-template<typename T, bool isDilation>
-Array<T> morph3d(const Array<T> &in, const Array<T> &mask)
-{
- const dim4 dims = in.dims();
- const dim4 window = mask.dims();
- const dim_t R0 = window[0]/2;
- const dim_t R1 = window[1]/2;
- const dim_t R2 = window[2]/2;
- const dim4 istrides = in.strides();
- const dim4 fstrides = mask.strides();
- const dim_t bCount = dims[3];
-
- Array<T> out = createEmptyArray<T>(dims);
- const dim4 ostrides = out.strides();
-
- T* outData = out.get();
- const T* inData = in.get();
- const T* filter = mask.get();
-
- for(dim_t batchId=0; batchId<bCount; ++batchId) {
- // either channels or batch is handled by outer most loop
- for(dim_t k=0; k<dims[2]; ++k) {
- // k steps along 3rd dimension
- for(dim_t j=0; j<dims[1]; ++j) {
- // j steps along 2nd dimension
- for(dim_t i=0; i<dims[0]; ++i) {
- // i steps along 1st dimension
- T filterResult = inData[ getIdx(istrides, i, j, k) ];
-
- // wk, wj,wi steps along 2nd & 1st dimensions of filter window respectively
- for(dim_t wk=0; wk<window[2]; wk++) {
+ Array<T> out = createEmptyArray<T>(in.dims());
+
+ auto func = [=] (Array<T> out, const Array<T> in, const Array<T> mask) {
+ const dim4 ostrides = out.strides();
+ const dim4 istrides = in.strides();
+ const dim4 fstrides = mask.strides();
+ const dim4 dims = in.dims();
+ const dim4 window = mask.dims();
+ T* outData = out.get();
+ const T* inData = in.get();
+ const T* filter = mask.get();
+ const dim_t R0 = window[0]/2;
+ const dim_t R1 = window[1]/2;
+
+ for(dim_t b3=0; b3<dims[3]; ++b3) {
+ for(dim_t b2=0; b2<dims[2]; ++b2) {
+ // either channels or batch is handled by outer most loop
+ for(dim_t j=0; j<dims[1]; ++j) {
+ // j steps along 2nd dimension
+ for(dim_t i=0; i<dims[0]; ++i) {
+ // i steps along 1st dimension
+ T filterResult = inData[ getIdx(istrides, i, j) ];
+
+ // wj,wi steps along 2nd & 1st dimensions of filter window respectively
for(dim_t wj=0; wj<window[1]; wj++) {
for(dim_t wi=0; wi<window[0]; wi++) {
- dim_t offk = k+wk-R2;
dim_t offj = j+wj-R1;
dim_t offi = i+wi-R0;
- T maskValue = filter[ getIdx(fstrides, wi, wj, wk) ];
+ T maskValue = filter[ getIdx(fstrides, wi, wj) ];
- if ((maskValue > (T)0) && offi>=0 && offj>=0 && offk>=0 &&
- offi<dims[0] && offj<dims[1] && offk<dims[2]) {
+ if ((maskValue > (T)0) && offi>=0 && offj>=0 && offi<dims[0] && offj<dims[1]) {
- T inValue = inData[ getIdx(istrides, offi, offj, offk) ];
+ T inValue = inData[ getIdx(istrides, offi, offj) ];
if (isDilation)
filterResult = std::max(filterResult, inValue);
@@ -141,17 +76,88 @@ Array<T> morph3d(const Array<T> &in, const Array<T> &mask)
}
} // window 1st dimension loop ends here
- } // window 1st dimension loop ends here
- }// filter window loop ends here
-
- outData[ getIdx(ostrides, i, j, k) ] = filterResult;
- } //1st dimension loop ends here
- } // 2nd dimension loop ends here
- } // 3rd dimension loop ends here
- // next iteration will be next batch if any
- outData += ostrides[3];
- inData += istrides[3];
- }
+ } // filter window loop ends here
+
+ outData[ getIdx(ostrides, i, j) ] = filterResult;
+ } //1st dimension loop ends here
+ } // 2nd dimension loop ends here
+
+ // next iteration will be next batch if any
+ outData += ostrides[2];
+ inData += istrides[2];
+ }
+ }
+ };
+ getQueue().enqueue(func, out, in, mask);
+
+ return out;
+}
+
+template<typename T, bool isDilation>
+Array<T> morph3d(const Array<T> &in, const Array<T> &mask)
+{
+ Array<T> out = createEmptyArray<T>(in.dims());
+
+ auto func = [=] (Array<T> out, const Array<T> in, const Array<T> mask) {
+ const dim4 dims = in.dims();
+ const dim4 window = mask.dims();
+ const dim_t R0 = window[0]/2;
+ const dim_t R1 = window[1]/2;
+ const dim_t R2 = window[2]/2;
+ const dim4 istrides = in.strides();
+ const dim4 fstrides = mask.strides();
+ const dim_t bCount = dims[3];
+ const dim4 ostrides = out.strides();
+ T* outData = out.get();
+ const T* inData = in.get();
+ const T* filter = mask.get();
+
+ for(dim_t batchId=0; batchId<bCount; ++batchId) {
+ // either channels or batch is handled by outer most loop
+ for(dim_t k=0; k<dims[2]; ++k) {
+ // k steps along 3rd dimension
+ for(dim_t j=0; j<dims[1]; ++j) {
+ // j steps along 2nd dimension
+ for(dim_t i=0; i<dims[0]; ++i) {
+ // i steps along 1st dimension
+ T filterResult = inData[ getIdx(istrides, i, j, k) ];
+
+ // wk, wj, wi step along the 3rd, 2nd & 1st dimensions of the filter window respectively
+ for(dim_t wk=0; wk<window[2]; wk++) {
+ for(dim_t wj=0; wj<window[1]; wj++) {
+ for(dim_t wi=0; wi<window[0]; wi++) {
+
+ dim_t offk = k+wk-R2;
+ dim_t offj = j+wj-R1;
+ dim_t offi = i+wi-R0;
+
+ T maskValue = filter[ getIdx(fstrides, wi, wj, wk) ];
+
+ if ((maskValue > (T)0) && offi>=0 && offj>=0 && offk>=0 &&
+ offi<dims[0] && offj<dims[1] && offk<dims[2]) {
+
+ T inValue = inData[ getIdx(istrides, offi, offj, offk) ];
+
+ if (isDilation)
+ filterResult = std::max(filterResult, inValue);
+ else
+ filterResult = std::min(filterResult, inValue);
+ }
+
+ } // window 1st dimension loop ends here
+ } // window 2nd dimension loop ends here
+ }// filter window loop ends here
+
+ outData[ getIdx(ostrides, i, j, k) ] = filterResult;
+ } //1st dimension loop ends here
+ } // 2nd dimension loop ends here
+ } // 3rd dimension loop ends here
+ // next iteration will be next batch if any
+ outData += ostrides[3];
+ inData += istrides[3];
+ }
+ };
+ getQueue().enqueue(func, out, in, mask);
return out;
}
diff --git a/src/backend/cpu/range.cpp b/src/backend/cpu/range.cpp
index eabf3a1..1fa46b2 100644
--- a/src/backend/cpu/range.cpp
+++ b/src/backend/cpu/range.cpp
@@ -14,74 +14,80 @@
#include <err_cpu.hpp>
#include <algorithm>
#include <numeric>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
- ///////////////////////////////////////////////////////////////////////////
- // Kernel Functions
- ///////////////////////////////////////////////////////////////////////////
- template<typename T, int dim>
- void range(T *out, const dim4 &dims, const dim4 &strides)
- {
- for(dim_t w = 0; w < dims[3]; w++) {
- dim_t offW = w * strides[3];
- for(dim_t z = 0; z < dims[2]; z++) {
- dim_t offWZ = offW + z * strides[2];
- for(dim_t y = 0; y < dims[1]; y++) {
- dim_t offWZY = offWZ + y * strides[1];
- for(dim_t x = 0; x < dims[0]; x++) {
- dim_t id = offWZY + x;
- if(dim == 0) {
- out[id] = x;
- } else if(dim == 1) {
- out[id] = y;
- } else if(dim == 2) {
- out[id] = z;
- } else if(dim == 3) {
- out[id] = w;
- }
+///////////////////////////////////////////////////////////////////////////
+// Kernel Functions
+///////////////////////////////////////////////////////////////////////////
+template<typename T, int dim>
+void range(Array<T> output)
+{
+ T* out = output.get();
+
+ const dim4 dims = output.dims();
+ const dim4 strides = output.strides();
+
+ for(dim_t w = 0; w < dims[3]; w++) {
+ dim_t offW = w * strides[3];
+ for(dim_t z = 0; z < dims[2]; z++) {
+ dim_t offWZ = offW + z * strides[2];
+ for(dim_t y = 0; y < dims[1]; y++) {
+ dim_t offWZY = offWZ + y * strides[1];
+ for(dim_t x = 0; x < dims[0]; x++) {
+ dim_t id = offWZY + x;
+ if(dim == 0) {
+ out[id] = x;
+ } else if(dim == 1) {
+ out[id] = y;
+ } else if(dim == 2) {
+ out[id] = z;
+ } else if(dim == 3) {
+ out[id] = w;
}
}
}
}
}
+}
- ///////////////////////////////////////////////////////////////////////////
- // Wrapper Functions
- ///////////////////////////////////////////////////////////////////////////
- template<typename T>
- Array<T> range(const dim4& dims, const int seq_dim)
- {
- // Set dimension along which the sequence should be
- // Other dimensions are simply tiled
- int _seq_dim = seq_dim;
- if(seq_dim < 0) {
- _seq_dim = 0; // column wise sequence
- }
-
- Array<T> out = createEmptyArray<T>(dims);
- switch(_seq_dim) {
- case 0: range<T, 0>(out.get(), out.dims(), out.strides()); break;
- case 1: range<T, 1>(out.get(), out.dims(), out.strides()); break;
- case 2: range<T, 2>(out.get(), out.dims(), out.strides()); break;
- case 3: range<T, 3>(out.get(), out.dims(), out.strides()); break;
- default : AF_ERROR("Invalid rep selection", AF_ERR_ARG);
- }
-
+///////////////////////////////////////////////////////////////////////////
+// Wrapper Functions
+///////////////////////////////////////////////////////////////////////////
+template<typename T>
+Array<T> range(const dim4& dims, const int seq_dim)
+{
+ // Set dimension along which the sequence should be
+ // Other dimensions are simply tiled
+ int _seq_dim = seq_dim;
+ if(seq_dim < 0) {
+ _seq_dim = 0; // column wise sequence
+ }
- return out;
+ Array<T> out = createEmptyArray<T>(dims);
+ switch(_seq_dim) {
+ case 0: getQueue().enqueue(range<T, 0>, out); break;
+ case 1: getQueue().enqueue(range<T, 1>, out); break;
+ case 2: getQueue().enqueue(range<T, 2>, out); break;
+ case 3: getQueue().enqueue(range<T, 3>, out); break;
+ default : AF_ERROR("Invalid rep selection", AF_ERR_ARG);
}
+ return out;
+}
+
#define INSTANTIATE(T) \
template Array<T> range<T>(const af::dim4 &dims, const int seq_dims); \
- INSTANTIATE(float)
- INSTANTIATE(double)
- INSTANTIATE(int)
- INSTANTIATE(uint)
- INSTANTIATE(intl)
- INSTANTIATE(uintl)
- INSTANTIATE(uchar)
- INSTANTIATE(ushort)
- INSTANTIATE(short)
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(ushort)
+INSTANTIATE(short)
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list