[arrayfire] 11/284: Async FFT for the CPU backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:13 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit b7c83e800e7e5ef714b9ac63e82b79f818e3b965
Author: Umar Arshad <umar at arrayfire.com>
Date: Mon Sep 21 14:36:05 2015 -0400
Async FFT for the CPU backend
---
src/backend/cpu/Array.cpp | 48 +++++++++++++++++++++++----------------------
src/backend/cpu/copy.cpp | 10 ++++++----
src/backend/cpu/fft.cpp | 39 ++++++++++++++++++++++++++----------
src/backend/cpu/reorder.cpp | 22 +++++++++++++--------
4 files changed, 74 insertions(+), 45 deletions(-)
diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 15515fa..d714fd9 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -14,6 +14,7 @@
#include <TNJ/ScalarNode.hpp>
#include <memory.hpp>
#include <platform.hpp>
+#include <async_queue.hpp>
#include <cstring>
#include <cstddef>
@@ -46,7 +47,6 @@ namespace cpu
}
}
-
template<typename T>
Array<T>::Array(af::dim4 dims, TNJ::Node_ptr n) :
info(-1, dims, af::dim4(0,0,0,0), calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
@@ -67,40 +67,42 @@ namespace cpu
template<typename T>
void Array<T>::eval()
{
- if (isReady()) return;
+ auto func = [this] {
+ if (isReady()) return;
- this->setId(getActiveDeviceId());
- data = std::shared_ptr<T>(memAlloc<T>(elements()), memFree<T>);
- T *ptr = data.get();
+ setId(getActiveDeviceId());
+ data = std::shared_ptr<T>(memAlloc<T>(elements()), memFree<T>);
+ T *ptr = data.get();
- dim4 ostrs = strides();
- dim4 odims = dims();
+ dim4 ostrs = strides();
+ dim4 odims = dims();
- for (int w = 0; w < (int)odims[3]; w++) {
- dim_t offw = w * ostrs[3];
+ for (int w = 0; w < (int)odims[3]; w++) {
+ dim_t offw = w * ostrs[3];
- for (int z = 0; z < (int)odims[2]; z++) {
- dim_t offz = z * ostrs[2] + offw;
+ for (int z = 0; z < (int)odims[2]; z++) {
+ dim_t offz = z * ostrs[2] + offw;
- for (int y = 0; y < (int)odims[1]; y++) {
- dim_t offy = y * ostrs[1] + offz;
+ for (int y = 0; y < (int)odims[1]; y++) {
+ dim_t offy = y * ostrs[1] + offz;
- for (int x = 0; x < (int)odims[0]; x++) {
- dim_t id = x + offy;
+ for (int x = 0; x < (int)odims[0]; x++) {
+ dim_t id = x + offy;
- ptr[id] = *(T *)node->calc(x, y, z, w);
+ ptr[id] = *(T *)node->calc(x, y, z, w);
+ }
}
}
}
- }
-
- ready = true;
+ ready = true;
+ Node_ptr prev = node;
+ prev->reset();
+ // FIXME: Replace the current node in any JIT possible trees with the new BufferNode
+ node.reset();
+ };
- Node_ptr prev = node;
- prev->reset();
- // FIXME: Replace the current node in any JIT possible trees with the new BufferNode
- node.reset();
+ getQueue().enqueue(func);
}
template<typename T>
diff --git a/src/backend/cpu/copy.cpp b/src/backend/cpu/copy.cpp
index 433e718..58773af 100644
--- a/src/backend/cpu/copy.cpp
+++ b/src/backend/cpu/copy.cpp
@@ -67,7 +67,7 @@ namespace cpu
}
template<typename inType, typename outType>
- static void copy(Array<outType> &dst, const Array<inType> &src, outType default_value, double factor)
+ static void copy(Array<outType> dst, const Array<inType> src, outType default_value, double factor)
{
dim4 src_dims = src.dims();
dim4 dst_dims = dst.dims();
@@ -117,7 +117,7 @@ namespace cpu
template<typename T>
void multiply_inplace(Array<T> &in, double val)
{
- getQueue().enqueue(copy<T, T>,in, in, 0, val);
+ getQueue().enqueue(copy<T, T>, in, in, 0, val);
}
template<typename inType, typename outType>
@@ -126,14 +126,16 @@ namespace cpu
outType default_value, double factor)
{
Array<outType> ret = createValueArray<outType>(dims, default_value);
- getQueue().enqueue(copy<inType, outType>,ret, in, outType(default_value), factor);
+ ret.eval();
+ getQueue().sync();
+ getQueue().enqueue(copy<inType, outType>, ret, in, outType(default_value), factor);
return ret;
}
template<typename inType, typename outType>
void copyArray(Array<outType> &out, Array<inType> const &in)
{
- getQueue().enqueue(copy<inType, outType>,out, in, scalar<outType>(0), 1.0);
+ getQueue().enqueue(copy<inType, outType>, out, in, scalar<outType>(0), 1.0);
}
diff --git a/src/backend/cpu/fft.cpp b/src/backend/cpu/fft.cpp
index e41c8a1..7262e6d 100644
--- a/src/backend/cpu/fft.cpp
+++ b/src/backend/cpu/fft.cpp
@@ -16,6 +16,8 @@
#include <fftw3.h>
#include <copy.hpp>
#include <math.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
@@ -52,7 +54,7 @@ TRANSFORM(fftwf, cfloat)
TRANSFORM(fftw, cdouble)
template<typename T, int rank, bool direction>
-void fft_inplace(Array<T> &in)
+void fft_inplace_(Array<T> in)
{
int t_dims[rank];
int in_embed[rank];
@@ -90,6 +92,12 @@ void fft_inplace(Array<T> &in)
transform.destroy(plan);
}
+template<typename T, int rank, bool direction>
+void fft_inplace(Array<T> &in)
+{
+ getQueue().enqueue(fft_inplace_<T, rank, direction>, in);
+}
+
template<typename To, typename Ti>
struct fftw_real_transform;
@@ -114,14 +122,9 @@ TRANSFORM_REAL(fftwf, float , cfloat , c2r)
TRANSFORM_REAL(fftw , double, cdouble, c2r)
template<typename Tc, typename Tr, int rank>
-Array<Tc> fft_r2c(const Array<Tr> &in)
+void fft_r2c_(Array<Tc> out, const Array<Tr> in)
{
dim4 idims = in.dims();
- dim4 odims = in.dims();
-
- odims[0] = odims[0] / 2 + 1;
-
- Array<Tc> out = createEmptyArray<Tc>(odims);
int t_dims[rank];
int in_embed[rank];
@@ -157,15 +160,23 @@ Array<Tc> fft_r2c(const Array<Tr> &in)
transform.execute(plan);
transform.destroy(plan);
+}
+
+template<typename Tc, typename Tr, int rank>
+Array<Tc> fft_r2c(const Array<Tr> &in)
+{
+ dim4 odims = in.dims();
+ odims[0] = odims[0] / 2 + 1;
+ Array<Tc> out = createEmptyArray<Tc>(odims);
+
+ getQueue().enqueue(fft_r2c_<Tc, Tr, rank>, out, in);
return out;
}
template<typename Tr, typename Tc, int rank>
-Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims)
+void fft_c2r_(Array<Tr> out, const Array<Tc> in, const dim4 odims)
{
- Array<Tr> out = createEmptyArray<Tr>(odims);
-
int t_dims[rank];
int in_embed[rank];
int out_embed[rank];
@@ -200,6 +211,14 @@ Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims)
transform.execute(plan);
transform.destroy(plan);
+}
+
+template<typename Tr, typename Tc, int rank>
+Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims)
+{
+ Array<Tr> out = createEmptyArray<Tr>(odims);
+ getQueue().enqueue(fft_c2r_<Tr, Tc, rank>, out, in, odims);
+
return out;
}
diff --git a/src/backend/cpu/reorder.cpp b/src/backend/cpu/reorder.cpp
index 42da24e..5e1cd8f 100644
--- a/src/backend/cpu/reorder.cpp
+++ b/src/backend/cpu/reorder.cpp
@@ -11,19 +11,14 @@
#include <reorder.hpp>
#include <stdexcept>
#include <err_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
template<typename T>
- Array<T> reorder(const Array<T> &in, const af::dim4 &rdims)
+ void reorder_(Array<T> out, const Array<T> in, const af::dim4 oDims, const af::dim4 rdims)
{
- const af::dim4 iDims = in.dims();
- af::dim4 oDims(0);
- for(int i = 0; i < 4; i++)
- oDims[i] = iDims[rdims[i]];
-
- Array<T> out = createEmptyArray<T>(oDims);
-
T* outPtr = out.get();
const T* inPtr = in.get();
@@ -53,7 +48,18 @@ namespace cpu
}
}
}
+ }
+ template<typename T>
+ Array<T> reorder(const Array<T> &in, const af::dim4 &rdims)
+ {
+ const af::dim4 iDims = in.dims();
+ af::dim4 oDims(0);
+ for(int i = 0; i < 4; i++)
+ oDims[i] = iDims[rdims[i]];
+
+ Array<T> out = createEmptyArray<T>(oDims);
+ getQueue().enqueue(reorder_<T>, out, in, oDims, rdims);
return out;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list