[arrayfire] 204/408: FEAT/TEST: Adding R2C and C2R FFT transforms for all backends
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:58 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 155293f8875365de38715e34aa96eb6ab7d3d932
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Thu Aug 6 23:08:15 2015 -0400
FEAT/TEST: Adding R2C and C2R FFT transforms for all backends
- These transforms use lower memory foot prints, exploiting symmetry
- Added relevant tests
---
include/af/signal.h | 26 +++++++++
src/api/c/fft.cpp | 101 +++++++++++++++++++++++++++++++++++
src/api/c/fft_common.hpp | 42 +++++++++++++++
src/api/cpp/fft.cpp | 101 +++++++++++++++++++++++++++++++++++
src/backend/cpu/fft.cpp | 127 ++++++++++++++++++++++++++++++++++++++++++--
src/backend/cpu/fft.hpp | 6 +++
src/backend/cuda/copy.cu | 1 +
src/backend/cuda/fft.cpp | 107 +++++++++++++++++++++++++++++++++++--
src/backend/cuda/fft.hpp | 6 +++
src/backend/cuda/memory.cpp | 11 ++++
src/backend/opencl/fft.cpp | 125 +++++++++++++++++++++++++++++++++++++++----
src/backend/opencl/fft.hpp | 6 +++
test/fft_real.cpp | 120 +++++++++++++++++++++++++++++++++++++++++
13 files changed, 762 insertions(+), 17 deletions(-)
diff --git a/include/af/signal.h b/include/af/signal.h
index fbbd277..35edb79 100644
--- a/include/af/signal.h
+++ b/include/af/signal.h
@@ -370,6 +370,20 @@ AFAPI array idft(const array& in, const dim4 outDims);
*/
AFAPI array idft(const array& in);
+template<int rank>
+array fftR2C(const array &in,
+ const dim4& dims,
+ const double norm_factor = 0);
+
+
+template<int rank>
+array fftR2C(const array &in,
+ const double norm_factor = 0);
+
+template<int rank>
+array fftC2R(const array &in, bool is_odd = false,
+ const double norm_factor = 0);
+
/**
C++ Interface for convolution any(one through three) dimensional data
@@ -753,6 +767,18 @@ AFAPI af_err af_ifft3(af_array *out, const af_array in, const double norm_factor
*/
AFAPI af_err af_ifft3_inplace(af_array in, const double norm_factor);
+AFAPI af_err af_fft_r2c (af_array *out, const af_array in, const double norm_factor, const dim_t pad0);
+
+AFAPI af_err af_fft2_r2c(af_array *out, const af_array in, const double norm_factor, const dim_t pad0, const dim_t pad1);
+
+AFAPI af_err af_fft3_r2c(af_array *out, const af_array in, const double norm_factor, const dim_t pad0, const dim_t pad1, const dim_t pad2);
+
+AFAPI af_err af_fft_c2r (af_array *out, const af_array in, const double norm_factor, const bool is_odd);
+
+AFAPI af_err af_fft2_c2r(af_array *out, const af_array in, const double norm_factor, const bool is_odd);
+
+AFAPI af_err af_fft3_c2r(af_array *out, const af_array in, const double norm_factor, const bool is_odd);
+
/**
C Interface for convolution on one dimensional data
diff --git a/src/api/c/fft.cpp b/src/api/c/fft.cpp
index 1bc3df6..d472cdb 100644
--- a/src/api/c/fft.cpp
+++ b/src/api/c/fft.cpp
@@ -146,3 +146,104 @@ af_err af_ifft3_inplace(af_array in, const double norm_factor)
{
return fft_inplace<3, false>(in, norm_factor);
}
+
+template<typename inType, typename outType, int rank>
+static af_array fft_r2c(const af_array in, const double norm_factor,
+ const dim_t npad, const dim_t * const pad)
+{
+ return getHandle(fft_r2c<inType, outType, rank>(getArray<inType>(in),
+ norm_factor, npad, pad));
+}
+
+template<int rank>
+static af_err fft_r2c(af_array *out, const af_array in, const double norm_factor, const dim_t npad, const dim_t * const pad)
+{
+ try {
+ ArrayInfo info = getInfo(in);
+ af_dtype type = info.getType();
+ af::dim4 dims = info.dims();
+
+ DIM_ASSERT(1, (dims.ndims()>=rank));
+
+ af_array output;
+ switch(type) {
+ case f32: output = fft_r2c<float , cfloat , rank>(in, norm_factor, npad, pad); break;
+ case f64: output = fft_r2c<double, cdouble , rank>(in, norm_factor, npad, pad); break;
+ default: {
+ TYPE_ERROR(1, type);
+ }
+ }
+ std::swap(*out,output);
+ }
+ CATCHALL;
+
+ return AF_SUCCESS;
+}
+
+af_err af_fft_r2c(af_array *out, const af_array in, const double norm_factor, const dim_t pad0)
+{
+ const dim_t pad[1] = {pad0};
+ return fft_r2c<1>(out, in, norm_factor, (pad0>0?1:0), pad);
+}
+
+af_err af_fft2_r2c(af_array *out, const af_array in, const double norm_factor, const dim_t pad0, const dim_t pad1)
+{
+ const dim_t pad[2] = {pad0, pad1};
+ return fft_r2c<2>(out, in, norm_factor, (pad0>0&&pad1>0?2:0), pad);
+}
+
+af_err af_fft3_r2c(af_array *out, const af_array in, const double norm_factor, const dim_t pad0, const dim_t pad1, const dim_t pad2)
+{
+ const dim_t pad[3] = {pad0, pad1, pad2};
+ return fft_r2c<3>(out, in, norm_factor, (pad0>0&&pad1>0&&pad2>0?3:0), pad);
+}
+
+
+template<typename inType, typename outType, int rank>
+static af_array fft_c2r(const af_array in, const double norm_factor,
+ const dim4 &odims)
+{
+ return getHandle(fft_c2r<inType, outType, rank>(getArray<inType>(in),
+ norm_factor, odims));
+}
+
+template<int rank>
+static af_err fft_c2r(af_array *out, const af_array in, const double norm_factor, const bool is_odd)
+{
+ try {
+ ArrayInfo info = getInfo(in);
+ af_dtype type = info.getType();
+ af::dim4 idims = info.dims();
+
+ DIM_ASSERT(1, (idims.ndims()>=rank));
+
+ dim4 odims = idims;
+ odims[0] = 2 * (odims[0] - 1) + (is_odd ? 1 : 0);
+
+ af_array output;
+ switch(type) {
+ case c32: output = fft_c2r<cfloat , float , rank>(in, norm_factor, odims); break;
+ case c64: output = fft_c2r<cdouble, double , rank>(in, norm_factor, odims); break;
+ default: TYPE_ERROR(1, type);
+ }
+ std::swap(*out,output);
+ }
+ CATCHALL;
+
+ return AF_SUCCESS;
+}
+
+af_err af_fft_c2r(af_array *out, const af_array in, const double norm_factor, const bool is_odd)
+{
+ return fft_c2r<1>(out, in, norm_factor, is_odd);
+}
+
+af_err af_fft2_c2r(af_array *out, const af_array in, const double norm_factor, const bool is_odd)
+{
+ return fft_c2r<2>(out, in, norm_factor, is_odd);
+}
+
+af_err af_fft3_c2r(af_array *out, const af_array in, const double norm_factor, const bool is_odd)
+{
+ return fft_c2r<3>(out, in, norm_factor, is_odd);
+}
diff --git a/src/api/c/fft_common.hpp b/src/api/c/fft_common.hpp
index 8c4de85..bc6af36 100644
--- a/src/api/c/fft_common.hpp
+++ b/src/api/c/fft_common.hpp
@@ -34,3 +34,45 @@ Array<outType> fft(const Array<inType> input, const double norm_factor,
return output;
}
+
+template<typename inType, typename outType, int rank>
+Array<outType> fft_r2c(const Array<inType> input, const double norm_factor,
+ const dim_t npad, const dim_t * const pad)
+{
+ dim4 idims = input.dims();
+
+ bool is_pad = false;
+ for (int i = 0; i < npad; i++) {
+ is_pad |= (pad[i] != idims[i]);
+ }
+
+ Array<inType> tmp = input;
+
+ if (is_pad) {
+ dim4 pdims(1);
+ computePaddedDims(pdims, input.dims(), npad, pad);
+ tmp = padArray<inType, inType>(input, pdims, scalar<inType>(0), norm_factor);
+ }
+
+ Array<outType> output = fft_r2c<outType, inType, rank>(tmp);
+ if (!is_pad && norm_factor != 1) {
+ // Normalize input because tmp was not normalized
+ multiply_inplace(output, norm_factor);
+ }
+
+ return output;
+}
+
+template<typename inType, typename outType, int rank>
+Array<outType> fft_c2r(const Array<inType> input, const double norm_factor,
+ const dim4 &odims)
+{
+ Array<outType> output = fft_c2r<outType, inType, rank>(input, odims);
+
+ if (norm_factor != 1) {
+ // Normalize input because tmp was not normalized
+ multiply_inplace(output, norm_factor);
+ }
+
+ return output;
+}
diff --git a/src/api/cpp/fft.cpp b/src/api/cpp/fft.cpp
index fe92549..1fe4a09 100644
--- a/src/api/cpp/fft.cpp
+++ b/src/api/cpp/fft.cpp
@@ -180,4 +180,105 @@ void ifft3InPlace(array& in, const double norm_factor)
AF_THROW(af_ifft3_inplace(in.get(), norm));
}
+template<>
+AFAPI array fftR2C<1>(const array &in, const dim4 &dims,
+ const double norm_factor)
+{
+ af_array res;
+ AF_THROW(af_fft_r2c(&res, in.get(), norm_factor == 0 ? 1.0 : norm_factor, dims[0]));
+ return array(res);
+}
+
+template<>
+AFAPI array fftR2C<2>(const array &in, const dim4 &dims,
+ const double norm_factor)
+{
+ af_array res;
+ AF_THROW(af_fft2_r2c(&res, in.get(), norm_factor == 0 ? 1.0 : norm_factor, dims[0], dims[1]));
+ return array(res);
+}
+
+template<>
+AFAPI array fftR2C<3>(const array &in, const dim4 &dims,
+ const double norm_factor)
+{
+ af_array res;
+ AF_THROW(af_fft3_r2c(&res, in.get(), norm_factor == 0 ? 1.0 : norm_factor,
+ dims[0], dims[1], dims[2]));
+ return array(res);
+}
+
+inline dim_t getOrigDim(dim_t d, bool is_odd)
+{
+ return 2 * (d - 1) + (is_odd ? 1 : 0);
+}
+
+template<>
+AFAPI array fftC2R<1>(const array &in, const bool is_odd,
+ const double norm_factor)
+{
+ double norm = norm_factor;
+
+ if (norm == 0) {
+ dim4 idims = in.dims();
+ dim_t dim0 = getOrigDim(idims[0], is_odd);
+ norm = 1.0/dim0;
+ }
+
+ af_array res;
+ AF_THROW(af_fft_c2r(&res, in.get(), norm, is_odd));
+ return array(res);
+}
+
+template<>
+AFAPI array fftC2R<2>(const array &in, const bool is_odd,
+ const double norm_factor)
+{
+ double norm = norm_factor;
+
+ if (norm == 0) {
+ dim4 idims = in.dims();
+ dim_t dim0 = getOrigDim(idims[0], is_odd);
+ dim_t dim1 = idims[1];
+ norm = 1.0/(dim0 * dim1);
+ }
+
+
+ af_array res;
+ AF_THROW(af_fft2_c2r(&res, in.get(), norm, is_odd));
+ return array(res);
+}
+
+template<>
+AFAPI array fftC2R<3>(const array &in, const bool is_odd,
+ const double norm_factor)
+{
+ double norm = norm_factor;
+
+ if (norm == 0) {
+ dim4 idims = in.dims();
+ dim_t dim0 = getOrigDim(idims[0], is_odd);
+ dim_t dim1 = idims[1];
+ dim_t dim2 = idims[2];
+ norm = 1.0/(dim0 * dim1 * dim2);
+ }
+
+ af_array res;
+ AF_THROW(af_fft3_c2r(&res, in.get(), norm, is_odd));
+ return array(res);
+}
+
+#define FFT_REAL(rank) \
+ template<> \
+ AFAPI array fftR2C<rank>(const array &in, \
+ const double norm_factor) \
+ { \
+ return fftR2C<rank>(in, in.dims(), norm_factor); \
+ } \
+ \
+
+FFT_REAL(1)
+FFT_REAL(2)
+FFT_REAL(3)
+
}
diff --git a/src/backend/cpu/fft.cpp b/src/backend/cpu/fft.cpp
index b933ec4..e41c8a1 100644
--- a/src/backend/cpu/fft.cpp
+++ b/src/backend/cpu/fft.cpp
@@ -54,12 +54,12 @@ TRANSFORM(fftw, cdouble)
template<typename T, int rank, bool direction>
void fft_inplace(Array<T> &in)
{
- int in_dims[rank];
+ int t_dims[rank];
int in_embed[rank];
const dim4 idims = in.dims();
- computeDims<rank>(in_dims , idims);
+ computeDims<rank>(t_dims , idims);
computeDims<rank>(in_embed , in.getDataDims());
const dim4 istrides = in.strides();
@@ -75,7 +75,7 @@ void fft_inplace(Array<T> &in)
}
plan = transform.create(rank,
- in_dims,
+ t_dims,
(int)batch,
(ctype_t *)in.get(),
in_embed, (int)istrides[0],
@@ -88,7 +88,119 @@ void fft_inplace(Array<T> &in)
transform.execute(plan);
transform.destroy(plan);
+}
+
+template<typename To, typename Ti>
+struct fftw_real_transform;
+
+#define TRANSFORM_REAL(PRE, To, Ti, POST) \
+ template<> \
+ struct fftw_real_transform<To, Ti> \
+ { \
+ typedef PRE##_plan plan_t; \
+ typedef PRE##_complex ctype_t; \
+ \
+ template<typename... Args> \
+ plan_t create(Args... args) \
+ { return PRE##_plan_many_dft_##POST(args...); } \
+ void execute(plan_t plan) { return PRE##_execute(plan); } \
+ void destroy(plan_t plan) { return PRE##_destroy_plan(plan); } \
+ }; \
+
+
+TRANSFORM_REAL(fftwf, cfloat , float , r2c)
+TRANSFORM_REAL(fftw , cdouble, double, r2c)
+TRANSFORM_REAL(fftwf, float , cfloat , c2r)
+TRANSFORM_REAL(fftw , double, cdouble, c2r)
+
+template<typename Tc, typename Tr, int rank>
+Array<Tc> fft_r2c(const Array<Tr> &in)
+{
+ dim4 idims = in.dims();
+ dim4 odims = in.dims();
+
+ odims[0] = odims[0] / 2 + 1;
+
+ Array<Tc> out = createEmptyArray<Tc>(odims);
+
+ int t_dims[rank];
+ int in_embed[rank];
+ int out_embed[rank];
+
+ computeDims<rank>(t_dims , idims);
+ computeDims<rank>(in_embed , in.getDataDims());
+ computeDims<rank>(out_embed , out.getDataDims());
+
+ const dim4 istrides = in.strides();
+ const dim4 ostrides = out.strides();
+
+ typedef typename fftw_real_transform<Tc, Tr>::ctype_t ctype_t;
+ typename fftw_real_transform<Tc, Tr>::plan_t plan;
+
+ fftw_real_transform<Tc, Tr> transform;
+
+ int batch = 1;
+ for (int i = rank; i < 4; i++) {
+ batch *= idims[i];
+ }
+
+ plan = transform.create(rank,
+ t_dims,
+ (int)batch,
+ (Tr *)in.get(),
+ in_embed, (int)istrides[0],
+ (int)istrides[rank],
+ (ctype_t *)out.get(),
+ out_embed, (int)ostrides[0],
+ (int)ostrides[rank],
+ FFTW_ESTIMATE);
+ transform.execute(plan);
+ transform.destroy(plan);
+
+ return out;
+}
+
+template<typename Tr, typename Tc, int rank>
+Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims)
+{
+ Array<Tr> out = createEmptyArray<Tr>(odims);
+
+ int t_dims[rank];
+ int in_embed[rank];
+ int out_embed[rank];
+
+ computeDims<rank>(t_dims , odims);
+ computeDims<rank>(in_embed , in.getDataDims());
+ computeDims<rank>(out_embed , out.getDataDims());
+
+ const dim4 istrides = in.strides();
+ const dim4 ostrides = out.strides();
+
+ typedef typename fftw_real_transform<Tr, Tc>::ctype_t ctype_t;
+ typename fftw_real_transform<Tr, Tc>::plan_t plan;
+
+ fftw_real_transform<Tr, Tc> transform;
+
+ int batch = 1;
+ for (int i = rank; i < 4; i++) {
+ batch *= odims[i];
+ }
+
+ plan = transform.create(rank,
+ t_dims,
+ (int)batch,
+ (ctype_t *)in.get(),
+ in_embed, (int)istrides[0],
+ (int)istrides[rank],
+ (Tr *)out.get(),
+ out_embed, (int)ostrides[0],
+ (int)ostrides[rank],
+ FFTW_ESTIMATE);
+
+ transform.execute(plan);
+ transform.destroy(plan);
+ return out;
}
#define INSTANTIATE(T) \
@@ -102,5 +214,14 @@ void fft_inplace(Array<T> &in)
INSTANTIATE(cfloat )
INSTANTIATE(cdouble)
+#define INSTANTIATE_REAL(Tr, Tc) \
+ template Array<Tc> fft_r2c<Tc, Tr, 1>(const Array<Tr> &in); \
+ template Array<Tc> fft_r2c<Tc, Tr, 2>(const Array<Tr> &in); \
+ template Array<Tc> fft_r2c<Tc, Tr, 3>(const Array<Tr> &in); \
+ template Array<Tr> fft_c2r<Tr, Tc, 1>(const Array<Tc> &in, const dim4 &odims); \
+ template Array<Tr> fft_c2r<Tr, Tc, 2>(const Array<Tc> &in, const dim4 &odims); \
+ template Array<Tr> fft_c2r<Tr, Tc, 3>(const Array<Tc> &in, const dim4 &odims); \
+ INSTANTIATE_REAL(float , cfloat )
+ INSTANTIATE_REAL(double, cdouble)
}
diff --git a/src/backend/cpu/fft.hpp b/src/backend/cpu/fft.hpp
index 0414ecb..02f4c9a 100644
--- a/src/backend/cpu/fft.hpp
+++ b/src/backend/cpu/fft.hpp
@@ -14,4 +14,10 @@ namespace cpu
template<typename T, int rank, bool direction>
void fft_inplace(Array<T> &in);
+
+template<typename Tc, typename Tr, int rank>
+Array<Tc> fft_r2c(const Array<Tr> &in);
+
+template<typename Tr, typename Tc, int rank>
+Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims);
}
diff --git a/src/backend/cuda/copy.cu b/src/backend/cuda/copy.cu
index 0358bfa..43e132c 100644
--- a/src/backend/cuda/copy.cu
+++ b/src/backend/cuda/copy.cu
@@ -70,6 +70,7 @@ namespace cuda
ARG_ASSERT(1, (in.ndims() == dims.ndims()));
Array<outType> ret = createEmptyArray<outType>(dims);
kernel::copy<inType, outType>(ret, in, in.ndims(), default_value, factor);
+ CUDA_CHECK(cudaDeviceSynchronize());
return ret;
}
diff --git a/src/backend/cuda/fft.cpp b/src/backend/cuda/fft.cpp
index ff90cb4..f5f8543 100644
--- a/src/backend/cuda/fft.cpp
+++ b/src/backend/cuda/fft.cpp
@@ -103,11 +103,13 @@ void find_cufft_plan(cufftHandle &plan, int rank, int *n,
break;
}
}
+
// return mHandles[plan_index] if plan_index valid
if (plan_index!=-1) {
plan = planner.mHandles[plan_index];
return;
}
+
// otherwise create a new plan and set it at mAvailSlotIndex
// and finally set it to output plan variable
int slot_index = planner.mAvailSlotIndex;
@@ -116,6 +118,7 @@ void find_cufft_plan(cufftHandle &plan, int rank, int *n,
cufftHandle temp;
cufftResult res = cufftPlanMany(&temp, rank, n, inembed, istride, idist, onembed, ostride, odist, type, batch);
+
// If plan creation fails, clean up the memory we hold on to and try again
if (res != CUFFT_SUCCESS) {
garbageCollect();
@@ -145,6 +148,26 @@ struct cufft_transform;
CUFFT_FUNC(cfloat , C2C)
CUFFT_FUNC(cdouble, Z2Z)
+template<typename To, typename Ti>
+struct cufft_real_transform;
+
+#define CUFFT_REAL_FUNC(To, Ti, TRANSFORM_TYPE) \
+ template<> \
+ struct cufft_real_transform<To, Ti> \
+ { \
+ enum { type = CUFFT_##TRANSFORM_TYPE }; \
+ cufftResult \
+ operator() (cufftHandle plan, Ti *in, To *out) { \
+ return cufftExec##TRANSFORM_TYPE(plan, in, out); \
+ } \
+ };
+
+CUFFT_REAL_FUNC(cfloat , float , R2C)
+CUFFT_REAL_FUNC(cdouble, double, D2Z)
+
+CUFFT_REAL_FUNC(float , cfloat , C2R)
+CUFFT_REAL_FUNC(double, cdouble, Z2D)
+
template<int rank>
void computeDims(int rdims[rank], const dim4 &idims)
{
@@ -159,10 +182,10 @@ void fft_inplace(Array<T> &in)
const dim4 idims = in.dims();
const dim4 istrides = in.strides();
- int in_dims[rank];
+ int t_dims[rank];
int in_embed[rank];
- computeDims<rank>(in_dims, idims);
+ computeDims<rank>(t_dims, idims);
computeDims<rank>(in_embed, in.getDataDims());
int batch = 1;
@@ -171,7 +194,7 @@ void fft_inplace(Array<T> &in)
}
cufftHandle plan;
- find_cufft_plan(plan, rank, in_dims,
+ find_cufft_plan(plan, rank, t_dims,
in_embed , istrides[0], istrides[rank],
in_embed , istrides[0], istrides[rank],
(cufftType)cufft_transform<T>::type, batch);
@@ -180,6 +203,74 @@ void fft_inplace(Array<T> &in)
CUFFT_CHECK(transform(plan, (T *)in.get(), in.get(), direction ? CUFFT_FORWARD : CUFFT_INVERSE));
}
+template<typename Tc, typename Tr, int rank>
+Array<Tc> fft_r2c(const Array<Tr> &in)
+{
+ dim4 idims = in.dims();
+ dim4 odims = in.dims();
+
+ odims[0] = odims[0] / 2 + 1;
+
+ Array<Tc> out = createEmptyArray<Tc>(odims);
+
+ int t_dims[rank];
+ int in_embed[rank], out_embed[rank];
+
+ computeDims<rank>(t_dims, idims);
+ computeDims<rank>(in_embed, in.getDataDims());
+ computeDims<rank>(out_embed, out.getDataDims());
+
+ int batch = 1;
+ for (int i = rank; i < 4; i++) {
+ batch *= idims[i];
+ }
+
+ dim4 istrides = in.strides();
+ dim4 ostrides = out.strides();
+
+ cufftHandle plan;
+ find_cufft_plan(plan, rank, t_dims,
+ in_embed , istrides[0], istrides[rank],
+ out_embed , ostrides[0], ostrides[rank],
+ (cufftType)cufft_real_transform<Tc, Tr>::type, batch);
+
+ cufft_real_transform<Tc, Tr> transform;
+ CUFFT_CHECK(transform(plan, (Tr *)in.get(), out.get()));
+ return out;
+}
+
+template<typename Tr, typename Tc, int rank>
+Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims)
+{
+ Array<Tr> out = createEmptyArray<Tr>(odims);
+
+ int t_dims[rank];
+ int in_embed[rank], out_embed[rank];
+
+ computeDims<rank>(t_dims, odims);
+ computeDims<rank>(in_embed, in.getDataDims());
+ computeDims<rank>(out_embed, out.getDataDims());
+
+ int batch = 1;
+ for (int i = rank; i < 4; i++) {
+ batch *= odims[i];
+ }
+
+ dim4 istrides = in.strides();
+ dim4 ostrides = out.strides();
+
+ cufft_real_transform<Tr, Tc> transform;
+
+ cufftHandle plan;
+ find_cufft_plan(plan, rank, t_dims,
+ in_embed , istrides[0], istrides[rank],
+ out_embed , ostrides[0], ostrides[rank],
+ (cufftType)cufft_real_transform<Tr, Tc>::type, batch);
+
+ CUFFT_CHECK(transform(plan, (Tc *)in.get(), out.get()));
+ return out;
+}
+
#define INSTANTIATE(T) \
template void fft_inplace<T, 1, true >(Array<T> &in); \
template void fft_inplace<T, 2, true >(Array<T> &in); \
@@ -191,4 +282,14 @@ void fft_inplace(Array<T> &in)
INSTANTIATE(cfloat )
INSTANTIATE(cdouble)
+#define INSTANTIATE_REAL(Tr, Tc) \
+ template Array<Tc> fft_r2c<Tc, Tr, 1>(const Array<Tr> &in); \
+ template Array<Tc> fft_r2c<Tc, Tr, 2>(const Array<Tr> &in); \
+ template Array<Tc> fft_r2c<Tc, Tr, 3>(const Array<Tr> &in); \
+ template Array<Tr> fft_c2r<Tr, Tc, 1>(const Array<Tc> &in, const dim4 &odims); \
+ template Array<Tr> fft_c2r<Tr, Tc, 2>(const Array<Tc> &in, const dim4 &odims); \
+ template Array<Tr> fft_c2r<Tr, Tc, 3>(const Array<Tc> &in, const dim4 &odims); \
+
+ INSTANTIATE_REAL(float , cfloat )
+ INSTANTIATE_REAL(double, cdouble)
}
diff --git a/src/backend/cuda/fft.hpp b/src/backend/cuda/fft.hpp
index bac02d5..2d0ee2c 100644
--- a/src/backend/cuda/fft.hpp
+++ b/src/backend/cuda/fft.hpp
@@ -15,4 +15,10 @@ namespace cuda
template<typename T, int rank, bool direction>
void fft_inplace(Array<T> &out);
+template<typename Tc, typename Tr, int rank>
+Array<Tc> fft_r2c(const Array<Tr> &in);
+
+template<typename Tr, typename Tc, int rank>
+Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims);
+
}
diff --git a/src/backend/cuda/memory.cpp b/src/backend/cuda/memory.cpp
index 9e0da35..ee96b80 100644
--- a/src/backend/cuda/memory.cpp
+++ b/src/backend/cuda/memory.cpp
@@ -17,6 +17,8 @@
#include <dispatch.hpp>
#include <platform.hpp>
+#define AF_CUDA_MEM_DEBUG
+
namespace cuda
{
static size_t memory_resolution = 1024; //1KB
@@ -89,6 +91,15 @@ namespace cuda
pinnedFreeWrapper(ptr); // Free it because we are not sure what the size is
}
+ void garbageCollect()
+ {
+ }
+
+ void deviceMemoryInfo(size_t *alloc_bytes, size_t *alloc_buffers,
+ size_t *lock_bytes, size_t *lock_buffers)
+ {
+ }
+
#else
// Manager Class
diff --git a/src/backend/opencl/fft.cpp b/src/backend/opencl/fft.cpp
index 1ce5ac0..d5922f0 100644
--- a/src/backend/opencl/fft.cpp
+++ b/src/backend/opencl/fft.cpp
@@ -20,8 +20,6 @@
#include <string>
#include <cstdio>
#include <memory.hpp>
-#include <iostream>
-#include <handle.hpp>
using af::dim4;
using std::string;
@@ -36,6 +34,7 @@ namespace opencl
class clFFTPlanner
{
friend void find_clfft_plan(clfftPlanHandle &plan,
+ clfftLayout iLayout, clfftLayout oLayout,
clfftDim rank, size_t *clLengths,
size_t *istrides, size_t idist,
size_t *ostrides, size_t odist,
@@ -72,6 +71,7 @@ class clFFTPlanner
};
void find_clfft_plan(clfftPlanHandle &plan,
+ clfftLayout iLayout, clfftLayout oLayout,
clfftDim rank, size_t *clLengths,
size_t *istrides, size_t idist,
size_t *ostrides, size_t odist,
@@ -81,7 +81,7 @@ void find_clfft_plan(clfftPlanHandle &plan,
// create the key string
char key_str_temp[64];
- sprintf(key_str_temp, "%d:", rank);
+ sprintf(key_str_temp, "%d:%d:%d:", iLayout, oLayout, rank);
string key_string(key_str_temp);
@@ -142,13 +142,19 @@ void find_clfft_plan(clfftPlanHandle &plan,
// Context() returns the actual cl_context handle
CLFFT_CHECK(clfftCreateDefaultPlan(&temp, getContext()(), rank, clLengths));
- CLFFT_CHECK(clfftSetLayout(temp, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED));
+ // complex to complex
+ if (iLayout == oLayout) {
+ CLFFT_CHECK(clfftSetResultLocation(temp, CLFFT_INPLACE));
+ } else {
+ CLFFT_CHECK(clfftSetResultLocation(temp, CLFFT_OUTOFPLACE));
+ }
+
+ CLFFT_CHECK(clfftSetLayout(temp, iLayout, oLayout));
CLFFT_CHECK(clfftSetPlanBatchSize(temp, batch));
CLFFT_CHECK(clfftSetPlanDistance(temp, idist, odist));
CLFFT_CHECK(clfftSetPlanInStride(temp, rank, istrides));
CLFFT_CHECK(clfftSetPlanOutStride(temp, rank, ostrides));
CLFFT_CHECK(clfftSetPlanPrecision(temp, precision));
- CLFFT_CHECK(clfftSetResultLocation(temp, CLFFT_INPLACE));
CLFFT_CHECK(clfftSetPlanScale(temp, CLFFT_BACKWARD, 1.0));
// getQueue() returns object of type CommandQueue
@@ -201,20 +207,22 @@ template<typename T, int rank, bool direction>
void fft_inplace(Array<T> &in)
{
verifySupported<rank>(in.dims());
- size_t idims[4], istrides[4], iembed[4];
+ size_t tdims[4], istrides[4];
- computeDims(idims , in.dims());
- computeDims(iembed , in.getDataDims());
+ computeDims(tdims , in.dims());
computeDims(istrides, in.strides());
clfftPlanHandle plan;
int batch = 1;
for (int i = rank; i < 4; i++) {
- batch *= idims[i];
+ batch *= tdims[i];
}
- find_clfft_plan(plan, (clfftDim)rank, idims,
+ find_clfft_plan(plan,
+ CLFFT_COMPLEX_INTERLEAVED,
+ CLFFT_COMPLEX_INTERLEAVED,
+ (clfftDim)rank, tdims,
istrides, istrides[rank],
istrides, istrides[rank],
(clfftPrecision)Precision<T>::type,
@@ -229,14 +237,109 @@ void fft_inplace(Array<T> &in)
&imem, &imem, NULL));
}
+template<typename Tc, typename Tr, int rank>
+Array<Tc> fft_r2c(const Array<Tr> &in)
+{
+ dim4 odims = in.dims();
+
+ odims[0] = odims[0] / 2 + 1;
+
+ Array<Tc> out = createEmptyArray<Tc>(odims);
+
+ verifySupported<rank>(in.dims());
+ size_t tdims[4], istrides[4], ostrides[4];
+
+ computeDims(tdims , in.dims());
+ computeDims(istrides, in.strides());
+ computeDims(ostrides, out.strides());
+
+ clfftPlanHandle plan;
+
+ int batch = 1;
+ for (int i = rank; i < 4; i++) {
+ batch *= tdims[i];
+ }
+
+ find_clfft_plan(plan,
+ CLFFT_REAL,
+ CLFFT_HERMITIAN_INTERLEAVED,
+ (clfftDim)rank, tdims,
+ istrides, istrides[rank],
+ ostrides, ostrides[rank],
+ (clfftPrecision)Precision<Tc>::type,
+ batch);
+
+ cl_mem imem = (*in.get())();
+ cl_mem omem = (*out.get())();
+ cl_command_queue queue = getQueue()();
+
+ CLFFT_CHECK(clfftEnqueueTransform(plan,
+ CLFFT_FORWARD,
+ 1, &queue, 0, NULL, NULL,
+ &imem, &omem, NULL));
+
+ return out;
+}
+
+template<typename Tr, typename Tc, int rank>
+Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims)
+{
+ Array<Tr> out = createEmptyArray<Tr>(odims);
+
+ verifySupported<rank>(odims);
+ size_t tdims[4], istrides[4], ostrides[4];
+
+ computeDims(tdims , odims);
+ computeDims(istrides, in.strides());
+ computeDims(ostrides, out.strides());
+
+ clfftPlanHandle plan;
+
+ int batch = 1;
+ for (int i = rank; i < 4; i++) {
+ batch *= tdims[i];
+ }
+
+ find_clfft_plan(plan,
+ CLFFT_HERMITIAN_INTERLEAVED,
+ CLFFT_REAL,
+ (clfftDim)rank, tdims,
+ istrides, istrides[rank],
+ ostrides, ostrides[rank],
+ (clfftPrecision)Precision<Tc>::type,
+ batch);
+
+ cl_mem imem = (*in.get())();
+ cl_mem omem = (*out.get())();
+ cl_command_queue queue = getQueue()();
+
+ CLFFT_CHECK(clfftEnqueueTransform(plan,
+ CLFFT_BACKWARD,
+ 1, &queue, 0, NULL, NULL,
+ &imem, &omem, NULL));
+
+ return out;
+}
+
#define INSTANTIATE(T) \
template void fft_inplace<T, 1, true >(Array<T> &in); \
template void fft_inplace<T, 2, true >(Array<T> &in); \
template void fft_inplace<T, 3, true >(Array<T> &in); \
template void fft_inplace<T, 1, false>(Array<T> &in); \
template void fft_inplace<T, 2, false>(Array<T> &in); \
- template void fft_inplace<T, 3, false>(Array<T> &in);
+ template void fft_inplace<T, 3, false>(Array<T> &in); \
INSTANTIATE(cfloat )
INSTANTIATE(cdouble)
+
+#define INSTANTIATE_REAL(Tr, Tc) \
+ template Array<Tc> fft_r2c<Tc, Tr, 1>(const Array<Tr> &in); \
+ template Array<Tc> fft_r2c<Tc, Tr, 2>(const Array<Tr> &in); \
+ template Array<Tc> fft_r2c<Tc, Tr, 3>(const Array<Tr> &in); \
+ template Array<Tr> fft_c2r<Tr, Tc, 1>(const Array<Tc> &in, const dim4 &odims); \
+ template Array<Tr> fft_c2r<Tr, Tc, 2>(const Array<Tc> &in, const dim4 &odims); \
+ template Array<Tr> fft_c2r<Tr, Tc, 3>(const Array<Tc> &in, const dim4 &odims); \
+
+ INSTANTIATE_REAL(float , cfloat )
+ INSTANTIATE_REAL(double, cdouble)
}
diff --git a/src/backend/opencl/fft.hpp b/src/backend/opencl/fft.hpp
index 3b48254..a1d9e6e 100644
--- a/src/backend/opencl/fft.hpp
+++ b/src/backend/opencl/fft.hpp
@@ -15,4 +15,10 @@ namespace opencl
template<typename T, int rank, bool direction>
void fft_inplace(Array<T> &in);
+template<typename Tc, typename Tr, int rank>
+Array<Tc> fft_r2c(const Array<Tr> &in);
+
+template<typename Tr, typename Tc, int rank>
+Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims);
+
}
diff --git a/test/fft_real.cpp b/test/fft_real.cpp
new file mode 100644
index 0000000..c8d9a55
--- /dev/null
+++ b/test/fft_real.cpp
@@ -0,0 +1,120 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <gtest/gtest.h>
+#include <arrayfire.h>
+#include <af/dim4.hpp>
+#include <af/traits.hpp>
+#include <string>
+#include <vector>
+#include <stdexcept>
+#include <testHelpers.hpp>
+
+using std::string;
+using std::vector;
+using af::cfloat;
+using af::cdouble;
+
+
+template<typename T>
+class FFT_REAL : public ::testing::Test
+{
+};
+
+typedef ::testing::Types<af::cfloat, af::cdouble> TestTypes;
+TYPED_TEST_CASE(FFT_REAL, TestTypes);
+
+template<int rank>
+af::array fft(const af::array &in, double norm)
+{
+ switch(rank) {
+ case 1: return af::fftNorm(in, norm);
+ case 2: return af::fft2Norm(in, norm);
+ case 3: return af::fft3Norm(in, norm);
+ default: return in;
+ }
+}
+
+#define MY_ASSERT_NEAR(aa, bb, cc) ASSERT_NEAR(abs(aa), abs(bb), (cc))
+
+template<typename Tc, int rank>
+void fft_real(af::dim4 dims)
+{
+ typedef typename af::dtype_traits<Tc>::base_type Tr;
+ if (noDoubleTests<Tr>()) return;
+
+ af::dtype ty = (af::dtype)af::dtype_traits<Tr>::af_type;
+ af::array a = af::randu(dims, ty);
+
+ bool is_odd = dims[0] & 1;
+
+ int dim0 = dims[0] / 2 + 1;
+
+ double norm = 1;
+ for (int i = 0; i < rank; i++) norm *= dims[i];
+ norm = 1/norm;
+
+ af::array as = af::fftR2C<rank>(a, norm);
+ af::array af = fft<rank>(a, norm);
+
+
+ std::vector<Tc> has(as.elements());
+ std::vector<Tc> haf(af.elements());
+
+ as.host(&has[0]);
+ af.host(&haf[0]);
+
+ for (int j = 0; j < a.elements() / dims[0]; j++) {
+ for (int i = 0; i < dim0; i++) {
+ MY_ASSERT_NEAR(haf[j * dims[0] + i], has[j * dim0 + i], 1E-2) << "at " << j * dims[0] + i;
+ }
+ }
+
+ af::array b = af::fftC2R<rank>(as, is_odd, 1);
+
+ std::vector<Tr> ha(a.elements());
+ std::vector<Tr> hb(a.elements());
+
+ a.host(&ha[0]);
+ b.host(&hb[0]);
+
+ for (int j = 0; j < a.elements(); j++) {
+ ASSERT_NEAR(ha[j], hb[j], 1E-2);
+ }
+}
+
+TYPED_TEST(FFT_REAL, Even1D)
+{
+ fft_real<TypeParam, 1>(af::dim4(1024, 256));
+}
+
+TYPED_TEST(FFT_REAL, Odd1D)
+{
+ fft_real<TypeParam, 1>(af::dim4(625, 256));
+}
+
+TYPED_TEST(FFT_REAL, Even2D)
+{
+ fft_real<TypeParam, 2>(af::dim4(1024, 256));
+}
+
+TYPED_TEST(FFT_REAL, Odd2D)
+{
+ fft_real<TypeParam, 2>(af::dim4(625, 256));
+}
+
+TYPED_TEST(FFT_REAL, Even3D)
+{
+ fft_real<TypeParam, 3>(af::dim4(32, 32, 32));
+}
+
+TYPED_TEST(FFT_REAL, Odd3D)
+{
+ fft_real<TypeParam, 3>(af::dim4(25, 32, 32));
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list