[arrayfire] 183/408: Making fft_inplace consistent across all backends
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:52 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit f816f419cf503270e2c8ec8a017a74a1c9532742
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Tue Aug 4 15:57:24 2015 -0400
Making fft_inplace consistent across all backends
- Was previously fft_common
---
src/backend/cpu/fft.cpp | 21 +++++++++------------
src/backend/cpu/fft.hpp | 2 ++
src/backend/cuda/fft.cpp | 13 +++++--------
src/backend/cuda/fft.hpp | 2 +-
src/backend/cuda/fftconvolve.cu | 8 ++++----
src/backend/opencl/fft.cpp | 23 ++++++-----------------
src/backend/opencl/fft.hpp | 2 +-
src/backend/opencl/fftconvolve.cpp | 6 +++---
src/backend/opencl/kernel/fftconvolve.hpp | 12 +++++++++---
src/backend/opencl/kernel/fftconvolve_reorder.cl | 15 ++++++++-------
10 files changed, 48 insertions(+), 56 deletions(-)
diff --git a/src/backend/cpu/fft.cpp b/src/backend/cpu/fft.cpp
index 37023b4..9a7696a 100644
--- a/src/backend/cpu/fft.cpp
+++ b/src/backend/cpu/fft.cpp
@@ -51,21 +51,18 @@ struct fftw_transform;
TRANSFORM(fftwf, cfloat)
TRANSFORM(fftw, cdouble)
-template<typename T, int rank, int direction>
-void fft_common(Array <T> &out, const Array<T> &in)
+template<typename T, int rank, bool direction>
+void fft_inplace(Array<T> &in)
{
int in_dims[rank];
int in_embed[rank];
- int out_embed[rank];
const dim4 idims = in.dims();
computeDims<rank>(in_dims , idims);
computeDims<rank>(in_embed , in.getDataDims());
- computeDims<rank>(out_embed, out.getDataDims());
const dim4 istrides = in.strides();
- const dim4 ostrides = out.strides();
typedef typename fftw_transform<T>::ctype_t ctype_t;
typename fftw_transform<T>::plan_t plan;
@@ -83,9 +80,9 @@ void fft_common(Array <T> &out, const Array<T> &in)
(ctype_t *)in.get(),
in_embed, (int)istrides[0],
(int)istrides[rank],
- (ctype_t *)out.get(),
- out_embed, (int)ostrides[0],
- (int)ostrides[rank],
+ (ctype_t *)in.get(),
+ in_embed, (int)istrides[0],
+ (int)istrides[rank],
direction ? FFTW_FORWARD : FFTW_BACKWARD,
FFTW_ESTIMATE);
@@ -113,7 +110,7 @@ Array<outType> fft(Array<inType> const &in, double norm_factor, dim_t const npad
computePaddedDims(pdims, in.dims(), npad, pad);
Array<outType> ret = padArray<inType, outType>(in, pdims);
- fft_common<outType, rank, true>(ret, ret);
+ fft_inplace<outType, rank, true>(ret);
return ret;
}
@@ -126,7 +123,7 @@ Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t co
computePaddedDims(pdims, in.dims(), npad, pad);
Array<T> ret = padArray<T, T>(in, pdims, scalar<T>(0), norm_factor);
- fft_common<T, rank, false>(ret, ret);
+ fft_inplace<T, rank, false>(ret);
return ret;
}
@@ -139,13 +136,13 @@ Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t co
INSTANTIATE1(float , cfloat )
INSTANTIATE1(double , cdouble)
-#define INSTANTIATE2(T)\
+#define INSTANTIATE2(T) \
template Array<T> fft <T, T, 1, false>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
template Array<T> fft <T, T, 2, false>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
template Array<T> fft <T, T, 3, false>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
template Array<T> ifft<T, 1>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
template Array<T> ifft<T, 2>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
- template Array<T> ifft<T, 3>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad);
+ template Array<T> ifft<T, 3>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
INSTANTIATE2(cfloat )
INSTANTIATE2(cdouble)
diff --git a/src/backend/cpu/fft.hpp b/src/backend/cpu/fft.hpp
index 252690c..eff44cd 100644
--- a/src/backend/cpu/fft.hpp
+++ b/src/backend/cpu/fft.hpp
@@ -18,4 +18,6 @@ Array<outType> fft(Array<inType> const &in, double norm_factor, dim_t const npad
template<typename T, int rank>
Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t const * const pad);
+template<typename T, int rank, bool direction>
+void fft_inplace(Array<T> &in);
}
diff --git a/src/backend/cuda/fft.cpp b/src/backend/cuda/fft.cpp
index 31d1f7c..a762c8b 100644
--- a/src/backend/cuda/fft.cpp
+++ b/src/backend/cuda/fft.cpp
@@ -154,19 +154,16 @@ void computeDims(int rdims[rank], const dim4 &idims)
}
template<typename T, int rank, bool direction>
-void fft_common(Array<T> &out, const Array<T> &in)
+void fft_inplace(Array<T> &in)
{
const dim4 idims = in.dims();
const dim4 istrides = in.strides();
- const dim4 ostrides = out.strides();
int in_dims[rank];
int in_embed[rank];
- int out_embed[rank];
computeDims<rank>(in_dims, idims);
computeDims<rank>(in_embed, in.getDataDims());
- computeDims<rank>(out_embed, out.getDataDims());
int batch = 1;
for (int i = rank; i < 4; i++) {
@@ -176,11 +173,11 @@ void fft_common(Array<T> &out, const Array<T> &in)
cufftHandle plan;
find_cufft_plan(plan, rank, in_dims,
in_embed , istrides[0], istrides[rank],
- out_embed, ostrides[0], ostrides[rank],
+ in_embed , istrides[0], istrides[rank],
(cufftType)cufft_transform<T>::type, batch);
cufft_transform<T> transform;
- CUFFT_CHECK(transform(plan, (T *)in.get(), out.get(), direction ? CUFFT_FORWARD : CUFFT_INVERSE));
+ CUFFT_CHECK(transform(plan, (T *)in.get(), in.get(), direction ? CUFFT_FORWARD : CUFFT_INVERSE));
}
void computePaddedDims(dim4 &pdims,
@@ -202,7 +199,7 @@ Array<outType> fft(Array<inType> const &in, double norm_factor, dim_t const npad
computePaddedDims(pdims, in.dims(), npad, pad);
Array<outType> ret = padArray<inType, outType>(in, pdims, scalar<outType>(0), norm_factor);
- fft_common<outType, rank, true>(ret, ret);
+ fft_inplace<outType, rank, true>(ret);
return ret;
}
@@ -216,7 +213,7 @@ Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t co
computePaddedDims(pdims, in.dims(), npad, pad);
Array<T> ret = padArray<T, T>(in, pdims, scalar<T>(0), norm_factor);
- fft_common<T, rank, false>(ret, ret);
+ fft_inplace<T, rank, false>(ret);
return ret;
}
diff --git a/src/backend/cuda/fft.hpp b/src/backend/cuda/fft.hpp
index bde7c2c..ee3ef14 100644
--- a/src/backend/cuda/fft.hpp
+++ b/src/backend/cuda/fft.hpp
@@ -19,6 +19,6 @@ template<typename T, int rank>
Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t const * const pad);
template<typename T, int rank, bool direction>
-void fft_common(Array<T> &out, const Array<T> &in);
+void fft_inplace(Array<T> &out);
}
diff --git a/src/backend/cuda/fftconvolve.cu b/src/backend/cuda/fftconvolve.cu
index 677fec7..97edeec 100644
--- a/src/backend/cuda/fftconvolve.cu
+++ b/src/backend/cuda/fftconvolve.cu
@@ -78,8 +78,8 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
kernel::packDataHelper<cT, T>(signal_packed, filter_packed, signal, filter, baseDim);
- fft_common<cT, baseDim, true>(signal_packed, signal_packed);
- fft_common<cT, baseDim, true>(filter_packed, filter_packed);
+ fft_inplace<cT, baseDim, true>(signal_packed);
+ fft_inplace<cT, baseDim, true>(filter_packed);
Array<T> out = createEmptyArray<T>(oDims);
@@ -89,13 +89,13 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
kernel::complexMultiplyHelper<T, cT>(out, signal_packed, filter_packed, signal, filter, kind);
if (kind == CONVOLVE_BATCH_KERNEL) {
- fft_common<cT, baseDim, false>(filter_packed, filter_packed);
+ fft_inplace<cT, baseDim, false>(filter_packed);
if (expand)
kernel::reorderOutputHelper<T, cT, roundOut, baseDim, true >(out, filter_packed, signal, filter, kind);
else
kernel::reorderOutputHelper<T, cT, roundOut, baseDim, false>(out, filter_packed, signal, filter, kind);
} else {
- fft_common<cT, baseDim, false>(signal_packed, signal_packed);
+ fft_inplace<cT, baseDim, false>(signal_packed);
if (expand)
kernel::reorderOutputHelper<T, cT, roundOut, baseDim, true >(out, signal_packed, signal, filter, kind);
else
diff --git a/src/backend/opencl/fft.cpp b/src/backend/opencl/fft.cpp
index 80a2a75..5094300 100644
--- a/src/backend/opencl/fft.cpp
+++ b/src/backend/opencl/fft.cpp
@@ -149,6 +149,7 @@ void find_clfft_plan(clfftPlanHandle &plan,
CLFFT_CHECK(clfftSetPlanOutStride(temp, rank, ostrides));
CLFFT_CHECK(clfftSetPlanPrecision(temp, precision));
CLFFT_CHECK(clfftSetResultLocation(temp, CLFFT_INPLACE));
+ CLFFT_CHECK(clfftSetPlanScale(temp, CLFFT_BACKWARD, 1.0));
// getQueue() returns object of type CommandQueue
// CommandQueue() returns the actual cl_command_queue handle
@@ -172,19 +173,14 @@ void computeDims(size_t rdims[4], const dim4 &idims)
}
template<typename T, int rank, bool direction>
-void fft_common(Array<T> &out, const Array<T> &in)
+void fft_inplace(Array<T> &in)
{
size_t idims[4], istrides[4], iembed[4];
- size_t odims[4], ostrides[4], oembed[4];
computeDims(idims , in.dims());
computeDims(iembed , in.getDataDims());
computeDims(istrides, in.strides());
- computeDims(odims , out.dims());
- computeDims(oembed , out.getDataDims());
- computeDims(ostrides, out.strides());
-
clfftPlanHandle plan;
int batch = 1;
@@ -194,18 +190,17 @@ void fft_common(Array<T> &out, const Array<T> &in)
find_clfft_plan(plan, (clfftDim)rank, idims,
istrides, istrides[rank],
- ostrides, ostrides[rank],
+ istrides, istrides[rank],
(clfftPrecision)Precision<T>::type,
batch);
cl_mem imem = (*in.get())();
- cl_mem omem = (*out.get())();
cl_command_queue queue = getQueue()();
CLFFT_CHECK(clfftEnqueueTransform(plan,
direction ? CLFFT_FORWARD : CLFFT_BACKWARD,
1, &queue, 0, NULL, NULL,
- &imem, &omem, NULL));
+ &imem, &imem, NULL));
}
void computePaddedDims(dim4 &pdims,
@@ -253,7 +248,7 @@ Array<outType> fft(Array<inType> const &in, double norm_factor, dim_t const npad
verifySupported<rank>(pdims);
Array<outType> ret = padArray<inType, outType>(in, pdims, scalar<outType>(0), norm_factor);
- fft_common<outType, rank, true>(ret, ret);
+ fft_inplace<outType, rank, true>(ret);
return ret;
}
@@ -267,14 +262,8 @@ Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t co
computePaddedDims(pdims, in.dims(), npad, pad);
verifySupported<rank>(pdims);
- // the input norm_factor is further scaled
- // based on the input dimensions to match
- // cuFFT behavior
- for (int i=0; i<rank; i++)
- norm_factor *= pdims[i];
-
Array<T> ret = padArray<T, T>(in, pdims, scalar<T>(0), norm_factor);
- fft_common<T, rank, false>(ret, ret);
+ fft_inplace<T, rank, false>(ret);
return ret;
}
diff --git a/src/backend/opencl/fft.hpp b/src/backend/opencl/fft.hpp
index e1ccb97..2406f5b 100644
--- a/src/backend/opencl/fft.hpp
+++ b/src/backend/opencl/fft.hpp
@@ -19,6 +19,6 @@ template<typename T, int rank>
Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t const * const pad);
template<typename T, int rank, bool direction>
-void fft_common(Array<T> &out, const Array<T> &in);
+void fft_inplace(Array<T> &in);
}
diff --git a/src/backend/opencl/fftconvolve.cpp b/src/backend/opencl/fftconvolve.cpp
index ab446b9..d97f83a 100644
--- a/src/backend/opencl/fftconvolve.cpp
+++ b/src/backend/opencl/fftconvolve.cpp
@@ -78,7 +78,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
kernel::packDataHelper<cT, T, isDouble, convT>(packed, signal, filter, baseDim, kind);
- fft_common<cT, baseDim, true>(packed, packed);
+ fft_inplace<cT, baseDim, true>(packed);
kernel::complexMultiplyHelper<cT, T, isDouble, convT>(packed, signal, filter, baseDim, kind);
@@ -95,7 +95,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
}
Array<cT> subPacked = createSubArray<cT>(packed, seqs);
- fft_common<cT, baseDim, false>(subPacked, subPacked);
+ fft_inplace<cT, baseDim, false>(subPacked);
}
else {
std::vector<af_seq> seqs;
@@ -109,7 +109,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
}
Array<cT> subPacked = createSubArray<cT>(packed, seqs);
- fft_common<cT, baseDim, false>(subPacked, subPacked);
+ fft_inplace<cT, baseDim, false>(subPacked);
}
Array<T> out = createEmptyArray<T>(oDims);
diff --git a/src/backend/opencl/kernel/fftconvolve.hpp b/src/backend/opencl/kernel/fftconvolve.hpp
index fb29a42..bc65ee7 100644
--- a/src/backend/opencl/kernel/fftconvolve.hpp
+++ b/src/backend/opencl/kernel/fftconvolve.hpp
@@ -238,6 +238,12 @@ void reorderOutputHelper(Param out,
static std::map<int, Program*> fftconvolveProgs;
static std::map<int, Kernel*> roKernel;
+ int fftScale = 1;
+
+ // Calculate the scale by which to divide clFFT results
+ for (int k = 0; k < baseDim; k++)
+ fftScale *= packed.info.dims[k];
+
int device = getActiveDeviceId();
std::call_once( compileFlags[device], [device] () {
@@ -279,19 +285,19 @@ void reorderOutputHelper(Param out,
auto roOp = make_kernel<Buffer, KParam,
Buffer, KParam,
KParam, const int,
- const int> (*roKernel[device]);
+ const int, const int> (*roKernel[device]);
if (kind == CONVOLVE_BATCH_KERNEL) {
roOp(EnqueueArgs(getQueue(), global, local),
*out.data, out.info,
*filter_tmp.data, filter_tmp.info,
- filter.info, sig_half_d0, baseDim);
+ filter.info, sig_half_d0, baseDim, fftScale);
}
else {
roOp(EnqueueArgs(getQueue(), global, local),
*out.data, out.info,
*sig_tmp.data, sig_tmp.info,
- filter.info, sig_half_d0, baseDim);
+ filter.info, sig_half_d0, baseDim, fftScale);
}
CL_DEBUG_FINISH(getQueue());
} catch (cl::Error err) {
diff --git a/src/backend/opencl/kernel/fftconvolve_reorder.cl b/src/backend/opencl/kernel/fftconvolve_reorder.cl
index 6f8269a..1eb7cde 100644
--- a/src/backend/opencl/kernel/fftconvolve_reorder.cl
+++ b/src/backend/opencl/kernel/fftconvolve_reorder.cl
@@ -15,7 +15,8 @@ void reorder_output(
KParam iInfo,
KParam fInfo,
const int half_di0,
- const int baseDim)
+ const int baseDim,
+ const int fftScale)
{
const int t = get_global_id(0);
@@ -68,9 +69,9 @@ void reorder_output(
// Copy top elements
int iidx = iInfo.offset + ti3 + ti2 + ti1 + ti0 * 2;
#if ROUND_OUT == 1
- d_out[oidx] = (T)round(d_in[iidx]);
+ d_out[oidx] = (T)round(d_in[iidx] / fftScale);
#else
- d_out[oidx] = (T)(d_in[iidx]);
+ d_out[oidx] = (T)(d_in[iidx] / fftScale);
#endif
}
else if (ti0 < half_di0 + fInfo.dims[0] - 1) {
@@ -78,18 +79,18 @@ void reorder_output(
int iidx1 = iInfo.offset + ti3 + ti2 + ti1 + ti0 * 2;
int iidx2 = iInfo.offset + ti3 + ti2 + ti1 + (ti0 - half_di0) * 2 + 1;
#if ROUND_OUT == 1
- d_out[oidx] = (T)round((d_in[iidx1] + d_in[iidx2]));
+ d_out[oidx] = (T)round((d_in[iidx1] + d_in[iidx2]) / fftScale);
#else
- d_out[oidx] = (T)((d_in[iidx1] + d_in[iidx2]));
+ d_out[oidx] = (T)((d_in[iidx1] + d_in[iidx2]) / fftScale);
#endif
}
else {
// Copy bottom elements
const int iidx = iInfo.offset + ti3 + ti2 + ti1 + (ti0 - half_di0) * 2 + 1;
#if ROUND_OUT == 1
- d_out[oidx] = (T)round(d_in[iidx]);
+ d_out[oidx] = (T)round(d_in[iidx] / fftScale);
#else
- d_out[oidx] = (T)(d_in[iidx]);
+ d_out[oidx] = (T)(d_in[iidx] / fftScale);
#endif
}
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list