[arrayfire] 183/408: Making fft_inplace consistent across all backends

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:52 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit f816f419cf503270e2c8ec8a017a74a1c9532742
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Tue Aug 4 15:57:24 2015 -0400

    Making fft_inplace consistent across all backends
    
    - The function was previously named fft_common
---
 src/backend/cpu/fft.cpp                          | 21 +++++++++------------
 src/backend/cpu/fft.hpp                          |  2 ++
 src/backend/cuda/fft.cpp                         | 13 +++++--------
 src/backend/cuda/fft.hpp                         |  2 +-
 src/backend/cuda/fftconvolve.cu                  |  8 ++++----
 src/backend/opencl/fft.cpp                       | 23 ++++++-----------------
 src/backend/opencl/fft.hpp                       |  2 +-
 src/backend/opencl/fftconvolve.cpp               |  6 +++---
 src/backend/opencl/kernel/fftconvolve.hpp        | 12 +++++++++---
 src/backend/opencl/kernel/fftconvolve_reorder.cl | 15 ++++++++-------
 10 files changed, 48 insertions(+), 56 deletions(-)

diff --git a/src/backend/cpu/fft.cpp b/src/backend/cpu/fft.cpp
index 37023b4..9a7696a 100644
--- a/src/backend/cpu/fft.cpp
+++ b/src/backend/cpu/fft.cpp
@@ -51,21 +51,18 @@ struct fftw_transform;
 TRANSFORM(fftwf, cfloat)
 TRANSFORM(fftw, cdouble)
 
-template<typename T, int rank, int direction>
-void fft_common(Array <T> &out, const Array<T> &in)
+template<typename T, int rank, bool direction>
+void fft_inplace(Array<T> &in)
 {
     int in_dims[rank];
     int in_embed[rank];
-    int out_embed[rank];
 
     const dim4 idims = in.dims();
 
     computeDims<rank>(in_dims  , idims);
     computeDims<rank>(in_embed , in.getDataDims());
-    computeDims<rank>(out_embed, out.getDataDims());
 
     const dim4 istrides = in.strides();
-    const dim4 ostrides = out.strides();
 
     typedef typename fftw_transform<T>::ctype_t ctype_t;
     typename fftw_transform<T>::plan_t plan;
@@ -83,9 +80,9 @@ void fft_common(Array <T> &out, const Array<T> &in)
                             (ctype_t *)in.get(),
                             in_embed, (int)istrides[0],
                             (int)istrides[rank],
-                            (ctype_t *)out.get(),
-                            out_embed, (int)ostrides[0],
-                            (int)ostrides[rank],
+                            (ctype_t *)in.get(),
+                            in_embed, (int)istrides[0],
+                            (int)istrides[rank],
                             direction ? FFTW_FORWARD : FFTW_BACKWARD,
                             FFTW_ESTIMATE);
 
@@ -113,7 +110,7 @@ Array<outType> fft(Array<inType> const &in, double norm_factor, dim_t const npad
     computePaddedDims(pdims, in.dims(), npad, pad);
 
     Array<outType> ret = padArray<inType, outType>(in, pdims);
-    fft_common<outType, rank, true>(ret, ret);
+    fft_inplace<outType, rank, true>(ret);
     return ret;
 }
 
@@ -126,7 +123,7 @@ Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t co
     computePaddedDims(pdims, in.dims(), npad, pad);
 
     Array<T> ret = padArray<T, T>(in, pdims, scalar<T>(0), norm_factor);
-    fft_common<T, rank, false>(ret, ret);
+    fft_inplace<T, rank, false>(ret);
 
     return ret;
 }
@@ -139,13 +136,13 @@ Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t co
 INSTANTIATE1(float  , cfloat )
 INSTANTIATE1(double , cdouble)
 
-#define INSTANTIATE2(T)\
+#define INSTANTIATE2(T)                                                 \
     template Array<T> fft <T, T, 1, false>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
     template Array<T> fft <T, T, 2, false>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
     template Array<T> fft <T, T, 3, false>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
     template Array<T> ifft<T, 1>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
     template Array<T> ifft<T, 2>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
-    template Array<T> ifft<T, 3>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad);
+    template Array<T> ifft<T, 3>(const Array<T> &in, double norm_factor, dim_t const npad, dim_t const * const pad); \
 
 INSTANTIATE2(cfloat )
 INSTANTIATE2(cdouble)
diff --git a/src/backend/cpu/fft.hpp b/src/backend/cpu/fft.hpp
index 252690c..eff44cd 100644
--- a/src/backend/cpu/fft.hpp
+++ b/src/backend/cpu/fft.hpp
@@ -18,4 +18,6 @@ Array<outType> fft(Array<inType> const &in, double norm_factor, dim_t const npad
 template<typename T, int rank>
 Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t const * const pad);
 
+template<typename T, int rank, bool direction>
+void fft_inplace(Array<T> &in);
 }
diff --git a/src/backend/cuda/fft.cpp b/src/backend/cuda/fft.cpp
index 31d1f7c..a762c8b 100644
--- a/src/backend/cuda/fft.cpp
+++ b/src/backend/cuda/fft.cpp
@@ -154,19 +154,16 @@ void computeDims(int rdims[rank], const dim4 &idims)
 }
 
 template<typename T, int rank, bool direction>
-void fft_common(Array<T> &out, const Array<T> &in)
+void fft_inplace(Array<T> &in)
 {
     const dim4 idims    = in.dims();
     const dim4 istrides = in.strides();
-    const dim4 ostrides = out.strides();
 
     int in_dims[rank];
     int in_embed[rank];
-    int out_embed[rank];
 
     computeDims<rank>(in_dims, idims);
     computeDims<rank>(in_embed, in.getDataDims());
-    computeDims<rank>(out_embed, out.getDataDims());
 
     int batch = 1;
     for (int i = rank; i < 4; i++) {
@@ -176,11 +173,11 @@ void fft_common(Array<T> &out, const Array<T> &in)
     cufftHandle plan;
     find_cufft_plan(plan, rank, in_dims,
                     in_embed , istrides[0], istrides[rank],
-                    out_embed, ostrides[0], ostrides[rank],
+                    in_embed , istrides[0], istrides[rank],
                     (cufftType)cufft_transform<T>::type, batch);
 
     cufft_transform<T> transform;
-    CUFFT_CHECK(transform(plan, (T *)in.get(), out.get(), direction ? CUFFT_FORWARD : CUFFT_INVERSE));
+    CUFFT_CHECK(transform(plan, (T *)in.get(), in.get(), direction ? CUFFT_FORWARD : CUFFT_INVERSE));
 }
 
 void computePaddedDims(dim4 &pdims,
@@ -202,7 +199,7 @@ Array<outType> fft(Array<inType> const &in, double norm_factor, dim_t const npad
     computePaddedDims(pdims, in.dims(), npad, pad);
 
     Array<outType> ret = padArray<inType, outType>(in, pdims, scalar<outType>(0), norm_factor);
-    fft_common<outType, rank, true>(ret, ret);
+    fft_inplace<outType, rank, true>(ret);
 
     return ret;
 }
@@ -216,7 +213,7 @@ Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t co
     computePaddedDims(pdims, in.dims(), npad, pad);
 
     Array<T> ret = padArray<T, T>(in, pdims, scalar<T>(0), norm_factor);
-    fft_common<T, rank, false>(ret, ret);
+    fft_inplace<T, rank, false>(ret);
 
     return ret;
 }
diff --git a/src/backend/cuda/fft.hpp b/src/backend/cuda/fft.hpp
index bde7c2c..ee3ef14 100644
--- a/src/backend/cuda/fft.hpp
+++ b/src/backend/cuda/fft.hpp
@@ -19,6 +19,6 @@ template<typename T, int rank>
 Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t const * const pad);
 
 template<typename T, int rank, bool direction>
-void fft_common(Array<T> &out, const Array<T> &in);
+void fft_inplace(Array<T> &out);
 
 }
diff --git a/src/backend/cuda/fftconvolve.cu b/src/backend/cuda/fftconvolve.cu
index 677fec7..97edeec 100644
--- a/src/backend/cuda/fftconvolve.cu
+++ b/src/backend/cuda/fftconvolve.cu
@@ -78,8 +78,8 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
 
     kernel::packDataHelper<cT, T>(signal_packed, filter_packed, signal, filter, baseDim);
 
-    fft_common<cT, baseDim, true>(signal_packed, signal_packed);
-    fft_common<cT, baseDim, true>(filter_packed, filter_packed);
+    fft_inplace<cT, baseDim, true>(signal_packed);
+    fft_inplace<cT, baseDim, true>(filter_packed);
 
     Array<T> out = createEmptyArray<T>(oDims);
 
@@ -89,13 +89,13 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
         kernel::complexMultiplyHelper<T, cT>(out, signal_packed, filter_packed, signal, filter, kind);
 
     if (kind == CONVOLVE_BATCH_KERNEL) {
-        fft_common<cT, baseDim, false>(filter_packed, filter_packed);
+        fft_inplace<cT, baseDim, false>(filter_packed);
         if (expand)
             kernel::reorderOutputHelper<T, cT, roundOut, baseDim, true >(out, filter_packed, signal, filter, kind);
         else
             kernel::reorderOutputHelper<T, cT, roundOut, baseDim, false>(out, filter_packed, signal, filter, kind);
     } else {
-        fft_common<cT, baseDim, false>(signal_packed, signal_packed);
+        fft_inplace<cT, baseDim, false>(signal_packed);
         if (expand)
             kernel::reorderOutputHelper<T, cT, roundOut, baseDim, true >(out, signal_packed, signal, filter, kind);
         else
diff --git a/src/backend/opencl/fft.cpp b/src/backend/opencl/fft.cpp
index 80a2a75..5094300 100644
--- a/src/backend/opencl/fft.cpp
+++ b/src/backend/opencl/fft.cpp
@@ -149,6 +149,7 @@ void find_clfft_plan(clfftPlanHandle &plan,
     CLFFT_CHECK(clfftSetPlanOutStride(temp, rank, ostrides));
     CLFFT_CHECK(clfftSetPlanPrecision(temp, precision));
     CLFFT_CHECK(clfftSetResultLocation(temp, CLFFT_INPLACE));
+    CLFFT_CHECK(clfftSetPlanScale(temp, CLFFT_BACKWARD, 1.0));
 
     // getQueue() returns object of type CommandQueue
     // CommandQueue() returns the actual cl_command_queue handle
@@ -172,19 +173,14 @@ void computeDims(size_t rdims[4], const dim4 &idims)
 }
 
 template<typename T, int rank, bool direction>
-void fft_common(Array<T> &out, const Array<T> &in)
+void fft_inplace(Array<T> &in)
 {
     size_t idims[4], istrides[4], iembed[4];
-    size_t odims[4], ostrides[4], oembed[4];
 
     computeDims(idims   , in.dims());
     computeDims(iembed  , in.getDataDims());
     computeDims(istrides, in.strides());
 
-    computeDims(odims   , out.dims());
-    computeDims(oembed  , out.getDataDims());
-    computeDims(ostrides, out.strides());
-
     clfftPlanHandle plan;
 
     int batch = 1;
@@ -194,18 +190,17 @@ void fft_common(Array<T> &out, const Array<T> &in)
 
     find_clfft_plan(plan, (clfftDim)rank, idims,
                     istrides, istrides[rank],
-                    ostrides, ostrides[rank],
+                    istrides, istrides[rank],
                     (clfftPrecision)Precision<T>::type,
                     batch);
 
     cl_mem imem = (*in.get())();
-    cl_mem omem = (*out.get())();
     cl_command_queue queue = getQueue()();
 
     CLFFT_CHECK(clfftEnqueueTransform(plan,
                                       direction ? CLFFT_FORWARD : CLFFT_BACKWARD,
                                       1, &queue, 0, NULL, NULL,
-                                      &imem, &omem, NULL));
+                                      &imem, &imem, NULL));
 }
 
 void computePaddedDims(dim4 &pdims,
@@ -253,7 +248,7 @@ Array<outType> fft(Array<inType> const &in, double norm_factor, dim_t const npad
     verifySupported<rank>(pdims);
 
     Array<outType> ret = padArray<inType, outType>(in, pdims, scalar<outType>(0), norm_factor);
-    fft_common<outType, rank, true>(ret, ret);
+    fft_inplace<outType, rank, true>(ret);
 
     return ret;
 }
@@ -267,14 +262,8 @@ Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t co
     computePaddedDims(pdims, in.dims(), npad, pad);
     verifySupported<rank>(pdims);
 
-    // the input norm_factor is further scaled
-    // based on the input dimensions to match
-    // cuFFT behavior
-    for (int i=0; i<rank; i++)
-        norm_factor *= pdims[i];
-
     Array<T> ret = padArray<T, T>(in, pdims, scalar<T>(0), norm_factor);
-    fft_common<T, rank, false>(ret, ret);
+    fft_inplace<T, rank, false>(ret);
     return ret;
 }
 
diff --git a/src/backend/opencl/fft.hpp b/src/backend/opencl/fft.hpp
index e1ccb97..2406f5b 100644
--- a/src/backend/opencl/fft.hpp
+++ b/src/backend/opencl/fft.hpp
@@ -19,6 +19,6 @@ template<typename T, int rank>
 Array<T> ifft(Array<T> const &in, double norm_factor, dim_t const npad, dim_t const * const pad);
 
 template<typename T, int rank, bool direction>
-void fft_common(Array<T> &out, const Array<T> &in);
+void fft_inplace(Array<T> &in);
 
 }
diff --git a/src/backend/opencl/fftconvolve.cpp b/src/backend/opencl/fftconvolve.cpp
index ab446b9..d97f83a 100644
--- a/src/backend/opencl/fftconvolve.cpp
+++ b/src/backend/opencl/fftconvolve.cpp
@@ -78,7 +78,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
 
     kernel::packDataHelper<cT, T, isDouble, convT>(packed, signal, filter, baseDim, kind);
 
-    fft_common<cT, baseDim, true>(packed, packed);
+    fft_inplace<cT, baseDim, true>(packed);
 
     kernel::complexMultiplyHelper<cT, T, isDouble, convT>(packed, signal, filter, baseDim, kind);
 
@@ -95,7 +95,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
         }
 
         Array<cT> subPacked = createSubArray<cT>(packed, seqs);
-        fft_common<cT, baseDim, false>(subPacked, subPacked);
+        fft_inplace<cT, baseDim, false>(subPacked);
     }
     else {
         std::vector<af_seq> seqs;
@@ -109,7 +109,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
         }
 
         Array<cT> subPacked = createSubArray<cT>(packed, seqs);
-        fft_common<cT, baseDim, false>(subPacked, subPacked);
+        fft_inplace<cT, baseDim, false>(subPacked);
     }
 
     Array<T> out = createEmptyArray<T>(oDims);
diff --git a/src/backend/opencl/kernel/fftconvolve.hpp b/src/backend/opencl/kernel/fftconvolve.hpp
index fb29a42..bc65ee7 100644
--- a/src/backend/opencl/kernel/fftconvolve.hpp
+++ b/src/backend/opencl/kernel/fftconvolve.hpp
@@ -238,6 +238,12 @@ void reorderOutputHelper(Param out,
         static std::map<int, Program*> fftconvolveProgs;
         static std::map<int, Kernel*>  roKernel;
 
+        int fftScale = 1;
+
+        // Calculate the scale by which to divide clFFT results
+        for (int k = 0; k < baseDim; k++)
+            fftScale *= packed.info.dims[k];
+
         int device = getActiveDeviceId();
 
         std::call_once( compileFlags[device], [device] () {
@@ -279,19 +285,19 @@ void reorderOutputHelper(Param out,
         auto roOp = make_kernel<Buffer, KParam,
                                 Buffer, KParam,
                                 KParam, const int,
-                                const int> (*roKernel[device]);
+                                const int, const int> (*roKernel[device]);
 
         if (kind == CONVOLVE_BATCH_KERNEL) {
             roOp(EnqueueArgs(getQueue(), global, local),
                  *out.data, out.info,
                  *filter_tmp.data, filter_tmp.info,
-                 filter.info, sig_half_d0, baseDim);
+                 filter.info, sig_half_d0, baseDim, fftScale);
         }
         else {
             roOp(EnqueueArgs(getQueue(), global, local),
                  *out.data, out.info,
                  *sig_tmp.data, sig_tmp.info,
-                 filter.info, sig_half_d0, baseDim);
+                 filter.info, sig_half_d0, baseDim, fftScale);
         }
         CL_DEBUG_FINISH(getQueue());
     } catch (cl::Error err) {
diff --git a/src/backend/opencl/kernel/fftconvolve_reorder.cl b/src/backend/opencl/kernel/fftconvolve_reorder.cl
index 6f8269a..1eb7cde 100644
--- a/src/backend/opencl/kernel/fftconvolve_reorder.cl
+++ b/src/backend/opencl/kernel/fftconvolve_reorder.cl
@@ -15,7 +15,8 @@ void reorder_output(
     KParam                iInfo,
     KParam                fInfo,
     const int        half_di0,
-    const int             baseDim)
+    const int             baseDim,
+    const int             fftScale)
 {
     const int t = get_global_id(0);
 
@@ -68,9 +69,9 @@ void reorder_output(
         // Copy top elements
         int iidx = iInfo.offset + ti3 + ti2 + ti1 + ti0 * 2;
 #if ROUND_OUT == 1
-            d_out[oidx] = (T)round(d_in[iidx]);
+        d_out[oidx] = (T)round(d_in[iidx] / fftScale);
 #else
-            d_out[oidx] = (T)(d_in[iidx]);
+        d_out[oidx] = (T)(d_in[iidx] / fftScale);
 #endif
     }
     else if (ti0 < half_di0 + fInfo.dims[0] - 1) {
@@ -78,18 +79,18 @@ void reorder_output(
         int iidx1 = iInfo.offset + ti3 + ti2 + ti1 + ti0 * 2;
         int iidx2 = iInfo.offset + ti3 + ti2 + ti1 + (ti0 - half_di0) * 2 + 1;
 #if ROUND_OUT == 1
-            d_out[oidx] = (T)round((d_in[iidx1] + d_in[iidx2]));
+        d_out[oidx] = (T)round((d_in[iidx1] + d_in[iidx2]) / fftScale);
 #else
-            d_out[oidx] = (T)((d_in[iidx1] + d_in[iidx2]));
+        d_out[oidx] = (T)((d_in[iidx1] + d_in[iidx2]) / fftScale);
 #endif
     }
     else {
         // Copy bottom elements
         const int iidx = iInfo.offset + ti3 + ti2 + ti1 + (ti0 - half_di0) * 2 + 1;
 #if ROUND_OUT == 1
-            d_out[oidx] = (T)round(d_in[iidx]);
+        d_out[oidx] = (T)round(d_in[iidx] / fftScale);
 #else
-            d_out[oidx] = (T)(d_in[iidx]);
+        d_out[oidx] = (T)(d_in[iidx] / fftScale);
 #endif
     }
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list