[arrayfire] 32/79: BUGFIX/TEST: fftConvolve now does multi dimensional batching properly
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Jun 15 13:38:05 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository arrayfire.
commit 8f1a3be7364c3842ca07b2649cc309d16ba83ffe
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Mon Jun 8 17:49:18 2015 -0400
BUGFIX/TEST: fftConvolve now does multi dimensional batching properly
- Added relevant tests
---
src/backend/cpu/fftconvolve.cpp | 36 ++++++++++++--------------
src/backend/opencl/fftconvolve.cpp | 30 ++++++++++------------
test/fftconvolve.cpp | 52 ++++++++++++++++++++++++++++++++++++++
3 files changed, 81 insertions(+), 37 deletions(-)
diff --git a/src/backend/cpu/fftconvolve.cpp b/src/backend/cpu/fftconvolve.cpp
index fe12740..7d2e97b 100644
--- a/src/backend/cpu/fftconvolve.cpp
+++ b/src/backend/cpu/fftconvolve.cpp
@@ -224,33 +224,29 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
dim_t fftScale = 1;
- af::dim4 packed_dims;
+ af::dim4 packed_dims(1, 1, 1, 1);
int fft_dims[baseDim];
af::dim4 sig_tmp_dims, sig_tmp_strides;
af::dim4 filter_tmp_dims, filter_tmp_strides;
// Pack both signal and filter on same memory array, this will ensure
- // better use of batched cuFFT capabilities
- for (dim_t k = 0; k < 4; k++) {
- if (k < baseDim)
- packed_dims[k] = nextpow2((unsigned)(sd[k] + fd[k] - 1));
- else if (k == baseDim)
- packed_dims[k] = sd[k] + fd[k];
- else
- packed_dims[k] = 1;
-
- unsigned df0 = nextpow2((unsigned)((int)ceil(sd[0] / 2.f) + fd[0] - 1));
-
- // Adjust dimension 0 size if ceil(signal/2.f)+filter-1 won't fit in
- // packed_dims[0]
- if (k == 0 && df0 == packed_dims[0])
- packed_dims[0] *= 2;
+ // better use of batched FFT capabilities
+ fft_dims[baseDim - 1] = nextpow2((unsigned)((int)ceil(sd[0] / 2.f) + fd[0] - 1));
+ packed_dims[0] = 2 * fft_dims[baseDim - 1];
+ fftScale *= fft_dims[baseDim - 1];
+
+ for (dim_t k = 1; k < baseDim; k++) {
+ packed_dims[k] = nextpow2((unsigned)(sd[k] + fd[k] - 1));
+ fft_dims[baseDim - k - 1] = packed_dims[k];
+ fftScale *= fft_dims[baseDim - k - 1];
+ }
- if (k < baseDim) {
- fft_dims[baseDim-k-1] = (k == 0) ? packed_dims[k] / 2 : packed_dims[k];
- fftScale *= fft_dims[baseDim-k-1];
- }
+ dim_t sbatch = 1, fbatch = 1;
+ for (int k = baseDim; k < 4; k++) {
+ sbatch *= sd[k];
+ fbatch *= fd[k];
}
+ packed_dims[baseDim] = (sbatch + fbatch);
Array<convT> packed = createEmptyArray<convT>(packed_dims);
convT *packed_ptr = packed.get();
diff --git a/src/backend/opencl/fftconvolve.cpp b/src/backend/opencl/fftconvolve.cpp
index 3bec3ba..06d2bc5 100644
--- a/src/backend/opencl/fftconvolve.cpp
+++ b/src/backend/opencl/fftconvolve.cpp
@@ -29,27 +29,23 @@ static const dim4 calcPackedSize(Array<T> const& i1,
const dim4 i1d = i1.dims();
const dim4 i2d = i2.dims();
- dim_t pd[4];
+ dim_t pd[4] = {1, 1, 1, 1};
// Pack both signal and filter on same memory array, this will ensure
// better use of batched cuFFT capabilities
- for (dim_t k = 0; k < 4; k++) {
- if (k == 0)
- pd[k] = nextpow2((unsigned)(i1d[k] + i2d[k] - 1)) / 2;
- else if (k < baseDim)
- pd[k] = nextpow2((unsigned)(i1d[k] + i2d[k] - 1));
- else if (k == baseDim)
- pd[k] = i1d[k] + i2d[k];
- else
- pd[k] = 1;
- }
+ pd[0] = nextpow2((unsigned)((int)ceil(i1d[0] / 2.f) + i2d[0] - 1));
- unsigned df0 = nextpow2((unsigned)((int)ceil(i1d[0] / 2.f) + i2d[0] - 1));
+ for (dim_t k = 1; k < baseDim; k++) {
+ pd[k] = nextpow2((unsigned)(i1d[k] + i2d[k] - 1));
+ }
- // Adjust dimension 0 size if ceil(signal/2.f)+filter-1 won't fit in
- // packed_dims[0]
- if (df0 == (pd[0]*2))
- pd[0] *= 2;
+ dim_t i1batch = 1;
+ dim_t i2batch = 1;
+ for (int k = baseDim; k < 4; k++) {
+ i1batch *= i1d[k];
+ i2batch *= i2d[k];
+ }
+ pd[baseDim] = (i1batch + i2batch);
return dim4(pd[0], pd[1], pd[2], pd[3]);
}
@@ -107,7 +103,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter, const bool
if (k < baseDim)
seqs.push_back(af_make_seq(0, pDims[k]-1, 1));
else if (k == baseDim)
- seqs.push_back(af_make_seq(0, sDims[k]-1, 1));
+ seqs.push_back(af_make_seq(0, pDims[k]-2, 1));
else
seqs.push_back(af_make_seq(0, 0, 1));
}
diff --git a/test/fftconvolve.cpp b/test/fftconvolve.cpp
index 537a94c..b8782af 100644
--- a/test/fftconvolve.cpp
+++ b/test/fftconvolve.cpp
@@ -588,3 +588,55 @@ TEST(FFTConvolve, Docs_Unified_Wrapper)
// 1.0000 1.0000 1.0000 0.5000
//![ex_image_convolve_3d]
}
+using namespace af;
+
+TEST(GFOR, fftConvolve2_MO)
+{
+ array A = randu(5, 5, 3);
+ array B = randu(5, 5, 3);
+ array K = randu(3, 3);
+
+ gfor(seq ii, 3) {
+ B(span, span, ii) = fftConvolve2(A(span, span, ii), K);
+ }
+
+ for (int ii = 0; ii < 3; ii++) {
+ array c_ii = fftConvolve2(A(span, span, ii), K);
+ array b_ii = B(span, span, ii);
+ ASSERT_EQ(max<double>(abs(c_ii - b_ii)) < 1E-5, true);
+ }
+}
+
+TEST(GFOR, fftConvolve2_1M)
+{
+ array A = randu(5, 5);
+ array B = randu(5, 5, 3);
+ array K = randu(3, 3, 3);
+
+ gfor(seq ii, 3) {
+ B(span, span, ii) = fftConvolve2(A, K(span, span, ii));
+ }
+
+ for (int ii = 0; ii < 3; ii++) {
+ array c_ii = fftConvolve2(A, K(span, span, ii));
+ array b_ii = B(span, span, ii);
+ ASSERT_EQ(max<double>(abs(c_ii - b_ii)) < 1E-5, true);
+ }
+}
+
+TEST(GFOR, fftConvolve2_MM)
+{
+ array A = randu(5, 5, 3);
+ array B = randu(5, 5, 3);
+ array K = randu(3, 3, 3);
+
+ gfor(seq ii, 3) {
+ B(span, span, ii) = fftConvolve2(A(span, span, ii), K(span, span, ii));
+ }
+
+ for (int ii = 0; ii < 3; ii++) {
+ array c_ii = fftConvolve2(A(span, span, ii), K(span, span, ii));
+ array b_ii = B(span, span, ii);
+ ASSERT_EQ(max<double>(abs(c_ii - b_ii)) < 1E-5, true);
+ }
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list