[clfft] 83/107: updating the transform 3D logic real forward
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:39 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 0d7e198c30de4bc8c4a76e8b6228824934767aab
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Fri Apr 17 15:49:50 2015 -0500
updating the transform 3D logic real forward
---
src/library/plan.cpp | 3 +-
src/library/transform.cpp | 87 +++++++++++++++++++++++++++++++++++++++++------
2 files changed, 78 insertions(+), 12 deletions(-)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 747a472..112a542 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -609,9 +609,10 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
if ( IsPo2(fftPlan->length[0])
&& (fftPlan->length[0] <= 1048576/PrecisionWidth(fftPlan->precision)) ) break;
+ if ( clLengths[0]<=32 && clLengths[1]<=32) break;
ARG_CHECK(clLengths[0] <= Large1DThreshold);
- ARG_CHECK(clLengths[0]>=32 && clLengths[1]>=32);
+
size_t biggerDim = clLengths[0] > clLengths[1] ? clLengths[0] : clLengths[1];
size_t smallerDim = biggerDim == clLengths[0] ? clLengths[1] : clLengths[0];
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index 5e60876..8ca90ee 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -821,20 +821,85 @@ clfftStatus clfftEnqueueTransform(
#endif
if(fftPlan->inputLayout == CLFFT_REAL)
{
- cl_mem *tmp_local, *out_local;
+ if(fftPlan->planTX)
+ {
+ //First row
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+ waitEvents, &rowOutEvents, clInputBuffers, clOutputBuffers, localIntBuffer ),
+ _T("clfftEnqueueTransform for row failed"));
- tmp_local = (fftPlan->placeness==CLFFT_INPLACE) ? NULL : clOutputBuffers;
- out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
+ cl_mem *mybuffers;
- //deal with 2D row first
- OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_FORWARD, numQueuesAndEvents, commQueues, numWaitEvents,
- waitEvents, &rowOutEvents, clInputBuffers, tmp_local, localIntBuffer ),
- _T("clfftEnqueueTransform for 3D-XY row failed"));
+ if (fftPlan->placeness==CLFFT_INPLACE)
+ mybuffers = clInputBuffers;
+ else
+ mybuffers = clOutputBuffers;
- //deal with 1D Z column
- OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
- outEvents, out_local, NULL, localIntBuffer ),
- _T("clfftEnqueueTransform for 3D-Z column failed"));
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[ 0 ], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+ cl_event transXOutEvents = NULL;
+ cl_event colOutEvents = NULL;
+
+
+ //First transpose
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planTX, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+ &transXOutEvents, mybuffers, &localIntBuffer, NULL ),
+ _T("clfftEnqueueTransform for first transpose failed"));
+ // clReleaseEvent(rowOutEvents);
+
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+
+ //Second Row transform
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+ &colOutEvents, &localIntBuffer, NULL, NULL ),
+ _T("clfftEnqueueTransform for second row failed"));
+ clReleaseEvent(transXOutEvents);
+
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+ //Second transpose
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planTY, dir, numQueuesAndEvents, commQueues, 1, &colOutEvents,
+ outEvents, &localIntBuffer, mybuffers, NULL ),
+ _T("clfftEnqueueTransform for second transpose failed"));
+ clReleaseEvent(colOutEvents);
+
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+ }
+ else
+ {
+ cl_mem *tmp_local, *out_local;
+
+ tmp_local = (fftPlan->placeness==CLFFT_INPLACE) ? NULL : clOutputBuffers;
+ out_local = (fftPlan->placeness==CLFFT_INPLACE) ? clInputBuffers : clOutputBuffers;
+
+ //deal with 2D row first
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_FORWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+ waitEvents, &rowOutEvents, clInputBuffers, tmp_local, localIntBuffer ),
+ _T("clfftEnqueueTransform for 3D-XY row failed"));
+
+ //deal with 1D Z column
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, CLFFT_FORWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+ outEvents, out_local, NULL, localIntBuffer ),
+ _T("clfftEnqueueTransform for 3D-Z column failed"));
+ }
}
else if(fftPlan->outputLayout == CLFFT_REAL)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list