[clfft] 35/74: updating transform code to call nonsquare transposes
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:15 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.
commit 3eeeb4fcfb9f7442030ae047e5ed630379f3ee4b
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Mon Dec 7 16:26:17 2015 -0800
updating transform code to call nonsquare transposes
---
src/library/plan.cpp | 2 +-
src/library/transform.cpp | 22 ++++++++++++++++++++++
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index e495b71..1519ea0 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1885,7 +1885,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
if (fftPlan->transflag) //Transpose for 2D
{
- clfftStatus err;
+ clfftStatus err = CLFFT_SUCCESS;
if(fftPlan->gen == Transpose_GCN)
fftPlan->action = new FFTGeneratedTransposeGCNAction(plHandle, fftPlan, *commQueueFFT, err);
else if (fftPlan->gen == Transpose_SQUARE)
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index 0efad17..616472c 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -626,6 +626,28 @@ clfftStatus clfftEnqueueTransform(
// if transpose kernel, we will fall below
if (fftPlan->transflag && !(fftPlan->planTX)) break;
+ if ( (fftPlan->gen == Transpose_NONSQUARE ) &&
+ (fftPlan->nonSquareKernelType == NON_SQUARE_TRANS_PARENT) )
+ {
+ cl_event stage1OutEvents = NULL;
+
+ OPENCL_V(clfftEnqueueTransform(fftPlan->planTX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+ waitEvents, &stage1OutEvents, clInputBuffers, NULL, NULL),
+ _T("clfftEnqueueTransform stage1 failed"));
+
+ OPENCL_V(clfftEnqueueTransform(fftPlan->planTY, dir, numQueuesAndEvents, commQueues, 1,
+ &stage1OutEvents, outEvents, clInputBuffers, NULL, NULL),
+ _T("clfftEnqueueTransform stage1 failed"));
+ clReleaseEvent(stage1OutEvents);
+
+ if (fftRepo.pStatTimer)
+ {
+ fftRepo.pStatTimer->AddSample(plHandle, fftPlan, NULL, 0, NULL, std::vector< size_t >(), std::vector< size_t >());
+ }
+
+ return CLFFT_SUCCESS;
+ }
+
cl_event rowOutEvents = NULL;
#if defined(DEBUGGING)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list