[clfft] 35/74: updating transform code to call nonsquare transposes

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:15 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.

commit 3eeeb4fcfb9f7442030ae047e5ed630379f3ee4b
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Mon Dec 7 16:26:17 2015 -0800

    updating transform code to call nonsquare transposes
---
 src/library/plan.cpp      |  2 +-
 src/library/transform.cpp | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index e495b71..1519ea0 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1885,7 +1885,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 
 			if (fftPlan->transflag) //Transpose for 2D
 			{
-                clfftStatus err;
+                clfftStatus err = CLFFT_SUCCESS;
 				if(fftPlan->gen == Transpose_GCN)
 					fftPlan->action = new FFTGeneratedTransposeGCNAction(plHandle, fftPlan, *commQueueFFT, err);
 				else if (fftPlan->gen == Transpose_SQUARE)
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index 0efad17..616472c 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -626,6 +626,28 @@ clfftStatus clfftEnqueueTransform(
 			// if transpose kernel, we will fall below
 			if (fftPlan->transflag && !(fftPlan->planTX)) break;
 
+			if ( (fftPlan->gen == Transpose_NONSQUARE ) &&
+				 (fftPlan->nonSquareKernelType == NON_SQUARE_TRANS_PARENT) )
+			{
+				cl_event stage1OutEvents = NULL;
+
+				OPENCL_V(clfftEnqueueTransform(fftPlan->planTX, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+					waitEvents, &stage1OutEvents, clInputBuffers, NULL, NULL),
+					_T("clfftEnqueueTransform stage1 failed"));
+
+				OPENCL_V(clfftEnqueueTransform(fftPlan->planTY, dir, numQueuesAndEvents, commQueues, 1,
+					&stage1OutEvents, outEvents, clInputBuffers, NULL, NULL),
+					_T("clfftEnqueueTransform stage1 failed"));
+				clReleaseEvent(stage1OutEvents);
+
+				if (fftRepo.pStatTimer)
+				{
+					fftRepo.pStatTimer->AddSample(plHandle, fftPlan, NULL, 0, NULL, std::vector< size_t >(), std::vector< size_t >());
+				}
+
+				return	CLFFT_SUCCESS;
+			}
+
 			cl_event rowOutEvents = NULL;
 
 #if defined(DEBUGGING)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list