[clfft] 27/109: NV workaround. Using the command queue to get the device id

Jérôme Kieffer kieffer-guest at moszumanska.debian.org
Wed May 20 07:29:22 UTC 2015


This is an automated email from the git hooks/post-receive script.

kieffer-guest pushed a commit to branch debian
in repository clfft.

commit cdb291951cde68df711ded457a7c627ae1693e9c
Author: AMD-FirePro <FirePro.Developers at amd.com>
Date:   Mon Oct 14 10:23:35 2013 +0100

    NV workaround. Using the command queue to get the device id
---
 src/library/generator.copy.cpp      |  2 +-
 src/library/generator.stockham.cpp  | 12 +++++++++---
 src/library/generator.transpose.cpp |  2 +-
 src/library/plan.cpp                | 14 +++++++-------
 src/library/plan.h                  |  4 ++--
 5 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp
index e839ed8..5afe3cd 100644
--- a/src/library/generator.copy.cpp
+++ b/src/library/generator.copy.cpp
@@ -445,7 +445,7 @@ clfftStatus FFTPlan::GetMax1DLengthPvt<Copy> (size_t * longest) const
 using namespace CopyGenerator;
 
 template<>
-clfftStatus FFTPlan::GenerateKernelPvt<Copy>(FFTRepo& fftRepo ) const
+clfftStatus FFTPlan::GenerateKernelPvt<Copy>(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
 {
     FFTKernelGenKeyParams params;
     OPENCL_V( this->GetKernelGenKeyPvt<Copy> (params), _T("GetKernelGenKey() failed!") );
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index fdf3644..a1b7969 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -3229,11 +3229,17 @@ clfftStatus FFTPlan::GetMax1DLengthPvt<Stockham> (size_t * longest) const
 }
 
 template<>
-clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo ) const
+clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
 {
     FFTKernelGenKeyParams params;
     OPENCL_V( this->GetKernelGenKeyPvt<Stockham> (params), _T("GetKernelGenKey() failed!") );
 
+    cl_int status = CL_SUCCESS;
+    cl_device_id Device = NULL;
+    status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, NULL);
+
+    OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
+
 	std::string programCode;
 	Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE;
 	switch(pr)
@@ -3241,12 +3247,12 @@ clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo ) const
 	case P_SINGLE:
 		{
 			Kernel<P_SINGLE> kernel(params);
-			kernel.GenerateKernel(programCode, devices[0]);
+			kernel.GenerateKernel(programCode, Device);
 		} break;
 	case P_DOUBLE:
 		{
 			Kernel<P_DOUBLE> kernel(params);
-			kernel.GenerateKernel(programCode, devices[0]);
+			kernel.GenerateKernel(programCode, Device);
 		} break;
 	}
 
diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
index 3b12504..2c83b8a 100644
--- a/src/library/generator.transpose.cpp
+++ b/src/library/generator.transpose.cpp
@@ -822,7 +822,7 @@ clfftStatus FFTPlan::GetWorkSizesPvt<Transpose> (std::vector<size_t> & globalWS,
 //	OpenCL does not take unicode strings as input, so this routine returns only ASCII strings
 //	Feed this generator the FFTPlan, and it returns the generated program as a string
 template<>
-clfftStatus FFTPlan::GenerateKernelPvt<Transpose> ( FFTRepo& fftRepo ) const
+clfftStatus FFTPlan::GenerateKernelPvt<Transpose> ( FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
 {
 	FFTKernelGenKeyParams params;
 	OPENCL_V( this->GetKernelGenKeyPvt<Transpose> (params), _T("GetKernelGenKey() failed!") );
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index ec87b2d..61ae195 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -543,7 +543,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 
 	if(fftPlan->gen == Copy)
 	{
-		OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) );
+		OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) );
 		OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
 		fftPlan->baked		= true;
 		return	CLFFT_SUCCESS;
@@ -1505,7 +1505,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				//break;
 				if (fftPlan->transflag) //Transpose for 2D
 				{
-					OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateTransposeProgram() failed" ) );
+					OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateTransposeProgram() failed" ) );
 					OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
 
 					fftPlan->baked		= true;
@@ -2445,7 +2445,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 	}
 
 	//	For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels
-	OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) );
+	OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) );
 
 	//	For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels
 	OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
@@ -3265,13 +3265,13 @@ clfftStatus  FFTPlan::GetKernelGenKey (FFTKernelGenKeyParams & params) const
 	}
 }
 
-clfftStatus  FFTPlan::GenerateKernel (FFTRepo & fftRepo) const
+clfftStatus  FFTPlan::GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const
 {
 	switch(gen)
 	{
-	case Stockham:		return GenerateKernelPvt<Stockham>(fftRepo);
-	case Transpose:		return GenerateKernelPvt<Transpose>(fftRepo);
-	case Copy:			return GenerateKernelPvt<Copy>(fftRepo);
+	case Stockham:		return GenerateKernelPvt<Stockham>(fftRepo, commQueueFFT);
+	case Transpose:		return GenerateKernelPvt<Transpose>(fftRepo, commQueueFFT);
+	case Copy:			return GenerateKernelPvt<Copy>(fftRepo, commQueueFFT);
 	default:			assert(false); return CLFFT_NOTIMPLEMENTED;
 	}
 }
diff --git a/src/library/plan.h b/src/library/plan.h
index ec96fad..11319e7 100644
--- a/src/library/plan.h
+++ b/src/library/plan.h
@@ -202,7 +202,7 @@ class	FFTPlan
 	clfftStatus GetKernelGenKeyPvt (FFTKernelGenKeyParams & params) const;
 
 	template <clfftGenerators G>
-	clfftStatus GenerateKernelPvt (FFTRepo& fftRepo) const;
+	clfftStatus GenerateKernelPvt (FFTRepo& fftRepo,  const cl_command_queue commQueueFFT ) const;
 
 	template <clfftGenerators G>
 	clfftStatus GetMax1DLengthPvt (size_t *longest ) const;
@@ -338,7 +338,7 @@ public:
 
 	clfftStatus GetWorkSizes (std::vector<size_t> & globalws, std::vector<size_t> & localws) const;
 	clfftStatus GetKernelGenKey (FFTKernelGenKeyParams & params) const;
-	clfftStatus GenerateKernel (FFTRepo & fftRepo) const;
+	clfftStatus GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const;
 	clfftStatus GetMax1DLength (size_t *longest ) const;
 
 	void ResetBinarySizes();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list