[clfft] 27/109: NV workaround. Using the command queue to get the device id
Jérôme Kieffer
kieffer-guest at moszumanska.debian.org
Wed May 20 07:29:22 UTC 2015
This is an automated email from the git hooks/post-receive script.
kieffer-guest pushed a commit to branch debian in repository clfft.
commit cdb291951cde68df711ded457a7c627ae1693e9c
Author: AMD-FirePro <FirePro.Developers at amd.com>
Date: Mon Oct 14 10:23:35 2013 +0100
NV workaround. Using the command queue to get the device id
---
src/library/generator.copy.cpp | 2 +-
src/library/generator.stockham.cpp | 12 +++++++++---
src/library/generator.transpose.cpp | 2 +-
src/library/plan.cpp | 14 +++++++-------
src/library/plan.h | 4 ++--
5 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp
index e839ed8..5afe3cd 100644
--- a/src/library/generator.copy.cpp
+++ b/src/library/generator.copy.cpp
@@ -445,7 +445,7 @@ clfftStatus FFTPlan::GetMax1DLengthPvt<Copy> (size_t * longest) const
using namespace CopyGenerator;
template<>
-clfftStatus FFTPlan::GenerateKernelPvt<Copy>(FFTRepo& fftRepo ) const
+clfftStatus FFTPlan::GenerateKernelPvt<Copy>(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
{
FFTKernelGenKeyParams params;
OPENCL_V( this->GetKernelGenKeyPvt<Copy> (params), _T("GetKernelGenKey() failed!") );
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index fdf3644..a1b7969 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -3229,11 +3229,17 @@ clfftStatus FFTPlan::GetMax1DLengthPvt<Stockham> (size_t * longest) const
}
template<>
-clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo ) const
+clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
{
FFTKernelGenKeyParams params;
OPENCL_V( this->GetKernelGenKeyPvt<Stockham> (params), _T("GetKernelGenKey() failed!") );
+ cl_int status = CL_SUCCESS;
+ cl_device_id Device = NULL;
+ status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, NULL);
+
+ OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) );
+
std::string programCode;
Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE;
switch(pr)
@@ -3241,12 +3247,12 @@ clfftStatus FFTPlan::GenerateKernelPvt<Stockham>(FFTRepo& fftRepo ) const
case P_SINGLE:
{
Kernel<P_SINGLE> kernel(params);
- kernel.GenerateKernel(programCode, devices[0]);
+ kernel.GenerateKernel(programCode, Device);
} break;
case P_DOUBLE:
{
Kernel<P_DOUBLE> kernel(params);
- kernel.GenerateKernel(programCode, devices[0]);
+ kernel.GenerateKernel(programCode, Device);
} break;
}
diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
index 3b12504..2c83b8a 100644
--- a/src/library/generator.transpose.cpp
+++ b/src/library/generator.transpose.cpp
@@ -822,7 +822,7 @@ clfftStatus FFTPlan::GetWorkSizesPvt<Transpose> (std::vector<size_t> & globalWS,
// OpenCL does not take unicode strings as input, so this routine returns only ASCII strings
// Feed this generator the FFTPlan, and it returns the generated program as a string
template<>
-clfftStatus FFTPlan::GenerateKernelPvt<Transpose> ( FFTRepo& fftRepo ) const
+clfftStatus FFTPlan::GenerateKernelPvt<Transpose> ( FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const
{
FFTKernelGenKeyParams params;
OPENCL_V( this->GetKernelGenKeyPvt<Transpose> (params), _T("GetKernelGenKey() failed!") );
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index ec87b2d..61ae195 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -543,7 +543,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
if(fftPlan->gen == Copy)
{
- OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) );
+ OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) );
OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
fftPlan->baked = true;
return CLFFT_SUCCESS;
@@ -1505,7 +1505,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
//break;
if (fftPlan->transflag) //Transpose for 2D
{
- OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateTransposeProgram() failed" ) );
+ OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateTransposeProgram() failed" ) );
OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
fftPlan->baked = true;
@@ -2445,7 +2445,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
}
// For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels
- OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) );
+ OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) );
// For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels
OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) );
@@ -3265,13 +3265,13 @@ clfftStatus FFTPlan::GetKernelGenKey (FFTKernelGenKeyParams & params) const
}
}
-clfftStatus FFTPlan::GenerateKernel (FFTRepo & fftRepo) const
+clfftStatus FFTPlan::GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const
{
switch(gen)
{
- case Stockham: return GenerateKernelPvt<Stockham>(fftRepo);
- case Transpose: return GenerateKernelPvt<Transpose>(fftRepo);
- case Copy: return GenerateKernelPvt<Copy>(fftRepo);
+ case Stockham: return GenerateKernelPvt<Stockham>(fftRepo, commQueueFFT);
+ case Transpose: return GenerateKernelPvt<Transpose>(fftRepo, commQueueFFT);
+ case Copy: return GenerateKernelPvt<Copy>(fftRepo, commQueueFFT);
default: assert(false); return CLFFT_NOTIMPLEMENTED;
}
}
diff --git a/src/library/plan.h b/src/library/plan.h
index ec96fad..11319e7 100644
--- a/src/library/plan.h
+++ b/src/library/plan.h
@@ -202,7 +202,7 @@ class FFTPlan
clfftStatus GetKernelGenKeyPvt (FFTKernelGenKeyParams & params) const;
template <clfftGenerators G>
- clfftStatus GenerateKernelPvt (FFTRepo& fftRepo) const;
+ clfftStatus GenerateKernelPvt (FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const;
template <clfftGenerators G>
clfftStatus GetMax1DLengthPvt (size_t *longest ) const;
@@ -338,7 +338,7 @@ public:
clfftStatus GetWorkSizes (std::vector<size_t> & globalws, std::vector<size_t> & localws) const;
clfftStatus GetKernelGenKey (FFTKernelGenKeyParams & params) const;
- clfftStatus GenerateKernel (FFTRepo & fftRepo) const;
+ clfftStatus GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const;
clfftStatus GetMax1DLength (size_t *longest ) const;
void ResetBinarySizes();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list