[clfft] 37/74: Bug fix for invalid work group size kernel generation.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:15 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid in repository clfft.
commit 61cb3a6816efe9e19127221563f9b8503702f9e0
Author: santanu-thangaraj <t.santanu at gmail.com>
Date: Tue Dec 8 20:32:44 2015 +0530
Bug fix for invalid work group size kernel generation.
---
src/library/generator.transpose.nonsquare.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/library/generator.transpose.nonsquare.cpp b/src/library/generator.transpose.nonsquare.cpp
index 1c41936..bfd926d 100644
--- a/src/library/generator.transpose.nonsquare.cpp
+++ b/src/library/generator.transpose.nonsquare.cpp
@@ -491,6 +491,7 @@ static clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Sig
// more threads process each row.
size_t num_threads_processing_row = (256 / local_work_size_swap) * 16;
local_work_size_swap = num_lines_loaded * num_threads_processing_row;
+ local_work_size_swap = (local_work_size_swap > 256) ? 256 : local_work_size_swap;
clKernWrite(transKernel, 0) << std::endl;
@@ -1604,6 +1605,7 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::getWorkSizes(std::vector< size
// more threads process each row.
size_t num_threads_processing_row = (256 / local_work_size_swap) * 16;
local_work_size_swap = num_lines_loaded * num_threads_processing_row;
+ local_work_size_swap = (local_work_size_swap > 256) ? 256 : local_work_size_swap;
global_item_size = local_work_size_swap * this->plan->batchsize;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list