[clfft] 37/74: Bug fix for invalid work group size kernel generation.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:15 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.

commit 61cb3a6816efe9e19127221563f9b8503702f9e0
Author: santanu-thangaraj <t.santanu at gmail.com>
Date:   Tue Dec 8 20:32:44 2015 +0530

    Bug fix for invalid work group size kernel generation.
---
 src/library/generator.transpose.nonsquare.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/library/generator.transpose.nonsquare.cpp b/src/library/generator.transpose.nonsquare.cpp
index 1c41936..bfd926d 100644
--- a/src/library/generator.transpose.nonsquare.cpp
+++ b/src/library/generator.transpose.nonsquare.cpp
@@ -491,6 +491,7 @@ static clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Sig
         // more threads process each row.
         size_t num_threads_processing_row = (256 / local_work_size_swap) * 16;
         local_work_size_swap = num_lines_loaded * num_threads_processing_row;
+        local_work_size_swap = (local_work_size_swap > 256) ? 256 : local_work_size_swap;
 
         clKernWrite(transKernel, 0) << std::endl;
 
@@ -1604,6 +1605,7 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::getWorkSizes(std::vector< size
         // more threads process each row.
         size_t num_threads_processing_row = (256 / local_work_size_swap) * 16;
         local_work_size_swap = num_lines_loaded * num_threads_processing_row;
+        local_work_size_swap = (local_work_size_swap > 256) ? 256 : local_work_size_swap;
 
         global_item_size = local_work_size_swap * this->plan->batchsize;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list