[clfft] 31/74: bug fix for cases having the batch size of more than 1.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:14 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.

commit 900ef6d48c49833045384377b8769968cd296d37
Author: santanu-thangaraj <t.santanu at gmail.com>
Date:   Sat Dec 5 09:09:47 2015 +0530

    bug fix for cases having the batch size of more than 1.
---
 src/library/generator.transpose.nonsquare.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/library/generator.transpose.nonsquare.cpp b/src/library/generator.transpose.nonsquare.cpp
index 33992ba..20dd6dd 100644
--- a/src/library/generator.transpose.nonsquare.cpp
+++ b/src/library/generator.transpose.nonsquare.cpp
@@ -363,7 +363,7 @@ static void get_cycles(size_t *cycle_map, int num_reduced_row, int num_reduced_c
     delete[] is_swapped;
 }
 
-#define GLOBAL_MEM_FACTOR 2 //The amount of gloabl memory allocated for mtarix is(GLOBAL_MEM_FACTOR * Largest_dimension * size_of_elements)
+#define GLOBAL_MEM_FACTOR 2 //The amount of global memory allocated for matrix is(GLOBAL_MEM_FACTOR * Largest_dimension * size_of_elements)
 static clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature & params, std::string& strKernel, const size_t& lwSize, const size_t reShapeFactor)
 {
     strKernel.reserve(4096);
@@ -460,7 +460,7 @@ static clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Sig
         use_global_memory = 1;
         tmpBuffType = "global";
         /*Todo: add the appropriate logic for passing the required global memory*/
-        size_t global_mem_requirement_in_bytes = GLOBAL_MEM_FACTOR * (smaller_dim * 2) * input_elm_size_in_bytes;
+        size_t global_mem_requirement_in_bytes = avail_mem * input_elm_size_in_bytes;
 
     }
     
@@ -622,6 +622,10 @@ static clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Sig
             if (!use_global_memory) {
                 clKernWrite(transKernel, 3) << "__local " << dtInput << " tmp_tot_mem[" << avail_mem << "];" << std::endl;
             }
+            else
+            {
+                clKernWrite(transKernel, 3) << "tmp_tot_mem += " << avail_mem << " * g_index;" << std::endl; 
+            }
             clKernWrite(transKernel, 3) << tmpBuffType <<" " << dtInput << " *te = tmp_tot_mem;" << std::endl;
             clKernWrite(transKernel, 3) << tmpBuffType <<" " << dtInput << " *to = (tmp_tot_mem + " << (avail_mem >> 1) << ");" << std::endl;
             //Do not advance offset when precallback is set as the starting address of global buffer is needed

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list