[clfft] 46/74: post call back support updated with some changes.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:16 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.

commit 888518ae8c363c3357e2bdc9dbcb743a76617fff
Author: santanu-thangaraj <t.santanu at gmail.com>
Date:   Thu Dec 17 15:53:28 2015 +0530

    post call back support updated with some changes.
---
 src/library/generator.transpose.nonsquare.cpp | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/src/library/generator.transpose.nonsquare.cpp b/src/library/generator.transpose.nonsquare.cpp
index 2d94b30..eaa370a 100644
--- a/src/library/generator.transpose.nonsquare.cpp
+++ b/src/library/generator.transpose.nonsquare.cpp
@@ -435,17 +435,14 @@ static clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Sig
 
         clKernWrite(transKernel, 0) << "};" << std::endl;
         /*cycle_map[0] + 2, + 2 is added for post callback table appending*/
-        clKernWrite(transKernel, 0) << "__constant int cycle_stat["<< cycle_map[0] + 2 <<"][2] = {" << std::endl;
-        for (int i = 0; i < cycle_map[0]; i++)
+        size_t num_cycles_minus_1 = cycle_map[0] - 1;
+
+        clKernWrite(transKernel, 0) << "__constant int cycle_stat["<< cycle_map[0] <<"][2] = {" << std::endl;
+        for (int i = 0; i < num_cycles_minus_1; i++)
         {
             clKernWrite(transKernel, 0) << "{  " << cycle_stat[i * 2] << ",  " << cycle_stat[i * 2 + 1] << "}," << std::endl;
         }
-
-        /*Appending cycle_stat table for touching corner elements for post call back*/
-        size_t num_cycles_minus_1 = cycle_map[0] - 1;
-        size_t lenght_of_swap_table = cycle_stat[num_cycles_minus_1 * 2 + 1];
-        clKernWrite(transKernel, 0) << "{  " << lenght_of_swap_table + 1 << ",  " << lenght_of_swap_table + 1 << "}," << std::endl;
-        clKernWrite(transKernel, 0) << "{  " << lenght_of_swap_table + 2 << ",  " << lenght_of_swap_table + 2 << "}," << std::endl;
+        clKernWrite(transKernel, 0) << "{  " << cycle_stat[num_cycles_minus_1 * 2] << ",  " << (cycle_stat[num_cycles_minus_1 * 2 + 1] + 2)<< "}," << std::endl;
 
         clKernWrite(transKernel, 0) << "};" << std::endl;
 
@@ -531,8 +528,7 @@ static clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Sig
 
         clKernWrite(transKernel, 3) << "size_t g_index = get_group_id(0);" << std::endl;
 
-        /*cycle_map[0] + 2, + 2 is added for post callback table appending*/
-        clKernWrite(transKernel, 3) << "const size_t numGroupsY_1 = "<<(cycle_map[0] + 2)* num_grps_pro_row<<" ;" << std::endl;
+        clKernWrite(transKernel, 3) << "const size_t numGroupsY_1 = "<< cycle_map[0] * num_grps_pro_row<<" ;" << std::endl;
         for (int i = 2; i < params.fft_DataDim - 1; i++)
         {
             clKernWrite(transKernel, 3) << "const size_t numGroupsY_" << i << " = numGroupsY_" << i - 1 << " * " << params.fft_N[i] << ";" << std::endl;
@@ -1459,8 +1455,7 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::getWorkSizes(std::vector< size
         /* The memory required by cycle_map cannot exceed 2 times row*col by design*/
         get_cycles(cycle_map, num_reduced_row, num_reduced_col);
 
-        /*cycle_map[0] + 2, + 2 is added for post callback table appending*/
-        global_item_size = local_work_size_swap * num_grps_pro_row * (cycle_map[0] + 2) * this->plan->batchsize;
+        global_item_size = local_work_size_swap * num_grps_pro_row * cycle_map[0] * this->plan->batchsize;
 
         for (int i = 2; i < this->signature.fft_DataDim - 1; i++)
         {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list