[clfft] 18/74: full working version 1.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:13 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.

commit e8800e22293880f58202882185d2caa556900eb9
Author: santanu-thangaraj <t.santanu at gmail.com>
Date:   Tue Nov 24 19:56:44 2015 +0530

    full working version 1.
---
 src/library/generator.transpose.nonsquare.cpp | 8 ++++----
 src/library/plan.cpp                          | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/library/generator.transpose.nonsquare.cpp b/src/library/generator.transpose.nonsquare.cpp
index 252059c..93f727f 100644
--- a/src/library/generator.transpose.nonsquare.cpp
+++ b/src/library/generator.transpose.nonsquare.cpp
@@ -1220,7 +1220,7 @@ clKernWrite(transKernel, 3) << "}" << std::endl; // end else
                 {
                 case CLFFT_COMPLEX_INTERLEAVED:
                     clKernWrite(transKernel, 12) << "inputA[loop * " << smaller_dim << " +  p * " << smaller_dim << " + j] = loc_tot_mem[p * " << smaller_dim << " + j];" << std::endl;
-                    clKernWrite(transKernel, 12) << "inputA[" << smaller_dim  * smaller_dim << "+ loop * " << smaller_dim << " +  p * " << smaller_dim << " + j] = loc_tot_mem[(" << num_lines_loaded << " + p)* " << smaller_dim << " + j] = ;" << std::endl;
+                    clKernWrite(transKernel, 12) << "inputA[" << smaller_dim  * smaller_dim << "+ loop * " << smaller_dim << " +  p * " << smaller_dim << " + j] = loc_tot_mem[(" << num_lines_loaded << " + p)* " << smaller_dim << " + j];" << std::endl;
                     break;
                 case CLFFT_COMPLEX_PLANAR:
                     clKernWrite(transKernel, 12) << "inputA_R[loop * " << smaller_dim << " +  p * " << smaller_dim << " + j] = loc_tot_mem[p * " << smaller_dim << " + j].x;" << std::endl;
@@ -1241,17 +1241,17 @@ clKernWrite(transKernel, 3) << "}" << std::endl; // end else
         clKernWrite(transKernel, 3) << "loc_swap_ptr[0] = te;" << std::endl;
         clKernWrite(transKernel, 3) << "loc_swap_ptr[1] = to;" << std::endl;
 
-        clKernWrite(transKernel, 3) << "int swap_inx;" << std::endl;
+        clKernWrite(transKernel, 3) << "int swap_inx = 0;" << std::endl;
         clKernWrite(transKernel, 3) << "for (int loop = 0; loop < " << num_swaps << "; loop ++){" << std::endl;
         clKernWrite(transKernel, 6) << "swap_inx = 1 - swap_inx;" << std::endl;
         switch (params.fft_inputLayout)
         {
         case CLFFT_COMPLEX_INTERLEAVED:
         case CLFFT_REAL:   
-            clKernWrite(transKernel, 6) << "swap(inputA, loc_swap_ptr[swap_inx], loc_swap_ptr[1 - swap_inx], swap_table[0], swap_table[1], swap_table[2]);" << std::endl;
+            clKernWrite(transKernel, 6) << "swap(inputA, loc_swap_ptr[swap_inx], loc_swap_ptr[1 - swap_inx], swap_table[loop][0], swap_table[loop][1], swap_table[loop][2]);" << std::endl;
             break;
         case CLFFT_COMPLEX_PLANAR:
-            clKernWrite(transKernel, 6) << "swap(inputA_R, inputA_I, loc_swap_ptr[swap_inx], loc_swap_ptr[1 - swap_inx], swap_table[0], swap_table[1], swap_table[2]);" << std::endl;
+            clKernWrite(transKernel, 6) << "swap(inputA_R, inputA_I, loc_swap_ptr[swap_inx], loc_swap_ptr[1 - swap_inx], swap_table[loop][0], swap_table[loop][1], swap_table[loop][2]);" << std::endl;
             break;
         
         }
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 75abcf3..5f2b0a8 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1886,8 +1886,8 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
                     if (!test_performed)
                     {
                         //test_performed = 1;
-                        fftPlan->length[1] = 64;// fftPlan->length[1];
-                        fftPlan->length[0] = fftPlan->length[1] * 2;
+                        fftPlan->length[0] = 64;// fftPlan->length[1];
+                        fftPlan->length[1] = fftPlan->length[0] * 2;
                         fftPlan->action = new FFTGeneratedTransposeNonSquareAction(plHandle, fftPlan, *commQueueFFT, err);
                         OPENCL_V(err, "FFTGeneratedTransposeNonSquareAction() failed");
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list