[clfft] 82/128: fixing accuracy issues

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:41 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit ed99fdd2cf4f3ca96af013d34a11c083fd448fd1
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Mon Sep 21 19:41:21 2015 -0500

    fixing accuracy issues
---
 src/library/generator.transpose.square.cpp | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/library/generator.transpose.square.cpp b/src/library/generator.transpose.square.cpp
index dfc16c1..c2b5efa 100644
--- a/src/library/generator.transpose.square.cpp
+++ b/src/library/generator.transpose.square.cpp
@@ -131,6 +131,9 @@ static void OffsetCalc(std::stringstream& transKernel, const FFTKernelGenKeyPara
 // the generator that it wants the twiddle factors generated inside of the transpose
 static clfftStatus genTwiddleMath( const FFTKernelGenKeyParams& params, std::stringstream& transKernel, const std::string& dtComplex, bool fwd )
 {
+
+	clKernWrite( transKernel, 9 ) << std::endl;
+
     clKernWrite( transKernel, 9 ) << dtComplex << " Wm = TW3step( (t_gx_p*32 + lidx) * (t_gy_p*32 + lidy + loop*8) );" << std::endl;
 	clKernWrite( transKernel, 9 ) << dtComplex << " Wt = TW3step( (t_gy_p*32 + lidx) * (t_gx_p*32 + lidy + loop*8) );" << std::endl;
     clKernWrite( transKernel, 9 ) << dtComplex << " Tm, Tt;" << std::endl;
@@ -155,6 +158,7 @@ static clfftStatus genTwiddleMath( const FFTKernelGenKeyParams& params, std::str
     clKernWrite( transKernel, 9 ) << "tmpt.x = Tt.x;" << std::endl;
     clKernWrite( transKernel, 9 ) << "tmpt.y = Tt.y;" << std::endl;
 
+	clKernWrite( transKernel, 9 ) << std::endl;
 
     return CLFFT_SUCCESS;
 }
@@ -582,6 +586,9 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeSquareAction::
 					return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
 			}
 
+			// If requested, generate the Twiddle math to multiply constant values
+			if( params.fft_3StepTwiddle )
+				genTwiddleMath( params, transKernel, dtComplex, fwd );
 
 			clKernWrite(transKernel, 9) << "xy_s[index] = tmpm;" << std::endl;
 			clKernWrite(transKernel, 9) << "yx_s[index] = tmpt;" << std::endl;
@@ -598,20 +605,20 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeSquareAction::
 			{
 				case CLFFT_COMPLEX_INTERLEAVED:
 					clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16/reShapeFactor << ")<" << params.fft_N[0] << "&& idx<" << params.fft_N[0] << ")" << std::endl;			
-					clKernWrite(transKernel, 12) << "xy_s[index] = inputA[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx];" << std::endl;
+					clKernWrite(transKernel, 12) << "tmpm = inputA[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx];" << std::endl;
 					clKernWrite(transKernel, 9) << "if ((t_gy_p *" <<16*reShapeFactor << " + lidx)<" << params.fft_N[0] << " && (t_gx_p * " << 16*reShapeFactor << " + lidy + loop*" << 16/reShapeFactor << ")<" << params.fft_N[0] << ") " << std::endl;
-					clKernWrite(transKernel, 12) << "yx_s[index] = inputA[(lidy + loop*" << 16/reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx];" << std::endl;
+					clKernWrite(transKernel, 12) << "tmpt = inputA[(lidy + loop*" << 16/reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx];" << std::endl;
 
 					break;
 				case CLFFT_COMPLEX_PLANAR:
 					dtInput = dtPlanar;
 					dtOutput = dtPlanar;
 					clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16/reShapeFactor << ")<" << params.fft_N[0] << "&& idx<" << params.fft_N[0] << ") {" << std::endl;			
-					clKernWrite(transKernel, 12) << "xy_s[index].x = inputA_R[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx];" << std::endl;
-					clKernWrite(transKernel, 12) << "xy_s[index].y = inputA_I[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx]; }" << std::endl;
+					clKernWrite(transKernel, 12) << "tmpm.x = inputA_R[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx];" << std::endl;
+					clKernWrite(transKernel, 12) << "tmpm.y = inputA_I[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx]; }" << std::endl;
 					clKernWrite(transKernel, 9) << "if ((t_gy_p *" <<16*reShapeFactor << " + lidx)<" << params.fft_N[0] << " && (t_gx_p * " << 16*reShapeFactor << " + lidy + loop*" << 16/reShapeFactor << ")<" << params.fft_N[0] << ") {" << std::endl;
-					clKernWrite(transKernel, 12) << "yx_s[index].x = inputA_R[(lidy + loop*" << 16/reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx];" << std::endl;
-					clKernWrite(transKernel, 12) << "yx_s[index].y = inputA_I[(lidy + loop*" << 16/reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx]; }" << std::endl;
+					clKernWrite(transKernel, 12) << "tmpt.x = inputA_R[(lidy + loop*" << 16/reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx];" << std::endl;
+					clKernWrite(transKernel, 12) << "tmpt.y = inputA_I[(lidy + loop*" << 16/reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx]; }" << std::endl;
 
 					break;
 				case CLFFT_HERMITIAN_INTERLEAVED:
@@ -624,6 +631,13 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeSquareAction::
 			}
 
 
+			// If requested, generate the Twiddle math to multiply constant values
+			if( params.fft_3StepTwiddle )
+				genTwiddleMath( params, transKernel, dtComplex, fwd );
+
+			clKernWrite(transKernel, 9) << "xy_s[index] = tmpm;" << std::endl;
+			clKernWrite(transKernel, 9) << "yx_s[index] = tmpt;" << std::endl;
+
 			clKernWrite(transKernel, 9) << "}" << std::endl;
 			clKernWrite(transKernel, 3) << "}" << std::endl;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list