[clfft] 22/128: fixing a C2R issue that produces a global work-item count that is not a multiple of the local work-group size

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:34 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 8db381c7b517fc703f719b61f9f8609148f21571
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Tue Aug 18 16:28:58 2015 -0500

    fixing a C2R issue that produces a global work-item count that is not a multiple of the local work-group size
---
 src/library/generator.copy.cpp | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp
index 3315e0d..b5d89ef 100644
--- a/src/library/generator.copy.cpp
+++ b/src/library/generator.copy.cpp
@@ -252,11 +252,15 @@ namespace CopyGenerator
 			// Setup registers
 			str += "\t"; str += RegBaseType<PR>(2); str += " R;\n\n";
 
+
+			size_t NtRounded64 = DivRoundingUp<size_t>(Nt,64) * 64;
+
 			if(!general)
 			{
 				// Setup variables
-				str += "\tuint batch, mel, mel2;\n\t";
-				str += "batch = me/"; str += SztToStr(Nt); str += ";\n\t";
+				str += "\tuint batch, meg, mel, mel2;\n\t";
+				str += "batch = me/"; str += SztToStr(NtRounded64); str += ";\n\t";
+				str += "meg = me%"; str += SztToStr(NtRounded64); str += ";\n\t";
 				str += "mel = me%"; str += SztToStr(Nt); str += ";\n\t";
 				str += "mel2 = ("; str += SztToStr(N); str += " - mel)%"; str += SztToStr(N); str += ";\n\n";
 			}
@@ -346,6 +350,7 @@ namespace CopyGenerator
 			}
 			else
 			{
+				str += "if(meg < "; str += SztToStr(Nt); str += ")\n\t{\n\t";
 				if(c2h)
 				{
 					if(inIlvd)
@@ -384,7 +389,7 @@ namespace CopyGenerator
 					{
 						str += "lwbOut[0] = R;\n\t";
 						str += "R.y = -R.y;\n\t";
-						str += "lwbOut2[0] = R;\n\n";
+						str += "lwbOut2[0] = R;\n\t";
 					}
 					else
 					{
@@ -392,9 +397,10 @@ namespace CopyGenerator
 						str += "lwbOutIm[0] = R.y;\n\t";
 						str += "R.y = -R.y;\n\t";
 						str += "lwbOutRe2[0] = R.x;\n\t";
-						str += "lwbOutIm2[0] = R.y;\n\n";
+						str += "lwbOutIm2[0] = R.y;\n\t";
 					}
 				}
+				str += "}\n\n";
 			}
 
 			str += "}\n";
@@ -463,7 +469,7 @@ clfftStatus FFTGeneratedCopyAction::getWorkSizes (std::vector<size_t> & globalWS
 				}
 				else
 				{
-					count *= (1 + this->signature.fft_N[0]/2); 
+					count *= (DivRoundingUp<size_t>((1 + this->signature.fft_N[0]/2), 64) * 64); 
 				}
 			}
 			break;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list