[clfft] 22/128: fixing a C2R issue that produces global work-items not multiple of local
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:34 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 8db381c7b517fc703f719b61f9f8609148f21571
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Tue Aug 18 16:28:58 2015 -0500
fixing a C2R issue that produces global work-items not multiple of local
---
src/library/generator.copy.cpp | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp
index 3315e0d..b5d89ef 100644
--- a/src/library/generator.copy.cpp
+++ b/src/library/generator.copy.cpp
@@ -252,11 +252,15 @@ namespace CopyGenerator
// Setup registers
str += "\t"; str += RegBaseType<PR>(2); str += " R;\n\n";
+
+ size_t NtRounded64 = DivRoundingUp<size_t>(Nt,64) * 64;
+
if(!general)
{
// Setup variables
- str += "\tuint batch, mel, mel2;\n\t";
- str += "batch = me/"; str += SztToStr(Nt); str += ";\n\t";
+ str += "\tuint batch, meg, mel, mel2;\n\t";
+ str += "batch = me/"; str += SztToStr(NtRounded64); str += ";\n\t";
+ str += "meg = me%"; str += SztToStr(NtRounded64); str += ";\n\t";
str += "mel = me%"; str += SztToStr(Nt); str += ";\n\t";
str += "mel2 = ("; str += SztToStr(N); str += " - mel)%"; str += SztToStr(N); str += ";\n\n";
}
@@ -346,6 +350,7 @@ namespace CopyGenerator
}
else
{
+ str += "if(meg < "; str += SztToStr(Nt); str += ")\n\t{\n\t";
if(c2h)
{
if(inIlvd)
@@ -384,7 +389,7 @@ namespace CopyGenerator
{
str += "lwbOut[0] = R;\n\t";
str += "R.y = -R.y;\n\t";
- str += "lwbOut2[0] = R;\n\n";
+ str += "lwbOut2[0] = R;\n\t";
}
else
{
@@ -392,9 +397,10 @@ namespace CopyGenerator
str += "lwbOutIm[0] = R.y;\n\t";
str += "R.y = -R.y;\n\t";
str += "lwbOutRe2[0] = R.x;\n\t";
- str += "lwbOutIm2[0] = R.y;\n\n";
+ str += "lwbOutIm2[0] = R.y;\n\t";
}
}
+ str += "}\n\n";
}
str += "}\n";
@@ -463,7 +469,7 @@ clfftStatus FFTGeneratedCopyAction::getWorkSizes (std::vector<size_t> & globalWS
}
else
{
- count *= (1 + this->signature.fft_N[0]/2);
+ count *= (DivRoundingUp<size_t>((1 + this->signature.fft_N[0]/2), 64) * 64);
}
}
break;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list