[clfft] 97/107: fixing some more bugs
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:41 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master in repository clfft.
commit e7d49b0ca8d201de43f89f7e6d7ddde00f20e20e
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Wed Jun 24 19:55:54 2015 -0500
fixing some more bugs
---
src/library/generator.stockham.cpp | 20 ++++++++++++--------
src/library/plan.cpp | 10 ++++++++--
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 4016acc..93d073d 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -957,7 +957,7 @@ namespace StockhamGenerator
passStr += rType; passStr += " TR, TI;\n\t\t";
- if(realSpecial)
+ if(realSpecial && (flag == SR_TWMUL_3STEP))
{
if(fwd)
{
@@ -969,21 +969,21 @@ namespace StockhamGenerator
passStr += regImagIndex; passStr += ");\n\t\t";
passStr += "}\n\t\telse\n\t\t{\n\t\t";
-
- passStr += "TR = (W.x * "; passStr += regRealIndex; passStr += ") + (W.y * ";
+
+ passStr += "TR = (W.x * "; passStr += regRealIndex; passStr += ") + (W.y * ";
passStr += regImagIndex; passStr += ");\n\t\t";
- passStr += "TI = -(W.y * "; passStr += regRealIndex; passStr += ") + (W.x * ";
+ passStr += "TI = (W.y * "; passStr += regRealIndex; passStr += ") - (W.x * ";
passStr += regImagIndex; passStr += ");\n\t\t";
-
+
passStr += "}\n\t\t";
}
else
{
passStr += "if(t==0)\n\t\t{\n\t\t";
- passStr += "TR = (W.x * "; passStr += regRealIndex; passStr += ") + (W.y * ";
+ passStr += "TR = (W.x * "; passStr += regRealIndex; passStr += ") + (W.y * ";
passStr += regImagIndex; passStr += ");\n\t\t";
- passStr += "TI = -(W.y * "; passStr += regRealIndex; passStr += ") + (W.x * ";
+ passStr += "TI = (W.y * "; passStr += regRealIndex; passStr += ") - (W.x * ";
passStr += regImagIndex; passStr += ");\n\t\t";
passStr += "}\n\t\telse\n\t\t{\n\t\t";
@@ -2502,7 +2502,11 @@ namespace StockhamGenerator
inReal = (params.fft_inputLayout == CLFFT_REAL) ? true : false;
outReal = (params.fft_outputLayout == CLFFT_REAL) ? true : false;
- size_t large1D = params.fft_N[0] * params.fft_N[1];
+ size_t large1D = 0;
+ if(params.fft_realSpecial)
+ large1D = params.fft_N[0] * params.fft_realSpecial_Nr;
+ else
+ large1D = params.fft_N[0] * params.fft_N[1];
// Pragma
str += ClPragma<PR>();
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 62004e4..fcf0c3a 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -811,10 +811,16 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
if( (smallerDim % 64 == 0) || (biggerDim % 64 == 0) )
padding = 64;
+
if (fftPlan->tmpBufSize==0 )
{
- fftPlan->tmpBufSize = (smallerDim + padding) * biggerDim *
- fftPlan->batchsize * fftPlan->ElementSize() / 2;
+ size_t Nf = (1 + smallerDim/2) * biggerDim;
+ fftPlan->tmpBufSize = (smallerDim + padding) * biggerDim / 2;
+
+ if(fftPlan->tmpBufSize < Nf)
+ fftPlan->tmpBufSize = Nf;
+
+ fftPlan->tmpBufSize *= ( fftPlan->batchsize * fftPlan->ElementSize() );
for (size_t index=1; index < fftPlan->length.size(); index++)
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list