[clfft] 07/23: fixing ECC issue in inverse C2R transform; pow2 only
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Aug 18 16:08:20 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.
commit ccff4c9e438efd0f8d2e6d442a1c5284067f6e63
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Wed Aug 5 15:53:40 2015 -0500
fixing ECC issue in inverse C2R transform; pow2 only
---
src/library/generator.stockham.cpp | 95 ++++++++++++++++++++++++++++++++++++++
src/tests/accuracy_test_pow3.cpp | 2 +-
src/tests/accuracy_test_pow5.cpp | 2 +-
3 files changed, 97 insertions(+), 2 deletions(-)
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index d145234..4f39e0c 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -877,6 +877,94 @@ namespace StockhamGenerator
}
return;
}
+
+ // block to rearrange writes of adjacent memory locations together
+ if(linearRegs && (flag == SR_WRITE) && (nextPass == NULL))
+ {
+ for(size_t r=0; r<radix; r++)
+ {
+ butterflyIndex = numPrev;
+
+ for(size_t i=0; i<numB; i++)
+ {
+ if(realSpecial && (nextPass == NULL) && (r > (radix/2)))
+ break;
+
+ if(realSpecial && (nextPass == NULL) && (r == radix/2) && (i != 0))
+ break;
+
+ if(realSpecial && (nextPass == NULL) && (r == radix/2) && (i == 0))
+ passStr += "\n\t}\n\tif( rw && !me)\n\t{";
+
+ for(size_t c=cStart; c<cEnd; c++) // component loop: 0 - real, 1 - imaginary
+ {
+ std::string tail;
+ std::string regIndex;
+ regIndex = "(*R";
+ std::string buffer;
+
+ // Write real & imag at once
+ if(interleaved && (component == SR_COMP_BOTH))
+ {
+ assert(bufferRe.compare(bufferIm) == 0); // Make sure Real & Imag buffer strings are same for interleaved data
+ buffer = bufferRe;
+ RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ")";
+ tail = "";
+ }
+ else
+ {
+ if(c == 0)
+ {
+ RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ").x";
+ buffer = bufferRe;
+ tail = interleaved ? ".x" : "";
+ }
+ else
+ {
+ RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ").y";
+ buffer = bufferIm;
+ tail = interleaved ? ".y" : "";
+ }
+ }
+
+ passStr += "\n\t";
+ passStr += buffer; passStr += "["; passStr += offset; passStr += " + ( ";
+
+ if( (numButterfly * workGroupSize) > algLS )
+ {
+ passStr += "(("; passStr += SztToStr(numButterfly);
+ passStr += "*me + "; passStr += SztToStr(butterflyIndex); passStr += ")/";
+ passStr += SztToStr(algLS); passStr += ")*"; passStr += SztToStr(algL); passStr += " + (";
+ passStr += SztToStr(numButterfly); passStr += "*me + "; passStr += SztToStr(butterflyIndex);
+ passStr += ")%"; passStr += SztToStr(algLS); passStr += " + ";
+ }
+ else
+ {
+ passStr += SztToStr(numButterfly); passStr += "*me + "; passStr += SztToStr(butterflyIndex);
+ passStr += " + ";
+ }
+
+ passStr += SztToStr(r*algLS); passStr += " )*"; passStr += SztToStr(stride); passStr += "]";
+ passStr += tail; passStr += " = "; passStr += regIndex;
+ if(scale != 1.0f) { passStr += " * "; passStr += FloatToStr(scale); passStr += FloatSuffix<PR>(); }
+ passStr += ";";
+
+ // Since we write real & imag at once, we break the loop
+ if(interleaved && (component == SR_COMP_BOTH))
+ break;
+ }
+
+ if(realSpecial && (nextPass == NULL) && (r == radix/2) && (i == 0))
+ passStr += "\n\t}\n\tif(rw)\n\t{";
+
+ butterflyIndex++;
+ }
+ }
+
+ return;
+ }
+
+
for(size_t i=0; i<numB; i++)
{
std::string regBaseCount = regBase;
@@ -1453,7 +1541,14 @@ namespace StockhamGenerator
else
{
std::string idxStr, idxStrRev;
+ if((length == 2) || ((length & (length - 1)) != 0))
+ {
idxStr += SztToStr(bid); idxStr += "*me +"; idxStr += oddpadd; idxStr += SztToStr(lid);
+ }
+ else
+ {
+ idxStr += "me + "; idxStr += SztToStr(1 + length*(r%bid)/numCR); idxStr += oddpadd;
+ }
idxStrRev += SztToStr(length); idxStrRev += " - ("; idxStrRev += idxStr; idxStrRev += " )";
passStr += "\n\t";
diff --git a/src/tests/accuracy_test_pow3.cpp b/src/tests/accuracy_test_pow3.cpp
index 844e215..cf7777e 100644
--- a/src/tests/accuracy_test_pow3.cpp
+++ b/src/tests/accuracy_test_pow3.cpp
@@ -48,7 +48,7 @@ protected:
}
};
-namespace power2
+namespace power3
{
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
// ^^^^^^^^^^^^^^^^^^^^^^^ normal 1D ^^^^^^^^^^^^^^^^^^^^^^ //
diff --git a/src/tests/accuracy_test_pow5.cpp b/src/tests/accuracy_test_pow5.cpp
index c73f524..0861c76 100644
--- a/src/tests/accuracy_test_pow5.cpp
+++ b/src/tests/accuracy_test_pow5.cpp
@@ -48,7 +48,7 @@ protected:
}
};
-namespace power2
+namespace power5
{
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
// ^^^^^^^^^^^^^^^^^^^^^^^ normal 1D ^^^^^^^^^^^^^^^^^^^^^^ //
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list