[clfft] 07/23: fixing ECC issue in inverse C2R transform; pow2 only

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Aug 18 16:08:20 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.

commit ccff4c9e438efd0f8d2e6d442a1c5284067f6e63
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Wed Aug 5 15:53:40 2015 -0500

    fixing ECC issue in inverse C2R transform; pow2 only
---
 src/library/generator.stockham.cpp | 95 ++++++++++++++++++++++++++++++++++++++
 src/tests/accuracy_test_pow3.cpp   |  2 +-
 src/tests/accuracy_test_pow5.cpp   |  2 +-
 3 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index d145234..4f39e0c 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -877,6 +877,94 @@ namespace StockhamGenerator
 				}
 				return;
 			}
+
+			// block to rearrange writes of adjacent memory locations together
+			if(linearRegs && (flag == SR_WRITE) && (nextPass == NULL))
+			{
+				for(size_t r=0; r<radix; r++)
+				{
+					butterflyIndex = numPrev;
+
+					for(size_t i=0; i<numB; i++)
+					{
+						if(realSpecial && (nextPass == NULL) && (r > (radix/2)))
+							break;
+
+						if(realSpecial && (nextPass == NULL) && (r == radix/2) && (i != 0))
+							break;
+
+						if(realSpecial && (nextPass == NULL) && (r == radix/2) && (i == 0))
+							passStr += "\n\t}\n\tif( rw && !me)\n\t{";
+
+						for(size_t c=cStart; c<cEnd; c++) // component loop: 0 - real, 1 - imaginary
+						{
+							std::string tail;
+							std::string regIndex;
+							regIndex = "(*R";
+							std::string buffer;
+
+							// Write real & imag at once
+							if(interleaved && (component == SR_COMP_BOTH))
+							{
+								assert(bufferRe.compare(bufferIm) == 0); // Make sure Real & Imag buffer strings are same for interleaved data
+								buffer = bufferRe;
+								RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ")";
+								tail = "";
+							}
+							else
+							{
+								if(c == 0)
+								{
+									RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ").x";
+									buffer = bufferRe;
+									tail = interleaved ? ".x" : "";
+								}
+								else
+								{
+									RegBaseAndCountAndPos("", i*radix + r, regIndex); regIndex += ").y";
+									buffer = bufferIm;
+									tail = interleaved ? ".y" : "";
+								}
+							}
+
+							passStr += "\n\t";
+							passStr += buffer; passStr += "["; passStr += offset; passStr += " + ( ";
+
+							if( (numButterfly * workGroupSize) > algLS )
+							{
+								passStr += "(("; passStr += SztToStr(numButterfly);
+								passStr += "*me + "; passStr += SztToStr(butterflyIndex); passStr += ")/";
+								passStr += SztToStr(algLS); passStr += ")*"; passStr += SztToStr(algL); passStr += " + (";
+								passStr += SztToStr(numButterfly); passStr += "*me + "; passStr += SztToStr(butterflyIndex);
+								passStr += ")%"; passStr += SztToStr(algLS); passStr += " + ";
+							}
+							else
+							{
+								passStr += SztToStr(numButterfly); passStr += "*me + "; passStr += SztToStr(butterflyIndex);
+								passStr += " + ";
+							}
+
+							passStr += SztToStr(r*algLS); passStr += " )*"; passStr += SztToStr(stride); passStr += "]";
+							passStr += tail; passStr += " = "; passStr += regIndex;
+							if(scale != 1.0f) { passStr += " * "; passStr += FloatToStr(scale); passStr += FloatSuffix<PR>(); }
+							passStr += ";";
+
+							// Since we write real & imag at once, we break the loop
+							if(interleaved && (component == SR_COMP_BOTH))
+								break;
+						}
+
+						if(realSpecial && (nextPass == NULL) && (r == radix/2) && (i == 0))
+							passStr += "\n\t}\n\tif(rw)\n\t{";
+
+						butterflyIndex++;
+					}
+				}
+
+				return;
+			}
+
+
 			for(size_t i=0; i<numB; i++)
 			{
 				std::string regBaseCount = regBase;
@@ -1453,7 +1541,14 @@ namespace StockhamGenerator
 							else
 							{
 								std::string idxStr, idxStrRev;
+								if((length == 2) || ((length & (length - 1)) != 0))
+								{
 								idxStr += SztToStr(bid); idxStr += "*me +"; idxStr += oddpadd; idxStr += SztToStr(lid);
+								}
+								else
+								{								
+								idxStr += "me + "; idxStr += SztToStr(1 + length*(r%bid)/numCR); idxStr += oddpadd;
+								}
 								idxStrRev += SztToStr(length); idxStrRev += " - ("; idxStrRev += idxStr; idxStrRev += " )";
 
 								passStr += "\n\t";
diff --git a/src/tests/accuracy_test_pow3.cpp b/src/tests/accuracy_test_pow3.cpp
index 844e215..cf7777e 100644
--- a/src/tests/accuracy_test_pow3.cpp
+++ b/src/tests/accuracy_test_pow3.cpp
@@ -48,7 +48,7 @@ protected:
 	}
 };
 
-namespace power2
+namespace power3
 {
 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
 // ^^^^^^^^^^^^^^^^^^^^^^^ normal 1D ^^^^^^^^^^^^^^^^^^^^^^ //
diff --git a/src/tests/accuracy_test_pow5.cpp b/src/tests/accuracy_test_pow5.cpp
index c73f524..0861c76 100644
--- a/src/tests/accuracy_test_pow5.cpp
+++ b/src/tests/accuracy_test_pow5.cpp
@@ -48,7 +48,7 @@ protected:
 	}
 };
 
-namespace power2
+namespace power5
 {
 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
 // ^^^^^^^^^^^^^^^^^^^^^^^ normal 1D ^^^^^^^^^^^^^^^^^^^^^^ //

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list