[clfft] 03/13: add logit to split 1d to 2d when inplace is strictly required
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Jun 5 01:29:37 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/master
in repository clfft.
commit 194ae3423cf9f2ae7bdff0295f152bf08878efd8
Author: Timmy <timmy.liu at amd.com>
Date: Fri Apr 29 18:51:44 2016 -0500
add logit to split 1d to 2d when inplace is strictly required
---
src/library/plan.cpp | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 150 insertions(+), 1 deletion(-)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 5e4de36..10d5072 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -35,6 +35,143 @@ const std::string beginning_of_binary( "<[
const std::string end_of_binary( "<[�_I_may_be_a_sorry_case,_but_I_don't_write_jokes_in_base_13_�]>" );
const std::string end_of_file( "<[�_You're_off_the_edge_of_the_map,_mate._Here_there_be_monsters_�]>" );
+static bool pow235(size_t num, size_t &pow2, size_t &pow3, size_t &pow5)
+{
+ //helper that factors num over the radices 2, 3 and 5: every factor found increments pow2, pow3 or pow5 (the counters are not reset here, so the caller must zero them first); returns true when no other prime factor is left
+ if (num % 2 != 0 && num % 3 != 0 && num % 5 != 0)
+ return false; //not divisible by 2, 3 or 5 (note: this also rejects num == 1)
+
+ while (num > 1) //num == 0 passes the check above, skips this loop and falls through to return true
+ {
+ if (num % 5 == 0)
+ {
+ num /= 5;
+ pow5++;
+ continue;
+ }
+ if (num % 3 == 0)
+ {
+ num /= 3;
+ pow3++;
+ continue;
+ }
+ if (num % 2 == 0)
+ {
+ num /= 2;
+ pow2++;
+ continue;
+ }
+ return false; //a factor other than 2, 3 or 5 remains; the counters keep the increments made so far
+ }
+ return true;
+}
+
+static bool split1D_for_inplace(size_t num, vector<vector<size_t> > &splitNums, clfftPrecision precision)
+{
+ /* a helper function that recursively splits a big 1D length into friendly 2D sizes for inplace transpose kernels
+ currently only lengths of the form 2^pow2 * 3^pow3 * 5^pow5 are supported; false is returned for anything else
+ each split is the pair {temp*divide_factor, temp}, so one of the dimensions is a multiple of the other dimension.
+ And this multiple (divide_factor) is a product of distinct radices taken from 2, 3 and 5 (or 1 when all powers are even).
+ every split is appended to splitNums and each split dimension is split further until it is <= the threshold (4096, or 2048 for CLFFT_DOUBLE); nothing is appended when num is already <= the threshold
+ */
+ size_t threshold = 4096;
+ if (precision == CLFFT_DOUBLE)
+ threshold = 2048;
+ if (num <= threshold)
+ return true; //small enough: no split needed, splitNums is left untouched
+ if (num % 2 != 0 && num % 3 != 0 && num % 5 != 0)
+ return false;
+
+ //let's figure out pow2, pow3 and pow5 such that num = 2^pow2 * 3^pow3 * 5^pow5
+ size_t pow2, pow3, pow5;
+ pow2 = pow3 = pow5 = 0;
+ bool status = pow235(num, pow2, pow3, pow5);
+ if (!status)
+ return status;
+
+ size_t divide_factor; //product of the radices whose power is odd; dividing it out leaves a perfect square
+ if (pow2 % 2 != 0)
+ {
+ //pow2 is odd
+ if (pow3 % 2 != 0)
+ {
+ //pow2 and pow3 are odd
+ if (pow5 % 2 != 0)
+ {
+ //pow2, pow3 and pow5 are odd
+ //one dimension is 2*3*5 = 30 times bigger than the other dimension
+ divide_factor = 2 * 3 * 5;
+ }
+ else
+ {
+ //pow2 and pow3 are odd, pow5 is even
+ //one dimension is 2*3 = 6 times bigger than the other dimension
+ divide_factor = 2 * 3;
+ }
+ }
+ else
+ {
+ //pow2 is odd, pow3 is even
+ if (pow5 % 2 != 0)
+ {
+ //pow2 and pow5 are odd, pow3 is even
+ divide_factor = 2 * 5;
+ }
+ else
+ {
+ //pow2 is odd, pow3 and pow5 are even
+ divide_factor = 2;
+ }
+
+ }
+ }
+ else
+ {
+ //pow2 is even
+ if (pow3 % 2 != 0)
+ {
+ //pow3 is odd, pow2 is even
+ if (pow5 % 2 != 0)
+ {
+ //pow2 is even, pow3 and pow5 are odd
+ divide_factor = 3 * 5;
+ }
+ else
+ {
+ //pow2 and pow5 are even, pow3 is odd
+ divide_factor = 3;
+ }
+ }
+ else
+ {
+ //pow2 and pow3 are even
+ if (pow5 % 2 != 0)
+ {
+ //pow5 is odd, pow2 and pow3 are even
+ divide_factor = 5;
+ }
+ else
+ {
+ //all even
+ divide_factor = 1;
+ }
+
+ }
+ }
+
+ num = num / divide_factor;
+ //now the remaining num should have an even number of pow2, pow3 and pow5, so it is a perfect square and we can take its sqrt
+ size_t temp = sqrt(num); //the sqrt result is converted to size_t; NOTE(review): presumably exact for the lengths used here - confirm for very large num
+ vector<size_t> splitVec;
+ splitVec.push_back(temp*divide_factor);
+ splitVec.push_back(temp);
+ splitNums.push_back(splitVec); //this level's split is appended before the recursive calls, so splitNums[0] is the top-level split
+
+ status = status && split1D_for_inplace(temp*divide_factor, splitNums, precision);
+ status = status && split1D_for_inplace(temp, splitNums, precision); //not evaluated when the call above failed; entries already appended stay in splitNums
+ return status;
+
+}
// Returns CLFFT_SUCCESS if the fp64 is present, CLFFT_DEVICE_NO_DOUBLE if it is not found.
clfftStatus checkDevExt( std::string ext, const cl_device_id &device )
@@ -629,6 +766,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
}
}
// add some special cases
+ /*
if (fftPlan->length[0] == 10000)
clLengths[1] = 100;//100 x 100
if (fftPlan->length[0] == 100000)
@@ -639,7 +777,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
clLengths[1] = 10000;//10,000 x 10,000
if (fftPlan->length[0] == 1000000000)
clLengths[1] = 10000;//10,000 x 100,000
-
+
if (fftPlan->length[0] == 3099363912)
clLengths[1] = 78732;//39366 x 78732
if (fftPlan->length[0] == 39366)
@@ -648,6 +786,17 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
clLengths[1] = 162;//162*486
if (fftPlan->length[0] == 354294)
clLengths[1] = 243;
+ */
+ if (clfftGetRequestLibNoMemAlloc() &&
+ fftPlan->placeness == CLFFT_INPLACE &&
+ (fftPlan->inputLayout == fftPlan->outputLayout) )
+ {
+ //for inplace fft with inplace transpose, the split logic is different
+ vector<vector<size_t> > splitNums;
+ bool implemented = split1D_for_inplace(fftPlan->length[0], splitNums, fftPlan->precision);
+ if (implemented)
+ clLengths[1] = splitNums[0][0];
+ }
clLengths[0] = fftPlan->length[0]/clLengths[1];
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list