[clfft] 11/32: modified swap kernel to handle really huge matrix. added 32 more test cases. passed all added test cases.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Apr 26 08:34:08 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit c8e3eb43941c1486331570eeccf47bcf10ddf627
Author: Timmy <timmy.liu at amd.com>
Date: Sat Mar 19 09:23:36 2016 -0500
modified swap kernel to handle really huge matrix. added 32 more test cases. passed all added test cases.
---
src/library/action.transpose.cpp | 31 ++++++-
src/library/generator.transpose.cpp | 162 +++++++++++++++++++++++++-----------
src/library/plan.cpp | 139 ++++++++++++++++++++++---------
src/tests/accuracy_test_pow3.cpp | 86 ++++++++++++++++++-
src/tests/accuracy_test_pow5.cpp | 88 ++++++++++++++++++++
5 files changed, 413 insertions(+), 93 deletions(-)
diff --git a/src/library/action.transpose.cpp b/src/library/action.transpose.cpp
index 1f04030..2a897bc 100644
--- a/src/library/action.transpose.cpp
+++ b/src/library/action.transpose.cpp
@@ -476,9 +476,9 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::getWorkSizes(std::vector< size
}
else
{
- if (dim_ratio == 2 || dim_ratio == 3 || dim_ratio == 5 || dim_ratio == 10)
+ //if (dim_ratio == 2 || dim_ratio == 3 || dim_ratio == 5 || dim_ratio == 10)
+ if (dim_ratio % 2 == 0 || dim_ratio % 3 == 0 || dim_ratio % 5 == 0 || dim_ratio % 10 == 0)
{
- //1:3 ratio
size_t local_work_size_swap = 256;
std::vector<std::vector<size_t> > permutationTable;
clfft_transpose_generator::permutation_calculation(dim_ratio, smaller_dim, permutationTable);
@@ -487,8 +487,31 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::getWorkSizes(std::vector< size
global_item_size = (permutationTable.size() + 2) * local_work_size_swap * this->plan->batchsize;
else
global_item_size = (permutationTable.size() + 2) * local_work_size_swap * this->plan->batchsize;
- for (int i = 2; i < this->plan->length.size(); i++)
- global_item_size *= this->plan->length[i];
+ //for (int i = 2; i < this->plan->length.size(); i++)
+ // global_item_size *= this->plan->length[i];
+ size_t LDS_per_WG = smaller_dim;
+ while (LDS_per_WG > 1024)//avoiding using too much lds memory. the biggest LDS memory we will allocate would be 1024*sizeof(float2/double2)*2
+ {
+ if (LDS_per_WG % 2 == 0)
+ {
+ LDS_per_WG /= 2;
+ continue;
+ }
+ if (LDS_per_WG % 3 == 0)
+ {
+ LDS_per_WG /= 3;
+ continue;
+ }
+ if (LDS_per_WG % 5 == 0)
+ {
+ LDS_per_WG /= 5;
+ continue;
+ }
+ return CLFFT_NOTIMPLEMENTED;
+ }
+
+ size_t WG_per_line = smaller_dim / LDS_per_WG;
+ global_item_size *= WG_per_line;
globalWS.push_back(global_item_size);
localWS.push_back(local_work_size_swap);
}
diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
index 600b71f..d39f19d 100644
--- a/src/library/generator.transpose.cpp
+++ b/src/library/generator.transpose.cpp
@@ -921,10 +921,17 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
{
if (params.fft_placeness == CLFFT_OUTOFPLACE)
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+
+ size_t smaller_dim = (params.fft_N[0] < params.fft_N[1]) ? params.fft_N[0] : params.fft_N[1];
+ size_t bigger_dim = (params.fft_N[0] >= params.fft_N[1]) ? params.fft_N[0] : params.fft_N[1];
+ size_t dim_ratio = bigger_dim / smaller_dim;
+ /*
if ( (params.fft_N[0] != 2 * params.fft_N[1]) && (params.fft_N[1] != 2 * params.fft_N[0]) &&
(params.fft_N[0] != 3 * params.fft_N[1]) && (params.fft_N[1] != 3 * params.fft_N[0]) &&
(params.fft_N[0] != 5 * params.fft_N[1]) && (params.fft_N[1] != 5 * params.fft_N[0]) &&
(params.fft_N[0] != 10 * params.fft_N[1]) && (params.fft_N[1] != 10 * params.fft_N[0]) )
+ */
+ if(dim_ratio % 2 != 0 && dim_ratio % 3 != 0 && dim_ratio % 5 != 0 && dim_ratio % 10 != 0)
{
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
}
@@ -971,9 +978,28 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
break;
}
- size_t smaller_dim = (params.fft_N[0] < params.fft_N[1]) ? params.fft_N[0] : params.fft_N[1];
- size_t bigger_dim = (params.fft_N[0] >= params.fft_N[1]) ? params.fft_N[0] : params.fft_N[1];
- size_t dim_ratio = bigger_dim / smaller_dim;
+
+ size_t LDS_per_WG = smaller_dim;
+ while (LDS_per_WG > 1024)//avoiding using too much lds memory. the biggest LDS memory we will allocate would be 1024*sizeof(float2/double2)*2
+ {
+ if (LDS_per_WG % 2 == 0)
+ {
+ LDS_per_WG /= 2;
+ continue;
+ }
+ if (LDS_per_WG % 3 == 0)
+ {
+ LDS_per_WG /= 3;
+ continue;
+ }
+ if (LDS_per_WG % 5 == 0)
+ {
+ LDS_per_WG /= 5;
+ continue;
+ }
+ return CLFFT_NOTIMPLEMENTED;
+ }
+ size_t WG_per_line = smaller_dim / LDS_per_WG;
size_t input_elm_size_in_bytes;
switch (params.fft_precision)
@@ -1073,8 +1099,8 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
genTransposePrototypeLeadingDimensionBatched(params, local_work_size_swap, dtPlanar, dtComplex, funcNameTW, transKernel, dtInput, dtOutput);
- clKernWrite(transKernel, 3) << "//each wg handles one row of " << smaller_dim << " in memory" << std::endl;
- clKernWrite(transKernel, 3) << "const int num_wg_per_batch = " << permutationTable.size() + 2 << ";" << std::endl; // number of wg per batch = number of independent cycles
+ clKernWrite(transKernel, 3) << "//each wg handles 1/"<< WG_per_line <<" row of " << LDS_per_WG << " in memory" << std::endl;
+ clKernWrite(transKernel, 3) << "const int num_wg_per_batch = " << (permutationTable.size() + 2)*WG_per_line << ";" << std::endl; // number of wg per batch = number of independent cycles
clKernWrite(transKernel, 3) << "int group_id = get_group_id(0);" << std::endl;
clKernWrite(transKernel, 3) << "int idx = get_local_id(0);" << std::endl;
@@ -1098,10 +1124,13 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
default:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
}
- clKernWrite(transKernel, 3) << "group_id -= batch_offset*" << permutationTable.size() + 2 << ";" << std::endl;
+ clKernWrite(transKernel, 3) << "group_id -= batch_offset*" << (permutationTable.size() + 2)*WG_per_line << ";" << std::endl;
clKernWrite(transKernel, 3) << std::endl;
- clKernWrite(transKernel, 3) << "int prev = swap_table[group_id][0];" << std::endl;
+ if(WG_per_line == 1)
+ clKernWrite(transKernel, 3) << "int prev = swap_table[group_id][0];" << std::endl;
+ else
+ clKernWrite(transKernel, 3) << "int prev = swap_table[group_id/" << WG_per_line <<"][0];" << std::endl;
clKernWrite(transKernel, 3) << "int next = 0;" << std::endl;
clKernWrite(transKernel, 3) << std::endl;
@@ -1110,14 +1139,14 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
case CLFFT_REAL:
case CLFFT_COMPLEX_INTERLEAVED:
{
- clKernWrite(transKernel, 3) << "__local " << dtInput << " prevValue[" << smaller_dim << "];" << std::endl;//lds within each wg should be able to store a row block (smaller_dim) of element
- clKernWrite(transKernel, 3) << "__local " << dtInput << " nextValue[" << smaller_dim << "];" << std::endl;
+ clKernWrite(transKernel, 3) << "__local " << dtInput << " prevValue[" << LDS_per_WG << "];" << std::endl;//lds within each wg should be able to store a row block (smaller_dim) of element
+ clKernWrite(transKernel, 3) << "__local " << dtInput << " nextValue[" << LDS_per_WG << "];" << std::endl;
break;
}
case CLFFT_COMPLEX_PLANAR:
{
- clKernWrite(transKernel, 3) << "__local " << dtComplex << " prevValue[" << smaller_dim << "];" << std::endl;//lds within each wg should be able to store a row block (smaller_dim) of element
- clKernWrite(transKernel, 3) << "__local " << dtComplex << " nextValue[" << smaller_dim << "];" << std::endl;
+ clKernWrite(transKernel, 3) << "__local " << dtComplex << " prevValue[" << LDS_per_WG << "];" << std::endl;//lds within each wg should be able to store a row block (smaller_dim) of element
+ clKernWrite(transKernel, 3) << "__local " << dtComplex << " nextValue[" << LDS_per_WG << "];" << std::endl;
break;
}
case CLFFT_HERMITIAN_INTERLEAVED:
@@ -1130,12 +1159,31 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
clKernWrite(transKernel, 3) << std::endl;
if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
{
- clKernWrite(transKernel, 3) << "int group_offset = (prev/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
- << " + (prev%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl; //might look like: int group_offset = (prev/3)*729*3 + (prev%3)*729;
+ if (WG_per_line == 1)
+ {
+ //might look like: int group_offset = (prev/3)*729*3 + (prev%3)*729;
+ clKernWrite(transKernel, 3) << "int group_offset = (prev/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
+ << " + (prev%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl;
+ }
+ else
+ {
+ //if smaller_dim is 2187 > 1024 this should look like int group_offset = (prev/3)*2187*3 + (prev%3)*2187 + (group_id % 3)*729;
+ clKernWrite(transKernel, 3) << "int group_offset = (prev/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
+ << " + (prev%" << dim_ratio << ")*" << smaller_dim << " + (group_id % " << WG_per_line << ")*" << LDS_per_WG << ";" << std::endl;
+ }
}
else
{
- clKernWrite(transKernel, 3) << "int group_offset = (prev*" << smaller_dim << ");" << std::endl; //might look like: int group_offset = prev*729;
+ if (WG_per_line == 1)
+ {
+ //might look like: int group_offset = prev*729;
+ clKernWrite(transKernel, 3) << "int group_offset = (prev*" << smaller_dim << ");" << std::endl;
+ }
+ else
+ {
+ //if smaller_dim is 2187 > 1024 this should look like int group_offset = prev*2187 + (group_id % 3)*729;
+ clKernWrite(transKernel, 3) << "int group_offset = (prev*" << smaller_dim << ") + (group_id % " << WG_per_line << ")*" << LDS_per_WG << ";" << std::endl;
+ }
}
clKernWrite(transKernel, 3) << std::endl;
@@ -1145,14 +1193,14 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
case CLFFT_REAL:
case CLFFT_COMPLEX_INTERLEAVED:
{
- for (int i = 0; i < smaller_dim; i = i + 256)
+ for (int i = 0; i < LDS_per_WG; i = i + 256)
{
- if (i + 256 < smaller_dim)
+ if (i + 256 < LDS_per_WG)
clKernWrite(transKernel, 3) << "prevValue[idx+" << i << "] = inputA[group_offset+idx+" << i << "];" << std::endl;
else
{
// need to handle boundary
- clKernWrite(transKernel, 3) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+ clKernWrite(transKernel, 3) << "if(idx+" << i << "<" << LDS_per_WG << "){" << std::endl;
clKernWrite(transKernel, 6) << "prevValue[idx+" << i << "] = inputA[group_offset+idx+" << i << "];" << std::endl;
clKernWrite(transKernel, 3) << "}" << std::endl;
}
@@ -1164,9 +1212,9 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
case CLFFT_COMPLEX_PLANAR:
{
- for (int i = 0; i < smaller_dim; i = i + 256)
+ for (int i = 0; i < LDS_per_WG; i = i + 256)
{
- if (i + 256 < smaller_dim)
+ if (i + 256 < LDS_per_WG)
{
clKernWrite(transKernel, 3) << "prevValue[idx+" << i << "].x = inputA_R[group_offset+idx+" << i << "];" << std::endl;
clKernWrite(transKernel, 3) << "prevValue[idx+" << i << "].y = inputA_I[group_offset+idx+" << i << "];" << std::endl;
@@ -1174,7 +1222,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
else
{
// need to handle boundary
- clKernWrite(transKernel, 3) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+ clKernWrite(transKernel, 3) << "if(idx+" << i << "<" << LDS_per_WG << "){" << std::endl;
clKernWrite(transKernel, 3) << "prevValue[idx+" << i << "].x = inputA_R[group_offset+idx+" << i << "];" << std::endl;
clKernWrite(transKernel, 3) << "prevValue[idx+" << i << "].y = inputA_I[group_offset+idx+" << i << "];" << std::endl;
clKernWrite(transKernel, 3) << "}" << std::endl;
@@ -1196,9 +1244,17 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
//takes care the last row
clKernWrite(transKernel, 6) << "if (prev == " << smaller_dim * dim_ratio - 1 << ")" << std::endl;
clKernWrite(transKernel, 9) << "next = " << smaller_dim * dim_ratio - 1 << ";" << std::endl;
-
- clKernWrite(transKernel, 6) << "group_offset = (next/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
- << " + (next%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl; //might look like: group_offset = (next/3)*729*3 + (next%3)*729;
+ if (WG_per_line == 1)
+ {
+ clKernWrite(transKernel, 6) << "group_offset = (next/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
+ << " + (next%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl; //might look like: group_offset = (next/3)*729*3 + (next%3)*729;
+ }
+ else
+ {
+ //if smaller_dim is 2187 > 1024 this should look like int group_offset = (next/3)*2187*3 + (next%3)*2187 + (group_id % 3)*729;
+ clKernWrite(transKernel, 6) << "group_offset = (next/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
+ << " + (next%" << dim_ratio << ")*" << smaller_dim << " + (group_id % " << WG_per_line << ")*" << LDS_per_WG << ";" << std::endl;
+ }
}
else
{
@@ -1206,8 +1262,15 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
//takes care the last row
clKernWrite(transKernel, 6) << "if (prev == " << smaller_dim * dim_ratio - 1 << ")" << std::endl;
clKernWrite(transKernel, 9) << "next = " << smaller_dim * dim_ratio - 1 << ";" << std::endl;
-
- clKernWrite(transKernel, 6) << "group_offset = (next*" << smaller_dim << ");" << std::endl; //might look like: int group_offset = prev*729;
+ if (WG_per_line == 1)
+ {
+ clKernWrite(transKernel, 6) << "group_offset = (next*" << smaller_dim << ");" << std::endl; //might look like: int group_offset = prev*729;
+ }
+ else
+ {
+ //if smaller_dim is 2187 > 1024 this should look like int group_offset = next*2187 + (group_id % 3)*729;
+ clKernWrite(transKernel, 6) << "group_offset = (next*" << smaller_dim << ") + (group_id % " << WG_per_line << ")*" << LDS_per_WG << ";" << std::endl;
+ }
}
@@ -1217,14 +1280,14 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
case CLFFT_REAL:
case CLFFT_COMPLEX_INTERLEAVED:
{
- for (int i = 0; i < smaller_dim; i = i + 256)
+ for (int i = 0; i < LDS_per_WG; i = i + 256)
{
- if (i + 256 < smaller_dim)
+ if (i + 256 < LDS_per_WG)
clKernWrite(transKernel, 6) << "nextValue[idx+" << i << "] = inputA[group_offset+idx+" << i << "];" << std::endl;
else
{
// need to handle boundary
- clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+ clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << LDS_per_WG << "){" << std::endl;
clKernWrite(transKernel, 9) << "nextValue[idx+" << i << "] = inputA[group_offset+idx+" << i << "];" << std::endl;
clKernWrite(transKernel, 6) << "}" << std::endl;
}
@@ -1236,9 +1299,9 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
case CLFFT_COMPLEX_PLANAR:
{
- for (int i = 0; i < smaller_dim; i = i + 256)
+ for (int i = 0; i < LDS_per_WG; i = i + 256)
{
- if (i + 256 < smaller_dim)
+ if (i + 256 < LDS_per_WG)
{
clKernWrite(transKernel, 6) << "nextValue[idx+" << i << "].x = inputA_R[group_offset+idx+" << i << "];" << std::endl;
clKernWrite(transKernel, 6) << "nextValue[idx+" << i << "].y = inputA_I[group_offset+idx+" << i << "];" << std::endl;
@@ -1246,7 +1309,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
else
{
// need to handle boundary
- clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+ clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << LDS_per_WG << "){" << std::endl;
clKernWrite(transKernel, 6) << "nextValue[idx+" << i << "].x = inputA_R[group_offset+idx+" << i << "];" << std::endl;
clKernWrite(transKernel, 6) << "nextValue[idx+" << i << "].y = inputA_I[group_offset+idx+" << i << "];" << std::endl;
clKernWrite(transKernel, 6) << "}" << std::endl;
@@ -1258,7 +1321,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
}
- clKernWrite(transKernel, 3) << "barrier(CLK_LOCAL_MEM_FENCE);" << std::endl;
+ clKernWrite(transKernel, 6) << "barrier(CLK_LOCAL_MEM_FENCE);" << std::endl;
clKernWrite(transKernel, 3) << std::endl;
switch (params.fft_inputLayout)
@@ -1272,9 +1335,9 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
clKernWrite(transKernel, 6) << "size_t q;" << std::endl;
clKernWrite(transKernel, 6) << dtComplex << " twiddle_factor;" << std::endl;
- for (int i = 0; i < smaller_dim; i = i + 256)
+ for (int i = 0; i < LDS_per_WG; i = i + 256)
{
- if (i + 256 < smaller_dim)
+ if (i + 256 < LDS_per_WG)
{
if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
{
@@ -1306,7 +1369,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
else
{
// need to handle boundary
- clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+ clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << LDS_per_WG << "){" << std::endl;
if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
{
//input is wide; output is tall
@@ -1339,14 +1402,14 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
}
else
{
- for (int i = 0; i < smaller_dim; i = i + 256)
+ for (int i = 0; i < LDS_per_WG; i = i + 256)
{
- if (i + 256 < smaller_dim)
+ if (i + 256 < LDS_per_WG)
clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "] = prevValue[idx+" << i << "];" << std::endl;
else
{
// need to handle boundary
- clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+ clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << LDS_per_WG << "){" << std::endl;
clKernWrite(transKernel, 9) << "inputA[group_offset+idx+" << i << "] = prevValue[idx+" << i << "];" << std::endl;
clKernWrite(transKernel, 6) << "}" << std::endl;
}
@@ -1364,9 +1427,9 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
clKernWrite(transKernel, 6) << "size_t p;" << std::endl;
clKernWrite(transKernel, 6) << "size_t q;" << std::endl;
clKernWrite(transKernel, 6) << dtComplex << " twiddle_factor;" << std::endl;
- for (int i = 0; i < smaller_dim; i = i + 256)
+ for (int i = 0; i < LDS_per_WG; i = i + 256)
{
- if (i + 256 < smaller_dim)
+ if (i + 256 < LDS_per_WG)
{
if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
{
@@ -1397,7 +1460,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
else
{
// need to handle boundary
- clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+ clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << LDS_per_WG << "){" << std::endl;
if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
{
//input is wide; output is tall
@@ -1430,9 +1493,9 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
}
else
{
- for (int i = 0; i < smaller_dim; i = i + 256)
+ for (int i = 0; i < LDS_per_WG; i = i + 256)
{
- if (i + 256 < smaller_dim)
+ if (i + 256 < LDS_per_WG)
{
clKernWrite(transKernel, 6) << "inputA_R[group_offset+idx+" << i << "] = prevValue[idx+" << i << "].x;" << std::endl;
clKernWrite(transKernel, 6) << "inputA_I[group_offset+idx+" << i << "] = prevValue[idx+" << i << "].y;" << std::endl;
@@ -1440,7 +1503,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
else
{
// need to handle boundary
- clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+ clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << LDS_per_WG << "){" << std::endl;
clKernWrite(transKernel, 6) << "inputA_R[group_offset+idx+" << i << "] = prevValue[idx+" << i << "].x;" << std::endl;
clKernWrite(transKernel, 6) << "inputA_I[group_offset+idx+" << i << "] = prevValue[idx+" << i << "].y;" << std::endl;
clKernWrite(transKernel, 6) << "}" << std::endl;
@@ -1461,14 +1524,14 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
case CLFFT_COMPLEX_INTERLEAVED:
case CLFFT_COMPLEX_PLANAR:
{
- for (int i = 0; i < smaller_dim; i = i + 256)
+ for (int i = 0; i < LDS_per_WG; i = i + 256)
{
- if (i + 256 < smaller_dim)
+ if (i + 256 < LDS_per_WG)
clKernWrite(transKernel, 6) << "prevValue[idx+" << i << "] = nextValue[idx+" << i << "];" << std::endl;
else
{
// need to handle boundary
- clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+ clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << LDS_per_WG << "){" << std::endl;
clKernWrite(transKernel, 9) << "prevValue[idx + " << i << "] = nextValue[idx + " << i << "]; " << std::endl;
clKernWrite(transKernel, 6) << "}" << std::endl;
}
@@ -1486,7 +1549,10 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
clKernWrite(transKernel, 3) << std::endl;
clKernWrite(transKernel, 3) << "prev = next;" << std::endl;
- clKernWrite(transKernel, 3) << "}while(next!=swap_table[group_id][0]);" << std::endl;//end of do-while
+ if (WG_per_line == 1)
+ clKernWrite(transKernel, 3) << "}while(next!=swap_table[group_id][0]);" << std::endl;//end of do-while
+ else
+ clKernWrite(transKernel, 3) << "}while(next!=swap_table[group_id/"<< WG_per_line <<"][0]);" << std::endl;//end of do-while
clKernWrite(transKernel, 0) << "}" << std::endl;//end of kernel
if (!twiddleSwapKernel)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 71185cf..6091d98 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -612,7 +612,15 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
// add some special cases
if (fftPlan->length[0] == 100000)
clLengths[1] = 100;
+
clLengths[0] = fftPlan->length[0]/clLengths[1];
+ //timmy delete ensure clLengths[0] > clLengths[1] only when inplace is enabled
+ if (clLengths[0] < clLengths[1] && clfftGetRequestLibNoMemAlloc() && fftPlan->placeness == CLFFT_INPLACE)
+ {
+ size_t temp = clLengths[0];
+ clLengths[0] = clLengths[1];
+ clLengths[1] = temp;
+ }
// Start of block where transposes are generated; 1D FFT
while (1 && (fftPlan->inputLayout != CLFFT_REAL) && (fftPlan->outputLayout != CLFFT_REAL))
@@ -637,12 +645,21 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
//non square in-place tranpose currently support 1:2 ratio
//TODO: expand the support to 1:3, 1:5 and 1:10 ratio
+ /*
if (clfftGetRequestLibNoMemAlloc() &&
((clLengths[0] == 2*clLengths[1]) ||
(clLengths[0] == 3*clLengths[1]) ||
(clLengths[0] == 5*clLengths[1]) ||
(clLengths[0] == 10 * clLengths[1])) &&
fftPlan->placeness == CLFFT_INPLACE)
+ */
+ size_t dim_ratio = clLengths[1] / clLengths[0];
+ if (clfftGetRequestLibNoMemAlloc() &&
+ ((dim_ratio % 2 == 0) ||
+ (dim_ratio % 3 == 0) ||
+ (dim_ratio % 5 == 0) ||
+ (dim_ratio % 10 == 0)) &&
+ fftPlan->placeness == CLFFT_INPLACE)
{
padding = 0;
fftPlan->allOpsInplace = true;
@@ -695,21 +712,35 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans1Plan->gen = transGen;
trans1Plan->transflag = true;
- for (size_t index = 1; index < fftPlan->length.size(); index++)
+ if (trans1Plan->gen == Transpose_NONSQUARE || 1)
{
- //trans1Plan->length.push_back(fftPlan->length[index]);
- /*
- replacing the line above with the two lines below since:
- fftPlan is still 1D, thus the broken down transpose should be 2D not 3D
- the batchSize for the transpose should increase accordingly.
- the iDist should decrease accordingly. Push back to length will cause a 3D transpose
- */
- trans1Plan->batchsize = trans1Plan->batchsize * fftPlan->length[index];
- trans1Plan->iDist = trans1Plan->iDist / fftPlan->length[index];
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ //trans1Plan->length.push_back(fftPlan->length[index]);
+ /*
+ replacing the line above with the two lines below since:
+ fftPlan is still 1D, thus the broken down transpose should be 2D not 3D
+ the batchSize for the transpose should increase accordingly.
+ the iDist should decrease accordingly. Push back to length will cause a 3D transpose
+ */
+ trans1Plan->batchsize = trans1Plan->batchsize * fftPlan->length[index];
+ trans1Plan->iDist = trans1Plan->iDist / fftPlan->length[index];
- trans1Plan->inStride.push_back(fftPlan->inStride[index]);
- trans1Plan->outStride.push_back(trans1Plan->oDist);
- trans1Plan->oDist *= fftPlan->length[index];
+ trans1Plan->inStride.push_back(fftPlan->inStride[index]);
+ trans1Plan->outStride.push_back(trans1Plan->oDist);
+ trans1Plan->oDist *= fftPlan->length[index];
+ }
+ }
+ else
+ {
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ trans1Plan->length.push_back(fftPlan->length[index]);
+
+ trans1Plan->inStride.push_back(fftPlan->inStride[index]);
+ trans1Plan->outStride.push_back(trans1Plan->oDist);
+ trans1Plan->oDist *= fftPlan->length[index];
+ }
}
//Set callback data if set on top level plan
@@ -797,20 +828,34 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans2Plan->transflag = true;
- for (size_t index = 1; index < fftPlan->length.size(); index++)
+ if (trans2Plan->gen == Transpose_NONSQUARE || 1)// TIMMY delete
{
- //trans2Plan->length.push_back(fftPlan->length[index]);
- /*
- replacing the line above with the two lines below since:
- fftPlan is still 1D, thus the broken down transpose should be 2D not 3D
- the batchSize for the transpose should increase accordingly.
- the iDist should decrease accordingly. Push back to length will cause a 3D transpose
- */
- trans2Plan->batchsize = trans2Plan->batchsize * fftPlan->length[index];
- trans2Plan->iDist = trans2Plan->iDist / fftPlan->length[index];
- trans2Plan->inStride.push_back(fftPlan->outStride[index]);
- trans2Plan->outStride.push_back(trans2Plan->oDist);
- trans2Plan->oDist *= fftPlan->length[index];
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ //trans2Plan->length.push_back(fftPlan->length[index]);
+ /*
+ replacing the line above with the two lines below since:
+ fftPlan is still 1D, thus the broken down transpose should be 2D not 3D
+ the batchSize for the transpose should increase accordingly.
+ the iDist should decrease accordingly. Push back to length will cause a 3D transpose
+ */
+ trans2Plan->batchsize = trans2Plan->batchsize * fftPlan->length[index];
+ trans2Plan->iDist = trans2Plan->iDist / fftPlan->length[index];
+ trans2Plan->inStride.push_back(fftPlan->outStride[index]);
+ trans2Plan->outStride.push_back(trans2Plan->oDist);
+ trans2Plan->oDist *= fftPlan->length[index];
+ }
+ }
+ else
+ {
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ trans2Plan->length.push_back(fftPlan->length[index]);
+
+ trans2Plan->inStride.push_back(fftPlan->outStride[index]);
+ trans2Plan->outStride.push_back(trans2Plan->oDist);
+ trans2Plan->oDist *= fftPlan->length[index];
+ }
}
OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
@@ -891,20 +936,36 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans3Plan->transflag = true;
trans3Plan->transOutHorizontal = true;
- for (size_t index = 1; index < fftPlan->length.size(); index++)
+
+ if (trans3Plan->gen == Transpose_NONSQUARE || 1)
{
- //trans3Plan->length.push_back(fftPlan->length[index]);
- /*
- replacing the line above with the two lines below since:
- fftPlan is still 1D, thus the broken down transpose should be 2D not 3D
- the batchSize for the transpose should increase accordingly.
- the iDist should decrease accordingly. Push back to length will cause a 3D transpose
- */
- trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];
- trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];
- trans3Plan->inStride.push_back(trans3Plan->iDist);
- trans3Plan->iDist *= fftPlan->length[index];
- trans3Plan->outStride.push_back(fftPlan->outStride[index]);
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ //trans3Plan->length.push_back(fftPlan->length[index]);
+ /*
+ replacing the line above with the two lines below since:
+ fftPlan is still 1D, thus the broken down transpose should be 2D not 3D
+ the batchSize for the transpose should increase accordingly.
+ the iDist should decrease accordingly. Push back to length will cause a 3D transpose
+ */
+ trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];
+ trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];
+ //trans3Plan->inStride.push_back(trans3Plan->iDist);//Timmy for square
+ trans3Plan->inStride.push_back(fftPlan->inStride[index]);
+ trans3Plan->iDist *= fftPlan->length[index];
+ trans3Plan->outStride.push_back(fftPlan->outStride[index]);
+ }
+ }
+ else
+ {
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ trans3Plan->length.push_back(fftPlan->length[index]);
+
+ trans3Plan->inStride.push_back(trans3Plan->iDist);
+ trans3Plan->iDist *= fftPlan->length[index];
+ trans3Plan->outStride.push_back(fftPlan->outStride[index]);
+ }
}
//Set callback data if set on top level plan
diff --git a/src/tests/accuracy_test_pow3.cpp b/src/tests/accuracy_test_pow3.cpp
index f1369db..132e780 100644
--- a/src/tests/accuracy_test_pow3.cpp
+++ b/src/tests/accuracy_test_pow3.cpp
@@ -2008,13 +2008,95 @@ TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_interleaved_
}
//14348907 = 2187 * 2187 * 3 backward and forward, planar and interleaved, single and double, batch size 1 and 3
-/*
+
TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_14348907_1)
{
try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(14348907, 1, layout::complex_planar, direction::forward); }
catch (const std::exception& err) { handle_exception(err); }
}
-*/
+TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_14348907_1)
+{ // In-place backward FFT: float data, planar layout, length 14348907 (3^15); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(14348907, 1, layout::complex_planar, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_14348907_3)
+{ // In-place forward FFT: float data, planar layout, length 14348907 (3^15); second argument (presumably the batch size) is 3 to match the _3 suffix.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(14348907, 3, layout::complex_planar, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_14348907_3)
+{ // In-place backward FFT: float data, planar layout, length 14348907 (3^15); second argument (presumably the batch size) is 3 to match the _3 suffix.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(14348907, 3, layout::complex_planar, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_14348907_1)
+{ // In-place forward FFT: double data, planar layout, length 14348907 (3^15); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(14348907, 1, layout::complex_planar, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_14348907_1)
+{ // In-place backward FFT: double data, planar layout, length 14348907 (3^15); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(14348907, 1, layout::complex_planar, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_14348907_3)
+{ // In-place forward FFT: double data, planar layout, length 14348907 (3^15); second argument (presumably the batch size) is 3 to match the _3 suffix.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(14348907, 3, layout::complex_planar, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_14348907_3)
+{ // In-place backward FFT: double data, planar layout, length 14348907 (3^15); second argument (presumably the batch size) is 3 to match the _3 suffix.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(14348907, 3, layout::complex_planar, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+//interleaved
+TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_14348907_1)
+{ // In-place forward FFT: float data, interleaved layout, length 14348907 (3^15); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(14348907, 1, layout::complex_interleaved, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_14348907_1)
+{ // In-place backward FFT: float data, interleaved layout, length 14348907 (3^15); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(14348907, 1, layout::complex_interleaved, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_14348907_3)
+{ // In-place forward FFT: float data, interleaved layout, length 14348907 (3^15); second argument (presumably the batch size) is 3 to match the _3 suffix.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(14348907, 3, layout::complex_interleaved, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_14348907_3)
+{ // In-place backward FFT: float data, interleaved layout, length 14348907 (3^15); second argument (presumably the batch size) is 3 to match the _3 suffix.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(14348907, 3, layout::complex_interleaved, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_14348907_1)
+{ // In-place forward FFT: double data, interleaved layout, length 14348907 (3^15); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(14348907, 1, layout::complex_interleaved, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_14348907_1)
+{ // In-place backward FFT: double data, interleaved layout, length 14348907 (3^15); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(14348907, 1, layout::complex_interleaved, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_14348907_3)
+{ // In-place forward FFT: double data, interleaved layout, length 14348907 (3^15); second argument (presumably the batch size) is 3 to match the _3 suffix.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(14348907, 3, layout::complex_interleaved, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_14348907_3)
+{ // In-place backward FFT: double data, interleaved layout, length 14348907 (3^15); second argument (presumably the batch size) is 3 to match the _3 suffix.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(14348907, 3, layout::complex_interleaved, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
// ^^^^^^^^^^^^^^^^^^^^^^^ normal 2D ^^^^^^^^^^^^^^^^^^^^^^ //
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
diff --git a/src/tests/accuracy_test_pow5.cpp b/src/tests/accuracy_test_pow5.cpp
index c4ab62e..9e7f4e6 100644
--- a/src/tests/accuracy_test_pow5.cpp
+++ b/src/tests/accuracy_test_pow5.cpp
@@ -2011,6 +2011,94 @@ TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_interleaved_
catch (const std::exception& err) { handle_exception(err); }
}
+//48828125 = 3125 * 3125 * 5 backward and forward, planar and interleaved, single and double, batch size 1 and 3
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_48828125_1)
+{ // In-place forward FFT: float data, planar layout, length 48828125 (5^11); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(48828125, 1, layout::complex_planar, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_48828125_1)
+{ // In-place backward FFT: float data, planar layout, length 48828125 (5^11); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(48828125, 1, layout::complex_planar, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_48828125_3)
+{ // In-place forward FFT: float data, planar layout, length 48828125 (5^11); second argument 3 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(48828125, 3, layout::complex_planar, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_48828125_3)
+{ // In-place backward FFT: float data, planar layout, length 48828125 (5^11); second argument 3 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(48828125, 3, layout::complex_planar, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_48828125_1)
+{ // In-place forward FFT: double data, planar layout, length 48828125 (5^11); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(48828125, 1, layout::complex_planar, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_48828125_1)
+{ // In-place backward FFT: double data, planar layout, length 48828125 (5^11); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(48828125, 1, layout::complex_planar, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_48828125_3)
+{ // In-place forward FFT: double data, planar layout, length 48828125 (5^11); second argument 3 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(48828125, 3, layout::complex_planar, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_48828125_3)
+{ // In-place backward FFT: double data, planar layout, length 48828125 (5^11); second argument 3 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(48828125, 3, layout::complex_planar, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+//interleaved
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_48828125_1)
+{ // In-place forward FFT: float data, interleaved layout, length 48828125 (5^11); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(48828125, 1, layout::complex_interleaved, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_48828125_1)
+{ // In-place backward FFT: float data, interleaved layout, length 48828125 (5^11); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(48828125, 1, layout::complex_interleaved, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_48828125_3)
+{ // In-place forward FFT: float data, interleaved layout, length 48828125 (5^11); second argument 3 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(48828125, 3, layout::complex_interleaved, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_48828125_3)
+{ // In-place backward FFT: float data, interleaved layout, length 48828125 (5^11); second argument 3 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(48828125, 3, layout::complex_interleaved, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_48828125_1)
+{ // In-place forward FFT: double data, interleaved layout, length 48828125 (5^11); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(48828125, 1, layout::complex_interleaved, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_48828125_1)
+{ // In-place backward FFT: double data, interleaved layout, length 48828125 (5^11); second argument 1 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(48828125, 1, layout::complex_interleaved, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_48828125_3)
+{ // In-place forward FFT: double data, interleaved layout, length 48828125 (5^11); second argument 3 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(48828125, 3, layout::complex_interleaved, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_48828125_3)
+{ // In-place backward FFT: double data, interleaved layout, length 48828125 (5^11); second argument 3 is presumably the batch size.
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(48828125, 3, layout::complex_interleaved, direction::backward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
// ^^^^^^^^^^^^^^^^^^^^^^^ normal 2D ^^^^^^^^^^^^^^^^^^^^^^ //
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list