[clfft] 01/13: convert int to size_t or long

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Jun 5 01:29:36 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/master
in repository clfft.

commit 264a34f0176cf0faa13f43876086000174fa4ec6
Author: Timmy <timmy.liu at amd.com>
Date:   Tue Apr 26 21:58:55 2016 -0500

    convert int to size_t or long
---
 src/library/action.transpose.cpp    |   2 +-
 src/library/generator.stockham.h    |   4 +-
 src/library/generator.transpose.cpp | 180 ++++++++++++++++++------------------
 3 files changed, 93 insertions(+), 93 deletions(-)

diff --git a/src/library/action.transpose.cpp b/src/library/action.transpose.cpp
index aa7dc92..874cf42 100644
--- a/src/library/action.transpose.cpp
+++ b/src/library/action.transpose.cpp
@@ -282,7 +282,7 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::generateKernel(FFTRepo& fftRep
 		//general swap kernel takes care of all ratio
 		OPENCL_V(clfft_transpose_generator::genSwapKernelGeneral(this->signature, programCode, kernelFuncName, lwSize, reShapeFactor), _T("genSwapKernel() failed!"));
     }
-
+	//std::cout << programCode << std::endl;
     cl_int status = CL_SUCCESS;
     cl_device_id Device = NULL;
     status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, NULL);
diff --git a/src/library/generator.stockham.h b/src/library/generator.stockham.h
index 798aaad..4f167c2 100644
--- a/src/library/generator.stockham.h
+++ b/src/library/generator.stockham.h
@@ -302,9 +302,9 @@ namespace StockhamGenerator
 			// Twiddle calc function
 			ss << "__attribute__((always_inline)) ";
 			ss << RegBaseType<PR>(2);
-			ss << "\n" << TwTableLargeFunc() << "(uint u)\n{\n";
+			ss << "\n" << TwTableLargeFunc() << "(size_t u)\n{\n";
 
-			ss << "\t" "uint j = u & " << unsigned(X-1) << ";\n";
+			ss << "\t" "size_t j = u & " << unsigned(X-1) << ";\n";
 			ss << "\t" ; ss << RegBaseType<PR>(2); ss << " result = ";
 			ss << TwTableLargeName();
 			ss << "[0][j];\n";
diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
index 5f38648..5c3df10 100644
--- a/src/library/generator.transpose.cpp
+++ b/src/library/generator.transpose.cpp
@@ -580,10 +580,10 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
 		size_t *cycle_stat = new size_t[cycle_map[0] * 2], stat_idx = 0;
 		clKernWrite(transKernel, 0) << std::endl;
 
-		clKernWrite(transKernel, 0) << "__constant int swap_table[][3] = {" << std::endl;
+		clKernWrite(transKernel, 0) << "__constant size_t swap_table[][3] = {" << std::endl;
 
 		size_t inx = 0, start_inx, swap_inx = 0, num_swaps = 0;
-		for (int i = 0; i < cycle_map[0]; i++)
+		for (size_t i = 0; i < cycle_map[0]; i++)
 		{
 			start_inx = cycle_map[++inx];
 			clKernWrite(transKernel, 0) << "{  " << start_inx << ",  " << cycle_map[inx + 1] << ",  0}," << std::endl;
@@ -592,7 +592,7 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
 
 			while (start_inx != cycle_map[++inx])
 			{
-				int action_var = (cycle_map[inx + 1] == start_inx) ? 2 : 1;
+				size_t action_var = (cycle_map[inx + 1] == start_inx) ? 2 : 1;
 				clKernWrite(transKernel, 0) << "{  " << cycle_map[inx] << ",  " << cycle_map[inx + 1] << ",  " << action_var << "}," << std::endl;
 				if (action_var == 2)
 					cycle_stat[stat_idx++] = num_swaps;
@@ -608,8 +608,8 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
 		/*cycle_map[0] + 2, + 2 is added for post callback table appending*/
 		size_t num_cycles_minus_1 = cycle_map[0] - 1;
 
-		clKernWrite(transKernel, 0) << "__constant int cycle_stat[" << cycle_map[0] << "][2] = {" << std::endl;
-		for (int i = 0; i < num_cycles_minus_1; i++)
+		clKernWrite(transKernel, 0) << "__constant size_t cycle_stat[" << cycle_map[0] << "][2] = {" << std::endl;
+		for (size_t i = 0; i < num_cycles_minus_1; i++)
 		{
 			clKernWrite(transKernel, 0) << "{  " << cycle_stat[i * 2] << ",  " << cycle_stat[i * 2 + 1] << "}," << std::endl;
 		}
@@ -622,16 +622,16 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
 		switch (params.fft_inputLayout)
 		{
 		case CLFFT_COMPLEX_INTERLEAVED:
-			clKernWrite(transKernel, 0) << "void swap(global " << dtComplex << "* inputA, " << tmpBuffType << " " << dtComplex << "* Ls, " << tmpBuffType << " " << dtComplex << " * Ld, int is, int id, int pos, int end_indx, int work_id";
+			clKernWrite(transKernel, 0) << "void swap(global " << dtComplex << "* inputA, " << tmpBuffType << " " << dtComplex << "* Ls, " << tmpBuffType << " " << dtComplex << " * Ld, size_t is, size_t id, size_t pos, size_t end_indx, size_t work_id";
 			break;
 		case CLFFT_COMPLEX_PLANAR:
-			clKernWrite(transKernel, 0) << "void swap(global " << dtPlanar << "* inputA_R, global " << dtPlanar << "* inputA_I, " << tmpBuffType << " " << dtComplex << "* Ls, " << tmpBuffType << " " << dtComplex << "* Ld, int is, int id, int pos, int end_indx, int work_id";
+			clKernWrite(transKernel, 0) << "void swap(global " << dtPlanar << "* inputA_R, global " << dtPlanar << "* inputA_I, " << tmpBuffType << " " << dtComplex << "* Ls, " << tmpBuffType << " " << dtComplex << "* Ld, size_t is, size_t id, size_t pos, size_t end_indx, size_t work_id";
 			break;
 		case CLFFT_HERMITIAN_INTERLEAVED:
 		case CLFFT_HERMITIAN_PLANAR:
 			return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
 		case CLFFT_REAL:
-			clKernWrite(transKernel, 0) << "void swap(global " << dtPlanar << "* inputA, " << tmpBuffType << " " << dtPlanar << "* Ls, " << tmpBuffType << " " << dtPlanar << "* Ld, int is, int id, int pos, int end_indx, int work_id";
+			clKernWrite(transKernel, 0) << "void swap(global " << dtPlanar << "* inputA, " << tmpBuffType << " " << dtPlanar << "* Ls, " << tmpBuffType << " " << dtPlanar << "* Ld, size_t is, size_t id, size_t pos, size_t end_indx, size_t work_id";
 			break;
 		default:
 			return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
@@ -657,7 +657,7 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
 
 		clKernWrite(transKernel, 0) << "){" << std::endl;
 
-		clKernWrite(transKernel, 3) << "for (int j = get_local_id(0); j < end_indx; j += " << local_work_size_swap << "){" << std::endl;
+		clKernWrite(transKernel, 3) << "for (size_t j = get_local_id(0); j < end_indx; j += " << local_work_size_swap << "){" << std::endl;
 
 		switch (params.fft_inputLayout)
 		{
@@ -805,7 +805,7 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
 		clKernWrite(transKernel, 3) << "size_t g_index = get_group_id(0);" << std::endl;
 
 		clKernWrite(transKernel, 3) << "const size_t numGroupsY_1 = " << cycle_map[0] * num_grps_pro_row << " ;" << std::endl;
-		for (int i = 2; i < params.fft_DataDim - 1; i++)
+		for (size_t i = 2; i < params.fft_DataDim - 1; i++)
 		{
 			clKernWrite(transKernel, 3) << "const size_t numGroupsY_" << i << " = numGroupsY_" << i - 1 << " * " << params.fft_N[i] << ";" << std::endl;
 		}
@@ -864,19 +864,19 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
 		clKernWrite(transKernel, 3) << "tmp_swap_ptr[0] = te;" << std::endl;
 		clKernWrite(transKernel, 3) << "tmp_swap_ptr[1] = to;" << std::endl;
 
-		clKernWrite(transKernel, 3) << "int swap_inx = 0;" << std::endl;
+		clKernWrite(transKernel, 3) << "size_t swap_inx = 0;" << std::endl;
 
-		clKernWrite(transKernel, 3) << "int start = cycle_stat[g_index / " << num_grps_pro_row << "][0];" << std::endl;
-		clKernWrite(transKernel, 3) << "int end = cycle_stat[g_index / " << num_grps_pro_row << "][1];" << std::endl;
+		clKernWrite(transKernel, 3) << "size_t start = cycle_stat[g_index / " << num_grps_pro_row << "][0];" << std::endl;
+		clKernWrite(transKernel, 3) << "size_t end = cycle_stat[g_index / " << num_grps_pro_row << "][1];" << std::endl;
 
-		clKernWrite(transKernel, 3) << "int end_indx = " << num_elements_loaded << ";" << std::endl;
-		clKernWrite(transKernel, 3) << "int work_id = g_index % " << num_grps_pro_row << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "size_t end_indx = " << num_elements_loaded << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "size_t work_id = g_index % " << num_grps_pro_row << ";" << std::endl;
 
 		clKernWrite(transKernel, 3) << "if( work_id == " << (num_grps_pro_row - 1) << " ){" << std::endl;
 		clKernWrite(transKernel, 6) << "end_indx = " << smaller_dim - num_elements_loaded * (num_grps_pro_row - 1) << ";" << std::endl;
 		clKernWrite(transKernel, 3) << "}" << std::endl;
 
-		clKernWrite(transKernel, 3) << "for (int loop = start; loop <= end; loop ++){" << std::endl;
+		clKernWrite(transKernel, 3) << "for (size_t loop = start; loop <= end; loop ++){" << std::endl;
 		clKernWrite(transKernel, 6) << "swap_inx = 1 - swap_inx;" << std::endl;
 
 		switch (params.fft_inputLayout)
@@ -1061,7 +1061,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
 	std::vector<std::vector<size_t> > permutationTable;
 	permutation_calculation(dim_ratio, smaller_dim, permutationTable);
 
-	clKernWrite(transKernel, 0) << "__constant int swap_table["<< permutationTable.size()+2 <<"][1] = {" << std::endl;
+	clKernWrite(transKernel, 0) << "__constant size_t swap_table["<< permutationTable.size()+2 <<"][1] = {" << std::endl;
 	clKernWrite(transKernel, 0) << "{0}," << std::endl;
 	clKernWrite(transKernel, 0) << "{"<< smaller_dim * dim_ratio - 1 <<"}," << std::endl;// add the first and last row to the swap table. needed for twiddling
 	for (std::vector<std::vector<size_t> >::iterator itor = permutationTable.begin(); itor != permutationTable.end(); itor++)
@@ -1119,12 +1119,12 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
         genTransposePrototypeLeadingDimensionBatched(params, local_work_size_swap, dtPlanar, dtComplex, funcNameTW, transKernel, dtInput, dtOutput);
 
         clKernWrite(transKernel, 3) << "//each wg handles 1/"<< WG_per_line <<" row of " << LDS_per_WG << " in memory" << std::endl;
-        clKernWrite(transKernel, 3) << "const int num_wg_per_batch = " << (permutationTable.size() + 2)*WG_per_line << ";" << std::endl; // number of wg per batch = number of independent cycles
-        clKernWrite(transKernel, 3) << "int group_id = get_group_id(0);" << std::endl;
-        clKernWrite(transKernel, 3) << "int idx = get_local_id(0);" << std::endl;
+        clKernWrite(transKernel, 3) << "const size_t num_wg_per_batch = " << (permutationTable.size() + 2)*WG_per_line << ";" << std::endl; // number of wg per batch = number of independent cycles
+        clKernWrite(transKernel, 3) << "size_t group_id = get_group_id(0);" << std::endl;
+        clKernWrite(transKernel, 3) << "size_t idx = get_local_id(0);" << std::endl;
 
         clKernWrite(transKernel, 3) << std::endl;
-        clKernWrite(transKernel, 3) << "int batch_offset = group_id / num_wg_per_batch;" << std::endl;
+        clKernWrite(transKernel, 3) << "size_t batch_offset = group_id / num_wg_per_batch;" << std::endl;
         switch (params.fft_inputLayout)
         {
         case CLFFT_REAL:
@@ -1147,10 +1147,10 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
 
         clKernWrite(transKernel, 3) << std::endl;
 		if(WG_per_line == 1)
-			clKernWrite(transKernel, 3) << "int prev = swap_table[group_id][0];" << std::endl;
+			clKernWrite(transKernel, 3) << "size_t prev = swap_table[group_id][0];" << std::endl;
 		else
-			clKernWrite(transKernel, 3) << "int prev = swap_table[group_id/" << WG_per_line <<"][0];" << std::endl;
-        clKernWrite(transKernel, 3) << "int next = 0;" << std::endl;
+			clKernWrite(transKernel, 3) << "size_t prev = swap_table[group_id/" << WG_per_line <<"][0];" << std::endl;
+        clKernWrite(transKernel, 3) << "size_t next = 0;" << std::endl;
 
         clKernWrite(transKernel, 3) << std::endl;
         switch (params.fft_inputLayout)
@@ -1180,23 +1180,23 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
         {
 			if (WG_per_line == 1)
 			{
-				//might look like: int group_offset = (prev/3)*729*3 + (prev%3)*729; 
-				clKernWrite(transKernel, 3) << "int group_offset = (prev/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
+				//might look like: size_t group_offset = (prev/3)*729*3 + (prev%3)*729; 
+				clKernWrite(transKernel, 3) << "size_t group_offset = (prev/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
 					<< " + (prev%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl;
 			}
 			else
 			{
-				//if smaller_dim is 2187 > 1024 this should look like int group_offset = (prev/3)*2187*3 + (prev%3)*2187 + (group_id % 3)*729;
-				clKernWrite(transKernel, 3) << "int group_offset = (prev/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
+				//if smaller_dim is 2187 > 1024 this should look like size_t group_offset = (prev/3)*2187*3 + (prev%3)*2187 + (group_id % 3)*729;
+				clKernWrite(transKernel, 3) << "size_t group_offset = (prev/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
 					<< " + (prev%" << dim_ratio << ")*" << smaller_dim << " + (group_id % " << WG_per_line << ")*" << LDS_per_WG << ";" << std::endl;
 			}
         }
         else
         {
-			if (WG_per_line == 1)//might look like: int group_offset = prev*729; 
-				clKernWrite(transKernel, 3) << "int group_offset = (prev*" << smaller_dim << ");" << std::endl; 
-			else//if smaller_dim is 2187 > 1024 this should look like int group_offset = prev*2187 + (group_id % 3)*729; 
-				clKernWrite(transKernel, 3) << "int group_offset = (prev*" << smaller_dim << ") + (group_id % " << WG_per_line << ")*" << LDS_per_WG << ";" << std::endl;
+			if (WG_per_line == 1)//might look like: size_t group_offset = prev*729; 
+				clKernWrite(transKernel, 3) << "size_t group_offset = (prev*" << smaller_dim << ");" << std::endl; 
+			else//if smaller_dim is 2187 > 1024 this should look like size_t group_offset = prev*2187 + (group_id % 3)*729; 
+				clKernWrite(transKernel, 3) << "size_t group_offset = (prev*" << smaller_dim << ") + (group_id % " << WG_per_line << ")*" << LDS_per_WG << ";" << std::endl;
         }
 
         clKernWrite(transKernel, 3) << std::endl;
@@ -1212,7 +1212,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
         case CLFFT_REAL:
         case CLFFT_COMPLEX_INTERLEAVED:
         {
-            for (int i = 0; i < LDS_per_WG; i = i + 256)
+            for (size_t i = 0; i < LDS_per_WG; i = i + 256)
             {
 				if (i + 256 < LDS_per_WG)
 				{
@@ -1307,7 +1307,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
             return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
         case CLFFT_COMPLEX_PLANAR:
         {
-            for (int i = 0; i < LDS_per_WG; i = i + 256)
+            for (size_t i = 0; i < LDS_per_WG; i = i + 256)
             {
                 if (i + 256 < LDS_per_WG)
                 {
@@ -1426,7 +1426,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
 			}
 			else
 			{
-				//if smaller_dim is 2187 > 1024 this should look like int group_offset = (next/3)*2187*3 + (next%3)*2187 + (group_id % 3)*729;
+				//if smaller_dim is 2187 > 1024 this should look like size_t group_offset = (next/3)*2187*3 + (next%3)*2187 + (group_id % 3)*729;
 				clKernWrite(transKernel, 6) << "group_offset = (next/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
 					<< " + (next%" << dim_ratio << ")*" << smaller_dim << " + (group_id % " << WG_per_line << ")*" << LDS_per_WG << ";" << std::endl;
 			}
@@ -1437,9 +1437,9 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
             //takes care the last row
             clKernWrite(transKernel, 6) << "if (prev == " << smaller_dim * dim_ratio - 1 << ")" << std::endl;
             clKernWrite(transKernel, 9) << "next = " << smaller_dim * dim_ratio - 1 << ";" << std::endl;
-			if (WG_per_line == 1) //might look like: int group_offset = prev*729; 
+			if (WG_per_line == 1) //might look like: size_t group_offset = prev*729; 
 				clKernWrite(transKernel, 6) << "group_offset = (next*" << smaller_dim << ");" << std::endl;
-			else//if smaller_dim is 2187 > 1024 this should look like int group_offset = next*2187 + (group_id % 3)*729;
+			else//if smaller_dim is 2187 > 1024 this should look like size_t group_offset = next*2187 + (group_id % 3)*729;
 				clKernWrite(transKernel, 6) << "group_offset = (next*" << smaller_dim << ") + (group_id % " << WG_per_line << ")*" << LDS_per_WG << ";" << std::endl;
         }
 
@@ -1450,7 +1450,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
         case CLFFT_REAL:
         case CLFFT_COMPLEX_INTERLEAVED:
         {
-            for (int i = 0; i < LDS_per_WG; i = i + 256)
+            for (size_t i = 0; i < LDS_per_WG; i = i + 256)
             {
                 if (i + 256 < LDS_per_WG)
 					if (params.fft_hasPreCallback)
@@ -1543,7 +1543,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
             return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
         case CLFFT_COMPLEX_PLANAR:
         {
-            for (int i = 0; i < LDS_per_WG; i = i + 256)
+            for (size_t i = 0; i < LDS_per_WG; i = i + 256)
             {
                 if (i + 256 < LDS_per_WG)
                 {
@@ -1659,7 +1659,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
                 clKernWrite(transKernel, 6) << "size_t q;" << std::endl;
                 clKernWrite(transKernel, 6) << dtComplex << " twiddle_factor;" << std::endl;
 
-                for (int i = 0; i < LDS_per_WG; i = i + 256)
+                for (size_t i = 0; i < LDS_per_WG; i = i + 256)
                 {
                     if (i + 256 < LDS_per_WG)
                     {
@@ -1726,7 +1726,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
             }
             else if(!twiddleSwapKernelOut)//could be twiddleSwapKernelIn
             {
-                for (int i = 0; i < LDS_per_WG; i = i + 256)
+                for (size_t i = 0; i < LDS_per_WG; i = i + 256)
                 {
 					//twiddling and callback do not coexist
 					if (params.fft_hasPostCallback)
@@ -1773,7 +1773,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
                 clKernWrite(transKernel, 6) << "size_t p;" << std::endl;
                 clKernWrite(transKernel, 6) << "size_t q;" << std::endl;
                 clKernWrite(transKernel, 6) << dtComplex << " twiddle_factor;" << std::endl;
-                for (int i = 0; i < LDS_per_WG; i = i + 256)
+                for (size_t i = 0; i < LDS_per_WG; i = i + 256)
                 {
                     if (i + 256 < LDS_per_WG)
                     {
@@ -1839,7 +1839,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
             }
 			else if (!twiddleSwapKernelOut)//could be twiddleSwapKernelIn
             {
-                for (int i = 0; i < LDS_per_WG; i = i + 256)
+                for (size_t i = 0; i < LDS_per_WG; i = i + 256)
                 {
 					//twiddling and callback do not coexist
 					if (params.fft_hasPostCallback)
@@ -1894,7 +1894,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
         case CLFFT_COMPLEX_INTERLEAVED:
         case CLFFT_COMPLEX_PLANAR:
         {
-            for (int i = 0; i < LDS_per_WG; i = i + 256)
+            for (size_t i = 0; i < LDS_per_WG; i = i + 256)
             {
                 if (i + 256 < LDS_per_WG)
                     clKernWrite(transKernel, 6) << "prevValue[idx+" << i << "] = nextValue[idx+" << i << "];" << std::endl;
@@ -2051,9 +2051,9 @@ clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::S
 			wgPerBatch = (params.fft_N[0] / 16 / reShapeFactor)*(params.fft_N[0] / 16 / reShapeFactor + 1) / 2;
 		else
 			wgPerBatch = (params.fft_N[0] / (16 * reShapeFactor) + 1)*(params.fft_N[0] / (16 * reShapeFactor) + 1 + 1) / 2;
-		clKernWrite(transKernel, 3) << "const int numGroupsY_1 = " << wgPerBatch << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t numGroupsY_1 = " << wgPerBatch << ";" << std::endl;
 
-		for (int i = 2; i < params.fft_DataDim - 1; i++)
+		for (size_t i = 2; i < params.fft_DataDim - 1; i++)
 		{
 			clKernWrite(transKernel, 3) << "const size_t numGroupsY_" << i << " = numGroupsY_" << i - 1 << " * " << params.fft_N[i] << ";" << std::endl;
 		}
@@ -2161,37 +2161,37 @@ clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::S
 			clKernWrite(transKernel, 3) << "float row = (" << -2.0f*(params.fft_N[0] / (16 * reShapeFactor) + 1) - 1 << "+sqrt((" << 4.0f*(params.fft_N[0] / (16 * reShapeFactor) + 1)*(params.fft_N[0] / (16 * reShapeFactor) + 1 + 1) << "-8.0f*g_index- 7)))/ (-2.0f);" << std::endl;
 
 
-		clKernWrite(transKernel, 3) << "if (row == (float)(int)row) row -= 1; " << std::endl;
-		clKernWrite(transKernel, 3) << "const int t_gy = (int)row;" << std::endl;
+		clKernWrite(transKernel, 3) << "if (row == (float)(size_t)row) row -= 1; " << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t t_gy = (size_t)row;" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
 		if (mult_of_16)
-			clKernWrite(transKernel, 3) << "const int t_gx_p = g_index - " << (params.fft_N[0] / 16 / reShapeFactor) << "*t_gy + t_gy*(t_gy + 1) / 2;" << std::endl;
+			clKernWrite(transKernel, 3) << "const long t_gx_p = g_index - " << (params.fft_N[0] / 16 / reShapeFactor) << "*t_gy + t_gy*(t_gy + 1) / 2;" << std::endl;
 		else
-			clKernWrite(transKernel, 3) << "const int t_gx_p = g_index - " << (params.fft_N[0] / (16 * reShapeFactor) + 1) << "*t_gy + t_gy*(t_gy + 1) / 2;" << std::endl;
+			clKernWrite(transKernel, 3) << "const long t_gx_p = g_index - " << (params.fft_N[0] / (16 * reShapeFactor) + 1) << "*t_gy + t_gy*(t_gy + 1) / 2;" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int t_gy_p = t_gx_p - t_gy;" << std::endl;
+		clKernWrite(transKernel, 3) << "const long t_gy_p = t_gx_p - t_gy;" << std::endl;
 
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int d_lidx = get_local_id(0) % 16;" << std::endl;
-		clKernWrite(transKernel, 3) << "const int d_lidy = get_local_id(0) / 16;" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t d_lidx = get_local_id(0) % 16;" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t d_lidy = get_local_id(0) / 16;" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int lidy = (d_lidy * 16 + d_lidx) /" << (16 * reShapeFactor) << ";" << std::endl;
-		clKernWrite(transKernel, 3) << "const int lidx = (d_lidy * 16 + d_lidx) %" << (16 * reShapeFactor) << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t lidy = (d_lidy * 16 + d_lidx) /" << (16 * reShapeFactor) << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t lidx = (d_lidy * 16 + d_lidx) %" << (16 * reShapeFactor) << ";" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int idx = lidx + t_gx_p*" << 16 * reShapeFactor << ";" << std::endl;
-		clKernWrite(transKernel, 3) << "const int idy = lidy + t_gy_p*" << 16 * reShapeFactor << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t idx = lidx + t_gx_p*" << 16 * reShapeFactor << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t idy = lidy + t_gy_p*" << 16 * reShapeFactor << ";" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int starting_index_yx = t_gy_p*" << 16 * reShapeFactor << " + t_gx_p*" << 16 * reShapeFactor*params.fft_N[0] << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t starting_index_yx = t_gy_p*" << 16 * reShapeFactor << " + t_gx_p*" << 16 * reShapeFactor*params.fft_N[0] << ";" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
@@ -2209,8 +2209,8 @@ clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::S
 		// back into the arrays
 
 		if (mult_of_16) {
-			clKernWrite(transKernel, 3) << "int index;" << std::endl;
-			clKernWrite(transKernel, 3) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 3) << "size_t index;" << std::endl;
+			clKernWrite(transKernel, 3) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 6) << "index = lidy*" << 16 * reShapeFactor << " + lidx + loop*256;" << std::endl;
 
 			// Handle planar and interleaved right here
@@ -2290,7 +2290,7 @@ clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::S
 
 
 			// Step2: Write from shared to global
-			clKernWrite(transKernel, 3) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 3) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 6) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop;" << std::endl;
 
 
@@ -2305,7 +2305,7 @@ clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::S
 					else
 					{
 						//assume tranpose is only two dimensional for now
-						//int actualBatchSize = params.transposeBatchSize / params.transposeMiniBatchSize;
+						//size_t actualBatchSize = params.transposeBatchSize / params.transposeMiniBatchSize;
 						size_t blockOffset = params.fft_inStride[2];
 						clKernWrite(transKernel, 6) << params.fft_postCallback.funcname << "(outputA-" << blockOffset <<"*((get_group_id(0)/numGroupsY_1)%"<< params.transposeMiniBatchSize <<"), ((idy + loop*" << 16 / reShapeFactor << ")*"
 							<< params.fft_N[0] << " + idx + "<< blockOffset <<"*( (get_group_id(0)/numGroupsY_1 )%" << params.transposeMiniBatchSize <<") " << "), post_userdata, yx_s[index]";
@@ -2398,9 +2398,9 @@ clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::S
 		}
 		else {//mult_of_16
 
-			clKernWrite(transKernel, 3) << "int index;" << std::endl;
+			clKernWrite(transKernel, 3) << "size_t index;" << std::endl;
 			clKernWrite(transKernel, 3) << "if (" << params.fft_N[0] << " - (t_gx_p + 1) *" << 16 * reShapeFactor << ">0){" << std::endl;
-			clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 6) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 9) << "index = lidy*" << 16 * reShapeFactor << " + lidx + loop*256;" << std::endl;
 
 			// Handle planar and interleaved right here
@@ -2471,7 +2471,7 @@ clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::S
 			clKernWrite(transKernel, 3) << "}" << std::endl;
 
 			clKernWrite(transKernel, 3) << "else{" << std::endl;
-			clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 6) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 9) << "index = lidy*" << 16 * reShapeFactor << " + lidx + loop*256;" << std::endl;
 
 
@@ -2557,7 +2557,7 @@ clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::S
 			// Step2: Write from shared to global
 
 			clKernWrite(transKernel, 3) << "if (" << params.fft_N[0] << " - (t_gx_p + 1) *" << 16 * reShapeFactor << ">0){" << std::endl;
-			clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 6) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 9) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop ;" << std::endl;
 
 			// Handle planar and interleaved right here
@@ -2636,7 +2636,7 @@ clfftStatus genTransposeKernelBatched(const FFTGeneratedTransposeSquareAction::S
 			clKernWrite(transKernel, 3) << "}" << std::endl;
 
 			clKernWrite(transKernel, 3) << "else{" << std::endl;
-			clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 6) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 
 			clKernWrite(transKernel, 9) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop;" << std::endl;
 
@@ -2863,13 +2863,13 @@ clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTranspos
 		genTransposePrototypeLeadingDimensionBatched(params, lwSize, dtPlanar, dtComplex, funcName, transKernel, dtInput, dtOutput);
 
 		if (mult_of_16)//number of WG per sub square block
-			clKernWrite(transKernel, 3) << "const int numGroups_square_matrix_Y_1 = " << (smaller_dim / 16 / reShapeFactor)*(smaller_dim / 16 / reShapeFactor + 1) / 2 << ";" << std::endl;
+			clKernWrite(transKernel, 3) << "const size_t  numGroups_square_matrix_Y_1 = " << (smaller_dim / 16 / reShapeFactor)*(smaller_dim / 16 / reShapeFactor + 1) / 2 << ";" << std::endl;
 		else
-			clKernWrite(transKernel, 3) << "const int numGroups_square_matrix_Y_1 = " << (smaller_dim / (16 * reShapeFactor) + 1)*(smaller_dim / (16 * reShapeFactor) + 1 + 1) / 2 << ";" << std::endl;
+			clKernWrite(transKernel, 3) << "const size_t  numGroups_square_matrix_Y_1 = " << (smaller_dim / (16 * reShapeFactor) + 1)*(smaller_dim / (16 * reShapeFactor) + 1 + 1) / 2 << ";" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int numGroupsY_1 =  numGroups_square_matrix_Y_1 * "<< dim_ratio <<";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t  numGroupsY_1 =  numGroups_square_matrix_Y_1 * "<< dim_ratio <<";" << std::endl;
 
-		for (int i = 2; i < params.fft_DataDim - 1; i++)
+		for (size_t  i = 2; i < params.fft_DataDim - 1; i++)
 		{
 			clKernWrite(transKernel, 3) << "const size_t numGroupsY_" << i << " = numGroupsY_" << i - 1 << " * " << params.fft_N[i] << ";" << std::endl;
 		}
@@ -2966,36 +2966,36 @@ clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTranspos
 
 
 		clKernWrite(transKernel, 3) << "if (row == (float)(int)row) row -= 1; " << std::endl;
-		clKernWrite(transKernel, 3) << "const int t_gy = (int)row;" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t t_gy = (int)row;" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
 		if (mult_of_16)
-			clKernWrite(transKernel, 3) << "const int t_gx_p = g_index - " << (smaller_dim / 16 / reShapeFactor) << "*t_gy + t_gy*(t_gy + 1) / 2;" << std::endl;
+			clKernWrite(transKernel, 3) << "const long t_gx_p = g_index - " << (smaller_dim / 16 / reShapeFactor) << "*t_gy + t_gy*(t_gy + 1) / 2;" << std::endl;
 		else
-			clKernWrite(transKernel, 3) << "const int t_gx_p = g_index - " << (smaller_dim / (16 * reShapeFactor) + 1) << "*t_gy + t_gy*(t_gy + 1) / 2;" << std::endl;
+			clKernWrite(transKernel, 3) << "const long t_gx_p = g_index - " << (smaller_dim / (16 * reShapeFactor) + 1) << "*t_gy + t_gy*(t_gy + 1) / 2;" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int t_gy_p = t_gx_p - t_gy;" << std::endl;
+		clKernWrite(transKernel, 3) << "const long t_gy_p = t_gx_p - t_gy;" << std::endl;
 
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int d_lidx = get_local_id(0) % 16;" << std::endl;
-		clKernWrite(transKernel, 3) << "const int d_lidy = get_local_id(0) / 16;" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t d_lidx = get_local_id(0) % 16;" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t d_lidy = get_local_id(0) / 16;" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int lidy = (d_lidy * 16 + d_lidx) /" << (16 * reShapeFactor) << ";" << std::endl;
-		clKernWrite(transKernel, 3) << "const int lidx = (d_lidy * 16 + d_lidx) %" << (16 * reShapeFactor) << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t lidy = (d_lidy * 16 + d_lidx) /" << (16 * reShapeFactor) << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t lidx = (d_lidy * 16 + d_lidx) %" << (16 * reShapeFactor) << ";" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int idx = lidx + t_gx_p*" << 16 * reShapeFactor << ";" << std::endl;
-		clKernWrite(transKernel, 3) << "const int idy = lidy + t_gy_p*" << 16 * reShapeFactor << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t idx = lidx + t_gx_p*" << 16 * reShapeFactor << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t idy = lidy + t_gy_p*" << 16 * reShapeFactor << ";" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
-		clKernWrite(transKernel, 3) << "const int starting_index_yx = t_gy_p*" << 16 * reShapeFactor << " + t_gx_p*" << 16 * reShapeFactor*params.fft_N[0] << ";" << std::endl;
+		clKernWrite(transKernel, 3) << "const size_t starting_index_yx = t_gy_p*" << 16 * reShapeFactor << " + t_gx_p*" << 16 * reShapeFactor*params.fft_N[0] << ";" << std::endl;
 
 		clKernWrite(transKernel, 3) << "" << std::endl;
 
@@ -3026,8 +3026,8 @@ clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTranspos
 		// back into the arrays
 
 		if (mult_of_16) {
-			clKernWrite(transKernel, 3) << "int index;" << std::endl;
-			clKernWrite(transKernel, 3) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 3) << "size_t index;" << std::endl;
+			clKernWrite(transKernel, 3) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 6) << "index = lidy*" << 16 * reShapeFactor << " + lidx + loop*256;" << std::endl;
 
 			// Handle planar and interleaved right here
@@ -3105,7 +3105,7 @@ clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTranspos
 
 
 			// Step2: Write from shared to global
-			clKernWrite(transKernel, 3) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 3) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 6) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop;" << std::endl;
 
 
@@ -3178,9 +3178,9 @@ clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTranspos
 		}
 		else {
 
-			clKernWrite(transKernel, 3) << "int index;" << std::endl;
+			clKernWrite(transKernel, 3) << "size_t index;" << std::endl;
 			clKernWrite(transKernel, 3) << "if (" << smaller_dim << " - (t_gx_p + 1) *" << 16 * reShapeFactor << ">0){" << std::endl;
-			clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 6) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 9) << "index = lidy*" << 16 * reShapeFactor << " + lidx + loop*256;" << std::endl;
 
 			// Handle planar and interleaved right here
@@ -3249,7 +3249,7 @@ clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTranspos
 			clKernWrite(transKernel, 3) << "}" << std::endl;
 
 			clKernWrite(transKernel, 3) << "else{" << std::endl;
-			clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 6) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 9) << "index = lidy*" << 16 * reShapeFactor << " + lidx + loop*256;" << std::endl;
 
 
@@ -3334,7 +3334,7 @@ clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTranspos
 			// Step2: Write from shared to global
 
 			clKernWrite(transKernel, 3) << "if (" << smaller_dim << " - (t_gx_p + 1) *" << 16 * reShapeFactor << ">0){" << std::endl;
-			clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 6) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 			clKernWrite(transKernel, 9) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop ;" << std::endl;
 
 			// Handle planar and interleaved right here
@@ -3403,7 +3403,7 @@ clfftStatus genTransposeKernelLeadingDimensionBatched(const FFTGeneratedTranspos
 			clKernWrite(transKernel, 3) << "}" << std::endl;
 
 			clKernWrite(transKernel, 3) << "else{" << std::endl;
-			clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+			clKernWrite(transKernel, 6) << "for (size_t loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
 
 			clKernWrite(transKernel, 9) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop;" << std::endl;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list