[clfft] 11/23: optimizing EnqueueTransform API timing measurement

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Aug 18 16:08:21 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.

commit a67ea0aac3eb188eaa785c109f23b7e99b76bd5a
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Thu Aug 6 15:20:10 2015 -0500

    optimizing EnqueueTransform API timing measurement
---
 src/client/client.cpp | 47 +++++++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/client/client.cpp b/src/client/client.cpp
index 1680822..795cacb 100644
--- a/src/client/client.cpp
+++ b/src/client/client.cpp
@@ -514,36 +514,47 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
 	//
 	cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &output_cl_mem_buffers[ 0 ];
 
-	Timer tr;
-	tr.Start();
+	OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, NULL,
+		&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
+		"clfftEnqueueTransform failed" );
+
+	OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
 
 	cl_event *outEvent = new cl_event[profile_count];
+	for( cl_uint i = 0; i < profile_count; ++i ) outEvent[i] = 0;
 
-	for( cl_uint i = 0; i < profile_count; ++i )
+	if(profile_count > 1)
 	{
-		outEvent[i] = 0;
-		if( timer ) timer->Start( clFFTID );
+		Timer tr;		
+		tr.Start();
+		for( cl_uint i = 0; i < profile_count; ++i )
+		{
+			if( timer ) timer->Start( clFFTID );
 
-		OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent[i],
-			&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
-			"clfftEnqueueTransform failed" );
+			OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent[i],
+				&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
+				"clfftEnqueueTransform failed" );
 
-		if( timer ) timer->Stop( clFFTID );
-	}
-	OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
-	if(clMedBuffer) clReleaseMemObject(clMedBuffer);
+			if( timer ) timer->Stop( clFFTID );
+		}
+		OPENCL_V_THROW( clWaitForEvents ( profile_count, outEvent ), "clWaitForEvents  failed" );
+
+		double wtime = tr.Sample()/((double)profile_count);
+
+		OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
+
+		size_t totalLen = 1;
+		for(int i=0; i<dim; i++) totalLen *= lengths[i];
+		double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);
 
-	double wtime = tr.Sample()/((double)profile_count);
-	size_t totalLen = 1;
-	for(int i=0; i<dim; i++) totalLen *= lengths[i];
-	double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);
 
-	if(profile_count > 1)
-	{
 		tout << "\nExecution wall time: " << 1000.0*wtime << " ms" << std::endl;
 		tout << "Execution gflops: " << ((double)batch_size * opsconst)/(1000000000.0*wtime) << std::endl;
+
 	}
 
+	if(clMedBuffer) clReleaseMemObject(clMedBuffer);
+
 	if( timer && (command_queue_flags & CL_QUEUE_PROFILING_ENABLE) )
 	{
 		//	Remove all timings that are outside of 2 stddev (keep 65% of samples); we ignore outliers to get a more consistent result

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list