[clfft] 11/23: optimizing EnqueueTransform API timing measurement
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Aug 18 16:08:21 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.
commit a67ea0aac3eb188eaa785c109f23b7e99b76bd5a
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Thu Aug 6 15:20:10 2015 -0500
optimizing EnqueueTransform API timing measurement
---
src/client/client.cpp | 47 +++++++++++++++++++++++++++++------------------
1 file changed, 29 insertions(+), 18 deletions(-)
diff --git a/src/client/client.cpp b/src/client/client.cpp
index 1680822..795cacb 100644
--- a/src/client/client.cpp
+++ b/src/client/client.cpp
@@ -514,36 +514,47 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
//
cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &output_cl_mem_buffers[ 0 ];
- Timer tr;
- tr.Start();
+ OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, NULL,
+ &input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
+ "clfftEnqueueTransform failed" );
+
+ OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
cl_event *outEvent = new cl_event[profile_count];
+ for( cl_uint i = 0; i < profile_count; ++i ) outEvent[i] = 0;
- for( cl_uint i = 0; i < profile_count; ++i )
+ if(profile_count > 1)
{
- outEvent[i] = 0;
- if( timer ) timer->Start( clFFTID );
+ Timer tr;
+ tr.Start();
+ for( cl_uint i = 0; i < profile_count; ++i )
+ {
+ if( timer ) timer->Start( clFFTID );
- OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent[i],
- &input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
- "clfftEnqueueTransform failed" );
+ OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent[i],
+ &input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
+ "clfftEnqueueTransform failed" );
- if( timer ) timer->Stop( clFFTID );
- }
- OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
- if(clMedBuffer) clReleaseMemObject(clMedBuffer);
+ if( timer ) timer->Stop( clFFTID );
+ }
+ OPENCL_V_THROW( clWaitForEvents ( profile_count, outEvent ), "clWaitForEvents failed" );
+
+ double wtime = tr.Sample()/((double)profile_count);
+
+ OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
+
+ size_t totalLen = 1;
+ for(int i=0; i<dim; i++) totalLen *= lengths[i];
+ double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);
- double wtime = tr.Sample()/((double)profile_count);
- size_t totalLen = 1;
- for(int i=0; i<dim; i++) totalLen *= lengths[i];
- double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);
- if(profile_count > 1)
- {
tout << "\nExecution wall time: " << 1000.0*wtime << " ms" << std::endl;
tout << "Execution gflops: " << ((double)batch_size * opsconst)/(1000000000.0*wtime) << std::endl;
+
}
+ if(clMedBuffer) clReleaseMemObject(clMedBuffer);
+
if( timer && (command_queue_flags & CL_QUEUE_PROFILING_ENABLE) )
{
// Remove all timings that are outside of 2 stddev (keep 65% of samples); we ignore outliers to get a more consistent result
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list