[clblas] 59/125: enable rect read/write for gemm

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Fri May 29 06:57:22 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clblas.

commit 15c03be40361c56bc5109ced72864607077efc00
Author: Timmy <timmy.liu at amd.com>
Date:   Fri Mar 14 16:04:27 2014 -0500

    enable rect read/write for gemm
---
 src/client/clfunc_common.hpp |  1 +
 src/client/clfunc_xgemm.hpp  | 97 ++++++++++++++++++++++++++++++++++++++++----
 src/client/client.cpp        |  6 ++-
 3 files changed, 96 insertions(+), 8 deletions(-)

diff --git a/src/client/clfunc_common.hpp b/src/client/clfunc_common.hpp
index 5f73613..bda1186 100644
--- a/src/client/clfunc_common.hpp
+++ b/src/client/clfunc_common.hpp
@@ -313,6 +313,7 @@ public:
     virtual void reset_gpu_write_buffer() = 0;
 	virtual void read_gpu_buffer() = 0;
 	virtual void roundtrip_func() = 0;
+	virtual void roundtrip_func_rect() {} // Optional hook for a round trip using rectangular buffer transfers; the default does nothing.
 	virtual void allochostptr_roundtrip_func() {}
 	virtual void usehostptr_roundtrip_func() {}
 	virtual void copyhostptr_roundtrip_func() {}
diff --git a/src/client/clfunc_xgemm.hpp b/src/client/clfunc_xgemm.hpp
index 9e6836d..f5552b2 100644
--- a/src/client/clfunc_xgemm.hpp
+++ b/src/client/clfunc_xgemm.hpp
@@ -454,6 +454,89 @@ public:
 		clWaitForEvents(1, &event_);
 	timer.Stop(timer_id);
 	}
+	void roundtrip_func_rect() // Timed GEMM round trip: create device buffers, upload A/B/C with rect writes, call xGemm_Function, read C back with a rect read.
+	{
+	timer.Start(timer_id); // the timed section spans buffer creation, all transfers and the GEMM call
+		cl_int err; // NOTE(review): assigned by every OpenCL call below but never inspected
+		// Rect descriptors: region[0] is the row width in bytes, region[1] the number of rows, region[2] the number of slices.
+		size_t a_buffer_origin[3] = {0,0,0}; // NOTE(review): device origin is 0 although buf_a_ is sized with offA_ extra elements; confirm the offset is meant to be unused here
+		size_t a_host_origin[3] = {0,0,0};
+		size_t a_region[3] = {buffer_.m_*sizeof(T),buffer_.k_,1}; // A: k_ rect rows of m_ elements each, single slice
+		size_t a_buffer_row_pitch=0*sizeof(T);//lda on the device side; per the OpenCL spec a pitch of 0 defaults to region[0], i.e. rows are packed back to back
+		size_t a_buffer_slice_pitch=0;
+		size_t a_host_row_pitch=buffer_.lda_*sizeof(T); // consecutive host rows start lda_ elements apart
+		size_t a_host_slice_pitch=0;
+
+		size_t b_buffer_origin[3] = {0,0,0}; // NOTE(review): same as A; offB_ is not applied to the device origin
+		size_t b_host_origin[3] = {0,0,0};
+		size_t b_region[3] = {buffer_.k_*sizeof(T),buffer_.n_,1}; // B: n_ rect rows of k_ elements each, single slice
+		size_t b_buffer_row_pitch=0*sizeof(T);//ldb on the device side; 0 defaults to region[0] (packed rows)
+		size_t b_buffer_slice_pitch=0;
+		size_t b_host_row_pitch=buffer_.ldb_*sizeof(T); // consecutive host rows start ldb_ elements apart
+		size_t b_host_slice_pitch=0;
+
+		size_t c_buffer_origin[3] = {0,0,0}; // NOTE(review): same as A; offC_ is not applied to the device origin
+		size_t c_host_origin[3] = {0,0,0};
+		size_t c_region[3] = {buffer_.m_*sizeof(T),buffer_.n_,1}; // C: n_ rect rows of m_ elements each; reused for both the upload and the read-back
+		size_t c_buffer_row_pitch=0*sizeof(T);//ldc on the device side; 0 defaults to region[0] (packed rows)
+		size_t c_buffer_slice_pitch=0;
+		size_t c_host_row_pitch=buffer_.ldc_*sizeof(T); // consecutive host rows start ldc_ elements apart
+		size_t c_host_slice_pitch=0;
+
+        buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY, // device A holds k_*m_ elements plus offA_, with no leading-dimension padding
+                                       (buffer_.k_*buffer_.m_ +
+                                           buffer_.offA_) * sizeof(T),
+                                       NULL, &err);
+
+        buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY, // device B holds k_*n_ elements plus offB_
+                                        (buffer_.k_ * buffer_.n_ +
+                                            buffer_.offB_) * sizeof(T),
+                                        NULL, &err);
+
+        buffer_.buf_c_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE, // device C holds m_*n_ elements plus offC_; read-write because it is uploaded and read back
+                                        (buffer_.m_ * buffer_.n_ +
+                                            buffer_.offC_) * sizeof(T),
+                                        NULL, &err);
+        /* Superseded linear uploads (clEnqueueWriteBuffer), kept for reference:
+		err = clEnqueueWriteBuffer(queue_, buffer_.buf_a_, CL_TRUE,
+                                   buffer_.offA_ * sizeof(T),
+                                   buffer_.lda_ * buffer_.a_num_vectors_ *
+                                       sizeof(T),
+                                   buffer_.a_, 0, NULL, NULL);
+		
+        err = clEnqueueWriteBuffer(queue_, buffer_.buf_b_, CL_TRUE,
+                                   buffer_.offB_ * sizeof(T),
+                                   buffer_.ldb_ * buffer_.b_num_vectors_ *
+                                       sizeof(T),
+                                   buffer_.b_, 0, NULL, NULL);
+
+        err = clEnqueueWriteBuffer(queue_, buffer_.buf_c_, CL_TRUE,
+                                   buffer_.offC_ * sizeof(T),
+                                   buffer_.ldc_ * buffer_.c_num_vectors_ *
+                                   sizeof(T),
+                                   buffer_.c_, 0, NULL, NULL);*/
+        err = clEnqueueWriteBufferRect(queue_, buffer_.buf_a_, CL_TRUE, a_buffer_origin, a_host_origin, a_region, a_buffer_row_pitch, // blocking (CL_TRUE) rect upload of A from buffer_.a_
+										a_buffer_slice_pitch, a_host_row_pitch, a_host_slice_pitch, buffer_.a_, 0, NULL, NULL);
+        err = clEnqueueWriteBufferRect(queue_, buffer_.buf_b_, CL_TRUE, b_buffer_origin, b_host_origin, b_region, b_buffer_row_pitch, // blocking rect upload of B from buffer_.b_
+										b_buffer_slice_pitch, b_host_row_pitch, b_host_slice_pitch, buffer_.b_, 0, NULL, NULL);
+        err = clEnqueueWriteBufferRect(queue_, buffer_.buf_c_, CL_TRUE, c_buffer_origin, c_host_origin, c_region, c_buffer_row_pitch, // blocking rect upload of C from buffer_.c_
+										c_buffer_slice_pitch, c_host_row_pitch, c_host_slice_pitch, buffer_.c_, 0, NULL, NULL);
+
+		buffer_.lda_ = 0; // NOTE(review): the leading dimensions are zeroed before the GEMM call and never restored; the host pitches above were captured beforehand. Confirm xGemm_Function expects 0 here.
+        buffer_.ldb_ = 0;
+        buffer_.ldc_ = 0;
+		xGemm_Function(false); // called with false; the read-back of C is done explicitly below
+		/* Superseded linear read-back (clEnqueueReadBuffer), kept for reference:
+		err = clEnqueueReadBuffer(queue_, buffer_.buf_c_, CL_TRUE,
+			                      buffer_.offC_ * sizeof(T), buffer_.ldc_ * buffer_.c_num_vectors_ *
+                                       sizeof(T),
+								  buffer_.c_, 0, NULL, &event_);
+		*/
+		err = ::clEnqueueReadBufferRect(queue_, buffer_.buf_c_, CL_TRUE, c_buffer_origin, c_host_origin, c_region, c_buffer_row_pitch, // blocking rect read of C into buffer_.c_, using the host pitch captured before ldc_ was zeroed
+										c_buffer_slice_pitch, c_host_row_pitch, c_host_slice_pitch, buffer_.c_, 0, NULL, &event_);
+		clWaitForEvents(1, &event_); // waits on the read's event before the timer is stopped
+	timer.Stop(timer_id);
+	}	
 	void allochostptr_roundtrip_func()
 	{
 	timer.Start(timer_id);
@@ -528,12 +611,7 @@ public:
                                         (buffer_.ldc_ * buffer_.c_num_vectors_ +
                                             buffer_.offC_) * sizeof(T),
                                         buffer_.c_, &err);
-		xGemm_Function(false);
-		err = clEnqueueReadBuffer(queue_, buffer_.buf_c_, CL_TRUE,
-			                      buffer_.offC_ * sizeof(T), buffer_.ldc_ * buffer_.c_num_vectors_ *
-                                       sizeof(T),
-								  buffer_.c_, 0, NULL, &event_);
-		clWaitForEvents(1, &event_);
+		xGemm_Function(true);
 	timer.Stop(timer_id);
 	}
 	void copyhostptr_roundtrip_func()
@@ -554,7 +632,12 @@ public:
                                         (buffer_.ldc_ * buffer_.c_num_vectors_ +
                                             buffer_.offC_) * sizeof(T),
                                         buffer_.c_, &err);
-		xGemm_Function(true);
+		xGemm_Function(false);
+		err = clEnqueueReadBuffer(queue_, buffer_.buf_c_, CL_TRUE,
+			                      buffer_.offC_ * sizeof(T), buffer_.ldc_ * buffer_.c_num_vectors_ *
+                                       sizeof(T),
+								  buffer_.c_, 0, NULL, &event_);
+		clWaitForEvents(1, &event_);
 	timer.Stop(timer_id);
 	}
 	void usepersismem_roundtrip_func()
diff --git a/src/client/client.cpp b/src/client/client.cpp
index a55def3..1618609 100644
--- a/src/client/client.cpp
+++ b/src/client/client.cpp
@@ -102,7 +102,7 @@ int main(int argc, char *argv[])
     ( "diag", po::value<int>( &diag_option )->default_value(0), "0 = unit diagonal, 1 = non unit diagonal. only used with [list of function families]" ) // xtrsm xtrmm
     ( "profile,p", po::value<cl_uint>( &profileCount )->default_value(20), "Time and report the kernel speed (default: profiling off)" )
 	( "roundtrip", po::value<std::string>( &roundtrip )->default_value("noroundtrip"),"including the time of OpenCL memory allocation and transportation; options:roundtrip, noroundtrip(default)")
-	( "memalloc", po::value<std::string>( &memalloc )->default_value("default"),"setting the memory allocation flags for OpenCL; would not take effect if roundtrip time is not measured; options:default(default),alloc_host_ptr,use_host_ptr,copy_host_ptr,use_persistent_mem_amd")
+	( "memalloc", po::value<std::string>( &memalloc )->default_value("default"),"setting the memory allocation flags for OpenCL; would not take effect if roundtrip time is not measured; options:default(default),alloc_host_ptr,use_host_ptr,copy_host_ptr,use_persistent_mem_amd,rect_mem")
     ;
 
   po::variables_map vm;
@@ -534,6 +534,10 @@ int main(int argc, char *argv[])
 	{
 		my_function->usepersismem_roundtrip_func();
 	}
+	else if (memalloc=="rect_mem")
+	{
+		my_function->roundtrip_func_rect();
+	}
 	//my_function->reset_gpu_write_buffer();
 	my_function->releaseGPUBuffer_deleteCPUBuffer();
   }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git



More information about the debian-science-commits mailing list