[clblas] 59/125: enable rect read/write for gemm
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Fri May 29 06:57:22 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clblas.
commit 15c03be40361c56bc5109ced72864607077efc00
Author: Timmy <timmy.liu at amd.com>
Date: Fri Mar 14 16:04:27 2014 -0500
enable rect read/write for gemm
---
src/client/clfunc_common.hpp | 1 +
src/client/clfunc_xgemm.hpp | 97 ++++++++++++++++++++++++++++++++++++++++----
src/client/client.cpp | 6 ++-
3 files changed, 96 insertions(+), 8 deletions(-)
diff --git a/src/client/clfunc_common.hpp b/src/client/clfunc_common.hpp
index 5f73613..bda1186 100644
--- a/src/client/clfunc_common.hpp
+++ b/src/client/clfunc_common.hpp
@@ -313,6 +313,7 @@ public:
virtual void reset_gpu_write_buffer() = 0;
virtual void read_gpu_buffer() = 0;
virtual void roundtrip_func() = 0;
+ virtual void roundtrip_func_rect() {} // default is a no-op; the xgemm client (clfunc_xgemm.hpp) overrides it with a rect-transfer round trip
virtual void allochostptr_roundtrip_func() {}
virtual void usehostptr_roundtrip_func() {}
virtual void copyhostptr_roundtrip_func() {}
diff --git a/src/client/clfunc_xgemm.hpp b/src/client/clfunc_xgemm.hpp
index 9e6836d..f5552b2 100644
--- a/src/client/clfunc_xgemm.hpp
+++ b/src/client/clfunc_xgemm.hpp
@@ -454,6 +454,89 @@ public:
clWaitForEvents(1, &event_);
timer.Stop(timer_id);
}
+ void roundtrip_func_rect() // Timed GEMM round trip: create A/B/C device buffers, upload with clEnqueueWriteBufferRect, call xGemm_Function, read C back with clEnqueueReadBufferRect.
+ {
+ timer.Start(timer_id); // timed span covers buffer creation, uploads, the GEMM call and the read-back
+ cl_int err; // NOTE(review): assigned by every OpenCL call below but never inspected
+ // Rect descriptors; per the OpenCL rect API a region is {row width in bytes, row count, slice count}.
+ size_t a_buffer_origin[3] = {0,0,0};
+ size_t a_host_origin[3] = {0,0,0};
+ size_t a_region[3] = {buffer_.m_*sizeof(T),buffer_.k_,1}; // k_ rows of m_ elements, one slice
+ size_t a_buffer_row_pitch=0*sizeof(T); // 0 => OpenCL derives the pitch from region[0], i.e. packed device rows (original note: lda)
+ size_t a_buffer_slice_pitch=0; // 0 => derived by OpenCL
+ size_t a_host_row_pitch=buffer_.lda_*sizeof(T); // host rows are lda_ elements apart
+ size_t a_host_slice_pitch=0; // 0 => derived by OpenCL
+
+ size_t b_buffer_origin[3] = {0,0,0};
+ size_t b_host_origin[3] = {0,0,0};
+ size_t b_region[3] = {buffer_.k_*sizeof(T),buffer_.n_,1}; // n_ rows of k_ elements, one slice
+ size_t b_buffer_row_pitch=0*sizeof(T); // 0 => packed device rows (original note: ldb)
+ size_t b_buffer_slice_pitch=0;
+ size_t b_host_row_pitch=buffer_.ldb_*sizeof(T); // host rows are ldb_ elements apart
+ size_t b_host_slice_pitch=0;
+
+ size_t c_buffer_origin[3] = {0,0,0};
+ size_t c_host_origin[3] = {0,0,0};
+ size_t c_region[3] = {buffer_.m_*sizeof(T),buffer_.n_,1}; // n_ rows of m_ elements, one slice
+ size_t c_buffer_row_pitch=0*sizeof(T); // 0 => packed device rows (original note: ldc)
+ size_t c_buffer_slice_pitch=0;
+ size_t c_host_row_pitch=buffer_.ldc_*sizeof(T); // captured here, before ldc_ is zeroed further down
+ size_t c_host_slice_pitch=0;
+
+ buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY, // no host pointer; sized for k_*m_ packed elements plus offA_
+ (buffer_.k_*buffer_.m_ +
+ buffer_.offA_) * sizeof(T),
+ NULL, &err);
+
+ buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY, // sized for k_*n_ packed elements plus offB_
+ (buffer_.k_ * buffer_.n_ +
+ buffer_.offB_) * sizeof(T),
+ NULL, &err);
+
+ buffer_.buf_c_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE, // sized for m_*n_ packed elements plus offC_
+ (buffer_.m_ * buffer_.n_ +
+ buffer_.offC_) * sizeof(T),
+ NULL, &err);
+ /* Linear clEnqueueWriteBuffer uploads, disabled in favour of the rect writes below:
+ err = clEnqueueWriteBuffer(queue_, buffer_.buf_a_, CL_TRUE,
+ buffer_.offA_ * sizeof(T),
+ buffer_.lda_ * buffer_.a_num_vectors_ *
+ sizeof(T),
+ buffer_.a_, 0, NULL, NULL);
+
+ err = clEnqueueWriteBuffer(queue_, buffer_.buf_b_, CL_TRUE,
+ buffer_.offB_ * sizeof(T),
+ buffer_.ldb_ * buffer_.b_num_vectors_ *
+ sizeof(T),
+ buffer_.b_, 0, NULL, NULL);
+
+ err = clEnqueueWriteBuffer(queue_, buffer_.buf_c_, CL_TRUE,
+ buffer_.offC_ * sizeof(T),
+ buffer_.ldc_ * buffer_.c_num_vectors_ *
+ sizeof(T),
+ buffer_.c_, 0, NULL, NULL);*/
+ err = clEnqueueWriteBufferRect(queue_, buffer_.buf_a_, CL_TRUE, a_buffer_origin, a_host_origin, a_region, a_buffer_row_pitch, // blocking write (CL_TRUE)
+ a_buffer_slice_pitch, a_host_row_pitch, a_host_slice_pitch, buffer_.a_, 0, NULL, NULL); // NOTE(review): origins are all zero, so offA_/offB_/offC_ are not applied to these transfers although they are counted in the buffer sizes
+ err = clEnqueueWriteBufferRect(queue_, buffer_.buf_b_, CL_TRUE, b_buffer_origin, b_host_origin, b_region, b_buffer_row_pitch,
+ b_buffer_slice_pitch, b_host_row_pitch, b_host_slice_pitch, buffer_.b_, 0, NULL, NULL);
+ err = clEnqueueWriteBufferRect(queue_, buffer_.buf_c_, CL_TRUE, c_buffer_origin, c_host_origin, c_region, c_buffer_row_pitch,
+ c_buffer_slice_pitch, c_host_row_pitch, c_host_slice_pitch, buffer_.c_, 0, NULL, NULL);
+
+ buffer_.lda_ = 0; // NOTE(review): leading dimensions are zeroed before the GEMM call and not restored in this function;
+ buffer_.ldb_ = 0; // presumably this is meant to match the packed device layout - confirm how xGemm_Function treats 0
+ buffer_.ldc_ = 0;
+ xGemm_Function(false);
+ /* Linear read-back, disabled in favour of the rect read below:
+ err = clEnqueueReadBuffer(queue_, buffer_.buf_c_, CL_TRUE,
+ buffer_.offC_ * sizeof(T), buffer_.ldc_ * buffer_.c_num_vectors_ *
+ sizeof(T),
+ buffer_.c_, 0, NULL, &event_);
+ */
+ err = ::clEnqueueReadBufferRect(queue_, buffer_.buf_c_, CL_TRUE, c_buffer_origin, c_host_origin, c_region, c_buffer_row_pitch, // blocking read (CL_TRUE) of C into buffer_.c_
+ c_buffer_slice_pitch, c_host_row_pitch, c_host_slice_pitch, buffer_.c_, 0, NULL, &event_); // uses the host pitch computed from the original ldc_
+ clWaitForEvents(1, &event_); // wait on the read's event before stopping the timer
+ timer.Stop(timer_id);
+ }
void allochostptr_roundtrip_func()
{
timer.Start(timer_id);
@@ -528,12 +611,7 @@ public:
(buffer_.ldc_ * buffer_.c_num_vectors_ +
buffer_.offC_) * sizeof(T),
buffer_.c_, &err);
- xGemm_Function(false);
- err = clEnqueueReadBuffer(queue_, buffer_.buf_c_, CL_TRUE,
- buffer_.offC_ * sizeof(T), buffer_.ldc_ * buffer_.c_num_vectors_ *
- sizeof(T),
- buffer_.c_, 0, NULL, &event_);
- clWaitForEvents(1, &event_);
+ xGemm_Function(true);
timer.Stop(timer_id);
}
void copyhostptr_roundtrip_func()
@@ -554,7 +632,12 @@ public:
(buffer_.ldc_ * buffer_.c_num_vectors_ +
buffer_.offC_) * sizeof(T),
buffer_.c_, &err);
- xGemm_Function(true);
+ xGemm_Function(false);
+ err = clEnqueueReadBuffer(queue_, buffer_.buf_c_, CL_TRUE,
+ buffer_.offC_ * sizeof(T), buffer_.ldc_ * buffer_.c_num_vectors_ *
+ sizeof(T),
+ buffer_.c_, 0, NULL, &event_);
+ clWaitForEvents(1, &event_);
timer.Stop(timer_id);
}
void usepersismem_roundtrip_func()
diff --git a/src/client/client.cpp b/src/client/client.cpp
index a55def3..1618609 100644
--- a/src/client/client.cpp
+++ b/src/client/client.cpp
@@ -102,7 +102,7 @@ int main(int argc, char *argv[])
( "diag", po::value<int>( &diag_option )->default_value(0), "0 = unit diagonal, 1 = non unit diagonal. only used with [list of function families]" ) // xtrsm xtrmm
( "profile,p", po::value<cl_uint>( &profileCount )->default_value(20), "Time and report the kernel speed (default: profiling off)" )
( "roundtrip", po::value<std::string>( &roundtrip )->default_value("noroundtrip"),"including the time of OpenCL memory allocation and transportation; options:roundtrip, noroundtrip(default)")
- ( "memalloc", po::value<std::string>( &memalloc )->default_value("default"),"setting the memory allocation flags for OpenCL; would not take effect if roundtrip time is not measured; options:default(default),alloc_host_ptr,use_host_ptr,copy_host_ptr,use_persistent_mem_amd")
+ ( "memalloc", po::value<std::string>( &memalloc )->default_value("default"),"setting the memory allocation flags for OpenCL; would not take effect if roundtrip time is not measured; options:default(default),alloc_host_ptr,use_host_ptr,copy_host_ptr,use_persistent_mem_amd,rect_mem")
;
po::variables_map vm;
@@ -534,6 +534,10 @@ int main(int argc, char *argv[])
{
my_function->usepersismem_roundtrip_func();
}
+ else if (memalloc=="rect_mem")
+ {
+ my_function->roundtrip_func_rect();
+ }
//my_function->reset_gpu_write_buffer();
my_function->releaseGPUBuffer_deleteCPUBuffer();
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits
mailing list