[clblas] 31/125: fix a bug in performance client. Since the performance is calculated as the average of 20 runs, clReleaseMemObject and delete buffer should be done in between runs instead of after all runs in the destructor. On certain devices and matrix sizes, the client will report slower performance due to this inappropriate handling of memory. Also updated the travis CI build script. Force the client test to CPU from the command line instead of in the source code.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Fri May 29 06:57:19 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clblas.

commit a9856a66028b0fe5d229cd6ffc6a0ad5a9204662
Author: Timmy <timmy.liu at amd.com>
Date:   Thu Sep 26 10:26:10 2013 -0500

    fix a bug in performance client. Since the performance is calculated as the average of 20 runs, clReleaseMemObject and delete buffer should be done in between runs instead of after all runs in the destructor. On certain devices and matrix sizes, the client will report slower performance due to this inappropriate handling of memory. Also updated the travis CI build script. Force the client test to CPU from the command line instead of in the source code.
---
 .travis.yml                  |  2 +-
 src/client/clfunc_common.hpp |  2 +-
 src/client/clfunc_xgemm.hpp  | 23 ++++++++++++++---------
 src/client/clfunc_xgemv.hpp  |  6 ++++++
 src/client/clfunc_xger.hpp   |  6 ++++++
 src/client/clfunc_xgerc.hpp  |  7 ++++++-
 src/client/clfunc_xgeru.hpp  |  7 ++++++-
 src/client/clfunc_xhemm.hpp  |  6 ++++++
 src/client/clfunc_xhemv.hpp  |  7 ++++++-
 src/client/clfunc_xher.hpp   |  7 ++++++-
 src/client/clfunc_xher2.hpp  |  6 ++++++
 src/client/clfunc_xsymm.hpp  | 17 +++++++++++------
 src/client/clfunc_xsymv.hpp  |  6 ++++++
 src/client/clfunc_xsyr.hpp   |  6 ++++++
 src/client/clfunc_xsyr2.hpp  |  7 ++++++-
 src/client/clfunc_xsyr2k.hpp |  7 ++++++-
 src/client/clfunc_xsyrk.hpp  |  6 ++++++
 src/client/clfunc_xtrmm.hpp  | 17 +++++++++++------
 src/client/clfunc_xtrmv.hpp  |  6 ++++++
 src/client/clfunc_xtrsm.hpp  | 17 +++++++++++------
 src/client/clfunc_xtrsv.hpp  |  6 ++++++
 src/client/client.cpp        | 10 ++++++----
 22 files changed, 145 insertions(+), 39 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 1039bc4..725e202 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -30,7 +30,7 @@ script:
 # Run a simple test to validate that the build works; CPU device in a VM
   - cd client
   - export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clBLAS/package/lib64:${LD_LIBRARY_PATH}
-  - ./client 
+  - ./client --cpu
 
 after_success:
   - cd ${TRAVIS_BUILD_DIR}/bin/clBLAS
diff --git a/src/client/clfunc_common.hpp b/src/client/clfunc_common.hpp
index 4876daf..293a3b6 100644
--- a/src/client/clfunc_common.hpp
+++ b/src/client/clfunc_common.hpp
@@ -313,7 +313,7 @@ public:
                               size_t M, size_t N, size_t K, size_t lda,
                               size_t ldb, size_t ldc, size_t offA, size_t offBX,
                               size_t offCY, double alpha, double beta) = 0;
-
+	virtual void releaseGPUBuffer_deleteCPUBuffer()=0;
     StatisticalTimer& timer;
     StatisticalTimer::sTimerID timer_id;
 
diff --git a/src/client/clfunc_xgemm.hpp b/src/client/clfunc_xgemm.hpp
index 17223a6..c5f706c 100644
--- a/src/client/clfunc_xgemm.hpp
+++ b/src/client/clfunc_xgemm.hpp
@@ -62,15 +62,6 @@ public:
 
     ~xGemm()
     {
-        delete buffer_.a_;
-        delete buffer_.b_;
-        delete buffer_.c_;
-        OPENCL_V_THROW( clReleaseMemObject(buffer_.buf_a_),
-                        "releasing buffer A");
-        OPENCL_V_THROW( clReleaseMemObject(buffer_.buf_b_),
-                        "releasing buffer B");
-        OPENCL_V_THROW( clReleaseMemObject(buffer_.buf_c_),
-                        "releasing buffer C");
     }
 
     void call_func()
@@ -659,6 +650,20 @@ public:
         buffer_.c_ = new T[buffer_.ldc_*buffer_.c_num_vectors_ ];
 
     }
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		delete buffer_.a_;
+        delete buffer_.b_;
+        delete buffer_.c_;
+        OPENCL_V_THROW( clReleaseMemObject(buffer_.buf_a_),
+                        "releasing buffer A");
+        OPENCL_V_THROW( clReleaseMemObject(buffer_.buf_b_),
+                        "releasing buffer B");
+        OPENCL_V_THROW( clReleaseMemObject(buffer_.buf_c_),
+                        "releasing buffer C");
+	}
 
 protected:
     void initialize_scalars(double alpha, double beta)
diff --git a/src/client/clfunc_xgemv.hpp b/src/client/clfunc_xgemv.hpp
index 2d1d5b0..cc85109 100644
--- a/src/client/clfunc_xgemv.hpp
+++ b/src/client/clfunc_xgemv.hpp
@@ -286,6 +286,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to-do
+	}
 
 protected:
     void initialize_scalars(double alpha, double beta)
diff --git a/src/client/clfunc_xger.hpp b/src/client/clfunc_xger.hpp
index 05899cd..d2f36db 100644
--- a/src/client/clfunc_xger.hpp
+++ b/src/client/clfunc_xger.hpp
@@ -217,6 +217,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to-do
+	}
 
 protected:
   void initialize_scalars(double alpha, double beta)
diff --git a/src/client/clfunc_xgerc.hpp b/src/client/clfunc_xgerc.hpp
index 829d938..ed39f79 100644
--- a/src/client/clfunc_xgerc.hpp
+++ b/src/client/clfunc_xgerc.hpp
@@ -98,7 +98,12 @@ public:
 		{}
 
   void call_func();
-
+  void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to do
+	}
 protected:
   void initialize_scalars(double alpha, double beta)
   {
diff --git a/src/client/clfunc_xgeru.hpp b/src/client/clfunc_xgeru.hpp
index 8c7d02c..dbcecc9 100644
--- a/src/client/clfunc_xgeru.hpp
+++ b/src/client/clfunc_xgeru.hpp
@@ -94,7 +94,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
-
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to-do
+	}
 protected:
 protected:
   void initialize_scalars(double alpha, double beta)
diff --git a/src/client/clfunc_xhemm.hpp b/src/client/clfunc_xhemm.hpp
index 8e46d1e..8a0c555 100644
--- a/src/client/clfunc_xhemm.hpp
+++ b/src/client/clfunc_xhemm.hpp
@@ -120,6 +120,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to do
+	}
 
 protected:
 protected:
diff --git a/src/client/clfunc_xhemv.hpp b/src/client/clfunc_xhemv.hpp
index 570c3fc..6211114 100644
--- a/src/client/clfunc_xhemv.hpp
+++ b/src/client/clfunc_xhemv.hpp
@@ -95,7 +95,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
-
+  	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to do
+	}
 protected:
 protected:
   void initialize_scalars(double alpha, double beta)
diff --git a/src/client/clfunc_xher.hpp b/src/client/clfunc_xher.hpp
index e624b55..5144b22 100644
--- a/src/client/clfunc_xher.hpp
+++ b/src/client/clfunc_xher.hpp
@@ -90,7 +90,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
-
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to do
+	}
 protected:
 protected:
   void initialize_scalars(double alpha, double beta)
diff --git a/src/client/clfunc_xher2.hpp b/src/client/clfunc_xher2.hpp
index 27d95f3..aec7cc8 100644
--- a/src/client/clfunc_xher2.hpp
+++ b/src/client/clfunc_xher2.hpp
@@ -94,6 +94,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to do
+	}
 protected:
 protected:
   void initialize_scalars(double alpha, double beta)
diff --git a/src/client/clfunc_xsymm.hpp b/src/client/clfunc_xsymm.hpp
index e9fe981..d067870 100644
--- a/src/client/clfunc_xsymm.hpp
+++ b/src/client/clfunc_xsymm.hpp
@@ -58,12 +58,6 @@ public:
 
   ~xSymm()
   {
-    delete buffer.cpuA;
-    delete buffer.cpuB;
-    delete buffer.cpuC;
-    OPENCL_V_THROW( clReleaseMemObject(buffer.A), "releasing buffer A");
-    OPENCL_V_THROW( clReleaseMemObject(buffer.B), "releasing buffer B");
-    OPENCL_V_THROW( clReleaseMemObject(buffer.C), "releasing buffer C");
   }
 
   double gflops()
@@ -212,6 +206,17 @@ public:
   buffer.cpuC = new T[buffer.N * buffer.ldc];
   buffer.cpuA = new T[buffer.a_num_vectors * buffer.lda];
   }
+  	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		delete buffer.cpuA;
+		delete buffer.cpuB;
+		delete buffer.cpuC;
+		OPENCL_V_THROW( clReleaseMemObject(buffer.A), "releasing buffer A");
+		OPENCL_V_THROW( clReleaseMemObject(buffer.B), "releasing buffer B");
+		OPENCL_V_THROW( clReleaseMemObject(buffer.C), "releasing buffer C");
+	}
 protected:
   void initialize_scalars(double alpha, double beta)
   {
diff --git a/src/client/clfunc_xsymv.hpp b/src/client/clfunc_xsymv.hpp
index 625c7ec..c928541 100644
--- a/src/client/clfunc_xsymv.hpp
+++ b/src/client/clfunc_xsymv.hpp
@@ -209,6 +209,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to-do
+	}
 protected:
     void initialize_scalars(double alpha, double beta)
     {
diff --git a/src/client/clfunc_xsyr.hpp b/src/client/clfunc_xsyr.hpp
index 172032c..4c70e69 100644
--- a/src/client/clfunc_xsyr.hpp
+++ b/src/client/clfunc_xsyr.hpp
@@ -90,6 +90,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+        //to-do
+	}
 
 protected:
 protected:
diff --git a/src/client/clfunc_xsyr2.hpp b/src/client/clfunc_xsyr2.hpp
index 761c616..9977d08 100644
--- a/src/client/clfunc_xsyr2.hpp
+++ b/src/client/clfunc_xsyr2.hpp
@@ -94,7 +94,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
-
+ 	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to-do
+	}
 protected:
 protected:
   void initialize_scalars(double alpha, double beta)
diff --git a/src/client/clfunc_xsyr2k.hpp b/src/client/clfunc_xsyr2k.hpp
index 4faa399..9fb3381 100644
--- a/src/client/clfunc_xsyr2k.hpp
+++ b/src/client/clfunc_xsyr2k.hpp
@@ -376,7 +376,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
-
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to-do
+	}
 protected:
     void initialize_scalars(double alpha, double beta)
     {
diff --git a/src/client/clfunc_xsyrk.hpp b/src/client/clfunc_xsyrk.hpp
index 5bfd0e3..ec842e2 100644
--- a/src/client/clfunc_xsyrk.hpp
+++ b/src/client/clfunc_xsyrk.hpp
@@ -293,6 +293,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to-do
+	}
 protected:
     void initialize_scalars(double alpha, double beta)
     {
diff --git a/src/client/clfunc_xtrmm.hpp b/src/client/clfunc_xtrmm.hpp
index d47ddfd..6803457 100644
--- a/src/client/clfunc_xtrmm.hpp
+++ b/src/client/clfunc_xtrmm.hpp
@@ -57,12 +57,6 @@ public:
 
     ~xTrmm()
     {
-        delete buffer_.a_;
-        delete buffer_.b_;
-        OPENCL_V_THROW(clReleaseMemObject(buffer_.buf_a_),
-                       "releasing buffer A");
-        OPENCL_V_THROW(clReleaseMemObject(buffer_.buf_b_),
-                       "releasing buffer B");
     }
 
     void call_func()
@@ -450,6 +444,17 @@ public:
         buffer_.a_ = new T[buffer_.lda_*buffer_.a_num_vectors_];
         buffer_.b_ = new T[buffer_.ldb_*buffer_.b_num_vectors_];
 	}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+        delete buffer_.a_;
+        delete buffer_.b_;
+        OPENCL_V_THROW(clReleaseMemObject(buffer_.buf_a_),
+                       "releasing buffer A");
+        OPENCL_V_THROW(clReleaseMemObject(buffer_.buf_b_),
+                       "releasing buffer B");
+	}
 protected:
     void initialize_scalars(double alpha, double beta)
     {
diff --git a/src/client/clfunc_xtrmv.hpp b/src/client/clfunc_xtrmv.hpp
index 725e9f3..80d5004 100644
--- a/src/client/clfunc_xtrmv.hpp
+++ b/src/client/clfunc_xtrmv.hpp
@@ -225,6 +225,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to-do
+	}
 protected:
   void initialize_scalars(double alpha, double beta)
   {
diff --git a/src/client/clfunc_xtrsm.hpp b/src/client/clfunc_xtrsm.hpp
index 8ae85c3..7a86be9 100644
--- a/src/client/clfunc_xtrsm.hpp
+++ b/src/client/clfunc_xtrsm.hpp
@@ -57,12 +57,6 @@ public:
 
     ~xTrsm()
     {
-        delete buffer_.a_;
-        delete buffer_.b_;
-        OPENCL_V_THROW( clReleaseMemObject(buffer_.buf_a_),
-                        "releasing buffer A");
-        OPENCL_V_THROW( clReleaseMemObject(buffer_.buf_b_),
-                        "releasing buffer B");
     }
 
     void call_func()
@@ -456,6 +450,17 @@ public:
         buffer_.a_ = new T[buffer_.lda_*buffer_.a_num_vectors_];
         buffer_.b_ = new T[buffer_.ldb_*buffer_.b_num_vectors_];
 	}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+        delete buffer_.a_;
+        delete buffer_.b_;
+        OPENCL_V_THROW(clReleaseMemObject(buffer_.buf_a_),
+                       "releasing buffer A");
+        OPENCL_V_THROW(clReleaseMemObject(buffer_.buf_b_),
+                       "releasing buffer B");
+	}
 protected:
     void initialize_scalars(double alpha, double beta)
     {
diff --git a/src/client/clfunc_xtrsv.hpp b/src/client/clfunc_xtrsv.hpp
index f0b728a..4eb0e5b 100644
--- a/src/client/clfunc_xtrsv.hpp
+++ b/src/client/clfunc_xtrsv.hpp
@@ -218,6 +218,12 @@ public:
                       size_t ldc, size_t offA, size_t offBX, size_t offCY,
                       double alpha, double beta)
 		{}
+	void releaseGPUBuffer_deleteCPUBuffer()
+	{
+		//this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
+		//need to do this before we eventually hit the destructor
+		//to-do
+	}
 protected:
   void initialize_scalars(double alpha, double beta)
   {
diff --git a/src/client/client.cpp b/src/client/client.cpp
index 1bf2454..74a8eb8 100644
--- a/src/client/client.cpp
+++ b/src/client/client.cpp
@@ -51,7 +51,7 @@ int main(int argc, char *argv[])
   cl_double beta;
   cl_uint profileCount;
   cl_uint commandQueueFlags = 0;
-  cl_device_type deviceType = CL_DEVICE_TYPE_CPU;
+  cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
   int order_option;
   //clblasOrder order;
   //clblasTranspose transA;
@@ -484,7 +484,8 @@ int main(int argc, char *argv[])
 	my_function->read_gpu_buffer();
     my_function->reset_gpu_write_buffer();*/
 	my_function->roundtrip_func();
-	my_function->reset_gpu_write_buffer();
+	//my_function->reset_gpu_write_buffer();
+	my_function->releaseGPUBuffer_deleteCPUBuffer();
   }
 
   if( commandQueueFlags & CL_QUEUE_PROFILING_ENABLE )
@@ -512,7 +513,8 @@ int main(int argc, char *argv[])
     my_function->initialize_gpu_buffer();
     my_function->call_func();
 	my_function->read_gpu_buffer();
-    my_function->reset_gpu_write_buffer();
+    //my_function->reset_gpu_write_buffer();
+	my_function->releaseGPUBuffer_deleteCPUBuffer();
   }
 
   if( commandQueueFlags & CL_QUEUE_PROFILING_ENABLE )
@@ -525,7 +527,7 @@ int main(int argc, char *argv[])
       std::endl;
   }
   }
-
+  delete my_function;
   return 0;
 }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git



More information about the debian-science-commits mailing list