[clblas] 64/75: Adding additional trsm samples

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Jan 24 23:30:47 UTC 2017


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/master
in repository clblas.

commit 69d38d947229e57668be51a40f9b3cd755d7320f
Author: Kent Knox <kent.knox at amd>
Date:   Mon Jan 16 12:11:29 2017 -0600

    Adding additional trsm samples
---
 src/samples/CMakeLists.txt    |  34 +++++---
 src/samples/example_ctrsm.c   | 177 +++++++++++++++++++++++++++++++++++++++
 src/samples/example_strsm.cpp | 188 ++++++++++++++++++++++++++++++++++++++++++
 src/tests/BlasBase.cpp        |   5 +-
 src/tests/cmdline.c           |   3 +-
 5 files changed, 392 insertions(+), 15 deletions(-)

diff --git a/src/samples/CMakeLists.txt b/src/samples/CMakeLists.txt
index 8422e65..53ed2fb 100644
--- a/src/samples/CMakeLists.txt
+++ b/src/samples/CMakeLists.txt
@@ -1,12 +1,12 @@
 # ########################################################################
 # Copyright 2013 Advanced Micro Devices, Inc.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 # http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,6 +19,8 @@ set(SSYMV_SAMPLE_SRC example_ssymv.c)
 set(SGEMM_SAMPLE_SRC example_sgemm.c)
 set(STRMM_SAMPLE_SRC example_strmm.c)
 set(STRSM_SAMPLE_SRC example_strsm.c)
+set(STRSM_SAMPLE_SRCPP example_strsm.cpp)
+set(CTRSM_SAMPLE_SRC example_ctrsm.c)
 set(SSYRK_SAMPLE_SRC example_ssyrk.c)
 set(SSYR2K_SAMPLE_SRC example_ssyr2k.c)
 
@@ -91,6 +93,14 @@ add_executable(example_strsm ${STRSM_SAMPLE_SRC})
 target_link_libraries(example_strsm ${OPENCL_LIBRARIES} clBLAS)
 set_property( TARGET example_strsm PROPERTY FOLDER "Samples")
 
+add_executable(example_strsm_cpp ${STRSM_SAMPLE_SRCPP})
+target_link_libraries(example_strsm_cpp ${OPENCL_LIBRARIES} clBLAS)
+set_property( TARGET example_strsm_cpp PROPERTY FOLDER "Samples")
+
+add_executable(example_ctrsm ${CTRSM_SAMPLE_SRC})
+target_link_libraries(example_ctrsm ${OPENCL_LIBRARIES} clBLAS)
+set_property( TARGET example_ctrsm PROPERTY FOLDER "Samples")
+
 add_executable(example_ssyrk ${SSYRK_SAMPLE_SRC})
 target_link_libraries(example_ssyrk ${OPENCL_LIBRARIES} clBLAS)
 set_property( TARGET example_ssyrk PROPERTY FOLDER "Samples")
@@ -267,11 +277,11 @@ else( )
     set( CLBLAS_EXAMPLE_INSTALL_DESTINATION share/clBLAS/samples)
 endif()
 install( TARGETS example_sgemm example_sgemv example_ssymv example_ssyrk
-         example_ssyr2k example_strmm example_strsm 
-         example_strmv example_strsv example_sger example_cher example_ssyr 
+         example_ssyr2k example_strmm example_strsm
+         example_strmv example_strsv example_sger example_cher example_ssyr
          example_ssyr2 example_cherk example_ssymm example_chemm
          example_stpmv example_chpmv example_stpsv example_sspmv example_sspr example_chpr
-         example_sspr2 example_zhpr2 
+         example_sspr2 example_zhpr2
          example_sgbmv example_stbmv example_ssbmv example_chbmv example_stbsv
          example_cher2k
          example_sswap example_sscal example_csscal example_scopy example_saxpy example_sdot
@@ -286,16 +296,16 @@ install( TARGETS example_sgemm example_sgemv example_ssymv example_ssyrk
 
 configure_file( "${PROJECT_SOURCE_DIR}/samples/CMakeLists.pack"
 		"${PROJECT_BINARY_DIR}/samples/CMakeLists.txt" COPYONLY )
-		
+
 if( WIN32 )
     set( CLBLAS_SAMPLE_INSTALL_DESTINATION samples)
 else( )
     set( CLBLAS_SAMPLE_INSTALL_DESTINATION share/clBLAS/samples/src)
 endif()
-		
+
 install(FILES
             example_sgemv.c
-            example_ssymv.c 
+            example_ssymv.c
             example_sgemm.c
             example_strmm.c
             example_strsm.c
@@ -303,11 +313,11 @@ install(FILES
             example_ssyr2k.c
 			example_strmv.c
 			example_strsv.c
-			example_sger.c 
-			example_ssyr.c 
+			example_sger.c
+			example_ssyr.c
 			example_ssyr2.c
 			example_ssymm.c
-			example_cher.c 
+			example_cher.c
             example_chemm.cpp
             example_cherk.cpp
             example_ssymm.c
diff --git a/src/samples/example_ctrsm.c b/src/samples/example_ctrsm.c
new file mode 100644
index 0000000..4466481
--- /dev/null
+++ b/src/samples/example_ctrsm.c
@@ -0,0 +1,177 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Include CLBLAS header. It automatically includes needed OpenCL header,
+ * so we can drop out explicit inclusion of cl.h header.
+ */
+#include <clBLAS.h>
+
+/* This example uses predefined matrices and their characteristics for
+ * simplicity purpose.
+ */
+static const clblasOrder order = clblasRowMajor;
+static const clblasSide side = clblasLeft;
+
+static const size_t M = 4;
+static const size_t N = 5;
+
+static const FloatComplex alpha = { 10, 0 };
+
+static const clblasTranspose transA = clblasNoTrans;
+static const clblasUplo uploA = clblasUpper;
+static const clblasDiag diagA = clblasNonUnit;
+static const FloatComplex A[] = {
+    { 11, 0 },{ 12, 0 },{ 13, 0 },{ 14, 0 },
+    { 0, 0 },{ 22, 0 },{ 23, 0 },{ 24, 0 },
+    { 0, 0 },{ 0, 0 },{ 33, 0 },{ 34, 0 },
+    { 0, 0 },{ 0, 0 },{ 0, 0 },{ 44, 0 }
+};
+static const size_t lda = 4;        /* elements per stored row of A; equals M */
+
+static FloatComplex B[] = {
+    { 11, 0 },{ 12, 0 },{ 13, 0 },{ 14, 0 },{ 15, 0 },
+    { 21, 0 },{ 22, 0 },{ 23, 0 },{ 24, 0 },{ 25, 0 },
+    { 31, 0 },{ 32, 0 },{ 33, 0 },{ 34, 0 },{ 35, 0 },
+    { 41, 0 },{ 42, 0 },{ 43, 0 },{ 44, 0 },{ 45, 0 },
+};
+static const size_t ldb = 5;        /* elements per stored row of B; equals N */
+
+
+static FloatComplex result[20];         /* host copy of B read back in main: ldb * M elements */
+
+static const size_t off  = 1;       /* rows and columns skipped; main passes M - off and N - off */
+static const size_t offA = 4 + 1;   /* off * lda + off: element offset of A[off][off] */
+static const size_t offB = 5 + 1;   /* off * ldb + off: element offset of B[off][off] */
+
+static void
+printResult(const char* str)
+{
+    /* Dump the real part (.x) of every element of 'result', ldb values per line. */
+    const size_t nrows = (sizeof(result) / sizeof(FloatComplex)) / ldb;
+    size_t row, col;
+
+    printf("%s:\n", str);
+    for (row = 0; row < nrows; row++) {
+        for (col = 0; col < ldb; col++) {
+            printf("%.5f ", result[row * ldb + col].x);
+        }
+        printf("\n");
+    }
+}
+
+/* Runs clblasCtrsm on the lower-right sub-matrices of A and B on a CPU device and
+ * prints B as read back; returns 1 if OpenCL/clblas setup or the CTRSM call fails. */
+int
+main(void)
+{
+    cl_int err;
+    cl_platform_id platform[] = { 0, 0 };
+    cl_device_id device = 0;
+    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
+    cl_context ctx = 0;
+    cl_command_queue queue = 0;
+    cl_mem bufA, bufB;
+    cl_event event = NULL;
+    int ret = 0;
+
+    /* Setup OpenCL environment. num_entries is an element count, not a byte size. */
+    err = clGetPlatformIDs(sizeof(platform) / sizeof(platform[0]), platform, NULL);
+    if (err != CL_SUCCESS) {
+        printf( "clGetPlatformIDs() failed with %d\n", err );
+        return 1;
+    }
+
+    err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_CPU, 1, &device, NULL);
+    if (err != CL_SUCCESS) {
+        printf( "clGetDeviceIDs() failed with %d\n", err );
+        return 1;
+    }
+
+    /* Pass the handle of the platform the device came from, not the array address. */
+    props[1] = (cl_context_properties)platform[0];
+    ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
+    if (err != CL_SUCCESS) {
+        printf( "clCreateContext() failed with %d\n", err );
+        return 1;
+    }
+
+    queue = clCreateCommandQueue(ctx, device, 0, &err);
+    if (err != CL_SUCCESS) {
+        printf( "clCreateCommandQueue() failed with %d\n", err );
+        clReleaseContext(ctx);
+        return 1;
+    }
+
+    /* Setup clblas. */
+    err = clblasSetup();
+    if (err != CL_SUCCESS) {
+        printf("clblasSetup() failed with %d\n", err);
+        clReleaseCommandQueue(queue);
+        clReleaseContext(ctx);
+        return 1;
+    }
+
+    /* Prepare OpenCL memory objects and place matrices inside them. */
+    bufA = clCreateBuffer(ctx, CL_MEM_READ_ONLY, M * M * sizeof(*A), NULL, &err);
+    bufB = clCreateBuffer(ctx, CL_MEM_READ_WRITE, M * N * sizeof(*B), NULL, &err);
+    err = clEnqueueWriteBuffer(queue, bufA, CL_TRUE, 0,
+        M * M * sizeof(*A), A, 0, NULL, NULL);
+    err = clEnqueueWriteBuffer(queue, bufB, CL_TRUE, 0,
+        M * N * sizeof(*B), B, 0, NULL, NULL);
+
+    /* Call clblas function. Perform TRSM for the lower right sub-matrices */
+    err = clblasCtrsm(order, side, uploA, transA, diagA, M - off, N - off,
+                         alpha, bufA, offA, lda, bufB, offB, ldb, 1, &queue, 0,
+                         NULL, &event);
+    if (err != CL_SUCCESS) {
+        printf("clblasCtrsm() failed with %d\n", err);
+        ret = 1;
+    }
+    else {
+        /* Wait for calculations to be finished. */
+        err = clWaitForEvents(1, &event);
+
+        /* Fetch results of calculations from device memory. */
+        err = clEnqueueReadBuffer(queue, bufB, CL_TRUE, 0,
+                                  M * N * sizeof(*result),
+                                  result, 0, NULL, NULL);
+
+        /* At this point you will get the result of CTRSM placed in 'result' array. */
+        puts("");
+        printResult("clblasCtrsm result");
+    }
+
+    /* Release OpenCL events. */
+    clReleaseEvent(event);
+
+    /* Release OpenCL memory objects. */
+    clReleaseMemObject(bufB);
+    clReleaseMemObject(bufA);
+
+    /* Finalize work with clblas. */
+    clblasTeardown();
+
+    /* Release OpenCL working objects. */
+    clReleaseCommandQueue(queue);
+    clReleaseContext(ctx);
+
+    return ret;
+}
diff --git a/src/samples/example_strsm.cpp b/src/samples/example_strsm.cpp
new file mode 100644
index 0000000..da51514
--- /dev/null
+++ b/src/samples/example_strsm.cpp
@@ -0,0 +1,188 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Include CLBLAS header. It automatically includes needed OpenCL header,
+ * so we can drop out explicit inclusion of cl.h header.
+ */
+#include <clBLAS.h>
+
+/* This example uses predefined matrices and their characteristics for
+ * simplicity purpose.
+ */
+static const clblasOrder order = clblasColumnMajor;
+static const clblasSide side = clblasLeft;
+static const clblasTranspose transA = clblasNoTrans;
+static const clblasUplo uploA = clblasUpper;
+static const clblasDiag diagA = clblasNonUnit;
+
+static const cl_float alpha = 10;
+static const size_t M = 64;
+static const size_t N = 64;
+static const size_t lda = M;        /* leading dimension of A; equals M */
+static const size_t ldb = N;        /* leading dimension of B; NOTE(review): set from N, fine while M == N -- confirm */
+
+static cl_float A[lda * M];
+static cl_float B[ldb * N];
+static cl_float result[ldb * N];         /* host copy of B read back in main: ldb * N elements */
+
+static const size_t off  = 0;   /* no rows or columns skipped; main passes M - off and N - off */
+static const size_t offA = 0;   /* A is used from the first element of its buffer */
+static const size_t offB = 0;   /* B is used from the first element of its buffer */
+
+static void
+makeScaledIdentity(cl_float* matx, size_t M, size_t N, float scale )
+{
+    // Writes scale on the diagonal and 0 elsewhere of an M-row by N-column matrix.
+    // Column-major with leading dimension M: element (i, j) is matx[j * M + i],
+    // which stays inside the M * N array even when M != N.
+    for( size_t j = 0; j < N; ++j )
+        for( size_t i = 0; i < M; ++i )
+        {
+            matx[j * M + i] = ( i == j ) ? scale : 0.0f;
+        }
+}
+
+static void
+printResult(const char* str)
+{
+    // Print 'result' as consecutive lines of ldb values each, after a title line
+    const size_t nrows = (sizeof(result) / sizeof(cl_float)) / ldb;
+    size_t row, col;
+
+    printf("%s:\n", str);
+    for (row = 0; row < nrows; row++) {
+        for (col = 0; col < ldb; col++) {
+            printf("%.5e ", result[row * ldb + col]);
+        }
+        printf("\n");
+    }
+}
+
+/* Runs clblasStrsm on two 64x64 identity matrices and prints the matrix read
+ * back from bufB; returns 1 if OpenCL/clblas setup or the clblasStrsm call fails. */
+int
+main(void)
+{
+    cl_int err;
+    // Increase platforms array for system needs; 2 covers most situations
+    cl_platform_id platforms[] = { 0,0 };
+    cl_uint numPlatforms = 0;
+    cl_device_id device = 0;
+    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
+    cl_context ctx = 0;
+    cl_command_queue queue = 0;
+    cl_mem bufA, bufB;
+    cl_event event = NULL;
+    int ret = 0;
+
+    makeScaledIdentity( A, M, N, 1.0f );
+    makeScaledIdentity( B, M, N, 1.0f);
+    makeScaledIdentity( result, M, N, 0.0f);
+
+    /* Setup OpenCL environment. */
+    err = clGetPlatformIDs( sizeof( platforms )/ sizeof( cl_platform_id ), &platforms[0], &numPlatforms);
+    if (err != CL_SUCCESS) {
+        printf( "clGetPlatformIDs() failed with %d\n", err );
+        return 1;
+    }
+
+    // Change the preferred index to pick the platform under test; platform 0 is
+    // used instead when the system reports only one platform (slot 1 stays empty)
+    cl_platform_id test_platform = platforms[ numPlatforms > 1 ? 1 : 0 ];
+
+    //!!!  Change device type to validate; works on GPU, faults on CPU
+    err = clGetDeviceIDs(test_platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
+    if (err != CL_SUCCESS) {
+        printf( "clGetDeviceIDs() failed with %d\n", err );
+        return 1;
+    }
+
+    props[1] = (cl_context_properties)test_platform;
+    ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
+    if (err != CL_SUCCESS) {
+        printf( "clCreateContext() failed with %d\n", err );
+        return 1;
+    }
+
+    queue = clCreateCommandQueue(ctx, device, 0, &err);
+    if (err != CL_SUCCESS) {
+        printf( "clCreateCommandQueue() failed with %d\n", err );
+        clReleaseContext(ctx);
+        return 1;
+    }
+
+    /* Setup clblas. */
+    err = clblasSetup();
+    if (err != CL_SUCCESS) {
+        printf("clblasSetup() failed with %d\n", err);
+        clReleaseCommandQueue(queue);
+        clReleaseContext(ctx);
+        return 1;
+    }
+
+    /* Prepare OpenCL memory objects and place matrices inside them. */
+    bufA = clCreateBuffer(ctx, CL_MEM_READ_ONLY, lda * M * sizeof(*A), NULL, &err);
+    bufB = clCreateBuffer(ctx, CL_MEM_READ_WRITE, ldb * N * sizeof(*B), NULL, &err);
+    err = clEnqueueWriteBuffer(queue, bufA, CL_TRUE, 0,
+        lda * M * sizeof(*A), A, 0, NULL, NULL);
+    err = clEnqueueWriteBuffer(queue, bufB, CL_TRUE, 0,
+        ldb * N * sizeof(*B), B, 0, NULL, NULL);
+
+    /* Call clblas function on the whole matrices (off, offA and offB are 0). */
+    // A and B are both identity matrices
+    // Solving for identity matrices should yield an identity matrix scaled by alpha
+    err = clblasStrsm(order, side, uploA, transA, diagA, M - off, N - off,
+                         alpha, bufA, offA, lda, bufB, offB, ldb, 1, &queue, 0,
+                         NULL, &event);
+    if (err != CL_SUCCESS) {
+        printf("clblasStrsm() failed with %d\n", err);
+        ret = 1;
+    }
+    else {
+        /* Wait for calculations to be finished. */
+        err = clWaitForEvents(1, &event);
+
+        /* Fetch results of calculations from opencl memory. */
+        err = clEnqueueReadBuffer(queue, bufB, CL_TRUE, 0,
+                                  ldb * N * sizeof(*result),
+                                  result, 0, NULL, NULL);
+
+        // At this point, 'result' should contain a scaled identity matrix
+        puts("");
+        printResult("clblasStrsm result");
+    }
+
+    /* Release OpenCL events. */
+    clReleaseEvent(event);
+
+    /* Release OpenCL memory objects. */
+    clReleaseMemObject(bufB);
+    clReleaseMemObject(bufA);
+
+    /* Finalize work with clblas. */
+    clblasTeardown();
+
+    /* Release OpenCL working objects. */
+    clReleaseCommandQueue(queue);
+    clReleaseContext(ctx);
+
+    return ret;
+}
diff --git a/src/tests/BlasBase.cpp b/src/tests/BlasBase.cpp
index 73a6f5e..1bcc5d4 100644
--- a/src/tests/BlasBase.cpp
+++ b/src/tests/BlasBase.cpp
@@ -105,10 +105,11 @@ BlasBase::getDevice(cl_device_type type, const char* name,
 {
     cl_int err;
     cl_uint nrDevices, i, p;
-    cl_device_id *devices, result = NULL;
+    cl_device_id *devices = NULL;
+    cl_device_id result = 0;
     size_t sz;
     char *str;
-    cl_platform_id *platforms, selPlatform = NULL;
+    cl_platform_id* platforms = NULL;
     cl_uint nrPlatforms;
 
     nrPlatforms = getPlatforms(&platforms, &err);
diff --git a/src/tests/cmdline.c b/src/tests/cmdline.c
index c9519cc..235367d 100644
--- a/src/tests/cmdline.c
+++ b/src/tests/cmdline.c
@@ -18,6 +18,7 @@
 #include <string.h>         /* strcmp */
 #include <stdlib.h>         /* atoi, strtol */
 #include <stdio.h>          /* printf */
+#include <ctype.h>
 
 #include <cmdline.h>
 
@@ -92,7 +93,7 @@ doParseCmdLine(
         currArg = (const char*)argv[i];
         i++;
 
-        if (currArg[0] != '-') {
+        if ( (currArg[0] != '-') && isdigit( currArg[0] ) ){
             // some of size arguments
             switch (j) {
             case 0:

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git



More information about the debian-science-commits mailing list