[clblas] 64/75: Adding additional trsm samples
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Jan 24 23:30:47 UTC 2017
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/master
in repository clblas.
commit 69d38d947229e57668be51a40f9b3cd755d7320f
Author: Kent Knox <kent.knox at amd>
Date: Mon Jan 16 12:11:29 2017 -0600
Adding additional trsm samples
---
src/samples/CMakeLists.txt | 34 +++++---
src/samples/example_ctrsm.c | 177 +++++++++++++++++++++++++++++++++++++++
src/samples/example_strsm.cpp | 188 ++++++++++++++++++++++++++++++++++++++++++
src/tests/BlasBase.cpp | 5 +-
src/tests/cmdline.c | 3 +-
5 files changed, 392 insertions(+), 15 deletions(-)
diff --git a/src/samples/CMakeLists.txt b/src/samples/CMakeLists.txt
index 8422e65..53ed2fb 100644
--- a/src/samples/CMakeLists.txt
+++ b/src/samples/CMakeLists.txt
@@ -1,12 +1,12 @@
# ########################################################################
# Copyright 2013 Advanced Micro Devices, Inc.
-#
+#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,6 +19,8 @@ set(SSYMV_SAMPLE_SRC example_ssymv.c)
set(SGEMM_SAMPLE_SRC example_sgemm.c)
set(STRMM_SAMPLE_SRC example_strmm.c)
set(STRSM_SAMPLE_SRC example_strsm.c)
+set(STRSM_SAMPLE_SRCPP example_strsm.cpp)
+set(CTRSM_SAMPLE_SRC example_ctrsm.c)
set(SSYRK_SAMPLE_SRC example_ssyrk.c)
set(SSYR2K_SAMPLE_SRC example_ssyr2k.c)
@@ -91,6 +93,14 @@ add_executable(example_strsm ${STRSM_SAMPLE_SRC})
target_link_libraries(example_strsm ${OPENCL_LIBRARIES} clBLAS)
set_property( TARGET example_strsm PROPERTY FOLDER "Samples")
+add_executable(example_strsm_cpp ${STRSM_SAMPLE_SRCPP})
+target_link_libraries(example_strsm_cpp ${OPENCL_LIBRARIES} clBLAS)
+set_property( TARGET example_strsm_cpp PROPERTY FOLDER "Samples")
+
+add_executable(example_ctrsm ${CTRSM_SAMPLE_SRC})
+target_link_libraries(example_ctrsm ${OPENCL_LIBRARIES} clBLAS)
+set_property( TARGET example_ctrsm PROPERTY FOLDER "Samples")
+
add_executable(example_ssyrk ${SSYRK_SAMPLE_SRC})
target_link_libraries(example_ssyrk ${OPENCL_LIBRARIES} clBLAS)
set_property( TARGET example_ssyrk PROPERTY FOLDER "Samples")
@@ -267,11 +277,11 @@ else( )
set( CLBLAS_EXAMPLE_INSTALL_DESTINATION share/clBLAS/samples)
endif()
install( TARGETS example_sgemm example_sgemv example_ssymv example_ssyrk
- example_ssyr2k example_strmm example_strsm
- example_strmv example_strsv example_sger example_cher example_ssyr
+ example_ssyr2k example_strmm example_strsm
+ example_strmv example_strsv example_sger example_cher example_ssyr
example_ssyr2 example_cherk example_ssymm example_chemm
example_stpmv example_chpmv example_stpsv example_sspmv example_sspr example_chpr
- example_sspr2 example_zhpr2
+ example_sspr2 example_zhpr2
example_sgbmv example_stbmv example_ssbmv example_chbmv example_stbsv
example_cher2k
example_sswap example_sscal example_csscal example_scopy example_saxpy example_sdot
@@ -286,16 +296,16 @@ install( TARGETS example_sgemm example_sgemv example_ssymv example_ssyrk
configure_file( "${PROJECT_SOURCE_DIR}/samples/CMakeLists.pack"
"${PROJECT_BINARY_DIR}/samples/CMakeLists.txt" COPYONLY )
-
+
if( WIN32 )
set( CLBLAS_SAMPLE_INSTALL_DESTINATION samples)
else( )
set( CLBLAS_SAMPLE_INSTALL_DESTINATION share/clBLAS/samples/src)
endif()
-
+
install(FILES
example_sgemv.c
- example_ssymv.c
+ example_ssymv.c
example_sgemm.c
example_strmm.c
example_strsm.c
@@ -303,11 +313,11 @@ install(FILES
example_ssyr2k.c
example_strmv.c
example_strsv.c
- example_sger.c
- example_ssyr.c
+ example_sger.c
+ example_ssyr.c
example_ssyr2.c
example_ssymm.c
- example_cher.c
+ example_cher.c
example_chemm.cpp
example_cherk.cpp
example_ssymm.c
diff --git a/src/samples/example_ctrsm.c b/src/samples/example_ctrsm.c
new file mode 100644
index 0000000..4466481
--- /dev/null
+++ b/src/samples/example_ctrsm.c
@@ -0,0 +1,177 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Include the clBLAS header. It automatically includes the required OpenCL
+ * header, so an explicit inclusion of cl.h is not needed.
+ */
+#include <clBLAS.h>
+
+/* This example uses predefined matrices and predefined characteristics
+ * for the sake of simplicity.
+ */
+static const clblasOrder order = clblasRowMajor; /* arrays below are listed row by row */
+static const clblasSide side = clblasLeft;
+
+static const size_t M = 4; /* A is M x M, B is M x N */
+static const size_t N = 5;
+
+static const FloatComplex alpha = { 10, 0 }; /* presumably 10 + 0i, i.e. { re, im } - TODO confirm */
+
+static const clblasTranspose transA = clblasNoTrans;
+static const clblasUplo uploA = clblasUpper;
+static const clblasDiag diagA = clblasNonUnit;
+static const FloatComplex A[] = { /* 4 x 4, every entry below the diagonal is zero */
+ { 11, 0 },{ 12, 0 },{ 13, 0 },{ 14, 0 },
+ { 0, 0 },{ 22, 0 },{ 23, 0 },{ 24, 0 },
+ { 0, 0 },{ 0, 0 },{ 33, 0 },{ 34, 0 },
+ { 0, 0 },{ 0, 0 },{ 0, 0 },{ 44, 0 }
+};
+static const size_t lda = 4; /* row stride of A, equal to M */
+
+static FloatComplex B[] = { /* 4 x 5 right-hand side; the same data is uploaded to the buffer that is read back into 'result' */
+ { 11, 0 },{ 12, 0 },{ 13, 0 },{ 14, 0 },{ 15, 0 },
+ { 21, 0 },{ 22, 0 },{ 23, 0 },{ 24, 0 },{ 25, 0 },
+ { 31, 0 },{ 32, 0 },{ 33, 0 },{ 34, 0 },{ 35, 0 },
+ { 41, 0 },{ 42, 0 },{ 43, 0 },{ 44, 0 },{ 45, 0 },
+};
+static const size_t ldb = 5; /* row stride of B, equal to N */
+
+
+static FloatComplex result[20]; /* ldb * M elements, filled by main() from the device buffer of B */
+
+static const size_t off = 1; /* rows/columns skipped at the top-left; the call in main() uses M - off and N - off */
+static const size_t offA = 4 + 1; /* lda * off + off: element (1,1) of A in row-major storage */
+static const size_t offB = 5 + 1; /* ldb * off + off: element (1,1) of B in row-major storage */
+
+static void /* Prints the label 'str' followed by the contents of the file-scope 'result' array. */
+printResult(const char* str)
+{
+ size_t i, j, nrows;
+
+ printf("%s:\n", str);
+
+ nrows = (sizeof(result) / sizeof(FloatComplex)) / ldb; /* element count of 'result' divided by the row stride */
+ for (i = 0; i < nrows; i++) {
+ for (j = 0; j < ldb; j++) {
+ printf("%.5f ", result[i * ldb + j].x); /* only the .x component is printed; .y is not shown */
+ }
+ printf("\n");
+ }
+}
+
+int
+main(void)
+{ /* Runs clblasCtrsm on the predefined A and B and prints the result; returns 0 on success, 1 on failure. */
+ cl_int err;
+ cl_platform_id platform[] = { 0, 0 };
+ cl_device_id device = 0;
+ cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
+ cl_context ctx = 0;
+ cl_command_queue queue = 0;
+ cl_mem bufA, bufB;
+ cl_event event = NULL;
+ int ret = 0;
+
+ /* Setup OpenCL environment. num_entries is an element count, and the array itself decays to the output pointer. */
+ err = clGetPlatformIDs(sizeof(platform) / sizeof(platform[0]), platform, NULL);
+ if (err != CL_SUCCESS) {
+ printf( "clGetPlatformIDs() failed with %d\n", err );
+ return 1;
+ }
+
+ err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_CPU, 1, &device, NULL);
+ if (err != CL_SUCCESS) {
+ printf( "clGetDeviceIDs() failed with %d\n", err );
+ return 1;
+ }
+
+ props[1] = (cl_context_properties)platform[0]; /* the platform id the device was queried from, not the address of the array */
+ ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
+ if (err != CL_SUCCESS) {
+ printf( "clCreateContext() failed with %d\n", err );
+ return 1;
+ }
+
+ queue = clCreateCommandQueue(ctx, device, 0, &err);
+ if (err != CL_SUCCESS) {
+ printf( "clCreateCommandQueue() failed with %d\n", err );
+ clReleaseContext(ctx);
+ return 1;
+ }
+
+ /* Setup clblas. */
+ err = clblasSetup();
+ if (err != CL_SUCCESS) {
+ printf("clblasSetup() failed with %d\n", err);
+ clReleaseCommandQueue(queue);
+ clReleaseContext(ctx);
+ return 1;
+ }
+
+ /* Prepare OpenCL memory objects and place matrices inside them. NOTE(review): err is overwritten by each call below without being checked. */
+ bufA = clCreateBuffer(ctx, CL_MEM_READ_ONLY, M * M * sizeof(*A),
+ NULL, &err);
+ bufB = clCreateBuffer(ctx, CL_MEM_READ_WRITE, M * N * sizeof(*B),
+ NULL, &err);
+
+ err = clEnqueueWriteBuffer(queue, bufA, CL_TRUE, 0,
+ M * M * sizeof(*A), A, 0, NULL, NULL);
+ err = clEnqueueWriteBuffer(queue, bufB, CL_TRUE, 0,
+ M * N * sizeof(*B), B, 0, NULL, NULL);
+
+ /* Call clblas function. Perform TRSM on the sub-matrices that start at offA / offB, sized (M - off) x (N - off). */
+ err = clblasCtrsm(order, side, uploA, transA, diagA, M - off, N - off,
+ alpha, bufA, offA, lda, bufB, offB, ldb, 1, &queue, 0,
+ NULL, &event);
+ if (err != CL_SUCCESS) {
+ printf("clblasCtrsm() failed with %d\n", err);
+ ret = 1;
+ }
+ else {
+ /* Wait for calculations to be finished. */
+ err = clWaitForEvents(1, &event);
+
+ /* Fetch results of calculations from device memory (blocking read of the whole B buffer). */
+ err = clEnqueueReadBuffer(queue, bufB, CL_TRUE, 0,
+ M * N * sizeof(*result),
+ result, 0, NULL, NULL);
+
+ /* At this point the result of CTRSM is in the 'result' array. */
+ puts("");
+ printResult("clblasCtrsmEx result");
+ }
+
+ /* Release OpenCL events. */
+ clReleaseEvent(event);
+
+ /* Release OpenCL memory objects. */
+ clReleaseMemObject(bufB);
+ clReleaseMemObject(bufA);
+
+ /* Finalize work with clblas. */
+ clblasTeardown();
+
+ /* Release OpenCL working objects. */
+ clReleaseCommandQueue(queue);
+ clReleaseContext(ctx);
+
+ return ret;
+}
diff --git a/src/samples/example_strsm.cpp b/src/samples/example_strsm.cpp
new file mode 100644
index 0000000..da51514
--- /dev/null
+++ b/src/samples/example_strsm.cpp
@@ -0,0 +1,188 @@
+/* ************************************************************************
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ************************************************************************/
+
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Include the clBLAS header. It automatically includes the required OpenCL
+ * header, so an explicit inclusion of cl.h is not needed.
+ */
+#include <clBLAS.h>
+
+/* This example uses fixed matrix sizes and characteristics for the sake of
+ * simplicity; the matrix contents are generated at run time.
+ */
+static const clblasOrder order = clblasColumnMajor;
+static const clblasSide side = clblasLeft;
+static const clblasTranspose transA = clblasNoTrans;
+static const clblasUplo uploA = clblasUpper;
+static const clblasDiag diagA = clblasNonUnit;
+
+static const cl_float alpha = 10; /* scalar passed to clblasStrsm in main() */
+static const size_t M = 64;
+static const size_t N = 64;
+static const size_t lda = M; /* leading dimension of A, set to M */
+static const size_t ldb = N; /* leading dimension of B, set to N (same value as M here) */
+
+static cl_float A[lda * M]; /* contents are generated in main() */
+static cl_float B[ldb * N]; /* contents are generated in main() */
+static cl_float result[ldb * N]; /* host copy of the B buffer read back after the solve */
+
+static const size_t off = 0; /* nothing is skipped: main() passes M - off and N - off */
+static const size_t offA = 0; /* no offset into A */
+static const size_t offB = 0; /* no offset into B */
+
+static void
+makeScaledIdentity(cl_float* matx, size_t M, size_t N, float scale )
+{
+ /* Fills matx, an M-row by N-column matrix in column-major storage with leading
+ * dimension M, with 'scale' on the main diagonal and 0 everywhere else. */
+ for (size_t j = 0; j < N; ++j) /* column */
+ for (size_t i = 0; i < M; ++i) /* row */
+ {
+ /* j * M + i never exceeds M * N - 1, so any M, N stays inside an M * N buffer. */
+ matx[j * M + i] = ( i == j ) ? scale : 0.0f;
+ }
+}
+
+static void /* Prints the label 'str' followed by the contents of the file-scope 'result' array. */
+printResult(const char* str)
+{
+ size_t i, j, nrows;
+
+ printf("%s:\n", str);
+
+ nrows = (sizeof(result) / sizeof(cl_float)) / ldb; /* element count of 'result' divided by ldb */
+ for (i = 0; i < nrows; i++) {
+ for (j = 0; j < ldb; j++) {
+ printf("%.5e ", result[i * ldb + j]); /* ldb consecutive elements per output line; presumably one column per line under column-major order - confirm */
+ }
+ printf("\n");
+ }
+}
+
+int
+main(void)
+{ /* Runs clblasStrsm on two generated identity matrices and prints the result; returns 0 on success, 1 on failure. */
+ cl_int err;
+ // Increase platforms array for system needs; 2 covers most situations
+ cl_platform_id platforms[] = { 0,0 };
+ cl_device_id device = 0;
+ cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
+ cl_context ctx = 0;
+ cl_command_queue queue = 0;
+ cl_mem bufA, bufB;
+ cl_event event = NULL;
+ int ret = 0;
+
+ makeScaledIdentity( A, M, N, 1.0f );
+ makeScaledIdentity( B, M, N, 1.0f);
+ makeScaledIdentity( result, M, N, 0.0f);
+
+ /* Setup OpenCL environment. */
+ err = clGetPlatformIDs( sizeof( platforms )/ sizeof( cl_platform_id ), &platforms[0], NULL);
+ if (err != CL_SUCCESS) {
+ printf( "clGetPlatformIDs() failed with %d\n", err );
+ return 1;
+ }
+
+ // Change this statement to pick the desired platform under test; platforms[0] is used when no second platform was reported
+ cl_platform_id test_platform = platforms[1] ? platforms[1] : platforms[0];
+
+ //!!! Change device type to validate; works on GPU, faults on CPU
+ err = clGetDeviceIDs(test_platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
+ if (err != CL_SUCCESS) {
+ printf( "clGetDeviceIDs() failed with %d\n", err );
+ return 1;
+ }
+
+ props[1] = (cl_context_properties)test_platform;
+ ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
+ if (err != CL_SUCCESS) {
+ printf( "clCreateContext() failed with %d\n", err );
+ return 1;
+ }
+
+ queue = clCreateCommandQueue(ctx, device, 0, &err);
+ if (err != CL_SUCCESS) {
+ printf( "clCreateCommandQueue() failed with %d\n", err );
+ clReleaseContext(ctx);
+ return 1;
+ }
+
+ /* Setup clblas. */
+ err = clblasSetup();
+ if (err != CL_SUCCESS) {
+ printf("clblasSetup() failed with %d\n", err);
+ clReleaseCommandQueue(queue);
+ clReleaseContext(ctx);
+ return 1;
+ }
+
+ /* Prepare OpenCL memory objects and place matrices inside them. NOTE(review): err is overwritten by each call below without being checked. */
+ bufA = clCreateBuffer(ctx, CL_MEM_READ_ONLY, lda * M * sizeof(*A),
+ NULL, &err);
+ bufB = clCreateBuffer(ctx, CL_MEM_READ_WRITE, ldb * N * sizeof(*B),
+ NULL, &err);
+
+ err = clEnqueueWriteBuffer(queue, bufA, CL_TRUE, 0,
+ lda * M * sizeof(*A), A, 0, NULL, NULL);
+ err = clEnqueueWriteBuffer(queue, bufB, CL_TRUE, 0,
+ ldb * N * sizeof(*B), B, 0, NULL, NULL);
+
+ /* Call clblas function. Perform TRSM on the full matrices (off, offA and offB are all 0). */
+ // A is identity matrix
+ // B is identity matrix
+ // Solving for identity matrices should yield an identity matrix scaled by alpha
+ err = clblasStrsm(order, side, uploA, transA, diagA, M - off, N - off,
+ alpha, bufA, offA, lda, bufB, offB, ldb, 1, &queue, 0,
+ NULL, &event);
+ if (err != CL_SUCCESS) {
+ printf("clblasStrsm() failed with %d\n", err);
+ ret = 1;
+ }
+ else {
+ /* Wait for calculations to be finished. */
+ err = clWaitForEvents(1, &event);
+
+ /* Fetch results of calculations from opencl memory. */
+ err = clEnqueueReadBuffer(queue, bufB, CL_TRUE, 0,
+ ldb * N * sizeof(*result),
+ result, 0, NULL, NULL);
+
+ // At this point, 'result' should contain a scaled identity matrix
+ puts("");
+ printResult("clblasStrsm result");
+ }
+
+ /* Release OpenCL events. */
+ clReleaseEvent(event);
+
+ /* Release OpenCL memory objects. */
+ clReleaseMemObject(bufB);
+ clReleaseMemObject(bufA);
+
+ /* Finalize work with clblas. */
+ clblasTeardown();
+
+ /* Release OpenCL working objects. */
+ clReleaseCommandQueue(queue);
+ clReleaseContext(ctx);
+
+ return ret;
+}
diff --git a/src/tests/BlasBase.cpp b/src/tests/BlasBase.cpp
index 73a6f5e..1bcc5d4 100644
--- a/src/tests/BlasBase.cpp
+++ b/src/tests/BlasBase.cpp
@@ -105,10 +105,11 @@ BlasBase::getDevice(cl_device_type type, const char* name,
{
cl_int err;
cl_uint nrDevices, i, p;
- cl_device_id *devices, result = NULL;
+ cl_device_id *devices = NULL;
+ cl_device_id result = 0;
size_t sz;
char *str;
- cl_platform_id *platforms, selPlatform = NULL;
+ cl_platform_id* platforms = NULL;
cl_uint nrPlatforms;
nrPlatforms = getPlatforms(&platforms, &err);
diff --git a/src/tests/cmdline.c b/src/tests/cmdline.c
index c9519cc..235367d 100644
--- a/src/tests/cmdline.c
+++ b/src/tests/cmdline.c
@@ -18,6 +18,7 @@
#include <string.h> /* strcmp */
#include <stdlib.h> /* atoi, strtol */
#include <stdio.h> /* printf */
+#include <ctype.h>
#include <cmdline.h>
@@ -92,7 +93,7 @@ doParseCmdLine(
currArg = (const char*)argv[i];
i++;
- if (currArg[0] != '-') {
+ if ( (currArg[0] != '-') && isdigit( currArg[0] ) ){
// some of size arguments
switch (j) {
case 0:
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git
More information about the debian-science-commits
mailing list