[clblas] 34/54: Add cl_khr_fp64 when using double precision

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jan 14 20:07:36 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clblas.

commit f7397fea7588c45ef0a4875fd007387b583fcd3c
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Tue Dec 15 18:08:53 2015 -0500

    Add cl_khr_fp64 when using double precision
---
 src/library/blas/AutoGemm/KernelOpenCL.py                          | 7 +++++++
 .../UserGemmKernelSources/dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp | 1 +
 .../UserGemmKernelSources/dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp | 1 +
 .../UserGemmKernelSources/dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp | 1 +
 .../UserGemmKernelSources/dgemm_Col_NT_B1_MX048_NX048_KX08_src.cpp | 1 +
 .../UserGemmKernelSources/dgemm_Col_TN_B0_MX048_NX048_KX08_src.cpp | 1 +
 .../UserGemmKernelSources/dgemm_Col_TN_B1_MX048_NX048_KX08_src.cpp | 1 +
 7 files changed, 13 insertions(+)

diff --git a/src/library/blas/AutoGemm/KernelOpenCL.py b/src/library/blas/AutoGemm/KernelOpenCL.py
index 3dc80fd..7b48746 100644
--- a/src/library/blas/AutoGemm/KernelOpenCL.py
+++ b/src/library/blas/AutoGemm/KernelOpenCL.py
@@ -26,6 +26,13 @@ def makeOpenCLKernelString(kernel):
   kStr += endLine
 
   ####################################
+  # Double precision pragma
+  prec = kernel.getName()[0].lower()
+  if prec == "d" or prec == "z":
+    kStr += endLine
+    kStr += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" + endLine
+
+  ####################################
   # kernel parameters
   kStr += endLine
   kStr += "/* kernel parameters */" + endLine
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp
index a5ffc44..6be60e1 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B0_MX048_NX048_KX08_src.cpp
@@ -18,6 +18,7 @@ const unsigned int dgemm_Col_NN_B0_MX048_NX048_KX08_microTileNumCols = 6;
 const unsigned int dgemm_Col_NN_B0_MX048_NX048_KX08_unroll = 8;
 
 const char * const dgemm_Col_NN_B0_MX048_NX048_KX08_src = STRINGIFY(
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n
 
 #define  M6x6 \
             rA[0] = lA[offA + 0];\
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp
index 7cab452..5b48881 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NN_B1_MX048_NX048_KX08_src.cpp
@@ -18,6 +18,7 @@ const unsigned int dgemm_Col_NN_B1_MX048_NX048_KX08_microTileNumCols = 6;
 const unsigned int dgemm_Col_NN_B1_MX048_NX048_KX08_unroll = 8;
 
 const char * const dgemm_Col_NN_B1_MX048_NX048_KX08_src = STRINGIFY(
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable   \n
 
 #define  M6x6 \
             rA[0] = lA[offA + 0];                       \
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp
index dc96015..4130062 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B0_MX048_NX048_KX08_src.cpp
@@ -18,6 +18,7 @@ const unsigned int dgemm_Col_NT_B0_MX048_NX048_KX08_microTileNumCols = 6;
 const unsigned int dgemm_Col_NT_B0_MX048_NX048_KX08_unroll = 8;
 
 const char * const dgemm_Col_NT_B0_MX048_NX048_KX08_src = STRINGIFY(
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable   \n
 \n
 \ntypedef union _GPtr {
 \n  __global float *f;
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B1_MX048_NX048_KX08_src.cpp b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B1_MX048_NX048_KX08_src.cpp
index 114e312..1ccd29d 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B1_MX048_NX048_KX08_src.cpp
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_NT_B1_MX048_NX048_KX08_src.cpp
@@ -18,6 +18,7 @@ const unsigned int dgemm_Col_NT_B1_MX048_NX048_KX08_microTileNumCols = 6;
 const unsigned int dgemm_Col_NT_B1_MX048_NX048_KX08_unroll = 8;
 
 const char * const dgemm_Col_NT_B1_MX048_NX048_KX08_src = STRINGIFY(
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable   \n
 \n
 \ntypedef union _GPtr {
 \n  __global float *f;
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_TN_B0_MX048_NX048_KX08_src.cpp b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_TN_B0_MX048_NX048_KX08_src.cpp
index d456b8c..91c7a9a 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_TN_B0_MX048_NX048_KX08_src.cpp
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_TN_B0_MX048_NX048_KX08_src.cpp
@@ -18,6 +18,7 @@ const unsigned int dgemm_Col_TN_B0_MX048_NX048_KX08_microTileNumCols = 6;
 const unsigned int dgemm_Col_TN_B0_MX048_NX048_KX08_unroll = 8;
 
 const char * const dgemm_Col_TN_B0_MX048_NX048_KX08_src = STRINGIFY(
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable   \n
 
 __attribute__( (reqd_work_group_size(8, 8, 1)) )
 __kernel void dgemm_Col_TN_B0_MX048_NX048_KX08_src (
diff --git a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_TN_B1_MX048_NX048_KX08_src.cpp b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_TN_B1_MX048_NX048_KX08_src.cpp
index 30e8331..79b0d0e 100644
--- a/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_TN_B1_MX048_NX048_KX08_src.cpp
+++ b/src/library/blas/AutoGemm/UserGemmKernelSources/dgemm_Col_TN_B1_MX048_NX048_KX08_src.cpp
@@ -18,6 +18,7 @@ const unsigned int dgemm_Col_TN_B1_MX048_NX048_KX08_microTileNumCols = 6;
 const unsigned int dgemm_Col_TN_B1_MX048_NX048_KX08_unroll = 8;
 
 const char * const dgemm_Col_TN_B1_MX048_NX048_KX08_src = STRINGIFY(
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable   \n
 
 __attribute__( (reqd_work_group_size(8, 8, 1)) )
 __kernel void dgemm_Col_TN_B1_MX048_NX048_KX08_src (

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clblas.git



More information about the debian-science-commits mailing list