[arrayfire] 334/408: Remove set_scalar(x, 0) instructions

Mon Sep 21 19:12:24 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 71c12e84c276e9a8b64664d8fb602a77849cc8a7
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Mon Aug 24 17:08:30 2015 -0400

    Remove set_scalar(x, 0) instructions
---
 src/backend/opencl/kernel/approx.hpp          | 11 +++++++++--
 src/backend/opencl/kernel/approx1.cl          |  9 +++++----
 src/backend/opencl/kernel/approx2.cl          | 10 +++++-----
 src/backend/opencl/kernel/gradient.cl         |  3 ++-
 src/backend/opencl/kernel/gradient.hpp        |  7 ++++++-
 src/backend/opencl/kernel/rotate.hpp          |  5 +++++
 src/backend/opencl/kernel/transform.hpp       |  7 ++++++-
 src/backend/opencl/kernel/transform_interp.cl |  4 ++--
 8 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/src/backend/opencl/kernel/approx.hpp b/src/backend/opencl/kernel/approx.hpp
index 05abe13..6ec637a 100644
--- a/src/backend/opencl/kernel/approx.hpp
+++ b/src/backend/opencl/kernel/approx.hpp
@@ -18,6 +18,9 @@
 #include <dispatch.hpp>
 #include <Param.hpp>
 #include <debug_opencl.hpp>
+#include <type_util.hpp>
+#include <math.hpp>
+#include "config.hpp"
 
 using cl::Buffer;
 using cl::Program;
@@ -50,9 +53,11 @@ namespace opencl
                 int device = getActiveDeviceId();
 
                 std::call_once( compileFlags[device], [device] () {
+                    ToNum<Ty> toNum;
                     std::ostringstream options;
                     options << " -D Ty="        << dtype_traits<Ty>::getName()
-                            << " -D Tp="        << dtype_traits<Tp>::getName();
+                            << " -D Tp="        << dtype_traits<Tp>::getName()
+                            << " -D ZERO="      << toNum(scalar<Ty>(0));
 
                     if((af_dtype) dtype_traits<Ty>::af_type == c32 ||
                        (af_dtype) dtype_traits<Ty>::af_type == c64) {
@@ -113,9 +118,11 @@ namespace opencl
                 int device = getActiveDeviceId();
 
                 std::call_once( compileFlags[device], [device] () {
+                    ToNum<Ty> toNum;
                     std::ostringstream options;
                     options << " -D Ty="        << dtype_traits<Ty>::getName()
-                            << " -D Tp="        << dtype_traits<Tp>::getName();
+                            << " -D Tp="        << dtype_traits<Tp>::getName()
+                            << " -D ZERO="      << toNum(scalar<Ty>(0));
 
                     if((af_dtype) dtype_traits<Ty>::af_type == c32 ||
                        (af_dtype) dtype_traits<Ty>::af_type == c64) {
diff --git a/src/backend/opencl/kernel/approx1.cl b/src/backend/opencl/kernel/approx1.cl
index b39d803..3531e2f 100644
--- a/src/backend/opencl/kernel/approx1.cl
+++ b/src/backend/opencl/kernel/approx1.cl
@@ -39,7 +39,7 @@ void core_nearest1(const int idx, const int idy, const int idz, const int idw,
                    const float offGrid)
 {
     const int omId = idw * out.strides[3] + idz * out.strides[2]
-                        + idy * out.strides[1] + idx;
+                   + idy * out.strides[1] + idx;
     const int pmId = idx;
 
     const Tp pVal = d_pos[pmId];
@@ -82,11 +82,12 @@ void core_linear1(const int idx, const int idy, const int idz, const int idw,
 
     // Check if pVal and pVal + 1 are both valid indices
     bool cond = (pVal < in.dims[0] - 1);
-    Ty zero; set_scalar(zero, 0);
+
+    Ty zero = ZERO;
 
     // Compute Left and Right Weighted Values
-    Ty yl; set(yl, mul(d_in[ioff] , (1 - off_x)));
-    Ty yr; set(yr, cond ? mul(d_in[ioff + 1], off_x) : zero);
+    Ty yl = mul(d_in[ioff] , (1 - off_x));
+    Ty yr = cond ? mul(d_in[ioff + 1], off_x) : zero;
     Ty yo = yl + yr;
 
     // Compute Weight used
diff --git a/src/backend/opencl/kernel/approx2.cl b/src/backend/opencl/kernel/approx2.cl
index 13d71af..c540e1b 100644
--- a/src/backend/opencl/kernel/approx2.cl
+++ b/src/backend/opencl/kernel/approx2.cl
@@ -98,11 +98,11 @@ void core_linear2(const int idx, const int idy, const int idz, const int idw,
     Tp wt = wt00 + wt10 + wt01 + wt11;
 
     // Compute Weighted Values
-    Ty zero; set_scalar(zero, 0);
-    Ty y00; set(y00,                    mul(d_in[ioff],                     wt00)       );
-    Ty y10; set(y10, (condY) ?          mul(d_in[ioff + in.strides[1]],     wt10) : zero);
-    Ty y01; set(y01, (condX) ?          mul(d_in[ioff + 1],                 wt01) : zero);
-    Ty y11; set(y11, (condX && condY) ? mul(d_in[ioff + in.strides[1] + 1], wt11) : zero);
+    Ty zero = ZERO;
+    Ty y00 =                    mul(d_in[ioff],                     wt00)       ;
+    Ty y10 = (condY) ?          mul(d_in[ioff + in.strides[1]],     wt10) : zero;
+    Ty y01 = (condX) ?          mul(d_in[ioff + 1],                 wt01) : zero;
+    Ty y11 = (condX && condY) ? mul(d_in[ioff + in.strides[1] + 1], wt11) : zero;
 
     Ty yo = y00 + y10 + y01 + y11;
 
diff --git a/src/backend/opencl/kernel/gradient.cl b/src/backend/opencl/kernel/gradient.cl
index b55d9c7..bbd4f3b 100644
--- a/src/backend/opencl/kernel/gradient.cl
+++ b/src/backend/opencl/kernel/gradient.cl
@@ -66,8 +66,9 @@ void gradient_kernel(__global T *d_grad0, const KParam grad0,
     float yf = 0.5 * (1 + (idy == 0 || idy >= (in.dims[1] - 1)));
 
     // Copy data to scratch space
+    T zero = ZERO;
     if(cond) {
-        set_scalar(sidx(ty, tx), 0);
+        sidx(ty, tx) = zero;
     } else {
         sidx(ty, tx) = d_in[iIdx];
     }
diff --git a/src/backend/opencl/kernel/gradient.hpp b/src/backend/opencl/kernel/gradient.hpp
index b1b7cca..d7ab1f5 100644
--- a/src/backend/opencl/kernel/gradient.hpp
+++ b/src/backend/opencl/kernel/gradient.hpp
@@ -17,6 +17,9 @@
 #include <dispatch.hpp>
 #include <Param.hpp>
 #include <debug_opencl.hpp>
+#include <type_util.hpp>
+#include <math.hpp>
+#include "config.hpp"
 
 using cl::Buffer;
 using cl::Program;
@@ -45,10 +48,12 @@ namespace opencl
                 int device = getActiveDeviceId();
 
                 std::call_once( compileFlags[device], [device] () {
+                    ToNum<T> toNum;
                     std::ostringstream options;
                     options << " -D T=" << dtype_traits<T>::getName()
                             << " -D TX=" << TX
-                            << " -D TY=" << TY;
+                            << " -D TY=" << TY
+                            << " -D ZERO=" << toNum(scalar<T>(0));
 
                     if((af_dtype) dtype_traits<T>::af_type == c32 ||
                        (af_dtype) dtype_traits<T>::af_type == c64) {
diff --git a/src/backend/opencl/kernel/rotate.hpp b/src/backend/opencl/kernel/rotate.hpp
index b3efaa4..e68d0ed 100644
--- a/src/backend/opencl/kernel/rotate.hpp
+++ b/src/backend/opencl/kernel/rotate.hpp
@@ -18,6 +18,9 @@
 #include <dispatch.hpp>
 #include <Param.hpp>
 #include <debug_opencl.hpp>
+#include <type_util.hpp>
+#include <math.hpp>
+#include "config.hpp"
 
 using cl::Buffer;
 using cl::Program;
@@ -62,10 +65,12 @@ namespace opencl
                 typedef typename dtype_traits<T>::base_type BT;
 
                 std::call_once( compileFlags[device], [device] () {
+                    ToNum<T> toNum;
                     std::ostringstream options;
                     options << " -D T="        << dtype_traits<T>::getName();
                     options << " -D VT="       << dtype_traits<vtype_t<T>>::getName();
                     options << " -D WT="       << dtype_traits<wtype_t<BT>>::getName();
+                    options << " -D ZERO="      << toNum(scalar<T>(0));
 
                     if((af_dtype) dtype_traits<T>::af_type == c32 ||
                        (af_dtype) dtype_traits<T>::af_type == c64) {
diff --git a/src/backend/opencl/kernel/transform.hpp b/src/backend/opencl/kernel/transform.hpp
index 3e15211..677acc3 100644
--- a/src/backend/opencl/kernel/transform.hpp
+++ b/src/backend/opencl/kernel/transform.hpp
@@ -18,6 +18,9 @@
 #include <dispatch.hpp>
 #include <Param.hpp>
 #include <debug_opencl.hpp>
+#include <type_util.hpp>
+#include <math.hpp>
+#include "config.hpp"
 
 using cl::Buffer;
 using cl::Program;
@@ -59,9 +62,11 @@ namespace opencl
                 typedef typename dtype_traits<T>::base_type BT;
 
                 std::call_once( compileFlags[device], [device] () {
+                    ToNum<T> toNum;
                     std::ostringstream options;
                     options << " -D T="        << dtype_traits<T>::getName()
-                            << " -D INVERSE="  << (isInverse ? 1 : 0);
+                            << " -D INVERSE="  << (isInverse ? 1 : 0)
+                            << " -D ZERO="     << toNum(scalar<T>(0));
                     options << " -D VT="       << dtype_traits<vtype_t<T>>::getName();
                     options << " -D WT="       << dtype_traits<wtype_t<BT>>::getName();
 
diff --git a/src/backend/opencl/kernel/transform_interp.cl b/src/backend/opencl/kernel/transform_interp.cl
index ceec286..1d82951 100644
--- a/src/backend/opencl/kernel/transform_interp.cl
+++ b/src/backend/opencl/kernel/transform_interp.cl
@@ -61,10 +61,10 @@ void transform_b(__global T *d_out, const KParam out, __global const T *d_in, co
                     + yido * tmat[4]
                            + tmat[5];
 
-    T zero; set_scalar(zero, 0);
+    T zero = ZERO;
     if (xid < -0.001 || yid < -0.001 || in.dims[0] < xid || in.dims[1] < yid) {
         for(int i = 0; i < nimages; i++) {
-            set(d_out[loco + i * out.strides[2]], zero);
+            d_out[loco + i * out.strides[2]] = zero;
         }
         return;
     }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git