[arrayfire] 324/408: Templated options are now runtime compile options for opencl scan

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:22 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 5c0da49f748b94a263a2dd7d2f7c31f71881b2c2
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Mon Aug 24 06:46:54 2015 -0400

    Templated options are now runtime compile options for opencl scan
---
 src/backend/opencl/kernel/scan_dim.hpp   | 207 +++++++++++++------------------
 src/backend/opencl/kernel/scan_first.hpp | 196 +++++++++++++----------------
 src/backend/opencl/kernel/where.hpp      |   7 +-
 src/backend/opencl/scan.cpp              |  12 +-
 4 files changed, 185 insertions(+), 237 deletions(-)

diff --git a/src/backend/opencl/kernel/scan_dim.hpp b/src/backend/opencl/kernel/scan_dim.hpp
index 6bb8cdd..84cc722 100644
--- a/src/backend/opencl/kernel/scan_dim.hpp
+++ b/src/backend/opencl/kernel/scan_dim.hpp
@@ -19,6 +19,7 @@
 #include <Param.hpp>
 #include <debug_opencl.hpp>
 #include <type_util.hpp>
+#include <cache.hpp>
 #include "names.hpp"
 #include "config.hpp"
 
@@ -34,64 +35,77 @@ namespace opencl
 {
 namespace kernel
 {
-    template<typename Ti, typename To, af_op_t op, int dim, bool isFinalPass, uint threads_y>
-    static Kernel* get_scan_dim_kernels(int kerIdx)
+    template<typename Ti, typename To, af_op_t op>
+    static Kernel get_scan_dim_kernels(int kerIdx, int dim, bool isFinalPass, uint threads_y)
     {
-        try {
-            static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
-            static std::map<int, Program*> scanProgs;
-            static std::map<int, Kernel*>  scanKerns;
-            static std::map<int, Kernel*>  bcastKerns;
-
-            int device= getActiveDeviceId();
-
-            std::call_once(compileFlags[device], [device] () {
-
-                    Binary<To, op> scan;
-                    ToNum<To> toNum;
-
-                    std::ostringstream options;
-                    options << " -D To=" << dtype_traits<To>::getName()
-                            << " -D Ti=" << dtype_traits<Ti>::getName()
-                            << " -D T=To"
-                            << " -D dim=" << dim
-                            << " -D DIMY=" << threads_y
-                            << " -D THREADS_X=" << THREADS_X
-                            << " -D init=" << toNum(scan.init())
-                            << " -D " << binOpName<op>()
-                            << " -D CPLX=" << af::iscplx<Ti>()
-                            << " -D isFinalPass=" << (int)(isFinalPass);
-                    if (std::is_same<Ti, double>::value ||
-                        std::is_same<Ti, cdouble>::value) {
-                        options << " -D USE_DOUBLE";
-                    }
-
-                    const char *ker_strs[] = {ops_cl, scan_dim_cl};
-                    const int   ker_lens[] = {ops_cl_len, scan_dim_cl_len};
-                    cl::Program prog;
-                    buildProgram(prog, 2, ker_strs, ker_lens, options.str());
-                    scanProgs[device] = new Program(prog);
-
-                    scanKerns[device] = new Kernel(*scanProgs[device],  "scan_dim_kernel");
-                    bcastKerns[device] = new Kernel(*scanProgs[device],  "bcast_dim_kernel");
-
-                });
-
-            return (kerIdx == 0) ? scanKerns[device] : bcastKerns[device];
-        } catch (cl::Error err) {
-            CL_TO_AF_ERROR(err);
-            throw;
+        std::string ref_name =
+            std::string("scan_") +
+            std::to_string(dim) +
+            std::string("_") +
+            std::to_string(isFinalPass) +
+            std::string("_") +
+            std::string(dtype_traits<Ti>::getName()) +
+            std::string("_") +
+            std::string(dtype_traits<To>::getName()) +
+            std::string("_") +
+            std::to_string(op) +
+            std::string("_") +
+            std::to_string(threads_y);
+
+        int device = getActiveDeviceId();
+        kc_t::iterator idx = kernelCaches[device].find(ref_name);
+
+        kc_entry_t entry;
+        if (idx == kernelCaches[device].end()) {
+
+            Binary<To, op> scan;
+            ToNum<To> toNum;
+
+            std::ostringstream options;
+            options << " -D To=" << dtype_traits<To>::getName()
+                    << " -D Ti=" << dtype_traits<Ti>::getName()
+                    << " -D T=To"
+                    << " -D dim=" << dim
+                    << " -D DIMY=" << threads_y
+                    << " -D THREADS_X=" << THREADS_X
+                    << " -D init=" << toNum(scan.init())
+                    << " -D " << binOpName<op>()
+                    << " -D CPLX=" << af::iscplx<Ti>()
+                    << " -D isFinalPass=" << (int)(isFinalPass);
+            if (std::is_same<Ti, double>::value ||
+                std::is_same<Ti, cdouble>::value) {
+                options << " -D USE_DOUBLE";
+            }
+
+            const char *ker_strs[] = {ops_cl, scan_dim_cl};
+            const int   ker_lens[] = {ops_cl_len, scan_dim_cl_len};
+            cl::Program prog;
+            buildProgram(prog, 2, ker_strs, ker_lens, options.str());
+
+            entry.prog = new Program(prog);
+            entry.ker = new Kernel[2];
+
+            entry.ker[0] = Kernel(*entry.prog, "scan_dim_kernel");
+            entry.ker[1] = Kernel(*entry.prog, "bcast_dim_kernel");
+
+            kernelCaches[device][ref_name] = entry;
+
+        } else {
+            entry = idx->second;
         }
+
+        return entry.ker[kerIdx];
     }
 
-    template<typename Ti, typename To, af_op_t op, int dim, bool isFinalPass, uint threads_y>
+    template<typename Ti, typename To, af_op_t op>
     static void scan_dim_launcher(Param &out,
                                   Param &tmp,
                                   const Param &in,
+                                  int dim, bool isFinalPass, uint threads_y,
                                   const uint groups_all[4])
     {
         try {
-            Kernel* ker = get_scan_dim_kernels<Ti, To, op, dim, isFinalPass, threads_y>(0);
+            Kernel ker = get_scan_dim_kernels<Ti, To, op>(0, dim, isFinalPass, threads_y);
 
             NDRange local(THREADS_X, threads_y);
             NDRange global(groups_all[0] * groups_all[2] * local[0],
@@ -103,7 +117,7 @@ namespace kernel
                                       Buffer, KParam,
                                       Buffer, KParam,
                                       uint, uint,
-                                      uint, uint>(*ker);
+                                      uint, uint>(ker);
 
 
             scanOp(EnqueueArgs(getQueue(), global, local),
@@ -117,13 +131,14 @@ namespace kernel
         }
     }
 
-    template<typename Ti, typename To, af_op_t op, int dim, bool isFinalPass, uint threads_y>
+    template<typename Ti, typename To, af_op_t op>
     static void bcast_dim_launcher(Param &out,
                                    Param &tmp,
+                                   int dim, bool isFinalPass, uint threads_y,
                                    const uint groups_all[4])
     {
         try {
-            Kernel* ker = get_scan_dim_kernels<Ti, To, op, dim, isFinalPass, threads_y>(1);
+            Kernel ker = get_scan_dim_kernels<Ti, To, op>(1, dim, isFinalPass, threads_y);
 
             NDRange local(THREADS_X, threads_y);
             NDRange global(groups_all[0] * groups_all[2] * local[0],
@@ -134,7 +149,7 @@ namespace kernel
             auto bcastOp = make_kernel<Buffer, KParam,
                                        Buffer, KParam,
                                        uint, uint,
-                                       uint, uint>(*ker);
+                                       uint, uint>(ker);
 
             bcastOp(EnqueueArgs(getQueue(), global, local),
                     *out.data, out.info, *tmp.data, tmp.info,
@@ -147,57 +162,8 @@ namespace kernel
         }
     }
 
-
-    template<typename Ti, typename To, af_op_t op, int dim, bool isFinalPass>
-    static void scan_dim_fn(Param &out,
-                            Param &tmp,
-                            const Param &in,
-                            const uint threads_y,
-                            const uint groups_all[4])
-    {
-
-        switch (threads_y) {
-        case 8:
-            (scan_dim_launcher<Ti, To, op, dim, isFinalPass, 8>)(
-                out, tmp, in, groups_all); break;
-        case 4:
-            (scan_dim_launcher<Ti, To, op, dim, isFinalPass, 4>)(
-                out, tmp, in, groups_all); break;
-        case 2:
-            (scan_dim_launcher<Ti, To, op, dim, isFinalPass, 2>)(
-                out, tmp, in, groups_all); break;
-        case 1:
-            (scan_dim_launcher<Ti, To, op, dim, isFinalPass, 1>)(
-                out, tmp, in, groups_all); break;
-        }
-
-    }
-
-    template<typename Ti, typename To, af_op_t op, int dim, bool isFinalPass>
-    static void bcast_dim_fn(Param &out,
-                             Param &tmp,
-                             const uint threads_y,
-                             const uint groups_all[4])
-    {
-
-        switch (threads_y) {
-        case 8:
-            (bcast_dim_launcher<Ti, To, op, dim, isFinalPass, 8>)(
-                out, tmp, groups_all); break;
-        case 4:
-            (bcast_dim_launcher<Ti, To, op, dim, isFinalPass, 4>)(
-                out, tmp, groups_all); break;
-        case 2:
-            (bcast_dim_launcher<Ti, To, op, dim, isFinalPass, 2>)(
-                out, tmp, groups_all); break;
-        case 1:
-            (bcast_dim_launcher<Ti, To, op, dim, isFinalPass, 1>)(
-                out, tmp, groups_all); break;
-        }
-    }
-
-    template<typename Ti, typename To, af_op_t op, int dim>
-    static void scan_dim(Param &out, const Param &in)
+    template<typename Ti, typename To, af_op_t op>
+    static void scan_dim(Param &out, const Param &in, int dim)
     {
         try {
             uint threads_y = std::min(THREADS_Y, nextpow2(out.info.dims[dim]));
@@ -212,9 +178,10 @@ namespace kernel
 
             if (groups_all[dim] == 1) {
 
-                scan_dim_fn<Ti, To, op, dim, true>(out, out, in,
-                                                   threads_y,
-                                                   groups_all);
+                scan_dim_launcher<Ti, To, op>(out, out, in,
+                                              dim, true,
+                                              threads_y,
+                                              groups_all);
             } else {
 
                 Param tmp = out;
@@ -229,27 +196,31 @@ namespace kernel
                 // FIXME: Do I need to free this ?
                 tmp.data = bufferAlloc(tmp_elements * sizeof(To));
 
-                scan_dim_fn<Ti, To, op, dim, false>(out, tmp, in,
-                                                    threads_y,
-                                                    groups_all);
+                scan_dim_launcher<Ti, To, op>(out, tmp, in,
+                                              dim, false,
+                                              threads_y,
+                                              groups_all);
 
                 int gdim = groups_all[dim];
                 groups_all[dim] = 1;
 
                 if (op == af_notzero_t) {
-                    scan_dim_fn<To, To, af_add_t, dim, true>(tmp, tmp, tmp,
-                                                             threads_y,
-                                                             groups_all);
+                    scan_dim_launcher<To, To, af_add_t>(tmp, tmp, tmp,
+                                                        dim, true,
+                                                        threads_y,
+                                                        groups_all);
                 } else {
-                    scan_dim_fn<To, To,       op, dim, true>(tmp, tmp, tmp,
-                                                             threads_y,
-                                                             groups_all);
+                    scan_dim_launcher<To, To,       op>(tmp, tmp, tmp,
+                                                        dim, true,
+                                                        threads_y,
+                                                        groups_all);
                 }
 
                 groups_all[dim] = gdim;
-                bcast_dim_fn<To, To, op, dim, true>(out, tmp,
-                                                    threads_y,
-                                                    groups_all);
+                bcast_dim_launcher<To, To, op>(out, tmp,
+                                               dim, true,
+                                               threads_y,
+                                               groups_all);
                 bufferFree(tmp.data);
             }
         } catch (cl::Error err) {
diff --git a/src/backend/opencl/kernel/scan_first.hpp b/src/backend/opencl/kernel/scan_first.hpp
index b9521fd..d7a284d 100644
--- a/src/backend/opencl/kernel/scan_first.hpp
+++ b/src/backend/opencl/kernel/scan_first.hpp
@@ -19,6 +19,7 @@
 #include <Param.hpp>
 #include <debug_opencl.hpp>
 #include <type_util.hpp>
+#include <cache.hpp>
 #include "names.hpp"
 #include "config.hpp"
 #include <memory.hpp>
@@ -36,62 +37,80 @@ namespace opencl
 namespace kernel
 {
 
-    template<typename Ti, typename To, af_op_t op, bool isFinalPass, uint threads_x>
-    static Kernel* get_scan_first_kernels(int kerIdx)
+    template<typename Ti, typename To, af_op_t op>
+    static Kernel get_scan_first_kernels(int kerIdx, bool isFinalPass, uint threads_x)
     {
-        static std::once_flag compileFlags[DeviceManager::MAX_DEVICES];
-        static std::map<int, Program*> scanProgs;
-        static std::map<int, Kernel* > scanKerns;
-        static std::map<int, Kernel* > bcastKerns;
-
-        int device= getActiveDeviceId();
-
-        std::call_once(compileFlags[device], [device] () {
-
-                const uint threads_y = THREADS_PER_GROUP / threads_x;
-                const uint SHARED_MEM_SIZE = THREADS_PER_GROUP;
-
-                Binary<To, op> scan;
-                ToNum<To> toNum;
-
-                std::ostringstream options;
-                options << " -D To=" << dtype_traits<To>::getName()
-                        << " -D Ti=" << dtype_traits<Ti>::getName()
-                        << " -D T=To"
-                        << " -D DIMX=" << threads_x
-                        << " -D DIMY=" << threads_y
-                        << " -D SHARED_MEM_SIZE=" << SHARED_MEM_SIZE
-                        << " -D init=" << toNum(scan.init())
-                        << " -D " << binOpName<op>()
-                        << " -D CPLX=" << af::iscplx<Ti>()
-                        << " -D isFinalPass=" << (int)(isFinalPass);
-                if (std::is_same<Ti, double>::value ||
-                    std::is_same<Ti, cdouble>::value) {
-                    options << " -D USE_DOUBLE";
-                }
-
-                const char *ker_strs[] = {ops_cl, scan_first_cl};
-                const int   ker_lens[] = {ops_cl_len, scan_first_cl_len};
-                cl::Program prog;
-                buildProgram(prog, 2, ker_strs, ker_lens, options.str());
-                scanProgs[device] = new Program(prog);
-
-                scanKerns[device] = new Kernel(*scanProgs[device],  "scan_first_kernel");
-                bcastKerns[device] = new Kernel(*scanProgs[device],  "bcast_first_kernel");
-
-            });
-
-        return (kerIdx == 0) ? scanKerns[device] : bcastKerns[device];
+        std::string ref_name =
+            std::string("scan_0_") +
+            std::string("_") +
+            std::to_string(isFinalPass) +
+            std::string("_") +
+            std::string(dtype_traits<Ti>::getName()) +
+            std::string("_") +
+            std::string(dtype_traits<To>::getName()) +
+            std::string("_") +
+            std::to_string(op) +
+            std::string("_") +
+            std::to_string(threads_x);
+
+        int device = getActiveDeviceId();
+        kc_t::iterator idx = kernelCaches[device].find(ref_name);
+
+        kc_entry_t entry;
+        if (idx == kernelCaches[device].end()) {
+
+            const uint threads_y = THREADS_PER_GROUP / threads_x;
+            const uint SHARED_MEM_SIZE = THREADS_PER_GROUP;
+
+            Binary<To, op> scan;
+            ToNum<To> toNum;
+
+            std::ostringstream options;
+            options << " -D To=" << dtype_traits<To>::getName()
+                    << " -D Ti=" << dtype_traits<Ti>::getName()
+                    << " -D T=To"
+                    << " -D DIMX=" << threads_x
+                    << " -D DIMY=" << threads_y
+                    << " -D SHARED_MEM_SIZE=" << SHARED_MEM_SIZE
+                    << " -D init=" << toNum(scan.init())
+                    << " -D " << binOpName<op>()
+                    << " -D CPLX=" << af::iscplx<Ti>()
+                    << " -D isFinalPass=" << (int)(isFinalPass);
+            if (std::is_same<Ti, double>::value ||
+                std::is_same<Ti, cdouble>::value) {
+                options << " -D USE_DOUBLE";
+            }
+
+            const char *ker_strs[] = {ops_cl, scan_first_cl};
+            const int   ker_lens[] = {ops_cl_len, scan_first_cl_len};
+            cl::Program prog;
+            buildProgram(prog, 2, ker_strs, ker_lens, options.str());
+
+            entry.prog = new Program(prog);
+            entry.ker = new Kernel[2];
+
+            entry.ker[0] = Kernel(*entry.prog, "scan_first_kernel");
+            entry.ker[1] = Kernel(*entry.prog, "bcast_first_kernel");
+
+            kernelCaches[device][ref_name] = entry;
+
+        } else {
+            entry = idx->second;
+        }
+
+        return entry.ker[kerIdx];
     }
 
-    template<typename Ti, typename To, af_op_t op, bool isFinalPass, uint threads_x>
+    template<typename Ti, typename To, af_op_t op>
     static void scan_first_launcher(Param &out,
                                     Param &tmp,
                                     const Param &in,
+                                    const bool isFinalPass,
                                     const uint groups_x,
-                                    const uint groups_y)
+                                    const uint groups_y,
+                                    const uint threads_x)
     {
-        Kernel* ker = get_scan_first_kernels<Ti, To, op, isFinalPass, threads_x>(0);
+        Kernel ker = get_scan_first_kernels<Ti, To, op>(0, isFinalPass, threads_x);
 
         NDRange local(threads_x, THREADS_PER_GROUP / threads_x);
         NDRange global(groups_x * out.info.dims[2] * local[0],
@@ -102,7 +121,7 @@ namespace kernel
         auto scanOp = make_kernel<Buffer, KParam,
                                   Buffer, KParam,
                                   Buffer, KParam,
-                                  uint, uint, uint>(*ker);
+                                  uint, uint, uint>(ker);
 
         scanOp(EnqueueArgs(getQueue(), global, local),
                *out.data, out.info, *tmp.data, tmp.info, *in.data, in.info,
@@ -111,14 +130,16 @@ namespace kernel
         CL_DEBUG_FINISH(getQueue());
     }
 
-    template<typename Ti, typename To, af_op_t op, bool isFinalPass, uint threads_x>
+    template<typename Ti, typename To, af_op_t op>
     static void bcast_first_launcher(Param &out,
                                      Param &tmp,
+                                     const bool isFinalPass,
                                      const uint groups_x,
-                                     const uint groups_y)
+                                     const uint groups_y,
+                                     const uint threads_x)
     {
 
-        Kernel* ker = get_scan_first_kernels<Ti, To, op, isFinalPass, threads_x>(1);
+        Kernel ker = get_scan_first_kernels<Ti, To, op>(1, isFinalPass, threads_x);
 
         NDRange local(threads_x, THREADS_PER_GROUP / threads_x);
         NDRange global(groups_x * out.info.dims[2] * local[0],
@@ -128,7 +149,7 @@ namespace kernel
 
         auto bcastOp = make_kernel<Buffer, KParam,
                                    Buffer, KParam,
-                                   uint, uint, uint>(*ker);
+                                   uint, uint, uint>(ker);
 
         bcastOp(EnqueueArgs(getQueue(), global, local),
                 *out.data, out.info, *tmp.data, tmp.info,
@@ -138,56 +159,6 @@ namespace kernel
     }
 
 
-    template<typename Ti, typename To, af_op_t op, bool isFinalPass>
-    static void scan_first_fn(Param &out,
-                              Param &tmp,
-                              const Param &in,
-                              const uint groups_x,
-                              const uint groups_y,
-                              const uint threads_x)
-    {
-
-        switch (threads_x) {
-        case 32:
-            (scan_first_launcher<Ti, To, op, isFinalPass,  32>)(
-                out, tmp, in, groups_x, groups_y); break;
-        case 64:
-            (scan_first_launcher<Ti, To, op, isFinalPass,  64>)(
-                out, tmp, in, groups_x, groups_y); break;
-        case 128:
-            (scan_first_launcher<Ti, To, op, isFinalPass, 128>)(
-                out, tmp, in, groups_x, groups_y); break;
-        case 256:
-            (scan_first_launcher<Ti, To, op, isFinalPass, 256>)(
-                out, tmp, in, groups_x, groups_y); break;
-        }
-
-    }
-
-    template<typename Ti, typename To, af_op_t op, bool isFinalPass>
-    static void bcast_first_fn(Param &out,
-                               Param &tmp,
-                               const uint groups_x,
-                               const uint groups_y,
-                               const uint threads_x)
-    {
-
-        switch (threads_x) {
-        case 32:
-            (bcast_first_launcher<Ti, To, op, isFinalPass,  32>)(
-                out, tmp, groups_x, groups_y); break;
-        case 64:
-            (bcast_first_launcher<Ti, To, op, isFinalPass,  64>)(
-                out, tmp, groups_x, groups_y); break;
-        case 128:
-            (bcast_first_launcher<Ti, To, op, isFinalPass, 128>)(
-                out, tmp, groups_x, groups_y); break;
-        case 256:
-            (bcast_first_launcher<Ti, To, op, isFinalPass, 256>)(
-                out, tmp, groups_x, groups_y); break;
-        }
-    }
-
     template<typename Ti, typename To, af_op_t op>
     static void scan_first(Param &out, const Param &in)
     {
@@ -199,7 +170,8 @@ namespace kernel
         uint groups_y = divup(out.info.dims[1], threads_y);
 
         if (groups_x == 1) {
-            scan_first_fn<Ti, To, op, true>(out, out, in,
+            scan_first_launcher<Ti, To, op>(out, out, in,
+                                            true,
                                             groups_x, groups_y,
                                             threads_x);
 
@@ -216,21 +188,25 @@ namespace kernel
 
             tmp.data = bufferAlloc(tmp_elements * sizeof(To));
 
-            scan_first_fn<Ti, To, op, false>(out, tmp, in,
-                                             groups_x, groups_y,
-                                             threads_x);
+            scan_first_launcher<Ti, To, op>(out, tmp, in,
+                                            false,
+                                            groups_x, groups_y,
+                                            threads_x);
 
             if (op == af_notzero_t) {
-                scan_first_fn<To, To, af_add_t, true>(tmp, tmp, tmp,
+                scan_first_launcher<To, To, af_add_t>(tmp, tmp, tmp,
+                                                      true,
                                                       1, groups_y,
                                                       threads_x);
             } else {
-                scan_first_fn<To, To,       op, true>(tmp, tmp, tmp,
+                scan_first_launcher<To, To,       op>(tmp, tmp, tmp,
+                                                      true,
                                                       1, groups_y,
                                                       threads_x);
             }
 
-            bcast_first_fn<To, To, op, true>(out, tmp,
+            bcast_first_launcher<To, To, op>(out, tmp,
+                                             true,
                                              groups_x,
                                              groups_y,
                                              threads_x);
diff --git a/src/backend/opencl/kernel/where.hpp b/src/backend/opencl/kernel/where.hpp
index 4b6d7e7..2cbf8c1 100644
--- a/src/backend/opencl/kernel/where.hpp
+++ b/src/backend/opencl/kernel/where.hpp
@@ -126,9 +126,10 @@ namespace kernel
             int otmp_elements = otmp.info.strides[3] * otmp.info.dims[3];
             otmp.data = bufferAlloc(otmp_elements * sizeof(uint));
 
-            scan_first_fn<T, uint, af_notzero_t, false>(otmp, rtmp, in,
-                                                        groups_x, groups_y,
-                                                        threads_x);
+            scan_first_launcher<T, uint, af_notzero_t>(otmp, rtmp, in,
+                                                       false,
+                                                       groups_x, groups_y,
+                                                       threads_x);
 
             // Linearize the dimensions and perform scan
             Param ltmp = rtmp;
diff --git a/src/backend/opencl/scan.cpp b/src/backend/opencl/scan.cpp
index ff657a4..74375da 100644
--- a/src/backend/opencl/scan.cpp
+++ b/src/backend/opencl/scan.cpp
@@ -28,12 +28,12 @@ namespace opencl
         try {
             Param Out = out;
             Param In  =   in;
-            switch (dim) {
-            case 0: kernel::scan_first<Ti, To, op   >(Out, In); break;
-            case 1: kernel::scan_dim  <Ti, To, op, 1>(Out, In); break;
-            case 2: kernel::scan_dim  <Ti, To, op, 2>(Out, In); break;
-            case 3: kernel::scan_dim  <Ti, To, op, 3>(Out, In); break;
-            }
+
+            if (dim == 0)
+                kernel::scan_first<Ti, To, op>(Out, In);
+            else
+                kernel::scan_dim  <Ti, To, op>(Out, In, dim);
+
         } catch (cl::Error &ex) {
 
             CL_TO_AF_ERROR(ex);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list