[arrayfire] 123/284: Revert "Added ENQUEUE macro in cpu backend"

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:25 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit 6058dd283ea132cef41d834ef068afad3a719200
Author: Umar Arshad <umar at arrayfire.com>
Date:   Wed Dec 30 10:32:00 2015 -0500

    Revert "Added ENQUEUE macro in cpu backend"
    
    This reverts commit 95d934613425559fa9048433bfe77bb8f151c18f.
    
    * The ENQUEUE macro is not necessary with the new queue class (see queue.hpp)
    * Macros are bad practice
---
 src/backend/cpu/Array.cpp             |  5 +++--
 src/backend/cpu/approx.cpp            | 19 ++++++++++---------
 src/backend/cpu/assign.cpp            |  5 +++--
 src/backend/cpu/bilateral.cpp         |  5 +++--
 src/backend/cpu/blas.cpp              | 13 +++++++------
 src/backend/cpu/cholesky.cpp          |  5 +++--
 src/backend/cpu/convolve.cpp          |  7 ++++---
 src/backend/cpu/copy.cpp              |  9 +++++----
 src/backend/cpu/debug_cpu.hpp         | 31 -------------------------------
 src/backend/cpu/diagonal.cpp          |  7 ++++---
 src/backend/cpu/diff.cpp              |  7 ++++---
 src/backend/cpu/fast.cpp              |  3 ++-
 src/backend/cpu/fft.cpp               |  9 +++++----
 src/backend/cpu/fftconvolve.cpp       | 15 ++++++++-------
 src/backend/cpu/gradient.cpp          |  5 +++--
 src/backend/cpu/harris.cpp            | 13 +++++++------
 src/backend/cpu/hist_graphics.cpp     |  3 ++-
 src/backend/cpu/histogram.cpp         |  5 +++--
 src/backend/cpu/homography.cpp        |  3 ++-
 src/backend/cpu/hsv_rgb.cpp           |  7 ++++---
 src/backend/cpu/identity.cpp          |  5 +++--
 src/backend/cpu/iir.cpp               |  5 +++--
 src/backend/cpu/image.cpp             |  3 ++-
 src/backend/cpu/index.cpp             |  5 +++--
 src/backend/cpu/inverse.cpp           |  5 +++--
 src/backend/cpu/iota.cpp              |  5 +++--
 src/backend/cpu/ireduce.cpp           |  5 +++--
 src/backend/cpu/join.cpp              | 25 +++++++++++++------------
 src/backend/cpu/lookup.cpp            |  5 +++--
 src/backend/cpu/lu.cpp                |  9 +++++----
 src/backend/cpu/match_template.cpp    |  5 +++--
 src/backend/cpu/meanshift.cpp         |  5 +++--
 src/backend/cpu/medfilt.cpp           |  5 +++--
 src/backend/cpu/memory.cpp            |  3 ++-
 src/backend/cpu/morph.cpp             |  7 ++++---
 src/backend/cpu/nearest_neighbour.cpp |  9 +++++----
 src/backend/cpu/orb.cpp               |  3 ++-
 src/backend/cpu/platform.cpp          |  3 ++-
 src/backend/cpu/plot.cpp              |  3 ++-
 src/backend/cpu/plot3.cpp             |  3 ++-
 src/backend/cpu/qr.cpp                |  7 ++++---
 src/backend/cpu/queue.hpp             |  4 ++++
 src/backend/cpu/random.cpp            | 11 ++++++-----
 src/backend/cpu/range.cpp             | 11 ++++++-----
 src/backend/cpu/reduce.cpp            |  5 +++--
 src/backend/cpu/regions.cpp           |  5 +++--
 src/backend/cpu/reorder.cpp           |  5 +++--
 src/backend/cpu/resize.cpp            |  9 +++++----
 src/backend/cpu/rotate.cpp            |  9 +++++----
 src/backend/cpu/scan.cpp              | 11 ++++++-----
 src/backend/cpu/select.cpp            |  7 ++++---
 src/backend/cpu/set.cpp               |  3 ++-
 src/backend/cpu/shift.cpp             |  5 +++--
 src/backend/cpu/sobel.cpp             |  7 ++++---
 src/backend/cpu/solve.cpp             | 11 ++++++-----
 src/backend/cpu/sort.cpp              |  5 +++--
 src/backend/cpu/sort_by_key.cpp       |  5 +++--
 src/backend/cpu/sort_index.cpp        |  5 +++--
 src/backend/cpu/surface.cpp           |  3 ++-
 src/backend/cpu/susan.cpp             |  7 ++++---
 src/backend/cpu/svd.cpp               |  5 +++--
 src/backend/cpu/tile.cpp              |  5 +++--
 src/backend/cpu/transform.cpp         |  9 +++++----
 src/backend/cpu/transpose.cpp         |  7 ++++---
 src/backend/cpu/triangle.cpp          |  5 +++--
 src/backend/cpu/unwrap.cpp            |  7 ++++---
 src/backend/cpu/where.cpp             |  3 ++-
 src/backend/cpu/wrap.cpp              |  7 ++++---
 68 files changed, 258 insertions(+), 219 deletions(-)

diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 34c99e4..862c576 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -15,7 +15,8 @@
 #include <TNJ/BufferNode.hpp>
 #include <TNJ/ScalarNode.hpp>
 #include <memory.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <cstring>
 #include <cstddef>
 
@@ -77,7 +78,7 @@ void Array<T>::eval()
 
     data = std::shared_ptr<T>(memAlloc<T>(elements()), memFree<T>);
 
-    ENQUEUE(kernel::evalArray<T>, *this);
+    getQueue().enqueue(kernel::evalArray<T>, *this);
 
     ready = true;
     Node_ptr prev = node;
diff --git a/src/backend/cpu/approx.cpp b/src/backend/cpu/approx.cpp
index 57d3cc4..b817b84 100644
--- a/src/backend/cpu/approx.cpp
+++ b/src/backend/cpu/approx.cpp
@@ -11,7 +11,8 @@
 #include <approx.hpp>
 #include <kernel/approx1.hpp>
 #include <kernel/approx2.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
@@ -30,12 +31,12 @@ Array<Ty> approx1(const Array<Ty> &in, const Array<Tp> &pos,
 
     switch(method) {
         case AF_INTERP_NEAREST:
-            ENQUEUE(kernel::approx1<Ty, Tp, AF_INTERP_NEAREST>,
-                    out, in, pos, offGrid);
+            getQueue().enqueue(kernel::approx1<Ty, Tp, AF_INTERP_NEAREST>,
+                               out, in, pos, offGrid);
             break;
         case AF_INTERP_LINEAR:
-            ENQUEUE(kernel::approx1<Ty, Tp, AF_INTERP_LINEAR>,
-                    out, in, pos, offGrid);
+            getQueue().enqueue(kernel::approx1<Ty, Tp, AF_INTERP_LINEAR>,
+                               out, in, pos, offGrid);
             break;
         default:
             break;
@@ -60,12 +61,12 @@ Array<Ty> approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &p
 
     switch(method) {
         case AF_INTERP_NEAREST:
-            ENQUEUE(kernel::approx2<Ty, Tp, AF_INTERP_NEAREST>,
-                    out, in, pos0, pos1, offGrid);
+            getQueue().enqueue(kernel::approx2<Ty, Tp, AF_INTERP_NEAREST>,
+                               out, in, pos0, pos1, offGrid);
             break;
         case AF_INTERP_LINEAR:
-            ENQUEUE(kernel::approx2<Ty, Tp, AF_INTERP_LINEAR>,
-                    out, in, pos0, pos1, offGrid);
+            getQueue().enqueue(kernel::approx2<Ty, Tp, AF_INTERP_LINEAR>,
+                               out, in, pos0, pos1, offGrid);
             break;
         default:
             break;
diff --git a/src/backend/cpu/assign.cpp b/src/backend/cpu/assign.cpp
index df90344..463b30c 100644
--- a/src/backend/cpu/assign.cpp
+++ b/src/backend/cpu/assign.cpp
@@ -14,7 +14,8 @@
 #include <handle.hpp>
 #include <kernel/assign.hpp>
 #include <assign.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
@@ -47,7 +48,7 @@ void assign(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs)
         }
     }
 
-    ENQUEUE(kernel::assign<T>, out, rhs, std::move(isSeq),
+    getQueue().enqueue(kernel::assign<T>, out, rhs, std::move(isSeq),
             std::move(seqs), std::move(idxArrs));
 }
 
diff --git a/src/backend/cpu/bilateral.cpp b/src/backend/cpu/bilateral.cpp
index ceb8be9..abd9857 100644
--- a/src/backend/cpu/bilateral.cpp
+++ b/src/backend/cpu/bilateral.cpp
@@ -15,7 +15,8 @@
 #include <bilateral.hpp>
 #include <cmath>
 #include <algorithm>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 using af::dim4;
 
@@ -28,7 +29,7 @@ Array<outType> bilateral(const Array<inType> &in, const float &s_sigma, const fl
     in.eval();
     const dim4 dims     = in.dims();
     Array<outType> out = createEmptyArray<outType>(dims);
-    ENQUEUE(kernel::bilateral<outType, inType, isColor>, out, in, s_sigma, c_sigma);
+    getQueue().enqueue(kernel::bilateral<outType, inType, isColor>, out, in, s_sigma, c_sigma);
     return out;
 }
 
diff --git a/src/backend/cpu/blas.cpp b/src/backend/cpu/blas.cpp
index 70c8d9c..3ecb502 100644
--- a/src/backend/cpu/blas.cpp
+++ b/src/backend/cpu/blas.cpp
@@ -13,7 +13,8 @@
 #include <cassert>
 #include <err_common.hpp>
 #include <kernel/dot.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
@@ -193,7 +194,7 @@ Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
                 reinterpret_cast<BT*>(output.get()), output.dims()[0]);
         }
     };
-    ENQUEUE(func, out, lhs, rhs);
+    getQueue().enqueue(func, out, lhs, rhs);
 
     return out;
 }
@@ -207,13 +208,13 @@ Array<T> dot(const Array<T> &lhs, const Array<T> &rhs,
 
     Array<T> out = createEmptyArray<T>(af::dim4(1));
     if(optLhs == AF_MAT_CONJ && optRhs == AF_MAT_CONJ) {
-        ENQUEUE(kernel::dot<T, false, true>, out, lhs, rhs, optLhs, optRhs);
+        getQueue().enqueue(kernel::dot<T, false, true>, out, lhs, rhs, optLhs, optRhs);
     } else if (optLhs == AF_MAT_CONJ && optRhs == AF_MAT_NONE) {
-        ENQUEUE(kernel::dot<T, true, false>,out, lhs, rhs, optLhs, optRhs);
+        getQueue().enqueue(kernel::dot<T, true, false>,out, lhs, rhs, optLhs, optRhs);
     } else if (optLhs == AF_MAT_NONE && optRhs == AF_MAT_CONJ) {
-        ENQUEUE(kernel::dot<T, true, false>,out, rhs, lhs, optRhs, optLhs);
+        getQueue().enqueue(kernel::dot<T, true, false>,out, rhs, lhs, optRhs, optLhs);
     } else {
-        ENQUEUE(kernel::dot<T, false, false>,out, lhs, rhs, optLhs, optRhs);
+        getQueue().enqueue(kernel::dot<T, false, false>,out, lhs, rhs, optLhs, optRhs);
     }
     return out;
 }
diff --git a/src/backend/cpu/cholesky.cpp b/src/backend/cpu/cholesky.cpp
index b21d9c8..5e393f0 100644
--- a/src/backend/cpu/cholesky.cpp
+++ b/src/backend/cpu/cholesky.cpp
@@ -19,7 +19,8 @@
 #include <err_cpu.hpp>
 #include <triangle.hpp>
 #include <lapack_helper.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
@@ -74,7 +75,7 @@ int cholesky_inplace(Array<T> &in, const bool is_upper)
         info = potrf_func<T>()(AF_LAPACK_COL_MAJOR, uplo, N, in.get(), in.strides()[1]);
     };
 
-    ENQUEUE(func, info, in);
+    getQueue().enqueue(func, info, in);
     getQueue().sync();
 
     return info;
diff --git a/src/backend/cpu/convolve.cpp b/src/backend/cpu/convolve.cpp
index cf241c3..8218a3f 100644
--- a/src/backend/cpu/convolve.cpp
+++ b/src/backend/cpu/convolve.cpp
@@ -14,7 +14,8 @@
 #include <convolve.hpp>
 #include <err_cpu.hpp>
 #include <math.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/convolve.hpp>
 
 using af::dim4;
@@ -50,7 +51,7 @@ Array<T> convolve(Array<T> const& signal, Array<accT> const& filter, ConvolveBat
 
     Array<T> out = createEmptyArray<T>(oDims);
 
-    ENQUEUE(kernel::convolve_nd<T, accT, baseDim, expand>,out, signal, filter, kind);
+    getQueue().enqueue(kernel::convolve_nd<T, accT, baseDim, expand>,out, signal, filter, kind);
 
     return out;
 }
@@ -80,7 +81,7 @@ Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<ac
 
     Array<T> out  = createEmptyArray<T>(oDims);
 
-    ENQUEUE(kernel::convolve2<T, accT, expand>, out, signal, c_filter, r_filter, tDims);
+    getQueue().enqueue(kernel::convolve2<T, accT, expand>, out, signal, c_filter, r_filter, tDims);
 
     return out;
 }
diff --git a/src/backend/cpu/copy.cpp b/src/backend/cpu/copy.cpp
index 91a1513..f844d95 100644
--- a/src/backend/cpu/copy.cpp
+++ b/src/backend/cpu/copy.cpp
@@ -18,7 +18,8 @@
 #include <cassert>
 #include <err_cpu.hpp>
 #include <math.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/copy.hpp>
 
 namespace cpu
@@ -50,7 +51,7 @@ template<typename T>
 void multiply_inplace(Array<T> &in, double val)
 {
     in.eval();
-    ENQUEUE(kernel::copy<T, T>, in, in, 0, val);
+    getQueue().enqueue(kernel::copy<T, T>, in, in, 0, val);
 }
 
 template<typename inType, typename outType>
@@ -60,7 +61,7 @@ Array<outType> padArray(Array<inType> const &in, dim4 const &dims,
     Array<outType> ret = createValueArray<outType>(dims, default_value);
     ret.eval();
     in.eval();
-    ENQUEUE(kernel::copy<outType, inType>, ret, in, outType(default_value), factor);
+    getQueue().enqueue(kernel::copy<outType, inType>, ret, in, outType(default_value), factor);
     return ret;
 }
 
@@ -69,7 +70,7 @@ void copyArray(Array<outType> &out, Array<inType> const &in)
 {
     out.eval();
     in.eval();
-    ENQUEUE(kernel::copy<outType, inType>, out, in, scalar<outType>(0), 1.0);
+    getQueue().enqueue(kernel::copy<outType, inType>, out, in, scalar<outType>(0), 1.0);
 }
 
 #define INSTANTIATE(T)                                                  \
diff --git a/src/backend/cpu/debug_cpu.hpp b/src/backend/cpu/debug_cpu.hpp
deleted file mode 100644
index cbcdc22..0000000
--- a/src/backend/cpu/debug_cpu.hpp
+++ /dev/null
@@ -1,31 +0,0 @@
-/*******************************************************
- * Copyright (c) 2015, ArrayFire
- * All rights reserved.
- *
- * This file is distributed under 3-clause BSD license.
- * The complete license agreement can be obtained at:
- * http://arrayfire.com/licenses/BSD-3-Clause
- ********************************************************/
-
-#pragma once
-#include <platform.hpp>
-#include <queue.hpp>
-#include <err_cpu.hpp>
-
-#ifndef NDEBUG
-
-#define POST_LAUNCH_CHECK() do {                        \
-        getQueue().sync();                              \
-    } while(0)                                          \
-
-#else
-
-#define POST_LAUNCH_CHECK() //no-op
-
-#endif
-
-#define ENQUEUE(...)                        \
-    do {                                    \
-        getQueue().enqueue(__VA_ARGS__);    \
-        POST_LAUNCH_CHECK();                \
-    } while(0)
diff --git a/src/backend/cpu/diagonal.cpp b/src/backend/cpu/diagonal.cpp
index 6fd918d..c818f82 100644
--- a/src/backend/cpu/diagonal.cpp
+++ b/src/backend/cpu/diagonal.cpp
@@ -15,7 +15,8 @@
 #include <diagonal.hpp>
 #include <math.hpp>
 #include <err_cpu.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/diagonal.hpp>
 
 namespace cpu
@@ -30,7 +31,7 @@ Array<T> diagCreate(const Array<T> &in, const int num)
     int batch = in.dims()[1];
     Array<T> out = createEmptyArray<T>(dim4(size, size, batch));
 
-    ENQUEUE(kernel::diagCreate<T>, out, in, num);
+    getQueue().enqueue(kernel::diagCreate<T>, out, in, num);
 
     return out;
 }
@@ -44,7 +45,7 @@ Array<T> diagExtract(const Array<T> &in, const int num)
     dim_t size = std::max(idims[0], idims[1]) - std::abs(num);
     Array<T> out = createEmptyArray<T>(dim4(size, 1, idims[2], idims[3]));
 
-    ENQUEUE(kernel::diagExtract<T>, out, in, num);
+    getQueue().enqueue(kernel::diagExtract<T>, out, in, num);
 
     return out;
 }
diff --git a/src/backend/cpu/diff.cpp b/src/backend/cpu/diff.cpp
index efab130..1e374e9 100644
--- a/src/backend/cpu/diff.cpp
+++ b/src/backend/cpu/diff.cpp
@@ -9,7 +9,8 @@
 
 #include <Array.hpp>
 #include <diff.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/diff.hpp>
 
 namespace cpu
@@ -26,7 +27,7 @@ Array<T>  diff1(const Array<T> &in, const int dim)
 
     Array<T> outArray = createEmptyArray<T>(dims);
 
-    ENQUEUE(kernel::diff1<T>, outArray, in, dim);
+    getQueue().enqueue(kernel::diff1<T>, outArray, in, dim);
 
     return outArray;
 }
@@ -42,7 +43,7 @@ Array<T>  diff2(const Array<T> &in, const int dim)
 
     Array<T> outArray = createEmptyArray<T>(dims);
 
-    ENQUEUE(kernel::diff2<T>, outArray, in, dim);
+    getQueue().enqueue(kernel::diff2<T>, outArray, in, dim);
 
     return outArray;
 }
diff --git a/src/backend/cpu/fast.cpp b/src/backend/cpu/fast.cpp
index 1b3a7aa..954f457 100644
--- a/src/backend/cpu/fast.cpp
+++ b/src/backend/cpu/fast.cpp
@@ -14,7 +14,8 @@
 #include <err_cpu.hpp>
 #include <handle.hpp>
 #include <fast.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/fast.hpp>
 
 using af::dim4;
diff --git a/src/backend/cpu/fft.cpp b/src/backend/cpu/fft.cpp
index 1282963..3c1d10a 100644
--- a/src/backend/cpu/fft.cpp
+++ b/src/backend/cpu/fft.cpp
@@ -15,7 +15,8 @@
 #include <kernel/fft.hpp>
 #include <copy.hpp>
 #include <math.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 using af::dim4;
 
@@ -26,7 +27,7 @@ template<typename T, int rank, bool direction>
 void fft_inplace(Array<T> &in)
 {
     in.eval();
-    ENQUEUE(kernel::fft_inplace<T, rank, direction>, in);
+    getQueue().enqueue(kernel::fft_inplace<T, rank, direction>, in);
 }
 
 template<typename Tc, typename Tr, int rank>
@@ -38,7 +39,7 @@ Array<Tc> fft_r2c(const Array<Tr> &in)
     odims[0] = odims[0] / 2 + 1;
     Array<Tc> out = createEmptyArray<Tc>(odims);
 
-    ENQUEUE(kernel::fft_r2c<Tc, Tr, rank>, out, in);
+    getQueue().enqueue(kernel::fft_r2c<Tc, Tr, rank>, out, in);
 
     return out;
 }
@@ -49,7 +50,7 @@ Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims)
     in.eval();
 
     Array<Tr> out = createEmptyArray<Tr>(odims);
-    ENQUEUE(kernel::fft_c2r<Tr, Tc, rank>, out, in, odims);
+    getQueue().enqueue(kernel::fft_c2r<Tr, Tc, rank>, out, in, odims);
 
     return out;
 }
diff --git a/src/backend/cpu/fftconvolve.cpp b/src/backend/cpu/fftconvolve.cpp
index aac66cd..3b4b864 100644
--- a/src/backend/cpu/fftconvolve.cpp
+++ b/src/backend/cpu/fftconvolve.cpp
@@ -17,7 +17,8 @@
 #include <fftw3.h>
 #include <copy.hpp>
 #include <convolve_common.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/fftconvolve.hpp>
 
 namespace cpu
@@ -83,11 +84,11 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
 
     // Pack signal in a complex matrix where first dimension is half the input
     // (allows faster FFT computation) and pad array to a power of 2 with 0s
-    ENQUEUE(kernel::packData<convT, T>, packed, sig_tmp_dims, sig_tmp_strides, signal);
+    getQueue().enqueue(kernel::packData<convT, T>, packed, sig_tmp_dims, sig_tmp_strides, signal);
 
     // Pad filter array with 0s
     const dim_t offset = sig_tmp_strides[3]*sig_tmp_dims[3];
-    ENQUEUE(kernel::padArray<convT, T>, packed, filter_tmp_dims, filter_tmp_strides,
+    getQueue().enqueue(kernel::padArray<convT, T>, packed, filter_tmp_dims, filter_tmp_strides,
                        filter, offset);
 
     dim4 fftDims(1, 1, 1, 1);
@@ -137,10 +138,10 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
             fftwf_destroy_plan(plan);
         }
     };
-    ENQUEUE(upstream_dft, packed, fftDims);
+    getQueue().enqueue(upstream_dft, packed, fftDims);
 
     // Multiply filter and signal FFT arrays
-    ENQUEUE(kernel::complexMultiply<convT>, packed,
+    getQueue().enqueue(kernel::complexMultiply<convT>, packed,
                        sig_tmp_dims, sig_tmp_strides,
                        filter_tmp_dims, filter_tmp_strides,
                        kind, offset);
@@ -188,7 +189,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
             fftwf_destroy_plan(plan);
         }
     };
-    ENQUEUE(upstream_idft, packed, fftDims);
+    getQueue().enqueue(upstream_idft, packed, fftDims);
 
     // Compute output dimensions
     dim4 oDims(1);
@@ -210,7 +211,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
 
     Array<T> out = createEmptyArray<T>(oDims);
 
-    ENQUEUE(kernel::reorder<T, convT, roundOut, baseDim>, out, packed, filter,
+    getQueue().enqueue(kernel::reorder<T, convT, roundOut, baseDim>, out, packed, filter,
                        sig_half_d0, fftScale, sig_tmp_dims, sig_tmp_strides, filter_tmp_dims,
                        filter_tmp_strides, expand, kind);
 
diff --git a/src/backend/cpu/gradient.cpp b/src/backend/cpu/gradient.cpp
index 57776e5..aa417f4 100644
--- a/src/backend/cpu/gradient.cpp
+++ b/src/backend/cpu/gradient.cpp
@@ -12,7 +12,8 @@
 #include <math.hpp>
 #include <stdexcept>
 #include <err_cpu.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/gradient.hpp>
 
 namespace cpu
@@ -25,7 +26,7 @@ void gradient(Array<T> &grad0, Array<T> &grad1, const Array<T> &in)
     grad1.eval();
     in.eval();
 
-    ENQUEUE(kernel::gradient<T>, grad0, grad1, in);
+    getQueue().enqueue(kernel::gradient<T>, grad0, grad1, in);
 }
 
 #define INSTANTIATE(T)                                                                  \
diff --git a/src/backend/cpu/harris.cpp b/src/backend/cpu/harris.cpp
index 07b9bed..b5ea0ca 100644
--- a/src/backend/cpu/harris.cpp
+++ b/src/backend/cpu/harris.cpp
@@ -18,7 +18,8 @@
 #include <gradient.hpp>
 #include <sort_index.hpp>
 #include <cstring>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/harris.hpp>
 
 using af::dim4;
@@ -52,14 +53,14 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
     Array<T> iy = createEmptyArray<T>(idims);
 
     // Compute first order derivatives
-    ENQUEUE(gradient<T>, iy, ix, in);
+    getQueue().enqueue(gradient<T>, iy, ix, in);
 
     Array<T> ixx = createEmptyArray<T>(idims);
     Array<T> ixy = createEmptyArray<T>(idims);
     Array<T> iyy = createEmptyArray<T>(idims);
 
     // Compute second-order derivatives
-    ENQUEUE(kernel::second_order_deriv<T>, ixx, ixy, iyy, in.elements(), ix, iy);
+    getQueue().enqueue(kernel::second_order_deriv<T>, ixx, ixy, iyy, in.elements(), ix, iy);
 
     // Convolve second-order derivatives with proper window filter
     ixx = convolve2<T, convAccT, false>(ixx, filter, filter);
@@ -70,7 +71,7 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
 
     Array<T> responses = createEmptyArray<T>(dim4(in.elements()));
 
-    ENQUEUE(kernel::harris_responses<T>, responses, idims[0], idims[1],
+    getQueue().enqueue(kernel::harris_responses<T>, responses, idims[0], idims[1],
                        ixx, ixy, iyy, k_thr, border_len);
 
     Array<float> xCorners    = createEmptyArray<float>(dim4(corner_lim));
@@ -104,7 +105,7 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
         resp_out = createEmptyArray<float>(dim4(corners_out));
 
         // Keep only the corners with higher Harris responses
-        ENQUEUE(kernel::keep_corners, x_out, y_out, resp_out, xCorners, yCorners,
+        getQueue().enqueue(kernel::keep_corners, x_out, y_out, resp_out, xCorners, yCorners,
                            harris_sorted, harris_idx, corners_out);
     } else if (max_corners == 0 && corners_found < corner_lim) {
         x_out = createEmptyArray<float>(dim4(corners_out));
@@ -119,7 +120,7 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
             memcpy(y_out.get(), y_crnrs.get(), corners_out * sizeof(float));
             memcpy(outResponses.get(), inResponses.get(), corners_out * sizeof(float));
         };
-        ENQUEUE(copyFunc, x_out, y_out, resp_out,
+        getQueue().enqueue(copyFunc, x_out, y_out, resp_out,
                            xCorners, yCorners, respCorners, corners_out);
     } else {
         x_out = xCorners;
diff --git a/src/backend/cpu/hist_graphics.cpp b/src/backend/cpu/hist_graphics.cpp
index c58f5c6..ad7d690 100644
--- a/src/backend/cpu/hist_graphics.cpp
+++ b/src/backend/cpu/hist_graphics.cpp
@@ -11,7 +11,8 @@
 
 #include <hist_graphics.hpp>
 #include <err_cpu.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
diff --git a/src/backend/cpu/histogram.cpp b/src/backend/cpu/histogram.cpp
index 2571f3e..6aa60e5 100644
--- a/src/backend/cpu/histogram.cpp
+++ b/src/backend/cpu/histogram.cpp
@@ -12,7 +12,8 @@
 #include <ArrayInfo.hpp>
 #include <Array.hpp>
 #include <histogram.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/histogram.hpp>
 
 using af::dim4;
@@ -31,7 +32,7 @@ Array<outType> histogram(const Array<inType> &in,
     Array<outType> out = createValueArray<outType>(outDims, outType(0));
     out.eval();
 
-    ENQUEUE(kernel::histogram<outType, inType, isLinear>,
+    getQueue().enqueue(kernel::histogram<outType, inType, isLinear>,
             out, in, nbins, minval, maxval);
 
     return out;
diff --git a/src/backend/cpu/homography.cpp b/src/backend/cpu/homography.cpp
index 147f5e8..4d131cf 100644
--- a/src/backend/cpu/homography.cpp
+++ b/src/backend/cpu/homography.cpp
@@ -18,7 +18,8 @@
 #include <random.hpp>
 #include <cstring>
 #include <cfloat>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 using af::dim4;
 
diff --git a/src/backend/cpu/hsv_rgb.cpp b/src/backend/cpu/hsv_rgb.cpp
index da5dbe0..4044917 100644
--- a/src/backend/cpu/hsv_rgb.cpp
+++ b/src/backend/cpu/hsv_rgb.cpp
@@ -11,7 +11,8 @@
 #include <Array.hpp>
 #include <ArrayInfo.hpp>
 #include <hsv_rgb.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/hsv_rgb.hpp>
 
 using af::dim4;
@@ -26,7 +27,7 @@ Array<T> hsv2rgb(const Array<T>& in)
 
     Array<T> out = createEmptyArray<T>(in.dims());
 
-    ENQUEUE(kernel::hsv2rgb<T>, out, in);
+    getQueue().enqueue(kernel::hsv2rgb<T>, out, in);
 
     return out;
 }
@@ -38,7 +39,7 @@ Array<T> rgb2hsv(const Array<T>& in)
 
     Array<T> out = createEmptyArray<T>(in.dims());
 
-    ENQUEUE(kernel::rgb2hsv<T>, out, in);
+    getQueue().enqueue(kernel::rgb2hsv<T>, out, in);
 
     return out;
 }
diff --git a/src/backend/cpu/identity.cpp b/src/backend/cpu/identity.cpp
index 071bb04..c5e1102 100644
--- a/src/backend/cpu/identity.cpp
+++ b/src/backend/cpu/identity.cpp
@@ -10,7 +10,8 @@
 #include <af/dim4.hpp>
 #include <Array.hpp>
 #include <identity.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/identity.hpp>
 
 namespace cpu
@@ -21,7 +22,7 @@ Array<T> identity(const dim4& dims)
 {
     Array<T> out = createEmptyArray<T>(dims);
 
-    ENQUEUE(kernel::identity<T>, out);
+    getQueue().enqueue(kernel::identity<T>, out);
 
     return out;
 }
diff --git a/src/backend/cpu/iir.cpp b/src/backend/cpu/iir.cpp
index cb390b3..049212a 100644
--- a/src/backend/cpu/iir.cpp
+++ b/src/backend/cpu/iir.cpp
@@ -13,7 +13,8 @@
 #include <Array.hpp>
 #include <iir.hpp>
 #include <convolve.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/iir.hpp>
 
 using af::dim4;
@@ -41,7 +42,7 @@ Array<T> iir(const Array<T> &b, const Array<T> &a, const Array<T> &x)
 
     Array<T> y = createEmptyArray<T>(c.dims());
 
-    ENQUEUE(kernel::iir<T>, y, c, a);
+    getQueue().enqueue(kernel::iir<T>, y, c, a);
 
     return y;
 }
diff --git a/src/backend/cpu/image.cpp b/src/backend/cpu/image.cpp
index d23ba80..b71ba23 100644
--- a/src/backend/cpu/image.cpp
+++ b/src/backend/cpu/image.cpp
@@ -16,7 +16,8 @@
 #include <image.hpp>
 #include <err_cpu.hpp>
 #include <graphics_common.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 using af::dim4;
 
diff --git a/src/backend/cpu/index.cpp b/src/backend/cpu/index.cpp
index 9c951ff..a2cdac8 100644
--- a/src/backend/cpu/index.cpp
+++ b/src/backend/cpu/index.cpp
@@ -14,7 +14,8 @@
 #include <index.hpp>
 #include <handle.hpp>
 #include <vector>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <utility>
 #include <kernel/index.hpp>
 
@@ -57,7 +58,7 @@ Array<T> index(const Array<T>& in, const af_index_t idxrs[])
     Array<T> out = createEmptyArray<T>(oDims);
 
 
-    ENQUEUE(kernel::index<T>, out, in, std::move(isSeq), std::move(seqs), std::move(idxArrs));
+    getQueue().enqueue(kernel::index<T>, out, in, std::move(isSeq), std::move(seqs), std::move(idxArrs));
 
     return out;
 }
diff --git a/src/backend/cpu/inverse.cpp b/src/backend/cpu/inverse.cpp
index 71cc9fe..ea7d7ee 100644
--- a/src/backend/cpu/inverse.cpp
+++ b/src/backend/cpu/inverse.cpp
@@ -23,7 +23,8 @@
 #include <lu.hpp>
 #include <identity.hpp>
 #include <solve.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
@@ -67,7 +68,7 @@ Array<T> inverse(const Array<T> &in)
                 A.get(), A.strides()[1],
                 pivot.get());
     };
-    ENQUEUE(func, A, pivot, M);
+    getQueue().enqueue(func, A, pivot, M);
 
     return A;
 }
diff --git a/src/backend/cpu/iota.cpp b/src/backend/cpu/iota.cpp
index 124ec5c..db19708 100644
--- a/src/backend/cpu/iota.cpp
+++ b/src/backend/cpu/iota.cpp
@@ -10,7 +10,8 @@
 #include <Array.hpp>
 #include <iota.hpp>
 #include <math.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/iota.hpp>
 
 using namespace std;
@@ -25,7 +26,7 @@ Array<T> iota(const dim4 &dims, const dim4 &tile_dims)
 
     Array<T> out = createEmptyArray<T>(outdims);
 
-    ENQUEUE(kernel::iota<T>, out, dims, tile_dims);
+    getQueue().enqueue(kernel::iota<T>, out, dims, tile_dims);
 
     return out;
 }
diff --git a/src/backend/cpu/ireduce.cpp b/src/backend/cpu/ireduce.cpp
index 9de4a78..a40fbdf 100644
--- a/src/backend/cpu/ireduce.cpp
+++ b/src/backend/cpu/ireduce.cpp
@@ -13,7 +13,8 @@
 #include <ArrayInfo.hpp>
 #include <Array.hpp>
 #include <ireduce.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/ireduce.hpp>
 
 using af::dim4;
@@ -39,7 +40,7 @@ void ireduce(Array<T> &out, Array<uint> &loc, const Array<T> &in, const int dim)
                                                            , kernel::ireduce_dim<op, T, 3>()
                                                            , kernel::ireduce_dim<op, T, 4>()};
 
-    ENQUEUE(ireduce_funcs[in.ndims() - 1], out, loc, 0, in, 0, dim);
+    getQueue().enqueue(ireduce_funcs[in.ndims() - 1], out, loc, 0, in, 0, dim);
 }
 
 template<af_op_t op, typename T>
diff --git a/src/backend/cpu/join.cpp b/src/backend/cpu/join.cpp
index 6c9ba8f..0a5b99c 100644
--- a/src/backend/cpu/join.cpp
+++ b/src/backend/cpu/join.cpp
@@ -9,7 +9,8 @@
 
 #include <Array.hpp>
 #include <join.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/join.hpp>
 
 namespace cpu
@@ -37,7 +38,7 @@ Array<Tx> join(const int dim, const Array<Tx> &first, const Array<Ty> &second)
 
     Array<Tx> out = createEmptyArray<Tx>(odims);
 
-    ENQUEUE(kernel::join<Tx, Ty>, out, dim, first, second);
+    getQueue().enqueue(kernel::join<Tx, Ty>, out, dim, first, second);
 
     return out;
 }
@@ -71,34 +72,34 @@ Array<T> join(const int dim, const std::vector<Array<T>> &inputs)
 
     switch(n_arrays) {
         case 1:
-            ENQUEUE(kernel::join<T, 1>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T, 1>, dim, out, inputs);
             break;
         case 2:
-            ENQUEUE(kernel::join<T, 2>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T, 2>, dim, out, inputs);
             break;
         case 3:
-            ENQUEUE(kernel::join<T, 3>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T, 3>, dim, out, inputs);
             break;
         case 4:
-            ENQUEUE(kernel::join<T, 4>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T, 4>, dim, out, inputs);
             break;
         case 5:
-            ENQUEUE(kernel::join<T, 5>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T, 5>, dim, out, inputs);
             break;
         case 6:
-            ENQUEUE(kernel::join<T, 6>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T, 6>, dim, out, inputs);
             break;
         case 7:
-            ENQUEUE(kernel::join<T, 7>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T, 7>, dim, out, inputs);
             break;
         case 8:
-            ENQUEUE(kernel::join<T, 8>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T, 8>, dim, out, inputs);
             break;
         case 9:
-            ENQUEUE(kernel::join<T, 9>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T, 9>, dim, out, inputs);
             break;
         case 10:
-            ENQUEUE(kernel::join<T,10>, dim, out, inputs);
+            getQueue().enqueue(kernel::join<T,10>, dim, out, inputs);
             break;
     }
 
diff --git a/src/backend/cpu/lookup.cpp b/src/backend/cpu/lookup.cpp
index 4cc5359..1e09f4d 100644
--- a/src/backend/cpu/lookup.cpp
+++ b/src/backend/cpu/lookup.cpp
@@ -9,7 +9,8 @@
 
 #include <lookup.hpp>
 #include <cstdlib>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/lookup.hpp>
 
 namespace cpu
@@ -29,7 +30,7 @@ Array<in_t> lookup(const Array<in_t> &input, const Array<idx_t> &indices, const
 
     Array<in_t> out = createEmptyArray<in_t>(oDims);
 
-    ENQUEUE(kernel::lookup<in_t, idx_t>, out, input, indices, dim);
+    getQueue().enqueue(kernel::lookup<in_t, idx_t>, out, input, indices, dim);
 
     return out;
 }
diff --git a/src/backend/cpu/lu.cpp b/src/backend/cpu/lu.cpp
index 551c9c9..93862f2 100644
--- a/src/backend/cpu/lu.cpp
+++ b/src/backend/cpu/lu.cpp
@@ -17,7 +17,8 @@
 #include <cassert>
 #include <range.hpp>
 #include <lapack_helper.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/lu.hpp>
 
 namespace cpu
@@ -58,7 +59,7 @@ void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in)
     lower = createEmptyArray<T>(ldims);
     upper = createEmptyArray<T>(udims);
 
-    ENQUEUE(kernel::lu_split<T>, lower, upper, in_copy);
+    getQueue().enqueue(kernel::lu_split<T>, lower, upper, in_copy);
 }
 
 template<typename T>
@@ -73,11 +74,11 @@ Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
         dim4 iDims = in.dims();
         getrf_func<T>()(AF_LAPACK_COL_MAJOR, iDims[0], iDims[1], in.get(), in.strides()[1], pivot.get());
     };
-    ENQUEUE(func, in, pivot);
+    getQueue().enqueue(func, in, pivot);
 
     if(convert_pivot) {
         Array<int> p = range<int>(dim4(iDims[0]), 0);
-        ENQUEUE(kernel::convertPivot, p, pivot);
+        getQueue().enqueue(kernel::convertPivot, p, pivot);
         return p;
     } else {
         return pivot;
diff --git a/src/backend/cpu/match_template.cpp b/src/backend/cpu/match_template.cpp
index 724b773..58091a1 100644
--- a/src/backend/cpu/match_template.cpp
+++ b/src/backend/cpu/match_template.cpp
@@ -12,7 +12,8 @@
 #include <ArrayInfo.hpp>
 #include <Array.hpp>
 #include <match_template.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/match_template.hpp>
 
 using af::dim4;
@@ -28,7 +29,7 @@ Array<OutT> match_template(const Array<InT> &sImg, const Array<InT> &tImg)
 
     Array<OutT> out = createEmptyArray<OutT>(sImg.dims());
 
-    ENQUEUE(kernel::matchTemplate<OutT, InT, MatchT>, out, sImg, tImg);
+    getQueue().enqueue(kernel::matchTemplate<OutT, InT, MatchT>, out, sImg, tImg);
 
     return out;
 }
diff --git a/src/backend/cpu/meanshift.cpp b/src/backend/cpu/meanshift.cpp
index f4a0b29..b5bbf75 100644
--- a/src/backend/cpu/meanshift.cpp
+++ b/src/backend/cpu/meanshift.cpp
@@ -16,7 +16,8 @@
 #include <algorithm>
 #include <err_cpu.hpp>
 #include <math.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/meanshift.hpp>
 
 using af::dim4;
@@ -32,7 +33,7 @@ Array<T>  meanshift(const Array<T> &in, const float &s_sigma, const float &c_sig
 
     Array<T> out = createEmptyArray<T>(in.dims());
 
-    ENQUEUE(kernel::meanShift<T, is_color>, out, in, s_sigma, c_sigma, iter);
+    getQueue().enqueue(kernel::meanShift<T, is_color>, out, in, s_sigma, c_sigma, iter);
 
     return out;
 }
diff --git a/src/backend/cpu/medfilt.cpp b/src/backend/cpu/medfilt.cpp
index 9e761c6..8ae4e33 100644
--- a/src/backend/cpu/medfilt.cpp
+++ b/src/backend/cpu/medfilt.cpp
@@ -12,7 +12,8 @@
 #include <ArrayInfo.hpp>
 #include <Array.hpp>
 #include <medfilt.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/medfilt.hpp>
 
 using af::dim4;
@@ -27,7 +28,7 @@ Array<T> medfilt(const Array<T> &in, dim_t w_len, dim_t w_wid)
 
     Array<T> out = createEmptyArray<T>(in.dims());
 
-    ENQUEUE(kernel::medfilt<T, pad>, out, in, w_len, w_wid);
+    getQueue().enqueue(kernel::medfilt<T, pad>, out, in, w_len, w_wid);
 
     return out;
 }
diff --git a/src/backend/cpu/memory.cpp b/src/backend/cpu/memory.cpp
index 79f2e57..85ba4f2 100644
--- a/src/backend/cpu/memory.cpp
+++ b/src/backend/cpu/memory.cpp
@@ -14,7 +14,8 @@
 #include <dispatch.hpp>
 #include <cstdlib>
 #include <mutex>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
diff --git a/src/backend/cpu/morph.cpp b/src/backend/cpu/morph.cpp
index 337e8a9..1ae4680 100644
--- a/src/backend/cpu/morph.cpp
+++ b/src/backend/cpu/morph.cpp
@@ -13,7 +13,8 @@
 #include <Array.hpp>
 #include <morph.hpp>
 #include <algorithm>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/morph.hpp>
 
 using af::dim4;
@@ -29,7 +30,7 @@ Array<T> morph(const Array<T> &in, const Array<T> &mask)
 
     Array<T> out = createEmptyArray<T>(in.dims());
 
-    ENQUEUE(kernel::morph<T, isDilation>, out, in, mask);
+    getQueue().enqueue(kernel::morph<T, isDilation>, out, in, mask);
 
     return out;
 }
@@ -42,7 +43,7 @@ Array<T> morph3d(const Array<T> &in, const Array<T> &mask)
 
     Array<T> out = createEmptyArray<T>(in.dims());
 
-    ENQUEUE(kernel::morph3d<T, isDilation>, out, in, mask);
+    getQueue().enqueue(kernel::morph3d<T, isDilation>, out, in, mask);
 
     return out;
 }
diff --git a/src/backend/cpu/nearest_neighbour.cpp b/src/backend/cpu/nearest_neighbour.cpp
index a3c2bb1..f1daba7 100644
--- a/src/backend/cpu/nearest_neighbour.cpp
+++ b/src/backend/cpu/nearest_neighbour.cpp
@@ -12,7 +12,8 @@
 #include <ArrayInfo.hpp>
 #include <Array.hpp>
 #include <handle.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/nearest_neighbour.hpp>
 
 using af::dim4;
@@ -42,13 +43,13 @@ void nearest_neighbour(Array<uint>& idx, Array<To>& dist,
 
     switch(dist_type) {
         case AF_SAD:
-            ENQUEUE(kernel::nearest_neighbour<T, To, AF_SAD>, idx, dist, query, train, dist_dim, n_dist);
+            getQueue().enqueue(kernel::nearest_neighbour<T, To, AF_SAD>, idx, dist, query, train, dist_dim, n_dist);
             break;
         case AF_SSD:
-            ENQUEUE(kernel::nearest_neighbour<T, To, AF_SSD>, idx, dist, query, train, dist_dim, n_dist);
+            getQueue().enqueue(kernel::nearest_neighbour<T, To, AF_SSD>, idx, dist, query, train, dist_dim, n_dist);
             break;
         case AF_SHD:
-            ENQUEUE(kernel::nearest_neighbour<T, To, AF_SHD>, idx, dist, query, train, dist_dim, n_dist);
+            getQueue().enqueue(kernel::nearest_neighbour<T, To, AF_SHD>, idx, dist, query, train, dist_dim, n_dist);
             break;
         default:
             AF_ERROR("Unsupported dist_type", AF_ERR_NOT_CONFIGURED);
diff --git a/src/backend/cpu/orb.cpp b/src/backend/cpu/orb.cpp
index 649619e..8bbfd41 100644
--- a/src/backend/cpu/orb.cpp
+++ b/src/backend/cpu/orb.cpp
@@ -18,7 +18,8 @@
 #include <convolve.hpp>
 #include <memory.hpp>
 #include <cstring>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/orb.hpp>
 
 using af::dim4;
diff --git a/src/backend/cpu/platform.cpp b/src/backend/cpu/platform.cpp
index 6ae63a9..19942f0 100644
--- a/src/backend/cpu/platform.cpp
+++ b/src/backend/cpu/platform.cpp
@@ -9,8 +9,9 @@
 
 #include <af/version.h>
 #include <af/defines.h>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
 #include <sstream>
+#include <queue.hpp>
 #include <array>
 #include <algorithm>
 #include <iostream>
diff --git a/src/backend/cpu/plot.cpp b/src/backend/cpu/plot.cpp
index 8afdea2..2ab6964 100644
--- a/src/backend/cpu/plot.cpp
+++ b/src/backend/cpu/plot.cpp
@@ -13,7 +13,8 @@
 #include <plot.hpp>
 #include <err_cpu.hpp>
 #include <graphics_common.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 using af::dim4;
 
diff --git a/src/backend/cpu/plot3.cpp b/src/backend/cpu/plot3.cpp
index c7beed6..515fe03 100644
--- a/src/backend/cpu/plot3.cpp
+++ b/src/backend/cpu/plot3.cpp
@@ -13,7 +13,8 @@
 #include <plot3.hpp>
 #include <err_cpu.hpp>
 #include <graphics_common.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 using af::dim4;
 
diff --git a/src/backend/cpu/qr.cpp b/src/backend/cpu/qr.cpp
index ca04ec9..34a39f6 100644
--- a/src/backend/cpu/qr.cpp
+++ b/src/backend/cpu/qr.cpp
@@ -17,7 +17,8 @@
 #include <err_cpu.hpp>
 #include <triangle.hpp>
 #include <lapack_helper.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
@@ -78,7 +79,7 @@ void qr(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in)
         gqr_func<T>()(AF_LAPACK_COL_MAJOR, M, M, min(M, N), q.get(), q.strides()[1], t.get());
     };
     q.resetDims(dim4(M, M));
-    ENQUEUE(func, q, t, M, N);
+    getQueue().enqueue(func, q, t, M, N);
 }
 
 template<typename T>
@@ -94,7 +95,7 @@ Array<T> qr_inplace(Array<T> &in)
     auto func = [=] (Array<T> in, Array<T> t, int M, int N) {
         geqrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N, in.get(), in.strides()[1], t.get());
     };
-    ENQUEUE(func, in, t, M, N);
+    getQueue().enqueue(func, in, t, M, N);
 
     return t;
 }
diff --git a/src/backend/cpu/queue.hpp b/src/backend/cpu/queue.hpp
index 6e5cd71..942ae25 100644
--- a/src/backend/cpu/queue.hpp
+++ b/src/backend/cpu/queue.hpp
@@ -25,6 +25,10 @@ public:
 
     if(sync_calls) { func( args... ); }
     else           { aQueue.enqueue( func, args... ); }
+#ifndef NDEBUG
+    sync();
+#endif
+
   }
   void sync() {
     if(!sync_calls) aQueue.sync();
diff --git a/src/backend/cpu/random.cpp b/src/backend/cpu/random.cpp
index f49420f..89d86c3 100644
--- a/src/backend/cpu/random.cpp
+++ b/src/backend/cpu/random.cpp
@@ -12,7 +12,8 @@
 #include <af/defines.h>
 #include <Array.hpp>
 #include <random.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/random.hpp>
 
 namespace cpu
@@ -22,7 +23,7 @@ template<typename T>
 Array<T> randu(const af::dim4 &dims)
 {
     Array<T> outArray = createEmptyArray<T>(dims);
-    ENQUEUE(kernel::randu<T>, outArray);
+    getQueue().enqueue(kernel::randu<T>, outArray);
     return outArray;
 }
 
@@ -45,7 +46,7 @@ template<typename T>
 Array<T> randn(const af::dim4 &dims)
 {
     Array<T> outArray = createEmptyArray<T>(dims);
-    ENQUEUE(kernel::randn<T>, outArray);
+    getQueue().enqueue(kernel::randn<T>, outArray);
     return outArray;
 }
 
@@ -80,7 +81,7 @@ Array<char> randu(const af::dim4 &dims)
             outPtr[i] = gen() > 0.5;
         }
     };
-    ENQUEUE(func, outArray);
+    getQueue().enqueue(func, outArray);
 
     return outArray;
 }
@@ -92,7 +93,7 @@ void setSeed(const uintl seed)
         kernel::is_first = false;
         kernel::gen_seed = seed;
     };
-    ENQUEUE(f, seed);
+    getQueue().enqueue(f, seed);
 }
 
 uintl getSeed()
diff --git a/src/backend/cpu/range.cpp b/src/backend/cpu/range.cpp
index 6be78d5..e91ba1e 100644
--- a/src/backend/cpu/range.cpp
+++ b/src/backend/cpu/range.cpp
@@ -14,7 +14,8 @@
 #include <err_cpu.hpp>
 #include <algorithm>
 #include <numeric>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/range.hpp>
 
 namespace cpu
@@ -32,10 +33,10 @@ Array<T> range(const dim4& dims, const int seq_dim)
 
     Array<T> out = createEmptyArray<T>(dims);
     switch(_seq_dim) {
-        case 0: ENQUEUE(kernel::range<T, 0>, out); break;
-        case 1: ENQUEUE(kernel::range<T, 1>, out); break;
-        case 2: ENQUEUE(kernel::range<T, 2>, out); break;
-        case 3: ENQUEUE(kernel::range<T, 3>, out); break;
+        case 0: getQueue().enqueue(kernel::range<T, 0>, out); break;
+        case 1: getQueue().enqueue(kernel::range<T, 1>, out); break;
+        case 2: getQueue().enqueue(kernel::range<T, 2>, out); break;
+        case 3: getQueue().enqueue(kernel::range<T, 3>, out); break;
         default : AF_ERROR("Invalid rep selection", AF_ERR_ARG);
     }
 
diff --git a/src/backend/cpu/reduce.cpp b/src/backend/cpu/reduce.cpp
index 90ad1f9..2d4d18e 100644
--- a/src/backend/cpu/reduce.cpp
+++ b/src/backend/cpu/reduce.cpp
@@ -15,7 +15,8 @@
 #include <ops.hpp>
 #include <functional>
 #include <complex>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/reduce.hpp>
 
 using af::dim4;
@@ -55,7 +56,7 @@ Array<To> reduce(const Array<Ti> &in, const int dim, bool change_nan, double nan
                                                                 , kernel::reduce_dim<op, Ti, To, 3>()
                                                                 , kernel::reduce_dim<op, Ti, To, 4>()};
 
-    ENQUEUE(reduce_funcs[in.ndims() - 1], out, 0, in, 0, dim, change_nan, nanval);
+    getQueue().enqueue(reduce_funcs[in.ndims() - 1], out, 0, in, 0, dim, change_nan, nanval);
 
     return out;
 }
diff --git a/src/backend/cpu/regions.cpp b/src/backend/cpu/regions.cpp
index eafc161..2384dd3 100644
--- a/src/backend/cpu/regions.cpp
+++ b/src/backend/cpu/regions.cpp
@@ -17,7 +17,8 @@
 #include <map>
 #include <set>
 #include <algorithm>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/regions.hpp>
 
 using af::dim4;
@@ -33,7 +34,7 @@ Array<T> regions(const Array<char> &in, af_connectivity connectivity)
     Array<T> out = createValueArray(in.dims(), (T)0);
     out.eval();
 
-    ENQUEUE(kernel::regions<T>, out, in, connectivity);
+    getQueue().enqueue(kernel::regions<T>, out, in, connectivity);
 
     return out;
 }
diff --git a/src/backend/cpu/reorder.cpp b/src/backend/cpu/reorder.cpp
index 237e5d6..bd15658 100644
--- a/src/backend/cpu/reorder.cpp
+++ b/src/backend/cpu/reorder.cpp
@@ -9,7 +9,8 @@
 
 #include <Array.hpp>
 #include <reorder.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/reorder.hpp>
 
 namespace cpu
@@ -26,7 +27,7 @@ Array<T> reorder(const Array<T> &in, const af::dim4 &rdims)
         oDims[i] = iDims[rdims[i]];
 
     Array<T> out = createEmptyArray<T>(oDims);
-    ENQUEUE(kernel::reorder<T>, out, in, oDims, rdims);
+    getQueue().enqueue(kernel::reorder<T>, out, in, oDims, rdims);
     return out;
 }
 
diff --git a/src/backend/cpu/resize.cpp b/src/backend/cpu/resize.cpp
index d6349a9..eaeb5d4 100644
--- a/src/backend/cpu/resize.cpp
+++ b/src/backend/cpu/resize.cpp
@@ -12,7 +12,8 @@
 #include <math.hpp>
 #include <types.hpp>
 #include <af/traits.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/resize.hpp>
 
 namespace cpu
@@ -31,11 +32,11 @@ Array<T> resize(const Array<T> &in, const dim_t odim0, const dim_t odim1,
 
     switch(method) {
         case AF_INTERP_NEAREST:
-            ENQUEUE(kernel::resize<T, AF_INTERP_NEAREST>, out, in); break;
+            getQueue().enqueue(kernel::resize<T, AF_INTERP_NEAREST>, out, in); break;
         case AF_INTERP_BILINEAR:
-            ENQUEUE(kernel::resize<T, AF_INTERP_BILINEAR>, out, in); break;
+            getQueue().enqueue(kernel::resize<T, AF_INTERP_BILINEAR>, out, in); break;
         case AF_INTERP_LOWER:
-            ENQUEUE(kernel::resize<T, AF_INTERP_LOWER>, out, in); break;
+            getQueue().enqueue(kernel::resize<T, AF_INTERP_LOWER>, out, in); break;
         default: break;
     }
     return out;
diff --git a/src/backend/cpu/rotate.cpp b/src/backend/cpu/rotate.cpp
index 289f369..0fb9b17 100644
--- a/src/backend/cpu/rotate.cpp
+++ b/src/backend/cpu/rotate.cpp
@@ -9,7 +9,8 @@
 
 #include <Array.hpp>
 #include <rotate.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include "transform_interp.hpp"
 #include <kernel/rotate.hpp>
 
@@ -26,13 +27,13 @@ Array<T> rotate(const Array<T> &in, const float theta, const af::dim4 &odims,
 
     switch(method) {
         case AF_INTERP_NEAREST:
-            ENQUEUE(kernel::rotate<T, AF_INTERP_NEAREST>, out, in, theta);
+            getQueue().enqueue(kernel::rotate<T, AF_INTERP_NEAREST>, out, in, theta);
             break;
         case AF_INTERP_BILINEAR:
-            ENQUEUE(kernel::rotate<T, AF_INTERP_BILINEAR>, out, in, theta);
+            getQueue().enqueue(kernel::rotate<T, AF_INTERP_BILINEAR>, out, in, theta);
             break;
         case AF_INTERP_LOWER:
-            ENQUEUE(kernel::rotate<T, AF_INTERP_LOWER>, out, in, theta);
+            getQueue().enqueue(kernel::rotate<T, AF_INTERP_LOWER>, out, in, theta);
             break;
         default:
             AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
diff --git a/src/backend/cpu/scan.cpp b/src/backend/cpu/scan.cpp
index adeb3d2..08431f8 100644
--- a/src/backend/cpu/scan.cpp
+++ b/src/backend/cpu/scan.cpp
@@ -14,7 +14,8 @@
 #include <Array.hpp>
 #include <scan.hpp>
 #include <ops.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/scan.hpp>
 
 using af::dim4;
@@ -33,19 +34,19 @@ Array<To> scan(const Array<Ti>& in, const int dim)
     switch (in.ndims()) {
         case 1:
             kernel::scan_dim<op, Ti, To, 1> func1;
-            ENQUEUE(func1, out, 0, in, 0, dim);
+            getQueue().enqueue(func1, out, 0, in, 0, dim);
             break;
         case 2:
             kernel::scan_dim<op, Ti, To, 2> func2;
-            ENQUEUE(func2, out, 0, in, 0, dim);
+            getQueue().enqueue(func2, out, 0, in, 0, dim);
             break;
         case 3:
             kernel::scan_dim<op, Ti, To, 3> func3;
-            ENQUEUE(func3, out, 0, in, 0, dim);
+            getQueue().enqueue(func3, out, 0, in, 0, dim);
             break;
         case 4:
             kernel::scan_dim<op, Ti, To, 4> func4;
-            ENQUEUE(func4, out, 0, in, 0, dim);
+            getQueue().enqueue(func4, out, 0, in, 0, dim);
             break;
     }
 
diff --git a/src/backend/cpu/select.cpp b/src/backend/cpu/select.cpp
index 4f845bc..1545a81 100644
--- a/src/backend/cpu/select.cpp
+++ b/src/backend/cpu/select.cpp
@@ -10,7 +10,8 @@
 #include <ArrayInfo.hpp>
 #include <Array.hpp>
 #include <select.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/select.hpp>
 
 using af::dim4;
@@ -25,7 +26,7 @@ void select(Array<T> &out, const Array<char> &cond, const Array<T> &a, const Arr
     cond.eval();
     a.eval();
     b.eval();
-    ENQUEUE(kernel::select<T>, out, cond, a, b);
+    getQueue().enqueue(kernel::select<T>, out, cond, a, b);
 }
 
 template<typename T, bool flip>
@@ -34,7 +35,7 @@ void select_scalar(Array<T> &out, const Array<char> &cond, const Array<T> &a, co
     out.eval();
     cond.eval();
     a.eval();
-    ENQUEUE(kernel::select_scalar<T, flip>, out, cond, a, b);
+    getQueue().enqueue(kernel::select_scalar<T, flip>, out, cond, a, b);
 }
 
 #define INSTANTIATE(T)                                              \
diff --git a/src/backend/cpu/set.cpp b/src/backend/cpu/set.cpp
index 49ce186..d6c2a61 100644
--- a/src/backend/cpu/set.cpp
+++ b/src/backend/cpu/set.cpp
@@ -18,7 +18,8 @@
 #include <sort.hpp>
 #include <err_cpu.hpp>
 #include <vector>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
diff --git a/src/backend/cpu/shift.cpp b/src/backend/cpu/shift.cpp
index fd56e4c..041f1ab 100644
--- a/src/backend/cpu/shift.cpp
+++ b/src/backend/cpu/shift.cpp
@@ -9,7 +9,8 @@
 
 #include <Array.hpp>
 #include <shift.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/shift.hpp>
 
 namespace cpu
@@ -23,7 +24,7 @@ Array<T> shift(const Array<T> &in, const int sdims[4])
     Array<T> out = createEmptyArray<T>(in.dims());
     const af::dim4 temp(sdims[0], sdims[1], sdims[2], sdims[3]);
 
-    ENQUEUE(kernel::shift<T>, out, in, temp);
+    getQueue().enqueue(kernel::shift<T>, out, in, temp);
 
     return out;
 }
diff --git a/src/backend/cpu/sobel.cpp b/src/backend/cpu/sobel.cpp
index 86c7363..5ece9bf 100644
--- a/src/backend/cpu/sobel.cpp
+++ b/src/backend/cpu/sobel.cpp
@@ -13,7 +13,8 @@
 #include <Array.hpp>
 #include <sobel.hpp>
 #include <convolve.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/sobel.hpp>
 
 using af::dim4;
@@ -31,8 +32,8 @@ sobelDerivatives(const Array<Ti> &img, const unsigned &ker_size)
     Array<To> dx = createEmptyArray<To>(img.dims());
     Array<To> dy = createEmptyArray<To>(img.dims());
 
-    ENQUEUE(kernel::derivative<Ti, To, true >, dx, img);
-    ENQUEUE(kernel::derivative<Ti, To, false>, dy, img);
+    getQueue().enqueue(kernel::derivative<Ti, To, true >, dx, img);
+    getQueue().enqueue(kernel::derivative<Ti, To, false>, dy, img);
 
     return std::make_pair(dx, dy);
 }
diff --git a/src/backend/cpu/solve.cpp b/src/backend/cpu/solve.cpp
index 5d1ec3b..48ea4de 100644
--- a/src/backend/cpu/solve.cpp
+++ b/src/backend/cpu/solve.cpp
@@ -16,7 +16,8 @@
 #include <cassert>
 #include <err_cpu.hpp>
 #include <lapack_helper.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
@@ -87,7 +88,7 @@ Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
                         N, NRHS, A.get(), A.strides()[1],
                         pivot.get(), B.get(), B.strides()[1]);
     };
-    ENQUEUE(func, A, B, pivot, N, NRHS);
+    getQueue().enqueue(func, A, B, pivot, N, NRHS);
 
     return B;
 }
@@ -108,7 +109,7 @@ Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop o
                         A.get(), A.strides()[1],
                         B.get(), B.strides()[1]);
     };
-    ENQUEUE(func, A, B, N, NRHS, options);
+    getQueue().enqueue(func, A, B, N, NRHS, options);
 
     return B;
 }
@@ -138,7 +139,7 @@ Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
             gesv_func<T>()(AF_LAPACK_COL_MAJOR, N, K, A.get(), A.strides()[1],
                            pivot.get(), B.get(), B.strides()[1]);
         };
-        ENQUEUE(func, A, B, pivot, N, K);
+        getQueue().enqueue(func, A, B, pivot, N, K);
     } else {
         auto func = [=] (Array<T> A, Array<T> B, int M, int N, int K) {
             int sM = A.strides()[1];
@@ -150,7 +151,7 @@ Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
                     B.get(), max(sM, sN));
         };
         B.resetDims(dim4(N, K));
-        ENQUEUE(func, A, B, M, N, K);
+        getQueue().enqueue(func, A, B, M, N, K);
     }
 
     return B;
diff --git a/src/backend/cpu/sort.cpp b/src/backend/cpu/sort.cpp
index 104a3df..bc6396b 100644
--- a/src/backend/cpu/sort.cpp
+++ b/src/backend/cpu/sort.cpp
@@ -13,7 +13,8 @@
 #include <copy.hpp>
 #include <algorithm>
 #include <functional>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/sort.hpp>
 
 namespace cpu
@@ -26,7 +27,7 @@ Array<T> sort(const Array<T> &in, const unsigned dim)
 
     Array<T> out = copyArray<T>(in);
     switch(dim) {
-        case 0: ENQUEUE(kernel::sort0<T, isAscending>, out); break;
+        case 0: getQueue().enqueue(kernel::sort0<T, isAscending>, out); break;
         default: AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
     }
     return out;
diff --git a/src/backend/cpu/sort_by_key.cpp b/src/backend/cpu/sort_by_key.cpp
index c683288..5a99257 100644
--- a/src/backend/cpu/sort_by_key.cpp
+++ b/src/backend/cpu/sort_by_key.cpp
@@ -9,7 +9,8 @@
 
 #include <Array.hpp>
 #include <sort_by_key.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/sort_by_key.hpp>
 
 namespace cpu
@@ -28,7 +29,7 @@ void sort_by_key(Array<Tk> &okey, Array<Tv> &oval,
     oidx.eval();
 
     switch(dim) {
-        case 0: ENQUEUE(kernel::sort0_by_key<Tk, Tv, isAscending>,
+        case 0: getQueue().enqueue(kernel::sort0_by_key<Tk, Tv, isAscending>,
                                    okey, oval, oidx, ikey, ival); break;
         default: AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
     }
diff --git a/src/backend/cpu/sort_index.cpp b/src/backend/cpu/sort_index.cpp
index c8c6d6e..77860ed 100644
--- a/src/backend/cpu/sort_index.cpp
+++ b/src/backend/cpu/sort_index.cpp
@@ -12,7 +12,8 @@
 #include <math.hpp>
 #include <algorithm>
 #include <numeric>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/sort_index.hpp>
 
 namespace cpu
@@ -26,7 +27,7 @@ void sort_index(Array<T> &val, Array<uint> &idx, const Array<T> &in, const uint
     val = createEmptyArray<T>(in.dims());
     idx = createEmptyArray<uint>(in.dims());
     switch(dim) {
-        case 0: ENQUEUE(kernel::sort0_index<T, isAscending>, val, idx, in); break;
+        case 0: getQueue().enqueue(kernel::sort0_index<T, isAscending>, val, idx, in); break;
         default: AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
     }
 }
diff --git a/src/backend/cpu/surface.cpp b/src/backend/cpu/surface.cpp
index 00d2b00..24c945c 100644
--- a/src/backend/cpu/surface.cpp
+++ b/src/backend/cpu/surface.cpp
@@ -13,7 +13,8 @@
 #include <surface.hpp>
 #include <err_cpu.hpp>
 #include <graphics_common.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 using af::dim4;
 
diff --git a/src/backend/cpu/susan.cpp b/src/backend/cpu/susan.cpp
index 4f1c327..55a2357 100644
--- a/src/backend/cpu/susan.cpp
+++ b/src/backend/cpu/susan.cpp
@@ -12,7 +12,8 @@
 #include <cmath>
 #include <math.hpp>
 #include <memory>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 #include <kernel/susan.hpp>
 
 using af::features;
@@ -39,9 +40,9 @@ unsigned susan(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out,
     auto corners_found= std::shared_ptr<unsigned>(memAlloc<unsigned>(1), memFree<unsigned>);
     corners_found.get()[0] = 0;
 
-    ENQUEUE(kernel::susan_responses<T>, response, in, idims[0], idims[1],
+    getQueue().enqueue(kernel::susan_responses<T>, response, in, idims[0], idims[1],
                        radius, diff_thr, geom_thr, edge);
-    ENQUEUE(kernel::non_maximal<T>, x_corners, y_corners, resp_corners, corners_found,
+    getQueue().enqueue(kernel::non_maximal<T>, x_corners, y_corners, resp_corners, corners_found,
                        idims[0], idims[1], response, edge, corner_lim);
     getQueue().sync();
 
diff --git a/src/backend/cpu/svd.cpp b/src/backend/cpu/svd.cpp
index 3ce627c..2ac58aa 100644
--- a/src/backend/cpu/svd.cpp
+++ b/src/backend/cpu/svd.cpp
@@ -15,7 +15,8 @@
 #if defined(WITH_CPU_LINEAR_ALGEBRA)
 #include <lapack_helper.hpp>
 #include <copy.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <queue.hpp>
 
 namespace cpu
 {
@@ -86,7 +87,7 @@ void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
                 s.get(), u.get(), u.strides()[1], vt.get(), vt.strides()[1], &superb[0]);
 #endif
     };
-    ENQUEUE(func, s, u, vt, in);
+    getQueue().enqueue(func, s, u, vt, in);
 }
 
 template <typename T, typename Tr>
diff --git a/src/backend/cpu/tile.cpp b/src/backend/cpu/tile.cpp
index 9237a79..6526917 100644
--- a/src/backend/cpu/tile.cpp
+++ b/src/backend/cpu/tile.cpp
@@ -9,7 +9,8 @@
 
 #include <Array.hpp>
 #include <tile.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 #include <kernel/tile.hpp>
 
 namespace cpu
@@ -30,7 +31,7 @@ Array<T> tile(const Array<T> &in, const af::dim4 &tileDims)
 
     Array<T> out = createEmptyArray<T>(oDims);
 
-    ENQUEUE(kernel::tile<T>, out, in);
+    getQueue().enqueue(kernel::tile<T>, out, in);
 
     return out;
 }
diff --git a/src/backend/cpu/transform.cpp b/src/backend/cpu/transform.cpp
index 5874e7a..fc71458 100644
--- a/src/backend/cpu/transform.cpp
+++ b/src/backend/cpu/transform.cpp
@@ -10,7 +10,8 @@
 #include <Array.hpp>
 #include <transform.hpp>
 #include <math.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 #include "transform_interp.hpp"
 #include <kernel/transform.hpp>
 
@@ -28,13 +29,13 @@ Array<T> transform(const Array<T> &in, const Array<float> &transform, const af::
 
     switch(method) {
         case AF_INTERP_NEAREST :
-            ENQUEUE(kernel::transform<T, AF_INTERP_NEAREST >, out, in, transform, inverse);
+            getQueue().enqueue(kernel::transform<T, AF_INTERP_NEAREST >, out, in, transform, inverse);
             break;
         case AF_INTERP_BILINEAR:
-            ENQUEUE(kernel::transform<T, AF_INTERP_BILINEAR>, out, in, transform, inverse);
+            getQueue().enqueue(kernel::transform<T, AF_INTERP_BILINEAR>, out, in, transform, inverse);
             break;
         case AF_INTERP_LOWER   :
-            ENQUEUE(kernel::transform<T, AF_INTERP_LOWER   >, out, in, transform, inverse);
+            getQueue().enqueue(kernel::transform<T, AF_INTERP_LOWER   >, out, in, transform, inverse);
             break;
         default: AF_ERROR("Unsupported interpolation type", AF_ERR_ARG); break;
     }
diff --git a/src/backend/cpu/transpose.cpp b/src/backend/cpu/transpose.cpp
index c1d5d1d..32663e1 100644
--- a/src/backend/cpu/transpose.cpp
+++ b/src/backend/cpu/transpose.cpp
@@ -12,7 +12,8 @@
 #include <ArrayInfo.hpp>
 #include <Array.hpp>
 #include <transpose.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 #include <kernel/transpose.hpp>
 #include <utility>
 #include <cassert>
@@ -32,7 +33,7 @@ Array<T> transpose(const Array<T> &in, const bool conjugate)
     // create an array with first two dimensions swapped
     Array<T> out  = createEmptyArray<T>(outDims);
 
-    ENQUEUE(kernel::transpose<T>, out, in, conjugate);
+    getQueue().enqueue(kernel::transpose<T>, out, in, conjugate);
 
     return out;
 }
@@ -41,7 +42,7 @@ template<typename T>
 void transpose_inplace(Array<T> &in, const bool conjugate)
 {
     in.eval();
-    ENQUEUE(kernel::transpose_inplace<T>, in, conjugate);
+    getQueue().enqueue(kernel::transpose_inplace<T>, in, conjugate);
 }
 
 #define INSTANTIATE(T)                                                      \
diff --git a/src/backend/cpu/triangle.cpp b/src/backend/cpu/triangle.cpp
index fbc7f65..2a9553c 100644
--- a/src/backend/cpu/triangle.cpp
+++ b/src/backend/cpu/triangle.cpp
@@ -12,7 +12,8 @@
 #include <Array.hpp>
 #include <triangle.hpp>
 #include <math.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 #include <kernel/triangle.hpp>
 
 namespace cpu
@@ -21,7 +22,7 @@ namespace cpu
 template<typename T, bool is_upper, bool is_unit_diag>
 void triangle(Array<T> &out, const Array<T> &in)
 {
-    ENQUEUE(kernel::triangle<T, is_upper, is_unit_diag>, out, in);
+    getQueue().enqueue(kernel::triangle<T, is_upper, is_unit_diag>, out, in);
 }
 
 template<typename T, bool is_upper, bool is_unit_diag>
diff --git a/src/backend/cpu/unwrap.cpp b/src/backend/cpu/unwrap.cpp
index d40acde..1aa37a4 100644
--- a/src/backend/cpu/unwrap.cpp
+++ b/src/backend/cpu/unwrap.cpp
@@ -11,7 +11,8 @@
 #include <unwrap.hpp>
 #include <dispatch.hpp>
 #include <math.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 #include <kernel/unwrap.hpp>
 
 namespace cpu
@@ -36,9 +37,9 @@ Array<T> unwrap(const Array<T> &in, const dim_t wx, const dim_t wy,
     Array<T> outArray = createEmptyArray<T>(odims);
 
     if (is_column) {
-        ENQUEUE(kernel::unwrap_dim<T, 1>, outArray, in, wx, wy, sx, sy, px, py);
+        getQueue().enqueue(kernel::unwrap_dim<T, 1>, outArray, in, wx, wy, sx, sy, px, py);
     } else {
-        ENQUEUE(kernel::unwrap_dim<T, 0>, outArray, in, wx, wy, sx, sy, px, py);
+        getQueue().enqueue(kernel::unwrap_dim<T, 0>, outArray, in, wx, wy, sx, sy, px, py);
     }
 
     return outArray;
diff --git a/src/backend/cpu/where.cpp b/src/backend/cpu/where.cpp
index 734b768..018cbdf 100644
--- a/src/backend/cpu/where.cpp
+++ b/src/backend/cpu/where.cpp
@@ -16,7 +16,8 @@
 #include <where.hpp>
 #include <ops.hpp>
 #include <vector>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 using af::dim4;
 
diff --git a/src/backend/cpu/wrap.cpp b/src/backend/cpu/wrap.cpp
index 87de234..07487e0 100644
--- a/src/backend/cpu/wrap.cpp
+++ b/src/backend/cpu/wrap.cpp
@@ -11,7 +11,8 @@
 #include <wrap.hpp>
 #include <dispatch.hpp>
 #include <math.hpp>
-#include <debug_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 #include <kernel/wrap.hpp>
 
 namespace cpu
@@ -33,9 +34,9 @@ Array<T> wrap(const Array<T> &in,
     in.eval();
 
     if (is_column) {
-        ENQUEUE(kernel::wrap_dim<T, 1>, out, in, wx, wy, sx, sy, px, py);
+        getQueue().enqueue(kernel::wrap_dim<T, 1>, out, in, wx, wy, sx, sy, px, py);
     } else {
-        ENQUEUE(kernel::wrap_dim<T, 0>, out, in, wx, wy, sx, sy, px, py);
+        getQueue().enqueue(kernel::wrap_dim<T, 0>, out, in, wx, wy, sx, sy, px, py);
     }
 
     return out;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list