[arrayfire] 109/284: Added ENQUEUE macro in cpu backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:24 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 95d934613425559fa9048433bfe77bb8f151c18f
Author: pradeep <pradeep at arrayfire.com>
Date: Mon Dec 28 17:46:06 2015 -0500
Added ENQUEUE macro in cpu backend
This macro takes care of the asynchronous kernel launch and calls
sync on the queue when in debug mode.
---
src/backend/cpu/Array.cpp | 5 ++---
src/backend/cpu/approx.cpp | 19 +++++++++----------
src/backend/cpu/assign.cpp | 5 ++---
src/backend/cpu/bilateral.cpp | 5 ++---
src/backend/cpu/blas.cpp | 13 ++++++-------
src/backend/cpu/cholesky.cpp | 5 ++---
src/backend/cpu/convolve.cpp | 7 +++----
src/backend/cpu/copy.cpp | 9 ++++-----
src/backend/cpu/debug_cpu.hpp | 31 +++++++++++++++++++++++++++++++
src/backend/cpu/diagonal.cpp | 7 +++----
src/backend/cpu/diff.cpp | 7 +++----
src/backend/cpu/fast.cpp | 3 +--
src/backend/cpu/fft.cpp | 9 ++++-----
src/backend/cpu/fftconvolve.cpp | 15 +++++++--------
src/backend/cpu/gradient.cpp | 5 ++---
src/backend/cpu/harris.cpp | 13 ++++++-------
src/backend/cpu/hist_graphics.cpp | 3 +--
src/backend/cpu/histogram.cpp | 5 ++---
src/backend/cpu/homography.cpp | 3 +--
src/backend/cpu/hsv_rgb.cpp | 7 +++----
src/backend/cpu/identity.cpp | 5 ++---
src/backend/cpu/iir.cpp | 5 ++---
src/backend/cpu/image.cpp | 3 +--
src/backend/cpu/index.cpp | 5 ++---
src/backend/cpu/inverse.cpp | 5 ++---
src/backend/cpu/iota.cpp | 5 ++---
src/backend/cpu/ireduce.cpp | 5 ++---
src/backend/cpu/join.cpp | 25 ++++++++++++-------------
src/backend/cpu/lookup.cpp | 5 ++---
src/backend/cpu/lu.cpp | 9 ++++-----
src/backend/cpu/match_template.cpp | 5 ++---
src/backend/cpu/meanshift.cpp | 5 ++---
src/backend/cpu/medfilt.cpp | 5 ++---
src/backend/cpu/memory.cpp | 3 +--
src/backend/cpu/morph.cpp | 7 +++----
src/backend/cpu/nearest_neighbour.cpp | 9 ++++-----
src/backend/cpu/orb.cpp | 3 +--
src/backend/cpu/platform.cpp | 3 +--
src/backend/cpu/plot.cpp | 3 +--
src/backend/cpu/plot3.cpp | 3 +--
src/backend/cpu/qr.cpp | 7 +++----
src/backend/cpu/random.cpp | 11 +++++------
src/backend/cpu/range.cpp | 11 +++++------
src/backend/cpu/reduce.cpp | 5 ++---
src/backend/cpu/regions.cpp | 5 ++---
src/backend/cpu/reorder.cpp | 5 ++---
src/backend/cpu/resize.cpp | 9 ++++-----
src/backend/cpu/rotate.cpp | 9 ++++-----
src/backend/cpu/scan.cpp | 11 +++++------
src/backend/cpu/select.cpp | 7 +++----
src/backend/cpu/set.cpp | 3 +--
src/backend/cpu/shift.cpp | 5 ++---
src/backend/cpu/sobel.cpp | 7 +++----
src/backend/cpu/solve.cpp | 11 +++++------
src/backend/cpu/sort.cpp | 5 ++---
src/backend/cpu/sort_by_key.cpp | 5 ++---
src/backend/cpu/sort_index.cpp | 5 ++---
src/backend/cpu/surface.cpp | 3 +--
src/backend/cpu/susan.cpp | 7 +++----
src/backend/cpu/svd.cpp | 5 ++---
src/backend/cpu/tile.cpp | 5 ++---
src/backend/cpu/transform.cpp | 9 ++++-----
src/backend/cpu/transpose.cpp | 7 +++----
src/backend/cpu/triangle.cpp | 5 ++---
src/backend/cpu/unwrap.cpp | 7 +++----
src/backend/cpu/where.cpp | 3 +--
src/backend/cpu/wrap.cpp | 7 +++----
67 files changed, 219 insertions(+), 254 deletions(-)
diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 40d25ac..34c99e4 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -15,8 +15,7 @@
#include <TNJ/BufferNode.hpp>
#include <TNJ/ScalarNode.hpp>
#include <memory.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <cstring>
#include <cstddef>
@@ -78,7 +77,7 @@ void Array<T>::eval()
data = std::shared_ptr<T>(memAlloc<T>(elements()), memFree<T>);
- getQueue().enqueue(kernel::evalArray<T>, *this);
+ ENQUEUE(kernel::evalArray<T>, *this);
ready = true;
Node_ptr prev = node;
diff --git a/src/backend/cpu/approx.cpp b/src/backend/cpu/approx.cpp
index 7e65486..57d3cc4 100644
--- a/src/backend/cpu/approx.cpp
+++ b/src/backend/cpu/approx.cpp
@@ -11,8 +11,7 @@
#include <approx.hpp>
#include <kernel/approx1.hpp>
#include <kernel/approx2.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
@@ -31,12 +30,12 @@ Array<Ty> approx1(const Array<Ty> &in, const Array<Tp> &pos,
switch(method) {
case AF_INTERP_NEAREST:
- getQueue().enqueue(kernel::approx1<Ty, Tp, AF_INTERP_NEAREST>,
- out, in, pos, offGrid);
+ ENQUEUE(kernel::approx1<Ty, Tp, AF_INTERP_NEAREST>,
+ out, in, pos, offGrid);
break;
case AF_INTERP_LINEAR:
- getQueue().enqueue(kernel::approx1<Ty, Tp, AF_INTERP_LINEAR>,
- out, in, pos, offGrid);
+ ENQUEUE(kernel::approx1<Ty, Tp, AF_INTERP_LINEAR>,
+ out, in, pos, offGrid);
break;
default:
break;
@@ -61,12 +60,12 @@ Array<Ty> approx2(const Array<Ty> &in, const Array<Tp> &pos0, const Array<Tp> &p
switch(method) {
case AF_INTERP_NEAREST:
- getQueue().enqueue(kernel::approx2<Ty, Tp, AF_INTERP_NEAREST>,
- out, in, pos0, pos1, offGrid);
+ ENQUEUE(kernel::approx2<Ty, Tp, AF_INTERP_NEAREST>,
+ out, in, pos0, pos1, offGrid);
break;
case AF_INTERP_LINEAR:
- getQueue().enqueue(kernel::approx2<Ty, Tp, AF_INTERP_LINEAR>,
- out, in, pos0, pos1, offGrid);
+ ENQUEUE(kernel::approx2<Ty, Tp, AF_INTERP_LINEAR>,
+ out, in, pos0, pos1, offGrid);
break;
default:
break;
diff --git a/src/backend/cpu/assign.cpp b/src/backend/cpu/assign.cpp
index 95bb7e5..df90344 100644
--- a/src/backend/cpu/assign.cpp
+++ b/src/backend/cpu/assign.cpp
@@ -14,8 +14,7 @@
#include <handle.hpp>
#include <kernel/assign.hpp>
#include <assign.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
@@ -48,7 +47,7 @@ void assign(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs)
}
}
- getQueue().enqueue(kernel::assign<T>, out, rhs, std::move(isSeq),
+ ENQUEUE(kernel::assign<T>, out, rhs, std::move(isSeq),
std::move(seqs), std::move(idxArrs));
}
diff --git a/src/backend/cpu/bilateral.cpp b/src/backend/cpu/bilateral.cpp
index bc3ad6e..ceb8be9 100644
--- a/src/backend/cpu/bilateral.cpp
+++ b/src/backend/cpu/bilateral.cpp
@@ -15,8 +15,7 @@
#include <bilateral.hpp>
#include <cmath>
#include <algorithm>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
using af::dim4;
@@ -29,7 +28,7 @@ Array<outType> bilateral(const Array<inType> &in, const float &s_sigma, const fl
in.eval();
const dim4 dims = in.dims();
Array<outType> out = createEmptyArray<outType>(dims);
- getQueue().enqueue(kernel::bilateral<outType, inType, isColor>, out, in, s_sigma, c_sigma);
+ ENQUEUE(kernel::bilateral<outType, inType, isColor>, out, in, s_sigma, c_sigma);
return out;
}
diff --git a/src/backend/cpu/blas.cpp b/src/backend/cpu/blas.cpp
index d6f5dee..70c8d9c 100644
--- a/src/backend/cpu/blas.cpp
+++ b/src/backend/cpu/blas.cpp
@@ -13,8 +13,7 @@
#include <cassert>
#include <err_common.hpp>
#include <kernel/dot.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
@@ -194,7 +193,7 @@ Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
reinterpret_cast<BT*>(output.get()), output.dims()[0]);
}
};
- getQueue().enqueue(func, out, lhs, rhs);
+ ENQUEUE(func, out, lhs, rhs);
return out;
}
@@ -208,13 +207,13 @@ Array<T> dot(const Array<T> &lhs, const Array<T> &rhs,
Array<T> out = createEmptyArray<T>(af::dim4(1));
if(optLhs == AF_MAT_CONJ && optRhs == AF_MAT_CONJ) {
- getQueue().enqueue(kernel::dot<T, false, true>, out, lhs, rhs, optLhs, optRhs);
+ ENQUEUE(kernel::dot<T, false, true>, out, lhs, rhs, optLhs, optRhs);
} else if (optLhs == AF_MAT_CONJ && optRhs == AF_MAT_NONE) {
- getQueue().enqueue(kernel::dot<T, true, false>,out, lhs, rhs, optLhs, optRhs);
+ ENQUEUE(kernel::dot<T, true, false>,out, lhs, rhs, optLhs, optRhs);
} else if (optLhs == AF_MAT_NONE && optRhs == AF_MAT_CONJ) {
- getQueue().enqueue(kernel::dot<T, true, false>,out, rhs, lhs, optRhs, optLhs);
+ ENQUEUE(kernel::dot<T, true, false>,out, rhs, lhs, optRhs, optLhs);
} else {
- getQueue().enqueue(kernel::dot<T, false, false>,out, lhs, rhs, optLhs, optRhs);
+ ENQUEUE(kernel::dot<T, false, false>,out, lhs, rhs, optLhs, optRhs);
}
return out;
}
diff --git a/src/backend/cpu/cholesky.cpp b/src/backend/cpu/cholesky.cpp
index ce11867..b21d9c8 100644
--- a/src/backend/cpu/cholesky.cpp
+++ b/src/backend/cpu/cholesky.cpp
@@ -19,8 +19,7 @@
#include <err_cpu.hpp>
#include <triangle.hpp>
#include <lapack_helper.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
@@ -75,7 +74,7 @@ int cholesky_inplace(Array<T> &in, const bool is_upper)
info = potrf_func<T>()(AF_LAPACK_COL_MAJOR, uplo, N, in.get(), in.strides()[1]);
};
- getQueue().enqueue(func, info, in);
+ ENQUEUE(func, info, in);
getQueue().sync();
return info;
diff --git a/src/backend/cpu/convolve.cpp b/src/backend/cpu/convolve.cpp
index 218ba8e..cf241c3 100644
--- a/src/backend/cpu/convolve.cpp
+++ b/src/backend/cpu/convolve.cpp
@@ -14,8 +14,7 @@
#include <convolve.hpp>
#include <err_cpu.hpp>
#include <math.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/convolve.hpp>
using af::dim4;
@@ -51,7 +50,7 @@ Array<T> convolve(Array<T> const& signal, Array<accT> const& filter, ConvolveBat
Array<T> out = createEmptyArray<T>(oDims);
- getQueue().enqueue(kernel::convolve_nd<T, accT, baseDim, expand>,out, signal, filter, kind);
+ ENQUEUE(kernel::convolve_nd<T, accT, baseDim, expand>,out, signal, filter, kind);
return out;
}
@@ -81,7 +80,7 @@ Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<ac
Array<T> out = createEmptyArray<T>(oDims);
- getQueue().enqueue(kernel::convolve2<T, accT, expand>, out, signal, c_filter, r_filter, tDims);
+ ENQUEUE(kernel::convolve2<T, accT, expand>, out, signal, c_filter, r_filter, tDims);
return out;
}
diff --git a/src/backend/cpu/copy.cpp b/src/backend/cpu/copy.cpp
index 9f6068d..8085a0f 100644
--- a/src/backend/cpu/copy.cpp
+++ b/src/backend/cpu/copy.cpp
@@ -18,8 +18,7 @@
#include <cassert>
#include <err_cpu.hpp>
#include <math.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/copy.hpp>
namespace cpu
@@ -51,7 +50,7 @@ template<typename T>
void multiply_inplace(Array<T> &in, double val)
{
in.eval();
- getQueue().enqueue(kernel::copy<T, T>, in, in, 0, val);
+ ENQUEUE(kernel::copy<T, T>, in, in, 0, val);
}
template<typename inType, typename outType>
@@ -63,7 +62,7 @@ Array<outType> padArray(Array<inType> const &in, dim4 const &dims,
in.eval();
// FIXME:
getQueue().sync();
- getQueue().enqueue(kernel::copy<outType, inType>, ret, in, outType(default_value), factor);
+ ENQUEUE(kernel::copy<outType, inType>, ret, in, outType(default_value), factor);
return ret;
}
@@ -72,7 +71,7 @@ void copyArray(Array<outType> &out, Array<inType> const &in)
{
out.eval();
in.eval();
- getQueue().enqueue(kernel::copy<outType, inType>, out, in, scalar<outType>(0), 1.0);
+ ENQUEUE(kernel::copy<outType, inType>, out, in, scalar<outType>(0), 1.0);
}
#define INSTANTIATE(T) \
diff --git a/src/backend/cpu/debug_cpu.hpp b/src/backend/cpu/debug_cpu.hpp
new file mode 100644
index 0000000..b1d8e17
--- /dev/null
+++ b/src/backend/cpu/debug_cpu.hpp
@@ -0,0 +1,31 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+#include <platform.hpp>
+#include <async_queue.hpp>
+#include <err_cpu.hpp>
+
+#ifndef NDEBUG
+// Debug build (NDEBUG not defined): call getQueue().sync() after each launch.
+#define POST_LAUNCH_CHECK() do { \
+ getQueue().sync(); \
+ } while(0)
+
+#else
+// NDEBUG build: the post-launch check expands to nothing.
+#define POST_LAUNCH_CHECK() //no-op
+
+#endif
+// Forward all arguments to getQueue().enqueue(), then run POST_LAUNCH_CHECK().
+#define ENQUEUE(...) \
+ do { \
+ getQueue().enqueue(__VA_ARGS__); \
+ POST_LAUNCH_CHECK(); \
+ } while(0)
diff --git a/src/backend/cpu/diagonal.cpp b/src/backend/cpu/diagonal.cpp
index 6c20f2e..6fd918d 100644
--- a/src/backend/cpu/diagonal.cpp
+++ b/src/backend/cpu/diagonal.cpp
@@ -15,8 +15,7 @@
#include <diagonal.hpp>
#include <math.hpp>
#include <err_cpu.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/diagonal.hpp>
namespace cpu
@@ -31,7 +30,7 @@ Array<T> diagCreate(const Array<T> &in, const int num)
int batch = in.dims()[1];
Array<T> out = createEmptyArray<T>(dim4(size, size, batch));
- getQueue().enqueue(kernel::diagCreate<T>, out, in, num);
+ ENQUEUE(kernel::diagCreate<T>, out, in, num);
return out;
}
@@ -45,7 +44,7 @@ Array<T> diagExtract(const Array<T> &in, const int num)
dim_t size = std::max(idims[0], idims[1]) - std::abs(num);
Array<T> out = createEmptyArray<T>(dim4(size, 1, idims[2], idims[3]));
- getQueue().enqueue(kernel::diagExtract<T>, out, in, num);
+ ENQUEUE(kernel::diagExtract<T>, out, in, num);
return out;
}
diff --git a/src/backend/cpu/diff.cpp b/src/backend/cpu/diff.cpp
index 3f639ca..efab130 100644
--- a/src/backend/cpu/diff.cpp
+++ b/src/backend/cpu/diff.cpp
@@ -9,8 +9,7 @@
#include <Array.hpp>
#include <diff.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/diff.hpp>
namespace cpu
@@ -27,7 +26,7 @@ Array<T> diff1(const Array<T> &in, const int dim)
Array<T> outArray = createEmptyArray<T>(dims);
- getQueue().enqueue(kernel::diff1<T>, outArray, in, dim);
+ ENQUEUE(kernel::diff1<T>, outArray, in, dim);
return outArray;
}
@@ -43,7 +42,7 @@ Array<T> diff2(const Array<T> &in, const int dim)
Array<T> outArray = createEmptyArray<T>(dims);
- getQueue().enqueue(kernel::diff2<T>, outArray, in, dim);
+ ENQUEUE(kernel::diff2<T>, outArray, in, dim);
return outArray;
}
diff --git a/src/backend/cpu/fast.cpp b/src/backend/cpu/fast.cpp
index 42607d8..1b3a7aa 100644
--- a/src/backend/cpu/fast.cpp
+++ b/src/backend/cpu/fast.cpp
@@ -14,8 +14,7 @@
#include <err_cpu.hpp>
#include <handle.hpp>
#include <fast.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/fast.hpp>
using af::dim4;
diff --git a/src/backend/cpu/fft.cpp b/src/backend/cpu/fft.cpp
index 2edced2..1282963 100644
--- a/src/backend/cpu/fft.cpp
+++ b/src/backend/cpu/fft.cpp
@@ -15,8 +15,7 @@
#include <kernel/fft.hpp>
#include <copy.hpp>
#include <math.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
using af::dim4;
@@ -27,7 +26,7 @@ template<typename T, int rank, bool direction>
void fft_inplace(Array<T> &in)
{
in.eval();
- getQueue().enqueue(kernel::fft_inplace<T, rank, direction>, in);
+ ENQUEUE(kernel::fft_inplace<T, rank, direction>, in);
}
template<typename Tc, typename Tr, int rank>
@@ -39,7 +38,7 @@ Array<Tc> fft_r2c(const Array<Tr> &in)
odims[0] = odims[0] / 2 + 1;
Array<Tc> out = createEmptyArray<Tc>(odims);
- getQueue().enqueue(kernel::fft_r2c<Tc, Tr, rank>, out, in);
+ ENQUEUE(kernel::fft_r2c<Tc, Tr, rank>, out, in);
return out;
}
@@ -50,7 +49,7 @@ Array<Tr> fft_c2r(const Array<Tc> &in, const dim4 &odims)
in.eval();
Array<Tr> out = createEmptyArray<Tr>(odims);
- getQueue().enqueue(kernel::fft_c2r<Tr, Tc, rank>, out, in, odims);
+ ENQUEUE(kernel::fft_c2r<Tr, Tc, rank>, out, in, odims);
return out;
}
diff --git a/src/backend/cpu/fftconvolve.cpp b/src/backend/cpu/fftconvolve.cpp
index c0a9a41..aac66cd 100644
--- a/src/backend/cpu/fftconvolve.cpp
+++ b/src/backend/cpu/fftconvolve.cpp
@@ -17,8 +17,7 @@
#include <fftw3.h>
#include <copy.hpp>
#include <convolve_common.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/fftconvolve.hpp>
namespace cpu
@@ -84,11 +83,11 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
// Pack signal in a complex matrix where first dimension is half the input
// (allows faster FFT computation) and pad array to a power of 2 with 0s
- getQueue().enqueue(kernel::packData<convT, T>, packed, sig_tmp_dims, sig_tmp_strides, signal);
+ ENQUEUE(kernel::packData<convT, T>, packed, sig_tmp_dims, sig_tmp_strides, signal);
// Pad filter array with 0s
const dim_t offset = sig_tmp_strides[3]*sig_tmp_dims[3];
- getQueue().enqueue(kernel::padArray<convT, T>, packed, filter_tmp_dims, filter_tmp_strides,
+ ENQUEUE(kernel::padArray<convT, T>, packed, filter_tmp_dims, filter_tmp_strides,
filter, offset);
dim4 fftDims(1, 1, 1, 1);
@@ -138,10 +137,10 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
fftwf_destroy_plan(plan);
}
};
- getQueue().enqueue(upstream_dft, packed, fftDims);
+ ENQUEUE(upstream_dft, packed, fftDims);
// Multiply filter and signal FFT arrays
- getQueue().enqueue(kernel::complexMultiply<convT>, packed,
+ ENQUEUE(kernel::complexMultiply<convT>, packed,
sig_tmp_dims, sig_tmp_strides,
filter_tmp_dims, filter_tmp_strides,
kind, offset);
@@ -189,7 +188,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
fftwf_destroy_plan(plan);
}
};
- getQueue().enqueue(upstream_idft, packed, fftDims);
+ ENQUEUE(upstream_idft, packed, fftDims);
// Compute output dimensions
dim4 oDims(1);
@@ -211,7 +210,7 @@ Array<T> fftconvolve(Array<T> const& signal, Array<T> const& filter,
Array<T> out = createEmptyArray<T>(oDims);
- getQueue().enqueue(kernel::reorder<T, convT, roundOut, baseDim>, out, packed, filter,
+ ENQUEUE(kernel::reorder<T, convT, roundOut, baseDim>, out, packed, filter,
sig_half_d0, fftScale, sig_tmp_dims, sig_tmp_strides, filter_tmp_dims,
filter_tmp_strides, expand, kind);
diff --git a/src/backend/cpu/gradient.cpp b/src/backend/cpu/gradient.cpp
index d1a8b0d..57776e5 100644
--- a/src/backend/cpu/gradient.cpp
+++ b/src/backend/cpu/gradient.cpp
@@ -12,8 +12,7 @@
#include <math.hpp>
#include <stdexcept>
#include <err_cpu.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/gradient.hpp>
namespace cpu
@@ -26,7 +25,7 @@ void gradient(Array<T> &grad0, Array<T> &grad1, const Array<T> &in)
grad1.eval();
in.eval();
- getQueue().enqueue(kernel::gradient<T>, grad0, grad1, in);
+ ENQUEUE(kernel::gradient<T>, grad0, grad1, in);
}
#define INSTANTIATE(T) \
diff --git a/src/backend/cpu/harris.cpp b/src/backend/cpu/harris.cpp
index 905b046..07b9bed 100644
--- a/src/backend/cpu/harris.cpp
+++ b/src/backend/cpu/harris.cpp
@@ -18,8 +18,7 @@
#include <gradient.hpp>
#include <sort_index.hpp>
#include <cstring>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/harris.hpp>
using af::dim4;
@@ -53,14 +52,14 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
Array<T> iy = createEmptyArray<T>(idims);
// Compute first order derivatives
- getQueue().enqueue(gradient<T>, iy, ix, in);
+ ENQUEUE(gradient<T>, iy, ix, in);
Array<T> ixx = createEmptyArray<T>(idims);
Array<T> ixy = createEmptyArray<T>(idims);
Array<T> iyy = createEmptyArray<T>(idims);
// Compute second-order derivatives
- getQueue().enqueue(kernel::second_order_deriv<T>, ixx, ixy, iyy, in.elements(), ix, iy);
+ ENQUEUE(kernel::second_order_deriv<T>, ixx, ixy, iyy, in.elements(), ix, iy);
// Convolve second-order derivatives with proper window filter
ixx = convolve2<T, convAccT, false>(ixx, filter, filter);
@@ -71,7 +70,7 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
Array<T> responses = createEmptyArray<T>(dim4(in.elements()));
- getQueue().enqueue(kernel::harris_responses<T>, responses, idims[0], idims[1],
+ ENQUEUE(kernel::harris_responses<T>, responses, idims[0], idims[1],
ixx, ixy, iyy, k_thr, border_len);
Array<float> xCorners = createEmptyArray<float>(dim4(corner_lim));
@@ -105,7 +104,7 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
resp_out = createEmptyArray<float>(dim4(corners_out));
// Keep only the corners with higher Harris responses
- getQueue().enqueue(kernel::keep_corners, x_out, y_out, resp_out, xCorners, yCorners,
+ ENQUEUE(kernel::keep_corners, x_out, y_out, resp_out, xCorners, yCorners,
harris_sorted, harris_idx, corners_out);
} else if (max_corners == 0 && corners_found < corner_lim) {
x_out = createEmptyArray<float>(dim4(corners_out));
@@ -120,7 +119,7 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
memcpy(y_out.get(), y_crnrs.get(), corners_out * sizeof(float));
memcpy(outResponses.get(), inResponses.get(), corners_out * sizeof(float));
};
- getQueue().enqueue(copyFunc, x_out, y_out, resp_out,
+ ENQUEUE(copyFunc, x_out, y_out, resp_out,
xCorners, yCorners, respCorners, corners_out);
} else {
x_out = xCorners;
diff --git a/src/backend/cpu/hist_graphics.cpp b/src/backend/cpu/hist_graphics.cpp
index 56f7646..c58f5c6 100644
--- a/src/backend/cpu/hist_graphics.cpp
+++ b/src/backend/cpu/hist_graphics.cpp
@@ -11,8 +11,7 @@
#include <hist_graphics.hpp>
#include <err_cpu.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
diff --git a/src/backend/cpu/histogram.cpp b/src/backend/cpu/histogram.cpp
index 19314e0..2571f3e 100644
--- a/src/backend/cpu/histogram.cpp
+++ b/src/backend/cpu/histogram.cpp
@@ -12,8 +12,7 @@
#include <ArrayInfo.hpp>
#include <Array.hpp>
#include <histogram.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/histogram.hpp>
using af::dim4;
@@ -32,7 +31,7 @@ Array<outType> histogram(const Array<inType> &in,
Array<outType> out = createValueArray<outType>(outDims, outType(0));
out.eval();
- getQueue().enqueue(kernel::histogram<outType, inType, isLinear>,
+ ENQUEUE(kernel::histogram<outType, inType, isLinear>,
out, in, nbins, minval, maxval);
return out;
diff --git a/src/backend/cpu/homography.cpp b/src/backend/cpu/homography.cpp
index d936e21..147f5e8 100644
--- a/src/backend/cpu/homography.cpp
+++ b/src/backend/cpu/homography.cpp
@@ -18,8 +18,7 @@
#include <random.hpp>
#include <cstring>
#include <cfloat>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
using af::dim4;
diff --git a/src/backend/cpu/hsv_rgb.cpp b/src/backend/cpu/hsv_rgb.cpp
index c0f19db..da5dbe0 100644
--- a/src/backend/cpu/hsv_rgb.cpp
+++ b/src/backend/cpu/hsv_rgb.cpp
@@ -11,8 +11,7 @@
#include <Array.hpp>
#include <ArrayInfo.hpp>
#include <hsv_rgb.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/hsv_rgb.hpp>
using af::dim4;
@@ -27,7 +26,7 @@ Array<T> hsv2rgb(const Array<T>& in)
Array<T> out = createEmptyArray<T>(in.dims());
- getQueue().enqueue(kernel::hsv2rgb<T>, out, in);
+ ENQUEUE(kernel::hsv2rgb<T>, out, in);
return out;
}
@@ -39,7 +38,7 @@ Array<T> rgb2hsv(const Array<T>& in)
Array<T> out = createEmptyArray<T>(in.dims());
- getQueue().enqueue(kernel::rgb2hsv<T>, out, in);
+ ENQUEUE(kernel::rgb2hsv<T>, out, in);
return out;
}
diff --git a/src/backend/cpu/identity.cpp b/src/backend/cpu/identity.cpp
index 949fced..071bb04 100644
--- a/src/backend/cpu/identity.cpp
+++ b/src/backend/cpu/identity.cpp
@@ -10,8 +10,7 @@
#include <af/dim4.hpp>
#include <Array.hpp>
#include <identity.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/identity.hpp>
namespace cpu
@@ -22,7 +21,7 @@ Array<T> identity(const dim4& dims)
{
Array<T> out = createEmptyArray<T>(dims);
- getQueue().enqueue(kernel::identity<T>, out);
+ ENQUEUE(kernel::identity<T>, out);
return out;
}
diff --git a/src/backend/cpu/iir.cpp b/src/backend/cpu/iir.cpp
index 225f39b..cb390b3 100644
--- a/src/backend/cpu/iir.cpp
+++ b/src/backend/cpu/iir.cpp
@@ -13,8 +13,7 @@
#include <Array.hpp>
#include <iir.hpp>
#include <convolve.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/iir.hpp>
using af::dim4;
@@ -42,7 +41,7 @@ Array<T> iir(const Array<T> &b, const Array<T> &a, const Array<T> &x)
Array<T> y = createEmptyArray<T>(c.dims());
- getQueue().enqueue(kernel::iir<T>, y, c, a);
+ ENQUEUE(kernel::iir<T>, y, c, a);
return y;
}
diff --git a/src/backend/cpu/image.cpp b/src/backend/cpu/image.cpp
index 767f9d4..d23ba80 100644
--- a/src/backend/cpu/image.cpp
+++ b/src/backend/cpu/image.cpp
@@ -16,8 +16,7 @@
#include <image.hpp>
#include <err_cpu.hpp>
#include <graphics_common.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
using af::dim4;
diff --git a/src/backend/cpu/index.cpp b/src/backend/cpu/index.cpp
index bd569de..9c951ff 100644
--- a/src/backend/cpu/index.cpp
+++ b/src/backend/cpu/index.cpp
@@ -14,8 +14,7 @@
#include <index.hpp>
#include <handle.hpp>
#include <vector>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <utility>
#include <kernel/index.hpp>
@@ -58,7 +57,7 @@ Array<T> index(const Array<T>& in, const af_index_t idxrs[])
Array<T> out = createEmptyArray<T>(oDims);
- getQueue().enqueue(kernel::index<T>, out, in, std::move(isSeq), std::move(seqs), std::move(idxArrs));
+ ENQUEUE(kernel::index<T>, out, in, std::move(isSeq), std::move(seqs), std::move(idxArrs));
return out;
}
diff --git a/src/backend/cpu/inverse.cpp b/src/backend/cpu/inverse.cpp
index 987ba01..71cc9fe 100644
--- a/src/backend/cpu/inverse.cpp
+++ b/src/backend/cpu/inverse.cpp
@@ -23,8 +23,7 @@
#include <lu.hpp>
#include <identity.hpp>
#include <solve.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
@@ -68,7 +67,7 @@ Array<T> inverse(const Array<T> &in)
A.get(), A.strides()[1],
pivot.get());
};
- getQueue().enqueue(func, A, pivot, M);
+ ENQUEUE(func, A, pivot, M);
return A;
}
diff --git a/src/backend/cpu/iota.cpp b/src/backend/cpu/iota.cpp
index 41f0c9c..124ec5c 100644
--- a/src/backend/cpu/iota.cpp
+++ b/src/backend/cpu/iota.cpp
@@ -10,8 +10,7 @@
#include <Array.hpp>
#include <iota.hpp>
#include <math.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/iota.hpp>
using namespace std;
@@ -26,7 +25,7 @@ Array<T> iota(const dim4 &dims, const dim4 &tile_dims)
Array<T> out = createEmptyArray<T>(outdims);
- getQueue().enqueue(kernel::iota<T>, out, dims, tile_dims);
+ ENQUEUE(kernel::iota<T>, out, dims, tile_dims);
return out;
}
diff --git a/src/backend/cpu/ireduce.cpp b/src/backend/cpu/ireduce.cpp
index f1efcf6..9de4a78 100644
--- a/src/backend/cpu/ireduce.cpp
+++ b/src/backend/cpu/ireduce.cpp
@@ -13,8 +13,7 @@
#include <ArrayInfo.hpp>
#include <Array.hpp>
#include <ireduce.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/ireduce.hpp>
using af::dim4;
@@ -40,7 +39,7 @@ void ireduce(Array<T> &out, Array<uint> &loc, const Array<T> &in, const int dim)
, kernel::ireduce_dim<op, T, 3>()
, kernel::ireduce_dim<op, T, 4>()};
- getQueue().enqueue(ireduce_funcs[in.ndims() - 1], out, loc, 0, in, 0, dim);
+ ENQUEUE(ireduce_funcs[in.ndims() - 1], out, loc, 0, in, 0, dim);
}
template<af_op_t op, typename T>
diff --git a/src/backend/cpu/join.cpp b/src/backend/cpu/join.cpp
index e39280c..6c9ba8f 100644
--- a/src/backend/cpu/join.cpp
+++ b/src/backend/cpu/join.cpp
@@ -9,8 +9,7 @@
#include <Array.hpp>
#include <join.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/join.hpp>
namespace cpu
@@ -38,7 +37,7 @@ Array<Tx> join(const int dim, const Array<Tx> &first, const Array<Ty> &second)
Array<Tx> out = createEmptyArray<Tx>(odims);
- getQueue().enqueue(kernel::join<Tx, Ty>, out, dim, first, second);
+ ENQUEUE(kernel::join<Tx, Ty>, out, dim, first, second);
return out;
}
@@ -72,34 +71,34 @@ Array<T> join(const int dim, const std::vector<Array<T>> &inputs)
switch(n_arrays) {
case 1:
- getQueue().enqueue(kernel::join<T, 1>, dim, out, inputs);
+ ENQUEUE(kernel::join<T, 1>, dim, out, inputs);
break;
case 2:
- getQueue().enqueue(kernel::join<T, 2>, dim, out, inputs);
+ ENQUEUE(kernel::join<T, 2>, dim, out, inputs);
break;
case 3:
- getQueue().enqueue(kernel::join<T, 3>, dim, out, inputs);
+ ENQUEUE(kernel::join<T, 3>, dim, out, inputs);
break;
case 4:
- getQueue().enqueue(kernel::join<T, 4>, dim, out, inputs);
+ ENQUEUE(kernel::join<T, 4>, dim, out, inputs);
break;
case 5:
- getQueue().enqueue(kernel::join<T, 5>, dim, out, inputs);
+ ENQUEUE(kernel::join<T, 5>, dim, out, inputs);
break;
case 6:
- getQueue().enqueue(kernel::join<T, 6>, dim, out, inputs);
+ ENQUEUE(kernel::join<T, 6>, dim, out, inputs);
break;
case 7:
- getQueue().enqueue(kernel::join<T, 7>, dim, out, inputs);
+ ENQUEUE(kernel::join<T, 7>, dim, out, inputs);
break;
case 8:
- getQueue().enqueue(kernel::join<T, 8>, dim, out, inputs);
+ ENQUEUE(kernel::join<T, 8>, dim, out, inputs);
break;
case 9:
- getQueue().enqueue(kernel::join<T, 9>, dim, out, inputs);
+ ENQUEUE(kernel::join<T, 9>, dim, out, inputs);
break;
case 10:
- getQueue().enqueue(kernel::join<T,10>, dim, out, inputs);
+ ENQUEUE(kernel::join<T,10>, dim, out, inputs);
break;
}
diff --git a/src/backend/cpu/lookup.cpp b/src/backend/cpu/lookup.cpp
index 457cdae..4cc5359 100644
--- a/src/backend/cpu/lookup.cpp
+++ b/src/backend/cpu/lookup.cpp
@@ -9,8 +9,7 @@
#include <lookup.hpp>
#include <cstdlib>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/lookup.hpp>
namespace cpu
@@ -30,7 +29,7 @@ Array<in_t> lookup(const Array<in_t> &input, const Array<idx_t> &indices, const
Array<in_t> out = createEmptyArray<in_t>(oDims);
- getQueue().enqueue(kernel::lookup<in_t, idx_t>, out, input, indices, dim);
+ ENQUEUE(kernel::lookup<in_t, idx_t>, out, input, indices, dim);
return out;
}
diff --git a/src/backend/cpu/lu.cpp b/src/backend/cpu/lu.cpp
index f0e1593..551c9c9 100644
--- a/src/backend/cpu/lu.cpp
+++ b/src/backend/cpu/lu.cpp
@@ -17,8 +17,7 @@
#include <cassert>
#include <range.hpp>
#include <lapack_helper.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/lu.hpp>
namespace cpu
@@ -59,7 +58,7 @@ void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in)
lower = createEmptyArray<T>(ldims);
upper = createEmptyArray<T>(udims);
- getQueue().enqueue(kernel::lu_split<T>, lower, upper, in_copy);
+ ENQUEUE(kernel::lu_split<T>, lower, upper, in_copy);
}
template<typename T>
@@ -74,11 +73,11 @@ Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
dim4 iDims = in.dims();
getrf_func<T>()(AF_LAPACK_COL_MAJOR, iDims[0], iDims[1], in.get(), in.strides()[1], pivot.get());
};
- getQueue().enqueue(func, in, pivot);
+ ENQUEUE(func, in, pivot);
if(convert_pivot) {
Array<int> p = range<int>(dim4(iDims[0]), 0);
- getQueue().enqueue(kernel::convertPivot, p, pivot);
+ ENQUEUE(kernel::convertPivot, p, pivot);
return p;
} else {
return pivot;
diff --git a/src/backend/cpu/match_template.cpp b/src/backend/cpu/match_template.cpp
index e5b030b..724b773 100644
--- a/src/backend/cpu/match_template.cpp
+++ b/src/backend/cpu/match_template.cpp
@@ -12,8 +12,7 @@
#include <ArrayInfo.hpp>
#include <Array.hpp>
#include <match_template.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/match_template.hpp>
using af::dim4;
@@ -29,7 +28,7 @@ Array<OutT> match_template(const Array<InT> &sImg, const Array<InT> &tImg)
Array<OutT> out = createEmptyArray<OutT>(sImg.dims());
- getQueue().enqueue(kernel::matchTemplate<OutT, InT, MatchT>, out, sImg, tImg);
+ ENQUEUE(kernel::matchTemplate<OutT, InT, MatchT>, out, sImg, tImg);
return out;
}
diff --git a/src/backend/cpu/meanshift.cpp b/src/backend/cpu/meanshift.cpp
index 6c3417a..f4a0b29 100644
--- a/src/backend/cpu/meanshift.cpp
+++ b/src/backend/cpu/meanshift.cpp
@@ -16,8 +16,7 @@
#include <algorithm>
#include <err_cpu.hpp>
#include <math.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/meanshift.hpp>
using af::dim4;
@@ -33,7 +32,7 @@ Array<T> meanshift(const Array<T> &in, const float &s_sigma, const float &c_sig
Array<T> out = createEmptyArray<T>(in.dims());
- getQueue().enqueue(kernel::meanShift<T, is_color>, out, in, s_sigma, c_sigma, iter);
+ ENQUEUE(kernel::meanShift<T, is_color>, out, in, s_sigma, c_sigma, iter);
return out;
}
diff --git a/src/backend/cpu/medfilt.cpp b/src/backend/cpu/medfilt.cpp
index 06cc0df..9e761c6 100644
--- a/src/backend/cpu/medfilt.cpp
+++ b/src/backend/cpu/medfilt.cpp
@@ -12,8 +12,7 @@
#include <ArrayInfo.hpp>
#include <Array.hpp>
#include <medfilt.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/medfilt.hpp>
using af::dim4;
@@ -28,7 +27,7 @@ Array<T> medfilt(const Array<T> &in, dim_t w_len, dim_t w_wid)
Array<T> out = createEmptyArray<T>(in.dims());
- getQueue().enqueue(kernel::medfilt<T, pad>, out, in, w_len, w_wid);
+ ENQUEUE(kernel::medfilt<T, pad>, out, in, w_len, w_wid);
return out;
}
diff --git a/src/backend/cpu/memory.cpp b/src/backend/cpu/memory.cpp
index e11f994..79f2e57 100644
--- a/src/backend/cpu/memory.cpp
+++ b/src/backend/cpu/memory.cpp
@@ -14,8 +14,7 @@
#include <dispatch.hpp>
#include <cstdlib>
#include <mutex>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
diff --git a/src/backend/cpu/morph.cpp b/src/backend/cpu/morph.cpp
index 462319d..337e8a9 100644
--- a/src/backend/cpu/morph.cpp
+++ b/src/backend/cpu/morph.cpp
@@ -13,8 +13,7 @@
#include <Array.hpp>
#include <morph.hpp>
#include <algorithm>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/morph.hpp>
using af::dim4;
@@ -30,7 +29,7 @@ Array<T> morph(const Array<T> &in, const Array<T> &mask)
Array<T> out = createEmptyArray<T>(in.dims());
- getQueue().enqueue(kernel::morph<T, isDilation>, out, in, mask);
+ ENQUEUE(kernel::morph<T, isDilation>, out, in, mask);
return out;
}
@@ -43,7 +42,7 @@ Array<T> morph3d(const Array<T> &in, const Array<T> &mask)
Array<T> out = createEmptyArray<T>(in.dims());
- getQueue().enqueue(kernel::morph3d<T, isDilation>, out, in, mask);
+ ENQUEUE(kernel::morph3d<T, isDilation>, out, in, mask);
return out;
}
diff --git a/src/backend/cpu/nearest_neighbour.cpp b/src/backend/cpu/nearest_neighbour.cpp
index 8292562..a3c2bb1 100644
--- a/src/backend/cpu/nearest_neighbour.cpp
+++ b/src/backend/cpu/nearest_neighbour.cpp
@@ -12,8 +12,7 @@
#include <ArrayInfo.hpp>
#include <Array.hpp>
#include <handle.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/nearest_neighbour.hpp>
using af::dim4;
@@ -43,13 +42,13 @@ void nearest_neighbour(Array<uint>& idx, Array<To>& dist,
switch(dist_type) {
case AF_SAD:
- getQueue().enqueue(kernel::nearest_neighbour<T, To, AF_SAD>, idx, dist, query, train, dist_dim, n_dist);
+ ENQUEUE(kernel::nearest_neighbour<T, To, AF_SAD>, idx, dist, query, train, dist_dim, n_dist);
break;
case AF_SSD:
- getQueue().enqueue(kernel::nearest_neighbour<T, To, AF_SSD>, idx, dist, query, train, dist_dim, n_dist);
+ ENQUEUE(kernel::nearest_neighbour<T, To, AF_SSD>, idx, dist, query, train, dist_dim, n_dist);
break;
case AF_SHD:
- getQueue().enqueue(kernel::nearest_neighbour<T, To, AF_SHD>, idx, dist, query, train, dist_dim, n_dist);
+ ENQUEUE(kernel::nearest_neighbour<T, To, AF_SHD>, idx, dist, query, train, dist_dim, n_dist);
break;
default:
AF_ERROR("Unsupported dist_type", AF_ERR_NOT_CONFIGURED);
diff --git a/src/backend/cpu/orb.cpp b/src/backend/cpu/orb.cpp
index 5dd9326..649619e 100644
--- a/src/backend/cpu/orb.cpp
+++ b/src/backend/cpu/orb.cpp
@@ -18,8 +18,7 @@
#include <convolve.hpp>
#include <memory.hpp>
#include <cstring>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/orb.hpp>
using af::dim4;
diff --git a/src/backend/cpu/platform.cpp b/src/backend/cpu/platform.cpp
index c4ac0af..98cfad4 100644
--- a/src/backend/cpu/platform.cpp
+++ b/src/backend/cpu/platform.cpp
@@ -9,9 +9,8 @@
#include <af/version.h>
#include <af/defines.h>
-#include <platform.hpp>
+#include <debug_cpu.hpp>
#include <sstream>
-#include <async_queue.hpp>
#include <array>
#include <algorithm>
#include <iostream>
diff --git a/src/backend/cpu/plot.cpp b/src/backend/cpu/plot.cpp
index 9cc7d9d..8afdea2 100644
--- a/src/backend/cpu/plot.cpp
+++ b/src/backend/cpu/plot.cpp
@@ -13,8 +13,7 @@
#include <plot.hpp>
#include <err_cpu.hpp>
#include <graphics_common.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
using af::dim4;
diff --git a/src/backend/cpu/plot3.cpp b/src/backend/cpu/plot3.cpp
index 35a7b25..c7beed6 100644
--- a/src/backend/cpu/plot3.cpp
+++ b/src/backend/cpu/plot3.cpp
@@ -13,8 +13,7 @@
#include <plot3.hpp>
#include <err_cpu.hpp>
#include <graphics_common.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
using af::dim4;
diff --git a/src/backend/cpu/qr.cpp b/src/backend/cpu/qr.cpp
index 78631fc..ca04ec9 100644
--- a/src/backend/cpu/qr.cpp
+++ b/src/backend/cpu/qr.cpp
@@ -17,8 +17,7 @@
#include <err_cpu.hpp>
#include <triangle.hpp>
#include <lapack_helper.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
@@ -79,7 +78,7 @@ void qr(Array<T> &q, Array<T> &r, Array<T> &t, const Array<T> &in)
gqr_func<T>()(AF_LAPACK_COL_MAJOR, M, M, min(M, N), q.get(), q.strides()[1], t.get());
};
q.resetDims(dim4(M, M));
- getQueue().enqueue(func, q, t, M, N);
+ ENQUEUE(func, q, t, M, N);
}
template<typename T>
@@ -95,7 +94,7 @@ Array<T> qr_inplace(Array<T> &in)
auto func = [=] (Array<T> in, Array<T> t, int M, int N) {
geqrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N, in.get(), in.strides()[1], t.get());
};
- getQueue().enqueue(func, in, t, M, N);
+ ENQUEUE(func, in, t, M, N);
return t;
}
diff --git a/src/backend/cpu/random.cpp b/src/backend/cpu/random.cpp
index 55cf295..f49420f 100644
--- a/src/backend/cpu/random.cpp
+++ b/src/backend/cpu/random.cpp
@@ -12,8 +12,7 @@
#include <af/defines.h>
#include <Array.hpp>
#include <random.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/random.hpp>
namespace cpu
@@ -23,7 +22,7 @@ template<typename T>
Array<T> randu(const af::dim4 &dims)
{
Array<T> outArray = createEmptyArray<T>(dims);
- getQueue().enqueue(kernel::randu<T>, outArray);
+ ENQUEUE(kernel::randu<T>, outArray);
return outArray;
}
@@ -46,7 +45,7 @@ template<typename T>
Array<T> randn(const af::dim4 &dims)
{
Array<T> outArray = createEmptyArray<T>(dims);
- getQueue().enqueue(kernel::randn<T>, outArray);
+ ENQUEUE(kernel::randn<T>, outArray);
return outArray;
}
@@ -81,7 +80,7 @@ Array<char> randu(const af::dim4 &dims)
outPtr[i] = gen() > 0.5;
}
};
- getQueue().enqueue(func, outArray);
+ ENQUEUE(func, outArray);
return outArray;
}
@@ -93,7 +92,7 @@ void setSeed(const uintl seed)
kernel::is_first = false;
kernel::gen_seed = seed;
};
- getQueue().enqueue(f, seed);
+ ENQUEUE(f, seed);
}
uintl getSeed()
diff --git a/src/backend/cpu/range.cpp b/src/backend/cpu/range.cpp
index b5ba5f8..6be78d5 100644
--- a/src/backend/cpu/range.cpp
+++ b/src/backend/cpu/range.cpp
@@ -14,8 +14,7 @@
#include <err_cpu.hpp>
#include <algorithm>
#include <numeric>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/range.hpp>
namespace cpu
@@ -33,10 +32,10 @@ Array<T> range(const dim4& dims, const int seq_dim)
Array<T> out = createEmptyArray<T>(dims);
switch(_seq_dim) {
- case 0: getQueue().enqueue(kernel::range<T, 0>, out); break;
- case 1: getQueue().enqueue(kernel::range<T, 1>, out); break;
- case 2: getQueue().enqueue(kernel::range<T, 2>, out); break;
- case 3: getQueue().enqueue(kernel::range<T, 3>, out); break;
+ case 0: ENQUEUE(kernel::range<T, 0>, out); break;
+ case 1: ENQUEUE(kernel::range<T, 1>, out); break;
+ case 2: ENQUEUE(kernel::range<T, 2>, out); break;
+ case 3: ENQUEUE(kernel::range<T, 3>, out); break;
default : AF_ERROR("Invalid rep selection", AF_ERR_ARG);
}
diff --git a/src/backend/cpu/reduce.cpp b/src/backend/cpu/reduce.cpp
index cd44b5e..90ad1f9 100644
--- a/src/backend/cpu/reduce.cpp
+++ b/src/backend/cpu/reduce.cpp
@@ -15,8 +15,7 @@
#include <ops.hpp>
#include <functional>
#include <complex>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/reduce.hpp>
using af::dim4;
@@ -56,7 +55,7 @@ Array<To> reduce(const Array<Ti> &in, const int dim, bool change_nan, double nan
, kernel::reduce_dim<op, Ti, To, 3>()
, kernel::reduce_dim<op, Ti, To, 4>()};
- getQueue().enqueue(reduce_funcs[in.ndims() - 1], out, 0, in, 0, dim, change_nan, nanval);
+ ENQUEUE(reduce_funcs[in.ndims() - 1], out, 0, in, 0, dim, change_nan, nanval);
return out;
}
diff --git a/src/backend/cpu/regions.cpp b/src/backend/cpu/regions.cpp
index ffac11c..eafc161 100644
--- a/src/backend/cpu/regions.cpp
+++ b/src/backend/cpu/regions.cpp
@@ -17,8 +17,7 @@
#include <map>
#include <set>
#include <algorithm>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/regions.hpp>
using af::dim4;
@@ -34,7 +33,7 @@ Array<T> regions(const Array<char> &in, af_connectivity connectivity)
Array<T> out = createValueArray(in.dims(), (T)0);
out.eval();
- getQueue().enqueue(kernel::regions<T>, out, in, connectivity);
+ ENQUEUE(kernel::regions<T>, out, in, connectivity);
return out;
}
diff --git a/src/backend/cpu/reorder.cpp b/src/backend/cpu/reorder.cpp
index 162039b..237e5d6 100644
--- a/src/backend/cpu/reorder.cpp
+++ b/src/backend/cpu/reorder.cpp
@@ -9,8 +9,7 @@
#include <Array.hpp>
#include <reorder.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/reorder.hpp>
namespace cpu
@@ -27,7 +26,7 @@ Array<T> reorder(const Array<T> &in, const af::dim4 &rdims)
oDims[i] = iDims[rdims[i]];
Array<T> out = createEmptyArray<T>(oDims);
- getQueue().enqueue(kernel::reorder<T>, out, in, oDims, rdims);
+ ENQUEUE(kernel::reorder<T>, out, in, oDims, rdims);
return out;
}
diff --git a/src/backend/cpu/resize.cpp b/src/backend/cpu/resize.cpp
index 9a5c85b..d6349a9 100644
--- a/src/backend/cpu/resize.cpp
+++ b/src/backend/cpu/resize.cpp
@@ -12,8 +12,7 @@
#include <math.hpp>
#include <types.hpp>
#include <af/traits.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/resize.hpp>
namespace cpu
@@ -32,11 +31,11 @@ Array<T> resize(const Array<T> &in, const dim_t odim0, const dim_t odim1,
switch(method) {
case AF_INTERP_NEAREST:
- getQueue().enqueue(kernel::resize<T, AF_INTERP_NEAREST>, out, in); break;
+ ENQUEUE(kernel::resize<T, AF_INTERP_NEAREST>, out, in); break;
case AF_INTERP_BILINEAR:
- getQueue().enqueue(kernel::resize<T, AF_INTERP_BILINEAR>, out, in); break;
+ ENQUEUE(kernel::resize<T, AF_INTERP_BILINEAR>, out, in); break;
case AF_INTERP_LOWER:
- getQueue().enqueue(kernel::resize<T, AF_INTERP_LOWER>, out, in); break;
+ ENQUEUE(kernel::resize<T, AF_INTERP_LOWER>, out, in); break;
default: break;
}
return out;
diff --git a/src/backend/cpu/rotate.cpp b/src/backend/cpu/rotate.cpp
index e81ee04..289f369 100644
--- a/src/backend/cpu/rotate.cpp
+++ b/src/backend/cpu/rotate.cpp
@@ -9,8 +9,7 @@
#include <Array.hpp>
#include <rotate.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include "transform_interp.hpp"
#include <kernel/rotate.hpp>
@@ -27,13 +26,13 @@ Array<T> rotate(const Array<T> &in, const float theta, const af::dim4 &odims,
switch(method) {
case AF_INTERP_NEAREST:
- getQueue().enqueue(kernel::rotate<T, AF_INTERP_NEAREST>, out, in, theta);
+ ENQUEUE(kernel::rotate<T, AF_INTERP_NEAREST>, out, in, theta);
break;
case AF_INTERP_BILINEAR:
- getQueue().enqueue(kernel::rotate<T, AF_INTERP_BILINEAR>, out, in, theta);
+ ENQUEUE(kernel::rotate<T, AF_INTERP_BILINEAR>, out, in, theta);
break;
case AF_INTERP_LOWER:
- getQueue().enqueue(kernel::rotate<T, AF_INTERP_LOWER>, out, in, theta);
+ ENQUEUE(kernel::rotate<T, AF_INTERP_LOWER>, out, in, theta);
break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
diff --git a/src/backend/cpu/scan.cpp b/src/backend/cpu/scan.cpp
index 615744f..adeb3d2 100644
--- a/src/backend/cpu/scan.cpp
+++ b/src/backend/cpu/scan.cpp
@@ -14,8 +14,7 @@
#include <Array.hpp>
#include <scan.hpp>
#include <ops.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/scan.hpp>
using af::dim4;
@@ -34,19 +33,19 @@ Array<To> scan(const Array<Ti>& in, const int dim)
switch (in.ndims()) {
case 1:
kernel::scan_dim<op, Ti, To, 1> func1;
- getQueue().enqueue(func1, out, 0, in, 0, dim);
+ ENQUEUE(func1, out, 0, in, 0, dim);
break;
case 2:
kernel::scan_dim<op, Ti, To, 2> func2;
- getQueue().enqueue(func2, out, 0, in, 0, dim);
+ ENQUEUE(func2, out, 0, in, 0, dim);
break;
case 3:
kernel::scan_dim<op, Ti, To, 3> func3;
- getQueue().enqueue(func3, out, 0, in, 0, dim);
+ ENQUEUE(func3, out, 0, in, 0, dim);
break;
case 4:
kernel::scan_dim<op, Ti, To, 4> func4;
- getQueue().enqueue(func4, out, 0, in, 0, dim);
+ ENQUEUE(func4, out, 0, in, 0, dim);
break;
}
diff --git a/src/backend/cpu/select.cpp b/src/backend/cpu/select.cpp
index d9a6795..4f845bc 100644
--- a/src/backend/cpu/select.cpp
+++ b/src/backend/cpu/select.cpp
@@ -10,8 +10,7 @@
#include <ArrayInfo.hpp>
#include <Array.hpp>
#include <select.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/select.hpp>
using af::dim4;
@@ -26,7 +25,7 @@ void select(Array<T> &out, const Array<char> &cond, const Array<T> &a, const Arr
cond.eval();
a.eval();
b.eval();
- getQueue().enqueue(kernel::select<T>, out, cond, a, b);
+ ENQUEUE(kernel::select<T>, out, cond, a, b);
}
template<typename T, bool flip>
@@ -35,7 +34,7 @@ void select_scalar(Array<T> &out, const Array<char> &cond, const Array<T> &a, co
out.eval();
cond.eval();
a.eval();
- getQueue().enqueue(kernel::select_scalar<T, flip>, out, cond, a, b);
+ ENQUEUE(kernel::select_scalar<T, flip>, out, cond, a, b);
}
#define INSTANTIATE(T) \
diff --git a/src/backend/cpu/set.cpp b/src/backend/cpu/set.cpp
index d6321bb..49ce186 100644
--- a/src/backend/cpu/set.cpp
+++ b/src/backend/cpu/set.cpp
@@ -18,8 +18,7 @@
#include <sort.hpp>
#include <err_cpu.hpp>
#include <vector>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
diff --git a/src/backend/cpu/shift.cpp b/src/backend/cpu/shift.cpp
index eca1e50..fd56e4c 100644
--- a/src/backend/cpu/shift.cpp
+++ b/src/backend/cpu/shift.cpp
@@ -9,8 +9,7 @@
#include <Array.hpp>
#include <shift.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/shift.hpp>
namespace cpu
@@ -24,7 +23,7 @@ Array<T> shift(const Array<T> &in, const int sdims[4])
Array<T> out = createEmptyArray<T>(in.dims());
const af::dim4 temp(sdims[0], sdims[1], sdims[2], sdims[3]);
- getQueue().enqueue(kernel::shift<T>, out, in, temp);
+ ENQUEUE(kernel::shift<T>, out, in, temp);
return out;
}
diff --git a/src/backend/cpu/sobel.cpp b/src/backend/cpu/sobel.cpp
index 161266d..86c7363 100644
--- a/src/backend/cpu/sobel.cpp
+++ b/src/backend/cpu/sobel.cpp
@@ -13,8 +13,7 @@
#include <Array.hpp>
#include <sobel.hpp>
#include <convolve.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/sobel.hpp>
using af::dim4;
@@ -32,8 +31,8 @@ sobelDerivatives(const Array<Ti> &img, const unsigned &ker_size)
Array<To> dx = createEmptyArray<To>(img.dims());
Array<To> dy = createEmptyArray<To>(img.dims());
- getQueue().enqueue(kernel::derivative<Ti, To, true >, dx, img);
- getQueue().enqueue(kernel::derivative<Ti, To, false>, dy, img);
+ ENQUEUE(kernel::derivative<Ti, To, true >, dx, img);
+ ENQUEUE(kernel::derivative<Ti, To, false>, dy, img);
return std::make_pair(dx, dy);
}
diff --git a/src/backend/cpu/solve.cpp b/src/backend/cpu/solve.cpp
index 0243088..5d1ec3b 100644
--- a/src/backend/cpu/solve.cpp
+++ b/src/backend/cpu/solve.cpp
@@ -16,8 +16,7 @@
#include <cassert>
#include <err_cpu.hpp>
#include <lapack_helper.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
@@ -88,7 +87,7 @@ Array<T> solveLU(const Array<T> &A, const Array<int> &pivot,
N, NRHS, A.get(), A.strides()[1],
pivot.get(), B.get(), B.strides()[1]);
};
- getQueue().enqueue(func, A, B, pivot, N, NRHS);
+ ENQUEUE(func, A, B, pivot, N, NRHS);
return B;
}
@@ -109,7 +108,7 @@ Array<T> triangleSolve(const Array<T> &A, const Array<T> &b, const af_mat_prop o
A.get(), A.strides()[1],
B.get(), B.strides()[1]);
};
- getQueue().enqueue(func, A, B, N, NRHS, options);
+ ENQUEUE(func, A, B, N, NRHS, options);
return B;
}
@@ -139,7 +138,7 @@ Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
gesv_func<T>()(AF_LAPACK_COL_MAJOR, N, K, A.get(), A.strides()[1],
pivot.get(), B.get(), B.strides()[1]);
};
- getQueue().enqueue(func, A, B, pivot, N, K);
+ ENQUEUE(func, A, B, pivot, N, K);
} else {
auto func = [=] (Array<T> A, Array<T> B, int M, int N, int K) {
int sM = A.strides()[1];
@@ -151,7 +150,7 @@ Array<T> solve(const Array<T> &a, const Array<T> &b, const af_mat_prop options)
B.get(), max(sM, sN));
};
B.resetDims(dim4(N, K));
- getQueue().enqueue(func, A, B, M, N, K);
+ ENQUEUE(func, A, B, M, N, K);
}
return B;
diff --git a/src/backend/cpu/sort.cpp b/src/backend/cpu/sort.cpp
index 6a0465c..104a3df 100644
--- a/src/backend/cpu/sort.cpp
+++ b/src/backend/cpu/sort.cpp
@@ -13,8 +13,7 @@
#include <copy.hpp>
#include <algorithm>
#include <functional>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/sort.hpp>
namespace cpu
@@ -27,7 +26,7 @@ Array<T> sort(const Array<T> &in, const unsigned dim)
Array<T> out = copyArray<T>(in);
switch(dim) {
- case 0: getQueue().enqueue(kernel::sort0<T, isAscending>, out); break;
+ case 0: ENQUEUE(kernel::sort0<T, isAscending>, out); break;
default: AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
}
return out;
diff --git a/src/backend/cpu/sort_by_key.cpp b/src/backend/cpu/sort_by_key.cpp
index 409b825..c683288 100644
--- a/src/backend/cpu/sort_by_key.cpp
+++ b/src/backend/cpu/sort_by_key.cpp
@@ -9,8 +9,7 @@
#include <Array.hpp>
#include <sort_by_key.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/sort_by_key.hpp>
namespace cpu
@@ -29,7 +28,7 @@ void sort_by_key(Array<Tk> &okey, Array<Tv> &oval,
oidx.eval();
switch(dim) {
- case 0: getQueue().enqueue(kernel::sort0_by_key<Tk, Tv, isAscending>,
+ case 0: ENQUEUE(kernel::sort0_by_key<Tk, Tv, isAscending>,
okey, oval, oidx, ikey, ival); break;
default: AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
}
diff --git a/src/backend/cpu/sort_index.cpp b/src/backend/cpu/sort_index.cpp
index ed6afea..c8c6d6e 100644
--- a/src/backend/cpu/sort_index.cpp
+++ b/src/backend/cpu/sort_index.cpp
@@ -12,8 +12,7 @@
#include <math.hpp>
#include <algorithm>
#include <numeric>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/sort_index.hpp>
namespace cpu
@@ -27,7 +26,7 @@ void sort_index(Array<T> &val, Array<uint> &idx, const Array<T> &in, const uint
val = createEmptyArray<T>(in.dims());
idx = createEmptyArray<uint>(in.dims());
switch(dim) {
- case 0: getQueue().enqueue(kernel::sort0_index<T, isAscending>, val, idx, in); break;
+ case 0: ENQUEUE(kernel::sort0_index<T, isAscending>, val, idx, in); break;
default: AF_ERROR("Not Supported", AF_ERR_NOT_SUPPORTED);
}
}
diff --git a/src/backend/cpu/surface.cpp b/src/backend/cpu/surface.cpp
index 116c784..00d2b00 100644
--- a/src/backend/cpu/surface.cpp
+++ b/src/backend/cpu/surface.cpp
@@ -13,8 +13,7 @@
#include <surface.hpp>
#include <err_cpu.hpp>
#include <graphics_common.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
using af::dim4;
diff --git a/src/backend/cpu/susan.cpp b/src/backend/cpu/susan.cpp
index 6e8d0fe..4f1c327 100644
--- a/src/backend/cpu/susan.cpp
+++ b/src/backend/cpu/susan.cpp
@@ -12,8 +12,7 @@
#include <cmath>
#include <math.hpp>
#include <memory>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/susan.hpp>
using af::features;
@@ -40,9 +39,9 @@ unsigned susan(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out,
auto corners_found= std::shared_ptr<unsigned>(memAlloc<unsigned>(1), memFree<unsigned>);
corners_found.get()[0] = 0;
- getQueue().enqueue(kernel::susan_responses<T>, response, in, idims[0], idims[1],
+ ENQUEUE(kernel::susan_responses<T>, response, in, idims[0], idims[1],
radius, diff_thr, geom_thr, edge);
- getQueue().enqueue(kernel::non_maximal<T>, x_corners, y_corners, resp_corners, corners_found,
+ ENQUEUE(kernel::non_maximal<T>, x_corners, y_corners, resp_corners, corners_found,
idims[0], idims[1], response, edge, corner_lim);
getQueue().sync();
diff --git a/src/backend/cpu/svd.cpp b/src/backend/cpu/svd.cpp
index 92912ca..3ce627c 100644
--- a/src/backend/cpu/svd.cpp
+++ b/src/backend/cpu/svd.cpp
@@ -15,8 +15,7 @@
#if defined(WITH_CPU_LINEAR_ALGEBRA)
#include <lapack_helper.hpp>
#include <copy.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
namespace cpu
{
@@ -87,7 +86,7 @@ void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
s.get(), u.get(), u.strides()[1], vt.get(), vt.strides()[1], &superb[0]);
#endif
};
- getQueue().enqueue(func, s, u, vt, in);
+ ENQUEUE(func, s, u, vt, in);
}
template <typename T, typename Tr>
diff --git a/src/backend/cpu/tile.cpp b/src/backend/cpu/tile.cpp
index 6526917..9237a79 100644
--- a/src/backend/cpu/tile.cpp
+++ b/src/backend/cpu/tile.cpp
@@ -9,8 +9,7 @@
#include <Array.hpp>
#include <tile.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/tile.hpp>
namespace cpu
@@ -31,7 +30,7 @@ Array<T> tile(const Array<T> &in, const af::dim4 &tileDims)
Array<T> out = createEmptyArray<T>(oDims);
- getQueue().enqueue(kernel::tile<T>, out, in);
+ ENQUEUE(kernel::tile<T>, out, in);
return out;
}
diff --git a/src/backend/cpu/transform.cpp b/src/backend/cpu/transform.cpp
index fc71458..5874e7a 100644
--- a/src/backend/cpu/transform.cpp
+++ b/src/backend/cpu/transform.cpp
@@ -10,8 +10,7 @@
#include <Array.hpp>
#include <transform.hpp>
#include <math.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include "transform_interp.hpp"
#include <kernel/transform.hpp>
@@ -29,13 +28,13 @@ Array<T> transform(const Array<T> &in, const Array<float> &transform, const af::
switch(method) {
case AF_INTERP_NEAREST :
- getQueue().enqueue(kernel::transform<T, AF_INTERP_NEAREST >, out, in, transform, inverse);
+ ENQUEUE(kernel::transform<T, AF_INTERP_NEAREST >, out, in, transform, inverse);
break;
case AF_INTERP_BILINEAR:
- getQueue().enqueue(kernel::transform<T, AF_INTERP_BILINEAR>, out, in, transform, inverse);
+ ENQUEUE(kernel::transform<T, AF_INTERP_BILINEAR>, out, in, transform, inverse);
break;
case AF_INTERP_LOWER :
- getQueue().enqueue(kernel::transform<T, AF_INTERP_LOWER >, out, in, transform, inverse);
+ ENQUEUE(kernel::transform<T, AF_INTERP_LOWER >, out, in, transform, inverse);
break;
default: AF_ERROR("Unsupported interpolation type", AF_ERR_ARG); break;
}
diff --git a/src/backend/cpu/transpose.cpp b/src/backend/cpu/transpose.cpp
index 32663e1..c1d5d1d 100644
--- a/src/backend/cpu/transpose.cpp
+++ b/src/backend/cpu/transpose.cpp
@@ -12,8 +12,7 @@
#include <ArrayInfo.hpp>
#include <Array.hpp>
#include <transpose.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/transpose.hpp>
#include <utility>
#include <cassert>
@@ -33,7 +32,7 @@ Array<T> transpose(const Array<T> &in, const bool conjugate)
// create an array with first two dimensions swapped
Array<T> out = createEmptyArray<T>(outDims);
- getQueue().enqueue(kernel::transpose<T>, out, in, conjugate);
+ ENQUEUE(kernel::transpose<T>, out, in, conjugate);
return out;
}
@@ -42,7 +41,7 @@ template<typename T>
void transpose_inplace(Array<T> &in, const bool conjugate)
{
in.eval();
- getQueue().enqueue(kernel::transpose_inplace<T>, in, conjugate);
+ ENQUEUE(kernel::transpose_inplace<T>, in, conjugate);
}
#define INSTANTIATE(T) \
diff --git a/src/backend/cpu/triangle.cpp b/src/backend/cpu/triangle.cpp
index 2a9553c..fbc7f65 100644
--- a/src/backend/cpu/triangle.cpp
+++ b/src/backend/cpu/triangle.cpp
@@ -12,8 +12,7 @@
#include <Array.hpp>
#include <triangle.hpp>
#include <math.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/triangle.hpp>
namespace cpu
@@ -22,7 +21,7 @@ namespace cpu
template<typename T, bool is_upper, bool is_unit_diag>
void triangle(Array<T> &out, const Array<T> &in)
{
- getQueue().enqueue(kernel::triangle<T, is_upper, is_unit_diag>, out, in);
+ ENQUEUE(kernel::triangle<T, is_upper, is_unit_diag>, out, in);
}
template<typename T, bool is_upper, bool is_unit_diag>
diff --git a/src/backend/cpu/unwrap.cpp b/src/backend/cpu/unwrap.cpp
index 1aa37a4..d40acde 100644
--- a/src/backend/cpu/unwrap.cpp
+++ b/src/backend/cpu/unwrap.cpp
@@ -11,8 +11,7 @@
#include <unwrap.hpp>
#include <dispatch.hpp>
#include <math.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/unwrap.hpp>
namespace cpu
@@ -37,9 +36,9 @@ Array<T> unwrap(const Array<T> &in, const dim_t wx, const dim_t wy,
Array<T> outArray = createEmptyArray<T>(odims);
if (is_column) {
- getQueue().enqueue(kernel::unwrap_dim<T, 1>, outArray, in, wx, wy, sx, sy, px, py);
+ ENQUEUE(kernel::unwrap_dim<T, 1>, outArray, in, wx, wy, sx, sy, px, py);
} else {
- getQueue().enqueue(kernel::unwrap_dim<T, 0>, outArray, in, wx, wy, sx, sy, px, py);
+ ENQUEUE(kernel::unwrap_dim<T, 0>, outArray, in, wx, wy, sx, sy, px, py);
}
return outArray;
diff --git a/src/backend/cpu/where.cpp b/src/backend/cpu/where.cpp
index 018cbdf..734b768 100644
--- a/src/backend/cpu/where.cpp
+++ b/src/backend/cpu/where.cpp
@@ -16,8 +16,7 @@
#include <where.hpp>
#include <ops.hpp>
#include <vector>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
using af::dim4;
diff --git a/src/backend/cpu/wrap.cpp b/src/backend/cpu/wrap.cpp
index 07487e0..87de234 100644
--- a/src/backend/cpu/wrap.cpp
+++ b/src/backend/cpu/wrap.cpp
@@ -11,8 +11,7 @@
#include <wrap.hpp>
#include <dispatch.hpp>
#include <math.hpp>
-#include <platform.hpp>
-#include <async_queue.hpp>
+#include <debug_cpu.hpp>
#include <kernel/wrap.hpp>
namespace cpu
@@ -34,9 +33,9 @@ Array<T> wrap(const Array<T> &in,
in.eval();
if (is_column) {
- getQueue().enqueue(kernel::wrap_dim<T, 1>, out, in, wx, wy, sx, sy, px, py);
+ ENQUEUE(kernel::wrap_dim<T, 1>, out, in, wx, wy, sx, sy, px, py);
} else {
- getQueue().enqueue(kernel::wrap_dim<T, 0>, out, in, wx, wy, sx, sy, px, py);
+ ENQUEUE(kernel::wrap_dim<T, 0>, out, in, wx, wy, sx, sy, px, py);
}
return out;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list