[arrayfire] 95/284: template parameters style fixes in cpu kernel namespace fns
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:22 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit f2b84dd3ac65aea385bd0ac1a69aa0255e9b7169
Author: pradeep <pradeep at arrayfire.com>
Date: Sat Dec 19 13:19:32 2015 -0500
template parameters style fixes in cpu kernel namespace fns
---
src/backend/cpu/bilateral.cpp | 2 +-
src/backend/cpu/copy.cpp | 4 +-
src/backend/cpu/harris.cpp | 2 +-
src/backend/cpu/histogram.cpp | 2 +-
src/backend/cpu/kernel/Array.hpp | 8 +-
src/backend/cpu/kernel/approx1.hpp | 110 +++++++++++------------
src/backend/cpu/kernel/approx2.hpp | 130 ++++++++++++++--------------
src/backend/cpu/kernel/assign.hpp | 28 +++---
src/backend/cpu/kernel/bilateral.hpp | 55 +++++-------
src/backend/cpu/kernel/convolve.hpp | 154 ++++++++++++++++-----------------
src/backend/cpu/kernel/copy.hpp | 28 +++---
src/backend/cpu/kernel/diagonal.hpp | 14 +--
src/backend/cpu/kernel/diff.hpp | 33 +++----
src/backend/cpu/kernel/fast.hpp | 38 ++++----
src/backend/cpu/kernel/fftconvolve.hpp | 16 ++--
src/backend/cpu/kernel/gradient.hpp | 2 +
src/backend/cpu/kernel/harris.hpp | 21 +----
src/backend/cpu/kernel/histogram.hpp | 12 +--
src/backend/cpu/kernel/hsv_rgb.hpp | 14 +--
src/backend/cpu/kernel/identity.hpp | 6 +-
src/backend/cpu/kernel/iir.hpp | 4 +-
src/backend/cpu/kernel/index.hpp | 16 ++--
src/backend/cpu/kernel/lookup.hpp | 26 +++---
src/backend/cpu/utility.hpp | 35 +++++++-
24 files changed, 383 insertions(+), 377 deletions(-)
diff --git a/src/backend/cpu/bilateral.cpp b/src/backend/cpu/bilateral.cpp
index c751f99..bc3ad6e 100644
--- a/src/backend/cpu/bilateral.cpp
+++ b/src/backend/cpu/bilateral.cpp
@@ -29,7 +29,7 @@ Array<outType> bilateral(const Array<inType> &in, const float &s_sigma, const fl
in.eval();
const dim4 dims = in.dims();
Array<outType> out = createEmptyArray<outType>(dims);
- getQueue().enqueue(kernel::bilateral<inType, outType, isColor>, out, in, s_sigma, c_sigma);
+ getQueue().enqueue(kernel::bilateral<outType, inType, isColor>, out, in, s_sigma, c_sigma);
return out;
}
diff --git a/src/backend/cpu/copy.cpp b/src/backend/cpu/copy.cpp
index 84cb0d1..9f6068d 100644
--- a/src/backend/cpu/copy.cpp
+++ b/src/backend/cpu/copy.cpp
@@ -63,7 +63,7 @@ Array<outType> padArray(Array<inType> const &in, dim4 const &dims,
in.eval();
// FIXME:
getQueue().sync();
- getQueue().enqueue(kernel::copy<inType, outType>, ret, in, outType(default_value), factor);
+ getQueue().enqueue(kernel::copy<outType, inType>, ret, in, outType(default_value), factor);
return ret;
}
@@ -72,7 +72,7 @@ void copyArray(Array<outType> &out, Array<inType> const &in)
{
out.eval();
in.eval();
- getQueue().enqueue(kernel::copy<inType, outType>, out, in, scalar<outType>(0), 1.0);
+ getQueue().enqueue(kernel::copy<outType, inType>, out, in, scalar<outType>(0), 1.0);
}
#define INSTANTIATE(T) \
diff --git a/src/backend/cpu/harris.cpp b/src/backend/cpu/harris.cpp
index e5ff906..905b046 100644
--- a/src/backend/cpu/harris.cpp
+++ b/src/backend/cpu/harris.cpp
@@ -43,7 +43,7 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
for (unsigned i = 0; i < filter_len; i++)
h_filter[i] = (T)1.f / (filter_len);
} else {
- kernel::gaussian1D<convAccT>(h_filter, (int)filter_len, sigma);
+ gaussian1D<convAccT>(h_filter, (int)filter_len, sigma);
}
Array<convAccT> filter = createDeviceDataArray<convAccT>(dim4(filter_len), (const void*)h_filter);
diff --git a/src/backend/cpu/histogram.cpp b/src/backend/cpu/histogram.cpp
index 7e20247..19314e0 100644
--- a/src/backend/cpu/histogram.cpp
+++ b/src/backend/cpu/histogram.cpp
@@ -32,7 +32,7 @@ Array<outType> histogram(const Array<inType> &in,
Array<outType> out = createValueArray<outType>(outDims, outType(0));
out.eval();
- getQueue().enqueue(kernel::histogram<inType, outType, isLinear>,
+ getQueue().enqueue(kernel::histogram<outType, inType, isLinear>,
out, in, nbins, minval, maxval);
return out;
diff --git a/src/backend/cpu/kernel/Array.hpp b/src/backend/cpu/kernel/Array.hpp
index e492b92..08ade50 100644
--- a/src/backend/cpu/kernel/Array.hpp
+++ b/src/backend/cpu/kernel/Array.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
#include <platform.hpp>
@@ -15,16 +17,14 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
template<typename T>
void evalArray(Array<T> in)
{
in.setId(cpu::getActiveDeviceId());
T *ptr = in.data.get();
- dim4 odims = in.dims();
- dim4 ostrs = in.strides();
+ af::dim4 odims = in.dims();
+ af::dim4 ostrs = in.strides();
bool is_linear = in.node->isLinear(odims.get());
diff --git a/src/backend/cpu/kernel/approx1.hpp b/src/backend/cpu/kernel/approx1.hpp
index 51c4804..ab12ebc 100644
--- a/src/backend/cpu/kernel/approx1.hpp
+++ b/src/backend/cpu/kernel/approx1.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
#include <math.hpp>
@@ -15,115 +17,115 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
-template<typename Ty, typename Tp, af_interp_type method>
+template<typename InT, typename LocT, af_interp_type Method>
struct approx1_op
{
- void operator()(Ty *out, const af::dim4 &odims, const dim_t oElems,
- const Ty *in, const af::dim4 &idims, const dim_t iElems,
- const Tp *pos, const af::dim4 &pdims,
- const af::dim4 &ostrides, const af::dim4 &istrides, const af::dim4 &pstrides,
- const float offGrid, const bool pBatch,
- const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw)
+ void operator()(InT *out, af::dim4 const & odims, dim_t const oElems,
+ InT const * const in, af::dim4 const & idims, dim_t const iElems,
+ LocT const * const pos, af::dim4 const & pdims,
+ af::dim4 const & ostrides, af::dim4 const & istrides, af::dim4 const & pstrides,
+ float const offGrid, bool const pBatch,
+ dim_t const idx, dim_t const idy, dim_t const idz, dim_t const idw)
{
return;
}
};
-template<typename Ty, typename Tp>
-struct approx1_op<Ty, Tp, AF_INTERP_NEAREST>
+template<typename InT, typename LocT>
+struct approx1_op<InT, LocT, AF_INTERP_NEAREST>
{
- void operator()(Ty *out, const af::dim4 &odims, const dim_t oElems,
- const Ty *in, const af::dim4 &idims, const dim_t iElems,
- const Tp *pos, const af::dim4 &pdims,
- const af::dim4 &ostrides, const af::dim4 &istrides, const af::dim4 &pstrides,
- const float offGrid, const bool pBatch,
- const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw)
+ void operator()(InT *out, af::dim4 const & odims, dim_t const oElems,
+ InT const * const in, af::dim4 const & idims, dim_t const iElems,
+ LocT const * const pos, af::dim4 const & pdims,
+ af::dim4 const & ostrides, af::dim4 const & istrides, af::dim4 const & pstrides,
+ float const offGrid, bool const pBatch,
+ dim_t const idx, dim_t const idy, dim_t const idz, dim_t const idw)
{
dim_t pmId = idx;
if(pBatch) pmId += idw * pstrides[3] + idz * pstrides[2] + idy * pstrides[1];
- const Tp x = pos[pmId];
+ LocT const x = pos[pmId];
bool gFlag = false;
if (x < 0 || idims[0] < x+1) { // No need to check y
gFlag = true;
}
- const dim_t omId = idw * ostrides[3] + idz * ostrides[2]
+ dim_t const omId = idw * ostrides[3] + idz * ostrides[2]
+ idy * ostrides[1] + idx;
if(gFlag) {
- out[omId] = scalar<Ty>(offGrid);
+ out[omId] = scalar<InT>(offGrid);
} else {
dim_t ioff = idw * istrides[3] + idz * istrides[2]
+ idy * istrides[1];
- const dim_t iMem = round(x) + ioff;
+ dim_t const iMem = round(x) + ioff;
out[omId] = in[iMem];
}
}
};
-template<typename Ty, typename Tp>
-struct approx1_op<Ty, Tp, AF_INTERP_LINEAR>
+template<typename InT, typename LocT>
+struct approx1_op<InT, LocT, AF_INTERP_LINEAR>
{
- void operator()(Ty *out, const af::dim4 &odims, const dim_t oElems,
- const Ty *in, const af::dim4 &idims, const dim_t iElems,
- const Tp *pos, const af::dim4 &pdims,
- const af::dim4 &ostrides, const af::dim4 &istrides, const af::dim4 &pstrides,
- const float offGrid, const bool pBatch,
- const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw)
+ void operator()(InT *out, af::dim4 const & odims, dim_t const oElems,
+ InT const * const in, af::dim4 const & idims, dim_t const iElems,
+ LocT const * const pos, af::dim4 const & pdims,
+ af::dim4 const & ostrides, af::dim4 const & istrides, af::dim4 const & pstrides,
+ float const offGrid, bool const pBatch,
+ dim_t const idx, dim_t const idy, dim_t const idz, dim_t const idw)
{
dim_t pmId = idx;
if(pBatch) pmId += idw * pstrides[3] + idz * pstrides[2] + idy * pstrides[1];
- const Tp x = pos[pmId];
+ LocT const x = pos[pmId];
bool gFlag = false;
if (x < 0 || idims[0] < x+1) {
gFlag = true;
}
- const dim_t grid_x = floor(x); // nearest grid
- const Tp off_x = x - grid_x; // fractional offset
+ dim_t const grid_x = floor(x); // nearest grid
+ LocT const off_x = x - grid_x; // fractional offset
- const dim_t omId = idw * ostrides[3] + idz * ostrides[2]
+ dim_t const omId = idw * ostrides[3] + idz * ostrides[2]
+ idy * ostrides[1] + idx;
if(gFlag) {
- out[omId] = scalar<Ty>(offGrid);
+ out[omId] = scalar<InT>(offGrid);
} else {
dim_t ioff = idw * istrides[3] + idz * istrides[2] + idy * istrides[1] + grid_x;
// Check if x and x + 1 are both valid indices
bool cond = (x < idims[0] - 1);
// Compute Left and Right Weighted Values
- Ty yl = ((Tp)1.0 - off_x) * in[ioff];
- Ty yr = cond ? (off_x) * in[ioff + 1] : scalar<Ty>(0);
- Ty yo = yl + yr;
+ InT yl = ((LocT)1.0 - off_x) * in[ioff];
+ InT yr = cond ? (off_x) * in[ioff + 1] : scalar<InT>(0);
+ InT yo = yl + yr;
// Compute Weight used
- Tp wt = cond ? (Tp)1.0 : (Tp)(1.0 - off_x);
+ LocT wt = cond ? (LocT)1.0 : (LocT)(1.0 - off_x);
// Write final value
out[omId] = (yo / wt);
}
}
};
-template<typename Ty, typename Tp, af_interp_type method>
-void approx1(Array<Ty> output, Array<Ty> const input,
- Array<Tp> const position, float const offGrid)
+template<typename InT, typename LocT, af_interp_type Method>
+void approx1(Array<InT> output, Array<InT> const input,
+ Array<LocT> const position, float const offGrid)
{
- Ty * out = output.get();
- Ty const * const in = input.get();
- Tp const * const pos = position.get();
- dim4 const odims = output.dims();
- dim4 const idims = input.dims();
- dim4 const pdims = position.dims();
- dim4 const ostrides = output.strides();
- dim4 const istrides = input.strides();
- dim4 const pstrides = position.strides();
- dim_t const oElems = output.elements();
- dim_t const iElems = input.elements();
-
- approx1_op<Ty, Tp, method> op;
+ InT * out = output.get();
+ InT const * const in = input.get();
+ LocT const * const pos = position.get();
+
+ af::dim4 const odims = output.dims();
+ af::dim4 const idims = input.dims();
+ af::dim4 const pdims = position.dims();
+ af::dim4 const ostrides = output.strides();
+ af::dim4 const istrides = input.strides();
+ af::dim4 const pstrides = position.strides();
+
+ dim_t const oElems = output.elements();
+ dim_t const iElems = input.elements();
+
+ approx1_op<InT, LocT, Method> op;
bool pBatch = !(pdims[1] == 1 && pdims[2] == 1 && pdims[3] == 1);
for(dim_t w = 0; w < odims[3]; w++) {
diff --git a/src/backend/cpu/kernel/approx2.hpp b/src/backend/cpu/kernel/approx2.hpp
index f80dae1..b5115e2 100644
--- a/src/backend/cpu/kernel/approx2.hpp
+++ b/src/backend/cpu/kernel/approx2.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
#include <math.hpp>
@@ -15,33 +17,31 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
-template<typename Ty, typename Tp, af_interp_type method>
+template<typename InT, typename LocT, af_interp_type Method>
struct approx2_op
{
- void operator()(Ty *out, const af::dim4 &odims, const dim_t oElems,
- const Ty *in, const af::dim4 &idims, const dim_t iElems,
- const Tp *pos, const af::dim4 &pdims, const Tp *qos, const af::dim4 &qdims,
- const af::dim4 &ostrides, const af::dim4 &istrides,
- const af::dim4 &pstrides, const af::dim4 &qstrides,
- const float offGrid, const bool pBatch,
- const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw)
+ void operator()(InT *out, af::dim4 const & odims, dim_t const oElems,
+ InT const * const in, af::dim4 const & idims, dim_t const iElems,
+ LocT const * const pos, af::dim4 const & pdims, LocT const * const qos, af::dim4 const & qdims,
+ af::dim4 const & ostrides, af::dim4 const & istrides,
+ af::dim4 const & pstrides, af::dim4 const & qstrides,
+ float const offGrid, bool const pBatch,
+ dim_t const idx, dim_t const idy, dim_t const idz, dim_t const idw)
{
return;
}
};
-template<typename Ty, typename Tp>
-struct approx2_op<Ty, Tp, AF_INTERP_NEAREST>
+template<typename InT, typename LocT>
+struct approx2_op<InT, LocT, AF_INTERP_NEAREST>
{
- void operator()(Ty *out, const af::dim4 &odims, const dim_t oElems,
- const Ty *in, const af::dim4 &idims, const dim_t iElems,
- const Tp *pos, const af::dim4 &pdims, const Tp *qos, const af::dim4 &qdims,
- const af::dim4 &ostrides, const af::dim4 &istrides,
- const af::dim4 &pstrides, const af::dim4 &qstrides,
- const float offGrid, const bool pBatch,
- const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw)
+ void operator()(InT *out, af::dim4 const & odims, dim_t const oElems,
+ InT const * const in, af::dim4 const & idims, dim_t const iElems,
+ LocT const * const pos, af::dim4 const & pdims, LocT const * const qos, af::dim4 const & qdims,
+ af::dim4 const & ostrides, af::dim4 const & istrides,
+ af::dim4 const & pstrides, af::dim4 const & qstrides,
+ float const offGrid, bool const pBatch,
+ dim_t const idx, dim_t const idy, dim_t const idz, dim_t const idw)
{
dim_t pmId = idy * pstrides[1] + idx;
dim_t qmId = idy * qstrides[1] + idx;
@@ -51,34 +51,34 @@ struct approx2_op<Ty, Tp, AF_INTERP_NEAREST>
}
bool gFlag = false;
- const Tp x = pos[pmId], y = qos[qmId];
+ LocT const x = pos[pmId], y = qos[qmId];
if (x < 0 || y < 0 || idims[0] < x+1 || idims[1] < y+1) {
gFlag = true;
}
- const dim_t omId = idw * ostrides[3] + idz * ostrides[2]
+ dim_t const omId = idw * ostrides[3] + idz * ostrides[2]
+ idy * ostrides[1] + idx;
if(gFlag) {
- out[omId] = scalar<Ty>(offGrid);
+ out[omId] = scalar<InT>(offGrid);
} else {
- const dim_t grid_x = round(x), grid_y = round(y); // nearest grid
- const dim_t imId = idw * istrides[3] + idz * istrides[2] +
+ dim_t const grid_x = round(x), grid_y = round(y); // nearest grid
+ dim_t const imId = idw * istrides[3] + idz * istrides[2] +
grid_y * istrides[1] + grid_x;
out[omId] = in[imId];
}
}
};
-template<typename Ty, typename Tp>
-struct approx2_op<Ty, Tp, AF_INTERP_LINEAR>
+template<typename InT, typename LocT>
+struct approx2_op<InT, LocT, AF_INTERP_LINEAR>
{
- void operator()(Ty *out, const af::dim4 &odims, const dim_t oElems,
- const Ty *in, const af::dim4 &idims, const dim_t iElems,
- const Tp *pos, const af::dim4 &pdims, const Tp *qos, const af::dim4 &qdims,
- const af::dim4 &ostrides, const af::dim4 &istrides,
- const af::dim4 &pstrides, const af::dim4 &qstrides,
- const float offGrid, const bool pBatch,
- const dim_t idx, const dim_t idy, const dim_t idz, const dim_t idw)
+ void operator()(InT *out, af::dim4 const & odims, dim_t const oElems,
+ InT const * const in, af::dim4 const & idims, dim_t const iElems,
+ LocT const * const pos, af::dim4 const & pdims, LocT const * const qos, af::dim4 const & qdims,
+ af::dim4 const & ostrides, af::dim4 const & istrides,
+ af::dim4 const & pstrides, af::dim4 const & qstrides,
+ float const offGrid, bool const pBatch,
+ dim_t const idx, dim_t const idy, dim_t const idz, dim_t const idw)
{
dim_t pmId = idy * pstrides[1] + idx;
dim_t qmId = idy * qstrides[1] + idx;
@@ -88,42 +88,42 @@ struct approx2_op<Ty, Tp, AF_INTERP_LINEAR>
}
bool gFlag = false;
- const Tp x = pos[pmId], y = qos[qmId];
+ LocT const x = pos[pmId], y = qos[qmId];
if (x < 0 || y < 0 || idims[0] < x+1 || idims[1] < y+1) {
gFlag = true;
}
- const dim_t grid_x = floor(x), grid_y = floor(y); // nearest grid
- const Tp off_x = x - grid_x, off_y = y - grid_y; // fractional offset
+ dim_t const grid_x = floor(x), grid_y = floor(y); // nearest grid
+ LocT const off_x = x - grid_x, off_y = y - grid_y; // fractional offset
// Check if pVal and pVal + 1 are both valid indices
bool condY = (y < idims[1] - 1);
bool condX = (x < idims[0] - 1);
// Compute wieghts used
- Tp wt00 = ((Tp)1.0 - off_x) * ((Tp)1.0 - off_y);
- Tp wt10 = (condY) ? ((Tp)1.0 - off_x) * (off_y) : 0;
- Tp wt01 = (condX) ? (off_x) * ((Tp)1.0 - off_y) : 0;
- Tp wt11 = (condX && condY) ? (off_x) * (off_y) : 0;
+ LocT wt00 = ((LocT)1.0 - off_x) * ((LocT)1.0 - off_y);
+ LocT wt10 = (condY) ? ((LocT)1.0 - off_x) * (off_y) : 0;
+ LocT wt01 = (condX) ? (off_x) * ((LocT)1.0 - off_y) : 0;
+ LocT wt11 = (condX && condY) ? (off_x) * (off_y) : 0;
- Tp wt = wt00 + wt10 + wt01 + wt11;
- Ty zero = scalar<Ty>(0);
+ LocT wt = wt00 + wt10 + wt01 + wt11;
+ InT zero = scalar<InT>(0);
- const dim_t omId = idw * ostrides[3] + idz * ostrides[2]
+ dim_t const omId = idw * ostrides[3] + idz * ostrides[2]
+ idy * ostrides[1] + idx;
if(gFlag) {
- out[omId] = scalar<Ty>(offGrid);
+ out[omId] = scalar<InT>(offGrid);
} else {
dim_t ioff = idw * istrides[3] + idz * istrides[2]
+ grid_y * istrides[1] + grid_x;
// Compute Weighted Values
- Ty y00 = wt00 * in[ioff];
- Ty y10 = (condY) ? wt10 * in[ioff + istrides[1]] : zero;
- Ty y01 = (condX) ? wt01 * in[ioff + 1] : zero;
- Ty y11 = (condX && condY) ? wt11 * in[ioff + istrides[1] + 1] : zero;
+ InT y00 = wt00 * in[ioff];
+ InT y10 = (condY) ? wt10 * in[ioff + istrides[1]] : zero;
+ InT y01 = (condX) ? wt01 * in[ioff + 1] : zero;
+ InT y11 = (condX && condY) ? wt11 * in[ioff + istrides[1] + 1] : zero;
- Ty yo = y00 + y10 + y01 + y11;
+ InT yo = y00 + y10 + y01 + y11;
// Write Final Value
out[omId] = (yo / wt);
@@ -131,27 +131,27 @@ struct approx2_op<Ty, Tp, AF_INTERP_LINEAR>
}
};
-template<typename Ty, typename Tp, af_interp_type method>
-void approx2(Array<Ty> output, Array<Ty> const input,
- Array<Tp> const position, Array<Tp> const qosition,
+template<typename InT, typename LocT, af_interp_type Method>
+void approx2(Array<InT> output, Array<InT> const input,
+ Array<LocT> const position, Array<LocT> const qosition,
float const offGrid)
{
- Ty * out = output.get();
- Ty const * const in = input.get();
- Tp const * const pos = position.get();
- Tp const * const qos = qosition.get();
- dim4 const odims = output.dims();
- dim4 const idims = input.dims();
- dim4 const pdims = position.dims();
- dim4 const qdims = qosition.dims();
- dim4 const ostrides = output.strides();
- dim4 const istrides = input.strides();
- dim4 const pstrides = position.strides();
- dim4 const qstrides = qosition.strides();
+ InT * out = output.get();
+ InT const * const in = input.get();
+ LocT const * const pos = position.get();
+ LocT const * const qos = qosition.get();
+ af::dim4 const odims = output.dims();
+ af::dim4 const idims = input.dims();
+ af::dim4 const pdims = position.dims();
+ af::dim4 const qdims = qosition.dims();
+ af::dim4 const ostrides = output.strides();
+ af::dim4 const istrides = input.strides();
+ af::dim4 const pstrides = position.strides();
+ af::dim4 const qstrides = qosition.strides();
dim_t const oElems = output.elements();
dim_t const iElems = input.elements();
- approx2_op<Ty, Tp, method> op;
+ approx2_op<InT, LocT, Method> op;
bool pBatch = !(pdims[2] == 1 && pdims[3] == 1);
for(dim_t w = 0; w < odims[3]; w++) {
diff --git a/src/backend/cpu/kernel/assign.hpp b/src/backend/cpu/kernel/assign.hpp
index 83f48e9..86befaf 100644
--- a/src/backend/cpu/kernel/assign.hpp
+++ b/src/backend/cpu/kernel/assign.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <vector>
#include <Array.hpp>
#include <utility.hpp>
@@ -16,25 +18,23 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
template<typename T>
-void assign(Array<T> out, const Array<T> rhs, const std::vector<bool> isSeq,
- const std::vector<af_seq> seqs, const std::vector< Array<uint> > idxArrs)
+void assign(Array<T> out, Array<T> const rhs, std::vector<bool> const isSeq,
+ std::vector<af_seq> const seqs, std::vector< Array<uint> > const idxArrs)
{
- dim4 dDims = out.getDataDims();
- dim4 pDims = out.dims();
+ af::dim4 dDims = out.getDataDims();
+ af::dim4 pDims = out.dims();
// retrieve dimensions & strides for array to which rhs is being copied to
- dim4 dst_offsets = toOffset(seqs, dDims);
- dim4 dst_strides = toStride(seqs, dDims);
+ af::dim4 dst_offsets = toOffset(seqs, dDims);
+ af::dim4 dst_strides = toStride(seqs, dDims);
// retrieve rhs array dimenesions & strides
- dim4 src_dims = rhs.dims();
- dim4 src_strides = rhs.strides();
+ af::dim4 src_dims = rhs.dims();
+ af::dim4 src_strides = rhs.strides();
// declare pointers to af_array index data
- const uint* ptr0 = idxArrs[0].get();
- const uint* ptr1 = idxArrs[1].get();
- const uint* ptr2 = idxArrs[2].get();
- const uint* ptr3 = idxArrs[3].get();
+ uint const * const ptr0 = idxArrs[0].get();
+ uint const * const ptr1 = idxArrs[1].get();
+ uint const * const ptr2 = idxArrs[2].get();
+ uint const * const ptr3 = idxArrs[3].get();
const T * src= rhs.get();
T * dst = out.get();
diff --git a/src/backend/cpu/kernel/bilateral.hpp b/src/backend/cpu/kernel/bilateral.hpp
index 2b5764f..c950bbd 100644
--- a/src/backend/cpu/kernel/bilateral.hpp
+++ b/src/backend/cpu/kernel/bilateral.hpp
@@ -7,42 +7,33 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
+#include <utility.hpp>
+#include <cmath>
namespace cpu
{
namespace kernel
{
-inline
-dim_t clamp(int a, dim_t mn, dim_t mx)
+template<typename OutT, typename InT, bool IsColor>
+void bilateral(Array<OutT> out, Array<InT> const in, float const s_sigma, float const c_sigma)
{
- return (a < (int)mn ? mn : (a > (int)mx ? mx : a));
-}
-
-inline
-unsigned getIdx(const dim4 &strides, int i, int j = 0, int k = 0, int l = 0)
-{
- return (l * strides[3] + k * strides[2] + j * strides[1] + i * strides[0]);
-}
-
-template<typename inType, typename outType, bool isColor>
-void bilateral(Array<outType> out, const Array<inType> in, float s_sigma, float c_sigma)
-{
- const dim4 dims = in.dims();
- const dim4 istrides = in.strides();
-
- const dim4 ostrides = out.strides();
+ af::dim4 const dims = in.dims();
+ af::dim4 const istrides = in.strides();
+ af::dim4 const ostrides = out.strides();
- outType *outData = out.get();
- const inType *inData = in.get();
+ OutT *outData = out.get();
+ InT const * inData = in.get();
// clamp spatical and chromatic sigma's
- float space_ = std::min(11.5f, std::max(s_sigma, 0.f));
- float color_ = std::max(c_sigma, 0.f);
- const dim_t radius = std::max((dim_t)(space_ * 1.5f), (dim_t)1);
- const float svar = space_*space_;
- const float cvar = color_*color_;
+ float space_ = std::min(11.5f, std::max(s_sigma, 0.f));
+ float color_ = std::max(c_sigma, 0.f);
+ dim_t const radius = std::max((dim_t)(space_ * 1.5f), (dim_t)1);
+ float const svar = space_*space_;
+ float const cvar = color_*color_;
for(dim_t b3=0; b3<dims[3]; ++b3) {
// b3 for loop handles following batch configurations
@@ -58,9 +49,9 @@ void bilateral(Array<outType> out, const Array<inType> in, float s_sigma, float
// j steps along 2nd dimension
for(dim_t i=0; i<dims[0]; ++i) {
// i steps along 1st dimension
- outType norm = 0.0;
- outType res = 0.0;
- const outType center = (outType)inData[getIdx(istrides, i, j)];
+ OutT norm = 0.0;
+ OutT res = 0.0;
+ OutT const center = (OutT)inData[getIdx(istrides, i, j)];
for(dim_t wj=-radius; wj<=radius; ++wj) {
// clamps offsets
dim_t tj = clamp(j+wj, 0, dims[1]-1);
@@ -68,10 +59,10 @@ void bilateral(Array<outType> out, const Array<inType> in, float s_sigma, float
// clamps offsets
dim_t ti = clamp(i+wi, 0, dims[0]-1);
// proceed
- const outType val= (outType)inData[getIdx(istrides, ti, tj)];
- const outType gauss_space = (wi*wi+wj*wj)/(-2.0*svar);
- const outType gauss_range = ((center-val)*(center-val))/(-2.0*cvar);
- const outType weight = std::exp(gauss_space+gauss_range);
+ OutT const val= (OutT)inData[getIdx(istrides, ti, tj)];
+ OutT const gauss_space = (wi*wi+wj*wj)/(-2.0*svar);
+ OutT const gauss_range = ((center-val)*(center-val))/(-2.0*cvar);
+ OutT const weight = std::exp(gauss_space+gauss_range);
norm += weight;
res += val*weight;
}
diff --git a/src/backend/cpu/kernel/convolve.hpp b/src/backend/cpu/kernel/convolve.hpp
index d39acb6..79d684d 100644
--- a/src/backend/cpu/kernel/convolve.hpp
+++ b/src/backend/cpu/kernel/convolve.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
namespace cpu
@@ -14,41 +16,39 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
-template<typename T, typename accT, bool expand>
-void one2one_1d(T *optr, T const *iptr, accT const *fptr, dim4 const &oDims,
- dim4 const &sDims, dim4 const &fDims, dim4 const &sStrides)
+template<typename InT, typename AccT, bool Expand>
+void one2one_1d(InT *optr, InT const * const iptr, AccT const * const fptr, af::dim4 const & oDims,
+ af::dim4 const & sDims, af::dim4 const & fDims, af::dim4 const & sStrides)
{
- dim_t start = (expand ? 0 : fDims[0]/2);
- dim_t end = (expand ? oDims[0] : start + sDims[0]);
+ dim_t start = (Expand ? 0 : fDims[0]/2);
+ dim_t end = (Expand ? oDims[0] : start + sDims[0]);
for(dim_t i=start; i<end; ++i) {
- accT accum = 0.0;
+ AccT accum = 0.0;
for(dim_t f=0; f<fDims[0]; ++f) {
dim_t iIdx = i-f;
- T s_val = ((iIdx>=0 &&iIdx<sDims[0])? iptr[iIdx*sStrides[0]] : T(0));
- accum += accT(s_val * fptr[f]);
+ InT s_val = ((iIdx>=0 &&iIdx<sDims[0])? iptr[iIdx*sStrides[0]] : InT(0));
+ accum += AccT(s_val * fptr[f]);
}
- optr[i-start] = T(accum);
+ optr[i-start] = InT(accum);
}
}
-template<typename T, typename accT, bool expand>
-void one2one_2d(T *optr, T const *iptr, accT const *fptr, dim4 const &oDims,
- dim4 const &sDims, dim4 const &fDims, dim4 const &oStrides,
- dim4 const &sStrides, dim4 const &fStrides)
+template<typename InT, typename AccT, bool Expand>
+void one2one_2d(InT *optr, InT const * const iptr, AccT const * const fptr, af::dim4 const & oDims,
+ af::dim4 const & sDims, af::dim4 const & fDims, af::dim4 const & oStrides,
+ af::dim4 const & sStrides, af::dim4 const & fStrides)
{
- dim_t jStart = (expand ? 0 : fDims[1]/2);
- dim_t jEnd = (expand ? oDims[1] : jStart + sDims[1]);
- dim_t iStart = (expand ? 0 : fDims[0]/2);
- dim_t iEnd = (expand ? oDims[0] : iStart + sDims[0]);
+ dim_t jStart = (Expand ? 0 : fDims[1]/2);
+ dim_t jEnd = (Expand ? oDims[1] : jStart + sDims[1]);
+ dim_t iStart = (Expand ? 0 : fDims[0]/2);
+ dim_t iEnd = (Expand ? oDims[0] : iStart + sDims[0]);
for(dim_t j=jStart; j<jEnd; ++j) {
dim_t joff = (j-jStart)*oStrides[1];
for(dim_t i=iStart; i<iEnd; ++i) {
- accT accum = accT(0);
+ AccT accum = AccT(0);
for(dim_t wj=0; wj<fDims[1]; ++wj) {
dim_t jIdx = j-wj;
dim_t w_joff = wj*fStrides[1];
@@ -58,30 +58,30 @@ void one2one_2d(T *optr, T const *iptr, accT const *fptr, dim4 const &oDims,
for(dim_t wi=0; wi<fDims[0]; ++wi) {
dim_t iIdx = i-wi;
- T s_val = T(0);
+ InT s_val = InT(0);
if ( isJValid && (iIdx>=0 && iIdx<sDims[0])) {
s_val = iptr[s_joff+iIdx*sStrides[0]];
}
- accum += accT(s_val * fptr[w_joff+wi*fStrides[0]]);
+ accum += AccT(s_val * fptr[w_joff+wi*fStrides[0]]);
}
}
- optr[joff+i-iStart] = T(accum);
+ optr[joff+i-iStart] = InT(accum);
}
}
}
-template<typename T, typename accT, bool expand>
-void one2one_3d(T *optr, T const *iptr, accT const *fptr, dim4 const &oDims,
- dim4 const &sDims, dim4 const &fDims, dim4 const &oStrides,
- dim4 const &sStrides, dim4 const &fStrides)
+template<typename InT, typename AccT, bool Expand>
+void one2one_3d(InT *optr, InT const * const iptr, AccT const * const fptr, af::dim4 const & oDims,
+ af::dim4 const & sDims, af::dim4 const & fDims, af::dim4 const & oStrides,
+ af::dim4 const & sStrides, af::dim4 const & fStrides)
{
- dim_t kStart = (expand ? 0 : fDims[2]/2);
- dim_t kEnd = (expand ? oDims[2] : kStart + sDims[2]);
- dim_t jStart = (expand ? 0 : fDims[1]/2);
- dim_t jEnd = (expand ? oDims[1] : jStart + sDims[1]);
- dim_t iStart = (expand ? 0 : fDims[0]/2);
- dim_t iEnd = (expand ? oDims[0] : iStart + sDims[0]);
+ dim_t kStart = (Expand ? 0 : fDims[2]/2);
+ dim_t kEnd = (Expand ? oDims[2] : kStart + sDims[2]);
+ dim_t jStart = (Expand ? 0 : fDims[1]/2);
+ dim_t jEnd = (Expand ? oDims[1] : jStart + sDims[1]);
+ dim_t iStart = (Expand ? 0 : fDims[0]/2);
+ dim_t iEnd = (Expand ? oDims[0] : iStart + sDims[0]);
for(dim_t k=kStart; k<kEnd; ++k) {
dim_t koff = (k-kStart)*oStrides[2];
@@ -91,7 +91,7 @@ void one2one_3d(T *optr, T const *iptr, accT const *fptr, dim4 const &oDims,
for(dim_t i=iStart; i<iEnd; ++i) {
- accT accum = accT(0);
+ AccT accum = AccT(0);
for(dim_t wk=0; wk<fDims[2]; ++wk) {
dim_t kIdx = k-wk;
dim_t w_koff = wk*fStrides[2];
@@ -107,35 +107,35 @@ void one2one_3d(T *optr, T const *iptr, accT const *fptr, dim4 const &oDims,
for(dim_t wi=0; wi<fDims[0]; ++wi) {
dim_t iIdx = i-wi;
- T s_val = T(0);
+ InT s_val = InT(0);
if ( isKValid && isJValid && (iIdx>=0 && iIdx<sDims[0])) {
s_val = iptr[s_koff+s_joff+iIdx*sStrides[0]];
}
- accum += accT(s_val * fptr[w_koff+w_joff+wi*fStrides[0]]);
+ accum += AccT(s_val * fptr[w_koff+w_joff+wi*fStrides[0]]);
}
}
}
- optr[koff+joff+i-iStart] = T(accum);
+ optr[koff+joff+i-iStart] = InT(accum);
} //i loop ends here
} // j loop ends here
} // k loop ends here
}
-template<typename T, typename accT, dim_t baseDim, bool expand>
-void convolve_nd(Array<T> out, Array<T> const signal, Array<accT> const filter, ConvolveBatchKind kind)
+template<typename InT, typename AccT, dim_t baseDim, bool Expand>
+void convolve_nd(Array<InT> out, Array<InT> const signal, Array<AccT> const filter, ConvolveBatchKind kind)
{
- T * optr = out.get();
- T const * const iptr = signal.get();
- accT const * const fptr = filter.get();
+ InT * optr = out.get();
+ InT const * const iptr = signal.get();
+ AccT const * const fptr = filter.get();
- dim4 const oDims = out.dims();
- dim4 const sDims = signal.dims();
- dim4 const fDims = filter.dims();
+ af::dim4 const oDims = out.dims();
+ af::dim4 const sDims = signal.dims();
+ af::dim4 const fDims = filter.dims();
- dim4 const oStrides = out.strides();
- dim4 const sStrides = signal.strides();
- dim4 const fStrides = filter.strides();
+ af::dim4 const oStrides = out.strides();
+ af::dim4 const sStrides = signal.strides();
+ af::dim4 const fStrides = filter.strides();
dim_t out_step[4] = {0, 0, 0, 0}; /* first value is never used, and declared for code simplicity */
dim_t in_step[4] = {0, 0, 0, 0}; /* first value is never used, and declared for code simplicity */
@@ -169,66 +169,66 @@ void convolve_nd(Array<T> out, Array<T> const signal, Array<accT> const filter,
for (dim_t b2=0; b2<batch[2]; ++b2) {
for (dim_t b1=0; b1<batch[1]; ++b1) {
- T * out = optr + b1 * out_step[1] + b2 * out_step[2] + b3 * out_step[3];
- T const *in = iptr + b1 * in_step[1] + b2 * in_step[2] + b3 * in_step[3];
- accT const *filt = fptr + b1 *filt_step[1] + b2 *filt_step[2] + b3 *filt_step[3];
+ InT * out = optr + b1 * out_step[1] + b2 * out_step[2] + b3 * out_step[3];
+ InT const *in = iptr + b1 * in_step[1] + b2 * in_step[2] + b3 * in_step[3];
+ AccT const *filt = fptr + b1 *filt_step[1] + b2 *filt_step[2] + b3 *filt_step[3];
switch(baseDim) {
- case 1: one2one_1d<T, accT, expand>(out, in, filt, oDims, sDims, fDims, sStrides); break;
- case 2: one2one_2d<T, accT, expand>(out, in, filt, oDims, sDims, fDims, oStrides, sStrides, fStrides); break;
- case 3: one2one_3d<T, accT, expand>(out, in, filt, oDims, sDims, fDims, oStrides, sStrides, fStrides); break;
+ case 1: one2one_1d<InT, AccT, Expand>(out, in, filt, oDims, sDims, fDims, sStrides); break;
+ case 2: one2one_2d<InT, AccT, Expand>(out, in, filt, oDims, sDims, fDims, oStrides, sStrides, fStrides); break;
+ case 3: one2one_3d<InT, AccT, Expand>(out, in, filt, oDims, sDims, fDims, oStrides, sStrides, fStrides); break;
}
}
}
}
}
-template<typename T, typename accT, dim_t conv_dim, bool expand>
-void convolve2_separable(T *optr, T const *iptr, accT const *fptr,
- dim4 const &oDims, dim4 const &sDims, dim4 const &orgDims, dim_t fDim,
- dim4 const &oStrides, dim4 const &sStrides, dim_t fStride)
+template<typename InT, typename AccT, dim_t conv_dim, bool Expand>
+void convolve2_separable(InT *optr, InT const * const iptr, AccT const * const fptr,
+ af::dim4 const & oDims, af::dim4 const & sDims, af::dim4 const & orgDims, dim_t fDim,
+ af::dim4 const & oStrides, af::dim4 const & sStrides, dim_t fStride)
{
for(dim_t j=0; j<oDims[1]; ++j) {
dim_t jOff = j*oStrides[1];
- dim_t cj = j + (conv_dim==1)*(expand ? 0: fDim>>1);
+ dim_t cj = j + (conv_dim==1)*(Expand ? 0: fDim>>1);
for(dim_t i=0; i<oDims[0]; ++i) {
dim_t iOff = i*oStrides[0];
- dim_t ci = i + (conv_dim==0)*(expand ? 0 : fDim>>1);
+ dim_t ci = i + (conv_dim==0)*(Expand ? 0 : fDim>>1);
- accT accum = scalar<accT>(0);
+ AccT accum = scalar<AccT>(0);
for(dim_t f=0; f<fDim; ++f) {
- T f_val = fptr[f];
- T s_val;
+ InT f_val = fptr[f];
+ InT s_val;
if (conv_dim==0) {
dim_t offi = ci - f;
bool isCIValid = offi>=0 && offi<sDims[0];
bool isCJValid = cj>=0 && cj<sDims[1];
- s_val = (isCJValid && isCIValid ? iptr[cj*sDims[0]+offi] : scalar<T>(0));
+ s_val = (isCJValid && isCIValid ? iptr[cj*sDims[0]+offi] : scalar<InT>(0));
} else {
dim_t offj = cj - f;
bool isCIValid = ci>=0 && ci<sDims[0];
bool isCJValid = offj>=0 && offj<sDims[1];
- s_val = (isCJValid && isCIValid ? iptr[offj*sDims[0]+ci] : scalar<T>(0));
+ s_val = (isCJValid && isCIValid ? iptr[offj*sDims[0]+ci] : scalar<InT>(0));
}
- accum += accT(s_val * f_val);
+ accum += AccT(s_val * f_val);
}
- optr[iOff+jOff] = T(accum);
+ optr[iOff+jOff] = InT(accum);
}
}
}
-template<typename T, typename accT, bool expand>
-void convolve2(Array<T> out, Array<T> const signal,
- Array<accT> const c_filter, Array<accT> const r_filter,
- dim4 const tDims)
+template<typename InT, typename AccT, bool Expand>
+void convolve2(Array<InT> out, Array<InT> const signal,
+ Array<AccT> const c_filter, Array<AccT> const r_filter,
+ af::dim4 const tDims)
{
- Array<T> temp = createEmptyArray<T>(tDims);
+ Array<InT> temp = createEmptyArray<InT>(tDims);
dim_t cflen = (dim_t)c_filter.elements();
dim_t rflen = (dim_t)r_filter.elements();
@@ -248,15 +248,15 @@ void convolve2(Array<T> out, Array<T> const signal,
for (dim_t b2=0; b2<oDims[2]; ++b2) {
- T const *iptr = signal.get()+ b2*sStrides[2] + i_b3Off;
- T *tptr = temp.get() + b2*tStrides[2] + t_b3Off;
- T *optr = out.get() + b2*oStrides[2] + o_b3Off;
+ InT const * const iptr = signal.get()+ b2*sStrides[2] + i_b3Off;
+ InT *tptr = temp.get() + b2*tStrides[2] + t_b3Off;
+ InT *optr = out.get() + b2*oStrides[2] + o_b3Off;
- convolve2_separable<T, accT, 0, expand>(tptr, iptr, c_filter.get(),
+ convolve2_separable<InT, AccT, 0, Expand>(tptr, iptr, c_filter.get(),
tDims, sDims, sDims, cflen,
tStrides, sStrides, c_filter.strides()[0]);
- convolve2_separable<T, accT, 1, expand>(optr, tptr, r_filter.get(),
+ convolve2_separable<InT, AccT, 1, Expand>(optr, tptr, r_filter.get(),
oDims, tDims, sDims, rflen,
oStrides, tStrides, r_filter.strides()[0]);
}
diff --git a/src/backend/cpu/kernel/copy.hpp b/src/backend/cpu/kernel/copy.hpp
index 063fb29..70d6705 100644
--- a/src/backend/cpu/kernel/copy.hpp
+++ b/src/backend/cpu/kernel/copy.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
namespace cpu
@@ -14,11 +16,9 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
template<typename T>
-void stridedCopy(T* dst, const dim4& ostrides, const T* src,
- const dim4 &dims, const dim4 &strides, unsigned dim)
+void stridedCopy(T* dst, af::dim4 const & ostrides, T const * src,
+ af::dim4 const & dims, af::dim4 const & strides, unsigned dim)
{
if(dim == 0) {
if(strides[dim] == 1) {
@@ -38,16 +38,16 @@ void stridedCopy(T* dst, const dim4& ostrides, const T* src,
}
}
-template<typename inType, typename outType>
-void copy(Array<outType> dst, const Array<inType> src, outType default_value, double factor)
+template<typename OutT, typename InT>
+void copy(Array<OutT> dst, Array<InT> const src, OutT default_value, double factor)
{
- dim4 src_dims = src.dims();
- dim4 dst_dims = dst.dims();
- dim4 src_strides = src.strides();
- dim4 dst_strides = dst.strides();
+ af::dim4 src_dims = src.dims();
+ af::dim4 dst_dims = dst.dims();
+ af::dim4 src_strides = src.strides();
+ af::dim4 dst_strides = dst.strides();
- const inType * src_ptr = src.get();
- outType * dst_ptr = dst.get();
+ InT const * const src_ptr = src.get();
+ OutT * dst_ptr = dst.get();
dim_t trgt_l = std::min(dst_dims[3], src_dims[3]);
dim_t trgt_k = std::min(dst_dims[2], src_dims[2]);
@@ -73,10 +73,10 @@ void copy(Array<outType> dst, const Array<inType> src, outType default_value, do
bool isJvalid = j<trgt_j;
for(dim_t i=0; i<dst_dims[0]; ++i) {
- outType temp = default_value;
+ OutT temp = default_value;
if (isLvalid && isKvalid && isJvalid && i<trgt_i) {
dim_t src_idx = i*src_strides[0] + src_joff + src_koff + src_loff;
- temp = outType(src_ptr[src_idx])*outType(factor);
+ temp = OutT(src_ptr[src_idx])*OutT(factor);
}
dim_t dst_idx = i*dst_strides[0] + dst_joff + dst_koff + dst_loff;
dst_ptr[dst_idx] = temp;
diff --git a/src/backend/cpu/kernel/diagonal.hpp b/src/backend/cpu/kernel/diagonal.hpp
index 596080b..0c81fc9 100644
--- a/src/backend/cpu/kernel/diagonal.hpp
+++ b/src/backend/cpu/kernel/diagonal.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
namespace cpu
@@ -14,16 +16,14 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
template<typename T>
void diagCreate(Array<T> out, Array<T> const in, int const num)
{
int batch = in.dims()[1];
int size = out.dims()[0];
- const T *iptr = in.get();
- T *optr = out.get();
+ T const * iptr = in.get();
+ T * optr = out.get();
for (int k = 0; k < batch; k++) {
for (int j = 0; j < size; j++) {
@@ -43,10 +43,10 @@ void diagCreate(Array<T> out, Array<T> const in, int const num)
template<typename T>
void diagExtract(Array<T> out, Array<T> const in, int const num)
{
- const dim4 odims = out.dims();
- const dim4 idims = in.dims();
+ dim4 const odims = out.dims();
+ dim4 const idims = in.dims();
- const int i_off = (num > 0) ? (num * in.strides()[1]) : (-num);
+ int const i_off = (num > 0) ? (num * in.strides()[1]) : (-num);
for (int l = 0; l < (int)odims[3]; l++) {
diff --git a/src/backend/cpu/kernel/diff.hpp b/src/backend/cpu/kernel/diff.hpp
index e0693b1..1a3d7ba 100644
--- a/src/backend/cpu/kernel/diff.hpp
+++ b/src/backend/cpu/kernel/diff.hpp
@@ -7,19 +7,16 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
+#include <utility.hpp>
namespace cpu
{
namespace kernel
{
-unsigned getIdx(af::dim4 strides, af::dim4 offs, int i, int j = 0, int k = 0, int l = 0)
-{
- return (l * strides[3] + k * strides[2] + j * strides[1] + i);
-}
-
-
template<typename T>
void diff1(Array<T> out, Array<T> const in, int const dim)
{
@@ -30,9 +27,8 @@ void diff1(Array<T> out, Array<T> const in, int const dim)
bool is_dim2 = dim == 2;
bool is_dim3 = dim == 3;
- // Get pointers to raw data
- const T *inPtr = in.get();
- T *outPtr = out.get();
+ T const * const inPtr = in.get();
+ T * outPtr = out.get();
// TODO: Improve this
for(dim_t l = 0; l < dims[3]; l++) {
@@ -40,11 +36,11 @@ void diff1(Array<T> out, Array<T> const in, int const dim)
for(dim_t j = 0; j < dims[1]; j++) {
for(dim_t i = 0; i < dims[0]; i++) {
// Operation: out[index] = in[index + 1 * dim_size] - in[index]
- int idx = getIdx(in.strides(), in.offsets(), i, j, k, l);
- int jdx = getIdx(in.strides(), in.offsets(),
+ int idx = getIdx(in.strides(), i, j, k, l);
+ int jdx = getIdx(in.strides(),
i + is_dim0, j + is_dim1,
k + is_dim2, l + is_dim3);
- int odx = getIdx(out.strides(), out.offsets(), i, j, k, l);
+ int odx = getIdx(out.strides(), i, j, k, l);
outPtr[odx] = inPtr[jdx] - inPtr[idx];
}
}
@@ -62,9 +58,8 @@ void diff2(Array<T> out, Array<T> const in, int const dim)
bool is_dim2 = dim == 2;
bool is_dim3 = dim == 3;
- // Get pointers to raw data
- const T *inPtr = in.get();
- T *outPtr = out.get();
+ T const * const inPtr = in.get();
+ T * outPtr = out.get();
// TODO: Improve this
for(dim_t l = 0; l < dims[3]; l++) {
@@ -72,14 +67,14 @@ void diff2(Array<T> out, Array<T> const in, int const dim)
for(dim_t j = 0; j < dims[1]; j++) {
for(dim_t i = 0; i < dims[0]; i++) {
// Operation: out[index] = in[index + 1 * dim_size] - in[index]
- int idx = getIdx(in.strides(), in.offsets(), i, j, k, l);
- int jdx = getIdx(in.strides(), in.offsets(),
+ int idx = getIdx(in.strides(), i, j, k, l);
+ int jdx = getIdx(in.strides(),
i + is_dim0, j + is_dim1,
k + is_dim2, l + is_dim3);
- int kdx = getIdx(in.strides(), in.offsets(),
+ int kdx = getIdx(in.strides(),
i + 2 * is_dim0, j + 2 * is_dim1,
k + 2 * is_dim2, l + 2 * is_dim3);
- int odx = getIdx(out.strides(), out.offsets(), i, j, k, l);
+ int odx = getIdx(out.strides(), i, j, k, l);
outPtr[odx] = inPtr[kdx] + inPtr[idx] - inPtr[jdx] - inPtr[jdx];
}
}
diff --git a/src/backend/cpu/kernel/fast.hpp b/src/backend/cpu/kernel/fast.hpp
index a3971dd..02da3e4 100644
--- a/src/backend/cpu/kernel/fast.hpp
+++ b/src/backend/cpu/kernel/fast.hpp
@@ -7,20 +7,16 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
+#include <utility.hpp>
namespace cpu
{
namespace kernel
{
-using af::dim4;
-
-inline int clamp(int f, int a, int b)
-{
- return std::max(a, std::min(f, b));
-}
-
inline int idx_y(int i)
{
if (i >= 8)
@@ -86,14 +82,14 @@ inline double abs_diff(double x, double y)
}
template<typename T>
-void locate_features(const Array<T> &in, Array<float> &score,
- Array<float> &x_out, Array<float> &y_out,
- Array<float> &score_out, unsigned* count, const float thr,
- const unsigned arc_length, const unsigned nonmax,
- const unsigned max_feat, const unsigned edge)
+void locate_features(Array<T> const & in, Array<float> & score,
+ Array<float> & x_out, Array<float> & y_out,
+ Array<float> & score_out, unsigned* count, float const thr,
+ unsigned const arc_length, unsigned const nonmax,
+ unsigned const max_feat, unsigned const edge)
{
- dim4 in_dims = in.dims();
- const T* in_ptr = in.get();
+ af::dim4 in_dims = in.dims();
+ T const * in_ptr = in.get();
for (int y = edge; y < (int)(in_dims[0] - edge); y++) {
for (int x = edge; x < (int)(in_dims[1] - edge); x++) {
@@ -179,15 +175,15 @@ void locate_features(const Array<T> &in, Array<float> &score,
}
}
-void non_maximal(const Array<float> &score, const Array<float> &x_in, const Array<float> &y_in,
- Array<float> &x_out, Array<float> &y_out, Array<float> &score_out,
- unsigned* count, const unsigned total_feat, const unsigned edge)
+void non_maximal(Array<float> const & score, const Array<float> & x_in, const Array<float> & y_in,
+ Array<float> & x_out, Array<float> & y_out, Array<float> & score_out,
+ unsigned* count, unsigned const total_feat, unsigned const edge)
{
- const float *score_ptr = score.get();
- const float *x_in_ptr = x_in.get();
- const float *y_in_ptr = y_in.get();
+ float const * score_ptr = score.get();
+ float const * x_in_ptr = x_in.get();
+ float const * y_in_ptr = y_in.get();
- dim4 score_dims = score.dims();
+ af::dim4 score_dims = score.dims();
for (unsigned k = 0; k < total_feat; k++) {
unsigned x = static_cast<unsigned>(round(x_in_ptr[k]));
diff --git a/src/backend/cpu/kernel/fftconvolve.hpp b/src/backend/cpu/kernel/fftconvolve.hpp
index 30bac66..6213cb2 100644
--- a/src/backend/cpu/kernel/fftconvolve.hpp
+++ b/src/backend/cpu/kernel/fftconvolve.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
#include <convolve_common.hpp>
@@ -15,8 +17,6 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
template<typename To, typename Ti>
void packData(Array<To> out, const af::dim4 od, const af::dim4 os, Array<Ti> const in)
{
@@ -95,12 +95,12 @@ void complexMultiply(Array<T> packed, const af::dim4 sig_dims, const af::dim4 si
T* in1_ptr = packed.get();
T* in2_ptr = packed.get() + offset;
- const dim4& od = (kind==CONVOLVE_BATCH_KERNEL ? fit_dims : sig_dims);
- const dim4& os = (kind==CONVOLVE_BATCH_KERNEL ? fit_strides : sig_strides);
- const dim4& i1d = sig_dims;
- const dim4& i2d = fit_dims;
- const dim4& i1s = sig_strides;
- const dim4& i2s = fit_strides;
+ const af::dim4& od = (kind==CONVOLVE_BATCH_KERNEL ? fit_dims : sig_dims);
+ const af::dim4& os = (kind==CONVOLVE_BATCH_KERNEL ? fit_strides : sig_strides);
+ const af::dim4& i1d = sig_dims;
+ const af::dim4& i2d = fit_dims;
+ const af::dim4& i1s = sig_strides;
+ const af::dim4& i2s = fit_strides;
for (int d3 = 0; d3 < (int)od[3]; d3++) {
for (int d2 = 0; d2 < (int)od[2]; d2++) {
diff --git a/src/backend/cpu/kernel/gradient.hpp b/src/backend/cpu/kernel/gradient.hpp
index c152fb3..1ab01ab 100644
--- a/src/backend/cpu/kernel/gradient.hpp
+++ b/src/backend/cpu/kernel/gradient.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
namespace cpu
diff --git a/src/backend/cpu/kernel/harris.hpp b/src/backend/cpu/kernel/harris.hpp
index db6551b..183cf37 100644
--- a/src/backend/cpu/kernel/harris.hpp
+++ b/src/backend/cpu/kernel/harris.hpp
@@ -7,7 +7,10 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
+#include <utility.hpp>
namespace cpu
{
@@ -15,24 +18,6 @@ namespace kernel
{
template<typename T>
-void gaussian1D(T* out, const int dim, double sigma=0.0)
-{
- if(!(sigma>0)) sigma = 0.25*dim;
-
- T sum = (T)0;
- for(int i=0;i<dim;i++)
- {
- int x = i-(dim-1)/2;
- T el = 1. / sqrt(2 * af::Pi * sigma*sigma) * exp(-((x*x)/(2*(sigma*sigma))));
- out[i] = el;
- sum += el;
- }
-
- for(int k=0;k<dim;k++)
- out[k] /= sum;
-}
-
-template<typename T>
void second_order_deriv(Array<T> ixx, Array<T> ixy, Array<T> iyy,
const unsigned in_len, const Array<T> ix, const Array<T> iy)
{
diff --git a/src/backend/cpu/kernel/histogram.hpp b/src/backend/cpu/kernel/histogram.hpp
index e26965a..9b9b897 100644
--- a/src/backend/cpu/kernel/histogram.hpp
+++ b/src/backend/cpu/kernel/histogram.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
namespace cpu
@@ -14,8 +16,8 @@ namespace cpu
namespace kernel
{
-template<typename inType, typename outType, bool isLinear>
-void histogram(Array<outType> out, Array<inType> const in,
+template<typename OutT, typename InT, bool IsLinear>
+void histogram(Array<OutT> out, Array<InT> const in,
unsigned const nbins, double const minval, double const maxval)
{
dim4 const outDims = out.dims();
@@ -25,13 +27,13 @@ void histogram(Array<outType> out, Array<inType> const in,
dim4 const oStrides = out.strides();
dim_t const nElems = inDims[0]*inDims[1];
- outType *outData = out.get();
- const inType* inData= in.get();
+ OutT *outData = out.get();
+ const InT* inData= in.get();
for(dim_t b3 = 0; b3 < outDims[3]; b3++) {
for(dim_t b2 = 0; b2 < outDims[2]; b2++) {
for(dim_t i=0; i<nElems; i++) {
- int idx = isLinear ? i : ((i % inDims[0]) + (i / inDims[0])*iStrides[1]);
+ int idx = IsLinear ? i : ((i % inDims[0]) + (i / inDims[0])*iStrides[1]);
int bin = (int)((inData[idx] - minval) / step);
bin = std::max(bin, 0);
bin = std::min(bin, (int)(nbins - 1));
diff --git a/src/backend/cpu/kernel/hsv_rgb.hpp b/src/backend/cpu/kernel/hsv_rgb.hpp
index d8aa954..c1f59a1 100644
--- a/src/backend/cpu/kernel/hsv_rgb.hpp
+++ b/src/backend/cpu/kernel/hsv_rgb.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
#include <cmath>
@@ -15,13 +17,11 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
template<typename T>
void hsv2rgb(Array<T> out, Array<T> const in)
{
- const dim4 dims = in.dims();
- const dim4 strides = in.strides();
+ const af::dim4 dims = in.dims();
+ const af::dim4 strides = in.strides();
dim_t obStride = out.strides()[3];
dim_t coff = strides[2];
dim_t bCount = dims[3];
@@ -72,9 +72,9 @@ void hsv2rgb(Array<T> out, Array<T> const in)
template<typename T>
void rgb2hsv(Array<T> out, Array<T> const in)
{
- const dim4 dims = in.dims();
- const dim4 strides = in.strides();
- dim4 oStrides = out.strides();
+ const af::dim4 dims = in.dims();
+ const af::dim4 strides = in.strides();
+ af::dim4 oStrides = out.strides();
dim_t bCount = dims[3];
for(dim_t b=0; b<bCount; ++b) {
diff --git a/src/backend/cpu/kernel/identity.hpp b/src/backend/cpu/kernel/identity.hpp
index 9eab13b..242ba9d 100644
--- a/src/backend/cpu/kernel/identity.hpp
+++ b/src/backend/cpu/kernel/identity.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
#include <math.hpp>
@@ -15,13 +17,11 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
template<typename T>
void identity(Array<T> out)
{
T *ptr = out.get();
- const dim4 out_dims = out.dims();
+ const af::dim4 out_dims = out.dims();
for (dim_t k = 0; k < out_dims[2] * out_dims[3]; k++) {
for (dim_t j = 0; j < out_dims[1]; j++) {
diff --git a/src/backend/cpu/kernel/iir.hpp b/src/backend/cpu/kernel/iir.hpp
index d1ca464..5182094 100644
--- a/src/backend/cpu/kernel/iir.hpp
+++ b/src/backend/cpu/kernel/iir.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <Array.hpp>
namespace cpu
@@ -14,8 +16,6 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
template<typename T>
void iir(Array<T> y, Array<T> c, Array<T> const a)
{
diff --git a/src/backend/cpu/kernel/index.hpp b/src/backend/cpu/kernel/index.hpp
index ee20c24..343d7ae 100644
--- a/src/backend/cpu/kernel/index.hpp
+++ b/src/backend/cpu/kernel/index.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <vector>
#include <Array.hpp>
#include <utility.hpp>
@@ -16,19 +18,17 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
template<typename T>
void index(Array<T> out, Array<T> const in,
std::vector<bool> const isSeq, std::vector<af_seq> const seqs,
std::vector< Array<uint> > const idxArrs)
{
- const dim4 iDims = in.dims();
- const dim4 dDims = in.getDataDims();
- const dim4 iOffs = toOffset(seqs, dDims);
- const dim4 iStrds = toStride(seqs, dDims);
- const dim4 oDims = out.dims();
- const dim4 oStrides = out.strides();
+ const af::dim4 iDims = in.dims();
+ const af::dim4 dDims = in.getDataDims();
+ const af::dim4 iOffs = toOffset(seqs, dDims);
+ const af::dim4 iStrds = toStride(seqs, dDims);
+ const af::dim4 oDims = out.dims();
+ const af::dim4 oStrides = out.strides();
const T *src = in.get();
T *dst = out.get();
const uint* ptr0 = idxArrs[0].get();
diff --git a/src/backend/cpu/kernel/lookup.hpp b/src/backend/cpu/kernel/lookup.hpp
index 551cd2f..a290ef2 100644
--- a/src/backend/cpu/kernel/lookup.hpp
+++ b/src/backend/cpu/kernel/lookup.hpp
@@ -7,6 +7,8 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
+#pragma once
+#include <af/defines.h>
#include <vector>
#include <Array.hpp>
#include <utility.hpp>
@@ -16,20 +18,18 @@ namespace cpu
namespace kernel
{
-using af::dim4;
-
-template<typename in_t, typename idx_t>
-void lookup(Array<in_t> out, Array<in_t> const input,
- Array<idx_t> const indices, unsigned const dim)
+template<typename InT, typename IndexT>
+void lookup(Array<InT> out, Array<InT> const input,
+ Array<IndexT> const indices, unsigned const dim)
{
- const dim4 iDims = input.dims();
- const dim4 oDims = out.dims();
- const dim4 iStrides = input.strides();
- const dim4 oStrides = out.strides();
- const in_t *inPtr = input.get();
- const idx_t *idxPtr = indices.get();
-
- in_t *outPtr = out.get();
+ const af::dim4 iDims = input.dims();
+ const af::dim4 oDims = out.dims();
+ const af::dim4 iStrides = input.strides();
+ const af::dim4 oStrides = out.strides();
+ const InT *inPtr = input.get();
+ const IndexT *idxPtr = indices.get();
+
+ InT *outPtr = out.get();
for (dim_t l=0; l<oDims[3]; ++l) {
diff --git a/src/backend/cpu/utility.hpp b/src/backend/cpu/utility.hpp
index 18a38f3..ed8bbd7 100644
--- a/src/backend/cpu/utility.hpp
+++ b/src/backend/cpu/utility.hpp
@@ -9,13 +9,16 @@
#pragma once
#include <af/defines.h>
+#include <af/constants.h>
+#include <cmath>
+#include <algorithm>
#include "backend.hpp"
namespace cpu
{
static inline
-dim_t trimIndex(const int &idx, const dim_t &len)
+dim_t trimIndex(int const & idx, dim_t const & len)
{
int ret_val = idx;
int offset = abs(ret_val)%len;
@@ -27,4 +30,34 @@ dim_t trimIndex(const int &idx, const dim_t &len)
return ret_val;
}
+static inline
+dim_t clamp(int a, dim_t mn, dim_t mx)
+{
+ return (a < (int)mn ? mn : (a > (int)mx ? mx : a));
+}
+
+static inline
+unsigned getIdx(af::dim4 const & strides, int i, int j = 0, int k = 0, int l = 0)
+{
+ return (l * strides[3] + k * strides[2] + j * strides[1] + i * strides[0]);
+}
+
+template<typename T>
+void gaussian1D(T* out, int const dim, double sigma=0.0)
+{
+ if(!(sigma>0)) sigma = 0.25*dim;
+
+ T sum = (T)0;
+ for(int i=0;i<dim;i++)
+ {
+ int x = i-(dim-1)/2;
+ T el = 1. / std::sqrt(2 * af::Pi * sigma*sigma) * std::exp(-((x*x)/(2*(sigma*sigma))));
+ out[i] = el;
+ sum += el;
+ }
+
+ for(int k=0;k<dim;k++)
+ out[k] /= sum;
+}
+
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list