[arrayfire] 47/284: conversion of listed functions to async calls
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:18 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 35a462c08b86dcb107f6df84428adb6dea749636
Author: pradeep <pradeep at arrayfire.com>
Date: Wed Dec 2 14:38:49 2015 -0500
conversion of listed functions to async calls
* gradient
* histogram
* hsv2rgb
* rgb2hsv
* identity
* inverse
* iota
* lookup
---
src/backend/cpu/gradient.cpp | 26 ++++--
src/backend/cpu/histogram.cpp | 44 ++++++----
src/backend/cpu/hsv_rgb.cpp | 192 ++++++++++++++++++++++--------------------
src/backend/cpu/identity.cpp | 44 +++++-----
src/backend/cpu/inverse.cpp | 13 ++-
src/backend/cpu/iota.cpp | 85 ++++++++++---------
src/backend/cpu/lookup.cpp | 50 ++++++-----
7 files changed, 258 insertions(+), 196 deletions(-)
diff --git a/src/backend/cpu/gradient.cpp b/src/backend/cpu/gradient.cpp
index 8ab2fe4..504c02a 100644
--- a/src/backend/cpu/gradient.cpp
+++ b/src/backend/cpu/gradient.cpp
@@ -12,12 +12,20 @@
#include <math.hpp>
#include <stdexcept>
#include <err_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
- template<typename T>
- void gradient(Array<T> &grad0, Array<T> &grad1, const Array<T> &in)
- {
+
+template<typename T>
+void gradient(Array<T> &grad0, Array<T> &grad1, const Array<T> &in)
+{
+ grad0.eval();
+ grad1.eval();
+ in.eval();
+
+ auto func = [=] (Array<T> grad0, Array<T> grad1, const Array<T> in) {
const af::dim4 dims = in.dims();
T *d_grad0 = grad0.get();
@@ -82,13 +90,15 @@ namespace cpu
}
}
}
- }
+ };
+ getQueue().enqueue(func, grad0, grad1, in);
+}
#define INSTANTIATE(T) \
template void gradient<T>(Array<T> &grad0, Array<T> &grad1, const Array<T> &in); \
- INSTANTIATE(float)
- INSTANTIATE(double)
- INSTANTIATE(cfloat)
- INSTANTIATE(cdouble)
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
}
diff --git a/src/backend/cpu/histogram.cpp b/src/backend/cpu/histogram.cpp
index e382a0e..8fb3e43 100644
--- a/src/backend/cpu/histogram.cpp
+++ b/src/backend/cpu/histogram.cpp
@@ -12,6 +12,8 @@
#include <ArrayInfo.hpp>
#include <Array.hpp>
#include <histogram.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
@@ -21,31 +23,39 @@ namespace cpu
template<typename inType, typename outType, bool isLinear>
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval)
{
- float step = (maxval - minval)/(float)nbins;
+ in.eval();
const dim4 inDims = in.dims();
- dim4 iStrides = in.strides();
dim4 outDims = dim4(nbins,1,inDims[2],inDims[3]);
Array<outType> out = createValueArray<outType>(outDims, outType(0));
- dim4 oStrides = out.strides();
- dim_t nElems = inDims[0]*inDims[1];
+ out.eval();
- outType *outData = out.get();
- const inType* inData= in.get();
+ auto func = [=](Array<outType> out, const Array<inType> in,
+ const unsigned nbins, const double minval, const double maxval) {
+ const float step = (maxval - minval)/(float)nbins;
+ const dim4 inDims = in.dims();
+ const dim4 iStrides = in.strides();
+ const dim4 oStrides = out.strides();
+ const dim_t nElems = inDims[0]*inDims[1];
- for(dim_t b3 = 0; b3 < outDims[3]; b3++) {
- for(dim_t b2 = 0; b2 < outDims[2]; b2++) {
- for(dim_t i=0; i<nElems; i++) {
- int idx = isLinear ? i : ((i % inDims[0]) + (i / inDims[0])*iStrides[1]);
- int bin = (int)((inData[idx] - minval) / step);
- bin = std::max(bin, 0);
- bin = std::min(bin, (int)(nbins - 1));
- outData[bin]++;
+ outType *outData = out.get();
+ const inType* inData= in.get();
+
+ for(dim_t b3 = 0; b3 < outDims[3]; b3++) {
+ for(dim_t b2 = 0; b2 < outDims[2]; b2++) {
+ for(dim_t i=0; i<nElems; i++) {
+ int idx = isLinear ? i : ((i % inDims[0]) + (i / inDims[0])*iStrides[1]);
+ int bin = (int)((inData[idx] - minval) / step);
+ bin = std::max(bin, 0);
+ bin = std::min(bin, (int)(nbins - 1));
+ outData[bin]++;
+ }
+ inData += iStrides[2];
+ outData += oStrides[2];
}
- inData += iStrides[2];
- outData += oStrides[2];
}
- }
+ };
+ getQueue().enqueue(func, out, in, nbins, minval, maxval);
return out;
}
diff --git a/src/backend/cpu/hsv_rgb.cpp b/src/backend/cpu/hsv_rgb.cpp
index 82f404f..d20416f 100644
--- a/src/backend/cpu/hsv_rgb.cpp
+++ b/src/backend/cpu/hsv_rgb.cpp
@@ -13,6 +13,8 @@
#include <hsv_rgb.hpp>
#include <err_cpu.hpp>
#include <cmath>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
@@ -22,54 +24,60 @@ namespace cpu
template<typename T>
Array<T> hsv2rgb(const Array<T>& in)
{
- const dim4 dims = in.dims();
- const dim4 strides = in.strides();
- Array<T> out = createEmptyArray<T>(dims);
- dim_t obStride = out.strides()[3];
- dim_t coff = strides[2];
- dim_t bCount = dims[3];
-
- for(dim_t b=0; b<bCount; ++b) {
- const T* src = in.get() + b * strides[3];
- T* dst = out.get() + b * obStride;
-
- for(dim_t j=0; j<dims[1]; ++j) {
- dim_t jOff = j*strides[1];
- // j steps along 2nd dimension
- for(dim_t i=0; i<dims[0]; ++i) {
- // i steps along 1st dimension
- dim_t hIdx = i*strides[0] + jOff;
- dim_t sIdx = hIdx + coff;
- dim_t vIdx = sIdx + coff;
-
- T H = src[hIdx];
- T S = src[sIdx];
- T V = src[vIdx];
-
- T R, G, B;
- R = G = B = 0;
-
- int m = (int)(H * 6);
- T f = H * 6 - m;
- T p = V * (1 - S);
- T q = V * (1 - f * S);
- T t = V * (1 - (1 - f) * S);
-
- switch (m % 6) {
- case 0: R = V, G = t, B = p; break;
- case 1: R = q, G = V, B = p; break;
- case 2: R = p, G = V, B = t; break;
- case 3: R = p, G = q, B = V; break;
- case 4: R = t, G = p, B = V; break;
- case 5: R = V, G = p, B = q; break;
+ in.eval();
+
+ Array<T> out = createEmptyArray<T>(in.dims());
+
+ auto func = [=](Array<T> out, const Array<T> in) {
+ const dim4 dims = in.dims();
+ const dim4 strides = in.strides();
+ dim_t obStride = out.strides()[3];
+ dim_t coff = strides[2];
+ dim_t bCount = dims[3];
+
+ for(dim_t b=0; b<bCount; ++b) {
+ const T* src = in.get() + b * strides[3];
+ T* dst = out.get() + b * obStride;
+
+ for(dim_t j=0; j<dims[1]; ++j) {
+ dim_t jOff = j*strides[1];
+ // j steps along 2nd dimension
+ for(dim_t i=0; i<dims[0]; ++i) {
+ // i steps along 1st dimension
+ dim_t hIdx = i*strides[0] + jOff;
+ dim_t sIdx = hIdx + coff;
+ dim_t vIdx = sIdx + coff;
+
+ T H = src[hIdx];
+ T S = src[sIdx];
+ T V = src[vIdx];
+
+ T R, G, B;
+ R = G = B = 0;
+
+ int m = (int)(H * 6);
+ T f = H * 6 - m;
+ T p = V * (1 - S);
+ T q = V * (1 - f * S);
+ T t = V * (1 - (1 - f) * S);
+
+ switch (m % 6) {
+ case 0: R = V, G = t, B = p; break;
+ case 1: R = q, G = V, B = p; break;
+ case 2: R = p, G = V, B = t; break;
+ case 3: R = p, G = q, B = V; break;
+ case 4: R = t, G = p, B = V; break;
+ case 5: R = V, G = p, B = q; break;
+ }
+
+ dst[hIdx] = R;
+ dst[sIdx] = G;
+ dst[vIdx] = B;
}
-
- dst[hIdx] = R;
- dst[sIdx] = G;
- dst[vIdx] = B;
}
}
- }
+ };
+ getQueue().enqueue(func, out, in);
return out;
}
@@ -77,53 +85,59 @@ Array<T> hsv2rgb(const Array<T>& in)
template<typename T>
Array<T> rgb2hsv(const Array<T>& in)
{
- const dim4 dims = in.dims();
- const dim4 strides = in.strides();
- Array<T> out = createEmptyArray<T>(dims);
- dim4 oStrides = out.strides();
- dim_t bCount = dims[3];
-
- for(dim_t b=0; b<bCount; ++b) {
- const T* src = in.get() + b * strides[3];
- T* dst = out.get() + b * oStrides[3];
-
- for(dim_t j=0; j<dims[1]; ++j) {
- // j steps along 2nd dimension
- dim_t oj = j * oStrides[1];
- dim_t ij = j * strides[1];
-
- for(dim_t i=0; i<dims[0]; ++i) {
- // i steps along 1st dimension
- dim_t oIdx0 = i * oStrides[0] + oj;
- dim_t oIdx1 = oIdx0 + oStrides[2];
- dim_t oIdx2 = oIdx1 + oStrides[2];
-
- dim_t iIdx0 = i * strides[0] + ij;
- dim_t iIdx1 = iIdx0 + strides[2];
- dim_t iIdx2 = iIdx1 + strides[2];
-
- T R = src[iIdx0];
- T G = src[iIdx1];
- T B = src[iIdx2];
- T Cmax = std::max(std::max(R, G), B);
- T Cmin = std::min(std::min(R, G), B);
- T delta= Cmax-Cmin;
-
- T H = 0;
-
- if (Cmax!=Cmin) {
- if (Cmax==R) H = (G-B)/delta + (G<B ? 6 : 0);
- if (Cmax==G) H = (B-R)/delta + 2;
- if (Cmax==B) H = (R-G)/delta + 4;
- H = H / 6.0f;
+ in.eval();
+
+ Array<T> out = createEmptyArray<T>(in.dims());
+
+ auto func = [=](Array<T> out, const Array<T> in) {
+ const dim4 dims = in.dims();
+ const dim4 strides = in.strides();
+ dim4 oStrides = out.strides();
+ dim_t bCount = dims[3];
+
+ for(dim_t b=0; b<bCount; ++b) {
+ const T* src = in.get() + b * strides[3];
+ T* dst = out.get() + b * oStrides[3];
+
+ for(dim_t j=0; j<dims[1]; ++j) {
+ // j steps along 2nd dimension
+ dim_t oj = j * oStrides[1];
+ dim_t ij = j * strides[1];
+
+ for(dim_t i=0; i<dims[0]; ++i) {
+ // i steps along 1st dimension
+ dim_t oIdx0 = i * oStrides[0] + oj;
+ dim_t oIdx1 = oIdx0 + oStrides[2];
+ dim_t oIdx2 = oIdx1 + oStrides[2];
+
+ dim_t iIdx0 = i * strides[0] + ij;
+ dim_t iIdx1 = iIdx0 + strides[2];
+ dim_t iIdx2 = iIdx1 + strides[2];
+
+ T R = src[iIdx0];
+ T G = src[iIdx1];
+ T B = src[iIdx2];
+ T Cmax = std::max(std::max(R, G), B);
+ T Cmin = std::min(std::min(R, G), B);
+ T delta= Cmax-Cmin;
+
+ T H = 0;
+
+ if (Cmax!=Cmin) {
+ if (Cmax==R) H = (G-B)/delta + (G<B ? 6 : 0);
+ if (Cmax==G) H = (B-R)/delta + 2;
+ if (Cmax==B) H = (R-G)/delta + 4;
+ H = H / 6.0f;
+ }
+
+ dst[oIdx0] = H;
+ dst[oIdx1] = (Cmax==0.0f ? 0 : delta/Cmax);
+ dst[oIdx2] = Cmax;
}
-
- dst[oIdx0] = H;
- dst[oIdx1] = (Cmax==0.0f ? 0 : delta/Cmax);
- dst[oIdx2] = Cmax;
}
}
- }
+ };
+ getQueue().enqueue(func, out, in);
return out;
}
diff --git a/src/backend/cpu/identity.cpp b/src/backend/cpu/identity.cpp
index 2973ae4..f7236bd 100644
--- a/src/backend/cpu/identity.cpp
+++ b/src/backend/cpu/identity.cpp
@@ -13,13 +13,17 @@
#include <Array.hpp>
#include <identity.hpp>
#include <math.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
- template<typename T>
- Array<T> identity(const dim4& dims)
- {
- Array<T> out = createEmptyArray<T>(dims);
+template<typename T>
+Array<T> identity(const dim4& dims)
+{
+ Array<T> out = createEmptyArray<T>(dims);
+
+ auto func = [=] (Array<T> out) {
T *ptr = out.get();
const dim_t *out_dims = out.dims().get();
@@ -31,23 +35,25 @@ namespace cpu
}
ptr += out_dims[0] * out_dims[1];
}
- return out;
- }
+ };
+ getQueue().enqueue(func, out);
+
+ return out;
+}
#define INSTANTIATE_IDENTITY(T) \
template Array<T> identity<T> (const af::dim4 &dims);
- INSTANTIATE_IDENTITY(float)
- INSTANTIATE_IDENTITY(double)
- INSTANTIATE_IDENTITY(cfloat)
- INSTANTIATE_IDENTITY(cdouble)
- INSTANTIATE_IDENTITY(int)
- INSTANTIATE_IDENTITY(uint)
- INSTANTIATE_IDENTITY(intl)
- INSTANTIATE_IDENTITY(uintl)
- INSTANTIATE_IDENTITY(char)
- INSTANTIATE_IDENTITY(uchar)
- INSTANTIATE_IDENTITY(short)
- INSTANTIATE_IDENTITY(ushort)
-
+INSTANTIATE_IDENTITY(float)
+INSTANTIATE_IDENTITY(double)
+INSTANTIATE_IDENTITY(cfloat)
+INSTANTIATE_IDENTITY(cdouble)
+INSTANTIATE_IDENTITY(int)
+INSTANTIATE_IDENTITY(uint)
+INSTANTIATE_IDENTITY(intl)
+INSTANTIATE_IDENTITY(uintl)
+INSTANTIATE_IDENTITY(char)
+INSTANTIATE_IDENTITY(uchar)
+INSTANTIATE_IDENTITY(short)
+INSTANTIATE_IDENTITY(ushort)
}
diff --git a/src/backend/cpu/inverse.cpp b/src/backend/cpu/inverse.cpp
index 129823b..987ba01 100644
--- a/src/backend/cpu/inverse.cpp
+++ b/src/backend/cpu/inverse.cpp
@@ -23,6 +23,8 @@
#include <lu.hpp>
#include <identity.hpp>
#include <solve.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
@@ -48,6 +50,7 @@ INV_FUNC(getri , cdouble, z)
template<typename T>
Array<T> inverse(const Array<T> &in)
{
+ in.eval();
int M = in.dims()[0];
int N = in.dims()[1];
@@ -58,12 +61,14 @@ Array<T> inverse(const Array<T> &in)
}
Array<T> A = copyArray<T>(in);
-
Array<int> pivot = lu_inplace<T>(A, false);
- getri_func<T>()(AF_LAPACK_COL_MAJOR, M,
- A.get(), A.strides()[1],
- pivot.get());
+ auto func = [=] (Array<T> A, Array<int> pivot, int M) {
+ getri_func<T>()(AF_LAPACK_COL_MAJOR, M,
+ A.get(), A.strides()[1],
+ pivot.get());
+ };
+ getQueue().enqueue(func, A, pivot, M);
return A;
}
diff --git a/src/backend/cpu/iota.cpp b/src/backend/cpu/iota.cpp
index 47bcb92..170b6a1 100644
--- a/src/backend/cpu/iota.cpp
+++ b/src/backend/cpu/iota.cpp
@@ -14,59 +14,66 @@
#include <err_cpu.hpp>
#include <algorithm>
#include <numeric>
+#include <platform.hpp>
+#include <async_queue.hpp>
using namespace std;
namespace cpu
{
- ///////////////////////////////////////////////////////////////////////////
- // Kernel Functions
- ///////////////////////////////////////////////////////////////////////////
- template<typename T>
- void iota(T *out, const dim4 &dims, const dim4 &strides, const dim4 &sdims, const dim4 &tdims)
- {
- for(dim_t w = 0; w < dims[3]; w++) {
- dim_t offW = w * strides[3];
- T valW = (w % sdims[3]) * sdims[0] * sdims[1] * sdims[2];
- for(dim_t z = 0; z < dims[2]; z++) {
- dim_t offWZ = offW + z * strides[2];
- T valZ = valW + (z % sdims[2]) * sdims[0] * sdims[1];
- for(dim_t y = 0; y < dims[1]; y++) {
- dim_t offWZY = offWZ + y * strides[1];
- T valY = valZ + (y % sdims[1]) * sdims[0];
- for(dim_t x = 0; x < dims[0]; x++) {
- dim_t id = offWZY + x;
- out[id] = valY + (x % sdims[0]);
- }
+///////////////////////////////////////////////////////////////////////////
+// Kernel Functions
+///////////////////////////////////////////////////////////////////////////
+template<typename T>
+void iota_(Array<T> output, const dim4 &sdims, const dim4 &tdims)
+{
+ const dim4 dims = output.dims();
+ T* out = output.get();
+ const dim4 strides = output.strides();
+
+ for(dim_t w = 0; w < dims[3]; w++) {
+ dim_t offW = w * strides[3];
+ T valW = (w % sdims[3]) * sdims[0] * sdims[1] * sdims[2];
+ for(dim_t z = 0; z < dims[2]; z++) {
+ dim_t offWZ = offW + z * strides[2];
+ T valZ = valW + (z % sdims[2]) * sdims[0] * sdims[1];
+ for(dim_t y = 0; y < dims[1]; y++) {
+ dim_t offWZY = offWZ + y * strides[1];
+ T valY = valZ + (y % sdims[1]) * sdims[0];
+ for(dim_t x = 0; x < dims[0]; x++) {
+ dim_t id = offWZY + x;
+ out[id] = valY + (x % sdims[0]);
}
}
}
}
+}
- ///////////////////////////////////////////////////////////////////////////
- // Wrapper Functions
- ///////////////////////////////////////////////////////////////////////////
- template<typename T>
- Array<T> iota(const dim4 &dims, const dim4 &tile_dims)
- {
- dim4 outdims = dims * tile_dims;
+///////////////////////////////////////////////////////////////////////////
+// Wrapper Functions
+///////////////////////////////////////////////////////////////////////////
+template<typename T>
+Array<T> iota(const dim4 &dims, const dim4 &tile_dims)
+{
+ dim4 outdims = dims * tile_dims;
- Array<T> out = createEmptyArray<T>(outdims);
- iota<T>(out.get(), out.dims(), out.strides(), dims, tile_dims);
+ Array<T> out = createEmptyArray<T>(outdims);
- return out;
- }
+ getQueue().enqueue(iota_<T>, out, dims, tile_dims);
+
+ return out;
+}
#define INSTANTIATE(T) \
template Array<T> iota<T>(const af::dim4 &dims, const af::dim4 &tile_dims); \
- INSTANTIATE(float)
- INSTANTIATE(double)
- INSTANTIATE(int)
- INSTANTIATE(uint)
- INSTANTIATE(intl)
- INSTANTIATE(uintl)
- INSTANTIATE(uchar)
- INSTANTIATE(short)
- INSTANTIATE(ushort)
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
}
diff --git a/src/backend/cpu/lookup.cpp b/src/backend/cpu/lookup.cpp
index 128cc02..0aeee4d 100644
--- a/src/backend/cpu/lookup.cpp
+++ b/src/backend/cpu/lookup.cpp
@@ -10,6 +10,8 @@
#include <lookup.hpp>
#include <err_cpu.hpp>
#include <cstdlib>
+#include <platform.hpp>
+#include <async_queue.hpp>
namespace cpu
{
@@ -30,11 +32,10 @@ dim_t trimIndex(int idx, const dim_t &len)
template<typename in_t, typename idx_t>
Array<in_t> lookup(const Array<in_t> &input, const Array<idx_t> &indices, const unsigned dim)
{
- const dim4 iDims = input.dims();
- const dim4 iStrides = input.strides();
+ input.eval();
+ indices.eval();
- const in_t *inPtr = input.get();
- const idx_t *idxPtr = indices.get();
+ const dim4 iDims = input.dims();
dim4 oDims(1);
for (int d=0; d<4; ++d)
@@ -42,35 +43,44 @@ Array<in_t> lookup(const Array<in_t> &input, const Array<idx_t> &indices, const
Array<in_t> out = createEmptyArray<in_t>(oDims);
- dim4 oStrides = out.strides();
+ auto func = [=] (Array<in_t> out, const Array<in_t> input,
+ const Array<idx_t> indices, const unsigned dim) {
+ const dim4 iDims = input.dims();
+ const dim4 oDims = out.dims();
+ const dim4 iStrides = input.strides();
+ const dim4 oStrides = out.strides();
+ const in_t *inPtr = input.get();
+ const idx_t *idxPtr = indices.get();
- in_t *outPtr = out.get();
+ in_t *outPtr = out.get();
- for (dim_t l=0; l<oDims[3]; ++l) {
+ for (dim_t l=0; l<oDims[3]; ++l) {
- dim_t iLOff = iStrides[3]*(dim==3 ? trimIndex((dim_t)idxPtr[l], iDims[3]): l);
- dim_t oLOff = l*oStrides[3];
+ dim_t iLOff = iStrides[3]*(dim==3 ? trimIndex((dim_t)idxPtr[l], iDims[3]): l);
+ dim_t oLOff = l*oStrides[3];
- for (dim_t k=0; k<oDims[2]; ++k) {
+ for (dim_t k=0; k<oDims[2]; ++k) {
- dim_t iKOff = iStrides[2]*(dim==2 ? trimIndex((dim_t)idxPtr[k], iDims[2]): k);
- dim_t oKOff = k*oStrides[2];
+ dim_t iKOff = iStrides[2]*(dim==2 ? trimIndex((dim_t)idxPtr[k], iDims[2]): k);
+ dim_t oKOff = k*oStrides[2];
- for (dim_t j=0; j<oDims[1]; ++j) {
+ for (dim_t j=0; j<oDims[1]; ++j) {
- dim_t iJOff = iStrides[1]*(dim==1 ? trimIndex((dim_t)idxPtr[j], iDims[1]): j);
- dim_t oJOff = j*oStrides[1];
+ dim_t iJOff = iStrides[1]*(dim==1 ? trimIndex((dim_t)idxPtr[j], iDims[1]): j);
+ dim_t oJOff = j*oStrides[1];
- for (dim_t i=0; i<oDims[0]; ++i) {
+ for (dim_t i=0; i<oDims[0]; ++i) {
- dim_t iIOff = iStrides[0]*(dim==0 ? trimIndex((dim_t)idxPtr[i], iDims[0]): i);
- dim_t oIOff = i*oStrides[0];
+ dim_t iIOff = iStrides[0]*(dim==0 ? trimIndex((dim_t)idxPtr[i], iDims[0]): i);
+ dim_t oIOff = i*oStrides[0];
- outPtr[oLOff+oKOff+oJOff+oIOff] = inPtr[iLOff+iKOff+iJOff+iIOff];
+ outPtr[oLOff+oKOff+oJOff+oIOff] = inPtr[iLOff+iKOff+iJOff+iIOff];
+ }
}
}
}
- }
+ };
+ getQueue().enqueue(func, out, input, indices, dim);
return out;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list