[arrayfire] 66/284: Fixed harris & homography cpu fns to work with async fns
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:19 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental in repository arrayfire.
commit 21f74eb706752c10901b3988cb709c865904cb72
Author: pradeep <pradeep at arrayfire.com>
Date: Thu Dec 10 13:47:01 2015 -0500
Fixed harris & homography cpu fns to work with async fns
src/backend/cpu/harris.cpp | 143 +++++++++++++++++++++--------------------
src/backend/cpu/homography.cpp | 32 +++++----
2 files changed, 91 insertions(+), 84 deletions(-)
diff --git a/src/backend/cpu/harris.cpp b/src/backend/cpu/harris.cpp
index d16c56a..b57b940 100644
--- a/src/backend/cpu/harris.cpp
+++ b/src/backend/cpu/harris.cpp
@@ -19,6 +19,8 @@
#include <gradient.hpp>
#include <sort_index.hpp>
#include <cstring>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
@@ -44,14 +46,14 @@ void gaussian1D(T* out, const int dim, double sigma=0.0)
template<typename T>
-void second_order_deriv(
- T* ixx_out,
- T* ixy_out,
- T* iyy_out,
- const unsigned in_len,
- const T* ix_in,
- const T* iy_in)
+void second_order_deriv(Array<T> ixx, Array<T> ixy, Array<T> iyy,
+ const unsigned in_len, const Array<T> ix, const Array<T> iy)
+ T* ixx_out = ixx.get();
+ T* ixy_out = ixy.get();
+ T* iyy_out = iyy.get();
+ const T* ix_in = ix.get();
+ const T* iy_in = iy.get();
for (unsigned x = 0; x < in_len; x++) {
ixx_out[x] = ix_in[x] * ix_in[x];
ixy_out[x] = ix_in[x] * iy_in[x];
@@ -60,16 +62,14 @@ void second_order_deriv(
template<typename T>
-void harris_responses(
- T* resp_out,
- const unsigned idim0,
- const unsigned idim1,
- const T* ixx_in,
- const T* ixy_in,
- const T* iyy_in,
- const float k_thr,
- const unsigned border_len)
+void harris_responses(Array<T> resp, const unsigned idim0, const unsigned idim1,
+ const Array<T> ixx, const Array<T> ixy, const Array<T> iyy,
+ const float k_thr, const unsigned border_len)
+ T* resp_out = resp.get();
+ const T* ixx_in = ixx.get();
+ const T* ixy_in = ixy.get();
+ const T* iyy_in = iyy.get();
const unsigned r = border_len;
for (unsigned x = r; x < idim1 - r; x++) {
@@ -87,18 +87,14 @@ void harris_responses(
template<typename T>
-void non_maximal(
- float* x_out,
- float* y_out,
- float* resp_out,
- unsigned* count,
- const unsigned idim0,
- const unsigned idim1,
- const T* resp_in,
- const float min_resp,
- const unsigned border_len,
- const unsigned max_corners)
+void non_maximal(Array<float> xOut, Array<float> yOut, Array<float> respOut, unsigned* count,
+ const unsigned idim0, const unsigned idim1, const Array<T> respIn,
+ const float min_resp, const unsigned border_len, const unsigned max_corners)
+ float* x_out = xOut.get();
+ float* y_out = yOut.get();
+ float* resp_out = respOut.get();
+ const T* resp_in = respIn.get();
// Responses on the border don't have 8-neighbors to compare, discard them
const unsigned r = border_len + 1;
@@ -131,10 +127,19 @@ void non_maximal(
-static void keep_corners(float* x_out, float* y_out, float* resp_out,
- const float* x_in, const float* y_in, const float* resp_in,
- const unsigned* resp_idx, const unsigned n_corners)
+static void keep_corners(Array<float> xOut, Array<float> yOut, Array<float> respOut,
+ const Array<float> xIn, const Array<float> yIn,
+ const Array<float> respIn, const Array<unsigned> respIdx,
+ const unsigned n_corners)
+ float* x_out = xOut.get();
+ float* y_out = yOut.get();
+ float* resp_out = respOut.get();
+ const float* x_in = xIn.get();
+ const float* y_in = yIn.get();
+ const float* resp_in = respIn.get();
+ const uint* resp_idx = respIdx.get();
// Keep only the first n_feat features
for (unsigned f = 0; f < n_corners; f++) {
x_out[f] = x_in[resp_idx[f]];
@@ -148,6 +153,8 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
const Array<T> &in, const unsigned max_corners, const float min_response,
const float sigma, const unsigned filter_len, const float k_thr)
+ in.eval();
dim4 idims = in.dims();
// Window filter
@@ -156,8 +163,7 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
if (sigma < 0.5f) {
for (unsigned i = 0; i < filter_len; i++)
h_filter[i] = (T)1.f / (filter_len);
- }
- else {
+ } else {
gaussian1D<convAccT>(h_filter, (int)filter_len, sigma);
Array<convAccT> filter = createDeviceDataArray<convAccT>(dim4(filter_len), (const void*)h_filter);
@@ -168,15 +174,14 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
Array<T> iy = createEmptyArray<T>(idims);
// Compute first order derivatives
- gradient<T>(iy, ix, in);
+ getQueue().enqueue(gradient<T>, iy, ix, in);
Array<T> ixx = createEmptyArray<T>(idims);
Array<T> ixy = createEmptyArray<T>(idims);
Array<T> iyy = createEmptyArray<T>(idims);
// Compute second-order derivatives
- second_order_deriv<T>(ixx.get(), ixy.get(), iyy.get(),
- in.elements(), ix.get(), iy.get());
+ getQueue().enqueue(second_order_deriv<T>, ixx, ixy, iyy, in.elements(), ix, iy);
// Convolve second-order derivatives with proper window filter
ixx = convolve2<T, convAccT, false>(ixx, filter, filter);
@@ -185,26 +190,22 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
const unsigned corner_lim = in.elements() * 0.2f;
- float* x_corners = memAlloc<float>(corner_lim);
- float* y_corners = memAlloc<float>(corner_lim);
- float* resp_corners = memAlloc<float>(corner_lim);
+ Array<T> responses = createEmptyArray<T>(dim4(in.elements()));
- T* resp = memAlloc<T>(in.elements());
+ getQueue().enqueue(harris_responses<T>, responses, idims[0], idims[1],
+ ixx, ixy, iyy, k_thr, border_len);
- // Calculate Harris responses for all pixels
- harris_responses<T>(resp,
- idims[0], idims[1],
- ixx.get(), ixy.get(), iyy.get(),
- k_thr, border_len);
+ Array<float> xCorners = createEmptyArray<float>(dim4(corner_lim));
+ Array<float> yCorners = createEmptyArray<float>(dim4(corner_lim));
+ Array<float> respCorners = createEmptyArray<float>(dim4(corner_lim));
const unsigned min_r = (max_corners > 0) ? 0.f : min_response;
- unsigned corners_found = 0;
// Performs non-maximal suppression
- non_maximal<T>(x_corners, y_corners, resp_corners, &corners_found,
- idims[0], idims[1], resp, min_r, border_len, corner_lim);
- memFree(resp);
+ getQueue().sync();
+ unsigned corners_found = 0;
+ non_maximal<T>(xCorners, yCorners, respCorners, &corners_found,
+ idims[0], idims[1], responses, min_r, border_len, corner_lim);
const unsigned corners_out = (max_corners > 0) ?
min(corners_found, max_corners) :
@@ -213,42 +214,42 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
return 0;
if (max_corners > 0 && corners_found > corners_out) {
- Array<float> harris_responses = createDeviceDataArray<float>(dim4(corners_found), (void*)resp_corners);
+ respCorners.resetDims(dim4(corners_found));
Array<float> harris_sorted = createEmptyArray<float>(dim4(corners_found));
Array<unsigned> harris_idx = createEmptyArray<unsigned>(dim4(corners_found));
// Sort Harris responses
- sort_index<float, false>(harris_sorted, harris_idx, harris_responses, 0);
+ sort_index<float, false>(harris_sorted, harris_idx, respCorners, 0);
x_out = createEmptyArray<float>(dim4(corners_out));
y_out = createEmptyArray<float>(dim4(corners_out));
resp_out = createEmptyArray<float>(dim4(corners_out));
// Keep only the corners with higher Harris responses
- keep_corners(x_out.get(), y_out.get(), resp_out.get(),
- x_corners, y_corners, harris_sorted.get(), harris_idx.get(),
- corners_out);
- memFree(x_corners);
- memFree(y_corners);
- }
- else if (max_corners == 0 && corners_found < corner_lim) {
+ getQueue().enqueue(keep_corners, x_out, y_out, resp_out, xCorners, yCorners,
+ harris_sorted, harris_idx, corners_out);
+ } else if (max_corners == 0 && corners_found < corner_lim) {
x_out = createEmptyArray<float>(dim4(corners_out));
y_out = createEmptyArray<float>(dim4(corners_out));
resp_out = createEmptyArray<float>(dim4(corners_out));
- memcpy(x_out.get(), x_corners, corners_out * sizeof(float));
- memcpy(y_out.get(), y_corners, corners_out * sizeof(float));
- memcpy(resp_out.get(), resp_corners, corners_out * sizeof(float));
- memFree(x_corners);
- memFree(y_corners);
- memFree(resp_corners);
- }
- else {
- x_out = createDeviceDataArray<float>(dim4(corners_out), (void*)x_corners);
- y_out = createDeviceDataArray<float>(dim4(corners_out), (void*)y_corners);
- resp_out = createDeviceDataArray<float>(dim4(corners_out), (void*)resp_corners);
+ auto copyFunc = [=](Array<float> x_out, Array<float> y_out,
+ Array<float> outResponses, const Array<float> x_crnrs,
+ const Array<float> y_crnrs, const Array<float> inResponses,
+ const unsigned corners_out) {
+ memcpy(x_out.get(), x_crnrs.get(), corners_out * sizeof(float));
+ memcpy(y_out.get(), y_crnrs.get(), corners_out * sizeof(float));
+ memcpy(outResponses.get(), inResponses.get(), corners_out * sizeof(float));
+ };
+ getQueue().enqueue(copyFunc, x_out, y_out, resp_out,
+ xCorners, yCorners, respCorners, corners_out);
+ } else {
+ x_out = xCorners;
+ y_out = yCorners;
+ resp_out = respCorners;
+ x_out.resetDims(dim4(corners_out));
+ y_out.resetDims(dim4(corners_out));
+ resp_out.resetDims(dim4(corners_out));
return corners_out;
diff --git a/src/backend/cpu/homography.cpp b/src/backend/cpu/homography.cpp
index d20f0ca..d936e21 100644
--- a/src/backend/cpu/homography.cpp
+++ b/src/backend/cpu/homography.cpp
@@ -15,13 +15,11 @@
#include <handle.hpp>
#include <homography.hpp>
#include <arith.hpp>
-#include <ireduce.hpp>
#include <random.hpp>
-#include <svd.hpp>
-#include <memory.hpp>
#include <cstring>
#include <cfloat>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
@@ -154,12 +152,9 @@ unsigned updateIterations(float inlier_ratio, unsigned iter)
template<typename T>
-int computeHomography(T* H_ptr,
- const float* rnd_ptr,
- const float* x_src_ptr,
- const float* y_src_ptr,
- const float* x_dst_ptr,
- const float* y_dst_ptr)
+int computeHomography(T* H_ptr, const float* rnd_ptr,
+ const float* x_src_ptr, const float* y_src_ptr,
+ const float* x_dst_ptr, const float* y_dst_ptr)
if ((unsigned)rnd_ptr[0] == (unsigned)rnd_ptr[1] || (unsigned)rnd_ptr[0] == (unsigned)rnd_ptr[2] ||
(unsigned)rnd_ptr[0] == (unsigned)rnd_ptr[3] || (unsigned)rnd_ptr[1] == (unsigned)rnd_ptr[2] ||
@@ -192,6 +187,8 @@ int computeHomography(T* H_ptr,
float dst_scale = sqrt(2.0f) / sqrt(dst_var);
Array<T> A = createValueArray<T>(af::dim4(9, 9), (T)0);
+ A.eval();
+ getQueue().sync();
af::dim4 Adims = A.dims();
T* A_ptr = A.get();
@@ -217,6 +214,8 @@ int computeHomography(T* H_ptr,
Array<T> V = createValueArray<T>(af::dim4(Adims[1], Adims[1]), (T)0);
+ V.eval();
+ getQueue().sync();
JacobiSVD<T>(A.get(), V.get(), 9, 9);
af::dim4 Vdims = V.dims();
@@ -262,6 +261,8 @@ int findBestHomography(Array<T> &bestH,
const float* y_dst_ptr = y_dst.get();
Array<T> H = createValueArray<T>(af::dim4(9, iterations), (T)0);
+ H.eval();
+ getQueue().sync();
const af::dim4 rdims = rnd.dims();
const af::dim4 Hdims = H.dims();
@@ -278,8 +279,7 @@ int findBestHomography(Array<T> &bestH,
const unsigned ridx = rdims[0] * i;
const float* rnd_ptr = rnd.get() + ridx;
- if (computeHomography<T>(H_ptr, rnd_ptr, x_src_ptr, y_src_ptr,
- x_dst_ptr, y_dst_ptr))
+ if (computeHomography<T>(H_ptr, rnd_ptr, x_src_ptr, y_src_ptr, x_dst_ptr, y_dst_ptr))
if (htype == AF_HOMOGRAPHY_RANSAC) {
@@ -320,7 +320,6 @@ int findBestHomography(Array<T> &bestH,
minMedian = median;
bestIdx = i;
@@ -355,6 +354,11 @@ int homography(Array<T> &bestH,
const float inlier_thr,
const unsigned iterations)
+ x_src.eval();
+ y_src.eval();
+ x_dst.eval();
+ y_dst.eval();
const af::dim4 idims = x_src.dims();
const unsigned nsamples = idims[0];
@@ -366,6 +370,8 @@ int homography(Array<T> &bestH,
Array<float> frnd = randu<float>(rdims);
Array<float> fctr = createValueArray<float>(rdims, (float)nsamples);
Array<float> rnd = arithOp<float, af_mul_t>(frnd, fctr, rdims);
+ rnd.eval();
+ getQueue().sync();
return findBestHomography<T>(bestH, x_src, y_src, x_dst, y_dst, rnd, iter, nsamples, inlier_thr, htype);
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list