[arrayfire] 45/284: converted matchTemplate, meanShift & medfilt to async calls
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:17 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 48a9e581d7f2b0ce1eb48171d5f1ceaaf7b4c712
Author: pradeep <pradeep at arrayfire.com>
Date: Wed Dec 2 12:48:56 2015 -0500
converted matchTemplate, meanShift & medfilt to async calls
---
src/backend/cpu/match_template.cpp | 206 +++++++++++++++++++------------------
src/backend/cpu/meanshift.cpp | 174 ++++++++++++++++---------------
src/backend/cpu/medfilt.cpp | 149 ++++++++++++++-------------
3 files changed, 275 insertions(+), 254 deletions(-)
diff --git a/src/backend/cpu/match_template.cpp b/src/backend/cpu/match_template.cpp
index 4d93014..02a4888 100644
--- a/src/backend/cpu/match_template.cpp
+++ b/src/backend/cpu/match_template.cpp
@@ -13,6 +13,8 @@
#include <Array.hpp>
#include <match_template.hpp>
#include <err_cpu.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
@@ -22,122 +24,126 @@ namespace cpu
template<typename inType, typename outType, af_match_type mType>
Array<outType> match_template(const Array<inType> &sImg, const Array<inType> &tImg)
{
- const dim4 sDims = sImg.dims();
- const dim4 tDims = tImg.dims();
- const dim4 sStrides = sImg.strides();
- const dim4 tStrides = tImg.strides();
-
- const dim_t tDim0 = tDims[0];
- const dim_t tDim1 = tDims[1];
- const dim_t sDim0 = sDims[0];
- const dim_t sDim1 = sDims[1];
-
- Array<outType> out = createEmptyArray<outType>(sDims);
- const dim4 oStrides = out.strides();
-
- outType tImgMean = outType(0);
- dim_t winNumElements = tImg.elements();
- bool needMean = mType==AF_ZSAD || mType==AF_LSAD ||
- mType==AF_ZSSD || mType==AF_LSSD ||
- mType==AF_ZNCC;
- const inType * tpl = tImg.get();
-
- if (needMean) {
- for(dim_t tj=0; tj<tDim1; tj++) {
- dim_t tjStride = tj*tStrides[1];
-
- for(dim_t ti=0; ti<tDim0; ti++) {
- tImgMean += (outType)tpl[tjStride+ti*tStrides[0]];
+ Array<outType> out = createEmptyArray<outType>(sImg.dims());
+
+ auto func = [=](Array<outType> out, const Array<inType> sImg, const Array<inType> tImg) {
+ const dim4 sDims = sImg.dims();
+ const dim4 tDims = tImg.dims();
+ const dim4 sStrides = sImg.strides();
+ const dim4 tStrides = tImg.strides();
+
+ const dim_t tDim0 = tDims[0];
+ const dim_t tDim1 = tDims[1];
+ const dim_t sDim0 = sDims[0];
+ const dim_t sDim1 = sDims[1];
+
+ const dim4 oStrides = out.strides();
+
+ outType tImgMean = outType(0);
+ dim_t winNumElements = tImg.elements();
+ bool needMean = mType==AF_ZSAD || mType==AF_LSAD ||
+ mType==AF_ZSSD || mType==AF_LSSD ||
+ mType==AF_ZNCC;
+ const inType * tpl = tImg.get();
+
+ if (needMean) {
+ for(dim_t tj=0; tj<tDim1; tj++) {
+ dim_t tjStride = tj*tStrides[1];
+
+ for(dim_t ti=0; ti<tDim0; ti++) {
+ tImgMean += (outType)tpl[tjStride+ti*tStrides[0]];
+ }
}
+ tImgMean /= winNumElements;
}
- tImgMean /= winNumElements;
- }
- outType * dst = out.get();
- const inType * src = sImg.get();
+ outType * dst = out.get();
+ const inType * src = sImg.get();
- for(dim_t b3=0; b3<sDims[3]; ++b3) {
- for(dim_t b2=0; b2<sDims[2]; ++b2) {
+ for(dim_t b3=0; b3<sDims[3]; ++b3) {
+ for(dim_t b2=0; b2<sDims[2]; ++b2) {
- // slide through image window after window
- for(dim_t sj=0; sj<sDim1; sj++) {
+ // slide through image window after window
+ for(dim_t sj=0; sj<sDim1; sj++) {
- dim_t ojStride = sj*oStrides[1];
+ dim_t ojStride = sj*oStrides[1];
- for(dim_t si=0; si<sDim0; si++) {
- outType disparity = outType(0);
+ for(dim_t si=0; si<sDim0; si++) {
+ outType disparity = outType(0);
- // mean for window
- // this variable will be used based on mType value
- outType wImgMean = outType(0);
- if (needMean) {
- for(dim_t tj=0,j=sj; tj<tDim1; tj++, j++) {
- dim_t jStride = j*sStrides[1];
+ // mean for window
+ // this variable will be used based on mType value
+ outType wImgMean = outType(0);
+ if (needMean) {
+ for(dim_t tj=0,j=sj; tj<tDim1; tj++, j++) {
+ dim_t jStride = j*sStrides[1];
- for(dim_t ti=0, i=si; ti<tDim0; ti++, i++) {
- inType sVal = ((j<sDim1 && i<sDim0) ?
- src[jStride + i*sStrides[0]] : inType(0));
- wImgMean += (outType)sVal;
+ for(dim_t ti=0, i=si; ti<tDim0; ti++, i++) {
+ inType sVal = ((j<sDim1 && i<sDim0) ?
+ src[jStride + i*sStrides[0]] : inType(0));
+ wImgMean += (outType)sVal;
+ }
+ }
+ wImgMean /= winNumElements;
}
- }
- wImgMean /= winNumElements;
- }
-
- // run the window match metric
- for(dim_t tj=0,j=sj; tj<tDim1; tj++, j++) {
- dim_t jStride = j*sStrides[1];
- dim_t tjStride = tj*tStrides[1];
- for(dim_t ti=0, i=si; ti<tDim0; ti++, i++) {
- inType sVal = ((j<sDim1 && i<sDim0) ?
- src[jStride + i*sStrides[0]] : inType(0));
- inType tVal = tpl[tjStride+ti*tStrides[0]];
- outType temp;
- switch(mType) {
- case AF_SAD:
- disparity += fabs((outType)sVal-(outType)tVal);
- break;
- case AF_ZSAD:
- disparity += fabs((outType)sVal - wImgMean -
- (outType)tVal + tImgMean);
- break;
- case AF_LSAD:
- disparity += fabs((outType)sVal-(wImgMean/tImgMean)*tVal);
- break;
- case AF_SSD:
- disparity += ((outType)sVal-(outType)tVal)*((outType)sVal-(outType)tVal);
- break;
- case AF_ZSSD:
- temp = ((outType)sVal - wImgMean - (outType)tVal + tImgMean);
- disparity += temp*temp;
- break;
- case AF_LSSD:
- temp = ((outType)sVal-(wImgMean/tImgMean)*tVal);
- disparity += temp*temp;
- break;
- case AF_NCC:
- //TODO: furture implementation
- break;
- case AF_ZNCC:
- //TODO: furture implementation
- break;
- case AF_SHD:
- //TODO: furture implementation
- break;
+ // run the window match metric
+ for(dim_t tj=0,j=sj; tj<tDim1; tj++, j++) {
+ dim_t jStride = j*sStrides[1];
+ dim_t tjStride = tj*tStrides[1];
+
+ for(dim_t ti=0, i=si; ti<tDim0; ti++, i++) {
+ inType sVal = ((j<sDim1 && i<sDim0) ?
+ src[jStride + i*sStrides[0]] : inType(0));
+ inType tVal = tpl[tjStride+ti*tStrides[0]];
+ outType temp;
+ switch(mType) {
+ case AF_SAD:
+ disparity += fabs((outType)sVal-(outType)tVal);
+ break;
+ case AF_ZSAD:
+ disparity += fabs((outType)sVal - wImgMean -
+ (outType)tVal + tImgMean);
+ break;
+ case AF_LSAD:
+ disparity += fabs((outType)sVal-(wImgMean/tImgMean)*tVal);
+ break;
+ case AF_SSD:
+ disparity += ((outType)sVal-(outType)tVal)*((outType)sVal-(outType)tVal);
+ break;
+ case AF_ZSSD:
+ temp = ((outType)sVal - wImgMean - (outType)tVal + tImgMean);
+ disparity += temp*temp;
+ break;
+ case AF_LSSD:
+ temp = ((outType)sVal-(wImgMean/tImgMean)*tVal);
+ disparity += temp*temp;
+ break;
+ case AF_NCC:
+ //TODO: future implementation
+ break;
+ case AF_ZNCC:
+ //TODO: future implementation
+ break;
+ case AF_SHD:
+ //TODO: future implementation
+ break;
+ }
+ }
}
+ // output is just created, hence not doing the
+ // extra multiplication for 0th dim stride
+ dst[ojStride + si] = disparity;
}
}
- // output is just created, hence not doing the
- // extra multiplication for 0th dim stride
- dst[ojStride + si] = disparity;
+ src += sStrides[2];
+ dst += oStrides[2];
}
+ src += sStrides[3];
+ dst += oStrides[3];
}
- src += sStrides[2];
- dst += oStrides[2];
- }
- src += sStrides[3];
- dst += oStrides[3];
- }
+ };
+ getQueue().enqueue(func, out, sImg, tImg);
return out;
}
diff --git a/src/backend/cpu/meanshift.cpp b/src/backend/cpu/meanshift.cpp
index b52eaf9..3f99d15 100644
--- a/src/backend/cpu/meanshift.cpp
+++ b/src/backend/cpu/meanshift.cpp
@@ -16,6 +16,8 @@
#include <algorithm>
#include <err_cpu.hpp>
#include <math.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
using std::vector;
@@ -31,117 +33,123 @@ inline dim_t clamp(dim_t a, dim_t mn, dim_t mx)
template<typename T, bool is_color>
Array<T> meanshift(const Array<T> &in, const float &s_sigma, const float &c_sigma, const unsigned iter)
{
- const dim4 dims = in.dims();
- const dim4 istrides = in.strides();
- Array<T> out = createEmptyArray<T>(dims);
- const dim4 ostrides = out.strides();
+ Array<T> out = createEmptyArray<T>(in.dims());
- const dim_t bCount = (is_color ? 1 : dims[2]);
- const dim_t channels = (is_color ? dims[2] : 1);
+ auto func = [=] (Array<T> out, const Array<T> in, const float s_sigma,
+ const float c_sigma, const unsigned iter) {
+ const dim4 dims = in.dims();
+ const dim4 istrides = in.strides();
+ const dim4 ostrides = out.strides();
- // clamp spatical and chromatic sigma's
- float space_ = std::min(11.5f, s_sigma);
- const dim_t radius = std::max((int)(space_ * 1.5f), 1);
- const float cvar = c_sigma*c_sigma;
+ const dim_t bCount = (is_color ? 1 : dims[2]);
+ const dim_t channels = (is_color ? dims[2] : 1);
- vector<float> means;
- vector<float> centers;
- vector<float> tmpclrs;
- means.reserve(channels);
- centers.reserve(channels);
- tmpclrs.reserve(channels);
+ // clamp spatial and chromatic sigmas
+ float space_ = std::min(11.5f, s_sigma);
+ const dim_t radius = std::max((int)(space_ * 1.5f), 1);
+ const float cvar = c_sigma*c_sigma;
- T *outData = out.get();
- const T * inData = in.get();
+ vector<float> means;
+ vector<float> centers;
+ vector<float> tmpclrs;
+ means.reserve(channels);
+ centers.reserve(channels);
+ tmpclrs.reserve(channels);
- for(dim_t b3=0; b3<dims[3]; ++b3) {
- for(dim_t b2=0; b2<bCount; ++b2) {
+ T *outData = out.get();
+ const T * inData = in.get();
- for(dim_t j=0; j<dims[1]; ++j) {
+ for(dim_t b3=0; b3<dims[3]; ++b3) {
+ for(dim_t b2=0; b2<bCount; ++b2) {
- dim_t j_in_off = j*istrides[1];
- dim_t j_out_off = j*ostrides[1];
+ for(dim_t j=0; j<dims[1]; ++j) {
- for(dim_t i=0; i<dims[0]; ++i) {
+ dim_t j_in_off = j*istrides[1];
+ dim_t j_out_off = j*ostrides[1];
- dim_t i_in_off = i*istrides[0];
- dim_t i_out_off = i*ostrides[0];
+ for(dim_t i=0; i<dims[0]; ++i) {
- // clear means and centers for this pixel
- for(dim_t ch=0; ch<channels; ++ch) {
- means[ch] = 0.0f;
- // the expression ch*istrides[2] will only effect when ch>1
- // i.e for color images where batch is along fourth dimension
- centers[ch] = inData[j_in_off + i_in_off + ch*istrides[2]];
- }
+ dim_t i_in_off = i*istrides[0];
+ dim_t i_out_off = i*ostrides[0];
- // scope of meanshift iterationd begin
- for(unsigned it=0; it<iter; ++it) {
+ // clear means and centers for this pixel
+ for(dim_t ch=0; ch<channels; ++ch) {
+ means[ch] = 0.0f;
+ // the expression ch*istrides[2] only has an effect when ch>0,
+ // i.e. for color images, where the batch is along the fourth dimension
+ centers[ch] = inData[j_in_off + i_in_off + ch*istrides[2]];
+ }
- int count = 0;
- int shift_x = 0;
- int shift_y = 0;
+ // scope of meanshift iterations begin
+ for(unsigned it=0; it<iter; ++it) {
- for(dim_t wj=-radius; wj<=radius; ++wj) {
+ int count = 0;
+ int shift_x = 0;
+ int shift_y = 0;
- int hit_count = 0;
+ for(dim_t wj=-radius; wj<=radius; ++wj) {
- for(dim_t wi=-radius; wi<=radius; ++wi) {
+ int hit_count = 0;
- dim_t tj = j + wj;
- dim_t ti = i + wi;
+ for(dim_t wi=-radius; wi<=radius; ++wi) {
- // clamps offsets
- tj = clamp(tj, 0ll, dims[1]-1);
- ti = clamp(ti, 0ll, dims[0]-1);
+ dim_t tj = j + wj;
+ dim_t ti = i + wi;
- // proceed
- float norm = 0.0f;
- for(dim_t ch=0; ch<channels; ++ch) {
- tmpclrs[ch] = inData[ tj*istrides[1] + ti*istrides[0] + ch*istrides[2]];
- norm += (centers[ch]-tmpclrs[ch]) * (centers[ch]-tmpclrs[ch]);
- }
+ // clamps offsets
+ tj = clamp(tj, 0ll, dims[1]-1);
+ ti = clamp(ti, 0ll, dims[0]-1);
- if (norm<= cvar) {
- for(dim_t ch=0; ch<channels; ++ch)
- means[ch] += tmpclrs[ch];
- shift_x += wi;
- ++hit_count;
- }
+ // proceed
+ float norm = 0.0f;
+ for(dim_t ch=0; ch<channels; ++ch) {
+ tmpclrs[ch] = inData[ tj*istrides[1] + ti*istrides[0] + ch*istrides[2]];
+ norm += (centers[ch]-tmpclrs[ch]) * (centers[ch]-tmpclrs[ch]);
+ }
- }
- count+= hit_count;
- shift_y += wj*hit_count;
- }
+ if (norm<= cvar) {
+ for(dim_t ch=0; ch<channels; ++ch)
+ means[ch] += tmpclrs[ch];
+ shift_x += wi;
+ ++hit_count;
+ }
- if (count==0) { break; }
+ }
+ count+= hit_count;
+ shift_y += wj*hit_count;
+ }
- const float fcount = 1.f/count;
- const int mean_x = (int)(shift_x*fcount+0.5f);
- const int mean_y = (int)(shift_y*fcount+0.5f);
- for(dim_t ch=0; ch<channels; ++ch)
- means[ch] *= fcount;
+ if (count==0) { break; }
+
+ const float fcount = 1.f/count;
+ const int mean_x = (int)(shift_x*fcount+0.5f);
+ const int mean_y = (int)(shift_y*fcount+0.5f);
+ for(dim_t ch=0; ch<channels; ++ch)
+ means[ch] *= fcount;
+
+ float norm = 0.f;
+ for(dim_t ch=0; ch<channels; ++ch)
+ norm += ((means[ch]-centers[ch])*(means[ch]-centers[ch]));
+ bool stop = ((abs(shift_y-mean_y)+abs(shift_x-mean_x)) + norm) <= 1;
+ shift_x = mean_x;
+ shift_y = mean_y;
+ for(dim_t ch=0; ch<channels; ++ch)
+ centers[ch] = means[ch];
+ if (stop) { break; }
+ } // scope of meanshift iterations end
- float norm = 0.f;
- for(dim_t ch=0; ch<channels; ++ch)
- norm += ((means[ch]-centers[ch])*(means[ch]-centers[ch]));
- bool stop = ((abs(shift_y-mean_y)+abs(shift_x-mean_x)) + norm) <= 1;
- shift_x = mean_x;
- shift_y = mean_y;
for(dim_t ch=0; ch<channels; ++ch)
- centers[ch] = means[ch];
- if (stop) { break; }
- } // scope of meanshift iterations end
-
- for(dim_t ch=0; ch<channels; ++ch)
- outData[j_out_off + i_out_off + ch*ostrides[2]] = centers[ch];
+ outData[j_out_off + i_out_off + ch*ostrides[2]] = centers[ch];
+ }
}
+ outData += ostrides[2];
+ inData += istrides[2];
}
- outData += ostrides[2];
- inData += istrides[2];
}
- }
+ };
+ getQueue().enqueue(func, out, in, s_sigma, c_sigma, iter);
+
return out;
}
diff --git a/src/backend/cpu/medfilt.cpp b/src/backend/cpu/medfilt.cpp
index 3ded3c0..ce921fc 100644
--- a/src/backend/cpu/medfilt.cpp
+++ b/src/backend/cpu/medfilt.cpp
@@ -14,6 +14,8 @@
#include <medfilt.hpp>
#include <err_cpu.hpp>
#include <algorithm>
+#include <platform.hpp>
+#include <async_queue.hpp>
using af::dim4;
@@ -23,114 +25,119 @@ namespace cpu
template<typename T, af_border_type pad>
Array<T> medfilt(const Array<T> &in, dim_t w_len, dim_t w_wid)
{
- const dim4 dims = in.dims();
- const dim4 istrides = in.strides();
- Array<T> out = createEmptyArray<T>(dims);
- const dim4 ostrides = out.strides();
+ Array<T> out = createEmptyArray<T>(in.dims());
- std::vector<T> wind_vals;
- wind_vals.reserve(w_len*w_wid);
+ auto func = [=] (Array<T> out, const Array<T> in,
+ dim_t w_len, dim_t w_wid) {
+ const dim4 dims = in.dims();
+ const dim4 istrides = in.strides();
+ const dim4 ostrides = out.strides();
- T const * in_ptr = in.get();
- T * out_ptr = out.get();
+ std::vector<T> wind_vals;
+ wind_vals.reserve(w_len*w_wid);
- for(int b3=0; b3<(int)dims[3]; b3++) {
+ T const * in_ptr = in.get();
+ T * out_ptr = out.get();
- for(int b2=0; b2<(int)dims[2]; b2++) {
+ for(int b3=0; b3<(int)dims[3]; b3++) {
- for(int col=0; col<(int)dims[1]; col++) {
+ for(int b2=0; b2<(int)dims[2]; b2++) {
- int ocol_off = col*ostrides[1];
+ for(int col=0; col<(int)dims[1]; col++) {
- for(int row=0; row<(int)dims[0]; row++) {
+ int ocol_off = col*ostrides[1];
- wind_vals.clear();
+ for(int row=0; row<(int)dims[0]; row++) {
- for(int wj=0; wj<(int)w_wid; ++wj) {
+ wind_vals.clear();
- bool isColOff = false;
+ for(int wj=0; wj<(int)w_wid; ++wj) {
- int im_col = col + wj-w_wid/2;
- int im_coff;
- switch(pad) {
- case AF_PAD_ZERO:
- im_coff = im_col * istrides[1];
- if (im_col < 0 || im_col>=(int)dims[1])
- isColOff = true;
- break;
- case AF_PAD_SYM:
- {
- if (im_col < 0) {
- im_col *= -1;
- isColOff = true;
- }
+ bool isColOff = false;
- if (im_col>=(int)dims[1]) {
- im_col = 2*((int)dims[1]-1) - im_col;
- isColOff = true;
- }
-
- im_coff = im_col * istrides[1];
- }
- break;
- }
-
- for(int wi=0; wi<(int)w_len; ++wi) {
-
- bool isRowOff = false;
-
- int im_row = row + wi-w_len/2;
- int im_roff;
+ int im_col = col + wj-w_wid/2;
+ int im_coff;
switch(pad) {
case AF_PAD_ZERO:
- im_roff = im_row * istrides[0];
- if (im_row < 0 || im_row>=(int)dims[0])
- isRowOff = true;
+ im_coff = im_col * istrides[1];
+ if (im_col < 0 || im_col>=(int)dims[1])
+ isColOff = true;
break;
case AF_PAD_SYM:
{
- if (im_row < 0) {
- im_row *= -1;
- isRowOff = true;
+ if (im_col < 0) {
+ im_col *= -1;
+ isColOff = true;
}
- if (im_row>=(int)dims[0]) {
- im_row = 2*((int)dims[0]-1) - im_row;
- isRowOff = true;
+ if (im_col>=(int)dims[1]) {
+ im_col = 2*((int)dims[1]-1) - im_col;
+ isColOff = true;
}
- im_roff = im_row * istrides[0];
+ im_coff = im_col * istrides[1];
}
break;
}
- if(isRowOff || isColOff) {
+ for(int wi=0; wi<(int)w_len; ++wi) {
+
+ bool isRowOff = false;
+
+ int im_row = row + wi-w_len/2;
+ int im_roff;
switch(pad) {
case AF_PAD_ZERO:
- wind_vals.push_back(0);
+ im_roff = im_row * istrides[0];
+ if (im_row < 0 || im_row>=(int)dims[0])
+ isRowOff = true;
break;
case AF_PAD_SYM:
- wind_vals.push_back(in_ptr[im_coff+im_roff]);
+ {
+ if (im_row < 0) {
+ im_row *= -1;
+ isRowOff = true;
+ }
+
+ if (im_row>=(int)dims[0]) {
+ im_row = 2*((int)dims[0]-1) - im_row;
+ isRowOff = true;
+ }
+
+ im_roff = im_row * istrides[0];
+ }
break;
}
- } else
- wind_vals.push_back(in_ptr[im_coff+im_roff]);
+
+ if(isRowOff || isColOff) {
+ switch(pad) {
+ case AF_PAD_ZERO:
+ wind_vals.push_back(0);
+ break;
+ case AF_PAD_SYM:
+ wind_vals.push_back(in_ptr[im_coff+im_roff]);
+ break;
+ }
+ } else
+ wind_vals.push_back(in_ptr[im_coff+im_roff]);
+ }
}
- }
- std::stable_sort(wind_vals.begin(),wind_vals.end());
- int off = wind_vals.size()/2;
- if (wind_vals.size()%2==0)
- out_ptr[ocol_off+row*ostrides[0]] = (wind_vals[off]+wind_vals[off-1])/2;
- else {
- out_ptr[ocol_off+row*ostrides[0]] = wind_vals[off];
+ std::stable_sort(wind_vals.begin(),wind_vals.end());
+ int off = wind_vals.size()/2;
+ if (wind_vals.size()%2==0)
+ out_ptr[ocol_off+row*ostrides[0]] = (wind_vals[off]+wind_vals[off-1])/2;
+ else {
+ out_ptr[ocol_off+row*ostrides[0]] = wind_vals[off];
+ }
}
}
+ in_ptr += istrides[2];
+ out_ptr += ostrides[2];
}
- in_ptr += istrides[2];
- out_ptr += ostrides[2];
}
- }
+ };
+ getQueue().enqueue(func, out, in, w_len, w_wid);
return out;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list