[arrayfire] 114/284: Added perspective transform to CUDA backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:24 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 7fdfe3e6437b507d11290a76d02bc2801f6a9663
Author: Peter Andreas Entschev <peter at arrayfire.com>
Date: Tue Dec 29 11:28:48 2015 -0500
Added perspective transform to CUDA backend
---
src/backend/cuda/kernel/rotate.hpp | 6 +--
src/backend/cuda/kernel/transform.hpp | 79 +++++++++++++++++++---------
src/backend/cuda/kernel/transform_interp.hpp | 65 ++++++++++++++++++-----
src/backend/cuda/transform.cu | 10 ++--
src/backend/cuda/transform.hpp | 3 +-
5 files changed, 116 insertions(+), 47 deletions(-)
diff --git a/src/backend/cuda/kernel/rotate.hpp b/src/backend/cuda/kernel/rotate.hpp
index d63f010..3cea7f2 100644
--- a/src/backend/cuda/kernel/rotate.hpp
+++ b/src/backend/cuda/kernel/rotate.hpp
@@ -60,11 +60,11 @@ namespace cuda
switch(method) {
case AF_INTERP_NEAREST:
- transform_n(optr, out, iptr, in, t.tmat, xx, yy, limages); break;
+ transform_n(optr, out, iptr, in, t.tmat, xx, yy, limages, false); break;
case AF_INTERP_BILINEAR:
- transform_b(optr, out, iptr, in, t.tmat, xx, yy, limages); break;
+ transform_b(optr, out, iptr, in, t.tmat, xx, yy, limages, false); break;
case AF_INTERP_LOWER:
- transform_l(optr, out, iptr, in, t.tmat, xx, yy, limages); break;
+ transform_l(optr, out, iptr, in, t.tmat, xx, yy, limages, false); break;
default: break;
}
}
diff --git a/src/backend/cuda/kernel/transform.hpp b/src/backend/cuda/kernel/transform.hpp
index 07be0a3..599e62c 100644
--- a/src/backend/cuda/kernel/transform.hpp
+++ b/src/backend/cuda/kernel/transform.hpp
@@ -24,21 +24,42 @@ namespace cuda
// Used for batching images
static const unsigned TI = 4;
- __constant__ float c_tmat[6 * 256];
+ __constant__ float c_tmat[9 * 256];
template <typename T>
__host__ __device__
- void calc_affine_inverse(T *txo, const T *txi)
+ void calc_transf_inverse(T *txo, const T *txi, const bool perspective)
{
- T det = txi[0]*txi[4] - txi[1]*txi[3];
-
- txo[0] = txi[4] / det;
- txo[1] = txi[3] / det;
- txo[3] = txi[1] / det;
- txo[4] = txi[0] / det;
-
- txo[2] = txi[2] * -txo[0] + txi[5] * -txo[1];
- txo[5] = txi[2] * -txo[3] + txi[5] * -txo[4];
+ if (perspective) {
+ txo[0] = txi[4]*txi[8] - txi[5]*txi[7];
+ txo[1] = -(txi[1]*txi[8] - txi[2]*txi[7]);
+ txo[2] = txi[1]*txi[5] - txi[2]*txi[4];
+
+ txo[3] = -(txi[3]*txi[8] - txi[5]*txi[6]);
+ txo[4] = txi[0]*txi[8] - txi[2]*txi[6];
+ txo[5] = -(txi[0]*txi[5] - txi[2]*txi[3]);
+
+ txo[6] = txi[3]*txi[7] - txi[4]*txi[6];
+ txo[7] = -(txi[0]*txi[7] - txi[1]*txi[6]);
+ txo[8] = txi[0]*txi[4] - txi[1]*txi[3];
+
+ T det = txi[0]*txo[0] + txi[1]*txo[3] + txi[2]*txo[6];
+
+ txo[0] /= det; txo[1] /= det; txo[2] /= det;
+ txo[3] /= det; txo[4] /= det; txo[5] /= det;
+ txo[6] /= det; txo[7] /= det; txo[8] /= det;
+ }
+ else {
+ T det = txi[0]*txi[4] - txi[1]*txi[3];
+
+ txo[0] = txi[4] / det;
+ txo[1] = txi[3] / det;
+ txo[3] = txi[1] / det;
+ txo[4] = txi[0] / det;
+
+ txo[2] = txi[2] * -txo[0] + txi[5] * -txo[1];
+ txo[5] = txi[2] * -txo[3] + txi[5] * -txo[4];
+ }
}
///////////////////////////////////////////////////////////////////////////
@@ -47,7 +68,8 @@ namespace cuda
template<typename T, bool inverse, af_interp_type method>
__global__ static void
transform_kernel(Param<T> out, CParam<T> in, const int nimages,
- const int ntransforms, const int blocksXPerImage)
+ const int ntransforms, const int blocksXPerImage,
+ const int transf_len, const bool perspective)
{
// Compute which image set
const int setId = blockIdx.x / blocksXPerImage;
@@ -77,30 +99,32 @@ namespace cuda
const T *iptr = in.ptr + setId * nimages * in.strides[2];
// Transform is in constant memory.
- const float *tmat_ptr = c_tmat + t_idx * 6;
- float tmat[6];
+ const float *tmat_ptr = c_tmat + t_idx * transf_len;
+ float* tmat = new float[transf_len];
// We expect a inverse transform matrix by default
// If it is an forward transform, then we need its inverse
if(inverse) {
- #pragma unroll
- for(int i = 0; i < 6; i++)
+ #pragma unroll 3
+ for(int i = 0; i < transf_len; i++)
tmat[i] = tmat_ptr[i];
} else {
- calc_affine_inverse(tmat, tmat_ptr);
+ calc_transf_inverse(tmat, tmat_ptr, perspective);
}
if (xido >= out.dims[0] && yido >= out.dims[1]) return;
switch(method) {
case AF_INTERP_NEAREST:
- transform_n(optr, out, iptr, in, tmat, xido, yido, limages); break;
+ transform_n(optr, out, iptr, in, tmat, xido, yido, limages, perspective); break;
case AF_INTERP_BILINEAR:
- transform_b(optr, out, iptr, in, tmat, xido, yido, limages); break;
+ transform_b(optr, out, iptr, in, tmat, xido, yido, limages, perspective); break;
case AF_INTERP_LOWER:
- transform_l(optr, out, iptr, in, tmat, xido, yido, limages); break;
+ transform_l(optr, out, iptr, in, tmat, xido, yido, limages, perspective); break;
default: break;
}
+
+ delete[] tmat;
}
///////////////////////////////////////////////////////////////////////////
@@ -108,15 +132,18 @@ namespace cuda
///////////////////////////////////////////////////////////////////////////
template <typename T, af_interp_type method>
void transform(Param<T> out, CParam<T> in, CParam<float> tf,
- const bool inverse)
+ const bool inverse, const bool perspective)
{
int nimages = in.dims[2];
// Multiplied in src/backend/transform.cpp
const int ntransforms = out.dims[2] / in.dims[2];
+
+ const int transf_len = (perspective) ? 9 : 6;
+
// Copy transform to constant memory.
- CUDA_CHECK(cudaMemcpyToSymbolAsync(c_tmat, tf.ptr, ntransforms * 6 * sizeof(float), 0,
- cudaMemcpyDeviceToDevice,
+ CUDA_CHECK(cudaMemcpyToSymbolAsync(c_tmat, tf.ptr, ntransforms * transf_len * sizeof(float),
+ 0, cudaMemcpyDeviceToDevice,
cuda::getStream(cuda::getActiveDeviceId())));
dim3 threads(TX, TY, 1);
@@ -133,10 +160,12 @@ namespace cuda
if(inverse) {
CUDA_LAUNCH((transform_kernel<T, true, method>), blocks, threads,
- out, in, nimages, ntransforms, blocksXPerImage);
+ out, in, nimages, ntransforms, blocksXPerImage,
+ transf_len, perspective);
} else {
CUDA_LAUNCH((transform_kernel<T, false, method>), blocks, threads,
- out, in, nimages, ntransforms, blocksXPerImage);
+ out, in, nimages, ntransforms, blocksXPerImage,
+ transf_len, perspective);
}
POST_LAUNCH_CHECK();
}
diff --git a/src/backend/cuda/kernel/transform_interp.hpp b/src/backend/cuda/kernel/transform_interp.hpp
index 5a88fc4..1554b8e 100644
--- a/src/backend/cuda/kernel/transform_interp.hpp
+++ b/src/backend/cuda/kernel/transform_interp.hpp
@@ -42,15 +42,28 @@ namespace cuda
template<typename T>
__device__
void transform_n(T *optr, Param<T> out, const T *iptr, CParam<T> in, const float *tmat,
- const int xido, const int yido, const int nimages)
+ const int xido, const int yido, const int nimages,
+ const bool perspective)
{
// Compute input index
- int xidi = round(xido * tmat[0]
+ int xidi = 0, yidi = 0;
+ if (perspective) {
+ const float W = xido * tmat[6] + yido * tmat[7] + tmat[8];
+ xidi = round((xido * tmat[0]
+ + yido * tmat[1]
+ + tmat[2]) / W);
+ yidi = round((xido * tmat[3]
+ + yido * tmat[4]
+ + tmat[5]) / W);
+ }
+ else {
+ xidi = round(xido * tmat[0]
+ yido * tmat[1]
+ tmat[2]);
- int yidi = round(xido * tmat[3]
+ yidi = round(xido * tmat[3]
+ yido * tmat[4]
+ tmat[5]);
+ }
// Makes scale give same output as resize
// But fails rotate tests
@@ -76,17 +89,30 @@ namespace cuda
template<typename T>
__device__
void transform_b(T *optr, Param<T> out, const T *iptr, CParam<T> in, const float *tmat,
- const int xido, const int yido, const int nimages)
+ const int xido, const int yido, const int nimages,
+ const bool perspective)
{
const int loco = (yido * out.strides[1] + xido);
// Compute input index
- const float xidi = xido * tmat[0]
- + yido * tmat[1]
- + tmat[2];
- const float yidi = xido * tmat[3]
- + yido * tmat[4]
- + tmat[5];
+ float xidi = 0.0f, yidi = 0.0f;
+ if (perspective) {
+ const float W = xido * tmat[6] + yido * tmat[7] + tmat[8];
+ xidi = (xido * tmat[0]
+ + yido * tmat[1]
+ + tmat[2]) / W;
+ yidi = (xido * tmat[3]
+ + yido * tmat[4]
+ + tmat[5]) / W;
+ }
+ else {
+ xidi = xido * tmat[0]
+ + yido * tmat[1]
+ + tmat[2];
+ yidi = xido * tmat[3]
+ + yido * tmat[4]
+ + tmat[5];
+ }
if (xidi < -0.0001 || yidi < -0.0001 || in.dims[0] < xidi || in.dims[1] < yidi) {
for(int i = 0; i < nimages; i++) {
@@ -133,15 +159,28 @@ namespace cuda
template<typename T>
__device__
void transform_l(T *optr, Param<T> out, const T *iptr, CParam<T> in, const float *tmat,
- const int xido, const int yido, const int nimages)
+ const int xido, const int yido, const int nimages,
+ const bool perspective)
{
// Compute input index
- int xidi = floor(xido * tmat[0]
+ int xidi = 0, yidi = 0;
+ if (perspective) {
+ const float W = xido * tmat[6] + yido * tmat[7] + tmat[8];
+ xidi = floor((xido * tmat[0]
+ + yido * tmat[1]
+ + tmat[2]) / W);
+ yidi = floor((xido * tmat[3]
+ + yido * tmat[4]
+ + tmat[5]) / W);
+ }
+ else {
+ xidi = floor(xido * tmat[0]
+ yido * tmat[1]
+ tmat[2]);
- int yidi = floor(xido * tmat[3]
+ yidi = floor(xido * tmat[3]
+ yido * tmat[4]
+ tmat[5]);
+ }
// Makes scale give same output as resize
// But fails rotate tests
diff --git a/src/backend/cuda/transform.cu b/src/backend/cuda/transform.cu
index 853617c..07c3123 100644
--- a/src/backend/cuda/transform.cu
+++ b/src/backend/cuda/transform.cu
@@ -16,7 +16,7 @@ namespace cuda
{
template<typename T>
Array<T> transform(const Array<T> &in, const Array<float> &transform, const af::dim4 &odims,
- const af_interp_type method, const bool inverse)
+ const af_interp_type method, const bool inverse, const bool perspective)
{
const af::dim4 idims = in.dims();
@@ -24,13 +24,13 @@ namespace cuda
switch(method) {
case AF_INTERP_NEAREST:
- kernel::transform<T, AF_INTERP_NEAREST> (out, in, transform, inverse);
+ kernel::transform<T, AF_INTERP_NEAREST> (out, in, transform, inverse, perspective);
break;
case AF_INTERP_BILINEAR:
- kernel::transform<T, AF_INTERP_BILINEAR>(out, in, transform, inverse);
+ kernel::transform<T, AF_INTERP_BILINEAR>(out, in, transform, inverse, perspective);
break;
case AF_INTERP_LOWER:
- kernel::transform<T, AF_INTERP_LOWER> (out, in, transform, inverse);
+ kernel::transform<T, AF_INTERP_LOWER> (out, in, transform, inverse, perspective);
break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
@@ -43,7 +43,7 @@ namespace cuda
#define INSTANTIATE(T) \
template Array<T> transform(const Array<T> &in, const Array<float> &transform, \
const af::dim4 &odims, const af_interp_type method, \
- const bool inverse);
+ const bool inverse, const bool perspective);
INSTANTIATE(float)
INSTANTIATE(double)
diff --git a/src/backend/cuda/transform.hpp b/src/backend/cuda/transform.hpp
index eb3d71d..316953d 100644
--- a/src/backend/cuda/transform.hpp
+++ b/src/backend/cuda/transform.hpp
@@ -14,5 +14,6 @@ namespace cuda
{
template<typename T>
Array<T> transform(const Array<T> &in, const Array<float> &tf, const af::dim4 &odims,
- const af_interp_type method, const bool inverse);
+ const af_interp_type method, const bool inverse,
+ const bool perspective);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list