[arrayfire] 72/408: Add lower interpolation to rotate and transform
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:19 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 28d9e71735e176221f797f2401a1be2d4e010479
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Thu Jul 2 12:44:27 2015 -0400
Add lower interpolation to rotate and transform
---
src/api/c/rotate.cpp | 5 +++-
src/api/c/transform.cpp | 4 ++-
src/backend/cpu/rotate.cpp | 9 +++++-
src/backend/cpu/transform.cpp | 16 +++++++---
src/backend/cpu/transform_interp.hpp | 36 +++++++++++++++++++++++
src/backend/cuda/kernel/rotate.hpp | 2 ++
src/backend/cuda/kernel/transform.hpp | 2 ++
src/backend/cuda/kernel/transform_interp.hpp | 42 ++++++++++++++++++++++++---
src/backend/cuda/rotate.cu | 9 ++++--
src/backend/cuda/transform.cu | 12 ++++----
src/backend/opencl/kernel/rotate.cl | 1 +
src/backend/opencl/kernel/rotate.hpp | 6 ++--
src/backend/opencl/kernel/transform.cl | 1 +
src/backend/opencl/kernel/transform.hpp | 6 ++--
src/backend/opencl/kernel/transform_interp.cl | 36 ++++++++++++++++++++---
src/backend/opencl/rotate.cpp | 12 ++++----
src/backend/opencl/transform.cpp | 17 +++++++----
17 files changed, 179 insertions(+), 37 deletions(-)
diff --git a/src/api/c/rotate.cpp b/src/api/c/rotate.cpp
index 13db853..b792239 100644
--- a/src/api/c/rotate.cpp
+++ b/src/api/c/rotate.cpp
@@ -45,7 +45,10 @@ af_err af_rotate(af_array *out, const af_array in, const float theta,
af_dtype itype = info.getType();
- ARG_ASSERT(3, method == AF_INTERP_NEAREST || method == AF_INTERP_BILINEAR);
+ ARG_ASSERT(3, method == AF_INTERP_NEAREST ||
+ method == AF_INTERP_BILINEAR ||
+ method == AF_INTERP_LOWER);
+
DIM_ASSERT(1, idims.elements() > 0);
af::dim4 odims(odims0, odims1, idims[2], idims[3]);
diff --git a/src/api/c/transform.cpp b/src/api/c/transform.cpp
index 41ea0ac..cc17adc 100644
--- a/src/api/c/transform.cpp
+++ b/src/api/c/transform.cpp
@@ -38,7 +38,9 @@ af_err af_transform(af_array *out, const af_array in, const af_array tf,
af_dtype itype = i_info.getType();
ARG_ASSERT(2, t_info.getType() == f32);
- ARG_ASSERT(5, method == AF_INTERP_NEAREST || method == AF_INTERP_BILINEAR);
+ ARG_ASSERT(5, method == AF_INTERP_NEAREST ||
+ method == AF_INTERP_BILINEAR ||
+ method == AF_INTERP_LOWER);
DIM_ASSERT(2, (tdims[0] == 3 && tdims[1] == 2));
DIM_ASSERT(1, idims.elements() > 0);
DIM_ASSERT(1, (idims.ndims() == 2 || idims.ndims() == 3));
diff --git a/src/backend/cpu/rotate.cpp b/src/backend/cpu/rotate.cpp
index b7c4576..2293ee2 100644
--- a/src/backend/cpu/rotate.cpp
+++ b/src/backend/cpu/rotate.cpp
@@ -55,6 +55,9 @@ namespace cpu
case AF_INTERP_BILINEAR:
t_fn = &transform_b;
break;
+ case AF_INTERP_LOWER:
+ t_fn = &transform_l;
+ break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
break;
@@ -85,6 +88,10 @@ namespace cpu
rotate_<T, AF_INTERP_BILINEAR>
(out.get(), in.get(), theta, odims, idims, out.strides(), in.strides());
break;
+ case AF_INTERP_LOWER:
+ rotate_<T, AF_INTERP_LOWER>
+ (out.get(), in.get(), theta, odims, idims, out.strides(), in.strides());
+ break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
break;
@@ -96,7 +103,7 @@ namespace cpu
#define INSTANTIATE(T) \
template Array<T> rotate(const Array<T> &in, const float theta, \
- const af::dim4 &odims, const af_interp_type method); \
+ const af::dim4 &odims, const af_interp_type method);
INSTANTIATE(float)
INSTANTIATE(double)
diff --git a/src/backend/cpu/transform.cpp b/src/backend/cpu/transform.cpp
index ed9f0ad..d1cf58e 100644
--- a/src/backend/cpu/transform.cpp
+++ b/src/backend/cpu/transform.cpp
@@ -65,6 +65,9 @@ namespace cpu
case AF_INTERP_BILINEAR:
t_fn = &transform_b;
break;
+ case AF_INTERP_LOWER:
+ t_fn = &transform_l;
+ break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
break;
@@ -109,6 +112,11 @@ namespace cpu
(out.get(), in.get(), transform.get(), odims, idims,
out.strides(), in.strides(), transform.strides(), inverse);
break;
+ case AF_INTERP_LOWER:
+ transform_<T, AF_INTERP_LOWER>
+ (out.get(), in.get(), transform.get(), odims, idims,
+ out.strides(), in.strides(), transform.strides(), inverse);
+ break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
break;
@@ -118,10 +126,10 @@ namespace cpu
}
-#define INSTANTIATE(T) \
- template Array<T> transform(const Array<T> &in, const Array<float> &transform, \
- const af::dim4 &odims, const af_interp_type method, \
- const bool inverse); \
+#define INSTANTIATE(T) \
+ template Array<T> transform(const Array<T> &in, const Array<float> &transform, \
+ const af::dim4 &odims, const af_interp_type method, \
+ const bool inverse);
INSTANTIATE(float)
diff --git a/src/backend/cpu/transform_interp.hpp b/src/backend/cpu/transform_interp.hpp
index 23df154..5ad4750 100644
--- a/src/backend/cpu/transform_interp.hpp
+++ b/src/backend/cpu/transform_interp.hpp
@@ -121,4 +121,40 @@ namespace cpu
}
}
}
+
+ template<typename T>
+ void transform_l(T *out, const T *in, const float *tmat, const af::dim4 &idims,
+ const af::dim4 &ostrides, const af::dim4 &istrides,
+ const dim_t nimages, const dim_t o_offset,
+ const dim_t xx, const dim_t yy)
+ {
+ // Compute output index
+ const dim_t xi = floor(xx * tmat[0]
+ + yy * tmat[1]
+ + tmat[2]);
+ const dim_t yi = floor(xx * tmat[3]
+ + yy * tmat[4]
+ + tmat[5]);
+
+ // Compute memory location of indices
+ dim_t loci = (yi * istrides[1] + xi);
+ dim_t loco = (yy * ostrides[1] + xx);
+
+ T val = scalar<T>(0.0f);
+ // Copy to output
+ for(int batch = 0; batch < (int)idims[3]; batch++) {
+ dim_t i__ = batch * istrides[3];
+ dim_t o__ = batch * ostrides[3];
+ for(int i_idx = 0; i_idx < (int)nimages; i_idx++) {
+ dim_t i_off = i_idx * istrides[2] + i__;
+ dim_t o_off = o_offset + i_idx * ostrides[2] + o__;
+
+ if (xi < idims[0] && yi < idims[1] && xi >= 0 && yi >= 0)
+ val = in[i_off + loci];
+
+ out[o_off + loco] = val;
+ }
+ }
+ }
+
}
diff --git a/src/backend/cuda/kernel/rotate.hpp b/src/backend/cuda/kernel/rotate.hpp
index d84a454..bac4cd0 100644
--- a/src/backend/cuda/kernel/rotate.hpp
+++ b/src/backend/cuda/kernel/rotate.hpp
@@ -63,6 +63,8 @@ namespace cuda
transform_n(optr, out, iptr, in, t.tmat, xx, yy, limages); break;
case AF_INTERP_BILINEAR:
transform_b(optr, out, iptr, in, t.tmat, xx, yy, limages); break;
+ case AF_INTERP_LOWER:
+ transform_l(optr, out, iptr, in, t.tmat, xx, yy, limages); break;
default: break;
}
}
diff --git a/src/backend/cuda/kernel/transform.hpp b/src/backend/cuda/kernel/transform.hpp
index 88af1a2..4fdfa6c 100644
--- a/src/backend/cuda/kernel/transform.hpp
+++ b/src/backend/cuda/kernel/transform.hpp
@@ -97,6 +97,8 @@ namespace cuda
transform_n(optr, out, iptr, in, tmat, xido, yido, limages); break;
case AF_INTERP_BILINEAR:
transform_b(optr, out, iptr, in, tmat, xido, yido, limages); break;
+ case AF_INTERP_LOWER:
+ transform_l(optr, out, iptr, in, tmat, xido, yido, limages); break;
default: break;
}
}
diff --git a/src/backend/cuda/kernel/transform_interp.hpp b/src/backend/cuda/kernel/transform_interp.hpp
index 5448bff..5a88fc4 100644
--- a/src/backend/cuda/kernel/transform_interp.hpp
+++ b/src/backend/cuda/kernel/transform_interp.hpp
@@ -46,11 +46,11 @@ namespace cuda
{
// Compute input index
int xidi = round(xido * tmat[0]
- + yido * tmat[1]
- + tmat[2]);
+ + yido * tmat[1]
+ + tmat[2]);
int yidi = round(xido * tmat[3]
- + yido * tmat[4]
- + tmat[5]);
+ + yido * tmat[4]
+ + tmat[5]);
// Makes scale give same output as resize
// But fails rotate tests
@@ -129,5 +129,39 @@ namespace cuda
optr[ooff] = (vo / wt);
}
}
+
+ template<typename T>
+ __device__
+ void transform_l(T *optr, Param<T> out, const T *iptr, CParam<T> in, const float *tmat,
+ const int xido, const int yido, const int nimages)
+ {
+ // Compute input index
+ int xidi = floor(xido * tmat[0]
+ + yido * tmat[1]
+ + tmat[2]);
+ int yidi = floor(xido * tmat[3]
+ + yido * tmat[4]
+ + tmat[5]);
+
+ // Makes scale give same output as resize
+ // But fails rotate tests
+ //if (xidi >= in.dims[0]) { xidi = in.dims[0] - 1; }
+ //if (yidi >= in.dims[1]) { yidi = in.dims[1] - 1; }
+
+ const int loci = yidi * in.strides[1] + xidi;
+ const int loco = yido * out.strides[1] + xido;
+
+ for(int i = 0; i < nimages; i++) {
+ // Compute memory location of indices
+ int ioff = loci + i * in.strides[2];
+ int ooff = loco + i * out.strides[2];
+
+ // Copy to output
+ T val = scalar<T>(0);
+ if (xidi < in.dims[0] && yidi < in.dims[1] && xidi >= 0 && yidi >= 0) val = iptr[ioff];
+
+ optr[ooff] = val;
+ }
+ }
}
}
diff --git a/src/backend/cuda/rotate.cu b/src/backend/cuda/rotate.cu
index d5efadc..24e41d7 100644
--- a/src/backend/cuda/rotate.cu
+++ b/src/backend/cuda/rotate.cu
@@ -27,6 +27,9 @@ namespace cuda
case AF_INTERP_BILINEAR:
kernel::rotate<T, AF_INTERP_BILINEAR>(out, in, theta);
break;
+ case AF_INTERP_LOWER:
+ kernel::rotate<T, AF_INTERP_LOWER> (out, in, theta);
+ break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
}
@@ -35,9 +38,9 @@ namespace cuda
}
-#define INSTANTIATE(T) \
- template Array<T> rotate(const Array<T> &in, const float theta, \
- const af::dim4 &odims, const af_interp_type method); \
+#define INSTANTIATE(T) \
+ template Array<T> rotate(const Array<T> &in, const float theta, \
+ const af::dim4 &odims, const af_interp_type method);
INSTANTIATE(float)
diff --git a/src/backend/cuda/transform.cu b/src/backend/cuda/transform.cu
index 13e1a40..214bce3 100644
--- a/src/backend/cuda/transform.cu
+++ b/src/backend/cuda/transform.cu
@@ -29,6 +29,9 @@ namespace cuda
case AF_INTERP_BILINEAR:
kernel::transform<T, AF_INTERP_BILINEAR>(out, in, transform, inverse);
break;
+ case AF_INTERP_LOWER:
+ kernel::transform<T, AF_INTERP_LOWER> (out, in, transform, inverse);
+ break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
}
@@ -37,11 +40,10 @@ namespace cuda
}
-#define INSTANTIATE(T) \
- template Array<T> transform(const Array<T> &in, const Array<float> &transform, \
- const af::dim4 &odims, const af_interp_type method, \
- const bool inverse); \
-
+#define INSTANTIATE(T) \
+ template Array<T> transform(const Array<T> &in, const Array<float> &transform, \
+ const af::dim4 &odims, const af_interp_type method, \
+ const bool inverse);
INSTANTIATE(float)
INSTANTIATE(double)
diff --git a/src/backend/opencl/kernel/rotate.cl b/src/backend/opencl/kernel/rotate.cl
index 0e1bbe4..882f9a6 100644
--- a/src/backend/opencl/kernel/rotate.cl
+++ b/src/backend/opencl/kernel/rotate.cl
@@ -9,6 +9,7 @@
#define NEAREST transform_n
#define BILINEAR transform_b
+#define LOWER transform_l
typedef struct {
float tmat[6];
diff --git a/src/backend/opencl/kernel/rotate.hpp b/src/backend/opencl/kernel/rotate.hpp
index b24d972..b3efaa4 100644
--- a/src/backend/opencl/kernel/rotate.hpp
+++ b/src/backend/opencl/kernel/rotate.hpp
@@ -80,9 +80,11 @@ namespace opencl
}
switch(method) {
- case AF_INTERP_NEAREST: options << " -D INTERP=NEAREST";
+ case AF_INTERP_NEAREST : options << " -D INTERP=NEAREST";
break;
- case AF_INTERP_BILINEAR: options << " -D INTERP=BILINEAR";
+ case AF_INTERP_BILINEAR: options << " -D INTERP=BILINEAR";
+ break;
+ case AF_INTERP_LOWER : options << " -D INTERP=LOWER";
break;
default:
break;
diff --git a/src/backend/opencl/kernel/transform.cl b/src/backend/opencl/kernel/transform.cl
index 9c7e06f..824f50c 100644
--- a/src/backend/opencl/kernel/transform.cl
+++ b/src/backend/opencl/kernel/transform.cl
@@ -9,6 +9,7 @@
#define NEAREST transform_n
#define BILINEAR transform_b
+#define LOWER transform_l
void calc_affine_inverse(float* txo, __global const float* txi)
{
diff --git a/src/backend/opencl/kernel/transform.hpp b/src/backend/opencl/kernel/transform.hpp
index dfaa18a..3e15211 100644
--- a/src/backend/opencl/kernel/transform.hpp
+++ b/src/backend/opencl/kernel/transform.hpp
@@ -78,9 +78,11 @@ namespace opencl
}
switch(method) {
- case AF_INTERP_NEAREST: options << " -D INTERP=NEAREST";
+ case AF_INTERP_NEAREST : options << " -D INTERP=NEAREST";
break;
- case AF_INTERP_BILINEAR: options << " -D INTERP=BILINEAR";
+ case AF_INTERP_BILINEAR: options << " -D INTERP=BILINEAR";
+ break;
+ case AF_INTERP_LOWER : options << " -D INTERP=LOWER";
break;
default:
break;
diff --git a/src/backend/opencl/kernel/transform_interp.cl b/src/backend/opencl/kernel/transform_interp.cl
index e7db270..ceec286 100644
--- a/src/backend/opencl/kernel/transform_interp.cl
+++ b/src/backend/opencl/kernel/transform_interp.cl
@@ -26,11 +26,11 @@ void transform_n(__global T *d_out, const KParam out, __global const T *d_in, co
{
// Compute input index
const int xidi = round(xido * tmat[0]
- + yido * tmat[1]
- + tmat[2]);
+ + yido * tmat[1]
+ + tmat[2]);
const int yidi = round(xido * tmat[3]
- + yido * tmat[4]
- + tmat[5]);
+ + yido * tmat[4]
+ + tmat[5]);
// Compute memory location of indices
const int loci = yidi * in.strides[1] + xidi;
@@ -99,3 +99,31 @@ void transform_b(__global T *d_out, const KParam out, __global const T *d_in, co
d_out[ooff] = (T)(vo / wt);
}
}
+
+void transform_l(__global T *d_out, const KParam out, __global const T *d_in, const KParam in,
+ const float *tmat, const int xido, const int yido, const int nimages)
+{
+ // Compute input index
+ const int xidi = floor(xido * tmat[0]
+ + yido * tmat[1]
+ + tmat[2]);
+ const int yidi = floor(xido * tmat[3]
+ + yido * tmat[4]
+ + tmat[5]);
+
+ // Compute memory location of indices
+ const int loci = yidi * in.strides[1] + xidi;
+ const int loco = yido * out.strides[1] + xido;
+
+ for(int i = 0; i < nimages; i++) {
+ // Compute memory location of indices
+ int ioff = loci + i * in.strides[2];
+ int ooff = loco + i * out.strides[2];
+
+ T val; set_scalar(val, 0);
+ if (xidi < in.dims[0] && yidi < in.dims[1] && xidi >= 0 && yidi >= 0) val = d_in[ioff];
+
+ d_out[ooff] = val;
+ }
+}
+
diff --git a/src/backend/opencl/rotate.cpp b/src/backend/opencl/rotate.cpp
index 9fca25a..b7888d0 100644
--- a/src/backend/opencl/rotate.cpp
+++ b/src/backend/opencl/rotate.cpp
@@ -26,7 +26,10 @@ namespace opencl
kernel::rotate<T, AF_INTERP_NEAREST> (out, in, theta);
break;
case AF_INTERP_BILINEAR:
- kernel::rotate<T, AF_INTERP_BILINEAR> (out, in, theta);
+ kernel::rotate<T, AF_INTERP_BILINEAR>(out, in, theta);
+ break;
+ case AF_INTERP_LOWER:
+ kernel::rotate<T, AF_INTERP_LOWER> (out, in, theta);
break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
@@ -37,10 +40,9 @@ namespace opencl
}
-#define INSTANTIATE(T) \
- template Array<T> rotate(const Array<T> &in, const float theta, \
- const af::dim4 &odims, const af_interp_type method); \
-
+#define INSTANTIATE(T) \
+ template Array<T> rotate(const Array<T> &in, const float theta, \
+ const af::dim4 &odims, const af_interp_type method);
INSTANTIATE(float)
INSTANTIATE(double)
diff --git a/src/backend/opencl/transform.cpp b/src/backend/opencl/transform.cpp
index 22cc88a..34bfead 100644
--- a/src/backend/opencl/transform.cpp
+++ b/src/backend/opencl/transform.cpp
@@ -33,6 +33,10 @@ namespace opencl
kernel::transform<T, true, AF_INTERP_BILINEAR>
(out, in, transform);
break;
+ case AF_INTERP_LOWER:
+ kernel::transform<T, true, AF_INTERP_LOWER>
+ (out, in, transform);
+ break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
break;
@@ -47,6 +51,10 @@ namespace opencl
kernel::transform<T, false, AF_INTERP_BILINEAR>
(out, in, transform);
break;
+ case AF_INTERP_LOWER:
+ kernel::transform<T, false, AF_INTERP_LOWER>
+ (out, in, transform);
+ break;
default:
AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
break;
@@ -57,11 +65,10 @@ namespace opencl
}
-#define INSTANTIATE(T) \
- template Array<T> transform(const Array<T> &in, const Array<float> &transform, \
- const af::dim4 &odims, const af_interp_type method, \
- const bool inverse); \
-
+#define INSTANTIATE(T) \
+ template Array<T> transform(const Array<T> &in, const Array<float> &transform, \
+ const af::dim4 &odims, const af_interp_type method, \
+ const bool inverse);
INSTANTIATE(float)
INSTANTIATE(double)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list