[arrayfire] 72/408: Add lower interpolation to rotate and transform

Mon Sep 21 19:11:19 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 28d9e71735e176221f797f2401a1be2d4e010479
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Thu Jul 2 12:44:27 2015 -0400

    Add lower interpolation to rotate and transform
---
 src/api/c/rotate.cpp                          |  5 +++-
 src/api/c/transform.cpp                       |  4 ++-
 src/backend/cpu/rotate.cpp                    |  9 +++++-
 src/backend/cpu/transform.cpp                 | 16 +++++++---
 src/backend/cpu/transform_interp.hpp          | 36 +++++++++++++++++++++++
 src/backend/cuda/kernel/rotate.hpp            |  2 ++
 src/backend/cuda/kernel/transform.hpp         |  2 ++
 src/backend/cuda/kernel/transform_interp.hpp  | 42 ++++++++++++++++++++++++---
 src/backend/cuda/rotate.cu                    |  9 ++++--
 src/backend/cuda/transform.cu                 | 12 ++++----
 src/backend/opencl/kernel/rotate.cl           |  1 +
 src/backend/opencl/kernel/rotate.hpp          |  6 ++--
 src/backend/opencl/kernel/transform.cl        |  1 +
 src/backend/opencl/kernel/transform.hpp       |  6 ++--
 src/backend/opencl/kernel/transform_interp.cl | 36 ++++++++++++++++++++---
 src/backend/opencl/rotate.cpp                 | 12 ++++----
 src/backend/opencl/transform.cpp              | 17 +++++++----
 17 files changed, 179 insertions(+), 37 deletions(-)

diff --git a/src/api/c/rotate.cpp b/src/api/c/rotate.cpp
index 13db853..b792239 100644
--- a/src/api/c/rotate.cpp
+++ b/src/api/c/rotate.cpp
@@ -45,7 +45,10 @@ af_err af_rotate(af_array *out, const af_array in, const float theta,
 
         af_dtype itype = info.getType();
 
-        ARG_ASSERT(3, method == AF_INTERP_NEAREST || method == AF_INTERP_BILINEAR);
+        ARG_ASSERT(3, method == AF_INTERP_NEAREST  ||
+                      method == AF_INTERP_BILINEAR ||
+                      method == AF_INTERP_LOWER);
+
         DIM_ASSERT(1, idims.elements() > 0);
 
         af::dim4 odims(odims0, odims1, idims[2], idims[3]);
diff --git a/src/api/c/transform.cpp b/src/api/c/transform.cpp
index 41ea0ac..cc17adc 100644
--- a/src/api/c/transform.cpp
+++ b/src/api/c/transform.cpp
@@ -38,7 +38,9 @@ af_err af_transform(af_array *out, const af_array in, const af_array tf,
         af_dtype itype = i_info.getType();
 
         ARG_ASSERT(2, t_info.getType() == f32);
-        ARG_ASSERT(5, method == AF_INTERP_NEAREST || method == AF_INTERP_BILINEAR);
+        ARG_ASSERT(5, method == AF_INTERP_NEAREST  ||
+                      method == AF_INTERP_BILINEAR ||
+                      method == AF_INTERP_LOWER);
         DIM_ASSERT(2, (tdims[0] == 3 && tdims[1] == 2));
         DIM_ASSERT(1, idims.elements() > 0);
         DIM_ASSERT(1, (idims.ndims() == 2 || idims.ndims() == 3));
diff --git a/src/backend/cpu/rotate.cpp b/src/backend/cpu/rotate.cpp
index b7c4576..2293ee2 100644
--- a/src/backend/cpu/rotate.cpp
+++ b/src/backend/cpu/rotate.cpp
@@ -55,6 +55,9 @@ namespace cpu
             case AF_INTERP_BILINEAR:
                 t_fn = &transform_b;
                 break;
+            case AF_INTERP_LOWER:
+                t_fn = &transform_l;
+                break;
             default:
                 AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                 break;
@@ -85,6 +88,10 @@ namespace cpu
                 rotate_<T, AF_INTERP_BILINEAR>
                        (out.get(), in.get(), theta, odims, idims, out.strides(), in.strides());
                 break;
+            case AF_INTERP_LOWER:
+                rotate_<T, AF_INTERP_LOWER>
+                       (out.get(), in.get(), theta, odims, idims, out.strides(), in.strides());
+                break;
             default:
                 AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                 break;
@@ -96,7 +103,7 @@ namespace cpu
 
 #define INSTANTIATE(T)                                                              \
     template Array<T> rotate(const Array<T> &in, const float theta,                 \
-                             const af::dim4 &odims, const af_interp_type method);   \
+                             const af::dim4 &odims, const af_interp_type method);
 
     INSTANTIATE(float)
     INSTANTIATE(double)
diff --git a/src/backend/cpu/transform.cpp b/src/backend/cpu/transform.cpp
index ed9f0ad..d1cf58e 100644
--- a/src/backend/cpu/transform.cpp
+++ b/src/backend/cpu/transform.cpp
@@ -65,6 +65,9 @@ namespace cpu
             case AF_INTERP_BILINEAR:
                 t_fn = &transform_b;
                 break;
+            case AF_INTERP_LOWER:
+                t_fn = &transform_l;
+                break;
             default:
                 AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                 break;
@@ -109,6 +112,11 @@ namespace cpu
                           (out.get(), in.get(), transform.get(), odims, idims,
                            out.strides(), in.strides(), transform.strides(), inverse);
                 break;
+            case AF_INTERP_LOWER:
+                transform_<T, AF_INTERP_LOWER>
+                          (out.get(), in.get(), transform.get(), odims, idims,
+                           out.strides(), in.strides(), transform.strides(), inverse);
+                break;
             default:
                 AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                 break;
@@ -118,10 +126,10 @@ namespace cpu
     }
 
 
-#define INSTANTIATE(T)                                                                          \
-    template Array<T> transform(const Array<T> &in, const Array<float> &transform,             \
-                                const af::dim4 &odims, const af_interp_type method, \
-                                const bool inverse);                    \
+#define INSTANTIATE(T)                                                                  \
+    template Array<T> transform(const Array<T> &in, const Array<float> &transform,      \
+                                const af::dim4 &odims, const af_interp_type method,     \
+                                const bool inverse);
 
 
     INSTANTIATE(float)
diff --git a/src/backend/cpu/transform_interp.hpp b/src/backend/cpu/transform_interp.hpp
index 23df154..5ad4750 100644
--- a/src/backend/cpu/transform_interp.hpp
+++ b/src/backend/cpu/transform_interp.hpp
@@ -121,4 +121,40 @@ namespace cpu
             }
         }
     }
+
+    template<typename T>
+    void transform_l(T *out, const T *in, const float *tmat, const af::dim4 &idims,
+                      const af::dim4 &ostrides, const af::dim4 &istrides,
+                      const dim_t nimages, const dim_t o_offset,
+                      const dim_t xx, const dim_t yy)
+    {
+        // "Lower" sampling: map output pixel (xx, yy) through tmat and floor the result to get the input index (xi, yi)
+        const dim_t xi = floor(xx * tmat[0]
+                             + yy * tmat[1]
+                                  + tmat[2]);
+        const dim_t yi = floor(xx * tmat[3]
+                             + yy * tmat[4]
+                                  + tmat[5]);
+
+        // In-plane offsets: loci into the input, loco into the output (only strides[1] is applied; x is added as-is)
+        dim_t loci = (yi * istrides[1] + xi);
+        dim_t loco = (yy * ostrides[1] + xx);
+
+        T val = scalar<T>(0.0f); // value written when (xi, yi) falls outside the input (presumably zero; scalar<T> is defined elsewhere)
+        // Write one value per image (stepping by strides[2]) for every batch (stepping by strides[3])
+        for(int batch = 0; batch < (int)idims[3]; batch++) {
+            dim_t i__ = batch * istrides[3];
+            dim_t o__ = batch * ostrides[3];
+            for(int i_idx = 0; i_idx < (int)nimages; i_idx++) {
+                dim_t i_off = i_idx * istrides[2] + i__;
+                dim_t o_off = o_offset + i_idx * ostrides[2] + o__;
+
+                if (xi < idims[0] && yi < idims[1] && xi >= 0 && yi >= 0)
+                    val = in[i_off + loci]; // in range: take the floored input sample
+
+                out[o_off + loco] = val;
+            }
+        }
+    }
+
 }
diff --git a/src/backend/cuda/kernel/rotate.hpp b/src/backend/cuda/kernel/rotate.hpp
index d84a454..bac4cd0 100644
--- a/src/backend/cuda/kernel/rotate.hpp
+++ b/src/backend/cuda/kernel/rotate.hpp
@@ -63,6 +63,8 @@ namespace cuda
                     transform_n(optr, out, iptr, in, t.tmat, xx, yy, limages); break;
                 case AF_INTERP_BILINEAR:
                     transform_b(optr, out, iptr, in, t.tmat, xx, yy, limages); break;
+                case AF_INTERP_LOWER:
+                    transform_l(optr, out, iptr, in, t.tmat, xx, yy, limages); break;
                 default: break;
             }
         }
diff --git a/src/backend/cuda/kernel/transform.hpp b/src/backend/cuda/kernel/transform.hpp
index 88af1a2..4fdfa6c 100644
--- a/src/backend/cuda/kernel/transform.hpp
+++ b/src/backend/cuda/kernel/transform.hpp
@@ -97,6 +97,8 @@ namespace cuda
                     transform_n(optr, out, iptr, in, tmat, xido, yido, limages); break;
                 case AF_INTERP_BILINEAR:
                     transform_b(optr, out, iptr, in, tmat, xido, yido, limages); break;
+                case AF_INTERP_LOWER:
+                    transform_l(optr, out, iptr, in, tmat, xido, yido, limages); break;
                 default: break;
             }
         }
diff --git a/src/backend/cuda/kernel/transform_interp.hpp b/src/backend/cuda/kernel/transform_interp.hpp
index 5448bff..5a88fc4 100644
--- a/src/backend/cuda/kernel/transform_interp.hpp
+++ b/src/backend/cuda/kernel/transform_interp.hpp
@@ -46,11 +46,11 @@ namespace cuda
         {
             // Compute input index
             int xidi = round(xido * tmat[0]
-                             + yido * tmat[1]
-                             + tmat[2]);
+                           + yido * tmat[1]
+                                  + tmat[2]);
             int yidi = round(xido * tmat[3]
-                             + yido * tmat[4]
-                             + tmat[5]);
+                           + yido * tmat[4]
+                                  + tmat[5]);
 
             // Makes scale give same output as resize
             // But fails rotate tests
@@ -129,5 +129,39 @@ namespace cuda
                 optr[ooff] = (vo / wt);
             }
         }
+
+        template<typename T>
+        __device__
+        void transform_l(T *optr, Param<T> out, const T *iptr, CParam<T> in, const float *tmat,
+                         const int xido, const int yido, const int nimages)
+        {
+            // Input index for output pixel (xido, yido): affine map through tmat, rounded down with floor
+            int xidi = floor(xido * tmat[0]
+                           + yido * tmat[1]
+                                  + tmat[2]);
+            int yidi = floor(xido * tmat[3]
+                           + yido * tmat[4]
+                                  + tmat[5]);
+
+            // Disabled clamp to the last column/row: it makes scale give the same output as resize,
+            // but it fails the rotate tests, so it stays commented out
+            //if (xidi >= in.dims[0]) { xidi = in.dims[0] - 1; }
+            //if (yidi >= in.dims[1]) { yidi = in.dims[1] - 1; }
+
+            const int loci = yidi * in.strides[1]  + xidi;
+            const int loco = yido * out.strides[1] + xido;
+
+            for(int i = 0; i < nimages; i++) {
+                // Per-image offsets: step by strides[2] from the in-plane locations loci / loco
+                int ioff = loci + i * in.strides[2];
+                int ooff = loco + i * out.strides[2];
+
+                // Write the input sample when (xidi, yidi) is inside in.dims, otherwise scalar<T>(0)
+                T val = scalar<T>(0);
+                if (xidi < in.dims[0] && yidi < in.dims[1] && xidi >= 0 && yidi >= 0) val = iptr[ioff];
+
+                optr[ooff] = val;
+            }
+        }
     }
 }
diff --git a/src/backend/cuda/rotate.cu b/src/backend/cuda/rotate.cu
index d5efadc..24e41d7 100644
--- a/src/backend/cuda/rotate.cu
+++ b/src/backend/cuda/rotate.cu
@@ -27,6 +27,9 @@ namespace cuda
             case AF_INTERP_BILINEAR:
                 kernel::rotate<T, AF_INTERP_BILINEAR>(out, in, theta);
                 break;
+            case AF_INTERP_LOWER:
+                kernel::rotate<T, AF_INTERP_LOWER>   (out, in, theta);
+                break;
             default:
                 AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
         }
@@ -35,9 +38,9 @@ namespace cuda
     }
 
 
-#define INSTANTIATE(T)                                                                          \
-    template Array<T> rotate(const Array<T> &in, const float theta,                            \
-                             const af::dim4 &odims, const af_interp_type method); \
+#define INSTANTIATE(T)                                                              \
+    template Array<T> rotate(const Array<T> &in, const float theta,                 \
+                             const af::dim4 &odims, const af_interp_type method);
 
 
     INSTANTIATE(float)
diff --git a/src/backend/cuda/transform.cu b/src/backend/cuda/transform.cu
index 13e1a40..214bce3 100644
--- a/src/backend/cuda/transform.cu
+++ b/src/backend/cuda/transform.cu
@@ -29,6 +29,9 @@ namespace cuda
             case AF_INTERP_BILINEAR:
                 kernel::transform<T, AF_INTERP_BILINEAR>(out, in, transform, inverse);
                 break;
+            case AF_INTERP_LOWER:
+                kernel::transform<T, AF_INTERP_LOWER>   (out, in, transform, inverse);
+                break;
             default:
                 AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
         }
@@ -37,11 +40,10 @@ namespace cuda
     }
 
 
-#define INSTANTIATE(T)                                                                          \
-    template Array<T> transform(const Array<T> &in, const Array<float> &transform,             \
-                                 const af::dim4 &odims, const af_interp_type method,            \
-                                 const bool inverse);                                           \
-
+#define INSTANTIATE(T)                                                                      \
+    template Array<T> transform(const Array<T> &in, const Array<float> &transform,          \
+                                const af::dim4 &odims, const af_interp_type method,         \
+                                const bool inverse);
 
     INSTANTIATE(float)
     INSTANTIATE(double)
diff --git a/src/backend/opencl/kernel/rotate.cl b/src/backend/opencl/kernel/rotate.cl
index 0e1bbe4..882f9a6 100644
--- a/src/backend/opencl/kernel/rotate.cl
+++ b/src/backend/opencl/kernel/rotate.cl
@@ -9,6 +9,7 @@
 
 #define NEAREST transform_n
 #define BILINEAR transform_b
+#define LOWER transform_l
 
 typedef struct {
     float tmat[6];
diff --git a/src/backend/opencl/kernel/rotate.hpp b/src/backend/opencl/kernel/rotate.hpp
index b24d972..b3efaa4 100644
--- a/src/backend/opencl/kernel/rotate.hpp
+++ b/src/backend/opencl/kernel/rotate.hpp
@@ -80,9 +80,11 @@ namespace opencl
                     }
 
                     switch(method) {
-                        case AF_INTERP_NEAREST: options << " -D INTERP=NEAREST";
+                        case AF_INTERP_NEAREST : options << " -D INTERP=NEAREST";
                             break;
-                        case AF_INTERP_BILINEAR:  options << " -D INTERP=BILINEAR";
+                        case AF_INTERP_BILINEAR: options << " -D INTERP=BILINEAR";
+                            break;
+                        case AF_INTERP_LOWER   : options << " -D INTERP=LOWER";
                             break;
                         default:
                             break;
diff --git a/src/backend/opencl/kernel/transform.cl b/src/backend/opencl/kernel/transform.cl
index 9c7e06f..824f50c 100644
--- a/src/backend/opencl/kernel/transform.cl
+++ b/src/backend/opencl/kernel/transform.cl
@@ -9,6 +9,7 @@
 
 #define NEAREST transform_n
 #define BILINEAR transform_b
+#define LOWER transform_l
 
 void calc_affine_inverse(float* txo, __global const float* txi)
 {
diff --git a/src/backend/opencl/kernel/transform.hpp b/src/backend/opencl/kernel/transform.hpp
index dfaa18a..3e15211 100644
--- a/src/backend/opencl/kernel/transform.hpp
+++ b/src/backend/opencl/kernel/transform.hpp
@@ -78,9 +78,11 @@ namespace opencl
                     }
 
                     switch(method) {
-                        case AF_INTERP_NEAREST: options << " -D INTERP=NEAREST";
+                        case AF_INTERP_NEAREST : options << " -D INTERP=NEAREST";
                             break;
-                        case AF_INTERP_BILINEAR:  options << " -D INTERP=BILINEAR";
+                        case AF_INTERP_BILINEAR: options << " -D INTERP=BILINEAR";
+                            break;
+                        case AF_INTERP_LOWER   : options << " -D INTERP=LOWER";
                             break;
                         default:
                             break;
diff --git a/src/backend/opencl/kernel/transform_interp.cl b/src/backend/opencl/kernel/transform_interp.cl
index e7db270..ceec286 100644
--- a/src/backend/opencl/kernel/transform_interp.cl
+++ b/src/backend/opencl/kernel/transform_interp.cl
@@ -26,11 +26,11 @@ void transform_n(__global T *d_out, const KParam out, __global const T *d_in, co
 {
     // Compute input index
     const int xidi = round(xido * tmat[0]
-                              + yido * tmat[1]
-                                     + tmat[2]);
+                         + yido * tmat[1]
+                                + tmat[2]);
     const int yidi = round(xido * tmat[3]
-                              + yido * tmat[4]
-                                     + tmat[5]);
+                         + yido * tmat[4]
+                                + tmat[5]);
 
     // Compute memory location of indices
     const int loci = yidi * in.strides[1]  + xidi;
@@ -99,3 +99,31 @@ void transform_b(__global T *d_out, const KParam out, __global const T *d_in, co
         d_out[ooff] = (T)(vo / wt);
     }
 }
+
+void transform_l(__global T *d_out, const KParam out, __global const T *d_in, const KParam in,
+                 const float *tmat, const int xido, const int yido, const int nimages)
+{
+    // Input index for output pixel (xido, yido): affine map through tmat, rounded down with floor
+    const int xidi = floor(xido * tmat[0]
+                         + yido * tmat[1]
+                                + tmat[2]);
+    const int yidi = floor(xido * tmat[3]
+                         + yido * tmat[4]
+                                + tmat[5]);
+
+    // In-plane offsets: loci into the input, loco into the output
+    const int loci = yidi * in.strides[1]  + xidi;
+    const int loco = yido * out.strides[1] + xido;
+
+    for(int i = 0; i < nimages; i++) {
+        // Per-image offsets: step by strides[2] from the in-plane locations
+        int ioff = loci + i * in.strides[2];
+        int ooff = loco + i * out.strides[2];
+
+        T val; set_scalar(val, 0); // value kept when (xidi, yidi) is outside in.dims
+        if (xidi < in.dims[0] && yidi < in.dims[1] && xidi >= 0 && yidi >= 0) val = d_in[ioff];
+
+        d_out[ooff] = val;
+    }
+}
+
diff --git a/src/backend/opencl/rotate.cpp b/src/backend/opencl/rotate.cpp
index 9fca25a..b7888d0 100644
--- a/src/backend/opencl/rotate.cpp
+++ b/src/backend/opencl/rotate.cpp
@@ -26,7 +26,10 @@ namespace opencl
                 kernel::rotate<T, AF_INTERP_NEAREST> (out, in, theta);
                 break;
             case AF_INTERP_BILINEAR:
-                kernel::rotate<T, AF_INTERP_BILINEAR> (out, in, theta);
+                kernel::rotate<T, AF_INTERP_BILINEAR>(out, in, theta);
+                break;
+            case AF_INTERP_LOWER:
+                kernel::rotate<T, AF_INTERP_LOWER>   (out, in, theta);
                 break;
             default:
                 AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
@@ -37,10 +40,9 @@ namespace opencl
     }
 
 
-#define INSTANTIATE(T)                                                  \
-    template Array<T> rotate(const Array<T> &in, const float theta,     \
-                             const af::dim4 &odims, const af_interp_type method); \
-
+#define INSTANTIATE(T)                                                              \
+    template Array<T> rotate(const Array<T> &in, const float theta,                 \
+                             const af::dim4 &odims, const af_interp_type method);
 
     INSTANTIATE(float)
     INSTANTIATE(double)
diff --git a/src/backend/opencl/transform.cpp b/src/backend/opencl/transform.cpp
index 22cc88a..34bfead 100644
--- a/src/backend/opencl/transform.cpp
+++ b/src/backend/opencl/transform.cpp
@@ -33,6 +33,10 @@ namespace opencl
                     kernel::transform<T, true, AF_INTERP_BILINEAR>
                                      (out, in, transform);
                     break;
+                case AF_INTERP_LOWER:
+                    kernel::transform<T, true, AF_INTERP_LOWER>
+                                     (out, in, transform);
+                    break;
                 default:
                     AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                     break;
@@ -47,6 +51,10 @@ namespace opencl
                     kernel::transform<T, false, AF_INTERP_BILINEAR>
                                      (out, in, transform);
                     break;
+                case AF_INTERP_LOWER:
+                    kernel::transform<T, false, AF_INTERP_LOWER>
+                                     (out, in, transform);
+                    break;
                 default:
                     AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
                     break;
@@ -57,11 +65,10 @@ namespace opencl
     }
 
 
-#define INSTANTIATE(T)                                                  \
-    template Array<T> transform(const Array<T> &in, const Array<float> &transform, \
-                                const af::dim4 &odims, const af_interp_type method, \
-                                const bool inverse);                    \
-
+#define INSTANTIATE(T)                                                                  \
+    template Array<T> transform(const Array<T> &in, const Array<float> &transform,      \
+                                const af::dim4 &odims, const af_interp_type method,     \
+                                const bool inverse);
 
     INSTANTIATE(float)
     INSTANTIATE(double)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git