[arrayfire] 70/248: Added short, ushort support for CUDA backend

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Nov 17 15:54:03 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.

commit 7f3ff109862f0ce8171013306d7f904586eb1e53
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Tue Sep 22 16:01:38 2015 -0400

    Added short, ushort support for CUDA backend
    
    * Still to do: complete short, ushort support for the OpenCL backend
---
 src/backend/cuda/Array.cpp                         |  2 ++
 src/backend/cuda/JIT/arith.cu                      |  2 ++
 src/backend/cuda/JIT/cast.cu                       | 20 +++++++----
 src/backend/cuda/JIT/exp.cu                        |  4 +++
 src/backend/cuda/JIT/hyper.cu                      |  2 ++
 src/backend/cuda/JIT/logic.cu                      |  8 +++++
 src/backend/cuda/JIT/numeric.cu                    | 42 +++++++++++++++-------
 src/backend/cuda/JIT/trig.cu                       |  4 +++
 src/backend/cuda/JIT/types.h                       |  1 +
 src/backend/cuda/all.cu                            |  2 ++
 src/backend/cuda/any.cu                            |  2 ++
 src/backend/cuda/assign.cu                         |  8 +++--
 src/backend/cuda/bilateral.cu                      |  2 ++
 src/backend/cuda/convolve.cpp                      |  2 ++
 src/backend/cuda/copy.cu                           | 38 +++++++++++++-------
 src/backend/cuda/count.cu                          |  2 ++
 src/backend/cuda/diagonal.cu                       |  2 ++
 src/backend/cuda/diff.cu                           |  2 ++
 src/backend/cuda/dilate.cu                         |  2 ++
 src/backend/cuda/dilate3d.cu                       |  2 ++
 src/backend/cuda/erode.cu                          |  2 ++
 src/backend/cuda/erode3d.cu                        |  2 ++
 src/backend/cuda/fast.cu                           |  2 ++
 src/backend/cuda/fast_pyramid.cu                   |  2 ++
 src/backend/cuda/fftconvolve.cu                    |  2 ++
 src/backend/cuda/histogram.cu                      |  2 ++
 src/backend/cuda/identity.cu                       |  2 ++
 src/backend/cuda/index.cu                          |  6 ++--
 src/backend/cuda/iota.cu                           |  2 ++
 src/backend/cuda/ireduce.cu                        |  4 +++
 src/backend/cuda/jit.cpp                           |  2 ++
 src/backend/cuda/join.cu                           | 22 +++++++-----
 src/backend/cuda/kernel/convolve.cu                | 14 ++++----
 src/backend/cuda/kernel/convolve_separable.cu      | 10 +++---
 src/backend/cuda/kernel/fast.hpp                   | 20 +++++++++++
 src/backend/cuda/kernel/memcopy.hpp                |  2 ++
 src/backend/cuda/kernel/nearest_neighbour.hpp      |  9 +++++
 src/backend/cuda/kernel/shared.hpp                 |  2 ++
 src/backend/cuda/lookup.cu                         |  4 +++
 src/backend/cuda/match_template.cu                 |  2 ++
 src/backend/cuda/math.hpp                          |  7 ++++
 src/backend/cuda/max.cu                            |  2 ++
 src/backend/cuda/meanshift.cu                      |  2 ++
 src/backend/cuda/medfilt.cu                        |  2 ++
 src/backend/cuda/memory.cpp                        |  2 ++
 src/backend/cuda/min.cu                            |  2 ++
 src/backend/cuda/nearest_neighbour.cu              |  2 ++
 src/backend/cuda/product.cu                        |  4 ++-
 src/backend/cuda/random.cu                         |  2 ++
 src/backend/cuda/range.cu                          |  2 ++
 src/backend/cuda/regions.cu                        |  2 ++
 src/backend/cuda/reorder.cu                        |  2 ++
 src/backend/cuda/resize.cu                         |  2 ++
 src/backend/cuda/rotate.cu                         |  2 ++
 src/backend/cuda/scan.cu                           |  2 ++
 src/backend/cuda/select.cu                         |  2 ++
 src/backend/cuda/set.cu                            |  2 ++
 src/backend/cuda/shift.cu                          |  2 ++
 src/backend/cuda/sobel.cu                          |  2 ++
 src/backend/cuda/sort.cu                           |  2 ++
 .../cuda/{dilate.cu => sort_by_key/ascd_s16.cu}    | 11 ++----
 .../cuda/{dilate.cu => sort_by_key/ascd_u16.cu}    | 11 ++----
 .../cuda/{dilate.cu => sort_by_key/desc_s16.cu}    | 11 ++----
 .../cuda/{dilate.cu => sort_by_key/desc_u16.cu}    | 11 ++----
 src/backend/cuda/sort_by_key_impl.hpp              |  2 ++
 src/backend/cuda/sort_index.cu                     |  2 ++
 src/backend/cuda/sum.cu                            |  2 ++
 src/backend/cuda/susan.cu                          |  2 ++
 src/backend/cuda/tile.cu                           |  2 ++
 src/backend/cuda/transform.cu                      |  2 ++
 src/backend/cuda/transpose.cu                      |  2 ++
 src/backend/cuda/transpose_inplace.cu              |  2 ++
 src/backend/cuda/triangle.cu                       |  2 ++
 src/backend/cuda/types.cpp                         |  8 +++++
 src/backend/cuda/types.hpp                         |  3 +-
 src/backend/cuda/unwrap.cu                         |  2 ++
 src/backend/cuda/where.cu                          |  2 ++
 src/backend/cuda/wrap.cu                           |  2 ++
 78 files changed, 295 insertions(+), 93 deletions(-)

diff --git a/src/backend/cuda/Array.cpp b/src/backend/cuda/Array.cpp
index ed86a8e..b7d7b3c 100644
--- a/src/backend/cuda/Array.cpp
+++ b/src/backend/cuda/Array.cpp
@@ -291,5 +291,7 @@ namespace cuda
     INSTANTIATE(char)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/JIT/arith.cu b/src/backend/cuda/JIT/arith.cu
index 01e5f41..adfa9e9 100644
--- a/src/backend/cuda/JIT/arith.cu
+++ b/src/backend/cuda/JIT/arith.cu
@@ -25,6 +25,8 @@
     ARITH_BASIC(fn, op, uchar)                          \
     ARITH_BASIC(fn, op, intl)                           \
     ARITH_BASIC(fn, op, uintl)                          \
+    ARITH_BASIC(fn, op, short)                          \
+    ARITH_BASIC(fn, op, ushort)                         \
                                                         \
     __device__ cfloat ___##fn(cfloat a, cfloat b)       \
     {                                                   \
diff --git a/src/backend/cuda/JIT/cast.cu b/src/backend/cuda/JIT/cast.cu
index db41c52..8905955 100644
--- a/src/backend/cuda/JIT/cast.cu
+++ b/src/backend/cuda/JIT/cast.cu
@@ -22,14 +22,18 @@
     CAST_BASIC(___mk##X, T, uchar)              \
     CAST_BASIC(___mk##X, T, intl)               \
     CAST_BASIC(___mk##X, T, uintl)              \
+    CAST_BASIC(___mk##X, T, short)              \
+    CAST_BASIC(___mk##X, T, ushort)             \
 
-CAST(float, S)
+CAST(float , S)
 CAST(double, D)
-CAST(int, I)
-CAST(intl, X)
-CAST(uint, U)
-CAST(uchar, V)
-CAST(uintl, Y)
+CAST(int   , I)
+CAST(intl  , X)
+CAST(short , P)
+CAST(uint  , U)
+CAST(uchar , V)
+CAST(uintl , Y)
+CAST(ushort, Q)
 
 CAST_BASIC_BOOL(___mkJ, char, float)
 CAST_BASIC_BOOL(___mkJ, char, double)
@@ -39,6 +43,8 @@ CAST_BASIC_BOOL(___mkJ, char, char)
 CAST_BASIC_BOOL(___mkJ, char, uchar)
 CAST_BASIC_BOOL(___mkJ, char, intl)
 CAST_BASIC_BOOL(___mkJ, char, uintl)
+CAST_BASIC_BOOL(___mkJ, char, short)
+CAST_BASIC_BOOL(___mkJ, char, ushort)
 
 #define CPLX_BASIC(FN, To, Tr, Ti)              \
     __device__ To FN(Ti in)                     \
@@ -56,6 +62,8 @@ CAST_BASIC_BOOL(___mkJ, char, uintl)
     CPLX_BASIC(___mk##X, T, Tr, uchar)          \
     CPLX_BASIC(___mk##X, T, Tr, uintl)          \
     CPLX_BASIC(___mk##X, T, Tr, intl)           \
+    CPLX_BASIC(___mk##X, T, Tr, ushort)         \
+    CPLX_BASIC(___mk##X, T, Tr, short)          \
 
 CPLX_CAST(cfloat, float, C)
 CPLX_CAST(cdouble, double, Z)
diff --git a/src/backend/cuda/JIT/exp.cu b/src/backend/cuda/JIT/exp.cu
index 23a3300..3f110b4 100644
--- a/src/backend/cuda/JIT/exp.cu
+++ b/src/backend/cuda/JIT/exp.cu
@@ -34,6 +34,8 @@ __device__ float sigmoidf(float in)
     MATH_BASIC(fn, uchar)                       \
     MATH_BASIC(fn, uintl)                       \
     MATH_BASIC(fn, intl)                        \
+    MATH_BASIC(fn, ushort)                      \
+    MATH_BASIC(fn, short)                       \
     __device__ double ___##fn(double a)         \
     {                                           \
         return fn(a);                           \
@@ -68,6 +70,8 @@ MATH(cbrt)
     MATH2_BASIC(fn, uchar)                          \
     MATH2_BASIC(fn, uintl)                          \
     MATH2_BASIC(fn, intl)                           \
+    MATH2_BASIC(fn, ushort)                         \
+    MATH2_BASIC(fn, short)                          \
     __device__ double ___##fn(double a, double b)   \
     {                                               \
         return fn(a, b);                            \
diff --git a/src/backend/cuda/JIT/hyper.cu b/src/backend/cuda/JIT/hyper.cu
index 00ea2da..6673fb1 100644
--- a/src/backend/cuda/JIT/hyper.cu
+++ b/src/backend/cuda/JIT/hyper.cu
@@ -24,6 +24,8 @@
     MATH_BASIC(fn, uchar)                       \
     MATH_BASIC(fn, uintl)                       \
     MATH_BASIC(fn, intl)                        \
+    MATH_BASIC(fn, ushort)                      \
+    MATH_BASIC(fn, short)                       \
     __device__ double ___##fn(double a)         \
     {                                           \
         return fn(a);                           \
diff --git a/src/backend/cuda/JIT/logic.cu b/src/backend/cuda/JIT/logic.cu
index 883f3db..6072c3c 100644
--- a/src/backend/cuda/JIT/logic.cu
+++ b/src/backend/cuda/JIT/logic.cu
@@ -25,6 +25,8 @@
     LOGIC_BASIC(fn, op, uchar)                      \
     LOGIC_BASIC(fn, op, intl)                       \
     LOGIC_BASIC(fn, op, uintl)                      \
+    LOGIC_BASIC(fn, op, short)                      \
+    LOGIC_BASIC(fn, op, ushort)                     \
                                                     \
     __device__ bool ___##fn(cfloat a, cfloat b)     \
     {                                               \
@@ -52,6 +54,8 @@ LOGIC(or, ||)
     LOGIC_BASIC(fn, op, uchar)                      \
     LOGIC_BASIC(fn, op, intl)                       \
     LOGIC_BASIC(fn, op, uintl)                      \
+    LOGIC_BASIC(fn, op, short)                      \
+    LOGIC_BASIC(fn, op, ushort)                     \
                                                     \
     __device__ bool ___##fn(cfloat a, cfloat b)     \
     {                                               \
@@ -77,6 +81,8 @@ NOT_FN(char)
 NOT_FN(uchar)
 NOT_FN(intl)
 NOT_FN(uintl)
+NOT_FN(short)
+NOT_FN(ushort)
 
 #define BIT_FN(T)                                                   \
     __device__ T ___bitand   (T lhs, T rhs) { return lhs &  rhs; }  \
@@ -91,6 +97,8 @@ BIT_FN(intl)
 BIT_FN(uchar)
 BIT_FN(uint)
 BIT_FN(uintl)
+BIT_FN(short)
+BIT_FN(ushort)
 
 __device__ char ___isNaN(float in) { return isnan(in); }
 __device__ char ___isINF(float in) { return isinf(in); }
diff --git a/src/backend/cuda/JIT/numeric.cu b/src/backend/cuda/JIT/numeric.cu
index 158cc24..8253db6 100644
--- a/src/backend/cuda/JIT/numeric.cu
+++ b/src/backend/cuda/JIT/numeric.cu
@@ -39,6 +39,8 @@ MATH_NOOP(floor, char)
 MATH_NOOP(floor, uchar)
 MATH_NOOP(floor, uintl)
 MATH_NOOP(floor, intl)
+MATH_NOOP(floor, ushort)
+MATH_NOOP(floor, short)
 
 MATH_BASIC(ceil, float)
 MATH_BASIC(ceil, double)
@@ -48,6 +50,8 @@ MATH_NOOP(ceil, char)
 MATH_NOOP(ceil, uchar)
 MATH_NOOP(ceil, uintl)
 MATH_NOOP(ceil, intl)
+MATH_NOOP(ceil, ushort)
+MATH_NOOP(ceil, short)
 
 MATH_BASIC(round, float)
 MATH_BASIC(round, double)
@@ -57,6 +61,8 @@ MATH_NOOP(round, char)
 MATH_NOOP(round, uchar)
 MATH_NOOP(round, uintl)
 MATH_NOOP(round, intl)
+MATH_NOOP(round, ushort)
+MATH_NOOP(round, short)
 
 MATH_BASIC(trunc, float)
 MATH_BASIC(trunc, double)
@@ -66,6 +72,8 @@ MATH_NOOP(trunc, char)
 MATH_NOOP(trunc, uchar)
 MATH_NOOP(trunc, uintl)
 MATH_NOOP(trunc, intl)
+MATH_NOOP(trunc, ushort)
+MATH_NOOP(trunc, short)
 
 MATH_BASIC(sign, float)
 MATH_BASIC(sign, double)
@@ -75,6 +83,8 @@ MATH_NOOP(sign, char)
 MATH_NOOP(sign, uchar)
 MATH_NOOP(sign, uintl)
 MATH_NOOP(sign, intl)
+MATH_NOOP(sign, ushort)
+MATH_NOOP(sign, short)
 
 MATH_BASIC(abs, float)
 MATH_BASIC(abs, double)
@@ -84,24 +94,30 @@ MATH_NOOP(abs, uint)
 MATH_NOOP(abs, uchar)
 MATH_NOOP(abs, uintl)
 MATH_NOOP(abs, intl)
+MATH_NOOP(abs, ushort)
+MATH_NOOP(abs, short)
 
 MATH_BASIC(tgamma, float)
 MATH_BASIC(tgamma, double)
-MATH_CAST(tgamma, int, float)
-MATH_CAST(tgamma, uint, float)
-MATH_CAST(tgamma, char, float)
-MATH_CAST(tgamma, uchar, float)
-MATH_CAST(tgamma, uintl, float)
-MATH_CAST(tgamma, intl, float)
+MATH_CAST(tgamma, int   , float)
+MATH_CAST(tgamma, uint  , float)
+MATH_CAST(tgamma, char  , float)
+MATH_CAST(tgamma, uchar , float)
+MATH_CAST(tgamma, uintl , float)
+MATH_CAST(tgamma, intl  , float)
+MATH_CAST(tgamma, ushort, float)
+MATH_CAST(tgamma, short , float)
 
 MATH_BASIC(lgamma, float)
 MATH_BASIC(lgamma, double)
-MATH_CAST(lgamma, int, float)
-MATH_CAST(lgamma, uint, float)
-MATH_CAST(lgamma, char, float)
-MATH_CAST(lgamma, uchar, float)
-MATH_CAST(lgamma, uintl, float)
-MATH_CAST(lgamma, intl, float)
+MATH_CAST(lgamma, int   , float)
+MATH_CAST(lgamma, uint  , float)
+MATH_CAST(lgamma, char  , float)
+MATH_CAST(lgamma, uchar , float)
+MATH_CAST(lgamma, uintl , float)
+MATH_CAST(lgamma, intl  , float)
+MATH_CAST(lgamma, ushort, float)
+MATH_CAST(lgamma, short , float)
 
 __device__ float ___abs(cfloat a) { return cuCabsf(a); }
 __device__ double ___abs(cdouble a) { return cuCabs(a); }
@@ -128,6 +144,8 @@ __device__ double mod(double a, double b) { return fmod(a, b); }
     MATH2_BASIC(fn, uintl)                          \
     MATH2_BASIC(fn, char)                           \
     MATH2_BASIC(fn, uchar)                          \
+    MATH2_BASIC(fn, short)                          \
+    MATH2_BASIC(fn, ushort)                         \
     __device__ double ___##fn(double a, double b)   \
     {                                               \
         return fn(a, b);                            \
diff --git a/src/backend/cuda/JIT/trig.cu b/src/backend/cuda/JIT/trig.cu
index 28f098e..372bd4d 100644
--- a/src/backend/cuda/JIT/trig.cu
+++ b/src/backend/cuda/JIT/trig.cu
@@ -24,6 +24,8 @@
     MATH_BASIC(fn, uchar)                       \
     MATH_BASIC(fn, uintl)                       \
     MATH_BASIC(fn, intl)                        \
+    MATH_BASIC(fn, ushort)                      \
+    MATH_BASIC(fn, short)                       \
     __device__ double ___##fn(double a)         \
     {                                           \
         return fn(a);                           \
@@ -51,6 +53,8 @@ ATAN2(char)
 ATAN2(uchar)
 ATAN2(uintl)
 ATAN2(intl)
+ATAN2(ushort)
+ATAN2(short)
 
 __device__ double ___atan2(double x, double y)
 {
diff --git a/src/backend/cuda/JIT/types.h b/src/backend/cuda/JIT/types.h
index 80314bc..4a97ef3 100644
--- a/src/backend/cuda/JIT/types.h
+++ b/src/backend/cuda/JIT/types.h
@@ -11,6 +11,7 @@
 #include <math_functions.h>
 typedef unsigned char uchar;
 typedef unsigned int uint;
+typedef unsigned short ushort;
 typedef cuFloatComplex cfloat;
 typedef cuDoubleComplex cdouble;
 typedef long long intl;
diff --git a/src/backend/cuda/all.cu b/src/backend/cuda/all.cu
index bfc070a..b70f98a 100644
--- a/src/backend/cuda/all.cu
+++ b/src/backend/cuda/all.cu
@@ -22,4 +22,6 @@ namespace cuda
     INSTANTIATE(af_and_t, uintl  , char)
     INSTANTIATE(af_and_t, char   , char)
     INSTANTIATE(af_and_t, uchar  , char)
+    INSTANTIATE(af_and_t, short  , char)
+    INSTANTIATE(af_and_t, ushort , char)
 }
diff --git a/src/backend/cuda/any.cu b/src/backend/cuda/any.cu
index 836970e..aa13fbb 100644
--- a/src/backend/cuda/any.cu
+++ b/src/backend/cuda/any.cu
@@ -22,4 +22,6 @@ namespace cuda
     INSTANTIATE(af_or_t, uintl  , char)
     INSTANTIATE(af_or_t, char   , char)
     INSTANTIATE(af_or_t, uchar  , char)
+    INSTANTIATE(af_or_t, short  , char)
+    INSTANTIATE(af_or_t, ushort , char)
 }
diff --git a/src/backend/cuda/assign.cu b/src/backend/cuda/assign.cu
index 7bea851..7d00b15 100644
--- a/src/backend/cuda/assign.cu
+++ b/src/backend/cuda/assign.cu
@@ -69,11 +69,13 @@ INSTANTIATE(cdouble)
 INSTANTIATE(double )
 INSTANTIATE(cfloat )
 INSTANTIATE(float  )
-INSTANTIATE(uintl  )
+INSTANTIATE(int    )
 INSTANTIATE(uint   )
 INSTANTIATE(intl   )
-INSTANTIATE(int    )
-INSTANTIATE(uchar  )
+INSTANTIATE(uintl  )
 INSTANTIATE(char   )
+INSTANTIATE(uchar  )
+INSTANTIATE(short  )
+INSTANTIATE(ushort )
 
 }
diff --git a/src/backend/cuda/bilateral.cu b/src/backend/cuda/bilateral.cu
index 4c1d7fc..bdb19fd 100644
--- a/src/backend/cuda/bilateral.cu
+++ b/src/backend/cuda/bilateral.cu
@@ -37,5 +37,7 @@ INSTANTIATE(char  ,  float)
 INSTANTIATE(int   ,  float)
 INSTANTIATE(uint  ,  float)
 INSTANTIATE(uchar ,  float)
+INSTANTIATE(short ,  float)
+INSTANTIATE(ushort,  float)
 
 }
diff --git a/src/backend/cuda/convolve.cpp b/src/backend/cuda/convolve.cpp
index 9f14e6a..23f470f 100644
--- a/src/backend/cuda/convolve.cpp
+++ b/src/backend/cuda/convolve.cpp
@@ -96,5 +96,7 @@ INSTANTIATE(uint   ,   float)
 INSTANTIATE(int    ,   float)
 INSTANTIATE(uchar  ,   float)
 INSTANTIATE(char   ,   float)
+INSTANTIATE(ushort ,   float)
+INSTANTIATE(short  ,   float)
 
 }
diff --git a/src/backend/cuda/copy.cu b/src/backend/cuda/copy.cu
index 1f23804..90f9970 100644
--- a/src/backend/cuda/copy.cu
+++ b/src/backend/cuda/copy.cu
@@ -120,16 +120,18 @@ namespace cuda
     template Array<T> copyArray<T>(const Array<T> &A);              \
     template void      multiply_inplace<T> (Array<T> &in, double norm); \
 
-    INSTANTIATE(float)
-    INSTANTIATE(double)
-    INSTANTIATE(cfloat)
+    INSTANTIATE(float  )
+    INSTANTIATE(double )
+    INSTANTIATE(cfloat )
     INSTANTIATE(cdouble)
-    INSTANTIATE(int)
-    INSTANTIATE(uint)
-    INSTANTIATE(uchar)
-    INSTANTIATE(char)
+    INSTANTIATE(int    )
+    INSTANTIATE(uint   )
+    INSTANTIATE(uchar  )
+    INSTANTIATE(char   )
     INSTANTIATE(intl   )
     INSTANTIATE(uintl  )
+    INSTANTIATE(short  )
+    INSTANTIATE(ushort )
 
 #define INSTANTIATE_PAD_ARRAY(SRC_T)                                    \
     template Array<float  > padArray<SRC_T, float  >(Array<SRC_T> const &src, dim4 const &dims, float   default_value, double factor); \
@@ -138,8 +140,10 @@ namespace cuda
     template Array<cdouble> padArray<SRC_T, cdouble>(Array<SRC_T> const &src, dim4 const &dims, cdouble default_value, double factor); \
     template Array<int    > padArray<SRC_T, int    >(Array<SRC_T> const &src, dim4 const &dims, int     default_value, double factor); \
     template Array<uint   > padArray<SRC_T, uint   >(Array<SRC_T> const &src, dim4 const &dims, uint    default_value, double factor); \
-    template Array<intl    > padArray<SRC_T, intl    >(Array<SRC_T> const &src, dim4 const &dims, intl     default_value, double factor); \
-    template Array<uintl   > padArray<SRC_T, uintl   >(Array<SRC_T> const &src, dim4 const &dims, uintl    default_value, double factor); \
+    template Array<intl   > padArray<SRC_T, intl   >(Array<SRC_T> const &src, dim4 const &dims, intl    default_value, double factor); \
+    template Array<uintl  > padArray<SRC_T, uintl  >(Array<SRC_T> const &src, dim4 const &dims, uintl   default_value, double factor); \
+    template Array<short  > padArray<SRC_T, short  >(Array<SRC_T> const &src, dim4 const &dims, short   default_value, double factor); \
+    template Array<ushort > padArray<SRC_T, ushort >(Array<SRC_T> const &src, dim4 const &dims, ushort  default_value, double factor); \
     template Array<uchar  > padArray<SRC_T, uchar  >(Array<SRC_T> const &src, dim4 const &dims, uchar   default_value, double factor); \
     template Array<char   > padArray<SRC_T, char   >(Array<SRC_T> const &src, dim4 const &dims, char    default_value, double factor); \
     template void copyArray<SRC_T, float  >(Array<float  > &dst, Array<SRC_T> const &src); \
@@ -148,8 +152,10 @@ namespace cuda
     template void copyArray<SRC_T, cdouble>(Array<cdouble> &dst, Array<SRC_T> const &src); \
     template void copyArray<SRC_T, int    >(Array<int    > &dst, Array<SRC_T> const &src); \
     template void copyArray<SRC_T, uint   >(Array<uint   > &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, intl    >(Array<intl    > &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, uintl   >(Array<uintl   > &dst, Array<SRC_T> const &src); \
+    template void copyArray<SRC_T, intl   >(Array<intl   > &dst, Array<SRC_T> const &src); \
+    template void copyArray<SRC_T, uintl  >(Array<uintl  > &dst, Array<SRC_T> const &src); \
+    template void copyArray<SRC_T, short  >(Array<short  > &dst, Array<SRC_T> const &src); \
+    template void copyArray<SRC_T, ushort >(Array<ushort > &dst, Array<SRC_T> const &src); \
     template void copyArray<SRC_T, uchar  >(Array<uchar  > &dst, Array<SRC_T> const &src); \
     template void copyArray<SRC_T, char   >(Array<char   > &dst, Array<SRC_T> const &src);
 
@@ -157,8 +163,10 @@ namespace cuda
     INSTANTIATE_PAD_ARRAY(double)
     INSTANTIATE_PAD_ARRAY(int   )
     INSTANTIATE_PAD_ARRAY(uint  )
-    INSTANTIATE_PAD_ARRAY(intl   )
-    INSTANTIATE_PAD_ARRAY(uintl  )
+    INSTANTIATE_PAD_ARRAY(intl  )
+    INSTANTIATE_PAD_ARRAY(uintl )
+    INSTANTIATE_PAD_ARRAY(short )
+    INSTANTIATE_PAD_ARRAY(ushort)
     INSTANTIATE_PAD_ARRAY(uchar )
     INSTANTIATE_PAD_ARRAY(char  )
 
@@ -185,6 +193,8 @@ namespace cuda
     SPECILIAZE_UNUSED_COPYARRAY(cfloat, int)
     SPECILIAZE_UNUSED_COPYARRAY(cfloat, intl)
     SPECILIAZE_UNUSED_COPYARRAY(cfloat, uintl)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat, short)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat, ushort)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, double)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, float)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, uchar)
@@ -193,4 +203,6 @@ namespace cuda
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, int)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, intl)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, uintl)
+    SPECILIAZE_UNUSED_COPYARRAY(cdouble, short)
+    SPECILIAZE_UNUSED_COPYARRAY(cdouble, ushort)
 }
diff --git a/src/backend/cuda/count.cu b/src/backend/cuda/count.cu
index d624141..365897f 100644
--- a/src/backend/cuda/count.cu
+++ b/src/backend/cuda/count.cu
@@ -20,6 +20,8 @@ namespace cuda
     INSTANTIATE(af_notzero_t, uint   , uint)
     INSTANTIATE(af_notzero_t, intl   , uint)
     INSTANTIATE(af_notzero_t, uintl  , uint)
+    INSTANTIATE(af_notzero_t, short  , uint)
+    INSTANTIATE(af_notzero_t, ushort , uint)
     INSTANTIATE(af_notzero_t, char   , uint)
     INSTANTIATE(af_notzero_t, uchar  , uint)
 }
diff --git a/src/backend/cuda/diagonal.cu b/src/backend/cuda/diagonal.cu
index 05b8025..fd023c9 100644
--- a/src/backend/cuda/diagonal.cu
+++ b/src/backend/cuda/diagonal.cu
@@ -56,5 +56,7 @@ namespace cuda
     INSTANTIATE_DIAGONAL(uintl)
     INSTANTIATE_DIAGONAL(char)
     INSTANTIATE_DIAGONAL(uchar)
+    INSTANTIATE_DIAGONAL(short)
+    INSTANTIATE_DIAGONAL(ushort)
 
 }
diff --git a/src/backend/cuda/diff.cu b/src/backend/cuda/diff.cu
index a50ba26..96135f9 100644
--- a/src/backend/cuda/diff.cu
+++ b/src/backend/cuda/diff.cu
@@ -70,5 +70,7 @@ namespace cuda
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/dilate.cu
index 0da33f2..9115ba8 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/dilate.cu
@@ -18,5 +18,7 @@ INSTANTIATE(char  , true)
 INSTANTIATE(int   , true)
 INSTANTIATE(uint  , true)
 INSTANTIATE(uchar , true)
+INSTANTIATE(short , true)
+INSTANTIATE(ushort, true)
 
 }
diff --git a/src/backend/cuda/dilate3d.cu b/src/backend/cuda/dilate3d.cu
index 32b0bab..4846e40 100644
--- a/src/backend/cuda/dilate3d.cu
+++ b/src/backend/cuda/dilate3d.cu
@@ -18,5 +18,7 @@ INSTANTIATE(char  , true)
 INSTANTIATE(int   , true)
 INSTANTIATE(uint  , true)
 INSTANTIATE(uchar , true)
+INSTANTIATE(short , true)
+INSTANTIATE(ushort, true)
 
 }
diff --git a/src/backend/cuda/erode.cu b/src/backend/cuda/erode.cu
index dbb2c8e..25ca46c 100644
--- a/src/backend/cuda/erode.cu
+++ b/src/backend/cuda/erode.cu
@@ -18,5 +18,7 @@ INSTANTIATE(char  , false)
 INSTANTIATE(int   , false)
 INSTANTIATE(uint  , false)
 INSTANTIATE(uchar , false)
+INSTANTIATE(short , false)
+INSTANTIATE(ushort, false)
 
 }
diff --git a/src/backend/cuda/erode3d.cu b/src/backend/cuda/erode3d.cu
index 808198a..c54b301 100644
--- a/src/backend/cuda/erode3d.cu
+++ b/src/backend/cuda/erode3d.cu
@@ -18,5 +18,7 @@ INSTANTIATE(char  , false)
 INSTANTIATE(int   , false)
 INSTANTIATE(uint  , false)
 INSTANTIATE(uchar , false)
+INSTANTIATE(short , false)
+INSTANTIATE(ushort, false)
 
 }
diff --git a/src/backend/cuda/fast.cu b/src/backend/cuda/fast.cu
index 7bd6f47..53741e3 100644
--- a/src/backend/cuda/fast.cu
+++ b/src/backend/cuda/fast.cu
@@ -59,5 +59,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/fast_pyramid.cu b/src/backend/cuda/fast_pyramid.cu
index 3c22236..1e1b047 100644
--- a/src/backend/cuda/fast_pyramid.cu
+++ b/src/backend/cuda/fast_pyramid.cu
@@ -50,5 +50,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/fftconvolve.cu b/src/backend/cuda/fftconvolve.cu
index 97edeec..3dde4ab 100644
--- a/src/backend/cuda/fftconvolve.cu
+++ b/src/backend/cuda/fftconvolve.cu
@@ -119,5 +119,7 @@ INSTANTIATE(uint  , float,  cfloat,  false, true)
 INSTANTIATE(int   , float,  cfloat,  false, true)
 INSTANTIATE(uchar , float,  cfloat,  false, true)
 INSTANTIATE(char  , float,  cfloat,  false, true)
+INSTANTIATE(ushort, float,  cfloat,  false, true)
+INSTANTIATE(short , float,  cfloat,  false, true)
 
 }
diff --git a/src/backend/cuda/histogram.cu b/src/backend/cuda/histogram.cu
index e9a980f..b1991b3 100644
--- a/src/backend/cuda/histogram.cu
+++ b/src/backend/cuda/histogram.cu
@@ -58,5 +58,7 @@ INSTANTIATE(char  , uint)
 INSTANTIATE(int   , uint)
 INSTANTIATE(uint  , uint)
 INSTANTIATE(uchar , uint)
+INSTANTIATE(short , uint)
+INSTANTIATE(ushort, uint)
 
 }
diff --git a/src/backend/cuda/identity.cu b/src/backend/cuda/identity.cu
index 264d5b8..6765766 100644
--- a/src/backend/cuda/identity.cu
+++ b/src/backend/cuda/identity.cu
@@ -38,5 +38,7 @@ namespace cuda
     INSTANTIATE_IDENTITY(uintl)
     INSTANTIATE_IDENTITY(char)
     INSTANTIATE_IDENTITY(uchar)
+    INSTANTIATE_IDENTITY(short)
+    INSTANTIATE_IDENTITY(ushort)
 
 }
diff --git a/src/backend/cuda/index.cu b/src/backend/cuda/index.cu
index 988f589..b1d528c 100644
--- a/src/backend/cuda/index.cu
+++ b/src/backend/cuda/index.cu
@@ -75,11 +75,13 @@ INSTANTIATE(cdouble)
 INSTANTIATE(double )
 INSTANTIATE(cfloat )
 INSTANTIATE(float  )
-INSTANTIATE(uintl  )
 INSTANTIATE(uint   )
-INSTANTIATE(intl   )
 INSTANTIATE(int    )
+INSTANTIATE(uintl  )
+INSTANTIATE(intl   )
 INSTANTIATE(uchar  )
 INSTANTIATE(char   )
+INSTANTIATE(ushort )
+INSTANTIATE(short  )
 
 }
diff --git a/src/backend/cuda/iota.cu b/src/backend/cuda/iota.cu
index ee9bcdc..eee4344 100644
--- a/src/backend/cuda/iota.cu
+++ b/src/backend/cuda/iota.cu
@@ -37,5 +37,7 @@ namespace cuda
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
 
diff --git a/src/backend/cuda/ireduce.cu b/src/backend/cuda/ireduce.cu
index 0c14a01..dece64c 100644
--- a/src/backend/cuda/ireduce.cu
+++ b/src/backend/cuda/ireduce.cu
@@ -51,6 +51,8 @@ namespace cuda
     INSTANTIATE(af_min_t, uint   )
     INSTANTIATE(af_min_t, intl   )
     INSTANTIATE(af_min_t, uintl  )
+    INSTANTIATE(af_min_t, short  )
+    INSTANTIATE(af_min_t, ushort )
     INSTANTIATE(af_min_t, char   )
     INSTANTIATE(af_min_t, uchar  )
 
@@ -63,6 +65,8 @@ namespace cuda
     INSTANTIATE(af_max_t, uint   )
     INSTANTIATE(af_max_t, intl   )
     INSTANTIATE(af_max_t, uintl  )
+    INSTANTIATE(af_max_t, short  )
+    INSTANTIATE(af_max_t, ushort )
     INSTANTIATE(af_max_t, char   )
     INSTANTIATE(af_max_t, uchar  )
 }
diff --git a/src/backend/cuda/jit.cpp b/src/backend/cuda/jit.cpp
index b001fef..af5f2d6 100644
--- a/src/backend/cuda/jit.cpp
+++ b/src/backend/cuda/jit.cpp
@@ -500,6 +500,8 @@ template void evalNodes<char   >(Param<char   > &out, Node *node);
 template void evalNodes<uchar  >(Param<uchar  > &out, Node *node);
 template void evalNodes<intl   >(Param<intl   > &out, Node *node);
 template void evalNodes<uintl  >(Param<uintl  > &out, Node *node);
+template void evalNodes<short  >(Param<short  > &out, Node *node);
+template void evalNodes<ushort >(Param<ushort > &out, Node *node);
 
 
 }
diff --git a/src/backend/cuda/join.cu b/src/backend/cuda/join.cu
index 074326e..729cec4 100644
--- a/src/backend/cuda/join.cu
+++ b/src/backend/cuda/join.cu
@@ -170,16 +170,18 @@ namespace cuda
 #define INSTANTIATE(Tx, Ty)                                                                             \
     template Array<Tx> join<Tx, Ty>(const int dim, const Array<Tx> &first, const Array<Ty> &second);   \
 
-    INSTANTIATE(float,   float)
-    INSTANTIATE(double,  double)
-    INSTANTIATE(cfloat,  cfloat)
+    INSTANTIATE(float  , float  )
+    INSTANTIATE(double , double )
+    INSTANTIATE(cfloat , cfloat )
     INSTANTIATE(cdouble, cdouble)
-    INSTANTIATE(int,     int)
-    INSTANTIATE(uint,    uint)
-    INSTANTIATE(intl,    intl)
-    INSTANTIATE(uintl,   uintl)
-    INSTANTIATE(uchar,   uchar)
-    INSTANTIATE(char,    char)
+    INSTANTIATE(int    , int    )
+    INSTANTIATE(uint   , uint   )
+    INSTANTIATE(intl   , intl   )
+    INSTANTIATE(uintl  , uintl  )
+    INSTANTIATE(short  , short  )
+    INSTANTIATE(ushort , ushort )
+    INSTANTIATE(uchar  , uchar  )
+    INSTANTIATE(char   , char   )
 
 #undef INSTANTIATE
 
@@ -194,6 +196,8 @@ namespace cuda
     INSTANTIATE(uint)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
 
diff --git a/src/backend/cuda/kernel/convolve.cu b/src/backend/cuda/kernel/convolve.cu
index 78790c3..329287d 100644
--- a/src/backend/cuda/kernel/convolve.cu
+++ b/src/backend/cuda/kernel/convolve.cu
@@ -485,12 +485,12 @@ void convolve_nd(Param<T> out, CParam<T> signal, CParam<aT> filt, ConvolveBatchK
 }
 
 #define INSTANTIATE(T, aT)  \
-	template void convolve_nd<T, aT, 1, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
-	template void convolve_nd<T, aT, 1, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
-	template void convolve_nd<T, aT, 2, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
-	template void convolve_nd<T, aT, 2, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
-	template void convolve_nd<T, aT, 3, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
-	template void convolve_nd<T, aT, 3, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+    template void convolve_nd<T, aT, 1, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+    template void convolve_nd<T, aT, 1, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+    template void convolve_nd<T, aT, 2, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+    template void convolve_nd<T, aT, 2, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+    template void convolve_nd<T, aT, 3, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+    template void convolve_nd<T, aT, 3, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
 
 
 INSTANTIATE(cdouble, cdouble)
@@ -501,6 +501,8 @@ INSTANTIATE(uint   ,   float)
 INSTANTIATE(int    ,   float)
 INSTANTIATE(uchar  ,   float)
 INSTANTIATE(char   ,   float)
+INSTANTIATE(ushort ,   float)
+INSTANTIATE(short  ,   float)
 
 }
 
diff --git a/src/backend/cuda/kernel/convolve_separable.cu b/src/backend/cuda/kernel/convolve_separable.cu
index e2caec7..196d60a 100644
--- a/src/backend/cuda/kernel/convolve_separable.cu
+++ b/src/backend/cuda/kernel/convolve_separable.cu
@@ -174,10 +174,10 @@ void convolve2(Param<T> out, CParam<T> signal, CParam<accType> filter)
 }
 
 #define INSTANTIATE(T, accType)                                         \
-	template void convolve2<T, accType, 0, true >(Param<T> out, CParam<T> signal, CParam<accType> filter); \
-	template void convolve2<T, accType, 0, false>(Param<T> out, CParam<T> signal, CParam<accType> filter); \
-	template void convolve2<T, accType, 1, true >(Param<T> out, CParam<T> signal, CParam<accType> filter); \
-	template void convolve2<T, accType, 1, false>(Param<T> out, CParam<T> signal, CParam<accType> filter); \
+    template void convolve2<T, accType, 0, true >(Param<T> out, CParam<T> signal, CParam<accType> filter); \
+    template void convolve2<T, accType, 0, false>(Param<T> out, CParam<T> signal, CParam<accType> filter); \
+    template void convolve2<T, accType, 1, true >(Param<T> out, CParam<T> signal, CParam<accType> filter); \
+    template void convolve2<T, accType, 1, false>(Param<T> out, CParam<T> signal, CParam<accType> filter); \
 
 
 INSTANTIATE(cdouble, cdouble)
@@ -188,6 +188,8 @@ INSTANTIATE(uint   ,   float)
 INSTANTIATE(int    ,   float)
 INSTANTIATE(uchar  ,   float)
 INSTANTIATE(char   ,   float)
+INSTANTIATE(ushort ,   float)
+INSTANTIATE(short  ,   float)
 
 }
 
diff --git a/src/backend/cuda/kernel/fast.hpp b/src/backend/cuda/kernel/fast.hpp
index df4f406..5f220cd 100644
--- a/src/backend/cuda/kernel/fast.hpp
+++ b/src/backend/cuda/kernel/fast.hpp
@@ -87,6 +87,16 @@ unsigned max_val(const unsigned x, const unsigned y)
     return max(x, y);
 }
 inline __device__
+short max_val(const short x, const short y)
+{
+    return max(x, y);
+}
+inline __device__
+ushort max_val(const ushort x, const ushort y)
+{
+    return max(x, y);
+}
+inline __device__
 float max_val(const float x, const float y)
 {
     return fmax(x, y);
@@ -109,6 +119,16 @@ inline __device__ unsigned abs_diff(const unsigned x, const unsigned y)
     int i = (int)x - (int)y;
     return max(-i, i);
 }
+inline __device__ short abs_diff(const short x, const short y)
+{
+    short i = x - y;
+    return max(-i, i);
+}
+inline __device__ ushort abs_diff(const ushort x, const ushort y)
+{
+    int i = (int)x - (int)y;
+    return (ushort)max(-i, i);
+}
 inline __device__ float abs_diff(const float x, const float y)
 {
     return fabs(x - y);
diff --git a/src/backend/cuda/kernel/memcopy.hpp b/src/backend/cuda/kernel/memcopy.hpp
index 4d5d192..dc437b4 100644
--- a/src/backend/cuda/kernel/memcopy.hpp
+++ b/src/backend/cuda/kernel/memcopy.hpp
@@ -142,6 +142,8 @@ namespace kernel
     OTHER_SPECIALIZATIONS(uint  )
     OTHER_SPECIALIZATIONS(intl   )
     OTHER_SPECIALIZATIONS(uintl  )
+    OTHER_SPECIALIZATIONS(short  )
+    OTHER_SPECIALIZATIONS(ushort )
     OTHER_SPECIALIZATIONS(uchar )
     OTHER_SPECIALIZATIONS(char  )
     ////////////////////////////// END - templated help functions for copy_kernel //////////////////////////////////
diff --git a/src/backend/cuda/kernel/nearest_neighbour.hpp b/src/backend/cuda/kernel/nearest_neighbour.hpp
index 14c448f..9b14cb5 100644
--- a/src/backend/cuda/kernel/nearest_neighbour.hpp
+++ b/src/backend/cuda/kernel/nearest_neighbour.hpp
@@ -69,6 +69,15 @@ struct dist_op<uintl, To, AF_SHD>
 };
 
 template<typename To>
+struct dist_op<ushort, To, AF_SHD>
+{
+    __device__ To operator()(ushort v1, ushort v2)
+    {
+        return __popc(v1 ^ v2);
+    }
+};
+
+template<typename To>
 struct dist_op<uchar, To, AF_SHD>
 {
     __device__ To operator()(uchar v1, uchar v2)
diff --git a/src/backend/cuda/kernel/shared.hpp b/src/backend/cuda/kernel/shared.hpp
index eb7b432..742afab 100644
--- a/src/backend/cuda/kernel/shared.hpp
+++ b/src/backend/cuda/kernel/shared.hpp
@@ -44,6 +44,8 @@ SPECIALIZE(cdouble)
 SPECIALIZE(char)
 SPECIALIZE(int)
 SPECIALIZE(uint)
+SPECIALIZE(short)
+SPECIALIZE(ushort)
 SPECIALIZE(uchar)
 
 #undef SPECIALIZE
diff --git a/src/backend/cuda/lookup.cu b/src/backend/cuda/lookup.cu
index 8f910de..934e68e 100644
--- a/src/backend/cuda/lookup.cu
+++ b/src/backend/cuda/lookup.cu
@@ -42,6 +42,8 @@ Array<in_t> lookup(const Array<in_t> &input, const Array<idx_t> &indices, const
     template Array<T> lookup<T, double  >(const Array<T> &input, const Array<double  > &indices, const unsigned dim); \
     template Array<T> lookup<T, int     >(const Array<T> &input, const Array<int     > &indices, const unsigned dim); \
     template Array<T> lookup<T, unsigned>(const Array<T> &input, const Array<unsigned> &indices, const unsigned dim); \
+    template Array<T> lookup<T, short   >(const Array<T> &input, const Array<short   > &indices, const unsigned dim); \
+    template Array<T> lookup<T, ushort  >(const Array<T> &input, const Array<ushort  > &indices, const unsigned dim); \
     template Array<T> lookup<T, uchar   >(const Array<T> &input, const Array<uchar   > &indices, const unsigned dim);
 
 INSTANTIATE(float   );
@@ -54,5 +56,7 @@ INSTANTIATE(intl    );
 INSTANTIATE(uintl   );
 INSTANTIATE(uchar   );
 INSTANTIATE(char    );
+INSTANTIATE(short   );
+INSTANTIATE(ushort  );
 
 }
diff --git a/src/backend/cuda/match_template.cu b/src/backend/cuda/match_template.cu
index 5b30eb0..0ce0ce2 100644
--- a/src/backend/cuda/match_template.cu
+++ b/src/backend/cuda/match_template.cu
@@ -54,5 +54,7 @@ INSTANTIATE(char  ,  float)
 INSTANTIATE(int   ,  float)
 INSTANTIATE(uint  ,  float)
 INSTANTIATE(uchar ,  float)
+INSTANTIATE(short ,  float)
+INSTANTIATE(ushort,  float)
 
 }
diff --git a/src/backend/cuda/math.hpp b/src/backend/cuda/math.hpp
index 577db84..1c495f6 100644
--- a/src/backend/cuda/math.hpp
+++ b/src/backend/cuda/math.hpp
@@ -108,6 +108,9 @@ namespace cuda
     template<> __device__  float  limit_min<float>()  { return -CUDART_INF_F; }
     template<> __device__  double limit_max<double>() { return  CUDART_INF; }
     template<> __device__  double limit_min<double>() { return -CUDART_INF; }
+    template<> __device__  short  limit_max<short>()  { return 0x7fff; }
+    template<> __device__  short  limit_min<short>()  { return 0x8000; }
+    template<> __device__  ushort limit_max<ushort>() { return 0xffff; } // largest ushort (all 16 bits set); 1 << 15 was only 0x8000
 #endif
 
 #define upcast cuComplexFloatToDouble
@@ -134,6 +137,8 @@ __SDH__ cdouble conj(cdouble c) { return cuConj(c); }
 __SDH__ cfloat make_cfloat(bool     x) { return make_cuComplex(x,0);     }
 __SDH__ cfloat make_cfloat(int      x) { return make_cuComplex(x,0);     }
 __SDH__ cfloat make_cfloat(unsigned x) { return make_cuComplex(x,0);     }
+__SDH__ cfloat make_cfloat(short    x) { return make_cuComplex(x,0);     }
+__SDH__ cfloat make_cfloat(ushort   x) { return make_cuComplex(x,0);     }
 __SDH__ cfloat make_cfloat(float    x) { return make_cuComplex(x,0);     }
 __SDH__ cfloat make_cfloat(double   x) { return make_cuComplex(x,0);     }
 __SDH__ cfloat make_cfloat(cfloat   x) { return x;                    }
@@ -142,6 +147,8 @@ __SDH__ cfloat make_cfloat(cdouble  c) { return make_cuComplex(c.x,c.y); }
 __SDH__ cdouble make_cdouble(bool      x) { return make_cuDoubleComplex(x,0);       }
 __SDH__ cdouble make_cdouble(int       x) { return make_cuDoubleComplex(x,0);       }
 __SDH__ cdouble make_cdouble(unsigned  x) { return make_cuDoubleComplex(x,0);       }
+__SDH__ cdouble make_cdouble(short     x) { return make_cuDoubleComplex(x,0);       }
+__SDH__ cdouble make_cdouble(ushort    x) { return make_cuDoubleComplex(x,0);       }
 __SDH__ cdouble make_cdouble(float     x) { return make_cuDoubleComplex(x,0);       }
 __SDH__ cdouble make_cdouble(double    x) { return make_cuDoubleComplex(x,0);       }
 __SDH__ cdouble make_cdouble(cdouble   x) { return x;                       }
diff --git a/src/backend/cuda/max.cu b/src/backend/cuda/max.cu
index 7841422..c910bea 100644
--- a/src/backend/cuda/max.cu
+++ b/src/backend/cuda/max.cu
@@ -22,4 +22,6 @@ namespace cuda
     INSTANTIATE(af_max_t, uintl  , uintl  )
     INSTANTIATE(af_max_t, char   , char   )
     INSTANTIATE(af_max_t, uchar  , uchar  )
+    INSTANTIATE(af_max_t, short  , short  )
+    INSTANTIATE(af_max_t, ushort , ushort )
 }
diff --git a/src/backend/cuda/meanshift.cu b/src/backend/cuda/meanshift.cu
index 0fa1ac3..20f200b 100644
--- a/src/backend/cuda/meanshift.cu
+++ b/src/backend/cuda/meanshift.cu
@@ -42,5 +42,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/medfilt.cu b/src/backend/cuda/medfilt.cu
index 9a99cae..c87aea4 100644
--- a/src/backend/cuda/medfilt.cu
+++ b/src/backend/cuda/medfilt.cu
@@ -44,5 +44,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/memory.cpp b/src/backend/cuda/memory.cpp
index 45e410f..9b3d731 100644
--- a/src/backend/cuda/memory.cpp
+++ b/src/backend/cuda/memory.cpp
@@ -384,5 +384,7 @@ namespace cuda
     INSTANTIATE(uchar)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/min.cu b/src/backend/cuda/min.cu
index 0251414..26719de 100644
--- a/src/backend/cuda/min.cu
+++ b/src/backend/cuda/min.cu
@@ -22,4 +22,6 @@ namespace cuda
     INSTANTIATE(af_min_t, uintl  , uintl  )
     INSTANTIATE(af_min_t, char   , char   )
     INSTANTIATE(af_min_t, uchar  , uchar  )
+    INSTANTIATE(af_min_t, short  , short  )
+    INSTANTIATE(af_min_t, ushort , ushort )
 }
diff --git a/src/backend/cuda/nearest_neighbour.cu b/src/backend/cuda/nearest_neighbour.cu
index 1899c9d..789c0f5 100644
--- a/src/backend/cuda/nearest_neighbour.cu
+++ b/src/backend/cuda/nearest_neighbour.cu
@@ -73,6 +73,8 @@ INSTANTIATE(uint  , uint)
 INSTANTIATE(intl  , intl)
 INSTANTIATE(uintl , uintl)
 INSTANTIATE(uchar , uint)
+INSTANTIATE(short , int)
+INSTANTIATE(ushort, uint)
 
 INSTANTIATE(uintl, uint)    // For Hamming
 
diff --git a/src/backend/cuda/product.cu b/src/backend/cuda/product.cu
index abc5c1f..d00e140 100644
--- a/src/backend/cuda/product.cu
+++ b/src/backend/cuda/product.cu
@@ -11,7 +11,7 @@
 
 namespace cuda
 {
-    //sum
+    //mul
     INSTANTIATE(af_mul_t, float  , float  )
     INSTANTIATE(af_mul_t, double , double )
     INSTANTIATE(af_mul_t, cfloat , cfloat )
@@ -22,4 +22,6 @@ namespace cuda
     INSTANTIATE(af_mul_t, uintl  , uintl  )
     INSTANTIATE(af_mul_t, char   , int    )
     INSTANTIATE(af_mul_t, uchar  , uint   )
+    INSTANTIATE(af_mul_t, short  , int    )
+    INSTANTIATE(af_mul_t, ushort , uint   )
 }
diff --git a/src/backend/cuda/random.cu b/src/backend/cuda/random.cu
index c9e6197..07cbdc4 100644
--- a/src/backend/cuda/random.cu
+++ b/src/backend/cuda/random.cu
@@ -44,6 +44,8 @@ namespace cuda
     template Array<uintl>   randu<uintl>   (const af::dim4 &dims);
     template Array<char>    randu<char>    (const af::dim4 &dims);
     template Array<uchar>   randu<uchar>   (const af::dim4 &dims);
+    template Array<short>   randu<short>   (const af::dim4 &dims);
+    template Array<ushort>  randu<ushort>  (const af::dim4 &dims);
 
     template Array<float>   randn<float>   (const af::dim4 &dims);
     template Array<double>  randn<double>  (const af::dim4 &dims);
diff --git a/src/backend/cuda/range.cu b/src/backend/cuda/range.cu
index 9a1a7cd..ace3b1c 100644
--- a/src/backend/cuda/range.cu
+++ b/src/backend/cuda/range.cu
@@ -45,4 +45,6 @@ namespace cuda
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/regions.cu b/src/backend/cuda/regions.cu
index 656048c..6b50b71 100644
--- a/src/backend/cuda/regions.cu
+++ b/src/backend/cuda/regions.cu
@@ -65,5 +65,7 @@ INSTANTIATE(float )
 INSTANTIATE(double)
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/reorder.cu b/src/backend/cuda/reorder.cu
index 2c920e6..7292fcd 100644
--- a/src/backend/cuda/reorder.cu
+++ b/src/backend/cuda/reorder.cu
@@ -43,5 +43,7 @@ namespace cuda
     INSTANTIATE(char)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/resize.cu b/src/backend/cuda/resize.cu
index dcec972..02d3499 100644
--- a/src/backend/cuda/resize.cu
+++ b/src/backend/cuda/resize.cu
@@ -57,4 +57,6 @@ namespace cuda
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/rotate.cu b/src/backend/cuda/rotate.cu
index 24e41d7..23c99e1 100644
--- a/src/backend/cuda/rotate.cu
+++ b/src/backend/cuda/rotate.cu
@@ -53,4 +53,6 @@ namespace cuda
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/scan.cu b/src/backend/cuda/scan.cu
index a76abc2..15ee6b4 100644
--- a/src/backend/cuda/scan.cu
+++ b/src/backend/cuda/scan.cu
@@ -51,5 +51,7 @@ namespace cuda
     INSTANTIATE(af_add_t, uintl  , uintl  )
     INSTANTIATE(af_add_t, char   , int    )
     INSTANTIATE(af_add_t, uchar  , uint   )
+    INSTANTIATE(af_add_t, short  , int    )
+    INSTANTIATE(af_add_t, ushort , uint   )
     INSTANTIATE(af_notzero_t, char  , uint   )
 }
diff --git a/src/backend/cuda/select.cu b/src/backend/cuda/select.cu
index eb90730..9697da4 100644
--- a/src/backend/cuda/select.cu
+++ b/src/backend/cuda/select.cu
@@ -48,4 +48,6 @@ namespace cuda
     INSTANTIATE(uintl  )
     INSTANTIATE(char   )
     INSTANTIATE(uchar  )
+    INSTANTIATE(short  )
+    INSTANTIATE(ushort )
 }
diff --git a/src/backend/cuda/set.cu b/src/backend/cuda/set.cu
index 5b457e1..8887f83 100644
--- a/src/backend/cuda/set.cu
+++ b/src/backend/cuda/set.cu
@@ -117,4 +117,6 @@ namespace cuda
     INSTANTIATE(uint)
     INSTANTIATE(char)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/shift.cu b/src/backend/cuda/shift.cu
index f97eb4a..89e78ac 100644
--- a/src/backend/cuda/shift.cu
+++ b/src/backend/cuda/shift.cu
@@ -41,4 +41,6 @@ namespace cuda
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/sobel.cu b/src/backend/cuda/sobel.cu
index 6f9b194..ab5a693 100644
--- a/src/backend/cuda/sobel.cu
+++ b/src/backend/cuda/sobel.cu
@@ -42,5 +42,7 @@ INSTANTIATE(int   , int)
 INSTANTIATE(uint  , int)
 INSTANTIATE(char  , int)
 INSTANTIATE(uchar , int)
+INSTANTIATE(short , int)
+INSTANTIATE(ushort, int)
 
 }
diff --git a/src/backend/cuda/sort.cu b/src/backend/cuda/sort.cu
index dc74b80..9823174 100644
--- a/src/backend/cuda/sort.cu
+++ b/src/backend/cuda/sort.cu
@@ -40,4 +40,6 @@ namespace cuda
     INSTANTIATE(uint)
     INSTANTIATE(char)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/sort_by_key/ascd_s16.cu
similarity index 65%
copy from src/backend/cuda/dilate.cu
copy to src/backend/cuda/sort_by_key/ascd_s16.cu
index 0da33f2..d51e9ae 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/sort_by_key/ascd_s16.cu
@@ -7,16 +7,9 @@
  * http://arrayfire.com/licenses/BSD-3-Clause
  ********************************************************/
 
-#include "morph_impl.hpp"
+#include <sort_by_key_impl.hpp>
 
 namespace cuda
 {
-
-INSTANTIATE(float , true)
-INSTANTIATE(double, true)
-INSTANTIATE(char  , true)
-INSTANTIATE(int   , true)
-INSTANTIATE(uint  , true)
-INSTANTIATE(uchar , true)
-
+    INSTANTIATE1(short, true)
 }
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/sort_by_key/ascd_u16.cu
similarity index 65%
copy from src/backend/cuda/dilate.cu
copy to src/backend/cuda/sort_by_key/ascd_u16.cu
index 0da33f2..e06036a 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/sort_by_key/ascd_u16.cu
@@ -7,16 +7,9 @@
  * http://arrayfire.com/licenses/BSD-3-Clause
  ********************************************************/
 
-#include "morph_impl.hpp"
+#include <sort_by_key_impl.hpp>
 
 namespace cuda
 {
-
-INSTANTIATE(float , true)
-INSTANTIATE(double, true)
-INSTANTIATE(char  , true)
-INSTANTIATE(int   , true)
-INSTANTIATE(uint  , true)
-INSTANTIATE(uchar , true)
-
+    INSTANTIATE1(ushort, true)
 }
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/sort_by_key/desc_s16.cu
similarity index 65%
copy from src/backend/cuda/dilate.cu
copy to src/backend/cuda/sort_by_key/desc_s16.cu
index 0da33f2..63967b6 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/sort_by_key/desc_s16.cu
@@ -7,16 +7,9 @@
  * http://arrayfire.com/licenses/BSD-3-Clause
  ********************************************************/
 
-#include "morph_impl.hpp"
+#include <sort_by_key_impl.hpp>
 
 namespace cuda
 {
-
-INSTANTIATE(float , true)
-INSTANTIATE(double, true)
-INSTANTIATE(char  , true)
-INSTANTIATE(int   , true)
-INSTANTIATE(uint  , true)
-INSTANTIATE(uchar , true)
-
+    INSTANTIATE1(short, false)
 }
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/sort_by_key/desc_u16.cu
similarity index 65%
copy from src/backend/cuda/dilate.cu
copy to src/backend/cuda/sort_by_key/desc_u16.cu
index 0da33f2..69dc016 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/sort_by_key/desc_u16.cu
@@ -7,16 +7,9 @@
  * http://arrayfire.com/licenses/BSD-3-Clause
  ********************************************************/
 
-#include "morph_impl.hpp"
+#include <sort_by_key_impl.hpp>
 
 namespace cuda
 {
-
-INSTANTIATE(float , true)
-INSTANTIATE(double, true)
-INSTANTIATE(char  , true)
-INSTANTIATE(int   , true)
-INSTANTIATE(uint  , true)
-INSTANTIATE(uchar , true)
-
+    INSTANTIATE1(ushort, false)
 }
diff --git a/src/backend/cuda/sort_by_key_impl.hpp b/src/backend/cuda/sort_by_key_impl.hpp
index 32758b4..9cd286c 100644
--- a/src/backend/cuda/sort_by_key_impl.hpp
+++ b/src/backend/cuda/sort_by_key_impl.hpp
@@ -40,6 +40,8 @@ namespace cuda
     INSTANTIATE(Tk, double, dr) \
     INSTANTIATE(Tk, int,    dr) \
     INSTANTIATE(Tk, uint,   dr) \
+    INSTANTIATE(Tk, short,  dr) \
+    INSTANTIATE(Tk, ushort, dr) \
     INSTANTIATE(Tk, char,   dr) \
     INSTANTIATE(Tk, uchar,  dr)
 }
diff --git a/src/backend/cuda/sort_index.cu b/src/backend/cuda/sort_index.cu
index b80287b..a073c72 100644
--- a/src/backend/cuda/sort_index.cu
+++ b/src/backend/cuda/sort_index.cu
@@ -41,5 +41,7 @@ namespace cuda
     INSTANTIATE(uint)
     INSTANTIATE(char)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/sum.cu b/src/backend/cuda/sum.cu
index 407cc98..95f2177 100644
--- a/src/backend/cuda/sum.cu
+++ b/src/backend/cuda/sum.cu
@@ -22,4 +22,6 @@ namespace cuda
     INSTANTIATE(af_add_t, uintl  , uintl  )
     INSTANTIATE(af_add_t, char   , int    )
     INSTANTIATE(af_add_t, uchar  , uint   )
+    INSTANTIATE(af_add_t, short  , int    )
+    INSTANTIATE(af_add_t, ushort , uint   )
 }
diff --git a/src/backend/cuda/susan.cu b/src/backend/cuda/susan.cu
index 8474454..6925d0c 100644
--- a/src/backend/cuda/susan.cu
+++ b/src/backend/cuda/susan.cu
@@ -63,5 +63,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/tile.cu b/src/backend/cuda/tile.cu
index 2a9af87..f15fd87 100644
--- a/src/backend/cuda/tile.cu
+++ b/src/backend/cuda/tile.cu
@@ -46,5 +46,7 @@ namespace cuda
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/transform.cu b/src/backend/cuda/transform.cu
index 214bce3..853617c 100644
--- a/src/backend/cuda/transform.cu
+++ b/src/backend/cuda/transform.cu
@@ -55,4 +55,6 @@ namespace cuda
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/transpose.cu b/src/backend/cuda/transpose.cu
index e787b6e..fff167a 100644
--- a/src/backend/cuda/transpose.cu
+++ b/src/backend/cuda/transpose.cu
@@ -46,5 +46,7 @@ INSTANTIATE(uint   )
 INSTANTIATE(uchar  )
 INSTANTIATE(intl   )
 INSTANTIATE(uintl  )
+INSTANTIATE(short)
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cuda/transpose_inplace.cu b/src/backend/cuda/transpose_inplace.cu
index 98613bc..1d34580 100644
--- a/src/backend/cuda/transpose_inplace.cu
+++ b/src/backend/cuda/transpose_inplace.cu
@@ -37,6 +37,8 @@ INSTANTIATE(uint   )
 INSTANTIATE(uchar  )
 INSTANTIATE(intl   )
 INSTANTIATE(uintl  )
+INSTANTIATE(short  )
+INSTANTIATE(ushort )
 
 }
 
diff --git a/src/backend/cuda/triangle.cu b/src/backend/cuda/triangle.cu
index 99970a0..e92b1d5 100644
--- a/src/backend/cuda/triangle.cu
+++ b/src/backend/cuda/triangle.cu
@@ -52,4 +52,6 @@ Array<T> triangle(const Array<T> &in)
     INSTANTIATE(uintl)
     INSTANTIATE(char)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/types.cpp b/src/backend/cuda/types.cpp
index f83913b..8c29c00 100644
--- a/src/backend/cuda/types.cpp
+++ b/src/backend/cuda/types.cpp
@@ -24,6 +24,8 @@ namespace cuda
     template<> const char *cuShortName<uchar   >() { return "h"; }
     template<> const char *cuShortName<intl    >() { return "x"; }
     template<> const char *cuShortName<uintl   >() { return "y"; }
+    template<> const char *cuShortName<short   >() { return "s"; }
+    template<> const char *cuShortName<ushort  >() { return "t"; }
 
     template<typename T > const char *afShortName(bool caps) { return caps ?  "Q" : "q"; }
     template<> const char *afShortName<float   >(bool caps) { return caps ?  "S" : "s"; }
@@ -36,6 +38,8 @@ namespace cuda
     template<> const char *afShortName<uchar   >(bool caps) { return caps ?  "V" : "v"; }
     template<> const char *afShortName<intl    >(bool caps) { return caps ?  "X" : "x"; }
     template<> const char *afShortName<uintl   >(bool caps) { return caps ?  "Y" : "y"; }
+    template<> const char *afShortName<short   >(bool caps) { return caps ?  "P" : "p"; } // lower-case when !caps, like the sibling types
+    template<> const char *afShortName<ushort  >(bool caps) { return caps ?  "Q" : "q"; } // lower-case when !caps, like the sibling types
 
     template<typename T > const char *irname() { return  "i32"; }
     template<> const char *irname<float   >() { return  "float"; }
@@ -48,6 +52,8 @@ namespace cuda
     template<> const char *irname<uintl   >() { return  "i64"; }
     template<> const char *irname<char    >() { return  "i8"; }
     template<> const char *irname<uchar   >() { return  "i8"; }
+    template<> const char *irname<short   >() { return  "i16"; }
+    template<> const char *irname<ushort  >() { return  "i16"; }
 
     template <typename T>
     static inline std::string toString(T val)
@@ -89,4 +95,6 @@ namespace cuda
     INSTANTIATE(uint)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/types.hpp b/src/backend/cuda/types.hpp
index 0d807ae..26d0bb6 100644
--- a/src/backend/cuda/types.hpp
+++ b/src/backend/cuda/types.hpp
@@ -16,7 +16,8 @@ namespace cuda
     typedef cuFloatComplex   cfloat;
     typedef cuDoubleComplex cdouble;
     typedef unsigned int   uint;
-    typedef unsigned char uchar;
+    typedef unsigned char  uchar;
+    typedef unsigned short ushort;
 
     template<typename T> struct is_complex          { static const bool value = false;  };
     template<> struct           is_complex<cfloat>  { static const bool value = true;   };
diff --git a/src/backend/cuda/unwrap.cu b/src/backend/cuda/unwrap.cu
index 8600ca1..a61aba4 100644
--- a/src/backend/cuda/unwrap.cu
+++ b/src/backend/cuda/unwrap.cu
@@ -54,4 +54,6 @@ namespace cuda
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cuda/where.cu b/src/backend/cuda/where.cu
index 8e4f9cf..a43e339 100644
--- a/src/backend/cuda/where.cu
+++ b/src/backend/cuda/where.cu
@@ -42,5 +42,7 @@ namespace cuda
     INSTANTIATE(intl   )
     INSTANTIATE(uintl  )
     INSTANTIATE(uchar  )
+    INSTANTIATE(short  )
+    INSTANTIATE(ushort )
 
 }
diff --git a/src/backend/cuda/wrap.cu b/src/backend/cuda/wrap.cu
index a1e70fc..017a3a4 100644
--- a/src/backend/cuda/wrap.cu
+++ b/src/backend/cuda/wrap.cu
@@ -54,4 +54,6 @@ namespace cuda
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list