[arrayfire] 70/248: Added short, ushort support for CUDA backend
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Nov 17 15:54:03 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.
commit 7f3ff109862f0ce8171013306d7f904586eb1e53
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Tue Sep 22 16:01:38 2015 -0400
Added short, ushort support for CUDA backend
* Need to complete OpenCL backend
---
src/backend/cuda/Array.cpp | 2 ++
src/backend/cuda/JIT/arith.cu | 2 ++
src/backend/cuda/JIT/cast.cu | 20 +++++++----
src/backend/cuda/JIT/exp.cu | 4 +++
src/backend/cuda/JIT/hyper.cu | 2 ++
src/backend/cuda/JIT/logic.cu | 8 +++++
src/backend/cuda/JIT/numeric.cu | 42 +++++++++++++++-------
src/backend/cuda/JIT/trig.cu | 4 +++
src/backend/cuda/JIT/types.h | 1 +
src/backend/cuda/all.cu | 2 ++
src/backend/cuda/any.cu | 2 ++
src/backend/cuda/assign.cu | 8 +++--
src/backend/cuda/bilateral.cu | 2 ++
src/backend/cuda/convolve.cpp | 2 ++
src/backend/cuda/copy.cu | 38 +++++++++++++-------
src/backend/cuda/count.cu | 2 ++
src/backend/cuda/diagonal.cu | 2 ++
src/backend/cuda/diff.cu | 2 ++
src/backend/cuda/dilate.cu | 2 ++
src/backend/cuda/dilate3d.cu | 2 ++
src/backend/cuda/erode.cu | 2 ++
src/backend/cuda/erode3d.cu | 2 ++
src/backend/cuda/fast.cu | 2 ++
src/backend/cuda/fast_pyramid.cu | 2 ++
src/backend/cuda/fftconvolve.cu | 2 ++
src/backend/cuda/histogram.cu | 2 ++
src/backend/cuda/identity.cu | 2 ++
src/backend/cuda/index.cu | 6 ++--
src/backend/cuda/iota.cu | 2 ++
src/backend/cuda/ireduce.cu | 4 +++
src/backend/cuda/jit.cpp | 2 ++
src/backend/cuda/join.cu | 22 +++++++-----
src/backend/cuda/kernel/convolve.cu | 14 ++++----
src/backend/cuda/kernel/convolve_separable.cu | 10 +++---
src/backend/cuda/kernel/fast.hpp | 20 +++++++++++
src/backend/cuda/kernel/memcopy.hpp | 2 ++
src/backend/cuda/kernel/nearest_neighbour.hpp | 9 +++++
src/backend/cuda/kernel/shared.hpp | 2 ++
src/backend/cuda/lookup.cu | 4 +++
src/backend/cuda/match_template.cu | 2 ++
src/backend/cuda/math.hpp | 7 ++++
src/backend/cuda/max.cu | 2 ++
src/backend/cuda/meanshift.cu | 2 ++
src/backend/cuda/medfilt.cu | 2 ++
src/backend/cuda/memory.cpp | 2 ++
src/backend/cuda/min.cu | 2 ++
src/backend/cuda/nearest_neighbour.cu | 2 ++
src/backend/cuda/product.cu | 4 ++-
src/backend/cuda/random.cu | 2 ++
src/backend/cuda/range.cu | 2 ++
src/backend/cuda/regions.cu | 2 ++
src/backend/cuda/reorder.cu | 2 ++
src/backend/cuda/resize.cu | 2 ++
src/backend/cuda/rotate.cu | 2 ++
src/backend/cuda/scan.cu | 2 ++
src/backend/cuda/select.cu | 2 ++
src/backend/cuda/set.cu | 2 ++
src/backend/cuda/shift.cu | 2 ++
src/backend/cuda/sobel.cu | 2 ++
src/backend/cuda/sort.cu | 2 ++
.../cuda/{dilate.cu => sort_by_key/ascd_s16.cu} | 11 ++----
.../cuda/{dilate.cu => sort_by_key/ascd_u16.cu} | 11 ++----
.../cuda/{dilate.cu => sort_by_key/desc_s16.cu} | 11 ++----
.../cuda/{dilate.cu => sort_by_key/desc_u16.cu} | 11 ++----
src/backend/cuda/sort_by_key_impl.hpp | 2 ++
src/backend/cuda/sort_index.cu | 2 ++
src/backend/cuda/sum.cu | 2 ++
src/backend/cuda/susan.cu | 2 ++
src/backend/cuda/tile.cu | 2 ++
src/backend/cuda/transform.cu | 2 ++
src/backend/cuda/transpose.cu | 2 ++
src/backend/cuda/transpose_inplace.cu | 2 ++
src/backend/cuda/triangle.cu | 2 ++
src/backend/cuda/types.cpp | 8 +++++
src/backend/cuda/types.hpp | 3 +-
src/backend/cuda/unwrap.cu | 2 ++
src/backend/cuda/where.cu | 2 ++
src/backend/cuda/wrap.cu | 2 ++
78 files changed, 295 insertions(+), 93 deletions(-)
diff --git a/src/backend/cuda/Array.cpp b/src/backend/cuda/Array.cpp
index ed86a8e..b7d7b3c 100644
--- a/src/backend/cuda/Array.cpp
+++ b/src/backend/cuda/Array.cpp
@@ -291,5 +291,7 @@ namespace cuda
INSTANTIATE(char)
INSTANTIATE(intl)
INSTANTIATE(uintl)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/JIT/arith.cu b/src/backend/cuda/JIT/arith.cu
index 01e5f41..adfa9e9 100644
--- a/src/backend/cuda/JIT/arith.cu
+++ b/src/backend/cuda/JIT/arith.cu
@@ -25,6 +25,8 @@
ARITH_BASIC(fn, op, uchar) \
ARITH_BASIC(fn, op, intl) \
ARITH_BASIC(fn, op, uintl) \
+ ARITH_BASIC(fn, op, short) \
+ ARITH_BASIC(fn, op, ushort) \
\
__device__ cfloat ___##fn(cfloat a, cfloat b) \
{ \
diff --git a/src/backend/cuda/JIT/cast.cu b/src/backend/cuda/JIT/cast.cu
index db41c52..8905955 100644
--- a/src/backend/cuda/JIT/cast.cu
+++ b/src/backend/cuda/JIT/cast.cu
@@ -22,14 +22,18 @@
CAST_BASIC(___mk##X, T, uchar) \
CAST_BASIC(___mk##X, T, intl) \
CAST_BASIC(___mk##X, T, uintl) \
+ CAST_BASIC(___mk##X, T, short) \
+ CAST_BASIC(___mk##X, T, ushort) \
-CAST(float, S)
+CAST(float , S)
CAST(double, D)
-CAST(int, I)
-CAST(intl, X)
-CAST(uint, U)
-CAST(uchar, V)
-CAST(uintl, Y)
+CAST(int , I)
+CAST(intl , X)
+CAST(short , P)
+CAST(uint , U)
+CAST(uchar , V)
+CAST(uintl , Y)
+CAST(ushort, Q)
CAST_BASIC_BOOL(___mkJ, char, float)
CAST_BASIC_BOOL(___mkJ, char, double)
@@ -39,6 +43,8 @@ CAST_BASIC_BOOL(___mkJ, char, char)
CAST_BASIC_BOOL(___mkJ, char, uchar)
CAST_BASIC_BOOL(___mkJ, char, intl)
CAST_BASIC_BOOL(___mkJ, char, uintl)
+CAST_BASIC_BOOL(___mkJ, char, short)
+CAST_BASIC_BOOL(___mkJ, char, ushort)
#define CPLX_BASIC(FN, To, Tr, Ti) \
__device__ To FN(Ti in) \
@@ -56,6 +62,8 @@ CAST_BASIC_BOOL(___mkJ, char, uintl)
CPLX_BASIC(___mk##X, T, Tr, uchar) \
CPLX_BASIC(___mk##X, T, Tr, uintl) \
CPLX_BASIC(___mk##X, T, Tr, intl) \
+ CPLX_BASIC(___mk##X, T, Tr, ushort) \
+ CPLX_BASIC(___mk##X, T, Tr, short) \
CPLX_CAST(cfloat, float, C)
CPLX_CAST(cdouble, double, Z)
diff --git a/src/backend/cuda/JIT/exp.cu b/src/backend/cuda/JIT/exp.cu
index 23a3300..3f110b4 100644
--- a/src/backend/cuda/JIT/exp.cu
+++ b/src/backend/cuda/JIT/exp.cu
@@ -34,6 +34,8 @@ __device__ float sigmoidf(float in)
MATH_BASIC(fn, uchar) \
MATH_BASIC(fn, uintl) \
MATH_BASIC(fn, intl) \
+ MATH_BASIC(fn, ushort) \
+ MATH_BASIC(fn, short) \
__device__ double ___##fn(double a) \
{ \
return fn(a); \
@@ -68,6 +70,8 @@ MATH(cbrt)
MATH2_BASIC(fn, uchar) \
MATH2_BASIC(fn, uintl) \
MATH2_BASIC(fn, intl) \
+ MATH2_BASIC(fn, ushort) \
+ MATH2_BASIC(fn, short) \
__device__ double ___##fn(double a, double b) \
{ \
return fn(a, b); \
diff --git a/src/backend/cuda/JIT/hyper.cu b/src/backend/cuda/JIT/hyper.cu
index 00ea2da..6673fb1 100644
--- a/src/backend/cuda/JIT/hyper.cu
+++ b/src/backend/cuda/JIT/hyper.cu
@@ -24,6 +24,8 @@
MATH_BASIC(fn, uchar) \
MATH_BASIC(fn, uintl) \
MATH_BASIC(fn, intl) \
+ MATH_BASIC(fn, ushort) \
+ MATH_BASIC(fn, short) \
__device__ double ___##fn(double a) \
{ \
return fn(a); \
diff --git a/src/backend/cuda/JIT/logic.cu b/src/backend/cuda/JIT/logic.cu
index 883f3db..6072c3c 100644
--- a/src/backend/cuda/JIT/logic.cu
+++ b/src/backend/cuda/JIT/logic.cu
@@ -25,6 +25,8 @@
LOGIC_BASIC(fn, op, uchar) \
LOGIC_BASIC(fn, op, intl) \
LOGIC_BASIC(fn, op, uintl) \
+ LOGIC_BASIC(fn, op, short) \
+ LOGIC_BASIC(fn, op, ushort) \
\
__device__ bool ___##fn(cfloat a, cfloat b) \
{ \
@@ -52,6 +54,8 @@ LOGIC(or, ||)
LOGIC_BASIC(fn, op, uchar) \
LOGIC_BASIC(fn, op, intl) \
LOGIC_BASIC(fn, op, uintl) \
+ LOGIC_BASIC(fn, op, short) \
+ LOGIC_BASIC(fn, op, ushort) \
\
__device__ bool ___##fn(cfloat a, cfloat b) \
{ \
@@ -77,6 +81,8 @@ NOT_FN(char)
NOT_FN(uchar)
NOT_FN(intl)
NOT_FN(uintl)
+NOT_FN(short)
+NOT_FN(ushort)
#define BIT_FN(T) \
__device__ T ___bitand (T lhs, T rhs) { return lhs & rhs; } \
@@ -91,6 +97,8 @@ BIT_FN(intl)
BIT_FN(uchar)
BIT_FN(uint)
BIT_FN(uintl)
+BIT_FN(short)
+BIT_FN(ushort)
__device__ char ___isNaN(float in) { return isnan(in); }
__device__ char ___isINF(float in) { return isinf(in); }
diff --git a/src/backend/cuda/JIT/numeric.cu b/src/backend/cuda/JIT/numeric.cu
index 158cc24..8253db6 100644
--- a/src/backend/cuda/JIT/numeric.cu
+++ b/src/backend/cuda/JIT/numeric.cu
@@ -39,6 +39,8 @@ MATH_NOOP(floor, char)
MATH_NOOP(floor, uchar)
MATH_NOOP(floor, uintl)
MATH_NOOP(floor, intl)
+MATH_NOOP(floor, ushort)
+MATH_NOOP(floor, short)
MATH_BASIC(ceil, float)
MATH_BASIC(ceil, double)
@@ -48,6 +50,8 @@ MATH_NOOP(ceil, char)
MATH_NOOP(ceil, uchar)
MATH_NOOP(ceil, uintl)
MATH_NOOP(ceil, intl)
+MATH_NOOP(ceil, ushort)
+MATH_NOOP(ceil, short)
MATH_BASIC(round, float)
MATH_BASIC(round, double)
@@ -57,6 +61,8 @@ MATH_NOOP(round, char)
MATH_NOOP(round, uchar)
MATH_NOOP(round, uintl)
MATH_NOOP(round, intl)
+MATH_NOOP(round, ushort)
+MATH_NOOP(round, short)
MATH_BASIC(trunc, float)
MATH_BASIC(trunc, double)
@@ -66,6 +72,8 @@ MATH_NOOP(trunc, char)
MATH_NOOP(trunc, uchar)
MATH_NOOP(trunc, uintl)
MATH_NOOP(trunc, intl)
+MATH_NOOP(trunc, ushort)
+MATH_NOOP(trunc, short)
MATH_BASIC(sign, float)
MATH_BASIC(sign, double)
@@ -75,6 +83,8 @@ MATH_NOOP(sign, char)
MATH_NOOP(sign, uchar)
MATH_NOOP(sign, uintl)
MATH_NOOP(sign, intl)
+MATH_NOOP(sign, ushort)
+MATH_NOOP(sign, short)
MATH_BASIC(abs, float)
MATH_BASIC(abs, double)
@@ -84,24 +94,30 @@ MATH_NOOP(abs, uint)
MATH_NOOP(abs, uchar)
MATH_NOOP(abs, uintl)
MATH_NOOP(abs, intl)
+MATH_NOOP(abs, ushort)
+MATH_NOOP(abs, short)
MATH_BASIC(tgamma, float)
MATH_BASIC(tgamma, double)
-MATH_CAST(tgamma, int, float)
-MATH_CAST(tgamma, uint, float)
-MATH_CAST(tgamma, char, float)
-MATH_CAST(tgamma, uchar, float)
-MATH_CAST(tgamma, uintl, float)
-MATH_CAST(tgamma, intl, float)
+MATH_CAST(tgamma, int , float)
+MATH_CAST(tgamma, uint , float)
+MATH_CAST(tgamma, char , float)
+MATH_CAST(tgamma, uchar , float)
+MATH_CAST(tgamma, uintl , float)
+MATH_CAST(tgamma, intl , float)
+MATH_CAST(tgamma, ushort, float)
+MATH_CAST(tgamma, short , float)
MATH_BASIC(lgamma, float)
MATH_BASIC(lgamma, double)
-MATH_CAST(lgamma, int, float)
-MATH_CAST(lgamma, uint, float)
-MATH_CAST(lgamma, char, float)
-MATH_CAST(lgamma, uchar, float)
-MATH_CAST(lgamma, uintl, float)
-MATH_CAST(lgamma, intl, float)
+MATH_CAST(lgamma, int , float)
+MATH_CAST(lgamma, uint , float)
+MATH_CAST(lgamma, char , float)
+MATH_CAST(lgamma, uchar , float)
+MATH_CAST(lgamma, uintl , float)
+MATH_CAST(lgamma, intl , float)
+MATH_CAST(lgamma, ushort, float)
+MATH_CAST(lgamma, short , float)
__device__ float ___abs(cfloat a) { return cuCabsf(a); }
__device__ double ___abs(cdouble a) { return cuCabs(a); }
@@ -128,6 +144,8 @@ __device__ double mod(double a, double b) { return fmod(a, b); }
MATH2_BASIC(fn, uintl) \
MATH2_BASIC(fn, char) \
MATH2_BASIC(fn, uchar) \
+ MATH2_BASIC(fn, short) \
+ MATH2_BASIC(fn, ushort) \
__device__ double ___##fn(double a, double b) \
{ \
return fn(a, b); \
diff --git a/src/backend/cuda/JIT/trig.cu b/src/backend/cuda/JIT/trig.cu
index 28f098e..372bd4d 100644
--- a/src/backend/cuda/JIT/trig.cu
+++ b/src/backend/cuda/JIT/trig.cu
@@ -24,6 +24,8 @@
MATH_BASIC(fn, uchar) \
MATH_BASIC(fn, uintl) \
MATH_BASIC(fn, intl) \
+ MATH_BASIC(fn, ushort) \
+ MATH_BASIC(fn, short) \
__device__ double ___##fn(double a) \
{ \
return fn(a); \
@@ -51,6 +53,8 @@ ATAN2(char)
ATAN2(uchar)
ATAN2(uintl)
ATAN2(intl)
+ATAN2(ushort)
+ATAN2(short)
__device__ double ___atan2(double x, double y)
{
diff --git a/src/backend/cuda/JIT/types.h b/src/backend/cuda/JIT/types.h
index 80314bc..4a97ef3 100644
--- a/src/backend/cuda/JIT/types.h
+++ b/src/backend/cuda/JIT/types.h
@@ -11,6 +11,7 @@
#include <math_functions.h>
typedef unsigned char uchar;
typedef unsigned int uint;
+typedef unsigned short ushort;
typedef cuFloatComplex cfloat;
typedef cuDoubleComplex cdouble;
typedef long long intl;
diff --git a/src/backend/cuda/all.cu b/src/backend/cuda/all.cu
index bfc070a..b70f98a 100644
--- a/src/backend/cuda/all.cu
+++ b/src/backend/cuda/all.cu
@@ -22,4 +22,6 @@ namespace cuda
INSTANTIATE(af_and_t, uintl , char)
INSTANTIATE(af_and_t, char , char)
INSTANTIATE(af_and_t, uchar , char)
+ INSTANTIATE(af_and_t, short , char)
+ INSTANTIATE(af_and_t, ushort , char)
}
diff --git a/src/backend/cuda/any.cu b/src/backend/cuda/any.cu
index 836970e..aa13fbb 100644
--- a/src/backend/cuda/any.cu
+++ b/src/backend/cuda/any.cu
@@ -22,4 +22,6 @@ namespace cuda
INSTANTIATE(af_or_t, uintl , char)
INSTANTIATE(af_or_t, char , char)
INSTANTIATE(af_or_t, uchar , char)
+ INSTANTIATE(af_or_t, short , char)
+ INSTANTIATE(af_or_t, ushort , char)
}
diff --git a/src/backend/cuda/assign.cu b/src/backend/cuda/assign.cu
index 7bea851..7d00b15 100644
--- a/src/backend/cuda/assign.cu
+++ b/src/backend/cuda/assign.cu
@@ -69,11 +69,13 @@ INSTANTIATE(cdouble)
INSTANTIATE(double )
INSTANTIATE(cfloat )
INSTANTIATE(float )
-INSTANTIATE(uintl )
+INSTANTIATE(int )
INSTANTIATE(uint )
INSTANTIATE(intl )
-INSTANTIATE(int )
-INSTANTIATE(uchar )
+INSTANTIATE(uintl )
INSTANTIATE(char )
+INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort )
}
diff --git a/src/backend/cuda/bilateral.cu b/src/backend/cuda/bilateral.cu
index 4c1d7fc..bdb19fd 100644
--- a/src/backend/cuda/bilateral.cu
+++ b/src/backend/cuda/bilateral.cu
@@ -37,5 +37,7 @@ INSTANTIATE(char , float)
INSTANTIATE(int , float)
INSTANTIATE(uint , float)
INSTANTIATE(uchar , float)
+INSTANTIATE(short , float)
+INSTANTIATE(ushort, float)
}
diff --git a/src/backend/cuda/convolve.cpp b/src/backend/cuda/convolve.cpp
index 9f14e6a..23f470f 100644
--- a/src/backend/cuda/convolve.cpp
+++ b/src/backend/cuda/convolve.cpp
@@ -96,5 +96,7 @@ INSTANTIATE(uint , float)
INSTANTIATE(int , float)
INSTANTIATE(uchar , float)
INSTANTIATE(char , float)
+INSTANTIATE(ushort , float)
+INSTANTIATE(short , float)
}
diff --git a/src/backend/cuda/copy.cu b/src/backend/cuda/copy.cu
index 1f23804..90f9970 100644
--- a/src/backend/cuda/copy.cu
+++ b/src/backend/cuda/copy.cu
@@ -120,16 +120,18 @@ namespace cuda
template Array<T> copyArray<T>(const Array<T> &A); \
template void multiply_inplace<T> (Array<T> &in, double norm); \
- INSTANTIATE(float)
- INSTANTIATE(double)
- INSTANTIATE(cfloat)
+ INSTANTIATE(float )
+ INSTANTIATE(double )
+ INSTANTIATE(cfloat )
INSTANTIATE(cdouble)
- INSTANTIATE(int)
- INSTANTIATE(uint)
- INSTANTIATE(uchar)
- INSTANTIATE(char)
+ INSTANTIATE(int )
+ INSTANTIATE(uint )
+ INSTANTIATE(uchar )
+ INSTANTIATE(char )
INSTANTIATE(intl )
INSTANTIATE(uintl )
+ INSTANTIATE(short )
+ INSTANTIATE(ushort )
#define INSTANTIATE_PAD_ARRAY(SRC_T) \
template Array<float > padArray<SRC_T, float >(Array<SRC_T> const &src, dim4 const &dims, float default_value, double factor); \
@@ -138,8 +140,10 @@ namespace cuda
template Array<cdouble> padArray<SRC_T, cdouble>(Array<SRC_T> const &src, dim4 const &dims, cdouble default_value, double factor); \
template Array<int > padArray<SRC_T, int >(Array<SRC_T> const &src, dim4 const &dims, int default_value, double factor); \
template Array<uint > padArray<SRC_T, uint >(Array<SRC_T> const &src, dim4 const &dims, uint default_value, double factor); \
- template Array<intl > padArray<SRC_T, intl >(Array<SRC_T> const &src, dim4 const &dims, intl default_value, double factor); \
- template Array<uintl > padArray<SRC_T, uintl >(Array<SRC_T> const &src, dim4 const &dims, uintl default_value, double factor); \
+ template Array<intl > padArray<SRC_T, intl >(Array<SRC_T> const &src, dim4 const &dims, intl default_value, double factor); \
+ template Array<uintl > padArray<SRC_T, uintl >(Array<SRC_T> const &src, dim4 const &dims, uintl default_value, double factor); \
+ template Array<short > padArray<SRC_T, short >(Array<SRC_T> const &src, dim4 const &dims, short default_value, double factor); \
+ template Array<ushort > padArray<SRC_T, ushort >(Array<SRC_T> const &src, dim4 const &dims, ushort default_value, double factor); \
template Array<uchar > padArray<SRC_T, uchar >(Array<SRC_T> const &src, dim4 const &dims, uchar default_value, double factor); \
template Array<char > padArray<SRC_T, char >(Array<SRC_T> const &src, dim4 const &dims, char default_value, double factor); \
template void copyArray<SRC_T, float >(Array<float > &dst, Array<SRC_T> const &src); \
@@ -148,8 +152,10 @@ namespace cuda
template void copyArray<SRC_T, cdouble>(Array<cdouble> &dst, Array<SRC_T> const &src); \
template void copyArray<SRC_T, int >(Array<int > &dst, Array<SRC_T> const &src); \
template void copyArray<SRC_T, uint >(Array<uint > &dst, Array<SRC_T> const &src); \
- template void copyArray<SRC_T, intl >(Array<intl > &dst, Array<SRC_T> const &src); \
- template void copyArray<SRC_T, uintl >(Array<uintl > &dst, Array<SRC_T> const &src); \
+ template void copyArray<SRC_T, intl >(Array<intl > &dst, Array<SRC_T> const &src); \
+ template void copyArray<SRC_T, uintl >(Array<uintl > &dst, Array<SRC_T> const &src); \
+ template void copyArray<SRC_T, short >(Array<short > &dst, Array<SRC_T> const &src); \
+ template void copyArray<SRC_T, ushort >(Array<ushort > &dst, Array<SRC_T> const &src); \
template void copyArray<SRC_T, uchar >(Array<uchar > &dst, Array<SRC_T> const &src); \
template void copyArray<SRC_T, char >(Array<char > &dst, Array<SRC_T> const &src);
@@ -157,8 +163,10 @@ namespace cuda
INSTANTIATE_PAD_ARRAY(double)
INSTANTIATE_PAD_ARRAY(int )
INSTANTIATE_PAD_ARRAY(uint )
- INSTANTIATE_PAD_ARRAY(intl )
- INSTANTIATE_PAD_ARRAY(uintl )
+ INSTANTIATE_PAD_ARRAY(intl )
+ INSTANTIATE_PAD_ARRAY(uintl )
+ INSTANTIATE_PAD_ARRAY(short )
+ INSTANTIATE_PAD_ARRAY(ushort)
INSTANTIATE_PAD_ARRAY(uchar )
INSTANTIATE_PAD_ARRAY(char )
@@ -185,6 +193,8 @@ namespace cuda
SPECILIAZE_UNUSED_COPYARRAY(cfloat, int)
SPECILIAZE_UNUSED_COPYARRAY(cfloat, intl)
SPECILIAZE_UNUSED_COPYARRAY(cfloat, uintl)
+ SPECILIAZE_UNUSED_COPYARRAY(cfloat, short)
+ SPECILIAZE_UNUSED_COPYARRAY(cfloat, ushort)
SPECILIAZE_UNUSED_COPYARRAY(cdouble, double)
SPECILIAZE_UNUSED_COPYARRAY(cdouble, float)
SPECILIAZE_UNUSED_COPYARRAY(cdouble, uchar)
@@ -193,4 +203,6 @@ namespace cuda
SPECILIAZE_UNUSED_COPYARRAY(cdouble, int)
SPECILIAZE_UNUSED_COPYARRAY(cdouble, intl)
SPECILIAZE_UNUSED_COPYARRAY(cdouble, uintl)
+ SPECILIAZE_UNUSED_COPYARRAY(cdouble, short)
+ SPECILIAZE_UNUSED_COPYARRAY(cdouble, ushort)
}
diff --git a/src/backend/cuda/count.cu b/src/backend/cuda/count.cu
index d624141..365897f 100644
--- a/src/backend/cuda/count.cu
+++ b/src/backend/cuda/count.cu
@@ -20,6 +20,8 @@ namespace cuda
INSTANTIATE(af_notzero_t, uint , uint)
INSTANTIATE(af_notzero_t, intl , uint)
INSTANTIATE(af_notzero_t, uintl , uint)
+ INSTANTIATE(af_notzero_t, short , uint)
+ INSTANTIATE(af_notzero_t, ushort , uint)
INSTANTIATE(af_notzero_t, char , uint)
INSTANTIATE(af_notzero_t, uchar , uint)
}
diff --git a/src/backend/cuda/diagonal.cu b/src/backend/cuda/diagonal.cu
index 05b8025..fd023c9 100644
--- a/src/backend/cuda/diagonal.cu
+++ b/src/backend/cuda/diagonal.cu
@@ -56,5 +56,7 @@ namespace cuda
INSTANTIATE_DIAGONAL(uintl)
INSTANTIATE_DIAGONAL(char)
INSTANTIATE_DIAGONAL(uchar)
+ INSTANTIATE_DIAGONAL(short)
+ INSTANTIATE_DIAGONAL(ushort)
}
diff --git a/src/backend/cuda/diff.cu b/src/backend/cuda/diff.cu
index a50ba26..96135f9 100644
--- a/src/backend/cuda/diff.cu
+++ b/src/backend/cuda/diff.cu
@@ -70,5 +70,7 @@ namespace cuda
INSTANTIATE(uintl)
INSTANTIATE(uchar)
INSTANTIATE(char)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/dilate.cu
index 0da33f2..9115ba8 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/dilate.cu
@@ -18,5 +18,7 @@ INSTANTIATE(char , true)
INSTANTIATE(int , true)
INSTANTIATE(uint , true)
INSTANTIATE(uchar , true)
+INSTANTIATE(short , true)
+INSTANTIATE(ushort, true)
}
diff --git a/src/backend/cuda/dilate3d.cu b/src/backend/cuda/dilate3d.cu
index 32b0bab..4846e40 100644
--- a/src/backend/cuda/dilate3d.cu
+++ b/src/backend/cuda/dilate3d.cu
@@ -18,5 +18,7 @@ INSTANTIATE(char , true)
INSTANTIATE(int , true)
INSTANTIATE(uint , true)
INSTANTIATE(uchar , true)
+INSTANTIATE(short , true)
+INSTANTIATE(ushort, true)
}
diff --git a/src/backend/cuda/erode.cu b/src/backend/cuda/erode.cu
index dbb2c8e..25ca46c 100644
--- a/src/backend/cuda/erode.cu
+++ b/src/backend/cuda/erode.cu
@@ -18,5 +18,7 @@ INSTANTIATE(char , false)
INSTANTIATE(int , false)
INSTANTIATE(uint , false)
INSTANTIATE(uchar , false)
+INSTANTIATE(short , false)
+INSTANTIATE(ushort, false)
}
diff --git a/src/backend/cuda/erode3d.cu b/src/backend/cuda/erode3d.cu
index 808198a..c54b301 100644
--- a/src/backend/cuda/erode3d.cu
+++ b/src/backend/cuda/erode3d.cu
@@ -18,5 +18,7 @@ INSTANTIATE(char , false)
INSTANTIATE(int , false)
INSTANTIATE(uint , false)
INSTANTIATE(uchar , false)
+INSTANTIATE(short , false)
+INSTANTIATE(ushort, false)
}
diff --git a/src/backend/cuda/fast.cu b/src/backend/cuda/fast.cu
index 7bd6f47..53741e3 100644
--- a/src/backend/cuda/fast.cu
+++ b/src/backend/cuda/fast.cu
@@ -59,5 +59,7 @@ INSTANTIATE(char )
INSTANTIATE(int )
INSTANTIATE(uint )
INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/fast_pyramid.cu b/src/backend/cuda/fast_pyramid.cu
index 3c22236..1e1b047 100644
--- a/src/backend/cuda/fast_pyramid.cu
+++ b/src/backend/cuda/fast_pyramid.cu
@@ -50,5 +50,7 @@ INSTANTIATE(char )
INSTANTIATE(int )
INSTANTIATE(uint )
INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/fftconvolve.cu b/src/backend/cuda/fftconvolve.cu
index 97edeec..3dde4ab 100644
--- a/src/backend/cuda/fftconvolve.cu
+++ b/src/backend/cuda/fftconvolve.cu
@@ -119,5 +119,7 @@ INSTANTIATE(uint , float, cfloat, false, true)
INSTANTIATE(int , float, cfloat, false, true)
INSTANTIATE(uchar , float, cfloat, false, true)
INSTANTIATE(char , float, cfloat, false, true)
+INSTANTIATE(ushort, float, cfloat, false, true)
+INSTANTIATE(short , float, cfloat, false, true)
}
diff --git a/src/backend/cuda/histogram.cu b/src/backend/cuda/histogram.cu
index e9a980f..b1991b3 100644
--- a/src/backend/cuda/histogram.cu
+++ b/src/backend/cuda/histogram.cu
@@ -58,5 +58,7 @@ INSTANTIATE(char , uint)
INSTANTIATE(int , uint)
INSTANTIATE(uint , uint)
INSTANTIATE(uchar , uint)
+INSTANTIATE(short , uint)
+INSTANTIATE(ushort, uint)
}
diff --git a/src/backend/cuda/identity.cu b/src/backend/cuda/identity.cu
index 264d5b8..6765766 100644
--- a/src/backend/cuda/identity.cu
+++ b/src/backend/cuda/identity.cu
@@ -38,5 +38,7 @@ namespace cuda
INSTANTIATE_IDENTITY(uintl)
INSTANTIATE_IDENTITY(char)
INSTANTIATE_IDENTITY(uchar)
+ INSTANTIATE_IDENTITY(short)
+ INSTANTIATE_IDENTITY(ushort)
}
diff --git a/src/backend/cuda/index.cu b/src/backend/cuda/index.cu
index 988f589..b1d528c 100644
--- a/src/backend/cuda/index.cu
+++ b/src/backend/cuda/index.cu
@@ -75,11 +75,13 @@ INSTANTIATE(cdouble)
INSTANTIATE(double )
INSTANTIATE(cfloat )
INSTANTIATE(float )
-INSTANTIATE(uintl )
INSTANTIATE(uint )
-INSTANTIATE(intl )
INSTANTIATE(int )
+INSTANTIATE(uintl )
+INSTANTIATE(intl )
INSTANTIATE(uchar )
INSTANTIATE(char )
+INSTANTIATE(ushort )
+INSTANTIATE(short )
}
diff --git a/src/backend/cuda/iota.cu b/src/backend/cuda/iota.cu
index ee9bcdc..eee4344 100644
--- a/src/backend/cuda/iota.cu
+++ b/src/backend/cuda/iota.cu
@@ -37,5 +37,7 @@ namespace cuda
INSTANTIATE(intl)
INSTANTIATE(uintl)
INSTANTIATE(uchar)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/ireduce.cu b/src/backend/cuda/ireduce.cu
index 0c14a01..dece64c 100644
--- a/src/backend/cuda/ireduce.cu
+++ b/src/backend/cuda/ireduce.cu
@@ -51,6 +51,8 @@ namespace cuda
INSTANTIATE(af_min_t, uint )
INSTANTIATE(af_min_t, intl )
INSTANTIATE(af_min_t, uintl )
+ INSTANTIATE(af_min_t, short )
+ INSTANTIATE(af_min_t, ushort )
INSTANTIATE(af_min_t, char )
INSTANTIATE(af_min_t, uchar )
@@ -63,6 +65,8 @@ namespace cuda
INSTANTIATE(af_max_t, uint )
INSTANTIATE(af_max_t, intl )
INSTANTIATE(af_max_t, uintl )
+ INSTANTIATE(af_max_t, short )
+ INSTANTIATE(af_max_t, ushort )
INSTANTIATE(af_max_t, char )
INSTANTIATE(af_max_t, uchar )
}
diff --git a/src/backend/cuda/jit.cpp b/src/backend/cuda/jit.cpp
index b001fef..af5f2d6 100644
--- a/src/backend/cuda/jit.cpp
+++ b/src/backend/cuda/jit.cpp
@@ -500,6 +500,8 @@ template void evalNodes<char >(Param<char > &out, Node *node);
template void evalNodes<uchar >(Param<uchar > &out, Node *node);
template void evalNodes<intl >(Param<intl > &out, Node *node);
template void evalNodes<uintl >(Param<uintl > &out, Node *node);
+template void evalNodes<short >(Param<short > &out, Node *node);
+template void evalNodes<ushort >(Param<ushort > &out, Node *node);
}
diff --git a/src/backend/cuda/join.cu b/src/backend/cuda/join.cu
index 074326e..729cec4 100644
--- a/src/backend/cuda/join.cu
+++ b/src/backend/cuda/join.cu
@@ -170,16 +170,18 @@ namespace cuda
#define INSTANTIATE(Tx, Ty) \
template Array<Tx> join<Tx, Ty>(const int dim, const Array<Tx> &first, const Array<Ty> &second); \
- INSTANTIATE(float, float)
- INSTANTIATE(double, double)
- INSTANTIATE(cfloat, cfloat)
+ INSTANTIATE(float , float )
+ INSTANTIATE(double , double )
+ INSTANTIATE(cfloat , cfloat )
INSTANTIATE(cdouble, cdouble)
- INSTANTIATE(int, int)
- INSTANTIATE(uint, uint)
- INSTANTIATE(intl, intl)
- INSTANTIATE(uintl, uintl)
- INSTANTIATE(uchar, uchar)
- INSTANTIATE(char, char)
+ INSTANTIATE(int , int )
+ INSTANTIATE(uint , uint )
+ INSTANTIATE(intl , intl )
+ INSTANTIATE(uintl , uintl )
+ INSTANTIATE(short , short )
+ INSTANTIATE(ushort , ushort )
+ INSTANTIATE(uchar , uchar )
+ INSTANTIATE(char , char )
#undef INSTANTIATE
@@ -194,6 +196,8 @@ namespace cuda
INSTANTIATE(uint)
INSTANTIATE(intl)
INSTANTIATE(uintl)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
INSTANTIATE(uchar)
INSTANTIATE(char)
diff --git a/src/backend/cuda/kernel/convolve.cu b/src/backend/cuda/kernel/convolve.cu
index 78790c3..329287d 100644
--- a/src/backend/cuda/kernel/convolve.cu
+++ b/src/backend/cuda/kernel/convolve.cu
@@ -485,12 +485,12 @@ void convolve_nd(Param<T> out, CParam<T> signal, CParam<aT> filt, ConvolveBatchK
}
#define INSTANTIATE(T, aT) \
- template void convolve_nd<T, aT, 1, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
- template void convolve_nd<T, aT, 1, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
- template void convolve_nd<T, aT, 2, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
- template void convolve_nd<T, aT, 2, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
- template void convolve_nd<T, aT, 3, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
- template void convolve_nd<T, aT, 3, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+ template void convolve_nd<T, aT, 1, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+ template void convolve_nd<T, aT, 1, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+ template void convolve_nd<T, aT, 2, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+ template void convolve_nd<T, aT, 2, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+ template void convolve_nd<T, aT, 3, true >(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
+ template void convolve_nd<T, aT, 3, false>(Param<T> out, CParam<T> signal, CParam<aT> filter, ConvolveBatchKind kind);\
INSTANTIATE(cdouble, cdouble)
@@ -501,6 +501,8 @@ INSTANTIATE(uint , float)
INSTANTIATE(int , float)
INSTANTIATE(uchar , float)
INSTANTIATE(char , float)
+INSTANTIATE(ushort , float)
+INSTANTIATE(short , float)
}
diff --git a/src/backend/cuda/kernel/convolve_separable.cu b/src/backend/cuda/kernel/convolve_separable.cu
index e2caec7..196d60a 100644
--- a/src/backend/cuda/kernel/convolve_separable.cu
+++ b/src/backend/cuda/kernel/convolve_separable.cu
@@ -174,10 +174,10 @@ void convolve2(Param<T> out, CParam<T> signal, CParam<accType> filter)
}
#define INSTANTIATE(T, accType) \
- template void convolve2<T, accType, 0, true >(Param<T> out, CParam<T> signal, CParam<accType> filter); \
- template void convolve2<T, accType, 0, false>(Param<T> out, CParam<T> signal, CParam<accType> filter); \
- template void convolve2<T, accType, 1, true >(Param<T> out, CParam<T> signal, CParam<accType> filter); \
- template void convolve2<T, accType, 1, false>(Param<T> out, CParam<T> signal, CParam<accType> filter); \
+ template void convolve2<T, accType, 0, true >(Param<T> out, CParam<T> signal, CParam<accType> filter); \
+ template void convolve2<T, accType, 0, false>(Param<T> out, CParam<T> signal, CParam<accType> filter); \
+ template void convolve2<T, accType, 1, true >(Param<T> out, CParam<T> signal, CParam<accType> filter); \
+ template void convolve2<T, accType, 1, false>(Param<T> out, CParam<T> signal, CParam<accType> filter); \
INSTANTIATE(cdouble, cdouble)
@@ -188,6 +188,8 @@ INSTANTIATE(uint , float)
INSTANTIATE(int , float)
INSTANTIATE(uchar , float)
INSTANTIATE(char , float)
+INSTANTIATE(ushort , float)
+INSTANTIATE(short , float)
}
diff --git a/src/backend/cuda/kernel/fast.hpp b/src/backend/cuda/kernel/fast.hpp
index df4f406..5f220cd 100644
--- a/src/backend/cuda/kernel/fast.hpp
+++ b/src/backend/cuda/kernel/fast.hpp
@@ -87,6 +87,16 @@ unsigned max_val(const unsigned x, const unsigned y)
return max(x, y);
}
inline __device__
+short max_val(const short x, const short y) // short overload: returns the larger of x and y
+{
+ return max(x, y); // same max() call as the unsigned overload in this file
+}
+inline __device__
+ushort max_val(const ushort x, const ushort y) // ushort overload: returns the larger of x and y
+{
+ return max(x, y); // same max() call as the unsigned overload in this file
+}
+inline __device__
float max_val(const float x, const float y)
{
return fmax(x, y);
@@ -109,6 +119,16 @@ inline __device__ unsigned abs_diff(const unsigned x, const unsigned y)
int i = (int)x - (int)y;
return max(-i, i);
}
+inline __device__ short abs_diff(const short x, const short y) // |x - y| for short inputs, saturated to fit the short return type
+{
+ int i = (int)x - (int)y; // keep the difference in int: it can reach +/-65535, which does not fit in a short
+ return (short)min(max(-i, i), 0x7fff); // clamp to 0x7fff so large differences stay large instead of wrapping
+}
+inline __device__ ushort abs_diff(const ushort x, const ushort y) // |x - y| for ushort inputs
+{
+ int i = (int)x - (int)y; // signed difference; negative when x < y
+ return (ushort)max(-i, i); // magnitude of the difference
+}
inline __device__ float abs_diff(const float x, const float y)
{
return fabs(x - y);
diff --git a/src/backend/cuda/kernel/memcopy.hpp b/src/backend/cuda/kernel/memcopy.hpp
index 4d5d192..dc437b4 100644
--- a/src/backend/cuda/kernel/memcopy.hpp
+++ b/src/backend/cuda/kernel/memcopy.hpp
@@ -142,6 +142,8 @@ namespace kernel
OTHER_SPECIALIZATIONS(uint )
OTHER_SPECIALIZATIONS(intl )
OTHER_SPECIALIZATIONS(uintl )
+ OTHER_SPECIALIZATIONS(short )
+ OTHER_SPECIALIZATIONS(ushort )
OTHER_SPECIALIZATIONS(uchar )
OTHER_SPECIALIZATIONS(char )
////////////////////////////// END - templated help functions for copy_kernel //////////////////////////////////
diff --git a/src/backend/cuda/kernel/nearest_neighbour.hpp b/src/backend/cuda/kernel/nearest_neighbour.hpp
index 14c448f..9b14cb5 100644
--- a/src/backend/cuda/kernel/nearest_neighbour.hpp
+++ b/src/backend/cuda/kernel/nearest_neighbour.hpp
@@ -69,6 +69,15 @@ struct dist_op<uintl, To, AF_SHD>
};
template<typename To>
+struct dist_op<ushort, To, AF_SHD> // ushort specialization of dist_op for the AF_SHD distance type
+{
+ __device__ To operator()(ushort v1, ushort v2)
+ {
+ return __popc(v1 ^ v2); // XOR marks differing bits; __popc counts them
+ }
+};
+
+template<typename To>
struct dist_op<uchar, To, AF_SHD>
{
__device__ To operator()(uchar v1, uchar v2)
diff --git a/src/backend/cuda/kernel/shared.hpp b/src/backend/cuda/kernel/shared.hpp
index eb7b432..742afab 100644
--- a/src/backend/cuda/kernel/shared.hpp
+++ b/src/backend/cuda/kernel/shared.hpp
@@ -44,6 +44,8 @@ SPECIALIZE(cdouble)
SPECIALIZE(char)
SPECIALIZE(int)
SPECIALIZE(uint)
+SPECIALIZE(short)
+SPECIALIZE(ushort)
SPECIALIZE(uchar)
#undef SPECIALIZE
diff --git a/src/backend/cuda/lookup.cu b/src/backend/cuda/lookup.cu
index 8f910de..934e68e 100644
--- a/src/backend/cuda/lookup.cu
+++ b/src/backend/cuda/lookup.cu
@@ -42,6 +42,8 @@ Array<in_t> lookup(const Array<in_t> &input, const Array<idx_t> &indices, const
template Array<T> lookup<T, double >(const Array<T> &input, const Array<double > &indices, const unsigned dim); \
template Array<T> lookup<T, int >(const Array<T> &input, const Array<int > &indices, const unsigned dim); \
template Array<T> lookup<T, unsigned>(const Array<T> &input, const Array<unsigned> &indices, const unsigned dim); \
+ template Array<T> lookup<T, short >(const Array<T> &input, const Array<short > &indices, const unsigned dim); \
+ template Array<T> lookup<T, ushort >(const Array<T> &input, const Array<ushort > &indices, const unsigned dim); \
template Array<T> lookup<T, uchar >(const Array<T> &input, const Array<uchar > &indices, const unsigned dim);
INSTANTIATE(float );
@@ -54,5 +56,7 @@ INSTANTIATE(intl );
INSTANTIATE(uintl );
INSTANTIATE(uchar );
INSTANTIATE(char );
+INSTANTIATE(short );
+INSTANTIATE(ushort );
}
diff --git a/src/backend/cuda/match_template.cu b/src/backend/cuda/match_template.cu
index 5b30eb0..0ce0ce2 100644
--- a/src/backend/cuda/match_template.cu
+++ b/src/backend/cuda/match_template.cu
@@ -54,5 +54,7 @@ INSTANTIATE(char , float)
INSTANTIATE(int , float)
INSTANTIATE(uint , float)
INSTANTIATE(uchar , float)
+INSTANTIATE(short , float)
+INSTANTIATE(ushort, float)
}
diff --git a/src/backend/cuda/math.hpp b/src/backend/cuda/math.hpp
index 577db84..1c495f6 100644
--- a/src/backend/cuda/math.hpp
+++ b/src/backend/cuda/math.hpp
@@ -108,6 +108,9 @@ namespace cuda
template<> __device__ float limit_min<float>() { return -CUDART_INF_F; }
template<> __device__ double limit_max<double>() { return CUDART_INF; }
template<> __device__ double limit_min<double>() { return -CUDART_INF; }
+ template<> __device__ short limit_max<short>() { return 0x7fff; }
+ template<> __device__ short limit_min<short>() { return 0x8000; }
+ template<> __device__ ushort limit_max<ushort>() { return 0xffff; } // largest 16-bit unsigned value; 1 << 15 would give only 0x8000
#endif
#define upcast cuComplexFloatToDouble
@@ -134,6 +137,8 @@ __SDH__ cdouble conj(cdouble c) { return cuConj(c); }
__SDH__ cfloat make_cfloat(bool x) { return make_cuComplex(x,0); }
__SDH__ cfloat make_cfloat(int x) { return make_cuComplex(x,0); }
__SDH__ cfloat make_cfloat(unsigned x) { return make_cuComplex(x,0); }
+__SDH__ cfloat make_cfloat(short x) { return make_cuComplex(x,0); }
+__SDH__ cfloat make_cfloat(ushort x) { return make_cuComplex(x,0); }
__SDH__ cfloat make_cfloat(float x) { return make_cuComplex(x,0); }
__SDH__ cfloat make_cfloat(double x) { return make_cuComplex(x,0); }
__SDH__ cfloat make_cfloat(cfloat x) { return x; }
@@ -142,6 +147,8 @@ __SDH__ cfloat make_cfloat(cdouble c) { return make_cuComplex(c.x,c.y); }
__SDH__ cdouble make_cdouble(bool x) { return make_cuDoubleComplex(x,0); }
__SDH__ cdouble make_cdouble(int x) { return make_cuDoubleComplex(x,0); }
__SDH__ cdouble make_cdouble(unsigned x) { return make_cuDoubleComplex(x,0); }
+__SDH__ cdouble make_cdouble(short x) { return make_cuDoubleComplex(x,0); }
+__SDH__ cdouble make_cdouble(ushort x) { return make_cuDoubleComplex(x,0); }
__SDH__ cdouble make_cdouble(float x) { return make_cuDoubleComplex(x,0); }
__SDH__ cdouble make_cdouble(double x) { return make_cuDoubleComplex(x,0); }
__SDH__ cdouble make_cdouble(cdouble x) { return x; }
diff --git a/src/backend/cuda/max.cu b/src/backend/cuda/max.cu
index 7841422..c910bea 100644
--- a/src/backend/cuda/max.cu
+++ b/src/backend/cuda/max.cu
@@ -22,4 +22,6 @@ namespace cuda
INSTANTIATE(af_max_t, uintl , uintl )
INSTANTIATE(af_max_t, char , char )
INSTANTIATE(af_max_t, uchar , uchar )
+ INSTANTIATE(af_max_t, short , short )
+ INSTANTIATE(af_max_t, ushort , ushort )
}
diff --git a/src/backend/cuda/meanshift.cu b/src/backend/cuda/meanshift.cu
index 0fa1ac3..20f200b 100644
--- a/src/backend/cuda/meanshift.cu
+++ b/src/backend/cuda/meanshift.cu
@@ -42,5 +42,7 @@ INSTANTIATE(char )
INSTANTIATE(int )
INSTANTIATE(uint )
INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/medfilt.cu b/src/backend/cuda/medfilt.cu
index 9a99cae..c87aea4 100644
--- a/src/backend/cuda/medfilt.cu
+++ b/src/backend/cuda/medfilt.cu
@@ -44,5 +44,7 @@ INSTANTIATE(char )
INSTANTIATE(int )
INSTANTIATE(uint )
INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/memory.cpp b/src/backend/cuda/memory.cpp
index 45e410f..9b3d731 100644
--- a/src/backend/cuda/memory.cpp
+++ b/src/backend/cuda/memory.cpp
@@ -384,5 +384,7 @@ namespace cuda
INSTANTIATE(uchar)
INSTANTIATE(intl)
INSTANTIATE(uintl)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/min.cu b/src/backend/cuda/min.cu
index 0251414..26719de 100644
--- a/src/backend/cuda/min.cu
+++ b/src/backend/cuda/min.cu
@@ -22,4 +22,6 @@ namespace cuda
INSTANTIATE(af_min_t, uintl , uintl )
INSTANTIATE(af_min_t, char , char )
INSTANTIATE(af_min_t, uchar , uchar )
+ INSTANTIATE(af_min_t, short , short )
+ INSTANTIATE(af_min_t, ushort , ushort )
}
diff --git a/src/backend/cuda/nearest_neighbour.cu b/src/backend/cuda/nearest_neighbour.cu
index 1899c9d..789c0f5 100644
--- a/src/backend/cuda/nearest_neighbour.cu
+++ b/src/backend/cuda/nearest_neighbour.cu
@@ -73,6 +73,8 @@ INSTANTIATE(uint , uint)
INSTANTIATE(intl , intl)
INSTANTIATE(uintl , uintl)
INSTANTIATE(uchar , uint)
+INSTANTIATE(short , int)
+INSTANTIATE(ushort, uint)
INSTANTIATE(uintl, uint) // For Hamming
diff --git a/src/backend/cuda/product.cu b/src/backend/cuda/product.cu
index abc5c1f..d00e140 100644
--- a/src/backend/cuda/product.cu
+++ b/src/backend/cuda/product.cu
@@ -11,7 +11,7 @@
namespace cuda
{
- //sum
+ //mul
INSTANTIATE(af_mul_t, float , float )
INSTANTIATE(af_mul_t, double , double )
INSTANTIATE(af_mul_t, cfloat , cfloat )
@@ -22,4 +22,6 @@ namespace cuda
INSTANTIATE(af_mul_t, uintl , uintl )
INSTANTIATE(af_mul_t, char , int )
INSTANTIATE(af_mul_t, uchar , uint )
+ INSTANTIATE(af_mul_t, short , int )
+ INSTANTIATE(af_mul_t, ushort , uint )
}
diff --git a/src/backend/cuda/random.cu b/src/backend/cuda/random.cu
index c9e6197..07cbdc4 100644
--- a/src/backend/cuda/random.cu
+++ b/src/backend/cuda/random.cu
@@ -44,6 +44,8 @@ namespace cuda
template Array<uintl> randu<uintl> (const af::dim4 &dims);
template Array<char> randu<char> (const af::dim4 &dims);
template Array<uchar> randu<uchar> (const af::dim4 &dims);
+ template Array<short> randu<short> (const af::dim4 &dims);
+ template Array<ushort> randu<ushort> (const af::dim4 &dims);
template Array<float> randn<float> (const af::dim4 &dims);
template Array<double> randn<double> (const af::dim4 &dims);
diff --git a/src/backend/cuda/range.cu b/src/backend/cuda/range.cu
index 9a1a7cd..ace3b1c 100644
--- a/src/backend/cuda/range.cu
+++ b/src/backend/cuda/range.cu
@@ -45,4 +45,6 @@ namespace cuda
INSTANTIATE(intl)
INSTANTIATE(uintl)
INSTANTIATE(uchar)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/regions.cu b/src/backend/cuda/regions.cu
index 656048c..6b50b71 100644
--- a/src/backend/cuda/regions.cu
+++ b/src/backend/cuda/regions.cu
@@ -65,5 +65,7 @@ INSTANTIATE(float )
INSTANTIATE(double)
INSTANTIATE(int )
INSTANTIATE(uint )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/reorder.cu b/src/backend/cuda/reorder.cu
index 2c920e6..7292fcd 100644
--- a/src/backend/cuda/reorder.cu
+++ b/src/backend/cuda/reorder.cu
@@ -43,5 +43,7 @@ namespace cuda
INSTANTIATE(char)
INSTANTIATE(intl)
INSTANTIATE(uintl)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/resize.cu b/src/backend/cuda/resize.cu
index dcec972..02d3499 100644
--- a/src/backend/cuda/resize.cu
+++ b/src/backend/cuda/resize.cu
@@ -57,4 +57,6 @@ namespace cuda
INSTANTIATE(uintl)
INSTANTIATE(uchar)
INSTANTIATE(char)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/rotate.cu b/src/backend/cuda/rotate.cu
index 24e41d7..23c99e1 100644
--- a/src/backend/cuda/rotate.cu
+++ b/src/backend/cuda/rotate.cu
@@ -53,4 +53,6 @@ namespace cuda
INSTANTIATE(uintl)
INSTANTIATE(uchar)
INSTANTIATE(char)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/scan.cu b/src/backend/cuda/scan.cu
index a76abc2..15ee6b4 100644
--- a/src/backend/cuda/scan.cu
+++ b/src/backend/cuda/scan.cu
@@ -51,5 +51,7 @@ namespace cuda
INSTANTIATE(af_add_t, uintl , uintl )
INSTANTIATE(af_add_t, char , int )
INSTANTIATE(af_add_t, uchar , uint )
+ INSTANTIATE(af_add_t, short , int )
+ INSTANTIATE(af_add_t, ushort , uint )
INSTANTIATE(af_notzero_t, char , uint )
}
diff --git a/src/backend/cuda/select.cu b/src/backend/cuda/select.cu
index eb90730..9697da4 100644
--- a/src/backend/cuda/select.cu
+++ b/src/backend/cuda/select.cu
@@ -48,4 +48,6 @@ namespace cuda
INSTANTIATE(uintl )
INSTANTIATE(char )
INSTANTIATE(uchar )
+ INSTANTIATE(short )
+ INSTANTIATE(ushort )
}
diff --git a/src/backend/cuda/set.cu b/src/backend/cuda/set.cu
index 5b457e1..8887f83 100644
--- a/src/backend/cuda/set.cu
+++ b/src/backend/cuda/set.cu
@@ -117,4 +117,6 @@ namespace cuda
INSTANTIATE(uint)
INSTANTIATE(char)
INSTANTIATE(uchar)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/shift.cu b/src/backend/cuda/shift.cu
index f97eb4a..89e78ac 100644
--- a/src/backend/cuda/shift.cu
+++ b/src/backend/cuda/shift.cu
@@ -41,4 +41,6 @@ namespace cuda
INSTANTIATE(uintl)
INSTANTIATE(uchar)
INSTANTIATE(char)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/sobel.cu b/src/backend/cuda/sobel.cu
index 6f9b194..ab5a693 100644
--- a/src/backend/cuda/sobel.cu
+++ b/src/backend/cuda/sobel.cu
@@ -42,5 +42,7 @@ INSTANTIATE(int , int)
INSTANTIATE(uint , int)
INSTANTIATE(char , int)
INSTANTIATE(uchar , int)
+INSTANTIATE(short , int)
+INSTANTIATE(ushort, int)
}
diff --git a/src/backend/cuda/sort.cu b/src/backend/cuda/sort.cu
index dc74b80..9823174 100644
--- a/src/backend/cuda/sort.cu
+++ b/src/backend/cuda/sort.cu
@@ -40,4 +40,6 @@ namespace cuda
INSTANTIATE(uint)
INSTANTIATE(char)
INSTANTIATE(uchar)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/sort_by_key/ascd_s16.cu
similarity index 65%
copy from src/backend/cuda/dilate.cu
copy to src/backend/cuda/sort_by_key/ascd_s16.cu
index 0da33f2..d51e9ae 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/sort_by_key/ascd_s16.cu
@@ -7,16 +7,9 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
-#include "morph_impl.hpp"
+#include <sort_by_key_impl.hpp>
namespace cuda
{
-
-INSTANTIATE(float , true)
-INSTANTIATE(double, true)
-INSTANTIATE(char , true)
-INSTANTIATE(int , true)
-INSTANTIATE(uint , true)
-INSTANTIATE(uchar , true)
-
+ INSTANTIATE1(short, true)
}
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/sort_by_key/ascd_u16.cu
similarity index 65%
copy from src/backend/cuda/dilate.cu
copy to src/backend/cuda/sort_by_key/ascd_u16.cu
index 0da33f2..e06036a 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/sort_by_key/ascd_u16.cu
@@ -7,16 +7,9 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
-#include "morph_impl.hpp"
+#include <sort_by_key_impl.hpp>
namespace cuda
{
-
-INSTANTIATE(float , true)
-INSTANTIATE(double, true)
-INSTANTIATE(char , true)
-INSTANTIATE(int , true)
-INSTANTIATE(uint , true)
-INSTANTIATE(uchar , true)
-
+ INSTANTIATE1(ushort, true)
}
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/sort_by_key/desc_s16.cu
similarity index 65%
copy from src/backend/cuda/dilate.cu
copy to src/backend/cuda/sort_by_key/desc_s16.cu
index 0da33f2..63967b6 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/sort_by_key/desc_s16.cu
@@ -7,16 +7,9 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
-#include "morph_impl.hpp"
+#include <sort_by_key_impl.hpp>
namespace cuda
{
-
-INSTANTIATE(float , true)
-INSTANTIATE(double, true)
-INSTANTIATE(char , true)
-INSTANTIATE(int , true)
-INSTANTIATE(uint , true)
-INSTANTIATE(uchar , true)
-
+ INSTANTIATE1(short, false)
}
diff --git a/src/backend/cuda/dilate.cu b/src/backend/cuda/sort_by_key/desc_u16.cu
similarity index 65%
copy from src/backend/cuda/dilate.cu
copy to src/backend/cuda/sort_by_key/desc_u16.cu
index 0da33f2..69dc016 100644
--- a/src/backend/cuda/dilate.cu
+++ b/src/backend/cuda/sort_by_key/desc_u16.cu
@@ -7,16 +7,9 @@
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
-#include "morph_impl.hpp"
+#include <sort_by_key_impl.hpp>
namespace cuda
{
-
-INSTANTIATE(float , true)
-INSTANTIATE(double, true)
-INSTANTIATE(char , true)
-INSTANTIATE(int , true)
-INSTANTIATE(uint , true)
-INSTANTIATE(uchar , true)
-
+ INSTANTIATE1(ushort, false)
}
diff --git a/src/backend/cuda/sort_by_key_impl.hpp b/src/backend/cuda/sort_by_key_impl.hpp
index 32758b4..9cd286c 100644
--- a/src/backend/cuda/sort_by_key_impl.hpp
+++ b/src/backend/cuda/sort_by_key_impl.hpp
@@ -40,6 +40,8 @@ namespace cuda
INSTANTIATE(Tk, double, dr) \
INSTANTIATE(Tk, int, dr) \
INSTANTIATE(Tk, uint, dr) \
+ INSTANTIATE(Tk, short, dr) \
+ INSTANTIATE(Tk, ushort, dr) \
INSTANTIATE(Tk, char, dr) \
INSTANTIATE(Tk, uchar, dr)
}
diff --git a/src/backend/cuda/sort_index.cu b/src/backend/cuda/sort_index.cu
index b80287b..a073c72 100644
--- a/src/backend/cuda/sort_index.cu
+++ b/src/backend/cuda/sort_index.cu
@@ -41,5 +41,7 @@ namespace cuda
INSTANTIATE(uint)
INSTANTIATE(char)
INSTANTIATE(uchar)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/sum.cu b/src/backend/cuda/sum.cu
index 407cc98..95f2177 100644
--- a/src/backend/cuda/sum.cu
+++ b/src/backend/cuda/sum.cu
@@ -22,4 +22,6 @@ namespace cuda
INSTANTIATE(af_add_t, uintl , uintl )
INSTANTIATE(af_add_t, char , int )
INSTANTIATE(af_add_t, uchar , uint )
+ INSTANTIATE(af_add_t, short , int )
+ INSTANTIATE(af_add_t, ushort , uint )
}
diff --git a/src/backend/cuda/susan.cu b/src/backend/cuda/susan.cu
index 8474454..6925d0c 100644
--- a/src/backend/cuda/susan.cu
+++ b/src/backend/cuda/susan.cu
@@ -63,5 +63,7 @@ INSTANTIATE(char )
INSTANTIATE(int )
INSTANTIATE(uint )
INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/tile.cu b/src/backend/cuda/tile.cu
index 2a9af87..f15fd87 100644
--- a/src/backend/cuda/tile.cu
+++ b/src/backend/cuda/tile.cu
@@ -46,5 +46,7 @@ namespace cuda
INSTANTIATE(uintl)
INSTANTIATE(uchar)
INSTANTIATE(char)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/transform.cu b/src/backend/cuda/transform.cu
index 214bce3..853617c 100644
--- a/src/backend/cuda/transform.cu
+++ b/src/backend/cuda/transform.cu
@@ -55,4 +55,6 @@ namespace cuda
INSTANTIATE(uintl)
INSTANTIATE(uchar)
INSTANTIATE(char)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/transpose.cu b/src/backend/cuda/transpose.cu
index e787b6e..fff167a 100644
--- a/src/backend/cuda/transpose.cu
+++ b/src/backend/cuda/transpose.cu
@@ -46,5 +46,7 @@ INSTANTIATE(uint )
INSTANTIATE(uchar )
INSTANTIATE(intl )
INSTANTIATE(uintl )
+INSTANTIATE(short)
+INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/transpose_inplace.cu b/src/backend/cuda/transpose_inplace.cu
index 98613bc..1d34580 100644
--- a/src/backend/cuda/transpose_inplace.cu
+++ b/src/backend/cuda/transpose_inplace.cu
@@ -37,6 +37,8 @@ INSTANTIATE(uint )
INSTANTIATE(uchar )
INSTANTIATE(intl )
INSTANTIATE(uintl )
+INSTANTIATE(short )
+INSTANTIATE(ushort )
}
diff --git a/src/backend/cuda/triangle.cu b/src/backend/cuda/triangle.cu
index 99970a0..e92b1d5 100644
--- a/src/backend/cuda/triangle.cu
+++ b/src/backend/cuda/triangle.cu
@@ -52,4 +52,6 @@ Array<T> triangle(const Array<T> &in)
INSTANTIATE(uintl)
INSTANTIATE(char)
INSTANTIATE(uchar)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/types.cpp b/src/backend/cuda/types.cpp
index f83913b..8c29c00 100644
--- a/src/backend/cuda/types.cpp
+++ b/src/backend/cuda/types.cpp
@@ -24,6 +24,8 @@ namespace cuda
template<> const char *cuShortName<uchar >() { return "h"; }
template<> const char *cuShortName<intl >() { return "x"; }
template<> const char *cuShortName<uintl >() { return "y"; }
+ template<> const char *cuShortName<short >() { return "s"; }
+ template<> const char *cuShortName<ushort >() { return "t"; }
template<typename T > const char *afShortName(bool caps) { return caps ? "Q" : "q"; }
template<> const char *afShortName<float >(bool caps) { return caps ? "S" : "s"; }
@@ -36,6 +38,8 @@ namespace cuda
template<> const char *afShortName<uchar >(bool caps) { return caps ? "V" : "v"; }
template<> const char *afShortName<intl >(bool caps) { return caps ? "X" : "x"; }
template<> const char *afShortName<uintl >(bool caps) { return caps ? "Y" : "y"; }
+ template<> const char *afShortName<short >(bool caps) { return caps ? "P" : "p"; } // lower-case when !caps, like the other specializations
+ template<> const char *afShortName<ushort >(bool caps) { return caps ? "Q" : "q"; } // NOTE(review): same letters as the generic fallback - confirm intended
template<typename T > const char *irname() { return "i32"; }
template<> const char *irname<float >() { return "float"; }
@@ -48,6 +52,8 @@ namespace cuda
template<> const char *irname<uintl >() { return "i64"; }
template<> const char *irname<char >() { return "i8"; }
template<> const char *irname<uchar >() { return "i8"; }
+ template<> const char *irname<short >() { return "i16"; }
+ template<> const char *irname<ushort >() { return "i16"; }
template <typename T>
static inline std::string toString(T val)
@@ -89,4 +95,6 @@ namespace cuda
INSTANTIATE(uint)
INSTANTIATE(intl)
INSTANTIATE(uintl)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/types.hpp b/src/backend/cuda/types.hpp
index 0d807ae..26d0bb6 100644
--- a/src/backend/cuda/types.hpp
+++ b/src/backend/cuda/types.hpp
@@ -16,7 +16,8 @@ namespace cuda
typedef cuFloatComplex cfloat;
typedef cuDoubleComplex cdouble;
typedef unsigned int uint;
- typedef unsigned char uchar;
+ typedef unsigned char uchar;
+ typedef unsigned short ushort;
template<typename T> struct is_complex { static const bool value = false; };
template<> struct is_complex<cfloat> { static const bool value = true; };
diff --git a/src/backend/cuda/unwrap.cu b/src/backend/cuda/unwrap.cu
index 8600ca1..a61aba4 100644
--- a/src/backend/cuda/unwrap.cu
+++ b/src/backend/cuda/unwrap.cu
@@ -54,4 +54,6 @@ namespace cuda
INSTANTIATE(uintl)
INSTANTIATE(uchar)
INSTANTIATE(char)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
diff --git a/src/backend/cuda/where.cu b/src/backend/cuda/where.cu
index 8e4f9cf..a43e339 100644
--- a/src/backend/cuda/where.cu
+++ b/src/backend/cuda/where.cu
@@ -42,5 +42,7 @@ namespace cuda
INSTANTIATE(intl )
INSTANTIATE(uintl )
INSTANTIATE(uchar )
+ INSTANTIATE(short )
+ INSTANTIATE(ushort )
}
diff --git a/src/backend/cuda/wrap.cu b/src/backend/cuda/wrap.cu
index a1e70fc..017a3a4 100644
--- a/src/backend/cuda/wrap.cu
+++ b/src/backend/cuda/wrap.cu
@@ -54,4 +54,6 @@ namespace cuda
INSTANTIATE(uintl)
INSTANTIATE(uchar)
INSTANTIATE(char)
+ INSTANTIATE(short)
+ INSTANTIATE(ushort)
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list