[arrayfire] 191/408: thrust fixes for cuda stream selection on cuda < 7.0

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:54 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 60c2be968423f896402df726bd3fc17b7f7a2259
Author: pradeep <pradeep at arrayfire.com>
Date:   Wed Aug 5 13:20:40 2015 -0400

    thrust fixes for cuda stream selection on cuda < 7.0
---
 src/backend/cuda/debug_cuda.hpp         | 25 ++++++++++++++++++++++++-
 src/backend/cuda/kernel/regions.hpp     |  8 ++++----
 src/backend/cuda/kernel/sort.hpp        |  6 ++----
 src/backend/cuda/kernel/sort_by_key.hpp |  7 ++-----
 src/backend/cuda/kernel/sort_index.hpp  |  7 +++----
 src/backend/cuda/set.cu                 | 16 +++++++---------
 6 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/src/backend/cuda/debug_cuda.hpp b/src/backend/cuda/debug_cuda.hpp
index deb7a16..813456b 100644
--- a/src/backend/cuda/debug_cuda.hpp
+++ b/src/backend/cuda/debug_cuda.hpp
@@ -10,9 +10,32 @@
 #pragma once
 #include <platform.hpp>
 #include <err_cuda.hpp>
+#include <thrust/version.h>
+#include <thrust/system/cuda/detail/par.h>
 
 #define THRUST_STREAM thrust::cuda::par.on(cuda::getStream(cuda::getActiveDeviceId()))
 
+#if THRUST_MAJOR_VERSION>=1 && THRUST_MINOR_VERSION>=8
+
+#define THRUST_SELECT(fn, ...) fn(THRUST_STREAM, __VA_ARGS__)
+#define THRUST_SELECT_OUT(res, fn, ...) res = fn(THRUST_STREAM, __VA_ARGS__)
+
+#else
+
+#define THRUST_SELECT(fn, ...) \
+    do {                          \
+        CUDA_CHECK(cudaStreamSynchronize(cuda::getStream(cuda::getActiveDeviceId()))); \
+        fn(__VA_ARGS__);       \
+    } while(0)
+
+#define THRUST_SELECT_OUT(res, fn, ...) \
+    do {                          \
+        CUDA_CHECK(cudaStreamSynchronize(cuda::getStream(cuda::getActiveDeviceId()))); \
+        res = fn(__VA_ARGS__);       \
+    } while(0)
+
+#endif
+
 #define CUDA_LAUNCH_SMEM(fn, blks, thrds, smem_size, ...) \
 	fn<<<blks, thrds, smem_size, cuda::getStream(cuda::getActiveDeviceId())>>>(__VA_ARGS__)
 
@@ -23,7 +46,7 @@
 #ifndef NDEBUG
 
 #define POST_LAUNCH_CHECK() do {                        \
-        CUDA_CHECK(cudaStreamSynchronize(getStream())); \
+        CUDA_CHECK(cudaStreamSynchronize(getStream(getActiveDeviceId()))); \
     } while(0)                                          \
 
 #else
diff --git a/src/backend/cuda/kernel/regions.hpp b/src/backend/cuda/kernel/regions.hpp
index 467fa6b..9524811 100644
--- a/src/backend/cuda/kernel/regions.hpp
+++ b/src/backend/cuda/kernel/regions.hpp
@@ -443,7 +443,7 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
     thrust::device_ptr<T> wrapped_tmp = thrust::device_pointer_cast(tmp);
 
     // Sort the copy
-    thrust::sort(THRUST_STREAM, wrapped_tmp, wrapped_tmp + size);
+    THRUST_SELECT(thrust::sort, wrapped_tmp, wrapped_tmp + size);
 
     // Take the max element, this is the number of label assignments to
     // compute.
@@ -453,10 +453,10 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
 
     // Find the end of each section of values
     thrust::counting_iterator<T> search_begin(0);
-    thrust::upper_bound(THRUST_STREAM, wrapped_tmp,  wrapped_tmp  + size,
+    THRUST_SELECT(thrust::upper_bound, wrapped_tmp,  wrapped_tmp  + size,
                         search_begin, search_begin + num_bins,
                         labels.begin());
-    thrust::adjacent_difference(THRUST_STREAM, labels.begin(), labels.end(), labels.begin());
+    THRUST_SELECT(thrust::adjacent_difference, labels.begin(), labels.end(), labels.begin());
 
     // Operators for the scan
     clamp_to_one<T> clamp;
@@ -464,7 +464,7 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
 
     // Perform the scan -- this can computes the correct labels for each
     // component
-    thrust::transform_exclusive_scan(THRUST_STREAM,
+    THRUST_SELECT(thrust::transform_exclusive_scan,
                                      labels.begin(),
                                      labels.end(),
                                      labels.begin(),
diff --git a/src/backend/cuda/kernel/sort.hpp b/src/backend/cuda/kernel/sort.hpp
index 6c83903..b23e308 100644
--- a/src/backend/cuda/kernel/sort.hpp
+++ b/src/backend/cuda/kernel/sort.hpp
@@ -12,7 +12,6 @@
 #include <Param.hpp>
 #include <err_cuda.hpp>
 #include <debug_cuda.hpp>
-#include <thrust/system/cuda/detail/par.h>
 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>
 
@@ -41,10 +40,9 @@ namespace cuda
                         int valOffset = valWZ + y * val.strides[1];
 
                         if(isAscending) {
-                            thrust::sort(THRUST_STREAM, val_ptr + valOffset, val_ptr + valOffset + val.dims[0]);
+                            THRUST_SELECT(thrust::sort, val_ptr + valOffset, val_ptr + valOffset + val.dims[0]);
                         } else {
-                            thrust::sort(THRUST_STREAM, val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
-                                         thrust::greater<T>());
+                            THRUST_SELECT(thrust::sort, val_ptr + valOffset, val_ptr + valOffset + val.dims[0], thrust::greater<T>());
                         }
                     }
                 }
diff --git a/src/backend/cuda/kernel/sort_by_key.hpp b/src/backend/cuda/kernel/sort_by_key.hpp
index 798869f..42a3256 100644
--- a/src/backend/cuda/kernel/sort_by_key.hpp
+++ b/src/backend/cuda/kernel/sort_by_key.hpp
@@ -12,7 +12,6 @@
 #include <Param.hpp>
 #include <err_cuda.hpp>
 #include <debug_cuda.hpp>
-#include <thrust/system/cuda/detail/par.h>
 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>
 
@@ -45,11 +44,9 @@ namespace cuda
                         int ovalOffset = ovalWZ + y * oval.strides[1];
 
                         if(isAscending) {
-                            thrust::sort_by_key(THRUST_STREAM, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
-                                                oval_ptr + ovalOffset);
+                            THRUST_SELECT(thrust::sort_by_key, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0], oval_ptr + ovalOffset);
                         } else {
-                            thrust::sort_by_key(THRUST_STREAM, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
-                                                oval_ptr + ovalOffset, thrust::greater<Tk>());
+                            THRUST_SELECT(thrust::sort_by_key, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0], oval_ptr + ovalOffset, thrust::greater<Tk>());
                         }
                     }
                 }
diff --git a/src/backend/cuda/kernel/sort_index.hpp b/src/backend/cuda/kernel/sort_index.hpp
index 53b19e0..9d29914 100644
--- a/src/backend/cuda/kernel/sort_index.hpp
+++ b/src/backend/cuda/kernel/sort_index.hpp
@@ -12,7 +12,6 @@
 #include <Param.hpp>
 #include <err_cuda.hpp>
 #include <debug_cuda.hpp>
-#include <thrust/system/cuda/detail/par.h>
 #include <thrust/device_ptr.h>
 #include <thrust/sequence.h>
 #include <thrust/sort.h>
@@ -41,13 +40,13 @@ namespace cuda
                         int valOffset = valWZ + y * val.strides[1];
                         int idxOffset = idxWZ + y * idx.strides[1];
 
-                        thrust::sequence(THRUST_STREAM, idx_ptr + idxOffset, idx_ptr + idxOffset + idx.dims[0]);
+                        THRUST_SELECT(thrust::sequence, idx_ptr + idxOffset, idx_ptr + idxOffset + idx.dims[0]);
                         if(isAscending) {
-                            thrust::sort_by_key(THRUST_STREAM,
+                            THRUST_SELECT(thrust::sort_by_key,
                                     val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
                                     idx_ptr + idxOffset);
                         } else {
-                            thrust::sort_by_key(THRUST_STREAM,
+                            THRUST_SELECT(thrust::sort_by_key,
                                         val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
                                         idx_ptr + idxOffset, thrust::greater<T>());
                         }
diff --git a/src/backend/cuda/set.cu b/src/backend/cuda/set.cu
index b8343eb..5b457e1 100644
--- a/src/backend/cuda/set.cu
+++ b/src/backend/cuda/set.cu
@@ -16,7 +16,6 @@
 #include <sort.hpp>
 #include <debug_cuda.hpp>
 
-#include <thrust/system/cuda/detail/par.h>
 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>
 #include <thrust/unique.h>
@@ -35,8 +34,9 @@ namespace cuda
         thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
         thrust::device_ptr<T> out_ptr_end = out_ptr + out.dims()[0];
 
-        if(!is_sorted) thrust::sort(THRUST_STREAM, out_ptr, out_ptr_end);
-        thrust::device_ptr<T> out_ptr_last = thrust::unique(THRUST_STREAM, out_ptr, out_ptr_end);
+        if(!is_sorted) THRUST_SELECT(thrust::sort, out_ptr, out_ptr_end);
+        thrust::device_ptr<T> out_ptr_last;
+        THRUST_SELECT_OUT(out_ptr_last, thrust::unique, out_ptr, out_ptr_end);
 
         out.resetDims(dim4(thrust::distance(out_ptr, out_ptr_last)));
         return out;
@@ -66,9 +66,8 @@ namespace cuda
 
         thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
 
-        thrust::device_ptr<T> out_ptr_last = thrust::set_union(THRUST_STREAM, first_ptr, first_ptr_end,
-                                                               second_ptr, second_ptr_end,
-                                                               out_ptr);
+        thrust::device_ptr<T> out_ptr_last;
+        THRUST_SELECT_OUT(out_ptr_last, thrust::set_union, first_ptr, first_ptr_end, second_ptr, second_ptr_end, out_ptr);
 
         out.resetDims(dim4(thrust::distance(out_ptr, out_ptr_last)));
 
@@ -99,9 +98,8 @@ namespace cuda
 
         thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
 
-        thrust::device_ptr<T> out_ptr_last = thrust::set_intersection(THRUST_STREAM, first_ptr, first_ptr_end,
-                                                                      second_ptr, second_ptr_end,
-                                                                      out_ptr);
+        thrust::device_ptr<T> out_ptr_last;
+        THRUST_SELECT_OUT(out_ptr_last, thrust::set_intersection, first_ptr, first_ptr_end, second_ptr, second_ptr_end, out_ptr);
 
         out.resetDims(dim4(thrust::distance(out_ptr, out_ptr_last)));
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list