[arrayfire] 188/408: Added stream parameter for upstream{thrust, cufft, cublas} calls

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:53 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit d46b15b143ea7d64c9d6e8bcbe39f382a98485fd
Author: pradeep <pradeep at arrayfire.com>
Date:   Tue Aug 4 20:10:44 2015 -0400

    Added stream parameter for upstream{thrust, cufft, cublas} calls
---
 src/backend/cuda/cublasManager.cpp      |  1 +
 src/backend/cuda/debug_cuda.hpp         |  2 ++
 src/backend/cuda/fft.cpp                |  3 ++-
 src/backend/cuda/kernel/regions.hpp     | 10 ++++++----
 src/backend/cuda/kernel/sort.hpp        |  5 +++--
 src/backend/cuda/kernel/sort_by_key.hpp |  5 +++--
 src/backend/cuda/kernel/sort_index.hpp  | 13 ++++++++-----
 src/backend/cuda/set.cu                 | 11 ++++++-----
 8 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/src/backend/cuda/cublasManager.cpp b/src/backend/cuda/cublasManager.cpp
index 1f34d54..ca6cfbb 100644
--- a/src/backend/cuda/cublasManager.cpp
+++ b/src/backend/cuda/cublasManager.cpp
@@ -45,6 +45,7 @@ namespace cublas {
         cublasHandle()  : handle(0)
         {
             CUBLAS_CHECK(cublasCreate(&handle));
+            CUBLAS_CHECK(cublasSetStream(handle, cuda::getStream(cuda::getActiveDeviceId())));
         }
 
         ~cublasHandle()
diff --git a/src/backend/cuda/debug_cuda.hpp b/src/backend/cuda/debug_cuda.hpp
index 3ed83d5..deb7a16 100644
--- a/src/backend/cuda/debug_cuda.hpp
+++ b/src/backend/cuda/debug_cuda.hpp
@@ -11,6 +11,8 @@
 #include <platform.hpp>
 #include <err_cuda.hpp>
 
+#define THRUST_STREAM thrust::cuda::par.on(cuda::getStream(cuda::getActiveDeviceId()))
+
 #define CUDA_LAUNCH_SMEM(fn, blks, thrds, smem_size, ...) \
 	fn<<<blks, thrds, smem_size, cuda::getStream(cuda::getActiveDeviceId())>>>(__VA_ARGS__)
 
diff --git a/src/backend/cuda/fft.cpp b/src/backend/cuda/fft.cpp
index 31d1f7c..210fa1c 100644
--- a/src/backend/cuda/fft.cpp
+++ b/src/backend/cuda/fft.cpp
@@ -13,7 +13,7 @@
 #include <Array.hpp>
 #include <copy.hpp>
 #include <fft.hpp>
-#include <err_cuda.hpp>
+#include <debug_cuda.hpp>
 #include <err_cufft.hpp>
 #include <cufft.h>
 #include <math.hpp>
@@ -123,6 +123,7 @@ void find_cufft_plan(cufftHandle &plan, int rank, int *n,
     }
 
     plan = temp;
+    cufftSetStream(plan, cuda::getStream(cuda::getActiveDeviceId()));
     planner.mHandles[slot_index] = temp;
     planner.mKeys[slot_index] = key_string;
     planner.mAvailSlotIndex = (slot_index + 1)%cuFFTPlanner::MAX_PLAN_CACHE;
diff --git a/src/backend/cuda/kernel/regions.hpp b/src/backend/cuda/kernel/regions.hpp
index 4693986..467fa6b 100644
--- a/src/backend/cuda/kernel/regions.hpp
+++ b/src/backend/cuda/kernel/regions.hpp
@@ -15,6 +15,7 @@
 #include <stdio.h>
 #include <memory.hpp>
 
+#include <thrust/system/cuda/detail/par.h>
 #include <thrust/adjacent_difference.h>
 #include <thrust/binary_search.h>
 #include <thrust/device_vector.h>
@@ -442,7 +443,7 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
     thrust::device_ptr<T> wrapped_tmp = thrust::device_pointer_cast(tmp);
 
     // Sort the copy
-    thrust::sort(wrapped_tmp, wrapped_tmp + size);
+    thrust::sort(THRUST_STREAM, wrapped_tmp, wrapped_tmp + size);
 
     // Take the max element, this is the number of label assignments to
     // compute.
@@ -452,10 +453,10 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
 
     // Find the end of each section of values
     thrust::counting_iterator<T> search_begin(0);
-    thrust::upper_bound(wrapped_tmp,  wrapped_tmp  + size,
+    thrust::upper_bound(THRUST_STREAM, wrapped_tmp,  wrapped_tmp  + size,
                         search_begin, search_begin + num_bins,
                         labels.begin());
-    thrust::adjacent_difference(labels.begin(), labels.end(), labels.begin());
+    thrust::adjacent_difference(THRUST_STREAM, labels.begin(), labels.end(), labels.begin());
 
     // Operators for the scan
     clamp_to_one<T> clamp;
@@ -463,7 +464,8 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
 
     // Perform the scan -- this can computes the correct labels for each
     // component
-    thrust::transform_exclusive_scan(labels.begin(),
+    thrust::transform_exclusive_scan(THRUST_STREAM,
+                                     labels.begin(),
                                      labels.end(),
                                      labels.begin(),
                                      clamp,
diff --git a/src/backend/cuda/kernel/sort.hpp b/src/backend/cuda/kernel/sort.hpp
index 34256d7..6c83903 100644
--- a/src/backend/cuda/kernel/sort.hpp
+++ b/src/backend/cuda/kernel/sort.hpp
@@ -12,6 +12,7 @@
 #include <Param.hpp>
 #include <err_cuda.hpp>
 #include <debug_cuda.hpp>
+#include <thrust/system/cuda/detail/par.h>
 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>
 
@@ -40,9 +41,9 @@ namespace cuda
                         int valOffset = valWZ + y * val.strides[1];
 
                         if(isAscending) {
-                            thrust::sort(val_ptr + valOffset, val_ptr + valOffset + val.dims[0]);
+                            thrust::sort(THRUST_STREAM, val_ptr + valOffset, val_ptr + valOffset + val.dims[0]);
                         } else {
-                            thrust::sort(val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
+                            thrust::sort(THRUST_STREAM, val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
                                          thrust::greater<T>());
                         }
                     }
diff --git a/src/backend/cuda/kernel/sort_by_key.hpp b/src/backend/cuda/kernel/sort_by_key.hpp
index 7c63c28..798869f 100644
--- a/src/backend/cuda/kernel/sort_by_key.hpp
+++ b/src/backend/cuda/kernel/sort_by_key.hpp
@@ -12,6 +12,7 @@
 #include <Param.hpp>
 #include <err_cuda.hpp>
 #include <debug_cuda.hpp>
+#include <thrust/system/cuda/detail/par.h>
 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>
 
@@ -44,10 +45,10 @@ namespace cuda
                         int ovalOffset = ovalWZ + y * oval.strides[1];
 
                         if(isAscending) {
-                            thrust::sort_by_key(okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
+                            thrust::sort_by_key(THRUST_STREAM, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
                                                 oval_ptr + ovalOffset);
                         } else {
-                            thrust::sort_by_key(okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
+                            thrust::sort_by_key(THRUST_STREAM, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
                                                 oval_ptr + ovalOffset, thrust::greater<Tk>());
                         }
                     }
diff --git a/src/backend/cuda/kernel/sort_index.hpp b/src/backend/cuda/kernel/sort_index.hpp
index df1feba..53b19e0 100644
--- a/src/backend/cuda/kernel/sort_index.hpp
+++ b/src/backend/cuda/kernel/sort_index.hpp
@@ -12,6 +12,7 @@
 #include <Param.hpp>
 #include <err_cuda.hpp>
 #include <debug_cuda.hpp>
+#include <thrust/system/cuda/detail/par.h>
 #include <thrust/device_ptr.h>
 #include <thrust/sequence.h>
 #include <thrust/sort.h>
@@ -40,13 +41,15 @@ namespace cuda
                         int valOffset = valWZ + y * val.strides[1];
                         int idxOffset = idxWZ + y * idx.strides[1];
 
-                        thrust::sequence(idx_ptr + idxOffset, idx_ptr + idxOffset + idx.dims[0]);
+                        thrust::sequence(THRUST_STREAM, idx_ptr + idxOffset, idx_ptr + idxOffset + idx.dims[0]);
                         if(isAscending) {
-                            thrust::sort_by_key(val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
-                                                idx_ptr + idxOffset);
+                            thrust::sort_by_key(THRUST_STREAM,
+                                    val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
+                                    idx_ptr + idxOffset);
                         } else {
-                            thrust::sort_by_key(val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
-                                                idx_ptr + idxOffset, thrust::greater<T>());
+                            thrust::sort_by_key(THRUST_STREAM,
+                                        val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
+                                        idx_ptr + idxOffset, thrust::greater<T>());
                         }
                     }
                 }
diff --git a/src/backend/cuda/set.cu b/src/backend/cuda/set.cu
index b164165..b8343eb 100644
--- a/src/backend/cuda/set.cu
+++ b/src/backend/cuda/set.cu
@@ -14,8 +14,9 @@
 #include <set.hpp>
 #include <copy.hpp>
 #include <sort.hpp>
-#include <err_cuda.hpp>
+#include <debug_cuda.hpp>
 
+#include <thrust/system/cuda/detail/par.h>
 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>
 #include <thrust/unique.h>
@@ -34,8 +35,8 @@ namespace cuda
         thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
         thrust::device_ptr<T> out_ptr_end = out_ptr + out.dims()[0];
 
-        if(!is_sorted) thrust::sort(out_ptr, out_ptr_end);
-        thrust::device_ptr<T> out_ptr_last = thrust::unique(out_ptr, out_ptr_end);
+        if(!is_sorted) thrust::sort(THRUST_STREAM, out_ptr, out_ptr_end);
+        thrust::device_ptr<T> out_ptr_last = thrust::unique(THRUST_STREAM, out_ptr, out_ptr_end);
 
         out.resetDims(dim4(thrust::distance(out_ptr, out_ptr_last)));
         return out;
@@ -65,7 +66,7 @@ namespace cuda
 
         thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
 
-        thrust::device_ptr<T> out_ptr_last = thrust::set_union(first_ptr, first_ptr_end,
+        thrust::device_ptr<T> out_ptr_last = thrust::set_union(THRUST_STREAM, first_ptr, first_ptr_end,
                                                                second_ptr, second_ptr_end,
                                                                out_ptr);
 
@@ -98,7 +99,7 @@ namespace cuda
 
         thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
 
-        thrust::device_ptr<T> out_ptr_last = thrust::set_intersection(first_ptr, first_ptr_end,
+        thrust::device_ptr<T> out_ptr_last = thrust::set_intersection(THRUST_STREAM, first_ptr, first_ptr_end,
                                                                       second_ptr, second_ptr_end,
                                                                       out_ptr);
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list