[arrayfire] 188/408: Added stream parameter for upstream{thrust, cufft, cublas} calls
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:53 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit d46b15b143ea7d64c9d6e8bcbe39f382a98485fd
Author: pradeep <pradeep at arrayfire.com>
Date: Tue Aug 4 20:10:44 2015 -0400
Added stream parameter for upstream{thrust, cufft, cublas} calls
---
src/backend/cuda/cublasManager.cpp | 1 +
src/backend/cuda/debug_cuda.hpp | 2 ++
src/backend/cuda/fft.cpp | 3 ++-
src/backend/cuda/kernel/regions.hpp | 10 ++++++----
src/backend/cuda/kernel/sort.hpp | 5 +++--
src/backend/cuda/kernel/sort_by_key.hpp | 5 +++--
src/backend/cuda/kernel/sort_index.hpp | 13 ++++++++-----
src/backend/cuda/set.cu | 11 ++++++-----
8 files changed, 31 insertions(+), 19 deletions(-)
diff --git a/src/backend/cuda/cublasManager.cpp b/src/backend/cuda/cublasManager.cpp
index 1f34d54..ca6cfbb 100644
--- a/src/backend/cuda/cublasManager.cpp
+++ b/src/backend/cuda/cublasManager.cpp
@@ -45,6 +45,7 @@ namespace cublas {
cublasHandle() : handle(0)
{
CUBLAS_CHECK(cublasCreate(&handle));
+ CUBLAS_CHECK(cublasSetStream(handle, cuda::getStream(cuda::getActiveDeviceId())));
}
~cublasHandle()
diff --git a/src/backend/cuda/debug_cuda.hpp b/src/backend/cuda/debug_cuda.hpp
index 3ed83d5..deb7a16 100644
--- a/src/backend/cuda/debug_cuda.hpp
+++ b/src/backend/cuda/debug_cuda.hpp
@@ -11,6 +11,8 @@
#include <platform.hpp>
#include <err_cuda.hpp>
+#define THRUST_STREAM thrust::cuda::par.on(cuda::getStream(cuda::getActiveDeviceId()))
+
#define CUDA_LAUNCH_SMEM(fn, blks, thrds, smem_size, ...) \
fn<<<blks, thrds, smem_size, cuda::getStream(cuda::getActiveDeviceId())>>>(__VA_ARGS__)
diff --git a/src/backend/cuda/fft.cpp b/src/backend/cuda/fft.cpp
index 31d1f7c..210fa1c 100644
--- a/src/backend/cuda/fft.cpp
+++ b/src/backend/cuda/fft.cpp
@@ -13,7 +13,7 @@
#include <Array.hpp>
#include <copy.hpp>
#include <fft.hpp>
-#include <err_cuda.hpp>
+#include <debug_cuda.hpp>
#include <err_cufft.hpp>
#include <cufft.h>
#include <math.hpp>
@@ -123,6 +123,7 @@ void find_cufft_plan(cufftHandle &plan, int rank, int *n,
}
plan = temp;
+ cufftSetStream(plan, cuda::getStream(cuda::getActiveDeviceId()));
planner.mHandles[slot_index] = temp;
planner.mKeys[slot_index] = key_string;
planner.mAvailSlotIndex = (slot_index + 1)%cuFFTPlanner::MAX_PLAN_CACHE;
diff --git a/src/backend/cuda/kernel/regions.hpp b/src/backend/cuda/kernel/regions.hpp
index 4693986..467fa6b 100644
--- a/src/backend/cuda/kernel/regions.hpp
+++ b/src/backend/cuda/kernel/regions.hpp
@@ -15,6 +15,7 @@
#include <stdio.h>
#include <memory.hpp>
+#include <thrust/system/cuda/detail/par.h>
#include <thrust/adjacent_difference.h>
#include <thrust/binary_search.h>
#include <thrust/device_vector.h>
@@ -442,7 +443,7 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
thrust::device_ptr<T> wrapped_tmp = thrust::device_pointer_cast(tmp);
// Sort the copy
- thrust::sort(wrapped_tmp, wrapped_tmp + size);
+ thrust::sort(THRUST_STREAM, wrapped_tmp, wrapped_tmp + size);
// Take the max element, this is the number of label assignments to
// compute.
@@ -452,10 +453,10 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
// Find the end of each section of values
thrust::counting_iterator<T> search_begin(0);
- thrust::upper_bound(wrapped_tmp, wrapped_tmp + size,
+ thrust::upper_bound(THRUST_STREAM, wrapped_tmp, wrapped_tmp + size,
search_begin, search_begin + num_bins,
labels.begin());
- thrust::adjacent_difference(labels.begin(), labels.end(), labels.begin());
+ thrust::adjacent_difference(THRUST_STREAM, labels.begin(), labels.end(), labels.begin());
// Operators for the scan
clamp_to_one<T> clamp;
@@ -463,7 +464,8 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
// Perform the scan -- this can computes the correct labels for each
// component
- thrust::transform_exclusive_scan(labels.begin(),
+ thrust::transform_exclusive_scan(THRUST_STREAM,
+ labels.begin(),
labels.end(),
labels.begin(),
clamp,
diff --git a/src/backend/cuda/kernel/sort.hpp b/src/backend/cuda/kernel/sort.hpp
index 34256d7..6c83903 100644
--- a/src/backend/cuda/kernel/sort.hpp
+++ b/src/backend/cuda/kernel/sort.hpp
@@ -12,6 +12,7 @@
#include <Param.hpp>
#include <err_cuda.hpp>
#include <debug_cuda.hpp>
+#include <thrust/system/cuda/detail/par.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
@@ -40,9 +41,9 @@ namespace cuda
int valOffset = valWZ + y * val.strides[1];
if(isAscending) {
- thrust::sort(val_ptr + valOffset, val_ptr + valOffset + val.dims[0]);
+ thrust::sort(THRUST_STREAM, val_ptr + valOffset, val_ptr + valOffset + val.dims[0]);
} else {
- thrust::sort(val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
+ thrust::sort(THRUST_STREAM, val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
thrust::greater<T>());
}
}
diff --git a/src/backend/cuda/kernel/sort_by_key.hpp b/src/backend/cuda/kernel/sort_by_key.hpp
index 7c63c28..798869f 100644
--- a/src/backend/cuda/kernel/sort_by_key.hpp
+++ b/src/backend/cuda/kernel/sort_by_key.hpp
@@ -12,6 +12,7 @@
#include <Param.hpp>
#include <err_cuda.hpp>
#include <debug_cuda.hpp>
+#include <thrust/system/cuda/detail/par.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
@@ -44,10 +45,10 @@ namespace cuda
int ovalOffset = ovalWZ + y * oval.strides[1];
if(isAscending) {
- thrust::sort_by_key(okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
+ thrust::sort_by_key(THRUST_STREAM, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
oval_ptr + ovalOffset);
} else {
- thrust::sort_by_key(okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
+ thrust::sort_by_key(THRUST_STREAM, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
oval_ptr + ovalOffset, thrust::greater<Tk>());
}
}
diff --git a/src/backend/cuda/kernel/sort_index.hpp b/src/backend/cuda/kernel/sort_index.hpp
index df1feba..53b19e0 100644
--- a/src/backend/cuda/kernel/sort_index.hpp
+++ b/src/backend/cuda/kernel/sort_index.hpp
@@ -12,6 +12,7 @@
#include <Param.hpp>
#include <err_cuda.hpp>
#include <debug_cuda.hpp>
+#include <thrust/system/cuda/detail/par.h>
#include <thrust/device_ptr.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
@@ -40,13 +41,15 @@ namespace cuda
int valOffset = valWZ + y * val.strides[1];
int idxOffset = idxWZ + y * idx.strides[1];
- thrust::sequence(idx_ptr + idxOffset, idx_ptr + idxOffset + idx.dims[0]);
+ thrust::sequence(THRUST_STREAM, idx_ptr + idxOffset, idx_ptr + idxOffset + idx.dims[0]);
if(isAscending) {
- thrust::sort_by_key(val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
- idx_ptr + idxOffset);
+ thrust::sort_by_key(THRUST_STREAM,
+ val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
+ idx_ptr + idxOffset);
} else {
- thrust::sort_by_key(val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
- idx_ptr + idxOffset, thrust::greater<T>());
+ thrust::sort_by_key(THRUST_STREAM,
+ val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
+ idx_ptr + idxOffset, thrust::greater<T>());
}
}
}
diff --git a/src/backend/cuda/set.cu b/src/backend/cuda/set.cu
index b164165..b8343eb 100644
--- a/src/backend/cuda/set.cu
+++ b/src/backend/cuda/set.cu
@@ -14,8 +14,9 @@
#include <set.hpp>
#include <copy.hpp>
#include <sort.hpp>
-#include <err_cuda.hpp>
+#include <debug_cuda.hpp>
+#include <thrust/system/cuda/detail/par.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include <thrust/unique.h>
@@ -34,8 +35,8 @@ namespace cuda
thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
thrust::device_ptr<T> out_ptr_end = out_ptr + out.dims()[0];
- if(!is_sorted) thrust::sort(out_ptr, out_ptr_end);
- thrust::device_ptr<T> out_ptr_last = thrust::unique(out_ptr, out_ptr_end);
+ if(!is_sorted) thrust::sort(THRUST_STREAM, out_ptr, out_ptr_end);
+ thrust::device_ptr<T> out_ptr_last = thrust::unique(THRUST_STREAM, out_ptr, out_ptr_end);
out.resetDims(dim4(thrust::distance(out_ptr, out_ptr_last)));
return out;
@@ -65,7 +66,7 @@ namespace cuda
thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
- thrust::device_ptr<T> out_ptr_last = thrust::set_union(first_ptr, first_ptr_end,
+ thrust::device_ptr<T> out_ptr_last = thrust::set_union(THRUST_STREAM, first_ptr, first_ptr_end,
second_ptr, second_ptr_end,
out_ptr);
@@ -98,7 +99,7 @@ namespace cuda
thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
- thrust::device_ptr<T> out_ptr_last = thrust::set_intersection(first_ptr, first_ptr_end,
+ thrust::device_ptr<T> out_ptr_last = thrust::set_intersection(THRUST_STREAM, first_ptr, first_ptr_end,
second_ptr, second_ptr_end,
out_ptr);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list