[arrayfire] 191/408: thrust fixes for cuda stream selection on cuda < 7.0
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:11:54 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 60c2be968423f896402df726bd3fc17b7f7a2259
Author: pradeep <pradeep at arrayfire.com>
Date: Wed Aug 5 13:20:40 2015 -0400
thrust fixes for cuda stream selection on cuda < 7.0
---
src/backend/cuda/debug_cuda.hpp | 25 ++++++++++++++++++++++++-
src/backend/cuda/kernel/regions.hpp | 8 ++++----
src/backend/cuda/kernel/sort.hpp | 6 ++----
src/backend/cuda/kernel/sort_by_key.hpp | 7 ++-----
src/backend/cuda/kernel/sort_index.hpp | 7 +++----
src/backend/cuda/set.cu | 16 +++++++---------
6 files changed, 42 insertions(+), 27 deletions(-)
diff --git a/src/backend/cuda/debug_cuda.hpp b/src/backend/cuda/debug_cuda.hpp
index deb7a16..813456b 100644
--- a/src/backend/cuda/debug_cuda.hpp
+++ b/src/backend/cuda/debug_cuda.hpp
@@ -10,9 +10,32 @@
#pragma once
#include <platform.hpp>
#include <err_cuda.hpp>
+#include <thrust/version.h>
+#include <thrust/system/cuda/detail/par.h>
#define THRUST_STREAM thrust::cuda::par.on(cuda::getStream(cuda::getActiveDeviceId()))
+#if THRUST_MAJOR_VERSION>=1 && THRUST_MINOR_VERSION>=8
+
+#define THRUST_SELECT(fn, ...) fn(THRUST_STREAM, __VA_ARGS__)
+#define THRUST_SELECT_OUT(res, fn, ...) res = fn(THRUST_STREAM, __VA_ARGS__)
+
+#else
+
+#define THRUST_SELECT(fn, ...) \
+ do { \
+ CUDA_CHECK(cudaStreamSynchronize(cuda::getStream(cuda::getActiveDeviceId()))); \
+ fn(__VA_ARGS__); \
+ } while(0)
+
+#define THRUST_SELECT_OUT(res, fn, ...) \
+ do { \
+ CUDA_CHECK(cudaStreamSynchronize(cuda::getStream(cuda::getActiveDeviceId()))); \
+ res = fn(__VA_ARGS__); \
+ } while(0)
+
+#endif
+
#define CUDA_LAUNCH_SMEM(fn, blks, thrds, smem_size, ...) \
fn<<<blks, thrds, smem_size, cuda::getStream(cuda::getActiveDeviceId())>>>(__VA_ARGS__)
@@ -23,7 +46,7 @@
#ifndef NDEBUG
#define POST_LAUNCH_CHECK() do { \
- CUDA_CHECK(cudaStreamSynchronize(getStream())); \
+ CUDA_CHECK(cudaStreamSynchronize(getStream(getActiveDeviceId()))); \
} while(0) \
#else
diff --git a/src/backend/cuda/kernel/regions.hpp b/src/backend/cuda/kernel/regions.hpp
index 467fa6b..9524811 100644
--- a/src/backend/cuda/kernel/regions.hpp
+++ b/src/backend/cuda/kernel/regions.hpp
@@ -443,7 +443,7 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
thrust::device_ptr<T> wrapped_tmp = thrust::device_pointer_cast(tmp);
// Sort the copy
- thrust::sort(THRUST_STREAM, wrapped_tmp, wrapped_tmp + size);
+ THRUST_SELECT(thrust::sort, wrapped_tmp, wrapped_tmp + size);
// Take the max element, this is the number of label assignments to
// compute.
@@ -453,10 +453,10 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
// Find the end of each section of values
thrust::counting_iterator<T> search_begin(0);
- thrust::upper_bound(THRUST_STREAM, wrapped_tmp, wrapped_tmp + size,
+ THRUST_SELECT(thrust::upper_bound, wrapped_tmp, wrapped_tmp + size,
search_begin, search_begin + num_bins,
labels.begin());
- thrust::adjacent_difference(THRUST_STREAM, labels.begin(), labels.end(), labels.begin());
+ THRUST_SELECT(thrust::adjacent_difference, labels.begin(), labels.end(), labels.begin());
// Operators for the scan
clamp_to_one<T> clamp;
@@ -464,7 +464,7 @@ void regions(cuda::Param<T> out, cuda::CParam<char> in, cudaTextureObject_t tex)
// Perform the scan -- this can computes the correct labels for each
// component
- thrust::transform_exclusive_scan(THRUST_STREAM,
+ THRUST_SELECT(thrust::transform_exclusive_scan,
labels.begin(),
labels.end(),
labels.begin(),
diff --git a/src/backend/cuda/kernel/sort.hpp b/src/backend/cuda/kernel/sort.hpp
index 6c83903..b23e308 100644
--- a/src/backend/cuda/kernel/sort.hpp
+++ b/src/backend/cuda/kernel/sort.hpp
@@ -12,7 +12,6 @@
#include <Param.hpp>
#include <err_cuda.hpp>
#include <debug_cuda.hpp>
-#include <thrust/system/cuda/detail/par.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
@@ -41,10 +40,9 @@ namespace cuda
int valOffset = valWZ + y * val.strides[1];
if(isAscending) {
- thrust::sort(THRUST_STREAM, val_ptr + valOffset, val_ptr + valOffset + val.dims[0]);
+ THRUST_SELECT(thrust::sort, val_ptr + valOffset, val_ptr + valOffset + val.dims[0]);
} else {
- thrust::sort(THRUST_STREAM, val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
- thrust::greater<T>());
+ THRUST_SELECT(thrust::sort, val_ptr + valOffset, val_ptr + valOffset + val.dims[0], thrust::greater<T>());
}
}
}
diff --git a/src/backend/cuda/kernel/sort_by_key.hpp b/src/backend/cuda/kernel/sort_by_key.hpp
index 798869f..42a3256 100644
--- a/src/backend/cuda/kernel/sort_by_key.hpp
+++ b/src/backend/cuda/kernel/sort_by_key.hpp
@@ -12,7 +12,6 @@
#include <Param.hpp>
#include <err_cuda.hpp>
#include <debug_cuda.hpp>
-#include <thrust/system/cuda/detail/par.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
@@ -45,11 +44,9 @@ namespace cuda
int ovalOffset = ovalWZ + y * oval.strides[1];
if(isAscending) {
- thrust::sort_by_key(THRUST_STREAM, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
- oval_ptr + ovalOffset);
+ THRUST_SELECT(thrust::sort_by_key, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0], oval_ptr + ovalOffset);
} else {
- thrust::sort_by_key(THRUST_STREAM, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0],
- oval_ptr + ovalOffset, thrust::greater<Tk>());
+ THRUST_SELECT(thrust::sort_by_key, okey_ptr + okeyOffset, okey_ptr + okeyOffset + okey.dims[0], oval_ptr + ovalOffset, thrust::greater<Tk>());
}
}
}
diff --git a/src/backend/cuda/kernel/sort_index.hpp b/src/backend/cuda/kernel/sort_index.hpp
index 53b19e0..9d29914 100644
--- a/src/backend/cuda/kernel/sort_index.hpp
+++ b/src/backend/cuda/kernel/sort_index.hpp
@@ -12,7 +12,6 @@
#include <Param.hpp>
#include <err_cuda.hpp>
#include <debug_cuda.hpp>
-#include <thrust/system/cuda/detail/par.h>
#include <thrust/device_ptr.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
@@ -41,13 +40,13 @@ namespace cuda
int valOffset = valWZ + y * val.strides[1];
int idxOffset = idxWZ + y * idx.strides[1];
- thrust::sequence(THRUST_STREAM, idx_ptr + idxOffset, idx_ptr + idxOffset + idx.dims[0]);
+ THRUST_SELECT(thrust::sequence, idx_ptr + idxOffset, idx_ptr + idxOffset + idx.dims[0]);
if(isAscending) {
- thrust::sort_by_key(THRUST_STREAM,
+ THRUST_SELECT(thrust::sort_by_key,
val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
idx_ptr + idxOffset);
} else {
- thrust::sort_by_key(THRUST_STREAM,
+ THRUST_SELECT(thrust::sort_by_key,
val_ptr + valOffset, val_ptr + valOffset + val.dims[0],
idx_ptr + idxOffset, thrust::greater<T>());
}
diff --git a/src/backend/cuda/set.cu b/src/backend/cuda/set.cu
index b8343eb..5b457e1 100644
--- a/src/backend/cuda/set.cu
+++ b/src/backend/cuda/set.cu
@@ -16,7 +16,6 @@
#include <sort.hpp>
#include <debug_cuda.hpp>
-#include <thrust/system/cuda/detail/par.h>
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include <thrust/unique.h>
@@ -35,8 +34,9 @@ namespace cuda
thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
thrust::device_ptr<T> out_ptr_end = out_ptr + out.dims()[0];
- if(!is_sorted) thrust::sort(THRUST_STREAM, out_ptr, out_ptr_end);
- thrust::device_ptr<T> out_ptr_last = thrust::unique(THRUST_STREAM, out_ptr, out_ptr_end);
+ if(!is_sorted) THRUST_SELECT(thrust::sort, out_ptr, out_ptr_end);
+ thrust::device_ptr<T> out_ptr_last;
+ THRUST_SELECT_OUT(out_ptr_last, thrust::unique, out_ptr, out_ptr_end);
out.resetDims(dim4(thrust::distance(out_ptr, out_ptr_last)));
return out;
@@ -66,9 +66,8 @@ namespace cuda
thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
- thrust::device_ptr<T> out_ptr_last = thrust::set_union(THRUST_STREAM, first_ptr, first_ptr_end,
- second_ptr, second_ptr_end,
- out_ptr);
+ thrust::device_ptr<T> out_ptr_last;
+ THRUST_SELECT_OUT(out_ptr_last, thrust::set_union, first_ptr, first_ptr_end, second_ptr, second_ptr_end, out_ptr);
out.resetDims(dim4(thrust::distance(out_ptr, out_ptr_last)));
@@ -99,9 +98,8 @@ namespace cuda
thrust::device_ptr<T> out_ptr = thrust::device_pointer_cast<T>(out.get());
- thrust::device_ptr<T> out_ptr_last = thrust::set_intersection(THRUST_STREAM, first_ptr, first_ptr_end,
- second_ptr, second_ptr_end,
- out_ptr);
+ thrust::device_ptr<T> out_ptr_last;
+ THRUST_SELECT_OUT(out_ptr_last, thrust::set_intersection, first_ptr, first_ptr_end, second_ptr, second_ptr_end, out_ptr);
out.resetDims(dim4(thrust::distance(out_ptr, out_ptr_last)));
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list