[arrayfire] 270/408: Using cudaMemsetAsync for SIFT
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:12 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.
commit 9bd1693de849d5b8b6c11e65296ff311c80fd957
Author: Peter Andreas Entschev <peter at arrayfire.com>
Date: Mon Aug 17 11:01:31 2015 -0400
Using cudaMemsetAsync for SIFT
---
src/backend/cuda/kernel/sift.hpp | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/src/backend/cuda/kernel/sift.hpp b/src/backend/cuda/kernel/sift.hpp
index 5008887..0c999a9 100644
--- a/src/backend/cuda/kernel/sift.hpp
+++ b/src/backend/cuda/kernel/sift.hpp
@@ -1119,7 +1119,8 @@ void sift(unsigned* out_feat,
const unsigned imel = dog_pyr[i].dims[0] * dog_pyr[i].dims[1];
const unsigned max_feat = ceil(imel * feature_ratio);
- CUDA_CHECK(cudaMemset(d_count, 0, sizeof(unsigned)));
+ CUDA_CHECK(cudaMemsetAsync(d_count, 0, sizeof(unsigned),
+ cuda::getStream(cuda::getActiveDeviceId())));
float* d_extrema_x = memAlloc<float>(max_feat);
float* d_extrema_y = memAlloc<float>(max_feat);
@@ -1150,7 +1151,8 @@ void sift(unsigned* out_feat,
continue;
}
- CUDA_CHECK(cudaMemset(d_count, 0, sizeof(unsigned)));
+ CUDA_CHECK(cudaMemsetAsync(d_count, 0, sizeof(unsigned),
+ cuda::getStream(cuda::getActiveDeviceId())));
unsigned interp_feat = 0;
@@ -1180,7 +1182,8 @@ void sift(unsigned* out_feat,
CUDA_CHECK(cudaMemcpy(&interp_feat, d_count, sizeof(unsigned), cudaMemcpyDeviceToHost));
interp_feat = min(interp_feat, max_feat);
- CUDA_CHECK(cudaMemset(d_count, 0, sizeof(unsigned)));
+ CUDA_CHECK(cudaMemsetAsync(d_count, 0, sizeof(unsigned),
+ cuda::getStream(cuda::getActiveDeviceId())));
if (interp_feat == 0) {
memFree(d_interp_x);
@@ -1241,7 +1244,8 @@ void sift(unsigned* out_feat,
unsigned nodup_feat = 0;
CUDA_CHECK(cudaMemcpy(&nodup_feat, d_count, sizeof(unsigned), cudaMemcpyDeviceToHost));
- CUDA_CHECK(cudaMemset(d_count, 0, sizeof(unsigned)));
+ CUDA_CHECK(cudaMemsetAsync(d_count, 0, sizeof(unsigned),
+ cuda::getStream(cuda::getActiveDeviceId())));
const unsigned max_oriented_feat = nodup_feat * 3;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list