[compute] 45/49: Remove redundant kernel compilation from binary_find
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Fri Dec 18 17:58:21 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository compute.
commit 36418e4cf8f79048bef8f60fef134a9e2127d34a
Author: Jakub Szuppe <j.szuppe at gmail.com>
Date: Sat Sep 19 17:00:34 2015 +0200
Remove redundant kernel compilation from binary_find
After this change, the binary_find kernel is compiled only once per execution of
the binary_find algorithm, instead of once in every iteration of the while loop.
---
.../boost/compute/algorithm/detail/binary_find.hpp | 48 ++++++++++------------
perf/perf_binary_find.cpp | 4 ++
2 files changed, 26 insertions(+), 26 deletions(-)
diff --git a/include/boost/compute/algorithm/detail/binary_find.hpp b/include/boost/compute/algorithm/detail/binary_find.hpp
index 53dc413..79bdb8b 100644
--- a/include/boost/compute/algorithm/detail/binary_find.hpp
+++ b/include/boost/compute/algorithm/detail/binary_find.hpp
@@ -26,45 +26,32 @@ namespace detail{
///
/// Subclass of meta_kernel to perform single step in binary find.
///
+template<class InputIterator, class UnaryPredicate>
class binary_find_kernel : public meta_kernel
{
public:
- binary_find_kernel(size_t threads) : meta_kernel("binary_find")
- {
- m_threads = threads;
- }
-
- template<class InputIterator, class UnaryPredicate>
- void set_range(InputIterator first,
- InputIterator last,
- UnaryPredicate predicate)
+ binary_find_kernel(InputIterator first,
+ InputIterator last,
+ UnaryPredicate predicate)
+ : meta_kernel("binary_find")
{
typedef typename std::iterator_traits<InputIterator>::value_type value_type;
- int block = (iterator_range_size(first, last)-1)/(m_threads-1);
m_index_arg = add_arg<uint_ *>(memory_object::global_memory, "index");
+ m_block_arg = add_arg<uint_>("block");
atomic_min<uint_> atomic_min_uint;
*this <<
- "uint i = get_global_id(0) * " << block << ";\n" <<
+ "uint i = get_global_id(0) * block;\n" <<
decl<value_type>("value") << "=" << first[var<uint_>("i")] << ";\n" <<
"if(" << predicate(var<value_type>("value")) << ") {\n" <<
atomic_min_uint(var<uint_ *>("index"), var<uint_>("i")) << ";\n" <<
"}\n";
-
}
- event exec(command_queue &queue, scalar<uint_> index)
- {
- set_arg(m_index_arg, index.get_buffer());
-
- return exec_1d(queue, 0, m_threads);
- }
-
-private:
- size_t m_threads;
size_t m_index_arg;
+ size_t m_block_arg;
};
///
@@ -98,14 +85,23 @@ inline InputIterator binary_find(InputIterator first,
InputIterator search_first = first;
InputIterator search_last = last;
- while(count > find_if_limit) {
+ scalar<uint_> index(queue.get_context());
- scalar<uint_> index(queue.get_context());
+ // construct and compile binary_find kernel
+ binary_find_kernel<InputIterator, UnaryPredicate>
+ binary_find_kernel(search_first, search_last, predicate);
+ ::boost::compute::kernel kernel = binary_find_kernel.compile(queue.get_context());
+
+ // set buffer for index
+ kernel.set_arg(binary_find_kernel.m_index_arg, index.get_buffer());
+
+ while(count > find_if_limit) {
index.write(static_cast<uint_>(count), queue);
- binary_find_kernel kernel(threads);
- kernel.set_range(search_first, search_last, predicate);
- kernel.exec(queue, index);
+ // set block and run binary_find kernel
+ uint_ block = (count - 1)/(threads - 1);
+ kernel.set_arg(binary_find_kernel.m_block_arg, block);
+ queue.enqueue_1d_range_kernel(kernel, 0, threads, 0);
size_t i = index.read(queue);
diff --git a/perf/perf_binary_find.cpp b/perf/perf_binary_find.cpp
index 37ad1fb..ee7c463 100644
--- a/perf/perf_binary_find.cpp
+++ b/perf/perf_binary_find.cpp
@@ -52,6 +52,10 @@ int main(int argc, char *argv[])
device_vector.begin(), device_vector.end(), _1 < 20, queue
);
+ // just to be sure everything is finished before measuring execution time
+ // of binary_find algorithm
+ queue.finish();
+
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/compute.git
More information about the debian-science-commits
mailing list