[compute] 45/49: Remove redundant kernel compilation from binary_find

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Fri Dec 18 17:58:21 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository compute.

commit 36418e4cf8f79048bef8f60fef134a9e2127d34a
Author: Jakub Szuppe <j.szuppe at gmail.com>
Date:   Sat Sep 19 17:00:34 2015 +0200

    Remove redundant kernel compilation from binary_find
    
    After this binary_find kernel is compiled only once during binary_find algorithm
    execution instead of in every iteration of the while loop.
---
 .../boost/compute/algorithm/detail/binary_find.hpp | 48 ++++++++++------------
 perf/perf_binary_find.cpp                          |  4 ++
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/boost/compute/algorithm/detail/binary_find.hpp b/include/boost/compute/algorithm/detail/binary_find.hpp
index 53dc413..79bdb8b 100644
--- a/include/boost/compute/algorithm/detail/binary_find.hpp
+++ b/include/boost/compute/algorithm/detail/binary_find.hpp
@@ -26,45 +26,32 @@ namespace detail{
 ///
 /// Subclass of meta_kernel to perform single step in binary find.
 ///
+template<class InputIterator, class UnaryPredicate>
 class binary_find_kernel : public meta_kernel
 {
 public:
-    binary_find_kernel(size_t threads) : meta_kernel("binary_find")
-    {
-        m_threads = threads;
-    }
-
-    template<class InputIterator, class UnaryPredicate>
-    void set_range(InputIterator first,
-                   InputIterator last,
-                   UnaryPredicate predicate)
+    binary_find_kernel(InputIterator first,
+                       InputIterator last,
+                       UnaryPredicate predicate)
+        : meta_kernel("binary_find")
     {
         typedef typename std::iterator_traits<InputIterator>::value_type value_type;
-        int block = (iterator_range_size(first, last)-1)/(m_threads-1);
 
         m_index_arg = add_arg<uint_ *>(memory_object::global_memory, "index");
+        m_block_arg = add_arg<uint_>("block");
 
         atomic_min<uint_> atomic_min_uint;
 
         *this <<
-            "uint i = get_global_id(0) * " << block << ";\n" <<
+            "uint i = get_global_id(0) * block;\n" <<
             decl<value_type>("value") << "=" << first[var<uint_>("i")] << ";\n" <<
             "if(" << predicate(var<value_type>("value")) << ") {\n" <<
                 atomic_min_uint(var<uint_ *>("index"), var<uint_>("i")) << ";\n" <<
             "}\n";
-
     }
 
-    event exec(command_queue &queue, scalar<uint_> index)
-    {
-        set_arg(m_index_arg, index.get_buffer());
-
-        return exec_1d(queue, 0, m_threads);
-    }
-
-private:
-    size_t m_threads;
     size_t m_index_arg;
+    size_t m_block_arg;
 };
 
 ///
@@ -98,14 +85,23 @@ inline InputIterator binary_find(InputIterator first,
     InputIterator search_first = first;
     InputIterator search_last = last;
 
-    while(count > find_if_limit) {
+    scalar<uint_> index(queue.get_context());
 
-        scalar<uint_> index(queue.get_context());
+    // construct and compile binary_find kernel
+    binary_find_kernel<InputIterator, UnaryPredicate>
+        binary_find_kernel(search_first, search_last, predicate);
+    ::boost::compute::kernel kernel = binary_find_kernel.compile(queue.get_context());
+
+    // set buffer for index
+    kernel.set_arg(binary_find_kernel.m_index_arg, index.get_buffer());
+
+    while(count > find_if_limit) {
         index.write(static_cast<uint_>(count), queue);
 
-        binary_find_kernel kernel(threads);
-        kernel.set_range(search_first, search_last, predicate);
-        kernel.exec(queue, index);
+        // set block and run binary_find kernel
+        uint_ block = (count - 1)/(threads - 1);
+        kernel.set_arg(binary_find_kernel.m_block_arg, block);
+        queue.enqueue_1d_range_kernel(kernel, 0, threads, 0);
 
         size_t i = index.read(queue);
 
diff --git a/perf/perf_binary_find.cpp b/perf/perf_binary_find.cpp
index 37ad1fb..ee7c463 100644
--- a/perf/perf_binary_find.cpp
+++ b/perf/perf_binary_find.cpp
@@ -52,6 +52,10 @@ int main(int argc, char *argv[])
         device_vector.begin(), device_vector.end(), _1 < 20, queue
     );
 
+    // just to be sure everything is finished before measuring execution time
+    // of binary_find algorithm
+    queue.finish();
+
     perf_timer t;
     for(size_t trial = 0; trial < PERF_TRIALS; trial++){
         t.start();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/compute.git



More information about the debian-science-commits mailing list