[compute] 07/49: Reduce_by_key benchmarks for Boost.Compute, Thrust and BOLT

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Fri Dec 18 17:58:15 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository compute.

commit 33d8ed2fa9156d98c8d5e78cbe3a880eec66f2ae
Author: Jakub Szuppe <j.szuppe at gmail.com>
Date:   Wed Jul 15 11:10:06 2015 +0200

    Reduce_by_key benchmarks for Boost.Compute, Thrust and BOLT
---
 perf/CMakeLists.txt               |   3 +
 perf/perf_bolt_reduce_by_key.cpp  | 100 +++++++++++++++++++++++++++++++++
 perf/perf_reduce_by_key.cpp       | 114 ++++++++++++++++++++++++++++++++++++++
 perf/perf_thrust_reduce_by_key.cu |  91 ++++++++++++++++++++++++++++++
 4 files changed, 308 insertions(+)

diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
index 726e66d..8afdcc2 100644
--- a/perf/CMakeLists.txt
+++ b/perf/CMakeLists.txt
@@ -51,6 +51,7 @@ set(BENCHMARKS
   rotate_copy
   host_sort
   random_number_engine
+  reduce_by_key
   saxpy
   search
   search_n
@@ -132,6 +133,7 @@ if(${BOOST_COMPUTE_HAVE_CUDA})
     thrust_merge
     thrust_partial_sum
     thrust_partition
+    thrust_reduce_by_key
     thrust_reverse
     thrust_reverse_copy
     thrust_rotate
@@ -180,6 +182,7 @@ if(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11})
     bolt_max_element
     bolt_merge
     bolt_partial_sum
+    bolt_reduce_by_key
     bolt_saxpy
     bolt_sort
   )
diff --git a/perf/perf_bolt_reduce_by_key.cpp b/perf/perf_bolt_reduce_by_key.cpp
new file mode 100644
index 0000000..e766849
--- /dev/null
+++ b/perf/perf_bolt_reduce_by_key.cpp
@@ -0,0 +1,100 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe at gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+#include <bolt/cl/reduce_by_key.h>
+
+#include "perf.hpp"
+
+// Pseudo-random integer in [0, 25]: rand() / RAND_MAX scaled by 25 and truncated.
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+// Generator of non-decreasing keys: each call either repeats the current key
+// or, with probability of about 1/avgValuesNoPerKey, moves on to the next one.
+struct unique_key {
+  int current;            // last key handed out
+  int avgValuesNoPerKey;  // expected number of consecutive equal keys
+
+  unique_key() : current(0), avgValuesNoPerKey(512) {}
+
+  int operator()()
+  {
+      const double threshold = 1.0 / static_cast<double>(avgValuesNoPerKey);
+      const double draw = rand() / static_cast<double>(RAND_MAX);
+      if(draw <= threshold) {
+          current += 1;
+      }
+      return current;
+  }
+} UniqueKey;
+
+// Benchmarks bolt::cl::reduce_by_key on PERF_N generated (key, value) pairs and
+// prints the timer's min_time() over PERF_TRIALS runs; returns -1 on a wrong key count.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // create vector of keys and random values
+    std::vector<int> host_keys(PERF_N);
+    std::vector<int> host_values(PERF_N);
+    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // create device vectors for data
+    bolt::cl::device_vector<int> device_keys(PERF_N);
+    bolt::cl::device_vector<int> device_values(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(host_keys.begin(), host_keys.end(), device_keys.begin());
+    bolt::cl::copy(host_values.begin(), host_values.end(), device_values.begin());
+
+    // create device vectors for the results
+    bolt::cl::device_vector<int> device_keys_results(PERF_N);
+    bolt::cl::device_vector<int> device_values_results(PERF_N);
+
+    typedef bolt::cl::device_vector<int>::iterator iterType;
+    bolt::cl::pair<iterType, iterType> result = {
+        device_keys_results.begin(), device_values_results.begin()
+    };
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        result = bolt::cl::reduce_by_key(device_keys.begin(), device_keys.end(),
+                                         device_values.begin(),
+                                         device_keys_results.begin(),
+                                         device_values_results.begin());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // the generator may emit 1 as its first key, so count distinct keys as last - first + 1
+    size_t result_size = bolt::cl::distance(device_keys_results.begin(), result.first);
+    size_t expected_size = static_cast<size_t>(host_keys[PERF_N-1] - host_keys[0] + 1);
+    if(result_size != expected_size){
+        std::cout << "ERROR: " << "wrong number of keys" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/perf/perf_reduce_by_key.cpp b/perf/perf_reduce_by_key.cpp
new file mode 100644
index 0000000..c88d450
--- /dev/null
+++ b/perf/perf_reduce_by_key.cpp
@@ -0,0 +1,114 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe at gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/fill.hpp>
+#include <boost/compute/algorithm/reduce_by_key.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Pseudo-random integer in [0, 25]: rand() / RAND_MAX scaled by 25 and truncated.
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+// Generator of non-decreasing keys: each call either repeats the current key
+// or, with probability of about 1/avgValuesNoPerKey, moves on to the next one.
+struct unique_key {
+  int current;            // last key handed out
+  int avgValuesNoPerKey;  // expected number of consecutive equal keys
+
+  unique_key() : current(0), avgValuesNoPerKey(512) {}
+
+  int operator()()
+  {
+      const double threshold = 1.0 / static_cast<double>(avgValuesNoPerKey);
+      const double draw = rand() / static_cast<double>(RAND_MAX);
+      if(draw <= threshold) {
+          current += 1;
+      }
+      return current;
+  }
+} UniqueKey;
+
+// Benchmarks boost::compute::reduce_by_key on PERF_N generated (key, value) pairs and
+// prints the timer's min_time() over PERF_TRIALS runs; returns -1 on a wrong key count.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of keys and random values
+    std::vector<int> host_keys(PERF_N);
+    std::vector<int> host_values(PERF_N);
+    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // create vectors for keys and values on the device and copy the data
+    boost::compute::vector<int> device_keys(PERF_N, context);
+    boost::compute::vector<int> device_values(PERF_N,context);
+    boost::compute::copy(
+        host_keys.begin(), host_keys.end(),
+        device_keys.begin(),
+        queue
+    );
+    boost::compute::copy(
+        host_values.begin(), host_values.end(),
+        device_values.begin(),
+        queue
+    );
+
+    // vectors for the results
+    boost::compute::vector<int> device_keys_results(PERF_N, context);
+    boost::compute::vector<int> device_values_results(PERF_N,context);
+
+    typedef boost::compute::vector<int>::iterator iterType;
+    std::pair<iterType, iterType> result(
+        device_keys_results.begin(), device_values_results.begin()
+    );
+
+    // reduce by key
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        result = boost::compute::reduce_by_key(device_keys.begin(),
+                                               device_keys.end(),
+                                               device_values.begin(),
+                                               device_keys_results.begin(),
+                                               device_values_results.begin(),
+                                               queue);
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // the generator may emit 1 as its first key, so count distinct keys as last - first + 1
+    size_t result_size = std::distance(device_keys_results.begin(), result.first);
+    size_t expected_size = static_cast<size_t>(host_keys[PERF_N-1] - host_keys[0] + 1);
+    if(result_size != expected_size){
+        std::cout << "ERROR: wrong number of keys: got " << result_size
+                  << ", expected " << expected_size << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/perf/perf_thrust_reduce_by_key.cu b/perf/perf_thrust_reduce_by_key.cu
new file mode 100644
index 0000000..4266cb2
--- /dev/null
+++ b/perf/perf_thrust_reduce_by_key.cu
@@ -0,0 +1,91 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe at gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/reduce.h>
+
+#include "perf.hpp"
+
+// Pseudo-random integer in [0, 25]: rand() / RAND_MAX scaled by 25 and truncated.
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / static_cast<double>(RAND_MAX)));
+}
+
+// Generator of non-decreasing keys: each call either repeats the current key
+// or, with probability of about 1/avgValuesNoPerKey, moves on to the next one.
+struct unique_key {
+  int current;            // last key handed out
+  int avgValuesNoPerKey;  // expected number of consecutive equal keys
+
+  unique_key() : current(0), avgValuesNoPerKey(512) {}
+
+  int operator()()
+  {
+      const double threshold = 1.0 / static_cast<double>(avgValuesNoPerKey);
+      const double draw = rand() / static_cast<double>(RAND_MAX);
+      if(draw <= threshold) {
+          current += 1;
+      }
+      return current;
+  }
+} UniqueKey;
+
+// Benchmarks thrust::reduce_by_key on PERF_N generated (key, value) pairs and
+// prints the timer's min_time() over PERF_TRIALS runs; returns -1 on a wrong key count.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of keys and random values
+    thrust::host_vector<int> host_keys(PERF_N);
+    thrust::host_vector<int> host_values(PERF_N);
+    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // transfer data to the device
+    thrust::device_vector<int> device_keys = host_keys;
+    thrust::device_vector<int> device_values = host_values;
+
+    // create device vectors for the results
+    thrust::device_vector<int> device_keys_results(PERF_N);
+    thrust::device_vector<int> device_values_results(PERF_N);
+
+    typedef thrust::device_vector<int>::iterator iterType;  // no `typename`: not a dependent name
+    thrust::pair<iterType, iterType> result;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        result = thrust::reduce_by_key(device_keys.begin(), device_keys.end(),
+                                       device_values.begin(),
+                                       device_keys_results.begin(), device_values_results.begin());
+        cudaDeviceSynchronize();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // the generator may emit 1 as its first key, so count distinct keys as last - first + 1
+    size_t result_size = thrust::distance(device_keys_results.begin(), result.first);
+    size_t expected_size = static_cast<size_t>(host_keys[PERF_N-1] - host_keys[0] + 1);
+    if(result_size != expected_size){
+        std::cout << "ERROR: " << "wrong number of keys" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/compute.git



More information about the debian-science-commits mailing list