[compute] 07/49: Reduce_by_key benchmarks for Boost.Compute, Thrust and BOLT
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Fri Dec 18 17:58:15 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository compute.
commit 33d8ed2fa9156d98c8d5e78cbe3a880eec66f2ae
Author: Jakub Szuppe <j.szuppe at gmail.com>
Date: Wed Jul 15 11:10:06 2015 +0200
Reduce_by_key benchmarks for Boost.Compute, Thrust and BOLT
---
perf/CMakeLists.txt | 3 +
perf/perf_bolt_reduce_by_key.cpp | 100 +++++++++++++++++++++++++++++++++
perf/perf_reduce_by_key.cpp | 114 ++++++++++++++++++++++++++++++++++++++
perf/perf_thrust_reduce_by_key.cu | 91 ++++++++++++++++++++++++++++++
4 files changed, 308 insertions(+)
diff --git a/perf/CMakeLists.txt b/perf/CMakeLists.txt
index 726e66d..8afdcc2 100644
--- a/perf/CMakeLists.txt
+++ b/perf/CMakeLists.txt
@@ -51,6 +51,7 @@ set(BENCHMARKS
rotate_copy
host_sort
random_number_engine
+ reduce_by_key
saxpy
search
search_n
@@ -132,6 +133,7 @@ if(${BOOST_COMPUTE_HAVE_CUDA})
thrust_merge
thrust_partial_sum
thrust_partition
+ thrust_reduce_by_key
thrust_reverse
thrust_reverse_copy
thrust_rotate
@@ -180,6 +182,7 @@ if(${BOOST_COMPUTE_HAVE_BOLT} AND ${BOOST_COMPUTE_USE_CPP11})
bolt_max_element
bolt_merge
bolt_partial_sum
+ bolt_reduce_by_key
bolt_saxpy
bolt_sort
)
diff --git a/perf/perf_bolt_reduce_by_key.cpp b/perf/perf_bolt_reduce_by_key.cpp
new file mode 100644
index 0000000..e766849
--- /dev/null
+++ b/perf/perf_bolt_reduce_by_key.cpp
@@ -0,0 +1,100 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe at gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <iostream>
+#include <algorithm>
+#include <vector>
+
+#include <bolt/cl/copy.h>
+#include <bolt/cl/device_vector.h>
+#include <bolt/cl/reduce_by_key.h>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Generator of non-decreasing keys: each call either repeats the current key
+// or, with probability of roughly 1/avgValuesNoPerKey, moves on to the next one.
+struct unique_key {
+    int current;            // key handed out by the most recent call
+    int avgValuesNoPerKey;  // average number of consecutive equal keys
+
+    unique_key() : current(0), avgValuesNoPerKey(512) {}
+
+    int operator()()
+    {
+        const double advance_probability = 1.0 / avgValuesNoPerKey;
+        const double draw = rand() / double(RAND_MAX);
+        if(draw <= advance_probability){
+            ++current;
+        }
+        return current;
+    }
+} UniqueKey;
+
+// Benchmark bolt::cl::reduce_by_key and verify the number of reduced keys.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    bolt::cl::control ctrl = bolt::cl::control::getDefault();
+    ::cl::Device device = ctrl.getDevice();
+    std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
+
+    // create vector of keys and random values
+    std::vector<int> host_keys(PERF_N);
+    std::vector<int> host_values(PERF_N);
+    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // create device vectors for data
+    bolt::cl::device_vector<int> device_keys(PERF_N);
+    bolt::cl::device_vector<int> device_values(PERF_N);
+
+    // transfer data to the device
+    bolt::cl::copy(host_keys.begin(), host_keys.end(), device_keys.begin());
+    bolt::cl::copy(host_values.begin(), host_values.end(), device_values.begin());
+
+    // create device vectors for the results
+    bolt::cl::device_vector<int> device_keys_results(PERF_N);
+    bolt::cl::device_vector<int> device_values_results(PERF_N);
+
+    typedef bolt::cl::device_vector<int>::iterator iterType;
+    bolt::cl::pair<iterType, iterType> result = {
+        device_keys_results.begin(),
+        device_values_results.begin()
+    };
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        result = bolt::cl::reduce_by_key(device_keys.begin(), device_keys.end(),
+                                         device_values.begin(),
+                                         device_keys_results.begin(), device_values_results.begin());
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // Keys are consecutive integers, but the first one is 1 rather than 0 when the
+    // generator advances on its very first call, so count from the actual first key.
+    size_t expected_size = static_cast<size_t>(host_keys[PERF_N-1] - host_keys[0] + 1);
+    size_t result_size = bolt::cl::distance(device_keys_results.begin(), result.first);
+    if(result_size != expected_size){
+        std::cout << "ERROR: " << "wrong number of keys" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/perf/perf_reduce_by_key.cpp b/perf/perf_reduce_by_key.cpp
new file mode 100644
index 0000000..c88d450
--- /dev/null
+++ b/perf/perf_reduce_by_key.cpp
@@ -0,0 +1,114 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe at gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <iostream>
+#include <numeric>
+#include <vector>
+
+#include <boost/compute/system.hpp>
+#include <boost/compute/algorithm/fill.hpp>
+#include <boost/compute/algorithm/reduce_by_key.hpp>
+#include <boost/compute/container/vector.hpp>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Generator of non-decreasing keys: each call either repeats the current key
+// or, with probability of roughly 1/avgValuesNoPerKey, moves on to the next one.
+struct unique_key {
+    int current;            // key handed out by the most recent call
+    int avgValuesNoPerKey;  // average number of consecutive equal keys
+
+    unique_key() : current(0), avgValuesNoPerKey(512) {}
+
+    int operator()()
+    {
+        const double advance_probability = 1.0 / avgValuesNoPerKey;
+        const double draw = rand() / double(RAND_MAX);
+        if(draw <= advance_probability){
+            ++current;
+        }
+        return current;
+    }
+} UniqueKey;
+
+// Benchmark boost::compute::reduce_by_key and verify the number of reduced keys.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // setup context and queue for the default device
+    boost::compute::device device = boost::compute::system::default_device();
+    boost::compute::context context(device);
+    boost::compute::command_queue queue(context, device);
+    std::cout << "device: " << device.name() << std::endl;
+
+    // create vector of keys and random values
+    std::vector<int> host_keys(PERF_N);
+    std::vector<int> host_values(PERF_N);
+    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // create vectors for keys and values on the device and copy the data
+    boost::compute::vector<int> device_keys(PERF_N, context);
+    boost::compute::vector<int> device_values(PERF_N, context);
+    boost::compute::copy(
+        host_keys.begin(), host_keys.end(), device_keys.begin(), queue
+    );
+    boost::compute::copy(
+        host_values.begin(), host_values.end(), device_values.begin(), queue
+    );
+
+    // vectors for the results
+    boost::compute::vector<int> device_keys_results(PERF_N, context);
+    boost::compute::vector<int> device_values_results(PERF_N, context);
+
+    typedef boost::compute::vector<int>::iterator iterType;
+    std::pair<iterType, iterType> result(
+        device_keys_results.begin(),
+        device_values_results.begin()
+    );
+
+    // reduce by key
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        result = boost::compute::reduce_by_key(device_keys.begin(),
+                                               device_keys.end(),
+                                               device_values.begin(),
+                                               device_keys_results.begin(),
+                                               device_values_results.begin(),
+                                               queue);
+        // make sure no enqueued device work is still pending when the timer stops
+        // (the Thrust variant of this benchmark synchronizes the device the same way)
+        queue.finish();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // Keys are consecutive integers, but the first one is 1 rather than 0 when the
+    // generator advances on its very first call, so count from the actual first key.
+    size_t expected_size = static_cast<size_t>(host_keys[PERF_N-1] - host_keys[0] + 1);
+    size_t result_size = std::distance(device_keys_results.begin(), result.first);
+    if(result_size != expected_size){
+        std::cout << "ERROR: " << "wrong number of keys: got " << result_size
+                  << ", expected " << expected_size << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/perf/perf_thrust_reduce_by_key.cu b/perf/perf_thrust_reduce_by_key.cu
new file mode 100644
index 0000000..4266cb2
--- /dev/null
+++ b/perf/perf_thrust_reduce_by_key.cu
@@ -0,0 +1,91 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2015 Jakub Szuppe <j.szuppe at gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#include <algorithm>
+#include <cstdlib>
+
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/generate.h>
+#include <thrust/host_vector.h>
+#include <thrust/reduce.h>
+
+#include "perf.hpp"
+
+// Returns a pseudo-random integer in [0, 25] derived from rand().
+int rand_int() {
+    return static_cast<int>(25.0 * (rand() / double(RAND_MAX)));
+}
+
+// Generator of non-decreasing keys: each call either repeats the current key
+// or, with probability of roughly 1/avgValuesNoPerKey, moves on to the next one.
+struct unique_key {
+    int current;            // key handed out by the most recent call
+    int avgValuesNoPerKey;  // average number of consecutive equal keys
+
+    unique_key() : current(0), avgValuesNoPerKey(512) {}
+
+    int operator()()
+    {
+        const double advance_probability = 1.0 / avgValuesNoPerKey;
+        const double draw = rand() / double(RAND_MAX);
+        if(draw <= advance_probability){
+            ++current;
+        }
+        return current;
+    }
+} UniqueKey;
+
+// Benchmark thrust::reduce_by_key and verify the number of reduced keys.
+int main(int argc, char *argv[])
+{
+    perf_parse_args(argc, argv);
+
+    std::cout << "size: " << PERF_N << std::endl;
+
+    // create vector of keys and random values
+    thrust::host_vector<int> host_keys(PERF_N);
+    thrust::host_vector<int> host_values(PERF_N);
+    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
+    std::generate(host_values.begin(), host_values.end(), rand_int);
+
+    // transfer data to the device
+    thrust::device_vector<int> device_keys = host_keys;
+    thrust::device_vector<int> device_values = host_values;
+
+    // create device vectors for the results
+    thrust::device_vector<int> device_keys_results(PERF_N);
+    thrust::device_vector<int> device_values_results(PERF_N);
+
+    typedef thrust::device_vector<int>::iterator iterType;
+    thrust::pair<iterType, iterType> result;
+
+    perf_timer t;
+    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
+        t.start();
+        result = thrust::reduce_by_key(device_keys.begin(), device_keys.end(),
+                                       device_values.begin(),
+                                       device_keys_results.begin(), device_values_results.begin());
+        cudaDeviceSynchronize();
+        t.stop();
+    }
+    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
+
+    // Keys are consecutive integers, but the first one is 1 rather than 0 when the
+    // generator advances on its very first call, so count from the actual first key.
+    size_t expected_size = static_cast<size_t>(host_keys[PERF_N-1] - host_keys[0] + 1);
+    size_t result_size = thrust::distance(device_keys_results.begin(), result.first);
+    if(result_size != expected_size){
+        std::cout << "ERROR: " << "wrong number of keys" << std::endl;
+        return -1;
+    }
+
+    return 0;
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/compute.git
More information about the debian-science-commits
mailing list