[arrayfire] 271/284: OpenCL JIT now launches more threads per work group for CPU devices
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:41 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 95aaf729dfc362b08646870fa5f01d91bdebb600
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Thu Feb 4 01:53:32 2016 -0500
OpenCL JIT now launches more threads per work group for CPU devices
---
src/backend/opencl/jit.cpp | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/src/backend/opencl/jit.cpp b/src/backend/opencl/jit.cpp
index 66c7c1e..d6ab240 100644
--- a/src/backend/opencl/jit.cpp
+++ b/src/backend/opencl/jit.cpp
@@ -19,6 +19,7 @@
#include <dispatch.hpp>
#include <err_opencl.hpp>
#include <functional>
+#include <af/opencl.h>
namespace opencl
{
@@ -180,13 +181,16 @@ void evalNodes(Param &out, Node *node)
uint groups_1 = 1;
uint num_odims = 4;
+ // CPUs seem to perform better with work group size 1024
+ const int work_group_size = (getActiveDeviceType() == AFCL_DEVICE_TYPE_CPU) ? 1024 : 256;
+
while (num_odims >= 1) {
if (out.info.dims[num_odims - 1] == 1) num_odims--;
else break;
}
if (is_linear) {
- local_0 = 256;
+ local_0 = work_group_size;
uint out_elements = out.info.dims[3] * out.info.strides[3];
uint groups = divup(out_elements, local_0);
@@ -194,8 +198,8 @@ void evalNodes(Param &out, Node *node)
global_0 = divup(groups, global_1) * local_0;
} else {
- local_0 = 64;
local_1 = 4;
+ local_0 = work_group_size / local_1;
groups_0 = divup(out.info.dims[0], local_0);
groups_1 = divup(out.info.dims[1], local_1);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits mailing list