[pyfr] 60/88: Make the OpenCL/CUDA work sizes configurable.

Wed Nov 16 12:05:30 UTC 2016

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository pyfr.

commit 49a0b4774e31272d8555ab6970157a0e33165822
Author: Freddie Witherden <freddie at witherden.org>
Date:   Sun Jul 3 18:43:52 2016 -0700

    Make the OpenCL/CUDA work sizes configurable.
---
 doc/src/user_guide.rst           | 22 ++++++++++++++++++++++
 pyfr/backends/cuda/provider.py   | 12 ++++++++++--
 pyfr/backends/opencl/provider.py | 11 +++++++++--
 pyfr/inifile.py                  |  4 ++++
 pyfr/plugins/sampler.py          |  4 +---
 5 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/doc/src/user_guide.rst b/doc/src/user_guide.rst
index 69e65d4..54dee3c 100644
--- a/doc/src/user_guide.rst
+++ b/doc/src/user_guide.rst
@@ -200,12 +200,22 @@ Parameterises the CUDA backend with
 
      ``standard`` | ``cuda-aware``
 
+4. ``block-1d`` --- block size for one dimensional pointwise kernels:
+
+     *int*
+
+5. ``block-2d`` --- block size for two dimensional pointwise kernels:
+
+    *int*, *int*
+
 Example::
 
     [backend-cuda]
     device-id = round-robin
     gimmik-max-nnz = 512
     mpi-type = standard
+    block-1d = 64
+    block-2d = 128, 2
 
 [backend-mic]
 ^^^^^^^^^^^^^^^^
@@ -242,6 +252,16 @@ Parameterises the OpenCL backend with
 
      *int*
 
+5. ``local-size-1d`` --- local work size for one dimensional pointwise
+   kernels:
+
+    *int*
+
+6. ``local-size-2d`` --- local work size for two dimensional pointwise
+   kernels:
+
+    *int*, *int*
+
 Example::
 
     [backend-opencl]
@@ -249,6 +269,8 @@ Example::
     device-type = gpu
     device-id = local-rank
     gimmik-max-nnz = 512
+    local-size-1d = 16
+    local-size-2d = 128, 1
 
 [backend-openmp]
 ^^^^^^^^^^^^^^^^
diff --git a/pyfr/backends/cuda/provider.py b/pyfr/backends/cuda/provider.py
index 46fc5a7..7962ef3 100644
--- a/pyfr/backends/cuda/provider.py
+++ b/pyfr/backends/cuda/provider.py
@@ -33,8 +33,16 @@ class CUDAPointwiseKernelProvider(CUDAKernelProvider,
     kernel_generator_cls = generator.CUDAKernelGenerator
 
     def _instantiate_kernel(self, dims, fun, arglst):
-        # Determine the grid/block
-        block = (128, 2, 1) if len(dims) == 2 else (16, 1, 1)
+        cfg = self.backend.cfg
+
+        # Determine the block size
+        if len(dims) == 1:
+            block = (cfg.getint('backend-cuda', 'block-1d', '64'), 1, 1)
+        else:
+            block = cfg.getliteral('backend-cuda', 'block-2d', '128, 1')
+            block += (1,)
+
+        # Use this to compute the grid size
         grid = get_grid_for_block(block, *dims[::-1])
 
         class PointwiseKernel(ComputeKernel):
diff --git a/pyfr/backends/opencl/provider.py b/pyfr/backends/opencl/provider.py
index 2ffa864..e4b7127 100644
--- a/pyfr/backends/opencl/provider.py
+++ b/pyfr/backends/opencl/provider.py
@@ -31,9 +31,16 @@ class OpenCLPointwiseKernelProvider(OpenCLKernelProvider,
     kernel_generator_cls = generator.OpenCLKernelGenerator
 
     def _instantiate_kernel(self, dims, fun, arglst):
-        # Global and local sizes
+        cfg = self.backend.cfg
+
+        # Determine the local work size
+        if len(dims) == 1:
+            ls = (cfg.getint('backend-opencl', 'local-size-1d', '64'),)
+        else:
+            ls = cfg.getliteral('backend-opencl', 'local-size-2d', '128, 1')
+
+        # Global work size
         gs = tuple(dims[::-1])
-        ls = (128, 2) if len(dims) == 2 else (16,)
 
         class PointwiseKernel(ComputeKernel):
             def run(self, queue, **kwargs):
diff --git a/pyfr/inifile.py b/pyfr/inifile.py
index 6db63a2..fce04c3 100644
--- a/pyfr/inifile.py
+++ b/pyfr/inifile.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from ast import literal_eval
 from collections import OrderedDict
 from configparser import SafeConfigParser, NoSectionError, NoOptionError
 import io
@@ -96,6 +97,9 @@ class Inifile(object):
     def getint(self, section, option, default=_sentinel):
         return int(self.get(section, option, default))
 
+    def getliteral(self, section, option, default=_sentinel):
+        return literal_eval(self.get(section, option, default))
+
     def items(self, section):
         return OrderedDict(self._cp.items(section))
 
diff --git a/pyfr/plugins/sampler.py b/pyfr/plugins/sampler.py
index 886257b..1cac019 100644
--- a/pyfr/plugins/sampler.py
+++ b/pyfr/plugins/sampler.py
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import ast
-
 import numpy as np
 
 from pyfr.mpiutil import get_comm_rank_root, get_mpi
@@ -71,7 +69,7 @@ class SamplerPlugin(BasePlugin):
         self.nsteps = self.cfg.getint(cfgsect, 'nsteps')
 
         # List of points to be sampled and format
-        self.pts = ast.literal_eval(self.cfg.get(cfgsect, 'samp-pts'))
+        self.pts = self.cfg.getliteral(cfgsect, 'samp-pts')
         self.fmt = self.cfg.get(cfgsect, 'format', 'primitive')
 
         # MPI info

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git