[pyfr] 60/88: Make the OpenCL/CUDA work sizes configurable.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Wed Nov 16 12:05:30 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository pyfr.
commit 49a0b4774e31272d8555ab6970157a0e33165822
Author: Freddie Witherden <freddie at witherden.org>
Date: Sun Jul 3 18:43:52 2016 -0700
Make the OpenCL/CUDA work sizes configurable.
---
doc/src/user_guide.rst | 22 ++++++++++++++++++++++
pyfr/backends/cuda/provider.py | 12 ++++++++++--
pyfr/backends/opencl/provider.py | 11 +++++++++--
pyfr/inifile.py | 4 ++++
pyfr/plugins/sampler.py | 4 +---
5 files changed, 46 insertions(+), 7 deletions(-)
diff --git a/doc/src/user_guide.rst b/doc/src/user_guide.rst
index 69e65d4..54dee3c 100644
--- a/doc/src/user_guide.rst
+++ b/doc/src/user_guide.rst
@@ -200,12 +200,22 @@ Parameterises the CUDA backend with
``standard`` | ``cuda-aware``
+4. ``block-1d`` --- block size for one dimensional pointwise kernels:
+
+ *int*
+
+5. ``block-2d`` --- block size for two dimensional pointwise kernels:
+
+ *int*, *int*
+
Example::
[backend-cuda]
device-id = round-robin
gimmik-max-nnz = 512
mpi-type = standard
+ block-1d = 64
+ block-2d = 128, 2
[backend-mic]
^^^^^^^^^^^^^^^^
@@ -242,6 +252,16 @@ Parameterises the OpenCL backend with
*int*
+5. ``local-size-1d`` --- local work size for one dimensional pointwise
+ kernels:
+
+ *int*
+
+6. ``local-size-2d`` --- local work size for two dimensional pointwise
+ kernels:
+
+ *int*, *int*
+
Example::
[backend-opencl]
@@ -249,6 +269,8 @@ Example::
device-type = gpu
device-id = local-rank
gimmik-max-nnz = 512
+ local-size-1d = 16
+ local-size-2d = 128, 1
[backend-openmp]
^^^^^^^^^^^^^^^^
diff --git a/pyfr/backends/cuda/provider.py b/pyfr/backends/cuda/provider.py
index 46fc5a7..7962ef3 100644
--- a/pyfr/backends/cuda/provider.py
+++ b/pyfr/backends/cuda/provider.py
@@ -33,8 +33,16 @@ class CUDAPointwiseKernelProvider(CUDAKernelProvider,
kernel_generator_cls = generator.CUDAKernelGenerator
def _instantiate_kernel(self, dims, fun, arglst):
- # Determine the grid/block
- block = (128, 2, 1) if len(dims) == 2 else (16, 1, 1)
+ cfg = self.backend.cfg
+
+ # Determine the block size
+ if len(dims) == 1:
+ block = (cfg.getint('backend-cuda', 'block-1d', '64'), 1, 1)
+ else:
+ block = cfg.getliteral('backend-cuda', 'block-2d', '128, 1')
+ block += (1,)
+
+ # Use this to compute the grid size
grid = get_grid_for_block(block, *dims[::-1])
class PointwiseKernel(ComputeKernel):
diff --git a/pyfr/backends/opencl/provider.py b/pyfr/backends/opencl/provider.py
index 2ffa864..e4b7127 100644
--- a/pyfr/backends/opencl/provider.py
+++ b/pyfr/backends/opencl/provider.py
@@ -31,9 +31,16 @@ class OpenCLPointwiseKernelProvider(OpenCLKernelProvider,
kernel_generator_cls = generator.OpenCLKernelGenerator
def _instantiate_kernel(self, dims, fun, arglst):
- # Global and local sizes
+ cfg = self.backend.cfg
+
+ # Determine the local work size
+ if len(dims) == 1:
+ ls = (cfg.getint('backend-opencl', 'local-size-1d', '64'),)
+ else:
+ ls = cfg.getliteral('backend-opencl', 'local-size-2d', '128, 1')
+
+ # Global work size
gs = tuple(dims[::-1])
- ls = (128, 2) if len(dims) == 2 else (16,)
class PointwiseKernel(ComputeKernel):
def run(self, queue, **kwargs):
diff --git a/pyfr/inifile.py b/pyfr/inifile.py
index 6db63a2..fce04c3 100644
--- a/pyfr/inifile.py
+++ b/pyfr/inifile.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
+from ast import literal_eval
from collections import OrderedDict
from configparser import SafeConfigParser, NoSectionError, NoOptionError
import io
@@ -96,6 +97,9 @@ class Inifile(object):
def getint(self, section, option, default=_sentinel):
return int(self.get(section, option, default))
+ def getliteral(self, section, option, default=_sentinel):
+ return literal_eval(self.get(section, option, default))
+
def items(self, section):
return OrderedDict(self._cp.items(section))
diff --git a/pyfr/plugins/sampler.py b/pyfr/plugins/sampler.py
index 886257b..1cac019 100644
--- a/pyfr/plugins/sampler.py
+++ b/pyfr/plugins/sampler.py
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
-import ast
-
import numpy as np
from pyfr.mpiutil import get_comm_rank_root, get_mpi
@@ -71,7 +69,7 @@ class SamplerPlugin(BasePlugin):
self.nsteps = self.cfg.getint(cfgsect, 'nsteps')
# List of points to be sampled and format
- self.pts = ast.literal_eval(self.cfg.get(cfgsect, 'samp-pts'))
+ self.pts = self.cfg.getliteral(cfgsect, 'samp-pts')
self.fmt = self.cfg.get(cfgsect, 'format', 'primitive')
# MPI info
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git
More information about the debian-science-commits mailing list