[pyfr] 36/88: Update the GiMMiK kernel providers to support GiMMiK v2.0.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Wed Nov 16 12:05:27 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository pyfr.
commit 40bfd58fbc1b6e1efae544a70bbd0c3879c42281
Author: Freddie Witherden <freddie at witherden.org>
Date: Fri May 20 10:41:34 2016 -0700
Update the GiMMiK kernel providers to support GiMMiK v2.0.
---
doc/src/user_guide.rst | 11 +++++----
pyfr/backends/cuda/gimmik.py | 21 +++++------------
pyfr/backends/opencl/gimmik.py | 24 +++++++------------
pyfr/backends/openmp/base.py | 5 ++--
pyfr/backends/{opencl => openmp}/gimmik.py | 35 +++++++++++-----------------
pyfr/backends/openmp/kernels/par-gimmik.mako | 17 ++++++++++++++
setup.py | 1 +
7 files changed, 55 insertions(+), 59 deletions(-)
diff --git a/doc/src/user_guide.rst b/doc/src/user_guide.rst
index 07c3d53..7be6872 100644
--- a/doc/src/user_guide.rst
+++ b/doc/src/user_guide.rst
@@ -21,11 +21,12 @@ Overview
PyFR |release| has a hard dependency on Python 3.3+ and the following
Python packages:
-1. `h5py <http://www.h5py.org/>`_ >= 2.6
-2. `mako <http://www.makotemplates.org/>`_ >= 1.0.0
-3. `mpi4py <http://mpi4py.scipy.org/>`_ >= 2.0
-4. `numpy <http://www.numpy.org/>`_ >= 1.8
-5. `pytools <https://pypi.python.org/pypi/pytools>`_ >= 2014.3
+1. `gimmik <https://github.com/vincentlab/GiMMiK>`_ >= 2.0
+2. `h5py <http://www.h5py.org/>`_ >= 2.6
+3. `mako <http://www.makotemplates.org/>`_ >= 1.0.0
+4. `mpi4py <http://mpi4py.scipy.org/>`_ >= 2.0
+5. `numpy <http://www.numpy.org/>`_ >= 1.8
+6. `pytools <https://pypi.python.org/pypi/pytools>`_ >= 2014.3
Note that due to a bug in `numpy <http://www.numpy.org/>`_ PyFR is not
compatible with 32-bit Python distributions.
diff --git a/pyfr/backends/cuda/gimmik.py b/pyfr/backends/cuda/gimmik.py
index 1566d26..46a2559 100644
--- a/pyfr/backends/cuda/gimmik.py
+++ b/pyfr/backends/cuda/gimmik.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
+from gimmik import generate_mm
import numpy as np
from pyfr.backends.base import ComputeKernel, NotSuitableError
@@ -14,15 +15,7 @@ class CUDAGiMMiKKernels(CUDAKernelProvider):
self.max_nnz = backend.cfg.getint('backend-cuda', 'gimmik-max-nnz',
512)
- try:
- from gimmik.generator import generateKernel
-
- self._gen_gimmik = generateKernel
- self.mul = self._mul_gimmik
- except ImportError:
- pass
-
- def _mul_gimmik(self, a, b, out, alpha=1.0, beta=0.0):
+ def mul(self, a, b, out, alpha=1.0, beta=0.0):
# Ensure the matrices are compatible
if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
raise ValueError('Incompatible matrices for out = a*b')
@@ -36,13 +29,11 @@ class CUDAGiMMiKKernels(CUDAKernelProvider):
raise NotSuitableError('Matrix too dense for GiMMiK')
# Generate
- src = self._gen_gimmik(
- a.get(), 'cuda', alpha=alpha, beta=beta,
- double=a.dtype == np.float64, reduced=True,
- )
+ src = generate_mm(a.get(), dtype=a.dtype, platform='cuda',
+ alpha=alpha, beta=beta)
# Build
- fun = self._build_kernel('gimmik_mm', src, 'PPiii')
+ fun = self._build_kernel('gimmik_mm', src, 'iPiPi')
# Determine the grid/block
block = (128, 1, 1)
@@ -51,7 +42,7 @@ class CUDAGiMMiKKernels(CUDAKernelProvider):
class MulKernel(ComputeKernel):
def run(self, queue):
fun.prepared_async_call(grid, block, queue.cuda_stream_comp,
- b, out, b.ncol, b.leaddim,
+ b.ncol, b, b.leaddim, out,
out.leaddim)
return MulKernel()
diff --git a/pyfr/backends/opencl/gimmik.py b/pyfr/backends/opencl/gimmik.py
index 0a3c475..234bb83 100644
--- a/pyfr/backends/opencl/gimmik.py
+++ b/pyfr/backends/opencl/gimmik.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
+from gimmik import generate_mm
import numpy as np
from pyfr.backends.base import ComputeKernel, NotSuitableError
@@ -13,15 +14,7 @@ class OpenCLGiMMiKKernels(OpenCLKernelProvider):
self.max_nnz = backend.cfg.getint('backend-opencl', 'gimmik-max-nnz',
512)
- try:
- from gimmik.generator import generateKernel
-
- self._gen_gimmik = generateKernel
- self.mul = self._mul_gimmik
- except ImportError:
- pass
-
- def _mul_gimmik(self, a, b, out, alpha=1.0, beta=0.0):
+ def mul(self, a, b, out, alpha=1.0, beta=0.0):
# Ensure the matrices are compatible
if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
raise ValueError('Incompatible matrices for out = a*b')
@@ -35,17 +28,16 @@ class OpenCLGiMMiKKernels(OpenCLKernelProvider):
raise NotSuitableError('Matrix too dense for GiMMiK')
# Generate
- src = self._gen_gimmik(
- a.get(), 'opencl', alpha=alpha, beta=beta,
- double=a.dtype == np.float64, reduced=True,
- )
+ src = generate_mm(a.get(), dtype=a.dtype, platform='opencl',
+ alpha=alpha, beta=beta)
# Build
- fun = self._build_kernel('gimmik_mm', src, [np.intp]*2 + [np.int32]*3)
+ fun = self._build_kernel('gimmik_mm', src,
+ [np.int32] + [np.intp, np.int32]*2)
class MulKernel(ComputeKernel):
def run(self, queue):
- fun(queue.cl_queue_comp, (b.ncol,), None, b.data, out.data,
- b.ncol, b.leaddim, out.leaddim)
+ fun(queue.cl_queue_comp, (b.ncol,), None, b.ncol,
+ b.data, b.leaddim, out.data, out.leaddim)
return MulKernel()
diff --git a/pyfr/backends/openmp/base.py b/pyfr/backends/openmp/base.py
index 945f6c2..7830f6d 100644
--- a/pyfr/backends/openmp/base.py
+++ b/pyfr/backends/openmp/base.py
@@ -15,8 +15,8 @@ class OpenMPBackend(BaseBackend):
# Take the alignment requirement to be 32-bytes
self.alignb = 32
- from pyfr.backends.openmp import (blasext, cblas, packing, provider,
- types)
+ from pyfr.backends.openmp import (blasext, cblas, gimmik, packing,
+ provider, types)
# Register our data types
self.base_matrix_cls = types.OpenMPMatrixBase
@@ -39,6 +39,7 @@ class OpenMPBackend(BaseBackend):
kprovcls = [provider.OpenMPPointwiseKernelProvider,
blasext.OpenMPBlasExtKernels,
packing.OpenMPPackingKernels,
+ gimmik.OpenMPGiMMiKKernels,
cblas.OpenMPCBLASKernels]
self._providers = [k(self) for k in kprovcls]
diff --git a/pyfr/backends/opencl/gimmik.py b/pyfr/backends/openmp/gimmik.py
similarity index 50%
copy from pyfr/backends/opencl/gimmik.py
copy to pyfr/backends/openmp/gimmik.py
index 0a3c475..edaf89c 100644
--- a/pyfr/backends/opencl/gimmik.py
+++ b/pyfr/backends/openmp/gimmik.py
@@ -1,27 +1,20 @@
# -*- coding: utf-8 -*-
+from gimmik import generate_mm
import numpy as np
from pyfr.backends.base import ComputeKernel, NotSuitableError
-from pyfr.backends.opencl.provider import OpenCLKernelProvider
+from pyfr.backends.openmp.provider import OpenMPKernelProvider
-class OpenCLGiMMiKKernels(OpenCLKernelProvider):
+class OpenMPGiMMiKKernels(OpenMPKernelProvider):
def __init__(self, backend):
super().__init__(backend)
- self.max_nnz = backend.cfg.getint('backend-opencl', 'gimmik-max-nnz',
+ self.max_nnz = backend.cfg.getint('backend-openmp', 'gimmik-max-nnz',
512)
- try:
- from gimmik.generator import generateKernel
-
- self._gen_gimmik = generateKernel
- self.mul = self._mul_gimmik
- except ImportError:
- pass
-
- def _mul_gimmik(self, a, b, out, alpha=1.0, beta=0.0):
+ def mul(self, a, b, out, alpha=1.0, beta=0.0):
# Ensure the matrices are compatible
if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
raise ValueError('Incompatible matrices for out = a*b')
@@ -34,18 +27,18 @@ class OpenCLGiMMiKKernels(OpenCLKernelProvider):
if np.count_nonzero(a.get()) > self.max_nnz:
raise NotSuitableError('Matrix too dense for GiMMiK')
- # Generate
- src = self._gen_gimmik(
- a.get(), 'opencl', alpha=alpha, beta=beta,
- double=a.dtype == np.float64, reduced=True,
- )
+ # Generate the GiMMiK kernel
+ gimmik_mm = generate_mm(a.get(), dtype=a.dtype, platform='c',
+ alpha=alpha, beta=beta)
- # Build
- fun = self._build_kernel('gimmik_mm', src, [np.intp]*2 + [np.int32]*3)
+ # Generate and build the OpenMP-wrapped GiMMiK kernel
+ tpl = self.backend.lookup.get_template('par-gimmik')
+ src = tpl.render(gimmik_mm=gimmik_mm)
+ par_gimmik_mm = self._build_kernel('par_gimmik_mm', src,
+ [np.int32] + [np.intp, np.int32]*2)
class MulKernel(ComputeKernel):
def run(self, queue):
- fun(queue.cl_queue_comp, (b.ncol,), None, b.data, out.data,
- b.ncol, b.leaddim, out.leaddim)
+ par_gimmik_mm(b.ncol, b, b.leaddim, out, out.leaddim)
return MulKernel()
diff --git a/pyfr/backends/openmp/kernels/par-gimmik.mako b/pyfr/backends/openmp/kernels/par-gimmik.mako
new file mode 100644
index 0000000..150893b
--- /dev/null
+++ b/pyfr/backends/openmp/kernels/par-gimmik.mako
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+<%inherit file='base'/>
+
+// GiMMiK kernel
+${gimmik_mm}
+
+void
+par_gimmik_mm(int N, const fpdtype_t *B, int ldb, fpdtype_t *C, int ldc)
+{
+ #pragma omp parallel
+ {
+ int begin, end;
+ loop_sched_1d(N, PYFR_ALIGN_BYTES / sizeof(fpdtype_t), &begin, &end);
+
+ gimmik_mm(end - begin, B + begin, ldb, C + begin, ldc);
+ }
+}
diff --git a/setup.py b/setup.py
index b24b7f3..1e9f2e0 100755
--- a/setup.py
+++ b/setup.py
@@ -93,6 +93,7 @@ data_files = [
# Hard dependencies
install_requires = [
+ 'gimmik >= 2.0',
'h5py >= 2.6',
'mako >= 1.0.0',
'mpi4py >= 2.0',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git
More information about the debian-science-commits
mailing list