[pyfr] 18/88: Redesign the C/OpenMP and MIC code generators.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Wed Nov 16 12:05:26 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository pyfr.
commit e50762d8fe9e19897098f0dce69b80d313682e90
Author: Freddie Witherden <freddie at witherden.org>
Date: Fri Apr 22 11:23:01 2016 -0700
Redesign the C/OpenMP and MIC code generators.
We now avoid the use of an inner-function and instead depend
on the ubiquity of #pragma omp simd for vectorization. This
enables a substantial degree of unification between the four
backend generator classes.
---
pyfr/backends/mic/generator.py | 231 ++++++++++++-------------------
pyfr/backends/mic/kernels/axnpby.mako | 2 +-
pyfr/backends/mic/kernels/base.mako | 1 -
pyfr/backends/openmp/generator.py | 223 ++++++++++++-----------------
pyfr/backends/openmp/kernels/axnpby.mako | 2 +-
pyfr/backends/openmp/kernels/base.mako | 1 -
6 files changed, 177 insertions(+), 283 deletions(-)
diff --git a/pyfr/backends/mic/generator.py b/pyfr/backends/mic/generator.py
index 5dc09de..f551ba4 100644
--- a/pyfr/backends/mic/generator.py
+++ b/pyfr/backends/mic/generator.py
@@ -11,95 +11,54 @@ class MICKernelGenerator(BaseKernelGenerator):
super().__init__(*args, **kwargs)
# Specialise
- self._dims = ['_nx'] if self.ndim == 1 else ['_ny', '_nx']
+ if self.ndim == 1:
+ self._dims = ['_nx']
+ self._deref_arg_array = self._deref_arg_array_1d
+ else:
+ self._dims = ['_ny', '_nx']
+ self._deref_arg_array = self._deref_arg_array_2d
def render(self):
- # Argument unpacking
+ # Kernel spec, unpacking code, and body
spec, unpack = self._emit_spec_unpack()
+ body = self._emit_body()
if self.ndim == 1:
- body = self._emit_body_1d()
- return '''
- void {name}({spec})
- {{
- {unpack}
- #pragma omp parallel
- {{
- int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
- int cb, ce;
- loop_sched_1d(_nx, align, &cb, &ce);
- for (int _x = cb; _x < ce; _x++)
- {{
- {body}
- }}
- }}
- }}'''.format(name=self.name, spec=spec, unpack=unpack,
- body=body)
+ tpl = '''{spec}
+ {{
+ {unpack}
+ #pragma omp parallel
+ {{
+ int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
+ int cb, ce;
+ loop_sched_1d(_nx, align, &cb, &ce);
+ for (int _x = cb; _x < ce; _x++)
+ {{
+ {body}
+ }}
+ }}
+ }}'''
else:
- innerfn = self._emit_inner_func()
- innercall = self._emit_inner_call()
- return '''{innerfn}
- void {name}({spec})
- {{
- {unpack}
- #pragma omp parallel
- {{
- int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
- int rb, re, cb, ce;
- loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
- for (int _y = rb; _y < re; _y++)
- {{
- {innercall}
- }}
- }}
- }}'''.format(innerfn=innerfn, spec=spec, unpack=unpack,
- name=self.name, innercall=innercall)
-
- def _emit_inner_func(self):
- # Get the specification and body
- spec = self._emit_inner_spec()
- body = self._emit_body_2d()
-
- # Combine
- return '''{spec}
- {{
- for (int _x = 0; _x < _nx; _x++)
- {{
- {body}
- }}
- }}'''.format(spec=spec, body=body)
-
- def _emit_inner_call(self):
- # Arguments for the inner function
- iargs = ['ce - cb']
- iargs.extend(sa.name for sa in self.scalargs)
-
- for va in self.vectargs:
- iargs.extend(self._offset_arg_array_2d(va))
-
- return '{0}_inner({1});'.format(self.name, ', '.join(iargs))
-
- def _emit_inner_spec(self):
- # Inner dimension
- ikargs = ['int _nx']
-
- # Add any scalar arguments
- ikargs.extend('{0.dtype} {0.name}'.format(sa) for sa in self.scalargs)
-
- # Vector arguments (always arrays as we're 2D)
- for va in self.vectargs:
- const = 'const' if va.intent == 'in' else ''
- stmt = '{0} {1.dtype} *__restrict__ {1.name}_v'.format(const, va)
- stmt = stmt.strip()
-
- if va.ncdim == 0:
- ikargs.append(stmt)
- else:
- for ij in ndrange(*va.cdims):
- ikargs.append(stmt + 'v'.join(str(n) for n in ij))
-
- return ('static PYFR_NOINLINE void {0}_inner({1})'
- .format(self.name, ', '.join(ikargs)))
+ tpl = '''{spec}
+ {{
+ {unpack}
+ #pragma omp parallel
+ {{
+ int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
+ int rb, re, cb, ce;
+ loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
+ for (int _y = rb; _y < re; _y++)
+ {{
+ #pragma omp simd
+ for (int _x = cb; _x < ce; _x++)
+ {{
+ {body}
+ }}
+ }}
+ }}
+ }}'''
+
+ return tpl.format(spec=spec, unpack=unpack, body=body)
def _emit_spec_unpack(self):
# Start by unpacking the dimensions
@@ -144,75 +103,65 @@ class MICKernelGenerator(BaseKernelGenerator):
kspec.append('long *arg{0}')
kpack.append('int lsd{0.name} = *arg{{0}};'.format(va))
- return (', '.join(a.format(i) for i, a in enumerate(kspec)),
- '\n'.join(a.format(i) for i, a in enumerate(kpack)))
-
- def _emit_body_1d(self):
- body = self.body
- ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
-
- for va in self.vectargs:
- # Dereference the argument
- darg = self._deref_arg(va)
-
- # Substitute
- body = re.sub(ptns[va.ncdim].format(va.name), darg, body)
+ # Number the arguments
+ params = ', '.join(a.format(i) for i, a in enumerate(kspec))
+ unpack = '\n'.join(a.format(i) for i, a in enumerate(kpack))
- return body
+ return 'void {0}({1})'.format(self.name, params), unpack
- def _emit_body_2d(self):
- body = self.body
- ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
- subs = ['{0}_v[_x]', r'{0}_v\1[_x]', r'{0}_v\1v\2[_x]']
- for va in self.vectargs:
- body = re.sub(ptns[va.ncdim].format(va.name),
- subs[va.ncdim].format(va.name), body)
+ def _deref_arg_view(self, arg):
+ ptns = ['{0}_v[{0}_vix[_x]]',
+ r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]',
+ r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1 + {0}_vcstri[_x]*\2]']
- return body
+ return ptns[arg.ncdim].format(arg.name)
- def _deref_arg(self, arg):
- if arg.isview:
- ptns = ['{0}_v[{0}_vix[_x]]',
- r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]',
- r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1'
- r' + {0}_vcstri[_x]*\2]']
+ def _deref_arg_array_1d(self, arg):
+ # Leading (sub) dimension
+ lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx'
- return ptns[arg.ncdim].format(arg.name)
+ # Vector: name_v[_x]
+ if arg.ncdim == 0:
+ ix = '_x'
+ # Stacked vector: name_v[lsdim*\1 + _x]
+ elif arg.ncdim == 1:
+ ix = r'{0}*\1 + _x'.format(lsdim)
+ # Doubly stacked vector: name_v[(nv*\1 + \2)*lsdim + _x]
else:
- # Leading (sub) dimension
- lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx'
-
- # Vector name_v[_x]
- if arg.ncdim == 0:
- ix = '_x'
- # Stacked vector; name_v[lsdim*\1 + _x]
- elif arg.ncdim == 1:
- ix = r'{0}*\1 + _x'.format(lsdim)
- # Doubly stacked vector; name_v[lsdim*nv*\1 + lsdim*\2 + _x]
- else:
- ix = r'{0}*{1}*\1 + {0}*\2 + _x'.format(lsdim, arg.cdims[1])
-
- return '{0}_v[{1}]'.format(arg.name, ix)
+ ix = r'({0}*\1 + \2)*{1} + _x'.format(arg.cdims[1], lsdim)
- def _offset_arg_array_2d(self, arg):
- stmts = []
+ return '{0}_v[{1}]'.format(arg.name, ix)
- # Broadcast vector: name + cb
+ def _deref_arg_array_2d(self, arg):
+ # Broadcast vector: name_v[_x]
if arg.isbroadcast:
- stmts.append('{0}_v + cb'.format(arg.name))
- # Matrix: name + _y*lsdim + cb
+ ix = '_x'
+ # Matrix: name_v[lsdim*_y + _x]
elif arg.ncdim == 0:
- stmts.append('{0}_v + _y*lsd{0} + cb'.format(arg.name))
- # Stacked matrix: name + (_y*nv + <0>)*lsdim + cb
+ ix = 'lsd{}*_y + _x'.format(arg.name)
+ # Stacked matrix: name_v[(_y*nv + \1)*lsdim + _x]
elif arg.ncdim == 1:
- stmts.extend('{0}_v + (_y*{1} + {2})*lsd{0} + cb'
- .format(arg.name, arg.cdims[0], i)
- for i in range(arg.cdims[0]))
- # Doubly stacked matrix: name + ((<0>*_ny + _y)*nv + <1>)*lsdim + cb
+ ix = r'(_y*{0} + \1)*lsd{1} + _x'.format(arg.cdims[0], arg.name)
+ # Doubly stacked matrix: name_v[((\1*_ny + _y)*nv + \2)*lsdim + _x]
else:
- stmts.extend('{0}_v + (({1}*_ny + _y)*{2} + {3})*lsd{0} + cb'
- .format(arg.name, i, arg.cdims[1], j)
- for i, j in ndrange(*arg.cdims))
+ ix = (r'((\1*_ny + _y)*{0} + \2)*lsd{1} + _x'
+ .format(arg.cdims[1], arg.name))
+
+ return '{0}_v[{1}]'.format(arg.name, ix)
- return stmts
+ def _emit_body(self):
+ body = self.body
+ ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
+
+ for va in self.vectargs:
+ # Dereference the argument
+ if va.isview:
+ darg = self._deref_arg_view(va)
+ else:
+ darg = self._deref_arg_array(va)
+
+ # Substitute
+ body = re.sub(ptns[va.ncdim].format(va.name), darg, body)
+
+ return body
diff --git a/pyfr/backends/mic/kernels/axnpby.mako b/pyfr/backends/mic/kernels/axnpby.mako
index a21160e..ce10996 100644
--- a/pyfr/backends/mic/kernels/axnpby.mako
+++ b/pyfr/backends/mic/kernels/axnpby.mako
@@ -2,7 +2,7 @@
<%inherit file='base'/>
<%namespace module='pyfr.backends.base.makoutil' name='pyfr'/>
-static PYFR_NOINLINE void
+static void
axnpby_inner(int n,
${', '.join('fpdtype_t *__restrict__ x{0}, '
'fpdtype_t a{0}'.format(i) for i in range(nv))})
diff --git a/pyfr/backends/mic/kernels/base.mako b/pyfr/backends/mic/kernels/base.mako
index 1f4d487..7cd4d75 100644
--- a/pyfr/backends/mic/kernels/base.mako
+++ b/pyfr/backends/mic/kernels/base.mako
@@ -6,7 +6,6 @@
#include <tgmath.h>
#define PYFR_ALIGN_BYTES ${alignb}
-#define PYFR_NOINLINE __attribute__ ((noinline))
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
diff --git a/pyfr/backends/openmp/generator.py b/pyfr/backends/openmp/generator.py
index d9d026d..aeecdc9 100644
--- a/pyfr/backends/openmp/generator.py
+++ b/pyfr/backends/openmp/generator.py
@@ -3,7 +3,6 @@
import re
from pyfr.backends.base.generator import BaseKernelGenerator
-from pyfr.util import ndrange
class OpenMPKernelGenerator(BaseKernelGenerator):
@@ -11,92 +10,53 @@ class OpenMPKernelGenerator(BaseKernelGenerator):
super().__init__(*args, **kwargs)
# Specialise
- self._dims = ['_nx'] if self.ndim == 1 else ['_ny', '_nx']
+ if self.ndim == 1:
+ self._dims = ['_nx']
+ self._deref_arg_array = self._deref_arg_array_1d
+ else:
+ self._dims = ['_ny', '_nx']
+ self._deref_arg_array = self._deref_arg_array_2d
def render(self):
- # Kernel spec
+ # Kernel spec and body
spec = self._emit_spec()
+ body = self._emit_body()
if self.ndim == 1:
- body = self._emit_body_1d()
- return '''
- {spec}
- {{
- #pragma omp parallel
- {{
- int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
- int cb, ce;
- loop_sched_1d(_nx, align, &cb, &ce);
- for (int _x = cb; _x < ce; _x++)
- {{
- {body}
- }}
- }}
- }}'''.format(spec=spec, body=body)
+ tpl = '''
+ {spec}
+ {{
+ #pragma omp parallel
+ {{
+ int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
+ int cb, ce;
+ loop_sched_1d(_nx, align, &cb, &ce);
+ for (int _x = cb; _x < ce; _x++)
+ {{
+ {body}
+ }}
+ }}
+ }}'''
else:
- innerfn = self._emit_inner_func()
- innercall = self._emit_inner_call()
- return '''{innerfn}
- {spec}
- {{
- #pragma omp parallel
- {{
- int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
- int rb, re, cb, ce;
- loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
- for (int _y = rb; _y < re; _y++)
- {{
- {innercall}
- }}
- }}
- }}'''.format(innerfn=innerfn, spec=spec,
- innercall=innercall)
-
- def _emit_inner_func(self):
- # Get the specification and body
- spec = self._emit_inner_spec()
- body = self._emit_body_2d()
-
- # Combine
- return '''{spec}
- {{
- for (int _x = 0; _x < _nx; _x++)
- {{
- {body}
- }}
- }}'''.format(spec=spec, body=body)
-
- def _emit_inner_call(self):
- # Arguments for the inner function
- iargs = ['ce - cb']
- iargs.extend(sa.name for sa in self.scalargs)
-
- for va in self.vectargs:
- iargs.extend(self._offset_arg_array_2d(va))
-
- return '{0}_inner({1});'.format(self.name, ', '.join(iargs))
-
- def _emit_inner_spec(self):
- # Inner dimension
- ikargs = ['int _nx']
-
- # Add any scalar arguments
- ikargs.extend('{0.dtype} {0.name}'.format(sa) for sa in self.scalargs)
-
- # Vector arguments (always arrays as we're 2D)
- for va in self.vectargs:
- const = 'const' if va.intent == 'in' else ''
- stmt = '{0} {1.dtype} *__restrict__ {1.name}_v'.format(const, va)
- stmt = stmt.strip()
-
- if va.ncdim == 0:
- ikargs.append(stmt)
- else:
- for ij in ndrange(*va.cdims):
- ikargs.append(stmt + 'v'.join(str(n) for n in ij))
-
- return ('static PYFR_NOINLINE void {0}_inner({1})'
- .format(self.name, ', '.join(ikargs)))
+ tpl = '''{spec}
+ {{
+ #pragma omp parallel
+ {{
+ int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
+ int rb, re, cb, ce;
+ loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
+ for (int _y = rb; _y < re; _y++)
+ {{
+ #pragma omp simd
+ for (int _x = cb; _x < ce; _x++)
+ {{
+ {body}
+ }}
+ }}
+ }}
+ }}'''
+
+ return tpl.format(spec=spec, body=body)
def _emit_spec(self):
# We first need the argument list; starting with the dimensions
@@ -132,72 +92,59 @@ class OpenMPKernelGenerator(BaseKernelGenerator):
return 'void {0}({1})'.format(self.name, ', '.join(kargs))
- def _emit_body_1d(self):
- body = self.body
- ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
- for va in self.vectargs:
- # Dereference the argument
- darg = self._deref_arg(va)
+ def _deref_arg_view(self, arg):
+ ptns = ['{0}_v[{0}_vix[_x]]',
+ r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]',
+ r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1 + {0}_vcstri[_x]*\2]']
- # Substitute
- body = re.sub(ptns[va.ncdim].format(va.name), darg, body)
-
- return body
+ return ptns[arg.ncdim].format(arg.name)
- def _emit_body_2d(self):
- body = self.body
- ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
- subs = ['{0}_v[_x]', r'{0}_v\1[_x]', r'{0}_v\1v\2[_x]']
+ def _deref_arg_array_1d(self, arg):
+ # Leading (sub) dimension
+ lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx'
- for va in self.vectargs:
- body = re.sub(ptns[va.ncdim].format(va.name),
- subs[va.ncdim].format(va.name), body)
-
- return body
-
- def _deref_arg(self, arg):
- if arg.isview:
- ptns = ['{0}_v[{0}_vix[_x]]',
- r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]',
- r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1'
- r' + {0}_vcstri[_x]*\2]']
-
- return ptns[arg.ncdim].format(arg.name)
+ # Vector: name_v[_x]
+ if arg.ncdim == 0:
+ ix = '_x'
+ # Stacked vector: name_v[lsdim*\1 + _x]
+ elif arg.ncdim == 1:
+ ix = r'{0}*\1 + _x'.format(lsdim)
+ # Doubly stacked vector: name_v[(nv*\1 + \2)*lsdim + _x]
else:
- # Leading (sub) dimension
- lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx'
-
- # Vector: name_v[_x]
- if arg.ncdim == 0:
- ix = '_x'
- # Stacked vector: name_v[lsdim*\1 + _x]
- elif arg.ncdim == 1:
- ix = r'{0}*\1 + _x'.format(lsdim)
- # Doubly stacked vector: name_v[lsdim*nv*\1 + lsdim*\2 + _x]
- else:
- ix = r'{0}*{1}*\1 + {0}*\2 + _x'.format(lsdim, arg.cdims[1])
-
- return '{0}_v[{1}]'.format(arg.name, ix)
+ ix = r'({0}*\1 + \2)*{1} + _x'.format(arg.cdims[1], lsdim)
- def _offset_arg_array_2d(self, arg):
- stmts = []
+ return '{0}_v[{1}]'.format(arg.name, ix)
- # Broadcast vector: name + cb
+ def _deref_arg_array_2d(self, arg):
+ # Broadcast vector: name_v[_x]
if arg.isbroadcast:
- stmts.append('{0}_v + cb'.format(arg.name))
- # Matrix: name + _y*lsdim + cb
+ ix = '_x'
+ # Matrix: name_v[lsdim*_y + _x]
elif arg.ncdim == 0:
- stmts.append('{0}_v + _y*lsd{0} + cb'.format(arg.name))
- # Stacked matrix: name + (_y*nv + <0>)*lsdim + cb
+ ix = 'lsd{}*_y + _x'.format(arg.name)
+ # Stacked matrix: name_v[(_y*nv + \1)*lsdim + _x]
elif arg.ncdim == 1:
- stmts.extend('{0}_v + (_y*{1} + {2})*lsd{0} + cb'
- .format(arg.name, arg.cdims[0], i)
- for i in range(arg.cdims[0]))
- # Doubly stacked matrix: name + ((<0>*_ny + _y)*nv + <1>)*lsdim + cb
+ ix = r'(_y*{0} + \1)*lsd{1} + _x'.format(arg.cdims[0], arg.name)
+ # Doubly stacked matrix: name_v[((\1*_ny + _y)*nv + \2)*lsdim + _x]
else:
- stmts.extend('{0}_v + (({1}*_ny + _y)*{2} + {3})*lsd{0} + cb'
- .format(arg.name, i, arg.cdims[1], j)
- for i, j in ndrange(*arg.cdims))
+ ix = (r'((\1*_ny + _y)*{0} + \2)*lsd{1} + _x'
+ .format(arg.cdims[1], arg.name))
+
+ return '{0}_v[{1}]'.format(arg.name, ix)
- return stmts
+ def _emit_body(self):
+ body = self.body
+ ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
+
+ for va in self.vectargs:
+ # Dereference the argument
+ if va.isview:
+ darg = self._deref_arg_view(va)
+ else:
+ darg = self._deref_arg_array(va)
+
+ # Substitute
+ body = re.sub(ptns[va.ncdim].format(va.name), darg, body)
+
+ return body
diff --git a/pyfr/backends/openmp/kernels/axnpby.mako b/pyfr/backends/openmp/kernels/axnpby.mako
index a72c0de..ca3dec9 100644
--- a/pyfr/backends/openmp/kernels/axnpby.mako
+++ b/pyfr/backends/openmp/kernels/axnpby.mako
@@ -2,7 +2,7 @@
<%inherit file='base'/>
<%namespace module='pyfr.backends.base.makoutil' name='pyfr'/>
-static PYFR_NOINLINE void
+static void
axnpby_inner(int n, fpdtype_t *__restrict__ y, fpdtype_t beta,
${', '.join('const fpdtype_t *__restrict__ x{0}, '
'fpdtype_t a{0}'.format(i) for i in range(n))})
diff --git a/pyfr/backends/openmp/kernels/base.mako b/pyfr/backends/openmp/kernels/base.mako
index 1f4d487..7cd4d75 100644
--- a/pyfr/backends/openmp/kernels/base.mako
+++ b/pyfr/backends/openmp/kernels/base.mako
@@ -6,7 +6,6 @@
#include <tgmath.h>
#define PYFR_ALIGN_BYTES ${alignb}
-#define PYFR_NOINLINE __attribute__ ((noinline))
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git
More information about the debian-science-commits
mailing list