[pyfr] 18/88: Redesign the C/OpenMP and MIC code generators.

Wed Nov 16 12:05:26 UTC 2016

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository pyfr.

commit e50762d8fe9e19897098f0dce69b80d313682e90
Author: Freddie Witherden <freddie at witherden.org>
Date:   Fri Apr 22 11:23:01 2016 -0700

    Redesign the C/OpenMP and MIC code generators.
    
    We now avoid the use of an inner-function and instead depend
    on the ubiquity of #pragma omp simd for vectorization.  This
    enables a substantial degree of unification between the four
    backend generator classes.
---
 pyfr/backends/mic/generator.py           | 231 ++++++++++++-------------------
 pyfr/backends/mic/kernels/axnpby.mako    |   2 +-
 pyfr/backends/mic/kernels/base.mako      |   1 -
 pyfr/backends/openmp/generator.py        | 223 ++++++++++++-----------------
 pyfr/backends/openmp/kernels/axnpby.mako |   2 +-
 pyfr/backends/openmp/kernels/base.mako   |   1 -
 6 files changed, 177 insertions(+), 283 deletions(-)

diff --git a/pyfr/backends/mic/generator.py b/pyfr/backends/mic/generator.py
index 5dc09de..f551ba4 100644
--- a/pyfr/backends/mic/generator.py
+++ b/pyfr/backends/mic/generator.py
@@ -11,95 +11,54 @@ class MICKernelGenerator(BaseKernelGenerator):
         super().__init__(*args, **kwargs)
 
         # Specialise
-        self._dims = ['_nx'] if self.ndim == 1 else ['_ny', '_nx']
+        if self.ndim == 1:
+            self._dims = ['_nx']
+            self._deref_arg_array = self._deref_arg_array_1d
+        else:
+            self._dims = ['_ny', '_nx']
+            self._deref_arg_array = self._deref_arg_array_2d
 
     def render(self):
-        # Argument unpacking
+        # Kernel spec, unpacking code, and body
         spec, unpack = self._emit_spec_unpack()
+        body = self._emit_body()
 
         if self.ndim == 1:
-            body = self._emit_body_1d()
-            return '''
-                   void {name}({spec})
-                   {{
-                       {unpack}
-                       #pragma omp parallel
-                       {{
-                           int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
-                           int cb, ce;
-                           loop_sched_1d(_nx, align, &cb, &ce);
-                           for (int _x = cb; _x < ce; _x++)
-                           {{
-                               {body}
-                           }}
-                       }}
-                   }}'''.format(name=self.name, spec=spec, unpack=unpack,
-                                body=body)
+            tpl = '''{spec}
+                  {{
+                      {unpack}
+                      #pragma omp parallel
+                      {{
+                          int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
+                          int cb, ce;
+                          loop_sched_1d(_nx, align, &cb, &ce);
+                          for (int _x = cb; _x < ce; _x++)
+                          {{
+                              {body}
+                          }}
+                      }}
+                  }}'''
         else:
-            innerfn = self._emit_inner_func()
-            innercall = self._emit_inner_call()
-            return '''{innerfn}
-                   void {name}({spec})
-                   {{
-                       {unpack}
-                       #pragma omp parallel
-                       {{
-                           int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
-                           int rb, re, cb, ce;
-                           loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
-                           for (int _y = rb; _y < re; _y++)
-                           {{
-                               {innercall}
-                           }}
-                       }}
-                   }}'''.format(innerfn=innerfn, spec=spec, unpack=unpack,
-                                name=self.name, innercall=innercall)
-
-    def _emit_inner_func(self):
-        # Get the specification and body
-        spec = self._emit_inner_spec()
-        body = self._emit_body_2d()
-
-        # Combine
-        return '''{spec}
-               {{
-                   for (int _x = 0; _x < _nx; _x++)
-                   {{
-                       {body}
-                   }}
-               }}'''.format(spec=spec, body=body)
-
-    def _emit_inner_call(self):
-        # Arguments for the inner function
-        iargs = ['ce - cb']
-        iargs.extend(sa.name for sa in self.scalargs)
-
-        for va in self.vectargs:
-            iargs.extend(self._offset_arg_array_2d(va))
-
-        return '{0}_inner({1});'.format(self.name, ', '.join(iargs))
-
-    def _emit_inner_spec(self):
-        # Inner dimension
-        ikargs = ['int _nx']
-
-        # Add any scalar arguments
-        ikargs.extend('{0.dtype} {0.name}'.format(sa) for sa in self.scalargs)
-
-        # Vector arguments (always arrays as we're 2D)
-        for va in self.vectargs:
-            const = 'const' if va.intent == 'in' else ''
-            stmt = '{0} {1.dtype} *__restrict__ {1.name}_v'.format(const, va)
-            stmt = stmt.strip()
-
-            if va.ncdim == 0:
-                ikargs.append(stmt)
-            else:
-                for ij in ndrange(*va.cdims):
-                    ikargs.append(stmt + 'v'.join(str(n) for n in ij))
-
-        return ('static PYFR_NOINLINE void {0}_inner({1})'
-                .format(self.name, ', '.join(ikargs)))
+            tpl = '''{spec}
+                  {{
+                      {unpack}
+                      #pragma omp parallel
+                      {{
+                          int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
+                          int rb, re, cb, ce;
+                          loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
+                          for (int _y = rb; _y < re; _y++)
+                          {{
+                              #pragma omp simd
+                              for (int _x = cb; _x < ce; _x++)
+                              {{
+                                  {body}
+                              }}
+                          }}
+                      }}
+                  }}'''
+
+        return tpl.format(spec=spec, unpack=unpack, body=body)
 
     def _emit_spec_unpack(self):
         # Start by unpacking the dimensions
@@ -144,75 +103,65 @@ class MICKernelGenerator(BaseKernelGenerator):
                     kspec.append('long *arg{0}')
                     kpack.append('int lsd{0.name} = *arg{{0}};'.format(va))
 
-        return (', '.join(a.format(i) for i, a in enumerate(kspec)),
-                '\n'.join(a.format(i) for i, a in enumerate(kpack)))
-
-    def _emit_body_1d(self):
-        body = self.body
-        ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
-
-        for va in self.vectargs:
-            # Dereference the argument
-            darg = self._deref_arg(va)
-
-            # Substitute
-            body = re.sub(ptns[va.ncdim].format(va.name), darg, body)
+        # Number the arguments
+        params = ', '.join(a.format(i) for i, a in enumerate(kspec))
+        unpack = '\n'.join(a.format(i) for i, a in enumerate(kpack))
 
-        return body
+        return 'void {0}({1})'.format(self.name, params), unpack
 
-    def _emit_body_2d(self):
-        body = self.body
-        ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
-        subs = ['{0}_v[_x]', r'{0}_v\1[_x]', r'{0}_v\1v\2[_x]']
 
-        for va in self.vectargs:
-            body = re.sub(ptns[va.ncdim].format(va.name),
-                          subs[va.ncdim].format(va.name), body)
+    def _deref_arg_view(self, arg):
+        ptns = ['{0}_v[{0}_vix[_x]]',
+                r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]',
+                r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1 + {0}_vcstri[_x]*\2]']
 
-        return body
+        return ptns[arg.ncdim].format(arg.name)
 
-    def _deref_arg(self, arg):
-        if arg.isview:
-            ptns = ['{0}_v[{0}_vix[_x]]',
-                    r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]',
-                    r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1'
-                    r' + {0}_vcstri[_x]*\2]']
+    def _deref_arg_array_1d(self, arg):
+        # Leading (sub) dimension
+        lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx'
 
-            return ptns[arg.ncdim].format(arg.name)
+        # Vector: name_v[_x]
+        if arg.ncdim == 0:
+            ix = '_x'
+        # Stacked vector: name_v[lsdim*\1 + _x]
+        elif arg.ncdim == 1:
+            ix = r'{0}*\1 + _x'.format(lsdim)
+        # Doubly stacked vector: name_v[(nv*\1 + \2)*lsdim + _x]
         else:
-            # Leading (sub) dimension
-            lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx'
-
-            # Vector name_v[_x]
-            if arg.ncdim == 0:
-                ix = '_x'
-            # Stacked vector; name_v[lsdim*\1 + _x]
-            elif arg.ncdim == 1:
-                ix = r'{0}*\1 + _x'.format(lsdim)
-            # Doubly stacked vector; name_v[lsdim*nv*\1 + lsdim*\2 + _x]
-            else:
-                ix = r'{0}*{1}*\1 + {0}*\2 + _x'.format(lsdim, arg.cdims[1])
-
-            return '{0}_v[{1}]'.format(arg.name, ix)
+            ix = r'({0}*\1 + \2)*{1} + _x'.format(arg.cdims[1], lsdim)
 
-    def _offset_arg_array_2d(self, arg):
-        stmts = []
+        return '{0}_v[{1}]'.format(arg.name, ix)
 
-        # Broadcast vector: name + cb
+    def _deref_arg_array_2d(self, arg):
+        # Broadcast vector: name_v[_x]
         if arg.isbroadcast:
-            stmts.append('{0}_v + cb'.format(arg.name))
-        # Matrix: name + _y*lsdim + cb
+            ix = '_x'
+        # Matrix: name_v[lsdim*_y + _x]
         elif arg.ncdim == 0:
-            stmts.append('{0}_v + _y*lsd{0} + cb'.format(arg.name))
-        # Stacked matrix: name + (_y*nv + <0>)*lsdim + cb
+            ix = 'lsd{}*_y + _x'.format(arg.name)
+        # Stacked matrix: name_v[(_y*nv + \1)*lsdim + _x]
         elif arg.ncdim == 1:
-            stmts.extend('{0}_v + (_y*{1} + {2})*lsd{0} + cb'
-                         .format(arg.name, arg.cdims[0], i)
-                         for i in range(arg.cdims[0]))
-        # Doubly stacked matrix: name + ((<0>*_ny + _y)*nv + <1>)*lsdim + cb
+            ix = r'(_y*{0} + \1)*lsd{1} + _x'.format(arg.cdims[0], arg.name)
+        # Doubly stacked matrix: name_v[((\1*_ny + _y)*nv + \2)*lsdim + _x]
         else:
-            stmts.extend('{0}_v + (({1}*_ny + _y)*{2} + {3})*lsd{0} + cb'
-                         .format(arg.name, i, arg.cdims[1], j)
-                         for i, j in ndrange(*arg.cdims))
+            ix = (r'((\1*_ny + _y)*{0} + \2)*lsd{1} + _x'
+                  .format(arg.cdims[1], arg.name))
+
+        return '{0}_v[{1}]'.format(arg.name, ix)
 
-        return stmts
+    def _emit_body(self):
+        body = self.body
+        ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
+
+        for va in self.vectargs:
+            # Dereference the argument
+            if va.isview:
+                darg = self._deref_arg_view(va)
+            else:
+                darg = self._deref_arg_array(va)
+
+            # Substitute
+            body = re.sub(ptns[va.ncdim].format(va.name), darg, body)
+
+        return body
diff --git a/pyfr/backends/mic/kernels/axnpby.mako b/pyfr/backends/mic/kernels/axnpby.mako
index a21160e..ce10996 100644
--- a/pyfr/backends/mic/kernels/axnpby.mako
+++ b/pyfr/backends/mic/kernels/axnpby.mako
@@ -2,7 +2,7 @@
 <%inherit file='base'/>
 <%namespace module='pyfr.backends.base.makoutil' name='pyfr'/>
 
-static PYFR_NOINLINE void
+static void
 axnpby_inner(int n,
              ${', '.join('fpdtype_t *__restrict__ x{0}, '
                          'fpdtype_t a{0}'.format(i) for i in range(nv))})
diff --git a/pyfr/backends/mic/kernels/base.mako b/pyfr/backends/mic/kernels/base.mako
index 1f4d487..7cd4d75 100644
--- a/pyfr/backends/mic/kernels/base.mako
+++ b/pyfr/backends/mic/kernels/base.mako
@@ -6,7 +6,6 @@
 #include <tgmath.h>
 
 #define PYFR_ALIGN_BYTES ${alignb}
-#define PYFR_NOINLINE __attribute__ ((noinline))
 
 #define min(a, b) ((a) < (b) ? (a) : (b))
 #define max(a, b) ((a) > (b) ? (a) : (b))
diff --git a/pyfr/backends/openmp/generator.py b/pyfr/backends/openmp/generator.py
index d9d026d..aeecdc9 100644
--- a/pyfr/backends/openmp/generator.py
+++ b/pyfr/backends/openmp/generator.py
@@ -3,7 +3,6 @@
 import re
 
 from pyfr.backends.base.generator import BaseKernelGenerator
-from pyfr.util import ndrange
 
 
 class OpenMPKernelGenerator(BaseKernelGenerator):
@@ -11,92 +10,53 @@ class OpenMPKernelGenerator(BaseKernelGenerator):
         super().__init__(*args, **kwargs)
 
         # Specialise
-        self._dims = ['_nx'] if self.ndim == 1 else ['_ny', '_nx']
+        if self.ndim == 1:
+            self._dims = ['_nx']
+            self._deref_arg_array = self._deref_arg_array_1d
+        else:
+            self._dims = ['_ny', '_nx']
+            self._deref_arg_array = self._deref_arg_array_2d
 
     def render(self):
-        # Kernel spec
+        # Kernel spec and body
         spec = self._emit_spec()
+        body = self._emit_body()
 
         if self.ndim == 1:
-            body = self._emit_body_1d()
-            return '''
-                   {spec}
-                   {{
-                       #pragma omp parallel
-                       {{
-                           int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
-                           int cb, ce;
-                           loop_sched_1d(_nx, align, &cb, &ce);
-                           for (int _x = cb; _x < ce; _x++)
-                           {{
-                               {body}
-                           }}
-                       }}
-                   }}'''.format(spec=spec, body=body)
+            tpl = '''
+                  {spec}
+                  {{
+                      #pragma omp parallel
+                      {{
+                          int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
+                          int cb, ce;
+                          loop_sched_1d(_nx, align, &cb, &ce);
+                          for (int _x = cb; _x < ce; _x++)
+                          {{
+                              {body}
+                          }}
+                      }}
+                  }}'''
         else:
-            innerfn = self._emit_inner_func()
-            innercall = self._emit_inner_call()
-            return '''{innerfn}
-                   {spec}
-                   {{
-                       #pragma omp parallel
-                       {{
-                           int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
-                           int rb, re, cb, ce;
-                           loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
-                           for (int _y = rb; _y < re; _y++)
-                           {{
-                               {innercall}
-                           }}
-                       }}
-                   }}'''.format(innerfn=innerfn, spec=spec,
-                                innercall=innercall)
-
-    def _emit_inner_func(self):
-        # Get the specification and body
-        spec = self._emit_inner_spec()
-        body = self._emit_body_2d()
-
-        # Combine
-        return '''{spec}
-               {{
-                   for (int _x = 0; _x < _nx; _x++)
-                   {{
-                       {body}
-                   }}
-               }}'''.format(spec=spec, body=body)
-
-    def _emit_inner_call(self):
-        # Arguments for the inner function
-        iargs = ['ce - cb']
-        iargs.extend(sa.name for sa in self.scalargs)
-
-        for va in self.vectargs:
-            iargs.extend(self._offset_arg_array_2d(va))
-
-        return '{0}_inner({1});'.format(self.name, ', '.join(iargs))
-
-    def _emit_inner_spec(self):
-        # Inner dimension
-        ikargs = ['int _nx']
-
-        # Add any scalar arguments
-        ikargs.extend('{0.dtype} {0.name}'.format(sa) for sa in self.scalargs)
-
-        # Vector arguments (always arrays as we're 2D)
-        for va in self.vectargs:
-            const = 'const' if va.intent == 'in' else ''
-            stmt = '{0} {1.dtype} *__restrict__ {1.name}_v'.format(const, va)
-            stmt = stmt.strip()
-
-            if va.ncdim == 0:
-                ikargs.append(stmt)
-            else:
-                for ij in ndrange(*va.cdims):
-                    ikargs.append(stmt + 'v'.join(str(n) for n in ij))
-
-        return ('static PYFR_NOINLINE void {0}_inner({1})'
-                .format(self.name, ', '.join(ikargs)))
+            tpl = '''{spec}
+                  {{
+                      #pragma omp parallel
+                      {{
+                          int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
+                          int rb, re, cb, ce;
+                          loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
+                          for (int _y = rb; _y < re; _y++)
+                          {{
+                              #pragma omp simd
+                              for (int _x = cb; _x < ce; _x++)
+                              {{
+                                  {body}
+                              }}
+                          }}
+                      }}
+                  }}'''
+
+        return tpl.format(spec=spec, body=body)
 
     def _emit_spec(self):
         # We first need the argument list; starting with the dimensions
@@ -132,72 +92,59 @@ class OpenMPKernelGenerator(BaseKernelGenerator):
 
         return 'void {0}({1})'.format(self.name, ', '.join(kargs))
 
-    def _emit_body_1d(self):
-        body = self.body
-        ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
 
-        for va in self.vectargs:
-            # Dereference the argument
-            darg = self._deref_arg(va)
+    def _deref_arg_view(self, arg):
+        ptns = ['{0}_v[{0}_vix[_x]]',
+                r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]',
+                r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1 + {0}_vcstri[_x]*\2]']
 
-            # Substitute
-            body = re.sub(ptns[va.ncdim].format(va.name), darg, body)
-
-        return body
+        return ptns[arg.ncdim].format(arg.name)
 
-    def _emit_body_2d(self):
-        body = self.body
-        ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
-        subs = ['{0}_v[_x]', r'{0}_v\1[_x]', r'{0}_v\1v\2[_x]']
+    def _deref_arg_array_1d(self, arg):
+        # Leading (sub) dimension
+        lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx'
 
-        for va in self.vectargs:
-            body = re.sub(ptns[va.ncdim].format(va.name),
-                          subs[va.ncdim].format(va.name), body)
-
-        return body
-
-    def _deref_arg(self, arg):
-        if arg.isview:
-            ptns = ['{0}_v[{0}_vix[_x]]',
-                    r'{0}_v[{0}_vix[_x] + {0}_vcstri[_x]*\1]',
-                    r'{0}_v[{0}_vix[_x] + {0}_vrstri[_x]*\1'
-                    r' + {0}_vcstri[_x]*\2]']
-
-            return ptns[arg.ncdim].format(arg.name)
+        # Vector: name_v[_x]
+        if arg.ncdim == 0:
+            ix = '_x'
+        # Stacked vector: name_v[lsdim*\1 + _x]
+        elif arg.ncdim == 1:
+            ix = r'{0}*\1 + _x'.format(lsdim)
+        # Doubly stacked vector: name_v[(nv*\1 + \2)*lsdim + _x]
         else:
-            # Leading (sub) dimension
-            lsdim = 'lsd' + arg.name if not arg.ismpi else '_nx'
-
-            # Vector: name_v[_x]
-            if arg.ncdim == 0:
-                ix = '_x'
-            # Stacked vector: name_v[lsdim*\1 + _x]
-            elif arg.ncdim == 1:
-                ix = r'{0}*\1 + _x'.format(lsdim)
-            # Doubly stacked vector: name_v[lsdim*nv*\1 + lsdim*\2 + _x]
-            else:
-                ix = r'{0}*{1}*\1 + {0}*\2 + _x'.format(lsdim, arg.cdims[1])
-
-            return '{0}_v[{1}]'.format(arg.name, ix)
+            ix = r'({0}*\1 + \2)*{1} + _x'.format(arg.cdims[1], lsdim)
 
-    def _offset_arg_array_2d(self, arg):
-        stmts = []
+        return '{0}_v[{1}]'.format(arg.name, ix)
 
-        # Broadcast vector: name + cb
+    def _deref_arg_array_2d(self, arg):
+        # Broadcast vector: name_v[_x]
         if arg.isbroadcast:
-            stmts.append('{0}_v + cb'.format(arg.name))
-        # Matrix: name + _y*lsdim + cb
+            ix = '_x'
+        # Matrix: name_v[lsdim*_y + _x]
         elif arg.ncdim == 0:
-            stmts.append('{0}_v + _y*lsd{0} + cb'.format(arg.name))
-        # Stacked matrix: name + (_y*nv + <0>)*lsdim + cb
+            ix = 'lsd{}*_y + _x'.format(arg.name)
+        # Stacked matrix: name_v[(_y*nv + \1)*lsdim + _x]
         elif arg.ncdim == 1:
-            stmts.extend('{0}_v + (_y*{1} + {2})*lsd{0} + cb'
-                         .format(arg.name, arg.cdims[0], i)
-                         for i in range(arg.cdims[0]))
-        # Doubly stacked matrix: name + ((<0>*_ny + _y)*nv + <1>)*lsdim + cb
+            ix = r'(_y*{0} + \1)*lsd{1} + _x'.format(arg.cdims[0], arg.name)
+        # Doubly stacked matrix: name_v[((\1*_ny + _y)*nv + \2)*lsdim + _x]
         else:
-            stmts.extend('{0}_v + (({1}*_ny + _y)*{2} + {3})*lsd{0} + cb'
-                         .format(arg.name, i, arg.cdims[1], j)
-                         for i, j in ndrange(*arg.cdims))
+            ix = (r'((\1*_ny + _y)*{0} + \2)*lsd{1} + _x'
+                  .format(arg.cdims[1], arg.name))
+
+        return '{0}_v[{1}]'.format(arg.name, ix)
 
-        return stmts
+    def _emit_body(self):
+        body = self.body
+        ptns = [r'\b{0}\b', r'\b{0}\[(\d+)\]', r'\b{0}\[(\d+)\]\[(\d+)\]']
+
+        for va in self.vectargs:
+            # Dereference the argument
+            if va.isview:
+                darg = self._deref_arg_view(va)
+            else:
+                darg = self._deref_arg_array(va)
+
+            # Substitute
+            body = re.sub(ptns[va.ncdim].format(va.name), darg, body)
+
+        return body
diff --git a/pyfr/backends/openmp/kernels/axnpby.mako b/pyfr/backends/openmp/kernels/axnpby.mako
index a72c0de..ca3dec9 100644
--- a/pyfr/backends/openmp/kernels/axnpby.mako
+++ b/pyfr/backends/openmp/kernels/axnpby.mako
@@ -2,7 +2,7 @@
 <%inherit file='base'/>
 <%namespace module='pyfr.backends.base.makoutil' name='pyfr'/>
 
-static PYFR_NOINLINE void
+static void
 axnpby_inner(int n, fpdtype_t *__restrict__ y, fpdtype_t beta,
              ${', '.join('const fpdtype_t *__restrict__ x{0}, '
                          'fpdtype_t a{0}'.format(i) for i in range(n))})
diff --git a/pyfr/backends/openmp/kernels/base.mako b/pyfr/backends/openmp/kernels/base.mako
index 1f4d487..7cd4d75 100644
--- a/pyfr/backends/openmp/kernels/base.mako
+++ b/pyfr/backends/openmp/kernels/base.mako
@@ -6,7 +6,6 @@
 #include <tgmath.h>
 
 #define PYFR_ALIGN_BYTES ${alignb}
-#define PYFR_NOINLINE __attribute__ ((noinline))
 
 #define min(a, b) ((a) < (b) ? (a) : (b))
 #define max(a, b) ((a) > (b) ? (a) : (b))

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git