[pyfr] 57/88: Improve the performance of axnpby.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Wed Nov 16 12:05:29 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository pyfr.

commit a0f00ccabf69f718969a167e0159cace5280ae49
Author: Freddie Witherden <freddie at witherden.org>
Date:   Sun Jul 3 14:06:34 2016 -0700

    Improve the performance of axnpby.
---
 pyfr/backends/cuda/blasext.py            |  2 +-
 pyfr/backends/cuda/kernels/axnpby.mako   | 45 +++++++++++++++----------------
 pyfr/backends/opencl/blasext.py          |  2 +-
 pyfr/backends/opencl/kernels/axnpby.mako | 46 +++++++++++++++-----------------
 4 files changed, 46 insertions(+), 49 deletions(-)

diff --git a/pyfr/backends/cuda/blasext.py b/pyfr/backends/cuda/blasext.py
index 96b94fe..d2a406e 100644
--- a/pyfr/backends/cuda/blasext.py
+++ b/pyfr/backends/cuda/blasext.py
@@ -30,7 +30,7 @@ class CUDABlasExtKernels(CUDAKernelProvider):
 
         # Determine the grid/block
         block = (128, 1, 1)
-        grid = get_grid_for_block(block, ncolb)
+        grid = get_grid_for_block(block, ncolb, nrow)
 
         class AxnpbyKernel(ComputeKernel):
             def run(self, queue, *consts):
diff --git a/pyfr/backends/cuda/kernels/axnpby.mako b/pyfr/backends/cuda/kernels/axnpby.mako
index c19f273..2515bea 100644
--- a/pyfr/backends/cuda/kernels/axnpby.mako
+++ b/pyfr/backends/cuda/kernels/axnpby.mako
@@ -3,35 +3,34 @@
 <%namespace module='pyfr.backends.base.makoutil' name='pyfr'/>
 
 __global__ void
-axnpby(int nrow, int ncolb, int ldim,
-       ${', '.join('fpdtype_t* __restrict__ x' + str(i) for i in range(nv))},
+axnpby(int nrow, int ncolb, int ldim, fpdtype_t* __restrict__ x0,
+       ${', '.join('const fpdtype_t* __restrict__ x' + str(i)
+                   for i in range(1, nv))},
        ${', '.join('fpdtype_t a' + str(i) for i in range(nv))})
 {
+    int i = blockIdx.y*blockDim.y + threadIdx.y;
     int j = blockIdx.x*blockDim.x + threadIdx.x;
     int idx;
 
     if (j < ncolb && a0 == 0.0)
-        for (int i = 0; i < nrow; ++i)
-        {
-        % for k in subdims:
-            idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
-            x0[idx] = ${pyfr.dot('a{l}', 'x{l}[idx]', l=(1, nv))};
-        % endfor
-        }
+    {
+    % for k in subdims:
+        idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
+        x0[idx] = ${pyfr.dot('a{l}', 'x{l}[idx]', l=(1, nv))};
+    % endfor
+    }
     else if (j < ncolb && a0 == 1.0)
-        for (int i = 0; i < nrow; ++i)
-        {
-        % for k in subdims:
-            idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
-            x0[idx] += ${pyfr.dot('a{l}', 'x{l}[idx]', l=(1, nv))};
-        % endfor
-        }
+    {
+    % for k in subdims:
+        idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
+        x0[idx] += ${pyfr.dot('a{l}', 'x{l}[idx]', l=(1, nv))};
+    % endfor
+    }
     else if (j < ncolb)
-        for (int i = 0; i < nrow; ++i)
-        {
-        % for k in subdims:
-            idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
-            x0[idx] = ${pyfr.dot('a{l}', 'x{l}[idx]', l=nv)};
-        % endfor
-        }
+    {
+    % for k in subdims:
+        idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
+        x0[idx] = ${pyfr.dot('a{l}', 'x{l}[idx]', l=nv)};
+    % endfor
+    }
 }
diff --git a/pyfr/backends/opencl/blasext.py b/pyfr/backends/opencl/blasext.py
index f388770..e073ad2 100644
--- a/pyfr/backends/opencl/blasext.py
+++ b/pyfr/backends/opencl/blasext.py
@@ -31,7 +31,7 @@ class OpenCLBlasExtKernels(OpenCLKernelProvider):
         class AxnpbyKernel(ComputeKernel):
             def run(self, queue, *consts):
                 args = [x.data for x in arr] + list(consts)
-                kern(queue.cl_queue_comp, (ncolb,), None, nrow, ncolb,
+                kern(queue.cl_queue_comp, (ncolb, nrow), None, nrow, ncolb,
                      ldim, *args)
 
         return AxnpbyKernel()
diff --git a/pyfr/backends/opencl/kernels/axnpby.mako b/pyfr/backends/opencl/kernels/axnpby.mako
index 14c346e..731410d 100644
--- a/pyfr/backends/opencl/kernels/axnpby.mako
+++ b/pyfr/backends/opencl/kernels/axnpby.mako
@@ -3,35 +3,33 @@
 <%namespace module='pyfr.backends.base.makoutil' name='pyfr'/>
 
 __kernel void
-axnpby(int nrow, int ncolb, int ldim,
-       ${', '.join('__global fpdtype_t* restrict x' + str(i) for i in range(nv))},
+axnpby(int nrow, int ncolb, int ldim, __global fpdtype_t* restrict x0,
+       ${', '.join('__global const fpdtype_t* restrict x' + str(i)
+                   for i in range(1, nv))},
        ${', '.join('fpdtype_t a' + str(i) for i in range(nv))})
 {
-    int j = get_global_id(0);
+    int i = get_global_id(1), j = get_global_id(0);
     int idx;
 
     if (j < ncolb && a0 == 0.0)
-        for (int i = 0; i < nrow; ++i)
-        {
-        % for k in subdims:
-            idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
-            x0[idx] = ${pyfr.dot('a{l}', 'x{l}[idx]', l=(1, nv))};
-        % endfor
-        }
+    {
+    % for k in subdims:
+        idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
+        x0[idx] = ${pyfr.dot('a{l}', 'x{l}[idx]', l=(1, nv))};
+    % endfor
+    }
     else if (j < ncolb && a0 == 1.0)
-        for (int i = 0; i < nrow; ++i)
-        {
-        % for k in subdims:
-            idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
-            x0[idx] += ${pyfr.dot('a{l}', 'x{l}[idx]', l=(1, nv))};
-        % endfor
-        }
+    {
+    % for k in subdims:
+        idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
+        x0[idx] += ${pyfr.dot('a{l}', 'x{l}[idx]', l=(1, nv))};
+    % endfor
+    }
     else if (j < ncolb)
-        for (int i = 0; i < nrow; ++i)
-        {
-        % for k in subdims:
-            idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
-            x0[idx] = ${pyfr.dot('a{l}', 'x{l}[idx]', l=nv)};
-        % endfor
-        }
+    {
+    % for k in subdims:
+        idx = i*ldim + SOA_IX(j, ${k}, ${ncola});
+        x0[idx] = ${pyfr.dot('a{l}', 'x{l}[idx]', l=nv)};
+    % endfor
+    }
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git



More information about the debian-science-commits mailing list