[pyfr] 61/88: Improve the performance of the copy kernel in the OpenMP backend.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Wed Nov 16 12:05:30 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository pyfr.

commit 2e621fe5d28022d1238db94f537089d5c42c935a
Author: Freddie Witherden <freddie at witherden.org>
Date:   Mon Jul 11 17:10:56 2016 -0700

    Improve the performance of the copy kernel in the OpenMP backend.
---
 pyfr/backends/openmp/blasext.py              | 12 +++++++++++-
 pyfr/backends/openmp/kernels/par-memcpy.mako | 17 +++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/pyfr/backends/openmp/blasext.py b/pyfr/backends/openmp/blasext.py
index ebc29a9..6c369ca 100644
--- a/pyfr/backends/openmp/blasext.py
+++ b/pyfr/backends/openmp/blasext.py
@@ -35,9 +35,19 @@ class OpenMPBlasExtKernels(OpenMPKernelProvider):
         if dst.traits != src.traits:
             raise ValueError('Incompatible matrix types')
 
+        if dst.nbytes >= 2**31:
+            raise ValueError('Matrix too large for copy')
+
+        # Render the kernel template
+        ksrc = self.backend.lookup.get_template('par-memcpy').render()
+
+        # Build the kernel
+        kern = self._build_kernel('par_memcpy', ksrc,
+                                  [np.intp, np.intp, np.int32])
+
         class CopyKernel(ComputeKernel):
             def run(self, queue):
-                dst.data[:] = src.data.reshape(dst.data.shape)
+                kern(dst, src, dst.nbytes)
 
         return CopyKernel()
 
diff --git a/pyfr/backends/openmp/kernels/par-memcpy.mako b/pyfr/backends/openmp/kernels/par-memcpy.mako
new file mode 100644
index 0000000..f5cf7fa
--- /dev/null
+++ b/pyfr/backends/openmp/kernels/par-memcpy.mako
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+<%inherit file='base'/>
+<%namespace module='pyfr.backends.base.makoutil' name='pyfr'/>
+
+#include <string.h>
+
+void  /* Copy n bytes from src to dst inside an OpenMP parallel region. */
+par_memcpy(char *dst, const char *src, int n)  /* n: byte count, as int */
+{
+    #pragma omp parallel
+    {
+        int begin, end;  /* byte offsets filled in by loop_sched_1d */
+        loop_sched_1d(n, 1, &begin, &end);  /* defined elsewhere; presumably this thread's share of [0, n) - TODO confirm */
+
+        memcpy(dst + begin, src + begin, end - begin);  /* copy bytes [begin, end) */
+    }
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git



More information about the debian-science-commits mailing list