[python-dtcwt] 167/497: start to convert dtwavexfm2 to pure-er opencl

Tue Jul 21 18:06:01 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository python-dtcwt.

commit aa83be573fcb58048a88252c0d3bcc6421d57fe7
Author: Rich Wareham <rjw57 at cam.ac.uk>
Date:   Sat Nov 9 13:43:32 2013 +0000

    start to convert dtwavexfm2 to pure-er opencl
    
    currently x2.56 speedup
---
 dtcwt/opencl/lowlevel.py    | 20 ++++++++++----------
 dtcwt/opencl/transform2d.py | 37 ++++++++++++++++++++-----------------
 2 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/dtcwt/opencl/lowlevel.py b/dtcwt/opencl/lowlevel.py
index 6f5b375..75da4fb 100644
--- a/dtcwt/opencl/lowlevel.py
+++ b/dtcwt/opencl/lowlevel.py
@@ -171,28 +171,28 @@ def get_default_queue():
     ctx = cl.create_some_context()
     return cl.CommandQueue(ctx)
 
-def _to_queue(queue):
+def to_queue(queue):
     if queue is not None:
         return queue
     return get_default_queue()
 
-def _to_device(X, queue=None):
+def to_device(X, queue=None):
     if isinstance(X, cl_array.Array) and X.queue is queue:
         return X
-    return cl_array.to_device(_to_queue(queue), np.array(X, dtype=np.float32, order='C'))
+    return cl_array.to_device(to_queue(queue), np.array(X, dtype=np.float32, order='C'))
 
 def to_array(a, queue=None):
-    queue = queue or a.queue or _to_queue(queue)
+    queue = queue or a.queue or to_queue(queue)
     rv = np.empty(a.shape, a.dtype)
     cl.enqueue_copy(queue, rv, a.data).wait()
     return rv
 
 def _apply_kernel(X, h, kern, output, axis=0, elementstep=1, extra_kernel_args=None):
-    queue = _to_queue(output.queue)
+    queue = to_queue(output.queue)
 
     # If necessary, convert X and h to device arrays
-    h_device = _to_device(h, queue)
-    X_device = _to_device(X, queue)
+    h_device = to_device(h, queue)
+    X_device = to_device(X, queue)
 
     # Work out size of work group taking into account element step
     work_shape = np.array(output.shape[:3])
@@ -252,7 +252,7 @@ def axis_convolve(X, h, axis=0, queue=None, output=None):
     """
 
     _check_cl()
-    queue = _to_queue(queue)
+    queue = to_queue(queue)
     kern = _convolve_kernel_for_queue(queue.context)
 
     # Create output if not specified
@@ -266,7 +266,7 @@ def axis_convolve(X, h, axis=0, queue=None, output=None):
 
 def axis_convolve_dfilter(X, h, axis=0, queue=None, output=None):
     _check_cl()
-    queue = _to_queue(queue)
+    queue = to_queue(queue)
     kern = _dfilter_kernel_for_queue(queue.context)
 
     # Create output if not specified
@@ -279,7 +279,7 @@ def axis_convolve_dfilter(X, h, axis=0, queue=None, output=None):
 
 def axis_convolve_ifilter(X, h, axis=0, queue=None, output=None):
     _check_cl()
-    queue = _to_queue(queue)
+    queue = to_queue(queue)
     kern = _ifilter_kernel_for_queue(queue.context)
 
     # Create output if not specified
diff --git a/dtcwt/opencl/transform2d.py b/dtcwt/opencl/transform2d.py
index 0a42392..5c1b359 100644
--- a/dtcwt/opencl/transform2d.py
+++ b/dtcwt/opencl/transform2d.py
@@ -8,9 +8,11 @@ from dtcwt import biort as _biort, qshift as _qshift
 from dtcwt.defaults import DEFAULT_BIORT, DEFAULT_QSHIFT
 from dtcwt.lowlevel import appropriate_complex_type_for, asfarray
 from dtcwt.opencl.lowlevel import colfilter, coldfilt, colifilt
+from dtcwt.opencl.lowlevel import axis_convolve, axis_convolve_dfilter
+from dtcwt.opencl.lowlevel import to_device, to_queue, to_array
 from dtcwt.transform2d import q2c
 
-def dtwavexfm2(X, nlevels=3, biort=DEFAULT_BIORT, qshift=DEFAULT_QSHIFT, include_scale=False):
+def dtwavexfm2(X, nlevels=3, biort=DEFAULT_BIORT, qshift=DEFAULT_QSHIFT, include_scale=False, queue=None):
     """Perform a *n*-level DTCWT-2D decompostion on a 2D matrix *X*.
 
     :param X: 2D real array
@@ -39,19 +41,20 @@ def dtwavexfm2(X, nlevels=3, biort=DEFAULT_BIORT, qshift=DEFAULT_QSHIFT, include
     .. codeauthor:: Cian Shaffrey, Cambridge University, Sept 2001
 
     """
+    queue = to_queue(queue)
     X = np.atleast_2d(asfarray(X))
 
     # Try to load coefficients if biort is a string parameter
     try:
-        h0o, g0o, h1o, g1o = _biort(biort)
+        h0o, g0o, h1o, g1o = tuple(to_device(x) for x in _biort(biort))
     except TypeError:
-        h0o, g0o, h1o, g1o = biort
+        h0o, g0o, h1o, g1o = tuple(to_device(x) for x in biort)
 
     # Try to load coefficients if qshift is a string parameter
     try:
-        h0a, h0b, g0a, g0b, h1a, h1b, g1a, g1b = _qshift(qshift)
+        h0a, h0b, g0a, g0b, h1a, h1b, g1a, g1b = tuple(to_device(x) for x in _qshift(qshift))
     except TypeError:
-        h0a, h0b, g0a, g0b, h1a, h1b, g1a, g1b = qshift
+        h0a, h0b, g0a, g0b, h1a, h1b, g1a, g1b = tuple(to_device(x) for x in qshift)
 
     original_size = X.shape
 
@@ -91,15 +94,15 @@ def dtwavexfm2(X, nlevels=3, biort=DEFAULT_BIORT, qshift=DEFAULT_QSHIFT, include
 
     if nlevels >= 1:
         # Do odd top-level filters on cols.
-        Lo = colfilter(X,h0o).T
-        Hi = colfilter(X,h1o).T
+        Lo = to_array(axis_convolve(X,h0o,axis=0,queue=queue))
+        Hi = to_array(axis_convolve(X,h1o,axis=0,queue=queue))
 
         # Do odd top-level filters on rows.
-        LoLo = colfilter(Lo,h0o).T
+        LoLo = to_array(axis_convolve(Lo,h0o,axis=1))
         Yh[0] = np.zeros((LoLo.shape[0] >> 1, LoLo.shape[1] >> 1, 6), dtype=complex_dtype)
-        Yh[0][:,:,[0, 5]] = q2c(colfilter(Hi,h0o).T)     # Horizontal pair
-        Yh[0][:,:,[2, 3]] = q2c(colfilter(Lo,h1o).T)     # Vertical pair
-        Yh[0][:,:,[1, 4]] = q2c(colfilter(Hi,h1o).T)     # Diagonal pair
+        Yh[0][:,:,[0, 5]] = q2c(to_array(axis_convolve(Hi,h0o,axis=1,queue=queue)))     # Horizontal pair
+        Yh[0][:,:,[2, 3]] = q2c(to_array(axis_convolve(Lo,h1o,axis=1,queue=queue)))     # Vertical pair
+        Yh[0][:,:,[1, 4]] = q2c(to_array(axis_convolve(Hi,h1o,axis=1,queue=queue)))     # Diagonal pair
 
         if include_scale:
             Yscale[0] = LoLo
@@ -115,16 +118,16 @@ def dtwavexfm2(X, nlevels=3, biort=DEFAULT_BIORT, qshift=DEFAULT_QSHIFT, include
             LoLo = np.hstack((LoLo[:,[0]], LoLo, LoLo[:,[-1]]))
 
         # Do even Qshift filters on rows.
-        Lo = coldfilt(LoLo,h0b,h0a).T
-        Hi = coldfilt(LoLo,h1b,h1a).T
+        Lo = to_array(axis_convolve_dfilter(LoLo,h0b,axis=0,queue=queue))
+        Hi = to_array(axis_convolve_dfilter(LoLo,h1b,axis=0,queue=queue))
 
         # Do even Qshift filters on columns.
-        LoLo = coldfilt(Lo,h0b,h0a).T
+        LoLo = to_array(axis_convolve_dfilter(Lo,h0b,axis=1,queue=queue))
 
         Yh[level] = np.zeros((LoLo.shape[0]>>1, LoLo.shape[1]>>1, 6), dtype=complex_dtype)
-        Yh[level][:,:,[0, 5]] = q2c(coldfilt(Hi,h0b,h0a).T)  # Horizontal
-        Yh[level][:,:,[2, 3]] = q2c(coldfilt(Lo,h1b,h1a).T)  # Vertical
-        Yh[level][:,:,[1, 4]] = q2c(coldfilt(Hi,h1b,h1a).T)  # Diagonal   
+        Yh[level][:,:,[0, 5]] = q2c(to_array(axis_convolve_dfilter(Hi,h0b,axis=1,queue=queue)))  # Horizontal
+        Yh[level][:,:,[2, 3]] = q2c(to_array(axis_convolve_dfilter(Lo,h1b,axis=1,queue=queue)))  # Vertical
+        Yh[level][:,:,[1, 4]] = q2c(to_array(axis_convolve_dfilter(Hi,h1b,axis=1,queue=queue)))  # Diagonal   
 
         if include_scale:
             Yscale[level] = LoLo

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-dtcwt.git