[python-dtcwt] 157/497: opencl: first pass at colifilt

Tue Jul 21 18:06:00 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository python-dtcwt.

commit 725517a3213e156aa67600435efe59eae1902e2f
Author: Rich Wareham <rjw57 at cam.ac.uk>
Date:   Fri Nov 8 10:13:29 2013 +0000

    opencl: first pass at colifilt
---
 dtcwt/opencl/lowlevel.py    | 159 ++++++++++++++++++++++++++------------------
 tests/testopenclcoldfilt.py |   7 ++
 tests/testopenclcolifilt.py |  15 +++++
 3 files changed, 118 insertions(+), 63 deletions(-)

diff --git a/dtcwt/opencl/lowlevel.py b/dtcwt/opencl/lowlevel.py
index 8b13dba..10f7bba 100644
--- a/dtcwt/opencl/lowlevel.py
+++ b/dtcwt/opencl/lowlevel.py
@@ -135,6 +135,8 @@ def colifilt(X, ha, hb):
     .. codeauthor:: Nick Kingsbury, Cambridge University, August 2000
 
     """
+    queue = get_default_queue()
+
     # Make sure all inputs are arrays
     X = asfarray(X)
     ha = asfarray(ha)
@@ -150,69 +152,10 @@ def colifilt(X, ha, hb):
     if ha.shape[0] % 2 != 0:
         raise ValueError('Lengths of ha and hb must be even')
 
-    m = ha.shape[0]
-    m2 = np.fix(m*0.5)
-
-    Y = np.zeros((r*2,c), dtype=X.dtype)
-    if not np.any(np.nonzero(X[:])[0]):
-        return Y
-
-    if m2 % 2 == 0:
-        # m/2 is even, so set up t to start on d samples.
-        # Set up vector for symmetric extension of X with repeated end samples.
-        # Use 'reflect' so r < m2 works OK.
-        xe = reflect(np.arange(-m2, r+m2, dtype=np.int), -0.5, r-0.5)
-       
-        t = np.arange(3, r+m, 2)
-        if np.sum(ha*hb) > 0:
-            ta = t
-            tb = t - 1
-        else:
-            ta = t - 1
-            tb = t
-       
-        # Select odd and even samples from ha and hb. Note that due to 0-indexing
-        # 'odd' and 'even' are not perhaps what you might expect them to be.
-        hao = as_column_vector(ha[0:m:2])
-        hae = as_column_vector(ha[1:m:2])
-        hbo = as_column_vector(hb[0:m:2])
-        hbe = as_column_vector(hb[1:m:2])
-       
-        s = np.arange(0,r*2,4)
-       
-        Y[s,:]   = _column_convolve(X[xe[tb-2],:],hae)
-        Y[s+1,:] = _column_convolve(X[xe[ta-2],:],hbe)
-        Y[s+2,:] = _column_convolve(X[xe[tb  ],:],hao)
-        Y[s+3,:] = _column_convolve(X[xe[ta  ],:],hbo)
-    else:
-        # m/2 is odd, so set up t to start on b samples.
-        # Set up vector for symmetric extension of X with repeated end samples.
-        # Use 'reflect' so r < m2 works OK.
-        xe = reflect(np.arange(-m2, r+m2, dtype=np.int), -0.5, r-0.5)
-
-        t = np.arange(2, r+m-1, 2)
-        if np.sum(ha*hb) > 0:
-            ta = t
-            tb = t - 1
-        else:
-            ta = t - 1
-            tb = t
-       
-        # Select odd and even samples from ha and hb. Note that due to 0-indexing
-        # 'odd' and 'even' are not perhaps what you might expect them to be.
-        hao = as_column_vector(ha[0:m:2])
-        hae = as_column_vector(ha[1:m:2])
-        hbo = as_column_vector(hb[0:m:2])
-        hbe = as_column_vector(hb[1:m:2])
-       
-        s = np.arange(0,r*2,4)
-       
-        Y[s,:]   = _column_convolve(X[xe[tb],:],hao)
-        Y[s+1,:] = _column_convolve(X[xe[ta],:],hbo)
-        Y[s+2,:] = _column_convolve(X[xe[tb],:],hae)
-        Y[s+3,:] = _column_convolve(X[xe[ta],:],hbe)
-
-    return Y
+    # Perform filtering on columns of extended matrix X(xe,:) in 4 ways.
+    Y = axis_convolve_ifilter(X, ha, queue=queue)
+
+    return to_array(Y)
 
 def _check_cl():
     if not _HAVE_CL:
@@ -336,6 +279,21 @@ def axis_convolve_dfilter(X, h, axis=0, queue=None, output=None):
 
     return _apply_kernel(X, h, kern, output, axis=axis, elementstep=2, extra_kernel_args=[np.int32(flip_output),])
 
+def axis_convolve_ifilter(X, h, axis=0, queue=None, output=None):
+    _check_cl()
+    queue = _to_queue(queue)
+    kern = _ifilter_kernel_for_queue(queue.context)
+
+    # Create output if not specified
+    if output is None:
+        output_shape = list(X.shape)
+        output_shape[axis] <<= 1
+        output = cl_array.zeros(queue, output_shape, np.float32)
+
+    flip_output = np.dot(h.flat, h.flat[::-1]) > 0
+
+    return _apply_kernel(X, h, kern, output, axis=axis, elementstep=0.5, extra_kernel_args=[np.int32(flip_output),])
+
 @memoize
 def _convolve_kernel_for_queue(context):
     """Return a kernel for convolution suitable for use with *context*. The
@@ -356,6 +314,16 @@ def _dfilter_kernel_for_queue(context):
     kern_prog.build()
     return kern_prog.convolve_kernel
 
+ at memoize
+def _ifilter_kernel_for_queue(context):
+    """Return a kernel for convolution suitable for use with *context*. The
+    return values are memoized.
+    
+    """
+    kern_prog = cl.Program(context, CL_ARRAY_HEADER + IFILTER_KERNEL)
+    kern_prog.build()
+    return kern_prog.convolve_kernel
+
 # Functions to access OpenCL Arrays within a kernel
 CL_ARRAY_HEADER = '''
 struct array_spec
@@ -484,3 +452,68 @@ void __kernel convolve_kernel(
     }
 }
 '''
+
+IFILTER_KERNEL = '''
+void __kernel convolve_kernel(
+    const __global float* X, int4 X_strides, int4 X_shape, int X_offset,
+    const __global float* h, int h_stride, int h_shape, int h_offset,
+    __global float* Y, int4 Y_strides, int4 Y_shape, int Y_offset,
+    int axis, int flip_output)
+{
+    int4 global_coord = { get_global_id(0), get_global_id(1), get_global_id(2), 0 };
+    struct array_spec X_spec = { .strides = X_strides, .shape = X_shape, .offset = X_offset };
+    struct array_spec Y_spec = { .strides = Y_strides, .shape = Y_shape, .offset = Y_offset };
+
+    // A vector of flags with the convolution direction set
+    int4 axis_flag = (int4)(axis,axis,axis,axis) == (int4)(0,1,2,3);
+
+    // Each run of this kernel outputs *two* pixels: the result of convolving
+    // 'odd' samples (0, 2, 4, ...) with h and the result of convolving 'even'
+    // samples (1, 3, 5, ... ) with reverse(h).
+
+    // Compute the base output co-ordinate and a vector which has 1 set in the
+    // component corresponding to *axis*.
+    int4 output_coord = select(global_coord, global_coord * 4, axis_flag);
+    int4 one_px_advance = select((int4)(0,0,0,0), (int4)(1,1,1,1), axis_flag);
+
+    if(any(output_coord >= Y_shape))
+        return;
+
+    int4 X_coord = select(global_coord, global_coord * 2, axis_flag);
+
+    int4 coord_min = { 0, 0, 0, 0 };
+    int4 coord_max = X_spec.shape;
+
+    float4 output = { 0, 0, 0, 0 };
+
+    int m = h_shape>>1;
+    for(int d=0; d<m; ++d) {
+        int X_offset = 2*((m>>1)-d);
+
+        float4 h_samples = {
+            h[h_offset + (d*2)*h_stride],           // ha odd
+            h[h_offset + (1+((m-d-1)*2))*h_stride], // hb odd
+            h[h_offset + (1+(d*2))*h_stride],       // ha even
+            h[h_offset + ((m-d-1)*2)*h_stride],     // hb even
+        };
+
+        float4 X_samples = {
+            X[coord_to_offset(reflect(X_coord + (X_offset+0)*one_px_advance, coord_min, coord_max), X_spec)],
+            X[coord_to_offset(reflect(X_coord + (X_offset+1)*one_px_advance, coord_min, coord_max), X_spec)],
+            X[coord_to_offset(reflect(X_coord + (X_offset+0)*one_px_advance, coord_min, coord_max), X_spec)],
+            X[coord_to_offset(reflect(X_coord + (X_offset+1)*one_px_advance, coord_min, coord_max), X_spec)],
+        };
+
+        output += X_samples * h_samples;
+    }
+
+    if(flip_output) {
+        output = output.yxwz;
+    }
+
+    Y[coord_to_offset(output_coord, Y_spec)] = output.s0;
+    Y[coord_to_offset(output_coord + one_px_advance, Y_spec)] = output.s1;
+    Y[coord_to_offset(output_coord + 2*one_px_advance, Y_spec)] = output.s2;
+    Y[coord_to_offset(output_coord + 3*one_px_advance, Y_spec)] = output.s3;
+}
+'''
diff --git a/tests/testopenclcoldfilt.py b/tests/testopenclcoldfilt.py
index eb64e84..720925e 100644
--- a/tests/testopenclcoldfilt.py
+++ b/tests/testopenclcoldfilt.py
@@ -61,4 +61,11 @@ def test_output_size():
     Z = coldfilt_gold(lena, (-1,1), (1,-1))
     assert_almost_equal(Y, Z)
 
+ at skip_if_no_cl
+def test_qshift():
+    h0a, h0b, g0a, g0b, h1a, h1b, g1a, g1b = qshift('qshift_d')
+    y = coldfilt(lena, h1b, h1b)
+    z = coldfilt_gold(lena, h1b, h1a)
+    assert_almost_equal(y, z)
+
 # vim:sw=4:sts=4:et
diff --git a/tests/testopenclcolifilt.py b/tests/testopenclcolifilt.py
index cfe911e..136e66d 100644
--- a/tests/testopenclcolifilt.py
+++ b/tests/testopenclcolifilt.py
@@ -3,6 +3,7 @@ import os
 import numpy as np
 from dtcwt.opencl.lowlevel import colifilt
 from dtcwt.lowlevel import colifilt as colifilt_gold
+from dtcwt.coeffs import biort, qshift
 
 from nose.tools import raises
 
@@ -72,4 +73,18 @@ def test_non_orthogonal_input_non_mult_4():
     Z = colifilt_gold(lena, (1,0,0,1), (1,0,0,1))
     assert_almost_equal(Y,Z)
 
+ at skip_if_no_cl
+def test_qshift():
+    h0a, h0b, g0a, g0b, h1a, h1b, g1a, g1b = qshift('qshift_d')
+    y = colifilt(lena, h1b, h1b)
+    z = colifilt_gold(lena, h1b, h1a)
+    assert_almost_equal(y, z)
+
+ at skip_if_no_cl
+def test_qshift_odd_input():
+    h0a, h0b, g0a, g0b, h1a, h1b, g1a, g1b = qshift('qshift_d')
+    y = colifilt(lena, h1b[:-2], h1b[:-2])
+    z = colifilt_gold(lena, h1a[:-2], h1b[:-2])
+    assert_almost_equal(y, z)
+
 # vim:sw=4:sts=4:et

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-dtcwt.git