[python-dtcwt] 179/497: opencl: move all of q2c manipulation to OpenCL kernel
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Jul 21 18:06:02 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository python-dtcwt.
commit 2bbd941f8f360ca7cd0612c5a6fd434d2fc479c0
Author: Rich Wareham <rjw57 at cam.ac.uk>
Date: Sun Nov 10 14:14:20 2013 +0000
opencl: move all of q2c manipulation to OpenCL kernel
---
dtcwt/opencl/lowlevel.py | 83 +++++++++++++++++++++++++++++++++------------
dtcwt/opencl/transform2d.py | 26 +++++++-------
2 files changed, 74 insertions(+), 35 deletions(-)
diff --git a/dtcwt/opencl/lowlevel.py b/dtcwt/opencl/lowlevel.py
index a752d11..49ae4c8 100644
--- a/dtcwt/opencl/lowlevel.py
+++ b/dtcwt/opencl/lowlevel.py
@@ -296,22 +296,27 @@ def axis_convolve_ifilter(X, h, axis=0, queue=None, output=None):
return _apply_kernel(X, h, kern, output, axis=axis, elementstep=0.5)
-def q2c(X, queue=None, output=None):
+def q2c(X1, X2, X3, queue=None, output=None):
_check_cl()
queue = to_queue(queue)
kern = _q2c_kernel_for_queue(queue.context)
+ if X1.shape != X2.shape or X2.shape != X3.shape:
+ raise ValueError('All three X matrices must have the same shape.')
+
# Create output if not specified
if output is None:
output_shape = [1,1,1]
- output_shape[:len(X.shape[:2])] = X.shape[:2]
+ output_shape[:len(X1.shape[:2])] = X1.shape[:2]
output_shape[0] >>= 1
output_shape[1] >>= 1
- output_shape[2] = 2
+ output_shape[2] = 6
output = cl_array.empty(queue, output_shape, np.complex64)
# If necessary, convert X
- X_device = to_device(X, queue)
+ X1_device = to_device(X1, queue)
+ X2_device = to_device(X2, queue)
+ X3_device = to_device(X3, queue)
# Work out size of work group taking into account element step
work_shape = np.array(output.shape[:3])
@@ -325,9 +330,14 @@ def q2c(X, queue=None, output=None):
global_shape = list(int(np.ceil(x/float(y))*y) for x, y in zip(work_shape, local_shape))
- X_strides = struct.pack('iiii', *(tuple(s//X_device.dtype.itemsize for s in X_device.strides) + (0,0,0,0))[:4])
- X_shape = struct.pack('iiii', *(tuple(X_device.shape) + (1,1,1,1))[:4])
- X_offset = np.int32(X_device.offset)
+ X_shape = struct.pack('iiii', *(tuple(X1_device.shape) + (1,1,1,1))[:4])
+
+ X1_strides = struct.pack('iiii', *(tuple(s//X1_device.dtype.itemsize for s in X1_device.strides) + (0,0,0,0))[:4])
+ X1_offset = np.int32(X1_device.offset)
+ X2_strides = struct.pack('iiii', *(tuple(s//X2_device.dtype.itemsize for s in X2_device.strides) + (0,0,0,0))[:4])
+ X2_offset = np.int32(X2_device.offset)
+ X3_strides = struct.pack('iiii', *(tuple(s//X3_device.dtype.itemsize for s in X3_device.strides) + (0,0,0,0))[:4])
+ X3_offset = np.int32(X3_device.offset)
Y_strides = struct.pack('iiii', *(tuple(s//output.dtype.itemsize for s in output.strides) + (0,0,0,0))[:4])
Y_shape = struct.pack('iiii', *(tuple(output.shape) + (1,1,1,1))[:4])
@@ -335,7 +345,10 @@ def q2c(X, queue=None, output=None):
# Perform actual convolution
kern(queue, global_shape, local_shape,
- X_device.base_data, X_strides, X_shape, X_offset,
+ X_shape,
+ X1_device.base_data, X1_strides, X1_offset,
+ X2_device.base_data, X2_strides, X2_offset,
+ X3_device.base_data, X3_strides, X3_offset,
output.base_data, Y_strides, Y_shape, Y_offset)
return output
@@ -588,11 +601,16 @@ void __kernel convolve_kernel(
Q2C_KERNEL = '''
void __kernel q2c_kernel(
- const __global float* X, int4 X_strides, int4 X_shape, int X_offset,
+ int4 X_shape,
+ const __global float* X1, int4 X1_strides, int X1_offset,
+ const __global float* X2, int4 X2_strides, int X2_offset,
+ const __global float* X3, int4 X3_strides, int X3_offset,
__global float2* Y, int4 Y_strides, int4 Y_shape, int Y_offset)
{
int4 global_coord = { get_global_id(0), get_global_id(1), get_global_id(2), 0 };
- struct array_spec X_spec = { .strides = X_strides, .shape = X_shape, .offset = X_offset };
+ struct array_spec X1_spec = { .strides = X1_strides, .shape = X_shape, .offset = X1_offset };
+ struct array_spec X2_spec = { .strides = X2_strides, .shape = X_shape, .offset = X2_offset };
+ struct array_spec X3_spec = { .strides = X3_strides, .shape = X_shape, .offset = X3_offset };
struct array_spec Y_spec = { .strides = Y_strides, .shape = Y_shape, .offset = Y_offset };
int4 X_coord = global_coord * (int4)(2,2,1,1);
@@ -608,19 +626,42 @@ void __kernel q2c_kernel(
// | |
// c----d
- float4 X_samples = {
- X[coord_to_offset(X_coord, X_spec)], // a
- X[coord_to_offset(X_coord + (int4)(0,1,0,0), X_spec)], // b
- X[coord_to_offset(X_coord + (int4)(1,0,0,0), X_spec)], // c
- X[coord_to_offset(X_coord + (int4)(1,1,0,0), X_spec)], // d
+ float4 X1_samples = {
+ X1[coord_to_offset(X_coord, X1_spec)], // a
+ X1[coord_to_offset(X_coord + (int4)(0,1,0,0), X1_spec)], // b
+ X1[coord_to_offset(X_coord + (int4)(1,0,0,0), X1_spec)], // c
+ X1[coord_to_offset(X_coord + (int4)(1,1,0,0), X1_spec)], // d
};
+ X1_samples *= sqrt(0.5);
- X_samples *= sqrt(0.5);
-
- float2 z1 = { X_samples.x - X_samples.w, X_samples.y + X_samples.z };
- float2 z2 = { X_samples.x + X_samples.w, X_samples.y - X_samples.z };
+ float4 X2_samples = {
+ X2[coord_to_offset(X_coord, X2_spec)], // a
+ X2[coord_to_offset(X_coord + (int4)(0,1,0,0), X2_spec)], // b
+ X2[coord_to_offset(X_coord + (int4)(1,0,0,0), X2_spec)], // c
+ X2[coord_to_offset(X_coord + (int4)(1,1,0,0), X2_spec)], // d
+ };
+ X2_samples *= sqrt(0.5);
- Y[coord_to_offset(Y_coord, Y_spec)] = z1;
- Y[coord_to_offset(Y_coord + (int4)(0,0,1,0), Y_spec)] = z2;
+ float4 X3_samples = {
+ X3[coord_to_offset(X_coord, X3_spec)], // a
+ X3[coord_to_offset(X_coord + (int4)(0,1,0,0), X3_spec)], // b
+ X3[coord_to_offset(X_coord + (int4)(1,0,0,0), X3_spec)], // c
+ X3[coord_to_offset(X_coord + (int4)(1,1,0,0), X3_spec)], // d
+ };
+ X3_samples *= sqrt(0.5);
+
+ float2 z1a = { X1_samples.x - X1_samples.w, X1_samples.y + X1_samples.z };
+ float2 z1b = { X1_samples.x + X1_samples.w, X1_samples.y - X1_samples.z };
+ float2 z2a = { X2_samples.x - X2_samples.w, X2_samples.y + X2_samples.z };
+ float2 z2b = { X2_samples.x + X2_samples.w, X2_samples.y - X2_samples.z };
+ float2 z3a = { X3_samples.x - X3_samples.w, X3_samples.y + X3_samples.z };
+ float2 z3b = { X3_samples.x + X3_samples.w, X3_samples.y - X3_samples.z };
+
+ Y[coord_to_offset(Y_coord + (int4)(0,0,0,0), Y_spec)] = z1a;
+ Y[coord_to_offset(Y_coord + (int4)(0,0,1,0), Y_spec)] = z3a;
+ Y[coord_to_offset(Y_coord + (int4)(0,0,2,0), Y_spec)] = z2a;
+ Y[coord_to_offset(Y_coord + (int4)(0,0,3,0), Y_spec)] = z2b;
+ Y[coord_to_offset(Y_coord + (int4)(0,0,4,0), Y_spec)] = z3b;
+ Y[coord_to_offset(Y_coord + (int4)(0,0,5,0), Y_spec)] = z1b;
}
'''
diff --git a/dtcwt/opencl/transform2d.py b/dtcwt/opencl/transform2d.py
index 96fac20..4fd504d 100644
--- a/dtcwt/opencl/transform2d.py
+++ b/dtcwt/opencl/transform2d.py
@@ -8,7 +8,7 @@ from dtcwt import biort as _biort, qshift as _qshift
from dtcwt.defaults import DEFAULT_BIORT, DEFAULT_QSHIFT
from dtcwt.lowlevel import appropriate_complex_type_for, asfarray
from dtcwt.opencl.lowlevel import colfilter, coldfilt, colifilt
-from dtcwt.opencl.lowlevel import axis_convolve, axis_convolve_dfilter, q2c as cl_q2c
+from dtcwt.opencl.lowlevel import axis_convolve, axis_convolve_dfilter, q2c
from dtcwt.opencl.lowlevel import to_device, to_queue, to_array, empty
def dtwavexfm2(X, nlevels=3, biort=DEFAULT_BIORT, qshift=DEFAULT_QSHIFT, include_scale=False, queue=None):
@@ -98,10 +98,12 @@ def dtwavexfm2(X, nlevels=3, biort=DEFAULT_BIORT, qshift=DEFAULT_QSHIFT, include
# Do odd top-level filters on rows.
LoLo = axis_convolve(Lo,h0o,axis=1)
- Yh[0] = np.zeros((LoLo.shape[0] >> 1, LoLo.shape[1] >> 1, 6), dtype=complex_dtype)
- Yh[0][:,:,0:6:5] = q2c(axis_convolve(Hi,h0o,axis=1,queue=queue)) # Horizontal pair
- Yh[0][:,:,2:4:1] = q2c(axis_convolve(Lo,h1o,axis=1,queue=queue)) # Vertical pair
- Yh[0][:,:,1:5:3] = q2c(axis_convolve(Hi,h1o,axis=1,queue=queue)) # Diagonal pair
+
+ Yh[0] = to_array(q2c(
+ axis_convolve(Hi,h0o,axis=1,queue=queue),
+ axis_convolve(Lo,h1o,axis=1,queue=queue),
+ axis_convolve(Hi,h1o,axis=1,queue=queue),
+ ), queue=queue)
if include_scale:
Yscale[0] = to_array(LoLo)
@@ -125,10 +127,11 @@ def dtwavexfm2(X, nlevels=3, biort=DEFAULT_BIORT, qshift=DEFAULT_QSHIFT, include
# Do even Qshift filters on columns.
LoLo = axis_convolve_dfilter(Lo,h0b,axis=1,queue=queue)
- Yh[level] = np.zeros((LoLo.shape[0]>>1, LoLo.shape[1]>>1, 6), dtype=complex_dtype)
- Yh[level][:,:,0:6:5] = q2c(axis_convolve_dfilter(Hi,h0b,axis=1,queue=queue)) # Horizontal
- Yh[level][:,:,2:4:1] = q2c(axis_convolve_dfilter(Lo,h1b,axis=1,queue=queue)) # Vertical
- Yh[level][:,:,1:5:3] = q2c(axis_convolve_dfilter(Hi,h1b,axis=1,queue=queue)) # Diagonal
+ Yh[level] = to_array(q2c(
+ axis_convolve_dfilter(Hi,h0b,axis=1,queue=queue),
+ axis_convolve_dfilter(Lo,h1b,axis=1,queue=queue),
+ axis_convolve_dfilter(Hi,h1b,axis=1,queue=queue),
+ ), queue=queue)
if include_scale:
Yscale[level] = to_array(LoLo)
@@ -161,8 +164,3 @@ def dtwavexfm2(X, nlevels=3, biort=DEFAULT_BIORT, qshift=DEFAULT_QSHIFT, include
else:
return Yl, tuple(Yh)
-def q2c(y):
- """Convert from quads in y to complex numbers in z.
-
- """
- return to_array(cl_q2c(y))
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-dtcwt.git
More information about the debian-science-commits
mailing list