[opencv] 28/251: OCL code fixed, fix for NEON added
Nobuhiro Iwamatsu
iwamatsu at moszumanska.debian.org
Sun Aug 27 23:27:20 UTC 2017
This is an automated email from the git hooks/post-receive script.
iwamatsu pushed a commit to annotated tag 3.3.0
in repository opencv.
commit 704c688225283e9148742187ad517738bf77c681
Author: Rostislav Vasilikhin <rostislav.vasilikhin at intel.com>
Date: Wed Jul 5 21:52:13 2017 +0300
OCL code fixed, fix for NEON added
---
modules/imgproc/src/color.cpp | 16 ++++++++---
modules/imgproc/src/opencl/cvtcolor.cl | 50 ++++++++++++++++++++++------------
2 files changed, 45 insertions(+), 21 deletions(-)
diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp
index 3c2af95..f8cd0cb 100644
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -6608,6 +6608,14 @@ struct RGB2Luv_f
{
float32x4x3_t v_src = vld3q_f32(src);
+ v_src.val[0] = vmaxq_f32(v_src.val[0], vdupq_n_f32(0));
+ v_src.val[1] = vmaxq_f32(v_src.val[1], vdupq_n_f32(0));
+ v_src.val[2] = vmaxq_f32(v_src.val[2], vdupq_n_f32(0));
+
+ v_src.val[0] = vminq_f32(v_src.val[0], vdupq_n_f32(1));
+ v_src.val[1] = vminq_f32(v_src.val[1], vdupq_n_f32(1));
+ v_src.val[2] = vminq_f32(v_src.val[2], vdupq_n_f32(1));
+
if( gammaTab )
{
v_src.val[0] = vmulq_f32(v_src.val[0], vdupq_n_f32(gscale));
@@ -8574,7 +8582,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? LabCbrtTabScale : 1) );
}
- float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
+ float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
un = 13*4*_whitept[0]*d;
vn = 13*9*_whitept[1]*d;
@@ -8641,9 +8649,9 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1);
}
- float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3);
- un = 4*_whitept[0]*d;
- vn = 9*_whitept[1]*d;
+ float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
+ un = 4*13*_whitept[0]*d;
+ vn = 9*13*_whitept[1]*d;
Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
}
diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl
index 2193541..7c2e519 100644
--- a/modules/imgproc/src/opencl/cvtcolor.cl
+++ b/modules/imgproc/src/opencl/cvtcolor.cl
@@ -1963,6 +1963,10 @@ __kernel void BGR2Luv(__global const uchar * srcptr, int src_step, int src_offse
float R = src[0], G = src[1], B = src[2];
+ R = clamp(R, 0.f, 1.f);
+ G = clamp(G, 0.f, 1.f);
+ B = clamp(B, 0.f, 1.f);
+
#ifdef SRGB
R = splineInterpolate(R*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
G = splineInterpolate(G*GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
@@ -2067,15 +2071,21 @@ __kernel void Luv2BGR(__global const uchar * srcptr, int src_step, int src_offse
__global const float * src = (__global const float *)(srcptr + src_index);
__global float * dst = (__global float *)(dstptr + dst_index);
- float L = src[0], u = src[1], v = src[2], d, X, Y, Z;
- Y = (L + 16.f) * (1.f/116.f);
- Y = Y*Y*Y;
- d = (1.f/13.f)/L;
- u = fma(u, d, _un);
- v = fma(v, d, _vn);
- float iv = 1.f/v;
- X = 2.25f * u * Y * iv;
- Z = (12 - fma(3.0f, u, 20.0f * v)) * Y * 0.25f * iv;
+ float L = src[0], u = src[1], v = src[2], X, Y, Z;
+ if(L >= 8)
+ {
+ Y = fma(L, 1.f/116.f, 16.f/116.f);
+ Y = Y*Y*Y;
+ }
+ else
+ {
+ Y = L * (1.0f/903.3f); // L*(3./29.)^3
+ }
+ float up = 3.f*fma(L, _un, u);
+ float vp = 0.25f/fma(L, _vn, v);
+ vp = clamp(vp, -0.25f, 0.25f);
+ X = 3.f*Y*up*vp;
+ Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
@@ -2129,14 +2139,20 @@ __kernel void Luv2BGR(__global const uchar * src, int src_step, int src_offset,
float L = src[0]*(100.f/255.f);
float u = fma(convert_float(src[1]), 1.388235294117647f, -134.f);
float v = fma(convert_float(src[2]), 1.027450980392157f, - 140.f);
- Y = (L + 16.f) * (1.f/116.f);
- Y = Y*Y*Y;
- d = (1.f/13.f)/L;
- u = fma(u, d, _un);
- v = fma(v, d, _vn);
- float iv = 1.f/v;
- X = 2.25f * u * Y * iv ;
- Z = (12 - fma(3.0f, u, 20.0f * v)) * Y * 0.25f * iv;
+ if(L >= 8)
+ {
+ Y = fma(L, 1.f/116.f, 16.f/116.f);
+ Y = Y*Y*Y;
+ }
+ else
+ {
+ Y = L * (1.0f/903.3f); // L*(3./29.)^3
+ }
+ float up = 3.f*fma(L, _un, u);
+ float vp = 0.25f/fma(L, _vn, v);
+ vp = clamp(vp, -0.25f, 0.25f);
+ X = 3.f*Y*up*vp;
+ Z = Y*fma(fma(12.f*13.f, L, -up), vp, -5.f);
float R = fma(X, coeffs[0], fma(Y, coeffs[1], Z * coeffs[2]));
float G = fma(X, coeffs[3], fma(Y, coeffs[4], Z * coeffs[5]));
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/opencv.git
More information about the debian-science-commits mailing list