[opencv] 92/251: initial version of Lab2RGB_f tetrahedral interpolation written
Nobuhiro Iwamatsu
iwamatsu at moszumanska.debian.org
Sun Aug 27 23:27:30 UTC 2017
This is an automated email from the git hooks/post-receive script.
iwamatsu pushed a commit to annotated tag 3.3.0
in repository opencv.
commit 4b75be801e0d22ffc5384f27fa8aba0a17c4af3b
Author: Rostislav Vasilikhin <rostislav.vasilikhin at intel.com>
Date: Fri Jan 20 21:56:44 2017 +0300
initial version of Lab2RGB_f tetrahedral interpolation written
RGB2Lab_f added, bugs fixed, moved to float
several bugs fixed
LUT fixed, no switch in tetraInterpolate()
temporary code; to be removed and rewritten
before refactoring
extra interpolations removed, some things to do left
added Lab2RGB_b +XYZ version, etc.
basic version is done, to be sped up
tetra refactored
interpolations: LUT for weights, refactor., etc.
address arithm optimized
initial version of vectorized code added (not compiling now)
compilation fixed, now segfaults
a lot of fixes, vectorization temp. disabled
fixed trilinear shift size, max error dropped from 19 to 10
fixed several bugs (255 vs 256, signed vs unsigned, bIdx)
minor changes
packed: address arithmetics fixed
shorter code
experiments with pure integer calculations
Lab2RGB max error decreased to 2; need to clean the code
ready for vectorization; need cleaning
vectorized, to be debugged
precision fixed, max error is 2
Lab->XYZ shortened
minor fixes
Lab2RGB_f version fixed, to be completely rewritten using _b code
RGB2Lab_f vectorized
minors
moved to separate file
refactored Lab2RGB to float and int versions
minor fix
Lab2RGB_f vectorized
minor refactoring
Lab2RGBint refactored: process methods, vectorize by 4 pix
Lab2RGB_f int version is done
cleanup extra code
code copied to color.cpp
fixed blue idx bug
optimizations enabled when testing; mulFracConst introduced
divConst -> mulFracConst
calc min time in perf instead of avg
minors
process() slightly sped up
Lab2RGB_f: disabled int version
reinterpret added, minor fixes in names
some warnings fixed
changes transferred to color.cpp
RGB2Lab_f code (and trilinear interpolation code) moved to rgb2lab_faster
whitespace
shift negative fixed
more warnings fixed
"constant condition" warnings fixed, little speed up
minor changes
test_photo decolor fixed
changes copied to test_lab.cpp
idx bounds checking in LUT init
several fixes
WIP: softfloat almost integrated
test_lab partially rewritten to SoftFloat
color.cpp rewritten to SoftFloat
test_lab.cpp: accuracy code added
several fixes
RGB2Lab_b testing fixed
splineBuild() rewritten to SoftFloat
accuracy control improved
rounding fixed
Luv <=> RGB: rewritten to SoftFloat
OCL cvtColor Lab and Lut rewritten to SoftFloat
minor fixes
refactored to new SoftFloat interface
round() -> cvRound, etc.
fixed OCL tests
softfloat.cpp: internal functions made static, unused ones removed
meaningful constants
extra lines removed
unused function removed
unfinished work
it works, need to fix TODOs
refactoring; more calls rewritten
mulFracConst removed
constants made bit exact; minors
changes moved to color.cpp
fixed 1 bug and 4 warnings
OCL: fixed constants
pow(x, _1_3f) replaced by cubeRoot(x)
fixed compilation on MSVC32
magic constants explained
file with internal accuracy&speed tests moved to lab_tetra branch
---
modules/core/src/softfloat.cpp | 1217 ++++++--------------------------
modules/imgproc/src/color.cpp | 733 ++++++++++++++++---
modules/imgproc/src/opencl/cvtcolor.cl | 2 +
3 files changed, 836 insertions(+), 1116 deletions(-)
diff --git a/modules/core/src/softfloat.cpp b/modules/core/src/softfloat.cpp
index 7d9d627..ac39ee8 100644
--- a/modules/core/src/softfloat.cpp
+++ b/modules/core/src/softfloat.cpp
@@ -54,7 +54,7 @@ enum {
tininess_afterRounding = 1
};
//fixed to make softfloat code stateless
-const uint_fast8_t globalDetectTininess = tininess_afterRounding;
+static const uint_fast8_t globalDetectTininess = tininess_afterRounding;
/*----------------------------------------------------------------------------
| Software floating-point exception flags.
@@ -69,7 +69,7 @@ enum {
// Disabled to make softfloat code stateless
// This function may be changed in the future for better error handling
-inline void raiseFlags( uint_fast8_t /* flags */)
+static inline void raiseFlags( uint_fast8_t /* flags */)
{
//exceptionFlags |= flags;
}
@@ -87,7 +87,7 @@ enum {
};
//fixed to make softfloat code stateless
-const uint_fast8_t globalRoundingMode = round_near_even;
+static const uint_fast8_t globalRoundingMode = round_near_even;
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
@@ -123,85 +123,65 @@ typedef softdouble float64_t;
/*----------------------------------------------------------------------------
| Integer-to-floating-point conversion routines.
*----------------------------------------------------------------------------*/
-float32_t ui32_to_f32( uint32_t );
-float64_t ui32_to_f64( uint32_t );
-float32_t ui64_to_f32( uint64_t );
-float64_t ui64_to_f64( uint64_t );
-float32_t i32_to_f32( int32_t );
-float64_t i32_to_f64( int32_t );
-float32_t i64_to_f32( int64_t );
-float64_t i64_to_f64( int64_t );
+static float32_t ui32_to_f32( uint32_t );
+static float64_t ui32_to_f64( uint32_t );
+static float32_t ui64_to_f32( uint64_t );
+static float64_t ui64_to_f64( uint64_t );
+static float32_t i32_to_f32( int32_t );
+static float64_t i32_to_f64( int32_t );
+static float32_t i64_to_f32( int64_t );
+static float64_t i64_to_f64( int64_t );
/*----------------------------------------------------------------------------
| 32-bit (single-precision) floating-point operations.
*----------------------------------------------------------------------------*/
-uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );
-uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool );
-int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
-int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool );
-uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
-uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
-int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
-int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
-float64_t f32_to_f64( float32_t );
-float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
-float32_t f32_add( float32_t, float32_t );
-float32_t f32_sub( float32_t, float32_t );
-float32_t f32_mul( float32_t, float32_t );
-float32_t f32_mulAdd( float32_t, float32_t, float32_t );
-float32_t f32_div( float32_t, float32_t );
-float32_t f32_rem( float32_t, float32_t );
-float32_t f32_sqrt( float32_t );
-bool f32_eq( float32_t, float32_t );
-bool f32_le( float32_t, float32_t );
-bool f32_lt( float32_t, float32_t );
-bool f32_eq_signaling( float32_t, float32_t );
-bool f32_le_quiet( float32_t, float32_t );
-bool f32_lt_quiet( float32_t, float32_t );
-bool f32_isSignalingNaN( float32_t );
+static int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
+static int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
+static float64_t f32_to_f64( float32_t );
+static float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
+static float32_t f32_add( float32_t, float32_t );
+static float32_t f32_sub( float32_t, float32_t );
+static float32_t f32_mul( float32_t, float32_t );
+static float32_t f32_mulAdd( float32_t, float32_t, float32_t );
+static float32_t f32_div( float32_t, float32_t );
+static float32_t f32_rem( float32_t, float32_t );
+static float32_t f32_sqrt( float32_t );
+static bool f32_eq( float32_t, float32_t );
+static bool f32_le( float32_t, float32_t );
+static bool f32_lt( float32_t, float32_t );
/*----------------------------------------------------------------------------
| 64-bit (double-precision) floating-point operations.
*----------------------------------------------------------------------------*/
-uint_fast32_t f64_to_ui32( float64_t, uint_fast8_t, bool );
-uint_fast64_t f64_to_ui64( float64_t, uint_fast8_t, bool );
-int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
-int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
-uint_fast32_t f64_to_ui32_r_minMag( float64_t, bool );
-uint_fast64_t f64_to_ui64_r_minMag( float64_t, bool );
-int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
-int_fast64_t f64_to_i64_r_minMag( float64_t, bool );
-float32_t f64_to_f32( float64_t );
-float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
-float64_t f64_add( float64_t, float64_t );
-float64_t f64_sub( float64_t, float64_t );
-float64_t f64_mul( float64_t, float64_t );
-float64_t f64_mulAdd( float64_t, float64_t, float64_t );
-float64_t f64_div( float64_t, float64_t );
-float64_t f64_rem( float64_t, float64_t );
-float64_t f64_sqrt( float64_t );
-bool f64_eq( float64_t, float64_t );
-bool f64_le( float64_t, float64_t );
-bool f64_lt( float64_t, float64_t );
-bool f64_eq_signaling( float64_t, float64_t );
-bool f64_le_quiet( float64_t, float64_t );
-bool f64_lt_quiet( float64_t, float64_t );
-bool f64_isSignalingNaN( float64_t );
+static int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
+static int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
+static float32_t f64_to_f32( float64_t );
+static float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
+static float64_t f64_add( float64_t, float64_t );
+static float64_t f64_sub( float64_t, float64_t );
+static float64_t f64_mul( float64_t, float64_t );
+static float64_t f64_mulAdd( float64_t, float64_t, float64_t );
+static float64_t f64_div( float64_t, float64_t );
+static float64_t f64_rem( float64_t, float64_t );
+static float64_t f64_sqrt( float64_t );
+static bool f64_eq( float64_t, float64_t );
+static bool f64_le( float64_t, float64_t );
+static bool f64_lt( float64_t, float64_t );
/*----------------------------------------------------------------------------
| Ported from OpenCV and added for usability
*----------------------------------------------------------------------------*/
-float32_t f32_powi( float32_t x, int y);
-float64_t f64_powi( float64_t x, int y);
+static float32_t f32_powi( float32_t x, int y);
+static float64_t f64_powi( float64_t x, int y);
-float32_t f32_exp( float32_t x);
-float64_t f64_exp(float64_t x);
-float32_t f32_log(float32_t x);
-float64_t f64_log(float64_t x);
-float32_t f32_cbrt(float32_t x);
-float32_t f32_pow( float32_t x, float32_t y);
-float64_t f64_pow( float64_t x, float64_t y);
+static float32_t f32_exp( float32_t x);
+static float64_t f64_exp(float64_t x);
+static float32_t f32_log(float32_t x);
+static float64_t f64_log(float64_t x);
+static float32_t f32_cbrt(float32_t x);
+static float32_t f32_pow( float32_t x, float32_t y);
+static float64_t f64_pow( float64_t x, float64_t y);
/*----------------------------------------------------------------------------
| softfloat and softdouble methods and members
@@ -437,13 +417,7 @@ enum {
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
-static uint_fast32_t softfloat_roundToUI32( bool, uint_fast64_t, uint_fast8_t, bool );
-static uint_fast64_t softfloat_roundToUI64( bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
static int_fast32_t softfloat_roundToI32( bool, uint_fast64_t, uint_fast8_t, bool );
-static int_fast64_t softfloat_roundToI64( bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
-
-/*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; };
static struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t );
@@ -478,7 +452,7 @@ static float64_t softfloat_mulAddF64( uint_fast64_t, uint_fast64_t, uint_fast64_
| significant bit to 1. This shifted-and-jammed value is returned.
*----------------------------------------------------------------------------*/
-inline uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
+static inline uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
{ return a>>dist | ((a & (((uint_fast64_t) 1<<dist) - 1)) != 0); }
/*----------------------------------------------------------------------------
@@ -491,7 +465,7 @@ inline uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
| is zero or nonzero.
*----------------------------------------------------------------------------*/
-inline uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
+static inline uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
{
//fixed unsigned unary minus: -x == ~x + 1
return (dist < 31) ? a>>dist | ((uint32_t) (a<<((~dist + 1) & 31)) != 0) : (a != 0);
@@ -506,7 +480,7 @@ inline uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
| greater than 64, the result will be either 0 or 1, depending on whether 'a'
| is zero or nonzero.
*----------------------------------------------------------------------------*/
-inline uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist )
+static inline uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist )
{
//fixed unsigned unary minus: -x == ~x + 1
return (dist < 63) ? a>>dist | ((uint64_t) (a<<((~dist + 1) & 63)) != 0) : (a != 0);
@@ -540,7 +514,7 @@ static const uint_least8_t softfloat_countLeadingZeros8[256] = {
| Returns the number of leading 0 bits before the most-significant 1 bit of
| 'a'. If 'a' is zero, 32 is returned.
*----------------------------------------------------------------------------*/
-inline uint_fast8_t softfloat_countLeadingZeros32( uint32_t a )
+static inline uint_fast8_t softfloat_countLeadingZeros32( uint32_t a )
{
uint_fast8_t count = 0;
if ( a < 0x10000 ) {
@@ -607,7 +581,7 @@ static const uint16_t softfloat_approxRecipSqrt_1k1s[16] = {
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' left by the
| number of bits given in 'dist', which must be in the range 1 to 63.
*----------------------------------------------------------------------------*/
-inline struct uint128 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
+static inline struct uint128 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
struct uint128 z;
z.v64 = a64<<dist | a0>>(-dist & 63);
@@ -622,7 +596,7 @@ inline struct uint128 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, ui
| bit of the shifted value by setting the least-significant bit to 1. This
| shifted-and-jammed value is returned.
*----------------------------------------------------------------------------*/
-inline struct uint128 softfloat_shortShiftRightJam128(uint64_t a64, uint64_t a0, uint_fast8_t dist )
+static inline struct uint128 softfloat_shortShiftRightJam128(uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
uint_fast8_t negDist = -dist;
struct uint128 z;
@@ -634,37 +608,6 @@ inline struct uint128 softfloat_shortShiftRightJam128(uint64_t a64, uint64_t a0,
}
/*----------------------------------------------------------------------------
-| Shifts the 128 bits formed by concatenating 'a' and 'extra' right by 64
-| _plus_ the number of bits given in 'dist', which must not be zero. This
-| shifted value is at most 64 nonzero bits and is returned in the 'v' field
-| of the 'struct uint64_extra' result. The 64-bit 'extra' field of the result
-| contains a value formed as follows from the bits that were shifted off: The
-| _last_ bit shifted off is the most-significant bit of the 'extra' field, and
-| the other 63 bits of the 'extra' field are all zero if and only if _all_but_
-| _the_last_ bits shifted off were all zero.
-| (This function makes more sense if 'a' and 'extra' are considered to form
-| an unsigned fixed-point number with binary point between 'a' and 'extra'.
-| This fixed-point value is shifted right by the number of bits given in
-| 'dist', and the integer part of this shifted value is returned in the 'v'
-| field of the result. The fractional part of the shifted value is modified
-| as described above and returned in the 'extra' field of the result.)
-*----------------------------------------------------------------------------*/
-inline struct uint64_extra softfloat_shiftRightJam64Extra(uint64_t a, uint64_t extra, uint_fast32_t dist )
-{
- struct uint64_extra z;
- if ( dist < 64 ) {
- z.v = a>>dist;
- //fixed unsigned unary minus: -x == ~x + 1
- z.extra = a<<((~dist + 1) & 63);
- } else {
- z.v = 0;
- z.extra = (dist == 64) ? a : (a != 0);
- }
- z.extra |= (extra != 0);
- return z;
-}
-
-/*----------------------------------------------------------------------------
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' right by the
| number of bits given in 'dist', which must not be zero. If any nonzero bits
| are shifted off, they are "jammed" into the least-significant bit of the
@@ -681,7 +624,7 @@ static struct uint128 softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uin
| 'a0' and the 128-bit integer formed by concatenating 'b64' and 'b0'. The
| addition is modulo 2^128, so any carry out is lost.
*----------------------------------------------------------------------------*/
-inline struct uint128 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+static inline struct uint128 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
struct uint128 z;
z.v0 = a0 + b0;
@@ -694,7 +637,7 @@ inline struct uint128 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64,
| and 'a0' and the 128-bit integer formed by concatenating 'b64' and 'b0'.
| The subtraction is modulo 2^128, so any borrow out (carry out) is lost.
*----------------------------------------------------------------------------*/
-inline struct uint128 softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+static inline struct uint128 softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
struct uint128 z;
z.v0 = a0 - b0;
@@ -713,7 +656,7 @@ static struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
-float32_t f32_add( float32_t a, float32_t b )
+static float32_t f32_add( float32_t a, float32_t b )
{
uint_fast32_t uiA = a.v;
uint_fast32_t uiB = b.v;
@@ -725,7 +668,7 @@ float32_t f32_add( float32_t a, float32_t b )
}
}
-float32_t f32_div( float32_t a, float32_t b )
+static float32_t f32_div( float32_t a, float32_t b )
{
uint_fast32_t uiA;
bool signA;
@@ -823,7 +766,7 @@ float32_t f32_div( float32_t a, float32_t b )
return float32_t::fromRaw(uiZ);
}
-bool f32_eq( float32_t a, float32_t b )
+static bool f32_eq( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
@@ -841,26 +784,7 @@ bool f32_eq( float32_t a, float32_t b )
return (uiA == uiB) || ! (uint32_t) ((uiA | uiB)<<1);
}
-bool f32_eq_signaling( float32_t a, float32_t b )
-{
- uint_fast32_t uiA;
- uint_fast32_t uiB;
-
- uiA = a.v;
- uiB = b.v;
- if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
- raiseFlags( flag_invalid );
- return false;
- }
- return (uiA == uiB) || ! (uint32_t) ((uiA | uiB)<<1);
-}
-
-bool f32_isSignalingNaN( float32_t a )
-{
- return softfloat_isSigNaNF32UI( a.v );
-}
-
-bool f32_le( float32_t a, float32_t b )
+static bool f32_le( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
@@ -879,30 +803,7 @@ bool f32_le( float32_t a, float32_t b )
: (uiA == uiB) || (signA ^ (uiA < uiB));
}
-bool f32_le_quiet( float32_t a, float32_t b )
-{
- uint_fast32_t uiA;
- uint_fast32_t uiB;
- bool signA, signB;
-
- uiA = a.v;
- uiB = b.v;
- if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
- if (
- softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB )
- ) {
- raiseFlags( flag_invalid );
- }
- return false;
- }
- signA = signF32UI( uiA );
- signB = signF32UI( uiB );
- return
- (signA != signB) ? signA || ! (uint32_t) ((uiA | uiB)<<1)
- : (uiA == uiB) || (signA ^ (uiA < uiB));
-}
-
-bool f32_lt( float32_t a, float32_t b )
+static bool f32_lt( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
@@ -921,30 +822,7 @@ bool f32_lt( float32_t a, float32_t b )
: (uiA != uiB) && (signA ^ (uiA < uiB));
}
-bool f32_lt_quiet( float32_t a, float32_t b )
-{
- uint_fast32_t uiA;
- uint_fast32_t uiB;
- bool signA, signB;
-
- uiA = a.v;
- uiB = b.v;
- if ( isNaNF32UI( uiA ) || isNaNF32UI( uiB ) ) {
- if (
- softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB )
- ) {
- raiseFlags( flag_invalid );
- }
- return false;
- }
- signA = signF32UI( uiA );
- signB = signF32UI( uiB );
- return
- (signA != signB) ? signA && ((uint32_t) ((uiA | uiB)<<1) != 0)
- : (uiA != uiB) && (signA ^ (uiA < uiB));
-}
-
-float32_t f32_mulAdd( float32_t a, float32_t b, float32_t c )
+static float32_t f32_mulAdd( float32_t a, float32_t b, float32_t c )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
@@ -956,7 +834,7 @@ float32_t f32_mulAdd( float32_t a, float32_t b, float32_t c )
return softfloat_mulAddF32( uiA, uiB, uiC, 0 );
}
-float32_t f32_mul( float32_t a, float32_t b )
+static float32_t f32_mul( float32_t a, float32_t b )
{
uint_fast32_t uiA;
bool signA;
@@ -1043,7 +921,7 @@ float32_t f32_mul( float32_t a, float32_t b )
return float32_t::fromRaw(uiZ);
}
-float32_t f32_rem( float32_t a, float32_t b )
+static float32_t f32_rem( float32_t a, float32_t b )
{
uint_fast32_t uiA;
bool signA;
@@ -1163,7 +1041,7 @@ float32_t f32_rem( float32_t a, float32_t b )
return float32_t::fromRaw(uiZ);
}
-float32_t f32_roundToInt( float32_t a, uint_fast8_t roundingMode, bool exact )
+static float32_t f32_roundToInt( float32_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast32_t uiA;
int_fast16_t exp;
@@ -1227,7 +1105,7 @@ float32_t f32_roundToInt( float32_t a, uint_fast8_t roundingMode, bool exact )
return float32_t::fromRaw(uiZ);
}
-float32_t f32_sqrt( float32_t a )
+static float32_t f32_sqrt( float32_t a )
{
uint_fast32_t uiA;
bool signA;
@@ -1300,7 +1178,7 @@ float32_t f32_sqrt( float32_t a )
return float32_t::fromRaw(uiZ);
}
-float32_t f32_sub( float32_t a, float32_t b )
+static float32_t f32_sub( float32_t a, float32_t b )
{
uint_fast32_t uiA;
uint_fast32_t uiB;
@@ -1314,7 +1192,7 @@ float32_t f32_sub( float32_t a, float32_t b )
}
}
-float64_t f32_to_f64( float32_t a )
+static float64_t f32_to_f64( float32_t a )
{
uint_fast32_t uiA;
bool sign;
@@ -1359,7 +1237,7 @@ float64_t f32_to_f64( float32_t a )
return float64_t::fromRaw(uiZ);
}
-int_fast32_t f32_to_i32( float32_t a, uint_fast8_t roundingMode, bool exact )
+static int_fast32_t f32_to_i32( float32_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast32_t uiA;
bool sign;
@@ -1397,7 +1275,7 @@ int_fast32_t f32_to_i32( float32_t a, uint_fast8_t roundingMode, bool exact )
return softfloat_roundToI32( sign, sig64, roundingMode, exact );
}
-int_fast32_t f32_to_i32_r_minMag( float32_t a, bool exact )
+static int_fast32_t f32_to_i32_r_minMag( float32_t a, bool exact )
{
uint_fast32_t uiA;
int_fast16_t exp;
@@ -1440,254 +1318,7 @@ int_fast32_t f32_to_i32_r_minMag( float32_t a, bool exact )
return sign ? -absZ : absZ;
}
-int_fast64_t f32_to_i64( float32_t a, uint_fast8_t roundingMode, bool exact )
-{
- uint_fast32_t uiA;
- bool sign;
- int_fast16_t exp;
- uint_fast32_t sig;
- int_fast16_t shiftDist;
- uint_fast64_t sig64, extra;
- struct uint64_extra sig64Extra;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- sign = signF32UI( uiA );
- exp = expF32UI( uiA );
- sig = fracF32UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- shiftDist = 0xBE - exp;
- if ( shiftDist < 0 ) {
- raiseFlags( flag_invalid );
- return
- (exp == 0xFF) && sig ? i64_fromNaN
- : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- if ( exp ) sig |= 0x00800000;
- sig64 = (uint_fast64_t) sig<<40;
- extra = 0;
- if ( shiftDist ) {
- sig64Extra = softfloat_shiftRightJam64Extra( sig64, 0, shiftDist );
- sig64 = sig64Extra.v;
- extra = sig64Extra.extra;
- }
- return softfloat_roundToI64( sign, sig64, extra, roundingMode, exact );
-}
-
-int_fast64_t f32_to_i64_r_minMag( float32_t a, bool exact )
-{
- uint_fast32_t uiA;
- int_fast16_t exp;
- uint_fast32_t sig;
- int_fast16_t shiftDist;
- bool sign;
- uint_fast64_t sig64;
- int_fast64_t absZ;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- exp = expF32UI( uiA );
- sig = fracF32UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- shiftDist = 0xBE - exp;
- if ( 64 <= shiftDist ) {
- if ( exact && (exp | sig) ) {
- raiseFlags(flag_inexact);
- }
- return 0;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- sign = signF32UI( uiA );
- if ( shiftDist <= 0 ) {
- if ( uiA == packToF32UI( 1, 0xBE, 0 ) ) {
- return -INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;
- }
- raiseFlags( flag_invalid );
- return
- (exp == 0xFF) && sig ? i64_fromNaN
- : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- sig |= 0x00800000;
- sig64 = (uint_fast64_t) sig<<40;
- absZ = sig64>>shiftDist;
- shiftDist = 40 - shiftDist;
- if ( exact && (shiftDist < 0) && (uint32_t) (sig<<(shiftDist & 31)) ) {
- raiseFlags(flag_inexact);
- }
- return sign ? -absZ : absZ;
-}
-
-uint_fast32_t f32_to_ui32( float32_t a, uint_fast8_t roundingMode, bool exact )
-{
- uint_fast32_t uiA;
- bool sign;
- int_fast16_t exp;
- uint_fast32_t sig;
- uint_fast64_t sig64;
- int_fast16_t shiftDist;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- sign = signF32UI( uiA );
- exp = expF32UI( uiA );
- sig = fracF32UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
-#if (ui32_fromNaN != ui32_fromPosOverflow) || (ui32_fromNaN != ui32_fromNegOverflow)
- if ( (exp == 0xFF) && sig ) {
-#if (ui32_fromNaN == ui32_fromPosOverflow)
- sign = 0;
-#elif (ui32_fromNaN == ui32_fromNegOverflow)
- sign = 1;
-#else
- raiseFlags( flag_invalid );
- return ui32_fromNaN;
-#endif
- }
-#endif
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- if ( exp ) sig |= 0x00800000;
- sig64 = (uint_fast64_t) sig<<32;
- shiftDist = 0xAA - exp;
- if ( 0 < shiftDist ) sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
- return softfloat_roundToUI32( sign, sig64, roundingMode, exact );
-}
-
-uint_fast32_t f32_to_ui32_r_minMag( float32_t a, bool exact )
-{
- uint_fast32_t uiA;
- int_fast16_t exp;
- uint_fast32_t sig;
- int_fast16_t shiftDist;
- bool sign;
- uint_fast32_t z;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- exp = expF32UI( uiA );
- sig = fracF32UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- shiftDist = 0x9E - exp;
- if ( 32 <= shiftDist ) {
- if ( exact && (exp | sig) ) {
- raiseFlags(flag_inexact);
- }
- return 0;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- sign = signF32UI( uiA );
- if ( sign || (shiftDist < 0) ) {
- raiseFlags( flag_invalid );
- return
- (exp == 0xFF) && sig ? ui32_fromNaN
- : sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- sig = (sig | 0x00800000)<<8;
- z = sig>>shiftDist;
- if ( exact && (z<<shiftDist != sig) ) {
- raiseFlags(flag_inexact);
- }
- return z;
-}
-
-uint_fast64_t f32_to_ui64( float32_t a, uint_fast8_t roundingMode, bool exact )
-{
- uint_fast32_t uiA;
- bool sign;
- int_fast16_t exp;
- uint_fast32_t sig;
- int_fast16_t shiftDist;
- uint_fast64_t sig64, extra;
- struct uint64_extra sig64Extra;
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- sign = signF32UI( uiA );
- exp = expF32UI( uiA );
- sig = fracF32UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- shiftDist = 0xBE - exp;
- if ( shiftDist < 0 ) {
- raiseFlags( flag_invalid );
- return
- (exp == 0xFF) && sig ? ui64_fromNaN
- : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- if ( exp ) sig |= 0x00800000;
- sig64 = (uint_fast64_t) sig<<40;
- extra = 0;
- if ( shiftDist ) {
- sig64Extra = softfloat_shiftRightJam64Extra( sig64, 0, shiftDist );
- sig64 = sig64Extra.v;
- extra = sig64Extra.extra;
- }
- return softfloat_roundToUI64( sign, sig64, extra, roundingMode, exact );
-}
-
-uint_fast64_t f32_to_ui64_r_minMag( float32_t a, bool exact )
-{
- uint_fast32_t uiA;
- int_fast16_t exp;
- uint_fast32_t sig;
- int_fast16_t shiftDist;
- bool sign;
- uint_fast64_t sig64, z;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- exp = expF32UI( uiA );
- sig = fracF32UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- shiftDist = 0xBE - exp;
- if ( 64 <= shiftDist ) {
- if ( exact && (exp | sig) ) {
- raiseFlags(flag_inexact);
- }
- return 0;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- sign = signF32UI( uiA );
- if ( sign || (shiftDist < 0) ) {
- raiseFlags( flag_invalid );
- return
- (exp == 0xFF) && sig ? ui64_fromNaN
- : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- sig |= 0x00800000;
- sig64 = (uint_fast64_t) sig<<40;
- z = sig64>>shiftDist;
- shiftDist = 40 - shiftDist;
- if ( exact && (shiftDist < 0) && (uint32_t) (sig<<(shiftDist & 31)) ) {
- raiseFlags(flag_inexact);
- }
- return z;
-}
-
-float64_t f64_add( float64_t a, float64_t b )
+static float64_t f64_add( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
@@ -1705,7 +1336,7 @@ float64_t f64_add( float64_t a, float64_t b )
}
}
-float64_t f64_div( float64_t a, float64_t b )
+static float64_t f64_div( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
@@ -1827,7 +1458,7 @@ float64_t f64_div( float64_t a, float64_t b )
return float64_t::fromRaw(uiZ);
}
-bool f64_eq( float64_t a, float64_t b )
+static bool f64_eq( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
@@ -1845,26 +1476,7 @@ bool f64_eq( float64_t a, float64_t b )
return (uiA == uiB) || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ));
}
-bool f64_eq_signaling( float64_t a, float64_t b )
-{
- uint_fast64_t uiA;
- uint_fast64_t uiB;
-
- uiA = a.v;
- uiB = b.v;
- if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
- raiseFlags( flag_invalid );
- return false;
- }
- return (uiA == uiB) || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ));
-}
-
-bool f64_isSignalingNaN( float64_t a )
-{
- return softfloat_isSigNaNF64UI( a.v );
-}
-
-bool f64_le( float64_t a, float64_t b )
+static bool f64_le( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
@@ -1884,31 +1496,7 @@ bool f64_le( float64_t a, float64_t b )
: (uiA == uiB) || (signA ^ (uiA < uiB));
}
-bool f64_le_quiet( float64_t a, float64_t b )
-{
- uint_fast64_t uiA;
- uint_fast64_t uiB;
- bool signA, signB;
-
- uiA = a.v;
- uiB = b.v;
- if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
- if (
- softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB )
- ) {
- raiseFlags( flag_invalid );
- }
- return false;
- }
- signA = signF64UI( uiA );
- signB = signF64UI( uiB );
- return
- (signA != signB)
- ? signA || ! ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
- : (uiA == uiB) || (signA ^ (uiA < uiB));
-}
-
-bool f64_lt( float64_t a, float64_t b )
+static bool f64_lt( float64_t a, float64_t b )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
@@ -1928,31 +1516,7 @@ bool f64_lt( float64_t a, float64_t b )
: (uiA != uiB) && (signA ^ (uiA < uiB));
}
-bool f64_lt_quiet( float64_t a, float64_t b )
-{
- uint_fast64_t uiA;
- uint_fast64_t uiB;
- bool signA, signB;
-
- uiA = a.v;
- uiB = b.v;
- if ( isNaNF64UI( uiA ) || isNaNF64UI( uiB ) ) {
- if (
- softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB )
- ) {
- raiseFlags( flag_invalid );
- }
- return false;
- }
- signA = signF64UI( uiA );
- signB = signF64UI( uiB );
- return
- (signA != signB)
- ? signA && ((uiA | uiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
- : (uiA != uiB) && (signA ^ (uiA < uiB));
-}
-
-float64_t f64_mulAdd( float64_t a, float64_t b, float64_t c )
+static float64_t f64_mulAdd( float64_t a, float64_t b, float64_t c )
{
uint_fast64_t uiA;
uint_fast64_t uiB;
@@ -1964,7 +1528,7 @@ float64_t f64_mulAdd( float64_t a, float64_t b, float64_t c )
return softfloat_mulAddF64( uiA, uiB, uiC, 0 );
}
-float64_t f64_mul( float64_t a, float64_t b )
+static float64_t f64_mul( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
@@ -2054,7 +1618,7 @@ float64_t f64_mul( float64_t a, float64_t b )
return float64_t::fromRaw(uiZ);
}
-float64_t f64_rem( float64_t a, float64_t b )
+static float64_t f64_rem( float64_t a, float64_t b )
{
uint_fast64_t uiA;
bool signA;
@@ -2190,7 +1754,7 @@ float64_t f64_rem( float64_t a, float64_t b )
return float64_t::fromRaw(uiZ);
}
-float64_t f64_roundToInt( float64_t a, uint_fast8_t roundingMode, bool exact )
+static float64_t f64_roundToInt( float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
int_fast16_t exp;
@@ -2254,7 +1818,7 @@ float64_t f64_roundToInt( float64_t a, uint_fast8_t roundingMode, bool exact )
return float64_t::fromRaw(uiZ);
}
-float64_t f64_sqrt( float64_t a )
+static float64_t f64_sqrt( float64_t a )
{
uint_fast64_t uiA;
bool signA;
@@ -2311,273 +1875,94 @@ float64_t f64_sqrt( float64_t a )
if ( expA ) {
sigA <<= 8;
sig32Z >>= 1;
- } else {
- sigA <<= 9;
- }
- rem = sigA - (uint_fast64_t) sig32Z * sig32Z;
- q = ((uint32_t) (rem>>2) * (uint_fast64_t) recipSqrt32)>>32;
- sigZ = ((uint_fast64_t) sig32Z<<32 | 1<<5) + ((uint_fast64_t) q<<3);
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- if ( (sigZ & 0x1FF) < 1<<5 ) {
- sigZ &= ~(uint_fast64_t) 0x3F;
- shiftedSigZ = sigZ>>6;
- rem = (sigA<<52) - shiftedSigZ * shiftedSigZ;
- if ( rem & UINT64_C( 0x8000000000000000 ) ) {
- --sigZ;
- } else {
- if ( rem ) sigZ |= 1;
- }
- }
- return softfloat_roundPackToF64( 0, expZ, sigZ );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- invalid:
- raiseFlags( flag_invalid );
- uiZ = defaultNaNF64UI;
- uiZ:
- return float64_t::fromRaw(uiZ);
-}
-
-float64_t f64_sub( float64_t a, float64_t b )
-{
- uint_fast64_t uiA;
- bool signA;
- uint_fast64_t uiB;
- bool signB;
-
- uiA = a.v;
- signA = signF64UI( uiA );
- uiB = b.v;
- signB = signF64UI( uiB );
-
- if ( signA == signB ) {
- return softfloat_subMagsF64( uiA, uiB, signA );
- } else {
- return softfloat_addMagsF64( uiA, uiB, signA );
- }
-}
-
-float32_t f64_to_f32( float64_t a )
-{
- uint_fast64_t uiA;
- bool sign;
- int_fast16_t exp;
- uint_fast64_t frac;
- struct commonNaN commonNaN;
- uint_fast32_t uiZ, frac32;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- sign = signF64UI( uiA );
- exp = expF64UI( uiA );
- frac = fracF64UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- if ( exp == 0x7FF ) {
- if ( frac ) {
- softfloat_f64UIToCommonNaN( uiA, &commonNaN );
- uiZ = softfloat_commonNaNToF32UI( &commonNaN );
- } else {
- uiZ = packToF32UI( sign, 0xFF, 0 );
- }
- goto uiZ;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- frac32 = (uint_fast32_t)softfloat_shortShiftRightJam64( frac, 22 ); //fixed warning on type cast
- if ( ! (exp | frac32) ) {
- uiZ = packToF32UI( sign, 0, 0 );
- goto uiZ;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- return softfloat_roundPackToF32( sign, exp - 0x381, frac32 | 0x40000000 );
- uiZ:
- return float32_t::fromRaw(uiZ);
-}
-
-int_fast32_t f64_to_i32( float64_t a, uint_fast8_t roundingMode, bool exact )
-{
- uint_fast64_t uiA;
- bool sign;
- int_fast16_t exp;
- uint_fast64_t sig;
- int_fast16_t shiftDist;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- sign = signF64UI( uiA );
- exp = expF64UI( uiA );
- sig = fracF64UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
-#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
- if ( (exp == 0x7FF) && sig ) {
-#if (i32_fromNaN == i32_fromPosOverflow)
- sign = 0;
-#elif (i32_fromNaN == i32_fromNegOverflow)
- sign = 1;
-#else
- raiseFlags( flag_invalid );
- return i32_fromNaN;
-#endif
- }
-#endif
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
- shiftDist = 0x427 - exp;
- if ( 0 < shiftDist ) sig = softfloat_shiftRightJam64( sig, shiftDist );
- return softfloat_roundToI32( sign, sig, roundingMode, exact );
-}
-
-int_fast32_t f64_to_i32_r_minMag( float64_t a, bool exact )
-{
- uint_fast64_t uiA;
- int_fast16_t exp;
- uint_fast64_t sig;
- int_fast16_t shiftDist;
- bool sign;
- int_fast32_t absZ;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- exp = expF64UI( uiA );
- sig = fracF64UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- shiftDist = 0x433 - exp;
- if ( 53 <= shiftDist ) {
- if ( exact && (exp | sig) ) {
- raiseFlags(flag_inexact);
- }
- return 0;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- sign = signF64UI( uiA );
- if ( shiftDist < 22 ) {
- if (
- sign && (exp == 0x41E) && (sig < UINT64_C( 0x0000000000200000 ))
- ) {
- if ( exact && sig ) {
- raiseFlags(flag_inexact);
- }
- return -0x7FFFFFFF - 1;
- }
- raiseFlags( flag_invalid );
- return
- (exp == 0x7FF) && sig ? i32_fromNaN
- : sign ? i32_fromNegOverflow : i32_fromPosOverflow;
+ } else {
+ sigA <<= 9;
}
+ rem = sigA - (uint_fast64_t) sig32Z * sig32Z;
+ q = ((uint32_t) (rem>>2) * (uint_fast64_t) recipSqrt32)>>32;
+ sigZ = ((uint_fast64_t) sig32Z<<32 | 1<<5) + ((uint_fast64_t) q<<3);
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
- sig |= UINT64_C( 0x0010000000000000 );
- absZ = (int_fast32_t)(sig>>shiftDist); //fixed warning on type cast
- if ( exact && ((uint_fast64_t) (uint_fast32_t) absZ<<shiftDist != sig) ) {
- raiseFlags(flag_inexact);
+ if ( (sigZ & 0x1FF) < 1<<5 ) {
+ sigZ &= ~(uint_fast64_t) 0x3F;
+ shiftedSigZ = sigZ>>6;
+ rem = (sigA<<52) - shiftedSigZ * shiftedSigZ;
+ if ( rem & UINT64_C( 0x8000000000000000 ) ) {
+ --sigZ;
+ } else {
+ if ( rem ) sigZ |= 1;
+ }
}
- return sign ? -absZ : absZ;
+ return softfloat_roundPackToF64( 0, expZ, sigZ );
+ /*------------------------------------------------------------------------
+ *------------------------------------------------------------------------*/
+ invalid:
+ raiseFlags( flag_invalid );
+ uiZ = defaultNaNF64UI;
+ uiZ:
+ return float64_t::fromRaw(uiZ);
}
-int_fast64_t f64_to_i64( float64_t a, uint_fast8_t roundingMode, bool exact )
+static float64_t f64_sub( float64_t a, float64_t b )
{
uint_fast64_t uiA;
- bool sign;
- int_fast16_t exp;
- uint_fast64_t sig;
- int_fast16_t shiftDist;
- struct uint64_extra sigExtra;
+ bool signA;
+ uint_fast64_t uiB;
+ bool signB;
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
uiA = a.v;
- sign = signF64UI( uiA );
- exp = expF64UI( uiA );
- sig = fracF64UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
- shiftDist = 0x433 - exp;
- if ( shiftDist <= 0 ) {
- if ( shiftDist < -11 ) goto invalid;
- sigExtra.v = sig<<-shiftDist;
- sigExtra.extra = 0;
+ signA = signF64UI( uiA );
+ uiB = b.v;
+ signB = signF64UI( uiB );
+
+ if ( signA == signB ) {
+ return softfloat_subMagsF64( uiA, uiB, signA );
} else {
- sigExtra = softfloat_shiftRightJam64Extra( sig, 0, shiftDist );
+ return softfloat_addMagsF64( uiA, uiB, signA );
}
- return
- softfloat_roundToI64(
- sign, sigExtra.v, sigExtra.extra, roundingMode, exact );
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- invalid:
- raiseFlags( flag_invalid );
- return
- (exp == 0x7FF) && fracF64UI( uiA ) ? i64_fromNaN
- : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
}
-int_fast64_t f64_to_i64_r_minMag( float64_t a, bool exact )
+static float32_t f64_to_f32( float64_t a )
{
uint_fast64_t uiA;
bool sign;
int_fast16_t exp;
- uint_fast64_t sig;
- int_fast16_t shiftDist;
- int_fast64_t absZ;
+ uint_fast64_t frac;
+ struct commonNaN commonNaN;
+ uint_fast32_t uiZ, frac32;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uiA = a.v;
sign = signF64UI( uiA );
exp = expF64UI( uiA );
- sig = fracF64UI( uiA );
+ frac = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
- shiftDist = 0x433 - exp;
- if ( shiftDist <= 0 ) {
- /*--------------------------------------------------------------------
- *--------------------------------------------------------------------*/
- if ( shiftDist < -10 ) {
- if ( uiA == packToF64UI( 1, 0x43E, 0 ) ) {
- return -INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;
- }
- raiseFlags( flag_invalid );
- return
- (exp == 0x7FF) && sig ? i64_fromNaN
- : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
- }
- /*--------------------------------------------------------------------
- *--------------------------------------------------------------------*/
- sig |= UINT64_C( 0x0010000000000000 );
- absZ = sig<<-shiftDist;
- } else {
- /*--------------------------------------------------------------------
- *--------------------------------------------------------------------*/
- if ( 53 <= shiftDist ) {
- if ( exact && (exp | sig) ) {
- raiseFlags(flag_inexact);
- }
- return 0;
- }
- /*--------------------------------------------------------------------
- *--------------------------------------------------------------------*/
- sig |= UINT64_C( 0x0010000000000000 );
- absZ = sig>>shiftDist;
- if ( exact && (absZ<<shiftDist != (int_fast64_t)sig) ) {
- raiseFlags(flag_inexact);
+ if ( exp == 0x7FF ) {
+ if ( frac ) {
+ softfloat_f64UIToCommonNaN( uiA, &commonNaN );
+ uiZ = softfloat_commonNaNToF32UI( &commonNaN );
+ } else {
+ uiZ = packToF32UI( sign, 0xFF, 0 );
}
+ goto uiZ;
}
- return sign ? -absZ : absZ;
+ /*------------------------------------------------------------------------
+ *------------------------------------------------------------------------*/
+ frac32 = (uint_fast32_t)softfloat_shortShiftRightJam64( frac, 22 ); //fixed warning on type cast
+ if ( ! (exp | frac32) ) {
+ uiZ = packToF32UI( sign, 0, 0 );
+ goto uiZ;
+ }
+ /*------------------------------------------------------------------------
+ *------------------------------------------------------------------------*/
+ return softfloat_roundPackToF32( sign, exp - 0x381, frac32 | 0x40000000 );
+ uiZ:
+ return float32_t::fromRaw(uiZ);
}
-uint_fast32_t f64_to_ui32( float64_t a, uint_fast8_t roundingMode, bool exact )
+static int_fast32_t f64_to_i32( float64_t a, uint_fast8_t roundingMode, bool exact )
{
uint_fast64_t uiA;
bool sign;
@@ -2593,15 +1978,15 @@ uint_fast32_t f64_to_ui32( float64_t a, uint_fast8_t roundingMode, bool exact )
sig = fracF64UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
-#if (ui32_fromNaN != ui32_fromPosOverflow) || (ui32_fromNaN != ui32_fromNegOverflow)
+#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
if ( (exp == 0x7FF) && sig ) {
-#if (ui32_fromNaN == ui32_fromPosOverflow)
+#if (i32_fromNaN == i32_fromPosOverflow)
sign = 0;
-#elif (ui32_fromNaN == ui32_fromNegOverflow)
+#elif (i32_fromNaN == i32_fromNegOverflow)
sign = 1;
#else
raiseFlags( flag_invalid );
- return ui32_fromNaN;
+ return i32_fromNaN;
#endif
}
#endif
@@ -2610,17 +1995,17 @@ uint_fast32_t f64_to_ui32( float64_t a, uint_fast8_t roundingMode, bool exact )
if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
shiftDist = 0x427 - exp;
if ( 0 < shiftDist ) sig = softfloat_shiftRightJam64( sig, shiftDist );
- return softfloat_roundToUI32( sign, sig, roundingMode, exact );
+ return softfloat_roundToI32( sign, sig, roundingMode, exact );
}
-uint_fast32_t f64_to_ui32_r_minMag( float64_t a, bool exact )
+static int_fast32_t f64_to_i32_r_minMag( float64_t a, bool exact )
{
uint_fast64_t uiA;
int_fast16_t exp;
uint_fast64_t sig;
int_fast16_t shiftDist;
bool sign;
- uint_fast32_t z;
+ int_fast32_t absZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
@@ -2639,108 +2024,31 @@ uint_fast32_t f64_to_ui32_r_minMag( float64_t a, bool exact )
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sign = signF64UI( uiA );
- if ( sign || (shiftDist < 21) ) {
+ if ( shiftDist < 22 ) {
+ if (
+ sign && (exp == 0x41E) && (sig < UINT64_C( 0x0000000000200000 ))
+ ) {
+ if ( exact && sig ) {
+ raiseFlags(flag_inexact);
+ }
+ return -0x7FFFFFFF - 1;
+ }
raiseFlags( flag_invalid );
return
- (exp == 0x7FF) && sig ? ui32_fromNaN
- : sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
+ (exp == 0x7FF) && sig ? i32_fromNaN
+ : sign ? i32_fromNegOverflow : i32_fromPosOverflow;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig |= UINT64_C( 0x0010000000000000 );
- z = (uint_fast32_t)(sig>>shiftDist); //fixed warning on type cast
- if ( exact && ((uint_fast64_t) z<<shiftDist != sig) ) {
+ absZ = (int_fast32_t)(sig>>shiftDist); //fixed warning on type cast
+ if ( exact && ((uint_fast64_t) (uint_fast32_t) absZ<<shiftDist != sig) ) {
raiseFlags(flag_inexact);
}
- return z;
-}
-
-uint_fast64_t f64_to_ui64( float64_t a, uint_fast8_t roundingMode, bool exact )
-{
- uint_fast64_t uiA;
- bool sign;
- int_fast16_t exp;
- uint_fast64_t sig;
- int_fast16_t shiftDist;
- struct uint64_extra sigExtra;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- sign = signF64UI( uiA );
- exp = expF64UI( uiA );
- sig = fracF64UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- if ( exp ) sig |= UINT64_C( 0x0010000000000000 );
- shiftDist = 0x433 - exp;
- if ( shiftDist <= 0 ) {
- if ( shiftDist < -11 ) goto invalid;
- sigExtra.v = sig<<-shiftDist;
- sigExtra.extra = 0;
- } else {
- sigExtra = softfloat_shiftRightJam64Extra( sig, 0, shiftDist );
- }
- return
- softfloat_roundToUI64(
- sign, sigExtra.v, sigExtra.extra, roundingMode, exact );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- invalid:
- raiseFlags( flag_invalid );
- return
- (exp == 0x7FF) && fracF64UI( uiA ) ? ui64_fromNaN
- : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
-}
-
-uint_fast64_t f64_to_ui64_r_minMag( float64_t a, bool exact )
-{
- uint_fast64_t uiA;
- int_fast16_t exp;
- uint_fast64_t sig;
- int_fast16_t shiftDist;
- bool sign;
- uint_fast64_t z;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- uiA = a.v;
- exp = expF64UI( uiA );
- sig = fracF64UI( uiA );
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- shiftDist = 0x433 - exp;
- if ( 53 <= shiftDist ) {
- if ( exact && (exp | sig) ) {
- raiseFlags(flag_inexact);
- }
- return 0;
- }
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- sign = signF64UI( uiA );
- if ( sign ) goto invalid;
- if ( shiftDist <= 0 ) {
- if ( shiftDist < -11 ) goto invalid;
- z = (sig | UINT64_C( 0x0010000000000000 ))<<-shiftDist;
- } else {
- sig |= UINT64_C( 0x0010000000000000 );
- z = sig>>shiftDist;
- if ( exact && (uint64_t) (sig<<(-shiftDist & 63)) ) {
- raiseFlags(flag_inexact);
- }
- }
- return z;
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- invalid:
- raiseFlags( flag_invalid );
- return
- (exp == 0x7FF) && sig ? ui64_fromNaN
- : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
+ return sign ? -absZ : absZ;
}
-float32_t i32_to_f32( int32_t a )
+static float32_t i32_to_f32( int32_t a )
{
bool sign;
uint_fast32_t absA;
@@ -2754,7 +2062,7 @@ float32_t i32_to_f32( int32_t a )
return softfloat_normRoundPackToF32( sign, 0x9C, absA );
}
-float64_t i32_to_f64( int32_t a )
+static float64_t i32_to_f64( int32_t a )
{
uint_fast64_t uiZ;
bool sign;
@@ -2775,7 +2083,7 @@ float64_t i32_to_f64( int32_t a )
return float64_t::fromRaw(uiZ);
}
-float32_t i64_to_f32( int64_t a )
+static float32_t i64_to_f32( int64_t a )
{
bool sign;
uint_fast64_t absA;
@@ -2798,7 +2106,7 @@ float32_t i64_to_f32( int64_t a )
}
}
-float64_t i64_to_f64( int64_t a )
+static float64_t i64_to_f64( int64_t a )
{
bool sign;
uint_fast64_t absA;
@@ -2812,7 +2120,7 @@ float64_t i64_to_f64( int64_t a )
return softfloat_normRoundPackToF64( sign, 0x43C, absA );
}
-float32_t softfloat_addMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
+static float32_t softfloat_addMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
{
int_fast16_t expA;
uint_fast32_t sigA;
@@ -2893,7 +2201,7 @@ float32_t softfloat_addMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
return float32_t::fromRaw(uiZ);
}
-float64_t
+static float64_t
softfloat_addMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
{
int_fast16_t expA;
@@ -2976,7 +2284,7 @@ float64_t
return float64_t::fromRaw(uiZ);
}
-uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a )
+static uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a )
{
int index;
uint16_t eps, r0;
@@ -3006,7 +2314,7 @@ uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a )
| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
-uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
+static uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
{
return (uint_fast32_t) aPtr->sign<<31 | 0x7FC00000 | aPtr->v64>>41;
}
@@ -3015,14 +2323,14 @@ uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
-uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr )
+static uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr )
{
return
(uint_fast64_t) aPtr->sign<<63 | UINT64_C( 0x7FF8000000000000 )
| aPtr->v64>>12;
}
-uint_fast8_t softfloat_countLeadingZeros64( uint64_t a )
+static uint_fast8_t softfloat_countLeadingZeros64( uint64_t a )
{
uint_fast8_t count;
uint32_t a32;
@@ -3054,7 +2362,7 @@ uint_fast8_t softfloat_countLeadingZeros64( uint64_t a )
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
-void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr )
+static void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr )
{
if ( softfloat_isSigNaNF32UI( uiA ) ) {
raiseFlags( flag_invalid );
@@ -3070,7 +2378,7 @@ void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr )
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
-void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr )
+static void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr )
{
if ( softfloat_isSigNaNF64UI( uiA ) ) {
raiseFlags( flag_invalid );
@@ -3080,7 +2388,7 @@ void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr )
zPtr->v0 = 0;
}
-struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )
+static struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )
{
uint32_t a32, a0, b32, b0;
struct uint128 z;
@@ -3101,7 +2409,7 @@ struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )
return z;
}
-float32_t
+static float32_t
softfloat_mulAddF32(
uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC, uint_fast8_t op )
{
@@ -3279,7 +2587,7 @@ float32_t
return float32_t::fromRaw(uiZ);
}
-float64_t
+static float64_t
softfloat_mulAddF64(
uint_fast64_t uiA, uint_fast64_t uiB, uint_fast64_t uiC, uint_fast8_t op )
{
@@ -3475,7 +2783,7 @@ float64_t
return float64_t::fromRaw(uiZ);
}
-float32_t
+static float32_t
softfloat_normRoundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
{
int_fast8_t shiftDist;
@@ -3489,7 +2797,7 @@ float32_t
}
}
-float64_t
+static float64_t
softfloat_normRoundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
{
int_fast8_t shiftDist;
@@ -3503,7 +2811,7 @@ float64_t
}
}
-struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t sig )
+static struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t sig )
{
int_fast8_t shiftDist;
struct exp16_sig32 z;
@@ -3514,7 +2822,7 @@ struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t sig )
return z;
}
-struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t sig )
+static struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t sig )
{
int_fast8_t shiftDist;
struct exp16_sig64 z;
@@ -3531,7 +2839,7 @@ struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t sig )
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
-uint_fast32_t
+static uint_fast32_t
softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB )
{
bool isSigNaNA;
@@ -3550,7 +2858,7 @@ uint_fast32_t
| the combined NaN result. If either `uiA' or `uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
-uint_fast64_t
+static uint_fast64_t
softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB )
{
bool isSigNaNA;
@@ -3563,7 +2871,7 @@ uint_fast64_t
return (isNaNF64UI( uiA ) ? uiA : uiB) | UINT64_C( 0x0008000000000000 );
}
-float32_t
+static float32_t
softfloat_roundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
{
uint_fast8_t roundingMode;
@@ -3629,7 +2937,7 @@ float32_t
return float32_t::fromRaw(uiZ);
}
-float64_t
+static float64_t
softfloat_roundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
{
uint_fast8_t roundingMode;
@@ -3699,7 +3007,7 @@ float64_t
return float64_t::fromRaw(uiZ);
}
-int_fast32_t
+static int_fast32_t
softfloat_roundToI32(
bool sign, uint_fast64_t sig, uint_fast8_t roundingMode, bool exact )
{
@@ -3740,130 +3048,7 @@ int_fast32_t
return sign ? i32_fromNegOverflow : i32_fromPosOverflow;
}
-int_fast64_t
- softfloat_roundToI64(
- bool sign,
- uint_fast64_t sig,
- uint_fast64_t sigExtra,
- uint_fast8_t roundingMode,
- bool exact
- )
-{
- bool roundNearEven, doIncrement;
- union { uint64_t ui; int64_t i; } uZ;
- int_fast64_t z;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- roundNearEven = (roundingMode == round_near_even);
- doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
- if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
- doIncrement =
- (roundingMode
- == (sign ? round_min : round_max))
- && sigExtra;
- }
- if ( doIncrement ) {
- ++sig;
- if ( ! sig ) goto invalid;
- sig &=
- ~(uint_fast64_t)
- (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
- & roundNearEven);
- }
- //fixed unsigned unary minus: -x == ~x + 1
- uZ.ui = sign ? (~sig + 1) : sig;
- z = uZ.i;
- if ( z && ((z < 0) ^ sign) ) goto invalid;
- if ( exact && sigExtra ) {
- raiseFlags(flag_inexact);
- }
- return z;
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- invalid:
- raiseFlags( flag_invalid );
- return sign ? i64_fromNegOverflow : i64_fromPosOverflow;
-}
-
-uint_fast32_t
- softfloat_roundToUI32(
- bool sign, uint_fast64_t sig, uint_fast8_t roundingMode, bool exact )
-{
- bool roundNearEven;
- uint_fast16_t roundIncrement, roundBits;
- uint_fast32_t z;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- roundNearEven = (roundingMode == round_near_even);
- roundIncrement = 0x800;
- if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
- roundIncrement =
- (roundingMode
- == (sign ? round_min : round_max))
- ? 0xFFF
- : 0;
- }
- roundBits = sig & 0xFFF;
- sig += roundIncrement;
- if ( sig & UINT64_C( 0xFFFFF00000000000 ) ) goto invalid;
- z = (uint_fast32_t)(sig>>12); //fixed warning on type cast
- z &= ~(uint_fast32_t) (! (roundBits ^ 0x800) & roundNearEven);
- if ( sign && z ) goto invalid;
- if ( exact && roundBits ) {
- raiseFlags(flag_inexact);
- }
- return z;
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- invalid:
- raiseFlags( flag_invalid );
- return sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
-}
-
-uint_fast64_t
- softfloat_roundToUI64(
- bool sign,
- uint_fast64_t sig,
- uint_fast64_t sigExtra,
- uint_fast8_t roundingMode,
- bool exact
- )
-{
- bool roundNearEven, doIncrement;
-
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- roundNearEven = (roundingMode == round_near_even);
- doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
- if ( ! roundNearEven && (roundingMode != round_near_maxMag) ) {
- doIncrement =
- (roundingMode
- == (sign ? round_min : round_max))
- && sigExtra;
- }
- if ( doIncrement ) {
- ++sig;
- if ( ! sig ) goto invalid;
- sig &=
- ~(uint_fast64_t)
- (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
- & roundNearEven);
- }
- if ( sign && sig ) goto invalid;
- if ( exact && sigExtra ) {
- raiseFlags(flag_inexact);
- }
- return sig;
- /*------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- invalid:
- raiseFlags( flag_invalid );
- return sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
-}
-
-struct uint128
+static struct uint128
softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
{
uint_fast8_t u8NegDist;
@@ -3888,7 +3073,7 @@ struct uint128
return z;
}
-float32_t softfloat_subMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
+static float32_t softfloat_subMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
{
int_fast16_t expA;
uint_fast32_t sigA;
@@ -3985,7 +3170,7 @@ float32_t softfloat_subMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
return float32_t::fromRaw(uiZ);
}
-float64_t
+static float64_t
softfloat_subMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
{
int_fast16_t expA;
@@ -4080,7 +3265,7 @@ float64_t
return float64_t::fromRaw(uiZ);
}
-float32_t ui32_to_f32( uint32_t a )
+static float32_t ui32_to_f32( uint32_t a )
{
if ( ! a ) {
return float32_t::fromRaw(0);
@@ -4092,7 +3277,7 @@ float32_t ui32_to_f32( uint32_t a )
}
}
-float64_t ui32_to_f64( uint32_t a )
+static float64_t ui32_to_f64( uint32_t a )
{
uint_fast64_t uiZ;
int_fast8_t shiftDist;
@@ -4107,7 +3292,7 @@ float64_t ui32_to_f64( uint32_t a )
return float64_t::fromRaw(uiZ);
}
-float32_t ui64_to_f32( uint64_t a )
+static float32_t ui64_to_f32( uint64_t a )
{
int_fast8_t shiftDist;
uint_fast32_t sig;
@@ -4124,7 +3309,7 @@ float32_t ui64_to_f32( uint64_t a )
}
}
-float64_t ui64_to_f64( uint64_t a )
+static float64_t ui64_to_f64( uint64_t a )
{
if ( ! a ) {
return float64_t::fromRaw(0);
@@ -4222,7 +3407,7 @@ static const float64_t exp_prescale = float64_t::fromRaw(0x3ff71547652b82fe) * f
static const float64_t exp_postscale = float64_t::one()/float64_t(1 << EXPTAB_SCALE);
static const float64_t exp_max_val(3000*(1 << EXPTAB_SCALE)); // log10(DBL_MAX) < 3000
-float32_t f32_exp( float32_t x)
+static float32_t f32_exp( float32_t x)
{
//special cases
if(x.isNaN()) return float32_t::nan();
@@ -4250,7 +3435,7 @@ float32_t f32_exp( float32_t x)
return (buf * EXPPOLY_32F_A0 * float64_t::fromRaw(expTab[val0 & EXPTAB_MASK]) * ((((x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4));
}
-float64_t f64_exp(float64_t x)
+static float64_t f64_exp(float64_t x)
{
//special cases
if(x.isNaN()) return float64_t::nan();
@@ -4550,7 +3735,7 @@ static const uint64_t CV_DECL_ALIGNED(16) icvLogTab[] = {
// 0.69314718055994530941723212145818
static const float64_t ln_2 = float64_t::fromRaw(0x3fe62e42fefa39ef);
-float32_t f32_log(float32_t x)
+static float32_t f32_log(float32_t x)
{
//special cases
if(x.isNaN() || x < float32_t::zero()) return float32_t::nan();
@@ -4574,7 +3759,7 @@ float32_t f32_log(float32_t x)
return y0;
}
-float64_t f64_log(float64_t x)
+static float64_t f64_log(float64_t x)
{
//special cases
if(x.isNaN() || x < float64_t::zero()) return float64_t::nan();
@@ -4612,7 +3797,7 @@ float64_t f64_log(float64_t x)
Fast cube root by Ken Turkowski
(http://www.worldserver.com/turk/computergraphics/papers.html)
\* ************************************************************************** */
-float32_t f32_cbrt(float32_t x)
+static float32_t f32_cbrt(float32_t x)
{
//special cases
if (x.isNaN()) return float32_t::nan();
@@ -4649,9 +3834,9 @@ float32_t f32_cbrt(float32_t x)
/// POW functions ///
-float32_t f32_pow( float32_t x, float32_t y)
+static float32_t f32_pow( float32_t x, float32_t y)
{
- const float32_t zero = float32_t::zero(), one = float32_t::one(), inf = float32_t::inf(), nan = float32_t::nan();
+ static const float32_t zero = float32_t::zero(), one = float32_t::one(), inf = float32_t::inf(), nan = float32_t::nan();
bool xinf = x.isInf(), yinf = y.isInf(), xnan = x.isNaN(), ynan = y.isNaN();
float32_t ax = abs(x);
bool useInf = (y > zero) == (ax > one);
@@ -4676,9 +3861,9 @@ float32_t f32_pow( float32_t x, float32_t y)
return v;
}
-float64_t f64_pow( float64_t x, float64_t y)
+static float64_t f64_pow( float64_t x, float64_t y)
{
- const float64_t zero = float64_t::zero(), one = float64_t::one(), inf = float64_t::inf(), nan = float64_t::nan();
+ static const float64_t zero = float64_t::zero(), one = float64_t::one(), inf = float64_t::inf(), nan = float64_t::nan();
bool xinf = x.isInf(), yinf = y.isInf(), xnan = x.isNaN(), ynan = y.isNaN();
float64_t ax = abs(x);
bool useInf = (y > zero) == (ax > one);
@@ -4705,7 +3890,7 @@ float64_t f64_pow( float64_t x, float64_t y)
// These functions are for internal use only
-float32_t f32_powi( float32_t x, int y)
+static float32_t f32_powi( float32_t x, int y)
{
float32_t v;
//special case: (0 ** 0) == 1
@@ -4731,7 +3916,7 @@ float32_t f32_powi( float32_t x, int y)
return v;
}
-float64_t f64_powi( float64_t x, int y)
+static float64_t f64_powi( float64_t x, int y)
{
float64_t v;
//special case: (0 ** 0) == 1
diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp
index b40726d..283727b 100644
--- a/modules/imgproc/src/color.cpp
+++ b/modules/imgproc/src/color.cpp
@@ -94,6 +94,8 @@
#include "opencl_kernels_imgproc.hpp"
#include <limits>
#include "hal_replacement.hpp"
+#include "opencv2/core/hal/intrin.hpp"
+#include "opencv2/core/softfloat.hpp"
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
@@ -138,6 +140,8 @@ const int CB2GI = -5636;
const int CR2GI = -11698;
const int CR2RI = 22987;
+static const double _1_3 = 0.333333333333;
+static const float _1_3f = static_cast<float>(_1_3);
// computes cubic spline coefficients for a function: (xi=i, yi=f[i]), i=0..n
template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
@@ -164,6 +168,32 @@ template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
}
}
+static void splineBuild(const softfloat* f, int n, float* tab)
+{
+ const softfloat f2(2), f3(3), f4(4);
+ softfloat cn(0);
+ softfloat* sftab = reinterpret_cast<softfloat*>(tab);
+ int i;
+ tab[0] = tab[1] = 0.0f;
+
+ for(i = 1; i < n-1; i++)
+ {
+ softfloat t = (f[i+1] - f[i]*f2 + f[i-1])*f3;
+ softfloat l = softfloat::one()/(f4 - sftab[(i-1)*4]);
+ sftab[i*4] = l; sftab[i*4+1] = (t - sftab[(i-1)*4+1])*l;
+ }
+
+ for(i = n-1; i >= 0; i--)
+ {
+ softfloat c = sftab[i*4+1] - sftab[i*4]*cn;
+ softfloat b = f[i+1] - f[i] - (cn + c*f2)/f3;
+ softfloat d = (cn - c)/f3;
+ sftab[i*4] = f[i]; sftab[i*4+1] = b;
+ sftab[i*4+2] = c; sftab[i*4+3] = d;
+ cn = c;
+ }
+}
+
// interpolates value of a function at x, 0 <= x <= n using a cubic spline.
template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n)
{
@@ -3454,7 +3484,7 @@ static const float sRGB2XYZ_D65[] =
static const float XYZ2sRGB_D65[] =
{
3.240479f, -1.53715f, -0.498535f,
- -0.969256f, 1.875991f, 0.041556f,
+ -0.969256f, 1.875991f, 0.041556f,
0.055648f, -0.204043f, 1.057311f
};
@@ -5781,19 +5811,20 @@ struct HLS2RGB_b
#endif
};
-
///////////////////////////////////// RGB <-> L*a*b* /////////////////////////////////////
static const float D65[] = { 0.950456f, 1.f, 1.088754f };
enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];
-static const float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;
+static const float LabCbrtTabScale = softfloat(LAB_CBRT_TAB_SIZE*2)/softfloat(3);
static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
-static const float GammaTabScale = (float)GAMMA_TAB_SIZE;
+static const float GammaTabScale((int)GAMMA_TAB_SIZE);
static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
+enum { inv_gamma_shift = 12, INV_GAMMA_TAB_SIZE = (1 << inv_gamma_shift) };
+static ushort sRGBInvGammaTab_b[INV_GAMMA_TAB_SIZE], linearInvGammaTab_b[INV_GAMMA_TAB_SIZE];
#undef lab_shift
#define lab_shift xyz_shift
#define gamma_shift 3
@@ -5801,46 +5832,150 @@ static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
#define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift))
static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B];
+static const bool enableBitExactness = true;
+static const bool enablePackedLab = true;
+enum
+{
+ lab_base_shift = 14,
+ LAB_BASE = (1 << lab_base_shift),
+};
+static ushort LabToYF_b[256*2];
+static const int minABvalue = -8145;
+static int abToXZ_b[LAB_BASE*9/4];
+
+#define clip(value) \
+ value < 0.0f ? 0.0f : value > 1.0f ? 1.0f : value;
+
+//all constants should be presented through integers to keep bit-exactness
+static const softdouble gammaThreshold = softdouble(809)/softdouble(20000); // 0.04045
+static const softdouble gammaInvThreshold = softdouble(7827)/softdouble(2500000); // 0.0031308
+static const softdouble gammaLowScale = softdouble(323)/softdouble(25); // 12.92
+static const softdouble gammaPower = softdouble(12)/softdouble(5); // 2.4
+static const softdouble gammaXshift = softdouble(11)/softdouble(200); // 0.055
+
+static inline softfloat applyGamma(softfloat x)
+{
+ //return x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4);
+ softdouble xd = x;
+ return (xd <= gammaThreshold ?
+ xd/gammaLowScale :
+ pow((xd + gammaXshift)/(softdouble::one()+gammaXshift), gammaPower));
+}
+
+static inline softfloat applyInvGamma(softfloat x)
+{
+ //return x <= 0.0031308 ? x*12.92f : (float)(1.055*std::pow((double)x, 1./2.4) - 0.055);
+ softdouble xd = x;
+ return (xd <= gammaInvThreshold ?
+ xd*gammaLowScale :
+ pow(xd, softdouble::one()/gammaPower)*(softdouble::one()+gammaXshift) - gammaXshift);
+}
+
static void initLabTabs()
{
static bool initialized = false;
if(!initialized)
{
- float f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1], scale = 1.f/LabCbrtTabScale;
+ static const softfloat lthresh = softfloat(216) / softfloat(24389); // 0.008856f = (6/29)^3
+ static const softfloat lscale = softfloat(841) / softfloat(108); // 7.787f = (29/3)^3/(29*4)
+ static const softfloat lbias = softfloat(16) / softfloat(116);
+ static const softfloat f255(255);
+
+ softfloat f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1];
+ softfloat scale = softfloat::one()/softfloat(LabCbrtTabScale);
int i;
for(i = 0; i <= LAB_CBRT_TAB_SIZE; i++)
{
- float x = i*scale;
- f[i] = x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x);
+ softfloat x = scale*softfloat(i);
+ f[i] = x < lthresh ? mulAdd(x, lscale, lbias) : cbrt(x);
}
splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab);
- scale = 1.f/GammaTabScale;
+ scale = softfloat::one()/softfloat(GammaTabScale);
for(i = 0; i <= GAMMA_TAB_SIZE; i++)
{
- float x = i*scale;
- g[i] = x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4);
- ig[i] = x <= 0.0031308 ? x*12.92f : (float)(1.055*std::pow((double)x, 1./2.4) - 0.055);
+ softfloat x = scale*softfloat(i);
+ g[i] = applyGamma(x);
+ ig[i] = applyInvGamma(x);
}
splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab);
splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab);
+ static const softfloat intScale(255*(1 << gamma_shift));
for(i = 0; i < 256; i++)
{
- float x = i*(1.f/255.f);
- sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4)));
+ softfloat x = softfloat(i)/f255;
+ sRGBGammaTab_b[i] = (ushort)(cvRound(intScale*applyGamma(x)));
linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift));
}
+ static const softfloat invScale = softfloat::one()/softfloat((int)INV_GAMMA_TAB_SIZE);
+ for(i = 0; i < INV_GAMMA_TAB_SIZE; i++)
+ {
+ softfloat x = invScale*softfloat(i);
+ sRGBInvGammaTab_b[i] = (ushort)(cvRound(f255*applyInvGamma(x)));
+ linearInvGammaTab_b[i] = (ushort)(cvTrunc(f255*x));
+ }
+ static const softfloat cbTabScale(softfloat::one()/(f255*(1 << gamma_shift)));
+ static const softfloat lshift2(1 << lab_shift2);
for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++)
{
- float x = i*(1.f/(255.f*(1 << gamma_shift)));
- LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x)));
+ softfloat x = cbTabScale*softfloat(i);
+ LabCbrtTab_b[i] = (ushort)(cvRound(lshift2 * (x < lthresh ? mulAdd(x, lscale, lbias) : cbrt(x))));
}
+
+ //Lookup table for L to y and ify calculations
+ static const int BASE = (1 << 14);
+ for(i = 0; i < 256; i++)
+ {
+ int y, ify;
+ //8 * 255.0 / 100.0 == 20.4
+ if( i <= 20)
+ {
+ //yy = li / 903.3f;
+ //y = L*100/903.3f; 903.3f = (29/3)^3, 255 = 17*3*5
+ y = cvRound(softfloat(i*BASE*20*9)/softfloat(17*29*29*29));
+ //fy = 7.787f * yy + 16.0f / 116.0f; 7.787f = (29/3)^3/(29*4)
+ ify = cvRound(softfloat(BASE)*(softfloat(16)/softfloat(116) + softfloat(i*5)/softfloat(3*17*29)));
+ }
+ else
+ {
+ //fy = (li + 16.0f) / 116.0f;
+ softfloat fy = (softfloat(i*100*BASE)/softfloat(255*116) +
+ softfloat(16*BASE)/softfloat(116));
+ ify = cvRound(fy);
+ //yy = fy * fy * fy;
+ y = cvRound(fy*fy*fy/softfloat(BASE*BASE));
+ }
+
+ LabToYF_b[i*2 ] = (ushort)y; // 2260 <= y <= BASE
+ LabToYF_b[i*2+1] = (ushort)ify; // 0 <= ify <= BASE
+ }
+
+ //Lookup table for a,b to x,z conversion
+ for(i = minABvalue; i < LAB_BASE*9/4+minABvalue; i++)
+ {
+ int v;
+ //6.f/29.f*BASE = 3389.730
+ if(i <= 3390)
+ {
+ //fxz[k] = (fxz[k] - 16.0f / 116.0f) / 7.787f;
+ // 7.787f = (29/3)^3/(29*4)
+ v = i*108/841 - BASE*16/116*108/841;
+ }
+ else
+ {
+ //fxz[k] = fxz[k] * fxz[k] * fxz[k];
+ v = i*i/BASE*i/BASE;
+ }
+ abToXZ_b[i-minABvalue] = v; // -1335 <= v <= 88231
+ }
+
initialized = true;
}
}
+
struct RGB2Lab_b
{
typedef uchar channel_type;
@@ -5857,21 +5992,15 @@ struct RGB2Lab_b
if (!_whitept)
_whitept = D65;
- float scale[] =
- {
- (1 << lab_shift)/_whitept[0],
- (float)(1 << lab_shift),
- (1 << lab_shift)/_whitept[2]
- };
-
+ static const softfloat lshift(1 << lab_shift);
for( int i = 0; i < _3; i++ )
{
- coeffs[i*3+(blueIdx^2)] = cvRound(_coeffs[i*3]*scale[i]);
- coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
- coeffs[i*3+blueIdx] = cvRound(_coeffs[i*3+2]*scale[i]);
+ coeffs[i*3+(blueIdx^2)] = cvRound((lshift*softfloat(_coeffs[i*3 ]))/softfloat(_whitept[i]));
+ coeffs[i*3+1] = cvRound((lshift*softfloat(_coeffs[i*3+1]))/softfloat(_whitept[i]));
+ coeffs[i*3+blueIdx] = cvRound((lshift*softfloat(_coeffs[i*3+2]))/softfloat(_whitept[i]));
- CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
- coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
+ CV_Assert(coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
+ coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift));
}
}
@@ -5886,7 +6015,8 @@ struct RGB2Lab_b
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
n *= 3;
- for( i = 0; i < n; i += 3, src += scn )
+ i = 0;
+ for(; i < n; i += 3, src += scn )
{
int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]];
int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)];
@@ -5909,9 +6039,6 @@ struct RGB2Lab_b
};
-#define clip(value) \
- value < 0.0f ? 0.0f : value > 1.0f ? 1.0f : value;
-
struct RGB2Lab_f
{
typedef float channel_type;
@@ -5928,17 +6055,22 @@ struct RGB2Lab_f
if (!_whitept)
_whitept = D65;
- float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] };
+ softfloat scale[] = { softfloat::one() / softfloat(_whitept[0]),
+ softfloat::one(),
+ softfloat::one() / softfloat(_whitept[2]) };
for( int i = 0; i < _3; i++ )
{
int j = i * 3;
- coeffs[j + (blueIdx ^ 2)] = _coeffs[j] * scale[i];
- coeffs[j + 1] = _coeffs[j + 1] * scale[i];
- coeffs[j + blueIdx] = _coeffs[j + 2] * scale[i];
+ softfloat c0 = scale[i] * softfloat(_coeffs[j ]);
+ softfloat c1 = scale[i] * softfloat(_coeffs[j + 1]);
+ softfloat c2 = scale[i] * softfloat(_coeffs[j + 2]);
+ coeffs[j + (blueIdx ^ 2)] = c0;
+ coeffs[j + 1] = c1;
+ coeffs[j + blueIdx] = c2;
- CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 &&
- coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*LabCbrtTabScale );
+ CV_Assert( c0 >= 0 && c1 >= 0 && c2 >= 0 &&
+ c0 + c1 + c2 < softfloat((int)LAB_CBRT_TAB_SIZE) );
}
}
@@ -5952,8 +6084,7 @@ struct RGB2Lab_f
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
n *= 3;
- static const float _1_3 = 1.0f / 3.0f;
- static const float _a = 16.0f / 116.0f;
+ static const float _a = (softfloat(16) / softfloat(116));
for (i = 0; i < n; i += 3, src += scn )
{
float R = clip(src[0]);
@@ -5969,10 +6100,10 @@ struct RGB2Lab_f
float X = R*C0 + G*C1 + B*C2;
float Y = R*C3 + G*C4 + B*C5;
float Z = R*C6 + G*C7 + B*C8;
-
- float FX = X > 0.008856f ? std::pow(X, _1_3) : (7.787f * X + _a);
- float FY = Y > 0.008856f ? std::pow(Y, _1_3) : (7.787f * Y + _a);
- float FZ = Z > 0.008856f ? std::pow(Z, _1_3) : (7.787f * Z + _a);
+ // 7.787f = (29/3)^3/(29*4), 0.008856f = (6/29)^3, 903.3 = (29/3)^3
+ float FX = X > 0.008856f ? std::pow(X, _1_3f) : (7.787f * X + _a);
+ float FY = Y > 0.008856f ? std::pow(Y, _1_3f) : (7.787f * Y + _a);
+ float FZ = Z > 0.008856f ? std::pow(Z, _1_3f) : (7.787f * Z + _a);
float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y);
float a = 500.f * (FX - FY);
@@ -5989,13 +6120,15 @@ struct RGB2Lab_f
bool srgb;
};
-struct Lab2RGB_f
+
+// Performs conversion in floats
+struct Lab2RGBfloat
{
typedef float channel_type;
- Lab2RGB_f( int _dstcn, int blueIdx, const float* _coeffs,
+ Lab2RGBfloat( int _dstcn, int _blueIdx, const float* _coeffs,
const float* _whitept, bool _srgb )
- : dstcn(_dstcn), srgb(_srgb)
+ : dstcn(_dstcn), srgb(_srgb), blueIdx(_blueIdx)
{
initLabTabs();
@@ -6006,13 +6139,14 @@ struct Lab2RGB_f
for( int i = 0; i < 3; i++ )
{
- coeffs[i+(blueIdx^2)*3] = _coeffs[i]*_whitept[i];
- coeffs[i+3] = _coeffs[i+3]*_whitept[i];
- coeffs[i+blueIdx*3] = _coeffs[i+6]*_whitept[i];
+ coeffs[i+(blueIdx^2)*3] = (softfloat(_coeffs[i] )*softfloat(_whitept[i]));
+ coeffs[i+3] = (softfloat(_coeffs[i+3])*softfloat(_whitept[i]));
+ coeffs[i+blueIdx*3] = (softfloat(_coeffs[i+6])*softfloat(_whitept[i]));
}
- lThresh = 0.008856f * 903.3f;
- fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
+ lThresh = softfloat(8); // 0.008856f * 903.3f = (6/29)^3*(29/3)^3 = 8
+ fThresh = softfloat(6)/softfloat(29); // 7.787f * 0.008856f + 16.0f / 116.0f = 6/29
+
#if CV_SSE2
haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
@@ -6022,6 +6156,7 @@ struct Lab2RGB_f
void process(__m128& v_li0, __m128& v_li1, __m128& v_ai0,
__m128& v_ai1, __m128& v_bi0, __m128& v_bi1) const
{
+ // 903.3 = (29/3)^3, 7.787 = (29/3)^3/(29*4)
__m128 v_y00 = _mm_mul_ps(v_li0, _mm_set1_ps(1.0f/903.3f));
__m128 v_y01 = _mm_mul_ps(v_li1, _mm_set1_ps(1.0f/903.3f));
__m128 v_fy00 = _mm_add_ps(_mm_mul_ps(_mm_set1_ps(7.787f), v_y00), _mm_set1_ps(16.0f/116.0f));
@@ -6187,6 +6322,7 @@ struct Lab2RGB_f
float ai = src[i + 1];
float bi = src[i + 2];
+ // 903.3 = (29/3)^3, 7.787 = (29/3)^3/(29*4)
float y, fy;
if (li <= lThresh)
{
@@ -6207,7 +6343,6 @@ struct Lab2RGB_f
else
fxz[j] = fxz[j] * fxz[j] * fxz[j];
-
float x = fxz[0], z = fxz[1];
float ro = C0 * x + C1 * y + C2 * z;
float go = C3 * x + C4 * y + C5 * z;
@@ -6237,18 +6372,391 @@ struct Lab2RGB_f
#if CV_SSE2
bool haveSIMD;
#endif
+ int blueIdx;
+};
+
+
+// Performs conversion in integers
+struct Lab2RGBinteger
+{
+ typedef uchar channel_type;
+
+ static const int base_shift = 14;
+ static const int BASE = (1 << base_shift);
+ // lThresh == (6/29)^3 * (29/3)^3 * BASE/100
+ static const int lThresh = 1311;
+ // fThresh == ((29/3)^3/(29*4) * (6/29)^3 + 16/116)*BASE
+ static const int fThresh = 3390;
+ // base16_116 == BASE*16/116
+ static const int base16_116 = 2260;
+ static const int shift = lab_shift+(base_shift-inv_gamma_shift);
+
+ Lab2RGBinteger( int _dstcn, int blueIdx, const float* _coeffs,
+ const float* _whitept, bool srgb )
+ : dstcn(_dstcn)
+ {
+ if(!_coeffs)
+ _coeffs = XYZ2sRGB_D65;
+ if(!_whitept)
+ _whitept = D65;
+
+ static const softfloat lshift(1 << lab_shift);
+ for(int i = 0; i < 3; i++)
+ {
+ coeffs[i+(blueIdx)*3] = cvRound(lshift*softfloat(_coeffs[i ])*softfloat(_whitept[i]));
+ coeffs[i+3] = cvRound(lshift*softfloat(_coeffs[i+3])*softfloat(_whitept[i]));
+ coeffs[i+(blueIdx^2)*3] = cvRound(lshift*softfloat(_coeffs[i+6])*softfloat(_whitept[i]));
+ }
+
+ tab = srgb ? sRGBInvGammaTab_b : linearInvGammaTab_b;
+ }
+
+ // L, a, b should be in their natural range
+ inline void process(const uchar LL, const uchar aa, const uchar bb, int& ro, int& go, int& bo) const
+ {
+ int x, y, z;
+ int ify;
+
+ y = LabToYF_b[LL*2 ];
+ ify = LabToYF_b[LL*2+1];
+
+ //float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };
+ int adiv, bdiv;
+ adiv = aa*BASE/500 - 128*BASE/500, bdiv = bb*BASE/200 - 128*BASE/200;
+
+ int ifxz[] = {ify + adiv, ify - bdiv};
+
+ for(int k = 0; k < 2; k++)
+ {
+ int& v = ifxz[k];
+ v = abToXZ_b[v-minABvalue];
+ }
+ x = ifxz[0]; /* y = y */; z = ifxz[1];
+
+ int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2];
+ int C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5];
+ int C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
+
+ ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift);
+ go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift);
+ bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift);
+
+ ro = max(0, min((int)INV_GAMMA_TAB_SIZE-1, ro));
+ go = max(0, min((int)INV_GAMMA_TAB_SIZE-1, go));
+ bo = max(0, min((int)INV_GAMMA_TAB_SIZE-1, bo));
+
+ ro = tab[ro];
+ go = tab[go];
+ bo = tab[bo];
+ }
+
+ // L, a, b shoule be in their natural range
+ inline void processLabToXYZ(const v_uint8x16& lv, const v_uint8x16& av, const v_uint8x16& bv,
+ v_int32x4& xiv00, v_int32x4& yiv00, v_int32x4& ziv00,
+ v_int32x4& xiv01, v_int32x4& yiv01, v_int32x4& ziv01,
+ v_int32x4& xiv10, v_int32x4& yiv10, v_int32x4& ziv10,
+ v_int32x4& xiv11, v_int32x4& yiv11, v_int32x4& ziv11) const
+ {
+ v_uint16x8 lv0, lv1;
+ v_expand(lv, lv0, lv1);
+ // Load Y and IFY values from lookup-table
+ // y = LabToYF_b[L_value*2], ify = LabToYF_b[L_value*2 + 1]
+ // LabToYF_b[i*2 ] = y; // 2260 <= y <= BASE
+ // LabToYF_b[i*2+1] = ify; // 0 <= ify <= BASE
+ uint16_t CV_DECL_ALIGNED(16) v_lv0[8], v_lv1[8];
+ v_store_aligned(v_lv0, (lv0 << 1)); v_store_aligned(v_lv1, (lv1 << 1));
+ v_int16x8 ify0, ify1;
+
+ yiv00 = v_int32x4(LabToYF_b[v_lv0[0] ], LabToYF_b[v_lv0[1] ], LabToYF_b[v_lv0[2] ], LabToYF_b[v_lv0[3] ]);
+ yiv01 = v_int32x4(LabToYF_b[v_lv0[4] ], LabToYF_b[v_lv0[5] ], LabToYF_b[v_lv0[6] ], LabToYF_b[v_lv0[7] ]);
+ yiv10 = v_int32x4(LabToYF_b[v_lv1[0] ], LabToYF_b[v_lv1[1] ], LabToYF_b[v_lv1[2] ], LabToYF_b[v_lv1[3] ]);
+ yiv11 = v_int32x4(LabToYF_b[v_lv1[4] ], LabToYF_b[v_lv1[5] ], LabToYF_b[v_lv1[6] ], LabToYF_b[v_lv1[7] ]);
+
+ ify0 = v_int16x8(LabToYF_b[v_lv0[0]+1], LabToYF_b[v_lv0[1]+1], LabToYF_b[v_lv0[2]+1], LabToYF_b[v_lv0[3]+1],
+ LabToYF_b[v_lv0[4]+1], LabToYF_b[v_lv0[5]+1], LabToYF_b[v_lv0[6]+1], LabToYF_b[v_lv0[7]+1]);
+ ify1 = v_int16x8(LabToYF_b[v_lv1[0]+1], LabToYF_b[v_lv1[1]+1], LabToYF_b[v_lv1[2]+1], LabToYF_b[v_lv1[3]+1],
+ LabToYF_b[v_lv1[4]+1], LabToYF_b[v_lv1[5]+1], LabToYF_b[v_lv1[6]+1], LabToYF_b[v_lv1[7]+1]);
+
+ v_int16x8 adiv0, adiv1, bdiv0, bdiv1;
+ v_uint16x8 av0, av1, bv0, bv1;
+ v_expand(av, av0, av1); v_expand(bv, bv0, bv1);
+ //adiv = aa*BASE/500 - 128*BASE/500, bdiv = bb*BASE/200 - 128*BASE/200;
+ //approximations with reasonable precision
+ v_uint16x8 mulA = v_setall_u16(53687);
+ v_uint32x4 ma00, ma01, ma10, ma11;
+ v_uint32x4 addA = v_setall_u32(1 << 7);
+ v_mul_expand((av0 + (av0 << 2)), mulA, ma00, ma01);
+ v_mul_expand((av1 + (av1 << 2)), mulA, ma10, ma11);
+ adiv0 = v_reinterpret_as_s16(v_pack(((ma00 + addA) >> 13), ((ma01 + addA) >> 13)));
+ adiv1 = v_reinterpret_as_s16(v_pack(((ma10 + addA) >> 13), ((ma11 + addA) >> 13)));
+
+ v_uint16x8 mulB = v_setall_u16(41943);
+ v_uint32x4 mb00, mb01, mb10, mb11;
+ v_uint32x4 addB = v_setall_u32(1 << 4);
+ v_mul_expand(bv0, mulB, mb00, mb01);
+ v_mul_expand(bv1, mulB, mb10, mb11);
+ bdiv0 = v_reinterpret_as_s16(v_pack((mb00 + addB) >> 9, (mb01 + addB) >> 9));
+ bdiv1 = v_reinterpret_as_s16(v_pack((mb10 + addB) >> 9, (mb11 + addB) >> 9));
+
+ // 0 <= adiv <= 8356, 0 <= bdiv <= 20890
+ /* x = ifxz[0]; y = y; z = ifxz[1]; */
+ v_uint16x8 xiv0, xiv1, ziv0, ziv1;
+ v_int16x8 vSubA = v_setall_s16(-128*BASE/500 - minABvalue), vSubB = v_setall_s16(128*BASE/200-1 - minABvalue);
+
+ // int ifxz[] = {ify + adiv, ify - bdiv};
+ // ifxz[k] = abToXZ_b[ifxz[k]-minABvalue];
+ xiv0 = v_reinterpret_as_u16(v_add_wrap(v_add_wrap(ify0, adiv0), vSubA));
+ xiv1 = v_reinterpret_as_u16(v_add_wrap(v_add_wrap(ify1, adiv1), vSubA));
+ ziv0 = v_reinterpret_as_u16(v_add_wrap(v_sub_wrap(ify0, bdiv0), vSubB));
+ ziv1 = v_reinterpret_as_u16(v_add_wrap(v_sub_wrap(ify1, bdiv1), vSubB));
+
+ uint16_t CV_DECL_ALIGNED(16) v_x0[8], v_x1[8], v_z0[8], v_z1[8];
+ v_store_aligned(v_x0, xiv0 ); v_store_aligned(v_x1, xiv1 );
+ v_store_aligned(v_z0, ziv0 ); v_store_aligned(v_z1, ziv1 );
+
+ xiv00 = v_int32x4(abToXZ_b[v_x0[0]], abToXZ_b[v_x0[1]], abToXZ_b[v_x0[2]], abToXZ_b[v_x0[3]]);
+ xiv01 = v_int32x4(abToXZ_b[v_x0[4]], abToXZ_b[v_x0[5]], abToXZ_b[v_x0[6]], abToXZ_b[v_x0[7]]);
+ xiv10 = v_int32x4(abToXZ_b[v_x1[0]], abToXZ_b[v_x1[1]], abToXZ_b[v_x1[2]], abToXZ_b[v_x1[3]]);
+ xiv11 = v_int32x4(abToXZ_b[v_x1[4]], abToXZ_b[v_x1[5]], abToXZ_b[v_x1[6]], abToXZ_b[v_x1[7]]);
+ ziv00 = v_int32x4(abToXZ_b[v_z0[0]], abToXZ_b[v_z0[1]], abToXZ_b[v_z0[2]], abToXZ_b[v_z0[3]]);
+ ziv01 = v_int32x4(abToXZ_b[v_z0[4]], abToXZ_b[v_z0[5]], abToXZ_b[v_z0[6]], abToXZ_b[v_z0[7]]);
+ ziv10 = v_int32x4(abToXZ_b[v_z1[0]], abToXZ_b[v_z1[1]], abToXZ_b[v_z1[2]], abToXZ_b[v_z1[3]]);
+ ziv11 = v_int32x4(abToXZ_b[v_z1[4]], abToXZ_b[v_z1[5]], abToXZ_b[v_z1[6]], abToXZ_b[v_z1[7]]);
+ // abToXZ_b[i-minABvalue] = v; // -1335 <= v <= 88231
+ }
+
+ void operator()(const float* src, float* dst, int n) const
+ {
+ int dcn = dstcn;
+ float alpha = ColorChannel<float>::max();
+
+ int i = 0;
+ if(enablePackedLab)
+ {
+ v_float32x4 vldiv = v_setall_f32(256.f/100.0f);
+ v_float32x4 vf255 = v_setall_f32(255.f);
+ static const int nPixels = 16;
+ for(; i <= n*3-3*nPixels; i += 3*nPixels, dst += dcn*nPixels)
+ {
+ /*
+ int L = saturate_cast<int>(src[i]*BASE/100.0f);
+ int a = saturate_cast<int>(src[i + 1]*BASE/256);
+ int b = saturate_cast<int>(src[i + 2]*BASE/256);
+ */
+ v_float32x4 vl[4], va[4], vb[4];
+ for(int k = 0; k < 4; k++)
+ {
+ v_load_deinterleave(src + i + k*3*4, vl[k], va[k], vb[k]);
+ vl[k] *= vldiv;
+ }
+
+ v_int32x4 ivl[4], iva[4], ivb[4];
+ for(int k = 0; k < 4; k++)
+ {
+ ivl[k] = v_round(vl[k]), iva[k] = v_round(va[k]), ivb[k] = v_round(vb[k]);
+ }
+ v_int16x8 ivl16[2], iva16[2], ivb16[2];
+ ivl16[0] = v_pack(ivl[0], ivl[1]); iva16[0] = v_pack(iva[0], iva[1]); ivb16[0] = v_pack(ivb[0], ivb[1]);
+ ivl16[1] = v_pack(ivl[2], ivl[3]); iva16[1] = v_pack(iva[2], iva[3]); ivb16[1] = v_pack(ivb[2], ivb[3]);
+ v_uint8x16 ivl8, iva8, ivb8;
+ ivl8 = v_reinterpret_as_u8(v_pack(ivl16[0], ivl16[1]));
+ iva8 = v_reinterpret_as_u8(v_pack(iva16[0], iva16[1]));
+ ivb8 = v_reinterpret_as_u8(v_pack(ivb16[0], ivb16[1]));
+
+ v_int32x4 ixv[4], iyv[4], izv[4];
+
+ processLabToXYZ(ivl8, iva8, ivb8, ixv[0], iyv[0], izv[0],
+ ixv[1], iyv[1], izv[1],
+ ixv[2], iyv[2], izv[2],
+ ixv[3], iyv[3], izv[3]);
+ /*
+ ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift);
+ go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift);
+ bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift);
+ */
+ v_int32x4 C0 = v_setall_s32(coeffs[0]), C1 = v_setall_s32(coeffs[1]), C2 = v_setall_s32(coeffs[2]);
+ v_int32x4 C3 = v_setall_s32(coeffs[3]), C4 = v_setall_s32(coeffs[4]), C5 = v_setall_s32(coeffs[5]);
+ v_int32x4 C6 = v_setall_s32(coeffs[6]), C7 = v_setall_s32(coeffs[7]), C8 = v_setall_s32(coeffs[8]);
+ v_int32x4 descaleShift = v_setall_s32(1 << (shift-1)), tabsz = v_setall_s32((int)INV_GAMMA_TAB_SIZE-1);
+ for(int k = 0; k < 4; k++)
+ {
+ v_int32x4 i_r, i_g, i_b;
+ v_uint32x4 r_vecs, g_vecs, b_vecs;
+ i_r = (ixv[k]*C0 + iyv[k]*C1 + izv[k]*C2 + descaleShift) >> shift;
+ i_g = (ixv[k]*C3 + iyv[k]*C4 + izv[k]*C5 + descaleShift) >> shift;
+ i_b = (ixv[k]*C6 + iyv[k]*C7 + izv[k]*C8 + descaleShift) >> shift;
+
+ //limit indices in table and then substitute
+ //ro = tab[ro]; go = tab[go]; bo = tab[bo];
+ int32_t CV_DECL_ALIGNED(16) rshifts[4], gshifts[4], bshifts[4];
+ v_int32x4 rs = v_max(v_setzero_s32(), v_min(tabsz, i_r));
+ v_int32x4 gs = v_max(v_setzero_s32(), v_min(tabsz, i_g));
+ v_int32x4 bs = v_max(v_setzero_s32(), v_min(tabsz, i_b));
+
+ v_store_aligned(rshifts, rs);
+ v_store_aligned(gshifts, gs);
+ v_store_aligned(bshifts, bs);
+
+ r_vecs = v_uint32x4(tab[rshifts[0]], tab[rshifts[1]], tab[rshifts[2]], tab[rshifts[3]]);
+ g_vecs = v_uint32x4(tab[gshifts[0]], tab[gshifts[1]], tab[gshifts[2]], tab[gshifts[3]]);
+ b_vecs = v_uint32x4(tab[bshifts[0]], tab[bshifts[1]], tab[bshifts[2]], tab[bshifts[3]]);
+
+ v_float32x4 v_r, v_g, v_b;
+ v_r = v_cvt_f32(v_reinterpret_as_s32(r_vecs))/vf255;
+ v_g = v_cvt_f32(v_reinterpret_as_s32(g_vecs))/vf255;
+ v_b = v_cvt_f32(v_reinterpret_as_s32(b_vecs))/vf255;
+
+ if(dcn == 4)
+ {
+ v_store_interleave(dst + k*dcn*4, v_b, v_g, v_r, v_setall_f32(alpha));
+ }
+ else // dcn == 3
+ {
+ v_store_interleave(dst + k*dcn*4, v_b, v_g, v_r);
+ }
+ }
+ }
+ }
+
+ for(; i < n*3; i += 3, dst += dcn)
+ {
+ int ro, go, bo;
+ process((uchar)(src[i + 0]*255.f/100.f), (uchar)src[i + 1], (uchar)src[i + 2], ro, go, bo);
+
+ dst[0] = bo/255.f;
+ dst[1] = go/255.f;
+ dst[2] = ro/255.f;
+ if(dcn == 4)
+ dst[3] = alpha;
+ }
+ }
+
+ void operator()(const uchar* src, uchar* dst, int n) const
+ {
+ int i, dcn = dstcn;
+ uchar alpha = ColorChannel<uchar>::max();
+ i = 0;
+
+ if(enablePackedLab)
+ {
+ static const int nPixels = 8*2;
+ for(; i <= n*3-3*nPixels; i += 3*nPixels, dst += dcn*nPixels)
+ {
+ /*
+ int L = src[i + 0];
+ int a = src[i + 1];
+ int b = src[i + 2];
+ */
+ v_uint8x16 u8l, u8a, u8b;
+ v_load_deinterleave(src + i, u8l, u8a, u8b);
+
+ v_int32x4 xiv[4], yiv[4], ziv[4];
+ processLabToXYZ(u8l, u8a, u8b, xiv[0], yiv[0], ziv[0],
+ xiv[1], yiv[1], ziv[1],
+ xiv[2], yiv[2], ziv[2],
+ xiv[3], yiv[3], ziv[3]);
+ /*
+ ro = CV_DESCALE(C0 * x + C1 * y + C2 * z, shift);
+ go = CV_DESCALE(C3 * x + C4 * y + C5 * z, shift);
+ bo = CV_DESCALE(C6 * x + C7 * y + C8 * z, shift);
+ */
+ v_int32x4 C0 = v_setall_s32(coeffs[0]), C1 = v_setall_s32(coeffs[1]), C2 = v_setall_s32(coeffs[2]);
+ v_int32x4 C3 = v_setall_s32(coeffs[3]), C4 = v_setall_s32(coeffs[4]), C5 = v_setall_s32(coeffs[5]);
+ v_int32x4 C6 = v_setall_s32(coeffs[6]), C7 = v_setall_s32(coeffs[7]), C8 = v_setall_s32(coeffs[8]);
+ v_int32x4 descaleShift = v_setall_s32(1 << (shift-1));
+ v_int32x4 tabsz = v_setall_s32((int)INV_GAMMA_TAB_SIZE-1);
+ v_uint32x4 r_vecs[4], g_vecs[4], b_vecs[4];
+ for(int k = 0; k < 4; k++)
+ {
+ v_int32x4 i_r, i_g, i_b;
+ i_r = (xiv[k]*C0 + yiv[k]*C1 + ziv[k]*C2 + descaleShift) >> shift;
+ i_g = (xiv[k]*C3 + yiv[k]*C4 + ziv[k]*C5 + descaleShift) >> shift;
+ i_b = (xiv[k]*C6 + yiv[k]*C7 + ziv[k]*C8 + descaleShift) >> shift;
+
+ //limit indices in table and then substitute
+ //ro = tab[ro]; go = tab[go]; bo = tab[bo];
+ int32_t CV_DECL_ALIGNED(16) rshifts[4], gshifts[4], bshifts[4];
+ v_int32x4 rs = v_max(v_setzero_s32(), v_min(tabsz, i_r));
+ v_int32x4 gs = v_max(v_setzero_s32(), v_min(tabsz, i_g));
+ v_int32x4 bs = v_max(v_setzero_s32(), v_min(tabsz, i_b));
+
+ v_store_aligned(rshifts, rs);
+ v_store_aligned(gshifts, gs);
+ v_store_aligned(bshifts, bs);
+
+ r_vecs[k] = v_uint32x4(tab[rshifts[0]], tab[rshifts[1]], tab[rshifts[2]], tab[rshifts[3]]);
+ g_vecs[k] = v_uint32x4(tab[gshifts[0]], tab[gshifts[1]], tab[gshifts[2]], tab[gshifts[3]]);
+ b_vecs[k] = v_uint32x4(tab[bshifts[0]], tab[bshifts[1]], tab[bshifts[2]], tab[bshifts[3]]);
+ }
+
+ v_uint16x8 u_rvec0 = v_pack(r_vecs[0], r_vecs[1]), u_rvec1 = v_pack(r_vecs[2], r_vecs[3]);
+ v_uint16x8 u_gvec0 = v_pack(g_vecs[0], g_vecs[1]), u_gvec1 = v_pack(g_vecs[2], g_vecs[3]);
+ v_uint16x8 u_bvec0 = v_pack(b_vecs[0], b_vecs[1]), u_bvec1 = v_pack(b_vecs[2], b_vecs[3]);
+
+ v_uint8x16 u8_b, u8_g, u8_r;
+ u8_b = v_pack(u_bvec0, u_bvec1);
+ u8_g = v_pack(u_gvec0, u_gvec1);
+ u8_r = v_pack(u_rvec0, u_rvec1);
+
+ if(dcn == 4)
+ {
+ v_store_interleave(dst, u8_b, u8_g, u8_r, v_setall_u8(alpha));
+ }
+ else
+ {
+ v_store_interleave(dst, u8_b, u8_g, u8_r);
+ }
+ }
+ }
+
+ for (; i < n*3; i += 3, dst += dcn)
+ {
+ int ro, go, bo;
+ process(src[i + 0], src[i + 1], src[i + 2], ro, go, bo);
+
+ dst[0] = saturate_cast<uchar>(bo);
+ dst[1] = saturate_cast<uchar>(go);
+ dst[2] = saturate_cast<uchar>(ro);
+ if( dcn == 4 )
+ dst[3] = alpha;
+ }
+ }
+
+ int dstcn;
+ int coeffs[9];
+ ushort* tab;
+};
+
+
+struct Lab2RGB_f
+{
+ typedef float channel_type;
+
+ Lab2RGB_f( int _dstcn, int _blueIdx, const float* _coeffs,
+ const float* _whitept, bool _srgb )
+ : fcvt(_dstcn, _blueIdx, _coeffs, _whitept, _srgb), dstcn(_dstcn)
+ { }
+
+ void operator()(const float* src, float* dst, int n) const
+ {
+ fcvt(src, dst, n);
+ }
+
+ Lab2RGBfloat fcvt;
+ int dstcn;
};
-#undef clip
struct Lab2RGB_b
{
typedef uchar channel_type;
- Lab2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
+ Lab2RGB_b( int _dstcn, int _blueIdx, const float* _coeffs,
const float* _whitept, bool _srgb )
- : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
+ : fcvt(3, _blueIdx, _coeffs, _whitept, _srgb ), icvt(_dstcn, _blueIdx, _coeffs, _whitept, _srgb), dstcn(_dstcn)
{
+ useBitExactness = (!_coeffs && !_whitept && _srgb && enableBitExactness);
+
#if CV_NEON
v_scale_inv = vdupq_n_f32(100.f/255.f);
v_scale = vdupq_n_f32(255.f);
@@ -6305,6 +6813,12 @@ struct Lab2RGB_b
void operator()(const uchar* src, uchar* dst, int n) const
{
+ if(useBitExactness)
+ {
+ icvt(src, dst, n);
+ return;
+ }
+
int i, j, dcn = dstcn;
uchar alpha = ColorChannel<uchar>::max();
float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];
@@ -6313,7 +6827,8 @@ struct Lab2RGB_b
__m128 v_res = _mm_set_ps(0.f, 128.f, 128.f, 0.f);
#endif
- for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
+ i = 0;
+ for(; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
{
int dn = std::min(n - i, (int)BLOCK_SIZE);
j = 0;
@@ -6360,7 +6875,7 @@ struct Lab2RGB_b
buf[j+1] = (float)(src[j+1] - 128);
buf[j+2] = (float)(src[j+2] - 128);
}
- cvt(buf, buf, dn);
+ fcvt(buf, buf, dn);
j = 0;
#if CV_NEON
@@ -6453,9 +6968,8 @@ struct Lab2RGB_b
}
}
- int dstcn;
- Lab2RGB_f cvt;
-
+ Lab2RGBfloat fcvt;
+ Lab2RGBinteger icvt;
#if CV_NEON
float32x4_t v_scale, v_scale_inv, v_128;
uint8x8_t v_alpha;
@@ -6465,8 +6979,11 @@ struct Lab2RGB_b
__m128i v_zero;
bool haveSIMD;
#endif
+ bool useBitExactness;
+ int dstcn;
};
+#undef clip
///////////////////////////////////// RGB <-> L*u*v* /////////////////////////////////////
@@ -6492,12 +7009,17 @@ struct RGB2Luv_f
if( blueIdx == 0 )
std::swap(coeffs[i*3], coeffs[i*3+2]);
CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
- coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f );
+ softfloat(coeffs[i*3]) +
+ softfloat(coeffs[i*3+1]) +
+ softfloat(coeffs[i*3+2]) < softfloat(1.5f) );
}
- float d = 1.f/std::max(whitept[0] + whitept[1]*15 + whitept[2]*3, FLT_EPSILON);
- un = 4*whitept[0]*d*13;
- vn = 9*whitept[1]*d*13;
+ softfloat d = softfloat(whitept[0]) +
+ softfloat(whitept[1])*softfloat(15) +
+ softfloat(whitept[2])*softfloat(3);
+ d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
+ un = d*softfloat(13*4)*softfloat(whitept[0]);
+ vn = d*softfloat(13*9)*softfloat(whitept[1]);
#if CV_SSE2
haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
@@ -6790,9 +7312,12 @@ struct Luv2RGB_f
coeffs[i+blueIdx*3] = _coeffs[i+6];
}
- float d = 1.f/std::max(whitept[0] + whitept[1]*15 + whitept[2]*3, FLT_EPSILON);
- un = 4*13*whitept[0]*d;
- vn = 9*13*whitept[1]*d;
+ softfloat d = softfloat(whitept[0]) +
+ softfloat(whitept[1])*softfloat(15) +
+ softfloat(whitept[2])*softfloat(3);
+ d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
+ un = softfloat(4*13)*d*softfloat(whitept[0]);
+ vn = softfloat(9*13)*d*softfloat(whitept[1]);
#if CV_SSE2
haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
@@ -8534,21 +9059,15 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
{
int coeffs[9];
const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
- const float scale[] =
+ static const softfloat lshift(1 << lab_shift);
+ for( int i = 0; i < 3; i++ )
{
- (1 << lab_shift)/_whitept[0],
- (float)(1 << lab_shift),
- (1 << lab_shift)/_whitept[2]
- };
+ coeffs[i*3+(bidx^2)] = cvRound(lshift*softfloat(_coeffs[i*3 ])/softfloat(_whitept[i]));
+ coeffs[i*3+1] = cvRound(lshift*softfloat(_coeffs[i*3+1])/softfloat(_whitept[i]));
+ coeffs[i*3+bidx] = cvRound(lshift*softfloat(_coeffs[i*3+2])/softfloat(_whitept[i]));
- for (int i = 0; i < 3; i++ )
- {
- coeffs[i*3+(bidx^2)] = cvRound(_coeffs[i*3]*scale[i]);
- coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]);
- coeffs[i*3+bidx] = cvRound(_coeffs[i*3+2]*scale[i]);
-
- CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
- coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) );
+ CV_Assert(coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
+ coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift));
}
Mat(1, 9, CV_32SC1, coeffs).copyTo(ucoeffs);
}
@@ -8573,36 +9092,47 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
{
float coeffs[9];
const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65;
- float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] };
+
+ softfloat scale[] = { softfloat::one() / softfloat(_whitept[0]),
+ softfloat::one(),
+ softfloat::one() / softfloat(_whitept[2]) };
for (int i = 0; i < 3; i++)
{
int j = i * 3;
- coeffs[j + (bidx ^ 2)] = _coeffs[j] * (lab ? scale[i] : 1);
- coeffs[j + 1] = _coeffs[j + 1] * (lab ? scale[i] : 1);
- coeffs[j + bidx] = _coeffs[j + 2] * (lab ? scale[i] : 1);
- CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 &&
- coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? LabCbrtTabScale : 1) );
+ softfloat c0 = (lab ? scale[i] : softfloat::one()) * softfloat(_coeffs[j ]);
+ softfloat c1 = (lab ? scale[i] : softfloat::one()) * softfloat(_coeffs[j + 1]);
+ softfloat c2 = (lab ? scale[i] : softfloat::one()) * softfloat(_coeffs[j + 2]);
+
+ coeffs[j + (bidx ^ 2)] = c0;
+ coeffs[j + 1] = c1;
+ coeffs[j + bidx] = c2;
+
+ CV_Assert( c0 >= 0 && c1 >= 0 && c2 >= 0 &&
+ c0 + c1 + c2 < (lab ? softfloat((int)LAB_CBRT_TAB_SIZE) : softfloat(3)/softfloat(2)));
}
- float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
- un = 13*4*_whitept[0]*d;
- vn = 13*9*_whitept[1]*d;
+ softfloat d = softfloat(_whitept[0]) +
+ softfloat(_whitept[1])*softfloat(15) +
+ softfloat(_whitept[2])*softfloat(3);
+ d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
+ un = d*softfloat(13*4)*softfloat(_whitept[0]);
+ vn = d*softfloat(13*9)*softfloat(_whitept[1]);
Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
}
- float _1_3 = 1.0f / 3.0f, _a = 16.0f / 116.0f;
+ float _a = softfloat(16)/softfloat(116);
ocl::KernelArg ucoeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs);
if (lab)
{
if (srgb)
k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab),
- ucoeffsarg, _1_3, _a);
+ ucoeffsarg, _1_3f, _a);
else
- k.args(srcarg, dstarg, ucoeffsarg, _1_3, _a);
+ k.args(srcarg, dstarg, ucoeffsarg, _1_3f, _a);
}
else
{
@@ -8648,14 +9178,17 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
for( int i = 0; i < 3; i++ )
{
- coeffs[i+(bidx^2)*3] = _coeffs[i] * (lab ? _whitept[i] : 1);
- coeffs[i+3] = _coeffs[i+3] * (lab ? _whitept[i] : 1);
- coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1);
+ coeffs[i+(bidx^2)*3] = softfloat(_coeffs[i] )*softfloat(lab ? _whitept[i] : 1);
+ coeffs[i+3] = softfloat(_coeffs[i+3])*softfloat(lab ? _whitept[i] : 1);
+ coeffs[i+bidx*3] = softfloat(_coeffs[i+6])*softfloat(lab ? _whitept[i] : 1);
}
- float d = 1.f/std::max(_whitept[0] + _whitept[1]*15 + _whitept[2]*3, FLT_EPSILON);
- un = 4*13*_whitept[0]*d;
- vn = 9*13*_whitept[1]*d;
+ softfloat d = softfloat(_whitept[0]) +
+ softfloat(_whitept[1])*softfloat(15) +
+ softfloat(_whitept[2])*softfloat(3);
+ d = softfloat::one()/max(d, softfloat(FLT_EPSILON));
+ un = softfloat(4*13)*d*softfloat(_whitept[0]);
+ vn = softfloat(9*13)*d*softfloat(_whitept[1]);
Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs);
}
@@ -8663,8 +9196,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
_dst.create(sz, CV_MAKETYPE(depth, dcn));
dst = _dst.getUMat();
- float lThresh = 0.008856f * 903.3f;
- float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
+ float lThresh = softfloat(8); // 0.008856f * 903.3f = (6/29)^3*(29/3)^3 = 8
+ float fThresh = softfloat(6)/softfloat(29); // 7.787f * 0.008856f + 16.0f / 116.0f = 6/29
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
dstarg = ocl::KernelArg::WriteOnly(dst),
diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl
index 3fd1ad1..52bef00 100644
--- a/modules/imgproc/src/opencl/cvtcolor.cl
+++ b/modules/imgproc/src/opencl/cvtcolor.cl
@@ -1755,6 +1755,7 @@ __kernel void BGR2Lab(__global const uchar * srcptr, int src_step, int src_offse
B = splineInterpolate(B * GammaTabScale, gammaTab, GAMMA_TAB_SIZE);
#endif
+ // 7.787f = (29/3)^3/(29*4), 0.008856f = (6/29)^3, 903.3 = (29/3)^3
float X = fma(R, C0, fma(G, C1, B*C2));
float Y = fma(R, C3, fma(G, C4, B*C5));
float Z = fma(R, C6, fma(G, C7, B*C8));
@@ -1794,6 +1795,7 @@ inline void Lab2BGR_f(const float * srcbuf, float * dstbuf,
C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
float y, fy;
+ // 903.3 = (29/3)^3, 7.787 = (29/3)^3/(29*4)
if (li <= lThresh)
{
y = li / 903.3f;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/opencv.git
More information about the debian-science-commits
mailing list