Files
intel-graphics-compiler/3d/common/iStdLib/FloatSafe.h

1191 lines
47 KiB
C++

/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2021 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#pragma once
#include <limits.h>
#include <cmath>
namespace iSTD
{
/*****************************************************************************\
Constants:
    FPU_FLOAT32_*
Description:
    Raw 32-bit patterns of the single-precision floating point specials used
    by the result tables in this file.
    FPU_FLOAT32_COMPUTE is not a value to return: a result table holds it in
    the cells whose final value has to be computed normally.
\*****************************************************************************/
const DWORD FPU_FLOAT32_NAN       = 0x7FFFFFFF; // NaN pattern returned by the tables
const DWORD FPU_FLOAT32_NEG_INF   = 0xFF800000; // -infinity
const DWORD FPU_FLOAT32_POS_INF   = 0x7F800000; // +infinity
const DWORD FPU_FLOAT32_NEG_ZERO  = 0x80000000; // -0.0 (only the sign bit set)
const DWORD FPU_FLOAT32_POS_ZERO  = 0x00000000; // +0.0
const DWORD FPU_FLOAT32_COMPUTE   = 0xFFFFFFFF; // table marker: compute the result
const DWORD FPU_FLOAT32_ONE       = 0x3F800000; // +1.0
const DWORD FPU_FLOAT32_MINUS_ONE = 0xBF800000; // -1.0
/*****************************************************************************\
Enumeration:
    FPU_FLOAT_CLASS
Description:
    Classes of floating point numbers, ordered from -Inf up to +Inf with NaN
    last. The numeric values are used directly as row / column indices of the
    result tables in this file, so they must stay dense and start at zero;
    NUM_FPU_FLOAT_CLASSES is the table dimension.
\*****************************************************************************/
enum FPU_FLOAT_CLASS
{
    FPU_FLOAT_CLASS_NEG_INF    = 0,     // -Inf
    FPU_FLOAT_CLASS_NEG_FINITE = 1,     // negative normalized value
    FPU_FLOAT_CLASS_NEG_DENORM = 2,     // negative denormalized value
    FPU_FLOAT_CLASS_NEG_ZERO   = 3,     // -0
    FPU_FLOAT_CLASS_POS_ZERO   = 4,     // +0
    FPU_FLOAT_CLASS_POS_DENORM = 5,     // positive denormalized value
    FPU_FLOAT_CLASS_POS_FINITE = 6,     // positive normalized value
    FPU_FLOAT_CLASS_POS_INF    = 7,     // +Inf
    FPU_FLOAT_CLASS_NAN        = 8,     // any NaN
    NUM_FPU_FLOAT_CLASSES      = 9      // number of classes (not a class)
};
/*****************************************************************************\
Inline Function:
    Float32GetClass
Description:
    Returns the class of a 32-bit float: +0, -0, +denorm, -denorm, +finite,
    -finite, +Inf, -Inf or NaN.
Input:
    f - value to classify
Output:
    FPU_FLOAT_CLASS of f
\*****************************************************************************/
inline FPU_FLOAT_CLASS Float32GetClass( const float f )
{
    FLOAT32 bits;
    bits.value.f = f;

    // Zeros and infinities each have exactly one bit pattern per sign.
    const DWORD raw = bits.value.u;
    if( raw == FPU_FLOAT32_POS_ZERO ) return FPU_FLOAT_CLASS_POS_ZERO;
    if( raw == FPU_FLOAT32_NEG_ZERO ) return FPU_FLOAT_CLASS_NEG_ZERO;
    if( raw == FPU_FLOAT32_POS_INF )  return FPU_FLOAT_CLASS_POS_INF;
    if( raw == FPU_FLOAT32_NEG_INF )  return FPU_FLOAT_CLASS_NEG_INF;

    // Infinities are already handled, so an all-ones exponent means NaN.
    if( bits.exponent == 0xFF )
    {
        return FPU_FLOAT_CLASS_NAN;
    }

    const bool isNegative = ( bits.sign != 0 );

    // Zeros are already handled, so a zero exponent means a denormal.
    if( bits.exponent == 0x00 )
    {
        return isNegative ? FPU_FLOAT_CLASS_NEG_DENORM
                          : FPU_FLOAT_CLASS_POS_DENORM;
    }

    return isNegative ? FPU_FLOAT_CLASS_NEG_FINITE
                      : FPU_FLOAT_CLASS_POS_FINITE;
}
/*****************************************************************************\
Inline Function:
Float32IsInfinity
Description:
Returns true if class is +Inf or -Inf of 32-bit float.
\*****************************************************************************/
inline bool Float32IsInfinity( const float f )
{
FPU_FLOAT_CLASS fClass = Float32GetClass( f );
return ( fClass == FPU_FLOAT_CLASS_POS_INF ) ||
( fClass == FPU_FLOAT_CLASS_NEG_INF );
}
/*****************************************************************************\
Inline Function:
Float32IsDenorm
Description:
Returns true if class is +Denorm or -Denorm.
\*****************************************************************************/
inline bool Float32IsDenorm( const float f )
{
FPU_FLOAT_CLASS fClass = Float32GetClass( f );
return ( fClass == FPU_FLOAT_CLASS_NEG_DENORM ) ||
( fClass == FPU_FLOAT_CLASS_POS_DENORM );
}
/*****************************************************************************\
Inline Function:
Float32IsFinite
Description:
Returns true if f is finite: not +/-INF, and not NaN.
\*****************************************************************************/
inline bool Float32IsFinite( const float f )
{
FPU_FLOAT_CLASS fClass = Float32GetClass( f );
return ( fClass != FPU_FLOAT_CLASS_NAN ) &&
( fClass != FPU_FLOAT_CLASS_NEG_INF ) &&
( fClass != FPU_FLOAT_CLASS_POS_INF );
}
/*****************************************************************************\
Inline Function:
    IsFPZero
Description:
    Checks whether a raw 32-bit pattern, read as an IEEE754 binary32 value,
    is a zero of either sign (+0.0 or -0.0).
Input:
    x - dword holding the binary32 representation of a single-precision value
Output:
    true if x encodes positive or negative float zero
\*****************************************************************************/
inline bool IsFPZero( const DWORD x )
{
    switch( x )
    {
    case iSTD::FPU_FLOAT32_POS_ZERO:
    case iSTD::FPU_FLOAT32_NEG_ZERO:
        return true;
    default:
        return false;
    }
}
/*****************************************************************************\
Inline Function:
Float32SafeAdd
Description:
Performs addition taking care of floating point specials in software.
\*****************************************************************************/
inline float Float32SafeAdd( const float arg1, const float arg2, const bool denormRetain )
{
// Table for handling IEEE 754 specials in addition
//
// a + b -Inf -X -0 +0 +X +Inf NaN
//
// -Inf -Inf -Inf -Inf -Inf -Inf NaN NaN
// -X -Inf <add> <add> <add> <add> +Inf NaN
// -0 -Inf <add> -0 +0 <add> +Inf NaN
// +0 -Inf <add> +0 +0 <add> +Inf NaN
// +X -Inf <add> <add> <add> <add> +Inf NaN
// +Inf NaN +Inf +Inf +Inf +Inf +Inf NaN
// NaN NaN NaN NaN NaN NaN NaN NaN
//
static const DWORD RESULT[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
// -Inf -X -denorm -0 +0 +denorm +X +Inf NaN
{ FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // -Inf
{ FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN }, // -X
{ FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_ZERO , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN }, // -denorm
{ FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_ZERO , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN }, // -0
{ FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_ZERO , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN }, // +0
{ FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_ZERO , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN }, // +denorm
{ FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN }, // +X
{ FPU_FLOAT32_NAN , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN }, // +Inf
{ FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // NaN
};
const FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
const FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
FLOAT32 f32;
f32.value.u = RESULT[ t1 ][ t2 ];
bool computeDenorms = ( denormRetain && ( Float32IsDenorm( arg1 ) || Float32IsDenorm( arg2 ) ) );
if( ( f32.value.u == FPU_FLOAT32_COMPUTE ) || ( computeDenorms ) )
{
return arg1 + arg2;
}
return f32.value.f;
}
/*****************************************************************************\
Inline Function:
Float32SafeSubtract
Description:
Performs subtraction taking care of floating point specials in software.
\*****************************************************************************/
inline float Float32SafeSubtract( const float arg1, const float arg2, const bool denormRetain )
{
FLOAT32 f32;
f32.value.f = arg2;
// flip sign bit
f32.sign ^= 1;
return Float32SafeAdd( arg1, f32.value.f, denormRetain );
}
/*****************************************************************************\
Inline Function:
Float32SafeMultiply
Description:
Performs multiplication taking care of floating point specials in software.
\*****************************************************************************/
inline float Float32SafeMultiply( const float arg1, const float arg2, const bool denormRetain )
{
// Table for handling IEEE 754 specials in multiplication
//
// a * b -Inf -X -0 +0 +X +Inf NaN
//
// -Inf +Inf +Inf NaN NaN -Inf -Inf NaN
// -X +Inf <mul> +0 -0 <mul> -Inf NaN
// -0 NaN +0 +0 -0 -0 NaN NaN
// +0 NaN -0 -0 +0 +0 NaN NaN
// +X -Inf <mul> -0 +0 <mul> +Inf NaN
// +Inf -Inf -Inf NaN NaN +Inf +Inf NaN
// NaN NaN NaN NaN NaN NaN NaN NaN
//
static const DWORD RESULT[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
// -Inf -X -denorm -0 +0 +denorm +X +Inf NaN
{ FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN }, // -Inf
{ FPU_FLOAT32_POS_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN }, // -X
{ FPU_FLOAT32_NAN , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // -denorm
{ FPU_FLOAT32_NAN , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // -0
{ FPU_FLOAT32_NAN , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // +0
{ FPU_FLOAT32_NAN , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // +denorm
{ FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN }, // +X
{ FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN }, // +Inf
{ FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // NaN
};
FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
FLOAT32 f32;
f32.value.u = RESULT[ t1 ][ t2 ];
bool computeDenorms = ( denormRetain && ( Float32IsDenorm( arg1 ) || Float32IsDenorm( arg2 ) ) );
if( ( f32.value.u == FPU_FLOAT32_COMPUTE ) || ( computeDenorms ) )
{
return arg1 * arg2;
}
return f32.value.f;
}
/*****************************************************************************\
Inline Function:
Float32SafeFMA
Description:
Performs fused multiply and add ( a * b + c ) taking care of floating point
specials in software.
Operands that are zero, Inf/NaN, or (for a and b) of magnitude 1.0 are
handled with ordinary float arithmetic; every other case is evaluated on the
integer significands and exponents of the operands.
This is machine generated code provided by SSG.
Input:
a, b - factors of the product
c - addend
Output:
a * b + c
\*****************************************************************************/
inline float Float32SafeFMA( const float a, const float b, const float c )
{
// Bit patterns of +/-2^100; multiplying two of them overflows to +/-Inf.
const DWORD _own_large_value_32[] = { 0x71800000, 0xf1800000 };
// Bit patterns of +/-2^-100; multiplying two of them underflows.
const DWORD _own_small_value_32[] = { 0x0d800000, 0x8d800000 };
// Bit patterns of +/-1.0.
const DWORD _ones[] = { 0x3f800000, 0xbf800000 };
DWORD ux = 0;
DWORD uy = 0;
DWORD uz = 0;
DWORD ur = 0;
DWORD xbits = 0;
DWORD ybits = 0;
DWORD zbits = 0;
DWORD uhi = 0;
DWORD ulo = 0;
DWORD vhi = 0;
DWORD vlo = 0;
DWORD remain = 0;
DWORD temp = 0;
DWORD L_mask = 0;
DWORD R_mask = 0;
INT zsign = 0;
INT rsign = 0;
INT xexp = 0;
INT yexp = 0;
INT zexp = 0;
INT rexp = 0;
INT carry = 0;
INT borrow = 0;
INT rm = 0;
INT shift = 0;
INT L_shift = 0;
INT R_shift = 0;
UINT64 ubits = 0;
float resultf = 0;
float tv = 0;
float x = a;
float y = b;
float z = c;
// Set to round to nearest even.
rm = 0;
// Bit patterns of the operands after negating any value that fails ">= 0.0f".
ux = FLOAT32( x >= 0.0f ? x : -x ).value.u;
uy = FLOAT32( y >= 0.0f ? y : -y ).value.u;;
uz = FLOAT32( z >= 0.0f ? z : -z ).value.u;;
// cond1 is non-zero when an operand pattern is zero, is at or above the
// Inf pattern 0x7f800000, or (x and y only) equals the pattern of 1.0.
int cond1 = ( ux == 0 ) |
( ux >= 0x7f800000 ) |
( ux == 0x3f800000 ) |
( uy == 0 ) |
( uy >= 0x7f800000 ) |
( uy == 0x3f800000 ) |
( uz == 0 ) |
( uz >= 0x7f800000 );
if( cond1 != 0 )
{
// Those cases are evaluated with plain float arithmetic.
if( Float32IsInfinity( z ) &&
!Float32IsInfinity( x ) &&
!Float32IsInfinity( y ) )
{
// Infinite addend with non-infinite factors: the product is not formed.
resultf = ( z + x ) + y;
}
else
{
resultf = x * y + z;
}
return resultf;
}
// Split each operand into its biased exponent and its 24-bit significand
// (bit 23, the implicit leading one, is OR'ed in).
xexp = (int)( ux >> 23 );
yexp = (int)( uy >> 23 );
zexp = (int)( uz >> 23 );
xbits = 0x00800000 | ( ux & 0x007fffff );
ybits = 0x00800000 | ( uy & 0x007fffff );
zbits = 0x00800000 | ( uz & 0x007fffff );
// Sign (bit 31) and biased exponent of the product x * y.
rsign = ( FLOAT32(x).value.s ^ FLOAT32(y).value.s ) & 0x80000000;
rexp = ( xexp + yexp ) - 0x7F;
// Integer product of the two 24-bit significands (up to 48 bits).
ubits = (UINT64)xbits * ybits;
// Normalize the product into uhi (upper part) and ulo (remaining low bits,
// left aligned). If bit 47 of the product is set the exponent grows by one.
if( (DWORD) ( ubits >> 32 ) & 0x00008000 )
{
uhi = (DWORD)( ubits >> 24 );
ulo = ( (DWORD)ubits << 8 );
rexp++;
}
else
{
uhi = (DWORD)( ubits >> 23 );
ulo = ( (DWORD)ubits << 9 );
}
// cond2: the product is at least as large in magnitude as the addend
// (compared by exponent, then by upper significand bits).
int cond2 = ( rexp > zexp ) |
( ( rexp == zexp ) & ( uhi >= zbits ) );
if( cond2 != 0 )
{
// Product stays in (uhi:ulo); the addend becomes the operand to align.
shift = ( rexp - zexp );
vhi = zbits;
vlo = 0;
zsign = FLOAT32(z).value.s & 0x80000000;
}
else
{
// Addend is larger: swap roles so (uhi:ulo) holds the larger operand and
// (vhi:vlo) the product; rsign/rexp now describe the addend.
shift = ( zexp - rexp );
rexp = zexp;
vhi = uhi;
vlo = ulo;
uhi = zbits;
ulo = 0;
zsign = rsign;
rsign = FLOAT32(z).value.s & 0x80000000;
}
// Align the smaller operand (vhi:vlo) by shifting it right by the exponent
// difference; bits shifted out are kept in 'remain'.
remain = 0;
if( shift != 0 )
{
if( shift < 32 )
{
L_shift = 32 - shift;
R_shift = shift - 0;
L_mask = ~( 0xffffffffu >> R_shift );
remain = ( vlo << L_shift );
vlo = ( ( vhi << L_shift ) & L_mask) | ( vlo >> R_shift );
vhi = ( vhi >> R_shift );
}
else if( shift < 64 )
{
L_shift = 64 - shift;
R_shift = shift - 32;
L_mask = ~( 0xffffffffu >> R_shift );
remain = ( ( vhi << L_shift ) & L_mask ) | ( vlo != 0 );
vlo = ( vhi >> R_shift );
vhi = 0;
}
else
{
// Shifted out completely: only record whether anything was non-zero.
remain = ( vhi | vlo ) != 0;
vhi = vlo = 0;
}
}
if( rsign == zsign )
{
// Same signs: add the aligned magnitudes, propagating the carry from the
// low word into the high word.
temp = ulo;
ulo += vlo;
carry = ( ulo < temp );
uhi += ( vhi + carry );
if ( uhi & 0x01000000 )
{
// The sum overflowed into bit 24: shift right by one and bump the exponent.
remain = ( uhi << 31 ) | ( ( ulo | remain ) != 0 );
ur = ( uhi >> 1 ) & 0x007fffff;
rexp += 1;
}
else
{
remain = ulo | ( remain != 0 );
ur = (uhi & 0x007fffff);
}
}
else
{
// Opposite signs: subtract the aligned smaller magnitude
// (vhi:vlo:remain) from the larger one, propagating borrows upwards.
remain = ( 0 - remain );
borrow = ( remain != 0 );
temp = ulo;
ulo -= borrow;
borrow = ( ulo > temp );
uhi -= borrow;
temp = ulo;
ulo -= vlo;
borrow = ( ulo > temp );
uhi -= borrow;
uhi -= vhi;
// Find the most significant non-zero word of the difference to derive the
// left shift needed to renormalize it.
if( uhi != 0 )
{
temp = ( uhi << 8 );
shift = 0;
}
else if( ulo != 0 )
{
temp = ulo;
shift = 24;
}
else if( remain != 0 )
{
temp = remain;
shift = 24 + 32;
}
else
{
// Exact cancellation: the result is +0.0.
return FLOAT32( (DWORD)0x00000000 ).value.f;
}
// clz is defined outside this block; presumably it counts leading zero
// bits - confirm against its definition.
shift += clz( temp );
if( shift < 32 )
{
L_shift = shift - 0;
R_shift = 32 - shift;
R_mask = ( (DWORD) 1 << L_shift ) - 1;
ur = ( ( uhi << L_shift ) | (( ulo >> R_shift ) & R_mask ) ) & 0x007fffff;
remain = ( ulo << L_shift ) | ( remain != 0 );
}
else if( shift < 64 )
{
L_shift = shift - 32;
R_shift = 64 - shift;
R_mask = ( (DWORD) 1 << L_shift ) - 1;
ur = ( ( ulo << L_shift ) | ( ( remain >> R_shift ) & R_mask ) ) & 0x007fffff;
remain = ( remain << L_shift );
}
else
{
L_shift = shift - 64;
ur = ( remain << L_shift ) & 0x007fffff;
remain = 0;
}
rexp -= shift;
}
// True when rexp is outside the normal biased exponent range [1, 0xFE]
// (the unsigned compare also catches zero and negative rexp).
if( (DWORD) rexp - 1 >= 0xFF - 1 )
{
if( rexp >= 0xFF )
{
// Overflow: return the float product of two 2^100 magnitudes, with the
// sign of the result selecting the sign of the first factor.
rsign = ( (DWORD)rsign >> 31 );
if( rsign )
{
resultf = tv = FLOAT32(_own_large_value_32[(1)]).value.f * FLOAT32(_own_large_value_32[0]).value.f;
}
else
{
resultf = tv = FLOAT32(_own_large_value_32[(0)]).value.f * FLOAT32(_own_large_value_32[0]).value.f;
}
return resultf;
}
else
{
//enters here only for rexp = 0
// NOTE(review): the condition above also admits negative rexp, while the
// shift below is fixed at one bit - confirm negative rexp cannot occur.
L_shift = 31;
R_shift = 1;
L_mask = ~(0xffffffffu >> R_shift );
ur |= 0x00800000;
remain = ( ( ur << L_shift ) & L_mask ) | ( remain != 0 );
ur = ( ur >> R_shift );
}
}
else
{
// Normal range: place the biased exponent above the 23 mantissa bits.
ur |= ( rexp << 23 );
}
// 'remain' holds the bits below the result's least significant bit; a
// non-zero value means the result is inexact and may need rounding.
if( remain != 0 )
{
// The value of tv is not used for the result on this path; presumably the
// float operations assigned to it exist to raise FP status flags - confirm.
tv = ( ( (float *)_ones)[0] + ( (float *)_own_small_value_32)[0] );
int cond3, cond4, cond5, cond6;
// rm is fixed to 0 above, so only the first case is entered directly.
switch( rm )
{
case ( 0 << 10 ):
// Round to nearest even: increment when the top bit of 'remain' is set and
// either the result is odd or any lower bit of 'remain' is set.
cond3 = ( ( remain & 0x80000000 ) != 0 ) & ( ( ( ur & 1 ) != 0 ) |
( ( remain & ~0x80000000 ) != 0 ) );
if( cond3 != 0 )
{
ur++;
if( ur >= 0x7f800000 )
{
// Rounding carried into the Inf pattern: return the overflow product.
rsign = ( (unsigned)rsign >> 31 );
if( rsign )
{
resultf = tv =
( ( (float *) _own_large_value_32)[1] *
( (float *) _own_large_value_32)[0] );
}
else
{
resultf = tv =
(((float *) _own_large_value_32)[(0)] *
((float *) _own_large_value_32)[0]);
}
return resultf;
}
}
// NOTE(review): there is no break here, so control falls through into the
// ( 3 << 10 ) case and evaluates its check - presumably intentional; confirm.
case ( 3 << 10 ):
cond4 = ( ur < 0x00800000 ) |
( (ur == 0x00800000 ) & ( remain == 0x80000000 ) );
if( cond4 != 0 )
{
tv = ( ( ( float *)_own_small_value_32)[0] *
( ( float *)_own_small_value_32)[0] );
}
break;
case ( 2 << 10 ):
// This case increments ur only when rsign is zero.
cond5 = ( rsign & ( ur < 0x00800000 ) ) |
( (!rsign) & ( (ur < 0x007fffff ) | ( ( ur == 0x007fffff ) & ( remain < 0x80000000 ) ) ) );
if( cond5 != 0 )
{
tv = ( ( (float *)_own_small_value_32)[0] *
( (float *)_own_small_value_32)[0] );
}
if( !rsign )
{
ur++;
if( ur >= 0x7f800000 )
{
//rsign = ((unsigned) rsign >> 31);
resultf = tv = ( ( (float *)_own_large_value_32)[0] *
( (float *)_own_large_value_32)[0] );
return resultf;
}
}
break;
case ( 1 << 10 ):
// This case increments ur only when rsign is non-zero.
cond6 = ( !rsign & ( ur < 0x00800000 ) ) |
( rsign & ( (ur < 0x007fffff ) | ( ( ur == 0x007fffff ) & ( remain < 0x80000000 ) ) ) );
if( cond6 != 0 )
{
tv = ( ( (float *)_own_small_value_32)[0] *
( (float *)_own_small_value_32)[0] );
}
if( rsign )
{
ur++;
if (ur >= 0x7f800000 )
{
//rsign = ((unsigned) rsign >> 31);
resultf = tv =
( ( (float *)_own_large_value_32)[1] *
( (float *)_own_large_value_32)[0] );
return resultf;
}
}
break;
}
}
// Combine the sign bit with the exponent/mantissa bits into the result.
resultf = FLOAT32( (DWORD) (rsign | ur ) ).value.f;
return resultf;
}
/*****************************************************************************\
Inline Function:
Float32SafeRSQRT
Description:
Performs correctly rounded single precision reciprocal square root
operation taking care of floating point specials in software.
\*****************************************************************************/
inline float Float32SafeRSQRT( const float arg, bool denormRetain )
{
static const DWORD RESULT[NUM_FPU_FLOAT_CLASSES] =
{
FPU_FLOAT32_NAN, // rsqrt( -inf ) = NaN
FPU_FLOAT32_NAN, // rsqrt( -X ) = NaN //but to be really OK,we should try to maintain the NaN payload
FPU_FLOAT32_NAN, // rsqrt( -denorm ) = NaN //but to be really OK,we should try to maintain the NaN payload
FPU_FLOAT32_NEG_INF, // rsqrt( -0 ) = -inf
FPU_FLOAT32_POS_INF, // rsqrt( +0 ) = +inf
FPU_FLOAT32_COMPUTE, // rsqrt( +denorm) = computed value
FPU_FLOAT32_COMPUTE, // rsqrt( +X ) == computed value
FPU_FLOAT32_POS_ZERO, // rsqrt( +inf ) == +0.0
FPU_FLOAT32_NAN // rsqrt( NaN ) == NaN
};
FPU_FLOAT_CLASS t1 = Float32GetClass( arg );
FLOAT32 f32;
f32.value.u = RESULT[ t1 ];
bool computeDenorms = denormRetain && Float32IsDenorm( arg );
if ( !computeDenorms && t1 == FPU_FLOAT_CLASS_NEG_DENORM )
{
f32.value.u = FPU_FLOAT32_NEG_INF;
}
if ( !computeDenorms && t1 == FPU_FLOAT_CLASS_POS_DENORM )
{
f32.value.u = FPU_FLOAT32_POS_INF;
}
if( ( f32.value.u == FPU_FLOAT32_COMPUTE ) || ( computeDenorms ) )
{
double darg = arg;
double s = sqrt(darg); //double-precision square root
double result = 1.0 / s; //double-precision division
return static_cast<float>(result); //back to floats
}
return f32.value.f;
}
/*****************************************************************************\
Inline Function:
Float32SafeDivide
Description:
Performs division taking care of floating point specials in software.
\*****************************************************************************/
inline float Float32SafeDivide( const float arg1, const float arg2, const bool denormRetain )
{
// Table for handling IEEE 754 specials in division
//
// a / b -Inf -X -0 +0 +X +Inf NaN
//
// -Inf NaN +Inf +Inf -Inf -Inf NaN NaN
// -X +0 <div> +Inf -Inf <div> -0 NaN
// -0 +0 +0 NaN NaN -0 -0 NaN
// +0 -0 -0 NaN NaN +0 +0 NaN
// +X -0 <div> -Inf +Inf <div> +0 NaN
// +Inf NaN -Inf -Inf +Inf +Inf NaN NaN
// NaN NaN NaN NaN NaN NaN NaN NaN
//
static const DWORD RESULT[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
// -Inf -X -denorm -0 +0 +denorm +X +Inf NaN
{ FPU_FLOAT32_NAN , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // -Inf
{ FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN }, // -X
{ FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN }, // -denorm
{ FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN }, // -0
{ FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN }, // +0
{ FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN }, // +denorm
{ FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN }, // +X
{ FPU_FLOAT32_NAN , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // +Inf
{ FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN , FPU_FLOAT32_NAN }, // NaN
};
FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
FLOAT32 f32;
f32.value.u = RESULT[ t1 ][ t2 ];
bool computeDenorms = ( denormRetain && ( Float32IsDenorm( arg1 ) || Float32IsDenorm( arg2 ) ) );
if( ( f32.value.u == FPU_FLOAT32_COMPUTE ) || ( computeDenorms ) )
{
return arg1 / arg2;
}
return f32.value.f;
}
/*****************************************************************************\
Inline Function:
    Signed32SafeDivideQuotient
Description:
    Computes src0 divided by src1 without ever executing an undefined
    division.

    Table for handling signed divide quotient and remainder:

    IDIV                 SRC0
    SRC1     +INT            -INT             0
    +INT     +INT            -INT             0
    -INT     -INT            +INT             0
    0        Q:0x7FFFFFFF    Q: 0x80000000    Q:0x7FFFFFFF
             R:0x7FFFFFFF    R: 0x80000000    R:0x7FFFFFFF

    The divide-by-zero results are the 32-bit limits from the table even
    where 'signed long' is wider than 32 bits, so truncating the result to
    32 bits yields the documented patterns.
    A divisor of -1 is handled without dividing, because LONG_MIN / -1
    overflows (undefined behaviour; traps on x86). In that single case the
    wrapped value LONG_MIN is returned.
Input:
    src0 - dividend
    src1 - divisor
Output:
    quotient, or the table value when src1 is zero
\*****************************************************************************/
inline signed long Signed32SafeDivideQuotient(
    const signed long src0,
    const signed long src1 )
{
    // 32-bit limits, spelled out so they do not depend on the width of long.
    const signed long int32Max = 0x7FFFFFFFL;
    const signed long int32Min = -0x7FFFFFFFL - 1L;

    if( src1 == 0 )
    {
        return ( src0 < 0 ) ? int32Min : int32Max;
    }

    if( src1 == -1 )
    {
        // Negate by hand; LONG_MIN has no positive counterpart, so it wraps.
        return ( src0 == LONG_MIN ) ? LONG_MIN : -src0;
    }

    return src0 / src1;
}
/*****************************************************************************\
Inline Function:
    Signed32SafeDivideRemainder
Description:
    Computes the remainder of src0 divided by src1 without ever executing an
    undefined division.
    When src1 is zero the result is 0x80000000 for a negative src0 and
    0x7FFFFFFF otherwise. These are 32-bit limits even where 'signed long' is
    wider than 32 bits, so truncating the result to 32 bits yields those
    patterns.
    A divisor of -1 always leaves remainder 0 and is answered without
    dividing, because LONG_MIN % -1 is undefined behaviour (traps on x86).
Input:
    src0 - dividend
    src1 - divisor
Output:
    remainder, or the special value described above when src1 is zero
\*****************************************************************************/
inline signed long Signed32SafeDivideRemainder(
    const signed long src0,
    const signed long src1 )
{
    // 32-bit limits, spelled out so they do not depend on the width of long.
    const signed long int32Max = 0x7FFFFFFFL;
    const signed long int32Min = -0x7FFFFFFFL - 1L;

    if( src1 == 0 )
    {
        return ( src0 < 0 ) ? int32Min : int32Max;
    }

    if( src1 == -1 )
    {
        // Every integer is divisible by -1.
        return 0;
    }

    return src0 % src1;
}
/*****************************************************************************\
Inline Function:
    Unsigned32SafeDivideQuotient
Description:
    Computes src0 divided by src1; a zero divisor yields UINT_MAX instead of
    performing the division.

    Table for handling unsigned divide quotient and remainder:

    UDIV            SRC0
    SRC1     <>0             0
    <>0      UINT            0
    0        Q:0xFFFFFFFF    Q:0xFFFFFFFF
             R:0xFFFFFFFF    R:0xFFFFFFFF
Input:
    src0 - dividend
    src1 - divisor
Output:
    quotient, or UINT_MAX when src1 is zero
\*****************************************************************************/
inline DWORD Unsigned32SafeDivideQuotient(
    const DWORD src0,
    const DWORD src1 )
{
    const bool divisorIsZero = ( src1 == 0 );
    if( divisorIsZero )
    {
        return UINT_MAX;
    }

    const DWORD quotient = src0 / src1;
    return quotient;
}
/*****************************************************************************\
Inline Function:
    Unsigned32SafeDivideRemainder
Description:
    Computes the remainder of src0 divided by src1; a zero divisor yields
    UINT_MAX instead of performing the division.
Input:
    src0 - dividend
    src1 - divisor
Output:
    remainder, or UINT_MAX when src1 is zero
\*****************************************************************************/
inline DWORD Unsigned32SafeDivideRemainder(
    const DWORD src0,
    const DWORD src1 )
{
    const bool divisorIsZero = ( src1 == 0 );
    if( divisorIsZero )
    {
        return UINT_MAX;
    }

    const DWORD remainder = src0 % src1;
    return remainder;
}
/*****************************************************************************\
Inline Function:
F32ToF16_d
Description:
Float32 to float16 conversion based on "Fast Half Float Conversions"
by Jeroen van der Zijp.
The 9-bit sign+exponent field of the input selects a base half-float pattern
and a right-shift count for the 23-bit mantissa; the result is the base plus
the shifted mantissa. Mantissa bits shifted out are discarded, no rounding is
applied here.
Input:
32-bit DWORD representation of float value
Output:
16-bit WORD representation of float16 value
\*****************************************************************************/
inline WORD F32ToF16_d( DWORD arg )
{
// Base table: half-float sign and exponent bits (mantissa zero) for every
// 9-bit sign+exponent value. Entries 0..255 cover a clear sign bit, entries
// 256..511 a set sign bit.
static const WORD btbl[512] = {
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100,
0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,
0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7c00,
0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100,
0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00,
0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfc00,
0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00
};
// Shift table: how far the 23-bit float32 mantissa is shifted right before
// it is added to the base. 0x18 (24) shifts every mantissa bit out, 0x0d (13)
// keeps the top 10 bits, and the values in between are used where the base
// table holds half-float denormal patterns.
static const unsigned char stbl[512] = {
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x0d,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x0d
};
// Table index: sign bit and 8 exponent bits of the float32 pattern.
DWORD sexp = (arg>>23)&0x1ff;
// Base pattern plus the shifted (truncated) mantissa.
return (WORD)(btbl[ sexp ]+( (arg&0x007fffff)>>stbl[ sexp ] ));
}
/*****************************************************************************\
Inline Function:
    F32ToF16_f
Description:
    Float32 to float16 conversion based on "Fast Half Float Conversions"
    by Jeroen van der Zijp. Extracts the bit pattern of the float and
    forwards it to F32ToF16_d.
    The bits are read through the FLOAT32 helper, as everywhere else in this
    file, rather than by casting a float pointer to a DWORD pointer: that
    cast breaks the strict aliasing rules and may be miscompiled by
    optimizing compilers.
Input:
    32-bit float value
Output:
    16-bit WORD representation of float16 value
\*****************************************************************************/
inline WORD F32ToF16_f( float arg )
{
    FLOAT32 bits;
    bits.value.f = arg;
    return F32ToF16_d( bits.value.u );
}
/*****************************************************************************\
Inline Function:
    F16ToF32
Description:
    Float16 to float32 conversion. Infinities, NaNs (mantissa bits are kept),
    normalized values, denormalized values and signed zeros are all converted.
    All arithmetic is done on unsigned values; adding the exponent re-bias to
    a promoted signed int overflows when the half sign bit is set, which is
    undefined behaviour.
Input:
    16-bit WORD representation of float16 value
Output:
    32-bit DWORD representation of float32 value
\*****************************************************************************/
static inline DWORD F16ToF32( WORD v )
{
    const DWORD half     = v;
    const DWORD sign     = ( half & 0x8000u ) << 16;   // half bit 15 -> float bit 31
    const DWORD exponent = half & 0x7C00u;             // 5 exponent bits, in place
    const DWORD mantissa = half & 0x03FFu;             // 10 mantissa bits

    if( exponent == 0x7C00u )
    {
        // Infinity (mantissa == 0) or NaN (mantissa != 0): all-ones float
        // exponent, mantissa bits moved to the top of the float mantissa.
        return sign | 0x7F800000u | ( mantissa << 13 );
    }

    if( exponent != 0 )
    {
        // Normalized value: move exponent and mantissa into place, then
        // re-bias the exponent from 15 to 127 ( ( 127 - 15 ) << 23 ).
        return sign | ( ( ( exponent | mantissa ) << 13 ) + 0x38000000u );
    }

    if( mantissa != 0 )
    {
        // Denormalized value: the position of the highest set mantissa bit
        // selects the float exponent ( 103 + index, 0x33800000 is 103 << 23 ).
        // That bit becomes the implicit leading one and is masked off after
        // the remaining bits are shifted up.
        const unsigned long index = bsr( v & 0x03FF );
        const DWORD biasedExponent = 0x33800000u + (DWORD)( index << 23 );
        const DWORD fraction = (DWORD)( mantissa << ( 23 - index ) ) & 0x007FFFFFu;
        return sign | biasedExponent | fraction;
    }

    // Signed zero.
    return sign;
}
/*****************************************************************************\
Inline Function:
Float32SafeMax
Description:
MinMax of Floating Point Numbers.
Input:
arg1
arg2
isGen7
Output:
max( arg1, arg2 )
\*****************************************************************************/
inline float Float32SafeMax( const float arg1, const float arg2, bool isGen7 )
{
// Values of following arrays corresponds to results of sel.l instructions.
static const bool RESULT_preGen7[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
// -Inf -X -denorm -0 +0 +denorm +X +Inf NaN
{ true , false , false , false , false , false , false , false , true }, // -Inf
{ true , false , false , false , false , false , false , false , true }, // -X
{ true , true , true , true , true , true , false , false , true }, // -denorm
{ true , true , true , true , true , true , false , false , true }, // -0
{ true , true , true , true , true , true , false , false , true }, // +0
{ true , true , true , true , true , true , false , false , true }, // +denorm
{ true , true , true , true , true , true , false , false , true }, // +X
{ true , true , true , true , true , true , true , true , true }, // +Inf
{ false , false , false , false , false , false , false , false , false }, // NaN
};
static const bool RESULT_Gen7[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
// -Inf -X -denorm -0 +0 +denorm +X +Inf NaN
{ true , false , false , false , false , false , false , false , true }, // -Inf
{ true , false , false , false , false , false , false , false , true }, // -X
{ true , true , true , true , true , true , false , false , true }, // -denorm
{ true , true , true , true , false , true , false , false , true }, // -0
{ true , true , true , true , true , true , false , false , true }, // +0
{ true , true , true , true , true , true , false , false , true }, // +denorm
{ true , true , true , true , true , true , false , false , true }, // +X
{ true , true , true , true , true , true , true , true , true }, // +Inf
{ false , false , false , false , false , false , false , false , false }, // NaN
};
const FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
const FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
if( ( t1 == FPU_FLOAT_CLASS_NEG_FINITE || t1 == FPU_FLOAT_CLASS_POS_FINITE ) &&
( t2 == FPU_FLOAT_CLASS_NEG_FINITE || t2 == FPU_FLOAT_CLASS_POS_FINITE ) )
{
return ( arg1 >= arg2 ) ? arg1 : arg2;
}
FLOAT32 f32;
if( isGen7 )
{
f32.value.f = ( RESULT_Gen7[t1][t2] ) ? arg1 : arg2;
}
else
{
f32.value.f = ( RESULT_preGen7[t1][t2] ) ? arg1 : arg2;
}
return f32.value.f;
}
/*****************************************************************************\
Inline Function:
Float32SafeMin
Description:
MinMax of Floating Point Numbers.
Input:
arg1
arg2
isGen7
Output:
max( arg1, arg2 )
\*****************************************************************************/
inline float Float32SafeMin( const float arg1, const float arg2, bool isGen7 )
{
// Values of following arrays corresponds to results of sel.ge instruction.
static const bool RESULT_preGen7[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
// -Inf -X -denorm -0 +0 +denorm +X +Inf NaN
{ false , true , true , true , true , true , true , true , true }, // -Inf
{ false , false , true , true , true , true , true , true , true }, // -X
{ false , false , false , false , false , false , true , true , true }, // -denorm
{ false , false , false , false , false , false , true , true , true }, // -0
{ false , false , false , false , false , false , true , true , true }, // +0
{ false , false , false , false , false , false , true , true , true }, // +denorm
{ false , false , false , false , false , false , false , true , true }, // +X
{ false , false , false , false , false , false , false , false , true }, // +Inf
{ false , false , false , false , false , false , false , false , false }, // NaN
};
static const bool RESULT_Gen7[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
// -Inf -X -denorm -0 +0 +denorm +X +Inf NaN
{ false , true , true , true , true , true , true , true , true }, // -Inf
{ false , false , true , true , true , true , true , true , true }, // -X
{ false , false , false , false , false , false , true , true , true }, // -denorm
{ false , false , false , false , true , false , true , true , true }, // -0
{ false , false , false , false , false , false , true , true , true }, // +0
{ false , false , false , false , false , false , true , true , true }, // +denorm
{ false , false , false , false , false , false , false , true , true }, // +X
{ false , false , false , false , false , false , false , false , true }, // +Inf
{ false , false , false , false , false , false , false , false , false }, // NaN
};
const FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
const FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
if( ( t1 == FPU_FLOAT_CLASS_NEG_FINITE || t1 == FPU_FLOAT_CLASS_POS_FINITE ) &&
( t2 == FPU_FLOAT_CLASS_NEG_FINITE || t2 == FPU_FLOAT_CLASS_POS_FINITE ) )
{
return ( arg1 < arg2 ) ? arg1 : arg2;
}
FLOAT32 f32;
if( isGen7 )
{
f32.value.f = ( RESULT_Gen7[t1][t2] ) ? arg1 : arg2;
}
else
{
f32.value.f = ( RESULT_preGen7[t1][t2] ) ? arg1 : arg2;
}
return f32.value.f;
}
/*****************************************************************************\
Inline Function:
FloatSaturate
Description:
For a floating-point destination type, the saturation target range is [0.0,
1.0]. For a floating-point NaN, there is no "closest value"; any NaN
saturates to 0.0. (...) Any floating-point number greater than 1.0,
including +INF, saturates to 1.0. Any negative floating-point number,
including -INF, saturates to 0.0. Any floating-point number in the range 0.0
to 1.0 is not changed by saturation.
-0.0 is changed to +0.0.
Input:
const float f
Output:
float
\*****************************************************************************/
inline float FloatSaturate( const float f )
{
switch( Float32GetClass( f ) )
{
case FPU_FLOAT_CLASS_NEG_INF:
case FPU_FLOAT_CLASS_NEG_FINITE:
case FPU_FLOAT_CLASS_NEG_DENORM:
case FPU_FLOAT_CLASS_NEG_ZERO:
case FPU_FLOAT_CLASS_POS_ZERO:
case FPU_FLOAT_CLASS_NAN:
return 0.f;
case FPU_FLOAT_CLASS_POS_DENORM:
return f;
case FPU_FLOAT_CLASS_POS_FINITE:
return ( f <= 1.f ) ? f : 1.f;
case FPU_FLOAT_CLASS_POS_INF:
return 1.f;
default:
ASSERT( 0 );
return 0.f;
}
}
} // namespace iSTD