mirror of
				https://github.com/intel/intel-graphics-compiler.git
				synced 2025-11-04 08:21:06 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			1191 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1191 lines
		
	
	
		
			47 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*========================== begin_copyright_notice ============================
 | 
						|
 | 
						|
Copyright (C) 2017-2021 Intel Corporation
 | 
						|
 | 
						|
SPDX-License-Identifier: MIT
 | 
						|
 | 
						|
============================= end_copyright_notice ===========================*/
 | 
						|
 | 
						|
#pragma once
 | 
						|
 | 
						|
#include <limits.h>
 | 
						|
#include <cmath>
 | 
						|
 | 
						|
namespace iSTD
 | 
						|
{
 | 
						|
/*****************************************************************************\
 | 
						|
Constants:
 | 
						|
    FPU_FLOAT32_*
 | 
						|
 | 
						|
Description:
 | 
						|
    Binary representation of 32-bit floating point specials.
 | 
						|
    FPU_FLOAT32_COMPUTE special value can be used in result tables to mark
 | 
						|
    cases, where final value should be computed normally.
 | 
						|
\*****************************************************************************/
 | 
						|
const DWORD FPU_FLOAT32_NAN         = 0x7FFFFFFF;
 | 
						|
const DWORD FPU_FLOAT32_NEG_INF     = 0xFF800000;
 | 
						|
const DWORD FPU_FLOAT32_POS_INF     = 0x7F800000;
 | 
						|
const DWORD FPU_FLOAT32_NEG_ZERO    = 0x80000000;
 | 
						|
const DWORD FPU_FLOAT32_POS_ZERO    = 0x00000000;
 | 
						|
const DWORD FPU_FLOAT32_COMPUTE     = 0xFFFFFFFF;
 | 
						|
const DWORD FPU_FLOAT32_ONE         = (DWORD) 0x3F800000;
 | 
						|
const DWORD FPU_FLOAT32_MINUS_ONE   = (DWORD) 0xBF800000;
 | 
						|
 | 
						|
 | 
						|
/*****************************************************************************\
Enumeration:
    FPU_FLOAT_CLASS

Description:
    Classes of floating point numbers, ordered from -Inf up to +Inf with NaN
    last:
    (-Inf, -finite, -denorm, -0, +0, +denorm, +finite, +Inf, NaN)

    The enumerators are used as row/column indices of the result tables in
    this file, so their numeric values (0..8) must not change.
    NUM_FPU_FLOAT_CLASSES is the number of classes.
\*****************************************************************************/
enum FPU_FLOAT_CLASS
{
    FPU_FLOAT_CLASS_NEG_INF = 0,    // 0
    FPU_FLOAT_CLASS_NEG_FINITE,     // 1
    FPU_FLOAT_CLASS_NEG_DENORM,     // 2
    FPU_FLOAT_CLASS_NEG_ZERO,       // 3
    FPU_FLOAT_CLASS_POS_ZERO,       // 4
    FPU_FLOAT_CLASS_POS_DENORM,     // 5
    FPU_FLOAT_CLASS_POS_FINITE,     // 6
    FPU_FLOAT_CLASS_POS_INF,        // 7
    FPU_FLOAT_CLASS_NAN,            // 8
    NUM_FPU_FLOAT_CLASSES           // 9
};

/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32GetClass
 | 
						|
 | 
						|
Description:
 | 
						|
    Returns class (+0, -0, +finite, -finite, +Inf, -Inf, NaN) of 32-bit float.
 | 
						|
\*****************************************************************************/
 | 
						|
inline FPU_FLOAT_CLASS Float32GetClass( const float f )
 | 
						|
{
 | 
						|
    FLOAT32 f32;
 | 
						|
    f32.value.f = f;
 | 
						|
 | 
						|
    switch( f32.value.u )
 | 
						|
    {
 | 
						|
    case FPU_FLOAT32_POS_ZERO:  return FPU_FLOAT_CLASS_POS_ZERO;
 | 
						|
    case FPU_FLOAT32_NEG_ZERO:  return FPU_FLOAT_CLASS_NEG_ZERO;
 | 
						|
    case FPU_FLOAT32_POS_INF:   return FPU_FLOAT_CLASS_POS_INF;
 | 
						|
    case FPU_FLOAT32_NEG_INF:   return FPU_FLOAT_CLASS_NEG_INF;
 | 
						|
    default:                    break;
 | 
						|
    }
 | 
						|
 | 
						|
    if( f32.exponent == 0xFF )
 | 
						|
    {
 | 
						|
        return FPU_FLOAT_CLASS_NAN;
 | 
						|
    }
 | 
						|
    else if( f32.exponent == 0x00 )
 | 
						|
    {
 | 
						|
        if( f32.sign == 0 )
 | 
						|
        {
 | 
						|
            return FPU_FLOAT_CLASS_POS_DENORM;
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            return FPU_FLOAT_CLASS_NEG_DENORM;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    if( f32.sign )
 | 
						|
    {
 | 
						|
        return FPU_FLOAT_CLASS_NEG_FINITE;
 | 
						|
    }
 | 
						|
 | 
						|
    return FPU_FLOAT_CLASS_POS_FINITE;
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32IsInfinity
 | 
						|
 | 
						|
Description:
 | 
						|
    Returns true if class is +Inf or -Inf of 32-bit float.
 | 
						|
\*****************************************************************************/
 | 
						|
inline bool Float32IsInfinity( const float f )
 | 
						|
{
 | 
						|
    FPU_FLOAT_CLASS fClass = Float32GetClass( f );
 | 
						|
 | 
						|
    return ( fClass == FPU_FLOAT_CLASS_POS_INF ) ||
 | 
						|
           ( fClass == FPU_FLOAT_CLASS_NEG_INF );
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32IsDenorm
 | 
						|
 | 
						|
Description:
 | 
						|
    Returns true if class is +Denorm or -Denorm.
 | 
						|
\*****************************************************************************/
 | 
						|
inline bool Float32IsDenorm( const float f )
 | 
						|
{
 | 
						|
    FPU_FLOAT_CLASS fClass = Float32GetClass( f );
 | 
						|
 | 
						|
    return ( fClass == FPU_FLOAT_CLASS_NEG_DENORM ) ||
 | 
						|
           ( fClass == FPU_FLOAT_CLASS_POS_DENORM );
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
 | 
						|
Inline Function:
 | 
						|
    Float32IsFinite
 | 
						|
 | 
						|
Description:
 | 
						|
    Returns true if f is finite: not +/-INF, and not NaN.
 | 
						|
\*****************************************************************************/
 | 
						|
inline bool Float32IsFinite( const float f )
 | 
						|
{
 | 
						|
    FPU_FLOAT_CLASS fClass = Float32GetClass( f );
 | 
						|
 | 
						|
    return ( fClass != FPU_FLOAT_CLASS_NAN )     &&
 | 
						|
           ( fClass != FPU_FLOAT_CLASS_NEG_INF ) &&
 | 
						|
           ( fClass != FPU_FLOAT_CLASS_POS_INF );
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    IsFPZero
 | 
						|
 | 
						|
Description:
 | 
						|
    Returns true if the argument x seen as a 32-bit IEEE754 floating point
 | 
						|
    number is either positive or negative zero  +0.0, -0.0.
 | 
						|
 | 
						|
Input:
 | 
						|
    dword value that will be interpreted as a binary32 representation
 | 
						|
    of single-precision floating point value.
 | 
						|
 | 
						|
Output:
 | 
						|
    True if the value represents either positive or negative float zero.
 | 
						|
 | 
						|
\*****************************************************************************/    
 | 
						|
inline bool IsFPZero( const DWORD x )
 | 
						|
{
 | 
						|
    return ( x == iSTD::FPU_FLOAT32_POS_ZERO ) || 
 | 
						|
           ( x == iSTD::FPU_FLOAT32_NEG_ZERO );
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32SafeAdd
 | 
						|
 | 
						|
Description:
 | 
						|
    Performs addition taking care of floating point specials in software.
 | 
						|
\*****************************************************************************/
 | 
						|
inline float Float32SafeAdd( const float arg1, const float arg2, const bool denormRetain  )
 | 
						|
{
 | 
						|
    // Table for handling IEEE 754 specials in addition
 | 
						|
    //
 | 
						|
    //  a + b       -Inf    -X      -0      +0      +X      +Inf    NaN
 | 
						|
    //
 | 
						|
    //  -Inf        -Inf    -Inf    -Inf    -Inf    -Inf    NaN     NaN
 | 
						|
    //  -X          -Inf    <add>   <add>   <add>   <add>   +Inf    NaN
 | 
						|
    //  -0          -Inf    <add>   -0      +0      <add>   +Inf    NaN
 | 
						|
    //  +0          -Inf    <add>   +0      +0      <add>   +Inf    NaN
 | 
						|
    //  +X          -Inf    <add>   <add>   <add>   <add>   +Inf    NaN
 | 
						|
    //  +Inf        NaN     +Inf    +Inf    +Inf    +Inf    +Inf    NaN
 | 
						|
    //  NaN         NaN     NaN     NaN     NaN     NaN     NaN     NaN
 | 
						|
    //
 | 
						|
 | 
						|
    static const DWORD RESULT[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
 | 
						|
    //    -Inf                  -X                    -denorm               -0                    +0                    +denorm               +X                    +Inf                  NaN
 | 
						|
        { FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF  , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // -Inf
 | 
						|
        { FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE  , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN      },  // -X
 | 
						|
        { FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_ZERO , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN      },  // -denorm
 | 
						|
        { FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_ZERO , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN      },  // -0
 | 
						|
        { FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_ZERO , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN      },  // +0
 | 
						|
        { FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_ZERO , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN      },  // +denorm
 | 
						|
        { FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE  , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN      },  // +X
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF  , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN      },  // +Inf
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // NaN
 | 
						|
    };
 | 
						|
 | 
						|
    const FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
 | 
						|
    const FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
 | 
						|
 | 
						|
    FLOAT32 f32;
 | 
						|
    f32.value.u = RESULT[ t1 ][ t2 ];
 | 
						|
 | 
						|
    bool computeDenorms = ( denormRetain && ( Float32IsDenorm( arg1 ) || Float32IsDenorm( arg2 ) ) );
 | 
						|
 | 
						|
    if( ( f32.value.u == FPU_FLOAT32_COMPUTE ) || ( computeDenorms ) )
 | 
						|
    {
 | 
						|
        return arg1 + arg2;
 | 
						|
    }
 | 
						|
 | 
						|
    return f32.value.f;
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32SafeSubtract
 | 
						|
 | 
						|
Description:
 | 
						|
    Performs subtraction taking care of floating point specials in software.
 | 
						|
\*****************************************************************************/
 | 
						|
inline float Float32SafeSubtract( const float arg1, const float arg2, const bool denormRetain )
 | 
						|
{
 | 
						|
    FLOAT32 f32;
 | 
						|
    f32.value.f = arg2;
 | 
						|
 | 
						|
    // flip sign bit
 | 
						|
    f32.sign ^= 1;
 | 
						|
 | 
						|
    return Float32SafeAdd( arg1, f32.value.f, denormRetain );
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32SafeMultiply
 | 
						|
 | 
						|
Description:
 | 
						|
    Performs multiplication taking care of floating point specials in software.
 | 
						|
\*****************************************************************************/
 | 
						|
inline float Float32SafeMultiply( const float arg1, const float arg2, const bool denormRetain )
 | 
						|
{
 | 
						|
    // Table for handling IEEE 754 specials in multiplication
 | 
						|
    //
 | 
						|
    //  a * b       -Inf    -X      -0      +0      +X      +Inf    NaN
 | 
						|
    //
 | 
						|
    //  -Inf        +Inf    +Inf    NaN     NaN     -Inf    -Inf    NaN
 | 
						|
    //  -X          +Inf    <mul>   +0      -0      <mul>   -Inf    NaN
 | 
						|
    //  -0          NaN     +0      +0      -0      -0      NaN     NaN
 | 
						|
    //  +0          NaN     -0      -0      +0      +0      NaN     NaN
 | 
						|
    //  +X          -Inf    <mul>   -0      +0      <mul>   +Inf    NaN
 | 
						|
    //  +Inf        -Inf    -Inf    NaN     NaN     +Inf    +Inf    NaN
 | 
						|
    //  NaN         NaN     NaN     NaN     NaN     NaN     NaN     NaN
 | 
						|
    //
 | 
						|
 | 
						|
    static const DWORD RESULT[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
 | 
						|
    //    -Inf                  -X                    -denorm               -0                    +0                    +denorm               +X                    +Inf                  NaN
 | 
						|
        { FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN      },  // -Inf
 | 
						|
        { FPU_FLOAT32_POS_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN      },  // -X
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // -denorm
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // -0
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // +0
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // +denorm
 | 
						|
        { FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN      },  // +X
 | 
						|
        { FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN      },  // +Inf
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // NaN
 | 
						|
    };
 | 
						|
 | 
						|
    FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
 | 
						|
    FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
 | 
						|
 | 
						|
    FLOAT32 f32;
 | 
						|
    f32.value.u = RESULT[ t1 ][ t2 ];
 | 
						|
 | 
						|
    bool computeDenorms = ( denormRetain && ( Float32IsDenorm( arg1 ) || Float32IsDenorm( arg2 ) ) );
 | 
						|
 | 
						|
    if( ( f32.value.u == FPU_FLOAT32_COMPUTE ) || ( computeDenorms ) )
 | 
						|
    {
 | 
						|
        return arg1 * arg2;
 | 
						|
    }
 | 
						|
 | 
						|
    return f32.value.f;
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32SafeFMA
 | 
						|
 | 
						|
Description:
 | 
						|
    Performs fused mutliply and add taking care of floating point specials in 
 | 
						|
    software.
 | 
						|
 | 
						|
    This is machine generated code provided by SSG.
 | 
						|
 | 
						|
\*****************************************************************************/
 | 
						|
inline float Float32SafeFMA( const float a, const float b, const float c )
 | 
						|
{
 | 
						|
    const DWORD _own_large_value_32[] = { 0x71800000, 0xf1800000 };
 | 
						|
    const DWORD _own_small_value_32[] = { 0x0d800000, 0x8d800000 };
 | 
						|
    const DWORD _ones[]               = { 0x3f800000, 0xbf800000 };
 | 
						|
 | 
						|
    DWORD ux = 0;
 | 
						|
    DWORD uy = 0; 
 | 
						|
    DWORD uz = 0;
 | 
						|
    DWORD ur = 0;
 | 
						|
    DWORD xbits = 0;
 | 
						|
    DWORD ybits = 0; 
 | 
						|
    DWORD zbits = 0;
 | 
						|
    DWORD uhi = 0;
 | 
						|
    DWORD ulo = 0;
 | 
						|
    DWORD vhi = 0;
 | 
						|
    DWORD vlo = 0;
 | 
						|
    DWORD remain = 0;
 | 
						|
    DWORD temp = 0;
 | 
						|
    DWORD L_mask = 0;
 | 
						|
    DWORD R_mask = 0;
 | 
						|
 | 
						|
    INT zsign = 0;
 | 
						|
    INT rsign = 0;
 | 
						|
    INT xexp = 0; 
 | 
						|
    INT yexp = 0; 
 | 
						|
    INT zexp = 0; 
 | 
						|
    INT rexp = 0;
 | 
						|
    INT carry = 0;
 | 
						|
    INT borrow = 0;
 | 
						|
    INT rm = 0;
 | 
						|
    INT shift = 0;
 | 
						|
    INT L_shift = 0;
 | 
						|
    INT R_shift = 0;
 | 
						|
 | 
						|
    UINT64 ubits = 0;
 | 
						|
    float resultf = 0;
 | 
						|
    float tv = 0;
 | 
						|
    float x = a;
 | 
						|
    float y = b;
 | 
						|
    float z = c;
 | 
						|
 | 
						|
    // Set to round to nearest even.
 | 
						|
    rm = 0;        
 | 
						|
 | 
						|
    ux = FLOAT32( x >= 0.0f ? x : -x ).value.u;
 | 
						|
    uy = FLOAT32( y >= 0.0f ? y : -y ).value.u;;
 | 
						|
    uz = FLOAT32( z >= 0.0f ? z : -z ).value.u;;
 | 
						|
 | 
						|
    int cond1 = ( ux == 0 ) | 
 | 
						|
        ( ux >= 0x7f800000 ) | 
 | 
						|
        ( ux == 0x3f800000 ) |
 | 
						|
        ( uy == 0 ) | 
 | 
						|
        ( uy >= 0x7f800000 ) | 
 | 
						|
        ( uy == 0x3f800000 ) |
 | 
						|
        ( uz == 0 ) | 
 | 
						|
        ( uz >= 0x7f800000 );
 | 
						|
 | 
						|
    if( cond1 != 0 )
 | 
						|
    {
 | 
						|
        if(  Float32IsInfinity( z ) && 
 | 
						|
            !Float32IsInfinity( x ) && 
 | 
						|
            !Float32IsInfinity( y ) )
 | 
						|
        {
 | 
						|
            resultf = ( z + x ) + y;
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            resultf = x * y + z;
 | 
						|
        }
 | 
						|
 | 
						|
        return resultf;
 | 
						|
    }
 | 
						|
 | 
						|
    xexp = (int)( ux >> 23 );
 | 
						|
    yexp = (int)( uy >> 23 );
 | 
						|
    zexp = (int)( uz >> 23 );
 | 
						|
 | 
						|
    xbits = 0x00800000 | ( ux & 0x007fffff );
 | 
						|
    ybits = 0x00800000 | ( uy & 0x007fffff );
 | 
						|
    zbits = 0x00800000 | ( uz & 0x007fffff );
 | 
						|
 | 
						|
  
 | 
						|
    rsign = ( FLOAT32(x).value.s ^ FLOAT32(y).value.s ) & 0x80000000;
 | 
						|
    rexp  = ( xexp + yexp ) - 0x7F;
 | 
						|
    ubits = (UINT64)xbits * ybits;
 | 
						|
 | 
						|
    if( (DWORD) ( ubits >> 32 ) & 0x00008000 )
 | 
						|
    {
 | 
						|
        uhi = (DWORD)( ubits >> 24 );
 | 
						|
        ulo = ( (DWORD)ubits << 8 );
 | 
						|
        rexp++;
 | 
						|
    }
 | 
						|
    else
 | 
						|
    {
 | 
						|
        uhi = (DWORD)( ubits >> 23 );
 | 
						|
        ulo = ( (DWORD)ubits << 9 );
 | 
						|
    }
 | 
						|
 | 
						|
    int cond2 = ( rexp > zexp ) | 
 | 
						|
                ( ( rexp == zexp ) & ( uhi >= zbits ) );
 | 
						|
 | 
						|
    if( cond2 != 0 )
 | 
						|
    {
 | 
						|
        shift = ( rexp - zexp );
 | 
						|
        vhi = zbits;
 | 
						|
        vlo = 0;
 | 
						|
        zsign = FLOAT32(z).value.s & 0x80000000;
 | 
						|
    }
 | 
						|
    else
 | 
						|
    {
 | 
						|
        shift = ( zexp - rexp );
 | 
						|
        rexp = zexp;
 | 
						|
        vhi = uhi;
 | 
						|
        vlo = ulo;
 | 
						|
        uhi = zbits;
 | 
						|
        ulo = 0;
 | 
						|
        zsign = rsign;
 | 
						|
        rsign = FLOAT32(z).value.s & 0x80000000;
 | 
						|
    }
 | 
						|
 | 
						|
    remain = 0;
 | 
						|
    if( shift != 0 )
 | 
						|
    {
 | 
						|
        if( shift < 32 )
 | 
						|
        {
 | 
						|
            L_shift = 32 - shift;
 | 
						|
            R_shift = shift - 0;
 | 
						|
            L_mask = ~( 0xffffffffu >> R_shift );
 | 
						|
            remain = ( vlo << L_shift );
 | 
						|
            vlo = ( ( vhi << L_shift ) & L_mask) | ( vlo >> R_shift );
 | 
						|
            vhi = ( vhi >> R_shift );
 | 
						|
        }
 | 
						|
        else if( shift < 64 )
 | 
						|
        {
 | 
						|
            L_shift = 64 - shift;
 | 
						|
            R_shift = shift - 32;
 | 
						|
            L_mask = ~( 0xffffffffu >> R_shift );
 | 
						|
            remain = ( ( vhi << L_shift ) & L_mask ) | ( vlo != 0 );
 | 
						|
            vlo = ( vhi >> R_shift );
 | 
						|
            vhi = 0;
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            remain = ( vhi | vlo ) != 0;
 | 
						|
            vhi = vlo = 0;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    if( rsign == zsign )
 | 
						|
    {
 | 
						|
        temp = ulo;
 | 
						|
        ulo += vlo;
 | 
						|
        carry = ( ulo < temp );
 | 
						|
        uhi += ( vhi + carry );
 | 
						|
 | 
						|
        if ( uhi & 0x01000000 )
 | 
						|
        {
 | 
						|
            remain = ( uhi << 31 ) | ( ( ulo | remain ) != 0 );
 | 
						|
            ur = ( uhi >> 1 ) & 0x007fffff;
 | 
						|
            rexp += 1;
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            remain = ulo | ( remain != 0 );
 | 
						|
            ur = (uhi & 0x007fffff);
 | 
						|
        }
 | 
						|
    }
 | 
						|
    else
 | 
						|
    {
 | 
						|
        remain = ( 0 - remain );
 | 
						|
        borrow = ( remain != 0 );
 | 
						|
        temp = ulo;
 | 
						|
        ulo -= borrow;
 | 
						|
        borrow = ( ulo > temp );
 | 
						|
        uhi -= borrow;
 | 
						|
        temp = ulo;
 | 
						|
        ulo -= vlo;
 | 
						|
        borrow = ( ulo > temp );
 | 
						|
        uhi -= borrow;
 | 
						|
        uhi -= vhi;
 | 
						|
 | 
						|
        if( uhi != 0 )
 | 
						|
        {
 | 
						|
            temp = ( uhi << 8 );
 | 
						|
            shift = 0;
 | 
						|
        }
 | 
						|
        else if( ulo != 0 )
 | 
						|
        {
 | 
						|
            temp = ulo;
 | 
						|
            shift = 24;
 | 
						|
        }
 | 
						|
        else if( remain != 0 )
 | 
						|
        {
 | 
						|
            temp = remain;
 | 
						|
            shift = 24 + 32;
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            return FLOAT32( (DWORD)0x00000000 ).value.f;
 | 
						|
        }
 | 
						|
 | 
						|
        shift += clz( temp );
 | 
						|
 | 
						|
        if( shift < 32 )
 | 
						|
        {
 | 
						|
            L_shift = shift - 0;
 | 
						|
            R_shift = 32 - shift;
 | 
						|
            R_mask = ( (DWORD) 1 << L_shift ) - 1;
 | 
						|
            ur = ( ( uhi << L_shift ) | (( ulo >> R_shift ) & R_mask ) ) & 0x007fffff;
 | 
						|
            remain = ( ulo << L_shift ) | ( remain != 0 );
 | 
						|
        }
 | 
						|
        else if( shift < 64 )
 | 
						|
        {
 | 
						|
            L_shift = shift - 32;
 | 
						|
            R_shift = 64 - shift;
 | 
						|
            R_mask = ( (DWORD) 1 << L_shift ) - 1;
 | 
						|
            ur = ( ( ulo << L_shift ) | ( ( remain >> R_shift ) & R_mask ) ) & 0x007fffff;
 | 
						|
            remain = ( remain << L_shift );
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            L_shift = shift - 64;
 | 
						|
            ur = ( remain << L_shift ) & 0x007fffff;
 | 
						|
            remain = 0;
 | 
						|
        }
 | 
						|
        rexp -= shift;
 | 
						|
    }
 | 
						|
 | 
						|
    if( (DWORD) rexp - 1 >= 0xFF - 1 )
 | 
						|
    {
 | 
						|
        if( rexp >= 0xFF )
 | 
						|
        {
 | 
						|
            rsign = ( (DWORD)rsign >> 31 );
 | 
						|
            if( rsign )
 | 
						|
            {
 | 
						|
                resultf = tv = FLOAT32(_own_large_value_32[(1)]).value.f * FLOAT32(_own_large_value_32[0]).value.f;
 | 
						|
            }
 | 
						|
            else
 | 
						|
            {
 | 
						|
                resultf = tv = FLOAT32(_own_large_value_32[(0)]).value.f * FLOAT32(_own_large_value_32[0]).value.f;
 | 
						|
            }
 | 
						|
 | 
						|
            return resultf;
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            //enters here only for rexp = 0
 | 
						|
            L_shift = 31;
 | 
						|
            R_shift = 1;
 | 
						|
            L_mask = ~(0xffffffffu >>  R_shift );
 | 
						|
            ur |= 0x00800000;
 | 
						|
            remain = ( ( ur << L_shift ) & L_mask ) | ( remain != 0 );
 | 
						|
            ur = ( ur >> R_shift );
 | 
						|
 | 
						|
        }
 | 
						|
    }
 | 
						|
    else
 | 
						|
    {
 | 
						|
        ur |= ( rexp << 23 );
 | 
						|
    }
 | 
						|
 | 
						|
    if( remain != 0 )
 | 
						|
    {
 | 
						|
        tv = ( ( (float *)_ones)[0] + ( (float *)_own_small_value_32)[0] );
 | 
						|
        
 | 
						|
        int cond3, cond4, cond5, cond6;
 | 
						|
 | 
						|
        switch( rm )
 | 
						|
        {
 | 
						|
        case ( 0 << 10 ):
 | 
						|
            cond3 = ( ( remain & 0x80000000 ) != 0 ) & ( ( ( ur & 1 ) != 0 ) | 
 | 
						|
                    ( ( remain & ~0x80000000 ) != 0 ) );
 | 
						|
            if( cond3 != 0 )
 | 
						|
            {
 | 
						|
                ur++;
 | 
						|
                if( ur >= 0x7f800000 )
 | 
						|
                {
 | 
						|
                    rsign = ( (unsigned)rsign >> 31 );
 | 
						|
                    if( rsign )
 | 
						|
                    {
 | 
						|
                        resultf = tv =
 | 
						|
                            ( ( (float *) _own_large_value_32)[1] *
 | 
						|
                            ( (float *) _own_large_value_32)[0] );
 | 
						|
                    }
 | 
						|
                    else
 | 
						|
                    {
 | 
						|
                        resultf = tv =
 | 
						|
                            (((float *) _own_large_value_32)[(0)] *
 | 
						|
                            ((float *) _own_large_value_32)[0]);
 | 
						|
                    }
 | 
						|
                    
 | 
						|
                    return resultf;
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
        case ( 3 << 10 ):
 | 
						|
            cond4 = ( ur < 0x00800000 ) | 
 | 
						|
                    ( (ur == 0x00800000 ) & ( remain == 0x80000000 ) );
 | 
						|
 | 
						|
            if( cond4 != 0 )
 | 
						|
            {
 | 
						|
                tv = ( ( ( float *)_own_small_value_32)[0] * 
 | 
						|
                     ( ( float *)_own_small_value_32)[0] );
 | 
						|
            }
 | 
						|
            break;
 | 
						|
 | 
						|
        case ( 2 << 10 ):
 | 
						|
            cond5 = ( rsign & ( ur < 0x00800000 ) ) | 
 | 
						|
                    ( (!rsign) & ( (ur < 0x007fffff ) | ( ( ur == 0x007fffff ) & ( remain < 0x80000000 ) ) ) );
 | 
						|
 | 
						|
            if( cond5 != 0 )
 | 
						|
            {
 | 
						|
                tv = ( ( (float *)_own_small_value_32)[0] * 
 | 
						|
                       ( (float *)_own_small_value_32)[0] );
 | 
						|
            }
 | 
						|
 | 
						|
            if( !rsign )
 | 
						|
            {
 | 
						|
                ur++;
 | 
						|
                if( ur >= 0x7f800000 )
 | 
						|
                {
 | 
						|
                    //rsign = ((unsigned) rsign >> 31);
 | 
						|
                    resultf = tv = ( ( (float *)_own_large_value_32)[0] * 
 | 
						|
                                     ( (float *)_own_large_value_32)[0] );
 | 
						|
                    return resultf;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            break;
 | 
						|
 | 
						|
        case ( 1 << 10 ):
 | 
						|
            cond6 = ( !rsign & ( ur < 0x00800000 ) ) | 
 | 
						|
                    ( rsign & ( (ur < 0x007fffff ) | ( ( ur == 0x007fffff ) & ( remain < 0x80000000 ) ) ) );
 | 
						|
 | 
						|
            if( cond6 != 0 )
 | 
						|
            {
 | 
						|
                tv = ( ( (float *)_own_small_value_32)[0] * 
 | 
						|
                       ( (float *)_own_small_value_32)[0] );
 | 
						|
            }
 | 
						|
 | 
						|
            if( rsign )
 | 
						|
            {
 | 
						|
                ur++;
 | 
						|
                if (ur >= 0x7f800000 )
 | 
						|
                {
 | 
						|
                    //rsign = ((unsigned) rsign >> 31);
 | 
						|
                    resultf = tv =
 | 
						|
                        ( ( (float *)_own_large_value_32)[1] *
 | 
						|
                          ( (float *)_own_large_value_32)[0] );
 | 
						|
 | 
						|
                    return resultf;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            break;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    resultf = FLOAT32( (DWORD) (rsign | ur ) ).value.f;
 | 
						|
 | 
						|
    return resultf;
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32SafeRSQRT
 | 
						|
 | 
						|
Description:
 | 
						|
    Performs correctly rounded single precision reciprocal square root 
 | 
						|
    operation taking care of floating point specials in software.
 | 
						|
\*****************************************************************************/
 | 
						|
inline float Float32SafeRSQRT( const float arg, bool denormRetain )
 | 
						|
{
 | 
						|
    static const DWORD RESULT[NUM_FPU_FLOAT_CLASSES] =
 | 
						|
    {
 | 
						|
        FPU_FLOAT32_NAN,       // rsqrt( -inf )    = NaN
 | 
						|
        FPU_FLOAT32_NAN,       // rsqrt( -X )      = NaN  //but to be really OK,we should try to maintain the NaN payload
 | 
						|
        FPU_FLOAT32_NAN,       // rsqrt( -denorm ) = NaN  //but to be really OK,we should try to maintain the NaN payload
 | 
						|
        FPU_FLOAT32_NEG_INF,   // rsqrt( -0 )      = -inf
 | 
						|
        FPU_FLOAT32_POS_INF,   // rsqrt( +0 )      = +inf
 | 
						|
        FPU_FLOAT32_COMPUTE,   // rsqrt( +denorm)  = computed value
 | 
						|
        FPU_FLOAT32_COMPUTE,   // rsqrt( +X )      == computed value
 | 
						|
        FPU_FLOAT32_POS_ZERO,  // rsqrt( +inf )    == +0.0
 | 
						|
        FPU_FLOAT32_NAN        // rsqrt( NaN )     == NaN
 | 
						|
    };
 | 
						|
 | 
						|
    FPU_FLOAT_CLASS t1 = Float32GetClass( arg );
 | 
						|
 | 
						|
    FLOAT32 f32;
 | 
						|
    f32.value.u = RESULT[ t1 ];
 | 
						|
 | 
						|
    bool computeDenorms = denormRetain &&  Float32IsDenorm( arg );
 | 
						|
 | 
						|
    if ( !computeDenorms && t1 == FPU_FLOAT_CLASS_NEG_DENORM ) 
 | 
						|
    {
 | 
						|
        f32.value.u = FPU_FLOAT32_NEG_INF;
 | 
						|
    }
 | 
						|
    if ( !computeDenorms && t1 == FPU_FLOAT_CLASS_POS_DENORM )
 | 
						|
    {
 | 
						|
        f32.value.u = FPU_FLOAT32_POS_INF;
 | 
						|
    }
 | 
						|
 | 
						|
    if( ( f32.value.u == FPU_FLOAT32_COMPUTE ) || ( computeDenorms ) )
 | 
						|
    {
 | 
						|
        double darg = arg;
 | 
						|
        double s = sqrt(darg);      //double-precision square root
 | 
						|
        double result = 1.0 / s;    //double-precision division
 | 
						|
        return static_cast<float>(result);     //back to floats
 | 
						|
    }
 | 
						|
 | 
						|
    return f32.value.f;
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32SafeDivide
 | 
						|
 | 
						|
Description:
 | 
						|
    Performs division taking care of floating point specials in software.
 | 
						|
\*****************************************************************************/
 | 
						|
inline float Float32SafeDivide( const float arg1, const float arg2, const bool denormRetain )
 | 
						|
{
 | 
						|
    // Table for handling IEEE 754 specials in division
 | 
						|
    //
 | 
						|
    //  a / b       -Inf    -X      -0      +0      +X      +Inf    NaN
 | 
						|
    //
 | 
						|
    //  -Inf        NaN     +Inf    +Inf    -Inf    -Inf    NaN     NaN
 | 
						|
    //  -X          +0      <div>   +Inf    -Inf    <div>   -0      NaN
 | 
						|
    //  -0          +0      +0      NaN     NaN     -0      -0      NaN
 | 
						|
    //  +0          -0      -0      NaN     NaN     +0      +0      NaN
 | 
						|
    //  +X          -0      <div>   -Inf    +Inf    <div>   +0      NaN
 | 
						|
    //  +Inf        NaN     -Inf    -Inf    +Inf    +Inf    NaN     NaN
 | 
						|
    //  NaN         NaN     NaN     NaN     NaN     NaN     NaN     NaN
 | 
						|
    //
 | 
						|
 | 
						|
    static const DWORD RESULT[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
 | 
						|
    //    -Inf                  -X                    -denorm               -0                    +0                    +denorm               +X                    +Inf                  NaN
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // -Inf
 | 
						|
        { FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN      },  // -X
 | 
						|
        { FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN      },  // -denorm
 | 
						|
        { FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN      },  // -0
 | 
						|
        { FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN      },  // +0
 | 
						|
        { FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN      },  // +denorm
 | 
						|
        { FPU_FLOAT32_NEG_ZERO, FPU_FLOAT32_COMPUTE , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_COMPUTE , FPU_FLOAT32_POS_ZERO, FPU_FLOAT32_NAN      },  // +X
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_NEG_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_POS_INF , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // +Inf
 | 
						|
        { FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN     , FPU_FLOAT32_NAN      },  // NaN
 | 
						|
    };
 | 
						|
 | 
						|
    FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
 | 
						|
    FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
 | 
						|
 | 
						|
    FLOAT32 f32;
 | 
						|
    f32.value.u = RESULT[ t1 ][ t2 ];
 | 
						|
 | 
						|
    bool computeDenorms = ( denormRetain && ( Float32IsDenorm( arg1 ) || Float32IsDenorm( arg2 ) ) );
 | 
						|
 | 
						|
    if( ( f32.value.u == FPU_FLOAT32_COMPUTE ) || ( computeDenorms ) )
 | 
						|
    {
 | 
						|
        return arg1 / arg2;
 | 
						|
    }
 | 
						|
 | 
						|
    return f32.value.f;
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Signed32SafeDivideQuotient
 | 
						|
 | 
						|
Description:
 | 
						|
    Computes src0 divided by src1
 | 
						|
    Table for handling signed divide quotient and remainder:
 | 
						|
        IDIV            SRC0    
 | 
						|
            SRC1        +INT            -INT             0
 | 
						|
            +INT        +INT            -INT             0
 | 
						|
            -INT        -INT            +INT             0
 | 
						|
              0     Q:0x7FFFFFFF    Q: 0x80000000   Q:0x7FFFFFFF
 | 
						|
                    R:0x7FFFFFFF    R: 0x80000000   R:0x7FFFFFFF
 | 
						|
\*****************************************************************************/
 | 
						|
inline signed long Signed32SafeDivideQuotient(
    const signed long src0,
    const signed long src1 )
{
    // Signed quotient src0 / src1 that never executes an undefined division.
    //
    //   src1 == 0, src0 <  0        -> LONG_MIN
    //   src1 == 0, src0 >= 0        -> LONG_MAX
    //   src0 == LONG_MIN, src1 == -1 -> LONG_MIN (see below)
    //   otherwise                    -> src0 / src1 (truncated toward zero)
    //
    // NOTE(review): the sentinels are LONG_MIN / LONG_MAX of 'signed long';
    // where 'long' is wider than 32 bits they are not the 32-bit patterns
    // 0x80000000 / 0x7FFFFFFF - confirm this is what callers expect.
    if( src1 == 0 )
    {
        return ( src0 < 0 ) ? LONG_MIN : LONG_MAX;
    }

    // LONG_MIN / -1 is not representable; evaluating it is undefined
    // behaviour in C++ (and raises a divide fault on x86). Return the
    // two's-complement wrapped quotient instead.
    // NOTE(review): wrapped value chosen; confirm against the hardware
    // behaviour being emulated.
    if( ( src1 == -1 ) && ( src0 == LONG_MIN ) )
    {
        return LONG_MIN;
    }

    return src0 / src1;
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Signed32SafeDivideRemainder
 | 
						|
 | 
						|
Description:
 | 
						|
    Computes remainder of src0 divided by src1
 | 
						|
\*****************************************************************************/
 | 
						|
inline signed long Signed32SafeDivideRemainder(
    const signed long src0,
    const signed long src1 )
{
    // Signed remainder src0 % src1 that never executes an undefined division.
    //
    //   src1 == 0, src0 <  0        -> LONG_MIN
    //   src1 == 0, src0 >= 0        -> LONG_MAX
    //   src0 == LONG_MIN, src1 == -1 -> 0 (see below)
    //   otherwise                    -> src0 % src1 (sign follows src0)
    //
    // NOTE(review): the sentinels are LONG_MIN / LONG_MAX of 'signed long';
    // where 'long' is wider than 32 bits they are not the 32-bit patterns
    // 0x80000000 / 0x7FFFFFFF - confirm this is what callers expect.
    if( src1 == 0 )
    {
        return ( src0 < 0 ) ? LONG_MIN : LONG_MAX;
    }

    // LONG_MIN % -1 is undefined behaviour in C++ because the matching
    // quotient overflows (it raises a divide fault on x86). The mathematical
    // remainder of any value divided by -1 is 0.
    if( ( src1 == -1 ) && ( src0 == LONG_MIN ) )
    {
        return 0;
    }

    return src0 % src1;
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Unsigned32SafeDivideQuotient
 | 
						|
 | 
						|
Description:
 | 
						|
    Computes src0 divided by src1
 | 
						|
       Table for handling unsigned divide quotient and remainder 
 | 
						|
          UDIV          SRC0    
 | 
						|
              SRC1      <>0             0
 | 
						|
              <>0       UINT            0
 | 
						|
                0   Q:0xFFFFFFFF    Q:0xFFFFFFFF
 | 
						|
                    R:0xFFFFFFFF    R:0xFFFFFFFF
 | 
						|
\*****************************************************************************/
 | 
						|
inline DWORD Unsigned32SafeDivideQuotient(
    const DWORD src0,
    const DWORD src1 )
{
    // Unsigned quotient; a zero divisor yields UINT_MAX instead of dividing.
    return ( src1 != 0 ) ? ( src0 / src1 ) : UINT_MAX;
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Unsigned32SafeDivideRemainder
 | 
						|
 | 
						|
Description:
 | 
						|
    Computes remainder of src0 divided by src1
 | 
						|
\*****************************************************************************/
 | 
						|
inline DWORD Unsigned32SafeDivideRemainder(
    const DWORD src0,
    const DWORD src1 )
{
    // Unsigned remainder; a zero divisor yields UINT_MAX instead of dividing.
    return ( src1 != 0 ) ? ( src0 % src1 ) : UINT_MAX;
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    F32ToF16_d
 | 
						|
 | 
						|
Description:
 | 
						|
    Float32 to float16 conversion based on "Fast Half Float Conversions" 
 | 
						|
    by Jeroen van der Zijp
 | 
						|
 | 
						|
Input: 
 | 
						|
    32-bit DWORD representation of float value
 | 
						|
Output:
 | 
						|
    16-bit WORD representation of float16 value
 | 
						|
 | 
						|
\*****************************************************************************/
 | 
						|
inline WORD F32ToF16_d( DWORD arg )
{
    // Converts the bit pattern of a 32-bit float to the bit pattern of a
    // 16-bit float by table lookup. The mantissa is truncated (shifted
    // right), not rounded.
    //
    // Both tables are indexed by the 9-bit sign+exponent field of the input
    // (bits 31..23):
    //   btbl - half-float sign/exponent bits; for inputs that become half
    //          denormals it holds the bit for the leading one instead.
    //   stbl - right shift applied to the 23-bit input mantissa (0x18 shifts
    //          the whole mantissa out, 0x0d keeps its top 10 bits).
    static const WORD btbl[512] = {
        0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
        0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
        0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
        0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
        0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
        0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
        0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100,
        0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,
        0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7c00,
        0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
        0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
        0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
        0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
        0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
        0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
        0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,0x7c00,
        0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
        0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
        0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
        0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
        0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
        0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,
        0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100,
        0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00,
        0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfc00,
        0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
        0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
        0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
        0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
        0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
        0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,
        0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00,0xfc00
    };
    static const unsigned char stbl[512] = {
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
        0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
        0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x0d,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f,
        0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,
        0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
        0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x0d
    };

    const DWORD sexp     = ( arg >> 23 ) & 0x1ff;   // sign + 8-bit exponent
    const DWORD mantissa = arg & 0x007fffff;        // 23-bit mantissa

    WORD half = static_cast<WORD>( btbl[ sexp ] + ( mantissa >> stbl[ sexp ] ) );

    // Exponent 0xff with a non-zero mantissa is a NaN. If every set mantissa
    // bit lies in the 13 bits discarded by the shift, the truncated result
    // would be 0x7c00 / 0xfc00, i.e. an infinity. Force a mantissa bit so
    // that a NaN input always produces a NaN output.
    if( ( ( sexp & 0xff ) == 0xff ) &&
        ( mantissa != 0 ) &&
        ( ( half & 0x03ff ) == 0 ) )
    {
        half = static_cast<WORD>( half | 0x0200 );
    }

    return half;
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
 | 
						|
Inline Function:
 | 
						|
    F32ToF16_f
 | 
						|
 | 
						|
Description:
 | 
						|
    Float32 to float16 conversion based on "Fast Half Float Conversions" 
 | 
						|
    by Jeroen van der Zijp
 | 
						|
 | 
						|
Input: 
 | 
						|
    32-bit float value
 | 
						|
Output:
 | 
						|
    16-bit WORD representation of float value
 | 
						|
 | 
						|
\*****************************************************************************/
 | 
						|
inline WORD F32ToF16_f( float arg )
{
    // Obtain the 32-bit pattern of 'arg' through the FLOAT32 union, the
    // same way the other helpers in this file move between float values and
    // their bit patterns. Reading a float object through a DWORD pointer
    // breaks the strict-aliasing rule and can be miscompiled.
    FLOAT32 bits;
    bits.value.f = arg;

    return F32ToF16_d( bits.value.u );
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
 | 
						|
Inline Function:
 | 
						|
    F16ToF32
 | 
						|
 | 
						|
Description:
 | 
						|
    Float16 to float32 conversion
 | 
						|
 | 
						|
Input: 
 | 
						|
    16-bit WORD representation of float16 value
 | 
						|
Output:
 | 
						|
    32-bit DWORD representation of float32 value
 | 
						|
 | 
						|
\*****************************************************************************/
 | 
						|
static inline DWORD F16ToF32( WORD v )
{
    // Converts a float16 bit pattern to the equivalent float32 bit pattern.
    // All arithmetic is done on unsigned values: the same expressions on
    // the int-promoted WORD overflow a signed int whenever the half sign bit
    // is set.
    const DWORD half     = v;
    const DWORD sign     = ( half & 0x8000u ) << 16;   // sign moved to bit 31
    const DWORD exponent = half & 0x7C00u;             // 5-bit exponent field
    const DWORD mantissa = half & 0x03FFu;             // 10-bit mantissa

    if( exponent == 0x7C00u )
    {
        // Infinity (mantissa == 0) or NaN (mantissa != 0): all-ones float32
        // exponent, mantissa bits kept in the top of the 23-bit field.
        return sign | 0x7F800000u | ( mantissa << 13 );
    }

    if( exponent != 0 )
    {
        // Normalized value: move exponent and mantissa into place and
        // re-bias the exponent from 15 to 127 (112 << 23 == 0x38000000).
        return sign | ( ( ( exponent | mantissa ) << 13 ) + 0x38000000u );
    }

    if( mantissa != 0 )
    {
        // Half denormal: value is mantissa * 2^-24. 'index' is the position
        // of the highest set mantissa bit, so the float32 exponent field is
        // 103 + index (103 << 23 == 0x33800000) and the remaining bits are
        // shifted up so that the leading one lands on bit 23 and is dropped.
        const DWORD index = static_cast<DWORD>( bsr( v & 0x03FF ) );

        return sign |
               ( 0x33800000u + ( index << 23 ) ) |
               ( ( mantissa << ( 23 - index ) ) & 0x007FFFFFu );
    }

    // Signed zero.
    return sign;
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32SafeMax
 | 
						|
 | 
						|
Description:
 | 
						|
    Maximum of two floating point numbers.
 | 
						|
 | 
						|
Input:
 | 
						|
    arg1
 | 
						|
    arg2
 | 
						|
    isGen7
 | 
						|
 | 
						|
Output:
 | 
						|
    max( arg1, arg2 )
 | 
						|
 | 
						|
\*****************************************************************************/
 | 
						|
inline float Float32SafeMax( const float arg1, const float arg2, bool isGen7 )
 | 
						|
{
 | 
						|
    // Values of following arrays corresponds to results of sel.l instructions.
 | 
						|
 | 
						|
    static const bool RESULT_preGen7[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
 | 
						|
    //    -Inf   -X      -denorm   -0      +0   +denorm   +X      +Inf    NaN
 | 
						|
        { true  , false , false , false , false , false , false , false , true      },  // -Inf
 | 
						|
        { true  , false , false , false , false , false , false , false , true      },  // -X
 | 
						|
        { true  , true  , true  , true  , true  , true  , false , false , true      },  // -denorm
 | 
						|
        { true  , true  , true  , true  , true  , true  , false , false , true      },  // -0
 | 
						|
        { true  , true  , true  , true  , true  , true  , false , false , true      },  // +0
 | 
						|
        { true  , true  , true  , true  , true  , true  , false , false , true      },  // +denorm
 | 
						|
        { true  , true  , true  , true  , true  , true  , false , false , true      },  // +X
 | 
						|
        { true  , true  , true  , true  , true  , true  , true  , true  , true      },  // +Inf
 | 
						|
        { false , false , false , false , false , false , false , false , false     },  // NaN
 | 
						|
    };
 | 
						|
 | 
						|
    static const bool RESULT_Gen7[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
 | 
						|
    //    -Inf   -X      -denorm   -0      +0   +denorm   +X      +Inf    NaN
 | 
						|
        { true  , false , false , false , false , false , false , false , true      },  // -Inf
 | 
						|
        { true  , false , false , false , false , false , false , false , true      },  // -X
 | 
						|
        { true  , true  , true  , true  , true  , true  , false , false , true      },  // -denorm
 | 
						|
        { true  , true  , true  , true  , false , true  , false , false , true      },  // -0
 | 
						|
        { true  , true  , true  , true  , true  , true  , false , false , true      },  // +0
 | 
						|
        { true  , true  , true  , true  , true  , true  , false , false , true      },  // +denorm
 | 
						|
        { true  , true  , true  , true  , true  , true  , false , false , true      },  // +X
 | 
						|
        { true  , true  , true  , true  , true  , true  , true  , true  , true      },  // +Inf
 | 
						|
        { false , false , false , false , false , false , false , false , false     },  // NaN
 | 
						|
    };
 | 
						|
 | 
						|
    const FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
 | 
						|
    const FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
 | 
						|
 | 
						|
    if( ( t1 == FPU_FLOAT_CLASS_NEG_FINITE || t1 == FPU_FLOAT_CLASS_POS_FINITE ) &&
 | 
						|
        ( t2 == FPU_FLOAT_CLASS_NEG_FINITE || t2 == FPU_FLOAT_CLASS_POS_FINITE ) )
 | 
						|
    {
 | 
						|
        return ( arg1 >= arg2 ) ? arg1 : arg2;
 | 
						|
    }
 | 
						|
 | 
						|
    FLOAT32 f32;
 | 
						|
 | 
						|
    if( isGen7 )
 | 
						|
    {
 | 
						|
        f32.value.f = ( RESULT_Gen7[t1][t2] ) ? arg1 : arg2;
 | 
						|
    }
 | 
						|
    else 
 | 
						|
    {
 | 
						|
        f32.value.f = ( RESULT_preGen7[t1][t2] ) ? arg1 : arg2;
 | 
						|
    }
 | 
						|
 | 
						|
    return f32.value.f;
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    Float32SafeMin
 | 
						|
 | 
						|
Description:
 | 
						|
    Minimum of two floating point numbers.
 | 
						|
 | 
						|
Input:
 | 
						|
    arg1
 | 
						|
    arg2
 | 
						|
    isGen7
 | 
						|
 | 
						|
Output:
 | 
						|
    min( arg1, arg2 )
 | 
						|
 | 
						|
\*****************************************************************************/
 | 
						|
inline float Float32SafeMin( const float arg1, const float arg2, bool isGen7 )
 | 
						|
{
 | 
						|
    // Values of following arrays corresponds to results of sel.ge instruction.
 | 
						|
 | 
						|
    static const bool RESULT_preGen7[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
 | 
						|
    //    -Inf   -X      -denorm   -0      +0   +denorm   +X      +Inf    NaN
 | 
						|
        { false , true  , true  , true  , true  , true  , true  , true  , true      },  // -Inf
 | 
						|
        { false , false , true  , true  , true  , true  , true  , true  , true      },  // -X
 | 
						|
        { false , false , false , false , false , false , true  , true  , true      },  // -denorm
 | 
						|
        { false , false , false , false , false , false , true  , true  , true      },  // -0
 | 
						|
        { false , false , false , false , false , false , true  , true  , true      },  // +0
 | 
						|
        { false , false , false , false , false , false , true  , true  , true      },  // +denorm
 | 
						|
        { false , false , false , false , false , false , false , true  , true      },  // +X
 | 
						|
        { false , false , false , false , false , false , false , false , true      },  // +Inf
 | 
						|
        { false , false , false , false , false , false , false , false , false     },  // NaN
 | 
						|
    };
 | 
						|
 | 
						|
    static const bool RESULT_Gen7[NUM_FPU_FLOAT_CLASSES][NUM_FPU_FLOAT_CLASSES] = {
 | 
						|
    //    -Inf   -X      -denorm   -0      +0   +denorm   +X      +Inf    NaN
 | 
						|
        { false , true  , true  , true  , true  , true  , true  , true  , true      },  // -Inf
 | 
						|
        { false , false , true  , true  , true  , true  , true  , true  , true      },  // -X
 | 
						|
        { false , false , false , false , false , false , true  , true  , true      },  // -denorm
 | 
						|
        { false , false , false , false , true  , false , true  , true  , true      },  // -0
 | 
						|
        { false , false , false , false , false , false , true  , true  , true      },  // +0
 | 
						|
        { false , false , false , false , false , false , true  , true  , true      },  // +denorm
 | 
						|
        { false , false , false , false , false , false , false , true  , true      },  // +X
 | 
						|
        { false , false , false , false , false , false , false , false , true      },  // +Inf
 | 
						|
        { false , false , false , false , false , false , false , false , false     },  // NaN
 | 
						|
    };
 | 
						|
 | 
						|
    const FPU_FLOAT_CLASS t1 = Float32GetClass( arg1 );
 | 
						|
    const FPU_FLOAT_CLASS t2 = Float32GetClass( arg2 );
 | 
						|
 | 
						|
    if( ( t1 == FPU_FLOAT_CLASS_NEG_FINITE || t1 == FPU_FLOAT_CLASS_POS_FINITE ) &&
 | 
						|
        ( t2 == FPU_FLOAT_CLASS_NEG_FINITE || t2 == FPU_FLOAT_CLASS_POS_FINITE ) )
 | 
						|
    {
 | 
						|
        return ( arg1 < arg2 ) ? arg1 : arg2;
 | 
						|
    }
 | 
						|
 | 
						|
    FLOAT32 f32;
 | 
						|
 | 
						|
    if( isGen7 )
 | 
						|
    {
 | 
						|
        f32.value.f = ( RESULT_Gen7[t1][t2] ) ? arg1 : arg2;
 | 
						|
    }
 | 
						|
    else 
 | 
						|
    {
 | 
						|
        f32.value.f = ( RESULT_preGen7[t1][t2] ) ? arg1 : arg2;
 | 
						|
    }
 | 
						|
 | 
						|
    return f32.value.f;
 | 
						|
}
 | 
						|
 | 
						|
/*****************************************************************************\
 | 
						|
Inline Function:
 | 
						|
    FloatSaturate
 | 
						|
 | 
						|
Description:
 | 
						|
 | 
						|
    For a floating-point destination type, the saturation target range is [0.0,
 | 
						|
    1.0]. For a floating-point NaN, there is no "closest value"; any NaN
 | 
						|
    saturates to 0.0. (...) Any floating-point number greater than 1.0,
 | 
						|
    including +INF, saturates to 1.0. Any negative floating-point number,
 | 
						|
    including -INF, saturates to 0.0. Any floating-point number in the range 0.0
 | 
						|
    to 1.0 is not changed by saturation.
 | 
						|
 | 
						|
    -0.0 is changed to +0.0.
 | 
						|
 | 
						|
Input:
 | 
						|
    const float f
 | 
						|
 | 
						|
Output:
 | 
						|
    float
 | 
						|
 | 
						|
\*****************************************************************************/
 | 
						|
inline float FloatSaturate( const float f )
 | 
						|
{
 | 
						|
    switch( Float32GetClass( f ) )
 | 
						|
    {
 | 
						|
    case FPU_FLOAT_CLASS_NEG_INF:
 | 
						|
    case FPU_FLOAT_CLASS_NEG_FINITE:
 | 
						|
    case FPU_FLOAT_CLASS_NEG_DENORM:
 | 
						|
    case FPU_FLOAT_CLASS_NEG_ZERO:
 | 
						|
    case FPU_FLOAT_CLASS_POS_ZERO:
 | 
						|
    case FPU_FLOAT_CLASS_NAN:
 | 
						|
        return 0.f;
 | 
						|
    case FPU_FLOAT_CLASS_POS_DENORM:
 | 
						|
        return f;
 | 
						|
    case FPU_FLOAT_CLASS_POS_FINITE:
 | 
						|
        return ( f <= 1.f ) ? f : 1.f;
 | 
						|
    case FPU_FLOAT_CLASS_POS_INF:
 | 
						|
        return 1.f;
 | 
						|
    default:
 | 
						|
        ASSERT( 0 );
 | 
						|
        return 0.f;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
} // namespace iSTD
 |