Files

493 lines
13 KiB
C++

/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2021 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#pragma once
#include "types.h"
#include <math.h> // for powf()
namespace iSTD
{
/*****************************************************************************\
Inline Function:
    FloatToLong

Description:
    Converts a float to a long, dropping the fractional part. MSVC builds for
    Windows use the SSE truncating conversion (avoiding the FP stack stall);
    all other builds use a plain cast.

Input:
    value - float to convert

Output:
    long - value with its fractional part discarded
\*****************************************************************************/
__forceinline long FloatToLong( const float value )
{
#if !( defined(_WIN32) && defined(_MSC_VER) )
    // Portable path: the language conversion already truncates.
    return static_cast<long>( value );
#else
    // Broadcast the scalar to every lane, convert with truncation, then read
    // the lowest lane back as an integer.
    const __m128 lanes = _mm_set_ps1( value );
    const __m128i truncated = _mm_cvttps_epi32( lanes );
    return _mm_cvtsi128_si32( truncated );
#endif
}
/*****************************************************************************\
Inline Function:
    Ceiling

Description:
    Rounds a float up to the smallest integer that is not less than it.
    Values that are already integral are returned unchanged.

Input:
    value - float to round up

Output:
    long - ceiling of value
\*****************************************************************************/
__forceinline long Ceiling( const float value )
{
    // FloatToLong() truncates toward zero. For a negative input the truncated
    // value is therefore already the ceiling, so step up only when truncation
    // landed strictly below the input (positive, non-integral values).
    const long truncated = FloatToLong( value );
    if( value > (float)truncated )
    {
        return truncated + 1;
    }
    return truncated;
}
/*****************************************************************************\
Inline Function:
    Scale

Description:
    Scales a [0.0,1.0] float to a [0,max] integer. The product is converted
    with FloatToLong(), so the fractional part is dropped rather than rounded.

Input:
    value - float to scale; asserted (through CheckLimits) against the
            limits 0.0f and 1.0f
    max   - integer value that 1.0f maps to

Output:
    DWORD - value * max converted to an integer
\*****************************************************************************/
__forceinline DWORD Scale( float value, DWORD max )
{
    ASSERT( CheckLimits( value, 0.0f, 1.0f ) );

    const float scaled = value * static_cast<float>( max );
    const long truncated = FloatToLong( scaled );
    return static_cast<DWORD>( truncated );
}
/*****************************************************************************\
Inline Function:
    Normalize

Description:
    Maps a floating-point value from the range [min,max] to [0.0f,1.0f].
    The input is not clamped and max == min is not guarded against.

Input:
    value - value to normalize
    min   - value that maps to 0.0f
    max   - value that maps to 1.0f

Output:
    float - ( value - min ) / ( max - min )
\*****************************************************************************/
__forceinline float Normalize( float value, float min, float max )
{
    const float offset = value - min;
    const float span = max - min;
    return offset / span;
}
/*****************************************************************************\
Inline Function:
    Lerp

Description:
    Linear interpolation between x and y, weighted by z.

Input:
    x - value returned when z is 0
    y - value returned when z is 1
    z - interpolation weight; not clamped

Output:
    float - x * ( 1 - z ) + y * z
\*****************************************************************************/
__forceinline float Lerp( float x, float y, float z )
{
    const float weightX = 1 - z;
    const float weightY = z;
    return ( x * weightX ) + ( y * weightY );
}
/*****************************************************************************\
Inline Function:
    FloatToFixed

Description:
    Converts a floating-point number to the specified fixed-point number by
    scaling it by 2^fractional and converting the product with FloatToLong()
    (fractional part dropped).

Input:
    value      - float to convert
    whole      - number of whole (integer) bits in the fixed-point format
    fractional - number of fractional bits in the fixed-point format
    round      - when non-zero, 0.5 * 2^-round is added to value before it
                 is scaled; 0 disables the adjustment

Output:
    Type - fixed-point representation of value
\*****************************************************************************/
template <class Type>
__forceinline Type FloatToFixed(
    float value,
    const int whole,
    const int fractional,
    const int round = 0 )
{
    ASSERT( fractional + whole <= 32 );

    // Optional floating point rounding precision
    if( round != 0 )
    {
        value += 0.5f * ( 1.0f / (float)( 1ULL << round ) );
    }

    // The scale is built from a 64-bit shift so that 31 or 32 fractional
    // bits, which the assert above allows, do not overflow a signed int.
    Type fixed = (Type)FloatToLong( value * (float)( 1ULL << fractional ) );

#ifdef _DEBUG
    // Every bit above the fixed-point field must replicate the sign. A full
    // 32-bit field leaves no such bits, and shifting a 32-bit constant by 32
    // is undefined, so the mask is empty in that case.
    const int fieldBits = whole + fractional;
    const DWORD mask = ( fieldBits < 32 ) ? ( 0xffffffff << fieldBits ) : 0;
    ASSERT(
        (( fixed >= 0 ) && (( fixed & mask ) == 0 )) ||
        (( fixed < 0 ) && (( fixed & mask ) == mask )) );
#endif

    return fixed;
}
/*****************************************************************************\
Inline Function:
    FixedToFloat

Description:
    Converts the specified fixed-point number to a floating-point number by
    dividing it by 2^fractional. When any bit at or above position
    ( whole + fractional ) is set, the number is treated as negative and the
    bits from that position up to bit 31 are set before the conversion. The
    result is only negative if Type is a signed type.

Input:
    fixed      - fixed-point value to convert
    whole      - number of whole (integer) bits in the fixed-point format
    fractional - number of fractional bits in the fixed-point format

Output:
    float - floating-point value of fixed
\*****************************************************************************/
template <class Type>
__forceinline float FixedToFloat(
    Type fixed,
    const int whole,
    const int fractional )
{
    ASSERT( fractional + whole <= 32 );

    const int fieldBits = fractional + whole;

    // A full 32-bit field has no bits above it to test or pad, and shifting
    // a 32-bit value by 32 is undefined, so sign padding is limited to
    // narrower fields.
    if( ( fieldBits < 32 ) && ( ( fixed >> fieldBits ) != 0 ) )
    {
        // pad the sign from left to 32 bit
        fixed |= ( 0xffffffff << fieldBits );
    }

    // 64-bit shift so that 31 or 32 fractional bits do not overflow an int.
    return (float)fixed / (float)( 1ULL << fractional );
}
/*****************************************************************************\
Inline Function:
    Float32ToSnorm

Description:
    Converts a 32bit float to a bitcount size SNORM value. NaN converts to 0;
    anything outside [-1.0f,1.0f], including +/-Inf, is clamped to that range.
    The clamped value is scaled by 2^(bitcount-1) - 1 and rounded half away
    from zero.

Template Parameter:
    bitcount - width of the SNORM value in bits

Input:
    value - float to convert

Output:
    DWORD - signed result converted to DWORD; negative results are not
            masked down to bitcount bits
\*****************************************************************************/
template< DWORD bitcount >
inline DWORD Float32ToSnorm( const float value )
{
    ASSERT( bitcount <= 32 );

    FLOAT32 bits;
    bits.value.f = value;

    const bool expAllOnes = ( bits.exponent == BITMASK( 8 ) );

    // NaN -> 0
    if( expAllOnes && bits.fraction != 0 )
    {
        return (DWORD)0;
    }

    const bool isInfinity = expAllOnes && bits.fraction == 0;
    const bool isPosInfinity = isInfinity && bits.sign == 0;
    const bool isNegInfinity = isInfinity && bits.sign == 1;

    // Clamp > 1.0f || +Inf -> 1.0f
    // Clamp < -1.0f || -Inf -> -1.0f
    float clamped = value;
    if( value > 1.0f || isPosInfinity )
    {
        clamped = 1.0f;
    }
    else if( value < -1.0f || isNegInfinity )
    {
        clamped = -1.0f;
    }

    // Largest magnitude representable in the signed integer range
    const float maxMagnitude = (float)( ( 0x1 << ( bitcount - 1 ) ) - 1 );
    const float scaled = clamped * maxMagnitude;

    // Bias by half a unit away from zero so the truncating conversion rounds
    const float biased = ( scaled >= 0 ) ? ( scaled + 0.5f ) : ( scaled - 0.5f );

    return (DWORD)FloatToLong( biased );
}
/*****************************************************************************\
Inline Function:
    Float32ToSnormSM

Description:
    Converts a 32bit float to a bitcount size SNORM value in Sign Magnitude
    format (SM). NaN converts to 0; anything outside [-1.0f,1.0f], including
    +/-Inf, is clamped to that range. The clamped value is scaled by
    2^(bitcount-1) - 1 and rounded half away from zero. A negative result is
    stored as its magnitude with bit ( bitcount - 1 ) set.

Template Parameter:
    bitcount - width of the SNORM value in bits

Input:
    value - float to convert

Output:
    DWORD - sign-magnitude encoded SNORM value
\*****************************************************************************/
template< DWORD bitcount >
inline DWORD Float32ToSnormSM( const float value )
{
    ASSERT( bitcount <= 32 );

    FLOAT32 bits;
    bits.value.f = value;

    const bool expAllOnes = ( bits.exponent == BITMASK( 8 ) );
    const bool isNaN = expAllOnes && bits.fraction != 0;

    // NaN -> 0 (the initial value is kept)
    long encoded = 0;

    if( !isNaN )
    {
        const bool isInfinity = expAllOnes && bits.fraction == 0;
        const bool isPosInfinity = isInfinity && bits.sign == 0;
        const bool isNegInfinity = isInfinity && bits.sign == 1;

        // Clamp > 1.0f || +Inf -> 1.0f
        // Clamp < -1.0f || -Inf -> -1.0f
        float clamped = value;
        if( value > 1.0f || isPosInfinity )
        {
            clamped = 1.0f;
        }
        else if( value < -1.0f || isNegInfinity )
        {
            clamped = -1.0f;
        }

        // Convert float scale to integer scale
        const float scaled =
            clamped * (float)( ( 0x1 << ( bitcount - 1 ) ) - 1 );

        // Bias by half a unit away from zero, then drop the fraction
        const float biased =
            ( scaled >= 0 ) ? ( scaled + 0.5f ) : ( scaled - 0.5f );
        encoded = FloatToLong( biased );

        if( encoded < 0 )
        {
            // Keep the magnitude and flag the sign in the top bit of the field
            encoded = ( encoded * -1 ) | ( 0x1 << ( bitcount - 1 ) );
        }
    }

    return (DWORD)encoded;
}
/*****************************************************************************\
Inline Function:
    LinearToSRGB

Description:
    Converts a 32bit float in linear space to SRGB space. The input is
    asserted to lie in [0.0f,1.0f]; values below 0.0f still produce 0.0f and
    values of 1.0f or more still produce 1.0f.

Input:
    value - linear-space value

Output:
    float - SRGB-space value in [0.0f,1.0f]
\*****************************************************************************/
inline float LinearToSRGB( const float value )
{
    ASSERT( value >= 0.0f && value <= 1.0f );

    // Below the 0.0031308f threshold the curve is the linear segment
    // 12.92 * value; above it, the 1/2.4 power segment applies.
    const float srgbValue =
        ( value < 0.0f )       ? 0.0f :
        ( value < 0.0031308f ) ? 12.92f * value :
        ( value < 1.0f )       ? ( 1.055f * powf( value, (1.0f/2.4f) ) ) - 0.055f :
                                 1.0f;

    ASSERT( srgbValue >= 0.0f && srgbValue <= 1.0f );
    return srgbValue;
}
/*****************************************************************************\
Inline Function:
    SRGBToLinear

Description:
    Converts a 32bit float in SRGB space to linear space. The input is
    asserted to lie in [0.0f,1.0f] but is not clamped.

Input:
    value - SRGB-space value

Output:
    float - linear-space value
\*****************************************************************************/
inline float SRGBToLinear( const float value )
{
    ASSERT( value >= 0.0f && value <= 1.0f );

    // Up to and including 0.04045f the curve is the linear segment
    // value / 12.92; above it, the 2.4 power segment applies.
    const float linearValue = ( value <= 0.04045f )
        ? value / 12.92f
        : powf( ( ( value + 0.055f ) / 1.055f ), 2.4f );

    ASSERT( linearValue >= 0.0f && linearValue <= 1.0f );
    return linearValue;
}
/*****************************************************************************\
Inline Function:
    Float32ToUnorm

Description:
    Converts a 32bit float to a bitcount size UNORM value. NaN converts to 0;
    anything outside [0.0f,1.0f], including +/-Inf, is clamped to that range.
    The clamped value is scaled by BITMASK( bitcount ), rounded by adding
    0.5f and truncating, and finally limited to BITMASK( bitcount ).

Template Parameter:
    bitcount - width of the UNORM value in bits

Input:
    value - float to convert

Output:
    DWORD - UNORM value, at most BITMASK( bitcount )
\*****************************************************************************/
template< DWORD bitcount >
inline DWORD Float32ToUnorm( const float value )
{
    ASSERT( bitcount <= 32 );
    DWORD unormValue = 0;
    FLOAT32 f32;
    f32.value.f = value;

    // NaN -> 0
    if( f32.exponent == BITMASK( 8 ) &&
        f32.fraction != 0 )
    {
        unormValue = 0;
    }
    else
    {
        const bool isPosInfinity =
            f32.exponent == BITMASK( 8 ) &&
            f32.fraction == 0 &&
            f32.sign == 0;
        const bool isNegInfinity =
            f32.exponent == BITMASK( 8 ) &&
            f32.fraction == 0 &&
            f32.sign == 1;

        // Clamp > 1.0f || +Inf -> 1.0f
        // Clamp < 0.0f || -Inf -> 0.0f
        if( f32.value.f > 1.0f || isPosInfinity )
        {
            f32.value.f = 1.0f;
        }
        else if ( f32.value.f < 0.0f || isNegInfinity )
        {
            f32.value.f = 0.0f;
        }

        // Convert float scale to integer scale
        f32.value.f *= BITMASK( bitcount );

        // Convert to integer by rounding and dropping
        // fractional bits
        f32.value.f += 0.5f;

        // Convert straight to DWORD instead of going through FloatToLong():
        // a long may be only 32 bits wide and cannot then hold results
        // above 0x7fffffff.
        unormValue = (DWORD)f32.value.f;

        // Rounding can land one above the maximum; limit to the field range
        unormValue = Min( (DWORD)unormValue, (DWORD)BITMASK( bitcount ) );
    }
    return unormValue;
}
/*****************************************************************************\
Inline Function:
Float32ToFloat16
Description:
Converts a 32bit float to a 16bit float
\*****************************************************************************/
inline unsigned short Float32ToFloat16( const float value )
{
FLOAT16 f16;
f16.value.u = 0;
FLOAT32 f32;
f32.value.f = value;
// +/-0 32bit -> +/- 0 16bit
if( f32.exponent == 0 &&
f32.fraction == 0 )
{
f16.exponent = 0;
f16.fraction = 0;
f16.sign = f32.sign;
}
// NaN 32bit -> NaN 16bit
else if( f32.exponent == BITMASK( 8 ) &&
f32.fraction != 0 )
{
f16.exponent = BITMASK( 5 );
f16.fraction = 0x1 << 9;
f16.sign = 1;
}
// +/-Inf 32bit -> +/-Inf 16bit
else if (
f32.exponent == BITMASK( 8 ) &&
f32.fraction == 0 )
{
f16.exponent = BITMASK( 5 );
f16.fraction = 0;
f16.sign = f32.sign;
}
else
{
const long ExpBias16 = 31 / 2;
const long ExpBais32 = 255 / 2;
const long expUnbiased = f32.exponent - ExpBais32;
// 32bit normalized value out of minimum range of 16bit capacity
// resulting in minimum non-denorm 16bit value
if ( expUnbiased <= -25 )
{
f16.exponent = 0;
f16.fraction = 0;
}
// 32bit normalized value within the 16bit denormalized unbiased
// exponent range
else if( expUnbiased > -25 && expUnbiased < -14 )
{
long adjustedUnbiasedExp = expUnbiased;
unsigned long adjustedFranction = f32.fraction;
// Shift the implicit 1 into the fraction, making implicit 0
// as denormalized format dictates
adjustedFranction >>= 1;
adjustedFranction |= ( 0x1 << 22 );
// Round off the fraction until the unbiased exponent is
// within a denormalized representable range
unsigned long denormShiftAmount = -1 * ( adjustedUnbiasedExp + 15 );
ASSERT( denormShiftAmount < 10 );
adjustedFranction >>= denormShiftAmount;
adjustedUnbiasedExp += denormShiftAmount;
f16.exponent = adjustedUnbiasedExp + ExpBias16;
f16.fraction = ( adjustedFranction ) >> ( 23 - 10 );
// Assert that the 16bit is actually denormalized. The result
// should never be 0 because of the addition of the implicit 1
ASSERT( f16.exponent == 0 && f16.fraction != 0 );
}
// 32bit value out of maximum dynamic range of 16bit capacity
// resulting in maximum 16bit value
else if( expUnbiased > 15 )
{
f16.exponent = 15 + ExpBias16;
f16.fraction = BITMASK( 10 );
}
// Otherwise, normalized down conversion falls within normalized
// 16bit range
else
{
f16.exponent = expUnbiased + ExpBias16;
f16.fraction = ( f32.fraction ) >> ( 23 - 10 );
}
// Sign is preserved under any circumstance
f16.sign = f32.sign;
}
return f16.value.u;
}
} // iSTD