Merge pull request #21 from nibrunieAtSi5/bf16-support
[#18] Adding minimal BFloat16 support
This commit is contained in:
commit
eb498c55ba
|
@ -94,6 +94,8 @@ OBJS_SPECIALIZE = \
|
|||
s_f16UIToCommonNaN$(OBJ) \
|
||||
s_commonNaNToF16UI$(OBJ) \
|
||||
s_propagateNaNF16UI$(OBJ) \
|
||||
s_bf16UIToCommonNaN$(OBJ) \
|
||||
s_commonNaNToBF16UI$(OBJ) \
|
||||
s_f32UIToCommonNaN$(OBJ) \
|
||||
s_commonNaNToF32UI$(OBJ) \
|
||||
s_propagateNaNF32UI$(OBJ) \
|
||||
|
@ -114,6 +116,8 @@ OBJS_OTHERS = \
|
|||
s_roundToUI64$(OBJ) \
|
||||
s_roundToI32$(OBJ) \
|
||||
s_roundToI64$(OBJ) \
|
||||
s_normSubnormalBF16Sig$(OBJ) \
|
||||
s_roundPackToBF16$(OBJ) \
|
||||
s_normSubnormalF16Sig$(OBJ) \
|
||||
s_roundPackToF16$(OBJ) \
|
||||
s_normRoundPackToF16$(OBJ) \
|
||||
|
@ -172,6 +176,8 @@ OBJS_OTHERS = \
|
|||
i64_to_extF80M$(OBJ) \
|
||||
i64_to_f128$(OBJ) \
|
||||
i64_to_f128M$(OBJ) \
|
||||
bf16_isSignalingNaN$(OBJ) \
|
||||
bf16_to_f32$(OBJ) \
|
||||
f16_to_ui32$(OBJ) \
|
||||
f16_to_ui64$(OBJ) \
|
||||
f16_to_i32$(OBJ) \
|
||||
|
@ -209,6 +215,7 @@ OBJS_OTHERS = \
|
|||
f32_to_ui64_r_minMag$(OBJ) \
|
||||
f32_to_i32_r_minMag$(OBJ) \
|
||||
f32_to_i64_r_minMag$(OBJ) \
|
||||
f32_to_bf16$(OBJ) \
|
||||
f32_to_f16$(OBJ) \
|
||||
f32_to_f64$(OBJ) \
|
||||
f32_to_extF80$(OBJ) \
|
||||
|
|
|
@ -115,6 +115,8 @@ OBJS_OTHERS = \
|
|||
s_roundToUI64$(OBJ) \
|
||||
s_roundToI32$(OBJ) \
|
||||
s_roundToI64$(OBJ) \
|
||||
s_normSubnormalBF16Sig$(OBJ) \
|
||||
s_roundPackToBF16$(OBJ) \
|
||||
s_normSubnormalF16Sig$(OBJ) \
|
||||
s_roundPackToF16$(OBJ) \
|
||||
s_normRoundPackToF16$(OBJ) \
|
||||
|
@ -173,6 +175,8 @@ OBJS_OTHERS = \
|
|||
i64_to_extF80M$(OBJ) \
|
||||
i64_to_f128$(OBJ) \
|
||||
i64_to_f128M$(OBJ) \
|
||||
bf16_isSignalingNaN$(OBJ) \
|
||||
bf16_to_f32$(OBJ) \
|
||||
f16_to_ui32$(OBJ) \
|
||||
f16_to_ui64$(OBJ) \
|
||||
f16_to_i32$(OBJ) \
|
||||
|
@ -210,6 +214,7 @@ OBJS_OTHERS = \
|
|||
f32_to_ui64_r_minMag$(OBJ) \
|
||||
f32_to_i32_r_minMag$(OBJ) \
|
||||
f32_to_i64_r_minMag$(OBJ) \
|
||||
f32_to_bf16$(OBJ) \
|
||||
f32_to_f16$(OBJ) \
|
||||
f32_to_f64$(OBJ) \
|
||||
f32_to_extF80$(OBJ) \
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions, and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the University nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
=============================================================================*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include "platform.h"
|
||||
#include "specialize.h"
|
||||
#include "softfloat.h"
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming `uiA' has the bit pattern of a BF16 NaN, converts
|
||||
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
||||
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr )
|
||||
{
|
||||
|
||||
if ( softfloat_isSigNaNBF16UI( uiA ) ) {
|
||||
softfloat_raiseFlags( softfloat_flag_invalid );
|
||||
}
|
||||
zPtr->sign = uiA>>15;
|
||||
zPtr->v64 = (uint_fast64_t) uiA<<56;
|
||||
zPtr->v0 = 0;
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions, and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the University nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
=============================================================================*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include "platform.h"
|
||||
#include "specialize.h"
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by `aPtr' into a BF16 NaN, and
|
||||
| returns the bit pattern of this value as an unsigned integer.
|
||||
*----------------------------------------------------------------------------*/
|
||||
uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr )
|
||||
{
|
||||
|
||||
return (uint_fast16_t) aPtr->sign<<15 | 0x7FC0 | aPtr->v64>>56;
|
||||
|
||||
}
|
||||
|
|
@ -117,6 +117,27 @@ uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );
|
|||
uint_fast16_t
|
||||
softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
|
||||
| 16-bit brain floating-point (BF16) signaling NaN.
|
||||
| Note: This macro evaluates its argument more than once.
|
||||
*----------------------------------------------------------------------------*/
|
||||
/* Nonzero when bits 14..7 are all ones, bit 6 is clear, and any of bits 5..0 is set. */
#define softfloat_isSigNaNBF16UI( uiA ) (((uiA) & 0x7FC0) == 0x7F80 && ((uiA) & 0x003F) != 0)
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
|
||||
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
||||
| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by 'aPtr' into a BF16 floating-point
|
||||
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
||||
*----------------------------------------------------------------------------*/
|
||||
uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| The bit pattern for a default generated 32-bit floating-point NaN.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
|
||||
/*----------------------------------------------------------------------------
|
||||
| This file intentionally contains no code.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
|
||||
/*----------------------------------------------------------------------------
|
||||
| This file intentionally contains no code.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
|
@ -87,6 +87,13 @@ struct commonNaN { char _unused; };
|
|||
*----------------------------------------------------------------------------*/
|
||||
#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
|
||||
| 16-bit brain floating-point (BF16) signaling NaN.
|
||||
| Note: This macro evaluates its argument more than once.
|
||||
*----------------------------------------------------------------------------*/
|
||||
/* Nonzero when bits 14..7 are all ones, bit 6 is clear, and any of bits 5..0 is set. */
#define softfloat_isSigNaNBF16UI( uiA ) (((uiA) & 0x7FC0) == 0x7F80 && ((uiA) & 0x003F) != 0)
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
|
||||
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
||||
|
@ -95,6 +102,14 @@ struct commonNaN { char _unused; };
|
|||
*----------------------------------------------------------------------------*/
|
||||
#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) softfloat_raiseFlags( softfloat_flag_invalid )
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
|
||||
| this NaN to the common NaN form, and stores the resulting common NaN at the
|
||||
| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid
|
||||
| exception is raised.
|
||||
*----------------------------------------------------------------------------*/
|
||||
/* Expands to a bare `if' statement: raises the invalid flag when bit 6 (mask 0x0040) of 'uiA' is clear. 'zPtr' is not referenced by the expansion. */
#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0040) ) softfloat_raiseFlags( softfloat_flag_invalid )
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
|
||||
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
||||
|
@ -110,6 +125,17 @@ struct commonNaN { char _unused; };
|
|||
uint_fast16_t
|
||||
softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| The bit pattern for a default generated 16-bit BF16 floating-point NaN.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define defaultNaNBF16UI 0x7FC0
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| Converts the common NaN pointed to by 'aPtr' into a BF16 floating-point
|
||||
| NaN, and returns the bit pattern of this value as an unsigned integer.
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI)
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| The bit pattern for a default generated 32-bit floating-point NaN.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions, and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the University nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
=============================================================================*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "platform.h"
|
||||
#include "internals.h"
|
||||
#include "specialize.h"
|
||||
#include "softfloat.h"
|
||||
|
||||
/*----------------------------------------------------------------------------
| Returns true when the BF16 value `a' has the bit pattern of a signaling
| NaN, as decided by `softfloat_isSigNaNBF16UI' applied to its raw bits.
*----------------------------------------------------------------------------*/
bool bf16_isSignalingNaN( bfloat16_t a )
{
    /* Reinterpret the value as its 16-bit pattern through the union. */
    union ui16_bf16 bits = { .f = a };

    return softfloat_isSigNaNBF16UI( bits.ui );

}
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions, and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the University nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
=============================================================================*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "platform.h"
|
||||
#include "internals.h"
|
||||
#include "specialize.h"
|
||||
#include "softfloat.h"
|
||||
|
||||
/*----------------------------------------------------------------------------
| Converts the BF16 value `a' to 32-bit floating-point.
| A NaN input goes through the common NaN form (`softfloat_bf16UIToCommonNaN'
| followed by `softfloat_commonNaNToF32UI'); an infinity is repacked with the
| same sign.  Every other input is repacked with its sign and exponent
| unchanged and its fraction moved up by 16 bits.
*----------------------------------------------------------------------------*/
float32_t bf16_to_f32( bfloat16_t a )
{
    union ui16_bf16 uA;
    uint_fast16_t uiA;
    bool sign;
    int_fast16_t exp;
    uint_fast16_t frac;
    struct commonNaN commonNaN;
    uint_fast32_t uiZ;
    union ui32_f32 uZ;

    /*------------------------------------------------------------------------
    | Split the input bit pattern into its fields.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA = uA.ui;
    sign = signBF16UI( uiA );
    exp  = expBF16UI( uiA );
    frac = fracBF16UI( uiA );
    /*------------------------------------------------------------------------
    | All-ones exponent: NaN when the fraction is nonzero, otherwise infinity.
    *------------------------------------------------------------------------*/
    if ( exp == 0xFF ) {
        if ( frac ) {
            softfloat_bf16UIToCommonNaN( uiA, &commonNaN );
            uiZ = softfloat_commonNaNToF32UI( &commonNaN );
        } else {
            uiZ = packToF32UI( sign, 0xFF, 0 );
        }
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    | packToF32UI only packs bit fields without any numerical change, so it
    | can be used directly for every remaining input, subnormals included:
    | the 16 input bits are simply padded on the right with 16 zeros.
    *------------------------------------------------------------------------*/
    uiZ = packToF32UI( sign, exp, ((uint_fast32_t) frac) <<16 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions, and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the University nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
=============================================================================*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "platform.h"
|
||||
#include "internals.h"
|
||||
#include "specialize.h"
|
||||
#include "softfloat.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*----------------------------------------------------------------------------
| Converts the 32-bit floating-point value `a' to BF16.
| NaNs go through the common NaN form, infinities and zeros are repacked
| directly, and every other value is handed to `softfloat_roundPackToBF16'.
*----------------------------------------------------------------------------*/
bfloat16_t f32_to_bf16( float32_t a )
{
    union ui32_f32 in;
    union ui16_bf16 out;
    struct commonNaN nanInfo;
    uint_fast32_t bits, frac32;
    uint_fast16_t sig;
    int_fast16_t biasedExp;
    bool sign;

    /*------------------------------------------------------------------------
    | Split the input bit pattern into its fields.
    *------------------------------------------------------------------------*/
    in.f = a;
    bits = in.ui;
    sign = signF32UI( bits );
    biasedExp = expF32UI( bits );
    frac32 = fracF32UI( bits );

    /*------------------------------------------------------------------------
    | All-ones exponent: infinity when the fraction is zero, otherwise NaN.
    *------------------------------------------------------------------------*/
    if ( biasedExp == 0xFF ) {
        if ( ! frac32 ) {
            out.ui = packToBF16UI( sign, 0xFF, 0 );
            return out.f;
        }
        softfloat_f32UIToCommonNaN( bits, &nanInfo );
        out.ui = softfloat_commonNaNToBF16UI( &nanInfo );
        return out.f;
    }

    /*------------------------------------------------------------------------
    | Keep the fraction bits above bit 8 and fold the 9 dropped bits into a
    | sticky bit at the bottom of the kept slice.
    *------------------------------------------------------------------------*/
    sig = frac32>>9 | ((frac32 & 0x1FF) != 0);
    if ( ! (biasedExp | sig) ) {
        out.ui = packToBF16UI( sign, 0, 0 );
        return out.f;
    }

    /*------------------------------------------------------------------------
    | A nonzero exponent field gets bit 14 (0x4000) set above the kept
    | fraction.  A zero exponent field has no such bit; its significand is
    | shifted left by one instead, so the same `biasedExp - 1' can be passed
    | in both cases.
    *------------------------------------------------------------------------*/
    if ( biasedExp ) {
        sig |= 0x4000;
    } else {
        sig <<= 1;
    }
    return softfloat_roundPackToBF16( sign, biasedExp - 1, sig );

}
|
||||
|
|
@ -72,6 +72,9 @@ float16_t f32_to_f16( float32_t a )
|
|||
}
|
||||
/*------------------------------------------------------------------------
|
||||
*------------------------------------------------------------------------*/
|
||||
// frac is the 23-bit fraction field: its bottom 9 bits are OR-reduced into a
|
||||
// sticky flag, the top 14 bits are extracted, and the LSB of that top slice
|
||||
// is OR-ed with the sticky flag
|
||||
frac16 = frac>>9 | ((frac & 0x1FF) != 0);
|
||||
if ( ! (exp | frac16) ) {
|
||||
uiZ = packToF16UI( sign, 0, 0 );
|
||||
|
|
|
@ -43,6 +43,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "softfloat_types.h"
|
||||
|
||||
union ui16_f16 { uint16_t ui; float16_t f; };
|
||||
union ui16_bf16 { uint16_t ui; bfloat16_t f; };
|
||||
union ui32_f32 { uint32_t ui; float32_t f; };
|
||||
union ui64_f64 { uint64_t ui; float64_t f; };
|
||||
|
||||
|
@ -99,6 +100,18 @@ float16_t
|
|||
softfloat_mulAddF16(
|
||||
uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
*----------------------------------------------------------------------------*/
|
||||
/* Field helpers for a BF16 bit pattern: sign in bit 15, exponent in bits 14..7, fraction in bits 6..0. */
#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15))
#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF)
#define fracBF16UI( a ) ((a) & 0x07F)
/* Adds the three fields together, so a carry out of `sig' increments the exponent field. */
#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig))

/* True for any NaN: exponent bits 14..7 (mask 0x7F80) all ones and a nonzero fraction. */
/* The mask must not include bit 6, otherwise NaNs with that bit clear are missed. */
#define isNaNBF16UI( a ) (((~(a) & 0x7F80) == 0) && ((a) & 0x07F))
|
||||
|
||||
bfloat16_t softfloat_roundPackToBF16( bool, int_fast16_t, uint_fast16_t );
|
||||
struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
*----------------------------------------------------------------------------*/
|
||||
#define signF32UI( a ) ((bool) ((uint32_t) (a)>>31))
|
||||
|
|
|
@ -169,6 +169,13 @@ bool f16_le_quiet( float16_t, float16_t );
|
|||
bool f16_lt_quiet( float16_t, float16_t );
|
||||
bool f16_isSignalingNaN( float16_t );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| 16-bit (brain float 16) floating-point operations.
|
||||
*----------------------------------------------------------------------------*/
|
||||
float32_t bf16_to_f32( bfloat16_t );
|
||||
bfloat16_t f32_to_bf16( float32_t );
|
||||
bool bf16_isSignalingNaN( bfloat16_t );
|
||||
|
||||
/*----------------------------------------------------------------------------
|
||||
| 32-bit (single-precision) floating-point operations.
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
|
|
@ -48,6 +48,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
| (typically 'float' and 'double', and possibly 'long double').
|
||||
*----------------------------------------------------------------------------*/
|
||||
typedef struct { uint16_t v; } float16_t;
|
||||
typedef struct { uint16_t v; } bfloat16_t;
|
||||
typedef struct { uint32_t v; } float32_t;
|
||||
typedef struct { uint64_t v; } float64_t;
|
||||
typedef struct { uint64_t v[2]; } float128_t;
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions, and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the University nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
=============================================================================*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include "platform.h"
|
||||
#include "internals.h"
|
||||
|
||||
struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t sig )
|
||||
{
|
||||
int_fast8_t shiftDist;
|
||||
struct exp8_sig16 z;
|
||||
|
||||
shiftDist = softfloat_countLeadingZeros16( sig ) - 8;
|
||||
z.exp = 1 - shiftDist;
|
||||
z.sig = sig<<shiftDist;
|
||||
return z;
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
|
||||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions, and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the University nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
=============================================================================*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "platform.h"
|
||||
#include "internals.h"
|
||||
#include "softfloat.h"
|
||||
|
||||
/** sig last significant bit is sig[7], the 7 LSBs will be used for rounding */
|
||||
bfloat16_t
|
||||
softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
|
||||
{
|
||||
uint_fast8_t roundingMode;
|
||||
bool roundNearEven;
|
||||
uint_fast8_t roundIncrement, roundBits;
|
||||
bool isTiny;
|
||||
uint_fast16_t uiZ;
|
||||
union ui16_bf16 uZ;
|
||||
|
||||
/*------------------------------------------------------------------------
|
||||
*------------------------------------------------------------------------*/
|
||||
roundingMode = softfloat_roundingMode;
|
||||
roundNearEven = (roundingMode == softfloat_round_near_even);
|
||||
roundIncrement = 0x40;
|
||||
if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
|
||||
roundIncrement =
|
||||
(roundingMode
|
||||
== (sign ? softfloat_round_min : softfloat_round_max))
|
||||
? 0x7F
|
||||
: 0;
|
||||
}
|
||||
roundBits = sig & 0x7F;
|
||||
/*------------------------------------------------------------------------
|
||||
*------------------------------------------------------------------------*/
|
||||
if ( 0xFD <= (unsigned int) exp ) {
|
||||
if ( exp < 0 ) {
|
||||
/*----------------------------------------------------------------
|
||||
*----------------------------------------------------------------*/
|
||||
isTiny =
|
||||
(softfloat_detectTininess == softfloat_tininess_beforeRounding)
|
||||
|| (exp < -1) || (sig + roundIncrement < 0x8000);
|
||||
sig = softfloat_shiftRightJam32( sig, -exp );
|
||||
exp = 0;
|
||||
roundBits = sig & 0x7F;
|
||||
if ( isTiny && roundBits ) {
|
||||
softfloat_raiseFlags( softfloat_flag_underflow );
|
||||
}
|
||||
} else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) {
|
||||
/*----------------------------------------------------------------
|
||||
*----------------------------------------------------------------*/
|
||||
softfloat_raiseFlags(
|
||||
softfloat_flag_overflow | softfloat_flag_inexact );
|
||||
uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement;
|
||||
goto uiZ;
|
||||
}
|
||||
}
|
||||
/*------------------------------------------------------------------------
|
||||
*------------------------------------------------------------------------*/
|
||||
sig = (sig + roundIncrement)>>7;
|
||||
if ( roundBits ) {
|
||||
softfloat_exceptionFlags |= softfloat_flag_inexact;
|
||||
#ifdef SOFTFLOAT_ROUND_ODD
|
||||
if ( roundingMode == softfloat_round_odd ) {
|
||||
sig |= 1;
|
||||
goto packReturn;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven);
|
||||
if ( ! sig ) exp = 0;
|
||||
/*------------------------------------------------------------------------
|
||||
*------------------------------------------------------------------------*/
|
||||
packReturn:
|
||||
uiZ = packToBF16UI( sign, exp, sig );
|
||||
uiZ:
|
||||
uZ.ui = uiZ;
|
||||
return uZ.f;
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue