Merge pull request #21 from nibrunieAtSi5/bf16-support

[#18] Adding minimal BFloat16 support
This commit is contained in:
Jerry Zhao 2023-10-13 15:23:50 -07:00 committed by GitHub
commit eb498c55ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 615 additions and 0 deletions

View File

@ -94,6 +94,8 @@ OBJS_SPECIALIZE = \
s_f16UIToCommonNaN$(OBJ) \
s_commonNaNToF16UI$(OBJ) \
s_propagateNaNF16UI$(OBJ) \
s_bf16UIToCommonNaN$(OBJ) \
s_commonNaNToBF16UI$(OBJ) \
s_f32UIToCommonNaN$(OBJ) \
s_commonNaNToF32UI$(OBJ) \
s_propagateNaNF32UI$(OBJ) \
@ -114,6 +116,8 @@ OBJS_OTHERS = \
s_roundToUI64$(OBJ) \
s_roundToI32$(OBJ) \
s_roundToI64$(OBJ) \
s_normSubnormalBF16Sig$(OBJ) \
s_roundPackToBF16$(OBJ) \
s_normSubnormalF16Sig$(OBJ) \
s_roundPackToF16$(OBJ) \
s_normRoundPackToF16$(OBJ) \
@ -172,6 +176,8 @@ OBJS_OTHERS = \
i64_to_extF80M$(OBJ) \
i64_to_f128$(OBJ) \
i64_to_f128M$(OBJ) \
bf16_isSignalingNaN$(OBJ) \
bf16_to_f32$(OBJ) \
f16_to_ui32$(OBJ) \
f16_to_ui64$(OBJ) \
f16_to_i32$(OBJ) \
@ -209,6 +215,7 @@ OBJS_OTHERS = \
f32_to_ui64_r_minMag$(OBJ) \
f32_to_i32_r_minMag$(OBJ) \
f32_to_i64_r_minMag$(OBJ) \
f32_to_bf16$(OBJ) \
f32_to_f16$(OBJ) \
f32_to_f64$(OBJ) \
f32_to_extF80$(OBJ) \

View File

@ -115,6 +115,8 @@ OBJS_OTHERS = \
s_roundToUI64$(OBJ) \
s_roundToI32$(OBJ) \
s_roundToI64$(OBJ) \
s_normSubnormalBF16Sig$(OBJ) \
s_roundPackToBF16$(OBJ) \
s_normSubnormalF16Sig$(OBJ) \
s_roundPackToF16$(OBJ) \
s_normRoundPackToF16$(OBJ) \
@ -173,6 +175,8 @@ OBJS_OTHERS = \
i64_to_extF80M$(OBJ) \
i64_to_f128$(OBJ) \
i64_to_f128M$(OBJ) \
bf16_isSignalingNaN$(OBJ) \
bf16_to_f32$(OBJ) \
f16_to_ui32$(OBJ) \
f16_to_ui64$(OBJ) \
f16_to_i32$(OBJ) \
@ -210,6 +214,7 @@ OBJS_OTHERS = \
f32_to_ui64_r_minMag$(OBJ) \
f32_to_i32_r_minMag$(OBJ) \
f32_to_i64_r_minMag$(OBJ) \
f32_to_bf16$(OBJ) \
f32_to_f16$(OBJ) \
f32_to_f64$(OBJ) \
f32_to_extF80$(OBJ) \

View File

@ -0,0 +1,59 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdint.h>
#include "platform.h"
#include "specialize.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a BF16 NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr )
{
if ( softfloat_isSigNaNBF16UI( uiA ) ) {
softfloat_raiseFlags( softfloat_flag_invalid );
}
zPtr->sign = uiA>>15;
zPtr->v64 = (uint_fast64_t) uiA<<56;
zPtr->v0 = 0;
}

View File

@ -0,0 +1,51 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdint.h>
#include "platform.h"
#include "specialize.h"
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a BF16 NaN, and
| returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr )
{
return (uint_fast16_t) aPtr->sign<<15 | 0x7FC0 | aPtr->v64>>56;
}

View File

@ -117,6 +117,27 @@ uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );
uint_fast16_t
softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
/*----------------------------------------------------------------------------
| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
| 16-bit brain floating-point (BF16) signaling NaN.
| Note: This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNBF16UI( uiA ) ((((uiA) & 0x7FC0) == 0x7F80) && ((uiA) & 0x003F))
/*----------------------------------------------------------------------------
| Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr );
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr );
/*----------------------------------------------------------------------------
| The bit pattern for a default generated 32-bit floating-point NaN.
*----------------------------------------------------------------------------*/

View File

@ -0,0 +1,5 @@
/*----------------------------------------------------------------------------
| This file intentionally contains no code.
*----------------------------------------------------------------------------*/

View File

@ -0,0 +1,5 @@
/*----------------------------------------------------------------------------
| This file intentionally contains no code.
*----------------------------------------------------------------------------*/

View File

@ -87,6 +87,13 @@ struct commonNaN { char _unused; };
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
/*----------------------------------------------------------------------------
| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
| 16-bit brain floating-point (BF16) signaling NaN.
| Note: This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNBF16UI( uiA ) ((((uiA) & 0x7FC0) == 0x7F80) && ((uiA) & 0x003F))
/*----------------------------------------------------------------------------
| Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
@ -95,6 +102,14 @@ struct commonNaN { char _unused; };
*----------------------------------------------------------------------------*/
#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) softfloat_raiseFlags( softfloat_flag_invalid )
/*----------------------------------------------------------------------------
| Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by 'zPtr'. If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0040) ) softfloat_raiseFlags( softfloat_flag_invalid )
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
@ -110,6 +125,17 @@ struct commonNaN { char _unused; };
uint_fast16_t
softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
/*----------------------------------------------------------------------------
| The bit pattern for a default generated 16-bit BF16 floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNBF16UI 0x7FC0
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI)
/*----------------------------------------------------------------------------
| The bit pattern for a default generated 32-bit floating-point NaN.
*----------------------------------------------------------------------------*/

View File

@ -0,0 +1,51 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
bool bf16_isSignalingNaN( bfloat16_t a )
{
union ui16_bf16 uA;
uA.f = a;
return softfloat_isSigNaNBF16UI( uA.ui );
}

90
source/bf16_to_f32.c Normal file
View File

@ -0,0 +1,90 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
float32_t bf16_to_f32( bfloat16_t a )
{
union ui16_bf16 uA;
uint_fast16_t uiA;
bool sign;
int_fast16_t exp;
uint_fast16_t frac;
struct commonNaN commonNaN;
uint_fast32_t uiZ;
struct exp8_sig16 normExpSig;
union ui32_f32 uZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uA.f = a;
uiA = uA.ui;
sign = signBF16UI( uiA );
exp = expBF16UI( uiA );
frac = fracBF16UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
// NaN or Inf
if ( exp == 0xFF ) {
if ( frac ) {
softfloat_bf16UIToCommonNaN( uiA, &commonNaN );
uiZ = softfloat_commonNaNToF32UI( &commonNaN );
} else {
uiZ = packToF32UI( sign, 0xFF, 0 );
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
// packToF32UI simply packs bitfields without any numerical change
// which means it can be used directly for any BF16 to f32 conversions which
// does not require bits manipulation
// (that is everything where the 16-bit are just padded right with 16 zeros, including
// subnormal numbers)
uiZ = packToF32UI( sign, exp, ((uint_fast32_t) frac) <<16 );
uiZ:
uZ.ui = uiZ;
return uZ.f;
}

105
source/f32_to_bf16.c Normal file
View File

@ -0,0 +1,105 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
#include <inttypes.h>
#include <stdio.h>
bfloat16_t f32_to_bf16( float32_t a )
{
union ui32_f32 uA;
uint_fast32_t uiA;
bool sign;
int_fast16_t exp;
uint_fast32_t frac;
struct commonNaN commonNaN;
uint_fast16_t uiZ, frac16;
union ui16_bf16 uZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
uA.f = a;
uiA = uA.ui;
sign = signF32UI( uiA );
exp = expF32UI( uiA );
frac = fracF32UI( uiA );
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
// infinity or NaN cases
if ( exp == 0xFF ) {
if ( frac ) {
// NaN case
softfloat_f32UIToCommonNaN( uiA, &commonNaN );
uiZ = softfloat_commonNaNToBF16UI( &commonNaN );
} else {
// infinity case
uiZ = packToBF16UI( sign, 0xFF, 0 );
}
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
// frac is a 24-bit mantissa, right shifted by 9
// In the normal case, (24-9) = 15 are set
frac16 = frac>>9 | ((frac & 0x1FF) != 0);
if ( ! (exp | frac16) ) {
uiZ = packToBF16UI( sign, 0, 0 );
goto uiZ;
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
// softfloat_roundPackToBF16 exponent argument (2nd argument)
// must correspond to the exponent of fracIn[13] bits
// (fracIn is the 3rd and last argument)
uint_fast32_t mask = exp ? 0x4000 : 0x0; // implicit one mask added if input is a normal number
// exponent for the lowest normal and largest subnormal should be equal
// but is not in IEEE encoding so mantissa must be partially normalized
// (by one bit) for subnormal numbers. Such that (exp - 1) corresponds
// to the exponent of frac16[13]
frac16 = frac16 << (exp ? 0 : 1);
return softfloat_roundPackToBF16( sign, exp - 1, frac16 | mask );
uiZ:
uZ.ui = uiZ;
return uZ.f;
}

View File

@ -72,6 +72,9 @@ float16_t f32_to_f16( float32_t a )
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
// frac is a 24-bit significand, the bottom 9 bits LSB are extracted and OR-red
// into a sticky flag, the top 15 MSBs are extracted, the LSB of this top slice
// is OR-red with the sticky
frac16 = frac>>9 | ((frac & 0x1FF) != 0);
if ( ! (exp | frac16) ) {
uiZ = packToF16UI( sign, 0, 0 );

View File

@ -43,6 +43,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "softfloat_types.h"
union ui16_f16 { uint16_t ui; float16_t f; };
union ui16_bf16 { uint16_t ui; bfloat16_t f; };
union ui32_f32 { uint32_t ui; float32_t f; };
union ui64_f64 { uint64_t ui; float64_t f; };
@ -99,6 +100,18 @@ float16_t
softfloat_mulAddF16(
uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15))
#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF)
#define fracBF16UI( a ) ((a) & 0x07F)
#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig))
#define isNaNBF16UI( a ) (((~(a) & 0x7FC0) == 0) && ((a) & 0x07F))
bfloat16_t softfloat_roundPackToBF16( bool, int_fast16_t, uint_fast16_t );
struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#define signF32UI( a ) ((bool) ((uint32_t) (a)>>31))

View File

@ -169,6 +169,13 @@ bool f16_le_quiet( float16_t, float16_t );
bool f16_lt_quiet( float16_t, float16_t );
bool f16_isSignalingNaN( float16_t );
/*----------------------------------------------------------------------------
| 16-bit (brain float 16) floating-point operations.
*----------------------------------------------------------------------------*/
float32_t bf16_to_f32( bfloat16_t );
bfloat16_t f32_to_bf16( float32_t );
bool bf16_isSignalingNaN( bfloat16_t );
/*----------------------------------------------------------------------------
| 32-bit (single-precision) floating-point operations.
*----------------------------------------------------------------------------*/

View File

@ -48,6 +48,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
| (typically 'float' and 'double', and possibly 'long double').
*----------------------------------------------------------------------------*/
typedef struct { uint16_t v; } float16_t;
typedef struct { uint16_t v; } bfloat16_t;
typedef struct { uint32_t v; } float32_t;
typedef struct { uint64_t v; } float64_t;
typedef struct { uint64_t v[2]; } float128_t;

View File

@ -0,0 +1,52 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdint.h>
#include "platform.h"
#include "internals.h"
struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t sig )
{
int_fast8_t shiftDist;
struct exp8_sig16 z;
shiftDist = softfloat_countLeadingZeros16( sig ) - 8;
z.exp = 1 - shiftDist;
z.sig = sig<<shiftDist;
return z;
}

114
source/s_roundPackToBF16.c Normal file
View File

@ -0,0 +1,114 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"
/** sig last significant bit is sig[7], the 7 LSBs will be used for rounding */
bfloat16_t
softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
{
uint_fast8_t roundingMode;
bool roundNearEven;
uint_fast8_t roundIncrement, roundBits;
bool isTiny;
uint_fast16_t uiZ;
union ui16_bf16 uZ;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
roundingMode = softfloat_roundingMode;
roundNearEven = (roundingMode == softfloat_round_near_even);
roundIncrement = 0x40;
if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? softfloat_round_min : softfloat_round_max))
? 0x7F
: 0;
}
roundBits = sig & 0x7F;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
if ( 0xFD <= (unsigned int) exp ) {
if ( exp < 0 ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
isTiny =
(softfloat_detectTininess == softfloat_tininess_beforeRounding)
|| (exp < -1) || (sig + roundIncrement < 0x8000);
sig = softfloat_shiftRightJam32( sig, -exp );
exp = 0;
roundBits = sig & 0x7F;
if ( isTiny && roundBits ) {
softfloat_raiseFlags( softfloat_flag_underflow );
}
} else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) {
/*----------------------------------------------------------------
*----------------------------------------------------------------*/
softfloat_raiseFlags(
softfloat_flag_overflow | softfloat_flag_inexact );
uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement;
goto uiZ;
}
}
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
sig = (sig + roundIncrement)>>7;
if ( roundBits ) {
softfloat_exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
if ( roundingMode == softfloat_round_odd ) {
sig |= 1;
goto packReturn;
}
#endif
}
sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven);
if ( ! sig ) exp = 0;
/*------------------------------------------------------------------------
*------------------------------------------------------------------------*/
packReturn:
uiZ = packToBF16UI( sign, exp, sig );
uiZ:
uZ.ui = uiZ;
return uZ.f;
}