Continuing f32_to_bf16 development

2023-08-12 08:00:12 +02:00 · 2023-08-12 08:00:12 +02:00 · fab20258f0
parent ac8688db85
commit fab20258f0
3 changed files with 14 additions and 12 deletions
--- a/source/RISCV/specialize.h
+++ b/source/RISCV/specialize.h
@@ -120,7 +120,7 @@ uint_fast16_t
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 16-bit BF16 floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF16UI 0x7FC0
+#define defaultNaNBF16UI 0x7FC0

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
--- a/source/f32_to_bf16.c
+++ b/source/f32_to_bf16.c
@@ -75,7 +75,8 @@ bfloat16_t f32_to_bf16( float32_t a )
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
-    frac16 = frac>>16 | ((frac & 0xFFFF) != 0);
+    // frac is a 24-bit mantissa, right shifted by 9; any nonzero bit among the 9 discarded LSBs is OR-ed into bit 0 as a sticky bit
+    frac16 = frac>>9 | ((frac & 0x1FF) != 0);
    if ( ! (exp | frac16) ) {
        uiZ = packToBF16UI( sign, 0, 0 );
        goto uiZ;
--- a/source/s_roundPackToBF16.c
+++ b/source/s_roundPackToBF16.c
@@ -40,6 +40,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "internals.h"
 #include "softfloat.h"

+/** The least significant bit of sig kept in the result is sig[7]; the 7 LSBs (sig[6:0]) are used only for rounding. */
 bfloat16_t
 softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
 {
@@ -54,18 +55,18 @@ bfloat16_t
    *------------------------------------------------------------------------*/
    roundingMode = softfloat_roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
-    roundIncrement = 0x8;
+    roundIncrement = 0x40;
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
        roundIncrement =
            (roundingMode
                 == (sign ? softfloat_round_min : softfloat_round_max))
-                ? 0xF
+                ? 0x7F
                : 0;
    }
-    roundBits = sig & 0xF;
+    roundBits = sig & 0x7F;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
-    if ( 0x1D <= (unsigned int) exp ) {
+    if ( 0xFD <= (unsigned int) exp ) {
        if ( exp < 0 ) {
            /*----------------------------------------------------------------
            *----------------------------------------------------------------*/
@@ -74,22 +75,22 @@ bfloat16_t
                    || (exp < -1) || (sig + roundIncrement < 0x8000);
            sig = softfloat_shiftRightJam32( sig, -exp );
            exp = 0;
-            roundBits = sig & 0xF;
+            roundBits = sig & 0x7F;
            if ( isTiny && roundBits ) {
                softfloat_raiseFlags( softfloat_flag_underflow );
            }
-        } else if ( (0x1D < exp) || (0x8000 <= sig + roundIncrement) ) {
+        } else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) {
            /*----------------------------------------------------------------
            *----------------------------------------------------------------*/
            softfloat_raiseFlags(
                softfloat_flag_overflow | softfloat_flag_inexact );
-            uiZ = packToF16UI( sign, 0x1F, 0 ) - ! roundIncrement;
+            uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement;
            goto uiZ;
        }
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
-    sig = (sig + roundIncrement)>>4;
+    sig = (sig + roundIncrement)>>7;
    if ( roundBits ) {
        softfloat_exceptionFlags |= softfloat_flag_inexact;
 #ifdef SOFTFLOAT_ROUND_ODD
@ -99,12 +100,12 @@ bfloat16_t
        }
 #endif
    }
-    sig &= ~(uint_fast16_t) (! (roundBits ^ 8) & roundNearEven);
+    sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven);
    if ( ! sig ) exp = 0;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 packReturn:
-    uiZ = packToF16UI( sign, exp, sig );
+    uiZ = packToBF16UI( sign, exp, sig );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;