mirror of
https://github.com/intel/llvm.git
synced 2026-02-01 17:07:36 +08:00
[MLIR] Add f8E3M4 IEEE 754 type (#101230)
This PR adds the `f8E3M4` type to MLIR.
The `f8E3M4` type follows the IEEE 754 convention.
```c
f8E3M4 (IEEE 754)
- Exponent bias: 3
- Maximum stored exponent value: 6 (binary 110)
- Maximum unbiased exponent value: 6 - 3 = 3
- Minimum stored exponent value: 1 (binary 001)
- Minimum unbiased exponent value: 1 - 3 = -2
- Precision (total number of bits used for the significand/mantissa,
including the implicit leading integer bit): 4 + 1 = 5
- Follows IEEE 754 conventions for representation of special values
- Has Positive and Negative zero
- Has Positive and Negative infinity
- Has NaNs
Additional details:
- Max exp (unbiased): 3
- Min exp (unbiased): -2
- Infinities (+/-): S.111.0000
- Zeros (+/-): S.000.0000
- NaNs: S.111.{0,1}⁴ except S.111.0000
- Max normal number: S.110.1111 = +/-2^(6-3) x (1 + 15/16) = +/-2^3 x 31 x 2^(-4) = +/-15.5
- Min normal number: S.001.0000 = +/-2^(1-3) x (1 + 0) = +/-2^(-2)
- Max subnormal number: S.000.1111 = +/-2^(-2) x 15/16 = +/-2^(-2) x 15 x 2^(-4) = +/-15 x 2^(-6)
- Min subnormal number: S.000.0001 = +/-2^(-2) x 1/16 = +/-2^(-2) x 2^(-4) = +/-2^(-6)
```
Related PRs:
- [PR-99698](https://github.com/llvm/llvm-project/pull/99698) [APFloat] Add support for f8E3M4 IEEE 754 type
- [PR-97118](https://github.com/llvm/llvm-project/pull/97118) [MLIR] Add f8E4M3 IEEE 754 type
This commit is contained in:
committed by
GitHub
parent
e9c20b9132
commit
eef1d7e377
@@ -120,6 +120,7 @@ __all__ = [
|
||||
"F32Type",
|
||||
"F64Type",
|
||||
"FlatSymbolRefAttr",
|
||||
"Float8E3M4Type",
|
||||
"Float8E4M3B11FNUZType",
|
||||
"Float8E4M3FNType",
|
||||
"Float8E4M3FNUZType",
|
||||
@@ -1537,6 +1538,19 @@ class FlatSymbolRefAttr(Attribute):
|
||||
Returns the value of the FlatSymbolRef attribute as a string
|
||||
"""
|
||||
|
||||
class Float8E3M4Type(FloatType):
    """
    Type stub for the f8E3M4 float type: an 8-bit floating point format with
    1 sign bit, 3 exponent bits and 4 mantissa bits.
    """

    # Class-level TypeID (declared as a ClassVar, i.e. not per-instance).
    static_typeid: ClassVar[TypeID]

    @staticmethod
    def get(context: Optional[Context] = None) -> Float8E3M4Type:
        """
        Create a float8_e3m4 type.
        """
        # NOTE(review): presumably ``context=None`` selects the ambient
        # context, as for the sibling float types -- confirm against the
        # native binding.

    # Takes an arbitrary ``Type`` and returns a bool.
    # NOTE(review): presumably True when ``other`` is an f8E3M4 type -- confirm.
    @staticmethod
    def isinstance(other: Type) -> bool: ...

    # Constructs this type from an existing generic ``Type`` value.
    # NOTE(review): behaviour on a mismatching type is not visible here.
    def __init__(self, cast_from_type: Type) -> None: ...

    # Read-only TypeID accessor for an instance of this type.
    @property
    def typeid(self) -> TypeID: ...
|
||||
|
||||
class Float8E4M3B11FNUZType(FloatType):
|
||||
static_typeid: ClassVar[TypeID]
|
||||
@staticmethod
|
||||
|
||||
@@ -12,6 +12,7 @@ from ..ir import (
|
||||
F16Type,
|
||||
F32Type,
|
||||
F64Type,
|
||||
Float8E3M4Type,
|
||||
Float8E4M3B11FNUZType,
|
||||
Float8E4M3FNType,
|
||||
Float8E4M3Type,
|
||||
@@ -72,6 +73,7 @@ f8E5M2 = lambda: Float8E5M2Type.get()
|
||||
# Zero-argument builders for 8-bit float types: each call forwards to the
# ``get()`` static method of the corresponding type class.
f8E4M3 = lambda: Float8E4M3Type.get()
f8E4M3FN = lambda: Float8E4M3FNType.get()
f8E4M3B11FNUZ = lambda: Float8E4M3B11FNUZType.get()
f8E3M4 = lambda: Float8E3M4Type.get()
|
||||
|
||||
none = lambda: NoneType.get()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user