From 38826a9a2caf619d1f6c48c4f80302e987ebcc30 Mon Sep 17 00:00:00 2001
From: Sergey Kozub <skozub@nvidia.com>
Date: Wed, 18 Dec 2024 12:44:07 -0800
Subject: [PATCH] PR #19096: Add F4E2M1FN and F8E8M0FNU types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Imported from GitHub PR https://github.com/openxla/xla/pull/19096

This PR adds F4E2M1FN primitive type (4-bit float with 2 bits exponent and 1 bit mantissa), F8E8M0FNU primitive type (8-bit float with 8 bits exponent, no mantissa and no sign) and enables loads/stores in the same way S4/U4 type is implemented.

This will enable using microscaling (MX) formats ([RFC](https://github.com/openxla/xla/discussions/18085)), such as MXFP4.

```c
F4E2M1FN
- Exponent bias: 1
- Maximum stored exponent value: 3 (binary 11)
- Maximum unbiased exponent value: 3 - 1 = 2
- Minimum stored exponent value: 1 (binary 01)
- Minimum unbiased exponent value: 1 − 1 = 0
- Has Positive and Negative zero
- Doesn't have infinity
- Doesn't have NaNs

Additional details:
- Zeros (+/-): S.00.0
- Max normal number: S.11.1 = ±2^(2) x (1 + 0.5) = ±6.0
- Min normal number: S.01.0 = ±2^(0) = ±1.0
- Min subnormal number: S.00.1 = ±2^(0) x 0.5 = ±0.5

F8E8M0FNU
- Exponent bias: 127
- Maximum stored exponent value: 254 (binary 1111'1110)
- Maximum unbiased exponent value: 254 - 127 = 127
- Minimum stored exponent value: 0 (binary 0000'0000)
- Minimum unbiased exponent value: 0 − 127 = -127
- Doesn't have zero
- Doesn't have infinity
- NaN is encoded as binary 1111'1111

Additional details:
- Zeros cannot be represented
- Negative values cannot be represented
- Mantissa is always 1
```

Related PRs:
- https://github.com/openxla/stablehlo/pull/2582
- https://github.com/jax-ml/ml_dtypes/pull/181
- https://github.com/llvm/llvm-project/pull/95392
- https://github.com/llvm/llvm-project/pull/108877
- https://github.com/jax-ml/ml_dtypes/pull/166
- https://github.com/llvm/llvm-project/pull/107127
- https://github.com/llvm/llvm-project/pull/111028

The PR is split into multiple commits just to make the review easier, it is possible that some tests could fail if only some (i.e. not all) of these commits are applied.
Copybara import of the project:

--
f493e4803eaa5ff3da3ceb130e9348c014b4a2e8 by Sergey Kozub <skozub@nvidia.com>:

Add F4E2M1FN type: import mxfloat.h

--
87d005630b310a355d7c30b22828c35237373f17 by Sergey Kozub <skozub@nvidia.com>:

Add F4E2M1FN type: primitive type

--
70ca82093faeec98f2dc5e8b82f617d99ca96849 by Sergey Kozub <skozub@nvidia.com>:

Add F4E2M1FN type: literal support

--
c479f0940da490e9668e2f48e14a7466f0c4a97f by Sergey Kozub <skozub@nvidia.com>:

Add F4E2M1FN type: conversion codegen

--
daaa3af3ce3af456f2ef44dbc291ebeb09e86d9b by Sergey Kozub <skozub@nvidia.com>:

Add F4E2M1FN type: python interface

--
1f0e19ff14733eff790726936b68ef0cf607a766 by Sergey Kozub <skozub@nvidia.com>:

Add F4E2M1FN type: FFI

--
999bf96092e57c7b3039811f2887281f347ff17a by Sergey Kozub <skozub@nvidia.com>:

Add F4E2M1FN type: HLO evaluator

--
d7d5af74c5f8a94522779a121c0a4a962156fb64 by Sergey Kozub <skozub@nvidia.com>:

Add F4E2M1FN type: add tests

--
9e8c7bc02849f241d0f05941221d99f1d08d9e67 by Sergey Kozub <skozub@nvidia.com>:

Add F8E8M0FNU type

--
1e344174b931cea4978770ab740dfed67186c2f4 by Sergey Kozub <skozub@nvidia.com>:

Addressing PR#19096 review comments

--
d4de0a369d9dc853f34f3cf3bf7dcc5a47502106 by Sergey Kozub <skozub@nvidia.com>:

Addressing PR#19096 review comments (round 2)

Merging this change closes #19096

FUTURE_COPYBARA_INTEGRATE_REVIEW=https://github.com/openxla/xla/pull/19096 from openxla:skozub/e2m1 d4de0a369d9dc853f34f3cf3bf7dcc5a47502106
PiperOrigin-RevId: 707638099
---
 tsl/platform/BUILD       | 1 +
 tsl/platform/ml_dtypes.h | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/tsl/platform/BUILD b/tsl/platform/BUILD
index eb208e6ef..9f3b13524 100644
--- a/tsl/platform/BUILD
+++ b/tsl/platform/BUILD
@@ -985,6 +985,7 @@ cc_library(
     deps = [
         "@ml_dtypes//:float8",
         "@ml_dtypes//:intn",
+        "@ml_dtypes//:mxfloat",
     ],
 )
 
diff --git a/tsl/platform/ml_dtypes.h b/tsl/platform/ml_dtypes.h
index a6a1b56af..a03fa0244 100644
--- a/tsl/platform/ml_dtypes.h
+++ b/tsl/platform/ml_dtypes.h
@@ -18,8 +18,10 @@ limitations under the License.
 
 #include "ml_dtypes/include/float8.h"  // from @ml_dtypes
 #include "ml_dtypes/include/intn.h"  // from @ml_dtypes
+#include "ml_dtypes/include/mxfloat.h"  // from @ml_dtypes
 
 namespace tsl {
+using float4_e2m1fn = ::ml_dtypes::float4_e2m1fn;
 using float8_e3m4 = ::ml_dtypes::float8_e3m4;
 using float8_e4m3 = ::ml_dtypes::float8_e4m3;
 using float8_e4m3fn = ::ml_dtypes::float8_e4m3fn;
@@ -27,6 +29,7 @@ using float8_e4m3fnuz = ::ml_dtypes::float8_e4m3fnuz;
 using float8_e4m3b11fnuz = ::ml_dtypes::float8_e4m3b11fnuz;
 using float8_e5m2 = ::ml_dtypes::float8_e5m2;
 using float8_e5m2fnuz = ::ml_dtypes::float8_e5m2fnuz;
+using float8_e8m0fnu = ::ml_dtypes::float8_e8m0fnu;
 
 using int1 = ::ml_dtypes::int1;
 using uint1 = ::ml_dtypes::uint1;