-
Notifications
You must be signed in to change notification settings - Fork 12.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] Implement intrinsics for F1CVTL/F2CVTL and BF1CVTL/BF2CVTL (#…
…116959) This patch implements the following intrinsics: 8-bit floating-point convert to deinterleaved half-precision or BFloat16. ``` c // Variant is also available for: _bf16[_mf8]_x2 svfloat16x2_t svcvtl1_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) __arm_streaming; svfloat16x2_t svcvtl2_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) __arm_streaming; ``` Defined in ARM-software/acle#323 Co-authored-by: Caroline Concatto caroline.concatto@arm.com Co-authored-by: Marian Lukac marian.lukac@arm.com
- Loading branch information
1 parent
12ccb62
commit e4ee970
Showing
11 changed files
with
213 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
81 changes: 81 additions & 0 deletions
81
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py | ||
|
||
// REQUIRES: aarch64-registered-target | ||
|
||
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s | ||
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK | ||
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s | ||
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK | ||
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s | ||
|
||
#include <arm_sve.h> | ||
|
||
#ifdef SVE_OVERLOADED_FORMS | ||
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 | ||
#else | ||
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 | ||
#endif | ||
|
||
// CHECK-LABEL: @test_cvtl1_f16_x2( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]]) | ||
// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]] | ||
// | ||
// CPP-CHECK-LABEL: @_Z17test_cvtl1_f16_x2u13__SVMfloat8_tm( | ||
// CPP-CHECK-NEXT: entry: | ||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) | ||
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]]) | ||
// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]] | ||
// | ||
svfloat16x2_t test_cvtl1_f16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming { | ||
return SVE_ACLE_FUNC(svcvtl1_f16,_mf8,_x2_fpm)(zn, fpmr); | ||
} | ||
|
||
// CHECK-LABEL: @test_cvtl2_f16_x2( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]]) | ||
// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]] | ||
// | ||
// CPP-CHECK-LABEL: @_Z17test_cvtl2_f16_x2u13__SVMfloat8_tm( | ||
// CPP-CHECK-NEXT: entry: | ||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) | ||
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]]) | ||
// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]] | ||
// | ||
svfloat16x2_t test_cvtl2_f16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming { | ||
return SVE_ACLE_FUNC(svcvtl2_f16,_mf8,_x2_fpm)(zn, fpmr); | ||
} | ||
|
||
// CHECK-LABEL: @test_cvtl1_bf16_x2( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]]) | ||
// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]] | ||
// | ||
// CPP-CHECK-LABEL: @_Z18test_cvtl1_bf16_x2u13__SVMfloat8_tm( | ||
// CPP-CHECK-NEXT: entry: | ||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) | ||
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]]) | ||
// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]] | ||
// | ||
svbfloat16x2_t test_cvtl1_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming { | ||
return SVE_ACLE_FUNC(svcvtl1_bf16,_mf8,_x2_fpm)(zn, fpmr); | ||
} | ||
|
||
// CHECK-LABEL: @test_cvtl2_bf16_x2( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]]) | ||
// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]] | ||
// | ||
// CPP-CHECK-LABEL: @_Z18test_cvtl2_bf16_x2u13__SVMfloat8_tm( | ||
// CPP-CHECK-NEXT: entry: | ||
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) | ||
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]]) | ||
// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]] | ||
// | ||
svbfloat16x2_t test_cvtl2_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming { | ||
return SVE_ACLE_FUNC(svcvtl2_bf16,_mf8,_x2_fpm)(zn, fpmr); | ||
} |
17 changes: 17 additions & 0 deletions
17
clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_cvt.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -verify -emit-llvm-only %s | ||
|
||
// REQUIRES: aarch64-registered-target | ||
|
||
#include <arm_sve.h> | ||
|
||
|
||
void test_features_sme2_fp8(svmfloat8_t zn, fpm_t fpmr) __arm_streaming { | ||
// expected-error@+1 {{'svcvtl1_f16_mf8_x2_fpm' needs target feature sme,sme2,fp8}} | ||
svcvtl1_f16_mf8_x2_fpm(zn, fpmr); | ||
// expected-error@+1 {{'svcvtl2_f16_mf8_x2_fpm' needs target feature sme,sme2,fp8}} | ||
svcvtl2_f16_mf8_x2_fpm(zn, fpmr); | ||
// expected-error@+1 {{'svcvtl1_bf16_mf8_x2_fpm' needs target feature sme,sme2,fp8}} | ||
svcvtl1_bf16_mf8_x2_fpm(zn, fpmr); | ||
// expected-error@+1 {{'svcvtl2_bf16_mf8_x2_fpm' needs target feature sme,sme2,fp8}} | ||
svcvtl2_bf16_mf8_x2_fpm(zn, fpmr); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 | ||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+fp8 -verify-machineinstrs -force-streaming < %s | FileCheck %s | ||
|
||
; F1CVTL / F2CVTL | ||
|
||
define { <vscale x 8 x half>, <vscale x 8 x half> } @f1cvtl(<vscale x 16 x i8> %zm) { | ||
; CHECK-LABEL: f1cvtl: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: f1cvtl { z0.h, z1.h }, z0.b | ||
; CHECK-NEXT: ret | ||
%res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8f16(<vscale x 16 x i8> %zm) | ||
ret { <vscale x 8 x half>, <vscale x 8 x half> } %res | ||
} | ||
|
||
define { <vscale x 8 x half>, <vscale x 8 x half> } @f2cvtl(<vscale x 16 x i8> %zm) { | ||
; CHECK-LABEL: f2cvtl: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: f2cvtl { z0.h, z1.h }, z0.b | ||
; CHECK-NEXT: ret | ||
%res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxvbf16(<vscale x 16 x i8> %zm) | ||
ret { <vscale x 8 x half>, <vscale x 8 x half> } %res | ||
} | ||
|
||
; BF1CVTL / BF2CVTL | ||
|
||
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @bf1cvtl(<vscale x 16 x i8> %zm) { | ||
; CHECK-LABEL: bf1cvtl: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: bf1cvtl { z0.h, z1.h }, z0.b | ||
; CHECK-NEXT: ret | ||
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8bf16(<vscale x 16 x i8> %zm) | ||
ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res | ||
} | ||
|
||
define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @bf2cvtl( <vscale x 16 x i8> %zm) { | ||
; CHECK-LABEL: bf2cvtl: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: bf2cvtl { z0.h, z1.h }, z0.b | ||
; CHECK-NEXT: ret | ||
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8bf16(<vscale x 16 x i8> %zm) | ||
ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res | ||
} |