From 1b8313c12ce079aaab24c2a518804ac412a634ff Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Mon, 19 Jun 2023 11:56:50 +0200 Subject: [PATCH 1/8] Added Vector128.GetLower,GetUpper as intrinsics on arm64. Enabled OP_XCONCAT. --- src/mono/mono/arch/arm64/arm64-codegen.h | 1 + src/mono/mono/mini/cpu-arm64.mdesc | 3 +++ src/mono/mono/mini/mini-arm64.c | 25 ++++++++++++++++++------ src/mono/mono/mini/simd-intrinsics.c | 19 ------------------ 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index 154fd10d85f63..0834a24ad635e 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -1047,6 +1047,7 @@ arm_encode_arith_imm (int imm, guint32 *shift) /* NEON :: move SIMD register*/ #define arm_neon_mov(p, rd, rn) arm_neon_orr ((p), VREG_FULL, (rd), (rn), (rn)) +#define arm_neon_mov_8b(p, rd, rn) arm_neon_orr ((p), VREG_LOW, (rd), (rn), (rn)) /* NEON :: AES */ #define arm_neon_aes_opcode(p, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), VREG_FULL, 0b00001110001010000000100000000000 | (size) << 22 | (opcode) << 12, (rd), (rn)) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 57e31d0561eff..f219c95e45a35 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -558,6 +558,9 @@ arm64_ext_imm: dest:x src1:x src2:x len:4 xinsert_i8: dest:x src1:x src2:i src3:i len:20 xinsert_r8: dest:x src1:x src2:f src3:i len:20 arm64_broadcast_elem: dest:x src1:x len:16 +xconcat: dest:x src1:x src2:x len:8 +xlower: dest:x src1:x len:4 +xupper: dest:x src1:x len:4 generic_class_init: src1:a len:44 clob:c gc_safe_point: src1:i len:12 clob:c diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 491f02f6e4044..837dc946f3be1 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -4085,7 +4085,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } case OP_CREATE_SCALAR_UNSAFE_INT: { const int t = get_type_size_macro (ins->inst_c1); - arm_neon_ins_g(code, t, dreg, sreg1, 0); + arm_neon_ins_g (code, t, dreg, sreg1, 0); break; } case OP_CREATE_SCALAR_UNSAFE_FLOAT: { @@ -4099,14 +4099,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) t = SIZE_8; break; } - arm_neon_ins_e(code, t, dreg, sreg1, 0, 0); + arm_neon_ins_e (code, t, dreg, sreg1, 0, 0); } break; } - // This requires Vector64 SIMD support - // case OP_XCONCAT: - // arm_neon_ext_16b(code, dreg, sreg1, sreg2, 8); - // break; + case OP_XCONCAT: { + if (dreg != sreg1) + arm_neon_mov (code, dreg, sreg1); + + arm_neon_ins_e (code, SIZE_8, dreg, sreg2, 1, 0); + break; + } case OP_ARM64_USHL: { arm_neon_ushl (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2); break; @@ -4118,6 +4121,16 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) arm_neon_ext_16b (code, dreg, sreg1, sreg2, ins->inst_c0); break; } + case OP_XLOWER: { + if (dreg != sreg1) + arm_neon_mov_8b (code, dreg, sreg1); + break; + } + case OP_XUPPER: + arm_neon_fdup_e (code, VREG_FULL, TYPE_F64, dreg, sreg1, 1); + break; + + /* BRANCH */ case OP_BR: mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb, MONO_R_ARM64_B); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index dca9ccbd73ccc..f9304430a9139 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1382,14 +1382,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!COMPILE_LLVM (cfg)) { if (vector_size != 128) return NULL; - switch (id) { - case SN_GetLower: - case SN_GetUpper: - return NULL; - default: - break; } - } #endif #ifdef TARGET_WASM @@ -1662,11 +1655,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi ins->inst_c1 = arg0_type; return ins; } else if (is_create_from_half_vectors_overload (fsig)) { -#if defined(TARGET_ARM64) - // Require Vector64 SIMD support - if (!COMPILE_LLVM (cfg)) - return NULL; -#endif #if defined(TARGET_AMD64) // Require Vector64 SIMD support if (!COMPILE_LLVM (cfg)) @@ -1926,13 +1914,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!is_element_type_primitive (fsig->params [0])) return NULL; int op = id == SN_GetLower ? OP_XLOWER : OP_XUPPER; - -#ifdef TARGET_AMD64 - if (!COMPILE_LLVM (cfg)) - /* These return a Vector64 */ - return NULL; -#endif - return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args); } case SN_GreaterThan: From 7a06b4cd96db51eb8bf09e778f54865fb250d1a9 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Mon, 19 Jun 2023 13:58:47 +0200 Subject: [PATCH 2/8] Adding WithLower, WithUpper. --- src/mono/mono/mini/cpu-arm64.mdesc | 2 ++ src/mono/mono/mini/mini-arm64.c | 8 ++++++++ src/mono/mono/mini/simd-intrinsics.c | 4 +++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index f219c95e45a35..35fd2d54dfdae 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -561,6 +561,8 @@ arm64_broadcast_elem: dest:x src1:x len:16 xconcat: dest:x src1:x src2:x len:8 xlower: dest:x src1:x len:4 xupper: dest:x src1:x len:4 +xinsert_lower: dest:x src1:x src2:x len:8 +xinsert_upper: dest:x src1:x src2:x len:8 generic_class_init: src1:a len:44 clob:c gc_safe_point: src1:i len:12 clob:c diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 837dc946f3be1..1774767a2e605 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -4130,6 +4130,14 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) arm_neon_fdup_e (code, VREG_FULL, TYPE_F64, dreg, sreg1, 1); break; + case OP_XINSERT_LOWER: + case OP_XINSERT_UPPER: { + if (dreg != sreg1) + arm_neon_mov (code, dreg, sreg1); + + arm_neon_ins_e (code, SIZE_8, dreg, sreg2, insert_at, 0); + break; + } /* BRANCH */ case OP_BR: diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index f9304430a9139..d52eb806bced1 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1201,6 +1201,8 @@ static guint16 sri_vector_methods [] = { SN_WidenLower, SN_WidenUpper, SN_WithElement, + SN_WithLower, + SN_WithUpper, SN_Xor, SN_get_IsHardwareAccelerated, }; @@ -2287,7 +2289,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi case SN_WithUpper: { if (!is_element_type_primitive (fsig->params [0])) return NULL; - int op = id == SN_GetLower ? OP_XINSERT_LOWER : OP_XINSERT_UPPER; + int op = id == SN_WithLower ? OP_XINSERT_LOWER : OP_XINSERT_UPPER; return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args); } default: From d4eb982c85eef1201a9e3a721ce7116e285497e7 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Mon, 19 Jun 2023 14:16:49 +0200 Subject: [PATCH 3/8] Fixed missing variable. --- src/mono/mono/mini/mini-arm64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 1774767a2e605..184897324e79d 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -4135,6 +4135,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) if (dreg != sreg1) arm_neon_mov (code, dreg, sreg1); + int insert_at = (ins->opcode == OP_XINSERT_LOWER) ? 0 : 1; arm_neon_ins_e (code, SIZE_8, dreg, sreg2, insert_at, 0); break; } From b6c1db304e382094cc8a08e2516f3c9f0d10fa90 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Mon, 19 Jun 2023 14:26:30 +0200 Subject: [PATCH 4/8] Restored check on x64 code. --- src/mono/mono/mini/simd-intrinsics.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index d52eb806bced1..a36e6c5ac5d0a 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1916,6 +1916,12 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!is_element_type_primitive (fsig->params [0])) return NULL; int op = id == SN_GetLower ? OP_XLOWER : OP_XUPPER; + +#ifdef TARGET_AMD64 + if (!COMPILE_LLVM (cfg)) + /* These return a Vector64 */ + return NULL; +#endif return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args); } case SN_GreaterThan: From 4ab4edca86e2786836a2bd9a71f83c7d87776877 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Mon, 19 Jun 2023 16:14:39 +0200 Subject: [PATCH 5/8] xlower and xupper now sanitize the upper half of the dest register. --- src/mono/mono/mini/cpu-arm64.mdesc | 4 ++-- src/mono/mono/mini/mini-arm64.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 35fd2d54dfdae..63bb56d6164fd 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -559,8 +559,8 @@ xinsert_i8: dest:x src1:x src2:i src3:i len:20 xinsert_r8: dest:x src1:x src2:f src3:i len:20 arm64_broadcast_elem: dest:x src1:x len:16 xconcat: dest:x src1:x src2:x len:8 -xlower: dest:x src1:x len:4 -xupper: dest:x src1:x len:4 +xlower: dest:x src1:x len:8 +xupper: dest:x src1:x len:8 xinsert_lower: dest:x src1:x src2:x len:8 xinsert_upper: dest:x src1:x src2:x len:8 diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 184897324e79d..0bd13dbe79811 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -4122,12 +4122,20 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; } case OP_XLOWER: { - if (dreg != sreg1) + if (dreg == sreg1) { + // clean the upper half + arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG); + arm_neon_ins_e (code, SIZE_8, dreg, NEON_TMP_REG, 1, 0); + } else { + arm_neon_eor (code, VREG_FULL, dreg, dreg, dreg); arm_neon_mov_8b (code, dreg, sreg1); + } break; } case OP_XUPPER: - arm_neon_fdup_e (code, VREG_FULL, TYPE_F64, dreg, sreg1, 1); + // shift in 64 zeros from the left + arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG); + arm_neon_ext_16b (code, dreg, sreg1, NEON_TMP_REG, 8); break; case OP_XINSERT_LOWER: From 34a29da66ca313fa4a563211a08eb6efe9f7fb4c Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Tue, 20 Jun 2023 14:46:08 +0200 Subject: [PATCH 6/8] Fixed definitions of insert opcodes to reflect that they depend on the original dest state. --- src/mono/mono/mini/cpu-arm64.mdesc | 4 ++-- src/mono/mono/mini/simd-intrinsics.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 63bb56d6164fd..845004ab73bd7 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -561,8 +561,8 @@ arm64_broadcast_elem: dest:x src1:x len:16 xconcat: dest:x src1:x src2:x len:8 xlower: dest:x src1:x len:8 xupper: dest:x src1:x len:8 -xinsert_lower: dest:x src1:x src2:x len:8 -xinsert_upper: dest:x src1:x src2:x len:8 +xinsert_lower: dest:x src1:x src2:x len:8 clob:1 +xinsert_upper: dest:x src1:x src2:x len:8 clob:1 generic_class_init: src1:a len:44 clob:c gc_safe_point: src1:i len:12 clob:c diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index a36e6c5ac5d0a..a1978b41d6374 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1919,7 +1919,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi #ifdef TARGET_AMD64 if (!COMPILE_LLVM (cfg)) - /* These return a Vector64 */ + /* These return a Vector64 */ return NULL; #endif return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args); From 1ba5b99ae7a7841efa7d9fe852417b8ec9cffdf0 Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Tue, 20 Jun 2023 16:19:19 +0200 Subject: [PATCH 7/8] OP_XCONCAT can now also depend on initial dest state. --- src/mono/mono/mini/cpu-arm64.mdesc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 845004ab73bd7..2479dd0d22d62 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -558,7 +558,7 @@ arm64_ext_imm: dest:x src1:x src2:x len:4 xinsert_i8: dest:x src1:x src2:i src3:i len:20 xinsert_r8: dest:x src1:x src2:f src3:i len:20 arm64_broadcast_elem: dest:x src1:x len:16 -xconcat: dest:x src1:x src2:x len:8 +xconcat: dest:x src1:x src2:x len:8 clob:1 xlower: dest:x src1:x len:8 xupper: dest:x src1:x len:8 xinsert_lower: dest:x src1:x src2:x len:8 clob:1 From 5a4254a1bb9367284fdee566de7ada45392de4be Mon Sep 17 00:00:00 2001 From: Jan Dupej Date: Wed, 21 Jun 2023 11:50:42 +0200 Subject: [PATCH 8/8] Disabling Vector128.WithLower,WithUpper on x64. --- src/mono/mono/mini/simd-intrinsics.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index a1978b41d6374..733591a5ab421 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -2293,6 +2293,12 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } case SN_WithLower: case SN_WithUpper: { +#ifdef TARGET_AMD64 + if (!COMPILE_LLVM (cfg)) + /* These return a Vector64 */ + return NULL; +#endif + if (!is_element_type_primitive (fsig->params [0])) return NULL; int op = id == SN_WithLower ? OP_XINSERT_LOWER : OP_XINSERT_UPPER;