Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[mono][jit] Added Vector128 intrinsics that depend on Vector64 for arm64 #87765

Merged
merged 8 commits into from
Jun 21, 2023
1 change: 1 addition & 0 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1047,6 +1047,7 @@ arm_encode_arith_imm (int imm, guint32 *shift)

/* NEON :: move SIMD register*/
/*
 * Both move macros are expressed as an ORR whose two source operands are the
 * same register (rn), so the result written to rd is simply the value of rn.
 *
 * arm_neon_mov    passes VREG_FULL as the vector-width argument.
 * arm_neon_mov_8b passes VREG_LOW instead; judging by the _8b suffix this is
 *                 presumably the 8-byte (64-bit) form -- confirm against the
 *                 definition of VREG_LOW / arm_neon_orr.
 */
#define arm_neon_mov(p, rd, rn) arm_neon_orr ((p), VREG_FULL, (rd), (rn), (rn))
#define arm_neon_mov_8b(p, rd, rn) arm_neon_orr ((p), VREG_LOW, (rd), (rn), (rn))

/* NEON :: AES */
#define arm_neon_aes_opcode(p, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), VREG_FULL, 0b00001110001010000000100000000000 | (size) << 22 | (opcode) << 12, (rd), (rn))
Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/cpu-arm64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,11 @@ arm64_ext_imm: dest:x src1:x src2:x len:4
xinsert_i8: dest:x src1:x src2:i src3:i len:20
xinsert_r8: dest:x src1:x src2:f src3:i len:20
arm64_broadcast_elem: dest:x src1:x len:16
xconcat: dest:x src1:x src2:x len:8 clob:1
xlower: dest:x src1:x len:8
xupper: dest:x src1:x len:8
xinsert_lower: dest:x src1:x src2:x len:8 clob:1
xinsert_upper: dest:x src1:x src2:x len:8 clob:1

generic_class_init: src1:a len:44 clob:c
gc_safe_point: src1:i len:12 clob:c
Expand Down
42 changes: 36 additions & 6 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -4085,7 +4085,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
}
case OP_CREATE_SCALAR_UNSAFE_INT: {
const int t = get_type_size_macro (ins->inst_c1);
arm_neon_ins_g(code, t, dreg, sreg1, 0);
arm_neon_ins_g (code, t, dreg, sreg1, 0);
break;
}
case OP_CREATE_SCALAR_UNSAFE_FLOAT: {
Expand All @@ -4099,14 +4099,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
t = SIZE_8;
break;
}
arm_neon_ins_e(code, t, dreg, sreg1, 0, 0);
arm_neon_ins_e (code, t, dreg, sreg1, 0, 0);
}
break;
}
// This requires Vector64 SIMD support
// case OP_XCONCAT:
// arm_neon_ext_16b(code, dreg, sreg1, sreg2, 8);
// break;
case OP_XCONCAT: {
if (dreg != sreg1)
arm_neon_mov (code, dreg, sreg1);

arm_neon_ins_e (code, SIZE_8, dreg, sreg2, 1, 0);
break;
}
case OP_ARM64_USHL: {
arm_neon_ushl (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2);
break;
Expand All @@ -4118,6 +4121,33 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
arm_neon_ext_16b (code, dreg, sreg1, sreg2, ins->inst_c0);
break;
}
case OP_XLOWER: {
if (dreg == sreg1) {
// clean the upper half
arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
arm_neon_ins_e (code, SIZE_8, dreg, NEON_TMP_REG, 1, 0);
} else {
arm_neon_eor (code, VREG_FULL, dreg, dreg, dreg);
arm_neon_mov_8b (code, dreg, sreg1);
}
break;
}
case OP_XUPPER:
// shift in 64 zeros from the left
arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
arm_neon_ext_16b (code, dreg, sreg1, NEON_TMP_REG, 8);
break;

case OP_XINSERT_LOWER:
case OP_XINSERT_UPPER: {
if (dreg != sreg1)
arm_neon_mov (code, dreg, sreg1);

int insert_at = (ins->opcode == OP_XINSERT_LOWER) ? 0 : 1;
arm_neon_ins_e (code, SIZE_8, dreg, sreg2, insert_at, 0);
break;
}

/* BRANCH */
case OP_BR:
mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb, MONO_R_ARM64_B);
Expand Down
25 changes: 10 additions & 15 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,8 @@ static guint16 sri_vector_methods [] = {
SN_WidenLower,
SN_WidenUpper,
SN_WithElement,
SN_WithLower,
SN_WithUpper,
SN_Xor,
SN_get_IsHardwareAccelerated,
};
Expand Down Expand Up @@ -1382,14 +1384,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
if (!COMPILE_LLVM (cfg)) {
if (vector_size != 128)
return NULL;
switch (id) {
case SN_GetLower:
case SN_GetUpper:
return NULL;
default:
break;
}
}
#endif

#ifdef TARGET_WASM
Expand Down Expand Up @@ -1662,11 +1657,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
ins->inst_c1 = arg0_type;
return ins;
} else if (is_create_from_half_vectors_overload (fsig)) {
#if defined(TARGET_ARM64)
// Require Vector64 SIMD support
if (!COMPILE_LLVM (cfg))
return NULL;
#endif
#if defined(TARGET_AMD64)
// Require Vector64 SIMD support
if (!COMPILE_LLVM (cfg))
Expand Down Expand Up @@ -1929,10 +1919,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi

#ifdef TARGET_AMD64
if (!COMPILE_LLVM (cfg))
jandupej marked this conversation as resolved.
Show resolved Hide resolved
/* These return a Vector64 */
/* These return a Vector64 */
return NULL;
#endif

return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args);
}
case SN_GreaterThan:
Expand Down Expand Up @@ -2304,9 +2293,15 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
}
case SN_WithLower:
case SN_WithUpper: {
#ifdef TARGET_AMD64
if (!COMPILE_LLVM (cfg))
/* These take a Vector64 argument */
return NULL;
#endif

if (!is_element_type_primitive (fsig->params [0]))
return NULL;
int op = id == SN_GetLower ? OP_XINSERT_LOWER : OP_XINSERT_UPPER;
int op = id == SN_WithLower ? OP_XINSERT_LOWER : OP_XINSERT_UPPER;
return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args);
}
default:
Expand Down