Skip to content

Commit

Permalink
[mono][jit] Implement JIT support for the arm64 Crc and Dp intrinsics…
Browse files Browse the repository at this point in the history
… sets. (#86106)

Also implement hardware capability detection for apple+arm64 platforms.
  • Loading branch information
vargaz authored May 17, 2023
1 parent 83f71b5 commit 4de58bf
Show file tree
Hide file tree
Showing 9 changed files with 196 additions and 8 deletions.
14 changes: 14 additions & 0 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,20 @@ arm_encode_arith_imm (int imm, guint32 *shift)

#define arm_autibsp(p) arm_format_autib ((p), 0b0011, 0b111)

/* CRC32 */

/*
 * Encode one instruction of the CRC32 family and hand it to arm_emit ().
 *   sf         - bit 31; set only by the "x" forms below
 *   C          - bit 12; 0 for the crc32* forms, 1 for the crc32c* forms
 *   sz         - bits 11:10; 0b00/0b01/0b10/0b11 for the b/h/w/x forms
 *   rm, rn, rd - register numbers placed at bits 20:16, 9:5 and 4:0
 * Every field is widened to guint32 before it is shifted: (sf) << 31 on a
 * plain int is a signed overflow when sf == 1. Each shifted term also carries
 * its own parentheses so the expansion does not rely on operator precedence.
 */
#define arm_format_crc32(p, sf, C, sz, rm, rn, rd) arm_emit ((p), ((guint32)(sf) << 31) | ((guint32)0b11010110 << 21) | ((guint32)(rm) << 16) | ((guint32)0b010 << 13) | ((guint32)(C) << 12) | ((guint32)(sz) << 10) | ((guint32)(rn) << 5) | ((guint32)(rd) << 0))

/* CRC32 forms (C == 0): byte, halfword, word and doubleword variants */
#define arm_crc32b(p, rd, rn, rm) arm_format_crc32 ((p), 0, 0, 0b00, (rm), (rn), (rd))
#define arm_crc32h(p, rd, rn, rm) arm_format_crc32 ((p), 0, 0, 0b01, (rm), (rn), (rd))
#define arm_crc32w(p, rd, rn, rm) arm_format_crc32 ((p), 0, 0, 0b10, (rm), (rn), (rd))
#define arm_crc32x(p, rd, rn, rm) arm_format_crc32 ((p), 1, 0, 0b11, (rm), (rn), (rd))

/* CRC32C forms (C == 1): byte, halfword, word and doubleword variants */
#define arm_crc32cb(p, rd, rn, rm) arm_format_crc32 ((p), 0, 1, 0b00, (rm), (rn), (rd))
#define arm_crc32ch(p, rd, rn, rm) arm_format_crc32 ((p), 0, 1, 0b01, (rm), (rn), (rd))
#define arm_crc32cw(p, rd, rn, rm) arm_format_crc32 ((p), 0, 1, 0b10, (rm), (rn), (rd))
#define arm_crc32cx(p, rd, rn, rm) arm_format_crc32 ((p), 1, 1, 0b11, (rm), (rn), (rd))

/* C4.1.69 NEON vector ISA */

// Opcode naming convention is arm_neon_<operation>_[<op>_]<elem_count><type>
Expand Down
10 changes: 10 additions & 0 deletions src/mono/mono/arch/arm64/codegen-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,16 @@ main (int argc, char *argv [])
arm_neon_addp (code, VREG_FULL, TYPE_I8, ARMREG_R0, ARMREG_R1, ARMREG_R2);
arm_neon_faddp (code, VREG_FULL, TYPE_F32, ARMREG_R0, ARMREG_R1, ARMREG_R2);

// crc32
arm_crc32b (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
arm_crc32h (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
arm_crc32w (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
arm_crc32x (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
arm_crc32cb (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
arm_crc32ch (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
arm_crc32cw (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
arm_crc32cx (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);

for (i = 0; i < code - buf; ++i)
printf (".byte %d\n", buf [i]);
printf ("\n");
Expand Down
4 changes: 4 additions & 0 deletions src/mono/mono/mini/cpu-arm64.mdesc
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,9 @@ lscnt32: dest:i src1:i len:4
lscnt64: dest:i src1:i len:4
xop_i8_i8: dest:i src1:i len:4
xop_i4_i4: dest:i src1:i len:4
xop_i4_i4_i4: dest:i src1:i src2:i len:4
xop_i4_i4_i8: dest:i src1:i src2:i len:4
xop_ovr_x_x_x_x: dest:x src1:x src2:x src3:x len:4 clob:1
arm64_smulh: dest:i src1:i src2:i len:4
arm64_umulh: dest:i src1:i src2:i len:4
arm64_hint: len:4
Expand Down Expand Up @@ -554,6 +557,7 @@ arm64_ushl: dest:x src1:x src2:x len:4
arm64_ext_imm: dest:x src1:x src2:x len:4
xinsert_i8: dest:x src1:x src2:i src3:i len:20
xinsert_r8: dest:x src1:x src2:f src3:i len:20
arm64_broadcast_elem: dest:x src1:x len:16

generic_class_init: src1:a len:44 clob:c
gc_safe_point: src1:i len:12 clob:c
Expand Down
81 changes: 80 additions & 1 deletion src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <mono/arch/arm64/arm64-codegen.h>
#include <mono/utils/mono-mmap.h>
#include <mono/utils/mono-memory-model.h>
#include <mono/utils/mono-hwcap.h>
#include <mono/metadata/abi-details.h>
#include <mono/metadata/tokentype.h>
#include "llvm-intrinsics-types.h"
Expand Down Expand Up @@ -3835,6 +3836,28 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
}
break;
}
case OP_XOP_OVR_X_X_X_X: {
IntrinsicId iid = (IntrinsicId) ins->inst_c0;
g_assert (dreg == sreg1);
g_assert (mono_class_value_size (ins->klass, NULL) == 16);
switch (iid) {
case INTRINS_AARCH64_ADV_SIMD_SDOT:
arm_neon_sdot_4s (code, dreg, sreg2, sreg3);
break;
case INTRINS_AARCH64_ADV_SIMD_UDOT:
arm_neon_udot_4s (code, dreg, sreg2, sreg3);
break;
default:
g_assert_not_reached ();
break;
}
break;
}
case OP_ARM64_BROADCAST_ELEM:
arm_neon_smov (code, TYPE_I32, ARMREG_IP0, sreg1, ins->inst_c0);
arm_neon_dup_g_4s (code, dreg, ARMREG_IP0);
break;

case OP_XZERO:
arm_neon_eor_16b (code, dreg, dreg, dreg);
break;
Expand Down Expand Up @@ -5383,7 +5406,46 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
g_assert (ins->inst_c0 == INTRINS_BITREVERSE_I32);
arm_rbitw (code, dreg, sreg1);
break;

case OP_XOP_I4_I4_I4: {
switch (ins->inst_c0) {
case INTRINS_AARCH64_CRC32B:
arm_crc32b (code, dreg, sreg1, sreg2);
break;
case INTRINS_AARCH64_CRC32H:
arm_crc32h (code, dreg, sreg1, sreg2);
break;
case INTRINS_AARCH64_CRC32W:
arm_crc32w (code, dreg, sreg1, sreg2);
break;
case INTRINS_AARCH64_CRC32CB:
arm_crc32cb (code, dreg, sreg1, sreg2);
break;
case INTRINS_AARCH64_CRC32CH:
arm_crc32ch (code, dreg, sreg1, sreg2);
break;
case INTRINS_AARCH64_CRC32CW:
arm_crc32cw (code, dreg, sreg1, sreg2);
break;
default:
g_assert_not_reached ();
break;
}
break;
}
case OP_XOP_I4_I4_I8: {
switch (ins->inst_c0) {
case INTRINS_AARCH64_CRC32X:
arm_crc32x (code, dreg, sreg1, sreg2);
break;
case INTRINS_AARCH64_CRC32CX:
arm_crc32cx (code, dreg, sreg1, sreg2);
break;
default:
g_assert_not_reached ();
break;
}
break;
}
case OP_ARM64_HINT:
g_assert (ins->inst_c0 <= ARMHINT_SEVL);
arm_hint (code, ins->inst_c0);
Expand Down Expand Up @@ -6382,3 +6444,20 @@ mono_arm_emit_brx (guint8 *code, int reg)
{
return emit_brx (code, reg);
}

/*
 * mono_arch_get_cpu_features:
 *
 *   Build a MonoCPUFeatures mask from the mono_hwcap_arm64_* flags.
 * MONO_CPU_INITED is always part of the result. MONO_CPU_ARM64_CRYPTO is
 * reported only when the sha1, sha256 and aes flags are all set.
 */
MonoCPUFeatures
mono_arch_get_cpu_features (void)
{
	guint64 mask = MONO_CPU_INITED;

	/* Features that map one-to-one onto a single hwcap flag */
	mask |= mono_hwcap_arm64_has_crc32 ? MONO_CPU_ARM64_CRC : 0;
	mask |= mono_hwcap_arm64_has_dot ? MONO_CPU_ARM64_DP : 0;
	mask |= mono_hwcap_arm64_has_rdm ? MONO_CPU_ARM64_RDM : 0;

	/* The crypto bit needs all three of its hwcap flags */
	if (!mono_hwcap_arm64_has_sha1 || !mono_hwcap_arm64_has_sha256 || !mono_hwcap_arm64_has_aes)
		return mask;

	return mask | MONO_CPU_ARM64_CRYPTO;
}
2 changes: 2 additions & 0 deletions src/mono/mono/mini/mini-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -1722,6 +1722,8 @@ MINI_OP(OP_ARM64_SQXTUN2, "arm64_sqxtun2", XREG, XREG, XREG)

MINI_OP(OP_ARM64_SELECT_SCALAR, "arm64_select_scalar", XREG, XREG, IREG)
MINI_OP(OP_ARM64_SELECT_QUAD, "arm64_select_quad", XREG, XREG, IREG)
/* Take a word elem of sreg1 identified by inst_c0 and broadcast it to all elements of dreg */
MINI_OP(OP_ARM64_BROADCAST_ELEM, "arm64_broadcast_elem", XREG, XREG, NONE)

MINI_OP(OP_ARM64_FCVTN, "arm64_fcvtn", XREG, XREG, NONE)
MINI_OP(OP_ARM64_FCVTN2, "arm64_fcvtn2", XREG, XREG, XREG)
Expand Down
8 changes: 5 additions & 3 deletions src/mono/mono/mini/mini.c
Original file line number Diff line number Diff line change
Expand Up @@ -4455,9 +4455,11 @@ mini_get_cpu_features (MonoCompile* cfg)
#if !defined(MONO_CROSS_COMPILE)
if (!cfg->compile_aot || cfg->use_current_cpu) {
// detect current CPU features if we are in JIT mode or AOT with use_current_cpu flag.
#if defined(ENABLE_LLVM)
features = mono_llvm_get_cpu_features (); // llvm has a nice built-in API to detect features
#elif defined(TARGET_AMD64) || defined(TARGET_X86)
#if defined(ENABLE_LLVM) && !(defined(TARGET_ARM64) && defined(TARGET_OSX))
// llvm has a nice built-in API to detect features
// it is not implemented on some platforms like apple arm64
features = mono_llvm_get_cpu_features ();
#elif defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)
features = mono_arch_get_cpu_features ();
#endif
}
Expand Down
24 changes: 20 additions & 4 deletions src/mono/mono/mini/simd-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -3589,8 +3589,8 @@ static const IntrinGroup supported_arm_intrinsics [] = {
{ "AdvSimd", MONO_CPU_ARM64_NEON, advsimd_methods, sizeof (advsimd_methods) },
{ "Aes", MONO_CPU_ARM64_CRYPTO, crypto_aes_methods, sizeof (crypto_aes_methods) },
{ "ArmBase", MONO_CPU_ARM64_BASE, armbase_methods, sizeof (armbase_methods), TRUE },
{ "Crc32", MONO_CPU_ARM64_CRC, crc32_methods, sizeof (crc32_methods) },
{ "Dp", MONO_CPU_ARM64_DP, dp_methods, sizeof (dp_methods) },
{ "Crc32", MONO_CPU_ARM64_CRC, crc32_methods, sizeof (crc32_methods), TRUE },
{ "Dp", MONO_CPU_ARM64_DP, dp_methods, sizeof (dp_methods), TRUE },
{ "Rdm", MONO_CPU_ARM64_RDM, rdm_methods, sizeof (rdm_methods) },
{ "Sha1", MONO_CPU_ARM64_CRYPTO, sha1_methods, sizeof (sha1_methods) },
{ "Sha256", MONO_CPU_ARM64_CRYPTO, sha256_methods, sizeof (sha256_methods) },
Expand Down Expand Up @@ -3976,8 +3976,24 @@ emit_arm64_intrinsics (
MonoClass *quad_klass = mono_class_from_mono_type_internal (fsig->params [2]);
gboolean is_unsigned = type_is_unsigned (fsig->ret);
int iid = is_unsigned ? INTRINS_AARCH64_ADV_SIMD_UDOT : INTRINS_AARCH64_ADV_SIMD_SDOT;
MonoInst *quad = emit_simd_ins (cfg, arg_klass, OP_ARM64_SELECT_QUAD, args [2]->dreg, args [3]->dreg);
quad->data.op [1].klass = quad_klass;

MonoInst *quad;
if (!COMPILE_LLVM (cfg)) {
if (mono_class_value_size (arg_klass, NULL) != 16 || mono_class_value_size (quad_klass, NULL) != 16)
return NULL;
// FIXME: The c# api has ConstantExpected(Max = (byte)(15)), but the hw only supports
// selecting one of the 4 32 bit words
if (args [3]->opcode != OP_ICONST || args [3]->inst_c0 < 0 || args [3]->inst_c0 > 3) {
// FIXME: Throw the right exception ?
mono_emit_jit_icall (cfg, mono_throw_platform_not_supported, NULL);
return NULL;
}
quad = emit_simd_ins (cfg, klass, OP_ARM64_BROADCAST_ELEM, args [2]->dreg, -1);
quad->inst_c0 = args [3]->inst_c0;
} else {
quad = emit_simd_ins (cfg, arg_klass, OP_ARM64_SELECT_QUAD, args [2]->dreg, args [3]->dreg);
quad->data.op [1].klass = quad_klass;
}
MonoInst *ret = emit_simd_ins (cfg, ret_klass, OP_XOP_OVR_X_X_X_X, args [0]->dreg, args [1]->dreg);
ret->sreg3 = quad->dreg;
ret->inst_c0 = iid;
Expand Down
54 changes: 54 additions & 0 deletions src/mono/mono/utils/mono-hwcap-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,63 @@
* Licensed under the MIT license. See LICENSE file in the project root for full license information.
*/

#ifdef __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#endif

#include "mono/utils/mono-hwcap.h"

#ifdef __APPLE__
/*
 * apple_sysctl_flag:
 *
 *   Query the sysctl property PROP and return whether it holds a non-zero int.
 * Returns FALSE when sysctlbyname () fails (for example because the running
 * kernel does not know the name) or when the value is not int sized. A
 * property that cannot be read is treated as an absent feature rather than
 * as a fatal error.
 */
static gboolean
apple_sysctl_flag (const char *prop)
{
	int val = 0;
	size_t val_len = sizeof (val);

	if (sysctlbyname (prop, &val, &val_len, NULL, 0) != 0)
		return FALSE;
	if (val_len != sizeof (val))
		return FALSE;
	return val != 0;
}
#endif

/*
 * mono_hwcap_arch_init:
 *
 *   Fill in the mono_hwcap_arm64_* variables. Detection is only implemented
 * for Apple platforms, through the hw.optional.* sysctl properties. On every
 * other platform this function leaves the variables untouched.
 */
void
mono_hwcap_arch_init (void)
{
#ifdef __APPLE__
	mono_hwcap_arm64_has_crc32 = apple_sysctl_flag ("hw.optional.armv8_crc32");
	mono_hwcap_arm64_has_rdm = apple_sysctl_flag ("hw.optional.arm.FEAT_RDM");
	mono_hwcap_arm64_has_dot = apple_sysctl_flag ("hw.optional.arm.FEAT_DotProd");
	mono_hwcap_arm64_has_sha1 = apple_sysctl_flag ("hw.optional.arm.FEAT_SHA1");
	mono_hwcap_arm64_has_sha256 = apple_sysctl_flag ("hw.optional.arm.FEAT_SHA256");
	mono_hwcap_arm64_has_aes = apple_sysctl_flag ("hw.optional.arm.FEAT_AES");
#endif
}
7 changes: 7 additions & 0 deletions src/mono/mono/utils/mono-hwcap-vars.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ MONO_HWCAP_VAR(arm_has_thumb2)

#elif defined (TARGET_ARM64)

MONO_HWCAP_VAR(arm64_has_crc32)
MONO_HWCAP_VAR(arm64_has_dot)
MONO_HWCAP_VAR(arm64_has_rdm)
MONO_HWCAP_VAR(arm64_has_sha1)
MONO_HWCAP_VAR(arm64_has_sha256)
MONO_HWCAP_VAR(arm64_has_aes)

// Set by mono_hwcap_arch_init (); only Apple targets populate them so far.

#elif defined (TARGET_POWERPC) || defined (TARGET_POWERPC64)
Expand Down

0 comments on commit 4de58bf

Please sign in to comment.