diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index ac75834f317e0..47cc0fb6bfd19 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7598,6 +7598,132 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_UNPREDICATED); theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 255, INS_OPTS_NONE, INS_SCALABLE_OPTS_UNPREDICATED); + + // IF_SVE_HX_3A_B + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V0, REG_P0, REG_V1, 0, + INS_OPTS_SCALABLE_S); // LD1B {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V2, REG_P7, REG_V3, 5, + INS_OPTS_SCALABLE_S); // LD1SB {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1b, EA_SCALABLE, REG_V4, REG_P3, REG_V1, 5, + INS_OPTS_SCALABLE_S); // LDFF1B {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sb, EA_SCALABLE, REG_V2, REG_P6, REG_V0, 31, + INS_OPTS_SCALABLE_S); // LDFF1SB {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1b, EA_SCALABLE, REG_V0, REG_P0, REG_V1, 0, + INS_OPTS_SCALABLE_D); // LD1B {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sb, EA_SCALABLE, REG_V2, REG_P7, REG_V3, 5, + INS_OPTS_SCALABLE_D); // LD1SB {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1b, EA_SCALABLE, REG_V4, REG_P3, REG_V1, 5, + INS_OPTS_SCALABLE_D); // LDFF1B {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sb, EA_SCALABLE, REG_V2, REG_P6, REG_V0, 31, + INS_OPTS_SCALABLE_D); // LDFF1SB {.D }, /Z, [.D{, #}] + + // IF_SVE_HX_3A_E + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V1, REG_P0, REG_V2, 0, + INS_OPTS_SCALABLE_S); // LD1H {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P4, REG_V3, 2, + INS_OPTS_SCALABLE_S); // LD1SH {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V1, REG_P2, REG_V9, 124, + INS_OPTS_SCALABLE_S); // LD1W 
{.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, EA_SCALABLE, REG_V4, REG_P7, REG_V3, 6, + INS_OPTS_SCALABLE_S); // LDFF1H {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V3, REG_P5, REG_V4, 62, + INS_OPTS_SCALABLE_S); // LDFF1SH {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V2, REG_P1, REG_V3, 124, + INS_OPTS_SCALABLE_S); // LDFF1W {.S }, /Z, [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1h, EA_SCALABLE, REG_V1, REG_P0, REG_V2, 0, + INS_OPTS_SCALABLE_D); // LD1H {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P4, REG_V3, 2, + INS_OPTS_SCALABLE_D); // LD1SH {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1w, EA_SCALABLE, REG_V1, REG_P2, REG_V9, 124, + INS_OPTS_SCALABLE_D); // LD1W {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1h, EA_SCALABLE, REG_V4, REG_P7, REG_V3, 6, + INS_OPTS_SCALABLE_D); // LDFF1H {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sh, EA_SCALABLE, REG_V3, REG_P5, REG_V4, 62, + INS_OPTS_SCALABLE_D); // LDFF1SH {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1w, EA_SCALABLE, REG_V2, REG_P1, REG_V3, 124, + INS_OPTS_SCALABLE_D); // LDFF1W {.D }, /Z, [.D{, #}] + + // IF_SVE_IV_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 0, + INS_OPTS_SCALABLE_D); // LD1D {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1sw, EA_SCALABLE, REG_V6, REG_P5, REG_V4, 0, + INS_OPTS_SCALABLE_D); // LD1SW {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1d, EA_SCALABLE, REG_V7, REG_P3, REG_V1, 248, + INS_OPTS_SCALABLE_D); // LDFF1D {.D }, /Z, [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ldff1sw, EA_SCALABLE, REG_V2, REG_P0, REG_V4, 124, + INS_OPTS_SCALABLE_D); // LDFF1SW {.D }, /Z, [.D{, #}] + + // IF_SVE_JI_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 0, + 
INS_OPTS_SCALABLE_S); // ST1B {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 31, + INS_OPTS_SCALABLE_S); // ST1B {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 0, + INS_OPTS_SCALABLE_S); // ST1H {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 62, + INS_OPTS_SCALABLE_S); // ST1H {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 0, + INS_OPTS_SCALABLE_S); // ST1W {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 124, + INS_OPTS_SCALABLE_S); // ST1W {.S }, , [.S{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 0, + INS_OPTS_SCALABLE_D); // ST1B {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1b, EA_SCALABLE, REG_V1, REG_P2, REG_V3, 31, + INS_OPTS_SCALABLE_D); // ST1B {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 0, + INS_OPTS_SCALABLE_D); // ST1H {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1h, EA_SCALABLE, REG_V5, REG_P3, REG_V2, 62, + INS_OPTS_SCALABLE_D); // ST1H {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 0, + INS_OPTS_SCALABLE_D); // ST1W {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V5, REG_P4, REG_V1, 124, + INS_OPTS_SCALABLE_D); // ST1W {.D }, , [.D{, #}] + + // IF_SVE_JL_3A + theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P7, REG_V4, 0, + INS_OPTS_SCALABLE_D); // ST1D {.D }, , [.D{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P7, REG_V4, 248, + INS_OPTS_SCALABLE_D); // ST1D {.D }, , [.D{, #}] + + // IF_SVE_IC_3A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rd, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 504, + INS_OPTS_SCALABLE_D); // LD1RD {.D }, /Z, [{, #}] + 
theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsw, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 252, + INS_OPTS_SCALABLE_D); // LD1RSW {.D }, /Z, [{, #}] + + // IF_SVE_IC_3A_A + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 0, + INS_OPTS_SCALABLE_S); // LD1RSH {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 0, + INS_OPTS_SCALABLE_S); // LD1RW {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsh, EA_SCALABLE, REG_V0, REG_P1, REG_R2, 126, + INS_OPTS_SCALABLE_D); // LD1RSH {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rw, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 252, + INS_OPTS_SCALABLE_D); // LD1RW {.D }, /Z, [{, #}] + + // IF_SVE_IC_3A_B + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V0, REG_P2, REG_R3, 0, + INS_OPTS_SCALABLE_H); // LD1RH {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V6, REG_P5, REG_R4, 0, + INS_OPTS_SCALABLE_H); // LD1RSB {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 126, + INS_OPTS_SCALABLE_S); // LD1RH {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V2, REG_P1, REG_R0, 63, + INS_OPTS_SCALABLE_S); // LD1RSB {.S }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rh, EA_SCALABLE, REG_V3, REG_P2, REG_R1, 126, + INS_OPTS_SCALABLE_D); // LD1RH {.D }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rsb, EA_SCALABLE, REG_V4, REG_P5, REG_R6, 63, + INS_OPTS_SCALABLE_D); // LD1RSB {.D }, /Z, [{, #}] + + // IF_SVE_IC_3A_C + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V1, REG_P2, REG_R3, 0, + INS_OPTS_SCALABLE_B); // LD1RB {.B }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V5, REG_P4, REG_R3, 63, + INS_OPTS_SCALABLE_H); // LD1RB {.H }, /Z, [{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V6, REG_P7, REG_R8, 0, + INS_OPTS_SCALABLE_S); // LD1RB {.S }, /Z, 
[{, #}] + theEmitter->emitIns_R_R_R_I(INS_sve_ld1rb, EA_SCALABLE, REG_V1, REG_P0, REG_R9, 63, + INS_OPTS_SCALABLE_B); // LD1RB {.D }, /Z, [{, #}] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 6028223760979..dea80c05e6b8b 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2355,6 +2355,89 @@ void emitter::emitInsSanityCheck(instrDesc* id) // iiiiii break; + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isValidUimm5(emitGetInsSC(id))); + break; + + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + break; + + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + break; + + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + 
break; + + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isValidUimm5_MultipleOf8(emitGetInsSC(id))); + break; + + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + elemsize = id->idOpSize(); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isScalableVectorSize(elemsize)); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -11498,101 +11581,6 @@ 
void emitter::emitIns_R_R_R_I(instruction ins, } break; - case INS_sve_cmpeq: - case INS_sve_cmpgt: - case INS_sve_cmpge: - case INS_sve_cmpne: - case INS_sve_cmple: - case INS_sve_cmplt: - assert(insOptsScalableStandard(opt)); - assert(isPredicateRegister(reg1)); // DDDD - assert(isLowPredicateRegister(reg2)); // ggg - assert(isVectorRegister(reg3)); // nnnnn - assert(isValidSimm5(imm)); // iiiii - fmt = IF_SVE_CY_3A; - break; - - case INS_sve_cmphi: - case INS_sve_cmphs: - case INS_sve_cmplo: - case INS_sve_cmpls: - assert(insOptsScalableStandard(opt)); - assert(isPredicateRegister(reg1)); // DDDD - assert(isLowPredicateRegister(reg2)); // ggg - assert(isVectorRegister(reg3)); // nnnnn - assert(isValidUimm7(imm)); // iiiii - fmt = IF_SVE_CY_3B; - break; - - case INS_sve_sdot: - case INS_sve_udot: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isLowVectorRegister(reg3)); // mmmm - - if (opt == INS_OPTS_SCALABLE_B) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_EY_3A; - } - else if (opt == INS_OPTS_SCALABLE_H) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_EG_3A; - } - else - { - assert(insOptsNone(opt)); - assert(isValidImm1(imm)); // i - opt = INS_OPTS_SCALABLE_H; - fmt = IF_SVE_EY_3B; - } - break; - - case INS_sve_usdot: - case INS_sve_sudot: - assert(opt == INS_OPTS_SCALABLE_B); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); // mmm - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); - assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_EZ_3A; - break; - - case INS_sve_mul: - assert(insOptsScalableAtLeastHalf(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isLowVectorRegister(reg3)); // mmmm - - switch (opt) - { - case INS_OPTS_SCALABLE_H: - assert(isValidUimm3(imm)); // iii - 
assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - fmt = IF_SVE_FD_3A; - break; - - case INS_OPTS_SCALABLE_S: - assert(isValidUimm2(imm)); // ii - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - fmt = IF_SVE_FD_3B; - break; - - case INS_OPTS_SCALABLE_D: - assert(isValidImm1(imm)); // i - fmt = IF_SVE_FD_3C; - break; - - default: - unreached(); - break; - } - break; - case INS_fmul: // by element, imm[0..3] selects the element of reg3 case INS_fmla: case INS_fmls: @@ -11832,109 +11820,499 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_DV_3AI; break; - case INS_sve_cdot: - assert(insOptsScalableWords(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); // mmmmm - assert(isValidRot(imm)); // rr - assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx - - // Convert rot to bitwise representation - imm = emitEncodeRotationImm0_to_270(imm); - fmt = IF_SVE_EJ_3A; - break; + default: + // fallback to emit SVE instructions. 
+ return emitInsSve_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt, attrReg2); - case INS_sve_cmla: - case INS_sve_sqrdcmlah: - assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); // mmmmm - assert(isValidRot(imm)); // rr - assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + } // end switch (ins) - // Convert rot to bitwise representation - imm = emitEncodeRotationImm0_to_270(imm); - fmt = IF_SVE_EK_3A; - break; + if (isLdSt) + { + assert(!isAddSub); + assert(isGeneralRegisterOrSP(reg3)); + assert(insOptsNone(opt) || insOptsIndexed(opt)); - case INS_sve_ld1d: - assert(insOptsScalable(opt)); + if (isSIMD) + { + assert(isValidVectorLSPDatasize(size)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - if (opt == INS_OPTS_SCALABLE_Q) + assert(isVectorRegister(reg2)); + assert((scale >= 2) && (scale <= 4)); + } + else + { + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert((scale == 2) || (scale == 3)); + } + + // Load/Store Pair reserved encodings: + if (emitInsIsLoad(ins)) + { + assert(reg1 != reg2); + } + if (insOptsIndexed(opt)) + { + assert(reg1 != reg3); + assert(reg2 != reg3); + } + + reg3 = encodingSPtoZR(reg3); + + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) + { + assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero + + fmt = IF_LS_3B; + } + else + { + if ((imm & mask) == 0) { - fmt = IF_SVE_IH_3A_A; + imm >>= scale; // The immediate is scaled by the size of the ld/st + + if ((imm >= -64) && (imm <= 63)) + { + fmt = IF_LS_3C; + } } - else +#ifdef DEBUG + if (fmt != IF_LS_3C) { - assert(opt == INS_OPTS_SCALABLE_D); - fmt = IF_SVE_IH_3A; + assert(!"Instruction cannot be encoded: 
IF_LS_3C"); + } +#endif + } + } + else if (isAddSub) + { + bool reg2IsSP = (reg2 == REG_SP); + assert(!isLdSt); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg3)); + + if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option + { + assert(isGeneralRegisterOrZR(reg1)); + } + else + { + assert(isGeneralRegisterOrSP(reg1)); + reg1 = encodingSPtoZR(reg1); + } + + if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option + { + assert(isGeneralRegister(reg2)); + } + else + { + assert(isGeneralRegisterOrSP(reg2)); + reg2 = encodingSPtoZR(reg2); + } + + if (insOptsAnyExtend(opt)) + { + assert((imm >= 0) && (imm <= 4)); + + fmt = IF_DR_3C; + } + else if (insOptsAluShift(opt)) + { + // imm should be non-zero and in [1..63] + assert(isValidImmShift(imm, size) && (imm != 0)); + fmt = IF_DR_3B; + } + else if (imm == 0) + { + assert(insOptsNone(opt)); + + if (reg2IsSP) + { + // To encode the SP register as reg2 we must use the IF_DR_3C encoding + // and also specify a LSL of zero (imm == 0) + opt = INS_OPTS_LSL; + fmt = IF_DR_3C; + } + else + { + fmt = IF_DR_3A; + } + } + else + { + assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A"); + } + } + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + + // Record the attribute for the second register in the pair + id->idGCrefReg2(GCT_NONE); + if (attrReg2 != EA_UNKNOWN) + { + // Record the attribute for the second register in the pair + assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C)); + if (EA_IS_GCREF(attrReg2)) + { + id->idGCrefReg2(GCT_GCREF); + } + else if (EA_IS_BYREF(attrReg2)) + { + id->idGCrefReg2(GCT_BYREF); + } + } + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add a SVE instruction referencing three registers and 
a constant. + */ + +void emitter::emitInsSve_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + emitAttr attrReg2 /* = EA_UNKNOWN */) +{ + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction */ + switch (ins) + { + case INS_sve_cmpeq: + case INS_sve_cmpgt: + case INS_sve_cmpge: + case INS_sve_cmpne: + case INS_sve_cmple: + case INS_sve_cmplt: + assert(insOptsScalableStandard(opt)); + assert(isPredicateRegister(reg1)); // DDDD + assert(isLowPredicateRegister(reg2)); // ggg + assert(isVectorRegister(reg3)); // nnnnn + assert(isValidSimm5(imm)); // iiiii + fmt = IF_SVE_CY_3A; + break; + + case INS_sve_cmphi: + case INS_sve_cmphs: + case INS_sve_cmplo: + case INS_sve_cmpls: + assert(insOptsScalableStandard(opt)); + assert(isPredicateRegister(reg1)); // DDDD + assert(isLowPredicateRegister(reg2)); // ggg + assert(isVectorRegister(reg3)); // nnnnn + assert(isValidUimm7(imm)); // iiiii + fmt = IF_SVE_CY_3B; + break; + + case INS_sve_sdot: + case INS_sve_udot: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm + + if (opt == INS_OPTS_SCALABLE_B) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_EY_3A; + } + else if (opt == INS_OPTS_SCALABLE_H) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_EG_3A; + } + else + { + assert(insOptsNone(opt)); + assert(isValidImm1(imm)); // i + opt = INS_OPTS_SCALABLE_H; + fmt = IF_SVE_EY_3B; + } + break; + + case INS_sve_usdot: + case INS_sve_sudot: + assert(opt == INS_OPTS_SCALABLE_B); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmm + assert((REG_V0 <= reg3) && (reg3 <= 
REG_V7)); + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_EZ_3A; + break; + + case INS_sve_mul: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm + + switch (opt) + { + case INS_OPTS_SCALABLE_H: + assert(isValidUimm3(imm)); // iii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FD_3A; + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm2(imm)); // ii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FD_3B; + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidImm1(imm)); // i + fmt = IF_SVE_FD_3C; + break; + + default: + unreached(); + break; + } + break; + + case INS_sve_cdot: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidRot(imm)); // rr + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + + // Convert rot to bitwise representation + imm = emitEncodeRotationImm0_to_270(imm); + fmt = IF_SVE_EJ_3A; + break; + + case INS_sve_cmla: + case INS_sve_sqrdcmlah: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidRot(imm)); // rr + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + + // Convert rot to bitwise representation + imm = emitEncodeRotationImm0_to_270(imm); + fmt = IF_SVE_EK_3A; + break; + + case INS_sve_ld1d: + assert(insOptsScalable(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + if (opt == INS_OPTS_SCALABLE_Q) + { + fmt = IF_SVE_IH_3A_A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + fmt = IF_SVE_IH_3A; + } + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg3)); 
+ assert(isValidUimm5_MultipleOf8(imm)); + fmt = IF_SVE_IV_3A; + } + break; + + case INS_sve_ldff1d: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf8(imm)); + fmt = IF_SVE_IV_3A; + break; + + case INS_sve_ld1w: + assert(insOptsScalableWordsOrQuadwords(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IH_3A_F; + } + else + { + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_HX_3A_E; } break; - case INS_sve_ld1w: - assert(insOptsScalableWordsOrQuadwords(opt)); - assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IH_3A_F; - break; - case INS_sve_ld1sw: assert(opt == INS_OPTS_SCALABLE_D); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A; + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A; + } + else + { + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_IV_3A; + } + break; + + case INS_sve_ldff1sw: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_IV_3A; break; case INS_sve_ld1sb: assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A_D; + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isGeneralRegister(reg3)); + 
assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_D; + } + else + { + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5(imm)); + fmt = IF_SVE_HX_3A_B; + } break; case INS_sve_ld1b: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A_E; + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_E; + } + else + { + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5(imm)); + fmt = IF_SVE_HX_3A_B; + } + break; + + case INS_sve_ldff1b: + case INS_sve_ldff1sb: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5(imm)); + fmt = IF_SVE_HX_3A_B; break; case INS_sve_ld1sh: assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A_F; + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_F; + } + else + { + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf2(imm)); + fmt = IF_SVE_HX_3A_E; + } + break; + + case INS_sve_ld1h: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + fmt = IF_SVE_IJ_3A_G; + } + else + { + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf2(imm)); + fmt = IF_SVE_HX_3A_E; + } + break; + + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + 
assert(isValidUimm5_MultipleOf2(imm)); + fmt = IF_SVE_HX_3A_E; break; - case INS_sve_ld1h: - assert(insOptsScalableAtLeastHalf(opt)); + case INS_sve_ldff1w: + assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - fmt = IF_SVE_IJ_3A_G; + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_HX_3A_E; break; case INS_sve_ldnf1sw: case INS_sve_ldnf1d: assert(opt == INS_OPTS_SCALABLE_D); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); fmt = IF_SVE_IL_3A; @@ -11944,7 +12322,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnf1w: assert(insOptsScalableWords(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); fmt = IF_SVE_IL_3A_A; @@ -11954,7 +12332,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnf1sb: assert(insOptsScalableAtLeastHalf(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); fmt = IF_SVE_IL_3A_B; @@ -11975,7 +12353,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ldnt1d: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); @@ -12017,7 +12395,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld1rod: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ 
-12078,7 +12456,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld4q: assert(opt == INS_OPTS_SCALABLE_Q); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ -12119,7 +12497,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_ld4d: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ -12191,7 +12569,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st4q: assert(opt == INS_OPTS_SCALABLE_Q); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ -12224,7 +12602,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_stnt1d: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isValidSimm4(imm)); @@ -12259,33 +12637,51 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st1w: case INS_sve_st1d: assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); + assert(isLowPredicateRegister(reg2)); - if (opt == INS_OPTS_SCALABLE_Q && (ins == INS_sve_st1d)) - { - fmt = IF_SVE_JN_3C_D; - } - else + if (isGeneralRegister(reg3)) { - if ((ins == INS_sve_st1w) && insOptsScalableWords(opt)) + assert(isValidSimm4(imm)); + + if (opt == INS_OPTS_SCALABLE_Q && (ins == INS_sve_st1d)) { - fmt = IF_SVE_JN_3B; + fmt = IF_SVE_JN_3C_D; } else { -#if DEBUG - if (ins == INS_sve_st1w) + if ((ins == INS_sve_st1w) && insOptsScalableWords(opt)) { - assert(opt == INS_OPTS_SCALABLE_Q); + fmt = IF_SVE_JN_3B; } else { - assert(opt == INS_OPTS_SCALABLE_D); - } +#if DEBUG + if (ins == 
INS_sve_st1w) + { + assert(opt == INS_OPTS_SCALABLE_Q); + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + } #endif // DEBUG - fmt = IF_SVE_JN_3C; + fmt = IF_SVE_JN_3C; + } + } + } + else + { + assert(isVectorRegister(reg3)); + if ((ins == INS_sve_st1w) && insOptsScalableWords(opt)) + { + assert(isValidUimm5_MultipleOf4(imm)); + fmt = IF_SVE_JI_3A_A; + } + else + { + assert(ins == INS_sve_st1d); + assert(isValidUimm5_MultipleOf8(imm)); + fmt = IF_SVE_JL_3A; } } break; @@ -12304,7 +12700,7 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st4d: assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); + assert(isLowPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); #ifdef DEBUG @@ -12374,12 +12770,39 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_sve_st1b: case INS_sve_st1h: assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidSimm4(imm)); - // st1h is reserved for scalable B - assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : insOptsScalableStandard(opt)); - fmt = IF_SVE_JN_3A; + assert(isLowPredicateRegister(reg2)); + + if (isGeneralRegister(reg3)) + { + assert(isValidSimm4(imm)); + // st1h is reserved for scalable B + assert((ins == INS_sve_st1h) ? 
insOptsScalableAtLeastHalf(opt) : insOptsScalableStandard(opt)); + fmt = IF_SVE_JN_3A; + } + else + { + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg3)); + +#ifdef DEBUG + switch (ins) + { + case INS_sve_st1b: + assert(isValidUimm5(imm)); + break; + + case INS_sve_st1h: + assert(isValidUimm5_MultipleOf2(imm)); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + + fmt = IF_SVE_JI_3A_A; + } break; case INS_sve_fmla: @@ -12620,190 +13043,131 @@ void emitter::emitIns_R_R_R_I(instruction ins, { assert(opt == INS_OPTS_SCALABLE_S); assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_FJ_3B; - } - break; - - case INS_sve_sqrdmlah: - case INS_sve_sqrdmlsh: - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isLowVectorRegister(reg3)); // mmmm - - if (opt == INS_OPTS_SCALABLE_H) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm3(imm)); // i ii - fmt = IF_SVE_FK_3A; - } - else if (opt == INS_OPTS_SCALABLE_S) - { - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm - assert(isValidUimm2(imm)); // ii - fmt = IF_SVE_FK_3B; - } - else - { - assert(opt == INS_OPTS_SCALABLE_D); - assert(isValidImm1(imm)); // i - fmt = IF_SVE_FK_3C; - } - break; - - case INS_sve_fcadd: - assert(insOptsScalableAtLeastHalf(opt)); - assert(isVectorRegister(reg1)); - assert(isLowPredicateRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(isScalableVectorSize(size)); - imm = emitEncodeRotationImm90_or_270(imm); - fmt = IF_SVE_GP_3A; - break; - - case INS_sve_fmlalb: - case INS_sve_fmlalt: - case INS_sve_fmlslb: - case INS_sve_fmlslt: - case INS_sve_bfmlalb: - case INS_sve_bfmlalt: - case INS_sve_bfmlslb: - case INS_sve_bfmlslt: - assert(opt == INS_OPTS_SCALABLE_H); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); // mmm - assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); - assert(isValidUimm3(imm)); // ii i 
- fmt = IF_SVE_GZ_3A; - break; - - default: - unreached(); - break; - - } // end switch (ins) - - if (isLdSt) - { - assert(!isAddSub); - assert(isGeneralRegisterOrSP(reg3)); - assert(insOptsNone(opt) || insOptsIndexed(opt)); - - if (isSIMD) - { - assert(isValidVectorLSPDatasize(size)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert((scale >= 2) && (scale <= 4)); - } - else - { - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert((scale == 2) || (scale == 3)); - } - - // Load/Store Pair reserved encodings: - if (emitInsIsLoad(ins)) - { - assert(reg1 != reg2); - } - if (insOptsIndexed(opt)) - { - assert(reg1 != reg3); - assert(reg2 != reg3); - } - - reg3 = encodingSPtoZR(reg3); - - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (imm == 0) - { - assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero - - fmt = IF_LS_3B; - } - else - { - if ((imm & mask) == 0) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - - if ((imm >= -64) && (imm <= 63)) - { - fmt = IF_LS_3C; - } - } -#ifdef DEBUG - if (fmt != IF_LS_3C) - { - assert(!"Instruction cannot be encoded: IF_LS_3C"); - } -#endif - } - } - else if (isAddSub) - { - bool reg2IsSP = (reg2 == REG_SP); - assert(!isLdSt); - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg3)); - - if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option - { - assert(isGeneralRegisterOrZR(reg1)); - } - else - { - assert(isGeneralRegisterOrSP(reg1)); - reg1 = encodingSPtoZR(reg1); - } - - if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option - { - assert(isGeneralRegister(reg2)); - } - else - { - assert(isGeneralRegisterOrSP(reg2)); - reg2 = encodingSPtoZR(reg2); - } - - if (insOptsAnyExtend(opt)) - { - assert((imm >= 0) && (imm <= 4)); - - fmt = 
IF_DR_3C; - } - else if (insOptsAluShift(opt)) - { - // imm should be non-zero and in [1..63] - assert(isValidImmShift(imm, size) && (imm != 0)); - fmt = IF_DR_3B; - } - else if (imm == 0) - { - assert(insOptsNone(opt)); + fmt = IF_SVE_FJ_3B; + } + break; - if (reg2IsSP) + case INS_sve_sqrdmlah: + case INS_sve_sqrdmlsh: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isLowVectorRegister(reg3)); // mmmm + + if (opt == INS_OPTS_SCALABLE_H) { - // To encode the SP register as reg2 we must use the IF_DR_3C encoding - // and also specify a LSL of zero (imm == 0) - opt = INS_OPTS_LSL; - fmt = IF_DR_3C; + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm3(imm)); // i ii + fmt = IF_SVE_FK_3A; + } + else if (opt == INS_OPTS_SCALABLE_S) + { + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + assert(isValidUimm2(imm)); // ii + fmt = IF_SVE_FK_3B; } else { - fmt = IF_DR_3A; + assert(opt == INS_OPTS_SCALABLE_D); + assert(isValidImm1(imm)); // i + fmt = IF_SVE_FK_3C; } - } - else - { - assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A"); - } - } + break; + + case INS_sve_fcadd: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isScalableVectorSize(size)); + imm = emitEncodeRotationImm90_or_270(imm); + fmt = IF_SVE_GP_3A; + break; + + case INS_sve_ld1rd: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf8(imm)); + fmt = IF_SVE_IC_3A; + break; + + case INS_sve_ld1rsw: + assert(opt == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf4(imm)); + fmt = IF_SVE_IC_3A; + break; + + case INS_sve_ld1rsh: + assert(insOptsScalableWords(opt)); + 
assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf2(imm)); + fmt = IF_SVE_IC_3A_A; + break; + + case INS_sve_ld1rw: + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf4(imm)); + fmt = IF_SVE_IC_3A_A; + break; + + case INS_sve_ld1rh: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6_MultipleOf2(imm)); + fmt = IF_SVE_IC_3A_B; + break; + + case INS_sve_ld1rsb: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6(imm)); + fmt = IF_SVE_IC_3A_B; + break; + + case INS_sve_ld1rb: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidUimm6(imm)); + fmt = IF_SVE_IC_3A_C; + break; + + case INS_sve_fmlalb: + case INS_sve_fmlalt: + case INS_sve_fmlslb: + case INS_sve_fmlslt: + case INS_sve_bfmlalb: + case INS_sve_bfmlalt: + case INS_sve_bfmlslb: + case INS_sve_bfmlslt: + assert(opt == INS_OPTS_SCALABLE_H); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmm + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); + assert(isValidUimm3(imm)); // ii i + fmt = IF_SVE_GZ_3A; + break; + + default: + unreached(); + break; + } // end switch (ins) assert(fmt != IF_NONE); instrDesc* id = emitNewInstrCns(attr, imm); @@ -12816,22 +13180,6 @@ void emitter::emitIns_R_R_R_I(instruction ins, id->idReg2(reg2); id->idReg3(reg3); - // Record the attribute for the second register in the pair - id->idGCrefReg2(GCT_NONE); - if (attrReg2 != EA_UNKNOWN) - { - // Record the 
attribute for the second register in the pair - assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C)); - if (EA_IS_GCREF(attrReg2)) - { - id->idGCrefReg2(GCT_GCREF); - } - else if (EA_IS_BYREF(attrReg2)) - { - id->idGCrefReg2(GCT_BYREF); - } - } - dispIns(id); appendToCurIG(id); } @@ -17445,6 +17793,60 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30' or '21'. + * This only works on select formats. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_30_or_21(insFormat fmt, emitAttr size) +{ + switch (fmt) + { + case IF_SVE_HX_3A_B: + case IF_SVE_HX_3A_E: + switch (size) + { + case EA_4BYTE: + return 0; + + case EA_8BYTE: + return (1 << 30); + + default: + break; + } + + assert(!"Invalid size for vector register"); + return 0; + + case IF_SVE_IV_3A: + assert(size == EA_8BYTE); + return 0; + + case IF_SVE_JI_3A_A: + switch (size) + { + case EA_4BYTE: + return (1 << 21); + + case EA_8BYTE: + return 0; + + default: + break; + } + + assert(!"Invalid size for vector register"); + return 0; + + default: + break; + } + + assert(!"Unexpected instruction format"); + return 0; +} /***************************************************************************** * * Returns the encoding for the field 'i1:tszh:tszl' at bit locations '23-22:20-18'. @@ -17627,6 +18029,7 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_sve_ld1q: case INS_sve_ldnt1sw: case INS_sve_st1q: + case INS_sve_ld1rb: return 1; case INS_sve_ld2b: @@ -19032,34 +19435,163 @@ void emitter::emitIns_Call(EmitCallType callType, } break; - case EA_8BYTE: - switch (fmt) + case EA_8BYTE: + switch (fmt) + { + case IF_SVE_IH_3A_F: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for D.
+ return ((code | (1 << 15)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '15' to 1. + + case IF_SVE_II_4A_H: + // Note: Bit '14' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for D. + return ((code | (1 << 14)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '14' to 1. + + default: + break; + } + break; + + case EA_16BYTE: + switch (fmt) + { + case IF_SVE_IH_3A_F: + return code | (1 << 20); // Set bit '20' to 1. + + case IF_SVE_II_4A_H: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for Q. + return code | (1 << 15); // Set bit '15' to 1. + + default: + break; + } + break; + + default: + assert(!"Invalid size for encoding dtype."); + break; + } + + assert(!"Invalid instruction format"); + return code; +} + +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + * for the 'dtypeh' and 'dtypel' fields. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtypeh_dtypel(instruction ins, + insFormat fmt, + emitAttr size, + code_t code) +{ + switch (fmt) + { + case IF_SVE_IC_3A_A: + switch (size) + { + case EA_4BYTE: + switch (ins) + { + case INS_sve_ld1rsh: + return code | (1 << 13); // set bit '13' + + case INS_sve_ld1rw: + return code | (1 << 14); // set bit '14' + + default: + break; + } + break; + + case EA_8BYTE: + switch (ins) + { + case INS_sve_ld1rsh: + return code; + + case INS_sve_ld1rw: + return code | (1 << 14) | (1 << 13); // set bits '14' and '13' + + default: + break; + } + break; + + default: + break; + } + break; + + case IF_SVE_IC_3A_B: + switch (size) { - case IF_SVE_IH_3A_F: - // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the - // proper encoding for D. 
- return ((code | (1 << 15)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '15' to 1. + case EA_2BYTE: + switch (ins) + { + case INS_sve_ld1rh: + return code | (1 << 13); // set bit '13' - case IF_SVE_II_4A_H: - // Note: Bit '14' is not actually part of 'dtype', but it is necessary to set to '1' to get the - // proper encoding for D. - return ((code | (1 << 14)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '14' to 1. + case INS_sve_ld1rsb: + return code | (1 << 24) | (1 << 14); // set bit '24' and '14' + + default: + break; + } + break; + + case EA_4BYTE: + switch (ins) + { + case INS_sve_ld1rh: + return code | (1 << 14); // set bit '14' + + case INS_sve_ld1rsb: + return code | (1 << 24) | (1 << 13); // set bit '24' and '13' + + default: + break; + } + break; + + case EA_8BYTE: + switch (ins) + { + case INS_sve_ld1rh: + return code | (1 << 14) | (1 << 13); // set bits '14' and '13' + + case INS_sve_ld1rsb: + return code | (1 << 24); // set bit '24' + + default: + break; + } + break; default: break; } break; - case EA_16BYTE: - switch (fmt) + case IF_SVE_IC_3A_C: + assert(ins == INS_sve_ld1rb); + switch (size) { - case IF_SVE_IH_3A_F: - return code | (1 << 20); // Set bit '20' to 1. + case EA_1BYTE: + return code; - case IF_SVE_II_4A_H: - // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the - // proper encoding for Q. - return code | (1 << 15); // Set bit '15' to 1. 
+ case EA_2BYTE: + return code | (1 << 13); // set bit '13' + + case EA_4BYTE: + return code | (1 << 14); // set bit '14' + + case EA_8BYTE: + return code | (1 << 14) | (1 << 13); // set bits '14' and '13' default: break; @@ -19067,11 +19599,10 @@ void emitter::emitIns_Call(EmitCallType callType, break; default: - assert(!"Invalid size for encoding dtype."); break; } - assert(!"Invalid instruction format"); + assert(!"Unexpected instruction format"); return code; } @@ -19167,27 +19698,82 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. + * Returns the encoding for the immediate value that is a multiple of 2 as 5-bits at bit locations '20-16'. */ -/*static*/ emitter::code_t emitter::insEncodeSimm5_20_to_16(ssize_t imm) +/*static*/ emitter::code_t emitter::insEncodeUimm5_MultipleOf2_20_to_16(ssize_t imm) { - assert(isValidSimm5(imm)); - if (imm < 0) - { - imm = (imm & 0x1F); - } - return (code_t)imm << 16; + assert(isValidUimm5_MultipleOf2(imm)); + return insEncodeUimm5_20_to_16(imm / 2); } /***************************************************************************** * - * Returns the encoding for the unsigned immediate value as 5-bits at bit locations '20-16'. + * Returns the encoding for the immediate value that is a multiple of 4 as 5-bits at bit locations '20-16'. */ -/*static*/ emitter::code_t emitter::insEncodeUimm5_20_to_16(ssize_t imm) +/*static*/ emitter::code_t emitter::insEncodeUimm5_MultipleOf4_20_to_16(ssize_t imm) { - assert(isValidUimm5(imm)); + assert(isValidUimm5_MultipleOf4(imm)); + return insEncodeUimm5_20_to_16(imm / 4); +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 8 as 5-bits at bit locations '20-16'.
+ */ + +/*static*/ emitter::code_t emitter::insEncodeUimm5_MultipleOf8_20_to_16(ssize_t imm) +{ + assert(isValidUimm5_MultipleOf8(imm)); + return insEncodeUimm5_20_to_16(imm / 8); +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 2 as 6-bits at bit locations '21-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm6_MultipleOf2_21_to_16(ssize_t imm) +{ + assert(isValidUimm6_MultipleOf2(imm)); + return insEncodeUimm6_21_to_16(imm / 2); +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 4 as 6-bits at bit locations '21-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm6_MultipleOf4_21_to_16(ssize_t imm) +{ + assert(isValidUimm6_MultipleOf4(imm)); + return insEncodeUimm6_21_to_16(imm / 4); +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value that is a multiple of 8 as 6-bits at bit locations '21-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm6_MultipleOf8_21_to_16(ssize_t imm) +{ + assert(isValidUimm6_MultipleOf8(imm)); + return insEncodeUimm6_21_to_16(imm / 8); +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeSimm5_20_to_16(ssize_t imm) +{ + assert(isValidSimm5(imm)); + if (imm < 0) + { + imm = (imm & 0x1F); + } return (code_t)imm << 16; } @@ -19279,6 +19865,28 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)(imm - 1) << 16; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 5-bits at bit locations '20-16'.
+ */ + +/*static*/ emitter::code_t emitter::insEncodeUimm5_20_to_16(ssize_t imm) +{ + assert(isValidUimm5(imm)); + return (code_t)imm << 16; +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value as 6-bits at bit locations '21-16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeUimm6_21_to_16(ssize_t imm) +{ + assert(isValidUimm6(imm)); + return (code_t)imm << 16; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. @@ -22708,6 +23316,136 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeUimm5_20_to_16(imm); // iiiii + code |= insEncodeSveElemsize_30_or_21(fmt, optGetSveElemsize(id->idInsOpt())); + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeSveElemsize_30_or_21(fmt, optGetSveElemsize(id->idInsOpt())); + + switch (ins) + { + case INS_sve_ld1d: + case INS_sve_ldff1d: + code |= insEncodeUimm5_MultipleOf8_20_to_16(imm); // iiiii + break; + + case INS_sve_ld1w: + case INS_sve_ld1sw: + case 
INS_sve_ldff1w: + case INS_sve_ldff1sw: + code |= insEncodeUimm5_MultipleOf4_20_to_16(imm); // iiiii + break; + + default: + code |= insEncodeUimm5_MultipleOf2_20_to_16(imm); // iiiii + break; + } + + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeUimm5_MultipleOf8_20_to_16(imm); // iiiii + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeSveElemsize_30_or_21(fmt, optGetSveElemsize(id->idInsOpt())); + + switch (ins) + { + case INS_sve_st1h: + code |= insEncodeUimm5_MultipleOf2_20_to_16(imm); // iiiii + break; + + case INS_sve_st1w: + code |= insEncodeUimm5_MultipleOf4_20_to_16(imm); // iiiii + break; + + default: + assert(ins == INS_sve_st1b); + code |= insEncodeUimm5_20_to_16(imm); // iiiii + break; + } + + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + + switch (ins) + { + case INS_sve_ld1rd: + code |= insEncodeUimm6_MultipleOf8_21_to_16(imm); // iiiiii + break; + + default: + assert(ins == INS_sve_ld1rsw); + code |= 
insEncodeUimm6_MultipleOf4_21_to_16(imm); // iiiiii + break; + } + + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code = insEncodeSveElemsize_dtypeh_dtypel(ins, fmt, optGetSveElemsize(id->idInsOpt()), code); + + switch (ins) + { + case INS_sve_ld1rw: + code |= insEncodeUimm6_MultipleOf4_21_to_16(imm); // iiiiii + break; + + case INS_sve_ld1rh: + case INS_sve_ld1rsh: + code |= insEncodeUimm6_MultipleOf2_21_to_16(imm); // iiiiii + break; + + default: + code |= insEncodeUimm6_21_to_16(imm); // iiiiii + break; + } + + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -23122,6 +23860,31 @@ void emitter::emitDispSveImmMulVl(regNumber reg1, ssize_t imm) printf("]"); } +/***************************************************************************** + * + * Prints the encoding for format [.D{, #}] + */ +void emitter::emitDispSveImmIndex(regNumber reg1, insOpts opt, ssize_t imm) +{ + printf("["); + if (isVectorRegister(reg1)) + { + emitDispSveReg(reg1, opt, imm != 0); + } + else + { + emitDispReg(reg1, EA_8BYTE, imm != 0); + } + if (imm != 0) + { + // This does not have to be printed as hex. + // We only do it because the capstone disassembly displays this immediate as hex. + // We could not modify capstone without affecting other cases. 
+ emitDispImm(imm, false, /* alwaysHex */ (imm > 31)); + } + printf("]"); +} + /***************************************************************************** * * Prints the encoding for the Extend Type encoding in loads/stores @@ -26218,6 +26981,33 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg4(), id->idInsOpt(), false); break; + // {.S }, /Z, [.S{, #}] + // {.D }, /Z, [.D{, #}] + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + // {.S }, /Z, [.S{, #}] + // {.D }, /Z, [.D{, #}] + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + // {.D }, /Z, [.D{, #}] + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + // {.S }, , [.S{, #}] + // {.D }, , [.D{, #}] + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + // {.D }, , [.D{, #}] + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + imm = emitGetInsSC(id); + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(id->idIns()), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveImmIndex(id->idReg3(), id->idInsOpt(), imm); + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -29900,6 +30690,27 @@ 
emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 99b36bd4ec534..cd4b3bc973819 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -54,6 +54,7 @@ void emitDispSveExtendOpts(insOpts opt); void emitDispSveExtendOptsModN(insOpts opt, int n); void emitDispSveModAddr(instruction ins, regNumber reg1, regNumber reg2, insOpts opt, insFormat fmt); void emitDispSveImmMulVl(regNumber reg1, ssize_t imm); +void emitDispSveImmIndex(regNumber reg1, insOpts opt, ssize_t imm); void emitDispLSExtendOpts(insOpts 
opt); void emitDispReg(regNumber reg, emitAttr attr, bool addComma); void emitDispSveReg(regNumber reg, insOpts opt, bool addComma); @@ -533,6 +534,11 @@ static code_t insEncodeSveElemsize_sz_21(emitAttr size); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); +// Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30' or +// '21'. +// This only works on select formats. +static code_t insEncodeSveElemsize_30_or_21(insFormat fmt, emitAttr size); + // Returns the encoding for the field 'i1:tszh:tszl' at bit locations '23-22:20-18'. static code_t insEncodeSveElemsize_tszh_tszl_and_imm(const insOpts opt, const ssize_t imm); @@ -579,6 +585,10 @@ static code_t insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t // for the 'dtype' field. static code_t insEncodeSveElemsize_dtype_ld1w(instruction ins, insFormat fmt, emitAttr size, code_t code); +// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction +// for the 'dtypeh' and 'dtypel' fields. +static code_t insEncodeSveElemsize_dtypeh_dtypel(instruction ins, insFormat fmt, emitAttr size, code_t code); + // Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. static code_t insEncodeSimm4_19_to_16(ssize_t imm); @@ -600,12 +610,27 @@ static code_t insEncodeSimm4_MultipleOf16_19_to_16(ssize_t imm); // Returns the encoding for the immediate value that is a multiple of 32 as 4-bits at bit locations '19-16'. static code_t insEncodeSimm4_MultipleOf32_19_to_16(ssize_t imm); +// Returns the encoding for the immediate value that is a multiple of 2 as 5-bits at bit locations '20-16'. +static code_t insEncodeUimm5_MultipleOf2_20_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 4 as 5-bits at bit locations '20-16'. 
+static code_t insEncodeUimm5_MultipleOf4_20_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 8 as 5-bits at bit locations '20-16'. +static code_t insEncodeUimm5_MultipleOf8_20_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 2 as 6-bits at bit locations '21-16'. +static code_t insEncodeUimm6_MultipleOf2_21_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 4 as 6-bits at bit locations '21-16'. +static code_t insEncodeUimm6_MultipleOf4_21_to_16(ssize_t imm); + +// Returns the encoding for the immediate value that is a multiple of 8 as 6-bits at bit locations '21-16'. +static code_t insEncodeUimm6_MultipleOf8_21_to_16(ssize_t imm); + // Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. static code_t insEncodeSimm5_20_to_16(ssize_t imm); -// Returns the encoding for the unsigned immediate value as 5-bits at bit locations '20-16'. -static code_t insEncodeUimm5_20_to_16(ssize_t imm); - // Returns the encoding for the immediate value as 2-bits at bit locations '9-8'. static code_t insEncodeUimm2_9_to_8(ssize_t imm); @@ -630,6 +655,12 @@ static code_t insEncodeUimm7_20_to_14(ssize_t imm); // Returns the encoding for the immediate value as 4-bits starting from 1, at bit locations '19-16'. static code_t insEncodeUimm4From1_19_to_16(ssize_t imm); +// Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. +static code_t insEncodeUimm5_20_to_16(ssize_t imm); + +// Returns the encoding for the immediate value as 6-bits at bit locations '21-16'. +static code_t insEncodeUimm6_21_to_16(ssize_t imm); + // Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. 
static code_t insEncodeImm8_12_to_5(ssize_t imm); @@ -704,6 +735,42 @@ static bool isValidSimm4_MultipleOf32(ssize_t value) return (-256 <= value) && (value <= 224) && (value % 32 == 0); }; +// Returns true if 'value' is a legal unsigned multiple of 2 immediate 5 bit encoding (such as for LD1H). +static bool isValidUimm5_MultipleOf2(ssize_t value) +{ + return (0 <= value) && (value <= 62) && (value % 2 == 0); +}; + +// Returns true if 'value' is a legal unsigned multiple of 4 immediate 5 bit encoding (such as for LD1W). +static bool isValidUimm5_MultipleOf4(ssize_t value) +{ + return (0 <= value) && (value <= 124) && (value % 4 == 0); +}; + +// Returns true if 'value' is a legal unsigned multiple of 8 immediate 5 bit encoding (such as for LD1D). +static bool isValidUimm5_MultipleOf8(ssize_t value) +{ + return (0 <= value) && (value <= 248) && (value % 8 == 0); +}; + +// Returns true if 'value' is a legal unsigned multiple of 2 immediate 6 bit encoding (such as for LD1RH). +static bool isValidUimm6_MultipleOf2(ssize_t value) +{ + return (0 <= value) && (value <= 126) && (value % 2 == 0); +}; + +// Returns true if 'value' is a legal unsigned multiple of 4 immediate 6 bit encoding (such as for LD1RSW). +static bool isValidUimm6_MultipleOf4(ssize_t value) +{ + return (0 <= value) && (value <= 252) && (value % 4 == 0); +}; + +// Returns true if 'value' is a legal unsigned multiple of 8 immediate 6 bit encoding (such as for LD1RD). +static bool isValidUimm6_MultipleOf8(ssize_t value) +{ + return (0 <= value) && (value <= 504) && (value % 8 == 0); +}; + // Returns true if 'value' is a legal immediate 1 bit encoding (such as for PEXT). static bool isValidImm1(ssize_t value) { @@ -746,6 +813,12 @@ static bool isValidUimm5(ssize_t value) return (0 <= value) && (value <= 0x1FLL); }; +// Returns true if 'value' is a legal unsigned immediate 6 bit encoding (such as for LD1RD). 
+static bool isValidUimm6(ssize_t value) +{ + return (0 <= value) && (value <= 63); +} + // Returns true if 'value' is a legal unsigned immediate 5 bit encoding, starting from 1 (such as for SHRNB). static bool isValidUimm5From1(ssize_t value) { @@ -1363,6 +1436,15 @@ void emitIns_R_R_R_I(instruction ins, insOpts opt = INS_OPTS_NONE, emitAttr attrReg2 = EA_UNKNOWN); +void emitInsSve_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt = INS_OPTS_NONE, + emitAttr attrReg2 = EA_UNKNOWN); + void emitIns_R_R_R_I_I(instruction ins, emitAttr attr, regNumber reg1, diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index 7350a288c92f5..0c0a401923714 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -265,7 +265,7 @@ INST6(ld1sb, "ld1sb", 0, IF_SV // LD1SB {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 000gggnnnnnttttt C400 0000 // LD1SB {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 000gggnnnnnttttt 8400 0000 // LD1SB {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 100gggnnnnnttttt C440 8000 - // LD1SB {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 100gggnnnnnttttt 8420 8000 + // LD1SB {.S }, /Z, [.S{, #}] SVE_HX_3A_B 10000100001iiiii 100gggnnnnnttttt 8420 8000 // LD1SB {.D }, /Z, [{, #, MUL VL}] SVE_IJ_3A_D 101001011000iiii 101gggnnnnnttttt A580 A000 // LD1SB {.D }, /Z, [, ] SVE_IK_4A_F 10100101100mmmmm 010gggnnnnnttttt A580 4000 @@ -275,7 +275,7 @@ INST6(ld1b, "ld1b", 0, IF_SV // LD1B {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 010gggnnnnnttttt C400 4000 // LD1B {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 010gggnnnnnttttt 8400 4000 // LD1B {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 110gggnnnnnttttt C440 C000 - // LD1B {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 110gggnnnnnttttt 8420 C000 + // LD1B {.S }, /Z, [.S{, #}] SVE_HX_3A_B 10000100001iiiii 110gggnnnnnttttt 8420 C000 // LD1B {.B }, /Z, [{, #, MUL VL}] 
SVE_IJ_3A_E 101001000000iiii 101gggnnnnnttttt A400 A000 // LD1B {.B }, /Z, [, ] SVE_IK_4A_H 10100100000mmmmm 010gggnnnnnttttt A400 4000 @@ -395,7 +395,7 @@ INST5(ldff1sb, "ldff1sb", 0, IF_SV // LDFF1SB {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 001gggnnnnnttttt C400 2000 // LDFF1SB {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 001gggnnnnnttttt 8400 2000 // LDFF1SB {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 101gggnnnnnttttt C440 A000 - // LDFF1SB {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 101gggnnnnnttttt 8420 A000 + // LDFF1SB {.S }, /Z, [.S{, #}] SVE_HX_3A_B 10000100001iiiii 101gggnnnnnttttt 8420 A000 // LDFF1SB {.D }, /Z, [{, }] SVE_IG_4A_D 10100101100mmmmm 011gggnnnnnttttt A580 6000 @@ -404,7 +404,7 @@ INST5(ldff1b, "ldff1b", 0, IF_SV // LDFF1B {.D }, /Z, [, .D, ] SVE_HW_4A 110001000h0mmmmm 011gggnnnnnttttt C400 6000 // LDFF1B {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 011gggnnnnnttttt 8400 6000 // LDFF1B {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 111gggnnnnnttttt C440 E000 - // LDFF1B {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 111gggnnnnnttttt 8420 E000 + // LDFF1B {.S }, /Z, [.S{, #}] SVE_HX_3A_B 10000100001iiiii 111gggnnnnnttttt 8420 E000 // LDFF1B {.B }, /Z, [{, }] SVE_IG_4A_E 10100100000mmmmm 011gggnnnnnttttt A400 6000