Skip to content

Commit

Permalink
Cleanup some xarch emit logic (#85536)
Browse files Browse the repository at this point in the history
* Ensure floating-point codegen uses the VEX aware path

* Fix `IF_RRW_RRW_CNS` to be `IF_RWR_RRD_CNS`

* Fixup emitfmtsxarch.h to have a more consistent layout

* Allow querying the scheduling info for an insFormat

* Ensure the new insFormats are handled

* Ensure we consistently use `emitInsModeFormat`

* Ensure instructions which write to a mask register are EVEX only

* Improve REX.W handling for EVEX only instructions

* Ensure that instructions use the right update mode and tuple type

* Apply formatting patch

* Ensure DstSrcSrc is still handled correctly

* Ensure BLSI/BLSR are still handled in emitOutputAM

* Use static_assert_no_msg

* Fix the disassembly for IF_RRW_SHF

* Fix the IF check for shld/shrd on x86

* Use the correct name: inst_RV_TT_IV

* Ensure the 4 operand insFormats include the necessary constant

* Resolve an insFormat check on x86

* Ensure other SIMD code paths are VEX aware

* Improve throughput by using a less expensive emitSizeOfInsDsc

* Apply formatting patch

* Ensure emitSizeOfInsDsc_CNS is used for RWR_RRD_*RD_CNS

* Ensure genSimd12UpperClear uses `andps` for the pre-SSE4.1 path
  • Loading branch information
tannergooding authored May 2, 2023
1 parent b02d7a1 commit da0aa0c
Show file tree
Hide file tree
Showing 17 changed files with 1,644 additions and 1,311 deletions.
7 changes: 7 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ class CodeGen final : public CodeGenInterface
CORINFO_FIELD_HANDLE absBitmaskFlt;
CORINFO_FIELD_HANDLE absBitmaskDbl;

// Bit mask used in zeroing element 3 (zero-based, i.e. the unused upper 32 bits) of a SIMD12
CORINFO_FIELD_HANDLE zroSimd12Elm3;

// Bit mask used in U8 -> double conversion to adjust the result.
CORINFO_FIELD_HANDLE u8ToDblBitmask;

Expand Down Expand Up @@ -925,6 +928,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genSimdUpperSave(GenTreeIntrinsic* node);
void genSimdUpperRestore(GenTreeIntrinsic* node);

void genSimd12UpperClear(regNumber tgtReg);

// TYP_SIMD12 (i.e. Vector3 of size 12 bytes) is not a hardware-supported size and requires
// two reads/writes on 64-bit targets. These routines abstract reading/writing of Vector3
// values through an indirection. Note that Vector3 locals allocated on stack would have
Expand Down Expand Up @@ -1532,6 +1537,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival);
void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival);
void inst_RV_RV_TT(instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, bool isRMW);
void inst_RV_RV_TT_IV(
instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, int8_t ival, bool isRMW);
#endif

void inst_set_SV_var(GenTree* tree);
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4474,7 +4474,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP&
}
#elif defined(TARGET_XARCH)
// XORPS is the fastest and smallest way to initialize an XMM register to zero.
inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
dblInitReg = reg;
#elif defined(TARGET_ARM64)
// We will just zero out the entire vector register. This sets it to a double/float zero value
Expand Down Expand Up @@ -4514,7 +4514,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP&
}
#elif defined(TARGET_XARCH)
// XORPS is the fastest and smallest way to initialize an XMM register to zero.
inst_RV_RV(INS_xorps, reg, reg, TYP_DOUBLE);
GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg);
fltInitReg = reg;
#elif defined(TARGET_ARM64)
// We will just zero out the entire vector register. This sets it to a double/float zero value
Expand Down
Loading

0 comments on commit da0aa0c

Please sign in to comment.