Skip to content

Commit

Permalink
[RyuJIT] Add "rorx" instruction (BMI2) and emit it instead of "rol" when possible (#41772)
Browse files Browse the repository at this point in the history

* Use rorx instead of rol when possible
  • Loading branch information
EgorBo authored Sep 29, 2020
1 parent f4094bf commit 1d9e50c
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 4 deletions.
22 changes: 18 additions & 4 deletions src/coreclr/src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4080,10 +4080,11 @@ void CodeGen::genCodeForShift(GenTree* tree)

if (shiftBy->isContainedIntOrIImmed())
{
emitAttr size = emitTypeSize(tree);

// Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg"
if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && shiftBy->IsIntegralConst(1))
{
emitAttr size = emitTypeSize(tree);
if (tree->GetRegNum() == operandReg)
{
GetEmitter()->emitIns_R_R(INS_add, size, tree->GetRegNum(), operandReg);
Expand All @@ -4095,16 +4096,29 @@ void CodeGen::genCodeForShift(GenTree* tree)
}
else
{
int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();

#if defined(TARGET_64BIT)
// Try to emit rorx if BMI2 is available instead of mov+rol
// it makes sense only for 64bit integers
if ((genActualType(targetType) == TYP_LONG) && (tree->GetRegNum() != operandReg) &&
compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) &&
(shiftByValue > 0) && (shiftByValue < 64))
{
const int value = tree->OperIs(GT_ROL) ? (64 - shiftByValue) : shiftByValue;
GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value);
genProduceReg(tree);
return;
}
#endif
// First, move the operand to the destination register and
// later on perform the shift in-place.
// (LSRA will try to avoid this situation through preferencing.)
if (tree->GetRegNum() != operandReg)
{
inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType);
}

int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
inst_RV_SH(ins, emitTypeSize(tree), tree->GetRegNum(), shiftByValue);
inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue);
}
}
else
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
case INS_mulx:
case INS_pdep:
case INS_pext:
case INS_rorx:
return true;
default:
return false;
Expand Down Expand Up @@ -758,6 +759,7 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c
{
switch (ins)
{
case INS_rorx:
case INS_pdep:
case INS_mulx:
{
Expand Down Expand Up @@ -1242,6 +1244,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
case INS_pextrq:
case INS_pextrw:
case INS_pextrw_sse41:
case INS_rorx:
{
// These SSE instructions write to a general purpose integer register.
return false;
Expand Down Expand Up @@ -14944,6 +14947,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_tzcnt:
case INS_popcnt:
case INS_crc32:
case INS_rorx:
case INS_pdep:
case INS_pext:
case INS_addsubps:
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,7 @@ INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE,
INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract

// BMI2
INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_FLAGS_None) // Rotate Right by Immediate (destination register separate from source; emitted in place of mov + rol/ror)
INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit
INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract
INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position
Expand Down

0 comments on commit 1d9e50c

Please sign in to comment.