-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RyuJIT] Emit shlx, sarx, shrx on x64 #67182
Changes from 12 commits
3e6017a
4a7b404
23395b3
9c07d03
adbfc48
53181e4
28438a0
60742ee
ab2624f
2198328
f629bfd
2292e05
f85d34a
36b26a2
5b454bb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4403,6 +4403,34 @@ void CodeGen::genCodeForShift(GenTree* tree) | |
inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue); | ||
} | ||
} | ||
#if defined(TARGET_64BIT) | ||
else if (tree->OperIsShift() && compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2)) | ||
{ | ||
// Try to emit shlx, sarx, shrx if BMI2 is available instead of mov+shl, mov+sar, mov+shr. | ||
switch (tree->OperGet()) | ||
{ | ||
case GT_LSH: | ||
ins = INS_shlx; | ||
break; | ||
|
||
case GT_RSH: | ||
ins = INS_sarx; | ||
break; | ||
|
||
case GT_RSZ: | ||
ins = INS_shrx; | ||
break; | ||
|
||
default: | ||
unreached(); | ||
} | ||
|
||
regNumber shiftByReg = shiftBy->GetRegNum(); | ||
emitAttr size = emitTypeSize(tree); | ||
// The order of operandReg and shiftByReg are swapped to follow shlx, sarx and shrx encoding spec. | ||
GetEmitter()->emitIns_R_R_R(ins, size, tree->GetRegNum(), shiftByReg, operandReg); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Might be worth a note that we don't currently support the contained form due to more complex changes being needed in the emitter. Ideally we'd fix up the emitter and use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Opened #67314. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As @tannergooding , lets have a comment about containment and also specify that here, the operands are swapped because the way operands are encoded in these 3 instructions. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added commnets
|
||
} | ||
#endif | ||
else | ||
{ | ||
// We must have the number of bits to shift stored in ECX, since we constrained this node to | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -749,6 +749,9 @@ bool emitter::TakesRexWPrefix(instruction ins, emitAttr attr) | |||||
case INS_pdep: | ||||||
case INS_pext: | ||||||
case INS_rorx: | ||||||
case INS_shlx: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As per the changes at other places, we have the code to have it supported only for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is under #ifdef TARGET_AMD64, not ARM64. So, I guess it is correct. |
||||||
case INS_sarx: | ||||||
case INS_shrx: | ||||||
return true; | ||||||
default: | ||||||
return false; | ||||||
|
@@ -987,17 +990,32 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c | |||||
case INS_rorx: | ||||||
case INS_pdep: | ||||||
case INS_mulx: | ||||||
// TODO: Unblock when enabled for x86 | ||||||
#ifdef TARGET_AMD64 | ||||||
case INS_shrx: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this code path is also for x86? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added #ifdef TARGET_64BIT. |
||||||
#endif | ||||||
{ | ||||||
vexPrefix |= 0x03; | ||||||
break; | ||||||
} | ||||||
|
||||||
case INS_pext: | ||||||
// TODO: Unblock when enabled for x86 | ||||||
#ifdef TARGET_AMD64 | ||||||
case INS_sarx: | ||||||
#endif | ||||||
{ | ||||||
vexPrefix |= 0x02; | ||||||
break; | ||||||
} | ||||||
|
||||||
// TODO: Unblock when enabled for x86 | ||||||
#ifdef TARGET_AMD64 | ||||||
case INS_shlx: | ||||||
{ | ||||||
vexPrefix |= 0x01; | ||||||
break; | ||||||
} | ||||||
#endif | ||||||
default: | ||||||
{ | ||||||
vexPrefix |= 0x00; | ||||||
|
@@ -1484,6 +1502,11 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) | |||||
case INS_pextrw: | ||||||
case INS_pextrw_sse41: | ||||||
case INS_rorx: | ||||||
#ifdef TARGET_AMD64 | ||||||
case INS_shlx: | ||||||
case INS_sarx: | ||||||
case INS_shrx: | ||||||
#endif | ||||||
{ | ||||||
// These SSE instructions write to a general purpose integer register. | ||||||
return false; | ||||||
|
@@ -9519,9 +9542,13 @@ void emitter::emitDispIns( | |||||
assert(IsThreeOperandAVXInstruction(ins)); | ||||||
regNumber reg2 = id->idReg2(); | ||||||
regNumber reg3 = id->idReg3(); | ||||||
if (ins == INS_bextr || ins == INS_bzhi) | ||||||
if (ins == INS_bextr || ins == INS_bzhi | ||||||
#ifdef TARGET_AMD64 | ||||||
|| ins == INS_shrx || ins == INS_shlx || ins == INS_sarx | ||||||
#endif | ||||||
) | ||||||
{ | ||||||
// BMI bextr and bzhi encodes the reg2 in VEX.vvvv and reg3 in modRM, | ||||||
// BMI bextr,bzhi, shrx, shlx and sarx encode the reg2 in VEX.vvvv and reg3 in modRM, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
We need to have similar comment where we swap the operands I pointed out. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: You have already added a comment in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed. |
||||||
// which is different from most of other instructions | ||||||
regNumber tmp = reg2; | ||||||
reg2 = reg3; | ||||||
|
@@ -16323,6 +16350,16 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins | |||||
break; | ||||||
} | ||||||
|
||||||
#ifdef TARGET_AMD64 | ||||||
case INS_shlx: | ||||||
case INS_sarx: | ||||||
case INS_shrx: | ||||||
{ | ||||||
result.insLatency += PERFSCORE_LATENCY_1C; | ||||||
result.insThroughput = PERFSCORE_THROUGHPUT_2X; | ||||||
break; | ||||||
} | ||||||
#endif | ||||||
default: | ||||||
// unhandled instruction insFmt combination | ||||||
perfScoreUnhandledInstruction(id, &result); | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4850,7 +4850,7 @@ void Lowering::ContainCheckShiftRotate(GenTreeOp* node) | |
assert(source->OperGet() == GT_LONG); | ||
MakeSrcContained(node, source); | ||
} | ||
#endif // !TARGET_X86 | ||
#endif | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. revert this change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This has been alrady removed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean put it back (revert the deletion of the comment |
||
|
||
GenTree* shiftBy = node->gtOp2; | ||
if (IsContainableImmed(node, shiftBy) && (shiftBy->AsIntConCommon()->IconValue() <= 255) && | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -925,6 +925,17 @@ int LinearScan::BuildShiftRotate(GenTree* tree) | |
{ | ||
assert(shiftBy->OperIsConst()); | ||
} | ||
#if defined(TARGET_64BIT) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this instruction only applicable for x64? I thought it is also valid for x86? @tannergooding ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder why it worked for x86 without changing this part of the code?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Double checked that my change handles only x64 case. Will open an issue to handle x86. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Double checked that my change handles only x64 case. Will open an issue to handle x86. |
||
else if (tree->OperIsShift() && !tree->isContained() && | ||
compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2)) | ||
{ | ||
// It handles all register forms, but it does not handle contained form for memory operand. | ||
JulieLeeMSFT marked this conversation as resolved.
Show resolved
Hide resolved
|
||
srcCount += BuildOperandUses(source, srcCandidates); | ||
srcCount += BuildOperandUses(shiftBy, srcCandidates); | ||
BuildDef(tree, dstCandidates); | ||
return srcCount; | ||
} | ||
#endif | ||
else | ||
{ | ||
srcCandidates = allRegs(TYP_INT) & ~RBM_RCX; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same here...why is it only for x64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handling for x64 only for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handling for x64 only for now.