JIT: Support more scaled addressing modes on arm64 #97921

Merged: 5 commits, Feb 5, 2024
10 changes: 8 additions & 2 deletions src/coreclr/jit/codegen.h
@@ -31,8 +31,14 @@ class CodeGen final : public CodeGenInterface

// TODO-Cleanup: Abstract out the part of this that finds the addressing mode, and
// move it to Lower
virtual bool genCreateAddrMode(
GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr);
virtual bool genCreateAddrMode(GenTree* addr,
bool fold,
unsigned naturalMul,
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
unsigned* mulPtr,
ssize_t* cnsPtr);

#ifdef LATE_DISASM
virtual const char* siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs);
100 changes: 51 additions & 49 deletions src/coreclr/jit/codegencommon.cpp
@@ -965,7 +965,7 @@ void CodeGen::genDefineInlineTempLabel(BasicBlock* label)
// Notes:
// This only makes an adjustment if !FEATURE_FIXED_OUT_ARGS, if there is no frame pointer,
// and if 'block' is a throw helper block with a non-zero stack level.

//
void CodeGen::genAdjustStackLevel(BasicBlock* block)
{
#if !FEATURE_FIXED_OUT_ARGS
@@ -1003,35 +1003,33 @@ void CodeGen::genAdjustStackLevel(BasicBlock* block)
#endif // !FEATURE_FIXED_OUT_ARGS
}

/*****************************************************************************
*
* Take an address expression and try to find the best set of components to
* form an address mode; returns non-zero if this is successful.
*
* TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
* Refactor this code so that the underlying analysis can be used in
* the RyuJIT Backend to do lowering, instead of having to call this method with the
* option to not generate the code.
*
* 'fold' specifies if it is OK to fold the array index which hangs off
* a GT_NOP node.
*
* If successful, the parameters will be set to the following values:
*
* *rv1Ptr ... base operand
* *rv2Ptr ... optional operand
* *revPtr ... true if rv2 is before rv1 in the evaluation order
* *mulPtr ... optional multiplier (2/4/8) for rv2
* Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
* *cnsPtr ... integer constant [optional]
*
* IMPORTANT NOTE: This routine doesn't generate any code, it merely
* identifies the components that might be used to
* form an address mode later on.
*/

bool CodeGen::genCreateAddrMode(
GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr)
//------------------------------------------------------------------------
// genCreateAddrMode:
// Take an address expression and try to find the best set of components to
// form an address mode; returns true if this is successful.
//
// Parameters:
// addr - Tree that potentially computes an address
// fold - Specifies if it is OK to fold the array index which hangs off a GT_NOP node.
// naturalMul - For arm64 specifies the natural multiplier for the address mode (i.e. the size of the parent
// indirection).
// revPtr - [out] True if rv2 is before rv1 in the evaluation order
// rv1Ptr - [out] Base operand
// rv2Ptr - [out] Optional operand
// mulPtr - [out] Optional multiplier for rv2. If non-zero and naturalMul is non-zero, it must match naturalMul.
// cnsPtr - [out] Integer constant [optional]
//
// Returns:
// True if some address mode components were extracted.
//
bool CodeGen::genCreateAddrMode(GenTree* addr,
bool fold,
unsigned naturalMul,
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
unsigned* mulPtr,
ssize_t* cnsPtr)
{
/*
The following indirections are valid address modes on x86/x64:
@@ -1171,8 +1169,7 @@ bool CodeGen::genCreateAddrMode(

goto AGAIN;

#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
// TODO-ARM-CQ: For now we don't try to create a scaled index.
case GT_MUL:
if (op1->gtOverflow())
{
@@ -1182,10 +1179,11 @@
FALLTHROUGH;

case GT_LSH:

mul = op1->GetScaledIndex();
if (mul)
{
unsigned mulCandidate = op1->GetScaledIndex();
if (jitIsScaleIndexMul(mulCandidate, naturalMul))
{
mul = mulCandidate;
/* We can use "[mul*rv2 + icon]" */

rv1 = nullptr;
@@ -1194,7 +1192,7 @@
goto FOUND_AM;
}
break;
#endif // !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
}

default:
break;
@@ -1215,8 +1213,8 @@

switch (op1->gtOper)
{
#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we don't try to create a scaled index.
#ifdef TARGET_XARCH
// TODO-ARM-CQ: For now we don't try to create a scaled index.
case GT_ADD:

if (op1->gtOverflow())
@@ -1237,6 +1235,7 @@
}
}
break;
#endif // TARGET_XARCH

case GT_MUL:

@@ -1248,19 +1247,20 @@
FALLTHROUGH;

case GT_LSH:

mul = op1->GetScaledIndex();
if (mul)
{
unsigned mulCandidate = op1->GetScaledIndex();
if (jitIsScaleIndexMul(mulCandidate, naturalMul))
{
/* 'op1' is a scaled value */
mul = mulCandidate;

rv1 = op2;
rv2 = op1->AsOp()->gtOp1;

int argScale;
while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
{
if (jitIsScaleIndexMul(argScale * mul))
if (jitIsScaleIndexMul(argScale * mul, naturalMul))
{
mul = mul * argScale;
rv2 = rv2->AsOp()->gtOp1;
@@ -1277,7 +1277,7 @@
goto FOUND_AM;
}
break;
#endif // !TARGET_ARMARCH && !TARGET_LOONGARCH64 && !TARGET_RISCV64
}

case GT_COMMA:

Expand All @@ -1291,7 +1291,7 @@ bool CodeGen::genCreateAddrMode(
noway_assert(op2);
switch (op2->gtOper)
{
#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
#ifdef TARGET_XARCH
// TODO-ARM64-CQ, TODO-ARM-CQ: For now we only handle MUL and LSH because
// arm doesn't support both scale and offset at the same time. Offset is handled
// at the emitter as a peephole optimization.
@@ -1315,6 +1315,7 @@
goto AGAIN;
}
break;
#endif // TARGET_XARCH

case GT_MUL:

@@ -1326,16 +1327,17 @@
FALLTHROUGH;

case GT_LSH:

mul = op2->GetScaledIndex();
if (mul)
{
unsigned mulCandidate = op2->GetScaledIndex();
if (jitIsScaleIndexMul(mulCandidate, naturalMul))
{
mul = mulCandidate;
// 'op2' is a scaled value... is its argument also scaled?
int argScale;
rv2 = op2->AsOp()->gtOp1;
while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
{
if (jitIsScaleIndexMul(argScale * mul))
if (jitIsScaleIndexMul(argScale * mul, naturalMul))
{
mul = mul * argScale;
rv2 = rv2->AsOp()->gtOp1;
@@ -1351,7 +1353,7 @@
goto FOUND_AM;
}
break;
#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64
}

case GT_COMMA:

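A note on the GT_MUL/GT_LSH cases above: nested scales are collapsed by multiplying them together, and each combined value is re-checked against naturalMul. A minimal standalone sketch of that arithmetic (a toy model, not JIT code; isScaleIndexMul here mirrors only the arm64 branch of jitIsScaleIndexMul):

#include <cstddef>
#include <cstdio>

// Toy model of the nested-scale collapsing in genCreateAddrMode (not JIT code).
// On arm64, a scale is only usable when it equals the natural multiplier.
static bool isScaleIndexMul(size_t val, unsigned naturalMul)
{
    return val == naturalMul;
}

int main()
{
    unsigned naturalMul = 8; // an 8-byte indirection, e.g. a ulong load
    unsigned mul        = 2; // outer shift: index << 1
    unsigned argScale   = 4; // nested shift: (index << 2) << 1

    // Mirrors: if (jitIsScaleIndexMul(argScale * mul, naturalMul)) { mul = mul * argScale; }
    if (isScaleIndexMul(argScale * mul, naturalMul))
    {
        mul = mul * argScale; // 2 * 4 == 8, which matches naturalMul
    }
    printf("combined scale = %u\n", mul); // prints 8
    return 0;
}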
1 change: 1 addition & 0 deletions src/coreclr/jit/codegeninterface.h
@@ -113,6 +113,7 @@ class CodeGenInterface
// move it to Lower
virtual bool genCreateAddrMode(GenTree* addr,
bool fold,
unsigned naturalMul,
bool* revPtr,
GenTree** rv1Ptr,
GenTree** rv2Ptr,
44 changes: 42 additions & 2 deletions src/coreclr/jit/compiler.hpp
@@ -196,10 +196,45 @@ inline unsigned __int64 BitsBetween(unsigned __int64 value, unsigned __int64 end
(end - 1); // Ones to the right of set bit in the end mask.
}

/*****************************************************************************/

//------------------------------------------------------------------------------
// jitIsScaleIndexMul: Check if the value is a valid addressing mode multiplier
// amount for x64.
//
// Parameters:
// val - The multiplier
//
// Returns:
// True if value is 1, 2, 4 or 8.
//
inline bool jitIsScaleIndexMul(size_t val)
{
// TODO-Cleanup: On arm64 the scale that can be used in addressing modes
// depends on the containing indirection, so this function should be reevaluated.
switch (val)
{
case 1:
case 2:
case 4:
case 8:
return true;
default:
return false;
}
}

//------------------------------------------------------------------------------
// jitIsScaleIndexMul: Check if the value is a valid addressing mode multiplier amount.
//
// Parameters:
// val - The multiplier
// naturalMul - For arm64, the natural multiplier (size of containing indirection)
//
// Returns:
// True if the multiplier can be used in an address mode.
//
inline bool jitIsScaleIndexMul(size_t val, unsigned naturalMul)
{
#if defined(TARGET_XARCH)
switch (val)
{
case 1:
@@ -211,6 +246,11 @@
default:
return false;
}
#elif defined(TARGET_ARM64)
return val == naturalMul;
#else
return false;
#endif
}

// Returns "tree" iff "val" is a valid addressing mode scale shift amount on
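For contrast, a self-contained sketch of the two checks defined above (semantics copied from this diff: x64 accepts any of 1/2/4/8 regardless of context, arm64 only the natural scale of the enclosing indirection; the standalone function names are illustrative):

#include <cstddef>
#include <cstdio>

// Standalone mirrors of the two jitIsScaleIndexMul overloads (illustrative).
static bool isScaleIndexMulX64(size_t val)
{
    return (val == 1) || (val == 2) || (val == 4) || (val == 8);
}

static bool isScaleIndexMulArm64(size_t val, unsigned naturalMul)
{
    return val == naturalMul;
}

int main()
{
    // *(ulong*)(data + index * 8): scale 8 on an 8-byte load.
    printf("x64, scale 8:            %d\n", isScaleIndexMulX64(8));      // 1
    printf("arm64, scale 8, width 8: %d\n", isScaleIndexMulArm64(8, 8)); // 1

    // *(int*)(data + index * 2): scale 2 on a 4-byte load.
    printf("x64, scale 2:            %d\n", isScaleIndexMulX64(2));      // 1
    printf("arm64, scale 2, width 4: %d\n", isScaleIndexMulArm64(2, 4)); // 0
    return 0;
}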
27 changes: 14 additions & 13 deletions src/coreclr/jit/gentree.cpp
@@ -4538,20 +4538,21 @@ bool Compiler::gtMarkAddrMode(GenTree* addr, int* pCostEx, int* pCostSz, var_typ
GenTree* base; // This is the base of the address.
GenTree* idx; // This is the index.

if (codeGen->genCreateAddrMode(addr, false /*fold*/, &rev, &base, &idx, &mul, &cns))
{
unsigned naturalMul = 0;
#ifdef TARGET_ARM64
// Multiplier should be a "natural-scale" power of two equal to the target's width.
//
// *(ulong*)(data + index * 8); - can be optimized
// *(ulong*)(data + index * 7); - cannot be optimized
// *(int*)(data + index * 2); - cannot be optimized
//
naturalMul = genTypeSize(type);
#endif

#ifdef TARGET_ARMARCH
// Multiplier should be a "natural-scale" power of two number which is equal to target's width.
//
// *(ulong*)(data + index * 8); - can be optimized
// *(ulong*)(data + index * 7); - can not be optimized
// *(int*)(data + index * 2); - can not be optimized
//
if ((mul > 0) && (genTypeSize(type) != mul))
{
return false;
}
if (codeGen->genCreateAddrMode(addr, false /*fold*/, naturalMul, &rev, &base, &idx, &mul, &cns))
{
#ifdef TARGET_ARM64
assert((mul == 0) || (mul == 1) || (mul == naturalMul));
#endif

// We can form a complex addressing mode, so mark each of the interior
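The comment block above lists the user-visible patterns; written out as compilable C++ for reference (illustrative only; the arm64 encoding named in the first comment is an assumption about the expected codegen, not disassembly captured from this PR):

#include <cstdint>

// 8-byte load with scale 8 == naturalMul: eligible for a scaled address mode,
// e.g. ldr x0, [x0, x1, lsl #3] (assumed encoding).
uint64_t load_scaled(uint8_t* data, uint64_t index)
{
    return *(uint64_t*)(data + index * 8);
}

// Scale 7 is not a power of two, so it can never fold into an address mode.
uint64_t load_unscaled(uint8_t* data, uint64_t index)
{
    return *(uint64_t*)(data + index * 7);
}

// 4-byte load with scale 2: arm64 requires the scale to equal the access
// width (4), so no scaled mode here; x64 would still accept a scale of 2.
int32_t load_mismatched(uint8_t* data, uint64_t index)
{
    return *(int32_t*)(data + index * 2);
}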
43 changes: 21 additions & 22 deletions src/coreclr/jit/lower.cpp
@@ -6135,14 +6135,28 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, GenTree* par
ssize_t offset = 0;
bool rev = false;

var_types targetType = parent->OperIsIndir() ? parent->TypeGet() : TYP_UNDEF;

unsigned naturalMul = 0;
#ifdef TARGET_ARM64
// Multiplier should be a "natural-scale" power of two equal to the target's width.
//
// *(ulong*)(data + index * 8); - can be optimized
// *(ulong*)(data + index * 7); - cannot be optimized
// *(int*)(data + index * 2); - cannot be optimized
//
naturalMul = genTypeSize(targetType);
#endif

// Find out if an addressing mode can be constructed
bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address
true, // fold
&rev, // reverse ops
&base, // base addr
&index, // index val
&scale, // scaling
&offset); // displacement
bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address
true, // fold
naturalMul, // natural multiplier
&rev, // reverse ops
&base, // base addr
&index, // index val
&scale, // scaling
&offset); // displacement

#ifdef TARGET_ARM64
if (parent->OperIsIndir() && parent->AsIndir()->IsVolatile())
@@ -6158,21 +6172,6 @@
}
#endif

var_types targetType = parent->OperIsIndir() ? parent->TypeGet() : TYP_UNDEF;

#ifdef TARGET_ARMARCH
// Multiplier should be a "natural-scale" power of two number which is equal to target's width.
//
// *(ulong*)(data + index * 8); - can be optimized
// *(ulong*)(data + index * 7); - can not be optimized
// *(int*)(data + index * 2); - can not be optimized
//
if ((scale > 0) && (genTypeSize(targetType) != scale))
{
return false;
}
#endif

if (scale == 0)
{
scale = 1;
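Net effect: lowering now derives naturalMul from the parent indirection, so a scale that matches the access width survives into the address mode instead of being rejected after the fact. A sketch of the intended difference in generated arm64 code (assumed instruction sequences, for illustration):

#include <cstdint>

uint64_t read_element(uint64_t* table, uint64_t i)
{
    // Without the scaled address mode, the shift is materialized separately:
    //     lsl x1, x1, #3
    //     ldr x0, [x0, x1]
    // With it, the shift folds into the load:
    //     ldr x0, [x0, x1, lsl #3]
    return table[i];
}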