Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of CSE for GT_CNS_INT benefits ARM64 #39096

Merged
merged 8 commits into from
Jul 24, 2020
22 changes: 17 additions & 5 deletions src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6326,11 +6326,12 @@ class Compiler

struct CSEdsc
{
CSEdsc* csdNextInBucket; // used by the hash table

unsigned csdHashKey; // the orginal hashkey

unsigned csdIndex; // 1..optCSECandidateCount
CSEdsc* csdNextInBucket; // used by the hash table
size_t csdHashKey; // the orginal hashkey
ssize_t csdConstDefValue; // When we CSE similar constants, this is the value that we use as the def
ValueNum csdConstDefVN; // When we CSE similar constants, this is the ValueNumber that we use for the LclVar
// assignment
unsigned csdIndex; // 1..optCSECandidateCount
bool csdLiveAcrossCall;

unsigned short csdDefCount; // definition count
Expand Down Expand Up @@ -6359,6 +6360,7 @@ class Compiler

ValueNum defConservNormVN; // if all def occurrences share the same conservative normal value
// number, this will reflect it; otherwise, NoVN.
// not used for shared const CSE's
};

static const size_t s_optCSEhashSize;
Expand Down Expand Up @@ -6406,6 +6408,16 @@ class Compiler
void optEnsureClearCSEInfo();
#endif // DEBUG

static bool Is_Shared_Const_CSE(size_t key)
{
return ((key & TARGET_SIGN_BIT) != 0);
}

static size_t Decode_Shared_Const_CSE_Value(size_t key)
{
return (key & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS;
}

#endif // FEATURE_ANYCSE

#if FEATURE_VALNUM_CSE
Expand Down
192 changes: 157 additions & 35 deletions src/coreclr/src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3250,100 +3250,222 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
switch (oper)
{
#ifdef TARGET_ARM
case GT_CNS_LNG:
costSz = 9;
costEx = 4;
goto COMMON_CNS;

case GT_CNS_STR:
// Uses movw/movt
costSz = 7;
costEx = 3;
costSz = 8;
costEx = 2;
goto COMMON_CNS;

case GT_CNS_LNG:
{
GenTreeIntConCommon* con = tree->AsIntConCommon();

INT64 lngVal = con->LngValue();
INT32 loVal = (INT32)(lngVal & 0xffffffff);
INT32 hiVal = (INT32)(lngVal >> 32);

if (lngVal == 0)
{
costSz = 1;
costEx = 1;
}
else
{
// Minimum of one instruction to setup hiVal,
// and one instruction to setup loVal
costSz = 4 + 4;
costEx = 1 + 1;

if (!codeGen->validImmForInstr(INS_mov, (target_ssize_t)hiVal) &&
!codeGen->validImmForInstr(INS_mvn, (target_ssize_t)hiVal))
{
// Needs extra instruction: movw/movt
costSz += 4;
costEx += 1;
}

if (!codeGen->validImmForInstr(INS_mov, (target_ssize_t)loVal) &&
!codeGen->validImmForInstr(INS_mvn, (target_ssize_t)loVal))
{
// Needs extra instruction: movw/movt
costSz += 4;
costEx += 1;
}
}
goto COMMON_CNS;
}

case GT_CNS_INT:
{
// If the constant is a handle then it will need to have a relocation
// applied to it.
// Any constant that requires a reloc must use the movw/movt sequence
//
GenTreeIntConCommon* con = tree->AsIntConCommon();
GenTreeIntConCommon* con = tree->AsIntConCommon();
INT32 conVal = con->IconValue();

if (con->ImmedValNeedsReloc(this) ||
!codeGen->validImmForInstr(INS_mov, (target_ssize_t)tree->AsIntCon()->gtIconVal))
if (con->ImmedValNeedsReloc(this))
{
// Uses movw/movt
costSz = 7;
costEx = 3;
// Requires movw/movt
costSz = 8;
costEx = 2;
}
else if (((unsigned)tree->AsIntCon()->gtIconVal) <= 0x00ff)
else if (codeGen->validImmForInstr(INS_add, (target_ssize_t)conVal))
{
// mov Rd, <const8>
costSz = 1;
// Typically included with parent oper
costSz = 2;
costEx = 1;
}
else
else if (codeGen->validImmForInstr(INS_mov, (target_ssize_t)conVal) &&
codeGen->validImmForInstr(INS_mvn, (target_ssize_t)conVal))
{
// Uses movw/mvn
costSz = 3;
// Uses mov or mvn
costSz = 4;
costEx = 1;
}
else
{
// Needs movw/movt
costSz = 8;
costEx = 2;
}
goto COMMON_CNS;
}

#elif defined TARGET_XARCH

case GT_CNS_LNG:
costSz = 10;
costEx = 3;
goto COMMON_CNS;

case GT_CNS_STR:
#ifdef TARGET_AMD64
costSz = 10;
costEx = 2;
#else // TARGET_X86
costSz = 4;
costEx = 1;
#endif
goto COMMON_CNS;

case GT_CNS_LNG:
case GT_CNS_INT:
{
GenTreeIntConCommon* con = tree->AsIntConCommon();
ssize_t conVal = (oper == GT_CNS_LNG) ? (ssize_t)con->LngValue() : con->IconValue();
bool fitsInVal = true;

#ifdef TARGET_X86
if (oper == GT_CNS_LNG)
{
INT64 lngVal = con->LngValue();

conVal = (ssize_t)lngVal; // truncate to 32-bits

fitsInVal = ((INT64)conVal == lngVal);
}
#endif // TARGET_X86

// If the constant is a handle then it will need to have a relocation
// applied to it.
//
GenTreeIntConCommon* con = tree->AsIntConCommon();

bool iconNeedsReloc = con->ImmedValNeedsReloc(this);

if (!iconNeedsReloc && con->FitsInI8())
if (iconNeedsReloc)
{
costSz = 4;
costEx = 1;
}
else if (fitsInVal && GenTreeIntConCommon::FitsInI8(conVal))
{
costSz = 1;
costEx = 1;
}
#if defined(TARGET_AMD64)
else if (iconNeedsReloc || !con->FitsInI32())
#ifdef TARGET_AMD64
else if (!GenTreeIntConCommon::FitsInI32(conVal))
{
costSz = 10;
costEx = 3;
costEx = 2;
}
#endif // TARGET_AMD64
else
{
costSz = 4;
costEx = 1;
}
#ifdef TARGET_X86
if (oper == GT_CNS_LNG)
{
costSz += fitsInVal ? 1 : 4;
costEx += 1;
}
#endif // TARGET_X86

goto COMMON_CNS;
}

#elif defined(TARGET_ARM64)
case GT_CNS_LNG:

case GT_CNS_STR:
case GT_CNS_LNG:
case GT_CNS_INT:
// TODO-ARM64-NYI: Need cost estimates.
costSz = 1;
costEx = 1;
{
GenTreeIntConCommon* con = tree->AsIntConCommon();
bool iconNeedsReloc = con->ImmedValNeedsReloc(this);
INT64 imm = con->LngValue();
emitAttr size = EA_SIZE(emitActualTypeSize(tree));

if (iconNeedsReloc)
{
costSz = 8;
costEx = 2;
}
else if (emitter::emitIns_valid_imm_for_add(imm, size))
{
costSz = 2;
costEx = 1;
}
else if (emitter::emitIns_valid_imm_for_mov(imm, size))
{
costSz = 4;
costEx = 1;
}
else
{
// Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword
// There are three forms
// movk which loads into any halfword preserving the remaining halfwords
// movz which loads into any halfword zeroing the remaining halfwords
// movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting
// the register
// In some cases it is preferable to use movn, because it has the side effect of filling the
// other halfwords
// with ones

// Determine whether movn or movz will require the fewest instructions to populate the immediate
bool preferMovz = false;
bool preferMovn = false;
int instructionCount = 4;

for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16)
{
if (!preferMovn && (uint16_t(imm >> i) == 0x0000))
{
preferMovz = true; // by using a movk to start we can save one instruction
instructionCount--;
}
else if (!preferMovz && (uint16_t(imm >> i) == 0xffff))
{
preferMovn = true; // by using a movn to start we can save one instruction
instructionCount--;
}
}

costEx = instructionCount;
costSz = 4 * instructionCount;
}
}
goto COMMON_CNS;

#else
case GT_CNS_LNG:
case GT_CNS_STR:
case GT_CNS_LNG:
case GT_CNS_INT:
#error "Unknown TARGET"
#endif
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/src/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,20 @@ CONFIG_INTEGER(JitDisableSimdVN, W("JitDisableSimdVN"), 0) // Default 0, ValueNu
// If 3, disable both SIMD and HW Intrinsic nodes
#endif // FEATURE_SIMD

// Default 0, enable the CSE of Constants, including nearby offsets. (only for ARM64)
// If 1, disable all the CSE of Constants
// If 2, enable the CSE of Constants but don't combine with nearby offsets. (only for ARM64)
// If 3, enable the CSE of Constants including nearby offsets. (all platforms)
// If 4, enable the CSE of Constants but don't combine with nearby offsets. (all platforms)
//
CONFIG_INTEGER(JitConstCSE, W("JitConstCSE"), 0)

#define CONST_CSE_ENABLE_ARM64 0
#define CONST_CSE_DISABLE_ALL 1
#define CONST_CSE_ENABLE_ARM64_NO_SHARING 2
#define CONST_CSE_ENABLE_ALL 3
#define CONST_CSE_ENABLE_ALL_NO_SHARING 4

///
/// JIT
///
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/src/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2707,6 +2707,13 @@ void Compiler::fgInitArgInfo(GenTreeCall* call)
indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd;
#endif
indirectCellAddress->SetRegNum(REG_R2R_INDIRECT_PARAM);
#ifdef TARGET_ARM
// Issue #xxxx : Don't attempt to CSE this constant on ARM32
//
// This constant has specific register requirements, and LSRA doesn't currently correctly
// handle them when the value is in a CSE'd local.
indirectCellAddress->SetDoNotCSE();
#endif // TARGET_ARM

// Push the stub address onto the list of arguments.
call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs);
Expand Down
Loading