Implementation of CSE for GT_CNS_INT, benefits ARM64 (dotnet#39096)
* Change the type of csdHashKey to size_t

* Update gtCostSz and gtCostEx for constant nodes

* Implementation of code size optimization, CSE of constant values for ARM64
We will share a single CSE for constants that differ only in their low 12 bits on ARM64

The number of shared constant low bits is set by CSE_CONST_SHARED_LOW_BITS in target.h:
we use 12 bits on Arm platforms and 16 bits on XArch platforms.
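As an illustration of the sharing (not part of the commit's diff), a minimal C++ sketch of the grouping test, assuming two constants qualify for one shared CSE def whenever they agree above the low 12 bits and the later use is rebuilt as the shared def plus a small delta:

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for CSE_CONST_SHARED_LOW_BITS on an Arm target.
constexpr unsigned kSharedLowBits = 12;

// Two constants can share one CSE def when they agree above the low 12 bits.
bool canShareConstant(uint64_t a, uint64_t b)
{
    return (a >> kSharedLowBits) == (b >> kSharedLowBits);
}

int main()
{
    uint64_t def = 0x12345678; // materialized once into the CSE temp
    uint64_t use = 0x123459AB; // rebuilt as def + delta at the use site
    if (canShareConstant(def, use))
    {
        uint64_t delta = use - def; // 0x333 here, which fits an ARM64 12-bit add immediate
        printf("shared CSE: use = def + 0x%llx\n", (unsigned long long)delta);
    }
    return 0;
}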

Disable the CSE of the REG_R2R_INDIRECT_PARAM constant on Arm32,
as it hits "Assertion failed 'candidates != candidateBit'" in lsra.cpp, line 3723.

Config variable: COMPlus_JitConstCSE
// Default 0: enable the CSE of Constants, including nearby offsets. (only for ARM64)
// If 1, disable all the CSE of Constants
// If 2, enable the CSE of Constants but don't combine with nearby offsets. (only for ARM64)
// If 3, enable the CSE of Constants including nearby offsets. (all platforms)
// If 4, enable the CSE of Constants but don't combine with nearby offsets. (all platforms)
//

* Added additional Priority 0 test coverage for Floating Point optimizations

* Fix for COMPlus_JitConstCSE=4

* Renamed config variable from COMPlus_JitDisableConstCSE to COMPlus_JitConstCSE

* Updated with code review feedback; removed the sort from the Const CSE phase

* Fix for assertionProp issue in the refTypesdynamic test
briansull committed Jul 24, 2020
1 parent 3cab6dd commit 9bb4e10
Showing 18 changed files with 567 additions and 166 deletions.
22 changes: 17 additions & 5 deletions src/coreclr/src/jit/compiler.h
@@ -6326,11 +6326,12 @@ class Compiler

struct CSEdsc
{
CSEdsc* csdNextInBucket; // used by the hash table

unsigned csdHashKey; // the original hashkey

unsigned csdIndex; // 1..optCSECandidateCount
CSEdsc* csdNextInBucket; // used by the hash table
size_t csdHashKey; // the original hashkey
ssize_t csdConstDefValue; // When we CSE similar constants, this is the value that we use as the def
ValueNum csdConstDefVN; // When we CSE similar constants, this is the ValueNumber that we use for the LclVar
// assignment
unsigned csdIndex; // 1..optCSECandidateCount
bool csdLiveAcrossCall;

unsigned short csdDefCount; // definition count
@@ -6359,6 +6360,7 @@ class Compiler

ValueNum defConservNormVN; // if all def occurrences share the same conservative normal value
// number, this will reflect it; otherwise, NoVN.
// not used for shared const CSE's
};

static const size_t s_optCSEhashSize;
@@ -6406,6 +6408,16 @@ class Compiler
void optEnsureClearCSEInfo();
#endif // DEBUG

static bool Is_Shared_Const_CSE(size_t key)
{
return ((key & TARGET_SIGN_BIT) != 0);
}

static size_t Decode_Shared_Const_CSE_Value(size_t key)
{
return (key & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS;
}

#endif // FEATURE_ANYCSE

#if FEATURE_VALNUM_CSE
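For reference, a small self-contained sketch of how the Is_Shared_Const_CSE / Decode_Shared_Const_CSE_Value helpers above round-trip a key. The encode step shown here is an assumption inferred from the decode helper (the actual key construction lives in the CSE phase, outside the hunks shown), using stand-in constants for TARGET_SIGN_BIT and CSE_CONST_SHARED_LOW_BITS:

#include <cstdint>
#include <cstdio>

// Stand-ins for the JIT's target.h definitions on a 64-bit Arm target (assumption).
constexpr uint64_t kTargetSignBit = 1ULL << 63;
constexpr unsigned kSharedLowBits = 12;

// Hypothetical encode step: keep only the bits above the shared low-bit range and
// tag the key with the sign bit so Is_Shared_Const_CSE can recognize it.
uint64_t EncodeSharedConstKey(uint64_t value)
{
    return (value >> kSharedLowBits) | kTargetSignBit;
}

// Mirrors Decode_Shared_Const_CSE_Value: drop the tag and shift the bits back.
uint64_t DecodeSharedConstKey(uint64_t key)
{
    return (key & ~kTargetSignBit) << kSharedLowBits;
}

int main()
{
    uint64_t value = 0x0000123456789ABCULL;
    uint64_t key   = EncodeSharedConstKey(value);
    // Round-tripping recovers the value with its low 12 bits cleared.
    printf("0x%llx\n", (unsigned long long)DecodeSharedConstKey(key)); // prints 0x123456789000
    return 0;
}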
192 changes: 157 additions & 35 deletions src/coreclr/src/jit/gentree.cpp
@@ -3250,100 +3250,222 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
switch (oper)
{
#ifdef TARGET_ARM
case GT_CNS_LNG:
costSz = 9;
costEx = 4;
goto COMMON_CNS;

case GT_CNS_STR:
// Uses movw/movt
costSz = 7;
costEx = 3;
costSz = 8;
costEx = 2;
goto COMMON_CNS;

case GT_CNS_LNG:
{
GenTreeIntConCommon* con = tree->AsIntConCommon();

INT64 lngVal = con->LngValue();
INT32 loVal = (INT32)(lngVal & 0xffffffff);
INT32 hiVal = (INT32)(lngVal >> 32);

if (lngVal == 0)
{
costSz = 1;
costEx = 1;
}
else
{
// Minimum of one instruction to setup hiVal,
// and one instruction to setup loVal
costSz = 4 + 4;
costEx = 1 + 1;

if (!codeGen->validImmForInstr(INS_mov, (target_ssize_t)hiVal) &&
!codeGen->validImmForInstr(INS_mvn, (target_ssize_t)hiVal))
{
// Needs extra instruction: movw/movt
costSz += 4;
costEx += 1;
}

if (!codeGen->validImmForInstr(INS_mov, (target_ssize_t)loVal) &&
!codeGen->validImmForInstr(INS_mvn, (target_ssize_t)loVal))
{
// Needs extra instruction: movw/movt
costSz += 4;
costEx += 1;
}
}
goto COMMON_CNS;
}

case GT_CNS_INT:
{
// If the constant is a handle then it will need to have a relocation
// applied to it.
// Any constant that requires a reloc must use the movw/movt sequence
//
GenTreeIntConCommon* con = tree->AsIntConCommon();
GenTreeIntConCommon* con = tree->AsIntConCommon();
INT32 conVal = con->IconValue();

if (con->ImmedValNeedsReloc(this) ||
!codeGen->validImmForInstr(INS_mov, (target_ssize_t)tree->AsIntCon()->gtIconVal))
if (con->ImmedValNeedsReloc(this))
{
// Uses movw/movt
costSz = 7;
costEx = 3;
// Requires movw/movt
costSz = 8;
costEx = 2;
}
else if (((unsigned)tree->AsIntCon()->gtIconVal) <= 0x00ff)
else if (codeGen->validImmForInstr(INS_add, (target_ssize_t)conVal))
{
// mov Rd, <const8>
costSz = 1;
// Typically included with parent oper
costSz = 2;
costEx = 1;
}
else
else if (codeGen->validImmForInstr(INS_mov, (target_ssize_t)conVal) &&
codeGen->validImmForInstr(INS_mvn, (target_ssize_t)conVal))
{
// Uses movw/mvn
costSz = 3;
// Uses mov or mvn
costSz = 4;
costEx = 1;
}
else
{
// Needs movw/movt
costSz = 8;
costEx = 2;
}
goto COMMON_CNS;
}

#elif defined TARGET_XARCH

case GT_CNS_LNG:
costSz = 10;
costEx = 3;
goto COMMON_CNS;

case GT_CNS_STR:
#ifdef TARGET_AMD64
costSz = 10;
costEx = 2;
#else // TARGET_X86
costSz = 4;
costEx = 1;
#endif
goto COMMON_CNS;

case GT_CNS_LNG:
case GT_CNS_INT:
{
GenTreeIntConCommon* con = tree->AsIntConCommon();
ssize_t conVal = (oper == GT_CNS_LNG) ? (ssize_t)con->LngValue() : con->IconValue();
bool fitsInVal = true;

#ifdef TARGET_X86
if (oper == GT_CNS_LNG)
{
INT64 lngVal = con->LngValue();

conVal = (ssize_t)lngVal; // truncate to 32-bits

fitsInVal = ((INT64)conVal == lngVal);
}
#endif // TARGET_X86

// If the constant is a handle then it will need to have a relocation
// applied to it.
//
GenTreeIntConCommon* con = tree->AsIntConCommon();

bool iconNeedsReloc = con->ImmedValNeedsReloc(this);

if (!iconNeedsReloc && con->FitsInI8())
if (iconNeedsReloc)
{
costSz = 4;
costEx = 1;
}
else if (fitsInVal && GenTreeIntConCommon::FitsInI8(conVal))
{
costSz = 1;
costEx = 1;
}
#if defined(TARGET_AMD64)
else if (iconNeedsReloc || !con->FitsInI32())
#ifdef TARGET_AMD64
else if (!GenTreeIntConCommon::FitsInI32(conVal))
{
costSz = 10;
costEx = 3;
costEx = 2;
}
#endif // TARGET_AMD64
else
{
costSz = 4;
costEx = 1;
}
#ifdef TARGET_X86
if (oper == GT_CNS_LNG)
{
costSz += fitsInVal ? 1 : 4;
costEx += 1;
}
#endif // TARGET_X86

goto COMMON_CNS;
}

#elif defined(TARGET_ARM64)
case GT_CNS_LNG:

case GT_CNS_STR:
case GT_CNS_LNG:
case GT_CNS_INT:
// TODO-ARM64-NYI: Need cost estimates.
costSz = 1;
costEx = 1;
{
GenTreeIntConCommon* con = tree->AsIntConCommon();
bool iconNeedsReloc = con->ImmedValNeedsReloc(this);
INT64 imm = con->LngValue();
emitAttr size = EA_SIZE(emitActualTypeSize(tree));

if (iconNeedsReloc)
{
costSz = 8;
costEx = 2;
}
else if (emitter::emitIns_valid_imm_for_add(imm, size))
{
costSz = 2;
costEx = 1;
}
else if (emitter::emitIns_valid_imm_for_mov(imm, size))
{
costSz = 4;
costEx = 1;
}
else
{
// Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword
// There are three forms
// movk which loads into any halfword preserving the remaining halfwords
// movz which loads into any halfword zeroing the remaining halfwords
// movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting
// the register
// In some cases it is preferable to use movn, because it has the side effect of filling the
// other halfwords
// with ones

// Determine whether movn or movz will require the fewest instructions to populate the immediate
bool preferMovz = false;
bool preferMovn = false;
int instructionCount = 4;

for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16)
{
if (!preferMovn && (uint16_t(imm >> i) == 0x0000))
{
preferMovz = true; // by using a movk to start we can save one instruction
instructionCount--;
}
else if (!preferMovz && (uint16_t(imm >> i) == 0xffff))
{
preferMovn = true; // by using a movn to start we can save one instruction
instructionCount--;
}
}

costEx = instructionCount;
costSz = 4 * instructionCount;
}
}
goto COMMON_CNS;

#else
case GT_CNS_LNG:
case GT_CNS_STR:
case GT_CNS_LNG:
case GT_CNS_INT:
#error "Unknown TARGET"
#endif
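To make the ARM64 cost estimate above concrete, here is a standalone sketch of the movz/movn halfword scan, lifted out of gtSetEvalOrder and simplified to the 64-bit (EA_8BYTE) case with no emitter or reloc checks:

#include <cstdint>
#include <cstdio>

// Mirrors the instruction-count loop in the new GT_CNS_INT/GT_CNS_LNG costing for ARM64.
int CountMovInstructions(uint64_t imm)
{
    bool preferMovz       = false;
    bool preferMovn       = false;
    int  instructionCount = 4; // worst case: one movz/movn plus three movk

    for (int i = 48; i >= 0; i -= 16)
    {
        if (!preferMovn && (uint16_t(imm >> i) == 0x0000))
        {
            preferMovz = true; // a zero halfword needs no movk once we start with movz
            instructionCount--;
        }
        else if (!preferMovz && (uint16_t(imm >> i) == 0xffff))
        {
            preferMovn = true; // an all-ones halfword needs no movk once we start with movn
            instructionCount--;
        }
    }
    return instructionCount;
}

int main()
{
    // Two zero halfwords -> movz + movk: count 2, so costEx = 2 and costSz = 8 above.
    printf("%d\n", CountMovInstructions(0x0000000000012345ULL));
    // Three all-ones halfwords -> a single movn: count 1.
    printf("%d\n", CountMovInstructions(0xFFFFFFFFFFFF1234ULL));
    return 0;
}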
14 changes: 14 additions & 0 deletions src/coreclr/src/jit/jitconfigvalues.h
@@ -285,6 +285,20 @@ CONFIG_INTEGER(JitDisableSimdVN, W("JitDisableSimdVN"), 0) // Default 0, ValueNu
// If 3, disable both SIMD and HW Intrinsic nodes
#endif // FEATURE_SIMD

// Default 0, enable the CSE of Constants, including nearby offsets. (only for ARM64)
// If 1, disable all the CSE of Constants
// If 2, enable the CSE of Constants but don't combine with nearby offsets. (only for ARM64)
// If 3, enable the CSE of Constants including nearby offsets. (all platforms)
// If 4, enable the CSE of Constants but don't combine with nearby offsets. (all platforms)
//
CONFIG_INTEGER(JitConstCSE, W("JitConstCSE"), 0)

#define CONST_CSE_ENABLE_ARM64 0
#define CONST_CSE_DISABLE_ALL 1
#define CONST_CSE_ENABLE_ARM64_NO_SHARING 2
#define CONST_CSE_ENABLE_ALL 3
#define CONST_CSE_ENABLE_ALL_NO_SHARING 4

///
/// JIT
///
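A hedged sketch of how these five JitConstCSE values map onto the two decisions the optimizer has to make: whether to CSE constants at all, and whether to combine nearby constants into one shared def. The helper names and the isArm64 parameter are hypothetical; the real checks live in the CSE phase, which is not part of the hunks shown here:

#include <cstdio>

// Mirrors the #define values added above.
enum ConstCSEMode
{
    CONST_CSE_ENABLE_ARM64            = 0,
    CONST_CSE_DISABLE_ALL             = 1,
    CONST_CSE_ENABLE_ARM64_NO_SHARING = 2,
    CONST_CSE_ENABLE_ALL              = 3,
    CONST_CSE_ENABLE_ALL_NO_SHARING   = 4
};

// Hypothetical helper: isArm64 stands in for the TARGET_ARM64 compile-time check.
bool constCSEEnabled(ConstCSEMode mode, bool isArm64)
{
    switch (mode)
    {
        case CONST_CSE_DISABLE_ALL:
            return false;
        case CONST_CSE_ENABLE_ARM64:
        case CONST_CSE_ENABLE_ARM64_NO_SHARING:
            return isArm64;
        default: // CONST_CSE_ENABLE_ALL, CONST_CSE_ENABLE_ALL_NO_SHARING
            return true;
    }
}

// Sharing of nearby constants applies only when constant CSE is on and the
// mode is not one of the NO_SHARING variants.
bool sharedConstCSEEnabled(ConstCSEMode mode, bool isArm64)
{
    return constCSEEnabled(mode, isArm64) && (mode != CONST_CSE_ENABLE_ARM64_NO_SHARING) &&
           (mode != CONST_CSE_ENABLE_ALL_NO_SHARING);
}

int main()
{
    // Default mode on a non-ARM64 target: constant CSE stays off.
    printf("%d\n", constCSEEnabled(CONST_CSE_ENABLE_ARM64, /*isArm64*/ false)); // 0
    // Mode 3 enables constant CSE with sharing on all platforms.
    printf("%d\n", sharedConstCSEEnabled(CONST_CSE_ENABLE_ALL, /*isArm64*/ false)); // 1
    return 0;
}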
7 changes: 7 additions & 0 deletions src/coreclr/src/jit/morph.cpp
@@ -2707,6 +2707,13 @@ void Compiler::fgInitArgInfo(GenTreeCall* call)
indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd;
#endif
indirectCellAddress->SetRegNum(REG_R2R_INDIRECT_PARAM);
#ifdef TARGET_ARM
// Issue #xxxx : Don't attempt to CSE this constant on ARM32
//
// This constant has specific register requirements, and LSRA doesn't currently correctly
// handle them when the value is in a CSE'd local.
indirectCellAddress->SetDoNotCSE();
#endif // TARGET_ARM

// Push the stub address onto the list of arguments.
call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs);