From 3da009fe1de6f737b4b2d3c9486fe782f5f27a8c Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sun, 27 Mar 2022 09:52:44 -0700 Subject: [PATCH 1/2] JIT: enable cloning based on loop invariant type tests Such as those added by GDV. The JIT will now clone just for type tests, or just for array bounds, or for a mixture of the two. The JIT will still produce just one fast and one slow loop. If there are a mixture of array bounds and type test conditions, all conditions must pass for control to reach the fast loop. Unlike array bounds checks, type test failures are not intrinsically rare, so there is some profitability screening to ensure that a failed type test does not force execution to run the slow version "too often". The type test must execute frequently within the loop, and be heavily biased towards success. This is work towards resolving #65206. --- src/coreclr/jit/compiler.h | 21 +- src/coreclr/jit/indirectcalltransformer.cpp | 2 - src/coreclr/jit/jitconfigvalues.h | 2 + src/coreclr/jit/loopcloning.cpp | 803 ++++++++++++------ src/coreclr/jit/loopcloning.h | 65 +- src/coreclr/jit/loopcloningopts.h | 1 + .../enumerablecloning.cs | 48 ++ .../enumerablecloning.csproj | 22 + .../typetestcloning.cs | 84 ++ .../typetestcloning.csproj | 22 + 10 files changed, 785 insertions(+), 285 deletions(-) create mode 100644 src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.cs create mode 100644 src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.csproj create mode 100644 src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.cs create mode 100644 src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.csproj diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 60c8041555930..576b39f748fbf 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6801,11 +6801,6 @@ class Compiler optMethodFlags |= OMF_HAS_GUARDEDDEVIRT; } - void clearMethodHasGuardedDevirtualization() - { - optMethodFlags &= ~OMF_HAS_GUARDEDDEVIRT; - } - void considerGuardedDevirtualization(GenTreeCall* call, IL_OFFSET ilOffset, bool isInterface, @@ -7300,10 +7295,20 @@ class Compiler struct LoopCloneVisitorInfo { LoopCloneContext* context; - unsigned loopNum; Statement* stmt; - LoopCloneVisitorInfo(LoopCloneContext* context, unsigned loopNum, Statement* stmt) - : context(context), loopNum(loopNum), stmt(nullptr) + const unsigned loopNum; + const bool cloneForArrayBounds; + const bool cloneForTypeTests; + LoopCloneVisitorInfo(LoopCloneContext* context, + unsigned loopNum, + Statement* stmt, + bool cloneForArrayBounds, + bool cloneForTypeTests) + : context(context) + , stmt(nullptr) + , loopNum(loopNum) + , cloneForArrayBounds(cloneForArrayBounds) + , cloneForTypeTests(cloneForTypeTests) { } }; diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 52ad8090f314e..14242b8f718b9 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -1204,7 +1204,6 @@ Compiler::fgWalkResult Compiler::fgDebugCheckForTransformableIndirectCalls(GenTr void Compiler::CheckNoTransformableIndirectCallsRemain() { assert(!doesMethodHaveFatPointer()); - assert(!doesMethodHaveGuardedDevirtualization()); assert(!doesMethodHaveExpRuntimeLookup()); for (BasicBlock* const block : Blocks()) @@ -1244,7 +1243,6 @@ PhaseStatus Compiler::fgTransformIndirectCalls() } clearMethodHasFatPointer(); - clearMethodHasGuardedDevirtualization(); clearMethodHasExpRuntimeLookup(); } else diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 29e977cf1d8c9..e125dfa41f5ef 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -42,6 +42,8 @@ CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0) CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts CONFIG_INTEGER(JitBreakOnUnsafeCode, W("JitBreakOnUnsafeCode"), 0) CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations. +CONFIG_INTEGER(JitCloneLoopsWithTypeTests, W("JitCloneLoopsWithTypeTests"), 1) // If 0, don't clone loops based on + // invariant type tests CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning // optimizations are performed on the fast path. CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xdd) // In debug builds, initialize the memory allocated by the nra diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 1713db4979f55..bb31ae845fda3 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -129,11 +129,19 @@ GenTree* LC_Ident::ToGenTree(Compiler* comp, BasicBlock* bb) assert(constant <= INT32_MAX); return comp->gtNewIconNode(constant); case Var: - return comp->gtNewLclvNode(constant, comp->lvaTable[constant].lvType); + return comp->gtNewLclvNode((unsigned)constant, comp->lvaTable[constant].lvType); case ArrLen: return arrLen.ToGenTree(comp, bb); case Null: return comp->gtNewIconNode(0, TYP_REF); + case ClassHandle: + return comp->gtNewIconHandleNode((size_t)constant, GTF_ICON_CLASS_HDL); + case Indir: + { + GenTree* indir = comp->gtNewIndir(TYP_I_IMPL, comp->gtNewLclvNode((unsigned)constant, TYP_REF)); + indir->gtFlags |= GTF_IND_INVARIANT; + return indir; + } default: assert(!"Could not convert LC_Ident to GenTree"); unreached(); @@ -353,21 +361,39 @@ JitExpandArrayStack* LoopCloneContext::GetConditions(unsigned loop } //-------------------------------------------------------------------------------------------------- -// EnsureDerefs - Ensure an array of dereferences is created if it doesn't exist. +// EnsureArrayDerefs - Ensure an array of array dereferences is created if it doesn't exist. // // Arguments: // loopNum the loop index. // // Return Values: -// The array of dereferences for the loop. +// The array of array dereferences for the loop. // -JitExpandArrayStack* LoopCloneContext::EnsureDerefs(unsigned loopNum) +JitExpandArrayStack* LoopCloneContext::EnsureArrayDerefs(unsigned loopNum) { - if (derefs[loopNum] == nullptr) + if (arrayDerefs[loopNum] == nullptr) { - derefs[loopNum] = new (alloc) JitExpandArrayStack(alloc, 4); + arrayDerefs[loopNum] = new (alloc) JitExpandArrayStack(alloc, 4); } - return derefs[loopNum]; + return arrayDerefs[loopNum]; +} + +//-------------------------------------------------------------------------------------------------- +// EnsureObjDerefs - Ensure an array of object dereferences is created if it doesn't exist. +// +// Arguments: +// loopNum the loop index. +// +// Return Values: +// The array of object dereferences for the loop. +// +JitExpandArrayStack* LoopCloneContext::EnsureObjDerefs(unsigned loopNum) +{ + if (objDerefs[loopNum] == nullptr) + { + objDerefs[loopNum] = new (alloc) JitExpandArrayStack(alloc, 4); + } + return objDerefs[loopNum]; } //-------------------------------------------------------------------------------------------------- @@ -862,7 +888,7 @@ BasicBlock* LoopCloneContext::CondToStmtInBlock(Compiler* // Return Values: // The local variable in the node's level. // -unsigned LC_Deref::Lcl() +unsigned LC_ArrayDeref::Lcl() { unsigned lvl = level; if (lvl == 0) @@ -882,7 +908,7 @@ unsigned LC_Deref::Lcl() // Return Values: // Return true if children are present. // -bool LC_Deref::HasChildren() +bool LC_ArrayDeref::HasChildren() { return children != nullptr && children->Size() > 0; } @@ -901,7 +927,7 @@ bool LC_Deref::HasChildren() // Return Values: // None // -void LC_Deref::DeriveLevelConditions(JitExpandArrayStack*>* conds) +void LC_ArrayDeref::DeriveLevelConditions(JitExpandArrayStack*>* conds) { if (level == 0) { @@ -943,11 +969,11 @@ void LC_Deref::DeriveLevelConditions(JitExpandArrayStack(alloc); + children = new (alloc) JitExpandArrayStack(alloc); } } @@ -960,7 +986,7 @@ void LC_Deref::EnsureChildren(CompAllocator alloc) // Return Values: // The child node if found or nullptr. // -LC_Deref* LC_Deref::Find(unsigned lcl) +LC_ArrayDeref* LC_ArrayDeref::Find(unsigned lcl) { return Find(children, lcl); } @@ -977,7 +1003,7 @@ LC_Deref* LC_Deref::Find(unsigned lcl) // // static -LC_Deref* LC_Deref::Find(JitExpandArrayStack* children, unsigned lcl) +LC_ArrayDeref* LC_ArrayDeref::Find(JitExpandArrayStack* children, unsigned lcl) { if (children == nullptr) { @@ -1022,200 +1048,249 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext JITDUMP("------------------------------------------------------------\n"); JITDUMP("Deriving cloning conditions for " FMT_LP "\n", loopNum); - LoopDsc* loop = &optLoopTable[loopNum]; - JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); - bool isIncreasingLoop = loop->lpIsIncreasingLoop(); + LoopDsc* loop = &optLoopTable[loopNum]; + JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loopNum); + assert(optInfos->Size() > 0); + + // We only need to check for iteration behavior if we have array checks. + // + bool checkIterationBehavior = false; + + for (unsigned i = 0; i < optInfos->Size(); ++i) + { + LcOptInfo* const optInfo = optInfos->Get(i); + switch (optInfo->GetOptType()) + { + case LcOptInfo::LcJaggedArray: + case LcOptInfo::LcMdArray: + checkIterationBehavior = true; + break; + + case LcOptInfo::LcTypeTest: + { + LcTypeTestOptInfo* ttInfo = optInfo->AsLcTypeTestOptInfo(); + LC_Ident objDeref = LC_Ident(ttInfo->lclNum, LC_Ident::Indir); + LC_Ident methodTable = LC_Ident(ttInfo->clsHnd, LC_Ident::ClassHandle); + LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methodTable)); + context->EnsureObjDerefs(loopNum)->Push(objDeref); + context->EnsureConditions(loopNum)->Push(cond); + break; + } + + default: + JITDUMP("Unknown opt\n"); + return false; + } + } + + if (!checkIterationBehavior) + { + // No array conditions here, so we're done + // + JITDUMP("Conditions: "); + DBEXEC(verbose, context->PrintConditions(loopNum)); + JITDUMP("\n"); + return true; + } + + // Note we see cases where the test oper is NE (array.Len) which we could handle + // with some extra care. + // + if (!GenTree::StaticOperIs(loop->lpTestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) + { + // We can't reason about how this loop iterates + return false; + } + + const bool isIncreasingLoop = loop->lpIsIncreasingLoop(); assert(isIncreasingLoop || loop->lpIsDecreasingLoop()); - if (GenTree::StaticOperIs(loop->lpTestOper(), GT_LT, GT_LE, GT_GT, GT_GE)) + // We already know that this is either increasing or decreasing loop and the + // stride is (> 0) or (< 0). Here, just take the abs() value and check if it + // is beyond the limit. + int stride = abs(loop->lpIterConst()); + + if (stride >= 58) { - // We already know that this is either increasing or decreasing loop and the - // stride is (> 0) or (< 0). Here, just take the abs() value and check if it - // is beyond the limit. - int stride = abs(loop->lpIterConst()); + // Array.MaxLength can have maximum of 0X7FFFFFC7 elements, so make sure + // the stride increment doesn't overflow or underflow the index. Hence, + // the maximum stride limit is set to + // (int.MaxValue - (Array.MaxLength - 1) + 1), which is + // (0X7fffffff - 0x7fffffc7 + 2) = 0x3a or 58. + return false; + } - if (stride >= 58) + LC_Ident ident; + // Init conditions + if (loop->lpFlags & LPFLG_CONST_INIT) + { + // Only allowing non-negative const init at this time. + // This is because the variable initialized with this constant will be used as an array index, + // and array indices must be non-negative. + if (loop->lpConstInit < 0) { - // Array.MaxLength can have maximum of 0X7FFFFFC7 elements, so make sure - // the stride increment doesn't overflow or underflow the index. Hence, - // the maximum stride limit is set to - // (int.MaxValue - (Array.MaxLength - 1) + 1), which is - // (0X7fffffff - 0x7fffffc7 + 2) = 0x3a or 58. + JITDUMP("> Init %d is invalid\n", loop->lpConstInit); return false; } - LC_Ident ident; - // Init conditions - if (loop->lpFlags & LPFLG_CONST_INIT) + if (!isIncreasingLoop) { - // Only allowing non-negative const init at this time. - // This is because the variable initialized with this constant will be used as an array index, - // and array indices must be non-negative. - if (loop->lpConstInit < 0) - { - JITDUMP("> Init %d is invalid\n", loop->lpConstInit); - return false; - } - - if (!isIncreasingLoop) - { - // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident(static_cast(loop->lpConstInit), LC_Ident::Const); - } + // For decreasing loop, the init value needs to be checked against the array length + ident = LC_Ident(static_cast(loop->lpConstInit), LC_Ident::Const); } - else + } + else + { + // iterVar >= 0 + const unsigned initLcl = loop->lpIterVar(); + if (!genActualTypeIsInt(lvaGetDesc(initLcl))) { - // iterVar >= 0 - const unsigned initLcl = loop->lpIterVar(); - if (!genActualTypeIsInt(lvaGetDesc(initLcl))) - { - JITDUMP("> Init var V%02u not compatible with TYP_INT\n", initLcl); - return false; - } - - LC_Condition geZero; - if (isIncreasingLoop) - { - geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident(initLcl, LC_Ident::Var)), - LC_Expr(LC_Ident(0, LC_Ident::Const))); - } - else - { - // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident(initLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); - } - context->EnsureConditions(loopNum)->Push(geZero); + JITDUMP("> Init var V%02u not compatible with TYP_INT\n", initLcl); + return false; } - // Limit Conditions - if (loop->lpFlags & LPFLG_CONST_LIMIT) + LC_Condition geZero; + if (isIncreasingLoop) { - int limit = loop->lpConstLimit(); - if (limit < 0) - { - JITDUMP("> limit %d is invalid\n", limit); - return false; - } - - if (isIncreasingLoop) - { - // For increasing loop, thelimit value needs to be checked against the array length - ident = LC_Ident(static_cast(limit), LC_Ident::Const); - } + geZero = + LC_Condition(GT_GE, LC_Expr(LC_Ident(initLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0, LC_Ident::Const))); } - else if (loop->lpFlags & LPFLG_VAR_LIMIT) + else { - const unsigned limitLcl = loop->lpVarLimit(); - if (!genActualTypeIsInt(lvaGetDesc(limitLcl))) - { - JITDUMP("> Limit var V%02u not compatible with TYP_INT\n", limitLcl); - return false; - } + // For decreasing loop, the init value needs to be checked against the array length + ident = LC_Ident(initLcl, LC_Ident::Var); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); + } + context->EnsureConditions(loopNum)->Push(geZero); + } - LC_Condition geZero; - if (isIncreasingLoop) - { - // For increasing loop, thelimit value needs to be checked against the array length - ident = LC_Ident(limitLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); - } - else - { - geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident(limitLcl, LC_Ident::Var)), - LC_Expr(LC_Ident(0, LC_Ident::Const))); - } + // Limit Conditions + if (loop->lpFlags & LPFLG_CONST_LIMIT) + { + int limit = loop->lpConstLimit(); + if (limit < 0) + { + JITDUMP("> limit %d is invalid\n", limit); + return false; + } - context->EnsureConditions(loopNum)->Push(geZero); + if (isIncreasingLoop) + { + // For increasing loop, thelimit value needs to be checked against the array length + ident = LC_Ident(static_cast(limit), LC_Ident::Const); } - else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) + } + else if (loop->lpFlags & LPFLG_VAR_LIMIT) + { + const unsigned limitLcl = loop->lpVarLimit(); + if (!genActualTypeIsInt(lvaGetDesc(limitLcl))) { - ArrIndex* index = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone)); - if (!loop->lpArrLenLimit(this, index)) - { - JITDUMP("> ArrLen not matching\n"); - return false; - } - ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); + JITDUMP("> Limit var V%02u not compatible with TYP_INT\n", limitLcl); + return false; + } - // Ensure that this array must be dereference-able, before executing the actual condition. - LC_Array array(LC_Array::Jagged, index, LC_Array::None); - context->EnsureDerefs(loopNum)->Push(array); + LC_Condition geZero; + if (isIncreasingLoop) + { + // For increasing loop, thelimit value needs to be checked against the array length + ident = LC_Ident(limitLcl, LC_Ident::Var); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); } else { - JITDUMP("> Undetected limit\n"); - return false; + geZero = + LC_Condition(GT_GE, LC_Expr(LC_Ident(limitLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0, LC_Ident::Const))); } - // Increasing loops - // GT_LT loop test: (start < end) ==> (end <= arrLen) - // GT_LE loop test: (start <= end) ==> (end < arrLen) - // - // Decreasing loops - // GT_GT loop test: (end > start) ==> (end <= arrLen) - // GT_GE loop test: (end >= start) ==> (end < arrLen) - genTreeOps opLimitCondition; - switch (loop->lpTestOper()) - { - case GT_LT: - case GT_GT: - opLimitCondition = GT_LE; - break; - case GT_LE: - case GT_GE: - opLimitCondition = GT_LT; - break; - default: - unreached(); + context->EnsureConditions(loopNum)->Push(geZero); + } + else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT) + { + ArrIndex* index = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone)); + if (!loop->lpArrLenLimit(this, index)) + { + JITDUMP("> ArrLen not matching\n"); + return false; } + ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); - for (unsigned i = 0; i < optInfos->Size(); ++i) + // Ensure that this array must be dereference-able, before executing the actual condition. + LC_Array array(LC_Array::Jagged, index, LC_Array::None); + context->EnsureArrayDerefs(loopNum)->Push(array); + } + else + { + JITDUMP("> Undetected limit\n"); + return false; + } + + // Increasing loops + // GT_LT loop test: (start < end) ==> (end <= arrLen) + // GT_LE loop test: (start <= end) ==> (end < arrLen) + // + // Decreasing loops + // GT_GT loop test: (end > start) ==> (end <= arrLen) + // GT_GE loop test: (end >= start) ==> (end < arrLen) + genTreeOps opLimitCondition; + switch (loop->lpTestOper()) + { + case GT_LT: + case GT_GT: + opLimitCondition = GT_LE; + break; + case GT_LE: + case GT_GE: + opLimitCondition = GT_LT; + break; + default: + unreached(); + } + + for (unsigned i = 0; i < optInfos->Size(); ++i) + { + LcOptInfo* optInfo = optInfos->Get(i); + switch (optInfo->GetOptType()) { - LcOptInfo* optInfo = optInfos->Get(i); - switch (optInfo->GetOptType()) + case LcOptInfo::LcJaggedArray: { - case LcOptInfo::LcJaggedArray: - { - LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); - LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); - LC_Ident arrLenIdent = LC_Ident(arrLen); - LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); - - // Ensure that this array must be dereference-able, before executing the actual condition. - LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); - context->EnsureDerefs(loopNum)->Push(array); - } - break; - case LcOptInfo::LcMdArray: - { - LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); - LC_Array arrLen(LC_Array(LC_Array::MdArray, - mdArrInfo->GetArrIndexForDim(getAllocator(CMK_LoopClone)), mdArrInfo->dim, - LC_Array::None)); - LC_Ident arrLenIdent = LC_Ident(arrLen); - LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); - context->EnsureConditions(loopNum)->Push(cond); - - // TODO: ensure array is dereference-able? - } + LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); + LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); + LC_Ident arrLenIdent = LC_Ident(arrLen); + LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); + context->EnsureConditions(loopNum)->Push(cond); + + // Ensure that this array must be dereference-able, before executing the actual condition. + LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None); + context->EnsureArrayDerefs(loopNum)->Push(array); + } + break; + case LcOptInfo::LcMdArray: + { + LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); + LC_Array arrLen(LC_Array(LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator(CMK_LoopClone)), + mdArrInfo->dim, LC_Array::None)); + LC_Ident arrLenIdent = LC_Ident(arrLen); + LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); + context->EnsureConditions(loopNum)->Push(cond); + + // TODO: ensure array is dereference-able? + } + break; + case LcOptInfo::LcTypeTest: + // handled above break; - default: - JITDUMP("Unknown opt\n"); - return false; - } + default: + JITDUMP("Unknown opt\n"); + return false; } - - JITDUMP("Conditions: "); - DBEXEC(verbose, context->PrintConditions(loopNum)); - JITDUMP("\n"); - - return true; } - return false; + JITDUMP("Conditions: "); + DBEXEC(verbose, context->PrintConditions(loopNum)); + JITDUMP("\n"); + + return true; } //------------------------------------------------------------------------------------ @@ -1325,26 +1400,34 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext // bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context) { - JitExpandArrayStack nodes(getAllocator(CMK_LoopClone)); - int maxRank = -1; + // Get the dereference-able arrays and objects. + JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loopNum); + JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loopNum); - // Get the dereference-able arrays. - JitExpandArrayStack* deref = context->EnsureDerefs(loopNum); + // We currently expect to have at least one of these. + // + assert((arrayDeref->Size() != 0) || (objDeref->Size() != 0)); + // Generate the array dereference checks. + // // For each array in the dereference list, construct a tree, - // where the nodes are array and index variables and an edge 'u-v' + // where the arrayDerefNodes are array and index variables and an edge 'u-v' // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge // 'u-v-w' transitively if u[v][w] occurs. - for (unsigned i = 0; i < deref->Size(); ++i) + // + JitExpandArrayStack arrayDerefNodes(getAllocator(CMK_LoopClone)); + int maxRank = -1; + + for (unsigned i = 0; i < arrayDeref->Size(); ++i) { - LC_Array& array = (*deref)[i]; + LC_Array& array = (*arrayDeref)[i]; // First populate the array base variable. - LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl); + LC_ArrayDeref* node = LC_ArrayDeref::Find(&arrayDerefNodes, array.arrIndex->arrLcl); if (node == nullptr) { - node = new (getAllocator(CMK_LoopClone)) LC_Deref(array, 0 /*level*/); - nodes.Push(node); + node = new (getAllocator(CMK_LoopClone)) LC_ArrayDeref(array, 0 /*level*/); + arrayDerefNodes.Push(node); } // For each dimension (level) for the array, populate the tree with the variable @@ -1353,10 +1436,10 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con for (unsigned i = 0; i < rank; ++i) { node->EnsureChildren(getAllocator(CMK_LoopClone)); - LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]); + LC_ArrayDeref* tmp = node->Find(array.arrIndex->indLcls[i]); if (tmp == nullptr) { - tmp = new (getAllocator(CMK_LoopClone)) LC_Deref(array, node->level + 1); + tmp = new (getAllocator(CMK_LoopClone)) LC_ArrayDeref(array, node->level + 1); node->children->Push(tmp); } @@ -1371,45 +1454,68 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con #ifdef DEBUG if (verbose) { - printf("Deref condition tree:\n"); - for (unsigned i = 0; i < nodes.Size(); ++i) + if (arrayDerefNodes.Size() > 0) { - nodes[i]->Print(); - printf("\n"); + printf("Array deref condition tree:\n"); + for (unsigned i = 0; i < arrayDerefNodes.Size(); ++i) + { + arrayDerefNodes[i]->Print(); + printf("\n"); + } + } + else + { + printf("No array deref conditions\n"); } } #endif - if (maxRank == -1) + if (arrayDeref->Size() > 0) { - JITDUMP("> maxRank undefined\n"); - return false; - } - - // First level will always yield the null-check, since it is made of the array base variables. - // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null) - // So add 1 after rank * 2. - unsigned condBlocks = (unsigned)maxRank * 2 + 1; + // If we have array derefs we should have set maxRank. + // + assert(maxRank != -1); + + // First level will always yield the null-check, since it is made of the array base variables. + // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null) + // So add 1 after rank * 2. + const unsigned condBlocks = (unsigned)maxRank * 2 + 1; + + // Heuristic to not create too many blocks. Defining as 3 allows, effectively, loop cloning on + // doubly-nested loops. + // REVIEW: make this based on a COMPlus configuration, at least for debug? + const unsigned maxAllowedCondBlocks = 3; + if (condBlocks > maxAllowedCondBlocks) + { + JITDUMP("> Too many condition blocks (%u > %u)\n", condBlocks, maxAllowedCondBlocks); + return false; + } - // Heuristic to not create too many blocks. Defining as 3 allows, effectively, loop cloning on - // doubly-nested loops. - // REVIEW: make this based on a COMPlus configuration, at least for debug? - const unsigned maxAllowedCondBlocks = 3; - if (condBlocks > maxAllowedCondBlocks) - { - JITDUMP("> Too many condition blocks (%u > %u)\n", condBlocks, maxAllowedCondBlocks); - return false; + // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] + JitExpandArrayStack*>* levelCond = + context->EnsureBlockConditions(loopNum, condBlocks); + for (unsigned i = 0; i < arrayDerefNodes.Size(); ++i) + { + arrayDerefNodes[i]->DeriveLevelConditions(levelCond); + } } - // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds] - JitExpandArrayStack*>* levelCond = - context->EnsureBlockConditions(loopNum, condBlocks); - for (unsigned i = 0; i < nodes.Size(); ++i) + if (objDeref->Size() > 0) { - nodes[i]->DeriveLevelConditions(levelCond); + JitExpandArrayStack*>* levelCond = context->EnsureBlockConditions(loopNum, 1); + + for (unsigned i = 0; i < objDeref->Size(); ++i) + { + // ObjDeref array has indir(lcl), we want lcl. + // + LC_Ident& mtIndirIdent = (*objDeref)[i]; + LC_Ident ident(mtIndirIdent.constant, LC_Ident::Var); + (*levelCond)[0]->Push(LC_Condition(GT_NE, LC_Expr(ident), LC_Expr(LC_Ident(LC_Ident::Null)))); + } } DBEXEC(verbose, context->PrintBlockConditions(loopNum)); + return true; } @@ -1523,6 +1629,10 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* case LcOptInfo::LcMdArray: // TODO-CQ: CLONE: Implement. break; + case LcOptInfo::LcTypeTest: + // We could optimize here. For now, let downstream opts clean this up. + break; + default: break; } @@ -1545,9 +1655,10 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* // bool Compiler::optIsLoopClonable(unsigned loopInd) { - const LoopDsc& loop = optLoopTable[loopInd]; + const LoopDsc& loop = optLoopTable[loopInd]; + const bool requireIterable = !doesMethodHaveGuardedDevirtualization(); - if (!(loop.lpFlags & LPFLG_ITER)) + if (requireIterable && !(loop.lpFlags & LPFLG_ITER)) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ". No LPFLG_ITER flag.\n", loopInd); return false; @@ -1628,12 +1739,15 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) return false; } - unsigned ivLclNum = loop.lpIterVar(); - if (lvaVarAddrExposed(ivLclNum)) + if (requireIterable) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Rejected V%02u as iter var because is address-exposed.\n", - loopInd, ivLclNum); - return false; + const unsigned ivLclNum = loop.lpIterVar(); + if (lvaVarAddrExposed(ivLclNum)) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Rejected V%02u as iter var because is address-exposed.\n", + loopInd, ivLclNum); + return false; + } } BasicBlock* top = loop.lpTop; @@ -1651,39 +1765,45 @@ bool Compiler::optIsLoopClonable(unsigned loopInd) return false; } - if ((loop.lpFlags & LPFLG_CONST_LIMIT) == 0 && (loop.lpFlags & LPFLG_VAR_LIMIT) == 0 && - (loop.lpFlags & LPFLG_ARRLEN_LIMIT) == 0) + if (requireIterable) { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Loop limit is neither constant, variable or array length.\n", - loopInd); - return false; - } + if ((loop.lpFlags & LPFLG_CONST_LIMIT) == 0 && (loop.lpFlags & LPFLG_VAR_LIMIT) == 0 && + (loop.lpFlags & LPFLG_ARRLEN_LIMIT) == 0) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP + ". Loop limit is neither constant, variable or array length.\n", + loopInd); + return false; + } - // TODO-CQ: Handle other loops like: - // - The ones whose limit operator is "==" or "!=" - // - The incrementing operator is multiple and divide - // - The ones that are inverted are not handled here for cases like "i *= 2" because - // they are converted to "i + i". - if (!(loop.lpIsIncreasingLoop() || loop.lpIsDecreasingLoop())) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP - ". Loop test (%s) doesn't agree with the direction (%s) of the loop.\n", - loopInd, GenTree::OpName(loop.lpTestOper()), GenTree::OpName(loop.lpIterOper())); - return false; - } + // TODO-CQ: Handle other loops like: + // - The ones whose limit operator is "==" or "!=" + // - The incrementing operator is multiple and divide + // - The ones that are inverted are not handled here for cases like "i *= 2" because + // they are converted to "i + i". + if (!(loop.lpIsIncreasingLoop() || loop.lpIsDecreasingLoop())) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP + ". Loop test (%s) doesn't agree with the direction (%s) of the loop.\n", + loopInd, GenTree::OpName(loop.lpTestOper()), GenTree::OpName(loop.lpIterOper())); + return false; + } - if (!loop.lpTestTree->OperIsCompare() || !(loop.lpTestTree->gtFlags & GTF_RELOP_ZTT)) - { - JITDUMP("Loop cloning: rejecting loop " FMT_LP ". Loop inversion NOT present, loop test [%06u] may not protect " - "entry from head.\n", - loopInd, loop.lpTestTree->gtTreeID); - return false; - } + if (!loop.lpTestTree->OperIsCompare() || !(loop.lpTestTree->gtFlags & GTF_RELOP_ZTT)) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP + ". Loop inversion NOT present, loop test [%06u] may not protect " + "entry from head.\n", + loopInd, loop.lpTestTree->gtTreeID); + return false; + } #ifdef DEBUG - GenTree* op1 = loop.lpIterator(); - assert((op1->gtOper == GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == ivLclNum)); + const unsigned ivLclNum = loop.lpIterVar(); + GenTree* const op1 = loop.lpIterator(); + assert((op1->gtOper == GT_LCL_VAR) && (op1->AsLclVarCommon()->GetLclNum() == ivLclNum)); #endif + } // Otherwise, we're going to add those return blocks. fgReturnCount += loopRetCount; @@ -1725,12 +1845,15 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, assert(slowHead != nullptr); assert(insertAfter->bbJumpKind == BBJ_NONE); - JitExpandArrayStack*>* levelCond = context->GetBlockConditions(loopNum); - for (unsigned i = 0; i < levelCond->Size(); ++i) + if (context->HasBlockConditions(loopNum)) { - JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loopNum, i); - DBEXEC(verbose, context->PrintBlockLevelConditions(i, (*levelCond)[i])); - insertAfter = context->CondToStmtInBlock(this, *((*levelCond)[i]), slowHead, insertAfter); + JitExpandArrayStack*>* levelCond = context->GetBlockConditions(loopNum); + for (unsigned i = 0; i < levelCond->Size(); ++i) + { + JITDUMP("Adding loop " FMT_LP " level %u block conditions\n ", loopNum, i); + DBEXEC(verbose, context->PrintBlockLevelConditions(i, (*levelCond)[i])); + insertAfter = context->CondToStmtInBlock(this, *((*levelCond)[i]), slowHead, insertAfter); + } } // Finally insert cloning conditions after all deref conditions have been inserted. @@ -2097,7 +2220,8 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // ... // slowHead -?> e2 (slowHead) branch or fall-through to e2 // - // We should always have block conditions; at the minimum, the array should be deref-able. + // We should always have block conditions. + assert(context->HasBlockConditions(loopInd)); if (h->bbJumpKind == BBJ_NONE) @@ -2495,6 +2619,9 @@ bool Compiler::optReconstructArrIndex(GenTree* tree, ArrIndex* result) // array index var in some dimension. Also ensure other index vars before the identified // dimension are loop invariant. // +// If the loop has invariant type tests, check if they will succeed often enough that +// they should inspire cloning (on their own, or in conjunction with array bounds checks). +// // Return Value: // Skip sub trees if the optimization candidate is identified or else continue walking // @@ -2503,7 +2630,8 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop ArrIndex arrIndex(getAllocator(CMK_LoopClone)); // Check if array index can be optimized. - if (optReconstructArrIndex(tree, &arrIndex)) + // + if (info->cloneForArrayBounds && optReconstructArrIndex(tree, &arrIndex)) { assert(tree->gtOper == GT_COMMA); @@ -2562,11 +2690,153 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop } return WALK_SKIP_SUBTREES; } - else if (tree->gtOper == GT_ARR_ELEM) + + if (info->cloneForTypeTests && tree->OperIs(GT_JTRUE)) { - // TODO-CQ: CLONE: Implement. - return WALK_SKIP_SUBTREES; + JITDUMP("...TT considering [%06u]\n", dspTreeID(tree)); + // Look for invariant type tests. + // + GenTree* const relop = tree->AsOp()->gtGetOp1(); + + // Must be an equality comparison of some kind. + // + if (!relop->OperIs(GT_EQ, GT_NE)) + { + return WALK_CONTINUE; + } + + GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); + GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); + + // One side or the other must be an indir + // The other must be a loop invariant. + // Currently, we'll just look for a constant. + // + bool match = false; + if (relopOp1->OperIs(GT_IND) && relopOp2->IsIntegralConst()) + { + match = true; + } + else if (relopOp2->OperIs(GT_IND) && relopOp1->IsIntegralConst()) + { + std::swap(relopOp1, relopOp2); + match = true; + } + + if (!match) + { + return WALK_CONTINUE; + } + + // The indir addr must be loop invariant TYP_REF local + // + GenTree* const indirAddr = relopOp1->AsIndir()->Addr(); + + if (!indirAddr->TypeIs(TYP_REF)) + { + return WALK_CONTINUE; + } + + if (!indirAddr->OperIs(GT_LCL_VAR)) + { + return WALK_CONTINUE; + } + + if (!relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)) + { + return WALK_CONTINUE; + } + + GenTreeLclVarCommon* const indirAddrLcl = indirAddr->AsLclVarCommon(); + const unsigned lclNum = indirAddrLcl->GetLclNum(); + + JITDUMP("... right form, V%02u\n", lclNum); + + if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + { + JITDUMP("... but not invariant\n"); + return WALK_CONTINUE; + } + + // Looks like we found an invariant type test. + // + JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u ... ", info->loopNum, dspTreeID(tree), lclNum); + + // We only want this type test to inspire cloning if + // + // (1) we have profile data + // (2) the loop iterates frequently each time the method is called + // (3) the type test is frequently hit during the loop iteration + // (4) the type test is biased and highly likely to succeed + // + const LoopDsc& loopDsc = optLoopTable[info->loopNum]; + BasicBlock* const loopEntry = loopDsc.lpEntry; + BasicBlock* const typeTestBlock = compCurBB; + double const loopFrequency = 0.50; + double const typeTestFrequency = 0.50; + double const typeTestBias = 0.05; + + // Check for (1) + // + if (!loopEntry->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) + { + JITDUMP(" but loop does not have profile data.\n"); + return WALK_CONTINUE; + } + + // Check for (2) + // + if (loopEntry->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) + { + JITDUMP(" but loop does not iterate often enough.\n"); + return WALK_CONTINUE; + } + + // Check for (3) + // + if (typeTestBlock->bbWeight < (typeTestFrequency * loopEntry->bbWeight)) + { + JITDUMP(" but type test does not execute often enough within the loop.\n"); + return WALK_CONTINUE; + } + + // Check for (4) + // + BasicBlock* const hotSuccessor = relop->OperIs(GT_EQ) ? typeTestBlock->bbJumpDest : typeTestBlock->bbNext; + BasicBlock* const coldSuccessor = relop->OperIs(GT_EQ) ? typeTestBlock->bbNext : typeTestBlock->bbJumpDest; + + if (!hotSuccessor->hasProfileWeight() || !coldSuccessor->hasProfileWeight()) + { + JITDUMP(" but type test successor blocks were not profiled.\n"); + return WALK_CONTINUE; + } + + if (hotSuccessor->bbWeight == BB_ZERO_WEIGHT) + { + JITDUMP(" but hot successor block " FMT_BB " is rarely run.\n", hotSuccessor->bbNum); + return WALK_CONTINUE; + } + + if (coldSuccessor->bbWeight > BB_ZERO_WEIGHT) + { + const weight_t bias = coldSuccessor->bbWeight / (hotSuccessor->bbWeight + coldSuccessor->bbWeight); + + if (bias > typeTestBias) + { + JITDUMP(" but type test not sufficiently biased: failure likelihood is " FMT_WT " > " FMT_WT "\n", bias, + typeTestBias); + return WALK_CONTINUE; + } + } + + JITDUMP(" passed profile screening\n"); + + // Update the loop context. + // + info->context->EnsureLoopOptInfo(info->loopNum) + ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, relopOp2->AsIntConCommon()->IconValue())); } + return WALK_CONTINUE; } @@ -2596,11 +2866,28 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pT // bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context) { - JITDUMP("Checking loop " FMT_LP " for optimization candidates\n", loopNum); - const LoopDsc& loop = optLoopTable[loopNum]; + const bool canCloneForArrayBounds = + ((optMethodFlags & OMF_HAS_ARRAYREF) != 0) && ((loop.lpFlags & LPFLG_ITER) != 0); + const bool canCloneForTypeTests = ((optMethodFlags & OMF_HAS_GUARDEDDEVIRT) != 0); + + if (!canCloneForArrayBounds && !canCloneForTypeTests) + { + JITDUMP("Not checking loop " FMT_LP " -- no array bounds or type tests in this method\n", loopNum); + return false; + } + + bool shouldCloneForArrayBounds = canCloneForArrayBounds; + bool shouldCloneForTypeTests = canCloneForTypeTests; + +#ifdef DEBUG + shouldCloneForTypeTests &= JitConfig.JitCloneLoopsWithTypeTests() != 0; +#endif + + JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loopNum, + shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForTypeTests ? " (type tests)" : ""); - LoopCloneVisitorInfo info(context, loopNum, nullptr); + LoopCloneVisitorInfo info(context, loopNum, nullptr, shouldCloneForArrayBounds, shouldCloneForTypeTests); for (BasicBlock* const block : loop.LoopBlocks()) { compCurBB = block; diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 13bd2fa5e090c..d174952c55d2a 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -316,6 +316,20 @@ struct LcJaggedArrayOptInfo : public LcOptInfo } }; +// Optimization info for a type test +// +struct LcTypeTestOptInfo : public LcOptInfo +{ + // local whose method table is tested + unsigned lclNum; + // handle being tested for + ssize_t clsHnd; + + LcTypeTestOptInfo(unsigned lclNum, ssize_t clsHnd) : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd) + { + } +}; + /** * * Symbolic representation of a.length, or a[i][j].length or a[i,j].length and so on. @@ -421,10 +435,12 @@ struct LC_Ident Var, ArrLen, Null, + ClassHandle, + Indir, }; LC_Array arrLen; // The LC_Array if the type is "ArrLen" - unsigned constant; // The constant value if this node is of type "Const", or the lcl num if "Var" + ssize_t constant; // The constant value if this node is of type "Const", or the lcl num if "Var" IdentType type; // The type of this object // Equality operator @@ -434,6 +450,8 @@ struct LC_Ident { case Const: case Var: + case ClassHandle: + case Indir: return (type == that.type) && (constant == that.constant); case ArrLen: return (type == that.type) && (arrLen == that.arrLen); @@ -451,10 +469,16 @@ struct LC_Ident switch (type) { case Const: - printf("%u", constant); + printf("%u", (unsigned)constant); break; case Var: - printf("V%02d", constant); + printf("V%02d", (unsigned)constant); + break; + case Indir: + printf("*V%02d", (unsigned)constant); + break; + case ClassHandle: + printf("%p", constant); break; case ArrLen: arrLen.Print(); @@ -472,7 +496,7 @@ struct LC_Ident LC_Ident() : type(Invalid) { } - LC_Ident(unsigned constant, IdentType type) : constant(constant), type(type) + LC_Ident(ssize_t constant, IdentType type) : constant(constant), type(type) { } explicit LC_Ident(IdentType type) : type(type) @@ -597,24 +621,24 @@ struct LC_Condition * i => {} * } */ -struct LC_Deref +struct LC_ArrayDeref { - const LC_Array array; - JitExpandArrayStack* children; + const LC_Array array; + JitExpandArrayStack* children; unsigned level; - LC_Deref(const LC_Array& array, unsigned level) : array(array), children(nullptr), level(level) + LC_ArrayDeref(const LC_Array& array, unsigned level) : array(array), children(nullptr), level(level) { } - LC_Deref* Find(unsigned lcl); + LC_ArrayDeref* Find(unsigned lcl); unsigned Lcl(); bool HasChildren(); void EnsureChildren(CompAllocator alloc); - static LC_Deref* Find(JitExpandArrayStack* children, unsigned lcl); + static LC_ArrayDeref* Find(JitExpandArrayStack* children, unsigned lcl); void DeriveLevelConditions(JitExpandArrayStack*>* len); @@ -635,7 +659,7 @@ struct LC_Deref #ifdef _MSC_VER (*children)[i]->Print(indent + 1); #else // _MSC_VER - (*((JitExpandArray*)children))[i]->Print(indent + 1); + (*((JitExpandArray*)children))[i]->Print(indent + 1); #endif // _MSC_VER } } @@ -669,18 +693,22 @@ struct LoopCloneContext // The array of conditions that influence which path to take for each loop. (loop x cloning-conditions) jitstd::vector*> conditions; - // The array of dereference conditions found in each loop. (loop x deref-conditions) - jitstd::vector*> derefs; + // The array of array dereference conditions found in each loop. (loop x deref-conditions) + jitstd::vector*> arrayDerefs; + + // The array of object dereference conditions found in each loop. + jitstd::vector*> objDerefs; // The array of block levels of conditions for each loop. (loop x level x conditions) jitstd::vector*>*> blockConditions; LoopCloneContext(unsigned loopCount, CompAllocator alloc) - : alloc(alloc), optInfo(alloc), conditions(alloc), derefs(alloc), blockConditions(alloc) + : alloc(alloc), optInfo(alloc), conditions(alloc), arrayDerefs(alloc), objDerefs(alloc), blockConditions(alloc) { optInfo.resize(loopCount, nullptr); conditions.resize(loopCount, nullptr); - derefs.resize(loopCount, nullptr); + arrayDerefs.resize(loopCount, nullptr); + objDerefs.resize(loopCount, nullptr); blockConditions.resize(loopCount, nullptr); } @@ -709,8 +737,11 @@ struct LoopCloneContext // Get the conditions for loop. No allocation is performed. JitExpandArrayStack* GetConditions(unsigned loopNum); - // Ensure that the "deref" conditions array is allocated. - JitExpandArrayStack* EnsureDerefs(unsigned loopNum); + // Ensure that the array "deref" conditions array is allocated. + JitExpandArrayStack* EnsureArrayDerefs(unsigned loopNum); + + // Ensure that the obj "deref" conditions array is allocated. + JitExpandArrayStack* EnsureObjDerefs(unsigned loopNum); // Get block conditions for each loop, no allocation is performed. JitExpandArrayStack*>* GetBlockConditions(unsigned loopNum); diff --git a/src/coreclr/jit/loopcloningopts.h b/src/coreclr/jit/loopcloningopts.h index f17b915c830a6..2df5e7baf63b5 100644 --- a/src/coreclr/jit/loopcloningopts.h +++ b/src/coreclr/jit/loopcloningopts.h @@ -11,5 +11,6 @@ // Types of Loop Cloning based optimizations. LC_OPT(LcMdArray) LC_OPT(LcJaggedArray) +LC_OPT(LcTypeTest) #undef LC_OPT diff --git a/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.cs b/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.cs new file mode 100644 index 0000000000000..8aa2bb14457fd --- /dev/null +++ b/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.cs @@ -0,0 +1,48 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// PGO enables an invariant GDV type test in a loop. +// We then clone the loop based on this test. +// +// COMPlus_TieredPGO=1 + +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Threading; + +class CloningForIEnumerable +{ + [MethodImpl(MethodImplOptions.NoInlining)] + public static int Sum(IEnumerable e) + { + int r = 0; + foreach(int i in e) + { + r += i; + } + return r; + } + + public static int Main() + { + List list = new List { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + + int r = 0; + + for (int i = 0; i < 30; i++) + { + r += Sum(list); + Thread.Sleep(15); + } + + Thread.Sleep(50); + + for (int i = 0; i < 70; i++) + { + r += Sum(list); + } + + return r - 5400; + } +} diff --git a/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.csproj b/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.csproj new file mode 100644 index 0000000000000..6630219f047cd --- /dev/null +++ b/src/tests/JIT/opt/GuardedDevirtualization/enumerablecloning.csproj @@ -0,0 +1,22 @@ + + + Exe + + True + + + + + + + + + diff --git a/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.cs b/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.cs new file mode 100644 index 0000000000000..4795cf91fd81b --- /dev/null +++ b/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.cs @@ -0,0 +1,84 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// PGO enables an invariant GDV type test in a loop. +// We then clone the loop based on this test. +// +// COMPlus_TieredPGO=1 + +using System; +using System.Runtime.CompilerServices; +using System.Threading; + +interface I +{ + public int F(int x, int y); +} + +class Add : I +{ + int I.F(int x, int y) => x + y; +} + +class Mul : I +{ + int I.F(int x, int y) => x * y; +} + +class CloningForTypeTests +{ + [MethodImpl(MethodImplOptions.NoInlining)] + public static int BothTypeAndArray(I m, int[] xs, int[] ys, int from, int to) + { + int r = 0; + + // Cloning conditions for this loop should include a type test + // + for (int i = from; i < to; i++) + { + r += m.F(xs[i], ys[i]); + } + return r; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static int JustType(I m, int from, int to) + { + int r = 0; + + // Cloning conditions for this loop should only include a type test + // + for (int i = from; i < to; i++) + { + r += m.F(1, 1); + } + return r; + } + + public static int Main() + { + int[] xs = new int[] { 1, 2, 3, 4 }; + int[] ys = new int[] { 4, 3, 2, 1 }; + I m = new Add(); + + int r0 = 0; + int r1 = 0; + + for (int i = 0; i < 30; i++) + { + r0 += BothTypeAndArray(m, xs, ys, 0, 3); + r1 += JustType(m, 0, 3); + Thread.Sleep(15); + } + + Thread.Sleep(50); + + for (int i = 0; i < 70; i++) + { + r0 += BothTypeAndArray(m, xs, ys, 0, 3); + r1 += JustType(m, 0, 3); + } + + return (r0 + r1) / 21; + } +} diff --git a/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.csproj b/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.csproj new file mode 100644 index 0000000000000..6630219f047cd --- /dev/null +++ b/src/tests/JIT/opt/GuardedDevirtualization/typetestcloning.csproj @@ -0,0 +1,22 @@ + + + Exe + + True + + + + + + + + + From 12b5d5b5a4c83805e2c3329f0535463c0d10e1f0 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 7 Jun 2022 13:29:12 -0700 Subject: [PATCH 2/2] review feedback --- src/coreclr/jit/loopcloning.cpp | 21 ++++---- src/coreclr/jit/loopcloning.h | 88 ++++++++++++++++++++++++++------- 2 files changed, 81 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index bb31ae845fda3..459f577f02dbf 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -129,16 +129,16 @@ GenTree* LC_Ident::ToGenTree(Compiler* comp, BasicBlock* bb) assert(constant <= INT32_MAX); return comp->gtNewIconNode(constant); case Var: - return comp->gtNewLclvNode((unsigned)constant, comp->lvaTable[constant].lvType); + return comp->gtNewLclvNode(lclNum, comp->lvaTable[lclNum].lvType); case ArrLen: return arrLen.ToGenTree(comp, bb); case Null: return comp->gtNewIconNode(0, TYP_REF); case ClassHandle: - return comp->gtNewIconHandleNode((size_t)constant, GTF_ICON_CLASS_HDL); + return comp->gtNewIconHandleNode((size_t)clsHnd, GTF_ICON_CLASS_HDL); case Indir: { - GenTree* indir = comp->gtNewIndir(TYP_I_IMPL, comp->gtNewLclvNode((unsigned)constant, TYP_REF)); + GenTree* const indir = comp->gtNewIndir(TYP_I_IMPL, comp->gtNewLclvNode(lclNum, TYP_REF)); indir->gtFlags |= GTF_IND_INVARIANT; return indir; } @@ -1153,13 +1153,13 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext if (isIncreasingLoop) { geZero = - LC_Condition(GT_GE, LC_Expr(LC_Ident(initLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0, LC_Ident::Const))); + LC_Condition(GT_GE, LC_Expr(LC_Ident(initLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0u, LC_Ident::Const))); } else { // For decreasing loop, the init value needs to be checked against the array length ident = LC_Ident(initLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0u, LC_Ident::Const))); } context->EnsureConditions(loopNum)->Push(geZero); } @@ -1194,12 +1194,12 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext { // For increasing loop, thelimit value needs to be checked against the array length ident = LC_Ident(limitLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const))); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0u, LC_Ident::Const))); } else { geZero = - LC_Condition(GT_GE, LC_Expr(LC_Ident(limitLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0, LC_Ident::Const))); + LC_Condition(GT_GE, LC_Expr(LC_Ident(limitLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0u, LC_Ident::Const))); } context->EnsureConditions(loopNum)->Push(geZero); @@ -1509,7 +1509,7 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con // ObjDeref array has indir(lcl), we want lcl. // LC_Ident& mtIndirIdent = (*objDeref)[i]; - LC_Ident ident(mtIndirIdent.constant, LC_Ident::Var); + LC_Ident ident(mtIndirIdent.LclNum(), LC_Ident::Var); (*levelCond)[0]->Push(LC_Condition(GT_NE, LC_Expr(ident), LC_Expr(LC_Ident(LC_Ident::Null)))); } } @@ -2833,8 +2833,11 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop // Update the loop context. // + assert(relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)); + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)relopOp2->AsIntConCommon()->IconValue(); + info->context->EnsureLoopOptInfo(info->loopNum) - ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, relopOp2->AsIntConCommon()->IconValue())); + ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, clsHnd)); } return WALK_CONTINUE; diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index d174952c55d2a..460a88490ffbc 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -323,9 +323,10 @@ struct LcTypeTestOptInfo : public LcOptInfo // local whose method table is tested unsigned lclNum; // handle being tested for - ssize_t clsHnd; + CORINFO_CLASS_HANDLE clsHnd; - LcTypeTestOptInfo(unsigned lclNum, ssize_t clsHnd) : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd) + LcTypeTestOptInfo(unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd) + : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd) { } }; @@ -421,11 +422,11 @@ struct LC_Array GenTree* ToGenTree(Compiler* comp, BasicBlock* bb); }; -/** - * - * Symbolic representation of either a constant like 1 or 2, or a variable like V02 or V03, or an "LC_Array", - * or the null constant. - */ +//------------------------------------------------------------------------ +// LC_Ident: symbolic representation of either a constant like 1 or 2, +// or a variable like V02 or V03, or an "LC_Array", or the null constant, +// or a class handle, or an indir of a variable like *V02. +// struct LC_Ident { enum IdentType @@ -439,46 +440,67 @@ struct LC_Ident Indir, }; - LC_Array arrLen; // The LC_Array if the type is "ArrLen" - ssize_t constant; // The constant value if this node is of type "Const", or the lcl num if "Var" - IdentType type; // The type of this object +private: + union { + unsigned constant; + unsigned lclNum; + LC_Array arrLen; + CORINFO_CLASS_HANDLE clsHnd; + }; + +public: + // The type of this object + IdentType type; // Equality operator bool operator==(const LC_Ident& that) const { + if (type != that.type) + { + return false; + } + switch (type) { case Const: - case Var: + return (constant == that.constant); case ClassHandle: + return (clsHnd == that.clsHnd); + case Var: case Indir: - return (type == that.type) && (constant == that.constant); + return (lclNum == that.lclNum); case ArrLen: - return (type == that.type) && (arrLen == that.arrLen); + return (arrLen == that.arrLen); case Null: - return (type == that.type); + return true; default: assert(!"Unknown LC_Ident type"); unreached(); } } + unsigned LclNum() const + { + assert((type == Var) || (type == Indir)); + return lclNum; + } + #ifdef DEBUG void Print() { switch (type) { case Const: - printf("%u", (unsigned)constant); + printf("%u", constant); break; case Var: - printf("V%02d", (unsigned)constant); + printf("V%02u", lclNum); break; case Indir: - printf("*V%02d", (unsigned)constant); + printf("*V%02u", lclNum); break; case ClassHandle: - printf("%p", constant); + printf("%p", clsHnd); break; case ArrLen: arrLen.Print(); @@ -496,12 +518,40 @@ struct LC_Ident LC_Ident() : type(Invalid) { } - LC_Ident(ssize_t constant, IdentType type) : constant(constant), type(type) + + explicit LC_Ident(unsigned val, IdentType type) : type(type) + { + if (type == Const) + { + constant = val; + } + else if ((type == Var) || (type == Indir)) + { + lclNum = val; + } + else + { + unreached(); + } + } + + explicit LC_Ident(CORINFO_CLASS_HANDLE val, IdentType type) : type(type) { + if (type == ClassHandle) + { + clsHnd = val; + } + else + { + unreached(); + } } + explicit LC_Ident(IdentType type) : type(type) { + assert(type == Null); } + explicit LC_Ident(const LC_Array& arrLen) : arrLen(arrLen), type(ArrLen) { }