From 5a45ea0256a96486db825a3f3a5fc22ae6354789 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Mon, 19 Feb 2024 21:16:20 +0100 Subject: [PATCH] JIT: Remove fgExpandQmarkForCastInstOf (#98610) --- src/coreclr/jit/compiler.cpp | 14 +- src/coreclr/jit/compiler.h | 1 - src/coreclr/jit/gentree.cpp | 2 - src/coreclr/jit/gentree.h | 3 - src/coreclr/jit/helperexpansion.cpp | 30 +++- src/coreclr/jit/importer.cpp | 17 +- src/coreclr/jit/morph.cpp | 240 ---------------------------- 7 files changed, 38 insertions(+), 269 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index c351419687fbf..b0518aca33f79 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5042,6 +5042,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_STRESS_SPLIT_TREE, &Compiler::StressSplitTree); #endif + // Expand casts + DoPhase(this, PHASE_EXPAND_CASTS, &Compiler::fgLateCastExpansion); + // Expand runtime lookups (an optimization but we'd better run it in tier0 too) DoPhase(this, PHASE_EXPAND_RTLOOKUPS, &Compiler::fgExpandRuntimeLookups); @@ -5051,9 +5054,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Expand thread local access DoPhase(this, PHASE_EXPAND_TLS, &Compiler::fgExpandThreadLocalAccess); - // Expand casts - DoPhase(this, PHASE_EXPAND_CASTS, &Compiler::fgLateCastExpansion); - // Insert GC Polls DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls); @@ -9868,14 +9868,6 @@ JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree) } break; - case GT_QMARK: - - if (tree->gtFlags & GTF_QMARK_CAST_INSTOF) - { - chars += printf("[QMARK_CAST_INSTOF]"); - } - break; - case GT_BOX: if (tree->gtFlags & GTF_BOX_VALUE) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c850b8f76176a..10a47eb729fa0 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5348,7 +5348,6 @@ class Compiler Statement* fgNewStmtFromTree(GenTree* tree, const DebugInfo& di); GenTreeQmark* fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst = nullptr); - bool fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt); bool fgExpandQmarkStmt(BasicBlock* block, Statement* stmt); void fgExpandQmarkNodes(); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ad68efef307b7..0a220b02d5677 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -17259,8 +17259,6 @@ void Compiler::gtExtractSideEffList(GenTree* expr, colon->gtOp2 = (elseSideEffects != nullptr) ? elseSideEffects : m_compiler->gtNewNothingNode(); qmark->gtType = TYP_VOID; colon->gtType = TYP_VOID; - - qmark->gtFlags &= ~GTF_QMARK_CAST_INSTOF; Append(qmark); } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 1bd7ce27004fe..ff99a9e7202af 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -507,9 +507,6 @@ enum GenTreeFlags : unsigned int GTF_RET_MERGED = 0x80000000, // GT_RETURN -- This is a return generated during epilog merging. - GTF_QMARK_CAST_INSTOF = 0x80000000, // GT_QMARK -- Is this a top (not nested) level qmark created for - // castclass or instanceof? - GTF_BOX_CLONED = 0x40000000, // GT_BOX -- this box and its operand has been cloned, cannot assume it to be single-use anymore GTF_BOX_VALUE = 0x80000000, // GT_BOX -- "box" is on a value type diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 52b7d3d68cbce..39f7b8f09d276 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1930,11 +1930,23 @@ static int PickCandidatesForTypeCheck(Compiler* comp, CORINFO_CLASS_HANDLE castToCls = comp->gtGetHelperArgClassHandle(clsArg); if (castToCls == NO_CLASS_HANDLE) { - // We don't expect the constant handle to be CSE'd here because importer - // sets GTF_DONT_CSE for it. The only case when this arg is not a constant is RUNTIMELOOKUP - // TODO-InlineCast: we should be able to handle RUNTIMELOOKUP as well. - JITDUMP("clsArg is not a constant handle - bail out.\n"); - return 0; + // If we don't see the constant class handle, we still can speculatively expand it + // for castclass case (we'll just take the unknown tree as a type check tree) + switch (helper) + { + case CORINFO_HELP_CHKCASTCLASS: + case CORINFO_HELP_CHKCASTARRAY: + case CORINFO_HELP_CHKCASTANY: + likelihoods[0] = 50; // 50% speculative guess + candidates[0] = NO_CLASS_HANDLE; + return 1; + + default: + // Otherwise, bail out. We don't expect the constant handles to be CSE'd as they normally + // have GTF_DONT_CSE flag set on them for cast helpers. + // TODO-InlineCast: One missing case to handle is isinst against Class<_Canon> + return 0; + } } if ((objArg->gtFlags & GTF_ALL_EFFECT) != 0 && comp->lvaHaveManyLocals()) @@ -2349,11 +2361,15 @@ bool Compiler::fgLateCastExpansionForCall(BasicBlock** pBlock, Statement* stmt, BasicBlock* lastTypeCheckBb = nullcheckBb; for (int candidateId = 0; candidateId < numOfCandidates; candidateId++) { - GenTree* expectedClsNode = gtNewIconEmbClsHndNode(expectedExactClasses[candidateId]); - GenTree* storeCseVal = nullptr; + const CORINFO_CLASS_HANDLE expectedCls = expectedExactClasses[candidateId]; + // if expectedCls is NO_CLASS_HANDLE, it means we should just use the original clsArg + GenTree* expectedClsNode = expectedCls != NO_CLASS_HANDLE + ? gtNewIconEmbClsHndNode(expectedCls) + : gtCloneExpr(call->gtArgs.GetUserArgByIndex(0)->GetNode()); // Manually CSE the expectedClsNode for first type check if it's the same as the original clsArg // TODO-InlineCast: consider not doing this if the helper call is cold + GenTree* storeCseVal = nullptr; if (candidateId == 0) { GenTree*& castArg = call->gtArgs.GetUserArgByIndex(0)->LateNodeRef(); diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 196054a044a50..b325b2ea49180 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -5515,11 +5515,19 @@ GenTree* Compiler::impCastClassOrIsInstToTree( bool expandInline = canExpandInline && shouldExpandInline; - if (op2->IsIconHandle(GTF_ICON_CLASS_HDL) && (helper != CORINFO_HELP_ISINSTANCEOFCLASS || !isClassExact)) + if ((helper == CORINFO_HELP_ISINSTANCEOFCLASS) && isClassExact) { - // TODO-InlineCast: move these to the late cast expansion phase as well: - // 1) isinst - // 2) op2 being GT_RUNTIMELOOKUP + // TODO-InlineCast: isinst against exact class + // It's already supported by the late cast expansion phase, but + // produces unexpected size regressions in some cases. + } + else if (!isCastClass && !op2->IsIconHandle(GTF_ICON_CLASS_HDL)) + { + // TODO-InlineCast: isinst against Class<_Canon> + } + else + { + // Expand later expandInline = false; } @@ -5666,7 +5674,6 @@ GenTree* Compiler::impCastClassOrIsInstToTree( // temp = new (this, GT_COLON) GenTreeColon(TYP_REF, reversedMTCheck ? gtNewNull() : gtClone(op1), qmarkMT); qmarkNull = gtNewQmarkNode(TYP_REF, condNull, temp->AsColon()); - qmarkNull->gtFlags |= GTF_QMARK_CAST_INSTOF; // Make QMark node a top level node by spilling it. unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling QMark2")); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 0c44bbad973ea..49052cbf9bfd6 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14505,241 +14505,6 @@ GenTreeQmark* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = N return topQmark; } -//------------------------------------------------------------------------ -// fgExpandQmarkForCastInstOf: expand qmark for cast -// -// Arguments: -// block - block containing the qmark -// stmt - statement containing the qmark -// -// Returns: -// true if the expansion introduced a throwing block -// -// Notes: -// -// For a castclass helper call, importer creates the following tree: -// tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper()); -// -// This method splits the qmark expression created by the importer into the -// following blocks: (block, asg, cond1, cond2, helper, remainder). -// Notice that op1 is the result for both the conditions. So we coalesce these -// assignments into a single block instead of two blocks resulting a nested diamond. -// -// +---------->-----------+ -// | | | -// ^ ^ v -// | | | -// block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder -// -// We expect to achieve the following codegen: -// mov rsi, rdx tmp2 = op1 // asgBlock -// test rsi, rsi goto skip if tmp2 == null ? // cond1Block -// je SKIP -// mov rcx, 0x76543210 cns = op2 // cond2Block -// cmp qword ptr [rsi], rcx goto skip if *tmp2 == op2 -// je SKIP -// call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp2 = helper(cns, tmp2) // helperBlock -// mov rsi, rax -// SKIP: // remainderBlock -// mov rdi, rsi tmp = tmp2 -// tmp has the result. -// -// Note that we can't use `tmp` during the computation of the result: we must create a new temp, -// and only assign `tmp` to the final value. This is because `tmp` may already have been annotated -// via lvaSetClass/lvaUpdateClass as having a known type. This is only true after the full expansion, -// where any other type gets converted to null. If we used `tmp` during the expansion, then it would -// appear to subsequent optimizations that cond2Block (where the type is checked) is unnecessary. -// -bool Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt) -{ -#ifdef DEBUG - if (verbose) - { - printf("\nExpanding CastInstOf qmark in " FMT_BB " (before)\n", block->bbNum); - fgDispBasicBlocks(block, block, true); - } -#endif // DEBUG - - bool introducedThrow = false; - GenTree* expr = stmt->GetRootNode(); - - GenTree* dst = nullptr; - GenTreeQmark* qmark = fgGetTopLevelQmark(expr, &dst); - - noway_assert(dst != nullptr); - assert(dst->OperIsLocalStore()); - assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF); - - // Get cond, true, false exprs for the qmark. - GenTree* condExpr = qmark->gtGetOp1(); - GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode(); - GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode(); - - // Get cond, true, false exprs for the nested qmark. - GenTree* nestedQmark = falseExpr; - GenTree* cond2Expr; - GenTree* true2Expr; - GenTree* false2Expr; - - unsigned nestedQmarkElseLikelihood = 50; - if (nestedQmark->gtOper == GT_QMARK) - { - cond2Expr = nestedQmark->gtGetOp1(); - true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode(); - false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode(); - nestedQmarkElseLikelihood = nestedQmark->AsQmark()->ElseNodeLikelihood(); - } - else - { - // This is a rare case that arises when we are doing minopts and encounter isinst of null. - // gtFoldExpr was still able to optimize away part of the tree (but not all). - // That means it does not match our pattern. - // - // Rather than write code to handle this case, just fake up some nodes to make it match the common - // case. Synthesize a comparison that is always true, and for the result-on-true, use the - // entire subtree we expected to be the nested question op. - - cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL)); - true2Expr = nestedQmark; - false2Expr = gtNewIconNode(0, TYP_I_IMPL); - } - assert(false2Expr->OperGet() == trueExpr->OperGet()); - - // Create the chain of blocks. See method header comment. - // The order of blocks after this is the following: - // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock - // - // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock', - // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this - // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely - // remainderBlock will still be GC safe. - BasicBlockFlags propagateFlags = block->GetFlagsRaw() & BBF_GC_SAFE_POINT; - BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt); - fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock. - - BasicBlock* helperBlock = fgNewBBafter(BBJ_ALWAYS, block, true, block->Next()); - BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true, remainderBlock); - BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true, remainderBlock); - BasicBlock* asgBlock = fgNewBBafter(BBJ_ALWAYS, block, true, block->Next()); - - block->RemoveFlags(BBF_NEEDS_GCPOLL); - remainderBlock->SetFlags(propagateFlags); - helperBlock->SetFlags(BBF_NONE_QUIRK); - asgBlock->SetFlags(BBF_NONE_QUIRK); - - // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). - // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. - if (!block->HasFlag(BBF_INTERNAL)) - { - helperBlock->RemoveFlags(BBF_INTERNAL); - cond2Block->RemoveFlags(BBF_INTERNAL); - cond1Block->RemoveFlags(BBF_INTERNAL); - asgBlock->RemoveFlags(BBF_INTERNAL); - helperBlock->SetFlags(BBF_IMPORTED); - cond2Block->SetFlags(BBF_IMPORTED); - cond1Block->SetFlags(BBF_IMPORTED); - asgBlock->SetFlags(BBF_IMPORTED); - } - - // Chain the flow correctly. - assert(block->KindIs(BBJ_ALWAYS)); - block->SetTarget(asgBlock); - fgAddRefPred(asgBlock, block); - fgAddRefPred(cond1Block, asgBlock); - fgAddRefPred(remainderBlock, helperBlock); - - cond1Block->SetFalseTarget(cond2Block); - cond2Block->SetFalseTarget(helperBlock); - fgAddRefPred(cond2Block, cond1Block); - fgAddRefPred(helperBlock, cond2Block); - fgAddRefPred(remainderBlock, cond1Block); - fgAddRefPred(remainderBlock, cond2Block); - - // Set the weights; some are guesses. - asgBlock->inheritWeight(block); - cond1Block->inheritWeight(block); - - // We only have likelihood for the fast path (and fallback), but we don't know - // how often we have null in the root QMARK (although, we might be able to guess it too) - // so leave 50/50 for now. Thus, we have: - // - // [weight 1.0] - // if (obj != null) - // { - // [weight 0.5] - // if (obj.GetType() == typeof(FastType)) - // { - // [weight 0.5 * ] - // } - // else - // { - // [weight 0.5 * <100 - likelihood of FastType>] - // } - // } - // - cond2Block->inheritWeightPercentage(cond1Block, 50); - helperBlock->inheritWeightPercentage(cond2Block, nestedQmarkElseLikelihood); - - // Append cond1 as JTRUE to cond1Block - GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr); - Statement* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo()); - fgInsertStmtAtEnd(cond1Block, jmpStmt); - - // Append cond2 as JTRUE to cond2Block - jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr); - jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo()); - fgInsertStmtAtEnd(cond2Block, jmpStmt); - - unsigned tmp2 = lvaGrabTemp(false DEBUGARG("CastInstOf QMark result")); - lvaGetDesc(tmp2)->lvType = dst->TypeGet(); - - // AsgBlock should get tmp2 = op1. - GenTree* trueExprStore = gtNewStoreLclVarNode(tmp2, trueExpr)->AsLclVarCommon(); - Statement* trueStmt = fgNewStmtFromTree(trueExprStore, stmt->GetDebugInfo()); - fgInsertStmtAtEnd(asgBlock, trueStmt); - - // Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper. - gtReverseCond(cond2Expr); - - if (true2Expr->OperIs(GT_CALL) && (true2Expr->AsCall()->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN)) - { - Statement* helperStmt = fgNewStmtFromTree(true2Expr, stmt->GetDebugInfo()); - fgInsertStmtAtEnd(helperBlock, helperStmt); - fgConvertBBToThrowBB(helperBlock); - setMethodHasNoReturnCalls(); - introducedThrow = true; - } - else - { - GenTree* helperExprStore = gtNewStoreLclVarNode(tmp2, true2Expr)->AsLclVarCommon(); - Statement* helperStmt = fgNewStmtFromTree(helperExprStore, stmt->GetDebugInfo()); - fgInsertStmtAtEnd(helperBlock, helperStmt); - } - - // RemainderBlock should get tmp = tmp2. - GenTree* tmp2CopyLcl = gtNewLclvNode(tmp2, dst->TypeGet()); - unsigned dstLclNum = dst->AsLclVarCommon()->GetLclNum(); - GenTree* resultCopy = - dst->OperIs(GT_STORE_LCL_FLD) - ? gtNewStoreLclFldNode(dstLclNum, dst->TypeGet(), dst->AsLclFld()->GetLclOffs(), tmp2CopyLcl) - : gtNewStoreLclVarNode(dstLclNum, tmp2CopyLcl)->AsLclVarCommon(); - Statement* resultCopyStmt = fgNewStmtFromTree(resultCopy, stmt->GetDebugInfo()); - fgInsertStmtAtBeg(remainderBlock, resultCopyStmt); - - // Finally remove the nested qmark stmt. - fgRemoveStmt(block, stmt); - -#ifdef DEBUG - if (verbose) - { - printf("\nExpanding CastInstOf qmark in " FMT_BB " (after)\n", block->bbNum); - fgDispBasicBlocks(block, remainderBlock, true); - } -#endif // DEBUG - - return introducedThrow; -} - //------------------------------------------------------------------------ // fgExpandQmarkStmt: expand a qmark into control flow // @@ -14813,11 +14578,6 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) return false; } - if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF) - { - return fgExpandQmarkForCastInstOf(block, stmt); - } - #ifdef DEBUG if (verbose) {