From 38dbe8bf5317cbe85cfeea0d64b08f9c26a72096 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 14 Apr 2022 15:40:01 -0700 Subject: [PATCH 01/25] first working version --- src/coreclr/jit/compiler.h | 12 +++-- src/coreclr/jit/optimizer.cpp | 87 +++++++++++++++++++++++++++++++---- 2 files changed, 86 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 13a7791b18d6e..2427b8b2d3393 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5969,18 +5969,24 @@ class Compiler // Do hoisting for loop "lnum" (an index into the optLoopTable), and all loops nested within it. // Tracks the expressions that have been hoisted by containing loops by temporarily recording their // value numbers in "m_hoistedInParentLoops". This set is not modified by the call. - void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); + void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, ArrayStack* newPreHeaders); // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) // Assumes that expressions have been hoisted in containing loops if their value numbers are in // "m_hoistedInParentLoops". // - void optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt); + void optHoistThisLoop(unsigned lnum, + LoopHoistContext* hoistCtxt, + ArrayStack* existingPreHeaders, + ArrayStack* newPreHeaders); // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) // outside of that loop. Exempt expressions whose value number is in "m_hoistedInParentLoops"; add VN's of hoisted // expressions to "hoistInLoop". - void optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext); + void optHoistLoopBlocks(unsigned loopNum, + ArrayStack* blocks, + ArrayStack* newPreHeaders, + LoopHoistContext* hoistContext); // Return true if the tree looks profitable to hoist out of loop 'lnum'. bool optIsProfitableToHoistTree(GenTree* tree, unsigned lnum); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index b848d6400bca8..d344cb048712d 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6200,7 +6200,8 @@ void Compiler::optHoistLoopCode() if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) { - optHoistLoopNest(lnum, &hoistCtxt); + ArrayStack abc(getAllocatorLoopHoist()); + optHoistLoopNest(lnum, &hoistCtxt, &abc); } } @@ -6250,8 +6251,11 @@ void Compiler::optHoistLoopCode() #endif // DEBUG } -void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) +void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, ArrayStack* preHeadersList) { + // BlockSet newPreHeaders(BlockSetOps::MakeEmpty(this)); + // ArrayStack preHeadersList(getAllocatorLoopHoist()); + // Do this loop, then recursively do all nested loops. JITDUMP("\n%s " FMT_LP "\n", optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP ? "Loop Nest" : "Nested Loop", lnum); @@ -6262,8 +6266,6 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) m_loopsConsidered++; #endif // LOOP_HOIST_STATS - optHoistThisLoop(lnum, hoistCtxt); - VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) @@ -6282,10 +6284,22 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) } } + // TODO: Have BlockSet newPreHeaders(BlockSetOps::MakeEmpty(this)); so we don't add duplicate blocks + // in newPreHeaders. + // BlockSet newPreHeaders(BlockSetOps::MakeEmpty(this)); + // BlockSetOps::Add + for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { - optHoistLoopNest(child, hoistCtxt); + ArrayStack preHeadersListForCurrLoop(getAllocatorLoopHoist()); + optHoistLoopNest(child, hoistCtxt, &preHeadersListForCurrLoop); + + while (!preHeadersListForCurrLoop.Empty()) + { + BasicBlock* block = preHeadersListForCurrLoop.Pop(); + preHeadersList->Push(block); + } } // Now remove them. @@ -6299,9 +6313,19 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) } } } + + ArrayStack preHeadersListForCurrLoop(getAllocatorLoopHoist()); + + optHoistThisLoop(lnum, hoistCtxt, preHeadersList, &preHeadersListForCurrLoop); + + // Reset the preHeaders list that we found out from this loop. + *preHeadersList = preHeadersListForCurrLoop; } -void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) +void Compiler::optHoistThisLoop(unsigned lnum, + LoopHoistContext* hoistCtxt, + ArrayStack* existingPreHeaders, + ArrayStack* newPreHeaders) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; @@ -6420,6 +6444,12 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) // hoisting inner to outer. // ArrayStack defExec(getAllocatorLoopHoist()); + + while (!existingPreHeaders->Empty()) + { + defExec.Push(existingPreHeaders->Pop()); + } + if (pLoopDsc->lpExitCnt == 1) { assert(pLoopDsc->lpExit != nullptr); @@ -6449,7 +6479,16 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) defExec.Push(pLoopDsc->lpEntry); } - optHoistLoopBlocks(lnum, &defExec, hoistCtxt); + /* for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) + { + if (BlockSetOps::IsMember(this, newPreHeaders, block->bbNum)) + { + defExec.Push(block); + + } + }*/ + + optHoistLoopBlocks(lnum, &defExec, newPreHeaders, hoistCtxt); } bool Compiler::optIsProfitableToHoistTree(GenTree* tree, unsigned lnum) @@ -6690,7 +6729,10 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) // the loop, in the execution order, starting with the loop entry // block on top of the stack. // -void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext) +void Compiler::optHoistLoopBlocks(unsigned loopNum, + ArrayStack* blocks, + ArrayStack* newPreHeaders, + LoopHoistContext* hoistContext) { class HoistVisitor : public GenTreeVisitor { @@ -6725,6 +6767,8 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo unsigned m_loopNum; LoopHoistContext* m_hoistContext; BasicBlock* m_currentBlock; + // BlockSet newPreHeaders; + ArrayStack* preHeadersList; bool IsNodeHoistable(GenTree* node) { @@ -6820,13 +6864,19 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo UseExecutionOrder = true, }; - HoistVisitor(Compiler* compiler, unsigned loopNum, LoopHoistContext* hoistContext) + HoistVisitor(Compiler* compiler, + unsigned loopNum, + LoopHoistContext* hoistContext, + ArrayStack* newPreHeaders) : GenTreeVisitor(compiler) , m_valueStack(compiler->getAllocator(CMK_LoopHoist)) , m_beforeSideEffect(true) , m_loopNum(loopNum) , m_hoistContext(hoistContext) , m_currentBlock(nullptr) + //, newPreHeaders(BlockSetOps::MakeEmpty(compiler)) + , preHeadersList(newPreHeaders) + { } @@ -6842,6 +6892,11 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo if (top.m_hoistable) { m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loopNum, m_hoistContext); + + if ((m_compiler->optLoopTable[m_loopNum].lpFlags & LPFLG_HAS_PREHEAD) != 0) + { + preHeadersList->Push(m_compiler->optLoopTable[m_loopNum].lpHead); + } } else { @@ -7187,6 +7242,11 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo value.m_invariant = false; m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); + + if ((m_compiler->optLoopTable[m_loopNum].lpFlags & LPFLG_HAS_PREHEAD) != 0) + { + preHeadersList->Push(m_compiler->optLoopTable[m_loopNum].lpHead); + } } else if (value.Node() != tree) { @@ -7213,12 +7273,17 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo return fgWalkResult::WALK_CONTINUE; } + + // ArrayStack* GetPreHeaders() + //{ + // return &preHeadersList; + //} }; LoopDsc* loopDsc = &optLoopTable[loopNum]; assert(blocks->Top() == loopDsc->lpEntry); - HoistVisitor visitor(this, loopNum, hoistContext); + HoistVisitor visitor(this, loopNum, hoistContext, newPreHeaders); while (!blocks->Empty()) { @@ -7236,6 +7301,8 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo visitor.HoistBlock(block); } + + // return visitor.GetPreHeaders(); } void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnum, LoopHoistContext* hoistCtxt) From f7dddf1bcf11674aad5b53a7923b9db98254087b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 14 Apr 2022 15:49:46 -0700 Subject: [PATCH 02/25] Skip check of VN hoisting --- src/coreclr/jit/optimizer.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index d344cb048712d..db6436dfb88c8 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -7323,12 +7323,14 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu return; } - if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal())) - { - JITDUMP(" ... already hoisted same VN in current\n"); - // already hoisted this expression in the current loop, so don't hoist this expression. - return; - } + //TODO: Below code was applicable because we were first hoisting the outer loop and then the inner loop + // But if we start from inner and go outer, then we should continue hoisting things from this loop to the parent loop. + //if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal())) + //{ + // JITDUMP(" ... already hoisted same VN in current\n"); + // // already hoisted this expression in the current loop, so don't hoist this expression. + // return; + //} // Create a loop pre-header in which to put the hoisted code. fgCreateLoopPreHeader(lnum); @@ -7364,7 +7366,7 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu } // Record the hoisted expression in hoistCtxt - hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); + //hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); } bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNSet* loopVnInvariantCache) From f89e0007336f3d5d2f7c56fe0eb3d6b187548aa5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 19 Apr 2022 17:09:14 -0700 Subject: [PATCH 03/25] Account for duplicate blocks --- src/coreclr/jit/compiler.h | 19 ++--- src/coreclr/jit/optimizer.cpp | 128 +++++++++++++++++++++------------- 2 files changed, 88 insertions(+), 59 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2427b8b2d3393..1dc428e0bdcf2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5969,24 +5969,25 @@ class Compiler // Do hoisting for loop "lnum" (an index into the optLoopTable), and all loops nested within it. // Tracks the expressions that have been hoisted by containing loops by temporarily recording their // value numbers in "m_hoistedInParentLoops". This set is not modified by the call. - void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, ArrayStack* newPreHeaders); + // Returns the list of basic blocks that were added as preheaders as part of hoisting the current loop. + ArrayStack optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); + // void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, ArrayStack* newPreHeaders); // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) // Assumes that expressions have been hoisted in containing loops if their value numbers are in // "m_hoistedInParentLoops". // - void optHoistThisLoop(unsigned lnum, - LoopHoistContext* hoistCtxt, - ArrayStack* existingPreHeaders, - ArrayStack* newPreHeaders); + // Returns the new preheaders created + ArrayStack optHoistThisLoop(unsigned lnum, + LoopHoistContext* hoistCtxt, + ArrayStack existingPreHeaders); // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) // outside of that loop. Exempt expressions whose value number is in "m_hoistedInParentLoops"; add VN's of hoisted // expressions to "hoistInLoop". - void optHoistLoopBlocks(unsigned loopNum, - ArrayStack* blocks, - ArrayStack* newPreHeaders, - LoopHoistContext* hoistContext); + ArrayStack optHoistLoopBlocks(unsigned loopNum, + ArrayStack* blocks, + LoopHoistContext* hoistContext); // Return true if the tree looks profitable to hoist out of loop 'lnum'. bool optIsProfitableToHoistTree(GenTree* tree, unsigned lnum); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index db6436dfb88c8..61fdb12da1296 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6200,8 +6200,8 @@ void Compiler::optHoistLoopCode() if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) { - ArrayStack abc(getAllocatorLoopHoist()); - optHoistLoopNest(lnum, &hoistCtxt, &abc); + // ArrayStack abc(getAllocatorLoopHoist()); + optHoistLoopNest(lnum, &hoistCtxt); } } @@ -6251,7 +6251,8 @@ void Compiler::optHoistLoopCode() #endif // DEBUG } -void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, ArrayStack* preHeadersList) +// void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, ArrayStack* preHeadersList) +ArrayStack Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) { // BlockSet newPreHeaders(BlockSetOps::MakeEmpty(this)); // ArrayStack preHeadersList(getAllocatorLoopHoist()); @@ -6266,7 +6267,8 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, Arra m_loopsConsidered++; #endif // LOOP_HOIST_STATS - VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); + VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); + ArrayStack preHeadersOfChildLoops(getAllocatorLoopHoist()); if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) { @@ -6284,21 +6286,31 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, Arra } } - // TODO: Have BlockSet newPreHeaders(BlockSetOps::MakeEmpty(this)); so we don't add duplicate blocks - // in newPreHeaders. - // BlockSet newPreHeaders(BlockSetOps::MakeEmpty(this)); - // BlockSetOps::Add + BitVecTraits m_visitedTraits(fgBBNumMax * 2, this); + BitVec m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)); for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { - ArrayStack preHeadersListForCurrLoop(getAllocatorLoopHoist()); - optHoistLoopNest(child, hoistCtxt, &preHeadersListForCurrLoop); + ArrayStack preHeadersForThisLoop = optHoistLoopNest(child, hoistCtxt); - while (!preHeadersListForCurrLoop.Empty()) + while (!preHeadersForThisLoop.Empty()) { - BasicBlock* block = preHeadersListForCurrLoop.Pop(); - preHeadersList->Push(block); + BasicBlock* preHeaderBlock = preHeadersForThisLoop.Pop(); + + if (BitVecTraits::GetSize(&m_visitedTraits) < preHeaderBlock->bbNum) + { + // We don't know how many blocks will be added and hence we would grow + // the bitset here. + m_visitedTraits = BitVecTraits(BitVecTraits::GetSize(&m_visitedTraits) * 2, this); + m_visited = BitVecOps::MakeCopy(&m_visitedTraits, m_visited); + } + + if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, preHeaderBlock->bbNum)) + { + BitVecOps::AddElemD(&m_visitedTraits, m_visited, preHeaderBlock->bbNum); + preHeadersOfChildLoops.Push(preHeaderBlock); + } } } @@ -6314,18 +6326,15 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, Arra } } - ArrayStack preHeadersListForCurrLoop(getAllocatorLoopHoist()); + return optHoistThisLoop(lnum, hoistCtxt, preHeadersOfChildLoops); - optHoistThisLoop(lnum, hoistCtxt, preHeadersList, &preHeadersListForCurrLoop); - - // Reset the preHeaders list that we found out from this loop. - *preHeadersList = preHeadersListForCurrLoop; + //// Reset the preHeaders list that we found out from this loop. + //*preHeadersList = preHeadersListForCurrLoop; } -void Compiler::optHoistThisLoop(unsigned lnum, - LoopHoistContext* hoistCtxt, - ArrayStack* existingPreHeaders, - ArrayStack* newPreHeaders) +ArrayStack Compiler::optHoistThisLoop(unsigned lnum, + LoopHoistContext* hoistCtxt, + ArrayStack existingPreHeaders) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; @@ -6334,7 +6343,7 @@ void Compiler::optHoistThisLoop(unsigned lnum, if (pLoopDsc->lpFlags & LPFLG_REMOVED) { JITDUMP(" ... not hoisting " FMT_LP ": removed\n", lnum); - return; + return ArrayStack(getAllocatorLoopHoist()); } // Ensure the per-loop sets/tables are empty. @@ -6445,9 +6454,9 @@ void Compiler::optHoistThisLoop(unsigned lnum, // ArrayStack defExec(getAllocatorLoopHoist()); - while (!existingPreHeaders->Empty()) + while (!existingPreHeaders.Empty()) { - defExec.Push(existingPreHeaders->Pop()); + defExec.Push(existingPreHeaders.Pop()); } if (pLoopDsc->lpExitCnt == 1) @@ -6488,7 +6497,7 @@ void Compiler::optHoistThisLoop(unsigned lnum, } }*/ - optHoistLoopBlocks(lnum, &defExec, newPreHeaders, hoistCtxt); + return optHoistLoopBlocks(lnum, &defExec, hoistCtxt); } bool Compiler::optIsProfitableToHoistTree(GenTree* tree, unsigned lnum) @@ -6729,10 +6738,9 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) // the loop, in the execution order, starting with the loop entry // block on top of the stack. // -void Compiler::optHoistLoopBlocks(unsigned loopNum, - ArrayStack* blocks, - ArrayStack* newPreHeaders, - LoopHoistContext* hoistContext) +ArrayStack Compiler::optHoistLoopBlocks(unsigned loopNum, + ArrayStack* blocks, + LoopHoistContext* hoistContext) { class HoistVisitor : public GenTreeVisitor { @@ -6767,8 +6775,9 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, unsigned m_loopNum; LoopHoistContext* m_hoistContext; BasicBlock* m_currentBlock; - // BlockSet newPreHeaders; - ArrayStack* preHeadersList; + /*BitVecTraits m_visitedTraits; + BitVec m_visited;*/ + ArrayStack m_preHeadersList; bool IsNodeHoistable(GenTree* node) { @@ -6864,22 +6873,25 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, UseExecutionOrder = true, }; - HoistVisitor(Compiler* compiler, - unsigned loopNum, - LoopHoistContext* hoistContext, - ArrayStack* newPreHeaders) + HoistVisitor(Compiler* compiler, unsigned loopNum, LoopHoistContext* hoistContext) : GenTreeVisitor(compiler) , m_valueStack(compiler->getAllocator(CMK_LoopHoist)) , m_beforeSideEffect(true) , m_loopNum(loopNum) , m_hoistContext(hoistContext) , m_currentBlock(nullptr) - //, newPreHeaders(BlockSetOps::MakeEmpty(compiler)) - , preHeadersList(newPreHeaders) + /*, m_visitedTraits(compiler->fgBBNumMax * 2, compiler) + , m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)) */ + , m_preHeadersList(compiler->getAllocatorLoopHoist()) { } + ArrayStack GetNewPreHeaders() + { + return m_preHeadersList; + } + void HoistBlock(BasicBlock* block) { m_currentBlock = block; @@ -6892,10 +6904,14 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, if (top.m_hoistable) { m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loopNum, m_hoistContext); - if ((m_compiler->optLoopTable[m_loopNum].lpFlags & LPFLG_HAS_PREHEAD) != 0) { - preHeadersList->Push(m_compiler->optLoopTable[m_loopNum].lpHead); + BasicBlock* loopHead = m_compiler->optLoopTable[m_loopNum].lpHead; + /*if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, loopHead->bbNum)) + { + preHeadersList->Push(loopHead); + }*/ + m_preHeadersList.Push(loopHead); } } else @@ -7243,10 +7259,21 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); - if ((m_compiler->optLoopTable[m_loopNum].lpFlags & LPFLG_HAS_PREHEAD) != 0) - { - preHeadersList->Push(m_compiler->optLoopTable[m_loopNum].lpHead); - } + BasicBlock* loopHead = m_compiler->optLoopTable[m_loopNum].lpHead; + + // if (BitVecTraits::GetSize(&m_visitedTraits) < loopHead->bbNum) + //{ + // // We don't know how many blocks will be added and hence we would grow + // // the bitset here. + // m_visitedTraits = BitVecTraits(m_compiler->fgBBNumMax * 2, m_compiler); + // m_visited = BitVecOps::MakeCopy(&m_visitedTraits, m_visited); + //} + + // if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, loopHead->bbNum)) + //{ + // BitVecOps::AddElemD(&m_visitedTraits, m_visited, loopHead->bbNum); + m_preHeadersList.Push(loopHead); + //} } else if (value.Node() != tree) { @@ -7283,7 +7310,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, LoopDsc* loopDsc = &optLoopTable[loopNum]; assert(blocks->Top() == loopDsc->lpEntry); - HoistVisitor visitor(this, loopNum, hoistContext, newPreHeaders); + HoistVisitor visitor(this, loopNum, hoistContext); while (!blocks->Empty()) { @@ -7302,7 +7329,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, visitor.HoistBlock(block); } - // return visitor.GetPreHeaders(); + return visitor.GetNewPreHeaders(); } void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnum, LoopHoistContext* hoistCtxt) @@ -7323,9 +7350,10 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu return; } - //TODO: Below code was applicable because we were first hoisting the outer loop and then the inner loop - // But if we start from inner and go outer, then we should continue hoisting things from this loop to the parent loop. - //if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal())) + // TODO: Below code was applicable because we were first hoisting the outer loop and then the inner loop + // But if we start from inner and go outer, then we should continue hoisting things from this loop to the parent + // loop. + // if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal())) //{ // JITDUMP(" ... already hoisted same VN in current\n"); // // already hoisted this expression in the current loop, so don't hoist this expression. @@ -7366,7 +7394,7 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu } // Record the hoisted expression in hoistCtxt - //hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); + // hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); } bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNSet* loopVnInvariantCache) From 700592e7bc95390d7f81d4fd5f04a73a3c732ad1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 19 Apr 2022 17:40:28 -0700 Subject: [PATCH 04/25] clean up --- src/coreclr/jit/compiler.h | 4 +-- src/coreclr/jit/optimizer.cpp | 47 ++--------------------------------- 2 files changed, 4 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1dc428e0bdcf2..a297a326d0758 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5969,15 +5969,15 @@ class Compiler // Do hoisting for loop "lnum" (an index into the optLoopTable), and all loops nested within it. // Tracks the expressions that have been hoisted by containing loops by temporarily recording their // value numbers in "m_hoistedInParentLoops". This set is not modified by the call. + // // Returns the list of basic blocks that were added as preheaders as part of hoisting the current loop. ArrayStack optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); - // void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, ArrayStack* newPreHeaders); // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) // Assumes that expressions have been hoisted in containing loops if their value numbers are in // "m_hoistedInParentLoops". // - // Returns the new preheaders created + // Returns the new preheaders created. ArrayStack optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, ArrayStack existingPreHeaders); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 61fdb12da1296..189b5e8719360 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6200,7 +6200,6 @@ void Compiler::optHoistLoopCode() if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) { - // ArrayStack abc(getAllocatorLoopHoist()); optHoistLoopNest(lnum, &hoistCtxt); } } @@ -6251,12 +6250,8 @@ void Compiler::optHoistLoopCode() #endif // DEBUG } -// void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt, ArrayStack* preHeadersList) ArrayStack Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) { - // BlockSet newPreHeaders(BlockSetOps::MakeEmpty(this)); - // ArrayStack preHeadersList(getAllocatorLoopHoist()); - // Do this loop, then recursively do all nested loops. JITDUMP("\n%s " FMT_LP "\n", optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP ? "Loop Nest" : "Nested Loop", lnum); @@ -6327,9 +6322,6 @@ ArrayStack Compiler::optHoistLoopNest(unsigned lnum, LoopHoistConte } return optHoistThisLoop(lnum, hoistCtxt, preHeadersOfChildLoops); - - //// Reset the preHeaders list that we found out from this loop. - //*preHeadersList = preHeadersListForCurrLoop; } ArrayStack Compiler::optHoistThisLoop(unsigned lnum, @@ -6488,15 +6480,6 @@ ArrayStack Compiler::optHoistThisLoop(unsigned lnum, defExec.Push(pLoopDsc->lpEntry); } - /* for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) - { - if (BlockSetOps::IsMember(this, newPreHeaders, block->bbNum)) - { - defExec.Push(block); - - } - }*/ - return optHoistLoopBlocks(lnum, &defExec, hoistCtxt); } @@ -6775,8 +6758,6 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo unsigned m_loopNum; LoopHoistContext* m_hoistContext; BasicBlock* m_currentBlock; - /*BitVecTraits m_visitedTraits; - BitVec m_visited;*/ ArrayStack m_preHeadersList; bool IsNodeHoistable(GenTree* node) @@ -6906,12 +6887,7 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loopNum, m_hoistContext); if ((m_compiler->optLoopTable[m_loopNum].lpFlags & LPFLG_HAS_PREHEAD) != 0) { - BasicBlock* loopHead = m_compiler->optLoopTable[m_loopNum].lpHead; - /*if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, loopHead->bbNum)) - { - preHeadersList->Push(loopHead); - }*/ - m_preHeadersList.Push(loopHead); + m_preHeadersList.Push(m_compiler->optLoopTable[m_loopNum].lpHead); } } else @@ -7258,22 +7234,7 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo value.m_invariant = false; m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); - - BasicBlock* loopHead = m_compiler->optLoopTable[m_loopNum].lpHead; - - // if (BitVecTraits::GetSize(&m_visitedTraits) < loopHead->bbNum) - //{ - // // We don't know how many blocks will be added and hence we would grow - // // the bitset here. - // m_visitedTraits = BitVecTraits(m_compiler->fgBBNumMax * 2, m_compiler); - // m_visited = BitVecOps::MakeCopy(&m_visitedTraits, m_visited); - //} - - // if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, loopHead->bbNum)) - //{ - // BitVecOps::AddElemD(&m_visitedTraits, m_visited, loopHead->bbNum); - m_preHeadersList.Push(loopHead); - //} + m_preHeadersList.Push(m_compiler->optLoopTable[m_loopNum].lpHead); } else if (value.Node() != tree) { @@ -7301,10 +7262,6 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo return fgWalkResult::WALK_CONTINUE; } - // ArrayStack* GetPreHeaders() - //{ - // return &preHeadersList; - //} }; LoopDsc* loopDsc = &optLoopTable[loopNum]; From 3c815e224849b26adc8dd5bf36023412ed04de5a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 22 Apr 2022 15:16:56 -0700 Subject: [PATCH 05/25] wip --- src/coreclr/jit/lsra.cpp | 5 +++++ src/coreclr/jit/optimizer.cpp | 9 ++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 38921927f2091..fd976f64b23a0 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4561,6 +4561,11 @@ void LinearScan::allocateRegisters() } } + //if (compiler->info.compMethodHashPrivate == 0x0e2a3254) + //{ + // compiler->verbose = true; + //} + #ifdef DEBUG if (VERBOSE) { diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 189b5e8719360..13df2156ae508 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6448,13 +6448,15 @@ ArrayStack Compiler::optHoistThisLoop(unsigned lnum, while (!existingPreHeaders.Empty()) { - defExec.Push(existingPreHeaders.Pop()); + BasicBlock* preHeaderBlock = existingPreHeaders.Pop(); + JITDUMP(" Considering hoisting in preheader block " FMT_BB " added for the nested loop \n", preHeaderBlock->bbNum); + defExec.Push(preHeaderBlock); } if (pLoopDsc->lpExitCnt == 1) { assert(pLoopDsc->lpExit != nullptr); - JITDUMP(" Only considering hoisting in blocks that dominate exit block " FMT_BB "\n", pLoopDsc->lpExit->bbNum); + JITDUMP(" Considering hoisting in blocks that dominate exit block " FMT_BB "\n", pLoopDsc->lpExit->bbNum); BasicBlock* cur = pLoopDsc->lpExit; // Push dominators, until we reach "entry" or exit the loop. while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry) @@ -6474,7 +6476,8 @@ ArrayStack Compiler::optHoistThisLoop(unsigned lnum, } else // More than one exit { - JITDUMP(" only considering hoisting in entry block " FMT_BB "\n", pLoopDsc->lpEntry->bbNum); + JITDUMP(" Considering hoisting in entry block " FMT_BB " because " FMT_LP " has more than one exit\n", + pLoopDsc->lpEntry->bbNum, lnum); // We'll assume that only the entry block is definitely executed. // We could in the future do better. defExec.Push(pLoopDsc->lpEntry); From 9c2a7c5b0f31a15ea4b5759df416cb995ea99e54 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 26 Apr 2022 23:02:52 -0700 Subject: [PATCH 06/25] isCommaTree && hasExcep --- src/coreclr/jit/optimizer.cpp | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 13df2156ae508..c978bb847567b 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6918,6 +6918,7 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { GenTree* tree = *use; + JITDUMP("----- PostOrderVisit for [%06u]\n", dspTreeID(tree)); if (tree->OperIsLocal()) { @@ -7224,6 +7225,9 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo // cctor dependent node is initially not hoistable and may become hoistable later, // when its parent comma node is visited. // + bool visitedCurr = false; + bool isCommaTree = tree->OperIs(GT_COMMA); + bool hasExcep = false; for (int i = 0; i < m_valueStack.Height(); i++) { Value& value = m_valueStack.BottomRef(i); @@ -7231,6 +7235,14 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo if (value.m_hoistable) { assert(value.Node() != tree); + JITDUMP(" [%06u]", dspTreeID(value.Node())); + + if (isCommaTree && hasExcep) + { + JITDUMP(" not hoistable: cannot move past node that throws exception.\n"); + continue; + } + JITDUMP(" hoistable\n"); // Don't hoist this tree again. value.m_hoistable = false; @@ -7241,9 +7253,21 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo } else if (value.Node() != tree) { + if (visitedCurr && isCommaTree) + { + // If we have visited current tree, now we are visiting children. + // For GT_COMMA nodes, we want to track if any children throws and + // should not hoist further children past it. + hasExcep = (tree->gtFlags & GTF_EXCEPT) != 0; + } JITDUMP(" [%06u] not %s: %s\n", dspTreeID(value.Node()), value.m_invariant ? "invariant" : "hoistable", value.m_failReason); } + else + { + visitedCurr = true; + JITDUMP(" [%06u] not hoistable : current node\n", dspTreeID(value.Node())); + } } } From a585bc21cdb87671893037f49ef5949ad7e65a9d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 27 Apr 2022 01:13:35 -0700 Subject: [PATCH 07/25] revert lsra changes --- src/coreclr/jit/lsra.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index fd976f64b23a0..38921927f2091 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4561,11 +4561,6 @@ void LinearScan::allocateRegisters() } } - //if (compiler->info.compMethodHashPrivate == 0x0e2a3254) - //{ - // compiler->verbose = true; - //} - #ifdef DEBUG if (VERBOSE) { From 27ef4cfd0fbb9361bf911c2039129ec2d70b0f48 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 27 Apr 2022 17:25:11 -0700 Subject: [PATCH 08/25] Update hoisting condition - Only update if node to be hoisted has side-effects and the sibling that is before that throws exception --- src/coreclr/jit/optimizer.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index c978bb847567b..733f9a2eaf96a 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -7237,10 +7237,15 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo assert(value.Node() != tree); JITDUMP(" [%06u]", dspTreeID(value.Node())); - if (isCommaTree && hasExcep) + if ((value.Node()->gtFlags & GTF_ALL_EFFECT) != 0) { - JITDUMP(" not hoistable: cannot move past node that throws exception.\n"); - continue; + // If the hoistable node has any side effects, make sure + // we don't hoist it past a sibling that throws any exception. + if (isCommaTree && hasExcep) + { + JITDUMP(" not hoistable: cannot move past node that throws exception.\n"); + continue; + } } JITDUMP(" hoistable\n"); From 5a51ee4772c3f22b7861c1b6e461b1ccacd672db Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 28 Apr 2022 07:57:40 -0700 Subject: [PATCH 09/25] Change to BasicBlockList --- src/coreclr/jit/compiler.h | 10 +-- src/coreclr/jit/optimizer.cpp | 159 +++++++++++++++++++++++++--------- 2 files changed, 122 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a297a326d0758..44f06a2644e44 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5969,23 +5969,23 @@ class Compiler // Do hoisting for loop "lnum" (an index into the optLoopTable), and all loops nested within it. // Tracks the expressions that have been hoisted by containing loops by temporarily recording their // value numbers in "m_hoistedInParentLoops". This set is not modified by the call. - // + // // Returns the list of basic blocks that were added as preheaders as part of hoisting the current loop. - ArrayStack optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); + BasicBlockList* optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) // Assumes that expressions have been hoisted in containing loops if their value numbers are in // "m_hoistedInParentLoops". // // Returns the new preheaders created. - ArrayStack optHoistThisLoop(unsigned lnum, + BasicBlockList* optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, - ArrayStack existingPreHeaders); + BasicBlockList* existingPreHeaders); // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) // outside of that loop. Exempt expressions whose value number is in "m_hoistedInParentLoops"; add VN's of hoisted // expressions to "hoistInLoop". - ArrayStack optHoistLoopBlocks(unsigned loopNum, + BasicBlockList* optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 733f9a2eaf96a..07b3265030abc 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6250,7 +6250,7 @@ void Compiler::optHoistLoopCode() #endif // DEBUG } -ArrayStack Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) +BasicBlockList* Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) { // Do this loop, then recursively do all nested loops. JITDUMP("\n%s " FMT_LP "\n", optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP ? "Loop Nest" : "Nested Loop", @@ -6263,7 +6263,8 @@ ArrayStack Compiler::optHoistLoopNest(unsigned lnum, LoopHoistConte #endif // LOOP_HOIST_STATS VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); - ArrayStack preHeadersOfChildLoops(getAllocatorLoopHoist()); + BasicBlockList* preHeadersOfChildLoops = nullptr; + BasicBlockList* firstPreHeader = nullptr; if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) { @@ -6287,11 +6288,10 @@ ArrayStack Compiler::optHoistLoopNest(unsigned lnum, LoopHoistConte for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { - ArrayStack preHeadersForThisLoop = optHoistLoopNest(child, hoistCtxt); - - while (!preHeadersForThisLoop.Empty()) + BasicBlockList* preHeadersForThisLoop = optHoistLoopNest(child, hoistCtxt); + while (preHeadersForThisLoop != nullptr) { - BasicBlock* preHeaderBlock = preHeadersForThisLoop.Pop(); + BasicBlock* preHeaderBlock = preHeadersForThisLoop->block; if (BitVecTraits::GetSize(&m_visitedTraits) < preHeaderBlock->bbNum) { @@ -6304,8 +6304,20 @@ ArrayStack Compiler::optHoistLoopNest(unsigned lnum, LoopHoistConte if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, preHeaderBlock->bbNum)) { BitVecOps::AddElemD(&m_visitedTraits, m_visited, preHeaderBlock->bbNum); - preHeadersOfChildLoops.Push(preHeaderBlock); + JITDUMP(" PREHEADER: " FMT_BB "\n", preHeaderBlock->bbNum); + + + preHeadersOfChildLoops = + new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, preHeadersOfChildLoops); + if (preHeadersOfChildLoops->next == nullptr) + { + firstPreHeader = preHeadersOfChildLoops; + } + + //preHeadersOfChildLoops.Push(preHeaderBlock); } + + preHeadersForThisLoop = preHeadersForThisLoop->next; } } @@ -6321,12 +6333,12 @@ ArrayStack Compiler::optHoistLoopNest(unsigned lnum, LoopHoistConte } } - return optHoistThisLoop(lnum, hoistCtxt, preHeadersOfChildLoops); + return optHoistThisLoop(lnum, hoistCtxt, firstPreHeader); } -ArrayStack Compiler::optHoistThisLoop(unsigned lnum, +BasicBlockList* Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, - ArrayStack existingPreHeaders) + BasicBlockList* existingPreHeaders) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; @@ -6335,7 +6347,7 @@ ArrayStack Compiler::optHoistThisLoop(unsigned lnum, if (pLoopDsc->lpFlags & LPFLG_REMOVED) { JITDUMP(" ... not hoisting " FMT_LP ": removed\n", lnum); - return ArrayStack(getAllocatorLoopHoist()); + return nullptr; } // Ensure the per-loop sets/tables are empty. @@ -6446,24 +6458,29 @@ ArrayStack Compiler::optHoistThisLoop(unsigned lnum, // ArrayStack defExec(getAllocatorLoopHoist()); - while (!existingPreHeaders.Empty()) + + /*while (!existingPreHeaders.Empty()) { BasicBlock* preHeaderBlock = existingPreHeaders.Pop(); - JITDUMP(" Considering hoisting in preheader block " FMT_BB " added for the nested loop \n", preHeaderBlock->bbNum); + JITDUMP(" Considering hoisting in preheader block " FMT_BB " added for the nested loop \n", + preHeaderBlock->bbNum); defExec.Push(preHeaderBlock); - } + }*/ if (pLoopDsc->lpExitCnt == 1) { assert(pLoopDsc->lpExit != nullptr); - JITDUMP(" Considering hoisting in blocks that dominate exit block " FMT_BB "\n", pLoopDsc->lpExit->bbNum); + JITDUMP(" Considering hoisting in blocks that dominate exit block " FMT_BB ":\n", pLoopDsc->lpExit->bbNum); BasicBlock* cur = pLoopDsc->lpExit; // Push dominators, until we reach "entry" or exit the loop. while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry) { + JITDUMP(" -- " FMT_BB "\n", cur->bbNum); + defExec.Push(cur); cur = cur->bbIDom; } + // If we didn't reach the entry block, give up and *just* push the entry block. if (cur != pLoopDsc->lpEntry) { @@ -6472,17 +6489,34 @@ ArrayStack Compiler::optHoistThisLoop(unsigned lnum, pLoopDsc->lpEntry->bbNum); defExec.Reset(); } - defExec.Push(pLoopDsc->lpEntry); } else // More than one exit { JITDUMP(" Considering hoisting in entry block " FMT_BB " because " FMT_LP " has more than one exit\n", pLoopDsc->lpEntry->bbNum, lnum); + // We'll assume that only the entry block is definitely executed. // We could in the future do better. - defExec.Push(pLoopDsc->lpEntry); } + JITDUMP(" Considering hoisting in preheader blocks added for the nested loop:\n"); + + while (existingPreHeaders != nullptr) + { + BasicBlock* preHeaderBlock = existingPreHeaders->block; + /*JITDUMP(" Considering hoisting in preheader block " FMT_BB " added for the nested loop \n", + preHeaderBlock->bbNum);*/ + JITDUMP(" -- " FMT_BB "\n", preHeaderBlock->bbNum); + defExec.Push(preHeaderBlock); + existingPreHeaders = existingPreHeaders->next; + } + JITDUMP(" Considering hoisting in entry block:\n"); + JITDUMP(" -- " FMT_BB "\n", pLoopDsc->lpEntry->bbNum); + defExec.Push(pLoopDsc->lpEntry); + + JITDUMP("\n"); + + return optHoistLoopBlocks(lnum, &defExec, hoistCtxt); } @@ -6724,7 +6758,7 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) // the loop, in the execution order, starting with the loop entry // block on top of the stack. // -ArrayStack Compiler::optHoistLoopBlocks(unsigned loopNum, +BasicBlockList* Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext) { @@ -6756,12 +6790,14 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo } }; - ArrayStack m_valueStack; - bool m_beforeSideEffect; - unsigned m_loopNum; - LoopHoistContext* m_hoistContext; - BasicBlock* m_currentBlock; - ArrayStack m_preHeadersList; + ArrayStack m_valueStack; + bool m_beforeSideEffect; + unsigned m_loopNum; + LoopHoistContext* m_hoistContext; + BasicBlock* m_currentBlock; + BasicBlockList* m_preHeadersList; + BasicBlockList* m_firstPreHeader; + //ArrayStack m_preHeadersList; bool IsNodeHoistable(GenTree* node) { @@ -6802,6 +6838,49 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo return vnIsInvariant; } + //------------------------------------------------------------------------ + // AppendPreheader: Appends the preheaders in the BasicBlockList. The list + // populated is FIFO meaning the BasicBlock added first is the first block + // in the list. During hoisting, we would extract them all and populate + // them in another accumulator BasicBlockList. There, we would add the blocks + // in LIFO order. The reason being `optHoistLoopBlocks` expects them to be + // present in execution order. + // + // Arguments: + // preHeader -- preHeader to add + // + void AppendPreheader(BasicBlock* preHeader) + { + if (m_preHeadersList == nullptr) + { + m_preHeadersList = new (m_compiler, CMK_LoopHoist) BasicBlockList(preHeader, nullptr); + m_firstPreHeader = m_preHeadersList; + } + else + { + m_preHeadersList->next = new (m_compiler, CMK_LoopHoist) BasicBlockList(preHeader, nullptr); + m_preHeadersList = m_preHeadersList->next; + } + } + + bool IsHoistingOverExcepSibling(GenTree* node, bool parentIsCommaTree, bool siblingHasExcep) + { + JITDUMP(" [%06u]", dspTreeID(node)); + + if ((node->gtFlags & GTF_ALL_EFFECT) != 0) + { + // If the hoistable node has any side effects, make sure + // we don't hoist it past a sibling that throws any exception. + if (parentIsCommaTree && siblingHasExcep) + { + JITDUMP(" not hoistable: cannot move past node that throws exception.\n"); + return false; + } + } + JITDUMP(" hoistable\n"); + return true; + } + //------------------------------------------------------------------------ // IsTreeLoopMemoryInvariant: determine if the value number of tree // is dependent on the tree being executed within the current loop @@ -6866,14 +6945,15 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo , m_currentBlock(nullptr) /*, m_visitedTraits(compiler->fgBBNumMax * 2, compiler) , m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)) */ - , m_preHeadersList(compiler->getAllocatorLoopHoist()) + , m_preHeadersList(nullptr) + //, m_preHeadersList(compiler->getAllocatorLoopHoist()) { } - ArrayStack GetNewPreHeaders() + BasicBlockList* GetNewPreHeaders() { - return m_preHeadersList; + return m_firstPreHeader; } void HoistBlock(BasicBlock* block) @@ -6890,7 +6970,9 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loopNum, m_hoistContext); if ((m_compiler->optLoopTable[m_loopNum].lpFlags & LPFLG_HAS_PREHEAD) != 0) { - m_preHeadersList.Push(m_compiler->optLoopTable[m_loopNum].lpHead); + AppendPreheader(m_compiler->optLoopTable[m_loopNum].lpHead); + /*m_preHeadersList = new (m_compiler, CMK_LoopHoist) + BasicBlockList(m_compiler->optLoopTable[m_loopNum].lpHead, m_preHeadersList);*/ } } else @@ -7235,26 +7317,20 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo if (value.m_hoistable) { assert(value.Node() != tree); - JITDUMP(" [%06u]", dspTreeID(value.Node())); - - if ((value.Node()->gtFlags & GTF_ALL_EFFECT) != 0) + + if (IsHoistingOverExcepSibling(value.Node(), isCommaTree, hasExcep)) { - // If the hoistable node has any side effects, make sure - // we don't hoist it past a sibling that throws any exception. - if (isCommaTree && hasExcep) - { - JITDUMP(" not hoistable: cannot move past node that throws exception.\n"); - continue; - } + m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); } - JITDUMP(" hoistable\n"); // Don't hoist this tree again. value.m_hoistable = false; value.m_invariant = false; - m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); - m_preHeadersList.Push(m_compiler->optLoopTable[m_loopNum].lpHead); + AppendPreheader(m_compiler->optLoopTable[m_loopNum].lpHead); + /*m_preHeadersList = new (m_compiler, CMK_LoopHoist) + BasicBlockList(m_compiler->optLoopTable[m_loopNum].lpHead, m_preHeadersList);*/ + } else if (value.Node() != tree) { @@ -7293,7 +7369,6 @@ ArrayStack Compiler::optHoistLoopBlocks(unsigned lo return fgWalkResult::WALK_CONTINUE; } - }; LoopDsc* loopDsc = &optLoopTable[loopNum]; From 65cea974a23337db1bca9e9e8b784e3cc1846e80 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 29 Apr 2022 01:33:48 -0700 Subject: [PATCH 10/25] organize preheaders --- src/coreclr/jit/compiler.h | 8 +- src/coreclr/jit/optimizer.cpp | 256 +++++++++++++++++++++++----------- 2 files changed, 183 insertions(+), 81 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 44f06a2644e44..139b517bc0365 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5971,21 +5971,21 @@ class Compiler // value numbers in "m_hoistedInParentLoops". This set is not modified by the call. // // Returns the list of basic blocks that were added as preheaders as part of hoisting the current loop. - BasicBlockList* optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); + void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) // Assumes that expressions have been hoisted in containing loops if their value numbers are in // "m_hoistedInParentLoops". // // Returns the new preheaders created. - BasicBlockList* optHoistThisLoop(unsigned lnum, + void optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders); // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) // outside of that loop. Exempt expressions whose value number is in "m_hoistedInParentLoops"; add VN's of hoisted // expressions to "hoistInLoop". - BasicBlockList* optHoistLoopBlocks(unsigned loopNum, + void optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext); @@ -6369,6 +6369,8 @@ class Compiler // A loop contains itself. bool optLoopContains(unsigned l1, unsigned l2) const; + BasicBlock* optLoopEntry(BasicBlock* preHeader); + // Updates the loop table by changing loop "loopInd", whose head is required // to be "from", to be "to". Also performs this transformation for any // loop nested in "loopInd" that shares the same head as "loopInd". diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 07b3265030abc..bf8ddf3347e14 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -3151,6 +3151,22 @@ bool Compiler::optLoopContains(unsigned l1, unsigned l2) const } } +//----------------------------------------------------------------------------- +// optLoopEntry: For a given preheader of a loop, returns the lpEntry. +// +// Arguments: +// preHeader -- preheader of a loop +// +// Returns: +// Corresponding loop entry block. +// +BasicBlock* Compiler::optLoopEntry(BasicBlock* preHeader) +{ + assert((preHeader->bbFlags & BBF_LOOP_PREHEADER) != 0); + + return (preHeader->bbJumpDest == nullptr) ? preHeader->bbNext : preHeader->bbJumpDest; +} + //----------------------------------------------------------------------------- // optUpdateLoopHead: Replace the `head` block of a loop in the loop table. // Considers all child loops that might share the same head (recursively). @@ -6250,7 +6266,7 @@ void Compiler::optHoistLoopCode() #endif // DEBUG } -BasicBlockList* Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) +void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) { // Do this loop, then recursively do all nested loops. JITDUMP("\n%s " FMT_LP "\n", optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP ? "Loop Nest" : "Nested Loop", @@ -6288,36 +6304,67 @@ BasicBlockList* Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hois for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { - BasicBlockList* preHeadersForThisLoop = optHoistLoopNest(child, hoistCtxt); - while (preHeadersForThisLoop != nullptr) - { - BasicBlock* preHeaderBlock = preHeadersForThisLoop->block; - - if (BitVecTraits::GetSize(&m_visitedTraits) < preHeaderBlock->bbNum) - { - // We don't know how many blocks will be added and hence we would grow - // the bitset here. - m_visitedTraits = BitVecTraits(BitVecTraits::GetSize(&m_visitedTraits) * 2, this); - m_visited = BitVecOps::MakeCopy(&m_visitedTraits, m_visited); - } - + /*BasicBlockList* preHeadersForThisLoop = */optHoistLoopNest(child, hoistCtxt); + //while (preHeadersForThisLoop != nullptr) + //{ + // BasicBlock* preHeaderBlock = preHeadersForThisLoop->block; + + // if (BitVecTraits::GetSize(&m_visitedTraits) < preHeaderBlock->bbNum) + // { + // // We don't know how many blocks will be added and hence we would grow + // // the bitset here. + // m_visitedTraits = BitVecTraits(BitVecTraits::GetSize(&m_visitedTraits) * 2, this); + // m_visited = BitVecOps::MakeCopy(&m_visitedTraits, m_visited); + // } + + // if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, preHeaderBlock->bbNum)) + // { + // BitVecOps::AddElemD(&m_visitedTraits, m_visited, preHeaderBlock->bbNum); + // JITDUMP(" PREHEADER: " FMT_BB "\n", preHeaderBlock->bbNum); + + // + // preHeadersOfChildLoops = + // new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, preHeadersOfChildLoops); + // /*if (preHeadersOfChildLoops->next == nullptr) + // { + // firstPreHeader = preHeadersOfChildLoops; + // }*/ + + // //preHeadersOfChildLoops.Push(preHeaderBlock); + // } + + // preHeadersForThisLoop = preHeadersForThisLoop->next; + //} + if (optLoopTable[child].lpFlags & LPFLG_HAS_PREHEAD) + { + BasicBlock* preHeaderBlock = optLoopTable[child].lpHead; if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, preHeaderBlock->bbNum)) { BitVecOps::AddElemD(&m_visitedTraits, m_visited, preHeaderBlock->bbNum); JITDUMP(" PREHEADER: " FMT_BB "\n", preHeaderBlock->bbNum); - - preHeadersOfChildLoops = - new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, preHeadersOfChildLoops); - if (preHeadersOfChildLoops->next == nullptr) + // Here, we are arranging the blocks in reverse execution order, so when they are pushed + // on the stack that hoist these blocks further sees them in execution order. + if (firstPreHeader == nullptr) { - firstPreHeader = preHeadersOfChildLoops; + preHeadersOfChildLoops = new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, nullptr); + firstPreHeader = preHeadersOfChildLoops; + } + else + { + preHeadersOfChildLoops->next = new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, nullptr); + preHeadersOfChildLoops = preHeadersOfChildLoops->next; } + + /*preHeadersOfChildLoops = + new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, preHeadersOfChildLoops);*/ + /*if (preHeadersOfChildLoops->next == nullptr) + { + firstPreHeader = preHeadersOfChildLoops; + }*/ //preHeadersOfChildLoops.Push(preHeaderBlock); } - - preHeadersForThisLoop = preHeadersForThisLoop->next; } } @@ -6333,10 +6380,10 @@ BasicBlockList* Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hois } } - return optHoistThisLoop(lnum, hoistCtxt, firstPreHeader); + optHoistThisLoop(lnum, hoistCtxt, firstPreHeader); } -BasicBlockList* Compiler::optHoistThisLoop(unsigned lnum, +void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders) { @@ -6347,7 +6394,7 @@ BasicBlockList* Compiler::optHoistThisLoop(unsigned lnum, if (pLoopDsc->lpFlags & LPFLG_REMOVED) { JITDUMP(" ... not hoisting " FMT_LP ": removed\n", lnum); - return nullptr; + return; } // Ensure the per-loop sets/tables are empty. @@ -6467,18 +6514,50 @@ BasicBlockList* Compiler::optHoistThisLoop(unsigned lnum, defExec.Push(preHeaderBlock); }*/ + bool pushAllPreheaders = false; + if (pLoopDsc->lpExitCnt == 1) { assert(pLoopDsc->lpExit != nullptr); - JITDUMP(" Considering hoisting in blocks that dominate exit block " FMT_BB ":\n", pLoopDsc->lpExit->bbNum); - BasicBlock* cur = pLoopDsc->lpExit; + JITDUMP(" Considering hoisting in blocks that either dominate exit block " FMT_BB + " or preheaders of nested loops, if any:\n", + pLoopDsc->lpExit->bbNum); + // Push dominators, until we reach "entry" or exit the loop. + // Also push the preheaders that were added for the nested loops, if any. + BasicBlock* cur = pLoopDsc->lpExit; + BasicBlockList* preHeadersList = existingPreHeaders; + while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry) { - JITDUMP(" -- " FMT_BB "\n", cur->bbNum); - + JITDUMP(" -- " FMT_BB " (dominate exit block)\n", cur->bbNum); defExec.Push(cur); cur = cur->bbIDom; + + if (preHeadersList != nullptr) + { + BasicBlock* preHeaderBlock = preHeadersList->block; + BasicBlock* lpEntry = optLoopEntry(preHeaderBlock); + if (cur->bbNum < lpEntry->bbNum) + { + JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum, + lpEntry->bbNatLoopNum); + + defExec.Push(preHeaderBlock); + preHeadersList = preHeadersList->next; + } + } + } + + // Push the remaining preheaders, if any. This usually will happen if entry + // and exit blocks of lnum is same. + while (preHeadersList != nullptr) + { + BasicBlock* preHeaderBlock = preHeadersList->block; + JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum, + optLoopEntry(preHeaderBlock)->bbNatLoopNum); + defExec.Push(preHeaderBlock); + preHeadersList = preHeadersList->next; } // If we didn't reach the entry block, give up and *just* push the entry block. @@ -6488,36 +6567,56 @@ BasicBlockList* Compiler::optHoistThisLoop(unsigned lnum, "block " FMT_BB "\n", pLoopDsc->lpEntry->bbNum); defExec.Reset(); + pushAllPreheaders = true; } } else // More than one exit { - JITDUMP(" Considering hoisting in entry block " FMT_BB " because " FMT_LP " has more than one exit\n", - pLoopDsc->lpEntry->bbNum, lnum); - // We'll assume that only the entry block is definitely executed. // We could in the future do better. - } - JITDUMP(" Considering hoisting in preheader blocks added for the nested loop:\n"); + JITDUMP(" Considering hoisting in entry block " FMT_BB " because " FMT_LP " has more than one exit\n", + pLoopDsc->lpEntry->bbNum, lnum); + pushAllPreheaders = true; + } - while (existingPreHeaders != nullptr) + if (pushAllPreheaders) { - BasicBlock* preHeaderBlock = existingPreHeaders->block; - /*JITDUMP(" Considering hoisting in preheader block " FMT_BB " added for the nested loop \n", - preHeaderBlock->bbNum);*/ - JITDUMP(" -- " FMT_BB "\n", preHeaderBlock->bbNum); - defExec.Push(preHeaderBlock); - existingPreHeaders = existingPreHeaders->next; + // We will still push all the preheaders found. + BasicBlockList* preHeadersList = existingPreHeaders; + + while (preHeadersList != nullptr) + { + BasicBlock* preHeaderBlock = preHeadersList->block; + JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum, + optLoopEntry(preHeaderBlock)->bbNatLoopNum); + defExec.Push(preHeaderBlock); + preHeadersList = preHeadersList->next; + } } - JITDUMP(" Considering hoisting in entry block:\n"); - JITDUMP(" -- " FMT_BB "\n", pLoopDsc->lpEntry->bbNum); + + JITDUMP(" -- " FMT_BB " (entry block)\n", pLoopDsc->lpEntry->bbNum); defExec.Push(pLoopDsc->lpEntry); - JITDUMP("\n"); + //JITDUMP(" Considering hoisting in preheader blocks added for the nested loop:\n"); + + //while (existingPreHeaders != nullptr) + //{ + // BasicBlock* preHeaderBlock = existingPreHeaders->block; + // /*JITDUMP(" Considering hoisting in preheader block " FMT_BB " added for the nested loop \n", + // preHeaderBlock->bbNum);*/ + // JITDUMP(" -- " FMT_BB "\n", preHeaderBlock->bbNum); + // defExec.Push(preHeaderBlock); + // existingPreHeaders = existingPreHeaders->next; + //} + //JITDUMP(" Considering hoisting in entry block:\n"); + //JITDUMP(" -- " FMT_BB "\n", pLoopDsc->lpEntry->bbNum); + //defExec.Push(pLoopDsc->lpEntry); + + //JITDUMP("\n"); - return optHoistLoopBlocks(lnum, &defExec, hoistCtxt); + optHoistLoopBlocks(lnum, &defExec, hoistCtxt); } bool Compiler::optIsProfitableToHoistTree(GenTree* tree, unsigned lnum) @@ -6758,7 +6857,7 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) // the loop, in the execution order, starting with the loop entry // block on top of the stack. // -BasicBlockList* Compiler::optHoistLoopBlocks(unsigned loopNum, +void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext) { @@ -6795,8 +6894,9 @@ BasicBlockList* Compiler::optHoistLoopBlocks(unsigned loopNum, unsigned m_loopNum; LoopHoistContext* m_hoistContext; BasicBlock* m_currentBlock; - BasicBlockList* m_preHeadersList; - BasicBlockList* m_firstPreHeader; + BasicBlock* m_loopPreHeader; + //BasicBlockList* m_preHeadersList; + //BasicBlockList* m_firstPreHeader; //ArrayStack m_preHeadersList; bool IsNodeHoistable(GenTree* node) @@ -6838,30 +6938,30 @@ BasicBlockList* Compiler::optHoistLoopBlocks(unsigned loopNum, return vnIsInvariant; } - //------------------------------------------------------------------------ - // AppendPreheader: Appends the preheaders in the BasicBlockList. The list - // populated is FIFO meaning the BasicBlock added first is the first block - // in the list. During hoisting, we would extract them all and populate - // them in another accumulator BasicBlockList. There, we would add the blocks - // in LIFO order. The reason being `optHoistLoopBlocks` expects them to be - // present in execution order. - // - // Arguments: - // preHeader -- preHeader to add - // - void AppendPreheader(BasicBlock* preHeader) - { - if (m_preHeadersList == nullptr) - { - m_preHeadersList = new (m_compiler, CMK_LoopHoist) BasicBlockList(preHeader, nullptr); - m_firstPreHeader = m_preHeadersList; - } - else - { - m_preHeadersList->next = new (m_compiler, CMK_LoopHoist) BasicBlockList(preHeader, nullptr); - m_preHeadersList = m_preHeadersList->next; - } - } + ////------------------------------------------------------------------------ + //// AppendPreheader: Appends the preheaders in the BasicBlockList. The list + //// populated is FIFO meaning the BasicBlock added first is the first block + //// in the list. During hoisting, we would extract them all and populate + //// them in another accumulator BasicBlockList. There, we would add the blocks + //// in LIFO order. The reason being `optHoistLoopBlocks` expects them to be + //// present in execution order. + //// + //// Arguments: + //// preHeader -- preHeader to add + //// + //void AppendPreheader(BasicBlock* preHeader) + //{ + // if (m_preHeadersList == nullptr) + // { + // m_preHeadersList = new (m_compiler, CMK_LoopHoist) BasicBlockList(preHeader, nullptr); + // //m_firstPreHeader = m_preHeadersList; + // } + // else + // { + // m_preHeadersList->next = new (m_compiler, CMK_LoopHoist) BasicBlockList(preHeader, nullptr); + // m_preHeadersList = m_preHeadersList->next; + // } + //} bool IsHoistingOverExcepSibling(GenTree* node, bool parentIsCommaTree, bool siblingHasExcep) { @@ -6945,16 +7045,16 @@ BasicBlockList* Compiler::optHoistLoopBlocks(unsigned loopNum, , m_currentBlock(nullptr) /*, m_visitedTraits(compiler->fgBBNumMax * 2, compiler) , m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)) */ - , m_preHeadersList(nullptr) + //, m_preHeadersList(nullptr) //, m_preHeadersList(compiler->getAllocatorLoopHoist()) { } - BasicBlockList* GetNewPreHeaders() + /*BasicBlockList* GetNewPreHeaders() { - return m_firstPreHeader; - } + return m_preHeadersList; + }*/ void HoistBlock(BasicBlock* block) { @@ -6970,7 +7070,7 @@ BasicBlockList* Compiler::optHoistLoopBlocks(unsigned loopNum, m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loopNum, m_hoistContext); if ((m_compiler->optLoopTable[m_loopNum].lpFlags & LPFLG_HAS_PREHEAD) != 0) { - AppendPreheader(m_compiler->optLoopTable[m_loopNum].lpHead); + //AppendPreheader(m_compiler->optLoopTable[m_loopNum].lpHead); /*m_preHeadersList = new (m_compiler, CMK_LoopHoist) BasicBlockList(m_compiler->optLoopTable[m_loopNum].lpHead, m_preHeadersList);*/ } @@ -7327,7 +7427,7 @@ BasicBlockList* Compiler::optHoistLoopBlocks(unsigned loopNum, value.m_hoistable = false; value.m_invariant = false; - AppendPreheader(m_compiler->optLoopTable[m_loopNum].lpHead); + //AppendPreheader(m_compiler->optLoopTable[m_loopNum].lpHead); /*m_preHeadersList = new (m_compiler, CMK_LoopHoist) BasicBlockList(m_compiler->optLoopTable[m_loopNum].lpHead, m_preHeadersList);*/ @@ -7393,7 +7493,7 @@ BasicBlockList* Compiler::optHoistLoopBlocks(unsigned loopNum, visitor.HoistBlock(block); } - return visitor.GetNewPreHeaders(); + //return visitor.GetNewPreHeaders(); } void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnum, LoopHoistContext* hoistCtxt) From 5e4c323a3a5b683678d791ba6151422ba5368849 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 29 Apr 2022 19:22:23 -0700 Subject: [PATCH 11/25] update hoistedInCurLoop and hoistedInSiblingLoop --- src/coreclr/jit/compiler.h | 64 +++++++++++++++++---- src/coreclr/jit/optimizer.cpp | 101 +++++++++++++++++++++------------- 2 files changed, 117 insertions(+), 48 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 139b517bc0365..5dc65be4687d5 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5935,8 +5935,10 @@ class Compiler VNSet* m_pHoistedInCurLoop; public: - // Value numbers of expressions that have been hoisted in parent loops in the loop nest. - VNSet m_hoistedInParentLoops; + // Value numbers of expressions that have been hoisted in sibling loops in the loop nest + // at the same level. + VNSet* m_hoistedInSiblingLoop; + ArrayStack m_hoistedVNInSiblingLoop; // Value numbers of expressions that have been hoisted in the current (or most recent) loop in the nest. // Previous decisions on loop-invariance of value numbers in the current loop. @@ -5951,6 +5953,15 @@ class Compiler return m_pHoistedInCurLoop; } + /*VNSet* GetHoistedInSiblingLoop(Compiler* comp) + { + if (m_hoistedInSiblingLoop == nullptr) + { + m_hoistedInSiblingLoop = new (comp->getAllocatorLoopHoist()) VNSet(comp->getAllocatorLoopHoist()); + } + return m_hoistedInSiblingLoop; + }*/ + VNSet* ExtractHoistedInCurLoop() { VNSet* res = m_pHoistedInCurLoop; @@ -5958,24 +5969,57 @@ class Compiler return res; } + void ResetHoistedInSiblingLoop() + { + m_hoistedInSiblingLoop = nullptr; + } + + VNSet* GetVnSetHoistedForSiblingLoop() + { + if (m_hoistedVNInSiblingLoop.Empty()) + { + return nullptr; + } + return m_hoistedVNInSiblingLoop.Top(); + } + + void PushVnSetForSiblingLoop(Compiler* comp) + { + VNSet* vnSet = new (comp->getAllocatorLoopHoist()) VNSet(comp->getAllocatorLoopHoist()); + m_hoistedVNInSiblingLoop.Push(vnSet); + } + + VNSet* PopVnSetForSiblingLoop() + { + return m_hoistedVNInSiblingLoop.Pop(); + } + LoopHoistContext(Compiler* comp) : m_pHoistedInCurLoop(nullptr) - , m_hoistedInParentLoops(comp->getAllocatorLoopHoist()) + , m_hoistedInSiblingLoop(nullptr) + , m_hoistedVNInSiblingLoop(comp->getAllocatorLoopHoist()) , m_curLoopVnInvariantCache(comp->getAllocatorLoopHoist()) { } }; - // Do hoisting for loop "lnum" (an index into the optLoopTable), and all loops nested within it. - // Tracks the expressions that have been hoisted by containing loops by temporarily recording their - // value numbers in "m_hoistedInParentLoops". This set is not modified by the call. + // Do hoisting of all loops nested within loop "lnum" (an index into the optLoopTable), followed + // by the loop "lnum" itself. + // Tracks the expressions that have been hoisted by the sibling loops that are present at the same + // level by temporarily recording their value numbers in "m_hoistedInSiblingLoop". The expressions + // that are already hoisted won't be hoisted again and CSE will remove them anyways. + // + // Both "m_pHoistedInCurLoop" and "m_hoistedInSiblingLoop" helps a lot in eliminating duplicate + // expressions getting hoisted and reducing the count of total expressions hoisted out of loop. + // When calculating the profitability, we compare this with number of registers and hence, lower + // the number of expressions getting hoisted, better chances that they will get enregistered and CSE + // considering them. // - // Returns the list of basic blocks that were added as preheaders as part of hoisting the current loop. void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) - // Assumes that expressions have been hoisted in containing loops if their value numbers are in - // "m_hoistedInParentLoops". + // Assumes that expressions have been hoisted in sibling loops if their value numbers are in + // "m_hoistedInSiblingLoop". // // Returns the new preheaders created. void optHoistThisLoop(unsigned lnum, @@ -5983,7 +6027,7 @@ class Compiler BasicBlockList* existingPreHeaders); // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) - // outside of that loop. Exempt expressions whose value number is in "m_hoistedInParentLoops"; add VN's of hoisted + // outside of that loop. Exempt expressions whose value number is in "m_hoistedInSiblingLoop"; add VN's of hoisted // expressions to "hoistInLoop". void optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index bf8ddf3347e14..259529102e630 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6008,6 +6008,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign { printf("\nHoisting a copy of "); printTreeID(origExpr); + printf(" (" FMT_VN "," FMT_VN ") ", origExpr->gtVNPair.GetLiberal(), origExpr->gtVNPair.GetConservative()); printf(" from " FMT_BB " into PreHeader " FMT_BB " for loop " FMT_LP " <" FMT_BB ".." FMT_BB ">:\n", exprBb->bbNum, optLoopTable[lnum].lpHead->bbNum, lnum, optLoopTable[lnum].lpTop->bbNum, optLoopTable[lnum].lpBottom->bbNum); @@ -6284,27 +6285,45 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) { - // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops. - // TODO-Cleanup: we should have a set abstraction for loops. - if (hoistedInCurLoop != nullptr) - { - for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) - { -#ifdef DEBUG - bool b; - assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b)); -#endif - hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true); - } - } +// // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops. +// // TODO-Cleanup: we should have a set abstraction for loops. +// if (hoistedInCurLoop != nullptr) +// { +// for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) +// { +//#ifdef DEBUG +// bool b; +// assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b)); +//#endif +// hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true); +// } +// } BitVecTraits m_visitedTraits(fgBBNumMax * 2, this); BitVec m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)); + hoistCtxt->PushVnSetForSiblingLoop(this); for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { - /*BasicBlockList* preHeadersForThisLoop = */optHoistLoopNest(child, hoistCtxt); + optHoistLoopNest(child, hoistCtxt); + VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); + + // Add the ones hoisted in "lnum" to "hoistedInSibling" for any sibling loops. + // TODO-Cleanup: we should have a set abstraction for loops. + if (hoistedInCurLoop != nullptr) + { + for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) + { +#ifdef DEBUG + bool b; + assert(!hoistCtxt->GetVnSetHoistedForSiblingLoop()->Lookup(keys.Get(), &b)); +#endif + hoistCtxt->GetVnSetHoistedForSiblingLoop()->Set(keys.Get(), true); + } + } + + /*BasicBlockList* preHeadersForThisLoop = */ //while (preHeadersForThisLoop != nullptr) //{ // BasicBlock* preHeaderBlock = preHeadersForThisLoop->block; @@ -6367,19 +6386,22 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) } } } + hoistCtxt->PopVnSetForSiblingLoop(); - // Now remove them. - // TODO-Cleanup: we should have a set abstraction for loops. - if (hoistedInCurLoop != nullptr) - { - for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) - { - // Note that we asserted when we added these that they hadn't been members, so removing is appropriate. - hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get()); - } - } + //// Now remove them. + //// TODO-Cleanup: we should have a set abstraction for loops. + //if (hoistedInCurLoop != nullptr) + //{ + // for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) + // { + // // Note that we asserted when we added these that they hadn't been members, so removing is appropriate. + // hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get()); + // } + //} } + // Reset the VNs tracked for sibling loop + hoistCtxt->ResetHoistedInSiblingLoop(); optHoistThisLoop(lnum, hoistCtxt, firstPreHeader); } @@ -7417,7 +7439,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, if (value.m_hoistable) { assert(value.Node() != tree); - + if (IsHoistingOverExcepSibling(value.Node(), isCommaTree, hasExcep)) { m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); @@ -7507,22 +7529,25 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu return; } - if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal())) + VNSet* vnSetHoistedForSibling = hoistCtxt->GetVnSetHoistedForSiblingLoop(); + if ((vnSetHoistedForSibling != nullptr) && (vnSetHoistedForSibling->Lookup(tree->gtVNPair.GetLiberal()))) { - JITDUMP(" ... already hoisted same VN in parent\n"); - // already hoisted in a parent loop, so don't hoist this expression. + printf("Sibling loop -- " FMT_LP " : %s\n", lnum, this->info.compMethodName); + JITDUMP(" [%06u] ... already hoisted " FMT_VN " VN in sibling loop\n", dspTreeID(tree), + tree->gtVNPair.GetLiberal()); + + // already hoisted in a sibling loop, so don't hoist this expression. return; } - // TODO: Below code was applicable because we were first hoisting the outer loop and then the inner loop - // But if we start from inner and go outer, then we should continue hoisting things from this loop to the parent - // loop. - // if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal())) - //{ - // JITDUMP(" ... already hoisted same VN in current\n"); - // // already hoisted this expression in the current loop, so don't hoist this expression. - // return; - //} + if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal())) + { + //printf("This loop -- " FMT_LP " : %s\n", lnum, this->info.compMethodName); + JITDUMP(" [%06u] ... already hoisted " FMT_VN " in " FMT_LP "\n ", dspTreeID(tree), + tree->gtVNPair.GetLiberal(), lnum); + // already hoisted this expression in the current loop, so don't hoist this expression. + return; + } // Create a loop pre-header in which to put the hoisted code. fgCreateLoopPreHeader(lnum); @@ -7558,7 +7583,7 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu } // Record the hoisted expression in hoistCtxt - // hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); + hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true); } bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNSet* loopVnInvariantCache) From 3afcfb55e23d1919e15744dea3f06ec63615f6c1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 29 Apr 2022 19:58:23 -0700 Subject: [PATCH 12/25] Reverse the loop order --- src/coreclr/jit/optimizer.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 259529102e630..1069fbb5811ca 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6301,11 +6301,20 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) BitVecTraits m_visitedTraits(fgBBNumMax * 2, this); BitVec m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)); - hoistCtxt->PushVnSetForSiblingLoop(this); + // Since loops in optLoopTable are arranged in reverse execution order, arrange + // them back in execution order before processing. + ArrayStack childLoops(getAllocatorLoopHoist()); for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { + childLoops.Push(child); + } + + hoistCtxt->PushVnSetForSiblingLoop(this); + while (!childLoops.Empty()) + { + unsigned child = childLoops.Pop(); optHoistLoopNest(child, hoistCtxt); VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); From a2d9302c728a60add9f6b2bc0430b0dd10c0e7ab Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 29 Apr 2022 20:22:22 -0700 Subject: [PATCH 13/25] cleanup and jit-format --- src/coreclr/jit/compiler.h | 40 +++---- src/coreclr/jit/optimizer.cpp | 198 +++++----------------------------- 2 files changed, 36 insertions(+), 202 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5dc65be4687d5..907bfbb7460e1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5937,13 +5937,13 @@ class Compiler public: // Value numbers of expressions that have been hoisted in sibling loops in the loop nest // at the same level. - VNSet* m_hoistedInSiblingLoop; ArrayStack m_hoistedVNInSiblingLoop; // Value numbers of expressions that have been hoisted in the current (or most recent) loop in the nest. // Previous decisions on loop-invariance of value numbers in the current loop. VNSet m_curLoopVnInvariantCache; + // Get the VN cache for current loop VNSet* GetHoistedInCurLoop(Compiler* comp) { if (m_pHoistedInCurLoop == nullptr) @@ -5953,15 +5953,7 @@ class Compiler return m_pHoistedInCurLoop; } - /*VNSet* GetHoistedInSiblingLoop(Compiler* comp) - { - if (m_hoistedInSiblingLoop == nullptr) - { - m_hoistedInSiblingLoop = new (comp->getAllocatorLoopHoist()) VNSet(comp->getAllocatorLoopHoist()); - } - return m_hoistedInSiblingLoop; - }*/ - + // Return the so far collected VNs in cache for current loop and reset it. VNSet* ExtractHoistedInCurLoop() { VNSet* res = m_pHoistedInCurLoop; @@ -5969,11 +5961,7 @@ class Compiler return res; } - void ResetHoistedInSiblingLoop() - { - m_hoistedInSiblingLoop = nullptr; - } - + // Get the VN cache for recent sibling loop VNSet* GetVnSetHoistedForSiblingLoop() { if (m_hoistedVNInSiblingLoop.Empty()) @@ -5996,7 +5984,6 @@ class Compiler LoopHoistContext(Compiler* comp) : m_pHoistedInCurLoop(nullptr) - , m_hoistedInSiblingLoop(nullptr) , m_hoistedVNInSiblingLoop(comp->getAllocatorLoopHoist()) , m_curLoopVnInvariantCache(comp->getAllocatorLoopHoist()) { @@ -6004,12 +5991,12 @@ class Compiler }; // Do hoisting of all loops nested within loop "lnum" (an index into the optLoopTable), followed - // by the loop "lnum" itself. + // by the loop "lnum" itself. // Tracks the expressions that have been hoisted by the sibling loops that are present at the same - // level by temporarily recording their value numbers in "m_hoistedInSiblingLoop". The expressions + // level by temporarily recording their value numbers in "m_hoistedVNInSiblingLoop". The expressions // that are already hoisted won't be hoisted again and CSE will remove them anyways. // - // Both "m_pHoistedInCurLoop" and "m_hoistedInSiblingLoop" helps a lot in eliminating duplicate + // Both "m_pHoistedInCurLoop" and "m_hoistedVNInSiblingLoop" helps a lot in eliminating duplicate // expressions getting hoisted and reducing the count of total expressions hoisted out of loop. // When calculating the profitability, we compare this with number of registers and hence, lower // the number of expressions getting hoisted, better chances that they will get enregistered and CSE @@ -6019,19 +6006,15 @@ class Compiler // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) // Assumes that expressions have been hoisted in sibling loops if their value numbers are in - // "m_hoistedInSiblingLoop". + // "m_hoistedVNInSiblingLoop". // // Returns the new preheaders created. - void optHoistThisLoop(unsigned lnum, - LoopHoistContext* hoistCtxt, - BasicBlockList* existingPreHeaders); + void optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders); // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) - // outside of that loop. Exempt expressions whose value number is in "m_hoistedInSiblingLoop"; add VN's of hoisted - // expressions to "hoistInLoop". - void optHoistLoopBlocks(unsigned loopNum, - ArrayStack* blocks, - LoopHoistContext* hoistContext); + // outside of that loop. Exempt expressions whose value number is in "m_hoistedVNInSiblingLoop"; + // add VN's of hoisted expressions to "hoistInLoop". + void optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext); // Return true if the tree looks profitable to hoist out of loop 'lnum'. bool optIsProfitableToHoistTree(GenTree* tree, unsigned lnum); @@ -6413,6 +6396,7 @@ class Compiler // A loop contains itself. bool optLoopContains(unsigned l1, unsigned l2) const; + // Returns the lpEntry for given preheader block of a loop BasicBlock* optLoopEntry(BasicBlock* preHeader); // Updates the loop table by changing loop "loopInd", whose head is required diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 1069fbb5811ca..d48f3da3e2b85 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6008,7 +6008,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign { printf("\nHoisting a copy of "); printTreeID(origExpr); - printf(" (" FMT_VN "," FMT_VN ") ", origExpr->gtVNPair.GetLiberal(), origExpr->gtVNPair.GetConservative()); + printf(" " FMT_VN, origExpr->gtVNPair.GetLiberal()); printf(" from " FMT_BB " into PreHeader " FMT_BB " for loop " FMT_LP " <" FMT_BB ".." FMT_BB ">:\n", exprBb->bbNum, optLoopTable[lnum].lpHead->bbNum, lnum, optLoopTable[lnum].lpTop->bbNum, optLoopTable[lnum].lpBottom->bbNum); @@ -6279,26 +6279,11 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) m_loopsConsidered++; #endif // LOOP_HOIST_STATS - VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); BasicBlockList* preHeadersOfChildLoops = nullptr; - BasicBlockList* firstPreHeader = nullptr; + BasicBlockList* firstPreHeader = nullptr; if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) { -// // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops. -// // TODO-Cleanup: we should have a set abstraction for loops. -// if (hoistedInCurLoop != nullptr) -// { -// for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) -// { -//#ifdef DEBUG -// bool b; -// assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b)); -//#endif -// hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true); -// } -// } - BitVecTraits m_visitedTraits(fgBBNumMax * 2, this); BitVec m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)); @@ -6311,6 +6296,7 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) childLoops.Push(child); } + // Push the fresh cache to collect the VNs for all the child loops hoistCtxt->PushVnSetForSiblingLoop(this); while (!childLoops.Empty()) { @@ -6332,39 +6318,10 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) } } - /*BasicBlockList* preHeadersForThisLoop = */ - //while (preHeadersForThisLoop != nullptr) - //{ - // BasicBlock* preHeaderBlock = preHeadersForThisLoop->block; - - // if (BitVecTraits::GetSize(&m_visitedTraits) < preHeaderBlock->bbNum) - // { - // // We don't know how many blocks will be added and hence we would grow - // // the bitset here. - // m_visitedTraits = BitVecTraits(BitVecTraits::GetSize(&m_visitedTraits) * 2, this); - // m_visited = BitVecOps::MakeCopy(&m_visitedTraits, m_visited); - // } - - // if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, preHeaderBlock->bbNum)) - // { - // BitVecOps::AddElemD(&m_visitedTraits, m_visited, preHeaderBlock->bbNum); - // JITDUMP(" PREHEADER: " FMT_BB "\n", preHeaderBlock->bbNum); - - // - // preHeadersOfChildLoops = - // new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, preHeadersOfChildLoops); - // /*if (preHeadersOfChildLoops->next == nullptr) - // { - // firstPreHeader = preHeadersOfChildLoops; - // }*/ - - // //preHeadersOfChildLoops.Push(preHeaderBlock); - // } - - // preHeadersForThisLoop = preHeadersForThisLoop->next; - //} if (optLoopTable[child].lpFlags & LPFLG_HAS_PREHEAD) { + // If any preheaders were found, add them to the tracking list + BasicBlock* preHeaderBlock = optLoopTable[child].lpHead; if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, preHeaderBlock->bbNum)) { @@ -6380,43 +6337,20 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) } else { - preHeadersOfChildLoops->next = new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, nullptr); + preHeadersOfChildLoops->next = + new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, nullptr); preHeadersOfChildLoops = preHeadersOfChildLoops->next; } - - /*preHeadersOfChildLoops = - new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, preHeadersOfChildLoops);*/ - /*if (preHeadersOfChildLoops->next == nullptr) - { - firstPreHeader = preHeadersOfChildLoops; - }*/ - - //preHeadersOfChildLoops.Push(preHeaderBlock); } } } hoistCtxt->PopVnSetForSiblingLoop(); - - //// Now remove them. - //// TODO-Cleanup: we should have a set abstraction for loops. - //if (hoistedInCurLoop != nullptr) - //{ - // for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) - // { - // // Note that we asserted when we added these that they hadn't been members, so removing is appropriate. - // hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get()); - // } - //} } - // Reset the VNs tracked for sibling loop - hoistCtxt->ResetHoistedInSiblingLoop(); optHoistThisLoop(lnum, hoistCtxt, firstPreHeader); } -void Compiler::optHoistThisLoop(unsigned lnum, - LoopHoistContext* hoistCtxt, - BasicBlockList* existingPreHeaders) +void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; @@ -6528,23 +6462,10 @@ void Compiler::optHoistThisLoop(unsigned lnum, // or side-effect dependent things. // // We really should consider hoisting from conditionally executed blocks, if they are frequently executed - // and it is safe to evaluate the tree early. - // - // In particular if we have a loop nest, when scanning the outer loop we should consider hoisting from blocks - // in enclosed loops. However, this is likely to scale poorly, and we really should instead start - // hoisting inner to outer. + // and it is safe to evaluate the tree early // ArrayStack defExec(getAllocatorLoopHoist()); - - /*while (!existingPreHeaders.Empty()) - { - BasicBlock* preHeaderBlock = existingPreHeaders.Pop(); - JITDUMP(" Considering hoisting in preheader block " FMT_BB " added for the nested loop \n", - preHeaderBlock->bbNum); - defExec.Push(preHeaderBlock); - }*/ - bool pushAllPreheaders = false; if (pLoopDsc->lpExitCnt == 1) @@ -6555,8 +6476,10 @@ void Compiler::optHoistThisLoop(unsigned lnum, pLoopDsc->lpExit->bbNum); // Push dominators, until we reach "entry" or exit the loop. - // Also push the preheaders that were added for the nested loops, if any. - BasicBlock* cur = pLoopDsc->lpExit; + // Also push the preheaders that were added for the nested loops, + // if any, along the way such that the final block list is arranged + // in execution order + BasicBlock* cur = pLoopDsc->lpExit; BasicBlockList* preHeadersList = existingPreHeaders; while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry) @@ -6626,27 +6549,9 @@ void Compiler::optHoistThisLoop(unsigned lnum, } } - JITDUMP(" -- " FMT_BB " (entry block)\n", pLoopDsc->lpEntry->bbNum); + JITDUMP(" -- " FMT_BB " (entry block)\n", pLoopDsc->lpEntry->bbNum); defExec.Push(pLoopDsc->lpEntry); - //JITDUMP(" Considering hoisting in preheader blocks added for the nested loop:\n"); - - //while (existingPreHeaders != nullptr) - //{ - // BasicBlock* preHeaderBlock = existingPreHeaders->block; - // /*JITDUMP(" Considering hoisting in preheader block " FMT_BB " added for the nested loop \n", - // preHeaderBlock->bbNum);*/ - // JITDUMP(" -- " FMT_BB "\n", preHeaderBlock->bbNum); - // defExec.Push(preHeaderBlock); - // existingPreHeaders = existingPreHeaders->next; - //} - //JITDUMP(" Considering hoisting in entry block:\n"); - //JITDUMP(" -- " FMT_BB "\n", pLoopDsc->lpEntry->bbNum); - //defExec.Push(pLoopDsc->lpEntry); - - //JITDUMP("\n"); - - optHoistLoopBlocks(lnum, &defExec, hoistCtxt); } @@ -6888,9 +6793,7 @@ void Compiler::optCopyLoopMemoryDependence(GenTree* fromTree, GenTree* toTree) // the loop, in the execution order, starting with the loop entry // block on top of the stack. // -void Compiler::optHoistLoopBlocks(unsigned loopNum, - ArrayStack* blocks, - LoopHoistContext* hoistContext) +void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext) { class HoistVisitor : public GenTreeVisitor { @@ -6920,15 +6823,11 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, } }; - ArrayStack m_valueStack; - bool m_beforeSideEffect; - unsigned m_loopNum; - LoopHoistContext* m_hoistContext; - BasicBlock* m_currentBlock; - BasicBlock* m_loopPreHeader; - //BasicBlockList* m_preHeadersList; - //BasicBlockList* m_firstPreHeader; - //ArrayStack m_preHeadersList; + ArrayStack m_valueStack; + bool m_beforeSideEffect; + unsigned m_loopNum; + LoopHoistContext* m_hoistContext; + BasicBlock* m_currentBlock; bool IsNodeHoistable(GenTree* node) { @@ -6969,31 +6868,6 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, return vnIsInvariant; } - ////------------------------------------------------------------------------ - //// AppendPreheader: Appends the preheaders in the BasicBlockList. The list - //// populated is FIFO meaning the BasicBlock added first is the first block - //// in the list. During hoisting, we would extract them all and populate - //// them in another accumulator BasicBlockList. There, we would add the blocks - //// in LIFO order. The reason being `optHoistLoopBlocks` expects them to be - //// present in execution order. - //// - //// Arguments: - //// preHeader -- preHeader to add - //// - //void AppendPreheader(BasicBlock* preHeader) - //{ - // if (m_preHeadersList == nullptr) - // { - // m_preHeadersList = new (m_compiler, CMK_LoopHoist) BasicBlockList(preHeader, nullptr); - // //m_firstPreHeader = m_preHeadersList; - // } - // else - // { - // m_preHeadersList->next = new (m_compiler, CMK_LoopHoist) BasicBlockList(preHeader, nullptr); - // m_preHeadersList = m_preHeadersList->next; - // } - //} - bool IsHoistingOverExcepSibling(GenTree* node, bool parentIsCommaTree, bool siblingHasExcep) { JITDUMP(" [%06u]", dspTreeID(node)); @@ -7074,19 +6948,9 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, , m_loopNum(loopNum) , m_hoistContext(hoistContext) , m_currentBlock(nullptr) - /*, m_visitedTraits(compiler->fgBBNumMax * 2, compiler) - , m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)) */ - //, m_preHeadersList(nullptr) - //, m_preHeadersList(compiler->getAllocatorLoopHoist()) - { } - /*BasicBlockList* GetNewPreHeaders() - { - return m_preHeadersList; - }*/ - void HoistBlock(BasicBlock* block) { m_currentBlock = block; @@ -7099,12 +6963,6 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, if (top.m_hoistable) { m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loopNum, m_hoistContext); - if ((m_compiler->optLoopTable[m_loopNum].lpFlags & LPFLG_HAS_PREHEAD) != 0) - { - //AppendPreheader(m_compiler->optLoopTable[m_loopNum].lpHead); - /*m_preHeadersList = new (m_compiler, CMK_LoopHoist) - BasicBlockList(m_compiler->optLoopTable[m_loopNum].lpHead, m_preHeadersList);*/ - } } else { @@ -7457,11 +7315,6 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, // Don't hoist this tree again. value.m_hoistable = false; value.m_invariant = false; - - //AppendPreheader(m_compiler->optLoopTable[m_loopNum].lpHead); - /*m_preHeadersList = new (m_compiler, CMK_LoopHoist) - BasicBlockList(m_compiler->optLoopTable[m_loopNum].lpHead, m_preHeadersList);*/ - } else if (value.Node() != tree) { @@ -7523,8 +7376,6 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, visitor.HoistBlock(block); } - - //return visitor.GetNewPreHeaders(); } void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnum, LoopHoistContext* hoistCtxt) @@ -7541,20 +7392,19 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu VNSet* vnSetHoistedForSibling = hoistCtxt->GetVnSetHoistedForSiblingLoop(); if ((vnSetHoistedForSibling != nullptr) && (vnSetHoistedForSibling->Lookup(tree->gtVNPair.GetLiberal()))) { - printf("Sibling loop -- " FMT_LP " : %s\n", lnum, this->info.compMethodName); + // already hoisted in a sibling loop, so don't hoist this expression. + JITDUMP(" [%06u] ... already hoisted " FMT_VN " VN in sibling loop\n", dspTreeID(tree), tree->gtVNPair.GetLiberal()); - - // already hoisted in a sibling loop, so don't hoist this expression. return; } if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal())) { - //printf("This loop -- " FMT_LP " : %s\n", lnum, this->info.compMethodName); + // already hoisted this expression in the current loop, so don't hoist this expression. + JITDUMP(" [%06u] ... already hoisted " FMT_VN " in " FMT_LP "\n ", dspTreeID(tree), tree->gtVNPair.GetLiberal(), lnum); - // already hoisted this expression in the current loop, so don't hoist this expression. return; } From 63c05e309f70b7afd6c5c891afd1bdb4998b7fb4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 29 Apr 2022 21:06:59 -0700 Subject: [PATCH 14/25] Revert "Minor fix to display IG01 weight correctly" This reverts commit 757120e863b2da188db2593da1b7142fd1ecf191. --- src/coreclr/jit/codegencommon.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index ed16b9a9fa498..327d04dcbaf6e 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1745,7 +1745,6 @@ void CodeGen::genGenerateMachineCode() { compiler->opts.disAsm = true; } - compiler->compCurBB = compiler->fgFirstBB; if (compiler->opts.disAsm) { From 11925c6045b67034402005707b42323f5158956f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 9 May 2022 12:04:08 -0700 Subject: [PATCH 15/25] simplify code --- src/coreclr/jit/optimizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index d48f3da3e2b85..fce4e89bfaa4e 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6868,7 +6868,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo return vnIsInvariant; } - bool IsHoistingOverExcepSibling(GenTree* node, bool parentIsCommaTree, bool siblingHasExcep) + bool IsHoistingOverExcepSibling(GenTree* node, bool siblingHasExcep) { JITDUMP(" [%06u]", dspTreeID(node)); @@ -6876,7 +6876,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo { // If the hoistable node has any side effects, make sure // we don't hoist it past a sibling that throws any exception. - if (parentIsCommaTree && siblingHasExcep) + if (siblingHasExcep) { JITDUMP(" not hoistable: cannot move past node that throws exception.\n"); return false; @@ -7307,7 +7307,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo { assert(value.Node() != tree); - if (IsHoistingOverExcepSibling(value.Node(), isCommaTree, hasExcep)) + if (!isCommaTree || IsHoistingOverExcepSibling(value.Node(), hasExcep)) { m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); } From ee1ed34671a9f8d2c55b1c8a6429c6543f7112c9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 1 Jun 2022 16:42:41 -0700 Subject: [PATCH 16/25] Remove m_hoistedVNInSiblingLoop --- src/coreclr/jit/compiler.h | 55 ++++------------------------------- src/coreclr/jit/optimizer.cpp | 28 ------------------ 2 files changed, 6 insertions(+), 77 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 907bfbb7460e1..61f9108ccea81 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5935,10 +5935,6 @@ class Compiler VNSet* m_pHoistedInCurLoop; public: - // Value numbers of expressions that have been hoisted in sibling loops in the loop nest - // at the same level. - ArrayStack m_hoistedVNInSiblingLoop; - // Value numbers of expressions that have been hoisted in the current (or most recent) loop in the nest. // Previous decisions on loop-invariance of value numbers in the current loop. VNSet m_curLoopVnInvariantCache; @@ -5953,67 +5949,28 @@ class Compiler return m_pHoistedInCurLoop; } - // Return the so far collected VNs in cache for current loop and reset it. - VNSet* ExtractHoistedInCurLoop() - { - VNSet* res = m_pHoistedInCurLoop; - m_pHoistedInCurLoop = nullptr; - return res; - } - - // Get the VN cache for recent sibling loop - VNSet* GetVnSetHoistedForSiblingLoop() - { - if (m_hoistedVNInSiblingLoop.Empty()) - { - return nullptr; - } - return m_hoistedVNInSiblingLoop.Top(); - } - - void PushVnSetForSiblingLoop(Compiler* comp) - { - VNSet* vnSet = new (comp->getAllocatorLoopHoist()) VNSet(comp->getAllocatorLoopHoist()); - m_hoistedVNInSiblingLoop.Push(vnSet); - } - - VNSet* PopVnSetForSiblingLoop() - { - return m_hoistedVNInSiblingLoop.Pop(); - } - LoopHoistContext(Compiler* comp) - : m_pHoistedInCurLoop(nullptr) - , m_hoistedVNInSiblingLoop(comp->getAllocatorLoopHoist()) - , m_curLoopVnInvariantCache(comp->getAllocatorLoopHoist()) + : m_pHoistedInCurLoop(nullptr), m_curLoopVnInvariantCache(comp->getAllocatorLoopHoist()) { } }; // Do hoisting of all loops nested within loop "lnum" (an index into the optLoopTable), followed // by the loop "lnum" itself. - // Tracks the expressions that have been hoisted by the sibling loops that are present at the same - // level by temporarily recording their value numbers in "m_hoistedVNInSiblingLoop". The expressions - // that are already hoisted won't be hoisted again and CSE will remove them anyways. // - // Both "m_pHoistedInCurLoop" and "m_hoistedVNInSiblingLoop" helps a lot in eliminating duplicate - // expressions getting hoisted and reducing the count of total expressions hoisted out of loop. - // When calculating the profitability, we compare this with number of registers and hence, lower - // the number of expressions getting hoisted, better chances that they will get enregistered and CSE - // considering them. + // Both "m_pHoistedInCurLoop" helps a lot in eliminating duplicate expressions getting hoisted + // and reducing the count of total expressions hoisted out of loop. When calculating the + // profitability, we compare this with number of registers and hence, lower the number of expressions + // getting hoisted, better chances that they will get enregistered and CSE considering them. // void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) - // Assumes that expressions have been hoisted in sibling loops if their value numbers are in - // "m_hoistedVNInSiblingLoop". - // // Returns the new preheaders created. void optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders); // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) - // outside of that loop. Exempt expressions whose value number is in "m_hoistedVNInSiblingLoop"; - // add VN's of hoisted expressions to "hoistInLoop". + // outside of that loop. void optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext); // Return true if the tree looks profitable to hoist out of loop 'lnum'. diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index fce4e89bfaa4e..0387b05edc7e8 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6296,27 +6296,10 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) childLoops.Push(child); } - // Push the fresh cache to collect the VNs for all the child loops - hoistCtxt->PushVnSetForSiblingLoop(this); while (!childLoops.Empty()) { unsigned child = childLoops.Pop(); optHoistLoopNest(child, hoistCtxt); - VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); - - // Add the ones hoisted in "lnum" to "hoistedInSibling" for any sibling loops. - // TODO-Cleanup: we should have a set abstraction for loops. - if (hoistedInCurLoop != nullptr) - { - for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) - { -#ifdef DEBUG - bool b; - assert(!hoistCtxt->GetVnSetHoistedForSiblingLoop()->Lookup(keys.Get(), &b)); -#endif - hoistCtxt->GetVnSetHoistedForSiblingLoop()->Set(keys.Get(), true); - } - } if (optLoopTable[child].lpFlags & LPFLG_HAS_PREHEAD) { @@ -6344,7 +6327,6 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) } } } - hoistCtxt->PopVnSetForSiblingLoop(); } optHoistThisLoop(lnum, hoistCtxt, firstPreHeader); @@ -7389,16 +7371,6 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu return; } - VNSet* vnSetHoistedForSibling = hoistCtxt->GetVnSetHoistedForSiblingLoop(); - if ((vnSetHoistedForSibling != nullptr) && (vnSetHoistedForSibling->Lookup(tree->gtVNPair.GetLiberal()))) - { - // already hoisted in a sibling loop, so don't hoist this expression. - - JITDUMP(" [%06u] ... already hoisted " FMT_VN " VN in sibling loop\n", dspTreeID(tree), - tree->gtVNPair.GetLiberal()); - return; - } - if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal())) { // already hoisted this expression in the current loop, so don't hoist this expression. From c66bb7d807ba801191105e9330765122d6b4d182 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 2 Jun 2022 13:52:17 -0700 Subject: [PATCH 17/25] Add back ResetHoistedInCurLoop Fix igWeight --- src/coreclr/jit/codegencommon.cpp | 1 + src/coreclr/jit/compiler.h | 7 +++++++ src/coreclr/jit/optimizer.cpp | 2 ++ 3 files changed, 10 insertions(+) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 327d04dcbaf6e..ed16b9a9fa498 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1745,6 +1745,7 @@ void CodeGen::genGenerateMachineCode() { compiler->opts.disAsm = true; } + compiler->compCurBB = compiler->fgFirstBB; if (compiler->opts.disAsm) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 61f9108ccea81..a37962fe27d22 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5949,6 +5949,13 @@ class Compiler return m_pHoistedInCurLoop; } + // Return the so far collected VNs in cache for current loop and reset it. + void ResetHoistedInCurLoop() + { + m_pHoistedInCurLoop = nullptr; + JITDUMP("Resetting m_pHoistedInCurLoop\n"); + } + LoopHoistContext(Compiler* comp) : m_pHoistedInCurLoop(nullptr), m_curLoopVnInvariantCache(comp->getAllocatorLoopHoist()) { diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 0387b05edc7e8..38eb0a7907ca9 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -7358,6 +7358,8 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo visitor.HoistBlock(block); } + + hoistContext->ResetHoistedInCurLoop(); } void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnum, LoopHoistContext* hoistCtxt) From 575cf22b864e102538978f1377eae09cac5eadef Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 8 Jun 2022 09:44:00 -0700 Subject: [PATCH 18/25] Remove reversal of loop processing order --- src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/optimizer.cpp | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index a42a72934033c..98619702deaf2 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -4520,7 +4520,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // We generate `xorp* tgtReg, tgtReg` which is 3-5 bytes // but which can be elided by the instruction decoder. - costEx = 1; + costEx = 1; //2; costSz = 2; } else diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 38eb0a7907ca9..dd5c34a8363f3 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6287,18 +6287,9 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) BitVecTraits m_visitedTraits(fgBBNumMax * 2, this); BitVec m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)); - // Since loops in optLoopTable are arranged in reverse execution order, arrange - // them back in execution order before processing. - ArrayStack childLoops(getAllocatorLoopHoist()); for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { - childLoops.Push(child); - } - - while (!childLoops.Empty()) - { - unsigned child = childLoops.Pop(); optHoistLoopNest(child, hoistCtxt); if (optLoopTable[child].lpFlags & LPFLG_HAS_PREHEAD) From 523bdcd5aba57f031e41814be186bb8faf6589e5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 Jun 2022 07:15:12 -0700 Subject: [PATCH 19/25] jit format --- src/coreclr/jit/gentree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 98619702deaf2..20c6cc8ae0775 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -4520,7 +4520,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // We generate `xorp* tgtReg, tgtReg` which is 3-5 bytes // but which can be elided by the instruction decoder. - costEx = 1; //2; + costEx = 1; // 2; costSz = 2; } else From 544ee16b2138cd18786e44d58254cab0ac9a0793 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 Jun 2022 07:28:36 -0700 Subject: [PATCH 20/25] Experimental: Also generate PerfScore: --- src/coreclr/scripts/superpmi_diffs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/scripts/superpmi_diffs.py b/src/coreclr/scripts/superpmi_diffs.py index 47243ad43d3a2..b225cf1a09900 100644 --- a/src/coreclr/scripts/superpmi_diffs.py +++ b/src/coreclr/scripts/superpmi_diffs.py @@ -207,6 +207,8 @@ def main(main_args): os.path.join(script_dir, "superpmi.py"), "asmdiffs", "--no_progress", + "--metrics", "CodeSize", + "--metrics", "PerfScore", "-core_root", core_root_dir, "-target_os", platform_name, "-target_arch", arch_name, From 76524601f202bb37b0581ebcf87e5998f3919329 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 Jun 2022 19:46:24 -0700 Subject: [PATCH 21/25] review feedback --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/optimizer.cpp | 7 +++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a37962fe27d22..569fd27c076e4 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5965,7 +5965,7 @@ class Compiler // Do hoisting of all loops nested within loop "lnum" (an index into the optLoopTable), followed // by the loop "lnum" itself. // - // Both "m_pHoistedInCurLoop" helps a lot in eliminating duplicate expressions getting hoisted + // "m_pHoistedInCurLoop" helps a lot in eliminating duplicate expressions getting hoisted // and reducing the count of total expressions hoisted out of loop. When calculating the // profitability, we compare this with number of registers and hence, lower the number of expressions // getting hoisted, better chances that they will get enregistered and CSE considering them. diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 20c6cc8ae0775..a42a72934033c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -4520,7 +4520,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // We generate `xorp* tgtReg, tgtReg` which is 3-5 bytes // but which can be elided by the instruction decoder. - costEx = 1; // 2; + costEx = 1; costSz = 2; } else diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index dd5c34a8363f3..fde7c9c960af6 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6450,8 +6450,11 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, Basi // Push dominators, until we reach "entry" or exit the loop. // Also push the preheaders that were added for the nested loops, - // if any, along the way such that the final block list is arranged - // in execution order + // if any, along the way such that in the final list, the dominating + // blocks are visited before the dominated blocks. + // + // TODO-CQ: In future, we should create preheaders upfront before building + // dominators so we don't have to do this extra work here. BasicBlock* cur = pLoopDsc->lpExit; BasicBlockList* preHeadersList = existingPreHeaders; From a4c83784acfdc3b011a050c123e5b09ad2012543 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 Jun 2022 19:46:33 -0700 Subject: [PATCH 22/25] fix the superpmi script --- src/coreclr/scripts/superpmi_diffs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/scripts/superpmi_diffs.py b/src/coreclr/scripts/superpmi_diffs.py index b225cf1a09900..33050897d0b2e 100644 --- a/src/coreclr/scripts/superpmi_diffs.py +++ b/src/coreclr/scripts/superpmi_diffs.py @@ -207,8 +207,8 @@ def main(main_args): os.path.join(script_dir, "superpmi.py"), "asmdiffs", "--no_progress", - "--metrics", "CodeSize", - "--metrics", "PerfScore", + "-metrics", "CodeSize", + "-metrics", "PerfScore", "-core_root", core_root_dir, "-target_os", platform_name, "-target_arch", arch_name, From dba2e9fbfc4af98061edf2fffda7c4795b6072a3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 12 Jun 2022 14:15:11 -0700 Subject: [PATCH 23/25] Revert superpmi asmdiffs change --- src/coreclr/scripts/superpmi_diffs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/coreclr/scripts/superpmi_diffs.py b/src/coreclr/scripts/superpmi_diffs.py index 33050897d0b2e..47243ad43d3a2 100644 --- a/src/coreclr/scripts/superpmi_diffs.py +++ b/src/coreclr/scripts/superpmi_diffs.py @@ -207,8 +207,6 @@ def main(main_args): os.path.join(script_dir, "superpmi.py"), "asmdiffs", "--no_progress", - "-metrics", "CodeSize", - "-metrics", "PerfScore", "-core_root", core_root_dir, "-target_os", platform_name, "-target_arch", arch_name, From 3c5f3b4678ba368d1eeea73857c03236a7dfd8fc Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 13 Jun 2022 13:45:49 -0700 Subject: [PATCH 24/25] Rename method --- src/coreclr/jit/optimizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index fde7c9c960af6..c1cef19403bef 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6844,7 +6844,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo return vnIsInvariant; } - bool IsHoistingOverExcepSibling(GenTree* node, bool siblingHasExcep) + bool IsHoistableOverExcepSibling(GenTree* node, bool siblingHasExcep) { JITDUMP(" [%06u]", dspTreeID(node)); @@ -7283,7 +7283,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo { assert(value.Node() != tree); - if (!isCommaTree || IsHoistingOverExcepSibling(value.Node(), hasExcep)) + if (IsHoistableOverExcepSibling(value.Node(), hasExcep)) { m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); } From 7b0287dfe442986051214160320bea4f22e3975a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 13 Jun 2022 15:55:15 -0700 Subject: [PATCH 25/25] Add a comment --- src/coreclr/jit/optimizer.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index c1cef19403bef..4cf5db0a250d0 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -7272,6 +7272,12 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo // cctor dependent node is initially not hoistable and may become hoistable later, // when its parent comma node is visited. // + // TODO-CQ: Ideally, we should be hoisting all the nodes having side-effects in execution + // order as well as the ones that don't have side-effects at all. However, currently, we + // just restrict hoisting a node(s) (that are children of `comma`) if one of the siblings + // (which is executed before the given node) has side-effects (exceptions). "Descendants + // of ancestors might have side-effects and we might hoist nodes past them. This needs + // to be addressed properly. bool visitedCurr = false; bool isCommaTree = tree->OperIs(GT_COMMA); bool hasExcep = false;