From 6d1cab045da00bd65cfa3156235779d8cc5bcc49 Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Sun, 6 Nov 2022 23:02:20 +0300 Subject: [PATCH 1/6] Migrate [Get|With]Element to local morph TODO: don't use "ReplaceWith". --- src/coreclr/jit/lclmorph.cpp | 82 ++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index c1d57adf58208..d68603d1eb0f1 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -294,6 +294,8 @@ class LocalAddressVisitor final : public GenTreeVisitor None, Nop, BitCast, + GetElement, + WithElement, LclVar, LclFld }; @@ -909,13 +911,45 @@ class LocalAddressVisitor final : public GenTreeVisitor case IndirTransform::BitCast: indir->ChangeOper(GT_BITCAST); - indir->gtGetOp1()->ChangeOper(GT_LCL_VAR); - indir->gtGetOp1()->ChangeType(varDsc->TypeGet()); - indir->gtGetOp1()->AsLclVar()->SetLclNum(lclNum); - lclNode = indir->gtGetOp1()->AsLclVarCommon(); + lclNode = BashToLclVar(indir->gtGetOp1(), lclNum); break; +#ifdef FEATURE_HW_INTRINSICS + case IndirTransform::GetElement: + { + var_types elementType = indir->TypeGet(); + assert(elementType == TYP_FLOAT); + + lclNode = BashToLclVar(indir->gtGetOp1(), lclNum); + GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType)); + GenTree* hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, + CORINFO_TYPE_FLOAT, genTypeSize(varDsc), + /* isSimdAsHWIntrinsic */ false); + indir->ReplaceWith(hwiNode, m_compiler); + } + break; + + case IndirTransform::WithElement: + { + assert(user->OperIs(GT_ASG) && (user->gtGetOp1() == indir)); + var_types elementType = indir->TypeGet(); + assert(elementType == TYP_FLOAT); + + lclNode = BashToLclVar(indir, lclNum); + GenTree* simdLclNode = m_compiler->gtNewLclvNode(lclNum, varDsc->TypeGet()); + GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType)); + GenTree* elementNode = user->gtGetOp2(); + user->AsOp()->gtOp2 = + m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode, elementNode, + CORINFO_TYPE_FLOAT, genTypeSize(varDsc), + /* isSimdAsHWIntrinsic */ false); + user->ChangeType(varDsc->TypeGet()); + } + break; +#endif // FEATURE_HW_INTRINSICS + case IndirTransform::LclVar: + // TODO-ADDR: use "BashToLclVar" here. if (indir->TypeGet() != varDsc->TypeGet()) { assert(genTypeSize(indir) == genTypeSize(varDsc)); // BOOL <-> UBYTE. @@ -996,14 +1030,6 @@ class LocalAddressVisitor final : public GenTreeVisitor return IndirTransform::LclVar; } - if (varTypeIsSIMD(varDsc)) - { - // TODO-ADDR: skip SIMD variables for now, fgMorphFieldAssignToSimdSetElement and - // fgMorphFieldToSimdGetElement need to be updated to recognize LCL_FLDs or moved - // here. - return IndirTransform::None; - } - // Bool and ubyte are the same type. if ((indir->TypeIs(TYP_BOOL) && (varDsc->TypeGet() == TYP_UBYTE)) || (indir->TypeIs(TYP_UBYTE) && (varDsc->TypeGet() == TYP_BOOL))) @@ -1011,9 +1037,10 @@ class LocalAddressVisitor final : public GenTreeVisitor return IndirTransform::LclVar; } + bool isDef = user->OperIs(GT_ASG) && (user->gtGetOp1() == indir); + // For small locals on the LHS we can ignore the signed/unsigned diff. - if (user->OperIs(GT_ASG) && (user->gtGetOp1() == indir) && - (varTypeToSigned(indir) == varTypeToSigned(varDsc))) + if (isDef && (varTypeToSigned(indir) == varTypeToSigned(varDsc))) { assert(varTypeIsSmall(indir)); return IndirTransform::LclVar; @@ -1024,6 +1051,12 @@ class LocalAddressVisitor final : public GenTreeVisitor return IndirTransform::LclFld; } + if (varTypeIsSIMD(varDsc) && indir->TypeIs(TYP_FLOAT) && ((val.Offset() % genTypeSize(TYP_FLOAT)) == 0) && + m_compiler->IsBaselineSimdIsaSupported()) + { + return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; + } + // Turn this into a bitcast if we can. if ((genTypeSize(indir) == genTypeSize(varDsc)) && (varTypeIsFloating(indir) || varTypeIsFloating(varDsc))) { @@ -1298,6 +1331,27 @@ class LocalAddressVisitor final : public GenTreeVisitor { return (user == nullptr) || (user->OperIs(GT_COMMA) && (user->AsOp()->gtGetOp1() == node)); } + + //------------------------------------------------------------------------ + // BashToLclVar: Bash node to a LCL_VAR. + // + // Arguments: + // node - the node to bash + // lclNum - the local's number + // + // Return Value: + // The bashed node. + // + GenTreeLclVar* BashToLclVar(GenTree* node, unsigned lclNum) + { + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); + + node->ChangeOper(GT_LCL_VAR); + node->ChangeType(varDsc->lvNormalizeOnLoad() ? varDsc->TypeGet() : genActualType(varDsc)); + node->AsLclVar()->SetLclNum(lclNum); + + return node->AsLclVar(); + } }; //------------------------------------------------------------------------ From 19072fd007f87a152333671b45b3669ef99eeb81 Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Sun, 6 Nov 2022 23:03:26 +0300 Subject: [PATCH 2/6] Delete code from morph --- src/coreclr/jit/compiler.h | 2 - src/coreclr/jit/morph.cpp | 167 ------------------------------------- 2 files changed, 169 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 193399040296a..0ad68c9173e67 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5696,8 +5696,6 @@ class Compiler unsigned* indexOut, unsigned* simdSizeOut, bool ignoreUsedInSIMDIntrinsic = false); - GenTree* fgMorphFieldAssignToSimdSetElement(GenTree* tree); - GenTree* fgMorphFieldToSimdGetElement(GenTree* tree); bool fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt); void impMarkContiguousSIMDFieldAssignments(Statement* stmt); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 34c15bb737b2b..ae262eb92e6d1 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -4984,30 +4984,6 @@ GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac) noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) || ((tree->gtFlags & GTF_GLOB_REF) != 0)); -#ifdef FEATURE_SIMD - // if this field belongs to simd struct, translate it to simd intrinsic. - if (mac == nullptr) - { - if (IsBaselineSimdIsaSupported()) - { - GenTree* newTree = fgMorphFieldToSimdGetElement(tree); - if (newTree != tree) - { - newTree = fgMorphTree(newTree); - return newTree; - } - } - } - else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1())) - { - GenTreeLclVarCommon* lcl = objRef->IsLocalAddrExpr(); - if (lcl != nullptr) - { - lvaSetVarDoNotEnregister(lcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField)); - } - } -#endif - // Create a default MorphAddrContext early so it doesn't go out of scope // before it is used. MorphAddrContext defMAC(MACK_Ind); @@ -9255,129 +9231,6 @@ GenTree* Compiler::getSIMDStructFromField(GenTree* tree, return nullptr; } - -/***************************************************************************** -* If a read operation tries to access simd struct field, then transform the operation -* to the SimdGetElementNode, and return the new tree. Otherwise, return the old tree. -* Argument: -* tree - GenTree*. If this pointer points to simd struct which is used for simd -* intrinsic, we will morph it as simd intrinsic NI_Vector128_GetElement. -* Return: -* A GenTree* which points to the new tree. If the tree is not for simd intrinsic, -* return nullptr. -*/ - -GenTree* Compiler::fgMorphFieldToSimdGetElement(GenTree* tree) -{ - unsigned index = 0; - CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - unsigned simdSize = 0; - GenTree* simdStructNode = getSIMDStructFromField(tree, &simdBaseJitType, &index, &simdSize); - - if (simdStructNode != nullptr) - { - var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); - GenTree* op2 = gtNewIconNode(index, TYP_INT); - - assert(simdSize <= 32); - assert(simdSize >= ((index + 1) * genTypeSize(simdBaseType))); - -#if defined(TARGET_XARCH) - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - case TYP_INT: - case TYP_UINT: - case TYP_LONG: - case TYP_ULONG: - { - if (!compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - return tree; - } - break; - } - - case TYP_DOUBLE: - case TYP_FLOAT: - case TYP_SHORT: - case TYP_USHORT: - { - if (!compOpportunisticallyDependsOn(InstructionSet_SSE2)) - { - return tree; - } - break; - } - - default: - { - unreached(); - } - } -#elif defined(TARGET_ARM64) - if (!compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) - { - return tree; - } -#endif // !TARGET_XARCH && !TARGET_ARM64 - - tree = gtNewSimdGetElementNode(simdBaseType, simdStructNode, op2, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ true); - } - return tree; -} - -/***************************************************************************** -* Transform an assignment of a SIMD struct field to SimdWithElementNode, and -* return a new tree. If it is not such an assignment, then return the old tree. -* Argument: -* tree - GenTree*. If this pointer points to simd struct which is used for simd -* intrinsic, we will morph it as simd intrinsic set. -* Return: -* A GenTree* which points to the new tree. If the tree is not for simd intrinsic, -* return nullptr. -*/ - -GenTree* Compiler::fgMorphFieldAssignToSimdSetElement(GenTree* tree) -{ - assert(tree->OperGet() == GT_ASG); - - unsigned index = 0; - CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - unsigned simdSize = 0; - GenTree* simdStructNode = getSIMDStructFromField(tree->gtGetOp1(), &simdBaseJitType, &index, &simdSize); - - if (simdStructNode != nullptr) - { - var_types simdType = simdStructNode->gtType; - var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); - - assert(simdSize <= 32); - assert(simdSize >= ((index + 1) * genTypeSize(simdBaseType))); - - GenTree* op2 = gtNewIconNode(index, TYP_INT); - GenTree* op3 = tree->gtGetOp2(); - NamedIntrinsic intrinsicId = NI_Vector128_WithElement; - - GenTree* target = gtClone(simdStructNode); - assert(target != nullptr); - - GenTree* simdTree = gtNewSimdWithElementNode(simdType, simdStructNode, op2, op3, simdBaseJitType, simdSize, - /* isSimdAsHWIntrinsic */ true); - - tree->AsOp()->gtOp1 = target; - tree->AsOp()->gtOp2 = simdTree; - -#ifdef DEBUG - tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif - } - - return tree; -} - #endif // FEATURE_SIMD //------------------------------------------------------------------------------ @@ -9591,26 +9444,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA noway_assert(op1 == tree->AsOp()->gtOp1); op2 = tree->AsOp()->gtOp2; -#ifdef FEATURE_SIMD - if (IsBaselineSimdIsaSupported()) - { - // We should check whether op2 should be assigned to a SIMD field or not. - // If it is, we should translate the tree to simd intrinsic. - assert(!fgGlobalMorph || ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0)); - GenTree* newTree = fgMorphFieldAssignToSimdSetElement(tree); - typ = tree->TypeGet(); - op1 = tree->gtGetOp1(); - op2 = tree->gtGetOp2(); -#ifdef DEBUG - assert((tree == newTree) && (tree->OperGet() == oper)); - if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0) - { - tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED; - } -#endif // DEBUG - } -#endif - // We can't CSE the LHS of an assignment. Only r-values can be CSEed. // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former // behavior, allow CSE'ing if is a struct type (or a TYP_REF transformed from a struct type) From ff2849fc0e9b037102e52dca28f53c9d3010e53c Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Mon, 7 Nov 2022 00:25:25 +0300 Subject: [PATCH 3/6] Move fgMorphCombineSIMDFieldAssignments to local morph --- src/coreclr/jit/lclmorph.cpp | 152 +++++++++++++++++++++++++---- src/coreclr/jit/morph.cpp | 182 ----------------------------------- src/coreclr/jit/simd.cpp | 6 +- 3 files changed, 136 insertions(+), 204 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index d68603d1eb0f1..c90059aaefc56 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1172,16 +1172,23 @@ class LocalAddressVisitor final : public GenTreeVisitor // the promoted local would look like "{ int a, B }", while the IR would contain "FIELD" // nodes for the outer struct "A". // - if (indir->TypeIs(TYP_STRUCT)) + // TODO-1stClassStructs: delete this once "IND" nodes are no more. + if (indir->OperIs(GT_IND) && indir->TypeIs(TYP_STRUCT)) { - // TODO-1stClassStructs: delete this once "IND" nodes are no more. - if (indir->OperIs(GT_IND)) - { - // We do not have a layout for this node. - return; - } + return; + } + + ClassLayout* layout = indir->TypeIs(TYP_STRUCT) ? indir->GetLayout(m_compiler) : nullptr; + unsigned indSize = indir->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(indir); + if (indSize > genTypeSize(fieldType)) + { + // Retargeting this indirection to reference the promoted field would make it + // "wide", address-exposing the whole parent struct (with all of its fields). + return; + } - ClassLayout* layout = indir->GetLayout(m_compiler); + if (indir->TypeIs(TYP_STRUCT)) + { indir->SetOper(GT_OBJ); indir->AsBlk()->SetLayout(layout); indir->AsBlk()->gtBlkOpKind = GenTreeBlk::BlkOpKindInvalid; @@ -1368,6 +1375,7 @@ class LocalAddressVisitor final : public GenTreeVisitor // PhaseStatus Compiler::fgMarkAddressExposedLocals() { + bool madeChanges = false; LocalAddressVisitor visitor(this); for (BasicBlock* const block : Blocks()) @@ -1377,27 +1385,129 @@ PhaseStatus Compiler::fgMarkAddressExposedLocals() for (Statement* const stmt : block->Statements()) { +#ifdef FEATURE_SIMD + if (opts.OptimizationEnabled() && stmt->GetRootNode()->TypeIs(TYP_FLOAT) && + stmt->GetRootNode()->OperIs(GT_ASG)) + { + madeChanges |= fgMorphCombineSIMDFieldAssignments(block, stmt); + } +#endif + visitor.VisitStmt(stmt); } } - return visitor.MadeChanges() ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; + madeChanges |= visitor.MadeChanges(); + + return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } -//------------------------------------------------------------------------ -// fgMarkAddressExposedLocals: Traverses the specified statement and marks address -// exposed locals. +#ifdef FEATURE_SIMD +//----------------------------------------------------------------------------------- +// fgMorphCombineSIMDFieldAssignments: +// If the RHS of the input stmt is a read for simd vector X Field, then this +// function will keep reading next few stmts based on the vector size(2, 3, 4). +// If the next stmts LHS are located contiguous and RHS are also located +// contiguous, then we replace those statements with one store. // -// Arguments: -// stmt - the statement to traverse +// Argument: +// block - BasicBlock*. block which stmt belongs to +// stmt - Statement*. the stmt node we want to check // -// Notes: -// Trees such as IND(ADDR(LCL_VAR)), that morph is expected to fold -// to just LCL_VAR, do not result in the involved local being marked -// address exposed. +// Return Value: +// Whether the assignments were successfully coalesced. // -void Compiler::fgMarkAddressExposedLocals(Statement* stmt) +bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt) { - LocalAddressVisitor visitor(this); - visitor.VisitStmt(stmt); + GenTree* tree = stmt->GetRootNode(); + assert(tree->OperGet() == GT_ASG); + + GenTree* originalLHS = tree->AsOp()->gtOp1; + GenTree* prevLHS = tree->AsOp()->gtOp1; + GenTree* prevRHS = tree->AsOp()->gtOp2; + unsigned index = 0; + CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + unsigned simdSize = 0; + GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &simdBaseJitType, &index, &simdSize, true); + + if ((simdStructNode == nullptr) || (index != 0) || (simdBaseJitType != CORINFO_TYPE_FLOAT)) + { + // if the RHS is not from a SIMD vector field X, then there is no need to check further. + return false; + } + + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + var_types simdType = getSIMDTypeForSize(simdSize); + int assignmentsCount = simdSize / genTypeSize(simdBaseType) - 1; + int remainingAssignments = assignmentsCount; + Statement* curStmt = stmt->GetNextStmt(); + Statement* lastStmt = stmt; + + while (curStmt != nullptr && remainingAssignments > 0) + { + GenTree* exp = curStmt->GetRootNode(); + if (exp->OperGet() != GT_ASG) + { + break; + } + GenTree* curLHS = exp->gtGetOp1(); + GenTree* curRHS = exp->gtGetOp2(); + + if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS)) + { + break; + } + + remainingAssignments--; + prevLHS = curLHS; + prevRHS = curRHS; + + lastStmt = curStmt; + curStmt = curStmt->GetNextStmt(); + } + + if (remainingAssignments > 0) + { + // if the left assignments number is bigger than zero, then this means + // that the assignments are not assigning to the contiguously memory + // locations from same vector. + return false; + } + + JITDUMP("\nFound contiguous assignments from a SIMD vector to memory.\n"); + JITDUMP("From " FMT_BB ", " FMT_STMT " to " FMT_STMT "\n", block->bbNum, stmt->GetID(), lastStmt->GetID()); + + for (int i = 0; i < assignmentsCount; i++) + { + fgRemoveStmt(block, stmt->GetNextStmt()); + } + + GenTree* dstNode; + + if (originalLHS->OperIs(GT_LCL_FLD)) + { + dstNode = originalLHS; + dstNode->gtType = simdType; + } + else + { + GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize); + dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst); + } + + JITDUMP("\n" FMT_BB " " FMT_STMT " (before):\n", block->bbNum, stmt->GetID()); + DISPSTMT(stmt); + + assert(!simdStructNode->CanCSE() && varTypeIsSIMD(simdStructNode)); + simdStructNode->ClearDoNotCSE(); + + tree = gtNewAssignNode(dstNode, simdStructNode); + + stmt->SetRootNode(tree); + + JITDUMP("\nReplaced " FMT_BB " " FMT_STMT " (after):\n", block->bbNum, stmt->GetID()); + DISPSTMT(stmt); + + return true; } +#endif // FEATURE_SIMD diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index ae262eb92e6d1..3c0f55cbea733 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14485,13 +14485,6 @@ void Compiler::fgMorphStmts(BasicBlock* block) fgRemoveStmt(block, stmt); continue; } -#ifdef FEATURE_SIMD - if (opts.OptimizationEnabled() && stmt->GetRootNode()->TypeGet() == TYP_FLOAT && - stmt->GetRootNode()->OperGet() == GT_ASG) - { - fgMorphCombineSIMDFieldAssignments(block, stmt); - } -#endif fgMorphStmt = stmt; compCurStmt = stmt; @@ -16103,181 +16096,6 @@ void Compiler::fgMarkDemotedImplicitByRefArgs() #endif // FEATURE_IMPLICIT_BYREFS } -#ifdef FEATURE_SIMD - -//----------------------------------------------------------------------------------- -// fgMorphCombineSIMDFieldAssignments: -// If the RHS of the input stmt is a read for simd vector X Field, then this function -// will keep reading next few stmts based on the vector size(2, 3, 4). -// If the next stmts LHS are located contiguous and RHS are also located -// contiguous, then we replace those statements with a copyblk. -// -// Argument: -// block - BasicBlock*. block which stmt belongs to -// stmt - Statement*. the stmt node we want to check -// -// return value: -// if this function successfully optimized the stmts, then return true. Otherwise -// return false; - -bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt) -{ - GenTree* tree = stmt->GetRootNode(); - assert(tree->OperGet() == GT_ASG); - - GenTree* originalLHS = tree->AsOp()->gtOp1; - GenTree* prevLHS = tree->AsOp()->gtOp1; - GenTree* prevRHS = tree->AsOp()->gtOp2; - unsigned index = 0; - CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - unsigned simdSize = 0; - GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &simdBaseJitType, &index, &simdSize, true); - - if (simdStructNode == nullptr || index != 0 || simdBaseJitType != CORINFO_TYPE_FLOAT) - { - // if the RHS is not from a SIMD vector field X, then there is no need to check further. - return false; - } - - var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); - var_types simdType = getSIMDTypeForSize(simdSize); - int assignmentsCount = simdSize / genTypeSize(simdBaseType) - 1; - int remainingAssignments = assignmentsCount; - Statement* curStmt = stmt->GetNextStmt(); - Statement* lastStmt = stmt; - - while (curStmt != nullptr && remainingAssignments > 0) - { - GenTree* exp = curStmt->GetRootNode(); - if (exp->OperGet() != GT_ASG) - { - break; - } - GenTree* curLHS = exp->gtGetOp1(); - GenTree* curRHS = exp->gtGetOp2(); - - if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS)) - { - break; - } - - remainingAssignments--; - prevLHS = curLHS; - prevRHS = curRHS; - - lastStmt = curStmt; - curStmt = curStmt->GetNextStmt(); - } - - if (remainingAssignments > 0) - { - // if the left assignments number is bigger than zero, then this means - // that the assignments are not assigning to the contiguously memory - // locations from same vector. - return false; - } -#ifdef DEBUG - if (verbose) - { - printf("\nFound contiguous assignments from a SIMD vector to memory.\n"); - printf("From " FMT_BB ", stmt ", block->bbNum); - printStmtID(stmt); - printf(" to stmt"); - printStmtID(lastStmt); - printf("\n"); - } -#endif - - for (int i = 0; i < assignmentsCount; i++) - { - fgRemoveStmt(block, stmt->GetNextStmt()); - } - - GenTree* dstNode; - - if (originalLHS->OperIs(GT_LCL_FLD)) - { - dstNode = originalLHS; - dstNode->gtType = simdType; - dstNode->AsLclFld()->SetLayout(nullptr); - - // This may have changed a partial local field into full local field - if (dstNode->IsPartialLclFld(this)) - { - dstNode->gtFlags |= GTF_VAR_USEASG; - } - else - { - dstNode->gtFlags &= ~GTF_VAR_USEASG; - } - } - else - { - GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize); - if (simdStructNode->OperIsLocal()) - { - setLclRelatedToSIMDIntrinsic(simdStructNode); - } - - GenTreeLclVarCommon* localDst = copyBlkDst->IsLocalAddrExpr(); - if (localDst != nullptr) - { - setLclRelatedToSIMDIntrinsic(localDst); - } - - if (simdStructNode->TypeGet() == TYP_BYREF) - { - assert(simdStructNode->OperIsLocal()); - assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->GetLclNum())); - simdStructNode = gtNewIndir(simdType, simdStructNode); - } - else - { - assert(varTypeIsSIMD(simdStructNode)); - } - - dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst); - } - -#ifdef DEBUG - if (verbose) - { - printf("\n" FMT_BB " stmt ", block->bbNum); - printStmtID(stmt); - printf("(before)\n"); - gtDispStmt(stmt); - } -#endif - - assert(!simdStructNode->CanCSE()); - simdStructNode->ClearDoNotCSE(); - - tree = gtNewAssignNode(dstNode, simdStructNode); - - stmt->SetRootNode(tree); - - // Since we generated a new address node which didn't exist before, - // we should expose this address manually here. - // TODO-ADDR: Remove this when LocalAddressVisitor transforms all - // local field access into LCL_FLDs, at that point we would be - // combining 2 existing LCL_FLDs or 2 FIELDs that do not reference - // a local and thus cannot result in a new address exposed local. - fgMarkAddressExposedLocals(stmt); - -#ifdef DEBUG - if (verbose) - { - printf("\nReplaced " FMT_BB " stmt", block->bbNum); - printStmtID(stmt); - printf("(after)\n"); - gtDispStmt(stmt); - } -#endif - return true; -} - -#endif // FEATURE_SIMD - //------------------------------------------------------------------------ // fgCheckStmtAfterTailCall: check that statements after the tail call stmt // candidate are in one of expected forms, that are desctibed below. diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 8ed34b43d739e..999e6948a304d 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -1748,7 +1748,11 @@ GenTree* Compiler::createAddressNodeForSIMDInit(GenTree* tree, unsigned simdSize byrefNode = gtNewOperNode(GT_COMMA, arrayRef->TypeGet(), arrBndsChk, gtCloneExpr(arrayRef)); } - GenTree* address = gtNewOperNode(GT_ADD, TYP_BYREF, byrefNode, gtNewIconNode(offset, TYP_I_IMPL)); + GenTree* address = byrefNode; + if (offset != 0) + { + address = gtNewOperNode(GT_ADD, TYP_BYREF, address, gtNewIconNode(offset, TYP_I_IMPL)); + } return address; } From bb7eaadc1e34aa8d6d2c7a9220cf5cf2a5e98f6d Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Thu, 24 Nov 2022 19:43:09 +0300 Subject: [PATCH 4/6] Save uses in Values instead of nodes --- src/coreclr/jit/lclmorph.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index c90059aaefc56..bcc27ac1e5c3f 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -34,16 +34,16 @@ class LocalAddressVisitor final : public GenTreeVisitor // class Value { - GenTree* m_node; - unsigned m_lclNum; - unsigned m_offset; - bool m_address; - INDEBUG(bool m_consumed;) + GenTree** m_use; + unsigned m_lclNum; + unsigned m_offset; + bool m_address; + INDEBUG(bool m_consumed); public: // Produce an unknown value associated with the specified node. - Value(GenTree* node) - : m_node(node) + Value(GenTree** use) + : m_use(use) , m_lclNum(BAD_VAR_NUM) , m_offset(0) , m_address(false) @@ -53,10 +53,16 @@ class LocalAddressVisitor final : public GenTreeVisitor { } + // Get the use for the node that produced this value. + GenTree** Use() const + { + return m_use; + } + // Get the node that produced this value. GenTree* Node() const { - return m_node; + return *m_use; } // Does this value represent a location? @@ -420,7 +426,7 @@ class LocalAddressVisitor final : public GenTreeVisitor } } - PushValue(node); + PushValue(use); return Compiler::WALK_CONTINUE; } @@ -559,9 +565,9 @@ class LocalAddressVisitor final : public GenTreeVisitor } private: - void PushValue(GenTree* node) + void PushValue(GenTree** use) { - m_valueStack.Push(node); + m_valueStack.Push(use); } Value& TopValue(unsigned index) From ed7dbfd0c097cfefab7c84d4ae19fd4ab527338b Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Thu, 24 Nov 2022 19:47:56 +0300 Subject: [PATCH 5/6] Remove ReplaceWith --- src/coreclr/jit/lclmorph.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index bcc27ac1e5c3f..8c95da35b4017 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -931,7 +931,8 @@ class LocalAddressVisitor final : public GenTreeVisitor GenTree* hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode, CORINFO_TYPE_FLOAT, genTypeSize(varDsc), /* isSimdAsHWIntrinsic */ false); - indir->ReplaceWith(hwiNode, m_compiler); + indir = hwiNode; + *val.Use() = hwiNode; } break; From 8f774625c48133d1fc986e9afb571db70a02569e Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Thu, 24 Nov 2022 19:50:35 +0300 Subject: [PATCH 6/6] Add ifdefs for clarity and to avoid regressing ARM --- src/coreclr/jit/lclmorph.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 8c95da35b4017..dc3c669c17753 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -300,8 +300,10 @@ class LocalAddressVisitor final : public GenTreeVisitor None, Nop, BitCast, +#ifdef FEATURE_HW_INTRINSICS GetElement, WithElement, +#endif // FEATURE_HW_INTRINSICS LclVar, LclFld }; @@ -1058,11 +1060,13 @@ class LocalAddressVisitor final : public GenTreeVisitor return IndirTransform::LclFld; } +#ifdef FEATURE_HW_INTRINSICS if (varTypeIsSIMD(varDsc) && indir->TypeIs(TYP_FLOAT) && ((val.Offset() % genTypeSize(TYP_FLOAT)) == 0) && m_compiler->IsBaselineSimdIsaSupported()) { return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; } +#endif // FEATURE_HW_INTRINSICS // Turn this into a bitcast if we can. if ((genTypeSize(indir) == genTypeSize(varDsc)) && (varTypeIsFloating(indir) || varTypeIsFloating(varDsc)))