diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 2e9f9ae524b88..be96c95750cc2 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -5016,7 +5016,17 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
     else
     {
         genConsumeAddress(addr);
-        emit->emitInsLoadInd(ins_Load(targetType), emitTypeSize(tree), tree->GetRegNum(), tree);
+        instruction loadIns = ins_Load(targetType);
+        if (tree->DontExtend())
+        {
+            assert(varTypeIsSmall(tree));
+            // The user of this IND does not need the upper bits of the
+            // register to be set, so we can use a plain INS_mov instead
+            // of the wider INS_movzx/INS_movsx.
+            // This usually happens when the real type is a small struct.
+            loadIns = INS_mov;
+        }
+        emit->emitInsLoadInd(loadIns, emitTypeSize(tree), tree->GetRegNum(), tree);
     }
 
     genProduceReg(tree);
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 89f182703fb48..c8c7593bc9ed7 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -527,9 +527,16 @@ enum GenTreeFlags : unsigned int
                                               //                         alignment of 1 byte)
     GTF_IND_INVARIANT           = 0x01000000, // GT_IND -- the target is invariant (a prejit indirection)
     GTF_IND_NONNULL             = 0x00400000, // GT_IND -- the indirection never returns null (zero)
+#if defined(TARGET_XARCH)
+    GTF_IND_DONT_EXTEND         = 0x00200000, // GT_IND -- a small-typed indirection that does not need to be sign/zero-extended
+#endif // TARGET_XARCH
 
     GTF_IND_FLAGS = GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | GTF_IND_UNALIGNED | GTF_IND_INVARIANT |
-                    GTF_IND_NONNULL | GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP,
+                    GTF_IND_NONNULL | GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP
+#if defined(TARGET_XARCH)
+                    | GTF_IND_DONT_EXTEND
+#endif // TARGET_XARCH
+    ,
 
     GTF_ADDRMODE_NO_CSE         = 0x80000000, // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex
                                               //                         addressing mode
@@ -6640,6 +6647,23 @@ struct GenTreeIndir : public GenTreeOp
         return (gtFlags & GTF_IND_UNALIGNED) != 0;
     }
 
+#if defined(TARGET_XARCH)
+    void SetDontExtend()
+    {
+        gtFlags |= GTF_IND_DONT_EXTEND;
+    }
+
+    void ClearDontExtend()
+    {
+        gtFlags &= ~GTF_IND_DONT_EXTEND;
+    }
+
+    bool DontExtend() const
+    {
+        return (gtFlags & GTF_IND_DONT_EXTEND) != 0;
+    }
+#endif // TARGET_XARCH
+
 #if DEBUGGABLE_GENTREE
     // Used only for GenTree::GetVtableForOper()
     GenTreeIndir() : GenTreeOp()
diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp
index 0c0667551b393..7c4dfa9c8bc18 100644
--- a/src/coreclr/jit/liveness.cpp
+++ b/src/coreclr/jit/liveness.cpp
@@ -2006,7 +2006,13 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
 
                 if (isDeadStore && fgTryRemoveDeadStoreLIR(node, lclVarNode, block))
                 {
-                    lclVarNode->Data()->SetUnusedValue();
+                    GenTree* data = lclVarNode->Data();
+                    data->SetUnusedValue();
+
+                    if (data->isIndir())
+                    {
+                        Lowering::TransformUnusedIndirection(data->AsIndir(), this, block);
+                    }
                 }
                 break;
             }
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 5b6188ab91031..b0e0fe8cc0bed 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -3525,14 +3525,49 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
                 convertToStoreObj = false;
             }
         }
-        else if (!src->OperIs(GT_LCL_VAR))
+        else if (src->OperIs(GT_LCL_VAR))
         {
+            convertToStoreObj = false;
+        }
+        else if (src->OperIs(GT_IND, GT_OBJ, GT_BLK, GT_LCL_FLD))
+        {
+#if !defined(TARGET_ARM64)
+
+            if (src->TypeIs(TYP_STRUCT))
+            {
+                src->ChangeType(lclRegType);
+                if (src->OperIs(GT_IND, GT_OBJ, GT_BLK))
+                {
+                    if (src->OperIs(GT_OBJ, GT_BLK))
+                    {
+                        src->SetOper(GT_IND);
+                    }
+                    // This logic is skipped for struct indirections in
+                    // `Lowering::LowerIndir` because the size is not known there.
+                    // Do it now.
+                    GenTreeIndir* indir = src->AsIndir();
+                    LowerIndir(indir);
+#if defined(TARGET_XARCH)
+                    if (varTypeIsSmall(lclRegType))
+                    {
+                        indir->SetDontExtend();
+                    }
+#endif // TARGET_XARCH
+                }
+            }
+            convertToStoreObj = false;
+#else  // TARGET_ARM64
+            // This optimization on arm64 allows more SIMD16 vars to be enregistered, but it could cause
+            // regressions when there are many calls: before/after each one we have to store/save the upper
+            // half of these registers. So enable this for arm64 only once LSRA is taught not to allocate
+            // registers that would have to be spilled too many times.
            convertToStoreObj = true;
+#endif // TARGET_ARM64
         }
         else
         {
-            assert(src->OperIs(GT_LCL_VAR));
-            convertToStoreObj = false;
+            assert(src->OperIsInitVal());
+            convertToStoreObj = true;
         }
 
         if (convertToStoreObj)
@@ -7221,6 +7256,7 @@ void Lowering::TransformUnusedIndirection(GenTreeIndir* ind, Compiler* comp, Bas
     bool useNullCheck = false;
#else  // TARGET_XARCH
     bool useNullCheck = !ind->Addr()->isContained();
+    ind->ClearDontExtend();
#endif // !TARGET_XARCH
 
     if (useNullCheck && !ind->OperIs(GT_NULLCHECK))
@@ -7318,14 +7354,6 @@ bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode)
         return false;
     }
 
-    if (varTypeIsSmall(regType) && !src->IsConstInitVal() && !src->IsLocal())
-    {
-        // source operand INDIR will use a widening instruction
-        // and generate worse code, like `movzx` instead of `mov`
-        // on x64.
-        return false;
-    }
-
     JITDUMP("Replacing STORE_OBJ with STOREIND for [%06u]\n", blkNode->gtTreeID);
     blkNode->ChangeOper(GT_STOREIND);
     blkNode->ChangeType(regType);
@@ -7348,6 +7376,14 @@ bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode)
     {
         assert(src->TypeIs(regType) || src->IsCnsIntOrI() || src->IsCall());
     }
+
+#if defined(TARGET_XARCH)
+    if (varTypeIsSmall(regType) && src->OperIs(GT_IND))
+    {
+        src->AsIndir()->SetDontExtend();
+    }
+#endif // TARGET_XARCH
+
     LowerStoreIndirCommon(blkNode->AsStoreInd());
     return true;
 }
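
Note (not part of the diff): a minimal, self-contained C++ sketch of the idea the change implements. The names here (NodeFlags, IndirNode, selectLoadIns) are hypothetical stand-ins for GTF_IND_DONT_EXTEND, GenTreeIndir::SetDontExtend/DontExtend and the ins_Load/INS_mov selection in genCodeForIndir; it only illustrates that a small-typed load whose consumer never observes the upper register bits can be emitted as a plain narrow mov instead of a widening movzx/movsx.

// Hypothetical stand-alone model of a "don't extend" node flag driving
// instruction selection for small-typed loads.
#include <cassert>
#include <cstdint>
#include <cstdio>

enum NodeFlags : uint32_t
{
    FLAG_NONE        = 0,
    FLAG_DONT_EXTEND = 1 << 0, // the load's user does not need the upper bits
};

struct IndirNode
{
    unsigned sizeInBytes; // 1 or 2 => "small" type, like the JIT's TYP_UBYTE/TYP_USHORT
    bool     isSigned;
    uint32_t flags = FLAG_NONE;

    void SetDontExtend()    { flags |= FLAG_DONT_EXTEND; }
    void ClearDontExtend()  { flags &= ~FLAG_DONT_EXTEND; }
    bool DontExtend() const { return (flags & FLAG_DONT_EXTEND) != 0; }
    bool IsSmall() const    { return sizeInBytes < 4; }
};

// Mirrors the shape of the codegenxarch.cpp change: prefer a plain "mov"
// when the consumer ignores the upper bits of the destination register.
const char* selectLoadIns(const IndirNode& load)
{
    if (!load.IsSmall())
    {
        return "mov";
    }
    if (load.DontExtend())
    {
        assert(load.IsSmall());
        return "mov"; // narrow mov; upper register bits are deliberately left alone
    }
    return load.isSigned ? "movsx" : "movzx";
}

int main()
{
    IndirNode byteLoad{/* sizeInBytes */ 1, /* isSigned */ false};

    // A small-struct copy: the only consumer is a 1-byte store, so lowering
    // can mark the load and codegen drops the widening instruction.
    std::printf("default: %s\n", selectLoadIns(byteLoad)); // movzx
    byteLoad.SetDontExtend();
    std::printf("marked:  %s\n", selectLoadIns(byteLoad)); // mov
    return 0;
}

The flag is cleared again in TransformUnusedIndirection because an indirection that is turned into a null check (or otherwise repurposed) no longer has the consumer that justified skipping the extension.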