From 3b90a52455b3cc1ad0de64174d393c9ad4789396 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 25 Feb 2021 16:02:55 -0800 Subject: [PATCH 1/3] [Windows/Linux arm4] Fix issue where resolution move would overwrite JCMP We were overwritting a register with resolution move which was the source of the operand used for JCMP. Special case it to also check if that is the case and filter out such registers from getting resolution done. Fixes following failures: https://helixre8s23ayyeko0k025g8.blob.core.windows.net/dotnet-runtime-refs-pull-47307-head-929ed24b5ccf4bf2b0/System.Linq.Parallel.Tests/console.89614fbb.log?sv=2019-07-07&se=2021-03-15T02%3A39%3A50Z&sr=c&sp=rl&sig=mHbQ6BCU%2BLYjzTmsd9DRDTMHtSP5LsCDsZpwjhqvgn4%3D https://helixre8s23ayyeko0k025g8.blob.core.windows.net/dotnet-runtime-refs-pull-47307-head-929ed24b5ccf4bf2b0/System.Threading.Channels.Tests/console.64cd2de3.log?sv=2019-07-07&se=2021-03-15T02%3A39%3A50Z&sr=c&sp=rl&sig=mHbQ6BCU%2BLYjzTmsd9DRDTMHtSP5LsCDsZpwjhqvgn4%3D --- src/coreclr/jit/lsra.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 996dcac0dd9f9..6a1b27b282779 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -7656,7 +7656,7 @@ void LinearScan::resolveRegisters() printf("Resolution Candidates: "); dumpConvertedVarSet(compiler, resolutionCandidateVars); printf("\n"); - printf("Has %sCritical Edges\n\n", hasCriticalEdges ? "" : "No"); + printf("Has %sCritical Edges\n\n", hasCriticalEdges ? "" : "No "); printf("Prior to Resolution\n"); foreach_block(compiler, block) @@ -8300,8 +8300,16 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } #ifdef TARGET_ARM64 - // Next, if this blocks ends with a JCMP, we have to make sure not to copy - // into the register that it uses or modify the local variable it must consume + // Next, if this blocks ends with a JCMP, we have to make sure: + // 1. Not to copy into the register that JCMP uses + // e.g. JCMP w21, BRANCH + // 2. Not to copy into the source of JCMP's operand before it is consumed + // e.g. Should not use w0 since it will contain wrong value after resolution + // call METHOD + // ; mov w0, w19 <-- should not resolve in w0 here. + // mov w21, w0 + // JCMP w21, BRANCH + // 3. Modify the local variable it must consume LclVarDsc* jcmpLocalVarDsc = nullptr; if (block->bbJumpKind == BBJ_COND) { @@ -8312,6 +8320,12 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) GenTree* op1 = lastNode->gtGetOp1(); switchRegs |= genRegMask(op1->GetRegNum()); + if (op1->OperIs(GT_COPY)) + { + GenTree* srcOp1 = op1->gtGetOp1(); + switchRegs |= genRegMask(srcOp1->GetRegNum()); + } + if (op1->IsLocal()) { GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); From 82f2484f5a1cf3bb726ca49c268bbe8b372373d0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 2 Mar 2021 14:46:44 -0800 Subject: [PATCH 2/3] rename variable and add comments --- src/coreclr/jit/lsra.cpp | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 6a1b27b282779..dbd9aea915882 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -8277,9 +8277,12 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } } - // Next, if this blocks ends with a switch table, we have to make sure not to copy - // into the registers that it uses. - regMaskTP switchRegs = RBM_NONE; + // Next, if this blocks ends with a switch table, or for Arm64, ends with JCMP instruction, + // make sure to not copy into the registers that are consumed at the end of this block. + // + // Note: Only switches and JCMP (for Arm4) have input regs (and so can be fed by copies), so those + // are the only block-ending branches that need special handling. + regMaskTP consumedRegs = RBM_NONE; if (block->bbJumpKind == BBJ_SWITCH) { // At this point, Lowering has transformed any non-switch-table blocks into @@ -8287,7 +8290,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) GenTree* switchTable = LIR::AsRange(block).LastNode(); assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE); - switchRegs = switchTable->gtRsvdRegs; + consumedRegs = switchTable->gtRsvdRegs; GenTree* op1 = switchTable->gtGetOp1(); GenTree* op2 = switchTable->gtGetOp2(); noway_assert(op1 != nullptr && op2 != nullptr); @@ -8295,8 +8298,8 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // No floating point values, so no need to worry about the register type // (i.e. for ARM32, where we used the genRegMask overload with a type). assert(varTypeIsIntegralOrI(op1) && varTypeIsIntegralOrI(op2)); - switchRegs |= genRegMask(op1->GetRegNum()); - switchRegs |= genRegMask(op2->GetRegNum()); + consumedRegs |= genRegMask(op1->GetRegNum()); + consumedRegs |= genRegMask(op2->GetRegNum()); } #ifdef TARGET_ARM64 @@ -8309,7 +8312,11 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // ; mov w0, w19 <-- should not resolve in w0 here. // mov w21, w0 // JCMP w21, BRANCH - // 3. Modify the local variable it must consume + // 3. Not to modify the local variable it must consume + + // Note: GT_COPY has special handling in codegen and its generation is merged with the + // node that consumes its result. So both, the input and output regs of GT_COPY must be + // excluded from the set available for resolution. LclVarDsc* jcmpLocalVarDsc = nullptr; if (block->bbJumpKind == BBJ_COND) { @@ -8318,12 +8325,12 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) if (lastNode->OperIs(GT_JCMP)) { GenTree* op1 = lastNode->gtGetOp1(); - switchRegs |= genRegMask(op1->GetRegNum()); + consumedRegs |= genRegMask(op1->GetRegNum()); if (op1->OperIs(GT_COPY)) { GenTree* srcOp1 = op1->gtGetOp1(); - switchRegs |= genRegMask(srcOp1->GetRegNum()); + consumedRegs |= genRegMask(srcOp1->GetRegNum()); } if (op1->IsLocal()) @@ -8402,9 +8409,10 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) { sameToReg = REG_NA; } - // If this register is used by a switch table at the end of the block, we can't do the copy - // in this block (since we can't insert it after the switch). - if ((sameToRegMask & switchRegs) != RBM_NONE) + // If this register is busy because it is used by a switch table at the end of the block + // (or for Arm64, it is consumed by JCMP), we can't do the copy in this block since we can't insert it after the switch + // (or for Arm64, can't insert and overwrite the operand/source of operand of JCMP). + if ((sameToRegMask & consumedRegs) != RBM_NONE) { sameToReg = REG_NA; } From 5ca44b3eda8415116fba928c72f5a3cca7189a5d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 2 Mar 2021 19:37:46 -0800 Subject: [PATCH 3/3] jit format --- src/coreclr/jit/lsra.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index dbd9aea915882..bf0cb968b4283 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -8410,8 +8410,9 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) sameToReg = REG_NA; } // If this register is busy because it is used by a switch table at the end of the block - // (or for Arm64, it is consumed by JCMP), we can't do the copy in this block since we can't insert it after the switch - // (or for Arm64, can't insert and overwrite the operand/source of operand of JCMP). + // (or for Arm64, it is consumed by JCMP), we can't do the copy in this block since we can't + // insert it after the switch (or for Arm64, can't insert and overwrite the operand/source + // of operand of JCMP). if ((sameToRegMask & consumedRegs) != RBM_NONE) { sameToReg = REG_NA;