From 2100b03b94bffa6c7d47c6eefa82c4fce247189c Mon Sep 17 00:00:00 2001
From: Tanner Gooding
Date: Sun, 14 Apr 2024 13:31:23 -0700
Subject: [PATCH 1/5] Add support for containing RotateLeft/RotateRight on Arm64

---
 src/coreclr/jit/codegenarm64.cpp | 42 ++++++++++++++++++++
 src/coreclr/jit/lowerarmarch.cpp | 68 ++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)

diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index d4e8d58db741b..8695545cc934a 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -2655,6 +2655,48 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree)
         genProduceReg(tree);
         return;
     }
+    else if (op2->OperIs(GT_ROR) && op2->isContained())
+    {
+        assert(varTypeIsIntegral(tree));
+
+        GenTree* a = op1;
+        GenTree* b = op2->gtGetOp1();
+        GenTree* c = op2->gtGetOp2();
+
+        // The rotate amount needs to be contained as well
+        assert(c->isContained() && c->IsCnsIntOrI());
+
+        instruction ins = genGetInsForOper(tree->OperGet(), targetType);
+        insOpts     opt = INS_OPTS_NONE;
+
+        if ((tree->gtFlags & GTF_SET_FLAGS) != 0)
+        {
+            // A subset of operations can still set flags
+
+            switch (oper)
+            {
+                case GT_AND:
+                {
+                    ins = INS_ands;
+                    break;
+                }
+
+                default:
+                {
+                    noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set");
+                }
+            }
+        }
+
+        assert(op2->OperIs(GT_ROR));
+        opt = INS_OPTS_ROR;
+
+        emit->emitIns_R_R_R_I(ins, emitActualTypeSize(tree), targetReg, a->GetRegNum(), b->GetRegNum(),
+                              c->AsIntConCommon()->IconValue(), opt);
+
+        genProduceReg(tree);
+        return;
+    }
     else if (op2->OperIs(GT_CAST) && op2->isContained())
     {
         assert(varTypeIsIntegral(tree));
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 9731331885bec..ae256f2b41de6 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -295,6 +295,74 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN
         return false;
     }
 
+    if (childNode->OperIs(GT_ROL, GT_ROR))
+    {
+        // Find "a op (b rotate cns)"
+
+        if (childNode->gtGetOp1()->isContained())
+        {
+            // Cannot contain if the child's op1 is already contained
+            return false;
+        }
+
+        GenTree* rotateAmountNode = childNode->gtGetOp2();
+
+        if (!rotateAmountNode->IsCnsIntOrI())
+        {
+            // Cannot contain if the child's op2 is not a constant
+            return false;
+        }
+
+        const ssize_t wrapAmount = (static_cast<ssize_t>(genTypeSize(parentNode)) * BITS_PER_BYTE);
+        assert((wrapAmount == 32) || (wrapAmount == 64));
+
+        // Rotation is circular, so normalize to [0, wrapAmount - 1]
+        ssize_t rotateAmount = rotateAmountNode->AsIntCon()->IconValue() % wrapAmount;
+        assert((rotateAmount >= 0) && (rotateAmount <= (wrapAmount - 1)));
+
+        if (childNode->OperIs(GT_ROL))
+        {
+            // The actual instructions only encode rotate right, but since
+            // rotating left by rotateAmount is equivalent to rotating right
+            // by (wrapAmount - rotateAmount), we can fix things here.
+
+            childNode->SetOper(GT_ROR);
+            rotateAmount = wrapAmount - rotateAmount;
+        }
+
+        rotateAmountNode->AsIntCon()->SetIconValue(rotateAmount);
+        assert(childNode->OperIs(GT_ROR));
+
+        if (parentNode->OperIs(GT_AND))
+        {
+            // These operations can still report flags
+
+            if (IsInvariantInRange(childNode, parentNode))
+            {
+                assert(shiftAmountNode->isContained());
+                return true;
+            }
+        }
+
+        if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0)
+        {
+            // Cannot contain if the parent operation needs to set flags
+            return false;
+        }
+
+        if (parentNode->OperIs(GT_OR, GT_XOR))
+        {
+            if (IsInvariantInRange(childNode, parentNode))
+            {
+                assert(rotateAmountNode->isContained());
+                return true;
+            }
+        }
+
+        // TODO: Handle BIC/BICS, EON, MVN, ORN, TST
+        return false;
+    }
+
     if (childNode->OperIs(GT_NEG))
     {
         // If we have a contained LSH, RSH or RSZ, we can still contain NEG if the parent is a EQ or NE.
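Note: the heart of the lowering change above is the rotate-amount normalization in IsContainableUnaryOrBinaryOp. The constant amount is reduced modulo the operand width (32 or 64), and a GT_ROL is rewritten as the equivalent GT_ROR because the Arm64 shifted-register forms only encode a rotate right. The snippet below is a standalone sketch of that arithmetic using std::rotl/std::rotr from C++20; it is not JIT code, and the helper name rolToRorAmount is invented for the illustration.

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Map a left-rotate amount onto the equivalent right-rotate amount for an
    // operand of wrapAmount bits (32 or 64), mirroring the lowering logic above.
    // The extra "% wrapAmount" only keeps the sketch's result in range when the
    // incoming amount is 0 or a multiple of the width.
    constexpr int64_t rolToRorAmount(int64_t rotateAmount, int64_t wrapAmount)
    {
        rotateAmount %= wrapAmount; // rotation is circular
        return (wrapAmount - rotateAmount) % wrapAmount;
    }

    int main()
    {
        const uint32_t value = 0x12345678u;

        for (int n = 0; n < 32; n++)
        {
            const int ror = static_cast<int>(rolToRorAmount(n, 32));
            assert(std::rotl(value, n) == std::rotr(value, ror));
        }
        return 0;
    }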
From 244b85ab65bc5233bf1417e6f79e406db4672ee1 Mon Sep 17 00:00:00 2001
From: Tanner Gooding
Date: Sun, 14 Apr 2024 13:33:13 -0700
Subject: [PATCH 2/5] Fix a variable name

---
 src/coreclr/jit/lowerarmarch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index ae256f2b41de6..e01e63f37d53a 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -339,7 +339,7 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN
 
             if (IsInvariantInRange(childNode, parentNode))
             {
-                assert(shiftAmountNode->isContained());
+                assert(rotateAmountNode->isContained());
                 return true;
             }
         }

From 322fed5c82f7877180c7ff0fb2d944fbab11a375 Mon Sep 17 00:00:00 2001
From: Tanner Gooding
Date: Sun, 14 Apr 2024 13:50:49 -0700
Subject: [PATCH 3/5] Applying formatting patch

---
 src/coreclr/jit/lowerarmarch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index e01e63f37d53a..cbdc886ee2802 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -313,7 +313,7 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN
             return false;
         }
 
-        const ssize_t wrapAmount = (static_cast<ssize_t>(genTypeSize(parentNode)) * BITS_PER_BYTE);
+        const ssize_t wrapAmount = (static_cast<ssize_t>(genTypeSize(parentNode)) * BITS_PER_BYTE);
         assert((wrapAmount == 32) || (wrapAmount == 64));
 
         // Rotation is circular, so normalize to [0, wrapAmount - 1]

From d281b848e1887f771d69a5a6fc656e34f00b968b Mon Sep 17 00:00:00 2001
From: Tanner Gooding
Date: Sun, 14 Apr 2024 14:57:15 -0700
Subject: [PATCH 4/5] Ensure lsrabuild handles contained GT_ROR

---
 src/coreclr/jit/lsrabuild.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index d1825f4469661..3ee0f88ee2214 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -3511,11 +3511,11 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates)
         // ANDs may be contained in a chain.
         return BuildBinaryUses(node->AsOp(), candidates);
     }
-    if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ))
+    if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ, GT_ROR))
    {
         // NEG can be contained for mneg on arm64
         // CAST and LSH for ADD with sign/zero extension
-        // LSH, RSH, and RSZ for various "shifted register" instructions on arm64
+        // LSH, RSH, RSZ, and ROR for various "shifted register" instructions on arm64
         return BuildOperandUses(node->gtGetOp1(), candidates);
     }
 #endif
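Note: the lsrabuild change matters because a contained GT_ROR no longer produces a register of its own, yet its first operand (the value being rotated) still needs one so the parent instruction can name it as the shifted-register source; only the constant rotate amount is folded into the encoding. As a rough model of the shifted-register forms being targeted (not the JIT's emitter API, and with a made-up helper name), a single instruction computes a <op> (b ROR #imm):

    #include <bit>
    #include <cstdint>

    // Rough model of "eor w0, w1, w2, ror #imm": the second source register is
    // rotated right by an immediate before the bitwise operation is applied.
    constexpr uint32_t EorWithRor(uint32_t a, uint32_t b, int imm)
    {
        return a ^ std::rotr(b, imm);
    }

    // rotr(0x00000001, 4) == 0x10000000, so this mirrors
    // "eor wd, wn, wm, ror #4" for these inputs.
    static_assert(EorWithRor(0xF0F0F0F0u, 0x00000001u, 4) == (0xF0F0F0F0u ^ 0x10000000u));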
From 49d4625eac97ac8784bf8039f5cb81046c1877d0 Mon Sep 17 00:00:00 2001
From: Tanner Gooding
Date: Sun, 14 Apr 2024 15:28:53 -0700
Subject: [PATCH 5/5] Ensure codegenlinear can consume contained GT_ROR

---
 src/coreclr/jit/codegenlinear.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 0b373a94fd3f8..145c074b8fc68 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1657,7 +1657,7 @@ void CodeGen::genConsumeRegs(GenTree* tree)
     }
 #endif // FEATURE_HW_INTRINSICS
 #endif // TARGET_XARCH
-    else if (tree->OperIs(GT_BITCAST, GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ, GT_BSWAP, GT_BSWAP16))
+    else if (tree->OperIs(GT_BITCAST, GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ, GT_ROR, GT_BSWAP, GT_BSWAP16))
     {
         genConsumeRegs(tree->gtGetOp1());
     }
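Note: taken together, the series lets a bitwise AND, OR, or XOR whose second operand is a rotate by a constant collapse into one shifted-register instruction on Arm64. A minimal C++ analogue of the source pattern is below; the instruction named in the comment is illustrative and assumes the constant rotate amount survives to lowering (rotating a 32-bit value left by 5 is the same as rotating it right by 27).

    #include <bit>
    #include <cstdint>

    // The shape this series targets: a bitwise op whose second operand is a
    // rotate by a constant. With containment, the rotate should fold into the
    // parent op, e.g. something like "eor w0, w0, w1, ror #27" rather than a
    // separate rotate followed by an eor.
    uint32_t Mix(uint32_t a, uint32_t b)
    {
        return a ^ std::rotl(b, 5);
    }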