From 3a7abbd1bb5df6b46cab00d63e49626aaa717278 Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Sat, 19 Jun 2021 16:30:32 +0300 Subject: [PATCH 1/4] Move late arithmetic to its own section in gtlist --- src/coreclr/jit/gtlist.h | 44 ++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 127cf9b8811df..765124e12147a 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -126,32 +126,12 @@ GTNODE(UMOD , GenTreeOp ,0,GTK_BINOP) GTNODE(OR , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP)) GTNODE(XOR , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP)) GTNODE(AND , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP)) -GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP) GTNODE(LSH , GenTreeOp ,0,GTK_BINOP) GTNODE(RSH , GenTreeOp ,0,GTK_BINOP) GTNODE(RSZ , GenTreeOp ,0,GTK_BINOP) GTNODE(ROL , GenTreeOp ,0,GTK_BINOP) GTNODE(ROR , GenTreeOp ,0,GTK_BINOP) -GTNODE(INC_SATURATE , GenTreeOp ,0,GTK_UNOP) // saturating increment, used in division by a constant (LowerUnsignedDivOrMod) - -// Returns high bits (top N bits of the 2N bit result of an NxN multiply) -// GT_MULHI is used in division by a constant (LowerUnsignedDivOrMod). We turn -// the div into a MULHI + some adjustments. In codegen, we only use the -// results of the high register, and we drop the low results. -GTNODE(MULHI , GenTreeOp ,1,GTK_BINOP) - -// A mul that returns the 2N bit result of an NxN multiply. This op is used for -// multiplies that take two ints and return a long result. For 32 bit targets, -// all other multiplies with long results are morphed into helper calls. -// It is similar to GT_MULHI, the difference being that GT_MULHI drops the lo -// part of the result, whereas GT_MUL_LONG keeps both parts of the result. -// MUL_LONG is also used on ARM64, where 64 bit multiplication is more expensive. -#if !defined(TARGET_64BIT) -GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP) -#elif defined(TARGET_ARM64) -GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP) -#endif GTNODE(ASG , GenTreeOp ,0,(GTK_BINOP|GTK_NOTLIR)) GTNODE(EQ , GenTreeOp ,0,(GTK_BINOP|GTK_RELOP)) @@ -220,6 +200,30 @@ GTNODE(SIMD , GenTreeSIMD ,0,(GTK_BINOP|GTK_EXOP)) // SIM GTNODE(HWINTRINSIC , GenTreeHWIntrinsic ,0,(GTK_BINOP|GTK_EXOP)) // hardware intrinsics #endif // FEATURE_HW_INTRINSICS +//----------------------------------------------------------------------------- +// Backend-specific arithmetic nodes: +//----------------------------------------------------------------------------- + +GTNODE(INC_SATURATE , GenTreeOp ,0,GTK_UNOP) // saturating increment, used in division by a constant (LowerUnsignedDivOrMod) + +// Returns high bits (top N bits of the 2N bit result of an NxN multiply) +// GT_MULHI is used in division by a constant (LowerUnsignedDivOrMod). We turn +// the div into a MULHI + some adjustments. In codegen, we only use the +// results of the high register, and we drop the low results. +GTNODE(MULHI , GenTreeOp ,1,GTK_BINOP) + +// A mul that returns the 2N bit result of an NxN multiply. This op is used for +// multiplies that take two ints and return a long result. For 32 bit targets, +// all other multiplies with long results are morphed into helper calls. +// It is similar to GT_MULHI, the difference being that GT_MULHI drops the lo +// part of the result, whereas GT_MUL_LONG keeps both parts of the result. +// MUL_LONG is also used on ARM64, where 64 bit multiplication is more expensive. 
+#if !defined(TARGET_64BIT)
+GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP)
+#elif defined(TARGET_ARM64)
+GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP)
+#endif
+GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP) // AndNot - emitted on ARM/ARM64 as the BIC instruction.
 //-----------------------------------------------------------------------------
 // LIR specific compare and conditional branch/set nodes:
 //-----------------------------------------------------------------------------

From bc94fa58adf3e26da0d1df37ee5684072e0ae7c1 Mon Sep 17 00:00:00 2001
From: SingleAccretion
Date: Fri, 1 Oct 2021 23:36:48 +0300
Subject: [PATCH 2/4] Implement AND_NOT for ARM/ARM64

---
 src/coreclr/jit/codegenarm.cpp | 7 ++--
 src/coreclr/jit/codegenarm64.cpp | 11 +++++--
 src/coreclr/jit/codegenarmarch.cpp | 1 +
 src/coreclr/jit/emitarm.cpp | 2 +-
 src/coreclr/jit/lower.cpp | 51 ++++++++++++++++++++++++++++--
 src/coreclr/jit/lower.h | 2 ++
 src/coreclr/jit/lowerarmarch.cpp | 19 +++++++++++
 src/coreclr/jit/lowerxarch.cpp | 19 +++++++++++
 src/coreclr/jit/lsraarm.cpp | 1 +
 src/coreclr/jit/lsraarm64.cpp | 1 +
 10 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp
index 0ae592f0bdbc2..2e5c7cdd33ee3 100644
--- a/src/coreclr/jit/codegenarm.cpp
+++ b/src/coreclr/jit/codegenarm.cpp
@@ -306,8 +306,8 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
     var_types targetType = treeNode->TypeGet();
     emitter* emit = GetEmitter();
 
-    assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_ADD_LO || oper == GT_ADD_HI ||
-           oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_OR || oper == GT_XOR || oper == GT_AND);
+    assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_ADD_LO, GT_ADD_HI, GT_SUB_LO, GT_SUB_HI, GT_OR, GT_XOR, GT_AND,
+                            GT_AND_NOT));
 
     GenTree* op1 = treeNode->gtGetOp1();
     GenTree* op2 = treeNode->gtGetOp2();
@@ -664,6 +664,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
         case GT_AND:
            ins = INS_AND;
            break;
+        case GT_AND_NOT:
+            ins = INS_bic;
+            break;
         case GT_MUL:
            ins = INS_MUL;
            break;
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 0d4d9e9da2c20..d76ef66bf5b6f 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -1813,7 +1813,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
     genProduceReg(treeNode);
 }
 
-// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
+// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, AND_NOT, OR and XOR
 // This method is expected to have called genConsumeOperands() before calling it.
void CodeGen::genCodeForBinary(GenTreeOp* treeNode) { @@ -1822,8 +1822,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) var_types targetType = treeNode->TypeGet(); emitter* emit = GetEmitter(); - assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND || - oper == GT_OR || oper == GT_XOR); + assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR)); GenTree* op1 = treeNode->gtGetOp1(); GenTree* op2 = treeNode->gtGetOp2(); @@ -1842,6 +1841,9 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) case GT_AND: ins = INS_ands; break; + case GT_AND_NOT: + ins = INS_bics; + break; default: noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set"); } @@ -3115,6 +3117,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) case GT_AND: ins = INS_and; break; + case GT_AND_NOT: + ins = INS_bic; + break; case GT_DIV: ins = INS_sdiv; break; diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 8b9cf4b6a5f6d..559ff575150f9 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -213,6 +213,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_OR: case GT_XOR: case GT_AND: + case GT_AND_NOT: assert(varTypeIsIntegralOrI(treeNode)); FALLTHROUGH; diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index 30c6f7e219ae9..ee377744eb188 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -8095,7 +8095,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, if (dst->gtSetFlags()) { assert((ins == INS_add) || (ins == INS_adc) || (ins == INS_sub) || (ins == INS_sbc) || (ins == INS_and) || - (ins == INS_orr) || (ins == INS_eor) || (ins == INS_orn)); + (ins == INS_orr) || (ins == INS_eor) || (ins == INS_orn) || (ins == INS_bic)); flags = INS_FLAGS_SET; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a98c7b130f9e7..fadff6d2c778e 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -139,8 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_AND: case GT_OR: case GT_XOR: - ContainCheckBinary(node->AsOp()); - break; + return LowerBinaryArithmeticCommon(node->AsOp()); case GT_MUL: case GT_MULHI: @@ -5098,6 +5097,54 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) return nullptr; } +//------------------------------------------------------------------------ +// LowerBinaryArithmeticCommon: lowers the given binary arithmetic node. +// +// Recognizes opportunities for using target-independent "combined" nodes +// (currently AND_NOT on ARMArch). Calls the target-specific "LowerBinaryArithmetic" +// method, which checks for more nodes and containment. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. +// +GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* node) +{ + // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition + // this logic on the support for the instruction set on XArch. 
+ CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef TARGET_ARMARCH + if (comp->opts.OptimizationEnabled() && node->OperIs(GT_AND)) + { + GenTree* opNode = nullptr; + GenTree* notNode = nullptr; + if (node->gtGetOp1()->OperIs(GT_NOT)) + { + notNode = node->gtGetOp1(); + opNode = node->gtGetOp2(); + } + else if (node->gtGetOp2()->OperIs(GT_NOT)) + { + notNode = node->gtGetOp2(); + opNode = node->gtGetOp1(); + } + + if (notNode != nullptr) + { + node->gtOp1 = opNode; + node->gtOp2 = notNode->AsUnOp()->gtGetOp1(); + node->ChangeOper(GT_AND_NOT); + BlockRange().Remove(notNode); + } + } +#endif // TARGET_ARMARCH + + return LowerBinaryArithmetic(node); +} + //------------------------------------------------------------------------ // LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node. // diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 65e936b8b2167..8a9362da25971 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -297,6 +297,8 @@ class Lowering final : public Phase void LowerStoreIndir(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); GenTree* LowerMul(GenTreeOp* mul); + GenTree* LowerBinaryArithmeticCommon(GenTreeOp* node); + GenTree* LowerBinaryArithmetic(GenTreeOp* node); bool LowerUnsignedDivOrMod(GenTreeOp* divMod); GenTree* LowerConstIntDivOrMod(GenTree* node); GenTree* LowerSignedDivOrMod(GenTree* node); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index c1691ea6b508a..985cae1898fea 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -277,6 +277,25 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) return mul->gtNext; } +// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// +// Currently only performs containment checks. +// +// TODO-CQ-ARMArch: take advantage of "madd" and "msub" here. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. +// +GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node) +{ + ContainCheckBinary(node); + + return node->gtNext; +} + //------------------------------------------------------------------------ // LowerBlockStore: Lower a block store node // diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 9f4909aa24049..9521c4f881224 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -190,6 +190,25 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) return mul->gtNext; } +// LowerBinaryArithmetic: lowers the given binary arithmetic node. +// +// Currently only performs containment checks. +// +// TODO-CQ-XArch: take advantage of the BMI instructions here. +// +// Arguments: +// node - the arithmetic node to lower +// +// Returns: +// The next node to lower. 
+//
+GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node)
+{
+    ContainCheckBinary(node);
+
+    return node->gtNext;
+}
+
 //------------------------------------------------------------------------
 // LowerBlockStore: Lower a block store node
 //
diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp
index ad345d9acdcfc..95429df030d08 100644
--- a/src/coreclr/jit/lsraarm.cpp
+++ b/src/coreclr/jit/lsraarm.cpp
@@ -368,6 +368,7 @@ int LinearScan::BuildNode(GenTree* tree)
             FALLTHROUGH;
 
         case GT_AND:
+        case GT_AND_NOT:
         case GT_OR:
         case GT_XOR:
         case GT_LSH:
diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
index 3bb42a1269d57..99eedbe0a7ec6 100644
--- a/src/coreclr/jit/lsraarm64.cpp
+++ b/src/coreclr/jit/lsraarm64.cpp
@@ -269,6 +269,7 @@ int LinearScan::BuildNode(GenTree* tree)
             FALLTHROUGH;
 
         case GT_AND:
+        case GT_AND_NOT:
         case GT_OR:
         case GT_XOR:
         case GT_LSH:

From 7c580a4c2d04a6df8c4a40ccaebdafdc9d76cc75 Mon Sep 17 00:00:00 2001
From: SingleAccretion
Date: Mon, 25 Oct 2021 19:29:07 +0300
Subject: [PATCH 3/4] Delete the unnecessary platform-specific methods

They were leftovers from some previous work.
---
 src/coreclr/jit/lower.cpp | 13 +++++++------
 src/coreclr/jit/lower.h | 1 -
 src/coreclr/jit/lowerarmarch.cpp | 19 -------------------
 src/coreclr/jit/lowerxarch.cpp | 19 -------------------
 4 files changed, 7 insertions(+), 45 deletions(-)

diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index fadff6d2c778e..7c5ef9d309390 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -139,7 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
         case GT_AND:
         case GT_OR:
         case GT_XOR:
-            return LowerBinaryArithmeticCommon(node->AsOp());
+            return LowerBinaryArithmetic(node->AsOp());
 
         case GT_MUL:
         case GT_MULHI:
@@ -5098,11 +5098,10 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
 }
 
 //------------------------------------------------------------------------
-// LowerBinaryArithmeticCommon: lowers the given binary arithmetic node.
+// LowerBinaryArithmetic: lowers the given binary arithmetic node.
 //
 // Recognizes opportunities for using target-independent "combined" nodes
-// (currently AND_NOT on ARMArch). Calls the target-specific "LowerBinaryArithmetic"
-// method, which checks for more nodes and containment.
+// (currently AND_NOT on ARMArch). Performs containment checks.
 //
 // Arguments:
 //    node - the arithmetic node to lower
@@ -5110,7 +5109,7 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
 // Returns:
 //   The next node to lower.
 //
-GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* node)
+GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node)
 {
     // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition
     // this logic on the support for the instruction set on XArch.
@@ -5142,7 +5141,9 @@ GenTree* Lowering::LowerBinaryArithmeticCommon(GenTreeOp* node) } #endif // TARGET_ARMARCH - return LowerBinaryArithmetic(node); + ContainCheckBinary(node); + + return node->gtNext; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 8a9362da25971..9b42bbaad3766 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -297,7 +297,6 @@ class Lowering final : public Phase void LowerStoreIndir(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); GenTree* LowerMul(GenTreeOp* mul); - GenTree* LowerBinaryArithmeticCommon(GenTreeOp* node); GenTree* LowerBinaryArithmetic(GenTreeOp* node); bool LowerUnsignedDivOrMod(GenTreeOp* divMod); GenTree* LowerConstIntDivOrMod(GenTree* node); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 985cae1898fea..c1691ea6b508a 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -277,25 +277,6 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) return mul->gtNext; } -// LowerBinaryArithmetic: lowers the given binary arithmetic node. -// -// Currently only performs containment checks. -// -// TODO-CQ-ARMArch: take advantage of "madd" and "msub" here. -// -// Arguments: -// node - the arithmetic node to lower -// -// Returns: -// The next node to lower. -// -GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node) -{ - ContainCheckBinary(node); - - return node->gtNext; -} - //------------------------------------------------------------------------ // LowerBlockStore: Lower a block store node // diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 9521c4f881224..9f4909aa24049 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -190,25 +190,6 @@ GenTree* Lowering::LowerMul(GenTreeOp* mul) return mul->gtNext; } -// LowerBinaryArithmetic: lowers the given binary arithmetic node. -// -// Currently only performs containment checks. -// -// TODO-CQ-XArch: take advantage of the BMI instructions here. -// -// Arguments: -// node - the arithmetic node to lower -// -// Returns: -// The next node to lower. -// -GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node) -{ - ContainCheckBinary(node); - - return node->gtNext; -} - //------------------------------------------------------------------------ // LowerBlockStore: Lower a block store node // From 0570c9d17f068a49b196a0ab2b75988a21fd75f6 Mon Sep 17 00:00:00 2001 From: SingleAccretion Date: Mon, 25 Oct 2021 19:31:35 +0300 Subject: [PATCH 4/4] Mention the SIMD origins of AND_NOT --- src/coreclr/jit/gtlist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 765124e12147a..f4c48e411f768 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -223,7 +223,8 @@ GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP) #elif defined(TARGET_ARM64) GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP) #endif -GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP) // AndNot - emitted on ARM/ARM64 as the BIC instruction. +// AndNot - emitted on ARM/ARM64 as the BIC instruction. Also used for creating AndNot HWINTRINSIC vector nodes in a cross-ISA manner. +GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP) //----------------------------------------------------------------------------- // LIR specific compare and conditional branch/set nodes: //-----------------------------------------------------------------------------
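
Illustrative note: the rewrite at the heart of LowerBinaryArithmetic can be sketched outside the JIT. The Node type and LowerAnd helper below are simplified, hypothetical stand-ins for the real GenTree/BlockRange machinery (they are not the JIT's API); the sketch only models the AND(x, NOT(y)) => AND_NOT(x, y) folding that lets ARM/ARM64 codegen pick a single "bic" (or "bics" when flags are needed) instead of an "mvn" followed by an "and".

#include <cstdio>

// Simplified stand-ins for GenTree opers; only what this sketch needs.
enum class Oper { LclVar, Not, And, AndNot };

struct Node
{
    Oper  oper;
    Node* op1 = nullptr;
    Node* op2 = nullptr;
};

// Models the ARMArch part of LowerBinaryArithmetic: if either operand of an
// AND is a NOT, fold the pair into a single AND_NOT node and drop the NOT.
void LowerAnd(Node* node)
{
    if (node->oper != Oper::And)
        return;

    Node* opNode  = nullptr;
    Node* notNode = nullptr;
    if (node->op1->oper == Oper::Not)
    {
        notNode = node->op1;
        opNode  = node->op2;
    }
    else if (node->op2->oper == Oper::Not)
    {
        notNode = node->op2;
        opNode  = node->op1;
    }

    if (notNode != nullptr)
    {
        node->oper = Oper::AndNot; // AND(x, NOT(y)) => AND_NOT(x, y)
        node->op1  = opNode;
        node->op2  = notNode->op1; // the NOT node itself is now unused
    }
}

int main()
{
    Node x{Oper::LclVar};
    Node y{Oper::LclVar};
    Node notY{Oper::Not, &y};
    Node tree{Oper::And, &x, &notY}; // models "x & ~y"

    LowerAnd(&tree);
    std::printf("lowered to AND_NOT: %s\n", (tree.oper == Oper::AndNot) ? "yes" : "no");
    return 0;
}

After the rewrite, codegen maps the single AND_NOT node to INS_bic (or INS_bics when GTF_SET_FLAGS is set, as in the codegenarm64.cpp hunk above), saving the separate "mvn" that a standalone NOT would otherwise require.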