Skip to content

Commit

Permalink
Implement GT_AND_NOT for ARM/ARM64 (#59881)
Browse files Browse the repository at this point in the history
* Move late arithmetic to its own section in gtlist

* Implement AND_NOT for ARMArch

* Delete the unnecessary platform-specific methods

They were a leftover from some previous work.

* Mention the SIMD origins of AND_NOT
  • Loading branch information
SingleAccretion authored Nov 6, 2021
1 parent b344bb5 commit 8273939
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 28 deletions.
7 changes: 5 additions & 2 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,8 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
var_types targetType = treeNode->TypeGet();
emitter* emit = GetEmitter();

assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_ADD_LO || oper == GT_ADD_HI ||
oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_OR || oper == GT_XOR || oper == GT_AND);
assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_ADD_LO, GT_ADD_HI, GT_SUB_LO, GT_SUB_HI, GT_OR, GT_XOR, GT_AND,
GT_AND_NOT));

GenTree* op1 = treeNode->gtGetOp1();
GenTree* op2 = treeNode->gtGetOp2();
Expand Down Expand Up @@ -671,6 +671,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_AND:
ins = INS_AND;
break;
case GT_AND_NOT:
ins = INS_bic;
break;
case GT_MUL:
ins = INS_MUL;
break;
Expand Down
11 changes: 8 additions & 3 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1817,7 +1817,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
genProduceReg(treeNode);
}

// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, AND_NOT, OR and XOR
// This method is expected to have called genConsumeOperands() before calling it.
void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
{
Expand All @@ -1826,8 +1826,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
var_types targetType = treeNode->TypeGet();
emitter* emit = GetEmitter();

assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND ||
oper == GT_OR || oper == GT_XOR);
assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR));

GenTree* op1 = treeNode->gtGetOp1();
GenTree* op2 = treeNode->gtGetOp2();
Expand All @@ -1846,6 +1845,9 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
case GT_AND:
ins = INS_ands;
break;
case GT_AND_NOT:
ins = INS_bics;
break;
default:
noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set");
}
Expand Down Expand Up @@ -3119,6 +3121,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_AND:
ins = INS_and;
break;
case GT_AND_NOT:
ins = INS_bic;
break;
case GT_DIV:
ins = INS_sdiv;
break;
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
case GT_OR:
case GT_XOR:
case GT_AND:
case GT_AND_NOT:
assert(varTypeIsIntegralOrI(treeNode));

FALLTHROUGH;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8095,7 +8095,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
if (dst->gtSetFlags())
{
assert((ins == INS_add) || (ins == INS_adc) || (ins == INS_sub) || (ins == INS_sbc) || (ins == INS_and) ||
(ins == INS_orr) || (ins == INS_eor) || (ins == INS_orn));
(ins == INS_orr) || (ins == INS_eor) || (ins == INS_orn) || (ins == INS_bic));
flags = INS_FLAGS_SET;
}

Expand Down
45 changes: 25 additions & 20 deletions src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,32 +126,12 @@ GTNODE(UMOD , GenTreeOp ,0,GTK_BINOP)
GTNODE(OR , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP))
GTNODE(XOR , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP))
GTNODE(AND , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP))
GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP)

GTNODE(LSH , GenTreeOp ,0,GTK_BINOP)
GTNODE(RSH , GenTreeOp ,0,GTK_BINOP)
GTNODE(RSZ , GenTreeOp ,0,GTK_BINOP)
GTNODE(ROL , GenTreeOp ,0,GTK_BINOP)
GTNODE(ROR , GenTreeOp ,0,GTK_BINOP)
GTNODE(INC_SATURATE , GenTreeOp ,0,GTK_UNOP) // saturating increment, used in division by a constant (LowerUnsignedDivOrMod)

// Returns high bits (top N bits of the 2N bit result of an NxN multiply)
// GT_MULHI is used in division by a constant (LowerUnsignedDivOrMod). We turn
// the div into a MULHI + some adjustments. In codegen, we only use the
// results of the high register, and we drop the low results.
GTNODE(MULHI , GenTreeOp ,1,GTK_BINOP)

// A mul that returns the 2N bit result of an NxN multiply. This op is used for
// multiplies that take two ints and return a long result. For 32 bit targets,
// all other multiplies with long results are morphed into helper calls.
// It is similar to GT_MULHI, the difference being that GT_MULHI drops the lo
// part of the result, whereas GT_MUL_LONG keeps both parts of the result.
// MUL_LONG is also used on ARM64, where 64 bit multiplication is more expensive.
#if !defined(TARGET_64BIT)
GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP)
#elif defined(TARGET_ARM64)
GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP)
#endif

GTNODE(ASG , GenTreeOp ,0,(GTK_BINOP|GTK_NOTLIR))
GTNODE(EQ , GenTreeOp ,0,(GTK_BINOP|GTK_RELOP))
Expand Down Expand Up @@ -220,6 +200,31 @@ GTNODE(SIMD , GenTreeSIMD ,0,(GTK_BINOP|GTK_EXOP)) // SIM
GTNODE(HWINTRINSIC , GenTreeHWIntrinsic ,0,(GTK_BINOP|GTK_EXOP)) // hardware intrinsics
#endif // FEATURE_HW_INTRINSICS

//-----------------------------------------------------------------------------
// Backend-specific arithmetic nodes:
//-----------------------------------------------------------------------------

GTNODE(INC_SATURATE , GenTreeOp ,0,GTK_UNOP) // saturating increment, used in division by a constant (LowerUnsignedDivOrMod)

// Returns high bits (top N bits of the 2N bit result of an NxN multiply)
// GT_MULHI is used in division by a constant (LowerUnsignedDivOrMod). We turn
// the div into a MULHI + some adjustments. In codegen, we only use the
// results of the high register, and we drop the low results.
GTNODE(MULHI , GenTreeOp ,1,GTK_BINOP)

// A mul that returns the 2N bit result of an NxN multiply. This op is used for
// multiplies that take two ints and return a long result. For 32 bit targets,
// all other multiplies with long results are morphed into helper calls.
// It is similar to GT_MULHI, the difference being that GT_MULHI drops the lo
// part of the result, whereas GT_MUL_LONG keeps both parts of the result.
// MUL_LONG is also used on ARM64, where 64 bit multiplication is more expensive.
#if !defined(TARGET_64BIT)
GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP)
#elif defined(TARGET_ARM64)
GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP)
#endif
// AndNot - emitted on ARM/ARM64 as the BIC instruction. Also used for creating AndNot HWINTRINSIC vector nodes in a cross-ISA manner.
GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP)
//-----------------------------------------------------------------------------
// LIR specific compare and conditional branch/set nodes:
//-----------------------------------------------------------------------------
Expand Down
52 changes: 50 additions & 2 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
case GT_AND:
case GT_OR:
case GT_XOR:
ContainCheckBinary(node->AsOp());
break;
return LowerBinaryArithmetic(node->AsOp());

case GT_MUL:
case GT_MULHI:
Expand Down Expand Up @@ -5104,6 +5103,55 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
return nullptr;
}

//------------------------------------------------------------------------
// LowerBinaryArithmetic: lowers the given binary arithmetic node.
//
// When compiling for TARGET_ARMARCH with optimizations enabled, an AND that
// has a NOT as one of its operands is rewritten in place into a single
// AND_NOT node: the non-inverted value becomes the first operand, the NOT's
// own operand becomes the second, and the NOT node is removed from the
// current block's range. If both operands are NOT nodes, only the first
// operand's NOT is folded. Containment checks are performed afterwards,
// whether or not the node was rewritten.
//
// Arguments:
//    node - the arithmetic node to lower
//
// Returns:
//    The next node to lower.
//
GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node)
{
    // TODO-CQ-XArch: support BMI2 "andn" in codegen and condition
    // this logic on the support for the instruction set on XArch.
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef TARGET_ARMARCH
    if (comp->opts.OptimizationEnabled() && node->OperIs(GT_AND))
    {
        GenTree* const firstOperand  = node->gtGetOp1();
        GenTree* const secondOperand = node->gtGetOp2();

        // Work out which side (if any) is the inverted one; the first operand takes priority.
        GenTree* invertedOperand = nullptr;
        GenTree* plainOperand    = nullptr;

        if (firstOperand->OperIs(GT_NOT))
        {
            invertedOperand = firstOperand;
            plainOperand    = secondOperand;
        }
        else if (secondOperand->OperIs(GT_NOT))
        {
            invertedOperand = secondOperand;
            plainOperand    = firstOperand;
        }

        if (invertedOperand != nullptr)
        {
            // AND(x, NOT(y)) / AND(NOT(y), x) => AND_NOT(x, y).
            GenTree* const valueBeingInverted = invertedOperand->AsUnOp()->gtGetOp1();

            node->gtOp1 = plainOperand;
            node->gtOp2 = valueBeingInverted;
            node->ChangeOper(GT_AND_NOT);

            // The NOT is now folded into the AND_NOT, so take it out of the block's range.
            BlockRange().Remove(invertedOperand);
        }
    }
#endif // TARGET_ARMARCH

    ContainCheckBinary(node);

    return node->gtNext;
}

//------------------------------------------------------------------------
// LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node.
//
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ class Lowering final : public Phase
void LowerStoreIndir(GenTreeStoreInd* node);
GenTree* LowerAdd(GenTreeOp* node);
GenTree* LowerMul(GenTreeOp* mul);
GenTree* LowerBinaryArithmetic(GenTreeOp* node);
bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
GenTree* LowerConstIntDivOrMod(GenTree* node);
GenTree* LowerSignedDivOrMod(GenTree* node);
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lsraarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ int LinearScan::BuildNode(GenTree* tree)
FALLTHROUGH;

case GT_AND:
case GT_AND_NOT:
case GT_OR:
case GT_XOR:
case GT_LSH:
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ int LinearScan::BuildNode(GenTree* tree)
FALLTHROUGH;

case GT_AND:
case GT_AND_NOT:
case GT_OR:
case GT_XOR:
case GT_LSH:
Expand Down

0 comments on commit 8273939

Please sign in to comment.