Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement GT_AND_NOT for ARM/ARM64 #59881

Merged
merged 4 commits into from
Nov 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,8 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
var_types targetType = treeNode->TypeGet();
emitter* emit = GetEmitter();

assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_ADD_LO || oper == GT_ADD_HI ||
oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_OR || oper == GT_XOR || oper == GT_AND);
assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_ADD_LO, GT_ADD_HI, GT_SUB_LO, GT_SUB_HI, GT_OR, GT_XOR, GT_AND,
GT_AND_NOT));

GenTree* op1 = treeNode->gtGetOp1();
GenTree* op2 = treeNode->gtGetOp2();
Expand Down Expand Up @@ -664,6 +664,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_AND:
ins = INS_AND;
break;
case GT_AND_NOT:
ins = INS_bic;
break;
case GT_MUL:
ins = INS_MUL;
break;
Expand Down
11 changes: 8 additions & 3 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1813,7 +1813,7 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
genProduceReg(treeNode);
}

// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, AND_NOT, OR and XOR
// This method is expected to have called genConsumeOperands() before calling it.
void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
{
Expand All @@ -1822,8 +1822,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
var_types targetType = treeNode->TypeGet();
emitter* emit = GetEmitter();

assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND ||
oper == GT_OR || oper == GT_XOR);
assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR));

GenTree* op1 = treeNode->gtGetOp1();
GenTree* op2 = treeNode->gtGetOp2();
Expand All @@ -1842,6 +1841,9 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
case GT_AND:
ins = INS_ands;
break;
case GT_AND_NOT:
ins = INS_bics;
break;
default:
noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set");
}
Expand Down Expand Up @@ -3115,6 +3117,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_AND:
ins = INS_and;
break;
case GT_AND_NOT:
ins = INS_bic;
break;
case GT_DIV:
ins = INS_sdiv;
break;
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
case GT_OR:
case GT_XOR:
case GT_AND:
case GT_AND_NOT:
assert(varTypeIsIntegralOrI(treeNode));

FALLTHROUGH;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8095,7 +8095,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
if (dst->gtSetFlags())
{
assert((ins == INS_add) || (ins == INS_adc) || (ins == INS_sub) || (ins == INS_sbc) || (ins == INS_and) ||
(ins == INS_orr) || (ins == INS_eor) || (ins == INS_orn));
(ins == INS_orr) || (ins == INS_eor) || (ins == INS_orn) || (ins == INS_bic));
flags = INS_FLAGS_SET;
}

Expand Down
45 changes: 25 additions & 20 deletions src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,32 +126,12 @@ GTNODE(UMOD , GenTreeOp ,0,GTK_BINOP)
GTNODE(OR , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP))
GTNODE(XOR , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP))
GTNODE(AND , GenTreeOp ,1,(GTK_BINOP|GTK_LOGOP))
GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP)

GTNODE(LSH , GenTreeOp ,0,GTK_BINOP)
GTNODE(RSH , GenTreeOp ,0,GTK_BINOP)
GTNODE(RSZ , GenTreeOp ,0,GTK_BINOP)
GTNODE(ROL , GenTreeOp ,0,GTK_BINOP)
GTNODE(ROR , GenTreeOp ,0,GTK_BINOP)
GTNODE(INC_SATURATE , GenTreeOp ,0,GTK_UNOP) // saturating increment, used in division by a constant (LowerUnsignedDivOrMod)

// Returns high bits (top N bits of the 2N bit result of an NxN multiply)
// GT_MULHI is used in division by a constant (LowerUnsignedDivOrMod). We turn
// the div into a MULHI + some adjustments. In codegen, we only use the
// results of the high register, and we drop the low results.
GTNODE(MULHI , GenTreeOp ,1,GTK_BINOP)

// A mul that returns the 2N bit result of an NxN multiply. This op is used for
// multiplies that take two ints and return a long result. For 32 bit targets,
// all other multiplies with long results are morphed into helper calls.
// It is similar to GT_MULHI, the difference being that GT_MULHI drops the lo
// part of the result, whereas GT_MUL_LONG keeps both parts of the result.
// MUL_LONG is also used on ARM64, where 64 bit multiplication is more expensive.
#if !defined(TARGET_64BIT)
GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP)
#elif defined(TARGET_ARM64)
GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP)
#endif

GTNODE(ASG , GenTreeOp ,0,(GTK_BINOP|GTK_NOTLIR))
GTNODE(EQ , GenTreeOp ,0,(GTK_BINOP|GTK_RELOP))
Expand Down Expand Up @@ -220,6 +200,31 @@ GTNODE(SIMD , GenTreeSIMD ,0,(GTK_BINOP|GTK_EXOP)) // SIM
GTNODE(HWINTRINSIC , GenTreeHWIntrinsic ,0,(GTK_BINOP|GTK_EXOP)) // hardware intrinsics
#endif // FEATURE_HW_INTRINSICS

//-----------------------------------------------------------------------------
// Backend-specific arithmetic nodes:
//-----------------------------------------------------------------------------

GTNODE(INC_SATURATE , GenTreeOp ,0,GTK_UNOP) // saturating increment, used in division by a constant (LowerUnsignedDivOrMod)

// Returns high bits (top N bits of the 2N bit result of an NxN multiply)
// GT_MULHI is used in division by a constant (LowerUnsignedDivOrMod). We turn
// the div into a MULHI + some adjustments. In codegen, we only use the
// results of the high register, and we drop the low results.
GTNODE(MULHI , GenTreeOp ,1,GTK_BINOP)

// A mul that returns the 2N bit result of an NxN multiply. This op is used for
// multiplies that take two ints and return a long result. For 32 bit targets,
// all other multiplies with long results are morphed into helper calls.
// It is similar to GT_MULHI, the difference being that GT_MULHI drops the lo
// part of the result, whereas GT_MUL_LONG keeps both parts of the result.
// MUL_LONG is also used on ARM64, where 64 bit multiplication is more expensive.
#if !defined(TARGET_64BIT)
GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP)
#elif defined(TARGET_ARM64)
GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP)
#endif
// AndNot - emitted on ARM/ARM64 as the BIC instruction. Also used for creating AndNot HWINTRINSIC vector nodes in a cross-ISA manner.
GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP)
//-----------------------------------------------------------------------------
// LIR specific compare and conditional branch/set nodes:
//-----------------------------------------------------------------------------
Expand Down
52 changes: 50 additions & 2 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
case GT_AND:
case GT_OR:
case GT_XOR:
ContainCheckBinary(node->AsOp());
break;
return LowerBinaryArithmetic(node->AsOp());

case GT_MUL:
case GT_MULHI:
Expand Down Expand Up @@ -5098,6 +5097,55 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
return nullptr;
}

//------------------------------------------------------------------------
// LowerBinaryArithmetic: lowers the given binary arithmetic node.
//
// Recognizes opportunities for using target-independent "combined" nodes
// (currently AND_NOT on ARMArch). Performs containment checks.
//
// On ARMArch, when optimizations are enabled, a GT_AND with a GT_NOT
// operand, i.e. AND(x, NOT(y)) or AND(NOT(y), x), is rewritten in place
// to AND_NOT(x, y) and the GT_NOT node is removed from the block's range.
// If both operands are GT_NOT, only the first one is folded; the second
// stays as the regular (non-inverted) operand.
//
// Arguments:
//    node - the arithmetic node to lower
//
// Returns:
//    The next node to lower.
//
GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* node)
{
    // TODO-CQ-XArch: support BMI1 "andn" in codegen and condition
    // this logic on the support for the instruction set on XArch.
    CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef TARGET_ARMARCH
    if (comp->opts.OptimizationEnabled() && node->OperIs(GT_AND))
    {
        GenTree* opNode  = nullptr; // the operand that is used as-is
        GenTree* notNode = nullptr; // the GT_NOT operand being folded away, if any

        // GT_AND is commutative, so the GT_NOT may appear on either side.
        if (node->gtGetOp1()->OperIs(GT_NOT))
        {
            notNode = node->gtGetOp1();
            opNode  = node->gtGetOp2();
        }
        else if (node->gtGetOp2()->OperIs(GT_NOT))
        {
            notNode = node->gtGetOp2();
            opNode  = node->gtGetOp1();
        }

        if (notNode != nullptr)
        {
            // The inverted value always ends up as op2 of the AND_NOT node,
            // regardless of which side the GT_NOT was originally on.
            node->gtOp1 = opNode;
            node->gtOp2 = notNode->AsUnOp()->gtGetOp1();
            node->ChangeOper(GT_AND_NOT);

            // The GT_NOT is no longer referenced by "node"; drop it from the range.
            BlockRange().Remove(notNode);
        }
    }
#endif // TARGET_ARMARCH

    // Containment is checked after the (possible) rewrite so it sees the final operands.
    ContainCheckBinary(node);

    return node->gtNext;
}

//------------------------------------------------------------------------
// LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node.
//
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ class Lowering final : public Phase
void LowerStoreIndir(GenTreeStoreInd* node);
GenTree* LowerAdd(GenTreeOp* node);
GenTree* LowerMul(GenTreeOp* mul);
GenTree* LowerBinaryArithmetic(GenTreeOp* node);
bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
GenTree* LowerConstIntDivOrMod(GenTree* node);
GenTree* LowerSignedDivOrMod(GenTree* node);
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lsraarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ int LinearScan::BuildNode(GenTree* tree)
FALLTHROUGH;

case GT_AND:
case GT_AND_NOT:
case GT_OR:
case GT_XOR:
case GT_LSH:
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ int LinearScan::BuildNode(GenTree* tree)
FALLTHROUGH;

case GT_AND:
case GT_AND_NOT:
case GT_OR:
case GT_XOR:
case GT_LSH:
Expand Down