Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize VectorX<T>.ConditionalSelect for constant masks #104092

Merged
merged 10 commits into from
Jul 4, 2024
Merged
51 changes: 51 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29442,6 +29442,57 @@ bool GenTree::IsInvariant() const
return OperIsConst() || OperIs(GT_LCL_ADDR) || OperIs(GT_FTN_ADDR);
}

//-------------------------------------------------------------------
// IsVectorPerElementMask: returns true if this node is known to produce a per-element mask,
// i.e. a vector in which every element has either all bits set or none of them.
//
// Two kinds of node are recognized:
//   * a vector constant whose elements, viewed at the width of simdBaseType, are each
//     all-bits-set or zero;
//   * a hardware intrinsic for which HWIntrinsicInfo::ReturnsPerElementMask is true.
//
// Arguments:
// simdBaseType - the base type used to split a vector constant into elements.
// simdSize - the size of the SIMD type; together with simdBaseType it determines
//            how many elements of a vector constant are checked.
//
// Returns:
// True if this node is a per-element mask as described above; false for any other
// node, and always false when FEATURE_SIMD is not defined.
//
bool GenTree::IsVectorPerElementMask(var_types simdBaseType, unsigned simdSize) const
ezhevita marked this conversation as resolved.
Show resolved Hide resolved
{
#ifdef FEATURE_SIMD
if (IsCnsVec())
{
const GenTreeVecCon* vecCon = AsVecCon();

int elementCount = vecCon->ElementCount(simdSize, simdBaseType);

// Check the constant's raw bits using the unsigned view whose width matches the base type.
// Signedness is irrelevant here, and floating-point base types are checked through the
// same-width integer view (u32 for float, u64 for double).
switch (simdBaseType)
{
case TYP_BYTE:
case TYP_UBYTE:
return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u8[0], elementCount);
case TYP_SHORT:
case TYP_USHORT:
return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u16[0], elementCount);
case TYP_INT:
case TYP_UINT:
case TYP_FLOAT:
return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u32[0], elementCount);
case TYP_LONG:
case TYP_ULONG:
case TYP_DOUBLE:
return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u64[0], elementCount);
default:
// Any other base type is not expected here.
unreached();
}
}
else if (OperIsHWIntrinsic())
{
// TODO-XARCH-AVX512 Use VPBLENDM* and take input directly from K registers if cond is from
// MoveMaskToVectorSpecial.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment isn't applicable to the general query, it was specific to the CndSel lowering logic

// Not a constant: defer to the intrinsic table's per-element-mask query for this intrinsic id.
return HWIntrinsicInfo::ReturnsPerElementMask(AsHWIntrinsic()->GetHWIntrinsicId());
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
}
#endif // FEATURE_SIMD

// Neither a vector constant nor a hardware intrinsic (or SIMD support is compiled out).
return false;
}

//------------------------------------------------------------------------
// IsNeverNegative: returns true if the given tree is known to be never
// negative, i.e. the upper bit will always be zero.
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2317,6 +2317,7 @@ struct GenTree
bool Precedes(GenTree* other);

bool IsInvariant() const;
bool IsVectorPerElementMask(var_types simdBaseType, unsigned simdSize) const;

bool IsNeverNegative(Compiler* comp) const;
bool IsNeverNegativeOne(Compiler* comp) const;
Expand Down
9 changes: 4 additions & 5 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2994,13 +2994,12 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
GenTree* op3 = node->Op(3);

// If the condition vector comes from a hardware intrinsic that
// returns a per-element mask (marked with HW_Flag_ReturnsPerElementMask),
// we can optimize the entire conditional select to
// a single BlendVariable instruction (if supported by the architecture)
// returns a per-element mask, we can optimize the entire
// conditional select to a single BlendVariable instruction
// (if supported by the architecture)

// TODO-XARCH-AVX512 Use VPBLENDM* and take input directly from K registers if cond is from MoveMaskToVectorSpecial.
// First, determine if the condition is a per-element mask
if (op1->OperIsHWIntrinsic() && HWIntrinsicInfo::ReturnsPerElementMask(op1->AsHWIntrinsic()->GetHWIntrinsicId()))
if (op1->IsVectorPerElementMask(simdBaseType, simdSize))
{
// Next, determine if the target architecture supports BlendVariable
NamedIntrinsic blendVariableId = NI_Illegal;
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/jit/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ static bool ElementsAreSame(T* array, size_t size)
return true;
}

//------------------------------------------------------------------------
// ElementsAreAllBitsSetOrZero: checks whether every element of an array is
// either zero or has all of its bits set.
//
// Arguments:
//    array - pointer to the first element to inspect.
//    size  - the number of elements to inspect.
//
// Returns:
//    True if each of the first 'size' elements equals static_cast<T>(0) or
//    static_cast<T>(~0); an empty range yields true.
//
template <typename T>
static bool ElementsAreAllBitsSetOrZero(T* array, size_t size)
{
    const T noBitsSet  = static_cast<T>(0);
    const T allBitsSet = static_cast<T>(~0);

    T* const end = array + size;

    for (T* element = array; element != end; element++)
    {
        const bool isMaskElement = (*element == noBitsSet) || (*element == allBitsSet);

        if (!isMaskElement)
        {
            return false;
        }
    }

    return true;
}

struct simd8_t
{
union
Expand Down
Loading