[release/7.0-preview1] Ensure that we aren't accidentally generating instructions for unsupported ISAs (#64224)

* Assert that the ISA of the intrinsic ID set on the node is supported

* Ensure gtNewSimdCmpOpAllNode and gtNewSimdCmpOpAnyNode don't generate AVX2 instructions when not supported

* Ensure codegen for Vector128.Dot is correct when SSSE3 is disabled

* Update src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Co-authored-by: Jan Kotas <jkotas@microsoft.com>

* Ensure Vector256.Sum has a check for AVX2

Co-authored-by: Tanner Gooding <tagoo@outlook.com>
Co-authored-by: Jan Kotas <jkotas@microsoft.com>
3 people authored Jan 25, 2022
1 parent 0a462a7 commit aa73132
Showing 5 changed files with 64 additions and 30 deletions.
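
A note on the overall pattern: each earlier JIT phase (importation, morph, lowering) is only allowed to produce hardware intrinsics whose ISA the target actually supports, and codegen now asserts that invariant on entry. A minimal sketch of the guard, reusing the helper names that appear in the diffs below (a sketch only, not the full function):

    // In checked builds, fail fast if any earlier phase rewrote the tree
    // into an intrinsic whose ISA is unsupported, instead of silently
    // emitting an instruction the CPU would fault on (#UD).
    void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
    {
        const HWIntrinsic intrin(node);
        assert(compiler->compIsaSupportedDebugOnly(HWIntrinsicInfo::lookupIsa(intrin.id)));
        // ... instruction selection and emission ...
    }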
61 changes: 42 additions & 19 deletions src/coreclr/jit/gentree.cpp
@@ -19371,20 +19371,22 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op,

     NamedIntrinsic intrinsic = NI_Illegal;
 
-#if defined(TARGET_XARCH)
-    if (simdSize == 32)
-    {
-        assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
-        assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
-    }
-#endif // TARGET_XARCH
-
     switch (op)
     {
 #if defined(TARGET_XARCH)
         case GT_EQ:
         {
-            intrinsic = (simdSize == 32) ? NI_Vector256_op_Equality : NI_Vector128_op_Equality;
+            if (simdSize == 32)
+            {
+                assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
+                assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
+
+                intrinsic = NI_Vector256_op_Equality;
+            }
+            else
+            {
+                intrinsic = NI_Vector128_op_Equality;
+            }
             break;
         }

@@ -19400,6 +19402,12 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op,
 
             if (simdSize == 32)
             {
+                // TODO-XArch-CQ: It's a non-trivial amount of work to support these
+                // for floating-point while only utilizing AVX. It would require, among
+                // other things, inverting the comparison and potentially support for a
+                // new Avx.TestNotZ intrinsic to ensure the codegen remains efficient.
+                assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
+
                 intrinsic     = NI_Vector256_op_Equality;
                 getAllBitsSet = NI_Vector256_get_AllBitsSet;
             }
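
Background for the assert above: with AVX alone, 256-bit comparisons exist only for floating-point (vcmpps/vcmppd), and vptest can then test the resulting mask; 256-bit integer comparisons (vpcmpeqb/w/d/q and friends) require AVX2. A standalone sketch of both shapes in C intrinsics, assuming immintrin.h (illustrative only, not the JIT's expansion):

    #include <immintrin.h>

    // "All elements equal" over eight floats: AVX suffices, since vcmpps
    // and vptest are both AVX instructions.
    bool AllEqualFloatAvx(__m256 a, __m256 b)
    {
        __m256i mask = _mm256_castps_si256(_mm256_cmp_ps(a, b, _CMP_EQ_OQ));
        // testc sets CF iff (~mask & allOnes) == 0, i.e. mask is all ones.
        return _mm256_testc_si256(mask, _mm256_set1_epi32(-1)) != 0;
    }

    // The same shape over eight ints needs vpcmpeqd on ymm registers,
    // which is AVX2-only; hence the assert in the hunk above.
    bool AllEqualInt32Avx2(__m256i a, __m256i b)
    {
        __m256i mask = _mm256_cmpeq_epi32(a, b);
        return _mm256_testc_si256(mask, _mm256_set1_epi32(-1)) != 0;
    }

This is also why the ordered comparisons (GT_GE/GT_GT/GT_LE/GT_LT) assert AVX2 even for floating-point base types: they route the comparison mask through an integer-typed equality against AllBitsSet (or inequality against Zero in gtNewSimdCmpOpAnyNode), and the hwintrinsicxarch.cpp hunks below drop the varTypeIsFloating escape from the corresponding import checks for the same reason.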
@@ -19510,14 +19518,6 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op,
 
     NamedIntrinsic intrinsic = NI_Illegal;
 
-#if defined(TARGET_XARCH)
-    if (simdSize == 32)
-    {
-        assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
-        assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
-    }
-#endif // TARGET_XARCH
-
     switch (op)
     {
 #if defined(TARGET_XARCH)
@@ -19530,7 +19530,20 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op,
             // We want to generate a comparison along the lines of
             // GT_XX(op1, op2).As<T, TInteger>() != Vector128<TInteger>.Zero
 
-            intrinsic = (simdSize == 32) ? NI_Vector256_op_Inequality : NI_Vector128_op_Inequality;
+            if (simdSize == 32)
+            {
+                // TODO-XArch-CQ: It's a non-trivial amount of work to support these
+                // for floating-point while only utilizing AVX. It would require, among
+                // other things, inverting the comparison and potentially support for a
+                // new Avx.TestNotZ intrinsic to ensure the codegen remains efficient.
+                assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
+
+                intrinsic = NI_Vector256_op_Inequality;
+            }
+            else
+            {
+                intrinsic = NI_Vector128_op_Inequality;
+            }
 
             op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize,
                                      /* isSimdAsHWIntrinsic */ false);
@@ -19552,7 +19565,17 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op,
 
         case GT_NE:
        {
-            intrinsic = (simdSize == 32) ? NI_Vector256_op_Inequality : NI_Vector128_op_Inequality;
+            if (simdSize == 32)
+            {
+                assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
+                assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
+
+                intrinsic = NI_Vector256_op_Inequality;
+            }
+            else
+            {
+                intrinsic = NI_Vector128_op_Inequality;
+            }
             break;
         }
 #elif defined(TARGET_ARM64)
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -206,6 +206,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 {
     const HWIntrinsic intrin(node);
 
+    // We need to validate that other phases of the compiler haven't introduced unsupported intrinsics
+    assert(compiler->compIsaSupportedDebugOnly(HWIntrinsicInfo::lookupIsa(intrin.id)));
+
     regNumber targetReg = node->GetRegNum();
 
     regNumber op1Reg = REG_NA;
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -86,6 +86,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
     HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId);
     size_t              numArgs  = node->GetOperandCount();
 
+    // We need to validate that other phases of the compiler haven't introduced unsupported intrinsics
+    assert(compiler->compIsaSupportedDebugOnly(isa));
+
     int ival = HWIntrinsicInfo::lookupIval(intrinsicId, compiler->compOpportunisticallyDependsOn(InstructionSet_AVX));
 
     assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId));
25 changes: 15 additions & 10 deletions src/coreclr/jit/hwintrinsicxarch.cpp
@@ -1024,7 +1024,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 2);
 
-            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);
 
@@ -1287,7 +1287,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 2);
 
-            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);
 
@@ -1305,7 +1305,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 2);
 
-            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);
 
@@ -1339,7 +1339,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 2);
 
-            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);
 
@@ -1357,7 +1357,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 2);
 
-            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);
 
@@ -1391,7 +1391,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 2);
 
-            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);
 
@@ -1409,7 +1409,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 2);
 
-            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);
 
@@ -1443,7 +1443,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 2);
 
-            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);
 
@@ -1461,7 +1461,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
         {
             assert(sig->numArgs == 2);
 
-            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);
 
@@ -2024,7 +2024,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
             assert(sig->numArgs == 1);
             var_types simdType = getSIMDTypeForSize(simdSize);
 
-            if (varTypeIsFloating(simdBaseType))
+            if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2))
+            {
+                // Vector256 for integer types requires AVX2
+                break;
+            }
+            else if (varTypeIsFloating(simdBaseType))
             {
                 if (!compOpportunisticallyDependsOn(InstructionSet_SSE3))
                 {
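Context for the Sum hunk: a 256-bit integer horizontal sum needs cross-lane work (vextracti128) that is AVX2-only, so on an AVX-only machine the importer now breaks out and leaves the call to the managed fallback rather than building a Vector256 node codegen cannot honor. A standalone sketch of the AVX2 shape in C intrinsics (illustrative only, not the JIT's exact expansion):

    #include <immintrin.h>

    int SumInt32Avx2(__m256i v)
    {
        // vextracti128 is AVX2; plain AVX has no 256-bit integer path.
        __m128i lo = _mm256_castsi256_si128(v);
        __m128i hi = _mm256_extracti128_si256(v, 1);
        __m128i s  = _mm_add_epi32(lo, hi);               // e0+e4, e1+e5, e2+e6, e3+e7
        s = _mm_add_epi32(s, _mm_shuffle_epi32(s, 0x4E)); // add the swapped 64-bit halves
        s = _mm_add_epi32(s, _mm_shuffle_epi32(s, 0xB1)); // add the swapped 32-bit pairs
        return _mm_cvtsi128_si32(s);                      // total is in element 0
    }
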
2 changes: 1 addition & 1 deletion src/coreclr/jit/lowerxarch.cpp
@@ -3429,7 +3429,7 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
             // e6, e7, e4, e5 | e2, e3, e0, e1
             // e7, e6, e5, e4 | e3, e2, e1, e0
 
-            shuffleConst = 0x4D;
+            shuffleConst = 0x4E;
             break;
         }
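
The one-byte fix is easiest to verify by decoding the shuffle control byte: pshufd/shufps pack four 2-bit source indices, lowest destination element first. A small self-checking sketch (assuming the standard SSE immediate encoding):

    // Which source element a pshufd/shufps imm8 selects for destLane.
    constexpr int ShuffleSrc(unsigned imm8, int destLane)
    {
        return (imm8 >> (2 * destLane)) & 0x3;
    }

    // 0x4E = 0b01001110 -> (e2, e3, e0, e1): the 64-bit half swap that the
    // "e2, e3, e0, e1" comment above calls for.
    static_assert(ShuffleSrc(0x4E, 0) == 2 && ShuffleSrc(0x4E, 1) == 3 &&
                  ShuffleSrc(0x4E, 2) == 0 && ShuffleSrc(0x4E, 3) == 1, "");

    // The old 0x4D = 0b01001101 -> (e1, e3, e0, e1): it duplicates e1
    // instead of swapping halves, breaking the Dot fallback used when
    // SSSE3 (and its phaddd/phaddw horizontal adds) is unavailable.
    static_assert(ShuffleSrc(0x4D, 0) == 1 && ShuffleSrc(0x4D, 1) == 3 &&
                  ShuffleSrc(0x4D, 2) == 0 && ShuffleSrc(0x4D, 3) == 1, "");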

Expand Down
