Skip to content

Commit

Permalink
Updating CreateScalar to be intrinsic for Vector64/128/256 (#77798)
Browse files Browse the repository at this point in the history
* Updating CreateScalar to be intrinsic for Vector64/128/256

* Applying formatting patch

* Fixing CreateScalar VecCon nodes created on import, and finding the use before insertion

* Applying formatting patch

* Ensure we zero extend TYP_BYTE and TYP_SHORT

* Ensure TYP_UBYTE and TYP_USHORT are also explicitly zero extended

* Fix the cast and add a comment explaining "why"

* Applying formatting patch
  • Loading branch information
tannergooding authored Nov 11, 2022
1 parent 4820105 commit 00e6482
Show file tree
Hide file tree
Showing 11 changed files with 407 additions and 294 deletions.
37 changes: 30 additions & 7 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17362,32 +17362,48 @@ bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
// true if node represents a constant; otherwise, false
bool GenTreeVecCon::IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val)
{
var_types simdType = node->TypeGet();
var_types simdBaseType = node->GetSimdBaseType();
unsigned simdSize = node->GetSimdSize();
NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
var_types simdType = node->TypeGet();
var_types simdBaseType = node->GetSimdBaseType();
unsigned simdSize = node->GetSimdSize();

size_t argCnt = node->GetOperandCount();
size_t cnsArgCnt = 0;

switch (node->GetHWIntrinsicId())
switch (intrinsic)
{
case NI_Vector128_Create:
case NI_Vector128_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
#if defined(TARGET_XARCH)
case NI_Vector256_Create:
case NI_Vector256_CreateScalar:
case NI_Vector256_CreateScalarUnsafe:
#elif defined(TARGET_ARM64)
case NI_Vector64_Create:
case NI_Vector64_CreateScalar:
case NI_Vector64_CreateScalarUnsafe:
#endif
{
// Zero out the simd32Val
simd32Val = {};

// These intrinsics are meant to set the same value to every element.
if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, simd32Val, simdBaseType))
{
// Now assign the rest of the arguments.
for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
// CreateScalar leaves the upper bits as zero

#if defined(TARGET_XARCH)
if ((intrinsic != NI_Vector128_CreateScalar) && (intrinsic != NI_Vector256_CreateScalar))
#elif defined(TARGET_ARM64)
if ((intrinsic != NI_Vector64_CreateScalar) && (intrinsic != NI_Vector128_CreateScalar))
#endif
{
HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
// Now assign the rest of the arguments.
for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
{
HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
}
}

cnsArgCnt = 1;
Expand Down Expand Up @@ -18974,6 +18990,13 @@ bool GenTree::isContainableHWIntrinsic() const
return true;
}

case NI_Vector128_get_Zero:
case NI_Vector256_get_Zero:
{
// These HWIntrinsic operations are contained as part of Sse41.Insert
return true;
}

default:
{
return false;
Expand Down
47 changes: 40 additions & 7 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Vector64_CreateScalar:
case NI_Vector64_CreateScalarUnsafe:
{
if (genTypeSize(simdBaseType) == 8)
Expand All @@ -556,12 +557,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector64_Create:
case NI_Vector128_Create:
case NI_Vector128_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
{
uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
assert((sig->numArgs == 1) || (sig->numArgs == simdLength));

bool isConstant = true;
bool isConstant = true;
bool isCreateScalar = (intrinsic == NI_Vector64_CreateScalar) || (intrinsic == NI_Vector128_CreateScalar);

if (varTypeIsFloating(simdBaseType))
{
Expand Down Expand Up @@ -620,7 +623,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u8[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.u8[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < simdLength - 1; index++)
{
Expand All @@ -641,7 +649,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u16[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.u16[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand All @@ -662,7 +675,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u32[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.u32[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand All @@ -683,7 +701,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u64[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.u64[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand All @@ -703,7 +726,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.f32[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.f32[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand All @@ -723,7 +751,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.f64[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.f64[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,13 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins,

regNumber op1Reg = op1->GetRegNum();

if ((ins == INS_insertps) && (op1Reg == REG_NA))
{
// insertps is special and can contain op1 when it is zero
assert(op1->isContained() && op1->IsVectorZero());
op1Reg = targetReg;
}

assert(targetReg != REG_NA);
assert(op1Reg != REG_NA);

Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ HARDWARE_INTRINSIC(Vector64, ConvertToInt64,
HARDWARE_INTRINSIC(Vector64, ConvertToSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, ConvertToUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, ConvertToUInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, CreateScalar, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(Vector64, Divide, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, Dot, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
Expand Down Expand Up @@ -143,6 +144,7 @@ HARDWARE_INTRINSIC(Vector128, ConvertToSingle,
HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
Expand Down
Loading

0 comments on commit 00e6482

Please sign in to comment.