Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating CreateScalar to be intrinsic for Vector64/128/256 #77798

Merged
merged 12 commits into from
Nov 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17321,32 +17321,48 @@ bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
// true if node represents a constant; otherwise, false
bool GenTreeVecCon::IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val)
{
var_types simdType = node->TypeGet();
var_types simdBaseType = node->GetSimdBaseType();
unsigned simdSize = node->GetSimdSize();
NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
var_types simdType = node->TypeGet();
var_types simdBaseType = node->GetSimdBaseType();
unsigned simdSize = node->GetSimdSize();

size_t argCnt = node->GetOperandCount();
size_t cnsArgCnt = 0;

switch (node->GetHWIntrinsicId())
switch (intrinsic)
{
case NI_Vector128_Create:
case NI_Vector128_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
#if defined(TARGET_XARCH)
case NI_Vector256_Create:
case NI_Vector256_CreateScalar:
case NI_Vector256_CreateScalarUnsafe:
#elif defined(TARGET_ARM64)
case NI_Vector64_Create:
case NI_Vector64_CreateScalar:
case NI_Vector64_CreateScalarUnsafe:
#endif
{
// Zero out the simd32Val
simd32Val = {};
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

// With a single argument, these intrinsics set element 0 and, except for CreateScalar, replicate that same value to every other element.
if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, simd32Val, simdBaseType))
{
// Now assign the rest of the arguments.
for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
// CreateScalar leaves the upper bits as zero

#if defined(TARGET_XARCH)
if ((intrinsic != NI_Vector128_CreateScalar) && (intrinsic != NI_Vector256_CreateScalar))
#elif defined(TARGET_ARM64)
if ((intrinsic != NI_Vector64_CreateScalar) && (intrinsic != NI_Vector128_CreateScalar))
#endif
{
HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
// Now assign the rest of the arguments.
for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
{
HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
}
}

cnsArgCnt = 1;
Expand Down Expand Up @@ -18933,6 +18949,13 @@ bool GenTree::isContainableHWIntrinsic() const
return true;
}

case NI_Vector128_get_Zero:
case NI_Vector256_get_Zero:
{
// These HWIntrinsic operations are contained as part of Sse41.Insert
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
return true;
}

default:
{
return false;
Expand Down
47 changes: 40 additions & 7 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Vector64_CreateScalar:
case NI_Vector64_CreateScalarUnsafe:
{
if (genTypeSize(simdBaseType) == 8)
Expand All @@ -556,12 +557,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector64_Create:
case NI_Vector128_Create:
case NI_Vector128_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
{
uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
assert((sig->numArgs == 1) || (sig->numArgs == simdLength));

bool isConstant = true;
bool isConstant = true;
bool isCreateScalar = (intrinsic == NI_Vector64_CreateScalar) || (intrinsic == NI_Vector128_CreateScalar);

if (varTypeIsFloating(simdBaseType))
{
Expand Down Expand Up @@ -620,7 +623,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u8[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.u8[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < simdLength - 1; index++)
{
Expand All @@ -641,7 +649,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u16[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.u16[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand All @@ -662,7 +675,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u32[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.u32[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand All @@ -683,7 +701,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u64[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.u64[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand All @@ -703,7 +726,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.f32[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.f32[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand All @@ -723,7 +751,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.f64[simdLength - 1 - index] = cnsVal;
}

if (sig->numArgs == 1)
if (isCreateScalar)
{
vecCon->gtSimd32Val = {};
vecCon->gtSimd32Val.f64[0] = cnsVal;
}
else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,13 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins,

regNumber op1Reg = op1->GetRegNum();

if ((ins == INS_insertps) && (op1Reg == REG_NA))
{
// insertps is special and can contain op1 when it is zero
assert(op1->isContained() && op1->IsVectorZero());
op1Reg = targetReg;
}

assert(targetReg != REG_NA);
assert(op1Reg != REG_NA);

Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ HARDWARE_INTRINSIC(Vector64, ConvertToInt64,
HARDWARE_INTRINSIC(Vector64, ConvertToSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, ConvertToUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, ConvertToUInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, CreateScalar, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(Vector64, Divide, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, Dot, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
Expand Down Expand Up @@ -143,6 +144,7 @@ HARDWARE_INTRINSIC(Vector128, ConvertToSingle,
HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
Expand Down
Loading