diff --git a/src/coreclr/src/jit/codegen.h b/src/coreclr/src/jit/codegen.h index 4262e561fe78..2d6167fb8916 100644 --- a/src/coreclr/src/jit/codegen.h +++ b/src/coreclr/src/jit/codegen.h @@ -978,7 +978,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genSIMDIntrinsicInit(GenTreeSIMD* simdNode); void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode); void genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode); - void genSIMDIntrinsicUnOpWithImm(GenTreeSIMD* simdNode); void genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode); void genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode); void genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode); diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 2392eb88a22d..79fd8fb6b6b4 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3821,15 +3821,11 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) genSIMDIntrinsicInitN(simdNode); break; - case SIMDIntrinsicSqrt: - case SIMDIntrinsicAbs: case SIMDIntrinsicCast: case SIMDIntrinsicConvertToSingle: case SIMDIntrinsicConvertToInt32: case SIMDIntrinsicConvertToDouble: case SIMDIntrinsicConvertToInt64: - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: genSIMDIntrinsicUnOp(simdNode); break; @@ -3847,24 +3843,11 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThanOrEqual: genSIMDIntrinsicBinOp(simdNode); break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - genSIMDIntrinsicRelOp(simdNode); - break; - case SIMDIntrinsicDotProduct: genSIMDIntrinsicDotProduct(simdNode); break; @@ -3888,10 
+3871,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) genSIMDIntrinsicUpperRestore(simdNode); break; - case SIMDIntrinsicSelect: - NYI("SIMDIntrinsicSelect lowered during import to (a & sel) | (b & ~sel)"); - break; - default: noway_assert(!"Unimplemented SIMD intrinsic."); unreached(); @@ -3949,24 +3928,15 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type { switch (intrinsicId) { - case SIMDIntrinsicAbs: - result = INS_fabs; - break; case SIMDIntrinsicAdd: result = INS_fadd; break; case SIMDIntrinsicBitwiseAnd: result = INS_and; break; - case SIMDIntrinsicBitwiseAndNot: - result = INS_bic; - break; case SIMDIntrinsicBitwiseOr: result = INS_orr; break; - case SIMDIntrinsicBitwiseXor: - result = INS_eor; - break; case SIMDIntrinsicCast: result = INS_mov; break; @@ -3980,24 +3950,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicEqual: result = INS_fcmeq; break; - case SIMDIntrinsicGreaterThan: - result = INS_fcmgt; - break; - case SIMDIntrinsicGreaterThanOrEqual: - result = INS_fcmge; - break; - case SIMDIntrinsicLessThan: - result = INS_fcmlt; - break; - case SIMDIntrinsicLessThanOrEqual: - result = INS_fcmle; - break; - case SIMDIntrinsicMax: - result = INS_fmax; - break; - case SIMDIntrinsicMin: - result = INS_fmin; - break; case SIMDIntrinsicMul: result = INS_fmul; break; @@ -4006,12 +3958,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // Return lower bytes instruction here result = INS_fcvtn; break; - case SIMDIntrinsicSelect: - result = INS_bsl; - break; - case SIMDIntrinsicSqrt: - result = INS_fsqrt; - break; case SIMDIntrinsicSub: result = INS_fsub; break; @@ -4021,12 +3967,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicWidenHi: result = INS_fcvtl2; break; - case SIMDIntrinsicCeil: - result = INS_frintp; - break; - case SIMDIntrinsicFloor: - result = INS_frintm; - 
break; default: assert(!"Unsupported SIMD intrinsic"); unreached(); @@ -4038,25 +3978,15 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type switch (intrinsicId) { - case SIMDIntrinsicAbs: - assert(!isUnsigned); - result = INS_abs; - break; case SIMDIntrinsicAdd: result = INS_add; break; case SIMDIntrinsicBitwiseAnd: result = INS_and; break; - case SIMDIntrinsicBitwiseAndNot: - result = INS_bic; - break; case SIMDIntrinsicBitwiseOr: result = INS_orr; break; - case SIMDIntrinsicBitwiseXor: - result = INS_eor; - break; case SIMDIntrinsicCast: result = INS_mov; break; @@ -4067,26 +3997,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type case SIMDIntrinsicEqual: result = INS_cmeq; break; - case SIMDIntrinsicGreaterThan: - result = isUnsigned ? INS_cmhi : INS_cmgt; - break; - case SIMDIntrinsicGreaterThanOrEqual: - result = isUnsigned ? INS_cmhs : INS_cmge; - break; - case SIMDIntrinsicLessThan: - assert(!isUnsigned); - result = INS_cmlt; - break; - case SIMDIntrinsicLessThanOrEqual: - assert(!isUnsigned); - result = INS_cmle; - break; - case SIMDIntrinsicMax: - result = isUnsigned ? INS_umax : INS_smax; - break; - case SIMDIntrinsicMin: - result = isUnsigned ? 
INS_umin : INS_smin; - break; case SIMDIntrinsicMul: result = INS_mul; break; @@ -4095,9 +4005,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // Return lower bytes instruction here result = INS_xtn; break; - case SIMDIntrinsicSelect: - result = INS_bsl; - break; case SIMDIntrinsicSub: result = INS_sub; break; @@ -4256,13 +4163,11 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs || + assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64 || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCeil || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicFloor); + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64); GenTree* op1 = simdNode->gtGetOp1(); var_types baseType = simdNode->gtSIMDBaseType; @@ -4407,14 +4312,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThan || - 
simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicLessThanOrEqual || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThanOrEqual); + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual); GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); @@ -4442,65 +4340,6 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) genProduceReg(simdNode); } -//-------------------------------------------------------------------------------- -// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operater -// == and != -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) -{ - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality); - - GenTree* op1 = simdNode->gtGetOp1(); - GenTree* op2 = simdNode->gtGetOp2(); - var_types baseType = simdNode->gtSIMDBaseType; - regNumber targetReg = simdNode->GetRegNum(); - var_types targetType = simdNode->TypeGet(); - - genConsumeOperands(simdNode); - regNumber op1Reg = op1->GetRegNum(); - regNumber op2Reg = op2->GetRegNum(); - regNumber otherReg = op2Reg; - - instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, baseType); - emitAttr attr = (simdNode->gtSIMDSize > 8) ? EA_16BYTE : EA_8BYTE; - insOpts opt = genGetSimdInsOpt(attr, baseType); - - // TODO-ARM64-CQ Contain integer constants where possible - - regNumber tmpFloatReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); - - GetEmitter()->emitIns_R_R_R(ins, attr, tmpFloatReg, op1Reg, op2Reg, opt); - - if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) - { - // For 12Byte vectors we must set upper bits to get correct comparison - // We do not assume upper bits are zero. 
- instGen_Set_Reg_To_Imm(EA_4BYTE, targetReg, -1); - GetEmitter()->emitIns_R_R_I(INS_ins, EA_4BYTE, tmpFloatReg, targetReg, 3); - } - - GetEmitter()->emitIns_R_R(INS_uminv, attr, tmpFloatReg, tmpFloatReg, - (simdNode->gtSIMDSize > 8) ? INS_OPTS_16B : INS_OPTS_8B); - - GetEmitter()->emitIns_R_R_I(INS_mov, EA_1BYTE, targetReg, tmpFloatReg, 0); - - if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality) - { - GetEmitter()->emitIns_R_R_I(INS_eor, EA_4BYTE, targetReg, targetReg, 0x1); - } - - GetEmitter()->emitIns_R_R_I(INS_and, EA_4BYTE, targetReg, targetReg, 0x1); - - genProduceReg(simdNode); -} - //-------------------------------------------------------------------------------- // genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product. // diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 5a2dfcfbfb59..1fd8d8d54756 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -2621,6 +2621,16 @@ class Compiler var_types baseType, unsigned size); + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types type, + NamedIntrinsic hwIntrinsicID, + var_types baseType, + unsigned size) + { + GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, hwIntrinsicID, baseType, size); + node->gtFlags |= GTF_SIMDASHW_OP; + return node; + } + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode( var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) { @@ -7970,9 +7980,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX static bool isRelOpSIMDIntrinsic(SIMDIntrinsicID intrinsicId) { - return (intrinsicId == SIMDIntrinsicEqual || intrinsicId == SIMDIntrinsicLessThan || - intrinsicId == SIMDIntrinsicLessThanOrEqual || intrinsicId == SIMDIntrinsicGreaterThan || - intrinsicId == SIMDIntrinsicGreaterThanOrEqual); + return (intrinsicId == SIMDIntrinsicEqual); } // Returns base type of a TYP_SIMD local. 
@@ -8076,22 +8084,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Create a GT_SIMD tree for a Get property of SIMD vector with a fixed index. GenTreeSIMD* impSIMDGetFixed(var_types simdType, var_types baseType, unsigned simdSize, int index); - // Creates a GT_SIMD tree for Select operation - GenTree* impSIMDSelect(CORINFO_CLASS_HANDLE typeHnd, - var_types baseType, - unsigned simdVectorSize, - GenTree* op1, - GenTree* op2, - GenTree* op3); - - // Creates a GT_SIMD tree for Min/Max operation - GenTree* impSIMDMinMax(SIMDIntrinsicID intrinsicId, - CORINFO_CLASS_HANDLE typeHnd, - var_types baseType, - unsigned simdVectorSize, - GenTree* op1, - GenTree* op2); - // Transforms operands and returns the SIMD intrinsic to be applied on // transformed operands to obtain given relop result. SIMDIntrinsicID impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, @@ -8101,9 +8093,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX GenTree** op1, GenTree** op2); - // Creates a GT_SIMD tree for Abs intrinsic. - GenTree* impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned simdVectorSize, GenTree* op1); - #if defined(TARGET_XARCH) // Transforms operands and returns the SIMD intrinsic to be applied on @@ -8113,26 +8102,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX GenTree** op1, GenTree** op2); - // Transforms operands and returns the SIMD intrinsic to be applied on - // transformed operands to obtain > comparison result. - SIMDIntrinsicID impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd, - unsigned simdVectorSize, - GenTree** op1, - GenTree** op2); - - // Transforms operands and returns the SIMD intrinsic to be applied on - // transformed operands to obtain >= comparison result. 
- SIMDIntrinsicID impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd, - unsigned simdVectorSize, - GenTree** op1, - GenTree** op2); - - // Transforms operands and returns the SIMD intrinsic to be applied on - // transformed operands to obtain >= comparison result in case of int32 - // and small int base type vectors. - SIMDIntrinsicID impSIMDIntegralRelOpGreaterThanOrEqual( - CORINFO_CLASS_HANDLE typeHnd, unsigned simdVectorSize, var_types baseType, GenTree** op1, GenTree** op2); - #endif // defined(TARGET_XARCH) void setLclRelatedToSIMDIntrinsic(GenTree* tree); diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index ece2e7c753fe..191c87322b4e 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -16520,13 +16520,7 @@ bool GenTree::isContained() const { // We have to cast away const-ness since AsOp() method is non-const. const GenTree* childNode = AsOp()->gtGetOp1(); - assert((isMarkedContained == false) || childNode->IsSIMDEqualityOrInequality()); - } - - // these either produce a result in register or set flags reg. - else if (IsSIMDEqualityOrInequality()) - { - assert(!isMarkedContained); + assert(isMarkedContained == false); } // if it's contained it can't be unused. 
@@ -18451,13 +18445,8 @@ bool GenTree::isCommutativeSIMDIntrinsic() case SIMDIntrinsicAdd: case SIMDIntrinsicBitwiseAnd: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: case SIMDIntrinsicEqual: - case SIMDIntrinsicMax: - case SIMDIntrinsicMin: case SIMDIntrinsicMul: - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: return true; default: return false; diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 62abda039dbe..c43227bd6631 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -1650,8 +1650,6 @@ struct GenTree inline bool IsBoxedValue(); - inline bool IsSIMDEqualityOrInequality() const; - static bool OperIsList(genTreeOps gtOper) { return gtOper == GT_LIST; @@ -6799,7 +6797,50 @@ inline bool GenTree::IsIntegralConstVector(ssize_t constVal) assert(gtGetOp2IfPresent() == nullptr); return true; } -#endif +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + if (gtOper == GT_HWINTRINSIC) + { + GenTreeHWIntrinsic* node = AsHWIntrinsic(); + + if (!varTypeIsIntegral(node->gtSIMDBaseType)) + { + // Can't be an integral constant + return false; + } + + GenTree* op1 = gtGetOp1(); + GenTree* op2 = gtGetOp2(); + + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + + if (op1 == nullptr) + { + assert(op2 == nullptr); + + if (constVal == 0) + { +#if defined(TARGET_XARCH) + return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero); +#elif defined(TARGET_ARM64) + return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero); +#endif // !TARGET_XARCH && !TARGET_ARM64 + } + } + else if ((op2 == nullptr) && !op1->OperIsList()) + { + if (op1->IsIntegralConst(constVal)) + { +#if defined(TARGET_XARCH) + return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create); +#elif defined(TARGET_ARM64) + return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create); +#endif // !TARGET_XARCH && 
!TARGET_ARM64 + } + } + } +#endif // FEATURE_HW_INTRINSICS return false; } @@ -6829,19 +6870,6 @@ inline bool GenTree::IsBoxedValue() return (gtOper == GT_BOX) && (gtFlags & GTF_BOX_VALUE); } -inline bool GenTree::IsSIMDEqualityOrInequality() const -{ -#ifdef FEATURE_SIMD - if (gtOper == GT_SIMD) - { - SIMDIntrinsicID id = AsSIMD()->gtSIMDIntrinsicID; - return (id == SIMDIntrinsicOpEquality) || (id == SIMDIntrinsicOpInEquality); - } -#endif - - return false; -} - inline GenTree* GenTree::MoveNext() { assert(OperIsAnyList()); diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 15c623ceca15..cd197c7f2784 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -482,11 +482,14 @@ bool HWIntrinsicInfo::isImmOp(NamedIntrinsic id, const GenTree* op) GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr) { GenTree* arg = nullptr; - if (argType == TYP_STRUCT) + if (varTypeIsStruct(argType)) { - unsigned int argSizeBytes; - var_types base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes); - argType = getSIMDTypeForSize(argSizeBytes); + if (!varTypeIsSIMD(argType)) + { + unsigned int argSizeBytes; + var_types base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes); + argType = getSIMDTypeForSize(argSizeBytes); + } assert(varTypeIsSIMD(argType)); arg = impSIMDPopStack(argType, expectAddr); assert(varTypeIsSIMD(arg->TypeGet())); @@ -605,9 +608,9 @@ static bool isSupportedBaseType(NamedIntrinsic intrinsic, var_types baseType) #ifdef TARGET_XARCH assert((intrinsic >= NI_Vector128_As && intrinsic <= NI_Vector128_AsUInt64) || - (intrinsic >= NI_Vector128_get_AllBitsSet && intrinsic <= NI_Vector128_ToVector256Unsafe) || + (intrinsic >= NI_Vector128_get_AllBitsSet && intrinsic <= NI_Vector128_WithElement) || (intrinsic >= NI_Vector256_As && intrinsic <= NI_Vector256_AsUInt64) || - (intrinsic >= NI_Vector256_get_AllBitsSet && intrinsic <= 
NI_Vector256_ToScalar)); + (intrinsic >= NI_Vector256_get_AllBitsSet && intrinsic <= NI_Vector256_WithElement)); #else assert((intrinsic >= NI_Vector64_AsByte && intrinsic <= NI_Vector64_AsUInt32) || (intrinsic >= NI_Vector64_get_AllBitsSet && intrinsic <= NI_Vector64_ToVector128Unsafe) || diff --git a/src/coreclr/src/jit/hwintrinsiclistarm64.h b/src/coreclr/src/jit/hwintrinsiclistarm64.h index 5407ba7670ef..5084324f8492 100644 --- a/src/coreclr/src/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/hwintrinsiclistarm64.h @@ -29,6 +29,8 @@ HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, HARDWARE_INTRINSIC(Vector64, get_Count, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, GetElement, 8, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_IMM, HW_Flag_NoJmpTableIMM|HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector64, op_Equality, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, op_Inequality, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, ToScalar, 8, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) 
HARDWARE_INTRINSIC(Vector64, ToVector128, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, ToVector128Unsafe, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen) @@ -56,6 +58,8 @@ HARDWARE_INTRINSIC(Vector128, get_Count, 1 HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_IMM, HW_Flag_NoJmpTableIMM|HW_Flag_SupportsContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, GetLower, 16, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -93,6 +97,7 @@ HARDWARE_INTRINSIC(AdvSimd, AddWideningUpper, 1 HARDWARE_INTRINSIC(AdvSimd, And, -1, 2, {INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and, INS_and}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, BitwiseClear, -1, 2, {INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic, INS_bic}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, {INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -109,6 +114,7 @@ HARDWARE_INTRINSIC(AdvSimd, ExtractAndNarrowHigh, 1 HARDWARE_INTRINSIC(AdvSimd, 
ExtractAndNarrowLow, 8, 1, {INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_xtn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, ExtractVector64, 8, 3, {INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_invalid, INS_invalid, INS_ext, INS_invalid}, HW_Category_IMM, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, ExtractVector128, 16, 3, {INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext}, HW_Category_IMM, HW_Flag_SupportsContainment|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, Floor, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintm, INS_frintm}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, FusedAddHalving, -1, 2, {INS_shadd, INS_uhadd, INS_shadd, INS_uhadd, INS_shadd, INS_uhadd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, FusedAddRoundedHalving, -1, 2, {INS_srhadd, INS_urhadd, INS_srhadd, INS_urhadd, INS_srhadd, INS_urhadd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, FusedMultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fmla, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_HasRMWSemantics) diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index c041364c966b..c6017fb12c44 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -50,10 +50,12 @@ HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, ToVector256, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, {INS_movdqu, 
INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -80,9 +82,11 @@ HARDWARE_INTRINSIC(Vector256, get_Zero, HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsdsse2}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags diff --git 
a/src/coreclr/src/jit/lower.h b/src/coreclr/src/jit/lower.h index f45e9973fa18..91d5561d52fc 100644 --- a/src/coreclr/src/jit/lower.h +++ b/src/coreclr/src/jit/lower.h @@ -318,6 +318,7 @@ class Lowering final : public Phase #ifdef FEATURE_HW_INTRINSICS void LowerHWIntrinsic(GenTreeHWIntrinsic* node); void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition); + void LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp); void LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node); void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node); diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index a6050c9baa48..c47cb7ee4e9f 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -553,6 +553,20 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return; } + case NI_Vector64_op_Equality: + case NI_Vector128_op_Equality: + { + LowerHWIntrinsicCmpOp(node, GT_EQ); + return; + } + + case NI_Vector64_op_Inequality: + case NI_Vector128_op_Inequality: + { + LowerHWIntrinsicCmpOp(node, GT_NE); + return; + } + default: break; } @@ -625,6 +639,122 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) return false; } +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector64 or Vector128 comparison intrinsic +// +// Arguments: +// node - The hardware intrinsic node. 
+// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE +// +void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + var_types baseType = node->gtSIMDBaseType; + unsigned simdSize = node->gtSIMDSize; + var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + + assert((intrinsicId == NI_Vector64_op_Equality) || (intrinsicId == NI_Vector64_op_Inequality) || + (intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality)); + + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(baseType)); + assert(simdSize != 0); + assert(node->gtType == TYP_BOOL); + assert((cmpOp == GT_EQ) || (cmpOp == GT_NE)); + + // We have the following (with the appropriate simd size and where the intrinsic could be op_Inequality): + // /--* op2 simd + // /--* op1 simd + // node = * HWINTRINSIC simd T op_Equality + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + NamedIntrinsic cmpIntrinsic; + + switch (baseType) + { + case TYP_BYTE: + case TYP_UBYTE: + case TYP_SHORT: + case TYP_USHORT: + case TYP_INT: + case TYP_UINT: + case TYP_FLOAT: + { + cmpIntrinsic = NI_AdvSimd_CompareEqual; + break; + } + + case TYP_LONG: + case TYP_ULONG: + case TYP_DOUBLE: + { + cmpIntrinsic = NI_AdvSimd_Arm64_CompareEqual; + break; + } + + default: + { + unreached(); + } + } + + GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, baseType, simdSize); + BlockRange().InsertBefore(node, cmp); + LowerNode(cmp); + + if ((baseType == TYP_FLOAT) && (simdSize == 12)) + { + // For TYP_SIMD12 the unused upper element is set to all ones ("as if" equal) so MinAcross ignores it + + GenTree* idxCns = comp->gtNewIconNode(3, TYP_INT); + BlockRange().InsertAfter(cmp, idxCns); + + GenTree* insCns = comp->gtNewIconNode(
-1, TYP_INT); + BlockRange().InsertAfter(idxCns, insCns); + + GenTree* tmp = + comp->gtNewSimdAsHWIntrinsicNode(simdType, cmp, idxCns, insCns, NI_AdvSimd_Insert, TYP_INT, simdSize); + BlockRange().InsertAfter(insCns, tmp); + LowerNode(tmp); + + cmp = tmp; + } + + GenTree* msk = comp->gtNewSimdHWIntrinsicNode(simdType, cmp, NI_AdvSimd_Arm64_MinAcross, TYP_UBYTE, simdSize); + BlockRange().InsertAfter(cmp, msk); + LowerNode(msk); + + GenTree* zroCns = comp->gtNewIconNode(0, TYP_INT); + BlockRange().InsertAfter(msk, zroCns); + + GenTree* val = comp->gtNewSimdAsHWIntrinsicNode(TYP_UBYTE, msk, zroCns, NI_AdvSimd_Extract, TYP_UBYTE, simdSize); + BlockRange().InsertAfter(zroCns, val); + LowerNode(val); + + zroCns = comp->gtNewIconNode(0, TYP_INT); + BlockRange().InsertAfter(val, zroCns); + + node->ChangeOper(cmpOp); + + node->gtType = TYP_INT; + node->gtOp1 = val; + node->gtOp2 = zroCns; + + // The CompareEqual will set (condition is true) or clear (condition is false) all bits of the respective element + // The MinAcross then ensures we get either all bits set (all conditions are true) or clear (any condition is false) + // So, we need to invert the condition from the operation since we compare against zero + + GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::NE : GenCondition::EQ; + GenTree* cc = LowerNodeCC(node, cmpCnd); + + node->gtType = TYP_VOID; + node->ClearUnusedValue(); + + LowerNode(node); +} + //---------------------------------------------------------------------------------------------- // Lowering::LowerHWIntrinsicCreate: Lowers a Vector64 or Vector128 Create call // @@ -1191,11 +1321,6 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - // TODO-ARM64-CQ Support containing 0 - break; - case SIMDIntrinsicGetItem: { // This implements get_Item method. 
The sources are: diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 3cb0c50afba5..db5617b4f143 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -743,14 +743,6 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) // the addr of SIMD vector with the given index. simdNode->gtOp1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG; } - else if (simdNode->IsSIMDEqualityOrInequality()) - { - LowerNodeCC(simdNode, - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality ? GenCondition::EQ : GenCondition::NE); - - simdNode->gtType = TYP_VOID; - simdNode->ClearUnusedValue(); - } #endif ContainCheckSIMD(simdNode); } @@ -947,6 +939,20 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return; } + case NI_Vector128_op_Equality: + case NI_Vector256_op_Equality: + { + LowerHWIntrinsicCmpOp(node, GT_EQ); + return; + } + + case NI_Vector128_op_Inequality: + case NI_Vector256_op_Inequality: + { + LowerHWIntrinsicCmpOp(node, GT_NE); + return; + } + case NI_SSE2_Insert: case NI_SSE41_Insert: case NI_SSE41_X64_Insert: @@ -1155,6 +1161,238 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) ContainCheckHWIntrinsic(node); } +//---------------------------------------------------------------------------------------------- +// Lowering::LowerHWIntrinsicCmpOp: Lowers a Vector128 or Vector256 comparison intrinsic +// +// Arguments: +// node - The hardware intrinsic node. 
+// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE +// +void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) +{ + NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + var_types baseType = node->gtSIMDBaseType; + unsigned simdSize = node->gtSIMDSize; + var_types simdType = Compiler::getSIMDTypeForSize(simdSize); + + assert((intrinsicId == NI_Vector128_op_Equality) || (intrinsicId == NI_Vector128_op_Inequality) || + (intrinsicId == NI_Vector256_op_Equality) || (intrinsicId == NI_Vector256_op_Inequality)); + + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(baseType)); + assert(simdSize != 0); + assert(node->gtType == TYP_BOOL); + assert((cmpOp == GT_EQ) || (cmpOp == GT_NE)); + + // We have the following (with the appropriate simd size and where the intrinsic could be op_Inequality): + // /--* op2 simd + // /--* op1 simd + // node = * HWINTRINSIC simd T op_Equality + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE; + + if (op2->IsIntegralConstVector(0) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + // On SSE4.1 or higher we can optimize comparisons against zero to + // just use PTEST. 
We can't support it for floating-point, however, + // as it has both +0.0 and -0.0 where +0.0 == -0.0 + + node->gtOp1 = op1; + BlockRange().Remove(op2); + + LIR::Use op1Use(BlockRange(), &node->gtOp1, node); + ReplaceWithLclVar(op1Use); + op1 = node->gtOp1; + + op2 = comp->gtClone(op1); + BlockRange().InsertAfter(op1, op2); + node->gtOp2 = op2; + + if (simdSize == 32) + { + node->gtHWIntrinsicId = NI_AVX_TestZ; + LowerHWIntrinsicCC(node, NI_AVX_PTEST, cmpCnd); + } + else + { + node->gtHWIntrinsicId = NI_SSE41_TestZ; + LowerHWIntrinsicCC(node, NI_SSE41_PTEST, cmpCnd); + } + + return; + } + + NamedIntrinsic cmpIntrinsic; + var_types cmpType; + NamedIntrinsic mskIntrinsic; + var_types mskType; + int mskConstant; + + switch (baseType) + { + case TYP_BYTE: + case TYP_UBYTE: + case TYP_SHORT: + case TYP_USHORT: + case TYP_INT: + case TYP_UINT: + { + cmpType = baseType; + mskType = TYP_UBYTE; + + if (simdSize == 32) + { + cmpIntrinsic = NI_AVX2_CompareEqual; + mskIntrinsic = NI_AVX2_MoveMask; + mskConstant = -1; + } + else + { + assert(simdSize == 16); + + cmpIntrinsic = NI_SSE2_CompareEqual; + mskIntrinsic = NI_SSE2_MoveMask; + mskConstant = 0xFFFF; + } + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + mskType = TYP_UBYTE; + + if (simdSize == 32) + { + cmpIntrinsic = NI_AVX2_CompareEqual; + cmpType = baseType; + mskIntrinsic = NI_AVX2_MoveMask; + mskConstant = -1; + } + else + { + assert(simdSize == 16); + + if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + cmpIntrinsic = NI_SSE41_CompareEqual; + cmpType = baseType; + } + else + { + cmpIntrinsic = NI_SSE2_CompareEqual; + cmpType = TYP_UINT; + } + + mskIntrinsic = NI_SSE2_MoveMask; + mskConstant = 0xFFFF; + } + break; + } + + case TYP_FLOAT: + { + cmpType = baseType; + mskType = baseType; + + if (simdSize == 32) + { + cmpIntrinsic = NI_AVX_CompareEqual; + mskIntrinsic = NI_AVX_MoveMask; + mskConstant = 0xFF; + } + else + { + cmpIntrinsic = NI_SSE_CompareEqual; + mskIntrinsic = NI_SSE_MoveMask; + 
+ if (simdSize == 16) + { + mskConstant = 0xF; + } + else if (simdSize == 12) + { + mskConstant = 0x7; + } + else + { + assert(simdSize == 8); + mskConstant = 0x3; + } + } + break; + } + + case TYP_DOUBLE: + { + cmpType = baseType; + mskType = baseType; + + if (simdSize == 32) + { + cmpIntrinsic = NI_AVX_CompareEqual; + mskIntrinsic = NI_AVX_MoveMask; + mskConstant = 0xF; + } + else + { + assert(simdSize == 16); + + cmpIntrinsic = NI_SSE2_CompareEqual; + mskIntrinsic = NI_SSE2_MoveMask; + mskConstant = 0x3; + } + break; + } + + default: + { + unreached(); + } + } + + GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op1, op2, cmpIntrinsic, cmpType, simdSize); + BlockRange().InsertBefore(node, cmp); + LowerNode(cmp); + + GenTree* msk = comp->gtNewSimdHWIntrinsicNode(TYP_INT, cmp, mskIntrinsic, mskType, simdSize); + BlockRange().InsertAfter(cmp, msk); + LowerNode(msk); + + GenTree* mskCns = comp->gtNewIconNode(mskConstant, TYP_INT); + BlockRange().InsertAfter(msk, mskCns); + + if ((baseType == TYP_FLOAT) && (simdSize < 16)) + { + // For TYP_SIMD8 and TYP_SIMD12 we need to clear the upper bits and can't assume their value + + GenTree* tmp = comp->gtNewOperNode(GT_AND, TYP_INT, msk, mskCns); + BlockRange().InsertAfter(mskCns, tmp); + LowerNode(tmp); + + msk = tmp; + + mskCns = comp->gtNewIconNode(mskConstant, TYP_INT); + BlockRange().InsertAfter(msk, mskCns); + } + + node->ChangeOper(cmpOp); + + node->gtType = TYP_INT; + node->gtOp1 = msk; + node->gtOp2 = mskCns; + + GenTree* cc = LowerNodeCC(node, cmpCnd); + + node->gtType = TYP_VOID; + node->ClearUnusedValue(); + + LowerNode(node); +} + //---------------------------------------------------------------------------------------------- // Lowering::LowerHWIntrinsicCreate: Lowers a Vector128 or Vector256 Create call // @@ -3701,19 +3939,6 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); break; - case SIMDIntrinsicOpEquality: - case 
SIMDIntrinsicOpInEquality: - // On SSE4/AVX, we can generate optimal code for (in)equality - // against zero using ptest. We can safely do this optimization - // for integral vectors but not for floating-point for the reason - // that we have +0.0 and -0.0 and +0.0 == -0.0 - op2 = simdNode->gtGetOp2(); - if ((comp->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0)) - { - MakeSrcContained(simdNode, op2); - } - break; - case SIMDIntrinsicGetItem: { // This implements get_Item method. The sources are: diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 60084a8e65f3..2c93dd860ffb 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -812,16 +812,12 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { case SIMDIntrinsicInit: case SIMDIntrinsicCast: - case SIMDIntrinsicSqrt: - case SIMDIntrinsicAbs: case SIMDIntrinsicConvertToSingle: case SIMDIntrinsicConvertToInt32: case SIMDIntrinsicConvertToDouble: case SIMDIntrinsicConvertToInt64: case SIMDIntrinsicWidenLo: case SIMDIntrinsicWidenHi: - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: // No special handling required. break; @@ -868,16 +864,8 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThanOrEqual: // No special handling required. break; @@ -926,23 +914,10 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) // We have an array and an index, which may be contained. 
break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - buildInternalFloatRegisterDefForNode(simdTree); - break; - case SIMDIntrinsicDotProduct: buildInternalFloatRegisterDefForNode(simdTree); break; - case SIMDIntrinsicSelect: - // TODO-ARM64-CQ Allow lowering to see SIMDIntrinsicSelect so we can generate BSL VC, VA, VB - // bsl target register must be VC. Reserve a temp in case we need to shuffle things. - // This will require a different approach, as GenTreeSIMD has only two operands. - assert(!"SIMDIntrinsicSelect not yet supported"); - buildInternalFloatRegisterDefForNode(simdTree); - break; - case SIMDIntrinsicInitArrayX: case SIMDIntrinsicInitFixed: case SIMDIntrinsicCopyToArray: @@ -956,7 +931,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicGetY: case SIMDIntrinsicGetZ: case SIMDIntrinsicGetW: - case SIMDIntrinsicInstEquals: case SIMDIntrinsicHWAccel: case SIMDIntrinsicWiden: case SIMDIntrinsicInvalid: diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index 44462d7a91ad..c1a5e0b586b4 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -1507,8 +1507,7 @@ int LinearScan::ComputeOperandDstCount(GenTree* operand) // Stores and void-typed operands may be encountered when processing call nodes, which contain // pointers to argument setup stores. 
assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() || - operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->IsSIMDEqualityOrInequality() || - operand->TypeGet() == TYP_VOID); + operand->OperIsCompare() || operand->OperIs(GT_CMP) || operand->TypeGet() == TYP_VOID); return 0; } } diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 1bc0f36bfb26..1239ca23c7bb 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1862,21 +1862,17 @@ int LinearScan::BuildIntrinsic(GenTree* tree) // int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { - // Only SIMDIntrinsicInit can be contained. Other than that, - // only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have 0 dstCount. - int dstCount = simdTree->IsValue() ? 1 : 0; + // All intrinsics have a dstCount of 1 + assert(simdTree->IsValue()); + bool buildUses = true; regMaskTP dstCandidates = RBM_NONE; if (simdTree->isContained()) { + // Only SIMDIntrinsicInit can be contained assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); } - else if (dstCount != 1) - { - assert((simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) || - (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality)); - } SetContainsAVXFlags(simdTree->gtSIMDSize); GenTree* op1 = simdTree->gtGetOp1(); GenTree* op2 = simdTree->gtGetOp2(); @@ -1956,35 +1952,11 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType)); break; - case SIMDIntrinsicAbs: - // float/double vectors: This gets implemented as bitwise-And operation - // with a mask and hence should never see here. 
- // - // Must be a Vector or Vector Vector - assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT || - simdTree->gtSIMDBaseType == TYP_BYTE); - assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported); - break; - - case SIMDIntrinsicSqrt: - // SSE2 has no instruction support for sqrt on integer vectors. - noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType)); - break; - - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: - assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported); - break; - case SIMDIntrinsicAdd: case SIMDIntrinsicSub: case SIMDIntrinsicMul: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: // SSE2 32-bit integer multiplication requires two temp regs if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT && compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) @@ -1997,40 +1969,6 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicEqual: break; - // SSE2 doesn't support < and <= directly on int vectors. - // Instead we need to use > and >= with swapped operands. - case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: - noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType)); - break; - - // SIMDIntrinsicEqual is supported only on non-floating point base type vectors. - // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors. - // Instead we need to use < and <= with swapped operands. - case SIMDIntrinsicGreaterThan: - noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType)); - break; - - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - if (simdTree->gtGetOp2()->isContained()) - { - // If the second operand is contained then ContainCheckSIMD has determined - // that PTEST can be used. We only need a single source register and no - // internal registers. 
- } - else - { - // Can't use PTEST so we need 2 source registers, 1 internal SIMD register - // (to hold the result of PCMPEQD or other similar SIMD compare instruction) - // and one internal INT register (to hold the result of PMOVMSKB). - buildInternalIntRegisterDefForNode(simdTree); - buildInternalFloatRegisterDefForNode(simdTree); - } - // These SIMD nodes only set the condition flags. - dstCount = 0; - break; - case SIMDIntrinsicDotProduct: // Float/Double vectors: // For SSE, or AVX with 32-byte vectors, we also need an internal register @@ -2258,14 +2196,7 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) srcCount = BuildRMWUses(simdTree); } buildInternalRegisterUses(); - if (dstCount == 1) - { - BuildDef(simdTree, dstCandidates); - } - else - { - assert(dstCount == 0); - } + BuildDef(simdTree, dstCandidates); return srcCount; } #endif // FEATURE_SIMD diff --git a/src/coreclr/src/jit/namedintrinsiclist.h b/src/coreclr/src/jit/namedintrinsiclist.h index 457d434898c9..c9a87d782a62 100644 --- a/src/coreclr/src/jit/namedintrinsiclist.h +++ b/src/coreclr/src/jit/namedintrinsiclist.h @@ -44,10 +44,12 @@ enum NamedIntrinsic : unsigned short NI_SIMD_AS_HWINTRINSIC_START, #if defined(TARGET_XARCH) -#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + NI_##classId##_##id, #include "simdashwintrinsiclistxarch.h" #elif defined(TARGET_ARM64) -#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + NI_##classId##_##id, #include "simdashwintrinsiclistarm64.h" #endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) NI_SIMD_AS_HWINTRINSIC_END, diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 
bfd8c04f7678..3bd38ef53719 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -1075,19 +1075,9 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in case SIMDIntrinsicSub: case SIMDIntrinsicMul: case SIMDIntrinsicDiv: - case SIMDIntrinsicSqrt: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: - case SIMDIntrinsicAbs: case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicGreaterThanOrEqual: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: case SIMDIntrinsicDotProduct: case SIMDIntrinsicCast: case SIMDIntrinsicConvertToSingle: @@ -1268,218 +1258,6 @@ SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd, SIMDIntrinsicShuffleSSE2, TYP_INT, size); return SIMDIntrinsicBitwiseAnd; } - -// impSIMDLongRelOpGreaterThan: transforms operands and returns the SIMD intrinsic to be applied on -// transformed operands to obtain > comparison result. -// -// Arguments: -// typeHnd - type handle of SIMD vector -// size - SIMD vector size -// pOp1 - in-out parameter; first operand -// pOp2 - in-out parameter; second operand -// -// Return Value: -// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands -// -SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd, - unsigned size, - GenTree** pOp1, - GenTree** pOp2) -{ - var_types simdType = (*pOp1)->TypeGet(); - assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); - - // GreaterThan(v1, v2) where v1 and v2 are vector long. - // Let us consider the case of single long element comparison. - // say L1 = (x1, y1) and L2 = (x2, y2) where x1, y1, x2, and y2 are 32-bit integers that comprise the longs L1 and - // L2. 
- // - // GreaterThan(L1, L2) can be expressed in terms of > relationship between 32-bit integers that comprise L1 and L2 - // as - // = (x1, y1) > (x2, y2) - // = (x1 > x2) || [(x1 == x2) && (y1 > y2)] - eq (1) - // - // t = (v1 > v2) 32-bit signed comparison - // u = (v1 == v2) 32-bit sized element equality - // v = (v1 > v2) 32-bit unsigned comparison - // - // z = shuffle(t, (3, 3, 1, 1)) - This corresponds to (x1 > x2) in eq(1) above - // t1 = Shuffle(v, (2, 2, 0, 0)) - This corresponds to (y1 > y2) in eq(1) above - // u1 = Shuffle(u, (3, 3, 1, 1)) - This corresponds to (x1 == x2) in eq(1) above - // w = And(t1, u1) - This corresponds to [(x1 == x2) && (y1 > y2)] in eq(1) above - // Result = BitwiseOr(z, w) - - // Since op1 and op2 gets used multiple times, make sure side effects are computed. - GenTree* dupOp1 = nullptr; - GenTree* dupOp2 = nullptr; - GenTree* dupDupOp1 = nullptr; - GenTree* dupDupOp2 = nullptr; - - if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd); - dupDupOp1 = gtNewLclvNode(dupOp1->AsLclVarCommon()->GetLclNum(), simdType); - } - else - { - dupOp1 = gtCloneExpr(*pOp1); - dupDupOp1 = gtCloneExpr(*pOp1); - } - - if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd); - dupDupOp2 = gtNewLclvNode(dupOp2->AsLclVarCommon()->GetLclNum(), simdType); - } - else - { - dupOp2 = gtCloneExpr(*pOp2); - dupDupOp2 = gtCloneExpr(*pOp2); - } - - assert(dupDupOp1 != nullptr && dupDupOp2 != nullptr); - assert(dupOp1 != nullptr && dupOp2 != nullptr); - assert(*pOp1 != nullptr && *pOp2 != nullptr); - - // v1GreaterThanv2Signed - signed 32-bit comparison - GenTree* v1GreaterThanv2Signed = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicGreaterThan, TYP_INT, size); - - // v1Equalsv2 - 32-bit equality - GenTree* v1Equalsv2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicEqual, TYP_INT, size); - - // v1GreaterThanv2Unsigned - unsigned 32-bit comparison - var_types 
tempBaseType = TYP_UINT; - SIMDIntrinsicID sid = impSIMDRelOp(SIMDIntrinsicGreaterThan, typeHnd, size, &tempBaseType, &dupDupOp1, &dupDupOp2); - GenTree* v1GreaterThanv2Unsigned = gtNewSIMDNode(simdType, dupDupOp1, dupDupOp2, sid, tempBaseType, size); - - GenTree* z = gtNewSIMDNode(simdType, v1GreaterThanv2Signed, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), - SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); - GenTree* t1 = gtNewSIMDNode(simdType, v1GreaterThanv2Unsigned, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), - SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size); - GenTree* u1 = gtNewSIMDNode(simdType, v1Equalsv2, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), SIMDIntrinsicShuffleSSE2, - TYP_FLOAT, size); - GenTree* w = gtNewSIMDNode(simdType, u1, t1, SIMDIntrinsicBitwiseAnd, TYP_INT, size); - - *pOp1 = z; - *pOp2 = w; - return SIMDIntrinsicBitwiseOr; -} - -// impSIMDLongRelOpGreaterThanOrEqual: transforms operands and returns the SIMD intrinsic to be applied on -// transformed operands to obtain >= comparison result. 
-// -// Arguments: -// typeHnd - type handle of SIMD vector -// size - SIMD vector size -// pOp1 - in-out parameter; first operand -// pOp2 - in-out parameter; second operand -// -// Return Value: -// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands -// -SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd, - unsigned size, - GenTree** pOp1, - GenTree** pOp2) -{ - var_types simdType = (*pOp1)->TypeGet(); - assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); - - // expand this to (a == b) | (a > b) - GenTree* dupOp1 = nullptr; - GenTree* dupOp2 = nullptr; - - if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd); - } - else - { - dupOp1 = gtCloneExpr(*pOp1); - } - - if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd); - } - else - { - dupOp2 = gtCloneExpr(*pOp2); - } - - assert(dupOp1 != nullptr && dupOp2 != nullptr); - assert(*pOp1 != nullptr && *pOp2 != nullptr); - - // (a==b) - SIMDIntrinsicID id = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2); - *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, id, TYP_LONG, size); - - // (a > b) - id = impSIMDLongRelOpGreaterThan(typeHnd, size, &dupOp1, &dupOp2); - *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, id, TYP_LONG, size); - - return SIMDIntrinsicBitwiseOr; -} - -// impSIMDInt32OrSmallIntRelOpGreaterThanOrEqual: transforms operands and returns the SIMD intrinsic to be applied on -// transformed operands to obtain >= comparison result in case of integer base type vectors -// -// Arguments: -// typeHnd - type handle of SIMD vector -// size - SIMD vector size -// baseType - base type of SIMD vector -// pOp1 - in-out parameter; first operand -// pOp2 - in-out parameter; second operand -// -// Return Value: -// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands -// -SIMDIntrinsicID 
Compiler::impSIMDIntegralRelOpGreaterThanOrEqual( - CORINFO_CLASS_HANDLE typeHnd, unsigned size, var_types baseType, GenTree** pOp1, GenTree** pOp2) -{ - var_types simdType = (*pOp1)->TypeGet(); - assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType)); - - // This routine should be used only for integer base type vectors - assert(varTypeIsIntegral(baseType)); - if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE)) - { - return impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2); - } - - // expand this to (a == b) | (a > b) - GenTree* dupOp1 = nullptr; - GenTree* dupOp2 = nullptr; - - if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd); - } - else - { - dupOp1 = gtCloneExpr(*pOp1); - } - - if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0) - { - dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd); - } - else - { - dupOp2 = gtCloneExpr(*pOp2); - } - - assert(dupOp1 != nullptr && dupOp2 != nullptr); - assert(*pOp1 != nullptr && *pOp2 != nullptr); - - // (a==b) - *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicEqual, baseType, size); - - // (a > b) - *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicGreaterThan, baseType, size); - - return SIMDIntrinsicBitwiseOr; -} #endif // TARGET_XARCH // Transforms operands and returns the SIMD intrinsic to be applied on @@ -1514,32 +1292,9 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, if (varTypeIsFloating(baseType)) { - // SSE2/AVX doesn't support > and >= on vector float/double. - // Therefore, we need to use < and <= with swapped operands - if (relOpIntrinsicId == SIMDIntrinsicGreaterThan || relOpIntrinsicId == SIMDIntrinsicGreaterThanOrEqual) - { - GenTree* tmp = *pOp1; - *pOp1 = *pOp2; - *pOp2 = tmp; - - intrinsicID = - (relOpIntrinsicId == SIMDIntrinsicGreaterThan) ? 
SIMDIntrinsicLessThan : SIMDIntrinsicLessThanOrEqual; - } } else if (varTypeIsIntegral(baseType)) { - // SSE/AVX doesn't support < and <= on integer base type vectors. - // Therefore, we need to use > and >= with swapped operands. - if (intrinsicID == SIMDIntrinsicLessThan || intrinsicID == SIMDIntrinsicLessThanOrEqual) - { - GenTree* tmp = *pOp1; - *pOp1 = *pOp2; - *pOp2 = tmp; - - intrinsicID = (relOpIntrinsicId == SIMDIntrinsicLessThan) ? SIMDIntrinsicGreaterThan - : SIMDIntrinsicGreaterThanOrEqual; - } - if ((getSIMDSupportLevel() == SIMD_SSE2_Supported) && baseType == TYP_LONG) { // There is no direct SSE2 support for comparing TYP_LONG vectors. @@ -1548,28 +1303,13 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, { intrinsicID = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2); } - else if (intrinsicID == SIMDIntrinsicGreaterThan) - { - intrinsicID = impSIMDLongRelOpGreaterThan(typeHnd, size, pOp1, pOp2); - } - else if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual) - { - intrinsicID = impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2); - } else { unreached(); } } // SSE2 and AVX direct support for signed comparison of int32, int16 and int8 types - else if (!varTypeIsUnsigned(baseType)) - { - if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual) - { - intrinsicID = impSIMDIntegralRelOpGreaterThanOrEqual(typeHnd, size, baseType, pOp1, pOp2); - } - } - else // unsigned + else if (varTypeIsUnsigned(baseType)) { // Vector, Vector, Vector and Vector: // SSE2 supports > for signed comparison. Therefore, to use it for @@ -1644,21 +1384,7 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, return impSIMDRelOp(intrinsicID, typeHnd, size, inOutBaseType, pOp1, pOp2); } } -#elif defined(TARGET_ARM64) - // TODO-ARM64-CQ handle comparisons against zero - - // TARGET_ARM64 doesn't support < and <= on register register comparisons - // Therefore, we need to use > and >= with swapped operands. 
- if (intrinsicID == SIMDIntrinsicLessThan || intrinsicID == SIMDIntrinsicLessThanOrEqual) - { - GenTree* tmp = *pOp1; - *pOp1 = *pOp2; - *pOp2 = tmp; - - intrinsicID = - (intrinsicID == SIMDIntrinsicLessThan) ? SIMDIntrinsicGreaterThan : SIMDIntrinsicGreaterThanOrEqual; - } -#else // !TARGET_XARCH +#elif !defined(TARGET_ARM64) assert(!"impSIMDRelOp() unimplemented on target arch"); unreached(); #endif // !TARGET_XARCH @@ -1666,411 +1392,6 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, return intrinsicID; } -//------------------------------------------------------------------------- -// impSIMDAbs: creates GT_SIMD node to compute Abs value of a given vector. -// -// Arguments: -// typeHnd - type handle of SIMD vector -// baseType - base type of vector -// size - vector size in bytes -// op1 - operand of Abs intrinsic -// -GenTree* Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned size, GenTree* op1) -{ - assert(varTypeIsSIMD(op1)); - - var_types simdType = op1->TypeGet(); - GenTree* retVal = nullptr; - -#ifdef TARGET_XARCH - // When there is no direct support, Abs(v) could be computed - // on integer vectors as follows: - // BitVector = v < vector.Zero - // result = ConditionalSelect(BitVector, vector.Zero - v, v) - - bool useConditionalSelect = false; - if (getSIMDSupportLevel() == SIMD_SSE2_Supported) - { - // SSE2 doesn't support abs on signed integer type vectors. - if (baseType == TYP_LONG || baseType == TYP_INT || baseType == TYP_SHORT || baseType == TYP_BYTE) - { - useConditionalSelect = true; - } - } - else - { - assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported); - if (baseType == TYP_LONG) - { - // SSE4/AVX2 don't support abs on long type vector. - useConditionalSelect = true; - } - } - - if (useConditionalSelect) - { - // This works only on integer vectors not on float/double vectors. 
- assert(varTypeIsIntegral(baseType)); - - GenTree* op1Assign; - unsigned op1LclNum; - - if (op1->OperGet() == GT_LCL_VAR) - { - op1LclNum = op1->AsLclVarCommon()->GetLclNum(); - op1Assign = nullptr; - } - else - { - op1LclNum = lvaGrabTemp(true DEBUGARG("SIMD Abs op1")); - lvaSetStruct(op1LclNum, typeHnd, false); - op1Assign = gtNewTempAssign(op1LclNum, op1); - op1 = gtNewLclvNode(op1LclNum, op1->TypeGet()); - } - - // Assign Vector.Zero to a temp since it is needed more than once - GenTree* vecZero = gtNewSIMDVectorZero(simdType, baseType, size); - unsigned vecZeroLclNum = lvaGrabTemp(true DEBUGARG("SIMD Abs VecZero")); - lvaSetStruct(vecZeroLclNum, typeHnd, false); - GenTree* vecZeroAssign = gtNewTempAssign(vecZeroLclNum, vecZero); - - // Construct BitVector = v < vector.Zero - GenTree* bitVecOp1 = op1; - GenTree* bitVecOp2 = gtNewLclvNode(vecZeroLclNum, vecZero->TypeGet()); - var_types relOpBaseType = baseType; - SIMDIntrinsicID relOpIntrinsic = - impSIMDRelOp(SIMDIntrinsicLessThan, typeHnd, size, &relOpBaseType, &bitVecOp1, &bitVecOp2); - GenTree* bitVec = gtNewSIMDNode(simdType, bitVecOp1, bitVecOp2, relOpIntrinsic, relOpBaseType, size); - unsigned bitVecLclNum = lvaGrabTemp(true DEBUGARG("SIMD Abs bitVec")); - lvaSetStruct(bitVecLclNum, typeHnd, false); - GenTree* bitVecAssign = gtNewTempAssign(bitVecLclNum, bitVec); - bitVec = gtNewLclvNode(bitVecLclNum, bitVec->TypeGet()); - - // Construct condSelectOp1 = vector.Zero - v - GenTree* subOp1 = gtNewLclvNode(vecZeroLclNum, vecZero->TypeGet()); - GenTree* subOp2 = gtNewLclvNode(op1LclNum, op1->TypeGet()); - GenTree* negVec = gtNewSIMDNode(simdType, subOp1, subOp2, SIMDIntrinsicSub, baseType, size); - - // Construct ConditionalSelect(bitVec, vector.Zero - v, v) - GenTree* vec = gtNewLclvNode(op1LclNum, op1->TypeGet()); - retVal = impSIMDSelect(typeHnd, baseType, size, bitVec, negVec, vec); - - // Prepend bitVec assignment to retVal. 
- // retVal = (tmp2 = v < tmp1), CondSelect(tmp2, tmp1 - v, v) - retVal = gtNewOperNode(GT_COMMA, simdType, bitVecAssign, retVal); - - // Prepend vecZero assignment to retVal. - // retVal = (tmp1 = vector.Zero), (tmp2 = v < tmp1), CondSelect(tmp2, tmp1 - v, v) - retVal = gtNewOperNode(GT_COMMA, simdType, vecZeroAssign, retVal); - - // If op1 was assigned to a temp, prepend that to retVal. - if (op1Assign != nullptr) - { - // retVal = (v=op1), (tmp1 = vector.Zero), (tmp2 = v < tmp1), CondSelect(tmp2, tmp1 - v, v) - retVal = gtNewOperNode(GT_COMMA, simdType, op1Assign, retVal); - } - } - else if (varTypeIsFloating(baseType)) - { - // Abs(vf) = vf & new SIMDVector(0x7fffffff); - // Abs(vd) = vf & new SIMDVector(0x7fffffffffffffff); - GenTree* bitMask = nullptr; - if (baseType == TYP_FLOAT) - { - float f; - static_assert_no_msg(sizeof(float) == sizeof(int)); - *((int*)&f) = 0x7fffffff; - bitMask = gtNewDconNode(f); - } - else if (baseType == TYP_DOUBLE) - { - double d; - static_assert_no_msg(sizeof(double) == sizeof(__int64)); - *((__int64*)&d) = 0x7fffffffffffffffLL; - bitMask = gtNewDconNode(d); - } - - assert(bitMask != nullptr); - bitMask->gtType = baseType; - GenTree* bitMaskVector = gtNewSIMDNode(simdType, bitMask, SIMDIntrinsicInit, baseType, size); - retVal = gtNewSIMDNode(simdType, op1, bitMaskVector, SIMDIntrinsicBitwiseAnd, baseType, size); - } - else if (baseType == TYP_USHORT || baseType == TYP_UBYTE || baseType == TYP_UINT || baseType == TYP_ULONG) - { - // Abs is a no-op on unsigned integer type vectors - retVal = op1; - } - else - { - assert(getSIMDSupportLevel() >= SIMD_SSE4_Supported); - assert(baseType != TYP_LONG); - - retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size); - } -#elif defined(TARGET_ARM64) - if (varTypeIsUnsigned(baseType)) - { - // Abs is a no-op on unsigned integer type vectors - retVal = op1; - } - else - { - retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size); - } -#else // 
!defined(TARGET_XARCH)_ && !defined(TARGET_ARM64) - assert(!"Abs intrinsic on non-xarch target not implemented"); -#endif // !TARGET_XARCH - - return retVal; -} - -// Creates a GT_SIMD tree for Select operation -// -// Arguments: -// typeHnd - type handle of SIMD vector -// baseType - base type of SIMD vector -// size - SIMD vector size -// op1 - first operand = Condition vector vc -// op2 - second operand = va -// op3 - third operand = vb -// -// Return Value: -// Returns GT_SIMD tree that computes Select(vc, va, vb) -// -GenTree* Compiler::impSIMDSelect( - CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned size, GenTree* op1, GenTree* op2, GenTree* op3) -{ - assert(varTypeIsSIMD(op1)); - var_types simdType = op1->TypeGet(); - assert(op2->TypeGet() == simdType); - assert(op3->TypeGet() == simdType); - - // TODO-ARM64-CQ Support generating select instruction for SIMD - - // Select(BitVector vc, va, vb) = (va & vc) | (vb & !vc) - // Select(op1, op2, op3) = (op2 & op1) | (op3 & !op1) - // = SIMDIntrinsicBitwiseOr(SIMDIntrinsicBitwiseAnd(op2, op1), - // SIMDIntrinsicBitwiseAndNot(op3, op1)) - // - // If Op1 has side effect, create an assignment to a temp - GenTree* tmp = op1; - GenTree* asg = nullptr; - if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0) - { - unsigned lclNum = lvaGrabTemp(true DEBUGARG("SIMD Select")); - lvaSetStruct(lclNum, typeHnd, false); - tmp = gtNewLclvNode(lclNum, op1->TypeGet()); - asg = gtNewTempAssign(lclNum, op1); - } - - GenTree* andExpr = gtNewSIMDNode(simdType, op2, tmp, SIMDIntrinsicBitwiseAnd, baseType, size); - GenTree* dupOp1 = gtCloneExpr(tmp); - assert(dupOp1 != nullptr); -#ifdef TARGET_ARM64 - // ARM64 implements SIMDIntrinsicBitwiseAndNot as Left & ~Right - GenTree* andNotExpr = gtNewSIMDNode(simdType, op3, dupOp1, SIMDIntrinsicBitwiseAndNot, baseType, size); -#else - // XARCH implements SIMDIntrinsicBitwiseAndNot as ~Left & Right - GenTree* andNotExpr = gtNewSIMDNode(simdType, dupOp1, op3, SIMDIntrinsicBitwiseAndNot, baseType, 
size); -#endif - GenTree* simdTree = gtNewSIMDNode(simdType, andExpr, andNotExpr, SIMDIntrinsicBitwiseOr, baseType, size); - - // If asg not null, create a GT_COMMA tree. - if (asg != nullptr) - { - simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), asg, simdTree); - } - - return simdTree; -} - -// Creates a GT_SIMD tree for Min/Max operation -// -// Arguments: -// IntrinsicId - SIMD intrinsic Id, either Min or Max -// typeHnd - type handle of SIMD vector -// baseType - base type of SIMD vector -// size - SIMD vector size -// op1 - first operand = va -// op2 - second operand = vb -// -// Return Value: -// Returns GT_SIMD tree that computes Max(va, vb) -// -GenTree* Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId, - CORINFO_CLASS_HANDLE typeHnd, - var_types baseType, - unsigned size, - GenTree* op1, - GenTree* op2) -{ - assert(intrinsicId == SIMDIntrinsicMin || intrinsicId == SIMDIntrinsicMax); - assert(varTypeIsSIMD(op1)); - var_types simdType = op1->TypeGet(); - assert(op2->TypeGet() == simdType); - -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) - GenTree* simdTree = nullptr; - -#ifdef TARGET_XARCH - // SSE2 has direct support for float/double/signed word/unsigned byte. - // SSE4.1 has direct support for int32/uint32/signed byte/unsigned word. 
- // For other integer types we compute min/max as follows - // - // int32/uint32 (SSE2) - // int64/uint64 (SSE2&SSE4): - // compResult = (op1 < op2) in case of Min - // (op1 > op2) in case of Max - // Min/Max(op1, op2) = Select(compResult, op1, op2) - // - // unsigned word (SSE2): - // op1 = op1 - 2^15 ; to make it fit within a signed word - // op2 = op2 - 2^15 ; to make it fit within a signed word - // result = SSE2 signed word Min/Max(op1, op2) - // result = result + 2^15 ; readjust it back - // - // signed byte (SSE2): - // op1 = op1 + 2^7 ; to make it unsigned - // op1 = op1 + 2^7 ; to make it unsigned - // result = SSE2 unsigned byte Min/Max(op1, op2) - // result = result - 2^15 ; readjust it back - - if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE || - (getSIMDSupportLevel() >= SIMD_SSE4_Supported && - (baseType == TYP_BYTE || baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_USHORT))) - { - // SSE2 or SSE4.1 has direct support - simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, baseType, size); - } - else if (baseType == TYP_USHORT || baseType == TYP_BYTE) - { - assert(getSIMDSupportLevel() == SIMD_SSE2_Supported); - int constVal; - SIMDIntrinsicID operIntrinsic; - SIMDIntrinsicID adjustIntrinsic; - var_types minMaxOperBaseType; - if (baseType == TYP_USHORT) - { - constVal = 0x80008000; - operIntrinsic = SIMDIntrinsicSub; - adjustIntrinsic = SIMDIntrinsicAdd; - minMaxOperBaseType = TYP_SHORT; - } - else - { - assert(baseType == TYP_BYTE); - constVal = 0x80808080; - operIntrinsic = SIMDIntrinsicAdd; - adjustIntrinsic = SIMDIntrinsicSub; - minMaxOperBaseType = TYP_UBYTE; - } - - GenTree* initVal = gtNewIconNode(constVal); - GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size); - - // Assign constVector to a temp, since we intend to use it more than once - // TODO-CQ: We have quite a few such constant vectors constructed during - // the importation of SIMD 
intrinsics. Make sure that we have a single - // temp per distinct constant per method. - GenTree* tmp = fgInsertCommaFormTemp(&constVector, typeHnd); - - // op1 = op1 - constVector - // op2 = op2 - constVector - op1 = gtNewSIMDNode(simdType, op1, constVector, operIntrinsic, baseType, size); - op2 = gtNewSIMDNode(simdType, op2, tmp, operIntrinsic, baseType, size); - - // compute min/max of op1 and op2 considering them as if minMaxOperBaseType - simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, minMaxOperBaseType, size); - - // re-adjust the value by adding or subtracting constVector - tmp = gtNewLclvNode(tmp->AsLclVarCommon()->GetLclNum(), tmp->TypeGet()); - simdTree = gtNewSIMDNode(simdType, simdTree, tmp, adjustIntrinsic, baseType, size); - } -#elif defined(TARGET_ARM64) - // Arm64 has direct support for all types except int64/uint64 - // For which we compute min/max as follows - // - // int64/uint64 - // compResult = (op1 < op2) in case of Min - // (op1 > op2) in case of Max - // Min/Max(op1, op2) = Select(compResult, op1, op2) - if (baseType != TYP_ULONG && baseType != TYP_LONG) - { - simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, baseType, size); - } -#endif - else - { - GenTree* dupOp1 = nullptr; - GenTree* dupOp2 = nullptr; - GenTree* op1Assign = nullptr; - GenTree* op2Assign = nullptr; - unsigned op1LclNum; - unsigned op2LclNum; - - if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0) - { - op1LclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max")); - lvaSetStruct(op1LclNum, typeHnd, false); - dupOp1 = gtNewLclvNode(op1LclNum, op1->TypeGet()); - op1Assign = gtNewTempAssign(op1LclNum, op1); - op1 = gtNewLclvNode(op1LclNum, op1->TypeGet()); - } - else - { - dupOp1 = gtCloneExpr(op1); - } - - if ((op2->gtFlags & GTF_SIDE_EFFECT) != 0) - { - op2LclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max")); - lvaSetStruct(op2LclNum, typeHnd, false); - dupOp2 = gtNewLclvNode(op2LclNum, op2->TypeGet()); - op2Assign = gtNewTempAssign(op2LclNum, op2); - op2 = 
gtNewLclvNode(op2LclNum, op2->TypeGet()); - } - else - { - dupOp2 = gtCloneExpr(op2); - } - - SIMDIntrinsicID relOpIntrinsic = - (intrinsicId == SIMDIntrinsicMin) ? SIMDIntrinsicLessThan : SIMDIntrinsicGreaterThan; - var_types relOpBaseType = baseType; - - // compResult = op1 relOp op2 - // simdTree = Select(compResult, op1, op2); - assert(dupOp1 != nullptr); - assert(dupOp2 != nullptr); - relOpIntrinsic = impSIMDRelOp(relOpIntrinsic, typeHnd, size, &relOpBaseType, &dupOp1, &dupOp2); - GenTree* compResult = gtNewSIMDNode(simdType, dupOp1, dupOp2, relOpIntrinsic, relOpBaseType, size); - unsigned compResultLclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max")); - lvaSetStruct(compResultLclNum, typeHnd, false); - GenTree* compResultAssign = gtNewTempAssign(compResultLclNum, compResult); - compResult = gtNewLclvNode(compResultLclNum, compResult->TypeGet()); - simdTree = impSIMDSelect(typeHnd, baseType, size, compResult, op1, op2); - simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), compResultAssign, simdTree); - - // Now create comma trees if we have created assignments of op1/op2 to temps - if (op2Assign != nullptr) - { - simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), op2Assign, simdTree); - } - - if (op1Assign != nullptr) - { - simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), op1Assign, simdTree); - } - } - - assert(simdTree != nullptr); - return simdTree; -#else // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) - assert(!"impSIMDMinMax() unimplemented on target arch"); - unreached(); -#endif // !(defined(TARGET_XARCH) || defined(TARGET_ARM64)) -} - //------------------------------------------------------------------------ // getOp1ForConstructor: Get the op1 for a constructor call. 
// @@ -2906,44 +2227,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, } break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicInstEquals: - { - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType, instMethod); - - assert(op1->TypeGet() == simdType); - assert(op2->TypeGet() == simdType); - - simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpEquality, baseType, size); - if (simdType == TYP_SIMD12) - { - simdTree->gtFlags |= GTF_SIMD12_OP; - } - retVal = simdTree; - } - break; - - case SIMDIntrinsicOpInEquality: - { - // op1 is the first operand - // op2 is the second operand - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType, instMethod); - simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpInEquality, baseType, size); - if (simdType == TYP_SIMD12) - { - simdTree->gtFlags |= GTF_SIMD12_OP; - } - retVal = simdTree; - } - break; - case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicGreaterThanOrEqual: { op2 = impSIMDPopStack(simdType); op1 = impSIMDPopStack(simdType, instMethod); @@ -2959,9 +2243,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: { #if defined(DEBUG) // check for the cases where we don't support intrinsics. 
@@ -3010,48 +2292,11 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, op2 = impSIMDPopStack(simdType); op1 = impSIMDPopStack(simdType, instMethod); -#ifdef TARGET_XARCH - if (simdIntrinsicID == SIMDIntrinsicBitwiseAndNot) - { - // XARCH implements SIMDIntrinsicBitwiseAndNot as ~op1 & op2, while the - // software implementation does op1 & ~op2, so we need to swap the operands - - GenTree* tmp = op2; - op2 = op1; - op1 = tmp; - } -#endif // TARGET_XARCH - simdTree = gtNewSIMDNode(simdType, op1, op2, simdIntrinsicID, baseType, size); retVal = simdTree; } break; - case SIMDIntrinsicSelect: - { - // op3 is a SIMD variable that is the second source - // op2 is a SIMD variable that is the first source - // op1 is a SIMD variable which is the bit mask. - op3 = impSIMDPopStack(simdType); - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType); - - retVal = impSIMDSelect(clsHnd, baseType, size, op1, op2, op3); - } - break; - - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: - { - // op1 is the first operand; if instance method, op1 is "this" arg - // op2 is the second operand - op2 = impSIMDPopStack(simdType); - op1 = impSIMDPopStack(simdType, instMethod); - - retVal = impSIMDMinMax(simdIntrinsicID, clsHnd, baseType, size, op1, op2); - } - break; - case SIMDIntrinsicGetItem: { // op1 is a SIMD variable that is "this" arg @@ -3118,43 +2363,6 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, } break; - case SIMDIntrinsicSqrt: - { -#if (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(DEBUG) - // SSE/AVX/ARM64 doesn't support sqrt on integer type vectors and hence - // should never be seen as an intrinsic here. See SIMDIntrinsicList.h - // for supported base types for this intrinsic. 
- if (!varTypeIsFloating(baseType)) - { - assert(!"Sqrt not supported on integer vectors\n"); - return nullptr; - } -#endif // (defined(TARGET_XARCH) || defined(TARGET_ARM64)) && defined(DEBUG) - - op1 = impSIMDPopStack(simdType); - - retVal = gtNewSIMDNode(genActualType(callType), op1, nullptr, simdIntrinsicID, baseType, size); - } - break; - - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: -#if defined(TARGET_XARCH) - // Rounding instructions are only available from SSE4.1. - if (getSIMDSupportLevel() < SIMD_SSE4_Supported) - { - return nullptr; - } -#endif // defined(TARGET_XARCH) - op1 = impSIMDPopStack(simdType); - retVal = gtNewSIMDNode(genActualType(callType), op1, simdIntrinsicID, baseType, size); - break; - - case SIMDIntrinsicAbs: - op1 = impSIMDPopStack(simdType); - retVal = impSIMDAbs(clsHnd, baseType, size, op1); - break; - case SIMDIntrinsicGetW: retVal = impSIMDGetFixed(simdType, baseType, size, 3); break; diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 1463ed30ca40..a4f07b0c2dc8 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -10,12 +10,12 @@ static const SimdAsHWIntrinsicInfo simdAsHWIntrinsicInfoArray[] = { // clang-format off #if defined(TARGET_XARCH) -#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ - {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + {NI_##classId##_##id, name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, #include "simdashwintrinsiclistxarch.h" #elif defined(TARGET_ARM64) -#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ - {NI_##classId##_##name, #name, 
SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, +#define SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + {NI_##classId##_##id, name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, #include "simdashwintrinsiclistarm64.h" #else #error Unsupported platform @@ -65,6 +65,15 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(CORINFO_SIG_INFO* sig, return NI_Illegal; } + unsigned numArgs = sig->numArgs; + bool isInstanceMethod = false; + + if (sig->hasThis()) + { + numArgs++; + isInstanceMethod = true; + } + for (int i = 0; i < (NI_SIMD_AS_HWINTRINSIC_END - NI_SIMD_AS_HWINTRINSIC_START - 1); i++) { const SimdAsHWIntrinsicInfo& intrinsicInfo = simdAsHWIntrinsicInfoArray[i]; @@ -74,12 +83,12 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(CORINFO_SIG_INFO* sig, continue; } - if (sig->numArgs != static_cast(intrinsicInfo.numArgs)) + if (numArgs != static_cast(intrinsicInfo.numArgs)) { continue; } - if (sig->hasThis() != SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsicInfo.id)) + if (isInstanceMethod != SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsicInfo.id)) { continue; } @@ -170,30 +179,38 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, return nullptr; } - var_types retType = JITtype2varType(sig->retType); - var_types baseType = TYP_UNKNOWN; - var_types simdType = TYP_UNKNOWN; - unsigned simdSize = 0; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + var_types retType = JITtype2varType(sig->retType); + var_types baseType = TYP_UNKNOWN; + var_types simdType = TYP_UNKNOWN; + unsigned simdSize = 0; + unsigned numArgs = sig->numArgs; + bool isInstanceMethod = false; // We want to resolve and populate the handle cache for this type even // if it isn't the basis for anything carried on the node. 
baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); - assert(simdSize != 0); - - CORINFO_CLASS_HANDLE argClass; if (retType == TYP_STRUCT) { baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize); retType = getSIMDTypeForSize(simdSize); } - else + else if (numArgs != 0) { argClass = info.compCompHnd->getArgClass(sig, sig->args); baseType = getBaseTypeAndSizeOfSIMDType(argClass, &simdSize); } - if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (sig->numArgs != 0)) + if (sig->hasThis()) + { + assert(SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic)); + numArgs++; + + isInstanceMethod = true; + argClass = clsHnd; + } + else if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (numArgs != 0)) { // We need to fixup the clsHnd in the case we are an intrinsic on Vector // The first argument will be the appropriate Vector handle to use @@ -206,15 +223,16 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); } - simdType = getSIMDTypeForSize(simdSize); - assert(varTypeIsSIMD(simdType)); - - if (!varTypeIsArithmetic(baseType)) + if (!varTypeIsArithmetic(baseType) || (simdSize == 0)) { - // We only support intrinsics on the 10 primitive arithmetic types + // We get here for a devirtualization of IEquatable`1.Equals + // or if the user tries to use Vector with an unsupported type return nullptr; } + simdType = getSIMDTypeForSize(simdSize); + assert(varTypeIsSIMD(simdType)); + NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); if ((hwIntrinsic == NI_Illegal) || !varTypeIsSIMD(simdType)) @@ -250,14 +268,19 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, GenTree* op1 = nullptr; GenTree* op2 = nullptr; - bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); - - switch (sig->numArgs) + switch (numArgs) { + case 0: + { + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + return 
gtNewSimdAsHWIntrinsicNode(retType, hwIntrinsic, baseType, simdSize); + } + case 1: { - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + argType = isInstanceMethod ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); return gtNewSimdAsHWIntrinsicNode(retType, op1, hwIntrinsic, baseType, simdSize); @@ -265,12 +288,13 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, case 2: { - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? argList : info.compCompHnd->getArgNext(argList); argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = getArgForHWIntrinsic(argType, argClass); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + argType = isInstanceMethod ? 
simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)) { @@ -311,18 +335,31 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert(retType != TYP_UNKNOWN); assert(varTypeIsArithmetic(baseType)); assert(simdSize != 0); - assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); assert(SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType) == intrinsic); - CORINFO_ARG_LIST_HANDLE argList = sig->args; - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass; + var_types simdType = getSIMDTypeForSize(simdSize); + assert(varTypeIsSIMD(simdType)); + + CORINFO_ARG_LIST_HANDLE argList = sig->args; + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; GenTree* op1 = nullptr; GenTree* op2 = nullptr; + GenTree* op3 = nullptr; SimdAsHWIntrinsicClassId classId = SimdAsHWIntrinsicInfo::lookupClassId(intrinsic); - bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + unsigned numArgs = sig->numArgs; + bool isInstanceMethod = false; + + if (sig->hasThis()) + { + assert(SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic)); + numArgs++; + + isInstanceMethod = true; + argClass = clsHnd; + } #if defined(TARGET_XARCH) bool isVectorT256 = (SimdAsHWIntrinsicInfo::lookupClassId(intrinsic) == SimdAsHWIntrinsicClassId::VectorT256); @@ -342,12 +379,64 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2)); #endif - switch (sig->numArgs) + switch (numArgs) { + case 0: + { + switch (intrinsic) + { +#if defined(TARGET_XARCH) + case NI_VectorT128_get_Count: + case NI_VectorT256_get_Count: + { + GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, baseType), TYP_INT); + countNode->gtFlags |= GTF_ICON_SIMD_COUNT; + return countNode; + 
} +#elif defined(TARGET_ARM64) + case NI_VectorT128_get_Count: + { + GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, baseType), TYP_INT); + countNode->gtFlags |= GTF_ICON_SIMD_COUNT; + return countNode; + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. + break; + } + } + } + case 1: { - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + bool isOpExplicit = (intrinsic == NI_VectorT128_op_Explicit); + +#if defined(TARGET_XARCH) + isOpExplicit |= (intrinsic == NI_VectorT256_op_Explicit); +#endif + + if (isOpExplicit) + { + // We fold away the cast here, as it only exists to satisfy the + // type system. It is safe to do this here since the op1 type + // and the signature return type are both the same TYP_SIMD. + + op1 = impSIMDPopStack(retType, /* expectAddr: */ false, sig->retTypeClass); + SetOpLclRelatedToSIMDIntrinsic(op1); + assert(op1->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); + + return op1; + } + + argType = isInstanceMethod ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); @@ -447,12 +536,13 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case 2: { - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? 
argList : info.compCompHnd->getArgNext(argList); argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = getArgForHWIntrinsic(argType, argClass); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); - op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + argType = isInstanceMethod ? simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); @@ -681,7 +771,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } #else #error Unsupported platform -#endif // TARGET_XARCH +#endif // !TARGET_XARCH && !TARGET_ARM64 default: { @@ -692,6 +782,49 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } break; } + + case 3: + { + CORINFO_ARG_LIST_HANDLE arg2 = isInstanceMethod ? argList : info.compCompHnd->getArgNext(argList); + CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); + op3 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = isInstanceMethod ? 
simdType + : JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + + switch (intrinsic) + { +#if defined(TARGET_XARCH) + case NI_VectorT128_ConditionalSelect: + case NI_VectorT256_ConditionalSelect: + { + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op2, op3); + } +#elif defined(TARGET_ARM64) + case NI_VectorT128_ConditionalSelect: + { + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op2, op3); + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. + break; + } + } + } } assert(!"Unexpected SimdAsHWIntrinsic"); @@ -723,7 +856,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, { assert(featureSIMD); assert(retType != TYP_UNKNOWN); - assert(varTypeIsIntegral(baseType)); + assert(varTypeIsArithmetic(baseType)); assert(simdSize != 0); assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); assert(op1 != nullptr); diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index cfd47939cf3d..9621486e6965 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -6,12 +6,24 @@ #ifndef SIMD_AS_HWINTRINSIC #error Define SIMD_AS_HWINTRINSIC before including this file #endif + +#if defined(SIMD_AS_HWINTRINSIC_ID) || defined(SIMD_AS_HWINTRINSIC_NM) +#error SIMD_AS_HWINTRINSIC_ID and SIMD_AS_HWINTRINSIC_NM should not be defined before including this file +#endif /*****************************************************************************/ // clang-format off #ifdef FEATURE_HW_INTRINSICS +// Defines a SimdAsHWIntrinsic where the name is implicitly 
taken from the id +#define SIMD_AS_HWINTRINSIC_ID(classId, id, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + SIMD_AS_HWINTRINSIC(classId, id, #id, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) + +// Defines a SimdAsHWIntrinsic where the name is explicit +#define SIMD_AS_HWINTRINSIC_NM(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) + /* Note * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic` * Each intrinsic has a `NumArg` for number of parameters @@ -22,65 +34,94 @@ */ // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics -SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector2, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics -SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector3, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics -SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector4, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics -SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, 
{NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Max, 2, {NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_AdvSimd_Max, NI_AdvSimd_Arm64_Max}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Min, 2, {NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_AdvSimd_Min, NI_AdvSimd_Arm64_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Arm64_Add}, SimdAsHWIntrinsicFlag::None) 
-SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_AdvSimd_Arm64_Divide}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Abs, 1, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, 
NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Ceiling, NI_AdvSimd_Arm64_Ceiling}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(VectorT128, EqualsInstance, "Equals", 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Floor, NI_AdvSimd_Arm64_Floor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet,
NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThanOrEqual, 2, {NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Max, 2, {NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_AdvSimd_Max, NI_AdvSimd_Arm64_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Min, 2, {NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_AdvSimd_Min, NI_AdvSimd_Arm64_Min}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Addition, 2, {NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Arm64_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_BitwiseAnd, 2, {NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_BitwiseOr, 2, {NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_AdvSimd_Arm64_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Equality, 2, 
{NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Inequality, 2, {NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_AdvSimd_Arm64_Sqrt, NI_AdvSimd_Arm64_Sqrt}, SimdAsHWIntrinsicFlag::None) + +#undef SIMD_AS_HWINTRINSIC_NM +#undef SIMD_AS_HWINTRINSIC_ID #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index 8f2ac6264041..d13153db4aad 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -6,12 +6,24 @@ #ifndef SIMD_AS_HWINTRINSIC #error Define SIMD_AS_HWINTRINSIC before including this file #endif + +#if defined(SIMD_AS_HWINTRINSIC_ID) || defined(SIMD_AS_HWINTRINSIC_NM) +#error SIMD_AS_HWINTRINSIC_ID and SIMD_AS_HWINTRINSIC_NM should not be defined before including this file +#endif /*****************************************************************************/ // clang-format off #ifdef FEATURE_HW_INTRINSICS +// Defines a SimdAsHWIntrinsic where the name is implicitly taken from the id +#define SIMD_AS_HWINTRINSIC_ID(classId, id, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + SIMD_AS_HWINTRINSIC(classId, id, #id, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) + +// Defines a SimdAsHWIntrinsic where the name is explicit +#define SIMD_AS_HWINTRINSIC_NM(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + SIMD_AS_HWINTRINSIC(classId, id, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) + /* Note * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic` * Each intrinsic has a `NumArg` for number of parameters @@ -22,87 +34,127 @@ */ // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics -SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector2, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector2, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, 
TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics -SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, 
op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector3, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector3, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics -SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Abs, 1, 
{NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(Vector4, EqualsInstance, "Equals", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Equality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Equality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Inequality, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_op_Inequality, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(Vector4, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_Illegal}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics -SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, 
NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) -SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, 
NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_SSE_CompareLessThanOrEqual, NI_SSE2_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Max, 2, {NI_VectorT128_Max, NI_SSE2_Max, NI_SSE2_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_SSE_Max, NI_SSE2_Max}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, Min, 2, {NI_VectorT128_Min, NI_SSE2_Min, NI_SSE2_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_SSE_Min, NI_SSE2_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE_Add, NI_SSE2_Add}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE_And, NI_SSE2_And}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, 
op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Abs, 1, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Ceiling, NI_SSE41_Ceiling}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, ConditionalSelect, 3, {NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect, NI_VectorT128_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(VectorT128, EqualsInstance, "Equals", 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, 
NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Floor, NI_SSE41_Floor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, 
NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThanOrEqual, 2, {NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_SSE_CompareLessThanOrEqual, NI_SSE2_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Max, 2, {NI_VectorT128_Max, NI_SSE2_Max, NI_SSE2_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_SSE_Max, NI_SSE2_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, Min, 2, {NI_VectorT128_Min, NI_SSE2_Min, NI_SSE2_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_SSE_Min, NI_SSE2_Min}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Addition, 2, {NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE_Add, NI_SSE2_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_BitwiseAnd, 2, {NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE_And, NI_SSE2_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Division, 2, {NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Equality, 2, {NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality, NI_Vector128_op_Equality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Explicit, 1, {NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit, NI_VectorT128_op_Explicit}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Inequality, 2, {NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality, NI_Vector128_op_Inequality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT128, SquareRoot, 1, {NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Sqrt, NI_SSE2_Sqrt}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* -// ISA Function name NumArg Instructions Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ISA ID Name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics -SIMD_AS_HWINTRINSIC(VectorT256, Abs, 1, {NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, 
NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) -SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, LessThanOrEqual, 2, {NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_AVX_CompareLessThanOrEqual, NI_AVX_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, Max, 2, {NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, 
NI_VectorT256_Max, NI_VectorT256_Max, NI_AVX_Max, NI_AVX_Max}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, Min, 2, {NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_AVX_Min, NI_AVX_Min}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Addition, 2, {NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX_Add, NI_AVX_Add}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseAnd, 2, {NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX_And, NI_AVX_And}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseOr, 2, {NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Divide, NI_AVX_Divide}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_ExclusiveOr, 2, {NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX_Xor, NI_AVX_Xor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Multiply, NI_AVX_Multiply}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, op_Subtraction, 2, {NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX_Subtract, NI_AVX_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Abs, 1, {NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, 
NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Ceiling, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Ceiling, NI_AVX_Ceiling}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, ConditionalSelect, 3, {NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect, NI_VectorT256_ConditionalSelect}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_NM(VectorT256, EqualsInstance, "Equals", 2, {NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality}, SimdAsHWIntrinsicFlag::InstanceMethod) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Floor, NI_AVX_Floor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, get_AllBitsSet, 0, {NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, 
NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Count, 0, {NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Zero, 0, {NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, 
LessThanOrEqual, 2, {NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_AVX_CompareLessThanOrEqual, NI_AVX_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Max, 2, {NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_AVX_Max, NI_AVX_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, Min, 2, {NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_AVX_Min, NI_AVX_Min}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Addition, 2, {NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX_Add, NI_AVX_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_BitwiseAnd, 2, {NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX_And, NI_AVX_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_BitwiseOr, 2, {NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Divide, NI_AVX_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Equality, 2, {NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality, NI_Vector256_op_Equality}, SimdAsHWIntrinsicFlag::None) 
+SIMD_AS_HWINTRINSIC_ID(VectorT256, op_ExclusiveOr, 2, {NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX_Xor, NI_AVX_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Explicit, 1, {NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit, NI_VectorT256_op_Explicit}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Inequality, 2, {NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality, NI_Vector256_op_Inequality}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Multiply, NI_AVX_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, op_Subtraction, 2, {NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX_Subtract, NI_AVX_Subtract}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC_ID(VectorT256, SquareRoot, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Sqrt, NI_AVX_Sqrt}, SimdAsHWIntrinsicFlag::None) + +#undef SIMD_AS_HWINTRINSIC_NM +#undef SIMD_AS_HWINTRINSIC_ID #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index b777d1da4eee..e6aa4db08fe5 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -130,21 +130,6 @@ instruction 
CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicSqrt: - if (baseType == TYP_FLOAT) - { - result = INS_sqrtps; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_sqrtpd; - } - else - { - unreached(); - } - break; - case SIMDIntrinsicAdd: if (baseType == TYP_FLOAT) { @@ -233,108 +218,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicMin: - if (baseType == TYP_FLOAT) - { - result = INS_minps; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_minpd; - } - else if (baseType == TYP_UBYTE) - { - result = INS_pminub; - } - else if (baseType == TYP_SHORT) - { - result = INS_pminsw; - } - else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) - { - if (baseType == TYP_BYTE) - { - result = INS_pminsb; - } - else if (baseType == TYP_USHORT) - { - result = INS_pminuw; - } - else if (baseType == TYP_INT) - { - result = INS_pminsd; - } - else if (baseType == TYP_UINT) - { - result = INS_pminud; - } - } - else - { - unreached(); - } - break; - - case SIMDIntrinsicMax: - if (baseType == TYP_FLOAT) - { - result = INS_maxps; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_maxpd; - } - else if (baseType == TYP_UBYTE) - { - result = INS_pmaxub; - } - else if (baseType == TYP_SHORT) - { - result = INS_pmaxsw; - } - else if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) - { - if (baseType == TYP_BYTE) - { - result = INS_pmaxsb; - } - else if (baseType == TYP_USHORT) - { - result = INS_pmaxuw; - } - else if (baseType == TYP_INT) - { - result = INS_pmaxsd; - } - else if (baseType == TYP_UINT) - { - result = INS_pmaxud; - } - } - else - { - unreached(); - } - break; - - case SIMDIntrinsicAbs: - if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) - { - if (baseType == TYP_INT) - { - result = INS_pabsd; - } - else if (baseType == TYP_SHORT) - { - result = INS_pabsw; - } - else if (baseType == TYP_BYTE) - { - result = INS_pabsb; 
- } - } - break; - case SIMDIntrinsicEqual: if (baseType == TYP_FLOAT) { @@ -367,65 +250,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicLessThan: - // Packed integers use > with swapped operands - assert(baseType != TYP_INT); - - if (baseType == TYP_FLOAT) - { - result = INS_cmpps; - assert(ival != nullptr); - *ival = 1; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_cmppd; - assert(ival != nullptr); - *ival = 1; - } - break; - - case SIMDIntrinsicLessThanOrEqual: - // Packed integers use (a==b) || ( b > a) in place of a <= b. - assert(baseType != TYP_INT); - - if (baseType == TYP_FLOAT) - { - result = INS_cmpps; - assert(ival != nullptr); - *ival = 2; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_cmppd; - assert(ival != nullptr); - *ival = 2; - } - break; - - case SIMDIntrinsicGreaterThan: - // Packed float/double use < with swapped operands - assert(!varTypeIsFloating(baseType)); - - // SSE2 supports only signed > - if (baseType == TYP_INT) - { - result = INS_pcmpgtd; - } - else if (baseType == TYP_SHORT) - { - result = INS_pcmpgtw; - } - else if (baseType == TYP_BYTE) - { - result = INS_pcmpgtb; - } - else if ((baseType == TYP_LONG) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)) - { - result = INS_pcmpgtq; - } - break; - case SIMDIntrinsicBitwiseAnd: if (baseType == TYP_FLOAT) { @@ -441,25 +265,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case SIMDIntrinsicBitwiseAndNot: - if (baseType == TYP_FLOAT) - { - result = INS_andnps; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_andnpd; - } - else if (baseType == TYP_INT) - { - result = INS_pandn; - } - else if (varTypeIsIntegral(baseType)) - { - result = INS_pandn; - } - break; - case SIMDIntrinsicBitwiseOr: if (baseType == TYP_FLOAT) { @@ -475,21 +280,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type } break; - case 
SIMDIntrinsicBitwiseXor: - if (baseType == TYP_FLOAT) - { - result = INS_xorps; - } - else if (baseType == TYP_DOUBLE) - { - result = INS_xorpd; - } - else if (varTypeIsIntegral(baseType)) - { - result = INS_pxor; - } - break; - case SIMDIntrinsicCast: result = INS_movaps; break; @@ -645,26 +435,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type result = INS_insertps; break; - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: - if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) - { - if (baseType == TYP_FLOAT) - { - result = INS_roundps; - } - else - { - assert(baseType == TYP_DOUBLE); - result = INS_roundpd; - } - - assert(ival != nullptr); - *ival = (intrinsicId == SIMDIntrinsicCeil) ? ROUNDPS_TOWARD_POSITIVE_INFINITY_IMM - : ROUNDPS_TOWARD_NEGATIVE_INFINITY_IMM; - } - break; - default: assert(!"Unsupported SIMD intrinsic"); unreached(); @@ -760,10 +530,10 @@ void CodeGen::genSIMDScalarMove( void CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg) { - // We just use `INS_xorps` instead of `getOpForSIMDIntrinsic(SIMDIntrinsicBitwiseXor, baseType)` - // since `genSIMDZero` is used for both `System.Numerics.Vectors` and HardwareIntrinsics. Modern - // CPUs handle this specially in the renamer and it never hits the execution pipeline, additionally - // `INS_xorps` is always available (when using either the legacy or VEX encoding). + // We just use `INS_xorps` since `genSIMDZero` is used for both `System.Numerics.Vectors` and + // HardwareIntrinsics. Modern CPUs handle this specially in the renamer and it never hits the + // execution pipeline, additionally `INS_xorps` is always available (when using either the + // legacy or VEX encoding). 
inst_RV_RV(INS_xorps, targetReg, targetReg, targetType, emitActualTypeSize(targetType)); } @@ -1062,8 +832,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs); + assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast); GenTree* op1 = simdNode->gtGetOp1(); var_types baseType = simdNode->gtSIMDBaseType; @@ -1080,32 +849,6 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) genProduceReg(simdNode); } -//---------------------------------------------------------------------------------- -// genSIMDIntrinsicUnOpWithImm: Generate code for SIMD Intrinsic unary operations with an imm8, such as Ceil. -// -// Arguments: -// simdNode - The GT_SIMD node -// -// Return Value: -// None. -// -void CodeGen::genSIMDIntrinsicUnOpWithImm(GenTreeSIMD* simdNode) -{ - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCeil || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicFloor); - - GenTree* op1 = simdNode->gtGetOp1(); - var_types baseType = simdNode->gtSIMDBaseType; - regNumber targetReg = simdNode->GetRegNum(); - assert(targetReg != REG_NA); - var_types targetType = simdNode->TypeGet(); - - regNumber op1Reg = genConsumeReg(op1); - unsigned ival; - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival); - assert((ival >= 0) && (ival <= 255)); - GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, op1Reg, (int8_t)ival); -} - //---------------------------------------------------------------------------------- // genSIMDIntrinsic32BitConvert: Generate code for 32-bit SIMD Convert (int/uint <-> float) // @@ -1627,7 +1370,26 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) genSIMDZero(simdType, baseType, tmpReg); if (!varTypeIsUnsigned(baseType)) { - instruction compareIns = 
getOpForSIMDIntrinsic(SIMDIntrinsicGreaterThan, baseType); + instruction compareIns = INS_invalid; + + if (baseType == TYP_INT) + { + compareIns = INS_pcmpgtd; + } + else if (baseType == TYP_SHORT) + { + compareIns = INS_pcmpgtw; + } + else if (baseType == TYP_BYTE) + { + compareIns = INS_pcmpgtb; + } + else if ((baseType == TYP_LONG) && (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)) + { + compareIns = INS_pcmpgtq; + } + + assert(compareIns != INS_invalid); inst_RV_RV(compareIns, tmpReg, targetReg, simdType, emitSize); } inst_RV_RV(widenIns, targetReg, tmpReg, simdType); @@ -1797,10 +1559,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax); + simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr); GenTree* op1 = simdNode->gtGetOp1(); GenTree* op2 = simdNode->gtGetOp2(); @@ -1997,7 +1756,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) switch (simdNode->gtSIMDIntrinsicID) { case SIMDIntrinsicEqual: - case SIMDIntrinsicGreaterThan: { assert(targetReg != REG_NA); @@ -2010,12 +1768,6 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) } #endif - // Greater-than: Floating point vectors use "<" with swapped operands - if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan) - { - assert(!varTypeIsFloating(baseType)); - } - unsigned ival = 0; instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival); @@ -2047,124 +1799,6 @@ void 
CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) } break; - case SIMDIntrinsicLessThan: - case SIMDIntrinsicLessThanOrEqual: - { - assert(targetReg != REG_NA); - - // Int vectors use ">" and ">=" with swapped operands - assert(varTypeIsFloating(baseType)); - - // Get the instruction opcode for compare operation - unsigned ival; - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival); - - // targetReg = op1reg RelOp op2reg - // Thefore, we can optimize if op1Reg == targetReg - if (op1Reg != targetReg) - { - inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType)); - } - - assert((ival >= 0) && (ival <= 255)); - GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, op2Reg, (int8_t)ival); - } - break; - - // (In)Equality that produces bool result instead of a bit vector - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: - { - // We're only setting condition flags, if a 0/1 value is desired then Lowering should have inserted a SETCC. - assert(targetReg == REG_NA); - - var_types simdType = op1->TypeGet(); - // TODO-1stClassStructs: Temporary to minimize asmDiffs - if (simdType == TYP_DOUBLE) - { - simdType = TYP_SIMD8; - } - - // Here we should consider TYP_SIMD12 operands as if they were TYP_SIMD16 - // since both the operands will be in XMM registers. - if (simdType == TYP_SIMD12) - { - simdType = TYP_SIMD16; - } - - // On SSE4/AVX, we can generate optimal code for (in)equality against zero using ptest. - if (op2->isContained()) - { - assert((compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported) && op2->IsIntegralConstVector(0)); - inst_RV_RV(INS_ptest, op1->GetRegNum(), op1->GetRegNum(), simdType, emitActualTypeSize(simdType)); - } - else - { - // We need one additional SIMD register to store the result of the SIMD compare. 
- regNumber tmpReg1 = simdNode->GetSingleTempReg(RBM_ALLFLOAT); - - // tmpReg1 = (op1Reg == op2Reg) - // Call this value of tmpReg1 as 'compResult' for further reference below. - regNumber otherReg = op2Reg; - if (tmpReg1 != op2Reg) - { - if (tmpReg1 != op1Reg) - { - inst_RV_RV(ins_Copy(simdType), tmpReg1, op1Reg, simdType, emitActualTypeSize(simdType)); - } - } - else - { - otherReg = op1Reg; - } - - // For all integer types we can use TYP_INT comparison. - unsigned ival = 0; - instruction ins = - getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? baseType : TYP_INT, &ival); - - if (varTypeIsFloating(baseType)) - { - assert((ival >= 0) && (ival <= 255)); - GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, (int8_t)ival); - } - else - { - inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType)); - } - - regNumber intReg = simdNode->GetSingleTempReg(RBM_ALLINT); - inst_RV_RV(INS_pmovmskb, intReg, tmpReg1, simdType, emitActualTypeSize(simdType)); - // There's no pmovmskw/pmovmskd/pmovmskq but they're not needed anyway. Vector compare - // instructions produce "all ones"/"all zeroes" components and pmovmskb extracts a - // subset of each component's ones/zeroes. In the end we need to know if the result is - // "all ones" where the number of ones is given by the vector byte size, not by the - // vector component count. So, for AVX registers we need to compare to 0xFFFFFFFF and - // for SSE registers we need to compare to 0x0000FFFF. - // The SIMD12 case is handled specially, because we can't rely on the upper bytes being - // zero, so we must compare only the lower 3 floats (hence the byte mask of 0xFFF). - // Note that -1 is used instead of 0xFFFFFFFF, on x64 emit doesn't correctly recognize - // that 0xFFFFFFFF can be encoded in a single byte and emits the longer 3DFFFFFFFF - // encoding instead of 83F8FF. 
- ssize_t mask; - if ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) - { - mask = 0x00000FFF; - GetEmitter()->emitIns_R_I(INS_and, EA_4BYTE, intReg, mask); - } - else if (emitActualTypeSize(simdType) == 32) - { - mask = -1; - } - else - { - mask = 0x0000FFFF; - } - GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, mask); - } - } - break; - default: noway_assert(!"Unimplemented SIMD relational operation."); unreached(); @@ -3224,9 +2858,7 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) genSIMDIntrinsicInitN(simdNode); break; - case SIMDIntrinsicSqrt: case SIMDIntrinsicCast: - case SIMDIntrinsicAbs: genSIMDIntrinsicUnOp(simdNode); break; @@ -3254,21 +2886,11 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) case SIMDIntrinsicMul: case SIMDIntrinsicDiv: case SIMDIntrinsicBitwiseAnd: - case SIMDIntrinsicBitwiseAndNot: case SIMDIntrinsicBitwiseOr: - case SIMDIntrinsicBitwiseXor: - case SIMDIntrinsicMin: - case SIMDIntrinsicMax: genSIMDIntrinsicBinOp(simdNode); break; - case SIMDIntrinsicOpEquality: - case SIMDIntrinsicOpInEquality: case SIMDIntrinsicEqual: - case SIMDIntrinsicLessThan: - case SIMDIntrinsicGreaterThan: - case SIMDIntrinsicLessThanOrEqual: - case SIMDIntrinsicGreaterThanOrEqual: genSIMDIntrinsicRelOp(simdNode); break; @@ -3298,11 +2920,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) genSIMDIntrinsicUpperRestore(simdNode); break; - case SIMDIntrinsicCeil: - case SIMDIntrinsicFloor: - genSIMDIntrinsicUnOpWithImm(simdNode); - break; - default: noway_assert(!"Unimplemented SIMD intrinsic."); unreached(); diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h index 7b535c0112dc..813a937fd056 100644 --- a/src/coreclr/src/jit/simdintrinsiclist.h +++ b/src/coreclr/src/jit/simdintrinsiclist.h @@ -76,13 +76,6 @@ SIMD_INTRINSIC("set_Y", true, SetY, SIMD_INTRINSIC("set_Z", true, SetZ, "setZ", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, 
TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) SIMD_INTRINSIC("set_W", true, SetW, "setW", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -// Object.Equals() -SIMD_INTRINSIC("Equals", true, InstEquals, "equals", TYP_BOOL, 2, {TYP_BYREF, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) - -// Operator == and != -SIMD_INTRINSIC("op_Equality", false, OpEquality, "==", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("op_Inequality", false, OpInEquality, "!=", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) - // Arithmetic Operations SIMD_INTRINSIC("op_Addition", false, Add, "+", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("op_Subtraction", false, Sub, "-", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) @@ -96,28 +89,12 @@ SIMD_INTRINSIC("op_Multiply", false, Mul, SIMD_INTRINSIC("op_Division", false, Div, "/", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -// SquareRoot is recognized as an intrinsic only for float or double vectors -SIMD_INTRINSIC("SquareRoot", false, Sqrt, "sqrt", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) - -SIMD_INTRINSIC("Ceiling", 
false, Ceil, "ceil", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) -SIMD_INTRINSIC("Floor", false, Floor, "floor", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}) - -SIMD_INTRINSIC("Min", false, Min, "min", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("Max", false, Max, "max", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) - // Vector Relational operators SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("LessThan", false, LessThan, "lt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("LessThanOrEqual", false, LessThanOrEqual, "le", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("GreaterThan", false, GreaterThan, "gt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("GreaterThanOrEqual", false, GreaterThanOrEqual, "ge", TYP_STRUCT, 2, 
{TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) // Bitwise operations SIMD_INTRINSIC("op_BitwiseAnd", false, BitwiseAnd, "&", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("AndNot", false, BitwiseAndNot, "&~", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) SIMD_INTRINSIC("op_BitwiseOr", false, BitwiseOr, "|", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) -SIMD_INTRINSIC("op_ExclusiveOr", false, BitwiseXor, "^", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) // Dot Product #if defined(TARGET_XARCH) @@ -128,9 +105,6 @@ SIMD_INTRINSIC("Dot", false, DotProduct, SIMD_INTRINSIC("Dot", false, DotProduct, "Dot", TYP_UNKNOWN, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_UNDEF, TYP_UNDEF}) #endif -// Select -SIMD_INTRINSIC("ConditionalSelect", false, Select, "Select", TYP_STRUCT, 3, {TYP_STRUCT, TYP_STRUCT, TYP_STRUCT}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) - // Cast SIMD_INTRINSIC("op_Explicit", false, Cast, "Cast", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG}) diff --git a/src/coreclr/src/jit/vartype.h b/src/coreclr/src/jit/vartype.h index d69a75e5bfb5..e34ee7e5a8df 100644 --- a/src/coreclr/src/jit/vartype.h +++ b/src/coreclr/src/jit/vartype.h @@ 
-113,6 +113,12 @@ inline bool varTypeIsUnsigned(T vt) return ((varTypeClassification[TypeGet(vt)] & (VTF_UNS)) != 0); } +template +inline bool varTypeIsSigned(T vt) +{ + return varTypeIsIntegralOrI(vt) && !varTypeIsUnsigned(vt); +} + // If "vt" is an unsigned integral type, returns the corresponding signed integral type, otherwise // return "vt". inline var_types varTypeUnsignedToSigned(var_types vt) @@ -140,6 +146,32 @@ inline var_types varTypeUnsignedToSigned(var_types vt) } } +// If "vt" is a signed integral type, returns the corresponding unsigned integral type, otherwise +// return "vt". +inline var_types varTypeSignedToUnsigned(var_types vt) +{ + if (varTypeIsSigned(vt)) + { + switch (vt) + { + case TYP_BYTE: + return TYP_UBYTE; + case TYP_SHORT: + return TYP_USHORT; + case TYP_INT: + return TYP_UINT; + case TYP_LONG: + return TYP_ULONG; + default: + unreached(); + } + } + else + { + return vt; + } +} + template inline bool varTypeIsFloating(T vt) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index e27b5bf883d1..4cfebe72cab6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -84,12 +84,12 @@ public static Vector One } private static readonly Vector s_one = new Vector(GetOneValue()); - internal static Vector AllOnes + internal static Vector AllBitsSet { [Intrinsic] - get => s_allOnes; + get => s_allBitsSet; } - private static readonly Vector s_allOnes = new Vector(GetAllBitsSetValue()); + private static readonly Vector s_allBitsSet = new Vector(GetAllBitsSetValue()); #endregion Static Members #region Constructors @@ -478,11 +478,7 @@ public readonly unsafe T this[int index] [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? 
obj) { - if (!(obj is Vector)) - { - return false; - } - return Equals((Vector)obj); + return (obj is Vector other) && Equals(other); } /// @@ -493,130 +489,7 @@ public override readonly bool Equals(object? obj) [Intrinsic] public readonly bool Equals(Vector other) { - if (Vector.IsHardwareAccelerated) - { - for (int g = 0; g < Count; g++) - { - if (!ScalarEquals(this[g], other[g])) - { - return false; - } - } - return true; - } - else - { - if (typeof(T) == typeof(byte)) - { - return - this.register.byte_0 == other.register.byte_0 - && this.register.byte_1 == other.register.byte_1 - && this.register.byte_2 == other.register.byte_2 - && this.register.byte_3 == other.register.byte_3 - && this.register.byte_4 == other.register.byte_4 - && this.register.byte_5 == other.register.byte_5 - && this.register.byte_6 == other.register.byte_6 - && this.register.byte_7 == other.register.byte_7 - && this.register.byte_8 == other.register.byte_8 - && this.register.byte_9 == other.register.byte_9 - && this.register.byte_10 == other.register.byte_10 - && this.register.byte_11 == other.register.byte_11 - && this.register.byte_12 == other.register.byte_12 - && this.register.byte_13 == other.register.byte_13 - && this.register.byte_14 == other.register.byte_14 - && this.register.byte_15 == other.register.byte_15; - } - else if (typeof(T) == typeof(sbyte)) - { - return - this.register.sbyte_0 == other.register.sbyte_0 - && this.register.sbyte_1 == other.register.sbyte_1 - && this.register.sbyte_2 == other.register.sbyte_2 - && this.register.sbyte_3 == other.register.sbyte_3 - && this.register.sbyte_4 == other.register.sbyte_4 - && this.register.sbyte_5 == other.register.sbyte_5 - && this.register.sbyte_6 == other.register.sbyte_6 - && this.register.sbyte_7 == other.register.sbyte_7 - && this.register.sbyte_8 == other.register.sbyte_8 - && this.register.sbyte_9 == other.register.sbyte_9 - && this.register.sbyte_10 == other.register.sbyte_10 - && this.register.sbyte_11 == 
other.register.sbyte_11 - && this.register.sbyte_12 == other.register.sbyte_12 - && this.register.sbyte_13 == other.register.sbyte_13 - && this.register.sbyte_14 == other.register.sbyte_14 - && this.register.sbyte_15 == other.register.sbyte_15; - } - else if (typeof(T) == typeof(ushort)) - { - return - this.register.uint16_0 == other.register.uint16_0 - && this.register.uint16_1 == other.register.uint16_1 - && this.register.uint16_2 == other.register.uint16_2 - && this.register.uint16_3 == other.register.uint16_3 - && this.register.uint16_4 == other.register.uint16_4 - && this.register.uint16_5 == other.register.uint16_5 - && this.register.uint16_6 == other.register.uint16_6 - && this.register.uint16_7 == other.register.uint16_7; - } - else if (typeof(T) == typeof(short)) - { - return - this.register.int16_0 == other.register.int16_0 - && this.register.int16_1 == other.register.int16_1 - && this.register.int16_2 == other.register.int16_2 - && this.register.int16_3 == other.register.int16_3 - && this.register.int16_4 == other.register.int16_4 - && this.register.int16_5 == other.register.int16_5 - && this.register.int16_6 == other.register.int16_6 - && this.register.int16_7 == other.register.int16_7; - } - else if (typeof(T) == typeof(uint)) - { - return - this.register.uint32_0 == other.register.uint32_0 - && this.register.uint32_1 == other.register.uint32_1 - && this.register.uint32_2 == other.register.uint32_2 - && this.register.uint32_3 == other.register.uint32_3; - } - else if (typeof(T) == typeof(int)) - { - return - this.register.int32_0 == other.register.int32_0 - && this.register.int32_1 == other.register.int32_1 - && this.register.int32_2 == other.register.int32_2 - && this.register.int32_3 == other.register.int32_3; - } - else if (typeof(T) == typeof(ulong)) - { - return - this.register.uint64_0 == other.register.uint64_0 - && this.register.uint64_1 == other.register.uint64_1; - } - else if (typeof(T) == typeof(long)) - { - return - this.register.int64_0 
== other.register.int64_0 - && this.register.int64_1 == other.register.int64_1; - } - else if (typeof(T) == typeof(float)) - { - return - this.register.single_0 == other.register.single_0 - && this.register.single_1 == other.register.single_1 - && this.register.single_2 == other.register.single_2 - && this.register.single_3 == other.register.single_3; - } - else if (typeof(T) == typeof(double)) - { - return - this.register.double_0 == other.register.double_0 - && this.register.double_1 == other.register.double_1; - } - else - { - throw new NotSupportedException(SR.Arg_TypeNotSupported); - } - } + return this == other; } /// @@ -1722,7 +1595,7 @@ public readonly bool TryCopyTo(Span destination) /// The one's complement vector. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator ~(Vector value) => - s_allOnes ^ value; + AllBitsSet ^ value; #endregion Bitwise Operators #region Logical Operators @@ -1734,8 +1607,133 @@ public readonly bool TryCopyTo(Span destination) /// True if all elements are equal; False otherwise. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool operator ==(Vector left, Vector right) => - left.Equals(right); + public static bool operator ==(Vector left, Vector right) + { + if (Vector.IsHardwareAccelerated) + { + for (int g = 0; g < Count; g++) + { + if (!ScalarEquals(left[g], right[g])) + { + return false; + } + } + return true; + } + else + { + if (typeof(T) == typeof(byte)) + { + return + left.register.byte_0 == right.register.byte_0 + && left.register.byte_1 == right.register.byte_1 + && left.register.byte_2 == right.register.byte_2 + && left.register.byte_3 == right.register.byte_3 + && left.register.byte_4 == right.register.byte_4 + && left.register.byte_5 == right.register.byte_5 + && left.register.byte_6 == right.register.byte_6 + && left.register.byte_7 == right.register.byte_7 + && left.register.byte_8 == right.register.byte_8 + && left.register.byte_9 == right.register.byte_9 + && left.register.byte_10 == right.register.byte_10 + && left.register.byte_11 == right.register.byte_11 + && left.register.byte_12 == right.register.byte_12 + && left.register.byte_13 == right.register.byte_13 + && left.register.byte_14 == right.register.byte_14 + && left.register.byte_15 == right.register.byte_15; + } + else if (typeof(T) == typeof(sbyte)) + { + return + left.register.sbyte_0 == right.register.sbyte_0 + && left.register.sbyte_1 == right.register.sbyte_1 + && left.register.sbyte_2 == right.register.sbyte_2 + && left.register.sbyte_3 == right.register.sbyte_3 + && left.register.sbyte_4 == right.register.sbyte_4 + && left.register.sbyte_5 == right.register.sbyte_5 + && left.register.sbyte_6 == right.register.sbyte_6 + && left.register.sbyte_7 == right.register.sbyte_7 + && left.register.sbyte_8 == right.register.sbyte_8 + && left.register.sbyte_9 == right.register.sbyte_9 + && left.register.sbyte_10 == right.register.sbyte_10 + && left.register.sbyte_11 == right.register.sbyte_11 + && left.register.sbyte_12 == 
right.register.sbyte_12 + && left.register.sbyte_13 == right.register.sbyte_13 + && left.register.sbyte_14 == right.register.sbyte_14 + && left.register.sbyte_15 == right.register.sbyte_15; + } + else if (typeof(T) == typeof(ushort)) + { + return + left.register.uint16_0 == right.register.uint16_0 + && left.register.uint16_1 == right.register.uint16_1 + && left.register.uint16_2 == right.register.uint16_2 + && left.register.uint16_3 == right.register.uint16_3 + && left.register.uint16_4 == right.register.uint16_4 + && left.register.uint16_5 == right.register.uint16_5 + && left.register.uint16_6 == right.register.uint16_6 + && left.register.uint16_7 == right.register.uint16_7; + } + else if (typeof(T) == typeof(short)) + { + return + left.register.int16_0 == right.register.int16_0 + && left.register.int16_1 == right.register.int16_1 + && left.register.int16_2 == right.register.int16_2 + && left.register.int16_3 == right.register.int16_3 + && left.register.int16_4 == right.register.int16_4 + && left.register.int16_5 == right.register.int16_5 + && left.register.int16_6 == right.register.int16_6 + && left.register.int16_7 == right.register.int16_7; + } + else if (typeof(T) == typeof(uint)) + { + return + left.register.uint32_0 == right.register.uint32_0 + && left.register.uint32_1 == right.register.uint32_1 + && left.register.uint32_2 == right.register.uint32_2 + && left.register.uint32_3 == right.register.uint32_3; + } + else if (typeof(T) == typeof(int)) + { + return + left.register.int32_0 == right.register.int32_0 + && left.register.int32_1 == right.register.int32_1 + && left.register.int32_2 == right.register.int32_2 + && left.register.int32_3 == right.register.int32_3; + } + else if (typeof(T) == typeof(ulong)) + { + return + left.register.uint64_0 == right.register.uint64_0 + && left.register.uint64_1 == right.register.uint64_1; + } + else if (typeof(T) == typeof(long)) + { + return + left.register.int64_0 == right.register.int64_0 + && left.register.int64_1 == 
right.register.int64_1; + } + else if (typeof(T) == typeof(float)) + { + return + left.register.single_0 == right.register.single_0 + && left.register.single_1 == right.register.single_1 + && left.register.single_2 == right.register.single_2 + && left.register.single_3 == right.register.single_3; + } + else if (typeof(T) == typeof(double)) + { + return + left.register.double_0 == right.register.double_0 + && left.register.double_1 == right.register.double_1; + } + else + { + throw new NotSupportedException(SR.Arg_TypeNotSupported); + } + } + } /// /// Returns a boolean indicating whether any single pair of elements in the given vectors are not equal. diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt index 86bd98984a94..b556656e918c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt @@ -89,12 +89,12 @@ namespace System.Numerics } private static readonly Vector s_one = new Vector(GetOneValue()); - internal static Vector AllOnes + internal static Vector AllBitsSet { [Intrinsic] - get => s_allOnes; + get => s_allBitsSet; } - private static readonly Vector s_allOnes = new Vector(GetAllBitsSetValue()); + private static readonly Vector s_allBitsSet = new Vector(GetAllBitsSetValue()); #endregion Static Members #region Constructors @@ -322,11 +322,7 @@ namespace System.Numerics [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? 
obj) { - if (!(obj is Vector)) - { - return false; - } - return Equals((Vector)obj); + return (obj is Vector other) && Equals(other); } /// @@ -337,56 +333,7 @@ namespace System.Numerics [Intrinsic] public readonly bool Equals(Vector other) { - if (Vector.IsHardwareAccelerated) - { - for (int g = 0; g < Count; g++) - { - if (!ScalarEquals(this[g], other[g])) - { - return false; - } - } - return true; - } - else - { -<# - foreach (Type type in supportedTypes) - { -#> - <#=GenerateIfStatementHeader(type)#> - { - return -<# - for (int g = 0; g < GetNumFields(type, totalSize); g++) - { -#> -<# - if (g == 0) - { -#> - this.<#=GetRegisterFieldName(type, g)#> == other.<#=GetRegisterFieldName(type, g)#> -<# - } - else - { -#> - && this.<#=GetRegisterFieldName(type, g)#> == other.<#=GetRegisterFieldName(type, g)#><#=(g == (GetNumFields(type, totalSize) -1)) ? ";" : ""#> -<# - } -#> -<# - } -#> - } -<# - } -#> - else - { - throw new NotSupportedException(SR.Arg_TypeNotSupported); - } - } + return this == other; } /// @@ -884,7 +831,7 @@ namespace System.Numerics /// The one's complement vector. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector operator ~(Vector value) => - s_allOnes ^ value; + AllBitsSet ^ value; #endregion Bitwise Operators #region Logical Operators @@ -896,8 +843,59 @@ namespace System.Numerics /// True if all elements are equal; False otherwise. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool operator ==(Vector left, Vector right) => - left.Equals(right); + public static bool operator ==(Vector left, Vector right) + { + if (Vector.IsHardwareAccelerated) + { + for (int g = 0; g < Count; g++) + { + if (!ScalarEquals(left[g], right[g])) + { + return false; + } + } + return true; + } + else + { +<# + foreach (Type type in supportedTypes) + { +#> + <#=GenerateIfStatementHeader(type)#> + { + return +<# + for (int g = 0; g < GetNumFields(type, totalSize); g++) + { +#> +<# + if (g == 0) + { +#> + left.<#=GetRegisterFieldName(type, g)#> == right.<#=GetRegisterFieldName(type, g)#> +<# + } + else + { +#> + && left.<#=GetRegisterFieldName(type, g)#> == right.<#=GetRegisterFieldName(type, g)#><#=(g == (GetNumFields(type, totalSize) -1)) ? ";" : ""#> +<# + } +#> +<# + } +#> + } +<# + } +#> + else + { + throw new NotSupportedException(SR.Arg_TypeNotSupported); + } + } + } /// /// Returns a boolean indicating whether any single pair of elements in the given vectors are not equal. diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs index 38b572207d91..cee4a0e4ca66 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2.cs @@ -65,9 +65,7 @@ public override readonly int GetHashCode() [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? 
obj) { - if (!(obj is Vector2)) - return false; - return Equals((Vector2)obj); + return (obj is Vector2 other) && Equals(other); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs index 27d3469bfb92..b776c5d0a3ca 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs @@ -91,7 +91,7 @@ public readonly void CopyTo(float[] array, int index) [Intrinsic] public readonly bool Equals(Vector2 other) { - return this.X == other.X && this.Y == other.Y; + return this == other; } #endregion Public Instance Methods @@ -275,7 +275,8 @@ public static Vector2 SquareRoot(Vector2 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator ==(Vector2 left, Vector2 right) { - return left.Equals(right); + return left.X == right.X && + left.Y == right.Y; } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs index af110ffcc56d..1ca945caa5bf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3.cs @@ -70,9 +70,7 @@ public override readonly int GetHashCode() [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? 
obj) { - if (!(obj is Vector3)) - return false; - return Equals((Vector3)obj); + return (obj is Vector3 other) && Equals(other); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs index df32e8331d70..3ff9a8aa6525 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs @@ -106,9 +106,7 @@ public readonly void CopyTo(float[] array, int index) [Intrinsic] public readonly bool Equals(Vector3 other) { - return X == other.X && - Y == other.Y && - Z == other.Z; + return this == other; } #endregion Public Instance Methods @@ -294,9 +292,9 @@ public static Vector3 SquareRoot(Vector3 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator ==(Vector3 left, Vector3 right) { - return (left.X == right.X && - left.Y == right.Y && - left.Z == right.Z); + return left.X == right.X && + left.Y == right.Y && + left.Z == right.Z; } /// @@ -309,9 +307,7 @@ public static Vector3 SquareRoot(Vector3 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator !=(Vector3 left, Vector3 right) { - return (left.X != right.X || - left.Y != right.Y || - left.Z != right.Z); + return !(left == right); } #endregion Public Static Operators } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs index 45a9f42264b5..c6dc6a8de8c4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4.cs @@ -73,9 +73,7 @@ public override readonly int GetHashCode() [MethodImpl(MethodImplOptions.AggressiveInlining)] public override readonly bool Equals(object? 
obj) { - if (!(obj is Vector4)) - return false; - return Equals((Vector4)obj); + return (obj is Vector4 other) && Equals(other); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs index 70d692457e1a..4e95120c244c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs @@ -136,10 +136,7 @@ public readonly void CopyTo(float[] array, int index) [Intrinsic] public readonly bool Equals(Vector4 other) { - return this.X == other.X - && this.Y == other.Y - && this.Z == other.Z - && this.W == other.W; + return this == other; } #endregion Public Instance Methods @@ -329,7 +326,10 @@ public static Vector4 SquareRoot(Vector4 value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator ==(Vector4 left, Vector4 right) { - return left.Equals(right); + return left.X == right.X + && left.Y == right.Y + && left.Z == right.Z + && left.W == right.W; } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs index f16d1c4fc577..4d97d1ad70ca 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs @@ -230,7 +230,7 @@ public static Vector LessThan(Vector left, Vector right) public static bool LessThanAll(Vector left, Vector right) where T : struct { Vector cond = (Vector)Vector.LessThan(left, right); - return cond.Equals(Vector.AllOnes); + return cond.Equals(Vector.AllBitsSet); } /// @@ -328,7 +328,7 @@ public static Vector LessThanOrEqual(Vector left, Vector r public static bool LessThanOrEqualAll(Vector left, Vector right) where T : struct { Vector cond = 
(Vector)Vector.LessThanOrEqual(left, right); - return cond.Equals(Vector.AllOnes); + return cond.Equals(Vector.AllBitsSet); } /// @@ -427,7 +427,7 @@ public static Vector GreaterThan(Vector left, Vector right) public static bool GreaterThanAll(Vector left, Vector right) where T : struct { Vector cond = (Vector)Vector.GreaterThan(left, right); - return cond.Equals(Vector.AllOnes); + return cond.Equals(Vector.AllBitsSet); } /// @@ -526,7 +526,7 @@ public static Vector GreaterThanOrEqual(Vector left, Vector(Vector left, Vector right) where T : struct { Vector cond = (Vector)Vector.GreaterThanOrEqual(left, right); - return cond.Equals(Vector.AllOnes); + return cond.Equals(Vector.AllBitsSet); } ///