From 56518a7546bf2ad200936938e0f33cd07d54a378 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 4 May 2020 17:01:36 -0700 Subject: [PATCH] Adding basic support for recognizing and handling SIMD intrinsics as HW intrinsics (#35421) * Adding basic support for recognizing and handling SIMD intrinsics as HW intrinsics * Applying formatting patch * Fixing a preprocessor concatenation for non windows * Add a default case to workaround a compiler warning on FreeBSD * Fixing a noway_assert to include GT_HWINTRINSIC * Fixing some asserts that were being triggered * Use getSIMDVectorRegisterByteLength * Applying formatting patch * Fixing ARM64 to use the actual type size * Removing the [Intrinsic] attribute from some Vector2/3/4 methods which aren't intrinsic * Updating SSE/SSE2 CompareGreaterThan and related functions to be table driven * Fixing the SimdAsHWIntrinsic relational operations to match the GT_SIMD behavior * Ensure that GT_HWINTRINSIC fixes the type for certain TYP_SIMD8 * Fixing the SimdAsHWIntrinsic Vector.op_Multiply support to match the GT_SIMD behavior * Fixing the SimdAsHWIntrinsic Vector2/3 Division to match the GT_SIMD behavior * Porting Abs, Min, and Max to use the SimdAsHWIntrinsic support * Minor fixups to the SSE2 codepath * Applying formatting patch * Fixing a check in lowering * Mark SimdAsHWIntrinsic nodes so we can lookup the correct handle * Adding the 3 operand overload for gtNewSimdAsHWIntrinsicNode * Fixing BuildHWIntrinsic to properly take RMW into account * Fixing the rationalize handling of GT_HWINTRINSIC to account for SIMD vs non-SIMD nodes * Fixing the importer to not create SIMD nodes if featureSIMD is disabled * Fixing the SSE4.2 implementation of CompareLessThan * Preserve the base type for subtraction/addition operations * Applying formatting patch * Responding to PR feedback * Fixing a copy/paste error under reinterpret cast * Fixing abs to expect 1 argument * Adding method comment headers that were missing * Removing unused 
table entries from SimdAsHWIntrinsic for Vector2/3/4 * Ensure we catch intrinsics from the Vector static class * Fixing SSSE3_Abs and AVX2_Abs to get the base type from the first argument * Ensure we adjust the class handle used for intrinsics from the Vector static class * Ensure we populate the handle cache for clsHnd even if it isn't used * Fix where we grab the base type from for the static Vector class * Fixing ConditionalSelect and improving the messages used for impCloneExpr in SimdAsHWIntrinsic * Ensure we clone the constVectorDup before using it * Applying formatting patch --- src/coreclr/src/jit/CMakeLists.txt | 10 +- src/coreclr/src/jit/compiler.h | 64 +- src/coreclr/src/jit/flowgraph.cpp | 11 +- src/coreclr/src/jit/gentree.cpp | 15 +- src/coreclr/src/jit/gentree.h | 5 +- src/coreclr/src/jit/hwintrinsic.cpp | 15 +- src/coreclr/src/jit/hwintrinsic.h | 66 +- .../src/jit/hwintrinsiccodegenarm64.cpp | 2 +- .../src/jit/hwintrinsiccodegenxarch.cpp | 53 +- src/coreclr/src/jit/hwintrinsiclistxarch.h | 28 +- src/coreclr/src/jit/hwintrinsicxarch.cpp | 101 +- src/coreclr/src/jit/importer.cpp | 16 +- src/coreclr/src/jit/lower.cpp | 8 +- src/coreclr/src/jit/lowerarmarch.cpp | 9 + src/coreclr/src/jit/lowerxarch.cpp | 106 +- src/coreclr/src/jit/lsraxarch.cpp | 90 +- src/coreclr/src/jit/namedintrinsiclist.h | 10 + src/coreclr/src/jit/rationalize.cpp | 26 + src/coreclr/src/jit/simd.cpp | 44 +- src/coreclr/src/jit/simdashwintrinsic.cpp | 1110 +++++++++++++++++ src/coreclr/src/jit/simdashwintrinsic.h | 130 ++ .../src/jit/simdashwintrinsiclistarm64.h | 89 ++ .../src/jit/simdashwintrinsiclistxarch.h | 111 ++ .../src/System/Numerics/Vector2_Intrinsics.cs | 2 - .../src/System/Numerics/Vector3_Intrinsics.cs | 2 - .../src/System/Numerics/Vector4_Intrinsics.cs | 2 - 26 files changed, 1824 insertions(+), 301 deletions(-) create mode 100644 src/coreclr/src/jit/simdashwintrinsic.cpp create mode 100644 src/coreclr/src/jit/simdashwintrinsic.h create mode 100644 
src/coreclr/src/jit/simdashwintrinsiclistarm64.h create mode 100644 src/coreclr/src/jit/simdashwintrinsiclistxarch.h diff --git a/src/coreclr/src/jit/CMakeLists.txt b/src/coreclr/src/jit/CMakeLists.txt index 5093ce9a0161f..8ba2c47973fde 100644 --- a/src/coreclr/src/jit/CMakeLists.txt +++ b/src/coreclr/src/jit/CMakeLists.txt @@ -170,6 +170,7 @@ if (CLR_CMAKE_TARGET_WIN32) regset.h sideeffects.h simd.h + simdashwintrinsic.h simdintrinsiclist.h sm.h smallhash.h @@ -204,14 +205,16 @@ if (CLR_CMAKE_TARGET_WIN32) instrsarm.h instrsarm64.h registerarm.h - registerarm64.h) + registerarm64.h + simdashwintrinsiclistarm64.h) elseif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) list (APPEND JIT_HEADERS emitfmtsxarch.h emitxarch.h hwintrinsiclistxarch.h hwintrinsic.h - instrsxarch.h) + instrsxarch.h + simdashwintrinsiclistxarch.h) endif () endif(CLR_CMAKE_TARGET_WIN32) @@ -223,6 +226,7 @@ set( JIT_AMD64_SOURCES lowerxarch.cpp lsraxarch.cpp simd.cpp + simdashwintrinsic.cpp simdcodegenxarch.cpp targetamd64.cpp unwindamd64.cpp @@ -249,6 +253,7 @@ set( JIT_I386_SOURCES lowerxarch.cpp lsraxarch.cpp simd.cpp + simdashwintrinsic.cpp simdcodegenxarch.cpp targetx86.cpp unwindx86.cpp @@ -264,6 +269,7 @@ set( JIT_ARM64_SOURCES lsraarmarch.cpp lsraarm64.cpp simd.cpp + simdashwintrinsic.cpp targetarm64.cpp unwindarm.cpp unwindarm64.cpp diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index a76847e85ba40..61939a9bca3fe 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -61,6 +61,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "hwintrinsic.h" #include "simd.h" +#include "simdashwintrinsic.h" // This is only used locally in the JIT to indicate that // a verification block should be inserted @@ -2615,6 +2616,36 @@ class Compiler NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size); + + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode( + var_types type, GenTree* op1, 
NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) + { + GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, baseType, size); + node->gtFlags |= GTF_SIMDASHW_OP; + return node; + } + + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode( + var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) + { + GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, op1, op2, hwIntrinsicID, baseType, size); + node->gtFlags |= GTF_SIMDASHW_OP; + return node; + } + + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types type, + GenTree* op1, + GenTree* op2, + GenTree* op3, + NamedIntrinsic hwIntrinsicID, + var_types baseType, + unsigned size) + { + GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, baseType, size); + node->gtFlags |= GTF_SIMDASHW_OP; + return node; + } + GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID); GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, @@ -3689,16 +3720,36 @@ class Compiler CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, bool mustExpand); + GenTree* impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand); protected: bool compSupportsHWIntrinsic(CORINFO_InstructionSet isa); + GenTree* impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_SIG_INFO* sig, + var_types retType, + var_types baseType, + unsigned simdSize); + + GenTree* impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, + var_types retType, + var_types baseType, + unsigned simdSize, + GenTree* op1, + GenTree* op2, + GenTree* op3); + GenTree* impSpecialIntrinsic(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig); - GenTree* getArgForHWIntrinsic(var_types argType, 
CORINFO_CLASS_HANDLE argClass); + GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr = false); GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType); GenTree* addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand, int immUpperBound); @@ -3712,6 +3763,13 @@ class Compiler GenTree* impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig); GenTree* impBMI1OrBMI2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig); + GenTree* impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + var_types retType, + var_types baseType, + unsigned simdSize, + GenTree* op1, + GenTree* op2); #endif // TARGET_XARCH #endif // FEATURE_HW_INTRINSICS GenTree* impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd, @@ -8203,8 +8261,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return emitTypeSize(TYP_SIMD8); } +public: // Returns the codegen type for a given SIMD size. 
- var_types getSIMDTypeForSize(unsigned size) + static var_types getSIMDTypeForSize(unsigned size) { var_types simdType = TYP_UNDEF; if (size == 8) @@ -8230,6 +8289,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return simdType; } +private: unsigned getSIMDInitTempVarNum() { if (lvaSIMDInitTempVarNum == BAD_VAR_NUM) diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 1a11b13cb4ef2..689fa7f0606e8 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -22298,12 +22298,11 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call) */ GenTree* Compiler::fgGetStructAsStructPtr(GenTree* tree) { - noway_assert((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_FIELD) || (tree->gtOper == GT_IND) || - (tree->gtOper == GT_BLK) || (tree->gtOper == GT_OBJ) || tree->OperIsSIMD() || - // tree->gtOper == GT_CALL || cannot get address of call. - // tree->gtOper == GT_MKREFANY || inlining should've been aborted due to mkrefany opcode. - // tree->gtOper == GT_RET_EXPR || cannot happen after fgUpdateInlineReturnExpressionPlaceHolder - (tree->gtOper == GT_COMMA)); + noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) || tree->OperIsSIMD() || + tree->OperIsHWIntrinsic()); + // GT_CALL, cannot get address of call. + // GT_MKREFANY, inlining should've been aborted due to mkrefany opcode. 
+ // GT_RET_EXPR, cannot happen after fgUpdateInlineReturnExpressionPlaceHolder switch (tree->OperGet()) { diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index dfab57adc3583..279402fdc4007 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -17206,6 +17206,12 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree) if (varTypeIsSIMD(tree)) { structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT); +#ifdef FEATURE_HW_INTRINSICS + if (structHnd == NO_CLASS_HANDLE) + { + structHnd = gtGetStructHandleForHWSIMD(tree->gtType, TYP_FLOAT); + } +#endif } #endif break; @@ -17272,7 +17278,14 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree) #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: - structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType); + if ((tree->gtFlags & GTF_SIMDASHW_OP) != 0) + { + structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType); + } + else + { + structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType); + } break; #endif break; diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index a3fe7aab7474c..12645fb12d219 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -745,7 +745,7 @@ struct GenTree #define GTF_UNSIGNED 0x00008000 // With GT_CAST: the source operand is an unsigned type // With operators: the specified node is an unsigned operator - // + // #define GTF_LATE_ARG 0x00010000 // The specified node is evaluated to a temp in the arg list, and this temp is added to gtCallLateArgs. #define GTF_SPILL 0x00020000 // Needs to be spilled here @@ -915,6 +915,9 @@ struct GenTree #define GTF_SIMD12_OP 0x80000000 // GT_SIMD -- Indicates that the operands need to be handled as SIMD12 // even if they have been retyped as SIMD16. 
+#define GTF_SIMDASHW_OP 0x80000000 // GT_HWINTRINSIC -- Indicates that the structHandle should be gotten from gtGetStructHandleForSIMD + rather than from gtGetStructHandleForHWSIMD. + //--------------------------------------------------------------------- // // GenTree flags stored in gtDebugFlags. diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 0e62ffe07fafc..efcf5b891567b 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -495,16 +495,17 @@ bool HWIntrinsicInfo::isImmOp(NamedIntrinsic id, const GenTree* op) } //------------------------------------------------------------------------ -// // getArgForHWIntrinsic: pop an argument from the stack and validate its type +// getArgForHWIntrinsic: pop an argument from the stack and validate its type // // Arguments: -// argType -- the required type of argument -// argClass -- the class handle of argType +// argType -- the required type of argument +// argClass -- the class handle of argType +// expectAddr -- if true indicates we are expecting type stack entry to be a TYP_BYREF. 
// // Return Value: // the validated argument // -GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass) +GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr) { GenTree* arg = nullptr; if (argType == TYP_STRUCT) @@ -512,9 +513,9 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE unsigned int argSizeBytes; var_types base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes); argType = getSIMDTypeForSize(argSizeBytes); - assert((argType == TYP_SIMD8) || (argType == TYP_SIMD16) || (argType == TYP_SIMD32)); - arg = impSIMDPopStack(argType); - assert((arg->TypeGet() == TYP_SIMD8) || (arg->TypeGet() == TYP_SIMD16) || (arg->TypeGet() == TYP_SIMD32)); + assert(varTypeIsSIMD(argType)); + arg = impSIMDPopStack(argType, expectAddr); + assert(varTypeIsSIMD(arg->TypeGet())); } else { diff --git a/src/coreclr/src/jit/hwintrinsic.h b/src/coreclr/src/jit/hwintrinsic.h index 960b6582ca35e..fc3686fbe403e 100644 --- a/src/coreclr/src/jit/hwintrinsic.h +++ b/src/coreclr/src/jit/hwintrinsic.h @@ -304,7 +304,7 @@ struct HWIntrinsicInfo } #ifdef TARGET_XARCH - static int lookupIval(NamedIntrinsic id) + static int lookupIval(NamedIntrinsic id, bool opportunisticallyDependsOnAVX) { switch (id) { @@ -321,6 +321,17 @@ struct HWIntrinsicInfo case NI_SSE_CompareScalarGreaterThan: case NI_SSE2_CompareGreaterThan: case NI_SSE2_CompareScalarGreaterThan: + case NI_AVX_CompareGreaterThan: + { + if (opportunisticallyDependsOnAVX) + { + return static_cast(FloatComparisonMode::OrderedGreaterThanSignaling); + } + + assert(id != NI_AVX_CompareGreaterThan); + return static_cast(FloatComparisonMode::OrderedLessThanSignaling); + } + case NI_SSE_CompareLessThan: case NI_SSE_CompareScalarLessThan: case NI_SSE2_CompareLessThan: @@ -334,6 +345,17 @@ struct HWIntrinsicInfo case NI_SSE_CompareScalarGreaterThanOrEqual: case NI_SSE2_CompareGreaterThanOrEqual: case 
NI_SSE2_CompareScalarGreaterThanOrEqual: + case NI_AVX_CompareGreaterThanOrEqual: + { + if (opportunisticallyDependsOnAVX) + { + return static_cast(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling); + } + + assert(id != NI_AVX_CompareGreaterThanOrEqual); + return static_cast(FloatComparisonMode::OrderedLessThanOrEqualSignaling); + } + case NI_SSE_CompareLessThanOrEqual: case NI_SSE_CompareScalarLessThanOrEqual: case NI_SSE2_CompareLessThanOrEqual: @@ -356,6 +378,17 @@ struct HWIntrinsicInfo case NI_SSE_CompareScalarNotGreaterThan: case NI_SSE2_CompareNotGreaterThan: case NI_SSE2_CompareScalarNotGreaterThan: + case NI_AVX_CompareNotGreaterThan: + { + if (opportunisticallyDependsOnAVX) + { + return static_cast(FloatComparisonMode::UnorderedNotGreaterThanSignaling); + } + + assert(id != NI_AVX_CompareNotGreaterThan); + return static_cast(FloatComparisonMode::UnorderedNotLessThanSignaling); + } + case NI_SSE_CompareNotLessThan: case NI_SSE_CompareScalarNotLessThan: case NI_SSE2_CompareNotLessThan: @@ -369,6 +402,17 @@ struct HWIntrinsicInfo case NI_SSE_CompareScalarNotGreaterThanOrEqual: case NI_SSE2_CompareNotGreaterThanOrEqual: case NI_SSE2_CompareScalarNotGreaterThanOrEqual: + case NI_AVX_CompareNotGreaterThanOrEqual: + { + if (opportunisticallyDependsOnAVX) + { + return static_cast(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling); + } + + assert(id != NI_AVX_CompareNotGreaterThanOrEqual); + return static_cast(FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling); + } + case NI_SSE_CompareNotLessThanOrEqual: case NI_SSE_CompareScalarNotLessThanOrEqual: case NI_SSE2_CompareNotLessThanOrEqual: @@ -437,26 +481,6 @@ struct HWIntrinsicInfo return static_cast(FloatRoundingMode::ToZero); } - case NI_AVX_CompareGreaterThan: - { - return static_cast(FloatComparisonMode::OrderedGreaterThanSignaling); - } - - case NI_AVX_CompareGreaterThanOrEqual: - { - return static_cast(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling); - } - - case 
NI_AVX_CompareNotGreaterThan: - { - return static_cast(FloatComparisonMode::UnorderedNotGreaterThanSignaling); - } - - case NI_AVX_CompareNotGreaterThanOrEqual: - { - return static_cast(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling); - } - default: { return -1; diff --git a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp index 767d18f1b3af6..3d224d4f71808 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp @@ -215,7 +215,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - emitSize = EA_SIZE(node->gtSIMDSize); + emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); opt = genGetSimdInsOpt(emitSize, intrin.baseType); if ((opt == INS_OPTS_1D) && (intrin.category == HW_Category_SimpleSIMD)) diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index 04214e137c759..2b6336d2cecd8 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -82,9 +82,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsicId); HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId); - int ival = HWIntrinsicInfo::lookupIval(intrinsicId); int numArgs = HWIntrinsicInfo::lookupNumArgs(node); + int ival = HWIntrinsicInfo::lookupIval(intrinsicId, compiler->compOpportunisticallyDependsOn(InstructionSet_AVX)); + assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId)); if (genIsTableDrivenHWIntrinsic(intrinsicId, category)) @@ -102,7 +103,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(numArgs >= 0); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); assert(ins != INS_invalid); - emitAttr simdSize = EA_ATTR(node->gtSIMDSize); + emitAttr 
simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); assert(simdSize != 0); switch (numArgs) @@ -254,11 +255,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else if (node->TypeGet() == TYP_VOID) { - genHWIntrinsic_R_RM(node, ins, EA_ATTR(node->gtSIMDSize), op1Reg, op2); + genHWIntrinsic_R_RM(node, ins, simdSize, op1Reg, op2); } else { - genHWIntrinsic_R_R_RM(node, ins, EA_ATTR(node->gtSIMDSize)); + genHWIntrinsic_R_R_RM(node, ins, simdSize); } break; } @@ -550,7 +551,7 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, i var_types targetType = node->TypeGet(); regNumber targetReg = node->GetRegNum(); GenTree* op1 = node->gtGetOp1(); - emitAttr simdSize = EA_ATTR(node->gtSIMDSize); + emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); emitter* emit = GetEmitter(); // TODO-XArch-CQ: Commutative operations can have op1 be contained @@ -632,7 +633,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, regNumber targetReg = node->GetRegNum(); GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); - emitAttr simdSize = EA_ATTR(node->gtSIMDSize); + emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); emitter* emit = GetEmitter(); // TODO-XArch-CQ: Commutative operations can have op1 be contained @@ -796,7 +797,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins) GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); GenTree* op3 = nullptr; - emitAttr simdSize = EA_ATTR(node->gtSIMDSize); + emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); emitter* emit = GetEmitter(); assert(op1->OperIsList()); @@ -1150,7 +1151,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) assert(node->gtGetOp2() == nullptr); emitter* emit = GetEmitter(); - emitAttr attr = EA_ATTR(node->gtSIMDSize); + emitAttr attr = 
emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); switch (intrinsicId) @@ -1408,25 +1409,6 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - // All integer overloads are handled by table codegen - case NI_SSE2_CompareLessThan: - { - assert(op1 != nullptr); - assert(op2 != nullptr); - - assert(baseType == TYP_DOUBLE); - - int ival = HWIntrinsicInfo::lookupIval(intrinsicId); - assert((ival >= 0) && (ival <= 127)); - - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); - emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival); - - break; - } - case NI_SSE2_X64_ConvertScalarToVector128Double: { assert(baseType == TYP_LONG); @@ -1677,7 +1659,7 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; var_types baseType = node->gtSIMDBaseType; - emitAttr attr = EA_ATTR(node->gtSIMDSize); + emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); var_types targetType = node->TypeGet(); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); int numArgs = HWIntrinsicInfo::lookupNumArgs(node); @@ -1990,7 +1972,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; var_types baseType = node->gtSIMDBaseType; - emitAttr attr = EA_ATTR(node->gtSIMDSize); + emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); GenTree* op1 = node->gtGetOp1(); regNumber targetReg = node->GetRegNum(); @@ -2016,16 +1998,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if 
(op3->isContained() || op3->isUsedFromSpillTemp()) - { - // 213 form: op1 = (op2 * op1) + [op3] - - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); - - isCommutative = !copiesUpperBits; - } - else if (op2->isContained() || op2->isUsedFromSpillTemp()) + if (op2->isContained() || op2->isUsedFromSpillTemp()) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2045,7 +2018,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { - // 213 form: op1 = (op2 * op1) + op3 + // 213 form: op1 = (op2 * op1) + [op3] op1Reg = op1->GetRegNum(); op2Reg = op2->GetRegNum(); diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 49240a41ac717..4be0cdadd809c 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -111,11 +111,11 @@ HARDWARE_INTRINSIC(SSE, CompareEqual, HARDWARE_INTRINSIC(SSE, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, 
CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -131,9 +131,9 @@ HARDWARE_INTRINSIC(SSE, CompareNotEqual, HARDWARE_INTRINSIC(SSE, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) @@ -212,15 +212,15 @@ HARDWARE_INTRINSIC(SSE2, CompareEqual, HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, 
INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThanOrEqual, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_Special, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -232,9 +232,9 @@ HARDWARE_INTRINSIC(SSE2, CompareNotEqual, HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, 
HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) @@ -338,7 +338,7 @@ HARDWARE_INTRINSIC(SSE3, MoveLowAndDuplicate, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSSE3 Intrinsics -HARDWARE_INTRINSIC(SSSE3, Abs, 16, 1, {INS_invalid, INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSSE3, Abs, 16, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(SSSE3, AlignRight, 16, 3, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSSE3, HorizontalAdd, 16, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_invalid, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSSE3, HorizontalAddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -404,6 +404,7 @@ HARDWARE_INTRINSIC(SSE41_X64, Insert, // SSE42 Intrinsics HARDWARE_INTRINSIC(SSE42, Crc32, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) HARDWARE_INTRINSIC(SSE42, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE42, CompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -428,7 +429,6 @@ HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector128, HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vbroadcastf128, INS_vbroadcastf128}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, Compare, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_IMM, HW_Flag_NoFlag) - HARDWARE_INTRINSIC(AVX, CompareEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX, CompareGreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, CompareGreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -441,7 +441,6 @@ HARDWARE_INTRINSIC(AVX, CompareNotLessThan, HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) - HARDWARE_INTRINSIC(AVX, CompareScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) @@ -497,7 +496,7 @@ HARDWARE_INTRINSIC(AVX, Xor, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX2 Intrinsics -HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_pabsb, INS_pabsw, INS_pabsw, INS_pabsd, INS_pabsd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, 
INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) @@ -511,13 +510,14 @@ HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector256, HARDWARE_INTRINSIC(AVX2, BroadcastVector128ToVector256, 32, 1, {INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, CompareEqual, 32, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, 
INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2, GatherVector128, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2, GatherVector128, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2, GatherVector256, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2, GatherMaskVector128, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2, GatherMaskVector256, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) diff --git a/src/coreclr/src/jit/hwintrinsicxarch.cpp b/src/coreclr/src/jit/hwintrinsicxarch.cpp index 3125f10cd56dd..010cd5fa5063d 100644 --- a/src/coreclr/src/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsicxarch.cpp @@ -1288,35 +1288,6 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND switch (intrinsic) { - case NI_SSE_CompareGreaterThan: - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE_CompareNotGreaterThan: - case NI_SSE_CompareNotGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_FLOAT); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - // These intrinsics are "special import" because the non-AVX path isn't directly - // hardware supported. Instead, they start with "swapped operands" and we fix that here. - - FloatComparisonMode comparison = - static_cast(HWIntrinsicInfo::lookupIval(intrinsic)); - comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), - NI_AVX_Compare, baseType, simdSize); - } - else - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize); - } - break; - } - case NI_SSE_CompareScalarGreaterThan: case NI_SSE_CompareScalarGreaterThanOrEqual: case NI_SSE_CompareScalarNotGreaterThan: @@ -1334,9 +1305,8 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND // hardware supported. Instead, they start with "swapped operands" and we fix that here. 
FloatComparisonMode comparison = - static_cast(HWIntrinsicInfo::lookupIval(intrinsic)); - comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), + static_cast(HWIntrinsicInfo::lookupIval(intrinsic, true)); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), NI_AVX_CompareScalar, baseType, simdSize); } else @@ -1395,68 +1365,6 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN switch (intrinsic) { - case NI_SSE2_CompareGreaterThan: - { - if (baseType != TYP_DOUBLE) - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE2_CompareGreaterThan, baseType, simdSize); - - break; - } - - __fallthrough; - } - - case NI_SSE2_CompareGreaterThanOrEqual: - case NI_SSE2_CompareNotGreaterThan: - case NI_SSE2_CompareNotGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - assert(baseType == TYP_DOUBLE); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - // These intrinsics are "special import" because the non-AVX path isn't directly - // hardware supported. Instead, they start with "swapped operands" and we fix that here. 
- - FloatComparisonMode comparison = - static_cast(HWIntrinsicInfo::lookupIval(intrinsic)); - comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), - NI_AVX_Compare, baseType, simdSize); - } - else - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize); - } - break; - } - - case NI_SSE2_CompareLessThan: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - - if (baseType == TYP_DOUBLE) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, baseType, simdSize); - } - else - { - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareGreaterThan, baseType, simdSize); - } - break; - } - case NI_SSE2_CompareScalarGreaterThan: case NI_SSE2_CompareScalarGreaterThanOrEqual: case NI_SSE2_CompareScalarNotGreaterThan: @@ -1473,9 +1381,8 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN // hardware supported. Instead, they start with "swapped operands" and we fix that here. 
FloatComparisonMode comparison = - static_cast(HWIntrinsicInfo::lookupIval(intrinsic)); - comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), + static_cast(HWIntrinsicInfo::lookupIval(intrinsic, true)); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), NI_AVX_CompareScalar, baseType, simdSize); } else diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 99d51600991c2..c5f01f3e0f9ab 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -3497,6 +3497,11 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, return hwintrinsic; } + + if ((ni > NI_SIMD_AS_HWINTRINSIC_START) && (ni < NI_SIMD_AS_HWINTRINSIC_END)) + { + return impSimdAsHWIntrinsic(ni, clsHnd, method, sig, mustExpand); + } #endif // FEATURE_HW_INTRINSICS } } @@ -4152,7 +4157,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_MathF_FusedMultiplyAdd: { #ifdef TARGET_XARCH - if (compExactlyDependsOn(InstructionSet_FMA)) + if (compExactlyDependsOn(InstructionSet_FMA) && supportSIMDTypes()) { assert(varTypeIsFloating(callType)); @@ -4467,6 +4472,15 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) } } #ifdef FEATURE_HW_INTRINSICS + else if (strcmp(namespaceName, "System.Numerics") == 0) + { + CORINFO_SIG_INFO sig; + info.compCompHnd->getMethodSig(method, &sig); + + int sizeOfVectorT = getSIMDVectorRegisterByteLength(); + + result = SimdAsHWIntrinsicInfo::lookupId(&sig, className, methodName, enclosingClassName, sizeOfVectorT); + } else if (strncmp(namespaceName, "System.Runtime.Intrinsics", 25) == 0) { namespaceName += 25; diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 48345ac254cc9..3a4a6603ab7c5 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ 
-1331,11 +1331,12 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) LclVarDsc* varDsc = &comp->lvaTable[varNum]; type = varDsc->lvType; } - else if (arg->OperGet() == GT_SIMD) + else if (arg->OperIs(GT_SIMD, GT_HWINTRINSIC)) { - assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12)); + GenTreeJitIntrinsic* jitIntrinsic = reinterpret_cast(arg); + assert((jitIntrinsic->gtSIMDSize == 12) || (jitIntrinsic->gtSIMDSize == 16)); - if (arg->AsSIMD()->gtSIMDSize == 12) + if (jitIntrinsic->gtSIMDSize == 12) { type = TYP_SIMD12; } @@ -5288,6 +5289,7 @@ void Lowering::CheckNode(Compiler* compiler, GenTree* node) #ifdef FEATURE_SIMD case GT_SIMD: + case GT_HWINTRINSIC: assert(node->TypeGet() != TYP_SIMD12); break; #ifdef TARGET_64BIT diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index 4ae61be664ee3..75dfc14fa153a 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -526,6 +526,15 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { + assert(node->TypeGet() != TYP_SIMD32); + + if (node->TypeGet() == TYP_SIMD12) + { + // GT_HWINTRINSIC node requiring to produce TYP_SIMD12 in fact + // produces a TYP_SIMD16 result + node->gtType = TYP_SIMD16; + } + ContainCheckHWIntrinsic(node); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 92e9965f17c81..ad0426bbb620d 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -920,8 +920,61 @@ void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { + if (node->TypeGet() == TYP_SIMD12) + { + // GT_HWINTRINSIC node requiring to produce TYP_SIMD12 in fact + // produces a TYP_SIMD16 result + node->gtType = TYP_SIMD16; + } + switch (node->gtHWIntrinsicId) { + case NI_SSE2_CompareGreaterThan: 
+ { + if (node->gtSIMDBaseType != TYP_DOUBLE) + { + assert(varTypeIsIntegral(node->gtSIMDBaseType)); + break; + } + + __fallthrough; + } + + case NI_SSE_CompareGreaterThan: + case NI_SSE_CompareGreaterThanOrEqual: + case NI_SSE_CompareNotGreaterThan: + case NI_SSE_CompareNotGreaterThanOrEqual: + case NI_SSE2_CompareGreaterThanOrEqual: + case NI_SSE2_CompareNotGreaterThan: + case NI_SSE2_CompareNotGreaterThanOrEqual: + { + assert((node->gtSIMDBaseType == TYP_FLOAT) || (node->gtSIMDBaseType == TYP_DOUBLE)); + + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) + { + break; + } + + // pre-AVX doesn't actually support these intrinsics in hardware so we need to swap the operands around + std::swap(node->gtOp1, node->gtOp2); + break; + } + + case NI_SSE2_CompareLessThan: + case NI_SSE42_CompareLessThan: + case NI_AVX2_CompareLessThan: + { + if (node->gtSIMDBaseType == TYP_DOUBLE) + { + break; + } + assert(varTypeIsIntegral(node->gtSIMDBaseType)); + + // this isn't actually supported in hardware so we need to swap the operands around + std::swap(node->gtOp1, node->gtOp2); + break; + } + case NI_SSE_CompareScalarOrderedEqual: LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FEQ); break; @@ -2655,7 +2708,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge switch (containingIntrinsicId) { case NI_SSE_Shuffle: - case NI_SSE2_CompareLessThan: case NI_SSE2_ShiftLeftLogical: case NI_SSE2_ShiftRightArithmetic: case NI_SSE2_ShiftRightLogical: @@ -2975,6 +3027,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId); int numArgs = HWIntrinsicInfo::lookupNumArgs(node); var_types baseType = node->gtSIMDBaseType; + unsigned simdSize = node->gtSIMDSize; GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); @@ -2993,6 +3046,24 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) return; } + if 
(HWIntrinsicInfo::lookupCategory(intrinsicId) == HW_Category_IMM) + { + GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node); + assert(lastOp != nullptr); + + if (HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI()) + { + MakeSrcContained(node, lastOp); + } + } + + if ((node->gtSIMDSize == 8) || (node->gtSIMDSize == 12)) + { + // TODO-XArch-CQ: Ideally we would key this off of the size containingNode + // expects vs the size node actually is or would be if spilled to the stack + return; + } + // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained const bool isCommutative = HWIntrinsicInfo::IsCommutative(intrinsicId); @@ -3270,28 +3341,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case HW_Category_Special: - { - if (intrinsicId == NI_SSE2_CompareLessThan) - { - bool supportsRegOptional = false; - - if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - MakeSrcContained(node, op2); - } - else if (supportsRegOptional) - { - op2->SetRegOptional(); - } - } - else - { - unreached(); - } - break; - } - default: { unreached(); @@ -3479,17 +3528,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { unreached(); } - - if (HWIntrinsicInfo::lookupCategory(intrinsicId) == HW_Category_IMM) - { - GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node); - assert(lastOp != nullptr); - - if (HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI()) - { - MakeSrcContained(node, lastOp); - } - } } } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 524de87f521a1..f784b55453871 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -2458,8 +2458,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) assert(isRMW); // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 - srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2); 
+ tgtPrefUse = BuildUse(op1); + + srcCount += 1; + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2); srcCount += BuildDelayFreeUses(op3, RBM_XMM0); buildUses = false; @@ -2493,7 +2495,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) assert(isRMW); // CRC32 may operate over "byte" but on x86 only RBM_BYTE_REGS can be used as byte registers. - srcCount += BuildOperandUses(op1); + tgtPrefUse = BuildUse(op1); + + srcCount += 1; srcCount += BuildDelayFreeUses(op2, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE); buildUses = false; @@ -2539,29 +2543,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (op3->isContained()) - { - // 213 form: op1 = (op2 * op1) + [op3] - - if (copiesUpperBits) - { - tgtPrefUse = BuildUse(op1); - - srcCount += 1; - srcCount += BuildDelayFreeUses(op2); - } - else - { - // op1 and op2 are commutative, so don't - // set either to be tgtPref or delayFree - - srcCount += BuildOperandUses(op1); - srcCount += BuildOperandUses(op2); - } - - srcCount += BuildOperandUses(op3); - } - else if (op2->isContained()) + if (op2->isContained()) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2583,25 +2565,22 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { - // 213 form: op1 = (op2 * op1) + op3 + // 213 form: op1 = (op2 * op1) + [op3] + + tgtPrefUse = BuildUse(op1); + srcCount += 1; if (copiesUpperBits) { - tgtPrefUse = BuildUse(op1); - - srcCount += 1; srcCount += BuildDelayFreeUses(op2); } else { - // op1 and op2 are commutative, so don't - // set either to be tgtPref or delayFree - - srcCount += BuildOperandUses(op1); - srcCount += BuildOperandUses(op2); + tgtPrefUse2 = BuildUse(op2); + srcCount += 1; } - srcCount += BuildDelayFreeUses(op3); + srcCount += op3->isContained() ? 
BuildOperandUses(op3) : BuildDelayFreeUses(op3); } buildUses = false; @@ -2612,10 +2591,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) case NI_AVX2_GatherVector256: { assert(numArgs == 3); + assert(!isRMW); + // Any pair of the index, mask, or destination registers should be different srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op2); + // op3 should always be contained + assert(op3->isContained()); + // get a tmp register for mask that will be cleared by gather instructions buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs()); setInternalRegsDelayFree = true; @@ -2628,16 +2612,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) case NI_AVX2_GatherMaskVector256: { assert(numArgs == 5); + assert(!isRMW); + assert(intrinsicTree->gtGetOp1()->OperIsList()); + + GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest(); + GenTree* op4 = argList->Current(); + // Any pair of the index, mask, or destination registers should be different srcCount += BuildOperandUses(op1); - srcCount += BuildOperandUses(op2); + srcCount += BuildDelayFreeUses(op2); srcCount += BuildDelayFreeUses(op3); - - assert(intrinsicTree->gtGetOp1()->OperIsList()); - GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList(); - GenTree* op4 = argList->Rest()->Rest()->Rest()->Current(); srcCount += BuildDelayFreeUses(op4); + // op5 should always be contained + assert(argList->Rest()->Current()->isContained()); + // get a tmp register for mask that will be cleared by gather instructions buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs()); setInternalRegsDelayFree = true; @@ -2661,6 +2650,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { srcCount += BuildAddrUses(op1); } + else if (isRMW && !op1->isContained()) + { + tgtPrefUse = BuildUse(op1); + srcCount += 1; + } else { srcCount += BuildOperandUses(op1); @@ -2672,9 +2666,17 @@ int 
LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { srcCount += BuildAddrUses(op2->gtGetOp1()); } - else if (isRMW) + else if (isRMW && !op2->isContained()) { - srcCount += BuildDelayFreeUses(op2); + if (HWIntrinsicInfo::IsCommutative(intrinsicId)) + { + tgtPrefUse2 = BuildUse(op2); + srcCount += 1; + } + else + { + srcCount += BuildDelayFreeUses(op2); + } } else { @@ -2683,7 +2685,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (op3 != nullptr) { - srcCount += (isRMW) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3); + srcCount += isRMW ? BuildDelayFreeUses(op3) : BuildOperandUses(op3); } } } diff --git a/src/coreclr/src/jit/namedintrinsiclist.h b/src/coreclr/src/jit/namedintrinsiclist.h index f4969a57ea298..d105eabdbb3dd 100644 --- a/src/coreclr/src/jit/namedintrinsiclist.h +++ b/src/coreclr/src/jit/namedintrinsiclist.h @@ -38,6 +38,16 @@ enum NamedIntrinsic : unsigned short #include "hwintrinsiclistarm64.h" #endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) NI_HW_INTRINSIC_END, + + NI_SIMD_AS_HWINTRINSIC_START, +#if defined(TARGET_XARCH) +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name, +#include "simdashwintrinsiclistxarch.h" +#elif defined(TARGET_ARM64) +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name, +#include "simdashwintrinsiclistarm64.h" +#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) + NI_SIMD_AS_HWINTRINSIC_END, #endif // FEATURE_HW_INTRINSICS }; diff --git a/src/coreclr/src/jit/rationalize.cpp b/src/coreclr/src/jit/rationalize.cpp index 9f45bede3abbb..07bdd29f83411 100644 --- a/src/coreclr/src/jit/rationalize.cpp +++ b/src/coreclr/src/jit/rationalize.cpp @@ -768,6 +768,32 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge break; #endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + { + 
GenTreeHWIntrinsic* hwIntrinsicNode = node->AsHWIntrinsic(); + + if (!hwIntrinsicNode->isSIMD()) + { + break; + } + + noway_assert(comp->supportSIMDTypes()); + + // TODO-1stClassStructs: This should be handled more generally for enregistered or promoted + // structs that are passed or returned in a different register type than their enregistered + // type(s). + if ((hwIntrinsicNode->gtType == TYP_I_IMPL) && (hwIntrinsicNode->gtSIMDSize == TARGET_POINTER_SIZE)) + { + // This happens when it is consumed by a GT_RET_EXPR. + // It can only be a Vector2f or Vector2i. + assert(genTypeSize(hwIntrinsicNode->gtSIMDBaseType) == 4); + hwIntrinsicNode->gtType = TYP_SIMD8; + } + break; + } +#endif // FEATURE_HW_INTRINSICS + default: // These nodes should not be present in HIR. assert(!node->OperIs(GT_CMP, GT_SETCC, GT_JCC, GT_JCMP, GT_LOCKADD)); diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 9077971e67cf3..bfd8c04f76789 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -162,11 +162,13 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u if (typeHnd == m_simdHandleCache->SIMDFloatHandle) { simdBaseType = TYP_FLOAT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDIntHandle) { simdBaseType = TYP_INT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDVector2Handle) @@ -192,46 +194,55 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u } else if (typeHnd == m_simdHandleCache->SIMDVectorHandle) { + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDUShortHandle) { simdBaseType = TYP_USHORT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDUByteHandle) { 
simdBaseType = TYP_UBYTE; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDDoubleHandle) { simdBaseType = TYP_DOUBLE; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDLongHandle) { simdBaseType = TYP_LONG; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDShortHandle) { simdBaseType = TYP_SHORT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDByteHandle) { simdBaseType = TYP_BYTE; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDUIntHandle) { simdBaseType = TYP_UINT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDULongHandle) { simdBaseType = TYP_ULONG; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } @@ -253,6 +264,8 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u { if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0) { + size = getSIMDVectorRegisterByteLength(); + if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0) { m_simdHandleCache->SIMDFloatHandle = typeHnd; @@ -348,6 +361,7 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0) { m_simdHandleCache->SIMDVectorHandle = typeHnd; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Found type Vector\n"); } else @@ -356,18 +370,6 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u } } } - if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr) - { - // If not a fixed size vector then its size is same as SIMD vector - // register length in bytes - if (size 
== 0) - { - size = getSIMDVectorRegisterByteLength(); - } - - *sizeBytes = size; - setUsesSIMDTypes(true); - } } #ifdef FEATURE_HW_INTRINSICS else if (isIntrinsicType(typeHnd)) @@ -776,18 +778,18 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u simdBaseType = TYP_UNKNOWN; } #endif // TARGET_XARCH + } +#endif // FEATURE_HW_INTRINSICS - if (sizeBytes != nullptr) - { - *sizeBytes = size; - } + if (sizeBytes != nullptr) + { + *sizeBytes = size; + } - if (simdBaseType != TYP_UNKNOWN) - { - setUsesSIMDTypes(true); - } + if (simdBaseType != TYP_UNKNOWN) + { + setUsesSIMDTypes(true); } -#endif // FEATURE_HW_INTRINSICS return simdBaseType; } diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp new file mode 100644 index 0000000000000..1463ed30ca407 --- /dev/null +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -0,0 +1,1110 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +#include "jitpch.h" +#include "simdashwintrinsic.h" + +#ifdef FEATURE_HW_INTRINSICS + +static const SimdAsHWIntrinsicInfo simdAsHWIntrinsicInfoArray[] = { +// clang-format off +#if defined(TARGET_XARCH) +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, +#include "simdashwintrinsiclistxarch.h" +#elif defined(TARGET_ARM64) +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, +#include "simdashwintrinsiclistarm64.h" +#else +#error Unsupported platform +#endif + // clang-format on +}; + +//------------------------------------------------------------------------ +// lookup: Gets the SimdAsHWIntrinsicInfo associated with a given NamedIntrinsic +// +// Arguments: +// id -- The NamedIntrinsic associated with the SimdAsHWIntrinsic to lookup +// +// Return Value: +// The SimdAsHWIntrinsicInfo associated with id +const SimdAsHWIntrinsicInfo& SimdAsHWIntrinsicInfo::lookup(NamedIntrinsic id) +{ + assert(id != NI_Illegal); + + assert(id > NI_SIMD_AS_HWINTRINSIC_START); + assert(id < NI_SIMD_AS_HWINTRINSIC_END); + + return simdAsHWIntrinsicInfoArray[id - NI_SIMD_AS_HWINTRINSIC_START - 1]; +} + +//------------------------------------------------------------------------ +// lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet +// +// Arguments: +// className -- The name of the class associated with the SimdIntrinsic to lookup +// methodName -- The name of the method associated with the SimdIntrinsic to lookup +// enclosingClassName -- The name of the enclosing class +// sizeOfVectorT -- The size of Vector in bytes +// +// Return Value: +// The NamedIntrinsic associated with methodName and 
classId +NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(CORINFO_SIG_INFO* sig, + const char* className, + const char* methodName, + const char* enclosingClassName, + int sizeOfVectorT) +{ + SimdAsHWIntrinsicClassId classId = lookupClassId(className, enclosingClassName, sizeOfVectorT); + + if (classId == SimdAsHWIntrinsicClassId::Unknown) + { + return NI_Illegal; + } + + for (int i = 0; i < (NI_SIMD_AS_HWINTRINSIC_END - NI_SIMD_AS_HWINTRINSIC_START - 1); i++) + { + const SimdAsHWIntrinsicInfo& intrinsicInfo = simdAsHWIntrinsicInfoArray[i]; + + if (classId != intrinsicInfo.classId) + { + continue; + } + + if (sig->numArgs != static_cast(intrinsicInfo.numArgs)) + { + continue; + } + + if (sig->hasThis() != SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsicInfo.id)) + { + continue; + } + + if (strcmp(methodName, intrinsicInfo.name) != 0) + { + continue; + } + + return intrinsicInfo.id; + } + + return NI_Illegal; +} + +//------------------------------------------------------------------------ +// lookupClassId: Gets the SimdAsHWIntrinsicClassId for a given class name and enclsoing class name +// +// Arguments: +// className -- The name of the class associated with the SimdAsHWIntrinsicClassId to lookup +// enclosingClassName -- The name of the enclosing class +// sizeOfVectorT -- The size of Vector in bytes +// +// Return Value: +// The SimdAsHWIntrinsicClassId associated with className and enclosingClassName +SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* className, + const char* enclosingClassName, + int sizeOfVectorT) +{ + assert(className != nullptr); + + if ((enclosingClassName != nullptr) || (className[0] != 'V')) + { + return SimdAsHWIntrinsicClassId::Unknown; + } + if (strcmp(className, "Vector2") == 0) + { + return SimdAsHWIntrinsicClassId::Vector2; + } + if (strcmp(className, "Vector3") == 0) + { + return SimdAsHWIntrinsicClassId::Vector3; + } + if (strcmp(className, "Vector4") == 0) + { + return SimdAsHWIntrinsicClassId::Vector4; + } 
+ if ((strcmp(className, "Vector") == 0) || (strcmp(className, "Vector`1") == 0)) + { +#if defined(TARGET_XARCH) + if (sizeOfVectorT == 32) + { + return SimdAsHWIntrinsicClassId::VectorT256; + } +#endif // TARGET_XARCH + + assert(sizeOfVectorT == 16); + return SimdAsHWIntrinsicClassId::VectorT128; + } + + return SimdAsHWIntrinsicClassId::Unknown; +} + +//------------------------------------------------------------------------ +// impSimdAsIntrinsic: Import a SIMD intrinsic as a GT_HWINTRINSIC node if possible +// +// Arguments: +// intrinsic -- id of the intrinsic function. +// clsHnd -- class handle containing the intrinsic function. +// method -- method handle of the intrinsic function. +// sig -- signature of the intrinsic call +// mustExpand -- true if the intrinsic must return a GenTree*; otherwise, false +// +// Return Value: +// The GT_HWINTRINSIC node, or nullptr if not a supported intrinsic +// +GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand) +{ + assert(!mustExpand); + + if (!featureSIMD) + { + // We can't support SIMD intrinsics if the JIT doesn't support the feature + return nullptr; + } + + var_types retType = JITtype2varType(sig->retType); + var_types baseType = TYP_UNKNOWN; + var_types simdType = TYP_UNKNOWN; + unsigned simdSize = 0; + + // We want to resolve and populate the handle cache for this type even + // if it isn't the basis for anything carried on the node. 
+ baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); + assert(simdSize != 0); + + CORINFO_CLASS_HANDLE argClass; + + if (retType == TYP_STRUCT) + { + baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize); + retType = getSIMDTypeForSize(simdSize); + } + else + { + argClass = info.compCompHnd->getArgClass(sig, sig->args); + baseType = getBaseTypeAndSizeOfSIMDType(argClass, &simdSize); + } + + if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (sig->numArgs != 0)) + { + // We need to fixup the clsHnd in the case we are an intrinsic on Vector + // The first argument will be the appropriate Vector handle to use + clsHnd = info.compCompHnd->getArgClass(sig, sig->args); + + // We also need to adjust the baseType as some methods on Vector return + // a type different than the operation we need to perform. An example + // is LessThan or Equals which takes double but returns long. This is + // unlike the counterparts on Vector which take a return the same type. + baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); + } + + simdType = getSIMDTypeForSize(simdSize); + assert(varTypeIsSIMD(simdType)); + + if (!varTypeIsArithmetic(baseType)) + { + // We only support intrinsics on the 10 primitive arithmetic types + return nullptr; + } + + NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + + if ((hwIntrinsic == NI_Illegal) || !varTypeIsSIMD(simdType)) + { + // The baseType isn't supported by the intrinsic + return nullptr; + } + + if (SimdAsHWIntrinsicInfo::IsFloatingPointUsed(intrinsic)) + { + // Set `compFloatingPointUsed` to cover the scenario where an intrinsic + // is operating on SIMD fields, but where no SIMD local vars are in use. 
+ compFloatingPointUsed = true; + } + + if (hwIntrinsic == intrinsic) + { + // The SIMD intrinsic requires special handling outside the normal code path + return impSimdAsHWIntrinsicSpecial(intrinsic, clsHnd, sig, retType, baseType, simdSize); + } + + CORINFO_InstructionSet hwIntrinsicIsa = HWIntrinsicInfo::lookupIsa(hwIntrinsic); + + if (!compOpportunisticallyDependsOn(hwIntrinsicIsa)) + { + // The JIT doesn't support the required ISA + return nullptr; + } + + CORINFO_ARG_LIST_HANDLE argList = sig->args; + var_types argType = TYP_UNKNOWN; + + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + + bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + + switch (sig->numArgs) + { + case 1: + { + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + return gtNewSimdAsHWIntrinsicNode(retType, op1, hwIntrinsic, baseType, simdSize); + } + + case 2: + { + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)) + { + std::swap(op1, op2); + } + + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } + } + + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; +} + +//------------------------------------------------------------------------ +// impSimdAsHWIntrinsicSpecial: Import a SIMD intrinsic as a GT_HWINTRINSIC node if possible +// This method handles cases which cannot be table driven +// +// Arguments: +// intrinsic -- id of the intrinsic function. 
+// clsHnd -- class handle containing the intrinsic function. +// sig -- signature of the intrinsic call +// retType -- the return type of the intrinsic call +// baseType -- the base type of SIMD type of the intrinsic +// simdSize -- the size of the SIMD type of the intrinsic +// +// Return Value: +// The GT_HWINTRINSIC node, or nullptr if not a supported intrinsic +// +GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_SIG_INFO* sig, + var_types retType, + var_types baseType, + unsigned simdSize) +{ + assert(featureSIMD); + assert(retType != TYP_UNKNOWN); + assert(varTypeIsArithmetic(baseType)); + assert(simdSize != 0); + assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); + assert(SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType) == intrinsic); + + CORINFO_ARG_LIST_HANDLE argList = sig->args; + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass; + + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + + SimdAsHWIntrinsicClassId classId = SimdAsHWIntrinsicInfo::lookupClassId(intrinsic); + bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + +#if defined(TARGET_XARCH) + bool isVectorT256 = (SimdAsHWIntrinsicInfo::lookupClassId(intrinsic) == SimdAsHWIntrinsicClassId::VectorT256); + + if ((baseType != TYP_FLOAT) && !compOpportunisticallyDependsOn(InstructionSet_SSE2)) + { + // Vector, for everything but float, requires at least SSE2 + return nullptr; + } + else if (!compOpportunisticallyDependsOn(InstructionSet_SSE)) + { + // Vector requires at least SSE + return nullptr; + } + + // Vector, when 32-bytes, requires at least AVX2 + assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2)); +#endif + + switch (sig->numArgs) + { + case 1: + { + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + 
assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + + switch (intrinsic) + { +#if defined(TARGET_XARCH) + case NI_Vector2_Abs: + case NI_Vector3_Abs: + case NI_Vector4_Abs: + case NI_VectorT128_Abs: + case NI_VectorT256_Abs: + { + if (varTypeIsFloating(baseType)) + { + // Abs(vf) = vf & new SIMDVector(0x7fffffff); + // Abs(vd) = vf & new SIMDVector(0x7fffffffffffffff); + GenTree* bitMask = nullptr; + + if (baseType == TYP_FLOAT) + { + static_assert_no_msg(sizeof(float) == sizeof(int)); + int mask = 0x7fffffff; + bitMask = gtNewDconNode(*((float*)&mask), TYP_FLOAT); + } + else + { + assert(baseType == TYP_DOUBLE); + static_assert_no_msg(sizeof(double) == sizeof(__int64)); + + __int64 mask = 0x7fffffffffffffffLL; + bitMask = gtNewDconNode(*((double*)&mask), TYP_DOUBLE); + } + assert(bitMask != nullptr); + + bitMask = gtNewSIMDNode(retType, bitMask, SIMDIntrinsicInit, baseType, simdSize); + + intrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseAnd : NI_VectorT128_op_BitwiseAnd; + intrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + + return gtNewSimdAsHWIntrinsicNode(retType, op1, bitMask, intrinsic, baseType, simdSize); + } + else if (varTypeIsUnsigned(baseType)) + { + return op1; + } + else if ((baseType != TYP_LONG) && compOpportunisticallyDependsOn(InstructionSet_SSSE3)) + { + return gtNewSimdAsHWIntrinsicNode(retType, op1, NI_SSSE3_Abs, baseType, simdSize); + } + else + { + GenTree* tmp; + NamedIntrinsic hwIntrinsic; + + GenTree* op1Dup1; + op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.Abs")); + + GenTree* op1Dup2; + op1Dup1 = impCloneExpr(op1Dup1, &op1Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.Abs")); + + // op1 = op1 < Zero + tmp = gtNewSIMDVectorZero(retType, baseType, simdSize); + hwIntrinsic = isVectorT256 ? 
NI_VectorT256_LessThan : NI_VectorT128_LessThan; + op1 = impSimdAsHWIntrinsicRelOp(hwIntrinsic, clsHnd, retType, baseType, simdSize, op1, tmp); + + // tmp = Zero - op1Dup1 + tmp = gtNewSIMDVectorZero(retType, baseType, simdSize); + hwIntrinsic = isVectorT256 ? NI_AVX2_Subtract : NI_SSE2_Subtract; + tmp = gtNewSimdAsHWIntrinsicNode(retType, tmp, op1Dup1, hwIntrinsic, baseType, simdSize); + + // result = ConditionalSelect(op1, tmp, op1Dup2) + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, tmp, op1Dup2); + } + break; + } +#elif defined(TARGET_ARM64) + case NI_VectorT128_Abs: + { + assert(varTypeIsUnsigned(baseType)); + return op1; + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. + break; + } + } + break; + } + + case 2: + { + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + + switch (intrinsic) + { +#if defined(TARGET_XARCH) + case NI_Vector2_op_Division: + case NI_Vector3_op_Division: + { + // Vector2/3 div: since the top-most elements will be zero, we end up + // perfoming 0/0 which is a NAN. Therefore, post division we need to set the + // top-most elements to zero. This is achieved by left logical shift followed + // by right logical shift of the result. + + // These are 16 byte operations, so we subtract from 16 bytes, not the vector register length. 
+ unsigned shiftCount = 16 - simdSize; + assert((shiftCount > 0) && (shiftCount <= 16)); + + // retNode = Sse.Divide(op1, op2); + GenTree* retNode = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, NI_SSE_Divide, baseType, simdSize); + + // retNode = Sse.ShiftLeftLogical128BitLane(retNode.AsInt32(), shiftCount).AsSingle() + retNode = gtNewSimdAsHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT), + NI_SSE2_ShiftLeftLogical128BitLane, TYP_INT, simdSize); + + // retNode = Sse.ShiftRightLogical128BitLane(retNode.AsInt32(), shiftCount).AsSingle() + retNode = gtNewSimdAsHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT), + NI_SSE2_ShiftRightLogical128BitLane, TYP_INT, simdSize); + + return retNode; + } + + case NI_VectorT128_Equals: + case NI_VectorT128_GreaterThan: + case NI_VectorT128_GreaterThanOrEqual: + case NI_VectorT128_LessThan: + case NI_VectorT128_LessThanOrEqual: + case NI_VectorT256_GreaterThan: + case NI_VectorT256_GreaterThanOrEqual: + case NI_VectorT256_LessThan: + case NI_VectorT256_LessThanOrEqual: + { + return impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2); + } + + case NI_VectorT128_Max: + case NI_VectorT128_Min: + case NI_VectorT256_Max: + case NI_VectorT256_Min: + { + if ((baseType == TYP_BYTE) || (baseType == TYP_USHORT)) + { + GenTree* constVal = nullptr; + var_types opType = baseType; + + NamedIntrinsic opIntrinsic; + NamedIntrinsic hwIntrinsic; + + switch (baseType) + { + case TYP_BYTE: + { + constVal = gtNewIconNode(0x80808080, TYP_INT); + opIntrinsic = NI_VectorT128_op_Subtraction; + baseType = TYP_UBYTE; + break; + } + + case TYP_USHORT: + { + constVal = gtNewIconNode(0x80008000, TYP_INT); + opIntrinsic = NI_VectorT128_op_Addition; + baseType = TYP_SHORT; + break; + } + + default: + { + unreached(); + } + } + + GenTree* constVector = + gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, TYP_INT, simdSize); + + GenTree* constVectorDup1; + constVector = 
impCloneExpr(constVector, &constVectorDup1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone constVector for Vector.Max/Min")); + + GenTree* constVectorDup2; + constVectorDup1 = + impCloneExpr(constVectorDup1, &constVectorDup2, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone constVector for Vector.Max/Min")); + + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType); + + // op1 = op1 - constVector + // -or- + // op1 = op1 + constVector + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, opType, simdSize); + + // op2 = op2 - constVectorDup1 + // -or- + // op2 = op2 + constVectorDup1 + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup1, hwIntrinsic, opType, simdSize); + + // op1 = Max(op1, op2) + // -or- + // op1 = Min(op1, op2) + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + + // result = op1 + constVectorDup2 + // -or- + // result = op1 - constVectorDup2 + opIntrinsic = (opIntrinsic == NI_VectorT128_op_Subtraction) ? NI_VectorT128_op_Addition + : NI_VectorT128_op_Subtraction; + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType); + return gtNewSimdAsHWIntrinsicNode(retType, op1, constVectorDup2, hwIntrinsic, opType, simdSize); + } + + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.Max/Min")); + + GenTree* op2Dup; + op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op2 for Vector.Max/Min")); + + if ((intrinsic == NI_VectorT128_Max) || (intrinsic == NI_VectorT256_Max)) + { + intrinsic = isVectorT256 ? NI_VectorT256_GreaterThan : NI_VectorT128_GreaterThan; + } + else + { + intrinsic = isVectorT256 ? 
NI_VectorT256_LessThan : NI_VectorT128_LessThan; + } + + // op1 = op1 > op2 + // -or- + // op1 = op1 < op2 + op1 = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2); + + // result = ConditionalSelect(op1, op1Dup, op2Dup) + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op1Dup, op2Dup); + } + + case NI_VectorT128_op_Multiply: + { + assert(baseType == TYP_INT); + + NamedIntrinsic hwIntrinsic = NI_Illegal; + + if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + hwIntrinsic = NI_SSE41_MultiplyLow; + } + else + { + // op1Dup = op1 + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.Multiply")); + + // op2Dup = op2 + GenTree* op2Dup; + op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op2 for Vector.Multiply")); + + // op1 = Sse2.ShiftRightLogical128BitLane(op1, 4) + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(4, TYP_INT), + NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize); + + // op2 = Sse2.ShiftRightLogical128BitLane(op1, 4) + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(4, TYP_INT), + NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize); + + // op2 = Sse2.Multiply(op2.AsUInt64(), op1.AsUInt64()).AsInt32() + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, op1, NI_SSE2_Multiply, TYP_ULONG, simdSize); + + // op2 = Sse2.Shuffle(op2, (0, 0, 2, 0)) + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), + NI_SSE2_Shuffle, baseType, simdSize); + + // op1 = Sse2.Multiply(op1Dup.AsUInt64(), op2Dup.AsUInt64()).AsInt32() + op1 = + gtNewSimdAsHWIntrinsicNode(retType, op1Dup, op2Dup, NI_SSE2_Multiply, TYP_ULONG, simdSize); + + // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0)) + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), + NI_SSE2_Shuffle, baseType, simdSize); + + // result = 
Sse2.UnpackLow(op1, op2) + hwIntrinsic = NI_SSE2_UnpackLow; + } + assert(hwIntrinsic != NI_Illegal); + + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } +#elif defined(TARGET_ARM64) + case NI_VectorT128_Max: + case NI_VectorT128_Min: + { + assert((baseType == TYP_LONG) || (baseType == TYP_ULONG)); + + NamedIntrinsic hwIntrinsic; + + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.Max/Min")); + + GenTree* op2Dup; + op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op2 for Vector.Max/Min")); + + intrinsic = (intrinsic == NI_VectorT128_Max) ? NI_VectorT128_GreaterThan : NI_VectorT128_LessThan; + + // op1 = op1 > op2 + // -or- + // op1 = op1 < op2 + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + + // result = ConditionalSelect(op1, op1Dup, op2Dup) + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op1Dup, op2Dup); + } +#else +#error Unsupported platform +#endif // TARGET_XARCH + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. + break; + } + } + break; + } + } + + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; +} + +//------------------------------------------------------------------------ +// impSimdAsHWIntrinsicCndSel: Import a SIMD conditional select intrinsic +// +// Arguments: +// clsHnd -- class handle containing the intrinsic function. 
+// retType -- the return type of the intrinsic call +// baseType -- the base type of SIMD type of the intrinsic +// simdSize -- the size of the SIMD type of the intrinsic +// op1 -- the first operand of the intrinsic +// op2 -- the second operand of the intrinsic +// op3 -- the third operand of the intrinsic +// +// Return Value: +// The GT_HWINTRINSIC node representing the conditional select +// +GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, + var_types retType, + var_types baseType, + unsigned simdSize, + GenTree* op1, + GenTree* op2, + GenTree* op3) +{ + assert(featureSIMD); + assert(retType != TYP_UNKNOWN); + assert(varTypeIsIntegral(baseType)); + assert(simdSize != 0); + assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); + assert(op1 != nullptr); + assert(op2 != nullptr); + assert(op3 != nullptr); + +#if defined(TARGET_XARCH) + bool isVectorT256 = (simdSize == 32); + + // Vector for the rel-ops covered here requires at least SSE2 + assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + + // Vector, when 32-bytes, requires at least AVX2 + assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2)); + + if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + NamedIntrinsic hwIntrinsic = NI_SSE41_BlendVariable; + + if (isVectorT256) + { + hwIntrinsic = varTypeIsIntegral(baseType) ? 
NI_AVX2_BlendVariable : NI_AVX_BlendVariable; + } + + return gtNewSimdAsHWIntrinsicNode(retType, op3, op2, op1, hwIntrinsic, baseType, simdSize); + } +#endif // TARGET_XARCH + + NamedIntrinsic hwIntrinsic; + + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.ConditionalSelect")); + + // op2 = op2 & op1 + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, op1, hwIntrinsic, baseType, simdSize); + + // op3 = op3 & ~op1Dup + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_AndNot, baseType); + + if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(NI_VectorT128_AndNot)) + { + std::swap(op3, op1Dup); + } + + op3 = gtNewSimdAsHWIntrinsicNode(retType, op3, op1Dup, hwIntrinsic, baseType, simdSize); + + // result = op2 | op3 + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseOr, baseType); + return gtNewSimdAsHWIntrinsicNode(retType, op2, op3, hwIntrinsic, baseType, simdSize); +} + +#if defined(TARGET_XARCH) +//------------------------------------------------------------------------ +// impSimdAsHWIntrinsicRelOp: Import a SIMD relational operator intrinsic +// +// Arguments: +// intrinsic -- id of the intrinsic function. +// clsHnd -- class handle containing the intrinsic function. 
+// retType -- the return type of the intrinsic call +// baseType -- the base type of SIMD type of the intrinsic +// simdSize -- the size of the SIMD type of the intrinsic +// op1 -- the first operand of the intrinsic +// op2 -- the second operand of the intrinsic +// +// Return Value: +// The GT_HWINTRINSIC node representing the relational operator +// +GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + var_types retType, + var_types baseType, + unsigned simdSize, + GenTree* op1, + GenTree* op2) +{ + assert(featureSIMD); + assert(retType != TYP_UNKNOWN); + assert(varTypeIsIntegral(baseType)); + assert(simdSize != 0); + assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); + assert(op1 != nullptr); + assert(op2 != nullptr); + assert(!SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic)); + + bool isVectorT256 = (SimdAsHWIntrinsicInfo::lookupClassId(intrinsic) == SimdAsHWIntrinsicClassId::VectorT256); + + // Vector for the rel-ops covered here requires at least SSE2 + assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + + // Vector, when 32-bytes, requires at least AVX2 + assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2)); + + switch (intrinsic) + { + case NI_VectorT128_Equals: + case NI_VectorT256_Equals: + { + // These ones aren't "special", but they are used by the other + // relational operators and so are defined for convenience. + + NamedIntrinsic hwIntrinsic = NI_Illegal; + + if (isVectorT256 || ((baseType != TYP_LONG) && (baseType != TYP_ULONG))) + { + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + assert(hwIntrinsic != intrinsic); + } + else if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + hwIntrinsic = NI_SSE41_CompareEqual; + } + else + { + // There is no direct SSE2 support for comparing TYP_LONG vectors. + // These have to be implemented in terms of TYP_INT vector comparison operations. + // + // tmp = (op1 == op2) i.e. 
compare for equality as if op1 and op2 are Vector + // op1 = tmp + // op2 = Shuffle(tmp, (2, 3, 0, 1)) + // result = BitwiseAnd(op1, op2) + // + // Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of + // respective long elements. + + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, TYP_INT); + assert(hwIntrinsic != intrinsic); + + GenTree* tmp = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, TYP_INT, simdSize); + + tmp = impCloneExpr(tmp, &op1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone tmp for Vector.Equals")); + + op2 = gtNewSimdAsHWIntrinsicNode(retType, tmp, gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); + + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); + assert(hwIntrinsic != NI_VectorT128_op_BitwiseAnd); + } + assert(hwIntrinsic != NI_Illegal); + + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } + + case NI_VectorT128_GreaterThanOrEqual: + case NI_VectorT128_LessThanOrEqual: + case NI_VectorT256_GreaterThanOrEqual: + case NI_VectorT256_LessThanOrEqual: + { + // There is no direct support for doing a combined comparison and equality for integral types. + // These have to be implemented by performing both halves and combining their results. + // + // op1Dup = op1 + // op2Dup = op2 + // + // op1 = GreaterThan(op1, op2) + // op2 = Equals(op1Dup, op2Dup) + // + // result = BitwiseOr(op1, op2) + // + // Where the GreaterThan(op1, op2) comparison could also be LessThan(op1, op2) + + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.GreaterThanOrEqual/LessThanOrEqual")); + + GenTree* op2Dup; + op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op2 for Vector.GreaterThanOrEqual/LessThanOrEqual")); + + NamedIntrinsic eqIntrinsic = isVectorT256 ? 
NI_VectorT256_Equals : NI_VectorT128_Equals; + + switch (intrinsic) + { + case NI_VectorT128_GreaterThanOrEqual: + { + intrinsic = NI_VectorT128_GreaterThan; + break; + } + + case NI_VectorT128_LessThanOrEqual: + { + intrinsic = NI_VectorT128_LessThan; + break; + } + + case NI_VectorT256_GreaterThanOrEqual: + { + intrinsic = NI_VectorT256_GreaterThan; + break; + } + + case NI_VectorT256_LessThanOrEqual: + { + intrinsic = NI_VectorT256_LessThan; + break; + } + + default: + { + unreached(); + } + } + + op1 = impSimdAsHWIntrinsicRelOp(eqIntrinsic, clsHnd, retType, baseType, simdSize, op1, op2); + op2 = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1Dup, op2Dup); + intrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseOr : NI_VectorT128_op_BitwiseOr; + + NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } + + case NI_VectorT128_GreaterThan: + case NI_VectorT128_LessThan: + case NI_VectorT256_GreaterThan: + case NI_VectorT256_LessThan: + { + NamedIntrinsic hwIntrinsic = NI_Illegal; + + if (varTypeIsUnsigned(baseType)) + { + // Vector&lt;byte&gt;, Vector&lt;ushort&gt;, Vector&lt;uint&gt; and Vector&lt;ulong&gt;: + // Hardware supports > for signed comparison. Therefore, to use it for + // comparing unsigned numbers, we subtract a constant from both the + // operands such that the result fits within the corresponding signed + // type. The resulting signed numbers are compared using signed comparison. + // + // Vector&lt;byte&gt;: constant to be subtracted is 2^7 + // Vector&lt;ushort&gt; constant to be subtracted is 2^15 + // Vector&lt;uint&gt; constant to be subtracted is 2^31 + // Vector&lt;ulong&gt; constant to be subtracted is 2^63 + // + // We need to treat op1 and op2 as signed for comparison purpose after + // the transformation. 
+ + GenTree* constVal = nullptr; + var_types opType = baseType; + + switch (baseType) + { + case TYP_UBYTE: + { + constVal = gtNewIconNode(0x80808080, TYP_INT); + baseType = TYP_BYTE; + break; + } + + case TYP_USHORT: + { + constVal = gtNewIconNode(0x80008000, TYP_INT); + baseType = TYP_SHORT; + break; + } + + case TYP_UINT: + { + constVal = gtNewIconNode(0x80000000, TYP_INT); + baseType = TYP_INT; + break; + } + + case TYP_ULONG: + { + constVal = gtNewLconNode(0x8000000000000000); + baseType = TYP_LONG; + break; + } + + default: + { + unreached(); + } + } + + GenTree* constVector = + gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, constVal->TypeGet(), simdSize); + + GenTree* constVectorDup; + constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone constVector for Vector.GreaterThan/LessThan")); + + NamedIntrinsic hwIntrinsic = isVectorT256 ? NI_AVX2_Subtract : NI_SSE2_Subtract; + + // op1 = op1 - constVector + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, opType, simdSize); + + // op2 = op2 - constVector + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, opType, simdSize); + } + + // This should have been mutated by the above path + assert(varTypeIsIntegral(baseType) && !varTypeIsUnsigned(baseType)); + + if (isVectorT256 || (baseType != TYP_LONG)) + { + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + assert(hwIntrinsic != intrinsic); + } + else if (compOpportunisticallyDependsOn(InstructionSet_SSE42)) + { + hwIntrinsic = + (intrinsic == NI_VectorT128_GreaterThan) ? NI_SSE42_CompareGreaterThan : NI_SSE42_CompareLessThan; + } + else + { + // There is no direct SSE2 support for comparing TYP_LONG vectors. + // These have to be implemented in terms of TYP_INT vector comparison operations. + // + // Let us consider the case of single long element comparison. 
+ // Say op1 = (x1, y1) and op2 = (x2, y2) where x1, y1, x2, and y2 are 32-bit integers that comprise the + // longs op1 and op2. + // + // GreaterThan(op1, op2) can be expressed in terms of > relationship between 32-bit integers that + // comprise op1 and op2 as + // = (x1, y1) > (x2, y2) + // = (x1 > x2) || [(x1 == x2) && (y1 > y2)] - eq (1) + // + // op1Dup1 = op1 + // op1Dup2 = op1Dup1 + // op2Dup1 = op2 + // op2Dup2 = op2Dup1 + // + // t = (op1 > op2) - 32-bit signed comparison + // u = (op1Dup1 == op2Dup1) - 32-bit equality comparison + // v = (op1Dup2 > op2Dup2) - 32-bit unsigned comparison + // + // op1 = Shuffle(t, (3, 3, 1, 1)) - This corresponds to (x1 > x2) in eq(1) above + // v = Shuffle(v, (2, 2, 0, 0)) - This corresponds to (y1 > y2) in eq(1) above + // u = Shuffle(u, (3, 3, 1, 1)) - This corresponds to (x1 == x2) in eq(1) above + // op2 = BitwiseAnd(v, u) - This corresponds to [(x1 == x2) && (y1 > y2)] in eq(1) above + // + // result = BitwiseOr(op1, op2) + + GenTree* op1Dup1; + op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.GreaterThan/LessThan")); + + GenTree* op1Dup2; + op1Dup1 = impCloneExpr(op1Dup1, &op1Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for Vector.GreaterThan/LessThan")); + + GenTree* op2Dup1; + op2 = impCloneExpr(op2, &op2Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op2 for Vector.GreaterThan/LessThan")); + + GenTree* op2Dup2; + op2Dup1 = impCloneExpr(op2Dup1, &op2Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op2 Vector.GreaterThan/LessThan")); + + GenTree* t = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_INT, simdSize, op1, op2); + GenTree* u = impSimdAsHWIntrinsicRelOp(NI_VectorT128_Equals, clsHnd, retType, TYP_INT, simdSize, + op1Dup1, op2Dup1); + GenTree* v = + impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_UINT, simdSize, op1Dup2, op2Dup2); + + op1 = 
gtNewSimdAsHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); + + v = gtNewSimdAsHWIntrinsicNode(retType, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); + u = gtNewSimdAsHWIntrinsicNode(retType, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); + + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); + op2 = gtNewSimdAsHWIntrinsicNode(retType, v, u, hwIntrinsic, baseType, simdSize); + + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseOr, baseType); + } + assert(hwIntrinsic != NI_Illegal); + + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } + + default: + { + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; + } + } +} +#endif // TARGET_XARCH + +#endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/simdashwintrinsic.h b/src/coreclr/src/jit/simdashwintrinsic.h new file mode 100644 index 0000000000000..e5d951e38703d --- /dev/null +++ b/src/coreclr/src/jit/simdashwintrinsic.h @@ -0,0 +1,130 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#ifndef _SIMD_AS_HWINTRINSIC_H_ +#define _SIMD_AS_HWINTRINSIC_H_ + +enum class SimdAsHWIntrinsicClassId +{ + Unknown, + Vector2, + Vector3, + Vector4, + VectorT128, + VectorT256, +}; + +enum class SimdAsHWIntrinsicFlag : unsigned int +{ + None = 0, + + // Indicates compFloatingPointUsed does not need to be set. + NoFloatingPointUsed = 0x1, + + // Indicates the intrinsic is for an instance method. + InstanceMethod = 0x02, + + // Indicates the operands should be swapped in importation. 
+ NeedsOperandsSwapped = 0x04, +}; + +inline SimdAsHWIntrinsicFlag operator~(SimdAsHWIntrinsicFlag value) +{ + return static_cast&lt;SimdAsHWIntrinsicFlag&gt;(~static_cast&lt;unsigned int&gt;(value)); +} + +inline SimdAsHWIntrinsicFlag operator|(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) +{ + return static_cast&lt;SimdAsHWIntrinsicFlag&gt;(static_cast&lt;unsigned int&gt;(lhs) | static_cast&lt;unsigned int&gt;(rhs)); +} + +inline SimdAsHWIntrinsicFlag operator&(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) +{ + return static_cast&lt;SimdAsHWIntrinsicFlag&gt;(static_cast&lt;unsigned int&gt;(lhs) & static_cast&lt;unsigned int&gt;(rhs)); +} + +inline SimdAsHWIntrinsicFlag operator^(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) +{ + return static_cast&lt;SimdAsHWIntrinsicFlag&gt;(static_cast&lt;unsigned int&gt;(lhs) ^ static_cast&lt;unsigned int&gt;(rhs)); +} + +struct SimdAsHWIntrinsicInfo +{ + NamedIntrinsic id; + const char* name; + SimdAsHWIntrinsicClassId classId; + int numArgs; + NamedIntrinsic hwIntrinsic[10]; + SimdAsHWIntrinsicFlag flags; + + static const SimdAsHWIntrinsicInfo& lookup(NamedIntrinsic id); + + static NamedIntrinsic lookupId(CORINFO_SIG_INFO* sig, + const char* className, + const char* methodName, + const char* enclosingClassName, + int sizeOfVectorT); + static SimdAsHWIntrinsicClassId lookupClassId(const char* className, + const char* enclosingClassName, + int sizeOfVectorT); + + // Member lookup + + static NamedIntrinsic lookupId(NamedIntrinsic id) + { + return lookup(id).id; + } + + static const char* lookupName(NamedIntrinsic id) + { + return lookup(id).name; + } + + static SimdAsHWIntrinsicClassId lookupClassId(NamedIntrinsic id) + { + return lookup(id).classId; + } + + static int lookupNumArgs(NamedIntrinsic id) + { + return lookup(id).numArgs; + } + + static NamedIntrinsic lookupHWIntrinsic(NamedIntrinsic id, var_types type) + { + if ((type < TYP_BYTE) || (type > TYP_DOUBLE)) + { + assert(!"Unexpected type"); + return NI_Illegal; + } + return lookup(id).hwIntrinsic[type - TYP_BYTE]; + } + + static SimdAsHWIntrinsicFlag lookupFlags(NamedIntrinsic id) + { + return lookup(id).flags; + } + + // Flags lookup + + static bool IsFloatingPointUsed(NamedIntrinsic id) + { + 
SimdAsHWIntrinsicFlag flags = lookupFlags(id); + return (flags & SimdAsHWIntrinsicFlag::NoFloatingPointUsed) == SimdAsHWIntrinsicFlag::None; + } + + static bool IsInstanceMethod(NamedIntrinsic id) + { + SimdAsHWIntrinsicFlag flags = lookupFlags(id); + return (flags & SimdAsHWIntrinsicFlag::InstanceMethod) == SimdAsHWIntrinsicFlag::InstanceMethod; + } + + static bool NeedsOperandsSwapped(NamedIntrinsic id) + { + SimdAsHWIntrinsicFlag flags = lookupFlags(id); + return (flags & SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) == SimdAsHWIntrinsicFlag::NeedsOperandsSwapped; + } +}; + +#endif // _SIMD_AS_HWINTRINSIC_H_ diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h new file mode 100644 index 0000000000000..cfd47939cf3dc --- /dev/null +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -0,0 +1,89 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +/*****************************************************************************/ +#ifndef SIMD_AS_HWINTRINSIC +#error Define SIMD_AS_HWINTRINSIC before including this file +#endif +/*****************************************************************************/ + +// clang-format off + +#ifdef FEATURE_HW_INTRINSICS + +/* Note + * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic` + * Each intrinsic has a `NumArg` for number of parameters + * Each intrinsic has 10 `NamedIntrinsic` fields that list the HWIntrinsic that should be generated based-on the base type + * NI_Illegal is used to represent an unsupported type + * Using the same Intrinsic ID as the represented entry is used to indicate special handling is required + * Each intrinsic has one or more flags with type of `enum SimdAsHWIntrinsicFlag` +*/ + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector2 Intrinsics +SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector3 Intrinsics +SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 
2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector4 Intrinsics +SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector Intrinsics +SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Equals, 
2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Max, 2, {NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_AdvSimd_Max, NI_AdvSimd_Arm64_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Min, 2, {NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_AdvSimd_Min, NI_AdvSimd_Arm64_Min}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Arm64_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_AdvSimd_Arm64_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None) 
+SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None) + +#endif // FEATURE_HW_INTRINSICS + +#undef SIMD_AS_HWINTRINSIC + +// clang-format on diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h new file mode 100644 index 0000000000000..8f2ac6264041c --- /dev/null +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -0,0 +1,111 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +/*****************************************************************************/ +#ifndef SIMD_AS_HWINTRINSIC +#error Define SIMD_AS_HWINTRINSIC before including this file +#endif +/*****************************************************************************/ + +// clang-format off + +#ifdef FEATURE_HW_INTRINSICS + +/* Note + * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic` + * Each intrinsic has a `NumArg` for number of parameters + * Each intrinsic has 10 `NamedIntrinsic` fields that list the HWIntrinsic that should be generated based-on the base type + * NI_Illegal is used to represent an unsupported type + * Using the same Intrinsic ID as the represented entry is used to indicate special handling is required + * Each intrinsic has one or more flags with type of `enum SimdAsHWIntrinsicFlag` +*/ + +// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector2 Intrinsics +SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector3 Intrinsics 
+SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// 
{TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector4 Intrinsics +SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector Intrinsics +SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, 
NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_SSE_CompareLessThanOrEqual, NI_SSE2_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Max, 2, {NI_VectorT128_Max, NI_SSE2_Max, NI_SSE2_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_SSE_Max, NI_SSE2_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Min, 2, {NI_VectorT128_Min, NI_SSE2_Min, 
NI_SSE2_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_SSE_Min, NI_SSE2_Min}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE_Add, NI_SSE2_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE_And, NI_SSE2_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) + +// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector Intrinsics +SIMD_AS_HWINTRINSIC(VectorT256, Abs, 1, {NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, 
NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, LessThanOrEqual, 2, {NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_AVX_CompareLessThanOrEqual, NI_AVX_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, Max, 2, {NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_AVX_Max, NI_AVX_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, Min, 2, {NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_AVX_Min, NI_AVX_Min}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Addition, 2, {NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX_Add, NI_AVX_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseAnd, 2, {NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX_And, NI_AVX_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseOr, 2, {NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Divide, NI_AVX_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_ExclusiveOr, 2, {NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX_Xor, NI_AVX_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Multiply, NI_AVX_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Subtraction, 2, {NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX_Subtract, NI_AVX_Subtract}, SimdAsHWIntrinsicFlag::None) + +#endif // FEATURE_HW_INTRINSICS + +#undef SIMD_AS_HWINTRINSIC + +// clang-format on diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs index b2d58347904e5..27d3469bfb92c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs @@ -211,7 +211,6 @@ public static Vector2 SquareRoot(Vector2 value) /// The scalar value. /// The source vector. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector2 operator *(float left, Vector2 right) { @@ -224,7 +223,6 @@ public static Vector2 SquareRoot(Vector2 value) /// The source vector. /// The scalar value. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector2 operator *(Vector2 left, float right) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs index c41baa46aa8f7..df32e8331d70c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs @@ -230,7 +230,6 @@ public static Vector3 SquareRoot(Vector3 value) /// The source vector. /// The scalar value. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector3 operator *(Vector3 left, float right) { @@ -243,7 +242,6 @@ public static Vector3 SquareRoot(Vector3 value) /// The scalar value. /// The source vector. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector3 operator *(float left, Vector3 right) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs index 440c78882d44a..70d692457e1a5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs @@ -265,7 +265,6 @@ public static Vector4 SquareRoot(Vector4 value) /// The source vector. 
/// The scalar value. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 operator *(Vector4 left, float right) { @@ -278,7 +277,6 @@ public static Vector4 SquareRoot(Vector4 value) /// The scalar value. /// The source vector. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 operator *(float left, Vector4 right) {