dotnet · tannergooding · Apr 5, 2024 · Oct 24, 2023 · Oct 25, 2023 · Oct 27, 2023
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
@@ -55,11 +55,11 @@
     JITHELPER(CORINFO_HELP_ULMOD,               JIT_ULMod,          CORINFO_HELP_SIG_16_STACK)
     JITHELPER(CORINFO_HELP_LNG2DBL,             JIT_Lng2Dbl,        CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_ULNG2DBL,            JIT_ULng2Dbl,       CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_DBL2INT,      JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
+    JITHELPER(CORINFO_HELP_DBL2INT,             JIT_Dbl2Int,        CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2INT_OVF,         JIT_Dbl2IntOvf,     CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_DBL2LNG,      JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
+    JITHELPER(CORINFO_HELP_DBL2LNG,             JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2LNG_OVF,         JIT_Dbl2LngOvf,     CORINFO_HELP_SIG_8_STACK)
-    DYNAMICJITHELPER(CORINFO_HELP_DBL2UINT,     JIT_Dbl2Lng,        CORINFO_HELP_SIG_8_STACK)
+    JITHELPER(CORINFO_HELP_DBL2UINT,            JIT_Dbl2UInt,       CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2UINT_OVF,        JIT_Dbl2UIntOvf,    CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2ULNG,            JIT_Dbl2ULng,       CORINFO_HELP_SIG_8_STACK)
     JITHELPER(CORINFO_HELP_DBL2ULNG_OVF,        JIT_Dbl2ULngOvf,    CORINFO_HELP_SIG_8_STACK)

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
@@ -7604,21 +7604,24 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
     noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
 
     // We shouldn't be seeing uint64 here as it should have been converted
-    // into a helper call by either front-end or lowering phase.
-    assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
+    // into a helper call by either front-end or lowering phase, unless we have AVX512F
+    // accelerated conversions.
+    assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
+           compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
 
     // If the dstType is TYP_UINT, we have 32-bits to encode the
     // float number. Any of 33rd or above bits can be the sign bit.
     // To achieve it we pretend as if we are converting it to a long.
-    if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
+    if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) &&
+        !compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
     {
         dstType = TYP_LONG;
     }
 
     // Note that we need to specify dstType here so that it will determine
     // the size of destination integer register and also the rex.w prefix.
     genConsumeOperands(treeNode->AsOp());
-    instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType));
+    instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
     GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
     genProduceReg(treeNode);
 }

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
@@ -3200,6 +3200,14 @@ class Compiler
                                  CorInfoType simdBaseJitType,
                                  unsigned    simdSize);
 
+#if defined(TARGET_XARCH)
+    GenTree* gtNewSimdCvtNode(var_types              type,
+                              GenTree*               op1,
+                              CorInfoType            simdTargetBaseJitType,
+                              CorInfoType            simdSourceBaseJitType,
+                              unsigned               simdSize);
+#endif // TARGET_XARCH
+
     GenTree* gtNewSimdCreateBroadcastNode(
         var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
 

diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
@@ -4000,7 +4000,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
         case INS_comiss:
         case INS_cvtss2sd:
         case INS_cvtss2si:
-        case INS_cvttss2si:
+        case INS_cvttss2si32:
+        case INS_cvttss2si64:
         case INS_divss:
         case INS_extractps:
         case INS_insertps:
@@ -4043,7 +4044,8 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
         case INS_comisd:
         case INS_cvtsd2si:
         case INS_cvtsd2ss:
-        case INS_cvttsd2si:
+        case INS_cvttsd2si32:
+        case INS_cvttsd2si64:
         case INS_divsd:
         case INS_maxsd:
         case INS_minsd:

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
@@ -1522,9 +1522,11 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
         switch (ins)
         {
             case INS_cvtss2si:
-            case INS_cvttss2si:
+            case INS_cvttss2si32:
+            case INS_cvttss2si64:
             case INS_cvtsd2si:
-            case INS_cvttsd2si:
+            case INS_cvttsd2si32:
+            case INS_cvttsd2si64:
             case INS_movd:
             case INS_movnti:
             case INS_andn:
@@ -1544,7 +1546,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
 #endif // TARGET_AMD64
             case INS_vcvtsd2usi:
             case INS_vcvtss2usi:
-            case INS_vcvttsd2usi:
             {
                 if (attr == EA_8BYTE)
                 {
@@ -2723,8 +2724,10 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
         case INS_blsmsk:
         case INS_blsr:
         case INS_bzhi:
-        case INS_cvttsd2si:
-        case INS_cvttss2si:
+        case INS_cvttsd2si32:
+        case INS_cvttsd2si64:
+        case INS_cvttss2si32:
+        case INS_cvttss2si64:
         case INS_cvtsd2si:
         case INS_cvtss2si:
         case INS_extractps:
@@ -2748,7 +2751,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
 #endif
         case INS_vcvtsd2usi:
         case INS_vcvtss2usi:
-        case INS_vcvttsd2usi:
+        case INS_vcvttsd2usi32:
+        case INS_vcvttsd2usi64:
         case INS_vcvttss2usi32:
         case INS_vcvttss2usi64:
         {
@@ -11605,22 +11609,20 @@ void emitter::emitDispIns(
                     break;
                 }
 
-                case INS_cvttsd2si:
+                case INS_cvttsd2si32:
+                case INS_cvttsd2si64:
                 case INS_cvtss2si:
                 case INS_cvtsd2si:
-                case INS_cvttss2si:
+                case INS_cvttss2si32:
+                case INS_cvttss2si64:
                 case INS_vcvtsd2usi:
                 case INS_vcvtss2usi:
-                case INS_vcvttsd2usi:
-                {
-                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
-                    break;
-                }
-
+                case INS_vcvttsd2usi32:
+                case INS_vcvttsd2usi64:
                 case INS_vcvttss2usi32:
                 case INS_vcvttss2usi64:
                 {
-                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE));
+                    printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
                     break;
                 }
 
@@ -19050,7 +19052,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             break;
         }
 
-        case INS_cvttsd2si:
+        case INS_cvttsd2si32:
+        case INS_cvttsd2si64:
         case INS_cvtsd2si:
         case INS_cvtsi2sd32:
         case INS_cvtsi2ss32:
@@ -19059,7 +19062,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         case INS_vcvtsd2usi:
         case INS_vcvtusi2ss32:
         case INS_vcvtusi2ss64:
-        case INS_vcvttsd2usi:
+        case INS_vcvttsd2usi32:
+        case INS_vcvttsd2usi64:
         case INS_vcvttss2usi32:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;
             result.insLatency += PERFSCORE_LATENCY_7C;
@@ -19071,7 +19075,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             result.insLatency += PERFSCORE_LATENCY_5C;
             break;
 
-        case INS_cvttss2si:
+        case INS_cvttss2si32:
+        case INS_cvttss2si64:
         case INS_cvtss2si:
         case INS_vcvtss2usi:
             result.insThroughput = PERFSCORE_THROUGHPUT_1C;