Changing the way helper functions are handled in morph

Fixing debug checks that hit asserts for TYP_ULONG and TYP_UINT at the IR level
dotnet · tannergooding · Apr 5, 2024 · Oct 24, 2023 · Oct 25, 2023 · Oct 27, 2023
commit 9c4edd5f9dc3985ea8434ce5a5119e206a2b4fdd
diff --git a/difs/doubleToLong-avx512-base.asm b/difs/doubleToLong-avx512-base.asm
@@ -0,0 +1,18 @@
+; Assembly listing for method Program:DoubleToLong(double):long (FullOpts)
+; Emitting BLENDED_CODE for X64 with AVX512 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       C5F877               vzeroupper 
+
+G_M000_IG02:                ;; offset=0x0003
+       C4E1FB2CC0           vcvttsd2si  rax, xmm0
+
+G_M000_IG03:                ;; offset=0x0008
+       C3                   ret      
+
+; Total bytes of code 9
diff --git a/difs/doubleToLong-avx512-base.txt b/difs/doubleToLong-avx512-base.txt
@@ -0,0 +1,18 @@
+; Assembly listing for method Program:DoubleToLong(double):long (FullOpts)
+; Emitting BLENDED_CODE for X64 with AVX512 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       C5F877               vzeroupper 
+
+G_M000_IG02:                ;; offset=0x0003
+       C4E1FB2CC0           vcvttsd2si  rax, xmm0
+
+G_M000_IG03:                ;; offset=0x0008
+       C3                   ret      
+
+; Total bytes of code 9
diff --git a/difs/doubleToLong-avx512-diff.asm b/difs/doubleToLong-avx512-diff.asm
@@ -0,0 +1,28 @@
+; Assembly listing for method Program:DoubleToLong(double):long (FullOpts)
+; Emitting BLENDED_CODE for X64 with AVX512 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       C5F877               vzeroupper 
+
+G_M000_IG02:                ;; offset=0x0003
+       62F3FD0855053200000000 vfixupimmsd xmm0, xmm0, xmmword ptr [reloc @RWD00], 0
+       C5F9C20D390000000D   vcmppd   xmm1, xmm0, xmmword ptr [reloc @RWD16], 13
+       C5F8101541000000     vmovups  xmm2, xmmword ptr [reloc @RWD32]
+       C4E1FB2CC0           vcvttsd2si  rax, xmm0
+       62F2FD087CC0         vpbroadcastq  xmm0, rax
+       62F3ED0825C8CA       vpternlogq xmm1, xmm2, xmm0, -54
+       C4E1F97EC8           vmovd    rax, xmm1
+
+G_M000_IG03:                ;; offset=0x0036
+       C3                   ret      
+
+RWD00  	dq	0000000000000088h, 0000000000000000h
+RWD16  	dq	43E0000000000000h, 43E0000000000000h
+RWD32  	dq	7FFFFFFFFFFFFFFFh, 7FFFFFFFFFFFFFFFh
+
+; Total bytes of code 55
diff --git a/difs/doubleToLong-non-avx512-base.asm b/difs/doubleToLong-non-avx512-base.asm
@@ -0,0 +1,17 @@
+; Assembly listing for method Program:DoubleToLong(double):long (FullOpts)
+; Emitting BLENDED_CODE for generic X64 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+
+G_M000_IG02:                ;; offset=0x0000
+       F2480F2CC0           cvttsd2si  rax, xmm0
+
+G_M000_IG03:                ;; offset=0x0005
+       C3                   ret      
+
+; Total bytes of code 6
diff --git a/difs/doubleToLong-non-avx512-diff.asm b/difs/doubleToLong-non-avx512-diff.asm
@@ -0,0 +1,20 @@
+; Assembly listing for method Program:DoubleToLong(double):long (FullOpts)
+; Emitting BLENDED_CODE for generic X64 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       4883EC28             sub      rsp, 40
+
+G_M000_IG02:                ;; offset=0x0004
+       E8E771C95F           call     CORINFO_HELP_DBL2LNG
+       90                   nop      
+
+G_M000_IG03:                ;; offset=0x000A
+       4883C428             add      rsp, 40
+       C3                   ret      
+
+; Total bytes of code 15
diff --git a/difs/doubleToLong128-avx512-base.asm b/difs/doubleToLong128-avx512-base.asm
@@ -0,0 +1,35 @@
+; Assembly listing for method Program:DoubleToLong128(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[long] (FullOpts)
+; Emitting BLENDED_CODE for X64 with AVX512 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+; 0 inlinees with PGO data; 8 single block inlinees; 4 inlinees without PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       4883EC38             sub      rsp, 56
+       C5F877               vzeroupper 
+
+G_M000_IG02:                ;; offset=0x0007
+       488B02               mov      rax, qword ptr [rdx]
+       4889442428           mov      qword ptr [rsp+0x28], rax
+       C4E1FB2C442428       vcvttsd2si rax, qword ptr [rsp+0x28]
+       4889442430           mov      qword ptr [rsp+0x30], rax
+       488B442430           mov      rax, qword ptr [rsp+0x30]
+       488B5208             mov      rdx, qword ptr [rdx+0x08]
+       4889542418           mov      qword ptr [rsp+0x18], rdx
+       C4E1FB2C542418       vcvttsd2si rdx, qword ptr [rsp+0x18]
+       4889542420           mov      qword ptr [rsp+0x20], rdx
+       488B542420           mov      rdx, qword ptr [rsp+0x20]
+       48890424             mov      qword ptr [rsp], rax
+       4889542408           mov      qword ptr [rsp+0x08], rdx
+       C5F8280424           vmovaps  xmm0, xmmword ptr [rsp]
+       C5F81101             vmovups  xmmword ptr [rcx], xmm0
+       488BC1               mov      rax, rcx
+
+G_M000_IG03:                ;; offset=0x004F
+       4883C438             add      rsp, 56
+       C3                   ret      
+
+; Total bytes of code 84
diff --git a/difs/doubleToLong128-avx512-diff.asm b/difs/doubleToLong128-avx512-diff.asm
@@ -0,0 +1,29 @@
+; Assembly listing for method Program:DoubleToLong128(System.Runtime.Intrinsics.Vector128`1[double]):System.Runtime.Intrinsics.Vector128`1[long] (FullOpts)
+; Emitting BLENDED_CODE for X64 with AVX512 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       C5F877               vzeroupper 
+
+G_M000_IG02:                ;; offset=0x0003
+       C5F81002             vmovups  xmm0, xmmword ptr [rdx]
+       62F3FD0854052E00000000 vfixupimmpd xmm0, xmm0, xmmword ptr [reloc @RWD00], 0
+       C5F9C20D350000000D   vcmppd   xmm1, xmm0, xmmword ptr [reloc @RWD16], 13
+       C5F810153D000000     vmovups  xmm2, xmmword ptr [reloc @RWD32]
+       62F1FD087AC0         vcvttpd2qq xmm0, xmm0
+       62F3ED0825C8CA       vpternlogq xmm1, xmm2, xmm0, -54
+       C5F81109             vmovups  xmmword ptr [rcx], xmm1
+       488BC1               mov      rax, rcx
+
+G_M000_IG03:                ;; offset=0x0037
+       C3                   ret      
+
+RWD00  	dq	0000000000000088h, 0000000000000088h
+RWD16  	dq	43E0000000000000h, 43E0000000000000h
+RWD32  	dq	7FFFFFFFFFFFFFFFh, 7FFFFFFFFFFFFFFFh
+
+; Total bytes of code 56
diff --git a/difs/floatToUint-avx512-diff.asm b/difs/floatToUint-avx512-diff.asm
@@ -0,0 +1,21 @@
+; Assembly listing for method Program:FloatToUint(float):uint (FullOpts)
+; Emitting BLENDED_CODE for X64 with AVX512 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       C5F877               vzeroupper 
+
+G_M000_IG02:                ;; offset=0x0003
+       62F37D0855051200000000 vfixupimmss xmm0, xmm0, xmmword ptr [reloc @RWD00], 0
+       62F17E0878C0         vcvttss2usi  eax, xmm0
+
+G_M000_IG03:                ;; offset=0x0014
+       C3                   ret      
+
+RWD00  	dq	0000000008000088h, 0000000000000000h
+
+; Total bytes of code 21
diff --git a/difs/floatToUint-avx512_base.asm b/difs/floatToUint-avx512_base.asm
@@ -0,0 +1,16 @@
+; Assembly listing for method Program:FloatToUint(float):uint (FullOpts)
+; Emitting BLENDED_CODE for X64 with AVX512 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       C5F877               vzeroupper 
+
+G_M000_IG02:                ;; offset=0x0003
+       C4E1FA2CC0           vcvttss2si  rax, xmm0
+
+G_M000_IG03:                ;; offset=0x0008
+       C3                   ret   
diff --git a/difs/floatToUint-non-avx512-base.asm b/difs/floatToUint-non-avx512-base.asm
@@ -0,0 +1,17 @@
+; Assembly listing for method Program:FloatToUint(float):uint (FullOpts)
+; Emitting BLENDED_CODE for generic X64 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+
+G_M000_IG02:                ;; offset=0x0000
+       F3480F2CC0           cvttss2si  rax, xmm0
+
+G_M000_IG03:                ;; offset=0x0005
+       C3                   ret      
+
+; Total bytes of code 6
diff --git a/difs/floatToUint-non-avx512-diff.asm b/difs/floatToUint-non-avx512-diff.asm
@@ -0,0 +1,20 @@
+; Assembly listing for method Program:FloatToUint(float):uint (FullOpts)
+; Emitting BLENDED_CODE for generic X64 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       4883EC28             sub      rsp, 40
+
+G_M000_IG02:                ;; offset=0x0004
+       E867CCB65F           call     CORINFO_HELP_FLT2UINT
+       90                   nop      
+
+G_M000_IG03:                ;; offset=0x000A
+       4883C428             add      rsp, 40
+       C3                   ret      
+
+; Total bytes of code 15
diff --git a/difs/floatToUint-non-avx512-diff.txt b/difs/floatToUint-non-avx512-diff.txt
@@ -0,0 +1,20 @@
+; Assembly listing for method Program:FloatToUint(float):uint (FullOpts)
+; Emitting BLENDED_CODE for generic X64 - Windows
+; FullOpts code
+; optimized code
+; rsp based frame
+; partially interruptible
+; No PGO data
+
+G_M000_IG01:                ;; offset=0x0000
+       4883EC28             sub      rsp, 40
+
+G_M000_IG02:                ;; offset=0x0004
+       E867CCB65F           call     CORINFO_HELP_FLT2UINT
+       90                   nop      
+
+G_M000_IG03:                ;; offset=0x000A
+       4883C428             add      rsp, 40
+       C3                   ret      
+
+; Total bytes of code 15
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
@@ -508,27 +508,12 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
                 switch (dstType)
                 {
                     case TYP_INT:
-<<<<<<< HEAD
 #ifdef TARGET_XARCH
                         if (!tree->IsSaturatedConversion())
                         {
                             return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
                         }
 #endif //TARGET_XARCH
-=======
-#ifdef TARGET_AMD64
-<<<<<<< HEAD
-                        return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
-#else //TARGET_AMD64
->>>>>>> 3b121bdc382 (adding handling for scalar conversion cases for SSE2. Remaining float/double -> long/int for AVX512.)
-                        return nullptr;
-=======
-                        if (!tree->IsSaturatedConversion())
-                        {
-                            return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
-                        }
->>>>>>> 59d881e8d6a (partial changes for float to int conversion using double to int for avx512. vfixup not working. next step is to fix the vfixup instruction and get it working)
-#endif //TARGET_AMD64
                         return nullptr;
 
                     case TYP_UINT:
@@ -543,17 +528,14 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
 
                     case TYP_LONG:
-<<<<<<< HEAD
 #ifdef TARGET_XARCH
                         if (!tree->IsSaturatedConversion())
                         {
                             return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
                         }
-#endif //TARGET_XARCH
                         return nullptr;
-=======
+#endif //TARGET_XARCH
                         return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
->>>>>>> 3b121bdc382 (adding handling for scalar conversion cases for SSE2. Remaining float/double -> long/int for AVX512.)
 
                     case TYP_ULONG:
 #ifdef TARGET_AMD64

diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp
@@ -1248,15 +1248,15 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
 
                         //run vfixupimmsd base on table and no flags reporting
                         GenTree* saturate_val = gtNewSimdHWIntrinsicNode(simdType, op1, op2Clone, tbl, gtNewIconNode(0),
-                                                                    NI_AVX512F_Fixup, fieldType, simdSize);
+                                                                    NI_AVX512F_Fixup, simdBaseJitType, simdSize);
 
-                        GenTree* max_val = gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast<float>(INT64_MAX)), fieldType, simdSize);
+                        GenTree* max_val = gtNewSimdCreateBroadcastNode(simdType, gtNewDconNodeF(static_cast<float>(INT64_MAX)), simdBaseJitType, simdSize);
                         GenTree* max_valDup = gtNewSimdCreateBroadcastNode(simdType, gtNewIconNode(INT64_MAX, TYP_LONG), CORINFO_TYPE_LONG, simdSize);
                         //we will be using the input value twice
                         GenTree* saturate_valDup = fgMakeMultiUse(&saturate_val);
 
                         //usage 1 --> compare with max value of integer
-                        saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, fieldType, simdSize);
+                        saturate_val = gtNewSimdCmpOpNode(GT_GE, simdType, saturate_val, max_val, simdBaseJitType, simdSize);
                         //cast it
 
                         NamedIntrinsic intrinsic =    (simdSize == 16) ? NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation