From 3fa96044d06d1355e16da014478fd99eba3c01a5 Mon Sep 17 00:00:00 2001
From: lilinus <linus.hamlin@outlook.com>
Date: Wed, 11 Oct 2023 16:57:23 +0200
Subject: [PATCH 1/6] Use Math.BigMul for 64x64=128 in decimal

---
 .../src/System/Decimal.DecCalc.cs                 | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
index bc107fb7549c6..6082de804153b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
@@ -184,20 +184,7 @@ private static ulong UInt32x32To64(uint a, uint b)
 
             private static void UInt64x64To128(ulong a, ulong b, ref DecCalc result)
             {
-                ulong low = UInt32x32To64((uint)a, (uint)b); // lo partial prod
-                ulong mid = UInt32x32To64((uint)a, (uint)(b >> 32)); // mid 1 partial prod
-                ulong high = UInt32x32To64((uint)(a >> 32), (uint)(b >> 32));
-                high += mid >> 32;
-                low += mid <<= 32;
-                if (low < mid)  // test for carry
-                    high++;
-
-                mid = UInt32x32To64((uint)(a >> 32), (uint)b);
-                high += mid >> 32;
-                low += mid <<= 32;
-                if (low < mid)  // test for carry
-                    high++;
-
+                ulong high = Math.BigMul(a, b, out ulong low);
                 if (high > uint.MaxValue)
                     Number.ThrowOverflowException(SR.Overflow_Decimal);
                 result.Low64 = low;

From fab4430b43d7f0c90210013f44664a80273a2b96 Mon Sep 17 00:00:00 2001
From: lilinus <linus.hamlin@outlook.com>
Date: Thu, 12 Oct 2023 15:32:20 +0200
Subject: [PATCH 2/6] remove UInt32x32To64

---
 .../src/System/Decimal.DecCalc.cs             | 103 +++++++++---------
 1 file changed, 49 insertions(+), 54 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
index 6082de804153b..1e9425a69ee77 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
@@ -177,11 +177,6 @@ private static unsafe uint GetExponent(double d)
                 return (uint)(BitConverter.DoubleToUInt64Bits(d) >> 52) & 0x7FFu;
             }
 
-            private static ulong UInt32x32To64(uint a, uint b)
-            {
-                return (ulong)a * (ulong)b;
-            }
-
             private static void UInt64x64To128(ulong a, ulong b, ref DecCalc result)
             {
                 ulong high = Math.BigMul(a, b, out ulong low);
@@ -381,7 +376,7 @@ private static uint Div96By64(ref Buf12 bufNum, ulong den)
 
                 // Compute full remainder, rem = dividend - (quo * divisor).
                 //
-                ulong prod = UInt32x32To64(quo, (uint)den); // quo * lo divisor
+                ulong prod = quo * (den & uint.MaxValue); // quo * lo divisor
                 num -= prod;
 
                 if (num > ~prod)
@@ -427,8 +422,8 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen)
 
                 // Compute full remainder, rem = dividend - (quo * divisor).
                 //
-                ulong prod1 = UInt32x32To64(quo, bufDen.U0); // quo * lo divisor
-                ulong prod2 = UInt32x32To64(quo, bufDen.U1); // quo * mid divisor
+                ulong prod1 = (ulong)quo * bufDen.U0; // quo * lo divisor
+                ulong prod2 = (ulong)quo * bufDen.U1; // quo * mid divisor
                 prod2 += prod1 >> 32;
                 prod1 = (uint)prod1 | (prod2 << 32);
                 prod2 >>= 32;
@@ -487,23 +482,23 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen)
             /// <returns>Returns highest 32 bits of product</returns>
             private static uint IncreaseScale(ref Buf12 bufNum, uint power)
             {
-                ulong tmp = UInt32x32To64(bufNum.U0, power);
+                ulong tmp = (ulong)bufNum.U0 * power;
                 bufNum.U0 = (uint)tmp;
                 tmp >>= 32;
-                tmp += UInt32x32To64(bufNum.U1, power);
+                tmp += (ulong)bufNum.U1 * power;
                 bufNum.U1 = (uint)tmp;
                 tmp >>= 32;
-                tmp += UInt32x32To64(bufNum.U2, power);
+                tmp += (ulong)bufNum.U2 * power;
                 bufNum.U2 = (uint)tmp;
                 return (uint)(tmp >> 32);
             }
 
             private static void IncreaseScale64(ref Buf12 bufNum, uint power)
             {
-                ulong tmp = UInt32x32To64(bufNum.U0, power);
+                ulong tmp = (ulong)bufNum.U0 * power;
                 bufNum.U0 = (uint)tmp;
                 tmp >>= 32;
-                tmp += UInt32x32To64(bufNum.U1, power);
+                tmp += (ulong)bufNum.U1 * power;
                 bufNum.High64 = tmp;
             }
 
@@ -921,11 +916,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                             {
                                 if (scale <= MaxInt32Scale)
                                 {
-                                    low64 = UInt32x32To64((uint)low64, UInt32Powers10[scale]);
+                                    low64 = (low64 & uint.MaxValue) * UInt32Powers10[scale];
                                     goto AlignedAdd;
                                 }
                                 scale -= MaxInt32Scale;
-                                low64 = UInt32x32To64((uint)low64, TenToPowerNine);
+                                low64 = (low64 & uint.MaxValue) * TenToPowerNine;
                             } while (low64 <= uint.MaxValue);
                         }
 
@@ -934,8 +929,8 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                             power = TenToPowerNine;
                             if (scale < MaxInt32Scale)
                                 power = UInt32Powers10[scale];
-                            tmpLow = UInt32x32To64((uint)low64, power);
-                            tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
+                            tmpLow = (low64 & uint.MaxValue) * power;
+                            tmp64 = ((low64 >> 32) * power) + (tmpLow >> 32);
                             low64 = (uint)tmpLow + (tmp64 << 32);
                             high = (uint)(tmp64 >> 32);
                             if ((scale -= MaxInt32Scale) <= 0)
@@ -950,11 +945,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                         power = TenToPowerNine;
                         if (scale < MaxInt32Scale)
                             power = UInt32Powers10[scale];
-                        tmpLow = UInt32x32To64((uint)low64, power);
-                        tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
+                        tmpLow = (low64 & uint.MaxValue) * power;
+                        tmp64 = ((low64 >> 32) * power) + (tmpLow >> 32);
                         low64 = (uint)tmpLow + (tmp64 << 32);
                         tmp64 >>= 32;
-                        tmp64 += UInt32x32To64(high, power);
+                        tmp64 += (ulong)high * power;
 
                         scale -= MaxInt32Scale;
                         if (tmp64 > uint.MaxValue)
@@ -986,7 +981,7 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                         for (uint cur = 0; ;)
                         {
                             Debug.Assert(cur < Buf24.Length);
-                            tmp64 += UInt32x32To64(rgulNum[cur], power);
+                            tmp64 += (ulong)rgulNum[cur] * power;
                             rgulNum[cur] = (uint)tmp64;
                             cur++;
                             tmp64 >>= 32;
@@ -1189,10 +1184,10 @@ internal static long VarCyFromDec(ref DecCalc pdecIn)
                     if (pdecIn.High != 0)
                         goto ThrowOverflow;
                     uint pwr = UInt32Powers10[-scale];
-                    ulong high = UInt32x32To64(pwr, pdecIn.Mid);
+                    ulong high = (ulong)pwr * pdecIn.Mid;
                     if (high > uint.MaxValue)
                         goto ThrowOverflow;
-                    ulong low = UInt32x32To64(pwr, pdecIn.Low);
+                    ulong low = (ulong)pwr * pdecIn.Low;
                     low += high <<= 32;
                     if (low < high)
                         goto ThrowOverflow;
@@ -1277,11 +1272,11 @@ private static int VarDecCmpSub(in decimal d1, in decimal d2)
                     do
                     {
                         uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale];
-                        ulong tmpLow = UInt32x32To64((uint)low64, power);
-                        ulong tmp = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
+                        ulong tmpLow = (low64 & uint.MaxValue) * power;
+                        ulong tmp = ((low64 >> 32) * power) + (tmpLow >> 32);
                         low64 = (uint)tmpLow + (tmp << 32);
                         tmp >>= 32;
-                        tmp += UInt32x32To64(high, power);
+                        tmp += (ulong)high * power;
                         // If the scaled value has more than 96 significant bits then it's greater than d2
                         if (tmp > uint.MaxValue)
                             return sign;
@@ -1324,7 +1319,7 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     {
                         // Upper 64 bits are zero.
                         //
-                        ulong low64 = UInt32x32To64(d1.Low, d2.Low);
+                        ulong low64 = (ulong)d1.Low * d2.Low;
                         if (scale > DEC_SCALE_MAX)
                         {
                             // Result scale is too big.  Divide result by power of 10 to reduce it.
@@ -1358,16 +1353,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     else
                     {
                         // Left value is 32-bit, result fits in 4 uints
-                        tmp = UInt32x32To64(d1.Low, d2.Low);
+                        tmp = (ulong)d1.Low * d2.Low;
                         bufProd.U0 = (uint)tmp;
 
-                        tmp = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32);
+                        tmp = ((ulong)d1.Low * d2.Mid) + (tmp >> 32);
                         bufProd.U1 = (uint)tmp;
                         tmp >>= 32;
 
                         if (d2.High != 0)
                         {
-                            tmp += UInt32x32To64(d1.Low, d2.High);
+                            tmp += (ulong)d1.Low * d2.High;
                             if (tmp > uint.MaxValue)
                             {
                                 bufProd.Mid64 = tmp;
@@ -1382,16 +1377,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                 else if ((d2.High | d2.Mid) == 0)
                 {
                     // Right value is 32-bit, result fits in 4 uints
-                    tmp = UInt32x32To64(d2.Low, d1.Low);
+                    tmp = (ulong)d2.Low * d1.Low;
                     bufProd.U0 = (uint)tmp;
 
-                    tmp = UInt32x32To64(d2.Low, d1.Mid) + (tmp >> 32);
+                    tmp = ((ulong)d2.Low * d1.Mid) + (tmp >> 32);
                     bufProd.U1 = (uint)tmp;
                     tmp >>= 32;
 
                     if (d1.High != 0)
                     {
-                        tmp += UInt32x32To64(d2.Low, d1.High);
+                        tmp += (ulong)d2.Low * d1.High;
                         if (tmp > uint.MaxValue)
                         {
                             bufProd.Mid64 = tmp;
@@ -1426,12 +1421,12 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     // [p-5][p-4][p-3][p-2][p-1][p-0]      prod[] array
                     //
 
-                    tmp = UInt32x32To64(d1.Low, d2.Low);
+                    tmp = (ulong)d1.Low * d2.Low;
                     bufProd.U0 = (uint)tmp;
 
-                    ulong tmp2 = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32);
+                    ulong tmp2 = ((ulong)d1.Low * d2.Mid) + (tmp >> 32);
 
-                    tmp = UInt32x32To64(d1.Mid, d2.Low);
+                    tmp = (ulong)d1.Mid * d2.Low;
                     tmp += tmp2; // this could generate carry
                     bufProd.U1 = (uint)tmp;
                     if (tmp < tmp2) // detect carry
@@ -1439,39 +1434,39 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     else
                         tmp2 = tmp >> 32;
 
-                    tmp = UInt32x32To64(d1.Mid, d2.Mid) + tmp2;
+                    tmp = ((ulong)d1.Mid * d2.Mid) + tmp2;
 
                     if ((d1.High | d2.High) > 0)
                     {
                         // Highest 32 bits is non-zero.     Calculate 5 more partial products.
                         //
-                        tmp2 = UInt32x32To64(d1.Low, d2.High);
+                        tmp2 = (ulong)d1.Low * d2.High;
                         tmp += tmp2; // this could generate carry
                         uint tmp3 = 0;
                         if (tmp < tmp2) // detect carry
                             tmp3 = 1;
 
-                        tmp2 = UInt32x32To64(d1.High, d2.Low);
+                        tmp2 = (ulong)d1.High * d2.Low;
                         tmp += tmp2; // this could generate carry
                         bufProd.U2 = (uint)tmp;
                         if (tmp < tmp2) // detect carry
                             tmp3++;
                         tmp2 = ((ulong)tmp3 << 32) | (tmp >> 32);
 
-                        tmp = UInt32x32To64(d1.Mid, d2.High);
+                        tmp = (ulong)d1.Mid * d2.High;
                         tmp += tmp2; // this could generate carry
                         tmp3 = 0;
                         if (tmp < tmp2) // detect carry
                             tmp3 = 1;
 
-                        tmp2 = UInt32x32To64(d1.High, d2.Mid);
+                        tmp2 = (ulong)d1.High * d2.Mid;
                         tmp += tmp2; // this could generate carry
                         bufProd.U3 = (uint)tmp;
                         if (tmp < tmp2) // detect carry
                             tmp3++;
                         tmp = ((ulong)tmp3 << 32) | (tmp >> 32);
 
-                        bufProd.High64 = UInt32x32To64(d1.High, d2.High) + tmp;
+                        bufProd.High64 = ((ulong)d1.High * d2.High) + tmp;
 
                         hiProd = 5;
                     }
@@ -1594,7 +1589,7 @@ internal static void VarDecFromR4(float input, out DecCalc result)
                     power = -power;
                     if (power < 10)
                     {
-                        result.Low64 = UInt32x32To64(mant, UInt32Powers10[power]);
+                        result.Low64 = (ulong)mant * UInt32Powers10[power];
                     }
                     else
                     {
@@ -1602,14 +1597,14 @@ internal static void VarDecFromR4(float input, out DecCalc result)
                         //
                         if (power > 18)
                         {
-                            ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 18]);
+                            ulong low64 = (ulong)mant * UInt32Powers10[power - 18];
                             UInt64x64To128(low64, TenToPowerEighteen, ref result);
                         }
                         else
                         {
-                            ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 9]);
-                            ulong hi64 = UInt32x32To64(TenToPowerNine, (uint)(low64 >> 32));
-                            low64 = UInt32x32To64(TenToPowerNine, (uint)low64);
+                            ulong low64 = (ulong)mant * UInt32Powers10[power - 9];
+                            ulong hi64 = TenToPowerNine * (low64 >> 32);
+                            low64 = TenToPowerNine * (low64 & uint.MaxValue);
                             result.Low = (uint)low64;
                             hi64 += low64 >> 32;
                             result.Mid = (uint)hi64;
@@ -1762,8 +1757,8 @@ internal static void VarDecFromR8(double input, out DecCalc result)
                     if (power < 10)
                     {
                         uint pow10 = UInt32Powers10[power];
-                        ulong low64 = UInt32x32To64((uint)mant, pow10);
-                        ulong hi64 = UInt32x32To64((uint)(mant >> 32), pow10);
+                        ulong low64 = (mant & uint.MaxValue) * pow10;
+                        ulong hi64 = (mant >> 32) * pow10;
                         result.Low = (uint)low64;
                         hi64 += low64 >> 32;
                         result.Mid = (uint)hi64;
@@ -1967,7 +1962,7 @@ internal static unsafe void VarDecDiv(ref DecCalc d1, ref DecCalc d2)
                         if (IncreaseScale(ref bufQuo, power) != 0)
                             goto ThrowOverflow;
 
-                        ulong num = UInt32x32To64(remainder, power);
+                        ulong num = (ulong)remainder * power;
                         // TODO: https://github.com/dotnet/runtime/issues/5213
                         uint div = (uint)(num / den);
                         remainder = (uint)num - div * den;
@@ -2200,7 +2195,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2)
                     do
                     {
                         uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale];
-                        ulong tmp = UInt32x32To64(d2.Low, power);
+                        ulong tmp = (ulong)d2.Low * power;
                         d2.Low = (uint)tmp;
                         tmp >>= 32;
                         tmp += (d2.Mid + ((ulong)d2.High << 32)) * power;
@@ -2227,7 +2222,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2)
                                 break;
                             uint power = iCurScale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[iCurScale];
                             scale += iCurScale;
-                            ulong tmp = UInt32x32To64(bufQuo.U0, power);
+                            ulong tmp = (ulong)bufQuo.U0 * power;
                             bufQuo.U0 = (uint)tmp;
                             tmp >>= 32;
                             bufQuo.High64 = tmp + bufQuo.High64 * power;
@@ -2288,12 +2283,12 @@ private static unsafe void VarDecModFull(ref DecCalc d1, ref DecCalc d2, int sca
                 {
                     uint power = scale <= -MaxInt32Scale ? TenToPowerNine : UInt32Powers10[-scale];
                     uint* buf = (uint*)&b;
-                    ulong tmp64 = UInt32x32To64(b.Buf24.U0, power);
+                    ulong tmp64 = (ulong)b.Buf24.U0 * power;
                     b.Buf24.U0 = (uint)tmp64;
                     for (int i = 1; i <= high; i++)
                     {
                         tmp64 >>= 32;
-                        tmp64 += UInt32x32To64(buf[i], power);
+                        tmp64 += (ulong)buf[i] * power;
                         buf[i] = (uint)tmp64;
                     }
                     // The high bit of the dividend must not be set.

From 6f04db38ed0d081c029fc2e69b72348168417cb7 Mon Sep 17 00:00:00 2001
From: lilinus <linus.hamlin@outlook.com>
Date: Fri, 13 Oct 2023 09:28:20 +0200
Subject: [PATCH 3/6] Revert "remove UInt32x32To64"

This reverts commit fab4430b43d7f0c90210013f44664a80273a2b96.
---
 .../src/System/Decimal.DecCalc.cs             | 103 +++++++++---------
 1 file changed, 54 insertions(+), 49 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
index 1e9425a69ee77..6082de804153b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
@@ -177,6 +177,11 @@ private static unsafe uint GetExponent(double d)
                 return (uint)(BitConverter.DoubleToUInt64Bits(d) >> 52) & 0x7FFu;
             }
 
+            private static ulong UInt32x32To64(uint a, uint b)
+            {
+                return (ulong)a * (ulong)b;
+            }
+
             private static void UInt64x64To128(ulong a, ulong b, ref DecCalc result)
             {
                 ulong high = Math.BigMul(a, b, out ulong low);
@@ -376,7 +381,7 @@ private static uint Div96By64(ref Buf12 bufNum, ulong den)
 
                 // Compute full remainder, rem = dividend - (quo * divisor).
                 //
-                ulong prod = quo * (den & uint.MaxValue); // quo * lo divisor
+                ulong prod = UInt32x32To64(quo, (uint)den); // quo * lo divisor
                 num -= prod;
 
                 if (num > ~prod)
@@ -422,8 +427,8 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen)
 
                 // Compute full remainder, rem = dividend - (quo * divisor).
                 //
-                ulong prod1 = (ulong)quo * bufDen.U0; // quo * lo divisor
-                ulong prod2 = (ulong)quo * bufDen.U1; // quo * mid divisor
+                ulong prod1 = UInt32x32To64(quo, bufDen.U0); // quo * lo divisor
+                ulong prod2 = UInt32x32To64(quo, bufDen.U1); // quo * mid divisor
                 prod2 += prod1 >> 32;
                 prod1 = (uint)prod1 | (prod2 << 32);
                 prod2 >>= 32;
@@ -482,23 +487,23 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen)
             /// <returns>Returns highest 32 bits of product</returns>
             private static uint IncreaseScale(ref Buf12 bufNum, uint power)
             {
-                ulong tmp = (ulong)bufNum.U0 * power;
+                ulong tmp = UInt32x32To64(bufNum.U0, power);
                 bufNum.U0 = (uint)tmp;
                 tmp >>= 32;
-                tmp += (ulong)bufNum.U1 * power;
+                tmp += UInt32x32To64(bufNum.U1, power);
                 bufNum.U1 = (uint)tmp;
                 tmp >>= 32;
-                tmp += (ulong)bufNum.U2 * power;
+                tmp += UInt32x32To64(bufNum.U2, power);
                 bufNum.U2 = (uint)tmp;
                 return (uint)(tmp >> 32);
             }
 
             private static void IncreaseScale64(ref Buf12 bufNum, uint power)
             {
-                ulong tmp = (ulong)bufNum.U0 * power;
+                ulong tmp = UInt32x32To64(bufNum.U0, power);
                 bufNum.U0 = (uint)tmp;
                 tmp >>= 32;
-                tmp += (ulong)bufNum.U1 * power;
+                tmp += UInt32x32To64(bufNum.U1, power);
                 bufNum.High64 = tmp;
             }
 
@@ -916,11 +921,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                             {
                                 if (scale <= MaxInt32Scale)
                                 {
-                                    low64 = (low64 & uint.MaxValue) * UInt32Powers10[scale];
+                                    low64 = UInt32x32To64((uint)low64, UInt32Powers10[scale]);
                                     goto AlignedAdd;
                                 }
                                 scale -= MaxInt32Scale;
-                                low64 = (low64 & uint.MaxValue) * TenToPowerNine;
+                                low64 = UInt32x32To64((uint)low64, TenToPowerNine);
                             } while (low64 <= uint.MaxValue);
                         }
 
@@ -929,8 +934,8 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                             power = TenToPowerNine;
                             if (scale < MaxInt32Scale)
                                 power = UInt32Powers10[scale];
-                            tmpLow = (low64 & uint.MaxValue) * power;
-                            tmp64 = ((low64 >> 32) * power) + (tmpLow >> 32);
+                            tmpLow = UInt32x32To64((uint)low64, power);
+                            tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
                             low64 = (uint)tmpLow + (tmp64 << 32);
                             high = (uint)(tmp64 >> 32);
                             if ((scale -= MaxInt32Scale) <= 0)
@@ -945,11 +950,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                         power = TenToPowerNine;
                         if (scale < MaxInt32Scale)
                             power = UInt32Powers10[scale];
-                        tmpLow = (low64 & uint.MaxValue) * power;
-                        tmp64 = ((low64 >> 32) * power) + (tmpLow >> 32);
+                        tmpLow = UInt32x32To64((uint)low64, power);
+                        tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
                         low64 = (uint)tmpLow + (tmp64 << 32);
                         tmp64 >>= 32;
-                        tmp64 += (ulong)high * power;
+                        tmp64 += UInt32x32To64(high, power);
 
                         scale -= MaxInt32Scale;
                         if (tmp64 > uint.MaxValue)
@@ -981,7 +986,7 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                         for (uint cur = 0; ;)
                         {
                             Debug.Assert(cur < Buf24.Length);
-                            tmp64 += (ulong)rgulNum[cur] * power;
+                            tmp64 += UInt32x32To64(rgulNum[cur], power);
                             rgulNum[cur] = (uint)tmp64;
                             cur++;
                             tmp64 >>= 32;
@@ -1184,10 +1189,10 @@ internal static long VarCyFromDec(ref DecCalc pdecIn)
                     if (pdecIn.High != 0)
                         goto ThrowOverflow;
                     uint pwr = UInt32Powers10[-scale];
-                    ulong high = (ulong)pwr * pdecIn.Mid;
+                    ulong high = UInt32x32To64(pwr, pdecIn.Mid);
                     if (high > uint.MaxValue)
                         goto ThrowOverflow;
-                    ulong low = (ulong)pwr * pdecIn.Low;
+                    ulong low = UInt32x32To64(pwr, pdecIn.Low);
                     low += high <<= 32;
                     if (low < high)
                         goto ThrowOverflow;
@@ -1272,11 +1277,11 @@ private static int VarDecCmpSub(in decimal d1, in decimal d2)
                     do
                     {
                         uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale];
-                        ulong tmpLow = (low64 & uint.MaxValue) * power;
-                        ulong tmp = ((low64 >> 32) * power) + (tmpLow >> 32);
+                        ulong tmpLow = UInt32x32To64((uint)low64, power);
+                        ulong tmp = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
                         low64 = (uint)tmpLow + (tmp << 32);
                         tmp >>= 32;
-                        tmp += (ulong)high * power;
+                        tmp += UInt32x32To64(high, power);
                         // If the scaled value has more than 96 significant bits then it's greater than d2
                         if (tmp > uint.MaxValue)
                             return sign;
@@ -1319,7 +1324,7 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     {
                         // Upper 64 bits are zero.
                         //
-                        ulong low64 = (ulong)d1.Low * d2.Low;
+                        ulong low64 = UInt32x32To64(d1.Low, d2.Low);
                         if (scale > DEC_SCALE_MAX)
                         {
                             // Result scale is too big.  Divide result by power of 10 to reduce it.
@@ -1353,16 +1358,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     else
                     {
                         // Left value is 32-bit, result fits in 4 uints
-                        tmp = (ulong)d1.Low * d2.Low;
+                        tmp = UInt32x32To64(d1.Low, d2.Low);
                         bufProd.U0 = (uint)tmp;
 
-                        tmp = ((ulong)d1.Low * d2.Mid) + (tmp >> 32);
+                        tmp = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32);
                         bufProd.U1 = (uint)tmp;
                         tmp >>= 32;
 
                         if (d2.High != 0)
                         {
-                            tmp += (ulong)d1.Low * d2.High;
+                            tmp += UInt32x32To64(d1.Low, d2.High);
                             if (tmp > uint.MaxValue)
                             {
                                 bufProd.Mid64 = tmp;
@@ -1377,16 +1382,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                 else if ((d2.High | d2.Mid) == 0)
                 {
                     // Right value is 32-bit, result fits in 4 uints
-                    tmp = (ulong)d2.Low * d1.Low;
+                    tmp = UInt32x32To64(d2.Low, d1.Low);
                     bufProd.U0 = (uint)tmp;
 
-                    tmp = ((ulong)d2.Low * d1.Mid) + (tmp >> 32);
+                    tmp = UInt32x32To64(d2.Low, d1.Mid) + (tmp >> 32);
                     bufProd.U1 = (uint)tmp;
                     tmp >>= 32;
 
                     if (d1.High != 0)
                     {
-                        tmp += (ulong)d2.Low * d1.High;
+                        tmp += UInt32x32To64(d2.Low, d1.High);
                         if (tmp > uint.MaxValue)
                         {
                             bufProd.Mid64 = tmp;
@@ -1421,12 +1426,12 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     // [p-5][p-4][p-3][p-2][p-1][p-0]      prod[] array
                     //
 
-                    tmp = (ulong)d1.Low * d2.Low;
+                    tmp = UInt32x32To64(d1.Low, d2.Low);
                     bufProd.U0 = (uint)tmp;
 
-                    ulong tmp2 = ((ulong)d1.Low * d2.Mid) + (tmp >> 32);
+                    ulong tmp2 = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32);
 
-                    tmp = (ulong)d1.Mid * d2.Low;
+                    tmp = UInt32x32To64(d1.Mid, d2.Low);
                     tmp += tmp2; // this could generate carry
                     bufProd.U1 = (uint)tmp;
                     if (tmp < tmp2) // detect carry
@@ -1434,39 +1439,39 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     else
                         tmp2 = tmp >> 32;
 
-                    tmp = ((ulong)d1.Mid * d2.Mid) + tmp2;
+                    tmp = UInt32x32To64(d1.Mid, d2.Mid) + tmp2;
 
                     if ((d1.High | d2.High) > 0)
                     {
                         // Highest 32 bits is non-zero.     Calculate 5 more partial products.
                         //
-                        tmp2 = (ulong)d1.Low * d2.High;
+                        tmp2 = UInt32x32To64(d1.Low, d2.High);
                         tmp += tmp2; // this could generate carry
                         uint tmp3 = 0;
                         if (tmp < tmp2) // detect carry
                             tmp3 = 1;
 
-                        tmp2 = (ulong)d1.High * d2.Low;
+                        tmp2 = UInt32x32To64(d1.High, d2.Low);
                         tmp += tmp2; // this could generate carry
                         bufProd.U2 = (uint)tmp;
                         if (tmp < tmp2) // detect carry
                             tmp3++;
                         tmp2 = ((ulong)tmp3 << 32) | (tmp >> 32);
 
-                        tmp = (ulong)d1.Mid * d2.High;
+                        tmp = UInt32x32To64(d1.Mid, d2.High);
                         tmp += tmp2; // this could generate carry
                         tmp3 = 0;
                         if (tmp < tmp2) // detect carry
                             tmp3 = 1;
 
-                        tmp2 = (ulong)d1.High * d2.Mid;
+                        tmp2 = UInt32x32To64(d1.High, d2.Mid);
                         tmp += tmp2; // this could generate carry
                         bufProd.U3 = (uint)tmp;
                         if (tmp < tmp2) // detect carry
                             tmp3++;
                         tmp = ((ulong)tmp3 << 32) | (tmp >> 32);
 
-                        bufProd.High64 = ((ulong)d1.High * d2.High) + tmp;
+                        bufProd.High64 = UInt32x32To64(d1.High, d2.High) + tmp;
 
                         hiProd = 5;
                     }
@@ -1589,7 +1594,7 @@ internal static void VarDecFromR4(float input, out DecCalc result)
                     power = -power;
                     if (power < 10)
                     {
-                        result.Low64 = (ulong)mant * UInt32Powers10[power];
+                        result.Low64 = UInt32x32To64(mant, UInt32Powers10[power]);
                     }
                     else
                     {
@@ -1597,14 +1602,14 @@ internal static void VarDecFromR4(float input, out DecCalc result)
                         //
                         if (power > 18)
                         {
-                            ulong low64 = (ulong)mant * UInt32Powers10[power - 18];
+                            ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 18]);
                             UInt64x64To128(low64, TenToPowerEighteen, ref result);
                         }
                         else
                         {
-                            ulong low64 = (ulong)mant * UInt32Powers10[power - 9];
-                            ulong hi64 = TenToPowerNine * (low64 >> 32);
-                            low64 = TenToPowerNine * (low64 & uint.MaxValue);
+                            ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 9]);
+                            ulong hi64 = UInt32x32To64(TenToPowerNine, (uint)(low64 >> 32));
+                            low64 = UInt32x32To64(TenToPowerNine, (uint)low64);
                             result.Low = (uint)low64;
                             hi64 += low64 >> 32;
                             result.Mid = (uint)hi64;
@@ -1757,8 +1762,8 @@ internal static void VarDecFromR8(double input, out DecCalc result)
                     if (power < 10)
                     {
                         uint pow10 = UInt32Powers10[power];
-                        ulong low64 = (mant & uint.MaxValue) * pow10;
-                        ulong hi64 = (mant >> 32) * pow10;
+                        ulong low64 = UInt32x32To64((uint)mant, pow10);
+                        ulong hi64 = UInt32x32To64((uint)(mant >> 32), pow10);
                         result.Low = (uint)low64;
                         hi64 += low64 >> 32;
                         result.Mid = (uint)hi64;
@@ -1962,7 +1967,7 @@ internal static unsafe void VarDecDiv(ref DecCalc d1, ref DecCalc d2)
                         if (IncreaseScale(ref bufQuo, power) != 0)
                             goto ThrowOverflow;
 
-                        ulong num = (ulong)remainder * power;
+                        ulong num = UInt32x32To64(remainder, power);
                         // TODO: https://github.com/dotnet/runtime/issues/5213
                         uint div = (uint)(num / den);
                         remainder = (uint)num - div * den;
@@ -2195,7 +2200,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2)
                     do
                     {
                         uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale];
-                        ulong tmp = (ulong)d2.Low * power;
+                        ulong tmp = UInt32x32To64(d2.Low, power);
                         d2.Low = (uint)tmp;
                         tmp >>= 32;
                         tmp += (d2.Mid + ((ulong)d2.High << 32)) * power;
@@ -2222,7 +2227,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2)
                                 break;
                             uint power = iCurScale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[iCurScale];
                             scale += iCurScale;
-                            ulong tmp = (ulong)bufQuo.U0 * power;
+                            ulong tmp = UInt32x32To64(bufQuo.U0, power);
                             bufQuo.U0 = (uint)tmp;
                             tmp >>= 32;
                             bufQuo.High64 = tmp + bufQuo.High64 * power;
@@ -2283,12 +2288,12 @@ private static unsafe void VarDecModFull(ref DecCalc d1, ref DecCalc d2, int sca
                 {
                     uint power = scale <= -MaxInt32Scale ? TenToPowerNine : UInt32Powers10[-scale];
                     uint* buf = (uint*)&b;
-                    ulong tmp64 = (ulong)b.Buf24.U0 * power;
+                    ulong tmp64 = UInt32x32To64(b.Buf24.U0, power);
                     b.Buf24.U0 = (uint)tmp64;
                     for (int i = 1; i <= high; i++)
                     {
                         tmp64 >>= 32;
-                        tmp64 += (ulong)buf[i] * power;
+                        tmp64 += UInt32x32To64(buf[i], power);
                         buf[i] = (uint)tmp64;
                     }
                     // The high bit of the dividend must not be set.

From 05729880a6e3f0f34ee354d552fade055acd38f4 Mon Sep 17 00:00:00 2001
From: lilinus <linus.hamlin@outlook.com>
Date: Fri, 13 Oct 2023 09:56:27 +0200
Subject: [PATCH 4/6] Add internal Math.BigMul(uint, uint)

---
 .../src/System/Decimal.DecCalc.cs             | 103 +++++++++---------
 .../System.Private.CoreLib/src/System/Math.cs |   5 +
 2 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
index 6082de804153b..f91f82d31358d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs
@@ -177,11 +177,6 @@ private static unsafe uint GetExponent(double d)
                 return (uint)(BitConverter.DoubleToUInt64Bits(d) >> 52) & 0x7FFu;
             }
 
-            private static ulong UInt32x32To64(uint a, uint b)
-            {
-                return (ulong)a * (ulong)b;
-            }
-
             private static void UInt64x64To128(ulong a, ulong b, ref DecCalc result)
             {
                 ulong high = Math.BigMul(a, b, out ulong low);
@@ -381,7 +376,7 @@ private static uint Div96By64(ref Buf12 bufNum, ulong den)
 
                 // Compute full remainder, rem = dividend - (quo * divisor).
                 //
-                ulong prod = UInt32x32To64(quo, (uint)den); // quo * lo divisor
+                ulong prod = Math.BigMul(quo, (uint)den); // quo * lo divisor
                 num -= prod;
 
                 if (num > ~prod)
@@ -427,8 +422,8 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen)
 
                 // Compute full remainder, rem = dividend - (quo * divisor).
                 //
-                ulong prod1 = UInt32x32To64(quo, bufDen.U0); // quo * lo divisor
-                ulong prod2 = UInt32x32To64(quo, bufDen.U1); // quo * mid divisor
+                ulong prod1 = Math.BigMul(quo, bufDen.U0); // quo * lo divisor
+                ulong prod2 = Math.BigMul(quo, bufDen.U1); // quo * mid divisor
                 prod2 += prod1 >> 32;
                 prod1 = (uint)prod1 | (prod2 << 32);
                 prod2 >>= 32;
@@ -487,23 +482,23 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen)
             /// <returns>Returns highest 32 bits of product</returns>
             private static uint IncreaseScale(ref Buf12 bufNum, uint power)
             {
-                ulong tmp = UInt32x32To64(bufNum.U0, power);
+                ulong tmp = Math.BigMul(bufNum.U0, power);
                 bufNum.U0 = (uint)tmp;
                 tmp >>= 32;
-                tmp += UInt32x32To64(bufNum.U1, power);
+                tmp += Math.BigMul(bufNum.U1, power);
                 bufNum.U1 = (uint)tmp;
                 tmp >>= 32;
-                tmp += UInt32x32To64(bufNum.U2, power);
+                tmp += Math.BigMul(bufNum.U2, power);
                 bufNum.U2 = (uint)tmp;
                 return (uint)(tmp >> 32);
             }
 
             private static void IncreaseScale64(ref Buf12 bufNum, uint power)
             {
-                ulong tmp = UInt32x32To64(bufNum.U0, power);
+                ulong tmp = Math.BigMul(bufNum.U0, power);
                 bufNum.U0 = (uint)tmp;
                 tmp >>= 32;
-                tmp += UInt32x32To64(bufNum.U1, power);
+                tmp += Math.BigMul(bufNum.U1, power);
                 bufNum.High64 = tmp;
             }
 
@@ -921,11 +916,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                             {
                                 if (scale <= MaxInt32Scale)
                                 {
-                                    low64 = UInt32x32To64((uint)low64, UInt32Powers10[scale]);
+                                    low64 = Math.BigMul((uint)low64, UInt32Powers10[scale]);
                                     goto AlignedAdd;
                                 }
                                 scale -= MaxInt32Scale;
-                                low64 = UInt32x32To64((uint)low64, TenToPowerNine);
+                                low64 = Math.BigMul((uint)low64, TenToPowerNine);
                             } while (low64 <= uint.MaxValue);
                         }
 
@@ -934,8 +929,8 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                             power = TenToPowerNine;
                             if (scale < MaxInt32Scale)
                                 power = UInt32Powers10[scale];
-                            tmpLow = UInt32x32To64((uint)low64, power);
-                            tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
+                            tmpLow = Math.BigMul((uint)low64, power);
+                            tmp64 = Math.BigMul((uint)(low64 >> 32), power) + (tmpLow >> 32);
                             low64 = (uint)tmpLow + (tmp64 << 32);
                             high = (uint)(tmp64 >> 32);
                             if ((scale -= MaxInt32Scale) <= 0)
@@ -950,11 +945,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                         power = TenToPowerNine;
                         if (scale < MaxInt32Scale)
                             power = UInt32Powers10[scale];
-                        tmpLow = UInt32x32To64((uint)low64, power);
-                        tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
+                        tmpLow = Math.BigMul((uint)low64, power);
+                        tmp64 = Math.BigMul((uint)(low64 >> 32), power) + (tmpLow >> 32);
                         low64 = (uint)tmpLow + (tmp64 << 32);
                         tmp64 >>= 32;
-                        tmp64 += UInt32x32To64(high, power);
+                        tmp64 += Math.BigMul(high, power);
 
                         scale -= MaxInt32Scale;
                         if (tmp64 > uint.MaxValue)
@@ -986,7 +981,7 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
                         for (uint cur = 0; ;)
                         {
                             Debug.Assert(cur < Buf24.Length);
-                            tmp64 += UInt32x32To64(rgulNum[cur], power);
+                            tmp64 += Math.BigMul(rgulNum[cur], power);
                             rgulNum[cur] = (uint)tmp64;
                             cur++;
                             tmp64 >>= 32;
@@ -1189,10 +1184,10 @@ internal static long VarCyFromDec(ref DecCalc pdecIn)
                     if (pdecIn.High != 0)
                         goto ThrowOverflow;
                     uint pwr = UInt32Powers10[-scale];
-                    ulong high = UInt32x32To64(pwr, pdecIn.Mid);
+                    ulong high = Math.BigMul(pwr, pdecIn.Mid);
                     if (high > uint.MaxValue)
                         goto ThrowOverflow;
-                    ulong low = UInt32x32To64(pwr, pdecIn.Low);
+                    ulong low = Math.BigMul(pwr, pdecIn.Low);
                     low += high <<= 32;
                     if (low < high)
                         goto ThrowOverflow;
@@ -1277,11 +1272,11 @@ private static int VarDecCmpSub(in decimal d1, in decimal d2)
                     do
                     {
                         uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale];
-                        ulong tmpLow = UInt32x32To64((uint)low64, power);
-                        ulong tmp = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
+                        ulong tmpLow = Math.BigMul((uint)low64, power);
+                        ulong tmp = Math.BigMul((uint)(low64 >> 32), power) + (tmpLow >> 32);
                         low64 = (uint)tmpLow + (tmp << 32);
                         tmp >>= 32;
-                        tmp += UInt32x32To64(high, power);
+                        tmp += Math.BigMul(high, power);
                         // If the scaled value has more than 96 significant bits then it's greater than d2
                         if (tmp > uint.MaxValue)
                             return sign;
@@ -1324,7 +1319,7 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     {
                         // Upper 64 bits are zero.
                         //
-                        ulong low64 = UInt32x32To64(d1.Low, d2.Low);
+                        ulong low64 = Math.BigMul(d1.Low, d2.Low);
                         if (scale > DEC_SCALE_MAX)
                         {
                             // Result scale is too big.  Divide result by power of 10 to reduce it.
@@ -1358,16 +1353,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     else
                     {
                         // Left value is 32-bit, result fits in 4 uints
-                        tmp = UInt32x32To64(d1.Low, d2.Low);
+                        tmp = Math.BigMul(d1.Low, d2.Low);
                         bufProd.U0 = (uint)tmp;
 
-                        tmp = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32);
+                        tmp = Math.BigMul(d1.Low, d2.Mid) + (tmp >> 32);
                         bufProd.U1 = (uint)tmp;
                         tmp >>= 32;
 
                         if (d2.High != 0)
                         {
-                            tmp += UInt32x32To64(d1.Low, d2.High);
+                            tmp += Math.BigMul(d1.Low, d2.High);
                             if (tmp > uint.MaxValue)
                             {
                                 bufProd.Mid64 = tmp;
@@ -1382,16 +1377,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                 else if ((d2.High | d2.Mid) == 0)
                 {
                     // Right value is 32-bit, result fits in 4 uints
-                    tmp = UInt32x32To64(d2.Low, d1.Low);
+                    tmp = Math.BigMul(d2.Low, d1.Low);
                     bufProd.U0 = (uint)tmp;
 
-                    tmp = UInt32x32To64(d2.Low, d1.Mid) + (tmp >> 32);
+                    tmp = Math.BigMul(d2.Low, d1.Mid) + (tmp >> 32);
                     bufProd.U1 = (uint)tmp;
                     tmp >>= 32;
 
                     if (d1.High != 0)
                     {
-                        tmp += UInt32x32To64(d2.Low, d1.High);
+                        tmp += Math.BigMul(d2.Low, d1.High);
                         if (tmp > uint.MaxValue)
                         {
                             bufProd.Mid64 = tmp;
@@ -1426,12 +1421,12 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     // [p-5][p-4][p-3][p-2][p-1][p-0]      prod[] array
                     //
 
-                    tmp = UInt32x32To64(d1.Low, d2.Low);
+                    tmp = Math.BigMul(d1.Low, d2.Low);
                     bufProd.U0 = (uint)tmp;
 
-                    ulong tmp2 = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32);
+                    ulong tmp2 = Math.BigMul(d1.Low, d2.Mid) + (tmp >> 32);
 
-                    tmp = UInt32x32To64(d1.Mid, d2.Low);
+                    tmp = Math.BigMul(d1.Mid, d2.Low);
                     tmp += tmp2; // this could generate carry
                     bufProd.U1 = (uint)tmp;
                     if (tmp < tmp2) // detect carry
@@ -1439,39 +1434,39 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
                     else
                         tmp2 = tmp >> 32;
 
-                    tmp = UInt32x32To64(d1.Mid, d2.Mid) + tmp2;
+                    tmp = Math.BigMul(d1.Mid, d2.Mid) + tmp2;
 
                     if ((d1.High | d2.High) > 0)
                     {
                         // Highest 32 bits is non-zero.     Calculate 5 more partial products.
                         //
-                        tmp2 = UInt32x32To64(d1.Low, d2.High);
+                        tmp2 = Math.BigMul(d1.Low, d2.High);
                         tmp += tmp2; // this could generate carry
                         uint tmp3 = 0;
                         if (tmp < tmp2) // detect carry
                             tmp3 = 1;
 
-                        tmp2 = UInt32x32To64(d1.High, d2.Low);
+                        tmp2 = Math.BigMul(d1.High, d2.Low);
                         tmp += tmp2; // this could generate carry
                         bufProd.U2 = (uint)tmp;
                         if (tmp < tmp2) // detect carry
                             tmp3++;
                         tmp2 = ((ulong)tmp3 << 32) | (tmp >> 32);
 
-                        tmp = UInt32x32To64(d1.Mid, d2.High);
+                        tmp = Math.BigMul(d1.Mid, d2.High);
                         tmp += tmp2; // this could generate carry
                         tmp3 = 0;
                         if (tmp < tmp2) // detect carry
                             tmp3 = 1;
 
-                        tmp2 = UInt32x32To64(d1.High, d2.Mid);
+                        tmp2 = Math.BigMul(d1.High, d2.Mid);
                         tmp += tmp2; // this could generate carry
                         bufProd.U3 = (uint)tmp;
                         if (tmp < tmp2) // detect carry
                             tmp3++;
                         tmp = ((ulong)tmp3 << 32) | (tmp >> 32);
 
-                        bufProd.High64 = UInt32x32To64(d1.High, d2.High) + tmp;
+                        bufProd.High64 = Math.BigMul(d1.High, d2.High) + tmp;
 
                         hiProd = 5;
                     }
@@ -1594,7 +1589,7 @@ internal static void VarDecFromR4(float input, out DecCalc result)
                     power = -power;
                     if (power < 10)
                     {
-                        result.Low64 = UInt32x32To64(mant, UInt32Powers10[power]);
+                        result.Low64 = Math.BigMul(mant, UInt32Powers10[power]);
                     }
                     else
                     {
@@ -1602,14 +1597,14 @@ internal static void VarDecFromR4(float input, out DecCalc result)
                         //
                         if (power > 18)
                         {
-                            ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 18]);
+                            ulong low64 = Math.BigMul(mant, UInt32Powers10[power - 18]);
                             UInt64x64To128(low64, TenToPowerEighteen, ref result);
                         }
                         else
                         {
-                            ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 9]);
-                            ulong hi64 = UInt32x32To64(TenToPowerNine, (uint)(low64 >> 32));
-                            low64 = UInt32x32To64(TenToPowerNine, (uint)low64);
+                            ulong low64 = Math.BigMul(mant, UInt32Powers10[power - 9]);
+                            ulong hi64 = Math.BigMul(TenToPowerNine, (uint)(low64 >> 32));
+                            low64 = Math.BigMul(TenToPowerNine, (uint)low64);
                             result.Low = (uint)low64;
                             hi64 += low64 >> 32;
                             result.Mid = (uint)hi64;
@@ -1762,8 +1757,8 @@ internal static void VarDecFromR8(double input, out DecCalc result)
                     if (power < 10)
                     {
                         uint pow10 = UInt32Powers10[power];
-                        ulong low64 = UInt32x32To64((uint)mant, pow10);
-                        ulong hi64 = UInt32x32To64((uint)(mant >> 32), pow10);
+                        ulong low64 = Math.BigMul((uint)mant, pow10);
+                        ulong hi64 = Math.BigMul((uint)(mant >> 32), pow10);
                         result.Low = (uint)low64;
                         hi64 += low64 >> 32;
                         result.Mid = (uint)hi64;
@@ -1967,7 +1962,7 @@ internal static unsafe void VarDecDiv(ref DecCalc d1, ref DecCalc d2)
                         if (IncreaseScale(ref bufQuo, power) != 0)
                             goto ThrowOverflow;
 
-                        ulong num = UInt32x32To64(remainder, power);
+                        ulong num = Math.BigMul(remainder, power);
                         // TODO: https://github.com/dotnet/runtime/issues/5213
                         uint div = (uint)(num / den);
                         remainder = (uint)num - div * den;
@@ -2200,7 +2195,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2)
                     do
                     {
                         uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale];
-                        ulong tmp = UInt32x32To64(d2.Low, power);
+                        ulong tmp = Math.BigMul(d2.Low, power);
                         d2.Low = (uint)tmp;
                         tmp >>= 32;
                         tmp += (d2.Mid + ((ulong)d2.High << 32)) * power;
@@ -2227,7 +2222,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2)
                                 break;
                             uint power = iCurScale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[iCurScale];
                             scale += iCurScale;
-                            ulong tmp = UInt32x32To64(bufQuo.U0, power);
+                            ulong tmp = Math.BigMul(bufQuo.U0, power);
                             bufQuo.U0 = (uint)tmp;
                             tmp >>= 32;
                             bufQuo.High64 = tmp + bufQuo.High64 * power;
@@ -2288,12 +2283,12 @@ private static unsafe void VarDecModFull(ref DecCalc d1, ref DecCalc d2, int sca
                 {
                     uint power = scale <= -MaxInt32Scale ? TenToPowerNine : UInt32Powers10[-scale];
                     uint* buf = (uint*)&b;
-                    ulong tmp64 = UInt32x32To64(b.Buf24.U0, power);
+                    ulong tmp64 = Math.BigMul(b.Buf24.U0, power);
                     b.Buf24.U0 = (uint)tmp64;
                     for (int i = 1; i <= high; i++)
                     {
                         tmp64 >>= 32;
-                        tmp64 += UInt32x32To64(buf[i], power);
+                        tmp64 += Math.BigMul(buf[i], power);
                         buf[i] = (uint)tmp64;
                     }
                     // The high bit of the dividend must not be set.
diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index b3904a946e51a..5fcf2ed694fcb 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -155,6 +155,11 @@ internal static void ThrowNegateTwosCompOverflow()
             throw new OverflowException(SR.Overflow_NegateTwosCompNum);
         }
 
+        internal static ulong BigMul(uint a, uint b)
+        {
+            return ((ulong)a) * b;
+        }
+
         public static long BigMul(int a, int b)
         {
             return ((long)a) * b;

From 495a8e5766fe13aff4df5211d80d88017c8231db Mon Sep 17 00:00:00 2001
From: lilinus <linus.hamlin@outlook.com>
Date: Fri, 13 Oct 2023 11:04:36 +0200
Subject: [PATCH 5/6] Use x86 intrinsic in Math.BigMul(uint, uint) for 32-bit

---
 .../System.Private.CoreLib/src/System/Math.cs          | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs
index 5fcf2ed694fcb..53e96f11c7ca4 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Math.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs
@@ -155,8 +155,16 @@ internal static void ThrowNegateTwosCompOverflow()
             throw new OverflowException(SR.Overflow_NegateTwosCompNum);
         }
 
-        internal static ulong BigMul(uint a, uint b)
+        internal static unsafe ulong BigMul(uint a, uint b)
         {
+#if TARGET_32BIT
+            if (Bmi2.IsSupported)
+            {
+                uint low;
+                uint high = Bmi2.MultiplyNoFlags(a, b, &low);
+                return ((ulong)high << 32) | low;
+            }
+#endif
             return ((ulong)a) * b;
         }
 

From 8c3f5ca0caf38ed9fa88feb2820ec5e6ed5e2f61 Mon Sep 17 00:00:00 2001
From: lilinus <linus.hamlin@outlook.com>
Date: Tue, 17 Oct 2023 16:28:52 +0200
Subject: [PATCH 6/6] Use unsigned Math.BigMul

---
 src/libraries/System.Private.CoreLib/src/System/DateTime.cs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/DateTime.cs b/src/libraries/System.Private.CoreLib/src/System/DateTime.cs
index 3571fff343152..39a6e35a22f52 100644
--- a/src/libraries/System.Private.CoreLib/src/System/DateTime.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/DateTime.cs
@@ -1385,7 +1385,7 @@ internal void GetDate(out int year, out int month, out int day)
             // y100 = number of whole 100-year periods since 3/1/0000
             // r1 = (day number within 100-year period) * 4
             (uint y100, uint r1) = Math.DivRem(((uint)(UTicks / TicksPer6Hours) | 3U) + 1224, DaysPer400Years);
-            ulong u2 = (ulong)Math.BigMul((int)EafMultiplier, (int)r1 | 3);
+            ulong u2 = Math.BigMul(EafMultiplier, r1 | 3U);
             ushort daySinceMarch1 = (ushort)((uint)u2 / EafDivider);
             int n3 = 2141 * daySinceMarch1 + 197913;
             year = (int)(100 * y100 + (uint)(u2 >> 32));
@@ -1447,7 +1447,7 @@ public int Day
             {
                 // r1 = (day number within 100-year period) * 4
                 uint r1 = (((uint)(UTicks / TicksPer6Hours) | 3U) + 1224) % DaysPer400Years;
-                ulong u2 = (ulong)Math.BigMul((int)EafMultiplier, (int)r1 | 3);
+                ulong u2 = Math.BigMul(EafMultiplier, r1 | 3U);
                 ushort daySinceMarch1 = (ushort)((uint)u2 / EafDivider);
                 int n3 = 2141 * daySinceMarch1 + 197913;
                 // Return 1-based day-of-month
@@ -1524,7 +1524,7 @@ public int Month
             {
                 // r1 = (day number within 100-year period) * 4
                 uint r1 = (((uint)(UTicks / TicksPer6Hours) | 3U) + 1224) % DaysPer400Years;
-                ulong u2 = (ulong)Math.BigMul((int)EafMultiplier, (int)r1 | 3);
+                ulong u2 = Math.BigMul(EafMultiplier, r1 | 3U);
                 ushort daySinceMarch1 = (ushort)((uint)u2 / EafDivider);
                 int n3 = 2141 * daySinceMarch1 + 197913;
                 return (ushort)(n3 >> 16) - (daySinceMarch1 >= March1BasedDayOfNewYear ? 12 : 0);