From 3fa96044d06d1355e16da014478fd99eba3c01a5 Mon Sep 17 00:00:00 2001 From: lilinus Date: Wed, 11 Oct 2023 16:57:23 +0200 Subject: [PATCH 1/6] Use BigMul for 32x32=64 in decimal --- .../src/System/Decimal.DecCalc.cs | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs index bc107fb7549c6..6082de804153b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs @@ -184,20 +184,7 @@ private static ulong UInt32x32To64(uint a, uint b) private static void UInt64x64To128(ulong a, ulong b, ref DecCalc result) { - ulong low = UInt32x32To64((uint)a, (uint)b); // lo partial prod - ulong mid = UInt32x32To64((uint)a, (uint)(b >> 32)); // mid 1 partial prod - ulong high = UInt32x32To64((uint)(a >> 32), (uint)(b >> 32)); - high += mid >> 32; - low += mid <<= 32; - if (low < mid) // test for carry - high++; - - mid = UInt32x32To64((uint)(a >> 32), (uint)b); - high += mid >> 32; - low += mid <<= 32; - if (low < mid) // test for carry - high++; - + ulong high = Math.BigMul(a, b, out ulong low); if (high > uint.MaxValue) Number.ThrowOverflowException(SR.Overflow_Decimal); result.Low64 = low; From fab4430b43d7f0c90210013f44664a80273a2b96 Mon Sep 17 00:00:00 2001 From: lilinus Date: Thu, 12 Oct 2023 15:32:20 +0200 Subject: [PATCH 2/6] remove UInt32x32To64 --- .../src/System/Decimal.DecCalc.cs | 103 +++++++++--------- 1 file changed, 49 insertions(+), 54 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs index 6082de804153b..1e9425a69ee77 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs @@ -177,11 +177,6 @@ private 
static unsafe uint GetExponent(double d) return (uint)(BitConverter.DoubleToUInt64Bits(d) >> 52) & 0x7FFu; } - private static ulong UInt32x32To64(uint a, uint b) - { - return (ulong)a * (ulong)b; - } - private static void UInt64x64To128(ulong a, ulong b, ref DecCalc result) { ulong high = Math.BigMul(a, b, out ulong low); @@ -381,7 +376,7 @@ private static uint Div96By64(ref Buf12 bufNum, ulong den) // Compute full remainder, rem = dividend - (quo * divisor). // - ulong prod = UInt32x32To64(quo, (uint)den); // quo * lo divisor + ulong prod = quo * (den & uint.MaxValue); // quo * lo divisor num -= prod; if (num > ~prod) @@ -427,8 +422,8 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen) // Compute full remainder, rem = dividend - (quo * divisor). // - ulong prod1 = UInt32x32To64(quo, bufDen.U0); // quo * lo divisor - ulong prod2 = UInt32x32To64(quo, bufDen.U1); // quo * mid divisor + ulong prod1 = (ulong)quo * bufDen.U0; // quo * lo divisor + ulong prod2 = (ulong)quo * bufDen.U1; // quo * mid divisor prod2 += prod1 >> 32; prod1 = (uint)prod1 | (prod2 << 32); prod2 >>= 32; @@ -487,23 +482,23 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen) /// Returns highest 32 bits of product private static uint IncreaseScale(ref Buf12 bufNum, uint power) { - ulong tmp = UInt32x32To64(bufNum.U0, power); + ulong tmp = (ulong)bufNum.U0 * power; bufNum.U0 = (uint)tmp; tmp >>= 32; - tmp += UInt32x32To64(bufNum.U1, power); + tmp += (ulong)bufNum.U1 * power; bufNum.U1 = (uint)tmp; tmp >>= 32; - tmp += UInt32x32To64(bufNum.U2, power); + tmp += (ulong)bufNum.U2 * power; bufNum.U2 = (uint)tmp; return (uint)(tmp >> 32); } private static void IncreaseScale64(ref Buf12 bufNum, uint power) { - ulong tmp = UInt32x32To64(bufNum.U0, power); + ulong tmp = (ulong)bufNum.U0 * power; bufNum.U0 = (uint)tmp; tmp >>= 32; - tmp += UInt32x32To64(bufNum.U1, power); + tmp += (ulong)bufNum.U1 * power; bufNum.High64 = tmp; } @@ -921,11 +916,11 @@ internal static unsafe 
void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) { if (scale <= MaxInt32Scale) { - low64 = UInt32x32To64((uint)low64, UInt32Powers10[scale]); + low64 = (low64 & uint.MaxValue) * UInt32Powers10[scale]; goto AlignedAdd; } scale -= MaxInt32Scale; - low64 = UInt32x32To64((uint)low64, TenToPowerNine); + low64 = (low64 & uint.MaxValue) * TenToPowerNine; } while (low64 <= uint.MaxValue); } @@ -934,8 +929,8 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) power = TenToPowerNine; if (scale < MaxInt32Scale) power = UInt32Powers10[scale]; - tmpLow = UInt32x32To64((uint)low64, power); - tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32); + tmpLow = (low64 & uint.MaxValue) * power; + tmp64 = ((low64 >> 32) * power) + (tmpLow >> 32); low64 = (uint)tmpLow + (tmp64 << 32); high = (uint)(tmp64 >> 32); if ((scale -= MaxInt32Scale) <= 0) @@ -950,11 +945,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) power = TenToPowerNine; if (scale < MaxInt32Scale) power = UInt32Powers10[scale]; - tmpLow = UInt32x32To64((uint)low64, power); - tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32); + tmpLow = (low64 & uint.MaxValue) * power; + tmp64 = ((low64 >> 32) * power) + (tmpLow >> 32); low64 = (uint)tmpLow + (tmp64 << 32); tmp64 >>= 32; - tmp64 += UInt32x32To64(high, power); + tmp64 += (ulong)high * power; scale -= MaxInt32Scale; if (tmp64 > uint.MaxValue) @@ -986,7 +981,7 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) for (uint cur = 0; ;) { Debug.Assert(cur < Buf24.Length); - tmp64 += UInt32x32To64(rgulNum[cur], power); + tmp64 += (ulong)rgulNum[cur] * power; rgulNum[cur] = (uint)tmp64; cur++; tmp64 >>= 32; @@ -1189,10 +1184,10 @@ internal static long VarCyFromDec(ref DecCalc pdecIn) if (pdecIn.High != 0) goto ThrowOverflow; uint pwr = UInt32Powers10[-scale]; - ulong high = UInt32x32To64(pwr, pdecIn.Mid); + ulong high = (ulong)pwr * pdecIn.Mid; 
if (high > uint.MaxValue) goto ThrowOverflow; - ulong low = UInt32x32To64(pwr, pdecIn.Low); + ulong low = (ulong)pwr * pdecIn.Low; low += high <<= 32; if (low < high) goto ThrowOverflow; @@ -1277,11 +1272,11 @@ private static int VarDecCmpSub(in decimal d1, in decimal d2) do { uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale]; - ulong tmpLow = UInt32x32To64((uint)low64, power); - ulong tmp = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32); + ulong tmpLow = (low64 & uint.MaxValue) * power; + ulong tmp = ((low64 >> 32) * power) + (tmpLow >> 32); low64 = (uint)tmpLow + (tmp << 32); tmp >>= 32; - tmp += UInt32x32To64(high, power); + tmp += (ulong)high * power; // If the scaled value has more than 96 significant bits then it's greater than d2 if (tmp > uint.MaxValue) return sign; @@ -1324,7 +1319,7 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) { // Upper 64 bits are zero. // - ulong low64 = UInt32x32To64(d1.Low, d2.Low); + ulong low64 = (ulong)d1.Low * d2.Low; if (scale > DEC_SCALE_MAX) { // Result scale is too big. Divide result by power of 10 to reduce it. 
@@ -1358,16 +1353,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) else { // Left value is 32-bit, result fits in 4 uints - tmp = UInt32x32To64(d1.Low, d2.Low); + tmp = (ulong)d1.Low * d2.Low; bufProd.U0 = (uint)tmp; - tmp = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32); + tmp = ((ulong)d1.Low * d2.Mid) + (tmp >> 32); bufProd.U1 = (uint)tmp; tmp >>= 32; if (d2.High != 0) { - tmp += UInt32x32To64(d1.Low, d2.High); + tmp += (ulong)d1.Low * d2.High; if (tmp > uint.MaxValue) { bufProd.Mid64 = tmp; @@ -1382,16 +1377,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) else if ((d2.High | d2.Mid) == 0) { // Right value is 32-bit, result fits in 4 uints - tmp = UInt32x32To64(d2.Low, d1.Low); + tmp = (ulong)d2.Low * d1.Low; bufProd.U0 = (uint)tmp; - tmp = UInt32x32To64(d2.Low, d1.Mid) + (tmp >> 32); + tmp = ((ulong)d2.Low * d1.Mid) + (tmp >> 32); bufProd.U1 = (uint)tmp; tmp >>= 32; if (d1.High != 0) { - tmp += UInt32x32To64(d2.Low, d1.High); + tmp += (ulong)d2.Low * d1.High; if (tmp > uint.MaxValue) { bufProd.Mid64 = tmp; @@ -1426,12 +1421,12 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) // [p-5][p-4][p-3][p-2][p-1][p-0] prod[] array // - tmp = UInt32x32To64(d1.Low, d2.Low); + tmp = (ulong)d1.Low * d2.Low; bufProd.U0 = (uint)tmp; - ulong tmp2 = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32); + ulong tmp2 = ((ulong)d1.Low * d2.Mid) + (tmp >> 32); - tmp = UInt32x32To64(d1.Mid, d2.Low); + tmp = (ulong)d1.Mid * d2.Low; tmp += tmp2; // this could generate carry bufProd.U1 = (uint)tmp; if (tmp < tmp2) // detect carry @@ -1439,39 +1434,39 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) else tmp2 = tmp >> 32; - tmp = UInt32x32To64(d1.Mid, d2.Mid) + tmp2; + tmp = ((ulong)d1.Mid * d2.Mid) + tmp2; if ((d1.High | d2.High) > 0) { // Highest 32 bits is non-zero. Calculate 5 more partial products. 
// - tmp2 = UInt32x32To64(d1.Low, d2.High); + tmp2 = (ulong)d1.Low * d2.High; tmp += tmp2; // this could generate carry uint tmp3 = 0; if (tmp < tmp2) // detect carry tmp3 = 1; - tmp2 = UInt32x32To64(d1.High, d2.Low); + tmp2 = (ulong)d1.High * d2.Low; tmp += tmp2; // this could generate carry bufProd.U2 = (uint)tmp; if (tmp < tmp2) // detect carry tmp3++; tmp2 = ((ulong)tmp3 << 32) | (tmp >> 32); - tmp = UInt32x32To64(d1.Mid, d2.High); + tmp = (ulong)d1.Mid * d2.High; tmp += tmp2; // this could generate carry tmp3 = 0; if (tmp < tmp2) // detect carry tmp3 = 1; - tmp2 = UInt32x32To64(d1.High, d2.Mid); + tmp2 = (ulong)d1.High * d2.Mid; tmp += tmp2; // this could generate carry bufProd.U3 = (uint)tmp; if (tmp < tmp2) // detect carry tmp3++; tmp = ((ulong)tmp3 << 32) | (tmp >> 32); - bufProd.High64 = UInt32x32To64(d1.High, d2.High) + tmp; + bufProd.High64 = ((ulong)d1.High * d2.High) + tmp; hiProd = 5; } @@ -1594,7 +1589,7 @@ internal static void VarDecFromR4(float input, out DecCalc result) power = -power; if (power < 10) { - result.Low64 = UInt32x32To64(mant, UInt32Powers10[power]); + result.Low64 = (ulong)mant * UInt32Powers10[power]; } else { @@ -1602,14 +1597,14 @@ internal static void VarDecFromR4(float input, out DecCalc result) // if (power > 18) { - ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 18]); + ulong low64 = (ulong)mant * UInt32Powers10[power - 18]; UInt64x64To128(low64, TenToPowerEighteen, ref result); } else { - ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 9]); - ulong hi64 = UInt32x32To64(TenToPowerNine, (uint)(low64 >> 32)); - low64 = UInt32x32To64(TenToPowerNine, (uint)low64); + ulong low64 = (ulong)mant * UInt32Powers10[power - 9]; + ulong hi64 = TenToPowerNine * (low64 >> 32); + low64 = TenToPowerNine * (low64 & uint.MaxValue); result.Low = (uint)low64; hi64 += low64 >> 32; result.Mid = (uint)hi64; @@ -1762,8 +1757,8 @@ internal static void VarDecFromR8(double input, out DecCalc result) if (power < 10) { uint pow10 = 
UInt32Powers10[power]; - ulong low64 = UInt32x32To64((uint)mant, pow10); - ulong hi64 = UInt32x32To64((uint)(mant >> 32), pow10); + ulong low64 = (mant & uint.MaxValue) * pow10; + ulong hi64 = (mant >> 32) * pow10; result.Low = (uint)low64; hi64 += low64 >> 32; result.Mid = (uint)hi64; @@ -1967,7 +1962,7 @@ internal static unsafe void VarDecDiv(ref DecCalc d1, ref DecCalc d2) if (IncreaseScale(ref bufQuo, power) != 0) goto ThrowOverflow; - ulong num = UInt32x32To64(remainder, power); + ulong num = (ulong)remainder * power; // TODO: https://github.com/dotnet/runtime/issues/5213 uint div = (uint)(num / den); remainder = (uint)num - div * den; @@ -2200,7 +2195,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2) do { uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale]; - ulong tmp = UInt32x32To64(d2.Low, power); + ulong tmp = (ulong)d2.Low * power; d2.Low = (uint)tmp; tmp >>= 32; tmp += (d2.Mid + ((ulong)d2.High << 32)) * power; @@ -2227,7 +2222,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2) break; uint power = iCurScale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[iCurScale]; scale += iCurScale; - ulong tmp = UInt32x32To64(bufQuo.U0, power); + ulong tmp = (ulong)bufQuo.U0 * power; bufQuo.U0 = (uint)tmp; tmp >>= 32; bufQuo.High64 = tmp + bufQuo.High64 * power; @@ -2288,12 +2283,12 @@ private static unsafe void VarDecModFull(ref DecCalc d1, ref DecCalc d2, int sca { uint power = scale <= -MaxInt32Scale ? TenToPowerNine : UInt32Powers10[-scale]; uint* buf = (uint*)&b; - ulong tmp64 = UInt32x32To64(b.Buf24.U0, power); + ulong tmp64 = (ulong)b.Buf24.U0 * power; b.Buf24.U0 = (uint)tmp64; for (int i = 1; i <= high; i++) { tmp64 >>= 32; - tmp64 += UInt32x32To64(buf[i], power); + tmp64 += (ulong)buf[i] * power; buf[i] = (uint)tmp64; } // The high bit of the dividend must not be set. 
From 6f04db38ed0d081c029fc2e69b72348168417cb7 Mon Sep 17 00:00:00 2001 From: lilinus Date: Fri, 13 Oct 2023 09:28:20 +0200 Subject: [PATCH 3/6] Revert "remove UInt32x32To64" This reverts commit fab4430b43d7f0c90210013f44664a80273a2b96. --- .../src/System/Decimal.DecCalc.cs | 103 +++++++++--------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs index 1e9425a69ee77..6082de804153b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs @@ -177,6 +177,11 @@ private static unsafe uint GetExponent(double d) return (uint)(BitConverter.DoubleToUInt64Bits(d) >> 52) & 0x7FFu; } + private static ulong UInt32x32To64(uint a, uint b) + { + return (ulong)a * (ulong)b; + } + private static void UInt64x64To128(ulong a, ulong b, ref DecCalc result) { ulong high = Math.BigMul(a, b, out ulong low); @@ -376,7 +381,7 @@ private static uint Div96By64(ref Buf12 bufNum, ulong den) // Compute full remainder, rem = dividend - (quo * divisor). // - ulong prod = quo * (den & uint.MaxValue); // quo * lo divisor + ulong prod = UInt32x32To64(quo, (uint)den); // quo * lo divisor num -= prod; if (num > ~prod) @@ -422,8 +427,8 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen) // Compute full remainder, rem = dividend - (quo * divisor). 
// - ulong prod1 = (ulong)quo * bufDen.U0; // quo * lo divisor - ulong prod2 = (ulong)quo * bufDen.U1; // quo * mid divisor + ulong prod1 = UInt32x32To64(quo, bufDen.U0); // quo * lo divisor + ulong prod2 = UInt32x32To64(quo, bufDen.U1); // quo * mid divisor prod2 += prod1 >> 32; prod1 = (uint)prod1 | (prod2 << 32); prod2 >>= 32; @@ -482,23 +487,23 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen) /// Returns highest 32 bits of product private static uint IncreaseScale(ref Buf12 bufNum, uint power) { - ulong tmp = (ulong)bufNum.U0 * power; + ulong tmp = UInt32x32To64(bufNum.U0, power); bufNum.U0 = (uint)tmp; tmp >>= 32; - tmp += (ulong)bufNum.U1 * power; + tmp += UInt32x32To64(bufNum.U1, power); bufNum.U1 = (uint)tmp; tmp >>= 32; - tmp += (ulong)bufNum.U2 * power; + tmp += UInt32x32To64(bufNum.U2, power); bufNum.U2 = (uint)tmp; return (uint)(tmp >> 32); } private static void IncreaseScale64(ref Buf12 bufNum, uint power) { - ulong tmp = (ulong)bufNum.U0 * power; + ulong tmp = UInt32x32To64(bufNum.U0, power); bufNum.U0 = (uint)tmp; tmp >>= 32; - tmp += (ulong)bufNum.U1 * power; + tmp += UInt32x32To64(bufNum.U1, power); bufNum.High64 = tmp; } @@ -916,11 +921,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) { if (scale <= MaxInt32Scale) { - low64 = (low64 & uint.MaxValue) * UInt32Powers10[scale]; + low64 = UInt32x32To64((uint)low64, UInt32Powers10[scale]); goto AlignedAdd; } scale -= MaxInt32Scale; - low64 = (low64 & uint.MaxValue) * TenToPowerNine; + low64 = UInt32x32To64((uint)low64, TenToPowerNine); } while (low64 <= uint.MaxValue); } @@ -929,8 +934,8 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) power = TenToPowerNine; if (scale < MaxInt32Scale) power = UInt32Powers10[scale]; - tmpLow = (low64 & uint.MaxValue) * power; - tmp64 = ((low64 >> 32) * power) + (tmpLow >> 32); + tmpLow = UInt32x32To64((uint)low64, power); + tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + 
(tmpLow >> 32); low64 = (uint)tmpLow + (tmp64 << 32); high = (uint)(tmp64 >> 32); if ((scale -= MaxInt32Scale) <= 0) @@ -945,11 +950,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) power = TenToPowerNine; if (scale < MaxInt32Scale) power = UInt32Powers10[scale]; - tmpLow = (low64 & uint.MaxValue) * power; - tmp64 = ((low64 >> 32) * power) + (tmpLow >> 32); + tmpLow = UInt32x32To64((uint)low64, power); + tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32); low64 = (uint)tmpLow + (tmp64 << 32); tmp64 >>= 32; - tmp64 += (ulong)high * power; + tmp64 += UInt32x32To64(high, power); scale -= MaxInt32Scale; if (tmp64 > uint.MaxValue) @@ -981,7 +986,7 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) for (uint cur = 0; ;) { Debug.Assert(cur < Buf24.Length); - tmp64 += (ulong)rgulNum[cur] * power; + tmp64 += UInt32x32To64(rgulNum[cur], power); rgulNum[cur] = (uint)tmp64; cur++; tmp64 >>= 32; @@ -1184,10 +1189,10 @@ internal static long VarCyFromDec(ref DecCalc pdecIn) if (pdecIn.High != 0) goto ThrowOverflow; uint pwr = UInt32Powers10[-scale]; - ulong high = (ulong)pwr * pdecIn.Mid; + ulong high = UInt32x32To64(pwr, pdecIn.Mid); if (high > uint.MaxValue) goto ThrowOverflow; - ulong low = (ulong)pwr * pdecIn.Low; + ulong low = UInt32x32To64(pwr, pdecIn.Low); low += high <<= 32; if (low < high) goto ThrowOverflow; @@ -1272,11 +1277,11 @@ private static int VarDecCmpSub(in decimal d1, in decimal d2) do { uint power = scale >= MaxInt32Scale ? 
TenToPowerNine : UInt32Powers10[scale]; - ulong tmpLow = (low64 & uint.MaxValue) * power; - ulong tmp = ((low64 >> 32) * power) + (tmpLow >> 32); + ulong tmpLow = UInt32x32To64((uint)low64, power); + ulong tmp = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32); low64 = (uint)tmpLow + (tmp << 32); tmp >>= 32; - tmp += (ulong)high * power; + tmp += UInt32x32To64(high, power); // If the scaled value has more than 96 significant bits then it's greater than d2 if (tmp > uint.MaxValue) return sign; @@ -1319,7 +1324,7 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) { // Upper 64 bits are zero. // - ulong low64 = (ulong)d1.Low * d2.Low; + ulong low64 = UInt32x32To64(d1.Low, d2.Low); if (scale > DEC_SCALE_MAX) { // Result scale is too big. Divide result by power of 10 to reduce it. @@ -1353,16 +1358,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) else { // Left value is 32-bit, result fits in 4 uints - tmp = (ulong)d1.Low * d2.Low; + tmp = UInt32x32To64(d1.Low, d2.Low); bufProd.U0 = (uint)tmp; - tmp = ((ulong)d1.Low * d2.Mid) + (tmp >> 32); + tmp = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32); bufProd.U1 = (uint)tmp; tmp >>= 32; if (d2.High != 0) { - tmp += (ulong)d1.Low * d2.High; + tmp += UInt32x32To64(d1.Low, d2.High); if (tmp > uint.MaxValue) { bufProd.Mid64 = tmp; @@ -1377,16 +1382,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) else if ((d2.High | d2.Mid) == 0) { // Right value is 32-bit, result fits in 4 uints - tmp = (ulong)d2.Low * d1.Low; + tmp = UInt32x32To64(d2.Low, d1.Low); bufProd.U0 = (uint)tmp; - tmp = ((ulong)d2.Low * d1.Mid) + (tmp >> 32); + tmp = UInt32x32To64(d2.Low, d1.Mid) + (tmp >> 32); bufProd.U1 = (uint)tmp; tmp >>= 32; if (d1.High != 0) { - tmp += (ulong)d2.Low * d1.High; + tmp += UInt32x32To64(d2.Low, d1.High); if (tmp > uint.MaxValue) { bufProd.Mid64 = tmp; @@ -1421,12 +1426,12 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) // 
[p-5][p-4][p-3][p-2][p-1][p-0] prod[] array // - tmp = (ulong)d1.Low * d2.Low; + tmp = UInt32x32To64(d1.Low, d2.Low); bufProd.U0 = (uint)tmp; - ulong tmp2 = ((ulong)d1.Low * d2.Mid) + (tmp >> 32); + ulong tmp2 = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32); - tmp = (ulong)d1.Mid * d2.Low; + tmp = UInt32x32To64(d1.Mid, d2.Low); tmp += tmp2; // this could generate carry bufProd.U1 = (uint)tmp; if (tmp < tmp2) // detect carry @@ -1434,39 +1439,39 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) else tmp2 = tmp >> 32; - tmp = ((ulong)d1.Mid * d2.Mid) + tmp2; + tmp = UInt32x32To64(d1.Mid, d2.Mid) + tmp2; if ((d1.High | d2.High) > 0) { // Highest 32 bits is non-zero. Calculate 5 more partial products. // - tmp2 = (ulong)d1.Low * d2.High; + tmp2 = UInt32x32To64(d1.Low, d2.High); tmp += tmp2; // this could generate carry uint tmp3 = 0; if (tmp < tmp2) // detect carry tmp3 = 1; - tmp2 = (ulong)d1.High * d2.Low; + tmp2 = UInt32x32To64(d1.High, d2.Low); tmp += tmp2; // this could generate carry bufProd.U2 = (uint)tmp; if (tmp < tmp2) // detect carry tmp3++; tmp2 = ((ulong)tmp3 << 32) | (tmp >> 32); - tmp = (ulong)d1.Mid * d2.High; + tmp = UInt32x32To64(d1.Mid, d2.High); tmp += tmp2; // this could generate carry tmp3 = 0; if (tmp < tmp2) // detect carry tmp3 = 1; - tmp2 = (ulong)d1.High * d2.Mid; + tmp2 = UInt32x32To64(d1.High, d2.Mid); tmp += tmp2; // this could generate carry bufProd.U3 = (uint)tmp; if (tmp < tmp2) // detect carry tmp3++; tmp = ((ulong)tmp3 << 32) | (tmp >> 32); - bufProd.High64 = ((ulong)d1.High * d2.High) + tmp; + bufProd.High64 = UInt32x32To64(d1.High, d2.High) + tmp; hiProd = 5; } @@ -1589,7 +1594,7 @@ internal static void VarDecFromR4(float input, out DecCalc result) power = -power; if (power < 10) { - result.Low64 = (ulong)mant * UInt32Powers10[power]; + result.Low64 = UInt32x32To64(mant, UInt32Powers10[power]); } else { @@ -1597,14 +1602,14 @@ internal static void VarDecFromR4(float input, out DecCalc result) // if (power > 18) 
{ - ulong low64 = (ulong)mant * UInt32Powers10[power - 18]; + ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 18]); UInt64x64To128(low64, TenToPowerEighteen, ref result); } else { - ulong low64 = (ulong)mant * UInt32Powers10[power - 9]; - ulong hi64 = TenToPowerNine * (low64 >> 32); - low64 = TenToPowerNine * (low64 & uint.MaxValue); + ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 9]); + ulong hi64 = UInt32x32To64(TenToPowerNine, (uint)(low64 >> 32)); + low64 = UInt32x32To64(TenToPowerNine, (uint)low64); result.Low = (uint)low64; hi64 += low64 >> 32; result.Mid = (uint)hi64; @@ -1757,8 +1762,8 @@ internal static void VarDecFromR8(double input, out DecCalc result) if (power < 10) { uint pow10 = UInt32Powers10[power]; - ulong low64 = (mant & uint.MaxValue) * pow10; - ulong hi64 = (mant >> 32) * pow10; + ulong low64 = UInt32x32To64((uint)mant, pow10); + ulong hi64 = UInt32x32To64((uint)(mant >> 32), pow10); result.Low = (uint)low64; hi64 += low64 >> 32; result.Mid = (uint)hi64; @@ -1962,7 +1967,7 @@ internal static unsafe void VarDecDiv(ref DecCalc d1, ref DecCalc d2) if (IncreaseScale(ref bufQuo, power) != 0) goto ThrowOverflow; - ulong num = (ulong)remainder * power; + ulong num = UInt32x32To64(remainder, power); // TODO: https://github.com/dotnet/runtime/issues/5213 uint div = (uint)(num / den); remainder = (uint)num - div * den; @@ -2195,7 +2200,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2) do { uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale]; - ulong tmp = (ulong)d2.Low * power; + ulong tmp = UInt32x32To64(d2.Low, power); d2.Low = (uint)tmp; tmp >>= 32; tmp += (d2.Mid + ((ulong)d2.High << 32)) * power; @@ -2222,7 +2227,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2) break; uint power = iCurScale >= MaxInt32Scale ? 
TenToPowerNine : UInt32Powers10[iCurScale]; scale += iCurScale; - ulong tmp = (ulong)bufQuo.U0 * power; + ulong tmp = UInt32x32To64(bufQuo.U0, power); bufQuo.U0 = (uint)tmp; tmp >>= 32; bufQuo.High64 = tmp + bufQuo.High64 * power; @@ -2283,12 +2288,12 @@ private static unsafe void VarDecModFull(ref DecCalc d1, ref DecCalc d2, int sca { uint power = scale <= -MaxInt32Scale ? TenToPowerNine : UInt32Powers10[-scale]; uint* buf = (uint*)&b; - ulong tmp64 = (ulong)b.Buf24.U0 * power; + ulong tmp64 = UInt32x32To64(b.Buf24.U0, power); b.Buf24.U0 = (uint)tmp64; for (int i = 1; i <= high; i++) { tmp64 >>= 32; - tmp64 += (ulong)buf[i] * power; + tmp64 += UInt32x32To64(buf[i], power); buf[i] = (uint)tmp64; } // The high bit of the dividend must not be set. From 05729880a6e3f0f34ee354d552fade055acd38f4 Mon Sep 17 00:00:00 2001 From: lilinus Date: Fri, 13 Oct 2023 09:56:27 +0200 Subject: [PATCH 4/6] Add internal Math.BigMul(uint, uint) --- .../src/System/Decimal.DecCalc.cs | 103 +++++++++--------- .../System.Private.CoreLib/src/System/Math.cs | 5 + 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs index 6082de804153b..f91f82d31358d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Decimal.DecCalc.cs @@ -177,11 +177,6 @@ private static unsafe uint GetExponent(double d) return (uint)(BitConverter.DoubleToUInt64Bits(d) >> 52) & 0x7FFu; } - private static ulong UInt32x32To64(uint a, uint b) - { - return (ulong)a * (ulong)b; - } - private static void UInt64x64To128(ulong a, ulong b, ref DecCalc result) { ulong high = Math.BigMul(a, b, out ulong low); @@ -381,7 +376,7 @@ private static uint Div96By64(ref Buf12 bufNum, ulong den) // Compute full remainder, rem = dividend - (quo * divisor). 
// - ulong prod = UInt32x32To64(quo, (uint)den); // quo * lo divisor + ulong prod = Math.BigMul(quo, (uint)den); // quo * lo divisor num -= prod; if (num > ~prod) @@ -427,8 +422,8 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen) // Compute full remainder, rem = dividend - (quo * divisor). // - ulong prod1 = UInt32x32To64(quo, bufDen.U0); // quo * lo divisor - ulong prod2 = UInt32x32To64(quo, bufDen.U1); // quo * mid divisor + ulong prod1 = Math.BigMul(quo, bufDen.U0); // quo * lo divisor + ulong prod2 = Math.BigMul(quo, bufDen.U1); // quo * mid divisor prod2 += prod1 >> 32; prod1 = (uint)prod1 | (prod2 << 32); prod2 >>= 32; @@ -487,23 +482,23 @@ private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen) /// Returns highest 32 bits of product private static uint IncreaseScale(ref Buf12 bufNum, uint power) { - ulong tmp = UInt32x32To64(bufNum.U0, power); + ulong tmp = Math.BigMul(bufNum.U0, power); bufNum.U0 = (uint)tmp; tmp >>= 32; - tmp += UInt32x32To64(bufNum.U1, power); + tmp += Math.BigMul(bufNum.U1, power); bufNum.U1 = (uint)tmp; tmp >>= 32; - tmp += UInt32x32To64(bufNum.U2, power); + tmp += Math.BigMul(bufNum.U2, power); bufNum.U2 = (uint)tmp; return (uint)(tmp >> 32); } private static void IncreaseScale64(ref Buf12 bufNum, uint power) { - ulong tmp = UInt32x32To64(bufNum.U0, power); + ulong tmp = Math.BigMul(bufNum.U0, power); bufNum.U0 = (uint)tmp; tmp >>= 32; - tmp += UInt32x32To64(bufNum.U1, power); + tmp += Math.BigMul(bufNum.U1, power); bufNum.High64 = tmp; } @@ -921,11 +916,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) { if (scale <= MaxInt32Scale) { - low64 = UInt32x32To64((uint)low64, UInt32Powers10[scale]); + low64 = Math.BigMul((uint)low64, UInt32Powers10[scale]); goto AlignedAdd; } scale -= MaxInt32Scale; - low64 = UInt32x32To64((uint)low64, TenToPowerNine); + low64 = Math.BigMul((uint)low64, TenToPowerNine); } while (low64 <= uint.MaxValue); } @@ -934,8 +929,8 @@ internal static 
unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) power = TenToPowerNine; if (scale < MaxInt32Scale) power = UInt32Powers10[scale]; - tmpLow = UInt32x32To64((uint)low64, power); - tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32); + tmpLow = Math.BigMul((uint)low64, power); + tmp64 = Math.BigMul((uint)(low64 >> 32), power) + (tmpLow >> 32); low64 = (uint)tmpLow + (tmp64 << 32); high = (uint)(tmp64 >> 32); if ((scale -= MaxInt32Scale) <= 0) @@ -950,11 +945,11 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) power = TenToPowerNine; if (scale < MaxInt32Scale) power = UInt32Powers10[scale]; - tmpLow = UInt32x32To64((uint)low64, power); - tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32); + tmpLow = Math.BigMul((uint)low64, power); + tmp64 = Math.BigMul((uint)(low64 >> 32), power) + (tmpLow >> 32); low64 = (uint)tmpLow + (tmp64 << 32); tmp64 >>= 32; - tmp64 += UInt32x32To64(high, power); + tmp64 += Math.BigMul(high, power); scale -= MaxInt32Scale; if (tmp64 > uint.MaxValue) @@ -986,7 +981,7 @@ internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign) for (uint cur = 0; ;) { Debug.Assert(cur < Buf24.Length); - tmp64 += UInt32x32To64(rgulNum[cur], power); + tmp64 += Math.BigMul(rgulNum[cur], power); rgulNum[cur] = (uint)tmp64; cur++; tmp64 >>= 32; @@ -1189,10 +1184,10 @@ internal static long VarCyFromDec(ref DecCalc pdecIn) if (pdecIn.High != 0) goto ThrowOverflow; uint pwr = UInt32Powers10[-scale]; - ulong high = UInt32x32To64(pwr, pdecIn.Mid); + ulong high = Math.BigMul(pwr, pdecIn.Mid); if (high > uint.MaxValue) goto ThrowOverflow; - ulong low = UInt32x32To64(pwr, pdecIn.Low); + ulong low = Math.BigMul(pwr, pdecIn.Low); low += high <<= 32; if (low < high) goto ThrowOverflow; @@ -1277,11 +1272,11 @@ private static int VarDecCmpSub(in decimal d1, in decimal d2) do { uint power = scale >= MaxInt32Scale ? 
TenToPowerNine : UInt32Powers10[scale]; - ulong tmpLow = UInt32x32To64((uint)low64, power); - ulong tmp = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32); + ulong tmpLow = Math.BigMul((uint)low64, power); + ulong tmp = Math.BigMul((uint)(low64 >> 32), power) + (tmpLow >> 32); low64 = (uint)tmpLow + (tmp << 32); tmp >>= 32; - tmp += UInt32x32To64(high, power); + tmp += Math.BigMul(high, power); // If the scaled value has more than 96 significant bits then it's greater than d2 if (tmp > uint.MaxValue) return sign; @@ -1324,7 +1319,7 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) { // Upper 64 bits are zero. // - ulong low64 = UInt32x32To64(d1.Low, d2.Low); + ulong low64 = Math.BigMul(d1.Low, d2.Low); if (scale > DEC_SCALE_MAX) { // Result scale is too big. Divide result by power of 10 to reduce it. @@ -1358,16 +1353,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) else { // Left value is 32-bit, result fits in 4 uints - tmp = UInt32x32To64(d1.Low, d2.Low); + tmp = Math.BigMul(d1.Low, d2.Low); bufProd.U0 = (uint)tmp; - tmp = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32); + tmp = Math.BigMul(d1.Low, d2.Mid) + (tmp >> 32); bufProd.U1 = (uint)tmp; tmp >>= 32; if (d2.High != 0) { - tmp += UInt32x32To64(d1.Low, d2.High); + tmp += Math.BigMul(d1.Low, d2.High); if (tmp > uint.MaxValue) { bufProd.Mid64 = tmp; @@ -1382,16 +1377,16 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) else if ((d2.High | d2.Mid) == 0) { // Right value is 32-bit, result fits in 4 uints - tmp = UInt32x32To64(d2.Low, d1.Low); + tmp = Math.BigMul(d2.Low, d1.Low); bufProd.U0 = (uint)tmp; - tmp = UInt32x32To64(d2.Low, d1.Mid) + (tmp >> 32); + tmp = Math.BigMul(d2.Low, d1.Mid) + (tmp >> 32); bufProd.U1 = (uint)tmp; tmp >>= 32; if (d1.High != 0) { - tmp += UInt32x32To64(d2.Low, d1.High); + tmp += Math.BigMul(d2.Low, d1.High); if (tmp > uint.MaxValue) { bufProd.Mid64 = tmp; @@ -1426,12 +1421,12 @@ internal static unsafe 
void VarDecMul(ref DecCalc d1, ref DecCalc d2) // [p-5][p-4][p-3][p-2][p-1][p-0] prod[] array // - tmp = UInt32x32To64(d1.Low, d2.Low); + tmp = Math.BigMul(d1.Low, d2.Low); bufProd.U0 = (uint)tmp; - ulong tmp2 = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32); + ulong tmp2 = Math.BigMul(d1.Low, d2.Mid) + (tmp >> 32); - tmp = UInt32x32To64(d1.Mid, d2.Low); + tmp = Math.BigMul(d1.Mid, d2.Low); tmp += tmp2; // this could generate carry bufProd.U1 = (uint)tmp; if (tmp < tmp2) // detect carry @@ -1439,39 +1434,39 @@ internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2) else tmp2 = tmp >> 32; - tmp = UInt32x32To64(d1.Mid, d2.Mid) + tmp2; + tmp = Math.BigMul(d1.Mid, d2.Mid) + tmp2; if ((d1.High | d2.High) > 0) { // Highest 32 bits is non-zero. Calculate 5 more partial products. // - tmp2 = UInt32x32To64(d1.Low, d2.High); + tmp2 = Math.BigMul(d1.Low, d2.High); tmp += tmp2; // this could generate carry uint tmp3 = 0; if (tmp < tmp2) // detect carry tmp3 = 1; - tmp2 = UInt32x32To64(d1.High, d2.Low); + tmp2 = Math.BigMul(d1.High, d2.Low); tmp += tmp2; // this could generate carry bufProd.U2 = (uint)tmp; if (tmp < tmp2) // detect carry tmp3++; tmp2 = ((ulong)tmp3 << 32) | (tmp >> 32); - tmp = UInt32x32To64(d1.Mid, d2.High); + tmp = Math.BigMul(d1.Mid, d2.High); tmp += tmp2; // this could generate carry tmp3 = 0; if (tmp < tmp2) // detect carry tmp3 = 1; - tmp2 = UInt32x32To64(d1.High, d2.Mid); + tmp2 = Math.BigMul(d1.High, d2.Mid); tmp += tmp2; // this could generate carry bufProd.U3 = (uint)tmp; if (tmp < tmp2) // detect carry tmp3++; tmp = ((ulong)tmp3 << 32) | (tmp >> 32); - bufProd.High64 = UInt32x32To64(d1.High, d2.High) + tmp; + bufProd.High64 = Math.BigMul(d1.High, d2.High) + tmp; hiProd = 5; } @@ -1594,7 +1589,7 @@ internal static void VarDecFromR4(float input, out DecCalc result) power = -power; if (power < 10) { - result.Low64 = UInt32x32To64(mant, UInt32Powers10[power]); + result.Low64 = Math.BigMul(mant, UInt32Powers10[power]); } else { @@ -1602,14 
+1597,14 @@ internal static void VarDecFromR4(float input, out DecCalc result) // if (power > 18) { - ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 18]); + ulong low64 = Math.BigMul(mant, UInt32Powers10[power - 18]); UInt64x64To128(low64, TenToPowerEighteen, ref result); } else { - ulong low64 = UInt32x32To64(mant, UInt32Powers10[power - 9]); - ulong hi64 = UInt32x32To64(TenToPowerNine, (uint)(low64 >> 32)); - low64 = UInt32x32To64(TenToPowerNine, (uint)low64); + ulong low64 = Math.BigMul(mant, UInt32Powers10[power - 9]); + ulong hi64 = Math.BigMul(TenToPowerNine, (uint)(low64 >> 32)); + low64 = Math.BigMul(TenToPowerNine, (uint)low64); result.Low = (uint)low64; hi64 += low64 >> 32; result.Mid = (uint)hi64; @@ -1762,8 +1757,8 @@ internal static void VarDecFromR8(double input, out DecCalc result) if (power < 10) { uint pow10 = UInt32Powers10[power]; - ulong low64 = UInt32x32To64((uint)mant, pow10); - ulong hi64 = UInt32x32To64((uint)(mant >> 32), pow10); + ulong low64 = Math.BigMul((uint)mant, pow10); + ulong hi64 = Math.BigMul((uint)(mant >> 32), pow10); result.Low = (uint)low64; hi64 += low64 >> 32; result.Mid = (uint)hi64; @@ -1967,7 +1962,7 @@ internal static unsafe void VarDecDiv(ref DecCalc d1, ref DecCalc d2) if (IncreaseScale(ref bufQuo, power) != 0) goto ThrowOverflow; - ulong num = UInt32x32To64(remainder, power); + ulong num = Math.BigMul(remainder, power); // TODO: https://github.com/dotnet/runtime/issues/5213 uint div = (uint)(num / den); remainder = (uint)num - div * den; @@ -2200,7 +2195,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2) do { uint power = scale >= MaxInt32Scale ? TenToPowerNine : UInt32Powers10[scale]; - ulong tmp = UInt32x32To64(d2.Low, power); + ulong tmp = Math.BigMul(d2.Low, power); d2.Low = (uint)tmp; tmp >>= 32; tmp += (d2.Mid + ((ulong)d2.High << 32)) * power; @@ -2227,7 +2222,7 @@ internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2) break; uint power = iCurScale >= MaxInt32Scale ? 
TenToPowerNine : UInt32Powers10[iCurScale]; scale += iCurScale; - ulong tmp = UInt32x32To64(bufQuo.U0, power); + ulong tmp = Math.BigMul(bufQuo.U0, power); bufQuo.U0 = (uint)tmp; tmp >>= 32; bufQuo.High64 = tmp + bufQuo.High64 * power; @@ -2288,12 +2283,12 @@ private static unsafe void VarDecModFull(ref DecCalc d1, ref DecCalc d2, int sca { uint power = scale <= -MaxInt32Scale ? TenToPowerNine : UInt32Powers10[-scale]; uint* buf = (uint*)&b; - ulong tmp64 = UInt32x32To64(b.Buf24.U0, power); + ulong tmp64 = Math.BigMul(b.Buf24.U0, power); b.Buf24.U0 = (uint)tmp64; for (int i = 1; i <= high; i++) { tmp64 >>= 32; - tmp64 += UInt32x32To64(buf[i], power); + tmp64 += Math.BigMul(buf[i], power); buf[i] = (uint)tmp64; } // The high bit of the dividend must not be set. diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index b3904a946e51a..5fcf2ed694fcb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -155,6 +155,11 @@ internal static void ThrowNegateTwosCompOverflow() throw new OverflowException(SR.Overflow_NegateTwosCompNum); } + internal static ulong BigMul(uint a, uint b) + { + return ((ulong)a) * b; + } + public static long BigMul(int a, int b) { return ((long)a) * b; From 495a8e5766fe13aff4df5211d80d88017c8231db Mon Sep 17 00:00:00 2001 From: lilinus Date: Fri, 13 Oct 2023 11:04:36 +0200 Subject: [PATCH 5/6] Use x86 intrinsic in Math.BigMul(uint, uint) for 32 bit --- .../System.Private.CoreLib/src/System/Math.cs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Math.cs b/src/libraries/System.Private.CoreLib/src/System/Math.cs index 5fcf2ed694fcb..53e96f11c7ca4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Math.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Math.cs @@ -155,8 +155,16 @@ internal static void
ThrowNegateTwosCompOverflow() throw new OverflowException(SR.Overflow_NegateTwosCompNum); } - internal static ulong BigMul(uint a, uint b) + internal static unsafe ulong BigMul(uint a, uint b) { +#if TARGET_32BIT + if (Bmi2.IsSupported) + { + uint low; + uint high = Bmi2.MultiplyNoFlags(a, b, &low); + return ((ulong)high << 32) | low; + } +#endif return ((ulong)a) * b; } From 8c3f5ca0caf38ed9fa88feb2820ec5e6ed5e2f61 Mon Sep 17 00:00:00 2001 From: lilinus Date: Tue, 17 Oct 2023 16:28:52 +0200 Subject: [PATCH 6/6] Use unsigned Math.BigMul --- src/libraries/System.Private.CoreLib/src/System/DateTime.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/DateTime.cs b/src/libraries/System.Private.CoreLib/src/System/DateTime.cs index 3571fff343152..39a6e35a22f52 100644 --- a/src/libraries/System.Private.CoreLib/src/System/DateTime.cs +++ b/src/libraries/System.Private.CoreLib/src/System/DateTime.cs @@ -1385,7 +1385,7 @@ internal void GetDate(out int year, out int month, out int day) // y100 = number of whole 100-year periods since 3/1/0000 // r1 = (day number within 100-year period) * 4 (uint y100, uint r1) = Math.DivRem(((uint)(UTicks / TicksPer6Hours) | 3U) + 1224, DaysPer400Years); - ulong u2 = (ulong)Math.BigMul((int)EafMultiplier, (int)r1 | 3); + ulong u2 = Math.BigMul(EafMultiplier, r1 | 3U); ushort daySinceMarch1 = (ushort)((uint)u2 / EafDivider); int n3 = 2141 * daySinceMarch1 + 197913; year = (int)(100 * y100 + (uint)(u2 >> 32)); @@ -1447,7 +1447,7 @@ public int Day { // r1 = (day number within 100-year period) * 4 uint r1 = (((uint)(UTicks / TicksPer6Hours) | 3U) + 1224) % DaysPer400Years; - ulong u2 = (ulong)Math.BigMul((int)EafMultiplier, (int)r1 | 3); + ulong u2 = Math.BigMul(EafMultiplier, r1 | 3U); ushort daySinceMarch1 = (ushort)((uint)u2 / EafDivider); int n3 = 2141 * daySinceMarch1 + 197913; // Return 1-based day-of-month @@ -1524,7 +1524,7 @@ public int Month { // r1 = (day number 
within 100-year period) * 4 uint r1 = (((uint)(UTicks / TicksPer6Hours) | 3U) + 1224) % DaysPer400Years; - ulong u2 = (ulong)Math.BigMul((int)EafMultiplier, (int)r1 | 3); + ulong u2 = Math.BigMul(EafMultiplier, r1 | 3U); ushort daySinceMarch1 = (ushort)((uint)u2 / EafDivider); int n3 = 2141 * daySinceMarch1 + 197913; return (ushort)(n3 >> 16) - (daySinceMarch1 >= March1BasedDayOfNewYear ? 12 : 0);