Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Format/Parse binary from/to BigInteger #85392

Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
3c127ea
Initial draft commit: add FormatBigIntegerToBin().
lateapexearlyspeed Apr 26, 2023
64a0ead
Fix comment: use '?:' to assign ValueStringBuilder variable to make i…
lateapexearlyspeed Apr 28, 2023
4216498
Refine FormatBigIntegerToBin(); and consider chars overflow scenario.
lateapexearlyspeed Apr 29, 2023
1bcf7f8
Update Format code for final binary format definition.
lateapexearlyspeed May 15, 2023
63923da
Refine FormatBigIntegerToBin().
lateapexearlyspeed May 23, 2023
19c701d
consider case where output is span
lateapexearlyspeed May 25, 2023
56e701f
Turn to use try..finally to return array pool.
lateapexearlyspeed May 27, 2023
817c58c
Initial add method BinNumberToBigInteger().
lateapexearlyspeed May 27, 2023
24d88c7
Update FormatProvider.Number.cs to support AllowBinarySpecifier.
lateapexearlyspeed May 30, 2023
5a90e15
Use BinNumberToBigInteger().
lateapexearlyspeed May 30, 2023
3987f71
Add tests of Format.
lateapexearlyspeed May 30, 2023
8b58eb7
Add tests of Parse().
lateapexearlyspeed Jun 1, 2023
6cea91a
Improve Format(): use ValueStringBuilder just as wrapper for destinat…
lateapexearlyspeed Jun 1, 2023
3007b48
Fix comment: use ch == '0' || ch == '1'
lateapexearlyspeed Jun 2, 2023
cd0a03d
Fix comment: refactor ParseNumber() to extract common abstract operat…
lateapexearlyspeed Aug 25, 2023
f22d2e9
Fix comment: refine naming; make BinNumberToBigInteger() general patt…
lateapexearlyspeed Sep 1, 2023
17434aa
Fix comment: use internal 'kcbitUint'.
lateapexearlyspeed Sep 15, 2023
bdfddf5
Fix comment: rename 'Bin' method names to 'Binary' ones; remove unnec…
lateapexearlyspeed Oct 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,54 @@ private static unsafe bool AllowHyphenDuringParsing(NumberFormatInfo info)
return ret;
}

/// <summary>
/// Compile-time strategy that tells the number parser which characters count as digits.
/// Implemented by the validator structs declared next to it and passed to
/// ParseNumberStyle as a generic type argument.
/// </summary>
private interface IDigitValidator
{
/// <summary>Returns whether <paramref name="c"/> is accepted as a digit by the implementing validator.</summary>
static abstract bool IsValidChar(char c);
/// <summary>
/// Returns whether the implementing validator handles hexadecimal or binary input
/// (as opposed to decimal input).
/// </summary>
static abstract bool IsHexBinary();
}

/// <summary>
/// Digit validator used when neither the hexadecimal nor the binary specifier is requested:
/// only ASCII decimal digits are treated as digits.
/// </summary>
private readonly struct IntegerDigitValidator : IDigitValidator
{
// Accepts exactly the ASCII digits '0' through '9'.
public static bool IsValidChar(char c) => char.IsAsciiDigit(c);

// Decimal input is neither hexadecimal nor binary.
public static bool IsHexBinary() => false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: IsHexOrBinary

-or- maybe IsHexOrBinaryParser and rename IDigitValidator to IDigitParser, to more closely match the other.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Longer term I'd like to see this move even closer to the IHexOrBinaryParser logic we have in corelib, but that can be done in a future PR and isn't necessary as part of this.

Having the names and general logic mostly start to line up helps with that, however.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed, thanks.

}

/// <summary>
/// Digit validator selected when hexadecimal input is being parsed.
/// </summary>
private readonly struct HexDigitValidator : IDigitValidator
{
    /// <summary>Returns whether <paramref name="c"/> is an ASCII hexadecimal digit.</summary>
    public static bool IsValidChar(char c)
    {
        return char.IsAsciiHexDigit(c);
    }

    /// <summary>Always <see langword="true"/>: this validator handles hexadecimal input.</summary>
    public static bool IsHexBinary()
    {
        return true;
    }
}

/// <summary>
/// Digit validator selected when binary input is being parsed.
/// </summary>
private readonly struct BinaryDigitValidator : IDigitValidator
{
    /// <summary>Returns whether <paramref name="c"/> is one of the two binary digits.</summary>
    public static bool IsValidChar(char c) => c == '0' || c == '1';

    /// <summary>Always <see langword="true"/>: this validator handles binary input.</summary>
    public static bool IsHexBinary()
    {
        return true;
    }
}


/// <summary>
/// Entry point for number parsing: picks the digit validator that matches
/// <paramref name="options"/> and forwards every argument unchanged to
/// ParseNumberStyle. The hexadecimal flag is tested before the binary flag;
/// when neither is set the decimal validator is used.
/// </summary>
/// <returns>Whatever the selected ParseNumberStyle instantiation returns.</returns>
private static unsafe bool ParseNumber(ref char* str, char* strEnd, NumberStyles options, scoped ref NumberBuffer number, StringBuilder? sb, NumberFormatInfo numfmt, bool parseDecimal)
{
    bool hexRequested = (options & NumberStyles.AllowHexSpecifier) != 0;
    bool binaryRequested = (options & NumberStyles.AllowBinarySpecifier) != 0;

    if (hexRequested)
    {
        return ParseNumberStyle<HexDigitValidator>(ref str, strEnd, options, ref number, sb, numfmt, parseDecimal);
    }
    else if (binaryRequested)
    {
        return ParseNumberStyle<BinaryDigitValidator>(ref str, strEnd, options, ref number, sb, numfmt, parseDecimal);
    }
    else
    {
        return ParseNumberStyle<IntegerDigitValidator>(ref str, strEnd, options, ref number, sb, numfmt, parseDecimal);
    }
}

private static unsafe bool ParseNumberStyle<TDigitValidator>(ref char* str, char* strEnd, NumberStyles options, scoped ref NumberBuffer number, StringBuilder? sb, NumberFormatInfo numfmt, bool parseDecimal)
where TDigitValidator : struct, IDigitValidator
{
Debug.Assert(str != null);
Debug.Assert(strEnd != null);
Expand Down Expand Up @@ -440,11 +487,11 @@ private static unsafe bool ParseNumber(ref char* str, char* strEnd, NumberStyles
int digEnd = 0;
while (true)
{
if (char.IsAsciiDigit(ch) || (((options & NumberStyles.AllowHexSpecifier) != 0) && char.IsBetween((char)(ch | 0x20), 'a', 'f')))
if (TDigitValidator.IsValidChar(ch))
{
state |= StateDigits;

if (ch != '0' || (state & StateNonZero) != 0 || (bigNumber && ((options & NumberStyles.AllowHexSpecifier) != 0)))
if (ch != '0' || (state & StateNonZero) != 0 || (bigNumber && TDigitValidator.IsHexBinary()))
{
if (digCount < maxParseDigits)
{
Expand Down
202 changes: 198 additions & 4 deletions src/libraries/System.Runtime.Numerics/src/System/Numerics/BigNumber.cs
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ internal static class BigNumber
| NumberStyles.AllowLeadingSign | NumberStyles.AllowTrailingSign
| NumberStyles.AllowParentheses | NumberStyles.AllowDecimalPoint
| NumberStyles.AllowThousands | NumberStyles.AllowExponent
| NumberStyles.AllowCurrencySymbol | NumberStyles.AllowHexSpecifier);
| NumberStyles.AllowCurrencySymbol | NumberStyles.AllowHexSpecifier
| NumberStyles.AllowBinarySpecifier);

private static ReadOnlySpan<uint> UInt32PowersOfTen => new uint[] { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };

Expand Down Expand Up @@ -371,10 +372,13 @@ internal static ParsingStatus TryParseBigInteger(ReadOnlySpan<char> value, Numbe
{
return HexNumberToBigInteger(ref bigNumber, out result);
}
else

if ((style & NumberStyles.AllowBinarySpecifier) != 0)
{
return NumberToBigInteger(ref bigNumber, out result);
return BinNumberToBigInteger(ref bigNumber, out result);
}

return NumberToBigInteger(ref bigNumber, out result);
}

internal static BigInteger ParseBigInteger(string value, NumberStyles style, NumberFormatInfo info)
Expand Down Expand Up @@ -511,6 +515,96 @@ private static ParsingStatus HexNumberToBigInteger(ref BigNumberBuffer number, o
}
}

/// <summary>
/// Builds a <see cref="BigInteger"/> from the binary digit characters stored in
/// <c>number.digits</c>. The first digit acts as the sign bit: a leading '1' makes the
/// result negative and the digits are then interpreted as two's complement.
/// </summary>
/// <param name="number">Parsed number buffer; its <c>digits</c> are expected to be '0'/'1' characters followed by a trailing '\0'.</param>
/// <param name="result">Receives the parsed value, or <c>default</c> when parsing fails.</param>
/// <returns>
/// <c>ParsingStatus.Failed</c> when <c>digits</c> is null or shorter than two characters
/// (one digit plus the trailing '\0'); otherwise <c>ParsingStatus.OK</c>.
/// </returns>
private static ParsingStatus BinNumberToBigInteger(ref BigNumberBuffer number, out BigInteger result)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: can we expand "Bin" to "Binary" in the name? I realize this is likely an attempt to match the conciseness of "Hex", but "Bin" and "Big" are so close that this keeps making me do a double-take to know which it was.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed method names, also for other 'Bin' related ones.

{
if (number.digits is null || number.digits.Length < 2)
{
result = default;
return ParsingStatus.Failed;
}

// Each uint block of the result holds 32 binary digits.
const int DigitsPerBlock = 32;
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

int totalDigitCount = number.digits.Length - 1; // Ignore trailing '\0'

// Ceiling division: a partial group of leading digits gets its own (most significant) block.
// After this, remainingDigitsInBlock is the number of digits that go into that first block.
int blockCount = Math.DivRem(totalDigitCount, DigitsPerBlock, out int remainingDigitsInBlock);
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
if (remainingDigitsInBlock == 0)
{
remainingDigitsInBlock = DigitsPerBlock;
}
else
{
blockCount++;
}

Debug.Assert(number.digits[0] is '0' or '1');
bool isNegative = number.digits[0] == '1';

// Small inputs use the stack; larger ones rent from the shared pool and give the array back in the finally block.
uint[]? arrayFromPool = null;
Span<uint> bufferSpan = blockCount <= BigIntegerCalculator.StackAllocThreshold ?
stackalloc uint[blockCount] : (arrayFromPool = ArrayPool<uint>.Shared.Rent(blockCount)).AsSpan(0, blockCount);
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

try
{
// Seeding with all ones sign-extends a negative value when the first block is only partially filled.
uint currentBlock = isNegative ? 0xFF_FF_FF_FF : 0x0;
// Blocks are written from the end of the buffer towards index 0, so the digits read first
// (the most significant ones) land at the highest index.
int bufferPos = blockCount;
foreach (ReadOnlyMemory<char> chunkMem in number.digits.GetChunks())
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
{
ReadOnlySpan<char> chunk = chunkMem.Span;
foreach (char c in chunk)
{
// The '\0' terminator is not a digit; stop scanning this chunk when it is reached.
if (c == '\0')
{
break;
}

Debug.Assert(c is '0' or '1');
currentBlock = (currentBlock << 1) | (uint)(c - '0');

if (--remainingDigitsInBlock == 0)
{
bufferSpan[--bufferPos] = currentBlock;
remainingDigitsInBlock = DigitsPerBlock;

// No need to reset currentBlock here: the next block receives 32 left shifts before it is stored,
// which pushes every stale bit out of the 32-bit value.
}
}

Debug.Assert(bufferPos > 0 || remainingDigitsInBlock == DigitsPerBlock);
}

Debug.Assert(bufferPos == 0 && remainingDigitsInBlock == DigitsPerBlock);

if (isNegative)
{
// NOTE(review): presumably negates the two's-complement buffer in place so it holds the magnitude;
// confirm against NumericsHelpers.
NumericsHelpers.DangerousMakeTwosComplement(bufferSpan);
}

// Drop zero blocks at the high-index (most significant) end.
bufferSpan = bufferSpan.TrimEnd(0u);
if (bufferSpan.IsEmpty)
{
result = BigInteger.Zero;
}
else if (bufferSpan.Length == 1 && bufferSpan[0] <= int.MaxValue)
{
// A single block that fits in an int is passed as the signed int value with no bits array.
result = new BigInteger((int)(isNegative ? -bufferSpan[0] : bufferSpan[0]), (uint[]?)null);
}
else
{
// Otherwise pass -1/+1 as the sign together with a heap copy of the blocks
// (the span may point at stack or pooled memory).
result = new BigInteger(isNegative ? -1 : 1, bufferSpan.ToArray());
}

return ParsingStatus.OK;
}
finally
{
if (arrayFromPool is not null)
{
ArrayPool<uint>.Shared.Return(arrayFromPool);
}
}
}

//
// This threshold is for choosing the algorithm to use based on the number of digits.
//
Expand Down Expand Up @@ -1002,6 +1096,103 @@ internal static char ParseFormatSpecifier(ReadOnlySpan<char> format, out int dig
}
}

/// <summary>
/// Formats <paramref name="value"/> as a string of '0'/'1' characters, most significant bit first,
/// with one leading sign bit ('0' for non-negative, '1' for negative values). The required length is
/// computed up front so the characters can be written straight into their final buffer.
/// </summary>
/// <param name="targetSpan"><see langword="true"/> to write into <paramref name="destination"/>; <see langword="false"/> to return a new string.</param>
/// <param name="value">The value to format.</param>
/// <param name="digits">Minimum number of characters; shorter output is left-padded with the sign character.</param>
/// <param name="destination">Output buffer used only when <paramref name="targetSpan"/> is <see langword="true"/>.</param>
/// <param name="charsWritten">Number of characters written to <paramref name="destination"/>; 0 when it is too small or when a string is returned.</param>
/// <param name="spanSuccess"><see langword="true"/> only when the output was written to <paramref name="destination"/>.</param>
/// <returns>The formatted string when <paramref name="targetSpan"/> is <see langword="false"/>; otherwise <see langword="null"/>.</returns>
/// <exception cref="FormatException">The number of characters needed for the bits exceeds <see cref="Array.MaxLength"/>.</exception>
private static string? FormatBigIntegerToBin(bool targetSpan, BigInteger value, int digits, Span<char> destination, out int charsWritten, out bool spanSuccess)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unlike existing FormatBigIntegerToHex(), this FormatBigIntegerToBin() is implemented by calculating required char length before format, this can:

  • avoids from extending ValueStringBuilder's capacity during append char;
  • for targetSpan flow, formatted chars can write to wanted destination span directly rather than allocate buffer in ValueStringBuilder and copy to destination at the end

Please give advice if not proper, thanks !

{
// Get the bytes that make up the BigInteger.
// Try a small stack buffer first; when it is too small, bytesWrittenOrNeeded is used as the size to rent.
byte[]? arrayToReturnToPool = null;
Span<byte> bytes = stackalloc byte[64]; // arbitrary threshold
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
if (!value.TryWriteOrCountBytes(bytes, out int bytesWrittenOrNeeded))
{
bytes = arrayToReturnToPool = ArrayPool<byte>.Shared.Rent(bytesWrittenOrNeeded);
bool success = value.TryWriteBytes(bytes, out _);
Debug.Assert(success);
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
}
// Keep only the bytes that belong to the value (a rented array may be longer than requested).
bytes = bytes.Slice(0, bytesWrittenOrNeeded);

Debug.Assert(!bytes.IsEmpty);

// The last byte is treated as the most significant one.
byte highByte = bytes[^1];

// 9 - LeadingZeroCount = (8 - redundant leading bits) significant bits + 1 sign bit.
// For negative values the byte is complemented first so that its leading ones are counted as the redundant bits.
int charsInHighByte = 9 - byte.LeadingZeroCount(value._sign >= 0 ? highByte : (byte)~highByte);
// Computed as long so that (byte count * 8) cannot overflow int before the range check below.
long tmpCharCount = charsInHighByte + ((long)(bytes.Length - 1) << 3);

if (tmpCharCount > Array.MaxLength)
{
// This point is before the try/finally below, so the rented array is returned here explicitly.
Debug.Assert(arrayToReturnToPool is not null);
ArrayPool<byte>.Shared.Return(arrayToReturnToPool);

throw new FormatException(SR.Format_TooLarge);
}

int charsForBits = (int)tmpCharCount;

Debug.Assert(digits < Array.MaxLength);
// Final output length: the bits themselves, or the requested minimum width if that is larger.
int charsIncludeDigits = Math.Max(digits, charsForBits);

try
{
scoped ValueStringBuilder sb;
if (targetSpan)
{
if (charsIncludeDigits > destination.Length)
{
charsWritten = 0;
spanSuccess = false;
return null;
}

// The destination is known to be large enough, so ValueStringBuilder is used only as a wrapper around it.
// This lets the span and string flows share the formatting code below, and the span flow needs no
// final copy into destination.
sb = new ValueStringBuilder(destination);
}
else
{
// each byte is typically eight chars
// Short outputs are built on the stack; longer ones use the capacity-based ValueStringBuilder constructor.
sb = charsIncludeDigits > 512 ? new ValueStringBuilder(charsIncludeDigits) : new ValueStringBuilder(stackalloc char[charsIncludeDigits]);
}

// Left-pad up to the requested width by repeating the sign character.
if (digits > charsForBits)
{
sb.Append(value._sign >= 0 ? '0' : '1', digits - charsForBits);
}

// Emit only the low charsInHighByte bits of the most significant byte.
AppendByte(ref sb, highByte, charsInHighByte - 1);

// Remaining bytes are emitted in full, from the second-highest down to index 0.
for (int i = bytes.Length - 2; i >= 0; i--)
{
AppendByte(ref sb, bytes[i]);
}

Debug.Assert(sb.Length == charsIncludeDigits);

if (targetSpan)
{
charsWritten = charsIncludeDigits;
spanSuccess = true;
return null;
}

// String flow: the span-related out values are not meaningful, so they are set to 0/false.
charsWritten = 0;
spanSuccess = false;
return sb.ToString();
}
finally
{
if (arrayToReturnToPool is not null)
{
ArrayPool<byte>.Shared.Return(arrayToReturnToPool);
}
}

// Appends bits startHighBit..0 of b, highest bit first, as '0'/'1' characters.
static void AppendByte(ref ValueStringBuilder sb, byte b, int startHighBit = 7)
{
for (int i = startHighBit; i >= 0; i--)
{
sb.Append((char)('0' + ((b >> i) & 0x1)));
}
}
}

internal static string FormatBigInteger(BigInteger value, string? format, NumberFormatInfo info)
{
return FormatBigInteger(targetSpan: false, value, format, format, info, default, out _, out _)!;
Expand All @@ -1026,7 +1217,10 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo
{
return FormatBigIntegerToHex(targetSpan, value, fmt, digits, info, destination, out charsWritten, out spanSuccess);
}

if (fmt == 'b' || fmt == 'B')
{
return FormatBigIntegerToBin(targetSpan, value, digits, destination, out charsWritten, out spanSuccess);
}

if (value._bits == null)
{
Expand Down
Loading
Loading