Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement IUtf8SpanParsable on Char and Rune #105773

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/libraries/Common/tests/System/GenericMathHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,14 @@ public static class SpanParsableHelper<TSelf>
public static bool TryParse(ReadOnlySpan<char> s, IFormatProvider provider, out TSelf result) => TSelf.TryParse(s, provider, out result);
}

/// <summary>
/// Test helper that forwards to the static members of <see cref="IUtf8SpanParsable{TSelf}"/>
/// through the generic constraint, so the members can be called for any implementing type.
/// </summary>
/// <typeparam name="TSelf">The type whose UTF-8 parsing members are invoked.</typeparam>
public static class Utf8SpanParsableHelper<TSelf>
    where TSelf : IUtf8SpanParsable<TSelf>
{
    /// <summary>Calls <c>TSelf.Parse</c> with the given UTF-8 bytes and format provider.</summary>
    public static TSelf Parse(ReadOnlySpan<byte> s, IFormatProvider provider)
    {
        return TSelf.Parse(s, provider);
    }

    /// <summary>Calls <c>TSelf.TryParse</c> with the given UTF-8 bytes and format provider.</summary>
    public static bool TryParse(ReadOnlySpan<byte> s, IFormatProvider provider, out TSelf result)
    {
        return TSelf.TryParse(s, provider, out result);
    }
}

public static class SubtractionOperatorsHelper<TSelf, TOther, TResult>
where TSelf : ISubtractionOperators<TSelf, TOther, TResult>
{
Expand Down
33 changes: 33 additions & 0 deletions src/libraries/System.Private.CoreLib/src/System/Char.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public readonly struct Char
IMinMaxValue<char>,
IUnsignedNumber<char>,
IUtf8SpanFormattable,
IUtf8SpanParsable<char>,
IUtfChar<char>,
IBinaryIntegerParseAndFormatInfo<char>
{
Expand Down Expand Up @@ -230,6 +231,38 @@ internal static bool TryParse(ReadOnlySpan<char> s, out char result)
return true;
}

/// <inheritdoc cref="IUtf8SpanParsable{TSelf}.Parse(ReadOnlySpan{byte}, IFormatProvider?)" />
static char IUtf8SpanParsable<char>.Parse(ReadOnlySpan<byte> utf8Text, IFormatProvider? provider)
{
    Buffers.OperationStatus status = Rune.DecodeFromUtf8(utf8Text, out Rune decoded, out int consumed);

    // The input must decode successfully and the decoded value must span the entire input.
    if (status != Buffers.OperationStatus.Done || consumed != utf8Text.Length)
    {
        ThrowHelper.ThrowFormatInvalidString();
    }

    // A value outside the BMP does not fit in a single char; reported as overflow rather than a format error.
    if (!decoded.IsBmp)
    {
        Number.ThrowOverflowException<char>();
    }

    return (char)decoded.Value;
}

/// <inheritdoc cref="IUtf8SpanParsable{TSelf}.TryParse(ReadOnlySpan{byte}, IFormatProvider?, out TSelf)" />
static bool IUtf8SpanParsable<char>.TryParse(ReadOnlySpan<byte> utf8Text, IFormatProvider? provider, out char result)
{
    // Success requires all three: a clean decode, the whole input consumed, and a BMP value.
    if (Rune.DecodeFromUtf8(utf8Text, out Rune decoded, out int consumed) == Buffers.OperationStatus.Done &&
        consumed == utf8Text.Length &&
        decoded.IsBmp)
    {
        result = (char)decoded.Value;
        return true;
    }

    // Every failure mode reports the null character.
    result = '\0';
    return false;
}

//
// Static Methods
//
Expand Down
25 changes: 25 additions & 0 deletions src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ namespace System.Text
#pragma warning disable SA1001 // Commas should be spaced correctly
, ISpanFormattable
, IUtf8SpanFormattable
, IUtf8SpanParsable<Rune>
#pragma warning restore SA1001
#endif
{
Expand Down Expand Up @@ -919,6 +920,30 @@ bool ISpanFormattable.TryFormat(Span<char> destination, out int charsWritten, Re
bool IUtf8SpanFormattable.TryFormat(Span<byte> utf8Destination, out int bytesWritten, ReadOnlySpan<char> format, IFormatProvider? provider) =>
TryEncodeToUtf8(utf8Destination, out bytesWritten);

/// <inheritdoc cref="IUtf8SpanParsable{TSelf}.TryParse(ReadOnlySpan{byte}, IFormatProvider?, out TSelf)" />
static bool IUtf8SpanParsable<Rune>.TryParse(ReadOnlySpan<byte> utf8Text, IFormatProvider? provider, out Rune result)
{
    OperationStatus status = DecodeFromUtf8(utf8Text, out result, out int consumed);

    if (status != OperationStatus.Done)
    {
        // Decoding failed; result keeps whatever value DecodeFromUtf8 assigned.
        return false;
    }

    if (consumed != utf8Text.Length)
    {
        // A value was decoded but bytes remain after it, so the input is rejected
        // and the decoded value is replaced with ReplacementChar.
        result = ReplacementChar;
        return false;
    }

    return true;
}

/// <inheritdoc cref="IUtf8SpanParsable{TSelf}.Parse(ReadOnlySpan{byte}, IFormatProvider?)" />
static Rune IUtf8SpanParsable<Rune>.Parse(ReadOnlySpan<byte> utf8Text, IFormatProvider? provider)
{
    // The input must decode successfully and the decoded value must span the entire input;
    // anything else (empty, malformed, or trailing bytes) is reported through the format-error throw helper.
    if (DecodeFromUtf8(utf8Text, out Rune result, out int bytesConsumed) != OperationStatus.Done || bytesConsumed != utf8Text.Length)
    {
        ThrowHelper.ThrowFormatInvalidString();
    }

    return result;
}

string IFormattable.ToString(string? format, IFormatProvider? formatProvider) => ToString();
#endif

Expand Down
6 changes: 5 additions & 1 deletion src/libraries/System.Runtime/ref/System.Runtime.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1161,6 +1161,8 @@ public CannotUnloadAppDomainException(string? message, System.Exception? innerEx
static char System.ISpanParsable<char>.Parse(System.ReadOnlySpan<char> s, System.IFormatProvider? provider) { throw null; }
static bool System.ISpanParsable<char>.TryParse(System.ReadOnlySpan<char> s, System.IFormatProvider? provider, out char result) { throw null; }
bool System.IUtf8SpanFormattable.TryFormat(System.Span<byte> utf8Destination, out int bytesWritten, System.ReadOnlySpan<char> format, System.IFormatProvider? provider) { throw null; }
static char System.IUtf8SpanParsable<char>.Parse(System.ReadOnlySpan<byte> utf8Text, System.IFormatProvider? provider) { throw null; }
static bool System.IUtf8SpanParsable<char>.TryParse(System.ReadOnlySpan<byte> utf8Text, System.IFormatProvider? provider, out char result) { throw null; }
static char System.Numerics.IAdditionOperators<char, char, char>.operator +(char left, char right) { throw null; }
static char System.Numerics.IAdditionOperators<char, char, char>.operator checked +(char left, char right) { throw null; }
int System.Numerics.IBinaryInteger<char>.GetByteCount() { throw null; }
Expand Down Expand Up @@ -15297,7 +15299,7 @@ public enum NormalizationForm
[System.Runtime.Versioning.UnsupportedOSPlatformAttribute("browser")]
FormKD = 6,
}
public readonly partial struct Rune : System.IComparable, System.IComparable<System.Text.Rune>, System.IEquatable<System.Text.Rune>, System.IFormattable, System.ISpanFormattable, System.IUtf8SpanFormattable
public readonly partial struct Rune : System.IComparable, System.IComparable<System.Text.Rune>, System.IEquatable<System.Text.Rune>, System.IFormattable, System.ISpanFormattable, System.IUtf8SpanFormattable, System.IUtf8SpanParsable<System.Text.Rune>
{
private readonly int _dummyPrimitive;
public Rune(char ch) { throw null; }
Expand Down Expand Up @@ -15353,6 +15355,8 @@ public enum NormalizationForm
string System.IFormattable.ToString(string? format, System.IFormatProvider? formatProvider) { throw null; }
bool System.ISpanFormattable.TryFormat(System.Span<char> destination, out int charsWritten, System.ReadOnlySpan<char> format, System.IFormatProvider? provider) { throw null; }
bool System.IUtf8SpanFormattable.TryFormat(System.Span<byte> utf8Destination, out int bytesWritten, System.ReadOnlySpan<char> format, System.IFormatProvider? provider) { throw null; }
static System.Text.Rune System.IUtf8SpanParsable<System.Text.Rune>.Parse(System.ReadOnlySpan<byte> utf8Text, System.IFormatProvider? provider) { throw null; }
static bool System.IUtf8SpanParsable<System.Text.Rune>.TryParse(System.ReadOnlySpan<byte> utf8Text, System.IFormatProvider? provider, out System.Text.Rune result) { throw null; }
public static System.Text.Rune ToLower(System.Text.Rune value, System.Globalization.CultureInfo culture) { throw null; }
public static System.Text.Rune ToLowerInvariant(System.Text.Rune value) { throw null; }
public override string ToString() { throw null; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
Link="System\Uri.MethodsTests.cs" />
<Compile Include="$(CommonTestPath)System\EnumTypes.cs"
Link="Common\System\EnumTypes.cs" />
<Compile Include="$(CommonTestPath)System\GenericMathHelpers.cs"
Link="Common\System\GenericMathHelpers.cs" />
<Compile Include="$(CommonTestPath)System\MockType.cs"
Link="Common\System\MockType.cs" />
<Compile Include="$(CommonTestPath)Tests\System\StringTests.cs"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,48 @@ public static void Parse_Invalid(string s, Type exceptionType)
Assert.Throws(exceptionType, () => char.Parse(s));
}

[Theory]
[InlineData(new byte[] { 0x30 }, '\u0030')] // ASCII byte
[InlineData(new byte[] { 0xC3, 0x90 }, '\u00d0')] // [ C3 90 ] is U+00D0 LATIN CAPITAL LETTER ETH
[InlineData(new byte[] { 0xE2, 0x88, 0xB4 }, '\u2234')] // [ E2 88 B4 ] is U+2234 THEREFORE
public static void ParseUtf8(byte[] data, char expectedChar)
{
    // Throwing entry point.
    char parsed = Utf8SpanParsableHelper<char>.Parse(data, null);
    Assert.Equal(expectedChar, parsed);

    // Non-throwing entry point must agree with it.
    bool succeeded = Utf8SpanParsableHelper<char>.TryParse(data, null, out char actualChar);
    Assert.True(succeeded);
    Assert.Equal(expectedChar, actualChar);
}

[Theory]
[InlineData(new byte[0], typeof(FormatException))] // empty buffer
[InlineData(new byte[] { 0x30, 0x40, 0x50 }, typeof(FormatException))] // Multiple ASCII bytes
[InlineData(new byte[] { 0x80 }, typeof(FormatException))] // standalone continuation byte
[InlineData(new byte[] { 0x80, 0x80, 0x80 }, typeof(FormatException))] // multiple standalone continuation bytes
[InlineData(new byte[] { 0xC1 }, typeof(FormatException))] // C1 is never a valid UTF-8 byte
[InlineData(new byte[] { 0xF5 }, typeof(FormatException))] // F5 is never a valid UTF-8 byte
[InlineData(new byte[] { 0xC2 }, typeof(FormatException))] // C2 is a valid byte; expecting it to be followed by a continuation byte
[InlineData(new byte[] { 0xED }, typeof(FormatException))] // ED is a valid byte; expecting it to be followed by a continuation byte
[InlineData(new byte[] { 0xF4 }, typeof(FormatException))] // F4 is a valid byte; expecting it to be followed by a continuation byte
[InlineData(new byte[] { 0xC2, 0xC2 }, typeof(FormatException))] // C2 not followed by continuation byte
[InlineData(new byte[] { 0xC1, 0xBF }, typeof(FormatException))] // [ C1 BF ] is overlong 2-byte sequence, all overlong sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xE0, 0x9F }, typeof(FormatException))] // [ E0 9F ] is overlong 3-byte sequence, all overlong sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xE0, 0xA0 }, typeof(FormatException))] // [ E0 A0 ] is valid 2-byte start of 3-byte sequence
[InlineData(new byte[] { 0xED, 0x9F }, typeof(FormatException))] // [ ED 9F ] is valid 2-byte start of 3-byte sequence
[InlineData(new byte[] { 0xED, 0xBF }, typeof(FormatException))] // [ ED BF ] would place us in UTF-16 surrogate range, all surrogate sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xEE, 0x80 }, typeof(FormatException))] // [ EE 80 ] is valid 2-byte start of 3-byte sequence
[InlineData(new byte[] { 0xF0, 0x8F }, typeof(FormatException))] // [ F0 8F ] is overlong 4-byte sequence, all overlong sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xF0, 0x90 }, typeof(FormatException))] // [ F0 90 ] is valid 2-byte start of 4-byte sequence
[InlineData(new byte[] { 0xF4, 0x90 }, typeof(FormatException))] // [ F4 90 ] would place us beyond U+10FFFF, all such sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xE2, 0x88, 0xC0 }, typeof(FormatException))] // [ E2 88 ] followed by non-continuation byte, maximal invalid subsequence length 2
[InlineData(new byte[] { 0xF0, 0x9F, 0x98 }, typeof(FormatException))] // [ F0 9F 98 ] is valid 3-byte start of 4-byte sequence
[InlineData(new byte[] { 0xF0, 0x9F, 0x98, 0x20 }, typeof(FormatException))] // [ F0 9F 98 ] followed by non-continuation byte, maximal invalid subsequence length 3
[InlineData(new byte[] { 0xF0, 0x9F, 0x98, 0xB2 }, typeof(OverflowException))] // [ F0 9F 98 B2 ] is U+1F632 ASTONISHED FACE; outside char range
public static void ParseUtf8_Invalid(byte[] data, Type exceptionType)
{
    // Parse must throw the exception type given for this row.
    Assert.Throws(exceptionType, () => Utf8SpanParsableHelper<char>.Parse(data, null));

    // TryParse must fail for every row and hand back the null character.
    bool succeeded = Utf8SpanParsableHelper<char>.TryParse(data, null, out char actualChar);
    Assert.False(succeeded);
    Assert.Equal('\0', actualChar);
}

private static IEnumerable<char> GetTestCharsNotInCategory(params UnicodeCategory[] categories)
{
Assert.Equal(s_latinTestSet.Length, s_unicodeTestSet.Length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,48 @@ public static void DecodeFromUtf8(byte[] data, OperationStatus expectedOperation
Assert.Equal(expectedBytesConsumed, actualBytesConsumed);
}

[Theory]
[InlineData(new byte[] { 0x30 }, 0x0030)] // ASCII byte
[InlineData(new byte[] { 0xC3, 0x90 }, 0x00D0)] // [ C3 90 ] is U+00D0 LATIN CAPITAL LETTER ETH
[InlineData(new byte[] { 0xE2, 0x88, 0xB4 }, 0x2234)] // [ E2 88 B4 ] is U+2234 THEREFORE
[InlineData(new byte[] { 0xF0, 0x9F, 0x98, 0xB2 }, 0x1F632)] // [ F0 9F 98 B2 ] is U+1F632 ASTONISHED FACE
public static void ParseUtf8(byte[] data, int expectedRuneValue)
{
    // Throwing entry point.
    Rune parsed = Utf8SpanParsableHelper<Rune>.Parse(data, null);
    Assert.Equal(expectedRuneValue, parsed.Value);

    // Non-throwing entry point must agree with it.
    bool succeeded = Utf8SpanParsableHelper<Rune>.TryParse(data, null, out Rune actualRune);
    Assert.True(succeeded);
    Assert.Equal(expectedRuneValue, actualRune.Value);
}

[Theory]
[InlineData(new byte[0])] // empty buffer
[InlineData(new byte[] { 0x30, 0x40, 0x50 })] // Multiple ASCII bytes
[InlineData(new byte[] { 0x80 })] // standalone continuation byte
[InlineData(new byte[] { 0x80, 0x80, 0x80 })] // multiple standalone continuation bytes
[InlineData(new byte[] { 0xC1 })] // C1 is never a valid UTF-8 byte
[InlineData(new byte[] { 0xF5 })] // F5 is never a valid UTF-8 byte
[InlineData(new byte[] { 0xC2 })] // C2 is a valid byte; expecting it to be followed by a continuation byte
[InlineData(new byte[] { 0xED })] // ED is a valid byte; expecting it to be followed by a continuation byte
[InlineData(new byte[] { 0xF4 })] // F4 is a valid byte; expecting it to be followed by a continuation byte
[InlineData(new byte[] { 0xC2, 0xC2 })] // C2 not followed by continuation byte
[InlineData(new byte[] { 0xC1, 0xBF })] // [ C1 BF ] is overlong 2-byte sequence, all overlong sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xE0, 0x9F })] // [ E0 9F ] is overlong 3-byte sequence, all overlong sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xE0, 0xA0 })] // [ E0 A0 ] is valid 2-byte start of 3-byte sequence
[InlineData(new byte[] { 0xED, 0x9F })] // [ ED 9F ] is valid 2-byte start of 3-byte sequence
[InlineData(new byte[] { 0xED, 0xBF })] // [ ED BF ] would place us in UTF-16 surrogate range, all surrogate sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xEE, 0x80 })] // [ EE 80 ] is valid 2-byte start of 3-byte sequence
[InlineData(new byte[] { 0xF0, 0x8F })] // [ F0 8F ] is overlong 4-byte sequence, all overlong sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xF0, 0x90 })] // [ F0 90 ] is valid 2-byte start of 4-byte sequence
[InlineData(new byte[] { 0xF4, 0x90 })] // [ F4 90 ] would place us beyond U+10FFFF, all such sequences have maximal invalid subsequence length 1
[InlineData(new byte[] { 0xE2, 0x88, 0xC0 })] // [ E2 88 ] followed by non-continuation byte, maximal invalid subsequence length 2
[InlineData(new byte[] { 0xF0, 0x9F, 0x98 })] // [ F0 9F 98 ] is valid 3-byte start of 4-byte sequence
[InlineData(new byte[] { 0xF0, 0x9F, 0x98, 0x20 })] // [ F0 9F 98 ] followed by non-continuation byte, maximal invalid subsequence length 3
public static void ParseUtf8_Invalid(byte[] data)
{
    // Parse must reject every row with a FormatException.
    Assert.Throws<FormatException>(() => Utf8SpanParsableHelper<Rune>.Parse(data, null));

    // TryParse must fail for every row and hand back the replacement character.
    bool succeeded = Utf8SpanParsableHelper<Rune>.TryParse(data, null, out Rune actualRune);
    Assert.False(succeeded);
    Assert.Equal(Rune.ReplacementChar, actualRune);
}

[Theory]
[InlineData(new byte[0], OperationStatus.NeedMoreData, 0xFFFD, 0)] // empty buffer
[InlineData(new byte[] { 0x30 }, OperationStatus.Done, 0x0030, 1)] // ASCII byte
Expand Down
Loading