-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BinaryWriter perf and memory improvements #47316
Changes from 4 commits
e5e9d20
02285ba
05da1a2
2584f97
97250f2
e1d9cdb
20f5326
915e54a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Runtime.CompilerServices; | ||
|
||
namespace System.Runtime.InteropServices | ||
{ | ||
public static class SafeBufferUtil | ||
{ | ||
/// <summary> | ||
/// Creates an unmanaged buffer of the specified length. | ||
/// </summary> | ||
public static SafeBuffer CreateSafeBuffer(nuint byteLength) | ||
{ | ||
return new AllocHGlobalSafeHandle(byteLength); | ||
} | ||
|
||
private sealed class AllocHGlobalSafeHandle : SafeBuffer | ||
{ | ||
public AllocHGlobalSafeHandle(nuint cb) : base(ownsHandle: true) | ||
{ | ||
#if !NETCOREAPP | ||
RuntimeHelpers.PrepareConstrainedRegions(); | ||
#endif | ||
try | ||
{ | ||
// intentionally empty to avoid ThreadAbortException in netfx runtimes | ||
} | ||
finally | ||
{ | ||
SetHandle(Marshal.AllocHGlobal((nint)cb)); | ||
} | ||
|
||
Initialize(cb); | ||
} | ||
|
||
protected override bool ReleaseHandle() | ||
{ | ||
Marshal.FreeHGlobal(handle); | ||
return true; | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,236 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Numerics; | ||
using System.Reflection; | ||
using System.Runtime.InteropServices; | ||
using System.Text; | ||
using Moq; | ||
using Xunit; | ||
|
||
namespace System.IO.Tests | ||
{ | ||
public class BinaryWriter_EncodingTests | ||
{ | ||
[Fact] | ||
public void Ctor_Default_UsesFastUtf8() | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream()); | ||
Assert.True(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_EncodingUtf8Singleton_UsesFastUtf8() | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), Encoding.UTF8); | ||
Assert.True(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Theory] | ||
[InlineData(true, true)] | ||
[InlineData(true, false)] | ||
[InlineData(false, true)] | ||
[InlineData(false, false)] | ||
public void Ctor_NewUtf8Encoding_UsesFastUtf8(bool emitIdentifier, bool throwOnInvalidBytes) | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UTF8Encoding(emitIdentifier, throwOnInvalidBytes)); | ||
Assert.True(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_Utf8EncodingWithSingleCharReplacementChar_UsesFastUtf8() | ||
{ | ||
Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("x"), DecoderFallback.ExceptionFallback); | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding); | ||
Assert.True(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_Utf8EncodingWithMultiCharReplacementChar_DoesNotUseFastUtf8() | ||
{ | ||
Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("xx"), DecoderFallback.ExceptionFallback); | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding); | ||
Assert.False(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_NotUtf8EncodingType_DoesNotUseFastUtf8() | ||
{ | ||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UnicodeEncoding()); | ||
Assert.False(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Fact] | ||
public void Ctor_Utf8EncodingDerivedTypeWithWrongCodePage_DoesNotUseFastUtf8() | ||
{ | ||
Mock<UTF8Encoding> mockEncoding = new Mock<UTF8Encoding>(); | ||
mockEncoding.Setup(o => o.CodePage).Returns(65000 /* UTF-7 code page */); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do not think it is worth it to pick up this heavy dependency here to just save like 3 lines. For cases where it is really worth, we just need a way how to conditionally disable tests using test techniques that are incompatible with runtime mode (single file, trimming, no JIT, no reflection emit, no private reflection, etc.). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We use Moq in a few other test projects in this repo (see search results). I can remove the dependency for this project, but what does that mean for the general test framework guidance? See Steve's comment at #47316 (comment) and my response there for a little more context on why I'm using (and mocking) the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think using heavy test framework for testing high-level libraries like I do not think it is a good practice to use these heavy test frameworks for testing core platform (ie stuff in CoreLib). I agree that we should have this test (as long as the implementation stays what it is). Do you agree that the use of Moq saves you like 3 lines on code in this case? |
||
BinaryWriter writer = new BinaryWriter(new MemoryStream(), mockEncoding.Object); | ||
Assert.False(IsUsingFastUtf8(writer)); | ||
} | ||
|
||
[Theory] | ||
[InlineData('x')] // 1 UTF-8 byte | ||
[InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes) | ||
[InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes) | ||
public void WriteSingleChar_FastUtf8(char ch) | ||
{ | ||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream); | ||
|
||
writer.Write(ch); | ||
|
||
Assert.Equal(Encoding.UTF8.GetBytes(new char[] { ch }), stream.ToArray()); | ||
} | ||
|
||
[Theory] | ||
[InlineData('x')] // 1 UTF-8 byte | ||
[InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes) | ||
[InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes) | ||
public void WriteSingleChar_NotUtf8NoArrayPoolRentalNeeded(char ch) | ||
{ | ||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */); | ||
|
||
writer.Write(ch); | ||
|
||
Assert.Equal(Encoding.Unicode.GetBytes(new char[] { ch }), stream.ToArray()); | ||
} | ||
|
||
[Fact] | ||
public void WriteSingleChar_ArrayPoolRentalNeeded() | ||
{ | ||
string replacementString = new string('v', 10_000); | ||
Encoding encoding = Encoding.GetEncoding("ascii", new EncoderReplacementFallback(replacementString), DecoderFallback.ExceptionFallback); | ||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream, encoding); | ||
|
||
writer.Write('\uFFFD'); // not ASCII | ||
|
||
Assert.Equal(Encoding.ASCII.GetBytes(replacementString), stream.ToArray()); | ||
} | ||
|
||
[Theory] | ||
[InlineData(128 * 1024)] | ||
[InlineData(768 * 1024)] | ||
[InlineData(2 * 1024 * 1024)] | ||
public void WriteChars_FastUtf8(int stringLengthInChars) | ||
{ | ||
string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars); | ||
byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite); | ||
|
||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream); | ||
|
||
writer.Write(stringToWrite.ToCharArray()); // writing a char buffer doesn't emit the length upfront | ||
Assert.Equal(expectedBytes, stream.GetBuffer()[..expectedBytes.Length]); | ||
} | ||
|
||
[Theory] | ||
[InlineData(0)] | ||
[InlineData(128 * 1024)] | ||
[InlineData(768 * 1024)] | ||
[InlineData(2 * 1024 * 1024)] | ||
public void WriteString_FastUtf8(int stringLengthInChars) | ||
{ | ||
string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars); | ||
byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite); | ||
|
||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream); | ||
|
||
writer.Write(stringToWrite); | ||
stream.Position = 0; | ||
|
||
Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt()); | ||
Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]); | ||
} | ||
|
||
[Theory] | ||
[InlineData(24)] | ||
[InlineData(25)] | ||
public void WriteString_FastUtf8_UsingThreeByteChars(int stringLengthInChars) | ||
{ | ||
string stringToWrite = new string('\u2023', stringLengthInChars); // TRIANGULAR BULLET | ||
byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite); | ||
|
||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream); | ||
|
||
writer.Write(stringToWrite); | ||
stream.Position = 0; | ||
|
||
Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt()); | ||
Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]); | ||
} | ||
|
||
[Theory] | ||
[InlineData(128 * 1024)] | ||
[InlineData(768 * 1024)] | ||
[InlineData(2 * 1024 * 1024)] | ||
public void WriteString_NotUtf8(int stringLengthInChars) | ||
{ | ||
string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars); | ||
byte[] expectedBytes = Encoding.Unicode.GetBytes(stringToWrite); | ||
|
||
MemoryStream stream = new MemoryStream(); | ||
BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */); | ||
|
||
writer.Write(stringToWrite); | ||
stream.Position = 0; | ||
|
||
Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt()); | ||
Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]); | ||
} | ||
|
||
[Fact] | ||
public unsafe void WriteChars_VeryLargeArray_DoesNotOverflow() | ||
{ | ||
const nuint INPUT_LEN_IN_CHARS = 1_500_000_000; | ||
const nuint OUTPUT_LEN_IN_BYTES = 3_500_000_000; // overallocate | ||
|
||
SafeBuffer unmanagedInputBuffer; | ||
SafeBuffer unmanagedOutputBufer; | ||
try | ||
{ | ||
unmanagedInputBuffer = SafeBufferUtil.CreateSafeBuffer(INPUT_LEN_IN_CHARS * sizeof(char)); | ||
unmanagedOutputBufer = SafeBufferUtil.CreateSafeBuffer(OUTPUT_LEN_IN_BYTES * sizeof(byte)); | ||
} | ||
catch (OutOfMemoryException) | ||
{ | ||
return; // skip test in low-mem conditions | ||
} | ||
|
||
using (unmanagedInputBuffer) | ||
using (unmanagedOutputBufer) | ||
{ | ||
Span<char> inputSpan = new Span<char>((char*)unmanagedInputBuffer.DangerousGetHandle(), (int)INPUT_LEN_IN_CHARS); | ||
inputSpan.Fill('\u0224'); // LATIN CAPITAL LETTER Z WITH HOOK | ||
Stream outStream = new UnmanagedMemoryStream(unmanagedOutputBufer, 0, (long)unmanagedOutputBufer.ByteLength, FileAccess.ReadWrite); | ||
BinaryWriter writer = new BinaryWriter(outStream); | ||
|
||
writer.Write(inputSpan); // will write 3 billion bytes to the output | ||
|
||
Assert.Equal(3_000_000_000, outStream.Position); | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thank you for writing all the tests! and especially covering this particular edge case! 👍 |
||
|
||
private static bool IsUsingFastUtf8(BinaryWriter writer) | ||
{ | ||
return (bool)writer.GetType().GetField("_useFastUtf8", BindingFlags.NonPublic | BindingFlags.Instance).GetValue(writer); | ||
} | ||
|
||
private static string GenerateLargeUnicodeString(int charCount) | ||
{ | ||
return string.Create(charCount, (object)null, static (buffer, _) => | ||
{ | ||
for (int i = 0; i < buffer.Length; i++) | ||
{ | ||
buffer[i] = (char)((i % 0xF00) + 0x100); // U+0100..U+0FFF (mix of 2-byte and 3-byte chars) | ||
} | ||
}); | ||
} | ||
|
||
private static int Get7BitEncodedIntByteLength(uint value) => (BitOperations.Log2(value) / 7) + 1; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is just a test file... is this really necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. But everything is non-shipping code until it gets copied & pasted into a shipping product. :)