Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BinaryWriter perf and memory improvements #47316

Merged
merged 8 commits into from
Jan 29, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.CompilerServices;

namespace System.Runtime.InteropServices
{
public static class SafeBufferUtil
{
/// <summary>
/// Creates an unmanaged buffer of the specified length.
/// </summary>
public static SafeBuffer CreateSafeBuffer(nuint byteLength)
{
return new AllocHGlobalSafeHandle(byteLength);
}

private sealed class AllocHGlobalSafeHandle : SafeBuffer
{
public AllocHGlobalSafeHandle(nuint cb) : base(ownsHandle: true)
{
#if !NETCOREAPP
RuntimeHelpers.PrepareConstrainedRegions();
#endif
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a test file... is this really necessary?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. But everything is non-shipping code until it gets copied & pasted into a shipping product. :)

try
{
// intentionally empty to avoid ThreadAbortException in netfx runtimes
}
finally
{
SetHandle(Marshal.AllocHGlobal((nint)cb));
}

Initialize(cb);
}

protected override bool ReleaseHandle()
{
Marshal.FreeHGlobal(handle);
return true;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<Compile Include="System\Buffers\BoundedMemory.Unix.cs" />
<Compile Include="System\Buffers\BoundedMemory.Windows.cs" />
<Compile Include="System\Buffers\PoisonPagePlacement.cs" />
<Compile Include="System\Runtime\InteropServices\SafeBufferUtil.cs" />
<Compile Include="System\IO\FileCleanupTestBase.cs" />
<Compile Include="System\TestEnvironment.cs" />
<Compile Include="System\ThreadCultureChange.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Numerics;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Text;
using Moq;
using Xunit;

namespace System.IO.Tests
{
public class BinaryWriter_EncodingTests
{
[Fact]
public void Ctor_Default_UsesFastUtf8()
{
BinaryWriter writer = new BinaryWriter(new MemoryStream());
Assert.True(IsUsingFastUtf8(writer));
}

[Fact]
public void Ctor_EncodingUtf8Singleton_UsesFastUtf8()
{
BinaryWriter writer = new BinaryWriter(new MemoryStream(), Encoding.UTF8);
Assert.True(IsUsingFastUtf8(writer));
}

[Theory]
[InlineData(true, true)]
[InlineData(true, false)]
[InlineData(false, true)]
[InlineData(false, false)]
public void Ctor_NewUtf8Encoding_UsesFastUtf8(bool emitIdentifier, bool throwOnInvalidBytes)
{
BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UTF8Encoding(emitIdentifier, throwOnInvalidBytes));
Assert.True(IsUsingFastUtf8(writer));
}

[Fact]
public void Ctor_Utf8EncodingWithSingleCharReplacementChar_UsesFastUtf8()
{
Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("x"), DecoderFallback.ExceptionFallback);
BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding);
Assert.True(IsUsingFastUtf8(writer));
}

[Fact]
public void Ctor_Utf8EncodingWithMultiCharReplacementChar_DoesNotUseFastUtf8()
{
Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("xx"), DecoderFallback.ExceptionFallback);
BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding);
Assert.False(IsUsingFastUtf8(writer));
}

[Fact]
public void Ctor_NotUtf8EncodingType_DoesNotUseFastUtf8()
{
BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UnicodeEncoding());
Assert.False(IsUsingFastUtf8(writer));
}

[Fact]
public void Ctor_Utf8EncodingDerivedTypeWithWrongCodePage_DoesNotUseFastUtf8()
{
Mock<UTF8Encoding> mockEncoding = new Mock<UTF8Encoding>();
mockEncoding.Setup(o => o.CodePage).Returns(65000 /* UTF-7 code page */);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am ok with using Mock<T> as long as we don't have any AOT test suite that is going to fail.

obraz

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think it is worth it to pick up this heavy dependency here to just save like 3 lines.

For cases where it is really worth, we just need a way how to conditionally disable tests using test techniques that are incompatible with runtime mode (single file, trimming, no JIT, no reflection emit, no private reflection, etc.).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use Moq in a few other test projects in this repo (see search results). I can remove the dependency for this project, but what does that mean for the general test framework guidance?

See Steve's comment at #47316 (comment) and my response there for a little more context on why I'm using (and mocking) the CodePage property in the first place. We could tweak that logic and render the whole thing moot.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using heavy test framework for testing high-level libraries like Microsoft.Extensions.* is fine.

I do not think it is a good practice to use these heavy test frameworks for testing core platform (ie stuff in CoreLib).

I agree that we should have this test (as long as the implementation stays what it is). Do you agree that the use of Moq saves you like 3 lines on code in this case?

BinaryWriter writer = new BinaryWriter(new MemoryStream(), mockEncoding.Object);
Assert.False(IsUsingFastUtf8(writer));
}

[Theory]
[InlineData('x')] // 1 UTF-8 byte
[InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes)
[InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes)
public void WriteSingleChar_FastUtf8(char ch)
{
MemoryStream stream = new MemoryStream();
BinaryWriter writer = new BinaryWriter(stream);

writer.Write(ch);

Assert.Equal(Encoding.UTF8.GetBytes(new char[] { ch }), stream.ToArray());
}

[Theory]
[InlineData('x')] // 1 UTF-8 byte
[InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes)
[InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes)
public void WriteSingleChar_NotUtf8NoArrayPoolRentalNeeded(char ch)
{
MemoryStream stream = new MemoryStream();
BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */);

writer.Write(ch);

Assert.Equal(Encoding.Unicode.GetBytes(new char[] { ch }), stream.ToArray());
}

[Fact]
public void WriteSingleChar_ArrayPoolRentalNeeded()
{
string replacementString = new string('v', 10_000);
Encoding encoding = Encoding.GetEncoding("ascii", new EncoderReplacementFallback(replacementString), DecoderFallback.ExceptionFallback);
MemoryStream stream = new MemoryStream();
BinaryWriter writer = new BinaryWriter(stream, encoding);

writer.Write('\uFFFD'); // not ASCII

Assert.Equal(Encoding.ASCII.GetBytes(replacementString), stream.ToArray());
}

[Theory]
[InlineData(128 * 1024)]
[InlineData(768 * 1024)]
[InlineData(2 * 1024 * 1024)]
public void WriteChars_FastUtf8(int stringLengthInChars)
{
string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars);
byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite);

MemoryStream stream = new MemoryStream();
BinaryWriter writer = new BinaryWriter(stream);

writer.Write(stringToWrite.ToCharArray()); // writing a char buffer doesn't emit the length upfront
Assert.Equal(expectedBytes, stream.GetBuffer()[..expectedBytes.Length]);
}

[Theory]
[InlineData(0)]
[InlineData(128 * 1024)]
[InlineData(768 * 1024)]
[InlineData(2 * 1024 * 1024)]
public void WriteString_FastUtf8(int stringLengthInChars)
{
string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars);
byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite);

MemoryStream stream = new MemoryStream();
BinaryWriter writer = new BinaryWriter(stream);

writer.Write(stringToWrite);
stream.Position = 0;

Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt());
Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]);
}

[Theory]
[InlineData(24)]
[InlineData(25)]
public void WriteString_FastUtf8_UsingThreeByteChars(int stringLengthInChars)
{
string stringToWrite = new string('\u2023', stringLengthInChars); // TRIANGULAR BULLET
byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite);

MemoryStream stream = new MemoryStream();
BinaryWriter writer = new BinaryWriter(stream);

writer.Write(stringToWrite);
stream.Position = 0;

Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt());
Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]);
}

[Theory]
[InlineData(128 * 1024)]
[InlineData(768 * 1024)]
[InlineData(2 * 1024 * 1024)]
public void WriteString_NotUtf8(int stringLengthInChars)
{
string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars);
byte[] expectedBytes = Encoding.Unicode.GetBytes(stringToWrite);

MemoryStream stream = new MemoryStream();
BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */);

writer.Write(stringToWrite);
stream.Position = 0;

Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt());
Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]);
}

[Fact]
public unsafe void WriteChars_VeryLargeArray_DoesNotOverflow()
{
const nuint INPUT_LEN_IN_CHARS = 1_500_000_000;
const nuint OUTPUT_LEN_IN_BYTES = 3_500_000_000; // overallocate

SafeBuffer unmanagedInputBuffer;
SafeBuffer unmanagedOutputBufer;
try
{
unmanagedInputBuffer = SafeBufferUtil.CreateSafeBuffer(INPUT_LEN_IN_CHARS * sizeof(char));
unmanagedOutputBufer = SafeBufferUtil.CreateSafeBuffer(OUTPUT_LEN_IN_BYTES * sizeof(byte));
}
catch (OutOfMemoryException)
{
return; // skip test in low-mem conditions
}

using (unmanagedInputBuffer)
using (unmanagedOutputBufer)
{
Span<char> inputSpan = new Span<char>((char*)unmanagedInputBuffer.DangerousGetHandle(), (int)INPUT_LEN_IN_CHARS);
inputSpan.Fill('\u0224'); // LATIN CAPITAL LETTER Z WITH HOOK
Stream outStream = new UnmanagedMemoryStream(unmanagedOutputBufer, 0, (long)unmanagedOutputBufer.ByteLength, FileAccess.ReadWrite);
BinaryWriter writer = new BinaryWriter(outStream);

writer.Write(inputSpan); // will write 3 billion bytes to the output

Assert.Equal(3_000_000_000, outStream.Position);
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you for writing all the tests! and especially covering this particular edge case! 👍


private static bool IsUsingFastUtf8(BinaryWriter writer)
{
return (bool)writer.GetType().GetField("_useFastUtf8", BindingFlags.NonPublic | BindingFlags.Instance).GetValue(writer);
}

private static string GenerateLargeUnicodeString(int charCount)
{
return string.Create(charCount, (object)null, static (buffer, _) =>
{
for (int i = 0; i < buffer.Length; i++)
{
buffer[i] = (char)((i % 0xF00) + 0x100); // U+0100..U+0FFF (mix of 2-byte and 3-byte chars)
}
});
}

private static int Get7BitEncodedIntByteLength(uint value) => (BitOperations.Log2(value) / 7) + 1;
}
}
4 changes: 4 additions & 0 deletions src/libraries/System.IO/tests/System.IO.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<Compile Include="StreamWriter\StreamWriter.cs" />
<Compile Include="StreamWriter\StreamWriter.StringCtorTests.cs" />
<Compile Include="BinaryWriter\BinaryWriter.DisposeAsync.cs" />
<Compile Include="BinaryWriter\BinaryWriter.EncodingTests.cs" />
<Compile Include="BinaryWriter\BinaryWriter.WriteByteCharTests.cs" />
<Compile Include="BinaryWriter\BinaryWriter.WriteTests.cs" />
<Compile Include="BinaryWriter\BinaryWriterTests.cs" />
Expand Down Expand Up @@ -58,4 +59,7 @@
<Compile Include="$(CommonPath)System\Net\StreamBuffer.cs" Link="ProductionCode\Common\System\Net\StreamBuffer.cs" />
<Compile Include="$(CommonPath)System\Threading\Tasks\TaskToApm.cs" Link="Common\System\Threading\Tasks\TaskToApm.cs" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Moq" Version="$(MoqVersion)" />
</ItemGroup>
</Project>
14 changes: 6 additions & 8 deletions src/libraries/System.Private.CoreLib/src/System/Decimal.cs
Original file line number Diff line number Diff line change
Expand Up @@ -587,16 +587,14 @@ public static bool TryGetBits(decimal d, Span<int> destination, out int valuesWr
return true;
}

internal static void GetBytes(in decimal d, byte[] buffer)
internal static void GetBytes(in decimal d, Span<byte> buffer)
{
Debug.Assert(buffer != null && buffer.Length >= 16, "[GetBytes]buffer != null && buffer.Length >= 16");
Debug.Assert(buffer.Length >= 16, "buffer.Length >= 16");

Span<byte> span = buffer;

BinaryPrimitives.WriteInt32LittleEndian(span, (int)d.Low);
BinaryPrimitives.WriteInt32LittleEndian(span.Slice(4), (int)d.Mid);
BinaryPrimitives.WriteInt32LittleEndian(span.Slice(8), (int)d.High);
BinaryPrimitives.WriteInt32LittleEndian(span.Slice(12), d._flags);
BinaryPrimitives.WriteInt32LittleEndian(buffer, (int)d.Low);
BinaryPrimitives.WriteInt32LittleEndian(buffer.Slice(4), (int)d.Mid);
BinaryPrimitives.WriteInt32LittleEndian(buffer.Slice(8), (int)d.High);
BinaryPrimitives.WriteInt32LittleEndian(buffer.Slice(12), d._flags);
}

internal static decimal ToDecimal(ReadOnlySpan<byte> span)
Expand Down
Loading