// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Runtime.CompilerServices;

namespace System.Runtime.InteropServices
{
    /// <summary>
    /// Test helper that hands out <see cref="SafeBuffer"/> instances backed by
    /// memory obtained from <see cref="Marshal.AllocHGlobal(nint)"/>.
    /// </summary>
    public static class SafeBufferUtil
    {
        /// <summary>
        /// Creates an unmanaged buffer of the specified length.
        /// </summary>
        /// <param name="byteLength">Size of the buffer, in bytes.</param>
        /// <returns>A buffer that frees its memory when released.</returns>
        public static SafeBuffer CreateSafeBuffer(nuint byteLength) => new AllocHGlobalSafeHandle(byteLength);

        /// <summary>
        /// <see cref="SafeBuffer"/> that owns a block allocated with AllocHGlobal and
        /// releases it with FreeHGlobal.
        /// </summary>
        private sealed class AllocHGlobalSafeHandle : SafeBuffer
        {
            public AllocHGlobalSafeHandle(nuint byteCount)
                : base(ownsHandle: true)
            {
#if !NETCOREAPP
                RuntimeHelpers.PrepareConstrainedRegions();
#endif
                try
                {
                    // intentionally empty to avoid ThreadAbortException in netfx runtimes
                }
                finally
                {
                    // Allocation and handle publication happen together inside the finally block.
                    nint block = Marshal.AllocHGlobal((nint)byteCount);
                    SetHandle(block);
                }

                // Records the usable length so SafeBuffer can bounds-check reads and writes.
                Initialize(byteCount);
            }

            protected override bool ReleaseHandle()
            {
                Marshal.FreeHGlobal(handle);
                return true;
            }
        }
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Numerics;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Text;
using Xunit;

namespace System.IO.Tests
{
    /// <summary>
    /// Tests for how <see cref="BinaryWriter"/> selects its UTF-8 fast path and for the
    /// bytes it emits for chars, char buffers and strings under different encodings.
    /// </summary>
    public class BinaryWriter_EncodingTests
    {
        [Fact]
        public void Ctor_Default_UsesFastUtf8()
        {
            BinaryWriter writer = new BinaryWriter(new MemoryStream());
            Assert.True(IsUsingFastUtf8(writer));
        }

        [Fact]
        public void Ctor_EncodingUtf8Singleton_UsesFastUtf8()
        {
            BinaryWriter writer = new BinaryWriter(new MemoryStream(), Encoding.UTF8);
            Assert.True(IsUsingFastUtf8(writer));
        }

        [Theory]
        [InlineData(true, true)]
        [InlineData(true, false)]
        [InlineData(false, true)]
        [InlineData(false, false)]
        public void Ctor_NewUtf8Encoding_UsesFastUtf8(bool emitIdentifier, bool throwOnInvalidBytes)
        {
            BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UTF8Encoding(emitIdentifier, throwOnInvalidBytes));
            Assert.True(IsUsingFastUtf8(writer));
        }

        [Fact]
        public void Ctor_Utf8EncodingWithSingleCharReplacementChar_UsesFastUtf8()
        {
            Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("x"), DecoderFallback.ExceptionFallback);
            BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding);
            Assert.True(IsUsingFastUtf8(writer));
        }

        [Fact]
        public void Ctor_Utf8EncodingWithMultiCharReplacementChar_DoesNotUseFastUtf8()
        {
            Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("xx"), DecoderFallback.ExceptionFallback);
            BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding);
            Assert.False(IsUsingFastUtf8(writer));
        }

        [Fact]
        public void Ctor_NotUtf8EncodingType_DoesNotUseFastUtf8()
        {
            BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UnicodeEncoding());
            Assert.False(IsUsingFastUtf8(writer));
        }

        [Fact]
        public void Ctor_Utf8EncodingDerivedTypeWithWrongCodePage_DoesNotUseFastUtf8()
        {
            BinaryWriter writer = new BinaryWriter(new MemoryStream(), new NotActuallyUTF8Encoding());
            Assert.False(IsUsingFastUtf8(writer));
        }

        // A UTF8Encoding subclass that keeps the UTF-8 code page is expected to take the fast path.
        [Fact]
        public void Ctor_Utf8EncodingDerivedTypeWithCorrectCodePage_UsesFastUtf8()
        {
            BinaryWriter writer = new BinaryWriter(new MemoryStream(), new MyCustomUTF8Encoding());
            Assert.True(IsUsingFastUtf8(writer));
        }

        [Theory]
        [InlineData('x')] // 1 UTF-8 byte
        [InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes)
        [InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes)
        public void WriteSingleChar_FastUtf8(char ch)
        {
            MemoryStream stream = new MemoryStream();
            BinaryWriter writer = new BinaryWriter(stream);

            writer.Write(ch);

            Assert.Equal(Encoding.UTF8.GetBytes(new char[] { ch }), stream.ToArray());
        }

        [Theory]
        [InlineData('x')] // 1 UTF-8 byte
        [InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes)
        [InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes)
        public void WriteSingleChar_NotUtf8NoArrayPoolRentalNeeded(char ch)
        {
            MemoryStream stream = new MemoryStream();
            BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */);

            writer.Write(ch);

            Assert.Equal(Encoding.Unicode.GetBytes(new char[] { ch }), stream.ToArray());
        }

        [Fact]
        public void WriteSingleChar_ArrayPoolRentalNeeded()
        {
            // A 10,000-char replacement string makes the single-char byte budget exceed the writer's stack buffer.
            string replacementString = new string('v', 10_000);
            Encoding encoding = Encoding.GetEncoding("ascii", new EncoderReplacementFallback(replacementString), DecoderFallback.ExceptionFallback);
            MemoryStream stream = new MemoryStream();
            BinaryWriter writer = new BinaryWriter(stream, encoding);

            writer.Write('\uFFFD'); // not ASCII

            Assert.Equal(Encoding.ASCII.GetBytes(replacementString), stream.ToArray());
        }

        [Theory]
        [InlineData(8 * 1024)] // both char count & byte count within 64k rental boundary
        [InlineData(32 * 1024)] // char count within 64k rental boundary, byte count not
        [InlineData(256 * 1024)] // neither char count nor byte count within 64k rental boundary
        public void WriteChars_FastUtf8(int stringLengthInChars)
        {
            string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars);
            byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite);

            MemoryStream stream = new MemoryStream();
            BinaryWriter writer = new BinaryWriter(stream);

            writer.Write(stringToWrite.ToCharArray()); // writing a char buffer doesn't emit the length upfront

            // Check the length too, so extra bytes after the expected payload are detected.
            Assert.Equal(expectedBytes.Length, stream.Length);
            Assert.Equal(expectedBytes, stream.GetBuffer()[..expectedBytes.Length]);
        }

        [Theory]
        [InlineData(24)] // within stackalloc path
        [InlineData(8 * 1024)] // both char count & byte count within 64k rental boundary
        [InlineData(32 * 1024)] // char count within 64k rental boundary, byte count not
        [InlineData(256 * 1024)] // neither char count nor byte count within 64k rental boundary
        public void WriteString_FastUtf8(int stringLengthInChars)
        {
            string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars);
            byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite);

            MemoryStream stream = new MemoryStream();
            BinaryWriter writer = new BinaryWriter(stream);

            writer.Write(stringToWrite);
            stream.Position = 0;

            Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt());
            Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]);
        }

        [Theory]
        [InlineData(127 / 3)] // within stackalloc fast path
        [InlineData(127 / 3 + 1)] // not within stackalloc fast path
        public void WriteString_FastUtf8_UsingThreeByteChars(int stringLengthInChars)
        {
            string stringToWrite = new string('\u2023', stringLengthInChars); // TRIANGULAR BULLET
            byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite);

            MemoryStream stream = new MemoryStream();
            BinaryWriter writer = new BinaryWriter(stream);

            writer.Write(stringToWrite);
            stream.Position = 0;

            Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt());
            Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]);
        }

        [Theory]
        [InlineData(8 * 1024)] // both char count & byte count within 64k rental boundary
        [InlineData(48 * 1024)] // char count within 64k rental boundary, byte count not
        [InlineData(256 * 1024)] // neither char count nor byte count within 64k rental boundary
        public void WriteString_NotUtf8(int stringLengthInChars)
        {
            string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars);
            byte[] expectedBytes = Encoding.Unicode.GetBytes(stringToWrite);

            MemoryStream stream = new MemoryStream();
            BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */);

            writer.Write(stringToWrite);
            stream.Position = 0;

            Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt());
            Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]);
        }

        [Fact]
        [PlatformSpecific(~TestPlatforms.Android)] // OOM on Android could be uncatchable & kill the test runner
        public unsafe void WriteChars_VeryLargeArray_DoesNotOverflow()
        {
            const nuint InputLengthInChars = 1_500_000_000;
            const nuint OutputLengthInBytes = 3_500_000_000; // overallocate

            SafeBuffer unmanagedInputBuffer = null;
            SafeBuffer unmanagedOutputBuffer = null;
            try
            {
                try
                {
                    unmanagedInputBuffer = SafeBufferUtil.CreateSafeBuffer(InputLengthInChars * sizeof(char));
                    unmanagedOutputBuffer = SafeBufferUtil.CreateSafeBuffer(OutputLengthInBytes * sizeof(byte));
                }
                catch (OutOfMemoryException)
                {
                    return; // skip test in low-mem conditions
                }

                Span<char> inputSpan = new Span<char>((char*)unmanagedInputBuffer.DangerousGetHandle(), (int)InputLengthInChars);
                inputSpan.Fill('\u0224'); // LATIN CAPITAL LETTER Z WITH HOOK
                Stream outStream = new UnmanagedMemoryStream(unmanagedOutputBuffer, 0, (long)unmanagedOutputBuffer.ByteLength, FileAccess.ReadWrite);
                BinaryWriter writer = new BinaryWriter(outStream);

                writer.Write(inputSpan); // will write 3 billion bytes to the output

                Assert.Equal(3_000_000_000, outStream.Position);
            }
            finally
            {
                unmanagedInputBuffer?.Dispose();
                unmanagedOutputBuffer?.Dispose();
            }
        }

        /// <summary>
        /// Reads the writer's private <c>_useFastUtf8</c> flag through reflection.
        /// </summary>
        private static bool IsUsingFastUtf8(BinaryWriter writer)
        {
            // Look the field up on BinaryWriter itself: private fields are not visible through a derived type.
            FieldInfo field = typeof(BinaryWriter).GetField("_useFastUtf8", BindingFlags.NonPublic | BindingFlags.Instance);
            Assert.NotNull(field); // fail with a clear message if the implementation field is renamed
            return (bool)field.GetValue(writer);
        }

        /// <summary>
        /// Builds a deterministic string of <paramref name="charCount"/> chars cycling through U+0100..U+0FFF.
        /// </summary>
        private static string GenerateLargeUnicodeString(int charCount)
        {
            return string.Create(charCount, (object)null, static (buffer, _) =>
            {
                for (int i = 0; i < buffer.Length; i++)
                {
                    buffer[i] = (char)((i % 0xF00) + 0x100); // U+0100..U+0FFF (mix of 2-byte and 3-byte chars)
                }
            });
        }

        // Number of bytes the 7-bit-encoded length prefix occupies: one byte per started group of 7 bits.
        private static int Get7BitEncodedIntByteLength(uint value) => (BitOperations.Log2(value) / 7) + 1;

        // subclasses UTF8Encoding, but returns a non-UTF8 code page
        private class NotActuallyUTF8Encoding : UTF8Encoding
        {
            public override int CodePage => 65000; // UTF-7 code page
        }

        // subclasses UTF8Encoding, returns UTF-8 code page
        private class MyCustomUTF8Encoding : UTF8Encoding
        {
        }
    }
}
// System.Decimal (src/libraries/System.Private.CoreLib/src/System/Decimal.cs)
//
// Writes the four 32-bit parts of a decimal into the first 16 bytes of the
// destination, each as a little-endian Int32, in the order low, mid, high, flags.
// The caller must supply at least 16 bytes; this is only checked by Debug.Assert.
internal static void GetBytes(in decimal d, Span<byte> buffer)
{
    Debug.Assert(buffer.Length >= 16, "buffer.Length >= 16");

    BinaryPrimitives.WriteInt32LittleEndian(buffer, (int)d.Low);
    BinaryPrimitives.WriteInt32LittleEndian(buffer.Slice(4), (int)d.Mid);
    BinaryPrimitives.WriteInt32LittleEndian(buffer.Slice(8), (int)d.High);
    BinaryPrimitives.WriteInt32LittleEndian(buffer.Slice(12), d._flags);
}
-using System.Text; -using System.Diagnostics; using System.Buffers; -using System.Threading.Tasks; using System.Buffers.Binary; +using System.Runtime.InteropServices; +using System.Text; +using System.Threading.Tasks; namespace System.IO { @@ -15,32 +15,26 @@ namespace System.IO // public class BinaryWriter : IDisposable, IAsyncDisposable { + private const int MaxArrayPoolRentalSize = 64 * 1024; // try to keep rentals to a reasonable size + public static readonly BinaryWriter Null = new BinaryWriter(); protected Stream OutStream; - private readonly byte[] _buffer; // temp space for writing primitives to. private readonly Encoding _encoding; - private readonly Encoder _encoder; - private readonly bool _leaveOpen; - - // Perf optimization stuff - private byte[]? _largeByteBuffer; // temp space for writing chars. - private int _maxChars; // max # of chars we can put in _largeByteBuffer - // Size should be around the max number of chars/string * Encoding's max bytes/char - private const int LargeByteBufferSize = 256; + private readonly bool _useFastUtf8; // Protected default constructor that sets the output stream // to a null stream (a bit bucket). 
// Members of System.IO.BinaryWriter (constructors and char-oriented Write overloads).

// Creates a writer over Stream.Null that uses the UTF-8 fast path.
protected BinaryWriter()
{
    OutStream = Stream.Null;
    _encoding = Encoding.UTF8;
    _useFastUtf8 = true;
}

// BinaryWriter never emits a BOM, so can use Encoding.UTF8 fast singleton
public BinaryWriter(Stream output) : this(output, Encoding.UTF8, false)
{
}

// NOTE(review): the patch also adds the statement
//     _useFastUtf8 = encoding.IsUTF8CodePage && encoding.EncoderFallback.MaxCharCount <= 1;
// at the end of BinaryWriter(Stream output, Encoding encoding, bool leaveOpen), and drops the
// _buffer / _encoder initialization there. The head of that constructor is outside the visible
// hunk, so it is not reproduced here.

// Writes a single character to the stream in the writer's encoding.
// Throws ArgumentException when ch is a surrogate code unit, since a lone surrogate
// cannot be encoded on its own.
public virtual void Write(char ch)
{
    if (!Rune.TryCreate(ch, out Rune rune)) // optimistically assume UTF-8 code path (which uses Rune) will be hit
    {
        throw new ArgumentException(SR.Arg_SurrogatesNotAllowedAsSingleChar);
    }

    Span<byte> buffer = stackalloc byte[8]; // reasonable guess for worst-case expansion for any arbitrary encoding

    if (_useFastUtf8)
    {
        int utf8ByteCount = rune.EncodeToUtf8(buffer);
        OutStream.Write(buffer.Slice(0, utf8ByteCount));
    }
    else
    {
        byte[]? rented = null;
        int maxByteCount = _encoding.GetMaxByteCount(1);

        // Only fall back to the pool when the encoding may need more than the stack buffer holds.
        if (maxByteCount > buffer.Length)
        {
            rented = ArrayPool<byte>.Shared.Rent(maxByteCount);
            buffer = rented;
        }

        int actualByteCount = _encoding.GetBytes(MemoryMarshal.CreateReadOnlySpan(ref ch, 1), buffer);
        OutStream.Write(buffer.Slice(0, actualByteCount));

        // The array is returned only after a successful write; if encoding or the stream throws,
        // it is not handed back to the pool.
        if (rented != null)
        {
            ArrayPool<byte>.Shared.Return(rented);
        }
    }
}

// Writes a character array to this stream, encoded with the writer's encoding
// and without a length prefix. Throws ArgumentNullException when chars is null.
public virtual void Write(char[] chars)
{
    if (chars == null)
        throw new ArgumentNullException(nameof(chars));

    WriteCharsCommonWithoutLengthPrefix(chars, useThisWriteOverride: false);
}

// Writes count characters of chars, starting at index, to this stream, encoded with
// the writer's encoding and without a length prefix.
// Throws ArgumentNullException when chars is null, and ArgumentOutOfRangeException when
// index or count is negative or the range does not fit inside the array.
public virtual void Write(char[] chars, int index, int count)
{
    if (chars == null)
        throw new ArgumentNullException(nameof(chars));
    if (index < 0)
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    if (count < 0)
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    if (index > chars.Length - count) // written this way so index + count cannot overflow
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);

    WriteCharsCommonWithoutLengthPrefix(chars.AsSpan(index, count), useThisWriteOverride: false);
}
// Members of System.IO.BinaryWriter (fixed-size primitive Write overloads).
// Each overload encodes the value into a stack-allocated scratch span and passes
// that span to OutStream.Write.

// Writes a double to this stream as eight little-endian bytes.
public virtual void Write(double value)
{
    Span<byte> buffer = stackalloc byte[sizeof(double)];
    BinaryPrimitives.WriteDoubleLittleEndian(buffer, value);
    OutStream.Write(buffer);
}

// Writes a decimal to this stream as the sixteen bytes produced by decimal.GetBytes.
public virtual void Write(decimal value)
{
    Span<byte> buffer = stackalloc byte[sizeof(decimal)];
    decimal.GetBytes(value, buffer);
    OutStream.Write(buffer);
}

// Writes a two-byte signed integer to this stream in little-endian order.
public virtual void Write(short value)
{
    Span<byte> buffer = stackalloc byte[sizeof(short)];
    BinaryPrimitives.WriteInt16LittleEndian(buffer, value);
    OutStream.Write(buffer);
}

// Writes a two-byte unsigned integer to this stream in little-endian order.
[CLSCompliant(false)]
public virtual void Write(ushort value)
{
    Span<byte> buffer = stackalloc byte[sizeof(ushort)];
    BinaryPrimitives.WriteUInt16LittleEndian(buffer, value);
    OutStream.Write(buffer);
}

// Writes a four-byte signed integer to this stream in little-endian order.
public virtual void Write(int value)
{
    Span<byte> buffer = stackalloc byte[sizeof(int)];
    BinaryPrimitives.WriteInt32LittleEndian(buffer, value);
    OutStream.Write(buffer);
}

// Writes a four-byte unsigned integer to this stream in little-endian order.
[CLSCompliant(false)]
public virtual void Write(uint value)
{
    Span<byte> buffer = stackalloc byte[sizeof(uint)];
    BinaryPrimitives.WriteUInt32LittleEndian(buffer, value);
    OutStream.Write(buffer);
}

// Writes an eight-byte signed integer to this stream in little-endian order.
public virtual void Write(long value)
{
    Span<byte> buffer = stackalloc byte[sizeof(long)];
    BinaryPrimitives.WriteInt64LittleEndian(buffer, value);
    OutStream.Write(buffer);
}

// Writes an eight-byte unsigned integer to this stream in little-endian order.
[CLSCompliant(false)]
public virtual void Write(ulong value)
{
    Span<byte> buffer = stackalloc byte[sizeof(ulong)];
    BinaryPrimitives.WriteUInt64LittleEndian(buffer, value);
    OutStream.Write(buffer);
}

// Writes a float to this stream as four little-endian bytes.
public virtual void Write(float value)
{
    Span<byte> buffer = stackalloc byte[sizeof(float)];
    BinaryPrimitives.WriteSingleLittleEndian(buffer, value);
    OutStream.Write(buffer);
}
// Members of System.IO.BinaryWriter (Half, string and char-span Write overloads).

// Writes a half to this stream as two little-endian bytes.
public virtual void Write(Half value)
{
    Span<byte> buffer = stackalloc byte[sizeof(ushort) /* = sizeof(Half) */];
    BinaryPrimitives.WriteHalfLittleEndian(buffer, value);
    OutStream.Write(buffer);
}

// Writes a length-prefixed string to this stream in the BinaryWriter's
// current Encoding. The prefix is the encoded byte count, written as a
// 7-bit-encoded integer, followed by the encoded bytes themselves.
// Throws ArgumentNullException when value is null.
public virtual void Write(string value)
{
    if (value == null)
        throw new ArgumentNullException(nameof(value));

    // Common: UTF-8, small string, avoid 2-pass calculation
    // Less common: UTF-8, large string, avoid 2-pass calculation
    // Uncommon: excessively large string or not UTF-8

    if (_useFastUtf8)
    {
        if (value.Length <= 127 / 3)
        {
            // Max expansion: each char -> 3 bytes, so 127 bytes max of data, +1 for length prefix
            Span<byte> buffer = stackalloc byte[128];
            int actualByteCount = _encoding.GetBytes(value, buffer.Slice(1));
            buffer[0] = (byte)actualByteCount; // bypass call to Write7BitEncodedInt
            OutStream.Write(buffer.Slice(0, actualByteCount + 1 /* length prefix */));
            return;
        }
        else if (value.Length <= MaxArrayPoolRentalSize / 3)
        {
            byte[] rented = ArrayPool<byte>.Shared.Rent(value.Length * 3); // max expansion: each char -> 3 bytes
            int actualByteCount = _encoding.GetBytes(value, rented);
            Write7BitEncodedInt(actualByteCount);
            OutStream.Write(rented, 0, actualByteCount);
            ArrayPool<byte>.Shared.Return(rented);
            return;
        }
    }

    // Slow path: not fast UTF-8, or data is very large. We need to fall back
    // to a 2-pass mechanism so that we're not renting absurdly large arrays.
    // (The local must not be named actualByteCount: that name is already used in the
    // nested scopes above, and C# rejects the redeclaration.)

    int encodedByteCount = _encoding.GetByteCount(value);
    Write7BitEncodedInt(encodedByteCount);
    WriteCharsCommonWithoutLengthPrefix(value, useThisWriteOverride: false);
}

// Writes a span of characters to this stream, encoded with the writer's encoding
// and without a length prefix.
public virtual void Write(ReadOnlySpan<char> chars)
{
    // When Write(ROS<char>) was first introduced, it dispatched to the this.Write(byte[], ...)
    // virtual method rather than write directly to the output stream. We maintain that same
    // double-indirection for compat purposes.
    WriteCharsCommonWithoutLengthPrefix(chars, useThisWriteOverride: true);
}

// Encodes chars with the writer's encoding and writes the bytes, with no length prefix.
// When useThisWriteOverride is true the bytes go through the virtual
// this.Write(byte[], int, int); otherwise they go straight to OutStream.
private void WriteCharsCommonWithoutLengthPrefix(ReadOnlySpan<char> chars, bool useThisWriteOverride)
{
    // If our input is truly enormous, the call to GetMaxByteCount might overflow,
    // which we want to avoid. Theoretically, any Encoding could expand from chars -> bytes
    // at an enormous ratio and cause us problems anyway given small inputs, but this is so
    // unrealistic that we needn't worry about it.

    byte[] rented;

    if (chars.Length <= MaxArrayPoolRentalSize)
    {
        // GetByteCount may walk the buffer contents, resulting in 2 passes over the data.
        // We prefer GetMaxByteCount because it's a constant-time operation.

        int maxByteCount = _encoding.GetMaxByteCount(chars.Length);
        if (maxByteCount <= MaxArrayPoolRentalSize)
        {
            // Everything fits in one bounded rental: encode once and write once.
            rented = ArrayPool<byte>.Shared.Rent(maxByteCount);
            int actualByteCount = _encoding.GetBytes(chars, rented);
            WriteToOutStream(rented, 0, actualByteCount, useThisWriteOverride);
            ArrayPool<byte>.Shared.Return(rented);
            return;
        }
    }

    // We're dealing with an enormous amount of data, so acquire an Encoder.
    // It should be rare that callers pass sufficiently large inputs to hit
    // this code path, and the cost of the operation is dominated by the transcoding
    // step anyway, so it's ok for us to take the allocation here.

    rented = ArrayPool<byte>.Shared.Rent(MaxArrayPoolRentalSize);
    Encoder encoder = _encoding.GetEncoder();
    bool completed;

    // Convert as much as fits in the rented buffer per pass, then continue with the rest.
    do
    {
        encoder.Convert(chars, rented, flush: true, out int charsConsumed, out int bytesWritten, out completed);
        if (bytesWritten != 0)
        {
            WriteToOutStream(rented, 0, bytesWritten, useThisWriteOverride);
        }

        chars = chars.Slice(charsConsumed);
    } while (!completed);

    ArrayPool<byte>.Shared.Return(rented);

    void WriteToOutStream(byte[] buffer, int offset, int count, bool useThisWriteOverride)
    {
        if (useThisWriteOverride)
        {
            Write(buffer, offset, count); // bounce through this.Write(...) overridden logic
        }
        else
        {
            OutStream.Write(buffer, offset, count); // ignore this.Write(...) override, go straight to inner stream
        }
    }
}

// Member of System.Text.Encoding (src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs).
//
// Quick accessor for "is UTF8?". Reads the virtual CodePage property, so a derived
// encoding that overrides CodePage changes the answer.
internal bool IsUTF8CodePage => CodePage == CodePageUTF8;