diff --git a/src/libraries/Common/tests/TestUtilities/System/Runtime/InteropServices/SafeBufferUtil.cs b/src/libraries/Common/tests/TestUtilities/System/Runtime/InteropServices/SafeBufferUtil.cs
new file mode 100644
index 0000000000000..f667b1e26cec5
--- /dev/null
+++ b/src/libraries/Common/tests/TestUtilities/System/Runtime/InteropServices/SafeBufferUtil.cs
@@ -0,0 +1,44 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+
+namespace System.Runtime.InteropServices
+{
+ public static class SafeBufferUtil
+ {
+ /// <summary>
+ /// Creates an unmanaged buffer of the specified length (in bytes), allocated with Marshal.AllocHGlobal.
+ /// </summary>
+ public static SafeBuffer CreateSafeBuffer(nuint byteLength)
+ {
+ return new AllocHGlobalSafeHandle(byteLength);
+ }
+
+ private sealed class AllocHGlobalSafeHandle : SafeBuffer
+ {
+ public AllocHGlobalSafeHandle(nuint cb) : base(ownsHandle: true)
+ {
+#if !NETCOREAPP
+ RuntimeHelpers.PrepareConstrainedRegions();
+#endif
+ try
+ {
+ // intentionally empty: the allocation is done in the finally block to avoid ThreadAbortException in netfx runtimes
+ }
+ finally
+ {
+ SetHandle(Marshal.AllocHGlobal((nint)cb));
+ }
+
+ Initialize(cb); // give SafeBuffer the size, in bytes, of the region that was just allocated
+ }
+
+ protected override bool ReleaseHandle()
+ {
+ Marshal.FreeHGlobal(handle); // pairs with the AllocHGlobal call in the constructor
+ return true;
+ }
+ }
+ }
+}
diff --git a/src/libraries/Common/tests/TestUtilities/TestUtilities.csproj b/src/libraries/Common/tests/TestUtilities/TestUtilities.csproj
index 0215f429d889f..3e6464239d718 100644
--- a/src/libraries/Common/tests/TestUtilities/TestUtilities.csproj
+++ b/src/libraries/Common/tests/TestUtilities/TestUtilities.csproj
@@ -19,6 +19,7 @@
+
diff --git a/src/libraries/System.IO/tests/BinaryWriter/BinaryWriter.EncodingTests.cs b/src/libraries/System.IO/tests/BinaryWriter/BinaryWriter.EncodingTests.cs
new file mode 100644
index 0000000000000..b22020a36dc4f
--- /dev/null
+++ b/src/libraries/System.IO/tests/BinaryWriter/BinaryWriter.EncodingTests.cs
@@ -0,0 +1,256 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Numerics;
+using System.Reflection;
+using System.Runtime.InteropServices;
+using System.Text;
+using Xunit;
+
+namespace System.IO.Tests
+{
+ public class BinaryWriter_EncodingTests // covers BinaryWriter's fast-UTF-8 detection and its char / char[] / string encoding paths
+ {
+ [Fact]
+ public void Ctor_Default_UsesFastUtf8()
+ {
+ BinaryWriter writer = new BinaryWriter(new MemoryStream());
+ Assert.True(IsUsingFastUtf8(writer));
+ }
+
+ [Fact]
+ public void Ctor_EncodingUtf8Singleton_UsesFastUtf8()
+ {
+ BinaryWriter writer = new BinaryWriter(new MemoryStream(), Encoding.UTF8);
+ Assert.True(IsUsingFastUtf8(writer));
+ }
+
+ [Theory]
+ [InlineData(true, true)]
+ [InlineData(true, false)]
+ [InlineData(false, true)]
+ [InlineData(false, false)]
+ public void Ctor_NewUtf8Encoding_UsesFastUtf8(bool emitIdentifier, bool throwOnInvalidBytes)
+ {
+ BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UTF8Encoding(emitIdentifier, throwOnInvalidBytes));
+ Assert.True(IsUsingFastUtf8(writer));
+ }
+
+ [Fact]
+ public void Ctor_Utf8EncodingWithSingleCharReplacementChar_UsesFastUtf8()
+ {
+ Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("x"), DecoderFallback.ExceptionFallback);
+ BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding);
+ Assert.True(IsUsingFastUtf8(writer));
+ }
+
+ [Fact]
+ public void Ctor_Utf8EncodingWithMultiCharReplacementChar_DoesNotUseFastUtf8()
+ {
+ Encoding encoding = Encoding.GetEncoding("utf-8", new EncoderReplacementFallback("xx"), DecoderFallback.ExceptionFallback);
+ BinaryWriter writer = new BinaryWriter(new MemoryStream(), encoding);
+ Assert.False(IsUsingFastUtf8(writer));
+ }
+
+ [Fact]
+ public void Ctor_NotUtf8EncodingType_DoesNotUseFastUtf8()
+ {
+ BinaryWriter writer = new BinaryWriter(new MemoryStream(), new UnicodeEncoding());
+ Assert.False(IsUsingFastUtf8(writer));
+ }
+
+ [Fact]
+ public void Ctor_Utf8EncodingDerivedTypeWithWrongCodePage_DoesNotUseFastUtf8()
+ {
+ BinaryWriter writer = new BinaryWriter(new MemoryStream(), new NotActuallyUTF8Encoding());
+ Assert.False(IsUsingFastUtf8(writer));
+ }
+
+ [Fact]
+ public void Ctor_Utf8EncodingDerivedTypeWithCorrectCodePage_DoesNotUseFastUtf8()
+ {
+ BinaryWriter writer = new BinaryWriter(new MemoryStream(), new MyCustomUTF8Encoding());
+ Assert.True(IsUsingFastUtf8(writer)); // NOTE(review): the method name says "DoesNotUseFastUtf8" but the assertion expects the fast path; the name looks like a copy/paste slip
+ }
+
+ [Theory]
+ [InlineData('x')] // 1 UTF-8 byte
+ [InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes)
+ [InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes)
+ public void WriteSingleChar_FastUtf8(char ch)
+ {
+ MemoryStream stream = new MemoryStream();
+ BinaryWriter writer = new BinaryWriter(stream);
+
+ writer.Write(ch);
+
+ Assert.Equal(Encoding.UTF8.GetBytes(new char[] { ch }), stream.ToArray());
+ }
+
+ [Theory]
+ [InlineData('x')] // 1 UTF-8 byte
+ [InlineData('\u00e9')] // LATIN SMALL LETTER E WITH ACUTE (2 UTF-8 bytes)
+ [InlineData('\u2130')] // SCRIPT CAPITAL E (3 UTF-8 bytes)
+ public void WriteSingleChar_NotUtf8NoArrayPoolRentalNeeded(char ch)
+ {
+ MemoryStream stream = new MemoryStream();
+ BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */);
+
+ writer.Write(ch);
+
+ Assert.Equal(Encoding.Unicode.GetBytes(new char[] { ch }), stream.ToArray());
+ }
+
+ [Fact]
+ public void WriteSingleChar_ArrayPoolRentalNeeded()
+ {
+ string replacementString = new string('v', 10_000);
+ Encoding encoding = Encoding.GetEncoding("ascii", new EncoderReplacementFallback(replacementString), DecoderFallback.ExceptionFallback);
+ MemoryStream stream = new MemoryStream();
+ BinaryWriter writer = new BinaryWriter(stream, encoding);
+
+ writer.Write('\uFFFD'); // not ASCII
+
+ Assert.Equal(Encoding.ASCII.GetBytes(replacementString), stream.ToArray());
+ }
+
+ [Theory]
+ [InlineData(8 * 1024)] // both char count & byte count within 64k rental boundary
+ [InlineData(32 * 1024)] // char count within 64k rental boundary, byte count not
+ [InlineData(256 * 1024)] // neither char count nor byte count within 64k rental boundary
+ public void WriteChars_FastUtf8(int stringLengthInChars)
+ {
+ string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars);
+ byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite);
+
+ MemoryStream stream = new MemoryStream();
+ BinaryWriter writer = new BinaryWriter(stream);
+
+ writer.Write(stringToWrite.ToCharArray()); // writing a char buffer doesn't emit the length upfront
+ Assert.Equal(expectedBytes, stream.GetBuffer()[..expectedBytes.Length]);
+ }
+
+ [Theory]
+ [InlineData(24)] // within stackalloc path
+ [InlineData(8 * 1024)] // both char count & byte count within 64k rental boundary
+ [InlineData(32 * 1024)] // char count within 64k rental boundary, byte count not
+ [InlineData(256 * 1024)] // neither char count nor byte count within 64k rental boundary
+ public void WriteString_FastUtf8(int stringLengthInChars)
+ {
+ string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars);
+ byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite);
+
+ MemoryStream stream = new MemoryStream();
+ BinaryWriter writer = new BinaryWriter(stream);
+
+ writer.Write(stringToWrite);
+ stream.Position = 0;
+
+ Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt());
+ Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]);
+ }
+
+ [Theory]
+ [InlineData(127 / 3)] // within stackalloc fast path
+ [InlineData(127 / 3 + 1)] // not within stackalloc fast path
+ public void WriteString_FastUtf8_UsingThreeByteChars(int stringLengthInChars)
+ {
+ string stringToWrite = new string('\u2023', stringLengthInChars); // TRIANGULAR BULLET
+ byte[] expectedBytes = Encoding.UTF8.GetBytes(stringToWrite);
+
+ MemoryStream stream = new MemoryStream();
+ BinaryWriter writer = new BinaryWriter(stream);
+
+ writer.Write(stringToWrite);
+ stream.Position = 0;
+
+ Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt());
+ Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]);
+ }
+
+ [Theory]
+ [InlineData(8 * 1024)] // both char count & byte count within 64k rental boundary
+ [InlineData(48 * 1024)] // char count within 64k rental boundary, byte count not
+ [InlineData(256 * 1024)] // neither char count nor byte count within 64k rental boundary
+ public void WriteString_NotUtf8(int stringLengthInChars)
+ {
+ string stringToWrite = GenerateLargeUnicodeString(stringLengthInChars);
+ byte[] expectedBytes = Encoding.Unicode.GetBytes(stringToWrite);
+
+ MemoryStream stream = new MemoryStream();
+ BinaryWriter writer = new BinaryWriter(stream, Encoding.Unicode /* little endian */);
+
+ writer.Write(stringToWrite);
+ stream.Position = 0;
+
+ Assert.Equal(expectedBytes.Length /* byte count */, new BinaryReader(stream).Read7BitEncodedInt());
+ Assert.Equal(expectedBytes, stream.GetBuffer()[Get7BitEncodedIntByteLength((uint)expectedBytes.Length)..(int)stream.Length]);
+ }
+
+ [Fact]
+ [PlatformSpecific(~TestPlatforms.Android)] // OOM on Android could be uncatchable & kill the test runner
+ public unsafe void WriteChars_VeryLargeArray_DoesNotOverflow()
+ {
+ const nuint INPUT_LEN_IN_CHARS = 1_500_000_000;
+ const nuint OUTPUT_LEN_IN_BYTES = 3_500_000_000; // overallocate
+
+ SafeBuffer unmanagedInputBuffer = null;
+ SafeBuffer unmanagedOutputBuffer = null;
+ try
+ {
+ try
+ {
+ unmanagedInputBuffer = SafeBufferUtil.CreateSafeBuffer(INPUT_LEN_IN_CHARS * sizeof(char));
+ unmanagedOutputBuffer = SafeBufferUtil.CreateSafeBuffer(OUTPUT_LEN_IN_BYTES * sizeof(byte));
+ }
+ catch (OutOfMemoryException)
+ {
+ return; // skip test in low-mem conditions
+ }
+
+ Span<char> inputSpan = new Span<char>((char*)unmanagedInputBuffer.DangerousGetHandle(), (int)INPUT_LEN_IN_CHARS);
+ inputSpan.Fill('\u0224'); // LATIN CAPITAL LETTER Z WITH HOOK
+ Stream outStream = new UnmanagedMemoryStream(unmanagedOutputBuffer, 0, (long)unmanagedOutputBuffer.ByteLength, FileAccess.ReadWrite);
+ BinaryWriter writer = new BinaryWriter(outStream);
+
+ writer.Write(inputSpan); // will write 3 billion bytes to the output
+
+ Assert.Equal(3_000_000_000, outStream.Position);
+ }
+ finally
+ {
+ unmanagedInputBuffer?.Dispose();
+ unmanagedOutputBuffer?.Dispose();
+ }
+ }
+
+ private static bool IsUsingFastUtf8(BinaryWriter writer)
+ {
+ return (bool)typeof(BinaryWriter).GetField("_useFastUtf8", BindingFlags.NonPublic | BindingFlags.Instance).GetValue(writer); // look the field up on its declaring type: reflection on a derived type does not return a base class's private fields
+ }
+
+ private static string GenerateLargeUnicodeString(int charCount)
+ {
+ return string.Create(charCount, (object)null, static (buffer, _) =>
+ {
+ for (int i = 0; i < buffer.Length; i++)
+ {
+ buffer[i] = (char)((i % 0xF00) + 0x100); // U+0100..U+0FFF (mix of 2-byte and 3-byte chars)
+ }
+ });
+ }
+
+ private static int Get7BitEncodedIntByteLength(uint value) => (BitOperations.Log2(value) / 7) + 1; // each encoded byte carries 7 payload bits
+
+ // subclasses UTF8Encoding, but returns a non-UTF8 code page
+ private class NotActuallyUTF8Encoding : UTF8Encoding
+ {
+ public override int CodePage => 65000; // UTF-7 code page
+ }
+
+ // subclasses UTF8Encoding, returns UTF-8 code page
+ private class MyCustomUTF8Encoding : UTF8Encoding
+ {
+ }
+ }
+}
diff --git a/src/libraries/System.IO/tests/System.IO.Tests.csproj b/src/libraries/System.IO/tests/System.IO.Tests.csproj
index 99db6557308a5..c3f821f0f5ad7 100644
--- a/src/libraries/System.IO/tests/System.IO.Tests.csproj
+++ b/src/libraries/System.IO/tests/System.IO.Tests.csproj
@@ -15,6 +15,7 @@
+
diff --git a/src/libraries/System.Private.CoreLib/src/System/Decimal.cs b/src/libraries/System.Private.CoreLib/src/System/Decimal.cs
index bc81619838fb6..97e620e68f0a7 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Decimal.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Decimal.cs
@@ -587,16 +587,14 @@ public static bool TryGetBits(decimal d, Span destination, out int valuesWr
return true;
}
- internal static void GetBytes(in decimal d, byte[] buffer)
+ internal static void GetBytes(in decimal d, Span<byte> buffer)
{
- Debug.Assert(buffer != null && buffer.Length >= 16, "[GetBytes]buffer != null && buffer.Length >= 16");
+ Debug.Assert(buffer.Length >= 16, "buffer.Length >= 16"); // a Span is a struct, so the old null check no longer applies
- Span<byte> span = buffer;
-
- BinaryPrimitives.WriteInt32LittleEndian(span, (int)d.Low);
- BinaryPrimitives.WriteInt32LittleEndian(span.Slice(4), (int)d.Mid);
- BinaryPrimitives.WriteInt32LittleEndian(span.Slice(8), (int)d.High);
- BinaryPrimitives.WriteInt32LittleEndian(span.Slice(12), d._flags);
+ BinaryPrimitives.WriteInt32LittleEndian(buffer, (int)d.Low); // bytes 0-3
+ BinaryPrimitives.WriteInt32LittleEndian(buffer.Slice(4), (int)d.Mid); // bytes 4-7
+ BinaryPrimitives.WriteInt32LittleEndian(buffer.Slice(8), (int)d.High); // bytes 8-11
+ BinaryPrimitives.WriteInt32LittleEndian(buffer.Slice(12), d._flags); // bytes 12-15: the _flags field
}
internal static decimal ToDecimal(ReadOnlySpan span)
diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/BinaryWriter.cs b/src/libraries/System.Private.CoreLib/src/System/IO/BinaryWriter.cs
index e4434f9ecde95..a9cd6bd157589 100644
--- a/src/libraries/System.Private.CoreLib/src/System/IO/BinaryWriter.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/IO/BinaryWriter.cs
@@ -1,11 +1,11 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
-using System.Text;
-using System.Diagnostics;
using System.Buffers;
-using System.Threading.Tasks;
using System.Buffers.Binary;
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Threading.Tasks;
namespace System.IO
{
@@ -15,32 +15,26 @@ namespace System.IO
//
public class BinaryWriter : IDisposable, IAsyncDisposable
{
+ private const int MaxArrayPoolRentalSize = 64 * 1024; // try to keep rentals to a reasonable size; larger inputs are encoded in chunks of this many bytes
+
public static readonly BinaryWriter Null = new BinaryWriter();
protected Stream OutStream;
- private readonly byte[] _buffer; // temp space for writing primitives to.
private readonly Encoding _encoding;
- private readonly Encoder _encoder;
-
private readonly bool _leaveOpen;
-
- // Perf optimization stuff
- private byte[]? _largeByteBuffer; // temp space for writing chars.
- private int _maxChars; // max # of chars we can put in _largeByteBuffer
- // Size should be around the max number of chars/string * Encoding's max bytes/char
- private const int LargeByteBufferSize = 256;
+ private readonly bool _useFastUtf8; // set by the constructors; selects the UTF-8 fast paths in Write(char) and Write(string)
// Protected default constructor that sets the output stream
// to a null stream (a bit bucket).
protected BinaryWriter()
{
OutStream = Stream.Null;
- _buffer = new byte[16];
- _encoding = EncodingCache.UTF8NoBOM;
- _encoder = _encoding.GetEncoder();
+ _encoding = Encoding.UTF8; // same singleton the BinaryWriter(Stream) constructor passes along
+ _useFastUtf8 = true; // set directly here; the (Stream, Encoding, bool) constructor derives it from the encoding instead
}
- public BinaryWriter(Stream output) : this(output, EncodingCache.UTF8NoBOM, false)
+ // BinaryWriter never emits a BOM, so it can use the Encoding.UTF8 singleton (fast path) in place of EncodingCache.UTF8NoBOM
+ public BinaryWriter(Stream output) : this(output, Encoding.UTF8, false)
{
}
@@ -58,10 +52,9 @@ public BinaryWriter(Stream output, Encoding encoding, bool leaveOpen)
throw new ArgumentException(SR.Argument_StreamNotWritable);
OutStream = output;
- _buffer = new byte[16];
_encoding = encoding;
- _encoder = _encoding.GetEncoder();
_leaveOpen = leaveOpen;
+ _useFastUtf8 = encoding.IsUTF8CodePage && encoding.EncoderFallback.MaxCharCount <= 1;
}
// Closes this writer and releases any system resources associated with the
@@ -182,18 +175,39 @@ public virtual void Write(byte[] buffer, int index, int count)
// advanced by two.
// Note this method cannot handle surrogates properly in UTF-8.
//
- public virtual unsafe void Write(char ch)
+ public virtual void Write(char ch)
{
- if (char.IsSurrogate(ch))
+ if (!Rune.TryCreate(ch, out Rune rune)) // optimistically assume UTF-8 code path (which uses Rune) will be hit
+ {
throw new ArgumentException(SR.Arg_SurrogatesNotAllowedAsSingleChar);
+ }
- Debug.Assert(_encoding.GetMaxByteCount(1) <= 16, "_encoding.GetMaxByteCount(1) <= 16)");
- int numBytes = 0;
- fixed (byte* pBytes = &_buffer[0])
+ Span<byte> buffer = stackalloc byte[8]; // reasonable guess for worst-case expansion for any arbitrary encoding
+
+ if (_useFastUtf8)
{
- numBytes = _encoder.GetBytes(&ch, 1, pBytes, _buffer.Length, flush: true);
+ int utf8ByteCount = rune.EncodeToUtf8(buffer); // a single non-surrogate char needs at most 3 UTF-8 bytes
+ OutStream.Write(buffer.Slice(0, utf8ByteCount));
+ }
+ else
+ {
+ byte[]? rented = null;
+ int maxByteCount = _encoding.GetMaxByteCount(1);
+
+ if (maxByteCount > buffer.Length)
+ {
+ rented = ArrayPool<byte>.Shared.Rent(maxByteCount);
+ buffer = rented; // stack buffer is too small for this encoding's worst case; use the rented array instead
+ }
+
+ int actualByteCount = _encoding.GetBytes(MemoryMarshal.CreateReadOnlySpan(ref ch, 1), buffer);
+ OutStream.Write(buffer.Slice(0, actualByteCount));
+
+ if (rented != null)
+ {
+ ArrayPool<byte>.Shared.Return(rented);
+ }
}
- OutStream.Write(_buffer, 0, numBytes);
}
// Writes a character array to this stream.
@@ -206,8 +220,7 @@ public virtual void Write(char[] chars)
if (chars == null)
throw new ArgumentNullException(nameof(chars));
- byte[] bytes = _encoding.GetBytes(chars, 0, chars.Length);
- OutStream.Write(bytes, 0, bytes.Length);
+ WriteCharsCommonWithoutLengthPrefix(chars, useThisWriteOverride: false);
}
// Writes a section of a character array to this stream.
@@ -217,23 +230,33 @@ public virtual void Write(char[] chars)
//
public virtual void Write(char[] chars, int index, int count)
{
- byte[] bytes = _encoding.GetBytes(chars, index, count);
- OutStream.Write(bytes, 0, bytes.Length);
+ if (chars == null)
+ throw new ArgumentNullException(nameof(chars));
+ if (index < 0)
+ throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
+ if (count < 0)
+ throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+ if (index > chars.Length - count) // overflow-safe form of "index + count > chars.Length" (both are known non-negative here)
+ throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
+
+ WriteCharsCommonWithoutLengthPrefix(chars.AsSpan(index, count), useThisWriteOverride: false); // false: bytes go straight to OutStream, as the removed code did
}
// Writes a double to this stream. The current position of the stream is
// advanced by eight.
//
- public virtual unsafe void Write(double value)
+ public virtual void Write(double value)
{
- BinaryPrimitives.WriteDoubleLittleEndian(_buffer, value);
- OutStream.Write(_buffer, 0, 8);
+ Span<byte> buffer = stackalloc byte[sizeof(double)]; // stack scratch space; replaces the removed shared _buffer field
+ BinaryPrimitives.WriteDoubleLittleEndian(buffer, value);
+ OutStream.Write(buffer);
}
public virtual void Write(decimal value)
{
- decimal.GetBytes(value, _buffer);
- OutStream.Write(_buffer, 0, 16);
+ Span<byte> buffer = stackalloc byte[sizeof(decimal)]; // 16 bytes, the size the removed code wrote from _buffer
+ decimal.GetBytes(value, buffer);
+ OutStream.Write(buffer);
}
// Writes a two-byte signed integer to this stream. The current position of
@@ -241,9 +264,9 @@ public virtual void Write(decimal value)
//
public virtual void Write(short value)
{
- _buffer[0] = (byte)value;
- _buffer[1] = (byte)(value >> 8);
- OutStream.Write(_buffer, 0, 2);
+ Span<byte> buffer = stackalloc byte[sizeof(short)]; // stack scratch space; replaces the removed shared _buffer field
+ BinaryPrimitives.WriteInt16LittleEndian(buffer, value); // same low-byte-first order as the removed manual shifts
+ OutStream.Write(buffer);
}
// Writes a two-byte unsigned integer to this stream. The current position
@@ -252,9 +275,9 @@ public virtual void Write(short value)
[CLSCompliant(false)]
public virtual void Write(ushort value)
{
- _buffer[0] = (byte)value;
- _buffer[1] = (byte)(value >> 8);
- OutStream.Write(_buffer, 0, 2);
+ Span<byte> buffer = stackalloc byte[sizeof(ushort)]; // stack scratch space; replaces the removed shared _buffer field
+ BinaryPrimitives.WriteUInt16LittleEndian(buffer, value); // same low-byte-first order as the removed manual shifts
+ OutStream.Write(buffer);
}
// Writes a four-byte signed integer to this stream. The current position
@@ -262,11 +285,9 @@ public virtual void Write(ushort value)
//
public virtual void Write(int value)
{
- _buffer[0] = (byte)value;
- _buffer[1] = (byte)(value >> 8);
- _buffer[2] = (byte)(value >> 16);
- _buffer[3] = (byte)(value >> 24);
- OutStream.Write(_buffer, 0, 4);
+ Span<byte> buffer = stackalloc byte[sizeof(int)]; // stack scratch space; replaces the removed shared _buffer field
+ BinaryPrimitives.WriteInt32LittleEndian(buffer, value); // same low-byte-first order as the removed manual shifts
+ OutStream.Write(buffer);
}
// Writes a four-byte unsigned integer to this stream. The current position
@@ -275,11 +296,9 @@ public virtual void Write(int value)
[CLSCompliant(false)]
public virtual void Write(uint value)
{
- _buffer[0] = (byte)value;
- _buffer[1] = (byte)(value >> 8);
- _buffer[2] = (byte)(value >> 16);
- _buffer[3] = (byte)(value >> 24);
- OutStream.Write(_buffer, 0, 4);
+ Span<byte> buffer = stackalloc byte[sizeof(uint)]; // stack scratch space; replaces the removed shared _buffer field
+ BinaryPrimitives.WriteUInt32LittleEndian(buffer, value); // same low-byte-first order as the removed manual shifts
+ OutStream.Write(buffer);
}
// Writes an eight-byte signed integer to this stream. The current position
@@ -287,8 +306,9 @@ public virtual void Write(uint value)
//
public virtual void Write(long value)
{
- BinaryPrimitives.WriteInt64LittleEndian(_buffer, value);
- OutStream.Write(_buffer, 0, 8);
+ Span<byte> buffer = stackalloc byte[sizeof(long)]; // stack scratch space; replaces the removed shared _buffer field
+ BinaryPrimitives.WriteInt64LittleEndian(buffer, value);
+ OutStream.Write(buffer);
}
// Writes an eight-byte unsigned integer to this stream. The current
@@ -297,8 +317,9 @@ public virtual void Write(long value)
[CLSCompliant(false)]
public virtual void Write(ulong value)
{
- BinaryPrimitives.WriteUInt64LittleEndian(_buffer, value);
- OutStream.Write(_buffer, 0, 8);
+ Span<byte> buffer = stackalloc byte[sizeof(ulong)]; // stack scratch space; replaces the removed shared _buffer field
+ BinaryPrimitives.WriteUInt64LittleEndian(buffer, value);
+ OutStream.Write(buffer);
}
// Writes a float to this stream. The current position of the stream is
@@ -306,12 +327,9 @@ public virtual void Write(ulong value)
//
public virtual void Write(float value)
{
- uint tmpValue = (uint)BitConverter.SingleToInt32Bits(value);
- _buffer[0] = (byte)tmpValue;
- _buffer[1] = (byte)(tmpValue >> 8);
- _buffer[2] = (byte)(tmpValue >> 16);
- _buffer[3] = (byte)(tmpValue >> 24);
- OutStream.Write(_buffer, 0, 4);
+ Span<byte> buffer = stackalloc byte[sizeof(float)]; // stack scratch space; replaces the removed shared _buffer field
+ BinaryPrimitives.WriteSingleLittleEndian(buffer, value); // replaces the removed SingleToInt32Bits + manual low-byte-first shifts
+ OutStream.Write(buffer);
}
// Writes a half to this stream. The current position of the stream is
@@ -319,10 +337,9 @@ public virtual void Write(float value)
//
public virtual void Write(Half value)
{
- ushort tmpValue = (ushort)BitConverter.HalfToInt16Bits(value);
- _buffer[0] = (byte)tmpValue;
- _buffer[1] = (byte)(tmpValue >> 8);
- OutStream.Write(_buffer, 0, 2);
+ Span<byte> buffer = stackalloc byte[sizeof(ushort) /* = sizeof(Half) */]; // stack scratch space; replaces the removed shared _buffer field
+ BinaryPrimitives.WriteHalfLittleEndian(buffer, value); // replaces the removed HalfToInt16Bits + manual low-byte-first shifts
+ OutStream.Write(buffer);
}
// Writes a length-prefixed string to this stream in the BinaryWriter's
@@ -330,102 +347,43 @@ public virtual void Write(Half value)
// an encoded unsigned integer with variable length, and then writes that many characters
// to the stream.
//
- public virtual unsafe void Write(string value)
+ public virtual void Write(string value)
{
if (value == null)
throw new ArgumentNullException(nameof(value));
- int totalBytes = _encoding.GetByteCount(value);
- Write7BitEncodedInt(totalBytes);
-
- if (_largeByteBuffer == null)
- {
- _largeByteBuffer = new byte[LargeByteBufferSize];
- _maxChars = _largeByteBuffer.Length / _encoding.GetMaxByteCount(1);
- }
-
- if (totalBytes <= _largeByteBuffer.Length)
- {
- _encoding.GetBytes(value, _largeByteBuffer);
- OutStream.Write(_largeByteBuffer, 0, totalBytes);
- return;
- }
+ // Common: UTF-8, small string, avoid 2-pass calculation
+ // Less common: UTF-8, large string, avoid 2-pass calculation
+ // Uncommon: excessively large string or not UTF-8
- int numLeft = value.Length;
- int charStart = 0;
- ReadOnlySpan<char> str = value;
-
- // The previous implementation had significant issues packing encoded
- // characters efficiently into the byte buffer. This was due to the assumption,
- // that every input character will take up the maximum possible size of a character in any given encoding,
- // thus resulting in a lot of unused space within the byte buffer.
- // However, in scenarios where the number of characters aligns perfectly with the buffer size the new
- // implementation saw some performance regressions, therefore in such scenarios (ASCIIEncoding)
- // work will be delegated to the previous implementation.
- if (_encoding.GetType() == typeof(UTF8Encoding))
+ if (_useFastUtf8)
{
- while (numLeft > 0)
+ if (value.Length <= 127 / 3)
{
- _encoder.Convert(str.Slice(charStart), _largeByteBuffer, numLeft <= _maxChars, out int charCount, out int byteCount, out bool _);
-
- OutStream.Write(_largeByteBuffer, 0, byteCount);
- charStart += charCount;
- numLeft -= charCount;
+ // Max expansion: each char -> 3 bytes, so 127 bytes max of data, +1 for length prefix
+ Span<byte> buffer = stackalloc byte[128];
+ int actualByteCount = _encoding.GetBytes(value, buffer.Slice(1));
+ buffer[0] = (byte)actualByteCount; // bypass call to Write7BitEncodedInt: the count is <= 127, so the prefix is this single byte
+ OutStream.Write(buffer.Slice(0, actualByteCount + 1 /* length prefix */));
+ return;
}
- }
-
- else
- {
- WriteWhenEncodingIsNotUtf8(value, totalBytes);
- }
- }
-
- private unsafe void WriteWhenEncodingIsNotUtf8(string value, int len)
- {
- // This method should only be called from BinaryWriter(string), which does a null-check
- Debug.Assert(_largeByteBuffer != null);
-
- int numLeft = value.Length;
- int charStart = 0;
-
- // Aggressively try to not allocate memory in this loop for
- // runtime performance reasons. Use an Encoder to write out
- // the string correctly (handling surrogates crossing buffer
- // boundaries properly).
-#if DEBUG
- int totalBytes = 0;
-#endif
- while (numLeft > 0)
- {
- // Figure out how many chars to process this round.
- int charCount = (numLeft > _maxChars) ? _maxChars : numLeft;
- int byteLen;
-
- checked
+ else if (value.Length <= MaxArrayPoolRentalSize / 3)
{
- if (charStart < 0 || charCount < 0 || charStart > value.Length - charCount)
- {
- throw new ArgumentOutOfRangeException(nameof(value));
- }
- fixed (char* pChars = value)
- {
- fixed (byte* pBytes = &_largeByteBuffer[0])
- {
- byteLen = _encoder.GetBytes(pChars + charStart, charCount, pBytes, _largeByteBuffer.Length, charCount == numLeft);
- }
- }
+ byte[] rented = ArrayPool<byte>.Shared.Rent(value.Length * 3); // max expansion: each char -> 3 bytes
+ int actualByteCount = _encoding.GetBytes(value, rented);
+ Write7BitEncodedInt(actualByteCount);
+ OutStream.Write(rented, 0, actualByteCount);
+ ArrayPool<byte>.Shared.Return(rented);
+ return;
}
-#if DEBUG
- totalBytes += byteLen;
- Debug.Assert(totalBytes <= len && byteLen <= _largeByteBuffer.Length, "BinaryWriter::Write(String) - More bytes encoded than expected!");
-#endif
- OutStream.Write(_largeByteBuffer, 0, byteLen);
- charStart += charCount;
- numLeft -= charCount;
}
-#if DEBUG
- Debug.Assert(totalBytes == len, "BinaryWriter::Write(String) - Didn't write out all the bytes!");
-#endif
+
+ // Slow path: not fast UTF-8, or data is very large. We need to fall back
+ // to a 2-pass mechanism so that we're not renting absurdly large arrays.
+
+ int actualBytecount = _encoding.GetByteCount(value); // name differs only in casing from the nested-scope 'actualByteCount' locals above; reusing that exact name here is rejected by the compiler (CS0136)
+ Write7BitEncodedInt(actualBytecount);
+ WriteCharsCommonWithoutLengthPrefix(value, useThisWriteOverride: false);
}
public virtual void Write(ReadOnlySpan buffer)
@@ -451,15 +409,69 @@ public virtual void Write(ReadOnlySpan buffer)
public virtual void Write(ReadOnlySpan<char> chars)
{
- byte[] bytes = ArrayPool<byte>.Shared.Rent(_encoding.GetMaxByteCount(chars.Length));
- try
+ // When Write(ROS<char>) was first introduced, it dispatched to the this.Write(byte[], ...)
+ // virtual method rather than write directly to the output stream. We maintain that same
+ // double-indirection for compat purposes.
+ WriteCharsCommonWithoutLengthPrefix(chars, useThisWriteOverride: true);
+ }
+
+ private void WriteCharsCommonWithoutLengthPrefix(ReadOnlySpan<char> chars, bool useThisWriteOverride)
+ {
+ // If our input is truly enormous, the call to GetMaxByteCount might overflow,
+ // which we want to avoid. Theoretically, any Encoding could expand from chars -> bytes
+ // at an enormous ratio and cause us problems anyway given small inputs, but this is so
+ // unrealistic that we needn't worry about it.
+
+ byte[] rented;
+
+ if (chars.Length <= MaxArrayPoolRentalSize)
{
- int bytesWritten = _encoding.GetBytes(chars, bytes);
- Write(bytes, 0, bytesWritten);
+ // GetByteCount may walk the buffer contents, resulting in 2 passes over the data.
+ // We prefer GetMaxByteCount because it's a constant-time operation.
+
+ int maxByteCount = _encoding.GetMaxByteCount(chars.Length);
+ if (maxByteCount <= MaxArrayPoolRentalSize)
+ {
+ rented = ArrayPool<byte>.Shared.Rent(maxByteCount);
+ int actualByteCount = _encoding.GetBytes(chars, rented);
+ WriteToOutStream(rented, 0, actualByteCount, useThisWriteOverride);
+ ArrayPool<byte>.Shared.Return(rented);
+ return;
+ }
}
- finally
+
+ // We're dealing with an enormous amount of data, so acquire an Encoder.
+ // It should be rare that callers pass sufficiently large inputs to hit
+ // this code path, and the cost of the operation is dominated by the transcoding
+ // step anyway, so it's ok for us to take the allocation here.
+
+ rented = ArrayPool<byte>.Shared.Rent(MaxArrayPoolRentalSize);
+ Encoder encoder = _encoding.GetEncoder();
+ bool completed;
+
+ do
{
- ArrayPool<byte>.Shared.Return(bytes);
+ encoder.Convert(chars, rented, flush: true, out int charsConsumed, out int bytesWritten, out completed); // flush: true on every pass, since 'chars' always holds all of the remaining input
+ if (bytesWritten != 0)
+ {
+ WriteToOutStream(rented, 0, bytesWritten, useThisWriteOverride);
+ }
+
+ chars = chars.Slice(charsConsumed); // drop what was consumed; the loop ends once Convert reports completion
+ } while (!completed);
+
+ ArrayPool<byte>.Shared.Return(rented);
+
+ void WriteToOutStream(byte[] buffer, int offset, int count, bool useThisWriteOverride)
+ {
+ if (useThisWriteOverride)
+ {
+ Write(buffer, offset, count); // bounce through this.Write(...) overridden logic
+ }
+ else
+ {
+ OutStream.Write(buffer, offset, count); // ignore this.Write(...) override, go straight to inner stream
+ }
}
}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs
index d455cf36def6e..fb6ad0f4d0b28 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs
@@ -956,6 +956,9 @@ public unsafe string GetString(ReadOnlySpan bytes)
public virtual int CodePage => _codePage;
+ // Quick accessor for "is UTF8?"; reads the virtual CodePage property, so a subclass that overrides CodePage changes the answer
+ internal bool IsUTF8CodePage => CodePage == CodePageUTF8;
+
// IsAlwaysNormalized
// Returns true if the encoding is always normalized for the specified encoding form
public bool IsAlwaysNormalized() =>