From dda29ffcac372bc7d8754ea83fbe152ec8ec7a49 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Sat, 15 Feb 2020 01:47:34 +0100 Subject: [PATCH] Fix and optimize EscapeUnescapeIri (#32025) * Remove byte[] allocation per encoded character * Remove dead code from EscapeUnescapeIri * Use int instead of IntPtr for stack buffer * Use sizeof(int) instead of 4 as const * Fix EscapeUnescapeIri for escaped surrogate pairs --- .../src/System/IriHelper.cs | 77 +++++++++---------- .../FunctionalTests/EscapeUnescapeIriTests.cs | 33 ++++++++ ...System.Private.Uri.Functional.Tests.csproj | 1 + 3 files changed, 71 insertions(+), 40 deletions(-) create mode 100644 src/libraries/System.Private.Uri/tests/FunctionalTests/EscapeUnescapeIriTests.cs diff --git a/src/libraries/System.Private.Uri/src/System/IriHelper.cs b/src/libraries/System.Private.Uri/src/System/IriHelper.cs index 5b821405717b0..bb18ff9485c94 100644 --- a/src/libraries/System.Private.Uri/src/System/IriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/IriHelper.cs @@ -109,19 +109,11 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end ValueStringBuilder dest = new ValueStringBuilder(size); byte[]? bytes = null; - const int percentEncodingLen = 3; // Escaped UTF-8 will take 3 chars: %AB. - int bufferRemaining = 0; - int next = start; char ch; - bool escape = false; - bool surrogatePair = false; for (; next < end; ++next) { - escape = false; - surrogatePair = false; - if ((ch = pInput[next]) == '%') { if (next + 2 < end) @@ -226,56 +218,61 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end { // unicode - char ch2; + bool escape; + bool surrogatePair = false; + + char ch2 = '\0'; if ((char.IsHighSurrogate(ch)) && (next + 1 < end)) { ch2 = pInput[next + 1]; escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query); - if (!escape) - { - // copy the two chars - dest.Append(pInput[next++]); - dest.Append(pInput[next]); - } } else { - if (CheckIriUnicodeRange(ch, component == UriComponents.Query)) + escape = !CheckIriUnicodeRange(ch, component == UriComponents.Query); + } + + if (escape) + { + Span encodedBytes = stackalloc byte[4]; + + Rune rune; + if (surrogatePair) { - // copy it - dest.Append(pInput[next]); + rune = new Rune(ch, ch2); } - else + else if (!Rune.TryCreate(ch, out rune)) { - // escape it - escape = true; + rune = Rune.ReplacementChar; } - } - } - else - { - // just copy the character - dest.Append(pInput[next]); - } - if (escape) - { - const int MaxNumberOfBytesEncoded = 4; + int bytesWritten = rune.EncodeToUtf8(encodedBytes); + encodedBytes = encodedBytes.Slice(0, bytesWritten); - byte[] encodedBytes = new byte[MaxNumberOfBytesEncoded]; - fixed (byte* pEncodedBytes = &encodedBytes[0]) + foreach (byte b in encodedBytes) + { + UriHelper.EscapeAsciiChar(b, ref dest); + } + } + else { - int encodedBytesCount = Encoding.UTF8.GetBytes(pInput + next, surrogatePair ? 2 : 1, pEncodedBytes, MaxNumberOfBytesEncoded); - Debug.Assert(encodedBytesCount <= MaxNumberOfBytesEncoded, "UTF8 encoder should not exceed specified byteCount"); - - bufferRemaining -= encodedBytesCount * percentEncodingLen; - - for (int count = 0; count < encodedBytesCount; ++count) + dest.Append(ch); + if (surrogatePair) { - UriHelper.EscapeAsciiChar(encodedBytes[count], ref dest); + dest.Append(ch2); } } + + if (surrogatePair) + { + next++; + } + } + else + { + // just copy the character + dest.Append(pInput[next]); } } diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/EscapeUnescapeIriTests.cs b/src/libraries/System.Private.Uri/tests/FunctionalTests/EscapeUnescapeIriTests.cs new file mode 100644 index 0000000000000..db095dd3da78d --- /dev/null +++ b/src/libraries/System.Private.Uri/tests/FunctionalTests/EscapeUnescapeIriTests.cs @@ -0,0 +1,33 @@ +using System.Collections.Generic; +using Xunit; + +namespace System.PrivateUri.Tests +{ + public class EscapeUnescapeIriTests + { + public static IEnumerable ReplacesStandaloneSurrogatesWithReplacementChar() + { + const string UrlEncodedReplacementChar = "%EF%BF%BD"; + const string HighSurrogate = "\ud83f"; + const string LowSurrogate = "\udffe"; + + yield return new object[] { "a", "a" }; + yield return new object[] { HighSurrogate + LowSurrogate, "%F0%9F%BF%BE" }; + yield return new object[] { HighSurrogate, UrlEncodedReplacementChar }; + yield return new object[] { LowSurrogate, UrlEncodedReplacementChar }; + yield return new object[] { LowSurrogate + HighSurrogate, UrlEncodedReplacementChar + UrlEncodedReplacementChar }; + yield return new object[] { LowSurrogate + LowSurrogate, UrlEncodedReplacementChar + UrlEncodedReplacementChar }; + yield return new object[] { HighSurrogate + HighSurrogate, UrlEncodedReplacementChar + UrlEncodedReplacementChar }; + } + + [Theory] + [MemberData(nameof(ReplacesStandaloneSurrogatesWithReplacementChar))] + public static void ReplacesStandaloneSurrogatesWithReplacementChar(string input, string expected) + { + const string Prefix = "scheme:"; + Uri uri = new Uri(Prefix + input); + string actual = uri.AbsoluteUri.Substring(Prefix.Length); + Assert.Equal(expected, actual); + } + } +} diff --git a/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj b/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj index a568e2d1069f1..ca63421c45fca 100644 --- a/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj +++ b/src/libraries/System.Private.Uri/tests/FunctionalTests/System.Private.Uri.Functional.Tests.csproj @@ -4,6 +4,7 @@ +