Skip to content

Commit

Permalink
Fix and optimize EscapeUnescapeIri (dotnet#32025)
Browse files Browse the repository at this point in the history
* Remove byte[] allocation per encoded character

* Remove dead code from EscapeUnescapeIri

* Use int instead of IntPtr for stack buffer

* Use sizeof(int) instead of 4 as const

* Fix EscapeUnescapeIri for escaped surrogate pairs
  • Loading branch information
MihaZupan authored Feb 15, 2020
1 parent 7c62303 commit dda29ff
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 40 deletions.
77 changes: 37 additions & 40 deletions src/libraries/System.Private.Uri/src/System/IriHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,19 +109,11 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
ValueStringBuilder dest = new ValueStringBuilder(size);
byte[]? bytes = null;

const int percentEncodingLen = 3; // Escaped UTF-8 will take 3 chars: %AB.
int bufferRemaining = 0;

int next = start;
char ch;
bool escape = false;
bool surrogatePair = false;

for (; next < end; ++next)
{
escape = false;
surrogatePair = false;

if ((ch = pInput[next]) == '%')
{
if (next + 2 < end)
Expand Down Expand Up @@ -226,56 +218,61 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
{
// unicode

char ch2;
bool escape;
bool surrogatePair = false;

char ch2 = '\0';

if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
{
ch2 = pInput[next + 1];
escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
if (!escape)
{
// copy the two chars
dest.Append(pInput[next++]);
dest.Append(pInput[next]);
}
}
else
{
if (CheckIriUnicodeRange(ch, component == UriComponents.Query))
escape = !CheckIriUnicodeRange(ch, component == UriComponents.Query);
}

if (escape)
{
Span<byte> encodedBytes = stackalloc byte[4];

Rune rune;
if (surrogatePair)
{
// copy it
dest.Append(pInput[next]);
rune = new Rune(ch, ch2);
}
else
else if (!Rune.TryCreate(ch, out rune))
{
// escape it
escape = true;
rune = Rune.ReplacementChar;
}
}
}
else
{
// just copy the character
dest.Append(pInput[next]);
}

if (escape)
{
const int MaxNumberOfBytesEncoded = 4;
int bytesWritten = rune.EncodeToUtf8(encodedBytes);
encodedBytes = encodedBytes.Slice(0, bytesWritten);

byte[] encodedBytes = new byte[MaxNumberOfBytesEncoded];
fixed (byte* pEncodedBytes = &encodedBytes[0])
foreach (byte b in encodedBytes)
{
UriHelper.EscapeAsciiChar(b, ref dest);
}
}
else
{
int encodedBytesCount = Encoding.UTF8.GetBytes(pInput + next, surrogatePair ? 2 : 1, pEncodedBytes, MaxNumberOfBytesEncoded);
Debug.Assert(encodedBytesCount <= MaxNumberOfBytesEncoded, "UTF8 encoder should not exceed specified byteCount");

bufferRemaining -= encodedBytesCount * percentEncodingLen;

for (int count = 0; count < encodedBytesCount; ++count)
dest.Append(ch);
if (surrogatePair)
{
UriHelper.EscapeAsciiChar(encodedBytes[count], ref dest);
dest.Append(ch2);
}
}

if (surrogatePair)
{
next++;
}
}
else
{
// just copy the character
dest.Append(pInput[next]);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using System.Collections.Generic;
using Xunit;

namespace System.PrivateUri.Tests
{
public class EscapeUnescapeIriTests
{
public static IEnumerable<object[]> ReplacesStandaloneSurrogatesWithReplacementChar()
{
const string UrlEncodedReplacementChar = "%EF%BF%BD";
const string HighSurrogate = "\ud83f";
const string LowSurrogate = "\udffe";

yield return new object[] { "a", "a" };
yield return new object[] { HighSurrogate + LowSurrogate, "%F0%9F%BF%BE" };
yield return new object[] { HighSurrogate, UrlEncodedReplacementChar };
yield return new object[] { LowSurrogate, UrlEncodedReplacementChar };
yield return new object[] { LowSurrogate + HighSurrogate, UrlEncodedReplacementChar + UrlEncodedReplacementChar };
yield return new object[] { LowSurrogate + LowSurrogate, UrlEncodedReplacementChar + UrlEncodedReplacementChar };
yield return new object[] { HighSurrogate + HighSurrogate, UrlEncodedReplacementChar + UrlEncodedReplacementChar };
}

[Theory]
[MemberData(nameof(ReplacesStandaloneSurrogatesWithReplacementChar))]
public static void ReplacesStandaloneSurrogatesWithReplacementChar(string input, string expected)
{
const string Prefix = "scheme:";
Uri uri = new Uri(Prefix + input);
string actual = uri.AbsoluteUri.Substring(Prefix.Length);
Assert.Equal(expected, actual);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
</PropertyGroup>
<ItemGroup>
<Compile Include="AppxUriValue.cs" />
<Compile Include="EscapeUnescapeIriTests.cs" />
<Compile Include="IdnCheckHostNameTest.cs" />
<Compile Include="IdnDnsSafeHostTest.cs" />
<Compile Include="IdnHostNameValidationTest.cs" />
Expand Down

0 comments on commit dda29ff

Please sign in to comment.