Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize CheckIriUnicodeRange #31860

Merged
merged 6 commits into from
Feb 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 28 additions & 59 deletions src/libraries/System.Private.Uri/src/System/IriHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Text;

namespace System
Expand All @@ -12,6 +11,7 @@ internal static class IriHelper
{
//
// Checks whether the provided non-surrogate char lies in the IRI range
// This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
//
internal static bool CheckIriUnicodeRange(char unicode, bool isQuery)
{
Expand All @@ -25,58 +25,27 @@ internal static bool CheckIriUnicodeRange(char unicode, bool isQuery)
// Checks whether highSurr and lowSurr form a surrogate pair and, if so,
// whether the combined code point is in the IRI range
// Takes isQuery because the IRI restrictions for the query component are different
// This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
//
internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, ref bool surrogatePair, bool isQuery)
internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, out bool isSurrogatePair, bool isQuery)
{
bool inRange = false;
surrogatePair = false;

Debug.Assert(char.IsHighSurrogate(highSurr));

if (char.IsSurrogatePair(highSurr, lowSurr))
if (Rune.TryCreate(highSurr, lowSurr, out Rune rune))
{
surrogatePair = true;
ReadOnlySpan<char> chars = stackalloc char[2] { highSurr, lowSurr };
string surrPair = new string(chars);
if (((string.CompareOrdinal(surrPair, "\U00010000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0001FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00020000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0002FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00030000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0003FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00040000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0004FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00050000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0005FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00060000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0006FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00070000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0007FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00080000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0008FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00090000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0009FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000A0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000AFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000B0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000BFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000C0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000CFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000D0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000DFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000E1000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000EFFFD") <= 0)) ||
(isQuery &&
(((string.CompareOrdinal(surrPair, "\U000F0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000FFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00100000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0010FFFD") <= 0)))))
{
inRange = true;
}
isSurrogatePair = true;

// U+xxFFFE..U+xxFFFF are noncharacters in every plane, so we exclude them.
// U+E0000..U+E0FFF is disallowed per the 'ucschar' definition in the ABNF.
// U+F0000 and above are only allowed for 'iprivate' per the ABNF (isQuery = true).

return ((rune.Value & 0xFFFF) < 0xFFFE)
&& ((uint)(rune.Value - 0xE0000) >= (0xE1000 - 0xE0000))
&& (isQuery || rune.Value < 0xF0000);
}

return inRange;
isSurrogatePair = false;
return false;
}

//
Expand Down Expand Up @@ -148,7 +117,7 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
int startSeq = next;
int byteCount = 1;
// lazy initialization of max size, will reuse the array for next sequences
if ((object?)bytes == null)
if (bytes is null)
bytes = new byte[end - next];

bytes[0] = (byte)ch;
Expand Down Expand Up @@ -218,22 +187,30 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
{
// unicode

bool escape;
bool isInIriUnicodeRange;
bool surrogatePair = false;

char ch2 = '\0';

if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
{
ch2 = pInput[next + 1];
escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, component == UriComponents.Query);
}
else
{
escape = !CheckIriUnicodeRange(ch, component == UriComponents.Query);
isInIriUnicodeRange = CheckIriUnicodeRange(ch, component == UriComponents.Query);
}

if (escape)
if (isInIriUnicodeRange)
{
dest.Append(ch);
if (surrogatePair)
{
dest.Append(ch2);
}
}
else
{
Span<byte> encodedBytes = stackalloc byte[4];

Expand All @@ -255,14 +232,6 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
UriHelper.EscapeAsciiChar(b, ref dest);
}
}
else
{
dest.Append(ch);
if (surrogatePair)
{
dest.Append(ch2);
}
}

if (surrogatePair)
{
Expand Down
3 changes: 1 addition & 2 deletions src/libraries/System.Private.Uri/src/System/Uri.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4435,8 +4435,7 @@ private unsafe Check CheckCanonical(char* str, ref ushort idx, ushort end, char
{
if ((i + 1) < end)
{
bool surrPair = false;
valid = IriHelper.CheckIriUnicodeRange(c, str[i + 1], ref surrPair, true);
valid = IriHelper.CheckIriUnicodeRange(c, str[i + 1], out _, true);
}
}
else
Expand Down
6 changes: 3 additions & 3 deletions src/libraries/System.Private.Uri/src/System/UriHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -575,12 +575,12 @@ internal static unsafe void MatchUTF8Sequence(ref ValueStringBuilder dest, Span<
if (iriParsing)
{
if (!isHighSurr)
{
inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], isQuery);
}
else
{
bool surrPair = false;
inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1],
ref surrPair, isQuery);
inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1], out _, isQuery);
}
}

Expand Down