Skip to content

Commit

Permalink
Optimize CheckIriUnicodeRange (#31860)
Browse files Browse the repository at this point in the history
* Optimize CheckIriUnicodeRange

* Improve clarity of if check in CheckIriUnicodeRange

Co-Authored-By: Stephen Toub <stoub@microsoft.com>

* Improve clarity of if check in CheckIriUnicodeRange

* Optimize range checks that are equivalent mod 2^16

* Invert escape boolean condition

Co-authored-by: Stephen Toub <stoub@microsoft.com>
  • Loading branch information
MihaZupan and stephentoub authored Feb 18, 2020
1 parent 4ebe5ee commit b00f349
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 64 deletions.
87 changes: 28 additions & 59 deletions src/libraries/System.Private.Uri/src/System/IriHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Text;

namespace System
Expand All @@ -12,6 +11,7 @@ internal static class IriHelper
{
//
// Checks whether the provided non-surrogate char lies in the IRI range
// This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
//
internal static bool CheckIriUnicodeRange(char unicode, bool isQuery)
{
Expand All @@ -25,58 +25,27 @@ internal static bool CheckIriUnicodeRange(char unicode, bool isQuery)
// Checks whether highSurr and lowSurr form a surrogate pair and, if so,
// whether the combined code point is in the IRI range.
// Takes isQuery because the IRI restrictions for the query component are different.
// This method implements the ABNF checks per https://tools.ietf.org/html/rfc3987#section-2.2
//
internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, ref bool surrogatePair, bool isQuery)
internal static bool CheckIriUnicodeRange(char highSurr, char lowSurr, out bool isSurrogatePair, bool isQuery)
{
bool inRange = false;
surrogatePair = false;

Debug.Assert(char.IsHighSurrogate(highSurr));

if (char.IsSurrogatePair(highSurr, lowSurr))
if (Rune.TryCreate(highSurr, lowSurr, out Rune rune))
{
surrogatePair = true;
ReadOnlySpan<char> chars = stackalloc char[2] { highSurr, lowSurr };
string surrPair = new string(chars);
if (((string.CompareOrdinal(surrPair, "\U00010000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0001FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00020000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0002FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00030000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0003FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00040000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0004FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00050000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0005FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00060000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0006FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00070000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0007FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00080000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0008FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00090000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0009FFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000A0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000AFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000B0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000BFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000C0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000CFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000D0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000DFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U000E1000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000EFFFD") <= 0)) ||
(isQuery &&
(((string.CompareOrdinal(surrPair, "\U000F0000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U000FFFFD") <= 0)) ||
((string.CompareOrdinal(surrPair, "\U00100000") >= 0)
&& (string.CompareOrdinal(surrPair, "\U0010FFFD") <= 0)))))
{
inRange = true;
}
isSurrogatePair = true;

// U+xxFFFE..U+xxFFFF are noncharacters in every plane (outside both 'ucschar' and 'iprivate'), so we exclude them.
// U+E0000..U+E0FFF is disallowed per the 'ucschar' definition in the ABNF.
// U+F0000 and above are only allowed for 'iprivate' per the ABNF (isQuery = true).

return ((rune.Value & 0xFFFF) < 0xFFFE)
&& ((uint)(rune.Value - 0xE0000) >= (0xE1000 - 0xE0000))
&& (isQuery || rune.Value < 0xF0000);
}

return inRange;
isSurrogatePair = false;
return false;
}

//
Expand Down Expand Up @@ -150,7 +119,7 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
int startSeq = next;
int byteCount = 1;
// lazy initialization of max size, will reuse the array for next sequences
if ((object?)bytes == null)
if (bytes is null)
bytes = new byte[end - next];

bytes[0] = (byte)ch;
Expand Down Expand Up @@ -220,22 +189,30 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
{
// unicode

bool escape;
bool isInIriUnicodeRange;
bool surrogatePair = false;

char ch2 = '\0';

if ((char.IsHighSurrogate(ch)) && (next + 1 < end))
{
ch2 = pInput[next + 1];
escape = !CheckIriUnicodeRange(ch, ch2, ref surrogatePair, component == UriComponents.Query);
isInIriUnicodeRange = CheckIriUnicodeRange(ch, ch2, out surrogatePair, component == UriComponents.Query);
}
else
{
escape = !CheckIriUnicodeRange(ch, component == UriComponents.Query);
isInIriUnicodeRange = CheckIriUnicodeRange(ch, component == UriComponents.Query);
}

if (escape)
if (isInIriUnicodeRange)
{
dest.Append(ch);
if (surrogatePair)
{
dest.Append(ch2);
}
}
else
{
Rune rune;
if (surrogatePair)
Expand All @@ -255,14 +232,6 @@ internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end
UriHelper.EscapeAsciiChar(b, ref dest);
}
}
else
{
dest.Append(ch);
if (surrogatePair)
{
dest.Append(ch2);
}
}

if (surrogatePair)
{
Expand Down
3 changes: 1 addition & 2 deletions src/libraries/System.Private.Uri/src/System/Uri.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4435,8 +4435,7 @@ private unsafe Check CheckCanonical(char* str, ref ushort idx, ushort end, char
{
if ((i + 1) < end)
{
bool surrPair = false;
valid = IriHelper.CheckIriUnicodeRange(c, str[i + 1], ref surrPair, true);
valid = IriHelper.CheckIriUnicodeRange(c, str[i + 1], out _, true);
}
}
else
Expand Down
6 changes: 3 additions & 3 deletions src/libraries/System.Private.Uri/src/System/UriHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -575,12 +575,12 @@ internal static unsafe void MatchUTF8Sequence(ref ValueStringBuilder dest, Span<
if (iriParsing)
{
if (!isHighSurr)
{
inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], isQuery);
}
else
{
bool surrPair = false;
inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1],
ref surrPair, isQuery);
inIriRange = IriHelper.CheckIriUnicodeRange(unescapedChars[j], unescapedChars[j + 1], out _, isQuery);
}
}

Expand Down

0 comments on commit b00f349

Please sign in to comment.