From 39d72d4fe711058c459cfa59d71cc71ebb12012f Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Fri, 22 Jan 2021 23:35:33 +0100 Subject: [PATCH] Remove excess allocations in Uri.ReCreateParts (#34864) * Remove excess allocations in Uri.ReCreateParts * Fix Compression offset * Revert VSB optimizations * Use noEscape.Length * Simplify TryGetUnicodeEquivalent * Remove ValueStringBuilderExtensions * Index into chars instead of dest * Remove unreachable code block * Use StackallocThreshold constant * Add comments about why MemoryMarshal is used to recreate the span --- .../src/System/IriHelper.cs | 4 +- .../System.Private.Uri/src/System/Uri.cs | 471 ++++++++---------- .../System.Private.Uri/src/System/UriExt.cs | 2 +- .../src/System/UriHelper.cs | 54 +- .../tests/UnitTests/Fakes/FakeUri.cs | 1 + 5 files changed, 236 insertions(+), 296 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/IriHelper.cs b/src/libraries/System.Private.Uri/src/System/IriHelper.cs index f357a44b9bd2b..ebc433bb1e2f5 100644 --- a/src/libraries/System.Private.Uri/src/System/IriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/IriHelper.cs @@ -89,8 +89,8 @@ internal static bool CheckIsReserved(char ch, UriComponents component) internal static unsafe string EscapeUnescapeIri(char* pInput, int start, int end, UriComponents component) { int size = end - start; - ValueStringBuilder dest = size <= 256 - ? new ValueStringBuilder(stackalloc char[256]) + var dest = size <= Uri.StackallocThreshold + ? new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold]) : new ValueStringBuilder(size); Span maxUtf8EncodedSpan = stackalloc byte[4]; diff --git a/src/libraries/System.Private.Uri/src/System/Uri.cs b/src/libraries/System.Private.Uri/src/System/Uri.cs index 8e4d1d8e845ab..cfa15a2d18e9f 100644 --- a/src/libraries/System.Private.Uri/src/System/Uri.cs +++ b/src/libraries/System.Private.Uri/src/System/Uri.cs @@ -34,6 +34,7 @@ public partial class Uri : ISerializable public static readonly string UriSchemeNetPipe = UriParser.NetPipeUri.SchemeName; public static readonly string SchemeDelimiter = "://"; + internal const int StackallocThreshold = 512; internal const int c_MaxUriBufferSize = 0xFFF0; private const int c_MaxUriSchemeName = 1024; @@ -1184,7 +1185,7 @@ public string IdnHost else if (hostType == Flags.BasicHostType && InFact(Flags.HostNotCanonical | Flags.E_HostNotCanonical)) { // Unescape everything - ValueStringBuilder dest = new ValueStringBuilder(stackalloc char[256]); + var dest = new ValueStringBuilder(stackalloc char[StackallocThreshold]); UriHelper.UnescapeString(host, 0, host.Length, ref dest, c_DummyChar, c_DummyChar, c_DummyChar, @@ -2631,28 +2632,24 @@ private string GetUnescapedParts(UriComponents uriParts, UriFormat formatAs) private string ReCreateParts(UriComponents parts, ushort nonCanonical, UriFormat formatAs) { EnsureHostString(false); - string stemp = (parts & UriComponents.Host) == 0 ? string.Empty : _info.Host!; - // we reserve more space than required because a canonical Ipv6 Host - // may take more characters than in original _string - // Also +3 is for :// and +1 is for absent first slash - // Also we may escape every character, hence multiplying by 12 - // UTF-8 can use up to 4 bytes per char * 3 chars per byte (%A4) = 12 encoded chars - int count = (_info.Offset.End - _info.Offset.User) * (formatAs == UriFormat.UriEscaped ? 12 : 1); - char[] chars = new char[stemp.Length + count + _syntax.SchemeName.Length + 3 + 1]; - count = 0; + + string str = _string; + + var dest = str.Length <= StackallocThreshold + ? new ValueStringBuilder(stackalloc char[StackallocThreshold]) + : new ValueStringBuilder(str.Length); //Scheme and slashes if ((parts & UriComponents.Scheme) != 0) { - _syntax.SchemeName.CopyTo(0, chars, count, _syntax.SchemeName.Length); - count += _syntax.SchemeName.Length; + dest.Append(_syntax.SchemeName); if (parts != UriComponents.Scheme) { - chars[count++] = ':'; + dest.Append(':'); if (InFact(Flags.AuthorityFound)) { - chars[count++] = '/'; - chars[count++] = '/'; + dest.Append('/'); + dest.Append('/'); } } } @@ -2660,6 +2657,8 @@ private string ReCreateParts(UriComponents parts, ushort nonCanonical, UriFormat //UserInfo if ((parts & UriComponents.UserInfo) != 0 && InFact(Flags.HasUserInfo)) { + ReadOnlySpan slice = str.AsSpan(_info.Offset.User, _info.Offset.Host - _info.Offset.User); + if ((nonCanonical & (ushort)UriComponents.UserInfo) != 0) { switch (formatAs) @@ -2667,10 +2666,7 @@ private string ReCreateParts(UriComponents parts, ushort nonCanonical, UriFormat case UriFormat.UriEscaped: if (NotAny(Flags.UserEscaped)) { - chars = UriHelper.EscapeString( - _string.AsSpan(_info.Offset.User, _info.Offset.Host - _info.Offset.User), - chars, ref count, - checkExistingEscaped: true, '?', '#'); + UriHelper.EscapeString(slice, ref dest, checkExistingEscaped: true, '?', '#');; } else { @@ -2678,264 +2674,209 @@ private string ReCreateParts(UriComponents parts, ushort nonCanonical, UriFormat { // We should throw here but currently just accept user input known as invalid } - _string.CopyTo(_info.Offset.User, chars, count, _info.Offset.Host - _info.Offset.User); - count += (_info.Offset.Host - _info.Offset.User); + dest.Append(slice); } break; case UriFormat.SafeUnescaped: - chars = UriHelper.UnescapeString(_string, _info.Offset.User, _info.Offset.Host - 1, - chars, ref count, '@', '/', '\\', InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : - UnescapeMode.EscapeUnescape, _syntax, false); - chars[count++] = '@'; + UriHelper.UnescapeString(slice[..^1], + ref dest, '@', '/', '\\', + InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape, + _syntax, isQuery: false); + dest.Append('@'); break; case UriFormat.Unescaped: - chars = UriHelper.UnescapeString(_string, _info.Offset.User, _info.Offset.Host, chars, - ref count, c_DummyChar, c_DummyChar, c_DummyChar, - UnescapeMode.Unescape | UnescapeMode.UnescapeAll, _syntax, false); + UriHelper.UnescapeString(slice, + ref dest, c_DummyChar, c_DummyChar, c_DummyChar, + UnescapeMode.Unescape | UnescapeMode.UnescapeAll, + _syntax, isQuery: false); break; default: //V1ToStringUnescape - chars = UriHelper.UnescapeString(_string, _info.Offset.User, _info.Offset.Host, chars, - ref count, c_DummyChar, c_DummyChar, c_DummyChar, UnescapeMode.CopyOnly, _syntax, - false); + dest.Append(slice); break; } } else { - UriHelper.UnescapeString(_string, _info.Offset.User, _info.Offset.Host, chars, ref count, - c_DummyChar, c_DummyChar, c_DummyChar, UnescapeMode.CopyOnly, _syntax, false); + dest.Append(slice); } + if (parts == UriComponents.UserInfo) { //strip '@' delimiter - --count; + dest.Length--; } } // Host - if ((parts & UriComponents.Host) != 0 && stemp.Length != 0) + if ((parts & UriComponents.Host) != 0) { - UnescapeMode mode; - if (formatAs != UriFormat.UriEscaped && HostType == Flags.BasicHostType - && (nonCanonical & (ushort)UriComponents.Host) != 0) - { - // only Basic host could be in the escaped form - mode = formatAs == UriFormat.Unescaped - ? (UnescapeMode.Unescape | UnescapeMode.UnescapeAll) : - (InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape); - } - else - { - mode = UnescapeMode.CopyOnly; - } + string host = _info.Host!; - // NormalizedHost - if ((parts & UriComponents.NormalizedHost) != 0) + if (host.Length != 0) { - stemp = UriHelper.StripBidiControlCharacters(stemp, stemp); - - var hostBuilder = new ValueStringBuilder(stackalloc char[512]); - - // The host may be invalid punycode (www.xn--?-pck.com), - // but we shouldn't throw after the constructor. - if (DomainNameHelper.TryGetUnicodeEquivalent(stemp, ref hostBuilder)) + UnescapeMode mode; + if (formatAs != UriFormat.UriEscaped && HostType == Flags.BasicHostType + && (nonCanonical & (ushort)UriComponents.Host) != 0) { - stemp = hostBuilder.ToString(); + // only Basic host could be in the escaped form + mode = formatAs == UriFormat.Unescaped + ? (UnescapeMode.Unescape | UnescapeMode.UnescapeAll) : + (InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape); } else { - hostBuilder.Dispose(); + mode = UnescapeMode.CopyOnly; } - } - chars = UriHelper.UnescapeString(stemp, 0, stemp.Length, chars, ref count, '/', '?', '#', mode, - _syntax, false); + var hostBuilder = new ValueStringBuilder(stackalloc char[StackallocThreshold]); - // A fix up only for SerializationInfo and IpV6 host with a scopeID - if ((parts & UriComponents.SerializationInfoString) != 0 && HostType == Flags.IPv6HostType && - _info.ScopeId is not null) - { - _info.ScopeId.CopyTo(0, chars, count - 1, _info.ScopeId.Length); - count += _info.ScopeId.Length; - chars[count - 1] = ']'; - } - } - - //Port (always wants a ':' delimiter if got to this method) - if ((parts & UriComponents.Port) != 0) - { - if ((nonCanonical & (ushort)UriComponents.Port) == 0) - { - //take it from _string - if (InFact(Flags.NotDefaultPort)) + // NormalizedHost + if ((parts & UriComponents.NormalizedHost) != 0) { - int start = _info.Offset.Path; - while (_string[--start] != ':') + host = UriHelper.StripBidiControlCharacters(host, host); + + // Upconvert any punycode to unicode, xn--pck -> ? + if (!DomainNameHelper.TryGetUnicodeEquivalent(host, ref hostBuilder)) { - ; + hostBuilder.Length = 0; } - _string.CopyTo(start, chars, count, _info.Offset.Path - start); - count += (_info.Offset.Path - start); } - else if ((parts & UriComponents.StrongPort) != 0 && _syntax.DefaultPort != UriParser.NoDefaultPort) + + UriHelper.UnescapeString(hostBuilder.Length == 0 ? host : hostBuilder.AsSpan(), + ref dest, '/', '?', '#', + mode, + _syntax, isQuery: false); + + hostBuilder.Dispose(); + + // A fix up only for SerializationInfo and IpV6 host with a scopeID + if ((parts & UriComponents.SerializationInfoString) != 0 && HostType == Flags.IPv6HostType && _info.ScopeId != null) { - chars[count++] = ':'; - stemp = _info.Offset.PortValue.ToString(CultureInfo.InvariantCulture); - stemp.CopyTo(0, chars, count, stemp.Length); - count += stemp.Length; + dest.Length--; + dest.Append(_info.ScopeId); + dest.Append(']'); } } - else if (InFact(Flags.NotDefaultPort) || ((parts & UriComponents.StrongPort) != 0 && - _syntax.DefaultPort != UriParser.NoDefaultPort)) - { - // recreate string from port value - chars[count++] = ':'; - stemp = _info.Offset.PortValue.ToString(CultureInfo.InvariantCulture); - stemp.CopyTo(0, chars, count, stemp.Length); - count += stemp.Length; - } } - int delimiterAwareIndex; + //Port (always wants a ':' delimiter if got to this method) + if ((parts & UriComponents.Port) != 0 && + (InFact(Flags.NotDefaultPort) || ((parts & UriComponents.StrongPort) != 0 && _syntax.DefaultPort != UriParser.NoDefaultPort))) + { + dest.Append(':'); + + const int MaxUshortLength = 5; + bool success = _info.Offset.PortValue.TryFormat(dest.AppendSpan(MaxUshortLength), out int charsWritten); + Debug.Assert(success); + dest.Length -= MaxUshortLength - charsWritten; + } //Path if ((parts & UriComponents.Path) != 0) { - chars = GetCanonicalPath(chars, ref count, formatAs); + GetCanonicalPath(ref dest, formatAs); // (possibly strip the leading '/' delimiter) if (parts == UriComponents.Path) { - if (InFact(Flags.AuthorityFound) && count != 0 && chars[0] == '/') + int offset; + if (InFact(Flags.AuthorityFound) && dest.Length != 0 && dest[0] == '/') { - delimiterAwareIndex = 1; --count; + offset = 1; } else { - delimiterAwareIndex = 0; + offset = 0; } - return count == 0 ? string.Empty : new string(chars, delimiterAwareIndex, count); + + string result = dest.AsSpan(offset).ToString(); + dest.Dispose(); + return result; } } //Query (possibly strip the '?' delimiter) if ((parts & UriComponents.Query) != 0 && _info.Offset.Query < _info.Offset.Fragment) { - delimiterAwareIndex = (_info.Offset.Query + 1); + int offset = (_info.Offset.Query + 1); if (parts != UriComponents.Query) - chars[count++] = '?'; //see Fragment+1 below + dest.Append('?'); + + UnescapeMode mode = UnescapeMode.CopyOnly; if ((nonCanonical & (ushort)UriComponents.Query) != 0) { - switch (formatAs) + if (formatAs == UriFormat.UriEscaped) { - case UriFormat.UriEscaped: - //Can Assert IsImplicitfile == false - if (NotAny(Flags.UserEscaped)) - { - chars = UriHelper.EscapeString( - _string.AsSpan(delimiterAwareIndex, _info.Offset.Fragment - delimiterAwareIndex), - chars, ref count, - checkExistingEscaped: true, '#'); - } - else - { - UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.Fragment, chars, - ref count, c_DummyChar, c_DummyChar, c_DummyChar, UnescapeMode.CopyOnly, _syntax, - true); - } - break; - - case V1ToStringUnescape: - - chars = UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.Fragment, chars, - ref count, '#', c_DummyChar, c_DummyChar, (InFact(Flags.UserEscaped) ? - UnescapeMode.Unescape : UnescapeMode.EscapeUnescape) | UnescapeMode.V1ToStringFlag, - _syntax, true); - break; - - case UriFormat.Unescaped: - - chars = UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.Fragment, chars, - ref count, '#', c_DummyChar, c_DummyChar, - (UnescapeMode.Unescape | UnescapeMode.UnescapeAll), _syntax, true); - break; - - default: // UriFormat.SafeUnescaped + if (NotAny(Flags.UserEscaped)) + { + UriHelper.EscapeString( + str.AsSpan(offset, _info.Offset.Fragment - offset), + ref dest, checkExistingEscaped: true, '#'); - chars = UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.Fragment, chars, - ref count, '#', c_DummyChar, c_DummyChar, (InFact(Flags.UserEscaped) ? - UnescapeMode.Unescape : UnescapeMode.EscapeUnescape), _syntax, true); - break; + goto AfterQuery; + } + } + else + { + mode = formatAs switch + { + V1ToStringUnescape => (InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape) | UnescapeMode.V1ToStringFlag, + UriFormat.Unescaped => UnescapeMode.Unescape | UnescapeMode.UnescapeAll, + _ => InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape + }; } } - else - { - UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.Fragment, chars, ref count, - c_DummyChar, c_DummyChar, c_DummyChar, UnescapeMode.CopyOnly, _syntax, true); - } + + UriHelper.UnescapeString(str, offset, _info.Offset.Fragment, + ref dest, '#', c_DummyChar, c_DummyChar, + mode, _syntax, isQuery: true); } + AfterQuery: //Fragment (possibly strip the '#' delimiter) if ((parts & UriComponents.Fragment) != 0 && _info.Offset.Fragment < _info.Offset.End) { - delimiterAwareIndex = _info.Offset.Fragment + 1; + int offset = _info.Offset.Fragment + 1; if (parts != UriComponents.Fragment) - chars[count++] = '#'; //see Fragment+1 below + dest.Append('#'); + + UnescapeMode mode = UnescapeMode.CopyOnly; if ((nonCanonical & (ushort)UriComponents.Fragment) != 0) { - switch (formatAs) + if (formatAs == UriFormat.UriEscaped) { - case UriFormat.UriEscaped: - if (NotAny(Flags.UserEscaped)) - { - chars = UriHelper.EscapeString( - _string.AsSpan(delimiterAwareIndex, _info.Offset.End - delimiterAwareIndex), - chars, ref count, - checkExistingEscaped: true); - } - else - { - UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.End, chars, - ref count, c_DummyChar, c_DummyChar, c_DummyChar, UnescapeMode.CopyOnly, _syntax, - false); - } - break; - - case V1ToStringUnescape: - - chars = UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.End, chars, - ref count, '#', c_DummyChar, c_DummyChar, (InFact(Flags.UserEscaped) ? - UnescapeMode.Unescape : UnescapeMode.EscapeUnescape) | UnescapeMode.V1ToStringFlag, - _syntax, false); - break; - case UriFormat.Unescaped: - - chars = UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.End, chars, - ref count, '#', c_DummyChar, c_DummyChar, - UnescapeMode.Unescape | UnescapeMode.UnescapeAll, _syntax, false); - break; - - default: // UriFormat.SafeUnescaped + if (NotAny(Flags.UserEscaped)) + { + UriHelper.EscapeString( + str.AsSpan(offset, _info.Offset.End - offset), + ref dest, checkExistingEscaped: true); - chars = UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.End, chars, - ref count, '#', c_DummyChar, c_DummyChar, (InFact(Flags.UserEscaped) ? - UnescapeMode.Unescape : UnescapeMode.EscapeUnescape), _syntax, false); - break; + goto AfterFragment; + } + } + else + { + mode = formatAs switch + { + V1ToStringUnescape => (InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape) | UnescapeMode.V1ToStringFlag, + UriFormat.Unescaped => UnescapeMode.Unescape | UnescapeMode.UnescapeAll, + _ => InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape + }; } } - else - { - UriHelper.UnescapeString(_string, delimiterAwareIndex, _info.Offset.End, chars, ref count, - c_DummyChar, c_DummyChar, c_DummyChar, UnescapeMode.CopyOnly, _syntax, false); - } + + UriHelper.UnescapeString(str, offset, _info.Offset.End, + ref dest, '#', c_DummyChar, c_DummyChar, + mode, _syntax, isQuery: false); } + AfterFragment: - return new string(chars, 0, count); + return dest.ToString(); } // @@ -4438,15 +4379,15 @@ private unsafe Check CheckCanonical(char* str, ref int idx, int end, char delim) // the passed array must be long enough to hold at least // canonical unescaped path representation (allocated by the caller) // - private unsafe char[] GetCanonicalPath(char[] dest, ref int pos, UriFormat formatAs) + private unsafe void GetCanonicalPath(ref ValueStringBuilder dest, UriFormat formatAs) { if (InFact(Flags.FirstSlashAbsent)) - dest[pos++] = '/'; + dest.Append('/'); if (_info.Offset.Path == _info.Offset.Query) - return dest; + return; - int end = pos; + int start = dest.Length; int dosPathIdx = SecuredPathIndex; @@ -4457,8 +4398,7 @@ private unsafe char[] GetCanonicalPath(char[] dest, ref int pos, UriFormat forma { if (InFact(Flags.ShouldBeCompressed)) { - _string.CopyTo(_info.Offset.Path, dest, end, _info.Offset.Query - _info.Offset.Path); - end += (_info.Offset.Query - _info.Offset.Path); + dest.Append(_string.AsSpan(_info.Offset.Path, _info.Offset.Query - _info.Offset.Path)); // If the path was found as needed compression and contains escaped characters, unescape only // interesting characters (safe) @@ -4468,8 +4408,10 @@ private unsafe char[] GetCanonicalPath(char[] dest, ref int pos, UriFormat forma { fixed (char* pdest = dest) { - UnescapeOnly(pdest, pos, ref end, '.', '/', + int end = dest.Length; + UnescapeOnly(pdest, start, ref end, '.', '/', _syntax.InFact(UriSyntaxFlags.ConvertPathSlashes) ? '\\' : c_DummyChar); + dest.Length = end; } } } @@ -4487,29 +4429,37 @@ private unsafe char[] GetCanonicalPath(char[] dest, ref int pos, UriFormat forma str = str.Insert(dosPathIdx + _info.Offset.Path - 1, ":"); } - dest = UriHelper.EscapeString( + UriHelper.EscapeString( str.AsSpan(_info.Offset.Path, _info.Offset.Query - _info.Offset.Path), - dest, ref end, - checkExistingEscaped: !IsImplicitFile, '?', '#'); + ref dest, checkExistingEscaped: !IsImplicitFile, '?', '#'); } else { - _string.CopyTo(_info.Offset.Path, dest, end, _info.Offset.Query - _info.Offset.Path); - end += (_info.Offset.Query - _info.Offset.Path); + dest.Append(_string.AsSpan(_info.Offset.Path, _info.Offset.Query - _info.Offset.Path)); } } // On Unix, escape '\\' in path of file uris to '%5C' canonical form. if (!IsWindowsSystem && InFact(Flags.BackslashInPath) && _syntax.NotAny(UriSyntaxFlags.ConvertPathSlashes) && _syntax.InFact(UriSyntaxFlags.FileLikeUri) && !IsImplicitFile) { - dest = UriHelper.EscapeString(new string(dest, pos, end - pos), dest, ref pos, checkExistingEscaped: true, '\\'); - end = pos; + // We can't do an in-place escape, create a copy + var copy = new ValueStringBuilder(stackalloc char[StackallocThreshold]); + copy.Append(dest.AsSpan(start, dest.Length - start)); + + dest.Length = start; + + // CS8350 & CS8352: We can't pass `copy` and `dest` as arguments together as that could leak the scope of the above stackalloc + // As a workaround, re-create the Span in a way that avoids analysis + ReadOnlySpan copySpan = MemoryMarshal.CreateReadOnlySpan(ref copy.GetPinnableReference(), copy.Length); + UriHelper.EscapeString(copySpan, ref dest, checkExistingEscaped: true, '\\'); + start = dest.Length; + + copy.Dispose(); } } else { - _string.CopyTo(_info.Offset.Path, dest, end, _info.Offset.Query - _info.Offset.Path); - end += (_info.Offset.Query - _info.Offset.Path); + dest.Append(_string.AsSpan(_info.Offset.Path, _info.Offset.Query - _info.Offset.Path)); if (InFact(Flags.ShouldBeCompressed)) { @@ -4521,8 +4471,10 @@ private unsafe char[] GetCanonicalPath(char[] dest, ref int pos, UriFormat forma { fixed (char* pdest = dest) { - UnescapeOnly(pdest, pos, ref end, '.', '/', + int end = dest.Length; + UnescapeOnly(pdest, start, ref end, '.', '/', _syntax.InFact(UriSyntaxFlags.ConvertPathSlashes) ? '\\' : c_DummyChar); + dest.Length = end; } } } @@ -4536,77 +4488,82 @@ private unsafe char[] GetCanonicalPath(char[] dest, ref int pos, UriFormat forma // // (path is already >= 3 chars if recognized as a DOS-like) // - if (dosPathIdx != 0 && dest[dosPathIdx + pos - 1] == '|') - dest[dosPathIdx + pos - 1] = ':'; + int offset = start + dosPathIdx; + if (dosPathIdx != 0 && dest[offset - 1] == '|') + dest[offset - 1] = ':'; - if (InFact(Flags.ShouldBeCompressed)) + if (InFact(Flags.ShouldBeCompressed) && dest.Length - offset > 0) { // It will also convert back slashes if needed - Compress(dest, pos + dosPathIdx, ref end, _syntax); - if (dest[pos] == '\\') - dest[pos] = '/'; + dest.Length = offset + Compress(dest.RawChars.Slice(offset, dest.Length - offset), _syntax); + if (dest[start] == '\\') + dest[start] = '/'; // Escape path if requested and found as not fully escaped if (formatAs == UriFormat.UriEscaped && NotAny(Flags.UserEscaped) && InFact(Flags.E_PathNotCanonical)) { //Note: Flags.UserEscaped check is solely based on trusting the user - dest = UriHelper.EscapeString(new string(dest, pos, end - pos), dest, ref pos, checkExistingEscaped: !IsImplicitFile, '?', '#'); - end = pos; + + // We can't do an in-place escape, create a copy + var copy = new ValueStringBuilder(stackalloc char[StackallocThreshold]); + copy.Append(dest.AsSpan(start, dest.Length - start)); + + dest.Length = start; + + // CS8350 & CS8352: We can't pass `copy` and `dest` as arguments together as that could leak the scope of the above stackalloc + // As a workaround, re-create the Span in a way that avoids analysis + ReadOnlySpan copySpan = MemoryMarshal.CreateReadOnlySpan(ref copy.GetPinnableReference(), copy.Length); + UriHelper.EscapeString(copySpan, ref dest, checkExistingEscaped: !IsImplicitFile, '?', '#'); + start = dest.Length; + + copy.Dispose(); } } - else if (_syntax.InFact(UriSyntaxFlags.ConvertPathSlashes) && InFact(Flags.BackslashInPath)) - { - for (int i = pos; i < end; ++i) - if (dest[i] == '\\') dest[i] = '/'; - } if (formatAs != UriFormat.UriEscaped && InFact(Flags.PathNotCanonical)) { UnescapeMode mode; - if (InFact(Flags.PathNotCanonical)) + switch (formatAs) { - switch (formatAs) - { - case V1ToStringUnescape: + case V1ToStringUnescape: - mode = (InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape) - | UnescapeMode.V1ToStringFlag; - if (IsImplicitFile) - mode &= ~UnescapeMode.Unescape; - break; + mode = (InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape) + | UnescapeMode.V1ToStringFlag; + if (IsImplicitFile) + mode &= ~UnescapeMode.Unescape; + break; - case UriFormat.Unescaped: - mode = IsImplicitFile ? UnescapeMode.CopyOnly - : UnescapeMode.Unescape | UnescapeMode.UnescapeAll; - break; + case UriFormat.Unescaped: + mode = IsImplicitFile ? UnescapeMode.CopyOnly + : UnescapeMode.Unescape | UnescapeMode.UnescapeAll; + break; - default: // UriFormat.SafeUnescaped + default: // UriFormat.SafeUnescaped - mode = InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape; - if (IsImplicitFile) - mode &= ~UnescapeMode.Unescape; - break; - } - } - else - { - mode = UnescapeMode.CopyOnly; + mode = InFact(Flags.UserEscaped) ? UnescapeMode.Unescape : UnescapeMode.EscapeUnescape; + if (IsImplicitFile) + mode &= ~UnescapeMode.Unescape; + break; } - char[] dest1 = new char[dest.Length]; - Buffer.BlockCopy(dest, 0, dest1, 0, end * sizeof(char)); - fixed (char* pdest = dest1) + if (mode != UnescapeMode.CopyOnly) { - dest = UriHelper.UnescapeString(pdest, pos, end, dest, ref pos, '?', '#', c_DummyChar, mode, - _syntax, false); + // We can't do an in-place unescape, create a copy + var copy = new ValueStringBuilder(stackalloc char[StackallocThreshold]); + copy.Append(dest.AsSpan(start, dest.Length - start)); + + dest.Length = start; + fixed (char* pCopy = copy) + { + UriHelper.UnescapeString(pCopy, 0, copy.Length, + ref dest, '?', '#', c_DummyChar, + mode, + _syntax, isQuery: false); + } + + copy.Dispose(); } } - else - { - pos = end; - } - - return dest; } // works only with ASCII characters, used to partially unescape path before compressing diff --git a/src/libraries/System.Private.Uri/src/System/UriExt.cs b/src/libraries/System.Private.Uri/src/System/UriExt.cs index 66552d086da08..ca43d833a79de 100644 --- a/src/libraries/System.Private.Uri/src/System/UriExt.cs +++ b/src/libraries/System.Private.Uri/src/System/UriExt.cs @@ -534,7 +534,7 @@ public static string UnescapeDataString(string stringToUnescape) if (position == -1) return stringToUnescape; - var vsb = new ValueStringBuilder(stackalloc char[256]); + var vsb = new ValueStringBuilder(stackalloc char[StackallocThreshold]); vsb.EnsureCapacity(stringToUnescape.Length); vsb.Append(stringToUnescape.AsSpan(0, position)); diff --git a/src/libraries/System.Private.Uri/src/System/UriHelper.cs b/src/libraries/System.Private.Uri/src/System/UriHelper.cs index 27988b1bda87f..660d15d49ade4 100644 --- a/src/libraries/System.Private.Uri/src/System/UriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/UriHelper.cs @@ -3,7 +3,6 @@ using System.Text; using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; using System.Runtime.InteropServices; using System.Buffers; @@ -153,20 +152,13 @@ internal static string EscapeString( // Otherwise, create a ValueStringBuilder to store the escaped data into, // append to it all of the noEscape chars we already iterated through, // escape the rest, and return the result as a string. - var vsb = new ValueStringBuilder(stackalloc char[256]); + var vsb = new ValueStringBuilder(stackalloc char[Uri.StackallocThreshold]); vsb.Append(stringToEscape.AsSpan(0, i)); EscapeStringToBuilder(stringToEscape.AsSpan(i), ref vsb, noEscape, checkExistingEscaped); return vsb.ToString(); } - // forceX characters are always escaped if found - // destPos - starting offset in dest for output, on return this will be an exclusive "end" in the output. - // In case "dest" has lack of space it will be reallocated by preserving the _whole_ content up to current destPos - // Returns null if nothing has to be escaped AND passed dest was null, otherwise the resulting array with the updated destPos - [return: NotNullIfNotNull("dest")] - internal static char[]? EscapeString( - ReadOnlySpan stringToEscape, - char[]? dest, ref int destPos, + internal static unsafe void EscapeString(ReadOnlySpan stringToEscape, ref ValueStringBuilder dest, bool checkExistingEscaped, char forceEscape1 = '\0', char forceEscape2 = '\0') { // Get the table of characters that do not need to be escaped. @@ -193,35 +185,17 @@ internal static string EscapeString( for (; i < stringToEscape.Length && (c = stringToEscape[i]) <= 0x7F && noEscape[c]; i++) ; if (i == stringToEscape.Length) { - if (dest != null) - { - EnsureCapacity(dest, destPos, stringToEscape.Length); - stringToEscape.CopyTo(dest.AsSpan(destPos)); - destPos += stringToEscape.Length; - } - - return dest; + dest.Append(stringToEscape); } + else + { + dest.Append(stringToEscape.Slice(0, i)); - // Otherwise, create a ValueStringBuilder to store the escaped data into, - // append to it all of the noEscape chars we already iterated through, and - // escape the rest into the ValueStringBuilder. - var vsb = new ValueStringBuilder(stackalloc char[256]); - vsb.Append(stringToEscape.Slice(0, i)); - EscapeStringToBuilder(stringToEscape.Slice(i), ref vsb, noEscape, checkExistingEscaped); - - // Finally update dest with the result. - EnsureCapacity(dest, destPos, vsb.Length); - vsb.TryCopyTo(dest.AsSpan(destPos), out int charsWritten); - destPos += charsWritten; - return dest; + // CS8350 & CS8352: We can't pass `noEscape` and `dest` as arguments together as that could leak the scope of the above stackalloc + // As a workaround, re-create the Span in a way that avoids analysis + ReadOnlySpan noEscapeCopy = MemoryMarshal.CreateReadOnlySpan(ref MemoryMarshal.GetReference(noEscape), noEscape.Length); - static void EnsureCapacity(char[]? dest, int destSize, int requiredSize) - { - if (dest == null || dest.Length - destSize < requiredSize) - { - Array.Resize(ref dest, destSize + requiredSize + 120); // 120 == arbitrary minimum-empty space copied from previous implementation - } + EscapeStringToBuilder(stringToEscape.Slice(i), ref dest, noEscapeCopy, checkExistingEscaped); } } @@ -342,6 +316,14 @@ internal static unsafe void UnescapeString(string input, int start, int end, ref UnescapeString(pStr, start, end, ref dest, rsvd1, rsvd2, rsvd3, unescapeMode, syntax, isQuery); } } + internal static unsafe void UnescapeString(ReadOnlySpan input, ref ValueStringBuilder dest, + char rsvd1, char rsvd2, char rsvd3, UnescapeMode unescapeMode, UriParser? syntax, bool isQuery) + { + fixed (char* pStr = &MemoryMarshal.GetReference(input)) + { + UnescapeString(pStr, 0, input.Length, ref dest, rsvd1, rsvd2, rsvd3, unescapeMode, syntax, isQuery); + } + } internal static unsafe void UnescapeString(char* pStr, int start, int end, ref ValueStringBuilder dest, char rsvd1, char rsvd2, char rsvd3, UnescapeMode unescapeMode, UriParser? syntax, bool isQuery) { diff --git a/src/libraries/System.Private.Uri/tests/UnitTests/Fakes/FakeUri.cs b/src/libraries/System.Private.Uri/tests/UnitTests/Fakes/FakeUri.cs index ee0a6fe04ac73..e23e65223c4db 100644 --- a/src/libraries/System.Private.Uri/tests/UnitTests/Fakes/FakeUri.cs +++ b/src/libraries/System.Private.Uri/tests/UnitTests/Fakes/FakeUri.cs @@ -7,6 +7,7 @@ public class Uri { internal const char c_DummyChar = (char)0xFFFF; //An Invalid Unicode character used as a dummy char passed into the parameter internal const int c_MaxUriBufferSize = 0xFFF0; + internal const int StackallocThreshold = 512; internal static bool IriParsingStatic(UriParser? syntax) {