-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
EscapingUtilities.cs
312 lines (278 loc) · 13.2 KB
/
EscapingUtilities.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System;
using System.Collections.Generic;
using System.Text;
using Microsoft.Build.Framework;
using Microsoft.NET.StringTools;
#nullable disable
namespace Microsoft.Build.Shared
{
/// <summary>
/// This class implements static methods to assist with escaping and unescaping of %XX codes
/// in the MSBuild file format.
/// </summary>
/// <remarks>
/// PERF: since we escape and unescape relatively frequently, it may be worth caching
/// the last N strings that were (un)escaped
/// </remarks>
internal static class EscapingUtilities
{
/// <summary>
/// Optional cache mapping unescaped strings to their escaped form, for use when needing to escape in
/// performance-critical scenarios with significant expected string reuse.
/// </summary>
/// <remarks>
/// Keys are compared ordinally. The dictionary instance is also the object that callers in this class
/// lock on while reading or writing it, so the field is readonly to guarantee that every thread always
/// synchronizes on the same instance.
/// </remarks>
private static readonly Dictionary<string, string> s_unescapedToEscapedStrings = new Dictionary<string, string>(StringComparer.Ordinal);
/// <summary>
/// Attempts to interpret a single character as a hexadecimal digit (0-9, A-F or a-f).
/// </summary>
/// <param name="character">The character to decode.</param>
/// <param name="value">Receives the numeric value (0-15) of the digit, or 0 when the character is not a hex digit.</param>
/// <returns>True if <paramref name="character"/> is a hexadecimal digit; otherwise false.</returns>
private static bool TryDecodeHexDigit(char character, out int value)
{
    // -1 marks "not a hex digit" until one of the three ranges below matches.
    int decoded = -1;

    if ('0' <= character && character <= '9')
    {
        decoded = character - '0';
    }
    else if ('A' <= character && character <= 'F')
    {
        decoded = 10 + (character - 'A');
    }
    else if ('a' <= character && character <= 'f')
    {
        decoded = 10 + (character - 'a');
    }

    bool isHexDigit = decoded >= 0;
    value = isHexDigit ? decoded : 0;
    return isHexDigit;
}
/// <summary>
/// Replaces all instances of %XX in the input string with the character represented
/// by the hexadecimal number XX. A percent sign that is not followed by two hexadecimal
/// digits is copied to the output unchanged.
/// </summary>
/// <param name="escapedString">The string to unescape. May be null or empty, in which case it is returned as is.</param>
/// <param name="trim">
/// If the string should be trimmed before being unescaped. Whitespace that results from
/// unescaping (for example %20) is not trimmed.
/// </param>
/// <returns>unescaped string</returns>
internal static string UnescapeAll(string escapedString, bool trim = false)
{
    // If the string doesn't contain anything, then by definition it doesn't
    // need unescaping.
    if (String.IsNullOrEmpty(escapedString))
    {
        return escapedString;
    }

    // If there are no percent signs, just return the original string immediately.
    // Don't even instantiate the StringBuilder.
    int indexOfPercent = escapedString.IndexOf('%');
    if (indexOfPercent == -1)
    {
        return trim ? escapedString.Trim() : escapedString;
    }

    // Work out the [currentPosition, escapedStringLength) window to process. This is done
    // before acquiring the StringBuilder so that no return path can skip releasing it.
    int currentPosition = 0;
    int escapedStringLength = escapedString.Length;
    if (trim)
    {
        while (currentPosition < escapedString.Length && Char.IsWhiteSpace(escapedString[currentPosition]))
        {
            currentPosition++;
        }

        // Defensive: a string that contains '%' always has a non-whitespace character,
        // so this is not expected to be hit, but it keeps the loop below in bounds.
        if (currentPosition == escapedString.Length)
        {
            return String.Empty;
        }

        while (Char.IsWhiteSpace(escapedString[escapedStringLength - 1]))
        {
            escapedStringLength--;
        }
    }

    // This is where we're going to build up the final string to return to the caller.
    StringBuilder unescapedString = StringBuilderCache.Acquire(escapedString.Length);

    // Loop until there are no more percent signs in the input string.
    while (indexOfPercent != -1)
    {
        // There must be two hex characters following the percent sign (inside the trimmed
        // window) for us to even consider doing anything with this.
        if (
            (indexOfPercent <= (escapedStringLength - 3)) &&
            TryDecodeHexDigit(escapedString[indexOfPercent + 1], out int digit1) &&
            TryDecodeHexDigit(escapedString[indexOfPercent + 2], out int digit2))
        {
            // First copy all the characters up to the current percent sign into
            // the destination.
            unescapedString.Append(escapedString, currentPosition, indexOfPercent - currentPosition);

            // Convert the %XX to an actual real character and append it.
            char unescapedCharacter = (char)((digit1 << 4) + digit2);
            unescapedString.Append(unescapedCharacter);

            // Advance the current pointer to reflect the fact that the destination string
            // is up to date with everything up to and including this escape code we just found.
            currentPosition = indexOfPercent + 3;
        }

        // Find the next percent sign.
        indexOfPercent = escapedString.IndexOf('%', indexOfPercent + 1);
    }

    // Okay, there are no more percent signs in the input string, so just copy the remaining
    // characters into the destination.
    unescapedString.Append(escapedString, currentPosition, escapedStringLength - currentPosition);

    return StringBuilderCache.GetStringAndRelease(unescapedString);
}
/// <summary>
/// Escapes the input by replacing each character that needs escaping with %XX, where XX is
/// the hex value of the ASCII code for the char. Interns and caches the result.
/// </summary>
/// <remarks>
/// NOTE: Only recommended for use in scenarios where there's expected to be significant
/// repetition of the escaped string. Cache currently grows unbounded.
/// </remarks>
/// <param name="unescapedString">The string to escape.</param>
/// <returns>escaped string</returns>
internal static string EscapeWithCaching(string unescapedString)
    => EscapeWithOptionalCaching(unescapedString, cache: true);
/// <summary>
/// Escapes the input by replacing each character that needs escaping with %XX, where XX is
/// the hex value of the ASCII code for the char. The result is not cached.
/// </summary>
/// <param name="unescapedString">The string to escape.</param>
/// <returns>escaped string</returns>
internal static string Escape(string unescapedString)
    => EscapeWithOptionalCaching(unescapedString, cache: false);
/// <summary>
/// Escapes the input by replacing each character that needs escaping with %XX, where XX is
/// the hex value of the ASCII code for the char. Consults and populates the cache if requested.
/// </summary>
/// <param name="unescapedString">The string to escape.</param>
/// <param name="cache">
/// True if the cache should be checked, and if the resultant string
/// should be interned and cached.
/// </param>
/// <returns>
/// The escaped string, or the original instance when it is null, empty, or contains nothing to escape.
/// </returns>
private static string EscapeWithOptionalCaching(string unescapedString, bool cache)
{
    // Fast path: nothing to escape, so hand back the input without touching a StringBuilder.
    bool nothingToEscape = String.IsNullOrEmpty(unescapedString) || !ContainsReservedCharacters(unescapedString);
    if (nothingToEscape)
    {
        return unescapedString;
    }

    // When caching, a previous call may already have produced the answer.
    if (cache)
    {
        lock (s_unescapedToEscapedStrings)
        {
            if (s_unescapedToEscapedStrings.TryGetValue(unescapedString, out string alreadyEscaped))
            {
                return alreadyEscaped;
            }
        }
    }

    // Build the escaped form.
    StringBuilder builder = StringBuilderCache.Acquire(unescapedString.Length * 2);
    AppendEscapedString(builder, unescapedString);

    if (cache)
    {
        // Intern the result, hand the builder back, then remember the mapping for next time.
        string internedResult = Strings.WeakIntern(builder.ToString());
        StringBuilderCache.Release(builder);

        lock (s_unescapedToEscapedStrings)
        {
            s_unescapedToEscapedStrings[unescapedString] = internedResult;
        }

        return internedResult;
    }

    return StringBuilderCache.GetStringAndRelease(builder);
}
/// <summary>
/// Reports whether the string contains any character that escaping would rewrite. Checking this
/// before escaping lets callers skip the work (and the copying of item metadata) when the escaped
/// and unescaped forms would be the same.
/// </summary>
/// <param name="unescapedString">The string to inspect; must not be null.</param>
/// <returns>True if at least one character from the escape list is present.</returns>
private static bool ContainsReservedCharacters(string unescapedString)
    => unescapedString.IndexOfAny(s_charsToEscape) >= 0;
/// <summary>
/// Determines whether the string contains the escaped form of '*' (%2a or %2A) or '?' (%3f or %3F).
/// </summary>
/// <param name="escapedString">The escaped string to inspect. May be null.</param>
/// <returns>
/// True if an escaped wildcard is present; false otherwise, including when the string is null
/// or shorter than three characters.
/// </returns>
internal static bool ContainsEscapedWildcards(string escapedString)
{
    // An escape sequence is three characters long, so null or anything shorter cannot contain one.
    if (escapedString == null || escapedString.Length < 3)
    {
        return false;
    }

    // A '%' only matters if it is followed by at least two more characters, so the search
    // never needs to look past this index.
    int lastCandidateIndex = escapedString.Length - 3;

    int index = escapedString.IndexOf('%', 0, lastCandidateIndex + 1);
    while (index != -1)
    {
        char firstDigit = escapedString[index + 1];
        char secondDigit = escapedString[index + 2];

        if (firstDigit == '2' && (secondDigit == 'a' || secondDigit == 'A'))
        {
            // %2a or %2A
            return true;
        }

        if (firstDigit == '3' && (secondDigit == 'f' || secondDigit == 'F'))
        {
            // %3f or %3F
            return true;
        }

        // Continue searching for % starting at (index + 1), again stopping at the last index
        // that still leaves room for two following characters.
        index = escapedString.IndexOf('%', index + 1, lastCandidateIndex - index);
    }

    return false;
}
/// <summary>
/// Maps a value in the range 0-15 to its lowercase hexadecimal digit.
/// </summary>
/// <param name="x">The number to convert, which must be non-negative and less than 16</param>
/// <returns>The character which is the hexadecimal representation of <paramref name="x"/>.</returns>
private static char HexDigitChar(int x)
{
    // 0-9 map onto '0'-'9'.
    if (x < 10)
    {
        return (char)('0' + x);
    }

    // 10-15 map onto 'a'-'f'.
    return (char)('a' + (x - 10));
}
/// <summary>
/// Writes the escape sequence for a character to a <see cref="StringBuilder"/>: a percent sign
/// followed by two hexadecimal digits (high nibble first).
/// </summary>
/// <param name="sb">The <see cref="StringBuilder"/> to which to append.</param>
/// <param name="ch">The character to escape.</param>
private static void AppendEscapedChar(StringBuilder sb, char ch)
{
    int highNibble = ch >> 4;
    int lowNibble = ch & 0x0F;

    sb.Append('%')
      .Append(HexDigitChar(highNibble))
      .Append(HexDigitChar(lowNibble));
}
/// <summary>
/// Writes the escaped form of a string to a <see cref="StringBuilder"/>, copying ordinary text
/// verbatim and replacing each character that needs escaping with its %XX sequence.
/// </summary>
/// <param name="sb">The <see cref="StringBuilder"/> to which to append.</param>
/// <param name="unescapedString">The unescaped string.</param>
private static void AppendEscapedString(StringBuilder sb, string unescapedString)
{
    // Everything before this index has already been written to the builder.
    int copiedUpTo = 0;

    int reservedAt = unescapedString.IndexOfAny(s_charsToEscape, copiedUpTo);
    while (reservedAt != -1)
    {
        // Copy the run of ordinary characters, then the escape sequence for the special one.
        sb.Append(unescapedString, copiedUpTo, reservedAt - copiedUpTo);
        AppendEscapedChar(sb, unescapedString[reservedAt]);

        copiedUpTo = reservedAt + 1;
        reservedAt = unescapedString.IndexOfAny(s_charsToEscape, copiedUpTo);
    }

    // No special characters remain; copy whatever is left.
    sb.Append(unescapedString, copiedUpTo, unescapedString.Length - copiedUpTo);
}
/// <summary>
/// Special characters that need escaping.
/// It's VERY important that the percent character is the FIRST on the list - since it's both a character
/// we escape and use in escape sequences, we can unintentionally escape other escape sequences if we
/// don't process it first. Of course we'll have a similar problem if we ever decide to escape hex digits
/// (that would require rewriting the algorithm) but since it seems unlikely that we ever do, this should
/// be good enough to avoid complicating the algorithm at this point.
/// </summary>
/// <remarks>
/// NOTE(review): AppendEscapedString locates these characters with IndexOfAny in a single left-to-right
/// pass over the input and does not rescan text it has already emitted, so the ordering requirement
/// described above does not appear to affect the current implementation. Confirm before reordering
/// or extending the list.
/// </remarks>
private static readonly char[] s_charsToEscape = { '%', '*', '?', '@', '$', '(', ')', ';', '\'' };
}
}