-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
(MQ cleanup) Remove some unsafe code from System.Xml #43379
Changes from 5 commits
1a31b76
cb2b6fd
d19f99d
74684f0
2cdeefa
e6de742
af2355e
6ebfdef
7dba9f6
aed0874
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,12 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System; | ||
using System.Collections; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Text; | ||
using System.Diagnostics; | ||
using System.Globalization; | ||
using System.IO; | ||
using System.Runtime.InteropServices; | ||
using System.Text; | ||
using System.Xml.Schema; | ||
|
||
namespace System.Xml | ||
|
@@ -333,15 +332,11 @@ public NestedBinXml(SymbolTables symbolTables, int docState, NestedBinXml? next) | |
private readonly bool _ignoreComments; | ||
private readonly DtdProcessing _dtdProcessing; | ||
|
||
private readonly Encoding _unicode; | ||
|
||
// current version of the protocol | ||
private byte _version; | ||
|
||
public XmlSqlBinaryReader(Stream stream, byte[] data, int len, string baseUri, bool closeInput, XmlReaderSettings settings) | ||
{ | ||
_unicode = System.Text.Encoding.Unicode; | ||
|
||
_xnt = settings.NameTable!; | ||
if (_xnt == null) | ||
{ | ||
|
@@ -2349,28 +2344,21 @@ private int ScanText(out int start) | |
private string GetString(int pos, int cch) | ||
{ | ||
Debug.Assert(pos >= 0 && cch >= 0); | ||
if (checked(pos + (cch * 2)) > _end) | ||
if (checked(pos + (cch * sizeof(char))) > _end) | ||
throw new XmlException(SR.Xml_UnexpectedEOF1, (string[]?)null); | ||
if (cch == 0) | ||
return string.Empty; | ||
// GetStringUnaligned is _significantly_ faster than unicode.GetString() | ||
// but since IA64 doesn't support unaligned reads, we can't do it if | ||
// the address is not aligned properly. Since the byte[] will be aligned, | ||
// we can detect address alignment my just looking at the offset | ||
if ((pos & 1) == 0) | ||
return GetStringAligned(_data, pos, cch); | ||
else | ||
return _unicode.GetString(_data, pos, checked(cch * 2)); | ||
} | ||
|
||
private unsafe string GetStringAligned(byte[] data, int offset, int cch) | ||
{ | ||
Debug.Assert((offset & 1) == 0); | ||
fixed (byte* pb = data) | ||
return string.Create(cch, (_data, pos), static (dstChars, state) => | ||
{ | ||
char* p = (char*)(pb + offset); | ||
return new string(p, 0, cch); | ||
} | ||
// bitblt source bytes directly into the destination char span | ||
// n.b. source buffer assumed to be well-formed UTF-16 machine endian | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A bit of an expansion on this comment: the original code had two different behaviors based on whether the underlying data was aligned or unaligned. If the underlying data was aligned, the original code would basically memmove the contents of the old buffer into the new string, not performing any UTF-16 validation. If the underlying data was unaligned, the original code would go through `_unicode.GetString` (i.e., `Encoding.Unicode`) instead. Since the updated code is just a simple memmove, I opted for the "don't perform any validation" behavior. |
||
|
||
int cch = dstChars.Length; | ||
ReadOnlySpan<byte> srcBytes = state._data.AsSpan(state.pos, checked(cch * sizeof(char))); | ||
Span<byte> dstBytes = MemoryMarshal.AsBytes(dstChars); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Technically |
||
srcBytes.CopyTo(dstBytes); | ||
}); | ||
} | ||
|
||
private string GetAttributeText(int i) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2846,37 +2846,6 @@ private void InitFromDouble(double dbl) | |
} | ||
}; | ||
|
||
/* ---------------------------------------------------------------------------- | ||
IntToString() | ||
|
||
Converts an integer to a string according to XPath rules. | ||
*/ | ||
private static unsafe string IntToString(int val) | ||
{ | ||
// The maximum number of characters needed to represent any int value is 11 | ||
const int BufSize = 12; | ||
char* pBuf = stackalloc char[BufSize]; | ||
char* pch = pBuf += BufSize; | ||
uint u = (uint)(val < 0 ? -val : val); | ||
|
||
while (u >= 10) | ||
{ | ||
// Fast division by 10 | ||
uint quot = (uint)((0x66666667L * u) >> 32) >> 2; | ||
*(--pch) = (char)((u - quot * 10) + '0'); | ||
u = quot; | ||
} | ||
|
||
*(--pch) = (char)(u + '0'); | ||
|
||
if (val < 0) | ||
{ | ||
*(--pch) = '-'; | ||
} | ||
|
||
return new string(pch, 0, (int)(pBuf - pch)); | ||
} | ||
|
||
/* ---------------------------------------------------------------------------- | ||
DoubleToString() | ||
|
||
|
@@ -2890,7 +2859,7 @@ public static string DoubleToString(double dbl) | |
|
||
if (IsInteger(dbl, out iVal)) | ||
{ | ||
return IntToString(iVal); | ||
return iVal.ToString(CultureInfo.InvariantCulture); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed custom implementation in favor of delegating to the runtime's already-optimized `int.ToString`. On .NET 5/6, dereferencing |
||
} | ||
|
||
// Handle NaN and infinity | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Code is written this way to avoid bounds checks later in the method.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@GrabYourPitchforks, (maybe you already know it but) since #40180 was merged, the `uint` cast workaround is not needed in some cases where constant operands are involved. It turned out that in some cases, RyuJIT does not elide the bounds check with `uint` casts, where it did before. I posted some findings on macOS here: #11623 (comment). It is unlikely that this construct with (non-const) operands is affected by that change, but perhaps it would be good to double-check with the latest master. I have a feeling that in some places in the framework, we can remove the `uint` cast, as they are deoptimized.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@am11 This is a good observation! As these other optimizations come online I think it'll be useful to perform a libraries-wide sweep of all of these patterns. It's always great to make the code more readable while maintaining peak efficiency. :)