Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(MQ cleanup) Remove some unsafe code from System.Xml #43379

Merged
merged 10 commits into from
Nov 3, 2020
77 changes: 29 additions & 48 deletions src/libraries/System.Private.Xml/src/System/Xml/BinHexDecoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ internal override bool IsFull
}
}

internal override unsafe int Decode(char[] chars, int startPos, int len)
internal override int Decode(char[] chars, int startPos, int len)
{
if (chars == null)
{
Expand All @@ -61,20 +61,15 @@ internal override unsafe int Decode(char[] chars, int startPos, int len)
return 0;
}

int bytesDecoded, charsDecoded;
fixed (char* pChars = &chars[startPos])
{
fixed (byte* pBytes = &_buffer![_curIndex])
{
Decode(pChars, pChars + len, pBytes, pBytes + (_endIndex - _curIndex),
ref _hasHalfByteCached, ref _cachedHalfByte, out charsDecoded, out bytesDecoded);
}
}
Decode(chars.AsSpan(startPos, len), _buffer.AsSpan(_curIndex, _endIndex - _curIndex),
ref _hasHalfByteCached, ref _cachedHalfByte,
out int charsDecoded, out int bytesDecoded);

_curIndex += bytesDecoded;
return charsDecoded;
}

internal override unsafe int Decode(string str, int startPos, int len)
internal override int Decode(string str, int startPos, int len)
{
if (str == null)
{
Expand All @@ -98,15 +93,9 @@ internal override unsafe int Decode(string str, int startPos, int len)
return 0;
}

int bytesDecoded, charsDecoded;
fixed (char* pChars = str)
{
fixed (byte* pBytes = &_buffer![_curIndex])
{
Decode(pChars + startPos, pChars + startPos + len, pBytes, pBytes + (_endIndex - _curIndex),
ref _hasHalfByteCached, ref _cachedHalfByte, out charsDecoded, out bytesDecoded);
}
}
Decode(str.AsSpan(startPos, len), _buffer.AsSpan(_curIndex, _endIndex - _curIndex),
ref _hasHalfByteCached, ref _cachedHalfByte,
out int charsDecoded, out int bytesDecoded);

_curIndex += bytesDecoded;
return charsDecoded;
Expand Down Expand Up @@ -135,7 +124,7 @@ internal override void SetNextOutputBuffer(Array buffer, int index, int count)
//
// Static methods
//
public static unsafe byte[] Decode(char[] chars, bool allowOddChars)
public static byte[] Decode(char[] chars, bool allowOddChars)
{
if (chars == null)
{
Expand All @@ -149,17 +138,10 @@ public static unsafe byte[] Decode(char[] chars, bool allowOddChars)
}

byte[] bytes = new byte[(len + 1) / 2];
int bytesDecoded, charsDecoded;
bool hasHalfByteCached = false;
byte cachedHalfByte = 0;

fixed (char* pChars = &chars[0])
{
fixed (byte* pBytes = &bytes[0])
{
Decode(pChars, pChars + len, pBytes, pBytes + bytes.Length, ref hasHalfByteCached, ref cachedHalfByte, out charsDecoded, out bytesDecoded);
}
}
Decode(chars, bytes, ref hasHalfByteCached, ref cachedHalfByte, out int charsDecoded, out int bytesDecoded);

if (hasHalfByteCached && !allowOddChars)
{
Expand All @@ -168,9 +150,7 @@ public static unsafe byte[] Decode(char[] chars, bool allowOddChars)

if (bytesDecoded < bytes.Length)
{
byte[] tmp = new byte[bytesDecoded];
Buffer.BlockCopy(bytes, 0, tmp, 0, bytesDecoded);
bytes = tmp;
Array.Resize(ref bytes, bytesDecoded);
}

return bytes;
Expand All @@ -180,22 +160,23 @@ public static unsafe byte[] Decode(char[] chars, bool allowOddChars)
// Private methods
//

private static unsafe void Decode(char* pChars, char* pCharsEndPos,
byte* pBytes, byte* pBytesEndPos,
ref bool hasHalfByteCached, ref byte cachedHalfByte,
out int charsDecoded, out int bytesDecoded)
private static void Decode(ReadOnlySpan<char> chars,
Span<byte> bytes,
ref bool hasHalfByteCached, ref byte cachedHalfByte,
out int charsDecoded, out int bytesDecoded)
{
#if DEBUG
Debug.Assert(pCharsEndPos - pChars >= 0);
Debug.Assert(pBytesEndPos - pBytes >= 0);
#endif
int iByte = 0;
int iChar = 0;

char* pChar = pChars;
byte* pByte = pBytes;
while (pChar < pCharsEndPos && pByte < pBytesEndPos)
for (; iChar < chars.Length; iChar++)
{
if ((uint)iByte >= (uint)bytes.Length)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Code is written this way to avoid bounds checks later in the method.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@GrabYourPitchforks, (maybe you already know it but) since #40180 was merged, the uint cast workaround is not needed in some cases where constant operands are involved. It turned out that in some cases, RyuJIT does not elide the bound check with uint casts, where it was doing before. I posted some findings on macOS here: #11623 (comment). It is unlikely that this construct with (non const) operands is affected by that change, but perhaps would be good to double check with the latest master. I have a feeling that in some places in the framework, we can remove the uint cast, as they are deoptomized.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@am11 This is a good observation! As these other optimizations come online I think it'll be useful to perform a libraries-wide sweep of all of these patterns. It's always great to make the code more readable while maintaining peak efficiency. :)

{
break; // ran out of space in the destination buffer
}

byte halfByte;
char ch = *pChar++;
char ch = chars[iChar];

int val = HexConverter.FromChar(ch);
if (val != 0xFF)
Expand All @@ -208,12 +189,12 @@ private static unsafe void Decode(char* pChars, char* pCharsEndPos,
}
else
{
throw new XmlException(SR.Xml_InvalidBinHexValue, new string(pChars, 0, (int)(pCharsEndPos - pChars)));
throw new XmlException(SR.Xml_InvalidBinHexValue, chars.ToString());
}

if (hasHalfByteCached)
{
*pByte++ = (byte)((cachedHalfByte << 4) + halfByte);
bytes[iByte++] = (byte)((cachedHalfByte << 4) + halfByte);
hasHalfByteCached = false;
}
else
Expand All @@ -223,8 +204,8 @@ private static unsafe void Decode(char* pChars, char* pCharsEndPos,
}
}

bytesDecoded = (int)(pByte - pBytes);
charsDecoded = (int)(pChar - pChars);
bytesDecoded = iByte;
charsDecoded = iChar;
}
}
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Xml.Schema;

namespace System.Xml
Expand Down Expand Up @@ -333,15 +332,11 @@ public NestedBinXml(SymbolTables symbolTables, int docState, NestedBinXml? next)
private readonly bool _ignoreComments;
private readonly DtdProcessing _dtdProcessing;

private readonly Encoding _unicode;

// current version of the protocol
private byte _version;

public XmlSqlBinaryReader(Stream stream, byte[] data, int len, string baseUri, bool closeInput, XmlReaderSettings settings)
{
_unicode = System.Text.Encoding.Unicode;

_xnt = settings.NameTable!;
if (_xnt == null)
{
Expand Down Expand Up @@ -2349,28 +2344,21 @@ private int ScanText(out int start)
private string GetString(int pos, int cch)
{
Debug.Assert(pos >= 0 && cch >= 0);
if (checked(pos + (cch * 2)) > _end)
if (checked(pos + (cch * sizeof(char))) > _end)
throw new XmlException(SR.Xml_UnexpectedEOF1, (string[]?)null);
if (cch == 0)
return string.Empty;
// GetStringUnaligned is _significantly_ faster than unicode.GetString()
// but since IA64 doesn't support unaligned reads, we can't do it if
// the address is not aligned properly. Since the byte[] will be aligned,
// we can detect address alignment my just looking at the offset
if ((pos & 1) == 0)
return GetStringAligned(_data, pos, cch);
else
return _unicode.GetString(_data, pos, checked(cch * 2));
}

private unsafe string GetStringAligned(byte[] data, int offset, int cch)
{
Debug.Assert((offset & 1) == 0);
fixed (byte* pb = data)
return string.Create(cch, (_data, pos), static (dstChars, state) =>
{
char* p = (char*)(pb + offset);
return new string(p, 0, cch);
}
// bitblt source bytes directly into the destination char span
// n.b. source buffer assumed to be well-formed UTF-16 machine endian
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bit of an expansion on this comment: the original code had two different behaviors based on whether the underlying data was aligned or unaligned. If the underlying data was aligned, the original code would basically memmove the contents of the old buffer into the new string, not performing any UTF-16 validation. If the underlying data was unaligned, the original code would go through Encoding.Unicode.GetString, which would perform UTF-16 validation. This really only affects whether unpaired surrogate characters get replaced with '\uFFFD' in the destination string.

Since the updated code is just a simple memmove, I opted for the "don't perform any validation" behavior.


int cch = dstChars.Length;
ReadOnlySpan<byte> srcBytes = state._data.AsSpan(state.pos, checked(cch * sizeof(char)));
Span<byte> dstBytes = MemoryMarshal.AsBytes(dstChars);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically MemoryMarshal is an unsafe-equivalent API. But at least we got rid of the pointer manipulation in this method. :)

srcBytes.CopyTo(dstBytes);
});
}

private string GetAttributeText(int i)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9732,36 +9732,30 @@ private void RegisterConsumedCharacters(long characters, bool inEntityReference)
}
}

internal static unsafe void AdjustLineInfo(char[] chars, int startPos, int endPos, bool isNormalized, ref LineInfo lineInfo)
internal static void AdjustLineInfo(char[] chars, int startPos, int endPos, bool isNormalized, ref LineInfo lineInfo)
{
Debug.Assert(startPos >= 0);
Debug.Assert(endPos < chars.Length);
Debug.Assert(startPos <= endPos);

fixed (char* pChars = &chars[startPos])
{
AdjustLineInfo(pChars, endPos - startPos, isNormalized, ref lineInfo);
}
AdjustLineInfo(chars.AsSpan(startPos, endPos - startPos), isNormalized, ref lineInfo);
}

internal static unsafe void AdjustLineInfo(string str, int startPos, int endPos, bool isNormalized, ref LineInfo lineInfo)
internal static void AdjustLineInfo(string str, int startPos, int endPos, bool isNormalized, ref LineInfo lineInfo)
{
Debug.Assert(startPos >= 0);
Debug.Assert(endPos < str.Length);
Debug.Assert(startPos <= endPos);

fixed (char* pChars = str)
{
AdjustLineInfo(pChars + startPos, endPos - startPos, isNormalized, ref lineInfo);
}
AdjustLineInfo(str.AsSpan(startPos, endPos - startPos), isNormalized, ref lineInfo);
}

internal static unsafe void AdjustLineInfo(char* pChars, int length, bool isNormalized, ref LineInfo lineInfo)
private static void AdjustLineInfo(ReadOnlySpan<char> chars, bool isNormalized, ref LineInfo lineInfo)
{
int lastNewLinePos = -1;
for (int i = 0; i < length; i++)
for (int i = 0; i < chars.Length; i++)
{
switch (pChars[i])
switch (chars[i])
{
case '\n':
lineInfo.lineNo++;
Expand All @@ -9774,7 +9768,8 @@ internal static unsafe void AdjustLineInfo(char* pChars, int length, bool isNorm
}
lineInfo.lineNo++;
lastNewLinePos = i;
if (i + 1 < length && pChars[i + 1] == '\n')
int nextIdx = i + 1;
if ((uint)nextIdx < (uint)chars.Length && chars[nextIdx] == '\n')
{
i++;
lastNewLinePos++;
Expand All @@ -9784,7 +9779,7 @@ internal static unsafe void AdjustLineInfo(char* pChars, int length, bool isNorm
}
if (lastNewLinePos >= 0)
{
lineInfo.linePos = length - lastNewLinePos;
lineInfo.linePos = chars.Length - lastNewLinePos;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1459,19 +1459,13 @@ internal static string[] SplitString(string value, StringSplitOptions splitStrin
internal static bool IsNegativeZero(double value)
{
// Simple equals function will report that -0 is equal to +0, so compare bits instead
if (value == 0 && DoubleToInt64Bits(value) == DoubleToInt64Bits(-0e0))
if (value == 0 && BitConverter.DoubleToInt64Bits(value) == BitConverter.DoubleToInt64Bits(-0e0))
{
return true;
}
return false;
}

private static unsafe long DoubleToInt64Bits(double value)
{
// NOTE: BitConverter.DoubleToInt64Bits is missing in Silverlight
return *((long*)&value);
}

internal static void VerifyCharData(string? data, ExceptionType exceptionType)
{
VerifyCharData(data, exceptionType, exceptionType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2846,37 +2846,6 @@ private void InitFromDouble(double dbl)
}
};

/* ----------------------------------------------------------------------------
IntToString()

Converts an integer to a string according to XPath rules.
*/
private static unsafe string IntToString(int val)
{
// The maximum number of characters needed to represent any int value is 11
const int BufSize = 12;
char* pBuf = stackalloc char[BufSize];
char* pch = pBuf += BufSize;
uint u = (uint)(val < 0 ? -val : val);

while (u >= 10)
{
// Fast division by 10
uint quot = (uint)((0x66666667L * u) >> 32) >> 2;
*(--pch) = (char)((u - quot * 10) + '0');
u = quot;
}

*(--pch) = (char)(u + '0');

if (val < 0)
{
*(--pch) = '-';
}

return new string(pch, 0, (int)(pBuf - pch));
}

/* ----------------------------------------------------------------------------
DoubleToString()

Expand All @@ -2890,7 +2859,7 @@ public static string DoubleToString(double dbl)

if (IsInteger(dbl, out iVal))
{
return IntToString(iVal);
return iVal.ToString(CultureInfo.InvariantCulture);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed custom implementation in favor of delegating to the runtime's already-optimized int.ToString implementation.

On .NET 5/6, dereferencing CultureInfo.InvariantCulture is very fast due to recent JIT optimizations.

}

// Handle NaN and infinity
Expand Down