Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Big endian fixes for dotnet runtime #47981

Merged
merged 4 commits into from
Feb 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n
currentUInt32 = Unsafe.ReadUnaligned<ushort>(pBuffer);
if (!AllBytesInUInt32AreAscii(currentUInt32))
{
if (!BitConverter.IsLittleEndian)
{
currentUInt32 = currentUInt32 << 16;
}
goto FoundNonAsciiData;
}

Expand Down Expand Up @@ -1678,6 +1682,10 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf
asciiData = Unsafe.ReadUnaligned<ushort>(pAsciiBuffer + currentOffset);
if (!AllBytesInUInt32AreAscii(asciiData))
{
if (!BitConverter.IsLittleEndian)
{
asciiData = asciiData << 16;
}
goto FoundNonAsciiData;
}

Expand Down Expand Up @@ -1719,11 +1727,23 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf

// Drain ASCII bytes one at a time.

while (((byte)asciiData & 0x80) == 0)
if (BitConverter.IsLittleEndian)
{
pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
currentOffset++;
asciiData >>= 8;
while (((byte)asciiData & 0x80) == 0)
{
pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
currentOffset++;
asciiData >>= 8;
}
}
else
{
while ((asciiData & 0x80000000) == 0)
{
asciiData = BitOperations.RotateLeft(asciiData, 8);
pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
currentOffset++;
}
}

goto Finish;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ private static uint ExtractFourUtf8BytesFromSurrogatePair(uint value)
tempB |= tempA;

uint tempC = (value << 2) & 0x0000_0F00u; // = [ 00000000 00000000 0000yyyy 00000000 ]
uint tempD = (value >> 6) & 0x0003_0000u; // = [ 00000000 00000000 00yy0000 00000000 ]
uint tempD = (value >> 4) & 0x0000_3000u; // = [ 00000000 00000000 00yy0000 00000000 ]
tempD |= tempC;

uint tempE = (value & 0x3Fu) + 0xF080_8080u; // = [ 11110000 10000000 10000000 10xxxxxx ]
Expand Down Expand Up @@ -232,7 +232,7 @@ private static uint ExtractUtf8TwoByteSequenceFromFirstUtf16Char(uint value)
// want to return [ ######## ######## 110yyyyy 10xxxxxx ]

uint temp = (value >> 16) & 0x3Fu; // [ 00000000 00000000 00000000 00xxxxxx ]
value = (value >> 22) & 0x1F00u; // [ 00000000 00000000 000yyyyy 0000000 ]
value = (value >> 14) & 0x1F00u; // [ 00000000 00000000 000yyyyy 0000000 ]
return value + temp + 0xC080u;
}
}
Expand Down Expand Up @@ -498,7 +498,7 @@ private static bool UInt32BeginsWithUtf8FourByteMask(uint value)
// Return statement is written this way to work around https://github.com/dotnet/runtime/issues/4207.

return (BitConverter.IsLittleEndian && (((value - 0x8080_80F0u) & 0xC0C0_C0F8u) == 0))
|| (!BitConverter.IsLittleEndian && (((value - 0xF080_8000u) & 0xF8C0_C0C0u) == 0));
|| (!BitConverter.IsLittleEndian && (((value - 0xF080_8080u) & 0xF8C0_C0C0u) == 0));
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
}
else
{
pOutputBuffer[0] = (byte)(thisDWord >> 24); // extract [ AA 00 ## ## ]
pOutputBuffer[0] = (byte)(thisDWord >> 16); // extract [ 00 AA ## ## ]
nealef marked this conversation as resolved.
Show resolved Hide resolved
}

pInputBuffer++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ private void WriteArrayAsBytes(Array array, int typeLength)
if (!BitConverter.IsLittleEndian)
{
// we know that we are writing a primitive type, so just do a simple swap
Debug.Fail("Re-review this code if/when we start running on big endian systems");
for (int i = 0; i < bufferUsed; i += typeLength)
{
for (int j = 0; j < typeLength / 2; j++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,6 @@ private void ReadArrayAsBytes(ParseRecord pr)
if (!BitConverter.IsLittleEndian)
{
// we know that we are reading a primitive type, so just do a simple swap
Debug.Fail("Re-review this code if/when we start running on big endian systems");
for (int i = 0; i < bufferUsed; i += typeLength)
{
for (int j = 0; j < typeLength / 2; j++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,14 @@
<Reference Include="System.Collections" />
<Reference Include="System.Diagnostics.Debug" />
<Reference Include="System.Diagnostics.Tools" />
<Reference Include="System.Memory" />
<Reference Include="System.Resources.ResourceManager" />
<Reference Include="System.Runtime" />
<Reference Include="System.Runtime.Extensions" />
<Reference Include="System.Runtime.InteropServices" />
<Reference Include="System.Threading" />
</ItemGroup>
<ItemGroup Condition="!$(TargetFramework.StartsWith('$(NetCoreAppCurrent)'))">
<PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers.Binary;
using System.Reflection;
using System.IO;
using System.Diagnostics;
Expand Down Expand Up @@ -99,6 +100,28 @@ internal struct CodePageDataFileHeader
internal short unused1; // Add an unused WORD so that CodePages is aligned with DWORD boundary.
}
private const int CODEPAGE_DATA_FILE_HEADER_SIZE = 44;
/// <summary>
/// Fills <paramref name="codePageDataFileHeader"/> from <paramref name="stream"/> and, when running
/// on a big-endian host, byte-swaps the multi-byte fields in place so the buffer can be read through
/// a <see cref="CodePageDataFileHeader"/> pointer.
/// </summary>
/// <param name="stream">Stream to read the header bytes from, starting at its current position.</param>
/// <param name="codePageDataFileHeader">
/// Destination buffer; its full length is requested from the stream. On big-endian hosts it is
/// reinterpreted as a <see cref="CodePageDataFileHeader"/>, so it must be at least that large.
/// </param>
internal static unsafe void ReadCodePageDataFileHeader(Stream stream, byte[] codePageDataFileHeader)
{
    // Stream.Read is allowed to return fewer bytes than requested, so keep reading until the
    // buffer is full. A return value of 0 means end of stream; in that case the remainder of
    // the buffer is left untouched, as before.
    int totalRead = 0;
    while (totalRead < codePageDataFileHeader.Length)
    {
        int bytesRead = stream.Read(codePageDataFileHeader, totalRead, codePageDataFileHeader.Length - totalRead);
        if (bytesRead == 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    if (!BitConverter.IsLittleEndian)
    {
        fixed (byte* pBytes = &codePageDataFileHeader[0])
        {
            CodePageDataFileHeader* p = (CodePageDataFileHeader*)pBytes;

            // TableName is addressed as 16 consecutive UTF-16 code units; swap each one.
            char* pTableName = &p->TableName;
            for (int i = 0; i < 16; i++)
            {
                pTableName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pTableName[i]);
            }

            // Version is addressed as 4 consecutive 16-bit values; swap each one.
            ushort* pVersion = &p->Version;
            for (int i = 0; i < 4; i++)
            {
                pVersion[i] = BinaryPrimitives.ReverseEndianness(pVersion[i]);
            }

            p->CodePageCount = BinaryPrimitives.ReverseEndianness(p->CodePageCount);
        }
    }
}

[StructLayout(LayoutKind.Explicit, Pack = 2)]
internal unsafe struct CodePageIndex
Expand All @@ -112,6 +135,25 @@ internal unsafe struct CodePageIndex
[FieldOffset(0x24)]
internal int Offset; // DWORD
}
/// <summary>
/// Fills <paramref name="codePageIndex"/> with one index record from <paramref name="stream"/> and,
/// when running on a big-endian host, byte-swaps the multi-byte fields in place so the buffer can be
/// read through a <see cref="CodePageIndex"/> pointer.
/// </summary>
/// <param name="stream">Stream to read the record bytes from, starting at its current position.</param>
/// <param name="codePageIndex">
/// Destination buffer; its full length is requested from the stream. On big-endian hosts it is
/// reinterpreted as a <see cref="CodePageIndex"/>, so it must be at least that large.
/// </param>
internal static unsafe void ReadCodePageIndex(Stream stream, byte[] codePageIndex)
{
    // Stream.Read is allowed to return fewer bytes than requested. A single call could therefore
    // leave part of the previous contents of the buffer in place, so keep reading until the
    // buffer is full or the stream ends (Read returns 0).
    int totalRead = 0;
    while (totalRead < codePageIndex.Length)
    {
        int bytesRead = stream.Read(codePageIndex, totalRead, codePageIndex.Length - totalRead);
        if (bytesRead == 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    if (!BitConverter.IsLittleEndian)
    {
        fixed (byte* pBytes = &codePageIndex[0])
        {
            CodePageIndex* p = (CodePageIndex*)pBytes;

            // CodePageName is addressed as 16 consecutive UTF-16 code units; swap each one.
            char* pCodePageName = &p->CodePageName;
            for (int i = 0; i < 16; i++)
            {
                pCodePageName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pCodePageName[i]);
            }

            p->CodePage = BinaryPrimitives.ReverseEndianness(p->CodePage);
            p->ByteCount = BinaryPrimitives.ReverseEndianness(p->ByteCount);
            p->Offset = BinaryPrimitives.ReverseEndianness(p->Offset);
        }
    }
}

[StructLayout(LayoutKind.Explicit)]
internal unsafe struct CodePageHeader
Expand All @@ -136,6 +178,30 @@ internal unsafe struct CodePageHeader
internal ushort ByteReplace; // WORD // default replacement bytes
}
private const int CODEPAGE_HEADER_SIZE = 48;
/// <summary>
/// Fills <paramref name="codePageHeader"/> from <paramref name="stream"/> and, when running on a
/// big-endian host, byte-swaps the multi-byte fields in place so the buffer can be read through a
/// <see cref="CodePageHeader"/> pointer.
/// </summary>
/// <param name="stream">Stream to read the header bytes from, starting at its current position.</param>
/// <param name="codePageHeader">
/// Destination buffer; its full length is requested from the stream. On big-endian hosts it is
/// reinterpreted as a <see cref="CodePageHeader"/>, so it must be at least that large.
/// </param>
internal static unsafe void ReadCodePageHeader(Stream stream, byte[] codePageHeader)
{
    // Stream.Read is allowed to return fewer bytes than requested, so keep reading until the
    // buffer is full. A return value of 0 means end of stream; in that case the remainder of
    // the buffer is left untouched, as before.
    int totalRead = 0;
    while (totalRead < codePageHeader.Length)
    {
        int bytesRead = stream.Read(codePageHeader, totalRead, codePageHeader.Length - totalRead);
        if (bytesRead == 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    if (!BitConverter.IsLittleEndian)
    {
        fixed (byte* pBytes = &codePageHeader[0])
        {
            CodePageHeader* p = (CodePageHeader*)pBytes;

            // CodePageName is addressed as 16 consecutive UTF-16 code units; swap each one.
            char* pCodePageName = &p->CodePageName;
            for (int i = 0; i < 16; i++)
            {
                pCodePageName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pCodePageName[i]);
            }

            p->VersionMajor = BinaryPrimitives.ReverseEndianness(p->VersionMajor);
            p->VersionMinor = BinaryPrimitives.ReverseEndianness(p->VersionMinor);
            p->VersionRevision = BinaryPrimitives.ReverseEndianness(p->VersionRevision);
            p->VersionBuild = BinaryPrimitives.ReverseEndianness(p->VersionBuild);
            p->CodePage = BinaryPrimitives.ReverseEndianness(p->CodePage);
            p->ByteCount = BinaryPrimitives.ReverseEndianness(p->ByteCount);

            // UnicodeReplace is a char; swap it via its 16-bit unsigned representation.
            p->UnicodeReplace = (char)BinaryPrimitives.ReverseEndianness((ushort)p->UnicodeReplace);
            p->ByteReplace = BinaryPrimitives.ReverseEndianness(p->ByteReplace);
        }
    }
}

// Initialize our global stuff
private static readonly byte[] s_codePagesDataHeader = new byte[CODEPAGE_DATA_FILE_HEADER_SIZE];
Expand Down Expand Up @@ -166,7 +232,7 @@ internal static Stream GetEncodingDataStream(string tableName)
}

// Read the header
stream.Read(s_codePagesDataHeader, 0, s_codePagesDataHeader.Length);
ReadCodePageDataFileHeader(stream, s_codePagesDataHeader);

return stream;
}
Expand Down Expand Up @@ -210,14 +276,14 @@ private unsafe bool FindCodePage(int codePage)
CodePageIndex* pCodePageIndex = (CodePageIndex*)pBytes;
for (int i = 0; i < codePagesCount; i++)
{
s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length);
ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex);

if (pCodePageIndex->CodePage == codePage)
{
// Found it!
long position = s_codePagesEncodingDataStream.Position;
s_codePagesEncodingDataStream.Seek((long)pCodePageIndex->Offset, SeekOrigin.Begin);
s_codePagesEncodingDataStream.Read(m_codePageHeader, 0, m_codePageHeader!.Length);
ReadCodePageHeader(s_codePagesEncodingDataStream, m_codePageHeader);
m_firstDataWordOffset = (int)s_codePagesEncodingDataStream.Position; // stream now pointing to the codepage data

if (i == codePagesCount - 1) // last codepage
Expand All @@ -229,7 +295,7 @@ private unsafe bool FindCodePage(int codePage)
// Read Next codepage data to get the offset and then calculate the size
s_codePagesEncodingDataStream.Seek(position, SeekOrigin.Begin);
int currentOffset = pCodePageIndex->Offset;
s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length);
ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex);
m_dataSize = pCodePageIndex->Offset - currentOffset - m_codePageHeader.Length;
}

Expand Down Expand Up @@ -266,7 +332,7 @@ internal static unsafe int GetCodePageByteSize(int codePage)
CodePageIndex* pCodePageIndex = (CodePageIndex*)pBytes;
for (int i = 0; i < codePagesCount; i++)
{
s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length);
ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex);

if (pCodePageIndex->CodePage == codePage)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,34 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.IO;
using System.Buffers.Binary;
using System.Runtime.Serialization;
using System.Runtime.CompilerServices;

namespace System.Text
{
internal abstract partial class BaseCodePageEncoding : EncodingNLS, ISerializable
{
/// <summary>
/// Fills <paramref name="codePageIndex"/> with one index record from <paramref name="stream"/> and,
/// when running on a big-endian host, byte-swaps the multi-byte fields in place so the span can be
/// read as a <see cref="CodePageIndex"/>.
/// </summary>
/// <param name="stream">Stream to read the record bytes from, starting at its current position.</param>
/// <param name="codePageIndex">
/// Destination span; its full length is requested from the stream. On big-endian hosts it is
/// reinterpreted as a <see cref="CodePageIndex"/>, so it must be at least that large.
/// </param>
internal static unsafe void ReadCodePageIndex(Stream stream, Span<byte> codePageIndex)
{
    // Stream.Read is allowed to return fewer bytes than requested. A single call could therefore
    // leave part of the previous record in the span, so keep reading into the unfilled tail
    // until the span is full or the stream ends (Read returns 0).
    Span<byte> remaining = codePageIndex;
    while (!remaining.IsEmpty)
    {
        int bytesRead = stream.Read(remaining);
        if (bytesRead == 0)
        {
            break;
        }

        remaining = remaining.Slice(bytesRead);
    }

    if (!BitConverter.IsLittleEndian)
    {
        fixed (byte* pBytes = &codePageIndex[0])
        {
            CodePageIndex* p = (CodePageIndex*)pBytes;

            // CodePageName is addressed as 16 consecutive UTF-16 code units; swap each one.
            char* pCodePageName = &p->CodePageName;
            for (int i = 0; i < 16; i++)
            {
                pCodePageName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pCodePageName[i]);
            }

            p->CodePage = BinaryPrimitives.ReverseEndianness(p->CodePage);
            p->ByteCount = BinaryPrimitives.ReverseEndianness(p->ByteCount);
            p->Offset = BinaryPrimitives.ReverseEndianness(p->Offset);
        }
    }
}

internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider provider)
{
lock (s_streamLock)
Expand All @@ -29,7 +50,7 @@ internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider pr

for (int i = 0; i < codePagesCount; i++)
{
s_codePagesEncodingDataStream.Read(pCodePageIndex);
ReadCodePageIndex(s_codePagesEncodingDataStream, pCodePageIndex);

string codePageName;
switch (codePageIndex.CodePage)
Expand Down
Loading