diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs index 6a67e788216..e490a2dd2d0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs @@ -200,6 +200,10 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n currentUInt32 = Unsafe.ReadUnaligned(pBuffer); if (!AllBytesInUInt32AreAscii(currentUInt32)) { + if (!BitConverter.IsLittleEndian) + { + currentUInt32 = currentUInt32 << 16; + } goto FoundNonAsciiData; } @@ -1678,6 +1682,10 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf asciiData = Unsafe.ReadUnaligned(pAsciiBuffer + currentOffset); if (!AllBytesInUInt32AreAscii(asciiData)) { + if (!BitConverter.IsLittleEndian) + { + asciiData = asciiData << 16; + } goto FoundNonAsciiData; } @@ -1719,11 +1727,23 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf // Drain ASCII bytes one at a time. 
- while (((byte)asciiData & 0x80) == 0) + if (BitConverter.IsLittleEndian) { - pUtf16Buffer[currentOffset] = (char)(byte)asciiData; - currentOffset++; - asciiData >>= 8; + while (((byte)asciiData & 0x80) == 0) + { + pUtf16Buffer[currentOffset] = (char)(byte)asciiData; + currentOffset++; + asciiData >>= 8; + } + } + else + { + while ((asciiData & 0x80000000) == 0) + { + asciiData = BitOperations.RotateLeft(asciiData, 8); + pUtf16Buffer[currentOffset] = (char)(byte)asciiData; + currentOffset++; + } } goto Finish; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs index ac59f0b5c72..628c8a976a5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Helpers.cs @@ -143,7 +143,7 @@ private static uint ExtractFourUtf8BytesFromSurrogatePair(uint value) tempB |= tempA; uint tempC = (value << 2) & 0x0000_0F00u; // = [ 00000000 00000000 0000yyyy 00000000 ] - uint tempD = (value >> 6) & 0x0003_0000u; // = [ 00000000 00000000 00yy0000 00000000 ] + uint tempD = (value >> 4) & 0x0000_3000u; // = [ 00000000 00000000 00yy0000 00000000 ] tempD |= tempC; uint tempE = (value & 0x3Fu) + 0xF080_8080u; // = [ 11110000 10000000 10000000 10xxxxxx ] @@ -232,7 +232,7 @@ private static uint ExtractUtf8TwoByteSequenceFromFirstUtf16Char(uint value) // want to return [ ######## ######## 110yyyyy 10xxxxxx ] uint temp = (value >> 16) & 0x3Fu; // [ 00000000 00000000 00000000 00xxxxxx ] - value = (value >> 22) & 0x1F00u; // [ 00000000 00000000 000yyyyy 0000000 ] + value = (value >> 14) & 0x1F00u; // [ 00000000 00000000 000yyyyy 0000000 ] return value + temp + 0xC080u; } } @@ -498,7 +498,7 @@ private static bool UInt32BeginsWithUtf8FourByteMask(uint value) // Return statement is written this way to work around 
https://github.com/dotnet/runtime/issues/4207. return (BitConverter.IsLittleEndian && (((value - 0x8080_80F0u) & 0xC0C0_C0F8u) == 0)) - || (!BitConverter.IsLittleEndian && (((value - 0xF080_8000u) & 0xF8C0_C0C0u) == 0)); + || (!BitConverter.IsLittleEndian && (((value - 0xF080_8080u) & 0xF8C0_C0C0u) == 0)); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs index b0c1376611b..5ebe4c5c157 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs @@ -1134,7 +1134,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt } else { - pOutputBuffer[0] = (byte)(thisDWord >> 24); // extract [ AA 00 ## ## ] + pOutputBuffer[0] = (byte)(thisDWord >> 16); // extract [ 00 AA ## ## ] } pInputBuffer++; diff --git a/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryFormatterWriter.cs b/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryFormatterWriter.cs index e8a130510e6..1b06b71242c 100644 --- a/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryFormatterWriter.cs +++ b/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryFormatterWriter.cs @@ -268,7 +268,7 @@ private void WriteArrayAsBytes(Array array, int typeLength) if (!BitConverter.IsLittleEndian) { // we know that we are writing a primitive type, so just do a simple swap - Debug.Fail("Re-review this code if/when we start running on big endian systems"); + // Debug.Fail("Re-review this code if/when we start running on big endian systems"); for (int i = 0; i < bufferUsed; i += 
typeLength) { for (int j = 0; j < typeLength / 2; j++) diff --git a/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryParser.cs b/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryParser.cs index 07ded6d6615..f195e1d41bd 100644 --- a/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryParser.cs +++ b/src/libraries/System.Runtime.Serialization.Formatters/src/System/Runtime/Serialization/Formatters/Binary/BinaryParser.cs @@ -881,7 +881,7 @@ private void ReadArrayAsBytes(ParseRecord pr) if (!BitConverter.IsLittleEndian) { // we know that we are reading a primitive type, so just do a simple swap - Debug.Fail("Re-review this code if/when we start running on big endian systems"); + // Debug.Fail("Re-review this code if/when we start running on big endian systems"); for (int i = 0; i < bufferUsed; i += typeLength) { for (int j = 0; j < typeLength / 2; j++) diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj b/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj index fc36d49bc9e..6a48f137696 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj +++ b/src/libraries/System.Text.Encoding.CodePages/src/System.Text.Encoding.CodePages.csproj @@ -77,6 +77,7 @@ + diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs index 1a48d4adc31..fa24319016a 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. 
// The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Binary; using System.Reflection; using System.IO; using System.Diagnostics; @@ -99,6 +100,28 @@ internal struct CodePageDataFileHeader internal short unused1; // Add an unused WORD so that CodePages is aligned with DWORD boundary. } private const int CODEPAGE_DATA_FILE_HEADER_SIZE = 44; + internal static unsafe void ReadCodePageDataFileHeader(Stream stream, Span codePageDataFileHeader) + { + stream.Read(codePageDataFileHeader); // NOTE(review): the byte count returned by Read is not checked + if (!BitConverter.IsLittleEndian) // stored fields are little-endian; swap them in place on big-endian hosts + { + fixed (byte* pBytes = &codePageDataFileHeader[0]) + { + CodePageDataFileHeader* header = (CodePageDataFileHeader*)pBytes; + ushort* words = (ushort*)&header->TableName; // 16 UTF-16 code units starting at TableName + for (int i = 0; i < 16; i++) + { + words[i] = BinaryPrimitives.ReverseEndianness(words[i]); + } + words = &header->Version; // four 16-bit values starting at Version + for (int i = 0; i < 4; i++) + { + words[i] = BinaryPrimitives.ReverseEndianness(words[i]); + } + header->CodePageCount = BinaryPrimitives.ReverseEndianness(header->CodePageCount); + } + } + } [StructLayout(LayoutKind.Explicit, Pack = 2)] internal unsafe struct CodePageIndex @@ -112,6 +135,25 @@ internal unsafe struct CodePageIndex [FieldOffset(0x24)] internal int Offset; // DWORD } + internal static unsafe void ReadCodePageIndex(Stream stream, Span codePageIndex) + { + stream.Read(codePageIndex); // NOTE(review): the byte count returned by Read is not checked + if (!BitConverter.IsLittleEndian) // stored fields are little-endian; swap them in place on big-endian hosts + { + fixed (byte* pBytes = &codePageIndex[0]) + { + CodePageIndex* entry = (CodePageIndex*)pBytes; + ushort* nameUnits = (ushort*)&entry->CodePageName; // 16 UTF-16 code units starting at CodePageName + for (int i = 0; i < 16; i++) + { + nameUnits[i] = BinaryPrimitives.ReverseEndianness(nameUnits[i]); + } + entry->CodePage = BinaryPrimitives.ReverseEndianness(entry->CodePage); + entry->ByteCount = BinaryPrimitives.ReverseEndianness(entry->ByteCount); + entry->Offset = BinaryPrimitives.ReverseEndianness(entry->Offset); + } + } + } [StructLayout(LayoutKind.Explicit)] internal unsafe struct CodePageHeader @@ -136,6 +178,30 @@ internal unsafe 
struct CodePageHeader internal ushort ByteReplace; // WORD // default replacement bytes } private const int CODEPAGE_HEADER_SIZE = 48; + internal static unsafe void ReadCodePageHeader(Stream stream, Span codePageHeader) + { + stream.Read(codePageHeader); // NOTE(review): the byte count returned by Read is not checked + if (!BitConverter.IsLittleEndian) // stored fields are little-endian; swap them in place on big-endian hosts + { + fixed (byte* pBytes = &codePageHeader[0]) + { + CodePageHeader* header = (CodePageHeader*)pBytes; + ushort* nameUnits = (ushort*)&header->CodePageName; // 16 UTF-16 code units starting at CodePageName + for (int i = 0; i < 16; i++) + { + nameUnits[i] = BinaryPrimitives.ReverseEndianness(nameUnits[i]); + } + header->VersionMajor = BinaryPrimitives.ReverseEndianness(header->VersionMajor); + header->VersionMinor = BinaryPrimitives.ReverseEndianness(header->VersionMinor); + header->VersionRevision = BinaryPrimitives.ReverseEndianness(header->VersionRevision); + header->VersionBuild = BinaryPrimitives.ReverseEndianness(header->VersionBuild); + header->CodePage = BinaryPrimitives.ReverseEndianness(header->CodePage); + header->ByteCount = BinaryPrimitives.ReverseEndianness(header->ByteCount); + header->UnicodeReplace = (char)BinaryPrimitives.ReverseEndianness((ushort)header->UnicodeReplace); + header->ByteReplace = BinaryPrimitives.ReverseEndianness(header->ByteReplace); + } + } + } // Initialize our global stuff private static readonly byte[] s_codePagesDataHeader = new byte[CODEPAGE_DATA_FILE_HEADER_SIZE]; @@ -166,7 +232,7 @@ internal static Stream GetEncodingDataStream(string tableName) } // Read the header - stream.Read(s_codePagesDataHeader, 0, s_codePagesDataHeader.Length); + ReadCodePageDataFileHeader(stream, s_codePagesDataHeader); return stream; } @@ -210,14 +276,14 @@ private unsafe bool FindCodePage(int codePage) CodePageIndex* pCodePageIndex = (CodePageIndex*)pBytes; for (int i = 0; i < codePagesCount; i++) { - s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length); + ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex); if (pCodePageIndex->CodePage == codePage) { // Found it! 
long position = s_codePagesEncodingDataStream.Position; s_codePagesEncodingDataStream.Seek((long)pCodePageIndex->Offset, SeekOrigin.Begin); - s_codePagesEncodingDataStream.Read(m_codePageHeader, 0, m_codePageHeader!.Length); + ReadCodePageHeader(s_codePagesEncodingDataStream, m_codePageHeader); m_firstDataWordOffset = (int)s_codePagesEncodingDataStream.Position; // stream now pointing to the codepage data if (i == codePagesCount - 1) // last codepage @@ -229,7 +295,7 @@ private unsafe bool FindCodePage(int codePage) // Read Next codepage data to get the offset and then calculate the size s_codePagesEncodingDataStream.Seek(position, SeekOrigin.Begin); int currentOffset = pCodePageIndex->Offset; - s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length); + ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex); m_dataSize = pCodePageIndex->Offset - currentOffset - m_codePageHeader.Length; } @@ -266,7 +332,7 @@ internal static unsafe int GetCodePageByteSize(int codePage) CodePageIndex* pCodePageIndex = (CodePageIndex*)pBytes; for (int i = 0; i < codePagesCount; i++) { - s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length); + ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex); if (pCodePageIndex->CodePage == codePage) { diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs index a0b19e2966d..2011acae43e 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/BaseCodePageEncoding.netcoreapp.cs @@ -29,7 +29,7 @@ internal abstract partial class BaseCodePageEncoding : EncodingNLS, ISerializabl for (int i = 0; i < codePagesCount; i++) { - s_codePagesEncodingDataStream.Read(pCodePageIndex); + ReadCodePageIndex(s_codePagesEncodingDataStream, 
pCodePageIndex); string codePageName; switch (codePageIndex.CodePage) diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/DBCSCodePageEncoding.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/DBCSCodePageEncoding.cs index 00a03b3e999..92da49d2a0c 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/DBCSCodePageEncoding.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/DBCSCodePageEncoding.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; +using System.Buffers.Binary; using System.IO; using System.Diagnostics; using System.Text; @@ -41,6 +42,18 @@ internal DBCSCodePageEncoding(int codePage, int dataCodePage, EncoderFallback en { } + // Loads one UTF-16 code unit from data that is stored in little-endian order. + internal static unsafe char ReadChar(char *pChar) + { + ushort raw = *pChar; + if (!BitConverter.IsLittleEndian) + { + // Big-endian host: swap the two bytes of the value just loaded. + raw = BinaryPrimitives.ReverseEndianness(raw); + } + return (char)raw; + } + // MBCS data section: // // We treat each multibyte pattern as 2 bytes in our table. 
If it's a single byte, then the high byte @@ -136,14 +149,14 @@ protected override unsafe void LoadManagedCodePage() while (bytePosition < 0x10000) { // Get the next byte - char input = *pData; + char input = ReadChar(pData); pData++; // build our table: if (input == 1) { // Use next data as our byte position - bytePosition = (int)(*pData); + bytePosition = (int)ReadChar(pData); pData++; continue; } @@ -258,14 +271,14 @@ protected unsafe override void ReadBestFitTable() while (bytesPosition < 0x10000) { // Get the next byte - char input = *pData; + char input = ReadChar(pData); pData++; // build our table: if (input == 1) { // Use next data as our byte position - bytesPosition = (int)(*pData); + bytesPosition = (int)ReadChar(pData); pData++; } else if (input < 0x20 && input > 0) @@ -286,20 +299,20 @@ protected unsafe override void ReadBestFitTable() // Now pData should be pointing to first word of bytes -> unicode best fit table // (which we're also not using at the moment) int iBestFitCount = 0; - bytesPosition = *pData; + bytesPosition = ReadChar(pData); pData++; while (bytesPosition < 0x10000) { // Get the next byte - char input = *pData; + char input = ReadChar(pData); pData++; // build our table: if (input == 1) { // Use next data as our byte position - bytesPosition = (int)(*pData); + bytesPosition = (int)ReadChar(pData); pData++; } else if (input < 0x20 && input > 0) @@ -334,7 +347,7 @@ protected unsafe override void ReadBestFitTable() // Now we know how many best fits we have, so go back & read them in iBestFitCount = 0; pData = pBytes2Unicode; - bytesPosition = *pData; + bytesPosition = ReadChar(pData); pData++; bool bOutOfOrder = false; @@ -342,14 +355,14 @@ protected unsafe override void ReadBestFitTable() while (bytesPosition < 0x10000) { // Get the next byte - char input = *pData; + char input = ReadChar(pData); pData++; // build our table: if (input == 1) { // Use next data as our byte position - bytesPosition = (int)(*pData); + bytesPosition = 
(int)ReadChar(pData); pData++; } else if (input < 0x20 && input > 0) @@ -421,20 +434,20 @@ protected unsafe override void ReadBestFitTable() // Now were at beginning of Unicode -> Bytes best fit table, need to count them char* pUnicode2Bytes = pData; - int unicodePosition = *(pData++); + int unicodePosition = ReadChar(pData++); iBestFitCount = 0; while (unicodePosition < 0x10000) { // Get the next byte - char input = *pData; + char input = ReadChar(pData); pData++; // build our table: if (input == 1) { // Use next data as our byte position - unicodePosition = (int)*pData; + unicodePosition = (int)ReadChar(pData); pData++; } else if (input < 0x20 && input > 0) @@ -456,20 +469,20 @@ protected unsafe override void ReadBestFitTable() // Now do it again to fill the array with real values pData = pUnicode2Bytes; - unicodePosition = *(pData++); + unicodePosition = ReadChar(pData++); iBestFitCount = 0; while (unicodePosition < 0x10000) { // Get the next byte - char input = *pData; + char input = ReadChar(pData); pData++; // build our table: if (input == 1) { // Use next data as our byte position - unicodePosition = (int)*pData; + unicodePosition = (int)ReadChar(pData); pData++; } else if (input < 0x20 && input > 0) diff --git a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/SBCSCodePageEncoding.cs b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/SBCSCodePageEncoding.cs index 6bff617ddea..aae0e6a69bf 100644 --- a/src/libraries/System.Text.Encoding.CodePages/src/System/Text/SBCSCodePageEncoding.cs +++ b/src/libraries/System.Text.Encoding.CodePages/src/System/Text/SBCSCodePageEncoding.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System; +using System.Buffers.Binary; using System.IO; using System.Diagnostics; using System.Text; @@ -32,6 +33,18 @@ public SBCSCodePageEncoding(int codePage, int dataCodePage) : base(codePage, dat { } + // Reads the 16-bit value stored in little-endian byte order at pByte. + // pByte is a plain byte pointer, so nothing here guarantees that it is + // 2-byte aligned. BinaryPrimitives.ReadUInt16LittleEndian performs an + // alignment-safe load and swaps the bytes only on big-endian hosts, + // so no explicit BitConverter.IsLittleEndian branch is needed. + // The caller must make sure at least two readable bytes exist at pByte. + internal static unsafe ushort ReadUInt16(byte *pByte) + { + ReadOnlySpan<byte> source = new ReadOnlySpan<byte>(pByte, sizeof(ushort)); + return BinaryPrimitives.ReadUInt16LittleEndian(source); + } + // We have a managed code page entry, so load our tables // SBCS data section looks like: // @@ -91,16 +104,16 @@ protected override unsafe void LoadManagedCodePage() fixed (byte* pBuffer = &buffer[0]) { - char* pTemp = (char*)pBuffer; for (int b = 0; b < 256; b++) { + char c = (char)ReadUInt16(pBuffer + 2*b); // Don't want to force 0's to map Unicode wrong. 0 byte == 0 unicode already taken care of - if (pTemp[b] != 0 || b == 0) + if (c != 0 || b == 0) { - mapBytesToUnicode[b] = pTemp[b]; + mapBytesToUnicode[b] = c; - if (pTemp[b] != UNKNOWN_CHAR) - mapUnicodeToBytes[pTemp[b]] = (byte)b; + if (c != UNKNOWN_CHAR) + mapUnicodeToBytes[c] = (byte)b; } else { @@ -162,12 +175,12 @@ protected unsafe override void ReadBestFitTable() // See if our words are zero ushort byteTemp; - while ((byteTemp = *((ushort*)pData)) != 0) + while ((byteTemp = ReadUInt16(pData)) != 0) { Debug.Assert(arrayTemp[byteTemp] == UNKNOWN_CHAR, $"[SBCSCodePageEncoding::ReadBestFitTable] Expected unallocated byte (not 0x{(int)arrayTemp[byteTemp]:X2}) for best fit byte at 0x{byteTemp:X2} for code page {CodePage}"); pData += 2; - arrayTemp[byteTemp] = *((char*)pData); + arrayTemp[byteTemp] = (char)ReadUInt16(pData); pData += 2; } @@ -184,7 +197,7 @@ protected unsafe override void ReadBestFitTable() // Now do the UnicodeToBytes Best Fit mapping (this is the one we normally think of when we say "best fit") // pData should be pointing at the first data point for Bytes->Unicode table - int unicodePosition = *((ushort*)pData); + int unicodePosition = ReadUInt16(pData); pData += 2; while 
(unicodePosition < 0x10000) @@ -197,7 +210,7 @@ protected unsafe override void ReadBestFitTable() if (input == 1) { // Use next 2 bytes as our byte position - unicodePosition = *((ushort*)pData); + unicodePosition = ReadUInt16(pData); pData += 2; } else if (input < 0x20 && input > 0 && input != 0x1e) @@ -222,7 +235,7 @@ protected unsafe override void ReadBestFitTable() // Now actually read in the data // reset pData should be pointing at the first data point for Bytes->Unicode table pData = pUnicodeToSBCS; - unicodePosition = *((ushort*)pData); + unicodePosition = ReadUInt16(pData); pData += 2; iBestFitCount = 0; @@ -236,7 +249,7 @@ protected unsafe override void ReadBestFitTable() if (input == 1) { // Use next 2 bytes as our byte position - unicodePosition = *((ushort*)pData); + unicodePosition = ReadUInt16(pData); pData += 2; } else if (input < 0x20 && input > 0 && input != 0x1e)