diff --git a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs index 09307d22545f6..f981eac6405db 100644 --- a/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs +++ b/src/libraries/Common/src/Interop/Unix/System.Globalization.Native/Interop.Normalization.cs @@ -10,9 +10,9 @@ internal static partial class Interop internal static partial class Globalization { [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IsNormalized")] - internal static extern int IsNormalized(NormalizationForm normalizationForm, string src, int srcLen); + internal static extern unsafe int IsNormalized(NormalizationForm normalizationForm, char* src, int srcLen); [DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_NormalizeString")] - internal static extern int NormalizeString(NormalizationForm normalizationForm, string src, int srcLen, [Out] char[] dstBuffer, int dstBufferCapacity); + internal static extern unsafe int NormalizeString(NormalizationForm normalizationForm, char* src, int srcLen, char* dstBuffer, int dstBufferCapacity); } } diff --git a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs index 3e954483d1813..362b7ffb4eefe 100644 --- a/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs +++ b/src/libraries/Common/src/Interop/Windows/Normaliz/Interop.Normalization.cs @@ -3,21 +3,21 @@ // See the LICENSE file in the project root for more information. using System.Runtime.InteropServices; +using System.Text; internal static partial class Interop { internal static partial class Normaliz { [DllImport("Normaliz.dll", CharSet = CharSet.Unicode, SetLastError = true)] - internal static extern bool IsNormalizedString(int normForm, string source, int length); + internal static extern unsafe BOOL IsNormalizedString(NormalizationForm normForm, char* source, int length); [DllImport("Normaliz.dll", CharSet = CharSet.Unicode, SetLastError = true)] - internal static extern int NormalizeString( - int normForm, - string source, + internal static extern unsafe int NormalizeString( + NormalizationForm normForm, + char* source, int sourceLength, - [System.Runtime.InteropServices.OutAttribute] - char[]? destination, + char* destination, int destinationLength); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs index 37c9024c33d42..0a95d018f0ab0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Unix.cs @@ -2,14 +2,16 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.Diagnostics; +using System.Runtime.InteropServices; using System.Text; namespace System.Globalization { internal static partial class Normalization { - internal static bool IsNormalized(string strInput, NormalizationForm normalizationForm) + internal static unsafe bool IsNormalized(string strInput, NormalizationForm normalizationForm) { if (GlobalizationMode.Invariant) { @@ -20,7 +22,11 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati ValidateArguments(strInput, normalizationForm); - int ret = Interop.Globalization.IsNormalized(normalizationForm, strInput, strInput.Length); + int ret; + fixed (char* pInput = strInput) + { + ret = Interop.Globalization.IsNormalized(normalizationForm, pInput, strInput.Length); + } if (ret == -1) { @@ -30,7 +36,7 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati return ret == 1; } - internal static string Normalize(string strInput, NormalizationForm normalizationForm) + internal static unsafe string Normalize(string strInput, NormalizationForm normalizationForm) { if (GlobalizationMode.Invariant) { @@ -41,26 +47,62 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio ValidateArguments(strInput, normalizationForm); - char[] buf = new char[strInput.Length]; - - for (int attempts = 2; attempts > 0; attempts--) + char[]? toReturn = null; + try { - int realLen = Interop.Globalization.NormalizeString(normalizationForm, strInput, strInput.Length, buf, buf.Length); + const int StackallocThreshold = 512; - if (realLen == -1) + Span buffer = strInput.Length <= StackallocThreshold + ? stackalloc char[StackallocThreshold] + : (toReturn = ArrayPool.Shared.Rent(strInput.Length)); + + for (int attempt = 0; attempt < 2; attempt++) { - throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); + int realLen; + fixed (char* pInput = strInput) + fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) + { + realLen = Interop.Globalization.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); + } + + if (realLen == -1) + { + throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); + } + + if (realLen <= buffer.Length) + { + ReadOnlySpan result = buffer.Slice(0, realLen); + return result.SequenceEqual(strInput) + ? strInput + : new string(result); + } + + Debug.Assert(realLen > StackallocThreshold); + + if (attempt == 0) + { + if (toReturn != null) + { + // Clear toReturn first to ensure we don't return the same buffer twice + char[] temp = toReturn; + toReturn = null; + ArrayPool.Shared.Return(temp); + } + + buffer = toReturn = ArrayPool.Shared.Rent(realLen); + } } - if (realLen <= buf.Length) + throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); + } + finally + { + if (toReturn != null) { - return new string(buf, 0, realLen); + ArrayPool.Shared.Return(toReturn); } - - buf = new char[realLen]; } - - throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); } // ----------------------------- diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs index e96789d1695e6..65e459a13a3e9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/Normalization.Windows.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Buffers; using System.Diagnostics; using System.Runtime.InteropServices; using System.Text; @@ -10,7 +11,7 @@ namespace System.Globalization { internal static partial class Normalization { - internal static bool IsNormalized(string strInput, NormalizationForm normalizationForm) + internal static unsafe bool IsNormalized(string strInput, NormalizationForm normalizationForm) { if (GlobalizationMode.Invariant) { @@ -24,7 +25,11 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati // The only way to know if IsNormalizedString failed is through checking the Win32 last error // IsNormalizedString pinvoke has SetLastError attribute property which will set the last error // to 0 (ERROR_SUCCESS) before executing the calls. - bool result = Interop.Normaliz.IsNormalizedString((int)normalizationForm, strInput, strInput.Length); + Interop.BOOL result; + fixed (char* pInput = strInput) + { + result = Interop.Normaliz.IsNormalizedString(normalizationForm, pInput, strInput.Length); + } int lastError = Marshal.GetLastWin32Error(); switch (lastError) @@ -51,10 +56,10 @@ internal static bool IsNormalized(string strInput, NormalizationForm normalizati throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); } - return result; + return result != Interop.BOOL.FALSE; } - internal static string Normalize(string strInput, NormalizationForm normalizationForm) + internal static unsafe string Normalize(string strInput, NormalizationForm normalizationForm) { if (GlobalizationMode.Invariant) { @@ -65,84 +70,85 @@ internal static string Normalize(string strInput, NormalizationForm normalizatio Debug.Assert(strInput != null); - // we depend on Win32 last error when calling NormalizeString - // NormalizeString pinvoke has SetLastError attribute property which will set the last error - // to 0 (ERROR_SUCCESS) before executing the calls. - - // Guess our buffer size first - int iLength = Interop.Normaliz.NormalizeString((int)normalizationForm, strInput, strInput.Length, null, 0); + if (strInput.Length == 0) + { + return string.Empty; + } - int lastError = Marshal.GetLastWin32Error(); - // Could have an error (actually it'd be quite hard to have an error here) - if ((lastError != Interop.Errors.ERROR_SUCCESS) || iLength < 0) + char[]? toReturn = null; + try { - if (lastError == Interop.Errors.ERROR_INVALID_PARAMETER) + const int StackallocThreshold = 512; + + Span buffer = strInput.Length <= StackallocThreshold + ? stackalloc char[StackallocThreshold] + : (toReturn = ArrayPool.Shared.Rent(strInput.Length)); + + while (true) { - if (normalizationForm != NormalizationForm.FormC && - normalizationForm != NormalizationForm.FormD && - normalizationForm != NormalizationForm.FormKC && - normalizationForm != NormalizationForm.FormKD) + // we depend on Win32 last error when calling NormalizeString + // NormalizeString pinvoke has SetLastError attribute property which will set the last error + // to 0 (ERROR_SUCCESS) before executing the calls. + int realLength; + fixed (char* pInput = strInput) + fixed (char* pDest = &MemoryMarshal.GetReference(buffer)) { - throw new ArgumentException(SR.Argument_InvalidNormalizationForm, nameof(normalizationForm)); + realLength = Interop.Normaliz.NormalizeString(normalizationForm, pInput, strInput.Length, pDest, buffer.Length); } + int lastError = Marshal.GetLastWin32Error(); - throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); + switch (lastError) + { + case Interop.Errors.ERROR_SUCCESS: + ReadOnlySpan result = buffer.Slice(0, realLength); + return result.SequenceEqual(strInput) + ? strInput + : new string(result); + + // Do appropriate stuff for the individual errors: + case Interop.Errors.ERROR_INSUFFICIENT_BUFFER: + realLength = Math.Abs(realLength); + Debug.Assert(realLength > buffer.Length, "Buffer overflow should have iLength > cBuffer.Length"); + if (toReturn != null) + { + // Clear toReturn first to ensure we don't return the same buffer twice + char[] temp = toReturn; + toReturn = null; + ArrayPool.Shared.Return(temp); + } + Debug.Assert(realLength > StackallocThreshold); + buffer = toReturn = ArrayPool.Shared.Rent(realLength); + continue; + + case Interop.Errors.ERROR_INVALID_PARAMETER: + case Interop.Errors.ERROR_NO_UNICODE_TRANSLATION: + if (normalizationForm != NormalizationForm.FormC && + normalizationForm != NormalizationForm.FormD && + normalizationForm != NormalizationForm.FormKC && + normalizationForm != NormalizationForm.FormKD) + { + throw new ArgumentException(SR.Argument_InvalidNormalizationForm, nameof(normalizationForm)); + } + + // Illegal code point or order found. Ie: FFFE or D800 D800, etc. + throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); + + case Interop.Errors.ERROR_NOT_ENOUGH_MEMORY: + throw new OutOfMemoryException(); + + default: + // We shouldn't get here... + throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); + } } - - // We shouldn't really be able to get here..., guessing length is - // a trivial math function... - // Can't really be Out of Memory, but just in case: - if (lastError == Interop.Errors.ERROR_NOT_ENOUGH_MEMORY) - throw new OutOfMemoryException(); - - // Who knows what happened? Not us! - throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); } - - // Don't break for empty strings (only possible for D & KD and not really possible at that) - if (iLength == 0) return string.Empty; - - // Someplace to stick our buffer - char[] cBuffer; - - while (true) + finally { - // (re)allocation buffer and normalize string - cBuffer = new char[iLength]; - - // NormalizeString pinvoke has SetLastError attribute property which will set the last error - // to 0 (ERROR_SUCCESS) before executing the calls. - iLength = Interop.Normaliz.NormalizeString((int)normalizationForm, strInput, strInput.Length, cBuffer, cBuffer.Length); - lastError = Marshal.GetLastWin32Error(); - - if (lastError == Interop.Errors.ERROR_SUCCESS) - break; - - // Could have an error (actually it'd be quite hard to have an error here) - switch (lastError) + if (toReturn != null) { - // Do appropriate stuff for the individual errors: - case Interop.Errors.ERROR_INSUFFICIENT_BUFFER: - iLength = Math.Abs(iLength); - Debug.Assert(iLength > cBuffer.Length, "Buffer overflow should have iLength > cBuffer.Length"); - continue; - - case Interop.Errors.ERROR_INVALID_PARAMETER: - case Interop.Errors.ERROR_NO_UNICODE_TRANSLATION: - // Illegal code point or order found. Ie: FFFE or D800 D800, etc. - throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex, nameof(strInput)); - - case Interop.Errors.ERROR_NOT_ENOUGH_MEMORY: - throw new OutOfMemoryException(); - - default: - // We shouldn't get here... - throw new InvalidOperationException(SR.Format(SR.UnknownError_Num, lastError)); + ArrayPool.Shared.Return(toReturn); } } - - // Copy our buffer into our new string, which will be the appropriate size - return new string(cBuffer, 0, iLength); } } }