Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly serialize Unicode special characters #3744

Merged
merged 1 commit into from
May 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/Elasticsearch.Net/Extensions/CharUtils.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace Elasticsearch.Net
{
	/// <summary>Small character conversion helpers.</summary>
	internal static class CharUtils
	{
		// Adapted from https://referencesource.microsoft.com/#mscorlib/system/security/util/hex.cs,1bfe838f662feef3

		/// <summary>
		/// Maps a number to its hexadecimal digit character: 0-9 become '0'-'9',
		/// 10-15 become 'A'-'F' (uppercase).
		/// </summary>
		/// <param name="num">The value to convert. No range check is performed, so values
		/// outside 0-15 simply yield whatever character the arithmetic lands on.</param>
		/// <returns>The character obtained by offsetting <paramref name="num"/> from '0' or 'A'.</returns>
		internal static char HexDigit(int num)
		{
			if (num < 10)
			{
				return (char)(num + '0');
			}

			// 'A' - 10 so that 10 maps onto 'A', 11 onto 'B', and so on.
			return (char)(num + ('A' - 10));
		}
	}
}
8 changes: 2 additions & 6 deletions src/Elasticsearch.Net/Extensions/X509CertificateExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@ internal static string GetCertHashString(this X509Certificate certificate)
return EncodeHexString(bytes);
}

// https://referencesource.microsoft.com/#mscorlib/system/security/util/hex.cs,1bfe838f662feef3
// converts number to hex digit. Does not do any range checks.
private static char HexDigit(int num) => (char)(num < 10 ? num + '0' : num + ('A' - 10));

private static string EncodeHexString(byte[] sArray)
{
string result = null;
Expand All @@ -26,9 +22,9 @@ private static string EncodeHexString(byte[] sArray)
for (int i = 0, j = 0; i < sArray.Length; i++)
{
var digit = (sArray[i] & 0xf0) >> 4;
hexOrder[j++] = HexDigit(digit);
hexOrder[j++] = CharUtils.HexDigit(digit);
digit = sArray[i] & 0x0f;
hexOrder[j++] = HexDigit(digit);
hexOrder[j++] = CharUtils.HexDigit(digit);
}
result = new string(hexOrder);
return result;
Expand Down
157 changes: 58 additions & 99 deletions src/Elasticsearch.Net/Utf8Json/JsonWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ public void WriteString(string value)
// for JIT Optimization, for-loop i < str.Length
for (int i = 0; i < value.Length; i++)
{
byte escapeChar = default(byte);
byte escapeChar = default;
switch (value[i])
{
case '"':
Expand All @@ -414,105 +414,54 @@ public void WriteString(string value)
case '\t':
escapeChar = (byte)'t';
break;
// use switch jumptable
case (char)0:
case (char)1:
case (char)2:
case (char)3:
case (char)4:
case (char)5:
case (char)6:
case (char)7:
case (char)11:
case (char)14:
case (char)15:
case (char)16:
case (char)17:
case (char)18:
case (char)19:
case (char)20:
case (char)21:
case (char)22:
case (char)23:
case (char)24:
case (char)25:
case (char)26:
case (char)27:
case (char)28:
case (char)29:
case (char)30:
case (char)31:
case (char)32:
case (char)33:
case (char)35:
case (char)36:
case (char)37:
case (char)38:
case (char)39:
case (char)40:
case (char)41:
case (char)42:
case (char)43:
case (char)44:
case (char)45:
case (char)46:
case (char)47:
case (char)48:
case (char)49:
case (char)50:
case (char)51:
case (char)52:
case (char)53:
case (char)54:
case (char)55:
case (char)56:
case (char)57:
case (char)58:
case (char)59:
case (char)60:
case (char)61:
case (char)62:
case (char)63:
case (char)64:
case (char)65:
case (char)66:
case (char)67:
case (char)68:
case (char)69:
case (char)70:
case (char)71:
case (char)72:
case (char)73:
case (char)74:
case (char)75:
case (char)76:
case (char)77:
case (char)78:
case (char)79:
case (char)80:
case (char)81:
case (char)82:
case (char)83:
case (char)84:
case (char)85:
case (char)86:
case (char)87:
case (char)88:
case (char)89:
case (char)90:
case (char)91:
default:
continue;
}

max += 2;
case (char)0:
case (char)1:
case (char)2:
case (char)3:
case (char)4:
case (char)5:
case (char)6:
case (char)7:
case (char)11:
case (char)14:
case (char)15:
case (char)16:
case (char)17:
case (char)18:
case (char)19:
case (char)20:
case (char)21:
case (char)22:
case (char)23:
case (char)24:
case (char)25:
case (char)26:
case (char)27:
case (char)28:
case (char)29:
case (char)30:
case (char)31:
case '\u0085':
case '\u2028':
case '\u2029':
break;
default:
continue;
}

max += escapeChar == default ? 6 : 2;
BinaryUtil.EnsureCapacity(ref buffer, startoffset, max); // check +escape capacity

offset += StringEncoding.UTF8.GetBytes(value, from, i - from, buffer, offset);
offset += StringEncoding.UTF8.GetBytes(value, from, i - from, buffer, offset);
from = i + 1;
buffer[offset++] = (byte)'\\';
buffer[offset++] = escapeChar;
}

if (escapeChar == default)
ToUnicode(value[i], ref offset, buffer);
else
{
buffer[offset++] = (byte)'\\';
buffer[offset++] = escapeChar;
}
}

if (from != value.Length)
{
Expand All @@ -521,5 +470,15 @@ public void WriteString(string value)

buffer[offset++] = (byte)'\"';
}
}

/// <summary>
/// Writes <paramref name="c"/> to <paramref name="buffer"/> as a six byte JSON
/// <c>\uXXXX</c> escape sequence, advancing <paramref name="offset"/> past each byte written.
/// </summary>
/// <param name="c">The UTF-16 code unit to escape.</param>
/// <param name="offset">Position in <paramref name="buffer"/> to start writing at; incremented once per byte written.</param>
/// <param name="buffer">Destination buffer. This method does not grow or bounds-check it itself.</param>
private static void ToUnicode(char c, ref int offset, byte[] buffer)
{
	buffer[offset++] = (byte)'\\';
	buffer[offset++] = (byte)'u';

	// Emit the four nibbles of the code unit, most significant first.
	for (var shift = 12; shift >= 0; shift -= 4)
	{
		buffer[offset++] = (byte)CharUtils.HexDigit((c >> shift) & 0xF);
	}
}
}
}
24 changes: 24 additions & 0 deletions src/Tests/Tests.Reproduce/GithubIssue3743.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using System.Linq;
using Elastic.Xunit.XunitPlumbing;
using Elasticsearch.Net;
using FluentAssertions;
using Newtonsoft.Json;
using Tests.Core.Serialization;

namespace Tests.Reproduce
{
public class GithubIssue3743
{
[U]
public void SerializesUnicodeEscapeSequences()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

++

{
var doc = new { value = new string(Enumerable.Range(0, 9727).Select(i => (char)i).ToArray()) };

var internalJson = SerializationTester.Default.Client.SourceSerializer.SerializeToString(doc, formatting: SerializationFormatting.None);
var jsonNet = JsonConvert.SerializeObject(doc, Formatting.None);

// Json.NET writes the hex digits of \uXXXX escapes in lowercase, Utf8Json writes them in uppercase (cheaper to compute). Both forms are valid JSON, so the strings are compared ignoring case.
internalJson.Should().BeEquivalentTo(jsonNet);
}
}
}