diff --git a/YamlDotNet.Test/Serialization/SerializationTests.cs b/YamlDotNet.Test/Serialization/SerializationTests.cs index 7bff67b1..1088f0ad 100644 --- a/YamlDotNet.Test/Serialization/SerializationTests.cs +++ b/YamlDotNet.Test/Serialization/SerializationTests.cs @@ -886,6 +886,15 @@ public void SerializationOfAnchorWorksInJson() .BeEquivalentTo(@"{""x"": {""z"": {""v"": ""1""}}, ""y"": {""k"": {""z"": {""v"": ""1""}}}}"); } + [Fact] + public void SerializationOfUtf32WorksInJson() + { + var obj = new { TestProperty = "Sea life \U0001F99E" }; + + SerializerBuilder.JsonCompatible().Build().Serialize(obj).Trim().Should() + .Be(@"{""TestProperty"": ""Sea life \uD83E\uDD9E""}"); + } + [Fact] // Todo: this is actually roundtrip public void DeserializationOfDefaultsWorkInJson() diff --git a/YamlDotNet/Core/Emitter.cs b/YamlDotNet/Core/Emitter.cs index 01824d10..7931c8a0 100644 --- a/YamlDotNet/Core/Emitter.cs +++ b/YamlDotNet/Core/Emitter.cs @@ -66,6 +66,7 @@ public class Emitter : IEmitter private bool isWhitespace; private bool isIndentation; private readonly bool forceIndentLess; + private readonly bool useUtf16SurrogatePair; private readonly string newLine; private bool isDocumentEndWritten; @@ -148,6 +149,7 @@ public Emitter(TextWriter output, EmitterSettings settings) this.maxSimpleKeyLength = settings.MaxSimpleKeyLength; this.skipAnchorName = settings.SkipAnchorName; this.forceIndentLess = !settings.IndentSequences; + this.useUtf16SurrogatePair = settings.UseUtf16SurrogatePairs; this.newLine = settings.NewLine; this.output = output; @@ -1189,8 +1191,20 @@ private void WriteDoubleQuotedScalar(string value, bool allowBreaks) { if (index + 1 < value.Length && IsLowSurrogate(value[index + 1])) { - Write('U'); - Write(char.ConvertToUtf32(character, value[index + 1]).ToString("X08", CultureInfo.InvariantCulture)); + if (useUtf16SurrogatePair) + { + Write('u'); + Write(code.ToString("X04", CultureInfo.InvariantCulture)); + Write('\\'); + Write('u'); + 
Write(((ushort)value[index + 1]).ToString("X04", CultureInfo.InvariantCulture)); + } + else + { + Write('U'); + Write(char.ConvertToUtf32(character, value[index + 1]).ToString("X08", CultureInfo.InvariantCulture)); + } + index++; } else diff --git a/YamlDotNet/Core/EmitterSettings.cs b/YamlDotNet/Core/EmitterSettings.cs index cf44f15c..57ee975d 100644 --- a/YamlDotNet/Core/EmitterSettings.cs +++ b/YamlDotNet/Core/EmitterSettings.cs @@ -63,13 +63,22 @@ public sealed class EmitterSettings /// public bool IndentSequences { get; } + /// + /// If true, then 4-byte UTF-32 characters are broken into two 2-byte UTF-16 code units (a surrogate pair). + /// + /// + /// This ensures compatibility with JSON format, as it does not allow '\Uxxxxxxxx' + /// and instead expects two escaped 2-byte characters '\uxxxx\uxxxx'. + /// + public bool UseUtf16SurrogatePairs { get; } + public static readonly EmitterSettings Default = new EmitterSettings(); public EmitterSettings() { } - public EmitterSettings(int bestIndent, int bestWidth, bool isCanonical, int maxSimpleKeyLength, bool skipAnchorName = false, bool indentSequences = false, string? newLine = null) + public EmitterSettings(int bestIndent, int bestWidth, bool isCanonical, int maxSimpleKeyLength, bool skipAnchorName = false, bool indentSequences = false, string? newLine = null, bool useUtf16SurrogatePairs = false) { if (bestIndent < 2 || bestIndent > 9) { @@ -93,6 +102,7 @@ public EmitterSettings(int bestIndent, int bestWidth, bool isCanonical, int maxS SkipAnchorName = skipAnchorName; IndentSequences = indentSequences; NewLine = newLine ?? 
Environment.NewLine; + UseUtf16SurrogatePairs = useUtf16SurrogatePairs; } public EmitterSettings WithBestIndent(int bestIndent) @@ -104,7 +114,8 @@ public EmitterSettings WithBestIndent(int bestIndent) MaxSimpleKeyLength, SkipAnchorName, IndentSequences, - NewLine + NewLine, + UseUtf16SurrogatePairs ); } @@ -117,7 +128,8 @@ public EmitterSettings WithBestWidth(int bestWidth) MaxSimpleKeyLength, SkipAnchorName, IndentSequences, - NewLine + NewLine, + UseUtf16SurrogatePairs ); } @@ -130,7 +142,8 @@ public EmitterSettings WithMaxSimpleKeyLength(int maxSimpleKeyLength) maxSimpleKeyLength, SkipAnchorName, IndentSequences, - NewLine + NewLine, + UseUtf16SurrogatePairs ); } @@ -143,7 +156,8 @@ public EmitterSettings WithNewLine(string newLine) MaxSimpleKeyLength, SkipAnchorName, IndentSequences, - newLine + newLine, + UseUtf16SurrogatePairs ); } @@ -167,7 +181,8 @@ public EmitterSettings WithoutAnchorName() MaxSimpleKeyLength, true, IndentSequences, - NewLine + NewLine, + UseUtf16SurrogatePairs ); } @@ -180,7 +195,22 @@ public EmitterSettings WithIndentedSequences() MaxSimpleKeyLength, SkipAnchorName, true, - NewLine + NewLine, + UseUtf16SurrogatePairs + ); + } + + public EmitterSettings WithUtf16SurrogatePairs() + { + return new EmitterSettings( + BestIndent, + BestWidth, + IsCanonical, + MaxSimpleKeyLength, + SkipAnchorName, + IndentSequences, + NewLine, + true ); } } diff --git a/YamlDotNet/Serialization/SerializerBuilder.cs b/YamlDotNet/Serialization/SerializerBuilder.cs index bae3896d..ec9a7feb 100755 --- a/YamlDotNet/Serialization/SerializerBuilder.cs +++ b/YamlDotNet/Serialization/SerializerBuilder.cs @@ -366,7 +366,8 @@ public SerializerBuilder JsonCompatible() { this.emitterSettings = this.emitterSettings .WithMaxSimpleKeyLength(int.MaxValue) - .WithoutAnchorName(); + .WithoutAnchorName() + .WithUtf16SurrogatePairs(); return this .WithTypeConverter(new GuidConverter(true), w => w.InsteadOf()) diff --git a/YamlDotNet/Serialization/StaticSerializerBuilder.cs 
b/YamlDotNet/Serialization/StaticSerializerBuilder.cs index 4c165fbc..76726701 100644 --- a/YamlDotNet/Serialization/StaticSerializerBuilder.cs +++ b/YamlDotNet/Serialization/StaticSerializerBuilder.cs @@ -370,7 +370,8 @@ public StaticSerializerBuilder JsonCompatible() { this.emitterSettings = this.emitterSettings .WithMaxSimpleKeyLength(int.MaxValue) - .WithoutAnchorName(); + .WithoutAnchorName() + .WithUtf16SurrogatePairs(); return this .WithTypeConverter(new GuidConverter(true), w => w.InsteadOf())