Skip to content

Commit

Permalink
Merge pull request #998 from nahk-ivanov/alexiva/fix-json-utf32
Browse files Browse the repository at this point in the history
Fix JSON serialization for UTF-32 characters.

+semver:fix
  • Loading branch information
EdwardCooke authored Nov 10, 2024
2 parents 1055eb7 + 7333635 commit d2128b2
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 11 deletions.
9 changes: 9 additions & 0 deletions YamlDotNet.Test/Serialization/SerializationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,15 @@ public void SerializationOfAnchorWorksInJson()
.BeEquivalentTo(@"{""x"": {""z"": {""v"": ""1""}}, ""y"": {""k"": {""z"": {""v"": ""1""}}}}");
}

[Fact]
public void SerializationOfUtf32WorksInJson()
{
    // Arrange: the value ends with U+1F99E, a character that needs a UTF-16 surrogate pair.
    var payload = new { TestProperty = "Sea life \U0001F99E" };
    var jsonSerializer = SerializerBuilder.JsonCompatible().Build();

    // Act
    var json = jsonSerializer.Serialize(payload).Trim();

    // Assert: the character is written as two '\uXXXX' escapes rather than one '\UXXXXXXXX'.
    json.Should().Be(@"{""TestProperty"": ""Sea life \uD83E\uDD9E""}");
}

[Fact]
// Todo: this is actually roundtrip
public void DeserializationOfDefaultsWorkInJson()
Expand Down
18 changes: 16 additions & 2 deletions YamlDotNet/Core/Emitter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public class Emitter : IEmitter
private bool isWhitespace;
private bool isIndentation;
private readonly bool forceIndentLess;
private readonly bool useUtf16SurrogatePair;
private readonly string newLine;

private bool isDocumentEndWritten;
Expand Down Expand Up @@ -148,6 +149,7 @@ public Emitter(TextWriter output, EmitterSettings settings)
this.maxSimpleKeyLength = settings.MaxSimpleKeyLength;
this.skipAnchorName = settings.SkipAnchorName;
this.forceIndentLess = !settings.IndentSequences;
this.useUtf16SurrogatePair = settings.UseUtf16SurrogatePairs;
this.newLine = settings.NewLine;

this.output = output;
Expand Down Expand Up @@ -1189,8 +1191,20 @@ private void WriteDoubleQuotedScalar(string value, bool allowBreaks)
{
if (index + 1 < value.Length && IsLowSurrogate(value[index + 1]))
{
Write('U');
Write(char.ConvertToUtf32(character, value[index + 1]).ToString("X08", CultureInfo.InvariantCulture));
if (useUtf16SurrogatePair)
{
Write('u');
Write(code.ToString("X04", CultureInfo.InvariantCulture));
Write('\\');
Write('u');
Write(((ushort)value[index + 1]).ToString("X04", CultureInfo.InvariantCulture));
}
else
{
Write('U');
Write(char.ConvertToUtf32(character, value[index + 1]).ToString("X08", CultureInfo.InvariantCulture));
}

index++;
}
else
Expand Down
44 changes: 37 additions & 7 deletions YamlDotNet/Core/EmitterSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,22 @@ public sealed class EmitterSettings
/// </summary>
public bool IndentSequences { get; }

/// <summary>
/// If true, a character that is stored as a UTF-16 surrogate pair is escaped as two
/// 4-digit escapes ('\uXXXX\uXXXX') instead of a single 8-digit '\UXXXXXXXX' escape.
/// </summary>
/// <remarks>
/// This ensures compatibility with the JSON format, which does not allow '\UXXXXXXXX'
/// and instead expects the two UTF-16 code units escaped separately as '\uXXXX\uXXXX'.
/// </remarks>
public bool UseUtf16SurrogatePairs { get; }

/// <summary>
/// A shared <see cref="EmitterSettings"/> instance created with the parameterless constructor.
/// </summary>
public static readonly EmitterSettings Default = new EmitterSettings();

/// <summary>
/// Initializes a new instance without assigning any setting explicitly; each property keeps
/// whatever initial value its own declaration gives it.
/// </summary>
public EmitterSettings()
{
}

public EmitterSettings(int bestIndent, int bestWidth, bool isCanonical, int maxSimpleKeyLength, bool skipAnchorName = false, bool indentSequences = false, string? newLine = null)
public EmitterSettings(int bestIndent, int bestWidth, bool isCanonical, int maxSimpleKeyLength, bool skipAnchorName = false, bool indentSequences = false, string? newLine = null, bool useUtf16SurrogatePairs = false)
{
if (bestIndent < 2 || bestIndent > 9)
{
Expand All @@ -93,6 +102,7 @@ public EmitterSettings(int bestIndent, int bestWidth, bool isCanonical, int maxS
SkipAnchorName = skipAnchorName;
IndentSequences = indentSequences;
NewLine = newLine ?? Environment.NewLine;
UseUtf16SurrogatePairs = useUtf16SurrogatePairs;
}

public EmitterSettings WithBestIndent(int bestIndent)
Expand All @@ -104,7 +114,8 @@ public EmitterSettings WithBestIndent(int bestIndent)
MaxSimpleKeyLength,
SkipAnchorName,
IndentSequences,
NewLine
NewLine,
UseUtf16SurrogatePairs
);
}

Expand All @@ -117,7 +128,8 @@ public EmitterSettings WithBestWidth(int bestWidth)
MaxSimpleKeyLength,
SkipAnchorName,
IndentSequences,
NewLine
NewLine,
UseUtf16SurrogatePairs
);
}

Expand All @@ -130,7 +142,8 @@ public EmitterSettings WithMaxSimpleKeyLength(int maxSimpleKeyLength)
maxSimpleKeyLength,
SkipAnchorName,
IndentSequences,
NewLine
NewLine,
UseUtf16SurrogatePairs
);
}

Expand All @@ -143,7 +156,8 @@ public EmitterSettings WithNewLine(string newLine)
MaxSimpleKeyLength,
SkipAnchorName,
IndentSequences,
newLine
newLine,
UseUtf16SurrogatePairs
);
}

Expand All @@ -167,7 +181,8 @@ public EmitterSettings WithoutAnchorName()
MaxSimpleKeyLength,
true,
IndentSequences,
NewLine
NewLine,
UseUtf16SurrogatePairs
);
}

Expand All @@ -180,7 +195,22 @@ public EmitterSettings WithIndentedSequences()
MaxSimpleKeyLength,
SkipAnchorName,
true,
NewLine
NewLine,
UseUtf16SurrogatePairs
);
}

/// <summary>
/// Creates a copy of these settings with <see cref="UseUtf16SurrogatePairs"/> set to true.
/// All other settings are carried over from the current instance.
/// </summary>
/// <returns>A new <see cref="EmitterSettings"/> instance; the current one is not modified.</returns>
public EmitterSettings WithUtf16SurrogatePairs()
{
    return new EmitterSettings(
        bestIndent: BestIndent,
        bestWidth: BestWidth,
        isCanonical: IsCanonical,
        maxSimpleKeyLength: MaxSimpleKeyLength,
        skipAnchorName: SkipAnchorName,
        indentSequences: IndentSequences,
        newLine: NewLine,
        useUtf16SurrogatePairs: true
    );
}
}
Expand Down
3 changes: 2 additions & 1 deletion YamlDotNet/Serialization/SerializerBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,8 @@ public SerializerBuilder JsonCompatible()
{
this.emitterSettings = this.emitterSettings
.WithMaxSimpleKeyLength(int.MaxValue)
.WithoutAnchorName();
.WithoutAnchorName()
.WithUtf16SurrogatePairs();

return this
.WithTypeConverter(new GuidConverter(true), w => w.InsteadOf<GuidConverter>())
Expand Down
3 changes: 2 additions & 1 deletion YamlDotNet/Serialization/StaticSerializerBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,8 @@ public StaticSerializerBuilder JsonCompatible()
{
this.emitterSettings = this.emitterSettings
.WithMaxSimpleKeyLength(int.MaxValue)
.WithoutAnchorName();
.WithoutAnchorName()
.WithUtf16SurrogatePairs();

return this
.WithTypeConverter(new GuidConverter(true), w => w.InsteadOf<GuidConverter>())
Expand Down

0 comments on commit d2128b2

Please sign in to comment.