Skip to content

Commit

Permalink
Merge branch '2.19'
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Nov 15, 2024
2 parents 47b03bd + 788eb90 commit 9253f9f
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 20 deletions.
5 changes: 5 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,11 @@ Zhanghao (@zhangOranges)
* Contributed #1305: Make helper methods of `WriterBasedJsonGenerator` non-final to allow overriding
(2.18.0)

Justin Gosselin (@jgosselin-accesso)
* Reported #1359: Non-surrogate characters being incorrectly combined when
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
(2.18.2)

Eduard Gomoliako (@Gems)
* Contributed #1356: Make `JsonGenerator::writeTypePrefix` not write a
`WRAPPER_ARRAY` when `typeIdDef.id == null`
Expand Down
4 changes: 4 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ a pure JSON library.

#1353: Use fastdoubleparser 1.0.90
(fixed by @pjfanning)
#1359: Non-surrogate characters being incorrectly combined when
`JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
(reported by Justin G)
(fixed by @pjfanning)

2.18.0 (26-Sep-2024)

Expand Down
10 changes: 6 additions & 4 deletions src/main/java/tools/jackson/core/json/UTF8JsonGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ public UTF8JsonGenerator(ObjectWriteContext writeCtxt, IOContext ioCtxt,
_bufferRecyclable = true;
_outputBuffer = ioCtxt.allocWriteEncodingBuffer();
_outputEnd = _outputBuffer.length;

// To be exact, each char can take up to 6 bytes when escaped (Unicode
// escape with backslash, 'u' and 4 hex digits); but to avoid fluctuation,
// we will actually round down to only do up to 1/8 number of chars
Expand Down Expand Up @@ -1546,7 +1547,7 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// 3- or 4-byte character
if (_isSurrogateChar(ch)) {
if (_isStartOfSurrogatePair(ch)) {
final boolean combineSurrogates = JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_formatWriteFeatures);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
Expand Down Expand Up @@ -1594,7 +1595,7 @@ private final void _writeStringSegment2(final String text, int offset, final int
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
} else {
// 3- or 4-byte character
if (_isSurrogateChar(ch)) {
if (_isStartOfSurrogatePair(ch)) {
final boolean combineSurrogates = JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_formatWriteFeatures);
if (combineSurrogates && offset < end) {
char highSurrogate = (char) ch;
Expand Down Expand Up @@ -2297,8 +2298,9 @@ private byte[] getHexBytes() {
}

// @since 2.18
private boolean _isSurrogateChar(int ch) {
return (ch & 0xD800) == 0xD800;
/**
 * Tells whether the given character code is the leading (high) half of a
 * surrogate pair, i.e. its low 16 bits fall in the range 0xD800 - 0xDBFF.
 *
 * @param ch character code to check
 * @return {@code true} if {@code ch} is a high surrogate; {@code false} otherwise
 */
private static boolean _isStartOfSurrogatePair(final int ch) {
    // Narrowing to char keeps exactly the 16 low bits, which are the only bits
    // that decide membership in the high-surrogate range.
    final char lowBits = (char) ch;
    return Character.isHighSurrogate(lowBits);
}
}

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package tools.jackson.core.json;
package tools.jackson.core.write;

import java.io.ByteArrayOutputStream;
import java.io.StringWriter;
Expand All @@ -7,11 +7,14 @@
import org.junit.jupiter.api.Test;

import tools.jackson.core.*;
import tools.jackson.core.json.JsonFactory;
import tools.jackson.core.json.JsonWriteFeature;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

class Surrogate223Test extends JUnit5TestBase
class SurrogateWrite223Test extends JUnit5TestBase
{
private final JsonFactory DEFAULT_JSON_F = newStreamFactory();

Expand Down Expand Up @@ -69,25 +72,54 @@ void surrogatesByteBacked() throws Exception
@Test
void surrogatesCharBacked() throws Exception
{
Writer out;
JsonGenerator g;
final String toQuote = new String(Character.toChars(0x1F602));
assertEquals(2, toQuote.length()); // just sanity check

out = new StringWriter();
g = DEFAULT_JSON_F.createGenerator(ObjectWriteContext.empty(), out);
g.writeStartArray();
g.writeString(toQuote);
g.writeEndArray();
g.close();
Writer out = new StringWriter();
try (JsonGenerator g = DEFAULT_JSON_F.createGenerator(ObjectWriteContext.empty(), out)) {
g.writeStartArray();
g.writeString(toQuote);
g.writeEndArray();
}
assertEquals(2 + 2 + 2, out.toString().length()); // brackets, quotes, 2 chars as is

// Also parse back to ensure correctness
JsonParser p = DEFAULT_JSON_F.createParser(ObjectReadContext.empty(), out.toString());
assertToken(JsonToken.START_ARRAY, p.nextToken());
assertToken(JsonToken.VALUE_STRING, p.nextToken());
assertEquals(toQuote, p.getText());
assertToken(JsonToken.END_ARRAY, p.nextToken());
p.close();
try (JsonParser p = DEFAULT_JSON_F.createParser(ObjectReadContext.empty(), out.toString())) {
assertToken(JsonToken.START_ARRAY, p.nextToken());
assertToken(JsonToken.VALUE_STRING, p.nextToken());
assertEquals(toQuote, p.getText());
assertToken(JsonToken.END_ARRAY, p.nextToken());
}
}

//https://github.com/FasterXML/jackson-core/issues/1359
@Test
void checkNonSurrogates() throws Exception {
    // Payloads: three BMP characters that are not surrogates, plus one
    // supplementary character that is represented as a surrogate pair.

    // Inside the BMP, beyond surrogate block; 0xFF0C - full-width comma
    final String fullWidthValue = "foo" + new String(Character.toChars(0xFF0C)) + "bar";
    // Inside the BMP, beyond surrogate block; 0xFE6A - small form percent
    final String smallFormValue = "foo" + new String(Character.toChars(0xFE6A)) + "bar";
    // Inside the BMP, before the surrogate block; 0x3042 - Hiragana A
    final String hiraganaValue = "foo" + new String(Character.toChars(0x3042)) + "bar";
    // Outside the BMP; 0x1F60A - emoji
    final String emojiValue = new String(Character.toChars(0x1F60A));

    final JsonFactory combiningFactory = JsonFactory.builder()
            .enable(JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8)
            .build();

    final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (JsonGenerator g = combiningFactory.createGenerator(ObjectWriteContext.empty(), bytes)) {
        g.writeStartObject();
        g.writeStringProperty("test_full_width", fullWidthValue);
        g.writeStringProperty("test_small_form", smallFormValue);
        g.writeStringProperty("test_hiragana", hiraganaValue);
        g.writeStringProperty("test_emoji", emojiValue);
        g.writeEndObject();
    }

    // Decode the produced bytes and verify every value is present unchanged
    final String written = bytes.toString("UTF-8");
    assertTrue(written.contains("foo\uFF0Cbar"));
    assertTrue(written.contains("foo\uFE6Abar"));
    assertTrue(written.contains("foo\u3042bar"));
    assertTrue(written.contains("\"test_emoji\":\"\uD83D\uDE0A\""));
}
}

0 comments on commit 9253f9f

Please sign in to comment.