From 8e8970650951cef63bae78dd41ba45c98a7d75a5 Mon Sep 17 00:00:00 2001 From: Talgat <31318590+TalgatAkhm@users.noreply.github.com> Date: Tue, 12 Sep 2023 04:10:54 +0300 Subject: [PATCH] Fix NPE in escaping if output settings cloned twice (#1964) * Fix NPE in escaping if output settings cloned twice * Refactored coreCharset to be set in charset() --------- Co-authored-by: Jonathan Hedley --- CHANGES | 3 +++ src/main/java/org/jsoup/nodes/Document.java | 12 +++++++----- src/test/java/org/jsoup/nodes/EntitiesTest.java | 12 ++++++++++++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/CHANGES b/CHANGES index f8c1966db9..989650e889 100644 --- a/CHANGES +++ b/CHANGES @@ -31,6 +31,9 @@ Release 1.16.2 [PENDING] * Bugfix: `form` elements and empty elements (such as `img`) did not have their attributes de-duplicated. + * Bugfix: if Document.OutputSettings was cloned from a clone, an NPE would be thrown when used. + + * Bugfix: in Jsoup.connect(url), URL paths containing a %2B were incorrectly recoded to a '+', or a '+' was recoded to a ' '. Fixed by reverting to the previous behavior of not encoding supplied paths, other than normalizing to ASCII. 
diff --git a/src/main/java/org/jsoup/nodes/Document.java b/src/main/java/org/jsoup/nodes/Document.java index 70c571f802..9930dc5e24 100644 --- a/src/main/java/org/jsoup/nodes/Document.java +++ b/src/main/java/org/jsoup/nodes/Document.java @@ -385,9 +385,9 @@ public static class OutputSettings implements Cloneable { public enum Syntax {html, xml} private Entities.EscapeMode escapeMode = Entities.EscapeMode.base; - private Charset charset = DataUtil.UTF_8; + private Charset charset; + Entities.CoreCharset coreCharset; // fast encoders for ascii and utf8 private final ThreadLocal<CharsetEncoder> encoderThreadLocal = new ThreadLocal<>(); // initialized by start of OuterHtmlVisitor - @Nullable Entities.CoreCharset coreCharset; // fast encoders for ascii and utf8 private boolean prettyPrint = true; private boolean outline = false; @@ -395,7 +395,9 @@ public enum Syntax {html, xml} private int maxPaddingWidth = 30; private Syntax syntax = Syntax.html; - public OutputSettings() {} + public OutputSettings() { + charset(DataUtil.UTF_8); + } /** * Get the document's current HTML escape mode: base, which provides a limited set of named HTML @@ -439,6 +441,7 @@ public Charset charset() { */ public OutputSettings charset(Charset charset) { this.charset = charset; + coreCharset = Entities.CoreCharset.byName(charset.name()); return this; } @@ -456,7 +459,6 @@ CharsetEncoder prepareEncoder() { // created at start of OuterHtmlVisitor so each pass has own encoder, so OutputSettings can be shared among threads CharsetEncoder encoder = charset.newEncoder(); encoderThreadLocal.set(encoder); - coreCharset = Entities.CoreCharset.byName(encoder.charset().name()); return encoder; } @@ -570,7 +572,7 @@ public OutputSettings clone() { } catch (CloneNotSupportedException e) { throw new RuntimeException(e); } - clone.charset(charset.name()); // new charset, coreCharset, and charset encoder + clone.charset(charset.name()); // new charset, coreCharset, and charset encoder clone.escapeMode = 
Entities.EscapeMode.valueOf(escapeMode.name()); // indentAmount, maxPaddingWidth, and prettyPrint are primitives so object.clone() will handle return clone; diff --git a/src/test/java/org/jsoup/nodes/EntitiesTest.java b/src/test/java/org/jsoup/nodes/EntitiesTest.java index 886dfca525..7243c2f55c 100644 --- a/src/test/java/org/jsoup/nodes/EntitiesTest.java +++ b/src/test/java/org/jsoup/nodes/EntitiesTest.java @@ -6,6 +6,7 @@ import static org.jsoup.nodes.Document.OutputSettings; import static org.jsoup.nodes.Entities.EscapeMode.*; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; public class EntitiesTest { @@ -163,4 +164,15 @@ public class EntitiesTest { Document xml = Jsoup.parse(input, "", Parser.xmlParser()); assertEquals(input, xml.html()); } + + @Test public void escapeByClonedOutputSettings() { + OutputSettings outputSettings = new OutputSettings(); + String text = "Hello &<> Å å π 新 there ¾ © »"; + OutputSettings clone1 = outputSettings.clone(); + OutputSettings clone2 = outputSettings.clone(); + + String escaped1 = assertDoesNotThrow(() -> Entities.escape(text, clone1)); + String escaped2 = assertDoesNotThrow(() -> Entities.escape(text, clone2)); + assertEquals(escaped1, escaped2); + } }