From 68ce79ac71d2887636c1cecdd85452b17ef3671d Mon Sep 17 00:00:00 2001 From: ahmetzjanovta Date: Sat, 3 Jun 2023 18:59:17 +0300 Subject: [PATCH 1/2] Fix NPE in escaping if output settings cloned twice --- src/main/java/org/jsoup/nodes/Document.java | 1 + src/test/java/org/jsoup/nodes/EntitiesTest.java | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/main/java/org/jsoup/nodes/Document.java b/src/main/java/org/jsoup/nodes/Document.java index 273e33aabd..575289778c 100644 --- a/src/main/java/org/jsoup/nodes/Document.java +++ b/src/main/java/org/jsoup/nodes/Document.java @@ -565,6 +565,7 @@ public OutputSettings clone() { throw new RuntimeException(e); } clone.charset(charset.name()); // new charset and charset encoder + clone.coreCharset = Entities.CoreCharset.byName(charset.name()); clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name()); // indentAmount, maxPaddingWidth, and prettyPrint are primitives so object.clone() will handle return clone; diff --git a/src/test/java/org/jsoup/nodes/EntitiesTest.java b/src/test/java/org/jsoup/nodes/EntitiesTest.java index 886dfca525..7243c2f55c 100644 --- a/src/test/java/org/jsoup/nodes/EntitiesTest.java +++ b/src/test/java/org/jsoup/nodes/EntitiesTest.java @@ -6,6 +6,7 @@ import static org.jsoup.nodes.Document.OutputSettings; import static org.jsoup.nodes.Entities.EscapeMode.*; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; public class EntitiesTest { @@ -163,4 +164,15 @@ public class EntitiesTest { Document xml = Jsoup.parse(input, "", Parser.xmlParser()); assertEquals(input, xml.html()); } + + @Test public void escapeByClonedOutputSettings() { + OutputSettings outputSettings = new OutputSettings(); + String text = "Hello &<> Å å π 新 there ¾ © »"; + OutputSettings clone1 = outputSettings.clone(); + OutputSettings clone2 = outputSettings.clone(); + + String escaped1 = assertDoesNotThrow(() -> Entities.escape(text, 
clone1)); + String escaped2 = assertDoesNotThrow(() -> Entities.escape(text, clone2)); + assertEquals(escaped1, escaped2); + } } From 3fdfb64ddd28f65753592bac57f466f521d802af Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Tue, 12 Sep 2023 11:07:09 +1000 Subject: [PATCH 2/2] Refactored coreCharset to be set in charset() --- CHANGES | 3 +++ src/main/java/org/jsoup/nodes/Document.java | 13 +++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/CHANGES b/CHANGES index 46bb3a3ceb..e803dd39dd 100644 --- a/CHANGES +++ b/CHANGES @@ -22,6 +22,9 @@ Release 1.16.2 [PENDING] * Bugfix: `form` elements and empty elements (such as `img`) did not have their attributes de-duplicated. + * Bugfix: if Document.OutputSettings was cloned from a clone, an NPE would be thrown when used. + + * Change: removed previously deprecated methods Document#normalise, Element#forEach(org.jsoup.helper.Consumer<>), Node#forEach(org.jsoup.helper.Consumer<>), and the org.jsoup.helper.Consumer interface; the latter being a previously required compatibility shim prior to Android's de-sugaring support. 
diff --git a/src/main/java/org/jsoup/nodes/Document.java b/src/main/java/org/jsoup/nodes/Document.java index 575289778c..20ff8c74d2 100644 --- a/src/main/java/org/jsoup/nodes/Document.java +++ b/src/main/java/org/jsoup/nodes/Document.java @@ -379,9 +379,9 @@ public static class OutputSettings implements Cloneable { public enum Syntax {html, xml} private Entities.EscapeMode escapeMode = Entities.EscapeMode.base; - private Charset charset = DataUtil.UTF_8; + private Charset charset; + Entities.CoreCharset coreCharset; // fast encoders for ascii and utf8 private final ThreadLocal<CharsetEncoder> encoderThreadLocal = new ThreadLocal<>(); // initialized by start of OuterHtmlVisitor - @Nullable Entities.CoreCharset coreCharset; // fast encoders for ascii and utf8 private boolean prettyPrint = true; private boolean outline = false; @@ -389,7 +389,9 @@ public enum Syntax {html, xml} private int maxPaddingWidth = 30; private Syntax syntax = Syntax.html; - public OutputSettings() {} + public OutputSettings() { + charset(DataUtil.UTF_8); + } /** * Get the document's current HTML escape mode: base, which provides a limited set of named HTML @@ -433,6 +435,7 @@ public Charset charset() { */ public OutputSettings charset(Charset charset) { this.charset = charset; + coreCharset = Entities.CoreCharset.byName(charset.name()); return this; } @@ -450,7 +453,6 @@ CharsetEncoder prepareEncoder() { // created at start of OuterHtmlVisitor so each pass has own encoder, so OutputSettings can be shared among threads CharsetEncoder encoder = charset.newEncoder(); encoderThreadLocal.set(encoder); - coreCharset = Entities.CoreCharset.byName(encoder.charset().name()); return encoder; } @@ -564,8 +566,7 @@ public OutputSettings clone() { } catch (CloneNotSupportedException e) { throw new RuntimeException(e); } - clone.charset(charset.name()); // new charset and charset encoder - clone.coreCharset = Entities.CoreCharset.byName(charset.name()); + clone.charset(charset.name()); // new charset, coreCharset, and 
charset encoder clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name()); // indentAmount, maxPaddingWidth, and prettyPrint are primitives so object.clone() will handle return clone;