From 7e9160110f3cac233e748f4f31c2ec22f0286f6b Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Mon, 4 Dec 2023 15:42:51 +1100 Subject: [PATCH] Sub-query should terminate on a combinator after any clause Missed setting seenCombinator for attributes / sub-clauses. Fixes #2073 Regression in: d126488db626790a69ce99bb96bf3cfe9c591534 Also added EvaluatorDebug which creates a XML outline and a sexpr for the query, to aid visualizing the query. Might make it public and add it to try.jsoup.org. --- CHANGES.md | 4 + .../java/org/jsoup/select/QueryParser.java | 18 +-- .../java/org/jsoup/select/EvaluatorDebug.java | 83 +++++++++++++ .../org/jsoup/select/QueryParserTest.java | 115 ++++++++++++------ .../java/org/jsoup/select/SelectorTest.java | 10 ++ 5 files changed, 186 insertions(+), 44 deletions(-) create mode 100644 src/test/java/org/jsoup/select/EvaluatorDebug.java diff --git a/CHANGES.md b/CHANGES.md index a9fa0a289f..3c1b3f1850 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,6 +17,10 @@ thrown. [2068](https://github.com/jhy/jsoup/issues/2068) * A multi-point encoded emoji entity may be incorrectly decoded to the replacement character. [2074](https://github.com/jhy/jsoup/issues/2074) +* (Regression) in a selector like `parent [attr=va], other`, the `, OR` was binding to `[attr=va]` instead of + `parent [attr=va]`, causing incorrect selections. The fix includes a EvaluatorDebug class that generates a sexpr + to represent the query, allowing simpler and more thorough query parse + tests. [2073](https://github.com/jhy/jsoup/issues/2073) --- Older changes for versions 0.1.1 (2010-Jan-31) through 1.17.1 (2023-Nov-27) may be found in diff --git a/src/main/java/org/jsoup/select/QueryParser.java b/src/main/java/org/jsoup/select/QueryParser.java index b495336602..ed003cc400 100644 --- a/src/main/java/org/jsoup/select/QueryParser.java +++ b/src/main/java/org/jsoup/select/QueryParser.java @@ -145,21 +145,21 @@ private void combinator(char combinator) { private String consumeSubQuery() { StringBuilder sq = StringUtil.borrowBuilder(); - boolean seenNonCombinator = false; // eat until we hit a combinator after eating something else + boolean seenClause = false; // eat until we hit a combinator after eating something else while (!tq.isEmpty()) { + if (tq.matchesAny(Combinators)) { + if (seenClause) + break; + sq.append(tq.consume()); + continue; + } + seenClause = true; if (tq.matches("(")) sq.append("(").append(tq.chompBalanced('(', ')')).append(")"); else if (tq.matches("[")) sq.append("[").append(tq.chompBalanced('[', ']')).append("]"); - else if (tq.matchesAny(Combinators)) - if (seenNonCombinator) - break; - else - sq.append(tq.consume()); - else { - seenNonCombinator = true; + else sq.append(tq.consume()); - } } return StringUtil.releaseBuilder(sq); } diff --git a/src/test/java/org/jsoup/select/EvaluatorDebug.java b/src/test/java/org/jsoup/select/EvaluatorDebug.java new file mode 100644 index 0000000000..7174ce58f2 --- /dev/null +++ b/src/test/java/org/jsoup/select/EvaluatorDebug.java @@ -0,0 +1,83 @@ +package org.jsoup.select; + +import org.jsoup.internal.StringUtil; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; + +public class EvaluatorDebug { + + /** + Cast an Evaluator into a pseudo Document, to help visualize the query. Quite coupled to the current impl. + */ + public static Document asDocument(Evaluator eval) { + Document doc = new Document(null); + doc.outputSettings().outline(true).indentAmount(2); + + Element el = asElement(eval); + doc.appendChild(el); + + return doc; + } + + public static Document asDocument(String query) { + Evaluator eval = QueryParser.parse(query); + return asDocument(eval); + } + + public static Element asElement(Evaluator eval) { + Class evalClass = eval.getClass(); + Element el = new Element(evalClass.getSimpleName()); + el.attr("css", eval.toString()); + el.attr("cost", Integer.toString(eval.cost())); + + if (eval instanceof CombiningEvaluator) { + for (Evaluator inner : ((CombiningEvaluator) eval).sortedEvaluators) { + el.appendChild(asElement(inner)); + } + } else if (eval instanceof StructuralEvaluator.ImmediateParentRun) { + for (Evaluator inner : ((StructuralEvaluator.ImmediateParentRun) eval).evaluators) { + el.appendChild(asElement(inner)); + } + } else if (eval instanceof StructuralEvaluator) { + Evaluator inner = ((StructuralEvaluator) eval).evaluator; + el.appendChild(asElement(inner)); + } + + return el; + } + + public static String sexpr(String query) { + Document doc = asDocument(query); + + SexprVisitor sv = new SexprVisitor(); + doc.childNode(0).traverse(sv); // skip outer #document + return sv.result(); + } + + static class SexprVisitor implements NodeVisitor { + StringBuilder sb = StringUtil.borrowBuilder(); + + @Override public void head(Node node, int depth) { + sb + .append('(') + .append(node.nodeName()); + + if (node.childNodeSize() == 0) + sb + .append(" '") + .append(node.attr("css")) + .append("'"); + else + sb.append(" "); + } + + @Override public void tail(Node node, int depth) { + sb.append(')'); + } + + String result() { + return StringUtil.releaseBuilder(sb); + } + } +} diff --git a/src/test/java/org/jsoup/select/QueryParserTest.java b/src/test/java/org/jsoup/select/QueryParserTest.java index 51b7c925d2..aa2ca80461 100644 --- a/src/test/java/org/jsoup/select/QueryParserTest.java +++ b/src/test/java/org/jsoup/select/QueryParserTest.java @@ -4,6 +4,7 @@ import org.jsoup.nodes.Document; import org.junit.jupiter.api.Test; +import static org.jsoup.select.EvaluatorDebug.sexpr; import static org.junit.jupiter.api.Assertions.*; /** @@ -26,49 +27,80 @@ public class QueryParserTest { @Test public void testImmediateParentRun() { String query = "div > p > bold.brass"; - Evaluator eval1 = QueryParser.parse(query); - assertEquals(query, eval1.toString()); - - StructuralEvaluator.ImmediateParentRun run = (StructuralEvaluator.ImmediateParentRun) eval1; - assertTrue(run.evaluators.get(0) instanceof Evaluator.Tag); - assertTrue(run.evaluators.get(1) instanceof Evaluator.Tag); - assertTrue(run.evaluators.get(2) instanceof CombiningEvaluator.And); + assertEquals("(ImmediateParentRun (Tag 'div')(Tag 'p')(And (Tag 'bold')(Class '.brass')))", sexpr(query)); + + /* + + + + + + + + + */ } @Test public void testOrGetsCorrectPrecedence() { // tests that a selector "a b, c d, e f" evals to (a AND b) OR (c AND d) OR (e AND f)" // top level or, three child ands - Evaluator eval = QueryParser.parse("a b, c d, e f"); - assertTrue(eval instanceof CombiningEvaluator.Or); - CombiningEvaluator.Or or = (CombiningEvaluator.Or) eval; - assertEquals(3, or.evaluators.size()); - for (Evaluator innerEval: or.evaluators) { - assertTrue(innerEval instanceof CombiningEvaluator.And); - CombiningEvaluator.And and = (CombiningEvaluator.And) innerEval; - assertEquals(2, and.evaluators.size()); - assertTrue(and.evaluators.get(0) instanceof StructuralEvaluator.Parent); - assertTrue(and.evaluators.get(1) instanceof Evaluator.Tag); - } + String query = "a b, c d, e f"; + String parsed = sexpr(query); + assertEquals("(Or (And (Tag 'b')(Parent (Tag 'a')))(And (Tag 'd')(Parent (Tag 'c')))(And (Tag 'f')(Parent (Tag 'e'))))", parsed); + + /* + + + + + + + + + + + + + + + + + + + + + */ } @Test public void testParsesMultiCorrectly() { - String query = ".foo.qux > ol.bar, ol > li + li"; - Evaluator eval = QueryParser.parse(query); - assertTrue(eval instanceof CombiningEvaluator.Or); - CombiningEvaluator.Or or = (CombiningEvaluator.Or) eval; - assertEquals(2, or.evaluators.size()); - - StructuralEvaluator.ImmediateParentRun run = (StructuralEvaluator.ImmediateParentRun) or.evaluators.get(0); - CombiningEvaluator.And andRight = (CombiningEvaluator.And) or.evaluators.get(1); - - assertEquals(".foo.qux > ol.bar", run.toString()); - assertEquals(2, run.evaluators.size()); - Evaluator runAnd = run.evaluators.get(0); - assertTrue(runAnd instanceof CombiningEvaluator.And); - assertEquals(".foo.qux", runAnd.toString()); - assertEquals("ol > li + li", andRight.toString()); - assertEquals(2, andRight.evaluators.size()); - assertEquals(query, eval.toString()); + String query = ".foo.qux[attr=bar] > ol.bar, ol > li + li"; + String parsed = sexpr(query); + assertEquals("(Or (And (Tag 'li')(ImmediatePreviousSibling (ImmediateParentRun (Tag 'ol')(Tag 'li'))))(ImmediateParentRun (And (AttributeWithValue '[attr=bar]')(Class '.foo')(Class '.qux'))(And (Tag 'ol')(Class '.bar'))))", parsed); + + /* + + + + + + + + + + + + + + + + + + + + + + + */ } @Test public void exceptionOnUncloseAttribute() { @@ -97,5 +129,18 @@ public class QueryParserTest { String q = "a:not(:has(span.foo)) b d > e + f ~ g"; Evaluator parse = QueryParser.parse(q); assertEquals(q, parse.toString()); + String parsed = sexpr(q); + assertEquals("(And (Tag 'g')(PreviousSibling (And (Tag 'f')(ImmediatePreviousSibling (ImmediateParentRun (And (Tag 'd')(Parent (And (Tag 'b')(Parent (And (Tag 'a')(Not (Has (And (Tag 'span')(Class '.foo')))))))))(Tag 'e'))))))", parsed); + } + + @Test public void parsesOrAfterAttribute() { + // https://github.com/jhy/jsoup/issues/2073 + String q = "#parent [class*=child], .some-other-selector .nested"; + String parsed = sexpr(q); + assertEquals("(Or (And (Parent (Id '#parent'))(AttributeWithValueContaining '[class*=child]'))(And (Class '.nested')(Parent (Class '.some-other-selector'))))", parsed); + + assertEquals("(Or (Class '.some-other-selector')(And (Parent (Id '#parent'))(AttributeWithValueContaining '[class*=child]')))", sexpr("#parent [class*=child], .some-other-selector")); + assertEquals("(Or (Class '.some-other-selector')(And (Id '#el')(AttributeWithValueContaining '[class*=child]')))", sexpr("#el[class*=child], .some-other-selector")); + assertEquals("(Or (And (Parent (Id '#parent'))(AttributeWithValueContaining '[class*=child]'))(And (Class '.nested')(Parent (Class '.some-other-selector'))))", sexpr("#parent [class*=child], .some-other-selector .nested")); } } diff --git a/src/test/java/org/jsoup/select/SelectorTest.java b/src/test/java/org/jsoup/select/SelectorTest.java index cd2f519a27..fd5740e41c 100644 --- a/src/test/java/org/jsoup/select/SelectorTest.java +++ b/src/test/java/org/jsoup/select/SelectorTest.java @@ -1220,4 +1220,14 @@ public void wildcardNamespaceMatchesNoNamespace() { Evaluator parse = QueryParser.parse(query); assertEquals(query, parse.toString()); } + + @Test public void orAfterClass() { + // see also QueryParserTest#parsesOrAfterAttribute + // https://github.com/jhy/jsoup/issues/2073 + Document doc = Jsoup.parse("
"); + String q = "#parent [class*=child], .some-other-selector .nested"; + assertEquals("(Or (And (Parent (Id '#parent'))(AttributeWithValueContaining '[class*=child]'))(And (Class '.nested')(Parent (Class '.some-other-selector'))))", EvaluatorDebug.sexpr(q)); + Elements els = doc.select(q); + assertEquals(3, els.size()); + } }