Skip to content

Commit

Permalink
Merge branch 'master' into escaping-npe-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
jhy committed Sep 12, 2023
2 parents 3fdfb64 + 23573ef commit f99b874
Show file tree
Hide file tree
Showing 20 changed files with 343 additions and 213 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ target/
.settings/
*Thrash*
bin/
.vscode/
25 changes: 25 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,37 @@ Release 1.16.2 [PENDING]
lastElementSibling, firstElementChild, and lastElementChild. They now filter/skip in place in the child-node list, vs
having to allocate and scan a complete Element filtered list.

* Improvement: optimized internal methods that previously called Element.children() to use filter/skip child-node list
accessors instead, reducing new Element List allocations.

* Improvement: tweaked the performance of parsing :pseudo selectors.

* Improvement: when using the `:empty` pseudo-selector, an element containing only blank text nodes is now considered
empty. Previously, an element containing any whitespace was not considered empty.
<https://github.com/jhy/jsoup/issues/1976>

* Bugfix: `form` elements and empty elements (such as `img`) did not have their attributes de-duplicated.
<https://github.com/jhy/jsoup/pull/1950>

* Bugfix: if Document.OutputSettings was cloned from a clone, an NPE would be thrown when used.
<https://github.com/jhy/jsoup/pull/1964>

* Bugfix: in Jsoup.connect(url), URL paths containing a %2B were incorrectly recoded to a '+', or a '+' was recoded
to a ' '. Fixed by reverting to the previous behavior of not encoding supplied paths, other than normalizing to
ASCII.
<https://github.com/jhy/jsoup/issues/1952>

* Bugfix: in Jsoup.connect(url), the ConstrainableInputStream would clear Thread interrupts when reading the body.
This precluded callers from spawning a thread, running a number of requests for a length of time, then joining that
thread after interrupting it.
<https://github.com/jhy/jsoup/issues/1991>

* Bugfix: when tracking HTML source positions, the closing tags for H1...H6 elements were not tracked correctly.
<https://github.com/jhy/jsoup/issues/1987>

* Bugfix: in Jsoup.connect(), a DELETE method request did not support a request body.
<https://github.com/jhy/jsoup/issues/1972>

* Change: removed previously deprecated methods Document#normalise, Element#forEach(org.jsoup.helper.Consumer<>),
Node#forEach(org.jsoup.helper.Consumer<>), and the org.jsoup.helper.Consumer interface; the latter being a
previously required compatibility shim prior to Android's de-sugaring support.
Expand Down
8 changes: 4 additions & 4 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>3.2.1</version>
<version>3.3.0</version>
<configuration>
<excludes>
<exclude>org/jsoup/examples/**</exclude>
Expand Down Expand Up @@ -147,7 +147,7 @@
<plugin>
<groupId>org.apache.felix</groupId>
<artifactId>maven-bundle-plugin</artifactId>
<version>5.1.8</version>
<version>5.1.9</version>
<executions>
<execution>
<id>bundle-manifest</id>
Expand Down Expand Up @@ -185,7 +185,7 @@
</plugin>
<plugin>
<artifactId>maven-failsafe-plugin</artifactId>
<version>3.1.0</version>
<version>3.1.2</version>
<executions>
<execution>
<goals>
Expand Down Expand Up @@ -307,7 +307,7 @@
<plugins>
<plugin>
<artifactId>maven-failsafe-plugin</artifactId>
<version>3.1.0</version>
<version>3.1.2</version>
<executions>
<execution>
<goals>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/Connection.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public interface Connection {
* GET and POST http methods.
*/
enum Method {
GET(false), POST(true), PUT(true), DELETE(false), PATCH(true), HEAD(false), OPTIONS(false), TRACE(false);
GET(false), POST(true), PUT(true), DELETE(true), PATCH(true), HEAD(false), OPTIONS(false), TRACE(false);

private final boolean hasBody;

Expand Down
26 changes: 11 additions & 15 deletions src/main/java/org/jsoup/helper/UrlBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,24 +38,20 @@ URL build() {
u.getUserInfo(),
IDN.toASCII(decodePart(u.getHost())), // puny-code
u.getPort(),
decodePart(u.getPath()),
null, null // query and fragment appended later so as not to encode
null, null, null // path, query and fragment appended later so as not to encode
);

String normUrl = uri.toASCIIString();
if (q != null || u.getRef() != null) {
StringBuilder sb = StringUtil.borrowBuilder().append(normUrl);
if (q != null) {
sb.append('?');
appendToAscii(StringUtil.releaseBuilder(q), true, sb);
}
if (u.getRef() != null) {
sb.append('#');
appendToAscii(u.getRef(), false, sb);
}
normUrl = StringUtil.releaseBuilder(sb);
StringBuilder normUrl = StringUtil.borrowBuilder().append(uri.toASCIIString());
appendToAscii(u.getPath(), false, normUrl);
if (q != null) {
normUrl.append('?');
appendToAscii(StringUtil.releaseBuilder(q), true, normUrl);
}
u = new URL(normUrl);
if (u.getRef() != null) {
normUrl.append('#');
appendToAscii(u.getRef(), false, normUrl);
}
u = new URL(StringUtil.releaseBuilder(normUrl));
return u;
} catch (MalformedURLException | URISyntaxException | UnsupportedEncodingException e) {
// we assert here so that any incomplete normalization issues can be caught in devel. but in practise,
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jsoup/helper/W3CDom.java
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ public Document fromJsoup(org.jsoup.nodes.Element in) {
}
out.setXmlStandalone(true);
// if in is Document, use the root element, not the wrapping document, as the context:
org.jsoup.nodes.Element context = (in instanceof org.jsoup.nodes.Document) ? in.child(0) : in;
org.jsoup.nodes.Element context = (in instanceof org.jsoup.nodes.Document) ? in.firstElementChild() : in;
out.setUserData(ContextProperty, context, null);
convert(inDoc != null ? inDoc : in, out);
return out;
Expand Down Expand Up @@ -251,7 +251,7 @@ public void convert(org.jsoup.nodes.Element in, Document out) {
}
builder.syntax = inDoc.outputSettings().syntax();
}
org.jsoup.nodes.Element rootEl = in instanceof org.jsoup.nodes.Document ? in.child(0) : in; // skip the #root node if a Document
org.jsoup.nodes.Element rootEl = in instanceof org.jsoup.nodes.Document ? in.firstElementChild() : in; // skip the #root node if a Document
NodeTraversor.traverse(builder, rootEl);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ public static ConstrainableInputStream wrap(InputStream in, int bufferSize, int
public int read(byte[] b, int off, int len) throws IOException {
if (interrupted || capped && remaining <= 0)
return -1;
if (Thread.interrupted()) {
// interrupted latches, because parse() may call twice (and we still want the thread interupt to clear)
if (Thread.currentThread().isInterrupted()) {
// interrupted latches, because parse() may call twice
interrupted = true;
return -1;
}
Expand Down
16 changes: 11 additions & 5 deletions src/main/java/org/jsoup/nodes/Document.java
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,11 @@ else if (!(node instanceof LeafNode)) // scans forward across comments, text, pr
@return the root HTML element.
*/
private Element htmlEl() {
for (Element el: childElementsList()) {
Element el = firstElementChild();
while (el != null) {
if (el.normalName().equals("html"))
return el;
el = el.nextElementSibling();
}
return appendElement("html");
}
Expand All @@ -118,10 +120,12 @@ private Element htmlEl() {
@return {@code head} element.
*/
public Element head() {
Element html = htmlEl();
for (Element el: html.childElementsList()) {
final Element html = htmlEl();
Element el = html.firstElementChild();
while (el != null) {
if (el.normalName().equals("head"))
return el;
el = el.nextElementSibling();
}
return html.prependElement("head");
}
Expand All @@ -136,10 +140,12 @@ public Element head() {
had no contents, or the outermost {@code <frameset>} element for frameset documents.
*/
public Element body() {
Element html = htmlEl();
for (Element el: html.childElementsList()) {
final Element html = htmlEl();
Element el = html.firstElementChild();
while (el != null) {
if ("body".equals(el.normalName()) || "frameset".equals(el.normalName()))
return el;
el = el.nextElementSibling();
}
return html.appendElement("body");
}
Expand Down
6 changes: 4 additions & 2 deletions src/main/java/org/jsoup/nodes/Node.java
Original file line number Diff line number Diff line change
Expand Up @@ -456,8 +456,10 @@ public Node wrap(String html) {
}

private Element getDeepChild(Element el) {
while (el.childrenSize() > 0) {
el = el.childElementsList().get(0);
Element child = el.firstElementChild();
while (child != null) {
el = child;
child = child.firstElementChild();
}
return el;
}
Expand Down
16 changes: 4 additions & 12 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -424,20 +424,12 @@ Element popStackToClose(String elName) {
// elnames is sorted, comes from Constants
void popStackToClose(String... elNames) {
for (int pos = stack.size() -1; pos >= 0; pos--) {
Element next = stack.get(pos);
Element el = stack.get(pos);
stack.remove(pos);
if (inSorted(next.normalName(), elNames))
break;
}
}

void popStackToBefore(String elName) {
for (int pos = stack.size() -1; pos >= 0; pos--) {
Element next = stack.get(pos);
if (next.normalName().equals(elName)) {
if (inSorted(el.normalName(), elNames)) {
if (currentToken instanceof Token.EndTag)
onNodeClosed(el, currentToken);
break;
} else {
stack.remove(pos);
}
}
}
Expand Down
26 changes: 16 additions & 10 deletions src/main/java/org/jsoup/select/Evaluator.java
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ public static final class IsLastChild extends Evaluator {
@Override
public boolean matches(Element root, Element element) {
final Element p = element.parent();
return p != null && !(p instanceof Document) && element.elementSiblingIndex() == p.childrenSize()-1;
return p != null && !(p instanceof Document) && element == p.lastElementChild();
}

@Override
Expand Down Expand Up @@ -617,9 +617,11 @@ protected int calculatePosition(Element root, Element element) {
return 0;

int pos = 0;
int size = parent.childrenSize();
for (int i = element.elementSiblingIndex(); i < size; i++) {
if (parent.child(i).normalName().equals(element.normalName())) pos++;
Element next = element;
while (next != null) {
if (next.normalName().equals(element.normalName()))
pos++;
next = next.nextElementSibling();
}
return pos;
}
Expand All @@ -637,7 +639,7 @@ public static final class IsFirstChild extends Evaluator {
@Override
public boolean matches(Element root, Element element) {
final Element p = element.parent();
return p != null && !(p instanceof Document) && element.elementSiblingIndex() == 0;
return p != null && !(p instanceof Document) && element == p.firstElementChild();
}

@Override
Expand All @@ -654,7 +656,7 @@ public String toString() {
public static final class IsRoot extends Evaluator {
@Override
public boolean matches(Element root, Element element) {
final Element r = root instanceof Document?root.child(0):root;
final Element r = root instanceof Document ? root.firstElementChild() : root;
return element == r;
}

Expand Down Expand Up @@ -687,12 +689,13 @@ public boolean matches(Element root, Element element) {
if (p==null || p instanceof Document) return false;

int pos = 0;
int size = p.childrenSize();
for (int i = 0; i < size; i++) {
if (p.child(i).normalName().equals(element.normalName()))
Element next = p.firstElementChild();
while (next != null) {
if (next.normalName().equals(element.normalName()))
pos++;
if (pos > 1)
break;
next = next.nextElementSibling();
}
return pos == 1;
}
Expand All @@ -707,7 +710,10 @@ public static final class IsEmpty extends Evaluator {
public boolean matches(Element root, Element element) {
List<Node> family = element.childNodes();
for (Node n : family) {
if (!(n instanceof Comment || n instanceof XmlDeclaration || n instanceof DocumentType)) return false;
if (n instanceof TextNode)
return ((TextNode)n).isBlank();
if (!(n instanceof Comment || n instanceof XmlDeclaration || n instanceof DocumentType))
return false;
}
return true;
}
Expand Down
Loading

0 comments on commit f99b874

Please sign in to comment.