Skip to content

Commit

Permalink
Don't recode URL paths
Browse files Browse the repository at this point in the history
URL paths containing a %2B were incorrectly recoded to a '+', or a '+' was recoded to a ' '. Fixed by reverting to the previous behavior of not encoding supplied paths, other than normalizing to ASCII.

Fixes #1952
  • Loading branch information
jhy committed Sep 8, 2023
1 parent e1880ad commit 1e69577
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 17 deletions.
5 changes: 5 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ Release 1.16.2 [PENDING]
* Bugfix: `form` elements and empty elements (such as `img`) did not have their attributes de-duplicated.
<https://github.com/jhy/jsoup/pull/1950>

* Bugfix: in Jsoup.connect(url), URL paths containing a %2B were incorrectly recoded to a '+', or a '+' was recoded
to a ' '. Fixed by reverting to the previous behavior of not encoding supplied paths, other than normalizing to
ASCII.
<https://github.com/jhy/jsoup/issues/1952>

* Change: removed previously deprecated methods Document#normalise, Element#forEach(org.jsoup.helper.Consumer<>),
Node#forEach(org.jsoup.helper.Consumer<>), and the org.jsoup.helper.Consumer interface; the latter being a
previously required compatibility shim prior to Android's de-sugaring support.
Expand Down
26 changes: 11 additions & 15 deletions src/main/java/org/jsoup/helper/UrlBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,24 +38,20 @@ URL build() {
u.getUserInfo(),
IDN.toASCII(decodePart(u.getHost())), // puny-code
u.getPort(),
decodePart(u.getPath()),
null, null // query and fragment appended later so as not to encode
null, null, null // path, query and fragment appended later so as not to encode
);

String normUrl = uri.toASCIIString();
if (q != null || u.getRef() != null) {
StringBuilder sb = StringUtil.borrowBuilder().append(normUrl);
if (q != null) {
sb.append('?');
appendToAscii(StringUtil.releaseBuilder(q), true, sb);
}
if (u.getRef() != null) {
sb.append('#');
appendToAscii(u.getRef(), false, sb);
}
normUrl = StringUtil.releaseBuilder(sb);
StringBuilder normUrl = StringUtil.borrowBuilder().append(uri.toASCIIString());
appendToAscii(u.getPath(), false, normUrl);
if (q != null) {
normUrl.append('?');
appendToAscii(StringUtil.releaseBuilder(q), true, normUrl);
}
u = new URL(normUrl);
if (u.getRef() != null) {
normUrl.append('#');
appendToAscii(u.getRef(), false, normUrl);
}
u = new URL(StringUtil.releaseBuilder(normUrl));
return u;
} catch (MalformedURLException | URISyntaxException | UnsupportedEncodingException e) {
// we assert here so that any incomplete normalization issues can be caught in devel. but in practise,
Expand Down
18 changes: 16 additions & 2 deletions src/test/java/org/jsoup/helper/HttpConnectionTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -255,13 +255,13 @@ public void caseInsensitiveHeaders(Locale locale) {
}

@Test public void encodeUrl() throws MalformedURLException {
URL url1 = new URL("https://test.com/foo bar/[One]?q=white space#frag");
URL url1 = new URL("https://test.com/foo%20bar/%5BOne%5D?q=white+space#frag");
URL url2 = new UrlBuilder(url1).build();
assertEquals("https://test.com/foo%20bar/%5BOne%5D?q=white+space#frag", url2.toExternalForm());
}

@Test void encodedUrlDoesntDoubleEncode() throws MalformedURLException {
URL url1 = new URL("https://test.com/foo bar/[One]?q=white space#frag ment");
URL url1 = new URL("https://test.com/foo%20bar/%5BOne%5D?q=white+space#frag%20ment");
URL url2 = new UrlBuilder(url1).build();
URL url3 = new UrlBuilder(url2).build();
assertEquals("https://test.com/foo%20bar/%5BOne%5D?q=white+space#frag%20ment", url2.toExternalForm());
Expand All @@ -274,6 +274,20 @@ public void caseInsensitiveHeaders(Locale locale) {
assertEquals("https://example.com/a%20b%20c?query+string", url.toExternalForm());
}

@Test void encodedUrlPathIsPreserved() {
// https://github.com/jhy/jsoup/issues/1952
Connection connect = Jsoup.connect("https://example.com/%2B32");
URL url = connect.request().url();
assertEquals("https://example.com/%2B32", url.toExternalForm());
}

@Test void urlPathPlusIsPreserved() {
// https://github.com/jhy/jsoup/issues/1952
Connection connect = Jsoup.connect("https://example.com/123+456");
URL url = connect.request().url();
assertEquals("https://example.com/123+456", url.toExternalForm());
}

@Test public void noUrlThrowsValidationError() throws IOException {
HttpConnection con = new HttpConnection();
boolean threw = false;
Expand Down

0 comments on commit 1e69577

Please sign in to comment.