Skip to content

Commit

Permalink
Clean up parseInputStream
Browse files (browse the repository at this point in the history)
  • Loading branch information
Isira-Seneviratne committed Dec 29, 2023
1 parent f81acc6 commit 3262f81
Showing 1 changed file with 8 additions and 18 deletions.
26 changes: 8 additions & 18 deletions src/main/java/org/jsoup/helper/DataUtil.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,6 @@

import org.jsoup.internal.ControllableInputStream;
import org.jsoup.internal.Normalizer;
import org.jsoup.internal.SharedConstants;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
Expand Down Expand Up @@ -172,16 +171,15 @@ static void crossStreams(final InputStream in, final OutputStream out) throws IO
static Document parseInputStream(@Nullable InputStream input, @Nullable String charsetName, String baseUri, Parser parser) throws IOException {
if (input == null) // empty body
return new Document(baseUri);
input = ControllableInputStream.wrap(input, DefaultBufferSize, 0);

@Nullable Document doc = null;

// read the start of the stream and look for a BOM or meta charset
try {
input.mark(DefaultBufferSize);
ByteBuffer firstBytes = readToByteBuffer(input, firstReadBufferSize - 1); // -1 because we read one more to see if completed. First read is < buffer size, so can't be invalid.
boolean fullyRead = (input.read() == -1);
input.reset();
try (InputStream wrappedInputStream = ControllableInputStream.wrap(input, DefaultBufferSize, 0)) {
wrappedInputStream.mark(DefaultBufferSize);
ByteBuffer firstBytes = readToByteBuffer(wrappedInputStream, firstReadBufferSize - 1); // -1 because we read one more to see if completed. First read is < buffer size, so can't be invalid.
boolean fullyRead = (wrappedInputStream.read() == -1);
wrappedInputStream.reset();

// look for BOM - overrides any other header or input
BomCharset bomCharset = detectCharsetFromBom(firstBytes);
Expand Down Expand Up @@ -222,9 +220,8 @@ else if (first instanceof Comment) {
if (comment.isXmlDeclaration())
decl = comment.asXmlDeclaration();
}
if (decl != null) {
if (decl.name().equalsIgnoreCase("xml"))
foundCharset = decl.attr("encoding");
if (decl != null && decl.name().equalsIgnoreCase("xml")) {
foundCharset = decl.attr("encoding");
}
}
foundCharset = validateCharset(foundCharset);
Expand All @@ -241,8 +238,7 @@ else if (first instanceof Comment) {
if (doc == null) {
if (charsetName == null)
charsetName = defaultCharsetName;
BufferedReader reader = new BufferedReader(new InputStreamReader(input, Charset.forName(charsetName)), DefaultBufferSize); // Android level does not allow us try-with-resources
try {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(wrappedInputStream, Charset.forName(charsetName)), DefaultBufferSize)) {
if (bomCharset != null && bomCharset.offset) { // creating the buffered reader ignores the input pos, so must skip here
long skipped = reader.skip(1);
Validate.isTrue(skipped == 1); // WTF if this fails.
Expand All @@ -260,14 +256,8 @@ else if (first instanceof Comment) {
doc.charset(UTF_8);
}
}
finally {
reader.close();
}
}
}
finally {
input.close();
}
return doc;
}

Expand Down

0 comments on commit 3262f81

Please sign in to comment.