Skip to content

Commit

Permalink
Performance: Avoid spending time decompressing skipped frames.
Browse files Browse the repository at this point in the history
This commit adds an internal `DecodeOptions::skip_frame_decoding` flag
and sets it to true from `ReadDecoder::finish_decoding_image_data`.
This results in the following performance gains when using the
`next_frame_info` public API to skip frames: `change: [-94.186% -94.170%
-94.157%] (p = 0.00 < 0.05)`

This commit is mainly motivated by https://crbug.com/371060427
and image-rs#510.  Hat tip
to @kornelski for suggesting this approach in
image-rs#510 (comment)
  • Loading branch information
anforowicz committed Nov 15, 2024
1 parent 2a619fe commit 4c2e2d1
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 3 deletions.
50 changes: 50 additions & 0 deletions src/decoder/read_decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ use crate::common::Info;
pub(crate) struct ReadDecoder<R: Read> {
reader: BufReader<R>,
decoder: StreamingDecoder,
state: State,
}

impl<R: Read> ReadDecoder<R> {
pub fn new(r: R) -> Self {
Self {
reader: BufReader::with_capacity(CHUNK_BUFFER_SIZE, r),
decoder: StreamingDecoder::new(),
state: State::Initial,
}
}

Expand All @@ -37,6 +39,7 @@ impl<R: Read> ReadDecoder<R> {
Self {
reader: BufReader::with_capacity(CHUNK_BUFFER_SIZE, r),
decoder,
state: State::Initial,
}
}

Expand Down Expand Up @@ -106,6 +109,10 @@ impl<R: Read> ReadDecoder<R> {
///
/// Prerequisite: **Not** within `IDAT` / `fdAT` chunk sequence.
pub fn read_until_image_data(&mut self) -> Result<(), DecodingError> {
// Caller should guarantee that we are in a state that meets function prerequisites.
debug_assert!(self.state != State::StartOfImageData);
debug_assert!(self.state != State::MiddleOfImageData);

loop {
match self.decode_next_without_image_data()? {
Decoded::ChunkBegin(_, chunk::IDAT) | Decoded::ChunkBegin(_, chunk::fdAT) => break,
Expand All @@ -119,6 +126,9 @@ impl<R: Read> ReadDecoder<R> {
_ => {}
}
}

self.state = State::StartOfImageData;
self.decoder.set_skip_frame_decoding(false);
Ok(())
}

Expand All @@ -130,6 +140,13 @@ impl<R: Read> ReadDecoder<R> {
&mut self,
image_data: &mut Vec<u8>,
) -> Result<ImageDataCompletionStatus, DecodingError> {
// Caller should guarantee that we are in a state that meets function prerequisites.
debug_assert!(matches!(
self.state,
State::StartOfImageData | State::MiddleOfImageData
));

self.state = State::MiddleOfImageData;
match self.decode_next(image_data)? {
Decoded::ImageData => Ok(ImageDataCompletionStatus::ExpectingMoreData),
Decoded::ImageDataFlushed => Ok(ImageDataCompletionStatus::Done),
Expand All @@ -148,9 +165,22 @@ impl<R: Read> ReadDecoder<R> {
///
/// Prerequisite: Input is currently positioned within `IDAT` / `fdAT` chunk sequence.
pub fn finish_decoding_image_data(&mut self) -> Result<(), DecodingError> {
match self.state {
// If skipping image data from start to end, then bypass decompression.
State::StartOfImageData => self.decoder.set_skip_frame_decoding(true),

// Otherwise, keep passing the remainder of the data to the decompressor
// (e.g. to detect adler32 errors if this is what `DecodeOptions` ask for).
State::MiddleOfImageData => (),

// Caller should guarantee that we are in a state that meets function prerequisites.
_ => unreachable!(),
}

loop {
let mut to_be_discarded = vec![];
if let ImageDataCompletionStatus::Done = self.decode_image_data(&mut to_be_discarded)? {
self.state = State::OutsideOfImageData;
return Ok(());
}
}
Expand All @@ -160,10 +190,12 @@ impl<R: Read> ReadDecoder<R> {
///
/// Prerequisite: `IEND` chunk hasn't been reached yet.
pub fn read_until_end_of_input(&mut self) -> Result<(), DecodingError> {
debug_assert!(self.state != State::EndOfInput);
while !matches!(
self.decode_next_and_discard_image_data()?,
Decoded::ImageEnd
) {}
self.state = State::EndOfInput;
Ok(())
}

Expand All @@ -177,3 +209,21 @@ pub(crate) enum ImageDataCompletionStatus {
ExpectingMoreData,
Done,
}

#[derive(Debug, Eq, PartialEq)]
enum State {
/// Before the first `IDAT`.
Initial,

/// At the start of a new `IDAT` or `fdAT` sequence.
StartOfImageData,

/// In the middle of an `IDAT` or `fdAT` sequence.
MiddleOfImageData,

/// Outside of an `IDAT` or `fdAT` sequence.
OutsideOfImageData,

/// After consuming `IEND`.
EndOfInput,
}
87 changes: 84 additions & 3 deletions src/decoder/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ pub struct DecodeOptions {
ignore_text_chunk: bool,
ignore_iccp_chunk: bool,
skip_ancillary_crc_failures: bool,
skip_frame_decoding: bool,
}

impl Default for DecodeOptions {
Expand All @@ -438,6 +439,7 @@ impl Default for DecodeOptions {
ignore_text_chunk: false,
ignore_iccp_chunk: false,
skip_ancillary_crc_failures: true,
skip_frame_decoding: false,
}
}
}
Expand Down Expand Up @@ -615,6 +617,10 @@ impl StreamingDecoder {
.set_skip_ancillary_crc_failures(skip_ancillary_crc_failures)
}

pub(crate) fn set_skip_frame_decoding(&mut self, skip_frame_decoding: bool) {
self.decode_options.skip_frame_decoding = skip_frame_decoding;
}

/// Low level StreamingDecoder interface.
///
/// Allows to stream partial data to the encoder. Returns a tuple containing the bytes that have
Expand Down Expand Up @@ -752,7 +758,16 @@ impl StreamingDecoder {
debug_assert!(type_str == IDAT || type_str == chunk::fdAT);
let len = std::cmp::min(buf.len(), self.current_chunk.remaining as usize);
let buf = &buf[..len];
let consumed = self.inflater.decompress(buf, image_data)?;
let consumed = if self.decode_options.skip_frame_decoding {
// `inflater.reset()` is not strictly necessary. We do it anyway to ensure
// that if (unexpectedly) `skip_frame_decoding` changes before the end of this
// frame, then it will (most likely) lead to decompression errors later (when
// restarting again from the middle).
self.inflater.reset();
len
} else {
self.inflater.decompress(buf, image_data)?
};
self.current_chunk.crc.update(&buf[..consumed]);
self.current_chunk.remaining -= consumed as u32;
if self.current_chunk.remaining == 0 {
Expand Down Expand Up @@ -811,7 +826,9 @@ impl StreamingDecoder {
&& (self.current_chunk.type_ == IDAT || self.current_chunk.type_ == chunk::fdAT)
{
self.current_chunk.type_ = type_str;
self.inflater.finish_compressed_chunks(image_data)?;
if !self.decode_options.skip_frame_decoding {
self.inflater.finish_compressed_chunks(image_data)?;
}
self.inflater.reset();
self.ready_for_idat_chunks = false;
self.ready_for_fdat_chunks = false;
Expand Down Expand Up @@ -1704,7 +1721,7 @@ mod tests {
use super::ScaledFloat;
use super::SourceChromaticities;
use crate::test_utils::*;
use crate::{Decoder, DecodingError, Reader};
use crate::{DecodeOptions, Decoder, DecodingError, Reader};
use approx::assert_relative_eq;
use byteorder::WriteBytesExt;
use std::cell::RefCell;
Expand Down Expand Up @@ -2748,4 +2765,68 @@ mod tests {
&err,
);
}

struct ByteByByteReader<R: Read>(R);

impl<R: Read> Read for ByteByByteReader<R> {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
let len = buf.len().min(1);
self.0.read(&mut buf[..len])
}
}

/// When decoding byte-by-byte we will decompress all image bytes *before*
/// consuming the final 4 bytes of the adler32 checksum. And we consume
/// image bytes using `ReadDecoder.decode_image_data` but the remainder of
/// the compressed data is consumed using
/// `ReadDecoder.finish_decoding_image_data`. In
/// https://github.com/image-rs/image-png/pull/533 we consider tweaking
/// `finish_decoding_image_data` to discard the final, non-image bytes,
/// rather then decompressing them - the initial naive implementation of
/// such short-circuiting would have effectively disabled adler32 checksum
/// processing when decoding byte-by-byte. This is what this test checks.
#[test]
fn test_broken_adler_checksum_when_decoding_byte_by_byte() {
let png = {
let width = 16;
let mut frame_data = generate_rgba8_with_width_and_height(width, width);

// Inject invalid adler32 checksum
let adler32: &mut [u8; 4] = frame_data.last_chunk_mut::<4>().unwrap();
*adler32 = [12, 34, 56, 78];

let mut png = VecDeque::new();
write_png_sig(&mut png);
write_rgba8_ihdr_with_width(&mut png, width);
write_chunk(&mut png, b"IDAT", &frame_data);
write_iend(&mut png);
png
};

// Default `DecodeOptions` ignore adler32 - we expect no errors here.
let mut reader = Decoder::new(ByteByByteReader(png.clone()))
.read_info()
.unwrap();
let mut buf = vec![0; reader.output_buffer_size()];
reader.next_frame(&mut buf).unwrap();

// But we expect to get an error after explicitly opting into adler32
// checks.
let mut reader = {
let mut options = DecodeOptions::default();
options.set_ignore_adler32(false);
Decoder::new_with_options(ByteByByteReader(png.clone()), options)
.read_info()
.unwrap()
};
let err = reader.next_frame(&mut buf).unwrap_err();
assert!(
matches!(&err, DecodingError::Format(_)),
"Unexpected kind of error: {:?}",
&err,
);
let msg = format!("{:?}", err);
assert!(msg.contains("CorruptFlateStream"));
assert!(msg.contains("WrongChecksum"));
}
}

0 comments on commit 4c2e2d1

Please sign in to comment.