From d71ff962b08aca2f1c9c1724dfdab5bc1ec6ecd2 Mon Sep 17 00:00:00 2001 From: Sean McArthur Date: Mon, 18 Dec 2023 08:49:44 -0500 Subject: [PATCH] fix(http1): add internal limit for chunked extensions (#3495) The chunked transfer-encoding allows for extensions within the header of each chunk. hyper currently ignores the extension bytes. Sending large amounts of bytes in the extensions will waste CPU reaing and skipping them. This change adds an internal limit to how many bytes will be read and ignored in a single body, before returning an error. Reported-by: Bartek Nowotarski --- src/proto/h1/decode.rs | 91 +++++++++++++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 14 deletions(-) diff --git a/src/proto/h1/decode.rs b/src/proto/h1/decode.rs index f855ecba06..265d3a49d0 100644 --- a/src/proto/h1/decode.rs +++ b/src/proto/h1/decode.rs @@ -11,6 +11,11 @@ use super::DecodedLength; use self::Kind::{Chunked, Eof, Length}; +/// Maximum amount of bytes allowed in chunked extensions. +/// +/// This limit is currentlty applied for the entire body, not per chunk. +const CHUNKED_EXTENSIONS_LIMIT: u64 = 1024 * 16; + /// Decoders to handle different Transfer-Encodings. /// /// If a message body does not include a Transfer-Encoding, it *should* @@ -25,7 +30,11 @@ enum Kind { /// A Reader used when a Content-Length header is passed with a positive integer. Length(u64), /// A Reader used when Transfer-Encoding is `chunked`. - Chunked(ChunkedState, u64), + Chunked { + state: ChunkedState, + chunk_len: u64, + extensions_cnt: u64, + }, /// A Reader used for responses that don't indicate a length or chunked. /// /// The bool tracks when EOF is seen on the transport. @@ -73,7 +82,11 @@ impl Decoder { pub(crate) fn chunked() -> Decoder { Decoder { - kind: Kind::Chunked(ChunkedState::new(), 0), + kind: Kind::Chunked { + state: ChunkedState::new(), + chunk_len: 0, + extensions_cnt: 0, + }, } } @@ -96,7 +109,12 @@ impl Decoder { pub(crate) fn is_eof(&self) -> bool { matches!( self.kind, - Length(0) | Chunked(ChunkedState::End, _) | Eof(true) + Length(0) + | Chunked { + state: ChunkedState::End, + .. + } + | Eof(true) ) } @@ -127,11 +145,15 @@ impl Decoder { Poll::Ready(Ok(buf)) } } - Chunked(ref mut state, ref mut size) => { + Chunked { + ref mut state, + ref mut chunk_len, + ref mut extensions_cnt, + } => { loop { let mut buf = None; // advances the chunked state - *state = ready!(state.step(cx, body, size, &mut buf))?; + *state = ready!(state.step(cx, body, chunk_len, extensions_cnt, &mut buf))?; if *state == ChunkedState::End { trace!("end of chunked"); return Poll::Ready(Ok(Bytes::new())); @@ -202,6 +224,7 @@ impl ChunkedState { cx: &mut Context<'_>, body: &mut R, size: &mut u64, + extensions_cnt: &mut u64, buf: &mut Option, ) -> Poll> { use self::ChunkedState::*; @@ -209,7 +232,7 @@ impl ChunkedState { Start => ChunkedState::read_start(cx, body, size), Size => ChunkedState::read_size(cx, body, size), SizeLws => ChunkedState::read_size_lws(cx, body), - Extension => ChunkedState::read_extension(cx, body), + Extension => ChunkedState::read_extension(cx, body, extensions_cnt), SizeLf => ChunkedState::read_size_lf(cx, body, *size), Body => ChunkedState::read_body(cx, body, size, buf), BodyCr => ChunkedState::read_body_cr(cx, body), @@ -306,6 +329,7 @@ impl ChunkedState { fn read_extension( cx: &mut Context<'_>, rdr: &mut R, + extensions_cnt: &mut u64, ) -> Poll> { trace!("read_extension"); // We don't care about extensions really at all. Just ignore them. @@ -320,7 +344,17 @@ impl ChunkedState { io::ErrorKind::InvalidData, "invalid chunk extension contains newline", ))), - _ => Poll::Ready(Ok(ChunkedState::Extension)), // no supported extensions + _ => { + *extensions_cnt += 1; + if *extensions_cnt >= CHUNKED_EXTENSIONS_LIMIT { + Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "chunk extensions over limit", + ))) + } else { + Poll::Ready(Ok(ChunkedState::Extension)) + } + } // no supported extensions } } fn read_size_lf( @@ -491,7 +525,6 @@ mod tests { } } - #[cfg(feature = "nightly")] impl MemRead for Bytes { fn read_mem(&mut self, _: &mut Context<'_>, len: usize) -> Poll> { let n = std::cmp::min(len, self.len()); @@ -519,10 +552,12 @@ mod tests { let mut state = ChunkedState::new(); let rdr = &mut s.as_bytes(); let mut size = 0; + let mut ext_cnt = 0; loop { - let result = - futures_util::future::poll_fn(|cx| state.step(cx, rdr, &mut size, &mut None)) - .await; + let result = futures_util::future::poll_fn(|cx| { + state.step(cx, rdr, &mut size, &mut ext_cnt, &mut None) + }) + .await; let desc = format!("read_size failed for {:?}", s); state = result.expect(desc.as_str()); if state == ChunkedState::Body || state == ChunkedState::EndCr { @@ -536,10 +571,12 @@ mod tests { let mut state = ChunkedState::new(); let rdr = &mut s.as_bytes(); let mut size = 0; + let mut ext_cnt = 0; loop { - let result = - futures_util::future::poll_fn(|cx| state.step(cx, rdr, &mut size, &mut None)) - .await; + let result = futures_util::future::poll_fn(|cx| { + state.step(cx, rdr, &mut size, &mut ext_cnt, &mut None) + }) + .await; state = match result { Ok(s) => s, Err(e) => { @@ -632,6 +669,32 @@ mod tests { assert_eq!("1234567890abcdef", &result); } + #[tokio::test] + async fn test_read_chunked_extensions_over_limit() { + // construct a chunked body where each individual chunked extension + // is totally fine, but combined is over the limit. + let per_chunk = super::CHUNKED_EXTENSIONS_LIMIT * 2 / 3; + let mut scratch = vec![]; + for _ in 0..2 { + scratch.extend(b"1;"); + scratch.extend(b"x".repeat(per_chunk as usize)); + scratch.extend(b"\r\nA\r\n"); + } + scratch.extend(b"0\r\n\r\n"); + let mut mock_buf = Bytes::from(scratch); + + let mut decoder = Decoder::chunked(); + let buf1 = decoder.decode_fut(&mut mock_buf).await.expect("decode1"); + assert_eq!(&buf1[..], b"A"); + + let err = decoder + .decode_fut(&mut mock_buf) + .await + .expect_err("decode2"); + assert_eq!(err.kind(), io::ErrorKind::InvalidData); + assert_eq!(err.to_string(), "chunk extensions over limit"); + } + #[cfg(not(miri))] #[tokio::test] async fn test_read_chunked_trailer_with_missing_lf() {