diff --git a/tokio/src/io/util/read_line.rs b/tokio/src/io/util/read_line.rs index d625a76b80a..6b17e06dbd0 100644 --- a/tokio/src/io/util/read_line.rs +++ b/tokio/src/io/util/read_line.rs @@ -5,6 +5,7 @@ use std::future::Future; use std::io; use std::mem; use std::pin::Pin; +use std::string::FromUtf8Error; use std::task::{Context, Poll}; cfg_io_util! { @@ -16,7 +17,7 @@ cfg_io_util! { /// This is the buffer we were provided. It will be replaced with an empty string /// while reading to postpone utf-8 handling until after reading. output: &'a mut String, - /// The actual allocation of the string is moved into a vector instead. + /// The actual allocation of the string is moved into this vector instead. buf: Vec, /// The number of bytes appended to buf. This can be less than buf.len() if /// the buffer was not empty when the operation was started. @@ -42,31 +43,33 @@ fn put_back_original_data(output: &mut String, mut vector: Vec, num_bytes_re *output = String::from_utf8(vector).expect("The original data must be valid utf-8."); } -pub(super) fn read_line_internal( - reader: Pin<&mut R>, - cx: &mut Context<'_>, +/// This handles the various failure cases and puts the string back into `output`. +/// +/// The `truncate_on_io_error` bool is necessary because `read_to_string` and `read_line` +/// disagree on what should happen when an IO error occurs. +pub(super) fn finish_string_read( + io_res: io::Result, + utf8_res: Result, + read: usize, output: &mut String, - buf: &mut Vec, - read: &mut usize, + truncate_on_io_error: bool, ) -> Poll> { - let io_res = ready!(read_until_internal(reader, cx, b'\n', buf, read)); - let utf8_res = String::from_utf8(mem::replace(buf, Vec::new())); - - // At this point both buf and output are empty. The allocation is in utf8_res. - - debug_assert!(buf.is_empty()); match (io_res, utf8_res) { (Ok(num_bytes), Ok(string)) => { - debug_assert_eq!(*read, 0); + debug_assert_eq!(read, 0); *output = string; Poll::Ready(Ok(num_bytes)) } (Err(io_err), Ok(string)) => { *output = string; + if truncate_on_io_error { + let original_len = output.len() - read; + output.truncate(original_len); + } Poll::Ready(Err(io_err)) } (Ok(num_bytes), Err(utf8_err)) => { - debug_assert_eq!(*read, 0); + debug_assert_eq!(read, 0); put_back_original_data(output, utf8_err.into_bytes(), num_bytes); Poll::Ready(Err(io::Error::new( @@ -75,13 +78,29 @@ pub(super) fn read_line_internal( ))) } (Err(io_err), Err(utf8_err)) => { - put_back_original_data(output, utf8_err.into_bytes(), *read); + put_back_original_data(output, utf8_err.into_bytes(), read); Poll::Ready(Err(io_err)) } } } +pub(super) fn read_line_internal( + reader: Pin<&mut R>, + cx: &mut Context<'_>, + output: &mut String, + buf: &mut Vec, + read: &mut usize, +) -> Poll> { + let io_res = ready!(read_until_internal(reader, cx, b'\n', buf, read)); + let utf8_res = String::from_utf8(mem::replace(buf, Vec::new())); + + // At this point both buf and output are empty. The allocation is in utf8_res. + + debug_assert!(buf.is_empty()); + finish_string_read(io_res, utf8_res, *read, output, false) +} + impl Future for ReadLine<'_, R> { type Output = io::Result; diff --git a/tokio/src/io/util/read_to_end.rs b/tokio/src/io/util/read_to_end.rs index a2cd99bed09..67aa8618a1c 100644 --- a/tokio/src/io/util/read_to_end.rs +++ b/tokio/src/io/util/read_to_end.rs @@ -2,7 +2,7 @@ use crate::io::AsyncRead; use std::future::Future; use std::io; -use std::mem::MaybeUninit; +use std::mem::{self, MaybeUninit}; use std::pin::Pin; use std::task::{Context, Poll}; @@ -12,81 +12,152 @@ use std::task::{Context, Poll}; pub struct ReadToEnd<'a, R: ?Sized> { reader: &'a mut R, buf: &'a mut Vec, - start_len: usize, + /// The number of bytes appended to buf. This can be less than buf.len() if + /// the buffer was not empty when the operation was started. + read: usize, } -pub(crate) fn read_to_end<'a, R>(reader: &'a mut R, buf: &'a mut Vec) -> ReadToEnd<'a, R> +pub(crate) fn read_to_end<'a, R>(reader: &'a mut R, buffer: &'a mut Vec) -> ReadToEnd<'a, R> where R: AsyncRead + Unpin + ?Sized, { - let start_len = buf.len(); + prepare_buffer(buffer, reader); ReadToEnd { reader, - buf, - start_len, + buf: buffer, + read: 0, } } -struct Guard<'a> { - buf: &'a mut Vec, - len: usize, -} - -impl Drop for Guard<'_> { - fn drop(&mut self) { - unsafe { - self.buf.set_len(self.len); +/// SAFETY: Before first calling this method, the unused capacity must have been +/// prepared for use with the provided AsyncRead. This can be done using the +/// `prepare_buffer` function later in this file. +pub(super) unsafe fn read_to_end_internal( + buf: &mut Vec, + mut reader: Pin<&mut R>, + num_read: &mut usize, + cx: &mut Context<'_>, +) -> Poll> { + loop { + // safety: The caller promised to prepare the buffer. + let ret = ready!(poll_read_to_end(buf, reader.as_mut(), cx)); + match ret { + Err(err) => return Poll::Ready(Err(err)), + Ok(0) => return Poll::Ready(Ok(mem::replace(num_read, 0))), + Ok(num) => { + *num_read += num; + } } } } -// This uses an adaptive system to extend the vector when it fills. We want to -// avoid paying to allocate and zero a huge chunk of memory if the reader only -// has 4 bytes while still making large reads if the reader does have a ton -// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every -// time is 4,500 times (!) slower than this if the reader has a very small -// amount of data to return. -// -// Because we're extending the buffer with uninitialized data for trusted -// readers, we need to make sure to truncate that if any of this panics. -pub(super) fn read_to_end_internal( - mut rd: Pin<&mut R>, - cx: &mut Context<'_>, +/// Tries to read from the provided AsyncRead. +/// +/// The length of the buffer is increased by the number of bytes read. +/// +/// SAFETY: Before first calling this method, the unused capacity must have been +/// prepared for use with the provided AsyncRead. This can be done using the +/// `prepare_buffer` function later in this file. +unsafe fn poll_read_to_end( buf: &mut Vec, - start_len: usize, + read: Pin<&mut R>, + cx: &mut Context<'_>, ) -> Poll> { - let mut g = Guard { - len: buf.len(), - buf, + // This uses an adaptive system to extend the vector when it fills. We want to + // avoid paying to allocate and zero a huge chunk of memory if the reader only + // has 4 bytes while still making large reads if the reader does have a ton + // of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every + // time is 4,500 times (!) slower than this if the reader has a very small + // amount of data to return. + reserve(buf, &*read, 32); + + let unused_capacity: &mut [MaybeUninit] = get_unused_capacity(buf); + + // safety: The buffer has been prepared for use with the AsyncRead before + // calling this function. + let slice: &mut [u8] = { + let ptr = unused_capacity.as_mut_ptr().cast::(); + let len = unused_capacity.len(); + std::slice::from_raw_parts_mut(ptr, len) }; - let ret; - loop { - if g.len == g.buf.len() { - unsafe { - g.buf.reserve(32); - let capacity = g.buf.capacity(); - g.buf.set_len(capacity); - let b = &mut *(&mut g.buf[g.len..] as *mut [u8] as *mut [MaybeUninit]); + let res = ready!(read.poll_read(cx, slice)); + if let Ok(num) = res { + // safety: There are two situations: + // + // 1. The AsyncRead has not overriden `prepare_uninitialized_buffer`. + // + // In this situation, the default implementation of that method will have + // zeroed the unused capacity. This means that setting the length will + // never expose uninitialized memory in the vector. + // + // Note that the assert! below ensures that we don't set the length to + // something larger than the capacity, which malicious implementors might + // try to have us do. + // + // 2. The AsyncRead has overriden `prepare_uninitialized_buffer`. + // + // In this case, the safety of the `set_len` call below relies on this + // guarantee from the documentation on `prepare_uninitialized_buffer`: + // + // > This function isn't actually unsafe to call but unsafe to implement. + // > The implementer must ensure that either the whole buf has been zeroed + // > or poll_read() overwrites the buffer without reading it and returns + // > correct value. + // + // Note that `prepare_uninitialized_buffer` is unsafe to implement, so this + // is a guarantee we can rely on in unsafe code. + // + // The assert!() is technically only necessary in the first case. + let new_len = buf.len() + num; + assert!(new_len <= buf.capacity()); - rd.prepare_uninitialized_buffer(b); - } - } + buf.set_len(new_len); + } + Poll::Ready(res) +} - match ready!(rd.as_mut().poll_read(cx, &mut g.buf[g.len..])) { - Ok(0) => { - ret = Poll::Ready(Ok(g.len - start_len)); - break; - } - Ok(n) => g.len += n, - Err(e) => { - ret = Poll::Ready(Err(e)); - break; - } - } +/// This function prepares the unused capacity for use with the provided AsyncRead. +pub(super) fn prepare_buffer(buf: &mut Vec, read: &R) { + let buffer = get_unused_capacity(buf); + + // safety: This function is only unsafe to implement. + unsafe { + read.prepare_uninitialized_buffer(buffer); } +} - ret +/// Allocates more memory and ensures that the unused capacity is prepared for use +/// with the `AsyncRead`. +fn reserve(buf: &mut Vec, read: &R, bytes: usize) { + if buf.capacity() - buf.len() >= bytes { + return; + } + buf.reserve(bytes); + // The call above has reallocated the buffer, so we must reinitialize the entire + // unused capacity, even if we already initialized some of it before the resize. + prepare_buffer(buf, read); +} + +/// Returns the unused capacity of the provided vector. +/// +/// This function does not need to be marked unsafe, as MaybeUninit does not +/// require the underlying memory to initialized. +fn get_unused_capacity(buf: &mut Vec) -> &mut [MaybeUninit] { + let prepare_from = buf.len(); + let prepare_len = buf.capacity() - prepare_from; + // safety: prepare_from is the length of the vector, so it will stay inside the + // allocation. + let ptr = unsafe { buf.as_mut_ptr().add(prepare_from).cast::>() }; + + // safety: The memory is properly allocated due to the invariants provided by + // Vec, and since the item type is MaybeUninit, it is safe for the + // memory to be uninitialized. + let slice: &mut [MaybeUninit] = unsafe { std::slice::from_raw_parts_mut(ptr, prepare_len) }; + debug_assert_eq!(slice.len() + buf.len(), buf.capacity()); + debug_assert_eq!(ptr as usize - buf[..].as_mut_ptr() as usize, buf.len(),); + + slice } impl Future for ReadToEnd<'_, A> @@ -96,8 +167,10 @@ where type Output = io::Result; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - let this = &mut *self; - read_to_end_internal(Pin::new(&mut this.reader), cx, this.buf, this.start_len) + let Self { reader, buf, read } = &mut *self; + + // safety: The constructor of ReadToEnd calls `prepare_buffer` + unsafe { read_to_end_internal(buf, Pin::new(*reader), read, cx) } } } diff --git a/tokio/src/io/util/read_to_string.rs b/tokio/src/io/util/read_to_string.rs index cab0505ab83..e71d9d3c38c 100644 --- a/tokio/src/io/util/read_to_string.rs +++ b/tokio/src/io/util/read_to_string.rs @@ -1,4 +1,5 @@ -use crate::io::util::read_to_end::read_to_end_internal; +use crate::io::util::read_line::finish_string_read; +use crate::io::util::read_to_end::{prepare_buffer, read_to_end_internal}; use crate::io::AsyncRead; use std::future::Future; @@ -12,47 +13,51 @@ cfg_io_util! { #[must_use = "futures do nothing unless you `.await` or poll them"] pub struct ReadToString<'a, R: ?Sized> { reader: &'a mut R, - buf: &'a mut String, - bytes: Vec, - start_len: usize, + /// This is the buffer we were provided. It will be replaced with an empty string + /// while reading to postpone utf-8 handling until after reading. + output: &'a mut String, + /// The actual allocation of the string is moved into this vector instead. + buf: Vec, + /// The number of bytes appended to buf. This can be less than buf.len() if + /// the buffer was not empty when the operation was started. + read: usize, } } -pub(crate) fn read_to_string<'a, R>(reader: &'a mut R, buf: &'a mut String) -> ReadToString<'a, R> +pub(crate) fn read_to_string<'a, R>( + reader: &'a mut R, + string: &'a mut String, +) -> ReadToString<'a, R> where R: AsyncRead + ?Sized + Unpin, { - let start_len = buf.len(); + let mut buf = mem::replace(string, String::new()).into_bytes(); + prepare_buffer(&mut buf, reader); ReadToString { reader, - bytes: mem::replace(buf, String::new()).into_bytes(), buf, - start_len, + output: string, + read: 0, } } -fn read_to_string_internal( +/// SAFETY: Before first calling this method, the unused capacity must have been +/// prepared for use with the provided AsyncRead. This can be done using the +/// `prepare_buffer` function in `read_to_end.rs`. +unsafe fn read_to_string_internal( reader: Pin<&mut R>, + output: &mut String, + buf: &mut Vec, + read: &mut usize, cx: &mut Context<'_>, - buf: &mut String, - bytes: &mut Vec, - start_len: usize, ) -> Poll> { - let ret = ready!(read_to_end_internal(reader, cx, bytes, start_len))?; - match String::from_utf8(mem::replace(bytes, Vec::new())) { - Ok(string) => { - debug_assert!(buf.is_empty()); - *buf = string; - Poll::Ready(Ok(ret)) - } - Err(e) => { - *bytes = e.into_bytes(); - Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidData, - "stream did not contain valid UTF-8", - ))) - } - } + let io_res = ready!(read_to_end_internal(buf, reader, read, cx)); + let utf8_res = String::from_utf8(mem::replace(buf, Vec::new())); + + // At this point both buf and output are empty. The allocation is in utf8_res. + + debug_assert!(buf.is_empty()); + finish_string_read(io_res, utf8_res, *read, output, true) } impl Future for ReadToString<'_, A> @@ -65,17 +70,12 @@ where let Self { reader, buf, - bytes, - start_len, + output, + read, } = &mut *self; - let ret = read_to_string_internal(Pin::new(reader), cx, buf, bytes, *start_len); - if let Poll::Ready(Err(_)) = ret { - // Put back the original string. - bytes.truncate(*start_len); - **buf = String::from_utf8(mem::replace(bytes, Vec::new())) - .expect("original string no longer utf-8"); - } - ret + + // safety: The constructor of ReadToString called `prepare_buffer`. + unsafe { read_to_string_internal(Pin::new(*reader), output, buf, read, cx) } } }