Skip to content

Commit

Permalink
Auto merge of rust-lang#117925 - kornelski:read-to-oom, r=Amanieu
Browse files Browse the repository at this point in the history
Handle out of memory errors in io:Read::read_to_end()

rust-lang#116570 got stuck due to a [procedural confusion](rust-lang#116570 (comment)). Retrying so that it can get FCP with the proper team now. cc `@joshtriplett` `@BurntSushi`

----

I'd like to propose handling of out-of-memory errors in the default implementation of `io::Read::read_to_end()` and `fs::read()`. These methods create/grow a `Vec` with a size that is external to the program, and could be arbitrarily large.

Due to being I/O methods, they can already fail in a variety of ways, in theory even including `ENOMEM` from the OS too, so another failure case should not surprise anyone.

While this may not help much Linux with overcommit, it's useful for other platforms like WASM. [Internals thread](https://internals.rust-lang.org/t/io-read-read-to-end-should-handle-oom/19662).

I've added documentation that makes it explicit that the OOM handling is a nice-to-have, and not a guarantee of the trait.

I haven't changed the implementation of `impl Read for &[u8]` and `VecDeque` out of caution, because in these cases users could assume `read` can't fail.

This code uses `try_reserve()` + `extend_from_slice()` which is optimized since rust-lang#117503.
  • Loading branch information
bors committed Jan 30, 2024
2 parents 5518eaa + 60f4628 commit 5c9c3c7
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 7 deletions.
10 changes: 6 additions & 4 deletions library/std/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,8 @@ pub fn read<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
fn inner(path: &Path) -> io::Result<Vec<u8>> {
let mut file = File::open(path)?;
let size = file.metadata().map(|m| m.len() as usize).ok();
let mut bytes = Vec::with_capacity(size.unwrap_or(0));
let mut bytes = Vec::new();
bytes.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
io::default_read_to_end(&mut file, &mut bytes, size)?;
Ok(bytes)
}
Expand Down Expand Up @@ -302,7 +303,8 @@ pub fn read_to_string<P: AsRef<Path>>(path: P) -> io::Result<String> {
fn inner(path: &Path) -> io::Result<String> {
let mut file = File::open(path)?;
let size = file.metadata().map(|m| m.len() as usize).ok();
let mut string = String::with_capacity(size.unwrap_or(0));
let mut string = String::new();
string.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
io::default_read_to_string(&mut file, &mut string, size)?;
Ok(string)
}
Expand Down Expand Up @@ -774,14 +776,14 @@ impl Read for &File {
// Reserves space in the buffer based on the file size when available.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
buf.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
io::default_read_to_end(self, buf, size)
}

// Reserves space in the buffer based on the file size when available.
fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
let size = buffer_capacity_required(self);
buf.reserve(size.unwrap_or(0));
buf.try_reserve_exact(size.unwrap_or(0)).map_err(|_| io::ErrorKind::OutOfMemory)?;
io::default_read_to_string(self, buf, size)
}
}
Expand Down
1 change: 1 addition & 0 deletions library/std/src/io/buffered/bufreader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ impl<R: ?Sized + Read> Read for BufReader<R> {
// delegate to the inner implementation.
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
let inner_buf = self.buffer();
buf.try_reserve(inner_buf.len()).map_err(|_| io::ErrorKind::OutOfMemory)?;
buf.extend_from_slice(inner_buf);
let nread = inner_buf.len();
self.discard_buffer();
Expand Down
5 changes: 3 additions & 2 deletions library/std/src/io/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,9 @@ impl Read for &[u8] {

#[inline]
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
buf.extend_from_slice(*self);
let len = self.len();
buf.try_reserve(len).map_err(|_| ErrorKind::OutOfMemory)?;
buf.extend_from_slice(*self);
*self = &self[len..];
Ok(len)
}
Expand Down Expand Up @@ -451,7 +452,7 @@ impl<A: Allocator> Read for VecDeque<u8, A> {
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
// The total len is known upfront so we can reserve it in a single call.
let len = self.len();
buf.reserve(len);
buf.try_reserve(len).map_err(|_| ErrorKind::OutOfMemory)?;

let (front, back) = self.as_slices();
buf.extend_from_slice(front);
Expand Down
38 changes: 37 additions & 1 deletion library/std/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,8 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
loop {
match r.read(&mut probe) {
Ok(n) => {
// there is no way to recover from allocation failure here
// because the data has already been read.
buf.extend_from_slice(&probe[..n]);
return Ok(n);
}
Expand Down Expand Up @@ -462,7 +464,8 @@ pub(crate) fn default_read_to_end<R: Read + ?Sized>(
}

if buf.len() == buf.capacity() {
buf.reserve(PROBE_SIZE); // buf is full, need more space
// buf is full, need more space
buf.try_reserve(PROBE_SIZE).map_err(|_| ErrorKind::OutOfMemory)?;
}

let mut spare = buf.spare_capacity_mut();
Expand Down Expand Up @@ -815,6 +818,39 @@ pub trait Read {
/// file.)
///
/// [`std::fs::read`]: crate::fs::read
///
/// ## Implementing `read_to_end`
///
/// When implementing the `io::Read` trait, it is recommended to allocate
/// memory using [`Vec::try_reserve`]. However, this behavior is not guaranteed
/// by all implementations, and `read_to_end` may not handle out-of-memory
/// situations gracefully.
///
/// ```no_run
/// # use std::io::{self, BufRead};
/// # struct Example { example_datasource: io::Empty } impl Example {
/// # fn get_some_data_for_the_example(&self) -> &'static [u8] { &[] }
/// fn read_to_end(&mut self, dest_vec: &mut Vec<u8>) -> io::Result<usize> {
/// let initial_vec_len = dest_vec.len();
/// loop {
/// let src_buf = self.example_datasource.fill_buf()?;
/// if src_buf.is_empty() {
/// break;
/// }
/// dest_vec.try_reserve(src_buf.len()).map_err(|_| io::ErrorKind::OutOfMemory)?;
/// dest_vec.extend_from_slice(src_buf);
///
/// // Any irreversible side effects should happen after `try_reserve` succeeds,
/// // to avoid losing data on allocation error.
/// let read = src_buf.len();
/// self.example_datasource.consume(read);
/// }
/// Ok(dest_vec.len() - initial_vec_len)
/// }
/// # }
/// ```
///
/// [`Vec::try_reserve`]: crate::vec::Vec::try_reserve
#[stable(feature = "rust1", since = "1.0.0")]
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
default_read_to_end(self, buf, None)
Expand Down

0 comments on commit 5c9c3c7

Please sign in to comment.