From 31fd98ca723b93cc6101a3e29843ea5cf094e159 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 12 Oct 2023 08:13:24 -0700 Subject: [PATCH] Merge pull request from GHSA-c827-hfw6-qwvm * Fix `rustix::fs::Dir` to avoid unbounded buffer growth. Fix `Dir`'s buffer size computation to avoid resizing past a fixed upper limit. This prevents it from growing without bound, such as in the case of `Dir::rewind` for repeated iterations with the same `Dir`. * Don't let `Dir` continue to try to iterate after a failure. * Handle `io::Errno::INTR` gracefully. * Write a more detailed comment on the buffer growth policy. * Also mention that no buffer can ever be big enough for everything. * Add tests against over-allocation & stuck iterator * Rm `dir_iterator_does_not_overallocate` unit test in favour of docs * Extend `test_dir` to cover `rewind`. * Consistently handle directory removal as ending the stream. libc implementations of directory iteration handle directory removal by just ending the stream. In the linux_raw backend, this looks like `ENOENT` from `getdents64`, so change the code to check for `ENOENT` and end the stream. This requires changing the `dir_iterator_does_not_get_stuck_on_io_error` test to no longer expect a failure, so it's now renamed to `dir_iterator_handles_dir_removal`. To test the error case, add a new `dir_iterator_handles_io_errors` test which uses `dup2` to induce an error, in both the linux_raw and libc backends. This exposes the fact that the libc `Dir` implementation was also assuming that users would stop iterating after hitting a failure, so add a `any_errors` flag to the libc backend as well. * Add a test for removing the directory after doing `read_from`. * In the libc backend, handle `ENOENT` when opening ".". --------- Co-authored-by: cyqsimon <28627918+cyqsimon@users.noreply.github.com> --- src/backend/libc/fs/dir.rs | 86 ++++++++++++++++++++++++----- src/backend/linux_raw/fs/dir.rs | 95 ++++++++++++++++++++++++++++----- tests/fs/dir.rs | 69 +++++++++++++++++++++++- 3 files changed, 224 insertions(+), 26 deletions(-) diff --git a/src/backend/libc/fs/dir.rs b/src/backend/libc/fs/dir.rs index 91b06e17f..c69e68ca2 100644 --- a/src/backend/libc/fs/dir.rs +++ b/src/backend/libc/fs/dir.rs @@ -30,8 +30,13 @@ use core::ptr::NonNull; use libc_errno::{errno, set_errno, Errno}; /// `DIR*` -#[repr(transparent)] -pub struct Dir(NonNull); +pub struct Dir { + /// The `libc` `DIR` pointer. + libc_dir: NonNull, + + /// Have we seen any errors in this iteration? + any_errors: bool, +} impl Dir { /// Construct a `Dir` that reads entries from the given directory @@ -43,20 +48,35 @@ impl Dir { #[inline] fn _read_from(fd: BorrowedFd<'_>) -> io::Result { + let mut any_errors = false; + // Given an arbitrary `OwnedFd`, it's impossible to know whether the // user holds a `dup`'d copy which could continue to modify the // file description state, which would cause Undefined Behavior after // our call to `fdopendir`. To prevent this, we obtain an independent // `OwnedFd`. let flags = fcntl_getfl(fd)?; - let fd_for_dir = openat(fd, cstr!("."), flags | OFlags::CLOEXEC, Mode::empty())?; + let fd_for_dir = match openat(fd, cstr!("."), flags | OFlags::CLOEXEC, Mode::empty()) { + Ok(fd) => fd, + Err(io::Errno::NOENT) => { + // If "." doesn't exist, it means the directory was removed. + // We treat that as iterating through a directory with no + // entries. + any_errors = true; + crate::io::dup(fd)? + } + Err(err) => return Err(err), + }; let raw = owned_fd(fd_for_dir); unsafe { let libc_dir = c::fdopendir(raw); if let Some(libc_dir) = NonNull::new(libc_dir) { - Ok(Self(libc_dir)) + Ok(Self { + libc_dir, + any_errors, + }) } else { let err = io::Errno::last_os_error(); let _ = c::close(raw); @@ -68,13 +88,19 @@ impl Dir { /// `rewinddir(self)` #[inline] pub fn rewind(&mut self) { - unsafe { c::rewinddir(self.0.as_ptr()) } + self.any_errors = false; + unsafe { c::rewinddir(self.libc_dir.as_ptr()) } } /// `readdir(self)`, where `None` means the end of the directory. pub fn read(&mut self) -> Option> { + // If we've seen errors, don't continue to try to read anyting further. + if self.any_errors { + return None; + } + set_errno(Errno(0)); - let dirent_ptr = unsafe { libc_readdir(self.0.as_ptr()) }; + let dirent_ptr = unsafe { libc_readdir(self.libc_dir.as_ptr()) }; if dirent_ptr.is_null() { let curr_errno = errno().0; if curr_errno == 0 { @@ -82,6 +108,7 @@ impl Dir { None } else { // `errno` is unknown or non-zero, so an error occurred. + self.any_errors = true; Some(Err(io::Errno(curr_errno))) } } else { @@ -120,7 +147,7 @@ impl Dir { /// `fstat(self)` #[inline] pub fn stat(&self) -> io::Result { - fstat(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.0.as_ptr())) }) + fstat(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.libc_dir.as_ptr())) }) } /// `fstatfs(self)` @@ -134,14 +161,14 @@ impl Dir { )))] #[inline] pub fn statfs(&self) -> io::Result { - fstatfs(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.0.as_ptr())) }) + fstatfs(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.libc_dir.as_ptr())) }) } /// `fstatvfs(self)` #[cfg(not(any(solarish, target_os = "haiku", target_os = "redox", target_os = "wasi")))] #[inline] pub fn statvfs(&self) -> io::Result { - fstatvfs(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.0.as_ptr())) }) + fstatvfs(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.libc_dir.as_ptr())) }) } /// `fchdir(self)` @@ -150,7 +177,7 @@ impl Dir { #[cfg_attr(doc_cfg, doc(cfg(feature = "process")))] #[inline] pub fn chdir(&self) -> io::Result<()> { - fchdir(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.0.as_ptr())) }) + fchdir(unsafe { BorrowedFd::borrow_raw(c::dirfd(self.libc_dir.as_ptr())) }) } } @@ -163,7 +190,7 @@ unsafe impl Send for Dir {} impl Drop for Dir { #[inline] fn drop(&mut self) { - unsafe { c::closedir(self.0.as_ptr()) }; + unsafe { c::closedir(self.libc_dir.as_ptr()) }; } } @@ -179,7 +206,7 @@ impl Iterator for Dir { impl fmt::Debug for Dir { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Dir") - .field("fd", unsafe { &c::dirfd(self.0.as_ptr()) }) + .field("fd", unsafe { &c::dirfd(self.libc_dir.as_ptr()) }) .finish() } } @@ -293,3 +320,38 @@ fn check_dirent_layout(dirent: &c::dirent) { } ); } + +#[test] +fn dir_iterator_handles_io_errors() { + // create a dir, keep the FD, then delete the dir + let tmp = tempfile::tempdir().unwrap(); + let fd = crate::fs::openat( + crate::fs::CWD, + tmp.path(), + crate::fs::OFlags::RDONLY | crate::fs::OFlags::CLOEXEC, + crate::fs::Mode::empty(), + ) + .unwrap(); + + let file_fd = crate::fs::openat( + &fd, + tmp.path().join("test.txt"), + crate::fs::OFlags::WRONLY | crate::fs::OFlags::CREATE, + crate::fs::Mode::RWXU, + ) + .unwrap(); + + let mut dir = Dir::read_from(&fd).unwrap(); + + // Reach inside the `Dir` and replace its directory with a file, which + // will cause the subsequent `readdir` to fail. + unsafe { + let raw_fd = c::dirfd(dir.libc_dir.as_ptr()); + let mut owned_fd: crate::fd::OwnedFd = crate::fd::FromRawFd::from_raw_fd(raw_fd); + crate::io::dup2(&file_fd, &mut owned_fd).unwrap(); + core::mem::forget(owned_fd); + } + + assert!(matches!(dir.next(), Some(Err(_)))); + assert!(matches!(dir.next(), None)); +} diff --git a/src/backend/linux_raw/fs/dir.rs b/src/backend/linux_raw/fs/dir.rs index 4df589af5..ea1017957 100644 --- a/src/backend/linux_raw/fs/dir.rs +++ b/src/backend/linux_raw/fs/dir.rs @@ -18,9 +18,17 @@ pub struct Dir { /// The `OwnedFd` that we read directory entries from. fd: OwnedFd, + /// Have we seen any errors in this iteration? + any_errors: bool, + + /// Should we rewind the stream on the next iteration? + rewind: bool, + + /// The buffer for `linux_dirent64` entries. buf: Vec, + + /// Where we are in the buffer. pos: usize, - next: Option, } impl Dir { @@ -38,25 +46,39 @@ impl Dir { Ok(Self { fd: fd_for_dir, + any_errors: false, + rewind: false, buf: Vec::new(), pos: 0, - next: None, }) } /// `rewinddir(self)` #[inline] pub fn rewind(&mut self) { + self.any_errors = false; + self.rewind = true; self.pos = self.buf.len(); - self.next = Some(0); } /// `readdir(self)`, where `None` means the end of the directory. pub fn read(&mut self) -> Option> { - if let Some(next) = self.next.take() { - match crate::backend::fs::syscalls::_seek(self.fd.as_fd(), next as i64, SEEK_SET) { + // If we've seen errors, don't continue to try to read anyting further. + if self.any_errors { + return None; + } + + // If a rewind was requested, seek to the beginning. + if self.rewind { + self.rewind = false; + match io::retry_on_intr(|| { + crate::backend::fs::syscalls::_seek(self.fd.as_fd(), 0, SEEK_SET) + }) { Ok(_) => (), - Err(err) => return Some(Err(err)), + Err(err) => { + self.any_errors = true; + return Some(Err(err)); + } } } @@ -78,7 +100,7 @@ impl Dir { if self.buf.len() - self.pos < size_of::() { match self.read_more()? { Ok(()) => (), - Err(e) => return Some(Err(e)), + Err(err) => return Some(Err(err)), } } @@ -136,14 +158,31 @@ impl Dir { } fn read_more(&mut self) -> Option> { - let og_len = self.buf.len(); - // Capacity increment currently chosen by wild guess. - self.buf - .resize(self.buf.capacity() + 32 * size_of::(), 0); - let nread = match crate::backend::fs::syscalls::getdents(self.fd.as_fd(), &mut self.buf) { + // The first few times we're called, we allocate a relatively small + // buffer, because many directories are small. If we're called more, + // use progressively larger allocations, up to a fixed maximum. + // + // The specific sizes and policy here have not been tuned in detail yet + // and may need to be adjusted. In doing so, we should be careful to + // avoid unbounded buffer growth. This buffer only exists to share the + // cost of a `getdents` call over many entries, so if it gets too big, + // cache and heap usage will outweigh the benefit. And ultimately, + // directories can contain more entries than we can allocate contiguous + // memory for, so we'll always need to cap the size at some point. + if self.buf.len() < 1024 * size_of::() { + self.buf.reserve(32 * size_of::()); + } + self.buf.resize(self.buf.capacity(), 0); + let nread = match io::retry_on_intr(|| { + crate::backend::fs::syscalls::getdents(self.fd.as_fd(), &mut self.buf) + }) { Ok(nread) => nread, + Err(io::Errno::NOENT) => { + self.any_errors = true; + return None; + } Err(err) => { - self.buf.resize(og_len, 0); + self.any_errors = true; return Some(Err(err)); } }; @@ -225,3 +264,33 @@ impl DirEntry { self.d_ino } } + +#[test] +fn dir_iterator_handles_io_errors() { + // create a dir, keep the FD, then delete the dir + let tmp = tempfile::tempdir().unwrap(); + let fd = crate::fs::openat( + crate::fs::CWD, + tmp.path(), + crate::fs::OFlags::RDONLY | crate::fs::OFlags::CLOEXEC, + crate::fs::Mode::empty(), + ) + .unwrap(); + + let file_fd = crate::fs::openat( + &fd, + tmp.path().join("test.txt"), + crate::fs::OFlags::WRONLY | crate::fs::OFlags::CREATE, + crate::fs::Mode::RWXU, + ) + .unwrap(); + + let mut dir = Dir::read_from(&fd).unwrap(); + + // Reach inside the `Dir` and replace its directory with a file, which + // will cause the subsequent `getdents64` to fail. + crate::io::dup2(&file_fd, &mut dir.fd).unwrap(); + + assert!(matches!(dir.next(), Some(Err(_)))); + assert!(matches!(dir.next(), None)); +} diff --git a/tests/fs/dir.rs b/tests/fs/dir.rs index b72f42e75..9c89a90ba 100644 --- a/tests/fs/dir.rs +++ b/tests/fs/dir.rs @@ -8,7 +8,7 @@ fn test_dir() { ) .unwrap(); - let dir = rustix::fs::Dir::read_from(&t).unwrap(); + let mut dir = rustix::fs::Dir::read_from(&t).unwrap(); let _file = rustix::fs::openat( &t, @@ -18,6 +18,32 @@ fn test_dir() { ) .unwrap(); + // Read the directory entries. We use `while let Some(entry)` so that we + // don't consume the `Dir` so that we can run more tests on it. + let mut saw_dot = false; + let mut saw_dotdot = false; + let mut saw_cargo_toml = false; + while let Some(entry) = dir.read() { + let entry = entry.unwrap(); + if entry.file_name() == rustix::cstr!(".") { + saw_dot = true; + } else if entry.file_name() == rustix::cstr!("..") { + saw_dotdot = true; + } else if entry.file_name() == rustix::cstr!("Cargo.toml") { + saw_cargo_toml = true; + } + } + assert!(saw_dot); + assert!(saw_dotdot); + assert!(saw_cargo_toml); + + // Rewind the directory so we can iterate over the entries again. + dir.rewind(); + + // For what comes next, we don't need `mut` anymore. + let dir = dir; + + // Read the directory entries, again. This time we use `for entry in dir`. let mut saw_dot = false; let mut saw_dotdot = false; let mut saw_cargo_toml = false; @@ -35,3 +61,44 @@ fn test_dir() { assert!(saw_dotdot); assert!(saw_cargo_toml); } + +#[test] +fn dir_iterator_handles_dir_removal() { + // create a dir, keep the FD, then delete the dir + let tmp = tempfile::tempdir().unwrap(); + let fd = rustix::fs::openat( + rustix::fs::CWD, + tmp.path(), + rustix::fs::OFlags::RDONLY | rustix::fs::OFlags::CLOEXEC, + rustix::fs::Mode::empty(), + ) + .unwrap(); + + // Drop the `TempDir`, which deletes the directory. + drop(tmp); + + let mut dir = rustix::fs::Dir::read_from(&fd).unwrap(); + assert!(matches!(dir.next(), None)); +} + +// Like `dir_iterator_handles_dir_removal`, but close the directory after +// `Dir::read_from`. +#[test] +fn dir_iterator_handles_dir_removal_after_open() { + // create a dir, keep the FD, then delete the dir + let tmp = tempfile::tempdir().unwrap(); + let fd = rustix::fs::openat( + rustix::fs::CWD, + tmp.path(), + rustix::fs::OFlags::RDONLY | rustix::fs::OFlags::CLOEXEC, + rustix::fs::Mode::empty(), + ) + .unwrap(); + + let mut dir = rustix::fs::Dir::read_from(&fd).unwrap(); + + // Drop the `TempDir`, which deletes the directory. + drop(tmp); + + assert!(matches!(dir.next(), None)); +}