From 40577177b2bc6df7a63d8762b6f74386502f1b63 Mon Sep 17 00:00:00 2001 From: Ulrich Hornung Date: Sat, 16 Mar 2024 17:10:49 +0100 Subject: [PATCH] re-use existing fd for stdout even if it's a seek-able file this is important as the fd holds the file offset we need to use --- src/uu/dd/Cargo.toml | 4 +- src/uu/dd/src/dd.rs | 59 ++++++++++++++++++--- src/uucore/src/lib/lib.rs | 1 + src/uucore/src/lib/mods.rs | 1 + src/uucore/src/lib/mods/io.rs | 98 +++++++++++++++++++++++++++++++++++ tests/by-util/test_dd.rs | 50 +++++++++++++++++- 6 files changed, 202 insertions(+), 11 deletions(-) create mode 100644 src/uucore/src/lib/mods/io.rs diff --git a/src/uu/dd/Cargo.toml b/src/uu/dd/Cargo.toml index 1dbb37bde55..be51898fe3e 100644 --- a/src/uu/dd/Cargo.toml +++ b/src/uu/dd/Cargo.toml @@ -20,11 +20,9 @@ gcd = { workspace = true } libc = { workspace = true } uucore = { workspace = true, features = ["format", "quoting-style"] } -[target.'cfg(any(target_os = "linux"))'.dependencies] -nix = { workspace = true, features = ["fs"] } - [target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies] signal-hook = { workspace = true } +nix = { workspace = true, features = ["fs"] } [[bin]] name = "dd" diff --git a/src/uu/dd/src/dd.rs b/src/uu/dd/src/dd.rs index 07a754deb51..6c771d35226 100644 --- a/src/uu/dd/src/dd.rs +++ b/src/uu/dd/src/dd.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore fname, ftype, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized fadvise Fadvise FADV DONTNEED ESPIPE bufferedoutput +// spell-checker:ignore fname, ftype, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, behaviour, bmax, bremain, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rremain, rsofar, rstat, sigusr, wlen, wstat seekable oconv canonicalized fadvise Fadvise FADV DONTNEED ESPIPE bufferedoutput, SETFL mod blocks; mod bufferedoutput; @@ -16,8 +16,13 @@ mod progress; use crate::bufferedoutput::BufferedOutput; use blocks::conv_block_unblock_helper; use datastructures::*; +#[cfg(any(target_os = "linux", target_os = "android"))] +use nix::fcntl::FcntlArg::F_SETFL; +#[cfg(any(target_os = "linux", target_os = "android"))] +use nix::fcntl::OFlag; use parseargs::Parser; use progress::{gen_prog_updater, ProgUpdate, ReadStat, StatusLevel, WriteStat}; +use uucore::io::OwnedFileDescriptorOrHandle; use std::cmp; use std::env; @@ -31,6 +36,8 @@ use std::os::unix::{ fs::FileTypeExt, io::{AsRawFd, FromRawFd}, }; +#[cfg(windows)] +use std::os::windows::{fs::MetadataExt, io::AsHandle}; use std::path::Path; use std::sync::{ atomic::{AtomicBool, Ordering::Relaxed}, @@ -227,7 +234,7 @@ impl Source { Err(e) => Err(e), } } - Self::File(f) => f.seek(io::SeekFrom::Start(n)), + Self::File(f) => f.seek(io::SeekFrom::Current(n.try_into().unwrap())), #[cfg(unix)] Self::Fifo(f) => io::copy(&mut 
f.take(n), &mut io::sink()), } @@ -283,7 +290,24 @@ impl<'a> Input<'a> { /// Instantiate this struct with stdin as a source. fn new_stdin(settings: &'a Settings) -> UResult { #[cfg(not(unix))] - let mut src = Source::Stdin(io::stdin()); + let mut src = { + let f = File::from(io::stdin().as_handle().try_clone_to_owned()?); + let is_file = if let Ok(metadata) = f.metadata() { + // this hack is needed as there is no other way on windows + // to differentiate between the case where `seek` works + // on a file handle or not. i.e. when the handle is no real + // file but a pipe, `seek` is still successful, but following + // `read`s are not affected by the seek. + metadata.creation_time() != 0 + } else { + false + }; + if is_file { + Source::File(f) + } else { + Source::Stdin(io::stdin()) + } + }; #[cfg(unix)] let mut src = Source::stdin_as_file(); if settings.skip > 0 { @@ -557,7 +581,7 @@ impl Dest { return Ok(len); } } - f.seek(io::SeekFrom::Start(n)) + f.seek(io::SeekFrom::Current(n.try_into().unwrap())) } #[cfg(unix)] Self::Fifo(f) => { @@ -699,6 +723,11 @@ impl<'a> Output<'a> { if !settings.oconv.notrunc { dst.set_len(settings.seek).ok(); } + + Self::prepare_file(dst, settings) + } + + fn prepare_file(dst: File, settings: &'a Settings) -> UResult { let density = if settings.oconv.sparse { Density::Sparse } else { @@ -710,6 +739,24 @@ impl<'a> Output<'a> { Ok(Self { dst, settings }) } + /// Instantiate this struct with file descriptor as a destination. + /// + /// This is useful e.g. for the case when the file descriptor was + /// already opened by the system (stdout) and has a state + /// (current position) that shall be used. 
+ fn new_file_from_stdout(settings: &'a Settings) -> UResult { + let fx = OwnedFileDescriptorOrHandle::from(io::stdout())?; + #[cfg(any(target_os = "linux", target_os = "android"))] + if let Some(libc_flags) = make_linux_oflags(&settings.oflags) { + nix::fcntl::fcntl( + fx.as_raw().as_raw_fd(), + F_SETFL(OFlag::from_bits_retain(libc_flags)), + )?; + } + + Self::prepare_file(fx.into_file(), settings) + } + /// Instantiate this struct with the given named pipe as a destination. #[cfg(unix)] fn new_fifo(filename: &Path, settings: &'a Settings) -> UResult { @@ -1287,9 +1334,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { #[cfg(unix)] Some(ref outfile) if is_fifo(outfile) => Output::new_fifo(Path::new(&outfile), &settings)?, Some(ref outfile) => Output::new_file(Path::new(&outfile), &settings)?, - None if is_stdout_redirected_to_seekable_file() => { - Output::new_file(Path::new(&stdout_canonicalized()), &settings)? - } + None if is_stdout_redirected_to_seekable_file() => Output::new_file_from_stdout(&settings)?, None => Output::new_stdout(&settings)?, }; dd_copy(i, o).map_err_context(|| "IO error".to_string()) diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 9557dcc7695..e891cc40410 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -22,6 +22,7 @@ pub use uucore_procs::*; // * cross-platform modules pub use crate::mods::display; pub use crate::mods::error; +pub use crate::mods::io; pub use crate::mods::line_ending; pub use crate::mods::os; pub use crate::mods::panic; diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs index 40b5046f27e..29508e31a89 100644 --- a/src/uucore/src/lib/mods.rs +++ b/src/uucore/src/lib/mods.rs @@ -6,6 +6,7 @@ pub mod display; pub mod error; +pub mod io; pub mod line_ending; pub mod os; pub mod panic; diff --git a/src/uucore/src/lib/mods/io.rs b/src/uucore/src/lib/mods/io.rs new file mode 100644 index 00000000000..0d2240e9b1c --- /dev/null +++ 
b/src/uucore/src/lib/mods/io.rs @@ -0,0 +1,98 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +/// Encapsulates differences between OSs regarding the access to +/// file handles / descriptors. +/// This is useful when dealing with lower level stdin/stdout access. +/// +/// In detail: +/// On unix like OSs, file _descriptors_ are used in this context. +/// On windows OSs, file _handles_ are used. +/// +/// Even though they are distinct classes, they share common functionality. +/// Access to this common functionality is provided in `OwnedFileDescriptorOrHandle`. + +#[cfg(not(windows))] +use std::os::fd::{AsFd, OwnedFd}; +#[cfg(windows)] +use std::os::windows::io::{AsHandle, OwnedHandle}; +use std::{ + fs::{File, OpenOptions}, + io, + path::Path, + process::Stdio, +}; + +#[cfg(windows)] +type NativeType = OwnedHandle; +#[cfg(not(windows))] +type NativeType = OwnedFd; + +/// abstraction wrapper for native file handle / file descriptor +pub struct OwnedFileDescriptorOrHandle { + fx: NativeType, +} + +impl OwnedFileDescriptorOrHandle { + /// create from underlying native type + pub fn new(x: NativeType) -> Self { + Self { fx: x } + } + + /// create by opening a file + pub fn open_file(options: &OpenOptions, path: &Path) -> io::Result { + let f = options.open(path)?; + Self::from(f) + } + + /// conversion from borrowed native type + /// + /// e.g. `std::io::stdout()`, `std::fs::File`, ... + #[cfg(windows)] + pub fn from(t: T) -> io::Result { + Ok(Self { + fx: t.as_handle().try_clone_to_owned()?, + }) + } + + /// conversion from borrowed native type + /// + /// e.g. `std::io::stdout()`, `std::fs::File`, ... 
+ #[cfg(not(windows))] + pub fn from(t: T) -> io::Result { + Ok(Self { + fx: t.as_fd().try_clone_to_owned()?, + }) + } + + /// instantiates a corresponding `File` + pub fn into_file(self) -> File { + File::from(self.fx) + } + + /// instantiates a corresponding `Stdio` + pub fn into_stdio(self) -> Stdio { + Stdio::from(self.fx) + } + + /// clones self. useful when needing another + /// owned reference to same file + pub fn try_clone(&self) -> io::Result { + self.fx.try_clone().map(Self::new) + } + + /// provides native type to be used with + /// OS specific functions without abstraction + pub fn as_raw(&self) -> &NativeType { + &self.fx + } +} + +/// instantiates a corresponding `Stdio` +impl From for Stdio { + fn from(value: OwnedFileDescriptorOrHandle) -> Self { + value.into_stdio() + } +} diff --git a/tests/by-util/test_dd.rs b/tests/by-util/test_dd.rs index 401a5c5ef70..c909527c0a7 100644 --- a/tests/by-util/test_dd.rs +++ b/tests/by-util/test_dd.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, availible, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat abcdefghijklm abcdefghi nabcde nabcdefg abcdefg fifoname +// spell-checker:ignore fname, tname, fpath, specfile, testfile, unspec, ifile, ofile, outfile, fullblock, urand, fileio, atoe, atoibm, availible, behaviour, bmax, bremain, btotal, cflags, creat, ctable, ctty, datastructures, doesnt, etoa, fileout, fname, gnudd, iconvflags, iseek, nocache, noctty, noerror, nofollow, nolinks, nonblock, oconvflags, oseek, outfile, parseargs, rlen, rmax, rposition, rremain, rsofar, rstat, sigusr, sigval, wlen, wstat abcdefghijklm abcdefghi nabcde nabcdefg abcdefg fifoname seekable #[cfg(unix)] use crate::common::util::run_ucmd_as_root_with_stdin_stdout; @@ -11,6 +11,7 @@ use crate::common::util::TestScenario; use crate::common::util::{UCommand, TESTS_BINARY}; use regex::Regex; +use uucore::io::OwnedFileDescriptorOrHandle; use std::fs::{File, OpenOptions}; use std::io::{BufReader, Read, Write}; @@ -1713,3 +1714,50 @@ fn test_reading_partial_blocks_from_fifo_unbuffered() { let expected = b"0+2 records in\n0+2 records out\n4 bytes copied"; assert!(output.stderr.starts_with(expected)); } + +#[test] +fn test_stdin_stdout_not_rewound_even_when_connected_to_seekable_file() { + use std::process::Stdio; + + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.write("in", "abcde"); + + let stdin = OwnedFileDescriptorOrHandle::open_file( + OpenOptions::new().read(true), + at.plus("in").as_path(), + ) + .unwrap(); + let stdout = OwnedFileDescriptorOrHandle::open_file( + OpenOptions::new().create(true).write(true), + 
at.plus("out").as_path(), + ) + .unwrap(); + let stderr = OwnedFileDescriptorOrHandle::open_file( + OpenOptions::new().create(true).write(true), + at.plus("err").as_path(), + ) + .unwrap(); + + ts.ucmd() + .args(&["bs=1", "skip=1", "count=1"]) + .set_stdin(Stdio::from(stdin.try_clone().unwrap())) + .set_stdout(Stdio::from(stdout.try_clone().unwrap())) + .set_stderr(Stdio::from(stderr.try_clone().unwrap())) + .succeeds(); + + ts.ucmd() + .args(&["bs=1", "skip=1"]) + .set_stdin(stdin) + .set_stdout(stdout) + .set_stderr(stderr) + .succeeds(); + + let err_file_content = std::fs::read_to_string(at.plus_as_string("err")).unwrap(); + println!("stderr:\n{}", err_file_content); + + let out_file_content = std::fs::read_to_string(at.plus_as_string("out")).unwrap(); + println!("stdout:\n{}", out_file_content); + assert_eq!(out_file_content, "bde"); +}