diff --git a/Cargo.lock b/Cargo.lock index 705593af7e6..e5e479b4867 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3325,6 +3325,7 @@ dependencies = [ "md-5", "memchr", "nix", + "number_prefix", "once_cell", "os_display", "sha1", diff --git a/src/uu/du/Cargo.toml b/src/uu/du/Cargo.toml index bb9a2a97b02..bc53986b3c5 100644 --- a/src/uu/du/Cargo.toml +++ b/src/uu/du/Cargo.toml @@ -19,7 +19,7 @@ chrono = { workspace = true } # For the --exclude & --exclude-from options glob = { workspace = true } clap = { workspace = true } -uucore = { workspace = true } +uucore = { workspace = true, features = ["format"] } [target.'cfg(target_os = "windows")'.dependencies] windows-sys = { workspace = true, features = [ diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 1935248dafb..74fa4154db5 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -75,9 +75,6 @@ const ABOUT: &str = help_about!("du.md"); const AFTER_HELP: &str = help_section!("after help", "du.md"); const USAGE: &str = help_usage!("du.md"); -// TODO: Support Z & Y (currently limited by size of u64) -const UNITS: [(char, u32); 6] = [('E', 6), ('P', 5), ('T', 4), ('G', 3), ('M', 2), ('K', 1)]; - struct TraversalOptions { all: bool, separate_dirs: bool, @@ -117,7 +114,8 @@ enum Time { #[derive(Clone)] enum SizeFormat { - Human(u64), + HumanDecimal, + HumanBinary, BlockSize(u64), } @@ -549,18 +547,14 @@ impl StatPrinter { return size.to_string(); } match self.size_format { - SizeFormat::Human(multiplier) => { - if size == 0 { - return "0".to_string(); - } - for &(unit, power) in &UNITS { - let limit = multiplier.pow(power); - if size >= limit { - return format!("{:.1}{}", (size as f64) / (limit as f64), unit); - } - } - format!("{size}B") - } + SizeFormat::HumanDecimal => uucore::format::human::human_readable( + size, + uucore::format::human::SizeFormat::Decimal, + ), + SizeFormat::HumanBinary => uucore::format::human::human_readable( + size, + uucore::format::human::SizeFormat::Binary, + ), SizeFormat::BlockSize(block_size) => div_ceil(size, block_size).to_string(), } } @@ -688,9 +682,9 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { }); let size_format = if matches.get_flag(options::HUMAN_READABLE) { - SizeFormat::Human(1024) + SizeFormat::HumanBinary } else if matches.get_flag(options::SI) { - SizeFormat::Human(1000) + SizeFormat::HumanDecimal } else if matches.get_flag(options::BYTES) { SizeFormat::BlockSize(1) } else if matches.get_flag(options::BLOCK_SIZE_1K) { diff --git a/src/uu/ls/Cargo.toml b/src/uu/ls/Cargo.toml index 1dae3f033e9..cfd85cf9d72 100644 --- a/src/uu/ls/Cargo.toml +++ b/src/uu/ls/Cargo.toml @@ -26,6 +26,7 @@ lscolors = { workspace = true } uucore = { workspace = true, features = [ "colors", "entries", + "format", "fs", "fsxattr", "quoting-style", diff --git a/src/uu/ls/src/ls.rs b/src/uu/ls/src/ls.rs index 0221ae09681..829ddb45638 100644 --- a/src/uu/ls/src/ls.rs +++ b/src/uu/ls/src/ls.rs @@ -12,7 +12,6 @@ use clap::{ use glob::{MatchOptions, Pattern}; use lscolors::{LsColors, Style}; -use number_prefix::NumberPrefix; use std::{cell::OnceCell, num::IntErrorKind}; use std::{collections::HashSet, io::IsTerminal}; @@ -37,6 +36,7 @@ use std::{ use term_grid::{Cell, Direction, Filling, Grid, GridOptions}; use unicode_width::UnicodeWidthStr; use uucore::error::USimpleError; +use uucore::format::human::{human_readable, SizeFormat}; #[cfg(all(unix, not(any(target_os = "android", target_os = "macos"))))] use uucore::fsxattr::has_acl; #[cfg(any( @@ -313,13 +313,6 @@ enum Sort { Width, } -#[derive(PartialEq)] -enum SizeFormat { - Bytes, - Binary, // Powers of 1024, --human-readable, -h - Decimal, // Powers of 1000, --si -} - #[derive(PartialEq, Eq)] enum Files { All, @@ -3038,30 +3031,6 @@ fn display_date(metadata: &Metadata, config: &Config) -> String { } } -// There are a few peculiarities to how GNU formats the sizes: -// 1. One decimal place is given if and only if the size is smaller than 10 -// 2. It rounds sizes up. -// 3. The human-readable format uses powers for 1024, but does not display the "i" -// that is commonly used to denote Kibi, Mebi, etc. -// 4. Kibi and Kilo are denoted differently ("k" and "K", respectively) -fn format_prefixed(prefixed: &NumberPrefix) -> String { - match prefixed { - NumberPrefix::Standalone(bytes) => bytes.to_string(), - NumberPrefix::Prefixed(prefix, bytes) => { - // Remove the "i" from "Ki", "Mi", etc. if present - let prefix_str = prefix.symbol().trim_end_matches('i'); - - // Check whether we get more than 10 if we round up to the first decimal - // because we want do display 9.81 as "9.9", not as "10". - if (10.0 * bytes).ceil() >= 100.0 { - format!("{:.0}{}", bytes.ceil(), prefix_str) - } else { - format!("{:.1}{}", (10.0 * bytes).ceil() / 10.0, prefix_str) - } - } - } -} - #[allow(dead_code)] enum SizeOrDeviceId { Size(String), @@ -3104,13 +3073,7 @@ fn display_len_or_rdev(metadata: &Metadata, config: &Config) -> SizeOrDeviceId { } fn display_size(size: u64, config: &Config) -> String { - // NOTE: The human-readable behavior deviates from the GNU ls. - // The GNU ls uses binary prefixes by default. - match config.size_format { - SizeFormat::Binary => format_prefixed(&NumberPrefix::binary(size as f64)), - SizeFormat::Decimal => format_prefixed(&NumberPrefix::decimal(size as f64)), - SizeFormat::Bytes => size.to_string(), - } + human_readable(size, config.size_format) } #[cfg(unix)] diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 635ee140397..b13903bc6af 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -20,6 +20,7 @@ path = "src/lib/lib.rs" [dependencies] clap = { workspace = true } uucore_procs = { workspace = true } +number_prefix = { workspace = true } dns-lookup = { version = "2.0.4", optional = true } dunce = { version = "1.0.4", optional = true } wild = "2.2" diff --git a/src/uucore/src/lib/features/format/human.rs b/src/uucore/src/lib/features/format/human.rs new file mode 100644 index 00000000000..28d143a42e3 --- /dev/null +++ b/src/uucore/src/lib/features/format/human.rs @@ -0,0 +1,65 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +// spell-checker:ignore gnulibs sfmt + +//! `human`-size formatting +//! +//! Format sizes like gnulibs human_readable() would + +use number_prefix::NumberPrefix; + +#[derive(Copy, Clone, PartialEq)] +pub enum SizeFormat { + Bytes, + Binary, // Powers of 1024, --human-readable, -h + Decimal, // Powers of 1000, --si +} + +// There are a few peculiarities to how GNU formats the sizes: +// 1. One decimal place is given if and only if the size is smaller than 10 +// 2. It rounds sizes up. +// 3. The human-readable format uses powers for 1024, but does not display the "i" +// that is commonly used to denote Kibi, Mebi, etc. +// 4. Kibi and Kilo are denoted differently ("k" and "K", respectively) +fn format_prefixed(prefixed: &NumberPrefix) -> String { + match prefixed { + NumberPrefix::Standalone(bytes) => bytes.to_string(), + NumberPrefix::Prefixed(prefix, bytes) => { + // Remove the "i" from "Ki", "Mi", etc. if present + let prefix_str = prefix.symbol().trim_end_matches('i'); + + // Check whether we get more than 10 if we round up to the first decimal + // because we want do display 9.81 as "9.9", not as "10". + if (10.0 * bytes).ceil() >= 100.0 { + format!("{:.0}{}", bytes.ceil(), prefix_str) + } else { + format!("{:.1}{}", (10.0 * bytes).ceil() / 10.0, prefix_str) + } + } + } +} + +pub fn human_readable(size: u64, sfmt: SizeFormat) -> String { + match sfmt { + SizeFormat::Binary => format_prefixed(&NumberPrefix::binary(size as f64)), + SizeFormat::Decimal => format_prefixed(&NumberPrefix::decimal(size as f64)), + SizeFormat::Bytes => size.to_string(), + } +} + +#[cfg(test)] +#[test] +fn test_human_readable() { + let test_cases = [ + (133456345, SizeFormat::Binary, "128M"), + (12 * 1024 * 1024, SizeFormat::Binary, "12M"), + (8500, SizeFormat::Binary, "8.4K"), + ]; + + for &(size, sfmt, expected_str) in &test_cases { + assert_eq!(human_readable(size, sfmt), expected_str); + } +} diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 8f662080dcb..b82b5f62acf 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -32,6 +32,7 @@ mod argument; mod escape; +pub mod human; pub mod num_format; pub mod num_parser; mod spec; diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 2bc694acbc0..a1c7bfb9374 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (paths) atim sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty tsublink testfile1 testfile2 filelist testdir testfile +// spell-checker:ignore (paths) atim sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty tsublink testfile1 testfile2 filelist fpath testdir testfile #[cfg(not(windows))] use regex::Regex; @@ -543,6 +543,34 @@ fn test_du_h_flag_empty_file() { .stdout_only("0\tempty.txt\n"); } +#[test] +fn test_du_h_precision() { + let test_cases = [ + (133456345, "128M"), + (12 * 1024 * 1024, "12M"), + (8500, "8.4K"), + ]; + + for &(test_len, expected_output) in &test_cases { + let (at, mut ucmd) = at_and_ucmd!(); + + let fpath = at.plus("test.txt"); + std::fs::File::create(&fpath) + .expect("cannot create test file") + .set_len(test_len) + .expect("cannot truncate test len to size"); + ucmd.arg("-h") + .arg("--apparent-size") + .arg(&fpath) + .succeeds() + .stdout_only(format!( + "{}\t{}\n", + expected_output, + &fpath.to_string_lossy() + )); + } +} + #[cfg(feature = "touch")] #[test] fn test_du_time() {