diff --git a/src/uucore/src/lib/features/format/argument.rs b/src/uucore/src/lib/features/format/argument.rs index db18cf51890..92d6c1603fb 100644 --- a/src/uucore/src/lib/features/format/argument.rs +++ b/src/uucore/src/lib/features/format/argument.rs @@ -3,9 +3,14 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use crate::{ + error::set_exit_code, + features::format::num_parser::{ParseError, ParsedNumber}, + quoting_style::{escape_name, Quotes, QuotingStyle}, + show_error, show_warning, +}; use os_display::Quotable; - -use crate::{error::set_exit_code, show_warning}; +use std::ffi::OsStr; /// An argument for formatting /// @@ -40,16 +45,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::Char(c) => *c, - FormatArgument::Unparsed(s) => { - let mut chars = s.chars(); - let Some(c) = chars.next() else { - return '\0'; - }; - let None = chars.next() else { - return '\0'; - }; - c - } + FormatArgument::Unparsed(s) => s.chars().next().unwrap_or('\0'), _ => '\0', } } @@ -60,25 +56,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::UnsignedInt(n) => *n, - FormatArgument::Unparsed(s) => { - let opt = if let Some(s) = s.strip_prefix("0x") { - u64::from_str_radix(s, 16).ok() - } else if let Some(s) = s.strip_prefix('0') { - u64::from_str_radix(s, 8).ok() - } else if let Some(s) = s.strip_prefix('\'') { - s.chars().next().map(|c| c as u64) - } else { - s.parse().ok() - }; - match opt { - Some(n) => n, - None => { - show_warning!("{}: expected a numeric value", s.quote()); - set_exit_code(1); - 0 - } - } - } + FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_u64(s), s), _ => 0, } } @@ -89,29 +67,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::SignedInt(n) => *n, - FormatArgument::Unparsed(s) => { - // For hex, we parse `u64` because we do not allow another - // minus sign. 
We might need to do more precise parsing here. - let opt = if let Some(s) = s.strip_prefix("-0x") { - u64::from_str_radix(s, 16).ok().map(|x| -(x as i64)) - } else if let Some(s) = s.strip_prefix("0x") { - u64::from_str_radix(s, 16).ok().map(|x| x as i64) - } else if s.starts_with("-0") || s.starts_with('0') { - i64::from_str_radix(s, 8).ok() - } else if let Some(s) = s.strip_prefix('\'') { - s.chars().next().map(|x| x as i64) - } else { - s.parse().ok() - }; - match opt { - Some(n) => n, - None => { - show_warning!("{}: expected a numeric value", s.quote()); - set_exit_code(1); - 0 - } - } - } + FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_i64(s), s), _ => 0, } } @@ -122,23 +78,7 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { }; match next { FormatArgument::Float(n) => *n, - FormatArgument::Unparsed(s) => { - let opt = if s.starts_with("0x") || s.starts_with("-0x") { - unimplemented!("Hexadecimal floats are unimplemented!") - } else if let Some(s) = s.strip_prefix('\'') { - s.chars().next().map(|x| x as u64 as f64) - } else { - s.parse().ok() - }; - match opt { - Some(n) => n, - None => { - show_warning!("{}: expected a numeric value", s.quote()); - set_exit_code(1); - 0.0 - } - } - } + FormatArgument::Unparsed(s) => extract_value(ParsedNumber::parse_f64(s), s), _ => 0.0, } } @@ -150,3 +90,39 @@ impl<'a, T: Iterator> ArgumentIter<'a> for T { } } } + +fn extract_value(p: Result>, input: &str) -> T { + match p { + Ok(v) => v, + Err(e) => { + set_exit_code(1); + let input = escape_name( + OsStr::new(input), + &QuotingStyle::C { + quotes: Quotes::None, + }, + ); + match e { + ParseError::Overflow => { + show_error!("{}: Numerical result out of range", input.quote()); + Default::default() + } + ParseError::NotNumeric => { + show_error!("{}: expected a numeric value", input.quote()); + Default::default() + } + ParseError::PartialMatch(v, rest) => { + if input.starts_with('\'') { + show_warning!( + "{}: character(s) following character constant 
have been ignored", + &rest, + ); + } else { + show_error!("{}: value not completely converted", input.quote()); + } + v + } + } + } + } +} diff --git a/src/uucore/src/lib/features/format/mod.rs b/src/uucore/src/lib/features/format/mod.rs index 4d30753d610..8f662080dcb 100644 --- a/src/uucore/src/lib/features/format/mod.rs +++ b/src/uucore/src/lib/features/format/mod.rs @@ -33,6 +33,7 @@ mod argument; mod escape; pub mod num_format; +pub mod num_parser; mod spec; pub use argument::*; diff --git a/src/uucore/src/lib/features/format/num_parser.rs b/src/uucore/src/lib/features/format/num_parser.rs new file mode 100644 index 00000000000..0d65651d7e9 --- /dev/null +++ b/src/uucore/src/lib/features/format/num_parser.rs @@ -0,0 +1,378 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! Utilities for parsing numbers in various formats + +// spell-checker:ignore powf copysign prec inity + +#[derive(Clone, Copy, PartialEq)] +pub enum Base { + Binary = 2, + Octal = 8, + Decimal = 10, + Hexadecimal = 16, +} + +impl Base { + pub fn digit(&self, c: char) -> Option { + fn from_decimal(c: char) -> u64 { + u64::from(c) - u64::from('0') + } + match self { + Self::Binary => ('0'..='1').contains(&c).then(|| from_decimal(c)), + Self::Octal => ('0'..='7').contains(&c).then(|| from_decimal(c)), + Self::Decimal => c.is_ascii_digit().then(|| from_decimal(c)), + Self::Hexadecimal => match c.to_ascii_lowercase() { + '0'..='9' => Some(from_decimal(c)), + c @ 'a'..='f' => Some(u64::from(c) - u64::from('a') + 10), + _ => None, + }, + } + } +} + +/// Type returned if a number could not be parsed in its entirety +#[derive(Debug, PartialEq)] +pub enum ParseError<'a, T> { + /// The input as a whole makes no sense + NotNumeric, + /// The beginning of the input made sense and has been parsed, + /// while the remaining doesn't. 
+ PartialMatch(T, &'a str), + /// The integral part has overflowed the requested type, or + /// has overflowed the `u64` internal storage when parsing the + /// integral part of a floating point number. + Overflow, +} + +impl<'a, T> ParseError<'a, T> { + fn map(self, f: impl FnOnce(T, &'a str) -> ParseError<'a, U>) -> ParseError<'a, U> { + match self { + Self::NotNumeric => ParseError::NotNumeric, + Self::Overflow => ParseError::Overflow, + Self::PartialMatch(v, s) => f(v, s), + } + } +} + +/// A number parser for binary, octal, decimal, hexadecimal and single characters. +/// +/// Internally, in order to get the maximum possible precision and cover the full +/// range of u64 and i64 without losing precision for f64, the returned number is +/// decomposed into: +/// - A `base` value +/// - A `negative` sign bit +/// - An `integral` positive part +/// - A `fractional` positive part +/// - A `precision` representing the number of digits in the fractional part +/// +/// If the fractional part cannot be represented on a `u64`, parsing continues +/// silently by ignoring non-significant digits. +pub struct ParsedNumber { + base: Base, + negative: bool, + integral: u64, + fractional: u64, + precision: usize, +} + +impl ParsedNumber { + fn into_i64(self) -> Option { + if self.negative { + i64::try_from(-i128::from(self.integral)).ok() + } else { + i64::try_from(self.integral).ok() + } + } + + /// Parse a number as i64. No fractional part is allowed. + pub fn parse_i64(input: &str) -> Result> { + match Self::parse(input, true) { + Ok(v) => v.into_i64().ok_or(ParseError::Overflow), + Err(e) => Err(e.map(|v, rest| { + v.into_i64() + .map(|v| ParseError::PartialMatch(v, rest)) + .unwrap_or(ParseError::Overflow) + })), + } + } + + /// Parse a number as u64. No fractional part is allowed. 
+ pub fn parse_u64(input: &str) -> Result> { + match Self::parse(input, true) { + Ok(v) | Err(ParseError::PartialMatch(v, _)) if v.negative => { + Err(ParseError::NotNumeric) + } + Ok(v) => Ok(v.integral), + Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.integral, rest))), + } + } + + fn into_f64(self) -> f64 { + let n = self.integral as f64 + + (self.fractional as f64) / (self.base as u8 as f64).powf(self.precision as f64); + if self.negative { + -n + } else { + n + } + } + + /// Parse a number as f64 + pub fn parse_f64(input: &str) -> Result> { + match Self::parse(input, false) { + Ok(v) => Ok(v.into_f64()), + Err(ParseError::NotNumeric) => Self::parse_f64_special_values(input), + Err(e) => Err(e.map(|v, rest| ParseError::PartialMatch(v.into_f64(), rest))), + } + } + + fn parse_f64_special_values(input: &str) -> Result> { + let (sign, rest) = if let Some(input) = input.strip_prefix('-') { + (-1.0, input) + } else { + (1.0, input) + }; + let prefix = rest + .chars() + .take(3) + .map(|c| c.to_ascii_lowercase()) + .collect::(); + let special = match prefix.as_str() { + "inf" => f64::INFINITY, + "nan" => f64::NAN, + _ => return Err(ParseError::NotNumeric), + } + .copysign(sign); + if rest.len() == 3 { + Ok(special) + } else { + Err(ParseError::PartialMatch(special, &rest[3..])) + } + } + + #[allow(clippy::cognitive_complexity)] + fn parse(input: &str, integral_only: bool) -> Result> { + // Parse the "'" prefix separately + if let Some(rest) = input.strip_prefix('\'') { + let mut chars = rest.char_indices().fuse(); + let v = chars.next().map(|(_, c)| Self { + base: Base::Decimal, + negative: false, + integral: u64::from(c), + fractional: 0, + precision: 0, + }); + return match (v, chars.next()) { + (Some(v), None) => Ok(v), + (Some(v), Some((i, _))) => Err(ParseError::PartialMatch(v, &rest[i..])), + (None, _) => Err(ParseError::NotNumeric), + }; + } + + // Initial minus sign + let (negative, unsigned) = if let Some(input) = input.strip_prefix('-') { + 
(true, input) + } else { + (false, input) + }; + + // Parse an optional base prefix ("0b" / "0B" / "0" / "0x" / "0X"). "0" is octal unless a + // fractional part is allowed in which case it is an insignificant leading 0. A "0" prefix + // will not be consumed in case the parsable string contains only "0": the leading extra "0" + // will have no influence on the result. + let (base, rest) = if let Some(rest) = unsigned.strip_prefix('0') { + if let Some(rest) = rest.strip_prefix(['b', 'B']) { + (Base::Binary, rest) + } else if let Some(rest) = rest.strip_prefix(['x', 'X']) { + (Base::Hexadecimal, rest) + } else if integral_only { + (Base::Octal, unsigned) + } else { + (Base::Decimal, unsigned) + } + } else { + (Base::Decimal, unsigned) + }; + if rest.is_empty() { + return Err(ParseError::NotNumeric); + } + + // Parse the integral part of the number + let mut chars = rest.chars().enumerate().fuse().peekable(); + let mut integral = 0u64; + while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { + chars.next(); + integral = integral + .checked_mul(base as u64) + .and_then(|n| n.checked_add(d)) + .ok_or(ParseError::Overflow)?; + } + + // Parse the fractional part of the number if there can be one and the input contains + // a '.' decimal separator. + let (mut fractional, mut precision) = (0u64, 0); + if matches!(chars.peek(), Some(&(_, '.'))) + && matches!(base, Base::Decimal | Base::Hexadecimal) + && !integral_only + { + chars.next(); + let mut ended = false; + while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) { + chars.next(); + if !ended { + if let Some(f) = fractional + .checked_mul(base as u64) + .and_then(|n| n.checked_add(d)) + { + (fractional, precision) = (f, precision + 1); + } else { + ended = true; + } + } + } + } + + // If nothing has been parsed, declare the parsing unsuccessful + if let Some((0, _)) = chars.peek() { + return Err(ParseError::NotNumeric); + } + + // Return what has been parsed so far. 
If there are extra characters, mark the + // parsing as a partial match. + let parsed = Self { + base, + negative, + integral, + fractional, + precision, + }; + if let Some((first_unparsed, _)) = chars.next() { + Err(ParseError::PartialMatch(parsed, &rest[first_unparsed..])) + } else { + Ok(parsed) + } + } +} + +#[cfg(test)] +mod tests { + use super::{ParseError, ParsedNumber}; + + #[test] + fn test_decimal_u64() { + assert_eq!(Ok(123), ParsedNumber::parse_u64("123")); + assert_eq!( + Ok(u64::MAX), + ParsedNumber::parse_u64(&format!("{}", u64::MAX)) + ); + assert!(matches!( + ParsedNumber::parse_u64("-123"), + Err(ParseError::NotNumeric) + )); + assert!(matches!( + ParsedNumber::parse_u64(""), + Err(ParseError::NotNumeric) + )); + assert!(matches!( + ParsedNumber::parse_u64("123.15"), + Err(ParseError::PartialMatch(123, ".15")) + )); + } + + #[test] + fn test_decimal_i64() { + assert_eq!(Ok(123), ParsedNumber::parse_i64("123")); + assert_eq!(Ok(-123), ParsedNumber::parse_i64("-123")); + assert!(matches!( + ParsedNumber::parse_i64("--123"), + Err(ParseError::NotNumeric) + )); + assert_eq!( + Ok(i64::MAX), + ParsedNumber::parse_i64(&format!("{}", i64::MAX)) + ); + assert_eq!( + Ok(i64::MIN), + ParsedNumber::parse_i64(&format!("{}", i64::MIN)) + ); + assert!(matches!( + ParsedNumber::parse_i64(&format!("{}", u64::MAX)), + Err(ParseError::Overflow) + )); + assert!(matches!( + ParsedNumber::parse_i64(&format!("{}", i64::MAX as u64 + 1)), + Err(ParseError::Overflow) + )); + } + + #[test] + fn test_decimal_f64() { + assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123")); + assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123")); + assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123.")); + assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123.")); + assert_eq!(Ok(123.0), ParsedNumber::parse_f64("123.0")); + assert_eq!(Ok(-123.0), ParsedNumber::parse_f64("-123.0")); + assert_eq!(Ok(123.15), ParsedNumber::parse_f64("123.15")); + assert_eq!(Ok(-123.15), 
ParsedNumber::parse_f64("-123.15")); + assert_eq!(Ok(0.15), ParsedNumber::parse_f64(".15")); + assert_eq!(Ok(-0.15), ParsedNumber::parse_f64("-.15")); + assert_eq!( + Ok(0.15), + ParsedNumber::parse_f64(".150000000000000000000000000231313") + ); + assert!(matches!(ParsedNumber::parse_f64("1.2.3"), + Err(ParseError::PartialMatch(f, ".3")) if f == 1.2)); + assert_eq!(Ok(f64::INFINITY), ParsedNumber::parse_f64("inf")); + assert_eq!(Ok(f64::NEG_INFINITY), ParsedNumber::parse_f64("-inf")); + assert!(ParsedNumber::parse_f64("NaN").unwrap().is_nan()); + assert!(ParsedNumber::parse_f64("NaN").unwrap().is_sign_positive()); + assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_nan()); + assert!(ParsedNumber::parse_f64("-NaN").unwrap().is_sign_negative()); + assert!(matches!(ParsedNumber::parse_f64("-infinity"), + Err(ParseError::PartialMatch(f, "inity")) if f == f64::NEG_INFINITY)); + assert!(ParsedNumber::parse_f64(&format!("{}", u64::MAX)).is_ok()); + assert!(ParsedNumber::parse_f64(&format!("{}", i64::MIN)).is_ok()); + } + + #[test] + fn test_hexadecimal() { + assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0x123")); + assert_eq!(Ok(0x123), ParsedNumber::parse_u64("0X123")); + assert_eq!(Ok(0xfe), ParsedNumber::parse_u64("0xfE")); + assert_eq!(Ok(-0x123), ParsedNumber::parse_i64("-0x123")); + + assert_eq!(Ok(0.5), ParsedNumber::parse_f64("0x.8")); + assert_eq!(Ok(0.0625), ParsedNumber::parse_f64("0x.1")); + assert_eq!(Ok(15.0078125), ParsedNumber::parse_f64("0xf.02")); + } + + #[test] + fn test_octal() { + assert_eq!(Ok(0), ParsedNumber::parse_u64("0")); + assert_eq!(Ok(0o123), ParsedNumber::parse_u64("0123")); + assert_eq!(Ok(0o123), ParsedNumber::parse_u64("00123")); + assert_eq!(Ok(0), ParsedNumber::parse_u64("00")); + assert!(matches!( + ParsedNumber::parse_u64("008"), + Err(ParseError::PartialMatch(0, "8")) + )); + assert!(matches!( + ParsedNumber::parse_u64("08"), + Err(ParseError::PartialMatch(0, "8")) + )); + assert!(matches!( + ParsedNumber::parse_u64("0."), + 
Err(ParseError::PartialMatch(0, ".")) + )); + } + + #[test] + fn test_binary() { + assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0b1011")); + assert_eq!(Ok(0b1011), ParsedNumber::parse_u64("0B1011")); + } +} diff --git a/tests/by-util/test_printf.rs b/tests/by-util/test_printf.rs index dfd13159043..48fc1e6ace2 100644 --- a/tests/by-util/test_printf.rs +++ b/tests/by-util/test_printf.rs @@ -162,6 +162,14 @@ fn sub_char() { .stdout_only("the letter A"); } +#[test] +fn sub_char_from_string() { + new_ucmd!() + .args(&["%c%c%c", "five", "%", "oval"]) + .succeeds() + .stdout_only("f%o"); +} + #[test] fn sub_num_int() { new_ucmd!() @@ -427,7 +435,6 @@ fn sub_float_dec_places() { } #[test] -#[ignore = "hexadecimal floats are unimplemented"] fn sub_float_hex_in() { new_ucmd!() .args(&["%f", "0xF1.1F"]) @@ -591,3 +598,44 @@ fn sub_general_round_float_leading_zeroes() { .succeeds() .stdout_only("1.00001"); } + +#[test] +fn partial_float() { + new_ucmd!() + .args(&["%.2f is %s", "42.03x", "a lot"]) + .fails() + .code_is(1) + .stdout_is("42.03 is a lot") + .stderr_is("printf: '42.03x': value not completely converted\n"); +} + +#[test] +fn partial_integer() { + new_ucmd!() + .args(&["%d is %s", "42x23", "a lot"]) + .fails() + .code_is(1) + .stdout_is("42 is a lot") + .stderr_is("printf: '42x23': value not completely converted\n"); +} + +#[test] +fn test_overflow() { + new_ucmd!() + .args(&["%d", "36893488147419103232"]) + .fails() + .code_is(1) + .stderr_is("printf: '36893488147419103232': Numerical result out of range\n"); +} + +#[test] +fn partial_char() { + new_ucmd!() + .args(&["%d", "'abc"]) + .fails() + .code_is(1) + .stdout_is("97") + .stderr_is( + "printf: warning: bc: character(s) following character constant have been ignored\n", + ); +}