From 398f421f2a201b86fb0fd13e9c4981ba86cb4d7c Mon Sep 17 00:00:00 2001 From: Petr Portnov Date: Thu, 24 Nov 2022 00:10:34 +0300 Subject: [PATCH 1/6] feat: add intrinsics for numeric parsing --- crates/jrsonnet-stdlib/src/lib.rs | 5 +- crates/jrsonnet-stdlib/src/strings.rs | 89 +++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) diff --git a/crates/jrsonnet-stdlib/src/lib.rs b/crates/jrsonnet-stdlib/src/lib.rs index 103fe07b..dc385427 100644 --- a/crates/jrsonnet-stdlib/src/lib.rs +++ b/crates/jrsonnet-stdlib/src/lib.rs @@ -128,6 +128,9 @@ pub fn stdlib_uncached(settings: Rc>) -> ObjValue { ("asciiUpper", builtin_ascii_upper::INST), ("asciiLower", builtin_ascii_lower::INST), ("findSubstr", builtin_find_substr::INST), + ("parseInt", builtin_parse_int::INST), + ("parseOctal", builtin_parse_octal::INST), + ("parseHex", builtin_parse_hex::INST), // Misc ("length", builtin_length::INST), ("startsWith", builtin_starts_with::INST), @@ -312,7 +315,7 @@ impl jrsonnet_evaluator::ContextInitializer for ContextInitializer { out.build() } #[cfg(feature = "legacy-this-file")] - fn initialize(&self, s: State, source: Source) -> jrsonnet_evaluator::Context { + fn initialize(&self, s: State, source: Source) -> Context { let mut builder = ObjValueBuilder::new(); builder.with_super(self.stdlib_obj.clone()); builder diff --git a/crates/jrsonnet-stdlib/src/strings.rs b/crates/jrsonnet-stdlib/src/strings.rs index 2d72a95a..742497eb 100644 --- a/crates/jrsonnet-stdlib/src/strings.rs +++ b/crates/jrsonnet-stdlib/src/strings.rs @@ -1,6 +1,7 @@ use jrsonnet_evaluator::{ error::{ErrorKind::*, Result}, function::builtin, + throw, typed::{Either2, VecVal, M1}, val::ArrValue, Either, IStr, Val, @@ -73,3 +74,91 @@ pub fn builtin_find_substr(pat: IStr, str: IStr) -> Result { } Ok(out.into()) } + +#[builtin] +pub fn builtin_parse_int(raw: IStr) -> Result { + let mut chars = raw.chars(); + if let Some(first_char) = chars.next() { + if first_char == '-' { + let remaining = chars.as_str(); + if remaining.is_empty() { + throw!("Not an integer: \"{}\"", raw); + } + parse_nat::<10>(remaining).map(|value| -value) + } else { + parse_nat::<10>(raw.as_str()) + } + } else { + throw!("Not an integer: \"{}\"", raw); + } +} + +#[builtin] +pub fn builtin_parse_octal(raw: IStr) -> Result { + if raw.is_empty() { + throw!("Not an octal number: \"\""); + } + + parse_nat::<8>(raw.as_str()) +} + +#[builtin] +pub fn builtin_parse_hex(raw: IStr) -> Result { + if raw.is_empty() { + throw!("Not hexadecimal: \"\""); + } + + parse_nat::<16>(raw.as_str()) +} + +fn parse_nat(raw: &str) -> Result { + debug_assert!( + 1 <= BASE && BASE <= 16, + "integer base should be between 1 and 16" + ); + + const ZERO_CODE: u32 = '0' as u32; + const UPPER_A_CODE: u32 = 'A' as u32; + const LOWER_A_CODE: u32 = 'a' as u32; + + #[inline] + fn checked_sub_if(condition: bool, lhs: u32, rhs: u32) -> Option { + if condition { + lhs.checked_sub(rhs) + } else { + None + } + } + + let base = BASE as f64; + + raw.chars().try_fold(0f64, |aggregate, digit| { + let digit = digit as u32; + let digit = if let Some(digit) = checked_sub_if(BASE > 10, digit, LOWER_A_CODE) { + digit + 10 + } else if let Some(digit) = checked_sub_if(BASE > 10, digit, UPPER_A_CODE) { + digit + 10 + } else { + digit.checked_sub(ZERO_CODE).unwrap_or(BASE) + }; + + if digit < BASE { + Ok(base * aggregate + digit as f64) + } else { + throw!("{raw} is not a base {BASE} integer",); + } + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_nat_base_10() { + assert_eq!(parse_nat::<10>("0").unwrap(), 0.); + assert_eq!(parse_nat::<10>("3").unwrap(), 3.); + assert_eq!(parse_nat::<10>("27").unwrap(), 10. * 2. + 7.); + assert_eq!(parse_nat::<10>("123").unwrap(), 10. * (10. * 1. + 2.) + 3.); + } +} From 2d81df4933d9fbf1020a65a1504ec5a9d519da2a Mon Sep 17 00:00:00 2001 From: Petr Portnov Date: Thu, 24 Nov 2022 00:25:29 +0300 Subject: [PATCH 2/6] feat: enhance messages of intensified numeric parsers --- crates/jrsonnet-stdlib/src/strings.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/jrsonnet-stdlib/src/strings.rs b/crates/jrsonnet-stdlib/src/strings.rs index 742497eb..951b38d2 100644 --- a/crates/jrsonnet-stdlib/src/strings.rs +++ b/crates/jrsonnet-stdlib/src/strings.rs @@ -82,21 +82,21 @@ pub fn builtin_parse_int(raw: IStr) -> Result { if first_char == '-' { let remaining = chars.as_str(); if remaining.is_empty() { - throw!("Not an integer: \"{}\"", raw); + throw!("Integer only consists of a minus"); } parse_nat::<10>(remaining).map(|value| -value) } else { parse_nat::<10>(raw.as_str()) } } else { - throw!("Not an integer: \"{}\"", raw); + throw!("Empty decimal integer \"{}\"", raw); } } #[builtin] pub fn builtin_parse_octal(raw: IStr) -> Result { if raw.is_empty() { - throw!("Not an octal number: \"\""); + throw!("Empty octal integer"); } parse_nat::<8>(raw.as_str()) @@ -105,7 +105,7 @@ pub fn builtin_parse_octal(raw: IStr) -> Result { #[builtin] pub fn builtin_parse_hex(raw: IStr) -> Result { if raw.is_empty() { - throw!("Not hexadecimal: \"\""); + throw!("Empty hexadecimal integer"); } parse_nat::<16>(raw.as_str()) @@ -145,7 +145,7 @@ fn parse_nat(raw: &str) -> Result { if digit < BASE { Ok(base * aggregate + digit as f64) } else { - throw!("{raw} is not a base {BASE} integer",); + throw!("\"{raw}\" is not a base {BASE} integer",); } }) } From b4d7701301994c939ca114c922fc1ec23c2735a0 Mon Sep 17 00:00:00 2001 From: Petr Portnov | PROgrm_JARvis Date: Thu, 24 Nov 2022 00:29:16 +0300 Subject: [PATCH 3/6] chore: use debug-form of strings when printing errors in parse intrinsics Co-authored-by: Yaroslav Bolyukin --- crates/jrsonnet-stdlib/src/strings.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/jrsonnet-stdlib/src/strings.rs b/crates/jrsonnet-stdlib/src/strings.rs index 951b38d2..bae0e0d5 100644 --- a/crates/jrsonnet-stdlib/src/strings.rs +++ b/crates/jrsonnet-stdlib/src/strings.rs @@ -50,6 +50,10 @@ pub fn builtin_ascii_lower(str: IStr) -> Result { Ok(str.to_ascii_lowercase()) } +pub fn repeat(what: Either![IStr, ArrValue], count: i32) { + joi +} + #[builtin] pub fn builtin_find_substr(pat: IStr, str: IStr) -> Result { if pat.is_empty() || str.is_empty() || pat.len() > str.len() { @@ -89,7 +93,7 @@ pub fn builtin_parse_int(raw: IStr) -> Result { parse_nat::<10>(raw.as_str()) } } else { - throw!("Empty decimal integer \"{}\"", raw); + throw!("Empty decimal integer",); } } @@ -145,7 +149,7 @@ fn parse_nat(raw: &str) -> Result { if digit < BASE { Ok(base * aggregate + digit as f64) } else { - throw!("\"{raw}\" is not a base {BASE} integer",); + throw!("{raw:?} is not a base {BASE} integer",); } }) } From 4de6d4c2f2041c1c7e2b94e4de5d271ddfaea8d6 Mon Sep 17 00:00:00 2001 From: Petr Portnov Date: Thu, 24 Nov 2022 00:41:26 +0300 Subject: [PATCH 4/6] chore: simplify `builtin_parse_int` --- crates/jrsonnet-stdlib/src/strings.rs | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/crates/jrsonnet-stdlib/src/strings.rs b/crates/jrsonnet-stdlib/src/strings.rs index bae0e0d5..47d59433 100644 --- a/crates/jrsonnet-stdlib/src/strings.rs +++ b/crates/jrsonnet-stdlib/src/strings.rs @@ -50,10 +50,6 @@ pub fn builtin_ascii_lower(str: IStr) -> Result { Ok(str.to_ascii_lowercase()) } -pub fn repeat(what: Either![IStr, ArrValue], count: i32) { - joi -} - #[builtin] pub fn builtin_find_substr(pat: IStr, str: IStr) -> Result { if pat.is_empty() || str.is_empty() || pat.len() > str.len() { @@ -81,19 +77,18 @@ pub fn builtin_find_substr(pat: IStr, str: IStr) -> Result { #[builtin] pub fn builtin_parse_int(raw: IStr) -> Result { - let mut chars = raw.chars(); - if let Some(first_char) = chars.next() { - if first_char == '-' { - let remaining = chars.as_str(); - if remaining.is_empty() { - throw!("Integer only consists of a minus"); - } - parse_nat::<10>(remaining).map(|value| -value) - } else { - parse_nat::<10>(raw.as_str()) + if let Some(raw) = raw.strip_prefix('-') { + if raw.is_empty() { + throw!("integer only consists of a minus") } + + parse_nat::<10>(raw).map(|value| -value) } else { - throw!("Empty decimal integer",); + if raw.is_empty() { + throw!("empty integer") + } + + parse_nat::<10>(raw.as_str()) } } From c11ac9dc4b1b192694598e961dff53de46312bfb Mon Sep 17 00:00:00 2001 From: Petr Portnov Date: Thu, 24 Nov 2022 00:43:50 +0300 Subject: [PATCH 5/6] chore: start errors from lowercase in parse intrinsics --- crates/jrsonnet-stdlib/src/strings.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/jrsonnet-stdlib/src/strings.rs b/crates/jrsonnet-stdlib/src/strings.rs index 47d59433..4bd3877a 100644 --- a/crates/jrsonnet-stdlib/src/strings.rs +++ b/crates/jrsonnet-stdlib/src/strings.rs @@ -95,7 +95,7 @@ pub fn builtin_parse_int(raw: IStr) -> Result { #[builtin] pub fn builtin_parse_octal(raw: IStr) -> Result { if raw.is_empty() { - throw!("Empty octal integer"); + throw!("empty octal integer"); } parse_nat::<8>(raw.as_str()) @@ -104,7 +104,7 @@ pub fn builtin_parse_octal(raw: IStr) -> Result { #[builtin] pub fn builtin_parse_hex(raw: IStr) -> Result { if raw.is_empty() { - throw!("Empty hexadecimal integer"); + throw!("empty hexadecimal integer"); } parse_nat::<16>(raw.as_str()) From 1e1f97c48a9ab6709a0473460e2789997ff9eb12 Mon Sep 17 00:00:00 2001 From: Petr Portnov Date: Thu, 24 Nov 2022 00:54:29 +0300 Subject: [PATCH 6/6] chore: enhance `parse_nat` tests --- crates/jrsonnet-stdlib/src/strings.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/crates/jrsonnet-stdlib/src/strings.rs b/crates/jrsonnet-stdlib/src/strings.rs index 4bd3877a..fe070df5 100644 --- a/crates/jrsonnet-stdlib/src/strings.rs +++ b/crates/jrsonnet-stdlib/src/strings.rs @@ -153,11 +153,27 @@ fn parse_nat(raw: &str) -> Result { mod tests { use super::*; + #[test] + fn parse_nat_base_8() { + assert_eq!(parse_nat::<8>("0").unwrap(), 0.); + assert_eq!(parse_nat::<8>("5").unwrap(), 5.); + assert_eq!(parse_nat::<8>("32").unwrap(), 0o32 as f64); + assert_eq!(parse_nat::<8>("761").unwrap(), 0o761 as f64); + } + #[test] fn parse_nat_base_10() { assert_eq!(parse_nat::<10>("0").unwrap(), 0.); assert_eq!(parse_nat::<10>("3").unwrap(), 3.); - assert_eq!(parse_nat::<10>("27").unwrap(), 10. * 2. + 7.); - assert_eq!(parse_nat::<10>("123").unwrap(), 10. * (10. * 1. + 2.) + 3.); + assert_eq!(parse_nat::<10>("27").unwrap(), 27.); + assert_eq!(parse_nat::<10>("123").unwrap(), 123.); + } + + #[test] + fn parse_nat_base_16() { + assert_eq!(parse_nat::<16>("0").unwrap(), 0.); + assert_eq!(parse_nat::<16>("A").unwrap(), 10.); + assert_eq!(parse_nat::<16>("a9").unwrap(), 0xA9 as f64); + assert_eq!(parse_nat::<16>("BbC").unwrap(), 0xBBC as f64); } }