diff --git a/README.md b/README.md index 7451c73..be4c7a1 100644 --- a/README.md +++ b/README.md @@ -332,14 +332,14 @@ Memory usage might change in future versions, but I'll try to keep this table up | Features | ROM, bytes | Static RAM, bytes | |---------------------------------|:----------:|:-----------------:| -| | 10146 | 317 | -| `autocomplete` | 12008 | 333 | -| `history` | 12246 | 358 | -| `autocomplete` `history` | 13506 | 374 | -| `help` | 14362 | 587 | -| `autocomplete` `help` | 15278 | 599 | -| `history` `help` | 16456 | 628 | -| `autocomplete` `history` `help` | 16344 | 640 | +| | 10120 | 274 | +| `autocomplete` | 12116 | 290 | +| `history` | 12406 | 315 | +| `autocomplete` `history` | 13704 | 331 | +| `help` | 14216 | 544 | +| `autocomplete` `help` | 15290 | 556 | +| `history` `help` | 16488 | 585 | +| `autocomplete` `history` `help` | 16594 | 597 | This table is generated using this [script](examples/arduino/memory.sh). As table shows, enabling help adds quite a lot to memory usage since help usually requires a lot of text to be stored. diff --git a/embedded-cli-macros/src/command/help.rs b/embedded-cli-macros/src/command/help.rs index 113b885..11e3c33 100644 --- a/embedded-cli-macros/src/command/help.rs +++ b/embedded-cli-macros/src/command/help.rs @@ -199,7 +199,7 @@ fn create_command_help(command: &Command) -> TokenStream { let mut state = States::Normal; let mut args = command.args().args(); - while let Some(Ok(arg)) = args.next() { + while let Some(arg) = args.next() { match arg { #(#option_name_arms)* #(#option_value_arms)* diff --git a/embedded-cli-macros/src/command/model.rs b/embedded-cli-macros/src/command/model.rs index faf87e9..cc2564c 100644 --- a/embedded-cli-macros/src/command/model.rs +++ b/embedded-cli-macros/src/command/model.rs @@ -154,23 +154,11 @@ impl CommandArg { ShortName::Generated => field_name.chars().next().unwrap(), ShortName::Fixed(c) => c, }); - if let Some(short) = short { - if !short.is_ascii_alphabetic() { - return Err(Error::custom("Flag char must be alphabetic ASCII")); - } - } let long = arg_attrs.long.map(|s| match s { LongName::Generated => field_name.from_case(Case::Snake).to_case(Case::Kebab), LongName::Fixed(name) => name, }); - if let Some(long) = &long { - if long.chars().any(|c| !c.is_ascii_alphanumeric() && c != '-') { - return Err(Error::custom( - "Option name must consist of alphanumeric ASCII chars", - )); - } - } let aa = TypedArg::new(&field.ty); diff --git a/embedded-cli-macros/src/command/parse.rs b/embedded-cli-macros/src/command/parse.rs index 99d5c88..747200b 100644 --- a/embedded-cli-macros/src/command/parse.rs +++ b/embedded-cli-macros/src/command/parse.rs @@ -246,9 +246,6 @@ fn create_arg_parsing(command: &Command) -> (TokenStream, Vec) { let mut args = command.args().args(); while let Some(arg) = args.next() { - let arg = arg.map_err(|err| match err { - _cli::arguments::ArgError::NonAsciiShortOption => _cli::service::ParseError::NonAsciiShortOption - })?; match arg { #(#option_name_arms)* #(#option_value_arms)* diff --git a/embedded-cli/src/arguments.rs b/embedded-cli/src/arguments.rs index 52f17d7..e7cad70 100644 --- a/embedded-cli/src/arguments.rs +++ b/embedded-cli/src/arguments.rs @@ -1,4 +1,7 @@ -use crate::token::{Tokens, TokensIter}; +use crate::{ + token::{Tokens, TokensIter}, + utils, +}; #[derive(Debug, Eq, PartialEq)] pub enum Arg<'a> { @@ -13,8 +16,7 @@ pub enum Arg<'a> { /// `--config` will be a long option with name `config` LongOption(&'a str), - /// Short option. Only single ASCII char is stored (without `-`). - /// UTF-8 here is not supported. + /// Short option. Only single UTF-8 char is stored (without `-`). /// /// In `get --config normal -f file -vs` /// `-f` and `-vs` will be short options. @@ -50,18 +52,13 @@ impl<'a> PartialEq for ArgList<'a> { } } -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum ArgError { - NonAsciiShortOption, -} - #[derive(Debug)] pub struct ArgsIter<'a> { values_only: bool, - /// Short options (ASCII chars) that + /// Short options (utf8 chars) that /// are left from previous iteration - leftover: &'a [u8], + leftover: &'a str, tokens: TokensIter<'a>, } @@ -70,7 +67,7 @@ impl<'a> ArgsIter<'a> { fn new(tokens: TokensIter<'a>) -> Self { Self { values_only: false, - leftover: &[], + leftover: "", tokens, } } @@ -85,31 +82,19 @@ impl<'a> ArgsIter<'a> { } impl<'a> Iterator for ArgsIter<'a> { - type Item = Result, ArgError>; + type Item = Arg<'a>; fn next(&mut self) -> Option { - fn process_leftover<'a>(byte: u8) -> Result, ArgError> { - if byte.is_ascii_alphabetic() { - // SAFETY: we checked that this is alphabetic ASCII - Ok(Arg::ShortOption(unsafe { - char::from_u32_unchecked(byte as u32) - })) - } else { - Err(ArgError::NonAsciiShortOption) - } - } - - if !self.leftover.is_empty() { - let byte = self.leftover[0]; - self.leftover = &self.leftover[1..]; - return Some(process_leftover(byte)); + if let Some((opt, leftover)) = utils::char_pop_front(self.leftover) { + self.leftover = leftover; + return Some(Arg::ShortOption(opt)); } let raw = self.tokens.next()?; let bytes = raw.as_bytes(); if self.values_only { - return Some(Ok(Arg::Value(raw))); + return Some(Arg::Value(raw)); } let token = if bytes.len() > 1 && bytes[0] == b'-' { @@ -121,14 +106,17 @@ impl<'a> Iterator for ArgsIter<'a> { Arg::LongOption(unsafe { raw.get_unchecked(2..) }) } } else { - self.leftover = &bytes[2..]; - return Some(process_leftover(bytes[1])); + let (opt, leftover) = + unsafe { utils::char_pop_front(raw.get_unchecked(1..)).unwrap_unchecked() }; + self.leftover = leftover; + + return Some(Arg::ShortOption(opt)); } } else { Arg::Value(raw) }; - Some(Ok(token)) + Some(token) } } @@ -176,36 +164,35 @@ mod tests { use crate::{arguments::ArgList, token::Tokens}; - use super::{Arg, ArgError}; + use super::Arg; #[rstest] #[case("arg1 --option1 val1 -f val2 -vs", &[ - Ok(Arg::Value("arg1")), - Ok(Arg::LongOption("option1")), - Ok(Arg::Value("val1")), - Ok(Arg::ShortOption('f')), - Ok(Arg::Value("val2")), - Ok(Arg::ShortOption('v')), - Ok(Arg::ShortOption('s')), + Arg::Value("arg1"), + Arg::LongOption("option1"), + Arg::Value("val1"), + Arg::ShortOption('f'), + Arg::Value("val2"), + Arg::ShortOption('v'), + Arg::ShortOption('s'), ])] #[case("arg1 --option1 -- val1 -f val2 -vs", &[ - Ok(Arg::Value("arg1")), - Ok(Arg::LongOption("option1")), - Ok(Arg::DoubleDash), - Ok(Arg::Value("val1")), - Ok(Arg::Value("-f")), - Ok(Arg::Value("val2")), - Ok(Arg::Value("-vs")), + Arg::Value("arg1"), + Arg::LongOption("option1"), + Arg::DoubleDash, + Arg::Value("val1"), + Arg::Value("-f"), + Arg::Value("val2"), + Arg::Value("-vs"), ])] - #[case("arg1 -бjв", &[ - Ok(Arg::Value("arg1")), - Err(ArgError::NonAsciiShortOption), - Err(ArgError::NonAsciiShortOption), - Ok(Arg::ShortOption('j')), - Err(ArgError::NonAsciiShortOption), - Err(ArgError::NonAsciiShortOption), + #[case("arg1 -бj佗𑿌", &[ + Arg::Value("arg1"), + Arg::ShortOption('б'), + Arg::ShortOption('j'), + Arg::ShortOption('佗'), + Arg::ShortOption('𑿌'), ])] - fn arg_tokens(#[case] input: &str, #[case] expected: &[Result, ArgError>]) { + fn arg_tokens(#[case] input: &str, #[case] expected: &[Arg<'_>]) { let mut input = input.as_bytes().to_vec(); let input = core::str::from_utf8_mut(&mut input).unwrap(); let tokens = Tokens::new(input); diff --git a/embedded-cli/src/cli.rs b/embedded-cli/src/cli.rs index e85a08e..d3e4e94 100644 --- a/embedded-cli/src/cli.rs +++ b/embedded-cli/src/cli.rs @@ -14,6 +14,7 @@ use crate::{ input::{ControlInput, Input, InputGenerator}, service::{Autocomplete, CommandProcessor, Help, ParseError, ProcessError}, token::Tokens, + utils, writer::{WriteExt, Writer}, }; @@ -404,10 +405,6 @@ where self.writer.write_str("missing required argument: ")?; self.writer.write_str(name)?; } - ParseError::NonAsciiShortOption => { - self.writer - .write_str("non-ascii in short options is not supported")?; - } ParseError::ParseValueError { value, expected } => { self.writer.write_str("failed to parse '")?; self.writer.write_str(value)?; @@ -424,11 +421,10 @@ where self.writer.write_str(name)?; } ParseError::UnexpectedShortOption { name } => { - // short options are guaranteed to be ascii alphabetic - if name.is_ascii_alphabetic() { - self.writer.write_str("unexpected option: -")?; - self.writer.write_bytes(&[name as u8])?; - } + let mut buf = [0; 4]; + let buf = utils::encode_utf8(name, &mut buf); + self.writer.write_str("unexpected option: -")?; + self.writer.write_str(buf)?; } ParseError::UnknownCommand => { self.writer.write_str("unknown command")?; diff --git a/embedded-cli/src/help.rs b/embedded-cli/src/help.rs index a317cb1..96951ea 100644 --- a/embedded-cli/src/help.rs +++ b/embedded-cli/src/help.rs @@ -16,7 +16,7 @@ impl<'a> HelpRequest<'a> { let mut args = command.args().args(); if command.name() == "help" { match args.next() { - Some(Ok(Arg::Value(name))) => { + Some(Arg::Value(name)) => { let command = RawCommand::new(name, args.into_args()); Some(HelpRequest::Command(command)) } @@ -25,9 +25,7 @@ impl<'a> HelpRequest<'a> { } } // check if any other option is -h or --help - else if args - .any(|arg| arg == Ok(Arg::LongOption("help")) || arg == Ok(Arg::ShortOption('h'))) - { + else if args.any(|arg| arg == Arg::LongOption("help") || arg == Arg::ShortOption('h')) { Some(HelpRequest::Command(command.clone())) } else { None diff --git a/embedded-cli/src/service.rs b/embedded-cli/src/service.rs index a0873d2..cf4dd3f 100644 --- a/embedded-cli/src/service.rs +++ b/embedded-cli/src/service.rs @@ -22,8 +22,6 @@ pub enum ParseError<'a> { name: &'a str, }, - NonAsciiShortOption, - ParseValueError { value: &'a str, expected: &'static str, diff --git a/embedded-cli/src/utf8.rs b/embedded-cli/src/utf8.rs index 45102a9..e43fe38 100644 --- a/embedded-cli/src/utf8.rs +++ b/embedded-cli/src/utf8.rs @@ -15,37 +15,43 @@ impl Utf8Accum { // Plain and stupid utf-8 validation // Bytes are supposed to be human input so it's okay to be not blazing fast - if byte <= 0x7F { - self.partial = 0; - self.expected = 0; - self.buffer[0] = byte; - // SAFETY: ascii chars are all valid utf-8 chars - return Some(unsafe { core::str::from_utf8_unchecked(&self.buffer[..1]) }); - } else if (0xC0..=0xDF).contains(&byte) { - // this is first octet of 2-byte value + if byte >= 0xF8 { + return None; + } else if byte >= 0xF0 { + // this is first octet of 4-byte value self.buffer[0] = byte; self.partial = 1; - self.expected = 1; - } else if (0xE0..=0xEF).contains(&byte) { + self.expected = 3; + } else if byte >= 0xE0 { // this is first octet of 3-byte value self.buffer[0] = byte; self.partial = 1; self.expected = 2; - } else if (0xF0..=0xF7).contains(&byte) { - // this is first octet of 4-byte value + } else if byte >= 0xC0 { + // this is first octet of 2-byte value self.buffer[0] = byte; self.partial = 1; - self.expected = 3; - } else if (0x80..=0xBF).contains(&byte) && self.expected > 0 { - // this is one of other octets of multi-byte value - self.buffer[self.partial as usize] = byte; - self.partial += 1; - self.expected -= 1; - if self.expected == 0 { - let len = self.partial as usize; - self.partial = 0; - // SAFETY: we checked previously that buffer contains valid utf8 - return Some(unsafe { core::str::from_utf8_unchecked(&self.buffer[..len]) }); + self.expected = 1; + } else if byte >= 0x80 { + if self.expected > 0 { + // this is one of other octets of multi-byte value + self.buffer[self.partial as usize] = byte; + self.partial += 1; + self.expected -= 1; + if self.expected == 0 { + let len = self.partial as usize; + // SAFETY: we checked previously that buffer contains valid utf8 + unsafe { + return Some(core::str::from_utf8_unchecked(&self.buffer[..len])); + } + } + } + } else { + self.expected = 0; + self.buffer[0] = byte; + // SAFETY: ascii chars are all valid utf-8 chars + unsafe { + return Some(core::str::from_utf8_unchecked(&self.buffer[..1])); } } diff --git a/embedded-cli/src/utils.rs b/embedded-cli/src/utils.rs index 3131d8b..b3bf449 100644 --- a/embedded-cli/src/utils.rs +++ b/embedded-cli/src/utils.rs @@ -34,6 +34,40 @@ pub fn char_count(text: &str) -> usize { count } +pub fn char_pop_front(text: &str) -> Option<(char, &str)> { + if text.is_empty() { + None + } else { + let bytes = text.as_bytes(); + let first = bytes[0]; + + let mut codepoint = if first < 0x80 { + first as u32 + } else if (first & 0xE0) == 0xC0 { + (first & 0x1F) as u32 + } else { + (first & 0x0F) as u32 + }; + + let mut bytes = &bytes[1..]; + // go over all other bytes and add merge into codepoint + while !bytes.is_empty() && (bytes[0] & 0xC0) == 0x80 { + codepoint <<= 6; + codepoint |= bytes[0] as u32 & 0x3F; + bytes = &bytes[1..]; + } + + // SAFETY: after all modifications codepoint is valid u32 char + // and bytes contains valid utf-8 sequence + unsafe { + Some(( + char::from_u32_unchecked(codepoint), + core::str::from_utf8_unchecked(bytes), + )) + } + } +} + /// Returns length (in bytes) of longest common prefix pub fn common_prefix_len(left: &str, right: &str) -> usize { let mut accum1 = Utf8Accum::default(); @@ -55,6 +89,45 @@ pub fn common_prefix_len(left: &str, right: &str) -> usize { pos } +/// Encodes given character as UTF-8 into the provided byte buffer, +/// and then returns the subslice of the buffer that contains the encoded character. +pub fn encode_utf8(ch: char, buf: &mut [u8]) -> &str { + let mut code = ch as u32; + + if code < 0x80 { + buf[0] = ch as u8; + unsafe { + return core::str::from_utf8_unchecked(&buf[..1]); + } + } + + let mut counter = if code < 0x800 { + // 2-byte char + 1 + } else if code < 0x10000 { + // 3-byte char + 2 + } else { + // 4-byte char + 3 + }; + + let first_b_mask = (0x780 >> counter) as u8; + + let len = counter + 1; + while counter > 0 { + buf[counter] = ((code as u8) & 0b0011_1111) | 0b1000_0000; + code >>= 6; + counter -= 1; + } + + buf[0] = code as u8 | first_b_mask; + + unsafe { + return core::str::from_utf8_unchecked(&buf[..len]); + } +} + pub fn trim_start(input: &str) -> &str { if let Some(pos) = input.as_bytes().iter().position(|b| *b != b' ') { input.get(pos..).unwrap_or("") @@ -87,6 +160,7 @@ pub unsafe fn split_at_mut(buf: &mut [u8], mid: usize) -> (&mut [u8], &mut [u8]) #[cfg(test)] mod tests { use rstest::rstest; + use std::format; use crate::utils; @@ -121,6 +195,28 @@ mod tests { assert_eq!(utils::char_count(text), text.chars().count()) } + #[test] + fn char_pop_front() { + let text = "abcd абв 佐佗佟𑿁 𑿆𑿌"; + for (i, ch) in text.char_indices() { + let (popped_ch, left) = utils::char_pop_front(&text[i..]).unwrap(); + assert_eq!(popped_ch, ch); + assert_eq!(&text[i..], format!("{}{}", ch, left).as_str()); + } + assert!(utils::char_pop_front("").is_none()) + } + + #[test] + fn char_encode() { + let text = "abcd абв 佐佗佟𑿁 𑿆𑿌"; + for ch in text.chars() { + let mut buf1 = [0; 4]; + let mut buf2 = [0; 4]; + assert_eq!(ch.encode_utf8(&mut buf1), utils::encode_utf8(ch, &mut buf2)); + } + assert!(utils::char_pop_front("").is_none()) + } + #[rstest] #[case("abcdef", "abcdef")] #[case("abcdef", "abc")] diff --git a/embedded-cli/tests/cli/base.rs b/embedded-cli/tests/cli/base.rs index 40ad619..e296c86 100644 --- a/embedded-cli/tests/cli/base.rs +++ b/embedded-cli/tests/cli/base.rs @@ -26,7 +26,7 @@ fn simple_input() { cli.received_commands(), vec![Ok(RawCommand { name: "set".to_string(), - args: vec![Ok(Arg::Value("led".to_string()))], + args: vec![Arg::Value("led".to_string())], })] ); } diff --git a/embedded-cli/tests/cli/options.rs b/embedded-cli/tests/cli/options.rs index dc637ce..62c95cf 100644 --- a/embedded-cli/tests/cli/options.rs +++ b/embedded-cli/tests/cli/options.rs @@ -12,13 +12,13 @@ enum CliTestCommand<'a> { #[arg(short, long)] name: Option<&'a str>, - #[arg(long = "conf")] + #[arg(long = "конф")] config: &'a str, #[arg(short)] level: u8, - #[arg(short = 'V', long)] + #[arg(short = 'Ю', long)] verbose: bool, file: &'a str, @@ -63,28 +63,28 @@ impl<'a> From> for TestCommand { } #[rstest] -#[case("cmd --name test-name --conf config -l 5 -V some-file", TestCommand::Cmd { +#[case("cmd --name test-name --конф config -l 5 -Ю some-file", TestCommand::Cmd { name: Some("test-name".to_string()), config: "config".to_string(), level: 5, verbose: true, file: "some-file".to_string(), })] -#[case("cmd --conf config -l 35 --verbose some-file", TestCommand::Cmd { +#[case("cmd --конф config -l 35 --verbose some-file", TestCommand::Cmd { name: None, config: "config".to_string(), level: 35, verbose: true, file: "some-file".to_string(), })] -#[case("cmd --conf conf2 file -n name2 -Vl 25", TestCommand::Cmd { +#[case("cmd --конф conf2 file -n name2 -Юl 25", TestCommand::Cmd { name: Some("name2".to_string()), config: "conf2".to_string(), level: 25, verbose: true, file: "file".to_string(), })] -#[case("cmd file3 --conf conf3 -l 17", TestCommand::Cmd { +#[case("cmd file3 --конф conf3 -l 17", TestCommand::Cmd { name: None, config: "conf3".to_string(), level: 17, diff --git a/embedded-cli/tests/cli/terminal.rs b/embedded-cli/tests/cli/terminal.rs index 97e453b..ed0cf97 100644 --- a/embedded-cli/tests/cli/terminal.rs +++ b/embedded-cli/tests/cli/terminal.rs @@ -87,7 +87,13 @@ impl Terminal { current.push(' '); } } - current.insert(cursor, c); + if let Some((insert_pos, _)) = + current.char_indices().skip(cursor).next() + { + current.insert(insert_pos, c); + } else { + current.push(c); + } cursor += 1; } _ => unimplemented!(), diff --git a/embedded-cli/tests/cli/wrapper.rs b/embedded-cli/tests/cli/wrapper.rs index 5052f7e..70266c1 100644 --- a/embedded-cli/tests/cli/wrapper.rs +++ b/embedded-cli/tests/cli/wrapper.rs @@ -1,7 +1,7 @@ use std::{cell::RefCell, convert::Infallible, fmt::Debug, marker::PhantomData, rc::Rc}; use embedded_cli::{ - arguments::{Arg as CliArg, ArgError}, + arguments::Arg as CliArg, cli::{Cli, CliBuilder, CliHandle}, command::RawCommand as CliRawCommand, service::{Autocomplete, CommandProcessor, Help, ParseError as CliParseError, ProcessError}, @@ -78,7 +78,7 @@ pub enum Arg { #[derive(Clone, Debug, Eq, PartialEq)] pub struct RawCommand { pub name: String, - pub args: Vec>, + pub args: Vec, } impl_convert! {CliRawCommand<'_> => RawCommand, command, { @@ -95,13 +95,10 @@ impl<'a> From> for RawCommand { .args() .args() .map(|arg| match arg { - Ok(arg) => Ok(match arg { - CliArg::DoubleDash => Arg::DoubleDash, - CliArg::LongOption(name) => Arg::LongOption(name.to_string()), - CliArg::ShortOption(name) => Arg::ShortOption(name), - CliArg::Value(value) => Arg::Value(value.to_string()), - }), - Err(err) => Err(err), + CliArg::DoubleDash => Arg::DoubleDash, + CliArg::LongOption(name) => Arg::LongOption(name.to_string()), + CliArg::ShortOption(name) => Arg::ShortOption(name), + CliArg::Value(value) => Arg::Value(value.to_string()), }) .collect(), }