Skip to content

Commit

Permalink
feat: add utf8 short options
Browse files Browse the repository at this point in the history
BREAKING CHANGE: ArgsIter gives plain Arg instead of Result, NonAsciiShortOption error is removed
  • Loading branch information
funbiscuit committed Apr 20, 2024
1 parent 75f05bc commit a83b84e
Show file tree
Hide file tree
Showing 14 changed files with 202 additions and 133 deletions.
16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,14 +332,14 @@ Memory usage might change in future versions, but I'll try to keep this table up

| Features | ROM, bytes | Static RAM, bytes |
|---------------------------------|:----------:|:-----------------:|
| | 10146 | 317 |
| `autocomplete` | 12008 | 333 |
| `history` | 12246 | 358 |
| `autocomplete` `history` | 13506 | 374 |
| `help` | 14362 | 587 |
| `autocomplete` `help` | 15278 | 599 |
| `history` `help` | 16456 | 628 |
| `autocomplete` `history` `help` | 16344 | 640 |
| | 10120 | 274 |
| `autocomplete` | 12116 | 290 |
| `history` | 12406 | 315 |
| `autocomplete` `history` | 13704 | 331 |
| `help` | 14216 | 544 |
| `autocomplete` `help` | 15290 | 556 |
| `history` `help` | 16488 | 585 |
| `autocomplete` `history` `help` | 16594 | 597 |

This table is generated using this [script](examples/arduino/memory.sh).
As table shows, enabling help adds quite a lot to memory usage since help usually requires a lot of text to be stored.
Expand Down
2 changes: 1 addition & 1 deletion embedded-cli-macros/src/command/help.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ fn create_command_help(command: &Command) -> TokenStream {
let mut state = States::Normal;

let mut args = command.args().args();
while let Some(Ok(arg)) = args.next() {
while let Some(arg) = args.next() {
match arg {
#(#option_name_arms)*
#(#option_value_arms)*
Expand Down
12 changes: 0 additions & 12 deletions embedded-cli-macros/src/command/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,23 +154,11 @@ impl CommandArg {
ShortName::Generated => field_name.chars().next().unwrap(),
ShortName::Fixed(c) => c,
});
if let Some(short) = short {
if !short.is_ascii_alphabetic() {
return Err(Error::custom("Flag char must be alphabetic ASCII"));
}
}

let long = arg_attrs.long.map(|s| match s {
LongName::Generated => field_name.from_case(Case::Snake).to_case(Case::Kebab),
LongName::Fixed(name) => name,
});
if let Some(long) = &long {
if long.chars().any(|c| !c.is_ascii_alphanumeric() && c != '-') {
return Err(Error::custom(
"Option name must consist of alphanumeric ASCII chars",
));
}
}

let aa = TypedArg::new(&field.ty);

Expand Down
3 changes: 0 additions & 3 deletions embedded-cli-macros/src/command/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,6 @@ fn create_arg_parsing(command: &Command) -> (TokenStream, Vec<TokenStream>) {

let mut args = command.args().args();
while let Some(arg) = args.next() {
let arg = arg.map_err(|err| match err {
_cli::arguments::ArgError::NonAsciiShortOption => _cli::service::ParseError::NonAsciiShortOption
})?;
match arg {
#(#option_name_arms)*
#(#option_value_arms)*
Expand Down
95 changes: 41 additions & 54 deletions embedded-cli/src/arguments.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use crate::token::{Tokens, TokensIter};
use crate::{
token::{Tokens, TokensIter},
utils,
};

#[derive(Debug, Eq, PartialEq)]
pub enum Arg<'a> {
Expand All @@ -13,8 +16,7 @@ pub enum Arg<'a> {
/// `--config` will be a long option with name `config`
LongOption(&'a str),

/// Short option. Only single ASCII char is stored (without `-`).
/// UTF-8 here is not supported.
/// Short option. Only single UTF-8 char is stored (without `-`).
///
/// In `get --config normal -f file -vs`
/// `-f` and `-vs` will be short options.
Expand Down Expand Up @@ -50,18 +52,13 @@ impl<'a> PartialEq for ArgList<'a> {
}
}

#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ArgError {
NonAsciiShortOption,
}

#[derive(Debug)]
pub struct ArgsIter<'a> {
values_only: bool,

/// Short options (ASCII chars) that
/// Short options (utf8 chars) that
/// are left from previous iteration
leftover: &'a [u8],
leftover: &'a str,

tokens: TokensIter<'a>,
}
Expand All @@ -70,7 +67,7 @@ impl<'a> ArgsIter<'a> {
fn new(tokens: TokensIter<'a>) -> Self {
Self {
values_only: false,
leftover: &[],
leftover: "",
tokens,
}
}
Expand All @@ -85,31 +82,19 @@ impl<'a> ArgsIter<'a> {
}

impl<'a> Iterator for ArgsIter<'a> {
type Item = Result<Arg<'a>, ArgError>;
type Item = Arg<'a>;

fn next(&mut self) -> Option<Self::Item> {
fn process_leftover<'a>(byte: u8) -> Result<Arg<'a>, ArgError> {
if byte.is_ascii_alphabetic() {
// SAFETY: we checked that this is alphabetic ASCII
Ok(Arg::ShortOption(unsafe {
char::from_u32_unchecked(byte as u32)
}))
} else {
Err(ArgError::NonAsciiShortOption)
}
}

if !self.leftover.is_empty() {
let byte = self.leftover[0];
self.leftover = &self.leftover[1..];
return Some(process_leftover(byte));
if let Some((opt, leftover)) = utils::char_pop_front(self.leftover) {
self.leftover = leftover;
return Some(Arg::ShortOption(opt));
}

let raw = self.tokens.next()?;
let bytes = raw.as_bytes();

if self.values_only {
return Some(Ok(Arg::Value(raw)));
return Some(Arg::Value(raw));
}

let token = if bytes.len() > 1 && bytes[0] == b'-' {
Expand All @@ -121,14 +106,17 @@ impl<'a> Iterator for ArgsIter<'a> {
Arg::LongOption(unsafe { raw.get_unchecked(2..) })
}
} else {
self.leftover = &bytes[2..];
return Some(process_leftover(bytes[1]));
let (opt, leftover) =
unsafe { utils::char_pop_front(raw.get_unchecked(1..)).unwrap_unchecked() };
self.leftover = leftover;

return Some(Arg::ShortOption(opt));
}
} else {
Arg::Value(raw)
};

Some(Ok(token))
Some(token)
}
}

Expand Down Expand Up @@ -176,36 +164,35 @@ mod tests {

use crate::{arguments::ArgList, token::Tokens};

use super::{Arg, ArgError};
use super::Arg;

#[rstest]
#[case("arg1 --option1 val1 -f val2 -vs", &[
Ok(Arg::Value("arg1")),
Ok(Arg::LongOption("option1")),
Ok(Arg::Value("val1")),
Ok(Arg::ShortOption('f')),
Ok(Arg::Value("val2")),
Ok(Arg::ShortOption('v')),
Ok(Arg::ShortOption('s')),
Arg::Value("arg1"),
Arg::LongOption("option1"),
Arg::Value("val1"),
Arg::ShortOption('f'),
Arg::Value("val2"),
Arg::ShortOption('v'),
Arg::ShortOption('s'),
])]
#[case("arg1 --option1 -- val1 -f val2 -vs", &[
Ok(Arg::Value("arg1")),
Ok(Arg::LongOption("option1")),
Ok(Arg::DoubleDash),
Ok(Arg::Value("val1")),
Ok(Arg::Value("-f")),
Ok(Arg::Value("val2")),
Ok(Arg::Value("-vs")),
Arg::Value("arg1"),
Arg::LongOption("option1"),
Arg::DoubleDash,
Arg::Value("val1"),
Arg::Value("-f"),
Arg::Value("val2"),
Arg::Value("-vs"),
])]
#[case("arg1 -бjв", &[
Ok(Arg::Value("arg1")),
Err(ArgError::NonAsciiShortOption),
Err(ArgError::NonAsciiShortOption),
Ok(Arg::ShortOption('j')),
Err(ArgError::NonAsciiShortOption),
Err(ArgError::NonAsciiShortOption),
#[case("arg1 -бj佗𑿌", &[
Arg::Value("arg1"),
Arg::ShortOption('б'),
Arg::ShortOption('j'),
Arg::ShortOption('佗'),
Arg::ShortOption('𑿌'),
])]
fn arg_tokens(#[case] input: &str, #[case] expected: &[Result<Arg<'_>, ArgError>]) {
fn arg_tokens(#[case] input: &str, #[case] expected: &[Arg<'_>]) {
let mut input = input.as_bytes().to_vec();
let input = core::str::from_utf8_mut(&mut input).unwrap();
let tokens = Tokens::new(input);
Expand Down
14 changes: 5 additions & 9 deletions embedded-cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::{
input::{ControlInput, Input, InputGenerator},
service::{Autocomplete, CommandProcessor, Help, ParseError, ProcessError},
token::Tokens,
utils,
writer::{WriteExt, Writer},
};

Expand Down Expand Up @@ -404,10 +405,6 @@ where
self.writer.write_str("missing required argument: ")?;
self.writer.write_str(name)?;
}
ParseError::NonAsciiShortOption => {
self.writer
.write_str("non-ascii in short options is not supported")?;
}
ParseError::ParseValueError { value, expected } => {
self.writer.write_str("failed to parse '")?;
self.writer.write_str(value)?;
Expand All @@ -424,11 +421,10 @@ where
self.writer.write_str(name)?;
}
ParseError::UnexpectedShortOption { name } => {
// short options are guaranteed to be ascii alphabetic
if name.is_ascii_alphabetic() {
self.writer.write_str("unexpected option: -")?;
self.writer.write_bytes(&[name as u8])?;
}
let mut buf = [0; 4];
let buf = utils::encode_utf8(name, &mut buf);
self.writer.write_str("unexpected option: -")?;
self.writer.write_str(buf)?;
}
ParseError::UnknownCommand => {
self.writer.write_str("unknown command")?;
Expand Down
6 changes: 2 additions & 4 deletions embedded-cli/src/help.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ impl<'a> HelpRequest<'a> {
let mut args = command.args().args();
if command.name() == "help" {
match args.next() {
Some(Ok(Arg::Value(name))) => {
Some(Arg::Value(name)) => {
let command = RawCommand::new(name, args.into_args());
Some(HelpRequest::Command(command))
}
Expand All @@ -25,9 +25,7 @@ impl<'a> HelpRequest<'a> {
}
}
// check if any other option is -h or --help
else if args
.any(|arg| arg == Ok(Arg::LongOption("help")) || arg == Ok(Arg::ShortOption('h')))
{
else if args.any(|arg| arg == Arg::LongOption("help") || arg == Arg::ShortOption('h')) {
Some(HelpRequest::Command(command.clone()))
} else {
None
Expand Down
2 changes: 0 additions & 2 deletions embedded-cli/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ pub enum ParseError<'a> {
name: &'a str,
},

NonAsciiShortOption,

ParseValueError {
value: &'a str,
expected: &'static str,
Expand Down
52 changes: 29 additions & 23 deletions embedded-cli/src/utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,37 +15,43 @@ impl Utf8Accum {
// Plain and stupid utf-8 validation
// Bytes are supposed to be human input so it's okay to be not blazing fast

if byte <= 0x7F {
self.partial = 0;
self.expected = 0;
self.buffer[0] = byte;
// SAFETY: ascii chars are all valid utf-8 chars
return Some(unsafe { core::str::from_utf8_unchecked(&self.buffer[..1]) });
} else if (0xC0..=0xDF).contains(&byte) {
// this is first octet of 2-byte value
if byte >= 0xF8 {
return None;
} else if byte >= 0xF0 {
// this is first octet of 4-byte value
self.buffer[0] = byte;
self.partial = 1;
self.expected = 1;
} else if (0xE0..=0xEF).contains(&byte) {
self.expected = 3;
} else if byte >= 0xE0 {
// this is first octet of 3-byte value
self.buffer[0] = byte;
self.partial = 1;
self.expected = 2;
} else if (0xF0..=0xF7).contains(&byte) {
// this is first octet of 4-byte value
} else if byte >= 0xC0 {
// this is first octet of 2-byte value
self.buffer[0] = byte;
self.partial = 1;
self.expected = 3;
} else if (0x80..=0xBF).contains(&byte) && self.expected > 0 {
// this is one of other octets of multi-byte value
self.buffer[self.partial as usize] = byte;
self.partial += 1;
self.expected -= 1;
if self.expected == 0 {
let len = self.partial as usize;
self.partial = 0;
// SAFETY: we checked previously that buffer contains valid utf8
return Some(unsafe { core::str::from_utf8_unchecked(&self.buffer[..len]) });
self.expected = 1;
} else if byte >= 0x80 {
if self.expected > 0 {
// this is one of other octets of multi-byte value
self.buffer[self.partial as usize] = byte;
self.partial += 1;
self.expected -= 1;
if self.expected == 0 {
let len = self.partial as usize;
// SAFETY: we checked previously that buffer contains valid utf8
unsafe {
return Some(core::str::from_utf8_unchecked(&self.buffer[..len]));
}
}
}
} else {
self.expected = 0;
self.buffer[0] = byte;
// SAFETY: ascii chars are all valid utf-8 chars
unsafe {
return Some(core::str::from_utf8_unchecked(&self.buffer[..1]));
}
}

Expand Down
Loading

0 comments on commit a83b84e

Please sign in to comment.