Skip to content

Commit

Permalink
Fix parsing some semver strings in WIT (#1697)
Browse files Browse the repository at this point in the history
* Fix parsing some semver strings in WIT

Specifically fix parsing intermediate `-` characters in pre-release
versions and build metadata. Additionally fix alphabetic characters
following numeric characters.

Closes #1692

* Fix build warnings
  • Loading branch information
alexcrichton committed Jul 22, 2024
1 parent 9c7a49a commit 72fb1e0
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 20 deletions.
86 changes: 66 additions & 20 deletions crates/wit-parser/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1208,33 +1208,79 @@ fn parse_version(tokens: &mut Tokenizer<'_>) -> Result<(Span, Version)> {
let version = Version::parse(string).map_err(|e| Error::new(span, e.to_string()))?;
return Ok((span, version));

// According to `semver.org` this is what we're parsing:
//
// ```ebnf
// <pre-release> ::= <dot-separated pre-release identifiers>
//
// <dot-separated pre-release identifiers> ::= <pre-release identifier>
// | <pre-release identifier> "." <dot-separated pre-release identifiers>
//
// <build> ::= <dot-separated build identifiers>
//
// <dot-separated build identifiers> ::= <build identifier>
// | <build identifier> "." <dot-separated build identifiers>
//
// <pre-release identifier> ::= <alphanumeric identifier>
// | <numeric identifier>
//
// <build identifier> ::= <alphanumeric identifier>
// | <digits>
//
// <alphanumeric identifier> ::= <non-digit>
// | <non-digit> <identifier characters>
// | <identifier characters> <non-digit>
// | <identifier characters> <non-digit> <identifier characters>
//
// <numeric identifier> ::= "0"
// | <positive digit>
// | <positive digit> <digits>
//
// <identifier characters> ::= <identifier character>
// | <identifier character> <identifier characters>
//
// <identifier character> ::= <digit>
// | <non-digit>
//
// <non-digit> ::= <letter>
// | "-"
//
// <digits> ::= <digit>
// | <digit> <digits>
// ```
//
// This is loosely based on WIT syntax and an approximation is parsed here:
//
// * This function starts by parsing the optional leading `-` and `+` which
// indicates pre-release and build metadata.
// * Afterwards all of $id, $integer, `-`, and `.` are chomped. The only
// exception here is that if `.` isn't followed by $id, $integer, or `-`
// then it's assumed that it's something like `use a:b@1.0.0-a.{...}`
// where the `.` is part of WIT syntax, not semver.
//
// Note that this additionally doesn't try to return any first-class errors.
// Instead this bails out on something unrecognized for something else in
// the system to return an error.
fn eat_ids(tokens: &mut Tokenizer<'_>, prefix: Token, end: &mut Span) -> Result<()> {
if !tokens.eat(prefix)? {
return Ok(());
}
loop {
match tokens.next()? {
Some((span, Token::Id)) | Some((span, Token::Integer)) => end.end = span.end,
other => break Err(err_expected(tokens, "an id or integer", other).into()),
}

// If there's no trailing period, then this semver identifier is
// done.
let mut clone = tokens.clone();
if !clone.eat(Token::Period)? {
break Ok(());
}

// If there's more to the identifier, then eat the period for real
// and continue
if clone.eat(Token::Id)? || clone.eat(Token::Integer)? {
tokens.eat(Token::Period)?;
continue;
match clone.next()? {
Some((span, Token::Id | Token::Integer | Token::Minus)) => {
end.end = span.end;
*tokens = clone;
}
Some((_span, Token::Period)) => match clone.next()? {
Some((span, Token::Id | Token::Integer | Token::Minus)) => {
end.end = span.end;
*tokens = clone;
}
_ => break Ok(()),
},
_ => break Ok(()),
}

// Otherwise for something like `use foo:bar/baz@1.2.3+foo.{` stop
// the parsing here.
break Ok(());
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions crates/wit-parser/tests/ui/version-syntax.wit
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

package a:b@1.0.0 {}
package a:b@1.0.1 {}
package a:b@1.0.1-- {}
package a:b@1.0.1-a+a {}
package a:b@1.0.1-1+1 {}
package a:b@1.0.1-1a+1a {}
package a:b@1.0.0-11-a {}
package a:b@1.0.0-a1.1-a {}
package a:b@1.0.0-11ab {}
52 changes: 52 additions & 0 deletions crates/wit-parser/tests/ui/version-syntax.wit.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"worlds": [],
"interfaces": [],
"types": [],
"packages": [
{
"name": "a:b@1.0.0-11-a",
"interfaces": {},
"worlds": {}
},
{
"name": "a:b@1.0.0-11ab",
"interfaces": {},
"worlds": {}
},
{
"name": "a:b@1.0.0-a1.1-a",
"interfaces": {},
"worlds": {}
},
{
"name": "a:b@1.0.0",
"interfaces": {},
"worlds": {}
},
{
"name": "a:b@1.0.1-1+1",
"interfaces": {},
"worlds": {}
},
{
"name": "a:b@1.0.1--",
"interfaces": {},
"worlds": {}
},
{
"name": "a:b@1.0.1-1a+1a",
"interfaces": {},
"worlds": {}
},
{
"name": "a:b@1.0.1-a+a",
"interfaces": {},
"worlds": {}
},
{
"name": "a:b@1.0.1",
"interfaces": {},
"worlds": {}
}
]
}

0 comments on commit 72fb1e0

Please sign in to comment.