Skip to content

Commit

Permalink
feat(wit-component): expose wit parsing and wasm detection functions (#…
Browse files Browse the repository at this point in the history
…1232)

* feat(wit-component): expose wit parsing and wasm detection functions

WIT parsing and wasm detection functions were used inside wasm-tools
but not available to downstream consumers.

This commit refactors the code slightly, moves it into wit-component,
and exposes two functions (wit_component::parse_wit_from_path and
wit_component::is_wasm_binary_or_wat) for downstream consumers of
wit-component to use.

Signed-off-by: Victor Adossi <vadossi@cosmonic.com>

* fix(tests): remove canonicalize usage

Signed-off-by: Victor Adossi <vadossi@cosmonic.com>

---------

Signed-off-by: Victor Adossi <vadossi@cosmonic.com>
  • Loading branch information
vados-cosmonic authored Oct 4, 2023
1 parent 25a6916 commit 67fb7b0
Show file tree
Hide file tree
Showing 7 changed files with 186 additions and 71 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions crates/wit-component/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ anyhow = { workspace = true }
log = "0.4.17"
bitflags = "2.3.3"
indexmap = { workspace = true }
wat = { workspace = true, optional = true }
wast = { workspace = true }
wat = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }

Expand All @@ -36,4 +37,4 @@ wat = { workspace = true }
wasmtime = { workspace = true }

[features]
dummy-module = ['dep:wat']
dummy-module = []
102 changes: 100 additions & 2 deletions crates/wit-component/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
#![deny(missing_docs)]

use anyhow::{bail, Result};
use std::fmt::Display;
use anyhow::{bail, Context, Result};
use std::str::FromStr;
use std::{fmt::Display, path::Path};
use wasm_encoder::CanonicalOption;
use wit_parser::{PackageId, Resolve, UnresolvedPackage};

mod decoding;
mod encoding;
Expand Down Expand Up @@ -80,3 +81,100 @@ pub(crate) fn base_producers() -> wasm_metadata::Producers {
producer.add("processed-by", "wit-component", env!("CARGO_PKG_VERSION"));
producer
}

/// Parse a WIT file from a path that represents a top level 'wit' directory,
/// normally containing a 'deps' folder.
pub fn parse_wit_from_path(path: impl AsRef<Path>) -> Result<(Resolve, PackageId)> {
let mut resolver = Resolve::default();
let id = match path.as_ref() {
// Directories can be directly fed into the resolver
p if p.is_dir() => {
resolver
.push_dir(p)
.with_context(|| {
format!(
"failed to resolve directory while parsing WIT for path [{}]",
p.display()
)
})?
.0
}
// Non-directory files (including symlinks) can be either:
// - Wasm modules (binary or WAT) that are WIT packages
// - WIT files
p => {
let file_contents = std::fs::read(p)
.with_context(|| format!("failed to parse WIT from path [{}]", p.display()))?;

// Check if the bytes represent a Wasm module (either binary or WAT encoded)
if is_wasm_binary_or_wat(&file_contents) {
let bytes = wat::parse_bytes(&file_contents).map_err(|mut e| {
e.set_path(p);
e
})?;
match decode(&bytes)? {
DecodedWasm::Component(..) => {
bail!("specified path is a component, not a wit package")
}
DecodedWasm::WitPackage(resolve, pkg) => return Ok((resolve, pkg)),
}
} else {
// If the bytes are not a WASM module, they should be WIT that can be parsed
// into a package by the resolver
let text = match std::str::from_utf8(&file_contents) {
Ok(s) => s,
Err(_) => bail!("input file is not valid utf-8"),
};
let pkg = UnresolvedPackage::parse(p, text)?;
resolver.push(pkg)?
}
}
};
Ok((resolver, id))
}

/// Detect quickly whether the supplied bytes represent a Wasm module, either
/// binary-encoded or WAT-encoded.
///
/// Bytes that start with the `\0asm` magic header, and bytes that are not
/// valid UTF-8, are reported as Wasm. Otherwise this briefly lexes past
/// whitespace and comments as a `*.wat` file to see if we can find a
/// left-paren. If that fails then it's probably `*.wit` instead.
///
/// # Examples
///
/// ```
/// # use wit_component::is_wasm_binary_or_wat;
/// assert!(is_wasm_binary_or_wat(r#"
/// (module
///   (type (;0;) (func))
///   (func (;0;) (type 0)
///     nop
///   )
/// )
/// "#));
/// ```
pub fn is_wasm_binary_or_wat(bytes: impl AsRef<[u8]>) -> bool {
    use wast::lexer::{Lexer, TokenKind};

    let bytes = bytes.as_ref();
    if bytes.starts_with(b"\0asm") {
        return true;
    }
    let text = match std::str::from_utf8(bytes) {
        Ok(s) => s,
        // Non-UTF-8 input is not text at all, so it is reported as Wasm.
        Err(_) => return true,
    };

    let lexer = Lexer::new(text);
    for token in lexer.iter(0) {
        match token.map(|t| t.kind) {
            // Skip leading trivia and keep looking for the first real token.
            Ok(TokenKind::Whitespace)
            | Ok(TokenKind::BlockComment)
            | Ok(TokenKind::LineComment) => {}
            Ok(TokenKind::LParen) => return true,
            // Any other token, or a lexing error, means this is not WAT.
            _ => break,
        }
    }

    false
}
59 changes: 59 additions & 0 deletions crates/wit-component/tests/wit.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use anyhow::Result;
use wit_component::{is_wasm_binary_or_wat, parse_wit_from_path};

/// WAT text for a small module with one function type and one function whose
/// body is a single `nop`; the tests below compile it to a Wasm binary.
const EXAMPLE_MODULE_WAT: &str = r#"
(module
(type (;0;) (func))
(func (;0;) (type 0)
nop
)
)
"#;

/// `parse_wit_from_path` accepts a WIT directory, and the resulting package
/// exposes the `foo-world` world.
#[test]
fn parse_wit_dir() -> Result<()> {
    drop(env_logger::try_init());

    let (resolve, pkg) = parse_wit_from_path("tests/wit/parse-dir/wit")?;
    let world = resolve.select_world(pkg, Some("foo-world"));
    assert!(world.is_ok());

    Ok(())
}

/// `parse_wit_from_path` accepts a single `.wit` file: the `bar-world` world
/// must be selectable and an interface named `bar` must be present.
#[test]
fn parse_wit_file() -> Result<()> {
    drop(env_logger::try_init());

    let (resolve, pkg) = parse_wit_from_path("tests/wit/parse-dir/wit/deps/bar/bar.wit")?;
    resolve.select_world(pkg, Some("bar-world"))?;

    let has_bar_interface = resolve
        .interfaces
        .iter()
        .any(|(_, iface)| iface.name.as_deref() == Some("bar"));
    assert!(has_bar_interface);

    Ok(())
}

/// Ensure that parse_wit_from_path fails for missing paths
#[test]
fn parse_wit_missing_path() -> Result<()> {
    drop(env_logger::try_init());

    // The path does not exist, so it is not a directory and reading it fails.
    let outcome = parse_wit_from_path("tests/nonexistent/path");
    assert!(outcome.is_err());

    Ok(())
}

/// `is_wasm_binary_or_wat` recognizes a binary-encoded module, produced here
/// by compiling `EXAMPLE_MODULE_WAT`.
#[test]
fn check_wasm_from_bytes() -> Result<()> {
    drop(env_logger::try_init());

    let binary = wat::parse_str(EXAMPLE_MODULE_WAT)?;
    assert!(is_wasm_binary_or_wat(binary));

    Ok(())
}
9 changes: 9 additions & 0 deletions crates/wit-component/tests/wit/parse-dir/wit/deps/bar/bar.wit
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package foo:bar;

interface bar {
f: func() -> bool;
}

world bar-world {
export bar;
}
5 changes: 5 additions & 0 deletions crates/wit-component/tests/wit/parse-dir/wit/world.wit
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package foo:foo;

world foo-world {
import foo:bar/bar;
}
76 changes: 9 additions & 67 deletions src/bin/wasm-tools/component.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ use std::io::Read;
use std::path::{Path, PathBuf};
use wasm_encoder::{Encode, Section};
use wasm_tools::Output;
use wit_component::{ComponentEncoder, DecodedWasm, Linker, StringEncoding, WitPrinter};
use wit_parser::{PackageId, Resolve, UnresolvedPackage};
use wit_component::{
is_wasm_binary_or_wat, parse_wit_from_path, ComponentEncoder, DecodedWasm, Linker,
StringEncoding, WitPrinter,
};
use wit_parser::{Resolve, UnresolvedPackage};

/// WebAssembly wit-based component tooling.
#[derive(Parser)]
Expand Down Expand Up @@ -210,7 +213,7 @@ impl EmbedOpts {
} else {
Some(self.io.parse_input_wasm()?)
};
let (resolve, id) = parse_wit(&self.wit)?;
let (resolve, id) = parse_wit_from_path(self.wit)?;
let world = resolve.select_world(id, self.world.as_deref())?;

let encoded = wit_component::metadata::encode(
Expand Down Expand Up @@ -456,7 +459,7 @@ impl WitOpts {
decode_wasm(&bytes).context("failed to decode WIT document")?
}
_ => {
let (resolve, id) = parse_wit(input)?;
let (resolve, id) = parse_wit_from_path(input)?;
DecodedWasm::WitPackage(resolve, id)
}
},
Expand All @@ -466,7 +469,7 @@ impl WitOpts {
.read_to_end(&mut stdin)
.context("failed to read <stdin>")?;

if is_wasm(&stdin) {
if is_wasm_binary_or_wat(&stdin) {
let bytes = wat::parse_bytes(&stdin).map_err(|mut e| {
e.set_path("<stdin>");
e
Expand Down Expand Up @@ -619,7 +622,7 @@ impl TargetsOpts {

/// Executes the application.
fn run(self) -> Result<()> {
let (resolve, package_id) = parse_wit(&self.wit)?;
let (resolve, package_id) = parse_wit_from_path(&self.wit)?;
let world = resolve.select_world(package_id, self.world.as_deref())?;
let component_to_test = self.input.parse_wasm()?;

Expand All @@ -629,67 +632,6 @@ impl TargetsOpts {
}
}

fn parse_wit(path: &Path) -> Result<(Resolve, PackageId)> {
let mut resolve = Resolve::default();
let id = if path.is_dir() {
resolve.push_dir(&path)?.0
} else {
let contents =
std::fs::read(&path).with_context(|| format!("failed to read file {path:?}"))?;
if is_wasm(&contents) {
let bytes = wat::parse_bytes(&contents).map_err(|mut e| {
e.set_path(path);
e
})?;
match wit_component::decode(&bytes)? {
DecodedWasm::Component(..) => {
bail!("specified path is a component, not a wit package")
}
DecodedWasm::WitPackage(resolve, pkg) => return Ok((resolve, pkg)),
}
} else {
let text = match std::str::from_utf8(&contents) {
Ok(s) => s,
Err(_) => bail!("input file is not valid utf-8"),
};
let pkg = UnresolvedPackage::parse(&path, text)?;
resolve.push(pkg)?
}
};
Ok((resolve, id))
}

/// Heuristically decide whether `bytes` are Wasm (binary or `*.wat` text)
/// rather than `*.wit` text.
///
/// Input with the `\0asm` header, or input that is not valid UTF-8, counts as
/// Wasm. Otherwise the text is lexed as `*.wat`, skipping whitespace and
/// comments, and counts as Wasm only if the first remaining token is a
/// left-paren.
fn is_wasm(bytes: &[u8]) -> bool {
    use wast::lexer::{Lexer, TokenKind};

    if bytes.starts_with(b"\0asm") {
        return true;
    }
    let Ok(text) = std::str::from_utf8(bytes) else {
        return true;
    };

    let lexer = Lexer::new(text);
    // First lexer result that is not whitespace or a comment (may be an error).
    let first_significant = lexer.iter(0).map(|tok| tok.map(|t| t.kind)).find(|kind| {
        !matches!(
            kind,
            Ok(TokenKind::Whitespace) | Ok(TokenKind::BlockComment) | Ok(TokenKind::LineComment)
        )
    });

    matches!(first_significant, Some(Ok(TokenKind::LParen)))
}

fn decode_wasm(bytes: &[u8]) -> Result<DecodedWasm> {
if wasmparser::Parser::is_component(bytes) {
wit_component::decode(bytes)
Expand Down

0 comments on commit 67fb7b0

Please sign in to comment.