diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000000..9fc755d3acc --- /dev/null +++ b/.travis.yml @@ -0,0 +1,59 @@ +arch: + - arm64 + +language: rust +rust: + - nightly-2019-08-15 + +cache: + directories: + - /home/travis/.sccache/ + - /home/travis/.cargo/bin/ + +script: + # Sccache + # - curl -L https://github.com/mozilla/sccache/releases/download/0.2.10/sccache-0.2.10-x86_64-unknown-linux-musl.tar.gz | tar xzf - + # - export RUSTC_WRAPPER=`pwd`/sccache-0.2.10-x86_64-unknown-linux-musl/sccache + - test -f /home/travis/.cargo/bin/sccache || cargo install sccache + - export RUSTC_WRAPPER=/home/travis/.cargo/bin/sccache + - mkdir -p /home/travis/.sccache/ + - export SCCACHE_DIR="/home/travis/.sccache/" + - SCCACHE_ERROR_LOG=`pwd`/sccache.log RUST_LOG=debug $RUSTC_WRAPPER --start-server + - $RUSTC_WRAPPER -s + + # Tests + - make spectests-singlepass + + # Release + - make release-singlepass + - make wapm + - make build-install + - mkdir -p artifacts + - cp ./wasmer.tar.gz ./artifacts/$(./scripts/binary-name.sh) + +before_deploy: + # Set up git user name and tag this commit + - git config --local user.name "Syrus Akbary" + - git config --local user.email "syrus@wasmer.io" + - export TRAVIS_TAG="0.10.2" + # - git tag $TRAVIS_TAG + +deploy: + provider: releases + file_glob: true + file: artifacts/* + api_key: $GITHUB_OAUTH_TOKEN + # This is set so that the previous artifacts are not deleted by travis + skip_cleanup: true + on: + branch: feature/singlepass-aarch64 + + +addons: + apt: + packages: + - cmake + +branches: + only: + - feature/singlepass-aarch64 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 51c0983c848..5c200b192c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## **[Unreleased]** +- [#713](https://github.com/wasmerio/wasmer/pull/713) Add AArch64 support for singlepass.
- [#995](https://github.com/wasmerio/wasmer/pull/995) Detect when a global is read without being initialized (emit a proper error instead of panicking) - [#996](https://github.com/wasmerio/wasmer/pull/997) Refactored spectests, emtests and wasitests to use default compiler logic - [#992](https://github.com/wasmerio/wasmer/pull/992) Updates WAPM version to 0.4.1, fix arguments issue introduced in #990 diff --git a/Cargo.lock b/Cargo.lock index 37410c3568b..e21ab6c531a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,11 +23,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "arrayvec" -version = "0.4.12" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "nodrop 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", -] [[package]] name = "atty" @@ -60,11 +57,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "blake2b_simd" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)", + "arrayvec 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "constant_time_eq 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -118,14 +115,14 @@ dependencies = [ "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "cc" -version = "1.0.46" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -160,7 +157,7 @@ name = "cmake" version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -188,7 +185,7 @@ dependencies = [ "failure 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "failure_derive 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", "target-lexicon 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -259,23 +256,23 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "crossbeam-epoch 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-epoch 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "crossbeam-epoch" -version = "0.7.2" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
dependencies = [ - "arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "memoffset 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -296,6 +293,16 @@ dependencies = [ "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "crossbeam-utils" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "csv" version = "1.1.1" @@ -322,7 +329,7 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -335,26 +342,25 @@ dependencies = [ [[package]] name = "dynasm" -version = "0.3.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "owning_ref 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)", + "owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "dynasmrt" -version = "0.3.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "take_mut 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -413,8 +419,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", - "synstructure 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 
(registry+https://github.com/rust-lang/crates.io-index)", + "synstructure 0.12.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -461,7 +467,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -489,7 +495,7 @@ name = "heck" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "unicode-segmentation 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -556,7 +562,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -600,7 +606,7 @@ name = "llvm-sys" version = "80.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -623,6 +629,11 @@ dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "maybe-uninit" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "md5" version = "0.6.1" @@ -636,15 +647,6 @@ dependencies = [ "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "memmap" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "memmap" version = "0.7.0" @@ -656,7 +658,7 @@ dependencies = [ [[package]] name = "memoffset" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -668,17 +670,12 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)", "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "nodrop" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "num-traits" version = "0.2.8" @@ -698,7 +695,7 @@ dependencies = [ [[package]] name 
= "owning_ref" -version = "0.3.3" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -752,7 +749,7 @@ dependencies = [ "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", "redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)", "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -777,7 +774,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -895,7 +892,7 @@ version = "6.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)", "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -904,7 +901,7 @@ name = "rayon" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "crossbeam-deque 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "rayon-core 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -914,7 +911,7 @@ name = "rayon-core" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "crossbeam-deque 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-queue 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1056,7 +1053,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1071,8 +1068,11 @@ dependencies = [ [[package]] name = "smallvec" -version = "0.6.12" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "maybe-uninit 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "stable_deref_trait" @@ -1086,23 +1086,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "structopt" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "clap 2.33.0 
(registry+https://github.com/rust-lang/crates.io-index)", - "structopt-derive 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "structopt-derive 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "structopt-derive" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro-error 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1127,7 +1127,7 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1145,20 +1145,15 @@ dependencies = [ [[package]] name = "synstructure" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "take_mut" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "target-lexicon" version = "0.8.1" @@ -1257,12 +1252,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "unicode-segmentation" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -1311,7 +1306,7 @@ name = "wabt-sys" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)", "cmake 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1340,7 +1335,7 @@ dependencies = [ "glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", - "structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "structopt 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "typetag 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "wabt 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "wasmer-clif-backend 0.10.2", @@ -1389,7 +1384,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "cranelift-codegen 0.44.0 
(registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", "target-lexicon 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1453,7 +1448,7 @@ name = "wasmer-llvm-backend" version = "0.10.2" dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)", "goblin 0.0.24 (registry+https://github.com/rust-lang/crates.io-index)", "inkwell 0.1.0 (git+https://github.com/wasmerio/inkwell?branch=llvm8-0)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1462,7 +1457,7 @@ dependencies = [ "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", "semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", "wabt 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)", "wasmer-runtime-core 0.10.2", "wasmparser 0.39.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1520,8 +1515,8 @@ name = "wasmer-runtime-core" version = "0.10.2" dependencies = [ "bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "blake2b_simd 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)", + "blake2b_simd 0.5.9 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)", "digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "errno 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", "hex 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1536,7 +1531,7 @@ dependencies = [ "serde-bench 0.0.7 (registry+https://github.com/rust-lang/crates.io-index)", "serde_bytes 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", "wasmparser 0.39.2 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1557,12 +1552,12 @@ name = "wasmer-singlepass-backend" version = "0.10.2" dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "dynasm 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "dynasmrt 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "dynasm 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "dynasmrt 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", "nix 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)", + "smallvec 0.6.13 
(registry+https://github.com/rust-lang/crates.io-index)", "wasmer-runtime-core 0.10.2", ] @@ -1664,19 +1659,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" -"checksum arrayvec 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" +"checksum arrayvec 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" "checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" "checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" "checksum bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b8ab639324e3ee8774d296864fbc0dbbb256cf1a41c490b94cba90c082915f92" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" -"checksum blake2b_simd 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)" = "5850aeee1552f495dd0250014cf64b82b7c8879a89d83b33bbdace2cc4f63182" +"checksum blake2b_simd 0.5.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b83b7baab1e671718d78204225800d6b170e648188ac7dc992e9d6bddf87d0c0" "checksum bstr 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8d6c2c5b58ab920a4f5aeaaca34b4488074e8cc7596af94e6f8c6ff247c60245" "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" "checksum cargo_toml 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "097f5ce64ba566a83d9d914fd005de1e5937fdd57d8c5d99a7593040955d75a9" "checksum cast 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "926013f2860c46252efceabb19f4a6b308197505082c609025aa6706c011d427" "checksum cbindgen 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9daec6140ab4dcd38c3dd57e580b59a621172a526ac79f1527af760a55afeafd" -"checksum cc 1.0.46 (registry+https://github.com/rust-lang/crates.io-index)" = "0213d356d3c4ea2c18c40b037c3be23cd639825c18f25ee670ac7813beeef99c" +"checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" @@ -1690,16 +1685,17 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" "checksum cranelift-native 0.44.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fd16b58e95af9ee837218cf41e70306becc1fc7d7dada55dac42df5130a4a4ba" "checksum criterion 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "0363053954f3e679645fc443321ca128b7b950a6fe288cf5f9335cc22ee58394" "checksum criterion-plot 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76f9212ddf2f4a9eb2d401635190600656a1f88a932ef53d06e7fa4c7e02fb8e" -"checksum crossbeam-deque 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b18cd2e169ad86297e6bc0ad9aa679aee9daa4f19e8163860faf7c164e4f5a71" -"checksum crossbeam-epoch 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "fedcd6772e37f3da2a9af9bf12ebe046c0dfe657992377b4df982a2b54cd37a9" +"checksum crossbeam-deque 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c3aa945d63861bfe624b55d153a39684da1e8c0bc8fba932f7ee3a3c16cea3ca" +"checksum crossbeam-epoch 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5064ebdbf05ce3cb95e45c8b086f72263f4166b29b97f6baff7ef7fe047b55ac" "checksum crossbeam-queue 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7c979cd6cfe72335896575c6b5688da489e420d36a27a0b9eb0c73db574b4a4b" "checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6" +"checksum crossbeam-utils 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ce446db02cdc3165b94ae73111e570793400d0794e46125cc4056c81cbb039f4" "checksum csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "37519ccdfd73a75821cac9319d4fce15a81b9fcf75f951df5b9988aa3a0af87d" "checksum csv-core 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9b5cadb6b25c77aeff80ba701712494213f4a8418fcda2ee11b6560c3ad0bf4c" "checksum ctor 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "cd8ce37ad4184ab2ce004c33bf6379185d3b1c95801cab51026bd271bf68eedc" "checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" -"checksum dynasm 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f36d49ab6f8ecc642d2c6ee10fda04ba68003ef0277300866745cdde160e6b40" -"checksum dynasmrt 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a4c408a211e7f5762829f5e46bdff0c14bc3b1517a21a4bb781c716bf88b0c68" +"checksum dynasm 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8654f63488a94cd11feac2a609fdcdecd09e02fb582731f635783689fbb429f3" +"checksum dynasmrt 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b0046b083139885c38990f2fb9822d06f6c5902068d93a6ed9e56b63011b9932" "checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" "checksum enum-methods 0.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7798e7da2d4cb0d6d6fc467e8d6b5bf247e9e989f786dde1732d79899c32bb10" "checksum erased-serde 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3beee4bc16478a1b26f2e80ad819a52d24745e292f521a63c16eea5f74b7eb60" @@ -1732,16 +1728,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum llvm-sys 80.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2110cd4daf9cd8e39dd3b933b1a2a2ac7315e91f7c92b3a20beab526c63b5978" "checksum lock_api 
0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f8912e782533a93a167888781b836336a6ca5da6175c05944c86cf28c31104dc" "checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +"checksum maybe-uninit 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" "checksum md5 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e6bcd6433cff03a4bfc3d9834d504467db1f1cf6d0ea765d37d330249ed629d" "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" -"checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff" "checksum memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" -"checksum memoffset 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a85c1a8c329f11437034d7313dca647c79096523533a1c79e86f1d0f657c7cc" +"checksum memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "75189eb85871ea5c2e2c15abbdd541185f63b408415e5051f5cac122d8c774b9" "checksum nix 0.15.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3b2e0b4f3320ed72aaedb9a5ac838690a8047c7b275da22711fddff4f8a14229" -"checksum nodrop 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" "checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32" "checksum num_cpus 1.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "155394f924cdddf08149da25bfb932d226b4a593ca7468b08191ff6335941af5" -"checksum owning_ref 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "cdf84f41639e037b484f93433aa3897863b561ed65c6e59c7073d7c561710f37" +"checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" "checksum page_size 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f89ef58b3d32420dbd1a43d2f38ae92f6239ef12bb556ab09ca55445f5a67242" "checksum parking_lot 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f842b1982eb6c2fe34036a4fbfb06dd185a3f5c8edfaacdf7d1ea10b07de6252" "checksum parking_lot_core 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b876b1b9e7ac6e1a74a6da34d25c42e17e8862aa409cbbbdcfc8d86c6f3bc62b" @@ -1783,17 +1778,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum serde_bytes 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)" = "45af0182ff64abaeea290235eb67da3825a576c5d53e642c4d5b652e12e6effc" "checksum serde_derive 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)" = "ca13fc1a832f793322228923fbb3aba9f3f44444898f835d31ad1b74fa0a2bf8" "checksum serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)" = "2f72eb2a68a7dc3f9a691bfda9305a1c017a6215e5a4545c258500d2099a37c2" -"checksum smallvec 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "533e29e15d0748f28afbaf4ff7cab44d73e483a8e50b38c40bd13b7f3d48f542" +"checksum smallvec 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = 
"f7b0758c52e15a8b5e3691eae6cc559f08eee9406e548a4477ba4e67770a82b6" "checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" -"checksum structopt 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6d4f66a4c0ddf7aee4677995697366de0749b0139057342eccbb609b12d0affc" -"checksum structopt-derive 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8fe0c13e476b4e21ff7f5c4ace3818b6d7bdc16897c31c73862471bc1663acae" +"checksum structopt 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c167b61c7d4c126927f5346a4327ce20abf8a186b8041bbeb1ce49e5db49587b" +"checksum structopt-derive 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "519621841414165d2ad0d4c92be8f41844203f2b67e245f9345a5a12d40c69d7" "checksum syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d3b891b9015c88c576343b9b3e41c2c11a51c219ef067b264bd9c8aa9b441dad" "checksum syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)" = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" -"checksum syn 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "0e7bedb3320d0f3035594b0b723c8a28d7d336a3eda3881db79e61d676fb644c" +"checksum syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "661641ea2aa15845cddeb97dad000d22070bb5c1fb456b96c1cba883ec691e92" "checksum synom 0.11.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a393066ed9010ebaed60b9eafa373d4b1baac186dd7e008555b0f702b51945b6" -"checksum synstructure 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f085a5855930c0441ca1288cf044ea4aecf4f43a91668abdb870b4ba546a203" -"checksum take_mut 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" +"checksum synstructure 0.12.2 (registry+https://github.com/rust-lang/crates.io-index)" = "575be94ccb86e8da37efb894a87e2b660be299b41d8ef347f9d6d79fbe61b1ba" "checksum target-lexicon 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7975cb2c6f37d77b190bc5004a2bb015971464756fde9514651a525ada2a741a" "checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" @@ -1805,7 +1799,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum typenum 1.11.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6d2783fe2d6b8c1101136184eb41be8b1ad379e4657050b8aaff0c79ee7575f9" "checksum typetag 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "6ebb2c484029d695fb68a06d80e1536c68d491b3e0cf874c66abed255e831cfe" "checksum typetag-impl 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b63fd4799e4d0ec5cf0b055ebb8e2c3a657bbf76a84f6edc77ca60780e000204" -"checksum unicode-segmentation 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49f5526225fd8b77342d5986ab5f6055552e9c0776193b5b63fd53b46debfad7" +"checksum unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" "checksum 
unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7007dbd421b92cc6e28410fe7362e2e0a2503394908f417b68ec8d1c364c4e20" "checksum unicode-xid 0.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f860d7d29cf02cb2f3f359fd35991af3d30bac52c57d265a3c461074cb4dc" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" diff --git a/Cargo.toml b/Cargo.toml index 13369c452c9..c9cc342d9b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ byteorder = "1.3" errno = "0.2" structopt = "0.3" wabt = "0.9.1" -wasmer-clif-backend = { path = "lib/clif-backend" } +wasmer-clif-backend = { path = "lib/clif-backend", optional = true } wasmer-singlepass-backend = { path = "lib/singlepass-backend", optional = true } wasmer-middleware-common = { path = "lib/middleware-common" } wasmer-runtime = { path = "lib/runtime" } @@ -73,7 +73,7 @@ serde = { version = "1", features = ["derive"] } # used by the plugin example typetag = "0.1" # used by the plugin example [features] -default = ["fast-tests", "wasi", "backend-cranelift"] +default = ["fast-tests", "wasi"] "loader-kernel" = ["wasmer-kernel-loader"] debug = ["wasmer-runtime-core/debug"] trace = ["wasmer-runtime-core/trace"] @@ -82,6 +82,7 @@ extra-debug = ["wasmer-clif-backend/debug", "wasmer-runtime-core/debug"] # This feature will allow cargo test to run much faster fast-tests = [] backend-cranelift = [ + "wasmer-clif-backend", "wasmer-runtime-core/backend-cranelift", "wasmer-runtime/cranelift", "wasmer-middleware-common-tests/clif", diff --git a/Dockerfile b/Dockerfile index 6affe470faf..73507be300e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,7 @@ FROM wasmer-build-env AS wasmer-build WORKDIR /home/circleci/wasmer COPY . /home/circleci/wasmer RUN sudo chmod -R 777 . -RUN cargo build --release +RUN cargo build --release --features backend-cranelift FROM debian:stretch AS wasmer WORKDIR /root/ diff --git a/Makefile b/Makefile index ad8b762c3c6..07e595e4487 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ generate: generate-spectests generate-emtests generate-wasitests # Spectests spectests-singlepass: - cargo test --manifest-path lib/spectests/Cargo.toml --release --features singlepass -- --nocapture + cargo test --manifest-path lib/spectests/Cargo.toml --release --features singlepass -- --nocapture --test-threads 1 spectests-cranelift: cargo test --manifest-path lib/spectests/Cargo.toml --release --features clif -- --nocapture @@ -102,7 +102,7 @@ llvm: spectests-llvm emtests-llvm wasitests-llvm # All tests capi: - cargo build --release + cargo build --release --features backend-cranelift cargo build -p wasmer-runtime-c-api --release test-capi: capi @@ -151,7 +151,7 @@ lint: precommit: lint test debug: - cargo build --release --features backend-singlepass,debug,trace + cargo build --release --features backend-cranelift,backend-singlepass,debug,trace install: cargo install --path . 
@@ -265,4 +265,7 @@ dep-graph: cargo deps --optional-deps --filter wasmer-wasi wasmer-wasi-tests wasmer-kernel-loader wasmer-dev-utils wasmer-llvm-backend wasmer-emscripten wasmer-emscripten-tests wasmer-runtime-core wasmer-runtime wasmer-middleware-common wasmer-middleware-common-tests wasmer-singlepass-backend wasmer-clif-backend wasmer --manifest-path Cargo.toml | dot -Tpng > wasmer_depgraph.png docs: - cargo doc --features=backend-cranelift,backend-singlepass,backend-llvm,docs,wasi,managed + cargo doc --features=backend-singlepass,backend-cranelift,backend-llvm,docs,wasi,managed + +wapm: + cargo build --release --manifest-path wapm-cli/Cargo.toml --features "telemetry update-notifications" diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 23f806dca1e..720115f1828 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -123,7 +123,7 @@ jobs: displayName: Build (Windows) condition: and(succeeded(), eq(variables['Agent.OS'], 'Windows_NT')) - bash: | - cargo build --release --manifest-path wapm-cli/Cargo.toml --features "telemetry update-notifications" + make wapm displayName: Build WAPM condition: | eq(variables['Build.SourceBranch'], 'refs/heads/master') diff --git a/examples/fib.wat b/examples/fib.wat new file mode 100644 index 00000000000..a797fdae484 --- /dev/null +++ b/examples/fib.wat @@ -0,0 +1,20 @@ +(module + (func $main (result i32) + (call $fib (i32.const 40)) + ) + + (func $fib (param $n i32) (result i32) + (if (i32.eq (get_local $n) (i32.const 0)) + (then (return (i32.const 1))) + ) + (if (i32.eq (get_local $n) (i32.const 1)) + (then (return (i32.const 1))) + ) + (i32.add + (call $fib (i32.sub (get_local $n) (i32.const 1))) + (call $fib (i32.sub (get_local $n) (i32.const 2))) + ) + ) + + (export "main" (func $main)) +) diff --git a/examples/hello_world/Cargo.toml b/examples/hello_world/Cargo.toml new file mode 100644 index 00000000000..a71d719ed41 --- /dev/null +++ b/examples/hello_world/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "hello_world" +version = "0.1.0" +authors = ["losfair "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/examples/hello_world/src/main.rs b/examples/hello_world/src/main.rs new file mode 100644 index 00000000000..ca24b9cead6 --- /dev/null +++ b/examples/hello_world/src/main.rs @@ -0,0 +1,7 @@ +fn main() { + for i in 0..8 { + let s = format!("Hello, {}", i); + println!("{}", s); + } + panic!("OK"); +} diff --git a/examples/iterative_hash/src/main.rs b/examples/iterative_hash/src/main.rs index 043bc02a1d4..66ed10405f2 100644 --- a/examples/iterative_hash/src/main.rs +++ b/examples/iterative_hash/src/main.rs @@ -20,7 +20,7 @@ fn main() { let diff = millis - last_millis; if diff >= 100 { record_count += 1; - println!("{}", (i - round_count) as f64 / diff as f64); + println!("{}", ((i - round_count) as u128) * 1000000 / diff ); last_millis = millis; round_count = i; } diff --git a/install.sh b/install.sh index 0420aad329f..e6c874e2a0e 100755 --- a/install.sh +++ b/install.sh @@ -208,6 +208,7 @@ initArch() { printf "$cyan> Using WASMER_ARCH ($WASMER_ARCH).$reset\n" ARCH="$WASMER_ARCH" fi + # If you modify this list, please also modify scripts/binary-name.sh case $ARCH in amd64) ARCH="amd64";; x86_64) ARCH="amd64";; diff --git a/lib/middleware-common-tests/src/lib.rs b/lib/middleware-common-tests/src/lib.rs index 0b6754c5f65..1515f5cd2a6 100644 --- a/lib/middleware-common-tests/src/lib.rs +++ b/lib/middleware-common-tests/src/lib.rs @@ 
-4,38 +4,43 @@ mod tests { use wasmer_middleware_common::metering::*; use wasmer_runtime_core::codegen::{MiddlewareChain, StreamingCompiler}; - use wasmer_runtime_core::{backend::Compiler, compile_with, imports, Func}; + use wasmer_runtime_core::fault::{pop_code_version, push_code_version}; + use wasmer_runtime_core::state::CodeVersion; + use wasmer_runtime_core::{ + backend::{Backend, Compiler}, + compile_with, imports, Func, + }; #[cfg(feature = "llvm")] - fn get_compiler(limit: u64) -> impl Compiler { + fn get_compiler(limit: u64) -> (impl Compiler, Backend) { use wasmer_llvm_backend::ModuleCodeGenerator as LLVMMCG; let c: StreamingCompiler = StreamingCompiler::new(move || { let mut chain = MiddlewareChain::new(); chain.push(Metering::new(limit)); chain }); - c + (c, Backend::LLVM) } #[cfg(feature = "singlepass")] - fn get_compiler(limit: u64) -> impl Compiler { + fn get_compiler(limit: u64) -> (impl Compiler, Backend) { use wasmer_singlepass_backend::ModuleCodeGenerator as SinglePassMCG; let c: StreamingCompiler = StreamingCompiler::new(move || { let mut chain = MiddlewareChain::new(); chain.push(Metering::new(limit)); chain }); - c + (c, Backend::Singlepass) } #[cfg(not(any(feature = "llvm", feature = "clif", feature = "singlepass")))] compile_error!("compiler not specified, activate a compiler via features"); #[cfg(feature = "clif")] - fn get_compiler(_limit: u64) -> impl Compiler { + fn get_compiler(_limit: u64) -> (impl Compiler, Backend) { compile_error!("cranelift does not implement metering"); use wasmer_clif_backend::CraneliftCompiler; - CraneliftCompiler::new() + (CraneliftCompiler::new(), Backend::Cranelift) } // Assemblyscript @@ -103,7 +108,8 @@ mod tests { let limit = 100u64; - let module = compile_with(&wasm_binary, &get_compiler(limit)).unwrap(); + let (compiler, backend_id) = get_compiler(limit); + let module = compile_with(&wasm_binary, &compiler).unwrap(); let import_object = imports! {}; let mut instance = module.instantiate(&import_object).unwrap(); @@ -111,7 +117,23 @@ mod tests { set_points_used(&mut instance, 0u64); let add_to: Func<(i32, i32), i32> = instance.func("add_to").unwrap(); + + let cv_pushed = if let Some(msm) = instance.module.runnable_module.get_module_state_map() { + push_code_version(CodeVersion { + baseline: true, + msm: msm, + base: instance.module.runnable_module.get_code().unwrap().as_ptr() as usize, + backend: backend_id, + }); + true + } else { + false + }; + let value = add_to.call(3, 4).unwrap(); + if cv_pushed { + pop_code_version().unwrap(); + } // verify it returns the correct value assert_eq!(value, 7); @@ -127,7 +149,8 @@ mod tests { let limit = 100u64; - let module = compile_with(&wasm_binary, &get_compiler(limit)).unwrap(); + let (compiler, backend_id) = get_compiler(limit); + let module = compile_with(&wasm_binary, &compiler).unwrap(); let import_object = imports! 
{}; let mut instance = module.instantiate(&import_object).unwrap(); @@ -135,7 +158,22 @@ mod tests { set_points_used(&mut instance, 0u64); let add_to: Func<(i32, i32), i32> = instance.func("add_to").unwrap(); + + let cv_pushed = if let Some(msm) = instance.module.runnable_module.get_module_state_map() { + push_code_version(CodeVersion { + baseline: true, + msm: msm, + base: instance.module.runnable_module.get_code().unwrap().as_ptr() as usize, + backend: backend_id, + }); + true + } else { + false + }; let result = add_to.call(10_000_000, 4); + if cv_pushed { + pop_code_version().unwrap(); + } let err = result.unwrap_err(); match err { diff --git a/lib/middleware-common/src/block_trace.rs b/lib/middleware-common/src/block_trace.rs new file mode 100644 index 00000000000..f104ab086b4 --- /dev/null +++ b/lib/middleware-common/src/block_trace.rs @@ -0,0 +1,153 @@ +use wasmer_runtime_core::{ + codegen::{Event, EventSink, FunctionMiddleware, InternalEvent}, + module::ModuleInfo, + wasmparser::Operator, +}; + +pub struct BlockTrace { + func_idx: usize, + evt_idx: usize, +} + +impl BlockTrace { + pub fn new() -> BlockTrace { + BlockTrace { + func_idx: std::usize::MAX, + evt_idx: 0, + } + } +} + +impl FunctionMiddleware for BlockTrace { + type Error = String; + fn feed_event<'a, 'b: 'a>( + &mut self, + op: Event<'a, 'b>, + _module_info: &ModuleInfo, + sink: &mut EventSink<'a, 'b>, + ) -> Result<(), Self::Error> { + match op { + Event::Internal(InternalEvent::FunctionBegin(_)) => { + self.func_idx = self.func_idx.wrapping_add(1); + self.evt_idx = 0; + let func_idx = self.func_idx; + let evt_idx = self.evt_idx; + sink.push(op); + sink.push(Event::Internal(InternalEvent::Breakpoint(Box::new( + move |info| { + eprintln!( + "[BlockTrace] ({}, {}) -> enter_func % {:?}", + func_idx, + evt_idx, + info.fault + .and_then(|x| unsafe { x.read_stack(Some(1)) }) + .unwrap() + .frames[0] + ); + Ok(()) + }, + )))) + } + Event::Wasm(Operator::Call { .. }) => { + let func_idx = self.func_idx; + let evt_idx = self.evt_idx; + sink.push(op); + sink.push(Event::Internal(InternalEvent::Breakpoint(Box::new( + move |info| { + eprintln!( + "[BlockTrace] ({}, {}) -> leave_call % {:?}", + func_idx, + evt_idx, + info.fault + .and_then(|x| unsafe { x.read_stack(Some(1)) }) + .unwrap() + .frames[0] + ); + Ok(()) + }, + )))) + } + Event::Wasm(Operator::Block { .. }) => { + let func_idx = self.func_idx; + let evt_idx = self.evt_idx; + sink.push(op); + sink.push(Event::Internal(InternalEvent::Breakpoint(Box::new( + move |info| { + eprintln!( + "[BlockTrace] ({}, {}) -> block % {:?}", + func_idx, + evt_idx, + info.fault + .and_then(|x| unsafe { x.read_stack(Some(1)) }) + .unwrap() + .frames[0] + ); + Ok(()) + }, + )))) + } + Event::Wasm(Operator::Loop { .. }) => { + let func_idx = self.func_idx; + let evt_idx = self.evt_idx; + sink.push(op); + sink.push(Event::Internal(InternalEvent::Breakpoint(Box::new( + move |info| { + eprintln!( + "[BlockTrace] ({}, {}) -> loop % {:?}", + func_idx, + evt_idx, + info.fault + .and_then(|x| unsafe { x.read_stack(Some(1)) }) + .unwrap() + .frames[0] + ); + Ok(()) + }, + )))) + } + Event::Wasm(Operator::If { .. }) => { + let func_idx = self.func_idx; + let evt_idx = self.evt_idx; + sink.push(op); + sink.push(Event::Internal(InternalEvent::Breakpoint(Box::new( + move |info| { + eprintln!( + "[BlockTrace] ({}, {}) -> if % {:?}", + func_idx, + evt_idx, + info.fault + .and_then(|x| unsafe { x.read_stack(Some(1)) }) + .unwrap() + .frames[0] + ); + Ok(()) + }, + )))) + } + Event::Wasm(Operator::Else { .. 
}) => { + let func_idx = self.func_idx; + let evt_idx = self.evt_idx; + sink.push(op); + sink.push(Event::Internal(InternalEvent::Breakpoint(Box::new( + move |info| { + eprintln!( + "[BlockTrace] ({}, {}) -> else % {:?}", + func_idx, + evt_idx, + info.fault + .and_then(|x| unsafe { x.read_stack(Some(1)) }) + .unwrap() + .frames[0] + ); + Ok(()) + }, + )))) + } + _ => { + sink.push(op); + } + } + self.evt_idx += 1; + Ok(()) + } +} diff --git a/lib/middleware-common/src/call_trace.rs b/lib/middleware-common/src/call_trace.rs index 04a763abcdf..5cb77534c56 100644 --- a/lib/middleware-common/src/call_trace.rs +++ b/lib/middleware-common/src/call_trace.rs @@ -1,9 +1,23 @@ +use std::sync::{ + atomic::{AtomicU32, Ordering}, + Arc, +}; use wasmer_runtime_core::{ codegen::{Event, EventSink, FunctionMiddleware, InternalEvent}, module::ModuleInfo, }; -pub struct CallTrace; +pub struct CallTrace { + counter: Arc<AtomicU32>, +} + +impl CallTrace { + pub fn new() -> CallTrace { + CallTrace { + counter: Arc::new(AtomicU32::new(0)), + } + } +} impl FunctionMiddleware for CallTrace { type Error = String; @@ -13,10 +27,13 @@ impl FunctionMiddleware for CallTrace { _module_info: &ModuleInfo, sink: &mut EventSink<'a, 'b>, ) -> Result<(), Self::Error> { + let counter = self.counter.clone(); + match op { Event::Internal(InternalEvent::FunctionBegin(id)) => sink.push(Event::Internal( InternalEvent::Breakpoint(Box::new(move |_| { - eprintln!("func ({})", id); + let idx = counter.fetch_add(1, Ordering::SeqCst); + eprintln!("[{}] func ({})", idx, id); Ok(()) })), )), diff --git a/lib/middleware-common/src/lib.rs b/lib/middleware-common/src/lib.rs index c7900c83a73..85885383f1d 100644 --- a/lib/middleware-common/src/lib.rs +++ b/lib/middleware-common/src/lib.rs @@ -10,5 +10,7 @@ #![doc(html_favicon_url = "https://wasmer.io/static/icons/favicon.ico")] #![doc(html_logo_url = "https://avatars3.githubusercontent.com/u/44205449?s=200&v=4")] +#[cfg(unix)] +pub mod block_trace; pub mod call_trace; pub mod metering; diff --git a/lib/runtime-core/build.rs b/lib/runtime-core/build.rs index 81884f0e18b..3a549e45fd4 100644 --- a/lib/runtime-core/build.rs +++ b/lib/runtime-core/build.rs @@ -29,14 +29,20 @@ fn main() { println!("cargo:rustc-cfg=nightly"); } - if cfg!(all(target_os = "linux", target_arch = "x86_64")) { - cc::Build::new() - .file("image-loading-linux-x86-64.s") - .compile("image-loading"); - } else if cfg!(all(target_os = "macos", target_arch = "x86_64")) { - cc::Build::new() - .file("image-loading-macos-x86-64.s") - .compile("image-loading"); - } else { + let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); + + match (target_os.as_str(), target_arch.as_str()) { + ("linux", "x86_64") => { + cc::Build::new() + .file("image-loading-linux-x86-64.s") + .compile("image-loading"); + } + ("macos", "x86_64") => { + cc::Build::new() + .file("image-loading-macos-x86-64.s") + .compile("image-loading"); + } + _ => {} } } diff --git a/lib/runtime-core/src/backend.rs b/lib/runtime-core/src/backend.rs index 893874ed934..df4ef775160 100644 --- a/lib/runtime-core/src/backend.rs +++ b/lib/runtime-core/src/backend.rs @@ -72,6 +72,82 @@ impl std::str::FromStr for Backend { } } +#[derive(Copy, Clone, Debug)] +pub enum Architecture { + X64, + Aarch64, +} + +#[repr(u8)] +#[derive(Copy, Clone, Debug)] +pub enum InlineBreakpointType { + Trace, + Middleware, + Unknown, +} + +#[derive(Clone, Debug)] +pub struct InlineBreakpoint { + pub size: usize, + pub ty: InlineBreakpointType, +} +
+pub fn get_inline_breakpoint_size(arch: Architecture, backend: Backend) -> Option<usize> { + match (arch, backend) { + (Architecture::X64, Backend::Singlepass) => Some(7), + (Architecture::Aarch64, Backend::Singlepass) => Some(12), + _ => None, + } +} + +pub fn read_inline_breakpoint( + arch: Architecture, + backend: Backend, + code: &[u8], +) -> Option<InlineBreakpoint> { + match arch { + Architecture::X64 => match backend { + Backend::Singlepass => { + if code.len() < 7 { + None + } else if &code[..6] == &[0x0f, 0x0b, 0x0f, 0xb9, 0xcd, 0xff] { + // ud2 ud (int 0xff) code + Some(InlineBreakpoint { + size: 7, + ty: match code[6] { + 0 => InlineBreakpointType::Trace, + 1 => InlineBreakpointType::Middleware, + _ => InlineBreakpointType::Unknown, + }, + }) + } else { + None + } + } + _ => None, + }, + Architecture::Aarch64 => match backend { + Backend::Singlepass => { + if code.len() < 12 { + None + } else if &code[..8] == &[0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff] { + Some(InlineBreakpoint { + size: 12, + ty: match code[8] { + 0 => InlineBreakpointType::Trace, + 1 => InlineBreakpointType::Middleware, + _ => InlineBreakpointType::Unknown, + }, + }) + } else { + None + } + } + _ => None, + }, + } +} + #[cfg(test)] mod backend_test { use super::*; diff --git a/lib/runtime-core/src/codegen.rs b/lib/runtime-core/src/codegen.rs index 9e42d80490a..99158fb1501 100644 --- a/lib/runtime-core/src/codegen.rs +++ b/lib/runtime-core/src/codegen.rs @@ -1,5 +1,7 @@ //! The codegen module provides common functions and data structures used by multiple backends //! during the code generation process. +#[cfg(unix)] +use crate::fault::FaultInfo; use crate::{ backend::RunnableModule, backend::{Backend, CacheGen, Compiler, CompilerConfig, Features, Token}, @@ -64,9 +66,17 @@ impl fmt::Debug for InternalEvent { } /// Information for a breakpoint +#[cfg(unix)] pub struct BreakpointInfo<'a> { /// Fault. - pub fault: Option<&'a dyn Any>, + pub fault: Option<&'a FaultInfo>, +} + +/// Information for a breakpoint +#[cfg(not(unix))] +pub struct BreakpointInfo { + /// Fault placeholder. + pub fault: Option<()>, } /// A trait that represents the functions needed to be implemented to generate code for a module. diff --git a/lib/runtime-core/src/fault.rs b/lib/runtime-core/src/fault.rs index a2dd8f5e1fb..bc64c15fa52 100644 --- a/lib/runtime-core/src/fault.rs +++ b/lib/runtime-core/src/fault.rs @@ -5,17 +5,31 @@ pub mod raw { //! The raw module contains required externed function interfaces for the fault module. use std::ffi::c_void; + #[cfg(target_arch = "x86_64")] extern "C" { + /// Load registers and return on the stack [stack_end..stack_begin]. pub fn run_on_alternative_stack(stack_end: *mut u64, stack_begin: *mut u64) -> u64; + /// Internal routine for switching into a backend without information about where registers are preserved. pub fn register_preservation_trampoline(); // NOT safe to call directly + } + + /// Internal routine for switching into a backend without information about where registers are preserved.
+ #[cfg(not(target_arch = "x86_64"))] + pub extern "C" fn register_preservation_trampoline() { + unimplemented!("register_preservation_trampoline"); + } + + extern "C" { + /// libc setjmp pub fn setjmp(env: *mut c_void) -> i32; + /// libc longjmp pub fn longjmp(env: *mut c_void, val: i32) -> !; } } use crate::codegen::{BreakpointInfo, BreakpointMap}; -use crate::state::x64::{build_instance_image, read_stack, X64Register, GPR, XMM}; -use crate::state::CodeVersion; +use crate::state::x64::{build_instance_image, read_stack, X64Register, GPR}; +use crate::state::{CodeVersion, ExecutionStateImage}; use crate::vm; use libc::{mmap, mprotect, siginfo_t, MAP_ANON, MAP_PRIVATE, PROT_NONE, PROT_READ, PROT_WRITE}; use nix::sys::signal::{ @@ -29,13 +43,19 @@ use std::process; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Once; +#[cfg(target_arch = "x86_64")] pub(crate) unsafe fn run_on_alternative_stack(stack_end: *mut u64, stack_begin: *mut u64) -> u64 { raw::run_on_alternative_stack(stack_end, stack_begin) } +#[cfg(not(target_arch = "x86_64"))] +pub(crate) unsafe fn run_on_alternative_stack(_stack_end: *mut u64, _stack_begin: *mut u64) -> u64 { + unimplemented!("run_on_alternative_stack"); +} + const TRAP_STACK_SIZE: usize = 1048576; // 1MB -const SETJMP_BUFFER_LEN: usize = 27; +const SETJMP_BUFFER_LEN: usize = 128; type SetJmpBuffer = [i32; SETJMP_BUFFER_LEN]; struct UnwindInfo { @@ -202,6 +222,13 @@ unsafe fn with_breakpoint_map<R, F: FnOnce(Option<&BreakpointMap>) -> R>(f: F) - f(inner.breakpoints.as_ref()) } +#[cfg(not(target_arch = "x86_64"))] +/// Allocates and runs with the given stack size and closure. +pub fn allocate_and_run<R, F: FnOnce() -> R>(_size: usize, f: F) -> R { + f() +} + +#[cfg(target_arch = "x86_64")] /// Allocates and runs with the given stack size and closure. pub fn allocate_and_run<R, F: FnOnce() -> R>(size: usize, f: F) -> R { struct Context<F: FnOnce() -> R, R> { @@ -245,12 +272,75 @@ extern "C" fn signal_trap_handler( siginfo: *mut siginfo_t, ucontext: *mut c_void, ) { + use crate::backend::{ + get_inline_breakpoint_size, read_inline_breakpoint, Architecture, InlineBreakpointType, + }; + + #[cfg(target_arch = "x86_64")] + static ARCH: Architecture = Architecture::X64; + + #[cfg(target_arch = "aarch64")] + static ARCH: Architecture = Architecture::Aarch64; + + let mut should_unwind = false; + let mut unwind_result: Box<dyn Any> = Box::new(()); + unsafe { let fault = get_fault_info(siginfo as _, ucontext); + let early_return = allocate_and_run(TRAP_STACK_SIZE, || { + CURRENT_CODE_VERSIONS.with(|versions| { + let versions = versions.borrow(); + for v in versions.iter() { + let magic_size = if let Some(x) = get_inline_breakpoint_size(ARCH, v.backend) { + x + } else { + continue; + }; + let ip = fault.ip.get(); + let end = v.base + v.msm.total_size; + if ip >= v.base && ip < end && ip + magic_size <= end { + if let Some(ib) = read_inline_breakpoint( + ARCH, + v.backend, + std::slice::from_raw_parts(ip as *const u8, magic_size), + ) { + match ib.ty { + InlineBreakpointType::Trace => {} + InlineBreakpointType::Middleware => { + let out: Option<Result<(), Box<dyn Any>>> = + with_breakpoint_map(|bkpt_map| { + bkpt_map.and_then(|x| x.get(&ip)).map(|x| { + x(BreakpointInfo { + fault: Some(&fault), + }) + }) + }); + if let Some(Ok(())) = out { + } else if let Some(Err(e)) = out { + should_unwind = true; + unwind_result = e; + } + } + _ => println!("Unknown breakpoint type: {:?}", ib.ty), + } + + fault.ip.set(ip + magic_size); + return true; + } + break; + } + } + false + }) + }); + if should_unwind { + begin_unsafe_unwind(unwind_result); + } + if early_return { + return; + } - let mut 
unwind_result: Box = Box::new(()); - - let should_unwind = allocate_and_run(TRAP_STACK_SIZE, || { + should_unwind = allocate_and_run(TRAP_STACK_SIZE, || { let mut is_suspend_signal = false; WAS_SIGINT_TRIGGERED.with(|x| x.set(false)); @@ -259,7 +349,7 @@ extern "C" fn signal_trap_handler( Ok(SIGTRAP) => { // breakpoint let out: Option>> = with_breakpoint_map(|bkpt_map| { - bkpt_map.and_then(|x| x.get(&(fault.ip as usize))).map(|x| { + bkpt_map.and_then(|x| x.get(&(fault.ip.get()))).map(|x| { x(BreakpointInfo { fault: Some(&fault), }) @@ -289,17 +379,9 @@ extern "C" fn signal_trap_handler( } let ctx: &mut vm::Ctx = &mut **CURRENT_CTX.with(|x| x.get()); - let rsp = fault.known_registers[X64Register::GPR(GPR::RSP).to_index().0].unwrap(); - - let es_image = CURRENT_CODE_VERSIONS.with(|versions| { - let versions = versions.borrow(); - read_stack( - || versions.iter(), - rsp as usize as *const u64, - fault.known_registers, - Some(fault.ip as usize as u64), - ) - }); + let es_image = fault + .read_stack(None) + .expect("fault.read_stack() failed. Broken invariants?"); if is_suspend_signal { let image = build_instance_image(ctx, es_image); @@ -367,19 +449,109 @@ unsafe fn install_sighandler() { sigaction(SIGINT, &sa_interrupt).unwrap(); } +#[derive(Debug, Clone)] /// Info about the fault pub struct FaultInfo { /// Faulting address. pub faulting_addr: *const c_void, /// Instruction pointer. - pub ip: *const c_void, - /// Known registers. + pub ip: &'static Cell, + /// Values of known registers. pub known_registers: [Option; 32], } -/// Gets fault info for the given siginfo and context pointers. +impl FaultInfo { + /// Parses the stack and builds an execution state image. + pub unsafe fn read_stack(&self, max_depth: Option) -> Option { + let rsp = match self.known_registers[X64Register::GPR(GPR::RSP).to_index().0] { + Some(x) => x, + None => return None, + }; + + Some(CURRENT_CODE_VERSIONS.with(|versions| { + let versions = versions.borrow(); + read_stack( + || versions.iter(), + rsp as usize as *const u64, + self.known_registers, + Some(self.ip.get() as u64), + max_depth, + ) + })) + } +} + +#[cfg(all(target_os = "linux", target_arch = "aarch64"))] +/// Get fault info from siginfo and ucontext. +pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *mut c_void) -> FaultInfo { + #[allow(dead_code)] + #[allow(non_camel_case_types)] + #[repr(packed)] + struct sigcontext { + fault_address: u64, + regs: [u64; 31], + sp: u64, + pc: u64, + pstate: u64, + reserved: [u8; 4096], + } + + #[allow(dead_code)] + #[allow(non_camel_case_types)] + #[repr(packed)] + struct ucontext { + unknown: [u8; 176], + uc_mcontext: sigcontext, + } + + #[allow(dead_code)] + #[allow(non_camel_case_types)] + #[repr(C)] + struct siginfo_t { + si_signo: i32, + si_errno: i32, + si_code: i32, + si_addr: u64, + // ... 
+    }
+
+    let siginfo = siginfo as *const siginfo_t;
+    let si_addr = (*siginfo).si_addr;
+
+    let ucontext = ucontext as *mut ucontext;
+    let gregs = &(*ucontext).uc_mcontext.regs;
+
+    let mut known_registers: [Option<u64>; 32] = [None; 32];
+
+    known_registers[X64Register::GPR(GPR::R15).to_index().0] = Some(gregs[15] as _);
+    known_registers[X64Register::GPR(GPR::R14).to_index().0] = Some(gregs[14] as _);
+    known_registers[X64Register::GPR(GPR::R13).to_index().0] = Some(gregs[13] as _);
+    known_registers[X64Register::GPR(GPR::R12).to_index().0] = Some(gregs[12] as _);
+    known_registers[X64Register::GPR(GPR::R11).to_index().0] = Some(gregs[11] as _);
+    known_registers[X64Register::GPR(GPR::R10).to_index().0] = Some(gregs[10] as _);
+    known_registers[X64Register::GPR(GPR::R9).to_index().0] = Some(gregs[9] as _);
+    known_registers[X64Register::GPR(GPR::R8).to_index().0] = Some(gregs[8] as _);
+    known_registers[X64Register::GPR(GPR::RSI).to_index().0] = Some(gregs[6] as _);
+    known_registers[X64Register::GPR(GPR::RDI).to_index().0] = Some(gregs[7] as _);
+    known_registers[X64Register::GPR(GPR::RDX).to_index().0] = Some(gregs[2] as _);
+    known_registers[X64Register::GPR(GPR::RCX).to_index().0] = Some(gregs[1] as _);
+    known_registers[X64Register::GPR(GPR::RBX).to_index().0] = Some(gregs[3] as _);
+    known_registers[X64Register::GPR(GPR::RAX).to_index().0] = Some(gregs[0] as _);
+
+    known_registers[X64Register::GPR(GPR::RBP).to_index().0] = Some(gregs[5] as _);
+    known_registers[X64Register::GPR(GPR::RSP).to_index().0] = Some(gregs[28] as _);
+
+    FaultInfo {
+        faulting_addr: si_addr as usize as _,
+        ip: std::mem::transmute::<&mut u64, &'static Cell<usize>>(&mut (*ucontext).uc_mcontext.pc),
+        known_registers,
+    }
+}
+
 #[cfg(all(target_os = "linux", target_arch = "x86_64"))]
-pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *const c_void) -> FaultInfo {
+/// Get fault info from siginfo and ucontext.
+pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *mut c_void) -> FaultInfo { + use crate::state::x64::XMM; use libc::{ _libc_xmmreg, ucontext_t, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, REG_R8, REG_R9, REG_RAX, REG_RBP, REG_RBX, REG_RCX, REG_RDI, REG_RDX, REG_RIP, REG_RSI, REG_RSP, @@ -402,9 +574,8 @@ pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *const c_void) -> let siginfo = siginfo as *const siginfo_t; let si_addr = (*siginfo).si_addr; - let ucontext = ucontext as *const ucontext_t; - let gregs = &(*ucontext).uc_mcontext.gregs; - let fpregs = &*(*ucontext).uc_mcontext.fpregs; + let ucontext = ucontext as *mut ucontext_t; + let gregs = &mut (*ucontext).uc_mcontext.gregs; let mut known_registers: [Option; 32] = [None; 32]; known_registers[X64Register::GPR(GPR::R15).to_index().0] = Some(gregs[REG_R15 as usize] as _); @@ -425,33 +596,43 @@ pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *const c_void) -> known_registers[X64Register::GPR(GPR::RBP).to_index().0] = Some(gregs[REG_RBP as usize] as _); known_registers[X64Register::GPR(GPR::RSP).to_index().0] = Some(gregs[REG_RSP as usize] as _); - known_registers[X64Register::XMM(XMM::XMM0).to_index().0] = Some(read_xmm(&fpregs._xmm[0])); - known_registers[X64Register::XMM(XMM::XMM1).to_index().0] = Some(read_xmm(&fpregs._xmm[1])); - known_registers[X64Register::XMM(XMM::XMM2).to_index().0] = Some(read_xmm(&fpregs._xmm[2])); - known_registers[X64Register::XMM(XMM::XMM3).to_index().0] = Some(read_xmm(&fpregs._xmm[3])); - known_registers[X64Register::XMM(XMM::XMM4).to_index().0] = Some(read_xmm(&fpregs._xmm[4])); - known_registers[X64Register::XMM(XMM::XMM5).to_index().0] = Some(read_xmm(&fpregs._xmm[5])); - known_registers[X64Register::XMM(XMM::XMM6).to_index().0] = Some(read_xmm(&fpregs._xmm[6])); - known_registers[X64Register::XMM(XMM::XMM7).to_index().0] = Some(read_xmm(&fpregs._xmm[7])); - known_registers[X64Register::XMM(XMM::XMM8).to_index().0] = Some(read_xmm(&fpregs._xmm[8])); - known_registers[X64Register::XMM(XMM::XMM9).to_index().0] = Some(read_xmm(&fpregs._xmm[9])); - known_registers[X64Register::XMM(XMM::XMM10).to_index().0] = Some(read_xmm(&fpregs._xmm[10])); - known_registers[X64Register::XMM(XMM::XMM11).to_index().0] = Some(read_xmm(&fpregs._xmm[11])); - known_registers[X64Register::XMM(XMM::XMM12).to_index().0] = Some(read_xmm(&fpregs._xmm[12])); - known_registers[X64Register::XMM(XMM::XMM13).to_index().0] = Some(read_xmm(&fpregs._xmm[13])); - known_registers[X64Register::XMM(XMM::XMM14).to_index().0] = Some(read_xmm(&fpregs._xmm[14])); - known_registers[X64Register::XMM(XMM::XMM15).to_index().0] = Some(read_xmm(&fpregs._xmm[15])); + if !(*ucontext).uc_mcontext.fpregs.is_null() { + let fpregs = &*(*ucontext).uc_mcontext.fpregs; + known_registers[X64Register::XMM(XMM::XMM0).to_index().0] = Some(read_xmm(&fpregs._xmm[0])); + known_registers[X64Register::XMM(XMM::XMM1).to_index().0] = Some(read_xmm(&fpregs._xmm[1])); + known_registers[X64Register::XMM(XMM::XMM2).to_index().0] = Some(read_xmm(&fpregs._xmm[2])); + known_registers[X64Register::XMM(XMM::XMM3).to_index().0] = Some(read_xmm(&fpregs._xmm[3])); + known_registers[X64Register::XMM(XMM::XMM4).to_index().0] = Some(read_xmm(&fpregs._xmm[4])); + known_registers[X64Register::XMM(XMM::XMM5).to_index().0] = Some(read_xmm(&fpregs._xmm[5])); + known_registers[X64Register::XMM(XMM::XMM6).to_index().0] = Some(read_xmm(&fpregs._xmm[6])); + known_registers[X64Register::XMM(XMM::XMM7).to_index().0] = 
Some(read_xmm(&fpregs._xmm[7])); + known_registers[X64Register::XMM(XMM::XMM8).to_index().0] = Some(read_xmm(&fpregs._xmm[8])); + known_registers[X64Register::XMM(XMM::XMM9).to_index().0] = Some(read_xmm(&fpregs._xmm[9])); + known_registers[X64Register::XMM(XMM::XMM10).to_index().0] = + Some(read_xmm(&fpregs._xmm[10])); + known_registers[X64Register::XMM(XMM::XMM11).to_index().0] = + Some(read_xmm(&fpregs._xmm[11])); + known_registers[X64Register::XMM(XMM::XMM12).to_index().0] = + Some(read_xmm(&fpregs._xmm[12])); + known_registers[X64Register::XMM(XMM::XMM13).to_index().0] = + Some(read_xmm(&fpregs._xmm[13])); + known_registers[X64Register::XMM(XMM::XMM14).to_index().0] = + Some(read_xmm(&fpregs._xmm[14])); + known_registers[X64Register::XMM(XMM::XMM15).to_index().0] = + Some(read_xmm(&fpregs._xmm[15])); + } FaultInfo { faulting_addr: si_addr as usize as _, - ip: gregs[REG_RIP as usize] as _, + ip: std::mem::transmute::<&mut i64, &'static Cell>(&mut gregs[REG_RIP as usize]), known_registers, } } /// Get fault info from siginfo and ucontext. #[cfg(all(target_os = "macos", target_arch = "x86_64"))] -pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *const c_void) -> FaultInfo { +pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *mut c_void) -> FaultInfo { + use crate::state::x64::XMM; #[allow(dead_code)] #[repr(C)] struct ucontext_t { @@ -460,7 +641,7 @@ pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *const c_void) -> uc_stack: libc::stack_t, uc_link: *const ucontext_t, uc_mcsize: u64, - uc_mcontext: *const mcontext_t, + uc_mcontext: *mut mcontext_t, } #[repr(C)] struct exception_state { @@ -518,8 +699,8 @@ pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *const c_void) -> let siginfo = siginfo as *const siginfo_t; let si_addr = (*siginfo).si_addr; - let ucontext = ucontext as *const ucontext_t; - let ss = &(*(*ucontext).uc_mcontext).ss; + let ucontext = ucontext as *mut ucontext_t; + let ss = &mut (*(*ucontext).uc_mcontext).ss; let fs = &(*(*ucontext).uc_mcontext).fs; let mut known_registers: [Option; 32] = [None; 32]; @@ -561,7 +742,7 @@ pub unsafe fn get_fault_info(siginfo: *const c_void, ucontext: *const c_void) -> FaultInfo { faulting_addr: si_addr, - ip: ss.rip as _, + ip: std::mem::transmute::<&mut u64, &'static Cell>(&mut ss.rip), known_registers, } } diff --git a/lib/runtime-core/src/lib.rs b/lib/runtime-core/src/lib.rs index eb506f4cb43..56f98bc7163 100644 --- a/lib/runtime-core/src/lib.rs +++ b/lib/runtime-core/src/lib.rs @@ -64,7 +64,7 @@ pub mod vm; pub mod vmcalls; #[cfg(all(unix, target_arch = "x86_64"))] pub use trampoline_x64 as trampoline; -#[cfg(all(unix, target_arch = "x86_64"))] +#[cfg(unix)] pub mod fault; pub mod state; #[cfg(feature = "managed")] diff --git a/lib/runtime-core/src/state.rs b/lib/runtime-core/src/state.rs index 317976db500..1b7d3f34bbe 100644 --- a/lib/runtime-core/src/state.rs +++ b/lib/runtime-core/src/state.rs @@ -2,6 +2,7 @@ //! state could read or updated at runtime. Use cases include generating stack traces, switching //! generated code from one tier to another, or serializing state of a running instace. +use crate::backend::Backend; use std::collections::BTreeMap; use std::ops::Bound::{Included, Unbounded}; @@ -180,8 +181,11 @@ pub struct CodeVersion { /// `ModuleStateMap` for this code version. pub msm: ModuleStateMap, - /// A pointer to the machine code for this module. + /// A pointer to the machine code for this module. pub base: usize, + + /// The backend used to compile this module. 
+ pub backend: Backend, } impl ModuleStateMap { @@ -472,9 +476,143 @@ impl InstanceImage { } } -#[cfg(all(unix, target_arch = "x86_64"))] +/// Declarations for x86-64 registers. +#[cfg(unix)] +pub mod x64_decl { + use super::*; + + /// General-purpose registers. + #[repr(u8)] + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum GPR { + /// RAX register + RAX, + /// RCX register + RCX, + /// RDX register + RDX, + /// RBX register + RBX, + /// RSP register + RSP, + /// RBP register + RBP, + /// RSI register + RSI, + /// RDI register + RDI, + /// R8 register + R8, + /// R9 register + R9, + /// R10 register + R10, + /// R11 register + R11, + /// R12 register + R12, + /// R13 register + R13, + /// R14 register + R14, + /// R15 register + R15, + } + + /// XMM registers. + #[repr(u8)] + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum XMM { + /// XMM register 0 + XMM0, + /// XMM register 1 + XMM1, + /// XMM register 2 + XMM2, + /// XMM register 3 + XMM3, + /// XMM register 4 + XMM4, + /// XMM register 5 + XMM5, + /// XMM register 6 + XMM6, + /// XMM register 7 + XMM7, + /// XMM register 8 + XMM8, + /// XMM register 9 + XMM9, + /// XMM register 10 + XMM10, + /// XMM register 11 + XMM11, + /// XMM register 12 + XMM12, + /// XMM register 13 + XMM13, + /// XMM register 14 + XMM14, + /// XMM register 15 + XMM15, + } + + /// A machine register under the x86-64 architecture. + #[derive(Copy, Clone, Debug, Eq, PartialEq)] + pub enum X64Register { + /// General-purpose registers. + GPR(GPR), + /// XMM (floating point/SIMD) registers. + XMM(XMM), + } + + impl X64Register { + /// Returns the index of the register. + pub fn to_index(&self) -> RegisterIndex { + match *self { + X64Register::GPR(x) => RegisterIndex(x as usize), + X64Register::XMM(x) => RegisterIndex(x as usize + 16), + } + } + + /// Converts a DWARD regnum to X64Register. + pub fn from_dwarf_regnum(x: u16) -> Option { + Some(match x { + 0 => X64Register::GPR(GPR::RAX), + 1 => X64Register::GPR(GPR::RDX), + 2 => X64Register::GPR(GPR::RCX), + 3 => X64Register::GPR(GPR::RBX), + 4 => X64Register::GPR(GPR::RSI), + 5 => X64Register::GPR(GPR::RDI), + 6 => X64Register::GPR(GPR::RBP), + 7 => X64Register::GPR(GPR::RSP), + 8 => X64Register::GPR(GPR::R8), + 9 => X64Register::GPR(GPR::R9), + 10 => X64Register::GPR(GPR::R10), + 11 => X64Register::GPR(GPR::R11), + 12 => X64Register::GPR(GPR::R12), + 13 => X64Register::GPR(GPR::R13), + 14 => X64Register::GPR(GPR::R14), + 15 => X64Register::GPR(GPR::R15), + + 17 => X64Register::XMM(XMM::XMM0), + 18 => X64Register::XMM(XMM::XMM1), + 19 => X64Register::XMM(XMM::XMM2), + 20 => X64Register::XMM(XMM::XMM3), + 21 => X64Register::XMM(XMM::XMM4), + 22 => X64Register::XMM(XMM::XMM5), + 23 => X64Register::XMM(XMM::XMM6), + 24 => X64Register::XMM(XMM::XMM7), + _ => return None, + }) + } + } +} + +#[cfg(unix)] pub mod x64 { //! The x64 state module contains functions to generate state and code for x64 targets. 
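+    // Register declarations live in the shared `x64_decl` module so that the fault
+    // handler on non-x86_64 unix hosts (e.g. the AArch64 path) can use the same
+    // `X64Register` index space; re-export them here for users of `state::x64`.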
+ pub use super::x64_decl::*; use super::*; use crate::codegen::BreakpointMap; use crate::fault::{ @@ -522,7 +660,7 @@ pub mod x64 { let mut last_stack_offset: u64 = 0; // rbp - let mut known_registers: [Option; 24] = [None; 24]; + let mut known_registers: [Option; 32] = [None; 32]; let local_functions_vec: Vec<&FunctionStateMap> = msm.local_functions.iter().map(|(_, v)| v).collect(); @@ -902,12 +1040,19 @@ pub mod x64 { mut stack: *const u64, initially_known_registers: [Option; 32], mut initial_address: Option, + max_depth: Option, ) -> ExecutionStateImage { let mut known_registers: [Option; 32] = initially_known_registers; let mut results: Vec = vec![]; let mut was_baseline = true; - for _ in 0.. { + for depth in 0.. { + if let Some(max_depth) = max_depth { + if depth >= max_depth { + return ExecutionStateImage { frames: results }; + } + } + let ret_addr = initial_address.take().unwrap_or_else(|| { let x = *stack; stack = stack.offset(1); @@ -918,6 +1063,7 @@ pub mod x64 { let mut is_baseline: Option = None; for version in versions() { + //println!("Lookup IP: {:x}", ret_addr); match version .msm .lookup_call_ip(ret_addr as usize, version.base) @@ -1105,144 +1251,10 @@ pub mod x64 { stack: wasm_stack, locals: wasm_locals, }; + //println!("WFS = {:?}", wfs); results.push(wfs); } unreachable!(); } - - /// A kind of GPR register - #[repr(u8)] - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] - pub enum GPR { - /// RAX Register - RAX, - /// RCX Register - RCX, - /// RDX Register - RDX, - /// RBX Register - RBX, - /// RSP Register - RSP, - /// RBP Register - RBP, - /// RSI Register - RSI, - /// RDI Register - RDI, - /// R8 Register - R8, - /// R9 Register - R9, - /// R10 Register - R10, - /// R11 Register - R11, - /// R12 Register - R12, - /// R13 Register - R13, - /// R14 Register - R14, - /// R15 Register - R15, - } - - /// A kind of XMM register - #[repr(u8)] - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] - pub enum XMM { - /// XMM0 Register - XMM0, - /// XMM1 Register - XMM1, - /// XMM2 Register - XMM2, - /// XMM3 Register - XMM3, - /// XMM4 Register - XMM4, - /// XMM5 Register - XMM5, - /// XMM6 Register - XMM6, - /// XMM7 Register - XMM7, - /// XMM8 Register - XMM8, - /// XMM9 Register - XMM9, - /// XMM10 Register - XMM10, - /// XMM11 Register - XMM11, - /// XMM12 Register - XMM12, - /// XMM13 Register - XMM13, - /// XMM14 Register - XMM14, - /// XMM15 Register - XMM15, - } - - /// A kind of register belonging to the x64 register set - #[derive(Copy, Clone, Debug, Eq, PartialEq)] - pub enum X64Register { - /// A register belonging to the GPR register set - GPR(GPR), - /// A register belonging to the XMM register set - XMM(XMM), - } - - impl X64Register { - /// Returns a `RegisterIndex` for the current `X64Register`. - pub fn to_index(&self) -> RegisterIndex { - match *self { - X64Register::GPR(x) => RegisterIndex(x as usize), - X64Register::XMM(x) => RegisterIndex(x as usize + 16), - } - } - - /// Returns an `Option` for the given DWARF register integer number. 
- pub fn from_dwarf_regnum(x: u16) -> Option { - Some(match x { - 0 => X64Register::GPR(GPR::RAX), - 1 => X64Register::GPR(GPR::RDX), - 2 => X64Register::GPR(GPR::RCX), - 3 => X64Register::GPR(GPR::RBX), - 4 => X64Register::GPR(GPR::RSI), - 5 => X64Register::GPR(GPR::RDI), - 6 => X64Register::GPR(GPR::RBP), - 7 => X64Register::GPR(GPR::RSP), - 8 => X64Register::GPR(GPR::R8), - 9 => X64Register::GPR(GPR::R9), - 10 => X64Register::GPR(GPR::R10), - 11 => X64Register::GPR(GPR::R11), - 12 => X64Register::GPR(GPR::R12), - 13 => X64Register::GPR(GPR::R13), - 14 => X64Register::GPR(GPR::R14), - 15 => X64Register::GPR(GPR::R15), - - 17 => X64Register::XMM(XMM::XMM0), - 18 => X64Register::XMM(XMM::XMM1), - 19 => X64Register::XMM(XMM::XMM2), - 20 => X64Register::XMM(XMM::XMM3), - 21 => X64Register::XMM(XMM::XMM4), - 22 => X64Register::XMM(XMM::XMM5), - 23 => X64Register::XMM(XMM::XMM6), - 24 => X64Register::XMM(XMM::XMM7), - 25 => X64Register::XMM(XMM::XMM8), - 26 => X64Register::XMM(XMM::XMM9), - 27 => X64Register::XMM(XMM::XMM10), - 28 => X64Register::XMM(XMM::XMM11), - 29 => X64Register::XMM(XMM::XMM12), - 30 => X64Register::XMM(XMM::XMM13), - 31 => X64Register::XMM(XMM::XMM14), - 32 => X64Register::XMM(XMM::XMM15), - _ => return None, - }) - } - } } diff --git a/lib/runtime-core/src/tiering.rs b/lib/runtime-core/src/tiering.rs index bb4426b125d..7d2a27d874e 100644 --- a/lib/runtime-core/src/tiering.rs +++ b/lib/runtime-core/src/tiering.rs @@ -1,6 +1,6 @@ //! The tiering module supports switching between code compiled with different optimization levels //! as runtime. -use crate::backend::{Compiler, CompilerConfig}; +use crate::backend::{Backend, Compiler, CompilerConfig}; use crate::compile_with_config; use crate::fault::{ catch_unsafe_unwind, ensure_sighandler, pop_code_version, push_code_version, with_ctx, @@ -43,6 +43,7 @@ struct OptimizationState { } struct OptimizationOutcome { + backend_id: Backend, module: Module, } @@ -53,6 +54,7 @@ unsafe impl Sync for CtxWrapper {} unsafe fn do_optimize( binary: &[u8], + backend_id: Backend, compiler: Box, ctx: &Mutex, state: &OptimizationState, @@ -72,7 +74,7 @@ unsafe fn do_optimize( let ctx_inner = ctx.lock().unwrap(); if !ctx_inner.0.is_null() { - *state.outcome.lock().unwrap() = Some(OptimizationOutcome { module }); + *state.outcome.lock().unwrap() = Some(OptimizationOutcome { backend_id, module }); set_wasm_interrupt_on_ctx(ctx_inner.0); } } @@ -85,7 +87,8 @@ pub unsafe fn run_tiering ShellExitOperation>( import_object: &ImportObject, start_raw: extern "C" fn(&mut Ctx), baseline: &mut Instance, - optimized_backends: Vec Box + Send>>, + baseline_backend: Backend, + optimized_backends: Vec<(Backend, Box Box + Send>)>, interactive_shell: F, ) -> Result<(), String> { ensure_sighandler(); @@ -107,9 +110,9 @@ pub unsafe fn run_tiering ShellExitOperation>( let ctx_box = ctx_box.clone(); let opt_state = opt_state.clone(); ::std::thread::spawn(move || { - for backend in optimized_backends { + for (backend_id, backend) in optimized_backends { if !ctx_box.lock().unwrap().0.is_null() { - do_optimize(&wasm_binary, backend(), &ctx_box, &opt_state); + do_optimize(&wasm_binary, backend_id, backend(), &ctx_box, &opt_state); } } }); @@ -125,6 +128,7 @@ pub unsafe fn run_tiering ShellExitOperation>( .get_module_state_map() .unwrap(), base: baseline.module.runnable_module.get_code().unwrap().as_ptr() as usize, + backend: baseline_backend, }); let n_versions: Cell = Cell::new(1); @@ -135,7 +139,7 @@ pub unsafe fn run_tiering ShellExitOperation>( })); loop { - let 
new_optimized: Option<&mut Instance> = { + let new_optimized: Option<(Backend, &mut Instance)> = { let mut outcome = opt_state.outcome.lock().unwrap(); if let Some(x) = outcome.take() { let instance = x @@ -144,12 +148,12 @@ pub unsafe fn run_tiering ShellExitOperation>( .map_err(|e| format!("Can't instantiate module: {:?}", e))?; // Keep the optimized code alive. optimized_instances.push(instance); - optimized_instances.last_mut() + optimized_instances.last_mut().map(|y| (x.backend_id, y)) } else { None } }; - if let Some(optimized) = new_optimized { + if let Some((backend_id, optimized)) = new_optimized { let base = module_info.imported_functions.len(); let code_ptr = optimized .module @@ -186,6 +190,7 @@ pub unsafe fn run_tiering ShellExitOperation>( .get_code() .unwrap() .as_ptr() as usize, + backend: backend_id, }); n_versions.set(n_versions.get() + 1); diff --git a/lib/singlepass-backend/Cargo.toml b/lib/singlepass-backend/Cargo.toml index c985546cc45..292b7038f02 100644 --- a/lib/singlepass-backend/Cargo.toml +++ b/lib/singlepass-backend/Cargo.toml @@ -12,8 +12,8 @@ readme = "README.md" [dependencies] wasmer-runtime-core = { path = "../runtime-core", version = "0.10.2" } -dynasm = "0.3.2" -dynasmrt = "0.3.1" +dynasm = "0.5" +dynasmrt = "0.5" lazy_static = "1.4" byteorder = "1.3" nix = "0.15" diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index e825c32e3b8..87ce3116d16 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -1,8 +1,14 @@ #![allow(clippy::forget_copy)] // Used by dynasm. #![warn(unused_imports)] -use crate::{emitter_x64::*, machine::*, protect_unix}; -use dynasmrt::{x64::Assembler, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi}; +use crate::emitter_x64::*; +use crate::machine::*; +use crate::protect_unix; +#[cfg(target_arch = "aarch64")] +use dynasmrt::aarch64::Assembler; +#[cfg(target_arch = "x86_64")] +use dynasmrt::x64::Assembler; +use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi}; use smallvec::SmallVec; use std::{ any::Any, @@ -16,7 +22,8 @@ use std::{ }; use wasmer_runtime_core::{ backend::{ - sys::Memory, Backend, CacheGen, CompilerConfig, MemoryBoundCheckMode, RunnableModule, Token, + get_inline_breakpoint_size, sys::Memory, Architecture, Backend, CacheGen, CompilerConfig, + MemoryBoundCheckMode, RunnableModule, Token, }, cache::{Artifact, Error as CacheError}, codegen::*, @@ -38,6 +45,14 @@ use wasmer_runtime_core::{ wasmparser::{MemoryImmediate, Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType}, }; +#[cfg(target_arch = "aarch64")] +#[allow(dead_code)] +static ARCH: Architecture = Architecture::Aarch64; +#[cfg(target_arch = "x86_64")] +#[allow(dead_code)] +static ARCH: Architecture = Architecture::X64; + +#[cfg(target_arch = "x86_64")] lazy_static! { /// Performs a System V call to `target` with [stack_top..stack_base] as the argument list, from right to left. static ref CONSTRUCT_STACK_AND_CALL_WASM: unsafe extern "C" fn (stack_top: *const u64, stack_base: *const u64, ctx: *mut vm::Ctx, target: *const vm::Func) -> u64 = { @@ -126,6 +141,45 @@ lazy_static! { }; } +#[cfg(target_arch = "aarch64")] +#[repr(C)] +#[allow(dead_code)] +struct CallCtx { + ctx: *mut vm::Ctx, + stack: *mut u64, + target: *mut u8, +} + +#[cfg(target_arch = "aarch64")] +lazy_static! { + /// Switches stack and executes the provided callback. 
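+    /// The callback receives `userdata` as its only argument and runs on the new stack:
+    /// the caller's stack pointer and link register are saved in the 16 bytes just below
+    /// `stack`, `sp` is pointed at the new stack, and both are restored once the callback
+    /// returns.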
+ static ref SWITCH_STACK: unsafe extern "C" fn (stack: *mut u64, cb: extern "C" fn (*mut u8) -> u64, userdata: *mut u8) -> u64 = { + let mut assembler = Assembler::new().unwrap(); + let offset = assembler.offset(); + dynasm!( + assembler + ; .arch aarch64 + ; sub x0, x0, 16 + ; mov x8, sp + ; str x8, [x0, 0] + ; str x30, [x0, 8] + ; adr x30, >done + ; mov sp, x0 + ; mov x0, x2 + ; br x1 + ; done: + ; ldr x30, [sp, 8] + ; ldr x8, [sp, 0] + ; mov sp, x8 + ; br x30 + ); + let buf = assembler.finalize().unwrap(); + let ret = unsafe { mem::transmute(buf.ptr(offset)) }; + mem::forget(buf); + ret + }; +} + pub struct X64ModuleCodeGenerator { functions: Vec, signatures: Option>>, @@ -273,15 +327,133 @@ impl RunnableModule for X64ExecutionContext { let args = slice::from_raw_parts(args, num_params_plus_one.unwrap().as_ptr() as usize - 1); - let args_reverse: SmallVec<[u64; 8]> = args.iter().cloned().rev().collect(); + let ret = match protect_unix::call_protected( || { - CONSTRUCT_STACK_AND_CALL_WASM( - args_reverse.as_ptr(), - args_reverse.as_ptr().offset(args_reverse.len() as isize), - ctx, - func.as_ptr(), - ) + #[cfg(target_arch = "x86_64")] + { + let args_reverse: SmallVec<[u64; 8]> = args.iter().cloned().rev().collect(); + CONSTRUCT_STACK_AND_CALL_WASM( + args_reverse.as_ptr(), + args_reverse.as_ptr().offset(args_reverse.len() as isize), + ctx, + func.as_ptr(), + ) + } + #[cfg(target_arch = "aarch64")] + { + struct CallCtx<'a> { + args: &'a [u64], + ctx: *mut vm::Ctx, + callable: NonNull, + } + extern "C" fn call_fn(f: *mut u8) -> u64 { + unsafe { + let f = &*(f as *const CallCtx); + let callable: extern "C" fn( + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64, + ) + -> u64 = std::mem::transmute(f.callable); + let mut args = f.args.iter(); + callable( + f.ctx as u64, + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + args.next().cloned().unwrap_or(0), + ) + } + } + let mut cctx = CallCtx { + args: &args, + ctx: ctx, + callable: func, + }; + use libc::{ + mmap, munmap, MAP_ANON, MAP_NORESERVE, MAP_PRIVATE, PROT_READ, + PROT_WRITE, + }; + const STACK_SIZE: usize = 1048576 * 1024; // 1GB of virtual address space for stack. 
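+                        // Reserve the call stack as a private, anonymous mapping.
+                        // MAP_NORESERVE keeps the 1GB as address space only; physical
+                        // pages are committed lazily as the guest actually touches them.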
+ let stack_ptr = mmap( + ::std::ptr::null_mut(), + STACK_SIZE, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, + -1, + 0, + ); + if stack_ptr as isize == -1 { + panic!("unable to allocate stack"); + } + // TODO: Mark specific regions in the stack as PROT_NONE. + let ret = SWITCH_STACK( + (stack_ptr as *mut u8).offset(STACK_SIZE as isize) as *mut u64, + call_fn, + &mut cctx as *mut CallCtx as *mut u8, + ); + munmap(stack_ptr, STACK_SIZE); + ret + } }, Some(execution_context.breakpoints.clone()), ) { @@ -359,12 +531,14 @@ impl ModuleCodeGenerator for X64ModuleCodeGenerator { fn new() -> X64ModuleCodeGenerator { + let a = Assembler::new().unwrap(); + X64ModuleCodeGenerator { functions: vec![], signatures: None, function_signatures: None, function_labels: Some(HashMap::new()), - assembler: Some(Assembler::new().unwrap()), + assembler: Some(a), func_import_count: 0, config: None, } @@ -405,15 +579,12 @@ impl ModuleCodeGenerator .or_insert_with(|| (assembler.new_dynamic_label(), None)); begin_label_info.1 = Some(begin_offset); + assembler.arch_emit_entry_trampoline(); let begin_label = begin_label_info.0; let mut machine = Machine::new(); machine.track_state = self.config.as_ref().unwrap().track_state; - dynasm!( - assembler - ; => begin_label - //; int 3 - ); + assembler.emit_label(begin_label); let code = X64FunctionCode { local_function_id: self.functions.len(), @@ -551,6 +722,7 @@ impl ModuleCodeGenerator let a = self.assembler.as_mut().unwrap(); let offset = a.offset(); + a.arch_emit_entry_trampoline(); let label = a.get_label(); a.emit_label(label); labels.insert(id, (label, Some(offset))); @@ -584,7 +756,7 @@ impl ModuleCodeGenerator Location::Memory(GPR::RAX, imported_func_addr as i32), Location::GPR(GPR::RAX), ); - a.emit_jmp_location(Location::GPR(GPR::RAX)); + a.emit_host_redirection(GPR::RAX); self.func_import_count += 1; @@ -627,6 +799,29 @@ impl X64FunctionCode { .insert(m.state.wasm_inst_offset, SuspendOffset::Trappable(offset)); } + #[allow(dead_code)] + fn mark_inline_breakpoint( + a: &mut Assembler, + m: &Machine, + fsm: &mut FunctionStateMap, + control_stack: &mut [ControlFrame], + ) { + let state_diff_id = Self::get_state_diff(m, fsm, control_stack); + let offset = a.get_offset().0; + fsm.trappable_offsets.insert( + offset, + OffsetInfo { + end_offset: offset + + get_inline_breakpoint_size(ARCH, Backend::Singlepass) + .expect("cannot get inline breakpoint size"), + activate_offset: offset, + diff_id: state_diff_id, + }, + ); + fsm.wasm_offset_to_target_offset + .insert(m.state.wasm_inst_offset, SuspendOffset::Trappable(offset)); + } + /// Moves `loc` to a valid location for `div`/`idiv`. fn emit_relaxed_xdiv( a: &mut Assembler, @@ -1203,7 +1398,10 @@ impl X64FunctionCode { value_stack.push(ret); Self::emit_relaxed_avx(a, m, f, loc_a, loc_b, ret); - a.emit_and(Size::S32, Location::Imm32(1), ret); // FIXME: Why? + + // Workaround for behavior inconsistency among different backing implementations. + // (all bits or only the least significant bit are set to one?) + a.emit_and(Size::S32, Location::Imm32(1), ret); } /// Floating point (AVX) binary operation with both operands popped from the virtual stack. 
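The `a.emit_and(Size::S32, Location::Imm32(1), ret)` workaround above exists because different lowerings of a floating-point comparison may report "true" either as all bits set or as only the least significant bit set; masking with 1 collapses both encodings to the 0/1 value the Wasm stack expects. A minimal, stand-alone illustration of that normalization (not part of the patch):

    fn normalize_cmp_result(raw: u32) -> u32 {
        // Keep only the least significant bit: 0xFFFF_FFFF and 0x1 both become 1, 0 stays 0.
        raw & 1
    }

    fn main() {
        assert_eq!(normalize_cmp_result(0xFFFF_FFFF), 1); // "all bits set" encoding
        assert_eq!(normalize_cmp_result(0x0000_0001), 1); // "LSB only" encoding
        assert_eq!(normalize_cmp_result(0), 0);
    }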
@@ -1927,11 +2125,18 @@ impl FunctionCodeGenerator for X64FunctionCode { Event::Internal(x) => { match x { InternalEvent::Breakpoint(callback) => { - a.emit_bkpt(); + use wasmer_runtime_core::backend::InlineBreakpointType; self.breakpoints .as_mut() .unwrap() .insert(a.get_offset(), callback); + Self::mark_trappable( + a, + &self.machine, + &mut self.fsm, + &mut self.control_stack, + ); + a.emit_inline_breakpoint(InlineBreakpointType::Middleware); } InternalEvent::FunctionBegin(_) | InternalEvent::FunctionEnd => {} InternalEvent::GetInternal(idx) => { @@ -2377,17 +2582,21 @@ impl FunctionCodeGenerator for X64FunctionCode { _ => unreachable!(), }; - let zero_path = a.get_label(); - let end = a.get_label(); - - a.emit_test_gpr_64(src); - a.emit_jmp(Condition::Equal, zero_path); - a.emit_bsr(Size::S32, Location::GPR(src), Location::GPR(dst)); - a.emit_xor(Size::S32, Location::Imm32(31), Location::GPR(dst)); - a.emit_jmp(Condition::None, end); - a.emit_label(zero_path); - a.emit_mov(Size::S32, Location::Imm32(32), Location::GPR(dst)); - a.emit_label(end); + if a.arch_has_xzcnt() { + a.arch_emit_lzcnt(Size::S32, Location::GPR(src), Location::GPR(dst)); + } else { + let zero_path = a.get_label(); + let end = a.get_label(); + + a.emit_test_gpr_64(src); + a.emit_jmp(Condition::Equal, zero_path); + a.emit_bsr(Size::S32, Location::GPR(src), Location::GPR(dst)); + a.emit_xor(Size::S32, Location::Imm32(31), Location::GPR(dst)); + a.emit_jmp(Condition::None, end); + a.emit_label(zero_path); + a.emit_mov(Size::S32, Location::Imm32(32), Location::GPR(dst)); + a.emit_label(end); + } match loc { Location::Imm32(_) | Location::Memory(_, _) => { @@ -2429,16 +2638,20 @@ impl FunctionCodeGenerator for X64FunctionCode { _ => unreachable!(), }; - let zero_path = a.get_label(); - let end = a.get_label(); - - a.emit_test_gpr_64(src); - a.emit_jmp(Condition::Equal, zero_path); - a.emit_bsf(Size::S32, Location::GPR(src), Location::GPR(dst)); - a.emit_jmp(Condition::None, end); - a.emit_label(zero_path); - a.emit_mov(Size::S32, Location::Imm32(32), Location::GPR(dst)); - a.emit_label(end); + if a.arch_has_xzcnt() { + a.arch_emit_tzcnt(Size::S32, Location::GPR(src), Location::GPR(dst)); + } else { + let zero_path = a.get_label(); + let end = a.get_label(); + + a.emit_test_gpr_64(src); + a.emit_jmp(Condition::Equal, zero_path); + a.emit_bsf(Size::S32, Location::GPR(src), Location::GPR(dst)); + a.emit_jmp(Condition::None, end); + a.emit_label(zero_path); + a.emit_mov(Size::S32, Location::Imm32(32), Location::GPR(dst)); + a.emit_label(end); + } match loc { Location::Imm32(_) | Location::Memory(_, _) => { @@ -2759,17 +2972,21 @@ impl FunctionCodeGenerator for X64FunctionCode { _ => unreachable!(), }; - let zero_path = a.get_label(); - let end = a.get_label(); - - a.emit_test_gpr_64(src); - a.emit_jmp(Condition::Equal, zero_path); - a.emit_bsr(Size::S64, Location::GPR(src), Location::GPR(dst)); - a.emit_xor(Size::S64, Location::Imm32(63), Location::GPR(dst)); - a.emit_jmp(Condition::None, end); - a.emit_label(zero_path); - a.emit_mov(Size::S64, Location::Imm32(64), Location::GPR(dst)); - a.emit_label(end); + if a.arch_has_xzcnt() { + a.arch_emit_lzcnt(Size::S64, Location::GPR(src), Location::GPR(dst)); + } else { + let zero_path = a.get_label(); + let end = a.get_label(); + + a.emit_test_gpr_64(src); + a.emit_jmp(Condition::Equal, zero_path); + a.emit_bsr(Size::S64, Location::GPR(src), Location::GPR(dst)); + a.emit_xor(Size::S64, Location::Imm32(63), Location::GPR(dst)); + a.emit_jmp(Condition::None, end); + 
a.emit_label(zero_path); + a.emit_mov(Size::S64, Location::Imm32(64), Location::GPR(dst)); + a.emit_label(end); + } match loc { Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => { @@ -2811,16 +3028,20 @@ impl FunctionCodeGenerator for X64FunctionCode { _ => unreachable!(), }; - let zero_path = a.get_label(); - let end = a.get_label(); - - a.emit_test_gpr_64(src); - a.emit_jmp(Condition::Equal, zero_path); - a.emit_bsf(Size::S64, Location::GPR(src), Location::GPR(dst)); - a.emit_jmp(Condition::None, end); - a.emit_label(zero_path); - a.emit_mov(Size::S64, Location::Imm32(64), Location::GPR(dst)); - a.emit_label(end); + if a.arch_has_xzcnt() { + a.arch_emit_tzcnt(Size::S64, Location::GPR(src), Location::GPR(dst)); + } else { + let zero_path = a.get_label(); + let end = a.get_label(); + + a.emit_test_gpr_64(src); + a.emit_jmp(Condition::Equal, zero_path); + a.emit_bsf(Size::S64, Location::GPR(src), Location::GPR(dst)); + a.emit_jmp(Condition::None, end); + a.emit_label(zero_path); + a.emit_mov(Size::S64, Location::Imm32(64), Location::GPR(dst)); + a.emit_label(end); + } match loc { Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => { @@ -3105,209 +3326,239 @@ impl FunctionCodeGenerator for X64FunctionCode { Assembler::emit_vdivss, ), Operator::F32Max => { - let src2 = - get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - let src1 = - get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - let ret = self.machine.acquire_locations( - a, - &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], - false, - )[0]; - self.value_stack.push(ret); + if !a.arch_supports_canonicalize_nan() { + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmaxss, + ); + } else { + let src2 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let src1 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); - let tmp1 = self.machine.acquire_temp_xmm().unwrap(); - let tmp2 = self.machine.acquire_temp_xmm().unwrap(); - let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); - let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); + let tmp1 = self.machine.acquire_temp_xmm().unwrap(); + let tmp2 = self.machine.acquire_temp_xmm().unwrap(); + let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); + let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); - let src1 = match src1 { - Location::XMM(x) => x, - Location::GPR(_) | Location::Memory(_, _) => { - a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); - tmp1 - } - Location::Imm32(_) => { - a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); - tmp1 - } - Location::Imm64(_) => { - a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); - a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); - tmp1 - } - _ => unreachable!(), - }; - let src2 = match src2 { - Location::XMM(x) => x, - Location::GPR(_) | Location::Memory(_, _) => { - a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); - tmp2 - } - Location::Imm32(_) => { - a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); - tmp2 - } - Location::Imm64(_) => { - a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); - 
a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); - tmp2 + let src1 = match src1 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); + tmp1 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + _ => unreachable!(), + }; + let src2 = match src2 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); + tmp2 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + _ => unreachable!(), + }; + + let tmp_xmm1 = XMM::XMM8; + let tmp_xmm2 = XMM::XMM9; + let tmp_xmm3 = XMM::XMM10; + + static CANONICAL_NAN: u128 = 0x7FC0_0000; + a.emit_mov(Size::S32, Location::XMM(src1), Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::XMM(src2), Location::GPR(tmpg2)); + a.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1)); + a.emit_vmaxss(src1, XMMOrMemory::XMM(src2), tmp_xmm1); + let label1 = a.get_label(); + let label2 = a.get_label(); + a.emit_jmp(Condition::NotEqual, label1); + a.emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); + a.emit_jmp(Condition::None, label2); + a.emit_label(label1); + a.emit_vxorps(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2); + a.emit_label(label2); + a.emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3); + a.emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); + a.emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1); + // load float canonical nan + a.emit_mov( + Size::S64, + Location::Imm64((&CANONICAL_NAN as *const u128) as u64), + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2)); + a.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); + match ret { + Location::XMM(x) => { + a.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); + } + Location::Memory(_, _) | Location::GPR(_) => { + a.emit_mov(Size::S64, Location::XMM(src1), ret); + } + _ => unreachable!(), } - _ => unreachable!(), - }; - let tmp_xmm1 = XMM::XMM8; - let tmp_xmm2 = XMM::XMM9; - let tmp_xmm3 = XMM::XMM10; - - static CANONICAL_NAN: u128 = 0x7FC0_0000; - a.emit_mov(Size::S32, Location::XMM(src1), Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::XMM(src2), Location::GPR(tmpg2)); - a.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1)); - a.emit_vmaxss(src1, XMMOrMemory::XMM(src2), tmp_xmm1); - let label1 = a.get_label(); - let label2 = a.get_label(); - a.emit_jmp(Condition::NotEqual, label1); - a.emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); - a.emit_jmp(Condition::None, label2); - a.emit_label(label1); - a.emit_vxorps(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2); - a.emit_label(label2); - a.emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3); - a.emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); - a.emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1); - // load float canonical nan - a.emit_mov( - Size::S64, - 
Location::Imm64((&CANONICAL_NAN as *const u128) as u64), - Location::GPR(tmpg1), - ); - a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2)); - a.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); - match ret { - Location::XMM(x) => { - a.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); - } - Location::Memory(_, _) | Location::GPR(_) => { - a.emit_mov(Size::S64, Location::XMM(src1), ret); - } - _ => unreachable!(), + self.machine.release_temp_gpr(tmpg2); + self.machine.release_temp_gpr(tmpg1); + self.machine.release_temp_xmm(tmp2); + self.machine.release_temp_xmm(tmp1); } - - self.machine.release_temp_gpr(tmpg2); - self.machine.release_temp_gpr(tmpg1); - self.machine.release_temp_xmm(tmp2); - self.machine.release_temp_xmm(tmp1); } Operator::F32Min => { - let src2 = - get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - let src1 = - get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - let ret = self.machine.acquire_locations( - a, - &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], - false, - )[0]; - self.value_stack.push(ret); + if !a.arch_supports_canonicalize_nan() { + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vminss, + ); + } else { + let src2 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let src1 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); - let tmp1 = self.machine.acquire_temp_xmm().unwrap(); - let tmp2 = self.machine.acquire_temp_xmm().unwrap(); - let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); - let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); + let tmp1 = self.machine.acquire_temp_xmm().unwrap(); + let tmp2 = self.machine.acquire_temp_xmm().unwrap(); + let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); + let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); - let src1 = match src1 { - Location::XMM(x) => x, - Location::GPR(_) | Location::Memory(_, _) => { - a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); - tmp1 - } - Location::Imm32(_) => { - a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); - tmp1 - } - Location::Imm64(_) => { - a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); - a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); - tmp1 - } - _ => unreachable!(), - }; - let src2 = match src2 { - Location::XMM(x) => x, - Location::GPR(_) | Location::Memory(_, _) => { - a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); - tmp2 - } - Location::Imm32(_) => { - a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); - tmp2 - } - Location::Imm64(_) => { - a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); - a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); - tmp2 + let src1 = match src1 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); + tmp1 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, 
Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + _ => unreachable!(), + }; + let src2 = match src2 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); + tmp2 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + _ => unreachable!(), + }; + + let tmp_xmm1 = XMM::XMM8; + let tmp_xmm2 = XMM::XMM9; + let tmp_xmm3 = XMM::XMM10; + + static NEG_ZERO: u128 = 0x8000_0000; + static CANONICAL_NAN: u128 = 0x7FC0_0000; + a.emit_mov(Size::S32, Location::XMM(src1), Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::XMM(src2), Location::GPR(tmpg2)); + a.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1)); + a.emit_vminss(src1, XMMOrMemory::XMM(src2), tmp_xmm1); + let label1 = a.get_label(); + let label2 = a.get_label(); + a.emit_jmp(Condition::NotEqual, label1); + a.emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); + a.emit_jmp(Condition::None, label2); + a.emit_label(label1); + // load float -0.0 + a.emit_mov( + Size::S64, + Location::Imm64((&NEG_ZERO as *const u128) as u64), + Location::GPR(tmpg1), + ); + a.emit_mov( + Size::S64, + Location::Memory(tmpg1, 0), + Location::XMM(tmp_xmm2), + ); + a.emit_label(label2); + a.emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3); + a.emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); + a.emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1); + // load float canonical nan + a.emit_mov( + Size::S64, + Location::Imm64((&CANONICAL_NAN as *const u128) as u64), + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2)); + a.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); + match ret { + Location::XMM(x) => { + a.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); + } + Location::Memory(_, _) | Location::GPR(_) => { + a.emit_mov(Size::S64, Location::XMM(src1), ret); + } + _ => unreachable!(), } - _ => unreachable!(), - }; - let tmp_xmm1 = XMM::XMM8; - let tmp_xmm2 = XMM::XMM9; - let tmp_xmm3 = XMM::XMM10; - - static NEG_ZERO: u128 = 0x8000_0000; - static CANONICAL_NAN: u128 = 0x7FC0_0000; - a.emit_mov(Size::S32, Location::XMM(src1), Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::XMM(src2), Location::GPR(tmpg2)); - a.emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1)); - a.emit_vminss(src1, XMMOrMemory::XMM(src2), tmp_xmm1); - let label1 = a.get_label(); - let label2 = a.get_label(); - a.emit_jmp(Condition::NotEqual, label1); - a.emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); - a.emit_jmp(Condition::None, label2); - a.emit_label(label1); - // load float -0.0 - a.emit_mov( - Size::S64, - Location::Imm64((&NEG_ZERO as *const u128) as u64), - Location::GPR(tmpg1), - ); - a.emit_mov( - Size::S64, - Location::Memory(tmpg1, 0), - Location::XMM(tmp_xmm2), - ); - a.emit_label(label2); - a.emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3); - a.emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); - a.emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1); - // load float canonical nan - a.emit_mov( - Size::S64, - Location::Imm64((&CANONICAL_NAN as *const u128) as u64), - Location::GPR(tmpg1), - ); - a.emit_mov(Size::S64, Location::Memory(tmpg1, 
0), Location::XMM(src2)); - a.emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); - match ret { - Location::XMM(x) => { - a.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); - } - Location::Memory(_, _) | Location::GPR(_) => { - a.emit_mov(Size::S64, Location::XMM(src1), ret); - } - _ => unreachable!(), + self.machine.release_temp_gpr(tmpg2); + self.machine.release_temp_gpr(tmpg1); + self.machine.release_temp_xmm(tmp2); + self.machine.release_temp_xmm(tmp1); } - - self.machine.release_temp_gpr(tmpg2); - self.machine.release_temp_gpr(tmpg1); - self.machine.release_temp_xmm(tmp2); - self.machine.release_temp_xmm(tmp1); } Operator::F32Eq => Self::emit_fp_cmpop_avx( a, @@ -3437,11 +3688,34 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc, Location::GPR(tmp)); - a.emit_btc_gpr_imm8_32(31, tmp); - a.emit_mov(Size::S32, Location::GPR(tmp), ret); - self.machine.release_temp_gpr(tmp); + + if a.arch_has_fneg() { + let tmp = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp), + ); + a.arch_emit_f32_neg(tmp, tmp); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp), + ret, + ); + self.machine.release_temp_xmm(tmp); + } else { + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(tmp)); + a.emit_btc_gpr_imm8_32(31, tmp); + a.emit_mov(Size::S32, Location::GPR(tmp), ret); + self.machine.release_temp_gpr(tmp); + } } Operator::F64Const { value } => { @@ -3476,209 +3750,239 @@ impl FunctionCodeGenerator for X64FunctionCode { Assembler::emit_vdivsd, ), Operator::F64Max => { - let src2 = - get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - let src1 = - get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - let ret = self.machine.acquire_locations( - a, - &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], - false, - )[0]; - self.value_stack.push(ret); + if !a.arch_supports_canonicalize_nan() { + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vmaxsd, + ); + } else { + let src2 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let src1 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); - let tmp1 = self.machine.acquire_temp_xmm().unwrap(); - let tmp2 = self.machine.acquire_temp_xmm().unwrap(); - let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); - let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); + let tmp1 = self.machine.acquire_temp_xmm().unwrap(); + let tmp2 = self.machine.acquire_temp_xmm().unwrap(); + let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); + let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); - let src1 = match src1 { - Location::XMM(x) => x, - Location::GPR(_) | Location::Memory(_, _) => { - a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); - tmp1 - } - Location::Imm32(_) => { - a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); - tmp1 - } - Location::Imm64(_) => { - a.emit_mov(Size::S64, src1, 
Location::GPR(tmpg1)); - a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); - tmp1 - } - _ => unreachable!(), - }; - let src2 = match src2 { - Location::XMM(x) => x, - Location::GPR(_) | Location::Memory(_, _) => { - a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); - tmp2 - } - Location::Imm32(_) => { - a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); - tmp2 - } - Location::Imm64(_) => { - a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); - a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); - tmp2 + let src1 = match src1 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); + tmp1 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + _ => unreachable!(), + }; + let src2 = match src2 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); + tmp2 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + _ => unreachable!(), + }; + + let tmp_xmm1 = XMM::XMM8; + let tmp_xmm2 = XMM::XMM9; + let tmp_xmm3 = XMM::XMM10; + + static CANONICAL_NAN: u128 = 0x7FF8_0000_0000_0000; + a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2)); + a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1)); + a.emit_vmaxsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1); + let label1 = a.get_label(); + let label2 = a.get_label(); + a.emit_jmp(Condition::NotEqual, label1); + a.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); + a.emit_jmp(Condition::None, label2); + a.emit_label(label1); + a.emit_vxorpd(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2); + a.emit_label(label2); + a.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3); + a.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); + a.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1); + // load float canonical nan + a.emit_mov( + Size::S64, + Location::Imm64((&CANONICAL_NAN as *const u128) as u64), + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2)); + a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); + match ret { + Location::XMM(x) => { + a.emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); + } + Location::Memory(_, _) | Location::GPR(_) => { + a.emit_mov(Size::S64, Location::XMM(src1), ret); + } + _ => unreachable!(), } - _ => unreachable!(), - }; - let tmp_xmm1 = XMM::XMM8; - let tmp_xmm2 = XMM::XMM9; - let tmp_xmm3 = XMM::XMM10; - - static CANONICAL_NAN: u128 = 0x7FF8_0000_0000_0000; - a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1)); - a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2)); - a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1)); - a.emit_vmaxsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1); - let label1 = a.get_label(); - let label2 = 
a.get_label(); - a.emit_jmp(Condition::NotEqual, label1); - a.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); - a.emit_jmp(Condition::None, label2); - a.emit_label(label1); - a.emit_vxorpd(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2); - a.emit_label(label2); - a.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3); - a.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); - a.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1); - // load float canonical nan - a.emit_mov( - Size::S64, - Location::Imm64((&CANONICAL_NAN as *const u128) as u64), - Location::GPR(tmpg1), - ); - a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2)); - a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); - match ret { - Location::XMM(x) => { - a.emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); - } - Location::Memory(_, _) | Location::GPR(_) => { - a.emit_mov(Size::S64, Location::XMM(src1), ret); - } - _ => unreachable!(), + self.machine.release_temp_gpr(tmpg2); + self.machine.release_temp_gpr(tmpg1); + self.machine.release_temp_xmm(tmp2); + self.machine.release_temp_xmm(tmp1); } - - self.machine.release_temp_gpr(tmpg2); - self.machine.release_temp_gpr(tmpg1); - self.machine.release_temp_xmm(tmp2); - self.machine.release_temp_xmm(tmp1); } Operator::F64Min => { - let src2 = - get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - let src1 = - get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); - let ret = self.machine.acquire_locations( - a, - &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], - false, - )[0]; - self.value_stack.push(ret); + if !a.arch_supports_canonicalize_nan() { + Self::emit_fp_binop_avx( + a, + &mut self.machine, + &mut self.value_stack, + Assembler::emit_vminsd, + ); + } else { + let src2 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let src1 = get_location_released( + a, + &mut self.machine, + self.value_stack.pop().unwrap(), + ); + let ret = self.machine.acquire_locations( + a, + &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))], + false, + )[0]; + self.value_stack.push(ret); - let tmp1 = self.machine.acquire_temp_xmm().unwrap(); - let tmp2 = self.machine.acquire_temp_xmm().unwrap(); - let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); - let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); + let tmp1 = self.machine.acquire_temp_xmm().unwrap(); + let tmp2 = self.machine.acquire_temp_xmm().unwrap(); + let tmpg1 = self.machine.acquire_temp_gpr().unwrap(); + let tmpg2 = self.machine.acquire_temp_gpr().unwrap(); - let src1 = match src1 { - Location::XMM(x) => x, - Location::GPR(_) | Location::Memory(_, _) => { - a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); - tmp1 - } - Location::Imm32(_) => { - a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); - tmp1 - } - Location::Imm64(_) => { - a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); - a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); - tmp1 - } - _ => unreachable!(), - }; - let src2 = match src2 { - Location::XMM(x) => x, - Location::GPR(_) | Location::Memory(_, _) => { - a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); - tmp2 - } - Location::Imm32(_) => { - a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); - a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); - tmp2 - } - Location::Imm64(_) => { - a.emit_mov(Size::S64, src2, 
Location::GPR(tmpg1)); - a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); - tmp2 + let src1 = match src1 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src1, Location::XMM(tmp1)); + tmp1 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src1, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1)); + tmp1 + } + _ => unreachable!(), + }; + let src2 = match src2 { + Location::XMM(x) => x, + Location::GPR(_) | Location::Memory(_, _) => { + a.emit_mov(Size::S64, src2, Location::XMM(tmp2)); + tmp2 + } + Location::Imm32(_) => { + a.emit_mov(Size::S32, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + Location::Imm64(_) => { + a.emit_mov(Size::S64, src2, Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2)); + tmp2 + } + _ => unreachable!(), + }; + + let tmp_xmm1 = XMM::XMM8; + let tmp_xmm2 = XMM::XMM9; + let tmp_xmm3 = XMM::XMM10; + + static NEG_ZERO: u128 = 0x8000_0000_0000_0000; + static CANONICAL_NAN: u128 = 0x7FF8_0000_0000_0000; + a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1)); + a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2)); + a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1)); + a.emit_vminsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1); + let label1 = a.get_label(); + let label2 = a.get_label(); + a.emit_jmp(Condition::NotEqual, label1); + a.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); + a.emit_jmp(Condition::None, label2); + a.emit_label(label1); + // load float -0.0 + a.emit_mov( + Size::S64, + Location::Imm64((&NEG_ZERO as *const u128) as u64), + Location::GPR(tmpg1), + ); + a.emit_mov( + Size::S64, + Location::Memory(tmpg1, 0), + Location::XMM(tmp_xmm2), + ); + a.emit_label(label2); + a.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3); + a.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); + a.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1); + // load float canonical nan + a.emit_mov( + Size::S64, + Location::Imm64((&CANONICAL_NAN as *const u128) as u64), + Location::GPR(tmpg1), + ); + a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2)); + a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); + match ret { + Location::XMM(x) => { + a.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); + } + Location::Memory(_, _) | Location::GPR(_) => { + a.emit_mov(Size::S64, Location::XMM(src1), ret); + } + _ => unreachable!(), } - _ => unreachable!(), - }; - let tmp_xmm1 = XMM::XMM8; - let tmp_xmm2 = XMM::XMM9; - let tmp_xmm3 = XMM::XMM10; - - static NEG_ZERO: u128 = 0x8000_0000_0000_0000; - static CANONICAL_NAN: u128 = 0x7FF8_0000_0000_0000; - a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1)); - a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2)); - a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1)); - a.emit_vminsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1); - let label1 = a.get_label(); - let label2 = a.get_label(); - a.emit_jmp(Condition::NotEqual, label1); - a.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2)); - a.emit_jmp(Condition::None, label2); - a.emit_label(label1); - // load float -0.0 - a.emit_mov( - Size::S64, - 
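The F64Max and F64Min lowerings above (taken on targets that cannot canonicalize NaNs natively) do more than a bare vmaxsd/vminsd: the raw-bits compare plus vblendvpd patch up the two places where the SSE semantics disagree with wasm, namely operands that compare equal but differ in sign (+0.0 vs. -0.0) and NaN inputs, which must come out as the canonical quiet NaN. A plain-Rust model of the semantics the emitted sequence implements for f64.min (f64.max is symmetric, preferring +0.0):

fn wasm_f64_min(a: f64, b: f64) -> f64 {
    if a.is_nan() || b.is_nan() {
        // canonical quiet NaN, the same constant the code above loads from CANONICAL_NAN
        f64::from_bits(0x7FF8_0000_0000_0000)
    } else if a == b {
        // only +0.0 / -0.0 reach this branch with different bit patterns; min prefers -0.0
        if a.is_sign_negative() { a } else { b }
    } else if a < b {
        a
    } else {
        b
    }
}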
Location::Imm64((&NEG_ZERO as *const u128) as u64), - Location::GPR(tmpg1), - ); - a.emit_mov( - Size::S64, - Location::Memory(tmpg1, 0), - Location::XMM(tmp_xmm2), - ); - a.emit_label(label2); - a.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3); - a.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1); - a.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1); - // load float canonical nan - a.emit_mov( - Size::S64, - Location::Imm64((&CANONICAL_NAN as *const u128) as u64), - Location::GPR(tmpg1), - ); - a.emit_mov(Size::S64, Location::Memory(tmpg1, 0), Location::XMM(src2)); - a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1); - match ret { - Location::XMM(x) => { - a.emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x)); - } - Location::Memory(_, _) | Location::GPR(_) => { - a.emit_mov(Size::S64, Location::XMM(src1), ret); - } - _ => unreachable!(), + self.machine.release_temp_gpr(tmpg2); + self.machine.release_temp_gpr(tmpg1); + self.machine.release_temp_xmm(tmp2); + self.machine.release_temp_xmm(tmp1); } - - self.machine.release_temp_gpr(tmpg2); - self.machine.release_temp_gpr(tmpg1); - self.machine.release_temp_xmm(tmp2); - self.machine.release_temp_xmm(tmp1); } Operator::F64Eq => Self::emit_fp_cmpop_avx( a, @@ -3823,11 +4127,33 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(tmp)); - a.emit_btc_gpr_imm8_64(63, tmp); - a.emit_mov(Size::S64, Location::GPR(tmp), ret); - self.machine.release_temp_gpr(tmp); + if a.arch_has_fneg() { + let tmp = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp), + ); + a.arch_emit_f64_neg(tmp, tmp); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp), + ret, + ); + self.machine.release_temp_xmm(tmp); + } else { + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(tmp)); + a.emit_btc_gpr_imm8_64(63, tmp); + a.emit_mov(Size::S64, Location::GPR(tmp), ret); + self.machine.release_temp_gpr(tmp); + } } Operator::F64PromoteF32 => Self::emit_fp_unop_avx( @@ -3938,24 +4264,48 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f32_int_conv_check(a, &mut self.machine, tmp_in, -1.0, 4294967296.0); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_uf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + 
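In the F64Neg hunk above, targets that report arch_has_fneg() get a dedicated arch_emit_f64_neg, while the x86-64 fallback keeps negating in a general-purpose register with emit_btc_gpr_imm8_64(63, tmp). Both amount to toggling the IEEE-754 sign bit, which also behaves correctly for NaNs and infinities; a plain-Rust model:

fn f64_neg_model(x: f64) -> f64 {
    // btc 63 (bit test and complement) is an XOR of the sign bit
    f64::from_bits(x.to_bits() ^ (1u64 << 63))
}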
Self::emit_f32_int_conv_check(a, &mut self.machine, tmp_in, -1.0, 4294967296.0); - a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I32TruncSF32 => { @@ -3967,30 +4317,55 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f32_int_conv_check( - a, - &mut self.machine, - tmp_in, - -2147483904.0, - 2147483648.0, - ); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_sf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check( + a, + &mut self.machine, + tmp_in, + -2147483904.0, + 2147483648.0, + ); - a.emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + a.emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I64TruncSF32 => { @@ -4002,47 +4377,57 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f32_int_conv_check( - a, - &mut self.machine, - tmp_in, - -9223373136366403584.0, - 9223372036854775808.0, - ); - a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_sf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + 
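The truncation hunks above all keep the same structure: a native arch_emit_*_trunc_* path when arch_has_itruncf() is reported, otherwise the existing cvttss2si/cvttsd2si fallback guarded by emit_f32_int_conv_check / emit_f64_int_conv_check. The bounds passed to those checks are the nearest representable floats just outside the target integer range, so the conversion is performed only strictly between them, and NaN or out-of-range inputs trap. A sketch of the predicate, assuming the check is exclusive on both ends:

fn i32_trunc_sf32_in_range(x: f32) -> bool {
    // -2147483904.0 is the first f32 below i32::MIN; 2147483648.0 is 2^31
    x > -2147483904.0 && x < 2147483648.0
}

fn i32_trunc_uf32_in_range(x: f32) -> bool {
    // (-1, 2^32): anything that truncates to a valid u32
    x > -1.0 && x < 4294967296.0
}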
Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check( + a, + &mut self.machine, + tmp_in, + -9223373136366403584.0, + 9223372036854775808.0, + ); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I64TruncUF32 => { - /* - ; movq xmm5, r15 - ; mov r15d, 1593835520u32 as i32 //float 9.22337203E+18 - ; movd xmm1, r15d - ; movd xmm2, Rd(reg as u8) - ; movd xmm3, Rd(reg as u8) - ; subss xmm2, xmm1 - ; cvttss2si Rq(reg as u8), xmm2 - ; mov r15, QWORD 0x8000000000000000u64 as i64 - ; xor r15, Rq(reg as u8) - ; cvttss2si Rq(reg as u8), xmm3 - ; ucomiss xmm3, xmm1 - ; cmovae Rq(reg as u8), r15 - ; movq r15, xmm5 - */ let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -4051,54 +4436,79 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f32_int_conv_check( - a, - &mut self.machine, - tmp_in, - -1.0, - 18446744073709551616.0, - ); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_uf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 - let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 - let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 - let tmp_x2 = self.machine.acquire_temp_xmm().unwrap(); // xmm3 + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check( + a, + &mut self.machine, + tmp_in, + -1.0, + 18446744073709551616.0, + ); - a.emit_mov( - Size::S32, - Location::Imm32(1593835520u32), - Location::GPR(tmp), - ); //float 9.22337203E+18 - a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x1)); - a.emit_mov(Size::S32, Location::XMM(tmp_in), Location::XMM(tmp_x2)); - a.emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); - a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov( - Size::S64, - Location::Imm64(0x8000000000000000u64), - Location::GPR(tmp), - ); - a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); - a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); - a.emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2); - a.emit_cmovae_gpr_64(tmp, tmp_out); - a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 + let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 + let tmp_x2 = 
self.machine.acquire_temp_xmm().unwrap(); // xmm3 - self.machine.release_temp_xmm(tmp_x2); - self.machine.release_temp_xmm(tmp_x1); - self.machine.release_temp_gpr(tmp); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + a.emit_mov( + Size::S32, + Location::Imm32(1593835520u32), + Location::GPR(tmp), + ); //float 9.22337203E+18 + a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x1)); + a.emit_mov(Size::S32, Location::XMM(tmp_in), Location::XMM(tmp_x2)); + a.emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(0x8000000000000000u64), + Location::GPR(tmp), + ); + a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); + a.emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2); + a.emit_cmovae_gpr_64(tmp, tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_x2); + self.machine.release_temp_xmm(tmp_x1); + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I32TruncUF64 => { @@ -4110,24 +4520,49 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f64_int_conv_check(a, &mut self.machine, tmp_in, -1.0, 4294967296.0); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_uf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check(a, &mut self.machine, tmp_in, -1.0, 4294967296.0); + + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I32TruncSF64 => { @@ -4139,35 +4574,60 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - - let real_in = match loc { - Location::Imm32(_) | Location::Imm64(_) => { - a.emit_mov(Size::S64, loc, Location::GPR(tmp_out)); - a.emit_mov(Size::S64, Location::GPR(tmp_out), Location::XMM(tmp_in)); - tmp_in - } - Location::XMM(x) => x, - _ => { - a.emit_mov(Size::S64, loc, Location::XMM(tmp_in)); - tmp_in - } - }; - Self::emit_f64_int_conv_check( - a, - &mut 
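The I64TruncUF32 fallback above implements the classic bias trick, since cvttss2si only produces signed results: convert both the original value and the value minus 2^63, flip the sign bit of the biased result, and let cmovae pick the right one depending on whether the input was at least 2^63 (1593835520 is the bit pattern of 2^63 as an f32). A plain-Rust model, ignoring the trap paths already handled by the range check before it:

fn u64_trunc_from_f32_model(x: f32) -> u64 {
    const TWO_POW_63: f32 = 9.223372036854776e18; // bit pattern 0x5F00_0000 == 1593835520
    if x < TWO_POW_63 {
        x as i64 as u64
    } else {
        // bias into signed range, convert, then restore the top bit
        ((x - TWO_POW_63) as i64 as u64) ^ 0x8000_0000_0000_0000
    }
}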
self.machine, - real_in, - -2147483649.0, - 2147483648.0, - ); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_sf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + let real_in = match loc { + Location::Imm32(_) | Location::Imm64(_) => { + a.emit_mov(Size::S64, loc, Location::GPR(tmp_out)); + a.emit_mov(Size::S64, Location::GPR(tmp_out), Location::XMM(tmp_in)); + tmp_in + } + Location::XMM(x) => x, + _ => { + a.emit_mov(Size::S64, loc, Location::XMM(tmp_in)); + tmp_in + } + }; + + Self::emit_f64_int_conv_check( + a, + &mut self.machine, + real_in, + -2147483649.0, + 2147483648.0, + ); - a.emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out); - a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + a.emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I64TruncSF64 => { @@ -4179,30 +4639,55 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f64_int_conv_check( - a, - &mut self.machine, - tmp_in, - -9223372036854777856.0, - 9223372036854775808.0, - ); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_sf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check( + a, + &mut self.machine, + tmp_in, + -9223372036854777856.0, + 9223372036854775808.0, + ); - a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I64TruncUF64 => { @@ -4214,54 +4699,79 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = 
self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f64_int_conv_check( - a, - &mut self.machine, - tmp_in, - -1.0, - 18446744073709551616.0, - ); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_uf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 - let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 - let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 - let tmp_x2 = self.machine.acquire_temp_xmm().unwrap(); // xmm3 + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check( + a, + &mut self.machine, + tmp_in, + -1.0, + 18446744073709551616.0, + ); - a.emit_mov( - Size::S64, - Location::Imm64(4890909195324358656u64), - Location::GPR(tmp), - ); //double 9.2233720368547758E+18 - a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x1)); - a.emit_mov(Size::S64, Location::XMM(tmp_in), Location::XMM(tmp_x2)); - a.emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); - a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov( - Size::S64, - Location::Imm64(0x8000000000000000u64), - Location::GPR(tmp), - ); - a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); - a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); - a.emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2); - a.emit_cmovae_gpr_64(tmp, tmp_out); - a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 + let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 + let tmp_x2 = self.machine.acquire_temp_xmm().unwrap(); // xmm3 - self.machine.release_temp_xmm(tmp_x2); - self.machine.release_temp_xmm(tmp_x1); - self.machine.release_temp_gpr(tmp); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(4890909195324358656u64), + Location::GPR(tmp), + ); //double 9.2233720368547758E+18 + a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x1)); + a.emit_mov(Size::S64, Location::XMM(tmp_in), Location::XMM(tmp_x2)); + a.emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(0x8000000000000000u64), + Location::GPR(tmp), + ); + a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); + a.emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2); + a.emit_cmovae_gpr_64(tmp, tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_x2); + self.machine.release_temp_xmm(tmp_x1); + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } 
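I64TruncUF64 above repeats the same bias trick with the f64 constant (4890909195324358656 is the bit pattern of 2^63 as an f64). The reason all of these fallbacks sit behind arch_has_itruncf() is that AArch64 can truncate in a single instruction (fcvtzs/fcvtzu). A hypothetical override, not part of this diff and ignoring the trapping behaviour the real backend still has to provide, might look like:

fn arch_emit_i64_trunc_uf64(&mut self, src: XMM, dst: GPR) {
    // FCVTZU does the unsigned f64 -> u64 truncation directly (assumed encoding; sketch only)
    dynasm!(self ; fcvtzu X(map_gpr(dst).x()), D(map_xmm(src).v()));
}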
Operator::F32ConvertSI32 => { @@ -4273,15 +4783,40 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2ss_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_si32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F32ConvertUI32 => { let loc = @@ -4292,15 +4827,39 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_ui32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F32ConvertSI64 => { let loc = @@ -4311,15 +4870,39 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_si64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + 
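F32ConvertUI32 above needs no unsigned-specific handling: the 32-bit mov zero-extends into the temporary, and vcvtsi2ss_64 then converts it as a (non-negative) signed 64-bit value. The f64 variant later in the diff does the same with vcvtsi2sd_64. Equivalent plain Rust:

fn f32_from_u32_model(x: u32) -> f32 {
    // zero-extend to 64 bits, then a signed convert is exact for any u32
    (x as u64 as i64) as f32
}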
Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F32ConvertUI64 => { let loc = @@ -4330,31 +4913,55 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - let tmp = self.machine.acquire_temp_gpr().unwrap(); - - let do_convert = a.get_label(); - let end_convert = a.get_label(); - - a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); - a.emit_test_gpr_64(tmp_in); - a.emit_jmp(Condition::Signed, do_convert); - a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_jmp(Condition::None, end_convert); - a.emit_label(do_convert); - a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); - a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); - a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); - a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); - a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_vaddss(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); - a.emit_label(end_convert); - a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); - - self.machine.release_temp_gpr(tmp); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_ui64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + let tmp = self.machine.acquire_temp_gpr().unwrap(); + + let do_convert = a.get_label(); + let end_convert = a.get_label(); + + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_test_gpr_64(tmp_in); + a.emit_jmp(Condition::Signed, do_convert); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_jmp(Condition::None, end_convert); + a.emit_label(do_convert); + a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); + a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); + a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); + a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_vaddss(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); + a.emit_label(end_convert); + a.emit_mov(Size::S32, 
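The F32ConvertUI64 fallback above (and the F64ConvertUI64 one further down) handles values with the top bit set, which do not fit a signed convert: halve the value while folding the dropped bit back into bit 0 so rounding is unchanged, convert, and double the result. A plain-Rust model:

fn f32_from_u64_model(x: u64) -> f32 {
    if (x as i64) >= 0 {
        (x as i64) as f32
    } else {
        // shr 1 / or in the lost bit keeps round-to-nearest correct; vaddss then doubles it
        let half = (x >> 1) | (x & 1);
        let f = (half as i64) as f32;
        f + f
    }
}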
Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F64ConvertSI32 => { @@ -4366,15 +4973,40 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2sd_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_si32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F64ConvertUI32 => { let loc = @@ -4385,15 +5017,40 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_ui32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F64ConvertSI64 => { let loc = @@ -4404,15 +5061,40 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + if a.arch_has_fconverti() { + let tmp_out = 
self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_si64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F64ConvertUI64 => { let loc = @@ -4423,31 +5105,56 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - let tmp = self.machine.acquire_temp_gpr().unwrap(); - let do_convert = a.get_label(); - let end_convert = a.get_label(); - - a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); - a.emit_test_gpr_64(tmp_in); - a.emit_jmp(Condition::Signed, do_convert); - a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_jmp(Condition::None, end_convert); - a.emit_label(do_convert); - a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); - a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); - a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); - a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); - a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_vaddsd(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); - a.emit_label(end_convert); - a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); - - self.machine.release_temp_gpr(tmp); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_ui64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + let tmp = self.machine.acquire_temp_gpr().unwrap(); + + let do_convert = a.get_label(); + let end_convert = a.get_label(); + + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_test_gpr_64(tmp_in); + a.emit_jmp(Condition::Signed, do_convert); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_jmp(Condition::None, end_convert); + a.emit_label(do_convert); + a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); + a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); + a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); + a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); + 
a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_vaddsd(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); + a.emit_label(end_convert); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::Call { function_index } => { @@ -4594,10 +5301,17 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, |a| { - a.emit_call_location(Location::Memory( - GPR::RAX, - (vm::Anyfunc::offset_func() as usize) as i32, - )); + if a.arch_requires_indirect_call_trampoline() { + a.arch_emit_indirect_call_with_trampoline(Location::Memory( + GPR::RAX, + (vm::Anyfunc::offset_func() as usize) as i32, + )); + } else { + a.emit_call_location(Location::Memory( + GPR::RAX, + (vm::Anyfunc::offset_func() as usize) as i32, + )); + } }, params.iter().map(|x| *x), Some((&mut self.fsm, &mut self.control_stack)), @@ -4831,7 +5545,13 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, |a| { - a.emit_call_location(Location::GPR(GPR::RAX)); + let label = a.get_label(); + let after = a.get_label(); + a.emit_jmp(Condition::None, after); + a.emit_label(label); + a.emit_host_redirection(GPR::RAX); + a.emit_label(after); + a.emit_call_label(label); }, iter::once(Location::Imm32(memory_index.index() as u32)), None, @@ -4870,7 +5590,13 @@ impl FunctionCodeGenerator for X64FunctionCode { a, &mut self.machine, |a| { - a.emit_call_location(Location::GPR(GPR::RAX)); + let label = a.get_label(); + let after = a.get_label(); + a.emit_jmp(Condition::None, after); + a.emit_label(label); + a.emit_host_redirection(GPR::RAX); + a.emit_label(after); + a.emit_call_label(label); }, iter::once(Location::Imm32(memory_index.index() as u32)) .chain(iter::once(param_pages)), @@ -5667,7 +6393,9 @@ impl FunctionCodeGenerator for X64FunctionCode { a.emit_lea_label(table_label, Location::GPR(GPR::RCX)); a.emit_mov(Size::S32, cond, Location::GPR(GPR::RDX)); - a.emit_imul_imm32_gpr64(5, GPR::RDX); + + let instr_size = a.get_jmp_instr_size(); + a.emit_imul_imm32_gpr64(instr_size as _, GPR::RDX); a.emit_add(Size::S64, Location::GPR(GPR::RCX), Location::GPR(GPR::RDX)); a.emit_jmp_location(Location::GPR(GPR::RDX)); diff --git a/lib/singlepass-backend/src/emitter_x64.rs b/lib/singlepass-backend/src/emitter_x64.rs index a40d5b5e5f0..0a6c04d08d6 100644 --- a/lib/singlepass-backend/src/emitter_x64.rs +++ b/lib/singlepass-backend/src/emitter_x64.rs @@ -1,5 +1,13 @@ use dynasmrt::{x64::Assembler, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi}; -pub use wasmer_runtime_core::state::x64::{GPR, XMM}; +use wasmer_runtime_core::backend::InlineBreakpointType; +pub use wasmer_runtime_core::state::x64_decl::{GPR, XMM}; + +fn _dummy(_a: &Assembler) { + dynasm!( + _a + ; .arch x64 + ); +} #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum Location { @@ -56,6 +64,7 @@ pub trait Emitter { fn get_label(&mut self) -> Self::Label; fn get_offset(&self) -> Self::Offset; + fn get_jmp_instr_size(&self) -> u8; fn emit_u64(&mut self, x: u64); @@ -186,6 +195,100 @@ pub trait Emitter { fn emit_call_location(&mut self, loc: Location); fn emit_bkpt(&mut self); + + fn emit_host_redirection(&mut self, target: GPR); + fn emit_inline_breakpoint(&mut self, ty: InlineBreakpointType); + + fn arch_has_itruncf(&self) -> bool { + false + } + fn arch_emit_i32_trunc_sf32(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i32_trunc_sf64(&mut self, _src: 
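Two smaller changes above follow the same portability theme. The memory.size / memory.grow call sites now call a tiny local thunk that ends in emit_host_redirection(GPR::RAX) instead of call-ing through the register directly, which gives a non-x86 emitter a single point at which to insert whatever transition a call into host code needs (on x86-64 it stays a plain jump through the register). And the BrTable lowering stops hard-coding 5, the byte length of an x86-64 `jmp rel32`, and asks the emitter for its jump size instead: 5 on x86-64, 4 on AArch64, a fixed-width ISA. A sketch of the addressing the emitted code computes in registers:

fn br_table_entry_address(table_base: u64, index: u64, jmp_instr_size: u8) -> u64 {
    // each table entry is exactly one unconditional jump
    table_base + index * jmp_instr_size as u64
}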
XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i32_trunc_uf32(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i32_trunc_uf64(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i64_trunc_sf32(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i64_trunc_sf64(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i64_trunc_uf32(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + fn arch_emit_i64_trunc_uf64(&mut self, _src: XMM, _dst: GPR) { + unimplemented!() + } + + fn arch_has_fconverti(&self) -> bool { + false + } + fn arch_emit_f32_convert_si32(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f32_convert_si64(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f32_convert_ui32(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f32_convert_ui64(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_convert_si32(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_convert_si64(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_convert_ui32(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_convert_ui64(&mut self, _src: GPR, _dst: XMM) { + unimplemented!() + } + + fn arch_has_fneg(&self) -> bool { + false + } + fn arch_emit_f32_neg(&mut self, _src: XMM, _dst: XMM) { + unimplemented!() + } + fn arch_emit_f64_neg(&mut self, _src: XMM, _dst: XMM) { + unimplemented!() + } + + fn arch_has_xzcnt(&self) -> bool { + false + } + fn arch_emit_lzcnt(&mut self, _sz: Size, _src: Location, _dst: Location) { + unimplemented!() + } + fn arch_emit_tzcnt(&mut self, _sz: Size, _src: Location, _dst: Location) { + unimplemented!() + } + + fn arch_supports_canonicalize_nan(&self) -> bool { + true + } + + fn arch_requires_indirect_call_trampoline(&self) -> bool { + false + } + + fn arch_emit_indirect_call_with_trampoline(&mut self, _loc: Location) { + unimplemented!() + } + + // Emits entry trampoline just before the real function. + fn arch_emit_entry_trampoline(&mut self) {} } macro_rules! 
unop_gpr { @@ -528,6 +631,10 @@ impl Emitter for Assembler { self.offset() } + fn get_jmp_instr_size(&self) -> u8 { + 5 + } + fn emit_u64(&mut self, x: u64) { self.push_u64(x); } @@ -1159,4 +1266,17 @@ impl Emitter for Assembler { fn emit_bkpt(&mut self) { dynasm!(self ; int 0x3); } + + fn emit_host_redirection(&mut self, target: GPR) { + self.emit_jmp_location(Location::GPR(target)); + } + + fn emit_inline_breakpoint(&mut self, ty: InlineBreakpointType) { + dynasm!(self + ; ud2 + ; .byte 0x0f ; .byte (0xb9u8 as i8) // ud + ; int -1 + ; .byte (ty as u8 as i8) + ); + } } diff --git a/lib/singlepass-backend/src/lib.rs b/lib/singlepass-backend/src/lib.rs index ccea7502ee9..b920b56cdf3 100644 --- a/lib/singlepass-backend/src/lib.rs +++ b/lib/singlepass-backend/src/lib.rs @@ -14,6 +14,7 @@ #[cfg(not(any( all(target_os = "macos", target_arch = "x86_64"), all(target_os = "linux", target_arch = "x86_64"), + all(target_os = "linux", target_arch = "aarch64"), )))] compile_error!("This crate doesn't yet support compiling on operating systems other than linux and macos and architectures other than x86_64"); @@ -33,6 +34,8 @@ mod codegen_x64; mod emitter_x64; mod machine; pub mod protect_unix; +#[cfg(target_arch = "aarch64")] +mod translator_aarch64; pub use codegen_x64::X64FunctionCode as FunctionCodeGenerator; pub use codegen_x64::X64ModuleCodeGenerator as ModuleCodeGenerator; diff --git a/lib/singlepass-backend/src/translator_aarch64.rs b/lib/singlepass-backend/src/translator_aarch64.rs new file mode 100644 index 00000000000..0c2b7a6ecbf --- /dev/null +++ b/lib/singlepass-backend/src/translator_aarch64.rs @@ -0,0 +1,1869 @@ +#![allow(dead_code)] + +use crate::emitter_x64::*; +use dynasmrt::{aarch64::Assembler, AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi}; +use wasmer_runtime_core::backend::InlineBreakpointType; + +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] +pub struct AX(pub u32); + +impl AX { + pub fn x(&self) -> u32 { + self.0 + } +} + +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] +pub struct AV(pub u32); + +impl AV { + pub fn v(&self) -> u32 { + self.0 + } +} + +/* +#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum GPR { + RAX, + RCX, + RDX, + RBX, + RSP, + RBP, + RSI, + RDI, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, +} + +#[repr(u8)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum XMM { + XMM0, + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7, +} +*/ + +pub fn map_gpr(gpr: GPR) -> AX { + use GPR::*; + + match gpr { + RAX => AX(0), + RCX => AX(1), + RDX => AX(2), + RBX => AX(3), + RSP => AX(28), + RBP => AX(5), + RSI => AX(6), + RDI => AX(7), + R8 => AX(8), + R9 => AX(9), + R10 => AX(10), + R11 => AX(11), + R12 => AX(12), + R13 => AX(13), + R14 => AX(14), + R15 => AX(15), + } +} + +pub fn map_xmm(xmm: XMM) -> AV { + use XMM::*; + + match xmm { + XMM0 => AV(0), + XMM1 => AV(1), + XMM2 => AV(2), + XMM3 => AV(3), + XMM4 => AV(4), + XMM5 => AV(5), + XMM6 => AV(6), + XMM7 => AV(7), + XMM8 => AV(8), + XMM9 => AV(9), + XMM10 => AV(10), + XMM11 => AV(11), + XMM12 => AV(12), + XMM13 => AV(13), + XMM14 => AV(14), + XMM15 => AV(15), + } +} + +pub fn get_aarch64_assembler() -> Assembler { + let a = Assembler::new().unwrap(); + dynasm!( + a + ; .arch aarch64 + ; .alias x_rsp, x28 + ; .alias x_tmp1, x27 + ; .alias w_tmp1, w27 + ; .alias x_tmp2, x26 + ; .alias w_tmp2, w26 + ; .alias x_tmp3, x25 + ; .alias w_tmp3, w25 + ; .alias d_tmp1, d28 + ; .alias d_tmp2, d27 + ; .alias v_tmp1, v28 + ; .alias 
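translator_aarch64.rs keeps the x86-64 register names from emitter_x64 and maps them onto AArch64 registers: general-purpose registers go to x0..x15, except RSP, which is emulated in x28 (aliased x_rsp), presumably so the architectural stack pointer and its alignment rules are left alone; XMM0..XMM15 go to v0..v15. That leaves x25..x27 and v27/v28 free, and they are reserved as the scratch registers the aliases above and the X_TMP*/V_TMP* constants that follow refer to. A small sketch of the invariants, written as a test:

#[cfg(test)]
mod mapping_sketch {
    use super::*;

    #[test]
    fn x86_names_map_onto_aarch64_registers() {
        assert_eq!(map_gpr(GPR::RAX).x(), 0);  // ordinary GPRs keep their index
        assert_eq!(map_gpr(GPR::RSP).x(), 28); // emulated stack pointer, aliased x_rsp
        assert_eq!(map_xmm(XMM::XMM7).v(), 7); // XMM0..XMM15 -> v0..v15
    }
}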
v_tmp2, v27 + ); + a +} + +const X_TMP1: u32 = 27; +const X_TMP2: u32 = 26; +const X_TMP3: u32 = 25; +const V_TMP1: u32 = 28; +const V_TMP2: u32 = 27; + +macro_rules! binop_imm32_gpr { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::Imm32(src), Location::GPR(dst)) => { + dynasm!($assembler + ; b >after + ; data: + ; .dword src as i32 + ; after: + ; ldr w_tmp1, { + dynasm!($assembler + ; b >after + ; data: + ; .qword src as i64 + ; after: + ; ldr x_tmp1, $otherwise + } + }; +} + +macro_rules! binop_imm32_mem { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::Imm32(src), Location::Memory(dst, disp)) => { + if disp >= 0 { + dynasm!($assembler ; add x_tmp3, X(map_gpr(dst).x()), disp as u32); + } else { + dynasm!($assembler ; sub x_tmp3, X(map_gpr(dst).x()), (-disp) as u32); + } + dynasm!($assembler + ; b >after + ; data: + ; .dword src as i32 + ; after: + ; ldr w_tmp1, { + if disp >= 0 { + dynasm!($assembler ; add x_tmp3, X(map_gpr(dst).x()), disp as u32); + } else { + dynasm!($assembler ; sub x_tmp3, X(map_gpr(dst).x()), (-disp) as u32); + } + dynasm!($assembler + ; b >after + ; data: + ; .qword src as i64 + ; after: + ; ldr x_tmp1, $otherwise + } + }; +} + +macro_rules! binop_gpr_gpr { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + dynasm!($assembler + ; $ins W(map_gpr(dst).x()), W(map_gpr(dst).x()), W(map_gpr(src).x()) + ); + }, + (Size::S64, Location::GPR(src), Location::GPR(dst)) => { + dynasm!($assembler + ; $ins X(map_gpr(dst).x()), X(map_gpr(dst).x()), X(map_gpr(src).x()) + ); + }, + _ => $otherwise + } + }; +} + +macro_rules! binop_gpr_mem { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::GPR(src), Location::Memory(base, disp)) => { + if disp >= 0 { + dynasm!($assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!($assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, $otherwise + } + }; +} + +macro_rules! binop_mem_gpr { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::Memory(base, disp), Location::GPR(dst)) => { + if disp >= 0 { + dynasm!($assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!($assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, $otherwise + } + }; +} + +macro_rules! binop_all_nofp { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + binop_imm32_gpr!($ins, $assembler, $sz, $src, $dst, { + binop_imm32_mem!($ins, $assembler, $sz, $src, $dst, { + binop_gpr_gpr!($ins, $assembler, $sz, $src, $dst, { + binop_gpr_mem!($ins, $assembler, $sz, $src, $dst, { + binop_mem_gpr!($ins, $assembler, $sz, $src, $dst, $otherwise) + }) + }) + }) + }) + }; +} + +macro_rules! 
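Nearly every macro above funnels immediates and displacements through a small inline literal pool, because AArch64 instructions cannot carry arbitrary 32/64-bit immediates: branch over a constant embedded in the instruction stream, then load it PC-relative into one of the reserved scratch registers. The general shape, reconstructed as a sketch from the fragments above (label names and the scratch alias follow the file's own conventions):

fn load_imm32_via_pool_sketch(a: &mut Assembler, imm: u32) {
    dynasm!(a
        ; b >after          // skip the embedded constant
        ; data:
        ; .dword imm as i32 // 32-bit literal in the instruction stream (.qword for 64-bit)
        ; after:
        ; ldr w_tmp1, <data // PC-relative load into the w27 scratch alias
    );
}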
binop_shift { + ($ins:ident, $assembler:tt, $sz:expr, $src:expr, $dst:expr, $otherwise:block) => { + match ($sz, $src, $dst) { + (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => { + assert!(imm < 32); + dynasm!($assembler ; $ins W(map_gpr(dst).x()), W(map_gpr(dst).x()), imm as u32); + }, + (Size::S32, Location::Imm8(imm), Location::Memory(base, disp)) => { + assert!(imm < 32); + if disp >= 0 { + dynasm!($assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + dynasm!($assembler ; $ins W(map_gpr(dst).x()), W(map_gpr(dst).x()), W(map_gpr(GPR::RCX).x())); + }, + (Size::S32, Location::GPR(GPR::RCX), Location::Memory(base, disp)) => { + if disp >= 0 { + dynasm!($assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + assert!(imm < 32); + dynasm!($assembler ; $ins X(map_gpr(dst).x()), X(map_gpr(dst).x()), imm as u32); + }, + (Size::S64, Location::Imm8(imm), Location::Memory(base, disp)) => { + assert!(imm < 32); + if disp >= 0 { + dynasm!($assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + dynasm!($assembler ; $ins X(map_gpr(dst).x()), X(map_gpr(dst).x()), X(map_gpr(GPR::RCX).x())); + }, + (Size::S64, Location::GPR(GPR::RCX), Location::Memory(base, disp)) => { + if disp >= 0 { + dynasm!($assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, $otherwise + } + } +} + +macro_rules! avx_fn { + ($ins:ident, $width:ident, $width_int:ident, $name:ident) => { + fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match src2 { + XMMOrMemory::XMM(src2) => dynasm!(self ; $ins $width(map_xmm(dst).v()), $width(map_xmm(src1).v()), $width(map_xmm(src2).v())), + XMMOrMemory::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match src2 { + XMMOrMemory::XMM(src2) => { + dynasm!( + self + ; fcmpe $width(map_xmm(src1).v()), $width(map_xmm(src2).v()) + ; cset w_tmp1, $cmpty + ; mov V(map_xmm(dst).v()).$width[0], $width_int(X_TMP1) + ); + }, + XMMOrMemory::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + fn $name(&mut self, src1: XMM, _src2: XMMOrMemory, dst: XMM) { + dynasm!(self ; $ins $width(map_xmm(dst).v()), $width(map_xmm(src1).v())); + } + } +} + +macro_rules! 
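The comparison macro above shows how the x86 compare-into-register ops (vcmpeqsd and friends) are mapped: an AArch64 fcmpe sets the flags, cset materialises 0 or 1 from the requested condition, and the result is moved into lane 0 of the destination vector register. Instantiated for a 64-bit equality compare, the macro expands to roughly this (sketch of the generated method body, register syntax as used elsewhere in the file):

fn emit_vcmpeqsd_sketch(&mut self, src1: XMM, src2: XMM, dst: XMM) {
    dynasm!(self
        ; fcmpe D(map_xmm(src1).v()), D(map_xmm(src2).v()) // set NZCV from the comparison
        ; cset w_tmp1, eq                                  // 1 if equal, 0 otherwise
        ; mov V(map_xmm(dst).v()).D[0], X(X_TMP1)          // place the result in lane 0
    );
}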
avx_fn_cvt { + ($ins:ident, $width_src:ident, $width_dst:ident, $name:ident) => { + fn $name(&mut self, src1: XMM, _src2: XMMOrMemory, dst: XMM) { + dynasm!(self ; $ins $width_dst(map_xmm(dst).v()), $width_src(map_xmm(src1).v())); + } + } +} + +impl Emitter for Assembler { + type Label = DynamicLabel; + type Offset = AssemblyOffset; + + fn get_label(&mut self) -> DynamicLabel { + self.new_dynamic_label() + } + + fn get_offset(&self) -> AssemblyOffset { + self.offset() + } + + fn get_jmp_instr_size(&self) -> u8 { + 4 + } + + fn emit_u64(&mut self, x: u64) { + self.push_u64(x); + } + + fn emit_label(&mut self, label: Self::Label) { + dynasm!(self ; => label); + } + + fn emit_nop(&mut self) { + dynasm!(self ; nop); + } + + fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; mov W(map_gpr(dst).x()), W(map_gpr(src).x())); + } + (Size::S32, Location::Memory(base, disp), Location::GPR(dst)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, after; data: ; .dword x as i32; after: ; ldr w_tmp1, { + dynasm!(self ; b >after; data: ; .dword x as i32; after: ; ldr W(map_gpr(dst).x()), { + dynasm!(self ; b >after; data: ; .dword x as i32; after: ; ldr W(map_gpr(dst).x()), { + dynasm!(self ; mov X(map_gpr(dst).x()), X(map_gpr(src).x())); + } + (Size::S64, Location::Memory(base, disp), Location::GPR(dst)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, after; data: ; .qword x as i64; after: ; ldr x_tmp1, { + dynasm!(self ; b >after; data: ; .qword x as i64; after: ; ldr X(map_gpr(dst).x()), { + dynasm!(self ; b >after; data: ; .qword x as i64; after: ; ldr X(map_gpr(dst).x()), { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, after; data: ; .dword x as i32; after: ; ldr w_tmp1, { + dynasm!(self ; b >after; data: ; .dword x as u8 as i32; after: ; ldr W(map_gpr(dst).x()), { + dynasm!(self ; b >after; data: ; .dword x as u8 as i32; after: ; ldr W(map_gpr(dst).x()), { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, 
after; data: ; .dword x as i32; after: ; ldr w_tmp1, { + dynasm!(self ; b >after; data: ; .dword x as u16 as i32; after: ; ldr W(map_gpr(dst).x()), { + dynasm!(self ; b >after; data: ; .dword x as u16 as i32; after: ; ldr W(map_gpr(dst).x()), { + dynasm!(self ; fmov S(map_xmm(dst).v()), S(map_xmm(src).v())); + } + (Size::S32, Location::XMM(src), Location::GPR(dst)) => { + dynasm!(self ; fmov W(map_gpr(dst).x()), S(map_xmm(src).v())); + } + (Size::S32, Location::GPR(src), Location::XMM(dst)) => { + dynasm!(self ; fmov S(map_xmm(dst).v()), W(map_gpr(src).x())); + } + (Size::S32, Location::Memory(base, disp), Location::XMM(dst)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + dynasm!(self ; fmov D(map_xmm(dst).v()), D(map_xmm(src).v())); + } + (Size::S64, Location::XMM(src), Location::GPR(dst)) => { + dynasm!(self ; fmov X(map_gpr(dst).x()), D(map_xmm(src).v())); + } + (Size::S64, Location::GPR(src), Location::XMM(dst)) => { + dynasm!(self ; fmov D(map_xmm(dst).v()), X(map_gpr(src).x())); + } + (Size::S64, Location::Memory(base, disp), Location::XMM(dst)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, panic!("NOT IMPL: {:?} {:?} {:?}", sz, src, dst), + } + } + + fn emit_lea(&mut self, sz: Size, src: Location, dst: Location) { + match (sz, src, dst) { + (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + } + fn emit_lea_label(&mut self, label: Self::Label, dst: Location) { + match dst { + Location::GPR(dst) => { + dynasm!(self ; adr X(map_gpr(dst).x()), =>label); + } + _ => unreachable!(), + } + } + + fn emit_cdq(&mut self) { + dynasm!( + self + ; b >after + ; bit_tester: + ; .dword 0x80000000u32 as i32 + ; all_ones: + ; .dword 0xffffffffu32 as i32 + ; after: + ; ldr w_tmp1, zero + ; not_zero: + ; ldr W(map_gpr(GPR::RDX).x()), after + ; zero: + ; mov W(map_gpr(GPR::RDX).x()), wzr + ; after: + ); + } + fn emit_cqo(&mut self) { + dynasm!( + self + ; b >after + ; bit_tester: + ; .qword 0x8000000000000000u64 as i64 + ; all_ones: + ; .qword 0xffffffffffffffffu64 as i64 + ; after: + ; ldr x_tmp1, zero + ; not_zero: + ; ldr X(map_gpr(GPR::RDX).x()), after + ; zero: + ; mov X(map_gpr(GPR::RDX).x()), xzr + ; after: + ); + } + fn emit_xor(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(eor, self, sz, src, dst, { unreachable!("xor") }); + } + fn emit_jmp(&mut self, condition: Condition, label: Self::Label) { + use Condition::*; + + match condition { + None => dynasm!(self ; b =>label), + Above => dynasm!(self ; b.hi =>label), + AboveEqual => dynasm!(self ; b.hs =>label), + Below => dynasm!(self ; b.lo =>label), + BelowEqual => dynasm!(self ; b.ls =>label), + Greater => dynasm!(self ; b.gt =>label), + GreaterEqual => dynasm!(self ; b.ge =>label), + Less => dynasm!(self ; b.lt =>label), + LessEqual 
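
The `emit_cdq`/`emit_cqo` implementations above keep x86 semantics: RDX is filled with the sign extension of RAX, using the embedded bit-tester and all-ones constants. A small plain-Rust restatement of the semantics being reproduced (not emitter code):

```rust
// x86 CDQ: EDX := sign bits of EAX; CQO is the 64-bit analogue for RDX/RAX.
fn cdq(eax: u32) -> u32 {
    if (eax as i32) < 0 { 0xffff_ffff } else { 0 }
}

fn cqo(rax: u64) -> u64 {
    if (rax as i64) < 0 { u64::MAX } else { 0 }
}

fn main() {
    assert_eq!(cdq(0x8000_0000), 0xffff_ffff); // negative: all ones
    assert_eq!(cdq(5), 0);                     // non-negative: zero
    assert_eq!(cqo(u64::MAX), u64::MAX);       // -1 sign-extends to all ones
}
```
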
=> dynasm!(self ; b.le =>label), + Equal => dynasm!(self ; b.eq =>label), + NotEqual => dynasm!(self ; b.ne =>label), + Signed => dynasm!(self ; b.vs =>label), // TODO: Review this + } + } + + fn emit_jmp_location(&mut self, loc: Location) { + match loc { + Location::GPR(x) => dynasm!(self ; br X(map_gpr(x).x())), + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + } + + fn emit_conditional_trap(&mut self, condition: Condition) { + use Condition::*; + + match condition { + None => dynasm!(self ; b >fail), + Above => dynasm!(self ; b.hi >fail), + AboveEqual => dynasm!(self ; b.hs >fail), + Below => dynasm!(self ; b.lo >fail), + BelowEqual => dynasm!(self ; b.ls >fail), + Greater => dynasm!(self ; b.gt >fail), + GreaterEqual => dynasm!(self ; b.ge >fail), + Less => dynasm!(self ; b.lt >fail), + LessEqual => dynasm!(self ; b.le >fail), + Equal => dynasm!(self ; b.eq >fail), + NotEqual => dynasm!(self ; b.ne >fail), + Signed => dynasm!(self ; b.vs >fail), // TODO: Review this + } + dynasm!( + self + ; b >ok + ; fail: + ; .dword 0 ; .dword 0 + ; ok: + ); + } + + fn emit_set(&mut self, condition: Condition, dst: GPR) { + use Condition::*; + + match condition { + None => dynasm!(self ; b >set), + Above => dynasm!(self ; b.hi >set), + AboveEqual => dynasm!(self ; b.hs >set), + Below => dynasm!(self ; b.lo >set), + BelowEqual => dynasm!(self ; b.ls >set), + Greater => dynasm!(self ; b.gt >set), + GreaterEqual => dynasm!(self ; b.ge >set), + Less => dynasm!(self ; b.lt >set), + LessEqual => dynasm!(self ; b.le >set), + Equal => dynasm!(self ; b.eq >set), + NotEqual => dynasm!(self ; b.ne >set), + Signed => dynasm!(self ; b.vs >set), // TODO: Review this + } + dynasm!( + self + ; mov W(map_gpr(dst).x()), wzr + ; b >ok + ; set: + ; mov W(map_gpr(dst).x()), 1 + ; ok: + ); + } + + fn emit_push(&mut self, sz: Size, src: Location) { + match (sz, src) { + (Size::S64, Location::Imm32(src)) => dynasm!(self + ; b >after + ; data: + ; .dword src as i32 + ; after: + ; ldr w_tmp1, dynasm!(self + ; sub x_rsp, x_rsp, 8 + ; str X(map_gpr(src).x()), [x_rsp] + ), + (Size::S64, Location::Memory(base, disp)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, panic!("push {:?} {:?}", sz, src), + } + } + fn emit_pop(&mut self, sz: Size, dst: Location) { + match (sz, dst) { + (Size::S64, Location::GPR(dst)) => dynasm!(self + ; ldr X(map_gpr(dst).x()), [x_rsp] + ; add x_rsp, x_rsp, 8 + ), + (Size::S64, Location::Memory(base, disp)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, panic!("pop {:?} {:?}", sz, dst), + } + } + fn emit_cmp(&mut self, sz: Size, left: Location, right: Location) { + match (sz, left, right) { + (Size::S32, Location::Imm32(left), Location::GPR(right)) => { + dynasm!(self + ; b >after + ; data: + ; .dword left as i32 + ; after: + ; ldr w_tmp1, { + dynasm!(self + ; b >after + ; data: + ; .qword left as i64 + ; after: + ; ldr x_tmp1, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, after + ; data: + ; .dword left as i32 + ; after: + ; ldr w_tmp1, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr 
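
`emit_push`/`emit_pop` above maintain an emulated x86-64 stack in a dedicated register (`x_rsp` in the dynasm aliases): push decrements by 8 and stores, pop loads and increments by 8. A self-contained sketch of that discipline (the struct is illustrative only):

```rust
// Emulated x86-style stack: grows downward in 8-byte slots.
struct EmuStack {
    rsp: usize,
    mem: Vec<u8>,
}

impl EmuStack {
    fn push(&mut self, v: u64) {
        self.rsp -= 8; // sub x_rsp, x_rsp, 8
        self.mem[self.rsp..self.rsp + 8].copy_from_slice(&v.to_le_bytes());
    }

    fn pop(&mut self) -> u64 {
        let mut b = [0u8; 8];
        b.copy_from_slice(&self.mem[self.rsp..self.rsp + 8]); // ldr ..., [x_rsp]
        self.rsp += 8; // add x_rsp, x_rsp, 8
        u64::from_le_bytes(b)
    }
}

fn main() {
    let mut s = EmuStack { rsp: 64, mem: vec![0; 64] };
    s.push(0xdead_beef);
    assert_eq!(s.pop(), 0xdead_beef);
    assert_eq!(s.rsp, 64); // balanced
}
```
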
w_tmp3, after + ; data: + ; .qword left as i64 + ; after: + ; ldr x_tmp1, dynasm!( + self + ; cmp W(map_gpr(right).x()), W(map_gpr(left).x()) + ), + (Size::S64, Location::GPR(left), Location::GPR(right)) => dynasm!( + self + ; cmp X(map_gpr(right).x()), X(map_gpr(left).x()) + ), + (Size::S32, Location::GPR(left), Location::Memory(base, disp)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + } + fn emit_add(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(add, self, sz, src, dst, { unreachable!("add") }); + } + fn emit_sub(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(sub, self, sz, src, dst, { unreachable!("sub") }); + } + + fn emit_imul(&mut self, sz: Size, src: Location, dst: Location) { + binop_gpr_gpr!(mul, self, sz, src, dst, { + binop_mem_gpr!(mul, self, sz, src, dst, { unreachable!() }) + }); + } + fn emit_imul_imm32_gpr64(&mut self, src: u32, dst: GPR) { + dynasm!( + self + ; b >after + ; data: + ; .dword src as i32 + ; after: + ; ldr w_tmp1, { + match divisor { + Location::GPR(x) => dynasm!( + self + ; mov w_tmp1, W(map_gpr(x).x()) + ), + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + dynasm!( + self + ; mov w_tmp2, W(map_gpr(GPR::RAX).x()) + ; udiv W(map_gpr(GPR::RAX).x()), w_tmp2, w_tmp1 + ; msub W(map_gpr(GPR::RDX).x()), W(map_gpr(GPR::RAX).x()), w_tmp1, w_tmp2 + ) + } + Size::S64 => { + match divisor { + Location::GPR(x) => dynasm!( + self + ; mov x_tmp1, X(map_gpr(x).x()) + ), + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + dynasm!( + self + ; mov x_tmp2, X(map_gpr(GPR::RAX).x()) + ; udiv X(map_gpr(GPR::RAX).x()), x_tmp2, x_tmp1 + ; msub X(map_gpr(GPR::RDX).x()), X(map_gpr(GPR::RAX).x()), x_tmp1, x_tmp2 + ) + } + _ => unreachable!(), + } + } + fn emit_idiv(&mut self, sz: Size, divisor: Location) { + match sz { + Size::S32 => { + match divisor { + Location::GPR(x) => dynasm!( + self + ; mov w_tmp1, W(map_gpr(x).x()) + ), + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + dynasm!( + self + ; mov w_tmp2, W(map_gpr(GPR::RAX).x()) + ; sdiv W(map_gpr(GPR::RAX).x()), w_tmp2, w_tmp1 + ; msub W(map_gpr(GPR::RDX).x()), W(map_gpr(GPR::RAX).x()), w_tmp1, w_tmp2 + ) + } + Size::S64 => { + match divisor { + Location::GPR(x) => dynasm!( + self + ; mov x_tmp1, X(map_gpr(x).x()) + ), + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + dynasm!( + self + ; mov x_tmp2, X(map_gpr(GPR::RAX).x()) + ; sdiv X(map_gpr(GPR::RAX).x()), x_tmp2, 
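
`emit_div`/`emit_idiv` reproduce the x86 convention of quotient in RAX and remainder in RDX. AArch64 has `udiv`/`sdiv` but no remainder instruction, so the remainder is recovered with `msub` as dividend minus quotient times divisor. A quick unsigned model of that identity:

```rust
// udiv + msub: rem = dividend - (dividend / divisor) * divisor.
fn div_like_x86(dividend: u64, divisor: u64) -> (u64, u64) {
    let q = dividend / divisor;                              // udiv
    let r = dividend.wrapping_sub(q.wrapping_mul(divisor));  // msub
    (q, r) // (RAX, RDX) in the emulated register file
}

fn main() {
    assert_eq!(div_like_x86(17, 5), (3, 2));
    assert_eq!(div_like_x86(10, 2), (5, 0));
}
```
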
x_tmp1 + ; msub X(map_gpr(GPR::RDX).x()), X(map_gpr(GPR::RAX).x()), x_tmp1, x_tmp2 + ) + } + _ => unreachable!(), + } + } + fn emit_shl(&mut self, sz: Size, src: Location, dst: Location) { + binop_shift!(lsl, self, sz, src, dst, { unreachable!("shl") }); + } + fn emit_shr(&mut self, sz: Size, src: Location, dst: Location) { + binop_shift!(lsr, self, sz, src, dst, { unreachable!("shr") }); + } + fn emit_sar(&mut self, sz: Size, src: Location, dst: Location) { + binop_shift!(asr, self, sz, src, dst, { unreachable!("sar") }); + } + fn emit_rol(&mut self, sz: Size, src: Location, dst: Location) { + // TODO: We are changing content of `src` (possibly RCX) here. Will this break any assumptions? + match sz { + Size::S32 => match src { + Location::Imm8(x) => { + assert!(x < 32); + binop_shift!(ror, self, sz, Location::Imm8(32 - x), dst, { + unreachable!("rol") + }); + } + Location::GPR(GPR::RCX) => { + dynasm!( + self + ; mov w_tmp1, 32 + ; sub W(map_gpr(GPR::RCX).x()), w_tmp1, W(map_gpr(GPR::RCX).x()) + ); + binop_shift!(ror, self, sz, src, dst, { unreachable!("rol") }); + } + _ => unreachable!(), + }, + Size::S64 => match src { + Location::Imm8(x) => { + assert!(x < 64); + binop_shift!(ror, self, sz, Location::Imm8(64 - x), dst, { + unreachable!("rol") + }); + } + Location::GPR(GPR::RCX) => { + dynasm!( + self + ; mov x_tmp1, 64 + ; sub X(map_gpr(GPR::RCX).x()), x_tmp1, X(map_gpr(GPR::RCX).x()) + ); + binop_shift!(ror, self, sz, src, dst, { unreachable!("rol") }); + } + _ => unreachable!(), + }, + _ => unreachable!(), + } + } + fn emit_ror(&mut self, sz: Size, src: Location, dst: Location) { + binop_shift!(ror, self, sz, src, dst, { unreachable!("ror") }); + } + fn emit_and(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(and, self, sz, src, dst, { unreachable!("and") }); + } + fn emit_or(&mut self, sz: Size, src: Location, dst: Location) { + binop_all_nofp!(orr, self, sz, src, dst, { unreachable!("or") }); + } + fn emit_bsr(&mut self, _sz: Size, _src: Location, _dst: Location) { + unimplemented!("aarch64: bsr"); + } + fn emit_bsf(&mut self, _sz: Size, _src: Location, _dst: Location) { + unimplemented!("aarch64: bsf"); + } + fn arch_has_xzcnt(&self) -> bool { + true + } + fn arch_emit_lzcnt(&mut self, sz: Size, src: Location, dst: Location) { + emit_clz_variant(self, sz, &src, &dst, false); + } + fn arch_emit_tzcnt(&mut self, sz: Size, src: Location, dst: Location) { + emit_clz_variant(self, sz, &src, &dst, true); + } + fn emit_neg(&mut self, _sz: Size, _value: Location) { + unimplemented!("aarch64: neg"); + } + fn emit_popcnt(&mut self, sz: Size, src: Location, dst: Location) { + match sz { + Size::S32 => { + match src { + Location::GPR(src) => dynasm!( + self + ; mov w_tmp1, W(map_gpr(src).x()) + ), + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + match dst { + Location::GPR(dst) => { + dynasm!( + self + ; mov v_tmp1.S[0], w_tmp1 + ; cnt v_tmp1.B16, v_tmp1.B16 + ; mov w_tmp1, v_tmp1.S[0] + ; mov W(map_gpr(dst).x()), w_tmp1 + ; add W(map_gpr(dst).x()), W(map_gpr(dst).x()), w_tmp1, lsr 8 + ; add W(map_gpr(dst).x()), W(map_gpr(dst).x()), w_tmp1, lsr 16 + ; add W(map_gpr(dst).x()), W(map_gpr(dst).x()), w_tmp1, lsr 24 + ; and W(map_gpr(dst).x()), W(map_gpr(dst).x()), 255 + ); + } + _ => unreachable!(), + } + } + Size::S64 => { + match src { + Location::GPR(src) => dynasm!( + self + ; mov x_tmp1, X(map_gpr(src).x()) + ), + 
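
`emit_rol` works around the lack of a rotate-left instruction: AArch64 only has `ror`, so a rotate left by n is emitted as a rotate right by width minus n (the amount is rewritten for immediates, or recomputed in a scratch register for the RCX case). The identity it relies on, checked in plain Rust:

```rust
// rol(x, n) == ror(x, width - n) for 0 < n < width; n == 0 is the identity.
fn rol32_via_ror(x: u32, n: u32) -> u32 {
    let n = n & 31;
    if n == 0 { x } else { x.rotate_right(32 - n) }
}

fn main() {
    for &(x, n) in &[(0x8000_0001u32, 1u32), (0x1234_5678, 13), (42, 0)] {
        assert_eq!(rol32_via_ror(x, n), x.rotate_left(n));
    }
}
```
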
Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + match dst { + Location::GPR(dst) => { + dynasm!( + self + ; mov v_tmp1.D[0], x_tmp1 + ; cnt v_tmp1.B16, v_tmp1.B16 + ; mov x_tmp1, v_tmp1.D[0] + ; mov X(map_gpr(dst).x()), x_tmp1 + ; add X(map_gpr(dst).x()), X(map_gpr(dst).x()), x_tmp1, lsr 8 + ; add X(map_gpr(dst).x()), X(map_gpr(dst).x()), x_tmp1, lsr 16 + ; add X(map_gpr(dst).x()), X(map_gpr(dst).x()), x_tmp1, lsr 24 + ; add X(map_gpr(dst).x()), X(map_gpr(dst).x()), x_tmp1, lsr 32 + ; add X(map_gpr(dst).x()), X(map_gpr(dst).x()), x_tmp1, lsr 40 + ; add X(map_gpr(dst).x()), X(map_gpr(dst).x()), x_tmp1, lsr 48 + ; add X(map_gpr(dst).x()), X(map_gpr(dst).x()), x_tmp1, lsr 56 + ; and X(map_gpr(dst).x()), X(map_gpr(dst).x()), 255 + ); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } + } + fn emit_movzx(&mut self, sz_src: Size, src: Location, _sz_dst: Size, dst: Location) { + match (sz_src, src, dst) { + (Size::S8, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; uxtb W(map_gpr(dst).x()), W(map_gpr(src).x())); + } + (Size::S16, Location::GPR(src), Location::GPR(dst)) => { + dynasm!(self ; uxth W(map_gpr(dst).x()), W(map_gpr(src).x())); + } + (Size::S8, Location::Memory(base, disp), Location::GPR(dst)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + } + fn emit_movsx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location) { + match (sz_src, src, sz_dst, dst) { + (Size::S8, Location::GPR(src), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; sxtb W(map_gpr(dst).x()), W(map_gpr(src).x())); + } + (Size::S16, Location::GPR(src), Size::S32, Location::GPR(dst)) => { + dynasm!(self ; sxth W(map_gpr(dst).x()), W(map_gpr(src).x())); + } + (Size::S8, Location::Memory(base, disp), Size::S32, Location::GPR(dst)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + dynasm!(self ; sxtb X(map_gpr(dst).x()), W(map_gpr(src).x())); + } + (Size::S16, Location::GPR(src), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; sxth X(map_gpr(dst).x()), W(map_gpr(src).x())); + } + (Size::S32, Location::GPR(src), Size::S64, Location::GPR(dst)) => { + dynasm!(self ; sxtw X(map_gpr(dst).x()), W(map_gpr(src).x())); + } + (Size::S8, Location::Memory(base, disp), Size::S64, Location::GPR(dst)) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + } + + fn emit_xchg(&mut self, _sz: Size, _src: Location, _dst: Location) { + unimplemented!("aarch64: xchg") + } + fn emit_lock_xadd(&mut self, _sz: Size, _src: Location, _dst: Location) { + unimplemented!("aarch64: 
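
The `emit_popcnt` lowering moves the operand into a vector register, applies NEON `cnt` (per-byte population count), then folds the byte counts into the low byte with a chain of shifted adds and a final mask. A scalar model of that fold, checked against the library popcount:

```rust
// Per-byte counts (what NEON `cnt` yields per lane), then a horizontal fold.
fn popcnt64_neon_style(x: u64) -> u64 {
    let counts = u64::from_le_bytes(x.to_le_bytes().map(|b| b.count_ones() as u8));
    let mut acc = counts;
    for shift in (8..64).step_by(8) {
        acc = acc.wrapping_add(counts >> shift); // add Xd, Xd, Xc, lsr #shift
    }
    acc & 0xff // the low byte holds the full count (at most 64)
}

fn main() {
    for &x in &[0u64, 1, u64::MAX, 0x0123_4567_89ab_cdef] {
        assert_eq!(popcnt64_neon_style(x), u64::from(x.count_ones()));
    }
}
```
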
xadd") + } + fn emit_lock_cmpxchg(&mut self, _sz: Size, _src: Location, _dst: Location) { + unimplemented!("aarch64: cmpxchg") + } + fn emit_vmovaps(&mut self, _src: XMMOrMemory, _dst: XMMOrMemory) { + unimplemented!("aarch64: vmovaps") + } + fn emit_vmovapd(&mut self, _src: XMMOrMemory, _dst: XMMOrMemory) { + unimplemented!("aarch64: vmovapd") + } + fn emit_vxorps(&mut self, _src1: XMM, _src2: XMMOrMemory, _dst: XMM) { + unimplemented!("aarch64: vxorps") + } + fn emit_vxorpd(&mut self, _src1: XMM, _src2: XMMOrMemory, _dst: XMM) { + unimplemented!("aarch64: vxorpd") + } + fn emit_vcmpunordss(&mut self, _src1: XMM, _src2: XMMOrMemory, _dst: XMM) { + unimplemented!("aarch64: vcmpunordss") + } + fn emit_vcmpunordsd(&mut self, _src1: XMM, _src2: XMMOrMemory, _dst: XMM) { + unimplemented!("aarch64: vcmpunordsd") + } + + fn emit_vcmpordss(&mut self, _src1: XMM, _src2: XMMOrMemory, _dst: XMM) { + unimplemented!("aarch64: vcmpordss") + } + fn emit_vcmpordsd(&mut self, _src1: XMM, _src2: XMMOrMemory, _dst: XMM) { + unimplemented!("aarch64: vcmpordsd") + } + + fn emit_vblendvps(&mut self, _src1: XMM, _src2: XMMOrMemory, _mask: XMM, _dst: XMM) { + unimplemented!("aarch64: vblendvps") + } + fn emit_vblendvpd(&mut self, _src1: XMM, _src2: XMMOrMemory, _mask: XMM, _dst: XMM) { + unimplemented!("aarch64: vblendvpd") + } + + avx_fn!(fadd, S, W, emit_vaddss); + avx_fn!(fsub, S, W, emit_vsubss); + avx_fn!(fmul, S, W, emit_vmulss); + avx_fn!(fdiv, S, W, emit_vdivss); + avx_fn!(fmax, S, W, emit_vmaxss); + avx_fn!(fmin, S, W, emit_vminss); + avx_cmp!(gt, S, W, emit_vcmpgtss); + avx_cmp!(ge, S, W, emit_vcmpgess); + avx_cmp!(mi, S, W, emit_vcmpltss); + avx_cmp!(ls, S, W, emit_vcmpless); + avx_cmp!(eq, S, W, emit_vcmpeqss); + avx_cmp!(ne, S, W, emit_vcmpneqss); + avx_fn_unop!(fsqrt, S, emit_vsqrtss); + avx_fn_unop!(frintn, S, emit_vroundss_nearest); // to nearest with ties to even + avx_fn_unop!(frintm, S, emit_vroundss_floor); // toward minus infinity + avx_fn_unop!(frintp, S, emit_vroundss_ceil); // toward positive infinity + avx_fn_unop!(frintz, S, emit_vroundss_trunc); // toward zero + avx_fn_cvt!(fcvt, S, D, emit_vcvtss2sd); + + avx_fn!(fadd, D, X, emit_vaddsd); + avx_fn!(fsub, D, X, emit_vsubsd); + avx_fn!(fmul, D, X, emit_vmulsd); + avx_fn!(fdiv, D, X, emit_vdivsd); + avx_fn!(fmax, D, X, emit_vmaxsd); + avx_fn!(fmin, D, X, emit_vminsd); + avx_cmp!(gt, D, X, emit_vcmpgtsd); + avx_cmp!(ge, D, X, emit_vcmpgesd); + avx_cmp!(mi, D, X, emit_vcmpltsd); + avx_cmp!(ls, D, X, emit_vcmplesd); + avx_cmp!(eq, D, X, emit_vcmpeqsd); + avx_cmp!(ne, D, X, emit_vcmpneqsd); + avx_fn_unop!(fsqrt, D, emit_vsqrtsd); + avx_fn_unop!(frintn, D, emit_vroundsd_nearest); // to nearest with ties to even + avx_fn_unop!(frintm, D, emit_vroundsd_floor); // toward minus infinity + avx_fn_unop!(frintp, D, emit_vroundsd_ceil); // toward positive infinity + avx_fn_unop!(frintz, D, emit_vroundsd_trunc); // toward zero + avx_fn_cvt!(fcvt, D, S, emit_vcvtsd2ss); + + fn arch_has_itruncf(&self) -> bool { + true + } + fn arch_emit_i32_trunc_sf32(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzs W(map_gpr(dst).x()), S(map_xmm(src).v())); + } + fn arch_emit_i32_trunc_sf64(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzs W(map_gpr(dst).x()), D(map_xmm(src).v())); + } + fn arch_emit_i32_trunc_uf32(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzu W(map_gpr(dst).x()), S(map_xmm(src).v())); + } + fn arch_emit_i32_trunc_uf64(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzu W(map_gpr(dst).x()), D(map_xmm(src).v())); + } + fn 
arch_emit_i64_trunc_sf32(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzs X(map_gpr(dst).x()), S(map_xmm(src).v())); + } + fn arch_emit_i64_trunc_sf64(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzs X(map_gpr(dst).x()), D(map_xmm(src).v())); + } + fn arch_emit_i64_trunc_uf32(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzu X(map_gpr(dst).x()), S(map_xmm(src).v())); + } + fn arch_emit_i64_trunc_uf64(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzu X(map_gpr(dst).x()), D(map_xmm(src).v())); + } + + fn arch_has_fconverti(&self) -> bool { + true + } + fn arch_emit_f32_convert_si32(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; scvtf S(map_xmm(dst).v()), W(map_gpr(src).x())); + } + fn arch_emit_f32_convert_si64(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; scvtf S(map_xmm(dst).v()), X(map_gpr(src).x())); + } + fn arch_emit_f32_convert_ui32(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; ucvtf S(map_xmm(dst).v()), W(map_gpr(src).x())); + } + fn arch_emit_f32_convert_ui64(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; ucvtf S(map_xmm(dst).v()), X(map_gpr(src).x())); + } + fn arch_emit_f64_convert_si32(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; scvtf D(map_xmm(dst).v()), W(map_gpr(src).x())); + } + fn arch_emit_f64_convert_si64(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; scvtf D(map_xmm(dst).v()), X(map_gpr(src).x())); + } + fn arch_emit_f64_convert_ui32(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; ucvtf D(map_xmm(dst).v()), W(map_gpr(src).x())); + } + fn arch_emit_f64_convert_ui64(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; ucvtf D(map_xmm(dst).v()), X(map_gpr(src).x())); + } + + fn arch_has_fneg(&self) -> bool { + true + } + fn arch_emit_f32_neg(&mut self, src: XMM, dst: XMM) { + dynasm!(self ; fneg S(map_xmm(dst).v()), S(map_xmm(src).v())); + } + fn arch_emit_f64_neg(&mut self, src: XMM, dst: XMM) { + dynasm!(self ; fneg D(map_xmm(dst).v()), D(map_xmm(src).v())); + } + + fn emit_btc_gpr_imm8_32(&mut self, _src: u8, _dst: GPR) { + unimplemented!(); + } + fn emit_btc_gpr_imm8_64(&mut self, _src: u8, _dst: GPR) { + unimplemented!(); + } + fn emit_cmovae_gpr_32(&mut self, _src: GPR, _dst: GPR) { + unimplemented!(); + } + fn emit_cmovae_gpr_64(&mut self, _src: GPR, _dst: GPR) { + unimplemented!(); + } + fn emit_ucomiss(&mut self, _src: XMMOrMemory, _dst: XMM) { + unimplemented!(); + } + fn emit_ucomisd(&mut self, _src: XMMOrMemory, _dst: XMM) { + unimplemented!(); + } + fn emit_cvttss2si_32(&mut self, _src: XMMOrMemory, _dst: GPR) { + unimplemented!(); + } + fn emit_cvttss2si_64(&mut self, _src: XMMOrMemory, _dst: GPR) { + unimplemented!(); + } + fn emit_cvttsd2si_32(&mut self, _src: XMMOrMemory, _dst: GPR) { + unimplemented!(); + } + fn emit_cvttsd2si_64(&mut self, _src: XMMOrMemory, _dst: GPR) { + unimplemented!(); + } + fn emit_vcvtsi2ss_32(&mut self, _src1: XMM, _src2: GPROrMemory, _dst: XMM) { + unimplemented!(); + } + fn emit_vcvtsi2ss_64(&mut self, _src1: XMM, _src2: GPROrMemory, _dst: XMM) { + unimplemented!(); + } + fn emit_vcvtsi2sd_32(&mut self, _src1: XMM, _src2: GPROrMemory, _dst: XMM) { + unimplemented!(); + } + fn emit_vcvtsi2sd_64(&mut self, _src1: XMM, _src2: GPROrMemory, _dst: XMM) { + unimplemented!(); + } + fn emit_test_gpr_64(&mut self, _reg: GPR) { + unimplemented!(); + } + + fn emit_ud2(&mut self) { + dynasm!(self ; .dword 0 ; .dword 2) + } + fn emit_ret(&mut self) { + dynasm!(self + ; ldr x_tmp1, [x_rsp] + ; add x_rsp, x_rsp, 8 + ; br x_tmp1 + ); + } + fn emit_call_label(&mut self, label: Self::Label) { + 
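
The separate `scvtf`/`ucvtf` paths matter because the same bit pattern converts to very different floats depending on signedness, which is why there are distinct `f{32,64}_convert_{si,ui}{32,64}` emitters. Illustration:

```rust
fn main() {
    let bits: u32 = 0xffff_ffff;
    assert_eq!(f64::from(bits as i32), -1.0);     // scvtf view: signed -1
    assert_eq!(f64::from(bits), 4_294_967_295.0); // ucvtf view: unsigned max
}
```
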
dynasm!(self + ; b >after + ; addr: + ; .qword =>label // Is this the offset? + ; after: + + // Calculate the target address. + ; ldr x_tmp1, done + ; str x_tmp2, [x_rsp] + + // Jump. + ; br x_tmp1 + ; done: + ); + } + fn emit_call_location(&mut self, loc: Location) { + match loc { + Location::GPR(x) => dynasm!(self + // Push return address. + ; sub x_rsp, x_rsp, 8 + ; adr x_tmp1, >done + ; str x_tmp1, [x_rsp] + + // Jump. + ; br X(map_gpr(x).x()) + ; done: + ), + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, done + ; str x_tmp1, [x_rsp] + + // Read memory. + ; ldr x_tmp1, [x_tmp3] + + // Jump. + ; br x_tmp1 + ; done: + ); + } + _ => unreachable!(), + } + } + + fn emit_bkpt(&mut self) { + dynasm!(self ; .dword 0 ; .dword 1) + } + + fn emit_host_redirection(&mut self, target: GPR) { + let target = map_gpr(target); + dynasm!( + self + ; sub sp, sp, 80 + ; str x30, [sp, 0] // LR + ; str X(target.x()), [sp, 8] + // Save callee-saved registers as required by x86-64 conventions. + ; str X(map_gpr(GPR::RBX).x()), [sp, 16] + ; str X(map_gpr(GPR::R12).x()), [sp, 24] + ; str X(map_gpr(GPR::R13).x()), [sp, 32] + ; str X(map_gpr(GPR::R14).x()), [sp, 40] + ; str X(map_gpr(GPR::R15).x()), [sp, 48] + ; str X(map_gpr(GPR::RBP).x()), [sp, 56] + ; str X(map_gpr(GPR::RSP).x()), [sp, 64] + ; adr x30, >after + + // Put parameters in correct order + ; sub sp, sp, 64 + ; str X(map_gpr(GPR::RDI).x()), [sp, 0] + ; str X(map_gpr(GPR::RSI).x()), [sp, 8] + ; str X(map_gpr(GPR::RDX).x()), [sp, 16] + ; str X(map_gpr(GPR::RCX).x()), [sp, 24] + ; str X(map_gpr(GPR::R8).x()), [sp, 32] + ; str X(map_gpr(GPR::R9).x()), [sp, 40] + ; ldr x0, [sp, 0] + ; ldr x1, [sp, 8] + ; ldr x2, [sp, 16] + ; ldr x3, [sp, 24] + ; ldr x4, [sp, 32] + ; ldr x5, [sp, 40] + ; add sp, sp, 64 + + // Branch to saved target + ; ldr x8, [sp, 8] + ; br x8 + + ; after: + ; ldr x30, [sp, 0] // LR + ; ldr X(map_gpr(GPR::RBX).x()), [sp, 16] + ; ldr X(map_gpr(GPR::R12).x()), [sp, 24] + ; ldr X(map_gpr(GPR::R13).x()), [sp, 32] + ; ldr X(map_gpr(GPR::R14).x()), [sp, 40] + ; ldr X(map_gpr(GPR::R15).x()), [sp, 48] + ; ldr X(map_gpr(GPR::RBP).x()), [sp, 56] + ; ldr X(map_gpr(GPR::RSP).x()), [sp, 64] + ; add sp, sp, 80 + + ; ldr x_tmp1, [x_rsp] + ; add x_rsp, x_rsp, 8 + ; br x_tmp1 + ); + } + + fn emit_inline_breakpoint(&mut self, ty: InlineBreakpointType) { + dynasm!(self + ; .dword 0 + ; .dword -1 + ; .dword (ty as u8 as i32) + ); + } + + fn arch_supports_canonicalize_nan(&self) -> bool { + false + } + + fn arch_requires_indirect_call_trampoline(&self) -> bool { + true + } + + fn arch_emit_indirect_call_with_trampoline(&mut self, loc: Location) { + match loc { + Location::GPR(x) => { + dynasm!(self + // Push return address. + ; sub x_rsp, x_rsp, 8 + ; adr x_tmp1, >done + ; str x_tmp1, [x_rsp] + ); + self.emit_host_redirection(x); + dynasm!(self ; done: ); + } + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, done + ; str x_tmp1, [x_rsp] + + // Read memory. 
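
`emit_host_redirection` switches from the emulated System V x86-64 register file to the AArch64 procedure-call standard before branching to host code: the first six integer arguments are copied from the mapped RDI, RSI, RDX, RCX, R8, R9 into x0 through x5, with callee-saved registers spilled around the call. The mapping it performs, written out as data (names only, no codegen):

```rust
// System V x86-64 argument registers -> AAPCS64 argument registers.
const ARG_MAP: [(&str, &str); 6] = [
    ("RDI", "x0"),
    ("RSI", "x1"),
    ("RDX", "x2"),
    ("RCX", "x3"),
    ("R8", "x4"),
    ("R9", "x5"),
];

fn main() {
    for (sysv, aapcs) in ARG_MAP {
        println!("{sysv:>3} -> {aapcs}");
    }
}
```
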
+ ; ldr X(map_gpr(GPR::RAX).x()), [x_tmp3] + ); + self.emit_host_redirection(GPR::RAX); + dynasm!(self ; done: ); + } + _ => unreachable!(), + } + } + + fn arch_emit_entry_trampoline(&mut self) { + dynasm!( + self + ; mov x18, x28 + ; mov x28, sp // WASM stack pointer + ; ldr x9, >v_65536 + ; sub sp, sp, x9 // Pre-allocate the WASM stack + ; sub x28, x28, 16 // for the last two arguments + + // Fixup param locations. + ; str x0, [sp, 0] + ; str x1, [sp, 8] + ; str x2, [sp, 16] + ; str x3, [sp, 24] + ; str x4, [sp, 32] + ; str x5, [sp, 40] + ; str x6, [x28, 0] + ; str x7, [x28, 8] + ; ldr X(map_gpr(GPR::RDI).x()), [sp, 0] + ; ldr X(map_gpr(GPR::RSI).x()), [sp, 8] + ; ldr X(map_gpr(GPR::RDX).x()), [sp, 16] + ; ldr X(map_gpr(GPR::RCX).x()), [sp, 24] + ; ldr X(map_gpr(GPR::R8).x()), [sp, 32] + ; ldr X(map_gpr(GPR::R9).x()), [sp, 40] + + ; str x19, [sp, 0] + ; str x20, [sp, 8] + ; str x21, [sp, 16] + ; str x22, [sp, 24] + ; str x23, [sp, 32] + ; str x24, [sp, 40] + ; str x25, [sp, 48] + ; str x26, [sp, 56] + ; str x27, [sp, 64] + ; str x18, [sp, 72] // previously x28 + ; str x29, [sp, 80] + ; str x30, [sp, 88] + + // return address + ; adr x20, >done + ; sub x28, x28, 8 + ; str x20, [x28] // Keep this consistent with RSP mapping in translator_aarch64 + + // Jump to target function! + ; b >real_entry + + ; done: + ; ldr x19, [sp, 0] + ; ldr x20, [sp, 8] + ; ldr x21, [sp, 16] + ; ldr x22, [sp, 24] + ; ldr x23, [sp, 32] + ; ldr x24, [sp, 40] + ; ldr x25, [sp, 48] + ; ldr x26, [sp, 56] + ; ldr x27, [sp, 64] + ; ldr x28, [sp, 72] + ; ldr x29, [sp, 80] + ; ldr x30, [sp, 88] + ; ldr x9, >v_65536 + ; add sp, sp, x9 // Resume stack pointer + ; br x30 // LR + + ; v_65536: + ; .qword 524288 + + ; real_entry: + ) + } +} + +fn emit_clz_variant( + assembler: &mut Assembler, + sz: Size, + src: &Location, + dst: &Location, + reversed: bool, +) { + match sz { + Size::S32 => { + match *src { + Location::GPR(src) => dynasm!( + assembler + ; mov w_tmp1, W(map_gpr(src).x()) + ), + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + match *dst { + Location::GPR(dst) => { + if reversed { + dynasm!(assembler ; rbit w_tmp1, w_tmp1); + } + dynasm!( + assembler + ; clz W(map_gpr(dst).x()), w_tmp1 + ); + } + _ => unreachable!(), + } + } + Size::S64 => { + match *src { + Location::GPR(src) => dynasm!( + assembler + ; mov x_tmp1, X(map_gpr(src).x()) + ), + Location::Memory(base, disp) => { + if disp >= 0 { + dynasm!(assembler ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, unreachable!(), + } + match *dst { + Location::GPR(dst) => { + if reversed { + dynasm!(assembler ; rbit x_tmp1, x_tmp1) + } + dynasm!( + assembler + ; clz X(map_gpr(dst).x()), x_tmp1 + ); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} diff --git a/lib/spectests/tests/excludes.txt b/lib/spectests/tests/excludes.txt index 45310f2e8ff..a2742b0dcc2 100644 --- a/lib/spectests/tests/excludes.txt +++ b/lib/spectests/tests/excludes.txt @@ -306,6 +306,8 @@ llvm:skip:simd_binaryen.wast:*:unix # Module - caught panic Any singlepass:skip:simd.wast:* # SIMD not implemented singlepass:skip:simd_binaryen.wast:* # SIMD not implemented +singlepass:skip:atomic.wast:*:*:aarch64 # Threads not yet supported on singlepass + singlepass:fail:address.wast:192 # AssertTrap - expected trap, got Runtime:Error unknown error singlepass:fail:address.wast:194 # 
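
`emit_clz_variant` handles both `lzcnt` and `tzcnt` with one sequence: counting trailing zeros is done by bit-reversing the value (`rbit`) and then counting leading zeros (`clz`). The identity, checked in plain Rust:

```rust
fn tzcnt_via_rbit_clz(x: u32) -> u32 {
    x.reverse_bits().leading_zeros() // rbit + clz
}

fn main() {
    for &x in &[0u32, 1, 8, 0b1010_0000, 0x8000_0000] {
        assert_eq!(tzcnt_via_rbit_clz(x), x.trailing_zeros());
    }
}
```
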
AssertTrap - expected trap, got [] singlepass:fail:address.wast:195 # AssertTrap - expected trap, got [] diff --git a/scripts/binary-name.sh b/scripts/binary-name.sh index 4ac8ef8e36b..0245690b726 100755 --- a/scripts/binary-name.sh +++ b/scripts/binary-name.sh @@ -5,9 +5,11 @@ initArch() { if [ -n "$WASMER_ARCH" ]; then ARCH="$WASMER_ARCH" fi + # If you modify this list, please also modify install.sh case $ARCH in amd64) ARCH="amd64";; x86_64) ARCH="amd64";; + aarch64) ARCH="arm64";; i386) ARCH="386";; *) echo "Architecture ${ARCH} is not supported by this installation script"; exit 1;; esac diff --git a/src/bin/wasmer.rs b/src/bin/wasmer.rs index d4fce7dec78..a08f7bd532b 100644 --- a/src/bin/wasmer.rs +++ b/src/bin/wasmer.rs @@ -21,6 +21,7 @@ use std::collections::HashMap; use structopt::{clap, StructOpt}; use wasmer::*; +#[cfg(feature = "backend-cranelift")] use wasmer_clif_backend::CraneliftCompiler; #[cfg(feature = "backend-llvm")] use wasmer_llvm_backend::{LLVMCompiler, LLVMOptions}; @@ -36,8 +37,6 @@ use wasmer_runtime_core::{ debug, loader::{Instance as LoadedInstance, LocalLoader}, }; -#[cfg(feature = "backend-singlepass")] -use wasmer_singlepass_backend::SinglePassCompiler; #[cfg(feature = "wasi")] use wasmer_wasi; @@ -62,7 +61,7 @@ enum CLIOptions { SelfUpdate, } -#[derive(Debug, StructOpt)] +#[derive(Debug, StructOpt, Clone)] struct PrestandardFeatures { /// Enable support for the SIMD proposal. #[structopt(long = "enable-simd")] @@ -117,7 +116,7 @@ pub struct LLVMCLIOptions { obj_file: Option, } -#[derive(Debug, StructOpt)] +#[derive(Debug, StructOpt, Clone)] struct Run { /// Disable the cache #[structopt(long = "disable-cache")] @@ -127,7 +126,8 @@ struct Run { #[structopt(parse(from_os_str))] path: PathBuf, - // Disable the cache + /// Name of the backend to use. (x86_64) + #[cfg(target_arch = "x86_64")] #[structopt( long = "backend", default_value = "cranelift", @@ -136,6 +136,16 @@ struct Run { )] backend: Backend, + /// Name of the backend to use. (aarch64) + #[cfg(target_arch = "aarch64")] + #[structopt( + long = "backend", + default_value = "singlepass", + case_insensitive = true, + possible_values = Backend::variants(), + )] + backend: Backend, + /// Invoke a specified function #[structopt(long = "invoke", short = "i")] invoke: Option, @@ -188,6 +198,14 @@ struct Run { #[structopt(long = "track-state")] track_state: bool, + // Enable the CallTrace middleware. + #[structopt(long = "call-trace")] + call_trace: bool, + + // Enable the BlockTrace middleware. + #[structopt(long = "block-trace")] + block_trace: bool, + /// The command name is a string that will override the first argument passed /// to the wasm program. 
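
The CLI changes above select the default backend per target architecture with `cfg` attributes on the duplicated `backend` field: cranelift stays the default on x86_64, while aarch64 defaults to singlepass. A condensed sketch of the same idea (the constant name is illustrative):

```rust
#[cfg(target_arch = "x86_64")]
const DEFAULT_BACKEND: &str = "cranelift";
#[cfg(target_arch = "aarch64")]
const DEFAULT_BACKEND: &str = "singlepass";
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
const DEFAULT_BACKEND: &str = "cranelift";

fn main() {
    println!("default backend: {DEFAULT_BACKEND}");
}
```
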
This is used in wapm to provide nicer output in /// help commands and error messages of the running wasm program @@ -340,6 +358,7 @@ fn execute_wasi( env_vars: Vec<(&str, &str)>, module: wasmer_runtime_core::Module, mapped_dirs: Vec<(String, PathBuf)>, + _wasm_binary: &[u8], ) -> Result<(), String> { let args = if let Some(cn) = &options.command_name { [cn.clone()] @@ -381,7 +400,7 @@ fn execute_wasi( unsafe { run_tiering( module.info(), - &wasm_binary, + &_wasm_binary, if let Some(ref path) = options.resume { let mut f = File::open(path).unwrap(); let mut out: Vec = vec![]; @@ -396,12 +415,21 @@ fn execute_wasi( &import_object, start_raw, &mut instance, + options.backend, options .optimized_backends .iter() - .map(|&backend| -> Box Box + Send> { - Box::new(move || get_compiler_by_backend(backend).unwrap()) - }) + .map( + |&backend| -> (Backend, Box Box + Send>) { + let options = options.clone(); + ( + backend, + Box::new(move || { + get_compiler_by_backend(backend, &options).unwrap() + }), + ) + }, + ) .collect(), interactive_shell, )? @@ -411,19 +439,46 @@ fn execute_wasi( #[cfg(not(feature = "managed"))] { use wasmer_runtime::error::RuntimeError; - let result = start.call(); + #[cfg(unix)] + use wasmer_runtime_core::{ + fault::{pop_code_version, push_code_version}, + state::CodeVersion, + }; + + let result; + + #[cfg(unix)] + { + let cv_pushed = + if let Some(msm) = instance.module.runnable_module.get_module_state_map() { + push_code_version(CodeVersion { + baseline: true, + msm: msm, + base: instance.module.runnable_module.get_code().unwrap().as_ptr() as usize, + backend: options.backend, + }); + true + } else { + false + }; + result = start.call(); + if cv_pushed { + pop_code_version().unwrap(); + } + } + #[cfg(not(unix))] + { + result = start.call(); + } if let Err(ref err) = result { match err { RuntimeError::Trap { msg } => return Err(format!("wasm trap occured: {}", msg)), - #[cfg(feature = "wasi")] RuntimeError::Error { data } => { if let Some(error_code) = data.downcast_ref::() { std::process::exit(error_code.code as i32) } } - #[cfg(not(feature = "wasi"))] - RuntimeError::Error { .. } => (), } return Err(format!("error: {:?}", err)); } @@ -503,7 +558,7 @@ fn execute_wasm(options: &Run) -> Result<(), String> { .map_err(|e| format!("Can't convert from wast to wasm: {:?}", e))?; } - let compiler: Box = match get_compiler_by_backend(options.backend) { + let compiler: Box = match get_compiler_by_backend(options.backend, options) { Some(x) => x, None => return Err("the requested backend is not enabled".into()), }; @@ -692,6 +747,7 @@ fn execute_wasm(options: &Run) -> Result<(), String> { env_vars, module, mapped_dirs, + &wasm_binary, )?; } else { let import_object = wasmer_runtime_core::import::ImportObject::new(); @@ -714,11 +770,12 @@ fn execute_wasm(options: &Run) -> Result<(), String> { Some(fun) => fun, _ => "main", }; - instance + let result = instance .dyn_func(&invoke_fn) .map_err(|e| format!("{:?}", e))? 
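
On unix targets the call into WASM is now bracketed by `push_code_version`/`pop_code_version` so the fault handler can attribute a trap to the running module's state map, and the pop only happens when the push did. A hypothetical drop-guard restatement of that invariant (not the wasmer API, shown only to make the pairing explicit):

```rust
struct CodeVersionGuard(bool);

impl CodeVersionGuard {
    fn push(has_state_map: bool) -> Self {
        // push_code_version(CodeVersion { .. }) would happen here when true.
        CodeVersionGuard(has_state_map)
    }
}

impl Drop for CodeVersionGuard {
    fn drop(&mut self) {
        if self.0 {
            // pop_code_version() would happen here, keeping push/pop balanced.
        }
    }
}

fn main() {
    let _guard = CodeVersionGuard::push(true);
    // start.call() would run here; the guard pops on every exit path.
}
```
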
.call(&args) .map_err(|e| format!("{:?}", e))?; + println!("main() returned: {:?}", result); } } @@ -847,13 +904,38 @@ fn validate(validate: Validate) { } } -fn get_compiler_by_backend(backend: Backend) -> Option> { +fn get_compiler_by_backend(backend: Backend, _opts: &Run) -> Option> { Some(match backend { #[cfg(feature = "backend-singlepass")] - Backend::Singlepass => Box::new(SinglePassCompiler::new()), + Backend::Singlepass => { + use wasmer_runtime_core::codegen::MiddlewareChain; + use wasmer_runtime_core::codegen::StreamingCompiler; + use wasmer_singlepass_backend::ModuleCodeGenerator as SinglePassMCG; + + let opts = _opts.clone(); + let middlewares_gen = move || { + let mut middlewares = MiddlewareChain::new(); + if opts.call_trace { + use wasmer_middleware_common::call_trace::CallTrace; + middlewares.push(CallTrace::new()); + } + if opts.block_trace { + use wasmer_middleware_common::block_trace::BlockTrace; + middlewares.push(BlockTrace::new()); + } + middlewares + }; + + let c: StreamingCompiler = + StreamingCompiler::new(middlewares_gen); + Box::new(c) + } #[cfg(not(feature = "backend-singlepass"))] Backend::Singlepass => return None, + #[cfg(feature = "backend-cranelift")] Backend::Cranelift => Box::new(CraneliftCompiler::new()), + #[cfg(not(feature = "backend-cranelift"))] + Backend::Cranelift => return None, #[cfg(feature = "backend-llvm")] Backend::LLVM => Box::new(LLVMCompiler::new()), #[cfg(not(feature = "backend-llvm"))]
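
`get_compiler_by_backend` now builds the singlepass compiler through a `StreamingCompiler` whose middleware chain is assembled from the new `--call-trace` and `--block-trace` flags. A condensed model of that flag-driven assembly (plain strings stand in for the middleware types):

```rust
struct Options {
    call_trace: bool,
    block_trace: bool,
}

fn middleware_names(opts: &Options) -> Vec<&'static str> {
    let mut chain = Vec::new();
    if opts.call_trace {
        chain.push("CallTrace"); // wasmer_middleware_common::call_trace
    }
    if opts.block_trace {
        chain.push("BlockTrace"); // wasmer_middleware_common::block_trace
    }
    chain
}

fn main() {
    let opts = Options { call_trace: true, block_trace: false };
    assert_eq!(middleware_names(&opts), vec!["CallTrace"]);
}
```
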