Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

portable-atomic 0.3.19 fails to compile on aarch64 / Apple Silicon #1400

Open
teohhanhui opened this issue Oct 22, 2023 · 11 comments
Open

portable-atomic 0.3.19 fails to compile on aarch64 / Apple Silicon #1400

teohhanhui opened this issue Oct 22, 2023 · 11 comments

Comments

@teohhanhui
Copy link

   Compiling portable-atomic v0.3.19
{standard input}: Assembler messages:
{standard input}:80: Error: selected processor does not support `casp x6,x7,x4,x5,[x0]'
{standard input}:103: Error: selected processor does not support `caspa x6,x7,x4,x5,[x0]'
{standard input}:126: Error: selected processor does not support `caspl x6,x7,x4,x5,[x0]'
{standard input}:149: Error: selected processor does not support `caspal x6,x7,x4,x5,[x0]'
error: Failed to assemble `.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
       2:
       ldxp x0, x1, [x3]
       stxp w2, x0, x1, [x3]
       cbnz w2, 2b
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
       2:
       ldaxp x0, x1, [x3]
       stxp w2, x0, x1, [x3]
       cbnz w2, 2b
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
       2:
       ldaxp x0, x1, [x3]
       stlxp w2, x0, x1, [x3]
       cbnz w2, 2b
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x0, [x19, 0x10]
           ldr x6, [x19, 0x0]
           ldr x7, [x19, 0x8]
           ldr x4, [x19, 0x18]
           ldr x5, [x19, 0x20]
       casp x6, x7, x4, x5, [x0]
           str x6, [x19, 0x0]
           str x7, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x0, [x19, 0x10]
           ldr x6, [x19, 0x0]
           ldr x7, [x19, 0x8]
           ldr x4, [x19, 0x18]
           ldr x5, [x19, 0x20]
       caspa x6, x7, x4, x5, [x0]
           str x6, [x19, 0x0]
           str x7, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x0, [x19, 0x10]
           ldr x6, [x19, 0x0]
           ldr x7, [x19, 0x8]
           ldr x4, [x19, 0x18]
           ldr x5, [x19, 0x20]
       caspl x6, x7, x4, x5, [x0]
           str x6, [x19, 0x0]
           str x7, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x0, [x19, 0x10]
           ldr x6, [x19, 0x0]
           ldr x7, [x19, 0x8]
           ldr x4, [x19, 0x18]
           ldr x5, [x19, 0x20]
       caspal x6, x7, x4, x5, [x0]
           str x6, [x19, 0x0]
           str x7, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
           ldr x4, [x19, 0x8]
           ldr x5, [x19, 0x10]
           ldr x6, [x19, 0x18]
           ldr x7, [x19, 0x20]
       2:
       ldxp x0, x1, [x3]
       cmp x0, x4
       cset w2, ne
       cmp x1, x5
       cinc w2, w2, ne
       cbz w2, 3f
       stxp w2, x0, x1, [x3]
       cbnz w2, 2b
       b 4f
       3:
       stxp w2, x6, x7, [x3]
       cbnz w2, 2b
       4:
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
           ldr x4, [x19, 0x8]
           ldr x5, [x19, 0x10]
           ldr x6, [x19, 0x18]
           ldr x7, [x19, 0x20]
       2:
       ldaxp x0, x1, [x3]
       cmp x0, x4
       cset w2, ne
       cmp x1, x5
       cinc w2, w2, ne
       cbz w2, 3f
       stxp w2, x0, x1, [x3]
       cbnz w2, 2b
       b 4f
       3:
       stxp w2, x6, x7, [x3]
       cbnz w2, 2b
       4:
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
           ldr x4, [x19, 0x8]
           ldr x5, [x19, 0x10]
           ldr x6, [x19, 0x18]
           ldr x7, [x19, 0x20]
       2:
       ldxp x0, x1, [x3]
       cmp x0, x4
       cset w2, ne
       cmp x1, x5
       cinc w2, w2, ne
       cbz w2, 3f
       stlxp w2, x0, x1, [x3]
       cbnz w2, 2b
       b 4f
       3:
       stlxp w2, x6, x7, [x3]
       cbnz w2, 2b
       4:
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
           ldr x4, [x19, 0x8]
           ldr x5, [x19, 0x10]
           ldr x6, [x19, 0x18]
           ldr x7, [x19, 0x20]
       2:
       ldaxp x0, x1, [x3]
       cmp x0, x4
       cset w2, ne
       cmp x1, x5
       cinc w2, w2, ne
       cbz w2, 3f
       stlxp w2, x0, x1, [x3]
       cbnz w2, 2b
       b 4f
       3:
       stlxp w2, x6, x7, [x3]
       cbnz w2, 2b
       4:
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10
       .text
       
       
       `

error: could not compile `portable-atomic` (lib) due to previous error

cargo build works fine.

(I'm on Apple M2 if that makes any difference...)

@bjorn3
Copy link
Member

bjorn3 commented Oct 22, 2023

What does as --version show?

@teohhanhui
Copy link
Author

$ as --version
GNU assembler version 2.39-15.fc38
Copyright (C) 2022 Free Software Foundation, Inc.
This program is free software; you may redistribute it under the terms of
the GNU General Public License version 3 or later.
This program has absolutely no warranty.
This assembler was configured for a target of `aarch64-redhat-linux'.

@teohhanhui
Copy link
Author

Here is the relevant line with cargo clif build -vv:

Caused by:
  process didn't exit successfully: `CARGO=/home/teohhanhui/.rustup/toolchains/1.73.0-aarch64-unknown-linux-gnu/bin/cargo CARGO_CRATE_NAME=portable_atomic CARGO_MANIFEST_DIR=/home/teohhanhui/.cargo/registry/src/index.crates.io-6f17d22bba15001f/portable-atomic-0.3.19 CARGO_PKG_AUTHORS='' CARGO_PKG_DESCRIPTION='Portable atomic types including support for 128-bit atomics, atomic float, etc.
  ' CARGO_PKG_HOMEPAGE='' CARGO_PKG_LICENSE='Apache-2.0 OR MIT' CARGO_PKG_LICENSE_FILE='' CARGO_PKG_NAME=portable-atomic CARGO_PKG_README=README.md CARGO_PKG_REPOSITORY='https://github.com/taiki-e/portable-atomic' CARGO_PKG_RUST_VERSION=1.34 CARGO_PKG_VERSION=0.3.19 CARGO_PKG_VERSION_MAJOR=0 CARGO_PKG_VERSION_MINOR=3 CARGO_PKG_VERSION_PATCH=19 CARGO_PKG_VERSION_PRE='' LD_LIBRARY_PATH='/home/teohhanhui/Projects/bowtienet/bowtie/target/debug/deps:/home/teohhanhui/Projects/bjorn3/rustc_codegen_cranelift/dist/lib:/home/teohhanhui/.rustup/toolchains/nightly-2023-10-21-aarch64-unknown-linux-gnu/lib:/home/teohhanhui/.rustup/toolchains/1.73.0-aarch64-unknown-linux-gnu/lib' OUT_DIR=/home/teohhanhui/Projects/bowtienet/bowtie/target/debug/build/portable-atomic-e80a5564de69f2ff/out /home/teohhanhui/.rustup/toolchains/nightly-2023-10-21-aarch64-unknown-linux-gnu/bin/rustc --crate-name portable_atomic --edition=2018 /home/teohhanhui/.cargo/registry/src/index.crates.io-6f17d22bba15001f/portable-atomic-0.3.19/src/lib.rs --error-format=json --json=diagnostic-rendered-ansi,artifacts,future-incompat --diagnostic-width=162 --crate-type lib --emit=dep-info,metadata,link -C embed-bitcode=no -C debuginfo=2 --cfg 'feature="default"' --cfg 'feature="fallback"' -C metadata=d450f53f36cb5f95 -C extra-filename=-d450f53f36cb5f95 --out-dir /home/teohhanhui/Projects/bowtienet/bowtie/target/debug/deps -L dependency=/home/teohhanhui/Projects/bowtienet/bowtie/target/debug/deps --cap-lints warn -Cpanic=abort -Zpanic-abort-tests -Zcodegen-backend=/home/teohhanhui/Projects/bjorn3/rustc_codegen_cranelift/dist/lib/librustc_codegen_cranelift.so --sysroot /home/teohhanhui/Projects/bjorn3/rustc_codegen_cranelift/dist --cfg portable_atomic_nightly --cfg portable_atomic_new_atomic_intrinsics --cfg portable_atomic_unstable_strict_provenance_atomic_ptr --cfg portable_atomic_llvm15` (exit status: 1)

@bjorn3
Copy link
Member

bjorn3 commented Oct 22, 2023

Looks like it is necessary to pass -march=armv8-a+lse to the assembler to make it accept the casp family of instructions. As a workaround, you could add .arg("-march=armv8-a+lse") right after https://github.com/bjorn3/rustc_codegen_cranelift/blob/56c6c86661498c61a0f877e43c9e6aa928882fa1/src/global_asm.rs#L141

@teohhanhui
Copy link
Author

The workaround works. For the record, Apple M1 / M2 are armv8.5-a: https://github.com/taiki-e/portable-atomic/blob/35ca3edc081e37ea52844d8ea6f2b5361b4bbc75/build.rs#L203

But is there a way for us to pass the correct -mcpu or -march here? If I understand correctly, there is no way for us to tell, right, considering that the inline asm might not be meant only for the current CPU (-mcpu=native)?

https://doc.rust-lang.org/reference/inline-assembly.html#rules-for-inline-assembly

  • The compiler cannot assume that the instructions in the asm are the ones that will actually end up executed.

    • This effectively means that the compiler must treat the asm! as a black box and only take the interface specification into account, not the instructions themselves.
    • Runtime code patching is allowed, via target-specific mechanisms.

@bjorn3
Copy link
Member

bjorn3 commented Oct 22, 2023

I presume it has to be based on the CPU and features from the target spec, the -Ctarget-cpu and -Ctarget-feature CLI flags, as well as the #[target_feature(enable = "...")] on the function that contains the inline asm. Just like how the set of target features would be determined for regular functions.

@taiki-e
Copy link
Member

taiki-e commented Oct 23, 2023

Does this problem also exist in the main branch of portable-atomic? I hope taiki-e/portable-atomic#98 (which switched from #[target_feature(enable = "...")] to the .arch_extension directive) fixed the problem.

EDIT: Nah, I don't think that would still work, as the .arch_extension directive would not be used if FEAT_LSE is available at compile time. That said, I believe that always using it may help avoid problems. EDIT: see #1400 (comment)

@bjorn3
Copy link
Member

bjorn3 commented Oct 23, 2023

cg_clif doesn't currently set target_feature = "lse" or any other target_feature cfg, so if portable-atomic's build script doesn't set portable_atomic_target_feature = "lse", .arch_extension should be used by portable-atomic. In any case, thanks for pointing me to that asm directive. That will likely make it easier to fix this on the cg_clif side.

@taiki-e
Copy link
Member

taiki-e commented Oct 23, 2023

if portable-atomic's build script doesn't set portable_atomic_target_feature = "lse"

If the compiler version is greater than 1.61 or is nightly or compiler version detection fails, portable-atomic will not set that cfg. (That cfg is a hack for older compilers whose aarch64_target_feature is not stable.)

https://github.com/taiki-e/portable-atomic/blob/35ca3edc081e37ea52844d8ea6f2b5361b4bbc75/build.rs#L355
https://github.com/taiki-e/portable-atomic/blob/35ca3edc081e37ea52844d8ea6f2b5361b4bbc75/build.rs#L216

And I also confirmed that cg_clif does not set cfg(target_feature = "lse").

// no compile error even if compiled with RUSTFLAGS='-C target-feature=+lse'
#[cfg(target_feature = "lse")]
compile_error!("target_feature");
#[cfg(portable_atomic_target_feature = "lse")]
compile_error!("portable_atomic_target_feature");

So, I think the reason is that #[target_feature(enable = "...")] is used for a function for dynamic detection in portable-atomic 0.3.19.

Since it has already been replaced with the .arch_extension directive in taiki-e/portable-atomic#98, as said above, the main branch of portable-atomic should not have this problem.
(I confirmed that the error does not occur on aarch64-linux with RUSTFLAGS='-C target-feature=+lse'.)

@taiki-e
Copy link
Member

taiki-e commented Oct 23, 2023

taiki-e/portable-atomic#98 has been published in portable-atomic 1.5.0. (If you are using 0.3.x and cannot upgrade to 1.x for some reason, you can also get that change by using 0.3.20+ which is built on top of 1.x.)

@taiki-e
Copy link
Member

taiki-e commented Dec 1, 2023

By the way, portable-atomic now tests compatibility with cg_clif in its CI (for x86_64, aarch64, riscv64). taiki-e/portable-atomic@4c8156a

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants