rustc_codegen_cranelift icon indicating copy to clipboard operation
rustc_codegen_cranelift copied to clipboard

portable-atomic 0.3.19 fails to compile on aarch64 / Apple Silicon

Open teohhanhui opened this issue 2 years ago • 11 comments

   Compiling portable-atomic v0.3.19
{standard input}: Assembler messages:
{standard input}:80: Error: selected processor does not support `casp x6,x7,x4,x5,[x0]'
{standard input}:103: Error: selected processor does not support `caspa x6,x7,x4,x5,[x0]'
{standard input}:126: Error: selected processor does not support `caspl x6,x7,x4,x5,[x0]'
{standard input}:149: Error: selected processor does not support `caspal x6,x7,x4,x5,[x0]'
error: Failed to assemble `.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
       2:
       ldxp x0, x1, [x3]
       stxp w2, x0, x1, [x3]
       cbnz w2, 2b
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
       2:
       ldaxp x0, x1, [x3]
       stxp w2, x0, x1, [x3]
       cbnz w2, 2b
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
       2:
       ldaxp x0, x1, [x3]
       stlxp w2, x0, x1, [x3]
       cbnz w2, 2b
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x0, [x19, 0x10]
           ldr x6, [x19, 0x0]
           ldr x7, [x19, 0x8]
           ldr x4, [x19, 0x18]
           ldr x5, [x19, 0x20]
       casp x6, x7, x4, x5, [x0]
           str x6, [x19, 0x0]
           str x7, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x0, [x19, 0x10]
           ldr x6, [x19, 0x0]
           ldr x7, [x19, 0x8]
           ldr x4, [x19, 0x18]
           ldr x5, [x19, 0x20]
       caspa x6, x7, x4, x5, [x0]
           str x6, [x19, 0x0]
           str x7, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x0, [x19, 0x10]
           ldr x6, [x19, 0x0]
           ldr x7, [x19, 0x8]
           ldr x4, [x19, 0x18]
           ldr x5, [x19, 0x20]
       caspl x6, x7, x4, x5, [x0]
           str x6, [x19, 0x0]
           str x7, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x0, [x19, 0x10]
           ldr x6, [x19, 0x0]
           ldr x7, [x19, 0x8]
           ldr x4, [x19, 0x18]
           ldr x5, [x19, 0x20]
       caspal x6, x7, x4, x5, [x0]
           str x6, [x19, 0x0]
           str x7, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
           ldr x4, [x19, 0x8]
           ldr x5, [x19, 0x10]
           ldr x6, [x19, 0x18]
           ldr x7, [x19, 0x20]
       2:
       ldxp x0, x1, [x3]
       cmp x0, x4
       cset w2, ne
       cmp x1, x5
       cinc w2, w2, ne
       cbz w2, 3f
       stxp w2, x0, x1, [x3]
       cbnz w2, 2b
       b 4f
       3:
       stxp w2, x6, x7, [x3]
       cbnz w2, 2b
       4:
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
           ldr x4, [x19, 0x8]
           ldr x5, [x19, 0x10]
           ldr x6, [x19, 0x18]
           ldr x7, [x19, 0x20]
       2:
       ldaxp x0, x1, [x3]
       cmp x0, x4
       cset w2, ne
       cmp x1, x5
       cinc w2, w2, ne
       cbz w2, 3f
       stxp w2, x0, x1, [x3]
       cbnz w2, 2b
       b 4f
       3:
       stxp w2, x6, x7, [x3]
       cbnz w2, 2b
       4:
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
           ldr x4, [x19, 0x8]
           ldr x5, [x19, 0x10]
           ldr x6, [x19, 0x18]
           ldr x7, [x19, 0x20]
       2:
       ldxp x0, x1, [x3]
       cmp x0, x4
       cset w2, ne
       cmp x1, x5
       cinc w2, w2, ne
       cbz w2, 3f
       stlxp w2, x0, x1, [x3]
       cbnz w2, 2b
       b 4f
       3:
       stlxp w2, x6, x7, [x3]
       cbnz w2, 2b
       4:
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9
       .text
       
       
       .globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10
       .type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10,@function
       .section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10,"ax",@progbits
       __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10:
           stp fp, lr, [sp, #-32]!
           mov fp, sp
           str x19, [sp, #24]
           mov x19, x0
           ldr x3, [x19, 0x0]
           ldr x4, [x19, 0x8]
           ldr x5, [x19, 0x10]
           ldr x6, [x19, 0x18]
           ldr x7, [x19, 0x20]
       2:
       ldaxp x0, x1, [x3]
       cmp x0, x4
       cset w2, ne
       cmp x1, x5
       cinc w2, w2, ne
       cbz w2, 3f
       stlxp w2, x0, x1, [x3]
       cbnz w2, 2b
       b 4f
       3:
       stlxp w2, x6, x7, [x3]
       cbnz w2, 2b
       4:
           str x0, [x19, 0x0]
           str x1, [x19, 0x8]
           ldr x19, [sp, #24]
           ldp fp, lr, [sp], #32
           ret
       .size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10
       .text
       
       
       `

error: could not compile `portable-atomic` (lib) due to previous error

cargo build works fine.

(I'm on Apple M2 if that makes any difference...)

teohhanhui avatar Oct 22 '23 18:10 teohhanhui

What does as --version show?

bjorn3 avatar Oct 22 '23 18:10 bjorn3

$ as --version
GNU assembler version 2.39-15.fc38
Copyright (C) 2022 Free Software Foundation, Inc.
This program is free software; you may redistribute it under the terms of
the GNU General Public License version 3 or later.
This program has absolutely no warranty.
This assembler was configured for a target of `aarch64-redhat-linux'.

teohhanhui avatar Oct 22 '23 18:10 teohhanhui

Here is the relevant line with cargo clif build -vv:

Caused by:
  process didn't exit successfully: `CARGO=/home/teohhanhui/.rustup/toolchains/1.73.0-aarch64-unknown-linux-gnu/bin/cargo CARGO_CRATE_NAME=portable_atomic CARGO_MANIFEST_DIR=/home/teohhanhui/.cargo/registry/src/index.crates.io-6f17d22bba15001f/portable-atomic-0.3.19 CARGO_PKG_AUTHORS='' CARGO_PKG_DESCRIPTION='Portable atomic types including support for 128-bit atomics, atomic float, etc.
  ' CARGO_PKG_HOMEPAGE='' CARGO_PKG_LICENSE='Apache-2.0 OR MIT' CARGO_PKG_LICENSE_FILE='' CARGO_PKG_NAME=portable-atomic CARGO_PKG_README=README.md CARGO_PKG_REPOSITORY='https://github.com/taiki-e/portable-atomic' CARGO_PKG_RUST_VERSION=1.34 CARGO_PKG_VERSION=0.3.19 CARGO_PKG_VERSION_MAJOR=0 CARGO_PKG_VERSION_MINOR=3 CARGO_PKG_VERSION_PATCH=19 CARGO_PKG_VERSION_PRE='' LD_LIBRARY_PATH='/home/teohhanhui/Projects/bowtienet/bowtie/target/debug/deps:/home/teohhanhui/Projects/bjorn3/rustc_codegen_cranelift/dist/lib:/home/teohhanhui/.rustup/toolchains/nightly-2023-10-21-aarch64-unknown-linux-gnu/lib:/home/teohhanhui/.rustup/toolchains/1.73.0-aarch64-unknown-linux-gnu/lib' OUT_DIR=/home/teohhanhui/Projects/bowtienet/bowtie/target/debug/build/portable-atomic-e80a5564de69f2ff/out /home/teohhanhui/.rustup/toolchains/nightly-2023-10-21-aarch64-unknown-linux-gnu/bin/rustc --crate-name portable_atomic --edition=2018 /home/teohhanhui/.cargo/registry/src/index.crates.io-6f17d22bba15001f/portable-atomic-0.3.19/src/lib.rs --error-format=json --json=diagnostic-rendered-ansi,artifacts,future-incompat --diagnostic-width=162 --crate-type lib --emit=dep-info,metadata,link -C embed-bitcode=no -C debuginfo=2 --cfg 'feature="default"' --cfg 'feature="fallback"' -C metadata=d450f53f36cb5f95 -C extra-filename=-d450f53f36cb5f95 --out-dir /home/teohhanhui/Projects/bowtienet/bowtie/target/debug/deps -L dependency=/home/teohhanhui/Projects/bowtienet/bowtie/target/debug/deps --cap-lints warn -Cpanic=abort -Zpanic-abort-tests -Zcodegen-backend=/home/teohhanhui/Projects/bjorn3/rustc_codegen_cranelift/dist/lib/librustc_codegen_cranelift.so --sysroot /home/teohhanhui/Projects/bjorn3/rustc_codegen_cranelift/dist --cfg portable_atomic_nightly --cfg portable_atomic_new_atomic_intrinsics --cfg portable_atomic_unstable_strict_provenance_atomic_ptr --cfg portable_atomic_llvm15` (exit status: 1)

teohhanhui avatar Oct 22 '23 18:10 teohhanhui

It looks like it is necessary to pass -march=armv8-a+lse to the assembler to make it accept the casp family of instructions. As a workaround, you could add .arg("-march=armv8-a+lse") right after https://github.com/bjorn3/rustc_codegen_cranelift/blob/56c6c86661498c61a0f877e43c9e6aa928882fa1/src/global_asm.rs#L141

bjorn3 avatar Oct 22 '23 18:10 bjorn3

The workaround works. For the record, Apple M1 / M2 are armv8.5-a: https://github.com/taiki-e/portable-atomic/blob/35ca3edc081e37ea52844d8ea6f2b5361b4bbc75/build.rs#L203

But is there a way for us to pass the correct -mcpu or -march here? If I understand correctly, there is no way for us to tell, right, considering that the inline asm might not be meant only for the current CPU (-mcpu=native)?

https://doc.rust-lang.org/reference/inline-assembly.html#rules-for-inline-assembly

  • The compiler cannot assume that the instructions in the asm are the ones that will actually end up executed.

    • This effectively means that the compiler must treat the asm! as a black box and only take the interface specification into account, not the instructions themselves.
    • Runtime code patching is allowed, via target-specific mechanisms.

teohhanhui avatar Oct 22 '23 19:10 teohhanhui

I presume it has to be based on the CPU and features from the target spec, the -Ctarget-cpu and -Ctarget-feature CLI flags, as well as the #[target_feature(enable = "...")] attribute on the function that contains the inline asm, just like how the set of target features would be determined for regular functions.

bjorn3 avatar Oct 22 '23 19:10 bjorn3

Does this problem also exist in the main branch of portable-atomic? I hope https://github.com/taiki-e/portable-atomic/pull/98 (which switched from #[target_feature(enable = "...")] to the .arch_extension directive) fixed the problem.

~~EDIT: Nah, I don't think that would still work as .arch_extension directive would not be used if FEAT_LSE is available at compile time. That said, I believe that always using it may help avoid problems.~~ EDIT: see https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400#issuecomment-1774599775

taiki-e avatar Oct 23 '23 02:10 taiki-e

cg_clif doesn't currently set target_feature = "lse" or any other target_feature cfg, so if portable-atomic's build script doesn't set portable_atomic_target_feature = "lse", then .arch_extension should be used by portable-atomic. In any case, thanks for pointing me to that asm directive. That will likely make it easier to fix this on the cg_clif side.

bjorn3 avatar Oct 23 '23 06:10 bjorn3

if portable-atomic's build script doesn't set portable_atomic_target_feature = "lse"

If the compiler version is greater than 1.61 or is nightly or compiler version detection fails, portable-atomic will not set that cfg. (That cfg is a hack for older compilers whose aarch64_target_feature is not stable.)

https://github.com/taiki-e/portable-atomic/blob/35ca3edc081e37ea52844d8ea6f2b5361b4bbc75/build.rs#L355 https://github.com/taiki-e/portable-atomic/blob/35ca3edc081e37ea52844d8ea6f2b5361b4bbc75/build.rs#L216

And I also confirmed that cg_clif does not set cfg(target_feature = "lse").

// no compile error even if compiled with RUSTFLAGS='-C target-feature=+lse'
#[cfg(target_feature = "lse")]
compile_error!("target_feature");
#[cfg(portable_atomic_target_feature = "lse")]
compile_error!("portable_atomic_target_feature");

So, I think the reason is that #[target_feature(enable = "...")] is used for a function for dynamic detection in portable-atomic 0.3.19.

As said above, it has already been replaced with the .arch_extension directive in https://github.com/taiki-e/portable-atomic/pull/98, so the main branch of portable-atomic should not have this problem. (I confirmed that the error does not occur on aarch64-linux with RUSTFLAGS='-C target-feature=+lse'.)

taiki-e avatar Oct 23 '23 07:10 taiki-e

https://github.com/taiki-e/portable-atomic/pull/98 has been published in portable-atomic 1.5.0. (If you are using 0.3.x and cannot upgrade to 1.x for some reason, you can also get that change by using 0.3.20+ which is built on top of 1.x.)

taiki-e avatar Oct 23 '23 17:10 taiki-e

By the way, portable-atomic now tests compatibility with cg_clif in its CI (for x86_64, aarch64, riscv64). https://github.com/taiki-e/portable-atomic/commit/4c8156ae026632bb5025ed7f657f2911ecf9426b

taiki-e avatar Dec 01 '23 14:12 taiki-e