rustc_codegen_cranelift
rustc_codegen_cranelift copied to clipboard
portable-atomic 0.3.19 fails to compile on aarch64 / Apple Silicon
Compiling portable-atomic v0.3.19
{standard input}: Assembler messages:
{standard input}:80: Error: selected processor does not support `casp x6,x7,x4,x5,[x0]'
{standard input}:103: Error: selected processor does not support `caspa x6,x7,x4,x5,[x0]'
{standard input}:126: Error: selected processor does not support `caspl x6,x7,x4,x5,[x0]'
{standard input}:149: Error: selected processor does not support `caspal x6,x7,x4,x5,[x0]'
error: Failed to assemble `.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x3, [x19, 0x0]
2:
ldxp x0, x1, [x3]
stxp w2, x0, x1, [x3]
cbnz w2, 2b
str x0, [x19, 0x0]
str x1, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n0
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x3, [x19, 0x0]
2:
ldaxp x0, x1, [x3]
stxp w2, x0, x1, [x3]
cbnz w2, 2b
str x0, [x19, 0x0]
str x1, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n1
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x3, [x19, 0x0]
2:
ldaxp x0, x1, [x3]
stlxp w2, x0, x1, [x3]
cbnz w2, 2b
str x0, [x19, 0x0]
str x1, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n2
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x0, [x19, 0x10]
ldr x6, [x19, 0x0]
ldr x7, [x19, 0x8]
ldr x4, [x19, 0x18]
ldr x5, [x19, 0x20]
casp x6, x7, x4, x5, [x0]
str x6, [x19, 0x0]
str x7, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n3
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x0, [x19, 0x10]
ldr x6, [x19, 0x0]
ldr x7, [x19, 0x8]
ldr x4, [x19, 0x18]
ldr x5, [x19, 0x20]
caspa x6, x7, x4, x5, [x0]
str x6, [x19, 0x0]
str x7, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n4
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x0, [x19, 0x10]
ldr x6, [x19, 0x0]
ldr x7, [x19, 0x8]
ldr x4, [x19, 0x18]
ldr x5, [x19, 0x20]
caspl x6, x7, x4, x5, [x0]
str x6, [x19, 0x0]
str x7, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n5
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x0, [x19, 0x10]
ldr x6, [x19, 0x0]
ldr x7, [x19, 0x8]
ldr x4, [x19, 0x18]
ldr x5, [x19, 0x20]
caspal x6, x7, x4, x5, [x0]
str x6, [x19, 0x0]
str x7, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n6
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x3, [x19, 0x0]
ldr x4, [x19, 0x8]
ldr x5, [x19, 0x10]
ldr x6, [x19, 0x18]
ldr x7, [x19, 0x20]
2:
ldxp x0, x1, [x3]
cmp x0, x4
cset w2, ne
cmp x1, x5
cinc w2, w2, ne
cbz w2, 3f
stxp w2, x0, x1, [x3]
cbnz w2, 2b
b 4f
3:
stxp w2, x6, x7, [x3]
cbnz w2, 2b
4:
str x0, [x19, 0x0]
str x1, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n7
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x3, [x19, 0x0]
ldr x4, [x19, 0x8]
ldr x5, [x19, 0x10]
ldr x6, [x19, 0x18]
ldr x7, [x19, 0x20]
2:
ldaxp x0, x1, [x3]
cmp x0, x4
cset w2, ne
cmp x1, x5
cinc w2, w2, ne
cbz w2, 3f
stxp w2, x0, x1, [x3]
cbnz w2, 2b
b 4f
3:
stxp w2, x6, x7, [x3]
cbnz w2, 2b
4:
str x0, [x19, 0x0]
str x1, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n8
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x3, [x19, 0x0]
ldr x4, [x19, 0x8]
ldr x5, [x19, 0x10]
ldr x6, [x19, 0x18]
ldr x7, [x19, 0x20]
2:
ldxp x0, x1, [x3]
cmp x0, x4
cset w2, ne
cmp x1, x5
cinc w2, w2, ne
cbz w2, 3f
stlxp w2, x0, x1, [x3]
cbnz w2, 2b
b 4f
3:
stlxp w2, x6, x7, [x3]
cbnz w2, 2b
4:
str x0, [x19, 0x0]
str x1, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n9
.text
.globl __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10
.type __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10,@function
.section .text.__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10,"ax",@progbits
__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10:
stp fp, lr, [sp, #-32]!
mov fp, sp
str x19, [sp, #24]
mov x19, x0
ldr x3, [x19, 0x0]
ldr x4, [x19, 0x8]
ldr x5, [x19, 0x10]
ldr x6, [x19, 0x18]
ldr x7, [x19, 0x20]
2:
ldaxp x0, x1, [x3]
cmp x0, x4
cset w2, ne
cmp x1, x5
cinc w2, w2, ne
cbz w2, 3f
stlxp w2, x0, x1, [x3]
cbnz w2, 2b
b 4f
3:
stlxp w2, x6, x7, [x3]
cbnz w2, 2b
4:
str x0, [x19, 0x0]
str x1, [x19, 0x8]
ldr x19, [sp, #24]
ldp fp, lr, [sp], #32
ret
.size __inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10, .-__inline_asm_portable_atomic__ee358c2b4b9105a6_cgu__0_n10
.text
`
error: could not compile `portable-atomic` (lib) due to previous error
cargo build works fine.
(I'm on Apple M2 if that makes any difference...)
What does as --version show?
$ as --version
GNU assembler version 2.39-15.fc38
Copyright (C) 2022 Free Software Foundation, Inc.
This program is free software; you may redistribute it under the terms of
the GNU General Public License version 3 or later.
This program has absolutely no warranty.
This assembler was configured for a target of `aarch64-redhat-linux'.
Here is the relevant line with cargo clif build -vv:
Caused by:
process didn't exit successfully: `CARGO=/home/teohhanhui/.rustup/toolchains/1.73.0-aarch64-unknown-linux-gnu/bin/cargo CARGO_CRATE_NAME=portable_atomic CARGO_MANIFEST_DIR=/home/teohhanhui/.cargo/registry/src/index.crates.io-6f17d22bba15001f/portable-atomic-0.3.19 CARGO_PKG_AUTHORS='' CARGO_PKG_DESCRIPTION='Portable atomic types including support for 128-bit atomics, atomic float, etc.
' CARGO_PKG_HOMEPAGE='' CARGO_PKG_LICENSE='Apache-2.0 OR MIT' CARGO_PKG_LICENSE_FILE='' CARGO_PKG_NAME=portable-atomic CARGO_PKG_README=README.md CARGO_PKG_REPOSITORY='https://github.com/taiki-e/portable-atomic' CARGO_PKG_RUST_VERSION=1.34 CARGO_PKG_VERSION=0.3.19 CARGO_PKG_VERSION_MAJOR=0 CARGO_PKG_VERSION_MINOR=3 CARGO_PKG_VERSION_PATCH=19 CARGO_PKG_VERSION_PRE='' LD_LIBRARY_PATH='/home/teohhanhui/Projects/bowtienet/bowtie/target/debug/deps:/home/teohhanhui/Projects/bjorn3/rustc_codegen_cranelift/dist/lib:/home/teohhanhui/.rustup/toolchains/nightly-2023-10-21-aarch64-unknown-linux-gnu/lib:/home/teohhanhui/.rustup/toolchains/1.73.0-aarch64-unknown-linux-gnu/lib' OUT_DIR=/home/teohhanhui/Projects/bowtienet/bowtie/target/debug/build/portable-atomic-e80a5564de69f2ff/out /home/teohhanhui/.rustup/toolchains/nightly-2023-10-21-aarch64-unknown-linux-gnu/bin/rustc --crate-name portable_atomic --edition=2018 /home/teohhanhui/.cargo/registry/src/index.crates.io-6f17d22bba15001f/portable-atomic-0.3.19/src/lib.rs --error-format=json --json=diagnostic-rendered-ansi,artifacts,future-incompat --diagnostic-width=162 --crate-type lib --emit=dep-info,metadata,link -C embed-bitcode=no -C debuginfo=2 --cfg 'feature="default"' --cfg 'feature="fallback"' -C metadata=d450f53f36cb5f95 -C extra-filename=-d450f53f36cb5f95 --out-dir /home/teohhanhui/Projects/bowtienet/bowtie/target/debug/deps -L dependency=/home/teohhanhui/Projects/bowtienet/bowtie/target/debug/deps --cap-lints warn -Cpanic=abort -Zpanic-abort-tests -Zcodegen-backend=/home/teohhanhui/Projects/bjorn3/rustc_codegen_cranelift/dist/lib/librustc_codegen_cranelift.so --sysroot /home/teohhanhui/Projects/bjorn3/rustc_codegen_cranelift/dist --cfg portable_atomic_nightly --cfg portable_atomic_new_atomic_intrinsics --cfg portable_atomic_unstable_strict_provenance_atomic_ptr --cfg portable_atomic_llvm15` (exit status: 1)
It looks like it is necessary to pass -march=armv8-a+lse to the assembler to make it accept the casp family of instructions. As a workaround, you could add .arg("-march=armv8-a+lse") right after https://github.com/bjorn3/rustc_codegen_cranelift/blob/56c6c86661498c61a0f877e43c9e6aa928882fa1/src/global_asm.rs#L141
The workaround works. For the record, Apple M1 / M2 are armv8.5-a: https://github.com/taiki-e/portable-atomic/blob/35ca3edc081e37ea52844d8ea6f2b5361b4bbc75/build.rs#L203
But is there a way for us to pass the correct -mcpu or -march here? If I understand correctly, there is no way for us to tell, right? After all, the inline asm might not be meant only for the current CPU (-mcpu=native).
https://doc.rust-lang.org/reference/inline-assembly.html#rules-for-inline-assembly
The compiler cannot assume that the instructions in the asm are the ones that will actually end up executed.
- This effectively means that the compiler must treat the asm! as a black box and only take the interface specification into account, not the instructions themselves.
- Runtime code patching is allowed, via target-specific mechanisms.
I presume it has to be based on the CPU and features from the target spec, the -Ctarget-cpu and -Ctarget-feature CLI flags, as well as the #[target_feature(enable = "...")] attribute on the function that contains the inline asm, just like how the set of target features would be determined for regular functions.
Does this problem also exist in the main branch of portable-atomic? I hope https://github.com/taiki-e/portable-atomic/pull/98 (which switched from #[target_feature(enable = "...")] to .arch_extension directive) fixed the problem.
~~EDIT: Nah, I don't think that would still work as .arch_extension directive would not be used if FEAT_LSE is available at compile time. That said, I believe that always using it may help avoid problems.~~ EDIT: see https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400#issuecomment-1774599775
cg_clif doesn't currently set target_feature = "lse" or any other target_feature cfg, so if portable-atomic's build script doesn't set portable_atomic_target_feature = "lse", the .arch_extension directive should be used by portable-atomic. In any case, thanks for pointing me to that asm directive. That will likely make it easier to fix this on the cg_clif side.
if portable-atomic's build script doesn't set
portable_atomic_target_feature = "lse"
If the compiler version is greater than 1.61 or is nightly or compiler version detection fails, portable-atomic will not set that cfg. (That cfg is a hack for older compilers whose aarch64_target_feature is not stable.)
https://github.com/taiki-e/portable-atomic/blob/35ca3edc081e37ea52844d8ea6f2b5361b4bbc75/build.rs#L355 https://github.com/taiki-e/portable-atomic/blob/35ca3edc081e37ea52844d8ea6f2b5361b4bbc75/build.rs#L216
And I also confirmed that cg_clif does not set cfg(target_feature = "lse").
// no compile error even if compiled with RUSTFLAGS='-C target-feature=+lse'
#[cfg(target_feature = "lse")]
compile_error!("target_feature");
#[cfg(portable_atomic_target_feature = "lse")]
compile_error!("portable_atomic_target_feature");
So, I think the reason is that #[target_feature(enable = "...")] is used for a function for dynamic detection in portable-atomic 0.3.19.
Since it has already been replaced with the .arch_extension directive in https://github.com/taiki-e/portable-atomic/pull/98, as said above, the main branch of portable-atomic should not have this problem.
(I confirmed that the error does not occur on aarch64-linux with RUSTFLAGS='-C target-feature=+lse'.)
https://github.com/taiki-e/portable-atomic/pull/98 has been published in portable-atomic 1.5.0. (If you are using 0.3.x and cannot upgrade to 1.x for some reason, you can also get that change by using 0.3.20+ which is built on top of 1.x.)
By the way, portable-atomic now tests compatibility with cg_clif in its CI (for x86_64, aarch64, riscv64). https://github.com/taiki-e/portable-atomic/commit/4c8156ae026632bb5025ed7f657f2911ecf9426b