rust-gpu icon indicating copy to clipboard operation
rust-gpu copied to clipboard

Failed to compile on aarch64-unknown-linux-gnu

Open Nehliin opened this issue 1 year ago • 5 comments

Expected Behaviour

It should compile successfully

System Info

  • Rust: 1.62.0
  • OS: aarch64-unknown-linux-gnu
  • GPU: none

Backtrace

Backtrace

 --- stderr
  thread 'rustc' panicked at 'called `Result::unwrap()` on an `Err` value: Custom { kind: InvalidData, error: "Invalid file size field in entry header (\"0644  1982\")" }', /mnt/disks/storage_disk/rust/cargo/git/checkouts/rust-gpu-e0a37a82a46176e6/116bf9c/crates/rustc_codegen_spirv/src/link.rs:525:35
  stack backtrace:
     0:     0xffff86127ca0 - std::backtrace_rs::backtrace::libunwind::trace::h648d14ad39db9f5f
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5
     1:     0xffff86127ca0 - std::backtrace_rs::backtrace::trace_unsynchronized::h80e5b63ff877aab3
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5
     2:     0xffff86127ca0 - std::sys_common::backtrace::_print_fmt::h932adf53ae94968c
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/sys_common/backtrace.rs:66:5
     3:     0xffff86127ca0 - <std::sys_common::backtrace::_print::DisplayBacktrace as core::fmt::Display>::fmt::ha9436005d9d5d0b4
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/sys_common/backtrace.rs:45:22
     4:     0xffff8617c900 - core::fmt::write::h1ce38ca39a011fbe
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/core/src/fmt/mod.rs:1194:17
     5:     0xffff8611a4bc - std::io::Write::write_fmt::h218238e7bc4dd547
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/io/mod.rs:1655:15
     6:     0xffff8612a798 - std::sys_common::backtrace::_print::h3b3966277c6e5cdf
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/sys_common/backtrace.rs:48:5
     7:     0xffff8612a798 - std::sys_common::backtrace::print::hd6ae1feb7e68fbd1
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/sys_common/backtrace.rs:35:9
     8:     0xffff8612a798 - std::panicking::default_hook::{{closure}}::hbf75d1a84a6e3ae1
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/panicking.rs:295:22
     9:     0xffff8612a3fc - std::panicking::default_hook::hbfbf632144e7acd6
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/panicking.rs:314:9
    10:     0xffff84f50f1c - rustc_codegen_spirv::__rustc_codegen_backend::{{closure}}::h52939a582f59f3b6
    11:     0xffff7ca17f1c - <alloc::boxed::Box<F,A> as core::ops::function::Fn<Args>>::call::h0fefec2037b9a1e3
    12:     0xffff7ca1ef98 - proc_macro::bridge::client::<impl proc_macro::bridge::Bridge>::enter::{{closure}}::{{closure}}::h1cc7e1ab5e8d9a05
    13:     0xffff8612aef8 - std::panicking::rust_panic_with_hook::hc2b832ef03c3b55e
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/panicking.rs:702:17
    14:     0xffff8612ad3c - std::panicking::begin_panic_handler::{{closure}}::ha0e3a421d0c2bfda
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/panicking.rs:588:13
    15:     0xffff86128148 - std::sys_common::backtrace::__rust_end_short_backtrace::h92ee4ff4607313a9
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/sys_common/backtrace.rs:138:18
    16:     0xffff8612aa88 - rust_begin_unwind
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/panicking.rs:584:5
    17:     0xffff860f6a70 - core::panicking::panic_fmt::h3f83846442da705e
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/core/src/panicking.rs:142:14
    18:     0xffff860f6b14 - core::result::unwrap_failed::h0c52b2a289501a24
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/core/src/result.rs:1785:5
    19:     0xffff8508395c - rustc_codegen_spirv::link::link::hc48135cd5b3a8647
    20:     0xffff84f4f39c - <rustc_codegen_spirv::SpirvCodegenBackend as rustc_codegen_ssa::traits::backend::CodegenBackend>::link::h95f0c769ed7a4ec6
    21:     0xffff86eff6a0 - <rustc_interface[eac5e8417c8637c9]::queries::Linker>::link
    22:     0xffff86da28e4 - rustc_span[4a959f8ef4519685]::with_source_map::<core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>, rustc_interface[eac5e8417c8637c9]::interface::create_compiler_and_run<core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>, rustc_driver[8f95f0e38abb57c4]::run_compiler::{closure#1}>::{closure#1}>
    23:     0xffff86d43c7c - <scoped_tls[634381164f613822]::ScopedKey<rustc_span[4a959f8ef4519685]::SessionGlobals>>::set::<rustc_interface[eac5e8417c8637c9]::interface::run_compiler<core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>, rustc_driver[8f95f0e38abb57c4]::run_compiler::{closure#1}>::{closure#0}, core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>>
    24:     0xffff86d64868 - std[c56c092363f4ade8]::sys_common::backtrace::__rust_begin_short_backtrace::<rustc_interface[eac5e8417c8637c9]::util::run_in_thread_pool_with_globals<rustc_interface[eac5e8417c8637c9]::interface::run_compiler<core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>, rustc_driver[8f95f0e38abb57c4]::run_compiler::{closure#1}>::{closure#0}, core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>>::{closure#0}, core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>>
    25:     0xffff86d65a40 - <<std[c56c092363f4ade8]::thread::Builder>::spawn_unchecked_<rustc_interface[eac5e8417c8637c9]::util::run_in_thread_pool_with_globals<rustc_interface[eac5e8417c8637c9]::interface::run_compiler<core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>, rustc_driver[8f95f0e38abb57c4]::run_compiler::{closure#1}>::{closure#0}, core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>>::{closure#0}, core[b005b901b8faa718]::result::Result<(), rustc_errors[e9bf8764f3e94d3f]::ErrorGuaranteed>>::{closure#1} as core[b005b901b8faa718]::ops::function::FnOnce<()>>::call_once::{shim:vtable#0}
    26:     0xffff86134538 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::h56c477bdbe6a2be7
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/alloc/src/boxed.rs:1858:9
    27:     0xffff86134538 - <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once::hb47603d4e399f69f
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/alloc/src/boxed.rs:1858:9
    28:     0xffff86134538 - std::sys::unix::thread::Thread::new::thread_start::hbb365f3870df3279
                                 at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/sys/unix/thread.rs:108:17
    29:     0xffff8de2f628 - start_thread
    30:     0xffff85ff001c - <unknown>
    31:                0x0 - <unknown>
  error: internal compiler error: unexpected panic

  note: the compiler unexpectedly panicked. this is a bug.

  note: we would appreciate a bug report: https://github.com/EmbarkStudios/rust-gpu/issues/new

  note: rustc 1.62.0-nightly (1f7fb6413 2022-04-10) running on aarch64-unknown-linux-gnu

  note: compiler flags: --crate-type lib --crate-type dylib -C opt-level=3 -C embed-bitcode=no -Z unstable-options -Z codegen-backend=/mnt/disks/storage_disk/ark/components/shader-builder-rust/../../target/ark-shader-builder-rust/release/librustc_codegen_spirv.so -C symbol-mangling-version=v0 -C llvm-args=--module-output=multiple --spirv-metadata=name-variables -C target-feature=+StorageImageWriteWithoutFormat,+RuntimeDescriptorArray,+ImageQuery,+ext:SPV_EXT_descriptor_indexing

  note: some of the compiler flags provided by cargo are hidden

  query stack during panic:
  end of query stack
  note: `rust-gpu` version 0.4.0-alpha.12

Nehliin avatar Aug 03 '22 13:08 Nehliin

These are both marked as fixed but even then, they're making me suspicious:

  • https://github.com/mdsteele/rust-ar/issues/6
  • https://github.com/mdsteele/rust-ar/issues/10

What's the name of the crate that failed to compile? If it's longer than 16 characters, what happens if you rename it to less than 16 characters? (feel free to try e.g. 13 or 7, just to be sure)

eddyb avatar Aug 03 '22 13:08 eddyb

The crate which ran rust-gpu in its build script was longer than 16 characters, but I still seem to have the same problem even after making it shorter (8 long).

Nehliin avatar Aug 03 '22 14:08 Nehliin

The crate which ran rust-gpu in its build script was longer than 16 characters

Sorry, no, I wasn't referring to the crate using Rust-GPU, but the shader crate being compiled with Rust-GPU (you posted an ICE output from a compilation, but I want the few lines below/above that in the output, that include "Compiling my-shaders-foo" or whatever the name is).

eddyb avatar Aug 03 '22 21:08 eddyb

I changed that as well to be a very short name (a-s). Here is the output above the previous backtrace:

     Finished release [optimized] target(s) in 0.04s
       Running `../../target/ark-shader-builder-rust/release/ark-shader-builder-rust --shaders-path ../shaders --out-entry-points-path /mnt/disks/storage_disk/ark/target/debug/build/ark-sman-7b3e1965247604c8/out/rust_shader_entry_points.rs --out-contents-path /mnt/disks/storage_disk/ark/target/debug/build/ark-sman-7b3e1965247604c8/out/rust_shader_contents.rs`
     Compiling a-s v0.1.0 (/mnt/disks/storage_disk/ark/components/shaders)

Nehliin avatar Aug 04 '22 11:08 Nehliin

Never mind those ideas: if it is file name length, it's already ~guaranteed to be over 16:

$ ar tv ~/.rustup/toolchains/nightly-x*64*/lib/rustlib/x*64*/lib/libcore-*.rlib
rw-r--r-- 0/0 55736376 Jan  1 02:00 1970 lib.rmeta
rw-r--r-- 0/0 3735096 Jan  1 02:00 1970 core-0e3656b1fda5fd7b.core.7728b167-cgu.0.rcgu.o

(the first hash, 0e3656b1fda5fd7b above, which I believe to be per-crate, not per-CGU, is already 16 long on its own)

eddyb avatar Aug 08 '22 05:08 eddyb

So I was able to run a build of the current main (805297146) via binfmt+qemu+podman: (warning: don't do this in your regular Rust-GPU checkout, make a new workdir, otherwise host Cargo will probably get very confused when finding the target dir with all the "aarch64 host" files)

$ git log --oneline | head -n1
805297146 Update to 0.4.0-alpha.14 (#905)
$ podman run -it --userns=keep-id -v $PWD:$PWD -w $PWD docker.io/arm64v8/rust
Trying to pull docker.io/arm64v8/rust:latest...
...
eddy@168e4dacc9b4:~$ cargo compiletest
info: syncing channel updates for 'nightly-2022-04-11-aarch64-unknown-linux-gnu'
...
  Downloaded ar v0.9.0
...
   Compiling rustc_codegen_spirv v0.4.0-alpha.14 (/home/eddy/Projects/rust-gpu-issue-889/crates/rustc_codegen_spirv)
   Compiling compiletests v0.0.0 (/home/eddy/Projects/rust-gpu-issue-889/tests)
    Finished release [optimized] target(s) in 42m 18s
     Running `target/release/compiletests`
...
   Compiling compiletests-deps-helper v0.0.0 (/home/eddy/Projects/rust-gpu-issue-889/tests/deps-helper)
    Finished dev [unoptimized + debuginfo] target(s) in 4m 05s

running 185 tests
...
test result: ok. 184 passed; 0 failed; 1 ignored; 0 measured; 0 filtered out; finished in 279.66s
eddy@168e4dacc9b4:~$ ar tv target/compiletest-deps/spirv-unknown-spv1.3/debug/deps/libcompiletests_deps_helper-*.rlib
--------- 0/0   1494 Jan  1 00:00 1970 lib.rmeta
rw-r--r-- 1000/100    116 Aug 15 16:15 2022 compiletests_deps_helper-11b751367bf20fcb.56c7h0wvqsscq974.rcgu.o

(omitted most of the noisy parts of the log with ...)

It's a bit on the slow side, but keep in mind that it includes the native build of spirv-tools-sys too. But yeah, it works, so to narrow this down we'll have to figure out what's different in your environment.

eddyb avatar Aug 15 '22 17:08 eddyb

Got a .rlib from @Nehliin and finally looked in the right place:

Fields
[  file name   ][  mtime   ][UID ][GID ][ mode ][  size  ] 
@eddyb's
/0              1660587966  1000  100   100644  116       `
@Nehliin's
/0              1660289552  516447758516447758100644  116       `

So the ar crate is willing to include UIDs/GIDs so large that they push other fields out of their place. Honestly, it should write into a [u8; 60] buffer on the stack first and detect overflow that way. (I think fields "touching" is fine, i.e. space is not a separator, just used instead of 0s)

I didn't even know UIDs/GIDs could be that large, but wikipedia has some interesting history:

POSIX requires the UID to be an integer type. Most Unix-like operating systems represent the UID as an unsigned integer. The size of UID values varies amongst different systems; some UNIX OS's used 15-bit values, allowing values up to 32767, while others such as Linux (before version 2.4) supported 16-bit UIDs, making 65536 unique IDs possible. The majority of modern Unix-like systems (e.g., Solaris-2.0 in 1990, Linux 2.4 in 2001) have switched to 32-bit UIDs

There's nothing aarch64-specific here, and that was entirely a red herring. It's just the cloud/container/etc. setup that @Nehliin is using (just GCP defaults?), and it should be possible to reproduce on any platform.

Two things are going wrong here:

  • we shouldn't be storing this information anyway (and we don't for lib.rmeta)
    • rustc doesn't do it (nor do ar/llvm-ar by default, they both require a U modifier) and that's why not everything is broken in such a high-UID/GID environment
    • the ar crate using Header::from_metadata for append_path/append_file is a hazardous default here - among other things, it will mess with reproducible builds because of the mtime field (cc @bjorn3)
  • the ar crate is being buggy about ar-incompatible UIDs/GIDs: it should either error or truncate
    • looks like GNU ar will keep only the first 6 characters, while llvm-ar only the last 6 characters (out of the base 10 representation of the UID/GID), so they're not even consistent:
$ touch foo.o
$ sudo chown 1999999992:3999999994 foo.o
$ ar qU gnu.a foo.o
ar: creating gnu.a
$ llvm-ar qU llvm.a foo.o
llvm-ar: warning: creating llvm.a
$ cat gnu.a
!<arch>
foo.o/          1660643250  199999399999100644  0         `
$ cat llvm.a
!<arch>
foo.o/          1660643250  999992999994644     0         `

eddyb avatar Aug 16 '22 09:08 eddyb

the ar crate using Header::from_metadata for append_path/append_file is a hazardous default here - among other things, it will mess with reproducible builds because of the mtime field (cc @bjorn3)

cg_clif uses Header::new followed by .append() instead. This enables reproducible builds as no permissions are set in the Header. By the way, https://github.com/rust-lang/rust/pull/97485 will switch all in-tree codegen backends to a rust rewrite of llvm's archive writer, which has an explicit flag for reproducible builds that is unconditionally set by rustc. The writer impl can be found at https://github.com/rust-lang/ar_archive_writer (or the ar_archive_writer crate on crates.io). Both llvm and this rust rewrite truncate if it doesn't fit in 6 characters: https://github.com/rust-lang/ar_archive_writer/blob/532a0453ebea7b103546ce7c62cebeef3b54979e/src/archive_writer.rs#L56-L57

bjorn3 avatar Aug 16 '22 11:08 bjorn3

Instead of fixing this (pretty much in the way @bjorn3 described) with no documented repro, I decided to take another stab at https://github.com/EmbarkStudios/rust-gpu/issues/889#issuecomment-1215514515 but with native x64 containers and --uidmap-ing instead:

$ podman run -it --uidmap 1999999997:0:1 --gidmap 1899999996:0:1 --user 1999999997:1899999996 -w /tmp docker.io/rust
1999999997@a87f7a319144:~$ id
uid=1999999997(1999999997) gid=1899999996(1899999996) groups=1899999996(1899999996)
1999999997@a87f7a319144:~$ git clone https://github.com/EmbarkStudios/rust-gpu
Cloning into 'rust-gpu'...
...
1999999997@a87f7a319144:~$ cd rust-gpu
1999999997@a87f7a319144:~/rust-gpu$ cargo compiletest
info: syncing channel updates for 'nightly-2022-04-11-x86_64-unknown-linux-gnu'
...
  Downloaded ar v0.9.0
...
   Compiling rustc_codegen_spirv v0.4.0-alpha.14 (/tmp/rust-gpu/crates/rustc_codegen_spirv)
   Compiling compiletests v0.0.0 (/tmp/rust-gpu/tests)
    Finished release [optimized] target(s) in 4m 06s
     Running `target/release/compiletests`
...
   Compiling compiletests-deps-helper v0.0.0 (/tmp/rust-gpu/tests/deps-helper)
    Finished dev [unoptimized + debuginfo] target(s) in 18.97s

running 185 tests
normalized stderr:
thread 'rustc' panicked at 'called `Result::unwrap()` on an `Err` value: Custom { kind: InvalidData, error: "Invalid file size field in entry header (/"100644  97/")" }', crates/rustc_codegen_spirv/src/link.rs:525:35
...
test result: FAILED. 18 passed; 166 failed; 1 ignored; 0 measured; 0 filtered out; finished in 7.11s

(again omitted most of the noisy parts of the log with ...)


Also, there's a way to find rlibs with the unnecessary metadata like UIDs/GIDs (when it doesn't cause ICEs):

$ find target -type f -name '*.rlib' -exec bash -c 'ar tv "{}" | grep -v 0/0 > /dev/null && (echo "{}:"; ar tv "{}"; echo)' ';'
target/compiletest-deps/spirv-unknown-spv1.3/debug/libcompiletests_deps_helper.rlib:
--------- 0/0   1494 Jan  1 02:00 1970 lib.rmeta
rw-r--r-- 1000/100    116 Aug 15 21:26 2022 compiletests_deps_helper-11b751367bf20fcb.56c7h0wvqsscq974.rcgu.o

target/compiletest-deps/spirv-unknown-spv1.3/debug/deps/libbitflags-8bcc6a30d8aa2e2e.rlib:
--------- 0/0  49469 Jan  1 02:00 1970 lib.rmeta
rw-r--r-- 1000/100    116 Aug 15 21:24 2022 bitflags-8bcc6a30d8aa2e2e.bitflags.f9f0adc0-cgu.0.rcgu.o

...

(I'm only seeing results from Rust-GPU's usage of the ar crate, none from normal rustc artifacts, which makes sense, otherwise they would also break in the high-UID/GID environments)

eddyb avatar Aug 17 '22 09:08 eddyb