burn
burn copied to clipboard
WGPU error: Shader '' parsing error: Incompatible operands: LogicalAnd(vec4<bool>, _)
I am trying to enable wgpu onnx-tests, however, I am getting this strange error. All tests are passing for ndarray and tch.
// Backend selection via cargo features.
//
// The guards below are made mutually exclusive so that exactly one `Backend`
// alias is ever defined, with precedence tch > wgpu > ndarray. Without the
// `not(feature = "backend-tch")` guard on the wgpu arm, enabling both
// `backend-wgpu` and `backend-tch` would emit two conflicting
// `pub type Backend` definitions and fail to compile. Single-feature builds
// resolve exactly as before.
#[cfg(all(feature = "backend-wgpu", not(feature = "backend-tch")))]
pub type Backend = burn::backend::Wgpu;
#[cfg(all(
    feature = "backend-ndarray",
    not(feature = "backend-wgpu"),
    not(feature = "backend-tch")
))]
pub type Backend = burn::backend::NdArray<f32>;
#[cfg(feature = "backend-tch")]
pub type Backend = burn::backend::LibTorch<f32>;
// Import the shared macro
use crate::include_models;
include_models!(and);
#[cfg(test)]
mod tests {
    use super::*;
    use crate::backend::Backend;
    use burn::tensor::{Bool, Tensor, TensorData};

    /// Element-wise logical AND on two 4-D boolean tensors: the ONNX model's
    /// output must match the truth table of `&&` applied per element.
    #[test]
    fn and() {
        let device = Default::default();
        let model: and::Model<Backend> = and::Model::new(&device);

        // Cover all four (lhs, rhs) boolean combinations in one 1x1x1x4 tensor.
        let lhs = Tensor::<Backend, 4, Bool>::from_bool(
            TensorData::from([[[[false, false, true, true]]]]),
            &device,
        );
        let rhs = Tensor::<Backend, 4, Bool>::from_bool(
            TensorData::from([[[[false, true, false, true]]]]),
            &device,
        );

        let actual = model.forward(lhs, rhs).to_data();
        // Only the `true && true` lane should come back `true`.
        let expected = TensorData::from([[[[false, false, false, true]]]]);
        actual.assert_eq(&expected, true);
    }
}
The errors:
Running tests/test_mod.rs (/Users/dilshod/Projects/burn/target/debug/deps/test_mod-9ce59c654818100c)
running 242 tests
test abs::tests::abs ... ok
test argmin::tests::argmin_1d ... ok
thread 'and::tests::and' panicked at /Users/dilshod/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/wgpu-25.0.2/src/backend/wgpu_core.rs:1051:30:
wgpu error: Validation Error
Caused by:
In Device::create_shader_module
Shader '' parsing error: Incompatible operands: LogicalAnd(vec4<bool>, _)
Incompatible operands: LogicalAnd(vec4<bool>, _)
thread 'and::tests::and' panicked at /Users/dilshod/Projects/burn/crates/burn-fusion/src/stream/execution/ordering.rs:67:38:
index out of bounds: the len is 0 but the index is 0
stack backtrace:
0: 0x10660c504 - <std::sys::backtrace::BacktraceLock::print::DisplayBacktrace as core::fmt::Display>::fmt::h373e57e2286956dc
1: 0x106629b8c - core::fmt::write::h2c4a0b98b09e3b30
2: 0x1066097e4 - std::io::Write::write_fmt::h619de9749845ad1b
3: 0x10660c3b8 - std::sys::backtrace::BacktraceLock::print::h3eb1535b8d3666ca
4: 0x10660dbf4 - std::panicking::default_hook::{{closure}}::hf623c44b740b115f
5: 0x10660da08 - std::panicking::default_hook::h8875fb31ec87dfad
6: 0x105972300 - test::test_main_with_exit_callback::{{closure}}::h9fc2c419030d8caf
7: 0x10660e6b8 - std::panicking::rust_panic_with_hook::hdd8ceeeb04975c2b
8: 0x10660e2b0 - std::panicking::begin_panic_handler::{{closure}}::hdf417b72ab8ffff8
9: 0x10660c9b0 - std::sys::backtrace::__rust_end_short_backtrace::h507d79c50996742e
10: 0x10660df8c - __rustc[5224e6b81cd82a8f]::rust_begin_unwind
11: 0x1066ac40c - core::panicking::panic_fmt::h3505bfbec5a0b799
12: 0x1066ac58c - core::panicking::panic_bounds_check::he97faa96f3a18922
13: 0x104ec10e8 - <usize as core::slice::index::SliceIndex<[T]>>::index::h08a01d4dda84def2
14: 0x104efeaf4 - <alloc::vec::Vec<T,A> as core::ops::index::Index<I>>::index::h8133231da83a569a
15: 0x1047eb0c0 - burn_fusion::stream::execution::ordering::OrderedExecution<R>::execute_operations::h03e4085842520b45
16: 0x10487753c - burn_fusion::stream::queue::execution::QueueExecution<R>::execute_strategy::hafe9fbeedeb0a88d
17: 0x104877828 - burn_fusion::stream::queue::execution::QueueExecution<R>::run::h53d03db85150cb56
18: 0x104e522dc - burn_fusion::stream::queue::execution::<impl burn_fusion::stream::queue::base::OperationQueue<R>>::execute_block_optimization::h3e645412ef6ba306
19: 0x104e523f4 - burn_fusion::stream::queue::execution::<impl burn_fusion::stream::queue::base::OperationQueue<R>>::execute::hae34ad08a946a6a0
20: 0x105436088 - <burn_fusion::stream::multi::Segment<R> as burn_fusion::stream::execution::processor::StreamSegment<<R as burn_fusion::backend::FusionRuntime>::Optimization>>::execute::ha41c4290388252df
21: 0x105328478 - burn_fusion::stream::execution::processor::Processor<O>::explore::h4db7ac7fd82d56d4
22: 0x105328650 - burn_fusion::stream::execution::processor::Processor<O>::process::hed24618643f619cd
23: 0x10542e3dc - burn_fusion::stream::multi::MultiStream<R>::enqueue_operation::h45c2d082060551ff
24: 0x10542f850 - burn_fusion::stream::multi::MultiStream<R>::register::hd9421df4a66a5641
25: 0x104b888bc - burn_fusion::server::FusionServer<R>::register::h1231600d2c3674b5
26: 0x104d11728 - <burn_fusion::client::mutex::MutexFusionClient<R> as burn_fusion::client::base::FusionClient<R>>::register::hf0793f7b23591407
27: 0x105287c84 - <burn_fusion::tensor::FusionTensor<R> as core::ops::drop::Drop>::drop::h1406b9c98e3487ba
28: 0x10524ca2c - core::ptr::drop_in_place<burn_fusion::tensor::FusionTensor<burn_cubecl::fusion::FusionCubeRuntime<cubecl_wgpu::runtime::WgpuRuntime,u32>>>::h5db23c89770f4f66
29: 0x1053d3d68 - burn_fusion::ops::boolean::<impl burn_tensor::tensor::ops::bool_tensor::BoolTensorOps<burn_fusion::backend::Fusion<B>> for burn_fusion::backend::Fusion<B>>::bool_and::h6ddce8d278aefd95
30: 0x104d5c838 - burn_tensor::tensor::api::bool::<impl burn_tensor::tensor::api::base::Tensor<B,_,burn_tensor::tensor::api::kind::Bool>>::bool_and::h95a11262423f67a5
31: 0x104e24b90 - test_mod::and::and::Model<B>::forward::h63c4cc2e490584db
32: 0x105175664 - test_mod::and::tests::and::h9a16a7f7a4633156
33: 0x104f56920 - test_mod::and::tests::and::{{closure}}::hdd84aa58ae1c80f7
34: 0x10523cde8 - core::ops::function::FnOnce::call_once::hd53d4d969a716269
35: 0x10597673c - test::__rust_begin_short_backtrace::ha9b990352ecdf496
36: 0x1059759fc - test::run_test::{{closure}}::h2eb38d5b94f96a1f
37: 0x105945c9c - std::sys::backtrace::__rust_begin_short_backtrace::h0ed48394d7c0c8df
38: 0x105948d60 - core::ops::function::FnOnce::call_once{{vtable.shim}}::hf9d9c328caaab3b1
39: 0x106610ca8 - std::sys::pal::unix::thread::Thread::new::thread_start::h447d747a543e4adc
40: 0x1915ebc0c - __pthread_cond_wait
thread 'and::tests::and' panicked at library/core/src/panicking.rs:233:5:
panic in a destructor during cleanup
thread caused non-unwinding panic. aborting.
error: test failed, to rerun pass `--test test_mod`
Caused by:
process didn't exit successfully: `/Users/dilshod/Projects/burn/target/debug/deps/test_mod-9ce59c654818100c` (signal: 6, SIGABRT: process abort signal)
Running tests/test_record_type.rs (/Users/dilshod/Projects/burn/target/debug/deps/test_record_type-bdfb707a99fec346)
running 10 tests
CC @wingertge @nathanielsimard
After this PR merge: https://github.com/tracel-ai/burn/pull/3584
you can run:
[onnx-tests]% cargo test --features backend-wgpu
I can't seem to reproduce this, the shader looks correct and seems to run, but then it crashes with a STATUS_STACK_BUFFER_OVERRUN.
I got this error on Mac. Here are the details:
[mnist]% uname -a
Darwin Mac.attlocal.net 24.6.0 Darwin Kernel Version 24.6.0: Mon Jul 14 11:30:55 PDT 2025; root:xnu-11417.140.69~1/RELEASE_ARM64_T6031 arm64
It's on Apple M3 Max
The STATUS_STACK_BUFFER_OVERRUN still seems to be triggered by the backend, because it's not happening with ndarray. So there might be a second bug in there and the wgpu compile error is hiding the secondary issue, or it's the other way around and that's why I can't reproduce the wgpu issue.
@wingertge I got the logs
CUBECL_DEBUG_LOG=/Users/dilshod/Projects/burn/crates/burn-import/out/debug.log cargo test --features backend-wgpu
that's odd, it kinda looks like a bug in the WGSL compiler that causes type inference to fail.
Shader '' parsing error: Incompatible operands: LogicalAnd(vec4<bool>, _)
Makes it seem like it can't infer the type of the rhs, but they're both explicitly defined as vec4<bool>, and obviously logical and is supported for two boolean vectors.
let l_6 = vec4<bool>(l_5);
let l_7 = vec4<bool>(l_4);
let l_8 = l_7 && l_6;
So I'm not sure why it might fail specifically on mac, or if the buffer overrun is actually hiding the error on my end.
@louisfd, since you have a Mac, can you check whether this fails for you too?
I'm not sure if vec4
https://compute.fornwall.net/#source=fn%20compute()%20-%3E%20f32%20%7B%0A%20%20let%20l_6%20%3D%20vec4%3Cbool%3E(true)%3B%0A%20%20let%20l_7%20%3D%20vec4%3Cbool%3E(true)%3B%0A%20%20let%20l_8%20%3D%20l_7%20%26%26%20l_6%3B%0A%20%20return%200.0%3B%0A%7D
I'm not sure if vec4 && vec4 is valid WGSL anyway; I remember seeing a bug in wgpu about incorrectly accepting it (though I can't find it anymore). Logical and is fine though.
https://compute.fornwall.net/#source=fn%20compute()%20-%3E%20f32%20%7B%0A%20%20let%20l_6%20%3D%20vec4%3Cbool%3E(true)%3B%0A%20%20let%20l_7%20%3D%20vec4%3Cbool%3E(true)%3B%0A%20%20let%20l_8%20%3D%20l_7%20%26%26%20l_6%3B%0A%20%20return%200.0%3B%0A%7D
If you have a mac, do you mind running?
cd crates/burn-import/onnx-tests
cargo test --features test-wgpu
Running on a mac at main gives:
running 242 tests
test add::tests::add_shape_with_scalar_and_shape ... ok
test argmin::tests::argmin_1d ... ok
test argmax::tests::argmax_1d ... ok
test argmin::tests::argmin ... FAILED
test add::tests::add_scalar_to_int_tensor_and_int_tensor_to_int_tensor ... FAILED
test argmax::tests::argmax ... FAILED
thread 'and::tests::and' panicked at /Users/arthurkb/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/wgpu-25.0.2/src/backend/wgpu_core.rs:1051:30:
wgpu error: Validation Error
Caused by:
In Device::create_shader_module
Shader '' parsing error: Incompatible operands: LogicalAnd(vec4<bool>, _)
Incompatible operands: LogicalAnd(vec4<bool>, _)
thread 'and::tests::and' panicked at /Users/arthurkb/Documents/Projects/burn/crates/burn-fusion/src/stream/execution/ordering.rs:67:38:
index out of bounds: the len is 0 but the index is 0
I have also seen STATUS_STACK_BUFFER_OVERRUN from wgpu on windows when there are wgsl compile errors.
CCing @nathanielsimard and @laggui, I am also getting this error when I am evaluating Yolo11x but passing with torch backend.
CCing @nathanielsimard and @laggui, I am also getting this error when I am evaluating Yolo11x but passing with torch backend.
There shouldn't be any shader error when using torch 😅 maybe the wrong backend is being configured?