XNNPACK (?) error 2 when compiling on raspberry pi 4 8gb
I was unable to build on a Raspberry Pi 4 8GB (DietPi) due to what I believe were XNNPACK problems (pardon the lack of technical details):
In file included from /usr/include/c++/12/algorithm:60,
from /home/wuhei/dev/OnnxStream/src/sd.cpp:23:
In static member function ‘static constexpr _Tp* std::__copy_move<_IsMove, true, std::random_access_iterator_tag>::__copy_m(const _Tp*, const _Tp*, _Tp*) [with _Tp = int; bool _IsMove = false]’,
inlined from ‘constexpr _OI std::__copy_move_a2(_II, _II, _OI) [with bool _IsMove = false; _II = int*; _OI = int*]’ at /usr/include/c++/12/bits/stl_algobase.h:495:30,
inlined from ‘constexpr _OI std::__copy_move_a1(_II, _II, _OI) [with bool _IsMove = false; _II = int*; _OI = int*]’ at /usr/include/c++/12/bits/stl_algobase.h:522:42,
inlined from ‘constexpr _OI std::__copy_move_a(_II, _II, _OI) [with bool _IsMove = false; _II = int*; _OI = int*]’ at /usr/include/c++/12/bits/stl_algobase.h:529:31,
inlined from ‘constexpr _OI std::copy(_II, _II, _OI) [with _II = int*; _OI = int*]’ at /usr/include/c++/12/bits/stl_algobase.h:620:7,
inlined from ‘static _ForwardIterator std::__uninitialized_copy<true>::__uninit_copy(_InputIterator, _InputIterator, _ForwardIterator) [with _InputIterator = int*; _ForwardIterator = int*]’ at /usr/include/c++/12/bits/stl_uninitialized.h:147:27,
inlined from ‘_ForwardIterator std::uninitialized_copy(_InputIterator, _InputIterator, _ForwardIterator) [with _InputIterator = int*; _ForwardIterator = int*]’ at /usr/include/c++/12/bits/stl_uninitialized.h:185:15,
inlined from ‘constexpr _ForwardIterator std::__uninitialized_copy_a(_InputIterator, _InputIterator, _ForwardIterator, allocator<_Tp>&) [with _InputIterator = int*; _ForwardIterator = int*; _Tp = int]’ at /usr/include/c++/12/bits/stl_uninitialized.h:372:37,
inlined from ‘constexpr std::vector<_Tp, _Alloc>& std::vector<_Tp, _Alloc>::operator=(const std::vector<_Tp, _Alloc>&) [with _Tp = int; _Alloc = std::allocator<int>]’ at /usr/include/c++/12/bits/vector.tcc:251:35,
inlined from ‘ncnn::Mat prompt_solve(std::unordered_map<std::__cxx11::basic_string<char>, int>&, std::unordered_map<std::pair<std::__cxx11::basic_string<char>, std::__cxx11::basic_string<char> >, int, pair_hash>&, ncnn::Net&, std::string, onnxstream::tensor_vector<long int>*)’ at /home/wuhei/dev/OnnxStream/src/sd.cpp:2156:29:
/usr/include/c++/12/bits/stl_algobase.h:431:30: warning: ‘void* __builtin_memmove(void*, const void*, long unsigned int)’ specified bound 18446744073709551316 exceeds maximum object size 9223372036854775807 [-Wstringop-overflow=]
431 | __builtin_memmove(__result, __first, sizeof(_Tp) * _Num);
| ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[ 90%] Building CXX object CMakeFiles/sd.dir/onnxstream.cpp.o
In file included from /usr/include/c++/12/bits/stl_algobase.h:64,
from /usr/include/c++/12/vector:60,
from /home/wuhei/dev/OnnxStream/src/onnxstream.h:4,
from /home/wuhei/dev/OnnxStream/src/onnxstream.cpp:1:
/usr/include/c++/12/bits/stl_pair.h: In instantiation of ‘constexpr std::pair<typename std::__strip_reference_wrapper<typename std::decay<_Tp>::type>::__type, typename std::__strip_reference_wrapper<typename std::decay<_Tp2>::type>::__type> std::make_pair(_T1&&, _T2&&) [with _T1 = float&; _T2 = float&; typename __strip_reference_wrapper<typename decay<_Tp2>::type>::__type = float; typename decay<_Tp2>::type = float; typename __strip_reference_wrapper<typename decay<_Tp>::type>::__type = float; typename decay<_Tp>::type = float]’:
/home/wuhei/dev/OnnxStream/src/onnxstream.cpp:3048:30: required from here
/usr/include/c++/12/bits/stl_pair.h:741:5: note: parameter passing for argument of type ‘std::pair<float, float>’ when C++17 is enabled changed to match C++14 in GCC 10.1
741 | make_pair(_T1&& __x, _T2&& __y)
| ^~~~~~~~~
[100%] Linking CXX executable sd
/usr/bin/ld: XNNPACK/libXNNPACK.a(packing.cc.o): in function `xnn_packed_stride_kai_qs4_weights_and_biases':
packing.cc:(.text+0x3a14): undefined reference to `kai_get_rhs_packed_stride_rhs_pack_kxn_qsi4cxp_qsu4cxs1s0'
/usr/bin/ld: XNNPACK/libXNNPACK.a(packing.cc.o): in function `xnn_pack_kai_qs4_weights_and_biases':
packing.cc:(.text+0x3a84): undefined reference to `kai_run_rhs_pack_kxn_qsi4cxp_qsu4cxs1s0'
/usr/bin/ld: packing.cc:(.text+0x3ab0): undefined reference to `kai_run_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0'
/usr/bin/ld: XNNPACK/libXNNPACK.a(packing.cc.o): in function `xnn_packed_stride_kai_qb4_weights_and_biases':
packing.cc:(.text+0x3af8): undefined reference to `kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0'
/usr/bin/ld: XNNPACK/libXNNPACK.a(packing.cc.o): in function `xnn_pack_kai_qb4_weights_and_biases':
packing.cc:(.text+0x3b9c): undefined reference to `kai_run_rhs_pack_kxn_qsi4c32p_qsu4c32s1s0'
/usr/bin/ld: packing.cc:(.text+0x3bdc): undefined reference to `kai_run_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0'
/usr/bin/ld: XNNPACK/libmicrokernels-prod.a(qp8-f32-qb4w-gemm-minmax-1x4c16s2-aarch64-neondot.c.o): in function `xnn_qp8_f32_qb4w_gemm_minmax_ukernel_1x4c16s2__aarch64_neondot':
qp8-f32-qb4w-gemm-minmax-1x4c16s2-aarch64-neondot.c:(.text+0x28): undefined reference to `kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p4x8_1x4x32_neon_dotprod'
/usr/bin/ld: XNNPACK/libmicrokernels-prod.a(qp8-f32-qb4w-gemm-minmax-1x8c16s2-aarch64-neondot.c.o): in function `xnn_qp8_f32_qb4w_gemm_minmax_ukernel_1x8c16s2__aarch64_neondot':
qp8-f32-qb4w-gemm-minmax-1x8c16s2-aarch64-neondot.c:(.text+0x28): undefined reference to `kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4c32p8x8_1x8x32_neon_dotprod'
/usr/bin/ld: XNNPACK/libmicrokernels-prod.a(qp8-f32-qb4w-gemm-minmax-8x4c16s2-mstep2-neoni8mm.c.o): in function `xnn_qp8_f32_qb4w_gemm_minmax_ukernel_8x4c16s2__neoni8mm_mstep2':
qp8-f32-qb4w-gemm-minmax-8x4c16s2-mstep2-neoni8mm.c:(.text+0x28): undefined reference to `kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4c32p4x8_8x4x32_neon_i8mm'
/usr/bin/ld: XNNPACK/libmicrokernels-prod.a(qp8-f32-qc4w-gemm-minmax-1x8c16s2-aarch64-neondot.c.o): in function `xnn_qp8_f32_qc4w_gemm_minmax_ukernel_1x8c16s2__aarch64_neondot':
qp8-f32-qc4w-gemm-minmax-1x8c16s2-aarch64-neondot.c:(.text+0x8): undefined reference to `kai_run_matmul_clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod'
/usr/bin/ld: XNNPACK/libmicrokernels-prod.a(qp8-f32-qc4w-gemm-minmax-8x8c16s2-mstep2-neoni8mm.c.o): in function `xnn_qp8_f32_qc4w_gemm_minmax_ukernel_8x8c16s2__neoni8mm_mstep2':
qp8-f32-qc4w-gemm-minmax-8x8c16s2-mstep2-neoni8mm.c:(.text+0x8): undefined reference to `kai_run_matmul_clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm'
/usr/bin/ld: XNNPACK/libmicrokernels-prod.a(x8-packq-aarch64-neon-f32qp8-u2.c.o): in function `xnn_x8_packq_f32qp8_ukernel__aarch64_neon_u2':
x8-packq-aarch64-neon-f32qp8-u2.c:(.text+0x0): undefined reference to `kai_run_lhs_quant_pack_qai8dxp_f32'
collect2: error: ld returned 1 exit status
gmake[2]: *** [CMakeFiles/sd.dir/build.make:117: sd] Error 1
gmake[1]: *** [CMakeFiles/Makefile2:111: CMakeFiles/sd.dir/all] Error 2
gmake: *** [Makefile:91: all] Error 2
I fixed it by adding -DXNNPACK_ENABLE_KLEIDIAI=OFF to src/CMakeLists.txt.
I presume this was a fault on my side, but maybe it can help someone else?
Thanks for reporting this.
I'll test it with a RPI as soon as possible.
Thanks, Vito
I just committed the fix.
The problem was caused by one of the latest commits, as part of the XNNPACK version update.
Thanks, Vito
Tried this out on a Pi 5
git clone https://github.com/vitoplantamura/OnnxStream.git
cd OnnxStream/src/
mkdir build && cd $_
cmake ..
cmake --build . --config Release
[ 0%] Building C object CMakeFiles/microkernels-prod.dir/src/f16-avgpool/f16-avgpool-9p8x-minmax-neonfp16arith-c8.c.o
/home/foo/OnnxStream/src/build/XNNPACK-source/src/f16-avgpool/f16-avgpool-9p8x-minmax-neonfp16arith-c8.c: In function ‘xnn_f16_avgpool_minmax_ukernel_9p8x__neonfp16arith_c8’:
/home/foo/OnnxStream/src/build/XNNPACK-source/src/f16-avgpool/f16-avgpool-9p8x-minmax-neonfp16arith-c8.c:30:66: error: passing argument 1 of ‘vld1q_dup_u16’ from incompatible pointer type [-Wincompatible-pointer-types]
30 | const float16x8_t vscale = vreinterpretq_f16_u16(vld1q_dup_u16(&params->scalar.scale));
| ^~~~~~~~~~~~~~~~~~~~~
| |
| const xnn_float16 *
In file included from /home/foo/OnnxStream/src/build/XNNPACK-source/src/f16-avgpool/f16-avgpool-9p8x-minmax-neonfp16arith-c8.c:8:
/usr/lib/gcc/aarch64-linux-gnu/14/include/arm_neon.h:13230:32: note: expected ‘const uint16_t *’ {aka ‘const short unsigned int *’} but argument is of type ‘const xnn_float16 *’
13230 | vld1q_dup_u16 (const uint16_t* __a)
| ~~~~~~~~~~~~~~~~^~~
/home/foo/OnnxStream/src/build/XNNPACK-source/src/f16-avgpool/f16-avgpool-9p8x-minmax-neonfp16arith-c8.c:31:64: error: passing argument 1 of ‘vld1q_dup_u16’ from incompatible pointer type [-Wincompatible-pointer-types]
31 | const float16x8_t vmin = vreinterpretq_f16_u16(vld1q_dup_u16(&params->scalar.min));
| ^~~~~~~~~~~~~~~~~~~
| |
| const xnn_float16 *
/usr/lib/gcc/aarch64-linux-gnu/14/include/arm_neon.h:13230:32: note: expected ‘const uint16_t *’ {aka ‘const short unsigned int *’} but argument is of type ‘const xnn_float16 *’
13230 | vld1q_dup_u16 (const uint16_t* __a)
| ~~~~~~~~~~~~~~~~^~~
/home/foo/OnnxStream/src/build/XNNPACK-source/src/f16-avgpool/f16-avgpool-9p8x-minmax-neonfp16arith-c8.c:32:64: error: passing argument 1 of ‘vld1q_dup_u16’ from incompatible pointer type [-Wincompatible-pointer-types]
32 | const float16x8_t vmax = vreinterpretq_f16_u16(vld1q_dup_u16(&params->scalar.max));
| ^~~~~~~~~~~~~~~~~~~
| |
| const xnn_float16 *
/usr/lib/gcc/aarch64-linux-gnu/14/include/arm_neon.h:13230:32: note: expected ‘const uint16_t *’ {aka ‘const short unsigned int *’} but argument is of type ‘const xnn_float16 *’
13230 | vld1q_dup_u16 (const uint16_t* __a)
| ~~~~~~~~~~~~~~~~^~~
/home/foo/OnnxStream/src/build/XNNPACK-source/src/f16-avgpool/f16-avgpool-9p8x-minmax-neonfp16arith-c8.c:300:25: error: passing argument 1 of ‘vst1_lane_u32’ from incompatible pointer type [-Wincompatible-pointer-types]
300 | vst1_lane_u32((uint16_t*) output, vreinterpret_u32_f16(vout_lo), 0); output = (xnn_float16*) output + 2;
| ^~~~~~~~~~~~~~~~~~
| |
| uint16_t * {aka short unsigned int *}
/usr/lib/gcc/aarch64-linux-gnu/14/include/arm_neon.h:22274:26: note: expected ‘uint32_t *’ {aka ‘unsigned int *’} but argument is of type ‘uint16_t *’ {aka ‘short unsigned int *’}
22274 | vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane)
| ~~~~~~~~~~^~~
gmake[5]: *** [CMakeFiles/microkernels-prod.dir/build.make:79: CMakeFiles/microkernels-prod.dir/src/f16-avgpool/f16-avgpool-9p8x-minmax-neonfp16arith-c8.c.o] Error 1
gmake[4]: *** [CMakeFiles/Makefile2:208: CMakeFiles/microkernels-prod.dir/all] Error 2
gmake[3]: *** [Makefile:136: all] Error 2
gmake[2]: *** [CMakeFiles/xnnpack_build.dir/build.make:86: xnnpack_build-prefix/src/xnnpack_build-stamp/xnnpack_build-build] Error 2
gmake[1]: *** [CMakeFiles/Makefile2:90: CMakeFiles/xnnpack_build.dir/all] Error 2
gmake: *** [Makefile:91: all] Error 2
hi @DavidBachmann ,
Your specific error should be caused by an outdated version of GCC. Can you try updating it?
Vito