Disable sse41, sse42, avx, avx2 on 32-bit

Open vbraun opened this issue 1 year ago • 0 comments
On an AMD Ryzen 5 3600 running 32-bit Debian 12, linbox fails to build with:
make[1]: Leaving directory '/var/lib/buildbot/worker/sage_git/build/local/var/tmp/sage/build/linbox-1.7.0+sage-2024-05-18/src/linbox/algorithms/matrix-blas3'
Making all in polynomial-matrix
make[1]: Entering directory '/var/lib/buildbot/worker/sage_git/build/local/var/tmp/sage/build/linbox-1.7.0+sage-2024-05-18/src/linbox/algorithms/polynomial-matrix'
make[1]: Nothing to be done for 'all'.
make[1]: Leaving directory '/var/lib/buildbot/worker/sage_git/build/local/var/tmp/sage/build/linbox-1.7.0+sage-2024-05-18/src/linbox/algorithms/polynomial-matrix'
make[1]: Entering directory '/var/lib/buildbot/worker/sage_git/build/local/var/tmp/sage/build/linbox-1.7.0+sage-2024-05-18/src/linbox/algorithms'
/usr/bin/bash ../../libtool  --tag=CXX   --mode=compile g++ -std=gnu++11 -DHAVE_CONFIG_H -I. -I../..  -I../.. -DDISABLE_COMMENTATOR -O2 -march=native -mfpmath=sse -Wall -DNDEBUG -UDEBUG -DFFLAS_COMPILED -DFFPACK_COMPILED  -I/var/lib/buildbot/worker/sage_git/build/local/include -pthread -I/var/lib/buildbot/worker/sage_git/build/local/include  -I/var/lib/buildbot/worker/sage_git/build/local/include -I/var/lib/buildbot/worker/sage_git/build/local/include  -g -O2 -c -o diophantine-solver.lo diophantine-solver.C
libtool: compile:  g++ -std=gnu++11 -DHAVE_CONFIG_H -I. -I../.. -I../.. -DDISABLE_COMMENTATOR -O2 -march=native -mfpmath=sse -Wall -DNDEBUG -UDEBUG -DFFLAS_COMPILED -DFFPACK_COMPILED -I/var/lib/buildbot/worker/sage_git/build/local/include -pthread -I/var/lib/buildbot/worker/sage_git/build/local/include -I/var/lib/buildbot/worker/sage_git/build/local/include -I/var/lib/buildbot/worker/sage_git/build/local/include -g -O2 -c diophantine-solver.C  -fPIC -DPIC -o .libs/diophantine-solver.o
In file included from ../../linbox/matrix/dense-matrix.h:37,
                 from ../../linbox/solutions/methods.h:26,
                 from ../../linbox/algorithms/rational-solver.h:41,
                 from ../../linbox/algorithms/diophantine-solver.h:29,
                 from diophantine-solver.C:26:
../../linbox/vector/vector-traits.h:153:29: warning: 'template<class _Arg1, class _Arg2, class _Result> struct std::binary_function' is deprecated [-Wdeprecated-declarations]
  153 |                 public std::binary_function<const std::pair<size_t, Element>&, const std::pair<size_t, Element>&, bool > {
      |                             ^~~~~~~~~~~~~~~
In file included from /usr/include/c++/12/string:48,
                 from /usr/include/c++/12/bits/locale_classes.h:40,
                 from /usr/include/c++/12/bits/ios_base.h:41,
                 from /usr/include/c++/12/ios:42,
                 from /usr/include/c++/12/ostream:38,
                 from /usr/include/c++/12/iostream:39,
                 from ../../linbox/linbox-config.h:45,
                 from diophantine-solver.C:25:
/usr/include/c++/12/bits/stl_function.h:131:12: note: declared here
  131 |     struct binary_function
      |            ^~~~~~~~~~~~~~~
In file included from ../../linbox/algorithms/polynomial-matrix/fft.h:43,
                 from ../../linbox/algorithms/polynomial-matrix/matpoly-mult-fft-wordsize-fast.inl:33,
                 from ../../linbox/algorithms/polynomial-matrix/matpoly-mult-fft.h:369,
                 from ../../linbox/algorithms/polynomial-matrix/polynomial-matrix-domain.h:34,
                 from ../../linbox/algorithms/polynomial-matrix/order-basis.h:41,
                 from ../../linbox/algorithms/block-massey-domain.h:42,
                 from ../../linbox/algorithms/lifting-container.h:45,
                 from ../../linbox/algorithms/./dixon-solver/./dixon-solver-dense.inl:26,
                 from ../../linbox/algorithms/./dixon-solver/dixon-solver-dense.h:363,
                 from ../../linbox/algorithms/rational-solver.h:583:
../../linbox/algorithms/polynomial-matrix/fft-integral.inl: In member function 'void LinBox::FFT_base<Field, Simd, typename std::enable_if<(Field::is_elt_integral_v && typename Simd::is_same_element<Field>::value)>::type>::DIF_core_laststeps(Element*, size_t, size_t, const Element*, const Element*, const simd_vect_t&, const simd_vect_t&, LinBox::FFTSimdHelper<4>) const':
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:703:49: error: incomplete type 'Simd128<long long unsigned int>' {aka 'Simd128_impl<true, true, false, 8>'} used in nested name specifier
  703 |                         V7 = Simd128<uint64_t>::mulx (V4, Wp);
      |                                                 ^~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl: In member function 'void LinBox::FFT_base<Field, Simd, typename std::enable_if<(Field::is_elt_integral_v && typename Simd::is_same_element<Field>::value)>::type>::DIF_core_laststeps(Element*, size_t, size_t, const Element*, const Element*, const simd_vect_t&, const simd_vect_t&, LinBox::FFTSimdHelper<8>) const':
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:821:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
  821 |                         V3 = Simd256<uint64_t>::permute128<0x20> (V1, V2);
      |                                                 ^~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:822:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
  822 |                         V4 = Simd256<uint64_t>::permute128<0x31> (V1, V2);
      |                                                 ^~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:842:48: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
  842 |                         Q = Simd256<uint64_t>::mulx (V4, W2p);
      |                                                ^~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl: In member function 'void LinBox::FFT_base<Field, Simd, typename std::enable_if<(Field::is_elt_integral_v && typename Simd::is_same_element<Field>::value)>::type>::DIT_core_firststeps(Element*, size_t&, size_t&, const Element*&, const Element*&, const simd_vect_t&, const simd_vect_t&, LinBox::FFTSimdHelper<4>) const':
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1213:49: error: incomplete type 'Simd128<long long unsigned int>' {aka 'Simd128_impl<true, true, false, 8>'} used in nested name specifier
 1213 |                         T2 = Simd128<uint64_t>::mulx (T1, Wp);
      |                                                 ^~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl: In member function 'void LinBox::FFT_base<Field, Simd, typename std::enable_if<(Field::is_elt_integral_v && typename Simd::is_same_element<Field>::value)>::type>::DIT_core_firststeps(Element*, size_t&, size_t&, const Element*&, const Element*&, const simd_vect_t&, const simd_vect_t&, LinBox::FFTSimdHelper<8>) const':
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1356:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1356 |                         V3 = Simd256<uint64_t>::unpacklo_intrinsic(V6,V7);
      |                                                 ^~~~~~~~~~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1357:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1357 |                         V4 = Simd256<uint64_t>::unpackhi_intrinsic(V6,V7);
      |                                                 ^~~~~~~~~~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1373:48: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1373 |                         Q = Simd256<uint64_t>::mulx (V5, Wp);
      |                                                ^~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1382:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1382 |                         V6 = Simd256<uint64_t>::unpacklo_intrinsic (V2, V7);
      |                                                 ^~~~~~~~~~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1392:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1392 |                         V6 = Simd256<uint64_t>::unpacklo_intrinsic (V1, V2);
      |                                                 ^~~~~~~~~~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1393:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1393 |                         V7 = Simd256<uint64_t>::unpackhi_intrinsic (V1, V2);
      |                                                 ^~~~~~~~~~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1394:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1394 |                         V3 = Simd256<uint64_t>::permute128<0x20> (V6, V7);
      |                                                 ^~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1395:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1395 |                         V4 = Simd256<uint64_t>::permute128<0x31> (V6, V7);
      |                                                 ^~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1403:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1403 |                         V1 = Simd256<uint64_t>::permute128<0x20> (V3, V4);
      |                                                 ^~~~~~~~~~
../../linbox/algorithms/polynomial-matrix/fft-integral.inl:1404:49: error: incomplete type 'Simd256<long long unsigned int>' {aka 'Simd256_impl<true, true, false, 8>'} used in nested name specifier
 1404 |                         V2 = Simd256<uint64_t>::permute128<0x31> (V3, V4);
      |                                                 ^~~~~~~~~~
make[1]: *** [Makefile:680: diophantine-solver.lo] Error 1
make[1]: Leaving directory '/var/lib/buildbot/worker/sage_git/build/local/var/tmp/sage/build/linbox-1.7.0+sage-2024-05-18/src/linbox/algorithms'
make: *** [Makefile:716: all-recursive] Error 1
This is because the CPU supports sse41, sse42, avx, and avx2, but simd128_int64.inl is only included on 64-bit builds. Most 32-bit machines nowadays are probably modern CPUs used for CI rather than hardware that is more than 10 years old.
It also probably does not make much sense to try to hardware-accelerate 64-bit operations on a 32-bit OS. I propose simply disabling sse41 and newer (sse42, avx, avx2) on 32-bit.