xtensor icon indicating copy to clipboard operation
xtensor copied to clipboard

xt::equal fails for int64_t due to SIMD issues.

Open · Ivorforce opened this issue 1 year ago • 1 comment

System info

macOS 13.7, intel. cmake version 3.30.5 (CMake; JetBrains IDE bundle; build 22) Apple clang version 15.0.0 (clang-1500.1.0.2.5)

Reproduction

#define XTENSOR_USE_XSIMD
#include <iostream>
#include <set>

#include "xtensor/xarray.hpp"
#include "xtensor/xtensor_forward.hpp"
#include "xtensor/xbuilder.hpp"
#include "xtensor/xio.hpp"

// Minimal reproduction: builds an int64_t array, compares it element-wise
// with itself, stores the result in a bool array and prints it.
int main()
{
	xt::xarray<int64_t> x = xt::arange(50);
	// Initialising xarray<bool> directly from the comparison expression is the
	// statement the compiler diagnostics in this report point at.
	xt::xarray<bool> res = xt::equal(x, x);

	std::cout << res << std::endl;
}

When XTENSOR_USE_XSIMD is removed, it compiles and runs fine. It also works with an explicit cast: xt::xarray<bool> res = xt::cast<uint8_t>(xt::equal(x, y)); I'm pretty sure it fails for assignment to bool from all int types.

Error

====================[ Build | xtensor_tests | Release ]=========================
/Users/lukas/Applications/CLion.app/Contents/bin/cmake/mac/x64/bin/cmake --build /Users/lukas/dev/xtensor-tests/cmake-build-release --target xtensor_tests -j 14
[1/2] Building CXX object CMakeFiles/xtensor_tests.dir/main.cpp.o
FAILED: CMakeFiles/xtensor_tests.dir/main.cpp.o 
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/c++  -I/Users/lukas/dev/xtensor-tests/xtensor/include -I/Users/lukas/dev/xtensor-tests/xtl/include -I/Users/lukas/dev/xtensor-tests/xsimd/include -O3 -DNDEBUG -std=gnu++20 -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.2.sdk -mmacosx-version-min=13.7 -fcolor-diagnostics -MD -MT CMakeFiles/xtensor_tests.dir/main.cpp.o -MF CMakeFiles/xtensor_tests.dir/main.cpp.o.d -o CMakeFiles/xtensor_tests.dir/main.cpp.o -c /Users/lukas/dev/xtensor-tests/main.cpp
In file included from /Users/lukas/dev/xtensor-tests/main.cpp:5:
In file included from /Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xarray.hpp:19:
In file included from /Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xbuffer_adaptor.hpp:21:
In file included from /Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xstorage.hpp:21:
In file included from /Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xexception.hpp:24:
In file included from /Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xtensor_config.hpp:61:
In file included from /Users/lukas/dev/xtensor-tests/xsimd/include/xsimd/xsimd.hpp:67:
/Users/lukas/dev/xtensor-tests/xsimd/include/xsimd/types/xsimd_api.hpp:2253:9: error: no matching function for call to 'store'
        kernel::store<A>(src, dst, A {});
        ^~~~~~~~~~~~~~~~
/Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xcontainer.hpp:756:18: note: in instantiation of function template specialization 'xsimd::store_as<xsimd::sse4_1, long long>' requested here
        xt_simd::store_as(std::addressof(storage()[i]), e, align_mode());
                 ^
/Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xassign.hpp:784:25: note: in instantiation of function template specialization 'xt::xcontainer<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>>::store_simd<xsimd::aligned_mode, xsimd::batch_bool<long long>>' requested here
            e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
                        ^
/Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xassign.hpp:466:47: note: in instantiation of function template specialization 'xt::linear_assigner<true>::run<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>, xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
                linear_assigner<simd_assign>::run(de1, de2);
                                              ^
/Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xassign.hpp:488:20: note: in instantiation of function template specialization 'xt::xexpression_assigner_base<xt::xtensor_expression_tag>::assign_data<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>, xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        base_type::assign_data(e1, e2, trivial_broadcast);
                   ^
/Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xassign.hpp:226:44: note: in instantiation of function template specialization 'xt::xexpression_assigner<xt::xtensor_expression_tag>::assign_xexpression<xt::xexpression<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>>, xt::xexpression<xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>>' requested here
                xexpression_assigner<tag>::assign_xexpression(e1, e2);
                                           ^
/Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xsemantic.hpp:661:13: note: in instantiation of function template specialization 'xt::assign_xexpression<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>, xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        xt::assign_xexpression(*this, e);
            ^
/Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xsemantic.hpp:513:37: note: in instantiation of function template specialization 'xt::xcontainer_semantic<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>>::assign_xexpression<xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        return this->derived_cast().assign_xexpression(e);
                                    ^
/Users/lukas/dev/xtensor-tests/xtensor/include/xtensor/xarray.hpp:510:24: note: in instantiation of function template specialization 'xt::xsemantic_base<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>>::assign<xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        semantic_base::assign(e);
                       ^
/Users/lukas/dev/xtensor-tests/main.cpp:62:25: note: in instantiation of function template specialization 'xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>::xarray_container<xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        xt::xarray<bool> res = xt::equal(x, x);
                               ^
/Users/lukas/dev/xtensor-tests/xsimd/include/xsimd/arch/./generic/xsimd_generic_memory.hpp:518:27: note: candidate template ignored: could not match 'xsimd::sse4_1' against 'long long'
        XSIMD_INLINE void store(batch_bool<T, A> const& self, bool* mem, requires_arch<generic>) noexcept
                          ^
1 error generated.
ninja: build stopped: subcommand failed.

I'm not sure why it defaults to SSE4.1, but if I whack it down to core2 (macOS minimum) I still get a similar error:

====================[ Build | xtensor_tests | Release ]=========================
/Users/lukas/Applications/CLion.app/Contents/bin/cmake/mac/x64/bin/cmake --build /Users/lukas/dev/xtensor-tests/cmake-build-release --target xtensor_tests -j 14
[1/2] Building CXX object CMakeFiles/xtensor_tests.dir/main.cpp.o
FAILED: CMakeFiles/xtensor_tests.dir/main.cpp.o 
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/c++ -DXTENSOR_USE_XSIMD -isystem /Users/lukas/dev/xtensor-tests/prefix/include -march=core2 -O3 -DNDEBUG -std=gnu++20 -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.2.sdk -mmacosx-version-min=13.7 -fcolor-diagnostics -MD -MT CMakeFiles/xtensor_tests.dir/main.cpp.o -MF CMakeFiles/xtensor_tests.dir/main.cpp.o.d -o CMakeFiles/xtensor_tests.dir/main.cpp.o -c /Users/lukas/dev/xtensor-tests/main.cpp
In file included from /Users/lukas/dev/xtensor-tests/main.cpp:4:
In file included from /Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xarray.hpp:19:
In file included from /Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xbuffer_adaptor.hpp:21:
In file included from /Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xstorage.hpp:21:
In file included from /Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xexception.hpp:24:
In file included from /Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xtensor_config.hpp:61:
In file included from /Users/lukas/dev/xtensor-tests/prefix/include/xsimd/xsimd.hpp:67:
/Users/lukas/dev/xtensor-tests/prefix/include/xsimd/types/xsimd_api.hpp:2253:9: error: no matching function for call to 'store'
        kernel::store<A>(src, dst, A {});
        ^~~~~~~~~~~~~~~~
/Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xcontainer.hpp:756:18: note: in instantiation of function template specialization 'xsimd::store_as<xsimd::ssse3, long long>' requested here
        xt_simd::store_as(std::addressof(storage()[i]), e, align_mode());
                 ^
/Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xassign.hpp:784:25: note: in instantiation of function template specialization 'xt::xcontainer<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>>::store_simd<xsimd::aligned_mode, xsimd::batch_bool<long long>>' requested here
            e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
                        ^
/Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xassign.hpp:466:47: note: in instantiation of function template specialization 'xt::linear_assigner<true>::run<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>, xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
                linear_assigner<simd_assign>::run(de1, de2);
                                              ^
/Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xassign.hpp:488:20: note: in instantiation of function template specialization 'xt::xexpression_assigner_base<xt::xtensor_expression_tag>::assign_data<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>, xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        base_type::assign_data(e1, e2, trivial_broadcast);
                   ^
/Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xassign.hpp:226:44: note: in instantiation of function template specialization 'xt::xexpression_assigner<xt::xtensor_expression_tag>::assign_xexpression<xt::xexpression<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>>, xt::xexpression<xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>>' requested here
                xexpression_assigner<tag>::assign_xexpression(e1, e2);
                                           ^
/Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xsemantic.hpp:661:13: note: in instantiation of function template specialization 'xt::assign_xexpression<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>, xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        xt::assign_xexpression(*this, e);
            ^
/Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xsemantic.hpp:513:37: note: in instantiation of function template specialization 'xt::xcontainer_semantic<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>>::assign_xexpression<xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        return this->derived_cast().assign_xexpression(e);
                                    ^
/Users/lukas/dev/xtensor-tests/prefix/include/xtensor/xarray.hpp:510:24: note: in instantiation of function template specialization 'xt::xsemantic_base<xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>>::assign<xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        semantic_base::assign(e);
                       ^
/Users/lukas/dev/xtensor-tests/main.cpp:12:25: note: in instantiation of function template specialization 'xt::xarray_container<xt::uvector<bool, xsimd::aligned_allocator<bool, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>>::xarray_container<xt::xfunction<xt::detail::equal_to, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &, const xt::xarray_container<xt::uvector<long long, xsimd::aligned_allocator<long long, 16>>, xt::layout_type::row_major, xt::svector<unsigned long>> &>>' requested here
        xt::xarray<bool> res = xt::equal(x, x);
                               ^
/Users/lukas/dev/xtensor-tests/prefix/include/xsimd/arch/./generic/xsimd_generic_memory.hpp:518:27: note: candidate template ignored: could not match 'xsimd::ssse3' against 'long long'
        XSIMD_INLINE void store(batch_bool<T, A> const& self, bool* mem, requires_arch<generic>) noexcept
                          ^
1 error generated.
ninja: build stopped: subcommand failed.

Ivorforce avatar Nov 24 '24 16:11 Ivorforce

Your issue is that there is an implicit cast from xt::equal(x, x) to xt::xarray<bool>. Add an explicit cast to do the conversion:

#define XTENSOR_USE_XSIMD
#include <iostream>
#include <set>

#include "xtensor/xarray.hpp"
#include "xtensor/xtensor_forward.hpp"
#include "xtensor/xbuilder.hpp"
#include "xtensor/xio.hpp"

// Suggested workaround: the same self-comparison, but the result is wrapped
// in an explicit xt::cast<bool> before it is stored in the bool array.
// NOTE(review): x is declared as float here, whereas the original report used
// int64_t — confirm the workaround also covers the integer case.
int main()
{
	xt::xarray<float> x = xt::arange(50);
	// Explicit cast replaces the implicit conversion used in the report.
	xt::xarray<bool> res = xt::cast<bool>(xt::equal(x, x));
	std::cout << res << std::endl;
}

spectre-ns avatar Dec 26 '24 01:12 spectre-ns