oneDPL
oneDPL copied to clipboard
Inconsecutive implementation of `__select_backend` - V1
Here is another implementation of https://github.com/oneapi-src/oneDPL/pull/1455
This PR is based on a prototype from @rarutyun.
The source prototype is available here:
- https://github.com/oneapi-src/oneDPL/tree/rarutyun/tag_dispatching_prototype
- PR: https://github.com/oneapi-src/oneDPL/pull/164
!!! ATTENTION !!!
In this PR we disable the `for_loop` functionality for hetero policies:
- `for_loop`;
- `for_loop_n`;
- `for_loop_strided`;
- `for_loop_n_strided`.
Tag dispatching mechanism:
- Allows selecting the parallel pattern to use (with the `__select_backend` function)
  - The decision is made once, based on the execution policy and the iterator category
- Provides nested tags for the next level of dispatching (parallel backend, vectorization, etc.). For example: `__serial_backend_tag`, `__tbb_backend_tag`, `__omp_backend_tag`, `__device_backend_tag`, `__fpga_backend_tag`, `is_vector`.
- Patterns are selected based on the tag
- overload with generic tag (customization point)
- overloads for concrete tag types with optimized implementation
- The parallel backend, as well as vectorized vs. non-vectorized bricks, is selected based on the nested tags.
Overall schema of tag dispatching:
Algorithm level - first level:
- call `const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, ...);` at the algorithm level;
- pass `__dispatch_tag` into the patterns.
Pattern level - second level:
- get `_BackendTag` from the first `_Tag` parameter type;
- pass `_BackendTag{}` (an instance of the backend tag) into the backend implementation.
Host backend tags:
struct __serial_backend_tag { }; // For serial backend
struct __tbb_backend_tag { }; // For TBB backend
struct __omp_backend_tag { }; // For OMP backend
---
title: Host backend tags
---
classDiagram
class __serial_backend_tag {
}
class __tbb_backend_tag {
}
class __omp_backend_tag {
}
Hetero backend tags:
struct __device_backend_tag { };
#if _ONEDPL_FPGA_DEVICE
struct __fpga_backend_tag : __device_backend_tag { };
#endif // _ONEDPL_FPGA_DEVICE
---
title: Hetero backend tags
---
classDiagram
__device_backend_tag <|-- __fpga_backend_tag
class __device_backend_tag {
}
class __fpga_backend_tag {
}
Types of dispatching tags (host tags):
template <class _IsVector>
struct __serial_tag
{
using __is_vector = _IsVector;
};
template <class _IsVector>
struct __parallel_tag
{
using __is_vector = _IsVector;
using __backend_tag = __par_backend_tag;
};
struct __parallel_forward_tag
{
using __is_vector = ::std::false_type;
using __backend_tag = __par_backend_tag;
};
Types of dispatching tags (hetero tags):
template <typename _BackendTag>
struct __hetero_tag
{
using __backend_tag = _BackendTag;
};
How we define `__par_backend_tag`:
#if _ONEDPL_PAR_BACKEND_TBB
using __par_backend_tag = __tbb_backend_tag;
#elif _ONEDPL_PAR_BACKEND_OPENMP
using __par_backend_tag = __omp_backend_tag;
#elif _ONEDPL_PAR_BACKEND_SERIAL
using __par_backend_tag = __serial_backend_tag;
#else
# error "Parallel backend was not specified"
#endif
Typical changes in the code
Changes in pattern calls:
- before:
template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
{
return oneapi::dpl::__internal::__pattern_any_of(
::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred,
oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec),
oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec));
}
- after:
template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
{
// 1. Build required dispatching tag by call __select_backend function:
// - we should pass into `__select_backend` execution policy and one iterator of each iterator types from params.
const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first);
// 2. Pass dispatching tag into pattern call
// So __is_vectorization_preferred and __is_parallelization_preferred calls aren't required anymore
return oneapi::dpl::__internal::__pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first,
__last, __pred);
}
Functions with `enable_if_..._policy<...>` and `/*parallel=*/::std::false_type`
before:
template <class _ExecutionPolicy, class _ForwardIterator, class _Pred, class _IsVector>
oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool>
__pattern_any_of(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred, _IsVector,
/*parallel=*/::std::false_type) noexcept;
after:
template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Pred>
bool
__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred) noexcept;
- in these functions we are able to get the `_IsVector` type as `typename _Tag::__is_vector`
Functions with `__enable_if_host_execution_policy_conditional`, `__is_random_access_iterator_v` and `/*parallel=*/::std::true_type`
before:
template <class _ExecutionPolicy, class _RandomAccessIterator, class _Function, class _IsVector>
oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<
_ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>>
__pattern_walk1(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Function, _IsVector, /*parallel=*/::std::true_type);
after:
template <class _ExecutionPolicy, class _RandomAccessIterator, class _Function>
void
__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Function);
Functions with `enable_if_..._policy<...>` and `/*parallel=*/::std::true_type`
before:
template <class _ExecutionPolicy, class _RandomAccessIterator, class _Pred, class _IsVector>
oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool>
__pattern_any_of(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred, _IsVector,
/*parallel=*/::std::true_type);
after:
template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Pred>
bool
__pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred);
- in these functions we move `class _IsVector` to the first template parameter position.
Functions with __enable_if_device_execution_policy
before:
template <typename _ExecutionPolicy, typename _ForwardIterator, typename _Function,
oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, int> = 0>
auto
__pattern_walk1_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f);
after:
template <typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator, typename _Function>
auto
__pattern_walk1_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f);
Changes in the oneDPL
host policy classes
As a result of this work, we now have cleaner host policy classes:
// 2.4, Sequential execution policy
class sequenced_policy
{
};
// 2.5, Parallel execution policy
class parallel_policy
{
};
// 2.6, Parallel+Vector execution policy
class parallel_unsequenced_policy
{
};
class unsequenced_policy
{
};
All functions like `__allow_unsequenced`, `__allow_vector` and `__allow_parallel` have been removed from these classes because they are no longer required.
`__select_backend()` functions
`__select_backend()` functions for host policies and iterators
template <class... _IteratorTypes>
__serial_tag<std::false_type>
__select_backend(oneapi::dpl::execution::sequenced_policy, _IteratorTypes&&...)
{
return {};
}
template <class... _IteratorTypes>
__serial_tag<__internal::__is_random_access_iterator<_IteratorTypes...>>
__select_backend(oneapi::dpl::execution::unsequenced_policy, _IteratorTypes&&...)
{
return {};
}
template <class... _IteratorTypes>
__tag_type<std::false_type, _IteratorTypes...>
__select_backend(oneapi::dpl::execution::parallel_policy, _IteratorTypes&&...)
{
return {};
}
template <class... _IteratorTypes>
__tag_type<__internal::__is_random_access_iterator<_IteratorTypes...>, _IteratorTypes...>
__select_backend(oneapi::dpl::execution::parallel_unsequenced_policy, _IteratorTypes&&...)
{
return {};
}
`__select_backend()` functions for hetero policies and iterators
template <class... _IteratorTypes, typename _KernelName>
::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__device_backend_tag>>
__select_backend(const execution::device_policy<_KernelName>&, _IteratorTypes&&...)
{
return {};
}
#if _ONEDPL_FPGA_DEVICE
template <class... _IteratorTypes, unsigned int _Factor, typename _KernelName>
::std::enable_if_t<__is_random_access_iterator_v<_IteratorTypes...>, __hetero_tag<__fpga_backend_tag>>
__select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _IteratorTypes&&...)
{
return {};
}
#endif
`__select_backend()` functions for hetero policies and ranges (in the namespace `__ranges`)
template <typename _KernelName, typename... _Ranges>
oneapi::dpl::__internal::__hetero_tag<oneapi::dpl::__internal::__device_backend_tag>
__select_backend(const execution::device_policy<_KernelName>&, _Ranges&&...)
{
return {};
}
#if _ONEDPL_FPGA_DEVICE
template <unsigned int _Factor, typename _KernelName, typename... _Ranges>
oneapi::dpl::__internal::__hetero_tag<oneapi::dpl::__internal::__fpga_backend_tag>
__select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _Ranges&&...)
{
return {};
}
#endif