compile-time-regular-expressions icon indicating copy to clipboard operation
compile-time-regular-expressions copied to clipboard

Optimization: Extracting common elements from select<>

Open Andersama opened this issue 4 years ago • 1 comments
trafficstars

In the process of writing this optimization it seems like there may be an opportunity to parallelize select<>'s by processing the first atoms at the same time perhaps with simd. //helpers I threw in first.hpp

template<typename... Content, typename... Tail> constexpr auto first_and_trailing(ctll::list<sequence<Content...>, Tail...>) noexcept {
	if constexpr (sizeof...(Content) || sizeof...(Tail))
		return first_and_trailing(ctll::list<Content..., Tail...>());
	else
		return ctll::list_pop_pair<ctll::list<>, ctll::list<>>();
}

template<typename... Content, typename... Tail> constexpr auto first_and_trailing(ctll::list<ctll::list<Content...>, Tail...>) noexcept {
	if constexpr (sizeof...(Content) || sizeof...(Tail))
		return first_and_trailing(ctll::list<Content..., Tail...>());
	else
		return ctll::list_pop_pair<ctll::list<>, ctll::list<>>();
}

//repeats (we can extract sequences if the repeat is non-optional)
template<size_t A, size_t B, typename... Content, typename... Tail> constexpr auto first_and_trailing(ctll::list<repeat<A,B, Content...>, Tail...>) noexcept {
	if constexpr (A && B) {
		if constexpr (B > 1) {
			return first_and_trailing(ctll::list<Content..., repeat<A - 1, B - 1, Content...>, Tail...>());
		} else {
			return first_and_trailing(ctll::list<Content..., Tail...>());
		}
	} else if constexpr (A) {
		return first_and_trailing(ctll::list<Content..., repeat<A-1, B, Content...>, Tail...>());
	} else { //some form of optional
		return ctll::list_pop_pair<ctll::list<repeat<A, B, Content...>>, ctll::list<Tail...>>();
	}
}

template<size_t A, size_t B, typename... Content, typename... Tail> constexpr auto first_and_trailing(ctll::list<lazy_repeat<A,B, Content...>, Tail...>) noexcept {
	if constexpr (A && B) {
		if constexpr (B > 1) {
			return first_and_trailing(ctll::list<Content..., lazy_repeat<A - 1, B - 1, Content...>, Tail...>());
		} else {
			return first_and_trailing(ctll::list<Content..., Tail...>());
		}
	} else if constexpr (A) {
		return first_and_trailing(ctll::list<Content..., lazy_repeat<A-1, B, Content...>, Tail...>());
	} else { //some form of optional
		return ctll::list_pop_pair<ctll::list<lazy_repeat<A, B, Content...>>, ctll::list<Tail...>>();
	}
}

template<size_t A, size_t B, typename... Content, typename... Tail> constexpr auto first_and_trailing(ctll::list<possessive_repeat<A,B, Content...>, Tail...>) noexcept {
	if constexpr (A && B) {
		if constexpr (B > 1) {
			return first_and_trailing(ctll::list<Content..., possessive_repeat<A - 1, B - 1, Content...>, Tail...>());
		} else {
			return first_and_trailing(ctll::list<Content..., Tail...>());
		}
	} else if constexpr (A) {
		return first_and_trailing(ctll::list<Content..., possessive_repeat<A-1, B, Content...>, Tail...>());
	} else { //some form of optional
		return ctll::list_pop_pair<ctll::list<possessive_repeat<A, B, Content...>>, ctll::list<Tail...>>();
	}
}

template<auto C, auto... String, typename... Tail> constexpr auto first_and_trailing(ctll::list<string<C, String...>, Tail...>) noexcept {
	if constexpr (sizeof...(String)) {
		return ctll::list_pop_pair<ctll::list<character<C>>, ctll::list<string<String...>, Tail...>>();
	} else {
		return ctll::list_pop_pair<ctll::list<character<C>>, ctll::list<Tail...>>();
	}
}

template<typename T, typename... Types>
constexpr bool is_any_of_v = std::disjunction_v<std::is_same<T, Types>...>;

template<typename T>
constexpr bool is_empty_sequence = is_any_of_v<T, sequence<>, ctll::list<>, ctll::_nothing>;

template<typename... Content, typename... Listed>
constexpr auto CTRE_FORCE_INLINE concatenate_into_sequence(sequence<Content...>, ctll::list<Listed...>) noexcept {
	return concatenate_into_sequence(sequence<Content..., Listed...>{});
}

template<typename... Content, typename... Listed>
constexpr auto CTRE_FORCE_INLINE concatenate_into_sequence(sequence<Content...>, sequence<Listed...>) noexcept {
	return concatenate_into_sequence(sequence<Content..., Listed...>{});
}

template<typename... Content, typename... Tail>
constexpr auto CTRE_FORCE_INLINE concatenate_into_sequence(ctll::list<sequence<Content...>, Tail...>) noexcept {
	return concatenate_into_sequence(sequence<Content...>{});
}
template<typename... Content, typename... Tail>
constexpr auto CTRE_FORCE_INLINE concatenate_into_sequence(sequence<sequence<Content...>, Tail...>) noexcept {
	return concatenate_into_sequence(sequence<Content...>{});
}

template<typename... Content>
constexpr auto CTRE_FORCE_INLINE concatenate_into_sequence(ctll::list<Content...>) noexcept {
	return sequence<Content...>{};
}
template<typename... Content>
constexpr auto CTRE_FORCE_INLINE concatenate_into_sequence(sequence<Content...>) noexcept {
	return sequence<Content...>{};
}

//recursive helper to extract as many items into a sequence
template<typename... Lead, typename... Content, typename... Tail> constexpr auto first_and_trailing(ctll::list<sequence<Lead...>, select<Content...>, Tail...>) noexcept {
	if constexpr (sizeof...(Content) > 1) {
		//note: we can perform the "distribute into alternation" optimization here if we append Tail... to head_element
		//this will catch cases where Tail... may contain atoms which may line up in the select<> branch
		//eg: if a branch is "shorter" than the rest it may use part of Tail... to match
		using head_element = decltype(ctll::front(ctll::list<Content...>{}));
		using first_pair = decltype(first_and_trailing(ctll::list<head_element>{}));
		using first_element = decltype(first_pair().front);
		using first_trailing = decltype(first_pair().list);
		//first element must be something and it must be the same for all select branches
		if constexpr ((!is_empty_sequence<decltype(ctll::front(first_element{}))>) &&
			((std::is_same_v<first_element, decltype(calculate_first_and_trailing(Content{}).front)>) && ...)) {
			//if we have trailing elements across all the select branches
			if constexpr (((!is_empty_sequence<decltype(ctll::front(decltype(calculate_first_and_trailing(Content{}).list){}))>) && ...)) {
				return first_and_trailing(ctll::list<decltype(concatenate_into_sequence(sequence<Lead...>, first_element{})),
					select<decltype(concatenate_into_sequence(calculate_first_and_trailing(Content{}).list))...>,
					Tail...
				>{});
				//if we distributed into the alternation then we no longer need Tail...
				/*
				return first_and_trailing(ctll::list<decltype(concatenate_into_sequence(sequence<Lead...>, first_element{})),
					select<decltype(concatenate_into_sequence(calculate_first_and_trailing(Content{}).list))...>
				>{});
				*/

			//if we have don't have trailing elements at all
			} else if constexpr (((is_empty_sequence<decltype(ctll::front(decltype(calculate_first_and_trailing(Content{}).list){}))>) && ...)) {
				return first_and_trailing(ctll::list<decltype(concatenate_into_sequence(sequence<Lead...>, first_element{})), Tail...>{});
				//if we distributed into the alternation then we no longer need Tail...
				/*
				return first_and_trailing(ctll::list<decltype(concatenate_into_sequence(sequence<Lead...>, first_element{}))>{});
				*/
			} else {
				if constexpr (((MatchesCharacter<Content>::template value<decltype(*std::declval<::std::string_view::iterator>())>) && ...)) {
					return first_and_trailing(ctll::list<Lead..., set<Content...>, Tail...>{});
				} else {
					return first_and_trailing(ctll::list<Lead..., select<Content...>, Tail...>{});
				}
			}
		} else {
			if constexpr (((MatchesCharacter<Content>::template value<decltype(*std::declval<::std::string_view::iterator>())>) && ...)) {
				return first_and_trailing(ctll::list<Lead..., set<Content...>, Tail...>{});
			} else {
				return first_and_trailing(ctll::list<Lead..., select<Content...>, Tail...>{});
			}
		}
	} else {
		if constexpr (((MatchesCharacter<Content>::template value<decltype(*std::declval<::std::string_view::iterator>())>) && ...)) {
			return first_and_trailing(ctll::list<Lead..., set<Content...>, Tail...>{});
		} else {
			return first_and_trailing(ctll::list<Lead..., select<Content...>, Tail...>{});
		}
	}
}
//base case select
template<typename... Content, typename... Tail> constexpr auto first_and_trailing(ctll::list<select<Content...>, Tail...>) noexcept {
	if constexpr (sizeof...(Content) > 1) {
		using head_element = decltype(ctll::front(ctll::list<Content...>{}));
		using first_pair = decltype(first_and_trailing(ctll::list<head_element>{}));
		using first_element = decltype(first_pair().front);
		using first_trailing = decltype(first_pair().list);
		if constexpr ((!is_empty_sequence<decltype(ctll::front(first_element{}))>) &&
			((std::is_same_v<first_element, decltype(calculate_first_and_trailing(Content{}).front)>) && ...)) {
			
			if constexpr (((!is_empty_sequence<decltype(ctll::front(decltype(calculate_first_and_trailing(Content{}).list){}))>) && ...)) {
				return first_and_trailing(ctll::list<decltype(concatenate_into_sequence(first_element{})),
					select<decltype(concatenate_into_sequence(calculate_first_and_trailing(Content{}).list))...>,
					Tail...
				>{});
			} else if constexpr (((is_empty_sequence<decltype(ctll::front(decltype(calculate_first_and_trailing(Content{}).list){}))>) && ...)) {
				return first_and_trailing(
					ctll::list<decltype(concatenate_into_sequence(first_element{})), Tail... >{}
				);
			} else {
				if constexpr (((MatchesCharacter<Content>::template value<decltype(*std::declval<::std::string_view::iterator>())>) && ...)) {
					return ctll::list_pop_pair<ctll::list<set<Content...>>, ctll::list<Tail...>>();
				} else {
					return ctll::list_pop_pair<ctll::list<select<Content...>>, ctll::list<Tail...>>();
				}
			}
		} else {
			if constexpr (((MatchesCharacter<Content>::template value<decltype(*std::declval<::std::string_view::iterator>())>) && ...)) {
				return ctll::list_pop_pair<ctll::list<set<Content...>>, ctll::list<Tail...>>();
			} else {
				return ctll::list_pop_pair<ctll::list<select<Content...>>, ctll::list<Tail...>>();
			}
		}
	} else {
		if constexpr (((MatchesCharacter<Content>::template value<decltype(*std::declval<::std::string_view::iterator>())>) && ...)) {
			return ctll::list_pop_pair<ctll::list<set<Content...>>, ctll::list<Tail...>>();
		} else {
			return ctll::list_pop_pair<ctll::list<select<Content...>>, ctll::list<Tail...>>();
		}
	}
}

//general case
template<typename T, typename... Tail> constexpr auto first_and_trailing(ctll::list<T, Tail...>) noexcept {
	return ctll::list_pop_pair<ctll::list<T>, ctll::list<Tail...>>();
}

// user facing interface
template<typename... Content> constexpr auto calculate_first_and_trailing(Content...) noexcept {
	if constexpr (sizeof...(Content) > 0ULL)
		return first_and_trailing(ctll::list<Content...>());
	else
		return ctll::list_pop_pair<ctll::list<>, ctll::list<>>();
}

//replacement for select<> in evaluation.hpp

template <typename R, typename Iterator, typename EndIterator, typename HeadOptions, typename... Content, typename... TailOptions, typename... Tail>
constexpr CTRE_FORCE_INLINE R evaluate(const Iterator begin, Iterator current, const EndIterator end, const flags& f, R captures, ctll::list<select<HeadOptions, TailOptions...>, Tail...>) noexcept {
	using first_pair = decltype(calculate_first_and_trailing(HeadOptions{}));
	using first_element = decltype(first_pair().front);
	using first_trailing = decltype(first_pair().list);
	if constexpr (sizeof...(TailOptions) &&
		//first common element must be something
		(!is_empty_sequence<decltype(ctll::front(first_element{}))>) &&
		//first element is shared
		((std::is_same_v<first_element, decltype(calculate_first_and_trailing(TailOptions{}).front)>) && ...)
		) {
		constexpr bool has_trailing_elements = 
			(!is_empty_sequence<decltype(ctll::front(first_trailing{}))>) &&
			((!is_empty_sequence<decltype(ctll::front(decltype(calculate_first_and_trailing(TailOptions{}).list){}))>) && ...);
		constexpr bool has_no_trailing_elements =
			(is_empty_sequence<decltype(ctll::front(first_trailing{}))>) &&
			((is_empty_sequence<decltype(ctll::front(decltype(calculate_first_and_trailing(TailOptions{}).list){}))>) && ...);
		if constexpr (has_trailing_elements) {
			return evaluate(begin, current, end, f, captures, ctll::list<
				decltype(concatenate_into_sequence(first_element{})),
				select<
					decltype(concatenate_into_sequence(first_trailing{})),
					decltype(concatenate_into_sequence(calculate_first_and_trailing(TailOptions{}).list))...
				>,
				Tail...
			>{});
		} else if constexpr (has_no_trailing_elements) {
			return evaluate(begin, current, end, f, captures, ctll::list<
				decltype(concatenate_into_sequence(first_element{})), Tail...
			>{});
		}
	}
	//if possible treat select as character class
	if constexpr (MatchesCharacter<HeadOptions>::template value<decltype(*std::declval<Iterator>())> &&
		((MatchesCharacter<TailOptions>::template value<decltype(*std::declval<Iterator>())>) && ...)) {
		return evaluate(begin, current, end, f, captures, ctll::list<set<HeadOptions, TailOptions...>, Tail...>());
	} else { //otherwise
		if (auto r = evaluate(begin, current, end, f, captures, ctll::list<HeadOptions, Tail...>{})) {
			return r;
		} else {
			return evaluate(begin, current, end, f, captures, ctll::list<select<TailOptions...>, Tail...>{});
		}
	}
}

Andersama avatar Dec 10 '20 08:12 Andersama

This isn't necessarily a complete version of this optimization, it will trip up on some cases, this is just a general sketch I have working so far.

Andersama avatar Dec 10 '20 08:12 Andersama