Skip to content

Commit

Permalink
Vectorize reverse_copy() (microsoft#804)
Browse files Browse the repository at this point in the history
Co-authored-by: Billy Robert O'Neal III <[email protected]>
Co-authored-by: Stephan T. Lavavej <[email protected]>
  • Loading branch information
3 people authored and CaseyCarter committed Jul 28, 2020
1 parent f0ff5b4 commit f8f6dff
Show file tree
Hide file tree
Showing 4 changed files with 309 additions and 109 deletions.
52 changes: 52 additions & 0 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,26 @@ _STL_DISABLE_CLANG_WARNINGS
#pragma push_macro("new")
#undef new

#if _USE_STD_VECTOR_ALGORITHMS
_EXTERN_C
// The "noalias" attribute tells the compiler optimizer that pointers going into these hand-vectorized algorithms
// won't be stored beyond the lifetime of the function, and that the function will only reference arrays denoted by
// those pointers. The optimizer also assumes in that case that a pointer parameter is not returned to the caller via
// the return value, so functions using "noalias" must usually return void. This attribute is valuable because these
// functions are in native code objects that the compiler cannot analyze. In the absence of the noalias attribute, the
// compiler has to assume that the denoted arrays are "globally address taken", and that any later calls to
// unanalyzable routines may modify those arrays.
//
// There is one entry point per element size: the numeric suffix (1, 2, 4, 8) is sizeof(element) in bytes, and
// reverse_copy() selects the matching function with "if constexpr" on sizeof(_Elem). Callers pass the source range
// as _First/_Last and the start of the destination as _Dest. Because every parameter is a void pointer, the element
// width is conveyed solely by which of the four functions is called. Since these functions return void, the caller
// advances its own destination iterator by (_Last - _First) after the call.
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
const void* _First, const void* _Last, void* _Dest) noexcept;
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
const void* _First, const void* _Last, void* _Dest) noexcept;
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
const void* _First, const void* _Last, void* _Dest) noexcept;
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
const void* _First, const void* _Last, void* _Dest) noexcept;
_END_EXTERN_C
#endif // _USE_STD_VECTOR_ALGORITHMS

_STD_BEGIN
// COMMON SORT PARAMETERS
_INLINE_VAR constexpr int _ISORT_MAX = 32; // maximum size for insertion sort
Expand Down Expand Up @@ -4520,6 +4540,7 @@ namespace ranges {
constexpr bool _Allow_vectorization =
conjunction_v<_Is_trivially_swappable<_Elem>, negation<is_volatile<_Elem>>>;

#pragma warning(suppress : 6326) // Potential comparison of a constant with another constant
if constexpr (_Allow_vectorization && _Nx <= 8 && (_Nx & (_Nx - 1)) == 0) {
if (!_STD is_constant_evaluated()) {
_Elem* const _First_addr = _STD to_address(_First);
Expand Down Expand Up @@ -4584,6 +4605,37 @@ _CONSTEXPR20 _OutIt reverse_copy(_BidIt _First, _BidIt _Last, _OutIt _Dest) {
const auto _UFirst = _Get_unwrapped(_First);
auto _ULast = _Get_unwrapped(_Last);
auto _UDest = _Get_unwrapped_n(_Dest, _Idl_distance<_BidIt>(_UFirst, _ULast));

#if _HAS_IF_CONSTEXPR && _USE_STD_VECTOR_ALGORITHMS
using _Elem = remove_pointer_t<decltype(_UFirst)>;
using _DestElem = remove_pointer_t<decltype(_UDest)>;
constexpr bool _Allow_vectorization = conjunction_v<is_same<remove_const_t<_Elem>, remove_const_t<_DestElem>>,
is_pointer<decltype(_UFirst)>, is_trivially_copyable<_Elem>, negation<is_volatile<_Elem>>>;
constexpr size_t _Nx = sizeof(_Elem);

#pragma warning(suppress : 6326) // Potential comparison of a constant with another constant
if constexpr (_Allow_vectorization && _Nx <= 8 && (_Nx & (_Nx - 1)) == 0) {
#ifdef __cpp_lib_is_constant_evaluated
if (!_STD is_constant_evaluated())
#endif // __cpp_lib_is_constant_evaluated
{
if constexpr (_Nx == 1) {
__std_reverse_copy_trivially_copyable_1(_UFirst, _ULast, _UDest);
} else if constexpr (_Nx == 2) {
__std_reverse_copy_trivially_copyable_2(_UFirst, _ULast, _UDest);
} else if constexpr (_Nx == 4) {
__std_reverse_copy_trivially_copyable_4(_UFirst, _ULast, _UDest);
} else {
__std_reverse_copy_trivially_copyable_8(_UFirst, _ULast, _UDest);
}

_UDest += _ULast - _UFirst;
_Seek_wrapped(_Dest, _UDest);
return _Dest;
}
}
#endif // _HAS_IF_CONSTEXPR && _USE_STD_VECTOR_ALGORITHMS

for (; _UFirst != _ULast; ++_UDest) {
*_UDest = *--_ULast;
}
Expand Down
116 changes: 21 additions & 95 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -5481,116 +5481,42 @@ _NODISCARD _CONSTEXPR20 bool _Check_match_counts(
}

// FUNCTION TEMPLATE reverse
#if _HAS_IF_CONSTEXPR
template <class _BidIt>
_CONSTEXPR20 void reverse(const _BidIt _First, const _BidIt _Last) { // reverse elements in [_First, _Last)
_Adl_verify_range(_First, _Last);
auto _UFirst = _Get_unwrapped(_First);
auto _ULast = _Get_unwrapped(_Last);
#if _USE_STD_VECTOR_ALGORITHMS
#if _HAS_IF_CONSTEXPR && _USE_STD_VECTOR_ALGORITHMS
using _Elem = remove_pointer_t<decltype(_UFirst)>;
constexpr bool _Allow_vectorization =
conjunction_v<is_pointer<decltype(_UFirst)>, _Is_trivially_swappable<_Elem>, negation<is_volatile<_Elem>>>;
constexpr size_t _Nx = sizeof(_Elem);

if constexpr (_Allow_vectorization && sizeof(_Elem) == 1) {
#ifdef __cpp_lib_is_constant_evaluated
if (!_STD is_constant_evaluated())
#endif // __cpp_lib_is_constant_evaluated
{
__std_reverse_trivially_swappable_1(_UFirst, _ULast);
return;
}
} else if constexpr (_Allow_vectorization && sizeof(_Elem) == 2) {
#ifdef __cpp_lib_is_constant_evaluated
if (!_STD is_constant_evaluated())
#endif // __cpp_lib_is_constant_evaluated
{
__std_reverse_trivially_swappable_2(_UFirst, _ULast);
return;
}
} else if constexpr (_Allow_vectorization && sizeof(_Elem) == 4) {
#pragma warning(suppress : 6326) // Potential comparison of a constant with another constant
if constexpr (_Allow_vectorization && _Nx <= 8 && (_Nx & (_Nx - 1)) == 0) {
#ifdef __cpp_lib_is_constant_evaluated
if (!_STD is_constant_evaluated())
#endif // __cpp_lib_is_constant_evaluated
{
__std_reverse_trivially_swappable_4(_UFirst, _ULast);
return;
}
} else if constexpr (_Allow_vectorization && sizeof(_Elem) == 8) {
#ifdef __cpp_lib_is_constant_evaluated
if (!_STD is_constant_evaluated())
#endif // __cpp_lib_is_constant_evaluated
{
__std_reverse_trivially_swappable_8(_UFirst, _ULast);
if constexpr (_Nx == 1) {
__std_reverse_trivially_swappable_1(_UFirst, _ULast);
} else if constexpr (_Nx == 2) {
__std_reverse_trivially_swappable_2(_UFirst, _ULast);
} else if constexpr (_Nx == 4) {
__std_reverse_trivially_swappable_4(_UFirst, _ULast);
} else {
__std_reverse_trivially_swappable_8(_UFirst, _ULast);
}

return;
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
#endif // _HAS_IF_CONSTEXPR && _USE_STD_VECTOR_ALGORITHMS

for (; _UFirst != _ULast && _UFirst != --_ULast; ++_UFirst) {
_STD iter_swap(_UFirst, _ULast);
}
}
#else // ^^^ _HAS_IF_CONSTEXPR / !_HAS_IF_CONSTEXPR vvv
template <class _BidIt>
void _Reverse_unchecked1(_BidIt _First, _BidIt _Last, integral_constant<size_t, 0>) {
// reverse elements in [_First, _Last), general bidirectional iterators
for (; _First != _Last && _First != --_Last; ++_First) {
_STD iter_swap(_First, _Last);
}
}

#if _USE_STD_VECTOR_ALGORITHMS
template <class _BidIt>
void _Reverse_unchecked1(const _BidIt _First, const _BidIt _Last, integral_constant<size_t, 1>) {
// reverse elements in [_First, _Last), pointers to trivially swappable of size 1
__std_reverse_trivially_swappable_1(_First, _Last);
}

template <class _BidIt>
void _Reverse_unchecked1(const _BidIt _First, const _BidIt _Last, integral_constant<size_t, 2>) {
// reverse elements in [_First, _Last), pointers to trivially swappable of size 2
__std_reverse_trivially_swappable_2(_First, _Last);
}

template <class _BidIt>
void _Reverse_unchecked1(const _BidIt _First, const _BidIt _Last, integral_constant<size_t, 4>) {
// reverse elements in [_First, _Last), pointers to trivially swappable of size 4
__std_reverse_trivially_swappable_4(_First, _Last);
}

template <class _BidIt>
void _Reverse_unchecked1(const _BidIt _First, const _BidIt _Last, integral_constant<size_t, 8>) {
// reverse elements in [_First, _Last), pointers to trivially swappable of size 8
__std_reverse_trivially_swappable_8(_First, _Last);
}
#endif // _USE_STD_VECTOR_ALGORITHMS

template <class _BidIt>
void _Reverse_unchecked(const _BidIt _First, const _BidIt _Last) {
// reverse elements in [_First, _Last), choose optimization
#if _USE_STD_VECTOR_ALGORITHMS
using _Elem = remove_pointer_t<_BidIt>;
constexpr size_t _Opt =
is_pointer_v<_BidIt> //
&& _Is_trivially_swappable_v<_Elem> //
&& !is_volatile_v<_Elem> //
&& (sizeof(_Elem) == 1 || sizeof(_Elem) == 2 || sizeof(_Elem) == 4 || sizeof(_Elem) == 8)
? sizeof(_Elem)
: 0;
#else // ^^^ vectorize / no vectorize vvv
constexpr size_t _Opt = 0;
#endif // _USE_STD_VECTOR_ALGORITHMS
_Reverse_unchecked1(_First, _Last, integral_constant<size_t, _Opt>{});
}

template <class _BidIt>
void reverse(const _BidIt _First, const _BidIt _Last) {
// reverse elements in [_First, _Last)
_Adl_verify_range(_First, _Last);
_Reverse_unchecked(_Get_unwrapped(_First), _Get_unwrapped(_Last));
}
#endif // _HAS_IF_CONSTEXPR

#if _HAS_CXX17
template <class _ExPo, class _BidIt, _Enable_if_execution_policy_t<_ExPo> = 0>
Expand Down Expand Up @@ -5694,19 +5620,19 @@ _FwdIt _Rotate_unchecked1(_FwdIt _First, _FwdIt _Mid, _FwdIt _Last, forward_iter
template <class _BidIt>
_BidIt _Rotate_unchecked1(_BidIt _First, _BidIt _Mid, _BidIt _Last, bidirectional_iterator_tag) {
// rotate [_First, _Last) left by distance(_First, _Mid) positions, bidirectional iterators
_Reverse_unchecked(_First, _Mid);
_Reverse_unchecked(_Mid, _Last);
_STD reverse(_First, _Mid);
_STD reverse(_Mid, _Last);
auto _Tmp = _Reverse_until_sentinel_unchecked(_First, _Mid, _Last);
_Reverse_unchecked(_Tmp.first, _Tmp.second);
_STD reverse(_Tmp.first, _Tmp.second);
return _Mid != _Tmp.first ? _Tmp.first : _Tmp.second;
}

template <class _RanIt>
_RanIt _Rotate_unchecked1(_RanIt _First, _RanIt _Mid, _RanIt _Last, random_access_iterator_tag) {
// rotate [_First, _Last) left by distance(_First, _Mid) positions, random-access iterators
_Reverse_unchecked(_First, _Mid);
_Reverse_unchecked(_Mid, _Last);
_Reverse_unchecked(_First, _Last);
_STD reverse(_First, _Mid);
_STD reverse(_Mid, _Last);
_STD reverse(_First, _Last);
return _First + (_Last - _Mid);
}

Expand Down
Loading

0 comments on commit f8f6dff

Please sign in to comment.