From cf8979b0e09bec41c9659d3e25353198f78d02a3 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 16 Mar 2024 16:57:56 +0200 Subject: [PATCH 01/13] Remove attempt on unsized find vectorization... ... except for 1-byte element, where we can still yield to `memchr` drive-by: signed coverage doesn't make difference here --- benchmarks/src/find_and_count.cpp | 19 ------- stl/inc/xutility | 80 +++++++++------------------ stl/src/vector_algorithms.cpp | 89 +++++-------------------------- 3 files changed, 37 insertions(+), 151 deletions(-) diff --git a/benchmarks/src/find_and_count.cpp b/benchmarks/src/find_and_count.cpp index 0b7561d4a9..7b205aee6a 100644 --- a/benchmarks/src/find_and_count.cpp +++ b/benchmarks/src/find_and_count.cpp @@ -42,31 +42,12 @@ BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); - -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); - -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); - -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); - BENCHMARK_MAIN(); diff --git a/stl/inc/xutility b/stl/inc/xutility index c946f9379d..8dde034ab1 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -90,11 +90,6 @@ const void* __stdcall __std_find_trivial_2(const void* _First, const void* _Last const void* __stdcall __std_find_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; const void* __stdcall __std_find_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; -const void* __stdcall __std_find_trivial_unsized_1(const void* _First, uint8_t _Val) noexcept; -const void* __stdcall __std_find_trivial_unsized_2(const void* _First, uint16_t _Val) noexcept; -const void* __stdcall __std_find_trivial_unsized_4(const void* _First, uint32_t _Val) noexcept; -const void* __stdcall __std_find_trivial_unsized_8(const void* _First, uint64_t _Val) noexcept; - const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; @@ -170,27 +165,6 @@ _Ty* __std_find_trivial(_Ty* const _First, _Ty* const _Last, const _TVal _Val) n } } -template -_Ty* __std_find_trivial_unsized(_Ty* const _First, const _TVal _Val) noexcept { - if constexpr (is_pointer_v<_TVal> || is_null_pointer_v<_TVal>) { - return _STD __std_find_trivial_unsized(_First, reinterpret_cast(_Val)); - } else if constexpr (sizeof(_Ty) == 1) { - return const_cast<_Ty*>( - static_cast(::__std_find_trivial_unsized_1(_First, static_cast(_Val)))); - } else if constexpr (sizeof(_Ty) == 2) { - return const_cast<_Ty*>( - static_cast(::__std_find_trivial_unsized_2(_First, static_cast(_Val)))); - } else if constexpr (sizeof(_Ty) == 4) { - return const_cast<_Ty*>( - static_cast(::__std_find_trivial_unsized_4(_First, static_cast(_Val)))); - } else if constexpr (sizeof(_Ty) == 8) { - return const_cast<_Ty*>( - static_cast(::__std_find_trivial_unsized_8(_First, static_cast(_Val)))); - } else { - static_assert(_Always_false<_Ty>, "Unexpected size"); - } -} - template _Ty* __std_min_element(_Ty* const _First, _Ty* const _Last) noexcept { constexpr bool _Signed = is_signed_v<_Ty>; @@ -5976,7 +5950,14 @@ namespace ranges { requires indirect_binary_predicate, const _Ty*> _NODISCARD constexpr _It _Find_unchecked(_It _First, const _Se _Last, const _Ty& _Val, _Pj _Proj = {}) { constexpr bool _Is_sized = sized_sentinel_for<_Se, _It>; - if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> && _Sized_or_unreachable_sentinel_for<_Se, _It> + + // TRANSITION, DevCom - 1614562 : not trying wmemchr + // Only byte sized elements are suitable for unsized optimization + constexpr bool _Is_memchr_sized_elements = sizeof(_Iter_value_t<_It>) == 1; + + if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> + && (_Is_sized && (_USE_STD_VECTOR_ALGORITHMS || _Is_memchr_sized_elements) + || same_as<_Se, unreachable_sentinel_t> && _Is_memchr_sized_elements) && same_as<_Pj, identity>) { if (!_STD is_constant_evaluated()) { if (!_STD _Could_compare_equal_to_value_type<_It>(_Val)) { @@ -5989,48 +5970,37 @@ namespace ranges { } using _Ptr_t = remove_reference_t<_Iter_ref_t<_It>>*; -#if _USE_STD_VECTOR_ALGORITHMS + const auto _First_ptr = _STD _To_address(_First); _Ptr_t _Result; - +#if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_sized) { const auto _Last_ptr = _First_ptr + (_Last - _First); + _Result = _STD __std_find_trivial(_First_ptr, _Last_ptr, _Val); + } else { + _STL_INTERNAL_STATIC_ASSERT(_Is_memchr_sized_elements); + + _Result = static_cast<_Ptr_t>(_CSTD memchr(_First_ptr, static_cast(_Val), SIZE_MAX)); + } +#else // ^^^ _USE_STD_VECTOR_ALGORITHMS / !_USE_STD_VECTOR_ALGORITHMS vvv + _STL_INTERNAL_STATIC_ASSERT(_Is_memchr_sized_elements); - _Result = _STD __std_find_trivial(_First_ptr, _Last_ptr, _Val); + size_t _Count; + if constexpr (_Is_sized) { + _Count = static_cast(_Last - _First); } else { - _Result = _STD __std_find_trivial_unsized(_First_ptr, _Val); + _Count = SIZE_MAX; } + _Result = static_cast<_Ptr_t>(_CSTD memchr(_First_ptr, static_cast(_Val), _Count)); +#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ + if constexpr (is_pointer_v<_It>) { return _Result; } else { return _RANGES next(_STD move(_First), _Result - _First_ptr); } -#else // ^^^ _USE_STD_VECTOR_ALGORITHMS / !_USE_STD_VECTOR_ALGORITHMS vvv - if constexpr (sizeof(_Iter_value_t<_It>) == 1) { - size_t _Count; - if constexpr (_Is_sized) { - _Count = static_cast(_Last - _First); - } else { - _Count = SIZE_MAX; - } - - const auto _First_ptr = _STD to_address(_First); - const auto _Result = - static_cast<_Ptr_t>(_CSTD memchr(_First_ptr, static_cast(_Val), _Count)); - if (_Result) { - if constexpr (is_pointer_v<_It>) { - return _Result; - } else { - return _RANGES next(_STD move(_First), _Result - _First_ptr); - } - } else { - return _RANGES next(_STD move(_First), _Last); - } - } - // TRANSITION, DevCom-1614562: not trying wmemchr -#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ } } diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index bf9de5a308..01ea49c2ca 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1837,79 +1837,9 @@ namespace { #endif // !_M_ARM64EC }; - // The below functions have exactly the same signature as the extern "C" functions, up to calling convention. - // This makes sure the template specialization is fused with the extern "C" function. - // In optimized builds it avoids an extra call, as this function is too large to inline. - - template - const void* __stdcall __std_find_trivial_unsized_impl(const void* _First, const _Ty _Val) noexcept { -#ifndef _M_ARM64EC - if ((reinterpret_cast(_First) & (sizeof(_Ty) - 1)) != 0) { - // _First isn't aligned to sizeof(_Ty), so we need to use the scalar fallback below. - // This can happen with 8-byte elements on x86's 4-aligned stack. It can also happen with packed structs. - } else if (_Use_avx2()) { - _Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414 - - // We read by vector-sized pieces, and we align pointers to vector-sized boundary. - // From start partial piece we mask out matches that don't belong to the range. - // This makes sure we never cross page boundary, thus we read 'as if' sequentially. - constexpr size_t _Vector_pad_mask = 0x1F; - constexpr unsigned int _Full_mask = 0xFFFF'FFFF; - - const __m256i _Comparand = _Traits::_Set_avx(_Val); - const intptr_t _Pad_start = reinterpret_cast(_First) & _Vector_pad_mask; - const unsigned int _Mask = _Full_mask << _Pad_start; - _Advance_bytes(_First, -_Pad_start); - - __m256i _Data = _mm256_load_si256(static_cast(_First)); - unsigned int _Bingo = static_cast(_mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand))); - - _Bingo &= _Mask; - - for (;;) { - if (_Bingo != 0) { - unsigned long _Offset = _tzcnt_u32(_Bingo); - _Advance_bytes(_First, _Offset); - return _First; - } - - _Advance_bytes(_First, 32); - - _Data = _mm256_load_si256(static_cast(_First)); - _Bingo = static_cast(_mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand))); - } - } else if (_Traits::_Sse_available()) { - // We read by vector-sized pieces, and we align pointers to vector-sized boundary. - // From start partial piece we mask out matches that don't belong to the range. - // This makes sure we never cross page boundary, thus we read 'as if' sequentially. - constexpr size_t _Vector_pad_mask = 0xF; - constexpr unsigned int _Full_mask = 0xFFFF; - - const __m128i _Comparand = _Traits::_Set_sse(_Val); - const intptr_t _Pad_start = reinterpret_cast(_First) & _Vector_pad_mask; - const unsigned int _Mask = _Full_mask << _Pad_start; - _Advance_bytes(_First, -_Pad_start); - - __m128i _Data = _mm_load_si128(static_cast(_First)); - unsigned int _Bingo = static_cast(_mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand))); - - _Bingo &= _Mask; - - for (;;) { - if (_Bingo != 0) { - unsigned long _Offset; - _BitScanForward(&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable] - _Advance_bytes(_First, _Offset); - return _First; - } - - _Advance_bytes(_First, 16); - - _Data = _mm_load_si128(static_cast(_First)); - _Bingo = static_cast(_mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand))); - } - } -#endif // !_M_ARM64EC + // TRANSITION, ABI: used only in functions preserved for binary compatibility + template + const void* __std_find_trivial_unsized_impl(const void* _First, const _Ty _Val) noexcept { auto _Ptr = static_cast(_First); while (*_Ptr != _Val) { ++_Ptr; @@ -2079,20 +2009,25 @@ namespace { extern "C" { +// TRANSITION, ABI: preserved for binary compatibility const void* __stdcall __std_find_trivial_unsized_1(const void* const _First, const uint8_t _Val) noexcept { - return __std_find_trivial_unsized_impl<_Find_traits_1>(_First, _Val); + return memchr(_First, _Val, SIZE_MAX); } +// TRANSITION, ABI: preserved for binary compatibility const void* __stdcall __std_find_trivial_unsized_2(const void* const _First, const uint16_t _Val) noexcept { - return __std_find_trivial_unsized_impl<_Find_traits_2>(_First, _Val); + // TRANSITION, DevCom-1614562: not trying wmemchr + return __std_find_trivial_unsized_impl(_First, _Val); } +// TRANSITION, ABI: preserved for binary compatibility const void* __stdcall __std_find_trivial_unsized_4(const void* const _First, const uint32_t _Val) noexcept { - return __std_find_trivial_unsized_impl<_Find_traits_4>(_First, _Val); + return __std_find_trivial_unsized_impl(_First, _Val); } +// TRANSITION, ABI: preserved for binary compatibility const void* __stdcall __std_find_trivial_unsized_8(const void* const _First, const uint64_t _Val) noexcept { - return __std_find_trivial_unsized_impl<_Find_traits_8>(_First, _Val); + return __std_find_trivial_unsized_impl(_First, _Val); } const void* __stdcall __std_find_trivial_1( From 3fe60e17c5f5de51419d7307297075f4c4892602 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 16 Mar 2024 17:08:52 +0200 Subject: [PATCH 02/13] restore the comment, it is still valid for the rest --- stl/src/vector_algorithms.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 01ea49c2ca..3af4d32a42 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1847,6 +1847,10 @@ namespace { return _Ptr; } + // The below functions have exactly the same signature as the extern "C" functions, up to calling convention. + // This makes sure the template specialization can be fused with the extern "C" function. + // In optimized builds it avoids an extra call, as these functions are too large to inline. + template const void* __stdcall __std_find_trivial_impl(const void* _First, const void* _Last, _Ty _Val) noexcept { #ifndef _M_ARM64EC From bc73a3dab9f7379f96c3edfc1683e6d126cd7377 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 16 Mar 2024 17:29:07 +0200 Subject: [PATCH 03/13] more parentheses for clang --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 8dde034ab1..01dd19b436 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5956,7 +5956,7 @@ namespace ranges { constexpr bool _Is_memchr_sized_elements = sizeof(_Iter_value_t<_It>) == 1; if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> - && (_Is_sized && (_USE_STD_VECTOR_ALGORITHMS || _Is_memchr_sized_elements) + && ((_Is_sized && (_USE_STD_VECTOR_ALGORITHMS || _Is_memchr_sized_elements)) || same_as<_Se, unreachable_sentinel_t> && _Is_memchr_sized_elements) && same_as<_Pj, identity>) { if (!_STD is_constant_evaluated()) { From 906025ee90f737e486ed9f018130da733a176a15 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 16 Mar 2024 17:58:56 +0200 Subject: [PATCH 04/13] even more parentheses for clang --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 01dd19b436..5672444ab2 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5957,7 +5957,7 @@ namespace ranges { if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> && ((_Is_sized && (_USE_STD_VECTOR_ALGORITHMS || _Is_memchr_sized_elements)) - || same_as<_Se, unreachable_sentinel_t> && _Is_memchr_sized_elements) + ||( same_as<_Se, unreachable_sentinel_t> && _Is_memchr_sized_elements)) && same_as<_Pj, identity>) { if (!_STD is_constant_evaluated()) { if (!_STD _Could_compare_equal_to_value_type<_It>(_Val)) { From 7b6f1031f46b28bbb85e6c9a523042023dc186fb Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 16 Mar 2024 18:07:05 +0200 Subject: [PATCH 05/13] clang format --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 5672444ab2..4abfda3679 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5957,7 +5957,7 @@ namespace ranges { if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> && ((_Is_sized && (_USE_STD_VECTOR_ALGORITHMS || _Is_memchr_sized_elements)) - ||( same_as<_Se, unreachable_sentinel_t> && _Is_memchr_sized_elements)) + || (same_as<_Se, unreachable_sentinel_t> && _Is_memchr_sized_elements)) && same_as<_Pj, identity>) { if (!_STD is_constant_evaluated()) { if (!_STD _Could_compare_equal_to_value_type<_It>(_Val)) { From 3de165b415bda2dbbe0ecd094230abf1493ae15c Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 16 Mar 2024 19:08:39 +0200 Subject: [PATCH 06/13] reduce newlines to make the PR more red --- stl/inc/xutility | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 4abfda3679..d4f0b34bbd 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5949,11 +5949,10 @@ namespace ranges { template _Se, class _Ty, class _Pj = identity> requires indirect_binary_predicate, const _Ty*> _NODISCARD constexpr _It _Find_unchecked(_It _First, const _Se _Last, const _Ty& _Val, _Pj _Proj = {}) { - constexpr bool _Is_sized = sized_sentinel_for<_Se, _It>; - // TRANSITION, DevCom - 1614562 : not trying wmemchr // Only byte sized elements are suitable for unsized optimization constexpr bool _Is_memchr_sized_elements = sizeof(_Iter_value_t<_It>) == 1; + constexpr bool _Is_sized = sized_sentinel_for<_Se, _It>; if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> && ((_Is_sized && (_USE_STD_VECTOR_ALGORITHMS || _Is_memchr_sized_elements)) @@ -5969,8 +5968,7 @@ namespace ranges { } } - using _Ptr_t = remove_reference_t<_Iter_ref_t<_It>>*; - + using _Ptr_t = remove_reference_t<_Iter_ref_t<_It>>*; const auto _First_ptr = _STD _To_address(_First); _Ptr_t _Result; From bebcbee6631d5704ff4e018f18b62def3fe56dd3 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 16 Mar 2024 19:10:08 +0200 Subject: [PATCH 07/13] Aha, it is now `const` --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 3af4d32a42..d1e2b654e4 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1839,7 +1839,7 @@ namespace { // TRANSITION, ABI: used only in functions preserved for binary compatibility template - const void* __std_find_trivial_unsized_impl(const void* _First, const _Ty _Val) noexcept { + const void* __std_find_trivial_unsized_impl(const void* const _First, const _Ty _Val) noexcept { auto _Ptr = static_cast(_First); while (*_Ptr != _Val) { ++_Ptr; From 257f6852d661f4ed3df1497bbc6183f1efb678a4 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 18 Mar 2024 11:36:12 -0700 Subject: [PATCH 08/13] Remove spaces in bug citation comment. --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index d4f0b34bbd..32d64e1f2a 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5949,7 +5949,7 @@ namespace ranges { template _Se, class _Ty, class _Pj = identity> requires indirect_binary_predicate, const _Ty*> _NODISCARD constexpr _It _Find_unchecked(_It _First, const _Se _Last, const _Ty& _Val, _Pj _Proj = {}) { - // TRANSITION, DevCom - 1614562 : not trying wmemchr + // TRANSITION, DevCom-1614562: not trying wmemchr // Only byte sized elements are suitable for unsized optimization constexpr bool _Is_memchr_sized_elements = sizeof(_Iter_value_t<_It>) == 1; constexpr bool _Is_sized = sized_sentinel_for<_Se, _It>; From d4af0d7c69a197ec84e05f6d603d9f12179a0336 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 18 Mar 2024 12:12:22 -0700 Subject: [PATCH 09/13] `_Is_memchr_sized_elements` => `_Single_byte_elements`, also rephrase comment --- stl/inc/xutility | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 32d64e1f2a..353639fbd3 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5950,13 +5950,13 @@ namespace ranges { requires indirect_binary_predicate, const _Ty*> _NODISCARD constexpr _It _Find_unchecked(_It _First, const _Se _Last, const _Ty& _Val, _Pj _Proj = {}) { // TRANSITION, DevCom-1614562: not trying wmemchr - // Only byte sized elements are suitable for unsized optimization - constexpr bool _Is_memchr_sized_elements = sizeof(_Iter_value_t<_It>) == 1; - constexpr bool _Is_sized = sized_sentinel_for<_Se, _It>; + // Only one-byte elements are suitable for unsized optimization + constexpr bool _Single_byte_elements = sizeof(_Iter_value_t<_It>) == 1; + constexpr bool _Is_sized = sized_sentinel_for<_Se, _It>; if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> - && ((_Is_sized && (_USE_STD_VECTOR_ALGORITHMS || _Is_memchr_sized_elements)) - || (same_as<_Se, unreachable_sentinel_t> && _Is_memchr_sized_elements)) + && ((_Is_sized && (_USE_STD_VECTOR_ALGORITHMS || _Single_byte_elements)) + || (same_as<_Se, unreachable_sentinel_t> && _Single_byte_elements)) && same_as<_Pj, identity>) { if (!_STD is_constant_evaluated()) { if (!_STD _Could_compare_equal_to_value_type<_It>(_Val)) { @@ -5977,12 +5977,12 @@ namespace ranges { const auto _Last_ptr = _First_ptr + (_Last - _First); _Result = _STD __std_find_trivial(_First_ptr, _Last_ptr, _Val); } else { - _STL_INTERNAL_STATIC_ASSERT(_Is_memchr_sized_elements); + _STL_INTERNAL_STATIC_ASSERT(_Single_byte_elements); _Result = static_cast<_Ptr_t>(_CSTD memchr(_First_ptr, static_cast(_Val), SIZE_MAX)); } #else // ^^^ _USE_STD_VECTOR_ALGORITHMS / !_USE_STD_VECTOR_ALGORITHMS vvv - _STL_INTERNAL_STATIC_ASSERT(_Is_memchr_sized_elements); + _STL_INTERNAL_STATIC_ASSERT(_Single_byte_elements); size_t _Count; if constexpr (_Is_sized) { From df909551dfe6b1eb274b7919814a1e220412049d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 18 Mar 2024 12:20:51 -0700 Subject: [PATCH 10/13] Simplify with a conditional expression. --- stl/inc/xutility | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 353639fbd3..c182446cc1 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5955,8 +5955,8 @@ namespace ranges { constexpr bool _Is_sized = sized_sentinel_for<_Se, _It>; if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> - && ((_Is_sized && (_USE_STD_VECTOR_ALGORITHMS || _Single_byte_elements)) - || (same_as<_Se, unreachable_sentinel_t> && _Single_byte_elements)) + && (_Single_byte_elements ? _Is_sized || same_as<_Se, unreachable_sentinel_t> + : _Is_sized && _USE_STD_VECTOR_ALGORITHMS) && same_as<_Pj, identity>) { if (!_STD is_constant_evaluated()) { if (!_STD _Could_compare_equal_to_value_type<_It>(_Val)) { From c33cf5e9826429edc49afcb09d71000f2d835368 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 18 Mar 2024 12:51:07 -0700 Subject: [PATCH 11/13] Refactor to avoid two `memchr()` paths. --- stl/inc/xutility | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index c182446cc1..ed91b77f88 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5973,27 +5973,24 @@ namespace ranges { _Ptr_t _Result; #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Is_sized) { + if constexpr (!_Single_byte_elements) { + _STL_INTERNAL_STATIC_ASSERT(_Is_sized); const auto _Last_ptr = _First_ptr + (_Last - _First); _Result = _STD __std_find_trivial(_First_ptr, _Last_ptr, _Val); - } else { + } else +#endif // ^^^ _USE_STD_VECTOR_ALGORITHMS ^^^ + { _STL_INTERNAL_STATIC_ASSERT(_Single_byte_elements); + size_t _Count; + if constexpr (_Is_sized) { + _Count = static_cast(_Last - _First); + } else { + _Count = SIZE_MAX; + } - _Result = static_cast<_Ptr_t>(_CSTD memchr(_First_ptr, static_cast(_Val), SIZE_MAX)); - } -#else // ^^^ _USE_STD_VECTOR_ALGORITHMS / !_USE_STD_VECTOR_ALGORITHMS vvv - _STL_INTERNAL_STATIC_ASSERT(_Single_byte_elements); - - size_t _Count; - if constexpr (_Is_sized) { - _Count = static_cast(_Last - _First); - } else { - _Count = SIZE_MAX; + _Result = static_cast<_Ptr_t>(_CSTD memchr(_First_ptr, static_cast(_Val), _Count)); } - _Result = static_cast<_Ptr_t>(_CSTD memchr(_First_ptr, static_cast(_Val), _Count)); -#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ - if constexpr (is_pointer_v<_It>) { return _Result; } else { From f04f0614361001ac10a95fc055964564df3ea37e Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 18 Mar 2024 14:37:35 -0700 Subject: [PATCH 12/13] Restore logic for when `memchr` returns null. --- stl/inc/xutility | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/stl/inc/xutility b/stl/inc/xutility index ed91b77f88..53719feb08 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5989,6 +5989,12 @@ namespace ranges { } _Result = static_cast<_Ptr_t>(_CSTD memchr(_First_ptr, static_cast(_Val), _Count)); + + if constexpr (_Is_sized) { + if (_Result == nullptr) { + return _RANGES next(_STD move(_First), _Last); + } + } } if constexpr (is_pointer_v<_It>) { From bd6a5f0a39d58c3a49b543498e19475a28ca7b0b Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 18 Mar 2024 23:33:14 -0700 Subject: [PATCH 13/13] Improve perf for sized single-byte, adjust comment. --- stl/inc/xutility | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 53719feb08..260c44f211 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5950,7 +5950,7 @@ namespace ranges { requires indirect_binary_predicate, const _Ty*> _NODISCARD constexpr _It _Find_unchecked(_It _First, const _Se _Last, const _Ty& _Val, _Pj _Proj = {}) { // TRANSITION, DevCom-1614562: not trying wmemchr - // Only one-byte elements are suitable for unsized optimization + // Only single-byte elements are suitable for unsized optimization constexpr bool _Single_byte_elements = sizeof(_Iter_value_t<_It>) == 1; constexpr bool _Is_sized = sized_sentinel_for<_Se, _It>; @@ -5973,8 +5973,8 @@ namespace ranges { _Ptr_t _Result; #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (!_Single_byte_elements) { - _STL_INTERNAL_STATIC_ASSERT(_Is_sized); + if constexpr (_Is_sized) { + // When _Is_sized && _Single_byte_elements, prefer this over memchr() for performance const auto _Last_ptr = _First_ptr + (_Last - _First); _Result = _STD __std_find_trivial(_First_ptr, _Last_ptr, _Val); } else