Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vector_algorithms.cpp: Add vzeroupper, so that it is there even in /Od #3630

Merged
merged 2 commits into from
Apr 7, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions stl/src/vector_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,19 @@ namespace {
void _Advance_bytes(const void*& _Target, ptrdiff_t _Offset) noexcept {
_Target = static_cast<const unsigned char*>(_Target) + _Offset;
}

// TRANSITION, DevCom-10331414
struct [[nodiscard]] _Zeroupper_on_exit {
_Zeroupper_on_exit() = default;

_Zeroupper_on_exit(const _Zeroupper_on_exit&) = delete;
_Zeroupper_on_exit& operator=(const _Zeroupper_on_exit&) = delete;

~_Zeroupper_on_exit() {
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
_mm256_zeroupper();
}
};

} // unnamed namespace

extern "C" {
Expand All @@ -89,6 +102,8 @@ __declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias(
_Advance_bytes(_First1, 32);
_Advance_bytes(_First2, 32);
} while (_First1 != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

constexpr size_t _Mask_16 = ~((static_cast<size_t>(1) << 4) - 1);
Expand Down Expand Up @@ -174,6 +189,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
_mm256_storeu_si256(static_cast<__m256i*>(_Last), _Left_reversed);
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 32 && _Use_sse42()) {
Expand Down Expand Up @@ -214,6 +231,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
_mm256_storeu_si256(static_cast<__m256i*>(_Last), _Left_reversed);
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 32 && _Use_sse42()) {
Expand Down Expand Up @@ -250,6 +269,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs
_mm256_storeu_si256(static_cast<__m256i*>(_Last), _Left_reversed);
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 32 && _Use_sse2()) {
Expand Down Expand Up @@ -284,6 +305,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs
_mm256_storeu_si256(static_cast<__m256i*>(_Last), _Left_reversed);
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 32 && _Use_sse2()) {
Expand Down Expand Up @@ -320,6 +343,8 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
_mm256_storeu_si256(static_cast<__m256i*>(_Dest), _Block_reversed);
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 16 && _Use_sse42()) {
Expand Down Expand Up @@ -355,6 +380,8 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
_mm256_storeu_si256(static_cast<__m256i*>(_Dest), _Block_reversed);
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 16 && _Use_sse42()) {
Expand Down Expand Up @@ -387,6 +414,8 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
_mm256_storeu_si256(static_cast<__m256i*>(_Dest), _Block_reversed);
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 16 && _Use_sse2()) {
Expand Down Expand Up @@ -417,6 +446,8 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
_mm256_storeu_si256(static_cast<__m256i*>(_Dest), _Block_reversed);
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 16 && _Use_sse2()) {
Expand Down Expand Up @@ -1200,6 +1231,8 @@ namespace {
template <class _Traits, class _Ty>
const void* __stdcall __std_find_trivial_unsized(const void* _First, const _Ty _Val) noexcept {
if (_Use_avx2()) {
_Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414

// We read by vector-sized pieces, and we align pointers to vector-sized boundary.
// From start partial piece we mask out matches that don't belong to the range.
// This makes sure we never cross page boundary, thus we read 'as if' sequentially.
Expand Down Expand Up @@ -1282,6 +1315,8 @@ namespace {

const size_t _Avx_size = _Size_bytes & ~size_t{0x1F};
if (_Avx_size != 0 && _Use_avx2()) {
_Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414

const __m256i _Comparand = _Traits::_Set_avx(_Val);
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Avx_size);
Expand Down Expand Up @@ -1341,6 +1376,8 @@ namespace {
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);
_Size_bytes &= 0x1F;

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

const size_t _Sse_size = _Size_bytes & ~size_t{0xF};
Expand Down