diff --git a/stl/inc/atomic b/stl/inc/atomic index 633280f5d5..40a8edc9e9 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -20,6 +20,7 @@ #if _HAS_CXX20 #include #endif // _HAS_CXX20 +#include #pragma pack(push, _CRT_PACKING) #pragma warning(push, _STL_WARNING_LEVEL) @@ -48,32 +49,31 @@ _STL_DISABLE_CLANG_WARNINGS #endif // _DEBUG #endif // _INVALID_MEMORY_ORDER -#if 0 // TRANSITION, ABI // MACRO _STD_COMPARE_EXCHANGE_128 -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B || defined(_M_ARM64) +#ifdef _WIN64 +#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 #define _STD_COMPARE_EXCHANGE_128 _InterlockedCompareExchange128 -#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B || defined(_M_ARM64) -#if defined(_M_X64) && !_STD_ATOMIC_ALWAYS_USE_CMPXCHG16B +#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv // 16-byte atomics are separately compiled for x64, as not all x64 hardware has the cmpxchg16b // instruction; in the event this instruction is not available, the fallback is a global -// CRITICAL_SECTION shared by all 16-byte atomics. +// synchronization object shared by all 16-byte atomics. // (Note: machines without this instruction typically have 2 cores or fewer, so this isn't too bad) // All pointer parameters must be 16-byte aligned. -_NODISCARD extern "C" unsigned char __cdecl __std_atomic_compare_exchange_128( +extern "C" _NODISCARD unsigned char __stdcall __std_atomic_compare_exchange_128( _Inout_bytecount_(16) long long* _Destination, _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow, _Inout_bytecount_(16) long long* _ComparandResult) noexcept; -_NODISCARD extern "C" bool __cdecl __std_atomic_has_cmpxchg16b() noexcept; +extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; #define _STD_COMPARE_EXCHANGE_128 __std_atomic_compare_exchange_128 -#endif // defined(_M_X64) && !_STD_ATOMIC_ALWAYS_USE_CMPXCHG16B +#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 +#endif // _WIN64 // MACRO _ATOMIC_HAS_DCAS // Controls whether atomic::is_always_lock_free triggers for sizeof(void *) or 2 * sizeof(void *) -#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B || !defined(_M_X64) +#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 || !defined(_M_X64) #define _ATOMIC_HAS_DCAS 1 -#else // ^^ We always have DCAS / We only sometimes have DCAS vvv +#else // ^^^ We always have DCAS / We only sometimes have DCAS vvv #define _ATOMIC_HAS_DCAS 0 -#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B || !defined(_M_X64) -#endif // TRANSITION, ABI +#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 || !defined(_M_X64) // MACRO _ATOMIC_CHOOSE_INTRINSIC #if defined(_M_IX86) || defined(_M_X64) @@ -118,6 +118,9 @@ _NODISCARD extern "C" bool __cdecl __std_atomic_has_cmpxchg16b() noexcept; #define ATOMIC_LLONG_LOCK_FREE 2 #define ATOMIC_POINTER_LOCK_FREE 2 +_EXTERN_C +_Smtx_t* __stdcall __std_atomic_get_mutex(const void* _Key) noexcept; +_END_EXTERN_C // Padding bits should not participate in cmpxchg comparison starting in C++20. // Clang does not have __builtin_zero_non_value_bits to exclude these bits to implement this C++20 feature. // The EDG front-end substitutes everything and runs into incomplete types passed to atomic. 
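The new __std_atomic_get_mutex export backs the non-lock-free case of atomic_ref: unlike atomic, an atomic_ref cannot embed a spinlock next to the value, so the lock has to be found from the referenced object's address. Its definition is separately compiled and not part of this diff; the following is only a minimal sketch of the general address-keyed lock-pool technique, with assumed names and an assumed table size, not the actual implementation.

#include <cstdint>
#include <mutex>

// Illustrative only: hash an object address into a fixed pool of mutexes so that
// every atomic_ref bound to the same object agrees on which lock to take.
namespace sketch {
    inline constexpr std::size_t table_size = 256; // assumption: power of two
    inline std::mutex table[table_size];

    inline std::mutex& get_mutex(const void* key) noexcept {
        auto bits = reinterpret_cast<std::uintptr_t>(key);
        bits ^= bits >> 9; // fold in upper bits; low bits are often zero due to alignment
        return table[bits & (table_size - 1)];
    }
} // namespace sketch

Distinct objects may share a mutex in such a scheme; that costs contention, never correctness, because the pool is keyed consistently by address.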
@@ -341,6 +344,12 @@ struct _Atomic_storage_traits { // properties for how _Ty is stored in an atomic static constexpr bool _Uses_padding = _Padding_size != 0; }; +template +struct _Atomic_storage_traits<_Ty&> { // properties for how _Ty is stored in an atomic_ref + static constexpr size_t _Storage_size = sizeof(_Ty); + static constexpr bool _Uses_padding = false; +}; + // STRUCT TEMPLATE _Atomic_padded template ::_Uses_padding> struct _Atomic_padded { // aggregate to allow explicit constexpr zeroing of padding @@ -352,11 +361,29 @@ template struct _Atomic_padded<_Ty, false> { alignas(sizeof(_Ty)) mutable _Ty _Value; // align to sizeof(T); x86 stack aligns 8-byte objects on 4-byte boundaries }; + +template +struct _Atomic_padded<_Ty&, false> { + _Ty& _Value; +}; + #endif // TRANSITION, ABI +template +struct _Atomic_storage_types { + using _TStorage = _Atomic_padded<_Ty>; + using _Spinlock = long; +}; + +template +struct _Atomic_storage_types<_Ty&> { + using _TStorage = _Ty&; + using _Spinlock = _Smtx_t*; // POINTER TO mutex +}; + // STRUCT TEMPLATE _Atomic_storage #if 1 // TRANSITION, ABI -template +template )> #else // ^^^ don't break ABI / break ABI vvv template ::_Storage_size> #endif // TRANSITION, ABI @@ -371,8 +398,9 @@ void _Atomic_wait_direct( const _Value_type _Observed_bytes = _Atomic_reinterpret_as<_Value_type>(_This->load(_Order)); if (_Expected_bytes != _Observed_bytes) { #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Mask{_Form_mask}; + using _TVal = remove_reference_t<_Ty>; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; const _Value_type _Mask_val = _Atomic_reinterpret_as<_Value_type>(_Mask._Ref()); if (((_Expected_bytes ^ _Observed_bytes) & _Mask_val) == 0) { @@ -391,13 +419,13 @@ void _Atomic_wait_direct( #endif // _HAS_CXX20 #if 1 // TRANSITION, ABI -inline void _Atomic_lock_spinlock(long& _Spinlock) noexcept { +inline void _Atomic_lock_acquire(long& _Spinlock) noexcept { while (_InterlockedExchange(&_Spinlock, 1)) { _YIELD_PROCESSOR(); } } -inline void _Atomic_unlock_spinlock(long& _Spinlock) noexcept { +inline void _Atomic_lock_release(long& _Spinlock) noexcept { #if defined(_M_ARM) || defined(_M_ARM64) _Memory_barrier(); __iso_volatile_store32(reinterpret_cast(&_Spinlock), 0); @@ -407,32 +435,56 @@ inline void _Atomic_unlock_spinlock(long& _Spinlock) noexcept { #endif // hardware } -class _Spinlock_guard { + +inline void _Atomic_lock_acquire(_Smtx_t* _Spinlock) noexcept { + _Smtx_lock_exclusive(_Spinlock); +} + +inline void _Atomic_lock_release(_Smtx_t* _Spinlock) noexcept { + _Smtx_unlock_exclusive(_Spinlock); +} + +template +class _Atomic_lock_guard { public: - explicit _Spinlock_guard(long& _Spinlock_) noexcept : _Spinlock(_Spinlock_) { - _Atomic_lock_spinlock(_Spinlock); + explicit _Atomic_lock_guard(_Spinlock_t& _Spinlock_) noexcept : _Spinlock(_Spinlock_) { + _Atomic_lock_acquire(_Spinlock); } - ~_Spinlock_guard() { - _Atomic_unlock_spinlock(_Spinlock); + ~_Atomic_lock_guard() { + _Atomic_lock_release(_Spinlock); } - _Spinlock_guard(const _Spinlock_guard&) = delete; - _Spinlock_guard& operator=(const _Spinlock_guard&) = delete; + _Atomic_lock_guard(const _Atomic_lock_guard&) = delete; + _Atomic_lock_guard& operator=(const _Atomic_lock_guard&) = delete; private: - long& _Spinlock; + _Spinlock_t& _Spinlock; }; #if _HAS_CXX20 -inline bool __stdcall _Atomic_wait_compare_non_lock_free( +template +bool __stdcall _Atomic_wait_compare_non_lock_free( const void* 
_Storage, void* _Comparand, size_t _Size, void* _Spinlock_raw) noexcept { - long& _Spinlock = *static_cast(_Spinlock_raw); - _Atomic_lock_spinlock(_Spinlock); + _Spinlock_t& _Spinlock = *static_cast<_Spinlock_t*>(_Spinlock_raw); + _Atomic_lock_acquire(_Spinlock); const auto _Cmp_result = _CSTD memcmp(_Storage, _Comparand, _Size); - _Atomic_unlock_spinlock(_Spinlock); + _Atomic_lock_release(_Spinlock); return _Cmp_result == 0; } + +#ifdef _WIN64 +inline bool __stdcall _Atomic_wait_compare_16_bytes(const void* _Storage, void* _Comparand, size_t, void*) noexcept { + const auto _Dest = static_cast(const_cast(_Storage)); + const auto _Cmp = static_cast(_Comparand); + alignas(16) long long _Tmp[2] = {_Cmp[0], _Cmp[1]}; +#ifdef _M_X64 + return _STD_COMPARE_EXCHANGE_128(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; +#else // ^^^ _M_X64 / ARM64 vvv + return _InterlockedCompareExchange128_nf(_Dest, _Tmp[1], _Tmp[0], _Tmp) != 0; +#endif // ^^^ ARM64 ^^^ +} +#endif // _WIN64 #endif // _HAS_CXX20 #endif // TRANSITION, ABI @@ -441,40 +493,41 @@ struct _Atomic_storage { // Provides operations common to all specializations of std::atomic, load, store, exchange, and CAS. // Locking version used when hardware has no atomic operations for sizeof(_Ty). + using _TVal = remove_reference_t<_Ty>; + using _Guard = _Atomic_lock_guard::_Spinlock>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage(_Value) { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage(_Value) { // non-atomically initialize this atomic } - void store(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + void store(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // store with sequential consistency _Check_store_memory_order(_Order); - _Lock(); + _Guard _Lock{_Spinlock}; _Storage = _Value; - _Unlock(); } - _NODISCARD _Ty load(const memory_order _Order = memory_order_seq_cst) const noexcept { + _NODISCARD _TVal load(const memory_order _Order = memory_order_seq_cst) const noexcept { // load with sequential consistency _Check_load_memory_order(_Order); - _Lock(); - _Ty _Local(_Storage); - _Unlock(); + _Guard _Lock{_Spinlock}; + _TVal _Local(_Storage); return _Local; } - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange _Value with _Storage with sequential consistency _Check_memory_order(_Order); - _Lock(); - _Ty _Result(_Storage); + _Guard _Lock{_Spinlock}; + _TVal _Result(_Storage); _Storage = _Value; - _Unlock(); return _Result; } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with sequential consistency, plain _Check_memory_order(_Order); const auto _Storage_ptr = _STD addressof(_Storage); @@ -483,50 +536,49 @@ struct _Atomic_storage { #if _CMPXCHG_MASK_OUT_PADDING_BITS __builtin_zero_non_value_bits(_Expected_ptr); #endif // _CMPXCHG_MASK_OUT_PADDING_BITS - _Lock(); + _Guard _Lock{_Spinlock}; #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Local; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Local; const auto _Local_ptr = _Local._Ptr(); - _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_Ty)); 
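For object sizes with no interlocked support, compare_exchange_strong above is a byte comparison performed under the lock: compare the stored bytes with _Expected, then either commit _Desired or copy the observed bytes back into _Expected so the caller can retry. A simplified, portable model of that logic, with the padding-bit masking omitted and a std::mutex standing in for the spinlock/_Smtx_t:

#include <cstring>
#include <mutex>

// Compare raw object representations under the lock; on success install 'desired',
// on failure report the observed value back through 'expected' (as the header does).
template <class T>
bool locked_compare_exchange_strong(T& storage, T& expected, const T desired, std::mutex& lock) noexcept {
    std::lock_guard guard{lock};
    const bool matched = std::memcmp(&storage, &expected, sizeof(T)) == 0;
    if (matched) {
        std::memcpy(&storage, &desired, sizeof(T));
    } else {
        std::memcpy(&expected, &storage, sizeof(T));
    }
    return matched;
}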
+ _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_TVal)); __builtin_zero_non_value_bits(_Local_ptr); - _Result = _CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_Ty)) == 0; + _Result = _CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_TVal)) == 0; } else { - _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0; + _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_TVal)) == 0; } #else // _CMPXCHG_MASK_OUT_PADDING_BITS - _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0; + _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_TVal)) == 0; #endif // _CMPXCHG_MASK_OUT_PADDING_BITS if (_Result) { - _CSTD memcpy(_Storage_ptr, _STD addressof(_Desired), sizeof(_Ty)); + _CSTD memcpy(_Storage_ptr, _STD addressof(_Desired), sizeof(_TVal)); } else { - _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_Ty)); + _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_TVal)); } - _Unlock(); return _Result; } #if _HAS_CXX20 - void wait(_Ty _Expected, memory_order = memory_order_seq_cst) const noexcept { + void wait(_TVal _Expected, memory_order = memory_order_seq_cst) const noexcept { const auto _Storage_ptr = _STD addressof(_Storage); const auto _Expected_ptr = _STD addressof(_Expected); for (;;) { { - _Spinlock_guard _Lock{_Spinlock}; - if (_CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) != 0) { + _Guard _Lock{_Spinlock}; + if (_CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_TVal)) != 0) { // contents differed, we might be done, check for padding #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Local; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Local; const auto _Local_ptr = _Local._Ptr(); - _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_Ty)); + _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_TVal)); __builtin_zero_non_value_bits(_Local_ptr); __builtin_zero_non_value_bits(_Expected_ptr); - if (_CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_Ty)) == 0) { + if (_CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_TVal)) == 0) { // _Storage differs from _Expected only by padding; copy the padding from _Storage into // _Expected - _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_Ty)); + _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_TVal)); } else { // truly different, we're done return; @@ -539,8 +591,8 @@ struct _Atomic_storage { } } // unlock - __std_atomic_wait_indirect(_Storage_ptr, _Expected_ptr, sizeof(_Ty), &_Spinlock, - &_Atomic_wait_compare_non_lock_free, _Atomic_wait_no_timeout); + __std_atomic_wait_indirect(_Storage_ptr, _Expected_ptr, sizeof(_TVal), &_Spinlock, + &_Atomic_wait_compare_non_lock_free, _Atomic_wait_no_timeout); } } @@ -554,43 +606,37 @@ struct _Atomic_storage { #endif // _HAS_CXX20 #if 1 // TRANSITION, ABI - void _Lock() const noexcept { // lock the spinlock - _Atomic_lock_spinlock(_Spinlock); - } - - void _Unlock() const noexcept { // unlock the spinlock - _Atomic_unlock_spinlock(_Spinlock); +protected: + void _Init_spinlock_for_ref() noexcept { + _Spinlock = __std_atomic_get_mutex(_STD addressof(_Storage)); } private: - mutable long _Spinlock = 0; + // Spinlock integer for non-lock-free atomic. 
mutex pointer for non-lock-free atomic_ref + mutable typename _Atomic_storage_types<_Ty>::_Spinlock _Spinlock{}; public: _Ty _Storage{}; #else // ^^^ don't break ABI / break ABI vvv - void _Lock() const noexcept { // lock the spinlock - _Smtx_lock_exclusive(&_Spinlock); - } - - void _Unlock() const noexcept { // unlock the spinlock - _Smtx_unlock_exclusive(&_Spinlock); - } - _Ty _Storage; - mutable _Smtx_t _Spinlock = 0; + mutable _Smtx_t _Mutex{}; #endif // TRANSITION, ABI }; template struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics + + using _TVal = remove_reference_t<_Ty>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage{_Value} { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage{_Value} { // non-atomically initialize this atomic } - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const char _As_bytes = _Atomic_reinterpret_as(_Value); #if defined(_M_ARM) || defined(_M_ARM64) @@ -602,7 +648,7 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics #endif // hardware } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const char _As_bytes = _Atomic_reinterpret_as(_Value); switch (_Order) { @@ -625,36 +671,36 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics } } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); char _As_bytes = __iso_volatile_load8(_Mem); _Compiler_or_memory_barrier(); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); char _As_bytes = __iso_volatile_load8(_Mem); _Load_barrier(_Order); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order char _As_bytes; _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange8, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order char _Expected_bytes = _Atomic_reinterpret_as(_Expected); // read before atomic operation char _Prev_bytes; #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Mask{_Form_mask}; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; const char _Mask_val = 
_Atomic_reinterpret_as(_Mask._Ref()); for (;;) { @@ -683,7 +729,7 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics } #if _HAS_CXX20 - void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + void wait(const _TVal _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); } @@ -696,18 +742,22 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics } #endif // _HAS_CXX20 - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty>::_TStorage _Storage; }; template struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics + + using _TVal = remove_reference_t<_Ty>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage{_Value} { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage{_Value} { // non-atomically initialize this atomic } - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const short _As_bytes = _Atomic_reinterpret_as(_Value); #if defined(_M_ARM) || defined(_M_ARM64) @@ -719,7 +769,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics #endif // hardware } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const short _As_bytes = _Atomic_reinterpret_as(_Value); switch (_Order) { @@ -742,35 +792,35 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); short _As_bytes = __iso_volatile_load16(_Mem); _Compiler_or_memory_barrier(); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); short _As_bytes = __iso_volatile_load16(_Mem); _Load_barrier(_Order); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order short _As_bytes; _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange16, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order short _Expected_bytes = _Atomic_reinterpret_as(_Expected); // read before atomic operation short _Prev_bytes; #if _CMPXCHG_MASK_OUT_PADDING_BITS if constexpr (_Might_have_non_value_bits<_Ty>) 
{ - _Storage_for<_Ty> _Mask{_Form_mask}; + _Storage_for<_TVal> _Mask{_Form_mask}; const short _Mask_val = _Atomic_reinterpret_as(_Mask._Ref()); for (;;) { @@ -781,7 +831,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) { - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val); @@ -799,7 +849,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } #if _HAS_CXX20 - void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + void wait(const _TVal _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); } @@ -812,18 +862,22 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } #endif // _HAS_CXX20 - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty>::_TStorage _Storage; }; template struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics + + using _TVal = remove_reference_t<_Ty>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage{_Value} { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage{_Value} { // non-atomically initialize this atomic } - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency #if defined(_M_ARM) || defined(_M_ARM64) _Memory_barrier(); __iso_volatile_store32(_Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); @@ -833,7 +887,7 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics #endif // hardware } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const int _As_bytes = _Atomic_reinterpret_as(_Value); switch (_Order) { @@ -856,35 +910,35 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics } } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); auto _As_bytes = __iso_volatile_load32(_Mem); _Compiler_or_memory_barrier(); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); auto _As_bytes = __iso_volatile_load32(_Mem); _Load_barrier(_Order); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long _As_bytes; _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); - return 
reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order long _Expected_bytes = _Atomic_reinterpret_as(_Expected); // read before atomic operation long _Prev_bytes; #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Mask{_Form_mask}; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; const long _Mask_val = _Atomic_reinterpret_as(_Mask); for (;;) { @@ -895,7 +949,7 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics } if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) { - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val); @@ -908,12 +962,12 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics return true; } - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } #if _HAS_CXX20 - void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + void wait(const _TVal _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); } @@ -926,18 +980,22 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics } #endif // _HAS_CXX20 - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty>::_TStorage _Storage; }; template struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics + + using _TVal = remove_reference_t<_Ty>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept : _Storage{_Value} { + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Storage{_Value} { // non-atomically initialize this atomic } - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const long long _As_bytes = _Atomic_reinterpret_as(_Value); #if defined(_M_IX86) @@ -953,7 +1011,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics #endif // _M_ARM64 } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const long long _As_bytes = _Atomic_reinterpret_as(_Value); switch (_Order) { @@ -976,7 +1034,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics } } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); long long _As_bytes; #ifdef _M_ARM @@ -986,10 +1044,10 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics _As_bytes = __iso_volatile_load64(_Mem); _Compiler_or_memory_barrier(); #endif - return reinterpret_cast<_Ty&>(_As_bytes); + return 
reinterpret_cast<_TVal&>(_As_bytes); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); #ifdef _M_ARM long long _As_bytes = __ldrexd(_Mem); @@ -997,36 +1055,36 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics long long _As_bytes = __iso_volatile_load64(_Mem); #endif _Load_barrier(_Order); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } #if defined(_M_IX86) && defined(__clang__) // TRANSITION, LLVM-46595 - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with (effectively) sequential consistency - _Ty _Temp{load()}; + _TVal _Temp{load()}; while (!compare_exchange_strong(_Temp, _Value, _Order)) { // keep trying } return _Temp; } #else // ^^^ defined(_M_IX86) && defined(__clang__), LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv - _Ty exchange(const _Ty _Value, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long long _As_bytes; _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange64, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); - return reinterpret_cast<_Ty&>(_As_bytes); + return reinterpret_cast<_TVal&>(_As_bytes); } #endif // ^^^ !defined(_M_IX86) || !defined(__clang__) ^^^ - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order long long _Expected_bytes = _Atomic_reinterpret_as(_Expected); // read before atomic operation long long _Prev_bytes; #if _CMPXCHG_MASK_OUT_PADDING_BITS - if constexpr (_Might_have_non_value_bits<_Ty>) { - _Storage_for<_Ty> _Mask{_Form_mask}; + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; const long long _Mask_val = _Atomic_reinterpret_as(_Mask); for (;;) { @@ -1038,7 +1096,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics } if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) { - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val); @@ -1051,12 +1109,12 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return true; } - _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_TVal)); return false; } #if _HAS_CXX20 - void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + void wait(const _TVal _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); } @@ -1069,35 +1127,37 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics } #endif // _HAS_CXX20 - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty>::_TStorage _Storage; }; -#if 0 // TRANSITION, ABI -#if defined(_M_X64) || defined(_M_ARM64) 
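The 16-byte specialization that replaces the #if 0 block below has no plain 16-byte load instruction to use, so load() performs a compare-exchange of zero against zero: if the storage happens to hold zero it is harmlessly rewritten with zero, and in every case the comparand buffer comes back holding the current 16 bytes. A minimal MSVC-specific sketch of that trick, assuming an x64 or ARM64 target and a 16-byte-aligned destination (illustrative_load_128 is not a name from the header):

#include <intrin.h>

struct alignas(16) int128_parts {
    long long low;
    long long high;
};

// CAS {0, 0} against the storage; the intrinsic writes the observed 16 bytes into the
// comparand buffer whether or not the exchange happened, which is exactly a load.
inline int128_parts illustrative_load_128(long long* storage) noexcept { // storage: 16-byte aligned
    int128_parts result{}; // comparand starts as {0, 0}
    (void) _InterlockedCompareExchange128(storage, 0, 0, &result.low);
    return result;
}

Note that even a pure read takes the cache line for writing, so 16-byte loads contend with each other in a way the smaller lock-free loads do not.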
+#ifdef _WIN64 template -struct _Atomic_storage<_Ty, 16> { // lock-free using 16-byte intrinsics +struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics + // TRANSITION, ABI: replace '_Ty&' with '_Ty' in this specialization + using _TVal = remove_reference_t<_Ty&>; + _Atomic_storage() = default; - /* implicit */ constexpr _Atomic_storage(const _Ty _Value) noexcept + /* implicit */ constexpr _Atomic_storage(conditional_t, _Ty&, const _TVal> _Value) noexcept : _Storage{_Value} {} // non-atomically initialize this atomic - void store(const _Ty _Value) noexcept { // store with sequential consistency + void store(const _TVal _Value) noexcept { // store with sequential consistency (void) exchange(_Value); } - void store(const _Ty _Value, const memory_order _Order) noexcept { // store with given memory order + void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order _Check_store_memory_order(_Order); (void) exchange(_Value, _Order); } - _NODISCARD _Ty load() const noexcept { // load with sequential consistency + _NODISCARD _TVal load() const noexcept { // load with sequential consistency long long* const _Storage_ptr = const_cast(_Atomic_address_as(_Storage)); _Int128 _Result{}; // atomic CAS 0 with 0 (void) _STD_COMPARE_EXCHANGE_128(_Storage_ptr, 0, 0, &_Result._Low); - return reinterpret_cast<_Ty&>(_Result); + return reinterpret_cast<_TVal&>(_Result); } - _NODISCARD _Ty load(const memory_order _Order) const noexcept { // load with given memory order + _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order #ifdef _M_ARM64 long long* const _Storage_ptr = const_cast(_Atomic_address_as(_Storage)); _Int128 _Result{}; // atomic CAS 0 with 0 @@ -1119,36 +1179,71 @@ struct _Atomic_storage<_Ty, 16> { // lock-free using 16-byte intrinsics break; } - return reinterpret_cast<_Ty&>(_Result); + return reinterpret_cast<_TVal&>(_Result); #else // ^^^ _M_ARM64 / _M_X64 vvv _Check_load_memory_order(_Order); return load(); #endif // _M_ARM64 } - _Ty exchange(const _Ty _Value) noexcept { // exchange with sequential consistency - _Ty _Result{_Value}; + _TVal exchange(const _TVal _Value) noexcept { // exchange with sequential consistency + _TVal _Result{_Value}; while (!compare_exchange_strong(_Result, _Value)) { // keep trying } return _Result; } - _Ty exchange(const _Ty _Value, const memory_order _Order) noexcept { // exchange with given memory order - _Ty _Result{_Value}; + _TVal exchange(const _TVal _Value, const memory_order _Order) noexcept { // exchange with given memory order + _TVal _Result{_Value}; while (!compare_exchange_strong(_Result, _Value, _Order)) { // keep trying } return _Result; } - bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, + bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order _Int128 _Desired_bytes{}; - _CSTD memcpy(&_Desired_bytes, _STD addressof(_Desired), sizeof(_Ty)); + _CSTD memcpy(&_Desired_bytes, _STD addressof(_Desired), sizeof(_TVal)); _Int128 _Expected_temp{}; - _CSTD memcpy(&_Expected_temp, _STD addressof(_Expected), sizeof(_Ty)); + _CSTD memcpy(&_Expected_temp, _STD addressof(_Expected), sizeof(_TVal)); unsigned char _Result; +#if _CMPXCHG_MASK_OUT_PADDING_BITS + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Int128 _Expected_originally{}; + _CSTD memcpy(&_Expected_originally, _STD addressof(_Expected), sizeof(_TVal)); + + _Storage_for<_TVal> 
_Mask{_Form_mask}; + _Int128 _Mask_val{}; + _CSTD memcpy(&_Mask_val, _Mask._Ptr(), sizeof(_TVal)); + for (;;) { +#ifdef _M_ARM64 + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedCompareExchange128, + _Atomic_address_as(_Storage), _Desired_bytes._High, _Desired_bytes._Low, + &_Expected_temp._Low); +#else // ^^^ _M_ARM64 / _M_X64 vvv + (void) _Order; + _Result = _STD_COMPARE_EXCHANGE_128(&reinterpret_cast(_Storage), _Desired_bytes._High, + _Desired_bytes._Low, &_Expected_temp._Low); +#endif // _M_ARM64 + if (_Result) { + return true; + } + + if (((_Expected_temp._Low ^ _Expected_originally._Low) & _Mask_val._Low) != 0 + || ((_Expected_temp._High ^ _Expected_originally._High) & _Mask_val._High) != 0) { + _CSTD memcpy(_STD addressof(_Expected), &_Expected_temp, sizeof(_TVal)); + return false; + } + + _Expected_temp._Low = + (_Expected_originally._Low & _Mask_val._Low) | (_Expected_temp._Low & ~_Mask_val._Low); + _Expected_temp._High = + (_Expected_originally._High & _Mask_val._High) | (_Expected_temp._High & ~_Mask_val._High); + } + } +#endif // _CMPXCHG_MASK_OUT_PADDING_BITS #ifdef _M_ARM64 _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedCompareExchange128, _Atomic_address_as(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); @@ -1158,13 +1253,43 @@ struct _Atomic_storage<_Ty, 16> { // lock-free using 16-byte intrinsics &reinterpret_cast(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); #endif // _M_ARM64 if (_Result == 0) { - _CSTD memcpy(_STD addressof(_Expected), &_Expected_temp, sizeof(_Ty)); + _CSTD memcpy(_STD addressof(_Expected), &_Expected_temp, sizeof(_TVal)); } return _Result != 0; } #if _HAS_CXX20 + void wait(_TVal _Expected, memory_order _Order = memory_order_seq_cst) const noexcept { + const auto _Storage_ptr = _STD addressof(_Storage); + const auto _Expected_ptr = _STD addressof(_Expected); + _Int128 _Expected_bytes = reinterpret_cast(_Expected); + + for (;;) { + const _TVal _Observed = load(_Order); + _Int128 _Observed_bytes = reinterpret_cast(_Observed); + if (_Observed_bytes._Low != _Expected_bytes._Low || _Observed_bytes._High != _Expected_bytes._High) { +#if _CMPXCHG_MASK_OUT_PADDING_BITS + if constexpr (_Might_have_non_value_bits<_TVal>) { + _Storage_for<_TVal> _Mask{_Form_mask}; + const _Int128 _Mask_val = reinterpret_cast(_Mask._Ref()); + + if (((_Expected_bytes._Low ^ _Observed_bytes._Low) & _Mask_val._Low) == 0 + && ((_Expected_bytes._High ^ _Observed_bytes._High) & _Mask_val._High) == 0) { + _Expected_bytes = _Observed_bytes; + continue; + } + } +#endif // _CMPXCHG_MASK_OUT_PADDING_BITS + + return; + } + + __std_atomic_wait_indirect(_Storage_ptr, _Expected_ptr, sizeof(_TVal), nullptr, + &_Atomic_wait_compare_16_bytes, _Atomic_wait_no_timeout); + } + } + void notify_one() noexcept { __std_atomic_notify_one_indirect(_STD addressof(_Storage)); } @@ -1179,10 +1304,9 @@ struct _Atomic_storage<_Ty, 16> { // lock-free using 16-byte intrinsics long long _High; }; - _Atomic_padded<_Ty> _Storage; + typename _Atomic_storage_types<_Ty&>::_TStorage _Storage; }; -#endif // defined(_M_X64) || defined(_M_ARM64) -#endif // TRANSITION, ABI +#endif // _WIN64 // STRUCT TEMPLATE _Atomic_integral template @@ -1191,302 +1315,310 @@ struct _Atomic_integral; // not defined template struct _Atomic_integral<_Ty, 1> : _Atomic_storage<_Ty> { // atomic integral operations using 1-byte intrinsics using _Base = _Atomic_storage<_Ty>; + using typename _Base::_TVal; #ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 _Atomic_integral() 
= default; - /* implicit */ constexpr _Atomic_integral(const _Ty _Value) noexcept : _Base(_Value) {} + /* implicit */ constexpr _Atomic_integral(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Base(_Value) {} #else // ^^^ workaround / no workaround vvv using _Base::_Base; #endif // ^^^ no workaround ^^^ - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedAnd8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedOr8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedXor8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty operator++(int) noexcept { - return static_cast<_Ty>(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), 1)); + _TVal operator++(int) noexcept { + return static_cast<_TVal>(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), 1)); } - _Ty operator++() noexcept { + _TVal operator++() noexcept { unsigned char _Before = static_cast(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), 1)); ++_Before; - return static_cast<_Ty>(_Before); + return static_cast<_TVal>(_Before); } - _Ty operator--(int) noexcept { + _TVal operator--(int) noexcept { return static_cast<_Ty>(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), -1)); } - _Ty operator--() noexcept { + _TVal operator--() noexcept { unsigned char _Before = static_cast(_InterlockedExchangeAdd8(_Atomic_address_as(this->_Storage), -1)); --_Before; - return static_cast<_Ty>(_Before); + return static_cast<_TVal>(_Before); } }; template struct _Atomic_integral<_Ty, 2> : _Atomic_storage<_Ty> { // atomic integral operations using 2-byte intrinsics using _Base = _Atomic_storage<_Ty>; + using typename _Base::_TVal; #ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 _Atomic_integral() = default; - /* implicit */ constexpr _Atomic_integral(const _Ty _Value) noexcept : _Base(_Value) {} + /* implicit */ constexpr _Atomic_integral(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Base(_Value) {} #else // ^^^ workaround / no workaround vvv using _Base::_Base; #endif // ^^^ no workaround ^^^ - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal 
fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd16, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedAnd16, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedOr16, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedXor16, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty operator++(int) noexcept { + _TVal operator++(int) noexcept { unsigned short _After = static_cast(_InterlockedIncrement16(_Atomic_address_as(this->_Storage))); --_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator++() noexcept { - return static_cast<_Ty>(_InterlockedIncrement16(_Atomic_address_as(this->_Storage))); + _TVal operator++() noexcept { + return static_cast<_TVal>(_InterlockedIncrement16(_Atomic_address_as(this->_Storage))); } - _Ty operator--(int) noexcept { + _TVal operator--(int) noexcept { unsigned short _After = static_cast(_InterlockedDecrement16(_Atomic_address_as(this->_Storage))); ++_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator--() noexcept { - return static_cast<_Ty>(_InterlockedDecrement16(_Atomic_address_as(this->_Storage))); + _TVal operator--() noexcept { + return static_cast<_TVal>(_InterlockedDecrement16(_Atomic_address_as(this->_Storage))); } }; template struct _Atomic_integral<_Ty, 4> : _Atomic_storage<_Ty> { // atomic integral operations using 4-byte intrinsics using _Base = _Atomic_storage<_Ty>; + using typename _Base::_TVal; #ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 _Atomic_integral() = default; - /* implicit */ constexpr _Atomic_integral(const _Ty _Value) noexcept : _Base(_Value) {} + /* implicit */ constexpr _Atomic_integral(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Base(_Value) {} #else // ^^^ workaround / no workaround vvv using _Base::_Base; #endif // ^^^ no workaround ^^^ - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_and(const _Ty _Operand, const 
memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedAnd, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedOr, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; _ATOMIC_CHOOSE_INTRINSIC( _Order, _Result, _InterlockedXor, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty operator++(int) noexcept { + _TVal operator++(int) noexcept { unsigned long _After = static_cast(_InterlockedIncrement(_Atomic_address_as(this->_Storage))); --_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator++() noexcept { - return static_cast<_Ty>(_InterlockedIncrement(_Atomic_address_as(this->_Storage))); + _TVal operator++() noexcept { + return static_cast<_TVal>(_InterlockedIncrement(_Atomic_address_as(this->_Storage))); } - _Ty operator--(int) noexcept { + _TVal operator--(int) noexcept { unsigned long _After = static_cast(_InterlockedDecrement(_Atomic_address_as(this->_Storage))); ++_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator--() noexcept { - return static_cast<_Ty>(_InterlockedDecrement(_Atomic_address_as(this->_Storage))); + _TVal operator--() noexcept { + return static_cast<_TVal>(_InterlockedDecrement(_Atomic_address_as(this->_Storage))); } }; template struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral operations using 8-byte intrinsics using _Base = _Atomic_storage<_Ty>; + using typename _Base::_TVal; #ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 _Atomic_integral() = default; - /* implicit */ constexpr _Atomic_integral(const _Ty _Value) noexcept : _Base(_Value) {} + /* implicit */ constexpr _Atomic_integral(conditional_t, _Ty, const _TVal> _Value) noexcept + : _Base(_Value) {} #else // ^^^ workaround / no workaround vvv using _Base::_Base; #endif // ^^^ no workaround ^^^ #if defined(_M_IX86) && defined(__clang__) // TRANSITION, LLVM-46595 - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency - _Ty _Temp{this->load()}; + _TVal _Temp{this->load()}; while (!this->compare_exchange_strong(_Temp, _Temp + _Operand, _Order)) { // keep trying } return _Temp; } - _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency - _Ty _Temp{this->load()}; + _TVal _Temp{this->load()}; while (!this->compare_exchange_strong(_Temp, _Temp & _Operand, _Order)) 
{ // keep trying } return _Temp; } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency - _Ty _Temp{this->load()}; + _TVal _Temp{this->load()}; while (!this->compare_exchange_strong(_Temp, _Temp | _Operand, _Order)) { // keep trying } return _Temp; } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { // effectively sequential consistency - _Ty _Temp{this->load()}; + _TVal _Temp{this->load()}; while (!this->compare_exchange_strong(_Temp, _Temp ^ _Operand, _Order)) { // keep trying } return _Temp; } - _Ty operator++(int) noexcept { - return fetch_add(static_cast<_Ty>(1)); + _TVal operator++(int) noexcept { + return fetch_add(static_cast<_TVal>(1)); } - _Ty operator++() noexcept { - return fetch_add(static_cast<_Ty>(1)) + static_cast<_Ty>(1); + _TVal operator++() noexcept { + return fetch_add(static_cast<_TVal>(1)) + static_cast<_TVal>(1); } - _Ty operator--(int) noexcept { - return fetch_add(static_cast<_Ty>(-1)); + _TVal operator--(int) noexcept { + return fetch_add(static_cast<_TVal>(-1)); } - _Ty operator--() noexcept { - return fetch_add(static_cast<_Ty>(-1)) - static_cast<_Ty>(1); + _TVal operator--() noexcept { + return fetch_add(static_cast<_TVal>(-1)) - static_cast<_TVal>(1); } #else // ^^^ defined(_M_IX86) && defined(__clang__), LLVM-46595 / !defined(_M_IX86) || !defined(__clang__) vvv - _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd64, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_and(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedAnd64, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_or(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedOr64, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty fetch_xor(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { + _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedXor64, _Atomic_address_as(this->_Storage), static_cast(_Operand)); - return static_cast<_Ty>(_Result); + return static_cast<_TVal>(_Result); } - _Ty operator++(int) noexcept { + _TVal operator++(int) noexcept { unsigned long long _After = static_cast(_InterlockedIncrement64(_Atomic_address_as(this->_Storage))); --_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); 
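The 32-bit Clang branch above (TRANSITION, LLVM-46595), and _Atomic_floating later in this diff, fall back to the same idiom whenever no single interlocked intrinsic fits: load once, then retry a CAS of the updated value, letting each failed CAS refresh the observed value. A portable model of that loop using std::atomic, shown for fetch_or on an unsigned integral type:

#include <atomic>

// Returns the previous value, like fetch_or; a failed compare_exchange_strong
// reloads 'observed', so every retry works with fresh data.
template <class T> // assumed: T is an unsigned integral type
T fetch_or_via_cas(std::atomic<T>& obj, const T operand,
    const std::memory_order order = std::memory_order_seq_cst) noexcept {
    T observed = obj.load(std::memory_order_relaxed);
    while (!obj.compare_exchange_strong(observed, static_cast<T>(observed | operand), order)) {
        // keep trying
    }
    return observed;
}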
} - _Ty operator++() noexcept { - return static_cast<_Ty>(_InterlockedIncrement64(_Atomic_address_as(this->_Storage))); + _TVal operator++() noexcept { + return static_cast<_TVal>(_InterlockedIncrement64(_Atomic_address_as(this->_Storage))); } - _Ty operator--(int) noexcept { + _TVal operator--(int) noexcept { unsigned long long _After = static_cast(_InterlockedDecrement64(_Atomic_address_as(this->_Storage))); ++_After; - return static_cast<_Ty>(_After); + return static_cast<_TVal>(_After); } - _Ty operator--() noexcept { - return static_cast<_Ty>(_InterlockedDecrement64(_Atomic_address_as(this->_Storage))); + _TVal operator--() noexcept { + return static_cast<_TVal>(_InterlockedDecrement64(_Atomic_address_as(this->_Storage))); } #endif // ^^^ !defined(_M_IX86) || !defined(__clang__) ^^^ }; @@ -1644,6 +1776,85 @@ struct _Atomic_integral_facade : _Atomic_integral<_Ty> { } }; +// STRUCT TEMPLATE _Atomic_integral_facade +template +struct _Atomic_integral_facade<_Ty&> : _Atomic_integral<_Ty&> { + // provides operator overloads and other support for atomic integral specializations + using _Base = _Atomic_integral<_Ty&>; + using difference_type = _Ty; + +#ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 + _Atomic_integral_facade() = default; + /* implicit */ constexpr _Atomic_integral_facade(_Ty& _Value) noexcept : _Base(_Value) {} +#else // ^^^ workaround / no workaround vvv + using _Base::_Base; +#endif // ^^^ no workaround ^^^ + + _NODISCARD static _Ty _Negate(const _Ty _Value) noexcept { // returns two's complement negated value of _Value + return static_cast<_Ty>(0U - static_cast>(_Value)); + } + + _Ty fetch_add(const _Ty _Operand) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_add(_Operand); + } + + _Ty fetch_add(const _Ty _Operand, const memory_order _Order) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_add(_Operand, _Order); + } + + _Ty fetch_sub(const _Ty _Operand) const noexcept { + return fetch_add(_Negate(_Operand)); + } + + _Ty fetch_sub(const _Ty _Operand, const memory_order _Order) const noexcept { + return fetch_add(_Negate(_Operand), _Order); + } + + _Ty operator+=(const _Ty _Operand) const noexcept { + return static_cast<_Ty>(fetch_add(_Operand) + _Operand); + } + + _Ty operator-=(const _Ty _Operand) const noexcept { + return static_cast<_Ty>(fetch_sub(_Operand) - _Operand); + } + + _Ty fetch_and(const _Ty _Operand) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_and(_Operand); + } + + _Ty fetch_and(const _Ty _Operand, const memory_order _Order) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_and(_Operand, _Order); + } + + _Ty fetch_or(const _Ty _Operand) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_or(_Operand); + } + + _Ty fetch_or(const _Ty _Operand, const memory_order _Order) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_or(_Operand, _Order); + } + + _Ty fetch_xor(const _Ty _Operand) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_xor(_Operand); + } + + _Ty fetch_xor(const _Ty _Operand, const memory_order _Order) const noexcept { + return const_cast<_Atomic_integral_facade*>(this)->_Base::fetch_xor(_Operand, _Order); + } + + _Ty operator&=(const _Ty _Operand) const noexcept { + return static_cast<_Ty>(fetch_and(_Operand) & _Operand); + } + + _Ty operator|=(const _Ty _Operand) const noexcept { + return 
static_cast<_Ty>(fetch_or(_Operand) | _Operand); + } + + _Ty operator^=(const _Ty _Operand) const noexcept { + return static_cast<_Ty>(fetch_xor(_Operand) ^ _Operand); + } +}; + #if _HAS_CXX20 template struct _Atomic_floating : _Atomic_storage<_Ty> { @@ -1703,6 +1914,46 @@ struct _Atomic_floating : _Atomic_storage<_Ty> { return const_cast<_Atomic_floating*>(this)->fetch_sub(_Operand) - _Operand; } }; + +template +struct _Atomic_floating<_Ty&> : _Atomic_storage<_Ty&> { + // provides atomic floating-point operations + using _Base = _Atomic_storage<_Ty&>; + using difference_type = _Ty; + +#ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 + _Atomic_floating() = default; + /* implicit */ constexpr _Atomic_floating(_Ty& _Value) noexcept : _Base(_Value) {} +#else // ^^^ workaround / no workaround vvv + using _Base::_Base; +#endif // ^^^ no workaround ^^^ + + _Ty fetch_add(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) const noexcept { + _Ty _Temp{this->load(memory_order_relaxed)}; + while (!const_cast<_Atomic_floating*>(this)->_Base::compare_exchange_strong( + _Temp, _Temp + _Operand, _Order)) { // keep trying + } + + return _Temp; + } + + _Ty fetch_sub(const _Ty _Operand, const memory_order _Order = memory_order_seq_cst) const noexcept { + _Ty _Temp{this->load(memory_order_relaxed)}; + while (!const_cast<_Atomic_floating*>(this)->_Base::compare_exchange_strong( + _Temp, _Temp - _Operand, _Order)) { // keep trying + } + + return _Temp; + } + + _Ty operator+=(const _Ty _Operand) const noexcept { + return fetch_add(_Operand) + _Operand; + } + + _Ty operator-=(const _Ty _Operand) const noexcept { + return fetch_sub(_Operand) - _Operand; + } +}; #endif // _HAS_CXX20 // STRUCT TEMPLATE _Atomic_pointer @@ -1807,23 +2058,85 @@ struct _Atomic_pointer : _Atomic_storage<_Ty> { } }; + +// STRUCT TEMPLATE _Atomic_pointer +template +struct _Atomic_pointer<_Ty&> : _Atomic_storage<_Ty&> { + using _Base = _Atomic_storage<_Ty&>; + using difference_type = ptrdiff_t; + +#ifdef __cplusplus_winrt // TRANSITION, VSO-1083296 + _Atomic_pointer() = default; + /* implicit */ constexpr _Atomic_pointer(_Ty& _Value) noexcept : _Base(_Value) {} +#else // ^^^ workaround / no workaround vvv + using _Base::_Base; +#endif // ^^^ no workaround ^^^ + + _Ty fetch_add(const ptrdiff_t _Diff, const memory_order _Order = memory_order_seq_cst) const noexcept { + const ptrdiff_t _Shift_bytes = + static_cast(static_cast(_Diff) * sizeof(remove_pointer_t<_Ty>)); + ptrdiff_t _Result; +#if defined(_M_IX86) || defined(_M_ARM) + _ATOMIC_CHOOSE_INTRINSIC( + _Order, _Result, _InterlockedExchangeAdd, _Atomic_address_as(this->_Storage), _Shift_bytes); +#else // ^^^ 32 bits / 64 bits vvv + _ATOMIC_CHOOSE_INTRINSIC( + _Order, _Result, _InterlockedExchangeAdd64, _Atomic_address_as(this->_Storage), _Shift_bytes); +#endif // hardware + return reinterpret_cast<_Ty>(_Result); + } + + _Ty fetch_sub(const ptrdiff_t _Diff) const noexcept { + return fetch_add(static_cast(0 - static_cast(_Diff))); + } + + _Ty fetch_sub(const ptrdiff_t _Diff, const memory_order _Order) const noexcept { + return fetch_add(static_cast(0 - static_cast(_Diff)), _Order); + } + + _Ty operator++(int) const noexcept { + return fetch_add(1); + } + + _Ty operator++() const noexcept { + return fetch_add(1) + 1; + } + + _Ty operator--(int) const noexcept { + return fetch_add(-1); + } + + _Ty operator--() const noexcept { + return fetch_add(-1) - 1; + } + + _Ty operator+=(const ptrdiff_t _Diff) const noexcept { + return fetch_add(_Diff) + _Diff; + } + + _Ty 
operator-=(const ptrdiff_t _Diff) const noexcept { + return fetch_add(static_cast(0 - static_cast(_Diff))) - _Diff; + } +}; + + // STRUCT TEMPLATE atomic #define ATOMIC_VAR_INIT(_Value) \ { _Value } -template +template using _Choose_atomic_base2_t = - typename _Select && !is_same_v>::template _Apply<_Atomic_integral_facade<_Ty>, - typename _Select && is_object_v>>::template _Apply<_Atomic_pointer<_Ty>, - _Atomic_storage<_Ty>>>; + typename _Select && !is_same_v>::template _Apply<_Atomic_integral_facade<_Ty>, + typename _Select && is_object_v>>::template _Apply< + _Atomic_pointer<_Ty>, _Atomic_storage<_Ty>>>; #if _HAS_CXX20 -template -using _Choose_atomic_base_t = - typename _Select>::template _Apply<_Atomic_floating<_Ty>, _Choose_atomic_base2_t<_Ty>>; +template +using _Choose_atomic_base_t = typename _Select>::template _Apply<_Atomic_floating<_Ty>, + _Choose_atomic_base2_t<_TVal, _Ty>>; #else // ^^^ _HAS_CXX20 // !_HAS_CXX20 vvv -template -using _Choose_atomic_base_t = _Choose_atomic_base2_t<_Ty>; +template +using _Choose_atomic_base_t = _Choose_atomic_base2_t<_TVal, _Ty>; #endif //_HAS_CXX20 template @@ -2010,6 +2323,121 @@ template atomic(_Ty) -> atomic<_Ty>; #endif // _HAS_CXX17 +#if _HAS_CXX20 +template +struct atomic_ref : _Choose_atomic_base_t<_Ty, _Ty&> { // atomic reference +private: + using _Base = _Choose_atomic_base_t<_Ty, _Ty&>; + +public: + // clang-format off + static_assert(is_trivially_copyable_v<_Ty> && is_copy_constructible_v<_Ty> && is_move_constructible_v<_Ty> + && is_copy_assignable_v<_Ty> && is_move_assignable_v<_Ty>, + "atomic_ref requires T to be trivially copyable, copy constructible, move constructible, copy assignable, " + "and move assignable."); + // clang-format on + + using value_type = _Ty; + + explicit atomic_ref(_Ty& _Value) noexcept /* strengthened */ : _Base(_Value) { + if constexpr (_Is_potentially_lock_free) { + _Check_alignment(_Value); + } else { + this->_Init_spinlock_for_ref(); + } + } + + atomic_ref(const atomic_ref&) noexcept = default; + + atomic_ref& operator=(const atomic_ref&) = delete; + + static constexpr bool is_always_lock_free = _Is_always_lock_free; + + static constexpr bool _Is_potentially_lock_free = + sizeof(_Ty) <= 2 * sizeof(void*) && (sizeof(_Ty) & (sizeof(_Ty) - 1)) == 0; + + static constexpr size_t required_alignment = _Is_potentially_lock_free ? 
sizeof(_Ty) : alignof(_Ty); + + _NODISCARD bool is_lock_free() const noexcept { +#if _ATOMIC_HAS_DCAS + return is_always_lock_free; +#else // ^^^ _ATOMIC_HAS_DCAS / !_ATOMIC_HAS_DCAS vvv + if constexpr (is_always_lock_free) { + return true; + } else { + return __std_atomic_has_cmpxchg16b() != 0; + } +#endif // _ATOMIC_HAS_DCAS + } + + void store(const _Ty _Value) const noexcept { + const_cast(this)->_Base::store(_Value); + } + + void store(const _Ty _Value, const memory_order _Order) const noexcept { + const_cast(this)->_Base::store(_Value, _Order); + } + + _Ty operator=(const _Ty _Value) const noexcept { + store(_Value); + return _Value; + } + + _Ty exchange(const _Ty _Value) const noexcept { + return const_cast(this)->_Base::exchange(_Value); + } + + _Ty exchange(const _Ty _Value, const memory_order _Order) const noexcept { + return const_cast(this)->_Base::exchange(_Value, _Order); + } + + bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired) const noexcept { + return const_cast(this)->_Base::compare_exchange_strong(_Expected, _Desired); + } + + bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired, const memory_order _Order) const noexcept { + return const_cast(this)->_Base::compare_exchange_strong(_Expected, _Desired, _Order); + } + + bool compare_exchange_strong( + _Ty& _Expected, const _Ty _Desired, const memory_order _Success, const memory_order _Failure) const noexcept { + return compare_exchange_strong(_Expected, _Desired, _Combine_cas_memory_orders(_Success, _Failure)); + } + + bool compare_exchange_weak(_Ty& _Expected, const _Ty _Desired) const noexcept { + return compare_exchange_strong(_Expected, _Desired); + } + + bool compare_exchange_weak(_Ty& _Expected, const _Ty _Desired, const memory_order _Order) const noexcept { + return compare_exchange_strong(_Expected, _Desired, _Order); + } + + bool compare_exchange_weak( + _Ty& _Expected, const _Ty _Desired, const memory_order _Success, const memory_order _Failure) const noexcept { + return compare_exchange_strong(_Expected, _Desired, _Combine_cas_memory_orders(_Success, _Failure)); + } + + operator _Ty() const noexcept { + return this->load(); + } + + void notify_one() const noexcept { + const_cast(this)->_Base::notify_one(); + } + + void notify_all() const noexcept { + const_cast(this)->_Base::notify_all(); + } + +private: + static void _Check_alignment([[maybe_unused]] const _Ty& _Value) { + _ATOMIC_REF_CHECK_ALIGNMENT( + (reinterpret_cast(_STD addressof(_Value)) & (required_alignment - 1)) == 0, + "atomic_ref underlying object is not aligned as required_alignment"); + } +}; +#endif // _HAS_CXX20 + // NONMEMBER OPERATIONS ON ATOMIC TYPES template _NODISCARD bool atomic_is_lock_free(const volatile atomic<_Ty>* _Mem) noexcept { diff --git a/stl/inc/xatomic.h b/stl/inc/xatomic.h index 8498f18c1a..467469cda4 100644 --- a/stl/inc/xatomic.h +++ b/stl/inc/xatomic.h @@ -11,6 +11,9 @@ #include #include +#if defined(_WIN64) && (_MSC_FULL_VER < 192829203) // TRANSITION +#include // Visual Studio 2019 to define 128-bit CAS in +#endif // defined(_WIN64) && (_MSC_FULL_VER < 192829203), TRANSITION #pragma pack(push, _CRT_PACKING) #pragma warning(push, _STL_WARNING_LEVEL) diff --git a/stl/inc/yvals.h b/stl/inc/yvals.h index 1a70aa17ea..7943b6c484 100644 --- a/stl/inc/yvals.h +++ b/stl/inc/yvals.h @@ -208,6 +208,21 @@ _STL_DISABLE_CLANG_WARNINGS #define _STL_INTERNAL_STATIC_ASSERT(...) 
#endif // _ENABLE_STL_INTERNAL_CHECK +#ifndef _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK +#ifdef _DEBUG +#define _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK 1 +#else // ^^^ _DEBUG ^^^ // vvv !_DEBUG vvv +#define _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK 0 +#endif // _DEBUG +#endif // _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK + +#if _ENABLE_ATOMIC_REF_ALIGNMENT_CHECK +#define _ATOMIC_REF_CHECK_ALIGNMENT(cond, mesg) _STL_VERIFY(cond, mesg) +#else +#define _ATOMIC_REF_CHECK_ALIGNMENT(cond, mesg) _Analysis_assume_(cond) +#endif + + #include #define _WARNING_MESSAGE(NUMBER, MESSAGE) __FILE__ "(" _CRT_STRINGIZE(__LINE__) "): warning " NUMBER ": " MESSAGE @@ -306,6 +321,22 @@ _STL_DISABLE_CLANG_WARNINGS #define _LOCK_DEBUG 3 #define _LOCK_AT_THREAD_EXIT 4 +#ifndef _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B +#if _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WINBLUE && defined(_WIN64) +#define _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B 1 +#else // ^^^ modern 64-bit // less modern or 32-bit vvv +#define _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B 0 +#endif // _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WINBLUE && defined(_WIN64) +#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B + +#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 && defined(_M_ARM64) +#error ARM64 requires _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B to be 1. +#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 && defined(_M_ARM64) + +#if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 && !defined(_WIN64) +#error _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 requires 64-bit. +#endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 && !defined(_WIN64) + #ifdef __cplusplus _STD_BEGIN enum _Uninitialized { // tag for suppressing initialization diff --git a/stl/inc/yvals_core.h b/stl/inc/yvals_core.h index 3239d8eb25..db211c25b7 100644 --- a/stl/inc/yvals_core.h +++ b/stl/inc/yvals_core.h @@ -131,6 +131,7 @@ // Other C++17 deprecation warnings // _HAS_CXX20 directly controls: +// P0019R8 atomic_ref // P0020R6 atomic, atomic, atomic // P0122R7 // P0202R3 constexpr For And exchange() @@ -147,6 +148,7 @@ // P0482R6 Library Support For char8_t // (mbrtoc8 and c8rtomb not yet implemented) // P0487R1 Fixing operator>>(basic_istream&, CharT*) +// P0528R3 Atomic Compare-And-Exchange With Padding Bits // P0550R2 remove_cvref // P0553R4 Rotating And Counting Functions // P0556R3 Integral Power-Of-2 Operations (renamed by P1956R1) @@ -182,6 +184,9 @@ // (except the std::invoke function which is implemented in C++17) // P1085R2 Removing span Comparisons // P1115R3 erase()/erase_if() Return size_type +// P1123R0 Atomic Compare-And-Exchange With Padding Bits For atomic_ref +// P1135R6 The C++20 Synchronization Library +// (partially implemented) // P1207R4 Movability Of Single-Pass Iterators // (partially implemented) // P1209R0 erase_if(), erase() @@ -207,6 +212,7 @@ // P1907R2 ranges::ssize // P1956R1 has_single_bit(), bit_ceil(), bit_floor(), bit_width() // P1959R0 Removing weak_equality And strong_equality +// P1960R0 atomic_ref Cleanup // P1964R2 Replacing boolean With boolean-testable // P1976R2 Explicit Constructors For Fixed-Extent span From Dynamic-Extent Ranges // P2091R0 Fixing Issues With Range Access CPOs @@ -1137,6 +1143,7 @@ #define __cpp_lib_atomic_flag_test 201907L #define __cpp_lib_atomic_float 201711L #define __cpp_lib_atomic_lock_free_type_aliases 201907L +#define __cpp_lib_atomic_ref 201806L #define __cpp_lib_atomic_shared_ptr 201711L #define __cpp_lib_atomic_wait 201907L #define __cpp_lib_bind_front 201907L @@ -1262,13 +1269,18 @@ compiler option, or define _ALLOW_RTCc_IN_STL to acknowledge that you have recei #error In yvals_core.h, defined(MRTDLL) 
implies defined(_M_CEE_PURE); !defined(_M_CEE_PURE) implies !defined(MRTDLL)
 #endif // defined(MRTDLL) && !defined(_M_CEE_PURE)
 
-#define _STL_WIN32_WINNT_WINXP 0x0501 // _WIN32_WINNT_WINXP from sdkddkver.h
-#define _STL_WIN32_WINNT_VISTA 0x0600 // _WIN32_WINNT_VISTA from sdkddkver.h
-#define _STL_WIN32_WINNT_WIN8 0x0602 // _WIN32_WINNT_WIN8 from sdkddkver.h
+#define _STL_WIN32_WINNT_WINXP   0x0501 // _WIN32_WINNT_WINXP from sdkddkver.h
+#define _STL_WIN32_WINNT_VISTA   0x0600 // _WIN32_WINNT_VISTA from sdkddkver.h
+#define _STL_WIN32_WINNT_WIN8    0x0602 // _WIN32_WINNT_WIN8 from sdkddkver.h
+#define _STL_WIN32_WINNT_WINBLUE 0x0603 // _WIN32_WINNT_WINBLUE from sdkddkver.h
+#define _STL_WIN32_WINNT_WIN10   0x0A00 // _WIN32_WINNT_WIN10 from sdkddkver.h
 
 // Note that the STL DLL builds will set this to XP for ABI compatibility with VS2015 which supported XP.
 #ifndef _STL_WIN32_WINNT
-#if defined(_M_ARM) || defined(_M_ARM64) || defined(_ONECORE) || defined(_CRT_APP)
+#if defined(_M_ARM64)
+// The first ARM64 Windows was Windows 10
+#define _STL_WIN32_WINNT _STL_WIN32_WINNT_WIN10
+#elif defined(_M_ARM) || defined(_ONECORE) || defined(_CRT_APP)
 // The first ARM or OneCore or App Windows was Windows 8
 #define _STL_WIN32_WINNT _STL_WIN32_WINNT_WIN8
 #else // ^^^ default to Win8 // default to Vista vvv
diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp
index 0a45b12aee..9e6c9b19e2 100644
--- a/stl/src/atomic_wait.cpp
+++ b/stl/src/atomic_wait.cpp
@@ -198,6 +198,24 @@ namespace {
         }
     }
 #endif // _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
+
+    _NODISCARD unsigned char __std_atomic_compare_exchange_128_fallback(_Inout_bytecount_(16) long long* _Destination,
+        _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow,
+        _Inout_bytecount_(16) long long* _ComparandResult) noexcept {
+        static SRWLOCK _Mtx = SRWLOCK_INIT;
+        _SrwLock_guard _Guard{_Mtx};
+        if (_Destination[0] == _ComparandResult[0] && _Destination[1] == _ComparandResult[1]) {
+            _ComparandResult[0] = _Destination[0];
+            _ComparandResult[1] = _Destination[1];
+            _Destination[0] = _ExchangeLow;
+            _Destination[1] = _ExchangeHigh;
+            return static_cast<unsigned char>(true);
+        } else {
+            _ComparandResult[0] = _Destination[0];
+            _ComparandResult[1] = _Destination[1];
+            return static_cast<unsigned char>(false);
+        }
+    }
 } // unnamed namespace
 
@@ -339,4 +357,62 @@ __std_atomic_api_level __stdcall __std_atomic_set_api_level(__std_atomic_api_lev
     return _Acquire_wait_functions();
 #endif // !_ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
 }
+
+#pragma warning(push)
+#pragma warning(disable : 4324) // structure was padded due to alignment specifier
+_Smtx_t* __stdcall __std_atomic_get_mutex(const void* const _Key) noexcept {
+    constexpr size_t _Table_size_power = 8;
+    constexpr size_t _Table_size = 1 << _Table_size_power;
+    constexpr size_t _Table_index_mask = _Table_size - 1;
+
+    struct alignas(std::hardware_destructive_interference_size) _Table_entry {
+        _Smtx_t _Mutex;
+    };
+
+    static _Table_entry _Table[_Table_size]{};
+
+    auto _Index = reinterpret_cast<uintptr_t>(_Key);
+    _Index ^= _Index >> (_Table_size_power * 2);
+    _Index ^= _Index >> _Table_size_power;
+    return &_Table[_Index & _Table_index_mask]._Mutex;
+}
+#pragma warning(pop)
+
+_NODISCARD unsigned char __stdcall __std_atomic_compare_exchange_128(_Inout_bytecount_(16) long long* _Destination,
+    _In_ long long _ExchangeHigh, _In_ long long _ExchangeLow,
+    _Inout_bytecount_(16) long long* _ComparandResult) noexcept {
+#if !defined(_WIN64)
+    return __std_atomic_compare_exchange_128_fallback(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult);
+#elif _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1
+    return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult);
+#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv
+    if (__std_atomic_has_cmpxchg16b()) {
+        return _InterlockedCompareExchange128(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult);
+    }
+
+    return __std_atomic_compare_exchange_128_fallback(_Destination, _ExchangeHigh, _ExchangeLow, _ComparandResult);
+#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0
+}
+
+_NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept {
+#if !defined(_WIN64)
+    return false;
+#elif _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1
+    return true;
+#else // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0 vvv
+    constexpr char _Cmpxchg_Absent = 0;
+    constexpr char _Cmpxchg_Present = 1;
+    constexpr char _Cmpxchg_Unknown = 2;
+
+    static std::atomic<char> _Cached_value{_Cmpxchg_Unknown};
+
+    char _Value = _Cached_value.load(std::memory_order_relaxed);
+    if (_Value == _Cmpxchg_Unknown) {
+        _Value = IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) ? _Cmpxchg_Present : _Cmpxchg_Absent;
+        _Cached_value.store(_Value, std::memory_order_relaxed);
+    }
+
+    return _Value;
+#endif // ^^^ _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 0
+}
 _END_EXTERN_C
diff --git a/stl/src/msvcp_atomic_wait.src b/stl/src/msvcp_atomic_wait.src
index ec335cc161..d8f2d843b5 100644
--- a/stl/src/msvcp_atomic_wait.src
+++ b/stl/src/msvcp_atomic_wait.src
@@ -6,14 +6,17 @@
 LIBRARY LIBRARYNAME
 
 EXPORTS
-    __std_atomic_wait_get_deadline
-    __std_atomic_wait_get_remaining_timeout
+    __std_atomic_compare_exchange_128
+    __std_atomic_get_mutex
+    __std_atomic_has_cmpxchg16b
     __std_atomic_notify_all_direct
     __std_atomic_notify_all_indirect
     __std_atomic_notify_one_direct
     __std_atomic_notify_one_indirect
     __std_atomic_set_api_level
     __std_atomic_wait_direct
+    __std_atomic_wait_get_deadline
+    __std_atomic_wait_get_remaining_timeout
     __std_atomic_wait_indirect
     __std_bulk_submit_threadpool_work
     __std_close_threadpool_work
diff --git a/tests/libcxx/expected_results.txt b/tests/libcxx/expected_results.txt
index 5d3dc61e13..97eb5036ed 100644
--- a/tests/libcxx/expected_results.txt
+++ b/tests/libcxx/expected_results.txt
@@ -49,6 +49,7 @@ std/containers/unord/unord.map/unord.map.modifiers/insert_and_emplace_allocator_
 std/containers/unord/unord.set/insert_and_emplace_allocator_requirements.pass.cpp FAIL
 
 # libc++ doesn't yet implement P1423R3, so it expects an old value for `__cpp_lib_char8_t`
+std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp FAIL
 std/language.support/support.limits/support.limits.general/filesystem.version.pass.cpp FAIL
 std/language.support/support.limits/support.limits.general/istream.version.pass.cpp FAIL
 std/language.support/support.limits/support.limits.general/limits.version.pass.cpp FAIL
@@ -252,9 +253,6 @@ std/utilities/memory/default.allocator/allocator.members/allocate.verify.cpp SKI
 
 # *** MISSING STL FEATURES ***
 
-# C++20 P0019R8 "atomic_ref"
-std/language.support/support.limits/support.limits.general/atomic.version.pass.cpp FAIL
-
 # C++20 P0355R7 "<chrono> Calendars And Time Zones"
 std/utilities/time/days.pass.cpp FAIL
 std/utilities/time/months.pass.cpp FAIL
diff --git a/tests/libcxx/skipped_tests.txt b/tests/libcxx/skipped_tests.txt
index 9d43be173d..0aceecdbb5 100644
--- a/tests/libcxx/skipped_tests.txt
+++ b/tests/libcxx/skipped_tests.txt
@@ -49,6 +49,7 @@ containers\unord\unord.map\unord.map.modifiers\insert_and_emplace_allocator_requ
 containers\unord\unord.set\insert_and_emplace_allocator_requirements.pass.cpp
 
 # libc++ doesn't yet implement P1423R3, so it expects an old value for `__cpp_lib_char8_t`
+language.support\support.limits\support.limits.general\atomic.version.pass.cpp
 language.support\support.limits\support.limits.general\filesystem.version.pass.cpp
 language.support\support.limits\support.limits.general\istream.version.pass.cpp
 language.support\support.limits\support.limits.general\limits.version.pass.cpp
@@ -252,9 +253,6 @@ utilities\memory\default.allocator\allocator.members\allocate.verify.cpp
 
 # *** MISSING STL FEATURES ***
 
-# C++20 P0019R8 "atomic_ref"
-language.support\support.limits\support.limits.general\atomic.version.pass.cpp
-
 # C++20 P0355R7 "<chrono> Calendars And Time Zones"
 utilities\time\days.pass.cpp
 utilities\time\months.pass.cpp
diff --git a/tests/std/include/test_atomic_wait.hpp b/tests/std/include/test_atomic_wait.hpp
index a856798f3a..7be16c0d1a 100644
--- a/tests/std/include/test_atomic_wait.hpp
+++ b/tests/std/include/test_atomic_wait.hpp
@@ -10,8 +10,8 @@
 #include
 #include
 
-template <class UnderlyingType>
-void test_atomic_wait_func(const UnderlyingType old_value, const UnderlyingType new_value,
+template
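
Not part of the diff: a minimal usage sketch of the atomic_ref the change above implements, assuming a C++20 toolchain whose <atomic> provides std::atomic_ref. The names main, counter, work, t1, and t2 are illustrative.

#include <atomic>
#include <cstdio>
#include <thread>

int main() {
    // atomic_ref imposes an alignment requirement on the referenced object.
    alignas(std::atomic_ref<int>::required_alignment) int counter = 0;

    auto work = [&counter] {
        std::atomic_ref<int> ref{counter}; // refers to counter; the int itself is not copied
        for (int i = 0; i < 100000; ++i) {
            ref.fetch_add(1, std::memory_order_relaxed);
        }
    };

    std::thread t1{work};
    std::thread t2{work};
    t1.join();
    t2.join();

    // Every concurrent access went through an atomic_ref, so this is race-free and prints 200000.
    std::printf("counter = %d, lock free = %d\n", counter,
        static_cast<int>(std::atomic_ref<int>{counter}.is_lock_free()));
}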
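The _Negate helper in _Atomic_integral_facade<_Ty&> lets fetch_sub reuse fetch_add without risking signed-overflow undefined behavior. A small self-contained illustration of that trick; negate_twos_complement is an illustrative name, not an STL internal.

#include <type_traits>

template <class T>
T negate_twos_complement(const T value) noexcept {
    // 0U - x wraps in unsigned arithmetic, so this is well defined even for the most
    // negative value of T, unlike applying unary minus to a signed operand.
    return static_cast<T>(0U - static_cast<std::make_unsigned_t<T>>(value));
}

// Usage: ref.fetch_add(negate_twos_complement(n)) behaves like ref.fetch_sub(n).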
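The debug-mode _ATOMIC_REF_CHECK_ALIGNMENT verification added in yvals.h comes down to a power-of-two alignment test on the referenced object's address. A one-function sketch; is_aligned is an illustrative name.

#include <cstddef>
#include <cstdint>

bool is_aligned(const void* ptr, std::size_t alignment) noexcept {
    // Assumes alignment is a power of two: the low log2(alignment) address bits must be zero.
    return (reinterpret_cast<std::uintptr_t>(ptr) & (alignment - 1)) == 0;
}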
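For non-lock-free sizes, an atomic_ref stores only a pointer to a shared mutex obtained from __std_atomic_get_mutex, which hashes the object's address into a fixed table of padded mutexes. A standalone sketch of that mapping idea, using std::shared_mutex in place of the internal _Smtx_t and assuming the compiler provides std::hardware_destructive_interference_size; mutex_for_address is an illustrative name, not an STL export.

#include <cstddef>
#include <cstdint>
#include <new>
#include <shared_mutex>

std::shared_mutex& mutex_for_address(const void* key) noexcept {
    constexpr std::size_t table_size_power = 8;
    constexpr std::size_t table_size = std::size_t{1} << table_size_power; // 256 slots
    constexpr std::size_t table_index_mask = table_size - 1;

    // Pad each slot to a cache line so unrelated non-lock-free atomics don't false-share.
    struct alignas(std::hardware_destructive_interference_size) entry {
        std::shared_mutex mtx;
    };
    static entry table[table_size];

    // Fold the high pointer bits down, then mask to an index. Distinct objects may share
    // a slot; that is safe (merely slower), since the mutex only guards the operation.
    auto index = reinterpret_cast<std::uintptr_t>(key);
    index ^= index >> (table_size_power * 2);
    index ^= index >> table_size_power;
    return table[index & table_index_mask].mtx;
}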
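A sketch of the semantics of the lock-based 128-bit compare-exchange fallback added in atomic_wait.cpp, rewritten with std::mutex instead of an SRWLOCK so it is portable; cas128_fallback is an illustrative name.

#include <mutex>

// Returns true and stores the new value on a match; in both cases comparand_result receives
// the previous contents of dest, mirroring the contract of _InterlockedCompareExchange128.
bool cas128_fallback(long long* dest, long long exchange_high, long long exchange_low,
    long long* comparand_result) noexcept {
    static std::mutex mtx;
    const std::lock_guard<std::mutex> guard{mtx};

    const bool matched = dest[0] == comparand_result[0] && dest[1] == comparand_result[1];
    comparand_result[0] = dest[0];
    comparand_result[1] = dest[1];
    if (matched) {
        dest[0] = exchange_low; // low half first, matching the [low, high] layout the intrinsic uses
        dest[1] = exchange_high;
    }
    return matched;
}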
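On x64 builds that cannot assume cmpxchg16b, __std_atomic_has_cmpxchg16b caches a one-time CPU feature probe in a relaxed atomic. A standalone sketch of the same pattern; has_cmpxchg16b is an illustrative name.

#include <Windows.h>
#include <atomic>

bool has_cmpxchg16b() noexcept {
    constexpr char absent = 0;
    constexpr char present = 1;
    constexpr char unknown = 2;

    static std::atomic<char> cached{unknown};

    char value = cached.load(std::memory_order_relaxed);
    if (value == unknown) {
        // Relaxed ordering is fine: at worst several threads probe concurrently
        // and all store the same answer.
        value = IsProcessorFeaturePresent(PF_COMPARE_EXCHANGE128) ? present : absent;
        cached.store(value, std::memory_order_relaxed);
    }
    return value == present;
}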