From 2a7629dd650de27382d8975cb3bd7b72e3e90eac Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Tue, 28 Feb 2023 14:31:27 -0500 Subject: [PATCH 1/4] Prefer non Impl:: atomic_{load,store} in AtomicDataElement since using relaxed memory order --- core/src/impl/Kokkos_Atomic_View.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/core/src/impl/Kokkos_Atomic_View.hpp b/core/src/impl/Kokkos_Atomic_View.hpp index 45d01c9f9f..23d4c2524c 100644 --- a/core/src/impl/Kokkos_Atomic_View.hpp +++ b/core/src/impl/Kokkos_Atomic_View.hpp @@ -39,7 +39,7 @@ class AtomicDataElement { KOKKOS_INLINE_FUNCTION const_value_type operator=(const_value_type& val) const { - Kokkos::Impl::atomic_store(ptr, val, Kokkos::Impl::memory_order_relaxed); + Kokkos::atomic_store(ptr, val); return val; } @@ -194,9 +194,7 @@ class AtomicDataElement { bool operator>(const_value_type& val) const { return *ptr > val; } KOKKOS_INLINE_FUNCTION - operator value_type() const { - return Kokkos::Impl::atomic_load(ptr, Kokkos::Impl::memory_order_relaxed); - } + operator value_type() const { return Kokkos::atomic_load(ptr); } }; template From 3bcf389e2b7170d6e2877b7724ad06c346e943ed Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Tue, 28 Feb 2023 14:34:46 -0500 Subject: [PATCH 2/4] Use directly memory order from desul in Impl:: atomic funtion templates --- core/src/Kokkos_Atomics_Desul_Wrapper.hpp | 20 ++++++++------------ core/src/impl/Kokkos_ChaseLev.hpp | 16 +++++++++------- core/src/impl/Kokkos_HostThreadTeam.hpp | 8 ++++---- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/core/src/Kokkos_Atomics_Desul_Wrapper.hpp b/core/src/Kokkos_Atomics_Desul_Wrapper.hpp index b8697d415a..519ed3eb89 100644 --- a/core/src/Kokkos_Atomics_Desul_Wrapper.hpp +++ b/core/src/Kokkos_Atomics_Desul_Wrapper.hpp @@ -253,26 +253,22 @@ namespace Impl { using type = desul::MemoryOrderRelaxed; }; template KOKKOS_INLINE_FUNCTION - bool atomic_compare_exchange_strong(T* const dest, T& expected, const T desired, MemOrderSuccess, MemOrderFailure) { - return desul::atomic_compare_exchange_strong(dest, expected, desired, - typename KokkosToDesulMemoryOrder::type(), - typename KokkosToDesulMemoryOrder::type(), - KOKKOS_DESUL_MEM_SCOPE); - + bool atomic_compare_exchange_strong(T* const dest, T& expected, const T desired, MemOrderSuccess succ, MemOrderFailure fail) { + return desul::atomic_compare_exchange_strong(dest, expected, desired, succ, fail, KOKKOS_DESUL_MEM_SCOPE); } template KOKKOS_INLINE_FUNCTION - T atomic_load(const T* const src, MemoryOrder) { - return desul::atomic_load(src, typename KokkosToDesulMemoryOrder::type(), KOKKOS_DESUL_MEM_SCOPE); + T atomic_load(const T* const src, MemoryOrder order) { + return desul::atomic_load(src, order, KOKKOS_DESUL_MEM_SCOPE); } template KOKKOS_INLINE_FUNCTION - void atomic_store(T* const src, const T val, MemoryOrder) { - return desul::atomic_store(src, val, typename KokkosToDesulMemoryOrder::type(), KOKKOS_DESUL_MEM_SCOPE); + void atomic_store(T* const src, const T val, MemoryOrder order) { + return desul::atomic_store(src, val, order, KOKKOS_DESUL_MEM_SCOPE); } -} +} // namespace Impl -} +} // namespace Kokkos #undef KOKKOS_DESUL_MEM_SCOPE diff --git a/core/src/impl/Kokkos_ChaseLev.hpp b/core/src/impl/Kokkos_ChaseLev.hpp index 855654408e..d8ab77b205 100644 --- a/core/src/impl/Kokkos_ChaseLev.hpp +++ b/core/src/impl/Kokkos_ChaseLev.hpp @@ -172,7 +172,8 @@ struct ChaseLevDeque { } #else if (!Impl::atomic_compare_exchange_strong( - &m_top, t, t + 1, memory_order_seq_cst, memory_order_relaxed)) { + &m_top, t, t + 1, desul::MemoryOrderSeqCst(), + desul::MemoryOrderRelaxed())) { /* failed race, someone else stole it */ return_value = nullptr; } @@ -195,7 +196,7 @@ struct ChaseLevDeque { KOKKOS_INLINE_FUNCTION bool push(node_type& node) { auto b = m_bottom; // memory order relaxed - auto t = Impl::atomic_load(&m_top, memory_order_acquire); + auto t = Impl::atomic_load(&m_top, desul::MemoryOrderAcquire()); auto& a = m_array; if (b - t > a.size() - 1) { /* queue is full, resize */ @@ -204,7 +205,7 @@ struct ChaseLevDeque { return false; } a[b] = &node; // relaxed - Impl::atomic_store(&m_bottom, b + 1, memory_order_release); + Impl::atomic_store(&m_bottom, b + 1, desul::MemoryOrderRelease()); return true; } @@ -213,7 +214,7 @@ struct ChaseLevDeque { auto t = m_top; // TODO @tasking @memory_order DSH: atomic load acquire Kokkos::memory_fence(); // seq_cst fence, so why does the above need to be // acquire? - auto b = Impl::atomic_load(&m_bottom, memory_order_acquire); + auto b = Impl::atomic_load(&m_bottom, desul::MemoryOrderAcquire()); OptionalRef return_value; if (t < b) { /* Non-empty queue */ @@ -231,8 +232,9 @@ struct ChaseLevDeque { return_value = nullptr; } #else - if (!Impl::atomic_compare_exchange_strong( - &m_top, t, t + 1, memory_order_seq_cst, memory_order_relaxed)) { + if (!Impl::atomic_compare_exchange_strong(&m_top, t, t + 1, + desul::MemoryOrderSeqCst(), + desul::MemoryOrderRelaxed())) { return_value = nullptr; } #endif @@ -247,7 +249,7 @@ struct ChaseLevDeque { // essentially using the memory order in this version as a fence, which // may be unnecessary auto buffer_ptr = (node_type***)&m_array.buffer; - auto a = Impl::atomic_load(buffer_ptr, memory_order_acquire); // + auto a = Impl::atomic_load(buffer_ptr, desul::MemoryOrderAcquire()); // technically consume ordered, but acquire should be fine return_value = *static_cast(a[t % m_array->size]); // relaxed; we'd have to replace the m_array->size if we ever allow growth diff --git a/core/src/impl/Kokkos_HostThreadTeam.hpp b/core/src/impl/Kokkos_HostThreadTeam.hpp index 1fec93237a..35ced1b56c 100644 --- a/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -481,14 +481,14 @@ class HostThreadTeamMember { // with a return value of 'true' Kokkos::Impl::atomic_store(shared_value, value, - Kokkos::Impl::memory_order_release); + desul::MemoryOrderRelease()); m_data.team_rendezvous_release(); // This thread released all other threads from 'team_rendezvous' // with a return value of 'false' } else { value = Kokkos::Impl::atomic_load(shared_value, - Kokkos::Impl::memory_order_acquire); + desul::MemoryOrderAcquire()); } })) @@ -516,7 +516,7 @@ class HostThreadTeamMember { if (1 < m_data.m_team_size) { Kokkos::Impl::atomic_store(shared_value, value, - Kokkos::Impl::memory_order_release); + desul::MemoryOrderRelease()); } m_data.team_rendezvous_release(); @@ -524,7 +524,7 @@ class HostThreadTeamMember { // with a return value of 'false' } else { value = Kokkos::Impl::atomic_load(shared_value, - Kokkos::Impl::memory_order_acquire); + desul::MemoryOrderAcquire()); })) KOKKOS_IF_ON_DEVICE( From 1abf65388b4ef39019d35c84a61b1190835fe4e8 Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Tue, 28 Feb 2023 14:35:32 -0500 Subject: [PATCH 3/4] Drop Kokkos memory oder classes --- core/src/Kokkos_Atomics_Desul_Wrapper.hpp | 25 ----- core/src/impl/Kokkos_Atomic_Memory_Order.hpp | 105 ------------------- 2 files changed, 130 deletions(-) delete mode 100644 core/src/impl/Kokkos_Atomic_Memory_Order.hpp diff --git a/core/src/Kokkos_Atomics_Desul_Wrapper.hpp b/core/src/Kokkos_Atomics_Desul_Wrapper.hpp index 519ed3eb89..bda3783980 100644 --- a/core/src/Kokkos_Atomics_Desul_Wrapper.hpp +++ b/core/src/Kokkos_Atomics_Desul_Wrapper.hpp @@ -26,7 +26,6 @@ static_assert(false, #include #include -#include #include // clang-format off @@ -228,30 +227,6 @@ T atomic_compare_exchange(T* const dest, desul::Impl::dont_deduce_this_parameter } namespace Impl { - - template - struct KokkosToDesulMemoryOrder; - - template<> - struct KokkosToDesulMemoryOrder { - using type = desul::MemoryOrderSeqCst; - }; - template<> - struct KokkosToDesulMemoryOrder { - using type = desul::MemoryOrderAcquire; - }; - template<> - struct KokkosToDesulMemoryOrder { - using type = desul::MemoryOrderRelease; - }; - template<> - struct KokkosToDesulMemoryOrder { - using type = desul::MemoryOrderAcqRel; - }; - template<> - struct KokkosToDesulMemoryOrder { - using type = desul::MemoryOrderRelaxed; - }; template KOKKOS_INLINE_FUNCTION bool atomic_compare_exchange_strong(T* const dest, T& expected, const T desired, MemOrderSuccess succ, MemOrderFailure fail) { return desul::atomic_compare_exchange_strong(dest, expected, desired, succ, fail, KOKKOS_DESUL_MEM_SCOPE); diff --git a/core/src/impl/Kokkos_Atomic_Memory_Order.hpp b/core/src/impl/Kokkos_Atomic_Memory_Order.hpp deleted file mode 100644 index 6d1bfb9c82..0000000000 --- a/core/src/impl/Kokkos_Atomic_Memory_Order.hpp +++ /dev/null @@ -1,105 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#ifndef KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP -#define KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP - -#include - -#include - -namespace Kokkos { -namespace Impl { - -/** @file - * Provides strongly-typed analogs of the standard memory order enumerators. - * In addition to (very slightly) reducing the constant propagation burden on - * the compiler, this allows us to give compile-time errors for things that - * don't make sense, like atomic_load with memory order release. - */ - -struct memory_order_seq_cst_t { - using memory_order = memory_order_seq_cst_t; -#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || \ - defined(KOKKOS_ENABLE_INTEL_ATOMICS) || \ - defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) - static constexpr auto gnu_constant = __ATOMIC_SEQ_CST; -#endif - static constexpr auto std_constant = std::memory_order_seq_cst; -}; -constexpr memory_order_seq_cst_t memory_order_seq_cst = {}; - -struct memory_order_relaxed_t { - using memory_order = memory_order_relaxed_t; -#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || \ - defined(KOKKOS_ENABLE_INTEL_ATOMICS) || \ - defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) - static constexpr auto gnu_constant = __ATOMIC_RELAXED; -#endif - static constexpr auto std_constant = std::memory_order_relaxed; -}; - -// FIXME_OPENMPTARGET - The `declare target` is needed for the Intel GPUs with -// the OpenMPTarget backend -#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_COMPILER_INTEL) -#pragma omp declare target -#endif - -constexpr memory_order_relaxed_t memory_order_relaxed = {}; - -#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_COMPILER_INTEL) -#pragma omp end declare target -#endif - -struct memory_order_acquire_t { - using memory_order = memory_order_acquire_t; -#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || \ - defined(KOKKOS_ENABLE_INTEL_ATOMICS) || \ - defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) - static constexpr auto gnu_constant = __ATOMIC_ACQUIRE; -#endif - static constexpr auto std_constant = std::memory_order_acquire; -}; -constexpr memory_order_acquire_t memory_order_acquire = {}; - -struct memory_order_release_t { - using memory_order = memory_order_release_t; -#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || \ - defined(KOKKOS_ENABLE_INTEL_ATOMICS) || \ - defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) - static constexpr auto gnu_constant = __ATOMIC_RELEASE; -#endif - static constexpr auto std_constant = std::memory_order_release; -}; -constexpr memory_order_release_t memory_order_release = {}; - -struct memory_order_acq_rel_t { - using memory_order = memory_order_acq_rel_t; -#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || \ - defined(KOKKOS_ENABLE_INTEL_ATOMICS) || \ - defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) - static constexpr auto gnu_constant = __ATOMIC_ACQ_REL; -#endif - static constexpr auto std_constant = std::memory_order_acq_rel; -}; -constexpr memory_order_acq_rel_t memory_order_acq_rel = {}; - -// Intentionally omit consume (for now) - -} // end namespace Impl -} // end namespace Kokkos - -#endif // KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP From f419b7303d5a96dfe9450d691459a7af3789b594 Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Wed, 1 Mar 2023 12:20:14 -0500 Subject: [PATCH 4/4] Add missing header include --- core/src/HIP/Kokkos_HIP_Instance.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/HIP/Kokkos_HIP_Instance.hpp b/core/src/HIP/Kokkos_HIP_Instance.hpp index 51b3f79a9d..ee0fd9f726 100644 --- a/core/src/HIP/Kokkos_HIP_Instance.hpp +++ b/core/src/HIP/Kokkos_HIP_Instance.hpp @@ -22,6 +22,7 @@ #include #include +#include #include namespace Kokkos {