diff --git a/core/src/OpenACC/Kokkos_OpenACC_FunctorAdapter.hpp b/core/src/OpenACC/Kokkos_OpenACC_FunctorAdapter.hpp index 1325e61e1d..f2fd1a3619 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_FunctorAdapter.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_FunctorAdapter.hpp @@ -17,27 +17,40 @@ #ifndef KOKKOS_OPENACC_FUNCTOR_ADAPTER_HPP #define KOKKOS_OPENACC_FUNCTOR_ADAPTER_HPP +#include #include namespace Kokkos::Experimental::Impl { -template -class FunctorAdapter { - Functor m_functor; - using WorkTag = typename Policy::work_tag; - - public: - FunctorAdapter(Functor const &functor) : m_functor(functor) {} - - template - KOKKOS_FUNCTION void operator()(Args &&... args) const { - if constexpr (std::is_void_v) { - m_functor(static_cast(args)...); - } else { - m_functor(WorkTag(), static_cast(args)...); - } +enum class RoutineClause { worker, seq }; + +template +class FunctorAdapter; + +#define KOKKOS_IMPL_ACC_FUNCTOR_ADAPTER(CLAUSE) \ + template \ + class FunctorAdapter { \ + Functor m_functor; \ + using WorkTag = typename Policy::work_tag; \ + \ + public: \ + FunctorAdapter(Functor const &functor) : m_functor(functor) {} \ + \ + KOKKOS_IMPL_ACC_PRAGMA_HELPER(routine CLAUSE) \ + template \ + KOKKOS_FUNCTION void operator()(Args &&... args) const { \ + if constexpr (std::is_void_v) { \ + m_functor(static_cast(args)...); \ + } else { \ + m_functor(WorkTag(), static_cast(args)...); \ + } \ + } \ } -}; + +KOKKOS_IMPL_ACC_FUNCTOR_ADAPTER(worker); +KOKKOS_IMPL_ACC_FUNCTOR_ADAPTER(seq); + +#undef KOKKOS_IMPL_ACC_FUNCTOR_ADAPTER } // namespace Kokkos::Experimental::Impl diff --git a/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_MDRange.hpp b/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_MDRange.hpp index a55a18bc24..550436fe7b 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_MDRange.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_MDRange.hpp @@ -640,7 +640,9 @@ template class Kokkos::Impl::ParallelFor, Kokkos::Experimental::OpenACC> { using Policy = MDRangePolicy; - Kokkos::Experimental::Impl::FunctorAdapter m_functor; + Kokkos::Experimental::Impl::FunctorAdapter< + Functor, Policy, Kokkos::Experimental::Impl::RoutineClause::seq> + m_functor; Policy m_policy; public: diff --git a/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Range.hpp b/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Range.hpp index ede93ec19e..6ddfc352fc 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Range.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Range.hpp @@ -78,7 +78,9 @@ template class Kokkos::Impl::ParallelFor, Kokkos::Experimental::OpenACC> { using Policy = Kokkos::RangePolicy; - Kokkos::Experimental::Impl::FunctorAdapter m_functor; + Kokkos::Experimental::Impl::FunctorAdapter< + Functor, Policy, Kokkos::Experimental::Impl::RoutineClause::seq> + m_functor; Policy m_policy; using ScheduleType = Kokkos::Experimental::Impl::OpenACCScheduleType; diff --git a/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp b/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp index 1dc7b28912..b5cf670791 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_ParallelFor_Team.hpp @@ -31,7 +31,9 @@ class Kokkos::Impl::ParallelFor, private: using Policy = Kokkos::Impl::TeamPolicyInternal; - Kokkos::Experimental::Impl::FunctorAdapter m_functor; + Kokkos::Experimental::Impl::FunctorAdapter< + FunctorType, Policy, Kokkos::Experimental::Impl::RoutineClause::seq> + m_functor; using Member = typename Policy::member_type; const Policy m_policy; @@ -130,7 +132,8 @@ class Kokkos::Impl::ParallelFor, private: using Policy = Kokkos::Impl::TeamPolicyInternal; - Kokkos::Experimental::Impl::FunctorAdapter m_functor; + Kokkos::Experimental::Impl::FunctorAdapter + m_functor; using Member = typename Policy::member_type; const Policy m_policy; diff --git a/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_MDRange.hpp b/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_MDRange.hpp index 121a2cfe3f..0ebd8b219f 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_MDRange.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_MDRange.hpp @@ -76,7 +76,9 @@ class Kokkos::Impl::ParallelReduce( + Kokkos::Experimental::Impl::FunctorAdapter< + FunctorType, Policy, + Kokkos::Experimental::Impl::RoutineClause::seq>( m_functor_reducer.get_functor()), std::conditional_t< std::is_same_v, diff --git a/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_Range.hpp b/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_Range.hpp index 30f4797d83..e70b8997f0 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_Range.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_Range.hpp @@ -74,7 +74,9 @@ class Kokkos::Impl::ParallelReduce( + Kokkos::Experimental::Impl::FunctorAdapter< + FunctorType, Policy, + Kokkos::Experimental::Impl::RoutineClause::seq>( m_functor_reducer.get_functor()), std::conditional_t< std::is_same_v, diff --git a/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_Team.hpp b/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_Team.hpp index 4276f0f167..d572072aba 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_Team.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_ParallelReduce_Team.hpp @@ -21,6 +21,14 @@ #include #include +#ifdef KOKKOS_ENABLE_OPENACC_COLLAPSE_HIERARCHICAL_CONSTRUCTS +#define KOKKOS_IMPL_OPENACC_LOOP_CLAUSE \ + Kokkos::Experimental::Impl::RoutineClause::seq +#else +#define KOKKOS_IMPL_OPENACC_LOOP_CLAUSE \ + Kokkos::Experimental::Impl::RoutineClause::worker +#endif + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- // Hierarchical Parallelism -> Team level implementation @@ -67,7 +75,8 @@ class Kokkos::Impl::ParallelReduce( + Kokkos::Experimental::Impl::FunctorAdapter< + FunctorType, Policy, KOKKOS_IMPL_OPENACC_LOOP_CLAUSE>( m_functor_reducer.get_functor()), std::conditional_t< std::is_same_v, diff --git a/core/src/OpenACC/Kokkos_OpenACC_ParallelScan_Range.hpp b/core/src/OpenACC/Kokkos_OpenACC_ParallelScan_Range.hpp index 82401fd021..56f9db0db8 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_ParallelScan_Range.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_ParallelScan_Range.hpp @@ -63,8 +63,9 @@ class Kokkos::Impl::ParallelScan, } else { chunk_size = default_scan_chunk_size; } - const Kokkos::Experimental::Impl::FunctorAdapter functor( - m_functor); + const Kokkos::Experimental::Impl::FunctorAdapter< + Functor, Policy, Kokkos::Experimental::Impl::RoutineClause::seq> + functor(m_functor); const IndexType N = end - begin; const IndexType n_chunks = (N + chunk_size - 1) / chunk_size; Kokkos::View chunk_values(