Skip to content

Commit

Permalink
Merge pull request #5738 from Rombur/refactor_openmp
Browse files Browse the repository at this point in the history
Refactor OpenMP backend
  • Loading branch information
dalg24 authored Jan 10, 2023
2 parents 459e881 + fbfa01e commit dafb577
Show file tree
Hide file tree
Showing 9 changed files with 337 additions and 285 deletions.
122 changes: 122 additions & 0 deletions core/src/OpenMP/Kokkos_OpenMP.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
#define KOKKOS_IMPL_PUBLIC_INCLUDE
#endif

#include <OpenMP/Kokkos_OpenMP.hpp>
#include <OpenMP/Kokkos_OpenMP_Instance.hpp>

#include <impl/Kokkos_ExecSpaceManager.hpp>

namespace Kokkos {

OpenMP::OpenMP()
: m_space_instance(&Impl::OpenMPInternal::singleton(),
[](Impl::OpenMPInternal *) {}) {
Impl::OpenMPInternal::singleton().verify_is_initialized(
"OpenMP instance constructor");
}

OpenMP::OpenMP(int pool_size)
: m_space_instance(new Impl::OpenMPInternal(pool_size),
[](Impl::OpenMPInternal *ptr) {
ptr->finalize();
delete ptr;
}) {
Impl::OpenMPInternal::singleton().verify_is_initialized(
"OpenMP instance constructor");
}

int OpenMP::impl_get_current_max_threads() noexcept {
return Impl::OpenMPInternal::get_current_max_threads();
}

void OpenMP::impl_initialize(InitializationSettings const &settings) {
Impl::OpenMPInternal::singleton().initialize(
settings.has_num_threads() ? settings.get_num_threads() : -1);
}

void OpenMP::impl_finalize() { Impl::OpenMPInternal::singleton().finalize(); }

void OpenMP::print_configuration(std::ostream &os, bool /*verbose*/) const {
os << "Host Parallel Execution Space:\n";
os << " KOKKOS_ENABLE_OPENMP: yes\n";

os << "OpenMP Atomics:\n";
os << " KOKKOS_ENABLE_OPENMP_ATOMICS: ";
#ifdef KOKKOS_ENABLE_OPENMP_ATOMICS
os << "yes\n";
#else
os << "no\n";
#endif

os << "\nOpenMP Runtime Configuration:\n";

m_space_instance->print_configuration(os);
}

int OpenMP::concurrency(OpenMP const &instance) {
return impl_thread_pool_size(instance);
}

void OpenMP::fence(const std::string &name) const {
Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::OpenMP>(
name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, []() {});
}

bool OpenMP::impl_is_initialized() noexcept {
return Impl::OpenMPInternal::singleton().is_initialized();
}

bool OpenMP::in_parallel(OpenMP const &exec_space) noexcept {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
return (
(exec_space.impl_internal_space_instance()->m_level < omp_get_level()) &&
(!Impl::t_openmp_instance ||
Impl::t_openmp_instance->m_level < omp_get_level()));
#else
return exec_space.impl_internal_space_instance()->m_level < omp_get_level();
#endif
}

int OpenMP::impl_thread_pool_size(OpenMP const &exec_space) noexcept {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
return OpenMP::in_parallel(exec_space)
? omp_get_num_threads()
: (Impl::t_openmp_instance
? Impl::t_openmp_instance->m_pool_size
: exec_space.impl_internal_space_instance()->m_pool_size);
#else
return OpenMP::in_parallel(exec_space)
? omp_get_num_threads()
: exec_space.impl_internal_space_instance()->m_pool_size;
#endif
}

int OpenMP::impl_max_hardware_threads() noexcept {
return Impl::g_openmp_hardware_max_threads;
}

namespace Impl {

int g_openmp_space_factory_initialized =
initialize_space_factory<OpenMP>("050_OpenMP");

} // namespace Impl

} // namespace Kokkos
87 changes: 84 additions & 3 deletions core/src/Kokkos_OpenMP.hpp → core/src/OpenMP/Kokkos_OpenMP.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ static_assert(false,
#include <impl/Kokkos_Profiling_Interface.hpp>
#include <impl/Kokkos_InitializationSettings.hpp>

#include <omp.h>

#include <vector>

/*--------------------------------------------------------------------------*/
Expand Down Expand Up @@ -81,7 +83,7 @@ class OpenMP {
void print_configuration(std::ostream& os, bool verbose = false) const;

/// \brief is the instance running a parallel algorithm
inline static bool in_parallel(OpenMP const& = OpenMP()) noexcept;
static bool in_parallel(OpenMP const& = OpenMP()) noexcept;

/// \brief Wait until all dispatched functors complete on the given instance
///
Expand Down Expand Up @@ -120,15 +122,15 @@ class OpenMP {
/// \brief Free any resources being consumed by the default execution space
static void impl_finalize();

inline static int impl_thread_pool_size(OpenMP const& = OpenMP()) noexcept;
static int impl_thread_pool_size(OpenMP const& = OpenMP()) noexcept;

/** \brief The rank of the executing thread in this thread pool */
inline static int impl_thread_pool_rank() noexcept;

inline static int impl_thread_pool_size(int depth, OpenMP const& = OpenMP());

// use UniqueToken
inline static int impl_max_hardware_threads() noexcept;
static int impl_max_hardware_threads() noexcept;

// use UniqueToken
KOKKOS_INLINE_FUNCTION
Expand All @@ -154,6 +156,85 @@ class OpenMP {
Kokkos::Impl::HostSharedPtr<Impl::OpenMPInternal> m_space_instance;
};

inline int OpenMP::impl_thread_pool_rank() noexcept {
// FIXME_OPENMP Can we remove this when removing partition_master? It's only
// used in one partition_master test
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
KOKKOS_IF_ON_HOST(
(return Impl::t_openmp_instance ? 0 : omp_get_thread_num();))
#else
KOKKOS_IF_ON_HOST((return omp_get_thread_num();))
#endif

KOKKOS_IF_ON_DEVICE((return -1;))
}

inline void OpenMP::impl_static_fence(std::string const& name) {
Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::OpenMP>(
name,
Kokkos::Tools::Experimental::SpecialSynchronizationCases::
GlobalDeviceSynchronization,
[]() {});
}

inline bool OpenMP::is_asynchronous(OpenMP const& /*instance*/) noexcept {
return false;
}

inline int OpenMP::impl_thread_pool_size(int depth, OpenMP const& exec_space) {
return depth < 2 ? impl_thread_pool_size(exec_space) : 1;
}

KOKKOS_INLINE_FUNCTION
int OpenMP::impl_hardware_thread_id() noexcept {
KOKKOS_IF_ON_HOST((return omp_get_thread_num();))

KOKKOS_IF_ON_DEVICE((return -1;))
}

#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
template <typename F>
KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions,
int partition_size) {
#if _OPENMP >= 201511
if (omp_get_max_active_levels() > 1) {
#else
if (omp_get_nested()) {
#endif
using Exec = Impl::OpenMPInternal;

Exec* prev_instance = &Impl::OpenMPInternal::singleton();

Exec::validate_partition_impl(prev_instance->m_pool_size, num_partitions,
partition_size);

OpenMP::memory_space space;

#pragma omp parallel num_threads(num_partitions)
{
Exec thread_local_instance(partition_size);
Impl::t_openmp_instance = &thread_local_instance;

size_t pool_reduce_bytes = 32 * partition_size;
size_t team_reduce_bytes = 32 * partition_size;
size_t team_shared_bytes = 1024 * partition_size;
size_t thread_local_bytes = 1024;

thread_local_instance.resize_thread_data(
pool_reduce_bytes, team_reduce_bytes, team_shared_bytes,
thread_local_bytes);

omp_set_num_threads(partition_size);
f(omp_get_thread_num(), omp_get_num_threads());
Impl::t_openmp_instance = nullptr;
}
} else {
// nested openmp not enabled
f(0, 1);
}
}
#endif

namespace Tools {
namespace Experimental {
template <>
Expand Down
64 changes: 0 additions & 64 deletions core/src/OpenMP/Kokkos_OpenMP_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,68 +384,4 @@ bool OpenMPInternal::verify_is_initialized(const char *const label) const {
return m_initialized;
}
} // namespace Impl

//----------------------------------------------------------------------------

OpenMP::OpenMP()
: m_space_instance(&Impl::OpenMPInternal::singleton(),
[](Impl::OpenMPInternal *) {}) {
Impl::OpenMPInternal::singleton().verify_is_initialized(
"OpenMP instance constructor");
}

OpenMP::OpenMP(int pool_size)
: m_space_instance(new Impl::OpenMPInternal(pool_size),
[](Impl::OpenMPInternal *ptr) {
ptr->finalize();
delete ptr;
}) {
Impl::OpenMPInternal::singleton().verify_is_initialized(
"OpenMP instance constructor");
}

int OpenMP::impl_get_current_max_threads() noexcept {
return Impl::OpenMPInternal::get_current_max_threads();
}

void OpenMP::impl_initialize(InitializationSettings const &settings) {
Impl::OpenMPInternal::singleton().initialize(
settings.has_num_threads() ? settings.get_num_threads() : -1);
}

void OpenMP::impl_finalize() { Impl::OpenMPInternal::singleton().finalize(); }

void OpenMP::print_configuration(std::ostream &os, bool /*verbose*/) const {
os << "Host Parallel Execution Space:\n";
os << " KOKKOS_ENABLE_OPENMP: yes\n";

os << "OpenMP Atomics:\n";
os << " KOKKOS_ENABLE_OPENMP_ATOMICS: ";
#ifdef KOKKOS_ENABLE_OPENMP_ATOMICS
os << "yes\n";
#else
os << "no\n";
#endif

os << "\nOpenMP Runtime Configuration:\n";

m_space_instance->print_configuration(os);
}

int OpenMP::concurrency(OpenMP const &instance) {
return impl_thread_pool_size(instance);
}

void OpenMP::fence(const std::string &name) const {
Kokkos::Tools::Experimental::Impl::profile_fence_event<Kokkos::OpenMP>(
name, Kokkos::Tools::Experimental::Impl::DirectFenceIDHandle{1}, []() {});
}

namespace Impl {

int g_openmp_space_factory_initialized =
initialize_space_factory<OpenMP>("050_OpenMP");

} // namespace Impl

} // namespace Kokkos
Loading

0 comments on commit dafb577

Please sign in to comment.