diff --git a/core/src/Kokkos_Macros.hpp b/core/src/Kokkos_Macros.hpp index a884c037b3..00f4686ad8 100644 --- a/core/src/Kokkos_Macros.hpp +++ b/core/src/Kokkos_Macros.hpp @@ -55,6 +55,7 @@ #ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H #include <KokkosCore_config.h> +#include <impl/Kokkos_NvidiaGpuArchitectures.hpp> #endif //---------------------------------------------------------------------------- diff --git a/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp b/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp index c8a6dfec6f..88140a7647 100644 --- a/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp +++ b/core/src/OpenACC/Kokkos_OpenACC_Traits.hpp @@ -22,8 +22,7 @@ namespace Kokkos::Experimental::Impl { struct OpenACC_Traits { -#if defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \ - defined(KOKKOS_ARCH_AMPERE) || defined(KOKKOS_ARCH_HOPPER) +#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) static constexpr acc_device_t dev_type = acc_device_nvidia; static constexpr bool may_fallback_to_host = false; #else diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp index 5f56e23144..02f42ee2a6 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp @@ -89,13 +89,10 @@ void OpenMPTargetInternal::impl_initialize() { // FIXME_OPENMPTARGET: Only fix the number of teams for NVIDIA architectures // from Pascal and upwards. -#if defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \ - defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \ - defined(KOKKOS_ARCH_HOPPER) -#if defined(KOKKOS_COMPILER_CLANG) && (KOKKOS_COMPILER_CLANG >= 1300) +#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) && defined(KOKKOS_COMPILER_CLANG) && \ + (KOKKOS_COMPILER_CLANG >= 1300) omp_set_num_teams(512); #endif -#endif } int OpenMPTargetInternal::impl_is_initialized() { return m_is_initialized ? 
1 : 0; diff --git a/core/src/SYCL/Kokkos_SYCL.cpp b/core/src/SYCL/Kokkos_SYCL.cpp index c665631dd6..f8a1efda8e 100644 --- a/core/src/SYCL/Kokkos_SYCL.cpp +++ b/core/src/SYCL/Kokkos_SYCL.cpp @@ -128,10 +128,7 @@ void SYCL::impl_initialize(InitializationSettings const& settings) { // If the device id is not specified and there are no GPUs, sidestep Kokkos // device selection and use whatever is available (if no GPU architecture is // specified). -#if !defined(KOKKOS_ARCH_INTEL_GPU) && !defined(KOKKOS_ARCH_KEPLER) && \ - !defined(KOKKOS_ARCH_MAXWELL) && !defined(KOKKOS_ARCH_PASCAL) && \ - !defined(KOKKOS_ARCH_VOLTA) && !defined(KOKKOS_ARCH_TURING75) && \ - !defined(KOKKOS_ARCH_AMPERE) && !defined(KOKKOS_ARCH_HOPPER) +#if !defined(KOKKOS_ARCH_INTEL_GPU) && !defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) if (!settings.has_device_id() && gpu_devices.empty()) { Impl::SYCLInternal::singleton().initialize(sycl::device()); Impl::SYCLInternal::m_syclDev = 0; diff --git a/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp b/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp index 80f5db0558..62a41fe91f 100644 --- a/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp +++ b/core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp @@ -303,11 +303,8 @@ class TeamPolicyInternal (sizeof(double) + m_thread_scratch_size[0]); return std::min({ int(m_space.impl_internal_space_instance()->m_maxWorkgroupSize), - // FIXME_SYCL Avoid requesting to many registers on NVIDIA GPUs. -#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \ - defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \ - defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \ - defined(KOKKOS_ARCH_HOPPER) + // FIXME_SYCL Avoid requesting too many registers on NVIDIA GPUs. 
+#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) 256, #endif max_threads_for_memory @@ -336,11 +333,8 @@ class TeamPolicyInternal m_thread_scratch_size[0]); return std::min({ int(m_space.impl_internal_space_instance()->m_maxWorkgroupSize), - // FIXME_SYCL Avoid requesting to many registers on NVIDIA GPUs. -#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \ - defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \ - defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \ - defined(KOKKOS_ARCH_HOPPER) + // FIXME_SYCL Avoid requesting too many registers on NVIDIA GPUs. +#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) 256, #endif max_threads_for_memory diff --git a/core/src/impl/Kokkos_NvidiaGpuArchitectures.hpp b/core/src/impl/Kokkos_NvidiaGpuArchitectures.hpp new file mode 100644 index 0000000000..956b6dffea --- /dev/null +++ b/core/src/impl/Kokkos_NvidiaGpuArchitectures.hpp @@ -0,0 +1,58 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_CUDA_NVIDIA_GPU_ARCHITECTURES_HPP +#define KOKKOS_CUDA_NVIDIA_GPU_ARCHITECTURES_HPP + +#if defined(KOKKOS_ARCH_KEPLER30) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 30 +#elif defined(KOKKOS_ARCH_KEPLER32) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 32 +#elif defined(KOKKOS_ARCH_KEPLER35) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 35 +#elif defined(KOKKOS_ARCH_KEPLER37) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 37 +#elif defined(KOKKOS_ARCH_MAXWELL50) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 50 +#elif defined(KOKKOS_ARCH_MAXWELL52) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 52 +#elif defined(KOKKOS_ARCH_MAXWELL53) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 53 +#elif defined(KOKKOS_ARCH_PASCAL60) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 60 +#elif defined(KOKKOS_ARCH_PASCAL61) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 61 +#elif defined(KOKKOS_ARCH_VOLTA70) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 70 +#elif defined(KOKKOS_ARCH_VOLTA72) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 72 +#elif defined(KOKKOS_ARCH_TURING75) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 75 +#elif defined(KOKKOS_ARCH_AMPERE80) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 80 +#elif defined(KOKKOS_ARCH_AMPERE86) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 86 +#elif defined(KOKKOS_ARCH_ADA89) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 89 +#elif defined(KOKKOS_ARCH_HOPPER90) +#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 90 +#elif defined(KOKKOS_ENABLE_CUDA) +// do not raise an error on other backends that may run on NVIDIA GPUs such as +// OpenACC, OpenMPTarget, or SYCL +#error NVIDIA GPU arch not recognized +#endif + +#endif