Skip to content

Commit

Permalink
Merge pull request #5948 from dalg24/kokkos_arch_nvidia_gpu_macro
Browse files Browse the repository at this point in the history
Introduce `KOKKOS_IMPL_ARCH_NVIDIA_GPU` macro
  • Loading branch information
dalg24 authored Mar 23, 2023
2 parents 65aa95e + b097f74 commit 3fc7789
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 21 deletions.
1 change: 1 addition & 0 deletions core/src/Kokkos_Macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@

#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
#include <KokkosCore_config.h>
#include <impl/Kokkos_NvidiaGpuArchitectures.hpp>
#endif

//----------------------------------------------------------------------------
Expand Down
3 changes: 1 addition & 2 deletions core/src/OpenACC/Kokkos_OpenACC_Traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
namespace Kokkos::Experimental::Impl {

struct OpenACC_Traits {
#if defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \
defined(KOKKOS_ARCH_AMPERE) || defined(KOKKOS_ARCH_HOPPER)
#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU)
static constexpr acc_device_t dev_type = acc_device_nvidia;
static constexpr bool may_fallback_to_host = false;
#else
Expand Down
7 changes: 2 additions & 5 deletions core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,10 @@ void OpenMPTargetInternal::impl_initialize() {

// FIXME_OPENMPTARGET: Only fix the number of teams for NVIDIA architectures
// from Pascal and upwards.
#if defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \
defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \
defined(KOKKOS_ARCH_HOPPER)
#if defined(KOKKOS_COMPILER_CLANG) && (KOKKOS_COMPILER_CLANG >= 1300)
#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU) && defined(KOKKOS_COMPILER_CLANG) && \
(KOKKOS_COMPILER_CLANG >= 1300)
omp_set_num_teams(512);
#endif
#endif
}
int OpenMPTargetInternal::impl_is_initialized() {
return m_is_initialized ? 1 : 0;
Expand Down
5 changes: 1 addition & 4 deletions core/src/SYCL/Kokkos_SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,7 @@ void SYCL::impl_initialize(InitializationSettings const& settings) {
// If the device id is not specified and there are no GPUs, sidestep Kokkos
// device selection and use whatever is available (if no GPU architecture is
// specified).
#if !defined(KOKKOS_ARCH_INTEL_GPU) && !defined(KOKKOS_ARCH_KEPLER) && \
!defined(KOKKOS_ARCH_MAXWELL) && !defined(KOKKOS_ARCH_PASCAL) && \
!defined(KOKKOS_ARCH_VOLTA) && !defined(KOKKOS_ARCH_TURING75) && \
!defined(KOKKOS_ARCH_AMPERE) && !defined(KOKKOS_ARCH_HOPPER)
#if !defined(KOKKOS_ARCH_INTEL_GPU) && !defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU)
if (!settings.has_device_id() && gpu_devices.empty()) {
Impl::SYCLInternal::singleton().initialize(sycl::device());
Impl::SYCLInternal::m_syclDev = 0;
Expand Down
14 changes: 4 additions & 10 deletions core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,8 @@ class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>
(sizeof(double) + m_thread_scratch_size[0]);
return std::min({
int(m_space.impl_internal_space_instance()->m_maxWorkgroupSize),
// FIXME_SYCL Avoid requesting to many registers on NVIDIA GPUs.
#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \
defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \
defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \
defined(KOKKOS_ARCH_HOPPER)
// FIXME_SYCL Avoid requesting too many registers on NVIDIA GPUs.
#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU)
256,
#endif
max_threads_for_memory
Expand Down Expand Up @@ -336,11 +333,8 @@ class TeamPolicyInternal<Kokkos::Experimental::SYCL, Properties...>
m_thread_scratch_size[0]);
return std::min<int>({
int(m_space.impl_internal_space_instance()->m_maxWorkgroupSize),
// FIXME_SYCL Avoid requesting to many registers on NVIDIA GPUs.
#if defined(KOKKOS_ARCH_KEPLER) || defined(KOKKOS_ARCH_MAXWELL) || \
defined(KOKKOS_ARCH_PASCAL) || defined(KOKKOS_ARCH_VOLTA) || \
defined(KOKKOS_ARCH_TURING75) || defined(KOKKOS_ARCH_AMPERE) || \
defined(KOKKOS_ARCH_HOPPER)
// FIXME_SYCL Avoid requesting too many registers on NVIDIA GPUs.
#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU)
256,
#endif
max_threads_for_memory
Expand Down
58 changes: 58 additions & 0 deletions core/src/impl/Kokkos_NvidiaGpuArchitectures.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOS_CUDA_NVIDIA_GPU_ARCHITECTURES_HPP
#define KOKKOS_CUDA_NVIDIA_GPU_ARCHITECTURES_HPP

// Collapses the per-architecture KOKKOS_ARCH_<NAME><NN> macros into a single
// implementation macro, KOKKOS_IMPL_ARCH_NVIDIA_GPU.
//
// - If one of the KOKKOS_ARCH_* macros listed below is defined,
//   KOKKOS_IMPL_ARCH_NVIDIA_GPU is defined to the integer NN that appears as
//   the numeric suffix of that macro's name (e.g. KOKKOS_ARCH_VOLTA70 -> 70).
//   NOTE(review): NN presumably encodes the CUDA compute capability as
//   major * 10 + minor -- confirm before relying on numeric comparisons.
// - The chain is evaluated top to bottom and stops at the first match, so if
//   more than one KOKKOS_ARCH_* macro were defined, the one listed first
//   (lowest NN) would be selected.
// - If none is defined, KOKKOS_IMPL_ARCH_NVIDIA_GPU is left undefined, which
//   lets code test for "any NVIDIA GPU architecture" with
//   `#if defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU)`.
//
// This header does not include anything itself; the KOKKOS_ARCH_* and
// KOKKOS_ENABLE_CUDA macros must already be visible at the point of inclusion.
// NOTE(review): presumably they come from <KokkosCore_config.h> -- confirm.

// Kepler
#if defined(KOKKOS_ARCH_KEPLER30)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 30
#elif defined(KOKKOS_ARCH_KEPLER32)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 32
#elif defined(KOKKOS_ARCH_KEPLER35)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 35
#elif defined(KOKKOS_ARCH_KEPLER37)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 37
// Maxwell
#elif defined(KOKKOS_ARCH_MAXWELL50)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 50
#elif defined(KOKKOS_ARCH_MAXWELL52)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 52
#elif defined(KOKKOS_ARCH_MAXWELL53)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 53
// Pascal
#elif defined(KOKKOS_ARCH_PASCAL60)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 60
#elif defined(KOKKOS_ARCH_PASCAL61)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 61
// Volta
#elif defined(KOKKOS_ARCH_VOLTA70)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 70
#elif defined(KOKKOS_ARCH_VOLTA72)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 72
// Turing
#elif defined(KOKKOS_ARCH_TURING75)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 75
// Ampere
#elif defined(KOKKOS_ARCH_AMPERE80)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 80
#elif defined(KOKKOS_ARCH_AMPERE86)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 86
// Ada
#elif defined(KOKKOS_ARCH_ADA89)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 89
// Hopper
#elif defined(KOKKOS_ARCH_HOPPER90)
#define KOKKOS_IMPL_ARCH_NVIDIA_GPU 90
#elif defined(KOKKOS_ENABLE_CUDA)
// No known NVIDIA architecture macro is defined. This is only a hard error
// when the CUDA backend is enabled;
// do not raise an error on other backends that may run on NVIDIA GPUs such as
// OpenACC, OpenMPTarget, or SYCL
#error NVIDIA GPU arch not recognized
#endif

#endif

0 comments on commit 3fc7789

Please sign in to comment.