From 71e0ecaf47b07864689a120fba75835c7196bc8b Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Tue, 31 Jan 2023 12:53:40 -0700 Subject: [PATCH] CRS: Use Kokkos device function macros rather than duplicating code when compiling for GPU targets --- core/src/Kokkos_Crs.hpp | 40 ++-------------------------------------- 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/core/src/Kokkos_Crs.hpp b/core/src/Kokkos_Crs.hpp index 1f298a70cb..92931b5849 100644 --- a/core/src/Kokkos_Crs.hpp +++ b/core/src/Kokkos_Crs.hpp @@ -304,11 +304,11 @@ struct CountAndFillBase { Functor m_functor; counts_type m_counts; struct Count {}; - inline void operator()(Count, size_type i) const { + KOKKOS_FUNCTION void operator()(Count, size_type i) const { m_counts(i) = m_functor(i, nullptr); } struct Fill {}; - inline void operator()(Fill, size_type i) const { + KOKKOS_FUNCTION void operator()(Fill, size_type i) const { auto j = m_crs.row_map(i); /* we don't want to access entries(entries.size()), even if its just to get its address and never use it. this can happen when row (i) is empty and @@ -323,42 +323,6 @@ struct CountAndFillBase { CountAndFillBase(CrsType& crs, Functor const& f) : m_crs(crs), m_functor(f) {} }; -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) -#if defined(KOKKOS_ENABLE_CUDA) -#define EXEC_SPACE Kokkos::Cuda -#elif defined(KOKKOS_ENABLE_HIP) -#define EXEC_SPACE Kokkos::HIP -#endif -template -struct CountAndFillBase { - using data_type = typename CrsType::data_type; - using size_type = typename CrsType::size_type; - using row_map_type = typename CrsType::row_map_type; - using counts_type = row_map_type; - CrsType m_crs; - Functor m_functor; - counts_type m_counts; - struct Count {}; - __device__ inline void operator()(Count, size_type i) const { - m_counts(i) = m_functor(i, nullptr); - } - struct Fill {}; - __device__ inline void operator()(Fill, size_type i) const { - auto j = m_crs.row_map(i); - /* we don't want to access entries(entries.size()), even if its just to get - its address and never use it. this can happen when row (i) is empty and - all rows after it are also empty. we could compare to row_map(i + 1), but - that is a read from global memory, whereas dimension_0() should be part - of the View in registers (or constant memory) */ - data_type* fill = (j == static_cast(m_crs.entries.extent(0))) - ? nullptr - : (&(m_crs.entries(j))); - m_functor(i, fill); - } - CountAndFillBase(CrsType& crs, Functor const& f) : m_crs(crs), m_functor(f) {} -}; -#endif - template struct CountAndFill : public CountAndFillBase { using base_type = CountAndFillBase;