From 88f6bc32ca065a7f4204d748ead44a5b56e4ec1a Mon Sep 17 00:00:00 2001 From: kliegeois Date: Sat, 4 Apr 2020 07:55:57 +0200 Subject: [PATCH] Stokhos: add GEMV for MP vector --- packages/stokhos/CMakeLists.txt | 17 + packages/stokhos/cmake/Stokhos_config.h.in | 8 + packages/stokhos/src/CMakeLists.txt | 5 + .../vector/KokkosBlas2_gemv_MP_Vector.hpp | 311 ++++++++++++++++++ .../kokkos/vector/KokkosBlas_MP_Vector.hpp | 4 + 5 files changed, 345 insertions(+) create mode 100644 packages/stokhos/src/sacado/kokkos/vector/KokkosBlas2_gemv_MP_Vector.hpp diff --git a/packages/stokhos/CMakeLists.txt b/packages/stokhos/CMakeLists.txt index 33a0ad276d4a..2c2a05917ab5 100644 --- a/packages/stokhos/CMakeLists.txt +++ b/packages/stokhos/CMakeLists.txt @@ -30,6 +30,23 @@ TRIBITS_ADD_OPTION_AND_DEFINE(${PACKAGE_NAME}_ENABLE_Ensemble_Reduction "Enable reduction across across ensemble components." OFF ) +IF(NOT HAVE_STOKHOS_ENSEMBLE_REDUCT AND NOT Stokhos_ENABLE_CUDA) + TRIBITS_ADD_OPTION_AND_DEFINE(${PACKAGE_NAME}_ENABLE_Ensemble_GEMV + HAVE_STOKHOS_ENSEMBLE_GEMV + "Enable optimized ensemble GEMV implementation." + ON ) +ELSE() + TRIBITS_ADD_OPTION_AND_DEFINE(${PACKAGE_NAME}_ENABLE_Ensemble_GEMV + HAVE_STOKHOS_ENSEMBLE_GEMV + "Enable optimized ensemble GEMV implementation." + OFF ) +ENDIF() + +IF(HAVE_STOKHOS_ENSEMBLE_GEMV) + SET(Stokhos_Ensemble_GEMV_Cache_Size 180224 CACHE INTEGER "Cache size used in the ensemble GEMV") + SET(Stokhos_Ensemble_GEMV_Team_Size 1 CACHE INTEGER "Team size used in the ensemble GEMV") +ENDIF() + TRIBITS_ADD_OPTION_AND_DEFINE( Stokhos_ENABLE_CXX11 HAVE_STOKHOS_CXX11 diff --git a/packages/stokhos/cmake/Stokhos_config.h.in b/packages/stokhos/cmake/Stokhos_config.h.in index 54ee33f8f1a9..7459bb627903 100644 --- a/packages/stokhos/cmake/Stokhos_config.h.in +++ b/packages/stokhos/cmake/Stokhos_config.h.in @@ -127,3 +127,11 @@ /* Define if pce scalar type is enabled */ #cmakedefine HAVE_STOKHOS_PCE_SCALAR_TYPE + +/* Define if ensemble GEMV is enabled */ +#cmakedefine HAVE_STOKHOS_ENSEMBLE_GEMV +#ifdef HAVE_STOKHOS_ENSEMBLE_GEMV +// Cache size +#define STOKHOS_GEMV_CACHE_SIZE @Stokhos_Ensemble_GEMV_Cache_Size@ +#define STOKHOS_GEMV_TEAM_SIZE @Stokhos_Ensemble_GEMV_Team_Size@ +#endif // HAVE_STOKHOS_ENSEMBLE_GEMV diff --git a/packages/stokhos/src/CMakeLists.txt b/packages/stokhos/src/CMakeLists.txt index e7b7f160bb53..f8095978169d 100644 --- a/packages/stokhos/src/CMakeLists.txt +++ b/packages/stokhos/src/CMakeLists.txt @@ -678,6 +678,11 @@ IF (Stokhos_ENABLE_Sacado) sacado/kokkos/vector/Teuchos_SerialQRDenseSolver_MP_Vector.hpp ${CMAKE_CURRENT_BINARY_DIR}/Stokhos_Ensemble_Sizes.hpp ) + IF(HAVE_STOKHOS_ENSEMBLE_GEMV) + LIST(APPEND SACADO_HEADERS + sacado/kokkos/vector/KokkosBlas2_gemv_MP_Vector.hpp + ) + ENDIF() LIST(APPEND SACADO_SOURCES sacado/kokkos/vector/Kokkos_View_MP_Vector.cpp ) diff --git a/packages/stokhos/src/sacado/kokkos/vector/KokkosBlas2_gemv_MP_Vector.hpp b/packages/stokhos/src/sacado/kokkos/vector/KokkosBlas2_gemv_MP_Vector.hpp new file mode 100644 index 000000000000..a6175ba4778b --- /dev/null +++ b/packages/stokhos/src/sacado/kokkos/vector/KokkosBlas2_gemv_MP_Vector.hpp @@ -0,0 +1,311 @@ +// @HEADER +// *********************************************************************** +// +// Stokhos Package +// Copyright (2009) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Eric T. Phipps (etphipp@sandia.gov). +// +// *********************************************************************** +// @HEADER + +#ifndef KOKKOSBLAS2_GEMV_MP_VECTOR_HPP +#define KOKKOSBLAS2_GEMV_MP_VECTOR_HPP + +#include +#include "Sacado_ConfigDefs.h" + +#include "Stokhos_ViewStorage.hpp" +#include "Sacado_MP_Vector.hpp" +#include "Kokkos_View_MP_Vector.hpp" +#include "Kokkos_ArithTraits_MP_Vector.hpp" +#include "KokkosBlas.hpp" + +#include "Stokhos_Sacado_Kokkos_MP_Vector.hpp" +#include "Kokkos_Core.hpp" + +#include "Stokhos_config.h" + +#define Sacado_MP_Vector_GEMV_Tile_Size(size) (STOKHOS_GEMV_CACHE_SIZE / size) + +// Functor for the update +template +struct updateF +{ + using AlphaCoeffType = typename AViewType::non_const_value_type; + using BetaCoeffType = typename YViewType::non_const_value_type; + + updateF(const AlphaCoeffType &alpha, + const AViewType &A, + const XViewType &x, + const BetaCoeffType &beta, + const YViewType &y, + const IndexType m_c) : alpha_(alpha), A_(A), x_(x), beta_(beta), y_(y), m_c_(m_c) + { + } + +public: + // i_tile is the current tile of the input matrix A + KOKKOS_INLINE_FUNCTION void + operator()(const IndexType &i_tile) const + { + const IndexType m = y_.extent(0); + const IndexType n = x_.extent(0); + + IndexType i_min = m_c_ * i_tile; + bool last_tile = (i_min + m_c_ >= m); + IndexType i_max = (last_tile) ? m : (i_min + m_c_); + +#ifdef STOKHOS_HAVE_PRAGMA_UNROLL +#pragma unroll +#endif + if (beta_ == BetaCoeffType(0.)) + for (IndexType i = i_min; i < i_max; ++i) + y_(i) = beta_; + else + for (IndexType i = i_min; i < i_max; ++i) + y_(i) *= beta_; + + for (IndexType j = 0; j < n; ++j) + { + AlphaCoeffType alphab = alpha_ * x_(j); + + for (IndexType i = i_min; i < i_max; ++i) + y_(i) += alphab * A_(i, j); + } + } + +private: + AlphaCoeffType alpha_; + typename AViewType::const_type A_; + typename XViewType::const_type x_; + BetaCoeffType beta_; + YViewType y_; + const IndexType m_c_; +}; + +// Functor for the inner products +template +struct innerF +{ + using execution_space = typename AViewType::execution_space; + using policy_type = Kokkos::TeamPolicy; + using member_type = typename policy_type::member_type; + + using AlphaCoeffType = typename AViewType::non_const_value_type; + using Scalar = AlphaCoeffType; + + innerF(const AlphaCoeffType &alpha, + const AViewType &A, + const XViewType &x, + const YViewType &y, + const IndexType n_c) : alpha_(alpha), A_(A), x_(x), y_(y), n_c_(n_c) + { + } + +public: + KOKKOS_INLINE_FUNCTION void + operator()(const member_type &team) const + { + const IndexType m = y_.extent(0); + const IndexType n = x_.extent(0); + + const int j = team.league_rank(); + const IndexType j_min = n_c_ * j; + const IndexType nj = (j_min + n_c_ > n) ? (n - j_min) : n_c_; + const IndexType i_min = j % m; + + for (IndexType i = i_min; i < m; ++i) + { + Scalar tmp = 0.; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, nj), [=](int jj, Scalar &tmp_sum) { + tmp_sum += A_(jj + j_min, i) * x_(jj + j_min); + }, + tmp); + if (team.team_rank() == 0) + { + tmp *= alpha_; + Kokkos::atomic_add(&y_(i), tmp); + } + } + for (IndexType i = 0; i < i_min; ++i) + { + Scalar tmp = 0.; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, nj), [=](int jj, Scalar &tmp_sum) { + tmp_sum += A_(jj + j_min, i) * x_(jj + j_min); + }, + tmp); + if (team.team_rank() == 0) + { + tmp *= alpha_; + Kokkos::atomic_add(&y_(i), tmp); + } + } + } + +private: + AlphaCoeffType alpha_; + typename AViewType::const_type A_; + typename XViewType::const_type x_; + YViewType y_; + const IndexType n_c_; +}; + +template +void update_MP( + typename VA::const_value_type &alpha, + const VA &A, + const VX &x, + typename VY::const_value_type &beta, + const VY &y) +{ + using execution_space = typename VA::execution_space; + using IndexType = typename VA::size_type; + using policy_type = Kokkos::RangePolicy; + + // Get the dimensions + const size_t m = y.extent(0); + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + const size_t N = execution_space::thread_pool_size(); +#else + const size_t N = execution_space::impl_thread_pool_size(); +#endif + const size_t m_c_star = Sacado_MP_Vector_GEMV_Tile_Size(sizeof(Scalar)); + const size_t n_tiles_per_thread = ceil(((double)m) / (N * m_c_star)); + const size_t m_c = ceil(((double)m) / (N * n_tiles_per_thread)); + const size_t n_tiles = N * n_tiles_per_thread; + + policy_type range(0, n_tiles); + + using functor_type = updateF; + functor_type functor(alpha, A, x, beta, y, m_c); + + Kokkos::parallel_for("KokkosBlas::gemv[Update]", range, functor); +} + +template +void inner_products_MP( + typename VA::const_value_type &alpha, + const VA &A, + const VX &x, + typename VY::const_value_type &beta, + const VY &y) +{ + using execution_space = typename VA::execution_space; + using IndexType = typename VA::size_type; + using team_policy_type = Kokkos::TeamPolicy; + + // Get the dimensions + const size_t m = y.extent(0); + const size_t n = x.extent(0); + + const size_t team_size = STOKHOS_GEMV_TEAM_SIZE; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + const size_t N = execution_space::thread_pool_size(); +#else + const size_t N = execution_space::impl_thread_pool_size(); +#endif + const size_t m_c_star = Sacado_MP_Vector_GEMV_Tile_Size(sizeof(Scalar)); + const size_t n_tiles_per_thread = ceil(((double)n) / (N * m_c_star)); + const size_t m_c = ceil(((double)n) / (N * n_tiles_per_thread)); + const size_t n_per_tile2 = m_c * team_size; + + const size_t n_i2 = ceil(((double)n) / n_per_tile2); + + team_policy_type team(n_i2, team_size); + + if (beta == Scalar(0.)) + Kokkos::parallel_for( + m, KOKKOS_LAMBDA(const int i) { + y(i) = beta; + }); + else + Kokkos::parallel_for( + m, KOKKOS_LAMBDA(const int i) { + y(i) *= beta; + }); + + using functor_type = innerF; + functor_type functor(alpha, A, x, y, n_per_tile2); + + Kokkos::parallel_for("KokkosBlas::gemv[InnerProducts]", team, functor); +} + +namespace KokkosBlas +{ +template +typename std::enable_if>::value && + Kokkos::is_view_mp_vector>::value && + Kokkos::is_view_mp_vector>::value>::type +gemv(const char trans[], + typename Kokkos::View::const_value_type &alpha, + const Kokkos::View &A, + const Kokkos::View &x, + typename Kokkos::View::const_value_type &beta, + const Kokkos::View &y) +{ + typedef typename Kokkos::View::value_type Scalar; + typedef Kokkos::View VA; + typedef Kokkos::View VX; + typedef Kokkos::View VY; + + static_assert(VA::rank == 2, "GEMM: A must have rank 2 (be a matrix)."); + static_assert(VX::rank == 1, "GEMM: x must have rank 1 (be a vector)."); + static_assert(VY::rank == 1, "GEMM: y must have rank 1 (be a vector)."); + + if (trans[0] == 'n' || trans[0] == 'N') + update_MP(alpha, A, x, beta, y); + else + inner_products_MP(alpha, A, x, beta, y); +} + +} // namespace KokkosBlas +#endif diff --git a/packages/stokhos/src/sacado/kokkos/vector/KokkosBlas_MP_Vector.hpp b/packages/stokhos/src/sacado/kokkos/vector/KokkosBlas_MP_Vector.hpp index 8646b239bcb4..e7e493186560 100644 --- a/packages/stokhos/src/sacado/kokkos/vector/KokkosBlas_MP_Vector.hpp +++ b/packages/stokhos/src/sacado/kokkos/vector/KokkosBlas_MP_Vector.hpp @@ -42,6 +42,10 @@ #ifndef KOKKOSBLAS_MP_VECTOR_HPP #define KOKKOSBLAS_MP_VECTOR_HPP +#ifdef HAVE_STOKHOS_ENSEMBLE_GEMV +#include "KokkosBlas2_gemv_MP_Vector.hpp" +#endif + #include "KokkosBlas3_gemm_MP_Vector.hpp" #include "KokkosBlas.hpp"