From 1595bb994c28ed07b3c8bde3c835c65d25c2b814 Mon Sep 17 00:00:00 2001 From: Phil Miller Date: Wed, 17 Mar 2021 14:46:37 -0400 Subject: [PATCH 1/2] Add execution space argument to Panzer's TeamPolicy builder --- .../src/Panzer_HierarchicParallelism.hpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_HierarchicParallelism.hpp b/packages/panzer/disc-fe/src/Panzer_HierarchicParallelism.hpp index 48dc2e4df07d..fff0491baaca 100644 --- a/packages/panzer/disc-fe/src/Panzer_HierarchicParallelism.hpp +++ b/packages/panzer/disc-fe/src/Panzer_HierarchicParallelism.hpp @@ -134,11 +134,26 @@ namespace panzer { const int tmp_vector_size = this->template vectorSize(); if (use_auto_team_size_) - return Kokkos::TeamPolicy(league_size,Kokkos::AUTO(), - tmp_vector_size); + return Kokkos::TeamPolicy(league_size,Kokkos::AUTO(), + tmp_vector_size); return Kokkos::TeamPolicy(league_size,team_size_,tmp_vector_size); } + + /// Returns a TeamPolicy for hierarchic parallelism. + template + Kokkos::TeamPolicy teamPolicy(ExecSpace exec_space, const int& league_size) + { + const int tmp_vector_size = this->template vectorSize(); + + return Kokkos::TeamPolicy + ( + exec_space, + league_size, + use_auto_team_size_ ? 
Kokkos::AUTO() : team_size_, + tmp_vector_size + ); + } }; } From efacbd2f1a5db5de47e709b13b0689f6e9a3ab45 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Mon, 22 Mar 2021 14:03:45 -0600 Subject: [PATCH 2/2] Panzer: add exec space instances to hierarchic policy ctor for running with multiple cuda streams --- .../src/Panzer_HierarchicParallelism.hpp | 14 +-- .../disc-fe/test/core_tests/CMakeLists.txt | 6 + .../core_tests/hierarchic_team_policy.cpp | 117 ++++++++++++++++++ 3 files changed, 129 insertions(+), 8 deletions(-) create mode 100644 packages/panzer/disc-fe/test/core_tests/hierarchic_team_policy.cpp diff --git a/packages/panzer/disc-fe/src/Panzer_HierarchicParallelism.hpp b/packages/panzer/disc-fe/src/Panzer_HierarchicParallelism.hpp index fff0491baaca..6039925165a1 100644 --- a/packages/panzer/disc-fe/src/Panzer_HierarchicParallelism.hpp +++ b/packages/panzer/disc-fe/src/Panzer_HierarchicParallelism.hpp @@ -140,19 +140,17 @@ namespace panzer { return Kokkos::TeamPolicy(league_size,team_size_,tmp_vector_size); } - /// Returns a TeamPolicy for hierarchic parallelism. + /// Returns a TeamPolicy for hierarchic parallelism using an exec_space instance (for cuda streams). template Kokkos::TeamPolicy teamPolicy(ExecSpace exec_space, const int& league_size) { const int tmp_vector_size = this->template vectorSize(); - return Kokkos::TeamPolicy - ( - exec_space, - league_size, - use_auto_team_size_ ? 
Kokkos::AUTO() : team_size_, - tmp_vector_size - ); + if (use_auto_team_size_) + return Kokkos::TeamPolicy(exec_space,league_size,Kokkos::AUTO(), + tmp_vector_size); + + return Kokkos::TeamPolicy(exec_space,league_size,team_size_,tmp_vector_size); } }; diff --git a/packages/panzer/disc-fe/test/core_tests/CMakeLists.txt b/packages/panzer/disc-fe/test/core_tests/CMakeLists.txt index ab89bb1d4a87..3b26b194c3a5 100644 --- a/packages/panzer/disc-fe/test/core_tests/CMakeLists.txt +++ b/packages/panzer/disc-fe/test/core_tests/CMakeLists.txt @@ -136,6 +136,12 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 1 ) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + hierarchic_team_policy + SOURCES hierarchic_team_policy.cpp ${UNIT_TEST_DRIVER} + NUM_MPI_PROCS 1 + ) + #TRIBITS_ADD_EXECUTABLE_AND_TEST( # epetra_test # SOURCES epetra_test.cpp ${UNIT_TEST_DRIVER} diff --git a/packages/panzer/disc-fe/test/core_tests/hierarchic_team_policy.cpp b/packages/panzer/disc-fe/test/core_tests/hierarchic_team_policy.cpp new file mode 100644 index 000000000000..4da950f4e7b8 --- /dev/null +++ b/packages/panzer/disc-fe/test/core_tests/hierarchic_team_policy.cpp @@ -0,0 +1,117 @@ +// @HEADER +// *********************************************************************** +// +// Panzer: A partial differential equation assembly +// engine for strongly coupled complex multiphysics systems +// Copyright (2011) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Roger P. Pawlowski (rppawlo@sandia.gov) and +// Eric C. 
Cyr (eccyr@sandia.gov) +// *********************************************************************** +// @HEADER + +#include +#include +#include + +#include "Panzer_HierarchicParallelism.hpp" +#include "Sacado.hpp" + +namespace panzer_test { + + const int M = 100; + const int N = 16; + + template + void checkPolicy(bool use_stream_instance, + VectorType& a, VectorType& b, VectorType& c, + bool& success, OutputStream& out) + { + Kokkos::deep_copy(a,0.0); + Kokkos::deep_copy(b,1.0); + Kokkos::deep_copy(c,2.0); + + if (use_stream_instance) { + PHX::ExecSpace exec_space; + auto policy = panzer::HP::inst().teamPolicy(exec_space,M); + Kokkos::parallel_for("test 0",policy,KOKKOS_LAMBDA (const Kokkos::TeamPolicy::member_type team){ + const int i = team.league_rank(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,N), [&] (const int j) { + a(i,j) += b(i,j) + c(i,j); + }); + }); + } + else { + auto policy = panzer::HP::inst().teamPolicy(M); + Kokkos::parallel_for("test 0",policy,KOKKOS_LAMBDA (const Kokkos::TeamPolicy::member_type team){ + const int i = team.league_rank(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,N), [&] (const int j) { + a(i,j) += b(i,j) + c(i,j); + }); + }); + } + Kokkos::fence(); + + auto a_host = Kokkos::create_mirror_view(a); + Kokkos::deep_copy(a_host,a); + auto tol = 1000.0 * std::numeric_limits::epsilon(); + + for (int i=0; i < M; ++i) { + for (int j=0; j < N; ++j) { + TEST_FLOATING_EQUALITY(Sacado::ScalarValue::eval(a_host(i,j)),3.0,tol); // check the host mirror; 'a' itself may not be host-accessible + } + } + } + + TEUCHOS_UNIT_TEST(HierarchicTeamPolicy, StreamsDouble) + { + using Scalar = double; + PHX::View a("a",M,N); + PHX::View b("b",M,N); + PHX::View c("c",M,N); + panzer_test::checkPolicy(false,a,b,c,success,out); // default exec space + panzer_test::checkPolicy(true,a,b,c,success,out); // specify exec space + } + + TEUCHOS_UNIT_TEST(HierarchicTeamPolicy, StreamsDFAD) + { + using Scalar = Sacado::Fad::DFad; + const int deriv_dim = 8; + PHX::View a("a",M,N,deriv_dim); + PHX::View 
b("b",M,N,deriv_dim); + PHX::View c("c",M,N,deriv_dim); + panzer_test::checkPolicy(false,a,b,c,success,out); // default exec space + panzer_test::checkPolicy(true,a,b,c,success,out); // specify exec space + } + +}