From c4acebd185997f6f95157635dc9be7d034e06dc4 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Wed, 30 Jun 2021 16:20:28 -0600 Subject: [PATCH 01/14] Panzer: fix some disc-fe core tests for UVM off --- .../src/Panzer_IntrepidOrientation.cpp | 2 +- .../core_tests/hierarchic_team_policy.cpp | 4 +- .../test/core_tests/zero_sensitivities.cpp | 50 ++++++++++++------- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_IntrepidOrientation.cpp b/packages/panzer/disc-fe/src/Panzer_IntrepidOrientation.cpp index a74cde808066..6367820cd334 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntrepidOrientation.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntrepidOrientation.cpp @@ -94,7 +94,7 @@ namespace panzer { // construct orientation information for (int c=0;c + Kokkos::View nodes(connMgr.getConnectivity(localCellId), numVertexPerCell); orientation[localCellId] = (Intrepid2::Orientation::getOrientation(cellTopo, nodes)); } diff --git a/packages/panzer/disc-fe/test/core_tests/hierarchic_team_policy.cpp b/packages/panzer/disc-fe/test/core_tests/hierarchic_team_policy.cpp index 4da950f4e7b8..9db18fc0a4f2 100644 --- a/packages/panzer/disc-fe/test/core_tests/hierarchic_team_policy.cpp +++ b/packages/panzer/disc-fe/test/core_tests/hierarchic_team_policy.cpp @@ -44,6 +44,8 @@ #include #include +#include "KokkosExp_View_Fad.hpp" +#include "Kokkos_DynRankView_Fad.hpp" #include "Panzer_HierarchicParallelism.hpp" #include "Sacado.hpp" @@ -88,7 +90,7 @@ namespace panzer_test { for (int i=0; i < M; ++i) { for (int j=0; j < N; ++j) { - TEST_FLOATING_EQUALITY(Sacado::ScalarValue::eval(a(i,j)),3.0,tol); + TEST_FLOATING_EQUALITY(Sacado::ScalarValue::eval(a_host(i,j)),3.0,tol); } } } diff --git a/packages/panzer/disc-fe/test/core_tests/zero_sensitivities.cpp b/packages/panzer/disc-fe/test/core_tests/zero_sensitivities.cpp index d2c0ad0466b1..f17968209c6c 100644 --- a/packages/panzer/disc-fe/test/core_tests/zero_sensitivities.cpp +++ b/packages/panzer/disc-fe/test/core_tests/zero_sensitivities.cpp @@ -43,6 +43,8 @@ #include #include +#include "KokkosExp_View_Fad.hpp" +#include "Kokkos_DynRankView_Fad.hpp" #include "Phalanx_DataLayout_MDALayout.hpp" #include "Phalanx_MDField.hpp" #include "Phalanx_KokkosViewFactory.hpp" @@ -137,20 +139,23 @@ namespace panzer { a.setFieldData(PHX::KokkosViewFactory::type,PHX::Device>::buildView(a.fieldTag())); // initialize - for (int cell = 0; cell < a.extent_int(0); ++cell) { - for (int pt=0; pt < a.extent_int(1); ++pt) { - a(cell,pt) = 2.0; - } - } + a.deep_copy(2.0); + // for (int cell = 0; cell < a.extent_int(0); ++cell) { + // for (int pt=0; pt < a.extent_int(1); ++pt) { + // a(cell,pt) = 2.0; + // } + // } // Compute Kokkos::parallel_for(a.extent_int(0),ComputeA > (a)); typename PHX::Device().fence(); + + auto a_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),a.get_static_view()); // Check for (int cell = 0; cell < a.extent_int(0); ++cell) for (int pt=0; pt < a.extent_int(1); ++pt) - TEST_FLOATING_EQUALITY(a(cell,pt),2.0,tolerance); + TEST_FLOATING_EQUALITY(a_host(cell,pt),2.0,tolerance); } // *********************** @@ -162,28 +167,39 @@ namespace panzer { derivative_dimension.push_back(2); a.setFieldData(PHX::KokkosViewFactory::type,PHX::Device>::buildView(a.fieldTag(),derivative_dimension)); - // initialize + // Initialize (use raw kokkos to avoid compiler warning about MDField dtor with host function calls). + auto a_dev = a.get_static_view(); + Kokkos::parallel_for("initialize zero sensitivity vec",a.extent(0),KOKKOS_LAMBDA (const int cell) { + //for (int cell = 0; cell < a.extent_int(0); ++cell) { + for (int pt=0; pt < a_dev.extent_int(1); ++pt) { + a_dev(cell,pt) = 1.0; + a_dev(cell,pt).fastAccessDx(0) = 2.0; + a_dev(cell,pt).fastAccessDx(1) = 3.0; + } + }); + + // Check initial values + auto a_host = Kokkos::create_mirror_view(a.get_static_view()); + Kokkos::deep_copy(a_host,a.get_static_view()); for (int cell = 0; cell < a.extent_int(0); ++cell) { for (int pt=0; pt < a.extent_int(1); ++pt) { - a(cell,pt) = 1.0; - a(cell,pt).fastAccessDx(0) = 2.0; - a(cell,pt).fastAccessDx(1) = 3.0; - TEST_FLOATING_EQUALITY(a(cell,pt).val(),1.0,1e-12); - TEST_FLOATING_EQUALITY(a(cell,pt).fastAccessDx(0),2.0,1e-12); - TEST_FLOATING_EQUALITY(a(cell,pt).fastAccessDx(1),3.0,1e-12); + TEST_FLOATING_EQUALITY(a_host(cell,pt).val(),1.0,1e-12); + TEST_FLOATING_EQUALITY(a_host(cell,pt).fastAccessDx(0),2.0,1e-12); + TEST_FLOATING_EQUALITY(a_host(cell,pt).fastAccessDx(1),3.0,1e-12); } } - + // Compute Kokkos::parallel_for(a.extent(0),ComputeB > (a)); typename PHX::Device().fence(); // Check + Kokkos::deep_copy(a_host,a.get_static_view()); for (int cell = 0; cell < a.extent_int(0); ++cell) { for (int pt=0; pt < a.extent_int(1); ++pt) { - TEST_FLOATING_EQUALITY(a(cell,pt).val(),1.0,1e-12); - TEST_FLOATING_EQUALITY(a(cell,pt).fastAccessDx(0),0.0,1e-12); - TEST_FLOATING_EQUALITY(a(cell,pt).fastAccessDx(1),0.0,1e-12); + TEST_FLOATING_EQUALITY(a_host(cell,pt).val(),1.0,1e-12); + TEST_FLOATING_EQUALITY(a_host(cell,pt).fastAccessDx(0),0.0,1e-12); + TEST_FLOATING_EQUALITY(a_host(cell,pt).fastAccessDx(1),0.0,1e-12); } } From 52c15162f61e3948f9ae9fac292b21b60d91752b Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Thu, 1 Jul 2021 06:22:49 -0600 Subject: [PATCH 02/14] Panzer: fix core/evaluators tests for no-UVM --- .../panzer/disc-fe/src/evaluators/Panzer_Sum_impl.hpp | 6 ++++-- packages/panzer/disc-fe/test/core_tests/evaluators.cpp | 10 ---------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/packages/panzer/disc-fe/src/evaluators/Panzer_Sum_impl.hpp b/packages/panzer/disc-fe/src/evaluators/Panzer_Sum_impl.hpp index fa0761d4619d..456a8a6e2a8e 100644 --- a/packages/panzer/disc-fe/src/evaluators/Panzer_Sum_impl.hpp +++ b/packages/panzer/disc-fe/src/evaluators/Panzer_Sum_impl.hpp @@ -65,6 +65,7 @@ Sum( // check if the user wants to scale each term independently auto local_scalars = PHX::View("scalars",value_names->size()); + auto local_scalars_host = Kokkos::create_mirror_view(local_scalars); if(p.isType > >("Scalars")) { auto scalars_v = *p.get > >("Scalars"); @@ -72,12 +73,13 @@ Sum( TEUCHOS_ASSERT(scalars_v.size()==value_names->size()); for (std::size_t i=0; i < value_names->size(); ++i) - local_scalars(i) = scalars_v[i]; + local_scalars_host(i) = scalars_v[i]; } else { for (std::size_t i=0; i < value_names->size(); ++i) - local_scalars(i) = 1.0; + local_scalars_host(i) = 1.0; } + Kokkos::deep_copy(local_scalars,local_scalars_host); scalars = local_scalars; diff --git a/packages/panzer/disc-fe/test/core_tests/evaluators.cpp b/packages/panzer/disc-fe/test/core_tests/evaluators.cpp index d5d4c5e06231..ca43b3e47662 100644 --- a/packages/panzer/disc-fe/test/core_tests/evaluators.cpp +++ b/packages/panzer/disc-fe/test/core_tests/evaluators.cpp @@ -96,8 +96,6 @@ namespace panzer { TEUCHOS_UNIT_TEST(evaluators, Sum) { - - ParameterList p("Sum Test"); p.set("Sum Name", "Sum of Sources"); RCP > value_names = rcp(new vector); @@ -114,8 +112,6 @@ namespace panzer { TEUCHOS_UNIT_TEST(evaluators, ScalarToVector) { - - ParameterList p("ScalarToVector Test"); p.set("Vector Name", "U"); RCP > scalar_names = rcp(new vector); @@ -136,8 +132,6 @@ namespace panzer { TEUCHOS_UNIT_TEST(evaluators, VectorToScalar) { - - ParameterList p("VectorToScalar Test"); p.set("Vector Name", "U"); RCP > scalar_names = rcp(new vector); @@ -158,8 +152,6 @@ namespace panzer { TEUCHOS_UNIT_TEST(evaluators, DirichletResidual) { - - ParameterList p("DirichletResidual Test"); p.set("Residual Name", "Residual_TEMP"); p.set("DOF Name", "TEMP"); @@ -173,7 +165,6 @@ namespace panzer { TEUCHOS_UNIT_TEST(evaluators, DOF) { - using panzer::IntegrationRule; using panzer::BasisIRLayout; @@ -298,7 +289,6 @@ namespace panzer { TEUCHOS_UNIT_TEST(evaluators, Integrator_GradBasisDotVector) { - using panzer::IntegrationRule; using panzer::BasisIRLayout; From db999000e6e66740e34d8841b4122515404b4540 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Thu, 1 Jul 2021 15:02:37 -0600 Subject: [PATCH 03/14] Panzer: start of uvm removal for integration values object --- .../disc-fe/src/Panzer_IntegrationValues2.cpp | 134 +++++++++-------- .../disc-fe/src/Panzer_IntegrationValues2.hpp | 10 +- .../test/core_tests/integration_values2.cpp | 140 +++++++++++------- 3 files changed, 158 insertions(+), 126 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index 6c995b7b2820..1e9660a24e31 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -292,7 +292,7 @@ getCubature(const PHX::MDField& in_node_coordinates, int num_space_dim = int_rule->topology->getDimension(); if (int_rule->isSide() && num_space_dim==1) { - std::cout << "WARNING: 0-D quadrature rule ifrastructure does not exist!!! Will not be able to do " + std::cout << "WARNING: 0-D quadrature rule infrastructure does not exist!!! Will not be able to do " << "non-natural integration rules."; return; } @@ -895,21 +895,12 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi // copy the dynamic data structures into the static data structures { - size_type num_ip = dyn_cub_points.extent(0); - size_type num_dims = dyn_cub_points.extent(1); - - for (size_type ip = 0; ip < num_ip; ++ip) { - cub_weights(ip) = dyn_cub_weights(ip); - for (size_type dim = 0; dim < num_dims; ++dim) - cub_points(ip,dim) = dyn_cub_points(ip,dim); - } + Kokkos::deep_copy(cub_weights.get_static_view(),dyn_cub_weights.get_view()); + Kokkos::deep_copy(cub_points.get_static_view(),dyn_cub_points.get_view()); } if (int_rule->isSide()) { - const size_type num_ip = dyn_cub_points.extent(0), num_side_dims = dyn_side_cub_points.extent(1); - for (size_type ip = 0; ip < num_ip; ++ip) - for (size_type dim = 0; dim < num_side_dims; ++dim) - side_cub_points(ip,dim) = dyn_side_cub_points(ip,dim); + Kokkos::deep_copy(side_cub_points.get_static_view(),dyn_side_cub_points.get_view()); } const int num_cells = in_num_cells < 0 ? in_node_coordinates.extent(0) : in_num_cells; @@ -917,15 +908,14 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi { size_type num_nodes = in_node_coordinates.extent(1); size_type num_dims = in_node_coordinates.extent(2); - - for (int cell = 0; cell < num_cells; ++cell) { - for (size_type node = 0; node < num_nodes; ++node) { - for (size_type dim = 0; dim < num_dims; ++dim) { - node_coordinates(cell,node,dim) = - in_node_coordinates(cell,node,dim); - } - } - } + auto node_coordinates_k = node_coordinates.get_view(); + auto in_node_coordinates_k = in_node_coordinates.get_view(); + + Kokkos::MDRangePolicy> policy({0,0,0},{num_cells,num_nodes,num_dims}); + Kokkos::parallel_for("node coordinates",policy,KOKKOS_LAMBDA (const int cell,const int node,const int dim) { + node_coordinates_k(cell,node,dim) = in_node_coordinates_k(cell,node,dim); + }); + PHX::Device::execution_space().fence(); } auto s_in_node_coordinates = Kokkos::subview(in_node_coordinates.get_view(),std::make_pair(0,num_cells),Kokkos::ALL(),Kokkos::ALL()); @@ -961,24 +951,27 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi else TEUCHOS_ASSERT(false); // Shakib contravarient metric tensor - for (int cell = 0; cell < num_cells; ++cell) { - for (size_type ip = 0; ip < contravarient.extent(1); ++ip) { - + { + auto covarient_k = covarient.get_view(); + auto contravarient_k = contravarient.get_view(); + auto jac_k = jac.get_view(); + Kokkos::MDRangePolicy> policy({0,0},{num_cells,static_cast(contravarient.extent(1))}); + Kokkos::parallel_for("evalaute covarient metric tensor",policy,KOKKOS_LAMBDA (const int cell,const int ip) { // zero out matrix - for (size_type i = 0; i < contravarient.extent(2); ++i) - for (size_type j = 0; j < contravarient.extent(3); ++j) - covarient(cell,ip,i,j) = 0.0; + for (int i = 0; i < static_cast(contravarient_k.extent(2)); ++i) + for (int j = 0; j < static_cast(contravarient_k.extent(3)); ++j) + covarient_k(cell,ip,i,j) = 0.0; // g^{ij} = \frac{\parital x_i}{\partial \chi_\alpha}\frac{\parital x_j}{\partial \chi_\alpha} - for (size_type i = 0; i < contravarient.extent(2); ++i) { - for (size_type j = 0; j < contravarient.extent(3); ++j) { - for (size_type alpha = 0; alpha < contravarient.extent(2); ++alpha) { - covarient(cell,ip,i,j) += jac(cell,ip,i,alpha) * jac(cell,ip,j,alpha); + for (int i = 0; i < static_cast(contravarient_k.extent(2)); ++i) { + for (int j = 0; j < static_cast(contravarient_k.extent(3)); ++j) { + for (int alpha = 0; alpha < static_cast(contravarient_k.extent(2)); ++alpha) { + covarient_k(cell,ip,i,j) += jac_k(cell,ip,i,alpha) * jac_k(cell,ip,j,alpha); } } } - - } + }); + PHX::Device::execution_space().fence(); } auto s_covarient = Kokkos::subview(covarient.get_view(),std::make_pair(0,num_cells),Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()); @@ -986,16 +979,20 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi Intrepid2::RealSpaceTools::inverse(s_contravarient, s_covarient); // norm of g_ij - for (int cell = 0; cell < num_cells; ++cell) { - for (size_type ip = 0; ip < contravarient.extent(1); ++ip) { - norm_contravarient(cell,ip) = 0.0; - for (size_type i = 0; i < contravarient.extent(2); ++i) { - for (size_type j = 0; j < contravarient.extent(3); ++j) { - norm_contravarient(cell,ip) += contravarient(cell,ip,i,j) * contravarient(cell,ip,i,j); + { + auto contravarient_k = contravarient.get_view(); + auto norm_contravarient_k = norm_contravarient.get_view(); + Kokkos::MDRangePolicy> policy({0,0},{num_cells,static_cast(contravarient.extent(1))}); + Kokkos::parallel_for("evaluate norm_contravarient",policy,KOKKOS_LAMBDA (const int cell,const int ip) { + norm_contravarient_k(cell,ip) = 0.0; + for (int i = 0; i < static_cast(contravarient_k.extent(2)); ++i) { + for (int j = 0; j < static_cast(contravarient_k.extent(3)); ++j) { + norm_contravarient_k(cell,ip) += contravarient_k(cell,ip,i,j) * contravarient_k(cell,ip,i,j); } } - norm_contravarient(cell,ip) = std::sqrt(norm_contravarient(cell,ip)); - } + norm_contravarient_k(cell,ip) = std::sqrt(norm_contravarient_k(cell,ip)); + }); + PHX::Device::execution_space().fence(); } } @@ -1172,17 +1169,16 @@ getCubatureCV(const PHX::MDField& in_node_coordinates, { size_type num_nodes = in_node_coordinates.extent(1); size_type num_dims = in_node_coordinates.extent(2); - - for (size_type cell = 0; cell < num_cells; ++cell) { - for (size_type node = 0; node < num_nodes; ++node) { - for (size_type dim = 0; dim < num_dims; ++dim) { - node_coordinates(cell,node,dim) = - in_node_coordinates(cell,node,dim); - dyn_node_coordinates(cell,node,dim) = - Sacado::ScalarValue::eval(in_node_coordinates(cell,node,dim)); - } - } - } + auto node_coordinates_k = node_coordinates.get_view(); + auto dyn_node_coordinates_k = dyn_node_coordinates.get_view(); + auto in_node_coordinates_k = in_node_coordinates.get_view(); + + Kokkos::MDRangePolicy> policy({0,0,0},{num_cells,num_nodes,num_dims}); + Kokkos::parallel_for("getCubatureCV: node coordinates",policy,KOKKOS_LAMBDA (const int cell,const int node,const int dim) { + node_coordinates_k(cell,node,dim) = in_node_coordinates_k(cell,node,dim); + dyn_node_coordinates_k(cell,node,dim) = Sacado::ScalarValue::eval(in_node_coordinates_k(cell,node,dim)); + }); + PHX::Device::execution_space().fence(); } auto s_dyn_phys_cub_points = Kokkos::subdynrankview(dyn_phys_cub_points.get_view(),cell_range,Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()); @@ -1199,19 +1195,27 @@ getCubatureCV(const PHX::MDField& in_node_coordinates, // size_type num_cells = dyn_phys_cub_points.extent(0); size_type num_ip =dyn_phys_cub_points.extent(1); size_type num_dims = dyn_phys_cub_points.extent(2); - - for (size_type cell = 0; cell < num_cells; ++cell) { - for (size_type ip = 0; ip < num_ip; ++ip) { - if (int_rule->cv_type != "side") - weighted_measure(cell,ip) = dyn_phys_cub_weights(cell,ip); - for (size_type dim = 0; dim < num_dims; ++dim) { - ip_coordinates(cell,ip,dim) = dyn_phys_cub_points(cell,ip,dim); - if (int_rule->cv_type == "side") - weighted_normals(cell,ip,dim) = dyn_phys_cub_norms(cell,ip,dim); - } + auto weighted_measure_k = weighted_measure.get_view(); + auto dyn_phys_cub_weights_k = dyn_phys_cub_weights.get_view(); + auto ip_coordinates_k = ip_coordinates.get_view(); + auto dyn_phys_cub_points_k = dyn_phys_cub_points.get_view(); + auto weighted_normals_k = weighted_normals.get_view(); + auto dyn_phys_cub_norms_k = dyn_phys_cub_norms.get_view(); + bool is_side = false; + if (int_rule->cv_type == "side") + is_side = true; + + Kokkos::MDRangePolicy> policy({0,0},{num_cells,num_ip}); + Kokkos::parallel_for("getCubatureCV: weighted measure",policy,KOKKOS_LAMBDA (const int cell,const int ip) { + if (!is_side) + weighted_measure_k(cell,ip) = dyn_phys_cub_weights_k(cell,ip); + for (size_type dim = 0; dim < num_dims; ++dim) { + ip_coordinates_k(cell,ip,dim) = dyn_phys_cub_points_k(cell,ip,dim); + if (is_side) + weighted_normals_k(cell,ip,dim) = dyn_phys_cub_norms_k(cell,ip,dim); } - } - + }); + PHX::Device::execution_space().fence(); } template diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp index bc088b1d403f..5b0d10854f26 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp @@ -159,15 +159,17 @@ namespace panzer { void swapQuadraturePoints(int cell,int a,int b); static void convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]); - - protected: + /// This should be a private method, but using lambdas on cuda forces this to be public. + void evaluateRemainingValues(const PHX::MDField & in_node_coordinates, const int in_num_cells); + /// This should be a private method, but using lambdas on cuda forces this to be public. + void getCubatureCV(const PHX::MDField & in_node_coordinates, const int in_num_cells); + protected: // TODO: Make this a utility function that only exists in source file Teuchos::RCP> getIntrepidCubature(const panzer::IntegrationRule & ir) const; - private: bool alloc_arrays; std::string prefix; @@ -175,8 +177,6 @@ namespace panzer { void generateSurfaceCubatureValues(const PHX::MDField & in_node_coordinates, const int in_num_cells,const SubcellConnectivity & face_connectivity); void getCubature(const PHX::MDField & in_node_coordinates, const int in_num_cells); - void getCubatureCV(const PHX::MDField & in_node_coordinates, const int in_num_cells); - void evaluateRemainingValues(const PHX::MDField & in_node_coordinates, const int in_num_cells); void evaluateValuesCV(const PHX::MDField & vertex_coordinates,const int in_num_cells); }; diff --git a/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp b/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp index 33e520f295f1..342f86da798c 100644 --- a/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp +++ b/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp @@ -93,27 +93,30 @@ namespace panzer { typedef panzer::ArrayTraits >::size_type size_type; const size_type x = 0; const size_type y = 1; - for (size_type cell = 0; cell < node_coordinates.extent(0); ++cell) { - node_coordinates(cell,0,x) = 0.0; - node_coordinates(cell,0,y) = 0.0; - node_coordinates(cell,1,x) = 1.0; - node_coordinates(cell,1,y) = 0.0; - node_coordinates(cell,2,x) = 1.0; - node_coordinates(cell,2,y) = 1.0; - node_coordinates(cell,3,x) = 0.0; - node_coordinates(cell,3,y) = 1.0; - } + auto node_coordinates_k = node_coordinates.get_view(); + Kokkos::parallel_for("initialize node coords",node_coordinates.extent(0), + KOKKOS_LAMBDA (const int cell) { + node_coordinates_k(cell,0,x) = 0.0; + node_coordinates_k(cell,0,y) = 0.0; + node_coordinates_k(cell,1,x) = 1.0; + node_coordinates_k(cell,1,y) = 0.0; + node_coordinates_k(cell,2,x) = 1.0; + node_coordinates_k(cell,2,y) = 1.0; + node_coordinates_k(cell,3,x) = 0.0; + node_coordinates_k(cell,3,y) = 1.0; + }); int_values.evaluateValues(node_coordinates); TEST_EQUALITY(int_values.ip_coordinates.extent(1), 4); double realspace_x_coord = (1.0/std::sqrt(3.0) + 1.0) / 2.0; double realspace_y_coord = (1.0/std::sqrt(3.0) + 1.0) / 2.0; - TEST_FLOATING_EQUALITY(int_values.ip_coordinates(0,0,0), + auto tmp_coords = int_values.ip_coordinates; + auto tmp_coords_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),tmp_coords.get_view()); + TEST_FLOATING_EQUALITY(tmp_coords_host(0,0,0), realspace_x_coord, 1.0e-8); - TEST_FLOATING_EQUALITY(int_values.ip_coordinates(0,0,1), + TEST_FLOATING_EQUALITY(tmp_coords_host(0,0,1), realspace_y_coord, 1.0e-8); - } TEUCHOS_UNIT_TEST(integration_values, control_volume) @@ -158,16 +161,18 @@ namespace panzer { typedef panzer::ArrayTraits >::size_type size_type; const size_type x = 0; const size_type y = 1; - for (size_type cell = 0; cell < node_coordinates.extent(0); ++cell) { - node_coordinates(cell,0,x) = 0.0; - node_coordinates(cell,0,y) = 0.0; - node_coordinates(cell,1,x) = 1.0; - node_coordinates(cell,1,y) = 0.0; - node_coordinates(cell,2,x) = 1.0; - node_coordinates(cell,2,y) = 1.0; - node_coordinates(cell,3,x) = 0.0; - node_coordinates(cell,3,y) = 1.0; - } + auto node_coordinates_k = node_coordinates.get_view(); + Kokkos::parallel_for("initialize node coords",node_coordinates.extent(0), + KOKKOS_LAMBDA (const int cell) { + node_coordinates_k(cell,0,x) = 0.0; + node_coordinates_k(cell,0,y) = 0.0; + node_coordinates_k(cell,1,x) = 1.0; + node_coordinates_k(cell,1,y) = 0.0; + node_coordinates_k(cell,2,x) = 1.0; + node_coordinates_k(cell,2,y) = 1.0; + node_coordinates_k(cell,3,x) = 0.0; + node_coordinates_k(cell,3,y) = 1.0; + }); int_values_vol.evaluateValues(node_coordinates); int_values_side.evaluateValues(node_coordinates); @@ -177,15 +182,19 @@ namespace panzer { TEST_EQUALITY(int_values_side.weighted_normals.extent(1), 4); double realspace_x_coord_1 = 0.25; double realspace_y_coord_1 = 0.25; - TEST_FLOATING_EQUALITY(int_values_vol.ip_coordinates(0,0,0), + auto tmp_coords = int_values_vol.ip_coordinates; + auto tmp_coords_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),tmp_coords.get_view()); + TEST_FLOATING_EQUALITY(tmp_coords_host(0,0,0), realspace_x_coord_1, 1.0e-8); - TEST_FLOATING_EQUALITY(int_values_vol.ip_coordinates(0,0,1), + TEST_FLOATING_EQUALITY(tmp_coords_host(0,0,1), realspace_y_coord_1, 1.0e-8); double realspace_x_coord_2 = 0.5; double realspace_y_coord_2 = 0.25; - TEST_FLOATING_EQUALITY(int_values_side.ip_coordinates(0,0,0), + auto tmp_side_coords = int_values_side.ip_coordinates; + auto tmp_side_coords_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),tmp_side_coords.get_view()); + TEST_FLOATING_EQUALITY(tmp_side_coords_host(0,0,0), realspace_x_coord_2, 1.0e-8); - TEST_FLOATING_EQUALITY(int_values_side.ip_coordinates(0,0,1), + TEST_FLOATING_EQUALITY(tmp_side_coords_host(0,0,1), realspace_y_coord_2, 1.0e-8); } @@ -225,25 +234,29 @@ namespace panzer { typedef panzer::ArrayTraits >::size_type size_type; const size_type x = 0; const size_type y = 1; - for (size_type cell = 0; cell < node_coordinates.extent(0); ++cell) { - node_coordinates(cell,0,x) = 0.0; - node_coordinates(cell,0,y) = 0.0; - node_coordinates(cell,1,x) = 1.0; - node_coordinates(cell,1,y) = 0.0; - node_coordinates(cell,2,x) = 1.0; - node_coordinates(cell,2,y) = 1.0; - node_coordinates(cell,3,x) = 0.0; - node_coordinates(cell,3,y) = 1.0; - } + auto node_coordinates_k = node_coordinates.get_view(); + Kokkos::parallel_for("initialize node coords",node_coordinates.extent(0), + KOKKOS_LAMBDA (const int cell) { + node_coordinates_k(cell,0,x) = 0.0; + node_coordinates_k(cell,0,y) = 0.0; + node_coordinates_k(cell,1,x) = 1.0; + node_coordinates_k(cell,1,y) = 0.0; + node_coordinates_k(cell,2,x) = 1.0; + node_coordinates_k(cell,2,y) = 1.0; + node_coordinates_k(cell,3,x) = 0.0; + node_coordinates_k(cell,3,y) = 1.0; + }); int_values_bc.evaluateValues(node_coordinates); + + auto ip_coords_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),int_values_bc.ip_coordinates.get_view()); TEST_EQUALITY(int_values_bc.ip_coordinates.extent(1), 2); double realspace_x_coord_1 = 1.0; double realspace_y_coord_1 = 0.25; - TEST_FLOATING_EQUALITY(int_values_bc.ip_coordinates(0,0,0), + TEST_FLOATING_EQUALITY(ip_coords_host(0,0,0), realspace_x_coord_1, 1.0e-8); - TEST_FLOATING_EQUALITY(int_values_bc.ip_coordinates(0,0,1), + TEST_FLOATING_EQUALITY(ip_coords_host(0,0,1), realspace_y_coord_1, 1.0e-8); } @@ -282,27 +295,29 @@ namespace panzer { typedef panzer::ArrayTraits >::size_type size_type; const size_type x = 0; const size_type y = 1; - for (size_type cell = 0; cell < node_coordinates.extent(0); ++cell) { - node_coordinates(cell,0,x) = 0.0; - node_coordinates(cell,0,y) = 0.0; - node_coordinates(cell,1,x) = 1.0; - node_coordinates(cell,1,y) = 0.0; - node_coordinates(cell,2,x) = 1.5; - node_coordinates(cell,2,y) = 1.5; - node_coordinates(cell,3,x) = 0.0; - node_coordinates(cell,3,y) = 1.0; - } + auto node_coordinates_k = node_coordinates.get_view(); + Kokkos::parallel_for("initialize node coords",node_coordinates.extent(0), + KOKKOS_LAMBDA (const int cell) { + node_coordinates_k(cell,0,x) = 0.0; + node_coordinates_k(cell,0,y) = 0.0; + node_coordinates_k(cell,1,x) = 1.0; + node_coordinates_k(cell,1,y) = 0.0; + node_coordinates_k(cell,2,x) = 1.5; + node_coordinates_k(cell,2,y) = 1.5; + node_coordinates_k(cell,3,x) = 0.0; + node_coordinates_k(cell,3,y) = 1.0; + }); int_values.evaluateValues(node_coordinates); // pull out arrays to look at - auto weighted_measure = int_values.weighted_measure; - auto jac_det = int_values.jac_det; - auto ref_ip_coordinates = int_values.ref_ip_coordinates; - auto ip_coordinates = int_values.ip_coordinates; - auto jac = int_values.jac; - auto jac_inv = int_values.jac_inv; + auto weighted_measure = Kokkos::create_mirror_view(int_values.weighted_measure.get_view()); + auto jac_det = Kokkos::create_mirror_view(int_values.jac_det.get_view()); + auto ref_ip_coordinates = Kokkos::create_mirror_view(int_values.ref_ip_coordinates.get_view()); + auto ip_coordinates = Kokkos::create_mirror_view(int_values.ip_coordinates.get_view()); + auto jac = Kokkos::create_mirror_view(int_values.jac.get_view()); + auto jac_inv = Kokkos::create_mirror_view(int_values.jac_inv.get_view()); int num_cells = ip_coordinates.extent(0); int num_points = ip_coordinates.extent(1); @@ -320,7 +335,13 @@ namespace panzer { int new_pt = 1; int_values.swapQuadraturePoints(tst_cell,org_pt,new_pt); - + Kokkos::deep_copy(weighted_measure,int_values.weighted_measure.get_view()); + Kokkos::deep_copy(jac_det,int_values.jac_det.get_view()); + Kokkos::deep_copy(ref_ip_coordinates,int_values.ref_ip_coordinates.get_view()); + Kokkos::deep_copy(ip_coordinates,int_values.ip_coordinates.get_view()); + Kokkos::deep_copy(jac,int_values.jac.get_view()); + Kokkos::deep_copy(jac_inv,int_values.jac_inv.get_view()); + TEST_EQUALITY( weighted_measure(ref_cell,org_pt), weighted_measure(tst_cell,new_pt)); TEST_EQUALITY( jac_det(ref_cell,org_pt), jac_det(tst_cell,new_pt)); TEST_EQUALITY(ref_ip_coordinates(ref_cell,org_pt,0), ref_ip_coordinates(tst_cell,new_pt,0)); @@ -343,6 +364,13 @@ namespace panzer { int new_pt = 3; int_values.swapQuadraturePoints(tst_cell,org_pt,new_pt); + Kokkos::deep_copy(weighted_measure,int_values.weighted_measure.get_view()); + Kokkos::deep_copy(jac_det,int_values.jac_det.get_view()); + Kokkos::deep_copy(ref_ip_coordinates,int_values.ref_ip_coordinates.get_view()); + Kokkos::deep_copy(ip_coordinates,int_values.ip_coordinates.get_view()); + Kokkos::deep_copy(jac,int_values.jac.get_view()); + Kokkos::deep_copy(jac_inv,int_values.jac_inv.get_view()); + TEST_EQUALITY( weighted_measure(ref_cell,org_pt), weighted_measure(tst_cell,new_pt)); TEST_EQUALITY( jac_det(ref_cell,org_pt), jac_det(tst_cell,new_pt)); From a4c9c227f49060e77d9dc1186fab33ce7b67dd3c Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Thu, 1 Jul 2021 16:17:24 -0600 Subject: [PATCH 04/14] Panzer: cleanup cuda warnings --- packages/panzer/disc-fe/src/Panzer_L2Projection.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_L2Projection.cpp b/packages/panzer/disc-fe/src/Panzer_L2Projection.cpp index fe3718d2097f..a6ade200128a 100644 --- a/packages/panzer/disc-fe/src/Panzer_L2Projection.cpp +++ b/packages/panzer/disc-fe/src/Panzer_L2Projection.cpp @@ -115,8 +115,8 @@ namespace panzer { const auto basisValues = workset.getBasisValues(targetBasisDescriptor_,integrationDescriptor_); - const auto unweightedBasis = basisValues.basis_scalar; - const auto weightedBasis = basisValues.weighted_basis_scalar; + const auto unweightedBasis = basisValues.basis_scalar.get_static_view(); + const auto weightedBasis = basisValues.weighted_basis_scalar.get_static_view(); // Offsets (this assumes UVM, need to fix) const std::vector& offsets = targetGlobalIndexer_->getGIDFieldOffsets(block,fieldIndex); @@ -215,8 +215,8 @@ namespace panzer { const auto basisValues = workset.getBasisValues(targetBasisDescriptor_,integrationDescriptor_); - const auto unweightedBasis = basisValues.basis_vector; - const auto weightedBasis = basisValues.weighted_basis_vector; + const auto unweightedBasis = basisValues.basis_vector.get_static_view(); + const auto weightedBasis = basisValues.weighted_basis_vector.get_static_view(); // Offsets (this assumes UVM, need to fix) const std::vector& offsets = targetGlobalIndexer_->getGIDFieldOffsets(block,fieldIndex); From cb4759502b872aee5c498f0473a38a4dd21c5865 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Fri, 2 Jul 2021 08:43:25 -0600 Subject: [PATCH 05/14] Panzer: integration values swapQP working on device --- .../disc-fe/src/Panzer_IntegrationValues2.cpp | 59 +++++++------ .../disc-fe/src/Panzer_IntegrationValues2.hpp | 6 +- .../src/Panzer_IntegrationValues2_impl.hpp | 84 +++++++++++++++++++ .../test/core_tests/integration_values2.cpp | 22 ++++- 4 files changed, 141 insertions(+), 30 deletions(-) create mode 100644 packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index 1e9660a24e31..5f5c06df5b03 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -58,6 +58,9 @@ #include "Panzer_Traits.hpp" #include "Panzer_SubcellConnectivity.hpp" +// For device member functions to avoid CUDA RDC in unit testing +#include "Panzer_IntegrationValues2_impl.hpp" + // *********************************************************** // * Specializations of setupArrays() for different array types // *********************************************************** @@ -372,12 +375,12 @@ convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scal binormal[2] = 0.; } } - + /* template void IntegrationValues2:: swapQuadraturePoints(int cell, int a, - int b) + int b) const { const int new_cell_point = a; const int old_cell_point = b; @@ -385,60 +388,62 @@ swapQuadraturePoints(int cell, const int cell_dim = ref_ip_coordinates.extent(2); #ifdef PANZER_DEBUG - TEUCHOS_ASSERT(cell < ip_coordinates.extent_int(0)); - TEUCHOS_ASSERT(a < ip_coordinates.extent_int(1)); - TEUCHOS_ASSERT(b < ip_coordinates.extent_int(1)); - TEUCHOS_ASSERT(cell >= 0); - TEUCHOS_ASSERT(a >= 0); - TEUCHOS_ASSERT(b >= 0); + KOKKOS_ASSERT(cell < ip_coordinates.extent_int(0)); + KOKKOS_ASSERT(a < ip_coordinates.extent_int(1)); + KOKKOS_ASSERT(b < ip_coordinates.extent_int(1)); + KOKKOS_ASSERT(cell >= 0); + KOKKOS_ASSERT(a >= 0); + KOKKOS_ASSERT(b >= 0); #endif - - Scalar hold; - hold = weighted_measure(cell,new_cell_point); + // Using scratch_for_compute_side_measure instead of hold. This + // works around UVM issues of DFAD temporaries in device code. + // Scalar hold; + + scratch_for_compute_side_measure(0) = weighted_measure(cell,new_cell_point); weighted_measure(cell,new_cell_point) = weighted_measure(cell,old_cell_point); - weighted_measure(cell,old_cell_point) = hold; + weighted_measure(cell,old_cell_point) = scratch_for_compute_side_measure(0); - hold = jac_det(cell,new_cell_point); + scratch_for_compute_side_measure(0) = jac_det(cell,new_cell_point); jac_det(cell,new_cell_point) = jac_det(cell,old_cell_point); - jac_det(cell,old_cell_point) = hold; + jac_det(cell,old_cell_point) = scratch_for_compute_side_measure(0); for(int dim=0;dim void IntegrationValues2:: generateSurfaceCubatureValues(const PHX::MDField& in_node_coordinates, diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp index 5b0d10854f26..73f6e8152726 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp @@ -155,13 +155,17 @@ namespace panzer { * \param[in] cell Cell index * \param[in] a Quadrature point a * \param[in] b Quadrature point b + * + * NOTE: this is a non-const function but we had to make it const to run on device. */ - void swapQuadraturePoints(int cell,int a,int b); + KOKKOS_INLINE_FUNCTION + void swapQuadraturePoints(int cell,int a,int b) const; static void convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]); /// This should be a private method, but using lambdas on cuda forces this to be public. void evaluateRemainingValues(const PHX::MDField & in_node_coordinates, const int in_num_cells); + /// This should be a private method, but using lambdas on cuda forces this to be public. void getCubatureCV(const PHX::MDField & in_node_coordinates, const int in_num_cells); diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp new file mode 100644 index 000000000000..3b1becc16291 --- /dev/null +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp @@ -0,0 +1,84 @@ +#ifndef PANZER_INTEGRATION_VALUES_2_IMPL_HPP +#define PANZER_INTEGRATION_VALUES_2_IMPL_HPP + +/* Normally, we would fold this definition into the .cpp file since we + use ETI to compile this object. However, for unit testing the stand + alone device function swapQuadraturePoints, we need the definition + at the calling point to avoid requiring relocatable device code for + cuda. So for the IntegrationValues2 object, if there is a + standalone device member function, put it in this file (for unit + testing). + */ +namespace panzer { + +template +void IntegrationValues2:: +swapQuadraturePoints(int cell, + int a, + int b) const +{ + const int new_cell_point = a; + const int old_cell_point = b; + + const int cell_dim = ref_ip_coordinates.extent(2); + +#ifdef PANZER_DEBUG + KOKKOS_ASSERT(cell < ip_coordinates.extent_int(0)); + KOKKOS_ASSERT(a < ip_coordinates.extent_int(1)); + KOKKOS_ASSERT(b < ip_coordinates.extent_int(1)); + KOKKOS_ASSERT(cell >= 0); + KOKKOS_ASSERT(a >= 0); + KOKKOS_ASSERT(b >= 0); +#endif + + // Using scratch_for_compute_side_measure instead of hold. This + // works around UVM issues of DFAD temporaries in device code. + // Scalar hold; + + scratch_for_compute_side_measure(0) = weighted_measure(cell,new_cell_point); + weighted_measure(cell,new_cell_point) = weighted_measure(cell,old_cell_point); + weighted_measure(cell,old_cell_point) = scratch_for_compute_side_measure(0); + + scratch_for_compute_side_measure(0) = jac_det(cell,new_cell_point); + jac_det(cell,new_cell_point) = jac_det(cell,old_cell_point); + jac_det(cell,old_cell_point) = scratch_for_compute_side_measure(0); + + for(int dim=0;dim + struct DoSwapOnDevice { + int cell_,orig_pt_,new_pt_; + panzer::IntegrationValues2 int_values_; + DoSwapOnDevice(int cell,int orig_pt,int new_pt,panzer::IntegrationValues2& int_values) + : cell_(cell),orig_pt_(orig_pt),new_pt_(new_pt),int_values_(int_values){} + KOKKOS_INLINE_FUNCTION void operator() (const int ) const { + int_values_.swapQuadraturePoints(cell_,orig_pt_,new_pt_); + } + }; + TEUCHOS_UNIT_TEST(integration_values, quadpt_swap) { Teuchos::RCP topo = @@ -334,7 +350,8 @@ namespace panzer { int org_pt = 0; int new_pt = 1; - int_values.swapQuadraturePoints(tst_cell,org_pt,new_pt); + Kokkos::parallel_for("swap qp",1,DoSwapOnDevice(tst_cell,org_pt,new_pt,int_values)); + Kokkos::deep_copy(weighted_measure,int_values.weighted_measure.get_view()); Kokkos::deep_copy(jac_det,int_values.jac_det.get_view()); Kokkos::deep_copy(ref_ip_coordinates,int_values.ref_ip_coordinates.get_view()); @@ -363,7 +380,8 @@ namespace panzer { int org_pt = 1; int new_pt = 3; - int_values.swapQuadraturePoints(tst_cell,org_pt,new_pt); + Kokkos::parallel_for("swap qp",1,DoSwapOnDevice(tst_cell,org_pt,new_pt,int_values)); + Kokkos::deep_copy(weighted_measure,int_values.weighted_measure.get_view()); Kokkos::deep_copy(jac_det,int_values.jac_det.get_view()); Kokkos::deep_copy(ref_ip_coordinates,int_values.ref_ip_coordinates.get_view()); From bc55f3079880cb862669c5fe843ec4b714ac6da5 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Fri, 2 Jul 2021 15:05:11 -0600 Subject: [PATCH 06/14] Panzer: checkpoint progress on uvm removal from int values --- .../disc-fe/src/Panzer_IntegrationValues2.cpp | 37 +++++---- .../disc-fe/src/Panzer_IntegrationValues2.hpp | 4 +- .../src/Panzer_SubcellConnectivity.cpp | 44 ++++++++--- .../test/core_tests/integration_values2.cpp | 79 +++++++++++-------- 4 files changed, 104 insertions(+), 60 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index 5f5c06df5b03..f174d1de214d 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -464,14 +464,14 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ { const int num_nodes = in_node_coordinates.extent(1); const int num_dims = in_node_coordinates.extent(2); + auto node_coordinates_k = node_coordinates.get_view(); + auto in_node_coordinates_k = in_node_coordinates.get_view(); - for(int cell=0; cell> policy({0,0,0},{num_cells,num_nodes,num_dims}); + Kokkos::parallel_for("node_coordinates",policy,KOKKOS_LAMBDA (const int cell, const int node, const int dim) { + node_coordinates_k(cell,node,dim) = in_node_coordinates_k(cell,node,dim); + }); + PHX::Device::execution_space().fence(); } // NOTE: We are assuming that each face can have a different number of points. @@ -497,8 +497,12 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ if(cell_dim==1){ tmp_side_cub_weights = Kokkos::DynRankView("tmp_side_cub_weights",num_points_on_face); tmp_side_cub_points = Kokkos::DynRankView("cell_tmp_side_cub_points",num_points_on_face,cell_dim); - tmp_side_cub_weights(0)=1.; - tmp_side_cub_points(0,0) = (subcell_index==0)? -1. : 1.; + auto tmp_side_cub_weights_host = Kokkos::create_mirror_view(tmp_side_cub_weights); + auto tmp_side_cub_points_host = Kokkos::create_mirror_view(tmp_side_cub_points); + tmp_side_cub_weights_host(0)=1.; + tmp_side_cub_points_host(0,0) = (subcell_index==0)? -1. : 1.; + Kokkos::deep_copy(tmp_side_cub_weights,tmp_side_cub_weights_host); + Kokkos::deep_copy(tmp_side_cub_points,tmp_side_cub_points_host); } else { // Get the face topology from the cell topology @@ -523,12 +527,17 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ cell_topology); } - - for(int local_point=0;local_point & in_node_coordinates, const int in_num_cells); + /// This should be a private method, but using lambdas on cuda forces this to be public. + void generateSurfaceCubatureValues(const PHX::MDField & in_node_coordinates, const int in_num_cells,const SubcellConnectivity & face_connectivity); + protected: // TODO: Make this a utility function that only exists in source file @@ -179,7 +182,6 @@ namespace panzer { std::string prefix; std::vector ddims_; - void generateSurfaceCubatureValues(const PHX::MDField & in_node_coordinates, const int in_num_cells,const SubcellConnectivity & face_connectivity); void getCubature(const PHX::MDField & in_node_coordinates, const int in_num_cells); void evaluateValuesCV(const PHX::MDField & vertex_coordinates,const int in_num_cells); }; diff --git a/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.cpp b/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.cpp index ae83948aa57e..7dcafa39ef84 100644 --- a/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.cpp +++ b/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.cpp @@ -64,21 +64,39 @@ setup(const panzer::LocalMeshPartition & partition) _cell_to_subcells_adj = PHX::View("cell_to_subcells_adj", num_cells+1); _cell_to_subcells = PHX::View("cell_to_subcells", num_cells*num_faces_per_cell); - _subcell_to_cells_adj(0)=0; - for(int face=0;face >())); mesh.local_cells = PHX::View("local_cells",2); - mesh.local_cells(0) = 0; - mesh.local_cells(1) = 1; + auto local_cells_host = Kokkos::create_mirror_view(mesh.local_cells); + local_cells_host(0) = 0; + local_cells_host(1) = 1; + Kokkos::deep_copy(mesh.local_cells,local_cells_host); mesh.cell_vertices = PHX::View("cell_vertices",2,4,2); PHX::View coordinates("coordinates",6,2); - coordinates(0,0) = 1.; coordinates(0,1) = 3.; - coordinates(1,0) = 3.; coordinates(1,1) = 3.; - coordinates(2,0) = 5.; coordinates(2,1) = 3.; - coordinates(3,0) = 2.; coordinates(3,1) = 1.; - coordinates(4,0) = 3.; coordinates(4,1) = 1.; - coordinates(5,0) = 4.; coordinates(5,1) = 1.; - -#define SET_NC(cell,node,vertex) mesh.cell_vertices(cell,node,0) = coordinates(vertex,0); mesh.cell_vertices(cell,node,1) = coordinates(vertex,1); + auto coordinates_host = Kokkos::create_mirror_view(coordinates); + coordinates_host(0,0) = 1.; coordinates_host(0,1) = 3.; + coordinates_host(1,0) = 3.; coordinates_host(1,1) = 3.; + coordinates_host(2,0) = 5.; coordinates_host(2,1) = 3.; + coordinates_host(3,0) = 2.; coordinates_host(3,1) = 1.; + coordinates_host(4,0) = 3.; coordinates_host(4,1) = 1.; + coordinates_host(5,0) = 4.; coordinates_host(5,1) = 1.; + Kokkos::deep_copy(coordinates,coordinates_host); + + auto cell_vertices_host = Kokkos::create_mirror_view(mesh.cell_vertices); +#define SET_NC(cell,node,vertex) cell_vertices_host(cell,node,0) = coordinates_host(vertex,0); cell_vertices_host(cell,node,1) = coordinates_host(vertex,1); SET_NC(0,0, 0); SET_NC(0,1, 3); SET_NC(0,2, 4); @@ -463,27 +468,33 @@ namespace panzer { SET_NC(1,2, 5); SET_NC(1,3, 2); #undef SET_NC + Kokkos::deep_copy(mesh.cell_vertices,cell_vertices_host); mesh.face_to_cells = PHX::View("face_to_cells",6); - mesh.face_to_cells(0,0) = 0; mesh.face_to_cells(0,1) = 1; - mesh.face_to_cells(1,0) = 0; mesh.face_to_cells(1,1) = -1; - mesh.face_to_cells(2,0) = 0; mesh.face_to_cells(2,1) = 1; - mesh.face_to_cells(3,0) = 0; mesh.face_to_cells(3,1) = -1; - mesh.face_to_cells(4,0) = 1; mesh.face_to_cells(4,1) = -1; - mesh.face_to_cells(5,0) = 1; mesh.face_to_cells(5,1) = -1; + auto face_to_cells_host = Kokkos::create_mirror_view(mesh.face_to_cells); + face_to_cells_host(0,0) = 0; face_to_cells_host(0,1) = 1; + face_to_cells_host(1,0) = 0; face_to_cells_host(1,1) = -1; + face_to_cells_host(2,0) = 0; face_to_cells_host(2,1) = 1; + face_to_cells_host(3,0) = 0; face_to_cells_host(3,1) = -1; + face_to_cells_host(4,0) = 1; face_to_cells_host(4,1) = -1; + face_to_cells_host(5,0) = 1; face_to_cells_host(5,1) = -1; + Kokkos::deep_copy(mesh.face_to_cells,face_to_cells_host); mesh.face_to_lidx = PHX::View("face_to_lidx",6); - mesh.face_to_lidx(0,0) = 0; mesh.face_to_lidx(0,1) = 2; - mesh.face_to_lidx(1,0) = 1; mesh.face_to_lidx(1,1) = -1; - mesh.face_to_lidx(2,0) = 2; mesh.face_to_lidx(2,1) = 0; - mesh.face_to_lidx(3,0) = 3; mesh.face_to_lidx(3,1) = -1; - mesh.face_to_lidx(4,0) = 1; mesh.face_to_lidx(4,1) = -1; - mesh.face_to_lidx(5,0) = 3; mesh.face_to_lidx(5,1) = -1; + auto face_to_lidx_host = Kokkos::create_mirror_view(mesh.face_to_lidx); + face_to_lidx_host(0,0) = 0; face_to_lidx_host(0,1) = 2; + face_to_lidx_host(1,0) = 1; face_to_lidx_host(1,1) = -1; + face_to_lidx_host(2,0) = 2; face_to_lidx_host(2,1) = 0; + face_to_lidx_host(3,0) = 3; face_to_lidx_host(3,1) = -1; + face_to_lidx_host(4,0) = 1; face_to_lidx_host(4,1) = -1; + face_to_lidx_host(5,0) = 3; face_to_lidx_host(5,1) = -1; + Kokkos::deep_copy(mesh.face_to_lidx,face_to_lidx_host); mesh.cell_to_faces = PHX::View("cell_to_faces",2,4); - mesh.cell_to_faces(0,0) = 0; mesh.cell_to_faces(0,1) = 1; mesh.cell_to_faces(0,2) = 2; mesh.cell_to_faces(0,3) = 3; - mesh.cell_to_faces(1,0) = 2; mesh.cell_to_faces(1,1) = 4; mesh.cell_to_faces(1,2) = 0; mesh.cell_to_faces(1,3) = 5; - + auto cell_to_faces_host = Kokkos::create_mirror_view(mesh.cell_to_faces); + cell_to_faces_host(0,0) = 0; cell_to_faces_host(0,1) = 1; cell_to_faces_host(0,2) = 2; cell_to_faces_host(0,3) = 3; + cell_to_faces_host(1,0) = 2; cell_to_faces_host(1,1) = 4; cell_to_faces_host(1,2) = 0; cell_to_faces_host(1,3) = 5; + Kokkos::deep_copy(mesh.cell_to_faces,cell_to_faces_host); } const auto id = panzer::IntegrationDescriptor(2, panzer::IntegrationDescriptor::SURFACE); @@ -494,11 +505,15 @@ namespace panzer { // Fill node coordinates panzer::MDFieldArrayFactory af("prefix_",true); auto node_coordinates = af.buildStaticArray("node_coordinates",2,4,2); - - for(int i=0; i Date: Tue, 6 Jul 2021 15:17:07 -0600 Subject: [PATCH 07/14] Panzer: remove uvm from surface rotation matrices --- .../disc-fe/src/Panzer_IntegrationValues2.cpp | 122 ++++++++++-------- .../disc-fe/src/Panzer_IntegrationValues2.hpp | 2 + .../src/Panzer_IntegrationValues2_impl.hpp | 85 +++++++++--- 3 files changed, 138 insertions(+), 71 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index f174d1de214d..df0ef75090bc 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -324,6 +324,7 @@ getCubature(const PHX::MDField& in_node_coordinates, *(int_rule->topology)); } + /* template void IntegrationValues2:: convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]) @@ -351,7 +352,7 @@ convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scal } const T t = sqrt(transverse[0]*transverse[0]+transverse[1]*transverse[1]+transverse[2]*transverse[2]); - TEUCHOS_ASSERT(t != 0.); + KOKKOS_ASSERT(t != 0.); for(int dim=0;dim<3;++dim){ transverse[dim] /= t; } @@ -375,6 +376,7 @@ convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scal binormal[2] = 0.; } } + */ /* template void IntegrationValues2:: @@ -556,12 +558,16 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ s_node_coordinates, cell_topology); + PHX::Device::execution_space().fence(); + auto side_inverse_jacobian = Kokkos::DynRankView("side_inv_jac",num_cells,num_points_on_face,cell_dim,cell_dim); cell_tools.setJacobianInv(side_inverse_jacobian, side_jacobian); auto side_det_jacobian = Kokkos::DynRankView("side_det_jac",num_cells,num_points_on_face); cell_tools.setJacobianDet(side_det_jacobian, side_jacobian); + PHX::Device::execution_space().fence(); + // Calculate measures (quadrature weights in physical space) for this side auto side_weighted_measure = Kokkos::DynRankView("side_weighted_measure",num_cells,num_points_on_face); if(cell_dim == 1){ @@ -571,65 +577,79 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ computeEdgeMeasure(side_weighted_measure, side_jacobian, tmp_side_cub_weights, subcell_index,cell_topology, scratch_for_compute_side_measure.get_view()); + PHX::Device::execution_space().fence(); } else if(cell_dim == 3){ Intrepid2::FunctionSpaceTools:: computeFaceMeasure(side_weighted_measure, side_jacobian, tmp_side_cub_weights, subcell_index,cell_topology, scratch_for_compute_side_measure.get_view()); + PHX::Device::execution_space().fence(); } // Calculate normals auto side_normals = Kokkos::DynRankView("side_normals",num_cells,num_points_on_face,cell_dim); if(cell_dim == 1){ - int other_subcell_index = (subcell_index==0) ? 1 : 0; + const int other_subcell_index = (subcell_index==0) ? 1 : 0; + auto in_node_coordinates_k = in_node_coordinates.get_view(); + const auto min = std::numeric_limits::type>::min(); - for(int cell=0;cell::type>::min()); - } + Kokkos::parallel_for("compute normals 1D",num_cells,KOKKOS_LAMBDA (const int cell) { + Scalar norm = (in_node_coordinates_k(cell,subcell_index,0) - in_node_coordinates_k(cell,other_subcell_index,0)); + side_normals(cell,0,0) = norm / fabs(norm + min); + }); } else { cell_tools.getPhysicalSideNormals(side_normals,side_jacobian,subcell_index,cell_topology); // Normalize each normal - for(int cell=0;cell> policy({0,0},{num_cells,num_points_on_face}); + Kokkos::parallel_for("Normalize the normals",policy,KOKKOS_LAMBDA (const int cell,const int point) { + Scalar n = 0.; + for(int dim=0;dim 0.){ + n = std::sqrt(n); for(int dim=0;dim 0.){ - n = std::sqrt(n); - for(int dim=0;dim> policy({0,0},{num_cells,num_points_on_face}); + Kokkos::parallel_for("copy values",policy,KOKKOS_LAMBDA (const int cell,const int side_point) { const int cell_point = point_offset + side_point; - weighted_measure(cell,cell_point) = side_weighted_measure(cell,side_point); - jac_det(cell,cell_point) = side_det_jacobian(cell,side_point); + weighted_measure_k(cell,cell_point) = side_weighted_measure(cell,side_point); + jac_det_k(cell,cell_point) = side_det_jacobian(cell,side_point); for(int dim=0;dim& in_node_ // We also need surface rotation matrices in order to enforce alignment { const int num_points = ir.getPointOffset(num_subcells); - Scalar normal[3]; - Scalar transverse[3]; - Scalar binormal[3]; - for(int i=0;i<3;i++){normal[i]=0.;} - for(int i=0;i<3;i++){transverse[i]=0.;} - for(int i=0;i<3;i++){binormal[i]=0.;} - for(int cell=0; cell> policy({0,0,0},{num_cells,num_subcells,num_points}); + Kokkos::parallel_for("create surface rotation matrices",policy,KOKKOS_LAMBDA (const int cell,const int subcell_index,const int point) { + Scalar normal[3]; + + for(int dim=0; dim= 0); #endif - // Using scratch_for_compute_side_measure instead of hold. This - // works around UVM issues of DFAD temporaries in device code. - // Scalar hold; + // If this is a DFAD type, we will need to fix allocation to size the derivative array. + Scalar hold; - scratch_for_compute_side_measure(0) = weighted_measure(cell,new_cell_point); + hold = weighted_measure(cell,new_cell_point); weighted_measure(cell,new_cell_point) = weighted_measure(cell,old_cell_point); - weighted_measure(cell,old_cell_point) = scratch_for_compute_side_measure(0); + weighted_measure(cell,old_cell_point) = hold; - scratch_for_compute_side_measure(0) = jac_det(cell,new_cell_point); + hold = jac_det(cell,new_cell_point); jac_det(cell,new_cell_point) = jac_det(cell,old_cell_point); - jac_det(cell,old_cell_point) = scratch_for_compute_side_measure(0); + jac_det(cell,old_cell_point) = hold; for(int dim=0;dim +void IntegrationValues2:: +convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]) +{ + using T = Scalar; + + const T n = sqrt(normal[0]*normal[0]+normal[1]*normal[1]+normal[2]*normal[2]); + + // If this fails then the geometry for this cell is probably undefined + if(n > 0.){ + // Make sure transverse is not parallel to normal within some margin of error + transverse[0]=0.;transverse[1]=1.;transverse[2]=0.; + if(std::fabs(normal[0]*transverse[0]+normal[1]*transverse[1])>0.9){ + transverse[0]=1.;transverse[1]=0.; + } + + const T nt = normal[0]*transverse[0]+normal[1]*transverse[1]+normal[2]*transverse[2]; + + // Note normal has unit length + const T mult = nt/(n*n); // = nt + + // Remove normal projection from transverse + for(int dim=0;dim<3;++dim){ + transverse[dim] = transverse[dim] - mult * normal[dim]; + } + + const T t = sqrt(transverse[0]*transverse[0]+transverse[1]*transverse[1]+transverse[2]*transverse[2]); + KOKKOS_ASSERT(t != 0.); + for(int dim=0;dim<3;++dim){ + transverse[dim] /= t; + } + + // We assume a right handed system such that b = n \times t + binormal[0] = (normal[1] * transverse[2] - normal[2] * transverse[1]); + binormal[1] = (normal[2] * transverse[0] - normal[0] * transverse[2]); + binormal[2] = (normal[0] * transverse[1] - normal[1] * transverse[0]); + + // Normalize binormal + const T b = sqrt(binormal[0]*binormal[0]+binormal[1]*binormal[1]+binormal[2]*binormal[2]); + for(int dim=0;dim<3;++dim){ + binormal[dim] /= b; + } + } else { + transverse[0] = 0.; + transverse[1] = 0.; + transverse[2] = 0.; + binormal[0] = 0.; + binormal[1] = 0.; + binormal[2] = 0.; + } +} + } #endif From 33f51d3e88523cc47687764aa45c6a2500696c53 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Wed, 7 Jul 2021 12:31:09 -0600 Subject: [PATCH 08/14] Panzer: checkpoint subcells work --- .../disc-fe/src/Panzer_IntegrationValues2.cpp | 4 +- .../src/Panzer_SubcellConnectivity.cpp | 17 +++ .../src/Panzer_SubcellConnectivity.hpp | 105 ++++++++++++++---- .../panzer/disc-fe/src/Panzer_Workset.cpp | 1 + 4 files changed, 105 insertions(+), 22 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index df0ef75090bc..c7004a1e16f2 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -685,7 +685,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ // Enforce alignment across surface quadrature points const int num_points = ip_coordinates.extent_int(1); - const int num_faces_per_cell = face_connectivity.numSubcellsOnCell(0); + const int num_faces_per_cell = face_connectivity.numSubcellsOnCellHost(0); const int num_points_per_face = num_points / num_faces_per_cell; // Now we need to align the cubature points for each face @@ -703,7 +703,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ std::vector point_order(num_points_per_face); // Iterate through faces - for(int face=0; face("_num_subcells_device",1); + Kokkos::deep_copy(_num_subcells_device,_num_subcells); _num_cells = num_cells; + _num_cells_device = PHX::View("_num_cells_device",1); + Kokkos::deep_copy(_num_cells_device,_num_cells); _subcell_to_cells_adj = PHX::View("subcell_to_cells_adj", num_faces+1); _subcell_to_cells = PHX::View("subcell_to_cells", num_faces*num_cells_per_face); @@ -64,6 +68,13 @@ setup(const panzer::LocalMeshPartition & partition) _cell_to_subcells_adj = PHX::View("cell_to_subcells_adj", num_cells+1); _cell_to_subcells = PHX::View("cell_to_subcells", num_cells*num_faces_per_cell); + // Host copies + _subcell_to_cells_adj_host = PHX::View::HostMirror("subcell_to_cells_adj_host", num_faces+1); + _subcell_to_cells_host = PHX::View::HostMirror("subcell_to_cells_host", num_faces*num_cells_per_face); + _subcell_to_local_subcells_host = PHX::View::HostMirror("subcell_to_local_subcells_host", num_faces*num_cells_per_face); + _cell_to_subcells_adj_host = PHX::View::HostMirror("cell_to_subcells_adj_host", num_cells+1); + _cell_to_subcells_host = PHX::View::HostMirror("cell_to_subcells_host", num_cells*num_faces_per_cell); + // This line not needed since kokkos initializes the arrays above to zero //_subcell_to_cells_adj(0)=0; @@ -99,6 +110,12 @@ setup(const panzer::LocalMeshPartition & partition) PHX::Device::execution_space().fence(); } + // Copy values to host + Kokkos::deep_copy(_subcell_to_cells_adj_host, _subcell_to_cells_adj); + Kokkos::deep_copy(_subcell_to_cells_host, _subcell_to_cells); + Kokkos::deep_copy(_subcell_to_local_subcells_host,_subcell_to_local_subcells); + Kokkos::deep_copy(_cell_to_subcells_adj_host,_cell_to_subcells_adj); + Kokkos::deep_copy(_cell_to_subcells_host,_cell_to_subcells); } } diff --git a/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.hpp b/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.hpp index 0828a69f0ab0..5a255c3aa3a9 100644 --- a/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.hpp +++ b/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.hpp @@ -69,7 +69,8 @@ class SubcellConnectivity * \return Number of subcells associated with the cells */ KOKKOS_INLINE_FUNCTION - int numSubcells() const {return _num_subcells;} + int numSubcells() const {return _num_subcells_device(0);} + int numSubcellsHost() const {return _num_subcells;} /** * \brief Gives number of cells in connectivity @@ -77,7 +78,8 @@ class SubcellConnectivity * \return Number of subcells associated with the cells */ KOKKOS_INLINE_FUNCTION - int numCells() const {return _num_cells;} + int numCells() const {return _num_cells_device(0);} + int numCellsHost() const {return _num_cells;} /** * \brief gives number of subcells (e.g. faces) found on a given cell @@ -90,6 +92,8 @@ class SubcellConnectivity */ KOKKOS_INLINE_FUNCTION int numSubcellsOnCell(const int cell) const; + inline + int numSubcellsOnCellHost(const int cell) const; /** * \brief Returns the number of cells attached to a given subcell @@ -104,6 +108,8 @@ class SubcellConnectivity */ KOKKOS_INLINE_FUNCTION int numCellsOnSubcell(const int subcell) const; + inline + int numCellsOnSubcellHost(const int subcell) const; /** * \brief Get the subcell index for a given cell and local subcell index @@ -121,6 +127,8 @@ class SubcellConnectivity */ KOKKOS_INLINE_FUNCTION int subcellForCell(const int cell, const int local_subcell_index) const; + inline + int subcellForCellHost(const int cell, const int local_subcell_index) const; /** * \brief Get the cell for a given subcell and a local_cell_index @@ -138,6 +146,8 @@ class SubcellConnectivity */ KOKKOS_INLINE_FUNCTION int cellForSubcell(const int subcell, const int local_cell_index) const; + inline + int cellForSubcellHost(const int subcell, const int local_cell_index) const; /** * \brief Get the local subcell index given a subcell and a local cell index @@ -151,29 +161,38 @@ class SubcellConnectivity */ KOKKOS_INLINE_FUNCTION int localSubcellForSubcell(const int subcell, const int local_cell_index) const; + inline + int localSubcellForSubcellHost(const int subcell, const int local_cell_index) const; protected: /// Number of subcells for a given number of cells int _num_subcells; + PHX::View _num_subcells_device; /// Number of cells int _num_cells; + PHX::View _num_cells_device; /// Adjacency array for indexing into subcell_to_cells array PHX::View _subcell_to_cells_adj; + PHX::View::HostMirror _subcell_to_cells_adj_host; /// Mapping from subcells to cells PHX::View _subcell_to_cells; + PHX::View::HostMirror _subcell_to_cells_host; /// Mapping from subcell indexes to local subcell indexes PHX::View _subcell_to_local_subcells; + PHX::View::HostMirror _subcell_to_local_subcells_host; /// Adjacency array for indexing into cell_to_subcells array PHX::View _cell_to_subcells_adj; + PHX::View::HostMirror _cell_to_subcells_adj_host; /// Mapping from cells to subcells PHX::View _cell_to_subcells; + PHX::View::HostMirror _cell_to_subcells_host; }; @@ -211,34 +230,48 @@ SubcellConnectivity:: numSubcellsOnCell(const int cell) const { #ifdef PANZER_DEBUG -#ifndef KOKKOS_ENABLE_CUDA - TEUCHOS_ASSERT(cell >= 0 and cell < _num_cells); -#endif + KOKKOS_ASSERT(cell >= 0 and cell < _num_cells_device(0)); #endif return _cell_to_subcells_adj(cell+1)-_cell_to_subcells_adj(cell); } int SubcellConnectivity:: -numCellsOnSubcell(const int subcell) const +numSubcellsOnCellHost(const int cell) const { #ifdef PANZER_DEBUG -#ifndef KOKKOS_ENABLE_CUDA - TEUCHOS_ASSERT(subcell >= 0 and subcell < _num_subcells); + KOKKOS_ASSERT(cell >= 0 and cell < _num_cells); #endif + return _cell_to_subcells_adj_host(cell+1)-_cell_to_subcells_adj_host(cell); +} + +int +SubcellConnectivity:: +numCellsOnSubcell(const int subcell) const +{ +#ifdef PANZER_DEBUG + KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells_device(0)); #endif return _subcell_to_cells_adj(subcell+1)-_subcell_to_cells_adj(subcell); } int SubcellConnectivity:: -subcellForCell(const int cell, const int local_subcell_index) const +numCellsOnSubcellHost(const int subcell) const { #ifdef PANZER_DEBUG -#ifndef KOKKOS_ENABLE_CUDA - TEUCHOS_ASSERT(cell >= 0 and cell < _num_cells); - TEUCHOS_ASSERT(local_subcell_index < numSubcellsOnCell(cell)); + KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells); #endif + return _subcell_to_cells_adj_host(subcell+1)-_subcell_to_cells_adj_host(subcell); +} + +int +SubcellConnectivity:: +subcellForCell(const int cell, const int local_subcell_index) const +{ +#ifdef PANZER_DEBUG + KOKKOS_ASSERT(cell >= 0 and cell < _num_cells_device(0)); + KOKKOS_ASSERT(local_subcell_index < numSubcellsOnCell(cell)); #endif const int index = _cell_to_subcells_adj(cell)+local_subcell_index; return _cell_to_subcells(index); @@ -246,13 +279,23 @@ subcellForCell(const int cell, const int local_subcell_index) const int SubcellConnectivity:: -cellForSubcell(const int subcell, const int local_cell_index) const +subcellForCellHost(const int cell, const int local_subcell_index) const { #ifdef PANZER_DEBUG -#ifndef KOKKOS_ENABLE_CUDA - TEUCHOS_ASSERT(subcell >= 0 and subcell < _num_subcells); - TEUCHOS_ASSERT(local_cell_index < numCellsOnSubcell(subcell)); + KOKKOS_ASSERT(cell >= 0 and cell < _num_cells); + KOKKOS_ASSERT(local_subcell_index < numSubcellsOnCellHost(cell)); #endif + const int index = _cell_to_subcells_adj_host(cell)+local_subcell_index; + return _cell_to_subcells_host(index); +} + +int +SubcellConnectivity:: +cellForSubcell(const int subcell, const int local_cell_index) const +{ +#ifdef PANZER_DEBUG + KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells_device(0)); + KOKKOS_ASSERT(local_cell_index < numCellsOnSubcell(subcell)); #endif const int index = _subcell_to_cells_adj(subcell)+local_cell_index; return _subcell_to_cells(index); @@ -260,18 +303,40 @@ cellForSubcell(const int subcell, const int local_cell_index) const int SubcellConnectivity:: -localSubcellForSubcell(const int subcell, const int local_cell_index) const +cellForSubcellHost(const int subcell, const int local_cell_index) const { #ifdef PANZER_DEBUG -#ifndef KOKKOS_ENABLE_CUDA - TEUCHOS_ASSERT(subcell >= 0 and subcell < _num_subcells); - TEUCHOS_ASSERT(local_cell_index < numCellsOnSubcell(subcell)); + KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells); + KOKKOS_ASSERT(local_cell_index < numCellsOnSubcellHost(subcell)); #endif + const int index = _subcell_to_cells_adj_host(subcell)+local_cell_index; + return _subcell_to_cells_host(index); +} + +int +SubcellConnectivity:: +localSubcellForSubcell(const int subcell, const int local_cell_index) const +{ +#ifdef PANZER_DEBUG + KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells_device(0)); + KOKKOS_ASSERT(local_cell_index < numCellsOnSubcell(subcell)); #endif const int index = _subcell_to_cells_adj(subcell)+local_cell_index; return _subcell_to_local_subcells(index); } +int +SubcellConnectivity:: +localSubcellForSubcellHost(const int subcell, const int local_cell_index) const +{ +#ifdef PANZER_DEBUG + KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells); + KOKKOS_ASSERT(local_cell_index < numCellsOnSubcellHost(subcell)); +#endif + const int index = _subcell_to_cells_adj_host(subcell)+local_cell_index; + return _subcell_to_local_subcells_host(index); +} + } // namespace panzer #endif diff --git a/packages/panzer/disc-fe/src/Panzer_Workset.cpp b/packages/panzer/disc-fe/src/Panzer_Workset.cpp index c76f7af72888..4475238fa06a 100644 --- a/packages/panzer/disc-fe/src/Panzer_Workset.cpp +++ b/packages/panzer/disc-fe/src/Panzer_Workset.cpp @@ -52,6 +52,7 @@ #include "Panzer_LocalMeshInfo.hpp" #include "Panzer_PointGenerator.hpp" #include "Panzer_PointValues2.hpp" +#include "Panzer_IntegrationValues2_impl.hpp" // needed for lambda dispatch with convertNormalToRotationMatrix() #include "Panzer_SubcellConnectivity.hpp" #include "Panzer_OrientationsInterface.hpp" From bfc5b7f698c7e846f6458bdd89926215ffd8bc8e Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Mon, 12 Jul 2021 16:15:33 -0600 Subject: [PATCH 09/14] Panzer: integration values is uvm free While all tests pass, these changes produce a ton of cuda compiler warnings for calling host function from device functions. It's an issue with lambda captures triggering ctor and dtor of int value members. Will fix next. --- .../disc-fe/src/Panzer_IntegrationValues2.cpp | 121 ++++++++++-------- .../disc-fe/src/Panzer_IntegrationValues2.hpp | 1 - .../src/Panzer_IntegrationValues2_impl.hpp | 4 +- .../test/core_tests/integration_values2.cpp | 18 +-- 4 files changed, 81 insertions(+), 63 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index c7004a1e16f2..c9a7abe244ee 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -471,7 +471,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ Kokkos::MDRangePolicy> policy({0,0,0},{num_cells,num_nodes,num_dims}); Kokkos::parallel_for("node_coordinates",policy,KOKKOS_LAMBDA (const int cell, const int node, const int dim) { - node_coordinates_k(cell,node,dim) = in_node_coordinates_k(cell,node,dim); + node_coordinates_k(cell,node,dim) = in_node_coordinates_k(cell,node,dim); }); PHX::Device::execution_space().fence(); } @@ -699,25 +699,33 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ #define PANZER_DOT(a,b) (a[0]*b[0] + a[1]*b[1] + a[2]*b[2]) #define PANZER_CROSS(a,b,c) {c[0] = a[1]*b[2] - a[2]*b[1]; c[1] = a[2]*b[0] - a[0]*b[2]; c[2] = a[0]*b[1] - a[1]*b[0];} - // Reorder index vector - std::vector point_order(num_points_per_face); + // Reorder index vector. This is scratch space that needs to be + // allocated per thread now. Should use team size instead of num + // faces, but then we need to convert the lambda below to a + // functor to query the policy using the functor. We'll just live + // with the extra memory since it is temporary scratch. + // std::vector point_order(num_points_per_face); + PHX::View point_order("scratch: point_order",face_connectivity.numSubcellsHost(),num_points_per_face); // Iterate through faces - for(int face=0; face= 0); + KOKKOS_ASSERT(lidx_1 >= 0); // To compare points on planes, it is good to center the planes around the origin // Find the face center for the left and right cell (may not be the same point - e.g. periodic conditions) @@ -726,14 +734,14 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ for(int face_point=0; face_point& in_node_ const int example_point_1 = lidx_1*num_points_per_face; // Load the transverse and binormal for the 0 cell (default) - Scalar t[3] = {surface_rotation_matrices(cell_0,example_point_0,1,0), surface_rotation_matrices(cell_0,example_point_0,1,1), surface_rotation_matrices(cell_0,example_point_0,1,2)}; - Scalar b[3] = {surface_rotation_matrices(cell_0,example_point_0,2,0), surface_rotation_matrices(cell_0,example_point_0,2,1), surface_rotation_matrices(cell_0,example_point_0,2,2)}; + Scalar t[3] = {surface_rotation_matrices_k(cell_0,example_point_0,1,0), surface_rotation_matrices_k(cell_0,example_point_0,1,1), surface_rotation_matrices_k(cell_0,example_point_0,1,2)}; + Scalar b[3] = {surface_rotation_matrices_k(cell_0,example_point_0,2,0), surface_rotation_matrices_k(cell_0,example_point_0,2,1), surface_rotation_matrices_k(cell_0,example_point_0,2,2)}; // In case the faces are not antiparallel (e.g. periodic wedge), we may need to change the transverse and binormal { // Get the normals for each face for constructing one of the plane vectors (transverse) - const Scalar n0[3] = {surface_rotation_matrices(cell_0,example_point_0,0,0), surface_rotation_matrices(cell_0,example_point_0,0,1), surface_rotation_matrices(cell_0,example_point_0,0,2)}; - const Scalar n1[3] = {surface_rotation_matrices(cell_1,example_point_1,0,0), surface_rotation_matrices(cell_1,example_point_1,0,1), surface_rotation_matrices(cell_1,example_point_1,0,2)}; + const Scalar n0[3] = {surface_rotation_matrices_k(cell_0,example_point_0,0,0), surface_rotation_matrices_k(cell_0,example_point_0,0,1), surface_rotation_matrices_k(cell_0,example_point_0,0,2)}; + const Scalar n1[3] = {surface_rotation_matrices_k(cell_1,example_point_1,0,0), surface_rotation_matrices_k(cell_1,example_point_1,0,1), surface_rotation_matrices_k(cell_1,example_point_1,0,2)}; // n_0*n_1 == -1 (antiparallel), n_0*n_1 == 1 (parallel - bad), |n_0*n_1| < 1 (other) const Scalar n0_dot_n1 = PANZER_DOT(n0,n1); @@ -767,7 +775,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ // This causes a host of issues (e.g. identifying 180 degree periodic wedges), but we have to support virtual cells as a priority // Therefore, we will just assume that the ordering is fine (not valid for 180 degree periodic wedges) if(std::fabs(n0_dot_n1 - 1.) < 1.e-8) - continue; + return; // Rotated faces case (e.g. periodic wedge) we need to check for non-antiparallel face normals if(std::fabs(n0_dot_n1 + 1.) > 1.e-2){ @@ -794,7 +802,6 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ b[2] /= mag_b; } - } // Now that we have a reference coordinate plane in which to align our points @@ -805,7 +812,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ // Differential position in mesh Scalar x0[3] = {0}; Scalar x1[3] = {0}; - + // Iterate through points in cell 1 and find which point they align with in cell 0 for(int face_point_1=0; face_point_1& in_node_ // Load position shifted by face center for(int dim=0; dim& in_node_ // Load position shifted by face center for(int dim=0; dim& in_node_ // If the distance, compared to the size of the cell, is small, we assume these are the same points if(p012 / r2 < 1.e-12){ - point_order[face_point_1] = face_point_0; + point_order(face,face_point_1) = face_point_0; break; } // Big problem - there wan't a point that aligned properly - TEUCHOS_ASSERT(face_point_0 != num_points_per_face-1); + KOKKOS_ASSERT(face_point_0 != num_points_per_face-1); } } - + // Now re-order the points on face 1 to correct the alignment const int point_offset = lidx_1*num_points_per_face; for( int face_point_1 = 0; face_point_1 < num_points_per_face - 1; ++face_point_1 ){ // While the point is not yet in place - keep swapping until it is in place (N^2 operations - not great) - while( face_point_1 != point_order[face_point_1] ){ + while( face_point_1 != point_order(face,face_point_1) ){ // We need to swap with the component in this position - const int face_point_0 = point_order[face_point_1]; - swapQuadraturePoints(cell_1,point_offset+face_point_1,point_offset+face_point_0); - std::swap( point_order[face_point_1], point_order[face_point_0] ); + const int face_point_0 = point_order(face,face_point_1); + int_values_device.swapQuadraturePoints(cell_1,point_offset+face_point_1,point_offset+face_point_0); + std::swap( point_order(face,face_point_1), point_order(face,face_point_0) ); } } - } + + }); + PHX::Device::execution_space().fence(); } #undef PANZER_DOT @@ -874,41 +883,51 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ // I'm not sure if these should exist for surface integrals, but here we go! // Shakib contravarient metric tensor - for (int cell = 0; cell < num_cells; ++cell) { - for (size_type ip = 0; ip < contravarient.extent(1); ++ip) { - + { + auto contravarient_k = this->contravarient.get_static_view(); + auto covarient_k = this->covarient.get_static_view(); + auto jac_k = this->jac.get_static_view(); + Kokkos::MDRangePolicy> policy({0,0},{(int)num_cells,(int)contravarient.extent(1)}); + Kokkos::parallel_for("covarient metric tensor",policy,KOKKOS_LAMBDA (const int cell,const int ip) { // zero out matrix - for (size_type i = 0; i < contravarient.extent(2); ++i) - for (size_type j = 0; j < contravarient.extent(3); ++j) - covarient(cell,ip,i,j) = 0.0; + for (size_type i = 0; i < contravarient_k.extent(2); ++i) + for (size_type j = 0; j < contravarient_k.extent(3); ++j) + covarient_k(cell,ip,i,j) = 0.0; // g^{ij} = \frac{\parital x_i}{\partial \chi_\alpha}\frac{\parital x_j}{\partial \chi_\alpha} - for (size_type i = 0; i < contravarient.extent(2); ++i) { - for (size_type j = 0; j < contravarient.extent(3); ++j) { - for (size_type alpha = 0; alpha < contravarient.extent(2); ++alpha) { - covarient(cell,ip,i,j) += jac(cell,ip,i,alpha) * jac(cell,ip,j,alpha); + for (size_type i = 0; i < contravarient_k.extent(2); ++i) { + for (size_type j = 0; j < contravarient_k.extent(3); ++j) { + for (size_type alpha = 0; alpha < contravarient_k.extent(2); ++alpha) { + covarient_k(cell,ip,i,j) += jac_k(cell,ip,i,alpha) * jac_k(cell,ip,j,alpha); } } } - - } + }); + PHX::Device::execution_space().fence(); } - auto s_contravarient = Kokkos::subview(contravarient.get_view(), std::make_pair(0,num_cells),Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); - auto s_covarient = Kokkos::subview(covarient.get_view(), std::make_pair(0,num_cells),Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); - Intrepid2::RealSpaceTools::inverse(s_contravarient, s_covarient); + { + auto s_contravarient = Kokkos::subview(contravarient.get_view(), std::make_pair(0,num_cells),Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); + auto s_covarient = Kokkos::subview(covarient.get_view(), std::make_pair(0,num_cells),Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); + Intrepid2::RealSpaceTools::inverse(s_contravarient, s_covarient); + PHX::Device::execution_space().fence(); + } // norm of g_ij - for (int cell = 0; cell < num_cells; ++cell) { - for (size_type ip = 0; ip < contravarient.extent(1); ++ip) { - norm_contravarient(cell,ip) = 0.0; - for (size_type i = 0; i < contravarient.extent(2); ++i) { - for (size_type j = 0; j < contravarient.extent(3); ++j) { - norm_contravarient(cell,ip) += contravarient(cell,ip,i,j) * contravarient(cell,ip,i,j); + { + auto contravarient_k = this->contravarient.get_static_view(); + auto norm_contravarient_k = this->norm_contravarient.get_static_view(); + Kokkos::MDRangePolicy> policy({0,0},{(int)num_cells,(int)contravarient.extent(1)}); + Kokkos::parallel_for("covarient metric tensor",policy,KOKKOS_LAMBDA (const int cell,const int ip) { + norm_contravarient_k(cell,ip) = 0.0; + for (size_type i = 0; i < contravarient_k.extent(2); ++i) { + for (size_type j = 0; j < contravarient_k.extent(3); ++j) { + norm_contravarient_k(cell,ip) += contravarient_k(cell,ip,i,j) * contravarient_k(cell,ip,i,j); } } - norm_contravarient(cell,ip) = std::sqrt(norm_contravarient(cell,ip)); - } + norm_contravarient_k(cell,ip) = std::sqrt(norm_contravarient_k(cell,ip)); + }); + PHX::Device::execution_space().fence(); } } diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp index 178a5d45dd87..c33e8bb0f265 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp @@ -162,7 +162,6 @@ namespace panzer { void swapQuadraturePoints(int cell,int a,int b) const; KOKKOS_INLINE_FUNCTION - //void convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]); static void convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]); /// This should be a private method, but using lambdas on cuda forces this to be public. diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp index 8106f1e9b507..8098ebf52c04 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp @@ -71,9 +71,9 @@ swapQuadraturePoints(int cell, // Rotation matrices are always in 3D for(int dim=0; dim<3; ++dim){ for(int dim2=0; dim2<3; ++dim2){ - scratch_for_compute_side_measure(0) = surface_rotation_matrices(cell,new_cell_point,dim,dim2); + hold = surface_rotation_matrices(cell,new_cell_point,dim,dim2); surface_rotation_matrices(cell,new_cell_point,dim,dim2) = surface_rotation_matrices(cell,old_cell_point,dim,dim2); - surface_rotation_matrices(cell,old_cell_point,dim,dim2) = scratch_for_compute_side_measure(0); + surface_rotation_matrices(cell,old_cell_point,dim,dim2) = hold; } } } diff --git a/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp b/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp index 46b0ab93c325..ce944bab6a76 100644 --- a/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp +++ b/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp @@ -507,7 +507,7 @@ namespace panzer { auto node_coordinates = af.buildStaticArray("node_coordinates",2,4,2); { auto node_coordinates_host = Kokkos::create_mirror_view(node_coordinates.get_static_view()); - auto cell_vertices_host = Kokkos::create_mirror_view(mesh.cell_vertices); + auto cell_vertices_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),mesh.cell_vertices); for(int i=0; icellForSubcell(0,0); - const int cell_1 = connectivity->cellForSubcell(0,1); + const int cell_0 = connectivity->cellForSubcellHost(0,0); + const int cell_1 = connectivity->cellForSubcellHost(0,1); - const int lidx_0 = connectivity->localSubcellForSubcell(0,0); - const int lidx_1 = connectivity->localSubcellForSubcell(0,1); + const int lidx_0 = connectivity->localSubcellForSubcellHost(0,0); + const int lidx_1 = connectivity->localSubcellForSubcellHost(0,1); TEST_EQUALITY(cell_0,0); TEST_EQUALITY(cell_1,1); @@ -585,11 +585,11 @@ namespace panzer { // Face 2 { - const int cell_0 = connectivity->cellForSubcell(2,0); - const int cell_1 = connectivity->cellForSubcell(2,1); + const int cell_0 = connectivity->cellForSubcellHost(2,0); + const int cell_1 = connectivity->cellForSubcellHost(2,1); - const int lidx_0 = connectivity->localSubcellForSubcell(2,0); - const int lidx_1 = connectivity->localSubcellForSubcell(2,1); + const int lidx_0 = connectivity->localSubcellForSubcellHost(2,0); + const int lidx_1 = connectivity->localSubcellForSubcellHost(2,1); TEST_EQUALITY(cell_0,0); TEST_EQUALITY(cell_1,1); From bc049035b298b65cf5495eb1a6e8d7003478423e Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Mon, 12 Jul 2021 17:42:46 -0600 Subject: [PATCH 10/14] Panzer: fix int_values cuda warnings --- .../disc-fe/src/Panzer_IntegrationValues2.cpp | 139 ++---------------- .../disc-fe/src/Panzer_IntegrationValues2.hpp | 37 +++-- .../src/Panzer_IntegrationValues2_impl.hpp | 18 ++- .../test/core_tests/integration_values2.cpp | 56 ++++--- 4 files changed, 93 insertions(+), 157 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index c9a7abe244ee..7c2b395e2dee 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -324,128 +324,6 @@ getCubature(const PHX::MDField& in_node_coordinates, *(int_rule->topology)); } - /* -template -void IntegrationValues2:: -convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]) -{ - using T = Scalar; - - const T n = sqrt(normal[0]*normal[0]+normal[1]*normal[1]+normal[2]*normal[2]); - - // If this fails then the geometry for this cell is probably undefined - if(n > 0.){ - // Make sure transverse is not parallel to normal within some margin of error - transverse[0]=0.;transverse[1]=1.;transverse[2]=0.; - if(std::fabs(normal[0]*transverse[0]+normal[1]*transverse[1])>0.9){ - transverse[0]=1.;transverse[1]=0.; - } - - const T nt = normal[0]*transverse[0]+normal[1]*transverse[1]+normal[2]*transverse[2]; - - // Note normal has unit length - const T mult = nt/(n*n); // = nt - - // Remove normal projection from transverse - for(int dim=0;dim<3;++dim){ - transverse[dim] = transverse[dim] - mult * normal[dim]; - } - - const T t = sqrt(transverse[0]*transverse[0]+transverse[1]*transverse[1]+transverse[2]*transverse[2]); - KOKKOS_ASSERT(t != 0.); - for(int dim=0;dim<3;++dim){ - transverse[dim] /= t; - } - - // We assume a right handed system such that b = n \times t - binormal[0] = (normal[1] * transverse[2] - normal[2] * transverse[1]); - binormal[1] = (normal[2] * transverse[0] - normal[0] * transverse[2]); - binormal[2] = (normal[0] * transverse[1] - normal[1] * transverse[0]); - - // Normalize binormal - const T b = sqrt(binormal[0]*binormal[0]+binormal[1]*binormal[1]+binormal[2]*binormal[2]); - for(int dim=0;dim<3;++dim){ - binormal[dim] /= b; - } - } else { - transverse[0] = 0.; - transverse[1] = 0.; - transverse[2] = 0.; - binormal[0] = 0.; - binormal[1] = 0.; - binormal[2] = 0.; - } -} - */ - /* -template -void IntegrationValues2:: -swapQuadraturePoints(int cell, - int a, - int b) const -{ - const int new_cell_point = a; - const int old_cell_point = b; - - const int cell_dim = ref_ip_coordinates.extent(2); - -#ifdef PANZER_DEBUG - KOKKOS_ASSERT(cell < ip_coordinates.extent_int(0)); - KOKKOS_ASSERT(a < ip_coordinates.extent_int(1)); - KOKKOS_ASSERT(b < ip_coordinates.extent_int(1)); - KOKKOS_ASSERT(cell >= 0); - KOKKOS_ASSERT(a >= 0); - KOKKOS_ASSERT(b >= 0); -#endif - - // Using scratch_for_compute_side_measure instead of hold. This - // works around UVM issues of DFAD temporaries in device code. - // Scalar hold; - - scratch_for_compute_side_measure(0) = weighted_measure(cell,new_cell_point); - weighted_measure(cell,new_cell_point) = weighted_measure(cell,old_cell_point); - weighted_measure(cell,old_cell_point) = scratch_for_compute_side_measure(0); - - scratch_for_compute_side_measure(0) = jac_det(cell,new_cell_point); - jac_det(cell,new_cell_point) = jac_det(cell,old_cell_point); - jac_det(cell,old_cell_point) = scratch_for_compute_side_measure(0); - - for(int dim=0;dim void IntegrationValues2:: generateSurfaceCubatureValues(const PHX::MDField& in_node_coordinates, @@ -708,9 +586,14 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ PHX::View point_order("scratch: point_order",face_connectivity.numSubcellsHost(),num_points_per_face); // Iterate through faces + auto ref_ip_coordinates_k = ref_ip_coordinates.get_view(); auto ip_coordinates_k = ip_coordinates.get_view(); + auto weighted_measure_k = weighted_measure.get_view(); + auto jac_k = jac.get_view(); + auto jac_det_k = jac_det.get_view(); + auto jac_inv_k = jac_inv.get_view(); + auto surface_normals_k = surface_normals.get_view(); auto surface_rotation_matrices_k = surface_rotation_matrices.get_view(); - auto& int_values_device = *this; // for cuda Kokkos::parallel_for("face iteration",face_connectivity.numSubcellsHost(),KOKKOS_LAMBDA (const int face) { // Cells for sides 0 and 1 const int cell_0 = face_connectivity.cellForSubcell(face,0); @@ -866,7 +749,15 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ while( face_point_1 != point_order(face,face_point_1) ){ // We need to swap with the component in this position const int face_point_0 = point_order(face,face_point_1); - int_values_device.swapQuadraturePoints(cell_1,point_offset+face_point_1,point_offset+face_point_0); + panzer::swapQuadraturePoints(cell_1,point_offset+face_point_1,point_offset+face_point_0, + ref_ip_coordinates_k, + ip_coordinates_k, + weighted_measure_k, + jac_k, + jac_det_k, + jac_inv_k, + surface_normals_k, + surface_rotation_matrices_k); std::swap( point_order(face,face_point_1), point_order(face,face_point_0) ); } } diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp index c33e8bb0f265..8272bb524657 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp @@ -57,6 +57,31 @@ namespace panzer { class SubcellConnectivity; + /** + * \brief Swap the ordering of quadrature points in a specified cell. + * + * \param[in] cell Cell index + * \param[in] a Quadrature point a + * \param[in] b Quadrature point b + * + * NOTE: this should be a member function of IntegrationValues but + * lambda capture generates a ton of cuda compiler warnings. Making + * it a stand alone function fixes this. + */ + template + KOKKOS_INLINE_FUNCTION + void swapQuadraturePoints(int cell,int a,int b, + T0& ref_ip_coordinates, + T1& ip_coordinates, + T2& weighted_measure, + T3& jac, + T4& jac_det, + T5& jac_inv, + T6& surface_normals, + T7& surface_rotation_matrices); + template class IntegrationValues2 { public: @@ -149,18 +174,6 @@ namespace panzer { Array_Point scratch_for_compute_side_measure; // size: span() == jac.span() - /** - * \brief Swap the ordering of quadrature points in a specified cell. - * - * \param[in] cell Cell index - * \param[in] a Quadrature point a - * \param[in] b Quadrature point b - * - * NOTE: this is a non-const function but we had to make it const to run on device. - */ - KOKKOS_INLINE_FUNCTION - void swapQuadraturePoints(int cell,int a,int b) const; - KOKKOS_INLINE_FUNCTION static void convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]); diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp index 8098ebf52c04..86ecb5eeabd6 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp @@ -10,12 +10,22 @@ testing). */ namespace panzer { - -template -void IntegrationValues2:: + +template +void swapQuadraturePoints(int cell, int a, - int b) const + int b, + T0& ref_ip_coordinates, + T1& ip_coordinates, + T2& weighted_measure, + T3& jac, + T4& jac_det, + T5& jac_inv, + T6& surface_normals, + T7& surface_rotation_matrices) { const int new_cell_point = a; const int old_cell_point = b; diff --git a/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp b/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp index ce944bab6a76..f50ef6961b8f 100644 --- a/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp +++ b/packages/panzer/disc-fe/test/core_tests/integration_values2.cpp @@ -262,21 +262,6 @@ namespace panzer { } - // This should be a simple lambda but the RCPs and std::vector in - // the int_values triggers warnings for calling host code form a - // host/device function. We eliminate the warnings by using a - // functor that is memcopied to device. - template - struct DoSwapOnDevice { - int cell_,orig_pt_,new_pt_; - panzer::IntegrationValues2 int_values_; - DoSwapOnDevice(int cell,int orig_pt,int new_pt,panzer::IntegrationValues2& int_values) - : cell_(cell),orig_pt_(orig_pt),new_pt_(new_pt),int_values_(int_values){} - KOKKOS_INLINE_FUNCTION void operator() (const int ) const { - int_values_.swapQuadraturePoints(cell_,orig_pt_,new_pt_); - } - }; - TEUCHOS_UNIT_TEST(integration_values, quadpt_swap) { Teuchos::RCP topo = @@ -350,7 +335,25 @@ namespace panzer { int org_pt = 0; int new_pt = 1; - Kokkos::parallel_for("swap qp",1,DoSwapOnDevice(tst_cell,org_pt,new_pt,int_values)); + auto ref_ip_coordinates_k = int_values.ref_ip_coordinates.get_view(); + auto ip_coordinates_k = int_values.ip_coordinates.get_view(); + auto weighted_measure_k = int_values.weighted_measure.get_view(); + auto jac_k = int_values.jac.get_view(); + auto jac_det_k = int_values.jac_det.get_view(); + auto jac_inv_k = int_values.jac_inv.get_view(); + auto surface_normals_k = int_values.surface_normals.get_view(); + auto surface_rotation_matrices_k = int_values.surface_rotation_matrices.get_view(); + Kokkos::parallel_for("swap qp",1,KOKKOS_LAMBDA (const int ) { + panzer::swapQuadraturePoints(tst_cell,org_pt,new_pt, + ref_ip_coordinates_k, + ip_coordinates_k, + weighted_measure_k, + jac_k, + jac_det_k, + jac_inv_k, + surface_normals_k, + surface_rotation_matrices_k); + }); Kokkos::deep_copy(weighted_measure,int_values.weighted_measure.get_view()); Kokkos::deep_copy(jac_det,int_values.jac_det.get_view()); @@ -380,7 +383,26 @@ namespace panzer { int org_pt = 1; int new_pt = 3; - Kokkos::parallel_for("swap qp",1,DoSwapOnDevice(tst_cell,org_pt,new_pt,int_values)); + auto ref_ip_coordinates_k = int_values.ref_ip_coordinates.get_view(); + auto ip_coordinates_k = int_values.ip_coordinates.get_view(); + auto weighted_measure_k = int_values.weighted_measure.get_view(); + auto jac_k = int_values.jac.get_view(); + auto jac_det_k = int_values.jac_det.get_view(); + auto jac_inv_k = int_values.jac_inv.get_view(); + auto surface_normals_k = int_values.surface_normals.get_view(); + auto surface_rotation_matrices_k = int_values.surface_rotation_matrices.get_view(); + // Kokkos::parallel_for("swap qp",1,DoSwapOnDevice(tst_cell,org_pt,new_pt,int_values)); + Kokkos::parallel_for("swap qp",1,KOKKOS_LAMBDA (const int ) { + panzer::swapQuadraturePoints(tst_cell,org_pt,new_pt, + ref_ip_coordinates_k, + ip_coordinates_k, + weighted_measure_k, + jac_k, + jac_det_k, + jac_inv_k, + surface_normals_k, + surface_rotation_matrices_k); + }); Kokkos::deep_copy(weighted_measure,int_values.weighted_measure.get_view()); Kokkos::deep_copy(jac_det,int_values.jac_det.get_view()); From b83d1042b6a01fa94e187f590786e39fef78632c Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Wed, 14 Jul 2021 08:45:22 -0600 Subject: [PATCH 11/14] Panzer: move convertNormalToRotationMatrix out of IntValues --- .../Panzer_ConvertNormalToRotationMatrix.hpp | 61 +++++++++++++++++++ .../disc-fe/src/Panzer_IntegrationValues2.cpp | 3 +- .../disc-fe/src/Panzer_IntegrationValues2.hpp | 3 - .../src/Panzer_IntegrationValues2_impl.hpp | 52 ---------------- .../panzer/disc-fe/src/Panzer_Workset.cpp | 4 +- 5 files changed, 65 insertions(+), 58 deletions(-) create mode 100644 packages/panzer/disc-fe/src/Panzer_ConvertNormalToRotationMatrix.hpp diff --git a/packages/panzer/disc-fe/src/Panzer_ConvertNormalToRotationMatrix.hpp b/packages/panzer/disc-fe/src/Panzer_ConvertNormalToRotationMatrix.hpp new file mode 100644 index 000000000000..ed92d3f7430d --- /dev/null +++ b/packages/panzer/disc-fe/src/Panzer_ConvertNormalToRotationMatrix.hpp @@ -0,0 +1,61 @@ +#ifndef PANZER_CONVERT_NORMAL_TO_ROTATION_MATRIX_HPP +#define PANZER_CONVERT_NORMAL_TO_ROTATION_MATRIX_HPP + +namespace panzer { + +template +KOKKOS_INLINE_FUNCTION +void +convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]) +{ + using T = Scalar; + + const T n = sqrt(normal[0]*normal[0]+normal[1]*normal[1]+normal[2]*normal[2]); + + // If this fails then the geometry for this cell is probably undefined + if(n > 0.){ + // Make sure transverse is not parallel to normal within some margin of error + transverse[0]=0.;transverse[1]=1.;transverse[2]=0.; + if(std::fabs(normal[0]*transverse[0]+normal[1]*transverse[1])>0.9){ + transverse[0]=1.;transverse[1]=0.; + } + + const T nt = normal[0]*transverse[0]+normal[1]*transverse[1]+normal[2]*transverse[2]; + + // Note normal has unit length + const T mult = nt/(n*n); // = nt + + // Remove normal projection from transverse + for(int dim=0;dim<3;++dim){ + transverse[dim] = transverse[dim] - mult * normal[dim]; + } + + const T t = sqrt(transverse[0]*transverse[0]+transverse[1]*transverse[1]+transverse[2]*transverse[2]); + KOKKOS_ASSERT(t != 0.); + for(int dim=0;dim<3;++dim){ + transverse[dim] /= t; + } + + // We assume a right handed system such that b = n \times t + binormal[0] = (normal[1] * transverse[2] - normal[2] * transverse[1]); + binormal[1] = (normal[2] * transverse[0] - normal[0] * transverse[2]); + binormal[2] = (normal[0] * transverse[1] - normal[1] * transverse[0]); + + // Normalize binormal + const T b = sqrt(binormal[0]*binormal[0]+binormal[1]*binormal[1]+binormal[2]*binormal[2]); + for(int dim=0;dim<3;++dim){ + binormal[dim] /= b; + } + } else { + transverse[0] = 0.; + transverse[1] = 0.; + transverse[2] = 0.; + binormal[0] = 0.; + binormal[1] = 0.; + binormal[2] = 0.; + } +} + +} + +#endif diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index 7c2b395e2dee..a2d606e0e36f 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -57,6 +57,7 @@ #include "Panzer_CommonArrayFactories.hpp" #include "Panzer_Traits.hpp" #include "Panzer_SubcellConnectivity.hpp" +#include "Panzer_ConvertNormalToRotationMatrix.hpp" // For device member functions to avoid CUDA RDC in unit testing #include "Panzer_IntegrationValues2_impl.hpp" @@ -548,7 +549,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ Scalar transverse[3]; Scalar binormal[3]; - convertNormalToRotationMatrix(normal,transverse,binormal); + panzer::convertNormalToRotationMatrix(normal,transverse,binormal); for(int dim=0; dim<3; ++dim){ surface_rotation_matrices_k(cell,point,0,dim) = normal[dim]; diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp index 8272bb524657..bab023aa6e54 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.hpp @@ -174,9 +174,6 @@ namespace panzer { Array_Point scratch_for_compute_side_measure; // size: span() == jac.span() - KOKKOS_INLINE_FUNCTION - static void convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]); - /// This should be a private method, but using lambdas on cuda forces this to be public. void evaluateRemainingValues(const PHX::MDField & in_node_coordinates, const int in_num_cells); diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp index 86ecb5eeabd6..2af2de2e8381 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2_impl.hpp @@ -88,58 +88,6 @@ swapQuadraturePoints(int cell, } } -template -void IntegrationValues2:: -convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scalar binormal[3]) -{ - using T = Scalar; - - const T n = sqrt(normal[0]*normal[0]+normal[1]*normal[1]+normal[2]*normal[2]); - - // If this fails then the geometry for this cell is probably undefined - if(n > 0.){ - // Make sure transverse is not parallel to normal within some margin of error - transverse[0]=0.;transverse[1]=1.;transverse[2]=0.; - if(std::fabs(normal[0]*transverse[0]+normal[1]*transverse[1])>0.9){ - transverse[0]=1.;transverse[1]=0.; - } - - const T nt = normal[0]*transverse[0]+normal[1]*transverse[1]+normal[2]*transverse[2]; - - // Note normal has unit length - const T mult = nt/(n*n); // = nt - - // Remove normal projection from transverse - for(int dim=0;dim<3;++dim){ - transverse[dim] = transverse[dim] - mult * normal[dim]; - } - - const T t = sqrt(transverse[0]*transverse[0]+transverse[1]*transverse[1]+transverse[2]*transverse[2]); - KOKKOS_ASSERT(t != 0.); - for(int dim=0;dim<3;++dim){ - transverse[dim] /= t; - } - - // We assume a right handed system such that b = n \times t - binormal[0] = (normal[1] * transverse[2] - normal[2] * transverse[1]); - binormal[1] = (normal[2] * transverse[0] - normal[0] * transverse[2]); - binormal[2] = (normal[0] * transverse[1] - normal[1] * transverse[0]); - - // Normalize binormal - const T b = sqrt(binormal[0]*binormal[0]+binormal[1]*binormal[1]+binormal[2]*binormal[2]); - for(int dim=0;dim<3;++dim){ - binormal[dim] /= b; - } - } else { - transverse[0] = 0.; - transverse[1] = 0.; - transverse[2] = 0.; - binormal[0] = 0.; - binormal[1] = 0.; - binormal[2] = 0.; - } -} - } #endif diff --git a/packages/panzer/disc-fe/src/Panzer_Workset.cpp b/packages/panzer/disc-fe/src/Panzer_Workset.cpp index 4475238fa06a..52355178b38e 100644 --- a/packages/panzer/disc-fe/src/Panzer_Workset.cpp +++ b/packages/panzer/disc-fe/src/Panzer_Workset.cpp @@ -52,7 +52,7 @@ #include "Panzer_LocalMeshInfo.hpp" #include "Panzer_PointGenerator.hpp" #include "Panzer_PointValues2.hpp" -#include "Panzer_IntegrationValues2_impl.hpp" // needed for lambda dispatch with convertNormalToRotationMatrix() +#include "Panzer_ConvertNormalToRotationMatrix.hpp" #include "Panzer_SubcellConnectivity.hpp" #include "Panzer_OrientationsInterface.hpp" @@ -148,7 +148,7 @@ correctVirtualNormals(const Teuchos::RCP > iv normal[d] = -n_d; } - panzer::IntegrationValues2::convertNormalToRotationMatrix(normal,transverse,binormal); + panzer::convertNormalToRotationMatrix(normal,transverse,binormal); for(int dim=0; dim<3; ++dim){ iv->surface_rotation_matrices(virtual_cell,virtual_cell_point,0,dim) = normal[dim]; From f9ca3d48d2610d8d4bf30ee7fe8d0b38b3d6a012 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Wed, 14 Jul 2021 12:57:52 -0600 Subject: [PATCH 12/14] Panzer: switch to use Kokkos version of math functions --- .../Panzer_ConvertNormalToRotationMatrix.hpp | 2 +- .../disc-fe/src/Panzer_IntegrationValues2.cpp | 24 ++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_ConvertNormalToRotationMatrix.hpp b/packages/panzer/disc-fe/src/Panzer_ConvertNormalToRotationMatrix.hpp index ed92d3f7430d..6e4454f383b8 100644 --- a/packages/panzer/disc-fe/src/Panzer_ConvertNormalToRotationMatrix.hpp +++ b/packages/panzer/disc-fe/src/Panzer_ConvertNormalToRotationMatrix.hpp @@ -16,7 +16,7 @@ convertNormalToRotationMatrix(const Scalar normal[3], Scalar transverse[3], Scal if(n > 0.){ // Make sure transverse is not parallel to normal within some margin of error transverse[0]=0.;transverse[1]=1.;transverse[2]=0.; - if(std::fabs(normal[0]*transverse[0]+normal[1]*transverse[1])>0.9){ + if(Kokkos::Experimental::fabs(normal[0]*transverse[0]+normal[1]*transverse[1])>0.9){ transverse[0]=1.;transverse[1]=0.; } diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index a2d606e0e36f..367e5ec6c2c5 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -491,7 +491,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ } // If n is zero then this is - hopefully - a virtual cell if(n > 0.){ - n = std::sqrt(n); + n = Kokkos::Experimental::sqrt(n); for(int dim=0;dim& in_node_ // FIXME: Currently virtual cells will set their surface normal along the same direction as the cell they "reflect" // This causes a host of issues (e.g. identifying 180 degree periodic wedges), but we have to support virtual cells as a priority // Therefore, we will just assume that the ordering is fine (not valid for 180 degree periodic wedges) - if(std::fabs(n0_dot_n1 - 1.) < 1.e-8) + if(Kokkos::Experimental::fabs(n0_dot_n1 - 1.) < 1.e-8) return; // Rotated faces case (e.g. periodic wedge) we need to check for non-antiparallel face normals - if(std::fabs(n0_dot_n1 + 1.) > 1.e-2){ + if(Kokkos::Experimental::fabs(n0_dot_n1 + 1.) > 1.e-2){ // Now we need to define an arbitrary transverse and binormal in the plane across which the faces are anti-symmetric // We can do this by setting t = n_0 \times n_1 PANZER_CROSS(n0,n1,t); // Normalize the transverse vector - const Scalar mag_t = std::sqrt(PANZER_DOT(t,t)); + const Scalar mag_t = Kokkos::Experimental::sqrt(PANZER_DOT(t,t)); t[0] /= mag_t; t[1] /= mag_t; t[2] /= mag_t; @@ -680,7 +680,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ b[2] = n0[2] + n1[2]; // Normalize the binormal vector - const Scalar mag_b = std::sqrt(PANZER_DOT(b,b)); + const Scalar mag_b = Kokkos::Experimental::sqrt(PANZER_DOT(b,b)); b[0] /= mag_b; b[1] /= mag_b; b[2] /= mag_b; @@ -759,7 +759,9 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ jac_inv_k, surface_normals_k, surface_rotation_matrices_k); - std::swap( point_order(face,face_point_1), point_order(face,face_point_0) ); + Scalar tmp = point_order(face,face_point_1); + point_order(face,face_point_1) = point_order(face,face_point_0); + point_order(face,face_point_0) = tmp; } } @@ -817,7 +819,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ norm_contravarient_k(cell,ip) += contravarient_k(cell,ip,i,j) * contravarient_k(cell,ip,i,j); } } - norm_contravarient_k(cell,ip) = std::sqrt(norm_contravarient_k(cell,ip)); + norm_contravarient_k(cell,ip) = Kokkos::Experimental::sqrt(norm_contravarient_k(cell,ip)); }); PHX::Device::execution_space().fence(); } @@ -863,6 +865,7 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi cub_points.get_view(), node_coordinates.get_view(), *(int_rule->topology)); + PHX::Device::execution_space().fence(); auto s_jac_inv = Kokkos::subview(jac_inv.get_view(),std::make_pair(0,num_cells),Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()); cell_tools.setJacobianInv(s_jac_inv, s_jac); @@ -870,6 +873,8 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi auto s_jac_det = Kokkos::subview(jac_det.get_view(),std::make_pair(0,num_cells),Kokkos::ALL()); cell_tools.setJacobianDet(s_jac_det, s_jac); + PHX::Device::execution_space().fence(); + auto s_weighted_measure = Kokkos::subview(weighted_measure.get_view(),std::make_pair(0,num_cells),Kokkos::ALL()); if (!int_rule->isSide()) { Intrepid2::FunctionSpaceTools:: @@ -889,6 +894,8 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi } else TEUCHOS_ASSERT(false); + PHX::Device::execution_space().fence(); + // Shakib contravarient metric tensor { auto covarient_k = covarient.get_view(); @@ -916,6 +923,7 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi auto s_covarient = Kokkos::subview(covarient.get_view(),std::make_pair(0,num_cells),Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()); auto s_contravarient = Kokkos::subview(contravarient.get_view(),std::make_pair(0,num_cells),Kokkos::ALL(),Kokkos::ALL(),Kokkos::ALL()); Intrepid2::RealSpaceTools::inverse(s_contravarient, s_covarient); + PHX::Device::execution_space().fence(); // norm of g_ij { @@ -929,7 +937,7 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi norm_contravarient_k(cell,ip) += contravarient_k(cell,ip,i,j) * contravarient_k(cell,ip,i,j); } } - norm_contravarient_k(cell,ip) = std::sqrt(norm_contravarient_k(cell,ip)); + norm_contravarient_k(cell,ip) = Kokkos::Experimental::sqrt(norm_contravarient_k(cell,ip)); }); PHX::Device::execution_space().fence(); } From 74066e32b7fc812d31539eac26a593a20af315c7 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Wed, 14 Jul 2021 15:45:32 -0600 Subject: [PATCH 13/14] Panzer: changes to subcell size functions for uvm removal --- .../disc-fe/src/Panzer_IntegrationValues2.cpp | 4 +-- .../src/Panzer_SubcellConnectivity.cpp | 7 ---- .../src/Panzer_SubcellConnectivity.hpp | 36 +++++++------------ 3 files changed, 15 insertions(+), 32 deletions(-) diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index 367e5ec6c2c5..8ab12f96c901 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -584,7 +584,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ // functor to query the policy using the functor. We'll just live // with the extra memory since it is temporary scratch. // std::vector point_order(num_points_per_face); - PHX::View point_order("scratch: point_order",face_connectivity.numSubcellsHost(),num_points_per_face); + PHX::View point_order("scratch: point_order",face_connectivity.numSubcells(),num_points_per_face); // Iterate through faces auto ref_ip_coordinates_k = ref_ip_coordinates.get_view(); @@ -595,7 +595,7 @@ generateSurfaceCubatureValues(const PHX::MDField& in_node_ auto jac_inv_k = jac_inv.get_view(); auto surface_normals_k = surface_normals.get_view(); auto surface_rotation_matrices_k = surface_rotation_matrices.get_view(); - Kokkos::parallel_for("face iteration",face_connectivity.numSubcellsHost(),KOKKOS_LAMBDA (const int face) { + Kokkos::parallel_for("face iteration",face_connectivity.numSubcells(),KOKKOS_LAMBDA (const int face) { // Cells for sides 0 and 1 const int cell_0 = face_connectivity.cellForSubcell(face,0); const int cell_1 = face_connectivity.cellForSubcell(face,1); diff --git a/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.cpp b/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.cpp index 34fc08e5c38b..160d2b1c3958 100644 --- a/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.cpp +++ b/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.cpp @@ -55,13 +55,6 @@ setup(const panzer::LocalMeshPartition & partition) const int num_faces_per_cell = partition.cell_to_faces.extent(1); const int num_cells_per_face = 2; - _num_subcells = num_faces; - _num_subcells_device = PHX::View("_num_subcells_device",1); - Kokkos::deep_copy(_num_subcells_device,_num_subcells); - _num_cells = num_cells; - _num_cells_device = PHX::View("_num_cells_device",1); - Kokkos::deep_copy(_num_cells_device,_num_cells); - _subcell_to_cells_adj = PHX::View("subcell_to_cells_adj", num_faces+1); _subcell_to_cells = PHX::View("subcell_to_cells", num_faces*num_cells_per_face); _subcell_to_local_subcells = PHX::View("subcell_to_local_subcells", num_faces*num_cells_per_face); diff --git a/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.hpp b/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.hpp index 5a255c3aa3a9..b6c639c1336d 100644 --- a/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.hpp +++ b/packages/panzer/disc-fe/src/Panzer_SubcellConnectivity.hpp @@ -58,7 +58,7 @@ class SubcellConnectivity public: /// Default constructor - SubcellConnectivity():_num_subcells(-1),_num_cells(-1){} + SubcellConnectivity() = default; /// Default destructor ~SubcellConnectivity() = default; @@ -69,8 +69,7 @@ class SubcellConnectivity * \return Number of subcells associated with the cells */ KOKKOS_INLINE_FUNCTION - int numSubcells() const {return _num_subcells_device(0);} - int numSubcellsHost() const {return _num_subcells;} + int numSubcells() const {return _subcell_to_cells_adj.extent(0)-1;} /** * \brief Gives number of cells in connectivity @@ -78,8 +77,7 @@ class SubcellConnectivity * \return Number of subcells associated with the cells */ KOKKOS_INLINE_FUNCTION - int numCells() const {return _num_cells_device(0);} - int numCellsHost() const {return _num_cells;} + int numCells() const {return _cell_to_subcells_adj.extent(0)-1;} /** * \brief gives number of subcells (e.g. faces) found on a given cell @@ -166,14 +164,6 @@ class SubcellConnectivity protected: - /// Number of subcells for a given number of cells - int _num_subcells; - PHX::View _num_subcells_device; - - /// Number of cells - int _num_cells; - PHX::View _num_cells_device; - /// Adjacency array for indexing into subcell_to_cells array PHX::View _subcell_to_cells_adj; PHX::View::HostMirror _subcell_to_cells_adj_host; @@ -230,7 +220,7 @@ SubcellConnectivity:: numSubcellsOnCell(const int cell) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(cell >= 0 and cell < _num_cells_device(0)); + KOKKOS_ASSERT(cell >= 0 and cell < numCells()); #endif return _cell_to_subcells_adj(cell+1)-_cell_to_subcells_adj(cell); } @@ -240,7 +230,7 @@ SubcellConnectivity:: numSubcellsOnCellHost(const int cell) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(cell >= 0 and cell < _num_cells); + KOKKOS_ASSERT(cell >= 0 and cell < numCells()); #endif return _cell_to_subcells_adj_host(cell+1)-_cell_to_subcells_adj_host(cell); } @@ -250,7 +240,7 @@ SubcellConnectivity:: numCellsOnSubcell(const int subcell) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells_device(0)); + KOKKOS_ASSERT(subcell >= 0 and subcell < numSubcells()); #endif return _subcell_to_cells_adj(subcell+1)-_subcell_to_cells_adj(subcell); } @@ -260,7 +250,7 @@ SubcellConnectivity:: numCellsOnSubcellHost(const int subcell) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells); + KOKKOS_ASSERT(subcell >= 0 and subcell < numSubcells()); #endif return _subcell_to_cells_adj_host(subcell+1)-_subcell_to_cells_adj_host(subcell); } @@ -270,7 +260,7 @@ SubcellConnectivity:: subcellForCell(const int cell, const int local_subcell_index) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(cell >= 0 and cell < _num_cells_device(0)); + KOKKOS_ASSERT(cell >= 0 and cell < numCells()); KOKKOS_ASSERT(local_subcell_index < numSubcellsOnCell(cell)); #endif const int index = _cell_to_subcells_adj(cell)+local_subcell_index; @@ -282,7 +272,7 @@ SubcellConnectivity:: subcellForCellHost(const int cell, const int local_subcell_index) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(cell >= 0 and cell < _num_cells); + KOKKOS_ASSERT(cell >= 0 and cell < numCell()); KOKKOS_ASSERT(local_subcell_index < numSubcellsOnCellHost(cell)); #endif const int index = _cell_to_subcells_adj_host(cell)+local_subcell_index; @@ -294,7 +284,7 @@ SubcellConnectivity:: cellForSubcell(const int subcell, const int local_cell_index) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells_device(0)); + KOKKOS_ASSERT(subcell >= 0 and subcell < numSubcells()); KOKKOS_ASSERT(local_cell_index < numCellsOnSubcell(subcell)); #endif const int index = _subcell_to_cells_adj(subcell)+local_cell_index; @@ -306,7 +296,7 @@ SubcellConnectivity:: cellForSubcellHost(const int subcell, const int local_cell_index) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells); + KOKKOS_ASSERT(subcell >= 0 and subcell < numSubcells()); KOKKOS_ASSERT(local_cell_index < numCellsOnSubcellHost(subcell)); #endif const int index = _subcell_to_cells_adj_host(subcell)+local_cell_index; @@ -318,7 +308,7 @@ SubcellConnectivity:: localSubcellForSubcell(const int subcell, const int local_cell_index) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells_device(0)); + KOKKOS_ASSERT(subcell >= 0 and subcell < numSubcells()); KOKKOS_ASSERT(local_cell_index < numCellsOnSubcell(subcell)); #endif const int index = _subcell_to_cells_adj(subcell)+local_cell_index; @@ -330,7 +320,7 @@ SubcellConnectivity:: localSubcellForSubcellHost(const int subcell, const int local_cell_index) const { #ifdef PANZER_DEBUG - KOKKOS_ASSERT(subcell >= 0 and subcell < _num_subcells); + KOKKOS_ASSERT(subcell >= 0 and subcell < numSubcells()); KOKKOS_ASSERT(local_cell_index < numCellsOnSubcellHost(subcell)); #endif const int index = _subcell_to_cells_adj_host(subcell)+local_cell_index; From 9d89a89bd96bf631f750b5b06f4c3b3122e5718a Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Thu, 15 Jul 2021 07:03:56 -0600 Subject: [PATCH 14/14] Panzer: fix clang/gcc compiler warning --- packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp index 8ab12f96c901..da55b2e18b2d 100644 --- a/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp +++ b/packages/panzer/disc-fe/src/Panzer_IntegrationValues2.cpp @@ -852,7 +852,7 @@ evaluateRemainingValues(const PHX::MDField& in_node_coordi auto node_coordinates_k = node_coordinates.get_view(); auto in_node_coordinates_k = in_node_coordinates.get_view(); - Kokkos::MDRangePolicy> policy({0,0,0},{num_cells,num_nodes,num_dims}); + Kokkos::MDRangePolicy> policy({0,0,0},{(int)num_cells,(int)num_nodes,(int)num_dims}); Kokkos::parallel_for("node coordinates",policy,KOKKOS_LAMBDA (const int cell,const int node,const int dim) { node_coordinates_k(cell,node,dim) = in_node_coordinates_k(cell,node,dim); });