
Commit

Merge Pull Request #9475 from trilinos/Trilinos/master_merge_20210723_000552

Automatically Merged using Trilinos Master Merge AutoTester
PR Title: Trilinos Master Merge PR Generator: Auto PR created to promote from master_merge_20210723_000552 branch to master
PR Author: trilinos-autotester
trilinos-autotester authored Jul 23, 2021
2 parents 3229939 + 9561151 commit 4c26aae
Showing 128 changed files with 2,396 additions and 822 deletions.
17 changes: 17 additions & 0 deletions cmake/std/PullRequestLinuxCuda10.1.243TestingSettings.cmake
@@ -145,6 +145,23 @@ set (SEACASAprepro_aprepro_command_line_vars_test_DISABLE ON CACHE BOOL "Tempora
set (SEACASAprepro_aprepro_command_line_include_test_DISABLE ON CACHE BOOL "Temporary disable due to jsrun polluting stderr")
set (SEACASAprepro_aprepro_test_dump_reread_DISABLE ON CACHE BOOL "Temporary disable due to jsrun polluting stderr")

# Disable tests detailed in issue #8796
set (Adelus_vector_random_MPI_1_DISABLE ON CACHE BOOL "Temporary disable for CUDA PR testing")

# Disable tests detailed in issue #8799
set (ROL_adapters_minitensor_test_function_test_01_MPI_4_DISABLE ON CACHE BOOL "Temporary disable for CUDA PR testing")
set (ROL_adapters_minitensor_test_function_test_02_MPI_4_DISABLE ON CACHE BOOL "Temporary disable for CUDA PR testing")
set (ROL_adapters_minitensor_test_sol_test_01_MPI_4_DISABLE ON CACHE BOOL "Temporary disable for CUDA PR testing")
set (ROL_adapters_minitensor_test_vector_test_01_MPI_4_DISABLE ON CACHE BOOL "Temporary disable for CUDA PR testing")
set (ROL_test_algorithm_TypeB_TrustRegionSPG_MPI_1_DISABLE ON CACHE BOOL "Temporary disable for CUDA PR testing")

# Disable tests detailed in issue #8800
set (KokkosCore_UnitTest_CudaTimingBased_MPI_1_DISABLE ON CACHE BOOL "Temporary disable for CUDA PR testing")

# Disable tests detailed in issue #8129
set (PanzerDiscFE_integration_values2_MPI_1_DISABLE ON CACHE BOOL "Temporary disable for CUDA PR testing")
set (Stokhos_TpetraCrsMatrixMPVectorUnitTest_Cuda_MPI_4_DISABLE ON CACHE BOOL "Temporary disable for CUDA PR testing")

set (CMAKE_CXX_STANDARD "14" CACHE STRING "Set C++ standard to C++14")
# set (CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions")

6 changes: 3 additions & 3 deletions packages/intrepid2/src/Shared/Intrepid2_Kernels.hpp
@@ -275,22 +275,22 @@ namespace Intrepid2 {
case NORM_TWO:{
for (ordinal_type i=0;i<m;++i)
for (ordinal_type j=0;j<n;++j)
r_val += A(i,j)*A(i,j);
r_val += A.access(i,j)*A.access(i,j);
r_val = sqrt(r_val);
break;
}
case NORM_INF:{
for (ordinal_type i=0;i<m;++i)
for (ordinal_type j=0;j<n;++j) {
const value_type current = Util<value_type>::abs(A(i,j));
const value_type current = Util<value_type>::abs(A.access(i,j));
r_val = (r_val < current ? current : r_val);
}
break;
}
case NORM_ONE:{
for (ordinal_type i=0;i<m;++i)
for (ordinal_type j=0;j<n;++j)
r_val += Util<value_type>::abs(A(i,j));
r_val += Util<value_type>::abs(A.access(i,j));
break;
}
default: {
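The hunk above replaces parenthesis indexing A(i,j) with A.access(i,j) inside Intrepid2's matrix-norm kernel; the norm logic itself is unchanged. As a point of reference, here is a minimal standalone sketch (not part of this commit) of the three entrywise norms the kernel computes: NORM_TWO is the Frobenius norm, NORM_INF the largest absolute entry, and NORM_ONE the sum of absolute entries.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

enum NormType { NORM_ONE, NORM_TWO, NORM_INF };

// Entrywise norms matching the kernel's definitions (not the induced matrix norms).
double matrixNorm(const std::vector<std::vector<double>>& A, NormType type) {
  double r_val = 0.0;
  switch (type) {
    case NORM_TWO:                       // sqrt of the sum of squared entries (Frobenius)
      for (const auto& row : A)
        for (double a : row) r_val += a * a;
      r_val = std::sqrt(r_val);
      break;
    case NORM_INF:                       // largest absolute entry
      for (const auto& row : A)
        for (double a : row) r_val = std::max(r_val, std::fabs(a));
      break;
    case NORM_ONE:                       // sum of absolute entries
      for (const auto& row : A)
        for (double a : row) r_val += std::fabs(a);
      break;
  }
  return r_val;
}

int main() {
  const std::vector<std::vector<double>> A = {{1.0, -2.0}, {3.0, -4.0}};
  std::printf("one=%g two=%g inf=%g\n",
              matrixNorm(A, NORM_ONE), matrixNorm(A, NORM_TWO), matrixNorm(A, NORM_INF));
  return 0;  // prints one=10 two=5.47723 inf=4
}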
18 changes: 18 additions & 0 deletions packages/ml/src/Utils/ml_MultiLevelPreconditioner.cpp
@@ -2866,6 +2866,24 @@ Print(int level)
sprintf(name,"Rmat_%d", LevelID_[i]);
ML_Operator_Print_UsingGlobalOrdering(mlptr->Rmat+LevelID_[i], name, NULL,NULL);
}

if(ml_nodes_) {
for( int i=0 ; i<NumLevels_ ; ++i ) {
sprintf(name,"Amat_nodes_%d", LevelID_[i]);
ML_Operator_Print_UsingGlobalOrdering(ml_nodes_->Amat+LevelID_[i], name, NULL,NULL);
sprintf(name,"Tmat_nodes_%d", LevelID_[i]);
ML_Operator_Print_UsingGlobalOrdering(Tmat_array[LevelID_[i]], name, NULL,NULL);
}
for( int i=1 ; i<NumLevels_ ; ++i ) {
sprintf(name,"Pmat_nodes_%d", LevelID_[i]);
ML_Operator_Print_UsingGlobalOrdering(ml_nodes_->Pmat+LevelID_[i], name, NULL,NULL);
}
for( int i=0 ; i<NumLevels_-1 ; ++i ) {
sprintf(name,"Rmat_nodes_%d", LevelID_[i]);
ML_Operator_Print_UsingGlobalOrdering(ml_nodes_->Rmat+LevelID_[i], name, NULL,NULL);

}
}

} //if-then-else

8 changes: 7 additions & 1 deletion packages/ml/src/Utils/ml_ValidateParameters.cpp
@@ -145,13 +145,19 @@ void ML_Epetra::SetValidSmooParams(Teuchos::ParameterList *PL, Teuchos::Array<st
# endif
/* Unlisted Options */
setIntParameter("smoother: self overlap",0,"experimental option",PL,intParam);
setDoubleParameter("aggregation: enrich beta",0.0,"Unlisted option",PL,dblParam);

/* Unlisted Options that should probably be listed */
PL->set("smoother: self list",dummy);
PL->sublist("smoother: self list").disableRecursiveValidation();
setDoubleParameter("coarse: add to diag", 0.0,"Unlisted option",PL,dblParam);
PL->set("coarse: split communicator",false);
PL->set("smoother: split communicator",false);

PL->set("dump matrix: enable",false);





// From ml_Multilevel_Smoothers.cpp:
setIntParameter("smoother: ParaSails matrix",0,"Unlisted option",PL,intParam);
(next changed file; filename not shown in this view)
@@ -773,8 +773,8 @@ LOCA::BorderedSolver::TpetraHouseholder::solve(
//*tpetraPrecMatrix = *jac_crs;
{
tpetraPrecMatrix->resumeFill();
auto jac_view = jac_crs->getLocalMatrix().values;
auto prec_view = tpetraPrecMatrix->getLocalMatrix().values;
auto jac_view = jac_crs->getLocalMatrixDevice().values;
auto prec_view = tpetraPrecMatrix->getLocalMatrixDevice().values;
Kokkos::deep_copy(prec_view,jac_view);
tpetraPrecMatrix->fillComplete();
}
@@ -1116,17 +1116,15 @@ updateCrsMatrixForPreconditioner(const NOX::Abstract::MultiVector& UU,

auto& UU_tpetra = NOX::Tpetra::getTpetraMultiVector(UU);
auto& VV_tpetra = NOX::Tpetra::getTpetraMultiVector(VV);
const_cast<NOX::TMultiVector&>(UU_tpetra).sync_device();
const_cast<NOX::TMultiVector&>(VV_tpetra).sync_device();
const auto uu = UU_tpetra.getLocalViewDevice();
const auto vv = VV_tpetra.getLocalViewDevice();
const auto uu = UU_tpetra.getLocalViewDevice(::Tpetra::Access::ReadOnly);
const auto vv = VV_tpetra.getLocalViewDevice(::Tpetra::Access::ReadOnly);

const auto numRows = matrix.getNodeNumRows();
const auto rowMap = matrix.getRowMap()->getLocalMap();
const auto colMap = matrix.getColMap()->getLocalMap();
const auto uMap = UU_tpetra.getMap()->getLocalMap();
const auto vMap = VV_tpetra.getMap()->getLocalMap();
auto J_view = matrix.getLocalMatrix();
auto J_view = matrix.getLocalMatrixDevice();
auto numConstraintsLocal = numConstraints; // for cuda lambda capture

TEUCHOS_ASSERT(static_cast<size_t>(matrix.getRowMap()->getNodeNumElements()) == uu.extent(0));
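The two hunks above migrate from the older Tpetra DualView idiom (sync_device()/sync_host()/modify_host() plus no-argument getLocalViewDevice()/getLocalViewHost()) to views requested with an explicit Tpetra::Access tag, and from getLocalMatrix() to getLocalMatrixDevice(). The following minimal sketch (not part of this commit) illustrates the new access pattern on a freshly built MultiVector; the map size and one-column layout are arbitrary choices for the example.

#include <Teuchos_RCP.hpp>
#include <Tpetra_Core.hpp>
#include <Tpetra_Map.hpp>
#include <Tpetra_MultiVector.hpp>

int main(int argc, char* argv[]) {
  Tpetra::ScopeGuard tpetraScope(&argc, &argv);
  {
    using MV = Tpetra::MultiVector<>;  // default Scalar/LocalOrdinal/GlobalOrdinal/Node
    auto comm = Tpetra::getDefaultComm();
    const Tpetra::global_size_t numGlobal = 100;
    auto map = Teuchos::rcp(new Tpetra::Map<>(numGlobal, 0, comm));
    MV x(map, 1);

    {
      // Write on host; the Access tag replaces modify_host()/sync_device().
      auto xHost = x.getLocalViewHost(Tpetra::Access::ReadWrite);
      for (size_t i = 0; i < xHost.extent(0); ++i)
        xHost(i, 0) = 1.0;
    }  // view released here; Tpetra manages any needed device sync

    {
      // Read-only device view; no explicit sync_device() call is needed.
      auto xDev = x.getLocalViewDevice(Tpetra::Access::ReadOnly);
      (void)xDev;
    }
  }
  return 0;
}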
(next changed file; filename not shown in this view)
@@ -185,8 +185,7 @@ namespace LOCA {
using extractor = ::Thyra::TpetraOperatorVectorExtraction<NOX::Scalar,NOX::LocalOrdinal,NOX::GlobalOrdinal,NOX::NodeType>;
for (size_t i=0; i < me_g_.size(); ++i) {
auto tmp = extractor::getTpetraMultiVector(me_g_[i]);
tmp->sync_host();
auto val = tmp->getLocalViewHost();
auto val = tmp->getLocalViewHost(Tpetra::Access::ReadOnly);
constraints_(i,0) = val(0,0);
if (printDebug_)
std::cout << "LOCA::ConstraintME: constraints_(" << i << ")=" << val(0,0) << std::endl;
@@ -278,8 +277,7 @@ namespace LOCA {
for (size_t j=0; j < me_dgdp_.size(); ++j) {
for (size_t l=0; l < paramIDs.size(); ++l) {
auto tmp = extractor::getTpetraMultiVector(me_dgdp_[j][paramIDs[l]]);
tmp->sync_host();
auto val = tmp->getLocalViewHost();
auto val = tmp->getLocalViewHost(Tpetra::Access::ReadOnly);
// first col contains g, so we shift the columns by one
dgdp(j,l+1) = val(0,0);
}
(next changed file; filename not shown in this view)
@@ -27,8 +27,6 @@ namespace LOCA {
J_rowMatrix(jacRowMatrix),
nonconst_U(U_multiVec),
nonconst_V(V_multiVec),
U_DeviceView(nonconst_U->getLocalViewDevice()),
V_DeviceView(nonconst_V->getLocalViewDevice()),
includeUV(include_UV_terms),
m(U_multiVec->getNumVectors()),
U_map(*U_multiVec->getMap()),
@@ -140,6 +138,7 @@ namespace LOCA {
"ERROR - LOCA::LowRankRowMatrix::getLocalRowView() - NOT implemented yet!");
}

#ifdef TPETRA_ENABLE_DEPRECATED_CODE
void
LowRankUpdateRowMatrix::getGlobalRowCopy(NOX::GlobalOrdinal GlobalRow,
const Teuchos::ArrayView<NOX::GlobalOrdinal> &Indices,
@@ -177,6 +176,7 @@ namespace LOCA {
TEUCHOS_TEST_FOR_EXCEPTION(true,std::runtime_error,
"ERROR - LOCA::LowRankRowMatrix::getLocalRowView() - NOT implemented yet!");
}
#endif

// Use the default implementation!
// NOX::LocalOrdinal
@@ -274,10 +274,12 @@ namespace LOCA {
NOX::Scalar LowRankUpdateRowMatrix::computeUV(int u_row_lid, int v_row_lid) const
{
NOX::Scalar val = 0.0;
auto U_HostView=nonconst_U->getLocalViewHost(::Tpetra::Access::ReadOnly);
auto V_HostView=nonconst_V->getLocalViewHost(::Tpetra::Access::ReadOnly);

// val = sum_{k=0}^m U(i,k)*V(j,k)
for (int k=0; k<m; ++k)
val += U_DeviceView(u_row_lid,k) * V_DeviceView(v_row_lid,k);
val += U_HostView(u_row_lid,k) * V_HostView(v_row_lid,k);

return val;
}
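In the hunk above, the cached device views are replaced by host views obtained per call with Tpetra::Access::ReadOnly; the arithmetic is unchanged: computeUV returns one entry of U*V^T. A minimal standalone sketch of that entry computation (not part of this commit) with plain containers:

#include <cstdio>
#include <vector>

// Entry (u_row, v_row) of U*V^T, i.e. the dot product of row u_row of U with row v_row of V.
double computeUV(const std::vector<std::vector<double>>& U,
                 const std::vector<std::vector<double>>& V,
                 int u_row, int v_row) {
  double val = 0.0;
  const int m = static_cast<int>(U[u_row].size());  // number of columns (low-rank vectors)
  for (int k = 0; k < m; ++k)
    val += U[u_row][k] * V[v_row][k];
  return val;
}

int main() {
  const std::vector<std::vector<double>> U = {{1.0, 2.0}, {3.0, 4.0}};
  const std::vector<std::vector<double>> V = {{5.0, 6.0}, {7.0, 8.0}};
  std::printf("(U V^T)(0,1) = %g\n", computeUV(U, V, 0, 1));  // 1*7 + 2*8 = 23
  return 0;
}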
(next changed file; filename not shown in this view)
@@ -108,6 +108,7 @@ namespace LOCA {
getLocalRowView (NOX::LocalOrdinal LocalRow,
NOX::TRowMatrix::local_inds_host_view_type &Indices,
NOX::TRowMatrix::values_host_view_type &Values) const override;
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
virtual void
getGlobalRowCopy (NOX::GlobalOrdinal GlobalRow,
const Teuchos::ArrayView<NOX::GlobalOrdinal> &Indices,
@@ -126,6 +127,7 @@
getLocalRowView (NOX::LocalOrdinal LocalRow,
Teuchos::ArrayView<const NOX::LocalOrdinal>& indices,
Teuchos::ArrayView<const NOX::Scalar>& values) const override;
#endif

// Use the default implementation!
// virtual NOX::LocalOrdinal
@@ -177,12 +179,6 @@ namespace LOCA {
//! Stores pointer to non-const V
Teuchos::RCP<NOX::TMultiVector> nonconst_V;

//! View of U
const typename NOX::TMultiVector::dual_view_type::t_dev U_DeviceView;

//! View of V
const typename NOX::TMultiVector::dual_view_type::t_dev V_DeviceView;

//! Flag indicating whether to include U*V^T terms
bool includeUV;

(next changed file; filename not shown in this view)
@@ -301,14 +301,11 @@ testJacobian (panzer::AssemblyEngine_TemplateManager<panzer::Traits>& ae_tm,
const bool i_mine = row_map.isNodeGlobalElement(i);
double x_prev = 0;
int i_lid = 0;
x->sync_host();
auto x_host = x->getLocalViewHost();
auto x_host = x->getLocalViewHost(Tpetra::Access::ReadWrite);
if (i_mine && i >= 0) {
i_lid = col_map.getLocalElement(i);
x_prev = x_host(i_lid,0);
x_host(i_lid,0) += delta;
x->modify_host();
x->sync_device();
}
ep_con->set_x(x);
panzer::AssemblyEngineInArgs input(ghost_con, ep_con);
@@ -319,10 +316,8 @@
f0 = ep_con->get_f();
else {
TpetraVector& f = *ep_con->get_f();
f.sync_host();
f0->sync_host();
const auto f_host = f.getLocalViewHost();
const auto f0_host = f0->getLocalViewHost();
const auto f_host = f.getLocalViewHost(Tpetra::Access::ReadOnly);
const auto f0_host = f0->getLocalViewHost(Tpetra::Access::ReadOnly);
if (i_mine) {
//(*x)[i_lid] = x_prev;
x_host(i_lid,0) = x_prev;
@@ -369,14 +364,14 @@ testJacobian (panzer::AssemblyEngine_TemplateManager<panzer::Traits>& ae_tm,
RCP<TpetraCrsMatrix> D = Tpetra::MatrixMatrix::add(-1.0, false, *ep_con->get_A(), 1.0, false, *A_fd);
Kokkos::fence();

auto local_A_fd = A_fd->getLocalMatrix().values;
auto local_A_fd = A_fd->getLocalMatrixDevice().values;
double local_A_fd_inf_norm = 0.0;
Kokkos::parallel_reduce(local_A_fd.size(),KOKKOS_LAMBDA (const int i, double& abs_max) {
double val = std::fabs(local_A_fd(i));
if (val > abs_max) abs_max = val;
},Kokkos::Max<double>(local_A_fd_inf_norm));

auto local_D = D->getLocalMatrix().values;
auto local_D = D->getLocalMatrixDevice().values;
double local_D_inf_norm = 0.0;
Kokkos::parallel_reduce(local_D.size(),KOKKOS_LAMBDA (const int i, double& abs_max) {
double val = std::fabs(local_D(i));
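The testJacobian changes above switch to getLocalMatrixDevice() but keep the same Kokkos::parallel_reduce max-reduction for the infinity norm of the matrix values. A minimal standalone sketch of that reduction idiom (not part of this commit; the data here are synthetic) looks like this:

#include <Kokkos_Core.hpp>
#include <cstdio>

int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  {
    const int n = 1000;
    Kokkos::View<double*> values("values", n);

    // Fill with synthetic data of alternating sign.
    Kokkos::parallel_for("fill", n, KOKKOS_LAMBDA(const int i) {
      values(i) = (i % 2 == 0) ? -static_cast<double>(i) : static_cast<double>(i);
    });

    // Max-reduction over absolute values, as in the finite-difference Jacobian check.
    double inf_norm = 0.0;
    Kokkos::parallel_reduce("abs_max", n, KOKKOS_LAMBDA(const int i, double& abs_max) {
      const double v = values(i) < 0.0 ? -values(i) : values(i);
      if (v > abs_max) abs_max = v;
    }, Kokkos::Max<double>(inf_norm));

    std::printf("inf norm = %g\n", inf_norm);  // 999 for n = 1000
  }
  Kokkos::finalize();
  return 0;
}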
126 changes: 126 additions & 0 deletions packages/panzer/adapters-stk/src/Panzer_STK_CheckSidesetOverlap.cpp
@@ -0,0 +1,126 @@
// @HEADER
// ***********************************************************************
//
// Panzer: A partial differential equation assembly
// engine for strongly coupled complex multiphysics systems
// Copyright (2011) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Roger P. Pawlowski ([email protected]) and
// Eric C. Cyr ([email protected])
// ***********************************************************************
// @HEADER

#include "Panzer_STK_CheckSidesetOverlap.hpp"
#include <vector>
#include <algorithm>

namespace panzer_stk {

/// Returns true if the sidesets overlap.
bool checkSidesetOverlap(const std::string& side_a_name,
const std::string& side_b_name,
const panzer_stk::STK_Interface& mesh) {

const bool print_debug = false;

// Get the locally owned nodes of sideset a
std::vector<stk::mesh::EntityId> gids_a;
{
std::vector<stk::mesh::Entity> nodes_a;
stk::mesh::Part* part_a = mesh.getSideset(side_a_name);
TEUCHOS_TEST_FOR_EXCEPTION(part_a==nullptr,std::runtime_error,
"panzer::checkSidesetOverlap: Unknown side set name \"" << side_a_name << "\"");
stk::mesh::Selector selector_a = *part_a & mesh.getMetaData()->locally_owned_part();
const bool sort_by_gid = true;
stk::mesh::get_selected_entities(selector_a,mesh.getBulkData()->buckets(mesh.getNodeRank()),nodes_a,sort_by_gid);
// convert the entities to global ids
gids_a.resize(nodes_a.size());
size_t i = 0;
for (auto&& node : nodes_a) {
gids_a[i] = mesh.getBulkData()->identifier(node);
++i;
}
}

// Get all nodes of sideset b (including nodes from all mpi processes)
std::vector<stk::mesh::EntityId> gids_b;
{
std::vector<stk::mesh::Entity> nodes_b;
stk::mesh::Part* part_b = mesh.getSideset(side_b_name);
TEUCHOS_TEST_FOR_EXCEPTION(part_b==nullptr,std::runtime_error,
"panzer::checkSidesetOverlap: Unknown side set name \"" << side_b_name << "\"");
stk::mesh::Selector selector_b = *part_b;
const bool sort_by_gid = true;
stk::mesh::get_selected_entities(selector_b,mesh.getBulkData()->buckets(mesh.getNodeRank()),nodes_b,sort_by_gid);
// convert the entities to global ids
gids_b.resize(nodes_b.size());
size_t i = 0;
for (auto&& node : nodes_b) {
gids_b[i] = mesh.getBulkData()->identifier(node);
++i;
}
}

// Sort the element gids so we can use binary search
std::sort(gids_b.begin(),gids_b.end());

if (print_debug) {
Teuchos::FancyOStream os(Teuchos::rcpFromRef(std::cout));
os.setShowProcRank(true);
os << std::endl;
os << "gids_a.size()=" << gids_a.size() << std::endl;
for (auto&& gid : gids_a)
os << "gid_a=" << gid << std::endl;
os << "gids_b.size()=" << gids_b.size() << std::endl;
for (auto&& gid : gids_b)
os << "gid_b=" << gid << std::endl;
}

// Search for each node in a in b
// 0 = no overlap, 1 = overlap
// We use int for MPI communication
int has_local_overlap = 0;
for (auto&& a : gids_a) {
if (std::binary_search(gids_b.begin(),gids_b.end(),a)) {
has_local_overlap = 1;
break;
}
}
int has_overlap = 0;
Teuchos::reduceAll(*mesh.getComm(),Teuchos::REDUCE_SUM,1,&has_local_overlap,&has_overlap);
if (has_overlap == 0)
return false;

return true;
}
}
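As a usage note on the new checkSidesetOverlap: it collects the locally owned node GIDs of sideset a and all node GIDs of sideset b, sorts the b list, binary-searches each a GID, and then sum-reduces the local overlap flag across ranks. The following minimal sketch (not part of this commit) isolates the sorted-list/binary-search core; the STK mesh queries and the Teuchos::reduceAll across MPI ranks are omitted here.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

using EntityId = std::uint64_t;  // stand-in for stk::mesh::EntityId

// Returns true if any GID in gids_a also appears in gids_b.
bool hasLocalOverlap(const std::vector<EntityId>& gids_a,
                     std::vector<EntityId> gids_b) {
  std::sort(gids_b.begin(), gids_b.end());
  for (EntityId a : gids_a)
    if (std::binary_search(gids_b.begin(), gids_b.end(), a))
      return true;
  return false;
}

int main() {
  const std::vector<EntityId> a = {3, 7, 11};
  const std::vector<EntityId> b = {2, 4, 7, 9};
  std::printf("overlap: %s\n", hasLocalOverlap(a, b) ? "yes" : "no");  // yes (GID 7)
  return 0;
}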