From 26dbd33e7f44f77eb9f96c71f3eabeda873ec9a0 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 25 Nov 2024 13:12:44 -0700 Subject: [PATCH 01/23] Tpetra: Initialize KokkosKernels TPLs during init Signed-off-by: Brian Kelley --- packages/tpetra/core/src/Tpetra_Core.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/tpetra/core/src/Tpetra_Core.cpp b/packages/tpetra/core/src/Tpetra_Core.cpp index 51680bb8f75d..4dfd077a3796 100644 --- a/packages/tpetra/core/src/Tpetra_Core.cpp +++ b/packages/tpetra/core/src/Tpetra_Core.cpp @@ -19,6 +19,7 @@ #include "Tpetra_Details_checkLaunchBlocking.hpp" #include "Tpetra_Details_KokkosTeuchosTimerInjection.hpp" #include "Tpetra_Details_Behavior.hpp" +#include "KokkosKernels_EagerInitialize.hpp" namespace Tpetra { @@ -129,6 +130,9 @@ namespace Tpetra { (! kokkosIsInitialized, std::logic_error, "At the end of " "initKokkosIfNeeded, Kokkos is not initialized. " "Please report this bug to the Tpetra developers."); + // Now that the Kokkos backend(s) are initialized, + // initialize all KokkosKernels TPLs. + KokkosKernels::eager_initialize(); } #ifdef HAVE_TPETRACORE_MPI From 29210258de52b732c739d6c26028d17c4d4f9644 Mon Sep 17 00:00:00 2001 From: "Justin M. LaPre" Date: Wed, 13 Nov 2024 11:27:35 -0700 Subject: [PATCH 02/23] Add support for using env variable EXTRA_CONFIGURE_ARGS can now be used to pass extra flags to our build scripts. Also had to remove FORCE from Trilinos_ENABLE_TESTS flag so that we are able to override it. Signed-off-by: Justin M. 
LaPre --- packages/framework/ini-files/config-specs.ini | 2 +- packages/framework/pr_tools/LaunchDriver.py | 3 +++ .../pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py | 3 ++- .../trilinosprhelpers/TrilinosPRConfigurationStandard.py | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index de052bca3530..984232a11289 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1557,7 +1557,7 @@ use SEMS_COMMON_CUDA_11 use CUDA11-RUN-SERIAL-TESTS -opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF +opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL : OFF [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all] use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables diff --git a/packages/framework/pr_tools/LaunchDriver.py b/packages/framework/pr_tools/LaunchDriver.py index fe4f20f40372..b98603fa26d7 100755 --- a/packages/framework/pr_tools/LaunchDriver.py +++ b/packages/framework/pr_tools/LaunchDriver.py @@ -116,8 +116,11 @@ def main(argv): if args.kokkos_develop: cmd += " --kokkos-develop" + # extra-configure-args flag currently takes precedence over the env. var. 
if args.extra_configure_args: cmd += f" --extra-configure-args=\"{args.extra_configure_args}\"" + elif os.getenv("EXTRA_CONFIGURE_ARGS"): + cmd += f" --extra-configure-args=\"{os.getenv('EXTRA_CONFIGURE_ARGS')}\"" print("LaunchDriver> EXEC: " + cmd, flush=True) diff --git a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py index 5ca1499c2dfa..f62bda5229a4 100644 --- a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py @@ -790,7 +790,8 @@ def prepare_test(self): "F77", "F90", "FC", - "MODULESHOME" + "MODULESHOME", + "EXTRA_CONFIGURE_ARGS" ] self.message("") tr_env.set_environment.pretty_print_envvars(envvar_filter=envvars_to_print) diff --git a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py index 401824ea8b6d..1f8038c6eaf5 100644 --- a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py +++ b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py @@ -17,7 +17,7 @@ class TrilinosPRConfigurationStandard(TrilinosPRConfigurationBase): Implements Standard mode Trilinos Pull Request Driver """ def __init__(self, args): - super(TrilinosPRConfigurationStandard, self).__init__(args) + super().__init__(args) def execute_test(self): From f308170575af7de9a805716ccb5eef8726870dd6 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Tue, 26 Nov 2024 17:29:05 -0700 Subject: [PATCH 03/23] MueLu: Add function to enforce boundary conditions on initial guess This simplifies solving systems where Dirichlet conditions have only been enforced on matrix rows. 
Signed-off-by: Christian Glusa --- packages/muelu/example/basic/Simple.cpp | 4 +- .../src/Utils/MueLu_UtilitiesBase_decl.hpp | 7 ++++ .../src/Utils/MueLu_UtilitiesBase_def.hpp | 31 ++++++++++++++ packages/muelu/test/scaling/Driver.cpp | 4 +- packages/muelu/test/scaling/DriverCore.hpp | 7 +++- packages/muelu/test/unit_tests/Utilities.cpp | 42 +++++++++++++++++++ 6 files changed, 92 insertions(+), 3 deletions(-) diff --git a/packages/muelu/example/basic/Simple.cpp b/packages/muelu/example/basic/Simple.cpp index 4afe89e0b5e1..6b1cd48bd8d0 100644 --- a/packages/muelu/example/basic/Simple.cpp +++ b/packages/muelu/example/basic/Simple.cpp @@ -85,6 +85,8 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar clp.setOption("belosType", &belosType, "belos solver type: (Pseudoblock CG | Block CG | Pseudoblock GMRES | Block GMRES | ...) see BelosSolverFactory.hpp for exhaustive list of solvers"); bool computeCondEst = false; clp.setOption("condEst", "noCondEst", &computeCondEst, "compute condition number estimate (currently only available for Pseudoblock CG)"); + bool enforceBoundaryConditionsOnInitialGuess = true; + clp.setOption("enforceBCs", "noEnforceBCs", &enforceBoundaryConditionsOnInitialGuess, "enforce Dirichlet boundary condition on initial guess"); double tol = 1e-12; clp.setOption("tol", &tol, "solver convergence tolerance"); bool binaryFormat = false; @@ -201,7 +203,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib &lib, int ar // ========================================================================= { comm->barrier(); - SystemSolve(A, X, B, H, Prec, out, solveType, belosType, false, false, useML, cacheSize, 0, scaleResidualHist, solvePreconditioned, maxIts, tol, computeCondEst); + SystemSolve(A, X, B, H, Prec, out, solveType, belosType, false, false, useML, cacheSize, 0, scaleResidualHist, solvePreconditioned, maxIts, tol, computeCondEst, enforceBoundaryConditionsOnInitialGuess); comm->barrier(); } diff --git 
a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp index 8aeac791865d..57018cd0b047 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp @@ -254,6 +254,13 @@ class UtilitiesBase { */ static Teuchos::ArrayRCP DetectDirichletRowsExt(const Xpetra::Matrix& A, bool& bHasZeroDiagonal, const Magnitude& tol = Teuchos::ScalarTraits::zero()); + /*! @brief Detect Dirichlet rows and copy values from RHS multivector to InitialGuess for Dirichlet rows. + + This can be used to ensure that the InitialGuess satisfies the boundary conditions enforced on A. + Useful in particular for using CG when boundary conditions have only been enforced by one-and-zeroing rows of A, but not columns. + */ + static void EnforceInitialCondition(const Xpetra::Matrix& A, const Xpetra::MultiVector& RHS, Xpetra::MultiVector& InitialGuess, const Magnitude& tol = Teuchos::ScalarTraits::zero(), const bool count_twos_as_dirichlet = false); + /*! 
@brief Find non-zero values in an ArrayRCP Compares the value to 2 * machine epsilon diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp index 6e181415b09b..47db9ba0635f 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp @@ -1180,6 +1180,37 @@ UtilitiesBase:: return boundaryNodes; } +template +void UtilitiesBase:: + EnforceInitialCondition(const Xpetra::Matrix& A, + const Xpetra::MultiVector& RHS, + Xpetra::MultiVector& InitialGuess, + const typename Teuchos::ScalarTraits::magnitudeType& tol, + const bool count_twos_as_dirichlet) { + using range_type = Kokkos::RangePolicy; + + auto dirichletRows = DetectDirichletRows_kokkos(A, tol, count_twos_as_dirichlet); + + LocalOrdinal numRows = A.getLocalNumRows(); + LocalOrdinal numVectors = RHS.getNumVectors(); + TEUCHOS_ASSERT_EQUALITY(numVectors, Teuchos::as(InitialGuess.getNumVectors())); +#ifdef MUELU_DEBUG + TEUCHOS_ASSERT(RHS.getMap()->isCompatible(InitialGuess.getMap())); +#endif + + auto lclRHS = RHS.getDeviceLocalView(Xpetra::Access::ReadOnly); + auto lclInitialGuess = InitialGuess.getDeviceLocalView(Xpetra::Access::ReadWrite); + + Kokkos::parallel_for( + "MueLu:Utils::EnforceInitialCondition", range_type(0, numRows), + KOKKOS_LAMBDA(const LO row) { + if (dirichletRows(row)) { + for (LocalOrdinal j = 0; j < numVectors; ++j) + lclInitialGuess(row, j) = lclRHS(row, j); + } + }); +} + template void UtilitiesBase:: FindNonZeros(const Teuchos::ArrayRCP vals, diff --git a/packages/muelu/test/scaling/Driver.cpp b/packages/muelu/test/scaling/Driver.cpp index 39f2fd5d2053..6b2ef85b8845 100644 --- a/packages/muelu/test/scaling/Driver.cpp +++ b/packages/muelu/test/scaling/Driver.cpp @@ -204,6 +204,8 @@ int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib& lib, int ar clp.setOption("belosType", &belosType, "belos solver type: (Pseudoblock CG | Block CG | Pseudoblock GMRES 
| Block GMRES | ...) see BelosSolverFactory.hpp for exhaustive list of solvers"); bool computeCondEst = false; clp.setOption("condEst", "noCondEst", &computeCondEst, "compute condition number estimate (currently only available for Pseudoblock CG)"); + bool enforceBoundaryConditionsOnInitialGuess = true; + clp.setOption("enforceBCs", "noEnforceBCs", &enforceBoundaryConditionsOnInitialGuess, "enforce Dirichlet boundary condition on initial guess"); double dtol = 1e-12, tol; clp.setOption("tol", &dtol, "solver convergence tolerance"); bool binaryFormat = false; @@ -523,7 +525,7 @@ int main_(Teuchos::CommandLineProcessor& clp, Xpetra::UnderlyingLib& lib, int ar } // Solve the system numResolves+1 times - SystemSolve(A, X, B, H, Prec, out2, solveType, belosType, profileSolve, useAMGX, useML, cacheSize, numResolves, scaleResidualHist, solvePreconditioned, maxIts, tol, computeCondEst); + SystemSolve(A, X, B, H, Prec, out2, solveType, belosType, profileSolve, useAMGX, useML, cacheSize, numResolves, scaleResidualHist, solvePreconditioned, maxIts, tol, computeCondEst, enforceBoundaryConditionsOnInitialGuess); comm->barrier(); } catch (const std::exception& e) { diff --git a/packages/muelu/test/scaling/DriverCore.hpp b/packages/muelu/test/scaling/DriverCore.hpp index e04b9e51748b..89e3dd5c04c6 100644 --- a/packages/muelu/test/scaling/DriverCore.hpp +++ b/packages/muelu/test/scaling/DriverCore.hpp @@ -233,7 +233,8 @@ void SystemSolve(Teuchos::RCP using Teuchos::RCP; using Teuchos::rcp; @@ -272,6 +273,10 @@ void SystemSolve(Teuchos::RCP tm = rcp(new TimeMonitor(*TimeMonitor::getNewTimer("Driver: 3 - LHS and RHS initialization"))); X->putScalar(zero); + if (enforceBoundaryConditionsOnInitialGuess) { + out << "Enforcing boundary conditions on initial guess\n"; + Utilities::EnforceInitialCondition(*A, *B, *X); + } tm = Teuchos::null; if (solveType == "none") { diff --git a/packages/muelu/test/unit_tests/Utilities.cpp b/packages/muelu/test/unit_tests/Utilities.cpp index 
97842d36f208..77a18d329854 100644 --- a/packages/muelu/test/unit_tests/Utilities.cpp +++ b/packages/muelu/test/unit_tests/Utilities.cpp @@ -144,6 +144,47 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Utilities, DetectDirichletRows, Scalar, LocalO } // DetectDirichletRows +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Utilities, EnforceInitialCondition, Scalar, LocalOrdinal, GlobalOrdinal, Node) { +#include + MUELU_TESTING_SET_OSTREAM; + MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); + + typedef typename Teuchos::ScalarTraits TST; + + RCP A = TestHelpers::TestFactory::Build1DPoisson(100); + Teuchos::ArrayView indices; + Teuchos::ArrayView values; + + LocalOrdinal localRowToZero = 5; + A->resumeFill(); + A->getLocalRowView(localRowToZero, indices, values); + Array newvalues(values.size(), TST::zero()); + for (int j = 0; j < indices.size(); j++) + // keep diagonal + if (indices[j] == localRowToZero) newvalues[j] = values[j]; + A->replaceLocalValues(localRowToZero, indices, newvalues); + + A->fillComplete(); + + auto RHS = MultiVectorFactory::Build(A->getRangeMap(), 1); + RHS->randomize(); + auto X = MultiVectorFactory::Build(A->getDomainMap(), 1); + X->putScalar(666. * TST::one()); + Utilities::EnforceInitialCondition(*A, *RHS, *X, TST::magnitude(0.26)); + + auto lclRHS = RHS->getHostLocalView(Xpetra::Access::ReadOnly); + auto lclX = X->getHostLocalView(Xpetra::Access::ReadOnly); + + // row 5 is Dirichlet + for (size_t row = 0; row < A->getLocalNumRows(); ++row) { + if (row == 5) { + TEST_EQUALITY(lclRHS(row, 0), lclX(row, 0)); + } else { + TEST_EQUALITY(666. 
* TST::one(), lclX(row, 0)); + } + } +} // EnforceInitialCondition + TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Utilities, GetDiagonalInverse, Scalar, LocalOrdinal, GlobalOrdinal, Node) { #include MUELU_TESTING_SET_OSTREAM; @@ -683,6 +724,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(Utilities, TransposeNonsymmetricConstMatrix, S #define MUELU_ETI_GROUP(Scalar, LO, GO, Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Utilities, MatMatMult_EpetraVsTpetra, Scalar, LO, GO, Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Utilities, DetectDirichletRows, Scalar, LO, GO, Node) \ + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Utilities, EnforceInitialCondition, Scalar, LO, GO, Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Utilities, GetDiagonalInverse, Scalar, LO, GO, Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Utilities, GetLumpedDiagonal, Scalar, LO, GO, Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(Utilities, GetInverse, Scalar, LO, GO, Node) \ From a9ab78b1d48123667891e6569c72813f459b0526 Mon Sep 17 00:00:00 2001 From: "Justin M. LaPre" Date: Mon, 25 Nov 2024 13:18:38 -0700 Subject: [PATCH 04/23] add FORCE back to Trilinos_ENABLE_TESTS Signed-off-by: Justin M. 
LaPre --- packages/framework/ini-files/config-specs.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 984232a11289..de052bca3530 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1557,7 +1557,7 @@ use SEMS_COMMON_CUDA_11 use CUDA11-RUN-SERIAL-TESTS -opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL : OFF +opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all] use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables From 37caafbd64e07d2248816c0f041339870b2267ba Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Mon, 2 Dec 2024 09:47:18 -0800 Subject: [PATCH 05/23] Fix out-of-bound access on RHS Signed-off-by: Vinh Dang --- packages/adelus/src/Adelus_forward.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/adelus/src/Adelus_forward.hpp b/packages/adelus/src/Adelus_forward.hpp index d6a6db280cc3..a1f573d4faf7 100644 --- a/packages/adelus/src/Adelus_forward.hpp +++ b/packages/adelus/src/Adelus_forward.hpp @@ -105,9 +105,11 @@ void forward(HandleType& ahandle, ZViewType& Z, RHSViewType& RHS) // count_row++; //} int curr_lrid = k/nprocs_col;//note: nprocs_col (global var) cannot be read in a device function - Kokkos::parallel_for(Kokkos::RangePolicy(0,RHS.extent(1)), KOKKOS_LAMBDA (const int i) { - ck(0,i) = RHS(curr_lrid,i); - }); + if (curr_lrid < static_cast(RHS.extent(0))) { //note: to avoid out-of-bounds access on RHS + Kokkos::parallel_for(Kokkos::RangePolicy(0,RHS.extent(1)), KOKKOS_LAMBDA (const int i) { + ck(0,i) = RHS(curr_lrid,i); + }); + } #if defined(ADELUS_HOST_PINNED_MEM_MPI) && (defined(KOKKOS_ENABLE_CUDA) || 
defined(KOKKOS_ENABLE_HIP)) Kokkos::deep_copy(h_ck,ck); From 519dbe40e2d9bb21e3b3fcce6368a9cbe4377239 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Mon, 2 Dec 2024 10:11:16 -0800 Subject: [PATCH 06/23] Update note Signed-off-by: Vinh Dang --- packages/adelus/src/Adelus_forward.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/adelus/src/Adelus_forward.hpp b/packages/adelus/src/Adelus_forward.hpp index a1f573d4faf7..19b04ee6f2b8 100644 --- a/packages/adelus/src/Adelus_forward.hpp +++ b/packages/adelus/src/Adelus_forward.hpp @@ -105,7 +105,7 @@ void forward(HandleType& ahandle, ZViewType& Z, RHSViewType& RHS) // count_row++; //} int curr_lrid = k/nprocs_col;//note: nprocs_col (global var) cannot be read in a device function - if (curr_lrid < static_cast(RHS.extent(0))) { //note: to avoid out-of-bounds access on RHS + if (curr_lrid < static_cast(RHS.extent(0))) { //note: to avoid out-of-bounds access on the RHS Kokkos::parallel_for(Kokkos::RangePolicy(0,RHS.extent(1)), KOKKOS_LAMBDA (const int i) { ck(0,i) = RHS(curr_lrid,i); }); From 656000ebbd28117fee85103149be8ac54532abc7 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Mon, 2 Dec 2024 11:09:42 -0500 Subject: [PATCH 07/23] Tacho : require RocBLAS, RocSparse, and RocSolver for HIP backend Signed-off-by: iyamazaki --- packages/shylu/shylu_node/tacho/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/shylu/shylu_node/tacho/CMakeLists.txt b/packages/shylu/shylu_node/tacho/CMakeLists.txt index 2efce7b95211..affbed55bbb7 100644 --- a/packages/shylu/shylu_node/tacho/CMakeLists.txt +++ b/packages/shylu/shylu_node/tacho/CMakeLists.txt @@ -7,6 +7,12 @@ IF (Kokkos_ENABLE_CUDA) ENDIF() ENDIF() +IF (Kokkos_ENABLE_HIP) + IF (NOT (TPL_ENABLE_ROCBLAS AND TPL_ENABLE_ROCSPARSE AND TPL_ENABLE_ROCSOLVER)) + MESSAGE(FATAL_ERROR "Tacho can not be build with HIP without enabling ROCBLAS, ROCSPARSE, and ROCSOLVER TPLs.") + ENDIF() +ENDIF() + IF (Kokkos_ENABLE_THREADS) IF (NOT 
Kokkos_ENABLE_OPENMP) MESSAGE(FATAL_ERROR "Tacho can not be build with Pthreads as the Kokkos Host Backend.") From 9fdb887762f1e5c1eac0e8f0705e84ffd11f0d80 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Mon, 2 Dec 2024 19:32:31 -0500 Subject: [PATCH 08/23] Tacho : update TPL instruction Signed-off-by: iyamazaki --- packages/shylu/shylu_node/tacho/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/shylu/shylu_node/tacho/CMakeLists.txt b/packages/shylu/shylu_node/tacho/CMakeLists.txt index affbed55bbb7..239098f10aba 100644 --- a/packages/shylu/shylu_node/tacho/CMakeLists.txt +++ b/packages/shylu/shylu_node/tacho/CMakeLists.txt @@ -9,7 +9,7 @@ ENDIF() IF (Kokkos_ENABLE_HIP) IF (NOT (TPL_ENABLE_ROCBLAS AND TPL_ENABLE_ROCSPARSE AND TPL_ENABLE_ROCSOLVER)) - MESSAGE(FATAL_ERROR "Tacho can not be build with HIP without enabling ROCBLAS, ROCSPARSE, and ROCSOLVER TPLs.") + MESSAGE(FATAL_ERROR "Tacho can not be build with HIP without enabling ROCBLAS, ROCSPARSE, and ROCSOLVER TPLs. Please disable Tacho, or enable these three TPLs") ENDIF() ENDIF() From 749f3fcc4f5d7b7bf34a06ab7a85a458dcdeb6a5 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Tue, 3 Dec 2024 09:05:12 -0700 Subject: [PATCH 09/23] KOKKOS_CUSPARSE_SAFE_CALL -> IFPACK2_DETAILS_CUSPARSE_SAFE_CALL Kokkos Kernels is deprecating KOKKOS_CUSPARSE_SAFE_CALL. Bring it in-tree as IFPACK2_DETAILS_CUSPARSE_SAFE_CALL. 
Signed-off-by: Carl Pearson --- ...fpack2_LocalSparseTriangularSolver_def.hpp | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_def.hpp b/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_def.hpp index c5ecbcbde653..0c39e53f65f4 100644 --- a/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_def.hpp +++ b/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_def.hpp @@ -10,6 +10,9 @@ #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP +#include // ostringstream +#include // runtime_error + #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp" #include "Tpetra_CrsMatrix.hpp" #include "Tpetra_Core.hpp" @@ -24,6 +27,53 @@ namespace Ifpack2 { namespace Details { + +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) + +inline void cusparse_error_throw(cusparseStatus_t cusparseStatus, const char* name, + const char* file, const int line) { + std::ostringstream out; +#if defined(CUSPARSE_VERSION) && (10300 <= CUSPARSE_VERSION) + out << name << " error( " << cusparseGetErrorName(cusparseStatus) << "): " << cusparseGetErrorString(cusparseStatus); +#else + out << name << " error( "; + switch (cusparseStatus) { + case CUSPARSE_STATUS_NOT_INITIALIZED: + out << "CUSPARSE_STATUS_NOT_INITIALIZED): cusparse handle was not " + "created correctly."; + break; + case CUSPARSE_STATUS_ALLOC_FAILED: + out << "CUSPARSE_STATUS_ALLOC_FAILED): you might tried to allocate too " + "much memory"; + break; + case CUSPARSE_STATUS_INVALID_VALUE: out << "CUSPARSE_STATUS_INVALID_VALUE)"; break; + case CUSPARSE_STATUS_ARCH_MISMATCH: out << "CUSPARSE_STATUS_ARCH_MISMATCH)"; break; + case CUSPARSE_STATUS_MAPPING_ERROR: out << "CUSPARSE_STATUS_MAPPING_ERROR)"; break; + case CUSPARSE_STATUS_EXECUTION_FAILED: out << "CUSPARSE_STATUS_EXECUTION_FAILED)"; break; + case CUSPARSE_STATUS_INTERNAL_ERROR: out << 
"CUSPARSE_STATUS_INTERNAL_ERROR)"; break; + case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: out << "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)"; break; + case CUSPARSE_STATUS_ZERO_PIVOT: out << "CUSPARSE_STATUS_ZERO_PIVOT)"; break; + default: out << "unrecognized error code): this is bad!"; break; + } +#endif // CUSPARSE_VERSION + if (file) { + out << " " << file << ":" << line; + } + throw std::runtime_error(out.str()); +} + +inline void cusparse_safe_call(cusparseStatus_t cusparseStatus, const char* name, const char* file = nullptr, + const int line = 0) { + if (CUSPARSE_STATUS_SUCCESS != cusparseStatus) { + cusparse_error_throw(cusparseStatus, name, file, line); + } +} + +#define IFPACK2_DETAILS_CUSPARSE_SAFE_CALL(call) \ + Ifpack2::Details::cusparse_safe_call(call, #call, __FILE__, __LINE__) + +#endif // defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) + struct TrisolverType { enum Enum { Internal, //!< Tpetra::CrsMatrix::localSolve @@ -675,7 +725,7 @@ compute () #if (CUSPARSE_VERSION >= 12100) auto *sptrsv_handle = kh_v_[i]->get_sptrsv_handle(); auto cusparse_handle = sptrsv_handle->get_cuSparseHandle(); - KOKKOS_CUSPARSE_SAFE_CALL( + IFPACK2_DETAILS_CUSPARSE_SAFE_CALL( cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream())); cusparseSpSV_updateMatrix(cusparse_handle->handle, cusparse_handle->spsvDescr, From fccec092cd16d3235cd7de1428b594f54fd6f4cc Mon Sep 17 00:00:00 2001 From: "Justin M. LaPre" Date: Mon, 2 Dec 2024 09:43:15 -0700 Subject: [PATCH 10/23] add rhel8_sems...all-no-epetra This turns on CUDA11 tests and Trilinos_ENABLE_TESTS. epetra is turned off in the inherited config. Signed-off-by: Justin M. 
LaPre --- packages/framework/ini-files/config-specs.ini | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index de052bca3530..b70fe5b0fc33 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1563,6 +1563,12 @@ opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables use PACKAGE-ENABLES|ALL-NO-EPETRA +[rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all-no-epetra] +use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all + +use CUDA11-RUN-SERIAL-TESTS +opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : ON + [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables] # uses sems-v2 modules use RHEL8 From 958c71d817034cf1789b48b3c26cbf8fad213c14 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Tue, 3 Dec 2024 10:54:47 -0700 Subject: [PATCH 11/23] Fix framework unit test Signed-off-by: Samuel E. 
Browne --- .../pr_tools/unittests/test_PullRequestLinuxDriverTest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py b/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py index 8d596681d910..c02193f18a28 100755 --- a/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py +++ b/packages/framework/pr_tools/unittests/test_PullRequestLinuxDriverTest.py @@ -84,6 +84,7 @@ def setUp(self): req_mem_per_core=3.0, max_cores_allowed=12, num_concurrent_tests=-1, + slots_per_gpu=2, ccache_enable=False, dry_run=False, use_explicit_cachefile=False, From 9ec47bc3a4c735da7f01ff8ad942a10830ced5a2 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Tue, 3 Dec 2024 15:48:45 -0700 Subject: [PATCH 12/23] build_env_info: resolve issues with ninja (#13647) This PR resolves issues with ninja as generator reported in https://github.com/trilinos/Trilinos/issues/13643 In Trilinos builds with ninja as generator, calling `make clean` followed by recompiling resulted in compilation errors due to a missing generated file Kokkos_Version_Info.cpp that was deleted during clean The changes in this PR resolve the issue Added notes: * The issue did not occur with Unix Makefiles as generator * The issue did not occur with standalone kokkos Signed-off-by: Nathan Ellingwood Co-authored-by: Daniel Arndt --- packages/kokkos/cmake/build_env_info.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/kokkos/cmake/build_env_info.cmake b/packages/kokkos/cmake/build_env_info.cmake index ac28b2d8503a..76afbb74b63c 100644 --- a/packages/kokkos/cmake/build_env_info.cmake +++ b/packages/kokkos/cmake/build_env_info.cmake @@ -4,7 +4,7 @@ find_package(Git QUIET) set(CURRENT_LIST_DIR ${CMAKE_CURRENT_LIST_DIR}) set(pre_configure_dir ${CMAKE_CURRENT_LIST_DIR}) -set(post_configure_dir ${CMAKE_BINARY_DIR}/generated) +set(post_configure_dir ${CMAKE_CURRENT_BINARY_DIR}/generated) 
set(pre_configure_file ${pre_configure_dir}/Kokkos_Version_Info.cpp.in) set(post_configure_file ${post_configure_dir}/Kokkos_Version_Info.cpp) @@ -105,7 +105,7 @@ function(check_git_setup) ${CURRENT_LIST_DIR}/build_env_info.cmake BYPRODUCTS ${post_configure_file} ) - add_library(impl_git_version ${CMAKE_BINARY_DIR}/generated/Kokkos_Version_Info.cpp) + add_library(impl_git_version ${CMAKE_CURRENT_BINARY_DIR}/generated/Kokkos_Version_Info.cpp) target_include_directories(impl_git_version PUBLIC ${CMAKE_BINARY_DIR}/generated) target_compile_features(impl_git_version PRIVATE cxx_raw_string_literals) add_dependencies(impl_git_version AlwaysCheckGit) From dac035af0116cc4b7129148088e99e85fb96d9b7 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Mon, 2 Dec 2024 15:37:30 -0700 Subject: [PATCH 13/23] Phalanx: remove use of deprecated kokkos code in 4.5 release Signed-off-by: Roger Pawlowski --- packages/phalanx/test/Kokkos/tKokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/phalanx/test/Kokkos/tKokkos.cpp b/packages/phalanx/test/Kokkos/tKokkos.cpp index b42e8355e4b3..560fffcae102 100644 --- a/packages/phalanx/test/Kokkos/tKokkos.cpp +++ b/packages/phalanx/test/Kokkos/tKokkos.cpp @@ -954,7 +954,7 @@ namespace phalanx_test { n.count_ += 1; n.mean_ += ( a(i) - n_minus_one.mean_ ) / n.count_; n.M2_ += ( a(i) - n_minus_one.mean_ ) * ( a(i) - n.mean_ ); - success_local = Kokkos::atomic_compare_exchange_strong(&(values()),n_minus_one,n); + success_local = (n_minus_one == Kokkos::atomic_compare_exchange(&(values()),n_minus_one,n)); } while (!success_local); }); PHX::Device().fence(); From 48adaa24d5f001d08cfff26f339e020c85eaf2dd Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Tue, 3 Dec 2024 07:46:34 -0700 Subject: [PATCH 14/23] Phalanx: cleanup unit test warnings Signed-off-by: Roger Pawlowski --- .../phalanx/test/Kokkos/tKokkosVirtualFunctionOnDevice.cpp | 7 ++++--- .../UnmanagedFields/MDField/MDField_TestEvaluators_Def.hpp | 4 ++-- 
packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/packages/phalanx/test/Kokkos/tKokkosVirtualFunctionOnDevice.cpp b/packages/phalanx/test/Kokkos/tKokkosVirtualFunctionOnDevice.cpp index 5f5aca03b35b..07ffee0b5340 100644 --- a/packages/phalanx/test/Kokkos/tKokkosVirtualFunctionOnDevice.cpp +++ b/packages/phalanx/test/Kokkos/tKokkosVirtualFunctionOnDevice.cpp @@ -46,12 +46,13 @@ namespace phalanx_test { // Derived class class IdealGasLaw : public EquationOfState { - double mass_; // mass + // double mass_; // mass double gamma_; // ratio of specific heats - double r_; // Boltzmann constant + // double r_; // Boltzmann constant public: KOKKOS_FUNCTION - IdealGasLaw() : mass_(28.0), gamma_(5./3.), r_(1.38066e-23) {} + // IdealGasLaw() : mass_(28.0), gamma_(5./3.), r_(1.38066e-23) {} + IdealGasLaw() : gamma_(5./3.) {} KOKKOS_FUNCTION double a(const double& rho, diff --git a/packages/phalanx/test/UnmanagedFields/MDField/MDField_TestEvaluators_Def.hpp b/packages/phalanx/test/UnmanagedFields/MDField/MDField_TestEvaluators_Def.hpp index 0e6687c439cd..c24bdcfc3384 100644 --- a/packages/phalanx/test/UnmanagedFields/MDField/MDField_TestEvaluators_Def.hpp +++ b/packages/phalanx/test/UnmanagedFields/MDField/MDField_TestEvaluators_Def.hpp @@ -12,8 +12,8 @@ #include "Phalanx_DataLayout_MDALayout.hpp" #include "Phalanx_FieldTag_Tag.hpp" -class CELL; -class BASIS; +struct CELL; +struct BASIS; namespace PHX { diff --git a/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp b/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp index 4c6884f8cbdd..2ba4ec5ef18f 100644 --- a/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp +++ b/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp @@ -14,8 +14,8 @@ #include "Phalanx_FieldTag_Tag.hpp" #include "Phalanx_MDField.hpp" -class CELL; -class BASIS; +struct CELL; +struct BASIS; namespace PHX { From a2ae6c437101f53580bc3bca45244cb6a82fe271 Mon 
Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Tue, 3 Dec 2024 07:47:46 -0700 Subject: [PATCH 15/23] Phalanx: fix warnings for c++20 code deprecations Signed-off-by: Roger Pawlowski --- .../phalanx/example/FiniteElementAssembly/Mesh.cpp | 8 ++++---- .../example/FiniteElementAssembly/WorksetBuilder.hpp | 4 ++-- .../evaluators/GatherSolution_Def.hpp | 5 ++--- .../evaluators/ScatterResidual_Def.hpp | 11 ++++------- .../evaluators/GatherSolution_Def.hpp | 4 ++-- .../evaluators/ScatterResidual_Def.hpp | 12 ++++-------- .../EvaluatorUnitTester/AllRanksEvaluator_Def.hpp | 2 +- .../DuplicateFieldEvaluator_Def.hpp | 2 +- .../test/EvaluatorUnitTester/SimpleEvaluator_Def.hpp | 2 +- packages/phalanx/test/Kokkos/tKokkosNestedLambda.cpp | 4 ++-- packages/phalanx/test/Kokkos/tKokkosPerf.cpp | 2 +- 11 files changed, 24 insertions(+), 32 deletions(-) diff --git a/packages/phalanx/example/FiniteElementAssembly/Mesh.cpp b/packages/phalanx/example/FiniteElementAssembly/Mesh.cpp index b18837921c17..0c8ad139218d 100644 --- a/packages/phalanx/example/FiniteElementAssembly/Mesh.cpp +++ b/packages/phalanx/example/FiniteElementAssembly/Mesh.cpp @@ -204,7 +204,7 @@ KOKKOS_INLINE_FUNCTION void Mesh::operator() (const ComputeJac_Tag& , const team_t& team) const { const int cell = team.league_rank(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,qp_.extent(0)), [=] (const int& qp) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,qp_.extent(0)), [&] (const int& qp) { for (int basis=0; basis < static_cast(basis_.extent(1)); ++basis) { for (int i=0; i < 3; ++i) { for (int j=0; j < 3; ++j) { @@ -220,7 +220,7 @@ KOKKOS_INLINE_FUNCTION void Mesh::operator() (const ComputeInvJac_Tag& , const team_t& team) const { const int cell = team.league_rank(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,qp_.extent(0)), [=] (const int& qp) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,qp_.extent(0)), [&] (const int& qp) { inv_jac_(cell,qp,0,0) = jac_(cell,qp,1,1) * 
jac_(cell,qp,2,2) - jac_(cell,qp,1,2) * jac_(cell,qp,2,1); inv_jac_(cell,qp,1,1) = jac_(cell,qp,2,2) * jac_(cell,qp,0,0) - jac_(cell,qp,2,0) * jac_(cell,qp,0,2); inv_jac_(cell,qp,2,2) = jac_(cell,qp,0,0) * jac_(cell,qp,1,1) - jac_(cell,qp,0,1) * jac_(cell,qp,1,0); @@ -246,7 +246,7 @@ KOKKOS_INLINE_FUNCTION void Mesh::operator() (const ComputeCoords_Tag& , const team_t& team) const { const int cell = team.league_rank(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,qp_.extent(0)), [=] (const int& qp) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,qp_.extent(0)), [&] (const int& qp) { for (int basis=0; basis < static_cast(basis_.extent(1)); ++basis) { qp_coords_(cell,qp,0) += basis_(qp,basis) * coords_(cell,basis,0); qp_coords_(cell,qp,1) += basis_(qp,basis) * coords_(cell,basis,1); @@ -260,7 +260,7 @@ KOKKOS_INLINE_FUNCTION void Mesh::operator() (const ComputeGradBasisReal_Tag& , const team_t& team) const { const int cell = team.league_rank(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,grad_basis_real_.extent(1)), [=] (const int& qp) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,grad_basis_real_.extent(1)), [&] (const int& qp) { const int num_basis = static_cast(grad_basis_real_.extent(2)); for (int basis=0; basis < num_basis; ++basis) for (int dim1=0; dim1 < 3; ++dim1) diff --git a/packages/phalanx/example/FiniteElementAssembly/WorksetBuilder.hpp b/packages/phalanx/example/FiniteElementAssembly/WorksetBuilder.hpp index aab8d2e09ce0..42e4bb4d28d5 100644 --- a/packages/phalanx/example/FiniteElementAssembly/WorksetBuilder.hpp +++ b/packages/phalanx/example/FiniteElementAssembly/WorksetBuilder.hpp @@ -31,7 +31,7 @@ struct WorksetBuilder { void operator() (const CopyWorksetDetJac_Tag& , const team_t& team) const { const int cell = team.league_rank(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,mesh_det_jac.extent(1)), [=] (const int& qp) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,mesh_det_jac.extent(1)), [&] 
(const int& qp) { workset_det_jac(cell,qp) = mesh_det_jac(cell+first_cell_global_index,qp); //printf("det_jac=%f\n",workset.det_jac_(cell,qp)); }); @@ -41,7 +41,7 @@ struct WorksetBuilder { void operator() (const CopyWorksetGradBasisReal_Tag& , const team_t& team) const { const int cell = team.league_rank(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,mesh_det_jac.extent(1)), [=] (const int& qp) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,mesh_det_jac.extent(1)), [&] (const int& qp) { for (int basis=0; basis < static_cast(mesh_grad_basis_real.extent(2)); ++basis) for (int dim=0; dim < static_cast(mesh_grad_basis_real.extent(3)); ++dim) workset_grad_basis_real(cell,qp,basis,dim) = diff --git a/packages/phalanx/example/FiniteElementAssembly/evaluators/GatherSolution_Def.hpp b/packages/phalanx/example/FiniteElementAssembly/evaluators/GatherSolution_Def.hpp index 511117b15828..bc4ca937f18a 100644 --- a/packages/phalanx/example/FiniteElementAssembly/evaluators/GatherSolution_Def.hpp +++ b/packages/phalanx/example/FiniteElementAssembly/evaluators/GatherSolution_Def.hpp @@ -50,7 +50,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const const int cell = team.league_rank(); if (team.team_rank() == 0) { // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,field.extent(1)), [=] (const int& node) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,field.extent(1)), [&] (const int& node) { field(cell,node) = x( gids(cell_global_offset_index+cell,node) * num_equations + field_index); }); } @@ -94,8 +94,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const { const int cell = team.league_rank(); if (team.team_rank() == 0) { - // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,field.extent(1)), [=] (const int& node) { + 
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,field.extent(1)), [&] (const int& node) { field(cell,node).val() = x(gids(cell_global_offset_index+cell,node) * num_equations + field_index); field(cell,node).fastAccessDx(num_equations * node + field_index) = 1.0; }); diff --git a/packages/phalanx/example/FiniteElementAssembly/evaluators/ScatterResidual_Def.hpp b/packages/phalanx/example/FiniteElementAssembly/evaluators/ScatterResidual_Def.hpp index 1ddb6ffd7d82..71524568daa1 100644 --- a/packages/phalanx/example/FiniteElementAssembly/evaluators/ScatterResidual_Def.hpp +++ b/packages/phalanx/example/FiniteElementAssembly/evaluators/ScatterResidual_Def.hpp @@ -54,7 +54,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const const int local_cell = team.league_rank(); if (team.team_rank() == 0) { // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,residual_contribution.extent(1)), [=] (const int& node) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,residual_contribution.extent(1)), [&] (const int& node) { const int residual_index = gids(cell_global_offset_index+local_cell,node) * num_equations + equation_index; global_residual_atomic(residual_index) += residual_contribution(local_cell,node); }); @@ -106,15 +106,13 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const const int num_nodes = residual_contribution.extent(1); if (team.team_rank() == 0) { - // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_nodes), [=] (const int& node) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_nodes), [&] (const int& node) { const int global_row_index = gids(cell_global_offset_index+cell,node) * num_equations + equation_index; global_residual_atomic(global_row_index) += residual_contribution(cell,node).val(); }); } - // Fix gcc 5/6 lambda bug by changing to 
capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_nodes), [=] (const int& node) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_nodes), [&] (const int& node) { const int global_row_index = gids(cell_global_offset_index+cell,node) * num_equations + equation_index; @@ -122,8 +120,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const for (int col_node=0; col_node < num_nodes; ++col_node) { // loop over equations - // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_equations),[=] (const int& col_eq) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_equations),[&] (const int& col_eq) { const int global_col_index = gids(cell_global_offset_index+cell,col_node) * num_equations + col_eq; const int derivative_index = col_node * num_equations + col_eq; global_jacobian.sumIntoValues(global_row_index,&global_col_index,1, diff --git a/packages/phalanx/example/FiniteElementAssembly_MixedFieldTypes/evaluators/GatherSolution_Def.hpp b/packages/phalanx/example/FiniteElementAssembly_MixedFieldTypes/evaluators/GatherSolution_Def.hpp index defeee2c940a..787f3eeaeefe 100644 --- a/packages/phalanx/example/FiniteElementAssembly_MixedFieldTypes/evaluators/GatherSolution_Def.hpp +++ b/packages/phalanx/example/FiniteElementAssembly_MixedFieldTypes/evaluators/GatherSolution_Def.hpp @@ -50,7 +50,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const const int cell = team.league_rank(); if (team.team_rank() == 0) { // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,field.extent(1)), [=] (const int& node) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,field.extent(1)), [&] (const int& node) { field(cell,node) = x( gids(cell_global_offset_index+cell,node) * num_equations + field_index); }); } @@ -95,7 +95,7 
@@ operator()(const Kokkos::TeamPolicy::member_type& team) const const int cell = team.league_rank(); if (team.team_rank() == 0) { // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,field.extent(1)), [=] (const int& node) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,field.extent(1)), [&] (const int& node) { field(cell,node).val() = x(gids(cell_global_offset_index+cell,node) * num_equations + field_index); field(cell,node).fastAccessDx(num_equations * node + field_index) = 1.0; }); diff --git a/packages/phalanx/example/FiniteElementAssembly_MixedFieldTypes/evaluators/ScatterResidual_Def.hpp b/packages/phalanx/example/FiniteElementAssembly_MixedFieldTypes/evaluators/ScatterResidual_Def.hpp index 1ddb6ffd7d82..72d729ae454b 100644 --- a/packages/phalanx/example/FiniteElementAssembly_MixedFieldTypes/evaluators/ScatterResidual_Def.hpp +++ b/packages/phalanx/example/FiniteElementAssembly_MixedFieldTypes/evaluators/ScatterResidual_Def.hpp @@ -53,8 +53,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const { const int local_cell = team.league_rank(); if (team.team_rank() == 0) { - // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,residual_contribution.extent(1)), [=] (const int& node) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,residual_contribution.extent(1)), [&] (const int& node) { const int residual_index = gids(cell_global_offset_index+local_cell,node) * num_equations + equation_index; global_residual_atomic(residual_index) += residual_contribution(local_cell,node); }); @@ -106,15 +105,13 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const const int num_nodes = residual_contribution.extent(1); if (team.team_rank() == 0) { - // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - 
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_nodes), [=] (const int& node) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_nodes), [&] (const int& node) { const int global_row_index = gids(cell_global_offset_index+cell,node) * num_equations + equation_index; global_residual_atomic(global_row_index) += residual_contribution(cell,node).val(); }); } - // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_nodes), [=] (const int& node) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_nodes), [&] (const int& node) { const int global_row_index = gids(cell_global_offset_index+cell,node) * num_equations + equation_index; @@ -122,8 +119,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const for (int col_node=0; col_node < num_nodes; ++col_node) { // loop over equations - // Fix gcc 5/6 lambda bug by changing to capture by value (potentially less efficient) - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_equations),[=] (const int& col_eq) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_equations),[&] (const int& col_eq) { const int global_col_index = gids(cell_global_offset_index+cell,col_node) * num_equations + col_eq; const int derivative_index = col_node * num_equations + col_eq; global_jacobian.sumIntoValues(global_row_index,&global_col_index,1, diff --git a/packages/phalanx/test/EvaluatorUnitTester/AllRanksEvaluator_Def.hpp b/packages/phalanx/test/EvaluatorUnitTester/AllRanksEvaluator_Def.hpp index b69110b58d97..fd69b0154179 100644 --- a/packages/phalanx/test/EvaluatorUnitTester/AllRanksEvaluator_Def.hpp +++ b/packages/phalanx/test/EvaluatorUnitTester/AllRanksEvaluator_Def.hpp @@ -60,7 +60,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const { const int i = team.league_rank(); - Kokkos::single(Kokkos::PerTeam(team), [=] () { + Kokkos::single(Kokkos::PerTeam(team), [&] () { x1(i) = f1(i) * f1(i); }); 
diff --git a/packages/phalanx/test/EvaluatorUnitTester/DuplicateFieldEvaluator_Def.hpp b/packages/phalanx/test/EvaluatorUnitTester/DuplicateFieldEvaluator_Def.hpp index 6c17b2eee83f..c6301328f516 100644 --- a/packages/phalanx/test/EvaluatorUnitTester/DuplicateFieldEvaluator_Def.hpp +++ b/packages/phalanx/test/EvaluatorUnitTester/DuplicateFieldEvaluator_Def.hpp @@ -42,7 +42,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const const int cell = team.league_rank(); const int num_qp = static_cast(a.extent(1)); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_qp), [=] (const int& qp) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_qp), [&] (const int& qp) { const int num_dim = static_cast(c.extent(2)); a(cell,qp) = 0.0; for (int i = 0; i < num_dim; ++i) diff --git a/packages/phalanx/test/EvaluatorUnitTester/SimpleEvaluator_Def.hpp b/packages/phalanx/test/EvaluatorUnitTester/SimpleEvaluator_Def.hpp index 50b8eac74e77..847422c9ce6f 100644 --- a/packages/phalanx/test/EvaluatorUnitTester/SimpleEvaluator_Def.hpp +++ b/packages/phalanx/test/EvaluatorUnitTester/SimpleEvaluator_Def.hpp @@ -40,7 +40,7 @@ operator()(const Kokkos::TeamPolicy::member_type& team) const const int cell = team.league_rank(); const int num_qp = static_cast(a.extent(1)); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_qp), [=] (const int& qp) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_qp), [&] (const int& qp) { const int num_dim = static_cast(c.extent(2)); a(cell,qp) = 0.0; for (int i = 0; i < num_dim; ++i) diff --git a/packages/phalanx/test/Kokkos/tKokkosNestedLambda.cpp b/packages/phalanx/test/Kokkos/tKokkosNestedLambda.cpp index 1a5b2b7bd2d2..a744c5d1631c 100644 --- a/packages/phalanx/test/Kokkos/tKokkosNestedLambda.cpp +++ b/packages/phalanx/test/Kokkos/tKokkosNestedLambda.cpp @@ -34,9 +34,9 @@ class MyFunctor { { const int cell = team.league_rank(); const int num_pts = a_.extent(1); - 
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_pts), [=] (const int& pt) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_pts), [&] (const int& pt) { const int num_eq = a_.extent(2); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_eq), [=] (const int& eq) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,num_eq), [&] (const int& eq) { c_(cell,pt,eq) = a_(cell,pt,eq) + b_(cell,pt,eq); }); }); diff --git a/packages/phalanx/test/Kokkos/tKokkosPerf.cpp b/packages/phalanx/test/Kokkos/tKokkosPerf.cpp index abf9001649d9..4c5d25cb9dbf 100644 --- a/packages/phalanx/test/Kokkos/tKokkosPerf.cpp +++ b/packages/phalanx/test/Kokkos/tKokkosPerf.cpp @@ -82,7 +82,7 @@ namespace phalanx_test { { const int i = thread.league_rank(); const int num_qp = rho_.extent(1); - Kokkos::parallel_for(Kokkos::TeamThreadRange(thread,0,num_qp), [=] (const int& ip) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(thread,0,num_qp), [&] (const int& ip) { rho_(i,ip) = k_(0) * P_(i,ip) / T_(i,ip); }); } From 5461f373541758dc36912c8e6e377ae7b33fe6ad Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Wed, 27 Nov 2024 14:08:42 -0700 Subject: [PATCH 16/23] Add uninitialized to promoted warnings list Signed-off-by: Samuel E. 
Browne --- cmake/ProjectCompilerPostConfig.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ProjectCompilerPostConfig.cmake b/cmake/ProjectCompilerPostConfig.cmake index 2f47f7104de1..1cdedd56a667 100644 --- a/cmake/ProjectCompilerPostConfig.cmake +++ b/cmake/ProjectCompilerPostConfig.cmake @@ -43,7 +43,7 @@ IF (KokkosEnable) ENDIF() set(upcoming_warnings shadow ${Trilinos_ADDITIONAL_WARNINGS}) -set(promoted_warnings parentheses sign-compare unused-variable reorder) +set(promoted_warnings parentheses sign-compare unused-variable reorder uninitialized) if("${Trilinos_WARNINGS_MODE}" STREQUAL "WARN") enable_warnings("${upcoming_warnings}") From 655b422531a1e4b9a17e3e01097f2e7b40691957 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Tue, 3 Dec 2024 13:58:48 -0700 Subject: [PATCH 17/23] Run more Kokkos tests serially with CUDA builds See discussion at: https://github.com/trilinos/Trilinos/pull/13637#issuecomment-2515383203. Signed-off-by: Samuel E. Browne --- packages/framework/ini-files/config-specs.ini | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index cf190a554ffb..27e71e07c0bd 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1249,8 +1249,10 @@ opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF -[CUDA11-RUN-SERIAL-TESTS] +[CUDA-RUN-SERIAL-TESTS] opt-set-cmake-var Kokkos_CoreUnitTest_Cuda1_SET_RUN_SERIAL BOOL FORCE : ON +opt-set-cmake-var Kokkos_CoreUnitTest_CudaTimingBased_SET_RUN_SERIAL BOOL FORCE : ON +opt-set-cmake-var Kokkos_CoreUnitTest_Default_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var KokkosKernels_sparse_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var 
KokkosKernels_batched_dla_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var Intrepid2_unit-test_MonolithicExecutable_Intrepid2_Tests_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON @@ -1532,7 +1534,7 @@ opt-set-cmake-var Adelus_vector_random_npr4_rhs1_MPI_4_DISABLE BOOL : ON use PACKAGE-ENABLES|NO-EPETRA -use CUDA11-RUN-SERIAL-TESTS +use CUDA-RUN-SERIAL-TESTS [rhel8_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.1.4_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_all] # uses sems-v2 modules @@ -1561,8 +1563,7 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use PACKAGE-ENABLES|NO-EPETRA use COMMON_SPACK_TPLS use SEMS_COMMON_CUDA - -use CUDA11-RUN-SERIAL-TESTS +use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF @@ -1678,7 +1679,7 @@ opt-set-cmake-var Adelus_vector_random_npr4_rhs1_MPI_4_DISABLE BOOL : ON use PACKAGE-ENABLES|NO-EPETRA -use CUDA11-RUN-SERIAL-TESTS +use CUDA-RUN-SERIAL-TESTS [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_all] # uses sems-v2 modules @@ -2113,6 +2114,8 @@ use USE-UVM|NO use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use SEMS_COMMON_CUDA +use CUDA-RUN-SERIAL-TESTS + opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL : ON opt-set-cmake-var TPL_ENABLE_X11 BOOL : OFF opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING FORCE : --bind-to;none --mca btl ^smcuda @@ -2120,7 +2123,6 @@ opt-set-cmake-var Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC BOOL : OFF [rhel8_cuda-11-gcc-openmpi_release_static_Ampere80_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables] use rhel8_cuda-gcc-openmpi_release_static_Ampere80_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables -use CUDA11-RUN-SERIAL-TESTS opt-set-cmake-var ROL_test_elementwise_TpetraMultiVector_MPI_4_DISABLE BOOL : ON 
[rhel8_cuda-gcc-openmpi_release_static_Ampere80_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables] @@ -2139,7 +2141,7 @@ use USE-UVM|YES use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use SEMS_COMMON_CUDA -use CUDA11-RUN-SERIAL-TESTS +use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF opt-set-cmake-var Kokkos_ENABLE_TESTS BOOL FORCE : ON From 00e64a8c578b06d7e00d59103d42c6cddff49614 Mon Sep 17 00:00:00 2001 From: Christian Glusa Date: Fri, 1 Nov 2024 13:32:55 -0600 Subject: [PATCH 18/23] TPLs BLAS and LAPACK: Add OpenBLAS to library names This allows to use OpenBLAS for BLAS and LAPACK without additional configuration. Signed-off-by: Christian Glusa --- cmake/TPLs/FindTPLBLAS.cmake | 2 +- cmake/TPLs/FindTPLLAPACK.cmake | 2 +- cmake/tribits/common_tpls/FindTPLBLAS.cmake | 2 +- cmake/tribits/common_tpls/FindTPLLAPACK.cmake | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/TPLs/FindTPLBLAS.cmake b/cmake/TPLs/FindTPLBLAS.cmake index e2b802502c85..d726dac1ab88 100644 --- a/cmake/TPLs/FindTPLBLAS.cmake +++ b/cmake/TPLs/FindTPLBLAS.cmake @@ -22,4 +22,4 @@ if (MSVC AND NOT endif() tribits_tpl_find_include_dirs_and_libraries( BLAS - REQUIRED_LIBS_NAMES "blas blas_win32") + REQUIRED_LIBS_NAMES "blas blas_win32 openblas") diff --git a/cmake/TPLs/FindTPLLAPACK.cmake b/cmake/TPLs/FindTPLLAPACK.cmake index 8852cbf6b062..170962a309f8 100644 --- a/cmake/TPLs/FindTPLLAPACK.cmake +++ b/cmake/TPLs/FindTPLLAPACK.cmake @@ -16,4 +16,4 @@ if (MSVC AND NOT endif() tribits_tpl_find_include_dirs_and_libraries( LAPACK - REQUIRED_LIBS_NAMES "lapack lapack_win32") + REQUIRED_LIBS_NAMES "lapack lapack_win32 openblas") diff --git a/cmake/tribits/common_tpls/FindTPLBLAS.cmake b/cmake/tribits/common_tpls/FindTPLBLAS.cmake index 1ebe176a80c5..95360d3ebd0f 100644 --- a/cmake/tribits/common_tpls/FindTPLBLAS.cmake +++ b/cmake/tribits/common_tpls/FindTPLBLAS.cmake @@ -8,7 +8,7 @@ # @HEADER -set(REQUIRED_LIBS_NAMES 
"blas blas_win32") +set(REQUIRED_LIBS_NAMES "blas blas_win32 openblas") # # Second, search for BLAS components (if allowed) using the standard diff --git a/cmake/tribits/common_tpls/FindTPLLAPACK.cmake b/cmake/tribits/common_tpls/FindTPLLAPACK.cmake index 9874532fea7c..436cd0ac801c 100644 --- a/cmake/tribits/common_tpls/FindTPLLAPACK.cmake +++ b/cmake/tribits/common_tpls/FindTPLLAPACK.cmake @@ -14,7 +14,7 @@ # to trigger the right behavior in the function # tribits_tpl_find_include_dirs_and_libraries(). # -set(REQUIRED_LIBS_NAMES "lapack lapack_win32") +set(REQUIRED_LIBS_NAMES "lapack lapack_win32 openblas") # # Second, search for LAPACK components (if allowed) using the standard From 9f87f1bf19a8b4833276a459abad8ff9978614d7 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 2 Dec 2024 07:45:16 -0700 Subject: [PATCH 19/23] Correct/unify CUDA BLAS/LAPACK specs Intend to be using system BLAS/LAPACK for all CEE CUDA configs. Looks like this was done per-config, and then applied after the SEMS_COMMON_CUDA_11 block usage. This made the BLAS/LAPACK configs in SEMS_COMMON_CUDA_11 irrelevant, so best to just move those per-config settings into the common block and replace the unused ones. As a benefit, this corrects the ones that were missed for the newer UVM configs (which didn't matter, because the BLAS/LAPACK issue that was resolved with the system libraries was only manifesting in test failures, and we don't currently run tests for the UVM line). Also then remove the BLAS/LAPACK specs from RHEL_COMPILER|CUDA, since they will be set explicitly later. The only ones that mattered were the ones that applied to the AT2 container configs, so for those I copied the existing configs from the GCC container config. Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 49 +++++++------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index 27e71e07c0bd..eab085335c94 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -395,15 +395,23 @@ opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING : ${SEMS_NETCDF_LIBRARY_PATH|ENV}/ opt-set-cmake-var TPL_BLAS_LIBRARIES STRING : ${BLAS_LIBRARIES|ENV} opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING : ${LAPACK_LIBRARIES|ENV} -[COMMON_SPACK_TPLS] -use COMMON - -# BLAS & LAPACK +[SPACK_NETLIB_BLAS_LAPACK] opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-lblas;-lgfortran;-lgomp opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-llapack;-lgfortran;-lgomp opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib +[SPACK_OPENBLAS_BLAS_LAPACK] +opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib +opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm +opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib +opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm + +[COMMON_SPACK_TPLS] +use COMMON + +use SPACK_NETLIB_BLAS_LAPACK + # Boost opt-set-cmake-var BoostLib_INCLUDE_DIRS PATH FORCE : ${BOOST_INC|ENV} opt-set-cmake-var BoostLib_LIBRARY_DIRS PATH FORCE : ${BOOST_LIB|ENV} @@ -1176,11 +1184,7 @@ opt-set-cmake-var Rythmos_StepperBuilder_UnitTest_MPI_1_DISABLE BOOL : ON [RHEL_COMPILER|CUDA] use NODE-TYPE|CUDA -opt-set-cmake-var MPI_EXEC FILEPATH : mpiexec -opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : 
${CBLAS_ROOT|ENV}/lib -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${CBLAS_ROOT|ENV}/lib/libblas.a;-L${CBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm -opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${LAPACK_ROOT|ENV}/lib -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : -L${LAPACK_ROOT|ENV}/lib;-lgfortran;-lgomp;${LAPACK_ROOT|ENV}/lib/liblapack.a +opt-set-cmake-var MPI_EXEC FILEPATH : mpiexec [COMPILER|GNU] opt-set-cmake-var MPI_EXEC FILEPATH : mpirun @@ -1213,16 +1217,15 @@ opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Zlib BOOL FORCE : ON #TPL_*_LIBRARIES -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-lopenblas;-lgfortran;-lgomp -opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-lopenblas;-lgfortran;-lgomp -opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} +# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 +opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 +opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : /lib64/liblapack.so.3 #TPL_[INCLUDE|LIBRARY]_DIRS 
opt-set-cmake-var Netcdf_INCLUDE_DIRS STRING FORCE : ${NETCDF_C_INC|ENV} @@ -1470,14 +1473,10 @@ opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Zlib BOOL FORCE : ON #TPL_*_LIBRARIES -# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl -# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : /lib64/liblapack.so.3 opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} @@ -1616,14 +1615,10 @@ opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Zlib BOOL FORCE : ON #TPL_*_LIBRARIES -# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : 
${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl -# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : /lib64/liblapack.so.3 opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} @@ -1877,11 +1872,7 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS - -opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm -opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm +use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline @@ -1932,13 +1923,10 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS +use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline 
-Wno-unused-but-set-variable -Wno-unused-label -opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm -opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : "" @@ -2141,6 +2129,7 @@ use USE-UVM|YES use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use SEMS_COMMON_CUDA +use SPACK_OPENBLAS_BLAS_LAPACK use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF From 019c516c3ad6433d9a49d6a4561e808b2952d560 Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 2 Dec 2024 08:11:31 -0700 Subject: [PATCH 20/23] Separate BLAS/LAPACK from COMMON_SPACK_TPLS COMMON_SPACK_TPLS was too specific for CEE/internal configs, so separate the BLAS/LAPACK from it to isolate CEE-only specs (such as the explicit Netlib BLAS/LAPACK handling). Signed-off-by: Samuel E. 
Browne --- packages/framework/ini-files/config-specs.ini | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index eab085335c94..db535bfd98b7 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -410,8 +410,6 @@ opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/ [COMMON_SPACK_TPLS] use COMMON -use SPACK_NETLIB_BLAS_LAPACK - # Boost opt-set-cmake-var BoostLib_INCLUDE_DIRS PATH FORCE : ${BOOST_INC|ENV} opt-set-cmake-var BoostLib_LIBRARY_DIRS PATH FORCE : ${BOOST_LIB|ENV} @@ -468,8 +466,6 @@ use COMMON_SPACK_TPLS # Overrides from [COMMON_SPACK_TPLS] to let container handle the values opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : "" -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : "" -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : "" opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : "" opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING FORCE : superlu;m @@ -1340,6 +1336,7 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS +use SPACK_NETLIB_BLAS_LAPACK opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON @@ -1380,6 +1377,7 @@ use USE-UVM|NO use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS +use SPACK_NETLIB_BLAS_LAPACK opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self @@ -1699,6 +1697,7 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS +use SPACK_NETLIB_BLAS_LAPACK opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING : superlu;m opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF @@ -1735,6 +1734,7 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS +use 
SPACK_NETLIB_BLAS_LAPACK opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var CMAKE_CXX_EXTENSIONS BOOL : OFF @@ -1769,6 +1769,7 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS +use SPACK_NETLIB_BLAS_LAPACK opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING FORCE : superlu;m opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF From 004c4826dfcd286e85198b193fa28c88a4bf3b4a Mon Sep 17 00:00:00 2001 From: "Samuel E. Browne" Date: Mon, 2 Dec 2024 14:15:23 -0700 Subject: [PATCH 21/23] Split SEMS vs. general CUDA 11 settings Now we can use general settings in the containerized environments, and leave the SEMS settings in the existing environments. Signed-off-by: Samuel E. Browne --- packages/framework/ini-files/config-specs.ini | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index db535bfd98b7..b63be816f24b 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1189,8 +1189,7 @@ opt-set-cmake-var Trilinos_WARNINGS_MODE STRING : WARN [COMPILER|INTEL] opt-set-cmake-var MPI_EXEC FILEPATH : mpirun -[SEMS_COMMON_CUDA] -# TPL ENABLE/DISABLE settings +[CUDA_TPL_ENABLES] opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON opt-set-cmake-var TPL_ENABLE_BinUtils BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Boost BOOL FORCE : ON @@ -1211,8 +1210,11 @@ opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_SuperLU BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : OFF opt-set-cmake-var TPL_ENABLE_Zlib BOOL FORCE : ON +opt-set-cmake-var EpetraExt_ENABLE_HDF5 BOOL FORCE : OFF +opt-set-cmake-var Kokkos_ENABLE_CUDA BOOL FORCE : ON +opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF -#TPL_*_LIBRARIES +[SEMS_CUDA_TPL_LOCATIONS] opt-set-cmake-var 
TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" @@ -1223,7 +1225,6 @@ opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : /lib64/liblapack.so.3 -#TPL_[INCLUDE|LIBRARY]_DIRS opt-set-cmake-var Netcdf_INCLUDE_DIRS STRING FORCE : ${NETCDF_C_INC|ENV} opt-set-cmake-var ParMETIS_INCLUDE_DIRS STRING FORCE : ${PARMETIS_INC|ENV} opt-set-cmake-var ParMETIS_LIBRARY_DIRS STRING FORCE : ${PARMETIS_LIB|ENV} @@ -1232,12 +1233,13 @@ opt-set-cmake-var Scotch_LIBRARY_DIRS STRING FORCE : ${SCOTCH_LIB|ENV} opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} +[CUDA] +use CUDA_TPL_ENABLES + #CXX Settings opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fPIC -Wall -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wreorder -Wreturn-type -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wwrite-strings #Package Options -opt-set-cmake-var EpetraExt_ENABLE_HDF5 BOOL FORCE : OFF -opt-set-cmake-var Kokkos_ENABLE_CUDA BOOL FORCE : ON opt-set-cmake-var Kokkos_ENABLE_CUDA_LAMBDA BOOL FORCE : ON opt-set-cmake-var Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA BOOL FORCE : ON #opt-set-cmake-var Kokkos_ENABLE_Debug_Bounds_Check BOOL FORCE : ON @@ -1246,7 +1248,10 @@ opt-set-cmake-var Panzer_FADTYPE STRING FORCE : "Sacado::Fad::DFad" opt-set-cmake-var Phalanx_KOKKOS_DEVICE_TYPE STRING FORCE : CUDA opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE 
: ON -opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF + +[SEMS_COMMON_CUDA] +use CUDA +use SEMS_CUDA_TPL_LOCATIONS [CUDA-RUN-SERIAL-TESTS] opt-set-cmake-var Kokkos_CoreUnitTest_Cuda1_SET_RUN_SERIAL BOOL FORCE : ON @@ -2102,7 +2107,7 @@ use USE-RDC|NO use USE-UVM|NO use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA -use SEMS_COMMON_CUDA +use CUDA use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL : ON @@ -2129,14 +2134,13 @@ use USE-RDC|NO use USE-UVM|YES use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA -use SEMS_COMMON_CUDA +use CUDA use SPACK_OPENBLAS_BLAS_LAPACK use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF opt-set-cmake-var Kokkos_ENABLE_TESTS BOOL FORCE : ON - [rhel8_python_debug_shared_no-kokkos-arch_no-asan_no-complex_no-fpic_no-mpi_no-pt_no-rdc_no-uvm_deprecated-on_pr-framework] use PACKAGE-ENABLES|PR-FRAMEWORK From 773e0f9fcd4fd4421afc6ebdbdf2673c9e1b6d2d Mon Sep 17 00:00:00 2001 From: "Roscoe A. Bartlett" Date: Sat, 2 Nov 2024 08:08:43 -0600 Subject: [PATCH 22/23] Use default finds for BLAS and LAPACK In a desire to make Trilinos easier to configure by default, try to use the default finds for BLAS and LAPACK. Leave the existing settings for AT1 builds (since they will soon be replaced by AT2 builds, and also because we know from experience that there may be multiple BLAS/LAPACKs on those systems, and we probably want to keep using the one we've been using). Signed-off-by: Roscoe A. 
Bartlett --- packages/framework/ini-files/config-specs.ini | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index b63be816f24b..53b437054819 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -392,8 +392,6 @@ opt-set-cmake-var Scotch_LIBRARY_DIRS PATH : ${SEMS_SCOTCH_LIBRARY_PATH|ENV} # Explicit libraries opt-set-cmake-var TPL_DLlib_LIBRARIES PATH : ${DL_LIBRARIES|ENV} opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING : ${SEMS_NETCDF_LIBRARY_PATH|ENV}/libnetcdf.so;${SEMS_NETCDF_LIBRARY_PATH|ENV}/libpnetcdf.a -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING : ${BLAS_LIBRARIES|ENV} -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING : ${LAPACK_LIBRARIES|ENV} [SPACK_NETLIB_BLAS_LAPACK] opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-lblas;-lgfortran;-lgomp @@ -401,12 +399,6 @@ opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : -L${BLAS_ROOT|ENV}/lib;-llapack;-lgfortran;-lgomp opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${BLAS_ROOT|ENV}/lib -[SPACK_OPENBLAS_BLAS_LAPACK] -opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib -opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm -opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib -opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm - [COMMON_SPACK_TPLS] use COMMON @@ -1878,7 +1870,6 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS -use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self opt-set-cmake-var 
CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline @@ -1929,11 +1920,9 @@ use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-PACKAGE-ENABLES use COMMON_SPACK_TPLS -use SPACK_OPENBLAS_BLAS_LAPACK opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-nonnull-compare -Wno-address -Wno-inline -Wno-unused-but-set-variable -Wno-unused-label - opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : "" opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF @@ -2135,7 +2124,6 @@ use USE-UVM|YES use USE-DEPRECATED|YES use PACKAGE-ENABLES|NO-EPETRA use CUDA -use SPACK_OPENBLAS_BLAS_LAPACK use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF From dc78725eac544cbde96f3dc67aa8e344288094cd Mon Sep 17 00:00:00 2001 From: "Justin M. LaPre" Date: Thu, 5 Dec 2024 11:33:23 -0700 Subject: [PATCH 23/23] Update to use CUDA-RUN-SERIAL-TESTS Signed-off-by: Justin M. 
LaPre --- packages/framework/ini-files/config-specs.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index aa5ebcbc42a5..a59a18d50591 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1568,7 +1568,7 @@ use PACKAGE-ENABLES|ALL-NO-EPETRA [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all-no-epetra] use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all -use CUDA11-RUN-SERIAL-TESTS +use CUDA-RUN-SERIAL-TESTS opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : ON [rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables]